diff --git a/src/core/cpu_newrec_compiler_aarch32.cpp b/src/core/cpu_newrec_compiler_aarch32.cpp index 218bb6940..3dccbfe36 100644 --- a/src/core/cpu_newrec_compiler_aarch32.cpp +++ b/src/core/cpu_newrec_compiler_aarch32.cpp @@ -1931,21 +1931,22 @@ void CPU::NewRec::AArch32Compiler::Compile_mtc0(CompileFlags cf) Flush(FLUSH_FOR_C_CALL); SwitchToFarCodeIfBitSet(changed_bits, 16); - armAsm->push(RegisterList(RARG1, RARG2)); + armAsm->push(RegisterList(RARG1)); EmitCall(reinterpret_cast(&CPU::UpdateMemoryPointers)); - armAsm->pop(RegisterList(RARG1, RARG2)); + armAsm->pop(RegisterList(RARG1)); if (CodeCache::IsUsingFastmem() && m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions) && IsHostRegAllocated(RMEMBASE.GetCode())) { FreeHostReg(RMEMBASE.GetCode()); } SwitchToNearCode(true); - } - if (reg == Cop0Reg::SR || reg == Cop0Reg::CAUSE) + TestInterrupts(RARG1); + } + else if (reg == Cop0Reg::CAUSE) { - const Register sr = (reg == Cop0Reg::SR) ? RARG2 : (armAsm->ldr(RARG1, PTR(&g_state.cop0_regs.sr.bits)), RARG1); - TestInterrupts(sr); + armAsm->ldr(RARG1, PTR(&g_state.cop0_regs.sr.bits)); + TestInterrupts(RARG1); } if (reg == Cop0Reg::DCIC && g_settings.cpu_recompiler_memory_exceptions) diff --git a/src/core/cpu_newrec_compiler_aarch64.cpp b/src/core/cpu_newrec_compiler_aarch64.cpp index 6725af68b..114b35d42 100644 --- a/src/core/cpu_newrec_compiler_aarch64.cpp +++ b/src/core/cpu_newrec_compiler_aarch64.cpp @@ -1911,18 +1911,19 @@ void CPU::NewRec::AArch64Compiler::Compile_mtc0(CompileFlags cf) SwitchToFarCodeIfBitSet(changed_bits, 16); armAsm->sub(sp, sp, 16); - armAsm->stp(RWARG1, RWARG2, MemOperand(sp)); + armAsm->str(RWARG1, MemOperand(sp)); EmitCall(reinterpret_cast(&CPU::UpdateMemoryPointers)); - armAsm->ldp(RWARG1, RWARG2, MemOperand(sp)); + armAsm->ldr(RWARG1, MemOperand(sp)); armAsm->add(sp, sp, 16); armAsm->ldr(RMEMBASE, PTR(&g_state.fastmem_base)); SwitchToNearCode(true); - } - if (reg == Cop0Reg::SR || reg == Cop0Reg::CAUSE) + TestInterrupts(RWARG1); + } + else if (reg == Cop0Reg::CAUSE) { - const WRegister sr = (reg == Cop0Reg::SR) ? RWARG2 : (armAsm->ldr(RWARG1, PTR(&g_state.cop0_regs.sr.bits)), RWARG1); - TestInterrupts(sr); + armAsm->ldr(RWARG1, PTR(&g_state.cop0_regs.sr.bits)); + TestInterrupts(RWARG1); } if (reg == Cop0Reg::DCIC && g_settings.cpu_recompiler_memory_exceptions) diff --git a/src/core/cpu_newrec_compiler_riscv64.cpp b/src/core/cpu_newrec_compiler_riscv64.cpp index bdfd2ccac..be3146192 100644 --- a/src/core/cpu_newrec_compiler_riscv64.cpp +++ b/src/core/cpu_newrec_compiler_riscv64.cpp @@ -2160,19 +2160,18 @@ void CPU::NewRec::RISCV64Compiler::Compile_mtc0(CompileFlags cf) SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero); rvAsm->ADDI(sp, sp, -16); rvAsm->SW(RARG1, 0, sp); - rvAsm->SW(RARG2, 8, sp); EmitCall(reinterpret_cast(&CPU::UpdateMemoryPointers)); - rvAsm->SW(RARG2, 8, sp); - rvAsm->SW(RARG1, 0, sp); + rvAsm->LW(RARG1, 0, sp); rvAsm->ADDI(sp, sp, 16); rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base)); SwitchToNearCode(true); - } - if (reg == Cop0Reg::SR || reg == Cop0Reg::CAUSE) + TestInterrupts(RARG1); + } + else if (reg == Cop0Reg::CAUSE) { - const GPR sr = (reg == Cop0Reg::SR) ? RARG2 : (rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits)), RARG1); - TestInterrupts(sr); + rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits)); + TestInterrupts(RARG1); } if (reg == Cop0Reg::DCIC && g_settings.cpu_recompiler_memory_exceptions) diff --git a/src/core/cpu_newrec_compiler_x64.cpp b/src/core/cpu_newrec_compiler_x64.cpp index fd54572aa..991995a82 100644 --- a/src/core/cpu_newrec_compiler_x64.cpp +++ b/src/core/cpu_newrec_compiler_x64.cpp @@ -28,6 +28,13 @@ Log_SetChannel(CPU::NewRec); static constexpr u32 BACKPATCH_JMP_SIZE = 5; +// on win32, we need to reserve an additional 32 bytes shadow space when calling out to C +#ifdef _WIN32 +static constexpr u32 STACK_SHADOW_SIZE = 32; +#else +static constexpr u32 STACK_SHADOW_SIZE = 0; +#endif + using namespace Xbyak; using CPU::Recompiler::IsCallerSavedRegister; @@ -1881,20 +1888,20 @@ void CPU::NewRec::X64Compiler::Compile_mtc0(CompileFlags cf) cg->test(changed_bits, 1u << 16); SwitchToFarCode(true, &CodeGenerator::jnz); - cg->push(RWARG1); - cg->push(RWARG2); + cg->mov(cg->dword[cg->rsp], RWARG2); + cg->sub(cg->rsp, STACK_SHADOW_SIZE + 8); cg->call(&CPU::UpdateMemoryPointers); - cg->pop(RWARG2); - cg->pop(RWARG1); + cg->add(cg->rsp, STACK_SHADOW_SIZE + 8); + cg->mov(RWARG2, cg->dword[cg->rsp]); cg->mov(RMEMBASE, cg->qword[PTR(&g_state.fastmem_base)]); SwitchToNearCode(true); - } - if (reg == Cop0Reg::SR || reg == Cop0Reg::CAUSE) + TestInterrupts(RWARG2); + } + else if (reg == Cop0Reg::CAUSE) { - const Reg32 sr = - (reg == Cop0Reg::SR) ? RWARG2 : (cg->mov(RWARG1, cg->dword[PTR(&g_state.cop0_regs.sr.bits)]), RWARG1); - TestInterrupts(sr); + cg->mov(RWARG1, cg->dword[PTR(&g_state.cop0_regs.sr.bits)]); + TestInterrupts(RWARG1); } if (reg == Cop0Reg::DCIC && g_settings.cpu_recompiler_memory_exceptions) @@ -2104,13 +2111,6 @@ u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* static constexpr u32 GPR_SIZE = 8; - // on win32, we need to reserve an additional 32 bytes shadow space when calling out to C -#ifdef _WIN32 - static constexpr u32 SHADOW_SIZE = 32; -#else - static constexpr u32 SHADOW_SIZE = 0; -#endif - // save regs u32 num_gprs = 0; @@ -2120,13 +2120,13 @@ u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* num_gprs++; } - const u32 stack_size = (((num_gprs + 1) & ~1u) * GPR_SIZE) + SHADOW_SIZE; + const u32 stack_size = (((num_gprs + 1) & ~1u) * GPR_SIZE) + STACK_SHADOW_SIZE; if (stack_size > 0) { cg->sub(cg->rsp, stack_size); - u32 stack_offset = SHADOW_SIZE; + u32 stack_offset = STACK_SHADOW_SIZE; for (u32 i = 0; i < NUM_HOST_REGS; i++) { if ((gpr_bitmask & (1u << i)) && IsCallerSavedRegister(i) && (!is_load || data_register != i)) @@ -2201,7 +2201,7 @@ u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* // restore regs if (stack_size > 0) { - u32 stack_offset = SHADOW_SIZE; + u32 stack_offset = STACK_SHADOW_SIZE; for (u32 i = 0; i < NUM_HOST_REGS; i++) { if ((gpr_bitmask & (1u << i)) && IsCallerSavedRegister(i) && (!is_load || data_register != i)) diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp index a8510cece..035261743 100644 --- a/src/core/cpu_recompiler_code_generator_aarch64.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp @@ -1411,17 +1411,7 @@ void CodeGenerator::RestoreStackAfterCall(u32 adjust_size) void CodeGenerator::EmitCall(const void* ptr) { - const s64 displacement = armGetPCDisplacement(GetCurrentCodePointer(), ptr); - const bool use_blr = !vixl::IsInt26(displacement); - if (use_blr) - { - m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast(ptr)); - m_emit->Blr(GetHostReg64(RSCRATCH)); - } - else - { - m_emit->bl(displacement); - } + armEmitCall(m_emit, ptr, false); } void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)