From 8ebda3cdc8b3694389a970c0abb3852728ea2108 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 31 Mar 2024 13:17:05 +1000 Subject: [PATCH] CPU/NewRec: Fix register corruption in swl/swr --- src/core/cpu_newrec_compiler_aarch32.cpp | 30 +++++++++------- src/core/cpu_newrec_compiler_aarch64.cpp | 32 +++++++++-------- src/core/cpu_newrec_compiler_riscv64.cpp | 32 +++++++++-------- src/core/cpu_newrec_compiler_x64.cpp | 45 +++++++++++++----------- 4 files changed, 75 insertions(+), 64 deletions(-) diff --git a/src/core/cpu_newrec_compiler_aarch32.cpp b/src/core/cpu_newrec_compiler_aarch32.cpp index 0592b7f93..e96897cd4 100644 --- a/src/core/cpu_newrec_compiler_aarch32.cpp +++ b/src/core/cpu_newrec_compiler_aarch32.cpp @@ -1785,7 +1785,13 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize { DebugAssert(size == MemoryAccessSize::Word && !sign); + // TODO: this can take over rt's value if it's no longer needed + // NOTE: can't trust T in cf because of the alloc const Register addr = Register(AllocateTempHostReg(HR_CALLEE_SAVED)); + const Register value = g_settings.gpu_pgxp_enable ? Register(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2; + if (g_settings.gpu_pgxp_enable) + MoveMIPSRegToReg(value, inst->r.rt); + FlushForLoadStore(address, true, use_fastmem); // TODO: if address is constant, this can be simplified.. @@ -1794,20 +1800,13 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u)); GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; }); - // TODO: this can take over rt's value if it's no longer needed - // NOTE: can't trust T in cf because of the flush - const Reg rt = inst->r.rt; - const Register value = g_settings.gpu_pgxp_enable ? Register(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2; - MoveMIPSRegToReg(value, rt); - armAsm->and_(RSCRATCH, addr, 3); armAsm->lsl(RSCRATCH, RSCRATCH, 3); // *8 + armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u)); - // Don't need the original address anymore. + // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush. if (!g_settings.gpu_pgxp_enable) - FreeHostReg(addr.GetCode()); - else - armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u)); + MoveMIPSRegToReg(value, inst->r.rt); if (inst->op == InstructionOp::swl) { @@ -1836,10 +1835,15 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize armAsm->orr(value, value, RRET); } - GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); - - if (g_settings.gpu_pgxp_enable) + if (!g_settings.gpu_pgxp_enable) { + GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); + FreeHostReg(addr.GetCode()); + } + else + { + GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); + Flush(FLUSH_FOR_C_CALL); armAsm->mov(RARG3, value); FreeHostReg(value.GetCode()); diff --git a/src/core/cpu_newrec_compiler_aarch64.cpp b/src/core/cpu_newrec_compiler_aarch64.cpp index 13a47201b..b040e4fe1 100644 --- a/src/core/cpu_newrec_compiler_aarch64.cpp +++ b/src/core/cpu_newrec_compiler_aarch64.cpp @@ -1764,7 +1764,13 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize { DebugAssert(size == MemoryAccessSize::Word && !sign); + // TODO: this can take over rt's value if it's no longer needed + // NOTE: can't trust T in cf because of the alloc const WRegister addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)); + const WRegister value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2; + if (g_settings.gpu_pgxp_enable) + MoveMIPSRegToReg(value, inst->r.rt); + FlushForLoadStore(address, true, use_fastmem); // TODO: if address is constant, this can be simplified.. @@ -1773,20 +1779,13 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u)); GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; }); - // TODO: this can take over rt's value if it's no longer needed - // NOTE: can't trust T in cf because of the flush - const Reg rt = inst->r.rt; - const WRegister value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2; - MoveMIPSRegToReg(value, rt); - armAsm->and_(RWSCRATCH, addr, 3); armAsm->lsl(RWSCRATCH, RWSCRATCH, 3); // *8 + armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u)); - // Don't need the original address anymore. + // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush. if (!g_settings.gpu_pgxp_enable) - FreeHostReg(addr.GetCode()); - else - armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u)); + MoveMIPSRegToReg(value, inst->r.rt); if (inst->op == InstructionOp::swl) { @@ -1815,12 +1814,15 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize armAsm->orr(value, value, RWRET); } - FreeHostReg(addr.GetCode()); - - GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); - - if (g_settings.gpu_pgxp_enable) + if (!g_settings.gpu_pgxp_enable) { + GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); + FreeHostReg(addr.GetCode()); + } + else + { + GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); + Flush(FLUSH_FOR_C_CALL); armAsm->mov(RWARG3, value); FreeHostReg(value.GetCode()); diff --git a/src/core/cpu_newrec_compiler_riscv64.cpp b/src/core/cpu_newrec_compiler_riscv64.cpp index a847828ae..0a0631966 100644 --- a/src/core/cpu_newrec_compiler_riscv64.cpp +++ b/src/core/cpu_newrec_compiler_riscv64.cpp @@ -2071,7 +2071,13 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize { DebugAssert(size == MemoryAccessSize::Word && !sign); + // TODO: this can take over rt's value if it's no longer needed + // NOTE: can't trust T in cf because of the alloc const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED)); + const GPR value = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2; + if (g_settings.gpu_pgxp_enable) + MoveMIPSRegToReg(value, inst->r.rt); + FlushForLoadStore(address, true, use_fastmem); // TODO: if address is constant, this can be simplified.. @@ -2080,20 +2086,13 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize rvAsm->ANDI(RARG1, addr, ~0x3u); GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; }); - // TODO: this can take over rt's value if it's no longer needed - // NOTE: can't trust T in cf because of the flush - const Reg rt = inst->r.rt; - const GPR value = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2; - MoveMIPSRegToReg(value, rt); - rvAsm->ANDI(RSCRATCH, addr, 3); rvAsm->SLLIW(RSCRATCH, RSCRATCH, 3); // *8 + rvAsm->ANDI(addr, addr, ~0x3u); - // Don't need the original address anymore. + // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush. if (!g_settings.gpu_pgxp_enable) - FreeHostReg(addr.Index()); - else - rvAsm->ANDI(addr, addr, ~0x3u); + MoveMIPSRegToReg(value, inst->r.rt); if (inst->op == InstructionOp::swl) { @@ -2122,12 +2121,15 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize rvAsm->OR(value, value, RRET); } - FreeHostReg(addr.Index()); - - GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); - - if (g_settings.gpu_pgxp_enable) + if (!g_settings.gpu_pgxp_enable) { + GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); + FreeHostReg(addr.Index()); + } + else + { + GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); + Flush(FLUSH_FOR_C_CALL); rvAsm->MV(RARG3, value); FreeHostReg(value.Index()); diff --git a/src/core/cpu_newrec_compiler_x64.cpp b/src/core/cpu_newrec_compiler_x64.cpp index 6de39d0a8..0b59bac47 100644 --- a/src/core/cpu_newrec_compiler_x64.cpp +++ b/src/core/cpu_newrec_compiler_x64.cpp @@ -1577,14 +1577,13 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz cg->mov(RWARG2, 24); cg->sub(RWARG2, cg->ecx); - const Reg32& temp = (RWARG3 == cg->ecx) ? RWARG4 : RWARG3; if (inst->op == InstructionOp::lwl) { // const u32 mask = UINT32_C(0x00FFFFFF) >> shift; // new_value = (value & mask) | (RWRET << (24 - shift)); - cg->mov(temp, 0xFFFFFFu); - cg->shr(temp, cg->cl); - cg->and_(value, temp); + cg->mov(RWARG3, 0xFFFFFFu); + cg->shr(RWARG3, cg->cl); + cg->and_(value, RWARG3); cg->mov(cg->ecx, RWARG2); cg->shl(RWRET, cg->cl); cg->or_(value, RWRET); @@ -1594,10 +1593,10 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz // const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift); // new_value = (value & mask) | (RWRET >> shift); cg->shr(RWRET, cg->cl); - cg->mov(temp, 0xFFFFFF00u); + cg->mov(RWARG3, 0xFFFFFF00u); cg->mov(cg->ecx, RWARG2); - cg->shl(temp, cg->cl); - cg->and_(value, temp); + cg->shl(RWARG3, cg->cl); + cg->and_(value, RWARG3); cg->or_(value, RWRET); } @@ -1730,7 +1729,13 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz { DebugAssert(size == MemoryAccessSize::Word && !sign); + // TODO: this can take over rt's value if it's no longer needed + // NOTE: can't trust T in cf because of the alloc const Reg32 addr = Reg32(AllocateTempHostReg(HR_CALLEE_SAVED)); + const Reg32 value = g_settings.gpu_pgxp_enable ? Reg32(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2; + if (g_settings.gpu_pgxp_enable) + MoveMIPSRegToReg(value, inst->r.rt); + FlushForLoadStore(address, true, use_fastmem); // TODO: if address is constant, this can be simplified.. @@ -1740,22 +1745,15 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz cg->and_(RWARG1, ~0x3u); GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; }); - // TODO: this can take over rt's value if it's no longer needed - // NOTE: can't trust T in cf because of the flush - const Reg rt = inst->r.rt; - const Reg32 value = g_settings.gpu_pgxp_enable ? Reg32(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2; DebugAssert(value != cg->ecx); - MoveMIPSRegToReg(value, rt); - cg->mov(cg->ecx, addr); cg->and_(cg->ecx, 3); cg->shl(cg->ecx, 3); // *8 + cg->and_(addr, ~0x3u); - // Don't need the original address anymore. - if (g_settings.gpu_pgxp_enable) - cg->and_(addr, ~0x3u); - else - FreeHostReg(addr.getIdx()); + // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush. + if (!g_settings.gpu_pgxp_enable) + MoveMIPSRegToReg(value, inst->r.rt); if (inst->op == InstructionOp::swl) { @@ -1787,10 +1785,15 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz cg->or_(value, RWRET); } - GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); - - if (g_settings.gpu_pgxp_enable) + if (!g_settings.gpu_pgxp_enable) { + GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); + FreeHostReg(addr.getIdx()); + } + else + { + GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); + Flush(FLUSH_FOR_C_CALL); cg->mov(RWARG3, value); FreeHostReg(value.getIdx());