From bc2c334370e1cedc3b98079cc5f31cb1cc2de46e Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 13 Dec 2023 20:56:24 +1000 Subject: [PATCH] Misc: Combine some redundant functions --- src/core/cpu_core.cpp | 316 +++++++++++---------- src/core/cpu_core.h | 6 +- src/core/cpu_newrec_compiler.cpp | 4 +- src/core/cpu_pgxp.cpp | 125 ++++---- src/core/cpu_pgxp.h | 40 ++- src/core/cpu_recompiler_code_generator.cpp | 96 +++++-- 6 files changed, 326 insertions(+), 261 deletions(-) diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index 2f399feb5..6b0f6e15e 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -10,11 +10,11 @@ #include "cpu_code_cache_private.h" #include "cpu_core_private.h" #include "cpu_disasm.h" +#include "cpu_pgxp.h" #include "cpu_recompiler_thunks.h" #include "gte.h" #include "host.h" #include "pcdrv.h" -#include "cpu_pgxp.h" #include "settings.h" #include "system.h" #include "timing_event.h" @@ -920,236 +920,250 @@ restart_instruction: { case InstructionFunct::sll: { - const u32 new_value = ReadReg(inst.r.rt) << inst.r.shamt; - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLL(inst.bits, ReadReg(inst.r.rt)); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rtVal << inst.r.shamt; + WriteReg(inst.r.rd, rdVal); - WriteReg(inst.r.rd, new_value); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLL(inst.bits, rtVal); } break; case InstructionFunct::srl: { - const u32 new_value = ReadReg(inst.r.rt) >> inst.r.shamt; - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SRL(inst.bits, ReadReg(inst.r.rt)); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rtVal >> inst.r.shamt; + WriteReg(inst.r.rd, rdVal); - WriteReg(inst.r.rd, new_value); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRL(inst.bits, rtVal); } break; case InstructionFunct::sra: { - const u32 new_value = static_cast(static_cast(ReadReg(inst.r.rt)) >> inst.r.shamt); - if constexpr (pgxp_mode >= PGXPMode::CPU) - 
PGXP::CPU_SRA(inst.bits, ReadReg(inst.r.rt)); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = static_cast(static_cast(rtVal) >> inst.r.shamt); + WriteReg(inst.r.rd, rdVal); - WriteReg(inst.r.rd, new_value); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRA(inst.bits, rtVal); } break; case InstructionFunct::sllv: { - const u32 shift_amount = ReadReg(inst.r.rs) & UINT32_C(0x1F); - const u32 new_value = ReadReg(inst.r.rt) << shift_amount; + const u32 rtVal = ReadReg(inst.r.rt); + const u32 shamt = ReadReg(inst.r.rs) & UINT32_C(0x1F); + const u32 rdVal = rtVal << shamt; if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLLV(inst.bits, ReadReg(inst.r.rt), shift_amount); + PGXP::CPU_SLLV(inst.bits, rtVal, shamt); - WriteReg(inst.r.rd, new_value); + WriteReg(inst.r.rd, rdVal); } break; case InstructionFunct::srlv: { - const u32 shift_amount = ReadReg(inst.r.rs) & UINT32_C(0x1F); - const u32 new_value = ReadReg(inst.r.rt) >> shift_amount; - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SRLV(inst.bits, ReadReg(inst.r.rt), shift_amount); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 shamt = ReadReg(inst.r.rs) & UINT32_C(0x1F); + const u32 rdVal = rtVal >> shamt; + WriteReg(inst.r.rd, rdVal); - WriteReg(inst.r.rd, new_value); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRLV(inst.bits, rtVal, shamt); } break; case InstructionFunct::srav: { - const u32 shift_amount = ReadReg(inst.r.rs) & UINT32_C(0x1F); - const u32 new_value = static_cast(static_cast(ReadReg(inst.r.rt)) >> shift_amount); - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SRAV(inst.bits, ReadReg(inst.r.rt), shift_amount); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 shamt = ReadReg(inst.r.rs) & UINT32_C(0x1F); + const u32 rdVal = static_cast(static_cast(rtVal) >> shamt); + WriteReg(inst.r.rd, rdVal); - WriteReg(inst.r.rd, new_value); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SRAV(inst.bits, rtVal, shamt); } break; case 
InstructionFunct::and_: { - const u32 new_value = ReadReg(inst.r.rs) & ReadReg(inst.r.rt); - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_AND_(inst.bits, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); - + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 new_value = rsVal & rtVal; WriteReg(inst.r.rd, new_value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_AND_(inst.bits, rsVal, rtVal); } break; case InstructionFunct::or_: { - const u32 new_value = ReadReg(inst.r.rs) | ReadReg(inst.r.rt); - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_OR_(inst.bits, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); - + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 new_value = rsVal | rtVal; WriteReg(inst.r.rd, new_value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_OR_(inst.bits, rsVal, rtVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); } break; case InstructionFunct::xor_: { - const u32 new_value = ReadReg(inst.r.rs) ^ ReadReg(inst.r.rt); - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_XOR_(inst.bits, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); - + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 new_value = rsVal ^ rtVal; WriteReg(inst.r.rd, new_value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_XOR_(inst.bits, rsVal, rtVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); } break; case InstructionFunct::nor: { - const u32 new_value = ~(ReadReg(inst.r.rs) | ReadReg(inst.r.rt)); - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_NOR(inst.bits, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); - + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 new_value = ~(rsVal | rtVal); WriteReg(inst.r.rd, new_value); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + 
PGXP::CPU_NOR(inst.bits, rsVal, rtVal); } break; case InstructionFunct::add: { - const u32 old_value = ReadReg(inst.r.rs); - const u32 add_value = ReadReg(inst.r.rt); - const u32 new_value = old_value + add_value; - if (AddOverflow(old_value, add_value, new_value)) + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rsVal + rtVal; + if (AddOverflow(rsVal, rtVal, rdVal)) { RaiseException(Exception::Ov); return; } + WriteReg(inst.r.rd, rdVal); + if constexpr (pgxp_mode == PGXPMode::CPU) - PGXP::CPU_ADD(inst.bits, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + PGXP::CPU_ADD(inst.bits, rsVal, rtVal); else if constexpr (pgxp_mode >= PGXPMode::Memory) - { - if (add_value == 0) - { - PGXP::CPU_MOVE((static_cast(inst.r.rd.GetValue()) << 8) | static_cast(inst.r.rs.GetValue()), - old_value); - } - } - - WriteReg(inst.r.rd, new_value); + PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); } break; case InstructionFunct::addu: { - const u32 old_value = ReadReg(inst.r.rs); - const u32 add_value = ReadReg(inst.r.rt); - const u32 new_value = old_value + add_value; + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rsVal + rtVal; + WriteReg(inst.r.rd, rdVal); + if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_ADD(inst.bits, old_value, add_value); + PGXP::CPU_ADD(inst.bits, rsVal, rtVal); else if constexpr (pgxp_mode >= PGXPMode::Memory) - { - if (add_value == 0) - { - PGXP::CPU_MOVE((static_cast(inst.r.rd.GetValue()) << 8) | static_cast(inst.r.rs.GetValue()), - old_value); - } - } - - WriteReg(inst.r.rd, new_value); + PGXP::TryMove(inst.r.rd, inst.r.rs, inst.r.rt); } break; case InstructionFunct::sub: { - const u32 old_value = ReadReg(inst.r.rs); - const u32 sub_value = ReadReg(inst.r.rt); - const u32 new_value = old_value - sub_value; - if (SubOverflow(old_value, sub_value, new_value)) + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = 
rsVal - rtVal; + if (SubOverflow(rsVal, rtVal, rdVal)) { RaiseException(Exception::Ov); return; } - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SUB(inst.bits, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + WriteReg(inst.r.rd, rdVal); - WriteReg(inst.r.rd, new_value); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SUB(inst.bits, rsVal, rtVal); } break; case InstructionFunct::subu: { - const u32 new_value = ReadReg(inst.r.rs) - ReadReg(inst.r.rt); - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SUB(inst.bits, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 rdVal = rsVal - rtVal; + WriteReg(inst.r.rd, rdVal); - WriteReg(inst.r.rd, new_value); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SUB(inst.bits, rsVal, rtVal); } break; case InstructionFunct::slt: { - const u32 result = BoolToUInt32(static_cast(ReadReg(inst.r.rs)) < static_cast(ReadReg(inst.r.rt))); - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLT(inst.bits, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); - + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 result = BoolToUInt32(static_cast(rsVal) < static_cast(rtVal)); WriteReg(inst.r.rd, result); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLT(inst.bits, rsVal, rtVal); } break; case InstructionFunct::sltu: { - const u32 result = BoolToUInt32(ReadReg(inst.r.rs) < ReadReg(inst.r.rt)); - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLTU(inst.bits, ReadReg(inst.r.rs), ReadReg(inst.r.rt)); - + const u32 rsVal = ReadReg(inst.r.rs); + const u32 rtVal = ReadReg(inst.r.rt); + const u32 result = BoolToUInt32(rsVal < rtVal); WriteReg(inst.r.rd, result); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_SLTU(inst.bits, rsVal, rtVal); } break; case InstructionFunct::mfhi: { - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_MFHI(inst.bits, g_state.regs.hi); + const u32 value = 
g_state.regs.hi; + WriteReg(inst.r.rd, value); - WriteReg(inst.r.rd, g_state.regs.hi); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MOVE(static_cast(inst.r.rd.GetValue()), static_cast(Reg::hi), value); } break; case InstructionFunct::mthi: { const u32 value = ReadReg(inst.r.rs); - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_MTHI(inst.bits, value); - g_state.regs.hi = value; + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MOVE(static_cast(Reg::hi), static_cast(inst.r.rs.GetValue()), value); } break; case InstructionFunct::mflo: { - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_MFLO(inst.bits, g_state.regs.lo); + const u32 value = g_state.regs.lo; + WriteReg(inst.r.rd, value); - WriteReg(inst.r.rd, g_state.regs.lo); + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MOVE(static_cast(inst.r.rd.GetValue()), static_cast(Reg::lo), value); } break; case InstructionFunct::mtlo: { const u32 value = ReadReg(inst.r.rs); - if constexpr (pgxp_mode == PGXPMode::CPU) - PGXP::CPU_MTLO(inst.bits, value); - g_state.regs.lo = value; + + if constexpr (pgxp_mode == PGXPMode::CPU) + PGXP::CPU_MOVE(static_cast(Reg::lo), static_cast(inst.r.rs.GetValue()), value); } break; @@ -1174,11 +1188,11 @@ restart_instruction: const u32 rhs = ReadReg(inst.r.rt); const u64 result = ZeroExtend64(lhs) * ZeroExtend64(rhs); - if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_MULTU(inst.bits, lhs, rhs); - g_state.regs.hi = Truncate32(result >> 32); g_state.regs.lo = Truncate32(result); + + if constexpr (pgxp_mode >= PGXPMode::CPU) + PGXP::CPU_MULTU(inst.bits, lhs, rhs); } break; @@ -1285,103 +1299,95 @@ restart_instruction: case InstructionOp::andi: { - const u32 new_value = ReadReg(inst.i.rs) & inst.i.imm_zext32(); + const u32 rsVal = ReadReg(inst.i.rs); + const u32 new_value = rsVal & inst.i.imm_zext32(); + WriteReg(inst.i.rt, new_value); if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_ANDI(inst.bits, ReadReg(inst.i.rs)); - - WriteReg(inst.i.rt, 
new_value); + PGXP::CPU_ANDI(inst.bits, rsVal); } break; case InstructionOp::ori: { - const u32 new_value = ReadReg(inst.i.rs) | inst.i.imm_zext32(); + const u32 rsVal = ReadReg(inst.i.rs); + const u32 imm = inst.i.imm_zext32(); + const u32 rtVal = rsVal | imm; + WriteReg(inst.i.rt, rtVal); if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_ORI(inst.bits, ReadReg(inst.i.rs)); - - WriteReg(inst.i.rt, new_value); + PGXP::CPU_ORI(inst.bits, rsVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMoveImm(inst.i.rt, inst.i.rs, imm); } break; case InstructionOp::xori: { - const u32 new_value = ReadReg(inst.i.rs) ^ inst.i.imm_zext32(); + const u32 rsVal = ReadReg(inst.i.rs); + const u32 imm = inst.i.imm_zext32(); + const u32 new_value = rsVal ^ imm; + WriteReg(inst.i.rt, new_value); if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_XORI(inst.bits, ReadReg(inst.i.rs)); - - WriteReg(inst.i.rt, new_value); + PGXP::CPU_XORI(inst.bits, rsVal); + else if constexpr (pgxp_mode >= PGXPMode::Memory) + PGXP::TryMoveImm(inst.i.rt, inst.i.rs, imm); } break; case InstructionOp::addi: { - const u32 old_value = ReadReg(inst.i.rs); - const u32 add_value = inst.i.imm_sext32(); - const u32 new_value = old_value + add_value; - if (AddOverflow(old_value, add_value, new_value)) + const u32 rsVal = ReadReg(inst.i.rs); + const u32 imm = inst.i.imm_sext32(); + const u32 rtVal = rsVal + imm; + if (AddOverflow(rsVal, imm, rtVal)) { RaiseException(Exception::Ov); return; } + WriteReg(inst.i.rt, rtVal); + if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_ADDI(inst.bits, ReadReg(inst.i.rs)); + PGXP::CPU_ADDI(inst.bits, rsVal); else if constexpr (pgxp_mode >= PGXPMode::Memory) - { - if (add_value == 0) - { - PGXP::CPU_MOVE((static_cast(inst.i.rt.GetValue()) << 8) | static_cast(inst.i.rs.GetValue()), - old_value); - } - } - - WriteReg(inst.i.rt, new_value); + PGXP::TryMoveImm(inst.i.rt, inst.i.rs, imm); } break; case InstructionOp::addiu: { - const u32 old_value = 
ReadReg(inst.i.rs); - const u32 add_value = inst.i.imm_sext32(); - const u32 new_value = old_value + add_value; + const u32 rsVal = ReadReg(inst.i.rs); + const u32 imm = inst.i.imm_sext32(); + const u32 rtVal = rsVal + imm; + WriteReg(inst.i.rt, rtVal); if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_ADDI(inst.bits, ReadReg(inst.i.rs)); + PGXP::CPU_ADDI(inst.bits, rsVal); else if constexpr (pgxp_mode >= PGXPMode::Memory) - { - if (add_value == 0) - { - PGXP::CPU_MOVE((static_cast(inst.i.rt.GetValue()) << 8) | static_cast(inst.i.rs.GetValue()), - old_value); - } - } - - WriteReg(inst.i.rt, new_value); + PGXP::TryMoveImm(inst.i.rt, inst.i.rs, imm); } break; case InstructionOp::slti: { - const u32 result = BoolToUInt32(static_cast(ReadReg(inst.i.rs)) < static_cast(inst.i.imm_sext32())); + const u32 rsVal = ReadReg(inst.i.rs); + const u32 result = BoolToUInt32(static_cast(rsVal) < static_cast(inst.i.imm_sext32())); + WriteReg(inst.i.rt, result); if constexpr (pgxp_mode >= PGXPMode::CPU) - PGXP::CPU_SLTI(inst.bits, ReadReg(inst.i.rs)); - - WriteReg(inst.i.rt, result); + PGXP::CPU_SLTI(inst.bits, rsVal); } break; case InstructionOp::sltiu: { const u32 result = BoolToUInt32(ReadReg(inst.i.rs) < inst.i.imm_sext32()); + WriteReg(inst.i.rt, result); if constexpr (pgxp_mode >= PGXPMode::CPU) PGXP::CPU_SLTIU(inst.bits, ReadReg(inst.i.rs)); - - WriteReg(inst.i.rt, result); } break; @@ -1671,20 +1677,20 @@ restart_instruction: case CopCommonInstruction::mfcn: { const u32 value = ReadCop0Reg(static_cast(inst.r.rd.GetValue())); + WriteRegDelayed(inst.r.rt, value); if constexpr (pgxp_mode == PGXPMode::CPU) PGXP::CPU_MFC0(inst.bits, value); - - WriteRegDelayed(inst.r.rt, value); } break; case CopCommonInstruction::mtcn: { - WriteCop0Reg(static_cast(inst.r.rd.GetValue()), ReadReg(inst.r.rt)); + const u32 rtVal = ReadReg(inst.r.rt); + WriteCop0Reg(static_cast(inst.r.rd.GetValue()), rtVal); if constexpr (pgxp_mode == PGXPMode::CPU) - PGXP::CPU_MTC0(inst.bits, 
ReadCop0Reg(static_cast(inst.r.rd.GetValue())), ReadReg(inst.i.rt)); + PGXP::CPU_MTC0(inst.bits, ReadCop0Reg(static_cast(inst.r.rd.GetValue())), rtVal); } break; @@ -2380,7 +2386,6 @@ void CPU::CodeCache::InterpretUncachedBlock() break; } - in_branch_delay_slot = branch; } } @@ -2930,8 +2935,7 @@ bool CPU::SafeWriteMemoryWord(VirtualMemoryAddress addr, u32 value) if ((addr & 3) == 0) return DoSafeMemoryAccess(addr, value); - return SafeWriteMemoryHalfWord(addr, Truncate16(value)) && - SafeWriteMemoryHalfWord(addr + 2, Truncate16(value >> 16)); + return SafeWriteMemoryHalfWord(addr, Truncate16(value)) && SafeWriteMemoryHalfWord(addr + 2, Truncate16(value >> 16)); } void* CPU::GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks) diff --git a/src/core/cpu_core.h b/src/core/cpu_core.h index 8055c53a0..f93ca9fd5 100644 --- a/src/core/cpu_core.h +++ b/src/core/cpu_core.h @@ -52,13 +52,13 @@ struct PGXP_value float x; float y; float z; + u32 value; union { u32 flags; u8 compFlags[4]; u16 halfFlags[2]; }; - u32 value; }; struct State @@ -107,9 +107,7 @@ struct State std::array scratchpad = {}; - PGXP_value pgxp_gpr[32]; - PGXP_value pgxp_hi; - PGXP_value pgxp_lo; + PGXP_value pgxp_gpr[static_cast(Reg::count)]; PGXP_value pgxp_cop0[32]; PGXP_value pgxp_gte[64]; diff --git a/src/core/cpu_newrec_compiler.cpp b/src/core/cpu_newrec_compiler.cpp index 4bb6c82fe..69f4fe4d2 100644 --- a/src/core/cpu_newrec_compiler.cpp +++ b/src/core/cpu_newrec_compiler.cpp @@ -1728,8 +1728,8 @@ void CPU::NewRec::Compiler::CompileMoveRegTemplate(Reg dst, Reg src, bool pgxp_m if (g_settings.gpu_pgxp_enable && pgxp_move) { // might've been renamed, so use dst here - GeneratePGXPCallWithMIPSRegs(reinterpret_cast(&PGXP::CPU_MOVE), - (static_cast(dst) << 8) | (static_cast(src)), dst); + GeneratePGXPCallWithMIPSRegs(reinterpret_cast(&PGXP::CPU_MOVE_Packed), PGXP::PackMoveArgs(dst, src), + dst); } } diff --git a/src/core/cpu_pgxp.cpp 
b/src/core/cpu_pgxp.cpp index 81416682a..f46198f36 100644 --- a/src/core/cpu_pgxp.cpp +++ b/src/core/cpu_pgxp.cpp @@ -79,8 +79,8 @@ static void CPU_BITWISE(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal); static void WriteMem(const PGXP_value* value, u32 addr); static void WriteMem16(const PGXP_value* src, u32 addr); -static const PGXP_value PGXP_value_invalid = {0.f, 0.f, 0.f, {0}, 0}; -static const PGXP_value PGXP_value_zero = {0.f, 0.f, 0.f, {VALID_ALL}, 0}; +static const PGXP_value PGXP_value_invalid = {0.f, 0.f, 0.f, 0, {0}}; +static const PGXP_value PGXP_value_zero = {0.f, 0.f, 0.f, 0, {VALID_ALL}}; static PGXP_value* s_mem = nullptr; static PGXP_value* s_vertex_cache = nullptr; @@ -568,11 +568,17 @@ void CPU::PGXP::CPU_SW(u32 instr, u32 addr, u32 rtVal) WriteMem(val, addr); } -void CPU::PGXP::CPU_MOVE(u32 rd_and_rs, u32 rsVal) +void CPU::PGXP::CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal) { const u32 Rs = (rd_and_rs & 0xFFu); + const u32 Rd = (rd_and_rs >> 8); + CPU_MOVE(Rd, Rs, rsVal); +} + +void CPU::PGXP::CPU_MOVE(u32 Rd, u32 Rs, u32 rsVal) +{ Validate(&g_state.pgxp_gpr[Rs], rsVal); - g_state.pgxp_gpr[(rd_and_rs >> 8)] = g_state.pgxp_gpr[Rs]; + g_state.pgxp_gpr[Rd] = g_state.pgxp_gpr[Rs]; } void CPU::PGXP::CPU_ADDI(u32 instr, u32 rsVal) @@ -1031,9 +1037,9 @@ void CPU::PGXP::CPU_MULT(u32 instr, u32 rsVal, u32 rtVal) MakeValid(&g_state.pgxp_gpr[rt(instr)], rtVal); } - g_state.pgxp_lo = g_state.pgxp_hi = g_state.pgxp_gpr[rs(instr)]; + g_state.pgxp_gpr[static_cast(Reg::lo)] = g_state.pgxp_gpr[static_cast(Reg::hi)] = g_state.pgxp_gpr[rs(instr)]; - g_state.pgxp_lo.halfFlags[0] = g_state.pgxp_hi.halfFlags[0] = + g_state.pgxp_gpr[static_cast(Reg::lo)].halfFlags[0] = g_state.pgxp_gpr[static_cast(Reg::hi)].halfFlags[0] = (g_state.pgxp_gpr[rs(instr)].halfFlags[0] & g_state.pgxp_gpr[rt(instr)].halfFlags[0]); double xx, xy, yx, yy; @@ -1056,15 +1062,15 @@ void CPU::PGXP::CPU_MULT(u32 instr, u32 rsVal, u32 rtVal) hy = f16Overflow(hx); - g_state.pgxp_lo.x = (float)f16Sign(lx); 
- g_state.pgxp_lo.y = (float)f16Sign(ly); - g_state.pgxp_hi.x = (float)f16Sign(hx); - g_state.pgxp_hi.y = (float)f16Sign(hy); + g_state.pgxp_gpr[static_cast(Reg::lo)].x = (float)f16Sign(lx); + g_state.pgxp_gpr[static_cast(Reg::lo)].y = (float)f16Sign(ly); + g_state.pgxp_gpr[static_cast(Reg::hi)].x = (float)f16Sign(hx); + g_state.pgxp_gpr[static_cast(Reg::hi)].y = (float)f16Sign(hy); // compute PSX value const u64 result = static_cast(static_cast(SignExtend64(rsVal)) * static_cast(SignExtend64(rtVal))); - g_state.pgxp_hi.value = Truncate32(result >> 32); - g_state.pgxp_lo.value = Truncate32(result); + g_state.pgxp_gpr[static_cast(Reg::hi)].value = Truncate32(result >> 32); + g_state.pgxp_gpr[static_cast(Reg::lo)].value = Truncate32(result); } void CPU::PGXP::CPU_MULTU(u32 instr, u32 rsVal, u32 rtVal) @@ -1081,9 +1087,9 @@ void CPU::PGXP::CPU_MULTU(u32 instr, u32 rsVal, u32 rtVal) MakeValid(&g_state.pgxp_gpr[rt(instr)], rtVal); } - g_state.pgxp_lo = g_state.pgxp_hi = g_state.pgxp_gpr[rs(instr)]; + g_state.pgxp_gpr[static_cast(Reg::lo)] = g_state.pgxp_gpr[static_cast(Reg::hi)] = g_state.pgxp_gpr[rs(instr)]; - g_state.pgxp_lo.halfFlags[0] = g_state.pgxp_hi.halfFlags[0] = + g_state.pgxp_gpr[static_cast(Reg::lo)].halfFlags[0] = g_state.pgxp_gpr[static_cast(Reg::hi)].halfFlags[0] = (g_state.pgxp_gpr[rs(instr)].halfFlags[0] & g_state.pgxp_gpr[rt(instr)].halfFlags[0]); double xx, xy, yx, yy; @@ -1106,15 +1112,15 @@ void CPU::PGXP::CPU_MULTU(u32 instr, u32 rsVal, u32 rtVal) hy = f16Overflow(hx); - g_state.pgxp_lo.x = (float)f16Sign(lx); - g_state.pgxp_lo.y = (float)f16Sign(ly); - g_state.pgxp_hi.x = (float)f16Sign(hx); - g_state.pgxp_hi.y = (float)f16Sign(hy); + g_state.pgxp_gpr[static_cast(Reg::lo)].x = (float)f16Sign(lx); + g_state.pgxp_gpr[static_cast(Reg::lo)].y = (float)f16Sign(ly); + g_state.pgxp_gpr[static_cast(Reg::hi)].x = (float)f16Sign(hx); + g_state.pgxp_gpr[static_cast(Reg::hi)].y = (float)f16Sign(hy); // compute PSX value const u64 result = ZeroExtend64(rsVal) 
* ZeroExtend64(rtVal); - g_state.pgxp_hi.value = Truncate32(result >> 32); - g_state.pgxp_lo.value = Truncate32(result); + g_state.pgxp_gpr[static_cast(Reg::hi)].value = Truncate32(result >> 32); + g_state.pgxp_gpr[static_cast(Reg::lo)].value = Truncate32(result); } void CPU::PGXP::CPU_DIV(u32 instr, u32 rsVal, u32 rtVal) @@ -1132,39 +1138,42 @@ void CPU::PGXP::CPU_DIV(u32 instr, u32 rsVal, u32 rtVal) MakeValid(&g_state.pgxp_gpr[rt(instr)], rtVal); } - g_state.pgxp_lo = g_state.pgxp_hi = g_state.pgxp_gpr[rs(instr)]; + g_state.pgxp_gpr[static_cast(Reg::lo)] = g_state.pgxp_gpr[static_cast(Reg::hi)] = g_state.pgxp_gpr[rs(instr)]; - g_state.pgxp_lo.halfFlags[0] = g_state.pgxp_hi.halfFlags[0] = + g_state.pgxp_gpr[static_cast(Reg::lo)].halfFlags[0] = g_state.pgxp_gpr[static_cast(Reg::hi)].halfFlags[0] = (g_state.pgxp_gpr[rs(instr)].halfFlags[0] & g_state.pgxp_gpr[rt(instr)].halfFlags[0]); double vs = f16Unsign(g_state.pgxp_gpr[rs(instr)].x) + (g_state.pgxp_gpr[rs(instr)].y) * (double)(1 << 16); double vt = f16Unsign(g_state.pgxp_gpr[rt(instr)].x) + (g_state.pgxp_gpr[rt(instr)].y) * (double)(1 << 16); double lo = vs / vt; - g_state.pgxp_lo.y = (float)f16Sign(f16Overflow(lo)); - g_state.pgxp_lo.x = (float)f16Sign(lo); + g_state.pgxp_gpr[static_cast(Reg::lo)].y = (float)f16Sign(f16Overflow(lo)); + g_state.pgxp_gpr[static_cast(Reg::lo)].x = (float)f16Sign(lo); double hi = fmod(vs, vt); - g_state.pgxp_hi.y = (float)f16Sign(f16Overflow(hi)); - g_state.pgxp_hi.x = (float)f16Sign(hi); + g_state.pgxp_gpr[static_cast(Reg::hi)].y = (float)f16Sign(f16Overflow(hi)); + g_state.pgxp_gpr[static_cast(Reg::hi)].x = (float)f16Sign(hi); // compute PSX value if (static_cast(rtVal) == 0) { // divide by zero - g_state.pgxp_lo.value = (static_cast(rsVal) >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1); - g_state.pgxp_hi.value = static_cast(static_cast(rsVal)); + g_state.pgxp_gpr[static_cast(Reg::lo)].value = + (static_cast(rsVal) >= 0) ? 
UINT32_C(0xFFFFFFFF) : UINT32_C(1); + g_state.pgxp_gpr[static_cast(Reg::hi)].value = static_cast(static_cast(rsVal)); } else if (rsVal == UINT32_C(0x80000000) && static_cast(rtVal) == -1) { // unrepresentable - g_state.pgxp_lo.value = UINT32_C(0x80000000); - g_state.pgxp_hi.value = 0; + g_state.pgxp_gpr[static_cast(Reg::lo)].value = UINT32_C(0x80000000); + g_state.pgxp_gpr[static_cast(Reg::hi)].value = 0; } else { - g_state.pgxp_lo.value = static_cast(static_cast(rsVal) / static_cast(rtVal)); - g_state.pgxp_hi.value = static_cast(static_cast(rsVal) % static_cast(rtVal)); + g_state.pgxp_gpr[static_cast(Reg::lo)].value = + static_cast(static_cast(rsVal) / static_cast(rtVal)); + g_state.pgxp_gpr[static_cast(Reg::hi)].value = + static_cast(static_cast(rsVal) % static_cast(rtVal)); } } @@ -1183,32 +1192,32 @@ void CPU::PGXP::CPU_DIVU(u32 instr, u32 rsVal, u32 rtVal) MakeValid(&g_state.pgxp_gpr[rt(instr)], rtVal); } - g_state.pgxp_lo = g_state.pgxp_hi = g_state.pgxp_gpr[rs(instr)]; + g_state.pgxp_gpr[static_cast(Reg::lo)] = g_state.pgxp_gpr[static_cast(Reg::hi)] = g_state.pgxp_gpr[rs(instr)]; - g_state.pgxp_lo.halfFlags[0] = g_state.pgxp_hi.halfFlags[0] = + g_state.pgxp_gpr[static_cast(Reg::lo)].halfFlags[0] = g_state.pgxp_gpr[static_cast(Reg::hi)].halfFlags[0] = (g_state.pgxp_gpr[rs(instr)].halfFlags[0] & g_state.pgxp_gpr[rt(instr)].halfFlags[0]); double vs = f16Unsign(g_state.pgxp_gpr[rs(instr)].x) + f16Unsign(g_state.pgxp_gpr[rs(instr)].y) * (double)(1 << 16); double vt = f16Unsign(g_state.pgxp_gpr[rt(instr)].x) + f16Unsign(g_state.pgxp_gpr[rt(instr)].y) * (double)(1 << 16); double lo = vs / vt; - g_state.pgxp_lo.y = (float)f16Sign(f16Overflow(lo)); - g_state.pgxp_lo.x = (float)f16Sign(lo); + g_state.pgxp_gpr[static_cast(Reg::lo)].y = (float)f16Sign(f16Overflow(lo)); + g_state.pgxp_gpr[static_cast(Reg::lo)].x = (float)f16Sign(lo); double hi = fmod(vs, vt); - g_state.pgxp_hi.y = (float)f16Sign(f16Overflow(hi)); - g_state.pgxp_hi.x = (float)f16Sign(hi); + 
g_state.pgxp_gpr[static_cast(Reg::hi)].y = (float)f16Sign(f16Overflow(hi)); + g_state.pgxp_gpr[static_cast(Reg::hi)].x = (float)f16Sign(hi); if (rtVal == 0) { // divide by zero - g_state.pgxp_lo.value = UINT32_C(0xFFFFFFFF); - g_state.pgxp_hi.value = rsVal; + g_state.pgxp_gpr[static_cast(Reg::lo)].value = UINT32_C(0xFFFFFFFF); + g_state.pgxp_gpr[static_cast(Reg::hi)].value = rsVal; } else { - g_state.pgxp_lo.value = rsVal / rtVal; - g_state.pgxp_hi.value = rsVal % rtVal; + g_state.pgxp_gpr[static_cast(Reg::lo)].value = rsVal / rtVal; + g_state.pgxp_gpr[static_cast(Reg::hi)].value = rsVal % rtVal; } } @@ -1575,38 +1584,6 @@ void CPU::PGXP::CPU_SRAV(u32 instr, u32 rtVal, u32 rsVal) g_state.pgxp_gpr[rd(instr)] = ret; } -void CPU::PGXP::CPU_MFHI(u32 instr, u32 hiVal) -{ - // Rd = Hi - Validate(&g_state.pgxp_hi, hiVal); - - g_state.pgxp_gpr[rd(instr)] = g_state.pgxp_hi; -} - -void CPU::PGXP::CPU_MTHI(u32 instr, u32 rsVal) -{ - // Hi = Rd - Validate(&g_state.pgxp_gpr[rs(instr)], rsVal); - - g_state.pgxp_hi = g_state.pgxp_gpr[rd(instr)]; -} - -void CPU::PGXP::CPU_MFLO(u32 instr, u32 loVal) -{ - // Rd = Lo - Validate(&g_state.pgxp_lo, loVal); - - g_state.pgxp_gpr[rd(instr)] = g_state.pgxp_lo; -} - -void CPU::PGXP::CPU_MTLO(u32 instr, u32 rsVal) -{ - // Lo = Rd - Validate(&g_state.pgxp_gpr[rs(instr)], rsVal); - - g_state.pgxp_lo = g_state.pgxp_gpr[rd(instr)]; -} - void CPU::PGXP::CPU_MFC0(u32 instr, u32 rdVal) { // CPU[Rt] = CP0[Rd] diff --git a/src/core/cpu_pgxp.h b/src/core/cpu_pgxp.h index 39d315058..a8305a56f 100644 --- a/src/core/cpu_pgxp.h +++ b/src/core/cpu_pgxp.h @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-2.0+ #pragma once -#include "types.h" +#include "cpu_core.h" namespace CPU::PGXP { @@ -34,7 +34,13 @@ void CPU_LBx(u32 instr, u32 addr, u32 rtVal); void CPU_SB(u32 instr, u32 addr, u32 rtVal); void CPU_SH(u32 instr, u32 addr, u32 rtVal); void CPU_SW(u32 instr, u32 addr, u32 rtVal); -void CPU_MOVE(u32 rd_and_rs, u32 rsVal); +void CPU_MOVE(u32 Rd, u32 Rs, u32 
rsVal); + +ALWAYS_INLINE static u32 PackMoveArgs(Reg rd, Reg rs) +{ + return (static_cast(rd) << 8) | static_cast(rs); +} +void CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal); // Arithmetic with immediate value void CPU_ADDI(u32 instr, u32 rsVal); @@ -73,14 +79,30 @@ void CPU_SLLV(u32 instr, u32 rtVal, u32 rsVal); void CPU_SRLV(u32 instr, u32 rtVal, u32 rsVal); void CPU_SRAV(u32 instr, u32 rtVal, u32 rsVal); -// Move registers -void CPU_MFHI(u32 instr, u32 hiVal); -void CPU_MTHI(u32 instr, u32 rsVal); -void CPU_MFLO(u32 instr, u32 loVal); -void CPU_MTLO(u32 instr, u32 rsVal); - // CP0 Data transfer tracking void CPU_MFC0(u32 instr, u32 rdVal); void CPU_MTC0(u32 instr, u32 rdVal, u32 rtVal); -} // namespace PGXP \ No newline at end of file +ALWAYS_INLINE void TryMove(Reg rd, Reg rs, Reg rt) +{ + u32 src; + if (rs == Reg::zero) + src = static_cast(rt); + else if (rt == Reg::zero) + src = static_cast(rs); + else + return; + + CPU_MOVE(static_cast(rd), src, g_state.regs.r[src]); +} + +ALWAYS_INLINE void TryMoveImm(Reg rd, Reg rs, u32 imm) +{ + if (imm == 0) + { + const u32 src = static_cast(rs); + CPU_MOVE(static_cast(rd), src, g_state.regs.r[src]); + } +} + +} // namespace CPU::PGXP \ No newline at end of file diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 5c6743047..e2ff21631 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -6,8 +6,8 @@ #include "cpu_core.h" #include "cpu_core_private.h" #include "cpu_disasm.h" -#include "gte.h" #include "cpu_pgxp.h" +#include "gte.h" #include "settings.h" Log_SetChannel(CPU::Recompiler); @@ -1269,6 +1269,13 @@ bool CodeGenerator::Compile_Bitwise(Instruction instruction, const CodeCache::In result = OrValues(lhs, rhs); if (spec_lhs && spec_rhs) spec_value = *spec_lhs | *spec_rhs; + + if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero && + instruction.i.rs != Reg::zero && dest != 
instruction.i.rs && rhs.HasConstantValue(0)) + { + EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, + Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs); + } } break; @@ -1291,6 +1298,13 @@ bool CodeGenerator::Compile_Bitwise(Instruction instruction, const CodeCache::In result = XorValues(lhs, rhs); if (spec_lhs && spec_rhs) spec_value = *spec_lhs ^ *spec_rhs; + + if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero && + instruction.i.rs != Reg::zero && dest != instruction.i.rs && rhs.HasConstantValue(0)) + { + EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, + Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs); + } } break; @@ -1306,6 +1320,16 @@ bool CodeGenerator::Compile_Bitwise(Instruction instruction, const CodeCache::In result = OrValues(lhs, rhs); if (spec_lhs && spec_rhs) spec_value = *spec_lhs | *spec_rhs; + + if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero && + ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) || + (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt))) + { + EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, + Value::FromConstantU32( + PGXP::PackMoveArgs(dest, lhs.HasConstantValue(0) ? instruction.r.rt : instruction.r.rs)), + lhs.HasConstantValue(0) ? 
rhs : lhs); + } } break; @@ -1328,6 +1352,16 @@ bool CodeGenerator::Compile_Bitwise(Instruction instruction, const CodeCache::In result = XorValues(lhs, rhs); if (spec_lhs && spec_rhs) spec_value = *spec_lhs ^ *spec_rhs; + + if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero && + ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) || + (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt))) + { + EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, + Value::FromConstantU32( + PGXP::PackMoveArgs(dest, lhs.HasConstantValue(0) ? instruction.r.rt : instruction.r.rs)), + lhs.HasConstantValue(0) ? rhs : lhs); + } } break; @@ -1787,7 +1821,10 @@ bool CodeGenerator::Compile_MoveHiLo(Instruction instruction, const CodeCache::I { Value hi = m_register_cache.ReadGuestRegister(Reg::hi); if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, &PGXP::CPU_MFHI, Value::FromConstantU32(instruction.bits), hi); + { + EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, + Value::FromConstantU32(PGXP::PackMoveArgs(instruction.r.rd, Reg::hi)), hi); + } m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(hi)); SpeculativeWriteReg(instruction.r.rd, std::nullopt); @@ -1798,7 +1835,10 @@ bool CodeGenerator::Compile_MoveHiLo(Instruction instruction, const CodeCache::I { Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs); if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, &PGXP::CPU_MTHI, Value::FromConstantU32(instruction.bits), rs); + { + EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, + Value::FromConstantU32(PGXP::PackMoveArgs(Reg::hi, instruction.r.rs)), rs); + } m_register_cache.WriteGuestRegister(Reg::hi, std::move(rs)); } @@ -1808,7 +1848,10 @@ bool CodeGenerator::Compile_MoveHiLo(Instruction instruction, const CodeCache::I { Value lo = m_register_cache.ReadGuestRegister(Reg::lo); if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, 
&PGXP::CPU_MFLO, Value::FromConstantU32(instruction.bits), lo); + { + EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, + Value::FromConstantU32(PGXP::PackMoveArgs(instruction.r.rd, Reg::lo)), lo); + } m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(lo)); SpeculativeWriteReg(instruction.r.rd, std::nullopt); @@ -1819,7 +1862,10 @@ bool CodeGenerator::Compile_MoveHiLo(Instruction instruction, const CodeCache::I { Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs); if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, &PGXP::CPU_MTLO, Value::FromConstantU32(instruction.bits), rs); + { + EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, + Value::FromConstantU32(PGXP::PackMoveArgs(Reg::lo, instruction.r.rs)), rs); + } m_register_cache.WriteGuestRegister(Reg::lo, std::move(rs)); } @@ -1842,7 +1888,6 @@ bool CodeGenerator::Compile_Add(Instruction instruction, const CodeCache::Instru instruction.r.funct == InstructionFunct::add)); Value lhs, rhs; - Reg lhs_src; SpeculativeValue lhs_spec, rhs_spec; Reg dest; @@ -1853,7 +1898,6 @@ bool CodeGenerator::Compile_Add(Instruction instruction, const CodeCache::Instru { // rt <- rs + sext(imm) dest = instruction.i.rt; - lhs_src = instruction.i.rs; lhs = m_register_cache.ReadGuestRegister(instruction.i.rs); rhs = Value::FromConstantU32(instruction.i.imm_sext32()); @@ -1866,7 +1910,6 @@ bool CodeGenerator::Compile_Add(Instruction instruction, const CodeCache::Instru { Assert(instruction.r.funct == InstructionFunct::add || instruction.r.funct == InstructionFunct::addu); dest = instruction.r.rd; - lhs_src = instruction.r.rs; lhs = m_register_cache.ReadGuestRegister(instruction.r.rs); rhs = m_register_cache.ReadGuestRegister(instruction.r.rt); lhs_spec = SpeculativeReadReg(instruction.r.rs); @@ -1880,17 +1923,38 @@ bool CodeGenerator::Compile_Add(Instruction instruction, const CodeCache::Instru } // detect register moves and handle them for pgxp - if (g_settings.gpu_pgxp_enable && 
rhs.HasConstantValue(0)) - { - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE, - Value::FromConstantU32((static_cast(dest) << 8) | (static_cast(lhs_src))), lhs); - } - else if (g_settings.UsingPGXPCPUMode()) + if (dest != Reg::zero && g_settings.gpu_pgxp_enable) { + bool handled = false; if (instruction.op != InstructionOp::funct) - EmitFunctionCall(nullptr, &PGXP::CPU_ADDI, Value::FromConstantU32(instruction.bits), lhs); + { + if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && instruction.i.rs != Reg::zero && + dest != instruction.i.rs && rhs.HasConstantValue(0)) + { + handled = true; + EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, + Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs); + } + } else - EmitFunctionCall(nullptr, &PGXP::CPU_ADD, Value::FromConstantU32(instruction.bits), lhs, rhs); + { + if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && + ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) || + (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt))) + { + handled = true; + EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, + Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs); + } + } + + if (g_settings.gpu_pgxp_cpu && !handled) + { + if (instruction.op != InstructionOp::funct) + EmitFunctionCall(nullptr, &PGXP::CPU_ADDI, Value::FromConstantU32(instruction.bits), lhs); + else + EmitFunctionCall(nullptr, &PGXP::CPU_ADD, Value::FromConstantU32(instruction.bits), lhs, rhs); + } } Value result = AddValues(lhs, rhs, check_overflow);