From d4775e097e747801208a3db41e321ffe71e9c6c8 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 20 Aug 2024 17:23:36 +1000 Subject: [PATCH] CPU/PGXP: Combine logical/arithmetic shift functions --- src/core/cpu_pgxp.cpp | 105 ++++++++++-------------------------------- 1 file changed, 25 insertions(+), 80 deletions(-) diff --git a/src/core/cpu_pgxp.cpp b/src/core/cpu_pgxp.cpp index 34f35f550..3811f8947 100644 --- a/src/core/cpu_pgxp.cpp +++ b/src/core/cpu_pgxp.cpp @@ -90,8 +90,7 @@ static void ValidateAndLoadMem16(PGXP_value* dest, u32 addr, u32 value, bool sig static void CPU_MTC2_int(const PGXP_value& value, u32 reg, u32 val); static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal); static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh); -static void CPU_SRL(Instruction instr, u32 rtVal, u32 sh); -static void CPU_SRA(Instruction instr, u32 rtVal, u32 sh, bool is_variable); +static void CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable); static void WriteMem(u32 addr, const PGXP_value& value); static void WriteMem16(u32 addr, const PGXP_value& value); @@ -1384,23 +1383,23 @@ void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal) CPU_SLL(instr, rtVal, sh); } -ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal, u32 sh) +ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable) { - const u32 rdVal = rtVal >> sh; + const u32 rdVal = sign ? static_cast(static_cast(rtVal) >> sh) : (rtVal >> sh); PGXP_value& prtVal = ValidateAndGetRtValue(instr, rtVal); double x = prtVal.x; - double y = f16Unsign(prtVal.y); + double y = sign ? prtVal.y : f16Unsign(prtVal.y); const u32 iX = SignExtend32(LOWORD_S16(rtVal)); // remove Y const u32 iY = SET_LOWORD(rtVal, HIWORD_U16(iX)); // overwrite x with sign(x) // Shift test values const u32 dX = static_cast(static_cast(iX) >> sh); - const u32 dY = iY >> sh; + const u32 dY = sign ? static_cast(static_cast(iY) >> sh) : (iY >> sh); if (LOWORD_S16(dX) != HIWORD_S16(iX)) - x = x / (1 << sh); + x = x / static_cast(1 << sh); else x = LOWORD_S16(dX); // only sign bits left @@ -1412,20 +1411,20 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal, u32 } else if (sh < 16) { - x += y * static_cast(1 << (16 - sh)); - if (GetRtValue(instr).x < 0) - x += static_cast(1 << (16 - sh)); + x += y * static_cast(1 << (16 - sh)); + if (prtVal.x < 0) + x += static_cast(1 << (16 - sh)); } else { - x += y / static_cast(1 << (sh - 16)); + x += y / static_cast(1 << (sh - 16)); } } if ((HIWORD_S16(dY) == 0) || (HIWORD_S16(dY) == -1)) y = HIWORD_S16(dY); else - y = y / static_cast(1 << sh); + y = y / static_cast(1 << sh); PGXP_value& prdVal = GetRdValue(instr); prdVal = prtVal; @@ -1433,6 +1432,16 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal, u32 prdVal.y = static_cast(f16Sign(y)); prdVal.value = rdVal; prdVal.flags |= VALID_TAINTED_Z; + + // Use low precision/rounded values when we're not shifting an entire component, + // and it's not originally from a 3D value. Too many false positives in P2/etc. + // What we probably should do is not set the valid flag on non-3D values to begin + // with, only letting them become valid when used in another expression. + if (sign && !is_variable && !(prdVal.flags & VALID_Z) && sh < 16) + { + prdVal.flags = 0; + MakeValid(&prdVal, rdVal); + } } void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal) @@ -1441,7 +1450,7 @@ void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal) // Rd = Rt >> Sa const u32 sh = instr.r.shamt; - CPU_SRL(instr, rtVal, sh); + CPU_SRx(instr, rtVal, sh, false, false); } void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal) @@ -1450,71 +1459,7 @@ void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal) // Rd = Rt >> Sa const u32 sh = rsVal & 0x1F; - CPU_SRL(instr, rtVal, sh); -} - -void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal, u32 sh, bool is_variable) -{ - const u32 rdVal = static_cast(static_cast(rtVal) >> sh); - PGXP_value& prtVal = ValidateAndGetRtValue(instr, rtVal); - - double x = prtVal.x; - double y = prtVal.y; - - const u32 iX = SignExtend32(LOWORD_S16(rtVal)); // remove Y - const u32 iY = SET_LOWORD(rtVal, HIWORD_U16(iX)); // overwrite x with sign(x) - - // Shift test values - const u32 dX = static_cast(static_cast(iX) >> sh); - const u32 dY = static_cast(static_cast(iY) >> sh); - - if (LOWORD_S16(dX) != HIWORD_S16(iX)) - x = x / (1 << sh); - else - x = LOWORD_S16(dX); // only sign bits left - - if (LOWORD_S16(dY) != HIWORD_S16(iX)) - { - if (sh == 16) - { - x = y; - } - else if (sh < 16) - { - x += y * (1 << (16 - sh)); - if (GetRtValue(instr).x < 0) - x += 1 << (16 - sh); - } - else - { - x += y / (1 << (sh - 16)); - } - } - - if ((HIWORD_S16(dY) == 0) || (HIWORD_S16(dY) == -1)) - y = HIWORD_S16(dY); - else - y = y / (1 << sh); - - x = f16Sign(x); - y = f16Sign(y); - - PGXP_value& prdVal = GetRdValue(instr); - prdVal = prtVal; - prdVal.x = static_cast(x); - prdVal.y = static_cast(y); - prdVal.value = rdVal; - prdVal.flags |= VALID_TAINTED_Z; - - // Use low precision/rounded values when we're not shifting an entire component, - // and it's not originally from a 3D value. Too many false positives in P2/etc. - // What we probably should do is not set the valid flag on non-3D values to begin - // with, only letting them become valid when used in another expression. - if (!is_variable && !(prdVal.flags & VALID_Z) && sh < 16) - { - prdVal.flags = 0; - MakeValid(&prdVal, rdVal); - } + CPU_SRx(instr, rtVal, sh, false, true); } void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal) @@ -1523,7 +1468,7 @@ void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal) // Rd = Rt >> Sa const u32 sh = instr.r.shamt; - CPU_SRA(instr, rtVal, sh, false); + CPU_SRx(instr, rtVal, sh, true, false); } void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal) @@ -1532,7 +1477,7 @@ void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal) // Rd = Rt >> Sa const u32 sh = rsVal & 0x1F; - CPU_SRA(instr, rtVal, sh, true); + CPU_SRx(instr, rtVal, sh, true, true); } void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal)