From d4775e097e747801208a3db41e321ffe71e9c6c8 Mon Sep 17 00:00:00 2001
From: Stenzek <stenzek@gmail.com>
Date: Tue, 20 Aug 2024 17:23:36 +1000
Subject: [PATCH] CPU/PGXP: Combine logical/arithmetic shift functions

---
 src/core/cpu_pgxp.cpp | 105 ++++++++++--------------------------------
 1 file changed, 25 insertions(+), 80 deletions(-)
diff --git a/src/core/cpu_pgxp.cpp b/src/core/cpu_pgxp.cpp
index 34f35f550..3811f8947 100644
--- a/src/core/cpu_pgxp.cpp
+++ b/src/core/cpu_pgxp.cpp
@@ -90,8 +90,7 @@ static void ValidateAndLoadMem16(PGXP_value* dest, u32 addr, u32 value, bool sig
 static void CPU_MTC2_int(const PGXP_value& value, u32 reg, u32 val);
 static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal);
 static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh);
-static void CPU_SRL(Instruction instr, u32 rtVal, u32 sh);
-static void CPU_SRA(Instruction instr, u32 rtVal, u32 sh, bool is_variable);
+static void CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable);
 
 static void WriteMem(u32 addr, const PGXP_value& value);
 static void WriteMem16(u32 addr, const PGXP_value& value);
@@ -1384,23 +1383,23 @@ void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal)
   CPU_SLL(instr, rtVal, sh);
 }
 
-ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal, u32 sh)
+ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable)
 {
-  const u32 rdVal = rtVal >> sh;
+  const u32 rdVal = sign ? static_cast<u32>(static_cast<s32>(rtVal) >> sh) : (rtVal >> sh);
   PGXP_value& prtVal = ValidateAndGetRtValue(instr, rtVal);
 
   double x = prtVal.x;
-  double y = f16Unsign(prtVal.y);
+  double y = sign ? prtVal.y : f16Unsign(prtVal.y);
 
   const u32 iX = SignExtend32(LOWORD_S16(rtVal));   // remove Y
   const u32 iY = SET_LOWORD(rtVal, HIWORD_U16(iX)); // overwrite x with sign(x)
 
   // Shift test values
   const u32 dX = static_cast<u32>(static_cast<s32>(iX) >> sh);
-  const u32 dY = iY >> sh;
+  const u32 dY = sign ? static_cast<u32>(static_cast<s32>(iY) >> sh) : (iY >> sh);
 
   if (LOWORD_S16(dX) != HIWORD_S16(iX))
-    x = x / (1 << sh);
+    x = x / static_cast<double>(1 << sh);
   else
     x = LOWORD_S16(dX); // only sign bits left
 
@@ -1412,20 +1411,20 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal, u32
     }
     else if (sh < 16)
     {
-      x += y * static_cast<float>(1 << (16 - sh));
-      if (GetRtValue(instr).x < 0)
-        x += static_cast<float>(1 << (16 - sh));
+      x += y * static_cast<double>(1 << (16 - sh));
+      if (prtVal.x < 0)
+        x += static_cast<double>(1 << (16 - sh));
     }
     else
     {
-      x += y / static_cast<float>(1 << (sh - 16));
+      x += y / static_cast<double>(1 << (sh - 16));
     }
   }
 
   if ((HIWORD_S16(dY) == 0) || (HIWORD_S16(dY) == -1))
     y = HIWORD_S16(dY);
   else
-    y = y / static_cast<float>(1 << sh);
+    y = y / static_cast<double>(1 << sh);
 
   PGXP_value& prdVal = GetRdValue(instr);
   prdVal = prtVal;
@@ -1433,6 +1432,16 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal, u32
   prdVal.y = static_cast<float>(f16Sign(y));
   prdVal.value = rdVal;
   prdVal.flags |= VALID_TAINTED_Z;
+
+  // Use low precision/rounded values when we're not shifting an entire component,
+  // and it's not originally from a 3D value. Too many false positives in P2/etc.
+  // What we probably should do is not set the valid flag on non-3D values to begin
+  // with, only letting them become valid when used in another expression.
+  if (sign && !is_variable && !(prdVal.flags & VALID_Z) && sh < 16)
+  {
+    prdVal.flags = 0;
+    MakeValid(&prdVal, rdVal);
+  }
 }
 
 void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal)
@@ -1441,7 +1450,7 @@ void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal)
 
   // Rd = Rt >> Sa
   const u32 sh = instr.r.shamt;
-  CPU_SRL(instr, rtVal, sh);
+  CPU_SRx(instr, rtVal, sh, false, false);
 }
 
 void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal)
@@ -1450,71 +1459,7 @@ void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal)
 
   // Rd = Rt >> Sa
   const u32 sh = rsVal & 0x1F;
-  CPU_SRL(instr, rtVal, sh);
-}
-
-void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal, u32 sh, bool is_variable)
-{
-  const u32 rdVal = static_cast<u32>(static_cast<s32>(rtVal) >> sh);
-  PGXP_value& prtVal = ValidateAndGetRtValue(instr, rtVal);
-
-  double x = prtVal.x;
-  double y = prtVal.y;
-
-  const u32 iX = SignExtend32(LOWORD_S16(rtVal));   // remove Y
-  const u32 iY = SET_LOWORD(rtVal, HIWORD_U16(iX)); // overwrite x with sign(x)
-
-  // Shift test values
-  const u32 dX = static_cast<u32>(static_cast<s32>(iX) >> sh);
-  const u32 dY = static_cast<u32>(static_cast<s32>(iY) >> sh);
-
-  if (LOWORD_S16(dX) != HIWORD_S16(iX))
-    x = x / (1 << sh);
-  else
-    x = LOWORD_S16(dX); // only sign bits left
-
-  if (LOWORD_S16(dY) != HIWORD_S16(iX))
-  {
-    if (sh == 16)
-    {
-      x = y;
-    }
-    else if (sh < 16)
-    {
-      x += y * (1 << (16 - sh));
-      if (GetRtValue(instr).x < 0)
-        x += 1 << (16 - sh);
-    }
-    else
-    {
-      x += y / (1 << (sh - 16));
-    }
-  }
-
-  if ((HIWORD_S16(dY) == 0) || (HIWORD_S16(dY) == -1))
-    y = HIWORD_S16(dY);
-  else
-    y = y / (1 << sh);
-
-  x = f16Sign(x);
-  y = f16Sign(y);
-
-  PGXP_value& prdVal = GetRdValue(instr);
-  prdVal = prtVal;
-  prdVal.x = static_cast<float>(x);
-  prdVal.y = static_cast<float>(y);
-  prdVal.value = rdVal;
-  prdVal.flags |= VALID_TAINTED_Z;
-
-  // Use low precision/rounded values when we're not shifting an entire component,
-  // and it's not originally from a 3D value. Too many false positives in P2/etc.
-  // What we probably should do is not set the valid flag on non-3D values to begin
-  // with, only letting them become valid when used in another expression.
-  if (!is_variable && !(prdVal.flags & VALID_Z) && sh < 16)
-  {
-    prdVal.flags = 0;
-    MakeValid(&prdVal, rdVal);
-  }
+  CPU_SRx(instr, rtVal, sh, false, true);
 }
 
 void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal)
@@ -1523,7 +1468,7 @@ void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal)
 
   // Rd = Rt >> Sa
   const u32 sh = instr.r.shamt;
-  CPU_SRA(instr, rtVal, sh, false);
+  CPU_SRx(instr, rtVal, sh, true, false);
 }
 
 void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal)
@@ -1532,7 +1477,7 @@ void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal)
 
   // Rd = Rt >> Sa
   const u32 sh = rsVal & 0x1F;
-  CPU_SRA(instr, rtVal, sh, true);
+  CPU_SRx(instr, rtVal, sh, true, true);
 }
 
 void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal)