CPU/PGXP: Refactor add/sub for small speedup

This commit is contained in:
Stenzek 2024-07-10 20:30:35 +10:00
parent e5c62b8baf
commit 11f7bfc461
No known key found for this signature in database

View file

@ -946,47 +946,42 @@ void CPU::PGXP::CPU_ADD(u32 instr, u32 rsVal, u32 rtVal)
PGXP_value& prsVal = g_state.pgxp_gpr[rs(instr)]; PGXP_value& prsVal = g_state.pgxp_gpr[rs(instr)];
PGXP_value& prtVal = g_state.pgxp_gpr[rt(instr)]; PGXP_value& prtVal = g_state.pgxp_gpr[rt(instr)];
PGXP_value& prdVal = g_state.pgxp_gpr[rd(instr)];
// Rd = Rs + Rt (signed) // Rd = Rs + Rt (signed)
Validate(&prsVal, rsVal); Validate(&prsVal, rsVal);
Validate(&prtVal, rtVal); Validate(&prtVal, rtVal);
PGXP_value ret;
if (rtVal == 0) if (rtVal == 0)
{ {
ret = prsVal; prdVal = prsVal;
CopyZIfMissing(ret, prtVal); CopyZIfMissing(prdVal, prtVal);
} }
else if (rsVal == 0) else if (rsVal == 0)
{ {
ret = prtVal; prdVal = prtVal;
CopyZIfMissing(ret, prsVal); CopyZIfMissing(prdVal, prsVal);
} }
else else
{ {
ret = prsVal; const double x = f16Unsign(prsVal.GetValidX(rsVal)) + f16Unsign(prtVal.GetValidX(rtVal));
ret.x = (float)f16Unsign(prsVal.GetValidX(rsVal));
ret.x += (float)f16Unsign(prtVal.GetValidX(rtVal));
// carry on over/underflow // carry on over/underflow
float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? -1.f : 0.f; const float of = (x > USHRT_MAX) ? 1.f : (x < 0) ? -1.f : 0.f;
ret.x = (float)f16Sign(ret.x); prdVal.x = static_cast<float>(f16Sign(x));
// ret.x -= of * (USHRT_MAX + 1); // prdVal.x -= of * (USHRT_MAX + 1);
ret.y = prsVal.GetValidY(rsVal) + prtVal.GetValidY(rtVal) + of; prdVal.y = prsVal.GetValidY(rsVal) + prtVal.GetValidY(rtVal) + of;
// truncate on overflow/underflow // truncate on overflow/underflow
ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f; prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f;
prdVal.value = rsVal + rtVal;
// valid x/y only if one side had a valid x/y // valid x/y only if one side had a valid x/y
ret.flags |= (prtVal.flags & VALID_XY); prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z;
SelectZ(ret, ret, prtVal); SelectZ(prdVal, prsVal, prtVal);
ret.flags |= VALID_TAINTED_Z;
} }
ret.value = rsVal + rtVal;
g_state.pgxp_gpr[rd(instr)] = ret;
} }
void CPU::PGXP::CPU_SUB(u32 instr, u32 rsVal, u32 rtVal) void CPU::PGXP::CPU_SUB(u32 instr, u32 rsVal, u32 rtVal)
@ -995,42 +990,37 @@ void CPU::PGXP::CPU_SUB(u32 instr, u32 rsVal, u32 rtVal)
PGXP_value& prsVal = g_state.pgxp_gpr[rs(instr)]; PGXP_value& prsVal = g_state.pgxp_gpr[rs(instr)];
PGXP_value& prtVal = g_state.pgxp_gpr[rt(instr)]; PGXP_value& prtVal = g_state.pgxp_gpr[rt(instr)];
PGXP_value& prdVal = g_state.pgxp_gpr[rd(instr)];
// Rd = Rs - Rt (signed) // Rd = Rs - Rt (signed)
Validate(&prsVal, rsVal); Validate(&prsVal, rsVal);
Validate(&prtVal, rtVal); Validate(&prtVal, rtVal);
PGXP_value ret;
if (rtVal == 0) if (rtVal == 0)
{ {
ret = prsVal; prdVal = prsVal;
CopyZIfMissing(ret, prtVal); CopyZIfMissing(prdVal, prtVal);
} }
else else
{ {
ret = prsVal; const double x = f16Unsign(prsVal.GetValidX(rsVal)) - f16Unsign(prtVal.GetValidX(rtVal));
ret.x = (float)f16Unsign(prsVal.GetValidX(rsVal));
ret.x -= (float)f16Unsign(prtVal.GetValidX(rtVal));
// carry on over/underflow // carry on over/underflow
float of = (ret.x > USHRT_MAX) ? 1.f : (ret.x < 0) ? -1.f : 0.f; const float of = (x > USHRT_MAX) ? 1.f : (x < 0) ? -1.f : 0.f;
ret.x = (float)f16Sign(ret.x); prdVal.x = static_cast<float>(f16Sign(x));
// ret.x -= of * (USHRT_MAX + 1); // prdVal.x -= of * (USHRT_MAX + 1);
ret.y = prsVal.GetValidY(rsVal) - (prtVal.GetValidY(rtVal) - of); prdVal.y = prsVal.GetValidY(rsVal) - (prtVal.GetValidY(rtVal) - of);
// truncate on overflow/underflow // truncate on overflow/underflow
ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f; prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f;
prdVal.value = rsVal - rtVal;
// valid x/y only if one side had a valid x/y // valid x/y only if one side had a valid x/y
ret.flags |= (prtVal.flags & VALID_XY); prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z;
SelectZ(ret, ret, prtVal); SelectZ(prdVal, prsVal, prtVal);
ret.flags |= VALID_TAINTED_Z;
} }
ret.value = rsVal - rtVal;
g_state.pgxp_gpr[rd(instr)] = ret;
} }
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal) ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal)