GTE: More cleanups

This commit is contained in:
Connor McLaughlin 2019-09-26 01:40:55 +10:00
parent f704d8fc63
commit d885abc528
3 changed files with 63 additions and 122 deletions

View file

@ -316,44 +316,6 @@ void Core::ExecuteInstruction(Instruction inst)
} }
} }
// Writes `value` to the data register slot at dr32[24 + index] and records in FLAG
// whether it was outside the signed 32-bit range.
//   index - selects the FLAG bit and the register slot (24 + index).
//   value - 64-bit intermediate result to store.
// The store happens even when a flag is raised; only the output of Truncate32() is kept.
void Core::SetMAC(u32 index, s64 value)
{
  const bool below_s32 = (value < INT64_C(-2147483648));
  const bool above_s32 = (value > INT64_C(2147483647));

  if (below_s32)
  {
    m_regs.FLAG.SetMACUnderflow(index);
  }
  else if (above_s32)
  {
    m_regs.FLAG.SetMACOverflow(index);
  }

  const u64 raw_bits = static_cast<u64>(value);
  m_regs.dr32[24 + index] = Truncate32(raw_bits);
}
// Clamps `value` into the IR range and writes it to the data register slot at
// dr32[8 + index], setting the IR-saturated FLAG bit whenever clamping occurred.
//   index - selects the FLAG bit and the register slot (8 + index).
//   value - 32-bit result to store.
//   lm    - when true the lower bound is 0; when false it is -32768.
// The upper bound is always 32767.
//
// Fix: the negative saturation case used to store -1 (0xFFFFFFFF) instead of the
// documented lower bound of -32768.
void Core::SetIR(u32 index, s32 value, bool lm)
{
  // saturate to -32768..32767, or to 0..32767 when lm is set
  const s32 lower_bound = lm ? 0 : -32768;
  const s32 upper_bound = 32767;

  s32 stored = value;
  if (value < lower_bound)
    stored = lower_bound;
  else if (value > upper_bound)
    stored = upper_bound;

  if (stored != value)
    m_regs.FLAG.SetIRSaturated(index);

  // store the sign extension in the padding bits
  m_regs.dr32[8 + index] = static_cast<u32>(stored);
}
void Core::SetOTZ(s32 value) void Core::SetOTZ(s32 value)
{ {
if (value < 0) if (value < 0)
@ -455,9 +417,9 @@ void Core::RTPS(const s16 V[3], bool sf, bool lm)
// MAC0=(((H*20000h/SZ3)+1)/2)*IR1+OFX, SX2=MAC0/10000h ;ScrX FIFO -400h..+3FFh // MAC0=(((H*20000h/SZ3)+1)/2)*IR1+OFX, SX2=MAC0/10000h ;ScrX FIFO -400h..+3FFh
// MAC0=(((H*20000h/SZ3)+1)/2)*IR2+OFY, SY2=MAC0/10000h ;ScrY FIFO -400h..+3FFh // MAC0=(((H*20000h/SZ3)+1)/2)*IR2+OFY, SY2=MAC0/10000h ;ScrY FIFO -400h..+3FFh
// MAC0=(((H*20000h/SZ3)+1)/2)*DQA+DQB, IR0=MAC0/1000h ;Depth cueing 0..+1000h // MAC0=(((H*20000h/SZ3)+1)/2)*DQA+DQB, IR0=MAC0/1000h ;Depth cueing 0..+1000h
const s32 Sx = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR1) + s64(m_regs.OFX), 16); const s32 Sx = s32(TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR1) + s64(m_regs.OFX), 16));
const s32 Sy = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR2) + s64(m_regs.OFY), 16); const s32 Sy = s32(TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR2) + s64(m_regs.OFY), 16));
const s32 Sz = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.DQA) + s64(m_regs.DQB), 12); const s32 Sz = s32(TruncateAndSetMAC<0>(s64(result) * s64(m_regs.DQA) + s64(m_regs.DQB), 12));
PushSXY(Sx, Sy); PushSXY(Sx, Sy);
TruncateAndSetIR<0>(Sz, true); TruncateAndSetIR<0>(Sz, true);
} }
@ -486,11 +448,10 @@ void Core::Execute_NCLIP(Instruction inst)
// MAC0 = SX0*SY1 + SX1*SY2 + SX2*SY0 - SX0*SY2 - SX1*SY0 - SX2*SY1 // MAC0 = SX0*SY1 + SX1*SY2 + SX2*SY0 - SX0*SY2 - SX1*SY0 - SX2*SY1
m_regs.FLAG.Clear(); m_regs.FLAG.Clear();
const s64 MAC0x = s64(m_regs.SXY0[0]) * s64(m_regs.SXY1[1]) + s64(m_regs.SXY1[0]) * s64(m_regs.SXY2[1]) + TruncateAndSetMAC<0>(s64(m_regs.SXY0[0]) * s64(m_regs.SXY1[1]) + s64(m_regs.SXY1[0]) * s64(m_regs.SXY2[1]) +
s64(m_regs.SXY2[0]) * s64(m_regs.SXY0[1]) - s64(m_regs.SXY0[0]) * s64(m_regs.SXY2[1]) - s64(m_regs.SXY2[0]) * s64(m_regs.SXY0[1]) - s64(m_regs.SXY0[0]) * s64(m_regs.SXY2[1]) -
s64(m_regs.SXY1[0]) * s64(m_regs.SXY0[1]) - s64(m_regs.SXY2[0]) * s64(m_regs.SXY1[1]); s64(m_regs.SXY1[0]) * s64(m_regs.SXY0[1]) - s64(m_regs.SXY2[0]) * s64(m_regs.SXY1[1]),
0);
SetMAC(0, MAC0x);
m_regs.FLAG.UpdateError(); m_regs.FLAG.UpdateError();
} }
@ -499,15 +460,16 @@ void Core::Execute_SQR(Instruction inst)
{ {
m_regs.FLAG.Clear(); m_regs.FLAG.Clear();
const u8 shift = inst.sf ? 12 : 0; // 32-bit multiply for speed - 16x16 isn't >32bit, and we know it won't overflow/underflow.
SetMAC(1, (s32(m_regs.IR1) * s32(m_regs.IR1)) >> shift); const u8 shift = inst.GetShift();
SetMAC(2, (s32(m_regs.IR2) * s32(m_regs.IR2)) >> shift); m_regs.MAC1 = (s32(m_regs.IR1) * s32(m_regs.IR1)) >> shift;
SetMAC(3, (s32(m_regs.IR3) * s32(m_regs.IR3)) >> shift); m_regs.MAC2 = (s32(m_regs.IR2) * s32(m_regs.IR2)) >> shift;
m_regs.MAC3 = (s32(m_regs.IR3) * s32(m_regs.IR3)) >> shift;
const bool lm = inst.lm; const bool lm = inst.lm;
SetIR(1, m_regs.MAC1, lm); TruncateAndSetIR<1>(m_regs.MAC1, lm);
SetIR(2, m_regs.MAC2, lm); TruncateAndSetIR<2>(m_regs.MAC2, lm);
SetIR(3, m_regs.MAC3, lm); TruncateAndSetIR<3>(m_regs.MAC3, lm);
m_regs.FLAG.UpdateError(); m_regs.FLAG.UpdateError();
} }
@ -516,10 +478,10 @@ void Core::Execute_AVSZ3(Instruction inst)
{ {
m_regs.FLAG.Clear(); m_regs.FLAG.Clear();
const s64 MAC0 = static_cast<s64>(m_regs.ZSF3) * const s64 result =
static_cast<s32>(ZeroExtend32(m_regs.SZ1) + ZeroExtend32(m_regs.SZ2) + ZeroExtend32(m_regs.SZ3)); TruncateAndSetMAC<0>(s64(m_regs.ZSF3) * s32(u32(m_regs.SZ1) + u32(m_regs.SZ2) + u32(m_regs.SZ3)), 0);
SetMAC(0, MAC0); TruncateAndSetMAC<0>(result, 0);
SetOTZ(static_cast<s32>(MAC0 / 0x1000)); SetOTZ(s32(result >> 12));
m_regs.FLAG.UpdateError(); m_regs.FLAG.UpdateError();
} }
@ -528,30 +490,19 @@ void Core::Execute_AVSZ4(Instruction inst)
{ {
m_regs.FLAG.Clear(); m_regs.FLAG.Clear();
const s64 MAC0 = const s64 result = TruncateAndSetMAC<0>(
static_cast<s64>(m_regs.ZSF4) * static_cast<s32>(ZeroExtend32(m_regs.SZ0) + ZeroExtend32(m_regs.SZ1) + s64(m_regs.ZSF4) * s32(u32(m_regs.SZ0) + u32(m_regs.SZ1) + u32(m_regs.SZ2) + u32(m_regs.SZ3)), 0);
ZeroExtend32(m_regs.SZ2) + ZeroExtend32(m_regs.SZ3)); TruncateAndSetMAC<0>(result, 0);
SetMAC(0, MAC0); SetOTZ(s32(result >> 12));
SetOTZ(static_cast<s32>(MAC0 / 0x1000));
m_regs.FLAG.UpdateError(); m_regs.FLAG.UpdateError();
} }
// Dot product of two 3-element s16 vectors.
// Each product is formed in 32 bits and then widened, so the three terms are summed in 64 bits.
s64 Core::VecDot(const s16 A[3], const s16 B[3])
{
  s64 sum = 0;
  for (int i = 0; i < 3; i++)
    sum += s64(s32(A[i]) * s32(B[i]));
  return sum;
}
// Dot product of a 3-element s16 vector with a vector passed as separate components.
// Each product is formed in 32 bits and then widened, so the three terms are summed in 64 bits.
s64 Core::VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z)
{
  const s64 term_x = s64(s32(A[0]) * s32(B_x));
  const s64 term_y = s64(s32(A[1]) * s32(B_y));
  const s64 term_z = s64(s32(A[2]) * s32(B_z));
  return term_x + term_y + term_z;
}
void Core::MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) void Core::MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
{ {
#define dot3(i) \ #define dot3(i) \
TruncateAndSetMAC<i + 1>( \ TruncateAndSetMAC<i + 1>( \
TruncateMAC<i + 1>(TruncateMAC<i + 1>(s64(s32(M[i][0]) * s32(Vx))) + s64(s32(M[i][1]) * s32(Vy))) + \ CheckMACResult<i + 1>(CheckMACResult<i + 1>(s64(s32(M[i][0]) * s32(Vx))) + s64(s32(M[i][1]) * s32(Vy))) + \
s64(s32(M[i][2]) * s32(Vz)), \ s64(s32(M[i][2]) * s32(Vz)), \
shift) shift)
@ -569,9 +520,10 @@ void Core::MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz
void Core::MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) void Core::MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
{ {
#define dot3(i) \ #define dot3(i) \
TruncateAndSetMAC<i + 1>((s64(T[i]) << 12) + \ TruncateAndSetMAC<i + 1>( \
TruncateMAC<i + 1>(TruncateMAC<i + 1>(TruncateMAC<i + 1>(s64(s32(M[i][0]) * s32(Vx))) + \ (s64(T[i]) << 12) + \
s64(s32(M[i][1]) * s32(Vy))) + \ CheckMACResult<i + 1>( \
CheckMACResult<i + 1>(CheckMACResult<i + 1>(s64(s32(M[i][0]) * s32(Vx))) + s64(s32(M[i][1]) * s32(Vy))) + \
s64(s32(M[i][2]) * s32(Vz))), \ s64(s32(M[i][2]) * s32(Vz))), \
shift) shift)

View file

@ -26,8 +26,8 @@ public:
void ExecuteInstruction(Instruction inst); void ExecuteInstruction(Instruction inst);
private: private:
static constexpr s64 MAC0_MIN_VALUE = -(INT64_C(1) << 43); static constexpr s64 MAC0_MIN_VALUE = -(INT64_C(1) << 31);
static constexpr s64 MAC0_MAX_VALUE = (INT64_C(1) << 43) - 1; static constexpr s64 MAC0_MAX_VALUE = (INT64_C(1) << 31) - 1;
static constexpr s64 MAC123_MIN_VALUE = -(INT64_C(1) << 43); static constexpr s64 MAC123_MIN_VALUE = -(INT64_C(1) << 43);
static constexpr s64 MAC123_MAX_VALUE = (INT64_C(1) << 43) - 1; static constexpr s64 MAC123_MAX_VALUE = (INT64_C(1) << 43) - 1;
static constexpr s32 IR0_MIN_VALUE = 0x0000; static constexpr s32 IR0_MIN_VALUE = 0x0000;
@ -35,28 +35,24 @@ private:
static constexpr s32 IR123_MIN_VALUE = -(INT64_C(1) << 15); static constexpr s32 IR123_MIN_VALUE = -(INT64_C(1) << 15);
static constexpr s32 IR123_MAX_VALUE = (INT64_C(1) << 15) - 1; static constexpr s32 IR123_MAX_VALUE = (INT64_C(1) << 15) - 1;
// Checks for underflow/overflow. Returns the value untouched so it can be threaded through an expression.
template<u32 index> template<u32 index>
s64 TruncateMAC(s64 value); s64 CheckMACResult(s64 value);
template<u32 index> template<u32 index>
s32 TruncateAndSetMAC(s64 value, u8 shift); s64 TruncateAndSetMAC(s64 value, u8 shift);
template<u32 index>
u8 TruncateRGB(s32 value);
template<u32 index> template<u32 index>
s16 TruncateAndSetIR(s32 value, bool lm); s16 TruncateAndSetIR(s32 value, bool lm);
void SetMAC(u32 index, s64 value); template<u32 index>
void SetIR(u32 index, s32 value, bool lm); u8 TruncateRGB(s32 value);
void SetOTZ(s32 value); void SetOTZ(s32 value);
void PushSXY(s32 x, s32 y); void PushSXY(s32 x, s32 y);
void PushSZ(s32 value); void PushSZ(s32 value);
void PushRGB(u8 r, u8 g, u8 b, u8 c); void PushRGB(u8 r, u8 g, u8 b, u8 c);
s64 VecDot(const s16 A[3], const s16 B[3]);
s64 VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z);
// 3x3 matrix * 3x1 vector, updates MAC[1-3] and IR[1-3] // 3x3 matrix * 3x1 vector, updates MAC[1-3] and IR[1-3]
void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm); void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm);

View file

@ -1,25 +1,7 @@
#include "gte.h" #include "gte.h"
template<u32 index> template<u32 index>
u8 GTE::Core::TruncateRGB(s32 value) s64 GTE::Core::CheckMACResult(s64 value)
{
if (value < 0 || value > 0xFF)
{
if constexpr (index == 0)
m_regs.FLAG.color_r_saturated = true;
else if constexpr (index == 1)
m_regs.FLAG.color_g_saturated = true;
else
m_regs.FLAG.color_b_saturated = true;
value = (value < 0) ? 0 : 0xFF;
}
return static_cast<u8>(value);
}
template<u32 index>
s64 GTE::Core::TruncateMAC(s64 value)
{ {
constexpr s64 MIN_VALUE = (index == 0) ? MAC0_MIN_VALUE : MAC123_MIN_VALUE; constexpr s64 MIN_VALUE = (index == 0) ? MAC0_MIN_VALUE : MAC123_MIN_VALUE;
constexpr s64 MAX_VALUE = (index == 0) ? MAC0_MAX_VALUE : MAC123_MAX_VALUE; constexpr s64 MAX_VALUE = (index == 0) ? MAC0_MAX_VALUE : MAC123_MAX_VALUE;
@ -33,8 +15,6 @@ s64 GTE::Core::TruncateMAC(s64 value)
m_regs.FLAG.mac2_underflow = true; m_regs.FLAG.mac2_underflow = true;
else if constexpr (index == 3) else if constexpr (index == 3)
m_regs.FLAG.mac3_underflow = true; m_regs.FLAG.mac3_underflow = true;
return MIN_VALUE;
} }
else if (value > MAX_VALUE) else if (value > MAX_VALUE)
{ {
@ -46,26 +26,21 @@ s64 GTE::Core::TruncateMAC(s64 value)
m_regs.FLAG.mac2_overflow = true; m_regs.FLAG.mac2_overflow = true;
else if constexpr (index == 3) else if constexpr (index == 3)
m_regs.FLAG.mac3_overflow = true; m_regs.FLAG.mac3_overflow = true;
}
return MAX_VALUE;
}
else
{
return value; return value;
}
} }
template<u32 index> template<u32 index>
s32 GTE::Core::TruncateAndSetMAC(s64 value, u8 shift) s64 GTE::Core::TruncateAndSetMAC(s64 value, u8 shift)
{ {
value = TruncateMAC<index>(value); value = CheckMACResult<index>(value);
// shift should be done before storing to avoid losing precision // shift should be done before storing to avoid losing precision
value >>= shift; value >>= shift;
const s32 value32 = static_cast<s32>(value); m_regs.dr32[24 + index] = Truncate32(static_cast<u64>(value));
m_regs.dr32[24 + index] = value32; return value;
return value32;
} }
template<u32 index> template<u32 index>
@ -103,3 +78,21 @@ s16 GTE::Core::TruncateAndSetIR(s32 value, bool lm)
m_regs.dr32[8 + index] = value; m_regs.dr32[8 + index] = value;
return static_cast<s16>(value); return static_cast<s16>(value);
} }
// Clamps a colour component to 0..0xFF and returns it as a byte.
// When clamping is needed, the saturation flag selected by `index` is set:
// 0 -> red, 1 -> green, any other value -> blue.
template<u32 index>
u8 GTE::Core::TruncateRGB(s32 value)
{
  // In range: nothing to flag, pass the value straight through.
  if (value >= 0 && value <= 0xFF)
    return static_cast<u8>(value);

  if constexpr (index == 0)
    m_regs.FLAG.color_r_saturated = true;
  else if constexpr (index == 1)
    m_regs.FLAG.color_g_saturated = true;
  else
    m_regs.FLAG.color_b_saturated = true;

  const s32 clamped = (value < 0) ? 0 : 0xFF;
  return static_cast<u8>(clamped);
}