mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-25 23:25:41 +00:00
GTE: Fix NCDS
This commit is contained in:
parent
607cd4d3e4
commit
3df7b22c37
|
@ -588,52 +588,78 @@ s64 Core::VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z)
|
||||||
return s64(s32(A[0]) * s32(B_x)) + s64(s32(A[1]) * s32(B_y)) + s64(s32(A[2]) * s32(B_z));
|
return s64(s32(A[0]) * s32(B_x)) + s64(s32(A[1]) * s32(B_y)) + s64(s32(A[2]) * s32(B_z));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Core::MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, bool sf, bool lm)
|
||||||
|
{
|
||||||
|
#define dot3(i) \
|
||||||
|
TruncateAndSetMAC<i + 1>( \
|
||||||
|
TruncateMAC<i + 1>(TruncateMAC<i + 1>(s64(s32(M[i][0]) * s32(Vx))) + s64(s32(M[i][1]) * s32(Vy))) + \
|
||||||
|
s64(s32(M[i][2]) * s32(Vz)), \
|
||||||
|
sf)
|
||||||
|
|
||||||
|
dot3(0);
|
||||||
|
dot3(1);
|
||||||
|
dot3(2);
|
||||||
|
|
||||||
|
#undef dot3
|
||||||
|
|
||||||
|
TruncateAndSetIR<1>(m_regs.MAC1, lm);
|
||||||
|
TruncateAndSetIR<2>(m_regs.MAC2, lm);
|
||||||
|
TruncateAndSetIR<3>(m_regs.MAC3, lm);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Core::MulMatVec(const s16 M[3][3], const u32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, bool sf, bool lm)
|
||||||
|
{
|
||||||
|
#define dot3(i) \
|
||||||
|
TruncateAndSetMAC<i + 1>(static_cast<s64>(ZeroExtend64(T[i]) << 12) + \
|
||||||
|
TruncateMAC<i + 1>(TruncateMAC<i + 1>(TruncateMAC<i + 1>(s64(s32(M[i][0]) * s32(Vx))) + \
|
||||||
|
s64(s32(M[i][1]) * s32(Vy))) + \
|
||||||
|
s64(s32(M[i][2]) * s32(Vz))), \
|
||||||
|
sf)
|
||||||
|
|
||||||
|
dot3(0);
|
||||||
|
dot3(1);
|
||||||
|
dot3(2);
|
||||||
|
|
||||||
|
#undef dot3
|
||||||
|
|
||||||
|
TruncateAndSetIR<1>(m_regs.MAC1, lm);
|
||||||
|
TruncateAndSetIR<2>(m_regs.MAC2, lm);
|
||||||
|
TruncateAndSetIR<3>(m_regs.MAC3, lm);
|
||||||
|
}
|
||||||
|
|
||||||
void Core::NCDS(const s16 V[3], bool sf, bool lm)
|
void Core::NCDS(const s16 V[3], bool sf, bool lm)
|
||||||
{
|
{
|
||||||
const u8 shift = sf ? 12 : 0;
|
const u8 shift = sf ? 12 : 0;
|
||||||
|
|
||||||
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
|
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
|
||||||
m_regs.MAC1 = TruncateMAC<1>(VecDot(m_regs.LLM[0], V) >> shift);
|
MulMatVec(m_regs.LLM, V[0], V[1], V[2], sf, lm);
|
||||||
m_regs.MAC2 = TruncateMAC<2>(VecDot(m_regs.LLM[1], V) >> shift);
|
|
||||||
m_regs.MAC3 = TruncateMAC<3>(VecDot(m_regs.LLM[2], V) >> shift);
|
|
||||||
SetIR(0, m_regs.MAC1, lm);
|
|
||||||
SetIR(1, m_regs.MAC2, lm);
|
|
||||||
SetIR(2, m_regs.MAC3, lm);
|
|
||||||
|
|
||||||
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
|
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
|
||||||
// TODO: First multiply should check overflow
|
MulMatVec(m_regs.LCM, m_regs.BK, m_regs.IR1, m_regs.IR2, m_regs.IR3, sf, lm);
|
||||||
m_regs.MAC1 = TruncateMAC<1>(
|
|
||||||
((ZeroExtend64(m_regs.RBK) * 0x1000) + VecDot(m_regs.LCM[0], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
|
|
||||||
m_regs.MAC2 = TruncateMAC<2>(
|
|
||||||
((ZeroExtend64(m_regs.GBK) * 0x1000) + VecDot(m_regs.LCM[1], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
|
|
||||||
m_regs.MAC3 = TruncateMAC<3>(
|
|
||||||
((ZeroExtend64(m_regs.BBK) * 0x1000) + VecDot(m_regs.LCM[2], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
|
|
||||||
SetIR(1, m_regs.MAC1, lm);
|
|
||||||
SetIR(2, m_regs.MAC2, lm);
|
|
||||||
SetIR(3, m_regs.MAC3, lm);
|
|
||||||
|
|
||||||
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
|
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
|
||||||
m_regs.MAC1 = TruncateMAC<1>((ZeroExtend64(m_regs.RGBC[0]) * static_cast<u16>(m_regs.IR1)) << 4);
|
TruncateAndSetMAC<1>((s64(ZeroExtend64(m_regs.RGBC[0])) << 4) * s64(m_regs.MAC1), false);
|
||||||
m_regs.MAC2 = TruncateMAC<1>((ZeroExtend64(m_regs.RGBC[1]) * static_cast<u16>(m_regs.IR2)) << 4);
|
TruncateAndSetMAC<2>((s64(ZeroExtend64(m_regs.RGBC[1])) << 4) * s64(m_regs.MAC2), false);
|
||||||
m_regs.MAC3 = TruncateMAC<1>((ZeroExtend64(m_regs.RGBC[2]) * static_cast<u16>(m_regs.IR3)) << 4);
|
TruncateAndSetMAC<3>((s64(ZeroExtend64(m_regs.RGBC[2])) << 4) * s64(m_regs.MAC3), false);
|
||||||
SetIR(1, m_regs.MAC1, false);
|
|
||||||
SetIR(2, m_regs.MAC2, false);
|
|
||||||
SetIR(3, m_regs.MAC3, false);
|
|
||||||
|
|
||||||
// [MAC1,MAC2,MAC3] = MAC+(FC-MAC)*IR0 ;<--- for NCDx only
|
// [MAC1,MAC2,MAC3] = MAC+(FC-MAC)*IR0 ;<--- for NCDx only
|
||||||
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
|
// [IR1,IR2,IR3] = (([RFC,GFC,BFC] SHL 12) - [MAC1,MAC2,MAC3]) SAR (sf*12)
|
||||||
m_regs.MAC1 = TruncateMAC<1>(m_regs.MAC1 + ((s32(m_regs.RFC) - m_regs.MAC1) * m_regs.IR0));
|
TruncateAndSetIR<1>(s32(s64(ZeroExtend64(m_regs.FC[0]) << 12) - s64(m_regs.MAC1)) >> (sf ? 12 : 0), false);
|
||||||
m_regs.MAC2 = TruncateMAC<2>(m_regs.MAC2 + ((s32(m_regs.GFC) - m_regs.MAC2) * m_regs.IR0));
|
TruncateAndSetIR<2>(s32(s64(ZeroExtend64(m_regs.FC[1]) << 12) - s64(m_regs.MAC2)) >> (sf ? 12 : 0), false);
|
||||||
m_regs.MAC3 = TruncateMAC<3>(m_regs.MAC3 + ((s32(m_regs.BFC) - m_regs.MAC3) * m_regs.IR0));
|
TruncateAndSetIR<3>(s32(s64(ZeroExtend64(m_regs.FC[2]) << 12) - s64(m_regs.MAC3)) >> (sf ? 12 : 0), false);
|
||||||
|
|
||||||
|
// [MAC1,MAC2,MAC3] = (([IR1,IR2,IR3] * IR0) + [MAC1,MAC2,MAC3])
|
||||||
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
|
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
|
||||||
m_regs.MAC1 >>= shift;
|
TruncateAndSetMAC<1>(s64(s32(m_regs.IR1) * s32(m_regs.IR0)) + s64(m_regs.MAC1), sf);
|
||||||
m_regs.MAC2 >>= shift;
|
TruncateAndSetMAC<2>(s64(s32(m_regs.IR2) * s32(m_regs.IR0)) + s64(m_regs.MAC2), sf);
|
||||||
m_regs.MAC3 >>= shift;
|
TruncateAndSetMAC<3>(s64(s32(m_regs.IR3) * s32(m_regs.IR0)) + s64(m_regs.MAC3), sf);
|
||||||
|
|
||||||
// Color FIFO = [MAC1/16,MAC2/16,MAC3/16,CODE], [IR1,IR2,IR3] = [MAC1,MAC2,MAC3]
|
// Color FIFO = [MAC1/16,MAC2/16,MAC3/16,CODE], [IR1,IR2,IR3] = [MAC1,MAC2,MAC3]
|
||||||
PushRGB(TruncateRGB<0>(m_regs.MAC1 / 16), TruncateRGB<1>(m_regs.MAC2 / 16), TruncateRGB<2>(m_regs.MAC3 / 16),
|
PushRGB(TruncateRGB<0>(m_regs.MAC1 / 16), TruncateRGB<1>(m_regs.MAC2 / 16), TruncateRGB<2>(m_regs.MAC3 / 16),
|
||||||
m_regs.RGBC[3]);
|
m_regs.RGBC[3]);
|
||||||
|
TruncateAndSetIR<1>(m_regs.MAC1, lm);
|
||||||
|
TruncateAndSetIR<2>(m_regs.MAC2, lm);
|
||||||
|
TruncateAndSetIR<3>(m_regs.MAC3, lm);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Core::Execute_NCDS(Instruction inst)
|
void Core::Execute_NCDS(Instruction inst)
|
||||||
|
|
|
@ -26,14 +26,26 @@ public:
|
||||||
void ExecuteInstruction(Instruction inst);
|
void ExecuteInstruction(Instruction inst);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
static constexpr s64 MAC0_MIN_VALUE = -(INT64_C(1) << 43);
|
||||||
|
static constexpr s64 MAC0_MAX_VALUE = (INT64_C(1) << 43) - 1;
|
||||||
|
static constexpr s64 MAC123_MIN_VALUE = -(INT64_C(1) << 43);
|
||||||
|
static constexpr s64 MAC123_MAX_VALUE = (INT64_C(1) << 43) - 1;
|
||||||
|
static constexpr s32 IR0_MIN_VALUE = 0x0000;
|
||||||
|
static constexpr s32 IR0_MAX_VALUE = 0x1000;
|
||||||
|
static constexpr s32 IR123_MIN_VALUE = -(INT64_C(1) << 15);
|
||||||
|
static constexpr s32 IR123_MAX_VALUE = (INT64_C(1) << 15) - 1;
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
s32 TruncateMAC(s64 value);
|
s64 TruncateMAC(s64 value);
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
s32 TruncateAndSetMAC(s64 value, bool sf);
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
u8 TruncateRGB(s32 value);
|
u8 TruncateRGB(s32 value);
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
void SetIR(s32 value, bool lm);
|
s16 TruncateAndSetIR(s32 value, bool lm);
|
||||||
|
|
||||||
void SetMAC(u32 index, s64 value);
|
void SetMAC(u32 index, s64 value);
|
||||||
void SetIR(u32 index, s32 value, bool lm);
|
void SetIR(u32 index, s32 value, bool lm);
|
||||||
|
@ -45,8 +57,14 @@ private:
|
||||||
s32 Divide(s32 dividend, s32 divisor);
|
s32 Divide(s32 dividend, s32 divisor);
|
||||||
s32 SaturateDivide(s32 result);
|
s32 SaturateDivide(s32 result);
|
||||||
|
|
||||||
static s64 VecDot(const s16 A[3], const s16 B[3]);
|
s64 VecDot(const s16 A[3], const s16 B[3]);
|
||||||
static s64 VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z);
|
s64 VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z);
|
||||||
|
|
||||||
|
// 3x3 matrix * 3x1 vector, updates MAC[1-3] and IR[1-3]
|
||||||
|
void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, bool sf, bool lm);
|
||||||
|
|
||||||
|
// 3x3 matrix * 3x1 vector with translation, updates MAC[1-3] and IR[1-3]
|
||||||
|
void MulMatVec(const s16 M[3][3], const u32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, bool sf, bool lm);
|
||||||
|
|
||||||
void RTPS(const s16 V[3], bool sf);
|
void RTPS(const s16 V[3], bool sf);
|
||||||
void NCDS(const s16 V[3], bool sf, bool lm);
|
void NCDS(const s16 V[3], bool sf, bool lm);
|
||||||
|
|
|
@ -19,9 +19,11 @@ u8 GTE::Core::TruncateRGB(s32 value)
|
||||||
}
|
}
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
s32 GTE::Core::TruncateMAC(s64 value)
|
s64 GTE::Core::TruncateMAC(s64 value)
|
||||||
{
|
{
|
||||||
if (value < INT64_C(-2147483648))
|
constexpr s64 MIN_VALUE = (index == 0) ? MAC0_MIN_VALUE : MAC123_MIN_VALUE;
|
||||||
|
constexpr s64 MAX_VALUE = (index == 0) ? MAC0_MAX_VALUE : MAC123_MAX_VALUE;
|
||||||
|
if (value < MIN_VALUE)
|
||||||
{
|
{
|
||||||
if constexpr (index == 0)
|
if constexpr (index == 0)
|
||||||
m_regs.FLAG.mac0_underflow = true;
|
m_regs.FLAG.mac0_underflow = true;
|
||||||
|
@ -32,9 +34,9 @@ s32 GTE::Core::TruncateMAC(s64 value)
|
||||||
else if constexpr (index == 3)
|
else if constexpr (index == 3)
|
||||||
m_regs.FLAG.mac3_underflow = true;
|
m_regs.FLAG.mac3_underflow = true;
|
||||||
|
|
||||||
return static_cast<s32>(UINT32_C(0x80000000));
|
return MIN_VALUE;
|
||||||
}
|
}
|
||||||
else if (value > INT64_C(2147483647))
|
else if (value > MAX_VALUE)
|
||||||
{
|
{
|
||||||
if constexpr (index == 0)
|
if constexpr (index == 0)
|
||||||
m_regs.FLAG.mac0_overflow = true;
|
m_regs.FLAG.mac0_overflow = true;
|
||||||
|
@ -45,13 +47,60 @@ s32 GTE::Core::TruncateMAC(s64 value)
|
||||||
else if constexpr (index == 3)
|
else if constexpr (index == 3)
|
||||||
m_regs.FLAG.mac3_overflow = true;
|
m_regs.FLAG.mac3_overflow = true;
|
||||||
|
|
||||||
return static_cast<s32>(UINT32_C(0x7FFFFFFF));
|
return MAX_VALUE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
return static_cast<s32>(value);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
void GTE::Core::SetIR(s32 value, bool lm)
|
s32 GTE::Core::TruncateAndSetMAC(s64 value, bool sf)
|
||||||
{
|
{
|
||||||
|
value = TruncateMAC<index>(value);
|
||||||
|
|
||||||
|
// shift should be done before storing to avoid losing precision
|
||||||
|
if (sf)
|
||||||
|
value >>= 12;
|
||||||
|
|
||||||
|
const s32 value32 = static_cast<s32>(value);
|
||||||
|
m_regs.dr32[24 + index] = value32;
|
||||||
|
return value32;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
s16 GTE::Core::TruncateAndSetIR(s32 value, bool lm)
|
||||||
|
{
|
||||||
|
constexpr s32 MIN_VALUE = (index == 0) ? IR0_MIN_VALUE : IR123_MIN_VALUE;
|
||||||
|
constexpr s32 MAX_VALUE = (index == 0) ? IR0_MAX_VALUE : IR123_MAX_VALUE;
|
||||||
|
const s32 actual_min_value = lm ? 0 : -0x8000;
|
||||||
|
if (value < actual_min_value)
|
||||||
|
{
|
||||||
|
value = actual_min_value;
|
||||||
|
if constexpr (index == 0)
|
||||||
|
m_regs.FLAG.ir0_saturated = true;
|
||||||
|
else if constexpr (index == 1)
|
||||||
|
m_regs.FLAG.ir1_saturated = true;
|
||||||
|
else if constexpr (index == 2)
|
||||||
|
m_regs.FLAG.ir2_saturated = true;
|
||||||
|
else if constexpr (index == 3)
|
||||||
|
m_regs.FLAG.ir3_saturated = true;
|
||||||
|
}
|
||||||
|
else if (value > MAX_VALUE)
|
||||||
|
{
|
||||||
|
value = MAX_VALUE;
|
||||||
|
if constexpr (index == 0)
|
||||||
|
m_regs.FLAG.ir0_saturated = true;
|
||||||
|
else if constexpr (index == 1)
|
||||||
|
m_regs.FLAG.ir1_saturated = true;
|
||||||
|
else if constexpr (index == 2)
|
||||||
|
m_regs.FLAG.ir2_saturated = true;
|
||||||
|
else if constexpr (index == 3)
|
||||||
|
m_regs.FLAG.ir3_saturated = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// store sign-extended 16-bit value as 32-bit
|
||||||
|
m_regs.dr32[8 + index] = value;
|
||||||
|
return static_cast<s16>(value);
|
||||||
}
|
}
|
||||||
|
|
|
@ -103,14 +103,10 @@ union Regs
|
||||||
s32 TR[3]; // 37-39
|
s32 TR[3]; // 37-39
|
||||||
s16 LLM[3][3]; // 40-44
|
s16 LLM[3][3]; // 40-44
|
||||||
u16 pad18; // 44
|
u16 pad18; // 44
|
||||||
u32 RBK; // 45
|
u32 BK[3]; // 45-47
|
||||||
u32 GBK; // 46
|
|
||||||
u32 BBK; // 47
|
|
||||||
s16 LCM[3][3]; // 48-52
|
s16 LCM[3][3]; // 48-52
|
||||||
u16 pad19; // 52
|
u16 pad19; // 52
|
||||||
u32 RFC; // 53
|
u32 FC[3]; // 53-55
|
||||||
u32 GFC; // 54
|
|
||||||
u32 BFC; // 55
|
|
||||||
s32 OFX; // 56
|
s32 OFX; // 56
|
||||||
s32 OFY; // 57
|
s32 OFY; // 57
|
||||||
u16 H; // 58
|
u16 H; // 58
|
||||||
|
|
Loading…
Reference in a new issue