diff --git a/src/pse/gte.cpp b/src/pse/gte.cpp
index 3fe1ee902..0921e2965 100644
--- a/src/pse/gte.cpp
+++ b/src/pse/gte.cpp
@@ -270,6 +270,10 @@ void Core::ExecuteInstruction(Instruction inst)
       Execute_NCLIP(inst);
       break;
 
+    case 0x13:
+      Execute_NCDS(inst);
+      break;
+
     case 0x28:
       Execute_SQR(inst);
       break;
@@ -415,6 +419,15 @@ void Core::PushSZ(s32 value)
   m_regs.dr32[19] = static_cast<u32>(value); // SZ3 <- value
 }
 
+// Advances the colour FIFO (RGB0 <- RGB1 <- RGB2) and stores the new entry in RGB2,
+// packed as r | g << 8 | b << 16 | c << 24.
+void Core::PushRGB(u8 r, u8 g, u8 b, u8 c)
+{
+  m_regs.RGB0 = m_regs.RGB1;
+  m_regs.RGB1 = m_regs.RGB2;
+  m_regs.RGB2 = ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16) | (ZeroExtend32(c) << 24);
+}
+
 s32 Core::Divide(s32 dividend, s32 divisor)
 {
   DebugAssert(divisor != 0);
@@ -567,4 +580,82 @@ void Core::Execute_AVSZ4(Instruction inst)
   m_regs.FLAG.UpdateError();
 }
 
+// Dot product of two 3-component s16 vectors, summed in 64 bits.
+s64 Core::VecDot(const s16 A[3], const s16 B[3])
+{
+  return s64(s32(A[0]) * s32(B[0])) + s64(s32(A[1]) * s32(B[1])) + s64(s32(A[2]) * s32(B[2]));
+}
+
+// Dot product of A with a second vector supplied as separate components.
+s64 Core::VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z)
+{
+  return s64(s32(A[0]) * s32(B_x)) + s64(s32(A[1]) * s32(B_y)) + s64(s32(A[2]) * s32(B_z));
+}
+
+// NCDS core for one vector V: LLM*V, then BK + LCM*IR, then RGBC modulation, then
+// interpolation towards FC by IR0; the result is pushed onto the colour FIFO.
+// sf selects a right shift by 12 (otherwise 0); lm is forwarded to SetIR.
+void Core::NCDS(const s16 V[3], bool sf, bool lm)
+{
+  const u8 shift = sf ? 12 : 0;
+
+  // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
+  m_regs.MAC1 = TruncateMAC<1>(VecDot(m_regs.LLM[0], V) >> shift);
+  m_regs.MAC2 = TruncateMAC<2>(VecDot(m_regs.LLM[1], V) >> shift);
+  m_regs.MAC3 = TruncateMAC<3>(VecDot(m_regs.LLM[2], V) >> shift);
+  // IR1..IR3 are indices 1..3, as in the stages below (IR0 has its own SetIR0).
+  SetIR(1, m_regs.MAC1, lm);
+  SetIR(2, m_regs.MAC2, lm);
+  SetIR(3, m_regs.MAC3, lm);
+
+  // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
+  // TODO: First multiply should check overflow
+  // NOTE(review): RBK/GBK/BBK are u32 and zero-extended here - confirm negative values are handled.
+  m_regs.MAC1 = TruncateMAC<1>(
+    ((ZeroExtend64(m_regs.RBK) * 0x1000) + VecDot(m_regs.LCM[0], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
+  m_regs.MAC2 = TruncateMAC<2>(
+    ((ZeroExtend64(m_regs.GBK) * 0x1000) + VecDot(m_regs.LCM[1], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
+  m_regs.MAC3 = TruncateMAC<3>(
+    ((ZeroExtend64(m_regs.BBK) * 0x1000) + VecDot(m_regs.LCM[2], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
+  SetIR(1, m_regs.MAC1, lm);
+  SetIR(2, m_regs.MAC2, lm);
+  SetIR(3, m_regs.MAC3, lm);
+
+  // [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
+  m_regs.MAC1 = TruncateMAC<1>((ZeroExtend64(m_regs.RGBC[0]) * static_cast<s64>(m_regs.IR1)) << 4);
+  m_regs.MAC2 = TruncateMAC<2>((ZeroExtend64(m_regs.RGBC[1]) * static_cast<s64>(m_regs.IR2)) << 4);
+  m_regs.MAC3 = TruncateMAC<3>((ZeroExtend64(m_regs.RGBC[2]) * static_cast<s64>(m_regs.IR3)) << 4);
+  SetIR(1, m_regs.MAC1, false);
+  SetIR(2, m_regs.MAC2, false);
+  SetIR(3, m_regs.MAC3, false);
+
+  // [MAC1,MAC2,MAC3] = MAC+(FC-MAC)*IR0 ;<--- for NCDx only
+  // Evaluated in 64 bits so the multiply cannot overflow before TruncateMAC checks the range.
+  m_regs.MAC1 = TruncateMAC<1>(s64(m_regs.MAC1) + ((s64(s32(m_regs.RFC)) - s64(m_regs.MAC1)) * s64(m_regs.IR0)));
+  m_regs.MAC2 = TruncateMAC<2>(s64(m_regs.MAC2) + ((s64(s32(m_regs.GFC)) - s64(m_regs.MAC2)) * s64(m_regs.IR0)));
+  m_regs.MAC3 = TruncateMAC<3>(s64(m_regs.MAC3) + ((s64(s32(m_regs.BFC)) - s64(m_regs.MAC3)) * s64(m_regs.IR0)));
+
+  // [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
+  m_regs.MAC1 >>= shift;
+  m_regs.MAC2 >>= shift;
+  m_regs.MAC3 >>= shift;
+
+  // Color FIFO = [MAC1/16,MAC2/16,MAC3/16,CODE], [IR1,IR2,IR3] = [MAC1,MAC2,MAC3]
+  PushRGB(TruncateRGB<0>(m_regs.MAC1 / 16), TruncateRGB<1>(m_regs.MAC2 / 16), TruncateRGB<2>(m_regs.MAC3 / 16),
+          m_regs.RGBC[3]);
+  SetIR(1, m_regs.MAC1, lm);
+  SetIR(2, m_regs.MAC2, lm);
+  SetIR(3, m_regs.MAC3, lm);
+}
+
+// NCDS opcode handler: clears FLAG, runs NCDS on V0, then calls FLAG.UpdateError().
+void Core::Execute_NCDS(Instruction inst)
+{
+  m_regs.FLAG.Clear();
+
+  NCDS(m_regs.V0, inst.sf, inst.lm);
+
+  m_regs.FLAG.UpdateError();
+}
+
 } // namespace GTE
\ No newline at end of file
diff --git a/src/pse/gte.h b/src/pse/gte.h
index c2969acbf..48d784f6d 100644
--- a/src/pse/gte.h
+++ b/src/pse/gte.h
@@ -26,16 +26,30 @@ public:
   void ExecuteInstruction(Instruction inst);
 
 private:
+  template<u32 index>
+  s32 TruncateMAC(s64 value);
+
+  template<u32 index>
+  u8 TruncateRGB(s32 value);
+
+  template<u32 index>
+  void SetIR(s32 value, bool lm);
+
   void SetMAC(u32 index, s64 value);
   void SetIR(u32 index, s32 value, bool lm);
   void SetIR0(s32 value);
   void SetOTZ(s32 value);
   void PushSXY(s32 x, s32 y);
   void PushSZ(s32 value);
+  void PushRGB(u8 r, u8 g, u8 b, u8 c);
   s32 Divide(s32 dividend, s32 divisor);
   s32 SaturateDivide(s32 result);
 
+  static s64 VecDot(const s16 A[3], const s16 B[3]);
+  static s64 VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z);
+
   void RTPS(const s16 V[3], bool sf);
+  void NCDS(const s16 V[3], bool sf, bool lm);
 
   void Execute_RTPS(Instruction inst);
   void Execute_RTPT(Instruction inst);
@@ -43,8 +57,11 @@ private:
   void Execute_SQR(Instruction inst);
   void Execute_AVSZ3(Instruction inst);
   void Execute_AVSZ4(Instruction inst);
+  void Execute_NCDS(Instruction inst);
 
   Regs m_regs = {};
 };
 
+#include "gte.inl"
+
 } // namespace GTE
\ No newline at end of file
diff --git a/src/pse/gte.inl b/src/pse/gte.inl
new file mode 100644
index 000000000..697e66332
--- /dev/null
+++ b/src/pse/gte.inl
@@ -0,0 +1,62 @@
+#include "gte.h"
+
+// Clamps a colour component to 0..0xFF; when clamping occurs, sets the FLAG saturation bit
+// chosen by index (0 = R, 1 = G, anything else = B).
+template<u32 index>
+u8 GTE::Core::TruncateRGB(s32 value)
+{
+  if (value < 0 || value > 0xFF)
+  {
+    if constexpr (index == 0)
+      m_regs.FLAG.color_r_saturated = true;
+    else if constexpr (index == 1)
+      m_regs.FLAG.color_g_saturated = true;
+    else
+      m_regs.FLAG.color_b_saturated = true;
+
+    value = (value < 0) ? 0 : 0xFF;
+  }
+
+  return static_cast<u8>(value);
+}
+
+// Clamps a 64-bit intermediate to the s32 range; when clamping occurs, sets the
+// macN underflow/overflow FLAG bit for index N (0..3).
+template<u32 index>
+s32 GTE::Core::TruncateMAC(s64 value)
+{
+  if (value < INT64_C(-2147483648))
+  {
+    if constexpr (index == 0)
+      m_regs.FLAG.mac0_underflow = true;
+    else if constexpr (index == 1)
+      m_regs.FLAG.mac1_underflow = true;
+    else if constexpr (index == 2)
+      m_regs.FLAG.mac2_underflow = true;
+    else if constexpr (index == 3)
+      m_regs.FLAG.mac3_underflow = true;
+
+    return static_cast<s32>(UINT32_C(0x80000000));
+  }
+  else if (value > INT64_C(2147483647))
+  {
+    if constexpr (index == 0)
+      m_regs.FLAG.mac0_overflow = true;
+    else if constexpr (index == 1)
+      m_regs.FLAG.mac1_overflow = true;
+    else if constexpr (index == 2)
+      m_regs.FLAG.mac2_overflow = true;
+    else if constexpr (index == 3)
+      m_regs.FLAG.mac3_overflow = true;
+
+    return static_cast<s32>(UINT32_C(0x7FFFFFFF));
+  }
+
+  return static_cast<s32>(value);
+}
+
+// TODO: not implemented yet; NCDS uses the non-template SetIR(index, value, lm) overload.
+template<u32 index>
+void GTE::Core::SetIR(s32 value, bool lm)
+{
+}
diff --git a/src/pse/gte_types.h b/src/pse/gte_types.h
index a9bfac550..865c09419 100644
--- a/src/pse/gte_types.h
+++ b/src/pse/gte_types.h
@@ -57,72 +57,72 @@ union Regs
 #pragma pack(push, 1)
   struct
   {
-    s16 V0[3];    // 0-1
-    u16 pad1;     // 1
-    s16 V1[3];    // 2-3
-    u16 pad2;     // 3
-    s16 V2[3];    // 4-5
-    u16 pad3;     // 5
-    u8 RGBC[4];   // 6
-    u16 OTZ;      // 7
-    u16 pad4;     // 7
-    s16 IR0;      // 8
-    u16 pad5;     // 8
-    s16 IR1;      // 9
-    u16 pad6;     // 9
-    s16 IR2;      // 10
-    u16 pad7;     // 10
-    s16 IR3;      // 11
-    u16 pad8;     // 11
-    s16 SXY0[2];  // 12
-    s16 SXY1[2];  // 13
-    s16 SXY2[2];  // 14
-    s16 SXYP[2];  // 15
-    u16 SZ0;      // 16
-    u16 pad13;    // 16
-    u16 SZ1;      // 17
-    u16 pad14;    // 17
-    u16 SZ2;      // 18
-    u16 pad15;    // 18
-    u16 SZ3;      // 19
-    u16 pad16;    // 19
-    u32 RGB0;     // 20
-    u32 RGB1;     // 21
-    u32 RGB2;     // 22
-    u32 RES1;     // 23
-    s32 MAC0;     // 24
-    s32 MAC1;     // 25
-    s32 MAC2;     // 26
-    s32 MAC3;     // 27
-    u32 IRGB;     // 28
-    u32 ORGB;     // 29
-    s32 LZCS;     // 30
-    u32 LZCR;     // 31
-    s16 RT[3][3]; // 32-36
-    u16 pad17;    // 36
-    s32 TR[3];    // 37-39
-    u16 L[3][3];  // 40-44
-    u16 pad18;    // 44
-    u32 RBK;      // 45
-    u32 GBK;      // 46
-    u32 BBK;      // 47
-    u16 LR[3][3]; // 48-52
-    u16 pad19;    // 52
-    u32 RFC;      // 53
-    u32 GFC;      // 54
-    u32 BFC;      // 55
-    s32 OFX;      // 56
-    s32 OFY;      // 57
-    u16 H;        // 58
-    u16 pad20;    // 58
-    s16 DQA;      // 59
-    u16 pad21;    // 59
-    s32 DQB;      // 60
-    s16 ZSF3;     // 61
-    u16 pad22;    // 61
-    s16 ZSF4;     // 62
-    u16 pad23;    // 62
-    FLAGS FLAG;   // 63
+    s16 V0[3];     // 0-1
+    u16 pad1;      // 1
+    s16 V1[3];     // 2-3
+    u16 pad2;      // 3
+    s16 V2[3];     // 4-5
+    u16 pad3;      // 5
+    u8 RGBC[4];    // 6
+    u16 OTZ;       // 7
+    u16 pad4;      // 7
+    s16 IR0;       // 8
+    u16 pad5;      // 8
+    s16 IR1;       // 9
+    u16 pad6;      // 9
+    s16 IR2;       // 10
+    u16 pad7;      // 10
+    s16 IR3;       // 11
+    u16 pad8;      // 11
+    s16 SXY0[2];   // 12
+    s16 SXY1[2];   // 13
+    s16 SXY2[2];   // 14
+    s16 SXYP[2];   // 15
+    u16 SZ0;       // 16
+    u16 pad13;     // 16
+    u16 SZ1;       // 17
+    u16 pad14;     // 17
+    u16 SZ2;       // 18
+    u16 pad15;     // 18
+    u16 SZ3;       // 19
+    u16 pad16;     // 19
+    u32 RGB0;      // 20
+    u32 RGB1;      // 21
+    u32 RGB2;      // 22
+    u32 RES1;      // 23
+    s32 MAC0;      // 24
+    s32 MAC1;      // 25
+    s32 MAC2;      // 26
+    s32 MAC3;      // 27
+    u32 IRGB;      // 28
+    u32 ORGB;      // 29
+    s32 LZCS;      // 30
+    u32 LZCR;      // 31
+    s16 RT[3][3];  // 32-36
+    u16 pad17;     // 36
+    s32 TR[3];     // 37-39
+    s16 LLM[3][3]; // 40-44
+    u16 pad18;     // 44
+    u32 RBK;       // 45
+    u32 GBK;       // 46
+    u32 BBK;       // 47
+    s16 LCM[3][3]; // 48-52
+    u16 pad19;     // 52
+    u32 RFC;       // 53
+    u32 GFC;       // 54
+    u32 BFC;       // 55
+    s32 OFX;       // 56
+    s32 OFY;       // 57
+    u16 H;         // 58
+    u16 pad20;     // 58
+    s16 DQA;       // 59
+    u16 pad21;     // 59
+    s32 DQB;       // 60
+    s16 ZSF3;      // 61
+    u16 pad22;     // 61
+    s16 ZSF4;      // 62
+    u16 pad23;     // 62
+    FLAGS FLAG;    // 63
   };
 #pragma pack(pop)
 };
diff --git a/src/pse/pse.vcxproj b/src/pse/pse.vcxproj
index 89d27835b..4e81aed10 100644
--- a/src/pse/pse.vcxproj
+++ b/src/pse/pse.vcxproj
@@ -91,6 +91,7 @@
+
 {868B98C8-65A1-494B-8346-250A73A48C0A}
diff --git a/src/pse/pse.vcxproj.filters b/src/pse/pse.vcxproj.filters
index d9f6f35f8..667224c26 100644
--- a/src/pse/pse.vcxproj.filters
+++ b/src/pse/pse.vcxproj.filters
@@ -43,5 +43,6 @@
+
\ No newline at end of file