GTE: Implement NCDS (but incorrectly)

This commit is contained in:
Connor McLaughlin 2019-09-22 21:41:11 +10:00
parent f2d62fcce0
commit fbd7fcec48
6 changed files with 220 additions and 66 deletions

View file

@ -270,6 +270,10 @@ void Core::ExecuteInstruction(Instruction inst)
Execute_NCLIP(inst);
break;
case 0x13:
Execute_NCDS(inst);
break;
case 0x28:
Execute_SQR(inst);
break;
@ -415,6 +419,13 @@ void Core::PushSZ(s32 value)
m_regs.dr32[19] = static_cast<u32>(value); // SZ3 <- value
}
// Advances the three-entry colour FIFO (RGB0 <- RGB1 <- RGB2) and stores a new entry in RGB2.
// The new entry is packed as one 32-bit word: r in bits 0-7, g in bits 8-15, b in bits 16-23
// and c in bits 24-31.
void Core::PushRGB(u8 r, u8 g, u8 b, u8 c)
{
  // Build the packed word up front; it only depends on the arguments.
  u32 packed = ZeroExtend32(r);
  packed |= ZeroExtend32(g) << 8;
  packed |= ZeroExtend32(b) << 16;
  packed |= ZeroExtend32(c) << 24;

  // Shift the older entries down one slot, dropping the previous RGB0.
  m_regs.RGB0 = m_regs.RGB1;
  m_regs.RGB1 = m_regs.RGB2;
  m_regs.RGB2 = packed;
}
s32 Core::Divide(s32 dividend, s32 divisor)
{
DebugAssert(divisor != 0);
@ -567,4 +578,71 @@ void Core::Execute_AVSZ4(Instruction inst)
m_regs.FLAG.UpdateError();
}
// Returns the dot product of two 3-component signed 16-bit vectors as a 64-bit value.
// Each component product is formed in 32 bits (an s16*s16 product always fits) and the
// accumulation is carried out in 64 bits.
s64 Core::VecDot(const s16 A[3], const s16 B[3])
{
  s64 sum = 0;
  for (u32 i = 0; i < 3; i++)
    sum += s64(s32(A[i]) * s32(B[i]));

  return sum;
}
// Returns the dot product of the 3-component vector A with the vector (B_x, B_y, B_z),
// where the second operand is supplied as three separate signed 16-bit scalars.
// Component products are formed in 32 bits and summed in 64 bits.
s64 Core::VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z)
{
  const s32 x_term = s32(A[0]) * s32(B_x);
  const s32 y_term = s32(A[1]) * s32(B_y);
  const s32 z_term = s32(A[2]) * s32(B_z);

  return s64(x_term) + s64(y_term) + s64(z_term);
}
void Core::NCDS(const s16 V[3], bool sf, bool lm)
{
const u8 shift = sf ? 12 : 0;
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
m_regs.MAC1 = TruncateMAC<1>(VecDot(m_regs.LLM[0], V) >> shift);
m_regs.MAC2 = TruncateMAC<2>(VecDot(m_regs.LLM[1], V) >> shift);
m_regs.MAC3 = TruncateMAC<3>(VecDot(m_regs.LLM[2], V) >> shift);
SetIR(0, m_regs.MAC1, lm);
SetIR(1, m_regs.MAC2, lm);
SetIR(2, m_regs.MAC3, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
// TODO: First multiply should check overflow
m_regs.MAC1 = TruncateMAC<1>(
((ZeroExtend64(m_regs.RBK) * 0x1000) + VecDot(m_regs.LCM[0], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
m_regs.MAC2 = TruncateMAC<2>(
((ZeroExtend64(m_regs.GBK) * 0x1000) + VecDot(m_regs.LCM[1], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
m_regs.MAC3 = TruncateMAC<3>(
((ZeroExtend64(m_regs.BBK) * 0x1000) + VecDot(m_regs.LCM[2], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
SetIR(1, m_regs.MAC1, lm);
SetIR(2, m_regs.MAC2, lm);
SetIR(3, m_regs.MAC3, lm);
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
m_regs.MAC1 = TruncateMAC<1>((ZeroExtend64(m_regs.RGBC[0]) * static_cast<u16>(m_regs.IR1)) << 4);
m_regs.MAC2 = TruncateMAC<1>((ZeroExtend64(m_regs.RGBC[1]) * static_cast<u16>(m_regs.IR2)) << 4);
m_regs.MAC3 = TruncateMAC<1>((ZeroExtend64(m_regs.RGBC[2]) * static_cast<u16>(m_regs.IR3)) << 4);
SetIR(1, m_regs.MAC1, false);
SetIR(2, m_regs.MAC2, false);
SetIR(3, m_regs.MAC3, false);
// [MAC1,MAC2,MAC3] = MAC+(FC-MAC)*IR0 ;<--- for NCDx only
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
m_regs.MAC1 = TruncateMAC<1>(m_regs.MAC1 + ((s32(m_regs.RFC) - m_regs.MAC1) * m_regs.IR0));
m_regs.MAC2 = TruncateMAC<2>(m_regs.MAC2 + ((s32(m_regs.GFC) - m_regs.MAC2) * m_regs.IR0));
m_regs.MAC3 = TruncateMAC<3>(m_regs.MAC3 + ((s32(m_regs.BFC) - m_regs.MAC3) * m_regs.IR0));
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
m_regs.MAC1 >>= shift;
m_regs.MAC2 >>= shift;
m_regs.MAC3 >>= shift;
// Color FIFO = [MAC1/16,MAC2/16,MAC3/16,CODE], [IR1,IR2,IR3] = [MAC1,MAC2,MAC3]
PushRGB(TruncateRGB<0>(m_regs.MAC1 / 16), TruncateRGB<1>(m_regs.MAC2 / 16), TruncateRGB<2>(m_regs.MAC3 / 16),
m_regs.RGBC[3]);
}
// Instruction handler for NCDS: resets FLAG, runs the NCDS calculation on vector V0 using the
// sf and lm fields of the instruction, then lets FLAG recompute its error summary.
void Core::Execute_NCDS(Instruction inst)
{
  m_regs.FLAG.Clear();

  // Pull the two control fields out of the instruction word before dispatching.
  const bool shift_fraction = inst.sf;
  const bool limit_mode = inst.lm;
  NCDS(m_regs.V0, shift_fraction, limit_mode);

  m_regs.FLAG.UpdateError();
}
} // namespace GTE

View file

@ -26,16 +26,30 @@ public:
void ExecuteInstruction(Instruction inst);
private:
template<u32 index>
s32 TruncateMAC(s64 value);
template<u32 index>
u8 TruncateRGB(s32 value);
template<u32 index>
void SetIR(s32 value, bool lm);
void SetMAC(u32 index, s64 value);
void SetIR(u32 index, s32 value, bool lm);
void SetIR0(s32 value);
void SetOTZ(s32 value);
void PushSXY(s32 x, s32 y);
void PushSZ(s32 value);
void PushRGB(u8 r, u8 g, u8 b, u8 c);
s32 Divide(s32 dividend, s32 divisor);
s32 SaturateDivide(s32 result);
static s64 VecDot(const s16 A[3], const s16 B[3]);
static s64 VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z);
void RTPS(const s16 V[3], bool sf);
void NCDS(const s16 V[3], bool sf, bool lm);
void Execute_RTPS(Instruction inst);
void Execute_RTPT(Instruction inst);
@ -43,8 +57,11 @@ private:
void Execute_SQR(Instruction inst);
void Execute_AVSZ3(Instruction inst);
void Execute_AVSZ4(Instruction inst);
void Execute_NCDS(Instruction inst);
Regs m_regs = {};
};
#include "gte.inl"
} // namespace GTE

57
src/pse/gte.inl Normal file
View file

@ -0,0 +1,57 @@
#include "gte.h"
// Clamps a colour component to the 0..0xFF range and returns it as a byte.
// When the input lies outside that range, the saturation flag selected by the template index
// is set: 0 -> color_r_saturated, 1 -> color_g_saturated, anything else -> color_b_saturated.
template<u32 index>
u8 GTE::Core::TruncateRGB(s32 value)
{
  // Fast path: already representable, no flag is touched.
  if (value >= 0 && value <= 0xFF)
    return static_cast<u8>(value);

  if constexpr (index == 0)
    m_regs.FLAG.color_r_saturated = true;
  else if constexpr (index == 1)
    m_regs.FLAG.color_g_saturated = true;
  else
    m_regs.FLAG.color_b_saturated = true;

  // Negative inputs pin to 0, oversized inputs pin to 0xFF.
  const s32 clamped = (value < 0) ? 0 : 0xFF;
  return static_cast<u8>(clamped);
}
// Narrows a 64-bit intermediate result to 32 bits for storage in MAC<index>.
// Values inside the signed 32-bit range are returned unchanged. Values outside it set the
// matching mac<index>_underflow / mac<index>_overflow flag (for index 0..3) and the result is
// pinned to 0x80000000 or 0x7FFFFFFF respectively.
// NOTE(review): despite the name, out-of-range values are saturated rather than truncated -
// confirm this is the intended behaviour.
template<u32 index>
s32 GTE::Core::TruncateMAC(s64 value)
{
  const bool below_range = (value < INT64_C(-2147483648));
  const bool above_range = (value > INT64_C(2147483647));

  // Common case: the value fits and no flag is affected.
  if (!below_range && !above_range)
    return static_cast<s32>(value);

  if constexpr (index == 0)
  {
    if (below_range)
      m_regs.FLAG.mac0_underflow = true;
    else
      m_regs.FLAG.mac0_overflow = true;
  }
  else if constexpr (index == 1)
  {
    if (below_range)
      m_regs.FLAG.mac1_underflow = true;
    else
      m_regs.FLAG.mac1_overflow = true;
  }
  else if constexpr (index == 2)
  {
    if (below_range)
      m_regs.FLAG.mac2_underflow = true;
    else
      m_regs.FLAG.mac2_overflow = true;
  }
  else if constexpr (index == 3)
  {
    if (below_range)
      m_regs.FLAG.mac3_underflow = true;
    else
      m_regs.FLAG.mac3_overflow = true;
  }

  if (below_range)
    return static_cast<s32>(UINT32_C(0x80000000));

  return static_cast<s32>(UINT32_C(0x7FFFFFFF));
}
// Template form of SetIR, selected at compile time by index.
// NOTE(review): this is currently an empty stub - both value and lm are ignored and no
// register or flag is written. Presumably it is meant to replace the runtime-indexed
// SetIR(u32, s32, bool) overload eventually; confirm before calling it.
template<u32 index>
void GTE::Core::SetIR(s32 value, bool lm)
{
  // Intentionally empty for now (not yet implemented).
}

View file

@ -57,72 +57,72 @@ union Regs
#pragma pack(push, 1)
struct
{
s16 V0[3]; // 0-1
u16 pad1; // 1
s16 V1[3]; // 2-3
u16 pad2; // 3
s16 V2[3]; // 4-5
u16 pad3; // 5
u8 RGBC[4]; // 6
u16 OTZ; // 7
u16 pad4; // 7
s16 IR0; // 8
u16 pad5; // 8
s16 IR1; // 9
u16 pad6; // 9
s16 IR2; // 10
u16 pad7; // 10
s16 IR3; // 11
u16 pad8; // 11
s16 SXY0[2]; // 12
s16 SXY1[2]; // 13
s16 SXY2[2]; // 14
s16 SXYP[2]; // 15
u16 SZ0; // 16
u16 pad13; // 16
u16 SZ1; // 17
u16 pad14; // 17
u16 SZ2; // 18
u16 pad15; // 18
u16 SZ3; // 19
u16 pad16; // 19
u32 RGB0; // 20
u32 RGB1; // 21
u32 RGB2; // 22
u32 RES1; // 23
s32 MAC0; // 24
s32 MAC1; // 25
s32 MAC2; // 26
s32 MAC3; // 27
u32 IRGB; // 28
u32 ORGB; // 29
s32 LZCS; // 30
u32 LZCR; // 31
s16 RT[3][3]; // 32-36
u16 pad17; // 36
s32 TR[3]; // 37-39
u16 L[3][3]; // 40-44
u16 pad18; // 44
u32 RBK; // 45
u32 GBK; // 46
u32 BBK; // 47
u16 LR[3][3]; // 48-52
u16 pad19; // 52
u32 RFC; // 53
u32 GFC; // 54
u32 BFC; // 55
s32 OFX; // 56
s32 OFY; // 57
u16 H; // 58
u16 pad20; // 58
s16 DQA; // 59
u16 pad21; // 59
s32 DQB; // 60
s16 ZSF3; // 61
u16 pad22; // 61
s16 ZSF4; // 62
u16 pad23; // 62
FLAGS FLAG; // 63
s16 V0[3]; // 0-1
u16 pad1; // 1
s16 V1[3]; // 2-3
u16 pad2; // 3
s16 V2[3]; // 4-5
u16 pad3; // 5
u8 RGBC[4]; // 6
u16 OTZ; // 7
u16 pad4; // 7
s16 IR0; // 8
u16 pad5; // 8
s16 IR1; // 9
u16 pad6; // 9
s16 IR2; // 10
u16 pad7; // 10
s16 IR3; // 11
u16 pad8; // 11
s16 SXY0[2]; // 12
s16 SXY1[2]; // 13
s16 SXY2[2]; // 14
s16 SXYP[2]; // 15
u16 SZ0; // 16
u16 pad13; // 16
u16 SZ1; // 17
u16 pad14; // 17
u16 SZ2; // 18
u16 pad15; // 18
u16 SZ3; // 19
u16 pad16; // 19
u32 RGB0; // 20
u32 RGB1; // 21
u32 RGB2; // 22
u32 RES1; // 23
s32 MAC0; // 24
s32 MAC1; // 25
s32 MAC2; // 26
s32 MAC3; // 27
u32 IRGB; // 28
u32 ORGB; // 29
s32 LZCS; // 30
u32 LZCR; // 31
s16 RT[3][3]; // 32-36
u16 pad17; // 36
s32 TR[3]; // 37-39
s16 LLM[3][3]; // 40-44
u16 pad18; // 44
u32 RBK; // 45
u32 GBK; // 46
u32 BBK; // 47
s16 LCM[3][3]; // 48-52
u16 pad19; // 52
u32 RFC; // 53
u32 GFC; // 54
u32 BFC; // 55
s32 OFX; // 56
s32 OFY; // 57
u16 H; // 58
u16 pad20; // 58
s16 DQA; // 59
u16 pad21; // 59
s32 DQB; // 60
s16 ZSF3; // 61
u16 pad22; // 61
s16 ZSF4; // 62
u16 pad23; // 62
FLAGS FLAG; // 63
};
#pragma pack(pop)
};

View file

@ -91,6 +91,7 @@
<ItemGroup>
<None Include="cpu_core.inl" />
<None Include="bus.inl" />
<None Include="gte.inl" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{868B98C8-65A1-494B-8346-250A73A48C0A}</ProjectGuid>

View file

@ -43,5 +43,6 @@
<ItemGroup>
<None Include="cpu_core.inl" />
<None Include="bus.inl" />
<None Include="gte.inl" />
</ItemGroup>
</Project>