PGXP: Add initial implementation

This commit is contained in:
Connor McLaughlin 2020-08-02 00:25:07 +10:00
parent 013497cf20
commit 0c1b637549
30 changed files with 1699 additions and 377 deletions

View file

@ -11,6 +11,7 @@ A "BIOS" ROM image is required to to start the emulator and to play games. You c
## Latest News ## Latest News
- 2020/08/01: Initial PGXP (geometry/perspective correction) support.
- 2020/07/28: Qt frontend supports displaying interface in multiple languages. - 2020/07/28: Qt frontend supports displaying interface in multiple languages.
- 2020/07/23: m3u multi-disc support for libretro core. - 2020/07/23: m3u multi-disc support for libretro core.
- 2020/07/22: Support multiple bindings for each controller button/axis. - 2020/07/22: Support multiple bindings for each controller button/axis.

View file

@ -129,6 +129,50 @@
</PreferenceCategory> </PreferenceCategory>
<PreferenceCategory app:title="Enhancements">
<SwitchPreferenceCompat
app:key="GPU/TrueColor"
app:title="True Color Rendering (24-bit, disables dithering)"
app:defaultValue="false"/>
<SwitchPreferenceCompat
app:key="GPU/ScaledDithering"
app:title="Scaled Dithering (scale dither pattern to resolution)"
app:defaultValue="true"/>
<SwitchPreferenceCompat
app:key="GPU/DisableInterlacing"
app:title="Disable Interlacing (force progressive render/scan)"
app:defaultValue="true"/>
<SwitchPreferenceCompat
app:key="GPU/ForceNTSCTimings"
app:title="Force NTSC Timings (60hz-on-PAL)"
app:defaultValue="false"/>
<SwitchPreferenceCompat
app:key="GPU/PGXPEnable"
app:title="PGXP Geometry Correction"
app:defaultValue="false"/>
<SwitchPreferenceCompat
app:key="GPU/PGXPCulling"
app:title="PGXP Culling Correction"
app:defaultValue="true"/>
<SwitchPreferenceCompat
app:key="GPU/PGXPTextureCorrection"
app:title="PGXP Texture Correction"
app:defaultValue="true"/>
<SwitchPreferenceCompat
app:key="GPU/PGXPVertexCache"
app:title="PGXP Vertex Cache"
app:defaultValue="false"/>
</PreferenceCategory>
<PreferenceCategory app:title="Display"> <PreferenceCategory app:title="Display">
<ListPreference <ListPreference
app:key="Display/CropMode" app:key="Display/CropMode"
@ -144,7 +188,7 @@
app:entries="@array/settings_display_aspect_ratio_names" app:entries="@array/settings_display_aspect_ratio_names"
app:entryValues="@array/settings_display_aspect_ratio_values" app:entryValues="@array/settings_display_aspect_ratio_values"
app:defaultValue="4:3" app:defaultValue="4:3"
app:useSimpleSummaryProvider="true" />] app:useSimpleSummaryProvider="true" />
<SwitchPreferenceCompat <SwitchPreferenceCompat
app:key="Display/LinearFiltering" app:key="Display/LinearFiltering"

View file

@ -59,6 +59,8 @@ add_library(core
negcon.h negcon.h
pad.cpp pad.cpp
pad.h pad.h
pgxp.cpp
pgxp.h
playstation_mouse.cpp playstation_mouse.cpp
playstation_mouse.h playstation_mouse.h
psf_loader.cpp psf_loader.cpp

View file

@ -80,6 +80,7 @@
<ClCompile Include="negcon.cpp" /> <ClCompile Include="negcon.cpp" />
<ClCompile Include="pad.cpp" /> <ClCompile Include="pad.cpp" />
<ClCompile Include="controller.cpp" /> <ClCompile Include="controller.cpp" />
<ClCompile Include="pgxp.cpp" />
<ClCompile Include="playstation_mouse.cpp" /> <ClCompile Include="playstation_mouse.cpp" />
<ClCompile Include="psf_loader.cpp" /> <ClCompile Include="psf_loader.cpp" />
<ClCompile Include="resources.cpp" /> <ClCompile Include="resources.cpp" />
@ -126,6 +127,7 @@
<ClInclude Include="negcon.h" /> <ClInclude Include="negcon.h" />
<ClInclude Include="pad.h" /> <ClInclude Include="pad.h" />
<ClInclude Include="controller.h" /> <ClInclude Include="controller.h" />
<ClInclude Include="pgxp.h" />
<ClInclude Include="playstation_mouse.h" /> <ClInclude Include="playstation_mouse.h" />
<ClInclude Include="psf_loader.h" /> <ClInclude Include="psf_loader.h" />
<ClInclude Include="resources.h" /> <ClInclude Include="resources.h" />

View file

@ -46,6 +46,7 @@
<ClCompile Include="gpu_hw_vulkan.cpp" /> <ClCompile Include="gpu_hw_vulkan.cpp" />
<ClCompile Include="resources.cpp" /> <ClCompile Include="resources.cpp" />
<ClCompile Include="host_interface_progress_callback.cpp" /> <ClCompile Include="host_interface_progress_callback.cpp" />
<ClCompile Include="pgxp.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="types.h" /> <ClInclude Include="types.h" />
@ -94,5 +95,6 @@
<ClInclude Include="resources.h" /> <ClInclude Include="resources.h" />
<ClInclude Include="host_interface_progress_callback.h" /> <ClInclude Include="host_interface_progress_callback.h" />
<ClInclude Include="gte_types.h" /> <ClInclude Include="gte_types.h" />
<ClInclude Include="pgxp.h" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View file

@ -6,6 +6,8 @@
#include "cpu_disasm.h" #include "cpu_disasm.h"
#include "cpu_recompiler_thunks.h" #include "cpu_recompiler_thunks.h"
#include "gte.h" #include "gte.h"
#include "pgxp.h"
#include "settings.h"
#include "timing_event.h" #include "timing_event.h"
#include <cstdio> #include <cstdio>
Log_SetChannel(CPU::Core); Log_SetChannel(CPU::Core);
@ -73,6 +75,9 @@ void Initialize()
g_state.cop0_regs.PRID = UINT32_C(0x00000002); g_state.cop0_regs.PRID = UINT32_C(0x00000002);
GTE::Initialize(); GTE::Initialize();
if (g_settings.gpu_pgxp_enable)
PGXP::Initialize();
} }
void Shutdown() void Shutdown()
@ -100,6 +105,9 @@ void Reset()
GTE::Reset(); GTE::Reset();
SetPC(RESET_VECTOR); SetPC(RESET_VECTOR);
if (g_settings.gpu_pgxp_enable)
PGXP::Initialize();
} }
bool DoState(StateWrapper& sw) bool DoState(StateWrapper& sw)
@ -137,6 +145,9 @@ bool DoState(StateWrapper& sw)
if (!GTE::DoState(sw)) if (!GTE::DoState(sw))
return false; return false;
if (sw.IsReading())
PGXP::Initialize();
return !sw.HasError(); return !sw.HasError();
} }
@ -893,7 +904,12 @@ void ExecuteInstruction()
if (!ReadMemoryByte(addr, &value)) if (!ReadMemoryByte(addr, &value))
return; return;
WriteRegDelayed(inst.i.rt, SignExtend32(value)); const u32 sxvalue = SignExtend32(value);
WriteRegDelayed(inst.i.rt, sxvalue);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_LBx(inst.bits, sxvalue, addr);
} }
break; break;
@ -904,7 +920,11 @@ void ExecuteInstruction()
if (!ReadMemoryHalfWord(addr, &value)) if (!ReadMemoryHalfWord(addr, &value))
return; return;
WriteRegDelayed(inst.i.rt, SignExtend32(value)); const u32 sxvalue = SignExtend32(value);
WriteRegDelayed(inst.i.rt, sxvalue);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_LHx(inst.bits, sxvalue, addr);
} }
break; break;
@ -916,6 +936,9 @@ void ExecuteInstruction()
return; return;
WriteRegDelayed(inst.i.rt, value); WriteRegDelayed(inst.i.rt, value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_LW(inst.bits, value, addr);
} }
break; break;
@ -926,7 +949,11 @@ void ExecuteInstruction()
if (!ReadMemoryByte(addr, &value)) if (!ReadMemoryByte(addr, &value))
return; return;
WriteRegDelayed(inst.i.rt, ZeroExtend32(value)); const u32 zxvalue = ZeroExtend32(value);
WriteRegDelayed(inst.i.rt, zxvalue);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_LBx(inst.bits, zxvalue, addr);
} }
break; break;
@ -937,7 +964,11 @@ void ExecuteInstruction()
if (!ReadMemoryHalfWord(addr, &value)) if (!ReadMemoryHalfWord(addr, &value))
return; return;
WriteRegDelayed(inst.i.rt, ZeroExtend32(value)); const u32 zxvalue = ZeroExtend32(value);
WriteRegDelayed(inst.i.rt, zxvalue);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_LHx(inst.bits, zxvalue, addr);
} }
break; break;
@ -966,6 +997,9 @@ void ExecuteInstruction()
} }
WriteRegDelayed(inst.i.rt, new_value); WriteRegDelayed(inst.i.rt, new_value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_LW(inst.bits, new_value, addr);
} }
break; break;
@ -974,6 +1008,9 @@ void ExecuteInstruction()
const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32();
const u8 value = Truncate8(ReadReg(inst.i.rt)); const u8 value = Truncate8(ReadReg(inst.i.rt));
WriteMemoryByte(addr, value); WriteMemoryByte(addr, value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_SB(inst.bits, value, addr);
} }
break; break;
@ -982,6 +1019,9 @@ void ExecuteInstruction()
const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32();
const u16 value = Truncate16(ReadReg(inst.i.rt)); const u16 value = Truncate16(ReadReg(inst.i.rt));
WriteMemoryHalfWord(addr, value); WriteMemoryHalfWord(addr, value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_SH(inst.bits, value, addr);
} }
break; break;
@ -990,6 +1030,9 @@ void ExecuteInstruction()
const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32();
const u32 value = ReadReg(inst.i.rt); const u32 value = ReadReg(inst.i.rt);
WriteMemoryWord(addr, value); WriteMemoryWord(addr, value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_SW(inst.bits, value, addr);
} }
break; break;
@ -1017,6 +1060,9 @@ void ExecuteInstruction()
} }
WriteMemoryWord(aligned_addr, new_value); WriteMemoryWord(aligned_addr, new_value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_SW(inst.bits, new_value, addr);
} }
break; break;
@ -1132,6 +1178,9 @@ void ExecuteInstruction()
return; return;
GTE::WriteRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())), value); GTE::WriteRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())), value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_LWC2(inst.bits, value, addr);
} }
break; break;
@ -1147,6 +1196,9 @@ void ExecuteInstruction()
const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32(); const VirtualMemoryAddress addr = ReadReg(inst.i.rs) + inst.i.imm_sext32();
const u32 value = GTE::ReadRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue()))); const u32 value = GTE::ReadRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())));
WriteMemoryWord(addr, value); WriteMemoryWord(addr, value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_SWC2(inst.bits, value, addr);
} }
break; break;
@ -1230,20 +1282,44 @@ void ExecuteCop2Instruction()
switch (inst.cop.CommonOp()) switch (inst.cop.CommonOp())
{ {
case CopCommonInstruction::cfcn: case CopCommonInstruction::cfcn:
WriteRegDelayed(inst.r.rt, GTE::ReadRegister(static_cast<u32>(inst.r.rd.GetValue()) + 32)); {
break; const u32 value = GTE::ReadRegister(static_cast<u32>(inst.r.rd.GetValue()) + 32);
WriteRegDelayed(inst.r.rt, value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_CFC2(inst.bits, value, value);
}
break;
case CopCommonInstruction::ctcn: case CopCommonInstruction::ctcn:
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()) + 32, ReadReg(inst.r.rt)); {
break; const u32 value = ReadReg(inst.r.rt);
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()) + 32, value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_CTC2(inst.bits, value, value);
}
break;
case CopCommonInstruction::mfcn: case CopCommonInstruction::mfcn:
WriteRegDelayed(inst.r.rt, GTE::ReadRegister(static_cast<u32>(inst.r.rd.GetValue()))); {
break; const u32 value = GTE::ReadRegister(static_cast<u32>(inst.r.rd.GetValue()));
WriteRegDelayed(inst.r.rt, value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_MFC2(inst.bits, value, value);
}
break;
case CopCommonInstruction::mtcn: case CopCommonInstruction::mtcn:
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()), ReadReg(inst.r.rt)); {
break; const u32 value = ReadReg(inst.r.rt);
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()), value);
if (g_settings.gpu_pgxp_enable)
PGXP::CPU_MTC2(inst.bits, value, value);
}
break;
case CopCommonInstruction::bcnc: case CopCommonInstruction::bcnc:
default: default:

View file

@ -3,6 +3,8 @@
#include "cpu_core.h" #include "cpu_core.h"
#include "cpu_disasm.h" #include "cpu_disasm.h"
#include "gte.h" #include "gte.h"
#include "pgxp.h"
#include "settings.h"
Log_SetChannel(CPU::Recompiler); Log_SetChannel(CPU::Recompiler);
// TODO: Turn load+sext/zext into a single signed/unsigned load // TODO: Turn load+sext/zext into a single signed/unsigned load
@ -1115,19 +1117,32 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
{ {
case InstructionOp::lb: case InstructionOp::lb:
case InstructionOp::lbu: case InstructionOp::lbu:
{
result = EmitLoadGuestMemory(cbi, address, RegSize_8); result = EmitLoadGuestMemory(cbi, address, RegSize_8);
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb)); ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb));
break; if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), result, address);
}
break;
case InstructionOp::lh: case InstructionOp::lh:
case InstructionOp::lhu: case InstructionOp::lhu:
{
result = EmitLoadGuestMemory(cbi, address, RegSize_16); result = EmitLoadGuestMemory(cbi, address, RegSize_16);
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh)); ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh));
break;
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), result, address);
}
break;
case InstructionOp::lw: case InstructionOp::lw:
{
result = EmitLoadGuestMemory(cbi, address, RegSize_32); result = EmitLoadGuestMemory(cbi, address, RegSize_32);
break; if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), result, address);
}
break;
default: default:
UnreachableCode(); UnreachableCode();
@ -1153,16 +1168,34 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
switch (cbi.instruction.op) switch (cbi.instruction.op)
{ {
case InstructionOp::sb: case InstructionOp::sb:
{
EmitStoreGuestMemory(cbi, address, value.ViewAsSize(RegSize_8)); EmitStoreGuestMemory(cbi, address, value.ViewAsSize(RegSize_8));
break; if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits),
value.ViewAsSize(RegSize_8), address);
}
}
break;
case InstructionOp::sh: case InstructionOp::sh:
{
EmitStoreGuestMemory(cbi, address, value.ViewAsSize(RegSize_16)); EmitStoreGuestMemory(cbi, address, value.ViewAsSize(RegSize_16));
break; if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits),
value.ViewAsSize(RegSize_16), address);
}
}
break;
case InstructionOp::sw: case InstructionOp::sw:
{
EmitStoreGuestMemory(cbi, address, value); EmitStoreGuestMemory(cbi, address, value);
break; if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), value, address);
}
break;
default: default:
UnreachableCode(); UnreachableCode();
@ -1827,11 +1860,17 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
{ {
Value value = EmitLoadGuestMemory(cbi, address, RegSize_32); Value value = EmitLoadGuestMemory(cbi, address, RegSize_32);
DoGTERegisterWrite(reg, value); DoGTERegisterWrite(reg, value);
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(cbi.instruction.bits), value, address);
} }
else else
{ {
Value value = DoGTERegisterRead(reg); Value value = DoGTERegisterRead(reg);
EmitStoreGuestMemory(cbi, address, value); EmitStoreGuestMemory(cbi, address, value);
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), value, address);
} }
InstructionEpilogue(cbi); InstructionEpilogue(cbi);
@ -1851,7 +1890,19 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
((cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? 32 : 0); ((cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? 32 : 0);
InstructionPrologue(cbi, 1); InstructionPrologue(cbi, 1);
m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, DoGTERegisterRead(reg));
Value value = DoGTERegisterRead(reg);
// PGXP done first here before ownership is transferred.
if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(
nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? PGXP::CPU_CFC2 : PGXP::CPU_MFC2,
Value::FromConstantU32(cbi.instruction.bits), value, value);
}
m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, std::move(value));
InstructionEpilogue(cbi); InstructionEpilogue(cbi);
return true; return true;
} }
@ -1863,7 +1914,17 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
((cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? 32 : 0); ((cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? 32 : 0);
InstructionPrologue(cbi, 1); InstructionPrologue(cbi, 1);
DoGTERegisterWrite(reg, m_register_cache.ReadGuestRegister(cbi.instruction.r.rt));
Value value = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt);
DoGTERegisterWrite(reg, value);
if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(
nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? PGXP::CPU_CTC2 : PGXP::CPU_MTC2,
Value::FromConstantU32(cbi.instruction.bits), value, value);
}
InstructionEpilogue(cbi); InstructionEpilogue(cbi);
return true; return true;
} }

View file

@ -429,7 +429,8 @@ void DMA::UnhaltTransfer(TickCount ticks)
TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 increment, u32 word_count) TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 increment, u32 word_count)
{ {
const u32* src_pointer = reinterpret_cast<u32*>(Bus::g_ram + address); const u32* src_pointer = reinterpret_cast<u32*>(Bus::g_ram + address);
if (static_cast<s32>(increment) < 0 || ((address + (increment * word_count)) & ADDRESS_MASK) <= address) if (channel != Channel::GPU &&
(static_cast<s32>(increment) < 0 || ((address + (increment * word_count)) & ADDRESS_MASK) <= address))
{ {
// Use temp buffer if it's wrapping around // Use temp buffer if it's wrapping around
if (m_transfer_buffer.size() < word_count) if (m_transfer_buffer.size() < word_count)
@ -447,8 +448,21 @@ TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 incremen
switch (channel) switch (channel)
{ {
case Channel::GPU: case Channel::GPU:
g_gpu->DMAWrite(src_pointer, word_count); {
break; if (g_gpu->BeginDMAWrite())
{
u8* ram_pointer = Bus::g_ram;
for (u32 i = 0; i < word_count; i++)
{
u32 value;
std::memcpy(&value, &ram_pointer[address], sizeof(u32));
g_gpu->DMAWrite(address, value);
address = (address + increment) & ADDRESS_MASK;
}
g_gpu->EndDMAWrite();
}
}
break;
case Channel::SPU: case Channel::SPU:
g_spu.DMAWrite(src_pointer, word_count); g_spu.DMAWrite(src_pointer, word_count);

View file

@ -349,32 +349,17 @@ void GPU::DMARead(u32* words, u32 word_count)
words[i] = ReadGPUREAD(); words[i] = ReadGPUREAD();
} }
void GPU::DMAWrite(const u32* words, u32 word_count) void GPU::EndDMAWrite()
{ {
switch (m_GPUSTAT.dma_direction) m_fifo_pushed = true;
if (!m_syncing)
{ {
case DMADirection::CPUtoGP0: ExecuteCommands();
{ UpdateCommandTickEvent();
m_fifo.PushRange(words, word_count); }
m_fifo_pushed = true; else
if (!m_syncing) {
{ UpdateDMARequest();
ExecuteCommands();
UpdateCommandTickEvent();
}
else
{
UpdateDMARequest();
}
}
break;
default:
{
Log_ErrorPrintf("Unhandled GPU DMA write mode %u for %u words",
static_cast<u32>(m_GPUSTAT.dma_direction.GetValue()), word_count);
}
break;
} }
} }

View file

@ -136,7 +136,13 @@ public:
// DMA access // DMA access
void DMARead(u32* words, u32 word_count); void DMARead(u32* words, u32 word_count);
void DMAWrite(const u32* words, u32 word_count);
ALWAYS_INLINE bool BeginDMAWrite() const { return (m_GPUSTAT.dma_direction == DMADirection::CPUtoGP0); }
ALWAYS_INLINE void DMAWrite(u32 address, u32 value)
{
m_fifo.Push((ZeroExtend64(address) << 32) | ZeroExtend64(value));
}
void EndDMAWrite();
/// Returns the number of pending GPU ticks. /// Returns the number of pending GPU ticks.
TickCount GetPendingCRTCTicks() const; TickCount GetPendingCRTCTicks() const;
@ -276,6 +282,14 @@ protected:
// Sprites/rectangles should be clipped to 12 bits before drawing. // Sprites/rectangles should be clipped to 12 bits before drawing.
static constexpr s32 TruncateVertexPosition(s32 x) { return SignExtendN<11, s32>(x); } static constexpr s32 TruncateVertexPosition(s32 x) { return SignExtendN<11, s32>(x); }
struct NativeVertex
{
s16 x;
s16 y;
u32 color;
u16 texcoord;
};
union VRAMPixel union VRAMPixel
{ {
u16 bits; u16 bits;
@ -700,11 +714,15 @@ protected:
u16 row; u16 row;
} m_vram_transfer = {}; } m_vram_transfer = {};
HeapFIFOQueue<u32, MAX_FIFO_SIZE> m_fifo; HeapFIFOQueue<u64, MAX_FIFO_SIZE> m_fifo;
std::vector<u32> m_blit_buffer; std::vector<u32> m_blit_buffer;
u32 m_blit_remaining_words; u32 m_blit_remaining_words;
RenderCommand m_render_command{}; RenderCommand m_render_command{};
ALWAYS_INLINE u32 FifoPop() { return Truncate32(m_fifo.Pop()); }
ALWAYS_INLINE u32 FifoPeek() { return Truncate32(m_fifo.Peek()); }
ALWAYS_INLINE u32 FifoPeek(u32 i) { return Truncate32(m_fifo.Peek(i)); }
TickCount m_max_run_ahead = 128; TickCount m_max_run_ahead = 128;
u32 m_fifo_size = 128; u32 m_fifo_size = 128;

View file

@ -33,7 +33,7 @@ void GPU::ExecuteCommands()
{ {
case BlitterState::Idle: case BlitterState::Idle:
{ {
const u32 command = m_fifo.Peek(0) >> 24; const u32 command = FifoPeek(0) >> 24;
if ((this->*s_GP0_command_handler_table[command])()) if ((this->*s_GP0_command_handler_table[command])())
continue; continue;
else else
@ -45,8 +45,11 @@ void GPU::ExecuteCommands()
DebugAssert(m_blit_remaining_words > 0); DebugAssert(m_blit_remaining_words > 0);
const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize()); const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
const size_t old_size = m_blit_buffer.size(); const size_t old_size = m_blit_buffer.size();
m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy); // m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy);
m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy); // FifoPopRange(&m_blit_buffer[old_size], words_to_copy);
m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
for (u32 i = 0; i < words_to_copy; i++)
m_blit_buffer.push_back(FifoPop());
m_blit_remaining_words -= words_to_copy; m_blit_remaining_words -= words_to_copy;
AddCommandTicks(words_to_copy); AddCommandTicks(words_to_copy);
@ -72,7 +75,7 @@ void GPU::ExecuteCommands()
{ {
// polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000. // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
// terminator is on the first word for the vertex // terminator is on the first word for the vertex
if ((m_fifo.Peek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000)) if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
break; break;
} }
@ -81,8 +84,11 @@ void GPU::ExecuteCommands()
if (words_to_copy > 0) if (words_to_copy > 0)
{ {
const size_t old_size = m_blit_buffer.size(); const size_t old_size = m_blit_buffer.size();
m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy); // m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy);
m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy); // FifoPopRange(&m_blit_buffer[old_size], words_to_copy);
m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
for (u32 i = 0; i < words_to_copy; i++)
m_blit_buffer.push_back(FifoPop());
} }
Log_DebugPrintf("Added %u words to polyline", words_to_copy); Log_DebugPrintf("Added %u words to polyline", words_to_copy);
@ -170,12 +176,12 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable()
bool GPU::HandleUnknownGP0Command() bool GPU::HandleUnknownGP0Command()
{ {
const u32 command = m_fifo.Peek() >> 24; const u32 command = FifoPeek() >> 24;
Log_ErrorPrintf("Unimplemented GP0 command 0x%02X", command); Log_ErrorPrintf("Unimplemented GP0 command 0x%02X", command);
SmallString dump; SmallString dump;
for (u32 i = 0; i < m_fifo.GetSize(); i++) for (u32 i = 0; i < m_fifo.GetSize(); i++)
dump.AppendFormattedString("%s0x%08X", (i > 0) ? " " : "", m_fifo.Peek(i)); dump.AppendFormattedString("%s0x%08X", (i > 0) ? " " : "", FifoPeek(i));
Log_ErrorPrintf("FIFO: %s", dump.GetCharArray()); Log_ErrorPrintf("FIFO: %s", dump.GetCharArray());
m_fifo.RemoveOne(); m_fifo.RemoveOne();
@ -216,7 +222,7 @@ bool GPU::HandleInterruptRequestCommand()
bool GPU::HandleSetDrawModeCommand() bool GPU::HandleSetDrawModeCommand()
{ {
const u32 param = m_fifo.Pop() & 0x00FFFFFFu; const u32 param = FifoPop() & 0x00FFFFFFu;
Log_DebugPrintf("Set draw mode %08X", param); Log_DebugPrintf("Set draw mode %08X", param);
SetDrawMode(Truncate16(param)); SetDrawMode(Truncate16(param));
AddCommandTicks(1); AddCommandTicks(1);
@ -226,7 +232,7 @@ bool GPU::HandleSetDrawModeCommand()
bool GPU::HandleSetTextureWindowCommand() bool GPU::HandleSetTextureWindowCommand()
{ {
const u32 param = m_fifo.Pop() & 0x00FFFFFFu; const u32 param = FifoPop() & 0x00FFFFFFu;
SetTextureWindow(param); SetTextureWindow(param);
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x, Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x,
m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x, m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x,
@ -239,7 +245,7 @@ bool GPU::HandleSetTextureWindowCommand()
bool GPU::HandleSetDrawingAreaTopLeftCommand() bool GPU::HandleSetDrawingAreaTopLeftCommand()
{ {
const u32 param = m_fifo.Pop() & 0x00FFFFFFu; const u32 param = FifoPop() & 0x00FFFFFFu;
const u32 left = param & VRAM_WIDTH_MASK; const u32 left = param & VRAM_WIDTH_MASK;
const u32 top = (param >> 10) & VRAM_HEIGHT_MASK; const u32 top = (param >> 10) & VRAM_HEIGHT_MASK;
Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top); Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top);
@ -259,7 +265,7 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand()
bool GPU::HandleSetDrawingAreaBottomRightCommand() bool GPU::HandleSetDrawingAreaBottomRightCommand()
{ {
const u32 param = m_fifo.Pop() & 0x00FFFFFFu; const u32 param = FifoPop() & 0x00FFFFFFu;
const u32 right = param & VRAM_WIDTH_MASK; const u32 right = param & VRAM_WIDTH_MASK;
const u32 bottom = (param >> 10) & VRAM_HEIGHT_MASK; const u32 bottom = (param >> 10) & VRAM_HEIGHT_MASK;
@ -280,7 +286,7 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand()
bool GPU::HandleSetDrawingOffsetCommand() bool GPU::HandleSetDrawingOffsetCommand()
{ {
const u32 param = m_fifo.Pop() & 0x00FFFFFFu; const u32 param = FifoPop() & 0x00FFFFFFu;
const s32 x = SignExtendN<11, s32>(param & 0x7FFu); const s32 x = SignExtendN<11, s32>(param & 0x7FFu);
const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FFu); const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FFu);
Log_DebugPrintf("Set drawing offset (%d, %d)", m_drawing_offset.x, m_drawing_offset.y); Log_DebugPrintf("Set drawing offset (%d, %d)", m_drawing_offset.x, m_drawing_offset.y);
@ -299,7 +305,7 @@ bool GPU::HandleSetDrawingOffsetCommand()
bool GPU::HandleSetMaskBitCommand() bool GPU::HandleSetMaskBitCommand()
{ {
const u32 param = m_fifo.Pop() & 0x00FFFFFFu; const u32 param = FifoPop() & 0x00FFFFFFu;
constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); constexpr u32 gpustat_mask = (1 << 11) | (1 << 12);
const u32 gpustat_bits = (param & 0x03) << 11; const u32 gpustat_bits = (param & 0x03) << 11;
@ -318,7 +324,7 @@ bool GPU::HandleSetMaskBitCommand()
bool GPU::HandleRenderPolygonCommand() bool GPU::HandleRenderPolygonCommand()
{ {
const RenderCommand rc{m_fifo.Peek(0)}; const RenderCommand rc{FifoPeek(0)};
// shaded vertices use the colour from the first word for the first vertex // shaded vertices use the colour from the first word for the first vertex
const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable); const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable);
@ -344,10 +350,10 @@ bool GPU::HandleRenderPolygonCommand()
// set draw state up // set draw state up
if (rc.texture_enable) if (rc.texture_enable)
{ {
const u16 texpage_attribute = Truncate16((rc.shading_enable ? m_fifo.Peek(5) : m_fifo.Peek(4)) >> 16); const u16 texpage_attribute = Truncate16((rc.shading_enable ? FifoPeek(5) : FifoPeek(4)) >> 16);
SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) | SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) |
(m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK)); (m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK));
SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16)); SetTexturePalette(Truncate16(FifoPeek(2) >> 16));
} }
m_stats.num_vertices += num_vertices; m_stats.num_vertices += num_vertices;
@ -362,7 +368,7 @@ bool GPU::HandleRenderPolygonCommand()
bool GPU::HandleRenderRectangleCommand() bool GPU::HandleRenderRectangleCommand()
{ {
const RenderCommand rc{m_fifo.Peek(0)}; const RenderCommand rc{FifoPeek(0)};
const u32 total_words = const u32 total_words =
2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == DrawRectangleSize::Variable); 2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == DrawRectangleSize::Variable);
@ -372,7 +378,7 @@ bool GPU::HandleRenderRectangleCommand()
SynchronizeCRTC(); SynchronizeCRTC();
if (rc.texture_enable) if (rc.texture_enable)
SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16)); SetTexturePalette(Truncate16(FifoPeek(2) >> 16));
const TickCount setup_ticks = 16; const TickCount setup_ticks = 16;
AddCommandTicks(setup_ticks); AddCommandTicks(setup_ticks);
@ -394,7 +400,7 @@ bool GPU::HandleRenderRectangleCommand()
bool GPU::HandleRenderLineCommand() bool GPU::HandleRenderLineCommand()
{ {
const RenderCommand rc{m_fifo.Peek(0)}; const RenderCommand rc{FifoPeek(0)};
const u32 total_words = rc.shading_enable ? 4 : 3; const u32 total_words = rc.shading_enable ? 4 : 3;
CHECK_COMMAND_SIZE(total_words); CHECK_COMMAND_SIZE(total_words);
@ -417,7 +423,7 @@ bool GPU::HandleRenderLineCommand()
bool GPU::HandleRenderPolyLineCommand() bool GPU::HandleRenderPolyLineCommand()
{ {
// always read the first two vertices, we test for the terminator after that // always read the first two vertices, we test for the terminator after that
const RenderCommand rc{m_fifo.Peek(0)}; const RenderCommand rc{FifoPeek(0)};
const u32 min_words = rc.shading_enable ? 3 : 4; const u32 min_words = rc.shading_enable ? 3 : 4;
CHECK_COMMAND_SIZE(min_words); CHECK_COMMAND_SIZE(min_words);
@ -434,8 +440,11 @@ bool GPU::HandleRenderPolyLineCommand()
m_fifo.RemoveOne(); m_fifo.RemoveOne();
const u32 words_to_pop = min_words - 1; const u32 words_to_pop = min_words - 1;
m_blit_buffer.resize(words_to_pop); // m_blit_buffer.resize(words_to_pop);
m_fifo.PopRange(m_blit_buffer.data(), words_to_pop); // FifoPopRange(m_blit_buffer.data(), words_to_pop);
m_blit_buffer.reserve(words_to_pop);
for (u32 i = 0; i < words_to_pop; i++)
m_blit_buffer.push_back(Truncate32(FifoPop()));
// polyline goes via a different path through the blit buffer // polyline goes via a different path through the blit buffer
m_blitter_state = BlitterState::DrawingPolyLine; m_blitter_state = BlitterState::DrawingPolyLine;
@ -452,11 +461,11 @@ bool GPU::HandleFillRectangleCommand()
FlushRender(); FlushRender();
const u32 color = m_fifo.Pop() & 0x00FFFFFF; const u32 color = FifoPop() & 0x00FFFFFF;
const u32 dst_x = m_fifo.Peek() & 0x3F0; const u32 dst_x = FifoPeek() & 0x3F0;
const u32 dst_y = (m_fifo.Pop() >> 16) & VRAM_COORD_MASK; const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK;
const u32 width = ((m_fifo.Peek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF; const u32 width = ((FifoPeek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF;
const u32 height = (m_fifo.Pop() >> 16) & VRAM_HEIGHT_MASK; const u32 height = (FifoPop() >> 16) & VRAM_HEIGHT_MASK;
Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height); Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height);
@ -472,10 +481,10 @@ bool GPU::HandleCopyRectangleCPUToVRAMCommand()
CHECK_COMMAND_SIZE(3); CHECK_COMMAND_SIZE(3);
m_fifo.RemoveOne(); m_fifo.RemoveOne();
const u32 dst_x = m_fifo.Peek() & VRAM_COORD_MASK; const u32 dst_x = FifoPeek() & VRAM_COORD_MASK;
const u32 dst_y = (m_fifo.Pop() >> 16) & VRAM_COORD_MASK; const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK;
const u32 copy_width = ReplaceZero(m_fifo.Peek() & VRAM_WIDTH_MASK, 0x400); const u32 copy_width = ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400);
const u32 copy_height = ReplaceZero((m_fifo.Pop() >> 16) & VRAM_HEIGHT_MASK, 0x200); const u32 copy_height = ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200);
const u32 num_pixels = copy_width * copy_height; const u32 num_pixels = copy_width * copy_height;
const u32 num_words = ((num_pixels + 1) / 2); const u32 num_words = ((num_pixels + 1) / 2);
@ -520,10 +529,10 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand()
CHECK_COMMAND_SIZE(3); CHECK_COMMAND_SIZE(3);
m_fifo.RemoveOne(); m_fifo.RemoveOne();
m_vram_transfer.x = Truncate16(m_fifo.Peek() & VRAM_COORD_MASK); m_vram_transfer.x = Truncate16(FifoPeek() & VRAM_COORD_MASK);
m_vram_transfer.y = Truncate16((m_fifo.Pop() >> 16) & VRAM_COORD_MASK); m_vram_transfer.y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK);
m_vram_transfer.width = ((Truncate16(m_fifo.Peek()) - 1) & VRAM_WIDTH_MASK) + 1; m_vram_transfer.width = ((Truncate16(FifoPeek()) - 1) & VRAM_WIDTH_MASK) + 1;
m_vram_transfer.height = ((Truncate16(m_fifo.Pop() >> 16) - 1) & VRAM_HEIGHT_MASK) + 1; m_vram_transfer.height = ((Truncate16(FifoPop() >> 16) - 1) & VRAM_HEIGHT_MASK) + 1;
Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", m_vram_transfer.x, m_vram_transfer.y, Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", m_vram_transfer.x, m_vram_transfer.y,
m_vram_transfer.width, m_vram_transfer.height); m_vram_transfer.width, m_vram_transfer.height);
@ -554,12 +563,12 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand()
CHECK_COMMAND_SIZE(4); CHECK_COMMAND_SIZE(4);
m_fifo.RemoveOne(); m_fifo.RemoveOne();
const u32 src_x = m_fifo.Peek() & VRAM_COORD_MASK; const u32 src_x = FifoPeek() & VRAM_COORD_MASK;
const u32 src_y = (m_fifo.Pop() >> 16) & VRAM_COORD_MASK; const u32 src_y = (FifoPop() >> 16) & VRAM_COORD_MASK;
const u32 dst_x = m_fifo.Peek() & VRAM_COORD_MASK; const u32 dst_x = FifoPeek() & VRAM_COORD_MASK;
const u32 dst_y = (m_fifo.Pop() >> 16) & VRAM_COORD_MASK; const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK;
const u32 width = ReplaceZero(m_fifo.Peek() & VRAM_WIDTH_MASK, 0x400); const u32 width = ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400);
const u32 height = ReplaceZero((m_fifo.Pop() >> 16) & VRAM_HEIGHT_MASK, 0x200); const u32 height = ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200);
Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y, Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y,
width, height); width, height);

View file

@ -2,17 +2,25 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/log.h" #include "common/log.h"
#include "common/state_wrapper.h" #include "common/state_wrapper.h"
#include "cpu_core.h"
#include "pgxp.h"
#include "settings.h" #include "settings.h"
#include "system.h" #include "system.h"
#include <imgui.h> #include <imgui.h>
#include <sstream> #include <sstream>
Log_SetChannel(GPU_HW); Log_SetChannel(GPU_HW);
GPU_HW::GPU_HW() : GPU() { m_vram_ptr = m_vram_shadow.data(); } GPU_HW::GPU_HW() : GPU()
{
m_vram_ptr = m_vram_shadow.data();
}
GPU_HW::~GPU_HW() = default; GPU_HW::~GPU_HW() = default;
bool GPU_HW::IsHardwareRenderer() const { return true; } bool GPU_HW::IsHardwareRenderer() const
{
return true;
}
bool GPU_HW::Initialize(HostDisplay* host_display) bool GPU_HW::Initialize(HostDisplay* host_display)
{ {
@ -110,35 +118,39 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
// It might be faster to do more direct checking here, but the code below handles primitives in any order and // It might be faster to do more direct checking here, but the code below handles primitives in any order and
// orientation, and is far more SIMD-friendly if needed. // orientation, and is far more SIMD-friendly if needed.
const s32 abx = vertices[1].x - vertices[0].x; const float abx = vertices[1].x - vertices[0].x;
const s32 aby = vertices[1].y - vertices[0].y; const float aby = vertices[1].y - vertices[0].y;
const s32 bcx = vertices[2].x - vertices[1].x; const float bcx = vertices[2].x - vertices[1].x;
const s32 bcy = vertices[2].y - vertices[1].y; const float bcy = vertices[2].y - vertices[1].y;
const s32 cax = vertices[0].x - vertices[2].x; const float cax = vertices[0].x - vertices[2].x;
const s32 cay = vertices[0].y - vertices[2].y; const float cay = vertices[0].y - vertices[2].y;
// Compute static derivatives, just assume W is uniform across the primitive and that the plane equation remains the // Compute static derivatives, just assume W is uniform across the primitive and that the plane equation remains the
// same across the quad. (which it is, there is no Z.. yet). // same across the quad. (which it is, there is no Z.. yet).
const s32 dudx = -aby * vertices[2].u - bcy * vertices[0].u - cay * vertices[1].u; const float dudx = -aby * static_cast<float>(vertices[2].u) - bcy * static_cast<float>(vertices[0].u) -
const s32 dvdx = -aby * vertices[2].v - bcy * vertices[0].v - cay * vertices[1].v; cay * static_cast<float>(vertices[1].u);
const s32 dudy = +abx * vertices[2].u + bcx * vertices[0].u + cax * vertices[1].u; const float dvdx = -aby * static_cast<float>(vertices[2].v) - bcy * static_cast<float>(vertices[0].v) -
const s32 dvdy = +abx * vertices[2].v + bcx * vertices[0].v + cax * vertices[1].v; cay * static_cast<float>(vertices[1].v);
const s32 area = bcx * cay - bcy * cax; const float dudy = +abx * static_cast<float>(vertices[2].u) + bcx * static_cast<float>(vertices[0].u) +
cax * static_cast<float>(vertices[1].u);
const float dvdy = +abx * static_cast<float>(vertices[2].v) + bcx * static_cast<float>(vertices[0].v) +
cax * static_cast<float>(vertices[1].v);
const float area = bcx * cay - bcy * cax;
// Detect and reject any triangles with 0 size texture area // Detect and reject any triangles with 0 size texture area
const s32 texArea = (vertices[1].u - vertices[0].u) * (vertices[2].v - vertices[0].v) - const s32 texArea = (vertices[1].u - vertices[0].u) * (vertices[2].v - vertices[0].v) -
(vertices[2].u - vertices[0].u) * (vertices[1].v - vertices[0].v); (vertices[2].u - vertices[0].u) * (vertices[1].v - vertices[0].v);
// Shouldn't matter as degenerate primitives will be culled anyways. // Shouldn't matter as degenerate primitives will be culled anyways.
if (area == 0 && texArea == 0) if (area == 0.0f && texArea == 0)
return; return;
// Use floats here as it'll be faster than integer divides. // Use floats here as it'll be faster than integer divides.
const float rcp_area = 1.0f / static_cast<float>(area); const float rcp_area = 1.0f / area;
const float dudx_area = static_cast<float>(dudx) * rcp_area; const float dudx_area = dudx * rcp_area;
const float dudy_area = static_cast<float>(dudy) * rcp_area; const float dudy_area = dudy * rcp_area;
const float dvdx_area = static_cast<float>(dvdx) * rcp_area; const float dvdx_area = dvdx * rcp_area;
const float dvdy_area = static_cast<float>(dvdy) * rcp_area; const float dvdy_area = dvdy * rcp_area;
const bool neg_dudx = dudx_area < 0.0f; const bool neg_dudx = dudx_area < 0.0f;
const bool neg_dudy = dudy_area < 0.0f; const bool neg_dudy = dudy_area < 0.0f;
const bool neg_dvdx = dvdx_area < 0.0f; const bool neg_dvdx = dvdx_area < 0.0f;
@ -179,22 +191,22 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
// The PlayStation GPU draws lines from start to end, inclusive. Or, more specifically, inclusive of the greatest delta // The PlayStation GPU draws lines from start to end, inclusive. Or, more specifically, inclusive of the greatest delta
// in the x or y direction. // in the x or y direction.
void GPU_HW::FixLineVertexCoordinates(BatchVertex& start, BatchVertex& end, s32 dx, s32 dy) void GPU_HW::FixLineVertexCoordinates(s32& start_x, s32& start_y, s32& end_x, s32& end_y, s32 dx, s32 dy)
{ {
// deliberately not else if to catch the equal case // deliberately not else if to catch the equal case
if (dx >= dy) if (dx >= dy)
{ {
if (start.x > end.x) if (start_x > end_x)
start.x++; start_x++;
else else
end.x++; end_x++;
} }
if (dx <= dy) if (dx <= dy)
{ {
if (start.y > end.y) if (start_y > end_y)
start.y++; start_y++;
else else
end.y++; end_y++;
} }
} }
@ -202,6 +214,7 @@ void GPU_HW::LoadVertices()
{ {
const RenderCommand rc{m_render_command.bits}; const RenderCommand rc{m_render_command.bits};
const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16); const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16);
const float depth = GetCurrentNormalizedVertexDepth();
if (m_GPUSTAT.check_mask_before_draw) if (m_GPUSTAT.check_mask_before_draw)
m_current_depth++; m_current_depth++;
@ -215,17 +228,36 @@ void GPU_HW::LoadVertices()
const u32 first_color = rc.color_for_first_vertex; const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable; const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable; const bool textured = rc.texture_enable;
const bool pgxp = g_settings.gpu_pgxp_enable;
const u32 num_vertices = rc.quad_polygon ? 4 : 3; const u32 num_vertices = rc.quad_polygon ? 4 : 3;
std::array<BatchVertex, 4> vertices; std::array<BatchVertex, 4> vertices;
std::array<std::array<s32, 2>, 4> native_vertex_positions;
bool valid_w = g_settings.gpu_pgxp_texture_correction;
for (u32 i = 0; i < num_vertices; i++) for (u32 i = 0; i < num_vertices; i++)
{ {
const u32 color = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color; const u32 color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{m_fifo.Pop()}; const u64 maddr_and_pos = m_fifo.Pop();
const u16 packed_texcoord = textured ? Truncate16(m_fifo.Pop()) : 0; const VertexPosition vp{Truncate32(maddr_and_pos)};
const u16 texcoord = textured ? Truncate16(FifoPop()) : 0;
const s32 native_x = m_drawing_offset.x + vp.x;
const s32 native_y = m_drawing_offset.y + vp.y;
native_vertex_positions[i][0] = native_x;
native_vertex_positions[i][1] = native_y;
vertices[i].Set(static_cast<float>(native_x), static_cast<float>(native_y), depth, 1.0f, color, texpage,
texcoord);
vertices[i].Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, m_current_depth, color, texpage, if (pgxp)
packed_texcoord); {
valid_w &=
PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, m_drawing_offset.x,
m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w);
}
}
if (!valid_w)
{
for (BatchVertex& v : vertices)
v.w = 1.0f;
} }
if (rc.quad_polygon && m_resolution_scale > 1) if (rc.quad_polygon && m_resolution_scale > 1)
@ -235,19 +267,20 @@ void GPU_HW::LoadVertices()
return; return;
// Cull polygons which are too large. // Cull polygons which are too large.
const s32 min_x_12 = std::min(vertices[1].x, vertices[2].x); const s32 min_x_12 = std::min(native_vertex_positions[1][0], native_vertex_positions[2][0]);
const s32 max_x_12 = std::max(vertices[1].x, vertices[2].x); const s32 max_x_12 = std::max(native_vertex_positions[1][0], native_vertex_positions[2][0]);
const s32 min_y_12 = std::min(vertices[1].y, vertices[2].y); const s32 min_y_12 = std::min(native_vertex_positions[1][1], native_vertex_positions[2][1]);
const s32 max_y_12 = std::max(vertices[1].y, vertices[2].y); const s32 max_y_12 = std::max(native_vertex_positions[1][1], native_vertex_positions[2][1]);
const s32 min_x = std::min(min_x_12, vertices[0].x); const s32 min_x = std::min(min_x_12, native_vertex_positions[0][0]);
const s32 max_x = std::max(max_x_12, vertices[0].x); const s32 max_x = std::max(max_x_12, native_vertex_positions[0][0]);
const s32 min_y = std::min(min_y_12, vertices[0].y); const s32 min_y = std::min(min_y_12, native_vertex_positions[0][1]);
const s32 max_y = std::max(max_y_12, vertices[0].y); const s32 max_y = std::max(max_y_12, native_vertex_positions[0][1]);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{ {
Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", vertices[0].x, vertices[0].y, vertices[1].x, Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", native_vertex_positions[0][0],
vertices[1].y, vertices[2].x, vertices[2].y); native_vertex_positions[0][1], native_vertex_positions[1][0], native_vertex_positions[1][1],
native_vertex_positions[2][0], native_vertex_positions[2][1]);
} }
else else
{ {
@ -268,16 +301,17 @@ void GPU_HW::LoadVertices()
// quads // quads
if (rc.quad_polygon) if (rc.quad_polygon)
{ {
const s32 min_x_123 = std::min(min_x_12, vertices[3].x); const s32 min_x_123 = std::min(min_x_12, native_vertex_positions[3][0]);
const s32 max_x_123 = std::max(max_x_12, vertices[3].x); const s32 max_x_123 = std::max(max_x_12, native_vertex_positions[3][0]);
const s32 min_y_123 = std::min(min_y_12, vertices[3].y); const s32 min_y_123 = std::min(min_y_12, native_vertex_positions[3][1]);
const s32 max_y_123 = std::max(max_y_12, vertices[3].y); const s32 max_y_123 = std::max(max_y_12, native_vertex_positions[3][1]);
// Cull polygons which are too large. // Cull polygons which are too large.
if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT) if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT)
{ {
Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d", vertices[2].x, Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d",
vertices[2].y, vertices[1].x, vertices[1].y, vertices[0].x, vertices[0].y); native_vertex_positions[2][0], native_vertex_positions[2][1], native_vertex_positions[1][0],
native_vertex_positions[1][1], native_vertex_positions[0][0], native_vertex_positions[0][1]);
} }
else else
{ {
@ -303,11 +337,11 @@ void GPU_HW::LoadVertices()
case Primitive::Rectangle: case Primitive::Rectangle:
{ {
const u32 color = rc.color_for_first_vertex; const u32 color = rc.color_for_first_vertex;
const VertexPosition vp{m_fifo.Pop()}; const VertexPosition vp{FifoPop()};
const s32 pos_x = TruncateVertexPosition(m_drawing_offset.x + vp.x); const s32 pos_x = TruncateVertexPosition(m_drawing_offset.x + vp.x);
const s32 pos_y = TruncateVertexPosition(m_drawing_offset.y + vp.y); const s32 pos_y = TruncateVertexPosition(m_drawing_offset.y + vp.y);
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(m_fifo.Pop()) : 0); const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(FifoPop()) : 0);
u16 orig_tex_left = ZeroExtend16(texcoord_x); u16 orig_tex_left = ZeroExtend16(texcoord_x);
u16 orig_tex_top = ZeroExtend16(texcoord_y); u16 orig_tex_top = ZeroExtend16(texcoord_y);
s32 rectangle_width; s32 rectangle_width;
@ -328,7 +362,7 @@ void GPU_HW::LoadVertices()
break; break;
default: default:
{ {
const u32 width_and_height = m_fifo.Pop(); const u32 width_and_height = FifoPop();
rectangle_width = static_cast<s32>(width_and_height & VRAM_WIDTH_MASK); rectangle_width = static_cast<s32>(width_and_height & VRAM_WIDTH_MASK);
rectangle_height = static_cast<s32>((width_and_height >> 16) & VRAM_HEIGHT_MASK); rectangle_height = static_cast<s32>((width_and_height >> 16) & VRAM_HEIGHT_MASK);
@ -353,25 +387,25 @@ void GPU_HW::LoadVertices()
for (s32 y_offset = 0; y_offset < rectangle_height;) for (s32 y_offset = 0; y_offset < rectangle_height;)
{ {
const s32 quad_height = std::min<s32>(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); const s32 quad_height = std::min<s32>(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top);
const s32 quad_start_y = pos_y + y_offset; const float quad_start_y = static_cast<float>(pos_y + y_offset);
const s32 quad_end_y = quad_start_y + quad_height; const float quad_end_y = quad_start_y + static_cast<float>(quad_height);
const u16 tex_bottom = tex_top + static_cast<u16>(quad_height); const u16 tex_bottom = tex_top + static_cast<u16>(quad_height);
u16 tex_left = orig_tex_left; u16 tex_left = orig_tex_left;
for (s32 x_offset = 0; x_offset < rectangle_width;) for (s32 x_offset = 0; x_offset < rectangle_width;)
{ {
const s32 quad_width = std::min<s32>(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); const s32 quad_width = std::min<s32>(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left);
const s32 quad_start_x = pos_x + x_offset; const float quad_start_x = static_cast<float>(pos_x + x_offset);
const s32 quad_end_x = quad_start_x + quad_width; const float quad_end_x = quad_start_x + static_cast<float>(quad_width);
const u16 tex_right = tex_left + static_cast<u16>(quad_width); const u16 tex_right = tex_left + static_cast<u16>(quad_width);
AddNewVertex(quad_start_x, quad_start_y, m_current_depth, color, texpage, tex_left, tex_top); AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top);
AddNewVertex(quad_end_x, quad_start_y, m_current_depth, color, texpage, tex_right, tex_top); AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top);
AddNewVertex(quad_start_x, quad_end_y, m_current_depth, color, texpage, tex_left, tex_bottom); AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom);
AddNewVertex(quad_start_x, quad_end_y, m_current_depth, color, texpage, tex_left, tex_bottom); AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom);
AddNewVertex(quad_end_x, quad_start_y, m_current_depth, color, texpage, tex_right, tex_top); AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top);
AddNewVertex(quad_end_x, quad_end_y, m_current_depth, color, texpage, tex_right, tex_bottom); AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom);
x_offset += quad_width; x_offset += quad_width;
tex_left = 0; tex_left = 0;
@ -404,41 +438,41 @@ void GPU_HW::LoadVertices()
if (rc.shading_enable) if (rc.shading_enable)
{ {
color0 = rc.color_for_first_vertex; color0 = rc.color_for_first_vertex;
pos0.bits = m_fifo.Pop(); pos0.bits = FifoPop();
color1 = m_fifo.Pop() & UINT32_C(0x00FFFFFF); color1 = FifoPop() & UINT32_C(0x00FFFFFF);
pos1.bits = m_fifo.Pop(); pos1.bits = FifoPop();
} }
else else
{ {
color0 = color1 = rc.color_for_first_vertex; color0 = color1 = rc.color_for_first_vertex;
pos0.bits = m_fifo.Pop(); pos0.bits = FifoPop();
pos1.bits = m_fifo.Pop(); pos1.bits = FifoPop();
} }
if (!IsDrawingAreaIsValid()) if (!IsDrawingAreaIsValid())
return; return;
BatchVertex start, end; s32 start_x = pos0.x + m_drawing_offset.x;
start.Set(m_drawing_offset.x + pos0.x, m_drawing_offset.y + pos0.y, m_current_depth, color0, 0, 0); s32 start_y = pos0.y + m_drawing_offset.y;
end.Set(m_drawing_offset.x + pos1.x, m_drawing_offset.y + pos1.y, m_current_depth, color1, 0, 0); s32 end_x = pos1.x + m_drawing_offset.x;
s32 end_y = pos1.y + m_drawing_offset.y;
const s32 min_x = std::min(start.x, end.x); const s32 min_x = std::min(start_x, end_x);
const s32 max_x = std::max(start.x, end.x); const s32 max_x = std::max(start_x, end_x);
const s32 min_y = std::min(start.y, end.y); const s32 min_y = std::min(start_y, end_y);
const s32 max_y = std::max(start.y, end.y); const s32 max_y = std::max(start_y, end_y);
const s32 dx = max_x - min_x; const s32 dx = max_x - min_x;
const s32 dy = max_y - min_y; const s32 dy = max_y - min_y;
if (dx >= MAX_PRIMITIVE_WIDTH || dy >= MAX_PRIMITIVE_HEIGHT) if (dx >= MAX_PRIMITIVE_WIDTH || dy >= MAX_PRIMITIVE_HEIGHT)
{ {
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start.x, start.y, end.x, end.y); Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, end_x, end_y);
return; return;
} }
FixLineVertexCoordinates(start, end, dx, dy); FixLineVertexCoordinates(start_x, start_y, end_x, end_y, dx, dy);
AddNewVertex(static_cast<float>(start_x), static_cast<float>(start_y), depth, 1.0f, color0, 0,
AddVertex(start); static_cast<u16>(0));
AddVertex(end); AddNewVertex(static_cast<float>(end_x), static_cast<float>(end_y), depth, 1.0f, color1, 0, static_cast<u16>(0));
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left)); const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
@ -461,37 +495,38 @@ void GPU_HW::LoadVertices()
const u32 first_color = rc.color_for_first_vertex; const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable; const bool shaded = rc.shading_enable;
BatchVertex last_vertex; s32 last_x, last_y;
u32 last_color;
u32 buffer_pos = 0; u32 buffer_pos = 0;
for (u32 i = 0; i < num_vertices; i++) for (u32 i = 0; i < num_vertices; i++)
{ {
const u32 color = (shaded && i > 0) ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; const u32 color = (shaded && i > 0) ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{m_blit_buffer[buffer_pos++]}; const VertexPosition vp{m_blit_buffer[buffer_pos++]};
const s32 x = m_drawing_offset.x + vp.x;
BatchVertex vertex; const s32 y = m_drawing_offset.y + vp.y;
vertex.Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, m_current_depth, color, 0, 0);
if (i > 0) if (i > 0)
{ {
const s32 min_x = std::min(last_vertex.x, vertex.x); const s32 min_x = std::min(last_x, x);
const s32 max_x = std::max(last_vertex.x, vertex.x); const s32 max_x = std::max(last_x, x);
const s32 min_y = std::min(last_vertex.y, vertex.y); const s32 min_y = std::min(last_y, y);
const s32 max_y = std::max(last_vertex.y, vertex.y); const s32 max_y = std::max(last_y, y);
const s32 dx = max_x - min_x; const s32 dx = max_x - min_x;
const s32 dy = max_y - min_y; const s32 dy = max_y - min_y;
if (dx >= MAX_PRIMITIVE_WIDTH || dy >= MAX_PRIMITIVE_HEIGHT) if (dx >= MAX_PRIMITIVE_WIDTH || dy >= MAX_PRIMITIVE_HEIGHT)
{ {
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", last_vertex.x, last_vertex.y, vertex.x, Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", last_x, last_y, x, y);
vertex.y);
} }
else else
{ {
BatchVertex start(last_vertex); s32 start_x = last_x, start_y = last_y;
BatchVertex end(vertex); s32 end_x = x, end_y = y;
FixLineVertexCoordinates(start, end, dx, dy); FixLineVertexCoordinates(start_x, start_y, end_x, end_y, dx, dy);
AddVertex(start); AddNewVertex(static_cast<float>(start_x), static_cast<float>(start_y), depth, 1.0f, last_color, 0,
AddVertex(end); static_cast<u16>(0));
AddNewVertex(static_cast<float>(end_x), static_cast<float>(end_y), depth, 1.0f, color, 0,
static_cast<u16>(0));
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left)); const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right = const u32 clip_right =
@ -505,7 +540,9 @@ void GPU_HW::LoadVertices()
} }
} }
std::memcpy(&last_vertex, &vertex, sizeof(BatchVertex)); last_x = x;
last_y = y;
last_color = color;
} }
} }
} }

View file

@ -55,24 +55,26 @@ protected:
struct BatchVertex struct BatchVertex
{ {
s32 x; float x;
s32 y; float y;
s32 z; float z;
float w;
u32 color; u32 color;
u32 texpage; u32 texpage;
u16 u; // 16-bit texcoords are needed for 256 extent rectangles u16 u; // 16-bit texcoords are needed for 256 extent rectangles
u16 v; u16 v;
ALWAYS_INLINE void Set(s32 x_, s32 y_, s32 z_, u32 color_, u32 texpage_, u16 packed_texcoord) ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 packed_texcoord)
{ {
Set(x_, y_, z_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8)); Set(x_, y_, z_, w_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8));
} }
ALWAYS_INLINE void Set(s32 x_, s32 y_, s32 z_, u32 color_, u32 texpage_, u16 u_, u16 v_) ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_, u16 v_)
{ {
x = x_; x = x_;
y = y_; y = y_;
z = z_; z = z_;
w = w_;
color = color_; color = color_;
texpage = texpage_; texpage = texpage_;
u = u_; u = u_;
@ -191,7 +193,7 @@ protected:
/// Returns the value to be written to the depth buffer for the current operation for mask bit emulation. /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
ALWAYS_INLINE float GetCurrentNormalizedVertexDepth() const ALWAYS_INLINE float GetCurrentNormalizedVertexDepth() const
{ {
return (static_cast<float>(m_current_depth) / 65535.0f); return 1.0f - (static_cast<float>(m_current_depth) / 65535.0f);
} }
/// Returns the interlaced mode to use when scanning out/displaying. /// Returns the interlaced mode to use when scanning out/displaying.
@ -234,7 +236,7 @@ protected:
/// Handles quads with flipped texture coordinate directions. /// Handles quads with flipped texture coordinate directions.
static void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices); static void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices);
static void FixLineVertexCoordinates(BatchVertex& start, BatchVertex& end, s32 dx, s32 dy); static void FixLineVertexCoordinates(s32& start_x, s32& start_y, s32& end_x, s32& end_y, s32 dx, s32 dy);
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow; HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;

View file

@ -263,7 +263,7 @@ bool GPU_HW_D3D11::CreateTextureBuffer()
bool GPU_HW_D3D11::CreateBatchInputLayout() bool GPU_HW_D3D11::CreateBatchInputLayout()
{ {
static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 4> attributes = { static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 4> attributes = {
{{"ATTR", 0, DXGI_FORMAT_R32G32B32_SINT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0}, {{"ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0}, {"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0}, {"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}}; {"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}}};

View file

@ -291,7 +291,7 @@ bool GPU_HW_OpenGL::CreateVertexBuffer()
glEnableVertexAttribArray(1); glEnableVertexAttribArray(1);
glEnableVertexAttribArray(2); glEnableVertexAttribArray(2);
glEnableVertexAttribArray(3); glEnableVertexAttribArray(3);
glVertexAttribIPointer(0, 3, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, x))); glVertexAttribPointer(0, 4, GL_FLOAT, false, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, x)));
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex), glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
reinterpret_cast<void*>(offsetof(BatchVertex, color))); reinterpret_cast<void*>(offsetof(BatchVertex, color)));
glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, u))); glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, u)));

View file

@ -516,12 +516,12 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc
const char* output_block_suffix = upscaled_lines ? "VS" : ""; const char* output_block_suffix = upscaled_lines ? "VS" : "";
if (textured) if (textured)
{ {
DeclareVertexEntryPoint(ss, {"int3 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1, DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}}, false, output_block_suffix); {{"nointerpolation", "uint4 v_texpage"}}, false, output_block_suffix);
} }
else else
{ {
DeclareVertexEntryPoint(ss, {"int3 a_pos", "float4 a_col0"}, 1, 0, {}, false, output_block_suffix); DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0"}, 1, 0, {}, false, output_block_suffix);
} }
ss << R"( ss << R"(
@ -532,9 +532,10 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc
float vertex_offset = (RESOLUTION_SCALE == 1u) ? 0.5 : 0.0; float vertex_offset = (RESOLUTION_SCALE == 1u) ? 0.5 : 0.0;
// 0..+1023 -> -1..1 // 0..+1023 -> -1..1
float pos_x = ((float(a_pos.x) + vertex_offset) / 512.0) - 1.0; float pos_x = ((a_pos.x + vertex_offset) / 512.0) - 1.0;
float pos_y = ((float(a_pos.y) + vertex_offset) / -256.0) + 1.0; float pos_y = ((a_pos.y + vertex_offset) / -256.0) + 1.0;
float pos_z = 1.0 - (float(a_pos.z) / 65535.0); float pos_z = a_pos.z;
float pos_w = a_pos.w;
#if API_OPENGL || API_OPENGL_ES #if API_OPENGL || API_OPENGL_ES
// OpenGL seems to be off by one pixel in the Y direction due to lower-left origin, but only on // OpenGL seems to be off by one pixel in the Y direction due to lower-left origin, but only on
@ -550,7 +551,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool upsc
pos_y = -pos_y; pos_y = -pos_y;
#endif #endif
v_pos = float4(pos_x, pos_y, pos_z, 1.0); v_pos = float4(pos_x * pos_w, pos_y * pos_w, pos_z * pos_w, pos_w);
v_col0 = a_col0; v_col0 = a_col0;
#if TEXTURED #if TEXTURED

View file

@ -669,7 +669,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
gpbuilder.SetRenderPass(m_vram_render_pass, 0); gpbuilder.SetRenderPass(m_vram_render_pass, 0);
gpbuilder.AddVertexBuffer(0, sizeof(BatchVertex), VK_VERTEX_INPUT_RATE_VERTEX); gpbuilder.AddVertexBuffer(0, sizeof(BatchVertex), VK_VERTEX_INPUT_RATE_VERTEX);
gpbuilder.AddVertexAttribute(0, 0, VK_FORMAT_R32G32B32_SINT, offsetof(BatchVertex, x)); gpbuilder.AddVertexAttribute(0, 0, VK_FORMAT_R32G32B32A32_SFLOAT, offsetof(BatchVertex, x));
gpbuilder.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, color)); gpbuilder.AddVertexAttribute(1, 0, VK_FORMAT_R8G8B8A8_UNORM, offsetof(BatchVertex, color));
if (textured) if (textured)
{ {

View file

@ -227,18 +227,18 @@ void GPU_SW::DispatchRenderCommand()
for (u32 i = 0; i < num_vertices; i++) for (u32 i = 0; i < num_vertices; i++)
{ {
SWVertex& vert = vertices[i]; SWVertex& vert = vertices[i];
const u32 color_rgb = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color; const u32 color_rgb = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
vert.color_r = Truncate8(color_rgb); vert.color_r = Truncate8(color_rgb);
vert.color_g = Truncate8(color_rgb >> 8); vert.color_g = Truncate8(color_rgb >> 8);
vert.color_b = Truncate8(color_rgb >> 16); vert.color_b = Truncate8(color_rgb >> 16);
const VertexPosition vp{m_fifo.Pop()}; const VertexPosition vp{FifoPop()};
vert.x = vp.x; vert.x = vp.x;
vert.y = vp.y; vert.y = vp.y;
if (textured) if (textured)
{ {
std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(m_fifo.Pop())); std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(FifoPop()));
} }
else else
{ {
@ -262,8 +262,8 @@ void GPU_SW::DispatchRenderCommand()
case Primitive::Rectangle: case Primitive::Rectangle:
{ {
const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex); const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex);
const VertexPosition vp{m_fifo.Pop()}; const VertexPosition vp{FifoPop()};
const u32 texcoord_and_palette = rc.texture_enable ? m_fifo.Pop() : 0; const u32 texcoord_and_palette = rc.texture_enable ? FifoPop() : 0;
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette)); const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette));
s32 width; s32 width;
@ -284,7 +284,7 @@ void GPU_SW::DispatchRenderCommand()
break; break;
default: default:
{ {
const u32 width_and_height = m_fifo.Pop(); const u32 width_and_height = FifoPop();
width = static_cast<s32>(width_and_height & VRAM_WIDTH_MASK); width = static_cast<s32>(width_and_height & VRAM_WIDTH_MASK);
height = static_cast<s32>((width_and_height >> 16) & VRAM_HEIGHT_MASK); height = static_cast<s32>((width_and_height >> 16) & VRAM_HEIGHT_MASK);
@ -321,7 +321,7 @@ void GPU_SW::DispatchRenderCommand()
// first vertex // first vertex
SWVertex* p0 = &vertices[0]; SWVertex* p0 = &vertices[0];
SWVertex* p1 = &vertices[1]; SWVertex* p1 = &vertices[1];
p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : m_fifo.Pop()}); p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())});
p0->SetColorRGB24(first_color); p0->SetColorRGB24(first_color);
// remaining vertices in line strip // remaining vertices in line strip
@ -335,8 +335,8 @@ void GPU_SW::DispatchRenderCommand()
} }
else else
{ {
p1->SetColorRGB24(shaded ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color); p1->SetColorRGB24(shaded ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color);
p1->SetPosition(VertexPosition{m_fifo.Pop()}); p1->SetPosition(VertexPosition{Truncate32(FifoPop())});
} }
// down here because of the FIFO pops // down here because of the FIFO pops

View file

@ -3,6 +3,7 @@
#include "common/bitutils.h" #include "common/bitutils.h"
#include "common/state_wrapper.h" #include "common/state_wrapper.h"
#include "cpu_core.h" #include "cpu_core.h"
#include "pgxp.h"
#include "settings.h" #include "settings.h"
#include <algorithm> #include <algorithm>
#include <array> #include <array>
@ -621,6 +622,21 @@ static void RTPS(const s16 V[3], u8 shift, bool lm, bool last)
CheckMACOverflow<0>(Sy); CheckMACOverflow<0>(Sy);
PushSXY(s32(Sx >> 16), s32(Sy >> 16)); PushSXY(s32(Sx >> 16), s32(Sy >> 16));
if (g_settings.gpu_pgxp_enable)
{
// this can potentially use increased precision on Z
const float precise_z = std::max<float>((float)REGS.H / 2.f, (float)REGS.SZ3);
const float precise_h_div_sz = (float)REGS.H / precise_z;
const float fofx = ((float)REGS.OFX / (float)(1 << 16));
const float fofy = ((float)REGS.OFY / (float)(1 << 16));
float precise_x = fofx + ((float)REGS.IR1 * precise_h_div_sz) * ((g_settings.gpu_widescreen_hack) ? 0.75f : 1.00f);
float precise_y = fofy + ((float)REGS.IR2 * precise_h_div_sz);
precise_x = std::clamp<float>(precise_x, -0x400, 0x3ff);
precise_y = std::clamp<float>(precise_y, -0x400, 0x3ff);
PGXP::GTE_PushSXYZ2f(precise_x, precise_y, precise_z, REGS.dr32[14]);
}
if (last) if (last)
{ {
// MAC0=(((H*20000h/SZ3)+1)/2)*DQA+DQB, IR0=MAC0/1000h ;Depth cueing 0..+1000h // MAC0=(((H*20000h/SZ3)+1)/2)*DQA+DQB, IR0=MAC0/1000h ;Depth cueing 0..+1000h
@ -664,6 +680,19 @@ static void Execute_NCLIP(Instruction inst)
REGS.FLAG.UpdateError(); REGS.FLAG.UpdateError();
} }
// NCLIP handler used when PGXP culling is enabled. If PGXP holds valid precise
// coordinates matching the current SXY0..SXY2 registers, MAC0 is taken from the
// floating-point result; otherwise the regular integer NCLIP is executed.
static void Execute_NCLIP_PGXP(Instruction inst)
{
  const bool have_precise = PGXP::GTE_NCLIP_valid(REGS.dr32[12], REGS.dr32[13], REGS.dr32[14]);
  if (!have_precise)
  {
    Execute_NCLIP(inst);
    return;
  }

  REGS.FLAG.Clear();
  REGS.MAC0 = static_cast<s32>(PGXP::GTE_NCLIP());
}
static void Execute_AVSZ3(Instruction inst) static void Execute_AVSZ3(Instruction inst)
{ {
REGS.FLAG.Clear(); REGS.FLAG.Clear();
@ -994,8 +1023,13 @@ void ExecuteInstruction(u32 inst_bits)
break; break;
case 0x06: case 0x06:
Execute_NCLIP(inst); {
break; if (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling)
Execute_NCLIP_PGXP(inst);
else
Execute_NCLIP(inst);
}
break;
case 0x0C: case 0x0C:
Execute_OP(inst); Execute_OP(inst);
@ -1092,7 +1126,12 @@ InstructionImpl GetInstructionImpl(u32 inst_bits)
return &Execute_RTPS; return &Execute_RTPS;
case 0x06: case 0x06:
return &Execute_NCLIP; {
if (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling)
return &Execute_NCLIP_PGXP;
else
return &Execute_NCLIP;
}
case 0x0C: case 0x0C:
return &Execute_OP; return &Execute_OP;

View file

@ -8,12 +8,13 @@
#include "common/log.h" #include "common/log.h"
#include "common/string_util.h" #include "common/string_util.h"
#include "controller.h" #include "controller.h"
#include "cpu_core.h"
#include "cpu_code_cache.h" #include "cpu_code_cache.h"
#include "cpu_core.h"
#include "dma.h" #include "dma.h"
#include "gpu.h" #include "gpu.h"
#include "gte.h" #include "gte.h"
#include "host_display.h" #include "host_display.h"
#include "pgxp.h"
#include "save_state_version.h" #include "save_state_version.h"
#include "system.h" #include "system.h"
#include <cmath> #include <cmath>
@ -367,6 +368,10 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
si.SetBoolValue("GPU", "DisableInterlacing", false); si.SetBoolValue("GPU", "DisableInterlacing", false);
si.SetBoolValue("GPU", "ForceNTSCTimings", false); si.SetBoolValue("GPU", "ForceNTSCTimings", false);
si.SetBoolValue("GPU", "WidescreenHack", false); si.SetBoolValue("GPU", "WidescreenHack", false);
si.SetBoolValue("GPU", "PGXPEnable", false);
si.SetBoolValue("GPU", "PGXPCulling", true);
si.SetBoolValue("GPU", "PGXPTextureCorrection", true);
si.SetBoolValue("GPU", "PGXPVertexCache", false);
si.SetStringValue("Display", "CropMode", Settings::GetDisplayCropModeName(Settings::DEFAULT_DISPLAY_CROP_MODE)); si.SetStringValue("Display", "CropMode", Settings::GetDisplayCropModeName(Settings::DEFAULT_DISPLAY_CROP_MODE));
si.SetStringValue("Display", "AspectRatio", si.SetStringValue("Display", "AspectRatio",
@ -485,6 +490,19 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
g_gpu->UpdateSettings(); g_gpu->UpdateSettings();
} }
if (g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable ||
(g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling != old_settings.gpu_pgxp_culling))
{
if (g_settings.IsUsingCodeCache())
{
ReportFormattedMessage("PGXP %s, recompiling all blocks.", g_settings.gpu_pgxp_enable ? "enabled" : "disabled");
CPU::CodeCache::Flush();
}
if (g_settings.gpu_pgxp_enable)
PGXP::Initialize();
}
if (g_settings.cdrom_read_thread != old_settings.cdrom_read_thread) if (g_settings.cdrom_read_thread != old_settings.cdrom_read_thread)
g_cdrom.SetUseReadThread(g_settings.cdrom_read_thread); g_cdrom.SetUseReadThread(g_settings.cdrom_read_thread);
@ -625,8 +643,7 @@ void HostInterface::ToggleSoftwareRendering()
if (System::IsShutdown() || g_settings.gpu_renderer == GPURenderer::Software) if (System::IsShutdown() || g_settings.gpu_renderer == GPURenderer::Software)
return; return;
const GPURenderer new_renderer = const GPURenderer new_renderer = g_gpu->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer;
g_gpu->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer;
AddFormattedOSDMessage(2.0f, "Switching to %s renderer...", Settings::GetRendererDisplayName(new_renderer)); AddFormattedOSDMessage(2.0f, "Switching to %s renderer...", Settings::GetRendererDisplayName(new_renderer));
System::RecreateGPU(new_renderer); System::RecreateGPU(new_renderer);

800
src/core/pgxp.cpp Normal file
View file

@ -0,0 +1,800 @@
/***************************************************************************
* Original copyright notice from PGXP code from Beetle PSX. *
* Copyright (C) 2016 by iCatButler *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. *
***************************************************************************/
#include "pgxp.h"
#include "settings.h"
#include <cmath>
namespace PGXP {
// pgxp_types.h
// Precise shadow of one 32-bit PSX word (a register or a memory word).
// Field order matters: PGXP_value_invalid_address / PGXP_value_zero below are
// initialised positionally.
typedef struct PGXP_value_Tag
{
// Floating-point components. GTE_PushSXYZ2f stores screen x/y and depth here;
// GetPreciseVertex derives w from z (z / 32768).
float x;
float y;
float z;
// Validity bits, one byte per component (see VALID_0..VALID_3). The three
// members alias the same storage.
union
{
unsigned int flags;
unsigned char compFlags[4];
unsigned short halfFlags[2];
};
// Running id assigned by GTE_PushSXYZ2f; InvalidLoad/InvalidStore store
// other marker values here.
unsigned int count;
// Raw PSX word this entry shadows; Validate() compares against it.
unsigned int value;
// General flags: the vertex cache writes 1 here, and
// PGXP_value_invalid_address carries INVALID_ADDRESS.
unsigned short gFlags;
// Flags for the low / high 16-bit half; moved around by the 16-bit load and
// store helpers.
unsigned char lFlags;
unsigned char hFlags;
} PGXP_value;
// pgxp_value.h
// Overlay of a 32-bit PSX word as bytes, half-words or a whole word, in signed
// and unsigned flavours. All members alias the same four bytes, so which
// half-word is "l" and which is "h" depends on host byte order.
typedef union
{
// unsigned bytes
struct
{
u8 l, h, h2, h3;
} b;
// unsigned half-words
struct
{
u16 l, h;
} w;
// signed bytes
struct
{
s8 l, h, h2, h3;
} sb;
// signed half-words
struct
{
s16 l, h;
} sw;
// whole word, unsigned / signed
u32 d;
s32 sd;
} psx_value;
// Reason codes describing why a PGXP value is not usable. In this file only
// INVALID_ADDRESS is referenced (as the gFlags of PGXP_value_invalid_address).
typedef enum
{
UNINITIALISED = 0,
INVALID_PSX_VALUE = 1,
INVALID_ADDRESS = 2,
INVALID_BITWISE_OP = 3,
DIVIDE_BY_ZERO = 4,
INVALID_8BIT_LOAD = 5,
INVALID_8BIT_STORE = 6
} PGXP_error_states;
// Bit masks for PGXP_value::flags. Each component owns one byte of the flags
// word (component n lives at bit n * 8), matching PGXP_value::compFlags[n].
#define NONE 0
#define ALL 0xFFFFFFFF
// Per-component "valid" bit, before shifting into the component's byte.
#define VALID 1
#define VALID_0 (VALID << 0)
#define VALID_1 (VALID << 8)
#define VALID_2 (VALID << 16)
#define VALID_3 (VALID << 24)
// Combined masks: components 0-1, 0-2, and all four.
#define VALID_01 (VALID_0 | VALID_1)
#define VALID_012 (VALID_0 | VALID_1 | VALID_2)
#define VALID_ALL (VALID_0 | VALID_1 | VALID_2 | VALID_3)
// Every flag bit except the four valid bits; AND with this to invalidate.
#define INV_VALID_ALL (ALL ^ VALID_ALL)
// Value returned for loads from addresses PGXP does not mirror: no valid
// components, gFlags tagged with INVALID_ADDRESS.
static const PGXP_value PGXP_value_invalid_address = {0.f, 0.f, 0.f, {0}, 0, 0, INVALID_ADDRESS, 0, 0};

// A zero value with every component marked valid. Initialisers are positional
// (x, y, z, {flags}, count, value, gFlags, lFlags, hFlags): VALID_ALL belongs
// in the flags union, not in the raw PSX value, which is zero.
static const PGXP_value PGXP_value_zero = {0.f, 0.f, 0.f, {VALID_ALL}, 0, 0, 0, 0, 0};

// Clears all valid bits of *pV when its recorded PSX word differs from psxV.
static void Validate(PGXP_value* pV, u32 psxV);

// As Validate, but compares only the bits selected by mask and clears only the
// bits in validMask on mismatch.
static void MaskValidate(PGXP_value* pV, u32 psxV, u32 mask, u32 validMask);
// Overlay of a packed 32-bit screen coordinate word as two 16-bit halves,
// signed (x, y) or unsigned (ux, uy). GTE_PushSXYZ2f uses it to split the raw
// SXY word into the x/y indices handed to the vertex cache.
typedef union
{
struct
{
s16 x;
s16 y;
};
struct
{
u16 ux;
u16 uy;
};
u32 word;
} low_value;
// pgxp_mem.h
// Maps a PSX address to an index into the Mem mirror, or InvalidAddress.
static u32 PGXP_ConvertAddress(u32 addr);
// Returns the mirror entry for addr, or NULL when the address is not mirrored.
static PGXP_value* GetPtr(u32 addr);
static PGXP_value* ReadMem(u32 addr);
// Validate the mirror entry at addr against the loaded value and copy it to dest.
static void ValidateAndCopyMem(PGXP_value* dest, u32 addr, u32 value);
static void ValidateAndCopyMem16(PGXP_value* dest, u32 addr, u32 value, int sign);
// Store a tracked value (whole word / one half-word) into the mirror.
static void WriteMem(PGXP_value* value, u32 addr);
static void WriteMem16(PGXP_value* src, u32 addr);
// pgxp_gpu.h
// Records a vertex in the screen-space vertex cache; a NULL vertex marks the
// cache as failed.
void PGXP_CacheVertex(short sx, short sy, const PGXP_value* _pVertex);
// pgxp_gte.h
static void PGXP_InitGTE();
// pgxp_cpu.h
static void PGXP_InitCPU();
// Shadow storage for the CPU register file: 32 general-purpose registers
// followed by Hi and Lo, 34 entries in total.
static PGXP_value CPU_reg_mem[34];

// Hi/Lo occupy the two slots after the GPRs. The indices must stay below 34;
// the former values (33 and 34) placed Lo one element past the end of the array.
#define CPU_Hi CPU_reg[32]
#define CPU_Lo CPU_reg[33]

// Shadow storage for the 32 coprocessor-0 registers.
static PGXP_value CP0_reg_mem[32];

static PGXP_value* CPU_reg = CPU_reg_mem;
static PGXP_value* CP0_reg = CP0_reg_mem;
// pgxp_value.c
// Drops every valid bit of *pV when the PSX word it was recorded for no longer
// matches psxV; leaves the flags untouched on a match. pV must not be NULL.
void Validate(PGXP_value* pV, u32 psxV)
{
  const bool matches = (pV->value == psxV);
  if (!matches)
    pV->flags &= INV_VALID_ALL;
}
// Partial validation: compares only the bits selected by mask and, on a
// mismatch, clears just the flag bits listed in validMask. pV must not be NULL.
void MaskValidate(PGXP_value* pV, u32 psxV, u32 mask, u32 validMask)
{
  const u32 recorded = pV->value & mask;
  const u32 current = psxV & mask;
  if (recorded != current)
    pV->flags &= (ALL ^ (validMask));
}
// pgxp_mem.c
static void PGXP_InitMem();

// Mirror of PSX memory, one PGXP_value per 32-bit word. Three regions of
// 2MB-worth of words each: user RAM, scratchpad, hardware registers.
static PGXP_value Mem[3 * 2048 * 1024 / 4];

// Word index at which each region starts inside Mem.
static const u32 UserMemOffset = 0;
static const u32 ScratchOffset = 2048 * 1024 / 4;
static const u32 RegisterOffset = 2 * 2048 * 1024 / 4;

// One past the last valid index; returned by PGXP_ConvertAddress as "not mirrored".
static const u32 InvalidAddress = 3 * 2048 * 1024 / 4;

// Zeroes the whole mirror, which also clears every valid flag.
void PGXP_InitMem()
{
  memset(&Mem[0], 0, sizeof(Mem));
}
// Translates a PSX address into a word index into Mem.
//
// - 0x00/0x80/0xa0 segments: main RAM, folded onto the 2MB user region.
// - 0x1f800000..0x1f800fff: scratchpad (1KB, mirrored).
// - 0x1f801000 and up within 0x1f8xxxxx: hardware register region, indexed by
//   the low 16 bits of the address minus 0x1000.
//
// Returns InvalidAddress for everything else.
u32 PGXP_ConvertAddress(u32 addr)
{
  switch (addr >> 24)
  {
    case 0x80:
    case 0xa0:
    case 0x00:
      // RAM further mirrored over 8MB
      return UserMemOffset + (((addr & 0x7FFFFF) % 0x200000) >> 2);

    default:
      break;
  }

  if ((addr >> 20) != 0x1f8)
    return InvalidAddress;

  if (addr < 0x1f801000)
    return ScratchOffset + ((addr & 0x3FF) >> 2);

  // Only the low 16 bits select the register word. Addresses such as
  // 0x1f810000 have a low half below 0x1000; subtracting 0x1000 would wrap
  // around and produce an index far beyond the end of Mem, so treat those as
  // unmapped instead.
  const u32 low_half = addr & 0xFFFF;
  if (low_half < 0x1000)
    return InvalidAddress;

  return RegisterOffset + ((low_half - 0x1000) >> 2);
}
// Looks up the mirror entry for a PSX address. Returns NULL when the address
// falls outside the mirrored regions.
PGXP_value* GetPtr(u32 addr)
{
  const u32 index = PGXP_ConvertAddress(addr);
  return (index == InvalidAddress) ? NULL : &Mem[index];
}
// Read accessor for the mirror; currently identical to GetPtr (NULL when the
// address is not mirrored).
PGXP_value* ReadMem(u32 addr)
{
  PGXP_value* const entry = GetPtr(addr);
  return entry;
}
// 32-bit load tracking: validates the mirror entry at addr against the word
// that was actually loaded, then copies it (including its possibly cleared
// flags) to dest. Unmirrored addresses yield PGXP_value_invalid_address.
void ValidateAndCopyMem(PGXP_value* dest, u32 addr, u32 value)
{
  PGXP_value* const entry = GetPtr(addr);
  if (entry == NULL)
  {
    *dest = PGXP_value_invalid_address;
    return;
  }

  Validate(entry, value);
  *dest = *entry;
}
// 16-bit load tracking. Validates the addressed half of the mirror word
// against the loaded half-word, copies the entry to dest and moves the loaded
// half into the low component (x / lFlags / compFlags[0]).
//
// addr % 4 == 2 selects the upper half of the word (bits 16..31); any other
// alignment selects the lower half. The halves are picked with explicit shifts
// rather than by punning through the psx_value union, so the result does not
// depend on host byte order.
//
// sign scales the -1 written to y when the loaded x component is negative.
void ValidateAndCopyMem16(PGXP_value* dest, u32 addr, u32 value, int sign)
{
  PGXP_value* pMem = GetPtr(addr);
  if (pMem == NULL)
  {
    *dest = PGXP_value_invalid_address;
    return;
  }

  const bool high_half = ((addr % 4) == 2);
  const u32 shift = high_half ? 16u : 0u;
  const u32 half_mask = static_cast<u32>(0xFFFFu) << shift;
  const u32 half_value = (value & static_cast<u32>(0xFFFFu)) << shift;

  // validate only the addressed half, then copy the whole entry
  MaskValidate(pMem, half_value, half_mask, high_half ? VALID_1 : VALID_0);
  *dest = *pMem;

  // if high word then shift it down into the low component
  if (high_half)
  {
    dest->x = dest->y;
    dest->lFlags = dest->hFlags;
    dest->compFlags[0] = dest->compFlags[1];
  }

  // truncate value: the upper component becomes 0, or -sign for negative x
  dest->y = (dest->x < 0) ? -1.f * sign : 0.f;
  dest->hFlags = 0;
  dest->value = value;
  dest->compFlags[1] = VALID; // iCB: High word is valid, just 0
}
// 32-bit store tracking: copies *value into the mirror entry for addr. Stores
// to unmirrored addresses are ignored.
void WriteMem(PGXP_value* value, u32 addr)
{
  PGXP_value* const entry = GetPtr(addr);
  if (!entry)
    return;

  *entry = *value;
}
// 16-bit store tracking: merges the low component of *src into one half of the
// mirror entry at addr. addr % 4 == 2 targets the upper half (y / hFlags /
// compFlags[1], bits 16..31 of the raw word); any other alignment targets the
// lower half. Stores to unmirrored addresses are ignored.
//
// The raw word is patched with masks instead of writing through a psx_value*
// cast of &dest->value, which violated strict aliasing and depended on host
// byte order.
void WriteMem16(PGXP_value* src, u32 addr)
{
  PGXP_value* dest = GetPtr(addr);
  if (!dest)
    return;

  const unsigned int half = static_cast<unsigned int>(static_cast<u16>(src->value));

  // determine if high or low word
  if ((addr % 4) == 2)
  {
    dest->y = src->x;
    dest->hFlags = src->lFlags;
    dest->compFlags[1] = src->compFlags[0];
    dest->value = (dest->value & 0x0000FFFFu) | (half << 16);
  }
  else
  {
    dest->x = src->x;
    dest->lFlags = src->lFlags;
    dest->compFlags[0] = src->compFlags[0];
    dest->value = (dest->value & 0xFFFF0000u) | half;
  }

  // overwrite z/w if valid
  if (src->compFlags[2] == VALID)
  {
    dest->z = src->z;
    dest->compFlags[2] = src->compFlags[2];
  }

  // inherit flags from both values (?)
  dest->gFlags |= src->gFlags;
}
// pgxp_main.c
// Bit set of enabled PGXP modes, manipulated by the PGXP_*Modes functions.
static u32 gMode = 0;

// Resets all PGXP tracking state: the memory mirror, then the CPU/CP0 register
// shadows, then the GTE register shadows.
void Initialize()
{
  PGXP_InitMem();
  PGXP_InitCPU();
  PGXP_InitGTE();
}
// Replaces the whole mode bit set.
void PGXP_SetModes(u32 modes)
{
  gMode = modes;
}

// Returns the current mode bit set.
u32 PGXP_GetModes()
{
  return gMode;
}

// Turns on the given mode bits, leaving the others unchanged.
void PGXP_EnableModes(u32 modes)
{
  gMode = gMode | modes;
}

// Turns off the given mode bits, leaving the others unchanged.
void PGXP_DisableModes(u32 modes)
{
  gMode &= ~modes;
}
// pgxp_gte.c
// GTE registers
// Shadow storage for the 32 GTE data registers and 32 GTE control registers.
static PGXP_value GTE_data_reg_mem[32];
static PGXP_value GTE_ctrl_reg_mem[32];

// Accessors used by the rest of the file (and by the SXY* macros).
static PGXP_value* GTE_data_reg = GTE_data_reg_mem;
static PGXP_value* GTE_ctrl_reg = GTE_ctrl_reg_mem;

// Zeroes both GTE shadow register files, clearing every valid flag.
void PGXP_InitGTE()
{
  memset(&GTE_data_reg_mem[0], 0, sizeof(GTE_data_reg_mem));
  memset(&GTE_ctrl_reg_mem[0], 0, sizeof(GTE_ctrl_reg_mem));
}
// Instruction register decoding
// NOTE: these decode macros are defined a second time, with identical text, in
// the pgxp_cpu.c section further down. Keep both copies token-for-token equal.
// Most of them do not parenthesise _instr, so pass a plain variable rather than
// an expression.
#define op(_instr) (_instr >> 26) // The op part of the instruction register
#define func(_instr) ((_instr)&0x3F) // The funct part of the instruction register
#define sa(_instr) ((_instr >> 6) & 0x1F) // The sa part of the instruction register
#define rd(_instr) ((_instr >> 11) & 0x1F) // The rd part of the instruction register
#define rt(_instr) ((_instr >> 16) & 0x1F) // The rt part of the instruction register
#define rs(_instr) ((_instr >> 21) & 0x1F) // The rs part of the instruction register
#define imm(_instr) (_instr & 0xFFFF) // The immediate part of the instruction register
// Precise x/y of the three screen-coordinate FIFO entries (GTE data regs 12-14).
#define SX0 (GTE_data_reg[12].x)
#define SY0 (GTE_data_reg[12].y)
#define SX1 (GTE_data_reg[13].x)
#define SY1 (GTE_data_reg[13].y)
#define SX2 (GTE_data_reg[14].x)
#define SY2 (GTE_data_reg[14].y)
// Whole FIFO entries, plus data reg 15 (SXYP), whose write pushes the FIFO in
// PGXP_MTC2_int.
#define SXY0 (GTE_data_reg[12])
#define SXY1 (GTE_data_reg[13])
#define SXY2 (GTE_data_reg[14])
#define SXYP (GTE_data_reg[15])
void GTE_PushSXYZ2f(float _x, float _y, float _z, unsigned int _v)
{
static unsigned int uCount = 0;
low_value temp;
// push values down FIFO
SXY0 = SXY1;
SXY1 = SXY2;
SXY2.x = _x;
SXY2.y = _y;
SXY2.z = _z;
SXY2.value = _v;
SXY2.flags = VALID_ALL;
SXY2.count = uCount++;
// cache value in GPU plugin
temp.word = _v;
if (g_settings.gpu_pgxp_vertex_cache)
PGXP_CacheVertex(temp.x, temp.y, &SXY2);
else
PGXP_CacheVertex(0, 0, NULL);
#ifdef GTE_LOG
GTE_LOG("PGXP_PUSH (%f, %f) %u %u|", SXY2.x, SXY2.y, SXY2.flags, SXY2.count);
#endif
}
// Integer front end for GTE_PushSXYZ2f: _x and _y are divided by 65536 (i.e.
// treated as 16.16 fixed point), _z is converted to float as-is.
void GTE_PushSXYZ2s(s64 _x, s64 _y, s64 _z, u32 v)
{
  const float one = (float)(1 << 16);
  const float precise_x = (float)(_x) / one;
  const float precise_y = (float)(_y) / one;
  const float precise_z = (float)(_z);

  GTE_PushSXYZ2f(precise_x, precise_y, precise_z, v);
}
// Accessors for the GTE input vectors via a `psxRegs` object.
// NOTE(review): nothing in the visible part of this file uses these macros or
// declares `psxRegs`; they look like leftovers from the code this file was
// ported from. Confirm before relying on them.
#define VX(n) (psxRegs.CP2D.p[n << 1].sw.l)
#define VY(n) (psxRegs.CP2D.p[n << 1].sw.h)
#define VZ(n) (psxRegs.CP2D.p[(n << 1) + 1].sw.l)
// Checks whether the precise NCLIP can be used. Each shadow FIFO entry is
// validated against the raw register value passed in; returns 1 only when all
// three entries still have valid x and y components, 0 otherwise.
int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2)
{
  Validate(&SXY0, sxy0);
  Validate(&SXY1, sxy1);
  Validate(&SXY2, sxy2);

  const unsigned int shared_flags = SXY0.flags & SXY1.flags & SXY2.flags;
  return ((shared_flags & VALID_01) == VALID_01) ? 1 : 0;
}
// Computes NCLIP (the signed area term of the SXY0/SXY1/SXY2 triangle) from
// the precise coordinates. Results whose magnitude lies strictly between 0.1
// and 1 are pushed one unit further from zero so that a later conversion to
// integer does not turn them into 0.
float GTE_NCLIP()
{
  float result = ((SX0 * SY1) + (SX1 * SY2) + (SX2 * SY0) - (SX0 * SY2) - (SX1 * SY0) - (SX2 * SY1));

  // ensure fractional values are not incorrectly rounded to 0
  const float magnitude = std::abs(result);
  const bool fractional = (0.1f < magnitude) && (magnitude < 1.f);
  if (fractional)
  {
    if (result < 0.f)
      result += -1;
    else
      result += 1;
  }

  return result;
}
// Writes a tracked value into GTE data register `reg`.
//   reg 31: writes are ignored.
//   reg 15 (SXYP): pushes the SXY FIFO, then stores the value in SXYP as well.
//   anything else: plain store.
static void PGXP_MTC2_int(PGXP_value value, u32 reg)
{
  if (reg == 31)
    return;

  if (reg == 15)
  {
    // push FIFO
    SXY0 = SXY1;
    SXY1 = SXY2;
    SXY2 = value;
    SXYP = SXY2;
  }

  GTE_data_reg[reg] = value;
}
////////////////////////////////////
// Data transfer tracking
////////////////////////////////////
// MFC2 tracking: CPU[Rt] = GTE_D[Rd]. The GTE shadow is validated against the
// register's real value (rdVal) before being copied; the CPU shadow then
// records rtVal as its raw value.
void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal)
{
  PGXP_value& source = GTE_data_reg[rd(instr)];
  Validate(&source, rdVal);

  PGXP_value& target = CPU_reg[rt(instr)];
  target = source;
  target.value = rtVal;
}
// MTC2 tracking: GTE_D[Rd] = CPU[Rt]. The CPU shadow is validated against
// rtVal, written through PGXP_MTC2_int (which handles the SXYP push and the
// read-only register), and the destination's raw value is set to rdVal.
void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal)
{
  PGXP_value& source = CPU_reg[rt(instr)];
  Validate(&source, rtVal);

  const u32 target_index = rd(instr);
  PGXP_MTC2_int(source, target_index);
  GTE_data_reg[target_index].value = rdVal;
}
// CFC2 tracking: CPU[Rt] = GTE_C[Rd]. Validates the control-register shadow
// against rdVal, copies it, and records rtVal as the CPU shadow's raw value.
void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal)
{
  PGXP_value& source = GTE_ctrl_reg[rd(instr)];
  Validate(&source, rdVal);

  PGXP_value& target = CPU_reg[rt(instr)];
  target = source;
  target.value = rtVal;
}
// CTC2 tracking: GTE_C[Rd] = CPU[Rt]. Validates the CPU shadow against rtVal,
// copies it into the control-register shadow, and records rdVal as that
// shadow's raw value.
void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal)
{
  PGXP_value& source = CPU_reg[rt(instr)];
  Validate(&source, rtVal);

  PGXP_value& target = GTE_ctrl_reg[rd(instr)];
  target = source;
  target.value = rdVal;
}
////////////////////////////////////
// Memory Access
////////////////////////////////////
// LWC2 tracking: GTE_D[Rt] = Mem[addr]. The mirror entry is validated against
// the loaded word (rtVal) and then written to the GTE data register shadow.
void CPU_LWC2(u32 instr, u32 rtVal, u32 addr)
{
  PGXP_value loaded;
  ValidateAndCopyMem(&loaded, addr, rtVal);
  PGXP_MTC2_int(loaded, rt(instr));
}
// SWC2 tracking: Mem[addr] = GTE_D[Rt]. The GTE shadow is validated against
// the stored word (rtVal) and then copied into the memory mirror.
void CPU_SWC2(u32 instr, u32 rtVal, u32 addr)
{
  PGXP_value* const source = &GTE_data_reg[rt(instr)];
  Validate(source, rtVal);
  WriteMem(source, addr);
}
// pgxp_gpu.c
/////////////////////////////////
//// Blade_Arma's Vertex Cache (CatBlade?)
/////////////////////////////////
// States of the vertex cache (stored in cacheMode):
//   mode_init  - never used yet; the first use clears the cache
//   mode_write - vertices are currently being recorded
//   mode_read  - vertices are currently being looked up
//   mode_fail  - caching disabled; lookups return NULL
const unsigned int mode_init = 0;
const unsigned int mode_write = 1;
const unsigned int mode_read = 2;
const unsigned int mode_fail = 3;
// One entry per integer screen position, indexed [sy + 0x800][sx + 0x800] for
// coordinates in [-0x800, 0x7ff]. NOTE: this is 4096 * 4096 PGXP_value entries
// of static storage.
PGXP_value vertexCache[0x800 * 2][0x800 * 2];
// Id of the first and of the most recent vertex recorded in the current write
// session, and the current cache state (one of the mode_* constants).
unsigned int baseID = 0;
unsigned int lastID = 0;
unsigned int cacheMode = 0;

// Returns 1 when vertID belongs to the current session, 0 otherwise. Ids are a
// wrapping counter: with no wrap (lastID >= baseID) anything at or above
// baseID counts; after a wrap, ids at or below lastID count as well.
unsigned int IsSessionID(unsigned int vertID)
{
  // At or above the base id: in session whether or not the counter wrapped.
  if (vertID >= baseID)
    return 1;

  // Below the base id: only in session if the counter wrapped past it.
  const bool wrapped = (lastID < baseID);
  return (wrapped && vertID <= lastID) ? 1 : 0;
}
void PGXP_CacheVertex(short sx, short sy, const PGXP_value* _pVertex)
{
const PGXP_value* pNewVertex = (const PGXP_value*)_pVertex;
PGXP_value* pOldVertex = NULL;
if (!pNewVertex)
{
cacheMode = mode_fail;
return;
}
// if (bGteAccuracy)
{
if (cacheMode != mode_write)
{
// Initialise cache on first use
if (cacheMode == mode_init)
memset(vertexCache, 0x00, sizeof(vertexCache));
// First vertex of write session (frame?)
cacheMode = mode_write;
baseID = pNewVertex->count;
}
lastID = pNewVertex->count;
if (sx >= -0x800 && sx <= 0x7ff && sy >= -0x800 && sy <= 0x7ff)
{
pOldVertex = &vertexCache[sy + 0x800][sx + 0x800];
// To avoid ambiguity there can only be one valid entry per-session
if (0) //(IsSessionID(pOldVertex->count) && (pOldVertex->value == pNewVertex->value))
{
// check to ensure this isn't identical
if ((fabsf(pOldVertex->x - pNewVertex->x) > 0.1f) || (fabsf(pOldVertex->y - pNewVertex->y) > 0.1f) ||
(fabsf(pOldVertex->z - pNewVertex->z) > 0.1f))
{
*pOldVertex = *pNewVertex;
pOldVertex->gFlags = 5;
return;
}
}
// Write vertex into cache
*pOldVertex = *pNewVertex;
pOldVertex->gFlags = 1;
}
}
}
// Looks up the cache entry for an integer screen position. Returns NULL when
// the cache is in mode_fail or the position lies outside [-0x800, 0x7ff].
// Switches the cache to read mode, clearing it first if it was never used.
// The returned entry may be empty; callers check its gFlags.
PGXP_value* PGXP_GetCachedVertex(short sx, short sy)
{
  if (cacheMode != mode_read)
  {
    if (cacheMode == mode_fail)
      return NULL;

    // Initialise cache on first use
    if (cacheMode == mode_init)
      memset(vertexCache, 0x00, sizeof(vertexCache));

    // First vertex of read session (frame?)
    cacheMode = mode_read;
  }

  const bool in_range = (sx >= -0x800 && sx <= 0x7ff && sy >= -0x800 && sy <= 0x7ff);
  return in_range ? &vertexCache[sy + 0x800][sx + 0x800] : NULL;
}
static float TruncateVertexPosition(float p)
{
const s32 int_part = static_cast<s32>(p);
const float int_part_f = static_cast<float>(int_part);
return static_cast<float>(static_cast<s16>(int_part << 5) >> 5) + (p - int_part_f);
}
// Resolves the precise position for a vertex word read from PSX memory.
//   addr         address the packed vertex word was read from
//   value        the packed vertex word itself (x in the low half, y in the high half)
//   x, y         native integer position, used when nothing precise is known
//   xOffs, yOffs offsets added to the precise position
//   out_x/y/w    receive the resulting position and w (always written)
// Lookup order: the memory mirror at addr (must have valid x/y and match
// value), then the vertex cache keyed by the integer coordinates in value,
// then the native data with w = 1.
// Returns true only for a mirror hit whose z component is also valid, i.e.
// when out_w can be trusted for perspective correction.
bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y, float* out_w)
{
  const auto write_precise = [&](const PGXP_value& source) {
    *out_x = TruncateVertexPosition(source.x) + static_cast<float>(xOffs);
    *out_y = TruncateVertexPosition(source.y) + static_cast<float>(yOffs);
    *out_w = source.z / 32768.0f;
  };

  const PGXP_value* tracked = ReadMem(addr);
  if (tracked && ((tracked->flags & VALID_01) == VALID_01) && (tracked->value == value))
  {
    // There is a value here with valid X and Y coordinates; W is only usable
    // when the Z component is valid too.
    write_precise(*tracked);
    return ((tracked->flags & VALID_2) == VALID_2);
  }

  // Look in cache for valid vertex
  const short psx_x = (short)(value & 0xFFFFu);
  const short psx_y = (short)(value >> 16);
  const PGXP_value* cached = PGXP_GetCachedVertex(psx_x, psx_y);
  if ((cached) && (cached->gFlags == 1))
  {
    // iCB: Getting the wrong w component causes too great an error when using
    // perspective correction so disable it
    write_precise(*cached);
    return false;
  }

  // no valid value can be found anywhere, use the native PSX data
  *out_x = static_cast<float>(x);
  *out_y = static_cast<float>(y);
  *out_w = 1.0f;
  return false;
}
// pgxp_cpu.c
// Instruction register decoding
// NOTE: these repeat, token for token, the decode macros already defined in
// the pgxp_gte.c section above. The redefinition is only well-formed while the
// two copies stay identical.
#define op(_instr) (_instr >> 26) // The op part of the instruction register
#define func(_instr) ((_instr)&0x3F) // The funct part of the instruction register
#define sa(_instr) ((_instr >> 6) & 0x1F) // The sa part of the instruction register
#define rd(_instr) ((_instr >> 11) & 0x1F) // The rd part of the instruction register
#define rt(_instr) ((_instr >> 16) & 0x1F) // The rt part of the instruction register
#define rs(_instr) ((_instr >> 21) & 0x1F) // The rs part of the instruction register
#define imm(_instr) (_instr & 0xFFFF) // The immediate part of the instruction register
// Zeroes the CPU and CP0 shadow register files, clearing every valid flag.
void PGXP_InitCPU()
{
  memset(&CPU_reg_mem[0], 0, sizeof(CPU_reg_mem));
  memset(&CP0_reg_mem[0], 0, sizeof(CP0_reg_mem));
}
// invalidate register (invalid 8 bit read)
// Invalidates the destination CPU register of a load PGXP cannot track (8-bit
// loads).
//   addr   address that was loaded from
//   code   the instruction word; its rt field selects the register
//   value  marker stored in `count` when the address is not mirrored
// The register shadow receives a copy of the mirror entry (if any) with all
// flags cleared. When the address is not mirrored it receives x = y = -1337,
// count = value and every other field zero; previously those other fields were
// left uninitialised and copied into the register file as garbage.
static void InvalidLoad(u32 addr, u32 code, u32 value)
{
  const u32 reg = ((code >> 16) & 0x1F); // The rt part of the instruction register

  PGXP_value p{};
  p.x = p.y = -1337; // default values

  const PGXP_value* pD = ReadMem(addr);
  if (pD)
  {
    // The original code assigned p.count = addr right before this copy, which
    // the copy immediately overwrote; the mirror entry's own count is kept.
    p = *pD;
  }
  else
  {
    p.count = value;
  }

  p.flags = 0;

  // invalidate register
  CPU_reg[reg] = p;
}
// invalidate memory address (invalid 8 bit write)
// Invalidates the mirror entry hit by a store PGXP cannot track (8-bit
// stores). The entry keeps its contents but loses all flags, and its `count`
// is set to a marker built from the source register index and `value`.
// Stores to unmirrored addresses have no effect.
static void InvalidStore(u32 addr, u32 code, u32 value)
{
  const u32 reg = ((code >> 16) & 0x1F); // The rt part of the instruction register

  const PGXP_value* existing = ReadMem(addr);
  if (!existing)
    return; // nothing is mirrored at this address, so there is nothing to invalidate

  PGXP_value invalidated = *existing;
  invalidated.flags = 0;
  invalidated.count = (reg * 1000) + value;

  // invalidate memory
  WriteMem(&invalidated, addr);
}
// LW tracking: Rt = Mem[addr]. The mirror entry is validated against the
// loaded word and copied into the destination register shadow.
void CPU_LW(u32 instr, u32 rtVal, u32 addr)
{
  PGXP_value* const target = &CPU_reg[rt(instr)];
  ValidateAndCopyMem(target, addr, rtVal);
}
// LB/LBU tracking: byte loads are not tracked, so the destination register
// shadow is invalidated. 116 is the marker stored in `count` when the address
// is not mirrored. rtVal is unused.
void CPU_LBx(u32 instr, u32 rtVal, u32 addr)
{
  const u32 marker = 116;
  InvalidLoad(addr, instr, marker);
}
// LH/LHU tracking: Rt = Mem[addr], half-word. The addressed half of the mirror
// entry is validated and moved into the low component of the register shadow.
void CPU_LHx(u32 instr, u32 rtVal, u32 addr)
{
  PGXP_value* const target = &CPU_reg[rt(instr)];
  ValidateAndCopyMem16(target, addr, rtVal, 1);
}
// SB tracking: byte stores are not tracked, so the mirror entry at addr is
// invalidated. 208 is folded into the marker written to `count`. rtVal is
// unused.
void CPU_SB(u32 instr, u8 rtVal, u32 addr)
{
  const u32 marker = 208;
  InvalidStore(addr, instr, marker);
}
// SH tracking: Mem[addr] = low half of Rt. Only the low 16 bits of the
// register shadow are validated (clearing just the component-0 valid bit on
// mismatch) before its low component is merged into the mirror.
void CPU_SH(u32 instr, u16 rtVal, u32 addr)
{
  PGXP_value* const source = &CPU_reg[rt(instr)];
  MaskValidate(source, rtVal, 0xFFFF, VALID_0);
  WriteMem16(source, addr);
}
// SW tracking: Mem[addr] = Rt. The register shadow is validated against the
// stored word and then copied into the memory mirror.
void CPU_SW(u32 instr, u32 rtVal, u32 addr)
{
  PGXP_value* const source = &CPU_reg[rt(instr)];
  Validate(source, rtVal);
  WriteMem(source, addr);
}
} // namespace PGXP

54
src/core/pgxp.h Normal file
View file

@ -0,0 +1,54 @@
/***************************************************************************
* Original copyright notice from PGXP code from Beetle PSX. *
* Copyright (C) 2016 by iCatButler *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. *
***************************************************************************/
#pragma once
#include "types.h"
// Public interface of the PGXP tracker. The CPU_* hooks are called with the
// instruction word and the real values involved so PGXP can keep its
// floating-point shadows of registers and memory in step with the emulated
// machine.
namespace PGXP {
// Clears all tracking state (memory mirror, CPU/CP0 and GTE register shadows).
void Initialize();
// -- GTE functions
// Transforms
// Push a precise vertex onto the shadow SXY FIFO; _v / v is the raw packed SXY
// word. The `s` variant divides _x and _y by 65536 before pushing.
void GTE_PushSXYZ2f(float _x, float _y, float _z, unsigned int _v);
void GTE_PushSXYZ2s(s64 _x, s64 _y, s64 _z, u32 v);
// Returns 1 when all three shadow FIFO entries match the given raw register
// values and have valid x/y, i.e. when GTE_NCLIP() may be used; 0 otherwise.
int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2);
// NCLIP computed from the precise FIFO coordinates.
float GTE_NCLIP();
// Data transfer tracking
void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE data reg to GPR reg (MFC2)
void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE data reg (MTC2)
void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE ctrl reg to GPR reg (CFC2)
void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE ctrl reg (CTC2)
// Memory Access
void CPU_LWC2(u32 instr, u32 rtVal, u32 addr); // copy memory to GTE reg
void CPU_SWC2(u32 instr, u32 rtVal, u32 addr); // copy GTE reg to memory
// Resolves the precise position of the vertex word `value` read from `addr`.
// out_x/out_y/out_w are always written (falling back to x, y and w = 1).
// Returns true only when the tracked value also has a valid depth, so that
// out_w is usable.
bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y, float* out_w);
// -- CPU functions
// Load/store tracking. Word and half-word accesses propagate tracked values;
// byte accesses (LBx, SB) invalidate the register or memory entry instead.
void CPU_LW(u32 instr, u32 rtVal, u32 addr);
void CPU_LHx(u32 instr, u32 rtVal, u32 addr);
void CPU_LBx(u32 instr, u32 rtVal, u32 addr);
void CPU_SB(u32 instr, u8 rtVal, u32 addr);
void CPU_SH(u32 instr, u16 rtVal, u32 addr);
void CPU_SW(u32 instr, u32 rtVal, u32 addr);
} // namespace PGXP

View file

@ -101,6 +101,10 @@ void Settings::Load(SettingsInterface& si)
gpu_disable_interlacing = si.GetBoolValue("GPU", "DisableInterlacing", false); gpu_disable_interlacing = si.GetBoolValue("GPU", "DisableInterlacing", false);
gpu_force_ntsc_timings = si.GetBoolValue("GPU", "ForceNTSCTimings", false); gpu_force_ntsc_timings = si.GetBoolValue("GPU", "ForceNTSCTimings", false);
gpu_widescreen_hack = si.GetBoolValue("GPU", "WidescreenHack", false); gpu_widescreen_hack = si.GetBoolValue("GPU", "WidescreenHack", false);
gpu_pgxp_enable = si.GetBoolValue("GPU", "PGXPEnable", false);
gpu_pgxp_culling = si.GetBoolValue("GPU", "PGXPCulling", true);
gpu_pgxp_texture_correction = si.GetBoolValue("GPU", "PGXPTextureCorrection", true);
gpu_pgxp_vertex_cache = si.GetBoolValue("GPU", "PGXPVertexCache", false);
display_crop_mode = display_crop_mode =
ParseDisplayCropMode( ParseDisplayCropMode(
@ -203,6 +207,10 @@ void Settings::Save(SettingsInterface& si) const
si.SetBoolValue("GPU", "DisableInterlacing", gpu_disable_interlacing); si.SetBoolValue("GPU", "DisableInterlacing", gpu_disable_interlacing);
si.SetBoolValue("GPU", "ForceNTSCTimings", gpu_force_ntsc_timings); si.SetBoolValue("GPU", "ForceNTSCTimings", gpu_force_ntsc_timings);
si.SetBoolValue("GPU", "WidescreenHack", gpu_widescreen_hack); si.SetBoolValue("GPU", "WidescreenHack", gpu_widescreen_hack);
si.SetBoolValue("GPU", "PGXPEnable", gpu_pgxp_enable);
si.SetBoolValue("GPU", "PGXPCulling", gpu_pgxp_culling);
si.SetBoolValue("GPU", "PGXPTextureCorrection", gpu_pgxp_texture_correction);
si.SetBoolValue("GPU", "PGXPVertexCache", gpu_pgxp_vertex_cache);
si.SetStringValue("Display", "CropMode", GetDisplayCropModeName(display_crop_mode)); si.SetStringValue("Display", "CropMode", GetDisplayCropModeName(display_crop_mode));
si.SetStringValue("Display", "AspectRatio", GetDisplayAspectRatioName(display_aspect_ratio)); si.SetStringValue("Display", "AspectRatio", GetDisplayAspectRatioName(display_aspect_ratio));

View file

@ -88,6 +88,10 @@ struct Settings
bool gpu_disable_interlacing = false; bool gpu_disable_interlacing = false;
bool gpu_force_ntsc_timings = false; bool gpu_force_ntsc_timings = false;
bool gpu_widescreen_hack = false; bool gpu_widescreen_hack = false;
bool gpu_pgxp_enable = false;
bool gpu_pgxp_culling = true;
bool gpu_pgxp_texture_correction = true;
bool gpu_pgxp_vertex_cache = false;
DisplayCropMode display_crop_mode = DisplayCropMode::None; DisplayCropMode display_crop_mode = DisplayCropMode::None;
DisplayAspectRatio display_aspect_ratio = DisplayAspectRatio::R4_3; DisplayAspectRatio display_aspect_ratio = DisplayAspectRatio::R4_3;
bool display_linear_filtering = true; bool display_linear_filtering = true;
@ -146,6 +150,7 @@ struct Settings
bool log_to_window = false; bool log_to_window = false;
bool log_to_file = false; bool log_to_file = false;
ALWAYS_INLINE bool IsUsingCodeCache() const { return (cpu_execution_mode != CPUExecutionMode::Interpreter); }
ALWAYS_INLINE bool IsUsingRecompiler() const { return (cpu_execution_mode == CPUExecutionMode::Recompiler); } ALWAYS_INLINE bool IsUsingRecompiler() const { return (cpu_execution_mode == CPUExecutionMode::Recompiler); }
ALWAYS_INLINE bool IsUsingSoftwareRenderer() const { return (gpu_renderer == GPURenderer::Software); } ALWAYS_INLINE bool IsUsingSoftwareRenderer() const { return (gpu_renderer == GPURenderer::Software); }

View file

@ -352,7 +352,7 @@ void LibretroHostInterface::OnSystemDestroyed()
m_using_hardware_renderer = false; m_using_hardware_renderer = false;
} }
static std::array<retro_core_option_definition, 23> s_option_definitions = {{ static std::array<retro_core_option_definition, 27> s_option_definitions = {{
{"Console.Region", {"Console.Region",
"Console Region", "Console Region",
"Determines which region/hardware to emulate. Auto-Detect will use the region of the disc inserted.", "Determines which region/hardware to emulate. Auto-Detect will use the region of the disc inserted.",
@ -447,6 +447,29 @@ static std::array<retro_core_option_definition, 23> s_option_definitions = {{
"backgrounds, this enhancement will not work as expected.", "backgrounds, this enhancement will not work as expected.",
{{"true", "Enabled"}, {"false", "Disabled"}}, {{"true", "Enabled"}, {"false", "Disabled"}},
"false"}, "false"},
{"GPU.PGXPEnable",
"PGXP Geometry Correction",
"Reduces \"wobbly\" polygons by attempting to preserve the fractional component through memory transfers. Only "
"works with the hardware renderers, and may not be compatible with all games.",
{{"true", "Enabled"}, {"false", "Disabled"}},
"false"},
{"GPU.PGXPCulling",
"PGXP Culling Correction",
"Increases the precision of polygon culling, reducing the number of holes in geometry. Requires geometry correction "
"enabled.",
{{"true", "Enabled"}, {"false", "Disabled"}},
"true"},
{"GPU.PGXPTextureCorrection",
"PGXP Texture Correction",
"Uses perspective-correct interpolation for texture coordinates and colors, straightening out warped textures. "
"Requires geometry correction enabled.",
{{"true", "Enabled"}, {"false", "Disabled"}},
"true"},
{"GPU.PGXPVertexCache",
"PGXP Vertex Cache",
"Uses screen coordinates as a fallback when tracking vertices through memory fails. May improve PGXP compatibility.",
{{"true", "Enabled"}, {"false", "Disabled"}},
"false"},
{"Display.CropMode", {"Display.CropMode",
"Crop Mode", "Crop Mode",
"Changes how much of the image is cropped. Some games display garbage in the overscan area which is typically " "Changes how much of the image is cropped. Some games display garbage in the overscan area which is typically "

View file

@ -40,11 +40,20 @@ GPUSettingsWidget::GPUSettingsWidget(QtHostInterface* host_interface, QWidget* p
"TextureFiltering"); "TextureFiltering");
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.widescreenHack, "GPU", "WidescreenHack"); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.widescreenHack, "GPU", "WidescreenHack");
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpEnable, "GPU", "PGXPEnable", false);
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpCulling, "GPU", "PGXPCulling", true);
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpTextureCorrection, "GPU",
"PGXPTextureCorrection", true);
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.pgxpVertexCache, "GPU", "PGXPVertexCache", false);
connect(m_ui.resolutionScale, QOverload<int>::of(&QComboBox::currentIndexChanged), this, connect(m_ui.resolutionScale, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
&GPUSettingsWidget::updateScaledDitheringEnabled); &GPUSettingsWidget::updateScaledDitheringEnabled);
connect(m_ui.trueColor, &QCheckBox::stateChanged, this, &GPUSettingsWidget::updateScaledDitheringEnabled); connect(m_ui.trueColor, &QCheckBox::stateChanged, this, &GPUSettingsWidget::updateScaledDitheringEnabled);
updateScaledDitheringEnabled(); updateScaledDitheringEnabled();
connect(m_ui.pgxpEnable, &QCheckBox::stateChanged, this, &GPUSettingsWidget::updatePGXPSettingsEnabled);
updatePGXPSettingsEnabled();
connect(m_ui.renderer, QOverload<int>::of(&QComboBox::currentIndexChanged), this, connect(m_ui.renderer, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
&GPUSettingsWidget::populateGPUAdapters); &GPUSettingsWidget::populateGPUAdapters);
connect(m_ui.adapter, QOverload<int>::of(&QComboBox::currentIndexChanged), this, connect(m_ui.adapter, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
@ -126,6 +135,19 @@ GPUSettingsWidget::GPUSettingsWidget(QtHostInterface* host_interface, QWidget* p
tr("Scales vertex positions in screen-space to a widescreen aspect ratio, essentially " tr("Scales vertex positions in screen-space to a widescreen aspect ratio, essentially "
"increasing the field of view from 4:3 to 16:9 in 3D games. For 2D games, or games which " "increasing the field of view from 4:3 to 16:9 in 3D games. For 2D games, or games which "
"use pre-rendered backgrounds, this enhancement will not work as expected.")); "use pre-rendered backgrounds, this enhancement will not work as expected."));
dialog->registerWidgetHelp(
m_ui.pgxpEnable, tr("Geometry Correction"), tr("Unchecked"),
tr("Reduces \"wobbly\" polygons by attempting to preserve the fractional component through memory transfers. Only "
"works with the hardware renderers, and may not be compatible with all games."));
dialog->registerWidgetHelp(m_ui.pgxpCulling, tr("Culling Correction"), tr("Checked"),
tr("Increases the precision of polygon culling, reducing the number of holes in geometry. "
"Requires geometry correction enabled."));
dialog->registerWidgetHelp(m_ui.pgxpTextureCorrection, tr("Texture Correction"), tr("Checked"),
tr("Uses perspective-correct interpolation for texture coordinates and colors, "
"straightening out warped textures. Requires geometry correction enabled."));
dialog->registerWidgetHelp(m_ui.pgxpVertexCache, tr("Vertex Cache"), tr("Unchecked"),
tr("Uses screen coordinates as a fallback when tracking vertices through memory fails. "
"May improve PGXP compatibility."));
} }
GPUSettingsWidget::~GPUSettingsWidget() = default; GPUSettingsWidget::~GPUSettingsWidget() = default;
@ -232,3 +254,11 @@ void GPUSettingsWidget::onGPUAdapterIndexChanged()
m_host_interface->SetStringSettingValue("GPU", "Adapter", m_ui.adapter->currentText().toUtf8().constData()); m_host_interface->SetStringSettingValue("GPU", "Adapter", m_ui.adapter->currentText().toUtf8().constData());
} }
// Mirrors the state of the pgxpEnable check box onto the three dependent PGXP check boxes:
// they are enabled while pgxpEnable is checked and disabled otherwise.
void GPUSettingsWidget::updatePGXPSettingsEnabled()
{
  const bool pgxp_checked = m_ui.pgxpEnable->isChecked();

  m_ui.pgxpVertexCache->setEnabled(pgxp_checked);
  m_ui.pgxpTextureCorrection->setEnabled(pgxp_checked);
  m_ui.pgxpCulling->setEnabled(pgxp_checked);
}

View file

@ -19,6 +19,7 @@ private Q_SLOTS:
void updateScaledDitheringEnabled(); void updateScaledDitheringEnabled();
void populateGPUAdapters(); void populateGPUAdapters();
void onGPUAdapterIndexChanged(); void onGPUAdapterIndexChanged();
void updatePGXPSettingsEnabled();
private: private:
void setupAdditionalUi(); void setupAdditionalUi();

View file

@ -7,13 +7,13 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>448</width> <width>448</width>
<height>307</height> <height>720</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
<string>Form</string> <string>Form</string>
</property> </property>
<layout class="QVBoxLayout" name="verticalLayout"> <layout class="QVBoxLayout" name="verticalLayout_2">
<property name="leftMargin"> <property name="leftMargin">
<number>0</number> <number>0</number>
</property> </property>
@ -27,165 +27,221 @@
<number>0</number> <number>0</number>
</property> </property>
<item> <item>
<widget class="QGroupBox" name="groupBox"> <widget class="QScrollArea" name="scrollArea">
<property name="title"> <property name="widgetResizable">
<string>Basic</string> <bool>true</bool>
</property> </property>
<layout class="QFormLayout" name="formLayout_3"> <widget class="QWidget" name="scrollAreaWidgetContents">
<item row="0" column="0"> <property name="geometry">
<widget class="QLabel" name="label"> <rect>
<property name="text"> <x>0</x>
<string>Renderer:</string> <y>0</y>
</property> <width>423</width>
</widget> <height>762</height>
</item> </rect>
<item row="0" column="1"> </property>
<widget class="QComboBox" name="renderer"/> <layout class="QVBoxLayout" name="verticalLayout_3">
</item> <item>
<item row="1" column="0"> <widget class="QGroupBox" name="groupBox">
<widget class="QLabel" name="label_5"> <property name="title">
<property name="text"> <string>Basic</string>
<string>Adapter:</string> </property>
</property> <layout class="QFormLayout" name="formLayout_3">
</widget> <item row="0" column="0">
</item> <widget class="QLabel" name="label">
<item row="1" column="1"> <property name="text">
<widget class="QComboBox" name="adapter"/> <string>Renderer:</string>
</item> </property>
<item row="2" column="0" colspan="2"> </widget>
<widget class="QCheckBox" name="useDebugDevice"> </item>
<property name="text"> <item row="0" column="1">
<string>Use Debug Device</string> <widget class="QComboBox" name="renderer"/>
</property> </item>
</widget> <item row="1" column="0">
</item> <widget class="QLabel" name="label_5">
</layout> <property name="text">
<string>Adapter:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="adapter"/>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="useDebugDevice">
<property name="text">
<string>Use Debug Device</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_3">
<property name="title">
<string>Screen Display</string>
</property>
<layout class="QFormLayout" name="formLayout_4">
<item row="0" column="0">
<widget class="QLabel" name="label_4">
<property name="text">
<string>Aspect Ratio:</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="displayAspectRatio"/>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>Crop:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="displayCropMode"/>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="displayLinearFiltering">
<property name="text">
<string>Linear Upscaling</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="displayIntegerScaling">
<property name="text">
<string>Integer Upscaling</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="vsync">
<property name="text">
<string>VSync</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>Enhancements</string>
</property>
<layout class="QFormLayout" name="formLayout_2">
<item row="0" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Resolution Scale:</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="resolutionScale"/>
</item>
<item row="1" column="0" colspan="2">
<widget class="QCheckBox" name="trueColor">
<property name="text">
<string>True Color Rendering (24-bit, disables dithering)</string>
</property>
</widget>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="scaledDithering">
<property name="text">
<string>Scaled Dithering (scale dither pattern to resolution)</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="disableInterlacing">
<property name="text">
<string>Disable Interlacing (force progressive render/scan)</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="forceNTSCTimings">
<property name="text">
<string>Force NTSC Timings (60hz-on-PAL)</string>
</property>
</widget>
</item>
<item row="5" column="0" colspan="2">
<widget class="QCheckBox" name="linearTextureFiltering">
<property name="text">
<string>Bilinear Texture Filtering</string>
</property>
</widget>
</item>
<item row="6" column="0" colspan="2">
<widget class="QCheckBox" name="widescreenHack">
<property name="text">
<string>Widescreen Hack</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_4">
<property name="title">
<string>PGXP</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QCheckBox" name="pgxpEnable">
<property name="text">
<string>Geometry Correction</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="pgxpCulling">
<property name="text">
<string>Culling Correction</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="pgxpTextureCorrection">
<property name="text">
<string>Texture Correction</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="pgxpVertexCache">
<property name="text">
<string>Vertex Cache</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</widget> </widget>
</item> </item>
<item>
<widget class="QGroupBox" name="groupBox_3">
<property name="title">
<string>Screen Display</string>
</property>
<layout class="QFormLayout" name="formLayout_4">
<item row="0" column="0">
<widget class="QLabel" name="label_4">
<property name="text">
<string>Aspect Ratio:</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="displayAspectRatio"/>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>Crop:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="displayCropMode"/>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="displayLinearFiltering">
<property name="text">
<string>Linear Upscaling</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="displayIntegerScaling">
<property name="text">
<string>Integer Upscaling</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="vsync">
<property name="text">
<string>VSync</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>Enhancements</string>
</property>
<layout class="QFormLayout" name="formLayout_2">
<item row="0" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Resolution Scale:</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="resolutionScale"/>
</item>
<item row="1" column="0" colspan="2">
<widget class="QCheckBox" name="trueColor">
<property name="text">
<string>True Color Rendering (24-bit, disables dithering)</string>
</property>
</widget>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="scaledDithering">
<property name="text">
<string>Scaled Dithering (scale dither pattern to resolution)</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="disableInterlacing">
<property name="text">
<string>Disable Interlacing (force progressive render/scan)</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="forceNTSCTimings">
<property name="text">
<string>Force NTSC Timings (60hz-on-PAL)</string>
</property>
</widget>
</item>
<item row="5" column="0" colspan="2">
<widget class="QCheckBox" name="linearTextureFiltering">
<property name="text">
<string>Bilinear Texture Filtering</string>
</property>
</widget>
</item>
<item row="6" column="0" colspan="2">
<widget class="QCheckBox" name="widescreenHack">
<property name="text">
<string>Widescreen Hack</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout> </layout>
</widget> </widget>
<resources/> <resources/>

View file

@ -858,6 +858,18 @@ void SDLHostInterface::DrawQuickSettingsMenu()
ImGui::EndMenu(); ImGui::EndMenu();
} }
// "PGXP" submenu of the quick settings menu. The three dependent entries pass the current
// gpu_pgxp_enable value as MenuItem's final argument; any toggled entry sets settings_changed.
if (ImGui::BeginMenu("PGXP"))
{
  auto& cfg = m_settings_copy;

  settings_changed |= ImGui::MenuItem("PGXP Enabled", nullptr, &cfg.gpu_pgxp_enable);
  settings_changed |= ImGui::MenuItem("PGXP Culling", nullptr, &cfg.gpu_pgxp_culling, cfg.gpu_pgxp_enable);
  settings_changed |=
    ImGui::MenuItem("PGXP Texture Correction", nullptr, &cfg.gpu_pgxp_texture_correction, cfg.gpu_pgxp_enable);
  settings_changed |= ImGui::MenuItem("PGXP Vertex Cache", nullptr, &cfg.gpu_pgxp_vertex_cache, cfg.gpu_pgxp_enable);

  ImGui::EndMenu();
}
settings_changed |= ImGui::MenuItem("True (24-Bit) Color", nullptr, &m_settings_copy.gpu_true_color); settings_changed |= ImGui::MenuItem("True (24-Bit) Color", nullptr, &m_settings_copy.gpu_true_color);
settings_changed |= ImGui::MenuItem("Scaled Dithering", nullptr, &m_settings_copy.gpu_scaled_dithering); settings_changed |= ImGui::MenuItem("Scaled Dithering", nullptr, &m_settings_copy.gpu_scaled_dithering);
settings_changed |= ImGui::MenuItem("Texture Filtering", nullptr, &m_settings_copy.gpu_texture_filtering); settings_changed |= ImGui::MenuItem("Texture Filtering", nullptr, &m_settings_copy.gpu_texture_filtering);
@ -1316,6 +1328,11 @@ void SDLHostInterface::DrawSettingsWindow()
settings_changed |= ImGui::Checkbox("Disable Interlacing", &m_settings_copy.gpu_disable_interlacing); settings_changed |= ImGui::Checkbox("Disable Interlacing", &m_settings_copy.gpu_disable_interlacing);
settings_changed |= ImGui::Checkbox("Force NTSC Timings", &m_settings_copy.gpu_force_ntsc_timings); settings_changed |= ImGui::Checkbox("Force NTSC Timings", &m_settings_copy.gpu_force_ntsc_timings);
settings_changed |= ImGui::Checkbox("Widescreen Hack", &m_settings_copy.gpu_widescreen_hack); settings_changed |= ImGui::Checkbox("Widescreen Hack", &m_settings_copy.gpu_widescreen_hack);
// PGXP options; each checkbox writes straight into m_settings_copy and flags settings_changed when toggled.
// NOTE(review): unlike the "PGXP" quick-settings menu, the three dependent options here are not gated on
// gpu_pgxp_enable, so they stay editable while PGXP is off - confirm whether that is intentional.
settings_changed |= ImGui::Checkbox("PGXP Enabled", &m_settings_copy.gpu_pgxp_enable);
settings_changed |= ImGui::Checkbox("PGXP Culling", &m_settings_copy.gpu_pgxp_culling);
settings_changed |= ImGui::Checkbox("PGXP Texture Correction", &m_settings_copy.gpu_pgxp_texture_correction);
settings_changed |= ImGui::Checkbox("PGXP Vertex Cache", &m_settings_copy.gpu_pgxp_vertex_cache);
} }
ImGui::EndTabItem(); ImGui::EndTabItem();

View file

@ -8,11 +8,13 @@
#include "controller_interface.h" #include "controller_interface.h"
#include "core/cdrom.h" #include "core/cdrom.h"
#include "core/controller.h" #include "core/controller.h"
#include "core/cpu_code_cache.h"
#include "core/dma.h" #include "core/dma.h"
#include "core/game_list.h" #include "core/game_list.h"
#include "core/gpu.h" #include "core/gpu.h"
#include "core/host_display.h" #include "core/host_display.h"
#include "core/mdec.h" #include "core/mdec.h"
#include "core/pgxp.h"
#include "core/save_state_version.h" #include "core/save_state_version.h"
#include "core/spu.h" #include "core/spu.h"
#include "core/system.h" #include "core/system.h"
@ -1295,6 +1297,22 @@ void CommonHostInterface::RegisterGraphicsHotkeys()
ToggleSoftwareRendering(); ToggleSoftwareRendering();
}); });
// "Toggle PGXP" hotkey in the Graphics category: flips g_settings.gpu_pgxp_enable and reports the new state.
RegisterHotkey(StaticString("Graphics"), StaticString("TogglePGXP"), StaticString("Toggle PGXP"),
               [this](bool pressed) {
                 // only act when the callback is invoked with pressed == false
                 if (pressed)
                   return;

                 const bool pgxp_now_on = !g_settings.gpu_pgxp_enable;
                 g_settings.gpu_pgxp_enable = pgxp_now_on;
                 ReportFormattedMessage("PGXP is now %s.", pgxp_now_on ? "enabled" : "disabled");

                 if (pgxp_now_on)
                   PGXP::Initialize();

                 // all blocks have to be recompiled whenever pgxp is switched on or off
                 if (g_settings.IsUsingCodeCache())
                   CPU::CodeCache::Flush();
               });
RegisterHotkey(StaticString("Graphics"), StaticString("IncreaseResolutionScale"), RegisterHotkey(StaticString("Graphics"), StaticString("IncreaseResolutionScale"),
StaticString("Increase Resolution Scale"), [this](bool pressed) { StaticString("Increase Resolution Scale"), [this](bool pressed) {
if (!pressed) if (!pressed)