mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-02-28 08:35:38 +00:00
GPU: Split software to frontend/backend
This commit is contained in:
parent
03d4f80883
commit
2595e31575
src
core
CMakeLists.txtcore.vcxprojcore.vcxproj.filtersgpu.cppgpu.hgpu_backend.cppgpu_backend.hgpu_hw.cppgpu_sw.cppgpu_sw.hgpu_sw_backend.cppgpu_sw_backend.hgpu_types.hhost_interface.cppsettings.cppsettings.h
duckstation-libretro
duckstation-qt
duckstation-sdl
|
@ -30,6 +30,8 @@ add_library(core
|
||||||
dma.h
|
dma.h
|
||||||
gpu.cpp
|
gpu.cpp
|
||||||
gpu.h
|
gpu.h
|
||||||
|
gpu_backend.cpp
|
||||||
|
gpu_backend.h
|
||||||
gpu_commands.cpp
|
gpu_commands.cpp
|
||||||
gpu_hw.cpp
|
gpu_hw.cpp
|
||||||
gpu_hw.h
|
gpu_hw.h
|
||||||
|
@ -41,6 +43,8 @@ add_library(core
|
||||||
gpu_hw_vulkan.h
|
gpu_hw_vulkan.h
|
||||||
gpu_sw.cpp
|
gpu_sw.cpp
|
||||||
gpu_sw.h
|
gpu_sw.h
|
||||||
|
gpu_sw_backend.cpp
|
||||||
|
gpu_sw_backend.h
|
||||||
gpu_types.h
|
gpu_types.h
|
||||||
gte.cpp
|
gte.cpp
|
||||||
gte.h
|
gte.h
|
||||||
|
|
|
@ -115,11 +115,13 @@
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="cpu_types.cpp" />
|
<ClCompile Include="cpu_types.cpp" />
|
||||||
<ClCompile Include="digital_controller.cpp" />
|
<ClCompile Include="digital_controller.cpp" />
|
||||||
|
<ClCompile Include="gpu_backend.cpp" />
|
||||||
<ClCompile Include="gpu_commands.cpp" />
|
<ClCompile Include="gpu_commands.cpp" />
|
||||||
<ClCompile Include="gpu_hw_d3d11.cpp" />
|
<ClCompile Include="gpu_hw_d3d11.cpp" />
|
||||||
<ClCompile Include="gpu_hw_shadergen.cpp" />
|
<ClCompile Include="gpu_hw_shadergen.cpp" />
|
||||||
<ClCompile Include="gpu_hw_vulkan.cpp" />
|
<ClCompile Include="gpu_hw_vulkan.cpp" />
|
||||||
<ClCompile Include="gpu_sw.cpp" />
|
<ClCompile Include="gpu_sw.cpp" />
|
||||||
|
<ClCompile Include="gpu_sw_backend.cpp" />
|
||||||
<ClCompile Include="gte.cpp" />
|
<ClCompile Include="gte.cpp" />
|
||||||
<ClCompile Include="dma.cpp" />
|
<ClCompile Include="dma.cpp" />
|
||||||
<ClCompile Include="gpu.cpp" />
|
<ClCompile Include="gpu.cpp" />
|
||||||
|
@ -185,10 +187,12 @@
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
<ClInclude Include="digital_controller.h" />
|
<ClInclude Include="digital_controller.h" />
|
||||||
|
<ClInclude Include="gpu_backend.h" />
|
||||||
<ClInclude Include="gpu_hw_d3d11.h" />
|
<ClInclude Include="gpu_hw_d3d11.h" />
|
||||||
<ClInclude Include="gpu_hw_shadergen.h" />
|
<ClInclude Include="gpu_hw_shadergen.h" />
|
||||||
<ClInclude Include="gpu_hw_vulkan.h" />
|
<ClInclude Include="gpu_hw_vulkan.h" />
|
||||||
<ClInclude Include="gpu_sw.h" />
|
<ClInclude Include="gpu_sw.h" />
|
||||||
|
<ClInclude Include="gpu_sw_backend.h" />
|
||||||
<ClInclude Include="gpu_types.h" />
|
<ClInclude Include="gpu_types.h" />
|
||||||
<ClInclude Include="gte.h" />
|
<ClInclude Include="gte.h" />
|
||||||
<ClInclude Include="cpu_types.h" />
|
<ClInclude Include="cpu_types.h" />
|
||||||
|
|
|
@ -51,6 +51,8 @@
|
||||||
<ClCompile Include="memory_card_image.cpp" />
|
<ClCompile Include="memory_card_image.cpp" />
|
||||||
<ClCompile Include="analog_joystick.cpp" />
|
<ClCompile Include="analog_joystick.cpp" />
|
||||||
<ClCompile Include="cpu_recompiler_code_generator_aarch32.cpp" />
|
<ClCompile Include="cpu_recompiler_code_generator_aarch32.cpp" />
|
||||||
|
<ClCompile Include="gpu_backend.cpp" />
|
||||||
|
<ClCompile Include="gpu_sw_backend.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="types.h" />
|
<ClInclude Include="types.h" />
|
||||||
|
@ -105,5 +107,7 @@
|
||||||
<ClInclude Include="memory_card_image.h" />
|
<ClInclude Include="memory_card_image.h" />
|
||||||
<ClInclude Include="analog_joystick.h" />
|
<ClInclude Include="analog_joystick.h" />
|
||||||
<ClInclude Include="gpu_types.h" />
|
<ClInclude Include="gpu_types.h" />
|
||||||
|
<ClInclude Include="gpu_backend.h" />
|
||||||
|
<ClInclude Include="gpu_sw_backend.h" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -129,10 +129,10 @@ bool GPU::DoState(StateWrapper& sw)
|
||||||
sw.Do(&m_draw_mode.texture_page_y);
|
sw.Do(&m_draw_mode.texture_page_y);
|
||||||
sw.Do(&m_draw_mode.texture_palette_x);
|
sw.Do(&m_draw_mode.texture_palette_x);
|
||||||
sw.Do(&m_draw_mode.texture_palette_y);
|
sw.Do(&m_draw_mode.texture_palette_y);
|
||||||
sw.Do(&m_draw_mode.texture_window_and_x);
|
sw.Do(&m_draw_mode.texture_window.and_x);
|
||||||
sw.Do(&m_draw_mode.texture_window_and_y);
|
sw.Do(&m_draw_mode.texture_window.and_y);
|
||||||
sw.Do(&m_draw_mode.texture_window_or_x);
|
sw.Do(&m_draw_mode.texture_window.or_x);
|
||||||
sw.Do(&m_draw_mode.texture_window_or_y);
|
sw.Do(&m_draw_mode.texture_window.or_y);
|
||||||
sw.Do(&m_draw_mode.texture_x_flip);
|
sw.Do(&m_draw_mode.texture_x_flip);
|
||||||
sw.Do(&m_draw_mode.texture_y_flip);
|
sw.Do(&m_draw_mode.texture_y_flip);
|
||||||
|
|
||||||
|
@ -1358,10 +1358,10 @@ void GPU::SetTextureWindow(u32 value)
|
||||||
const u8 offset_y = Truncate8((value >> 15) & UINT32_C(0x1F));
|
const u8 offset_y = Truncate8((value >> 15) & UINT32_C(0x1F));
|
||||||
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", mask_x, mask_y, offset_x, offset_y);
|
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", mask_x, mask_y, offset_x, offset_y);
|
||||||
|
|
||||||
m_draw_mode.texture_window_and_x = ~(mask_x * 8);
|
m_draw_mode.texture_window.and_x = ~(mask_x * 8);
|
||||||
m_draw_mode.texture_window_and_y = ~(mask_y * 8);
|
m_draw_mode.texture_window.and_y = ~(mask_y * 8);
|
||||||
m_draw_mode.texture_window_or_x = (offset_x & mask_x) * 8u;
|
m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u;
|
||||||
m_draw_mode.texture_window_or_y = (offset_y & mask_y) * 8u;
|
m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u;
|
||||||
m_draw_mode.texture_window_value = value;
|
m_draw_mode.texture_window_value = value;
|
||||||
m_draw_mode.texture_window_changed = true;
|
m_draw_mode.texture_window_changed = true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -159,9 +159,6 @@ protected:
|
||||||
ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; }
|
ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; }
|
||||||
|
|
||||||
// Helper/format conversion functions.
|
// Helper/format conversion functions.
|
||||||
static constexpr u8 Convert5To8(u8 x5) { return (x5 << 3) | (x5 & 7); }
|
|
||||||
static constexpr u8 Convert8To5(u8 x8) { return (x8 >> 3); }
|
|
||||||
|
|
||||||
static constexpr u32 RGBA5551ToRGBA8888(u16 color)
|
static constexpr u32 RGBA5551ToRGBA8888(u16 color)
|
||||||
{
|
{
|
||||||
u8 r = Truncate8(color & 31);
|
u8 r = Truncate8(color & 31);
|
||||||
|
@ -197,68 +194,10 @@ protected:
|
||||||
{
|
{
|
||||||
return std::make_tuple(static_cast<u8>(rgb24), static_cast<u8>(rgb24 >> 8), static_cast<u8>(rgb24 >> 16));
|
return std::make_tuple(static_cast<u8>(rgb24), static_cast<u8>(rgb24 >> 8), static_cast<u8>(rgb24 >> 16));
|
||||||
}
|
}
|
||||||
static constexpr u32 PackColorRGB24(u8 r, u8 g, u8 b)
|
|
||||||
{
|
|
||||||
return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
|
static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
|
||||||
bool remove_alpha);
|
bool remove_alpha);
|
||||||
|
|
||||||
union VRAMPixel
|
|
||||||
{
|
|
||||||
u16 bits;
|
|
||||||
|
|
||||||
BitField<u16, u8, 0, 5> r;
|
|
||||||
BitField<u16, u8, 5, 5> g;
|
|
||||||
BitField<u16, u8, 10, 5> b;
|
|
||||||
BitField<u16, bool, 15, 1> c;
|
|
||||||
|
|
||||||
u8 GetR8() const { return Convert5To8(r); }
|
|
||||||
u8 GetG8() const { return Convert5To8(g); }
|
|
||||||
u8 GetB8() const { return Convert5To8(b); }
|
|
||||||
|
|
||||||
void Set(u8 r_, u8 g_, u8 b_, bool c_ = false)
|
|
||||||
{
|
|
||||||
bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast<u16>(c_) << 15);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false)
|
|
||||||
{
|
|
||||||
Set(std::min<u8>(r_, 0x1F), std::min<u8>(g_, 0x1F), std::min<u8>(b_, 0x1F), c_);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetRGB24(u32 rgb24, bool c_ = false)
|
|
||||||
{
|
|
||||||
bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) |
|
|
||||||
(static_cast<u16>(c_) << 15);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false)
|
|
||||||
{
|
|
||||||
bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) |
|
|
||||||
(static_cast<u16>(c_) << 15);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false)
|
|
||||||
{
|
|
||||||
const s32 offset = DITHER_MATRIX[y & 3][x & 3];
|
|
||||||
r8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(r8)) + offset, 0, 255));
|
|
||||||
g8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(g8)) + offset, 0, 255));
|
|
||||||
b8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(b8)) + offset, 0, 255));
|
|
||||||
SetRGB24(r8, g8, b8, c_);
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 ToRGB24() const
|
|
||||||
{
|
|
||||||
const u32 r_ = ZeroExtend32(r.GetValue());
|
|
||||||
const u32 g_ = ZeroExtend32(g.GetValue());
|
|
||||||
const u32 b_ = ZeroExtend32(b.GetValue());
|
|
||||||
|
|
||||||
return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
void SoftReset();
|
void SoftReset();
|
||||||
|
|
||||||
// Sets dots per scanline
|
// Sets dots per scanline
|
||||||
|
@ -464,10 +403,7 @@ protected:
|
||||||
u32 texture_page_y;
|
u32 texture_page_y;
|
||||||
u32 texture_palette_x;
|
u32 texture_palette_x;
|
||||||
u32 texture_palette_y;
|
u32 texture_palette_y;
|
||||||
u8 texture_window_and_x;
|
GPUTextureWindow texture_window;
|
||||||
u8 texture_window_and_y;
|
|
||||||
u8 texture_window_or_x;
|
|
||||||
u8 texture_window_or_y;
|
|
||||||
bool texture_x_flip;
|
bool texture_x_flip;
|
||||||
bool texture_y_flip;
|
bool texture_y_flip;
|
||||||
bool texture_page_changed;
|
bool texture_page_changed;
|
||||||
|
|
327
src/core/gpu_backend.cpp
Normal file
327
src/core/gpu_backend.cpp
Normal file
|
@ -0,0 +1,327 @@
|
||||||
|
#include "gpu_backend.h"
|
||||||
|
#include "common/log.h"
|
||||||
|
#include "common/state_wrapper.h"
|
||||||
|
#include "settings.h"
|
||||||
|
Log_SetChannel(GPUBackend);
|
||||||
|
|
||||||
|
std::unique_ptr<GPUBackend> g_gpu_backend;
|
||||||
|
|
||||||
|
GPUBackend::GPUBackend() = default;
|
||||||
|
|
||||||
|
GPUBackend::~GPUBackend() = default;
|
||||||
|
|
||||||
|
bool GPUBackend::Initialize()
|
||||||
|
{
|
||||||
|
if (g_settings.gpu_use_thread)
|
||||||
|
StartGPUThread();
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBackend::Reset()
|
||||||
|
{
|
||||||
|
Sync();
|
||||||
|
m_drawing_area = {};
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBackend::UpdateSettings()
|
||||||
|
{
|
||||||
|
Sync();
|
||||||
|
|
||||||
|
if (m_use_gpu_thread != g_settings.gpu_use_thread)
|
||||||
|
{
|
||||||
|
if (!g_settings.gpu_use_thread)
|
||||||
|
StopGPUThread();
|
||||||
|
else
|
||||||
|
StartGPUThread();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBackend::Shutdown()
|
||||||
|
{
|
||||||
|
StopGPUThread();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Allocates a FillVRAM command in the FIFO and fills in its type and size.
/// The caller sets the remaining fields and submits it with PushCommand().
GPUBackendFillVRAMCommand* GPUBackend::NewFillVRAMCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendFillVRAMCommand));
  auto* const cmd = static_cast<GPUBackendFillVRAMCommand*>(storage);
  cmd->type = GPUBackendCommandType::FillVRAM;
  cmd->size = cmd->Size();
  return cmd;
}
|
||||||
|
|
||||||
|
/// Allocates an UpdateVRAM command with trailing room for `num_words` 16-bit
/// values, and fills in its type and total size.
/// @param num_words Number of u16 elements to reserve after the command header.
GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words)
{
  // Header plus the variable-length payload that follows it.
  const u32 total_size = sizeof(GPUBackendUpdateVRAMCommand) + (num_words * sizeof(u16));

  void* const storage = AllocateCommand(total_size);
  auto* const cmd = static_cast<GPUBackendUpdateVRAMCommand*>(storage);
  cmd->type = GPUBackendCommandType::UpdateVRAM;
  cmd->size = total_size;
  return cmd;
}
|
||||||
|
|
||||||
|
/// Allocates a CopyVRAM command in the FIFO and fills in its type and size.
/// The caller sets the remaining fields and submits it with PushCommand().
GPUBackendCopyVRAMCommand* GPUBackend::NewCopyVRAMCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendCopyVRAMCommand));
  auto* const cmd = static_cast<GPUBackendCopyVRAMCommand*>(storage);
  cmd->type = GPUBackendCommandType::CopyVRAM;
  cmd->size = cmd->Size();
  return cmd;
}
|
||||||
|
|
||||||
|
/// Allocates a SetDrawingArea command in the FIFO and fills in its type and size.
/// The caller sets the remaining fields and submits it with PushCommand().
GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendSetDrawingAreaCommand));
  auto* const cmd = static_cast<GPUBackendSetDrawingAreaCommand*>(storage);
  cmd->type = GPUBackendCommandType::SetDrawingArea;
  cmd->size = cmd->Size();
  return cmd;
}
|
||||||
|
|
||||||
|
/// Allocates a DrawPolygon command with trailing room for `num_vertices`
/// vertices, and fills in its type, total size and vertex count.
/// @param num_vertices Vertex count; stored truncated to 16 bits.
GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices)
{
  // Header plus the variable-length vertex array that follows it.
  const u32 total_size =
    sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex));

  void* const storage = AllocateCommand(total_size);
  auto* const cmd = static_cast<GPUBackendDrawPolygonCommand*>(storage);
  cmd->type = GPUBackendCommandType::DrawPolygon;
  cmd->size = total_size;
  cmd->num_vertices = Truncate16(num_vertices);
  return cmd;
}
|
||||||
|
|
||||||
|
/// Allocates a DrawRectangle command in the FIFO and fills in its type and size.
/// The caller sets the remaining fields and submits it with PushCommand().
GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendDrawRectangleCommand));
  auto* const cmd = static_cast<GPUBackendDrawRectangleCommand*>(storage);
  cmd->type = GPUBackendCommandType::DrawRectangle;
  cmd->size = cmd->Size();
  return cmd;
}
|
||||||
|
|
||||||
|
/// Allocates a DrawLine command with trailing room for `num_vertices`
/// vertices, and fills in its type, total size and vertex count.
/// @param num_vertices Vertex count; stored truncated to 16 bits.
GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices)
{
  // Header plus the variable-length vertex array that follows it.
  const u32 total_size =
    sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex));

  void* const storage = AllocateCommand(total_size);
  auto* const cmd = static_cast<GPUBackendDrawLineCommand*>(storage);
  cmd->type = GPUBackendCommandType::DrawLine;
  cmd->size = total_size;
  cmd->num_vertices = Truncate16(num_vertices);
  return cmd;
}
|
||||||
|
|
||||||
|
/// Reserves `size` contiguous bytes in the command FIFO and returns a pointer to them.
///
/// The write pointer is not advanced here (apart from being reset to 0 when the
/// buffer wraps); PushCommand() advances it when the command is submitted.
/// If there is not enough free space, this spins, waking the worker thread,
/// until the consumer has released enough of the buffer.
///
/// @param size Total size in bytes of the command, including its header.
/// @return Pointer into m_command_fifo_data at the current write position.
void* GPUBackend::AllocateCommand(u32 size)
{
  // Always leave room for one more command header behind the allocation. This
  // guarantees a Wraparound marker can still be written after this command, and
  // keeps the write pointer from landing exactly on the read pointer (which
  // would be indistinguishable from an empty queue). Both branches below must
  // use the same reservation.
  const auto required_size = size + sizeof(GPUBackendCommand);

  for (;;)
  {
    const u32 read_ptr = m_command_fifo_read_ptr.load();
    const u32 write_ptr = m_command_fifo_write_ptr.load();
    if (read_ptr > write_ptr)
    {
      // The writer has wrapped around; the free region is [write_ptr, read_ptr).
      if ((read_ptr - write_ptr) < required_size)
      {
        // Not enough room yet. Kick the consumer and re-evaluate from scratch:
        // if it catches up completely (read_ptr <= write_ptr) the next pass goes
        // through the branch below, which can emit a wraparound when the tail of
        // the buffer is too small, instead of waiting forever for space that
        // will never appear.
        WakeGPUThread();
        continue;
      }
    }
    else
    {
      // The free region is the tail of the buffer, [write_ptr, COMMAND_QUEUE_SIZE).
      const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr;
      if (required_size > available_size)
      {
        // allocate a dummy command to wrap the buffer around
        // NOTE(review): if read_ptr is 0 here while commands are pending, resetting
        // the write pointer to 0 makes the queue look empty - confirm this cannot occur.
        GPUBackendCommand* dummy_cmd = reinterpret_cast<GPUBackendCommand*>(&m_command_fifo_data[write_ptr]);
        dummy_cmd->type = GPUBackendCommandType::Wraparound;
        dummy_cmd->size = available_size;
        dummy_cmd->params.bits = 0;
        m_command_fifo_write_ptr.store(0);
        continue;
      }
    }

    return &m_command_fifo_data[write_ptr];
  }
}
|
||||||
|
|
||||||
|
/// Returns the number of bytes between the read and write pointers of the
/// command FIFO, accounting for the case where the write pointer has wrapped.
u32 GPUBackend::GetPendingCommandSize() const
{
  const u32 read_pos = m_command_fifo_read_ptr.load();
  const u32 write_pos = m_command_fifo_write_ptr.load();

  if (write_pos >= read_pos)
    return write_pos - read_pos;

  // Writer has wrapped: bytes up to the end of the buffer plus bytes from the start.
  return COMMAND_QUEUE_SIZE - read_pos + write_pos;
}
|
||||||
|
|
||||||
|
/// Submits a command previously obtained from AllocateCommand()/New*Command().
///
/// In threaded mode the write pointer is advanced past the command and the
/// worker is woken once enough bytes are pending. Otherwise the command is
/// executed immediately on the calling thread.
void GPUBackend::PushCommand(GPUBackendCommand* cmd)
{
  if (m_use_gpu_thread)
  {
    // Publish the command by moving the write pointer past it.
    const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size) + cmd->size;
    DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE);

    // Only wake the worker once a reasonable amount of work has accumulated.
    if (GetPendingCommandSize() >= THRESHOLD_TO_WAKE_GPU)
      WakeGPUThread();

    return;
  }

  // single-thread mode: Sync commands are skipped, everything else runs right away.
  if (cmd->type != GPUBackendCommandType::Sync)
    HandleCommand(cmd);
}
|
||||||
|
|
||||||
|
void GPUBackend::WakeGPUThread()
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(m_sync_mutex);
|
||||||
|
if (!m_gpu_thread_sleeping.load())
|
||||||
|
return;
|
||||||
|
|
||||||
|
m_wake_gpu_thread_cv.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBackend::StartGPUThread()
|
||||||
|
{
|
||||||
|
m_gpu_loop_done.store(false);
|
||||||
|
m_use_gpu_thread = true;
|
||||||
|
m_gpu_thread = std::thread(&GPUBackend::RunGPULoop, this);
|
||||||
|
Log_InfoPrint("GPU thread started.");
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBackend::StopGPUThread()
|
||||||
|
{
|
||||||
|
if (!m_use_gpu_thread)
|
||||||
|
return;
|
||||||
|
|
||||||
|
m_gpu_loop_done.store(true);
|
||||||
|
WakeGPUThread();
|
||||||
|
m_gpu_thread.join();
|
||||||
|
m_use_gpu_thread = false;
|
||||||
|
Log_InfoPrint("GPU thread stopped.");
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBackend::Sync()
|
||||||
|
{
|
||||||
|
if (!m_use_gpu_thread)
|
||||||
|
return;
|
||||||
|
|
||||||
|
GPUBackendSyncCommand* cmd = static_cast<GPUBackendSyncCommand*>(AllocateCommand(sizeof(GPUBackendSyncCommand)));
|
||||||
|
cmd->type = GPUBackendCommandType::Sync;
|
||||||
|
cmd->size = sizeof(GPUBackendSyncCommand);
|
||||||
|
PushCommand(cmd);
|
||||||
|
WakeGPUThread();
|
||||||
|
|
||||||
|
m_sync_event.Wait();
|
||||||
|
m_sync_event.Reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBackend::RunGPULoop()
|
||||||
|
{
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
u32 write_ptr = m_command_fifo_write_ptr.load();
|
||||||
|
u32 read_ptr = m_command_fifo_read_ptr.load();
|
||||||
|
if (read_ptr == write_ptr)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(m_sync_mutex);
|
||||||
|
m_gpu_thread_sleeping.store(true);
|
||||||
|
m_wake_gpu_thread_cv.wait(lock, [this]() { return m_gpu_loop_done.load() || GetPendingCommandSize() > 0; });
|
||||||
|
m_gpu_thread_sleeping.store(false);
|
||||||
|
|
||||||
|
if (m_gpu_loop_done.load())
|
||||||
|
break;
|
||||||
|
else
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (write_ptr < read_ptr)
|
||||||
|
write_ptr = COMMAND_QUEUE_SIZE;
|
||||||
|
|
||||||
|
while (read_ptr < write_ptr)
|
||||||
|
{
|
||||||
|
const GPUBackendCommand* cmd = reinterpret_cast<const GPUBackendCommand*>(&m_command_fifo_data[read_ptr]);
|
||||||
|
read_ptr += cmd->size;
|
||||||
|
|
||||||
|
switch (cmd->type)
|
||||||
|
{
|
||||||
|
case GPUBackendCommandType::Wraparound:
|
||||||
|
{
|
||||||
|
DebugAssert(read_ptr == COMMAND_QUEUE_SIZE);
|
||||||
|
write_ptr = m_command_fifo_write_ptr.load();
|
||||||
|
read_ptr = 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GPUBackendCommandType::Sync:
|
||||||
|
{
|
||||||
|
DebugAssert(read_ptr == write_ptr);
|
||||||
|
m_sync_event.Signal();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
HandleCommand(cmd);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_command_fifo_read_ptr.store(read_ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBackend::HandleCommand(const GPUBackendCommand* cmd)
|
||||||
|
{
|
||||||
|
switch (cmd->type)
|
||||||
|
{
|
||||||
|
case GPUBackendCommandType::FillVRAM:
|
||||||
|
{
|
||||||
|
FlushRender();
|
||||||
|
const GPUBackendFillVRAMCommand* ccmd = static_cast<const GPUBackendFillVRAMCommand*>(cmd);
|
||||||
|
FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
|
||||||
|
ccmd->color, ccmd->params);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GPUBackendCommandType::UpdateVRAM:
|
||||||
|
{
|
||||||
|
FlushRender();
|
||||||
|
const GPUBackendUpdateVRAMCommand* ccmd = static_cast<const GPUBackendUpdateVRAMCommand*>(cmd);
|
||||||
|
UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
|
||||||
|
ccmd->data, ccmd->params);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GPUBackendCommandType::CopyVRAM:
|
||||||
|
{
|
||||||
|
FlushRender();
|
||||||
|
const GPUBackendCopyVRAMCommand* ccmd = static_cast<const GPUBackendCopyVRAMCommand*>(cmd);
|
||||||
|
CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x),
|
||||||
|
ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->params);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GPUBackendCommandType::SetDrawingArea:
|
||||||
|
{
|
||||||
|
FlushRender();
|
||||||
|
m_drawing_area = static_cast<const GPUBackendSetDrawingAreaCommand*>(cmd)->new_area;
|
||||||
|
DrawingAreaChanged();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GPUBackendCommandType::DrawPolygon:
|
||||||
|
{
|
||||||
|
DrawPolygon(static_cast<const GPUBackendDrawPolygonCommand*>(cmd));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GPUBackendCommandType::DrawRectangle:
|
||||||
|
{
|
||||||
|
DrawRectangle(static_cast<const GPUBackendDrawRectangleCommand*>(cmd));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GPUBackendCommandType::DrawLine:
|
||||||
|
{
|
||||||
|
DrawLine(static_cast<const GPUBackendDrawLineCommand*>(cmd));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
91
src/core/gpu_backend.h
Normal file
91
src/core/gpu_backend.h
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
#pragma once
|
||||||
|
#include "common/event.h"
|
||||||
|
#include "common/heap_array.h"
|
||||||
|
#include "gpu_types.h"
|
||||||
|
#include <atomic>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable : 4324) // warning C4324: 'GPUBackend': structure was padded due to alignment specifier
|
||||||
|
#endif
|
||||||
|
|
||||||
|
class GPUBackend
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
GPUBackend();
|
||||||
|
virtual ~GPUBackend();
|
||||||
|
|
||||||
|
ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; }
|
||||||
|
|
||||||
|
virtual bool Initialize();
|
||||||
|
virtual void UpdateSettings();
|
||||||
|
virtual void Reset();
|
||||||
|
virtual void Shutdown();
|
||||||
|
|
||||||
|
GPUBackendFillVRAMCommand* NewFillVRAMCommand();
|
||||||
|
GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words);
|
||||||
|
GPUBackendCopyVRAMCommand* NewCopyVRAMCommand();
|
||||||
|
GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand();
|
||||||
|
GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices);
|
||||||
|
GPUBackendDrawRectangleCommand* NewDrawRectangleCommand();
|
||||||
|
GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices);
|
||||||
|
|
||||||
|
void PushCommand(GPUBackendCommand* cmd);
|
||||||
|
void Sync();
|
||||||
|
|
||||||
|
/// Processes all pending GPU commands.
|
||||||
|
void RunGPULoop();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void* AllocateCommand(u32 size);
|
||||||
|
u32 GetPendingCommandSize() const;
|
||||||
|
void WakeGPUThread();
|
||||||
|
void StartGPUThread();
|
||||||
|
void StopGPUThread();
|
||||||
|
|
||||||
|
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) = 0;
|
||||||
|
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
|
||||||
|
GPUBackendCommandParameters params) = 0;
|
||||||
|
virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
|
||||||
|
GPUBackendCommandParameters params) = 0;
|
||||||
|
virtual void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) = 0;
|
||||||
|
virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0;
|
||||||
|
virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0;
|
||||||
|
virtual void FlushRender() = 0;
|
||||||
|
virtual void DrawingAreaChanged() = 0;
|
||||||
|
|
||||||
|
void HandleCommand(const GPUBackendCommand* cmd);
|
||||||
|
|
||||||
|
u16* m_vram_ptr = nullptr;
|
||||||
|
|
||||||
|
Common::Rectangle<u32> m_drawing_area{};
|
||||||
|
|
||||||
|
Common::Event m_sync_event;
|
||||||
|
std::atomic_bool m_gpu_thread_sleeping{false};
|
||||||
|
std::atomic_bool m_gpu_loop_done{false};
|
||||||
|
std::thread m_gpu_thread;
|
||||||
|
bool m_use_gpu_thread = false;
|
||||||
|
|
||||||
|
std::mutex m_sync_mutex;
|
||||||
|
std::condition_variable m_sync_cpu_thread_cv;
|
||||||
|
std::condition_variable m_wake_gpu_thread_cv;
|
||||||
|
bool m_sync_done = false;
|
||||||
|
|
||||||
|
enum : u32
|
||||||
|
{
|
||||||
|
COMMAND_QUEUE_SIZE = 4 * 1024 * 1024,
|
||||||
|
THRESHOLD_TO_WAKE_GPU = 256
|
||||||
|
};
|
||||||
|
|
||||||
|
HeapArray<u8, COMMAND_QUEUE_SIZE> m_command_fifo_data;
|
||||||
|
alignas(64) std::atomic<u32> m_command_fifo_read_ptr{0};
|
||||||
|
alignas(64) std::atomic<u32> m_command_fifo_write_ptr{0};
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(pop)
|
||||||
|
#endif
|
|
@ -1004,10 +1004,10 @@ void GPU_HW::DispatchRenderCommand()
|
||||||
{
|
{
|
||||||
m_draw_mode.ClearTextureWindowChangedFlag();
|
m_draw_mode.ClearTextureWindowChangedFlag();
|
||||||
|
|
||||||
m_batch_ubo_data.u_texture_window_and[0] = ZeroExtend32(m_draw_mode.texture_window_and_x);
|
m_batch_ubo_data.u_texture_window_and[0] = ZeroExtend32(m_draw_mode.texture_window.and_x);
|
||||||
m_batch_ubo_data.u_texture_window_and[1] = ZeroExtend32(m_draw_mode.texture_window_and_y);
|
m_batch_ubo_data.u_texture_window_and[1] = ZeroExtend32(m_draw_mode.texture_window.and_y);
|
||||||
m_batch_ubo_data.u_texture_window_or[0] = ZeroExtend32(m_draw_mode.texture_window_or_x);
|
m_batch_ubo_data.u_texture_window_or[0] = ZeroExtend32(m_draw_mode.texture_window.or_x);
|
||||||
m_batch_ubo_data.u_texture_window_or[1] = ZeroExtend32(m_draw_mode.texture_window_or_y);
|
m_batch_ubo_data.u_texture_window_or[1] = ZeroExtend32(m_draw_mode.texture_window.or_y);
|
||||||
m_batch_ubo_dirty = true;
|
m_batch_ubo_dirty = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
1067
src/core/gpu_sw.cpp
1067
src/core/gpu_sw.cpp
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "common/heap_array.h"
|
#include "common/heap_array.h"
|
||||||
#include "gpu.h"
|
#include "gpu.h"
|
||||||
|
#include "gpu_sw_backend.h"
|
||||||
#include "host_display.h"
|
#include "host_display.h"
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
@ -18,37 +19,14 @@ public:
|
||||||
|
|
||||||
bool Initialize(HostDisplay* host_display) override;
|
bool Initialize(HostDisplay* host_display) override;
|
||||||
void Reset() override;
|
void Reset() override;
|
||||||
|
void UpdateSettings() override;
|
||||||
ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; }
|
|
||||||
ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &m_vram[VRAM_WIDTH * y + x]; }
|
|
||||||
ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &m_vram[VRAM_WIDTH * y + x]; }
|
|
||||||
ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { m_vram[VRAM_WIDTH * y + x] = value; }
|
|
||||||
|
|
||||||
// this is actually (31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512)
|
|
||||||
static constexpr u32 DITHER_LUT_SIZE = 512;
|
|
||||||
using DitherLUT = std::array<std::array<std::array<u8, 512>, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>;
|
|
||||||
static constexpr DitherLUT ComputeDitherLUT();
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
struct SWVertex
|
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
|
||||||
{
|
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
|
||||||
s32 x, y;
|
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
|
||||||
u8 r, g, b;
|
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
|
||||||
u8 u, v;
|
|
||||||
|
|
||||||
ALWAYS_INLINE void SetPosition(GPUVertexPosition p, s32 offset_x, s32 offset_y)
|
|
||||||
{
|
|
||||||
x = TruncateGPUVertexPosition(offset_x + p.x);
|
|
||||||
y = TruncateGPUVertexPosition(offset_y + p.y);
|
|
||||||
}
|
|
||||||
|
|
||||||
ALWAYS_INLINE void SetColorRGB24(u32 color) { std::tie(r, g, b) = UnpackColorRGB24(color); }
|
|
||||||
ALWAYS_INLINE void SetTexcoord(u16 value) { std::tie(u, v) = UnpackTexcoord(value); }
|
|
||||||
};
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
// Scanout
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
template<HostDisplayPixelFormat display_format>
|
template<HostDisplayPixelFormat display_format>
|
||||||
void CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field, bool interlaced, bool interleaved);
|
void CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field, bool interlaced, bool interleaved);
|
||||||
void CopyOut15Bit(HostDisplayPixelFormat display_format, u32 src_x, u32 src_y, u32 width, u32 height, u32 field,
|
void CopyOut15Bit(HostDisplayPixelFormat display_format, u32 src_x, u32 src_y, u32 width, u32 height, u32 field,
|
||||||
|
@ -63,71 +41,14 @@ protected:
|
||||||
void ClearDisplay() override;
|
void ClearDisplay() override;
|
||||||
void UpdateDisplay() override;
|
void UpdateDisplay() override;
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
// Rasterization
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
void DispatchRenderCommand() override;
|
void DispatchRenderCommand() override;
|
||||||
|
|
||||||
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
|
void FillBackendCommandParameters(GPUBackendCommand* cmd);
|
||||||
void ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y);
|
void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc);
|
||||||
|
|
||||||
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
|
|
||||||
void DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x,
|
|
||||||
u8 origin_texcoord_y);
|
|
||||||
|
|
||||||
using DrawRectangleFunction = void (GPU_SW::*)(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b,
|
|
||||||
u8 origin_texcoord_x, u8 origin_texcoord_y);
|
|
||||||
DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable,
|
|
||||||
bool transparency_enable);
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
// Polygon and line rasterization ported from Mednafen
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
struct i_deltas
|
|
||||||
{
|
|
||||||
u32 du_dx, dv_dx;
|
|
||||||
u32 dr_dx, dg_dx, db_dx;
|
|
||||||
|
|
||||||
u32 du_dy, dv_dy;
|
|
||||||
u32 dr_dy, dg_dy, db_dy;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct i_group
|
|
||||||
{
|
|
||||||
u32 u, v;
|
|
||||||
u32 r, g, b;
|
|
||||||
};
|
|
||||||
|
|
||||||
template<bool shading_enable, bool texture_enable>
|
|
||||||
bool CalcIDeltas(i_deltas& idl, const SWVertex* A, const SWVertex* B, const SWVertex* C);
|
|
||||||
|
|
||||||
template<bool shading_enable, bool texture_enable>
|
|
||||||
void AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count = 1);
|
|
||||||
|
|
||||||
template<bool shading_enable, bool texture_enable>
|
|
||||||
void AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count = 1);
|
|
||||||
|
|
||||||
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
|
|
||||||
bool dithering_enable>
|
|
||||||
void DrawSpan(s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl);
|
|
||||||
|
|
||||||
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
|
|
||||||
bool dithering_enable>
|
|
||||||
void DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
|
|
||||||
|
|
||||||
using DrawTriangleFunction = void (GPU_SW::*)(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
|
|
||||||
DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable,
|
|
||||||
bool transparency_enable, bool dithering_enable);
|
|
||||||
|
|
||||||
template<bool shading_enable, bool transparency_enable, bool dithering_enable>
|
|
||||||
void DrawLine(const SWVertex* p0, const SWVertex* p1);
|
|
||||||
|
|
||||||
using DrawLineFunction = void (GPU_SW::*)(const SWVertex* p0, const SWVertex* p1);
|
|
||||||
DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable);
|
|
||||||
|
|
||||||
std::array<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram;
|
|
||||||
HeapArray<u8, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32)> m_display_texture_buffer;
|
HeapArray<u8, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32)> m_display_texture_buffer;
|
||||||
HostDisplayPixelFormat m_16bit_display_format = HostDisplayPixelFormat::RGB565;
|
HostDisplayPixelFormat m_16bit_display_format = HostDisplayPixelFormat::RGB565;
|
||||||
HostDisplayPixelFormat m_24bit_display_format = HostDisplayPixelFormat::RGBA8;
|
HostDisplayPixelFormat m_24bit_display_format = HostDisplayPixelFormat::RGBA8;
|
||||||
|
|
||||||
|
GPU_SW_Backend m_backend;
|
||||||
};
|
};
|
||||||
|
|
928
src/core/gpu_sw_backend.cpp
Normal file
928
src/core/gpu_sw_backend.cpp
Normal file
|
@ -0,0 +1,928 @@
|
||||||
|
#include "gpu_sw_backend.h"
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/log.h"
|
||||||
|
#include "gpu_sw_backend.h"
|
||||||
|
#include "host_display.h"
|
||||||
|
#include "system.h"
|
||||||
|
#include <algorithm>
|
||||||
|
Log_SetChannel(GPU_SW_Backend);
|
||||||
|
|
||||||
|
// Constructs the software backend. The backend owns its VRAM array; m_vram_ptr is pointed at that storage
// and the array is cleared to zero.
GPU_SW_Backend::GPU_SW_Backend() : GPUBackend()
{
  m_vram_ptr = m_vram.data();
  m_vram.fill(0);
}
|
||||||
|
|
||||||
|
// Defaulted destructor; nothing is released explicitly here.
GPU_SW_Backend::~GPU_SW_Backend() = default;
|
||||||
|
|
||||||
|
bool GPU_SW_Backend::Initialize()
|
||||||
|
{
|
||||||
|
return GPUBackend::Initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPU_SW_Backend::Reset()
|
||||||
|
{
|
||||||
|
GPUBackend::Reset();
|
||||||
|
|
||||||
|
m_vram.fill(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
|
||||||
|
{
|
||||||
|
const GPURenderCommand rc{cmd->rc.bits};
|
||||||
|
const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable;
|
||||||
|
|
||||||
|
const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction(
|
||||||
|
rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable);
|
||||||
|
|
||||||
|
(this->*DrawFunction)(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]);
|
||||||
|
if (rc.quad_polygon)
|
||||||
|
(this->*DrawFunction)(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
|
||||||
|
{
|
||||||
|
const GPURenderCommand rc{cmd->rc.bits};
|
||||||
|
const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable;
|
||||||
|
|
||||||
|
const DrawRectangleFunction DrawFunction =
|
||||||
|
GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable);
|
||||||
|
|
||||||
|
(this->*DrawFunction)(cmd);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd)
|
||||||
|
{
|
||||||
|
const DrawLineFunction DrawFunction =
|
||||||
|
GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled());
|
||||||
|
|
||||||
|
for (u16 i = 1; i < cmd->num_vertices; i++)
|
||||||
|
(this->*DrawFunction)(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the dither lookup table at compile time.
//
// For every dither-matrix cell (row, col) and every input value in [0, DITHER_LUT_SIZE), the entry is
// (value + DITHER_MATRIX[row][col]) >> 3, clamped to the range [0, 31].
constexpr GPU_SW_Backend::DitherLUT GPU_SW_Backend::ComputeDitherLUT()
{
  DitherLUT lut = {};
  for (u32 row = 0; row < DITHER_MATRIX_SIZE; row++)
  {
    for (u32 col = 0; col < DITHER_MATRIX_SIZE; col++)
    {
      for (s32 value = 0; value < DITHER_LUT_SIZE; value++)
      {
        // Apply the matrix offset, then drop the low three bits.
        s32 clamped = (value + DITHER_MATRIX[row][col]) >> 3;
        if (clamped < 0)
          clamped = 0;
        else if (clamped > 31)
          clamped = 31;

        lut[row][col][value] = static_cast<u8>(clamped);
      }
    }
  }

  return lut;
}
|
||||||
|
|
||||||
|
// File-local dither table, computed at compile time by ComputeDitherLUT() and indexed as [y][x][value].
static constexpr GPU_SW_Backend::DitherLUT s_dither_lut = GPU_SW_Backend::ComputeDitherLUT();
|
||||||
|
|
||||||
|
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
|
||||||
|
void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r,
|
||||||
|
u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y)
|
||||||
|
{
|
||||||
|
VRAMPixel color;
|
||||||
|
bool transparent;
|
||||||
|
if constexpr (texture_enable)
|
||||||
|
{
|
||||||
|
// Apply texture window
|
||||||
|
// TODO: Precompute the second half
|
||||||
|
texcoord_x = (texcoord_x & cmd->window.and_x) | cmd->window.or_x;
|
||||||
|
texcoord_y = (texcoord_y & cmd->window.and_y) | cmd->window.or_y;
|
||||||
|
|
||||||
|
VRAMPixel texture_color;
|
||||||
|
switch (cmd->draw_mode.texture_mode)
|
||||||
|
{
|
||||||
|
case GPUTextureMode::Palette4Bit:
|
||||||
|
{
|
||||||
|
const u16 palette_value =
|
||||||
|
GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH,
|
||||||
|
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
|
||||||
|
const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu;
|
||||||
|
|
||||||
|
const u32 px = (cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH;
|
||||||
|
const u32 py = cmd->palette.GetYBase();
|
||||||
|
texture_color.bits =
|
||||||
|
GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase());
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GPUTextureMode::Palette8Bit:
|
||||||
|
{
|
||||||
|
const u16 palette_value =
|
||||||
|
GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH,
|
||||||
|
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
|
||||||
|
const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu;
|
||||||
|
texture_color.bits =
|
||||||
|
GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase());
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
texture_color.bits = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x)) % VRAM_WIDTH,
|
||||||
|
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (texture_color.bits == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
transparent = texture_color.c;
|
||||||
|
|
||||||
|
if constexpr (raw_texture_enable)
|
||||||
|
{
|
||||||
|
color.bits = texture_color.bits;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
|
||||||
|
const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
|
||||||
|
|
||||||
|
color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.r) * u16(color_r)) >> 4]) << 0) |
|
||||||
|
(ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.g) * u16(color_g)) >> 4]) << 5) |
|
||||||
|
(ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.b) * u16(color_b)) >> 4]) << 10) |
|
||||||
|
(texture_color.bits & 0x8000u);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
transparent = true;
|
||||||
|
|
||||||
|
const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
|
||||||
|
const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
|
||||||
|
|
||||||
|
color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_r]) << 0) |
|
||||||
|
(ZeroExtend16(s_dither_lut[dither_y][dither_x][color_g]) << 5) |
|
||||||
|
(ZeroExtend16(s_dither_lut[dither_y][dither_x][color_b]) << 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
const VRAMPixel bg_color{GetPixel(static_cast<u32>(x), static_cast<u32>(y))};
|
||||||
|
if constexpr (transparency_enable)
|
||||||
|
{
|
||||||
|
if (transparent)
|
||||||
|
{
|
||||||
|
#define BLEND_AVERAGE(bg, fg) Truncate8(std::min<u32>((ZeroExtend32(bg) / 2) + (ZeroExtend32(fg) / 2), 0x1F))
|
||||||
|
#define BLEND_ADD(bg, fg) Truncate8(std::min<u32>(ZeroExtend32(bg) + ZeroExtend32(fg), 0x1F))
|
||||||
|
#define BLEND_SUBTRACT(bg, fg) Truncate8((bg > fg) ? ((bg) - (fg)) : 0)
|
||||||
|
#define BLEND_QUARTER(bg, fg) Truncate8(std::min<u32>(ZeroExtend32(bg) + ZeroExtend32(fg / 4), 0x1F))
|
||||||
|
|
||||||
|
#define BLEND_RGB(func) \
|
||||||
|
color.Set(func(bg_color.r.GetValue(), color.r.GetValue()), func(bg_color.g.GetValue(), color.g.GetValue()), \
|
||||||
|
func(bg_color.b.GetValue(), color.b.GetValue()), color.c.GetValue())
|
||||||
|
|
||||||
|
switch (cmd->draw_mode.transparency_mode)
|
||||||
|
{
|
||||||
|
case GPUTransparencyMode::HalfBackgroundPlusHalfForeground:
|
||||||
|
BLEND_RGB(BLEND_AVERAGE);
|
||||||
|
break;
|
||||||
|
case GPUTransparencyMode::BackgroundPlusForeground:
|
||||||
|
BLEND_RGB(BLEND_ADD);
|
||||||
|
break;
|
||||||
|
case GPUTransparencyMode::BackgroundMinusForeground:
|
||||||
|
BLEND_RGB(BLEND_SUBTRACT);
|
||||||
|
break;
|
||||||
|
case GPUTransparencyMode::BackgroundPlusQuarterForeground:
|
||||||
|
BLEND_RGB(BLEND_QUARTER);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef BLEND_RGB
|
||||||
|
|
||||||
|
#undef BLEND_QUARTER
|
||||||
|
#undef BLEND_SUBTRACT
|
||||||
|
#undef BLEND_ADD
|
||||||
|
#undef BLEND_AVERAGE
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
UNREFERENCED_VARIABLE(transparent);
|
||||||
|
}
|
||||||
|
|
||||||
|
const u16 mask_and = cmd->params.GetMaskAND();
|
||||||
|
if ((bg_color.bits & mask_and) != 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | cmd->params.GetMaskOR());
|
||||||
|
}
|
||||||
|
|
||||||
|
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
|
||||||
|
void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
|
||||||
|
{
|
||||||
|
const s32 origin_x = cmd->x;
|
||||||
|
const s32 origin_y = cmd->y;
|
||||||
|
const auto [r, g, b] = UnpackColorRGB24(cmd->color);
|
||||||
|
const auto [origin_texcoord_x, origin_texcoord_y] = UnpackTexcoord(cmd->texcoord);
|
||||||
|
|
||||||
|
for (u32 offset_y = 0; offset_y < cmd->height; offset_y++)
|
||||||
|
{
|
||||||
|
const s32 y = origin_y + static_cast<s32>(offset_y);
|
||||||
|
if (y < static_cast<s32>(m_drawing_area.top) || y > static_cast<s32>(m_drawing_area.bottom) ||
|
||||||
|
(cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast<u32>(y)) & 1u)))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y);
|
||||||
|
|
||||||
|
for (u32 offset_x = 0; offset_x < cmd->width; offset_x++)
|
||||||
|
{
|
||||||
|
const s32 x = origin_x + static_cast<s32>(offset_x);
|
||||||
|
if (x < static_cast<s32>(m_drawing_area.left) || x > static_cast<s32>(m_drawing_area.right))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x);
|
||||||
|
|
||||||
|
ShadePixel<texture_enable, raw_texture_enable, transparency_enable, false>(
|
||||||
|
cmd, static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// Polygon and line rasterization ported from Mednafen
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Number of bits by which COORD_MF_INT() shifts an integer attribute to the left.
#define COORD_FBS 12
|
||||||
|
// Shifts an integer value left by COORD_FBS bits.
#define COORD_MF_INT(n) ((n) << COORD_FBS)
|
||||||
|
// Additional left shift applied on top of COORD_FBS when interpolants are built; DrawSpan shifts right by
// (COORD_FBS + COORD_POST_PADDING) to read the integer value back.
#define COORD_POST_PADDING 12
|
||||||
|
|
||||||
|
// Converts an integer x coordinate to the 32.32 fixed-point form used for polygon edges: x is placed in the
// upper 32 bits and a bias of (2^32 - 2^11) is added.
static ALWAYS_INLINE_RELEASE s64 MakePolyXFP(s32 x)
{
  const u64 integer_part = static_cast<u64>(x) << 32;
  const u64 bias = (1ULL << 32) - (1 << 11);
  return integer_part + bias;
}
|
||||||
|
|
||||||
|
// Computes the per-scanline x step of a polygon edge in 32.32 fixed point: (dx << 32) / dy, with the
// numerator pushed away from zero by (dy - 1) before dividing. dy must be non-zero; callers in this file
// only invoke it for edges whose endpoints differ in y.
static ALWAYS_INLINE_RELEASE s64 MakePolyXFPStep(s32 dx, s32 dy)
{
  s64 numerator = static_cast<s64>(static_cast<u64>(dx) << 32);

  // The two adjustments are deliberately independent tests, not an if/else chain.
  if (numerator < 0)
    numerator -= dy - 1;

  if (numerator > 0)
    numerator += dy - 1;

  const s64 step = numerator / dy;
  return step;
}
|
||||||
|
|
||||||
|
// Extracts the integer part (upper 32 bits) of a 32.32 fixed-point polygon x coordinate.
static ALWAYS_INLINE_RELEASE s32 GetPolyXFP_Int(s64 xfp)
{
  const s64 integer_part = xfp >> 32;
  return static_cast<s32>(integer_part);
}
|
||||||
|
|
||||||
|
template<bool shading_enable, bool texture_enable>
|
||||||
|
bool ALWAYS_INLINE_RELEASE GPU_SW_Backend::CalcIDeltas(i_deltas& idl, const GPUBackendDrawPolygonCommand::Vertex* A,
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* B,
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* C)
|
||||||
|
{
|
||||||
|
#define CALCIS(x, y) (((B->x - A->x) * (C->y - B->y)) - ((C->x - B->x) * (B->y - A->y)))
|
||||||
|
|
||||||
|
s32 denom = CALCIS(x, y);
|
||||||
|
|
||||||
|
if (!denom)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if constexpr (shading_enable)
|
||||||
|
{
|
||||||
|
idl.dr_dx = (u32)(CALCIS(r, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
|
||||||
|
idl.dr_dy = (u32)(CALCIS(x, r) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
|
||||||
|
|
||||||
|
idl.dg_dx = (u32)(CALCIS(g, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
|
||||||
|
idl.dg_dy = (u32)(CALCIS(x, g) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
|
||||||
|
|
||||||
|
idl.db_dx = (u32)(CALCIS(b, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
|
||||||
|
idl.db_dy = (u32)(CALCIS(x, b) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (texture_enable)
|
||||||
|
{
|
||||||
|
idl.du_dx = (u32)(CALCIS(u, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
|
||||||
|
idl.du_dy = (u32)(CALCIS(x, u) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
|
||||||
|
|
||||||
|
idl.dv_dx = (u32)(CALCIS(v, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
|
||||||
|
idl.dv_dy = (u32)(CALCIS(x, v) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
|
||||||
|
#undef CALCIS
|
||||||
|
}
|
||||||
|
|
||||||
|
template<bool shading_enable, bool texture_enable>
|
||||||
|
void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count /*= 1*/)
|
||||||
|
{
|
||||||
|
if constexpr (shading_enable)
|
||||||
|
{
|
||||||
|
ig.r += idl.dr_dx * count;
|
||||||
|
ig.g += idl.dg_dx * count;
|
||||||
|
ig.b += idl.db_dx * count;
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (texture_enable)
|
||||||
|
{
|
||||||
|
ig.u += idl.du_dx * count;
|
||||||
|
ig.v += idl.dv_dx * count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<bool shading_enable, bool texture_enable>
|
||||||
|
void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count /*= 1*/)
|
||||||
|
{
|
||||||
|
if constexpr (shading_enable)
|
||||||
|
{
|
||||||
|
ig.r += idl.dr_dy * count;
|
||||||
|
ig.g += idl.dg_dy * count;
|
||||||
|
ig.b += idl.db_dy * count;
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (texture_enable)
|
||||||
|
{
|
||||||
|
ig.u += idl.du_dy * count;
|
||||||
|
ig.v += idl.dv_dy * count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
|
||||||
|
bool dithering_enable>
|
||||||
|
void GPU_SW_Backend::DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig,
|
||||||
|
const i_deltas& idl)
|
||||||
|
{
|
||||||
|
if (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast<u32>(y)) & 1u))
|
||||||
|
return;
|
||||||
|
|
||||||
|
s32 x_ig_adjust = x_start;
|
||||||
|
s32 w = x_bound - x_start;
|
||||||
|
s32 x = TruncateGPUVertexPosition(x_start);
|
||||||
|
|
||||||
|
if (x < static_cast<s32>(m_drawing_area.left))
|
||||||
|
{
|
||||||
|
s32 delta = static_cast<s32>(m_drawing_area.left) - x;
|
||||||
|
x_ig_adjust += delta;
|
||||||
|
x += delta;
|
||||||
|
w -= delta;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((x + w) > (static_cast<s32>(m_drawing_area.right) + 1))
|
||||||
|
w = static_cast<s32>(m_drawing_area.right) + 1 - x;
|
||||||
|
|
||||||
|
if (w <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, x_ig_adjust);
|
||||||
|
AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, y);
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
const u32 r = ig.r >> (COORD_FBS + COORD_POST_PADDING);
|
||||||
|
const u32 g = ig.g >> (COORD_FBS + COORD_POST_PADDING);
|
||||||
|
const u32 b = ig.b >> (COORD_FBS + COORD_POST_PADDING);
|
||||||
|
const u32 u = ig.u >> (COORD_FBS + COORD_POST_PADDING);
|
||||||
|
const u32 v = ig.v >> (COORD_FBS + COORD_POST_PADDING);
|
||||||
|
|
||||||
|
ShadePixel<texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
|
||||||
|
cmd, static_cast<u32>(x), static_cast<u32>(y), Truncate8(r), Truncate8(g), Truncate8(b), Truncate8(u),
|
||||||
|
Truncate8(v));
|
||||||
|
|
||||||
|
x++;
|
||||||
|
AddIDeltas_DX<shading_enable, texture_enable>(ig, idl);
|
||||||
|
} while (--w > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
|
||||||
|
bool dithering_enable>
|
||||||
|
void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd,
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* v0,
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* v1,
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* v2)
|
||||||
|
{
|
||||||
|
u32 core_vertex;
|
||||||
|
{
|
||||||
|
u32 cvtemp = 0;
|
||||||
|
|
||||||
|
if (v1->x <= v0->x)
|
||||||
|
{
|
||||||
|
if (v2->x <= v1->x)
|
||||||
|
cvtemp = (1 << 2);
|
||||||
|
else
|
||||||
|
cvtemp = (1 << 1);
|
||||||
|
}
|
||||||
|
else if (v2->x < v0->x)
|
||||||
|
cvtemp = (1 << 2);
|
||||||
|
else
|
||||||
|
cvtemp = (1 << 0);
|
||||||
|
|
||||||
|
if (v2->y < v1->y)
|
||||||
|
{
|
||||||
|
std::swap(v2, v1);
|
||||||
|
cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (v1->y < v0->y)
|
||||||
|
{
|
||||||
|
std::swap(v1, v0);
|
||||||
|
cvtemp = ((cvtemp >> 1) & 0x1) | ((cvtemp << 1) & 0x2) | (cvtemp & 0x4);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (v2->y < v1->y)
|
||||||
|
{
|
||||||
|
std::swap(v2, v1);
|
||||||
|
cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1);
|
||||||
|
}
|
||||||
|
|
||||||
|
core_vertex = cvtemp >> 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (v0->y == v2->y)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (static_cast<u32>(std::abs(v2->x - v0->x)) >= MAX_PRIMITIVE_WIDTH ||
|
||||||
|
static_cast<u32>(std::abs(v2->x - v1->x)) >= MAX_PRIMITIVE_WIDTH ||
|
||||||
|
static_cast<u32>(std::abs(v1->x - v0->x)) >= MAX_PRIMITIVE_WIDTH ||
|
||||||
|
static_cast<u32>(v2->y - v0->y) >= MAX_PRIMITIVE_HEIGHT)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
s64 base_coord = MakePolyXFP(v0->x);
|
||||||
|
s64 base_step = MakePolyXFPStep((v2->x - v0->x), (v2->y - v0->y));
|
||||||
|
s64 bound_coord_us;
|
||||||
|
s64 bound_coord_ls;
|
||||||
|
bool right_facing;
|
||||||
|
|
||||||
|
if (v1->y == v0->y)
|
||||||
|
{
|
||||||
|
bound_coord_us = 0;
|
||||||
|
right_facing = (bool)(v1->x > v0->x);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bound_coord_us = MakePolyXFPStep((v1->x - v0->x), (v1->y - v0->y));
|
||||||
|
right_facing = (bool)(bound_coord_us > base_step);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (v2->y == v1->y)
|
||||||
|
bound_coord_ls = 0;
|
||||||
|
else
|
||||||
|
bound_coord_ls = MakePolyXFPStep((v2->x - v1->x), (v2->y - v1->y));
|
||||||
|
|
||||||
|
i_deltas idl;
|
||||||
|
if (!CalcIDeltas<shading_enable, texture_enable>(idl, v0, v1, v2))
|
||||||
|
return;
|
||||||
|
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* vertices[3] = {v0, v1, v2};
|
||||||
|
|
||||||
|
i_group ig;
|
||||||
|
if constexpr (texture_enable)
|
||||||
|
{
|
||||||
|
ig.u = (COORD_MF_INT(vertices[core_vertex]->u) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
|
||||||
|
ig.v = (COORD_MF_INT(vertices[core_vertex]->v) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
|
||||||
|
}
|
||||||
|
|
||||||
|
ig.r = (COORD_MF_INT(vertices[core_vertex]->r) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
|
||||||
|
ig.g = (COORD_MF_INT(vertices[core_vertex]->g) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
|
||||||
|
ig.b = (COORD_MF_INT(vertices[core_vertex]->b) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
|
||||||
|
|
||||||
|
AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->x);
|
||||||
|
AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->y);
|
||||||
|
|
||||||
|
struct TriangleHalf
|
||||||
|
{
|
||||||
|
u64 x_coord[2];
|
||||||
|
u64 x_step[2];
|
||||||
|
|
||||||
|
s32 y_coord;
|
||||||
|
s32 y_bound;
|
||||||
|
|
||||||
|
bool dec_mode;
|
||||||
|
} tripart[2];
|
||||||
|
|
||||||
|
u32 vo = 0;
|
||||||
|
u32 vp = 0;
|
||||||
|
if (core_vertex != 0)
|
||||||
|
vo = 1;
|
||||||
|
if (core_vertex == 2)
|
||||||
|
vp = 3;
|
||||||
|
|
||||||
|
{
|
||||||
|
TriangleHalf* tp = &tripart[vo];
|
||||||
|
tp->y_coord = vertices[0 ^ vo]->y;
|
||||||
|
tp->y_bound = vertices[1 ^ vo]->y;
|
||||||
|
tp->x_coord[right_facing] = MakePolyXFP(vertices[0 ^ vo]->x);
|
||||||
|
tp->x_step[right_facing] = bound_coord_us;
|
||||||
|
tp->x_coord[!right_facing] = base_coord + ((vertices[vo]->y - vertices[0]->y) * base_step);
|
||||||
|
tp->x_step[!right_facing] = base_step;
|
||||||
|
tp->dec_mode = vo;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
TriangleHalf* tp = &tripart[vo ^ 1];
|
||||||
|
tp->y_coord = vertices[1 ^ vp]->y;
|
||||||
|
tp->y_bound = vertices[2 ^ vp]->y;
|
||||||
|
tp->x_coord[right_facing] = MakePolyXFP(vertices[1 ^ vp]->x);
|
||||||
|
tp->x_step[right_facing] = bound_coord_ls;
|
||||||
|
tp->x_coord[!right_facing] =
|
||||||
|
base_coord + ((vertices[1 ^ vp]->y - vertices[0]->y) *
|
||||||
|
base_step); // base_coord + ((vertices[1].y - vertices[0].y) * base_step);
|
||||||
|
tp->x_step[!right_facing] = base_step;
|
||||||
|
tp->dec_mode = vp;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (u32 i = 0; i < 2; i++)
|
||||||
|
{
|
||||||
|
s32 yi = tripart[i].y_coord;
|
||||||
|
s32 yb = tripart[i].y_bound;
|
||||||
|
|
||||||
|
u64 lc = tripart[i].x_coord[0];
|
||||||
|
u64 ls = tripart[i].x_step[0];
|
||||||
|
|
||||||
|
u64 rc = tripart[i].x_coord[1];
|
||||||
|
u64 rs = tripart[i].x_step[1];
|
||||||
|
|
||||||
|
if (tripart[i].dec_mode)
|
||||||
|
{
|
||||||
|
while (yi > yb)
|
||||||
|
{
|
||||||
|
yi--;
|
||||||
|
lc -= ls;
|
||||||
|
rc -= rs;
|
||||||
|
|
||||||
|
s32 y = TruncateGPUVertexPosition(yi);
|
||||||
|
|
||||||
|
if (y < static_cast<s32>(m_drawing_area.top))
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (y > static_cast<s32>(m_drawing_area.bottom))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
|
||||||
|
cmd, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (yi < yb)
|
||||||
|
{
|
||||||
|
s32 y = TruncateGPUVertexPosition(yi);
|
||||||
|
|
||||||
|
if (y > static_cast<s32>(m_drawing_area.bottom))
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (y >= static_cast<s32>(m_drawing_area.top))
|
||||||
|
{
|
||||||
|
|
||||||
|
DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
|
||||||
|
cmd, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
|
||||||
|
}
|
||||||
|
|
||||||
|
yi++;
|
||||||
|
lc += ls;
|
||||||
|
rc += rs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the DrawTriangle<> specialization whose template arguments equal the given runtime flags.
// The lookup table is indexed as [shading][texture][raw_texture][transparency][dithering].
GPU_SW_Backend::DrawTriangleFunction GPU_SW_Backend::GetDrawTriangleFunction(bool shading_enable, bool texture_enable,
                                                                             bool raw_texture_enable,
                                                                             bool transparency_enable,
                                                                             bool dithering_enable)
{
// Each helper expands one table dimension into its {false, true} pair, innermost (dithering) first.
#define TRIANGLE_FN(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING)                                           \
  &GPU_SW_Backend::DrawTriangle<SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING>
#define TRIANGLE_BY_DITHERING(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY)                                            \
  {                                                                                                                    \
    TRIANGLE_FN(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, false),                                                   \
      TRIANGLE_FN(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, true)                                                   \
  }
#define TRIANGLE_BY_TRANSPARENCY(SHADING, TEXTURE, RAW_TEXTURE)                                                       \
  {                                                                                                                    \
    TRIANGLE_BY_DITHERING(SHADING, TEXTURE, RAW_TEXTURE, false),                                                       \
      TRIANGLE_BY_DITHERING(SHADING, TEXTURE, RAW_TEXTURE, true)                                                       \
  }
#define TRIANGLE_BY_RAW_TEXTURE(SHADING, TEXTURE)                                                                     \
  {                                                                                                                    \
    TRIANGLE_BY_TRANSPARENCY(SHADING, TEXTURE, false), TRIANGLE_BY_TRANSPARENCY(SHADING, TEXTURE, true)                \
  }
#define TRIANGLE_BY_TEXTURE(SHADING)                                                                                  \
  {                                                                                                                    \
    TRIANGLE_BY_RAW_TEXTURE(SHADING, false), TRIANGLE_BY_RAW_TEXTURE(SHADING, true)                                    \
  }

  static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = {TRIANGLE_BY_TEXTURE(false),
                                                                TRIANGLE_BY_TEXTURE(true)};

#undef TRIANGLE_BY_TEXTURE
#undef TRIANGLE_BY_RAW_TEXTURE
#undef TRIANGLE_BY_TRANSPARENCY
#undef TRIANGLE_BY_DITHERING
#undef TRIANGLE_FN

  const u8 shading_index = u8(shading_enable);
  const u8 texture_index = u8(texture_enable);
  const u8 raw_texture_index = u8(raw_texture_enable);
  const u8 transparency_index = u8(transparency_enable);
  const u8 dithering_index = u8(dithering_enable);
  return funcs[shading_index][texture_index][raw_texture_index][transparency_index][dithering_index];
}
|
||||||
|
|
||||||
|
// Shift applied to line x/y coordinates when they are converted to fixed point.
enum
{
  Line_XY_FractBits = 32
};

// Shift applied to line colour channels when they are converted to fixed point.
enum
{
  Line_RGB_FractBits = 12
};
|
||||||
|
|
||||||
|
struct line_fxp_coord
|
||||||
|
{
|
||||||
|
u64 x, y;
|
||||||
|
u32 r, g, b;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct line_fxp_step
|
||||||
|
{
|
||||||
|
s64 dx_dk, dy_dk;
|
||||||
|
s32 dr_dk, dg_dk, db_dk;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Computes (delta << Line_XY_FractBits) / dk, with the numerator pushed away from zero by (dk - 1) before
// dividing. dk must be non-zero; the caller in this file only uses it when the step count is non-zero.
static ALWAYS_INLINE_RELEASE s64 LineDivide(s64 delta, s32 dk)
{
  s64 numerator = static_cast<s64>(static_cast<u64>(delta) << Line_XY_FractBits);

  // The two adjustments are deliberately independent tests, not an if/else chain.
  if (numerator < 0)
    numerator -= dk - 1;
  if (numerator > 0)
    numerator += dk - 1;

  const s64 quotient = numerator / dk;
  return quotient;
}
|
||||||
|
|
||||||
|
template<bool shading_enable, bool transparency_enable, bool dithering_enable>
|
||||||
|
void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
|
||||||
|
const GPUBackendDrawLineCommand::Vertex* p1)
|
||||||
|
{
|
||||||
|
const s32 i_dx = std::abs(p1->x - p0->x);
|
||||||
|
const s32 i_dy = std::abs(p1->y - p0->y);
|
||||||
|
const s32 k = (i_dx > i_dy) ? i_dx : i_dy;
|
||||||
|
if (i_dx >= MAX_PRIMITIVE_WIDTH || i_dy >= MAX_PRIMITIVE_HEIGHT)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (p0->x >= p1->x && k > 0)
|
||||||
|
std::swap(p0, p1);
|
||||||
|
|
||||||
|
line_fxp_step step;
|
||||||
|
if (k == 0)
|
||||||
|
{
|
||||||
|
step.dx_dk = 0;
|
||||||
|
step.dy_dk = 0;
|
||||||
|
|
||||||
|
if constexpr (shading_enable)
|
||||||
|
{
|
||||||
|
step.dr_dk = 0;
|
||||||
|
step.dg_dk = 0;
|
||||||
|
step.db_dk = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
step.dx_dk = LineDivide(p1->x - p0->x, k);
|
||||||
|
step.dy_dk = LineDivide(p1->y - p0->y, k);
|
||||||
|
|
||||||
|
if constexpr (shading_enable)
|
||||||
|
{
|
||||||
|
step.dr_dk = (s32)((u32)(p1->r - p0->r) << Line_RGB_FractBits) / k;
|
||||||
|
step.dg_dk = (s32)((u32)(p1->g - p0->g) << Line_RGB_FractBits) / k;
|
||||||
|
step.db_dk = (s32)((u32)(p1->b - p0->b) << Line_RGB_FractBits) / k;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
line_fxp_coord cur_point;
|
||||||
|
cur_point.x = ((u64)p0->x << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1));
|
||||||
|
cur_point.y = ((u64)p0->y << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1));
|
||||||
|
|
||||||
|
cur_point.x -= 1024;
|
||||||
|
|
||||||
|
if (step.dy_dk < 0)
|
||||||
|
cur_point.y -= 1024;
|
||||||
|
|
||||||
|
if constexpr (shading_enable)
|
||||||
|
{
|
||||||
|
cur_point.r = (p0->r << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
|
||||||
|
cur_point.g = (p0->g << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
|
||||||
|
cur_point.b = (p0->b << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (s32 i = 0; i <= k; i++)
|
||||||
|
{
|
||||||
|
// Sign extension is not necessary here for x and y, due to the maximum values that ClipX1 and ClipY1 can contain.
|
||||||
|
const s32 x = (cur_point.x >> Line_XY_FractBits) & 2047;
|
||||||
|
const s32 y = (cur_point.y >> Line_XY_FractBits) & 2047;
|
||||||
|
|
||||||
|
if ((!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (Truncate8(static_cast<u32>(y)) & 1u)) &&
|
||||||
|
x >= static_cast<s32>(m_drawing_area.left) && x <= static_cast<s32>(m_drawing_area.right) &&
|
||||||
|
y >= static_cast<s32>(m_drawing_area.top) && y <= static_cast<s32>(m_drawing_area.bottom))
|
||||||
|
{
|
||||||
|
const u8 r = shading_enable ? static_cast<u8>(cur_point.r >> Line_RGB_FractBits) : p0->r;
|
||||||
|
const u8 g = shading_enable ? static_cast<u8>(cur_point.g >> Line_RGB_FractBits) : p0->g;
|
||||||
|
const u8 b = shading_enable ? static_cast<u8>(cur_point.b >> Line_RGB_FractBits) : p0->b;
|
||||||
|
|
||||||
|
ShadePixel<false, false, transparency_enable, dithering_enable>(cmd, static_cast<u32>(x), static_cast<u32>(y), r,
|
||||||
|
g, b, 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
cur_point.x += step.dx_dk;
|
||||||
|
cur_point.y += step.dy_dk;
|
||||||
|
|
||||||
|
if constexpr (shading_enable)
|
||||||
|
{
|
||||||
|
cur_point.r += step.dr_dk;
|
||||||
|
cur_point.g += step.dg_dk;
|
||||||
|
cur_point.b += step.db_dk;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Looks up the DrawLine<> instantiation that matches the requested render state.
/// The returned member-function pointer is invoked once per line segment.
GPU_SW_Backend::DrawLineFunction GPU_SW_Backend::GetDrawLineFunction(bool shading_enable, bool transparency_enable,
                                                                     bool dithering_enable)
{
  // Indexed as [shading][transparency][dithering]; false maps to 0, true maps to 1.
  static constexpr DrawLineFunction s_line_functions[2][2][2] = {
    {
      {&GPU_SW_Backend::DrawLine<false, false, false>, &GPU_SW_Backend::DrawLine<false, false, true>},
      {&GPU_SW_Backend::DrawLine<false, true, false>, &GPU_SW_Backend::DrawLine<false, true, true>},
    },
    {
      {&GPU_SW_Backend::DrawLine<true, false, false>, &GPU_SW_Backend::DrawLine<true, false, true>},
      {&GPU_SW_Backend::DrawLine<true, true, false>, &GPU_SW_Backend::DrawLine<true, true, true>},
    },
  };

  const u8 shading_index = u8(shading_enable);
  const u8 transparency_index = u8(transparency_enable);
  const u8 dithering_index = u8(dithering_enable);
  return s_line_functions[shading_index][transparency_index][dithering_index];
}
|
||||||
|
|
||||||
|
/// Looks up the DrawRectangle<> instantiation that matches the requested render state.
GPU_SW_Backend::DrawRectangleFunction
GPU_SW_Backend::GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, bool transparency_enable)
{
  // Indexed as [texture][raw_texture][transparency]; false maps to 0, true maps to 1.
  static constexpr DrawRectangleFunction s_rectangle_functions[2][2][2] = {
    {
      {&GPU_SW_Backend::DrawRectangle<false, false, false>, &GPU_SW_Backend::DrawRectangle<false, false, true>},
      {&GPU_SW_Backend::DrawRectangle<false, true, false>, &GPU_SW_Backend::DrawRectangle<false, true, true>},
    },
    {
      {&GPU_SW_Backend::DrawRectangle<true, false, false>, &GPU_SW_Backend::DrawRectangle<true, false, true>},
      {&GPU_SW_Backend::DrawRectangle<true, true, false>, &GPU_SW_Backend::DrawRectangle<true, true, true>},
    },
  };

  const u8 texture_index = u8(texture_enable);
  const u8 raw_texture_index = u8(raw_texture_enable);
  const u8 transparency_index = u8(transparency_enable);
  return s_rectangle_functions[texture_index][raw_texture_index][transparency_index];
}
|
||||||
|
|
||||||
|
/// Fills a width x height rectangle of VRAM, starting at (x, y), with a single colour.
/// The 32-bit colour is converted with RGBA8888ToRGBA5551() before being stored.
/// Rows always wrap modulo VRAM_HEIGHT; columns wrap modulo VRAM_WIDTH on the slow path.
void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{
  const u16 fill_value = RGBA8888ToRGBA5551(color);
  const bool interlaced = params.interlaced_rendering;

  // Fast path: every row is one contiguous run and no field has to be skipped.
  if (!interlaced && (x + width) <= VRAM_WIDTH)
  {
    for (u32 line = 0; line < height; line++)
    {
      const u32 vram_y = (y + line) % VRAM_HEIGHT;
      std::fill_n(&m_vram_ptr[vram_y * VRAM_WIDTH + x], width, fill_value);
    }

    return;
  }

  // Slow path: per-pixel stores so the fill can wrap around the right edge of VRAM.
  // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field.
  const u32 skipped_field = params.active_line_lsb;
  for (u32 line = 0; line < height; line++)
  {
    const u32 vram_y = (y + line) % VRAM_HEIGHT;

    // When interlaced rendering is on, rows whose LSB matches the active field are left untouched.
    if (interlaced && (vram_y & u32(1)) == skipped_field)
      continue;

    u16* const dst_row = &m_vram_ptr[vram_y * VRAM_WIDTH];
    for (u32 pixel = 0; pixel < width; pixel++)
      dst_row[(x + pixel) % VRAM_WIDTH] = fill_value;
  }
}
|
||||||
|
|
||||||
|
/// Uploads a width x height block of 16-bit pixels from `data` into VRAM at (x, y).
///
/// `data` is read as tightly packed rows of `width` u16 pixels. When the rectangle fits inside VRAM and
/// masking is disabled, rows are copied directly. Otherwise pixels are written one at a time, wrapping
/// modulo VRAM_WIDTH/VRAM_HEIGHT, and each write is gated by the mask test from `params`:
///   if ((dst_pixel & mask_and) == 0) dst_pixel = src_pixel | mask_or
void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
                                GPUBackendCommandParameters params)
{
  const u16* src_ptr = static_cast<const u16*>(data);

  // Fast path when the copy is not oversized.
  if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled())
  {
    u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      std::copy_n(src_ptr, width, dst_ptr);
      src_ptr += width;
      dst_ptr += VRAM_WIDTH;
    }

    return;
  }

  // Slow path when we need to handle wrap-around and/or the mask bit.
  const u16 mask_and = params.GetMaskAND();
  const u16 mask_or = params.GetMaskOR();

  for (u32 row = 0; row < height; row++)
  {
    u16* dst_row_ptr = &m_vram_ptr[((y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
    for (u32 col = 0; col < width; col++)
    {
      // TODO: Handle unaligned reads...
      // Always consume one source pixel per destination pixel. Advancing the source only when the
      // write succeeds would shift every later source pixel after the first masked destination.
      const u16 src_pixel = *(src_ptr++);

      u16* pixel_ptr = &dst_row_ptr[(x + col) % VRAM_WIDTH];
      if (((*pixel_ptr) & mask_and) == 0)
        *pixel_ptr = src_pixel | mask_or;
    }
  }
}
|
||||||
|
|
||||||
|
/// Copies a width x height rectangle inside VRAM from (src_x, src_y) to (dst_x, dst_y).
/// Every destination write is gated by the mask test from `params`:
///   if ((dst_pixel & mask_and) == 0) dst_pixel = src_pixel | mask_or
void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
                              GPUBackendCommandParameters params)
{
  // Break up oversized copies. This behavior has not been verified on console.
  const bool crosses_right_edge = (src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH;
  if (crosses_right_edge)
  {
    // Split into tiles that cross neither the right nor the bottom edge on either side,
    // then recurse once per tile.
    u32 tile_src_y = src_y;
    u32 tile_dst_y = dst_y;
    for (u32 rows_left = height; rows_left > 0;)
    {
      const u32 tile_height =
        std::min<u32>(rows_left, std::min<u32>(VRAM_HEIGHT - tile_src_y, VRAM_HEIGHT - tile_dst_y));

      u32 tile_src_x = src_x;
      u32 tile_dst_x = dst_x;
      for (u32 columns_left = width; columns_left > 0;)
      {
        const u32 tile_width =
          std::min<u32>(columns_left, std::min<u32>(VRAM_WIDTH - tile_src_x, VRAM_WIDTH - tile_dst_x));
        CopyVRAM(tile_src_x, tile_src_y, tile_dst_x, tile_dst_y, tile_width, tile_height, params);

        tile_src_x = (tile_src_x + tile_width) % VRAM_WIDTH;
        tile_dst_x = (tile_dst_x + tile_width) % VRAM_WIDTH;
        columns_left -= tile_width;
      }

      tile_src_y = (tile_src_y + tile_height) % VRAM_HEIGHT;
      tile_dst_y = (tile_dst_y + tile_height) % VRAM_HEIGHT;
      rows_left -= tile_height;
    }

    return;
  }

  // This doesn't have a fast path, but do we really need one? It's not common.
  const u16 check_bits = params.GetMaskAND();
  const u16 set_bits = params.GetMaskOR();
  const auto write_masked = [check_bits, set_bits](u16* dst_pixel, u16 src_pixel) {
    if ((*dst_pixel & check_bits) == 0)
      *dst_pixel = src_pixel | set_bits;
  };

  // Copy in reverse when src_x < dst_x, this is verified on console.
  const bool right_to_left =
    src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH);

  for (u32 line = 0; line < height; line++)
  {
    const u16* const src_line = &m_vram_ptr[((src_y + line) % VRAM_HEIGHT) * VRAM_WIDTH];
    u16* const dst_line = &m_vram_ptr[((dst_y + line) % VRAM_HEIGHT) * VRAM_WIDTH];

    if (right_to_left)
    {
      // Signed counter so the loop can terminate after column zero.
      s32 column = static_cast<s32>(width - 1);
      while (column >= 0)
      {
        const u32 offset = static_cast<u32>(column);
        write_masked(&dst_line[(dst_x + offset) % VRAM_WIDTH], src_line[(src_x + offset) % VRAM_WIDTH]);
        column--;
      }
    }
    else
    {
      for (u32 offset = 0; offset < width; offset++)
        write_masked(&dst_line[(dst_x + offset) % VRAM_WIDTH], src_line[(src_x + offset) % VRAM_WIDTH]);
    }
  }
}
|
||||||
|
|
||||||
|
// Intentionally empty override: this backend does nothing when a flush is requested.
void GPU_SW_Backend::FlushRender() {}
|
||||||
|
|
||||||
|
// Intentionally empty override: this backend takes no action when the drawing area changes.
void GPU_SW_Backend::DrawingAreaChanged() {}
|
174
src/core/gpu_sw_backend.h
Normal file
174
src/core/gpu_sw_backend.h
Normal file
|
@ -0,0 +1,174 @@
|
||||||
|
#pragma once
|
||||||
|
#include "gpu_backend.h"
|
||||||
|
#include <array>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
class GPU_SW_Backend final : public GPUBackend
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
GPU_SW_Backend();
|
||||||
|
~GPU_SW_Backend() override;
|
||||||
|
|
||||||
|
bool Initialize() override;
|
||||||
|
void Reset() override;
|
||||||
|
|
||||||
|
ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; }
|
||||||
|
ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &m_vram[VRAM_WIDTH * y + x]; }
|
||||||
|
ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &m_vram[VRAM_WIDTH * y + x]; }
|
||||||
|
ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { m_vram[VRAM_WIDTH * y + x] = value; }
|
||||||
|
|
||||||
|
// this is actually (31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512)
|
||||||
|
static constexpr u32 DITHER_LUT_SIZE = 512;
|
||||||
|
using DitherLUT = std::array<std::array<std::array<u8, 512>, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>;
|
||||||
|
static constexpr DitherLUT ComputeDitherLUT();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
static constexpr u8 Convert5To8(u8 x5) { return (x5 << 3) | (x5 & 7); }
|
||||||
|
static constexpr u8 Convert8To5(u8 x8) { return (x8 >> 3); }
|
||||||
|
|
||||||
|
union VRAMPixel
|
||||||
|
{
|
||||||
|
u16 bits;
|
||||||
|
|
||||||
|
BitField<u16, u8, 0, 5> r;
|
||||||
|
BitField<u16, u8, 5, 5> g;
|
||||||
|
BitField<u16, u8, 10, 5> b;
|
||||||
|
BitField<u16, bool, 15, 1> c;
|
||||||
|
|
||||||
|
u8 GetR8() const { return Convert5To8(r); }
|
||||||
|
u8 GetG8() const { return Convert5To8(g); }
|
||||||
|
u8 GetB8() const { return Convert5To8(b); }
|
||||||
|
|
||||||
|
void Set(u8 r_, u8 g_, u8 b_, bool c_ = false)
|
||||||
|
{
|
||||||
|
bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast<u16>(c_) << 15);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false)
|
||||||
|
{
|
||||||
|
Set(std::min<u8>(r_, 0x1F), std::min<u8>(g_, 0x1F), std::min<u8>(b_, 0x1F), c_);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetRGB24(u32 rgb24, bool c_ = false)
|
||||||
|
{
|
||||||
|
bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) |
|
||||||
|
(static_cast<u16>(c_) << 15);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false)
|
||||||
|
{
|
||||||
|
bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) |
|
||||||
|
(static_cast<u16>(c_) << 15);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false)
|
||||||
|
{
|
||||||
|
const s32 offset = DITHER_MATRIX[y & 3][x & 3];
|
||||||
|
r8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(r8)) + offset, 0, 255));
|
||||||
|
g8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(g8)) + offset, 0, 255));
|
||||||
|
b8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(b8)) + offset, 0, 255));
|
||||||
|
SetRGB24(r8, g8, b8, c_);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 ToRGB24() const
|
||||||
|
{
|
||||||
|
const u32 r_ = ZeroExtend32(r.GetValue());
|
||||||
|
const u32 g_ = ZeroExtend32(g.GetValue());
|
||||||
|
const u32 b_ = ZeroExtend32(b.GetValue());
|
||||||
|
|
||||||
|
return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr std::tuple<u8, u8> UnpackTexcoord(u16 texcoord)
|
||||||
|
{
|
||||||
|
return std::make_tuple(static_cast<u8>(texcoord), static_cast<u8>(texcoord >> 8));
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr std::tuple<u8, u8, u8> UnpackColorRGB24(u32 rgb24)
|
||||||
|
{
|
||||||
|
return std::make_tuple(static_cast<u8>(rgb24), static_cast<u8>(rgb24 >> 8), static_cast<u8>(rgb24 >> 16));
|
||||||
|
}
|
||||||
|
|
||||||
|
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
|
||||||
|
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
|
||||||
|
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
|
||||||
|
GPUBackendCommandParameters params) override;
|
||||||
|
|
||||||
|
void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override;
|
||||||
|
void DrawLine(const GPUBackendDrawLineCommand* cmd) override;
|
||||||
|
void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override;
|
||||||
|
void FlushRender() override;
|
||||||
|
void DrawingAreaChanged() override;
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// Rasterization
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
|
||||||
|
void ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x,
|
||||||
|
u8 texcoord_y);
|
||||||
|
|
||||||
|
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
|
||||||
|
void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd);
|
||||||
|
|
||||||
|
using DrawRectangleFunction = void (GPU_SW_Backend::*)(const GPUBackendDrawRectangleCommand* cmd);
|
||||||
|
DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable,
|
||||||
|
bool transparency_enable);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// Polygon and line rasterization ported from Mednafen
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
struct i_deltas
|
||||||
|
{
|
||||||
|
u32 du_dx, dv_dx;
|
||||||
|
u32 dr_dx, dg_dx, db_dx;
|
||||||
|
|
||||||
|
u32 du_dy, dv_dy;
|
||||||
|
u32 dr_dy, dg_dy, db_dy;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct i_group
|
||||||
|
{
|
||||||
|
u32 u, v;
|
||||||
|
u32 r, g, b;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<bool shading_enable, bool texture_enable>
|
||||||
|
bool CalcIDeltas(i_deltas& idl, const GPUBackendDrawPolygonCommand::Vertex* A,
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* B, const GPUBackendDrawPolygonCommand::Vertex* C);
|
||||||
|
|
||||||
|
template<bool shading_enable, bool texture_enable>
|
||||||
|
void AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count = 1);
|
||||||
|
|
||||||
|
template<bool shading_enable, bool texture_enable>
|
||||||
|
void AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count = 1);
|
||||||
|
|
||||||
|
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
|
||||||
|
bool dithering_enable>
|
||||||
|
void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig,
|
||||||
|
const i_deltas& idl);
|
||||||
|
|
||||||
|
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
|
||||||
|
bool dithering_enable>
|
||||||
|
void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0,
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2);
|
||||||
|
|
||||||
|
using DrawTriangleFunction = void (GPU_SW_Backend::*)(const GPUBackendDrawPolygonCommand* cmd,
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* v0,
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* v1,
|
||||||
|
const GPUBackendDrawPolygonCommand::Vertex* v2);
|
||||||
|
DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable,
|
||||||
|
bool transparency_enable, bool dithering_enable);
|
||||||
|
|
||||||
|
template<bool shading_enable, bool transparency_enable, bool dithering_enable>
|
||||||
|
void DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
|
||||||
|
const GPUBackendDrawLineCommand::Vertex* p1);
|
||||||
|
|
||||||
|
using DrawLineFunction = void (GPU_SW_Backend::*)(const GPUBackendDrawLineCommand* cmd,
|
||||||
|
const GPUBackendDrawLineCommand::Vertex* p0,
|
||||||
|
const GPUBackendDrawLineCommand::Vertex* p1);
|
||||||
|
DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable);
|
||||||
|
|
||||||
|
std::array<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram;
|
||||||
|
};
|
|
@ -222,3 +222,177 @@ static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = { {
|
||||||
{+2, -2, +3, -1}, // row 1
|
{+2, -2, +3, -1}, // row 1
|
||||||
{-3, +1, -4, +0}, // row 2
|
{-3, +1, -4, +0}, // row 2
|
||||||
{+4, -1, +2, -2} }; // row 3
|
{+4, -1, +2, -2} }; // row 3
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable:4200) // warning C4200: nonstandard extension used: zero-sized array in struct/union
|
||||||
|
#endif
|
||||||
|
|
||||||
|
enum class GPUBackendCommandType : u8
|
||||||
|
{
|
||||||
|
Wraparound,
|
||||||
|
Sync,
|
||||||
|
FillVRAM,
|
||||||
|
UpdateVRAM,
|
||||||
|
CopyVRAM,
|
||||||
|
SetDrawingArea,
|
||||||
|
DrawPolygon,
|
||||||
|
DrawRectangle,
|
||||||
|
DrawLine
|
||||||
|
};
|
||||||
|
|
||||||
|
union GPUBackendCommandParameters
|
||||||
|
{
|
||||||
|
u8 bits;
|
||||||
|
|
||||||
|
BitField<u8, bool, 0, 1> interlaced_rendering;
|
||||||
|
|
||||||
|
/// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1.
|
||||||
|
BitField<u8, u8, 1, 1> active_line_lsb;
|
||||||
|
|
||||||
|
BitField<u8, bool, 2, 1> set_mask_while_drawing;
|
||||||
|
BitField<u8, bool, 3, 1> check_mask_before_draw;
|
||||||
|
|
||||||
|
ALWAYS_INLINE bool IsMaskingEnabled() const { return (bits & 12u) != 0u; }
|
||||||
|
|
||||||
|
// During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or }
|
||||||
|
u16 GetMaskAND() const
|
||||||
|
{
|
||||||
|
// return check_mask_before_draw ? 0x8000 : 0x0000;
|
||||||
|
return Truncate16((bits << 12) & 0x8000);
|
||||||
|
}
|
||||||
|
u16 GetMaskOR() const
|
||||||
|
{
|
||||||
|
// return set_mask_while_drawing ? 0x8000 : 0x0000;
|
||||||
|
return Truncate16((bits << 13) & 0x8000);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GPUBackendCommand
|
||||||
|
{
|
||||||
|
GPUBackendCommandType type;
|
||||||
|
GPUBackendCommandParameters params;
|
||||||
|
u32 size;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GPUBackendSyncCommand : public GPUBackendCommand
|
||||||
|
{
|
||||||
|
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendSyncCommand); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GPUBackendFillVRAMCommand : public GPUBackendCommand
|
||||||
|
{
|
||||||
|
u16 x;
|
||||||
|
u16 y;
|
||||||
|
u16 width;
|
||||||
|
u16 height;
|
||||||
|
u32 color;
|
||||||
|
|
||||||
|
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendFillVRAMCommand); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GPUBackendUpdateVRAMCommand : public GPUBackendCommand
|
||||||
|
{
|
||||||
|
u16 x;
|
||||||
|
u16 y;
|
||||||
|
u16 width;
|
||||||
|
u16 height;
|
||||||
|
u16 data[0];
|
||||||
|
|
||||||
|
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateVRAMCommand) + (sizeof(u16) * width * height); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GPUBackendCopyVRAMCommand : public GPUBackendCommand
|
||||||
|
{
|
||||||
|
u16 src_x;
|
||||||
|
u16 src_y;
|
||||||
|
u16 dst_x;
|
||||||
|
u16 dst_y;
|
||||||
|
u16 width;
|
||||||
|
u16 height;
|
||||||
|
|
||||||
|
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendCopyVRAMCommand); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand
|
||||||
|
{
|
||||||
|
Common::Rectangle<u32> new_area;
|
||||||
|
|
||||||
|
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendSetDrawingAreaCommand); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GPUBackendDrawCommand : public GPUBackendCommand
|
||||||
|
{
|
||||||
|
GPURenderCommand rc;
|
||||||
|
GPUDrawModeReg draw_mode;
|
||||||
|
GPUTexturePaletteReg palette;
|
||||||
|
GPUTextureWindow window;
|
||||||
|
|
||||||
|
ALWAYS_INLINE bool IsDitheringEnabled() const { return rc.IsDitheringEnabled() && draw_mode.dither_enable; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand
|
||||||
|
{
|
||||||
|
u16 num_vertices;
|
||||||
|
|
||||||
|
struct Vertex
|
||||||
|
{
|
||||||
|
s32 x, y;
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
u8 r, g, b, a;
|
||||||
|
};
|
||||||
|
u32 color;
|
||||||
|
};
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
u8 u, v;
|
||||||
|
};
|
||||||
|
u16 texcoord;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
Vertex vertices[0];
|
||||||
|
|
||||||
|
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawPolygonCommand) + sizeof(Vertex) * num_vertices; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GPUBackendDrawRectangleCommand : public GPUBackendDrawCommand
|
||||||
|
{
|
||||||
|
s32 x, y;
|
||||||
|
u16 width, height;
|
||||||
|
u16 texcoord;
|
||||||
|
u32 color;
|
||||||
|
|
||||||
|
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawRectangleCommand); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand
|
||||||
|
{
|
||||||
|
u16 num_vertices;
|
||||||
|
|
||||||
|
struct Vertex
|
||||||
|
{
|
||||||
|
s32 x, y;
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
u8 r, g, b, a;
|
||||||
|
};
|
||||||
|
u32 color;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
Vertex vertices[0];
|
||||||
|
|
||||||
|
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawLineCommand) + sizeof(Vertex) * num_vertices; }
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(pop)
|
||||||
|
#endif
|
||||||
|
|
|
@ -432,6 +432,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
|
||||||
si.SetIntValue("GPU", "Multisamples", 1);
|
si.SetIntValue("GPU", "Multisamples", 1);
|
||||||
si.SetBoolValue("GPU", "UseDebugDevice", false);
|
si.SetBoolValue("GPU", "UseDebugDevice", false);
|
||||||
si.SetBoolValue("GPU", "PerSampleShading", false);
|
si.SetBoolValue("GPU", "PerSampleShading", false);
|
||||||
|
si.SetBoolValue("GPU", "UseThread", true);
|
||||||
si.SetBoolValue("GPU", "TrueColor", false);
|
si.SetBoolValue("GPU", "TrueColor", false);
|
||||||
si.SetBoolValue("GPU", "ScaledDithering", true);
|
si.SetBoolValue("GPU", "ScaledDithering", true);
|
||||||
si.SetStringValue("GPU", "TextureFilter", Settings::GetTextureFilterName(Settings::DEFAULT_GPU_TEXTURE_FILTER));
|
si.SetStringValue("GPU", "TextureFilter", Settings::GetTextureFilterName(Settings::DEFAULT_GPU_TEXTURE_FILTER));
|
||||||
|
@ -629,6 +630,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
|
||||||
if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale ||
|
if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale ||
|
||||||
g_settings.gpu_multisamples != old_settings.gpu_multisamples ||
|
g_settings.gpu_multisamples != old_settings.gpu_multisamples ||
|
||||||
g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading ||
|
g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading ||
|
||||||
|
g_settings.gpu_use_thread != old_settings.gpu_use_thread ||
|
||||||
g_settings.gpu_fifo_size != old_settings.gpu_fifo_size ||
|
g_settings.gpu_fifo_size != old_settings.gpu_fifo_size ||
|
||||||
g_settings.gpu_max_run_ahead != old_settings.gpu_max_run_ahead ||
|
g_settings.gpu_max_run_ahead != old_settings.gpu_max_run_ahead ||
|
||||||
g_settings.gpu_true_color != old_settings.gpu_true_color ||
|
g_settings.gpu_true_color != old_settings.gpu_true_color ||
|
||||||
|
|
|
@ -139,6 +139,7 @@ void Settings::Load(SettingsInterface& si)
|
||||||
gpu_multisamples = static_cast<u32>(si.GetIntValue("GPU", "Multisamples", 1));
|
gpu_multisamples = static_cast<u32>(si.GetIntValue("GPU", "Multisamples", 1));
|
||||||
gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false);
|
gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false);
|
||||||
gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false);
|
gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false);
|
||||||
|
gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true);
|
||||||
gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true);
|
gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true);
|
||||||
gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false);
|
gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false);
|
||||||
gpu_texture_filter =
|
gpu_texture_filter =
|
||||||
|
@ -273,6 +274,7 @@ void Settings::Save(SettingsInterface& si) const
|
||||||
si.SetIntValue("GPU", "Multisamples", static_cast<long>(gpu_multisamples));
|
si.SetIntValue("GPU", "Multisamples", static_cast<long>(gpu_multisamples));
|
||||||
si.SetBoolValue("GPU", "UseDebugDevice", gpu_use_debug_device);
|
si.SetBoolValue("GPU", "UseDebugDevice", gpu_use_debug_device);
|
||||||
si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading);
|
si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading);
|
||||||
|
si.SetBoolValue("GPU", "UseThread", gpu_use_thread);
|
||||||
si.SetBoolValue("GPU", "TrueColor", gpu_true_color);
|
si.SetBoolValue("GPU", "TrueColor", gpu_true_color);
|
||||||
si.SetBoolValue("GPU", "ScaledDithering", gpu_scaled_dithering);
|
si.SetBoolValue("GPU", "ScaledDithering", gpu_scaled_dithering);
|
||||||
si.SetStringValue("GPU", "TextureFilter", GetTextureFilterName(gpu_texture_filter));
|
si.SetStringValue("GPU", "TextureFilter", GetTextureFilterName(gpu_texture_filter));
|
||||||
|
|
|
@ -94,6 +94,7 @@ struct Settings
|
||||||
std::string display_post_process_chain;
|
std::string display_post_process_chain;
|
||||||
u32 gpu_resolution_scale = 1;
|
u32 gpu_resolution_scale = 1;
|
||||||
u32 gpu_multisamples = 1;
|
u32 gpu_multisamples = 1;
|
||||||
|
bool gpu_use_thread = true;
|
||||||
bool gpu_use_debug_device = false;
|
bool gpu_use_debug_device = false;
|
||||||
bool gpu_per_sample_shading = false;
|
bool gpu_per_sample_shading = false;
|
||||||
bool gpu_true_color = true;
|
bool gpu_true_color = true;
|
||||||
|
|
|
@ -461,7 +461,7 @@ void LibretroHostInterface::OnSystemDestroyed()
|
||||||
m_using_hardware_renderer = false;
|
m_using_hardware_renderer = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::array<retro_core_option_definition, 44> s_option_definitions = {{
|
static std::array<retro_core_option_definition, 45> s_option_definitions = {{
|
||||||
{"duckstation_Console.Region",
|
{"duckstation_Console.Region",
|
||||||
"Console Region",
|
"Console Region",
|
||||||
"Determines which region/hardware to emulate. Auto-Detect will use the region of the disc inserted.",
|
"Determines which region/hardware to emulate. Auto-Detect will use the region of the disc inserted.",
|
||||||
|
@ -542,6 +542,12 @@ static std::array<retro_core_option_definition, 44> s_option_definitions = {{
|
||||||
"OpenGL"
|
"OpenGL"
|
||||||
#endif
|
#endif
|
||||||
},
|
},
|
||||||
|
{"duckstation_GPU.UseThread",
|
||||||
|
"Threaded Rendering (Software)",
|
||||||
|
"Uses a second thread for drawing graphics. Currently only available for the software renderer, but can provide a "
|
||||||
|
"significant speed improvement, and is safe to use.",
|
||||||
|
{{"true", "Enabled"}, {"false", "Disabled"}},
|
||||||
|
"true"},
|
||||||
{"duckstation_GPU.ResolutionScale",
|
{"duckstation_GPU.ResolutionScale",
|
||||||
"Internal Resolution Scale",
|
"Internal Resolution Scale",
|
||||||
"Scales internal VRAM resolution by the specified multiplier. Larger values are slower. Some games require "
|
"Scales internal VRAM resolution by the specified multiplier. Larger values are slower. Some games require "
|
||||||
|
@ -731,8 +737,7 @@ static std::array<retro_core_option_definition, 44> s_option_definitions = {{
|
||||||
"Controller 1 Analog Axis Scale",
|
"Controller 1 Analog Axis Scale",
|
||||||
"Sets the analog stick axis scaling factor.",
|
"Sets the analog stick axis scaling factor.",
|
||||||
{{"1.00f", "1.00"}, {"1.40f", "1.40"}},
|
{{"1.00f", "1.00"}, {"1.40f", "1.40"}},
|
||||||
"1.00f"
|
"1.00f"},
|
||||||
},
|
|
||||||
{"duckstation_Controller2.Type",
|
{"duckstation_Controller2.Type",
|
||||||
"Controller 2 Type",
|
"Controller 2 Type",
|
||||||
"Sets the type of controller for Slot 2.",
|
"Sets the type of controller for Slot 2.",
|
||||||
|
@ -753,12 +758,11 @@ static std::array<retro_core_option_definition, 44> s_option_definitions = {{
|
||||||
"Allows you to use the analog sticks to control the d-pad in digital mode, as well as the buttons.",
|
"Allows you to use the analog sticks to control the d-pad in digital mode, as well as the buttons.",
|
||||||
{{"true", "Enabled"}, {"false", "Disabled"}},
|
{{"true", "Enabled"}, {"false", "Disabled"}},
|
||||||
"false"},
|
"false"},
|
||||||
{"duckstation_Controller2.AxisScale",
|
{"duckstation_Controller2.AxisScale",
|
||||||
"Controller 2 Analog Axis Scale",
|
"Controller 2 Analog Axis Scale",
|
||||||
"Sets the analog stick axis scaling factor.",
|
"Sets the analog stick axis scaling factor.",
|
||||||
{{"1.00f", "1.00"}, {"1.40f", "1.40"}},
|
{{"1.00f", "1.00"}, {"1.40f", "1.40"}},
|
||||||
"1.00f"
|
"1.00f"},
|
||||||
},
|
|
||||||
{"duckstation_Display.ShowOSDMessages",
|
{"duckstation_Display.ShowOSDMessages",
|
||||||
"Display OSD Messages",
|
"Display OSD Messages",
|
||||||
"Shows on-screen messages generated by the core.",
|
"Shows on-screen messages generated by the core.",
|
||||||
|
|
|
@ -33,6 +33,7 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW
|
||||||
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.displayIntegerScaling, "Display",
|
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.displayIntegerScaling, "Display",
|
||||||
"IntegerScaling");
|
"IntegerScaling");
|
||||||
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.vsync, "Display", "VSync");
|
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.vsync, "Display", "VSync");
|
||||||
|
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.gpuThread, "GPU", "UseThread");
|
||||||
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showOSDMessages, "Display", "ShowOSDMessages",
|
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showOSDMessages, "Display", "ShowOSDMessages",
|
||||||
true);
|
true);
|
||||||
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showFPS, "Display", "ShowFPS", false);
|
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showFPS, "Display", "ShowFPS", false);
|
||||||
|
@ -84,6 +85,9 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW
|
||||||
m_ui.vsync, tr("VSync"), tr("Checked"),
|
m_ui.vsync, tr("VSync"), tr("Checked"),
|
||||||
tr("Enable this option to match DuckStation's refresh rate with your current monitor or screen. "
|
tr("Enable this option to match DuckStation's refresh rate with your current monitor or screen. "
|
||||||
"VSync is automatically disabled when it is not possible (e.g. running at non-100% speed)."));
|
"VSync is automatically disabled when it is not possible (e.g. running at non-100% speed)."));
|
||||||
|
dialog->registerWidgetHelp(m_ui.gpuThread, tr("Threaded Rendering"), tr("Checked"),
|
||||||
|
tr("Uses a second thread for drawing graphics. Currently only available for the software "
|
||||||
|
"renderer, but can provide a significant speed improvement, and is safe to use."));
|
||||||
dialog->registerWidgetHelp(m_ui.showOSDMessages, tr("Show OSD Messages"), tr("Checked"),
|
dialog->registerWidgetHelp(m_ui.showOSDMessages, tr("Show OSD Messages"), tr("Checked"),
|
||||||
tr("Shows on-screen-display messages when events occur such as save states being "
|
tr("Shows on-screen-display messages when events occur such as save states being "
|
||||||
"created/loaded, screenshots being taken, etc."));
|
"created/loaded, screenshots being taken, etc."));
|
||||||
|
@ -124,6 +128,7 @@ void DisplaySettingsWidget::populateGPUAdaptersAndResolutions()
|
||||||
{
|
{
|
||||||
std::vector<std::string> adapter_names;
|
std::vector<std::string> adapter_names;
|
||||||
std::vector<std::string> fullscreen_modes;
|
std::vector<std::string> fullscreen_modes;
|
||||||
|
bool thread_supported = false;
|
||||||
switch (static_cast<GPURenderer>(m_ui.renderer->currentIndex()))
|
switch (static_cast<GPURenderer>(m_ui.renderer->currentIndex()))
|
||||||
{
|
{
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
|
@ -140,6 +145,10 @@ void DisplaySettingsWidget::populateGPUAdaptersAndResolutions()
|
||||||
adapter_names = FrontendCommon::VulkanHostDisplay::EnumerateAdapterNames();
|
adapter_names = FrontendCommon::VulkanHostDisplay::EnumerateAdapterNames();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case GPURenderer::Software:
|
||||||
|
thread_supported = true;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -184,6 +193,8 @@ void DisplaySettingsWidget::populateGPUAdaptersAndResolutions()
|
||||||
// disable it if we don't have a choice
|
// disable it if we don't have a choice
|
||||||
m_ui.fullscreenMode->setEnabled(!fullscreen_modes.empty());
|
m_ui.fullscreenMode->setEnabled(!fullscreen_modes.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_ui.gpuThread->setEnabled(thread_supported);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DisplaySettingsWidget::onGPUAdapterIndexChanged()
|
void DisplaySettingsWidget::onGPUAdapterIndexChanged()
|
||||||
|
|
|
@ -62,7 +62,14 @@
|
||||||
<item row="2" column="1">
|
<item row="2" column="1">
|
||||||
<widget class="QComboBox" name="fullscreenMode"/>
|
<widget class="QComboBox" name="fullscreenMode"/>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="0">
|
<item row="3" column="0" colspan="2">
|
||||||
|
<widget class="QCheckBox" name="gpuThread">
|
||||||
|
<property name="text">
|
||||||
|
<string>Threaded Rendering</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="4" column="0" colspan="2">
|
||||||
<widget class="QCheckBox" name="vsync">
|
<widget class="QCheckBox" name="vsync">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>VSync</string>
|
<string>VSync</string>
|
||||||
|
|
|
@ -931,6 +931,8 @@ void SDLHostInterface::DrawQuickSettingsMenu()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
settings_changed |= ImGui::MenuItem("GPU on Thread", nullptr, &m_settings_copy.gpu_use_thread);
|
||||||
|
|
||||||
ImGui::EndMenu();
|
ImGui::EndMenu();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue