From 2595e31575c17663707d6a64c867364b23a2a045 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin <stenzek@gmail.com> Date: Sat, 21 Nov 2020 13:32:58 +1000 Subject: [PATCH] GPU: Split software to frontend/backend --- src/core/CMakeLists.txt | 4 + src/core/core.vcxproj | 4 + src/core/core.vcxproj.filters | 4 + src/core/gpu.cpp | 16 +- src/core/gpu.h | 66 +- src/core/gpu_backend.cpp | 327 +++++ src/core/gpu_backend.h | 91 ++ src/core/gpu_hw.cpp | 8 +- src/core/gpu_sw.cpp | 1067 +++++------------ src/core/gpu_sw.h | 99 +- src/core/gpu_sw_backend.cpp | 928 ++++++++++++++ src/core/gpu_sw_backend.h | 174 +++ src/core/gpu_types.h | 174 +++ src/core/host_interface.cpp | 2 + src/core/settings.cpp | 2 + src/core/settings.h | 1 + .../libretro_host_interface.cpp | 16 +- src/duckstation-qt/displaysettingswidget.cpp | 11 + src/duckstation-qt/displaysettingswidget.ui | 9 +- src/duckstation-sdl/sdl_host_interface.cpp | 2 + 20 files changed, 2035 insertions(+), 970 deletions(-) create mode 100644 src/core/gpu_backend.cpp create mode 100644 src/core/gpu_backend.h create mode 100644 src/core/gpu_sw_backend.cpp create mode 100644 src/core/gpu_sw_backend.h diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 086a5a76a..5da789715 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -30,6 +30,8 @@ add_library(core dma.h gpu.cpp gpu.h + gpu_backend.cpp + gpu_backend.h gpu_commands.cpp gpu_hw.cpp gpu_hw.h @@ -41,6 +43,8 @@ add_library(core gpu_hw_vulkan.h gpu_sw.cpp gpu_sw.h + gpu_sw_backend.cpp + gpu_sw_backend.h gpu_types.h gte.cpp gte.h diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index bd9dce6df..a0fc019ce 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -115,11 +115,13 @@ </ClCompile> <ClCompile Include="cpu_types.cpp" /> <ClCompile Include="digital_controller.cpp" /> + <ClCompile Include="gpu_backend.cpp" /> <ClCompile Include="gpu_commands.cpp" /> <ClCompile Include="gpu_hw_d3d11.cpp" /> <ClCompile 
Include="gpu_hw_shadergen.cpp" /> <ClCompile Include="gpu_hw_vulkan.cpp" /> <ClCompile Include="gpu_sw.cpp" /> + <ClCompile Include="gpu_sw_backend.cpp" /> <ClCompile Include="gte.cpp" /> <ClCompile Include="dma.cpp" /> <ClCompile Include="gpu.cpp" /> @@ -185,10 +187,12 @@ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild> </ClInclude> <ClInclude Include="digital_controller.h" /> + <ClInclude Include="gpu_backend.h" /> <ClInclude Include="gpu_hw_d3d11.h" /> <ClInclude Include="gpu_hw_shadergen.h" /> <ClInclude Include="gpu_hw_vulkan.h" /> <ClInclude Include="gpu_sw.h" /> + <ClInclude Include="gpu_sw_backend.h" /> <ClInclude Include="gpu_types.h" /> <ClInclude Include="gte.h" /> <ClInclude Include="cpu_types.h" /> diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index bd9fa7850..d2c2ce6d5 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -51,6 +51,8 @@ <ClCompile Include="memory_card_image.cpp" /> <ClCompile Include="analog_joystick.cpp" /> <ClCompile Include="cpu_recompiler_code_generator_aarch32.cpp" /> + <ClCompile Include="gpu_backend.cpp" /> + <ClCompile Include="gpu_sw_backend.cpp" /> </ItemGroup> <ItemGroup> <ClInclude Include="types.h" /> @@ -105,5 +107,7 @@ <ClInclude Include="memory_card_image.h" /> <ClInclude Include="analog_joystick.h" /> <ClInclude Include="gpu_types.h" /> + <ClInclude Include="gpu_backend.h" /> + <ClInclude Include="gpu_sw_backend.h" /> </ItemGroup> </Project> diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 05cb6ae99..c4f5e920c 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -129,10 +129,10 @@ bool GPU::DoState(StateWrapper& sw) sw.Do(&m_draw_mode.texture_page_y); sw.Do(&m_draw_mode.texture_palette_x); sw.Do(&m_draw_mode.texture_palette_y); - sw.Do(&m_draw_mode.texture_window_and_x); - sw.Do(&m_draw_mode.texture_window_and_y); - sw.Do(&m_draw_mode.texture_window_or_x); - sw.Do(&m_draw_mode.texture_window_or_y); 
+ sw.Do(&m_draw_mode.texture_window.and_x); + sw.Do(&m_draw_mode.texture_window.and_y); + sw.Do(&m_draw_mode.texture_window.or_x); + sw.Do(&m_draw_mode.texture_window.or_y); sw.Do(&m_draw_mode.texture_x_flip); sw.Do(&m_draw_mode.texture_y_flip); @@ -1358,10 +1358,10 @@ void GPU::SetTextureWindow(u32 value) const u8 offset_y = Truncate8((value >> 15) & UINT32_C(0x1F)); Log_DebugPrintf("Set texture window %02X %02X %02X %02X", mask_x, mask_y, offset_x, offset_y); - m_draw_mode.texture_window_and_x = ~(mask_x * 8); - m_draw_mode.texture_window_and_y = ~(mask_y * 8); - m_draw_mode.texture_window_or_x = (offset_x & mask_x) * 8u; - m_draw_mode.texture_window_or_y = (offset_y & mask_y) * 8u; + m_draw_mode.texture_window.and_x = ~(mask_x * 8); + m_draw_mode.texture_window.and_y = ~(mask_y * 8); + m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u; + m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u; m_draw_mode.texture_window_value = value; m_draw_mode.texture_window_changed = true; } diff --git a/src/core/gpu.h b/src/core/gpu.h index 979318326..459b2fbff 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -159,9 +159,6 @@ protected: ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; } // Helper/format conversion functions. 
- static constexpr u8 Convert5To8(u8 x5) { return (x5 << 3) | (x5 & 7); } - static constexpr u8 Convert8To5(u8 x8) { return (x8 >> 3); } - static constexpr u32 RGBA5551ToRGBA8888(u16 color) { u8 r = Truncate8(color & 31); @@ -197,68 +194,10 @@ protected: { return std::make_tuple(static_cast<u8>(rgb24), static_cast<u8>(rgb24 >> 8), static_cast<u8>(rgb24 >> 16)); } - static constexpr u32 PackColorRGB24(u8 r, u8 g, u8 b) - { - return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16); - } static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha); - union VRAMPixel - { - u16 bits; - - BitField<u16, u8, 0, 5> r; - BitField<u16, u8, 5, 5> g; - BitField<u16, u8, 10, 5> b; - BitField<u16, bool, 15, 1> c; - - u8 GetR8() const { return Convert5To8(r); } - u8 GetG8() const { return Convert5To8(g); } - u8 GetB8() const { return Convert5To8(b); } - - void Set(u8 r_, u8 g_, u8 b_, bool c_ = false) - { - bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast<u16>(c_) << 15); - } - - void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false) - { - Set(std::min<u8>(r_, 0x1F), std::min<u8>(g_, 0x1F), std::min<u8>(b_, 0x1F), c_); - } - - void SetRGB24(u32 rgb24, bool c_ = false) - { - bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) | - (static_cast<u16>(c_) << 15); - } - - void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false) - { - bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) | - (static_cast<u16>(c_) << 15); - } - - void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false) - { - const s32 offset = DITHER_MATRIX[y & 3][x & 3]; - r8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(r8)) + offset, 0, 255)); - g8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(g8)) + offset, 0, 255)); - b8 = 
static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(b8)) + offset, 0, 255)); - SetRGB24(r8, g8, b8, c_); - } - - u32 ToRGB24() const - { - const u32 r_ = ZeroExtend32(r.GetValue()); - const u32 g_ = ZeroExtend32(g.GetValue()); - const u32 b_ = ZeroExtend32(b.GetValue()); - - return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16); - } - }; - void SoftReset(); // Sets dots per scanline @@ -464,10 +403,7 @@ protected: u32 texture_page_y; u32 texture_palette_x; u32 texture_palette_y; - u8 texture_window_and_x; - u8 texture_window_and_y; - u8 texture_window_or_x; - u8 texture_window_or_y; + GPUTextureWindow texture_window; bool texture_x_flip; bool texture_y_flip; bool texture_page_changed; diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp new file mode 100644 index 000000000..7bb67df8a --- /dev/null +++ b/src/core/gpu_backend.cpp @@ -0,0 +1,327 @@ +#include "gpu_backend.h" +#include "common/log.h" +#include "common/state_wrapper.h" +#include "settings.h" +Log_SetChannel(GPUBackend); + +std::unique_ptr<GPUBackend> g_gpu_backend; + +GPUBackend::GPUBackend() = default; + +GPUBackend::~GPUBackend() = default; + +bool GPUBackend::Initialize() +{ + if (g_settings.gpu_use_thread) + StartGPUThread(); + + return true; +} + +void GPUBackend::Reset() +{ + Sync(); + m_drawing_area = {}; +} + +void GPUBackend::UpdateSettings() +{ + Sync(); + + if (m_use_gpu_thread != g_settings.gpu_use_thread) + { + if (!g_settings.gpu_use_thread) + StopGPUThread(); + else + StartGPUThread(); + } +} + +void GPUBackend::Shutdown() +{ + StopGPUThread(); +} + +GPUBackendFillVRAMCommand* GPUBackend::NewFillVRAMCommand() +{ + GPUBackendFillVRAMCommand* cmd = + static_cast<GPUBackendFillVRAMCommand*>(AllocateCommand(sizeof(GPUBackendFillVRAMCommand))); + cmd->type = GPUBackendCommandType::FillVRAM; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words) +{ + const u32 size 
= sizeof(GPUBackendUpdateVRAMCommand) + (num_words * sizeof(u16)); + GPUBackendUpdateVRAMCommand* cmd = static_cast<GPUBackendUpdateVRAMCommand*>(AllocateCommand(size)); + cmd->type = GPUBackendCommandType::UpdateVRAM; + cmd->size = size; + return cmd; +} + +GPUBackendCopyVRAMCommand* GPUBackend::NewCopyVRAMCommand() +{ + GPUBackendCopyVRAMCommand* cmd = + static_cast<GPUBackendCopyVRAMCommand*>(AllocateCommand(sizeof(GPUBackendCopyVRAMCommand))); + cmd->type = GPUBackendCommandType::CopyVRAM; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand() +{ + GPUBackendSetDrawingAreaCommand* cmd = + static_cast<GPUBackendSetDrawingAreaCommand*>(AllocateCommand(sizeof(GPUBackendSetDrawingAreaCommand))); + cmd->type = GPUBackendCommandType::SetDrawingArea; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices) +{ + const u32 size = sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex)); + GPUBackendDrawPolygonCommand* cmd = static_cast<GPUBackendDrawPolygonCommand*>(AllocateCommand(size)); + cmd->type = GPUBackendCommandType::DrawPolygon; + cmd->size = size; + cmd->num_vertices = Truncate16(num_vertices); + return cmd; +} + +GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand() +{ + GPUBackendDrawRectangleCommand* cmd = + static_cast<GPUBackendDrawRectangleCommand*>(AllocateCommand(sizeof(GPUBackendDrawRectangleCommand))); + cmd->type = GPUBackendCommandType::DrawRectangle; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices) +{ + const u32 size = sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex)); + GPUBackendDrawLineCommand* cmd = static_cast<GPUBackendDrawLineCommand*>(AllocateCommand(size)); + cmd->type = GPUBackendCommandType::DrawLine; + cmd->size = size; + 
cmd->num_vertices = Truncate16(num_vertices); + return cmd; +} + +void* GPUBackend::AllocateCommand(u32 size) +{ + for (;;) + { + u32 read_ptr = m_command_fifo_read_ptr.load(); + u32 write_ptr = m_command_fifo_write_ptr.load(); + if (read_ptr > write_ptr) + { + u32 available_size = read_ptr - write_ptr; + while (available_size < (size + sizeof(GPUBackendCommandType))) + { + WakeGPUThread(); + read_ptr = m_command_fifo_read_ptr.load(); + available_size = (read_ptr > write_ptr) ? (read_ptr - write_ptr) : (COMMAND_QUEUE_SIZE - write_ptr); + } + } + else + { + const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr; + if ((size + sizeof(GPUBackendCommand)) > available_size) + { + // allocate a dummy command to wrap the buffer around + GPUBackendCommand* dummy_cmd = reinterpret_cast<GPUBackendCommand*>(&m_command_fifo_data[write_ptr]); + dummy_cmd->type = GPUBackendCommandType::Wraparound; + dummy_cmd->size = available_size; + dummy_cmd->params.bits = 0; + m_command_fifo_write_ptr.store(0); + continue; + } + } + + return &m_command_fifo_data[write_ptr]; + } +} + +u32 GPUBackend::GetPendingCommandSize() const +{ + const u32 read_ptr = m_command_fifo_read_ptr.load(); + const u32 write_ptr = m_command_fifo_write_ptr.load(); + return (write_ptr >= read_ptr) ? 
(write_ptr - read_ptr) : (COMMAND_QUEUE_SIZE - read_ptr + write_ptr); +} + +void GPUBackend::PushCommand(GPUBackendCommand* cmd) +{ + if (!m_use_gpu_thread) + { + // single-thread mode + if (cmd->type != GPUBackendCommandType::Sync) + HandleCommand(cmd); + } + else + { + const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size) + cmd->size; + DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); + if (GetPendingCommandSize() >= THRESHOLD_TO_WAKE_GPU) + WakeGPUThread(); + } +} + +void GPUBackend::WakeGPUThread() +{ + std::unique_lock<std::mutex> lock(m_sync_mutex); + if (!m_gpu_thread_sleeping.load()) + return; + + m_wake_gpu_thread_cv.notify_one(); +} + +void GPUBackend::StartGPUThread() +{ + m_gpu_loop_done.store(false); + m_use_gpu_thread = true; + m_gpu_thread = std::thread(&GPUBackend::RunGPULoop, this); + Log_InfoPrint("GPU thread started."); +} + +void GPUBackend::StopGPUThread() +{ + if (!m_use_gpu_thread) + return; + + m_gpu_loop_done.store(true); + WakeGPUThread(); + m_gpu_thread.join(); + m_use_gpu_thread = false; + Log_InfoPrint("GPU thread stopped."); +} + +void GPUBackend::Sync() +{ + if (!m_use_gpu_thread) + return; + + GPUBackendSyncCommand* cmd = static_cast<GPUBackendSyncCommand*>(AllocateCommand(sizeof(GPUBackendSyncCommand))); + cmd->type = GPUBackendCommandType::Sync; + cmd->size = sizeof(GPUBackendSyncCommand); + PushCommand(cmd); + WakeGPUThread(); + + m_sync_event.Wait(); + m_sync_event.Reset(); +} + +void GPUBackend::RunGPULoop() +{ + for (;;) + { + u32 write_ptr = m_command_fifo_write_ptr.load(); + u32 read_ptr = m_command_fifo_read_ptr.load(); + if (read_ptr == write_ptr) + { + std::unique_lock<std::mutex> lock(m_sync_mutex); + m_gpu_thread_sleeping.store(true); + m_wake_gpu_thread_cv.wait(lock, [this]() { return m_gpu_loop_done.load() || GetPendingCommandSize() > 0; }); + m_gpu_thread_sleeping.store(false); + + if (m_gpu_loop_done.load()) + break; + else + continue; + } + + if (write_ptr < read_ptr) + write_ptr = 
COMMAND_QUEUE_SIZE; + + while (read_ptr < write_ptr) + { + const GPUBackendCommand* cmd = reinterpret_cast<const GPUBackendCommand*>(&m_command_fifo_data[read_ptr]); + read_ptr += cmd->size; + + switch (cmd->type) + { + case GPUBackendCommandType::Wraparound: + { + DebugAssert(read_ptr == COMMAND_QUEUE_SIZE); + write_ptr = m_command_fifo_write_ptr.load(); + read_ptr = 0; + } + break; + + case GPUBackendCommandType::Sync: + { + DebugAssert(read_ptr == write_ptr); + m_sync_event.Signal(); + } + break; + + default: + HandleCommand(cmd); + break; + } + } + + m_command_fifo_read_ptr.store(read_ptr); + } +} + +void GPUBackend::HandleCommand(const GPUBackendCommand* cmd) +{ + switch (cmd->type) + { + case GPUBackendCommandType::FillVRAM: + { + FlushRender(); + const GPUBackendFillVRAMCommand* ccmd = static_cast<const GPUBackendFillVRAMCommand*>(cmd); + FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), + ccmd->color, ccmd->params); + } + break; + + case GPUBackendCommandType::UpdateVRAM: + { + FlushRender(); + const GPUBackendUpdateVRAMCommand* ccmd = static_cast<const GPUBackendUpdateVRAMCommand*>(cmd); + UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), + ccmd->data, ccmd->params); + } + break; + + case GPUBackendCommandType::CopyVRAM: + { + FlushRender(); + const GPUBackendCopyVRAMCommand* ccmd = static_cast<const GPUBackendCopyVRAMCommand*>(cmd); + CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x), + ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->params); + } + break; + + case GPUBackendCommandType::SetDrawingArea: + { + FlushRender(); + m_drawing_area = static_cast<const GPUBackendSetDrawingAreaCommand*>(cmd)->new_area; + DrawingAreaChanged(); + } + break; + + case GPUBackendCommandType::DrawPolygon: + { + DrawPolygon(static_cast<const 
GPUBackendDrawPolygonCommand*>(cmd)); + } + break; + + case GPUBackendCommandType::DrawRectangle: + { + DrawRectangle(static_cast<const GPUBackendDrawRectangleCommand*>(cmd)); + } + break; + + case GPUBackendCommandType::DrawLine: + { + DrawLine(static_cast<const GPUBackendDrawLineCommand*>(cmd)); + } + break; + + default: + break; + } +} diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h new file mode 100644 index 000000000..2590b76b9 --- /dev/null +++ b/src/core/gpu_backend.h @@ -0,0 +1,91 @@ +#pragma once +#include "common/event.h" +#include "common/heap_array.h" +#include "gpu_types.h" +#include <atomic> +#include <condition_variable> +#include <memory> +#include <mutex> +#include <thread> + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4324) // warning C4324: 'GPUBackend': structure was padded due to alignment specifier +#endif + +class GPUBackend +{ +public: + GPUBackend(); + virtual ~GPUBackend(); + + ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; } + + virtual bool Initialize(); + virtual void UpdateSettings(); + virtual void Reset(); + virtual void Shutdown(); + + GPUBackendFillVRAMCommand* NewFillVRAMCommand(); + GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words); + GPUBackendCopyVRAMCommand* NewCopyVRAMCommand(); + GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand(); + GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices); + GPUBackendDrawRectangleCommand* NewDrawRectangleCommand(); + GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices); + + void PushCommand(GPUBackendCommand* cmd); + void Sync(); + + /// Processes all pending GPU commands. 
+ void RunGPULoop(); + +protected: + void* AllocateCommand(u32 size); + u32 GetPendingCommandSize() const; + void WakeGPUThread(); + void StartGPUThread(); + void StopGPUThread(); + + virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) = 0; + virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, + GPUBackendCommandParameters params) = 0; + virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) = 0; + virtual void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) = 0; + virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0; + virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0; + virtual void FlushRender() = 0; + virtual void DrawingAreaChanged() = 0; + + void HandleCommand(const GPUBackendCommand* cmd); + + u16* m_vram_ptr = nullptr; + + Common::Rectangle<u32> m_drawing_area{}; + + Common::Event m_sync_event; + std::atomic_bool m_gpu_thread_sleeping{false}; + std::atomic_bool m_gpu_loop_done{false}; + std::thread m_gpu_thread; + bool m_use_gpu_thread = false; + + std::mutex m_sync_mutex; + std::condition_variable m_sync_cpu_thread_cv; + std::condition_variable m_wake_gpu_thread_cv; + bool m_sync_done = false; + + enum : u32 + { + COMMAND_QUEUE_SIZE = 4 * 1024 * 1024, + THRESHOLD_TO_WAKE_GPU = 256 + }; + + HeapArray<u8, COMMAND_QUEUE_SIZE> m_command_fifo_data; + alignas(64) std::atomic<u32> m_command_fifo_read_ptr{0}; + alignas(64) std::atomic<u32> m_command_fifo_write_ptr{0}; +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 0145a8da0..63b007cf6 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -1004,10 +1004,10 @@ void GPU_HW::DispatchRenderCommand() { m_draw_mode.ClearTextureWindowChangedFlag(); - m_batch_ubo_data.u_texture_window_and[0] = ZeroExtend32(m_draw_mode.texture_window_and_x); - 
m_batch_ubo_data.u_texture_window_and[1] = ZeroExtend32(m_draw_mode.texture_window_and_y); - m_batch_ubo_data.u_texture_window_or[0] = ZeroExtend32(m_draw_mode.texture_window_or_x); - m_batch_ubo_data.u_texture_window_or[1] = ZeroExtend32(m_draw_mode.texture_window_or_y); + m_batch_ubo_data.u_texture_window_and[0] = ZeroExtend32(m_draw_mode.texture_window.and_x); + m_batch_ubo_data.u_texture_window_and[1] = ZeroExtend32(m_draw_mode.texture_window.and_y); + m_batch_ubo_data.u_texture_window_or[0] = ZeroExtend32(m_draw_mode.texture_window.or_x); + m_batch_ubo_data.u_texture_window_or[1] = ZeroExtend32(m_draw_mode.texture_window.or_y); m_batch_ubo_dirty = true; } diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 3d383f053..5a69c1cb8 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -19,14 +19,23 @@ Log_SetChannel(GPU_SW); #endif #endif +template<typename T> +ALWAYS_INLINE static constexpr std::tuple<T, T> MinMax(T v1, T v2) +{ + if (v1 > v2) + return std::tie(v2, v1); + else + return std::tie(v1, v2); +} + GPU_SW::GPU_SW() { - m_vram.fill(0); - m_vram_ptr = m_vram.data(); + m_vram_ptr = m_backend.GetVRAM(); } GPU_SW::~GPU_SW() { + m_backend.Shutdown(); if (m_host_display) m_host_display->ClearDisplayTexture(); } @@ -38,7 +47,7 @@ bool GPU_SW::IsHardwareRenderer() const bool GPU_SW::Initialize(HostDisplay* host_display) { - if (!GPU::Initialize(host_display)) + if (!GPU::Initialize(host_display) || !m_backend.Initialize()) return false; static constexpr auto formats_for_16bit = make_array(HostDisplayPixelFormat::RGB565, HostDisplayPixelFormat::RGBA5551, @@ -70,7 +79,13 @@ void GPU_SW::Reset() { GPU::Reset(); - m_vram.fill(0); + m_backend.Reset(); +} + +void GPU_SW::UpdateSettings() +{ + GPU::UpdateSettings(); + m_backend.UpdateSettings(); } template<HostDisplayPixelFormat out_format, typename out_type> @@ -248,7 +263,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field const u32 rows = height >> interlaced_shift; 
dst_stride <<= interlaced_shift; - const u16* src_ptr = &m_vram[src_y * VRAM_WIDTH + src_x]; + const u16* src_ptr = &m_vram_ptr[src_y * VRAM_WIDTH + src_x]; const u32 src_step = VRAM_WIDTH << interleaved_shift; for (u32 row = 0; row < rows; row++) { @@ -265,7 +280,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field const u32 end_x = src_x + width; for (u32 row = 0; row < rows; row++) { - const u16* src_row_ptr = &m_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH]; + const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH]; OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr); for (u32 col = src_x; col < end_x; col++) { @@ -340,7 +355,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh if ((src_x + width) <= VRAM_WIDTH && (src_y + (rows << interleaved_shift)) <= VRAM_HEIGHT) { - const u8* src_ptr = reinterpret_cast<const u8*>(&m_vram[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3); + const u8* src_ptr = reinterpret_cast<const u8*>(&m_vram_ptr[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3); const u32 src_stride = (VRAM_WIDTH << interleaved_shift) * sizeof(u16); for (u32 row = 0; row < rows; row++) { @@ -400,7 +415,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh { for (u32 row = 0; row < rows; row++) { - const u16* src_row_ptr = &m_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH]; + const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH]; OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr); for (u32 col = 0; col < width; col++) @@ -475,6 +490,8 @@ void GPU_SW::ClearDisplay() void GPU_SW::UpdateDisplay() { // fill display texture + m_backend.Sync(); + if (!g_settings.debugging.show_vram) { if (IsDisplayDisabled()) @@ -530,8 +547,34 @@ void GPU_SW::UpdateDisplay() } } +void GPU_SW::FillBackendCommandParameters(GPUBackendCommand* cmd) +{ + cmd->params.bits = 0; + cmd->params.check_mask_before_draw = 
m_GPUSTAT.check_mask_before_draw; + cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; + cmd->params.interlaced_rendering = IsInterlacedRenderingEnabled(); +} + +void GPU_SW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) +{ + FillBackendCommandParameters(cmd); + cmd->rc.bits = rc.bits; + cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; + cmd->palette.bits = m_draw_mode.palette_reg; + cmd->window = m_draw_mode.texture_window; +} + void GPU_SW::DispatchRenderCommand() { + if (m_drawing_area_changed) + { + GPUBackendSetDrawingAreaCommand* cmd = m_backend.NewSetDrawingAreaCommand(); + cmd->new_area = m_drawing_area; + m_backend.PushCommand(cmd); + m_drawing_area_changed = false; + } + const GPURenderCommand rc{m_render_command.bits}; const bool dithering_enable = rc.IsDitheringEnabled() && m_GPUSTAT.dither_enable; @@ -539,80 +582,119 @@ void GPU_SW::DispatchRenderCommand() { case GPUPrimitive::Polygon: { + const u32 num_vertices = rc.quad_polygon ? 4 : 3; + GPUBackendDrawPolygonCommand* cmd = m_backend.NewDrawPolygonCommand(num_vertices); + FillDrawCommand(cmd, rc); + const u32 first_color = rc.color_for_first_vertex; const bool shaded = rc.shading_enable; const bool textured = rc.texture_enable; - - const u32 num_vertices = rc.quad_polygon ? 4 : 3; - std::array<SWVertex, 4> vertices; for (u32 i = 0; i < num_vertices; i++) { - SWVertex& vert = vertices[i]; - const u32 color_rgb = (shaded && i > 0) ? 
(FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; - vert.r = Truncate8(color_rgb); - vert.g = Truncate8(color_rgb >> 8); - vert.b = Truncate8(color_rgb >> 16); - - const GPUVertexPosition vp{FifoPop()}; - vert.x = m_drawing_offset.x + vp.x; - vert.y = m_drawing_offset.y + vp.y; - - if (textured) - { - std::tie(vert.u, vert.v) = UnpackTexcoord(Truncate16(FifoPop())); - } - else - { - vert.u = 0; - vert.v = 0; - } + GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; + vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; + const u64 maddr_and_pos = m_fifo.Pop(); + const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; + vert->x = m_drawing_offset.x + vp.x; + vert->y = m_drawing_offset.y + vp.y; + vert->texcoord = textured ? Truncate16(FifoPop()) : 0; } if (!IsDrawingAreaIsValid()) return; - const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction( - rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); + // Cull polygons which are too large. 
+ const auto [min_x_12, max_x_12] = MinMax(cmd->vertices[1].x, cmd->vertices[2].x); + const auto [min_y_12, max_y_12] = MinMax(cmd->vertices[1].y, cmd->vertices[2].y); + const s32 min_x = std::min(min_x_12, cmd->vertices[0].x); + const s32 max_x = std::max(max_x_12, cmd->vertices[0].x); + const s32 min_y = std::min(min_y_12, cmd->vertices[0].y); + const s32 max_y = std::max(max_y_12, cmd->vertices[0].y); - (this->*DrawFunction)(&vertices[0], &vertices[1], &vertices[2]); - if (num_vertices > 3) - (this->*DrawFunction)(&vertices[2], &vertices[1], &vertices[3]); + if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + { + Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", cmd->vertices[0].x, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); + } + else + { + AddDrawTriangleTicks(cmd->vertices[0].x, cmd->vertices[0].y, cmd->vertices[1].x, cmd->vertices[1].y, + cmd->vertices[2].x, cmd->vertices[2].y, rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + } + + // quads + if (rc.quad_polygon) + { + const s32 min_x_123 = std::min(min_x_12, cmd->vertices[3].x); + const s32 max_x_123 = std::max(max_x_12, cmd->vertices[3].x); + const s32 min_y_123 = std::min(min_y_12, cmd->vertices[3].y); + const s32 max_y_123 = std::max(max_y_12, cmd->vertices[3].y); + + // Cull polygons which are too large. 
+ if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT) + { + Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d", cmd->vertices[2].x, + cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[3].x, + cmd->vertices[3].y); + } + else + { + AddDrawTriangleTicks(cmd->vertices[2].x, cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, + cmd->vertices[3].x, cmd->vertices[3].y, rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + } + } + + m_backend.PushCommand(cmd); } break; case GPUPrimitive::Rectangle: { - const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex); - const GPUVertexPosition vp{FifoPop()}; - const u32 texcoord_and_palette = rc.texture_enable ? FifoPop() : 0; - const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette)); + GPUBackendDrawRectangleCommand* cmd = m_backend.NewDrawRectangleCommand(); + FillDrawCommand(cmd, rc); + cmd->color = rc.color_for_first_vertex; + + const GPUVertexPosition vp{FifoPop()}; + cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); + cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); + + if (rc.texture_enable) + { + const u32 texcoord_and_palette = FifoPop(); + cmd->palette.bits = Truncate16(texcoord_and_palette >> 16); + cmd->texcoord = Truncate16(texcoord_and_palette); + } + else + { + cmd->palette.bits = 0; + cmd->texcoord = 0; + } - u32 width; - u32 height; switch (rc.rectangle_size) { case GPUDrawRectangleSize::R1x1: - width = 1; - height = 1; + cmd->width = 1; + cmd->height = 1; break; case GPUDrawRectangleSize::R8x8: - width = 8; - height = 8; + cmd->width = 8; + cmd->height = 8; break; case GPUDrawRectangleSize::R16x16: - width = 16; - height = 16; + cmd->width = 16; + cmd->height = 16; break; default: { const u32 width_and_height = FifoPop(); - width = static_cast<u32>(width_and_height & VRAM_WIDTH_MASK); - height = 
static_cast<u32>((width_and_height >> 16) & VRAM_HEIGHT_MASK); + cmd->width = static_cast<u16>(width_and_height & VRAM_WIDTH_MASK); + cmd->height = static_cast<u16>((width_and_height >> 16) & VRAM_HEIGHT_MASK); - if (width >= MAX_PRIMITIVE_WIDTH || height >= MAX_PRIMITIVE_HEIGHT) + if (cmd->width >= MAX_PRIMITIVE_WIDTH || cmd->height >= MAX_PRIMITIVE_HEIGHT) { - Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", vp.x.GetValue(), vp.y.GetValue(), width, - height); + Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", cmd->x, cmd->y, cmd->width, cmd->height); return; } } @@ -622,51 +704,123 @@ void GPU_SW::DispatchRenderCommand() if (!IsDrawingAreaIsValid()) return; - const DrawRectangleFunction DrawFunction = - GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); + const u32 clip_left = static_cast<u32>(std::clamp<s32>(cmd->x, m_drawing_area.left, m_drawing_area.right)); + const u32 clip_right = + static_cast<u32>(std::clamp<s32>(cmd->x + cmd->width, m_drawing_area.left, m_drawing_area.right)) + 1u; + const u32 clip_top = static_cast<u32>(std::clamp<s32>(cmd->y, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = + static_cast<u32>(std::clamp<s32>(cmd->y + cmd->height, m_drawing_area.top, m_drawing_area.bottom)) + 1u; - (this->*DrawFunction)(vp.x, vp.y, width, height, r, g, b, texcoord_x, texcoord_y); + // cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom)); + AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable); + + m_backend.PushCommand(cmd); } break; case GPUPrimitive::Line: { - const u32 first_color = rc.color_for_first_vertex; - const bool shaded = rc.shading_enable; - - const DrawLineFunction DrawFunction = GetDrawLineFunction(shaded, rc.transparency_enable, dithering_enable); - - std::array<SWVertex, 2> vertices = {}; - u32 buffer_pos = 0; - - // first vertex - 
SWVertex* p0 = &vertices[0]; - SWVertex* p1 = &vertices[1]; - p0->SetPosition(GPUVertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())}, - m_drawing_offset.x, m_drawing_offset.y); - p0->SetColorRGB24(first_color); - - // remaining vertices in line strip - const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2; - for (u32 i = 1; i < num_vertices; i++) + if (!rc.polyline) { - if (rc.polyline) + GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand(2); + FillDrawCommand(cmd, rc); + cmd->palette.bits = 0; + + if (rc.shading_enable) { - p1->SetColorRGB24(shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color); - p1->SetPosition(GPUVertexPosition{m_blit_buffer[buffer_pos++]}, m_drawing_offset.x, m_drawing_offset.y); + cmd->vertices[0].color = rc.color_for_first_vertex; + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; } else { - p1->SetColorRGB24(shaded ? 
(FifoPop() & UINT32_C(0x00FFFFFF)) : first_color); - p1->SetPosition(GPUVertexPosition{Truncate32(FifoPop())}, m_drawing_offset.x, m_drawing_offset.y); + cmd->vertices[0].color = rc.color_for_first_vertex; + cmd->vertices[1].color = rc.color_for_first_vertex; + + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; } - // down here because of the FIFO pops - if (IsDrawingAreaIsValid()) - (this->*DrawFunction)(p0, p1); + if (!IsDrawingAreaIsValid()) + return; - // swap p0/p1 so that the last vertex is used as the first for the next line - std::swap(p0, p1); + const auto [min_x, max_x] = MinMax(cmd->vertices[0].x, cmd->vertices[1].x); + const auto [min_y, max_y] = MinMax(cmd->vertices[0].y, cmd->vertices[1].y); + if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + { + Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", cmd->vertices[0].x, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y); + return; + } + + const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right)); + const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; + const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = + static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + // cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), + // Truncate16(clip_bottom)); + AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable); + + m_backend.PushCommand(cmd); + } + else + { + const u32 num_vertices = GetPolyLineVertexCount(); + 
GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand(num_vertices); + FillDrawCommand(cmd, m_render_command); + + u32 buffer_pos = 0; + const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; + cmd->vertices[0].x = start_vp.x + m_drawing_offset.x; + cmd->vertices[0].y = start_vp.y + m_drawing_offset.y; + cmd->vertices[0].color = m_render_command.color_for_first_vertex; + // cmd->bounds.SetInvalid(); + + const bool shaded = m_render_command.shading_enable; + for (u32 i = 1; i < num_vertices; i++) + { + cmd->vertices[i].color = + shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex; + const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; + cmd->vertices[i].x = m_drawing_offset.x + vp.x; + cmd->vertices[i].y = m_drawing_offset.y + vp.y; + + const auto [min_x, max_x] = MinMax(cmd->vertices[i - 1].x, cmd->vertices[i].x); + const auto [min_y, max_y] = MinMax(cmd->vertices[i - 1].y, cmd->vertices[i].y); + if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + { + Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", cmd->vertices[i - 1].x, cmd->vertices[i - 1].y, + cmd->vertices[i].x, cmd->vertices[i].y); + } + else + { + const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right)); + const u32 clip_right = + static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; + const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = + static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + + // cmd->bounds.Include(Truncate16(clip_left), Truncate16(clip_right), Truncate16(clip_top), + // Truncate16(clip_bottom)); + AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, m_render_command.shading_enable); + } + } + + m_backend.PushCommand(cmd); } } break; @@ -677,731 +831,50 @@ void 
GPU_SW::DispatchRenderCommand() } } -constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT() +void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { - DitherLUT lut = {}; - for (u32 i = 0; i < DITHER_MATRIX_SIZE; i++) - { - for (u32 j = 0; j < DITHER_MATRIX_SIZE; j++) - { - for (s32 value = 0; value < DITHER_LUT_SIZE; value++) - { - const s32 dithered_value = (value + DITHER_MATRIX[i][j]) >> 3; - lut[i][j][value] = static_cast<u8>((dithered_value < 0) ? 0 : ((dithered_value > 31) ? 31 : dithered_value)); - } - } - } - return lut; + m_backend.Sync(); } -static constexpr GPU_SW::DitherLUT s_dither_lut = GPU_SW::ComputeDitherLUT(); - -template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable> -void ALWAYS_INLINE_RELEASE GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, - u8 texcoord_y) +void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { - VRAMPixel color; - bool transparent; - if constexpr (texture_enable) - { - // Apply texture window - // TODO: Precompute the second half - texcoord_x = (texcoord_x & m_draw_mode.texture_window_and_x) | m_draw_mode.texture_window_or_x; - texcoord_y = (texcoord_y & m_draw_mode.texture_window_and_y) | m_draw_mode.texture_window_or_y; - - VRAMPixel texture_color; - switch (m_draw_mode.mode_reg.texture_mode) - { - case GPUTextureMode::Palette4Bit: - { - const u16 palette_value = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH, - (m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); - const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu; - texture_color.bits = GetPixel((m_draw_mode.texture_palette_x + ZeroExtend32(palette_index)) % VRAM_WIDTH, - m_draw_mode.texture_palette_y); - } - break; - - case GPUTextureMode::Palette8Bit: - { - const u16 palette_value = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH, - 
(m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); - const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu; - texture_color.bits = GetPixel((m_draw_mode.texture_palette_x + ZeroExtend32(palette_index)) % VRAM_WIDTH, - m_draw_mode.texture_palette_y); - } - break; - - default: - { - texture_color.bits = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x)) % VRAM_WIDTH, - (m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); - } - break; - } - - if (texture_color.bits == 0) - return; - - transparent = texture_color.c; - - if constexpr (raw_texture_enable) - { - color.bits = texture_color.bits; - } - else - { - const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; - const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u; - - color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.r) * u16(color_r)) >> 4]) << 0) | - (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.g) * u16(color_g)) >> 4]) << 5) | - (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.b) * u16(color_b)) >> 4]) << 10) | - (texture_color.bits & 0x8000u); - } - } - else - { - transparent = true; - - const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; - const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u; - - color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_r]) << 0) | - (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_g]) << 5) | - (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_b]) << 10); - } - - const VRAMPixel bg_color{GetPixel(static_cast<u32>(x), static_cast<u32>(y))}; - if constexpr (transparency_enable) - { - if (transparent) - { -#define BLEND_AVERAGE(bg, fg) Truncate8(std::min<u32>((ZeroExtend32(bg) / 2) + (ZeroExtend32(fg) / 2), 0x1F)) -#define BLEND_ADD(bg, fg) Truncate8(std::min<u32>(ZeroExtend32(bg) + ZeroExtend32(fg), 0x1F)) -#define BLEND_SUBTRACT(bg, fg) Truncate8((bg > fg) ? 
((bg) - (fg)) : 0) -#define BLEND_QUARTER(bg, fg) Truncate8(std::min<u32>(ZeroExtend32(bg) + ZeroExtend32(fg / 4), 0x1F)) - -#define BLEND_RGB(func) \ - color.Set(func(bg_color.r.GetValue(), color.r.GetValue()), func(bg_color.g.GetValue(), color.g.GetValue()), \ - func(bg_color.b.GetValue(), color.b.GetValue()), color.c.GetValue()) - - switch (m_draw_mode.mode_reg.transparency_mode) - { - case GPUTransparencyMode::HalfBackgroundPlusHalfForeground: - BLEND_RGB(BLEND_AVERAGE); - break; - case GPUTransparencyMode::BackgroundPlusForeground: - BLEND_RGB(BLEND_ADD); - break; - case GPUTransparencyMode::BackgroundMinusForeground: - BLEND_RGB(BLEND_SUBTRACT); - break; - case GPUTransparencyMode::BackgroundPlusQuarterForeground: - BLEND_RGB(BLEND_QUARTER); - break; - default: - break; - } - -#undef BLEND_RGB - -#undef BLEND_QUARTER -#undef BLEND_SUBTRACT -#undef BLEND_ADD -#undef BLEND_AVERAGE - } - } - else - { - UNREFERENCED_VARIABLE(transparent); - } - - const u16 mask_and = m_GPUSTAT.GetMaskAND(); - if ((bg_color.bits & mask_and) != 0) - return; - - SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | m_GPUSTAT.GetMaskOR()); + GPUBackendFillVRAMCommand* cmd = m_backend.NewFillVRAMCommand(); + FillBackendCommandParameters(cmd); + cmd->x = static_cast<u16>(x); + cmd->y = static_cast<u16>(y); + cmd->width = static_cast<u16>(width); + cmd->height = static_cast<u16>(height); + cmd->color = color; + m_backend.PushCommand(cmd); } -template<bool texture_enable, bool raw_texture_enable, bool transparency_enable> -void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x, - u8 origin_texcoord_y) +void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) { - const s32 start_x = TruncateGPUVertexPosition(m_drawing_offset.x + origin_x); - const s32 start_y = TruncateGPUVertexPosition(m_drawing_offset.y + origin_y); + const u32 num_words = width * height; + GPUBackendUpdateVRAMCommand* cmd = 
m_backend.NewUpdateVRAMCommand(num_words); + FillBackendCommandParameters(cmd); + cmd->x = static_cast<u16>(x); + cmd->y = static_cast<u16>(y); + cmd->width = static_cast<u16>(width); + cmd->height = static_cast<u16>(height); + std::memcpy(cmd->data, data, sizeof(u16) * num_words); + m_backend.PushCommand(cmd); +} - { - const u32 clip_left = static_cast<u32>(std::clamp<s32>(start_x, m_drawing_area.left, m_drawing_area.right)); - const u32 clip_right = - static_cast<u32>(std::clamp<s32>(start_x + static_cast<s32>(width), m_drawing_area.left, m_drawing_area.right)) + - 1u; - const u32 clip_top = static_cast<u32>(std::clamp<s32>(start_y, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast<u32>(std::clamp<s32>(start_y + static_cast<s32>(height), m_drawing_area.top, m_drawing_area.bottom)) + - 1u; - AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable, transparency_enable); - } - - for (u32 offset_y = 0; offset_y < height; offset_y++) - { - const s32 y = start_y + static_cast<s32>(offset_y); - if (y < static_cast<s32>(m_drawing_area.top) || y > static_cast<s32>(m_drawing_area.bottom) || - (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast<u32>(y) & 1u))) - { - continue; - } - - const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y); - - for (u32 offset_x = 0; offset_x < width; offset_x++) - { - const s32 x = start_x + static_cast<s32>(offset_x); - if (x < static_cast<s32>(m_drawing_area.left) || x > static_cast<s32>(m_drawing_area.right)) - continue; - - const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x); - - ShadePixel<texture_enable, raw_texture_enable, transparency_enable, false>( - static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y); - } - } +void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) +{ + GPUBackendCopyVRAMCommand* cmd = m_backend.NewCopyVRAMCommand(); + 
FillBackendCommandParameters(cmd); + cmd->src_x = static_cast<u16>(src_x); + cmd->src_y = static_cast<u16>(src_y); + cmd->dst_x = static_cast<u16>(dst_x); + cmd->dst_y = static_cast<u16>(dst_y); + cmd->width = static_cast<u16>(width); + cmd->height = static_cast<u16>(height); + m_backend.PushCommand(cmd); } std::unique_ptr<GPU> GPU::CreateSoftwareRenderer() { return std::make_unique<GPU_SW>(); } - -////////////////////////////////////////////////////////////////////////// -// Polygon and line rasterization ported from Mednafen -////////////////////////////////////////////////////////////////////////// - -#define COORD_FBS 12 -#define COORD_MF_INT(n) ((n) << COORD_FBS) -#define COORD_POST_PADDING 12 - -static ALWAYS_INLINE_RELEASE s64 MakePolyXFP(s32 x) -{ - return ((u64)x << 32) + ((1ULL << 32) - (1 << 11)); -} - -static ALWAYS_INLINE_RELEASE s64 MakePolyXFPStep(s32 dx, s32 dy) -{ - s64 ret; - s64 dx_ex = (u64)dx << 32; - - if (dx_ex < 0) - dx_ex -= dy - 1; - - if (dx_ex > 0) - dx_ex += dy - 1; - - ret = dx_ex / dy; - - return (ret); -} - -static ALWAYS_INLINE_RELEASE s32 GetPolyXFP_Int(s64 xfp) -{ - return (xfp >> 32); -} - -template<bool shading_enable, bool texture_enable> -bool ALWAYS_INLINE_RELEASE GPU_SW::CalcIDeltas(i_deltas& idl, const SWVertex* A, const SWVertex* B, const SWVertex* C) -{ -#define CALCIS(x, y) (((B->x - A->x) * (C->y - B->y)) - ((C->x - B->x) * (B->y - A->y))) - - s32 denom = CALCIS(x, y); - - if (!denom) - return false; - - if constexpr (shading_enable) - { - idl.dr_dx = (u32)(CALCIS(r, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; - idl.dr_dy = (u32)(CALCIS(x, r) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; - - idl.dg_dx = (u32)(CALCIS(g, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; - idl.dg_dy = (u32)(CALCIS(x, g) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; - - idl.db_dx = (u32)(CALCIS(b, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; - idl.db_dy = (u32)(CALCIS(x, b) * (1 << COORD_FBS) / denom) << 
COORD_POST_PADDING; - } - - if constexpr (texture_enable) - { - idl.du_dx = (u32)(CALCIS(u, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; - idl.du_dy = (u32)(CALCIS(x, u) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; - - idl.dv_dx = (u32)(CALCIS(v, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; - idl.dv_dy = (u32)(CALCIS(x, v) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; - } - - return true; - -#undef CALCIS -} - -template<bool shading_enable, bool texture_enable> -void ALWAYS_INLINE_RELEASE GPU_SW::AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count /*= 1*/) -{ - if constexpr (shading_enable) - { - ig.r += idl.dr_dx * count; - ig.g += idl.dg_dx * count; - ig.b += idl.db_dx * count; - } - - if constexpr (texture_enable) - { - ig.u += idl.du_dx * count; - ig.v += idl.dv_dx * count; - } -} - -template<bool shading_enable, bool texture_enable> -void ALWAYS_INLINE_RELEASE GPU_SW::AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count /*= 1*/) -{ - if constexpr (shading_enable) - { - ig.r += idl.dr_dy * count; - ig.g += idl.dg_dy * count; - ig.b += idl.db_dy * count; - } - - if constexpr (texture_enable) - { - ig.u += idl.du_dy * count; - ig.v += idl.dv_dy * count; - } -} - -template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, - bool dithering_enable> -void GPU_SW::DrawSpan(s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl) -{ - if (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast<u32>(y) & 1u)) - return; - - s32 x_ig_adjust = x_start; - s32 w = x_bound - x_start; - s32 x = TruncateGPUVertexPosition(x_start); - - if (x < static_cast<s32>(m_drawing_area.left)) - { - s32 delta = static_cast<s32>(m_drawing_area.left) - x; - x_ig_adjust += delta; - x += delta; - w -= delta; - } - - if ((x + w) > (static_cast<s32>(m_drawing_area.right) + 1)) - w = static_cast<s32>(m_drawing_area.right) + 1 - x; - - if (w <= 0) - return; - - AddIDeltas_DX<shading_enable, 
texture_enable>(ig, idl, x_ig_adjust); - AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, y); - - do - { - const u32 r = ig.r >> (COORD_FBS + COORD_POST_PADDING); - const u32 g = ig.g >> (COORD_FBS + COORD_POST_PADDING); - const u32 b = ig.b >> (COORD_FBS + COORD_POST_PADDING); - const u32 u = ig.u >> (COORD_FBS + COORD_POST_PADDING); - const u32 v = ig.v >> (COORD_FBS + COORD_POST_PADDING); - - ShadePixel<texture_enable, raw_texture_enable, transparency_enable, dithering_enable>( - static_cast<u32>(x), static_cast<u32>(y), Truncate8(r), Truncate8(g), Truncate8(b), Truncate8(u), Truncate8(v)); - - x++; - AddIDeltas_DX<shading_enable, texture_enable>(ig, idl); - } while (--w > 0); -} - -template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, - bool dithering_enable> -void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2) -{ - u32 core_vertex; - { - u32 cvtemp = 0; - - if (v1->x <= v0->x) - { - if (v2->x <= v1->x) - cvtemp = (1 << 2); - else - cvtemp = (1 << 1); - } - else if (v2->x < v0->x) - cvtemp = (1 << 2); - else - cvtemp = (1 << 0); - - if (v2->y < v1->y) - { - std::swap(v2, v1); - cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1); - } - - if (v1->y < v0->y) - { - std::swap(v1, v0); - cvtemp = ((cvtemp >> 1) & 0x1) | ((cvtemp << 1) & 0x2) | (cvtemp & 0x4); - } - - if (v2->y < v1->y) - { - std::swap(v2, v1); - cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1); - } - - core_vertex = cvtemp >> 1; - } - - if (v0->y == v2->y) - return; - - if (static_cast<u32>(std::abs(v2->x - v0->x)) >= MAX_PRIMITIVE_WIDTH || - static_cast<u32>(std::abs(v2->x - v1->x)) >= MAX_PRIMITIVE_WIDTH || - static_cast<u32>(std::abs(v1->x - v0->x)) >= MAX_PRIMITIVE_WIDTH || - static_cast<u32>(v2->y - v0->y) >= MAX_PRIMITIVE_HEIGHT) - { - return; - } - - AddDrawTriangleTicks(v0->x, v0->y, v1->x, v1->y, v2->x, v2->y, shading_enable, texture_enable, 
transparency_enable); - - s64 base_coord = MakePolyXFP(v0->x); - s64 base_step = MakePolyXFPStep((v2->x - v0->x), (v2->y - v0->y)); - s64 bound_coord_us; - s64 bound_coord_ls; - bool right_facing; - - if (v1->y == v0->y) - { - bound_coord_us = 0; - right_facing = (bool)(v1->x > v0->x); - } - else - { - bound_coord_us = MakePolyXFPStep((v1->x - v0->x), (v1->y - v0->y)); - right_facing = (bool)(bound_coord_us > base_step); - } - - if (v2->y == v1->y) - bound_coord_ls = 0; - else - bound_coord_ls = MakePolyXFPStep((v2->x - v1->x), (v2->y - v1->y)); - - i_deltas idl; - if (!CalcIDeltas<shading_enable, texture_enable>(idl, v0, v1, v2)) - return; - - const SWVertex* vertices[3] = {v0, v1, v2}; - - i_group ig; - if constexpr (texture_enable) - { - ig.u = (COORD_MF_INT(vertices[core_vertex]->u) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; - ig.v = (COORD_MF_INT(vertices[core_vertex]->v) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; - } - - ig.r = (COORD_MF_INT(vertices[core_vertex]->r) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; - ig.g = (COORD_MF_INT(vertices[core_vertex]->g) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; - ig.b = (COORD_MF_INT(vertices[core_vertex]->b) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; - - AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->x); - AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->y); - - struct TriangleHalf - { - u64 x_coord[2]; - u64 x_step[2]; - - s32 y_coord; - s32 y_bound; - - bool dec_mode; - } tripart[2]; - - u32 vo = 0; - u32 vp = 0; - if (core_vertex != 0) - vo = 1; - if (core_vertex == 2) - vp = 3; - - { - TriangleHalf* tp = &tripart[vo]; - tp->y_coord = vertices[0 ^ vo]->y; - tp->y_bound = vertices[1 ^ vo]->y; - tp->x_coord[right_facing] = MakePolyXFP(vertices[0 ^ vo]->x); - tp->x_step[right_facing] = bound_coord_us; - tp->x_coord[!right_facing] = base_coord + ((vertices[vo]->y - vertices[0]->y) * base_step); - tp->x_step[!right_facing] = 
base_step; - tp->dec_mode = vo; - } - - { - TriangleHalf* tp = &tripart[vo ^ 1]; - tp->y_coord = vertices[1 ^ vp]->y; - tp->y_bound = vertices[2 ^ vp]->y; - tp->x_coord[right_facing] = MakePolyXFP(vertices[1 ^ vp]->x); - tp->x_step[right_facing] = bound_coord_ls; - tp->x_coord[!right_facing] = - base_coord + ((vertices[1 ^ vp]->y - vertices[0]->y) * - base_step); // base_coord + ((vertices[1].y - vertices[0].y) * base_step); - tp->x_step[!right_facing] = base_step; - tp->dec_mode = vp; - } - - for (u32 i = 0; i < 2; i++) - { - s32 yi = tripart[i].y_coord; - s32 yb = tripart[i].y_bound; - - u64 lc = tripart[i].x_coord[0]; - u64 ls = tripart[i].x_step[0]; - - u64 rc = tripart[i].x_coord[1]; - u64 rs = tripart[i].x_step[1]; - - if (tripart[i].dec_mode) - { - while (yi > yb) - { - yi--; - lc -= ls; - rc -= rs; - - s32 y = TruncateGPUVertexPosition(yi); - - if (y < static_cast<s32>(m_drawing_area.top)) - break; - - if (y > static_cast<s32>(m_drawing_area.bottom)) - continue; - - DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>( - yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); - } - } - else - { - while (yi < yb) - { - s32 y = TruncateGPUVertexPosition(yi); - - if (y > static_cast<s32>(m_drawing_area.bottom)) - break; - - if (y >= static_cast<s32>(m_drawing_area.top)) - { - - DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>( - yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); - } - - yi++; - lc += ls; - rc += rs; - } - } - } -} - -GPU_SW::DrawTriangleFunction GPU_SW::GetDrawTriangleFunction(bool shading_enable, bool texture_enable, - bool raw_texture_enable, bool transparency_enable, - bool dithering_enable) -{ -#define F(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING) \ - &GPU_SW::DrawTriangle<SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING> - - static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = { - {{{{F(false, false, false, false, 
false), F(false, false, false, false, true)}, - {F(false, false, false, true, false), F(false, false, false, true, true)}}, - {{F(false, false, true, false, false), F(false, false, true, false, true)}, - {F(false, false, true, true, false), F(false, false, true, true, true)}}}, - {{{F(false, true, false, false, false), F(false, true, false, false, true)}, - {F(false, true, false, true, false), F(false, true, false, true, true)}}, - {{F(false, true, true, false, false), F(false, true, true, false, true)}, - {F(false, true, true, true, false), F(false, true, true, true, true)}}}}, - {{{{F(true, false, false, false, false), F(true, false, false, false, true)}, - {F(true, false, false, true, false), F(true, false, false, true, true)}}, - {{F(true, false, true, false, false), F(true, false, true, false, true)}, - {F(true, false, true, true, false), F(true, false, true, true, true)}}}, - {{{F(true, true, false, false, false), F(true, true, false, false, true)}, - {F(true, true, false, true, false), F(true, true, false, true, true)}}, - {{F(true, true, true, false, false), F(true, true, true, false, true)}, - {F(true, true, true, true, false), F(true, true, true, true, true)}}}}}; - -#undef F - - return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)] - [u8(dithering_enable)]; -} - -enum -{ - Line_XY_FractBits = 32 -}; -enum -{ - Line_RGB_FractBits = 12 -}; - -struct line_fxp_coord -{ - u64 x, y; - u32 r, g, b; -}; - -struct line_fxp_step -{ - s64 dx_dk, dy_dk; - s32 dr_dk, dg_dk, db_dk; -}; - -static ALWAYS_INLINE_RELEASE s64 LineDivide(s64 delta, s32 dk) -{ - delta = (u64)delta << Line_XY_FractBits; - - if (delta < 0) - delta -= dk - 1; - if (delta > 0) - delta += dk - 1; - - return (delta / dk); -} - -template<bool shading_enable, bool transparency_enable, bool dithering_enable> -void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1) -{ - const s32 i_dx = std::abs(p1->x - p0->x); - const s32 i_dy = std::abs(p1->y - 
p0->y); - const s32 k = (i_dx > i_dy) ? i_dx : i_dy; - if (i_dx >= MAX_PRIMITIVE_WIDTH || i_dy >= MAX_PRIMITIVE_HEIGHT) - return; - - { - // TODO: Move to base class - const u32 clip_left = - static_cast<u32>(std::clamp<s32>(std::min(p0->x, p1->x), m_drawing_area.left, m_drawing_area.left)); - const u32 clip_right = - static_cast<u32>(std::clamp<s32>(std::max(p0->x, p1->x), m_drawing_area.left, m_drawing_area.right)) + 1u; - const u32 clip_top = - static_cast<u32>(std::clamp<s32>(std::min(p0->y, p1->y), m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast<u32>(std::clamp<s32>(std::max(p0->y, p1->y), m_drawing_area.top, m_drawing_area.bottom)) + 1u; - - AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, shading_enable); - } - - if (p0->x >= p1->x && k > 0) - std::swap(p0, p1); - - line_fxp_step step; - if (k == 0) - { - step.dx_dk = 0; - step.dy_dk = 0; - - if constexpr (shading_enable) - { - step.dr_dk = 0; - step.dg_dk = 0; - step.db_dk = 0; - } - } - else - { - step.dx_dk = LineDivide(p1->x - p0->x, k); - step.dy_dk = LineDivide(p1->y - p0->y, k); - - if constexpr (shading_enable) - { - step.dr_dk = (s32)((u32)(p1->r - p0->r) << Line_RGB_FractBits) / k; - step.dg_dk = (s32)((u32)(p1->g - p0->g) << Line_RGB_FractBits) / k; - step.db_dk = (s32)((u32)(p1->b - p0->b) << Line_RGB_FractBits) / k; - } - } - - line_fxp_coord cur_point; - cur_point.x = ((u64)p0->x << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1)); - cur_point.y = ((u64)p0->y << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1)); - - cur_point.x -= 1024; - - if (step.dy_dk < 0) - cur_point.y -= 1024; - - if constexpr (shading_enable) - { - cur_point.r = (p0->r << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); - cur_point.g = (p0->g << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); - cur_point.b = (p0->b << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); - } - - for (s32 i = 0; i <= k; i++) - { - // Sign extension is not 
necessary here for x and y, due to the maximum values that ClipX1 and ClipY1 can contain. - const s32 x = (cur_point.x >> Line_XY_FractBits) & 2047; - const s32 y = (cur_point.y >> Line_XY_FractBits) & 2047; - - if (!IsInterlacedRenderingEnabled() || GetActiveLineLSB() != (static_cast<u32>(y) & 1u)) - { - const u8 r = shading_enable ? static_cast<u8>(cur_point.r >> Line_RGB_FractBits) : p0->r; - const u8 g = shading_enable ? static_cast<u8>(cur_point.g >> Line_RGB_FractBits) : p0->g; - const u8 b = shading_enable ? static_cast<u8>(cur_point.b >> Line_RGB_FractBits) : p0->b; - - if (x >= static_cast<s32>(m_drawing_area.left) && x <= static_cast<s32>(m_drawing_area.right) && - y >= static_cast<s32>(m_drawing_area.top) && y <= static_cast<s32>(m_drawing_area.bottom)) - { - ShadePixel<false, false, transparency_enable, dithering_enable>(static_cast<u32>(x), static_cast<u32>(y), r, g, - b, 0, 0); - } - } - - cur_point.x += step.dx_dk; - cur_point.y += step.dy_dk; - - if constexpr (shading_enable) - { - cur_point.r += step.dr_dk; - cur_point.g += step.dg_dk; - cur_point.b += step.db_dk; - } - } -} - -GPU_SW::DrawLineFunction GPU_SW::GetDrawLineFunction(bool shading_enable, bool transparency_enable, - bool dithering_enable) -{ -#define F(SHADING, TRANSPARENCY, DITHERING) &GPU_SW::DrawLine<SHADING, TRANSPARENCY, DITHERING> - - static constexpr DrawLineFunction funcs[2][2][2] = { - {{F(false, false, false), F(false, false, true)}, {F(false, true, false), F(false, true, true)}}, - {{F(true, false, false), F(true, false, true)}, {F(true, true, false), F(true, true, true)}}}; - -#undef F - - return funcs[u8(shading_enable)][u8(transparency_enable)][u8(dithering_enable)]; -} - -GPU_SW::DrawRectangleFunction GPU_SW::GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, - bool transparency_enable) -{ -#define F(TEXTURE, RAW_TEXTURE, TRANSPARENCY) &GPU_SW::DrawRectangle<TEXTURE, RAW_TEXTURE, TRANSPARENCY> - - static constexpr DrawRectangleFunction funcs[2][2][2] = 
{ - {{F(false, false, false), F(false, false, true)}, {F(false, true, false), F(false, true, true)}}, - {{F(true, false, false), F(true, false, true)}, {F(true, true, false), F(true, true, true)}}}; - -#undef F - - return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]; -} diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 0b9326db4..f9b910154 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -1,6 +1,7 @@ #pragma once #include "common/heap_array.h" #include "gpu.h" +#include "gpu_sw_backend.h" #include "host_display.h" #include <array> #include <memory> @@ -18,37 +19,14 @@ public: bool Initialize(HostDisplay* host_display) override; void Reset() override; - - ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; } - ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &m_vram[VRAM_WIDTH * y + x]; } - ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &m_vram[VRAM_WIDTH * y + x]; } - ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { m_vram[VRAM_WIDTH * y + x] = value; } - - // this is actually (31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512) - static constexpr u32 DITHER_LUT_SIZE = 512; - using DitherLUT = std::array<std::array<std::array<u8, 512>, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>; - static constexpr DitherLUT ComputeDitherLUT(); + void UpdateSettings() override; protected: - struct SWVertex - { - s32 x, y; - u8 r, g, b; - u8 u, v; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - ALWAYS_INLINE void SetPosition(GPUVertexPosition p, s32 offset_x, s32 offset_y) - { - x = 
TruncateGPUVertexPosition(offset_x + p.x); - y = TruncateGPUVertexPosition(offset_y + p.y); - } - - ALWAYS_INLINE void SetColorRGB24(u32 color) { std::tie(r, g, b) = UnpackColorRGB24(color); } - ALWAYS_INLINE void SetTexcoord(u16 value) { std::tie(u, v) = UnpackTexcoord(value); } - }; - - ////////////////////////////////////////////////////////////////////////// - // Scanout - ////////////////////////////////////////////////////////////////////////// template<HostDisplayPixelFormat display_format> void CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field, bool interlaced, bool interleaved); void CopyOut15Bit(HostDisplayPixelFormat display_format, u32 src_x, u32 src_y, u32 width, u32 height, u32 field, @@ -63,71 +41,14 @@ protected: void ClearDisplay() override; void UpdateDisplay() override; - ////////////////////////////////////////////////////////////////////////// - // Rasterization - ////////////////////////////////////////////////////////////////////////// - void DispatchRenderCommand() override; - template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable> - void ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y); + void FillBackendCommandParameters(GPUBackendCommand* cmd); + void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc); - template<bool texture_enable, bool raw_texture_enable, bool transparency_enable> - void DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x, - u8 origin_texcoord_y); - - using DrawRectangleFunction = void (GPU_SW::*)(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, - u8 origin_texcoord_x, u8 origin_texcoord_y); - DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, - bool transparency_enable); - - ////////////////////////////////////////////////////////////////////////// - // Polygon and line rasterization 
ported from Mednafen - ////////////////////////////////////////////////////////////////////////// - struct i_deltas - { - u32 du_dx, dv_dx; - u32 dr_dx, dg_dx, db_dx; - - u32 du_dy, dv_dy; - u32 dr_dy, dg_dy, db_dy; - }; - - struct i_group - { - u32 u, v; - u32 r, g, b; - }; - - template<bool shading_enable, bool texture_enable> - bool CalcIDeltas(i_deltas& idl, const SWVertex* A, const SWVertex* B, const SWVertex* C); - - template<bool shading_enable, bool texture_enable> - void AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count = 1); - - template<bool shading_enable, bool texture_enable> - void AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count = 1); - - template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, - bool dithering_enable> - void DrawSpan(s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl); - - template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, - bool dithering_enable> - void DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); - - using DrawTriangleFunction = void (GPU_SW::*)(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); - DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable, - bool transparency_enable, bool dithering_enable); - - template<bool shading_enable, bool transparency_enable, bool dithering_enable> - void DrawLine(const SWVertex* p0, const SWVertex* p1); - - using DrawLineFunction = void (GPU_SW::*)(const SWVertex* p0, const SWVertex* p1); - DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable); - - std::array<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram; HeapArray<u8, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32)> m_display_texture_buffer; HostDisplayPixelFormat m_16bit_display_format = HostDisplayPixelFormat::RGB565; HostDisplayPixelFormat m_24bit_display_format = 
HostDisplayPixelFormat::RGBA8; + + GPU_SW_Backend m_backend; }; diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp new file mode 100644 index 000000000..fc4d2d2f6 --- /dev/null +++ b/src/core/gpu_sw_backend.cpp @@ -0,0 +1,928 @@ +#include "gpu_sw_backend.h" +#include "common/assert.h" +#include "common/log.h" +#include "gpu_sw_backend.h" +#include "host_display.h" +#include "system.h" +#include <algorithm> +Log_SetChannel(GPU_SW_Backend); + +GPU_SW_Backend::GPU_SW_Backend() : GPUBackend() +{ + m_vram.fill(0); + m_vram_ptr = m_vram.data(); +} + +GPU_SW_Backend::~GPU_SW_Backend() = default; + +bool GPU_SW_Backend::Initialize() +{ + return GPUBackend::Initialize(); +} + +void GPU_SW_Backend::Reset() +{ + GPUBackend::Reset(); + + m_vram.fill(0); +} + +void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) +{ + const GPURenderCommand rc{cmd->rc.bits}; + const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; + + const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction( + rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); + + (this->*DrawFunction)(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); + if (rc.quad_polygon) + (this->*DrawFunction)(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); +} + +void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) +{ + const GPURenderCommand rc{cmd->rc.bits}; + const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; + + const DrawRectangleFunction DrawFunction = + GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); + + (this->*DrawFunction)(cmd); +} + +void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd) +{ + const DrawLineFunction DrawFunction = + GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled()); + + for (u16 i = 1; i < 
cmd->num_vertices; i++) + (this->*DrawFunction)(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]); +} + +constexpr GPU_SW_Backend::DitherLUT GPU_SW_Backend::ComputeDitherLUT() +{ + DitherLUT lut = {}; + for (u32 i = 0; i < DITHER_MATRIX_SIZE; i++) + { + for (u32 j = 0; j < DITHER_MATRIX_SIZE; j++) + { + for (s32 value = 0; value < DITHER_LUT_SIZE; value++) + { + const s32 dithered_value = (value + DITHER_MATRIX[i][j]) >> 3; + lut[i][j][value] = static_cast<u8>((dithered_value < 0) ? 0 : ((dithered_value > 31) ? 31 : dithered_value)); + } + } + } + return lut; +} + +static constexpr GPU_SW_Backend::DitherLUT s_dither_lut = GPU_SW_Backend::ComputeDitherLUT(); + +template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable> +void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, + u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y) +{ + VRAMPixel color; + bool transparent; + if constexpr (texture_enable) + { + // Apply texture window + // TODO: Precompute the second half + texcoord_x = (texcoord_x & cmd->window.and_x) | cmd->window.or_x; + texcoord_y = (texcoord_y & cmd->window.and_y) | cmd->window.or_y; + + VRAMPixel texture_color; + switch (cmd->draw_mode.texture_mode) + { + case GPUTextureMode::Palette4Bit: + { + const u16 palette_value = + GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH, + (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); + const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu; + + const u32 px = (cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH; + const u32 py = cmd->palette.GetYBase(); + texture_color.bits = + GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase()); + } + break; + + case GPUTextureMode::Palette8Bit: + { + const u16 palette_value = + 
GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH, + (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); + const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu; + texture_color.bits = + GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase()); + } + break; + + default: + { + texture_color.bits = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x)) % VRAM_WIDTH, + (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); + } + break; + } + + if (texture_color.bits == 0) + return; + + transparent = texture_color.c; + + if constexpr (raw_texture_enable) + { + color.bits = texture_color.bits; + } + else + { + const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; + const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u; + + color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.r) * u16(color_r)) >> 4]) << 0) | + (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.g) * u16(color_g)) >> 4]) << 5) | + (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.b) * u16(color_b)) >> 4]) << 10) | + (texture_color.bits & 0x8000u); + } + } + else + { + transparent = true; + + const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; + const u32 dither_x = (dithering_enable) ? 
(x & 3u) : 3u; + + color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_r]) << 0) | + (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_g]) << 5) | + (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_b]) << 10); + } + + const VRAMPixel bg_color{GetPixel(static_cast<u32>(x), static_cast<u32>(y))}; + if constexpr (transparency_enable) + { + if (transparent) + { +#define BLEND_AVERAGE(bg, fg) Truncate8(std::min<u32>((ZeroExtend32(bg) / 2) + (ZeroExtend32(fg) / 2), 0x1F)) +#define BLEND_ADD(bg, fg) Truncate8(std::min<u32>(ZeroExtend32(bg) + ZeroExtend32(fg), 0x1F)) +#define BLEND_SUBTRACT(bg, fg) Truncate8((bg > fg) ? ((bg) - (fg)) : 0) +#define BLEND_QUARTER(bg, fg) Truncate8(std::min<u32>(ZeroExtend32(bg) + ZeroExtend32(fg / 4), 0x1F)) + +#define BLEND_RGB(func) \ + color.Set(func(bg_color.r.GetValue(), color.r.GetValue()), func(bg_color.g.GetValue(), color.g.GetValue()), \ + func(bg_color.b.GetValue(), color.b.GetValue()), color.c.GetValue()) + + switch (cmd->draw_mode.transparency_mode) + { + case GPUTransparencyMode::HalfBackgroundPlusHalfForeground: + BLEND_RGB(BLEND_AVERAGE); + break; + case GPUTransparencyMode::BackgroundPlusForeground: + BLEND_RGB(BLEND_ADD); + break; + case GPUTransparencyMode::BackgroundMinusForeground: + BLEND_RGB(BLEND_SUBTRACT); + break; + case GPUTransparencyMode::BackgroundPlusQuarterForeground: + BLEND_RGB(BLEND_QUARTER); + break; + default: + break; + } + +#undef BLEND_RGB + +#undef BLEND_QUARTER +#undef BLEND_SUBTRACT +#undef BLEND_ADD +#undef BLEND_AVERAGE + } + } + else + { + UNREFERENCED_VARIABLE(transparent); + } + + const u16 mask_and = cmd->params.GetMaskAND(); + if ((bg_color.bits & mask_and) != 0) + return; + + SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | cmd->params.GetMaskOR()); +} + +template<bool texture_enable, bool raw_texture_enable, bool transparency_enable> +void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) +{ + const s32 origin_x = cmd->x; + const s32 
origin_y = cmd->y; + const auto [r, g, b] = UnpackColorRGB24(cmd->color); + const auto [origin_texcoord_x, origin_texcoord_y] = UnpackTexcoord(cmd->texcoord); + + for (u32 offset_y = 0; offset_y < cmd->height; offset_y++) + { + const s32 y = origin_y + static_cast<s32>(offset_y); + if (y < static_cast<s32>(m_drawing_area.top) || y > static_cast<s32>(m_drawing_area.bottom) || + (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast<u32>(y)) & 1u))) + { + continue; + } + + const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y); + + for (u32 offset_x = 0; offset_x < cmd->width; offset_x++) + { + const s32 x = origin_x + static_cast<s32>(offset_x); + if (x < static_cast<s32>(m_drawing_area.left) || x > static_cast<s32>(m_drawing_area.right)) + continue; + + const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x); + + ShadePixel<texture_enable, raw_texture_enable, transparency_enable, false>( + cmd, static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y); + } + } +} + +////////////////////////////////////////////////////////////////////////// +// Polygon and line rasterization ported from Mednafen +////////////////////////////////////////////////////////////////////////// + +#define COORD_FBS 12 +#define COORD_MF_INT(n) ((n) << COORD_FBS) +#define COORD_POST_PADDING 12 + +static ALWAYS_INLINE_RELEASE s64 MakePolyXFP(s32 x) +{ + return ((u64)x << 32) + ((1ULL << 32) - (1 << 11)); +} + +static ALWAYS_INLINE_RELEASE s64 MakePolyXFPStep(s32 dx, s32 dy) +{ + s64 ret; + s64 dx_ex = (u64)dx << 32; + + if (dx_ex < 0) + dx_ex -= dy - 1; + + if (dx_ex > 0) + dx_ex += dy - 1; + + ret = dx_ex / dy; + + return (ret); +} + +static ALWAYS_INLINE_RELEASE s32 GetPolyXFP_Int(s64 xfp) +{ + return (xfp >> 32); +} + +template<bool shading_enable, bool texture_enable> +bool ALWAYS_INLINE_RELEASE GPU_SW_Backend::CalcIDeltas(i_deltas& idl, const GPUBackendDrawPolygonCommand::Vertex* A, + const 
GPUBackendDrawPolygonCommand::Vertex* B, + const GPUBackendDrawPolygonCommand::Vertex* C) +{ +#define CALCIS(x, y) (((B->x - A->x) * (C->y - B->y)) - ((C->x - B->x) * (B->y - A->y))) + + s32 denom = CALCIS(x, y); + + if (!denom) + return false; + + if constexpr (shading_enable) + { + idl.dr_dx = (u32)(CALCIS(r, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + idl.dr_dy = (u32)(CALCIS(x, r) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + + idl.dg_dx = (u32)(CALCIS(g, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + idl.dg_dy = (u32)(CALCIS(x, g) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + + idl.db_dx = (u32)(CALCIS(b, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + idl.db_dy = (u32)(CALCIS(x, b) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + } + + if constexpr (texture_enable) + { + idl.du_dx = (u32)(CALCIS(u, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + idl.du_dy = (u32)(CALCIS(x, u) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + + idl.dv_dx = (u32)(CALCIS(v, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + idl.dv_dy = (u32)(CALCIS(x, v) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; + } + + return true; + +#undef CALCIS +} + +template<bool shading_enable, bool texture_enable> +void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count /*= 1*/) +{ + if constexpr (shading_enable) + { + ig.r += idl.dr_dx * count; + ig.g += idl.dg_dx * count; + ig.b += idl.db_dx * count; + } + + if constexpr (texture_enable) + { + ig.u += idl.du_dx * count; + ig.v += idl.dv_dx * count; + } +} + +template<bool shading_enable, bool texture_enable> +void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count /*= 1*/) +{ + if constexpr (shading_enable) + { + ig.r += idl.dr_dy * count; + ig.g += idl.dg_dy * count; + ig.b += idl.db_dy * count; + } + + if constexpr (texture_enable) + { + ig.u += idl.du_dy * count; + ig.v += idl.dv_dy * 
count; + } +} + +template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, + bool dithering_enable> +void GPU_SW_Backend::DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig, + const i_deltas& idl) +{ + if (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast<u32>(y)) & 1u)) + return; + + s32 x_ig_adjust = x_start; + s32 w = x_bound - x_start; + s32 x = TruncateGPUVertexPosition(x_start); + + if (x < static_cast<s32>(m_drawing_area.left)) + { + s32 delta = static_cast<s32>(m_drawing_area.left) - x; + x_ig_adjust += delta; + x += delta; + w -= delta; + } + + if ((x + w) > (static_cast<s32>(m_drawing_area.right) + 1)) + w = static_cast<s32>(m_drawing_area.right) + 1 - x; + + if (w <= 0) + return; + + AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, x_ig_adjust); + AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, y); + + do + { + const u32 r = ig.r >> (COORD_FBS + COORD_POST_PADDING); + const u32 g = ig.g >> (COORD_FBS + COORD_POST_PADDING); + const u32 b = ig.b >> (COORD_FBS + COORD_POST_PADDING); + const u32 u = ig.u >> (COORD_FBS + COORD_POST_PADDING); + const u32 v = ig.v >> (COORD_FBS + COORD_POST_PADDING); + + ShadePixel<texture_enable, raw_texture_enable, transparency_enable, dithering_enable>( + cmd, static_cast<u32>(x), static_cast<u32>(y), Truncate8(r), Truncate8(g), Truncate8(b), Truncate8(u), + Truncate8(v)); + + x++; + AddIDeltas_DX<shading_enable, texture_enable>(ig, idl); + } while (--w > 0); +} + +template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, + bool dithering_enable> +void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, + const GPUBackendDrawPolygonCommand::Vertex* v0, + const GPUBackendDrawPolygonCommand::Vertex* v1, + const GPUBackendDrawPolygonCommand::Vertex* v2) +{ + u32 core_vertex; + { + u32 cvtemp = 0; + + if (v1->x <= v0->x) + { + if 
(v2->x <= v1->x) + cvtemp = (1 << 2); + else + cvtemp = (1 << 1); + } + else if (v2->x < v0->x) + cvtemp = (1 << 2); + else + cvtemp = (1 << 0); + + if (v2->y < v1->y) + { + std::swap(v2, v1); + cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1); + } + + if (v1->y < v0->y) + { + std::swap(v1, v0); + cvtemp = ((cvtemp >> 1) & 0x1) | ((cvtemp << 1) & 0x2) | (cvtemp & 0x4); + } + + if (v2->y < v1->y) + { + std::swap(v2, v1); + cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1); + } + + core_vertex = cvtemp >> 1; + } + + if (v0->y == v2->y) + return; + + if (static_cast<u32>(std::abs(v2->x - v0->x)) >= MAX_PRIMITIVE_WIDTH || + static_cast<u32>(std::abs(v2->x - v1->x)) >= MAX_PRIMITIVE_WIDTH || + static_cast<u32>(std::abs(v1->x - v0->x)) >= MAX_PRIMITIVE_WIDTH || + static_cast<u32>(v2->y - v0->y) >= MAX_PRIMITIVE_HEIGHT) + { + return; + } + + s64 base_coord = MakePolyXFP(v0->x); + s64 base_step = MakePolyXFPStep((v2->x - v0->x), (v2->y - v0->y)); + s64 bound_coord_us; + s64 bound_coord_ls; + bool right_facing; + + if (v1->y == v0->y) + { + bound_coord_us = 0; + right_facing = (bool)(v1->x > v0->x); + } + else + { + bound_coord_us = MakePolyXFPStep((v1->x - v0->x), (v1->y - v0->y)); + right_facing = (bool)(bound_coord_us > base_step); + } + + if (v2->y == v1->y) + bound_coord_ls = 0; + else + bound_coord_ls = MakePolyXFPStep((v2->x - v1->x), (v2->y - v1->y)); + + i_deltas idl; + if (!CalcIDeltas<shading_enable, texture_enable>(idl, v0, v1, v2)) + return; + + const GPUBackendDrawPolygonCommand::Vertex* vertices[3] = {v0, v1, v2}; + + i_group ig; + if constexpr (texture_enable) + { + ig.u = (COORD_MF_INT(vertices[core_vertex]->u) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; + ig.v = (COORD_MF_INT(vertices[core_vertex]->v) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; + } + + ig.r = (COORD_MF_INT(vertices[core_vertex]->r) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; + ig.g = (COORD_MF_INT(vertices[core_vertex]->g) + (1 << 
(COORD_FBS - 1))) << COORD_POST_PADDING; + ig.b = (COORD_MF_INT(vertices[core_vertex]->b) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; + + AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->x); + AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->y); + + struct TriangleHalf + { + u64 x_coord[2]; + u64 x_step[2]; + + s32 y_coord; + s32 y_bound; + + bool dec_mode; + } tripart[2]; + + u32 vo = 0; + u32 vp = 0; + if (core_vertex != 0) + vo = 1; + if (core_vertex == 2) + vp = 3; + + { + TriangleHalf* tp = &tripart[vo]; + tp->y_coord = vertices[0 ^ vo]->y; + tp->y_bound = vertices[1 ^ vo]->y; + tp->x_coord[right_facing] = MakePolyXFP(vertices[0 ^ vo]->x); + tp->x_step[right_facing] = bound_coord_us; + tp->x_coord[!right_facing] = base_coord + ((vertices[vo]->y - vertices[0]->y) * base_step); + tp->x_step[!right_facing] = base_step; + tp->dec_mode = vo; + } + + { + TriangleHalf* tp = &tripart[vo ^ 1]; + tp->y_coord = vertices[1 ^ vp]->y; + tp->y_bound = vertices[2 ^ vp]->y; + tp->x_coord[right_facing] = MakePolyXFP(vertices[1 ^ vp]->x); + tp->x_step[right_facing] = bound_coord_ls; + tp->x_coord[!right_facing] = + base_coord + ((vertices[1 ^ vp]->y - vertices[0]->y) * + base_step); // base_coord + ((vertices[1].y - vertices[0].y) * base_step); + tp->x_step[!right_facing] = base_step; + tp->dec_mode = vp; + } + + for (u32 i = 0; i < 2; i++) + { + s32 yi = tripart[i].y_coord; + s32 yb = tripart[i].y_bound; + + u64 lc = tripart[i].x_coord[0]; + u64 ls = tripart[i].x_step[0]; + + u64 rc = tripart[i].x_coord[1]; + u64 rs = tripart[i].x_step[1]; + + if (tripart[i].dec_mode) + { + while (yi > yb) + { + yi--; + lc -= ls; + rc -= rs; + + s32 y = TruncateGPUVertexPosition(yi); + + if (y < static_cast<s32>(m_drawing_area.top)) + break; + + if (y > static_cast<s32>(m_drawing_area.bottom)) + continue; + + DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>( + cmd, yi, 
GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); + } + } + else + { + while (yi < yb) + { + s32 y = TruncateGPUVertexPosition(yi); + + if (y > static_cast<s32>(m_drawing_area.bottom)) + break; + + if (y >= static_cast<s32>(m_drawing_area.top)) + { + + DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>( + cmd, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); + } + + yi++; + lc += ls; + rc += rs; + } + } + } +} + +GPU_SW_Backend::DrawTriangleFunction GPU_SW_Backend::GetDrawTriangleFunction(bool shading_enable, bool texture_enable, + bool raw_texture_enable, + bool transparency_enable, + bool dithering_enable) +{ +#define F(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING) \ + &GPU_SW_Backend::DrawTriangle<SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING> + + static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = { + {{{{F(false, false, false, false, false), F(false, false, false, false, true)}, + {F(false, false, false, true, false), F(false, false, false, true, true)}}, + {{F(false, false, true, false, false), F(false, false, true, false, true)}, + {F(false, false, true, true, false), F(false, false, true, true, true)}}}, + {{{F(false, true, false, false, false), F(false, true, false, false, true)}, + {F(false, true, false, true, false), F(false, true, false, true, true)}}, + {{F(false, true, true, false, false), F(false, true, true, false, true)}, + {F(false, true, true, true, false), F(false, true, true, true, true)}}}}, + {{{{F(true, false, false, false, false), F(true, false, false, false, true)}, + {F(true, false, false, true, false), F(true, false, false, true, true)}}, + {{F(true, false, true, false, false), F(true, false, true, false, true)}, + {F(true, false, true, true, false), F(true, false, true, true, true)}}}, + {{{F(true, true, false, false, false), F(true, true, false, false, true)}, + {F(true, true, false, true, false), F(true, true, false, true, true)}}, + {{F(true, true, 
true, false, false), F(true, true, true, false, true)}, + {F(true, true, true, true, false), F(true, true, true, true, true)}}}}}; + +#undef F + + return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)] + [u8(dithering_enable)]; +} + +enum +{ + Line_XY_FractBits = 32 +}; +enum +{ + Line_RGB_FractBits = 12 +}; + +struct line_fxp_coord +{ + u64 x, y; + u32 r, g, b; +}; + +struct line_fxp_step +{ + s64 dx_dk, dy_dk; + s32 dr_dk, dg_dk, db_dk; +}; + +static ALWAYS_INLINE_RELEASE s64 LineDivide(s64 delta, s32 dk) +{ + delta = (u64)delta << Line_XY_FractBits; + + if (delta < 0) + delta -= dk - 1; + if (delta > 0) + delta += dk - 1; + + return (delta / dk); +} + +template<bool shading_enable, bool transparency_enable, bool dithering_enable> +void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, + const GPUBackendDrawLineCommand::Vertex* p1) +{ + const s32 i_dx = std::abs(p1->x - p0->x); + const s32 i_dy = std::abs(p1->y - p0->y); + const s32 k = (i_dx > i_dy) ? 
i_dx : i_dy; + if (i_dx >= MAX_PRIMITIVE_WIDTH || i_dy >= MAX_PRIMITIVE_HEIGHT) + return; + + if (p0->x >= p1->x && k > 0) + std::swap(p0, p1); + + line_fxp_step step; + if (k == 0) + { + step.dx_dk = 0; + step.dy_dk = 0; + + if constexpr (shading_enable) + { + step.dr_dk = 0; + step.dg_dk = 0; + step.db_dk = 0; + } + } + else + { + step.dx_dk = LineDivide(p1->x - p0->x, k); + step.dy_dk = LineDivide(p1->y - p0->y, k); + + if constexpr (shading_enable) + { + step.dr_dk = (s32)((u32)(p1->r - p0->r) << Line_RGB_FractBits) / k; + step.dg_dk = (s32)((u32)(p1->g - p0->g) << Line_RGB_FractBits) / k; + step.db_dk = (s32)((u32)(p1->b - p0->b) << Line_RGB_FractBits) / k; + } + } + + line_fxp_coord cur_point; + cur_point.x = ((u64)p0->x << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1)); + cur_point.y = ((u64)p0->y << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1)); + + cur_point.x -= 1024; + + if (step.dy_dk < 0) + cur_point.y -= 1024; + + if constexpr (shading_enable) + { + cur_point.r = (p0->r << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); + cur_point.g = (p0->g << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); + cur_point.b = (p0->b << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); + } + + for (s32 i = 0; i <= k; i++) + { + // Sign extension is not necessary here for x and y, due to the maximum values that ClipX1 and ClipY1 can contain. + const s32 x = (cur_point.x >> Line_XY_FractBits) & 2047; + const s32 y = (cur_point.y >> Line_XY_FractBits) & 2047; + + if ((!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (Truncate8(static_cast<u32>(y)) & 1u)) && + x >= static_cast<s32>(m_drawing_area.left) && x <= static_cast<s32>(m_drawing_area.right) && + y >= static_cast<s32>(m_drawing_area.top) && y <= static_cast<s32>(m_drawing_area.bottom)) + { + const u8 r = shading_enable ? static_cast<u8>(cur_point.r >> Line_RGB_FractBits) : p0->r; + const u8 g = shading_enable ? 
static_cast<u8>(cur_point.g >> Line_RGB_FractBits) : p0->g; + const u8 b = shading_enable ? static_cast<u8>(cur_point.b >> Line_RGB_FractBits) : p0->b; + + ShadePixel<false, false, transparency_enable, dithering_enable>(cmd, static_cast<u32>(x), static_cast<u32>(y), r, + g, b, 0, 0); + } + + cur_point.x += step.dx_dk; + cur_point.y += step.dy_dk; + + if constexpr (shading_enable) + { + cur_point.r += step.dr_dk; + cur_point.g += step.dg_dk; + cur_point.b += step.db_dk; + } + } +} + +GPU_SW_Backend::DrawLineFunction GPU_SW_Backend::GetDrawLineFunction(bool shading_enable, bool transparency_enable, + bool dithering_enable) +{ +#define F(SHADING, TRANSPARENCY, DITHERING) &GPU_SW_Backend::DrawLine<SHADING, TRANSPARENCY, DITHERING> + + static constexpr DrawLineFunction funcs[2][2][2] = { + {{F(false, false, false), F(false, false, true)}, {F(false, true, false), F(false, true, true)}}, + {{F(true, false, false), F(true, false, true)}, {F(true, true, false), F(true, true, true)}}}; + +#undef F + + return funcs[u8(shading_enable)][u8(transparency_enable)][u8(dithering_enable)]; +} + +GPU_SW_Backend::DrawRectangleFunction +GPU_SW_Backend::GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, bool transparency_enable) +{ +#define F(TEXTURE, RAW_TEXTURE, TRANSPARENCY) &GPU_SW_Backend::DrawRectangle<TEXTURE, RAW_TEXTURE, TRANSPARENCY> + + static constexpr DrawRectangleFunction funcs[2][2][2] = { + {{F(false, false, false), F(false, false, true)}, {F(false, true, false), F(false, true, true)}}, + {{F(true, false, false), F(true, false, true)}, {F(true, true, false), F(true, true, true)}}}; + +#undef F + + return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]; +} + +void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) +{ + const u16 color16 = RGBA8888ToRGBA5551(color); + if ((x + width) <= VRAM_WIDTH && !params.interlaced_rendering) + { + for (u32 yoffs = 0; yoffs < height; 
yoffs++) + { + const u32 row = (y + yoffs) % VRAM_HEIGHT; + std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16); + } + } + else if (params.interlaced_rendering) + { + // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field. + const u32 active_field = params.active_line_lsb; + for (u32 yoffs = 0; yoffs < height; yoffs++) + { + const u32 row = (y + yoffs) % VRAM_HEIGHT; + if ((row & u32(1)) == active_field) + continue; + + u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH]; + for (u32 xoffs = 0; xoffs < width; xoffs++) + { + const u32 col = (x + xoffs) % VRAM_WIDTH; + row_ptr[col] = color16; + } + } + } + else + { + for (u32 yoffs = 0; yoffs < height; yoffs++) + { + const u32 row = (y + yoffs) % VRAM_HEIGHT; + u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH]; + for (u32 xoffs = 0; xoffs < width; xoffs++) + { + const u32 col = (x + xoffs) % VRAM_WIDTH; + row_ptr[col] = color16; + } + } + } +} + +void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, + GPUBackendCommandParameters params) +{ + // Fast path when the copy is not oversized. + if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled()) + { + const u16* src_ptr = static_cast<const u16*>(data); + u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x]; + for (u32 yoffs = 0; yoffs < height; yoffs++) + { + std::copy_n(src_ptr, width, dst_ptr); + src_ptr += width; + dst_ptr += VRAM_WIDTH; + } + } + else + { + // Slow path when we need to handle wrap-around. + const u16* src_ptr = static_cast<const u16*>(data); + const u16 mask_and = params.GetMaskAND(); + const u16 mask_or = params.GetMaskOR(); + + for (u32 row = 0; row < height;) + { + u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; + for (u32 col = 0; col < width;) + { + // TODO: Handle unaligned reads... 
+ u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH]; + if (((*pixel_ptr) & mask_and) == 0) + *pixel_ptr = *(src_ptr++) | mask_or; + } + } + } +} + +void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) +{ + // Break up oversized copies. This behavior has not been verified on console. + if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH) + { + u32 remaining_rows = height; + u32 current_src_y = src_y; + u32 current_dst_y = dst_y; + while (remaining_rows > 0) + { + const u32 rows_to_copy = + std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y)); + + u32 remaining_columns = width; + u32 current_src_x = src_x; + u32 current_dst_x = dst_x; + while (remaining_columns > 0) + { + const u32 columns_to_copy = + std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x)); + CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy, params); + current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH; + current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH; + remaining_columns -= columns_to_copy; + } + + current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT; + current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT; + remaining_rows -= rows_to_copy; + } + + return; + } + + // This doesn't have a fast path, but do we really need one? It's not common. + const u16 mask_and = params.GetMaskAND(); + const u16 mask_or = params.GetMaskOR(); + + // Copy in reverse when src_x < dst_x, this is verified on console. 
+ if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH)) + { + for (u32 row = 0; row < height; row++) + { + const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + + for (s32 col = static_cast<s32>(width - 1); col >= 0; col--) + { + const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH]; + u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH]; + if ((*dst_pixel_ptr & mask_and) == 0) + *dst_pixel_ptr = src_pixel | mask_or; + } + } + } + else + { + for (u32 row = 0; row < height; row++) + { + const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + + for (u32 col = 0; col < width; col++) + { + const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH]; + u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH]; + if ((*dst_pixel_ptr & mask_and) == 0) + *dst_pixel_ptr = src_pixel | mask_or; + } + } + } +} + +void GPU_SW_Backend::FlushRender() {} + +void GPU_SW_Backend::DrawingAreaChanged() {} diff --git a/src/core/gpu_sw_backend.h b/src/core/gpu_sw_backend.h new file mode 100644 index 000000000..a7105ecc2 --- /dev/null +++ b/src/core/gpu_sw_backend.h @@ -0,0 +1,174 @@ +#pragma once +#include "gpu_backend.h" +#include <array> +#include <memory> +#include <vector> + +class GPU_SW_Backend final : public GPUBackend +{ +public: + GPU_SW_Backend(); + ~GPU_SW_Backend() override; + + bool Initialize() override; + void Reset() override; + + ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; } + ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &m_vram[VRAM_WIDTH * y + x]; } + ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &m_vram[VRAM_WIDTH * y + x]; } + 
ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { m_vram[VRAM_WIDTH * y + x] = value; } + + // this is actually (31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512) + static constexpr u32 DITHER_LUT_SIZE = 512; + using DitherLUT = std::array<std::array<std::array<u8, 512>, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>; + static constexpr DitherLUT ComputeDitherLUT(); + +protected: + static constexpr u8 Convert5To8(u8 x5) { return (x5 << 3) | (x5 & 7); } + static constexpr u8 Convert8To5(u8 x8) { return (x8 >> 3); } + + union VRAMPixel + { + u16 bits; + + BitField<u16, u8, 0, 5> r; + BitField<u16, u8, 5, 5> g; + BitField<u16, u8, 10, 5> b; + BitField<u16, bool, 15, 1> c; + + u8 GetR8() const { return Convert5To8(r); } + u8 GetG8() const { return Convert5To8(g); } + u8 GetB8() const { return Convert5To8(b); } + + void Set(u8 r_, u8 g_, u8 b_, bool c_ = false) + { + bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast<u16>(c_) << 15); + } + + void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false) + { + Set(std::min<u8>(r_, 0x1F), std::min<u8>(g_, 0x1F), std::min<u8>(b_, 0x1F), c_); + } + + void SetRGB24(u32 rgb24, bool c_ = false) + { + bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) | + (static_cast<u16>(c_) << 15); + } + + void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false) + { + bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) | + (static_cast<u16>(c_) << 15); + } + + void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false) + { + const s32 offset = DITHER_MATRIX[y & 3][x & 3]; + r8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(r8)) + offset, 0, 255)); + g8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(g8)) + offset, 0, 255)); + b8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(b8)) + offset, 0, 
255)); + SetRGB24(r8, g8, b8, c_); + } + + u32 ToRGB24() const + { + const u32 r_ = ZeroExtend32(r.GetValue()); + const u32 g_ = ZeroExtend32(g.GetValue()); + const u32 b_ = ZeroExtend32(b.GetValue()); + + return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16); + } + }; + + static constexpr std::tuple<u8, u8> UnpackTexcoord(u16 texcoord) + { + return std::make_tuple(static_cast<u8>(texcoord), static_cast<u8>(texcoord >> 8)); + } + + static constexpr std::tuple<u8, u8, u8> UnpackColorRGB24(u32 rgb24) + { + return std::make_tuple(static_cast<u8>(rgb24), static_cast<u8>(rgb24 >> 8), static_cast<u8>(rgb24 >> 16)); + } + + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) override; + + void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; + void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override; + void FlushRender() override; + void DrawingAreaChanged() override; + + ////////////////////////////////////////////////////////////////////////// + // Rasterization + ////////////////////////////////////////////////////////////////////////// + template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable> + void ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, + u8 texcoord_y); + + template<bool texture_enable, bool raw_texture_enable, bool transparency_enable> + void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd); + + using DrawRectangleFunction = void (GPU_SW_Backend::*)(const GPUBackendDrawRectangleCommand* cmd); + DrawRectangleFunction 
GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, + bool transparency_enable); + + ////////////////////////////////////////////////////////////////////////// + // Polygon and line rasterization ported from Mednafen + ////////////////////////////////////////////////////////////////////////// + struct i_deltas + { + u32 du_dx, dv_dx; + u32 dr_dx, dg_dx, db_dx; + + u32 du_dy, dv_dy; + u32 dr_dy, dg_dy, db_dy; + }; + + struct i_group + { + u32 u, v; + u32 r, g, b; + }; + + template<bool shading_enable, bool texture_enable> + bool CalcIDeltas(i_deltas& idl, const GPUBackendDrawPolygonCommand::Vertex* A, + const GPUBackendDrawPolygonCommand::Vertex* B, const GPUBackendDrawPolygonCommand::Vertex* C); + + template<bool shading_enable, bool texture_enable> + void AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count = 1); + + template<bool shading_enable, bool texture_enable> + void AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count = 1); + + template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, + bool dithering_enable> + void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig, + const i_deltas& idl); + + template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, + bool dithering_enable> + void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, + const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2); + + using DrawTriangleFunction = void (GPU_SW_Backend::*)(const GPUBackendDrawPolygonCommand* cmd, + const GPUBackendDrawPolygonCommand::Vertex* v0, + const GPUBackendDrawPolygonCommand::Vertex* v1, + const GPUBackendDrawPolygonCommand::Vertex* v2); + DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable, + bool transparency_enable, bool dithering_enable); + + 
template<bool shading_enable, bool transparency_enable, bool dithering_enable> + void DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, + const GPUBackendDrawLineCommand::Vertex* p1); + + using DrawLineFunction = void (GPU_SW_Backend::*)(const GPUBackendDrawLineCommand* cmd, + const GPUBackendDrawLineCommand::Vertex* p0, + const GPUBackendDrawLineCommand::Vertex* p1); + DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable); + + std::array<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram; +}; diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index 1df6cbc8b..4fd0472e7 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -222,3 +222,177 @@ static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = { { {+2, -2, +3, -1}, // row 1 {-3, +1, -4, +0}, // row 2 {+4, -1, +2, -2} }; // row 3 + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4200) // warning C4200: nonstandard extension used: zero-sized array in struct/union +#endif + +enum class GPUBackendCommandType : u8 +{ + Wraparound, + Sync, + FillVRAM, + UpdateVRAM, + CopyVRAM, + SetDrawingArea, + DrawPolygon, + DrawRectangle, + DrawLine +}; + +union GPUBackendCommandParameters +{ + u8 bits; + + BitField<u8, bool, 0, 1> interlaced_rendering; + + /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1. + BitField<u8, u8, 1, 1> active_line_lsb; + + BitField<u8, bool, 2, 1> set_mask_while_drawing; + BitField<u8, bool, 3, 1> check_mask_before_draw; + + ALWAYS_INLINE bool IsMaskingEnabled() const { return (bits & 12u) != 0u; } + + // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or } + u16 GetMaskAND() const + { + // return check_mask_before_draw ? 0x8000 : 0x0000; + return Truncate16((bits << 12) & 0x8000); + } + u16 GetMaskOR() const + { + // return set_mask_while_drawing ? 
0x8000 : 0x0000; + return Truncate16((bits << 13) & 0x8000); + } +}; + +struct GPUBackendCommand +{ + GPUBackendCommandType type; + GPUBackendCommandParameters params; + u32 size; +}; + +struct GPUBackendSyncCommand : public GPUBackendCommand +{ + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendSyncCommand); } +}; + +struct GPUBackendFillVRAMCommand : public GPUBackendCommand +{ + u16 x; + u16 y; + u16 width; + u16 height; + u32 color; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendFillVRAMCommand); } +}; + +struct GPUBackendUpdateVRAMCommand : public GPUBackendCommand +{ + u16 x; + u16 y; + u16 width; + u16 height; + u16 data[0]; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateVRAMCommand) + (sizeof(u16) * width * height); } +}; + +struct GPUBackendCopyVRAMCommand : public GPUBackendCommand +{ + u16 src_x; + u16 src_y; + u16 dst_x; + u16 dst_y; + u16 width; + u16 height; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendCopyVRAMCommand); } +}; + +struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand +{ + Common::Rectangle<u32> new_area; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendSetDrawingAreaCommand); } +}; + +struct GPUBackendDrawCommand : public GPUBackendCommand +{ + GPURenderCommand rc; + GPUDrawModeReg draw_mode; + GPUTexturePaletteReg palette; + GPUTextureWindow window; + + ALWAYS_INLINE bool IsDitheringEnabled() const { return rc.IsDitheringEnabled() && draw_mode.dither_enable; } +}; + +struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand +{ + u16 num_vertices; + + struct Vertex + { + s32 x, y; + union + { + struct + { + u8 r, g, b, a; + }; + u32 color; + }; + union + { + struct + { + u8 u, v; + }; + u16 texcoord; + }; + }; + + Vertex vertices[0]; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawPolygonCommand) + sizeof(Vertex) * num_vertices; } +}; + +struct GPUBackendDrawRectangleCommand : public GPUBackendDrawCommand +{ + s32 x, y; + u16 
width, height; + u16 texcoord; + u32 color; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawRectangleCommand); } +}; + +struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand +{ + u16 num_vertices; + + struct Vertex + { + s32 x, y; + union + { + struct + { + u8 r, g, b, a; + }; + u32 color; + }; + }; + + Vertex vertices[0]; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawLineCommand) + sizeof(Vertex) * num_vertices; } +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index c49c84c69..b7d8d48a7 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -432,6 +432,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) si.SetIntValue("GPU", "Multisamples", 1); si.SetBoolValue("GPU", "UseDebugDevice", false); si.SetBoolValue("GPU", "PerSampleShading", false); + si.SetBoolValue("GPU", "UseThread", true); si.SetBoolValue("GPU", "TrueColor", false); si.SetBoolValue("GPU", "ScaledDithering", true); si.SetStringValue("GPU", "TextureFilter", Settings::GetTextureFilterName(Settings::DEFAULT_GPU_TEXTURE_FILTER)); @@ -629,6 +630,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale || g_settings.gpu_multisamples != old_settings.gpu_multisamples || g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading || + g_settings.gpu_use_thread != old_settings.gpu_use_thread || g_settings.gpu_fifo_size != old_settings.gpu_fifo_size || g_settings.gpu_max_run_ahead != old_settings.gpu_max_run_ahead || g_settings.gpu_true_color != old_settings.gpu_true_color || diff --git a/src/core/settings.cpp b/src/core/settings.cpp index c2bb9fe8b..15bdbcb48 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -139,6 +139,7 @@ void Settings::Load(SettingsInterface& si) gpu_multisamples = static_cast<u32>(si.GetIntValue("GPU", 
"Multisamples", 1)); gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false); gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false); + gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true); gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true); gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false); gpu_texture_filter = @@ -273,6 +274,7 @@ void Settings::Save(SettingsInterface& si) const si.SetIntValue("GPU", "Multisamples", static_cast<long>(gpu_multisamples)); si.SetBoolValue("GPU", "UseDebugDevice", gpu_use_debug_device); si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading); + si.SetBoolValue("GPU", "UseThread", gpu_use_thread); si.SetBoolValue("GPU", "TrueColor", gpu_true_color); si.SetBoolValue("GPU", "ScaledDithering", gpu_scaled_dithering); si.SetStringValue("GPU", "TextureFilter", GetTextureFilterName(gpu_texture_filter)); diff --git a/src/core/settings.h b/src/core/settings.h index 32f31fc6c..00214b055 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -94,6 +94,7 @@ struct Settings std::string display_post_process_chain; u32 gpu_resolution_scale = 1; u32 gpu_multisamples = 1; + bool gpu_use_thread = true; bool gpu_use_debug_device = false; bool gpu_per_sample_shading = false; bool gpu_true_color = true; diff --git a/src/duckstation-libretro/libretro_host_interface.cpp b/src/duckstation-libretro/libretro_host_interface.cpp index 7ccfd64b7..99d7ae72f 100644 --- a/src/duckstation-libretro/libretro_host_interface.cpp +++ b/src/duckstation-libretro/libretro_host_interface.cpp @@ -461,7 +461,7 @@ void LibretroHostInterface::OnSystemDestroyed() m_using_hardware_renderer = false; } -static std::array<retro_core_option_definition, 44> s_option_definitions = {{ +static std::array<retro_core_option_definition, 45> s_option_definitions = {{ {"duckstation_Console.Region", "Console Region", "Determines which region/hardware to emulate. 
Auto-Detect will use the region of the disc inserted.", @@ -542,6 +542,12 @@ static std::array<retro_core_option_definition, 44> s_option_definitions = {{ "OpenGL" #endif }, + {"duckstation_GPU.UseThread", + "Threaded Rendering (Software)", + "Uses a second thread for drawing graphics. Currently only available for the software renderer, but can provide a " + "significant speed improvement, and is safe to use.", + {{"true", "Enabled"}, {"false", "Disabled"}}, + "true"}, {"duckstation_GPU.ResolutionScale", "Internal Resolution Scale", "Scales internal VRAM resolution by the specified multiplier. Larger values are slower. Some games require " @@ -731,8 +737,7 @@ static std::array<retro_core_option_definition, 44> s_option_definitions = {{ "Controller 1 Analog Axis Scale", "Sets the analog stick axis scaling factor.", {{"1.00f", "1.00"}, {"1.40f", "1.40"}}, - "1.00f" - }, + "1.00f"}, {"duckstation_Controller2.Type", "Controller 2 Type", "Sets the type of controller for Slot 2.", @@ -753,12 +758,11 @@ static std::array<retro_core_option_definition, 44> s_option_definitions = {{ "Allows you to use the analog sticks to control the d-pad in digital mode, as well as the buttons.", {{"true", "Enabled"}, {"false", "Disabled"}}, "false"}, - {"duckstation_Controller2.AxisScale", + {"duckstation_Controller2.AxisScale", "Controller 2 Analog Axis Scale", "Sets the analog stick axis scaling factor.", {{"1.00f", "1.00"}, {"1.40f", "1.40"}}, - "1.00f" - }, + "1.00f"}, {"duckstation_Display.ShowOSDMessages", "Display OSD Messages", "Shows on-screen messages generated by the core.", diff --git a/src/duckstation-qt/displaysettingswidget.cpp b/src/duckstation-qt/displaysettingswidget.cpp index 92d9f563d..a92524475 100644 --- a/src/duckstation-qt/displaysettingswidget.cpp +++ b/src/duckstation-qt/displaysettingswidget.cpp @@ -33,6 +33,7 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, 
m_ui.displayIntegerScaling, "Display", "IntegerScaling"); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.vsync, "Display", "VSync"); + SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.gpuThread, "GPU", "UseThread"); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showOSDMessages, "Display", "ShowOSDMessages", true); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showFPS, "Display", "ShowFPS", false); @@ -84,6 +85,9 @@ DisplaySettingsWidget::DisplaySettingsWidget(QtHostInterface* host_interface, QW m_ui.vsync, tr("VSync"), tr("Checked"), tr("Enable this option to match DuckStation's refresh rate with your current monitor or screen. " "VSync is automatically disabled when it is not possible (e.g. running at non-100% speed).")); + dialog->registerWidgetHelp(m_ui.gpuThread, tr("Threaded Rendering"), tr("Checked"), + tr("Uses a second thread for drawing graphics. Currently only available for the software " + "renderer, but can provide a significant speed improvement, and is safe to use.")); dialog->registerWidgetHelp(m_ui.showOSDMessages, tr("Show OSD Messages"), tr("Checked"), tr("Shows on-screen-display messages when events occur such as save states being " "created/loaded, screenshots being taken, etc.")); @@ -124,6 +128,7 @@ void DisplaySettingsWidget::populateGPUAdaptersAndResolutions() { std::vector<std::string> adapter_names; std::vector<std::string> fullscreen_modes; + bool thread_supported = false; switch (static_cast<GPURenderer>(m_ui.renderer->currentIndex())) { #ifdef WIN32 @@ -140,6 +145,10 @@ void DisplaySettingsWidget::populateGPUAdaptersAndResolutions() adapter_names = FrontendCommon::VulkanHostDisplay::EnumerateAdapterNames(); break; + case GPURenderer::Software: + thread_supported = true; + break; + default: break; } @@ -184,6 +193,8 @@ void DisplaySettingsWidget::populateGPUAdaptersAndResolutions() // disable it if we don't have a choice 
m_ui.fullscreenMode->setEnabled(!fullscreen_modes.empty()); } + + m_ui.gpuThread->setEnabled(thread_supported); } void DisplaySettingsWidget::onGPUAdapterIndexChanged() diff --git a/src/duckstation-qt/displaysettingswidget.ui b/src/duckstation-qt/displaysettingswidget.ui index 5ae78f14e..658e8a02b 100644 --- a/src/duckstation-qt/displaysettingswidget.ui +++ b/src/duckstation-qt/displaysettingswidget.ui @@ -62,7 +62,14 @@ <item row="2" column="1"> <widget class="QComboBox" name="fullscreenMode"/> </item> - <item row="3" column="0"> + <item row="3" column="0" colspan="2"> + <widget class="QCheckBox" name="gpuThread"> + <property name="text"> + <string>Threaded Rendering</string> + </property> + </widget> + </item> + <item row="4" column="0" colspan="2"> <widget class="QCheckBox" name="vsync"> <property name="text"> <string>VSync</string> diff --git a/src/duckstation-sdl/sdl_host_interface.cpp b/src/duckstation-sdl/sdl_host_interface.cpp index c9899586e..65b5abc0a 100644 --- a/src/duckstation-sdl/sdl_host_interface.cpp +++ b/src/duckstation-sdl/sdl_host_interface.cpp @@ -931,6 +931,8 @@ void SDLHostInterface::DrawQuickSettingsMenu() } } + settings_changed |= ImGui::MenuItem("GPU on Thread", nullptr, &m_settings_copy.gpu_use_thread); + ImGui::EndMenu(); }