From be81d08109d0ba3dafaae500d2fec08164a5407d Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sun, 3 Nov 2019 13:36:54 +1000 Subject: [PATCH] GPU/HW: Split shadergen to separate class --- src/core/CMakeLists.txt | 2 + src/core/core.vcxproj | 2 + src/core/core.vcxproj.filters | 2 + src/core/gpu.h | 117 ++++----- src/core/gpu_hw.cpp | 443 +--------------------------------- src/core/gpu_hw.h | 70 +++--- src/core/gpu_hw_opengl.cpp | 105 ++++---- src/core/gpu_hw_opengl.h | 3 +- src/core/gpu_hw_shadergen.cpp | 436 +++++++++++++++++++++++++++++++++ src/core/gpu_hw_shadergen.h | 34 +++ 10 files changed, 624 insertions(+), 590 deletions(-) create mode 100644 src/core/gpu_hw_shadergen.cpp create mode 100644 src/core/gpu_hw_shadergen.h diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 36cd5a524..2bdc8e9ee 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -21,6 +21,8 @@ add_library(core gpu_hw.h gpu_hw_opengl.cpp gpu_hw_opengl.h + gpu_hw_shadergen.cpp + gpu_hw_shadergen.h gpu_sw.cpp gpu_sw.h gte.cpp diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index c2bab1929..4497e24df 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -41,6 +41,7 @@ + @@ -64,6 +65,7 @@ + diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 200a14710..70ddf1b56 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -23,6 +23,7 @@ + @@ -50,6 +51,7 @@ + diff --git a/src/core/gpu.h b/src/core/gpu.h index e17ac914d..99170c246 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -18,6 +18,57 @@ class Timers; class GPU { public: + enum class DMADirection : u32 + { + Off = 0, + FIFO = 1, + CPUtoGP0 = 2, + GPUREADtoCPU = 3 + }; + + enum class Primitive : u8 + { + Reserved = 0, + Polygon = 1, + Line = 2, + Rectangle = 3 + }; + + enum class DrawRectangleSize : u8 + { + Variable = 0, + R1x1 = 1, + R8x8 = 2, + R16x16 = 3 + }; + + enum class TextureMode : u8 + { + Palette4Bit = 0, + Palette8Bit 
= 1, + Direct16Bit = 2, + Reserved_Direct16Bit = 3, + + // Not register values. + RawTextureBit = 4, + RawPalette4Bit = RawTextureBit | Palette4Bit, + RawPalette8Bit = RawTextureBit | Palette8Bit, + RawDirect16Bit = RawTextureBit | Direct16Bit, + Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit, + + Disabled = 8 // Not a register value + }; + + enum class TransparencyMode : u8 + { + HalfBackgroundPlusHalfForeground = 0, + BackgroundPlusForeground = 1, + BackgroundMinusForeground = 2, + BackgroundPlusQuarterForeground = 3, + + Disabled = 4 // Not a register value + }; + enum : u32 { VRAM_WIDTH = 1024, @@ -29,6 +80,13 @@ public: HBLANK_TIMER_INDEX = 1 }; + // 4x4 dither matrix. + static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0 + {+2, -2, +3, -1}, // row 1 + {-3, +1, -4, +0}, // row 2 + {+4, -1, +2, -2}}; // row 3 + + // Base class constructor. GPU(); virtual ~GPU(); @@ -112,57 +170,6 @@ protected: static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha); - enum class DMADirection : u32 - { - Off = 0, - FIFO = 1, - CPUtoGP0 = 2, - GPUREADtoCPU = 3 - }; - - enum class Primitive : u8 - { - Reserved = 0, - Polygon = 1, - Line = 2, - Rectangle = 3 - }; - - enum class DrawRectangleSize : u8 - { - Variable = 0, - R1x1 = 1, - R8x8 = 2, - R16x16 = 3 - }; - - enum class TextureMode : u8 - { - Palette4Bit = 0, - Palette8Bit = 1, - Direct16Bit = 2, - Reserved_Direct16Bit = 3, - - // Not register values. 
- RawTextureBit = 4, - RawPalette4Bit = RawTextureBit | Palette4Bit, - RawPalette8Bit = RawTextureBit | Palette8Bit, - RawDirect16Bit = RawTextureBit | Direct16Bit, - Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit, - - Disabled = 8 // Not a register value - }; - - enum class TransparencyMode : u8 - { - HalfBackgroundPlusHalfForeground = 0, - BackgroundPlusForeground = 1, - BackgroundMinusForeground = 2, - BackgroundPlusQuarterForeground = 3, - - Disabled = 4 // Not a register value - }; - union RenderCommand { u32 bits; @@ -258,12 +265,6 @@ protected: } }; - // 4x4 dither matrix. - static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0 - {+2, -2, +3, -1}, // row 1 - {-3, +1, -4, +0}, // row 2 - {+4, -1, +2, -2}}; // row 3 - void SoftReset(); // Sets dots per scanline @@ -464,3 +465,5 @@ private: static const GP0CommandHandlerTable s_GP0_command_handler_table; }; + +IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPU::TextureMode); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 71f7bcd19..a0730d3f7 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -163,7 +163,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command void GPU_HW::AddDuplicateVertex() { - std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(HWVertex)); + std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(BatchVertex)); m_batch_current_vertex_ptr++; } @@ -175,443 +175,14 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom) *bottom = std::max((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1); } -static void DefineMacro(std::stringstream& ss, const char* name, bool enabled) -{ - if (enabled) - ss << "#define " << name << " 1\n"; - else - ss << "/* #define " << name << " 0 */\n"; -} - -void GPU_HW::GenerateShaderHeader(std::stringstream& ss) -{ - ss << "#version 330 core\n\n"; - ss << "const int RESOLUTION_SCALE = " << m_resolution_scale << 
";\n"; - ss << "const ivec2 VRAM_SIZE = ivec2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n"; - ss << "const vec2 RCP_VRAM_SIZE = vec2(1.0, 1.0) / vec2(VRAM_SIZE);\n"; - ss << R"( - -float fixYCoord(float y) -{ - return 1.0 - RCP_VRAM_SIZE.y - y; -} - -int fixYCoord(int y) -{ - return VRAM_SIZE.y - y - 1; -} - -uint RGBA8ToRGBA5551(vec4 v) -{ - uint r = uint(v.r * 255.0) >> 3; - uint g = uint(v.g * 255.0) >> 3; - uint b = uint(v.b * 255.0) >> 3; - uint a = (v.a != 0.0) ? 1u : 0u; - return (r) | (g << 5) | (b << 10) | (a << 15); -} - -vec4 RGBA5551ToRGBA8(uint v) -{ - uint r = (v & 31u); - uint g = ((v >> 5) & 31u); - uint b = ((v >> 10) & 31u); - uint a = ((v >> 15) & 1u); - - // repeat lower bits - r = (r << 3) | (r & 7u); - g = (g << 3) | (g & 7u); - b = (b << 3) | (b & 7u); - - return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a)); -} -)"; -} - -void GPU_HW::GenerateBatchUniformBuffer(std::stringstream& ss) -{ - ss << R"( -uniform UBOBlock { - ivec2 u_pos_offset; - uvec2 u_texture_window_mask; - uvec2 u_texture_window_offset; - float u_src_alpha_factor; - float u_dst_alpha_factor; -}; -)"; -} - -std::string GPU_HW::GenerateVertexShader(bool textured) -{ - std::stringstream ss; - GenerateShaderHeader(ss); - DefineMacro(ss, "TEXTURED", textured); - GenerateBatchUniformBuffer(ss); - - ss << R"( -in ivec2 a_pos; -in vec4 a_col0; -in int a_texcoord; -in int a_texpage; - -out vec3 v_col0; -#if TEXTURED - out vec2 v_tex0; - flat out ivec4 v_texpage; -#endif - -void main() -{ - // 0..+1023 -> -1..1 - float pos_x = (float(a_pos.x + u_pos_offset.x) / 512.0) - 1.0; - float pos_y = (float(a_pos.y + u_pos_offset.y) / -256.0) + 1.0; - gl_Position = vec4(pos_x, pos_y, 0.0, 1.0); - - v_col0 = a_col0.rgb; - #if TEXTURED - v_tex0 = vec2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / vec2(255.0); - - // base_x,base_y,palette_x,palette_y - v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE; - v_texpage.y = ((a_texpage >> 4) & 
1) * 256 * RESOLUTION_SCALE; - v_texpage.z = ((a_texpage >> 16) & 63) * 16 * RESOLUTION_SCALE; - v_texpage.w = ((a_texpage >> 22) & 511) * RESOLUTION_SCALE; - #endif -} -)"; - - return ss.str(); -} - -std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, TextureMode texture_mode, bool dithering) -{ - const TextureMode actual_texture_mode = - static_cast(static_cast(texture_mode) & ~static_cast(TextureMode::RawTextureBit)); - const bool raw_texture = (static_cast(texture_mode) & static_cast(TextureMode::RawTextureBit)) == - static_cast(TextureMode::RawTextureBit); - - std::stringstream ss; - GenerateShaderHeader(ss); - GenerateBatchUniformBuffer(ss); - DefineMacro(ss, "TRANSPARENCY", transparency != HWBatchRenderMode::TransparencyDisabled); - DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == HWBatchRenderMode::OnlyOpaque); - DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == HWBatchRenderMode::OnlyTransparent); - DefineMacro(ss, "TEXTURED", actual_texture_mode != TextureMode::Disabled); - DefineMacro(ss, "PALETTE", - actual_texture_mode == GPU::TextureMode::Palette4Bit || - actual_texture_mode == GPU::TextureMode::Palette8Bit); - DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit); - DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit); - DefineMacro(ss, "RAW_TEXTURE", raw_texture); - DefineMacro(ss, "DITHERING", dithering); - DefineMacro(ss, "TRUE_COLOR", m_true_color); - - ss << "const int[16] s_dither_values = int[16]( "; - for (u32 i = 0; i < 16; i++) - { - if (i > 0) - ss << ", "; - ss << DITHER_MATRIX[i / 4][i % 4]; - } - ss << " );\n"; - - ss << R"( -in vec3 v_col0; -#if TEXTURED - in vec2 v_tex0; - flat in ivec4 v_texpage; - uniform sampler2D samp0; -#endif - -out vec4 o_col0; - -ivec3 ApplyDithering(ivec3 icol) -{ - ivec2 fc = (ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & ivec2(3, 3); - int offset = s_dither_values[fc.y 
* 4 + fc.x]; - return icol + ivec3(offset, offset, offset); -} - -ivec3 TruncateTo15Bit(ivec3 icol) -{ - icol = clamp(icol, ivec3(0, 0, 0), ivec3(255, 255, 255)); - return (icol & ivec3(~7, ~7, ~7)) | ((icol >> 3) & ivec3(7, 7, 7)); -} - -#if TEXTURED -ivec2 ApplyNativeTextureWindow(ivec2 coords) -{ - uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u); - uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u); - return ivec2(int(x), int(y)); -} - -ivec2 ApplyTextureWindow(ivec2 coords) -{ - if (RESOLUTION_SCALE == 1) - return ApplyNativeTextureWindow(coords); - - ivec2 downscaled_coords = coords / ivec2(RESOLUTION_SCALE); - ivec2 coords_offset = coords % ivec2(RESOLUTION_SCALE); - return (ApplyNativeTextureWindow(downscaled_coords) * ivec2(RESOLUTION_SCALE)) + coords_offset; -} - -ivec4 SampleFromVRAM(vec2 coord) -{ - // from 0..1 to 0..255 - ivec2 icoord = ivec2(coord * vec2(255 * RESOLUTION_SCALE)); - icoord = ApplyTextureWindow(icoord); - - // adjust for tightly packed palette formats - ivec2 index_coord = icoord; - #if PALETTE_4_BIT - index_coord.x /= 4; - #elif PALETTE_8_BIT - index_coord.x /= 2; - #endif - - // fixup coords - ivec2 vicoord = ivec2(v_texpage.x + index_coord.x, fixYCoord(v_texpage.y + index_coord.y)); - - // load colour/palette - vec4 color = texelFetch(samp0, vicoord, 0); - - // apply palette - #if PALETTE - #if PALETTE_4_BIT - int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3; - uint vram_value = RGBA8ToRGBA5551(color); - int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu); - #elif PALETTE_8_BIT - int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1; - uint vram_value = RGBA8ToRGBA5551(color); - int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu); - #endif - ivec2 palette_icoord = ivec2(v_texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(v_texpage.w)); - color = 
texelFetch(samp0, palette_icoord, 0); - #endif - - return ivec4(color * vec4(255.0, 255.0, 255.0, 255.0)); -} -#endif - -void main() -{ - ivec3 vertcol = ivec3(v_col0 * vec3(255.0, 255.0, 255.0)); - - bool semitransparent; - bool new_mask_bit; - ivec3 icolor; - - #if TEXTURED - ivec4 texcol = SampleFromVRAM(v_tex0); - if (texcol == ivec4(0.0, 0.0, 0.0, 0.0)) - discard; - - // Grab semitransparent bit from the texture color. - semitransparent = (texcol.a != 0); - - #if RAW_TEXTURE - icolor = texcol.rgb; - #else - icolor = (vertcol * texcol.rgb) >> 7; - #endif - #else - // All pixels are semitransparent for untextured polygons. - semitransparent = true; - icolor = vertcol; - #endif - - // Apply dithering - #if DITHERING - icolor = ApplyDithering(icolor); - #endif - - // Clip to 15-bit range - #if !TRUE_COLOR - icolor = TruncateTo15Bit(icolor); - #endif - - // Normalize - vec3 color = vec3(icolor) / vec3(255.0, 255.0, 255.0); - - #if TRANSPARENCY - // Apply semitransparency. If not a semitransparent texel, destination alpha is ignored. 
- if (semitransparent) - { - #if TRANSPARENCY_ONLY_OPAQUE - discard; - #endif - o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor); - } - else - { - #if TRANSPARENCY_ONLY_TRANSPARENCY - discard; - #endif - o_col0 = vec4(color, 0.0); - } - #else - o_col0 = vec4(color, 0.0); - #endif -} -)"; - - return ss.str(); -} - -std::string GPU_HW::GenerateScreenQuadVertexShader() -{ - std::stringstream ss; - GenerateShaderHeader(ss); - ss << R"( - -out vec2 v_tex0; - -void main() -{ - v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2)); - gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f); - gl_Position.y = -gl_Position.y; -} -)"; - - return ss.str(); -} - -std::string GPU_HW::GenerateFillFragmentShader() -{ - std::stringstream ss; - GenerateShaderHeader(ss); - - ss << R"( -uniform vec4 fill_color; -out vec4 o_col0; - -void main() -{ - o_col0 = fill_color; -} -)"; - - return ss.str(); -} - -std::string GPU_HW::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced) -{ - std::stringstream ss; - GenerateShaderHeader(ss); - DefineMacro(ss, "DEPTH_24BIT", depth_24bit); - DefineMacro(ss, "INTERLACED", interlaced); - - ss << R"( -in vec2 v_tex0; -out vec4 o_col0; - -uniform sampler2D samp0; -uniform ivec3 u_base_coords; - -ivec2 GetCoords(vec2 fragcoord) -{ - ivec2 icoords = ivec2(fragcoord); - #if INTERLACED - if ((((icoords.y - u_base_coords.z) / RESOLUTION_SCALE) & 1) != 0) - discard; - #endif - return icoords; -} - -void main() -{ - ivec2 icoords = GetCoords(gl_FragCoord.xy); - - #if DEPTH_24BIT - // compute offset in dwords from the start of the 24-bit values - ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y); - int xoff = int(icoords.x); - int dword_index = (xoff / 2) + (xoff / 4); - - // sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these - uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0)); - uint s1 = 
RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0)); - uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0)); - uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0)); - - // select the bit for this pixel depending on its offset in the 4-pixel block - uint r, g, b; - int block_offset = xoff & 3; - if (block_offset == 0) - { - r = s0 & 0xFFu; - g = s0 >> 8; - b = s1 & 0xFFu; - } - else if (block_offset == 1) - { - r = s1 >> 8; - g = s2 & 0xFFu; - b = s2 >> 8; - } - else if (block_offset == 2) - { - r = s1 & 0xFFu; - g = s1 >> 8; - b = s2 & 0xFFu; - } - else - { - r = s2 >> 8; - g = s3 & 0xFFu; - b = s3 >> 8; - } - - // and normalize - o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0); - #else - // load and return - o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0); - #endif -} -)"; - - return ss.str(); -} - -std::string GPU_HW::GenerateVRAMWriteFragmentShader() -{ - std::stringstream ss; - GenerateShaderHeader(ss); - - ss << R"( - -uniform ivec2 u_base_coords; -uniform ivec2 u_size; -uniform usamplerBuffer samp0; - -out vec4 o_col0; - -void main() -{ - ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE); - ivec2 offset = coords - u_base_coords; - offset.y = u_size.y - offset.y - 1; - - int buffer_offset = offset.y * u_size.x + offset.x; - uint value = texelFetch(samp0, buffer_offset).r; - - o_col0 = RGBA5551ToRGBA8(value); -})"; - - return ss.str(); -} - -GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc) +GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc) { if (rc.primitive == Primitive::Line) - return rc.polyline ? HWPrimitive::LineStrip : HWPrimitive::Lines; + return rc.polyline ? 
BatchPrimitive::LineStrip : BatchPrimitive::Lines; else if ((rc.primitive == Primitive::Polygon && rc.quad_polygon) || rc.primitive == Primitive::Rectangle) - return HWPrimitive::TriangleStrip; + return BatchPrimitive::TriangleStrip; else - return HWPrimitive::Triangles; + return BatchPrimitive::Triangles; } void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) @@ -687,13 +258,13 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32 // has any state changed which requires a new batch? const TransparencyMode transparency_mode = rc.transparency_enable ? m_render_state.transparency_mode : TransparencyMode::Disabled; - const HWPrimitive rc_primitive = GetPrimitiveForCommand(rc); + const BatchPrimitive rc_primitive = GetPrimitiveForCommand(rc); const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; const u32 max_added_vertices = num_vertices + 2; if (!IsFlushed()) { const bool buffer_overflow = GetBatchVertexSpace() < max_added_vertices; - if (buffer_overflow || rc_primitive == HWPrimitive::LineStrip || m_batch.texture_mode != texture_mode || + if (buffer_overflow || rc_primitive == BatchPrimitive::LineStrip || m_batch.texture_mode != texture_mode || m_batch.transparency_mode != transparency_mode || m_batch.primitive != rc_primitive || dithering_enable != m_batch.dithering || m_render_state.IsTextureWindowChanged()) { diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 5af1a5242..b64e46b31 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -8,6 +8,22 @@ class GPU_HW : public GPU { public: + enum class BatchPrimitive : u8 + { + Lines = 0, + LineStrip = 1, + Triangles = 2, + TriangleStrip = 3 + }; + + enum class BatchRenderMode : u8 + { + TransparencyDisabled, + TransparentAndOpaque, + OnlyOpaque, + OnlyTransparent + }; + GPU_HW(); virtual ~GPU_HW(); @@ -16,23 +32,7 @@ public: virtual void UpdateSettings() override; protected: - enum 
class HWPrimitive : u8 - { - Lines = 0, - LineStrip = 1, - Triangles = 2, - TriangleStrip = 3 - }; - - enum class HWBatchRenderMode : u8 - { - TransparencyDisabled, - TransparentAndOpaque, - OnlyOpaque, - OnlyTransparent - }; - - struct HWVertex + struct BatchVertex { s32 x; s32 y; @@ -55,9 +55,9 @@ protected: } }; - struct HWBatchConfig + struct BatchConfig { - HWPrimitive primitive; + BatchPrimitive primitive; TextureMode texture_mode; TransparencyMode transparency_mode; bool dithering; @@ -71,14 +71,14 @@ protected: } // Returns the render mode for this batch. - HWBatchRenderMode GetRenderMode() const + BatchRenderMode GetRenderMode() const { - return transparency_mode == TransparencyMode::Disabled ? HWBatchRenderMode::TransparencyDisabled : - HWBatchRenderMode::TransparentAndOpaque; + return transparency_mode == TransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled : + BatchRenderMode::TransparentAndOpaque; } }; - struct HWBatchUBOData + struct BatchUBOData { s32 u_pos_offset[2]; u32 u_texture_window_mask[2]; @@ -90,7 +90,7 @@ protected: static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32); static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024; static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6; - static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex); + static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex); static constexpr u32 UNIFORM_BUFFER_SIZE = 512 * 1024; static constexpr std::tuple RGBA8ToFloat(u32 rgba) @@ -121,31 +121,21 @@ protected: return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale)); } - std::string GenerateVertexShader(bool textured); - std::string GenerateFragmentShader(HWBatchRenderMode transparency, TextureMode texture_mode, bool dithering); - std::string GenerateScreenQuadVertexShader(); - std::string GenerateFillFragmentShader(); - std::string GenerateDisplayFragmentShader(bool depth_24bit, bool 
interlaced); - std::string GenerateVRAMWriteFragmentShader(); - - HWVertex* m_batch_start_vertex_ptr = nullptr; - HWVertex* m_batch_end_vertex_ptr = nullptr; - HWVertex* m_batch_current_vertex_ptr = nullptr; + BatchVertex* m_batch_start_vertex_ptr = nullptr; + BatchVertex* m_batch_end_vertex_ptr = nullptr; + BatchVertex* m_batch_current_vertex_ptr = nullptr; u32 m_batch_base_vertex = 0; u32 m_resolution_scale = 1; u32 m_max_resolution_scale = 1; bool m_true_color = false; - HWBatchConfig m_batch = {}; - HWBatchUBOData m_batch_ubo_data = {}; + BatchConfig m_batch = {}; + BatchUBOData m_batch_ubo_data = {}; bool m_batch_ubo_dirty = true; private: - static HWPrimitive GetPrimitiveForCommand(RenderCommand rc); - - void GenerateShaderHeader(std::stringstream& ss); - void GenerateBatchUniformBuffer(std::stringstream& ss); + static BatchPrimitive GetPrimitiveForCommand(RenderCommand rc); void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr); void AddDuplicateVertex(); diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 5548616d1..5b5161b30 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -2,6 +2,7 @@ #include "YBaseLib/Assert.h" #include "YBaseLib/Log.h" #include "YBaseLib/String.h" +#include "gpu_hw_shadergen.h" #include "host_interface.h" #include "imgui.h" #include "system.h" @@ -138,9 +139,9 @@ void GPU_HW_OpenGL::MapBatchVertexPointer(u32 required_vertices) Assert(!m_batch_start_vertex_ptr); const GL::StreamBuffer::MappingResult res = - m_vertex_stream_buffer->Map(sizeof(HWVertex), required_vertices * sizeof(HWVertex)); + m_vertex_stream_buffer->Map(sizeof(BatchVertex), required_vertices * sizeof(BatchVertex)); - m_batch_start_vertex_ptr = static_cast(res.pointer); + m_batch_start_vertex_ptr = static_cast(res.pointer); m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned; m_batch_base_vertex = res.index_aligned; @@ -246,11 
+247,11 @@ void GPU_HW_OpenGL::CreateVertexBuffer() glEnableVertexAttribArray(1); glEnableVertexAttribArray(2); glEnableVertexAttribArray(3); - glVertexAttribIPointer(0, 2, GL_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, x))); - glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(HWVertex), - reinterpret_cast(offsetof(HWVertex, color))); - glVertexAttribIPointer(2, 2, GL_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, texcoord))); - glVertexAttribIPointer(3, 1, GL_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, texpage))); + glVertexAttribIPointer(0, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, x))); + glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex), + reinterpret_cast(offsetof(BatchVertex, color))); + glVertexAttribIPointer(2, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, texcoord))); + glVertexAttribIPointer(3, 1, GL_INT, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, texpage))); glBindVertexArray(0); glGenVertexArrays(1, &m_attributeless_vao_id); @@ -280,31 +281,56 @@ void GPU_HW_OpenGL::CreateTextureBuffer() bool GPU_HW_OpenGL::CompilePrograms() { + GPU_HW_ShaderGen shadergen(GPU_HW_ShaderGen::Backend::OpenGL, m_resolution_scale, m_true_color); + for (u32 render_mode = 0; render_mode < 4; render_mode++) { for (u32 texture_mode = 0; texture_mode < 9; texture_mode++) { for (u8 dithering = 0; dithering < 2; dithering++) { - if (!CompileProgram(m_render_programs[render_mode][texture_mode][dithering], - static_cast(render_mode), static_cast(texture_mode), - ConvertToBoolUnchecked(dithering))) - { + const bool textured = (static_cast(texture_mode) != TextureMode::Disabled); + const std::string vs = shadergen.GenerateBatchVertexShader(textured); + const std::string fs = shadergen.GenerateBatchFragmentShader(static_cast(render_mode), + static_cast(texture_mode), + ConvertToBoolUnchecked(dithering)); + + GL::Program& prog = 
m_render_programs[render_mode][texture_mode][dithering]; + if (!prog.Compile(vs, fs)) return false; + + prog.BindAttribute(0, "a_pos"); + prog.BindAttribute(1, "a_col0"); + if (textured) + { + prog.BindAttribute(2, "a_texcoord"); + prog.BindAttribute(3, "a_texpage"); + } + + prog.BindFragData(0, "o_col0"); + + if (!prog.Link()) + return false; + + prog.BindUniformBlock("UBOBlock", 1); + if (textured) + { + prog.Bind(); + prog.RegisterUniform("samp0"); + prog.Uniform1i(0, 0); } } } } - // TODO: Use string_view for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++) { for (u8 interlaced = 0; interlaced < 2; interlaced++) { GL::Program& prog = m_display_programs[depth_24bit][interlaced]; - const std::string vs = GenerateScreenQuadVertexShader(); - const std::string fs = - GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), ConvertToBoolUnchecked(interlaced)); + const std::string vs = shadergen.GenerateScreenQuadVertexShader(); + const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), + ConvertToBoolUnchecked(interlaced)); if (!prog.Compile(vs, fs)) return false; @@ -319,8 +345,11 @@ bool GPU_HW_OpenGL::CompilePrograms() } } - if (!m_vram_write_program.Compile(GenerateScreenQuadVertexShader(), GenerateVRAMWriteFragmentShader())) + if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(), + shadergen.GenerateVRAMWriteFragmentShader())) + { return false; + } m_vram_write_program.BindFragData(0, "o_col0"); if (!m_vram_write_program.Link()) @@ -335,41 +364,7 @@ bool GPU_HW_OpenGL::CompilePrograms() return true; } -bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, - bool dithering) -{ - const bool textured = texture_mode != TextureMode::Disabled; - const std::string vs = GenerateVertexShader(textured); - const std::string fs = GenerateFragmentShader(render_mode, texture_mode, dithering); - if (!prog.Compile(vs, fs)) - return false; - - 
prog.BindAttribute(0, "a_pos"); - prog.BindAttribute(1, "a_col0"); - if (textured) - { - prog.BindAttribute(2, "a_texcoord"); - prog.BindAttribute(3, "a_texpage"); - } - - prog.BindFragData(0, "o_col0"); - - if (!prog.Link()) - return false; - - prog.BindUniformBlock("UBOBlock", 1); - - if (textured) - { - prog.Bind(); - prog.RegisterUniform("samp0"); - prog.Uniform1i(0, 0); - } - - return true; -} - -void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode) +void GPU_HW_OpenGL::SetDrawState(BatchRenderMode render_mode) { const GL::Program& prog = m_render_programs[static_cast(render_mode)][static_cast(m_batch.texture_mode)] [BoolToUInt8(m_batch.dithering)]; @@ -378,7 +373,7 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode) if (m_batch.texture_mode != TextureMode::Disabled) m_vram_read_texture->Bind(); - if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == HWBatchRenderMode::OnlyOpaque) + if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque) { glDisable(GL_BLEND); } @@ -732,7 +727,7 @@ void GPU_HW_OpenGL::FlushRender() m_stats.num_batches++; m_stats.num_vertices += vertex_count; - m_vertex_stream_buffer->Unmap(vertex_count * sizeof(HWVertex)); + m_vertex_stream_buffer->Unmap(vertex_count * sizeof(BatchVertex)); m_vertex_stream_buffer->Bind(); m_batch_start_vertex_ptr = nullptr; m_batch_end_vertex_ptr = nullptr; @@ -742,9 +737,9 @@ void GPU_HW_OpenGL::FlushRender() if (m_batch.NeedsTwoPassRendering()) { - SetDrawState(HWBatchRenderMode::OnlyTransparent); + SetDrawState(BatchRenderMode::OnlyTransparent); glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], 0, vertex_count); - SetDrawState(HWBatchRenderMode::OnlyOpaque); + SetDrawState(BatchRenderMode::OnlyOpaque); glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], 0, vertex_count); } else diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index b9cac7ac8..1522e15b5 100644 --- 
a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -58,8 +58,7 @@ private: void CreateTextureBuffer(); bool CompilePrograms(); - bool CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, bool dithering); - void SetDrawState(HWBatchRenderMode render_mode); + void SetDrawState(BatchRenderMode render_mode); void UploadUniformBlock(const void* data, u32 data_size); // downsample texture - used for readbacks at >1xIR. diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp new file mode 100644 index 000000000..ab5f15774 --- /dev/null +++ b/src/core/gpu_hw_shadergen.cpp @@ -0,0 +1,436 @@ +#include "gpu_hw_shadergen.h" + +GPU_HW_ShaderGen::GPU_HW_ShaderGen(Backend backend, u32 resolution_scale, bool true_color) + : m_backend(backend), m_resolution_scale(resolution_scale), m_true_color(true_color) +{ +} + +GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default; + +static void DefineMacro(std::stringstream& ss, const char* name, bool enabled) +{ + if (enabled) + ss << "#define " << name << " 1\n"; + else + ss << "/* #define " << name << " 0 */\n"; +} + +void GPU_HW_ShaderGen::GenerateShaderHeader(std::stringstream& ss) +{ + ss << "#version 330 core\n\n"; + ss << "const int RESOLUTION_SCALE = " << m_resolution_scale << ";\n"; + ss << "const ivec2 VRAM_SIZE = ivec2(" << GPU::VRAM_WIDTH << ", " << GPU::VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n"; + ss << "const vec2 RCP_VRAM_SIZE = vec2(1.0, 1.0) / vec2(VRAM_SIZE);\n"; + ss << R"( + +float fixYCoord(float y) +{ + return 1.0 - RCP_VRAM_SIZE.y - y; +} + +int fixYCoord(int y) +{ + return VRAM_SIZE.y - y - 1; +} + +uint RGBA8ToRGBA5551(vec4 v) +{ + uint r = uint(v.r * 255.0) >> 3; + uint g = uint(v.g * 255.0) >> 3; + uint b = uint(v.b * 255.0) >> 3; + uint a = (v.a != 0.0) ? 
1u : 0u; + return (r) | (g << 5) | (b << 10) | (a << 15); +} + +vec4 RGBA5551ToRGBA8(uint v) +{ + uint r = (v & 31u); + uint g = ((v >> 5) & 31u); + uint b = ((v >> 10) & 31u); + uint a = ((v >> 15) & 1u); + + // repeat lower bits + r = (r << 3) | (r & 7u); + g = (g << 3) | (g & 7u); + b = (b << 3) | (b & 7u); + + return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a)); +} +)"; +} + +void GPU_HW_ShaderGen::GenerateBatchUniformBuffer(std::stringstream& ss) +{ + ss << R"( +uniform UBOBlock { + ivec2 u_pos_offset; + uvec2 u_texture_window_mask; + uvec2 u_texture_window_offset; + float u_src_alpha_factor; + float u_dst_alpha_factor; +}; +)"; +} + +std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured) +{ + std::stringstream ss; + GenerateShaderHeader(ss); + DefineMacro(ss, "TEXTURED", textured); + GenerateBatchUniformBuffer(ss); + + ss << R"( +in ivec2 a_pos; +in vec4 a_col0; +in int a_texcoord; +in int a_texpage; + +out vec3 v_col0; +#if TEXTURED + out vec2 v_tex0; + flat out ivec4 v_texpage; +#endif + +void main() +{ + // 0..+1023 -> -1..1 + float pos_x = (float(a_pos.x + u_pos_offset.x) / 512.0) - 1.0; + float pos_y = (float(a_pos.y + u_pos_offset.y) / -256.0) + 1.0; + gl_Position = vec4(pos_x, pos_y, 0.0, 1.0); + + v_col0 = a_col0.rgb; + #if TEXTURED + v_tex0 = vec2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / vec2(255.0); + + // base_x,base_y,palette_x,palette_y + v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE; + v_texpage.y = ((a_texpage >> 4) & 1) * 256 * RESOLUTION_SCALE; + v_texpage.z = ((a_texpage >> 16) & 63) * 16 * RESOLUTION_SCALE; + v_texpage.w = ((a_texpage >> 22) & 511) * RESOLUTION_SCALE; + #endif +} +)"; + + return ss.str(); +} + +std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, + GPU::TextureMode texture_mode, bool dithering) +{ + const GPU::TextureMode actual_texture_mode = texture_mode & ~GPU::TextureMode::RawTextureBit; + const bool raw_texture = 
(texture_mode & GPU::TextureMode::RawTextureBit) == GPU::TextureMode::RawTextureBit; + + std::stringstream ss; + GenerateShaderHeader(ss); + GenerateBatchUniformBuffer(ss); + DefineMacro(ss, "TRANSPARENCY", transparency != GPU_HW::BatchRenderMode::TransparencyDisabled); + DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == GPU_HW::BatchRenderMode::OnlyOpaque); + DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == GPU_HW::BatchRenderMode::OnlyTransparent); + DefineMacro(ss, "TEXTURED", actual_texture_mode != GPU::TextureMode::Disabled); + DefineMacro(ss, "PALETTE", + actual_texture_mode == GPU::TextureMode::Palette4Bit || + actual_texture_mode == GPU::TextureMode::Palette8Bit); + DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit); + DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit); + DefineMacro(ss, "RAW_TEXTURE", raw_texture); + DefineMacro(ss, "DITHERING", dithering); + DefineMacro(ss, "TRUE_COLOR", m_true_color); + + ss << "const int[16] s_dither_values = int[16]( "; + for (u32 i = 0; i < 16; i++) + { + if (i > 0) + ss << ", "; + ss << GPU::DITHER_MATRIX[i / 4][i % 4]; + } + ss << " );\n"; + + ss << R"( +in vec3 v_col0; +#if TEXTURED + in vec2 v_tex0; + flat in ivec4 v_texpage; + uniform sampler2D samp0; +#endif + +out vec4 o_col0; + +ivec3 ApplyDithering(ivec3 icol) +{ + ivec2 fc = (ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & ivec2(3, 3); + int offset = s_dither_values[fc.y * 4 + fc.x]; + return icol + ivec3(offset, offset, offset); +} + +ivec3 TruncateTo15Bit(ivec3 icol) +{ + icol = clamp(icol, ivec3(0, 0, 0), ivec3(255, 255, 255)); + return (icol & ivec3(~7, ~7, ~7)) | ((icol >> 3) & ivec3(7, 7, 7)); +} + +#if TEXTURED +ivec2 ApplyNativeTextureWindow(ivec2 coords) +{ + uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u); + uint y = (uint(coords.y) & 
~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u); + return ivec2(int(x), int(y)); +} + +ivec2 ApplyTextureWindow(ivec2 coords) +{ + if (RESOLUTION_SCALE == 1) + return ApplyNativeTextureWindow(coords); + + ivec2 downscaled_coords = coords / ivec2(RESOLUTION_SCALE); + ivec2 coords_offset = coords % ivec2(RESOLUTION_SCALE); + return (ApplyNativeTextureWindow(downscaled_coords) * ivec2(RESOLUTION_SCALE)) + coords_offset; +} + +ivec4 SampleFromVRAM(vec2 coord) +{ + // from 0..1 to 0..255 + ivec2 icoord = ivec2(coord * vec2(255 * RESOLUTION_SCALE)); + icoord = ApplyTextureWindow(icoord); + + // adjust for tightly packed palette formats + ivec2 index_coord = icoord; + #if PALETTE_4_BIT + index_coord.x /= 4; + #elif PALETTE_8_BIT + index_coord.x /= 2; + #endif + + // fixup coords + ivec2 vicoord = ivec2(v_texpage.x + index_coord.x, fixYCoord(v_texpage.y + index_coord.y)); + + // load colour/palette + vec4 color = texelFetch(samp0, vicoord, 0); + + // apply palette + #if PALETTE + #if PALETTE_4_BIT + int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3; + uint vram_value = RGBA8ToRGBA5551(color); + int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu); + #elif PALETTE_8_BIT + int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1; + uint vram_value = RGBA8ToRGBA5551(color); + int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu); + #endif + ivec2 palette_icoord = ivec2(v_texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(v_texpage.w)); + color = texelFetch(samp0, palette_icoord, 0); + #endif + + return ivec4(color * vec4(255.0, 255.0, 255.0, 255.0)); +} +#endif + +void main() +{ + ivec3 vertcol = ivec3(v_col0 * vec3(255.0, 255.0, 255.0)); + + bool semitransparent; + bool new_mask_bit; + ivec3 icolor; + + #if TEXTURED + ivec4 texcol = SampleFromVRAM(v_tex0); + if (texcol == ivec4(0.0, 0.0, 0.0, 0.0)) + discard; + + // Grab semitransparent bit from the texture color. 
+ semitransparent = (texcol.a != 0); + + #if RAW_TEXTURE + icolor = texcol.rgb; + #else + icolor = (vertcol * texcol.rgb) >> 7; + #endif + #else + // All pixels are semitransparent for untextured polygons. + semitransparent = true; + icolor = vertcol; + #endif + + // Apply dithering + #if DITHERING + icolor = ApplyDithering(icolor); + #endif + + // Clip to 15-bit range + #if !TRUE_COLOR + icolor = TruncateTo15Bit(icolor); + #endif + + // Normalize + vec3 color = vec3(icolor) / vec3(255.0, 255.0, 255.0); + + #if TRANSPARENCY + // Apply semitransparency. If not a semitransparent texel, destination alpha is ignored. + if (semitransparent) + { + #if TRANSPARENCY_ONLY_OPAQUE + discard; + #endif + o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor); + } + else + { + #if TRANSPARENCY_ONLY_TRANSPARENCY + discard; + #endif + o_col0 = vec4(color, 0.0); + } + #else + o_col0 = vec4(color, 0.0); + #endif +} +)"; + + return ss.str(); +} + +std::string GPU_HW_ShaderGen::GenerateScreenQuadVertexShader() +{ + std::stringstream ss; + GenerateShaderHeader(ss); + ss << R"( + +out vec2 v_tex0; + +void main() +{ + v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2)); + gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f); + gl_Position.y = -gl_Position.y; +} +)"; + + return ss.str(); +} + +std::string GPU_HW_ShaderGen::GenerateFillFragmentShader() +{ + std::stringstream ss; + GenerateShaderHeader(ss); + + ss << R"( +uniform vec4 fill_color; +out vec4 o_col0; + +void main() +{ + o_col0 = fill_color; +} +)"; + + return ss.str(); +} + +std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced) +{ + std::stringstream ss; + GenerateShaderHeader(ss); + DefineMacro(ss, "DEPTH_24BIT", depth_24bit); + DefineMacro(ss, "INTERLACED", interlaced); + + ss << R"( +in vec2 v_tex0; +out vec4 o_col0; + +uniform sampler2D samp0; +uniform ivec3 u_base_coords; + +ivec2 GetCoords(vec2 fragcoord) +{ + ivec2 icoords = 
ivec2(fragcoord); + #if INTERLACED + if ((((icoords.y - u_base_coords.z) / RESOLUTION_SCALE) & 1) != 0) + discard; + #endif + return icoords; +} + +void main() +{ + ivec2 icoords = GetCoords(gl_FragCoord.xy); + + #if DEPTH_24BIT + // compute offset in dwords from the start of the 24-bit values + ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y); + int xoff = int(icoords.x); + int dword_index = (xoff / 2) + (xoff / 4); + + // sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these + uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0)); + uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0)); + uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0)); + uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0)); + + // select the bit for this pixel depending on its offset in the 4-pixel block + uint r, g, b; + int block_offset = xoff & 3; + if (block_offset == 0) + { + r = s0 & 0xFFu; + g = s0 >> 8; + b = s1 & 0xFFu; + } + else if (block_offset == 1) + { + r = s1 >> 8; + g = s2 & 0xFFu; + b = s2 >> 8; + } + else if (block_offset == 2) + { + r = s1 & 0xFFu; + g = s1 >> 8; + b = s2 & 0xFFu; + } + else + { + r = s2 >> 8; + g = s3 & 0xFFu; + b = s3 >> 8; + } + + // and normalize + o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0); + #else + // load and return + o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0); + #endif +} +)"; + + return ss.str(); +} + +std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader() +{ + std::stringstream ss; + GenerateShaderHeader(ss); + + ss << R"( + +uniform ivec2 u_base_coords; +uniform ivec2 u_size; +uniform usamplerBuffer samp0; + +out vec4 o_col0; + +void main() +{ + ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE); + ivec2 offset = coords - 
u_base_coords; + offset.y = u_size.y - offset.y - 1; + + int buffer_offset = offset.y * u_size.x + offset.x; + uint value = texelFetch(samp0, buffer_offset).r; + + o_col0 = RGBA5551ToRGBA8(value); +})"; + + return ss.str(); +} diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h new file mode 100644 index 000000000..72e97381b --- /dev/null +++ b/src/core/gpu_hw_shadergen.h @@ -0,0 +1,34 @@ +#pragma once +#include <sstream> +#include <string> +#include "gpu_hw.h" + +class GPU_HW_ShaderGen +{ +public: + enum class Backend + { + OpenGL + }; + +public: + GPU_HW_ShaderGen(Backend backend, u32 resolution_scale, bool true_color); + ~GPU_HW_ShaderGen(); + + void Init(Backend backend, u32 resolution_scale, bool true_color); + + std::string GenerateBatchVertexShader(bool textured); + std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode, bool dithering); + std::string GenerateScreenQuadVertexShader(); + std::string GenerateFillFragmentShader(); + std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced); + std::string GenerateVRAMWriteFragmentShader(); + + Backend m_backend; + u32 m_resolution_scale; + bool m_true_color; + +private: + void GenerateShaderHeader(std::stringstream& ss); + void GenerateBatchUniformBuffer(std::stringstream& ss); +};