diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 36cd5a524..2bdc8e9ee 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -21,6 +21,8 @@ add_library(core
gpu_hw.h
gpu_hw_opengl.cpp
gpu_hw_opengl.h
+ gpu_hw_shadergen.cpp
+ gpu_hw_shadergen.h
gpu_sw.cpp
gpu_sw.h
gte.cpp
diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj
index c2bab1929..4497e24df 100644
--- a/src/core/core.vcxproj
+++ b/src/core/core.vcxproj
@@ -41,6 +41,7 @@
+
@@ -64,6 +65,7 @@
+
diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters
index 200a14710..70ddf1b56 100644
--- a/src/core/core.vcxproj.filters
+++ b/src/core/core.vcxproj.filters
@@ -23,6 +23,7 @@
+
@@ -50,6 +51,7 @@
+
diff --git a/src/core/gpu.h b/src/core/gpu.h
index e17ac914d..99170c246 100644
--- a/src/core/gpu.h
+++ b/src/core/gpu.h
@@ -18,6 +18,57 @@ class Timers;
class GPU
{
public:
+ enum class DMADirection : u32 // NOTE(review): presumably the 2-bit DMA direction field of GPUSTAT — confirm.
+ {
+ Off = 0,
+ FIFO = 1,
+ CPUtoGP0 = 2,
+ GPUREADtoCPU = 3
+ };
+
+ enum class Primitive : u8 // Primitive kind of a RenderCommand; GPU_HW::GetPrimitiveForCommand maps it to a batch primitive.
+ {
+ Reserved = 0,
+ Polygon = 1, // Triangle, or a quad when the command's quad_polygon flag is set.
+ Line = 2, // Single line, or a line strip when the command's polyline flag is set.
+ Rectangle = 3
+ };
+
+ enum class DrawRectangleSize : u8 // Rectangle size selector. NOTE(review): presumably a rectangle command field — confirm.
+ {
+ Variable = 0, // NOTE(review): presumably the size is supplied with the command — confirm.
+ R1x1 = 1, // Fixed 1x1.
+ R8x8 = 2, // Fixed 8x8.
+ R16x16 = 3 // Fixed 16x16.
+ };
+
+ enum class TextureMode : u8 // Values 0-8 also serve as the texture-mode index of the hardware render program table.
+ {
+ Palette4Bit = 0,
+ Palette8Bit = 1,
+ Direct16Bit = 2,
+ Reserved_Direct16Bit = 3,
+
+ // Not register values. RawTextureBit is a flag OR'd onto one of the register values above to form the Raw* modes.
+ RawTextureBit = 4,
+ RawPalette4Bit = RawTextureBit | Palette4Bit,
+ RawPalette8Bit = RawTextureBit | Palette8Bit,
+ RawDirect16Bit = RawTextureBit | Direct16Bit,
+ Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
+
+ Disabled = 8 // Not a register value; selects the untextured shader variants.
+ };
+
+ enum class TransparencyMode : u8 // Semi-transparency blend equation; B = background, F = foreground.
+ {
+ HalfBackgroundPlusHalfForeground = 0, // B/2 + F/2
+ BackgroundPlusForeground = 1, // B + F
+ BackgroundMinusForeground = 2, // B - F
+ BackgroundPlusQuarterForeground = 3, // B + F/4
+
+ Disabled = 4 // Not a register value; used when the render command has transparency_enable clear.
+ };
+
enum : u32
{
VRAM_WIDTH = 1024,
@@ -29,6 +80,13 @@ public:
HBLANK_TIMER_INDEX = 1
};
+ // 4x4 dither offsets, indexed [y & 3][x & 3]. Rows 2-3 are rows 0-1 rotated by two columns (row 3 starts at +3).
+ static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0
+                                             {+2, -2, +3, -1}, // row 1
+                                             {-3, +1, -4, +0}, // row 2
+                                             {+3, -1, +2, -2}}; // row 3
+
+ // Base class constructor.
GPU();
virtual ~GPU();
@@ -112,57 +170,6 @@ protected:
static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
bool remove_alpha);
- enum class DMADirection : u32
- {
- Off = 0,
- FIFO = 1,
- CPUtoGP0 = 2,
- GPUREADtoCPU = 3
- };
-
- enum class Primitive : u8
- {
- Reserved = 0,
- Polygon = 1,
- Line = 2,
- Rectangle = 3
- };
-
- enum class DrawRectangleSize : u8
- {
- Variable = 0,
- R1x1 = 1,
- R8x8 = 2,
- R16x16 = 3
- };
-
- enum class TextureMode : u8
- {
- Palette4Bit = 0,
- Palette8Bit = 1,
- Direct16Bit = 2,
- Reserved_Direct16Bit = 3,
-
- // Not register values.
- RawTextureBit = 4,
- RawPalette4Bit = RawTextureBit | Palette4Bit,
- RawPalette8Bit = RawTextureBit | Palette8Bit,
- RawDirect16Bit = RawTextureBit | Direct16Bit,
- Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
-
- Disabled = 8 // Not a register value
- };
-
- enum class TransparencyMode : u8
- {
- HalfBackgroundPlusHalfForeground = 0,
- BackgroundPlusForeground = 1,
- BackgroundMinusForeground = 2,
- BackgroundPlusQuarterForeground = 3,
-
- Disabled = 4 // Not a register value
- };
-
union RenderCommand
{
u32 bits;
@@ -258,12 +265,6 @@ protected:
}
};
- // 4x4 dither matrix.
- static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0
- {+2, -2, +3, -1}, // row 1
- {-3, +1, -4, +0}, // row 2
- {+4, -1, +2, -2}}; // row 3
-
void SoftReset();
// Sets dots per scanline
@@ -464,3 +465,5 @@ private:
static const GP0CommandHandlerTable s_GP0_command_handler_table;
};
+
+IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPU::TextureMode); // NOTE(review): presumably defines |, &, ~ etc. so RawTextureBit can be combined/masked without casts — confirm against the macro.
diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index 71f7bcd19..a0730d3f7 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -163,7 +163,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
void GPU_HW::AddDuplicateVertex()
{
- std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(HWVertex));
+ std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(BatchVertex));
m_batch_current_vertex_ptr++;
}
@@ -175,443 +175,14 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
*bottom = std::max((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1);
}
-static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
-{
- if (enabled)
- ss << "#define " << name << " 1\n";
- else
- ss << "/* #define " << name << " 0 */\n";
-}
-
-void GPU_HW::GenerateShaderHeader(std::stringstream& ss)
-{
- ss << "#version 330 core\n\n";
- ss << "const int RESOLUTION_SCALE = " << m_resolution_scale << ";\n";
- ss << "const ivec2 VRAM_SIZE = ivec2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
- ss << "const vec2 RCP_VRAM_SIZE = vec2(1.0, 1.0) / vec2(VRAM_SIZE);\n";
- ss << R"(
-
-float fixYCoord(float y)
-{
- return 1.0 - RCP_VRAM_SIZE.y - y;
-}
-
-int fixYCoord(int y)
-{
- return VRAM_SIZE.y - y - 1;
-}
-
-uint RGBA8ToRGBA5551(vec4 v)
-{
- uint r = uint(v.r * 255.0) >> 3;
- uint g = uint(v.g * 255.0) >> 3;
- uint b = uint(v.b * 255.0) >> 3;
- uint a = (v.a != 0.0) ? 1u : 0u;
- return (r) | (g << 5) | (b << 10) | (a << 15);
-}
-
-vec4 RGBA5551ToRGBA8(uint v)
-{
- uint r = (v & 31u);
- uint g = ((v >> 5) & 31u);
- uint b = ((v >> 10) & 31u);
- uint a = ((v >> 15) & 1u);
-
- // repeat lower bits
- r = (r << 3) | (r & 7u);
- g = (g << 3) | (g & 7u);
- b = (b << 3) | (b & 7u);
-
- return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a));
-}
-)";
-}
-
-void GPU_HW::GenerateBatchUniformBuffer(std::stringstream& ss)
-{
- ss << R"(
-uniform UBOBlock {
- ivec2 u_pos_offset;
- uvec2 u_texture_window_mask;
- uvec2 u_texture_window_offset;
- float u_src_alpha_factor;
- float u_dst_alpha_factor;
-};
-)";
-}
-
-std::string GPU_HW::GenerateVertexShader(bool textured)
-{
- std::stringstream ss;
- GenerateShaderHeader(ss);
- DefineMacro(ss, "TEXTURED", textured);
- GenerateBatchUniformBuffer(ss);
-
- ss << R"(
-in ivec2 a_pos;
-in vec4 a_col0;
-in int a_texcoord;
-in int a_texpage;
-
-out vec3 v_col0;
-#if TEXTURED
- out vec2 v_tex0;
- flat out ivec4 v_texpage;
-#endif
-
-void main()
-{
- // 0..+1023 -> -1..1
- float pos_x = (float(a_pos.x + u_pos_offset.x) / 512.0) - 1.0;
- float pos_y = (float(a_pos.y + u_pos_offset.y) / -256.0) + 1.0;
- gl_Position = vec4(pos_x, pos_y, 0.0, 1.0);
-
- v_col0 = a_col0.rgb;
- #if TEXTURED
- v_tex0 = vec2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / vec2(255.0);
-
- // base_x,base_y,palette_x,palette_y
- v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE;
- v_texpage.y = ((a_texpage >> 4) & 1) * 256 * RESOLUTION_SCALE;
- v_texpage.z = ((a_texpage >> 16) & 63) * 16 * RESOLUTION_SCALE;
- v_texpage.w = ((a_texpage >> 22) & 511) * RESOLUTION_SCALE;
- #endif
-}
-)";
-
- return ss.str();
-}
-
-std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, TextureMode texture_mode, bool dithering)
-{
- const TextureMode actual_texture_mode =
- static_cast(static_cast(texture_mode) & ~static_cast(TextureMode::RawTextureBit));
- const bool raw_texture = (static_cast(texture_mode) & static_cast(TextureMode::RawTextureBit)) ==
- static_cast(TextureMode::RawTextureBit);
-
- std::stringstream ss;
- GenerateShaderHeader(ss);
- GenerateBatchUniformBuffer(ss);
- DefineMacro(ss, "TRANSPARENCY", transparency != HWBatchRenderMode::TransparencyDisabled);
- DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == HWBatchRenderMode::OnlyOpaque);
- DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == HWBatchRenderMode::OnlyTransparent);
- DefineMacro(ss, "TEXTURED", actual_texture_mode != TextureMode::Disabled);
- DefineMacro(ss, "PALETTE",
- actual_texture_mode == GPU::TextureMode::Palette4Bit ||
- actual_texture_mode == GPU::TextureMode::Palette8Bit);
- DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit);
- DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit);
- DefineMacro(ss, "RAW_TEXTURE", raw_texture);
- DefineMacro(ss, "DITHERING", dithering);
- DefineMacro(ss, "TRUE_COLOR", m_true_color);
-
- ss << "const int[16] s_dither_values = int[16]( ";
- for (u32 i = 0; i < 16; i++)
- {
- if (i > 0)
- ss << ", ";
- ss << DITHER_MATRIX[i / 4][i % 4];
- }
- ss << " );\n";
-
- ss << R"(
-in vec3 v_col0;
-#if TEXTURED
- in vec2 v_tex0;
- flat in ivec4 v_texpage;
- uniform sampler2D samp0;
-#endif
-
-out vec4 o_col0;
-
-ivec3 ApplyDithering(ivec3 icol)
-{
- ivec2 fc = (ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & ivec2(3, 3);
- int offset = s_dither_values[fc.y * 4 + fc.x];
- return icol + ivec3(offset, offset, offset);
-}
-
-ivec3 TruncateTo15Bit(ivec3 icol)
-{
- icol = clamp(icol, ivec3(0, 0, 0), ivec3(255, 255, 255));
- return (icol & ivec3(~7, ~7, ~7)) | ((icol >> 3) & ivec3(7, 7, 7));
-}
-
-#if TEXTURED
-ivec2 ApplyNativeTextureWindow(ivec2 coords)
-{
- uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
- uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
- return ivec2(int(x), int(y));
-}
-
-ivec2 ApplyTextureWindow(ivec2 coords)
-{
- if (RESOLUTION_SCALE == 1)
- return ApplyNativeTextureWindow(coords);
-
- ivec2 downscaled_coords = coords / ivec2(RESOLUTION_SCALE);
- ivec2 coords_offset = coords % ivec2(RESOLUTION_SCALE);
- return (ApplyNativeTextureWindow(downscaled_coords) * ivec2(RESOLUTION_SCALE)) + coords_offset;
-}
-
-ivec4 SampleFromVRAM(vec2 coord)
-{
- // from 0..1 to 0..255
- ivec2 icoord = ivec2(coord * vec2(255 * RESOLUTION_SCALE));
- icoord = ApplyTextureWindow(icoord);
-
- // adjust for tightly packed palette formats
- ivec2 index_coord = icoord;
- #if PALETTE_4_BIT
- index_coord.x /= 4;
- #elif PALETTE_8_BIT
- index_coord.x /= 2;
- #endif
-
- // fixup coords
- ivec2 vicoord = ivec2(v_texpage.x + index_coord.x, fixYCoord(v_texpage.y + index_coord.y));
-
- // load colour/palette
- vec4 color = texelFetch(samp0, vicoord, 0);
-
- // apply palette
- #if PALETTE
- #if PALETTE_4_BIT
- int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3;
- uint vram_value = RGBA8ToRGBA5551(color);
- int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
- #elif PALETTE_8_BIT
- int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1;
- uint vram_value = RGBA8ToRGBA5551(color);
- int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
- #endif
- ivec2 palette_icoord = ivec2(v_texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(v_texpage.w));
- color = texelFetch(samp0, palette_icoord, 0);
- #endif
-
- return ivec4(color * vec4(255.0, 255.0, 255.0, 255.0));
-}
-#endif
-
-void main()
-{
- ivec3 vertcol = ivec3(v_col0 * vec3(255.0, 255.0, 255.0));
-
- bool semitransparent;
- bool new_mask_bit;
- ivec3 icolor;
-
- #if TEXTURED
- ivec4 texcol = SampleFromVRAM(v_tex0);
- if (texcol == ivec4(0.0, 0.0, 0.0, 0.0))
- discard;
-
- // Grab semitransparent bit from the texture color.
- semitransparent = (texcol.a != 0);
-
- #if RAW_TEXTURE
- icolor = texcol.rgb;
- #else
- icolor = (vertcol * texcol.rgb) >> 7;
- #endif
- #else
- // All pixels are semitransparent for untextured polygons.
- semitransparent = true;
- icolor = vertcol;
- #endif
-
- // Apply dithering
- #if DITHERING
- icolor = ApplyDithering(icolor);
- #endif
-
- // Clip to 15-bit range
- #if !TRUE_COLOR
- icolor = TruncateTo15Bit(icolor);
- #endif
-
- // Normalize
- vec3 color = vec3(icolor) / vec3(255.0, 255.0, 255.0);
-
- #if TRANSPARENCY
- // Apply semitransparency. If not a semitransparent texel, destination alpha is ignored.
- if (semitransparent)
- {
- #if TRANSPARENCY_ONLY_OPAQUE
- discard;
- #endif
- o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor);
- }
- else
- {
- #if TRANSPARENCY_ONLY_TRANSPARENCY
- discard;
- #endif
- o_col0 = vec4(color, 0.0);
- }
- #else
- o_col0 = vec4(color, 0.0);
- #endif
-}
-)";
-
- return ss.str();
-}
-
-std::string GPU_HW::GenerateScreenQuadVertexShader()
-{
- std::stringstream ss;
- GenerateShaderHeader(ss);
- ss << R"(
-
-out vec2 v_tex0;
-
-void main()
-{
- v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2));
- gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f);
- gl_Position.y = -gl_Position.y;
-}
-)";
-
- return ss.str();
-}
-
-std::string GPU_HW::GenerateFillFragmentShader()
-{
- std::stringstream ss;
- GenerateShaderHeader(ss);
-
- ss << R"(
-uniform vec4 fill_color;
-out vec4 o_col0;
-
-void main()
-{
- o_col0 = fill_color;
-}
-)";
-
- return ss.str();
-}
-
-std::string GPU_HW::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
-{
- std::stringstream ss;
- GenerateShaderHeader(ss);
- DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
- DefineMacro(ss, "INTERLACED", interlaced);
-
- ss << R"(
-in vec2 v_tex0;
-out vec4 o_col0;
-
-uniform sampler2D samp0;
-uniform ivec3 u_base_coords;
-
-ivec2 GetCoords(vec2 fragcoord)
-{
- ivec2 icoords = ivec2(fragcoord);
- #if INTERLACED
- if ((((icoords.y - u_base_coords.z) / RESOLUTION_SCALE) & 1) != 0)
- discard;
- #endif
- return icoords;
-}
-
-void main()
-{
- ivec2 icoords = GetCoords(gl_FragCoord.xy);
-
- #if DEPTH_24BIT
- // compute offset in dwords from the start of the 24-bit values
- ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y);
- int xoff = int(icoords.x);
- int dword_index = (xoff / 2) + (xoff / 4);
-
- // sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
- uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
- uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
- uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
- uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
-
- // select the bit for this pixel depending on its offset in the 4-pixel block
- uint r, g, b;
- int block_offset = xoff & 3;
- if (block_offset == 0)
- {
- r = s0 & 0xFFu;
- g = s0 >> 8;
- b = s1 & 0xFFu;
- }
- else if (block_offset == 1)
- {
- r = s1 >> 8;
- g = s2 & 0xFFu;
- b = s2 >> 8;
- }
- else if (block_offset == 2)
- {
- r = s1 & 0xFFu;
- g = s1 >> 8;
- b = s2 & 0xFFu;
- }
- else
- {
- r = s2 >> 8;
- g = s3 & 0xFFu;
- b = s3 >> 8;
- }
-
- // and normalize
- o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0);
- #else
- // load and return
- o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0);
- #endif
-}
-)";
-
- return ss.str();
-}
-
-std::string GPU_HW::GenerateVRAMWriteFragmentShader()
-{
- std::stringstream ss;
- GenerateShaderHeader(ss);
-
- ss << R"(
-
-uniform ivec2 u_base_coords;
-uniform ivec2 u_size;
-uniform usamplerBuffer samp0;
-
-out vec4 o_col0;
-
-void main()
-{
- ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE);
- ivec2 offset = coords - u_base_coords;
- offset.y = u_size.y - offset.y - 1;
-
- int buffer_offset = offset.y * u_size.x + offset.x;
- uint value = texelFetch(samp0, buffer_offset).r;
-
- o_col0 = RGBA5551ToRGBA8(value);
-})";
-
- return ss.str();
-}
-
-GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
+GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
{
if (rc.primitive == Primitive::Line)
- return rc.polyline ? HWPrimitive::LineStrip : HWPrimitive::Lines;
+ return rc.polyline ? BatchPrimitive::LineStrip : BatchPrimitive::Lines;
else if ((rc.primitive == Primitive::Polygon && rc.quad_polygon) || rc.primitive == Primitive::Rectangle)
- return HWPrimitive::TriangleStrip;
+ return BatchPrimitive::TriangleStrip;
else
- return HWPrimitive::Triangles;
+ return BatchPrimitive::Triangles;
}
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
@@ -687,13 +258,13 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
// has any state changed which requires a new batch?
const TransparencyMode transparency_mode =
rc.transparency_enable ? m_render_state.transparency_mode : TransparencyMode::Disabled;
- const HWPrimitive rc_primitive = GetPrimitiveForCommand(rc);
+ const BatchPrimitive rc_primitive = GetPrimitiveForCommand(rc);
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
const u32 max_added_vertices = num_vertices + 2;
if (!IsFlushed())
{
const bool buffer_overflow = GetBatchVertexSpace() < max_added_vertices;
- if (buffer_overflow || rc_primitive == HWPrimitive::LineStrip || m_batch.texture_mode != texture_mode ||
+ if (buffer_overflow || rc_primitive == BatchPrimitive::LineStrip || m_batch.texture_mode != texture_mode ||
m_batch.transparency_mode != transparency_mode || m_batch.primitive != rc_primitive ||
dithering_enable != m_batch.dithering || m_render_state.IsTextureWindowChanged())
{
diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h
index 5af1a5242..b64e46b31 100644
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@@ -8,6 +8,22 @@
class GPU_HW : public GPU
{
public:
+ enum class BatchPrimitive : u8 // Topology of a vertex batch; the OpenGL backend uses the value to index its primitive table.
+ {
+ Lines = 0,
+ LineStrip = 1, // A LineStrip command always starts a new batch (see DispatchRenderCommand).
+ Triangles = 2,
+ TriangleStrip = 3 // Chosen for quad polygons and rectangles.
+ };
+
+ enum class BatchRenderMode : u8 // Shader variant selector; the OpenGL backend uses it as the first index of its program table.
+ {
+ TransparencyDisabled, // Returned by BatchConfig::GetRenderMode() when transparency_mode is Disabled.
+ TransparentAndOpaque, // Returned by BatchConfig::GetRenderMode() otherwise (single pass).
+ OnlyOpaque, // Second pass of two-pass rendering; the OpenGL backend draws it with blending off.
+ OnlyTransparent // First pass of two-pass rendering.
+ };
+
GPU_HW();
virtual ~GPU_HW();
@@ -16,23 +32,7 @@ public:
virtual void UpdateSettings() override;
protected:
- enum class HWPrimitive : u8
- {
- Lines = 0,
- LineStrip = 1,
- Triangles = 2,
- TriangleStrip = 3
- };
-
- enum class HWBatchRenderMode : u8
- {
- TransparencyDisabled,
- TransparentAndOpaque,
- OnlyOpaque,
- OnlyTransparent
- };
-
- struct HWVertex
+ struct BatchVertex
{
s32 x;
s32 y;
@@ -55,9 +55,9 @@ protected:
}
};
- struct HWBatchConfig
+ struct BatchConfig
{
- HWPrimitive primitive;
+ BatchPrimitive primitive;
TextureMode texture_mode;
TransparencyMode transparency_mode;
bool dithering;
@@ -71,14 +71,14 @@ protected:
}
// Returns the render mode for this batch.
- HWBatchRenderMode GetRenderMode() const
+ BatchRenderMode GetRenderMode() const
{
- return transparency_mode == TransparencyMode::Disabled ? HWBatchRenderMode::TransparencyDisabled :
- HWBatchRenderMode::TransparentAndOpaque;
+ return transparency_mode == TransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled :
+ BatchRenderMode::TransparentAndOpaque;
}
};
- struct HWBatchUBOData
+ struct BatchUBOData
{
s32 u_pos_offset[2];
u32 u_texture_window_mask[2];
@@ -90,7 +90,7 @@ protected:
static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32);
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
- static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
+ static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex);
static constexpr u32 UNIFORM_BUFFER_SIZE = 512 * 1024;
static constexpr std::tuple RGBA8ToFloat(u32 rgba)
@@ -121,31 +121,21 @@ protected:
return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale));
}
- std::string GenerateVertexShader(bool textured);
- std::string GenerateFragmentShader(HWBatchRenderMode transparency, TextureMode texture_mode, bool dithering);
- std::string GenerateScreenQuadVertexShader();
- std::string GenerateFillFragmentShader();
- std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
- std::string GenerateVRAMWriteFragmentShader();
-
- HWVertex* m_batch_start_vertex_ptr = nullptr;
- HWVertex* m_batch_end_vertex_ptr = nullptr;
- HWVertex* m_batch_current_vertex_ptr = nullptr;
+ BatchVertex* m_batch_start_vertex_ptr = nullptr;
+ BatchVertex* m_batch_end_vertex_ptr = nullptr;
+ BatchVertex* m_batch_current_vertex_ptr = nullptr;
u32 m_batch_base_vertex = 0;
u32 m_resolution_scale = 1;
u32 m_max_resolution_scale = 1;
bool m_true_color = false;
- HWBatchConfig m_batch = {};
- HWBatchUBOData m_batch_ubo_data = {};
+ BatchConfig m_batch = {};
+ BatchUBOData m_batch_ubo_data = {};
bool m_batch_ubo_dirty = true;
private:
- static HWPrimitive GetPrimitiveForCommand(RenderCommand rc);
-
- void GenerateShaderHeader(std::stringstream& ss);
- void GenerateBatchUniformBuffer(std::stringstream& ss);
+ static BatchPrimitive GetPrimitiveForCommand(RenderCommand rc);
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
void AddDuplicateVertex();
diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp
index 5548616d1..5b5161b30 100644
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -2,6 +2,7 @@
#include "YBaseLib/Assert.h"
#include "YBaseLib/Log.h"
#include "YBaseLib/String.h"
+#include "gpu_hw_shadergen.h"
#include "host_interface.h"
#include "imgui.h"
#include "system.h"
@@ -138,9 +139,9 @@ void GPU_HW_OpenGL::MapBatchVertexPointer(u32 required_vertices)
Assert(!m_batch_start_vertex_ptr);
const GL::StreamBuffer::MappingResult res =
- m_vertex_stream_buffer->Map(sizeof(HWVertex), required_vertices * sizeof(HWVertex));
+ m_vertex_stream_buffer->Map(sizeof(BatchVertex), required_vertices * sizeof(BatchVertex));
- m_batch_start_vertex_ptr = static_cast(res.pointer);
+ m_batch_start_vertex_ptr = static_cast(res.pointer);
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned;
m_batch_base_vertex = res.index_aligned;
@@ -246,11 +247,11 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
glEnableVertexAttribArray(1);
glEnableVertexAttribArray(2);
glEnableVertexAttribArray(3);
- glVertexAttribIPointer(0, 2, GL_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, x)));
- glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(HWVertex),
- reinterpret_cast(offsetof(HWVertex, color)));
- glVertexAttribIPointer(2, 2, GL_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, texcoord)));
- glVertexAttribIPointer(3, 1, GL_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, texpage)));
+ glVertexAttribIPointer(0, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, x)));
+ glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
+ reinterpret_cast(offsetof(BatchVertex, color)));
+ glVertexAttribIPointer(2, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, texcoord)));
+ glVertexAttribIPointer(3, 1, GL_INT, sizeof(BatchVertex), reinterpret_cast(offsetof(BatchVertex, texpage)));
glBindVertexArray(0);
glGenVertexArrays(1, &m_attributeless_vao_id);
@@ -280,31 +281,56 @@ void GPU_HW_OpenGL::CreateTextureBuffer()
bool GPU_HW_OpenGL::CompilePrograms()
{
+ GPU_HW_ShaderGen shadergen(GPU_HW_ShaderGen::Backend::OpenGL, m_resolution_scale, m_true_color);
+
for (u32 render_mode = 0; render_mode < 4; render_mode++)
{
for (u32 texture_mode = 0; texture_mode < 9; texture_mode++)
{
for (u8 dithering = 0; dithering < 2; dithering++)
{
- if (!CompileProgram(m_render_programs[render_mode][texture_mode][dithering],
- static_cast(render_mode), static_cast(texture_mode),
- ConvertToBoolUnchecked(dithering)))
- {
+ const bool textured = (static_cast(texture_mode) != TextureMode::Disabled);
+ const std::string vs = shadergen.GenerateBatchVertexShader(textured);
+ const std::string fs = shadergen.GenerateBatchFragmentShader(static_cast(render_mode),
+ static_cast(texture_mode),
+ ConvertToBoolUnchecked(dithering));
+
+ GL::Program& prog = m_render_programs[render_mode][texture_mode][dithering];
+ if (!prog.Compile(vs, fs))
return false;
+
+ prog.BindAttribute(0, "a_pos");
+ prog.BindAttribute(1, "a_col0");
+ if (textured)
+ {
+ prog.BindAttribute(2, "a_texcoord");
+ prog.BindAttribute(3, "a_texpage");
+ }
+
+ prog.BindFragData(0, "o_col0");
+
+ if (!prog.Link())
+ return false;
+
+ prog.BindUniformBlock("UBOBlock", 1);
+ if (textured)
+ {
+ prog.Bind();
+ prog.RegisterUniform("samp0");
+ prog.Uniform1i(0, 0);
}
}
}
}
- // TODO: Use string_view
for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
{
for (u8 interlaced = 0; interlaced < 2; interlaced++)
{
GL::Program& prog = m_display_programs[depth_24bit][interlaced];
- const std::string vs = GenerateScreenQuadVertexShader();
- const std::string fs =
- GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), ConvertToBoolUnchecked(interlaced));
+ const std::string vs = shadergen.GenerateScreenQuadVertexShader();
+ const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit),
+ ConvertToBoolUnchecked(interlaced));
if (!prog.Compile(vs, fs))
return false;
@@ -319,8 +345,11 @@ bool GPU_HW_OpenGL::CompilePrograms()
}
}
- if (!m_vram_write_program.Compile(GenerateScreenQuadVertexShader(), GenerateVRAMWriteFragmentShader()))
+ if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
+ shadergen.GenerateVRAMWriteFragmentShader()))
+ {
return false;
+ }
m_vram_write_program.BindFragData(0, "o_col0");
if (!m_vram_write_program.Link())
@@ -335,41 +364,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
return true;
}
-bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode,
- bool dithering)
-{
- const bool textured = texture_mode != TextureMode::Disabled;
- const std::string vs = GenerateVertexShader(textured);
- const std::string fs = GenerateFragmentShader(render_mode, texture_mode, dithering);
- if (!prog.Compile(vs, fs))
- return false;
-
- prog.BindAttribute(0, "a_pos");
- prog.BindAttribute(1, "a_col0");
- if (textured)
- {
- prog.BindAttribute(2, "a_texcoord");
- prog.BindAttribute(3, "a_texpage");
- }
-
- prog.BindFragData(0, "o_col0");
-
- if (!prog.Link())
- return false;
-
- prog.BindUniformBlock("UBOBlock", 1);
-
- if (textured)
- {
- prog.Bind();
- prog.RegisterUniform("samp0");
- prog.Uniform1i(0, 0);
- }
-
- return true;
-}
-
-void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
+void GPU_HW_OpenGL::SetDrawState(BatchRenderMode render_mode)
{
const GL::Program& prog = m_render_programs[static_cast(render_mode)][static_cast(m_batch.texture_mode)]
[BoolToUInt8(m_batch.dithering)];
@@ -378,7 +373,7 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
if (m_batch.texture_mode != TextureMode::Disabled)
m_vram_read_texture->Bind();
- if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == HWBatchRenderMode::OnlyOpaque)
+ if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque)
{
glDisable(GL_BLEND);
}
@@ -732,7 +727,7 @@ void GPU_HW_OpenGL::FlushRender()
m_stats.num_batches++;
m_stats.num_vertices += vertex_count;
- m_vertex_stream_buffer->Unmap(vertex_count * sizeof(HWVertex));
+ m_vertex_stream_buffer->Unmap(vertex_count * sizeof(BatchVertex));
m_vertex_stream_buffer->Bind();
m_batch_start_vertex_ptr = nullptr;
m_batch_end_vertex_ptr = nullptr;
@@ -742,9 +737,9 @@ void GPU_HW_OpenGL::FlushRender()
if (m_batch.NeedsTwoPassRendering())
{
- SetDrawState(HWBatchRenderMode::OnlyTransparent);
+ SetDrawState(BatchRenderMode::OnlyTransparent);
glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], 0, vertex_count);
- SetDrawState(HWBatchRenderMode::OnlyOpaque);
+ SetDrawState(BatchRenderMode::OnlyOpaque);
glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], 0, vertex_count);
}
else
diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h
index b9cac7ac8..1522e15b5 100644
--- a/src/core/gpu_hw_opengl.h
+++ b/src/core/gpu_hw_opengl.h
@@ -58,8 +58,7 @@ private:
void CreateTextureBuffer();
bool CompilePrograms();
- bool CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, bool dithering);
- void SetDrawState(HWBatchRenderMode render_mode);
+ void SetDrawState(BatchRenderMode render_mode);
void UploadUniformBlock(const void* data, u32 data_size);
// downsample texture - used for readbacks at >1xIR.
diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp
new file mode 100644
index 000000000..ab5f15774
--- /dev/null
+++ b/src/core/gpu_hw_shadergen.cpp
@@ -0,0 +1,436 @@
+#include "gpu_hw_shadergen.h"
+
+// Records the generator configuration. m_resolution_scale and m_true_color are baked into the
+// shader source produced by the Generate* methods below; m_backend is only stored in this file.
+GPU_HW_ShaderGen::GPU_HW_ShaderGen(Backend backend, u32 resolution_scale, bool true_color)
+  : m_backend{backend},
+    m_resolution_scale{resolution_scale},
+    m_true_color{true_color}
+{
+}
+
+// Defaulted: the destructor is declared in the header and defined here with no custom teardown.
+GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default;
+
+// Appends a preprocessor switch for `name` to the shader source being built in `ss`:
+// "#define NAME 1" when `enabled` is true and "#define NAME 0" otherwise.
+//
+// The macro is always defined because the generated shaders test these switches with
+// `#if NAME`, `#elif NAME` and `#if !NAME`. GLSL restricts #if expressions to integer literals
+// and identifiers consumed by defined(), so leaving a disabled switch undefined would depend
+// on driver leniency instead of on the language rules.
+static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
+{
+  ss << "#define " << name << ' ' << (enabled ? 1 : 0) << "\n";
+}
+
+// Writes the preamble shared by every generated shader into `ss`:
+//  - the "#version 330 core" directive,
+//  - the constants RESOLUTION_SCALE, VRAM_SIZE and RCP_VRAM_SIZE,
+//  - the GLSL helpers fixYCoord() (float and int overloads), RGBA8ToRGBA5551() and
+//    RGBA5551ToRGBA8().
+void GPU_HW_ShaderGen::GenerateShaderHeader(std::stringstream& ss)
+{
+ ss << "#version 330 core\n\n";
+ // The scale is baked into the source as a constant, so a different scale means different source.
+ ss << "const int RESOLUTION_SCALE = " << m_resolution_scale << ";\n";
+ // VRAM dimensions are written unscaled; the multiplication by RESOLUTION_SCALE happens in GLSL.
+ ss << "const ivec2 VRAM_SIZE = ivec2(" << GPU::VRAM_WIDTH << ", " << GPU::VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
+ ss << "const vec2 RCP_VRAM_SIZE = vec2(1.0, 1.0) / vec2(VRAM_SIZE);\n";
+ // Fixed helper functions (emitted verbatim):
+ //  - fixYCoord mirrors a y coordinate vertically within VRAM_SIZE.y.
+ //  - RGBA8ToRGBA5551 packs r/g/b into bits 0-4/5-9/10-14 and a non-zero alpha into bit 15;
+ //    each channel is converted with uint(c * 255.0) >> 3, i.e. truncated rather than rounded.
+ //  - RGBA5551ToRGBA8 expands each 5-bit channel to 8 bits by shifting left by 3 and copying
+ //    the channel's low three bits into the vacated bits.
+ ss << R"(
+
+float fixYCoord(float y)
+{
+ return 1.0 - RCP_VRAM_SIZE.y - y;
+}
+
+int fixYCoord(int y)
+{
+ return VRAM_SIZE.y - y - 1;
+}
+
+uint RGBA8ToRGBA5551(vec4 v)
+{
+ uint r = uint(v.r * 255.0) >> 3;
+ uint g = uint(v.g * 255.0) >> 3;
+ uint b = uint(v.b * 255.0) >> 3;
+ uint a = (v.a != 0.0) ? 1u : 0u;
+ return (r) | (g << 5) | (b << 10) | (a << 15);
+}
+
+vec4 RGBA5551ToRGBA8(uint v)
+{
+ uint r = (v & 31u);
+ uint g = ((v >> 5) & 31u);
+ uint b = ((v >> 10) & 31u);
+ uint a = ((v >> 15) & 1u);
+
+ // repeat lower bits
+ r = (r << 3) | (r & 7u);
+ g = (g << 3) | (g & 7u);
+ b = (b << 3) | (b & 7u);
+
+ return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a));
+}
+)";
+}
+
+// Writes the declaration of the uniform block "UBOBlock" into `ss`. The block carries the
+// position offset, the texture window mask/offset and the source/destination alpha factors
+// that the batch vertex and fragment shaders reference by name.
+// NOTE(review): the member order here presumably has to match the CPU-side structure that is
+// uploaded for this block; that structure is not visible in this file - confirm when editing.
+void GPU_HW_ShaderGen::GenerateBatchUniformBuffer(std::stringstream& ss)
+{
+ ss << R"(
+uniform UBOBlock {
+ ivec2 u_pos_offset;
+ uvec2 u_texture_window_mask;
+ uvec2 u_texture_window_offset;
+ float u_src_alpha_factor;
+ float u_dst_alpha_factor;
+};
+)";
+}
+
+// Builds and returns the GLSL source of the vertex shader used for batched primitives.
+//
+// `textured` drives the TEXTURED switch (emitted through DefineMacro). The TEXTURED sections of
+// the shader add the v_tex0 / v_texpage outputs and the code that fills them.
+//
+// What the emitted shader does:
+//  - position: (a_pos + u_pos_offset) is divided by 512 (x) and -256 (y) and shifted by -1/+1
+//    to produce clip-space coordinates;
+//  - colour: the rgb part of a_col0 is forwarded as v_col0;
+//  - texcoord: the low 16 bits (x) and the bits from 16 upwards (y) of a_texcoord, divided by 255;
+//  - texpage: bits 0-3 * 64 -> base x, bit 4 * 256 -> base y, bits 16-21 * 16 -> palette x,
+//    bits 22-30 -> palette y, each multiplied by RESOLUTION_SCALE.
+std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
+{
+ std::stringstream ss;
+ // Order matters: the header starts with #version, which must precede everything else.
+ GenerateShaderHeader(ss);
+ DefineMacro(ss, "TEXTURED", textured);
+ GenerateBatchUniformBuffer(ss);
+
+ ss << R"(
+in ivec2 a_pos;
+in vec4 a_col0;
+in int a_texcoord;
+in int a_texpage;
+
+out vec3 v_col0;
+#if TEXTURED
+ out vec2 v_tex0;
+ flat out ivec4 v_texpage;
+#endif
+
+void main()
+{
+ // 0..+1023 -> -1..1
+ float pos_x = (float(a_pos.x + u_pos_offset.x) / 512.0) - 1.0;
+ float pos_y = (float(a_pos.y + u_pos_offset.y) / -256.0) + 1.0;
+ gl_Position = vec4(pos_x, pos_y, 0.0, 1.0);
+
+ v_col0 = a_col0.rgb;
+ #if TEXTURED
+ v_tex0 = vec2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / vec2(255.0);
+
+ // base_x,base_y,palette_x,palette_y
+ v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE;
+ v_texpage.y = ((a_texpage >> 4) & 1) * 256 * RESOLUTION_SCALE;
+ v_texpage.z = ((a_texpage >> 16) & 63) * 16 * RESOLUTION_SCALE;
+ v_texpage.w = ((a_texpage >> 22) & 511) * RESOLUTION_SCALE;
+ #endif
+}
+)";
+
+ return ss.str();
+}
+
+// Builds and returns the GLSL source of the fragment shader used for batched primitives.
+//
+// Parameters:
+//  - transparency: selects the TRANSPARENCY, TRANSPARENCY_ONLY_OPAQUE and
+//    TRANSPARENCY_ONLY_TRANSPARENCY switches.
+//  - texture_mode: selects TEXTURED, PALETTE, PALETTE_4_BIT, PALETTE_8_BIT and RAW_TEXTURE.
+//  - dithering: selects DITHERING.
+// The TRUE_COLOR switch comes from m_true_color, which is set at construction.
+//
+// What the emitted shader does, in order: sample the texture (discarding texels that are
+// entirely zero), combine it with the vertex colour unless RAW_TEXTURE is set, optionally
+// apply dithering, truncate to 15-bit colour unless TRUE_COLOR is set, then write the colour
+// with an alpha that depends on the transparency switches. In the two-pass variants the
+// semi-transparent (ONLY_OPAQUE) or opaque (ONLY_TRANSPARENCY) fragments are discarded.
+std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency,
+ GPU::TextureMode texture_mode, bool dithering)
+{
+ // Split the mode into the base texture format and the "raw texture" flag.
+ // NOTE(review): this relies on bitwise operators for GPU::TextureMode that are declared
+ // outside this file.
+ const GPU::TextureMode actual_texture_mode = texture_mode & ~GPU::TextureMode::RawTextureBit;
+ const bool raw_texture = (texture_mode & GPU::TextureMode::RawTextureBit) == GPU::TextureMode::RawTextureBit;
+
+ std::stringstream ss;
+ GenerateShaderHeader(ss);
+ GenerateBatchUniformBuffer(ss);
+ // Feature switches consumed by the #if blocks in the GLSL text below.
+ DefineMacro(ss, "TRANSPARENCY", transparency != GPU_HW::BatchRenderMode::TransparencyDisabled);
+ DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == GPU_HW::BatchRenderMode::OnlyOpaque);
+ DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == GPU_HW::BatchRenderMode::OnlyTransparent);
+ DefineMacro(ss, "TEXTURED", actual_texture_mode != GPU::TextureMode::Disabled);
+ DefineMacro(ss, "PALETTE",
+ actual_texture_mode == GPU::TextureMode::Palette4Bit ||
+ actual_texture_mode == GPU::TextureMode::Palette8Bit);
+ DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit);
+ DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit);
+ DefineMacro(ss, "RAW_TEXTURE", raw_texture);
+ DefineMacro(ss, "DITHERING", dithering);
+ DefineMacro(ss, "TRUE_COLOR", m_true_color);
+
+ // Emit GPU::DITHER_MATRIX as a flat 16-entry GLSL array in row-major order; the shader
+ // indexes it as [y * 4 + x].
+ ss << "const int[16] s_dither_values = int[16]( ";
+ for (u32 i = 0; i < 16; i++)
+ {
+ if (i > 0)
+ ss << ", ";
+ ss << GPU::DITHER_MATRIX[i / 4][i % 4];
+ }
+ ss << " );\n";
+
+ // Shader body (emitted verbatim).
+ ss << R"(
+in vec3 v_col0;
+#if TEXTURED
+ in vec2 v_tex0;
+ flat in ivec4 v_texpage;
+ uniform sampler2D samp0;
+#endif
+
+out vec4 o_col0;
+
+ivec3 ApplyDithering(ivec3 icol)
+{
+ ivec2 fc = (ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & ivec2(3, 3);
+ int offset = s_dither_values[fc.y * 4 + fc.x];
+ return icol + ivec3(offset, offset, offset);
+}
+
+ivec3 TruncateTo15Bit(ivec3 icol)
+{
+ icol = clamp(icol, ivec3(0, 0, 0), ivec3(255, 255, 255));
+ return (icol & ivec3(~7, ~7, ~7)) | ((icol >> 3) & ivec3(7, 7, 7));
+}
+
+#if TEXTURED
+ivec2 ApplyNativeTextureWindow(ivec2 coords)
+{
+ uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
+ uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
+ return ivec2(int(x), int(y));
+}
+
+ivec2 ApplyTextureWindow(ivec2 coords)
+{
+ if (RESOLUTION_SCALE == 1)
+ return ApplyNativeTextureWindow(coords);
+
+ ivec2 downscaled_coords = coords / ivec2(RESOLUTION_SCALE);
+ ivec2 coords_offset = coords % ivec2(RESOLUTION_SCALE);
+ return (ApplyNativeTextureWindow(downscaled_coords) * ivec2(RESOLUTION_SCALE)) + coords_offset;
+}
+
+ivec4 SampleFromVRAM(vec2 coord)
+{
+ // from 0..1 to 0..255
+ ivec2 icoord = ivec2(coord * vec2(255 * RESOLUTION_SCALE));
+ icoord = ApplyTextureWindow(icoord);
+
+ // adjust for tightly packed palette formats
+ ivec2 index_coord = icoord;
+ #if PALETTE_4_BIT
+ index_coord.x /= 4;
+ #elif PALETTE_8_BIT
+ index_coord.x /= 2;
+ #endif
+
+ // fixup coords
+ ivec2 vicoord = ivec2(v_texpage.x + index_coord.x, fixYCoord(v_texpage.y + index_coord.y));
+
+ // load colour/palette
+ vec4 color = texelFetch(samp0, vicoord, 0);
+
+ // apply palette
+ #if PALETTE
+ #if PALETTE_4_BIT
+ int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3;
+ uint vram_value = RGBA8ToRGBA5551(color);
+ int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
+ #elif PALETTE_8_BIT
+ int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1;
+ uint vram_value = RGBA8ToRGBA5551(color);
+ int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
+ #endif
+ ivec2 palette_icoord = ivec2(v_texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(v_texpage.w));
+ color = texelFetch(samp0, palette_icoord, 0);
+ #endif
+
+ return ivec4(color * vec4(255.0, 255.0, 255.0, 255.0));
+}
+#endif
+
+void main()
+{
+ ivec3 vertcol = ivec3(v_col0 * vec3(255.0, 255.0, 255.0));
+
+ bool semitransparent;
+ bool new_mask_bit;
+ ivec3 icolor;
+
+ #if TEXTURED
+ ivec4 texcol = SampleFromVRAM(v_tex0);
+ if (texcol == ivec4(0.0, 0.0, 0.0, 0.0))
+ discard;
+
+ // Grab semitransparent bit from the texture color.
+ semitransparent = (texcol.a != 0);
+
+ #if RAW_TEXTURE
+ icolor = texcol.rgb;
+ #else
+ icolor = (vertcol * texcol.rgb) >> 7;
+ #endif
+ #else
+ // All pixels are semitransparent for untextured polygons.
+ semitransparent = true;
+ icolor = vertcol;
+ #endif
+
+ // Apply dithering
+ #if DITHERING
+ icolor = ApplyDithering(icolor);
+ #endif
+
+ // Clip to 15-bit range
+ #if !TRUE_COLOR
+ icolor = TruncateTo15Bit(icolor);
+ #endif
+
+ // Normalize
+ vec3 color = vec3(icolor) / vec3(255.0, 255.0, 255.0);
+
+ #if TRANSPARENCY
+ // Apply semitransparency. If not a semitransparent texel, destination alpha is ignored.
+ if (semitransparent)
+ {
+ #if TRANSPARENCY_ONLY_OPAQUE
+ discard;
+ #endif
+ o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor);
+ }
+ else
+ {
+ #if TRANSPARENCY_ONLY_TRANSPARENCY
+ discard;
+ #endif
+ o_col0 = vec4(color, 0.0);
+ }
+ #else
+ o_col0 = vec4(color, 0.0);
+ #endif
+}
+)";
+
+ return ss.str();
+}
+
+// Builds and returns the GLSL source of a vertex shader that needs no vertex attributes:
+// both the texture coordinate and the position are derived from gl_VertexID alone.
+// For vertex IDs 0, 1 and 2 the emitted code yields v_tex0 = (0,0), (2,0) and (0,2).
+// The position is computed from v_tex0 and its y component is then negated.
+std::string GPU_HW_ShaderGen::GenerateScreenQuadVertexShader()
+{
+ std::stringstream ss;
+ GenerateShaderHeader(ss);
+ ss << R"(
+
+out vec2 v_tex0;
+
+void main()
+{
+ v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2));
+ gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f);
+ gl_Position.y = -gl_Position.y;
+}
+)";
+
+ return ss.str();
+}
+
+// Builds and returns the GLSL source of a fragment shader that writes the uniform
+// `fill_color` unchanged to every fragment it is run for.
+std::string GPU_HW_ShaderGen::GenerateFillFragmentShader()
+{
+ std::stringstream ss;
+ GenerateShaderHeader(ss);
+
+ ss << R"(
+uniform vec4 fill_color;
+out vec4 o_col0;
+
+void main()
+{
+ o_col0 = fill_color;
+}
+)";
+
+ return ss.str();
+}
+
+// Builds and returns the GLSL source of the fragment shader that reads the displayed image
+// out of the texture bound as samp0.
+//
+// Parameters:
+//  - depth_24bit: drives the DEPTH_24BIT switch. In that path four consecutive texels are
+//    fetched, converted back to 16-bit values with RGBA8ToRGBA5551, and the three bytes that
+//    belong to the pixel are picked according to its position within a 4-pixel group.
+//    Otherwise the texel at u_base_coords.xy + fragment coordinate is returned unchanged.
+//  - interlaced: drives the INTERLACED switch. In that path fragments are discarded when the
+//    row, taken relative to u_base_coords.z and divided by RESOLUTION_SCALE, is odd.
+std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
+{
+ std::stringstream ss;
+ GenerateShaderHeader(ss);
+ DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
+ DefineMacro(ss, "INTERLACED", interlaced);
+
+ ss << R"(
+in vec2 v_tex0;
+out vec4 o_col0;
+
+uniform sampler2D samp0;
+uniform ivec3 u_base_coords;
+
+ivec2 GetCoords(vec2 fragcoord)
+{
+ ivec2 icoords = ivec2(fragcoord);
+ #if INTERLACED
+ if ((((icoords.y - u_base_coords.z) / RESOLUTION_SCALE) & 1) != 0)
+ discard;
+ #endif
+ return icoords;
+}
+
+void main()
+{
+ ivec2 icoords = GetCoords(gl_FragCoord.xy);
+
+ #if DEPTH_24BIT
+ // compute offset in dwords from the start of the 24-bit values
+ ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y);
+ int xoff = int(icoords.x);
+ int dword_index = (xoff / 2) + (xoff / 4);
+
+ // sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
+ uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
+ uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
+ uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
+ uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
+
+ // select the bit for this pixel depending on its offset in the 4-pixel block
+ uint r, g, b;
+ int block_offset = xoff & 3;
+ if (block_offset == 0)
+ {
+ r = s0 & 0xFFu;
+ g = s0 >> 8;
+ b = s1 & 0xFFu;
+ }
+ else if (block_offset == 1)
+ {
+ r = s1 >> 8;
+ g = s2 & 0xFFu;
+ b = s2 >> 8;
+ }
+ else if (block_offset == 2)
+ {
+ r = s1 & 0xFFu;
+ g = s1 >> 8;
+ b = s2 & 0xFFu;
+ }
+ else
+ {
+ r = s2 >> 8;
+ g = s3 & 0xFFu;
+ b = s3 >> 8;
+ }
+
+ // and normalize
+ o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0);
+ #else
+ // load and return
+ o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0);
+ #endif
+}
+)";
+
+ return ss.str();
+}
+
+// Builds and returns the GLSL source of the fragment shader that fills a rectangle from a
+// buffer texture of packed 16-bit pixels.
+//
+// For each fragment the emitted code: divides the fragment coordinate by RESOLUTION_SCALE,
+// makes it relative to u_base_coords, flips the row within u_size.y, reads the value at
+// row * u_size.x + column from the usamplerBuffer samp0, and expands it with RGBA5551ToRGBA8.
+std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
+{
+ std::stringstream ss;
+ GenerateShaderHeader(ss);
+
+ ss << R"(
+
+uniform ivec2 u_base_coords;
+uniform ivec2 u_size;
+uniform usamplerBuffer samp0;
+
+out vec4 o_col0;
+
+void main()
+{
+ ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE);
+ ivec2 offset = coords - u_base_coords;
+ offset.y = u_size.y - offset.y - 1;
+
+ int buffer_offset = offset.y * u_size.x + offset.x;
+ uint value = texelFetch(samp0, buffer_offset).r;
+
+ o_col0 = RGBA5551ToRGBA8(value);
+})";
+
+ return ss.str();
+}
diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h
new file mode 100644
index 000000000..72e97381b
--- /dev/null
+++ b/src/core/gpu_hw_shadergen.h
@@ -0,0 +1,34 @@
+#pragma once
+#include <sstream>
+#include <string>
+#include "gpu_hw.h"
+
+// Generates GLSL source text for the hardware GPU renderer. Every Generate* method returns a
+// complete shader as a std::string, built from a shared header plus a shader-specific body.
+class GPU_HW_ShaderGen
+{
+public:
+ // Rendering API the generated source targets. OpenGL is the only value.
+ enum class Backend
+ {
+ OpenGL
+ };
+
+public:
+ // Stores the three settings in the members below.
+ GPU_HW_ShaderGen(Backend backend, u32 resolution_scale, bool true_color);
+ ~GPU_HW_ShaderGen();
+
+ // NOTE(review): declared here, but gpu_hw_shadergen.cpp as added alongside this header
+ // contains no definition for it, so calling it would fail at link time.
+ void Init(Backend backend, u32 resolution_scale, bool true_color);
+
+ // Vertex shader for batched primitives; `textured` enables the texture coordinate outputs.
+ std::string GenerateBatchVertexShader(bool textured);
+ // Fragment shader for batched primitives, specialised by the three arguments and m_true_color.
+ std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode, bool dithering);
+ // Attribute-less vertex shader that derives its output from gl_VertexID.
+ std::string GenerateScreenQuadVertexShader();
+ // Fragment shader that outputs a uniform colour.
+ std::string GenerateFillFragmentShader();
+ // Fragment shader that reads the displayed image, optionally as 24-bit data and/or interlaced.
+ std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
+ // Fragment shader that converts 16-bit pixels read from a buffer texture.
+ std::string GenerateVRAMWriteFragmentShader();
+
+ // Settings captured at construction. They are public data members, so callers can read or
+ // change them directly.
+ Backend m_backend;
+ u32 m_resolution_scale; // written into the shader header as RESOLUTION_SCALE
+ bool m_true_color; // drives the TRUE_COLOR switch of the batch fragment shader
+
+private:
+ // Shared preamble: #version line, constants and helper functions.
+ void GenerateShaderHeader(std::stringstream& ss);
+ // Declaration of the uniform block used by the batch shaders.
+ void GenerateBatchUniformBuffer(std::stringstream& ss);
+};