GPU/HW: Split shadergen to separate class

This commit is contained in:
Connor McLaughlin 2019-11-03 13:36:54 +10:00
parent 91c99f0226
commit be81d08109
10 changed files with 624 additions and 590 deletions

View file

@ -21,6 +21,8 @@ add_library(core
gpu_hw.h gpu_hw.h
gpu_hw_opengl.cpp gpu_hw_opengl.cpp
gpu_hw_opengl.h gpu_hw_opengl.h
gpu_hw_shadergen.cpp
gpu_hw_shadergen.h
gpu_sw.cpp gpu_sw.cpp
gpu_sw.h gpu_sw.h
gte.cpp gte.cpp

View file

@ -41,6 +41,7 @@
<ClCompile Include="cpu_disasm.cpp" /> <ClCompile Include="cpu_disasm.cpp" />
<ClCompile Include="digital_controller.cpp" /> <ClCompile Include="digital_controller.cpp" />
<ClCompile Include="gpu_commands.cpp" /> <ClCompile Include="gpu_commands.cpp" />
<ClCompile Include="gpu_hw_shadergen.cpp" />
<ClCompile Include="gpu_sw.cpp" /> <ClCompile Include="gpu_sw.cpp" />
<ClCompile Include="gte.cpp" /> <ClCompile Include="gte.cpp" />
<ClCompile Include="dma.cpp" /> <ClCompile Include="dma.cpp" />
@ -64,6 +65,7 @@
<ClInclude Include="cpu_core.h" /> <ClInclude Include="cpu_core.h" />
<ClInclude Include="cpu_disasm.h" /> <ClInclude Include="cpu_disasm.h" />
<ClInclude Include="digital_controller.h" /> <ClInclude Include="digital_controller.h" />
<ClInclude Include="gpu_hw_shadergen.h" />
<ClInclude Include="gpu_sw.h" /> <ClInclude Include="gpu_sw.h" />
<ClInclude Include="gte.h" /> <ClInclude Include="gte.h" />
<ClInclude Include="cpu_types.h" /> <ClInclude Include="cpu_types.h" />

View file

@ -23,6 +23,7 @@
<ClCompile Include="settings.cpp" /> <ClCompile Include="settings.cpp" />
<ClCompile Include="gpu_commands.cpp" /> <ClCompile Include="gpu_commands.cpp" />
<ClCompile Include="gpu_sw.cpp" /> <ClCompile Include="gpu_sw.cpp" />
<ClCompile Include="gpu_hw_shadergen.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="types.h" /> <ClInclude Include="types.h" />
@ -50,6 +51,7 @@
<ClInclude Include="memory_card.h" /> <ClInclude Include="memory_card.h" />
<ClInclude Include="settings.h" /> <ClInclude Include="settings.h" />
<ClInclude Include="gpu_sw.h" /> <ClInclude Include="gpu_sw.h" />
<ClInclude Include="gpu_hw_shadergen.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="cpu_core.inl" /> <None Include="cpu_core.inl" />

View file

@ -18,6 +18,57 @@ class Timers;
class GPU class GPU
{ {
public: public:
// DMA direction selector, stored as a u32 with enumerators numbered 0-3.
// NOTE(review): presumably mirrors the DMA direction field of the GPU status register — confirm
// against the code that reads/writes it (not visible in this hunk).
enum class DMADirection : u32
{
Off = 0,
FIFO = 1,
CPUtoGP0 = 2,
GPUREADtoCPU = 3
};
// Kind of primitive requested by a render command. GPU_HW::GetPrimitiveForCommand() compares
// RenderCommand::primitive against these values to choose the batch topology.
enum class Primitive : u8
{
Reserved = 0,
Polygon = 1,
Line = 2,
Rectangle = 3
};
// Size selector for rectangle primitives: either a variable size or one of three fixed sizes
// (1x1, 8x8, 16x16), as indicated by the enumerator names.
// NOTE(review): presumably where a Variable size is supplied is decided by the command decoder — confirm.
enum class DrawRectangleSize : u8
{
Variable = 0,
R1x1 = 1,
R8x8 = 2,
R16x16 = 3
};
// Texture sampling mode for a draw.
//  - Values 0-3 are the base colour modes (4-bit palette, 8-bit palette, direct 16-bit).
//  - RawTextureBit (4) is a flag OR'd onto a base mode to form the Raw* variants; the shader
//    generator masks it off to recover the base mode and tests it to select raw texturing.
//  - Disabled (8) means untextured.
// The numeric values are also used as array indices when compiling/selecting render programs
// (the OpenGL backend iterates texture_mode over 0..8), so they must stay dense and stable.
enum class TextureMode : u8
{
Palette4Bit = 0,
Palette8Bit = 1,
Direct16Bit = 2,
Reserved_Direct16Bit = 3,
// Not register values.
RawTextureBit = 4,
RawPalette4Bit = RawTextureBit | Palette4Bit,
RawPalette8Bit = RawTextureBit | Palette8Bit,
RawDirect16Bit = RawTextureBit | Direct16Bit,
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
Disabled = 8 // Not a register value
};
// Semi-transparency blend mode. Values 0-3 name how background (destination) and foreground
// (source) are combined; Disabled (4) is used by the hardware renderer when a render command
// does not enable transparency.
enum class TransparencyMode : u8
{
HalfBackgroundPlusHalfForeground = 0,
BackgroundPlusForeground = 1,
BackgroundMinusForeground = 2,
BackgroundPlusQuarterForeground = 3,
Disabled = 4 // Not a register value
};
enum : u32 enum : u32
{ {
VRAM_WIDTH = 1024, VRAM_WIDTH = 1024,
@ -29,6 +80,13 @@ public:
HBLANK_TIMER_INDEX = 1 HBLANK_TIMER_INDEX = 1
}; };
// 4x4 dither matrix.
static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0
{+2, -2, +3, -1}, // row 1
{-3, +1, -4, +0}, // row 2
{+4, -1, +2, -2}}; // row 3
// Base class constructor.
GPU(); GPU();
virtual ~GPU(); virtual ~GPU();
@ -112,57 +170,6 @@ protected:
static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
bool remove_alpha); bool remove_alpha);
enum class DMADirection : u32
{
Off = 0,
FIFO = 1,
CPUtoGP0 = 2,
GPUREADtoCPU = 3
};
enum class Primitive : u8
{
Reserved = 0,
Polygon = 1,
Line = 2,
Rectangle = 3
};
enum class DrawRectangleSize : u8
{
Variable = 0,
R1x1 = 1,
R8x8 = 2,
R16x16 = 3
};
enum class TextureMode : u8
{
Palette4Bit = 0,
Palette8Bit = 1,
Direct16Bit = 2,
Reserved_Direct16Bit = 3,
// Not register values.
RawTextureBit = 4,
RawPalette4Bit = RawTextureBit | Palette4Bit,
RawPalette8Bit = RawTextureBit | Palette8Bit,
RawDirect16Bit = RawTextureBit | Direct16Bit,
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
Disabled = 8 // Not a register value
};
enum class TransparencyMode : u8
{
HalfBackgroundPlusHalfForeground = 0,
BackgroundPlusForeground = 1,
BackgroundMinusForeground = 2,
BackgroundPlusQuarterForeground = 3,
Disabled = 4 // Not a register value
};
union RenderCommand union RenderCommand
{ {
u32 bits; u32 bits;
@ -258,12 +265,6 @@ protected:
} }
}; };
// 4x4 dither matrix.
static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0
{+2, -2, +3, -1}, // row 1
{-3, +1, -4, +0}, // row 2
{+4, -1, +2, -2}}; // row 3
void SoftReset(); void SoftReset();
// Sets dots per scanline // Sets dots per scanline
@ -464,3 +465,5 @@ private:
static const GP0CommandHandlerTable s_GP0_command_handler_table; static const GP0CommandHandlerTable s_GP0_command_handler_table;
}; };
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPU::TextureMode);

View file

@ -163,7 +163,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
void GPU_HW::AddDuplicateVertex() void GPU_HW::AddDuplicateVertex()
{ {
std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(HWVertex)); std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(BatchVertex));
m_batch_current_vertex_ptr++; m_batch_current_vertex_ptr++;
} }
@ -175,443 +175,14 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
*bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1); *bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1);
} }
static void DefineMacro(std::stringstream& ss, const char* name, bool enabled) GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
{
if (enabled)
ss << "#define " << name << " 1\n";
else
ss << "/* #define " << name << " 0 */\n";
}
void GPU_HW::GenerateShaderHeader(std::stringstream& ss)
{
ss << "#version 330 core\n\n";
ss << "const int RESOLUTION_SCALE = " << m_resolution_scale << ";\n";
ss << "const ivec2 VRAM_SIZE = ivec2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
ss << "const vec2 RCP_VRAM_SIZE = vec2(1.0, 1.0) / vec2(VRAM_SIZE);\n";
ss << R"(
float fixYCoord(float y)
{
return 1.0 - RCP_VRAM_SIZE.y - y;
}
int fixYCoord(int y)
{
return VRAM_SIZE.y - y - 1;
}
uint RGBA8ToRGBA5551(vec4 v)
{
uint r = uint(v.r * 255.0) >> 3;
uint g = uint(v.g * 255.0) >> 3;
uint b = uint(v.b * 255.0) >> 3;
uint a = (v.a != 0.0) ? 1u : 0u;
return (r) | (g << 5) | (b << 10) | (a << 15);
}
vec4 RGBA5551ToRGBA8(uint v)
{
uint r = (v & 31u);
uint g = ((v >> 5) & 31u);
uint b = ((v >> 10) & 31u);
uint a = ((v >> 15) & 1u);
// repeat lower bits
r = (r << 3) | (r & 7u);
g = (g << 3) | (g & 7u);
b = (b << 3) | (b & 7u);
return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a));
}
)";
}
void GPU_HW::GenerateBatchUniformBuffer(std::stringstream& ss)
{
ss << R"(
uniform UBOBlock {
ivec2 u_pos_offset;
uvec2 u_texture_window_mask;
uvec2 u_texture_window_offset;
float u_src_alpha_factor;
float u_dst_alpha_factor;
};
)";
}
std::string GPU_HW::GenerateVertexShader(bool textured)
{
std::stringstream ss;
GenerateShaderHeader(ss);
DefineMacro(ss, "TEXTURED", textured);
GenerateBatchUniformBuffer(ss);
ss << R"(
in ivec2 a_pos;
in vec4 a_col0;
in int a_texcoord;
in int a_texpage;
out vec3 v_col0;
#if TEXTURED
out vec2 v_tex0;
flat out ivec4 v_texpage;
#endif
void main()
{
// 0..+1023 -> -1..1
float pos_x = (float(a_pos.x + u_pos_offset.x) / 512.0) - 1.0;
float pos_y = (float(a_pos.y + u_pos_offset.y) / -256.0) + 1.0;
gl_Position = vec4(pos_x, pos_y, 0.0, 1.0);
v_col0 = a_col0.rgb;
#if TEXTURED
v_tex0 = vec2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / vec2(255.0);
// base_x,base_y,palette_x,palette_y
v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE;
v_texpage.y = ((a_texpage >> 4) & 1) * 256 * RESOLUTION_SCALE;
v_texpage.z = ((a_texpage >> 16) & 63) * 16 * RESOLUTION_SCALE;
v_texpage.w = ((a_texpage >> 22) & 511) * RESOLUTION_SCALE;
#endif
}
)";
return ss.str();
}
std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, TextureMode texture_mode, bool dithering)
{
const TextureMode actual_texture_mode =
static_cast<TextureMode>(static_cast<u8>(texture_mode) & ~static_cast<u8>(TextureMode::RawTextureBit));
const bool raw_texture = (static_cast<u8>(texture_mode) & static_cast<u8>(TextureMode::RawTextureBit)) ==
static_cast<u8>(TextureMode::RawTextureBit);
std::stringstream ss;
GenerateShaderHeader(ss);
GenerateBatchUniformBuffer(ss);
DefineMacro(ss, "TRANSPARENCY", transparency != HWBatchRenderMode::TransparencyDisabled);
DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == HWBatchRenderMode::OnlyOpaque);
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == HWBatchRenderMode::OnlyTransparent);
DefineMacro(ss, "TEXTURED", actual_texture_mode != TextureMode::Disabled);
DefineMacro(ss, "PALETTE",
actual_texture_mode == GPU::TextureMode::Palette4Bit ||
actual_texture_mode == GPU::TextureMode::Palette8Bit);
DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit);
DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit);
DefineMacro(ss, "RAW_TEXTURE", raw_texture);
DefineMacro(ss, "DITHERING", dithering);
DefineMacro(ss, "TRUE_COLOR", m_true_color);
ss << "const int[16] s_dither_values = int[16]( ";
for (u32 i = 0; i < 16; i++)
{
if (i > 0)
ss << ", ";
ss << DITHER_MATRIX[i / 4][i % 4];
}
ss << " );\n";
ss << R"(
in vec3 v_col0;
#if TEXTURED
in vec2 v_tex0;
flat in ivec4 v_texpage;
uniform sampler2D samp0;
#endif
out vec4 o_col0;
ivec3 ApplyDithering(ivec3 icol)
{
ivec2 fc = (ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & ivec2(3, 3);
int offset = s_dither_values[fc.y * 4 + fc.x];
return icol + ivec3(offset, offset, offset);
}
ivec3 TruncateTo15Bit(ivec3 icol)
{
icol = clamp(icol, ivec3(0, 0, 0), ivec3(255, 255, 255));
return (icol & ivec3(~7, ~7, ~7)) | ((icol >> 3) & ivec3(7, 7, 7));
}
#if TEXTURED
ivec2 ApplyNativeTextureWindow(ivec2 coords)
{
uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
return ivec2(int(x), int(y));
}
ivec2 ApplyTextureWindow(ivec2 coords)
{
if (RESOLUTION_SCALE == 1)
return ApplyNativeTextureWindow(coords);
ivec2 downscaled_coords = coords / ivec2(RESOLUTION_SCALE);
ivec2 coords_offset = coords % ivec2(RESOLUTION_SCALE);
return (ApplyNativeTextureWindow(downscaled_coords) * ivec2(RESOLUTION_SCALE)) + coords_offset;
}
ivec4 SampleFromVRAM(vec2 coord)
{
// from 0..1 to 0..255
ivec2 icoord = ivec2(coord * vec2(255 * RESOLUTION_SCALE));
icoord = ApplyTextureWindow(icoord);
// adjust for tightly packed palette formats
ivec2 index_coord = icoord;
#if PALETTE_4_BIT
index_coord.x /= 4;
#elif PALETTE_8_BIT
index_coord.x /= 2;
#endif
// fixup coords
ivec2 vicoord = ivec2(v_texpage.x + index_coord.x, fixYCoord(v_texpage.y + index_coord.y));
// load colour/palette
vec4 color = texelFetch(samp0, vicoord, 0);
// apply palette
#if PALETTE
#if PALETTE_4_BIT
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3;
uint vram_value = RGBA8ToRGBA5551(color);
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
#elif PALETTE_8_BIT
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1;
uint vram_value = RGBA8ToRGBA5551(color);
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
#endif
ivec2 palette_icoord = ivec2(v_texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(v_texpage.w));
color = texelFetch(samp0, palette_icoord, 0);
#endif
return ivec4(color * vec4(255.0, 255.0, 255.0, 255.0));
}
#endif
void main()
{
ivec3 vertcol = ivec3(v_col0 * vec3(255.0, 255.0, 255.0));
bool semitransparent;
bool new_mask_bit;
ivec3 icolor;
#if TEXTURED
ivec4 texcol = SampleFromVRAM(v_tex0);
if (texcol == ivec4(0.0, 0.0, 0.0, 0.0))
discard;
// Grab semitransparent bit from the texture color.
semitransparent = (texcol.a != 0);
#if RAW_TEXTURE
icolor = texcol.rgb;
#else
icolor = (vertcol * texcol.rgb) >> 7;
#endif
#else
// All pixels are semitransparent for untextured polygons.
semitransparent = true;
icolor = vertcol;
#endif
// Apply dithering
#if DITHERING
icolor = ApplyDithering(icolor);
#endif
// Clip to 15-bit range
#if !TRUE_COLOR
icolor = TruncateTo15Bit(icolor);
#endif
// Normalize
vec3 color = vec3(icolor) / vec3(255.0, 255.0, 255.0);
#if TRANSPARENCY
// Apply semitransparency. If not a semitransparent texel, destination alpha is ignored.
if (semitransparent)
{
#if TRANSPARENCY_ONLY_OPAQUE
discard;
#endif
o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor);
}
else
{
#if TRANSPARENCY_ONLY_TRANSPARENCY
discard;
#endif
o_col0 = vec4(color, 0.0);
}
#else
o_col0 = vec4(color, 0.0);
#endif
}
)";
return ss.str();
}
std::string GPU_HW::GenerateScreenQuadVertexShader()
{
std::stringstream ss;
GenerateShaderHeader(ss);
ss << R"(
out vec2 v_tex0;
void main()
{
v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2));
gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f);
gl_Position.y = -gl_Position.y;
}
)";
return ss.str();
}
std::string GPU_HW::GenerateFillFragmentShader()
{
std::stringstream ss;
GenerateShaderHeader(ss);
ss << R"(
uniform vec4 fill_color;
out vec4 o_col0;
void main()
{
o_col0 = fill_color;
}
)";
return ss.str();
}
std::string GPU_HW::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
{
std::stringstream ss;
GenerateShaderHeader(ss);
DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
DefineMacro(ss, "INTERLACED", interlaced);
ss << R"(
in vec2 v_tex0;
out vec4 o_col0;
uniform sampler2D samp0;
uniform ivec3 u_base_coords;
ivec2 GetCoords(vec2 fragcoord)
{
ivec2 icoords = ivec2(fragcoord);
#if INTERLACED
if ((((icoords.y - u_base_coords.z) / RESOLUTION_SCALE) & 1) != 0)
discard;
#endif
return icoords;
}
void main()
{
ivec2 icoords = GetCoords(gl_FragCoord.xy);
#if DEPTH_24BIT
// compute offset in dwords from the start of the 24-bit values
ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y);
int xoff = int(icoords.x);
int dword_index = (xoff / 2) + (xoff / 4);
// sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
// select the bit for this pixel depending on its offset in the 4-pixel block
uint r, g, b;
int block_offset = xoff & 3;
if (block_offset == 0)
{
r = s0 & 0xFFu;
g = s0 >> 8;
b = s1 & 0xFFu;
}
else if (block_offset == 1)
{
r = s1 >> 8;
g = s2 & 0xFFu;
b = s2 >> 8;
}
else if (block_offset == 2)
{
r = s1 & 0xFFu;
g = s1 >> 8;
b = s2 & 0xFFu;
}
else
{
r = s2 >> 8;
g = s3 & 0xFFu;
b = s3 >> 8;
}
// and normalize
o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0);
#else
// load and return
o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0);
#endif
}
)";
return ss.str();
}
std::string GPU_HW::GenerateVRAMWriteFragmentShader()
{
std::stringstream ss;
GenerateShaderHeader(ss);
ss << R"(
uniform ivec2 u_base_coords;
uniform ivec2 u_size;
uniform usamplerBuffer samp0;
out vec4 o_col0;
void main()
{
ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE);
ivec2 offset = coords - u_base_coords;
offset.y = u_size.y - offset.y - 1;
int buffer_offset = offset.y * u_size.x + offset.x;
uint value = texelFetch(samp0, buffer_offset).r;
o_col0 = RGBA5551ToRGBA8(value);
})";
return ss.str();
}
GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
{ {
if (rc.primitive == Primitive::Line) if (rc.primitive == Primitive::Line)
return rc.polyline ? HWPrimitive::LineStrip : HWPrimitive::Lines; return rc.polyline ? BatchPrimitive::LineStrip : BatchPrimitive::Lines;
else if ((rc.primitive == Primitive::Polygon && rc.quad_polygon) || rc.primitive == Primitive::Rectangle) else if ((rc.primitive == Primitive::Polygon && rc.quad_polygon) || rc.primitive == Primitive::Rectangle)
return HWPrimitive::TriangleStrip; return BatchPrimitive::TriangleStrip;
else else
return HWPrimitive::Triangles; return BatchPrimitive::Triangles;
} }
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
@ -687,13 +258,13 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
// has any state changed which requires a new batch? // has any state changed which requires a new batch?
const TransparencyMode transparency_mode = const TransparencyMode transparency_mode =
rc.transparency_enable ? m_render_state.transparency_mode : TransparencyMode::Disabled; rc.transparency_enable ? m_render_state.transparency_mode : TransparencyMode::Disabled;
const HWPrimitive rc_primitive = GetPrimitiveForCommand(rc); const BatchPrimitive rc_primitive = GetPrimitiveForCommand(rc);
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
const u32 max_added_vertices = num_vertices + 2; const u32 max_added_vertices = num_vertices + 2;
if (!IsFlushed()) if (!IsFlushed())
{ {
const bool buffer_overflow = GetBatchVertexSpace() < max_added_vertices; const bool buffer_overflow = GetBatchVertexSpace() < max_added_vertices;
if (buffer_overflow || rc_primitive == HWPrimitive::LineStrip || m_batch.texture_mode != texture_mode || if (buffer_overflow || rc_primitive == BatchPrimitive::LineStrip || m_batch.texture_mode != texture_mode ||
m_batch.transparency_mode != transparency_mode || m_batch.primitive != rc_primitive || m_batch.transparency_mode != transparency_mode || m_batch.primitive != rc_primitive ||
dithering_enable != m_batch.dithering || m_render_state.IsTextureWindowChanged()) dithering_enable != m_batch.dithering || m_render_state.IsTextureWindowChanged())
{ {

View file

@ -8,6 +8,22 @@
class GPU_HW : public GPU class GPU_HW : public GPU
{ {
public: public:
// Topology of a vertex batch. The numeric values are used directly as an index into the
// backend's primitive lookup table (gl_primitives in the OpenGL renderer), so the order and
// values must stay in sync with that table.
enum class BatchPrimitive : u8
{
Lines = 0,
LineStrip = 1,
Triangles = 2,
TriangleStrip = 3
};
// Selects which fragment shader variant is used to draw a batch.
//  - TransparencyDisabled / TransparentAndOpaque: what BatchConfig::GetRenderMode() returns,
//    depending on whether the batch's transparency mode is Disabled.
//  - OnlyTransparent / OnlyOpaque: the two passes issued when a batch needs two-pass rendering.
// Enumerators are implicitly numbered 0-3 and are used as the first index into the backend's
// render program array, so do not reorder them.
enum class BatchRenderMode : u8
{
TransparencyDisabled,
TransparentAndOpaque,
OnlyOpaque,
OnlyTransparent
};
GPU_HW(); GPU_HW();
virtual ~GPU_HW(); virtual ~GPU_HW();
@ -16,23 +32,7 @@ public:
virtual void UpdateSettings() override; virtual void UpdateSettings() override;
protected: protected:
enum class HWPrimitive : u8 struct BatchVertex
{
Lines = 0,
LineStrip = 1,
Triangles = 2,
TriangleStrip = 3
};
enum class HWBatchRenderMode : u8
{
TransparencyDisabled,
TransparentAndOpaque,
OnlyOpaque,
OnlyTransparent
};
struct HWVertex
{ {
s32 x; s32 x;
s32 y; s32 y;
@ -55,9 +55,9 @@ protected:
} }
}; };
struct HWBatchConfig struct BatchConfig
{ {
HWPrimitive primitive; BatchPrimitive primitive;
TextureMode texture_mode; TextureMode texture_mode;
TransparencyMode transparency_mode; TransparencyMode transparency_mode;
bool dithering; bool dithering;
@ -71,14 +71,14 @@ protected:
} }
// Returns the render mode for this batch. // Returns the render mode for this batch.
HWBatchRenderMode GetRenderMode() const BatchRenderMode GetRenderMode() const
{ {
return transparency_mode == TransparencyMode::Disabled ? HWBatchRenderMode::TransparencyDisabled : return transparency_mode == TransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled :
HWBatchRenderMode::TransparentAndOpaque; BatchRenderMode::TransparentAndOpaque;
} }
}; };
struct HWBatchUBOData struct BatchUBOData
{ {
s32 u_pos_offset[2]; s32 u_pos_offset[2];
u32 u_texture_window_mask[2]; u32 u_texture_window_mask[2];
@ -90,7 +90,7 @@ protected:
static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32); static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32);
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024; static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6; static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex); static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex);
static constexpr u32 UNIFORM_BUFFER_SIZE = 512 * 1024; static constexpr u32 UNIFORM_BUFFER_SIZE = 512 * 1024;
static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba) static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba)
@ -121,31 +121,21 @@ protected:
return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale)); return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale));
} }
std::string GenerateVertexShader(bool textured); BatchVertex* m_batch_start_vertex_ptr = nullptr;
std::string GenerateFragmentShader(HWBatchRenderMode transparency, TextureMode texture_mode, bool dithering); BatchVertex* m_batch_end_vertex_ptr = nullptr;
std::string GenerateScreenQuadVertexShader(); BatchVertex* m_batch_current_vertex_ptr = nullptr;
std::string GenerateFillFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
std::string GenerateVRAMWriteFragmentShader();
HWVertex* m_batch_start_vertex_ptr = nullptr;
HWVertex* m_batch_end_vertex_ptr = nullptr;
HWVertex* m_batch_current_vertex_ptr = nullptr;
u32 m_batch_base_vertex = 0; u32 m_batch_base_vertex = 0;
u32 m_resolution_scale = 1; u32 m_resolution_scale = 1;
u32 m_max_resolution_scale = 1; u32 m_max_resolution_scale = 1;
bool m_true_color = false; bool m_true_color = false;
HWBatchConfig m_batch = {}; BatchConfig m_batch = {};
HWBatchUBOData m_batch_ubo_data = {}; BatchUBOData m_batch_ubo_data = {};
bool m_batch_ubo_dirty = true; bool m_batch_ubo_dirty = true;
private: private:
static HWPrimitive GetPrimitiveForCommand(RenderCommand rc); static BatchPrimitive GetPrimitiveForCommand(RenderCommand rc);
void GenerateShaderHeader(std::stringstream& ss);
void GenerateBatchUniformBuffer(std::stringstream& ss);
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr); void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
void AddDuplicateVertex(); void AddDuplicateVertex();

View file

@ -2,6 +2,7 @@
#include "YBaseLib/Assert.h" #include "YBaseLib/Assert.h"
#include "YBaseLib/Log.h" #include "YBaseLib/Log.h"
#include "YBaseLib/String.h" #include "YBaseLib/String.h"
#include "gpu_hw_shadergen.h"
#include "host_interface.h" #include "host_interface.h"
#include "imgui.h" #include "imgui.h"
#include "system.h" #include "system.h"
@ -138,9 +139,9 @@ void GPU_HW_OpenGL::MapBatchVertexPointer(u32 required_vertices)
Assert(!m_batch_start_vertex_ptr); Assert(!m_batch_start_vertex_ptr);
const GL::StreamBuffer::MappingResult res = const GL::StreamBuffer::MappingResult res =
m_vertex_stream_buffer->Map(sizeof(HWVertex), required_vertices * sizeof(HWVertex)); m_vertex_stream_buffer->Map(sizeof(BatchVertex), required_vertices * sizeof(BatchVertex));
m_batch_start_vertex_ptr = static_cast<HWVertex*>(res.pointer); m_batch_start_vertex_ptr = static_cast<BatchVertex*>(res.pointer);
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned; m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned;
m_batch_base_vertex = res.index_aligned; m_batch_base_vertex = res.index_aligned;
@ -246,11 +247,11 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
glEnableVertexAttribArray(1); glEnableVertexAttribArray(1);
glEnableVertexAttribArray(2); glEnableVertexAttribArray(2);
glEnableVertexAttribArray(3); glEnableVertexAttribArray(3);
glVertexAttribIPointer(0, 2, GL_INT, sizeof(HWVertex), reinterpret_cast<void*>(offsetof(HWVertex, x))); glVertexAttribIPointer(0, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, x)));
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(HWVertex), glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
reinterpret_cast<void*>(offsetof(HWVertex, color))); reinterpret_cast<void*>(offsetof(BatchVertex, color)));
glVertexAttribIPointer(2, 2, GL_INT, sizeof(HWVertex), reinterpret_cast<void*>(offsetof(HWVertex, texcoord))); glVertexAttribIPointer(2, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, texcoord)));
glVertexAttribIPointer(3, 1, GL_INT, sizeof(HWVertex), reinterpret_cast<void*>(offsetof(HWVertex, texpage))); glVertexAttribIPointer(3, 1, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, texpage)));
glBindVertexArray(0); glBindVertexArray(0);
glGenVertexArrays(1, &m_attributeless_vao_id); glGenVertexArrays(1, &m_attributeless_vao_id);
@ -280,31 +281,56 @@ void GPU_HW_OpenGL::CreateTextureBuffer()
bool GPU_HW_OpenGL::CompilePrograms() bool GPU_HW_OpenGL::CompilePrograms()
{ {
GPU_HW_ShaderGen shadergen(GPU_HW_ShaderGen::Backend::OpenGL, m_resolution_scale, m_true_color);
for (u32 render_mode = 0; render_mode < 4; render_mode++) for (u32 render_mode = 0; render_mode < 4; render_mode++)
{ {
for (u32 texture_mode = 0; texture_mode < 9; texture_mode++) for (u32 texture_mode = 0; texture_mode < 9; texture_mode++)
{ {
for (u8 dithering = 0; dithering < 2; dithering++) for (u8 dithering = 0; dithering < 2; dithering++)
{ {
if (!CompileProgram(m_render_programs[render_mode][texture_mode][dithering], const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled);
static_cast<HWBatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode), const std::string vs = shadergen.GenerateBatchVertexShader(textured);
ConvertToBoolUnchecked(dithering))) const std::string fs = shadergen.GenerateBatchFragmentShader(static_cast<BatchRenderMode>(render_mode),
{ static_cast<TextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering));
GL::Program& prog = m_render_programs[render_mode][texture_mode][dithering];
if (!prog.Compile(vs, fs))
return false; return false;
prog.BindAttribute(0, "a_pos");
prog.BindAttribute(1, "a_col0");
if (textured)
{
prog.BindAttribute(2, "a_texcoord");
prog.BindAttribute(3, "a_texpage");
}
prog.BindFragData(0, "o_col0");
if (!prog.Link())
return false;
prog.BindUniformBlock("UBOBlock", 1);
if (textured)
{
prog.Bind();
prog.RegisterUniform("samp0");
prog.Uniform1i(0, 0);
} }
} }
} }
} }
// TODO: Use string_view
for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++) for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
{ {
for (u8 interlaced = 0; interlaced < 2; interlaced++) for (u8 interlaced = 0; interlaced < 2; interlaced++)
{ {
GL::Program& prog = m_display_programs[depth_24bit][interlaced]; GL::Program& prog = m_display_programs[depth_24bit][interlaced];
const std::string vs = GenerateScreenQuadVertexShader(); const std::string vs = shadergen.GenerateScreenQuadVertexShader();
const std::string fs = const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit),
GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), ConvertToBoolUnchecked(interlaced)); ConvertToBoolUnchecked(interlaced));
if (!prog.Compile(vs, fs)) if (!prog.Compile(vs, fs))
return false; return false;
@ -319,8 +345,11 @@ bool GPU_HW_OpenGL::CompilePrograms()
} }
} }
if (!m_vram_write_program.Compile(GenerateScreenQuadVertexShader(), GenerateVRAMWriteFragmentShader())) if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
shadergen.GenerateVRAMWriteFragmentShader()))
{
return false; return false;
}
m_vram_write_program.BindFragData(0, "o_col0"); m_vram_write_program.BindFragData(0, "o_col0");
if (!m_vram_write_program.Link()) if (!m_vram_write_program.Link())
@ -335,41 +364,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
return true; return true;
} }
bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, void GPU_HW_OpenGL::SetDrawState(BatchRenderMode render_mode)
bool dithering)
{
const bool textured = texture_mode != TextureMode::Disabled;
const std::string vs = GenerateVertexShader(textured);
const std::string fs = GenerateFragmentShader(render_mode, texture_mode, dithering);
if (!prog.Compile(vs, fs))
return false;
prog.BindAttribute(0, "a_pos");
prog.BindAttribute(1, "a_col0");
if (textured)
{
prog.BindAttribute(2, "a_texcoord");
prog.BindAttribute(3, "a_texpage");
}
prog.BindFragData(0, "o_col0");
if (!prog.Link())
return false;
prog.BindUniformBlock("UBOBlock", 1);
if (textured)
{
prog.Bind();
prog.RegisterUniform("samp0");
prog.Uniform1i(0, 0);
}
return true;
}
void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
{ {
const GL::Program& prog = m_render_programs[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)] const GL::Program& prog = m_render_programs[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
[BoolToUInt8(m_batch.dithering)]; [BoolToUInt8(m_batch.dithering)];
@ -378,7 +373,7 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
if (m_batch.texture_mode != TextureMode::Disabled) if (m_batch.texture_mode != TextureMode::Disabled)
m_vram_read_texture->Bind(); m_vram_read_texture->Bind();
if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == HWBatchRenderMode::OnlyOpaque) if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque)
{ {
glDisable(GL_BLEND); glDisable(GL_BLEND);
} }
@ -732,7 +727,7 @@ void GPU_HW_OpenGL::FlushRender()
m_stats.num_batches++; m_stats.num_batches++;
m_stats.num_vertices += vertex_count; m_stats.num_vertices += vertex_count;
m_vertex_stream_buffer->Unmap(vertex_count * sizeof(HWVertex)); m_vertex_stream_buffer->Unmap(vertex_count * sizeof(BatchVertex));
m_vertex_stream_buffer->Bind(); m_vertex_stream_buffer->Bind();
m_batch_start_vertex_ptr = nullptr; m_batch_start_vertex_ptr = nullptr;
m_batch_end_vertex_ptr = nullptr; m_batch_end_vertex_ptr = nullptr;
@ -742,9 +737,9 @@ void GPU_HW_OpenGL::FlushRender()
if (m_batch.NeedsTwoPassRendering()) if (m_batch.NeedsTwoPassRendering())
{ {
SetDrawState(HWBatchRenderMode::OnlyTransparent); SetDrawState(BatchRenderMode::OnlyTransparent);
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count); glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
SetDrawState(HWBatchRenderMode::OnlyOpaque); SetDrawState(BatchRenderMode::OnlyOpaque);
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count); glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
} }
else else

View file

@ -58,8 +58,7 @@ private:
void CreateTextureBuffer(); void CreateTextureBuffer();
bool CompilePrograms(); bool CompilePrograms();
bool CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, bool dithering); void SetDrawState(BatchRenderMode render_mode);
void SetDrawState(HWBatchRenderMode render_mode);
void UploadUniformBlock(const void* data, u32 data_size); void UploadUniformBlock(const void* data, u32 data_size);
// downsample texture - used for readbacks at >1xIR. // downsample texture - used for readbacks at >1xIR.

View file

@ -0,0 +1,436 @@
#include "gpu_hw_shadergen.h"
GPU_HW_ShaderGen::GPU_HW_ShaderGen(Backend backend, u32 resolution_scale, bool true_color)
: m_backend(backend), m_resolution_scale(resolution_scale), m_true_color(true_color)
{
}
// The generator only holds plain value members, so the compiler-generated destructor is used.
GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default;
// Emits a preprocessor definition for a boolean shader feature switch.
//
// ss      - stream receiving the generated shader source.
// name    - macro identifier to define.
// enabled - selects the value: 1 when true, 0 when false.
//
// The macro is always defined. The generated shaders test these switches with
// `#if NAME`, `#elif NAME` and `#if !NAME`; unlike C, GLSL does not treat an undefined
// identifier in such an expression as 0 (it is an error), so a disabled switch must be
// defined as 0 rather than left undefined.
static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
{
  ss << "#define " << name << ' ' << (enabled ? '1' : '0') << '\n';
}
// Writes the preamble shared by every generated shader into `ss`:
//  - the "#version 330 core" directive,
//  - the RESOLUTION_SCALE, VRAM_SIZE and RCP_VRAM_SIZE constants, derived from
//    m_resolution_scale and GPU::VRAM_WIDTH / GPU::VRAM_HEIGHT,
//  - helper functions: fixYCoord (float and int overloads, mirroring a Y coordinate) and
//    RGBA8ToRGBA5551 / RGBA5551ToRGBA8 (packing to / unpacking from a 16-bit 5551 value).
void GPU_HW_ShaderGen::GenerateShaderHeader(std::stringstream& ss)
{
  // GLSL helper functions appended verbatim after the constants.
  static const char* const helper_functions = R"(
float fixYCoord(float y)
{
return 1.0 - RCP_VRAM_SIZE.y - y;
}
int fixYCoord(int y)
{
return VRAM_SIZE.y - y - 1;
}
uint RGBA8ToRGBA5551(vec4 v)
{
uint r = uint(v.r * 255.0) >> 3;
uint g = uint(v.g * 255.0) >> 3;
uint b = uint(v.b * 255.0) >> 3;
uint a = (v.a != 0.0) ? 1u : 0u;
return (r) | (g << 5) | (b << 10) | (a << 15);
}
vec4 RGBA5551ToRGBA8(uint v)
{
uint r = (v & 31u);
uint g = ((v >> 5) & 31u);
uint b = ((v >> 10) & 31u);
uint a = ((v >> 15) & 1u);
// repeat lower bits
r = (r << 3) | (r & 7u);
g = (g << 3) | (g & 7u);
b = (b << 3) | (b & 7u);
return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a));
}
)";

  ss << "#version 330 core\n\n"
     << "const int RESOLUTION_SCALE = " << m_resolution_scale << ";\n"
     << "const ivec2 VRAM_SIZE = ivec2(" << GPU::VRAM_WIDTH << ", " << GPU::VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n"
     << "const vec2 RCP_VRAM_SIZE = vec2(1.0, 1.0) / vec2(VRAM_SIZE);\n"
     << helper_functions;
}
// Appends the declaration of the batch uniform block (UBOBlock) to `ss`. The block carries
// the position offset, the texture window mask/offset and the source/destination alpha
// factors that the batch vertex and fragment shaders reference.
void GPU_HW_ShaderGen::GenerateBatchUniformBuffer(std::stringstream& ss)
{
  static const char* const uniform_block = R"(
uniform UBOBlock {
ivec2 u_pos_offset;
uvec2 u_texture_window_mask;
uvec2 u_texture_window_offset;
float u_src_alpha_factor;
float u_dst_alpha_factor;
};
)";

  ss << uniform_block;
}
// Builds the vertex shader source used for batch rendering.
//
// textured - passed to DefineMacro for the TEXTURED switch; the shader's `#if TEXTURED`
//            sections declare and write v_tex0 and v_texpage.
//
// Returns the complete source: shared header, TEXTURED switch, uniform block, then the
// shader body, which maps a_pos plus u_pos_offset into clip space and forwards the colour.
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
{
  static const char* const shader_body = R"(
in ivec2 a_pos;
in vec4 a_col0;
in int a_texcoord;
in int a_texpage;
out vec3 v_col0;
#if TEXTURED
out vec2 v_tex0;
flat out ivec4 v_texpage;
#endif
void main()
{
// 0..+1023 -> -1..1
float pos_x = (float(a_pos.x + u_pos_offset.x) / 512.0) - 1.0;
float pos_y = (float(a_pos.y + u_pos_offset.y) / -256.0) + 1.0;
gl_Position = vec4(pos_x, pos_y, 0.0, 1.0);
v_col0 = a_col0.rgb;
#if TEXTURED
v_tex0 = vec2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / vec2(255.0);
// base_x,base_y,palette_x,palette_y
v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE;
v_texpage.y = ((a_texpage >> 4) & 1) * 256 * RESOLUTION_SCALE;
v_texpage.z = ((a_texpage >> 16) & 63) * 16 * RESOLUTION_SCALE;
v_texpage.w = ((a_texpage >> 22) & 511) * RESOLUTION_SCALE;
#endif
}
)";

  std::stringstream source;
  GenerateShaderHeader(source);
  DefineMacro(source, "TEXTURED", textured);
  GenerateBatchUniformBuffer(source);
  source << shader_body;
  return source.str();
}
// Builds the fragment shader source used for batch rendering.
//
// transparency - render mode; drives the TRANSPARENCY, TRANSPARENCY_ONLY_OPAQUE and
//                TRANSPARENCY_ONLY_TRANSPARENCY switches.
// texture_mode - texture mode, optionally combined with RawTextureBit. The bit is masked off
//                to obtain the base mode (TEXTURED / PALETTE / PALETTE_4_BIT / PALETTE_8_BIT)
//                and reported separately as RAW_TEXTURE.
// dithering    - drives the DITHERING switch.
//
// m_true_color drives the TRUE_COLOR switch. GPU::DITHER_MATRIX is flattened row by row into
// the s_dither_values constant array. Returns the complete source.
std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency,
                                                          GPU::TextureMode texture_mode, bool dithering)
{
  using RenderMode = GPU_HW::BatchRenderMode;
  using TexMode = GPU::TextureMode;

  // Separate the raw-texture flag from the base texture mode.
  const bool raw_texture = (texture_mode & TexMode::RawTextureBit) == TexMode::RawTextureBit;
  const TexMode base_mode = texture_mode & ~TexMode::RawTextureBit;

  const bool is_textured = (base_mode != TexMode::Disabled);
  const bool is_palette_4bit = (base_mode == TexMode::Palette4Bit);
  const bool is_palette_8bit = (base_mode == TexMode::Palette8Bit);

  std::stringstream source;
  GenerateShaderHeader(source);
  GenerateBatchUniformBuffer(source);

  // Feature switches, emitted in a fixed order.
  DefineMacro(source, "TRANSPARENCY", transparency != RenderMode::TransparencyDisabled);
  DefineMacro(source, "TRANSPARENCY_ONLY_OPAQUE", transparency == RenderMode::OnlyOpaque);
  DefineMacro(source, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == RenderMode::OnlyTransparent);
  DefineMacro(source, "TEXTURED", is_textured);
  DefineMacro(source, "PALETTE", is_palette_4bit || is_palette_8bit);
  DefineMacro(source, "PALETTE_4_BIT", is_palette_4bit);
  DefineMacro(source, "PALETTE_8_BIT", is_palette_8bit);
  DefineMacro(source, "RAW_TEXTURE", raw_texture);
  DefineMacro(source, "DITHERING", dithering);
  DefineMacro(source, "TRUE_COLOR", m_true_color);

  // Flatten the 4x4 dither matrix into a 16-entry constant array, row-major.
  source << "const int[16] s_dither_values = int[16]( ";
  const char* separator = "";
  for (u32 row = 0; row < 4; row++)
  {
    for (u32 col = 0; col < 4; col++)
    {
      source << separator << GPU::DITHER_MATRIX[row][col];
      separator = ", ";
    }
  }
  source << " );\n";

  source << R"(
in vec3 v_col0;
#if TEXTURED
in vec2 v_tex0;
flat in ivec4 v_texpage;
uniform sampler2D samp0;
#endif
out vec4 o_col0;
ivec3 ApplyDithering(ivec3 icol)
{
ivec2 fc = (ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & ivec2(3, 3);
int offset = s_dither_values[fc.y * 4 + fc.x];
return icol + ivec3(offset, offset, offset);
}
ivec3 TruncateTo15Bit(ivec3 icol)
{
icol = clamp(icol, ivec3(0, 0, 0), ivec3(255, 255, 255));
return (icol & ivec3(~7, ~7, ~7)) | ((icol >> 3) & ivec3(7, 7, 7));
}
#if TEXTURED
ivec2 ApplyNativeTextureWindow(ivec2 coords)
{
uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
return ivec2(int(x), int(y));
}
ivec2 ApplyTextureWindow(ivec2 coords)
{
if (RESOLUTION_SCALE == 1)
return ApplyNativeTextureWindow(coords);
ivec2 downscaled_coords = coords / ivec2(RESOLUTION_SCALE);
ivec2 coords_offset = coords % ivec2(RESOLUTION_SCALE);
return (ApplyNativeTextureWindow(downscaled_coords) * ivec2(RESOLUTION_SCALE)) + coords_offset;
}
ivec4 SampleFromVRAM(vec2 coord)
{
// from 0..1 to 0..255
ivec2 icoord = ivec2(coord * vec2(255 * RESOLUTION_SCALE));
icoord = ApplyTextureWindow(icoord);
// adjust for tightly packed palette formats
ivec2 index_coord = icoord;
#if PALETTE_4_BIT
index_coord.x /= 4;
#elif PALETTE_8_BIT
index_coord.x /= 2;
#endif
// fixup coords
ivec2 vicoord = ivec2(v_texpage.x + index_coord.x, fixYCoord(v_texpage.y + index_coord.y));
// load colour/palette
vec4 color = texelFetch(samp0, vicoord, 0);
// apply palette
#if PALETTE
#if PALETTE_4_BIT
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3;
uint vram_value = RGBA8ToRGBA5551(color);
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
#elif PALETTE_8_BIT
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1;
uint vram_value = RGBA8ToRGBA5551(color);
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
#endif
ivec2 palette_icoord = ivec2(v_texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(v_texpage.w));
color = texelFetch(samp0, palette_icoord, 0);
#endif
return ivec4(color * vec4(255.0, 255.0, 255.0, 255.0));
}
#endif
void main()
{
ivec3 vertcol = ivec3(v_col0 * vec3(255.0, 255.0, 255.0));
bool semitransparent;
bool new_mask_bit;
ivec3 icolor;
#if TEXTURED
ivec4 texcol = SampleFromVRAM(v_tex0);
if (texcol == ivec4(0.0, 0.0, 0.0, 0.0))
discard;
// Grab semitransparent bit from the texture color.
semitransparent = (texcol.a != 0);
#if RAW_TEXTURE
icolor = texcol.rgb;
#else
icolor = (vertcol * texcol.rgb) >> 7;
#endif
#else
// All pixels are semitransparent for untextured polygons.
semitransparent = true;
icolor = vertcol;
#endif
// Apply dithering
#if DITHERING
icolor = ApplyDithering(icolor);
#endif
// Clip to 15-bit range
#if !TRUE_COLOR
icolor = TruncateTo15Bit(icolor);
#endif
// Normalize
vec3 color = vec3(icolor) / vec3(255.0, 255.0, 255.0);
#if TRANSPARENCY
// Apply semitransparency. If not a semitransparent texel, destination alpha is ignored.
if (semitransparent)
{
#if TRANSPARENCY_ONLY_OPAQUE
discard;
#endif
o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor);
}
else
{
#if TRANSPARENCY_ONLY_TRANSPARENCY
discard;
#endif
o_col0 = vec4(color, 0.0);
}
#else
o_col0 = vec4(color, 0.0);
#endif
}
)";

  return source.str();
}
// Builds a vertex shader that takes no vertex attributes: v_tex0 and gl_Position are derived
// purely from gl_VertexID. Returns the shared header followed by the shader body.
std::string GPU_HW_ShaderGen::GenerateScreenQuadVertexShader()
{
  static const char* const shader_body = R"(
out vec2 v_tex0;
void main()
{
v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2));
gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f);
gl_Position.y = -gl_Position.y;
}
)";

  std::stringstream source;
  GenerateShaderHeader(source);
  source << shader_body;
  return source.str();
}
// Builds a fragment shader that writes the `fill_color` uniform to every fragment.
// Returns the shared header followed by the shader body.
std::string GPU_HW_ShaderGen::GenerateFillFragmentShader()
{
  static const char* const shader_body = R"(
uniform vec4 fill_color;
out vec4 o_col0;
void main()
{
o_col0 = fill_color;
}
)";

  std::stringstream source;
  GenerateShaderHeader(source);
  source << shader_body;
  return source.str();
}
// Builds the fragment shader that produces the display output from the texture bound to samp0.
//
// depth_24bit - drives the DEPTH_24BIT switch: the shader fetches four adjacent texels,
//               converts each to a 16-bit value and extracts the three bytes of the pixel
//               according to its position inside a 4-pixel block. Otherwise the texel at
//               u_base_coords.xy + fragment coordinate is returned directly.
// interlaced  - drives the INTERLACED switch: fragments are discarded on alternating lines,
//               with the line parity offset by u_base_coords.z.
//
// Returns the shared header, the two switches, then the shader body.
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
{
  static const char* const shader_body = R"(
in vec2 v_tex0;
out vec4 o_col0;
uniform sampler2D samp0;
uniform ivec3 u_base_coords;
ivec2 GetCoords(vec2 fragcoord)
{
ivec2 icoords = ivec2(fragcoord);
#if INTERLACED
if ((((icoords.y - u_base_coords.z) / RESOLUTION_SCALE) & 1) != 0)
discard;
#endif
return icoords;
}
void main()
{
ivec2 icoords = GetCoords(gl_FragCoord.xy);
#if DEPTH_24BIT
// compute offset in dwords from the start of the 24-bit values
ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y);
int xoff = int(icoords.x);
int dword_index = (xoff / 2) + (xoff / 4);
// sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
// select the bit for this pixel depending on its offset in the 4-pixel block
uint r, g, b;
int block_offset = xoff & 3;
if (block_offset == 0)
{
r = s0 & 0xFFu;
g = s0 >> 8;
b = s1 & 0xFFu;
}
else if (block_offset == 1)
{
r = s1 >> 8;
g = s2 & 0xFFu;
b = s2 >> 8;
}
else if (block_offset == 2)
{
r = s1 & 0xFFu;
g = s1 >> 8;
b = s2 & 0xFFu;
}
else
{
r = s2 >> 8;
g = s3 & 0xFFu;
b = s3 >> 8;
}
// and normalize
o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0);
#else
// load and return
o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0);
#endif
}
)";

  std::stringstream source;
  GenerateShaderHeader(source);
  DefineMacro(source, "DEPTH_24BIT", depth_24bit);
  DefineMacro(source, "INTERLACED", interlaced);
  source << shader_body;
  return source.str();
}
// Builds the fragment shader that fills a rectangle from a texel buffer (samp0): each fragment
// is scaled down by RESOLUTION_SCALE, made relative to u_base_coords, flipped vertically within
// u_size, and the fetched 16-bit value is expanded with RGBA5551ToRGBA8.
// Returns the shared header followed by the shader body.
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
{
  static const char* const shader_body = R"(
uniform ivec2 u_base_coords;
uniform ivec2 u_size;
uniform usamplerBuffer samp0;
out vec4 o_col0;
void main()
{
ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE);
ivec2 offset = coords - u_base_coords;
offset.y = u_size.y - offset.y - 1;
int buffer_offset = offset.y * u_size.x + offset.x;
uint value = texelFetch(samp0, buffer_offset).r;
o_col0 = RGBA5551ToRGBA8(value);
})";

  std::stringstream source;
  GenerateShaderHeader(source);
  source << shader_body;
  return source.str();
}

View file

@ -0,0 +1,34 @@
#pragma once
#include <sstream>
#include <string>
#include "gpu_hw.h"
// Generates the shader source strings used by the hardware GPU renderer. Each Generate*()
// method returns a complete shader as a std::string; the settings captured at construction
// are baked into the generated text.
class GPU_HW_ShaderGen
{
public:
  // Graphics API the generated source is intended for.
  enum class Backend
  {
    OpenGL
  };

  // Stores the backend, resolution scale and true-colour flag for later generation calls.
  GPU_HW_ShaderGen(Backend backend, u32 resolution_scale, bool true_color);
  ~GPU_HW_ShaderGen();

  // NOTE(review): no definition of Init() is visible in gpu_hw_shadergen.cpp - confirm it is
  // implemented elsewhere, otherwise any call will fail to link.
  void Init(Backend backend, u32 resolution_scale, bool true_color);

  // Batch (primitive) rendering shaders.
  std::string GenerateBatchVertexShader(bool textured);
  std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode,
                                          bool dithering);

  // Attribute-less vertex shader driven by gl_VertexID.
  std::string GenerateScreenQuadVertexShader();

  // Fragment shaders for filling, display output and VRAM writes.
  std::string GenerateFillFragmentShader();
  std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
  std::string GenerateVRAMWriteFragmentShader();

  // Generation parameters, set by the constructor.
  Backend m_backend;
  u32 m_resolution_scale;
  bool m_true_color;

private:
  // Emits the #version line, shared constants and helper functions.
  void GenerateShaderHeader(std::stringstream& ss);

  // Emits the uniform block declaration shared by the batch shaders.
  void GenerateBatchUniformBuffer(std::stringstream& ss);
};