mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-30 01:25:51 +00:00
GPU/HW: Split shadergen to separate class
This commit is contained in:
parent
91c99f0226
commit
be81d08109
|
@ -21,6 +21,8 @@ add_library(core
|
||||||
gpu_hw.h
|
gpu_hw.h
|
||||||
gpu_hw_opengl.cpp
|
gpu_hw_opengl.cpp
|
||||||
gpu_hw_opengl.h
|
gpu_hw_opengl.h
|
||||||
|
gpu_hw_shadergen.cpp
|
||||||
|
gpu_hw_shadergen.h
|
||||||
gpu_sw.cpp
|
gpu_sw.cpp
|
||||||
gpu_sw.h
|
gpu_sw.h
|
||||||
gte.cpp
|
gte.cpp
|
||||||
|
|
|
@ -41,6 +41,7 @@
|
||||||
<ClCompile Include="cpu_disasm.cpp" />
|
<ClCompile Include="cpu_disasm.cpp" />
|
||||||
<ClCompile Include="digital_controller.cpp" />
|
<ClCompile Include="digital_controller.cpp" />
|
||||||
<ClCompile Include="gpu_commands.cpp" />
|
<ClCompile Include="gpu_commands.cpp" />
|
||||||
|
<ClCompile Include="gpu_hw_shadergen.cpp" />
|
||||||
<ClCompile Include="gpu_sw.cpp" />
|
<ClCompile Include="gpu_sw.cpp" />
|
||||||
<ClCompile Include="gte.cpp" />
|
<ClCompile Include="gte.cpp" />
|
||||||
<ClCompile Include="dma.cpp" />
|
<ClCompile Include="dma.cpp" />
|
||||||
|
@ -64,6 +65,7 @@
|
||||||
<ClInclude Include="cpu_core.h" />
|
<ClInclude Include="cpu_core.h" />
|
||||||
<ClInclude Include="cpu_disasm.h" />
|
<ClInclude Include="cpu_disasm.h" />
|
||||||
<ClInclude Include="digital_controller.h" />
|
<ClInclude Include="digital_controller.h" />
|
||||||
|
<ClInclude Include="gpu_hw_shadergen.h" />
|
||||||
<ClInclude Include="gpu_sw.h" />
|
<ClInclude Include="gpu_sw.h" />
|
||||||
<ClInclude Include="gte.h" />
|
<ClInclude Include="gte.h" />
|
||||||
<ClInclude Include="cpu_types.h" />
|
<ClInclude Include="cpu_types.h" />
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
<ClCompile Include="settings.cpp" />
|
<ClCompile Include="settings.cpp" />
|
||||||
<ClCompile Include="gpu_commands.cpp" />
|
<ClCompile Include="gpu_commands.cpp" />
|
||||||
<ClCompile Include="gpu_sw.cpp" />
|
<ClCompile Include="gpu_sw.cpp" />
|
||||||
|
<ClCompile Include="gpu_hw_shadergen.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="types.h" />
|
<ClInclude Include="types.h" />
|
||||||
|
@ -50,6 +51,7 @@
|
||||||
<ClInclude Include="memory_card.h" />
|
<ClInclude Include="memory_card.h" />
|
||||||
<ClInclude Include="settings.h" />
|
<ClInclude Include="settings.h" />
|
||||||
<ClInclude Include="gpu_sw.h" />
|
<ClInclude Include="gpu_sw.h" />
|
||||||
|
<ClInclude Include="gpu_hw_shadergen.h" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Include="cpu_core.inl" />
|
<None Include="cpu_core.inl" />
|
||||||
|
|
117
src/core/gpu.h
117
src/core/gpu.h
|
@ -18,6 +18,57 @@ class Timers;
|
||||||
class GPU
|
class GPU
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
enum class DMADirection : u32
|
||||||
|
{
|
||||||
|
Off = 0,
|
||||||
|
FIFO = 1,
|
||||||
|
CPUtoGP0 = 2,
|
||||||
|
GPUREADtoCPU = 3
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class Primitive : u8
|
||||||
|
{
|
||||||
|
Reserved = 0,
|
||||||
|
Polygon = 1,
|
||||||
|
Line = 2,
|
||||||
|
Rectangle = 3
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class DrawRectangleSize : u8
|
||||||
|
{
|
||||||
|
Variable = 0,
|
||||||
|
R1x1 = 1,
|
||||||
|
R8x8 = 2,
|
||||||
|
R16x16 = 3
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class TextureMode : u8
|
||||||
|
{
|
||||||
|
Palette4Bit = 0,
|
||||||
|
Palette8Bit = 1,
|
||||||
|
Direct16Bit = 2,
|
||||||
|
Reserved_Direct16Bit = 3,
|
||||||
|
|
||||||
|
// Not register values.
|
||||||
|
RawTextureBit = 4,
|
||||||
|
RawPalette4Bit = RawTextureBit | Palette4Bit,
|
||||||
|
RawPalette8Bit = RawTextureBit | Palette8Bit,
|
||||||
|
RawDirect16Bit = RawTextureBit | Direct16Bit,
|
||||||
|
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
|
||||||
|
|
||||||
|
Disabled = 8 // Not a register value
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class TransparencyMode : u8
|
||||||
|
{
|
||||||
|
HalfBackgroundPlusHalfForeground = 0,
|
||||||
|
BackgroundPlusForeground = 1,
|
||||||
|
BackgroundMinusForeground = 2,
|
||||||
|
BackgroundPlusQuarterForeground = 3,
|
||||||
|
|
||||||
|
Disabled = 4 // Not a register value
|
||||||
|
};
|
||||||
|
|
||||||
enum : u32
|
enum : u32
|
||||||
{
|
{
|
||||||
VRAM_WIDTH = 1024,
|
VRAM_WIDTH = 1024,
|
||||||
|
@ -29,6 +80,13 @@ public:
|
||||||
HBLANK_TIMER_INDEX = 1
|
HBLANK_TIMER_INDEX = 1
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// 4x4 dither matrix.
|
||||||
|
static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0
|
||||||
|
{+2, -2, +3, -1}, // row 1
|
||||||
|
{-3, +1, -4, +0}, // row 2
|
||||||
|
{+4, -1, +2, -2}}; // row 3
|
||||||
|
|
||||||
|
// Base class constructor.
|
||||||
GPU();
|
GPU();
|
||||||
virtual ~GPU();
|
virtual ~GPU();
|
||||||
|
|
||||||
|
@ -112,57 +170,6 @@ protected:
|
||||||
static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
|
static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
|
||||||
bool remove_alpha);
|
bool remove_alpha);
|
||||||
|
|
||||||
enum class DMADirection : u32
|
|
||||||
{
|
|
||||||
Off = 0,
|
|
||||||
FIFO = 1,
|
|
||||||
CPUtoGP0 = 2,
|
|
||||||
GPUREADtoCPU = 3
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class Primitive : u8
|
|
||||||
{
|
|
||||||
Reserved = 0,
|
|
||||||
Polygon = 1,
|
|
||||||
Line = 2,
|
|
||||||
Rectangle = 3
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class DrawRectangleSize : u8
|
|
||||||
{
|
|
||||||
Variable = 0,
|
|
||||||
R1x1 = 1,
|
|
||||||
R8x8 = 2,
|
|
||||||
R16x16 = 3
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class TextureMode : u8
|
|
||||||
{
|
|
||||||
Palette4Bit = 0,
|
|
||||||
Palette8Bit = 1,
|
|
||||||
Direct16Bit = 2,
|
|
||||||
Reserved_Direct16Bit = 3,
|
|
||||||
|
|
||||||
// Not register values.
|
|
||||||
RawTextureBit = 4,
|
|
||||||
RawPalette4Bit = RawTextureBit | Palette4Bit,
|
|
||||||
RawPalette8Bit = RawTextureBit | Palette8Bit,
|
|
||||||
RawDirect16Bit = RawTextureBit | Direct16Bit,
|
|
||||||
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
|
|
||||||
|
|
||||||
Disabled = 8 // Not a register value
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class TransparencyMode : u8
|
|
||||||
{
|
|
||||||
HalfBackgroundPlusHalfForeground = 0,
|
|
||||||
BackgroundPlusForeground = 1,
|
|
||||||
BackgroundMinusForeground = 2,
|
|
||||||
BackgroundPlusQuarterForeground = 3,
|
|
||||||
|
|
||||||
Disabled = 4 // Not a register value
|
|
||||||
};
|
|
||||||
|
|
||||||
union RenderCommand
|
union RenderCommand
|
||||||
{
|
{
|
||||||
u32 bits;
|
u32 bits;
|
||||||
|
@ -258,12 +265,6 @@ protected:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// 4x4 dither matrix.
|
|
||||||
static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0
|
|
||||||
{+2, -2, +3, -1}, // row 1
|
|
||||||
{-3, +1, -4, +0}, // row 2
|
|
||||||
{+4, -1, +2, -2}}; // row 3
|
|
||||||
|
|
||||||
void SoftReset();
|
void SoftReset();
|
||||||
|
|
||||||
// Sets dots per scanline
|
// Sets dots per scanline
|
||||||
|
@ -464,3 +465,5 @@ private:
|
||||||
|
|
||||||
static const GP0CommandHandlerTable s_GP0_command_handler_table;
|
static const GP0CommandHandlerTable s_GP0_command_handler_table;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPU::TextureMode);
|
||||||
|
|
|
@ -163,7 +163,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
|
||||||
|
|
||||||
void GPU_HW::AddDuplicateVertex()
|
void GPU_HW::AddDuplicateVertex()
|
||||||
{
|
{
|
||||||
std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(HWVertex));
|
std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(BatchVertex));
|
||||||
m_batch_current_vertex_ptr++;
|
m_batch_current_vertex_ptr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -175,443 +175,14 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
|
||||||
*bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1);
|
*bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
|
GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
|
||||||
{
|
|
||||||
if (enabled)
|
|
||||||
ss << "#define " << name << " 1\n";
|
|
||||||
else
|
|
||||||
ss << "/* #define " << name << " 0 */\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
void GPU_HW::GenerateShaderHeader(std::stringstream& ss)
|
|
||||||
{
|
|
||||||
ss << "#version 330 core\n\n";
|
|
||||||
ss << "const int RESOLUTION_SCALE = " << m_resolution_scale << ";\n";
|
|
||||||
ss << "const ivec2 VRAM_SIZE = ivec2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
|
|
||||||
ss << "const vec2 RCP_VRAM_SIZE = vec2(1.0, 1.0) / vec2(VRAM_SIZE);\n";
|
|
||||||
ss << R"(
|
|
||||||
|
|
||||||
float fixYCoord(float y)
|
|
||||||
{
|
|
||||||
return 1.0 - RCP_VRAM_SIZE.y - y;
|
|
||||||
}
|
|
||||||
|
|
||||||
int fixYCoord(int y)
|
|
||||||
{
|
|
||||||
return VRAM_SIZE.y - y - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint RGBA8ToRGBA5551(vec4 v)
|
|
||||||
{
|
|
||||||
uint r = uint(v.r * 255.0) >> 3;
|
|
||||||
uint g = uint(v.g * 255.0) >> 3;
|
|
||||||
uint b = uint(v.b * 255.0) >> 3;
|
|
||||||
uint a = (v.a != 0.0) ? 1u : 0u;
|
|
||||||
return (r) | (g << 5) | (b << 10) | (a << 15);
|
|
||||||
}
|
|
||||||
|
|
||||||
vec4 RGBA5551ToRGBA8(uint v)
|
|
||||||
{
|
|
||||||
uint r = (v & 31u);
|
|
||||||
uint g = ((v >> 5) & 31u);
|
|
||||||
uint b = ((v >> 10) & 31u);
|
|
||||||
uint a = ((v >> 15) & 1u);
|
|
||||||
|
|
||||||
// repeat lower bits
|
|
||||||
r = (r << 3) | (r & 7u);
|
|
||||||
g = (g << 3) | (g & 7u);
|
|
||||||
b = (b << 3) | (b & 7u);
|
|
||||||
|
|
||||||
return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a));
|
|
||||||
}
|
|
||||||
)";
|
|
||||||
}
|
|
||||||
|
|
||||||
void GPU_HW::GenerateBatchUniformBuffer(std::stringstream& ss)
|
|
||||||
{
|
|
||||||
ss << R"(
|
|
||||||
uniform UBOBlock {
|
|
||||||
ivec2 u_pos_offset;
|
|
||||||
uvec2 u_texture_window_mask;
|
|
||||||
uvec2 u_texture_window_offset;
|
|
||||||
float u_src_alpha_factor;
|
|
||||||
float u_dst_alpha_factor;
|
|
||||||
};
|
|
||||||
)";
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string GPU_HW::GenerateVertexShader(bool textured)
|
|
||||||
{
|
|
||||||
std::stringstream ss;
|
|
||||||
GenerateShaderHeader(ss);
|
|
||||||
DefineMacro(ss, "TEXTURED", textured);
|
|
||||||
GenerateBatchUniformBuffer(ss);
|
|
||||||
|
|
||||||
ss << R"(
|
|
||||||
in ivec2 a_pos;
|
|
||||||
in vec4 a_col0;
|
|
||||||
in int a_texcoord;
|
|
||||||
in int a_texpage;
|
|
||||||
|
|
||||||
out vec3 v_col0;
|
|
||||||
#if TEXTURED
|
|
||||||
out vec2 v_tex0;
|
|
||||||
flat out ivec4 v_texpage;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void main()
|
|
||||||
{
|
|
||||||
// 0..+1023 -> -1..1
|
|
||||||
float pos_x = (float(a_pos.x + u_pos_offset.x) / 512.0) - 1.0;
|
|
||||||
float pos_y = (float(a_pos.y + u_pos_offset.y) / -256.0) + 1.0;
|
|
||||||
gl_Position = vec4(pos_x, pos_y, 0.0, 1.0);
|
|
||||||
|
|
||||||
v_col0 = a_col0.rgb;
|
|
||||||
#if TEXTURED
|
|
||||||
v_tex0 = vec2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / vec2(255.0);
|
|
||||||
|
|
||||||
// base_x,base_y,palette_x,palette_y
|
|
||||||
v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE;
|
|
||||||
v_texpage.y = ((a_texpage >> 4) & 1) * 256 * RESOLUTION_SCALE;
|
|
||||||
v_texpage.z = ((a_texpage >> 16) & 63) * 16 * RESOLUTION_SCALE;
|
|
||||||
v_texpage.w = ((a_texpage >> 22) & 511) * RESOLUTION_SCALE;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
)";
|
|
||||||
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, TextureMode texture_mode, bool dithering)
|
|
||||||
{
|
|
||||||
const TextureMode actual_texture_mode =
|
|
||||||
static_cast<TextureMode>(static_cast<u8>(texture_mode) & ~static_cast<u8>(TextureMode::RawTextureBit));
|
|
||||||
const bool raw_texture = (static_cast<u8>(texture_mode) & static_cast<u8>(TextureMode::RawTextureBit)) ==
|
|
||||||
static_cast<u8>(TextureMode::RawTextureBit);
|
|
||||||
|
|
||||||
std::stringstream ss;
|
|
||||||
GenerateShaderHeader(ss);
|
|
||||||
GenerateBatchUniformBuffer(ss);
|
|
||||||
DefineMacro(ss, "TRANSPARENCY", transparency != HWBatchRenderMode::TransparencyDisabled);
|
|
||||||
DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == HWBatchRenderMode::OnlyOpaque);
|
|
||||||
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == HWBatchRenderMode::OnlyTransparent);
|
|
||||||
DefineMacro(ss, "TEXTURED", actual_texture_mode != TextureMode::Disabled);
|
|
||||||
DefineMacro(ss, "PALETTE",
|
|
||||||
actual_texture_mode == GPU::TextureMode::Palette4Bit ||
|
|
||||||
actual_texture_mode == GPU::TextureMode::Palette8Bit);
|
|
||||||
DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit);
|
|
||||||
DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit);
|
|
||||||
DefineMacro(ss, "RAW_TEXTURE", raw_texture);
|
|
||||||
DefineMacro(ss, "DITHERING", dithering);
|
|
||||||
DefineMacro(ss, "TRUE_COLOR", m_true_color);
|
|
||||||
|
|
||||||
ss << "const int[16] s_dither_values = int[16]( ";
|
|
||||||
for (u32 i = 0; i < 16; i++)
|
|
||||||
{
|
|
||||||
if (i > 0)
|
|
||||||
ss << ", ";
|
|
||||||
ss << DITHER_MATRIX[i / 4][i % 4];
|
|
||||||
}
|
|
||||||
ss << " );\n";
|
|
||||||
|
|
||||||
ss << R"(
|
|
||||||
in vec3 v_col0;
|
|
||||||
#if TEXTURED
|
|
||||||
in vec2 v_tex0;
|
|
||||||
flat in ivec4 v_texpage;
|
|
||||||
uniform sampler2D samp0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
out vec4 o_col0;
|
|
||||||
|
|
||||||
ivec3 ApplyDithering(ivec3 icol)
|
|
||||||
{
|
|
||||||
ivec2 fc = (ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & ivec2(3, 3);
|
|
||||||
int offset = s_dither_values[fc.y * 4 + fc.x];
|
|
||||||
return icol + ivec3(offset, offset, offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
ivec3 TruncateTo15Bit(ivec3 icol)
|
|
||||||
{
|
|
||||||
icol = clamp(icol, ivec3(0, 0, 0), ivec3(255, 255, 255));
|
|
||||||
return (icol & ivec3(~7, ~7, ~7)) | ((icol >> 3) & ivec3(7, 7, 7));
|
|
||||||
}
|
|
||||||
|
|
||||||
#if TEXTURED
|
|
||||||
ivec2 ApplyNativeTextureWindow(ivec2 coords)
|
|
||||||
{
|
|
||||||
uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
|
|
||||||
uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
|
|
||||||
return ivec2(int(x), int(y));
|
|
||||||
}
|
|
||||||
|
|
||||||
ivec2 ApplyTextureWindow(ivec2 coords)
|
|
||||||
{
|
|
||||||
if (RESOLUTION_SCALE == 1)
|
|
||||||
return ApplyNativeTextureWindow(coords);
|
|
||||||
|
|
||||||
ivec2 downscaled_coords = coords / ivec2(RESOLUTION_SCALE);
|
|
||||||
ivec2 coords_offset = coords % ivec2(RESOLUTION_SCALE);
|
|
||||||
return (ApplyNativeTextureWindow(downscaled_coords) * ivec2(RESOLUTION_SCALE)) + coords_offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
ivec4 SampleFromVRAM(vec2 coord)
|
|
||||||
{
|
|
||||||
// from 0..1 to 0..255
|
|
||||||
ivec2 icoord = ivec2(coord * vec2(255 * RESOLUTION_SCALE));
|
|
||||||
icoord = ApplyTextureWindow(icoord);
|
|
||||||
|
|
||||||
// adjust for tightly packed palette formats
|
|
||||||
ivec2 index_coord = icoord;
|
|
||||||
#if PALETTE_4_BIT
|
|
||||||
index_coord.x /= 4;
|
|
||||||
#elif PALETTE_8_BIT
|
|
||||||
index_coord.x /= 2;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// fixup coords
|
|
||||||
ivec2 vicoord = ivec2(v_texpage.x + index_coord.x, fixYCoord(v_texpage.y + index_coord.y));
|
|
||||||
|
|
||||||
// load colour/palette
|
|
||||||
vec4 color = texelFetch(samp0, vicoord, 0);
|
|
||||||
|
|
||||||
// apply palette
|
|
||||||
#if PALETTE
|
|
||||||
#if PALETTE_4_BIT
|
|
||||||
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3;
|
|
||||||
uint vram_value = RGBA8ToRGBA5551(color);
|
|
||||||
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
|
|
||||||
#elif PALETTE_8_BIT
|
|
||||||
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1;
|
|
||||||
uint vram_value = RGBA8ToRGBA5551(color);
|
|
||||||
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
|
|
||||||
#endif
|
|
||||||
ivec2 palette_icoord = ivec2(v_texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(v_texpage.w));
|
|
||||||
color = texelFetch(samp0, palette_icoord, 0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return ivec4(color * vec4(255.0, 255.0, 255.0, 255.0));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void main()
|
|
||||||
{
|
|
||||||
ivec3 vertcol = ivec3(v_col0 * vec3(255.0, 255.0, 255.0));
|
|
||||||
|
|
||||||
bool semitransparent;
|
|
||||||
bool new_mask_bit;
|
|
||||||
ivec3 icolor;
|
|
||||||
|
|
||||||
#if TEXTURED
|
|
||||||
ivec4 texcol = SampleFromVRAM(v_tex0);
|
|
||||||
if (texcol == ivec4(0.0, 0.0, 0.0, 0.0))
|
|
||||||
discard;
|
|
||||||
|
|
||||||
// Grab semitransparent bit from the texture color.
|
|
||||||
semitransparent = (texcol.a != 0);
|
|
||||||
|
|
||||||
#if RAW_TEXTURE
|
|
||||||
icolor = texcol.rgb;
|
|
||||||
#else
|
|
||||||
icolor = (vertcol * texcol.rgb) >> 7;
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
// All pixels are semitransparent for untextured polygons.
|
|
||||||
semitransparent = true;
|
|
||||||
icolor = vertcol;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Apply dithering
|
|
||||||
#if DITHERING
|
|
||||||
icolor = ApplyDithering(icolor);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Clip to 15-bit range
|
|
||||||
#if !TRUE_COLOR
|
|
||||||
icolor = TruncateTo15Bit(icolor);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Normalize
|
|
||||||
vec3 color = vec3(icolor) / vec3(255.0, 255.0, 255.0);
|
|
||||||
|
|
||||||
#if TRANSPARENCY
|
|
||||||
// Apply semitransparency. If not a semitransparent texel, destination alpha is ignored.
|
|
||||||
if (semitransparent)
|
|
||||||
{
|
|
||||||
#if TRANSPARENCY_ONLY_OPAQUE
|
|
||||||
discard;
|
|
||||||
#endif
|
|
||||||
o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
#if TRANSPARENCY_ONLY_TRANSPARENCY
|
|
||||||
discard;
|
|
||||||
#endif
|
|
||||||
o_col0 = vec4(color, 0.0);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
o_col0 = vec4(color, 0.0);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
)";
|
|
||||||
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string GPU_HW::GenerateScreenQuadVertexShader()
|
|
||||||
{
|
|
||||||
std::stringstream ss;
|
|
||||||
GenerateShaderHeader(ss);
|
|
||||||
ss << R"(
|
|
||||||
|
|
||||||
out vec2 v_tex0;
|
|
||||||
|
|
||||||
void main()
|
|
||||||
{
|
|
||||||
v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2));
|
|
||||||
gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f);
|
|
||||||
gl_Position.y = -gl_Position.y;
|
|
||||||
}
|
|
||||||
)";
|
|
||||||
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string GPU_HW::GenerateFillFragmentShader()
|
|
||||||
{
|
|
||||||
std::stringstream ss;
|
|
||||||
GenerateShaderHeader(ss);
|
|
||||||
|
|
||||||
ss << R"(
|
|
||||||
uniform vec4 fill_color;
|
|
||||||
out vec4 o_col0;
|
|
||||||
|
|
||||||
void main()
|
|
||||||
{
|
|
||||||
o_col0 = fill_color;
|
|
||||||
}
|
|
||||||
)";
|
|
||||||
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string GPU_HW::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
|
|
||||||
{
|
|
||||||
std::stringstream ss;
|
|
||||||
GenerateShaderHeader(ss);
|
|
||||||
DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
|
|
||||||
DefineMacro(ss, "INTERLACED", interlaced);
|
|
||||||
|
|
||||||
ss << R"(
|
|
||||||
in vec2 v_tex0;
|
|
||||||
out vec4 o_col0;
|
|
||||||
|
|
||||||
uniform sampler2D samp0;
|
|
||||||
uniform ivec3 u_base_coords;
|
|
||||||
|
|
||||||
ivec2 GetCoords(vec2 fragcoord)
|
|
||||||
{
|
|
||||||
ivec2 icoords = ivec2(fragcoord);
|
|
||||||
#if INTERLACED
|
|
||||||
if ((((icoords.y - u_base_coords.z) / RESOLUTION_SCALE) & 1) != 0)
|
|
||||||
discard;
|
|
||||||
#endif
|
|
||||||
return icoords;
|
|
||||||
}
|
|
||||||
|
|
||||||
void main()
|
|
||||||
{
|
|
||||||
ivec2 icoords = GetCoords(gl_FragCoord.xy);
|
|
||||||
|
|
||||||
#if DEPTH_24BIT
|
|
||||||
// compute offset in dwords from the start of the 24-bit values
|
|
||||||
ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y);
|
|
||||||
int xoff = int(icoords.x);
|
|
||||||
int dword_index = (xoff / 2) + (xoff / 4);
|
|
||||||
|
|
||||||
// sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
|
|
||||||
uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
|
|
||||||
uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
|
|
||||||
uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
|
|
||||||
uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
|
|
||||||
|
|
||||||
// select the bit for this pixel depending on its offset in the 4-pixel block
|
|
||||||
uint r, g, b;
|
|
||||||
int block_offset = xoff & 3;
|
|
||||||
if (block_offset == 0)
|
|
||||||
{
|
|
||||||
r = s0 & 0xFFu;
|
|
||||||
g = s0 >> 8;
|
|
||||||
b = s1 & 0xFFu;
|
|
||||||
}
|
|
||||||
else if (block_offset == 1)
|
|
||||||
{
|
|
||||||
r = s1 >> 8;
|
|
||||||
g = s2 & 0xFFu;
|
|
||||||
b = s2 >> 8;
|
|
||||||
}
|
|
||||||
else if (block_offset == 2)
|
|
||||||
{
|
|
||||||
r = s1 & 0xFFu;
|
|
||||||
g = s1 >> 8;
|
|
||||||
b = s2 & 0xFFu;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
r = s2 >> 8;
|
|
||||||
g = s3 & 0xFFu;
|
|
||||||
b = s3 >> 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
// and normalize
|
|
||||||
o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0);
|
|
||||||
#else
|
|
||||||
// load and return
|
|
||||||
o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
)";
|
|
||||||
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string GPU_HW::GenerateVRAMWriteFragmentShader()
|
|
||||||
{
|
|
||||||
std::stringstream ss;
|
|
||||||
GenerateShaderHeader(ss);
|
|
||||||
|
|
||||||
ss << R"(
|
|
||||||
|
|
||||||
uniform ivec2 u_base_coords;
|
|
||||||
uniform ivec2 u_size;
|
|
||||||
uniform usamplerBuffer samp0;
|
|
||||||
|
|
||||||
out vec4 o_col0;
|
|
||||||
|
|
||||||
void main()
|
|
||||||
{
|
|
||||||
ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
|
||||||
ivec2 offset = coords - u_base_coords;
|
|
||||||
offset.y = u_size.y - offset.y - 1;
|
|
||||||
|
|
||||||
int buffer_offset = offset.y * u_size.x + offset.x;
|
|
||||||
uint value = texelFetch(samp0, buffer_offset).r;
|
|
||||||
|
|
||||||
o_col0 = RGBA5551ToRGBA8(value);
|
|
||||||
})";
|
|
||||||
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
|
|
||||||
{
|
{
|
||||||
if (rc.primitive == Primitive::Line)
|
if (rc.primitive == Primitive::Line)
|
||||||
return rc.polyline ? HWPrimitive::LineStrip : HWPrimitive::Lines;
|
return rc.polyline ? BatchPrimitive::LineStrip : BatchPrimitive::Lines;
|
||||||
else if ((rc.primitive == Primitive::Polygon && rc.quad_polygon) || rc.primitive == Primitive::Rectangle)
|
else if ((rc.primitive == Primitive::Polygon && rc.quad_polygon) || rc.primitive == Primitive::Rectangle)
|
||||||
return HWPrimitive::TriangleStrip;
|
return BatchPrimitive::TriangleStrip;
|
||||||
else
|
else
|
||||||
return HWPrimitive::Triangles;
|
return BatchPrimitive::Triangles;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
|
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
|
||||||
|
@ -687,13 +258,13 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
|
||||||
// has any state changed which requires a new batch?
|
// has any state changed which requires a new batch?
|
||||||
const TransparencyMode transparency_mode =
|
const TransparencyMode transparency_mode =
|
||||||
rc.transparency_enable ? m_render_state.transparency_mode : TransparencyMode::Disabled;
|
rc.transparency_enable ? m_render_state.transparency_mode : TransparencyMode::Disabled;
|
||||||
const HWPrimitive rc_primitive = GetPrimitiveForCommand(rc);
|
const BatchPrimitive rc_primitive = GetPrimitiveForCommand(rc);
|
||||||
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
|
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
|
||||||
const u32 max_added_vertices = num_vertices + 2;
|
const u32 max_added_vertices = num_vertices + 2;
|
||||||
if (!IsFlushed())
|
if (!IsFlushed())
|
||||||
{
|
{
|
||||||
const bool buffer_overflow = GetBatchVertexSpace() < max_added_vertices;
|
const bool buffer_overflow = GetBatchVertexSpace() < max_added_vertices;
|
||||||
if (buffer_overflow || rc_primitive == HWPrimitive::LineStrip || m_batch.texture_mode != texture_mode ||
|
if (buffer_overflow || rc_primitive == BatchPrimitive::LineStrip || m_batch.texture_mode != texture_mode ||
|
||||||
m_batch.transparency_mode != transparency_mode || m_batch.primitive != rc_primitive ||
|
m_batch.transparency_mode != transparency_mode || m_batch.primitive != rc_primitive ||
|
||||||
dithering_enable != m_batch.dithering || m_render_state.IsTextureWindowChanged())
|
dithering_enable != m_batch.dithering || m_render_state.IsTextureWindowChanged())
|
||||||
{
|
{
|
||||||
|
|
|
@ -8,6 +8,22 @@
|
||||||
class GPU_HW : public GPU
|
class GPU_HW : public GPU
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
enum class BatchPrimitive : u8
|
||||||
|
{
|
||||||
|
Lines = 0,
|
||||||
|
LineStrip = 1,
|
||||||
|
Triangles = 2,
|
||||||
|
TriangleStrip = 3
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class BatchRenderMode : u8
|
||||||
|
{
|
||||||
|
TransparencyDisabled,
|
||||||
|
TransparentAndOpaque,
|
||||||
|
OnlyOpaque,
|
||||||
|
OnlyTransparent
|
||||||
|
};
|
||||||
|
|
||||||
GPU_HW();
|
GPU_HW();
|
||||||
virtual ~GPU_HW();
|
virtual ~GPU_HW();
|
||||||
|
|
||||||
|
@ -16,23 +32,7 @@ public:
|
||||||
virtual void UpdateSettings() override;
|
virtual void UpdateSettings() override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
enum class HWPrimitive : u8
|
struct BatchVertex
|
||||||
{
|
|
||||||
Lines = 0,
|
|
||||||
LineStrip = 1,
|
|
||||||
Triangles = 2,
|
|
||||||
TriangleStrip = 3
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class HWBatchRenderMode : u8
|
|
||||||
{
|
|
||||||
TransparencyDisabled,
|
|
||||||
TransparentAndOpaque,
|
|
||||||
OnlyOpaque,
|
|
||||||
OnlyTransparent
|
|
||||||
};
|
|
||||||
|
|
||||||
struct HWVertex
|
|
||||||
{
|
{
|
||||||
s32 x;
|
s32 x;
|
||||||
s32 y;
|
s32 y;
|
||||||
|
@ -55,9 +55,9 @@ protected:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct HWBatchConfig
|
struct BatchConfig
|
||||||
{
|
{
|
||||||
HWPrimitive primitive;
|
BatchPrimitive primitive;
|
||||||
TextureMode texture_mode;
|
TextureMode texture_mode;
|
||||||
TransparencyMode transparency_mode;
|
TransparencyMode transparency_mode;
|
||||||
bool dithering;
|
bool dithering;
|
||||||
|
@ -71,14 +71,14 @@ protected:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the render mode for this batch.
|
// Returns the render mode for this batch.
|
||||||
HWBatchRenderMode GetRenderMode() const
|
BatchRenderMode GetRenderMode() const
|
||||||
{
|
{
|
||||||
return transparency_mode == TransparencyMode::Disabled ? HWBatchRenderMode::TransparencyDisabled :
|
return transparency_mode == TransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled :
|
||||||
HWBatchRenderMode::TransparentAndOpaque;
|
BatchRenderMode::TransparentAndOpaque;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct HWBatchUBOData
|
struct BatchUBOData
|
||||||
{
|
{
|
||||||
s32 u_pos_offset[2];
|
s32 u_pos_offset[2];
|
||||||
u32 u_texture_window_mask[2];
|
u32 u_texture_window_mask[2];
|
||||||
|
@ -90,7 +90,7 @@ protected:
|
||||||
static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32);
|
static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32);
|
||||||
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||||
static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
|
static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
|
||||||
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
|
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex);
|
||||||
static constexpr u32 UNIFORM_BUFFER_SIZE = 512 * 1024;
|
static constexpr u32 UNIFORM_BUFFER_SIZE = 512 * 1024;
|
||||||
|
|
||||||
static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba)
|
static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba)
|
||||||
|
@ -121,31 +121,21 @@ protected:
|
||||||
return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale));
|
return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GenerateVertexShader(bool textured);
|
BatchVertex* m_batch_start_vertex_ptr = nullptr;
|
||||||
std::string GenerateFragmentShader(HWBatchRenderMode transparency, TextureMode texture_mode, bool dithering);
|
BatchVertex* m_batch_end_vertex_ptr = nullptr;
|
||||||
std::string GenerateScreenQuadVertexShader();
|
BatchVertex* m_batch_current_vertex_ptr = nullptr;
|
||||||
std::string GenerateFillFragmentShader();
|
|
||||||
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
|
|
||||||
std::string GenerateVRAMWriteFragmentShader();
|
|
||||||
|
|
||||||
HWVertex* m_batch_start_vertex_ptr = nullptr;
|
|
||||||
HWVertex* m_batch_end_vertex_ptr = nullptr;
|
|
||||||
HWVertex* m_batch_current_vertex_ptr = nullptr;
|
|
||||||
u32 m_batch_base_vertex = 0;
|
u32 m_batch_base_vertex = 0;
|
||||||
|
|
||||||
u32 m_resolution_scale = 1;
|
u32 m_resolution_scale = 1;
|
||||||
u32 m_max_resolution_scale = 1;
|
u32 m_max_resolution_scale = 1;
|
||||||
bool m_true_color = false;
|
bool m_true_color = false;
|
||||||
|
|
||||||
HWBatchConfig m_batch = {};
|
BatchConfig m_batch = {};
|
||||||
HWBatchUBOData m_batch_ubo_data = {};
|
BatchUBOData m_batch_ubo_data = {};
|
||||||
bool m_batch_ubo_dirty = true;
|
bool m_batch_ubo_dirty = true;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static HWPrimitive GetPrimitiveForCommand(RenderCommand rc);
|
static BatchPrimitive GetPrimitiveForCommand(RenderCommand rc);
|
||||||
|
|
||||||
void GenerateShaderHeader(std::stringstream& ss);
|
|
||||||
void GenerateBatchUniformBuffer(std::stringstream& ss);
|
|
||||||
|
|
||||||
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
|
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
|
||||||
void AddDuplicateVertex();
|
void AddDuplicateVertex();
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
#include "YBaseLib/Assert.h"
|
#include "YBaseLib/Assert.h"
|
||||||
#include "YBaseLib/Log.h"
|
#include "YBaseLib/Log.h"
|
||||||
#include "YBaseLib/String.h"
|
#include "YBaseLib/String.h"
|
||||||
|
#include "gpu_hw_shadergen.h"
|
||||||
#include "host_interface.h"
|
#include "host_interface.h"
|
||||||
#include "imgui.h"
|
#include "imgui.h"
|
||||||
#include "system.h"
|
#include "system.h"
|
||||||
|
@ -138,9 +139,9 @@ void GPU_HW_OpenGL::MapBatchVertexPointer(u32 required_vertices)
|
||||||
Assert(!m_batch_start_vertex_ptr);
|
Assert(!m_batch_start_vertex_ptr);
|
||||||
|
|
||||||
const GL::StreamBuffer::MappingResult res =
|
const GL::StreamBuffer::MappingResult res =
|
||||||
m_vertex_stream_buffer->Map(sizeof(HWVertex), required_vertices * sizeof(HWVertex));
|
m_vertex_stream_buffer->Map(sizeof(BatchVertex), required_vertices * sizeof(BatchVertex));
|
||||||
|
|
||||||
m_batch_start_vertex_ptr = static_cast<HWVertex*>(res.pointer);
|
m_batch_start_vertex_ptr = static_cast<BatchVertex*>(res.pointer);
|
||||||
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
|
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
|
||||||
m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned;
|
m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned;
|
||||||
m_batch_base_vertex = res.index_aligned;
|
m_batch_base_vertex = res.index_aligned;
|
||||||
|
@ -246,11 +247,11 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
|
||||||
glEnableVertexAttribArray(1);
|
glEnableVertexAttribArray(1);
|
||||||
glEnableVertexAttribArray(2);
|
glEnableVertexAttribArray(2);
|
||||||
glEnableVertexAttribArray(3);
|
glEnableVertexAttribArray(3);
|
||||||
glVertexAttribIPointer(0, 2, GL_INT, sizeof(HWVertex), reinterpret_cast<void*>(offsetof(HWVertex, x)));
|
glVertexAttribIPointer(0, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, x)));
|
||||||
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(HWVertex),
|
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
|
||||||
reinterpret_cast<void*>(offsetof(HWVertex, color)));
|
reinterpret_cast<void*>(offsetof(BatchVertex, color)));
|
||||||
glVertexAttribIPointer(2, 2, GL_INT, sizeof(HWVertex), reinterpret_cast<void*>(offsetof(HWVertex, texcoord)));
|
glVertexAttribIPointer(2, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, texcoord)));
|
||||||
glVertexAttribIPointer(3, 1, GL_INT, sizeof(HWVertex), reinterpret_cast<void*>(offsetof(HWVertex, texpage)));
|
glVertexAttribIPointer(3, 1, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, texpage)));
|
||||||
glBindVertexArray(0);
|
glBindVertexArray(0);
|
||||||
|
|
||||||
glGenVertexArrays(1, &m_attributeless_vao_id);
|
glGenVertexArrays(1, &m_attributeless_vao_id);
|
||||||
|
@ -280,31 +281,56 @@ void GPU_HW_OpenGL::CreateTextureBuffer()
|
||||||
|
|
||||||
bool GPU_HW_OpenGL::CompilePrograms()
|
bool GPU_HW_OpenGL::CompilePrograms()
|
||||||
{
|
{
|
||||||
|
GPU_HW_ShaderGen shadergen(GPU_HW_ShaderGen::Backend::OpenGL, m_resolution_scale, m_true_color);
|
||||||
|
|
||||||
for (u32 render_mode = 0; render_mode < 4; render_mode++)
|
for (u32 render_mode = 0; render_mode < 4; render_mode++)
|
||||||
{
|
{
|
||||||
for (u32 texture_mode = 0; texture_mode < 9; texture_mode++)
|
for (u32 texture_mode = 0; texture_mode < 9; texture_mode++)
|
||||||
{
|
{
|
||||||
for (u8 dithering = 0; dithering < 2; dithering++)
|
for (u8 dithering = 0; dithering < 2; dithering++)
|
||||||
{
|
{
|
||||||
if (!CompileProgram(m_render_programs[render_mode][texture_mode][dithering],
|
const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled);
|
||||||
static_cast<HWBatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode),
|
const std::string vs = shadergen.GenerateBatchVertexShader(textured);
|
||||||
ConvertToBoolUnchecked(dithering)))
|
const std::string fs = shadergen.GenerateBatchFragmentShader(static_cast<BatchRenderMode>(render_mode),
|
||||||
{
|
static_cast<TextureMode>(texture_mode),
|
||||||
|
ConvertToBoolUnchecked(dithering));
|
||||||
|
|
||||||
|
GL::Program& prog = m_render_programs[render_mode][texture_mode][dithering];
|
||||||
|
if (!prog.Compile(vs, fs))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
prog.BindAttribute(0, "a_pos");
|
||||||
|
prog.BindAttribute(1, "a_col0");
|
||||||
|
if (textured)
|
||||||
|
{
|
||||||
|
prog.BindAttribute(2, "a_texcoord");
|
||||||
|
prog.BindAttribute(3, "a_texpage");
|
||||||
|
}
|
||||||
|
|
||||||
|
prog.BindFragData(0, "o_col0");
|
||||||
|
|
||||||
|
if (!prog.Link())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
prog.BindUniformBlock("UBOBlock", 1);
|
||||||
|
if (textured)
|
||||||
|
{
|
||||||
|
prog.Bind();
|
||||||
|
prog.RegisterUniform("samp0");
|
||||||
|
prog.Uniform1i(0, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Use string_view
|
|
||||||
for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
|
for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
|
||||||
{
|
{
|
||||||
for (u8 interlaced = 0; interlaced < 2; interlaced++)
|
for (u8 interlaced = 0; interlaced < 2; interlaced++)
|
||||||
{
|
{
|
||||||
GL::Program& prog = m_display_programs[depth_24bit][interlaced];
|
GL::Program& prog = m_display_programs[depth_24bit][interlaced];
|
||||||
const std::string vs = GenerateScreenQuadVertexShader();
|
const std::string vs = shadergen.GenerateScreenQuadVertexShader();
|
||||||
const std::string fs =
|
const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit),
|
||||||
GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), ConvertToBoolUnchecked(interlaced));
|
ConvertToBoolUnchecked(interlaced));
|
||||||
if (!prog.Compile(vs, fs))
|
if (!prog.Compile(vs, fs))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -319,8 +345,11 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!m_vram_write_program.Compile(GenerateScreenQuadVertexShader(), GenerateVRAMWriteFragmentShader()))
|
if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
|
||||||
|
shadergen.GenerateVRAMWriteFragmentShader()))
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
m_vram_write_program.BindFragData(0, "o_col0");
|
m_vram_write_program.BindFragData(0, "o_col0");
|
||||||
if (!m_vram_write_program.Link())
|
if (!m_vram_write_program.Link())
|
||||||
|
@ -335,41 +364,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode,
|
void GPU_HW_OpenGL::SetDrawState(BatchRenderMode render_mode)
|
||||||
bool dithering)
|
|
||||||
{
|
|
||||||
const bool textured = texture_mode != TextureMode::Disabled;
|
|
||||||
const std::string vs = GenerateVertexShader(textured);
|
|
||||||
const std::string fs = GenerateFragmentShader(render_mode, texture_mode, dithering);
|
|
||||||
if (!prog.Compile(vs, fs))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
prog.BindAttribute(0, "a_pos");
|
|
||||||
prog.BindAttribute(1, "a_col0");
|
|
||||||
if (textured)
|
|
||||||
{
|
|
||||||
prog.BindAttribute(2, "a_texcoord");
|
|
||||||
prog.BindAttribute(3, "a_texpage");
|
|
||||||
}
|
|
||||||
|
|
||||||
prog.BindFragData(0, "o_col0");
|
|
||||||
|
|
||||||
if (!prog.Link())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
prog.BindUniformBlock("UBOBlock", 1);
|
|
||||||
|
|
||||||
if (textured)
|
|
||||||
{
|
|
||||||
prog.Bind();
|
|
||||||
prog.RegisterUniform("samp0");
|
|
||||||
prog.Uniform1i(0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
|
|
||||||
{
|
{
|
||||||
const GL::Program& prog = m_render_programs[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
|
const GL::Program& prog = m_render_programs[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
|
||||||
[BoolToUInt8(m_batch.dithering)];
|
[BoolToUInt8(m_batch.dithering)];
|
||||||
|
@ -378,7 +373,7 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
|
||||||
if (m_batch.texture_mode != TextureMode::Disabled)
|
if (m_batch.texture_mode != TextureMode::Disabled)
|
||||||
m_vram_read_texture->Bind();
|
m_vram_read_texture->Bind();
|
||||||
|
|
||||||
if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == HWBatchRenderMode::OnlyOpaque)
|
if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque)
|
||||||
{
|
{
|
||||||
glDisable(GL_BLEND);
|
glDisable(GL_BLEND);
|
||||||
}
|
}
|
||||||
|
@ -732,7 +727,7 @@ void GPU_HW_OpenGL::FlushRender()
|
||||||
m_stats.num_batches++;
|
m_stats.num_batches++;
|
||||||
m_stats.num_vertices += vertex_count;
|
m_stats.num_vertices += vertex_count;
|
||||||
|
|
||||||
m_vertex_stream_buffer->Unmap(vertex_count * sizeof(HWVertex));
|
m_vertex_stream_buffer->Unmap(vertex_count * sizeof(BatchVertex));
|
||||||
m_vertex_stream_buffer->Bind();
|
m_vertex_stream_buffer->Bind();
|
||||||
m_batch_start_vertex_ptr = nullptr;
|
m_batch_start_vertex_ptr = nullptr;
|
||||||
m_batch_end_vertex_ptr = nullptr;
|
m_batch_end_vertex_ptr = nullptr;
|
||||||
|
@ -742,9 +737,9 @@ void GPU_HW_OpenGL::FlushRender()
|
||||||
|
|
||||||
if (m_batch.NeedsTwoPassRendering())
|
if (m_batch.NeedsTwoPassRendering())
|
||||||
{
|
{
|
||||||
SetDrawState(HWBatchRenderMode::OnlyTransparent);
|
SetDrawState(BatchRenderMode::OnlyTransparent);
|
||||||
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
|
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
|
||||||
SetDrawState(HWBatchRenderMode::OnlyOpaque);
|
SetDrawState(BatchRenderMode::OnlyOpaque);
|
||||||
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
|
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
@ -58,8 +58,7 @@ private:
|
||||||
void CreateTextureBuffer();
|
void CreateTextureBuffer();
|
||||||
|
|
||||||
bool CompilePrograms();
|
bool CompilePrograms();
|
||||||
bool CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, bool dithering);
|
void SetDrawState(BatchRenderMode render_mode);
|
||||||
void SetDrawState(HWBatchRenderMode render_mode);
|
|
||||||
void UploadUniformBlock(const void* data, u32 data_size);
|
void UploadUniformBlock(const void* data, u32 data_size);
|
||||||
|
|
||||||
// downsample texture - used for readbacks at >1xIR.
|
// downsample texture - used for readbacks at >1xIR.
|
||||||
|
|
436
src/core/gpu_hw_shadergen.cpp
Normal file
436
src/core/gpu_hw_shadergen.cpp
Normal file
|
@ -0,0 +1,436 @@
|
||||||
|
#include "gpu_hw_shadergen.h"
|
||||||
|
|
||||||
|
// Captures the target backend and the generation options; performs no other work.
GPU_HW_ShaderGen::GPU_HW_ShaderGen(Backend backend, u32 resolution_scale, bool true_color)
  : m_backend(backend)
  , m_resolution_scale(resolution_scale)
  , m_true_color(true_color)
{
}

// The generator holds only plain values, so the compiler-generated destructor suffices.
GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default;
|
||||||
|
|
||||||
|
// Writes a preprocessor definition for a boolean shader option.
//
// ss      - stream receiving the shader source.
// name    - macro identifier.
// enabled - selects the value: 1 when true, 0 when false.
//
// The macro is always defined (as 1 or 0) rather than being commented out when
// disabled, so that `#if NAME` / `#if !NAME` in the generated shaders never
// tests an undefined identifier, which the GLSL specification treats as an error.
static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
{
  ss << "#define " << name << ' ' << (enabled ? 1 : 0) << '\n';
}
|
||||||
|
|
||||||
|
void GPU_HW_ShaderGen::GenerateShaderHeader(std::stringstream& ss)
|
||||||
|
{
|
||||||
|
ss << "#version 330 core\n\n";
|
||||||
|
ss << "const int RESOLUTION_SCALE = " << m_resolution_scale << ";\n";
|
||||||
|
ss << "const ivec2 VRAM_SIZE = ivec2(" << GPU::VRAM_WIDTH << ", " << GPU::VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
|
||||||
|
ss << "const vec2 RCP_VRAM_SIZE = vec2(1.0, 1.0) / vec2(VRAM_SIZE);\n";
|
||||||
|
ss << R"(
|
||||||
|
|
||||||
|
float fixYCoord(float y)
|
||||||
|
{
|
||||||
|
return 1.0 - RCP_VRAM_SIZE.y - y;
|
||||||
|
}
|
||||||
|
|
||||||
|
int fixYCoord(int y)
|
||||||
|
{
|
||||||
|
return VRAM_SIZE.y - y - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint RGBA8ToRGBA5551(vec4 v)
|
||||||
|
{
|
||||||
|
uint r = uint(v.r * 255.0) >> 3;
|
||||||
|
uint g = uint(v.g * 255.0) >> 3;
|
||||||
|
uint b = uint(v.b * 255.0) >> 3;
|
||||||
|
uint a = (v.a != 0.0) ? 1u : 0u;
|
||||||
|
return (r) | (g << 5) | (b << 10) | (a << 15);
|
||||||
|
}
|
||||||
|
|
||||||
|
vec4 RGBA5551ToRGBA8(uint v)
|
||||||
|
{
|
||||||
|
uint r = (v & 31u);
|
||||||
|
uint g = ((v >> 5) & 31u);
|
||||||
|
uint b = ((v >> 10) & 31u);
|
||||||
|
uint a = ((v >> 15) & 1u);
|
||||||
|
|
||||||
|
// repeat lower bits
|
||||||
|
r = (r << 3) | (r & 7u);
|
||||||
|
g = (g << 3) | (g & 7u);
|
||||||
|
b = (b << 3) | (b & 7u);
|
||||||
|
|
||||||
|
return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a));
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends the declaration of the "UBOBlock" uniform block consumed by the
// batch vertex and fragment shaders.
void GPU_HW_ShaderGen::GenerateBatchUniformBuffer(std::stringstream& ss)
{
  static const char ubo_declaration[] = R"(
uniform UBOBlock {
  ivec2 u_pos_offset;
  uvec2 u_texture_window_mask;
  uvec2 u_texture_window_offset;
  float u_src_alpha_factor;
  float u_dst_alpha_factor;
};
)";

  ss << ubo_declaration;
}
|
||||||
|
|
||||||
|
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
GenerateShaderHeader(ss);
|
||||||
|
DefineMacro(ss, "TEXTURED", textured);
|
||||||
|
GenerateBatchUniformBuffer(ss);
|
||||||
|
|
||||||
|
ss << R"(
|
||||||
|
in ivec2 a_pos;
|
||||||
|
in vec4 a_col0;
|
||||||
|
in int a_texcoord;
|
||||||
|
in int a_texpage;
|
||||||
|
|
||||||
|
out vec3 v_col0;
|
||||||
|
#if TEXTURED
|
||||||
|
out vec2 v_tex0;
|
||||||
|
flat out ivec4 v_texpage;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
// 0..+1023 -> -1..1
|
||||||
|
float pos_x = (float(a_pos.x + u_pos_offset.x) / 512.0) - 1.0;
|
||||||
|
float pos_y = (float(a_pos.y + u_pos_offset.y) / -256.0) + 1.0;
|
||||||
|
gl_Position = vec4(pos_x, pos_y, 0.0, 1.0);
|
||||||
|
|
||||||
|
v_col0 = a_col0.rgb;
|
||||||
|
#if TEXTURED
|
||||||
|
v_tex0 = vec2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / vec2(255.0);
|
||||||
|
|
||||||
|
// base_x,base_y,palette_x,palette_y
|
||||||
|
v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE;
|
||||||
|
v_texpage.y = ((a_texpage >> 4) & 1) * 256 * RESOLUTION_SCALE;
|
||||||
|
v_texpage.z = ((a_texpage >> 16) & 63) * 16 * RESOLUTION_SCALE;
|
||||||
|
v_texpage.w = ((a_texpage >> 22) & 511) * RESOLUTION_SCALE;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency,
|
||||||
|
GPU::TextureMode texture_mode, bool dithering)
|
||||||
|
{
|
||||||
|
const GPU::TextureMode actual_texture_mode = texture_mode & ~GPU::TextureMode::RawTextureBit;
|
||||||
|
const bool raw_texture = (texture_mode & GPU::TextureMode::RawTextureBit) == GPU::TextureMode::RawTextureBit;
|
||||||
|
|
||||||
|
std::stringstream ss;
|
||||||
|
GenerateShaderHeader(ss);
|
||||||
|
GenerateBatchUniformBuffer(ss);
|
||||||
|
DefineMacro(ss, "TRANSPARENCY", transparency != GPU_HW::BatchRenderMode::TransparencyDisabled);
|
||||||
|
DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == GPU_HW::BatchRenderMode::OnlyOpaque);
|
||||||
|
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == GPU_HW::BatchRenderMode::OnlyTransparent);
|
||||||
|
DefineMacro(ss, "TEXTURED", actual_texture_mode != GPU::TextureMode::Disabled);
|
||||||
|
DefineMacro(ss, "PALETTE",
|
||||||
|
actual_texture_mode == GPU::TextureMode::Palette4Bit ||
|
||||||
|
actual_texture_mode == GPU::TextureMode::Palette8Bit);
|
||||||
|
DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit);
|
||||||
|
DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit);
|
||||||
|
DefineMacro(ss, "RAW_TEXTURE", raw_texture);
|
||||||
|
DefineMacro(ss, "DITHERING", dithering);
|
||||||
|
DefineMacro(ss, "TRUE_COLOR", m_true_color);
|
||||||
|
|
||||||
|
ss << "const int[16] s_dither_values = int[16]( ";
|
||||||
|
for (u32 i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
if (i > 0)
|
||||||
|
ss << ", ";
|
||||||
|
ss << GPU::DITHER_MATRIX[i / 4][i % 4];
|
||||||
|
}
|
||||||
|
ss << " );\n";
|
||||||
|
|
||||||
|
ss << R"(
|
||||||
|
in vec3 v_col0;
|
||||||
|
#if TEXTURED
|
||||||
|
in vec2 v_tex0;
|
||||||
|
flat in ivec4 v_texpage;
|
||||||
|
uniform sampler2D samp0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
out vec4 o_col0;
|
||||||
|
|
||||||
|
ivec3 ApplyDithering(ivec3 icol)
|
||||||
|
{
|
||||||
|
ivec2 fc = (ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & ivec2(3, 3);
|
||||||
|
int offset = s_dither_values[fc.y * 4 + fc.x];
|
||||||
|
return icol + ivec3(offset, offset, offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
ivec3 TruncateTo15Bit(ivec3 icol)
|
||||||
|
{
|
||||||
|
icol = clamp(icol, ivec3(0, 0, 0), ivec3(255, 255, 255));
|
||||||
|
return (icol & ivec3(~7, ~7, ~7)) | ((icol >> 3) & ivec3(7, 7, 7));
|
||||||
|
}
|
||||||
|
|
||||||
|
#if TEXTURED
|
||||||
|
ivec2 ApplyNativeTextureWindow(ivec2 coords)
|
||||||
|
{
|
||||||
|
uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
|
||||||
|
uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
|
||||||
|
return ivec2(int(x), int(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
ivec2 ApplyTextureWindow(ivec2 coords)
|
||||||
|
{
|
||||||
|
if (RESOLUTION_SCALE == 1)
|
||||||
|
return ApplyNativeTextureWindow(coords);
|
||||||
|
|
||||||
|
ivec2 downscaled_coords = coords / ivec2(RESOLUTION_SCALE);
|
||||||
|
ivec2 coords_offset = coords % ivec2(RESOLUTION_SCALE);
|
||||||
|
return (ApplyNativeTextureWindow(downscaled_coords) * ivec2(RESOLUTION_SCALE)) + coords_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
ivec4 SampleFromVRAM(vec2 coord)
|
||||||
|
{
|
||||||
|
// from 0..1 to 0..255
|
||||||
|
ivec2 icoord = ivec2(coord * vec2(255 * RESOLUTION_SCALE));
|
||||||
|
icoord = ApplyTextureWindow(icoord);
|
||||||
|
|
||||||
|
// adjust for tightly packed palette formats
|
||||||
|
ivec2 index_coord = icoord;
|
||||||
|
#if PALETTE_4_BIT
|
||||||
|
index_coord.x /= 4;
|
||||||
|
#elif PALETTE_8_BIT
|
||||||
|
index_coord.x /= 2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// fixup coords
|
||||||
|
ivec2 vicoord = ivec2(v_texpage.x + index_coord.x, fixYCoord(v_texpage.y + index_coord.y));
|
||||||
|
|
||||||
|
// load colour/palette
|
||||||
|
vec4 color = texelFetch(samp0, vicoord, 0);
|
||||||
|
|
||||||
|
// apply palette
|
||||||
|
#if PALETTE
|
||||||
|
#if PALETTE_4_BIT
|
||||||
|
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3;
|
||||||
|
uint vram_value = RGBA8ToRGBA5551(color);
|
||||||
|
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
|
||||||
|
#elif PALETTE_8_BIT
|
||||||
|
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1;
|
||||||
|
uint vram_value = RGBA8ToRGBA5551(color);
|
||||||
|
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
|
||||||
|
#endif
|
||||||
|
ivec2 palette_icoord = ivec2(v_texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(v_texpage.w));
|
||||||
|
color = texelFetch(samp0, palette_icoord, 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return ivec4(color * vec4(255.0, 255.0, 255.0, 255.0));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
ivec3 vertcol = ivec3(v_col0 * vec3(255.0, 255.0, 255.0));
|
||||||
|
|
||||||
|
bool semitransparent;
|
||||||
|
bool new_mask_bit;
|
||||||
|
ivec3 icolor;
|
||||||
|
|
||||||
|
#if TEXTURED
|
||||||
|
ivec4 texcol = SampleFromVRAM(v_tex0);
|
||||||
|
if (texcol == ivec4(0.0, 0.0, 0.0, 0.0))
|
||||||
|
discard;
|
||||||
|
|
||||||
|
// Grab semitransparent bit from the texture color.
|
||||||
|
semitransparent = (texcol.a != 0);
|
||||||
|
|
||||||
|
#if RAW_TEXTURE
|
||||||
|
icolor = texcol.rgb;
|
||||||
|
#else
|
||||||
|
icolor = (vertcol * texcol.rgb) >> 7;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
// All pixels are semitransparent for untextured polygons.
|
||||||
|
semitransparent = true;
|
||||||
|
icolor = vertcol;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Apply dithering
|
||||||
|
#if DITHERING
|
||||||
|
icolor = ApplyDithering(icolor);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Clip to 15-bit range
|
||||||
|
#if !TRUE_COLOR
|
||||||
|
icolor = TruncateTo15Bit(icolor);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Normalize
|
||||||
|
vec3 color = vec3(icolor) / vec3(255.0, 255.0, 255.0);
|
||||||
|
|
||||||
|
#if TRANSPARENCY
|
||||||
|
// Apply semitransparency. If not a semitransparent texel, destination alpha is ignored.
|
||||||
|
if (semitransparent)
|
||||||
|
{
|
||||||
|
#if TRANSPARENCY_ONLY_OPAQUE
|
||||||
|
discard;
|
||||||
|
#endif
|
||||||
|
o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#if TRANSPARENCY_ONLY_TRANSPARENCY
|
||||||
|
discard;
|
||||||
|
#endif
|
||||||
|
o_col0 = vec4(color, 0.0);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
o_col0 = vec4(color, 0.0);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string GPU_HW_ShaderGen::GenerateScreenQuadVertexShader()
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
GenerateShaderHeader(ss);
|
||||||
|
ss << R"(
|
||||||
|
|
||||||
|
out vec2 v_tex0;
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2));
|
||||||
|
gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f);
|
||||||
|
gl_Position.y = -gl_Position.y;
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string GPU_HW_ShaderGen::GenerateFillFragmentShader()
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
GenerateShaderHeader(ss);
|
||||||
|
|
||||||
|
ss << R"(
|
||||||
|
uniform vec4 fill_color;
|
||||||
|
out vec4 o_col0;
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
o_col0 = fill_color;
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
GenerateShaderHeader(ss);
|
||||||
|
DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
|
||||||
|
DefineMacro(ss, "INTERLACED", interlaced);
|
||||||
|
|
||||||
|
ss << R"(
|
||||||
|
in vec2 v_tex0;
|
||||||
|
out vec4 o_col0;
|
||||||
|
|
||||||
|
uniform sampler2D samp0;
|
||||||
|
uniform ivec3 u_base_coords;
|
||||||
|
|
||||||
|
ivec2 GetCoords(vec2 fragcoord)
|
||||||
|
{
|
||||||
|
ivec2 icoords = ivec2(fragcoord);
|
||||||
|
#if INTERLACED
|
||||||
|
if ((((icoords.y - u_base_coords.z) / RESOLUTION_SCALE) & 1) != 0)
|
||||||
|
discard;
|
||||||
|
#endif
|
||||||
|
return icoords;
|
||||||
|
}
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
ivec2 icoords = GetCoords(gl_FragCoord.xy);
|
||||||
|
|
||||||
|
#if DEPTH_24BIT
|
||||||
|
// compute offset in dwords from the start of the 24-bit values
|
||||||
|
ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y);
|
||||||
|
int xoff = int(icoords.x);
|
||||||
|
int dword_index = (xoff / 2) + (xoff / 4);
|
||||||
|
|
||||||
|
// sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
|
||||||
|
uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
|
||||||
|
uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
|
||||||
|
uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
|
||||||
|
uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
|
||||||
|
|
||||||
|
// select the bit for this pixel depending on its offset in the 4-pixel block
|
||||||
|
uint r, g, b;
|
||||||
|
int block_offset = xoff & 3;
|
||||||
|
if (block_offset == 0)
|
||||||
|
{
|
||||||
|
r = s0 & 0xFFu;
|
||||||
|
g = s0 >> 8;
|
||||||
|
b = s1 & 0xFFu;
|
||||||
|
}
|
||||||
|
else if (block_offset == 1)
|
||||||
|
{
|
||||||
|
r = s1 >> 8;
|
||||||
|
g = s2 & 0xFFu;
|
||||||
|
b = s2 >> 8;
|
||||||
|
}
|
||||||
|
else if (block_offset == 2)
|
||||||
|
{
|
||||||
|
r = s1 & 0xFFu;
|
||||||
|
g = s1 >> 8;
|
||||||
|
b = s2 & 0xFFu;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
r = s2 >> 8;
|
||||||
|
g = s3 & 0xFFu;
|
||||||
|
b = s3 >> 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
// and normalize
|
||||||
|
o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0);
|
||||||
|
#else
|
||||||
|
// load and return
|
||||||
|
o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
GenerateShaderHeader(ss);
|
||||||
|
|
||||||
|
ss << R"(
|
||||||
|
|
||||||
|
uniform ivec2 u_base_coords;
|
||||||
|
uniform ivec2 u_size;
|
||||||
|
uniform usamplerBuffer samp0;
|
||||||
|
|
||||||
|
out vec4 o_col0;
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
||||||
|
ivec2 offset = coords - u_base_coords;
|
||||||
|
offset.y = u_size.y - offset.y - 1;
|
||||||
|
|
||||||
|
int buffer_offset = offset.y * u_size.x + offset.x;
|
||||||
|
uint value = texelFetch(samp0, buffer_offset).r;
|
||||||
|
|
||||||
|
o_col0 = RGBA5551ToRGBA8(value);
|
||||||
|
})";
|
||||||
|
|
||||||
|
return ss.str();
|
||||||
|
}
|
34
src/core/gpu_hw_shadergen.h
Normal file
34
src/core/gpu_hw_shadergen.h
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
#pragma once
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include "gpu_hw.h"
|
||||||
|
|
||||||
|
class GPU_HW_ShaderGen
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
enum class Backend
|
||||||
|
{
|
||||||
|
OpenGL
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
GPU_HW_ShaderGen(Backend backend, u32 resolution_scale, bool true_color);
|
||||||
|
~GPU_HW_ShaderGen();
|
||||||
|
|
||||||
|
void Init(Backend backend, u32 resolution_scale, bool true_color);
|
||||||
|
|
||||||
|
std::string GenerateBatchVertexShader(bool textured);
|
||||||
|
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode, bool dithering);
|
||||||
|
std::string GenerateScreenQuadVertexShader();
|
||||||
|
std::string GenerateFillFragmentShader();
|
||||||
|
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
|
||||||
|
std::string GenerateVRAMWriteFragmentShader();
|
||||||
|
|
||||||
|
Backend m_backend;
|
||||||
|
u32 m_resolution_scale;
|
||||||
|
bool m_true_color;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void GenerateShaderHeader(std::stringstream& ss);
|
||||||
|
void GenerateBatchUniformBuffer(std::stringstream& ss);
|
||||||
|
};
|
Loading…
Reference in a new issue