mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-25 23:25:41 +00:00
GPU: Improve batching by sampling a VRAM copy
This commit is contained in:
parent
332b5481e8
commit
792ec27b1a
|
@ -52,8 +52,8 @@ bool GPU::DoState(StateWrapper& sw)
|
||||||
|
|
||||||
sw.Do(&m_GPUSTAT.bits);
|
sw.Do(&m_GPUSTAT.bits);
|
||||||
|
|
||||||
sw.Do(&m_render_state.texture_base_x);
|
sw.Do(&m_render_state.texture_page_x);
|
||||||
sw.Do(&m_render_state.texture_base_y);
|
sw.Do(&m_render_state.texture_page_y);
|
||||||
sw.Do(&m_render_state.texture_palette_x);
|
sw.Do(&m_render_state.texture_palette_x);
|
||||||
sw.Do(&m_render_state.texture_palette_y);
|
sw.Do(&m_render_state.texture_palette_y);
|
||||||
sw.Do(&m_render_state.texture_color_mode);
|
sw.Do(&m_render_state.texture_color_mode);
|
||||||
|
@ -69,10 +69,10 @@ bool GPU::DoState(StateWrapper& sw)
|
||||||
sw.Do(&m_render_state.texture_changed);
|
sw.Do(&m_render_state.texture_changed);
|
||||||
sw.Do(&m_render_state.transparency_mode_changed);
|
sw.Do(&m_render_state.transparency_mode_changed);
|
||||||
|
|
||||||
sw.Do(&m_drawing_area.top_left_x);
|
sw.Do(&m_drawing_area.left);
|
||||||
sw.Do(&m_drawing_area.top_left_y);
|
sw.Do(&m_drawing_area.top);
|
||||||
sw.Do(&m_drawing_area.bottom_right_x);
|
sw.Do(&m_drawing_area.right);
|
||||||
sw.Do(&m_drawing_area.bottom_right_y);
|
sw.Do(&m_drawing_area.bottom);
|
||||||
sw.Do(&m_drawing_offset.x);
|
sw.Do(&m_drawing_offset.x);
|
||||||
sw.Do(&m_drawing_offset.y);
|
sw.Do(&m_drawing_offset.y);
|
||||||
sw.Do(&m_drawing_offset.x);
|
sw.Do(&m_drawing_offset.x);
|
||||||
|
@ -429,18 +429,18 @@ void GPU::WriteGP0(u32 value)
|
||||||
|
|
||||||
case 0xE3: // Set drawing area top left
|
case 0xE3: // Set drawing area top left
|
||||||
{
|
{
|
||||||
m_drawing_area.top_left_x = param & UINT32_C(0x3FF);
|
m_drawing_area.left = param & UINT32_C(0x3FF);
|
||||||
m_drawing_area.top_left_y = (param >> 10) & UINT32_C(0x1FF);
|
m_drawing_area.top = (param >> 10) & UINT32_C(0x1FF);
|
||||||
Log_DebugPrintf("Set drawing area top-left: (%u, %u)", m_drawing_area.top_left_x, m_drawing_area.top_left_y);
|
Log_DebugPrintf("Set drawing area top-left: (%u, %u)", m_drawing_area.left, m_drawing_area.top);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0xE4: // Set drawing area bottom right
|
case 0xE4: // Set drawing area bottom right
|
||||||
{
|
{
|
||||||
m_drawing_area.bottom_right_x = param & UINT32_C(0x3FF);
|
m_drawing_area.right = param & UINT32_C(0x3FF);
|
||||||
m_drawing_area.bottom_right_y = (param >> 10) & UINT32_C(0x1FF);
|
m_drawing_area.bottom = (param >> 10) & UINT32_C(0x1FF);
|
||||||
Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.bottom_right_x,
|
Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right,
|
||||||
m_drawing_area.bottom_right_y);
|
m_drawing_area.bottom);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -785,8 +785,8 @@ void GPU::RenderState::SetFromPageAttribute(u16 value)
|
||||||
if (texpage_attribute == value)
|
if (texpage_attribute == value)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
texture_base_x = static_cast<s32>(ZeroExtend32(value & UINT16_C(0x0F)) * UINT32_C(64));
|
texture_page_x = static_cast<s32>(ZeroExtend32(value & UINT16_C(0x0F)) * UINT32_C(64));
|
||||||
texture_base_y = static_cast<s32>(ZeroExtend32((value >> 4) & UINT16_C(1)) * UINT32_C(256));
|
texture_page_y = static_cast<s32>(ZeroExtend32((value >> 4) & UINT16_C(1)) * UINT32_C(256));
|
||||||
texture_color_mode = (static_cast<TextureColorMode>((value >> 7) & UINT16_C(0x03)));
|
texture_color_mode = (static_cast<TextureColorMode>((value >> 7) & UINT16_C(0x03)));
|
||||||
if (texture_color_mode == TextureColorMode::Reserved_Direct16Bit)
|
if (texture_color_mode == TextureColorMode::Reserved_Direct16Bit)
|
||||||
texture_color_mode = TextureColorMode::Direct16Bit;
|
texture_color_mode = TextureColorMode::Direct16Bit;
|
||||||
|
|
|
@ -228,8 +228,8 @@ protected:
|
||||||
static constexpr u16 PALETTE_ATTRIBUTE_MASK = UINT16_C(0b0111111111111111);
|
static constexpr u16 PALETTE_ATTRIBUTE_MASK = UINT16_C(0b0111111111111111);
|
||||||
|
|
||||||
// decoded values
|
// decoded values
|
||||||
s32 texture_base_x;
|
s32 texture_page_x;
|
||||||
s32 texture_base_y;
|
s32 texture_page_y;
|
||||||
s32 texture_palette_x;
|
s32 texture_palette_x;
|
||||||
s32 texture_palette_y;
|
s32 texture_palette_y;
|
||||||
TextureColorMode texture_color_mode;
|
TextureColorMode texture_color_mode;
|
||||||
|
@ -265,8 +265,8 @@ protected:
|
||||||
|
|
||||||
struct DrawingArea
|
struct DrawingArea
|
||||||
{
|
{
|
||||||
u32 top_left_x, top_left_y;
|
u32 left, top;
|
||||||
u32 bottom_right_x, bottom_right_y;
|
u32 right, bottom;
|
||||||
} m_drawing_area = {};
|
} m_drawing_area = {};
|
||||||
|
|
||||||
struct DrawingOffset
|
struct DrawingOffset
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
#include "gpu_hw.h"
|
#include "gpu_hw.h"
|
||||||
#include "YBaseLib/Assert.h"
|
#include "YBaseLib/Assert.h"
|
||||||
|
#include "YBaseLib/Log.h"
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
Log_SetChannel(GPU_HW);
|
||||||
|
|
||||||
GPU_HW::GPU_HW() = default;
|
GPU_HW::GPU_HW() = default;
|
||||||
|
|
||||||
|
@ -110,10 +112,10 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices)
|
||||||
|
|
||||||
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
|
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
|
||||||
{
|
{
|
||||||
*left = m_drawing_area.top_left_x;
|
*left = m_drawing_area.left;
|
||||||
*right = m_drawing_area.bottom_right_x + 1;
|
*right = m_drawing_area.right + 1;
|
||||||
*top = m_drawing_area.top_left_y;
|
*top = m_drawing_area.top;
|
||||||
*bottom = m_drawing_area.bottom_right_y + 1;
|
*bottom = m_drawing_area.bottom + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
|
static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
|
||||||
|
@ -176,7 +178,6 @@ in vec2 a_tex0;
|
||||||
|
|
||||||
out vec4 v_col0;
|
out vec4 v_col0;
|
||||||
#if TEXTURED
|
#if TEXTURED
|
||||||
uniform vec2 u_tex_scale;
|
|
||||||
out vec2 v_tex0;
|
out vec2 v_tex0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -199,26 +200,75 @@ void main()
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GPU_HW::GenerateFragmentShader(bool textured, bool blending)
|
std::string GPU_HW::GenerateFragmentShader(bool textured, bool blending, TextureColorMode texture_color_mode)
|
||||||
{
|
{
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
GenerateShaderHeader(ss);
|
GenerateShaderHeader(ss);
|
||||||
DefineMacro(ss, "TEXTURED", textured);
|
DefineMacro(ss, "TEXTURED", textured);
|
||||||
DefineMacro(ss, "BLENDING", blending);
|
DefineMacro(ss, "BLENDING", blending);
|
||||||
|
DefineMacro(ss, "PALETTE",
|
||||||
|
textured && (texture_color_mode == GPU::TextureColorMode::Palette4Bit ||
|
||||||
|
texture_color_mode == GPU::TextureColorMode::Palette8Bit));
|
||||||
|
DefineMacro(ss, "PALETTE_4_BIT", textured && texture_color_mode == GPU::TextureColorMode::Palette4Bit);
|
||||||
|
DefineMacro(ss, "PALETTE_8_BIT", textured && texture_color_mode == GPU::TextureColorMode::Palette8Bit);
|
||||||
|
|
||||||
ss << R"(
|
ss << R"(
|
||||||
in vec4 v_col0;
|
in vec4 v_col0;
|
||||||
#if TEXTURED
|
#if TEXTURED
|
||||||
in vec2 v_tex0;
|
in vec2 v_tex0;
|
||||||
uniform sampler2D samp0;
|
uniform sampler2D samp0;
|
||||||
|
uniform ivec2 u_texture_page_base;
|
||||||
|
#if PALETTE
|
||||||
|
uniform ivec2 u_texture_palette_base;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
out vec4 o_col0;
|
out vec4 o_col0;
|
||||||
|
|
||||||
|
#if TEXTURED
|
||||||
|
vec4 SampleFromVRAM(vec2 coord)
|
||||||
|
{
|
||||||
|
// from 0..1 to 0..255
|
||||||
|
ivec2 icoord = ivec2(coord * vec2(255.0));
|
||||||
|
|
||||||
|
// adjust for tightly packed palette formats
|
||||||
|
ivec2 index_coord = icoord;
|
||||||
|
#if PALETTE_4_BIT
|
||||||
|
index_coord.x /= 4;
|
||||||
|
#elif PALETTE_8_BIT
|
||||||
|
index_coord.x /= 2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// fixup coords
|
||||||
|
ivec2 vicoord = ivec2(u_texture_page_base.x + index_coord.x,
|
||||||
|
fixYCoord(u_texture_page_base.y + index_coord.y));
|
||||||
|
|
||||||
|
// load colour/palette
|
||||||
|
vec4 color = texelFetch(samp0, vicoord & VRAM_COORD_MASK, 0);
|
||||||
|
|
||||||
|
// apply palette
|
||||||
|
#if PALETTE
|
||||||
|
#if PALETTE_4_BIT
|
||||||
|
int subpixel = int(icoord.x) & 3;
|
||||||
|
uint vram_value = RGBA8ToRGBA5551(color);
|
||||||
|
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
|
||||||
|
#elif PALETTE_8_BIT
|
||||||
|
int subpixel = int(icoord.x) & 1;
|
||||||
|
uint vram_value = RGBA8ToRGBA5551(color);
|
||||||
|
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
|
||||||
|
#endif
|
||||||
|
ivec2 palette_icoord = ivec2(u_texture_palette_base.x + palette_index, fixYCoord(u_texture_palette_base.y));
|
||||||
|
color = texelFetch(samp0, palette_icoord & VRAM_COORD_MASK, 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return color;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void main()
|
void main()
|
||||||
{
|
{
|
||||||
#if TEXTURED
|
#if TEXTURED
|
||||||
vec4 texcol = texture(samp0, v_tex0);
|
vec4 texcol = SampleFromVRAM(v_tex0);
|
||||||
if (texcol == vec4(0.0, 0.0, 0.0, 0.0))
|
if (texcol == vec4(0.0, 0.0, 0.0, 0.0))
|
||||||
discard;
|
discard;
|
||||||
|
|
||||||
|
@ -255,64 +305,6 @@ void main()
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GPU_HW::GenerateTexturePageFragmentShader(TextureColorMode mode)
|
|
||||||
{
|
|
||||||
const bool is_palette = (mode == GPU::TextureColorMode::Palette4Bit || mode == GPU::TextureColorMode::Palette8Bit);
|
|
||||||
|
|
||||||
std::stringstream ss;
|
|
||||||
GenerateShaderHeader(ss);
|
|
||||||
DefineMacro(ss, "PALETTE", is_palette);
|
|
||||||
DefineMacro(ss, "PALETTE_4_BIT", mode == GPU::TextureColorMode::Palette4Bit);
|
|
||||||
DefineMacro(ss, "PALETTE_8_BIT", mode == GPU::TextureColorMode::Palette8Bit);
|
|
||||||
|
|
||||||
ss << R"(
|
|
||||||
uniform sampler2D samp0;
|
|
||||||
uniform ivec2 base_offset;
|
|
||||||
|
|
||||||
#if PALETTE
|
|
||||||
uniform ivec2 palette_offset;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
in vec2 v_tex0;
|
|
||||||
out vec4 o_col0;
|
|
||||||
|
|
||||||
void main()
|
|
||||||
{
|
|
||||||
ivec2 local_coords = ivec2(gl_FragCoord.xy);
|
|
||||||
#if PALETTE_4_BIT
|
|
||||||
local_coords.x /= 4;
|
|
||||||
#elif PALETTE_8_BIT
|
|
||||||
local_coords.x /= 2;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// fixup coords
|
|
||||||
ivec2 coords = ivec2(base_offset.x + local_coords.x, fixYCoord(base_offset.y + local_coords.y));
|
|
||||||
|
|
||||||
// load colour/palette
|
|
||||||
vec4 color = texelFetch(samp0, coords & VRAM_COORD_MASK, 0);
|
|
||||||
|
|
||||||
// apply palette
|
|
||||||
#if PALETTE
|
|
||||||
#if PALETTE_4_BIT
|
|
||||||
int subpixel = int(gl_FragCoord.x) & 3;
|
|
||||||
uint vram_value = RGBA8ToRGBA5551(color);
|
|
||||||
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
|
|
||||||
#elif PALETTE_8_BIT
|
|
||||||
int subpixel = int(gl_FragCoord.x) & 1;
|
|
||||||
uint vram_value = RGBA8ToRGBA5551(color);
|
|
||||||
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
|
|
||||||
#endif
|
|
||||||
ivec2 palette_coords = ivec2(palette_offset.x + palette_index, fixYCoord(palette_offset.y));
|
|
||||||
color = texelFetch(samp0, palette_coords & VRAM_COORD_MASK, 0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
o_col0 = color;
|
|
||||||
}
|
|
||||||
)";
|
|
||||||
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string GPU_HW::GenerateFillFragmentShader()
|
std::string GPU_HW::GenerateFillFragmentShader()
|
||||||
{
|
{
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
|
@ -331,8 +323,6 @@ void main()
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW::UpdateTexturePageTexture() {}
|
|
||||||
|
|
||||||
GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
|
GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
|
||||||
{
|
{
|
||||||
if (rc.primitive == Primitive::Line)
|
if (rc.primitive == Primitive::Line)
|
||||||
|
@ -343,6 +333,8 @@ GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc
|
||||||
return HWRenderBatch::Primitive::Triangles;
|
return HWRenderBatch::Primitive::Triangles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GPU_HW::InvalidateVRAMReadCache() {}
|
||||||
|
|
||||||
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices)
|
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices)
|
||||||
{
|
{
|
||||||
if (rc.texture_enable)
|
if (rc.texture_enable)
|
||||||
|
@ -375,20 +367,46 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices)
|
||||||
if (m_render_state.IsTextureChanged())
|
if (m_render_state.IsTextureChanged())
|
||||||
{
|
{
|
||||||
if (!IsFlushed())
|
if (!IsFlushed())
|
||||||
|
{
|
||||||
|
// We only need to refresh the VRAM read copy when the area being rendered to intersects the
// texture page that is about to be sampled. The page rectangle is treated as half-open
// ([left, right) x [top, bottom)); the drawing area's right/bottom are inclusive
// (CalcScissorRect adds 1 to both), hence the <= on those two comparisons.
|
||||||
|
const u32 texture_page_left = m_render_state.texture_page_x;
|
||||||
|
// Fixed: the right edge is derived from the page's x origin (was texture_page_y).
const u32 texture_page_right = texture_page_left + TEXTURE_PAGE_WIDTH;
|
||||||
|
const u32 texture_page_top = m_render_state.texture_page_y;
|
||||||
|
const u32 texture_page_bottom = texture_page_top + TEXTURE_PAGE_HEIGHT;
|
||||||
|
const bool texture_page_overlaps =
|
||||||
|
(texture_page_left <= m_drawing_area.right && texture_page_right > m_drawing_area.left &&
|
||||||
|
// Fixed: the vertical test was inverted and could only hold for an inverted drawing area.
texture_page_top <= m_drawing_area.bottom && texture_page_bottom > m_drawing_area.top);
|
||||||
|
|
||||||
|
// TODO: Check palette too.
|
||||||
|
if (texture_page_overlaps)
|
||||||
|
{
|
||||||
|
Log_DebugPrintf("Invalidating VRAM read cache due to drawing area overlap");
|
||||||
|
InvalidateVRAMReadCache();
|
||||||
|
}
|
||||||
|
|
||||||
|
// texture page changed?
|
||||||
|
// TODO: Move this to the shader...
|
||||||
FlushRender();
|
FlushRender();
|
||||||
UpdateTexturePageTexture();
|
}
|
||||||
|
|
||||||
m_render_state.ClearTextureChangedFlag();
|
m_render_state.ClearTextureChangedFlag();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_batch.transparency_enable && m_render_state.IsTransparencyModeChanged() && !IsFlushed())
|
if (m_batch.transparency_enable && m_render_state.IsTransparencyModeChanged() && !IsFlushed())
|
||||||
FlushRender();
|
FlushRender();
|
||||||
|
|
||||||
m_batch.transparency_mode = m_render_state.transparency_mode;
|
|
||||||
m_render_state.ClearTransparencyModeChangedFlag();
|
m_render_state.ClearTransparencyModeChangedFlag();
|
||||||
|
|
||||||
|
m_batch.texture_color_mode = m_render_state.texture_color_mode;
|
||||||
|
m_batch.texture_page_x = m_render_state.texture_page_x;
|
||||||
|
m_batch.texture_page_y = m_render_state.texture_page_y;
|
||||||
|
m_batch.texture_palette_x = m_render_state.texture_palette_x;
|
||||||
|
m_batch.texture_palette_y = m_render_state.texture_palette_y;
|
||||||
|
m_batch.transparency_mode = m_render_state.transparency_mode;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// extract state
|
// extract state
|
||||||
|
const bool rc_transparency_enable = rc.transparency_enable;
|
||||||
const bool rc_texture_enable = rc.texture_enable;
|
const bool rc_texture_enable = rc.texture_enable;
|
||||||
const bool rc_texture_blend_enable = !rc.texture_blend_disable;
|
const bool rc_texture_blend_enable = !rc.texture_blend_disable;
|
||||||
const HWRenderBatch::Primitive rc_primitive = GetPrimitiveForCommand(rc);
|
const HWRenderBatch::Primitive rc_primitive = GetPrimitiveForCommand(rc);
|
||||||
|
@ -399,14 +417,15 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices)
|
||||||
// including the degenerate triangles for strips
|
// including the degenerate triangles for strips
|
||||||
const u32 max_added_vertices = num_vertices + 2;
|
const u32 max_added_vertices = num_vertices + 2;
|
||||||
const bool params_changed =
|
const bool params_changed =
|
||||||
(m_batch.texture_enable != rc_texture_enable || m_batch.texture_blending_enable != rc_texture_blend_enable ||
|
(m_batch.transparency_enable != rc_transparency_enable || m_batch.texture_enable != rc_texture_enable ||
|
||||||
m_batch.primitive != rc_primitive);
|
m_batch.texture_blending_enable != rc_texture_blend_enable || m_batch.primitive != rc_primitive);
|
||||||
if ((m_batch.vertices.size() + max_added_vertices) >= MAX_BATCH_VERTEX_COUNT || params_changed)
|
if ((m_batch.vertices.size() + max_added_vertices) >= MAX_BATCH_VERTEX_COUNT || params_changed)
|
||||||
FlushRender();
|
FlushRender();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_batch.primitive = rc_primitive;
|
||||||
|
m_batch.transparency_enable = rc_transparency_enable;
|
||||||
m_batch.texture_enable = rc_texture_enable;
|
m_batch.texture_enable = rc_texture_enable;
|
||||||
m_batch.texture_blending_enable = rc_texture_blend_enable;
|
m_batch.texture_blending_enable = rc_texture_blend_enable;
|
||||||
m_batch.primitive = rc_primitive;
|
|
||||||
LoadVertices(rc, num_vertices);
|
LoadVertices(rc, num_vertices);
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,11 @@ protected:
|
||||||
bool transparency_enable;
|
bool transparency_enable;
|
||||||
bool texture_enable;
|
bool texture_enable;
|
||||||
bool texture_blending_enable;
|
bool texture_blending_enable;
|
||||||
|
TextureColorMode texture_color_mode;
|
||||||
|
u32 texture_page_x;
|
||||||
|
u32 texture_page_y;
|
||||||
|
u32 texture_palette_x;
|
||||||
|
u32 texture_palette_y;
|
||||||
TransparencyMode transparency_mode;
|
TransparencyMode transparency_mode;
|
||||||
|
|
||||||
std::vector<HWVertex> vertices;
|
std::vector<HWVertex> vertices;
|
||||||
|
@ -46,6 +51,10 @@ protected:
|
||||||
|
|
||||||
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||||
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
|
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
|
||||||
|
static constexpr u32 TEXTURE_TILE_SIZE = 256;
|
||||||
|
static constexpr u32 TEXTURE_TILE_X_COUNT = VRAM_WIDTH / TEXTURE_TILE_SIZE;
|
||||||
|
static constexpr u32 TEXTURE_TILE_Y_COUNT = VRAM_HEIGHT / TEXTURE_TILE_SIZE;
|
||||||
|
static constexpr u32 TEXTURE_TILE_COUNT = TEXTURE_TILE_X_COUNT * TEXTURE_TILE_Y_COUNT;
|
||||||
|
|
||||||
static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba)
|
static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba)
|
||||||
{
|
{
|
||||||
|
@ -55,7 +64,7 @@ protected:
|
||||||
static_cast<float>(rgba >> 24) * (1.0f / 255.0f));
|
static_cast<float>(rgba >> 24) * (1.0f / 255.0f));
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void UpdateTexturePageTexture();
|
virtual void InvalidateVRAMReadCache();
|
||||||
|
|
||||||
bool IsFlushed() const { return m_batch.vertices.empty(); }
|
bool IsFlushed() const { return m_batch.vertices.empty(); }
|
||||||
|
|
||||||
|
@ -64,9 +73,8 @@ protected:
|
||||||
void CalcScissorRect(int* left, int* top, int* right, int* bottom);
|
void CalcScissorRect(int* left, int* top, int* right, int* bottom);
|
||||||
|
|
||||||
std::string GenerateVertexShader(bool textured);
|
std::string GenerateVertexShader(bool textured);
|
||||||
std::string GenerateFragmentShader(bool textured, bool blending);
|
std::string GenerateFragmentShader(bool textured, bool blending, TextureColorMode texture_color_mode);
|
||||||
std::string GenerateScreenQuadVertexShader();
|
std::string GenerateScreenQuadVertexShader();
|
||||||
std::string GenerateTexturePageFragmentShader(TextureColorMode mode);
|
|
||||||
std::string GenerateFillFragmentShader();
|
std::string GenerateFillFragmentShader();
|
||||||
|
|
||||||
HWRenderBatch m_batch = {};
|
HWRenderBatch m_batch = {};
|
||||||
|
|
|
@ -43,7 +43,7 @@ void GPU_HW_OpenGL::RenderUI()
|
||||||
|
|
||||||
ImGui::TextUnformatted("Texture Page Updates:");
|
ImGui::TextUnformatted("Texture Page Updates:");
|
||||||
ImGui::NextColumn();
|
ImGui::NextColumn();
|
||||||
ImGui::Text("%u", m_stats.num_texture_page_updates);
|
ImGui::Text("%u", m_stats.num_vram_read_texture_updates);
|
||||||
ImGui::NextColumn();
|
ImGui::NextColumn();
|
||||||
|
|
||||||
ImGui::TextUnformatted("Batches Drawn:");
|
ImGui::TextUnformatted("Batches Drawn:");
|
||||||
|
@ -64,6 +64,11 @@ void GPU_HW_OpenGL::RenderUI()
|
||||||
m_stats = {};
|
m_stats = {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GPU_HW_OpenGL::InvalidateVRAMReadCache()
|
||||||
|
{
|
||||||
|
m_vram_read_texture_dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
std::tuple<s32, s32> GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y)
|
std::tuple<s32, s32> GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y)
|
||||||
{
|
{
|
||||||
return std::make_tuple(x, static_cast<s32>(static_cast<s32>(VRAM_HEIGHT) - y));
|
return std::make_tuple(x, static_cast<s32>(static_cast<s32>(VRAM_HEIGHT) - y));
|
||||||
|
@ -79,11 +84,11 @@ void GPU_HW_OpenGL::CreateFramebuffer()
|
||||||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_framebuffer_texture->GetGLId(), 0);
|
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_framebuffer_texture->GetGLId(), 0);
|
||||||
Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
|
Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
|
||||||
|
|
||||||
m_texture_page_texture =
|
m_vram_read_texture =
|
||||||
std::make_unique<GL::Texture>(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false);
|
std::make_unique<GL::Texture>(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false);
|
||||||
glGenFramebuffers(1, &m_texture_page_fbo_id);
|
glGenFramebuffers(1, &m_vram_read_fbo_id);
|
||||||
glBindFramebuffer(GL_FRAMEBUFFER, m_texture_page_fbo_id);
|
glBindFramebuffer(GL_FRAMEBUFFER, m_vram_read_fbo_id);
|
||||||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_page_texture->GetGLId(), 0);
|
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_vram_read_texture->GetGLId(), 0);
|
||||||
Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
|
Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,9 +105,9 @@ void GPU_HW_OpenGL::ClearFramebuffer()
|
||||||
|
|
||||||
void GPU_HW_OpenGL::DestroyFramebuffer()
|
void GPU_HW_OpenGL::DestroyFramebuffer()
|
||||||
{
|
{
|
||||||
glDeleteFramebuffers(1, &m_texture_page_fbo_id);
|
glDeleteFramebuffers(1, &m_vram_read_fbo_id);
|
||||||
m_texture_page_fbo_id = 0;
|
m_vram_read_fbo_id = 0;
|
||||||
m_texture_page_texture.reset();
|
m_vram_read_texture.reset();
|
||||||
|
|
||||||
glDeleteFramebuffers(1, &m_framebuffer_fbo_id);
|
glDeleteFramebuffers(1, &m_framebuffer_fbo_id);
|
||||||
m_framebuffer_fbo_id = 0;
|
m_framebuffer_fbo_id = 0;
|
||||||
|
@ -132,41 +137,29 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
|
||||||
|
|
||||||
bool GPU_HW_OpenGL::CompilePrograms()
|
bool GPU_HW_OpenGL::CompilePrograms()
|
||||||
{
|
{
|
||||||
bool result = true;
|
for (u32 textured = 0; textured < 2; textured++)
|
||||||
result &= CompileProgram(m_color_program, false, false);
|
{
|
||||||
result &= CompileProgram(m_texture_program, true, false);
|
for (u32 blending = 0; blending < 2; blending++)
|
||||||
result &= CompileProgram(m_blended_texture_program, true, true);
|
{
|
||||||
if (!result)
|
for (u32 format = 0; format < 3; format++)
|
||||||
return false;
|
{
|
||||||
|
// TODO: eliminate duplicate shaders here
|
||||||
const std::string screen_quad_vs = GenerateScreenQuadVertexShader();
|
if (!CompileProgram(m_render_programs[textured][blending][format], ConvertToBoolUnchecked(textured),
|
||||||
for (u32 palette_size = 0; palette_size < static_cast<u32>(m_texture_page_programs.size()); palette_size++)
|
ConvertToBoolUnchecked(blending), static_cast<TextureColorMode>(format)))
|
||||||
{
|
{
|
||||||
const std::string fs = GenerateTexturePageFragmentShader(static_cast<TextureColorMode>(palette_size));
|
|
||||||
|
|
||||||
GL::Program& prog = m_texture_page_programs[palette_size];
|
|
||||||
if (!prog.Compile(screen_quad_vs.c_str(), fs.c_str()))
|
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
prog.BindFragData(0, "o_col0");
|
}
|
||||||
|
}
|
||||||
if (!prog.Link())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
prog.RegisterUniform("samp0");
|
|
||||||
prog.RegisterUniform("base_offset");
|
|
||||||
prog.RegisterUniform("palette_offset");
|
|
||||||
prog.Bind();
|
|
||||||
prog.Uniform1i(0, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blending)
|
bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blending, TextureColorMode texture_color_mode)
|
||||||
{
|
{
|
||||||
const std::string vs = GenerateVertexShader(textured);
|
const std::string vs = GenerateVertexShader(textured);
|
||||||
const std::string fs = GenerateFragmentShader(textured, blending);
|
const std::string fs = GenerateFragmentShader(textured, blending, texture_color_mode);
|
||||||
if (!prog.Compile(vs.c_str(), fs.c_str()))
|
if (!prog.Compile(vs.c_str(), fs.c_str()))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -187,21 +180,29 @@ bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blendi
|
||||||
if (textured)
|
if (textured)
|
||||||
{
|
{
|
||||||
prog.RegisterUniform("samp0");
|
prog.RegisterUniform("samp0");
|
||||||
|
prog.RegisterUniform("u_texture_page_base");
|
||||||
|
prog.RegisterUniform("u_texture_palette_base");
|
||||||
prog.Uniform1i(1, 0);
|
prog.Uniform1i(1, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW_OpenGL::SetProgram(bool textured, bool blending)
|
void GPU_HW_OpenGL::SetProgram()
|
||||||
{
|
{
|
||||||
const GL::Program& prog = textured ? (blending ? m_blended_texture_program : m_texture_program) : m_color_program;
|
const GL::Program& prog =
|
||||||
|
m_render_programs[BoolToUInt32(m_batch.texture_enable)][BoolToUInt32(m_batch.texture_blending_enable)]
|
||||||
|
[static_cast<u32>(m_batch.texture_color_mode)];
|
||||||
prog.Bind();
|
prog.Bind();
|
||||||
|
|
||||||
if (textured)
|
|
||||||
m_texture_page_texture->Bind();
|
|
||||||
|
|
||||||
prog.Uniform2i(0, m_drawing_offset.x, m_drawing_offset.y);
|
prog.Uniform2i(0, m_drawing_offset.x, m_drawing_offset.y);
|
||||||
|
|
||||||
|
if (m_batch.texture_enable)
|
||||||
|
{
|
||||||
|
m_vram_read_texture->Bind();
|
||||||
|
prog.Uniform2i(2, m_batch.texture_page_x, m_batch.texture_page_y);
|
||||||
|
prog.Uniform2i(3, m_batch.texture_palette_x, m_batch.texture_palette_y);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW_OpenGL::SetViewport()
|
void GPU_HW_OpenGL::SetViewport()
|
||||||
|
@ -302,6 +303,8 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color)
|
||||||
const auto [r, g, b, a] = RGBA8ToFloat(RGBA5551ToRGBA8888(color));
|
const auto [r, g, b, a] = RGBA8ToFloat(RGBA5551ToRGBA8888(color));
|
||||||
glClearColor(r, g, b, a);
|
glClearColor(r, g, b, a);
|
||||||
glClear(GL_COLOR_BUFFER_BIT);
|
glClear(GL_COLOR_BUFFER_BIT);
|
||||||
|
|
||||||
|
InvalidateVRAMReadCache();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
|
void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
|
||||||
|
@ -334,6 +337,8 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
// lower-left origin flip happens here
|
// lower-left origin flip happens here
|
||||||
glTexSubImage2D(GL_TEXTURE_2D, 0, x, VRAM_HEIGHT - y - height, width, height, GL_RGBA, GL_UNSIGNED_BYTE,
|
glTexSubImage2D(GL_TEXTURE_2D, 0, x, VRAM_HEIGHT - y - height, width, height, GL_RGBA, GL_UNSIGNED_BYTE,
|
||||||
rgba_data.data());
|
rgba_data.data());
|
||||||
|
|
||||||
|
InvalidateVRAMReadCache();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
|
void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
|
||||||
|
@ -347,31 +352,18 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
|
||||||
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id);
|
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id);
|
||||||
glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height,
|
glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height,
|
||||||
GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||||
|
|
||||||
|
InvalidateVRAMReadCache();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW_OpenGL::UpdateTexturePageTexture()
|
void GPU_HW_OpenGL::UpdateVRAMReadTexture()
|
||||||
{
|
{
|
||||||
m_stats.num_texture_page_updates++;
|
m_stats.num_vram_read_texture_updates++;
|
||||||
|
m_vram_read_texture_dirty = false;
|
||||||
|
|
||||||
glBindFramebuffer(GL_FRAMEBUFFER, m_texture_page_fbo_id);
|
// TODO: Fallback blit path, and partial updates.
|
||||||
m_framebuffer_texture->Bind();
|
glCopyImageSubData(m_framebuffer_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0, m_vram_read_texture->GetGLId(),
|
||||||
|
GL_TEXTURE_2D, 0, 0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 1);
|
||||||
glDisable(GL_BLEND);
|
|
||||||
glDisable(GL_SCISSOR_TEST);
|
|
||||||
glViewport(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT);
|
|
||||||
glBindVertexArray(m_attributeless_vao_id);
|
|
||||||
|
|
||||||
const GL::Program& prog = m_texture_page_programs[static_cast<u8>(m_render_state.texture_color_mode)];
|
|
||||||
prog.Bind();
|
|
||||||
|
|
||||||
prog.Uniform2i(1, m_render_state.texture_base_x, m_render_state.texture_base_y);
|
|
||||||
if (m_render_state.texture_color_mode >= GPU::TextureColorMode::Palette4Bit)
|
|
||||||
prog.Uniform2i(2, m_render_state.texture_palette_x, m_render_state.texture_palette_y);
|
|
||||||
|
|
||||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
|
||||||
|
|
||||||
m_framebuffer_texture->Unbind();
|
|
||||||
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW_OpenGL::FlushRender()
|
void GPU_HW_OpenGL::FlushRender()
|
||||||
|
@ -379,6 +371,9 @@ void GPU_HW_OpenGL::FlushRender()
|
||||||
if (m_batch.vertices.empty())
|
if (m_batch.vertices.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (m_vram_read_texture_dirty)
|
||||||
|
UpdateVRAMReadTexture();
|
||||||
|
|
||||||
m_stats.num_batches++;
|
m_stats.num_batches++;
|
||||||
m_stats.num_vertices += static_cast<u32>(m_batch.vertices.size());
|
m_stats.num_vertices += static_cast<u32>(m_batch.vertices.size());
|
||||||
|
|
||||||
|
@ -386,7 +381,7 @@ void GPU_HW_OpenGL::FlushRender()
|
||||||
glDisable(GL_DEPTH_TEST);
|
glDisable(GL_DEPTH_TEST);
|
||||||
glEnable(GL_SCISSOR_TEST);
|
glEnable(GL_SCISSOR_TEST);
|
||||||
glDepthMask(GL_FALSE);
|
glDepthMask(GL_FALSE);
|
||||||
SetProgram(m_batch.texture_enable, m_batch.texture_blending_enable);
|
SetProgram();
|
||||||
SetViewport();
|
SetViewport();
|
||||||
SetScissor();
|
SetScissor();
|
||||||
SetBlendState();
|
SetBlendState();
|
||||||
|
|
|
@ -23,13 +23,13 @@ protected:
|
||||||
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) override;
|
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) override;
|
||||||
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
|
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
|
||||||
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
|
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
|
||||||
void UpdateTexturePageTexture() override;
|
|
||||||
void FlushRender() override;
|
void FlushRender() override;
|
||||||
|
void InvalidateVRAMReadCache() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct GLStats
|
struct GLStats
|
||||||
{
|
{
|
||||||
u32 num_texture_page_updates;
|
u32 num_vram_read_texture_updates;
|
||||||
u32 num_batches;
|
u32 num_batches;
|
||||||
u32 num_vertices;
|
u32 num_vertices;
|
||||||
};
|
};
|
||||||
|
@ -39,13 +39,14 @@ private:
|
||||||
void CreateFramebuffer();
|
void CreateFramebuffer();
|
||||||
void ClearFramebuffer();
|
void ClearFramebuffer();
|
||||||
void DestroyFramebuffer();
|
void DestroyFramebuffer();
|
||||||
|
void UpdateVRAMReadTexture();
|
||||||
|
|
||||||
void CreateVertexBuffer();
|
void CreateVertexBuffer();
|
||||||
|
|
||||||
bool CompilePrograms();
|
bool CompilePrograms();
|
||||||
bool CompileProgram(GL::Program& prog, bool textured, bool blending);
|
bool CompileProgram(GL::Program& prog, bool textured, bool blending, TextureColorMode texture_color_mode);
|
||||||
|
|
||||||
void SetProgram(bool textured, bool blending);
|
void SetProgram();
|
||||||
void SetViewport();
|
void SetViewport();
|
||||||
void SetScissor();
|
void SetScissor();
|
||||||
void SetBlendState();
|
void SetBlendState();
|
||||||
|
@ -53,16 +54,15 @@ private:
|
||||||
std::unique_ptr<GL::Texture> m_framebuffer_texture;
|
std::unique_ptr<GL::Texture> m_framebuffer_texture;
|
||||||
GLuint m_framebuffer_fbo_id = 0;
|
GLuint m_framebuffer_fbo_id = 0;
|
||||||
|
|
||||||
std::unique_ptr<GL::Texture> m_texture_page_texture;
|
std::unique_ptr<GL::Texture> m_vram_read_texture;
|
||||||
GLuint m_texture_page_fbo_id = 0;
|
GLuint m_vram_read_fbo_id = 0;
|
||||||
|
bool m_vram_read_texture_dirty = true;
|
||||||
|
|
||||||
GLuint m_vertex_buffer = 0;
|
GLuint m_vertex_buffer = 0;
|
||||||
GLuint m_vao_id = 0;
|
GLuint m_vao_id = 0;
|
||||||
GLuint m_attributeless_vao_id = 0;
|
GLuint m_attributeless_vao_id = 0;
|
||||||
|
|
||||||
GL::Program m_texture_program;
|
std::array<std::array<std::array<GL::Program, 3>, 2>, 2> m_render_programs;
|
||||||
GL::Program m_color_program;
|
|
||||||
GL::Program m_blended_texture_program;
|
|
||||||
std::array<GL::Program, 3> m_texture_page_programs;
|
std::array<GL::Program, 3> m_texture_page_programs;
|
||||||
|
|
||||||
GLStats m_stats = {};
|
GLStats m_stats = {};
|
||||||
|
|
Loading…
Reference in a new issue