mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-01-18 06:25:37 +00:00
GPU: Improve batching by sampling a VRAM copy
This commit is contained in:
parent
332b5481e8
commit
792ec27b1a
|
@ -52,8 +52,8 @@ bool GPU::DoState(StateWrapper& sw)
|
|||
|
||||
sw.Do(&m_GPUSTAT.bits);
|
||||
|
||||
sw.Do(&m_render_state.texture_base_x);
|
||||
sw.Do(&m_render_state.texture_base_y);
|
||||
sw.Do(&m_render_state.texture_page_x);
|
||||
sw.Do(&m_render_state.texture_page_y);
|
||||
sw.Do(&m_render_state.texture_palette_x);
|
||||
sw.Do(&m_render_state.texture_palette_y);
|
||||
sw.Do(&m_render_state.texture_color_mode);
|
||||
|
@ -69,10 +69,10 @@ bool GPU::DoState(StateWrapper& sw)
|
|||
sw.Do(&m_render_state.texture_changed);
|
||||
sw.Do(&m_render_state.transparency_mode_changed);
|
||||
|
||||
sw.Do(&m_drawing_area.top_left_x);
|
||||
sw.Do(&m_drawing_area.top_left_y);
|
||||
sw.Do(&m_drawing_area.bottom_right_x);
|
||||
sw.Do(&m_drawing_area.bottom_right_y);
|
||||
sw.Do(&m_drawing_area.left);
|
||||
sw.Do(&m_drawing_area.top);
|
||||
sw.Do(&m_drawing_area.right);
|
||||
sw.Do(&m_drawing_area.bottom);
|
||||
sw.Do(&m_drawing_offset.x);
|
||||
sw.Do(&m_drawing_offset.y);
|
||||
sw.Do(&m_drawing_offset.x);
|
||||
|
@ -429,18 +429,18 @@ void GPU::WriteGP0(u32 value)
|
|||
|
||||
case 0xE3: // Set drawing area top left
|
||||
{
|
||||
m_drawing_area.top_left_x = param & UINT32_C(0x3FF);
|
||||
m_drawing_area.top_left_y = (param >> 10) & UINT32_C(0x1FF);
|
||||
Log_DebugPrintf("Set drawing area top-left: (%u, %u)", m_drawing_area.top_left_x, m_drawing_area.top_left_y);
|
||||
m_drawing_area.left = param & UINT32_C(0x3FF);
|
||||
m_drawing_area.top = (param >> 10) & UINT32_C(0x1FF);
|
||||
Log_DebugPrintf("Set drawing area top-left: (%u, %u)", m_drawing_area.left, m_drawing_area.top);
|
||||
}
|
||||
break;
|
||||
|
||||
case 0xE4: // Set drawing area bottom right
|
||||
{
|
||||
m_drawing_area.bottom_right_x = param & UINT32_C(0x3FF);
|
||||
m_drawing_area.bottom_right_y = (param >> 10) & UINT32_C(0x1FF);
|
||||
Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.bottom_right_x,
|
||||
m_drawing_area.bottom_right_y);
|
||||
m_drawing_area.right = param & UINT32_C(0x3FF);
|
||||
m_drawing_area.bottom = (param >> 10) & UINT32_C(0x1FF);
|
||||
Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right,
|
||||
m_drawing_area.bottom);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -785,8 +785,8 @@ void GPU::RenderState::SetFromPageAttribute(u16 value)
|
|||
if (texpage_attribute == value)
|
||||
return;
|
||||
|
||||
texture_base_x = static_cast<s32>(ZeroExtend32(value & UINT16_C(0x0F)) * UINT32_C(64));
|
||||
texture_base_y = static_cast<s32>(ZeroExtend32((value >> 4) & UINT16_C(1)) * UINT32_C(256));
|
||||
texture_page_x = static_cast<s32>(ZeroExtend32(value & UINT16_C(0x0F)) * UINT32_C(64));
|
||||
texture_page_y = static_cast<s32>(ZeroExtend32((value >> 4) & UINT16_C(1)) * UINT32_C(256));
|
||||
texture_color_mode = (static_cast<TextureColorMode>((value >> 7) & UINT16_C(0x03)));
|
||||
if (texture_color_mode == TextureColorMode::Reserved_Direct16Bit)
|
||||
texture_color_mode = TextureColorMode::Direct16Bit;
|
||||
|
|
|
@ -228,8 +228,8 @@ protected:
|
|||
static constexpr u16 PALETTE_ATTRIBUTE_MASK = UINT16_C(0b0111111111111111);
|
||||
|
||||
// decoded values
|
||||
s32 texture_base_x;
|
||||
s32 texture_base_y;
|
||||
s32 texture_page_x;
|
||||
s32 texture_page_y;
|
||||
s32 texture_palette_x;
|
||||
s32 texture_palette_y;
|
||||
TextureColorMode texture_color_mode;
|
||||
|
@ -265,8 +265,8 @@ protected:
|
|||
|
||||
struct DrawingArea
|
||||
{
|
||||
u32 top_left_x, top_left_y;
|
||||
u32 bottom_right_x, bottom_right_y;
|
||||
u32 left, top;
|
||||
u32 right, bottom;
|
||||
} m_drawing_area = {};
|
||||
|
||||
struct DrawingOffset
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#include "gpu_hw.h"
|
||||
#include "YBaseLib/Assert.h"
|
||||
#include "YBaseLib/Log.h"
|
||||
#include <sstream>
|
||||
Log_SetChannel(GPU_HW);
|
||||
|
||||
GPU_HW::GPU_HW() = default;
|
||||
|
||||
|
@ -110,10 +112,10 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices)
|
|||
|
||||
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
|
||||
{
|
||||
*left = m_drawing_area.top_left_x;
|
||||
*right = m_drawing_area.bottom_right_x + 1;
|
||||
*top = m_drawing_area.top_left_y;
|
||||
*bottom = m_drawing_area.bottom_right_y + 1;
|
||||
*left = m_drawing_area.left;
|
||||
*right = m_drawing_area.right + 1;
|
||||
*top = m_drawing_area.top;
|
||||
*bottom = m_drawing_area.bottom + 1;
|
||||
}
|
||||
|
||||
static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
|
||||
|
@ -176,7 +178,6 @@ in vec2 a_tex0;
|
|||
|
||||
out vec4 v_col0;
|
||||
#if TEXTURED
|
||||
uniform vec2 u_tex_scale;
|
||||
out vec2 v_tex0;
|
||||
#endif
|
||||
|
||||
|
@ -199,26 +200,75 @@ void main()
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW::GenerateFragmentShader(bool textured, bool blending)
|
||||
std::string GPU_HW::GenerateFragmentShader(bool textured, bool blending, TextureColorMode texture_color_mode)
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
DefineMacro(ss, "TEXTURED", textured);
|
||||
DefineMacro(ss, "BLENDING", blending);
|
||||
DefineMacro(ss, "PALETTE",
|
||||
textured && (texture_color_mode == GPU::TextureColorMode::Palette4Bit ||
|
||||
texture_color_mode == GPU::TextureColorMode::Palette8Bit));
|
||||
DefineMacro(ss, "PALETTE_4_BIT", textured && texture_color_mode == GPU::TextureColorMode::Palette4Bit);
|
||||
DefineMacro(ss, "PALETTE_8_BIT", textured && texture_color_mode == GPU::TextureColorMode::Palette8Bit);
|
||||
|
||||
ss << R"(
|
||||
in vec4 v_col0;
|
||||
#if TEXTURED
|
||||
in vec2 v_tex0;
|
||||
uniform sampler2D samp0;
|
||||
uniform ivec2 u_texture_page_base;
|
||||
#if PALETTE
|
||||
uniform ivec2 u_texture_palette_base;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
out vec4 o_col0;
|
||||
|
||||
#if TEXTURED
|
||||
vec4 SampleFromVRAM(vec2 coord)
|
||||
{
|
||||
// from 0..1 to 0..255
|
||||
ivec2 icoord = ivec2(coord * vec2(255.0));
|
||||
|
||||
// adjust for tightly packed palette formats
|
||||
ivec2 index_coord = icoord;
|
||||
#if PALETTE_4_BIT
|
||||
index_coord.x /= 4;
|
||||
#elif PALETTE_8_BIT
|
||||
index_coord.x /= 2;
|
||||
#endif
|
||||
|
||||
// fixup coords
|
||||
ivec2 vicoord = ivec2(u_texture_page_base.x + index_coord.x,
|
||||
fixYCoord(u_texture_page_base.y + index_coord.y));
|
||||
|
||||
// load colour/palette
|
||||
vec4 color = texelFetch(samp0, vicoord & VRAM_COORD_MASK, 0);
|
||||
|
||||
// apply palette
|
||||
#if PALETTE
|
||||
#if PALETTE_4_BIT
|
||||
int subpixel = int(icoord.x) & 3;
|
||||
uint vram_value = RGBA8ToRGBA5551(color);
|
||||
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
|
||||
#elif PALETTE_8_BIT
|
||||
int subpixel = int(icoord.x) & 1;
|
||||
uint vram_value = RGBA8ToRGBA5551(color);
|
||||
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
|
||||
#endif
|
||||
ivec2 palette_icoord = ivec2(u_texture_palette_base.x + palette_index, fixYCoord(u_texture_palette_base.y));
|
||||
color = texelFetch(samp0, palette_icoord & VRAM_COORD_MASK, 0);
|
||||
#endif
|
||||
|
||||
return color;
|
||||
}
|
||||
#endif
|
||||
|
||||
void main()
|
||||
{
|
||||
#if TEXTURED
|
||||
vec4 texcol = texture(samp0, v_tex0);
|
||||
vec4 texcol = SampleFromVRAM(v_tex0);
|
||||
if (texcol == vec4(0.0, 0.0, 0.0, 0.0))
|
||||
discard;
|
||||
|
||||
|
@ -255,64 +305,6 @@ void main()
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW::GenerateTexturePageFragmentShader(TextureColorMode mode)
|
||||
{
|
||||
const bool is_palette = (mode == GPU::TextureColorMode::Palette4Bit || mode == GPU::TextureColorMode::Palette8Bit);
|
||||
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
DefineMacro(ss, "PALETTE", is_palette);
|
||||
DefineMacro(ss, "PALETTE_4_BIT", mode == GPU::TextureColorMode::Palette4Bit);
|
||||
DefineMacro(ss, "PALETTE_8_BIT", mode == GPU::TextureColorMode::Palette8Bit);
|
||||
|
||||
ss << R"(
|
||||
uniform sampler2D samp0;
|
||||
uniform ivec2 base_offset;
|
||||
|
||||
#if PALETTE
|
||||
uniform ivec2 palette_offset;
|
||||
#endif
|
||||
|
||||
in vec2 v_tex0;
|
||||
out vec4 o_col0;
|
||||
|
||||
void main()
|
||||
{
|
||||
ivec2 local_coords = ivec2(gl_FragCoord.xy);
|
||||
#if PALETTE_4_BIT
|
||||
local_coords.x /= 4;
|
||||
#elif PALETTE_8_BIT
|
||||
local_coords.x /= 2;
|
||||
#endif
|
||||
|
||||
// fixup coords
|
||||
ivec2 coords = ivec2(base_offset.x + local_coords.x, fixYCoord(base_offset.y + local_coords.y));
|
||||
|
||||
// load colour/palette
|
||||
vec4 color = texelFetch(samp0, coords & VRAM_COORD_MASK, 0);
|
||||
|
||||
// apply palette
|
||||
#if PALETTE
|
||||
#if PALETTE_4_BIT
|
||||
int subpixel = int(gl_FragCoord.x) & 3;
|
||||
uint vram_value = RGBA8ToRGBA5551(color);
|
||||
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
|
||||
#elif PALETTE_8_BIT
|
||||
int subpixel = int(gl_FragCoord.x) & 1;
|
||||
uint vram_value = RGBA8ToRGBA5551(color);
|
||||
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
|
||||
#endif
|
||||
ivec2 palette_coords = ivec2(palette_offset.x + palette_index, fixYCoord(palette_offset.y));
|
||||
color = texelFetch(samp0, palette_coords & VRAM_COORD_MASK, 0);
|
||||
#endif
|
||||
|
||||
o_col0 = color;
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW::GenerateFillFragmentShader()
|
||||
{
|
||||
std::stringstream ss;
|
||||
|
@ -331,8 +323,6 @@ void main()
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
void GPU_HW::UpdateTexturePageTexture() {}
|
||||
|
||||
GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
|
||||
{
|
||||
if (rc.primitive == Primitive::Line)
|
||||
|
@ -343,6 +333,8 @@ GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc
|
|||
return HWRenderBatch::Primitive::Triangles;
|
||||
}
|
||||
|
||||
void GPU_HW::InvalidateVRAMReadCache() {}
|
||||
|
||||
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices)
|
||||
{
|
||||
if (rc.texture_enable)
|
||||
|
@ -375,20 +367,46 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices)
|
|||
if (m_render_state.IsTextureChanged())
|
||||
{
|
||||
if (!IsFlushed())
|
||||
{
|
||||
// we only need to update the copy texture if the render area intersects with the texture page
|
||||
const u32 texture_page_left = m_render_state.texture_page_x;
|
||||
const u32 texture_page_right = m_render_state.texture_page_y + TEXTURE_PAGE_WIDTH;
|
||||
const u32 texture_page_top = m_render_state.texture_page_y;
|
||||
const u32 texture_page_bottom = texture_page_top + TEXTURE_PAGE_HEIGHT;
|
||||
const bool texture_page_overlaps =
|
||||
(texture_page_left < m_drawing_area.right && texture_page_right > m_drawing_area.left &&
|
||||
texture_page_top > m_drawing_area.bottom && texture_page_bottom < m_drawing_area.top);
|
||||
|
||||
// TODO: Check palette too.
|
||||
if (texture_page_overlaps)
|
||||
{
|
||||
Log_DebugPrintf("Invalidating VRAM read cache due to drawing area overlap");
|
||||
InvalidateVRAMReadCache();
|
||||
}
|
||||
|
||||
// texture page changed?
|
||||
// TODO: Move this to the shader...
|
||||
FlushRender();
|
||||
UpdateTexturePageTexture();
|
||||
}
|
||||
|
||||
m_render_state.ClearTextureChangedFlag();
|
||||
}
|
||||
|
||||
if (m_batch.transparency_enable && m_render_state.IsTransparencyModeChanged() && !IsFlushed())
|
||||
FlushRender();
|
||||
|
||||
m_batch.transparency_mode = m_render_state.transparency_mode;
|
||||
m_render_state.ClearTransparencyModeChangedFlag();
|
||||
|
||||
m_batch.texture_color_mode = m_render_state.texture_color_mode;
|
||||
m_batch.texture_page_x = m_render_state.texture_page_x;
|
||||
m_batch.texture_page_y = m_render_state.texture_page_y;
|
||||
m_batch.texture_palette_x = m_render_state.texture_palette_x;
|
||||
m_batch.texture_palette_y = m_render_state.texture_palette_y;
|
||||
m_batch.transparency_mode = m_render_state.transparency_mode;
|
||||
}
|
||||
}
|
||||
|
||||
// extract state
|
||||
const bool rc_transparency_enable = rc.transparency_enable;
|
||||
const bool rc_texture_enable = rc.texture_enable;
|
||||
const bool rc_texture_blend_enable = !rc.texture_blend_disable;
|
||||
const HWRenderBatch::Primitive rc_primitive = GetPrimitiveForCommand(rc);
|
||||
|
@ -399,14 +417,15 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices)
|
|||
// including the degenerate triangles for strips
|
||||
const u32 max_added_vertices = num_vertices + 2;
|
||||
const bool params_changed =
|
||||
(m_batch.texture_enable != rc_texture_enable || m_batch.texture_blending_enable != rc_texture_blend_enable ||
|
||||
m_batch.primitive != rc_primitive);
|
||||
(m_batch.transparency_enable != rc_transparency_enable || m_batch.texture_enable != rc_texture_enable ||
|
||||
m_batch.texture_blending_enable != rc_texture_blend_enable || m_batch.primitive != rc_primitive);
|
||||
if ((m_batch.vertices.size() + max_added_vertices) >= MAX_BATCH_VERTEX_COUNT || params_changed)
|
||||
FlushRender();
|
||||
}
|
||||
|
||||
m_batch.primitive = rc_primitive;
|
||||
m_batch.transparency_enable = rc_transparency_enable;
|
||||
m_batch.texture_enable = rc_texture_enable;
|
||||
m_batch.texture_blending_enable = rc_texture_blend_enable;
|
||||
m_batch.primitive = rc_primitive;
|
||||
LoadVertices(rc, num_vertices);
|
||||
}
|
||||
|
|
|
@ -39,6 +39,11 @@ protected:
|
|||
bool transparency_enable;
|
||||
bool texture_enable;
|
||||
bool texture_blending_enable;
|
||||
TextureColorMode texture_color_mode;
|
||||
u32 texture_page_x;
|
||||
u32 texture_page_y;
|
||||
u32 texture_palette_x;
|
||||
u32 texture_palette_y;
|
||||
TransparencyMode transparency_mode;
|
||||
|
||||
std::vector<HWVertex> vertices;
|
||||
|
@ -46,6 +51,10 @@ protected:
|
|||
|
||||
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
|
||||
static constexpr u32 TEXTURE_TILE_SIZE = 256;
|
||||
static constexpr u32 TEXTURE_TILE_X_COUNT = VRAM_WIDTH / TEXTURE_TILE_SIZE;
|
||||
static constexpr u32 TEXTURE_TILE_Y_COUNT = VRAM_HEIGHT / TEXTURE_TILE_SIZE;
|
||||
static constexpr u32 TEXTURE_TILE_COUNT = TEXTURE_TILE_X_COUNT * TEXTURE_TILE_Y_COUNT;
|
||||
|
||||
static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba)
|
||||
{
|
||||
|
@ -55,7 +64,7 @@ protected:
|
|||
static_cast<float>(rgba >> 24) * (1.0f / 255.0f));
|
||||
}
|
||||
|
||||
virtual void UpdateTexturePageTexture();
|
||||
virtual void InvalidateVRAMReadCache();
|
||||
|
||||
bool IsFlushed() const { return m_batch.vertices.empty(); }
|
||||
|
||||
|
@ -64,9 +73,8 @@ protected:
|
|||
void CalcScissorRect(int* left, int* top, int* right, int* bottom);
|
||||
|
||||
std::string GenerateVertexShader(bool textured);
|
||||
std::string GenerateFragmentShader(bool textured, bool blending);
|
||||
std::string GenerateFragmentShader(bool textured, bool blending, TextureColorMode texture_color_mode);
|
||||
std::string GenerateScreenQuadVertexShader();
|
||||
std::string GenerateTexturePageFragmentShader(TextureColorMode mode);
|
||||
std::string GenerateFillFragmentShader();
|
||||
|
||||
HWRenderBatch m_batch = {};
|
||||
|
|
|
@ -43,7 +43,7 @@ void GPU_HW_OpenGL::RenderUI()
|
|||
|
||||
ImGui::TextUnformatted("Texture Page Updates:");
|
||||
ImGui::NextColumn();
|
||||
ImGui::Text("%u", m_stats.num_texture_page_updates);
|
||||
ImGui::Text("%u", m_stats.num_vram_read_texture_updates);
|
||||
ImGui::NextColumn();
|
||||
|
||||
ImGui::TextUnformatted("Batches Drawn:");
|
||||
|
@ -64,6 +64,11 @@ void GPU_HW_OpenGL::RenderUI()
|
|||
m_stats = {};
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::InvalidateVRAMReadCache()
|
||||
{
|
||||
m_vram_read_texture_dirty = true;
|
||||
}
|
||||
|
||||
std::tuple<s32, s32> GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y)
|
||||
{
|
||||
return std::make_tuple(x, static_cast<s32>(static_cast<s32>(VRAM_HEIGHT) - y));
|
||||
|
@ -79,11 +84,11 @@ void GPU_HW_OpenGL::CreateFramebuffer()
|
|||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_framebuffer_texture->GetGLId(), 0);
|
||||
Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
|
||||
|
||||
m_texture_page_texture =
|
||||
std::make_unique<GL::Texture>(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false);
|
||||
glGenFramebuffers(1, &m_texture_page_fbo_id);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, m_texture_page_fbo_id);
|
||||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_page_texture->GetGLId(), 0);
|
||||
m_vram_read_texture =
|
||||
std::make_unique<GL::Texture>(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false);
|
||||
glGenFramebuffers(1, &m_vram_read_fbo_id);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, m_vram_read_fbo_id);
|
||||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_vram_read_texture->GetGLId(), 0);
|
||||
Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
|
||||
}
|
||||
|
||||
|
@ -100,9 +105,9 @@ void GPU_HW_OpenGL::ClearFramebuffer()
|
|||
|
||||
void GPU_HW_OpenGL::DestroyFramebuffer()
|
||||
{
|
||||
glDeleteFramebuffers(1, &m_texture_page_fbo_id);
|
||||
m_texture_page_fbo_id = 0;
|
||||
m_texture_page_texture.reset();
|
||||
glDeleteFramebuffers(1, &m_vram_read_fbo_id);
|
||||
m_vram_read_fbo_id = 0;
|
||||
m_vram_read_texture.reset();
|
||||
|
||||
glDeleteFramebuffers(1, &m_framebuffer_fbo_id);
|
||||
m_framebuffer_fbo_id = 0;
|
||||
|
@ -132,41 +137,29 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
|
|||
|
||||
bool GPU_HW_OpenGL::CompilePrograms()
|
||||
{
|
||||
bool result = true;
|
||||
result &= CompileProgram(m_color_program, false, false);
|
||||
result &= CompileProgram(m_texture_program, true, false);
|
||||
result &= CompileProgram(m_blended_texture_program, true, true);
|
||||
if (!result)
|
||||
return false;
|
||||
|
||||
const std::string screen_quad_vs = GenerateScreenQuadVertexShader();
|
||||
for (u32 palette_size = 0; palette_size < static_cast<u32>(m_texture_page_programs.size()); palette_size++)
|
||||
for (u32 textured = 0; textured < 2; textured++)
|
||||
{
|
||||
const std::string fs = GenerateTexturePageFragmentShader(static_cast<TextureColorMode>(palette_size));
|
||||
|
||||
GL::Program& prog = m_texture_page_programs[palette_size];
|
||||
if (!prog.Compile(screen_quad_vs.c_str(), fs.c_str()))
|
||||
return false;
|
||||
|
||||
prog.BindFragData(0, "o_col0");
|
||||
|
||||
if (!prog.Link())
|
||||
return false;
|
||||
|
||||
prog.RegisterUniform("samp0");
|
||||
prog.RegisterUniform("base_offset");
|
||||
prog.RegisterUniform("palette_offset");
|
||||
prog.Bind();
|
||||
prog.Uniform1i(0, 0);
|
||||
for (u32 blending = 0; blending < 2; blending++)
|
||||
{
|
||||
for (u32 format = 0; format < 3; format++)
|
||||
{
|
||||
// TODO: eliminate duplicate shaders here
|
||||
if (!CompileProgram(m_render_programs[textured][blending][format], ConvertToBoolUnchecked(textured),
|
||||
ConvertToBoolUnchecked(blending), static_cast<TextureColorMode>(format)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blending)
|
||||
bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blending, TextureColorMode texture_color_mode)
|
||||
{
|
||||
const std::string vs = GenerateVertexShader(textured);
|
||||
const std::string fs = GenerateFragmentShader(textured, blending);
|
||||
const std::string fs = GenerateFragmentShader(textured, blending, texture_color_mode);
|
||||
if (!prog.Compile(vs.c_str(), fs.c_str()))
|
||||
return false;
|
||||
|
||||
|
@ -187,21 +180,29 @@ bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blendi
|
|||
if (textured)
|
||||
{
|
||||
prog.RegisterUniform("samp0");
|
||||
prog.RegisterUniform("u_texture_page_base");
|
||||
prog.RegisterUniform("u_texture_palette_base");
|
||||
prog.Uniform1i(1, 0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::SetProgram(bool textured, bool blending)
|
||||
void GPU_HW_OpenGL::SetProgram()
|
||||
{
|
||||
const GL::Program& prog = textured ? (blending ? m_blended_texture_program : m_texture_program) : m_color_program;
|
||||
const GL::Program& prog =
|
||||
m_render_programs[BoolToUInt32(m_batch.texture_enable)][BoolToUInt32(m_batch.texture_blending_enable)]
|
||||
[static_cast<u32>(m_batch.texture_color_mode)];
|
||||
prog.Bind();
|
||||
|
||||
if (textured)
|
||||
m_texture_page_texture->Bind();
|
||||
|
||||
prog.Uniform2i(0, m_drawing_offset.x, m_drawing_offset.y);
|
||||
|
||||
if (m_batch.texture_enable)
|
||||
{
|
||||
m_vram_read_texture->Bind();
|
||||
prog.Uniform2i(2, m_batch.texture_page_x, m_batch.texture_page_y);
|
||||
prog.Uniform2i(3, m_batch.texture_palette_x, m_batch.texture_palette_y);
|
||||
}
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::SetViewport()
|
||||
|
@ -302,6 +303,8 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color)
|
|||
const auto [r, g, b, a] = RGBA8ToFloat(RGBA5551ToRGBA8888(color));
|
||||
glClearColor(r, g, b, a);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
|
||||
InvalidateVRAMReadCache();
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
|
||||
|
@ -334,6 +337,8 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
|||
// lower-left origin flip happens here
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, x, VRAM_HEIGHT - y - height, width, height, GL_RGBA, GL_UNSIGNED_BYTE,
|
||||
rgba_data.data());
|
||||
|
||||
InvalidateVRAMReadCache();
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
|
||||
|
@ -347,31 +352,18 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
|
|||
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id);
|
||||
glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height,
|
||||
GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
|
||||
InvalidateVRAMReadCache();
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::UpdateTexturePageTexture()
|
||||
void GPU_HW_OpenGL::UpdateVRAMReadTexture()
|
||||
{
|
||||
m_stats.num_texture_page_updates++;
|
||||
m_stats.num_vram_read_texture_updates++;
|
||||
m_vram_read_texture_dirty = false;
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, m_texture_page_fbo_id);
|
||||
m_framebuffer_texture->Bind();
|
||||
|
||||
glDisable(GL_BLEND);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glViewport(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT);
|
||||
glBindVertexArray(m_attributeless_vao_id);
|
||||
|
||||
const GL::Program& prog = m_texture_page_programs[static_cast<u8>(m_render_state.texture_color_mode)];
|
||||
prog.Bind();
|
||||
|
||||
prog.Uniform2i(1, m_render_state.texture_base_x, m_render_state.texture_base_y);
|
||||
if (m_render_state.texture_color_mode >= GPU::TextureColorMode::Palette4Bit)
|
||||
prog.Uniform2i(2, m_render_state.texture_palette_x, m_render_state.texture_palette_y);
|
||||
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
m_framebuffer_texture->Unbind();
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id);
|
||||
// TODO: Fallback blit path, and partial updates.
|
||||
glCopyImageSubData(m_framebuffer_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0, m_vram_read_texture->GetGLId(),
|
||||
GL_TEXTURE_2D, 0, 0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 1);
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::FlushRender()
|
||||
|
@ -379,6 +371,9 @@ void GPU_HW_OpenGL::FlushRender()
|
|||
if (m_batch.vertices.empty())
|
||||
return;
|
||||
|
||||
if (m_vram_read_texture_dirty)
|
||||
UpdateVRAMReadTexture();
|
||||
|
||||
m_stats.num_batches++;
|
||||
m_stats.num_vertices += static_cast<u32>(m_batch.vertices.size());
|
||||
|
||||
|
@ -386,7 +381,7 @@ void GPU_HW_OpenGL::FlushRender()
|
|||
glDisable(GL_DEPTH_TEST);
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
glDepthMask(GL_FALSE);
|
||||
SetProgram(m_batch.texture_enable, m_batch.texture_blending_enable);
|
||||
SetProgram();
|
||||
SetViewport();
|
||||
SetScissor();
|
||||
SetBlendState();
|
||||
|
|
|
@ -23,13 +23,13 @@ protected:
|
|||
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) override;
|
||||
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
|
||||
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
|
||||
void UpdateTexturePageTexture() override;
|
||||
void FlushRender() override;
|
||||
void InvalidateVRAMReadCache() override;
|
||||
|
||||
private:
|
||||
struct GLStats
|
||||
{
|
||||
u32 num_texture_page_updates;
|
||||
u32 num_vram_read_texture_updates;
|
||||
u32 num_batches;
|
||||
u32 num_vertices;
|
||||
};
|
||||
|
@ -39,13 +39,14 @@ private:
|
|||
void CreateFramebuffer();
|
||||
void ClearFramebuffer();
|
||||
void DestroyFramebuffer();
|
||||
void UpdateVRAMReadTexture();
|
||||
|
||||
void CreateVertexBuffer();
|
||||
|
||||
bool CompilePrograms();
|
||||
bool CompileProgram(GL::Program& prog, bool textured, bool blending);
|
||||
bool CompileProgram(GL::Program& prog, bool textured, bool blending, TextureColorMode texture_color_mode);
|
||||
|
||||
void SetProgram(bool textured, bool blending);
|
||||
void SetProgram();
|
||||
void SetViewport();
|
||||
void SetScissor();
|
||||
void SetBlendState();
|
||||
|
@ -53,16 +54,15 @@ private:
|
|||
std::unique_ptr<GL::Texture> m_framebuffer_texture;
|
||||
GLuint m_framebuffer_fbo_id = 0;
|
||||
|
||||
std::unique_ptr<GL::Texture> m_texture_page_texture;
|
||||
GLuint m_texture_page_fbo_id = 0;
|
||||
std::unique_ptr<GL::Texture> m_vram_read_texture;
|
||||
GLuint m_vram_read_fbo_id = 0;
|
||||
bool m_vram_read_texture_dirty = true;
|
||||
|
||||
GLuint m_vertex_buffer = 0;
|
||||
GLuint m_vao_id = 0;
|
||||
GLuint m_attributeless_vao_id = 0;
|
||||
|
||||
GL::Program m_texture_program;
|
||||
GL::Program m_color_program;
|
||||
GL::Program m_blended_texture_program;
|
||||
std::array<std::array<std::array<GL::Program, 3>, 2>, 2> m_render_programs;
|
||||
std::array<GL::Program, 3> m_texture_page_programs;
|
||||
|
||||
GLStats m_stats = {};
|
||||
|
|
Loading…
Reference in a new issue