From 792ec27b1a7b80a4a90bc96dcf2c468663f6080f Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Thu, 26 Sep 2019 23:33:20 +1000 Subject: [PATCH] GPU: Improve batching by sampling a VRAM copy --- src/pse/gpu.cpp | 30 +++---- src/pse/gpu.h | 8 +- src/pse/gpu_hw.cpp | 165 +++++++++++++++++++++----------------- src/pse/gpu_hw.h | 14 +++- src/pse/gpu_hw_opengl.cpp | 121 ++++++++++++++-------------- src/pse/gpu_hw_opengl.h | 18 ++--- 6 files changed, 189 insertions(+), 167 deletions(-) diff --git a/src/pse/gpu.cpp b/src/pse/gpu.cpp index 20e60ff7d..b469e3fda 100644 --- a/src/pse/gpu.cpp +++ b/src/pse/gpu.cpp @@ -52,8 +52,8 @@ bool GPU::DoState(StateWrapper& sw) sw.Do(&m_GPUSTAT.bits); - sw.Do(&m_render_state.texture_base_x); - sw.Do(&m_render_state.texture_base_y); + sw.Do(&m_render_state.texture_page_x); + sw.Do(&m_render_state.texture_page_y); sw.Do(&m_render_state.texture_palette_x); sw.Do(&m_render_state.texture_palette_y); sw.Do(&m_render_state.texture_color_mode); @@ -69,10 +69,10 @@ bool GPU::DoState(StateWrapper& sw) sw.Do(&m_render_state.texture_changed); sw.Do(&m_render_state.transparency_mode_changed); - sw.Do(&m_drawing_area.top_left_x); - sw.Do(&m_drawing_area.top_left_y); - sw.Do(&m_drawing_area.bottom_right_x); - sw.Do(&m_drawing_area.bottom_right_y); + sw.Do(&m_drawing_area.left); + sw.Do(&m_drawing_area.top); + sw.Do(&m_drawing_area.right); + sw.Do(&m_drawing_area.bottom); sw.Do(&m_drawing_offset.x); sw.Do(&m_drawing_offset.y); sw.Do(&m_drawing_offset.x); @@ -429,18 +429,18 @@ void GPU::WriteGP0(u32 value) case 0xE3: // Set drawing area top left { - m_drawing_area.top_left_x = param & UINT32_C(0x3FF); - m_drawing_area.top_left_y = (param >> 10) & UINT32_C(0x1FF); - Log_DebugPrintf("Set drawing area top-left: (%u, %u)", m_drawing_area.top_left_x, m_drawing_area.top_left_y); + m_drawing_area.left = param & UINT32_C(0x3FF); + m_drawing_area.top = (param >> 10) & UINT32_C(0x1FF); + Log_DebugPrintf("Set drawing area top-left: (%u, %u)", 
m_drawing_area.left, m_drawing_area.top); } break; case 0xE4: // Set drawing area bottom right { - m_drawing_area.bottom_right_x = param & UINT32_C(0x3FF); - m_drawing_area.bottom_right_y = (param >> 10) & UINT32_C(0x1FF); - Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.bottom_right_x, - m_drawing_area.bottom_right_y); + m_drawing_area.right = param & UINT32_C(0x3FF); + m_drawing_area.bottom = (param >> 10) & UINT32_C(0x1FF); + Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, + m_drawing_area.bottom); } break; @@ -785,8 +785,8 @@ void GPU::RenderState::SetFromPageAttribute(u16 value) if (texpage_attribute == value) return; - texture_base_x = static_cast(ZeroExtend32(value & UINT16_C(0x0F)) * UINT32_C(64)); - texture_base_y = static_cast(ZeroExtend32((value >> 4) & UINT16_C(1)) * UINT32_C(256)); + texture_page_x = static_cast(ZeroExtend32(value & UINT16_C(0x0F)) * UINT32_C(64)); + texture_page_y = static_cast(ZeroExtend32((value >> 4) & UINT16_C(1)) * UINT32_C(256)); texture_color_mode = (static_cast((value >> 7) & UINT16_C(0x03))); if (texture_color_mode == TextureColorMode::Reserved_Direct16Bit) texture_color_mode = TextureColorMode::Direct16Bit; diff --git a/src/pse/gpu.h b/src/pse/gpu.h index 5b1c72c66..1c03d2295 100644 --- a/src/pse/gpu.h +++ b/src/pse/gpu.h @@ -228,8 +228,8 @@ protected: static constexpr u16 PALETTE_ATTRIBUTE_MASK = UINT16_C(0b0111111111111111); // decoded values - s32 texture_base_x; - s32 texture_base_y; + s32 texture_page_x; + s32 texture_page_y; s32 texture_palette_x; s32 texture_palette_y; TextureColorMode texture_color_mode; @@ -265,8 +265,8 @@ protected: struct DrawingArea { - u32 top_left_x, top_left_y; - u32 bottom_right_x, bottom_right_y; + u32 left, top; + u32 right, bottom; } m_drawing_area = {}; struct DrawingOffset diff --git a/src/pse/gpu_hw.cpp b/src/pse/gpu_hw.cpp index a0c4f8107..c2d92fdb4 100644 --- a/src/pse/gpu_hw.cpp +++ b/src/pse/gpu_hw.cpp @@ -1,6 +1,8 @@ 
#include "gpu_hw.h" #include "YBaseLib/Assert.h" +#include "YBaseLib/Log.h" #include +Log_SetChannel(GPU_HW); GPU_HW::GPU_HW() = default; @@ -110,10 +112,10 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices) void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom) { - *left = m_drawing_area.top_left_x; - *right = m_drawing_area.bottom_right_x + 1; - *top = m_drawing_area.top_left_y; - *bottom = m_drawing_area.bottom_right_y + 1; + *left = m_drawing_area.left; + *right = m_drawing_area.right + 1; + *top = m_drawing_area.top; + *bottom = m_drawing_area.bottom + 1; } static void DefineMacro(std::stringstream& ss, const char* name, bool enabled) @@ -176,7 +178,6 @@ in vec2 a_tex0; out vec4 v_col0; #if TEXTURED - uniform vec2 u_tex_scale; out vec2 v_tex0; #endif @@ -199,26 +200,75 @@ void main() return ss.str(); } -std::string GPU_HW::GenerateFragmentShader(bool textured, bool blending) +std::string GPU_HW::GenerateFragmentShader(bool textured, bool blending, TextureColorMode texture_color_mode) { std::stringstream ss; GenerateShaderHeader(ss); DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "BLENDING", blending); + DefineMacro(ss, "PALETTE", + textured && (texture_color_mode == GPU::TextureColorMode::Palette4Bit || + texture_color_mode == GPU::TextureColorMode::Palette8Bit)); + DefineMacro(ss, "PALETTE_4_BIT", textured && texture_color_mode == GPU::TextureColorMode::Palette4Bit); + DefineMacro(ss, "PALETTE_8_BIT", textured && texture_color_mode == GPU::TextureColorMode::Palette8Bit); ss << R"( in vec4 v_col0; #if TEXTURED in vec2 v_tex0; uniform sampler2D samp0; + uniform ivec2 u_texture_page_base; + #if PALETTE + uniform ivec2 u_texture_palette_base; + #endif #endif out vec4 o_col0; +#if TEXTURED +vec4 SampleFromVRAM(vec2 coord) +{ + // from 0..1 to 0..255 + ivec2 icoord = ivec2(coord * vec2(255.0)); + + // adjust for tightly packed palette formats + ivec2 index_coord = icoord; + #if PALETTE_4_BIT + index_coord.x /= 4; + #elif 
PALETTE_8_BIT + index_coord.x /= 2; + #endif + + // fixup coords + ivec2 vicoord = ivec2(u_texture_page_base.x + index_coord.x, + fixYCoord(u_texture_page_base.y + index_coord.y)); + + // load colour/palette + vec4 color = texelFetch(samp0, vicoord & VRAM_COORD_MASK, 0); + + // apply palette + #if PALETTE + #if PALETTE_4_BIT + int subpixel = int(icoord.x) & 3; + uint vram_value = RGBA8ToRGBA5551(color); + int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu); + #elif PALETTE_8_BIT + int subpixel = int(icoord.x) & 1; + uint vram_value = RGBA8ToRGBA5551(color); + int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu); + #endif + ivec2 palette_icoord = ivec2(u_texture_palette_base.x + palette_index, fixYCoord(u_texture_palette_base.y)); + color = texelFetch(samp0, palette_icoord & VRAM_COORD_MASK, 0); + #endif + + return color; +} +#endif + void main() { #if TEXTURED - vec4 texcol = texture(samp0, v_tex0); + vec4 texcol = SampleFromVRAM(v_tex0); if (texcol == vec4(0.0, 0.0, 0.0, 0.0)) discard; @@ -255,64 +305,6 @@ void main() return ss.str(); } -std::string GPU_HW::GenerateTexturePageFragmentShader(TextureColorMode mode) -{ - const bool is_palette = (mode == GPU::TextureColorMode::Palette4Bit || mode == GPU::TextureColorMode::Palette8Bit); - - std::stringstream ss; - GenerateShaderHeader(ss); - DefineMacro(ss, "PALETTE", is_palette); - DefineMacro(ss, "PALETTE_4_BIT", mode == GPU::TextureColorMode::Palette4Bit); - DefineMacro(ss, "PALETTE_8_BIT", mode == GPU::TextureColorMode::Palette8Bit); - - ss << R"( -uniform sampler2D samp0; -uniform ivec2 base_offset; - -#if PALETTE -uniform ivec2 palette_offset; -#endif - -in vec2 v_tex0; -out vec4 o_col0; - -void main() -{ - ivec2 local_coords = ivec2(gl_FragCoord.xy); - #if PALETTE_4_BIT - local_coords.x /= 4; - #elif PALETTE_8_BIT - local_coords.x /= 2; - #endif - - // fixup coords - ivec2 coords = ivec2(base_offset.x + local_coords.x, fixYCoord(base_offset.y + local_coords.y)); - - // load colour/palette 
- vec4 color = texelFetch(samp0, coords & VRAM_COORD_MASK, 0); - - // apply palette - #if PALETTE - #if PALETTE_4_BIT - int subpixel = int(gl_FragCoord.x) & 3; - uint vram_value = RGBA8ToRGBA5551(color); - int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu); - #elif PALETTE_8_BIT - int subpixel = int(gl_FragCoord.x) & 1; - uint vram_value = RGBA8ToRGBA5551(color); - int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu); - #endif - ivec2 palette_coords = ivec2(palette_offset.x + palette_index, fixYCoord(palette_offset.y)); - color = texelFetch(samp0, palette_coords & VRAM_COORD_MASK, 0); - #endif - - o_col0 = color; -} -)"; - - return ss.str(); -} - std::string GPU_HW::GenerateFillFragmentShader() { std::stringstream ss; @@ -331,8 +323,6 @@ void main() return ss.str(); } -void GPU_HW::UpdateTexturePageTexture() {} - GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc) { if (rc.primitive == Primitive::Line) @@ -343,6 +333,8 @@ GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc return HWRenderBatch::Primitive::Triangles; } +void GPU_HW::InvalidateVRAMReadCache() {} + void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) { if (rc.texture_enable) @@ -375,20 +367,46 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) if (m_render_state.IsTextureChanged()) { if (!IsFlushed()) + { + // we only need to update the copy texture if the render area intersects with the texture page; the drawing area's right/bottom edges are inclusive (the scissor rect adds 1 to them), hence <= on those sides + const u32 texture_page_left = m_render_state.texture_page_x; + const u32 texture_page_right = texture_page_left + TEXTURE_PAGE_WIDTH; + const u32 texture_page_top = m_render_state.texture_page_y; + const u32 texture_page_bottom = texture_page_top + TEXTURE_PAGE_HEIGHT; + const bool texture_page_overlaps = + (texture_page_left <= m_drawing_area.right && texture_page_right > m_drawing_area.left && + texture_page_top <= m_drawing_area.bottom && texture_page_bottom > 
m_drawing_area.top); + + // TODO: Check palette too. + if (texture_page_overlaps) + { + Log_DebugPrintf("Invalidating VRAM read cache due to drawing area overlap"); + InvalidateVRAMReadCache(); + } + + // texture page changed? + // TODO: Move this to the shader... FlushRender(); - UpdateTexturePageTexture(); + } + m_render_state.ClearTextureChangedFlag(); } if (m_batch.transparency_enable && m_render_state.IsTransparencyModeChanged() && !IsFlushed()) FlushRender(); - - m_batch.transparency_mode = m_render_state.transparency_mode; m_render_state.ClearTransparencyModeChangedFlag(); + + m_batch.texture_color_mode = m_render_state.texture_color_mode; + m_batch.texture_page_x = m_render_state.texture_page_x; + m_batch.texture_page_y = m_render_state.texture_page_y; + m_batch.texture_palette_x = m_render_state.texture_palette_x; + m_batch.texture_palette_y = m_render_state.texture_palette_y; + m_batch.transparency_mode = m_render_state.transparency_mode; } } // extract state + const bool rc_transparency_enable = rc.transparency_enable; const bool rc_texture_enable = rc.texture_enable; const bool rc_texture_blend_enable = !rc.texture_blend_disable; const HWRenderBatch::Primitive rc_primitive = GetPrimitiveForCommand(rc); @@ -399,14 +417,15 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) // including the degenerate triangles for strips const u32 max_added_vertices = num_vertices + 2; const bool params_changed = - (m_batch.texture_enable != rc_texture_enable || m_batch.texture_blending_enable != rc_texture_blend_enable || - m_batch.primitive != rc_primitive); + (m_batch.transparency_enable != rc_transparency_enable || m_batch.texture_enable != rc_texture_enable || + m_batch.texture_blending_enable != rc_texture_blend_enable || m_batch.primitive != rc_primitive); if ((m_batch.vertices.size() + max_added_vertices) >= MAX_BATCH_VERTEX_COUNT || params_changed) FlushRender(); } + m_batch.primitive = rc_primitive; + m_batch.transparency_enable = 
rc_transparency_enable; m_batch.texture_enable = rc_texture_enable; m_batch.texture_blending_enable = rc_texture_blend_enable; - m_batch.primitive = rc_primitive; LoadVertices(rc, num_vertices); } diff --git a/src/pse/gpu_hw.h b/src/pse/gpu_hw.h index a4763e929..d0f197c89 100644 --- a/src/pse/gpu_hw.h +++ b/src/pse/gpu_hw.h @@ -39,6 +39,11 @@ protected: bool transparency_enable; bool texture_enable; bool texture_blending_enable; + TextureColorMode texture_color_mode; + u32 texture_page_x; + u32 texture_page_y; + u32 texture_palette_x; + u32 texture_palette_y; TransparencyMode transparency_mode; std::vector vertices; @@ -46,6 +51,10 @@ protected: static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024; static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex); + static constexpr u32 TEXTURE_TILE_SIZE = 256; + static constexpr u32 TEXTURE_TILE_X_COUNT = VRAM_WIDTH / TEXTURE_TILE_SIZE; + static constexpr u32 TEXTURE_TILE_Y_COUNT = VRAM_HEIGHT / TEXTURE_TILE_SIZE; + static constexpr u32 TEXTURE_TILE_COUNT = TEXTURE_TILE_X_COUNT * TEXTURE_TILE_Y_COUNT; static constexpr std::tuple RGBA8ToFloat(u32 rgba) { @@ -55,7 +64,7 @@ protected: static_cast(rgba >> 24) * (1.0f / 255.0f)); } - virtual void UpdateTexturePageTexture(); + virtual void InvalidateVRAMReadCache(); bool IsFlushed() const { return m_batch.vertices.empty(); } @@ -64,9 +73,8 @@ protected: void CalcScissorRect(int* left, int* top, int* right, int* bottom); std::string GenerateVertexShader(bool textured); - std::string GenerateFragmentShader(bool textured, bool blending); + std::string GenerateFragmentShader(bool textured, bool blending, TextureColorMode texture_color_mode); std::string GenerateScreenQuadVertexShader(); - std::string GenerateTexturePageFragmentShader(TextureColorMode mode); std::string GenerateFillFragmentShader(); HWRenderBatch m_batch = {}; diff --git a/src/pse/gpu_hw_opengl.cpp b/src/pse/gpu_hw_opengl.cpp index 65e0af9ec..5fabe395b 100644 --- 
a/src/pse/gpu_hw_opengl.cpp +++ b/src/pse/gpu_hw_opengl.cpp @@ -43,7 +43,7 @@ void GPU_HW_OpenGL::RenderUI() ImGui::TextUnformatted("Texture Page Updates:"); ImGui::NextColumn(); - ImGui::Text("%u", m_stats.num_texture_page_updates); + ImGui::Text("%u", m_stats.num_vram_read_texture_updates); ImGui::NextColumn(); ImGui::TextUnformatted("Batches Drawn:"); @@ -64,6 +64,11 @@ void GPU_HW_OpenGL::RenderUI() m_stats = {}; } +void GPU_HW_OpenGL::InvalidateVRAMReadCache() +{ + m_vram_read_texture_dirty = true; +} + std::tuple GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y) { return std::make_tuple(x, static_cast(static_cast(VRAM_HEIGHT) - y)); @@ -79,11 +84,11 @@ void GPU_HW_OpenGL::CreateFramebuffer() glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_framebuffer_texture->GetGLId(), 0); Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - m_texture_page_texture = - std::make_unique(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false); - glGenFramebuffers(1, &m_texture_page_fbo_id); - glBindFramebuffer(GL_FRAMEBUFFER, m_texture_page_fbo_id); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_page_texture->GetGLId(), 0); + m_vram_read_texture = + std::make_unique(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false); + glGenFramebuffers(1, &m_vram_read_fbo_id); + glBindFramebuffer(GL_FRAMEBUFFER, m_vram_read_fbo_id); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_vram_read_texture->GetGLId(), 0); Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); } @@ -100,9 +105,9 @@ void GPU_HW_OpenGL::ClearFramebuffer() void GPU_HW_OpenGL::DestroyFramebuffer() { - glDeleteFramebuffers(1, &m_texture_page_fbo_id); - m_texture_page_fbo_id = 0; - m_texture_page_texture.reset(); + glDeleteFramebuffers(1, &m_vram_read_fbo_id); + m_vram_read_fbo_id = 0; + m_vram_read_texture.reset(); 
glDeleteFramebuffers(1, &m_framebuffer_fbo_id); m_framebuffer_fbo_id = 0; @@ -132,41 +137,29 @@ void GPU_HW_OpenGL::CreateVertexBuffer() bool GPU_HW_OpenGL::CompilePrograms() { - bool result = true; - result &= CompileProgram(m_color_program, false, false); - result &= CompileProgram(m_texture_program, true, false); - result &= CompileProgram(m_blended_texture_program, true, true); - if (!result) - return false; - - const std::string screen_quad_vs = GenerateScreenQuadVertexShader(); - for (u32 palette_size = 0; palette_size < static_cast(m_texture_page_programs.size()); palette_size++) + for (u32 textured = 0; textured < 2; textured++) { - const std::string fs = GenerateTexturePageFragmentShader(static_cast(palette_size)); - - GL::Program& prog = m_texture_page_programs[palette_size]; - if (!prog.Compile(screen_quad_vs.c_str(), fs.c_str())) - return false; - - prog.BindFragData(0, "o_col0"); - - if (!prog.Link()) - return false; - - prog.RegisterUniform("samp0"); - prog.RegisterUniform("base_offset"); - prog.RegisterUniform("palette_offset"); - prog.Bind(); - prog.Uniform1i(0, 0); + for (u32 blending = 0; blending < 2; blending++) + { + for (u32 format = 0; format < 3; format++) + { + // TODO: eliminate duplicate shaders here + if (!CompileProgram(m_render_programs[textured][blending][format], ConvertToBoolUnchecked(textured), + ConvertToBoolUnchecked(blending), static_cast(format))) + { + return false; + } + } + } } return true; } -bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blending) +bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blending, TextureColorMode texture_color_mode) { const std::string vs = GenerateVertexShader(textured); - const std::string fs = GenerateFragmentShader(textured, blending); + const std::string fs = GenerateFragmentShader(textured, blending, texture_color_mode); if (!prog.Compile(vs.c_str(), fs.c_str())) return false; @@ -187,21 +180,29 @@ bool 
GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blendi if (textured) { prog.RegisterUniform("samp0"); + prog.RegisterUniform("u_texture_page_base"); + prog.RegisterUniform("u_texture_palette_base"); prog.Uniform1i(1, 0); } return true; } -void GPU_HW_OpenGL::SetProgram(bool textured, bool blending) +void GPU_HW_OpenGL::SetProgram() { - const GL::Program& prog = textured ? (blending ? m_blended_texture_program : m_texture_program) : m_color_program; + const GL::Program& prog = + m_render_programs[BoolToUInt32(m_batch.texture_enable)][BoolToUInt32(m_batch.texture_blending_enable)] + [static_cast(m_batch.texture_color_mode)]; prog.Bind(); - if (textured) - m_texture_page_texture->Bind(); - prog.Uniform2i(0, m_drawing_offset.x, m_drawing_offset.y); + + if (m_batch.texture_enable) + { + m_vram_read_texture->Bind(); + prog.Uniform2i(2, m_batch.texture_page_x, m_batch.texture_page_y); + prog.Uniform2i(3, m_batch.texture_palette_x, m_batch.texture_palette_y); + } } void GPU_HW_OpenGL::SetViewport() @@ -302,6 +303,8 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) const auto [r, g, b, a] = RGBA8ToFloat(RGBA5551ToRGBA8888(color)); glClearColor(r, g, b, a); glClear(GL_COLOR_BUFFER_BIT); + + InvalidateVRAMReadCache(); } void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) @@ -334,6 +337,8 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* // lower-left origin flip happens here glTexSubImage2D(GL_TEXTURE_2D, 0, x, VRAM_HEIGHT - y - height, width, height, GL_RGBA, GL_UNSIGNED_BYTE, rgba_data.data()); + + InvalidateVRAMReadCache(); } void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) @@ -347,31 +352,18 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id); glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, 
dst_x + width, dst_y + height, GL_COLOR_BUFFER_BIT, GL_NEAREST); + + InvalidateVRAMReadCache(); } -void GPU_HW_OpenGL::UpdateTexturePageTexture() +void GPU_HW_OpenGL::UpdateVRAMReadTexture() { - m_stats.num_texture_page_updates++; + m_stats.num_vram_read_texture_updates++; + m_vram_read_texture_dirty = false; - glBindFramebuffer(GL_FRAMEBUFFER, m_texture_page_fbo_id); - m_framebuffer_texture->Bind(); - - glDisable(GL_BLEND); - glDisable(GL_SCISSOR_TEST); - glViewport(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT); - glBindVertexArray(m_attributeless_vao_id); - - const GL::Program& prog = m_texture_page_programs[static_cast(m_render_state.texture_color_mode)]; - prog.Bind(); - - prog.Uniform2i(1, m_render_state.texture_base_x, m_render_state.texture_base_y); - if (m_render_state.texture_color_mode >= GPU::TextureColorMode::Palette4Bit) - prog.Uniform2i(2, m_render_state.texture_palette_x, m_render_state.texture_palette_y); - - glDrawArrays(GL_TRIANGLES, 0, 3); - - m_framebuffer_texture->Unbind(); - glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id); + // TODO: Fallback blit path, and partial updates. 
+ glCopyImageSubData(m_framebuffer_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0, m_vram_read_texture->GetGLId(), + GL_TEXTURE_2D, 0, 0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 1); } void GPU_HW_OpenGL::FlushRender() @@ -379,6 +371,9 @@ void GPU_HW_OpenGL::FlushRender() if (m_batch.vertices.empty()) return; + if (m_vram_read_texture_dirty) + UpdateVRAMReadTexture(); + m_stats.num_batches++; m_stats.num_vertices += static_cast(m_batch.vertices.size()); @@ -386,7 +381,7 @@ void GPU_HW_OpenGL::FlushRender() glDisable(GL_DEPTH_TEST); glEnable(GL_SCISSOR_TEST); glDepthMask(GL_FALSE); - SetProgram(m_batch.texture_enable, m_batch.texture_blending_enable); + SetProgram(); SetViewport(); SetScissor(); SetBlendState(); diff --git a/src/pse/gpu_hw_opengl.h b/src/pse/gpu_hw_opengl.h index ebfef049d..c7b2e14ff 100644 --- a/src/pse/gpu_hw_opengl.h +++ b/src/pse/gpu_hw_opengl.h @@ -23,13 +23,13 @@ protected: void FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void UpdateTexturePageTexture() override; void FlushRender() override; + void InvalidateVRAMReadCache() override; private: struct GLStats { - u32 num_texture_page_updates; + u32 num_vram_read_texture_updates; u32 num_batches; u32 num_vertices; }; @@ -39,13 +39,14 @@ private: void CreateFramebuffer(); void ClearFramebuffer(); void DestroyFramebuffer(); + void UpdateVRAMReadTexture(); void CreateVertexBuffer(); bool CompilePrograms(); - bool CompileProgram(GL::Program& prog, bool textured, bool blending); + bool CompileProgram(GL::Program& prog, bool textured, bool blending, TextureColorMode texture_color_mode); - void SetProgram(bool textured, bool blending); + void SetProgram(); void SetViewport(); void SetScissor(); void SetBlendState(); @@ -53,16 +54,15 @@ private: std::unique_ptr m_framebuffer_texture; GLuint m_framebuffer_fbo_id = 0; - 
std::unique_ptr m_texture_page_texture; - GLuint m_texture_page_fbo_id = 0; + std::unique_ptr m_vram_read_texture; + GLuint m_vram_read_fbo_id = 0; + bool m_vram_read_texture_dirty = true; GLuint m_vertex_buffer = 0; GLuint m_vao_id = 0; GLuint m_attributeless_vao_id = 0; - GL::Program m_texture_program; - GL::Program m_color_program; - GL::Program m_blended_texture_program; + std::array, 2>, 2> m_render_programs; std::array m_texture_page_programs; GLStats m_stats = {};