diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp
index 2e417f44a..026eb4766 100644
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -728,6 +728,22 @@ bool GPU_HW_OpenGL::CompilePrograms()
     m_downsample_program = std::move(*prog);
   }
 
+  // Texture blit program, used by BlitTexture().
+  prog = shader_cache.GetProgram(shadergen.GenerateUVQuadVertexShader(), {}, shadergen.GenerateSampleFragmentShader(),
+                                 [this, use_binding_layout](GL::Program& prog) {
+                                   if (!IsGLES() && !use_binding_layout)
+                                     prog.BindFragData(0, "o_col0");
+                                 });
+  if (!prog)
+    return false;
+
+  if (!use_binding_layout)
+  {
+    prog->Bind();
+    prog->Uniform1i("samp0", 0);
+  }
+  m_blit_program = std::move(*prog);
+
   UPDATE_PROGRESS();
 
 #undef UPDATE_PROGRESS
@@ -1165,12 +1181,12 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
       const u32 scaled_x = x * m_resolution_scale;
       const u32 scaled_y = y * m_resolution_scale;
       const u32 scaled_flipped_y = m_vram_texture.GetHeight() - scaled_y - scaled_height;
-      glDisable(GL_SCISSOR_TEST);
-      m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
-      glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y,
-                        scaled_x + scaled_width, scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
-      glEnable(GL_SCISSOR_TEST);
+
+      BlitTexture(m_vram_encoding_texture, x, flipped_y, width, height, scaled_x, scaled_flipped_y, scaled_width,
+                  scaled_height);
     }
+
+    RestoreGraphicsAPIState();
   }
 }
 
@@ -1201,6 +1217,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
                dst_bounds_scaled.GetWidth(), dst_bounds_scaled.GetHeight());
     m_vram_read_texture.Bind();
     m_vram_copy_program.Bind();
+    glBindVertexArray(m_attributeless_vao_id);
     glDrawArrays(GL_TRIANGLES, 0, 3);
 
     RestoreGraphicsAPIState();
@@ -1241,21 +1258,91 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
   }
   else
   {
-    // glBlitFramebufer with same source/destination should be legal, but on Mali (at least Bifrost) it breaks.
-    // So, blit from the shadow texture, like in the other renderers.
     if (src_dirty)
       UpdateVRAMReadTexture();
 
-    glDisable(GL_SCISSOR_TEST);
-    m_vram_read_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
-    glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height,
-                      GL_COLOR_BUFFER_BIT, GL_NEAREST);
-    glEnable(GL_SCISSOR_TEST);
+    CopyTexture(m_vram_texture, m_vram_fbo_id, m_vram_read_texture, src_x, src_y, dst_x, dst_y, width, height);
   }
 
   IncludeVRAMDirtyRectangle(dst_bounds);
 }
 
+// Copies a width x height region of src at (src_x, src_y) to (dst_x, dst_y) in dest, without scaling.
+// dest_fbo is the framebuffer object that renders into dest; the framebuffer-blit and shader paths draw through it,
+// while the glCopyImageSubData() paths address the textures directly and ignore it.
+void GPU_HW_OpenGL::CopyTexture(GL::Texture& dest, GLuint dest_fbo, GL::Texture& src, u32 src_x, u32 src_y, u32 dst_x,
+                                u32 dst_y, u32 width, u32 height)
+{
+  if (src.IsMultisampled() || dest.IsMultisampled())
+  {
+    // The MSAA case still needs framebuffer blits, since image copies require matching sample counts.
+    // The VRAM texture is read through m_vram_fbo_id, as in the code this helper replaced; any other source is read
+    // through its own framebuffer.
+    const GLuint src_fbo = (&src == &m_vram_texture) ? m_vram_fbo_id : src.GetGLFramebufferID();
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dest_fbo);
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, src_fbo);
+    glDisable(GL_SCISSOR_TEST);
+    glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height,
+                      GL_COLOR_BUFFER_BIT, GL_NEAREST);
+    glEnable(GL_SCISSOR_TEST);
+
+    // Rebind the VRAM framebuffer for drawing, as the blit path this helper replaced did.
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
+    return;
+  }
+
+  if (GLAD_GL_VERSION_4_3)
+  {
+    glCopyImageSubData(src.GetGLId(), src.GetGLTarget(), 0, src_x, src_y, 0, dest.GetGLId(), dest.GetGLTarget(), 0,
+                       dst_x, dst_y, 0, width, height, 1);
+  }
+  else if (GLAD_GL_EXT_copy_image)
+  {
+    glCopyImageSubDataEXT(src.GetGLId(), src.GetGLTarget(), 0, src_x, src_y, 0, dest.GetGLId(), dest.GetGLTarget(), 0,
+                          dst_x, dst_y, 0, width, height, 1);
+  }
+  else if (GLAD_GL_OES_copy_image)
+  {
+    glCopyImageSubDataOES(src.GetGLId(), src.GetGLTarget(), 0, src_x, src_y, 0, dest.GetGLId(), dest.GetGLTarget(), 0,
+                          dst_x, dst_y, 0, width, height, 1);
+  }
+  else
+  {
+    // No image-copy support: draw the source region into the destination with the blit shader.
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dest_fbo);
+    BlitTexture(src, src_x, src_y, width, height, dst_x, dst_y, width, height);
+    RestoreGraphicsAPIState();
+  }
+}
+
+// Draws the (src_x, src_y, src_width, src_height) region of src into the (dst_x, dst_y, dst_width, dst_height)
+// rectangle of the currently-bound draw framebuffer using m_blit_program.
+// Scissor testing and blending are disabled and the viewport, depth function, bound texture, program and vertex
+// array are changed; both callers in this file call RestoreGraphicsAPIState() afterwards.
+void GPU_HW_OpenGL::BlitTexture(GL::Texture& src, u32 src_x, u32 src_y, u32 src_width, u32 src_height, u32 dst_x,
+                                u32 dst_y, u32 dst_width, u32 dst_height)
+{
+  // A copy shader is probably better than glBlitFramebuffer() on mobile drivers.
+  // The uniforms are the source rectangle's corners, normalized to the source texture's dimensions.
+  const float uniforms[4] = {
+    static_cast<float>(src_x) / static_cast<float>(src.GetWidth()),
+    static_cast<float>(src_y) / static_cast<float>(src.GetHeight()),
+    static_cast<float>(src_x + src_width) / static_cast<float>(src.GetWidth()),
+    static_cast<float>(src_y + src_height) / static_cast<float>(src.GetHeight()),
+  };
+  UploadUniformBuffer(uniforms, sizeof(uniforms));
+
+  glDisable(GL_SCISSOR_TEST);
+  glDisable(GL_BLEND);
+  SetDepthFunc(GL_ALWAYS);
+
+  glViewport(dst_x, dst_y, dst_width, dst_height);
+  src.Bind();
+  m_blit_program.Bind();
+  glBindVertexArray(m_attributeless_vao_id);
+  glDrawArrays(GL_TRIANGLES, 0, 3);
+}
+
 void GPU_HW_OpenGL::UpdateVRAMReadTexture()
 {
   const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale;
@@ -1263,32 +1350,8 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture()
   const u32 height = scaled_rect.GetHeight();
   const u32 x = scaled_rect.left;
   const u32 y = m_vram_texture.GetHeight() - scaled_rect.top - height;
-  const bool multisampled = m_vram_texture.IsMultisampled();
 
-  if (!multisampled && GLAD_GL_VERSION_4_3)
-  {
-    glCopyImageSubData(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0,
-                       m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1);
-  }
-  else if (!multisampled && GLAD_GL_EXT_copy_image)
-  {
-    glCopyImageSubDataEXT(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0,
-                          m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1);
-  }
-  else if (!multisampled && GLAD_GL_OES_copy_image)
-  {
-    glCopyImageSubDataOES(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0,
-                          m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1);
-  }
-  else
-  {
-    m_vram_read_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
-    glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_fbo_id);
-    glDisable(GL_SCISSOR_TEST);
-    glBlitFramebuffer(x, y, x + width, y + height, x, y, x + width, y + height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
-    glEnable(GL_SCISSOR_TEST);
-    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
-  }
+  CopyTexture(m_vram_read_texture, m_vram_read_texture.GetGLFramebufferID(), m_vram_texture, x, y, x, y, width, height);
 
   GPU_HW::UpdateVRAMReadTexture();
 }
diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h
index 1d6751f7f..ce4380ee4 100644
--- a/src/core/gpu_hw_opengl.h
+++ b/src/core/gpu_hw_opengl.h
@@ -76,6 +76,10 @@ private:
   bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height);
   void DownsampleFramebuffer(GL::Texture& source, u32 left, u32 top, u32 width, u32 height);
   void DownsampleFramebufferBoxFilter(GL::Texture& source, u32 left, u32 top, u32 width, u32 height);
+  void CopyTexture(GL::Texture& dest, GLuint dest_fbo, GL::Texture& src, u32 src_x, u32 src_y, u32 dst_x, u32 dst_y,
+                   u32 width, u32 height);
+  void BlitTexture(GL::Texture& src, u32 src_x, u32 src_y, u32 src_width, u32 src_height, u32 dst_x, u32 dst_y,
+                   u32 dst_width, u32 dst_height);
 
   // downsample texture - used for readbacks at >1xIR.
   GL::Texture m_vram_texture;
@@ -104,6 +108,7 @@ private:
   GL::Program m_vram_write_program;
   GL::Program m_vram_copy_program;
   GL::Program m_vram_update_depth_program;
+  GL::Program m_blit_program;
 
   u32 m_uniform_buffer_alignment = 1;
   u32 m_texture_stream_buffer_size = 0;
diff --git a/src/core/shadergen.cpp b/src/core/shadergen.cpp
index b75028604..25748c47c 100644
--- a/src/core/shadergen.cpp
+++ b/src/core/shadergen.cpp
@@ -617,3 +617,21 @@ std::string ShaderGen::GenerateCopyFragmentShader()
 
   return ss.str();
 }
+
+// Generates a fragment shader that samples samp0 at the interpolated coordinate v_tex0 and writes the result to
+// o_col0 unmodified.
+std::string ShaderGen::GenerateSampleFragmentShader()
+{
+  std::stringstream ss;
+  WriteHeader(ss);
+  DeclareTexture(ss, "samp0", 0);
+  DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1);
+
+  ss << R"(
+{
+  o_col0 = SAMPLE_TEXTURE(samp0, v_tex0);
+}
+)";
+
+  return ss.str();
+}
diff --git a/src/core/shadergen.h b/src/core/shadergen.h
index d93a5c73b..f3a9196c9 100644
--- a/src/core/shadergen.h
+++ b/src/core/shadergen.h
@@ -16,6 +16,7 @@ public:
   std::string GenerateUVQuadVertexShader();
   std::string GenerateFillFragmentShader();
   std::string GenerateCopyFragmentShader();
+  std::string GenerateSampleFragmentShader();
 
 protected:
   ALWAYS_INLINE bool IsVulkan() const { return (m_render_api == HostDisplay::RenderAPI::Vulkan); }