GPU/OpenGL: Use shader instead of framebuffer blit

This commit is contained in:
Connor McLaughlin 2021-04-06 02:19:56 +10:00
parent 57830bdd36
commit 2ed482132d
4 changed files with 107 additions and 37 deletions

View file

@ -728,6 +728,21 @@ bool GPU_HW_OpenGL::CompilePrograms()
m_downsample_program = std::move(*prog);
}
prog = shader_cache.GetProgram(shadergen.GenerateUVQuadVertexShader(), {}, shadergen.GenerateSampleFragmentShader(),
[this, use_binding_layout](GL::Program& prog) {
if (!IsGLES() && !use_binding_layout)
prog.BindFragData(0, "o_col0");
});
if (!prog)
return false;
if (!use_binding_layout)
{
prog->Bind();
prog->Uniform1i("samp0", 0);
}
m_blit_program = std::move(*prog);
UPDATE_PROGRESS();
#undef UPDATE_PROGRESS
@ -1165,12 +1180,12 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
const u32 scaled_x = x * m_resolution_scale;
const u32 scaled_y = y * m_resolution_scale;
const u32 scaled_flipped_y = m_vram_texture.GetHeight() - scaled_y - scaled_height;
glDisable(GL_SCISSOR_TEST);
m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y,
scaled_x + scaled_width, scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
glEnable(GL_SCISSOR_TEST);
BlitTexture(m_vram_encoding_texture, x, flipped_y, width, height, scaled_x, scaled_flipped_y, scaled_width,
scaled_height);
}
RestoreGraphicsAPIState();
}
}
@ -1201,6 +1216,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
dst_bounds_scaled.GetWidth(), dst_bounds_scaled.GetHeight());
m_vram_read_texture.Bind();
m_vram_copy_program.Bind();
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);
RestoreGraphicsAPIState();
@ -1241,21 +1257,77 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
}
else
{
// glBlitFramebuffer with same source/destination should be legal, but on Mali (at least Bifrost) it breaks.
// So, blit from the shadow texture, like in the other renderers.
if (src_dirty)
UpdateVRAMReadTexture();
glDisable(GL_SCISSOR_TEST);
m_vram_read_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height,
GL_COLOR_BUFFER_BIT, GL_NEAREST);
glEnable(GL_SCISSOR_TEST);
CopyTexture(m_vram_texture, m_vram_fbo_id, m_vram_read_texture, src_x, src_y, dst_x, dst_y, width, height);
}
IncludeVRAMDirtyRectangle(dst_bounds);
}
// Copies a width x height texel rectangle from src at (src_x, src_y) to dest at (dst_x, dst_y).
//
// dest     - texture receiving the copy.
// dest_fbo - framebuffer object which renders into dest; used as the draw target whenever the copy has to be
//            performed through the framebuffer (MSAA blit or shader blit).
// src      - texture being read from.
//
// Strategy, in order of preference:
//   1. src is multisampled: glBlitFramebuffer, as image copies/sampling cannot be used here.
//   2. glCopyImageSubData (core 4.3, or the EXT/OES extension variants).
//   3. Drawing with the blit shader via BlitTexture(), followed by RestoreGraphicsAPIState().
void GPU_HW_OpenGL::CopyTexture(GL::Texture& dest, GLuint dest_fbo, GL::Texture& src, u32 src_x, u32 src_y, u32 dst_x,
                                u32 dst_y, u32 width, u32 height)
{
  if (src.IsMultisampled())
  {
    // The MSAA case still needs framebuffer blits.
    // The read framebuffer must be the one backing src. Binding dest_fbo for reading (as was done previously) blits
    // dest onto itself and never touches src. The VRAM texture is rendered through m_vram_fbo_id rather than a
    // texture-owned framebuffer, so special-case it; everything else uses the texture's own framebuffer.
    const GLuint src_fbo = (&src == &m_vram_texture) ? m_vram_fbo_id : src.GetGLFramebufferID();
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dest_fbo);
    glBindFramebuffer(GL_READ_FRAMEBUFFER, src_fbo);

    // Scissoring applies to blits, so it has to be off for the copy to cover the whole rectangle.
    glDisable(GL_SCISSOR_TEST);
    glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height,
                      GL_COLOR_BUFFER_BIT, GL_NEAREST);
    glEnable(GL_SCISSOR_TEST);

    // Leave the VRAM framebuffer bound for drawing again, so later draws do not end up in dest.
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
    return;
  }

  // Image copies do not go through the framebuffer at all, so no state needs restoring afterwards.
  // The destination target is taken from dest rather than assuming it matches src.
  if (GLAD_GL_VERSION_4_3)
  {
    glCopyImageSubData(src.GetGLId(), src.GetGLTarget(), 0, src_x, src_y, 0, dest.GetGLId(), dest.GetGLTarget(), 0,
                       dst_x, dst_y, 0, width, height, 1);
  }
  else if (GLAD_GL_EXT_copy_image)
  {
    glCopyImageSubDataEXT(src.GetGLId(), src.GetGLTarget(), 0, src_x, src_y, 0, dest.GetGLId(), dest.GetGLTarget(), 0,
                          dst_x, dst_y, 0, width, height, 1);
  }
  else if (GLAD_GL_OES_copy_image)
  {
    glCopyImageSubDataOES(src.GetGLId(), src.GetGLTarget(), 0, src_x, src_y, 0, dest.GetGLId(), dest.GetGLTarget(), 0,
                          dst_x, dst_y, 0, width, height, 1);
  }
  else
  {
    // No image copy support: draw src into dest with the blit shader, then undo the state BlitTexture() changed.
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dest_fbo);
    BlitTexture(src, src_x, src_y, width, height, dst_x, dst_y, width, height);
    RestoreGraphicsAPIState();
  }
}
// Draws the (src_x, src_y, src_width, src_height) rectangle of src into the (dst_x, dst_y, dst_width, dst_height)
// rectangle of whatever framebuffer is currently bound for drawing, using m_blit_program.
// A shader-based copy is used instead of glBlitFramebuffer as it is probably better on mobile drivers.
//
// Scissor testing and blending are disabled, the depth function is set to GL_ALWAYS and the viewport is replaced;
// none of this is undone here, so the caller has to restore whatever state it relies on.
void GPU_HW_OpenGL::BlitTexture(GL::Texture& src, u32 src_x, u32 src_y, u32 src_width, u32 src_height, u32 dst_x,
                                u32 dst_y, u32 dst_width, u32 dst_height)
{
  // Source rectangle expressed in normalized texture coordinates: left, top, right, bottom.
  // NOTE(review): presumably read as the UV min/max pair by the UV quad vertex shader - confirm against shadergen.
  const float tex_width = static_cast<float>(src.GetWidth());
  const float tex_height = static_cast<float>(src.GetHeight());
  const float uv_left = static_cast<float>(src_x) / tex_width;
  const float uv_top = static_cast<float>(src_y) / tex_height;
  const float uv_right = static_cast<float>(src_x + src_width) / tex_width;
  const float uv_bottom = static_cast<float>(src_y + src_height) / tex_height;
  const float uv_rect[4] = {uv_left, uv_top, uv_right, uv_bottom};
  UploadUniformBuffer(uv_rect, sizeof(uv_rect));

  // Plain overwrite of the destination rectangle: no clipping, no blending, no depth rejection.
  glDisable(GL_SCISSOR_TEST);
  glDisable(GL_BLEND);
  SetDepthFunc(GL_ALWAYS);

  // The viewport selects the destination rectangle.
  glViewport(dst_x, dst_y, dst_width, dst_height);

  src.Bind();
  m_blit_program.Bind();

  // Attribute-less draw of a single triangle (three vertices).
  glBindVertexArray(m_attributeless_vao_id);
  glDrawArrays(GL_TRIANGLES, 0, 3);
}
void GPU_HW_OpenGL::UpdateVRAMReadTexture()
{
const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale;
@ -1263,32 +1335,8 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture()
const u32 height = scaled_rect.GetHeight();
const u32 x = scaled_rect.left;
const u32 y = m_vram_texture.GetHeight() - scaled_rect.top - height;
const bool multisampled = m_vram_texture.IsMultisampled();
if (!multisampled && GLAD_GL_VERSION_4_3)
{
glCopyImageSubData(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0,
m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1);
}
else if (!multisampled && GLAD_GL_EXT_copy_image)
{
glCopyImageSubDataEXT(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0,
m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1);
}
else if (!multisampled && GLAD_GL_OES_copy_image)
{
glCopyImageSubDataOES(m_vram_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0,
m_vram_read_texture.GetGLId(), m_vram_texture.GetGLTarget(), 0, x, y, 0, width, height, 1);
}
else
{
m_vram_read_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_vram_fbo_id);
glDisable(GL_SCISSOR_TEST);
glBlitFramebuffer(x, y, x + width, y + height, x, y, x + width, y + height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
glEnable(GL_SCISSOR_TEST);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
}
CopyTexture(m_vram_read_texture, m_vram_read_texture.GetGLFramebufferID(), m_vram_texture, x, y, x, y, width, height);
GPU_HW::UpdateVRAMReadTexture();
}

View file

@ -76,6 +76,10 @@ private:
bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height);
void DownsampleFramebuffer(GL::Texture& source, u32 left, u32 top, u32 width, u32 height);
void DownsampleFramebufferBoxFilter(GL::Texture& source, u32 left, u32 top, u32 width, u32 height);
// Copies a width x height rectangle from src at (src_x, src_y) to dest at (dst_x, dst_y).
// dest_fbo is the framebuffer object bound as the draw target when the copy falls back to a shader blit.
void CopyTexture(GL::Texture& dest, GLuint dest_fbo, GL::Texture& src, u32 src_x, u32 src_y, u32 dst_x, u32 dst_y,
u32 width, u32 height);
// Draws the given source rectangle of src into the destination rectangle (set as the viewport) of the currently
// bound draw framebuffer using m_blit_program. Disables scissor testing and blending without re-enabling them.
void BlitTexture(GL::Texture& src, u32 src_x, u32 src_y, u32 src_width, u32 src_height, u32 dst_x, u32 dst_y,
u32 dst_width, u32 dst_height);
// downsample texture - used for readbacks at >1xIR.
GL::Texture m_vram_texture;
@ -104,6 +108,7 @@ private:
GL::Program m_vram_write_program;
GL::Program m_vram_copy_program;
GL::Program m_vram_update_depth_program;
GL::Program m_blit_program;
u32 m_uniform_buffer_alignment = 1;
u32 m_texture_stream_buffer_size = 0;

View file

@ -617,3 +617,19 @@ std::string ShaderGen::GenerateCopyFragmentShader()
return ss.str();
}
// Builds the source of a pass-through fragment shader: it declares the texture "samp0" at index 0 and writes the
// result of sampling it at v_tex0 to o_col0.
std::string ShaderGen::GenerateSampleFragmentShader()
{
  std::stringstream source;

  // Common preamble, then the sampler and the fragment entry point declaration.
  WriteHeader(source);
  DeclareTexture(source, "samp0", 0);
  DeclareFragmentEntryPoint(source, 0, 1, {}, false, 1);

  // Shader body.
  source << R"(
{
o_col0 = SAMPLE_TEXTURE(samp0, v_tex0);
}
)";

  return source.str();
}

View file

@ -16,6 +16,7 @@ public:
std::string GenerateUVQuadVertexShader();
std::string GenerateFillFragmentShader();
std::string GenerateCopyFragmentShader();
// Returns fragment shader source which samples the texture samp0 at v_tex0 and outputs the result as o_col0.
std::string GenerateSampleFragmentShader();
protected:
ALWAYS_INLINE bool IsVulkan() const { return (m_render_api == HostDisplay::RenderAPI::Vulkan); }