From 08ef8c1e8d09f2dc1a74aeb7c9c29771fff99d3b Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Wed, 24 Jun 2020 01:39:53 +1000 Subject: [PATCH] GPU/HW: Support SSBOs instead of texture buffers for VRAM writes --- src/core/gpu_hw_d3d11.cpp | 2 +- src/core/gpu_hw_opengl.cpp | 22 +++++++++++++++++----- src/core/gpu_hw_opengl.h | 1 + src/core/gpu_hw_shadergen.cpp | 28 +++++++++++++++++++++++----- src/core/gpu_hw_shadergen.h | 2 +- src/core/gpu_hw_vulkan.cpp | 2 +- 6 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index ad2144ef2..aab4a6b09 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -433,7 +433,7 @@ bool GPU_HW_D3D11::CompileShaders() return false; m_vram_write_pixel_shader = - m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader()); + m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(false)); if (!m_vram_write_pixel_shader) return false; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 94ed29028..505c0a283 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -181,7 +181,16 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display) } else { - Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower."); + // Try SSBOs. + GLint64 max_ssbo_size = 0; + if (GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1 || GLAD_GL_ARB_shader_storage_buffer_object) + glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size); + + m_use_ssbo_for_vram_writes = (max_ssbo_size >= (VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16))); + if (m_use_ssbo_for_vram_writes) + Log_InfoPrintf("Using shader storage buffers for VRAM writes."); + else + Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower."); } int max_dual_source_draw_buffers = 0; @@ -484,10 +493,10 @@ bool GPU_HW_OpenGL::CompilePrograms() prog->Uniform1i("samp0", 0); m_vram_update_depth_program = std::move(*prog); - if (m_supports_texture_buffer) + if (m_supports_texture_buffer || m_use_ssbo_for_vram_writes) { prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {}, - shadergen.GenerateVRAMWriteFragmentShader(), + shadergen.GenerateVRAMWriteFragmentShader(m_use_ssbo_for_vram_writes), [this, use_binding_layout](GL::Program& prog) { if (!IsGLES() && !use_binding_layout) prog.BindFragData(0, "o_col0"); @@ -751,7 +760,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* GPU_HW::UpdateVRAM(x, y, width, height, data); const u32 num_pixels = width * height; - if (num_pixels < m_max_texture_buffer_size) + if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes) { const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16)); std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); @@ -771,7 +780,10 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS); m_vram_write_program.Bind(); - glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture); + if (m_use_ssbo_for_vram_writes) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_texture_stream_buffer->GetGLBufferId()); + else + glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture); const VRAMWriteUBOData uniforms = {x, flipped_y, diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index 0a1d0b607..5e6dc9ee9 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -97,4 +97,5 @@ private: bool m_supports_texture_buffer = false; bool m_supports_geometry_shaders = false; + bool m_use_ssbo_for_vram_writes = false; }; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index f70e900dc..5358c2899 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -705,8 +705,7 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) if (textured) { - DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, - true, use_dual_source ? 2 : 1, true); + DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, true); } else { @@ -1167,7 +1166,7 @@ uint SampleVRAM(uint2 coords) return ss.str(); } -std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader() +std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo) { std::stringstream ss; WriteHeader(ss); @@ -1177,7 +1176,26 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader() {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"}, true); - DeclareTextureBuffer(ss, "samp0", 0, true, true); + if (use_ssbo && m_glsl) + { + ss << "layout(std430"; + if (IsVulkan()) + ss << ", set = 0, binding = 0"; + else if (m_use_glsl_binding_layout) + ss << ", binding = 0"; + + ss << ") buffer SSBO {\n"; + ss << " uint ssbo_data[];\n"; + ss << "};\n\n"; + + ss << "#define GET_VALUE(buffer_offset) (ssbo_data[(buffer_offset) / 2u] >> (((buffer_offset) % 2u) * 16u))\n\n"; + } + else + { + DeclareTextureBuffer(ss, "samp0", 0, true, true); + ss << "#define GET_VALUE(buffer_offset) (LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r)\n\n"; + } + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true); ss << R"( { @@ -1190,7 +1208,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader() #endif uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x; - uint value = LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r | u_mask_or_bits; + uint value = GET_VALUE(buffer_offset) | u_mask_or_bits; o_col0 = RGBA5551ToRGBA8(value); o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 6f6624166..69cd4383b 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -23,7 +23,7 @@ public: std::string GenerateCopyFragmentShader(); std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode); std::string GenerateVRAMReadFragmentShader(); - std::string GenerateVRAMWriteFragmentShader(); + std::string GenerateVRAMWriteFragmentShader(bool use_ssbo); std::string GenerateVRAMCopyFragmentShader(); std::string GenerateVRAMUpdateDepthFragmentShader(); diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp index 8289e4f3c..360a29bd3 100644 --- a/src/core/gpu_hw_vulkan.cpp +++ b/src/core/gpu_hw_vulkan.cpp @@ -747,7 +747,7 @@ bool GPU_HW_Vulkan::CompilePipelines() // VRAM write { - VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMWriteFragmentShader()); + VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMWriteFragmentShader(false)); if (fs == VK_NULL_HANDLE) return false;