GPU/HW: Support SSBOs instead of texture buffers for VRAM writes

This commit is contained in:
Connor McLaughlin 2020-06-24 01:39:53 +10:00
parent eec37df1e0
commit 08ef8c1e8d
6 changed files with 44 additions and 13 deletions

View file

@ -433,7 +433,7 @@ bool GPU_HW_D3D11::CompileShaders()
return false;
m_vram_write_pixel_shader =
m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader());
m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(false));
if (!m_vram_write_pixel_shader)
return false;

View file

@ -181,7 +181,16 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display)
}
else
{
Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower.");
// Try SSBOs.
GLint64 max_ssbo_size = 0;
if (GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1 || GLAD_GL_ARB_shader_storage_buffer_object)
glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size);
m_use_ssbo_for_vram_writes = (max_ssbo_size >= (VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)));
if (m_use_ssbo_for_vram_writes)
Log_InfoPrintf("Using shader storage buffers for VRAM writes.");
else
Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower.");
}
int max_dual_source_draw_buffers = 0;
@ -484,10 +493,10 @@ bool GPU_HW_OpenGL::CompilePrograms()
prog->Uniform1i("samp0", 0);
m_vram_update_depth_program = std::move(*prog);
if (m_supports_texture_buffer)
if (m_supports_texture_buffer || m_use_ssbo_for_vram_writes)
{
prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
shadergen.GenerateVRAMWriteFragmentShader(),
shadergen.GenerateVRAMWriteFragmentShader(m_use_ssbo_for_vram_writes),
[this, use_binding_layout](GL::Program& prog) {
if (!IsGLES() && !use_binding_layout)
prog.BindFragData(0, "o_col0");
@ -751,7 +760,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
GPU_HW::UpdateVRAM(x, y, width, height, data);
const u32 num_pixels = width * height;
if (num_pixels < m_max_texture_buffer_size)
if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes)
{
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
@ -771,7 +780,10 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
m_vram_write_program.Bind();
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
if (m_use_ssbo_for_vram_writes)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_texture_stream_buffer->GetGLBufferId());
else
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
const VRAMWriteUBOData uniforms = {x,
flipped_y,

View file

@ -97,4 +97,5 @@ private:
bool m_supports_texture_buffer = false;
bool m_supports_geometry_shaders = false;
bool m_use_ssbo_for_vram_writes = false;
};

View file

@ -705,8 +705,7 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
if (textured)
{
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}},
true, use_dual_source ? 2 : 1, true);
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, true);
}
else
{
@ -1167,7 +1166,7 @@ uint SampleVRAM(uint2 coords)
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
{
std::stringstream ss;
WriteHeader(ss);
@ -1177,7 +1176,26 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
{"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"},
true);
DeclareTextureBuffer(ss, "samp0", 0, true, true);
if (use_ssbo && m_glsl)
{
ss << "layout(std430";
if (IsVulkan())
ss << ", set = 0, binding = 0";
else if (m_use_glsl_binding_layout)
ss << ", binding = 0";
ss << ") buffer SSBO {\n";
ss << " uint ssbo_data[];\n";
ss << "};\n\n";
ss << "#define GET_VALUE(buffer_offset) (ssbo_data[(buffer_offset) / 2u] >> (((buffer_offset) % 2u) * 16u))\n\n";
}
else
{
DeclareTextureBuffer(ss, "samp0", 0, true, true);
ss << "#define GET_VALUE(buffer_offset) (LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r)\n\n";
}
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
ss << R"(
{
@ -1190,7 +1208,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
#endif
uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
uint value = LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r | u_mask_or_bits;
uint value = GET_VALUE(buffer_offset) | u_mask_or_bits;
o_col0 = RGBA5551ToRGBA8(value);
o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0;

View file

@ -23,7 +23,7 @@ public:
std::string GenerateCopyFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode);
std::string GenerateVRAMReadFragmentShader();
std::string GenerateVRAMWriteFragmentShader();
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
std::string GenerateVRAMCopyFragmentShader();
std::string GenerateVRAMUpdateDepthFragmentShader();

View file

@ -747,7 +747,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
// VRAM write
{
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMWriteFragmentShader());
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMWriteFragmentShader(false));
if (fs == VK_NULL_HANDLE)
return false;