mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-03-06 14:27:44 +00:00
GPU/HW: Support SSBOs instead of texture buffers for VRAM writes
This commit is contained in:
parent
eec37df1e0
commit
08ef8c1e8d
|
@ -433,7 +433,7 @@ bool GPU_HW_D3D11::CompileShaders()
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
m_vram_write_pixel_shader =
|
m_vram_write_pixel_shader =
|
||||||
m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader());
|
m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(false));
|
||||||
if (!m_vram_write_pixel_shader)
|
if (!m_vram_write_pixel_shader)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
|
@ -181,7 +181,16 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower.");
|
// Try SSBOs.
|
||||||
|
GLint64 max_ssbo_size = 0;
|
||||||
|
if (GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1 || GLAD_GL_ARB_shader_storage_buffer_object)
|
||||||
|
glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size);
|
||||||
|
|
||||||
|
m_use_ssbo_for_vram_writes = (max_ssbo_size >= (VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)));
|
||||||
|
if (m_use_ssbo_for_vram_writes)
|
||||||
|
Log_InfoPrintf("Using shader storage buffers for VRAM writes.");
|
||||||
|
else
|
||||||
|
Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower.");
|
||||||
}
|
}
|
||||||
|
|
||||||
int max_dual_source_draw_buffers = 0;
|
int max_dual_source_draw_buffers = 0;
|
||||||
|
@ -484,10 +493,10 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
||||||
prog->Uniform1i("samp0", 0);
|
prog->Uniform1i("samp0", 0);
|
||||||
m_vram_update_depth_program = std::move(*prog);
|
m_vram_update_depth_program = std::move(*prog);
|
||||||
|
|
||||||
if (m_supports_texture_buffer)
|
if (m_supports_texture_buffer || m_use_ssbo_for_vram_writes)
|
||||||
{
|
{
|
||||||
prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
|
prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
|
||||||
shadergen.GenerateVRAMWriteFragmentShader(),
|
shadergen.GenerateVRAMWriteFragmentShader(m_use_ssbo_for_vram_writes),
|
||||||
[this, use_binding_layout](GL::Program& prog) {
|
[this, use_binding_layout](GL::Program& prog) {
|
||||||
if (!IsGLES() && !use_binding_layout)
|
if (!IsGLES() && !use_binding_layout)
|
||||||
prog.BindFragData(0, "o_col0");
|
prog.BindFragData(0, "o_col0");
|
||||||
|
@ -751,7 +760,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
GPU_HW::UpdateVRAM(x, y, width, height, data);
|
GPU_HW::UpdateVRAM(x, y, width, height, data);
|
||||||
|
|
||||||
const u32 num_pixels = width * height;
|
const u32 num_pixels = width * height;
|
||||||
if (num_pixels < m_max_texture_buffer_size)
|
if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes)
|
||||||
{
|
{
|
||||||
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
|
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
|
||||||
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
|
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
|
||||||
|
@ -771,7 +780,10 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
|
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
|
||||||
|
|
||||||
m_vram_write_program.Bind();
|
m_vram_write_program.Bind();
|
||||||
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
|
if (m_use_ssbo_for_vram_writes)
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_texture_stream_buffer->GetGLBufferId());
|
||||||
|
else
|
||||||
|
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
|
||||||
|
|
||||||
const VRAMWriteUBOData uniforms = {x,
|
const VRAMWriteUBOData uniforms = {x,
|
||||||
flipped_y,
|
flipped_y,
|
||||||
|
|
|
@ -97,4 +97,5 @@ private:
|
||||||
|
|
||||||
bool m_supports_texture_buffer = false;
|
bool m_supports_texture_buffer = false;
|
||||||
bool m_supports_geometry_shaders = false;
|
bool m_supports_geometry_shaders = false;
|
||||||
|
bool m_use_ssbo_for_vram_writes = false;
|
||||||
};
|
};
|
||||||
|
|
|
@ -705,8 +705,7 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
|
||||||
|
|
||||||
if (textured)
|
if (textured)
|
||||||
{
|
{
|
||||||
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}},
|
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, true);
|
||||||
true, use_dual_source ? 2 : 1, true);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1167,7 +1166,7 @@ uint SampleVRAM(uint2 coords)
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
|
||||||
{
|
{
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
WriteHeader(ss);
|
WriteHeader(ss);
|
||||||
|
@ -1177,7 +1176,26 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
||||||
{"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"},
|
{"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"},
|
||||||
true);
|
true);
|
||||||
|
|
||||||
DeclareTextureBuffer(ss, "samp0", 0, true, true);
|
if (use_ssbo && m_glsl)
|
||||||
|
{
|
||||||
|
ss << "layout(std430";
|
||||||
|
if (IsVulkan())
|
||||||
|
ss << ", set = 0, binding = 0";
|
||||||
|
else if (m_use_glsl_binding_layout)
|
||||||
|
ss << ", binding = 0";
|
||||||
|
|
||||||
|
ss << ") buffer SSBO {\n";
|
||||||
|
ss << " uint ssbo_data[];\n";
|
||||||
|
ss << "};\n\n";
|
||||||
|
|
||||||
|
ss << "#define GET_VALUE(buffer_offset) (ssbo_data[(buffer_offset) / 2u] >> (((buffer_offset) % 2u) * 16u))\n\n";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
DeclareTextureBuffer(ss, "samp0", 0, true, true);
|
||||||
|
ss << "#define GET_VALUE(buffer_offset) (LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r)\n\n";
|
||||||
|
}
|
||||||
|
|
||||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
|
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
|
||||||
ss << R"(
|
ss << R"(
|
||||||
{
|
{
|
||||||
|
@ -1190,7 +1208,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
|
uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
|
||||||
uint value = LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r | u_mask_or_bits;
|
uint value = GET_VALUE(buffer_offset) | u_mask_or_bits;
|
||||||
|
|
||||||
o_col0 = RGBA5551ToRGBA8(value);
|
o_col0 = RGBA5551ToRGBA8(value);
|
||||||
o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0;
|
o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0;
|
||||||
|
|
|
@ -23,7 +23,7 @@ public:
|
||||||
std::string GenerateCopyFragmentShader();
|
std::string GenerateCopyFragmentShader();
|
||||||
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode);
|
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode);
|
||||||
std::string GenerateVRAMReadFragmentShader();
|
std::string GenerateVRAMReadFragmentShader();
|
||||||
std::string GenerateVRAMWriteFragmentShader();
|
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
|
||||||
std::string GenerateVRAMCopyFragmentShader();
|
std::string GenerateVRAMCopyFragmentShader();
|
||||||
std::string GenerateVRAMUpdateDepthFragmentShader();
|
std::string GenerateVRAMUpdateDepthFragmentShader();
|
||||||
|
|
||||||
|
|
|
@ -747,7 +747,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
|
||||||
|
|
||||||
// VRAM write
|
// VRAM write
|
||||||
{
|
{
|
||||||
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMWriteFragmentShader());
|
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMWriteFragmentShader(false));
|
||||||
if (fs == VK_NULL_HANDLE)
|
if (fs == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue