mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-01-17 22:25:37 +00:00
GPU/HW: Use uniform blocks for batch rendering
This commit is contained in:
parent
e3a9aa1c4f
commit
91c99f0226
|
@ -24,7 +24,7 @@ void StreamBuffer::Unbind()
|
|||
|
||||
// Maps the buffer for writing and describes the writable region.
//
// This implementation always hands back the start of m_cpu_buffer, so the byte
// offset and the aligned base index are both zero, and the whole buffer
// (m_size / alignment elements) is reported as available space.
//
// alignment: element size/alignment used to express the remaining space; it is
//            used as a divisor and therefore must be non-zero.
// min_size:  not consulted by this implementation.
StreamBuffer::MappingResult StreamBuffer::Map(u32 alignment, u32 min_size)
{
  // MappingResult has four members (pointer, buffer_offset, index_aligned,
  // space_aligned). All four are initialised explicitly so the available space
  // ends up in space_aligned rather than in index_aligned.
  return MappingResult{static_cast<void*>(m_cpu_buffer.data()), 0, 0, m_size / alignment};
}
|
||||
|
||||
void StreamBuffer::Unmap(u32 used_size)
|
||||
|
|
|
@ -22,6 +22,7 @@ public:
|
|||
struct MappingResult
|
||||
{
|
||||
void* pointer;
|
||||
u32 buffer_offset;
|
||||
u32 index_aligned; // offset / alignment, suitable for base vertex
|
||||
u32 space_aligned; // remaining space / alignment
|
||||
};
|
||||
|
|
|
@ -51,6 +51,8 @@ void GPU::SoftReset()
|
|||
m_render_state.texture_page_changed = true;
|
||||
UpdateGPUSTAT();
|
||||
UpdateCRTCConfig();
|
||||
UpdateDrawingArea();
|
||||
UpdateDrawingOffset();
|
||||
}
|
||||
|
||||
bool GPU::DoState(StateWrapper& sw)
|
||||
|
@ -115,6 +117,7 @@ bool GPU::DoState(StateWrapper& sw)
|
|||
m_render_state.texture_page_changed = true;
|
||||
m_render_state.texture_window_changed = true;
|
||||
UpdateDrawingArea();
|
||||
UpdateDrawingOffset();
|
||||
UpdateGPUSTAT();
|
||||
}
|
||||
|
||||
|
@ -665,6 +668,8 @@ void GPU::UpdateDisplay() {}
|
|||
|
||||
// Base-class default for the virtual drawing-area hook: intentionally does nothing.
void GPU::UpdateDrawingArea() {}
|
||||
|
||||
// Base-class default for the virtual drawing-offset hook: intentionally does nothing.
// Called after m_drawing_offset is modified; rendering backends override it.
void GPU::UpdateDrawingOffset() {}
|
||||
|
||||
// Base-class default for the virtual VRAM read hook: does nothing and leaves `buffer` untouched.
void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) {}
|
||||
|
||||
// Base-class default for the virtual VRAM fill hook: intentionally does nothing.
void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) {}
|
||||
|
|
|
@ -283,6 +283,7 @@ protected:
|
|||
// Rendering in the backend
|
||||
virtual void UpdateDisplay();
|
||||
virtual void UpdateDrawingArea();
|
||||
virtual void UpdateDrawingOffset();
|
||||
virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer);
|
||||
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color);
|
||||
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data);
|
||||
|
|
|
@ -151,6 +151,7 @@ bool GPU::HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_siz
|
|||
|
||||
m_drawing_offset.x = x;
|
||||
m_drawing_offset.y = y;
|
||||
UpdateDrawingOffset();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -15,6 +15,8 @@ void GPU_HW::Reset()
|
|||
GPU::Reset();
|
||||
|
||||
m_batch = {};
|
||||
m_batch_ubo_data = {};
|
||||
m_batch_ubo_dirty = true;
|
||||
}
|
||||
|
||||
bool GPU_HW::Initialize(System* system, DMA* dma, InterruptController* interrupt_controller, Timers* timers)
|
||||
|
@ -38,6 +40,15 @@ void GPU_HW::UpdateSettings()
|
|||
m_true_color = m_system->GetSettings().gpu_true_color;
|
||||
}
|
||||
|
||||
void GPU_HW::UpdateDrawingOffset()
|
||||
{
|
||||
GPU::UpdateDrawingOffset();
|
||||
|
||||
m_batch_ubo_data.u_pos_offset[0] = m_drawing_offset.x;
|
||||
m_batch_ubo_data.u_pos_offset[1] = m_drawing_offset.y;
|
||||
m_batch_ubo_dirty = true;
|
||||
}
|
||||
|
||||
void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
|
||||
{
|
||||
const u32 texpage =
|
||||
|
@ -216,11 +227,25 @@ vec4 RGBA5551ToRGBA8(uint v)
|
|||
)";
|
||||
}
|
||||
|
||||
// Appends the GLSL declaration of the per-batch uniform block to a shader source stream.
//
// The block is declared with an explicit std140 layout. Without a layout
// qualifier GLSL uses the "shared" layout, whose member offsets are
// implementation-defined and would have to be queried at runtime. The CPU side
// copies a plain struct (HWBatchUBOData) straight into the buffer, so the
// offsets must be fixed. Under std140 the members land at byte offsets
// 0 (ivec2), 8 (uvec2), 16 (uvec2), 24 (float) and 28 (float), 32 bytes in
// total, which is exactly the layout of the tightly packed C++ struct.
//
// ss: stream the declaration is written to.
void GPU_HW::GenerateBatchUniformBuffer(std::stringstream& ss)
{
  ss << R"(
layout(std140) uniform UBOBlock {
  ivec2 u_pos_offset;
  uvec2 u_texture_window_mask;
  uvec2 u_texture_window_offset;
  float u_src_alpha_factor;
  float u_dst_alpha_factor;
};

)";
}
|
||||
|
||||
std::string GPU_HW::GenerateVertexShader(bool textured)
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
DefineMacro(ss, "TEXTURED", textured);
|
||||
GenerateBatchUniformBuffer(ss);
|
||||
|
||||
ss << R"(
|
||||
in ivec2 a_pos;
|
||||
|
@ -234,8 +259,6 @@ out vec3 v_col0;
|
|||
flat out ivec4 v_texpage;
|
||||
#endif
|
||||
|
||||
uniform ivec2 u_pos_offset;
|
||||
|
||||
void main()
|
||||
{
|
||||
// 0..+1023 -> -1..1
|
||||
|
@ -268,6 +291,7 @@ std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, Textu
|
|||
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
GenerateBatchUniformBuffer(ss);
|
||||
DefineMacro(ss, "TRANSPARENCY", transparency != HWBatchRenderMode::TransparencyDisabled);
|
||||
DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == HWBatchRenderMode::OnlyOpaque);
|
||||
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == HWBatchRenderMode::OnlyTransparent);
|
||||
|
@ -292,12 +316,10 @@ std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, Textu
|
|||
|
||||
ss << R"(
|
||||
in vec3 v_col0;
|
||||
uniform vec2 u_transparent_alpha;
|
||||
#if TEXTURED
|
||||
in vec2 v_tex0;
|
||||
flat in ivec4 v_texpage;
|
||||
uniform sampler2D samp0;
|
||||
uniform uvec4 u_texture_window;
|
||||
#endif
|
||||
|
||||
out vec4 o_col0;
|
||||
|
@ -318,8 +340,8 @@ ivec3 TruncateTo15Bit(ivec3 icol)
|
|||
#if TEXTURED
|
||||
ivec2 ApplyNativeTextureWindow(ivec2 coords)
|
||||
{
|
||||
uint x = (uint(coords.x) & ~(u_texture_window.x * 8u)) | ((u_texture_window.z & u_texture_window.x) * 8u);
|
||||
uint y = (uint(coords.y) & ~(u_texture_window.y * 8u)) | ((u_texture_window.w & u_texture_window.y) * 8u);
|
||||
uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
|
||||
uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
|
||||
return ivec2(int(x), int(y));
|
||||
}
|
||||
|
||||
|
@ -419,7 +441,7 @@ void main()
|
|||
#if TRANSPARENCY_ONLY_OPAQUE
|
||||
discard;
|
||||
#endif
|
||||
o_col0 = vec4(color * u_transparent_alpha.x, u_transparent_alpha.y);
|
||||
o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -679,6 +701,15 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
|
|||
}
|
||||
}
|
||||
|
||||
// transparency mode change
|
||||
if (m_batch.transparency_mode != transparency_mode && transparency_mode != TransparencyMode::Disabled)
|
||||
{
|
||||
static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}};
|
||||
m_batch_ubo_data.u_src_alpha_factor = transparent_alpha[static_cast<u32>(transparency_mode)][0];
|
||||
m_batch_ubo_data.u_dst_alpha_factor = transparent_alpha[static_cast<u32>(transparency_mode)][1];
|
||||
m_batch_ubo_dirty = true;
|
||||
}
|
||||
|
||||
// map buffer if it's not already done
|
||||
if (!m_batch_current_vertex_ptr)
|
||||
MapBatchVertexPointer(max_added_vertices);
|
||||
|
@ -691,11 +722,13 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
|
|||
|
||||
if (m_render_state.IsTextureWindowChanged())
|
||||
{
|
||||
m_batch.texture_window_values[0] = m_render_state.texture_window_mask_x;
|
||||
m_batch.texture_window_values[1] = m_render_state.texture_window_mask_y;
|
||||
m_batch.texture_window_values[2] = m_render_state.texture_window_offset_x;
|
||||
m_batch.texture_window_values[3] = m_render_state.texture_window_offset_y;
|
||||
m_render_state.ClearTextureWindowChangedFlag();
|
||||
|
||||
m_batch_ubo_data.u_texture_window_mask[0] = ZeroExtend32(m_render_state.texture_window_mask_x);
|
||||
m_batch_ubo_data.u_texture_window_mask[1] = ZeroExtend32(m_render_state.texture_window_mask_y);
|
||||
m_batch_ubo_data.u_texture_window_offset[0] = ZeroExtend32(m_render_state.texture_window_offset_x);
|
||||
m_batch_ubo_data.u_texture_window_offset[1] = ZeroExtend32(m_render_state.texture_window_offset_y);
|
||||
m_batch_ubo_dirty = true;
|
||||
}
|
||||
|
||||
LoadVertices(rc, num_vertices, command_ptr);
|
||||
|
|
|
@ -60,7 +60,6 @@ protected:
|
|||
HWPrimitive primitive;
|
||||
TextureMode texture_mode;
|
||||
TransparencyMode transparency_mode;
|
||||
std::array<u8, 4> texture_window_values;
|
||||
bool dithering;
|
||||
|
||||
// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
|
||||
|
@ -79,14 +78,20 @@ protected:
|
|||
}
|
||||
};
|
||||
|
||||
struct HWBatchUBOData
|
||||
{
|
||||
s32 u_pos_offset[2];
|
||||
u32 u_texture_window_mask[2];
|
||||
u32 u_texture_window_offset[2];
|
||||
float u_src_alpha_factor;
|
||||
float u_dst_alpha_factor;
|
||||
};
|
||||
|
||||
static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32);
|
||||
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
|
||||
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
|
||||
static constexpr u32 TEXTURE_TILE_SIZE = 256;
|
||||
static constexpr u32 TEXTURE_TILE_X_COUNT = VRAM_WIDTH / TEXTURE_TILE_SIZE;
|
||||
static constexpr u32 TEXTURE_TILE_Y_COUNT = VRAM_HEIGHT / TEXTURE_TILE_SIZE;
|
||||
static constexpr u32 TEXTURE_TILE_COUNT = TEXTURE_TILE_X_COUNT * TEXTURE_TILE_Y_COUNT;
|
||||
static constexpr u32 UNIFORM_BUFFER_SIZE = 512 * 1024;
|
||||
|
||||
static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba)
|
||||
{
|
||||
|
@ -96,6 +101,8 @@ protected:
|
|||
static_cast<float>(rgba >> 24) * (1.0f / 255.0f));
|
||||
}
|
||||
|
||||
virtual void UpdateDrawingOffset() override;
|
||||
|
||||
virtual void InvalidateVRAMReadCache() = 0;
|
||||
|
||||
virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
|
||||
|
@ -121,8 +128,6 @@ protected:
|
|||
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
|
||||
std::string GenerateVRAMWriteFragmentShader();
|
||||
|
||||
HWBatchConfig m_batch = {};
|
||||
|
||||
HWVertex* m_batch_start_vertex_ptr = nullptr;
|
||||
HWVertex* m_batch_end_vertex_ptr = nullptr;
|
||||
HWVertex* m_batch_current_vertex_ptr = nullptr;
|
||||
|
@ -132,10 +137,15 @@ protected:
|
|||
u32 m_max_resolution_scale = 1;
|
||||
bool m_true_color = false;
|
||||
|
||||
HWBatchConfig m_batch = {};
|
||||
HWBatchUBOData m_batch_ubo_data = {};
|
||||
bool m_batch_ubo_dirty = true;
|
||||
|
||||
private:
|
||||
static HWPrimitive GetPrimitiveForCommand(RenderCommand rc);
|
||||
|
||||
void GenerateShaderHeader(std::stringstream& ss);
|
||||
void GenerateBatchUniformBuffer(std::stringstream& ss);
|
||||
|
||||
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
|
||||
void AddDuplicateVertex();
|
||||
|
|
|
@ -16,13 +16,14 @@ GPU_HW_OpenGL::~GPU_HW_OpenGL()
|
|||
|
||||
bool GPU_HW_OpenGL::Initialize(System* system, DMA* dma, InterruptController* interrupt_controller, Timers* timers)
|
||||
{
|
||||
SetMaxResolutionScale();
|
||||
SetCapabilities();
|
||||
|
||||
if (!GPU_HW::Initialize(system, dma, interrupt_controller, timers))
|
||||
return false;
|
||||
|
||||
CreateFramebuffer();
|
||||
CreateVertexBuffer();
|
||||
CreateUniformBuffer();
|
||||
CreateTextureBuffer();
|
||||
if (!CompilePrograms())
|
||||
return false;
|
||||
|
@ -150,7 +151,7 @@ std::tuple<s32, s32> GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y
|
|||
return std::make_tuple(x, static_cast<s32>(static_cast<s32>(VRAM_HEIGHT) - y));
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::SetMaxResolutionScale()
|
||||
void GPU_HW_OpenGL::SetCapabilities()
|
||||
{
|
||||
GLint max_texture_size = VRAM_WIDTH;
|
||||
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size);
|
||||
|
@ -163,6 +164,9 @@ void GPU_HW_OpenGL::SetMaxResolutionScale()
|
|||
|
||||
m_max_resolution_scale = std::min(max_texture_scale, line_width_range[1]);
|
||||
Log_InfoPrintf("Maximum resolution scale is %u", m_max_resolution_scale);
|
||||
|
||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast<GLint*>(&m_uniform_buffer_alignment));
|
||||
Log_InfoPrintf("Uniform buffer offset alignment: %u", m_uniform_buffer_alignment);
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::CreateFramebuffer()
|
||||
|
@ -252,6 +256,13 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
|
|||
glGenVertexArrays(1, &m_attributeless_vao_id);
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::CreateUniformBuffer()
|
||||
{
|
||||
m_uniform_stream_buffer = GL::StreamBuffer::Create(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE);
|
||||
if (!m_uniform_stream_buffer)
|
||||
Panic("Failed to create uniform buffer");
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::CreateTextureBuffer()
|
||||
{
|
||||
// const GLenum target = GL_PIXEL_UNPACK_BUFFER;
|
||||
|
@ -346,17 +357,13 @@ bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, HWBatchRenderMode render_m
|
|||
if (!prog.Link())
|
||||
return false;
|
||||
|
||||
prog.Bind();
|
||||
prog.RegisterUniform("u_pos_offset");
|
||||
prog.RegisterUniform("u_transparent_alpha");
|
||||
prog.Uniform2i(0, 0, 0);
|
||||
prog.Uniform2f(1, 1.0f, 0.0f);
|
||||
prog.BindUniformBlock("UBOBlock", 1);
|
||||
|
||||
if (textured)
|
||||
{
|
||||
prog.RegisterUniform("u_texture_window");
|
||||
prog.Bind();
|
||||
prog.RegisterUniform("samp0");
|
||||
prog.Uniform1i(3, 0);
|
||||
prog.Uniform1i(0, 0);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -368,24 +375,8 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
|
|||
[BoolToUInt8(m_batch.dithering)];
|
||||
prog.Bind();
|
||||
|
||||
prog.Uniform2i(0, m_drawing_offset.x, m_drawing_offset.y);
|
||||
if (m_batch.transparency_mode != TransparencyMode::Disabled)
|
||||
{
|
||||
static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}};
|
||||
prog.Uniform2fv(1, transparent_alpha[static_cast<u32>(m_batch.transparency_mode)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
static constexpr float disabled_alpha[2] = {1.0f, 0.0f};
|
||||
prog.Uniform2fv(1, disabled_alpha);
|
||||
}
|
||||
|
||||
if (m_batch.texture_mode != TextureMode::Disabled)
|
||||
{
|
||||
prog.Uniform4ui(2, m_batch.texture_window_values[0], m_batch.texture_window_values[1],
|
||||
m_batch.texture_window_values[2], m_batch.texture_window_values[3]);
|
||||
m_vram_read_texture->Bind();
|
||||
}
|
||||
|
||||
if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == HWBatchRenderMode::OnlyOpaque)
|
||||
{
|
||||
|
@ -415,6 +406,23 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
|
|||
Log_DebugPrintf("SetScissor: (%d-%d, %d-%d)", x, x + width, y, y + height);
|
||||
glScissor(x, y, width, height);
|
||||
}
|
||||
|
||||
if (m_batch_ubo_dirty)
|
||||
{
|
||||
UploadUniformBlock(&m_batch_ubo_data, sizeof(m_batch_ubo_data));
|
||||
m_batch_ubo_dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::UploadUniformBlock(const void* data, u32 data_size)
|
||||
{
|
||||
const GL::StreamBuffer::MappingResult res = m_uniform_stream_buffer->Map(m_uniform_buffer_alignment, data_size);
|
||||
std::memcpy(res.pointer, data, data_size);
|
||||
m_uniform_stream_buffer->Unmap(data_size);
|
||||
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_stream_buffer->GetGLBufferId(), res.buffer_offset, data_size);
|
||||
|
||||
m_stats.num_uniform_buffer_updates++;
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::UpdateDrawingArea()
|
||||
|
|
|
@ -42,22 +42,25 @@ private:
|
|||
u32 num_vram_reads;
|
||||
u32 num_vram_writes;
|
||||
u32 num_vram_read_texture_updates;
|
||||
u32 num_uniform_buffer_updates;
|
||||
};
|
||||
|
||||
std::tuple<s32, s32> ConvertToFramebufferCoordinates(s32 x, s32 y);
|
||||
|
||||
void SetMaxResolutionScale();
|
||||
void SetCapabilities();
|
||||
void CreateFramebuffer();
|
||||
void ClearFramebuffer();
|
||||
void DestroyFramebuffer();
|
||||
void UpdateVRAMReadTexture();
|
||||
|
||||
void CreateVertexBuffer();
|
||||
void CreateUniformBuffer();
|
||||
void CreateTextureBuffer();
|
||||
|
||||
bool CompilePrograms();
|
||||
bool CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, bool dithering);
|
||||
void SetDrawState(HWBatchRenderMode render_mode);
|
||||
void UploadUniformBlock(const void* data, u32 data_size);
|
||||
|
||||
// downsample texture - used for readbacks at >1xIR.
|
||||
std::unique_ptr<GL::Texture> m_vram_texture;
|
||||
|
@ -69,12 +72,12 @@ private:
|
|||
GLuint m_vao_id = 0;
|
||||
GLuint m_attributeless_vao_id = 0;
|
||||
|
||||
std::unique_ptr<GL::StreamBuffer> m_uniform_stream_buffer;
|
||||
|
||||
std::unique_ptr<GL::StreamBuffer> m_texture_stream_buffer;
|
||||
GLuint m_texture_buffer_r16ui_texture = 0;
|
||||
|
||||
bool m_vram_read_texture_dirty = true;
|
||||
bool m_drawing_area_changed = true;
|
||||
bool m_show_renderer_statistics = false;
|
||||
u32 m_uniform_buffer_alignment = 1;
|
||||
|
||||
std::array<std::array<std::array<GL::Program, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering]
|
||||
std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
|
||||
|
@ -82,4 +85,8 @@ private:
|
|||
|
||||
GLStats m_stats = {};
|
||||
GLStats m_last_stats = {};
|
||||
|
||||
bool m_vram_read_texture_dirty = true;
|
||||
bool m_drawing_area_changed = true;
|
||||
bool m_show_renderer_statistics = false;
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue