GPU/HW: Use uniform blocks for batch rendering

This commit is contained in:
Connor McLaughlin 2019-11-03 13:15:17 +10:00
parent e3a9aa1c4f
commit 91c99f0226
9 changed files with 114 additions and 48 deletions

View file

@ -24,7 +24,7 @@ void StreamBuffer::Unbind()
StreamBuffer::MappingResult StreamBuffer::Map(u32 alignment, u32 min_size)
{
return MappingResult{static_cast<void*>(m_cpu_buffer.data()), 0, m_size / alignment};
return MappingResult{static_cast<void*>(m_cpu_buffer.data()), 0, 0, m_size / alignment};
}
void StreamBuffer::Unmap(u32 used_size)

View file

@ -22,6 +22,7 @@ public:
// Describes the region handed back to the caller by StreamBuffer::Map().
struct MappingResult
{
void* pointer; // where the caller writes its data
u32 buffer_offset; // used by callers as the byte offset when binding a range of the GL buffer
u32 index_aligned; // offset / alignment, suitable for base vertex
u32 space_aligned; // remaining space / alignment
};

View file

@ -51,6 +51,8 @@ void GPU::SoftReset()
m_render_state.texture_page_changed = true;
UpdateGPUSTAT();
UpdateCRTCConfig();
UpdateDrawingArea();
UpdateDrawingOffset();
}
bool GPU::DoState(StateWrapper& sw)
@ -115,6 +117,7 @@ bool GPU::DoState(StateWrapper& sw)
m_render_state.texture_page_changed = true;
m_render_state.texture_window_changed = true;
UpdateDrawingArea();
UpdateDrawingOffset();
UpdateGPUSTAT();
}
@ -665,6 +668,8 @@ void GPU::UpdateDisplay() {}
// Backend rendering hooks. These base-class versions intentionally do nothing; they are
// declared virtual so that a renderer backend can override the ones it needs.
void GPU::UpdateDrawingArea() {}
void GPU::UpdateDrawingOffset() {}
void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) {}
void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) {}

View file

@ -283,6 +283,7 @@ protected:
// Rendering in the backend
virtual void UpdateDisplay();
virtual void UpdateDrawingArea();
virtual void UpdateDrawingOffset();
virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer);
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color);
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data);

View file

@ -151,6 +151,7 @@ bool GPU::HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_siz
m_drawing_offset.x = x;
m_drawing_offset.y = y;
UpdateDrawingOffset();
}
return true;
}

View file

@ -15,6 +15,8 @@ void GPU_HW::Reset()
GPU::Reset();
m_batch = {};
m_batch_ubo_data = {};
m_batch_ubo_dirty = true;
}
bool GPU_HW::Initialize(System* system, DMA* dma, InterruptController* interrupt_controller, Timers* timers)
@ -38,6 +40,15 @@ void GPU_HW::UpdateSettings()
m_true_color = m_system->GetSettings().gpu_true_color;
}
// Copies the current drawing offset into the batch uniform data and marks that data
// dirty so it is uploaded again before the next draw.
void GPU_HW::UpdateDrawingOffset()
{
  // Give the base class its chance to react first.
  GPU::UpdateDrawingOffset();

  auto& pos_offset = m_batch_ubo_data.u_pos_offset;
  pos_offset[0] = m_drawing_offset.x;
  pos_offset[1] = m_drawing_offset.y;

  m_batch_ubo_dirty = true;
}
void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
{
const u32 texpage =
@ -216,11 +227,25 @@ vec4 RGBA5551ToRGBA8(uint v)
)";
}
// Appends the GLSL declaration of the per-batch uniform block to the shader source.
//
// The block's contents are supplied by copying a HWBatchUBOData struct byte-for-byte into
// a uniform buffer, so the member order and types here must mirror that struct. The
// explicit std140 qualifier pins the member offsets to 0, 8, 16, 24 and 28, which is
// exactly the tightly packed C++ layout. Without a qualifier GLSL uses the "shared"
// layout, whose offsets are implementation-defined and would have to be queried from the
// linked program instead of being assumed.
//
// ss: stream accumulating the shader source; the declaration is appended to it.
void GPU_HW::GenerateBatchUniformBuffer(std::stringstream& ss)
{
  ss << R"(
layout(std140) uniform UBOBlock {
  ivec2 u_pos_offset;
  uvec2 u_texture_window_mask;
  uvec2 u_texture_window_offset;
  float u_src_alpha_factor;
  float u_dst_alpha_factor;
};
)";
}
std::string GPU_HW::GenerateVertexShader(bool textured)
{
std::stringstream ss;
GenerateShaderHeader(ss);
DefineMacro(ss, "TEXTURED", textured);
GenerateBatchUniformBuffer(ss);
ss << R"(
in ivec2 a_pos;
@ -234,8 +259,6 @@ out vec3 v_col0;
flat out ivec4 v_texpage;
#endif
uniform ivec2 u_pos_offset;
void main()
{
// 0..+1023 -> -1..1
@ -268,6 +291,7 @@ std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, Textu
std::stringstream ss;
GenerateShaderHeader(ss);
GenerateBatchUniformBuffer(ss);
DefineMacro(ss, "TRANSPARENCY", transparency != HWBatchRenderMode::TransparencyDisabled);
DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == HWBatchRenderMode::OnlyOpaque);
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == HWBatchRenderMode::OnlyTransparent);
@ -292,12 +316,10 @@ std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, Textu
ss << R"(
in vec3 v_col0;
uniform vec2 u_transparent_alpha;
#if TEXTURED
in vec2 v_tex0;
flat in ivec4 v_texpage;
uniform sampler2D samp0;
uniform uvec4 u_texture_window;
#endif
out vec4 o_col0;
@ -318,8 +340,8 @@ ivec3 TruncateTo15Bit(ivec3 icol)
#if TEXTURED
ivec2 ApplyNativeTextureWindow(ivec2 coords)
{
uint x = (uint(coords.x) & ~(u_texture_window.x * 8u)) | ((u_texture_window.z & u_texture_window.x) * 8u);
uint y = (uint(coords.y) & ~(u_texture_window.y * 8u)) | ((u_texture_window.w & u_texture_window.y) * 8u);
uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
return ivec2(int(x), int(y));
}
@ -419,7 +441,7 @@ void main()
#if TRANSPARENCY_ONLY_OPAQUE
discard;
#endif
o_col0 = vec4(color * u_transparent_alpha.x, u_transparent_alpha.y);
o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor);
}
else
{
@ -679,6 +701,15 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
}
}
// transparency mode change
if (m_batch.transparency_mode != transparency_mode && transparency_mode != TransparencyMode::Disabled)
{
static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}};
m_batch_ubo_data.u_src_alpha_factor = transparent_alpha[static_cast<u32>(transparency_mode)][0];
m_batch_ubo_data.u_dst_alpha_factor = transparent_alpha[static_cast<u32>(transparency_mode)][1];
m_batch_ubo_dirty = true;
}
// map buffer if it's not already done
if (!m_batch_current_vertex_ptr)
MapBatchVertexPointer(max_added_vertices);
@ -691,11 +722,13 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
if (m_render_state.IsTextureWindowChanged())
{
m_batch.texture_window_values[0] = m_render_state.texture_window_mask_x;
m_batch.texture_window_values[1] = m_render_state.texture_window_mask_y;
m_batch.texture_window_values[2] = m_render_state.texture_window_offset_x;
m_batch.texture_window_values[3] = m_render_state.texture_window_offset_y;
m_render_state.ClearTextureWindowChangedFlag();
m_batch_ubo_data.u_texture_window_mask[0] = ZeroExtend32(m_render_state.texture_window_mask_x);
m_batch_ubo_data.u_texture_window_mask[1] = ZeroExtend32(m_render_state.texture_window_mask_y);
m_batch_ubo_data.u_texture_window_offset[0] = ZeroExtend32(m_render_state.texture_window_offset_x);
m_batch_ubo_data.u_texture_window_offset[1] = ZeroExtend32(m_render_state.texture_window_offset_y);
m_batch_ubo_dirty = true;
}
LoadVertices(rc, num_vertices, command_ptr);

View file

@ -60,7 +60,6 @@ protected:
HWPrimitive primitive;
TextureMode texture_mode;
TransparencyMode transparency_mode;
std::array<u8, 4> texture_window_values;
bool dithering;
// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
@ -79,14 +78,20 @@ protected:
}
};
// CPU-side copy of the values consumed through the shaders' UBOBlock uniform block.
// The fields appear in the same order and with matching types as the GLSL declaration
// emitted by GenerateBatchUniformBuffer(), and the struct is uploaded as raw bytes, so the
// two must be kept in sync whenever either one changes.
struct HWBatchUBOData
{
s32 u_pos_offset[2]; // drawing offset (x, y)
u32 u_texture_window_mask[2]; // texture window mask (x, y)
u32 u_texture_window_offset[2]; // texture window offset (x, y)
float u_src_alpha_factor; // scales the colour written by the fragment shader's transparency path
float u_dst_alpha_factor; // written to the output alpha channel by that same path
};
static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32);
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
static constexpr u32 TEXTURE_TILE_SIZE = 256;
static constexpr u32 TEXTURE_TILE_X_COUNT = VRAM_WIDTH / TEXTURE_TILE_SIZE;
static constexpr u32 TEXTURE_TILE_Y_COUNT = VRAM_HEIGHT / TEXTURE_TILE_SIZE;
static constexpr u32 TEXTURE_TILE_COUNT = TEXTURE_TILE_X_COUNT * TEXTURE_TILE_Y_COUNT;
static constexpr u32 UNIFORM_BUFFER_SIZE = 512 * 1024;
static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba)
{
@ -96,6 +101,8 @@ protected:
static_cast<float>(rgba >> 24) * (1.0f / 255.0f));
}
virtual void UpdateDrawingOffset() override;
virtual void InvalidateVRAMReadCache() = 0;
virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
@ -121,8 +128,6 @@ protected:
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
std::string GenerateVRAMWriteFragmentShader();
HWBatchConfig m_batch = {};
HWVertex* m_batch_start_vertex_ptr = nullptr;
HWVertex* m_batch_end_vertex_ptr = nullptr;
HWVertex* m_batch_current_vertex_ptr = nullptr;
@ -132,10 +137,15 @@ protected:
u32 m_max_resolution_scale = 1;
bool m_true_color = false;
HWBatchConfig m_batch = {};
HWBatchUBOData m_batch_ubo_data = {};
bool m_batch_ubo_dirty = true;
private:
static HWPrimitive GetPrimitiveForCommand(RenderCommand rc);
void GenerateShaderHeader(std::stringstream& ss);
void GenerateBatchUniformBuffer(std::stringstream& ss);
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
void AddDuplicateVertex();

View file

@ -16,13 +16,14 @@ GPU_HW_OpenGL::~GPU_HW_OpenGL()
bool GPU_HW_OpenGL::Initialize(System* system, DMA* dma, InterruptController* interrupt_controller, Timers* timers)
{
SetMaxResolutionScale();
SetCapabilities();
if (!GPU_HW::Initialize(system, dma, interrupt_controller, timers))
return false;
CreateFramebuffer();
CreateVertexBuffer();
CreateUniformBuffer();
CreateTextureBuffer();
if (!CompilePrograms())
return false;
@ -150,7 +151,7 @@ std::tuple<s32, s32> GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y
return std::make_tuple(x, static_cast<s32>(static_cast<s32>(VRAM_HEIGHT) - y));
}
void GPU_HW_OpenGL::SetMaxResolutionScale()
void GPU_HW_OpenGL::SetCapabilities()
{
GLint max_texture_size = VRAM_WIDTH;
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size);
@ -163,6 +164,9 @@ void GPU_HW_OpenGL::SetMaxResolutionScale()
m_max_resolution_scale = std::min(max_texture_scale, line_width_range[1]);
Log_InfoPrintf("Maximum resolution scale is %u", m_max_resolution_scale);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast<GLint*>(&m_uniform_buffer_alignment));
Log_InfoPrintf("Uniform buffer offset alignment: %u", m_uniform_buffer_alignment);
}
void GPU_HW_OpenGL::CreateFramebuffer()
@ -252,6 +256,13 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
glGenVertexArrays(1, &m_attributeless_vao_id);
}
// Allocates the stream buffer used for uniform block uploads; failure is fatal.
void GPU_HW_OpenGL::CreateUniformBuffer()
{
  m_uniform_stream_buffer = GL::StreamBuffer::Create(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE);
  if (m_uniform_stream_buffer)
    return;

  Panic("Failed to create uniform buffer");
}
void GPU_HW_OpenGL::CreateTextureBuffer()
{
// const GLenum target = GL_PIXEL_UNPACK_BUFFER;
@ -346,17 +357,13 @@ bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, HWBatchRenderMode render_m
if (!prog.Link())
return false;
prog.Bind();
prog.RegisterUniform("u_pos_offset");
prog.RegisterUniform("u_transparent_alpha");
prog.Uniform2i(0, 0, 0);
prog.Uniform2f(1, 1.0f, 0.0f);
prog.BindUniformBlock("UBOBlock", 1);
if (textured)
{
prog.RegisterUniform("u_texture_window");
prog.Bind();
prog.RegisterUniform("samp0");
prog.Uniform1i(3, 0);
prog.Uniform1i(0, 0);
}
return true;
@ -368,24 +375,8 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
[BoolToUInt8(m_batch.dithering)];
prog.Bind();
prog.Uniform2i(0, m_drawing_offset.x, m_drawing_offset.y);
if (m_batch.transparency_mode != TransparencyMode::Disabled)
{
static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}};
prog.Uniform2fv(1, transparent_alpha[static_cast<u32>(m_batch.transparency_mode)]);
}
else
{
static constexpr float disabled_alpha[2] = {1.0f, 0.0f};
prog.Uniform2fv(1, disabled_alpha);
}
if (m_batch.texture_mode != TextureMode::Disabled)
{
prog.Uniform4ui(2, m_batch.texture_window_values[0], m_batch.texture_window_values[1],
m_batch.texture_window_values[2], m_batch.texture_window_values[3]);
m_vram_read_texture->Bind();
}
if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == HWBatchRenderMode::OnlyOpaque)
{
@ -415,6 +406,23 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
Log_DebugPrintf("SetScissor: (%d-%d, %d-%d)", x, x + width, y, y + height);
glScissor(x, y, width, height);
}
if (m_batch_ubo_dirty)
{
UploadUniformBlock(&m_batch_ubo_data, sizeof(m_batch_ubo_data));
m_batch_ubo_dirty = false;
}
}
void GPU_HW_OpenGL::UploadUniformBlock(const void* data, u32 data_size)
{
const GL::StreamBuffer::MappingResult res = m_uniform_stream_buffer->Map(m_uniform_buffer_alignment, data_size);
std::memcpy(res.pointer, data, data_size);
m_uniform_stream_buffer->Unmap(data_size);
glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_stream_buffer->GetGLBufferId(), res.buffer_offset, data_size);
m_stats.num_uniform_buffer_updates++;
}
void GPU_HW_OpenGL::UpdateDrawingArea()

View file

@ -42,22 +42,25 @@ private:
u32 num_vram_reads;
u32 num_vram_writes;
u32 num_vram_read_texture_updates;
u32 num_uniform_buffer_updates;
};
std::tuple<s32, s32> ConvertToFramebufferCoordinates(s32 x, s32 y);
void SetMaxResolutionScale();
void SetCapabilities();
void CreateFramebuffer();
void ClearFramebuffer();
void DestroyFramebuffer();
void UpdateVRAMReadTexture();
void CreateVertexBuffer();
void CreateUniformBuffer();
void CreateTextureBuffer();
bool CompilePrograms();
bool CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, bool dithering);
void SetDrawState(HWBatchRenderMode render_mode);
void UploadUniformBlock(const void* data, u32 data_size);
// downsample texture - used for readbacks at >1xIR.
std::unique_ptr<GL::Texture> m_vram_texture;
@ -69,12 +72,12 @@ private:
GLuint m_vao_id = 0;
GLuint m_attributeless_vao_id = 0;
std::unique_ptr<GL::StreamBuffer> m_uniform_stream_buffer;
std::unique_ptr<GL::StreamBuffer> m_texture_stream_buffer;
GLuint m_texture_buffer_r16ui_texture = 0;
bool m_vram_read_texture_dirty = true;
bool m_drawing_area_changed = true;
bool m_show_renderer_statistics = false;
u32 m_uniform_buffer_alignment = 1;
std::array<std::array<std::array<GL::Program, 2>, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering]
std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
@ -82,4 +85,8 @@ private:
GLStats m_stats = {};
GLStats m_last_stats = {};
bool m_vram_read_texture_dirty = true;
bool m_drawing_area_changed = true;
bool m_show_renderer_statistics = false;
};