GPU: Add base functionality for GL streaming buffers

This commit is contained in:
Connor McLaughlin 2019-11-02 22:21:56 +10:00
parent c52c0608ae
commit 407fee9ec3
8 changed files with 189 additions and 59 deletions

View file

@ -40,6 +40,7 @@
<ClInclude Include="cd_image.h" />
<ClInclude Include="fifo_queue.h" />
<ClInclude Include="gl_program.h" />
<ClInclude Include="gl_stream_buffer.h" />
<ClInclude Include="gl_texture.h" />
<ClInclude Include="heap_array.h" />
<ClInclude Include="jit_code_buffer.h" />
@ -53,6 +54,7 @@
<ClCompile Include="cd_image_bin.cpp" />
<ClCompile Include="cd_image_cue.cpp" />
<ClCompile Include="gl_program.cpp" />
<ClCompile Include="gl_stream_buffer.cpp" />
<ClCompile Include="gl_texture.cpp" />
<ClCompile Include="jit_code_buffer.cpp" />
<ClCompile Include="state_wrapper.cpp" />

View file

@ -12,6 +12,7 @@
<ClInclude Include="audio_stream.h" />
<ClInclude Include="cd_xa.h" />
<ClInclude Include="heap_array.h" />
<ClInclude Include="gl_stream_buffer.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="jit_code_buffer.cpp" />
@ -23,6 +24,7 @@
<ClCompile Include="cd_xa.cpp" />
<ClCompile Include="cd_image_cue.cpp" />
<ClCompile Include="cd_image_bin.cpp" />
<ClCompile Include="gl_stream_buffer.cpp" />
</ItemGroup>
<ItemGroup>
<Natvis Include="bitfield.natvis" />

View file

@ -0,0 +1,53 @@
#include "gl_stream_buffer.h"
namespace GL {
// Wraps an already-created GL buffer object and allocates a CPU-side staging
// array of `size` bytes. The buffer id is deleted again in the destructor.
StreamBuffer::StreamBuffer(GLenum target, GLuint buffer_id, u32 size)
  : m_target(target)
  , m_buffer_id(buffer_id)
  , m_size(size)
  , m_cpu_buffer(size) // parentheses on purpose: `size` elements, not a one-element list
{
}
// Deletes the GL buffer object this instance was constructed with.
StreamBuffer::~StreamBuffer()
{
  const GLuint* const buffer_to_delete = &m_buffer_id;
  glDeleteBuffers(1, buffer_to_delete);
}
void StreamBuffer::Bind()
{
glBindBuffer(m_target, m_buffer_id);
}
// Hands out the CPU-side staging array for the caller to write into.
//
// alignment: size in bytes of one element; used only to express the available
//            space in elements. A value of zero is treated as 1 (byte units)
//            so the division below can never divide by zero.
// min_size:  minimum number of bytes the caller needs. This implementation
//            always exposes the whole staging array starting at offset zero,
//            so the value is not consulted.
//
// Returns the staging pointer, an aligned start index of 0, and the total
// buffer size divided by the alignment.
StreamBuffer::MappingResult StreamBuffer::Map(u32 alignment, u32 min_size)
{
  // Explicitly mark the parameter as intentionally unused.
  static_cast<void>(min_size);

  const u32 element_size = (alignment != 0) ? alignment : 1;
  return MappingResult{static_cast<void*>(m_cpu_buffer.data()), 0, m_size / element_size};
}
// Uploads the first `used_size` bytes of the CPU-side staging array to the
// start of the GL buffer. Does nothing (not even a bind) when nothing was used.
void StreamBuffer::Unmap(u32 used_size)
{
  if (used_size != 0)
  {
    const void* const staging_data = m_cpu_buffer.data();
    glBindBuffer(m_target, m_buffer_id);
    glBufferSubData(m_target, 0, used_size, staging_data);
  }
}
// Creates a GL buffer object of `size` bytes for `target` (GL_STREAM_DRAW usage)
// and wraps it in a StreamBuffer. Returns an empty pointer if GL reports an
// error after the allocation; the buffer object is deleted in that case.
// Note: the new buffer is left bound to `target`.
std::unique_ptr<StreamBuffer> StreamBuffer::Create(GLenum target, u32 size)
{
  // Read (and thereby reset) a pending error flag before issuing our own calls.
  glGetError();

  GLuint new_buffer;
  glGenBuffers(1, &new_buffer);
  glBindBuffer(target, new_buffer);
  glBufferData(target, size, nullptr, GL_STREAM_DRAW);

  const GLenum status = glGetError();
  if (status == GL_NO_ERROR)
  {
    // The constructor is private, so std::make_unique cannot be used here.
    return std::unique_ptr<StreamBuffer>(new StreamBuffer(target, new_buffer, size));
  }

  glDeleteBuffers(1, &new_buffer);
  return nullptr;
}
} // namespace GL

View file

@ -0,0 +1,42 @@
#pragma once
#include "types.h"
#include <glad.h>
#include <memory>
#include <tuple>
#include <vector>
namespace GL {
// TODO: Persistent mapping-based implementation
// Wrapper around a GL buffer object used for streaming data to the GPU.
// Data is written into a CPU-side staging array obtained from Map() and is
// uploaded to the GL buffer by Unmap(). Instances are created through Create().
class StreamBuffer
{
public:
  ~StreamBuffer();

  // Non-copyable: the destructor deletes the GL buffer object, so a copy
  // would end up deleting the same buffer id a second time.
  StreamBuffer(const StreamBuffer&) = delete;
  StreamBuffer& operator=(const StreamBuffer&) = delete;

  ALWAYS_INLINE GLuint GetGLBufferId() const { return m_buffer_id; }
  ALWAYS_INLINE GLenum GetGLTarget() const { return m_target; }
  ALWAYS_INLINE u32 GetSize() const { return m_size; }

  // Binds the GL buffer object to its target.
  void Bind();

  struct MappingResult
  {
    void* pointer;     // where the caller should write its data
    u32 index_aligned; // offset / alignment, suitable for base vertex
    u32 space_aligned; // remaining space / alignment
  };

  // Returns a writable region. `alignment` is the element size in bytes and
  // `min_size` is the number of bytes the caller needs.
  MappingResult Map(u32 alignment, u32 min_size);

  // Uploads the first `used_size` bytes written since Map() to the GL buffer.
  void Unmap(u32 used_size);

  // Creates the GL buffer object; returns an empty pointer on failure.
  static std::unique_ptr<StreamBuffer> Create(GLenum target, u32 size);

private:
  // Private so that instances can only be made through Create().
  StreamBuffer(GLenum target, GLuint buffer_id, u32 size);

  GLenum m_target;
  GLuint m_buffer_id;
  u32 m_size;
  std::vector<u8> m_cpu_buffer; // CPU-side staging storage, m_size bytes
};
} // namespace GL

View file

@ -49,9 +49,9 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
case Primitive::Polygon:
{
// if we're drawing quads, we need to create a degenerate triangle to restart the triangle strip
bool restart_strip = (rc.quad_polygon && !m_batch.vertices.empty());
bool restart_strip = (rc.quad_polygon && !IsFlushed());
if (restart_strip)
m_batch.vertices.push_back(m_batch.vertices.back());
AddDuplicateVertex();
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
@ -60,28 +60,15 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
u32 buffer_pos = 1;
for (u32 i = 0; i < num_vertices; i++)
{
HWVertex hw_vert;
hw_vert.color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{command_ptr[buffer_pos++]};
hw_vert.x = vp.x;
hw_vert.y = vp.y;
hw_vert.texpage = texpage;
const u16 packed_texcoord = textured ? Truncate16(command_ptr[buffer_pos++]) : 0;
if (textured)
{
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(command_ptr[buffer_pos++]));
hw_vert.texcoord = HWVertex::PackTexcoord(texcoord_x, texcoord_y);
}
else
{
hw_vert.texcoord = 0;
}
(m_batch_current_vertex_ptr++)->Set(vp.x, vp.y, color, texpage, packed_texcoord);
m_batch.vertices.push_back(hw_vert);
if (restart_strip)
{
m_batch.vertices.push_back(m_batch.vertices.back());
AddDuplicateVertex();
restart_strip = false;
}
}
@ -91,9 +78,9 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
case Primitive::Rectangle:
{
// if we're drawing quads, we need to create a degenerate triangle to restart the triangle strip
const bool restart_strip = !m_batch.vertices.empty();
const bool restart_strip = !IsFlushed();
if (restart_strip)
m_batch.vertices.push_back(m_batch.vertices.back());
AddDuplicateVertex();
u32 buffer_pos = 1;
const u32 color = rc.color_for_first_vertex;
@ -132,16 +119,13 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
const u16 tex_right = tex_left + static_cast<u16>(rectangle_width);
const u16 tex_bottom = tex_top + static_cast<u16>(rectangle_height);
m_batch.vertices.push_back(
HWVertex{pos_left, pos_top, color, texpage, HWVertex::PackTexcoord(tex_left, tex_top)});
(m_batch_current_vertex_ptr++)->Set(pos_left, pos_top, color, texpage, tex_left, tex_top);
if (restart_strip)
m_batch.vertices.push_back(m_batch.vertices.back());
m_batch.vertices.push_back(
HWVertex{pos_right, pos_top, color, texpage, HWVertex::PackTexcoord(tex_right, tex_top)});
m_batch.vertices.push_back(
HWVertex{pos_left, pos_bottom, color, texpage, HWVertex::PackTexcoord(tex_left, tex_bottom)});
m_batch.vertices.push_back(
HWVertex{pos_right, pos_bottom, color, texpage, HWVertex::PackTexcoord(tex_right, tex_bottom)});
AddDuplicateVertex();
(m_batch_current_vertex_ptr++)->Set(pos_right, pos_top, color, texpage, tex_right, tex_top);
(m_batch_current_vertex_ptr++)->Set(pos_left, pos_bottom, color, texpage, tex_left, tex_bottom);
(m_batch_current_vertex_ptr++)->Set(pos_right, pos_bottom, color, texpage, tex_right, tex_bottom);
}
break;
@ -155,7 +139,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
{
const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{command_ptr[buffer_pos++]};
m_batch.vertices.push_back(HWVertex{vp.x.GetValue(), vp.y.GetValue(), color});
(m_batch_current_vertex_ptr++)->Set(vp.x, vp.y, color, 0, 0);
}
}
break;
@ -166,6 +150,12 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
}
}
void GPU_HW::AddDuplicateVertex()
{
std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(HWVertex));
m_batch_current_vertex_ptr++;
}
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
{
*left = m_drawing_area.left * m_resolution_scale;
@ -567,8 +557,6 @@ GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
return HWPrimitive::Triangles;
}
void GPU_HW::InvalidateVRAMReadCache() {}
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
{
TextureMode texture_mode;
@ -612,10 +600,10 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
rc.transparency_enable ? m_render_state.transparency_mode : TransparencyMode::Disabled;
const HWPrimitive rc_primitive = GetPrimitiveForCommand(rc);
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
const u32 max_added_vertices = num_vertices + 2;
if (!IsFlushed())
{
const u32 max_added_vertices = num_vertices + 2;
const bool buffer_overflow = (m_batch.vertices.size() + max_added_vertices) >= MAX_BATCH_VERTEX_COUNT;
const bool buffer_overflow = GetBatchVertexSpace() < max_added_vertices;
if (buffer_overflow || rc_primitive == HWPrimitive::LineStrip || m_batch.texture_mode != texture_mode ||
m_batch.transparency_mode != transparency_mode || m_batch.primitive != rc_primitive ||
dithering_enable != m_batch.dithering || m_render_state.IsTexturePageChanged() ||
@ -625,6 +613,10 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
}
}
// map buffer if it's not already done
if (!m_batch_current_vertex_ptr)
MapBatchVertexPointer(max_added_vertices);
// update state
m_batch.primitive = rc_primitive;
m_batch.texture_mode = texture_mode;

View file

@ -38,13 +38,24 @@ protected:
s32 y;
u32 color;
u32 texpage;
u32 texcoord;
u32 texcoord; // 16-bit texcoords are needed for 256 extent rectangles
// 16-bit texcoords are needed for 256 extent rectangles
static u32 PackTexcoord(u16 x, u16 y) { return ZeroExtend32(x) | (ZeroExtend32(y) << 16); }
// Overload taking both texture coordinates in one 16-bit value: x is read from
// the low 8 bits and y from the bits above. Forwards to the x/y overload.
ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u32 texpage_, u16 packed_texcoord)
{
  const u16 unpacked_x = static_cast<u16>(packed_texcoord & 0xFF);
  const u16 unpacked_y = static_cast<u16>(packed_texcoord >> 8);
  Set(x_, y_, color_, texpage_, unpacked_x, unpacked_y);
}
// Fills in every field of the vertex. The two 16-bit texture coordinates are
// stored in one 32-bit word: x in the low half, y in the high half.
ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u32 texpage_, u16 texcoord_x, u16 texcoord_y)
{
  const auto texcoord_low_half = ZeroExtend32(texcoord_x);
  const auto texcoord_high_half = ZeroExtend32(texcoord_y) << 16;

  x = x_;
  y = y_;
  color = color_;
  texpage = texpage_;
  texcoord = texcoord_low_half | texcoord_high_half;
}
};
struct HWRenderBatch
struct HWBatchConfig
{
u32 texture_page_x;
u32 texture_page_y;
@ -56,8 +67,6 @@ protected:
std::array<u8, 4> texture_window_values;
bool dithering;
std::vector<HWVertex> vertices;
// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
bool NeedsTwoPassRendering() const
@ -75,6 +84,7 @@ protected:
};
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
static constexpr u32 TEXTURE_TILE_SIZE = 256;
static constexpr u32 TEXTURE_TILE_X_COUNT = VRAM_WIDTH / TEXTURE_TILE_SIZE;
@ -89,9 +99,14 @@ protected:
static_cast<float>(rgba >> 24) * (1.0f / 255.0f));
}
virtual void InvalidateVRAMReadCache();
virtual void InvalidateVRAMReadCache() = 0;
bool IsFlushed() const { return m_batch.vertices.empty(); }
virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
// Number of vertices that can still be written between the current write
// pointer and the end of the mapped range.
u32 GetBatchVertexSpace() const
{
  const auto vertices_remaining = m_batch_end_vertex_ptr - m_batch_current_vertex_ptr;
  return static_cast<u32>(vertices_remaining);
}
// Number of vertices written since the start of the mapped range.
u32 GetBatchVertexCount() const
{
  const auto vertices_written = m_batch_current_vertex_ptr - m_batch_start_vertex_ptr;
  return static_cast<u32>(vertices_written);
}
// True when no vertices are pending: the write pointer sits at the start of
// the range (this also holds when both pointers are null, i.e. nothing mapped).
bool IsFlushed() const
{
  const bool nothing_written = (m_batch_start_vertex_ptr == m_batch_current_vertex_ptr);
  return nothing_written;
}
void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) override;
@ -108,7 +123,13 @@ protected:
std::string GenerateFillFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
HWRenderBatch m_batch = {};
HWBatchConfig m_batch = {};
HWVertex* m_batch_start_vertex_ptr = nullptr;
HWVertex* m_batch_end_vertex_ptr = nullptr;
HWVertex* m_batch_current_vertex_ptr = nullptr;
u32 m_batch_base_vertex = 0;
u32 m_resolution_scale = 1;
u32 m_max_resolution_scale = 1;
bool m_true_color = false;
@ -119,4 +140,5 @@ private:
void GenerateShaderHeader(std::stringstream& ss);
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
void AddDuplicateVertex();
};

View file

@ -62,7 +62,6 @@ void GPU_HW_OpenGL::RestoreGraphicsAPIState()
glLineWidth(static_cast<float>(m_resolution_scale));
UpdateDrawingArea();
glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
glBindVertexArray(m_vao_id);
}
@ -132,6 +131,19 @@ void GPU_HW_OpenGL::InvalidateVRAMReadCache()
m_vram_read_texture_dirty = true;
}
// Maps the vertex stream buffer and points the batch start/current/end
// pointers at the returned range. Asserts that no batch range is already set.
// required_vertices: number of vertices the caller is about to write; passed
// to the stream buffer as a minimum size in bytes.
void GPU_HW_OpenGL::MapBatchVertexPointer(u32 required_vertices)
{
  Assert(!m_batch_start_vertex_ptr);

  const GL::StreamBuffer::MappingResult mapping =
    m_vertex_stream_buffer->Map(sizeof(HWVertex), required_vertices * sizeof(HWVertex));
  HWVertex* const first_vertex = static_cast<HWVertex*>(mapping.pointer);

  m_batch_start_vertex_ptr = first_vertex;
  m_batch_current_vertex_ptr = first_vertex;
  m_batch_end_vertex_ptr = first_vertex + mapping.space_aligned;
  m_batch_base_vertex = mapping.index_aligned;
}
std::tuple<s32, s32> GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y)
{
return std::make_tuple(x, static_cast<s32>(static_cast<s32>(VRAM_HEIGHT) - y));
@ -217,9 +229,11 @@ void GPU_HW_OpenGL::DestroyFramebuffer()
void GPU_HW_OpenGL::CreateVertexBuffer()
{
glGenBuffers(1, &m_vertex_buffer);
glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
glBufferData(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, nullptr, GL_STREAM_DRAW);
m_vertex_stream_buffer = GL::StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE);
if (!m_vertex_stream_buffer)
Panic("Failed to create vertex streaming buffer");
m_vertex_stream_buffer->Bind();
glGenVertexArrays(1, &m_vao_id);
glBindVertexArray(m_vao_id);
@ -638,35 +652,36 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture()
void GPU_HW_OpenGL::FlushRender()
{
if (m_batch.vertices.empty())
const u32 vertex_count = GetBatchVertexCount();
if (vertex_count == 0)
return;
if (m_vram_read_texture_dirty)
UpdateVRAMReadTexture();
m_stats.num_batches++;
m_stats.num_vertices += static_cast<u32>(m_batch.vertices.size());
m_stats.num_vertices += vertex_count;
Assert((m_batch.vertices.size() * sizeof(HWVertex)) <= VERTEX_BUFFER_SIZE);
glBufferSubData(GL_ARRAY_BUFFER, 0, static_cast<GLsizei>(sizeof(HWVertex) * m_batch.vertices.size()),
m_batch.vertices.data());
m_vertex_stream_buffer->Unmap(vertex_count * sizeof(HWVertex));
m_vertex_stream_buffer->Bind();
m_batch_start_vertex_ptr = nullptr;
m_batch_end_vertex_ptr = nullptr;
m_batch_current_vertex_ptr = nullptr;
static constexpr std::array<GLenum, 4> gl_primitives = {{GL_LINES, GL_LINE_STRIP, GL_TRIANGLES, GL_TRIANGLE_STRIP}};
if (m_batch.NeedsTwoPassRendering())
{
SetDrawState(HWBatchRenderMode::OnlyTransparent);
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, static_cast<GLsizei>(m_batch.vertices.size()));
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
SetDrawState(HWBatchRenderMode::OnlyOpaque);
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, static_cast<GLsizei>(m_batch.vertices.size()));
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
}
else
{
SetDrawState(m_batch.GetRenderMode());
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, static_cast<GLsizei>(m_batch.vertices.size()));
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
}
m_batch.vertices.clear();
}
std::unique_ptr<GPU> GPU::CreateHardwareOpenGLRenderer()

View file

@ -1,5 +1,6 @@
#pragma once
#include "common/gl_program.h"
#include "common/gl_stream_buffer.h"
#include "common/gl_texture.h"
#include "glad.h"
#include "gpu_hw.h"
@ -31,6 +32,7 @@ protected:
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void FlushRender() override;
void InvalidateVRAMReadCache() override;
void MapBatchVertexPointer(u32 required_vertices) override;
private:
struct GLStats
@ -62,7 +64,7 @@ private:
std::unique_ptr<GL::Texture> m_vram_downsample_texture;
std::unique_ptr<GL::Texture> m_display_texture;
GLuint m_vertex_buffer = 0;
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
GLuint m_vao_id = 0;
GLuint m_attributeless_vao_id = 0;