diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj
index 5b647cf64..43cec14de 100644
--- a/src/common/common.vcxproj
+++ b/src/common/common.vcxproj
@@ -40,6 +40,7 @@
+
@@ -53,6 +54,7 @@
+
diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters
index a7bf0f289..a7918e047 100644
--- a/src/common/common.vcxproj.filters
+++ b/src/common/common.vcxproj.filters
@@ -12,6 +12,7 @@
+
@@ -23,6 +24,7 @@
+
diff --git a/src/common/gl_stream_buffer.cpp b/src/common/gl_stream_buffer.cpp
new file mode 100644
index 000000000..65a32fb58
--- /dev/null
+++ b/src/common/gl_stream_buffer.cpp
@@ -0,0 +1,53 @@
+#include "gl_stream_buffer.h"
+
+namespace GL {
+
+// Adopts an existing GL buffer object and allocates CPU-side staging storage
+// with `size` elements; Map() hands out a pointer into that staging storage.
+StreamBuffer::StreamBuffer(GLenum target, GLuint buffer_id, u32 size)
+  : m_target{target},
+    m_buffer_id{buffer_id},
+    m_size{size},
+    m_cpu_buffer(size)
+{
+}
+
+// Deletes the GL buffer object this instance was constructed with.
+StreamBuffer::~StreamBuffer()
+{
+  const GLuint owned_buffer = m_buffer_id;
+  glDeleteBuffers(1, &owned_buffer);
+}
+
+// Binds the wrapped buffer object to the target it was created for.
+void StreamBuffer::Bind()
+{
+  const GLenum bind_target = m_target;
+  glBindBuffer(bind_target, m_buffer_id);
+}
+
+// Hands out a write pointer to the start of the CPU-side staging storage.
+//
+// alignment: unit (in bytes) in which index_aligned/space_aligned are expressed.
+// min_size:  number of bytes the caller intends to write.
+//
+// Returns {pointer, 0, m_size / alignment} on success. If `alignment` is zero
+// or `min_size` exceeds the buffer size the request cannot be satisfied and
+// {nullptr, 0, 0} is returned instead.
+StreamBuffer::MappingResult StreamBuffer::Map(u32 alignment, u32 min_size)
+{
+  // A zero alignment would divide by zero below, and a request larger than the
+  // whole buffer would let the caller write past the end of the staging array.
+  if (alignment == 0 || min_size > m_size)
+    return MappingResult{nullptr, 0, 0};
+
+  return MappingResult{static_cast<void*>(m_cpu_buffer.data()), 0, m_size / alignment};
+}
+
+// Uploads the first `used_size` bytes of the staging storage to the start of
+// the GL buffer. Nothing is bound or uploaded when `used_size` is zero.
+void StreamBuffer::Unmap(u32 used_size)
+{
+  if (used_size != 0)
+  {
+    glBindBuffer(m_target, m_buffer_id);
+    glBufferSubData(m_target, 0, used_size, m_cpu_buffer.data());
+  }
+}
+
+// Creates a GL buffer object of `size` bytes with GL_STREAM_DRAW usage, bound to
+// `target`, and wraps it in a StreamBuffer.
+//
+// Returns an empty pointer if GL reports an error while creating the buffer.
+// On success the new buffer is left bound to `target`.
+std::unique_ptr<StreamBuffer> StreamBuffer::Create(GLenum target, u32 size)
+{
+  // Discard a previously recorded error so the check below reflects our calls.
+  glGetError();
+
+  // Zero-initialised so the failure path never hands an indeterminate name to
+  // glDeleteBuffers (deleting name 0 is silently ignored).
+  GLuint buffer_id = 0;
+  glGenBuffers(1, &buffer_id);
+  glBindBuffer(target, buffer_id);
+  glBufferData(target, size, nullptr, GL_STREAM_DRAW);
+
+  const GLenum err = glGetError();
+  if (err != GL_NO_ERROR)
+  {
+    glDeleteBuffers(1, &buffer_id);
+    return {};
+  }
+
+  // Plain new is used because the constructor is not accessible to std::make_unique.
+  return std::unique_ptr<StreamBuffer>(new StreamBuffer(target, buffer_id, size));
+}
+
+} // namespace GL
\ No newline at end of file
diff --git a/src/common/gl_stream_buffer.h b/src/common/gl_stream_buffer.h
new file mode 100644
index 000000000..5c1876bed
--- /dev/null
+++ b/src/common/gl_stream_buffer.h
@@ -0,0 +1,42 @@
+#pragma once
+#include "types.h"
+#include
+#include
+#include
+#include
+
+namespace GL {
+// TODO: Persistent mapping-based implementation
+// Owns one OpenGL buffer object together with a CPU-side staging array.
+// Callers Map() to obtain a write pointer into the staging array, fill it, and
+// Unmap() to upload what they wrote to the GL buffer. Instances are obtained
+// through Create().
+class StreamBuffer
+{
+public:
+  ~StreamBuffer();
+
+  // The destructor deletes the GL buffer, so copying an instance would delete
+  // the same buffer name twice. Copying is therefore disabled.
+  StreamBuffer(const StreamBuffer&) = delete;
+  StreamBuffer& operator=(const StreamBuffer&) = delete;
+
+  ALWAYS_INLINE GLuint GetGLBufferId() const { return m_buffer_id; }
+  ALWAYS_INLINE GLenum GetGLTarget() const { return m_target; }
+  ALWAYS_INLINE u32 GetSize() const { return m_size; }
+
+  // Binds the buffer object to its target.
+  void Bind();
+
+  struct MappingResult
+  {
+    void* pointer;
+    u32 index_aligned; // offset / alignment, suitable for base vertex
+    u32 space_aligned; // remaining space / alignment
+  };
+
+  // Returns a pointer the caller may write to, plus the position and remaining
+  // space expressed in units of `alignment`.
+  MappingResult Map(u32 alignment, u32 min_size);
+
+  // Uploads the first `used_size` bytes written since Map() to the GL buffer.
+  void Unmap(u32 used_size);
+
+  // Creates the GL buffer and its wrapper; returns an empty pointer on GL error.
+  static std::unique_ptr<StreamBuffer> Create(GLenum target, u32 size);
+
+private:
+  StreamBuffer(GLenum target, GLuint buffer_id, u32 size);
+
+  GLenum m_target;
+  GLuint m_buffer_id;
+  u32 m_size;
+
+  // CPU-side staging storage handed out by Map() and uploaded by Unmap().
+  std::vector<u8> m_cpu_buffer;
+};
+} // namespace GL
\ No newline at end of file
diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index 1ae3d370a..3c48e4eb9 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -49,9 +49,9 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
case Primitive::Polygon:
{
// if we're drawing quads, we need to create a degenerate triangle to restart the triangle strip
- bool restart_strip = (rc.quad_polygon && !m_batch.vertices.empty());
+ bool restart_strip = (rc.quad_polygon && !IsFlushed());
if (restart_strip)
- m_batch.vertices.push_back(m_batch.vertices.back());
+ AddDuplicateVertex();
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
@@ -60,28 +60,15 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
u32 buffer_pos = 1;
for (u32 i = 0; i < num_vertices; i++)
{
- HWVertex hw_vert;
- hw_vert.color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
-
+ const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{command_ptr[buffer_pos++]};
- hw_vert.x = vp.x;
- hw_vert.y = vp.y;
- hw_vert.texpage = texpage;
+ const u16 packed_texcoord = textured ? Truncate16(command_ptr[buffer_pos++]) : 0;
- if (textured)
- {
- const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(command_ptr[buffer_pos++]));
- hw_vert.texcoord = HWVertex::PackTexcoord(texcoord_x, texcoord_y);
- }
- else
- {
- hw_vert.texcoord = 0;
- }
+ (m_batch_current_vertex_ptr++)->Set(vp.x, vp.y, color, texpage, packed_texcoord);
- m_batch.vertices.push_back(hw_vert);
if (restart_strip)
{
- m_batch.vertices.push_back(m_batch.vertices.back());
+ AddDuplicateVertex();
restart_strip = false;
}
}
@@ -91,9 +78,9 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
case Primitive::Rectangle:
{
// if we're drawing quads, we need to create a degenerate triangle to restart the triangle strip
- const bool restart_strip = !m_batch.vertices.empty();
+ const bool restart_strip = !IsFlushed();
if (restart_strip)
- m_batch.vertices.push_back(m_batch.vertices.back());
+ AddDuplicateVertex();
u32 buffer_pos = 1;
const u32 color = rc.color_for_first_vertex;
@@ -132,16 +119,13 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
const u16 tex_right = tex_left + static_cast(rectangle_width);
const u16 tex_bottom = tex_top + static_cast(rectangle_height);
- m_batch.vertices.push_back(
- HWVertex{pos_left, pos_top, color, texpage, HWVertex::PackTexcoord(tex_left, tex_top)});
+ (m_batch_current_vertex_ptr++)->Set(pos_left, pos_top, color, texpage, tex_left, tex_top);
if (restart_strip)
- m_batch.vertices.push_back(m_batch.vertices.back());
- m_batch.vertices.push_back(
- HWVertex{pos_right, pos_top, color, texpage, HWVertex::PackTexcoord(tex_right, tex_top)});
- m_batch.vertices.push_back(
- HWVertex{pos_left, pos_bottom, color, texpage, HWVertex::PackTexcoord(tex_left, tex_bottom)});
- m_batch.vertices.push_back(
- HWVertex{pos_right, pos_bottom, color, texpage, HWVertex::PackTexcoord(tex_right, tex_bottom)});
+ AddDuplicateVertex();
+
+ (m_batch_current_vertex_ptr++)->Set(pos_right, pos_top, color, texpage, tex_right, tex_top);
+ (m_batch_current_vertex_ptr++)->Set(pos_left, pos_bottom, color, texpage, tex_left, tex_bottom);
+ (m_batch_current_vertex_ptr++)->Set(pos_right, pos_bottom, color, texpage, tex_right, tex_bottom);
}
break;
@@ -155,7 +139,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
{
const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{command_ptr[buffer_pos++]};
- m_batch.vertices.push_back(HWVertex{vp.x.GetValue(), vp.y.GetValue(), color});
+ (m_batch_current_vertex_ptr++)->Set(vp.x, vp.y, color, 0, 0);
}
}
break;
@@ -166,6 +150,12 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
}
}
+// Appends a byte-wise copy of the most recently written vertex to the batch and
+// advances the write pointer by one. Callers use it to emit the degenerate
+// vertices that restart a triangle strip.
+void GPU_HW::AddDuplicateVertex()
+{
+  HWVertex* const dst = m_batch_current_vertex_ptr++;
+  const HWVertex* const src = dst - 1;
+  std::memcpy(dst, src, sizeof(HWVertex));
+}
+
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
{
*left = m_drawing_area.left * m_resolution_scale;
@@ -567,8 +557,6 @@ GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
return HWPrimitive::Triangles;
}
-void GPU_HW::InvalidateVRAMReadCache() {}
-
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
{
TextureMode texture_mode;
@@ -612,10 +600,10 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
rc.transparency_enable ? m_render_state.transparency_mode : TransparencyMode::Disabled;
const HWPrimitive rc_primitive = GetPrimitiveForCommand(rc);
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
+ const u32 max_added_vertices = num_vertices + 2;
if (!IsFlushed())
{
- const u32 max_added_vertices = num_vertices + 2;
- const bool buffer_overflow = (m_batch.vertices.size() + max_added_vertices) >= MAX_BATCH_VERTEX_COUNT;
+ const bool buffer_overflow = GetBatchVertexSpace() < max_added_vertices;
if (buffer_overflow || rc_primitive == HWPrimitive::LineStrip || m_batch.texture_mode != texture_mode ||
m_batch.transparency_mode != transparency_mode || m_batch.primitive != rc_primitive ||
dithering_enable != m_batch.dithering || m_render_state.IsTexturePageChanged() ||
@@ -625,6 +613,10 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
}
}
+ // map buffer if it's not already done
+ if (!m_batch_current_vertex_ptr)
+ MapBatchVertexPointer(max_added_vertices);
+
// update state
m_batch.primitive = rc_primitive;
m_batch.texture_mode = texture_mode;
diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h
index 586c70e5a..7828ad714 100644
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@@ -38,13 +38,24 @@ protected:
s32 y;
u32 color;
u32 texpage;
- u32 texcoord;
+ u32 texcoord; // 16-bit texcoords are needed for 256 extent rectangles
- // 16-bit texcoords are needed for 256 extent rectangles
- static u32 PackTexcoord(u16 x, u16 y) { return ZeroExtend32(x) | (ZeroExtend32(y) << 16); }
+ // Overload taking both texture coordinates packed into one 16-bit value:
+ // the low byte is the X coordinate, the high byte is the Y coordinate.
+ ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u32 texpage_, u16 packed_texcoord)
+ {
+   const int tc_x = packed_texcoord & 0xFF;
+   const int tc_y = packed_texcoord >> 8;
+   Set(x_, y_, color_, texpage_, tc_x, tc_y);
+ }
+
+ // Fills every field of the vertex. The two texture coordinates are stored in
+ // a single word: X in the low 16 bits, Y in the high 16 bits.
+ ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u32 texpage_, u16 texcoord_x, u16 texcoord_y)
+ {
+   const auto low_half = ZeroExtend32(texcoord_x);
+   const auto high_half = ZeroExtend32(texcoord_y) << 16;
+
+   x = x_;
+   y = y_;
+   color = color_;
+   texpage = texpage_;
+   texcoord = low_half | high_half;
+ }
};
- struct HWRenderBatch
+ struct HWBatchConfig
{
u32 texture_page_x;
u32 texture_page_y;
@@ -56,8 +67,6 @@ protected:
std::array texture_window_values;
bool dithering;
- std::vector vertices;
-
// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
bool NeedsTwoPassRendering() const
@@ -75,6 +84,7 @@ protected:
};
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
+ static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
static constexpr u32 TEXTURE_TILE_SIZE = 256;
static constexpr u32 TEXTURE_TILE_X_COUNT = VRAM_WIDTH / TEXTURE_TILE_SIZE;
@@ -89,9 +99,14 @@ protected:
static_cast(rgba >> 24) * (1.0f / 255.0f));
}
- virtual void InvalidateVRAMReadCache();
+ virtual void InvalidateVRAMReadCache() = 0;
- bool IsFlushed() const { return m_batch.vertices.empty(); }
+ virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
+
+ // Number of vertices that can still be written before reaching the end of the mapped range.
+ u32 GetBatchVertexSpace() const { return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); }
+ // Number of vertices written to the current batch so far.
+ u32 GetBatchVertexCount() const { return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); }
+
+ // True when no vertices have been written since the batch start pointer was
+ // set (this includes the case where both pointers are null).
+ bool IsFlushed() const
+ {
+   return m_batch_start_vertex_ptr == m_batch_current_vertex_ptr;
+ }
void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) override;
@@ -108,7 +123,13 @@ protected:
std::string GenerateFillFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
- HWRenderBatch m_batch = {};
+ HWBatchConfig m_batch = {};
+
+ HWVertex* m_batch_start_vertex_ptr = nullptr;
+ HWVertex* m_batch_end_vertex_ptr = nullptr;
+ HWVertex* m_batch_current_vertex_ptr = nullptr;
+ u32 m_batch_base_vertex = 0;
+
u32 m_resolution_scale = 1;
u32 m_max_resolution_scale = 1;
bool m_true_color = false;
@@ -119,4 +140,5 @@ private:
void GenerateShaderHeader(std::stringstream& ss);
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
+ void AddDuplicateVertex();
};
diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp
index 483cbbf90..3003d7521 100644
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -62,7 +62,6 @@ void GPU_HW_OpenGL::RestoreGraphicsAPIState()
glLineWidth(static_cast(m_resolution_scale));
UpdateDrawingArea();
- glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
glBindVertexArray(m_vao_id);
}
@@ -132,6 +131,19 @@ void GPU_HW_OpenGL::InvalidateVRAMReadCache()
m_vram_read_texture_dirty = true;
}
+// Maps the vertex stream buffer and points the batch start/current/end
+// pointers at the returned storage. Must only be called while no mapping is
+// active. `required_vertices` is the number of vertices the caller is about
+// to write.
+void GPU_HW_OpenGL::MapBatchVertexPointer(u32 required_vertices)
+{
+  Assert(!m_batch_start_vertex_ptr);
+
+  const GL::StreamBuffer::MappingResult res =
+    m_vertex_stream_buffer->Map(sizeof(HWVertex), required_vertices * sizeof(HWVertex));
+
+  // Vertices are written through m_batch_current_vertex_ptr without any bounds
+  // check, so make sure the mapping really has room for the request.
+  Assert(res.pointer && res.space_aligned >= required_vertices);
+
+  m_batch_start_vertex_ptr = static_cast<HWVertex*>(res.pointer);
+  m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
+  m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned;
+  m_batch_base_vertex = res.index_aligned;
+}
+
std::tuple GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y)
{
return std::make_tuple(x, static_cast(static_cast(VRAM_HEIGHT) - y));
@@ -217,9 +229,11 @@ void GPU_HW_OpenGL::DestroyFramebuffer()
void GPU_HW_OpenGL::CreateVertexBuffer()
{
- glGenBuffers(1, &m_vertex_buffer);
- glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
- glBufferData(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, nullptr, GL_STREAM_DRAW);
+ m_vertex_stream_buffer = GL::StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE);
+ if (!m_vertex_stream_buffer)
+ Panic("Failed to create vertex streaming buffer");
+
+ m_vertex_stream_buffer->Bind();
glGenVertexArrays(1, &m_vao_id);
glBindVertexArray(m_vao_id);
@@ -638,35 +652,36 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture()
+// Uploads the vertices written to the current batch, clears the batch
+// pointers, and issues the draw call(s) for the batch. Does nothing when the
+// batch holds no vertices.
void GPU_HW_OpenGL::FlushRender()
{
- if (m_batch.vertices.empty())
+ const u32 vertex_count = GetBatchVertexCount();
+ if (vertex_count == 0)
return;
if (m_vram_read_texture_dirty)
UpdateVRAMReadTexture();
m_stats.num_batches++;
- m_stats.num_vertices += static_cast(m_batch.vertices.size());
+ m_stats.num_vertices += vertex_count;
- Assert((m_batch.vertices.size() * sizeof(HWVertex)) <= VERTEX_BUFFER_SIZE);
- glBufferSubData(GL_ARRAY_BUFFER, 0, static_cast(sizeof(HWVertex) * m_batch.vertices.size()),
- m_batch.vertices.data());
+ // Upload what was written, then drop the pointers so the next command maps again.
+ m_vertex_stream_buffer->Unmap(vertex_count * sizeof(HWVertex));
+ m_vertex_stream_buffer->Bind();
+ m_batch_start_vertex_ptr = nullptr;
+ m_batch_end_vertex_ptr = nullptr;
+ m_batch_current_vertex_ptr = nullptr;
static constexpr std::array gl_primitives = {{GL_LINES, GL_LINE_STRIP, GL_TRIANGLES, GL_TRIANGLE_STRIP}};
if (m_batch.NeedsTwoPassRendering())
{
SetDrawState(HWBatchRenderMode::OnlyTransparent);
- glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], 0, static_cast(m_batch.vertices.size()));
+ // Start at the index recorded when the buffer was mapped instead of assuming 0.
+ glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], m_batch_base_vertex, vertex_count);
SetDrawState(HWBatchRenderMode::OnlyOpaque);
- glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], 0, static_cast(m_batch.vertices.size()));
+ glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], m_batch_base_vertex, vertex_count);
}
else
{
SetDrawState(m_batch.GetRenderMode());
- glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], 0, static_cast(m_batch.vertices.size()));
+ glDrawArrays(gl_primitives[static_cast(m_batch.primitive)], m_batch_base_vertex, vertex_count);
}
-
- m_batch.vertices.clear();
}
std::unique_ptr GPU::CreateHardwareOpenGLRenderer()
diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h
index 59ecbb173..81236a943 100644
--- a/src/core/gpu_hw_opengl.h
+++ b/src/core/gpu_hw_opengl.h
@@ -1,5 +1,6 @@
#pragma once
#include "common/gl_program.h"
+#include "common/gl_stream_buffer.h"
#include "common/gl_texture.h"
#include "glad.h"
#include "gpu_hw.h"
@@ -31,6 +32,7 @@ protected:
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void FlushRender() override;
void InvalidateVRAMReadCache() override;
+ void MapBatchVertexPointer(u32 required_vertices) override;
private:
struct GLStats
@@ -62,7 +64,7 @@ private:
std::unique_ptr m_vram_downsample_texture;
std::unique_ptr m_display_texture;
- GLuint m_vertex_buffer = 0;
+ std::unique_ptr m_vertex_stream_buffer;
GLuint m_vao_id = 0;
GLuint m_attributeless_vao_id = 0;