GPU/HW: Switch to indexed draws

Stenzek 2024-03-01 15:33:00 +10:00
parent b34742cdd2
commit 250fb56838
2 changed files with 153 additions and 121 deletions
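
In practice the switch means a quad no longer costs six full BatchVertex entries (two standalone triangles): it now costs four unique vertices plus six 16-bit indices. A self-contained back-of-the-envelope comparison, using an illustrative 32-byte vertex rather than the real sizeof(BatchVertex):

    #include <cstdio>

    int main()
    {
      constexpr unsigned kVertexSize = 32; // illustrative only; not the actual sizeof(BatchVertex)
      constexpr unsigned kIndexSize = 2;   // u16 indices, matching the index buffer below

      constexpr unsigned non_indexed_quad = 6 * kVertexSize;              // two independent triangles
      constexpr unsigned indexed_quad = 4 * kVertexSize + 6 * kIndexSize; // shared vertices + indices

      std::printf("per quad: %u -> %u bytes\n", non_indexed_quad, indexed_quad); // 192 -> 140
      return 0;
    }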

src/core/gpu_hw.cpp

@@ -174,19 +174,6 @@ ALWAYS_INLINE void GPU_HW::BatchVertex::SetUVLimits(u32 min_u, u32 max_u, u32 mi
   uv_limits = PackUVLimits(min_u, max_u, min_v, max_v);
 }
 
-ALWAYS_INLINE void GPU_HW::AddVertex(const BatchVertex& v)
-{
-  std::memcpy(m_batch_current_vertex_ptr, &v, sizeof(BatchVertex));
-  m_batch_current_vertex_ptr++;
-}
-
-template<typename... Args>
-ALWAYS_INLINE void GPU_HW::AddNewVertex(Args&&... args)
-{
-  m_batch_current_vertex_ptr->Set(std::forward<Args>(args)...);
-  m_batch_current_vertex_ptr++;
-}
-
 const Threading::Thread* GPU_HW::GetSWThread() const
 {
   return m_sw_renderer ? m_sw_renderer->GetThread() : nullptr;
@@ -247,7 +234,8 @@ void GPU_HW::Reset(bool clear_vram)
 {
   GPU::Reset(clear_vram);
 
-  m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
+  if (m_batch_vertex_ptr)
+    UnmapGPUBuffer(0, 0);
 
   if (m_sw_renderer)
     m_sw_renderer->Reset();
@@ -305,7 +293,8 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di
   // invalidate the whole VRAM read texture when loading state
   if (sw.IsReading())
   {
-    m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
+    if (m_batch_vertex_ptr)
+      UnmapGPUBuffer(0, 0);
     SetFullVRAMDirtyRectangle();
     ResetBatchVertexDepth();
   }
@@ -1373,29 +1362,36 @@ void GPU_HW::SetScissor()
   g_gpu_device->SetScissor(left, top, right - left, bottom - top);
 }
 
-void GPU_HW::MapBatchVertexPointer(u32 required_vertices)
+void GPU_HW::MapGPUBuffer(u32 required_vertices, u32 required_indices)
 {
-  DebugAssert(!m_batch_start_vertex_ptr);
+  DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr);
 
-  void* map;
-  u32 space;
-  g_gpu_device->MapVertexBuffer(sizeof(BatchVertex), required_vertices, &map, &space, &m_batch_base_vertex);
+  void* vb_map;
+  u32 vb_space;
+  g_gpu_device->MapVertexBuffer(sizeof(BatchVertex), required_vertices, &vb_map, &vb_space, &m_batch_base_vertex);
+  m_batch_vertex_ptr = static_cast<BatchVertex*>(vb_map);
+  m_batch_vertex_space = Truncate16(std::min<u32>(vb_space, std::numeric_limits<u16>::max()));
 
-  m_batch_start_vertex_ptr = static_cast<BatchVertex*>(map);
-  m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
-  m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + space;
+  u32 ib_space;
+  g_gpu_device->MapIndexBuffer(required_indices, &m_batch_index_ptr, &ib_space, &m_batch_base_index);
+  m_batch_index_space = Truncate16(std::min<u32>(ib_space, std::numeric_limits<u16>::max()));
 }
 
-void GPU_HW::UnmapBatchVertexPointer(u32 used_vertices)
+void GPU_HW::UnmapGPUBuffer(u32 used_vertices, u32 used_indices)
 {
-  DebugAssert(m_batch_start_vertex_ptr);
+  DebugAssert(m_batch_vertex_ptr && m_batch_index_ptr);
   g_gpu_device->UnmapVertexBuffer(sizeof(BatchVertex), used_vertices);
-  m_batch_start_vertex_ptr = nullptr;
-  m_batch_end_vertex_ptr = nullptr;
-  m_batch_current_vertex_ptr = nullptr;
+  g_gpu_device->UnmapIndexBuffer(used_indices);
+  m_batch_vertex_ptr = nullptr;
+  m_batch_vertex_count = 0;
+  m_batch_vertex_space = 0;
+
+  m_batch_index_ptr = nullptr;
+  m_batch_index_count = 0;
+  m_batch_index_space = 0;
 }
 
-void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_vertices, u32 base_vertex)
+ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index,
+                                                     u32 base_vertex)
 {
   // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
   const u8 depth_test = m_batch.use_depth_buffer ? static_cast<u8>(2) : BoolToUInt8(m_batch.check_mask_before_draw);
@@ -1403,7 +1399,7 @@ void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_vertices, u3
     m_batch_pipelines[depth_test][static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)][static_cast<u8>(
       m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]
       .get());
-  g_gpu_device->Draw(num_vertices, base_vertex);
+  g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex);
 }
 
 void GPU_HW::ClearDisplay()
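
Together these three functions form the batch lifecycle: MapGPUBuffer reserves space in both buffers and caches write pointers plus the base offsets the device hands back, geometry is appended through those pointers while the space counters shrink, and UnmapGPUBuffer commits the used counts before DrawBatchVertices issues the indexed draw. A standalone model of that bookkeeping, with hypothetical names standing in for the GPU_HW members:

    #include <cassert>
    #include <cstdint>

    // Hypothetical mirror of the batch counters managed by MapGPUBuffer,
    // the append paths, and UnmapGPUBuffer.
    struct BatchCounters
    {
      uint16_t vertex_count = 0, index_count = 0;
      uint16_t vertex_space = 0, index_space = 0;
      bool mapped = false;

      void Map(uint16_t vertex_capacity, uint16_t index_capacity)
      {
        assert(!mapped); // mirrors DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr)
        vertex_space = vertex_capacity;
        index_space = index_capacity;
        mapped = true;
      }

      void Append(uint16_t vertices, uint16_t indices)
      {
        assert(mapped && vertex_space >= vertices && index_space >= indices);
        vertex_count += vertices; vertex_space -= vertices;
        index_count += indices;   index_space -= indices;
      }

      void Unmap() // commit used counts to the device, then reset everything
      {
        assert(mapped);
        vertex_count = index_count = vertex_space = index_space = 0;
        mapped = false;
      }
    };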
@@ -1529,7 +1525,7 @@ void GPU_HW::SetBatchDepthBuffer(bool enabled)
   if (m_batch.use_depth_buffer == enabled)
     return;
 
-  if (GetBatchVertexCount() > 0)
+  if (m_batch_index_count > 0)
   {
     FlushRender();
     EnsureVertexBufferSpaceForCurrentCommand();
@@ -1549,7 +1545,7 @@ void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices)
 
   if ((average_z - m_last_depth_z) >= g_settings.gpu_pgxp_depth_clear_threshold)
   {
-    if (GetBatchVertexCount() > 0)
+    if (m_batch_index_count > 0)
    {
      FlushRender();
      EnsureVertexBufferSpaceForCurrentCommand();
@@ -1576,16 +1572,17 @@ u32 GPU_HW::GetAdaptiveDownsamplingMipLevels() const
 
 void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
 {
+  DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6);
+
   const float dx = x1 - x0;
   const float dy = y1 - y0;
 
-  std::array<BatchVertex, 4> output;
   if (dx == 0.0f && dy == 0.0f)
   {
     // Degenerate, render a point.
-    output[0].Set(x0, y0, depth, 1.0f, col0, 0, 0, 0);
-    output[1].Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0, 0);
-    output[2].Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
-    output[3].Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(x0, y0, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
   }
   else
   {
@@ -1649,18 +1646,24 @@ void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1
     const float ox1 = x1 + pad_x1;
     const float oy1 = y1 + pad_y1;
 
-    output[0].Set(ox0, oy0, depth, 1.0f, col0, 0, 0, 0);
-    output[1].Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0, 0);
-    output[2].Set(ox1, oy1, depth, 1.0f, col1, 0, 0, 0);
-    output[3].Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(ox0, oy0, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(ox1, oy1, depth, 1.0f, col1, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0, 0);
   }
 
-  AddVertex(output[0]);
-  AddVertex(output[1]);
-  AddVertex(output[2]);
-  AddVertex(output[3]);
-  AddVertex(output[2]);
-  AddVertex(output[1]);
+  const u32 start_index = m_batch_vertex_count;
+  m_batch_vertex_count += 4;
+  m_batch_vertex_space -= 4;
+
+  *(m_batch_index_ptr++) = Truncate16(start_index + 0);
+  *(m_batch_index_ptr++) = Truncate16(start_index + 1);
+  *(m_batch_index_ptr++) = Truncate16(start_index + 2);
+  *(m_batch_index_ptr++) = Truncate16(start_index + 3);
+  *(m_batch_index_ptr++) = Truncate16(start_index + 2);
+  *(m_batch_index_ptr++) = Truncate16(start_index + 1);
+  m_batch_index_count += 6;
+  m_batch_index_space -= 6;
 }
 
 void GPU_HW::LoadVertices()
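
DrawLine expands every line to a screen-space quad: four corner vertices are written in place, then a fixed index pattern relative to start_index emits the triangles (0,1,2) and (3,2,1), which share vertices 1 and 2. A standalone sketch of that index emission, with a plain pointer standing in for the mapped index buffer:

    #include <cstdint>

    // Emits the six indices used per expanded line above: triangles
    // (s+0, s+1, s+2) and (s+3, s+2, s+1) over vertices s..s+3.
    static uint16_t* EmitLineQuadIndices(uint16_t* out, uint32_t s)
    {
      static constexpr uint32_t kPattern[6] = {0, 1, 2, 3, 2, 1};
      for (const uint32_t offset : kPattern)
        *(out++) = static_cast<uint16_t>(s + offset);
      return out;
    }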
@@ -1676,8 +1679,6 @@ void GPU_HW::LoadVertices()
   {
     case GPUPrimitive::Polygon:
     {
-      DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 6u : 3u));
-
       const u32 first_color = rc.color_for_first_vertex;
       const bool shaded = rc.shading_enable;
       const bool textured = rc.texture_enable;
@@ -1732,9 +1733,27 @@ void GPU_HW::LoadVertices()
       if (m_compute_uv_range && textured)
         ComputePolygonUVLimits(texpage, vertices.data(), num_vertices);
 
-      if (!IsDrawingAreaIsValid())
+      if (!IsDrawingAreaIsValid()) [[unlikely]]
         return;
 
+      const u32 start_index = m_batch_vertex_count;
+      if (rc.quad_polygon)
+      {
+        DebugAssert(m_batch_vertex_space >= 4);
+        std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4);
+        m_batch_vertex_ptr += 4;
+        m_batch_vertex_count += 4;
+        m_batch_vertex_space -= 4;
+      }
+      else
+      {
+        DebugAssert(m_batch_vertex_space >= 3);
+        std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
+        m_batch_vertex_ptr += 3;
+        m_batch_vertex_count += 3;
+        m_batch_vertex_space -= 3;
+      }
+
       // Cull polygons which are too large.
       const auto [min_x_12, max_x_12] = MinMax(native_vertex_positions[1][0], native_vertex_positions[2][0]);
       const auto [min_y_12, max_y_12] = MinMax(native_vertex_positions[1][1], native_vertex_positions[2][1]);
@@ -1763,8 +1782,12 @@ void GPU_HW::LoadVertices()
                    native_vertex_positions[2][0], native_vertex_positions[2][1], rc.shading_enable,
                    rc.texture_enable, rc.transparency_enable);
 
-      std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
-      m_batch_current_vertex_ptr += 3;
+      DebugAssert(m_batch_index_space >= 3);
+      *(m_batch_index_ptr++) = Truncate16(start_index);
+      *(m_batch_index_ptr++) = Truncate16(start_index + 1);
+      *(m_batch_index_ptr++) = Truncate16(start_index + 2);
+      m_batch_index_count += 3;
+      m_batch_index_space -= 3;
     }
 
     // quads
@@ -1797,9 +1820,12 @@ void GPU_HW::LoadVertices()
                      native_vertex_positions[3][0], native_vertex_positions[3][1], rc.shading_enable,
                      rc.texture_enable, rc.transparency_enable);
 
-        AddVertex(vertices[2]);
-        AddVertex(vertices[1]);
-        AddVertex(vertices[3]);
+        DebugAssert(m_batch_index_space >= 3);
+        *(m_batch_index_ptr++) = Truncate16(start_index + 2);
+        *(m_batch_index_ptr++) = Truncate16(start_index + 1);
+        *(m_batch_index_ptr++) = Truncate16(start_index + 3);
+        m_batch_index_count += 3;
+        m_batch_index_space -= 3;
       }
     }
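
The quad path writes its four vertices exactly once (the memcpy earlier in this hunk) and references them as triangles (s, s+1, s+2) and (s+2, s+1, s+3); the old code re-pushed copies of vertices 1 and 2 for the second triangle. The resulting pattern, as a standalone constant with the vertex slots it assumes spelled out:

    #include <cstdint>

    // Assuming the quad's vertices are stored as 0=top-left, 1=top-right,
    // 2=bottom-left, 3=bottom-right, these indices describe two triangles
    // that share the 1-2 diagonal while each vertex is stored only once.
    static constexpr uint16_t kQuadIndexPattern[6] = {0, 1, 2, 2, 1, 3};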
@@ -1808,7 +1834,8 @@ void GPU_HW::LoadVertices()
         GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices);
         FillDrawCommand(cmd, rc);
 
-        for (u32 i = 0; i < num_vertices; i++)
+        const u32 sw_num_vertices = rc.quad_polygon ? 4 : 3;
+        for (u32 i = 0; i < sw_num_vertices; i++)
         {
           GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i];
           vert->x = native_vertex_positions[i][0];
@@ -1863,12 +1890,13 @@ void GPU_HW::LoadVertices()
          break;
      }
 
-      if (!IsDrawingAreaIsValid())
+      if (!IsDrawingAreaIsValid()) [[unlikely]]
        return;
 
      // we can split the rectangle up into potentially 8 quads
      SetBatchDepthBuffer(false);
-      DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);
+      DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE &&
+                  m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE);
 
      // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
      u16 tex_top = orig_tex_top;
@@ -1890,13 +1918,26 @@ void GPU_HW::LoadVertices()
        CheckForTexPageOverlap(texpage, tex_left, tex_top, tex_right - 1, tex_bottom - 1);
 
-        AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
-        AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
-        AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
-
-        AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
-        AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
-        AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);
+        const u32 base_vertex = m_batch_vertex_count;
+        (m_batch_vertex_ptr++)
+          ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
+        (m_batch_vertex_ptr++)
+          ->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
+        (m_batch_vertex_ptr++)
+          ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
+        (m_batch_vertex_ptr++)
+          ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);
+        m_batch_vertex_count += 4;
+        m_batch_vertex_space -= 4;
+
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 0);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 1);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 2);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 2);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 1);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 3);
+        m_batch_index_count += 6;
+        m_batch_index_space -= 6;
 
        x_offset += quad_width;
        tex_left = 0;
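
The loop above walks the rectangle in steps of up to 256×256 so each emitted quad stays within one texture page repeat; a 512×384 rectangle, for example, becomes 2 × 2 = 4 quads. A sketch of that quad-count arithmetic under the stated page size, ignoring the extra split a nonzero starting offset inside a page can add:

    #include <cstdint>

    // Quads needed when each one may cover at most a 256x256 page repeat.
    static constexpr uint32_t RectangleQuadCount(uint32_t width, uint32_t height)
    {
      constexpr uint32_t kPageSize = 256;
      return ((width + kPageSize - 1) / kPageSize) * ((height + kPageSize - 1) / kPageSize);
    }

    static_assert(RectangleQuadCount(256, 256) == 1);
    static_assert(RectangleQuadCount(512, 384) == 4);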
@@ -1937,7 +1978,7 @@ void GPU_HW::LoadVertices()
       if (!rc.polyline)
       {
-        DebugAssert(GetBatchVertexSpace() >= 2);
+        DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6);
 
         u32 start_color, end_color;
         GPUVertexPosition start_pos, end_pos;
@@ -1955,7 +1996,7 @@ void GPU_HW::LoadVertices()
           end_pos.bits = FifoPop();
         }
 
-        if (!IsDrawingAreaIsValid())
+        if (!IsDrawingAreaIsValid()) [[unlikely]]
           return;
 
         s32 start_x = start_pos.x + m_drawing_offset.x;
@@ -1996,9 +2037,9 @@ void GPU_HW::LoadVertices()
       {
         // Multiply by two because we don't use line strips.
         const u32 num_vertices = GetPolyLineVertexCount();
-        DebugAssert(GetBatchVertexSpace() >= (num_vertices * 2));
+        DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6));
 
-        if (!IsDrawingAreaIsValid())
+        if (!IsDrawingAreaIsValid()) [[unlikely]]
          return;
 
         const bool shaded = rc.shading_enable;
@@ -2181,7 +2222,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(u32 texpage, u32 min_u
 
   if (update_drawn || update_written)
   {
-    if (GetBatchVertexCount() > 0)
+    if (m_batch_index_count > 0)
     {
       FlushRender();
       EnsureVertexBufferSpaceForCurrentCommand();
@@ -2194,7 +2235,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(u32 texpage, u32 min_u
 
 ALWAYS_INLINE bool GPU_HW::IsFlushed() const
 {
-  return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr;
+  return (m_batch_index_count == 0);
 }
 
 GPU_HW::InterlacedRenderMode GPU_HW::GetInterlacedRenderMode() const
@@ -2228,44 +2269,44 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode trans
           (transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering)))));
 }
 
-ALWAYS_INLINE u32 GPU_HW::GetBatchVertexSpace() const
-{
-  return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr);
-}
-
-ALWAYS_INLINE u32 GPU_HW::GetBatchVertexCount() const
-{
-  return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr);
-}
-
-void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices)
+void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices)
 {
-  if (m_batch_current_vertex_ptr)
+  if (m_batch_vertex_ptr)
   {
-    if (GetBatchVertexSpace() >= required_vertices)
+    if (m_batch_vertex_space >= required_vertices && m_batch_index_space >= required_indices)
       return;
 
     FlushRender();
   }
 
-  MapBatchVertexPointer(required_vertices);
+  MapGPUBuffer(required_vertices, required_indices);
 }
 
 void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
 {
   u32 required_vertices;
+  u32 required_indices;
   switch (m_render_command.primitive)
   {
     case GPUPrimitive::Polygon:
-      required_vertices = m_render_command.quad_polygon ? 6 : 3;
+      required_vertices = 4; // assume quad, in case of expansion
+      required_indices = 6;
       break;
     case GPUPrimitive::Rectangle:
-      required_vertices = MAX_VERTICES_FOR_RECTANGLE;
+      required_vertices = MAX_VERTICES_FOR_RECTANGLE; // TODO: Wrong
+      required_indices = MAX_VERTICES_FOR_RECTANGLE;
       break;
     case GPUPrimitive::Line:
-    default:
-      required_vertices = m_render_command.polyline ? (GetPolyLineVertexCount() * 6u) : 6u;
+    {
+      // assume expansion
+      const u32 vert_count = m_render_command.polyline ? GetPolyLineVertexCount() : 2;
+      required_vertices = vert_count * 4;
+      required_indices = vert_count * 6;
+    }
       break;
+
+    default:
+      UnreachableCode();
   }
 
   // can we fit these vertices in the current depth buffer range?
@@ -2273,16 +2314,11 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
   {
     // implies FlushRender()
     ResetBatchVertexDepth();
-  }
-  else if (m_batch_current_vertex_ptr)
-  {
-    if (GetBatchVertexSpace() >= required_vertices)
-      return;
-
-    FlushRender();
+    MapGPUBuffer(required_vertices, required_indices);
+    return;
   }
 
-  MapBatchVertexPointer(required_vertices);
+  EnsureVertexBufferSpace(required_vertices, required_indices);
 }
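
The reworked reservation always budgets for worst-case expansion before the command is parsed: a triangle still reserves a quad's four vertices and six indices, and each polyline point is budgeted as a full quad even though only the segments between points draw. A condensed, standalone model of that sizing logic (the enum and parameters are stand-ins for the real members):

    #include <cstdint>

    enum class Primitive { Polygon, Rectangle, Line };

    struct Requirement { uint32_t vertices, indices; };

    // 'polyline_points' stands in for GetPolyLineVertexCount(), and
    // 'max_rect' for the MAX_VERTICES_FOR_RECTANGLE constant used above.
    static Requirement ComputeRequirement(Primitive prim, bool polyline,
                                          uint32_t polyline_points, uint32_t max_rect)
    {
      switch (prim)
      {
        case Primitive::Polygon:
          return {4, 6}; // assume a quad even for triangle commands
        case Primitive::Rectangle:
          return {max_rect, max_rect}; // worst-case page-split reservation
        case Primitive::Line:
        default:
        {
          // every point budgeted as a full 4-vertex/6-index quad
          const uint32_t points = polyline ? polyline_points : 2;
          return {points * 4, points * 6};
        }
      }
    }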
void GPU_HW::ResetBatchVertexDepth() void GPU_HW::ResetBatchVertexDepth()
@ -2769,7 +2805,7 @@ void GPU_HW::DispatchRenderCommand()
EnsureVertexBufferSpaceForCurrentCommand(); EnsureVertexBufferSpaceForCurrentCommand();
if (GetBatchVertexCount() == 0) if (m_batch_index_count == 0)
{ {
// transparency mode change // transparency mode change
if (transparency_mode != GPUTransparencyMode::Disabled && if (transparency_mode != GPUTransparencyMode::Disabled &&
@@ -2842,14 +2878,13 @@ void GPU_HW::DispatchRenderCommand()
 
 void GPU_HW::FlushRender()
 {
-  if (!m_batch_current_vertex_ptr)
+  if (m_batch_index_count == 0)
     return;
 
-  const u32 vertex_count = GetBatchVertexCount();
-  UnmapBatchVertexPointer(vertex_count);
-
-  if (vertex_count == 0)
-    return;
+  const u32 base_vertex = m_batch_base_vertex;
+  const u32 base_index = m_batch_base_index;
+  const u32 index_count = m_batch_index_count;
+  UnmapGPUBuffer(m_batch_vertex_count, m_batch_index_count);
 
 #ifdef _DEBUG
   GL_SCOPE_FMT("Hardware Draw {}", ++s_draw_number);
@@ -2870,19 +2905,19 @@ void GPU_HW::FlushRender()
   {
     if (NeedsTwoPassRendering())
     {
-      DrawBatchVertices(BatchRenderMode::OnlyOpaque, vertex_count, m_batch_base_vertex);
-      DrawBatchVertices(BatchRenderMode::OnlyTransparent, vertex_count, m_batch_base_vertex);
+      DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex);
+      DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex);
     }
     else
     {
-      DrawBatchVertices(m_batch.GetRenderMode(), vertex_count, m_batch_base_vertex);
+      DrawBatchVertices(m_batch.GetRenderMode(), index_count, base_index, base_vertex);
     }
   }
 
   if (m_wireframe_mode != GPUWireframeMode::Disabled)
   {
     g_gpu_device->SetPipeline(m_wireframe_pipeline.get());
-    g_gpu_device->Draw(vertex_count, m_batch_base_vertex);
+    g_gpu_device->DrawIndexed(index_count, base_index, base_vertex);
   }
 }

src/core/gpu_hw.h

@@ -132,11 +132,6 @@ private:
 
   void LoadVertices();
 
-  void AddVertex(const BatchVertex& v);
-
-  template<typename... Args>
-  void AddNewVertex(Args&&... args);
-
   void PrintSettingsToLog();
   void CheckSettings();
@@ -144,9 +139,9 @@ private:
   void UpdateDepthBufferFromMaskBit();
   void ClearDepthBuffer();
   void SetScissor();
-  void MapBatchVertexPointer(u32 required_vertices);
-  void UnmapBatchVertexPointer(u32 used_vertices);
-  void DrawBatchVertices(BatchRenderMode render_mode, u32 num_vertices, u32 base_vertex);
+  void MapGPUBuffer(u32 required_vertices, u32 required_indices);
+  void UnmapGPUBuffer(u32 used_vertices, u32 used_indices);
+  void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex);
 
   u32 CalculateResolutionScale() const;
   GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const;
@@ -160,9 +155,7 @@ private:
   void CheckForTexPageOverlap(u32 texpage, u32 min_u, u32 min_v, u32 max_u, u32 max_v);
 
   bool IsFlushed() const;
-  u32 GetBatchVertexSpace() const;
-  u32 GetBatchVertexCount() const;
-  void EnsureVertexBufferSpace(u32 required_vertices);
+  void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices);
   void EnsureVertexBufferSpaceForCurrentCommand();
   void ResetBatchVertexDepth();
@@ -225,10 +218,14 @@ private:
   std::unique_ptr<GPU_SW_Backend> m_sw_renderer;
 
-  BatchVertex* m_batch_start_vertex_ptr = nullptr;
-  BatchVertex* m_batch_end_vertex_ptr = nullptr;
-  BatchVertex* m_batch_current_vertex_ptr = nullptr;
+  BatchVertex* m_batch_vertex_ptr = nullptr;
+  u16* m_batch_index_ptr = nullptr;
   u32 m_batch_base_vertex = 0;
+  u32 m_batch_base_index = 0;
+  u16 m_batch_vertex_count = 0;
+  u16 m_batch_index_count = 0;
+  u16 m_batch_vertex_space = 0;
+  u16 m_batch_index_space = 0;
 
   s32 m_current_depth = 0;
   float m_last_depth_z = 1.0f;
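
The counters and space fields shrink to u16 because the index buffer is itself 16-bit: an index can only address vertices 0 through 65535, so clamping the free-space counters to that range at map time loses nothing. A standalone sketch of the clamp, assuming Truncate16 is the plain narrowing cast its name suggests:

    #include <algorithm>
    #include <cstdint>
    #include <limits>

    // Mirrors the Truncate16(std::min<u32>(space, u16 max)) pattern that
    // MapGPUBuffer applies to both buffers' free space.
    static uint16_t ClampToIndexableSpace(uint32_t space)
    {
      return static_cast<uint16_t>(
        std::min<uint32_t>(space, std::numeric_limits<uint16_t>::max()));
    }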