diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index f353e2591..da69aa622 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -174,19 +174,6 @@ ALWAYS_INLINE void GPU_HW::BatchVertex::SetUVLimits(u32 min_u, u32 max_u, u32 mi
   uv_limits = PackUVLimits(min_u, max_u, min_v, max_v);
 }
 
-ALWAYS_INLINE void GPU_HW::AddVertex(const BatchVertex& v)
-{
-  std::memcpy(m_batch_current_vertex_ptr, &v, sizeof(BatchVertex));
-  m_batch_current_vertex_ptr++;
-}
-
-template<typename... Args>
-ALWAYS_INLINE void GPU_HW::AddNewVertex(Args&&... args)
-{
-  m_batch_current_vertex_ptr->Set(std::forward<Args>(args)...);
-  m_batch_current_vertex_ptr++;
-}
-
 const Threading::Thread* GPU_HW::GetSWThread() const
 {
   return m_sw_renderer ? m_sw_renderer->GetThread() : nullptr;
@@ -247,7 +234,8 @@ void GPU_HW::Reset(bool clear_vram)
 {
   GPU::Reset(clear_vram);
 
-  m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
+  if (m_batch_vertex_ptr)
+    UnmapGPUBuffer(0, 0);
 
   if (m_sw_renderer)
     m_sw_renderer->Reset();
@@ -305,7 +293,8 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di
   // invalidate the whole VRAM read texture when loading state
   if (sw.IsReading())
   {
-    m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
+    if (m_batch_vertex_ptr)
+      UnmapGPUBuffer(0, 0);
     SetFullVRAMDirtyRectangle();
     ResetBatchVertexDepth();
   }
@@ -1373,29 +1362,36 @@ void GPU_HW::SetScissor()
   g_gpu_device->SetScissor(left, top, right - left, bottom - top);
 }
 
-void GPU_HW::MapBatchVertexPointer(u32 required_vertices)
+void GPU_HW::MapGPUBuffer(u32 required_vertices, u32 required_indices)
 {
-  DebugAssert(!m_batch_start_vertex_ptr);
+  DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr);
 
-  void* map;
-  u32 space;
-  g_gpu_device->MapVertexBuffer(sizeof(BatchVertex), required_vertices, &map, &space, &m_batch_base_vertex);
+  void* vb_map;
+  u32 vb_space;
+  g_gpu_device->MapVertexBuffer(sizeof(BatchVertex), required_vertices, &vb_map, &vb_space, &m_batch_base_vertex);
+  m_batch_vertex_ptr = static_cast<BatchVertex*>(vb_map);
+  m_batch_vertex_space = Truncate16(std::min<u32>(vb_space, std::numeric_limits<u16>::max()));
 
-  m_batch_start_vertex_ptr = static_cast<BatchVertex*>(map);
-  m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
-  m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + space;
+  u32 ib_space;
+  g_gpu_device->MapIndexBuffer(required_indices, &m_batch_index_ptr, &ib_space, &m_batch_base_index);
+  m_batch_index_space = Truncate16(std::min<u32>(ib_space, std::numeric_limits<u16>::max()));
 }
 
-void GPU_HW::UnmapBatchVertexPointer(u32 used_vertices)
+void GPU_HW::UnmapGPUBuffer(u32 used_vertices, u32 used_indices)
 {
-  DebugAssert(m_batch_start_vertex_ptr);
+  DebugAssert(m_batch_vertex_ptr && m_batch_index_ptr);
   g_gpu_device->UnmapVertexBuffer(sizeof(BatchVertex), used_vertices);
-  m_batch_start_vertex_ptr = nullptr;
-  m_batch_end_vertex_ptr = nullptr;
-  m_batch_current_vertex_ptr = nullptr;
+  g_gpu_device->UnmapIndexBuffer(used_indices);
+  m_batch_vertex_ptr = nullptr;
+  m_batch_vertex_count = 0;
+  m_batch_vertex_space = 0;
+  m_batch_index_ptr = nullptr;
+  m_batch_index_count = 0;
+  m_batch_index_space = 0;
 }
 
-void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_vertices, u32 base_vertex)
+ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index,
+                                                     u32 base_vertex)
 {
   // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
   const u8 depth_test = m_batch.use_depth_buffer ? static_cast<u8>(2) : BoolToUInt8(m_batch.check_mask_before_draw);
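
Note: the buffer lifecycle here is map once, write, unmap on flush; Reset() and
DoState() above discard any mapped batch with UnmapGPUBuffer(0, 0) rather than
rewinding a pointer. The clamp in MapGPUBuffer() exists because indices are
u16: one batch can never address more than 65535 vertices, so the mapped space
is narrowed before being stored in the u16 bookkeeping fields. A minimal sketch
of that clamp (standalone helper, not part of the patch):

    // Clamp a mapped buffer size so it fits the u16 space counters.
    static u16 ClampSpaceToU16(u32 mapped_space)
    {
      return static_cast<u16>(std::min<u32>(mapped_space, std::numeric_limits<u16>::max()));
    }
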
@@ -1403,7 +1399,7 @@ void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_vertices, u3
     m_batch_pipelines[depth_test][static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)][static_cast<u8>(
       m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]
       .get());
-  g_gpu_device->Draw(num_vertices, base_vertex);
+  g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex);
 }
 
 void GPU_HW::ClearDisplay()
@@ -1529,7 +1525,7 @@ void GPU_HW::SetBatchDepthBuffer(bool enabled)
   if (m_batch.use_depth_buffer == enabled)
     return;
 
-  if (GetBatchVertexCount() > 0)
+  if (m_batch_index_count > 0)
   {
     FlushRender();
     EnsureVertexBufferSpaceForCurrentCommand();
@@ -1549,7 +1545,7 @@ void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices)
 
   if ((average_z - m_last_depth_z) >= g_settings.gpu_pgxp_depth_clear_threshold)
   {
-    if (GetBatchVertexCount() > 0)
+    if (m_batch_index_count > 0)
    {
      FlushRender();
      EnsureVertexBufferSpaceForCurrentCommand();
@@ -1576,16 +1572,17 @@ u32 GPU_HW::GetAdaptiveDownsamplingMipLevels() const
 
 void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
 {
+  DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6);
+
   const float dx = x1 - x0;
   const float dy = y1 - y0;
-  std::array<BatchVertex, 4> output;
   if (dx == 0.0f && dy == 0.0f)
   {
     // Degenerate, render a point.
-    output[0].Set(x0, y0, depth, 1.0f, col0, 0, 0, 0);
-    output[1].Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0, 0);
-    output[2].Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
-    output[3].Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(x0, y0, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
   }
   else
   {
@@ -1649,18 +1646,24 @@ void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1
     const float ox1 = x1 + pad_x1;
     const float oy1 = y1 + pad_y1;
 
-    output[0].Set(ox0, oy0, depth, 1.0f, col0, 0, 0, 0);
-    output[1].Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0, 0);
-    output[2].Set(ox1, oy1, depth, 1.0f, col1, 0, 0, 0);
-    output[3].Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(ox0, oy0, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(ox1, oy1, depth, 1.0f, col1, 0, 0, 0);
+    (m_batch_vertex_ptr++)->Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0, 0);
   }
 
-  AddVertex(output[0]);
-  AddVertex(output[1]);
-  AddVertex(output[2]);
-  AddVertex(output[3]);
-  AddVertex(output[2]);
-  AddVertex(output[1]);
+  const u32 start_index = m_batch_vertex_count;
+  m_batch_vertex_count += 4;
+  m_batch_vertex_space -= 4;
+
+  *(m_batch_index_ptr++) = Truncate16(start_index + 0);
+  *(m_batch_index_ptr++) = Truncate16(start_index + 1);
+  *(m_batch_index_ptr++) = Truncate16(start_index + 2);
+  *(m_batch_index_ptr++) = Truncate16(start_index + 3);
+  *(m_batch_index_ptr++) = Truncate16(start_index + 2);
+  *(m_batch_index_ptr++) = Truncate16(start_index + 1);
+  m_batch_index_count += 6;
+  m_batch_index_space -= 6;
 }
 
 void GPU_HW::LoadVertices()
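
Note: quads are now emitted as six u16 indices over four vertices. The pattern
written above is {0, 1, 2, 3, 2, 1}: two triangles sharing the 1-2 edge,
replacing the old scheme of pushing vertices 2 and 1 into the vertex stream a
second time. A hypothetical helper showing the same pattern ('out' and 'base'
stand in for m_batch_index_ptr and m_batch_vertex_count in the patch):

    // Emit the two triangles of a quad (v0..v3) by index.
    static u16* EmitQuadIndices(u16* out, u32 base)
    {
      static constexpr u32 order[6] = {0, 1, 2, 3, 2, 1}; // second triangle reuses edge 1-2
      for (u32 i = 0; i < 6; i++)
        *(out++) = static_cast<u16>(base + order[i]);
      return out;
    }
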
@@ -1676,8 +1679,6 @@ void GPU_HW::LoadVertices()
   {
     case GPUPrimitive::Polygon:
     {
-      DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 6u : 3u));
-
       const u32 first_color = rc.color_for_first_vertex;
       const bool shaded = rc.shading_enable;
       const bool textured = rc.texture_enable;
@@ -1732,9 +1733,27 @@ void GPU_HW::LoadVertices()
       if (m_compute_uv_range && textured)
         ComputePolygonUVLimits(texpage, vertices.data(), num_vertices);
 
-      if (!IsDrawingAreaIsValid())
+      if (!IsDrawingAreaIsValid()) [[unlikely]]
         return;
 
+      const u32 start_index = m_batch_vertex_count;
+      if (rc.quad_polygon)
+      {
+        DebugAssert(m_batch_vertex_space >= 4);
+        std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4);
+        m_batch_vertex_ptr += 4;
+        m_batch_vertex_count += 4;
+        m_batch_vertex_space -= 4;
+      }
+      else
+      {
+        DebugAssert(m_batch_vertex_space >= 3);
+        std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
+        m_batch_vertex_ptr += 3;
+        m_batch_vertex_count += 3;
+        m_batch_vertex_space -= 3;
+      }
+
       // Cull polygons which are too large.
       const auto [min_x_12, max_x_12] = MinMax(native_vertex_positions[1][0], native_vertex_positions[2][0]);
       const auto [min_y_12, max_y_12] = MinMax(native_vertex_positions[1][1], native_vertex_positions[2][1]);
@@ -1763,8 +1782,12 @@ void GPU_HW::LoadVertices()
                               native_vertex_positions[2][0], native_vertex_positions[2][1], rc.shading_enable,
                               rc.texture_enable, rc.transparency_enable);
 
-        std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
-        m_batch_current_vertex_ptr += 3;
+        DebugAssert(m_batch_index_space >= 3);
+        *(m_batch_index_ptr++) = Truncate16(start_index);
+        *(m_batch_index_ptr++) = Truncate16(start_index + 1);
+        *(m_batch_index_ptr++) = Truncate16(start_index + 2);
+        m_batch_index_count += 3;
+        m_batch_index_space -= 3;
       }
 
       // quads
@@ -1797,9 +1820,12 @@ void GPU_HW::LoadVertices()
                                 native_vertex_positions[3][0], native_vertex_positions[3][1], rc.shading_enable,
                                 rc.texture_enable, rc.transparency_enable);
 
-          AddVertex(vertices[2]);
-          AddVertex(vertices[1]);
-          AddVertex(vertices[3]);
+          DebugAssert(m_batch_index_space >= 3);
+          *(m_batch_index_ptr++) = Truncate16(start_index + 2);
+          *(m_batch_index_ptr++) = Truncate16(start_index + 1);
+          *(m_batch_index_ptr++) = Truncate16(start_index + 3);
+          m_batch_index_count += 3;
+          m_batch_index_space -= 3;
         }
       }
 
@@ -1808,7 +1834,8 @@ void GPU_HW::LoadVertices()
         GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices);
         FillDrawCommand(cmd, rc);
 
-        for (u32 i = 0; i < num_vertices; i++)
+        const u32 sw_num_vertices = rc.quad_polygon ? 4 : 3;
+        for (u32 i = 0; i < sw_num_vertices; i++)
         {
           GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i];
           vert->x = native_vertex_positions[i][0];
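
Note: for polygons, the vertex payload is now written exactly once (3 or 4
entries) and both triangles of a quad reference it by index. As rough
arithmetic, assuming a 32-byte BatchVertex (four floats, color, texpage, UVs,
UV limits): a quad previously cost 6 x 32 = 192 bytes of vertex data, and now
costs 4 x 32 + 6 x 2 = 140 bytes, with the shared vertices 1 and 2 eligible for
the GPU's post-transform vertex cache instead of being shaded twice.
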
@@ -1863,12 +1890,13 @@ void GPU_HW::LoadVertices()
           break;
       }
 
-      if (!IsDrawingAreaIsValid())
+      if (!IsDrawingAreaIsValid()) [[unlikely]]
         return;
 
       // we can split the rectangle up into potentially 8 quads
       SetBatchDepthBuffer(false);
-      DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);
+      DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE &&
+                  m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE);
 
       // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
       u16 tex_top = orig_tex_top;
@@ -1890,13 +1918,26 @@ void GPU_HW::LoadVertices()
 
         CheckForTexPageOverlap(texpage, tex_left, tex_top, tex_right - 1, tex_bottom - 1);
 
-        AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
-        AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
-        AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
+        const u32 base_vertex = m_batch_vertex_count;
+        (m_batch_vertex_ptr++)
+          ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
+        (m_batch_vertex_ptr++)
+          ->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
+        (m_batch_vertex_ptr++)
+          ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
+        (m_batch_vertex_ptr++)
+          ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);
+        m_batch_vertex_count += 4;
+        m_batch_vertex_space -= 4;
 
-        AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
-        AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
-        AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 0);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 1);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 2);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 2);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 1);
+        *(m_batch_index_ptr++) = Truncate16(base_vertex + 3);
+        m_batch_index_count += 6;
+        m_batch_index_space -= 6;
 
         x_offset += quad_width;
         tex_left = 0;
@@ -1937,7 +1978,7 @@ void GPU_HW::LoadVertices()
 
       if (!rc.polyline)
       {
-        DebugAssert(GetBatchVertexSpace() >= 2);
+        DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6);
 
         u32 start_color, end_color;
         GPUVertexPosition start_pos, end_pos;
@@ -1955,7 +1996,7 @@ void GPU_HW::LoadVertices()
           end_pos.bits = FifoPop();
         }
 
-        if (!IsDrawingAreaIsValid())
+        if (!IsDrawingAreaIsValid()) [[unlikely]]
          return;
 
        s32 start_x = start_pos.x + m_drawing_offset.x;
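
Note: lines are not drawn with line primitives; DrawLine() expands each segment
into a screen-space quad, so the worst case per segment is 4 vertices and 6
indices, which is exactly what the DebugAssert in the non-polyline case above
reserves. A sketch of that sizing rule (hypothetical helper):

    // Worst-case batch space for a run of expanded line segments.
    struct BatchSpace
    {
      u32 vertices;
      u32 indices;
    };
    static BatchSpace SpaceForLineSegments(u32 num_segments)
    {
      return {num_segments * 4, num_segments * 6};
    }
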
@@ -1996,9 +2037,9 @@ void GPU_HW::LoadVertices()
       {
         // Multiply by two because we don't use line strips.
         const u32 num_vertices = GetPolyLineVertexCount();
-        DebugAssert(GetBatchVertexSpace() >= (num_vertices * 2));
+        DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6));
 
-        if (!IsDrawingAreaIsValid())
+        if (!IsDrawingAreaIsValid()) [[unlikely]]
           return;
 
         const bool shaded = rc.shading_enable;
@@ -2181,7 +2222,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(u32 texpage, u32 min_u
 
   if (update_drawn || update_written)
   {
-    if (GetBatchVertexCount() > 0)
+    if (m_batch_index_count > 0)
    {
      FlushRender();
      EnsureVertexBufferSpaceForCurrentCommand();
@@ -2194,7 +2235,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(u32 texpage, u32 min_u
 
 ALWAYS_INLINE bool GPU_HW::IsFlushed() const
 {
-  return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr;
+  return (m_batch_index_count == 0);
 }
 
 GPU_HW::InterlacedRenderMode GPU_HW::GetInterlacedRenderMode() const
@@ -2228,44 +2269,44 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode trans
           (transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering)))));
 }
 
-ALWAYS_INLINE u32 GPU_HW::GetBatchVertexSpace() const
+void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices)
 {
-  return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr);
-}
-
-ALWAYS_INLINE u32 GPU_HW::GetBatchVertexCount() const
-{
-  return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr);
-}
-
-void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices)
-{
-  if (m_batch_current_vertex_ptr)
+  if (m_batch_vertex_ptr)
   {
-    if (GetBatchVertexSpace() >= required_vertices)
+    if (m_batch_vertex_space >= required_vertices && m_batch_index_space >= required_indices)
       return;
 
     FlushRender();
   }
 
-  MapBatchVertexPointer(required_vertices);
+  MapGPUBuffer(required_vertices, required_indices);
 }
 
 void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
 {
   u32 required_vertices;
+  u32 required_indices;
   switch (m_render_command.primitive)
   {
     case GPUPrimitive::Polygon:
-      required_vertices = m_render_command.quad_polygon ? 6 : 3;
+      required_vertices = 4; // assume quad, in case of expansion
+      required_indices = 6;
       break;
     case GPUPrimitive::Rectangle:
-      required_vertices = MAX_VERTICES_FOR_RECTANGLE;
+      required_vertices = MAX_VERTICES_FOR_RECTANGLE; // TODO: WRong
+      required_indices = MAX_VERTICES_FOR_RECTANGLE;
       break;
     case GPUPrimitive::Line:
+    {
+      // assume expansion
+      const u32 vert_count = m_render_command.polyline ? GetPolyLineVertexCount() : 2;
+      required_vertices = vert_count * 4;
+      required_indices = vert_count * 6;
+    }
+    break;
+
     default:
-      required_vertices = m_render_command.polyline ? (GetPolyLineVertexCount() * 6u) : 6u;
-      break;
+      UnreachableCode();
   }
 
   // can we fit these vertices in the current depth buffer range?
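
Note: EnsureVertexBufferSpaceForCurrentCommand() reserves space before the
command is parsed, so it sizes for the worst case: 4 vertices / 6 indices for
any polygon (a triangle simply indexes only 3 of the 4), and 4 / 6 per vertex
of a line or polyline. The rectangle case reuses MAX_VERTICES_FOR_RECTANGLE for
the index reservation, which the "TODO: WRong" comment flags: assuming the
constant is sized for the 8-quad vertex worst case, a fully split rectangle
needs 8 x 4 = 32 vertices but 8 x 6 = 48 indices, so a vertex-sized constant
undercounts the index side.
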
@@ -2273,16 +2314,11 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
   {
     // implies FlushRender()
     ResetBatchVertexDepth();
-  }
-  else if (m_batch_current_vertex_ptr)
-  {
-    if (GetBatchVertexSpace() >= required_vertices)
-      return;
-
-    FlushRender();
+    MapGPUBuffer(required_vertices, required_indices);
+    return;
   }
 
-  MapBatchVertexPointer(required_vertices);
+  EnsureVertexBufferSpace(required_vertices, required_indices);
 }
 
 void GPU_HW::ResetBatchVertexDepth()
@@ -2769,7 +2805,7 @@ void GPU_HW::DispatchRenderCommand()
 
   EnsureVertexBufferSpaceForCurrentCommand();
 
-  if (GetBatchVertexCount() == 0)
+  if (m_batch_index_count == 0)
   {
     // transparency mode change
     if (transparency_mode != GPUTransparencyMode::Disabled &&
@@ -2842,14 +2878,13 @@ void GPU_HW::DispatchRenderCommand()
 
 void GPU_HW::FlushRender()
 {
-  if (!m_batch_current_vertex_ptr)
+  if (m_batch_index_count == 0)
     return;
 
-  const u32 vertex_count = GetBatchVertexCount();
-  UnmapBatchVertexPointer(vertex_count);
-
-  if (vertex_count == 0)
-    return;
+  const u32 base_vertex = m_batch_base_vertex;
+  const u32 base_index = m_batch_base_index;
+  const u32 index_count = m_batch_index_count;
+  UnmapGPUBuffer(m_batch_vertex_count, m_batch_index_count);
 
 #ifdef _DEBUG
   GL_SCOPE_FMT("Hardware Draw {}", ++s_draw_number);
@@ -2870,19 +2905,19 @@ void GPU_HW::FlushRender()
   {
     if (NeedsTwoPassRendering())
     {
-      DrawBatchVertices(BatchRenderMode::OnlyOpaque, vertex_count, m_batch_base_vertex);
-      DrawBatchVertices(BatchRenderMode::OnlyTransparent, vertex_count, m_batch_base_vertex);
+      DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex);
+      DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex);
     }
     else
     {
-      DrawBatchVertices(m_batch.GetRenderMode(), vertex_count, m_batch_base_vertex);
+      DrawBatchVertices(m_batch.GetRenderMode(), index_count, base_index, base_vertex);
     }
   }
 
   if (m_wireframe_mode != GPUWireframeMode::Disabled)
   {
     g_gpu_device->SetPipeline(m_wireframe_pipeline.get());
-    g_gpu_device->Draw(vertex_count, m_batch_base_vertex);
+    g_gpu_device->DrawIndexed(index_count, base_index, base_vertex);
   }
 }
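
Note: FlushRender() above captures base_vertex, base_index and index_count into
locals before calling UnmapGPUBuffer(), because the unmap zeroes every batch
counter; the captured values are what DrawIndexed() consumes. A hypothetical,
condensed view of the cursor state this ordering protects (the real fields are
declared in gpu_hw.h below):

    // Illustrative only: the batch cursor invariant is that count grows as
    // space shrinks, and everything is reset when the buffers are unmapped.
    struct BatchCursor
    {
      u16* index_ptr = nullptr;
      u32 base_index = 0;
      u16 index_count = 0;
      u16 index_space = 0;

      void PushIndex(u16 idx)
      {
        *(index_ptr++) = idx;
        index_count++;
        index_space--;
      }
    };
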
diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h
index b0d34746c..e0917d416 100644
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@@ -132,11 +132,6 @@ private:
 
   void LoadVertices();
 
-  void AddVertex(const BatchVertex& v);
-
-  template<typename... Args>
-  void AddNewVertex(Args&&... args);
-
   void PrintSettingsToLog();
   void CheckSettings();
 
@@ -144,9 +139,9 @@ private:
   void UpdateDepthBufferFromMaskBit();
   void ClearDepthBuffer();
   void SetScissor();
-  void MapBatchVertexPointer(u32 required_vertices);
-  void UnmapBatchVertexPointer(u32 used_vertices);
-  void DrawBatchVertices(BatchRenderMode render_mode, u32 num_vertices, u32 base_vertex);
+  void MapGPUBuffer(u32 required_vertices, u32 required_indices);
+  void UnmapGPUBuffer(u32 used_vertices, u32 used_indices);
+  void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex);
 
   u32 CalculateResolutionScale() const;
   GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const;
@@ -160,9 +155,7 @@ private:
   void CheckForTexPageOverlap(u32 texpage, u32 min_u, u32 min_v, u32 max_u, u32 max_v);
 
   bool IsFlushed() const;
-  u32 GetBatchVertexSpace() const;
-  u32 GetBatchVertexCount() const;
-  void EnsureVertexBufferSpace(u32 required_vertices);
+  void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices);
   void EnsureVertexBufferSpaceForCurrentCommand();
   void ResetBatchVertexDepth();
 
@@ -225,10 +218,14 @@ private:
 
   std::unique_ptr<GPU_SW_Backend> m_sw_renderer;
 
-  BatchVertex* m_batch_start_vertex_ptr = nullptr;
-  BatchVertex* m_batch_end_vertex_ptr = nullptr;
-  BatchVertex* m_batch_current_vertex_ptr = nullptr;
+  BatchVertex* m_batch_vertex_ptr = nullptr;
+  u16* m_batch_index_ptr = nullptr;
   u32 m_batch_base_vertex = 0;
+  u32 m_batch_base_index = 0;
+  u16 m_batch_vertex_count = 0;
+  u16 m_batch_index_count = 0;
+  u16 m_batch_vertex_space = 0;
+  u16 m_batch_index_space = 0;
   s32 m_current_depth = 0;
   float m_last_depth_z = 1.0f;
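
Note: taken together, the change replaces the old write-duplicated-vertices /
Draw() path with a mapped vertex + index buffer pair. One batch now runs, in
outline (names as in the patch, surrounding details elided):

    // 1. MapGPUBuffer(required_vertices, required_indices)
    //      - maps both buffers, records m_batch_base_vertex / m_batch_base_index
    // 2. LoadVertices() / DrawLine()
    //      - append BatchVertex entries through m_batch_vertex_ptr
    //      - append u16 indices (relative to m_batch_vertex_count) through m_batch_index_ptr
    // 3. FlushRender()
    //      - capture counts, UnmapGPUBuffer(), then
    //        g_gpu_device->DrawIndexed(index_count, base_index, base_vertex)
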