From 8f9f039665de62cbca2175f1343997fe9b861b71 Mon Sep 17 00:00:00 2001
From: Connor McLaughlin <stenzek@gmail.com>
Date: Sun, 20 Sep 2020 21:33:24 +1000
Subject: [PATCH] GPU/SW: Port triangle/line rasterizer from Mednafen

---
 src/core/gpu_sw.cpp | 854 ++++++++++++++++++++++++++++----------------
 src/core/gpu_sw.h   |  63 +++-
 2 files changed, 589 insertions(+), 328 deletions(-)

diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp
index 1a4503743..93804694a 100644
--- a/src/core/gpu_sw.cpp
+++ b/src/core/gpu_sw.cpp
@@ -231,22 +231,22 @@ void GPU_SW::DispatchRenderCommand()
       {
         SWVertex& vert = vertices[i];
         const u32 color_rgb = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
-        vert.color_r = Truncate8(color_rgb);
-        vert.color_g = Truncate8(color_rgb >> 8);
-        vert.color_b = Truncate8(color_rgb >> 16);
+        vert.r = Truncate8(color_rgb);
+        vert.g = Truncate8(color_rgb >> 8);
+        vert.b = Truncate8(color_rgb >> 16);
 
         const VertexPosition vp{FifoPop()};
-        vert.x = vp.x;
-        vert.y = vp.y;
+        vert.x = TruncateVertexPosition(m_drawing_offset.x + vp.x);
+        vert.y = TruncateVertexPosition(m_drawing_offset.y + vp.y);
 
         if (textured)
         {
-          std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(FifoPop()));
+          std::tie(vert.u, vert.v) = UnpackTexcoord(Truncate16(FifoPop()));
         }
         else
         {
-          vert.texcoord_x = 0;
-          vert.texcoord_y = 0;
+          vert.u = 0;
+          vert.v = 0;
         }
       }
 
@@ -324,7 +324,8 @@ void GPU_SW::DispatchRenderCommand()
       // first vertex
       SWVertex* p0 = &vertices[0];
       SWVertex* p1 = &vertices[1];
-      p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())});
+      p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())},
+                      m_drawing_offset.x, m_drawing_offset.y);
       p0->SetColorRGB24(first_color);
 
       // remaining vertices in line strip
@@ -334,12 +335,12 @@ void GPU_SW::DispatchRenderCommand()
         if (rc.polyline)
         {
           p1->SetColorRGB24(shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color);
-          p1->SetPosition(VertexPosition{m_blit_buffer[buffer_pos++]});
+          p1->SetPosition(VertexPosition{m_blit_buffer[buffer_pos++]}, m_drawing_offset.x, m_drawing_offset.y);
         }
         else
         {
           p1->SetColorRGB24(shaded ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color);
-          p1->SetPosition(VertexPosition{Truncate32(FifoPop())});
+          p1->SetPosition(VertexPosition{Truncate32(FifoPop())}, m_drawing_offset.x, m_drawing_offset.y);
         }
 
         // down here because of the FIFO pops
@@ -358,222 +359,6 @@ void GPU_SW::DispatchRenderCommand()
   }
 }
 
-enum : u32
-{
-  COORD_FRAC_BITS = 32,
-  COLOR_FRAC_BITS = 12
-};
-
-using FixedPointCoord = u64;
-
-constexpr FixedPointCoord IntToFixedCoord(s32 x)
-{
-  return (ZeroExtend64(static_cast<u32>(x)) << COORD_FRAC_BITS) | (ZeroExtend64(1u) << (COORD_FRAC_BITS - 1));
-}
-
-using FixedPointColor = u32;
-
-constexpr FixedPointColor IntToFixedColor(u8 r)
-{
-  return ZeroExtend32(r) << COLOR_FRAC_BITS | (1u << (COLOR_FRAC_BITS - 1));
-}
-
-constexpr u8 FixedColorToInt(FixedPointColor r)
-{
-  return Truncate8(r >> 12);
-}
-
-bool GPU_SW::IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2)
-{
-  const s32 abx = v1->x - v0->x;
-  const s32 aby = v1->y - v0->y;
-  const s32 acx = v2->x - v0->x;
-  const s32 acy = v2->y - v0->y;
-  return ((abx * acy) - (aby * acx) < 0);
-}
-
-static constexpr bool IsTopLeftEdge(s32 ex, s32 ey)
-{
-  return (ey < 0 || (ey == 0 && ex < 0));
-}
-
-static constexpr u8 Interpolate(u8 v0, u8 v1, u8 v2, s32 w0, s32 w1, s32 w2, s32 ws, s32 half_ws)
-{
-  const s32 v = w0 * static_cast<s32>(static_cast<u32>(v0)) + w1 * static_cast<s32>(static_cast<u32>(v1)) +
-                w2 * static_cast<s32>(static_cast<u32>(v2));
-  const s32 vd = (v + half_ws) / ws;
-  return (vd < 0) ? 0 : ((vd > 0xFF) ? 0xFF : static_cast<u8>(vd));
-}
-
-template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
-         bool dithering_enable>
-void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2)
-{
-#define orient2d(ax, ay, bx, by, cx, cy) ((bx - ax) * (cy - ay) - (by - ay) * (cx - ax))
-
-  // ensure the vertices follow a counter-clockwise order
-  if (IsClockwiseWinding(v0, v1, v2))
-    std::swap(v1, v2);
-
-  const s32 px0 = v0->x + m_drawing_offset.x;
-  const s32 py0 = v0->y + m_drawing_offset.y;
-  const s32 px1 = v1->x + m_drawing_offset.x;
-  const s32 py1 = v1->y + m_drawing_offset.y;
-  const s32 px2 = v2->x + m_drawing_offset.x;
-  const s32 py2 = v2->y + m_drawing_offset.y;
-
-  // Barycentric coordinates at minX/minY corner
-  const s32 ws = orient2d(px0, py0, px1, py1, px2, py2);
-  const s32 half_ws = std::max<s32>((ws / 2) - 1, 0);
-  if (ws == 0)
-    return;
-
-  // compute bounding box of triangle
-  s32 min_x = std::min(px0, std::min(px1, px2));
-  s32 max_x = std::max(px0, std::max(px1, px2));
-  s32 min_y = std::min(py0, std::min(py1, py2));
-  s32 max_y = std::max(py0, std::max(py1, py2));
-
-  // reject triangles which cover the whole vram area
-  if (static_cast<u32>(max_x - min_x) > MAX_PRIMITIVE_WIDTH || static_cast<u32>(max_y - min_y) > MAX_PRIMITIVE_HEIGHT)
-    return;
-
-  // clip to drawing area
-  min_x = std::clamp(min_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right));
-  max_x = std::clamp(max_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right));
-  min_y = std::clamp(min_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom));
-  max_y = std::clamp(max_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom));
-  AddDrawTriangleTicks(max_x - min_x + 1, max_y - min_y + 1, shading_enable, texture_enable, transparency_enable);
-
-  // compute per-pixel increments
-  const s32 a01 = py0 - py1, b01 = px1 - px0;
-  const s32 a12 = py1 - py2, b12 = px2 - px1;
-  const s32 a20 = py2 - py0, b20 = px0 - px2;
-
-  // top-left edge rule
-  const s32 w0_bias = 0 - s32(IsTopLeftEdge(b12, a12));
-  const s32 w1_bias = 0 - s32(IsTopLeftEdge(b20, a20));
-  const s32 w2_bias = 0 - s32(IsTopLeftEdge(b01, a01));
-
-  // compute base barycentric coordinates
-  s32 w0 = orient2d(px1, py1, px2, py2, min_x, min_y);
-  s32 w1 = orient2d(px2, py2, px0, py0, min_x, min_y);
-  s32 w2 = orient2d(px0, py0, px1, py1, min_x, min_y);
-
-  // *exclusive* of max coordinate in PSX
-  for (s32 y = min_y; y <= max_y; y++)
-  {
-    s32 row_w0 = w0;
-    s32 row_w1 = w1;
-    s32 row_w2 = w2;
-
-    for (s32 x = min_x; x <= max_x; x++)
-    {
-      if (((row_w0 + w0_bias) | (row_w1 + w1_bias) | (row_w2 + w2_bias)) >= 0)
-      {
-        const s32 b0 = row_w0;
-        const s32 b1 = row_w1;
-        const s32 b2 = row_w2;
-
-        const u8 r =
-          shading_enable ? Interpolate(v0->color_r, v1->color_r, v2->color_r, b0, b1, b2, ws, half_ws) : v0->color_r;
-        const u8 g =
-          shading_enable ? Interpolate(v0->color_g, v1->color_g, v2->color_g, b0, b1, b2, ws, half_ws) : v0->color_g;
-        const u8 b =
-          shading_enable ? Interpolate(v0->color_b, v1->color_b, v2->color_b, b0, b1, b2, ws, half_ws) : v0->color_b;
-
-        const u8 texcoord_x = Interpolate(v0->texcoord_x, v1->texcoord_x, v2->texcoord_x, b0, b1, b2, ws, half_ws);
-        const u8 texcoord_y = Interpolate(v0->texcoord_y, v1->texcoord_y, v2->texcoord_y, b0, b1, b2, ws, half_ws);
-
-        ShadePixel<texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
-          static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y);
-      }
-
-      row_w0 += a12;
-      row_w1 += a20;
-      row_w2 += a01;
-    }
-
-    w0 += b12;
-    w1 += b20;
-    w2 += b01;
-  }
-
-#undef orient2d
-}
-
-GPU_SW::DrawTriangleFunction GPU_SW::GetDrawTriangleFunction(bool shading_enable, bool texture_enable,
-                                                             bool raw_texture_enable, bool transparency_enable,
-                                                             bool dithering_enable)
-{
-#define F(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING)                                                      \
-  &GPU_SW::DrawTriangle<SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING>
-
-  static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = {
-    {{{{F(false, false, false, false, false), F(false, false, false, false, true)},
-       {F(false, false, false, true, false), F(false, false, false, true, true)}},
-      {{F(false, false, true, false, false), F(false, false, true, false, true)},
-       {F(false, false, true, true, false), F(false, false, true, true, true)}}},
-     {{{F(false, true, false, false, false), F(false, true, false, false, true)},
-       {F(false, true, false, true, false), F(false, true, false, true, true)}},
-      {{F(false, true, true, false, false), F(false, true, true, false, true)},
-       {F(false, true, true, true, false), F(false, true, true, true, true)}}}},
-    {{{{F(true, false, false, false, false), F(true, false, false, false, true)},
-       {F(true, false, false, true, false), F(true, false, false, true, true)}},
-      {{F(true, false, true, false, false), F(true, false, true, false, true)},
-       {F(true, false, true, true, false), F(true, false, true, true, true)}}},
-     {{{F(true, true, false, false, false), F(true, true, false, false, true)},
-       {F(true, true, false, true, false), F(true, true, false, true, true)}},
-      {{F(true, true, true, false, false), F(true, true, true, false, true)},
-       {F(true, true, true, true, false), F(true, true, true, true, true)}}}}};
-
-#undef F
-
-  return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]
-              [u8(dithering_enable)];
-}
-
-template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
-void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x,
-                           u8 origin_texcoord_y)
-{
-  const s32 start_x = TruncateVertexPosition(m_drawing_offset.x + origin_x);
-  const s32 start_y = TruncateVertexPosition(m_drawing_offset.y + origin_y);
-
-  {
-    const u32 clip_left = static_cast<u32>(std::clamp<s32>(start_x, m_drawing_area.left, m_drawing_area.right));
-    const u32 clip_right =
-      static_cast<u32>(std::clamp<s32>(start_x + static_cast<s32>(width), m_drawing_area.left, m_drawing_area.right)) +
-      1u;
-    const u32 clip_top = static_cast<u32>(std::clamp<s32>(start_y, m_drawing_area.top, m_drawing_area.bottom));
-    const u32 clip_bottom =
-      static_cast<u32>(std::clamp<s32>(start_y + static_cast<s32>(height), m_drawing_area.top, m_drawing_area.bottom)) +
-      1u;
-    AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable, transparency_enable);
-  }
-
-  for (u32 offset_y = 0; offset_y < height; offset_y++)
-  {
-    const s32 y = start_y + static_cast<s32>(offset_y);
-    if (y < static_cast<s32>(m_drawing_area.top) || y > static_cast<s32>(m_drawing_area.bottom))
-      continue;
-
-    const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y);
-
-    for (u32 offset_x = 0; offset_x < width; offset_x++)
-    {
-      const s32 x = start_x + static_cast<s32>(offset_x);
-      if (x < static_cast<s32>(m_drawing_area.left) || x > static_cast<s32>(m_drawing_area.right))
-        continue;
-
-      const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x);
-
-      ShadePixel<texture_enable, raw_texture_enable, transparency_enable, false>(
-        static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y);
-    }
-  }
-}
-
 constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT()
 {
   DitherLUT lut = {};
@@ -594,7 +379,7 @@ constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT()
 static constexpr GPU_SW::DitherLUT s_dither_lut = GPU_SW::ComputeDitherLUT();
 
 template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
-void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y)
+void ALWAYS_INLINE_RELEASE GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y)
 {
   VRAMPixel color;
   bool transparent;
@@ -717,118 +502,570 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex
   if ((bg_color.bits & mask_and) != 0)
     return;
 
-  if (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast<u32>(y) & 1u))
-    return;
-
   SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | m_GPUSTAT.GetMaskOR());
 }
 
-constexpr FixedPointCoord GetLineCoordStep(s32 delta, s32 k)
+template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
+void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x,
+                           u8 origin_texcoord_y)
 {
-  s64 delta_fp = static_cast<s64>(ZeroExtend64(static_cast<u32>(delta)) << 32);
-  if (delta_fp < 0)
-    delta_fp -= s64(k - 1);
-  if (delta_fp > 0)
-    delta_fp += s64(k - 1);
+  const s32 start_x = TruncateVertexPosition(m_drawing_offset.x + origin_x);
+  const s32 start_y = TruncateVertexPosition(m_drawing_offset.y + origin_y);
 
-  return static_cast<FixedPointCoord>(delta_fp / k);
+  {
+    const u32 clip_left = static_cast<u32>(std::clamp<s32>(start_x, m_drawing_area.left, m_drawing_area.right));
+    const u32 clip_right =
+      static_cast<u32>(std::clamp<s32>(start_x + static_cast<s32>(width), m_drawing_area.left, m_drawing_area.right)) +
+      1u;
+    const u32 clip_top = static_cast<u32>(std::clamp<s32>(start_y, m_drawing_area.top, m_drawing_area.bottom));
+    const u32 clip_bottom =
+      static_cast<u32>(std::clamp<s32>(start_y + static_cast<s32>(height), m_drawing_area.top, m_drawing_area.bottom)) +
+      1u;
+    AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable, transparency_enable);
+  }
+
+  for (u32 offset_y = 0; offset_y < height; offset_y++)
+  {
+    const s32 y = start_y + static_cast<s32>(offset_y);
+    if (y < static_cast<s32>(m_drawing_area.top) || y > static_cast<s32>(m_drawing_area.bottom) ||
+        (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast<u32>(y) & 1u)))
+    {
+      continue;
+    }
+
+    const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y);
+
+    for (u32 offset_x = 0; offset_x < width; offset_x++)
+    {
+      const s32 x = start_x + static_cast<s32>(offset_x);
+      if (x < static_cast<s32>(m_drawing_area.left) || x > static_cast<s32>(m_drawing_area.right))
+        continue;
+
+      const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x);
+
+      ShadePixel<texture_enable, raw_texture_enable, transparency_enable, false>(
+        static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y);
+    }
+  }
 }
 
-constexpr s32 FixedToIntCoord(FixedPointCoord x)
+std::unique_ptr<GPU> GPU::CreateSoftwareRenderer()
 {
-  return static_cast<s32>(Truncate32(x >> COORD_FRAC_BITS));
+  return std::make_unique<GPU_SW>();
 }
 
-constexpr FixedPointColor GetLineColorStep(s32 delta, s32 k)
+//////////////////////////////////////////////////////////////////////////
+// Polygon and line rasterization ported from Mednafen
+//////////////////////////////////////////////////////////////////////////
+
+#define COORD_FBS 12
+#define COORD_MF_INT(n) ((n) << COORD_FBS)
+#define COORD_POST_PADDING 12
+
+static ALWAYS_INLINE_RELEASE s64 MakePolyXFP(s32 x)
 {
-  return static_cast<s32>(static_cast<u32>(delta) << COLOR_FRAC_BITS) / k;
+  return ((u64)x << 32) + ((1ULL << 32) - (1 << 11));
+}
+
+static ALWAYS_INLINE_RELEASE s64 MakePolyXFPStep(s32 dx, s32 dy)
+{
+  s64 ret;
+  s64 dx_ex = (u64)dx << 32;
+
+  if (dx_ex < 0)
+    dx_ex -= dy - 1;
+
+  if (dx_ex > 0)
+    dx_ex += dy - 1;
+
+  ret = dx_ex / dy;
+
+  return (ret);
+}
+
+static ALWAYS_INLINE_RELEASE s32 GetPolyXFP_Int(s64 xfp)
+{
+  return (xfp >> 32);
+}
+
+template<bool shading_enable, bool texture_enable>
+bool ALWAYS_INLINE_RELEASE GPU_SW::CalcIDeltas(i_deltas& idl, const SWVertex* A, const SWVertex* B, const SWVertex* C)
+{
+#define CALCIS(x, y) (((B->x - A->x) * (C->y - B->y)) - ((C->x - B->x) * (B->y - A->y)))
+
+  s32 denom = CALCIS(x, y);
+
+  if (!denom)
+    return false;
+
+  if constexpr (shading_enable)
+  {
+    idl.dr_dx = (u32)(CALCIS(r, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+    idl.dr_dy = (u32)(CALCIS(x, r) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+
+    idl.dg_dx = (u32)(CALCIS(g, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+    idl.dg_dy = (u32)(CALCIS(x, g) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+
+    idl.db_dx = (u32)(CALCIS(b, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+    idl.db_dy = (u32)(CALCIS(x, b) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+  }
+
+  if constexpr (texture_enable)
+  {
+    idl.du_dx = (u32)(CALCIS(u, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+    idl.du_dy = (u32)(CALCIS(x, u) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+
+    idl.dv_dx = (u32)(CALCIS(v, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+    idl.dv_dy = (u32)(CALCIS(x, v) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+  }
+
+  return true;
+
+#undef CALCIS
+}
+
+template<bool shading_enable, bool texture_enable>
+void ALWAYS_INLINE_RELEASE GPU_SW::AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count /*= 1*/)
+{
+  if constexpr (shading_enable)
+  {
+    ig.r += idl.dr_dx * count;
+    ig.g += idl.dg_dx * count;
+    ig.b += idl.db_dx * count;
+  }
+
+  if constexpr (texture_enable)
+  {
+    ig.u += idl.du_dx * count;
+    ig.v += idl.dv_dx * count;
+  }
+}
+
+template<bool shading_enable, bool texture_enable>
+void ALWAYS_INLINE_RELEASE GPU_SW::AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count /*= 1*/)
+{
+  if constexpr (shading_enable)
+  {
+    ig.r += idl.dr_dy * count;
+    ig.g += idl.dg_dy * count;
+    ig.b += idl.db_dy * count;
+  }
+
+  if constexpr (texture_enable)
+  {
+    ig.u += idl.du_dy * count;
+    ig.v += idl.dv_dy * count;
+  }
+}
+
+template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
+         bool dithering_enable>
+void GPU_SW::DrawSpan(s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl)
+{
+  if (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast<u32>(y) & 1u))
+    return;
+
+  s32 x_ig_adjust = x_start;
+  s32 w = x_bound - x_start;
+  s32 x = TruncateVertexPosition(x_start);
+
+  if (x < static_cast<s32>(m_drawing_area.left))
+  {
+    s32 delta = static_cast<s32>(m_drawing_area.left) - x;
+    x_ig_adjust += delta;
+    x += delta;
+    w -= delta;
+  }
+
+  if ((x + w) > (static_cast<s32>(m_drawing_area.right) + 1))
+    w = static_cast<s32>(m_drawing_area.right) + 1 - x;
+
+  if (w <= 0)
+    return;
+
+  AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, x_ig_adjust);
+  AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, y);
+
+  do
+  {
+    const u32 r = ig.r >> (COORD_FBS + COORD_POST_PADDING);
+    const u32 g = ig.g >> (COORD_FBS + COORD_POST_PADDING);
+    const u32 b = ig.b >> (COORD_FBS + COORD_POST_PADDING);
+    const u32 u = ig.u >> (COORD_FBS + COORD_POST_PADDING);
+    const u32 v = ig.v >> (COORD_FBS + COORD_POST_PADDING);
+
+    ShadePixel<texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
+      static_cast<u32>(x), static_cast<u32>(y), Truncate8(r), Truncate8(g), Truncate8(b), Truncate8(u), Truncate8(v));
+
+    x++;
+    AddIDeltas_DX<shading_enable, texture_enable>(ig, idl);
+  } while (--w > 0);
+}
+
+template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
+         bool dithering_enable>
+void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2)
+{
+  u32 core_vertex;
+  {
+    u32 cvtemp = 0;
+
+    if (v1->x <= v0->x)
+    {
+      if (v2->x <= v1->x)
+        cvtemp = (1 << 2);
+      else
+        cvtemp = (1 << 1);
+    }
+    else if (v2->x < v0->x)
+      cvtemp = (1 << 2);
+    else
+      cvtemp = (1 << 0);
+
+    if (v2->y < v1->y)
+    {
+      std::swap(v2, v1);
+      cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1);
+    }
+
+    if (v1->y < v0->y)
+    {
+      std::swap(v1, v0);
+      cvtemp = ((cvtemp >> 1) & 0x1) | ((cvtemp << 1) & 0x2) | (cvtemp & 0x4);
+    }
+
+    if (v2->y < v1->y)
+    {
+      std::swap(v2, v1);
+      cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1);
+    }
+
+    core_vertex = cvtemp >> 1;
+  }
+
+  if (v0->y == v2->y)
+    return;
+
+  if (static_cast<u32>(std::abs(v2->x - v0->x)) >= MAX_PRIMITIVE_WIDTH ||
+      static_cast<u32>(std::abs(v2->x - v1->x)) >= MAX_PRIMITIVE_WIDTH ||
+      static_cast<u32>(std::abs(v1->x - v0->x)) >= MAX_PRIMITIVE_WIDTH ||
+      static_cast<u32>(v2->y - v0->y) >= MAX_PRIMITIVE_HEIGHT)
+  {
+    return;
+  }
+
+  {
+    const s32 min_x = std::clamp(std::min(v0->x, std::min(v1->x, v2->x)), static_cast<s32>(m_drawing_area.left),
+                                 static_cast<s32>(m_drawing_area.right));
+    const s32 max_x = std::clamp(std::max(v0->x, std::max(v1->x, v2->x)), static_cast<s32>(m_drawing_area.left),
+                                 static_cast<s32>(m_drawing_area.right));
+    const s32 min_y = std::clamp(std::min(v0->y, std::min(v1->y, v2->y)), static_cast<s32>(m_drawing_area.top),
+                                 static_cast<s32>(m_drawing_area.bottom));
+    const s32 max_y = std::clamp(std::max(v0->y, std::max(v1->y, v2->y)), static_cast<s32>(m_drawing_area.top),
+                                 static_cast<s32>(m_drawing_area.bottom));
+    AddDrawTriangleTicks(static_cast<u32>(max_x - min_x + 1), static_cast<u32>(max_y - min_y + 1), shading_enable,
+                         texture_enable, transparency_enable);
+  }
+
+  s64 base_coord = MakePolyXFP(v0->x);
+  s64 base_step = MakePolyXFPStep((v2->x - v0->x), (v2->y - v0->y));
+  s64 bound_coord_us;
+  s64 bound_coord_ls;
+  bool right_facing;
+
+  if (v1->y == v0->y)
+  {
+    bound_coord_us = 0;
+    right_facing = (bool)(v1->x > v0->x);
+  }
+  else
+  {
+    bound_coord_us = MakePolyXFPStep((v1->x - v0->x), (v1->y - v0->y));
+    right_facing = (bool)(bound_coord_us > base_step);
+  }
+
+  if (v2->y == v1->y)
+    bound_coord_ls = 0;
+  else
+    bound_coord_ls = MakePolyXFPStep((v2->x - v1->x), (v2->y - v1->y));
+
+  i_deltas idl;
+  if (!CalcIDeltas<shading_enable, texture_enable>(idl, v0, v1, v2))
+    return;
+
+  const SWVertex* vertices[3] = {v0, v1, v2};
+
+  i_group ig;
+  if constexpr (texture_enable)
+  {
+    ig.u = (COORD_MF_INT(vertices[core_vertex]->u) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
+    ig.v = (COORD_MF_INT(vertices[core_vertex]->v) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
+  }
+
+  ig.r = (COORD_MF_INT(vertices[core_vertex]->r) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
+  ig.g = (COORD_MF_INT(vertices[core_vertex]->g) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
+  ig.b = (COORD_MF_INT(vertices[core_vertex]->b) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
+
+  AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->x);
+  AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->y);
+
+  struct TriangleHalf
+  {
+    u64 x_coord[2];
+    u64 x_step[2];
+
+    s32 y_coord;
+    s32 y_bound;
+
+    bool dec_mode;
+  } tripart[2];
+
+  u32 vo = 0;
+  u32 vp = 0;
+  if (core_vertex != 0)
+    vo = 1;
+  if (core_vertex == 2)
+    vp = 3;
+
+  {
+    TriangleHalf* tp = &tripart[vo];
+    tp->y_coord = vertices[0 ^ vo]->y;
+    tp->y_bound = vertices[1 ^ vo]->y;
+    tp->x_coord[right_facing] = MakePolyXFP(vertices[0 ^ vo]->x);
+    tp->x_step[right_facing] = bound_coord_us;
+    tp->x_coord[!right_facing] = base_coord + ((vertices[vo]->y - vertices[0]->y) * base_step);
+    tp->x_step[!right_facing] = base_step;
+    tp->dec_mode = vo;
+  }
+
+  {
+    TriangleHalf* tp = &tripart[vo ^ 1];
+    tp->y_coord = vertices[1 ^ vp]->y;
+    tp->y_bound = vertices[2 ^ vp]->y;
+    tp->x_coord[right_facing] = MakePolyXFP(vertices[1 ^ vp]->x);
+    tp->x_step[right_facing] = bound_coord_ls;
+    tp->x_coord[!right_facing] =
+      base_coord + ((vertices[1 ^ vp]->y - vertices[0]->y) *
+                    base_step); // base_coord + ((vertices[1].y - vertices[0].y) * base_step);
+    tp->x_step[!right_facing] = base_step;
+    tp->dec_mode = vp;
+  }
+
+  for (u32 i = 0; i < 2; i++)
+  {
+    s32 yi = tripart[i].y_coord;
+    s32 yb = tripart[i].y_bound;
+
+    u64 lc = tripart[i].x_coord[0];
+    u64 ls = tripart[i].x_step[0];
+
+    u64 rc = tripart[i].x_coord[1];
+    u64 rs = tripart[i].x_step[1];
+
+    if (tripart[i].dec_mode)
+    {
+      while (yi > yb)
+      {
+        yi--;
+        lc -= ls;
+        rc -= rs;
+
+        s32 y = TruncateVertexPosition(yi);
+
+        if (y < static_cast<s32>(m_drawing_area.top))
+          break;
+
+        if (y > static_cast<s32>(m_drawing_area.bottom))
+          continue;
+
+        DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
+          yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
+      }
+    }
+    else
+    {
+      while (yi < yb)
+      {
+        s32 y = TruncateVertexPosition(yi);
+
+        if (y > static_cast<s32>(m_drawing_area.bottom))
+          break;
+
+        if (y >= static_cast<s32>(m_drawing_area.top))
+        {
+
+          DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
+            yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
+        }
+
+        yi++;
+        lc += ls;
+        rc += rs;
+      }
+    }
+  }
+}
+
+GPU_SW::DrawTriangleFunction GPU_SW::GetDrawTriangleFunction(bool shading_enable, bool texture_enable,
+                                                             bool raw_texture_enable, bool transparency_enable,
+                                                             bool dithering_enable)
+{
+#define F(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING)                                                      \
+  &GPU_SW::DrawTriangle<SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING>
+
+  static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = {
+    {{{{F(false, false, false, false, false), F(false, false, false, false, true)},
+       {F(false, false, false, true, false), F(false, false, false, true, true)}},
+      {{F(false, false, true, false, false), F(false, false, true, false, true)},
+       {F(false, false, true, true, false), F(false, false, true, true, true)}}},
+     {{{F(false, true, false, false, false), F(false, true, false, false, true)},
+       {F(false, true, false, true, false), F(false, true, false, true, true)}},
+      {{F(false, true, true, false, false), F(false, true, true, false, true)},
+       {F(false, true, true, true, false), F(false, true, true, true, true)}}}},
+    {{{{F(true, false, false, false, false), F(true, false, false, false, true)},
+       {F(true, false, false, true, false), F(true, false, false, true, true)}},
+      {{F(true, false, true, false, false), F(true, false, true, false, true)},
+       {F(true, false, true, true, false), F(true, false, true, true, true)}}},
+     {{{F(true, true, false, false, false), F(true, true, false, false, true)},
+       {F(true, true, false, true, false), F(true, true, false, true, true)}},
+      {{F(true, true, true, false, false), F(true, true, true, false, true)},
+       {F(true, true, true, true, false), F(true, true, true, true, true)}}}}};
+
+#undef F
+
+  return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]
+              [u8(dithering_enable)];
+}
+
+enum
+{
+  Line_XY_FractBits = 32
+};
+enum
+{
+  Line_RGB_FractBits = 12
+};
+
+struct line_fxp_coord
+{
+  u64 x, y;
+  u32 r, g, b;
+};
+
+struct line_fxp_step
+{
+  s64 dx_dk, dy_dk;
+  s32 dr_dk, dg_dk, db_dk;
+};
+
+static ALWAYS_INLINE_RELEASE s64 LineDivide(s64 delta, s32 dk)
+{
+  delta = (u64)delta << Line_XY_FractBits;
+
+  if (delta < 0)
+    delta -= dk - 1;
+  if (delta > 0)
+    delta += dk - 1;
+
+  return (delta / dk);
 }
 
 template<bool shading_enable, bool transparency_enable, bool dithering_enable>
 void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
 {
-  // Algorithm based on Mednafen.
-  if (p0->x > p1->x)
-    std::swap(p0, p1);
-
-  const s32 dx = p1->x - p0->x;
-  const s32 dy = p1->y - p0->y;
-  const s32 k = std::max(std::abs(dx), std::abs(dy));
+  const s32 i_dx = std::abs(p1->x - p0->x);
+  const s32 i_dy = std::abs(p1->y - p0->y);
+  const s32 k = (i_dx > i_dy) ? i_dx : i_dy;
+  if (i_dx >= MAX_PRIMITIVE_WIDTH || i_dy >= MAX_PRIMITIVE_HEIGHT)
+    return;
 
   {
     // TODO: Move to base class
-    const s32 min_x = std::min(p0->x, p1->x);
-    const s32 max_x = std::max(p0->x, p1->x);
-    const s32 min_y = std::min(p0->y, p1->y);
-    const s32 max_y = std::max(p0->y, p1->y);
-
-    const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
-    const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
-    const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
-    const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
+    const u32 clip_left =
+      static_cast<u32>(std::clamp<s32>(std::min(p0->x, p1->x), m_drawing_area.left, m_drawing_area.left));
+    const u32 clip_right =
+      static_cast<u32>(std::clamp<s32>(std::max(p0->x, p1->x), m_drawing_area.left, m_drawing_area.right)) + 1u;
+    const u32 clip_top =
+      static_cast<u32>(std::clamp<s32>(std::min(p0->y, p1->y), m_drawing_area.top, m_drawing_area.bottom));
+    const u32 clip_bottom =
+      static_cast<u32>(std::clamp<s32>(std::max(p0->y, p1->y), m_drawing_area.top, m_drawing_area.bottom)) + 1u;
 
     AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, shading_enable);
   }
 
-  FixedPointCoord step_x, step_y;
-  FixedPointColor step_r, step_g, step_b;
-  if (k > 0)
+  if (p0->x >= p1->x && k > 0)
+    std::swap(p0, p1);
+
+  line_fxp_step step;
+  if (k == 0)
   {
-    step_x = GetLineCoordStep(dx, k);
-    step_y = GetLineCoordStep(dy, k);
+    step.dx_dk = 0;
+    step.dy_dk = 0;
 
     if constexpr (shading_enable)
     {
-      step_r = GetLineColorStep(s32(ZeroExtend32(p1->color_r)) - s32(ZeroExtend32(p0->color_r)), k);
-      step_g = GetLineColorStep(s32(ZeroExtend32(p1->color_g)) - s32(ZeroExtend32(p0->color_g)), k);
-      step_b = GetLineColorStep(s32(ZeroExtend32(p1->color_b)) - s32(ZeroExtend32(p0->color_b)), k);
-    }
-    else
-    {
-      step_r = 0;
-      step_g = 0;
-      step_b = 0;
+      step.dr_dk = 0;
+      step.dg_dk = 0;
+      step.db_dk = 0;
     }
   }
   else
   {
-    step_x = 0;
-    step_y = 0;
-    step_r = 0;
-    step_g = 0;
-    step_b = 0;
-  }
-
-  FixedPointCoord current_x = IntToFixedCoord(p0->x);
-  FixedPointCoord current_y = IntToFixedCoord(p0->y);
-  FixedPointColor current_r = IntToFixedColor(p0->color_r);
-  FixedPointColor current_g = IntToFixedColor(p0->color_g);
-  FixedPointColor current_b = IntToFixedColor(p0->color_b);
-
-  for (s32 i = 0; i <= k; i++)
-  {
-    const s32 x = m_drawing_offset.x + FixedToIntCoord(current_x);
-    const s32 y = m_drawing_offset.y + FixedToIntCoord(current_y);
-
-    const u8 r = shading_enable ? FixedColorToInt(current_r) : p0->color_r;
-    const u8 g = shading_enable ? FixedColorToInt(current_g) : p0->color_g;
-    const u8 b = shading_enable ? FixedColorToInt(current_b) : p0->color_b;
-
-    if (x >= static_cast<s32>(m_drawing_area.left) && x <= static_cast<s32>(m_drawing_area.right) &&
-        y >= static_cast<s32>(m_drawing_area.top) && y <= static_cast<s32>(m_drawing_area.bottom))
-    {
-      ShadePixel<false, false, transparency_enable, dithering_enable>(static_cast<u32>(x), static_cast<u32>(y), r, g, b,
-                                                                      0, 0);
-    }
-
-    current_x += step_x;
-    current_y += step_y;
+    step.dx_dk = LineDivide(p1->x - p0->x, k);
+    step.dy_dk = LineDivide(p1->y - p0->y, k);
 
     if constexpr (shading_enable)
     {
-      current_r += step_r;
-      current_g += step_g;
-      current_b += step_b;
+      step.dr_dk = (s32)((u32)(p1->r - p0->r) << Line_RGB_FractBits) / k;
+      step.dg_dk = (s32)((u32)(p1->g - p0->g) << Line_RGB_FractBits) / k;
+      step.db_dk = (s32)((u32)(p1->b - p0->b) << Line_RGB_FractBits) / k;
+    }
+  }
+
+  line_fxp_coord cur_point;
+  cur_point.x = ((u64)p0->x << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1));
+  cur_point.y = ((u64)p0->y << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1));
+
+  cur_point.x -= 1024;
+
+  if (step.dy_dk < 0)
+    cur_point.y -= 1024;
+
+  if constexpr (shading_enable)
+  {
+    cur_point.r = (p0->r << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
+    cur_point.g = (p0->g << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
+    cur_point.b = (p0->b << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
+  }
+
+  for (s32 i = 0; i <= k; i++)
+  {
+    // Sign extension is not necessary here for x and y, due to the maximum values that ClipX1 and ClipY1 can contain.
+    const s32 x = (cur_point.x >> Line_XY_FractBits) & 2047;
+    const s32 y = (cur_point.y >> Line_XY_FractBits) & 2047;
+
+    if (!IsInterlacedRenderingEnabled() || GetActiveLineLSB() != (static_cast<u32>(y) & 1u))
+    {
+      const u8 r = shading_enable ? static_cast<u8>(cur_point.r >> Line_RGB_FractBits) : p0->r;
+      const u8 g = shading_enable ? static_cast<u8>(cur_point.g >> Line_RGB_FractBits) : p0->g;
+      const u8 b = shading_enable ? static_cast<u8>(cur_point.b >> Line_RGB_FractBits) : p0->b;
+
+      if (x >= static_cast<s32>(m_drawing_area.left) && x <= static_cast<s32>(m_drawing_area.right) &&
+          y >= static_cast<s32>(m_drawing_area.top) && y <= static_cast<s32>(m_drawing_area.bottom))
+      {
+        ShadePixel<false, false, transparency_enable, dithering_enable>(static_cast<u32>(x), static_cast<u32>(y), r, g,
+                                                                        b, 0, 0);
+      }
+    }
+
+    cur_point.x += step.dx_dk;
+    cur_point.y += step.dy_dk;
+
+    if constexpr (shading_enable)
+    {
+      cur_point.r += step.dr_dk;
+      cur_point.g += step.dg_dk;
+      cur_point.b += step.db_dk;
     }
   }
 }
@@ -860,8 +1097,3 @@ GPU_SW::DrawRectangleFunction GPU_SW::GetDrawRectangleFunction(bool texture_enab
 
   return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)];
 }
-
-std::unique_ptr<GPU> GPU::CreateSoftwareRenderer()
-{
-  return std::make_unique<GPU_SW>();
-}
diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h
index a9113e2fb..08f2e242a 100644
--- a/src/core/gpu_sw.h
+++ b/src/core/gpu_sw.h
@@ -31,17 +31,17 @@ protected:
   struct SWVertex
   {
     s32 x, y;
-    u8 color_r, color_g, color_b;
-    u8 texcoord_x, texcoord_y;
+    u8 r, g, b;
+    u8 u, v;
 
-    ALWAYS_INLINE void SetPosition(VertexPosition p)
+    ALWAYS_INLINE void SetPosition(VertexPosition p, s32 offset_x, s32 offset_y)
     {
-      x = p.x;
-      y = p.y;
+      x = TruncateVertexPosition(offset_x + p.x);
+      y = TruncateVertexPosition(offset_y + p.y);
     }
 
-    ALWAYS_INLINE void SetColorRGB24(u32 color) { std::tie(color_r, color_g, color_b) = UnpackColorRGB24(color); }
-    ALWAYS_INLINE void SetTexcoord(u16 value) { std::tie(texcoord_x, texcoord_y) = UnpackTexcoord(value); }
+    ALWAYS_INLINE void SetColorRGB24(u32 color) { std::tie(r, g, b) = UnpackColorRGB24(color); }
+    ALWAYS_INLINE void SetTexcoord(u16 value) { std::tie(u, v) = UnpackTexcoord(value); }
   };
 
   //////////////////////////////////////////////////////////////////////////
@@ -60,19 +60,9 @@ protected:
 
   void DispatchRenderCommand() override;
 
-  static bool IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
-
   template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
   void ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y);
 
-  template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
-           bool dithering_enable>
-  void DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
-
-  using DrawTriangleFunction = void (GPU_SW::*)(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
-  DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable,
-                                               bool transparency_enable, bool dithering_enable);
-
   template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
   void DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x,
                      u8 origin_texcoord_y);
@@ -82,6 +72,45 @@ protected:
   DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable,
                                                  bool transparency_enable);
 
+  //////////////////////////////////////////////////////////////////////////
+  // Polygon and line rasterization ported from Mednafen
+  //////////////////////////////////////////////////////////////////////////
+  struct i_deltas
+  {
+    u32 du_dx, dv_dx;
+    u32 dr_dx, dg_dx, db_dx;
+
+    u32 du_dy, dv_dy;
+    u32 dr_dy, dg_dy, db_dy;
+  };
+
+  struct i_group
+  {
+    u32 u, v;
+    u32 r, g, b;
+  };
+
+  template<bool shading_enable, bool texture_enable>
+  bool CalcIDeltas(i_deltas& idl, const SWVertex* A, const SWVertex* B, const SWVertex* C);
+
+  template<bool shading_enable, bool texture_enable>
+  void AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count = 1);
+
+  template<bool shading_enable, bool texture_enable>
+  void AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count = 1);
+
+  template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
+           bool dithering_enable>
+  void DrawSpan(s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl);
+
+  template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
+           bool dithering_enable>
+  void DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
+
+  using DrawTriangleFunction = void (GPU_SW::*)(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
+  DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable,
+                                               bool transparency_enable, bool dithering_enable);
+
   template<bool shading_enable, bool transparency_enable, bool dithering_enable>
   void DrawLine(const SWVertex* p0, const SWVertex* p1);