From 50b025c75aa5a13e0dba83ede19f28a8bd7472ce Mon Sep 17 00:00:00 2001
From: Connor McLaughlin <stenzek@gmail.com>
Date: Sat, 3 Oct 2020 14:10:54 +1000
Subject: [PATCH] GPU: Use triangle area for ticks rather than bbox

Fixes slow transitions in Tales of Phantasia.
---
 src/core/gpu.h      | 44 +++++++++++++++++++++++++++++++++-----------
 src/core/gpu_hw.cpp | 12 ++++++++----
 src/core/gpu_sw.cpp | 13 +------------
 3 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/src/core/gpu.h b/src/core/gpu.h
index 45e08f9d8..5822576a7 100644
--- a/src/core/gpu.h
+++ b/src/core/gpu.h
@@ -421,6 +421,22 @@ protected:
   /// Returns true if the drawing area is valid (i.e. left <= right, top <= bottom).
   ALWAYS_INLINE bool IsDrawingAreaIsValid() const { return m_drawing_area.Valid(); }
 
+  /// Clamps the point (*x, *y) in place against the drawing area edges; each axis is handled independently.
+  ALWAYS_INLINE void ClampCoordinatesToDrawingArea(s32* x, s32* y)
+  {
+    const s32 x_value = *x;
+    if (x_value < static_cast<s32>(m_drawing_area.left))
+      *x = m_drawing_area.left;
+    else if (x_value >= static_cast<s32>(m_drawing_area.right))
+      *x = m_drawing_area.right - 1; // at or beyond the right edge: snap to right - 1
+
+    const s32 y_value = *y;
+    if (y_value < static_cast<s32>(m_drawing_area.top))
+      *y = m_drawing_area.top;
+    else if (y_value >= static_cast<s32>(m_drawing_area.bottom))
+      *y = m_drawing_area.bottom - 1; // at or beyond the bottom edge: snap to bottom - 1
+  }
+
   void AddCommandTicks(TickCount ticks);
 
   void WriteGP1(u32 value);
@@ -439,19 +455,25 @@ protected:
   virtual void UpdateDisplay();
   virtual void DrawRendererStats(bool is_idle_frame);
 
-  // These are **very** approximate.
-  ALWAYS_INLINE void AddDrawTriangleTicks(u32 width, u32 height, bool shaded, bool textured, bool semitransparent)
+  ALWAYS_INLINE void AddDrawTriangleTicks(s32 x1, s32 y1, s32 x2, s32 y2, s32 x3, s32 y3, bool shaded, bool textured,
+                                          bool semitransparent) // NOTE: shaded is not read by this estimate
   {
-    const u32 average_width = ((width + 2) / 3);
-    u32 ticks_per_row = average_width;
-    if (textured)
-      ticks_per_row += average_width;
-    if (semitransparent || m_GPUSTAT.check_mask_before_draw)
-      ticks_per_row += (average_width + 1u) / 2u;
-    if (m_GPUSTAT.SkipDrawingToActiveField())
-      height = std::max<u32>(height / 2, 1u);
+    // Approximate the pixel count by the area (shoelace formula) of the triangle after clamping each vertex to the
+    // drawing area. Clamping vertices is not true clipping, so partially-clipped triangles are mis-estimated (usually
+    // low rather than high); intersecting the edges with the clip rectangle would be needed for accuracy.
+    ClampCoordinatesToDrawingArea(&x1, &y1);
+    ClampCoordinatesToDrawingArea(&x2, &y2);
+    ClampCoordinatesToDrawingArea(&x3, &y3);
 
-    AddCommandTicks(ticks_per_row * height);
+    TickCount pixels = std::abs((x1 * y2 + x2 * y3 + x3 * y1 - x1 * y3 - x2 * y1 - x3 * y2) / 2);
+    if (textured)
+      pixels += pixels; // texturing doubles the estimate
+    if (semitransparent || m_GPUSTAT.check_mask_before_draw)
+      pixels += (pixels + 1) / 2; // adds half of the running total, rounded up
+    if (m_GPUSTAT.SkipDrawingToActiveField())
+      pixels /= 2;
+
+    AddCommandTicks(pixels);
   }
   ALWAYS_INLINE void AddDrawRectangleTicks(u32 width, u32 height, bool textured, bool semitransparent)
   {
diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index a75445eb6..bf138675e 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -440,8 +440,10 @@ void GPU_HW::LoadVertices()
           static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
 
         m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
-        AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable,
-                             rc.transparency_enable);
+        AddDrawTriangleTicks(native_vertex_positions[0][0], native_vertex_positions[0][1],
+                             native_vertex_positions[1][0], native_vertex_positions[1][1],
+                             native_vertex_positions[2][0], native_vertex_positions[2][1], rc.shading_enable,
+                             rc.texture_enable, rc.transparency_enable);
 
         std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
         m_batch_current_vertex_ptr += 3;
@@ -472,8 +474,10 @@ void GPU_HW::LoadVertices()
             static_cast<u32>(std::clamp<s32>(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
 
           m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
-          AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable,
-                               rc.transparency_enable);
+          AddDrawTriangleTicks(native_vertex_positions[2][0], native_vertex_positions[2][1],
+                               native_vertex_positions[1][0], native_vertex_positions[1][1],
+                               native_vertex_positions[3][0], native_vertex_positions[3][1], rc.shading_enable,
+                               rc.texture_enable, rc.transparency_enable);
 
           AddVertex(vertices[2]);
           AddVertex(vertices[1]);
diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp
index 93804694a..ff3114994 100644
--- a/src/core/gpu_sw.cpp
+++ b/src/core/gpu_sw.cpp
@@ -754,18 +754,7 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex
     return;
   }
 
-  {
-    const s32 min_x = std::clamp(std::min(v0->x, std::min(v1->x, v2->x)), static_cast<s32>(m_drawing_area.left),
-                                 static_cast<s32>(m_drawing_area.right));
-    const s32 max_x = std::clamp(std::max(v0->x, std::max(v1->x, v2->x)), static_cast<s32>(m_drawing_area.left),
-                                 static_cast<s32>(m_drawing_area.right));
-    const s32 min_y = std::clamp(std::min(v0->y, std::min(v1->y, v2->y)), static_cast<s32>(m_drawing_area.top),
-                                 static_cast<s32>(m_drawing_area.bottom));
-    const s32 max_y = std::clamp(std::max(v0->y, std::max(v1->y, v2->y)), static_cast<s32>(m_drawing_area.top),
-                                 static_cast<s32>(m_drawing_area.bottom));
-    AddDrawTriangleTicks(static_cast<u32>(max_x - min_x + 1), static_cast<u32>(max_y - min_y + 1), shading_enable,
-                         texture_enable, transparency_enable);
-  }
+  AddDrawTriangleTicks(v0->x, v0->y, v1->x, v1->y, v2->x, v2->y, shading_enable, texture_enable, transparency_enable);
 
   s64 base_coord = MakePolyXFP(v0->x);
   s64 base_step = MakePolyXFPStep((v2->x - v0->x), (v2->y - v0->y));