GPU/HW: Tidying up

2025-04-10 19:15:14 +00:00 · 2023-09-20 18:53:29 +10:00 · 2023-09-20 18:53:29 +10:00 · 18ae01bedb
parent 3cd28f433b
commit 18ae01bedb
2 changed files with 104 additions and 77 deletions
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@ -146,6 +146,49 @@ GPU_HW::~GPU_HW()
  }
 }

+ALWAYS_INLINE void GPU_HW::BatchVertex::Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_,
+                                            u16 packed_texcoord, u32 uv_limits_)
+{ // Splits packed_texcoord into u (low byte) and v (high byte), then forwards to the overload taking u_ and v_.
+  Set(x_, y_, z_, w_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8), uv_limits_);
+}
+
+ALWAYS_INLINE void GPU_HW::BatchVertex::Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_,
+                                            u16 v_, u32 uv_limits_)
+{ // Assigns every field of this vertex from the identically named (underscore-suffixed) argument.
+  x = x_; // position components
+  y = y_;
+  z = z_;
+  w = w_;
+  color = color_;
+  texpage = texpage_;
+  u = u_; // texture coordinates, already split by the caller
+  v = v_;
+  uv_limits = uv_limits_; // stored as given; see PackUVLimits() for the packing this file uses
+}
+
+ALWAYS_INLINE u32 GPU_HW::BatchVertex::PackUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v)
+{ // Combines the four limits into one u32: min_u at bit 0, min_v at bit 8, max_u at bit 16, max_v at bit 24.
+  return min_u | (min_v << 8) | (max_u << 16) | (max_v << 24); // inputs are not masked; presumably each fits in 8 bits - TODO confirm
+}
+
+ALWAYS_INLINE void GPU_HW::BatchVertex::SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v)
+{ // Packs the four limits with PackUVLimits() and stores the result in this vertex's uv_limits field.
+  uv_limits = PackUVLimits(min_u, max_u, min_v, max_v);
+}
+
+ALWAYS_INLINE void GPU_HW::AddVertex(const BatchVertex& v)
+{ // Appends a byte-wise copy of v at the batch write cursor, then advances the cursor by one vertex.
+  std::memcpy(m_batch_current_vertex_ptr, &v, sizeof(BatchVertex)); // no bounds check is performed here
+  m_batch_current_vertex_ptr++;
+}
+
+template<typename... Args>
+ALWAYS_INLINE void GPU_HW::AddNewVertex(Args&&... args)
+{ // Fills the vertex at the batch write cursor in place via Set(), then advances the cursor by one vertex.
+  m_batch_current_vertex_ptr->Set(std::forward<Args>(args)...); // args are perfectly forwarded to the matching Set() overload
+  m_batch_current_vertex_ptr++;
+}
+
 const Threading::Thread* GPU_HW::GetSWThread() const
 {
  return m_sw_renderer ? m_sw_renderer->GetThread() : nullptr;
@ -1160,6 +1203,12 @@ void GPU_HW::DestroyPipelines()
  m_display_pipelines.enumerate(destroy);
 }

+GPU_HW::BatchRenderMode GPU_HW::BatchConfig::GetRenderMode() const
+{ // Returns the render mode for this batch, derived solely from transparency_mode.
+  return transparency_mode == GPUTransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled : // no transparency
+                                                              BatchRenderMode::TransparentAndOpaque; // any other transparency mode
+}
+
 void GPU_HW::UpdateVRAMReadTexture()
 {
  GL_SCOPE("UpdateVRAMReadTexture()");
@ -1969,6 +2018,11 @@ void GPU_HW::IncludeVRAMDirtyRectangle(const Common::Rectangle<u32>& rect)
  }
 }

+ALWAYS_INLINE bool GPU_HW::IsFlushed() const
+{ // True when the write cursor is still at the batch start, i.e. GetBatchVertexCount() would be zero.
+  return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr;
+}
+
 GPU_HW::InterlacedRenderMode GPU_HW::GetInterlacedRenderMode() const
 {
  if (IsInterlacedDisplayEnabled())
@ -1982,6 +2036,27 @@ GPU_HW::InterlacedRenderMode GPU_HW::GetInterlacedRenderMode() const
  }
 }

+ALWAYS_INLINE bool GPU_HW::NeedsTwoPassRendering() const
+{ // Returns whether the current batch (m_batch) has to be drawn as separate opaque/transparent passes.
+  // We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
+  // on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
+
+  // TODO: see if there's a better way we can do this. definitely can with fbfetch.
+  return (m_batch.texture_mode != GPUTextureMode::Disabled && // untextured batches never need two passes
+          (m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground || // B-F: needed regardless of blend support
+           (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled))); // else only w/o dual-source
+}
+
+ALWAYS_INLINE u32 GPU_HW::GetBatchVertexSpace() const
+{ // Number of vertices between the write cursor and m_batch_end_vertex_ptr, i.e. how many more can be added.
+  return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); // pointer difference, in vertices
+}
+
+ALWAYS_INLINE u32 GPU_HW::GetBatchVertexCount() const
+{ // Number of vertices between m_batch_start_vertex_ptr and the write cursor, i.e. how many the batch holds.
+  return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); // pointer difference, in vertices
+}
+
 void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices)
 {
  if (m_batch_current_vertex_ptr)
@ -2041,6 +2116,11 @@ void GPU_HW::ResetBatchVertexDepth()
  m_current_depth = 1;
 }

+ALWAYS_INLINE float GPU_HW::GetCurrentNormalizedVertexDepth() const
+{ // Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
+  return 1.0f - (static_cast<float>(m_current_depth) / 65535.0f); // larger m_current_depth gives a smaller result; 65535 maps to 0
+}
+
 void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
 {
  const bool current_enabled = (m_sw_renderer != nullptr);
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@ -56,6 +56,9 @@ public:
  std::tuple<u32, u32> GetEffectiveDisplayResolution(bool scaled = true) override final;
  std::tuple<u32, u32> GetFullDisplayResolution(bool scaled = true) override final;

+  void ClearDisplay() override;
+  void UpdateDisplay() override;
+
 private:
  enum : u32
  {
@ -78,35 +81,10 @@ private:
    u16 v;
    u32 uv_limits;

-    ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 packed_texcoord,
-                           u32 uv_limits_)
-    {
-      Set(x_, y_, z_, w_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8), uv_limits_);
-    }
-
-    ALWAYS_INLINE void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_, u16 v_,
-                           u32 uv_limits_)
-    {
-      x = x_;
-      y = y_;
-      z = z_;
-      w = w_;
-      color = color_;
-      texpage = texpage_;
-      u = u_;
-      v = v_;
-      uv_limits = uv_limits_;
-    }
-
-    ALWAYS_INLINE static u32 PackUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v)
-    {
-      return min_u | (min_v << 8) | (max_u << 16) | (max_v << 24);
-    }
-
-    ALWAYS_INLINE void SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v)
-    {
-      uv_limits = PackUVLimits(min_u, max_u, min_v, max_v);
-    }
+    void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 packed_texcoord, u32 uv_limits_);
+    void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_, u16 v_, u32 uv_limits_);
+    static u32 PackUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v);
+    void SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v);
  };

  struct BatchConfig
@ -120,11 +98,7 @@ private:
    bool use_depth_buffer = false;

    // Returns the render mode for this batch.
-    BatchRenderMode GetRenderMode() const
-    {
-      return transparency_mode == GPUTransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled :
-                                                                  BatchRenderMode::TransparentAndOpaque;
-    }
+    BatchRenderMode GetRenderMode() const;
  };

  struct BatchUBOData
@ -151,6 +125,16 @@ private:
  bool CompilePipelines();
  void DestroyPipelines();

+  void LoadVertices();
+
+  void AddVertex(const BatchVertex& v);
+
+  template<typename... Args>
+  void AddNewVertex(Args&&... args);
+
+  void PrintSettingsToLog();
+  void CheckSettings();
+
  void UpdateVRAMReadTexture();
  void UpdateDepthBufferFromMaskBit();
  void ClearDepthBuffer();
@ -158,8 +142,6 @@ private:
  void MapBatchVertexPointer(u32 required_vertices);
  void UnmapBatchVertexPointer(u32 used_vertices);
  void DrawBatchVertices(BatchRenderMode render_mode, u32 num_vertices, u32 base_vertex);
-  void ClearDisplay() override;
-  void UpdateDisplay() override;

  u32 CalculateResolutionScale() const;
  GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const;
@ -171,37 +153,21 @@ private:
  void ClearVRAMDirtyRectangle();
  void IncludeVRAMDirtyRectangle(const Common::Rectangle<u32>& rect);

-  ALWAYS_INLINE bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
-  ALWAYS_INLINE u32 GetBatchVertexSpace() const
-  {
-    return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr);
-  }
-  ALWAYS_INLINE u32 GetBatchVertexCount() const
-  {
-    return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr);
-  }
+  bool IsFlushed() const;
+  u32 GetBatchVertexSpace() const;
+  u32 GetBatchVertexCount() const;
  void EnsureVertexBufferSpace(u32 required_vertices);
  void EnsureVertexBufferSpaceForCurrentCommand();
  void ResetBatchVertexDepth();

  /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
-  ALWAYS_INLINE float GetCurrentNormalizedVertexDepth() const
-  {
-    return 1.0f - (static_cast<float>(m_current_depth) / 65535.0f);
-  }
+  float GetCurrentNormalizedVertexDepth() const;

  /// Returns the interlaced mode to use when scanning out/displaying.
  InterlacedRenderMode GetInterlacedRenderMode() const;

-  /// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
-  /// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
-  ALWAYS_INLINE bool NeedsTwoPassRendering() const
-  {
-    // TODO: see if there's a better way we can do this. definitely can with fbfetch.
-    return (m_batch.texture_mode != GPUTextureMode::Disabled &&
-            (m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground ||
-             (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled)));
-  }
+  /// Returns if the draw needs to be broken into opaque/transparent passes.
+  bool NeedsTwoPassRendering() const;

  void FillBackendCommandParameters(GPUBackendCommand* cmd) const;
  void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const;
@ -329,23 +295,4 @@ private:
  // Statistics
  RendererStats m_renderer_stats = {};
  RendererStats m_last_renderer_stats = {};
-
-private:
-  void LoadVertices();
-
-  ALWAYS_INLINE void AddVertex(const BatchVertex& v)
-  {
-    std::memcpy(m_batch_current_vertex_ptr, &v, sizeof(BatchVertex));
-    m_batch_current_vertex_ptr++;
-  }
-
-  template<typename... Args>
-  ALWAYS_INLINE void AddNewVertex(Args&&... args)
-  {
-    m_batch_current_vertex_ptr->Set(std::forward<Args>(args)...);
-    m_batch_current_vertex_ptr++;
-  }
-
-  void PrintSettingsToLog();
-  void CheckSettings();
 };