From d65cbe42aeca055cdd5ac5313f1d92d371c57111 Mon Sep 17 00:00:00 2001
From: Connor McLaughlin <stenzek@gmail.com>
Date: Tue, 20 Dec 2022 21:19:05 +1000
Subject: [PATCH] GPU: Truncate instead of rounding 24bpp->16bpp

Thanks to axetion for writing a test that confirms this is indeed what
the console GPU does.
---
 src/core/gpu_sw.cpp  | 13 +++++++++----
 src/core/gpu_types.h | 26 +++++++++++---------------
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp
index cca586adb..fc3ef598a 100644
--- a/src/core/gpu_sw.cpp
+++ b/src/core/gpu_sw.cpp
@@ -142,16 +142,21 @@ ALWAYS_INLINE u16 VRAM16ToOutput<GPUTexture::Format::RGB565, u16>(u16 value)
 template<>
 ALWAYS_INLINE u32 VRAM16ToOutput<GPUTexture::Format::RGBA8, u32>(u16 value)
 {
-  return VRAMRGBA5551ToRGBA8888(value);
+  const u32 value32 = ZeroExtend32(value);
+  const u32 r = (value32 & 31u) << 3;
+  const u32 g = ((value32 >> 5) & 31u) << 3;
+  const u32 b = ((value32 >> 10) & 31u) << 3;
+  const u32 a = ((value >> 15) != 0) ? 255 : 0;
+  return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16) | (ZeroExtend32(a) << 24);
 }
 
 template<>
 ALWAYS_INLINE u32 VRAM16ToOutput<GPUTexture::Format::BGRA8, u32>(u16 value)
 {
   const u32 value32 = ZeroExtend32(value);
-  const u32 r = VRAMConvert5To8(value32 & 31u);
-  const u32 g = VRAMConvert5To8((value32 >> 5) & 31u);
-  const u32 b = VRAMConvert5To8((value32 >> 10) & 31u);
+  const u32 r = (value32 & 31u) << 3;
+  const u32 g = ((value32 >> 5) & 31u) << 3;
+  const u32 b = ((value32 >> 10) & 31u) << 3;
   return ZeroExtend32(b) | (ZeroExtend32(g) << 8) | (ZeroExtend32(r) << 16) | (0xFF000000u);
 }
 
diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h
index 4b1f72c28..75bba6e5d 100644
--- a/src/core/gpu_types.h
+++ b/src/core/gpu_types.h
@@ -117,29 +117,25 @@ union GPURenderCommand
   }
 };
 
-// Helper/format conversion functions - constants from https://stackoverflow.com/a/9069480
-ALWAYS_INLINE static constexpr u32 VRAMConvert5To8(u32 color)
-{
-  return (((color * 527u) + 23u) >> 6);
-}
-ALWAYS_INLINE static constexpr u32 VRAMConvert8To5(u32 color)
-{
-  return (((color * 249u) + 1014u) >> 11);
-}
 ALWAYS_INLINE static constexpr u32 VRAMRGBA5551ToRGBA8888(u32 color)
 {
-  const u32 r = VRAMConvert5To8(color & 31u);
-  const u32 g = VRAMConvert5To8((color >> 5) & 31u);
-  const u32 b = VRAMConvert5To8((color >> 10) & 31u);
+  // Helper/format conversion functions - constants from https://stackoverflow.com/a/9069480
+#define E5TO8(color) ((((color) * 527u) + 23u) >> 6)
+
+  const u32 r = E5TO8(color & 31u);
+  const u32 g = E5TO8((color >> 5) & 31u);
+  const u32 b = E5TO8((color >> 10) & 31u);
   const u32 a = ((color >> 15) != 0) ? 255 : 0;
   return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16) | (ZeroExtend32(a) << 24);
+
+#undef E5TO8
 }
 
 ALWAYS_INLINE static constexpr u16 VRAMRGBA8888ToRGBA5551(u32 color)
 {
-  const u32 r = VRAMConvert8To5(color & 0xFFu);
-  const u32 g = VRAMConvert8To5((color >> 8) & 0xFFu);
-  const u32 b = VRAMConvert8To5((color >> 16) & 0xFFu);
+  const u32 r = (color & 0xFFu) >> 3;
+  const u32 g = ((color >> 8) & 0xFFu) >> 3;
+  const u32 b = ((color >> 16) & 0xFFu) >> 3;
   const u32 a = ((color >> 24) & 0x01u);
   return Truncate16(r | (g << 5) | (b << 10) | (a << 15));
 }