From d1e841f55db0a2f25341045196c7830128c9f144 Mon Sep 17 00:00:00 2001
From: Connor McLaughlin <stenzek@gmail.com>
Date: Sun, 26 Apr 2020 18:33:35 +1000
Subject: [PATCH] GTE: Use intrinsic CountLeadingZeros()

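Replace the hand-rolled shift-and-test loops in gte.cpp with CountLeadingZeros()
(common/bitutils.h is now included), adding explicit checks for a zero input at
both call sites. Super tiny micro-optimization.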
---
 src/core/gte.cpp | 40 +++++++---------------------------------
 1 file changed, 7 insertions(+), 33 deletions(-)

diff --git a/src/core/gte.cpp b/src/core/gte.cpp
index 2513d1b10..454623177 100644
--- a/src/core/gte.cpp
+++ b/src/core/gte.cpp
@@ -1,41 +1,15 @@
 #include "gte.h"
+#include "common/bitutils.h"
 #include <algorithm>
 #include <array>
 
-// TODO: Optimize, intrinsics?
-static inline constexpr u32 CountLeadingZeros(u16 value)
+ALWAYS_INLINE u32 CountLeadingBits(u32 value)
 {
-  u32 count = 0;
-  for (u32 i = 0; i < 16 && (value & UINT16_C(0x8000)) == 0; i++)
-  {
-    count++;
-    value <<= 1;
-  }
+  // if top-most bit is set, we want to count ones not zeros
+  if (value & UINT32_C(0x80000000))
+    value ^= UINT32_C(0xFFFFFFFF);
 
-  return count;
-}
-
-static inline constexpr u32 CountLeadingBits(u32 value)
-{
-  u32 count = 0;
-  if ((value & UINT32_C(0x80000000)) != 0)
-  {
-    for (u32 i = 0; i < 32 && ((value & UINT32_C(0x80000000)) != 0); i++)
-    {
-      count++;
-      value <<= 1;
-    }
-  }
-  else
-  {
-    for (u32 i = 0; i < 32 && (value & UINT32_C(0x80000000)) == 0; i++)
-    {
-      count++;
-      value <<= 1;
-    }
-  }
-
-  return count;
+  return (value == 0u) ? 32 : CountLeadingZeros(value);
 }
 
 namespace GTE {
@@ -365,7 +339,7 @@ u32 Core::UNRDivide(u32 lhs, u32 rhs)
     return 0x1FFFF;
   }
 
-  const u32 shift = CountLeadingZeros(static_cast<u16>(rhs));
+  const u32 shift = (rhs == 0) ? 16 : CountLeadingZeros(static_cast<u16>(rhs));
   lhs <<= shift;
   rhs <<= shift;