From c617a453facb4ab3c8af6e291c77bedd4d4a19cc Mon Sep 17 00:00:00 2001
From: Ian Curtis <i.curtis@gmail.com>
Date: Mon, 3 Apr 2017 12:00:25 +0000
Subject: [PATCH] Cache dereferenced values. Cuts loading time from 15 to 5
 seconds in debug mode :)

---
 Src/GameLoader.cpp | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/Src/GameLoader.cpp b/Src/GameLoader.cpp
index 8b55c39..6adb36a 100644
--- a/Src/GameLoader.cpp
+++ b/Src/GameLoader.cpp
@@ -663,12 +663,11 @@ bool GameLoader::ComputeRegionSize(uint32_t *region_size, const GameLoader::Regi
 
 // We need to preserve the absolute offsets in order for byte swapping to work
 // properly when chunk size is 1
-static inline void CopyBytes(uint8_t *dest_base, size_t dest_offset, const uint8_t *src_base, size_t src_offset, size_t size, bool byte_swap)
+static inline void CopyBytes(uint8_t *dest_base, uint32_t dest_offset, const uint8_t *src_base, uint32_t src_offset, uint32_t size, uint32_t byte_swap)  // byte_swap is XORed into each destination index unmodified: 0 = straight copy, 1 = swap the two bytes of each aligned pair
 {
-  size_t swap = byte_swap ? 1 : 0;
-  for (size_t i = 0; i < size; i++)
+  for (uint32_t i = 0; i < size; i++)  // copy `size` bytes one at a time from src_base[src_offset...]
   {
-    dest_base[(dest_offset + i) ^ swap] = src_base[src_offset + i];
+    dest_base[(dest_offset + i) ^ byte_swap] = src_base[src_offset + i];  // XOR acts on the absolute index (dest_offset + i), so swapping still works when size is 1
   }
 }
 
@@ -692,14 +691,17 @@ bool GameLoader::LoadRegion(ROM *rom, const GameLoader::Region::ptr_t &region, c
       }
       else
       {
-        size_t num_chunks = file_size / region->chunk_size;
-        size_t dest_offset = file->offset;
-        size_t src_offset = 0;
-        for (size_t i = 0; i < num_chunks; i++)
+        uint32_t num_chunks = (uint32_t)file_size / region->chunk_size;
+		uint32_t dest_offset = file->offset;
+		uint32_t src_offset = 0;
+		uint32_t chunk_size = (uint32_t)region->chunk_size;		// cache these as pointer dereferencing cripples performance in a tight loop
+		uint32_t stride = (uint32_t)region->stride;
+		uint32_t byte_swap = region->byte_swap;
+		for (uint32_t i = 0; i < num_chunks; i++)
         {
-          CopyBytes(dest, dest_offset, src, src_offset, region->chunk_size, region->byte_swap);
-          dest_offset += region->stride;
-          src_offset += region->chunk_size;
+          CopyBytes(dest, dest_offset, src, src_offset, chunk_size, byte_swap);
+          dest_offset += stride;
+          src_offset += chunk_size;
         }
       }
     }