From 77488db3dca9ae12c32fcc8b3bfd02969700fafe Mon Sep 17 00:00:00 2001
From: Stenzek
Date: Sat, 6 Jul 2024 18:49:35 +1000
Subject: [PATCH] Misc: Align CPU state and SPU voices to cache lines

Fixes the ~12% performance regression from the texture replacement
namespace'ify commit. Apparently LTO was placing the CPU struct in the
middle of a cache line...
---
 src/common/types.h    | 1 +
 src/core/cpu_core.cpp | 2 +-
 src/core/cpu_core.h   | 2 +-
 src/core/spu.cpp      | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/common/types.h b/src/common/types.h
index 849d51d72..2bc435636 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -199,6 +199,7 @@ static constexpr u32 HOST_CACHE_LINE_SIZE = 128; // Apple Silicon uses 128b cach
 #else
 static constexpr u32 HOST_CACHE_LINE_SIZE = 64; // Everything else is 64b.
 #endif
+#define ALIGN_TO_CACHE_LINE alignas(HOST_CACHE_LINE_SIZE)
 
 // Enum class bitwise operators
 #define IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(type_) \
diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp
index e97d03fd8..96b2ddc81 100644
--- a/src/core/cpu_core.cpp
+++ b/src/core/cpu_core.cpp
@@ -89,7 +89,7 @@ static bool WriteMemoryByte(VirtualMemoryAddress addr, u32 value);
 static bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u32 value);
 static bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
 
-State g_state;
+alignas(HOST_CACHE_LINE_SIZE) State g_state;
 bool TRACE_EXECUTION = false;
 
 static fastjmp_buf s_jmp_buf;
diff --git a/src/core/cpu_core.h b/src/core/cpu_core.h
index b63b5e6aa..9527b4390 100644
--- a/src/core/cpu_core.h
+++ b/src/core/cpu_core.h
@@ -121,7 +121,7 @@ struct State
   static constexpr u32 GTERegisterOffset(u32 index) { return OFFSETOF(State, gte_regs.r32) + (sizeof(u32) * index); }
 };
 
-extern State g_state;
+ALIGN_TO_CACHE_LINE extern State g_state;
 
 void Initialize();
 void Shutdown();
diff --git a/src/core/spu.cpp b/src/core/spu.cpp
index bc6f06366..d3cbf04eb 100644
--- a/src/core/spu.cpp
+++ b/src/core/spu.cpp
@@ -401,7 +401,7 @@ static std::array, 2> s_reverb_downsample_buffer;
 static std::array, 2> s_reverb_upsample_buffer;
 static s32 s_reverb_resample_buffer_position = 0;
 
-static std::array s_voices{};
+ALIGN_TO_CACHE_LINE static std::array s_voices{};
 static InlineFIFOQueue s_transfer_fifo;