From 44142ab4f9055a0b82e6365009162c1619e1b2f7 Mon Sep 17 00:00:00 2001
From: Stenzek
Date: Sat, 29 Jun 2024 19:53:35 +1000
Subject: [PATCH] CPU/Recompiler: Prefer adrp over trampoline for +/-4GB branches

---
 .../cpu_recompiler_code_generator_aarch64.cpp | 20 +++++++++++++++++--
 src/core/cpu_recompiler_types.h               |  1 +
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp
index c6a45d33f..6fff92998 100644
--- a/src/core/cpu_recompiler_code_generator_aarch64.cpp
+++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp
@@ -147,6 +147,20 @@ s64 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* targe
   return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2);
 }
 
+bool CPU::Recompiler::armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr)
+{
+  const void* cur = armAsm->GetCursorAddress<const void*>();
+  const void* current_code_ptr_page =
+    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
+  const void* ptr_page =
+    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
+  const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
+  const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
+
+  return (vixl::IsInt21(page_displacement) &&
+          (a64::Assembler::IsImmAddSub(page_offset) || a64::Assembler::IsImmLogical(page_offset, 64)));
+}
+
 void CPU::Recompiler::armMoveAddressToReg(a64::Assembler* armAsm, const a64::Register& reg, const void* addr)
 {
   DebugAssert(reg.IsX());
@@ -178,7 +192,8 @@ void CPU::Recompiler::armEmitJmp(a64::Assembler* armAsm, const void* ptr, bool f
   const void* cur = armAsm->GetCursorAddress<const void*>();
   s64 displacement = armGetPCDisplacement(cur, ptr);
   bool use_blr = !vixl::IsInt26(displacement);
-  if (use_blr && !force_inline)
+  bool use_trampoline = use_blr && !armIsInAdrpRange(armAsm, ptr);
+  if (use_blr && use_trampoline && !force_inline)
   {
     if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
     {
@@ -203,7 +218,8 @@ void CPU::Recompiler::armEmitCall(a64::Assembler* armAsm, const void* ptr, bool
   const void* cur = armAsm->GetCursorAddress<const void*>();
   s64 displacement = armGetPCDisplacement(cur, ptr);
   bool use_blr = !vixl::IsInt26(displacement);
-  if (use_blr && !force_inline)
+  bool use_trampoline = use_blr && !armIsInAdrpRange(armAsm, ptr);
+  if (use_blr && use_trampoline && !force_inline)
   {
     if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
     {
diff --git a/src/core/cpu_recompiler_types.h b/src/core/cpu_recompiler_types.h
index c6c8172fd..de71b9f2b 100644
--- a/src/core/cpu_recompiler_types.h
+++ b/src/core/cpu_recompiler_types.h
@@ -123,6 +123,7 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
 
 bool armIsCallerSavedRegister(u32 id);
 s64 armGetPCDisplacement(const void* current, const void* target);
+bool armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr);
 void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr);
 void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm);
 void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
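
Note (commentary, not part of the patch): the new check mirrors what an
adrp+add/orr pair can encode. adrp takes a signed 21-bit 4KiB-page
displacement (+/-2^20 pages = +/-4GiB, hence the subject line), and the low
12 bits of the target address are merged in separately, so both halves must
be encodable. Below is a minimal sketch of the in-range materialisation that
this patch prefers over a trampoline, reusing the same page arithmetic as
armIsInAdrpRange() above. The helper name EmitAdrpAddressSketch is made up
for illustration; the committed adrp path presumably lives inside
armMoveAddressToReg(), whose body is not shown in this diff.

  static void EmitAdrpAddressSketch(a64::Assembler* armAsm, const a64::Register& reg, const void* addr)
  {
    // Split the target into a 4KiB page displacement and a 12-bit offset,
    // exactly as armIsInAdrpRange() does. armGetPCDisplacement() returns an
    // instruction (4-byte) count, so >> 10 converts it to a page count.
    const void* cur = armAsm->GetCursorAddress<const void*>();
    const void* cur_page =
      reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
    const void* addr_page =
      reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
    const s64 page_displacement = armGetPCDisplacement(cur_page, addr_page) >> 10;
    const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);

    // adrp materialises the page base; add (or orr, when the offset only
    // encodes as a logical immediate) applies the low 12 bits. These are the
    // two encodability cases armIsInAdrpRange() accepts.
    armAsm->adrp(reg, page_displacement);
    if (a64::Assembler::IsImmAddSub(page_offset))
      armAsm->add(reg, reg, page_offset);
    else
      armAsm->orr(reg, reg, page_offset);
  }

With the address in a register, armEmitJmp/armEmitCall can presumably finish
with a br/blr, replacing the old out-of-range fallback of a trampoline (or a
multi-instruction mov of the full 64-bit address) with a two-instruction
sequence whenever the target is within +/-4GiB.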