From 0afdc04d88e24177cb7f604352cdd46ca52672cf Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Fri, 16 Oct 2020 23:30:56 +1000 Subject: [PATCH] CPU/Recompiler: Optimize constant reads (and some writes) --- src/core/bus.cpp | 69 +++++++++++++++++- src/core/cpu_core_private.h | 3 + .../cpu_recompiler_code_generator_aarch64.cpp | 72 ++++++++++++++++++- .../cpu_recompiler_code_generator_x64.cpp | 30 ++++++++ 4 files changed, 169 insertions(+), 5 deletions(-) diff --git a/src/core/bus.cpp b/src/core/bus.cpp index 7b8a70c0a..f238e6b17 100644 --- a/src/core/bus.cpp +++ b/src/core/bus.cpp @@ -22,6 +22,11 @@ Log_SetChannel(Bus); namespace Bus { +enum : TickCount +{ + RAM_READ_TICKS = 4 +}; + union MEMDELAY { u32 bits; @@ -288,7 +293,7 @@ ALWAYS_INLINE static TickCount DoRAMAccess(u32 offset, u32& value) } } - return (type == MemoryAccessType::Read) ? 4 : 0; + return (type == MemoryAccessType::Read) ? RAM_READ_TICKS : 0; } template @@ -753,7 +758,7 @@ ALWAYS_INLINE_RELEASE void DoInstructionRead(PhysicalMemoryAddress address, void { std::memcpy(data, &g_ram[address & RAM_MASK], sizeof(u32) * word_count); if constexpr (add_ticks) - g_state.pending_ticks += (icache_read ? 1 : 4) * word_count; + g_state.pending_ticks += (icache_read ? 1 : RAM_READ_TICKS) * word_count; } else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) { @@ -776,7 +781,7 @@ TickCount GetInstructionReadTicks(VirtualMemoryAddress address) if (address < RAM_MIRROR_END) { - return 4; + return RAM_READ_TICKS; } else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) { @@ -1307,6 +1312,64 @@ bool SafeWriteMemoryWord(VirtualMemoryAddress addr, u32 value) return DoMemoryAccess(addr, value) >= 0; } +void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks) +{ + using namespace Bus; + + const u32 seg = (address >> 29); + if (seg != 0 && seg != 4 && seg != 5) + return nullptr; + + const PhysicalMemoryAddress paddr = address & PHYSICAL_MEMORY_ADDRESS_MASK; + if (paddr < RAM_MIRROR_END) + { + if (read_ticks) + *read_ticks = RAM_READ_TICKS; + + return &g_ram[paddr & RAM_MASK]; + } + + if ((paddr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION) + { + if (read_ticks) + *read_ticks = 0; + + return &g_state.dcache[paddr & DCACHE_OFFSET_MASK]; + } + + if (paddr >= BIOS_BASE && paddr < (BIOS_BASE + BIOS_SIZE)) + { + if (read_ticks) + *read_ticks = m_bios_access_time[static_cast(size)]; + + return &g_bios[paddr & BIOS_MASK]; + } + + return nullptr; +} + +void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size) +{ + using namespace Bus; + + const u32 seg = (address >> 29); + if (seg != 0 && seg != 4 && seg != 5) + return nullptr; + + const PhysicalMemoryAddress paddr = address & PHYSICAL_MEMORY_ADDRESS_MASK; + +#if 0 + // Not enabled until we can protect code regions. + if (paddr < RAM_MIRROR_END) + return &g_ram[paddr & RAM_MASK]; +#endif + + if ((paddr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION) + return &g_state.dcache[paddr & DCACHE_OFFSET_MASK]; + + return nullptr; +} + namespace Recompiler::Thunks { u64 ReadMemoryByte(u32 address) diff --git a/src/core/cpu_core_private.h b/src/core/cpu_core_private.h index 9f74fd7f0..05ee62f5f 100644 --- a/src/core/cpu_core_private.h +++ b/src/core/cpu_core_private.h @@ -1,5 +1,6 @@ #pragma once #include "cpu_core.h" +#include "bus.h" namespace CPU { @@ -72,5 +73,7 @@ bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value); bool WriteMemoryByte(VirtualMemoryAddress addr, u8 value); bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value); bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value); +void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks); +void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size); } // namespace CPU \ No newline at end of file diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp index 4a62184ab..7d772c099 100644 --- a/src/core/cpu_recompiler_code_generator_aarch64.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp @@ -1283,6 +1283,23 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size) { + if (address.IsConstant()) + { + TickCount read_ticks; + void* ptr = GetDirectReadMemoryPointer( + static_cast(address.constant_value), + (size == RegSize_8) ? MemoryAccessSize::Byte : + ((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word), + &read_ticks); + if (ptr) + { + Value result = m_register_cache.AllocateScratch(size); + EmitLoadGlobal(result.GetHostRegister(), size, ptr); + m_delayed_cycles_add += read_ticks; + return result; + } + } + AddPendingCycles(true); if (g_settings.cpu_recompiler_memory_exceptions) @@ -1405,6 +1422,19 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value) { + if (address.IsConstant()) + { + void* ptr = GetDirectWriteMemoryPointer( + static_cast(address.constant_value), + (value.size == RegSize_8) ? MemoryAccessSize::Byte : + ((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word)); + if (ptr) + { + EmitStoreGlobal(ptr, value); + return; + } + } + AddPendingCycles(true); if (g_settings.cpu_recompiler_memory_exceptions) @@ -1480,12 +1510,50 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) { - Panic("Not implemented"); + m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast(ptr)); + switch (size) + { + case RegSize_8: + m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH))); + break; + + case RegSize_16: + m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH))); + break; + + case RegSize_32: + m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH))); + break; + + default: + UnreachableCode(); + break; + } } void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value) { - Panic("Not implemented"); + Value value_in_hr = GetValueInHostRegister(value); + + m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast(ptr)); + switch (value.size) + { + case RegSize_8: + m_emit->Strb(GetHostReg8(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH))); + break; + + case RegSize_16: + m_emit->Strh(GetHostReg16(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH))); + break; + + case RegSize_32: + m_emit->Str(GetHostReg32(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH))); + break; + + default: + UnreachableCode(); + break; + } } void CodeGenerator::EmitFlushInterpreterLoadDelay() diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index fd2f34035..142f86fad 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -1738,6 +1738,23 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size) { + if (address.IsConstant()) + { + TickCount read_ticks; + void* ptr = GetDirectReadMemoryPointer( + static_cast(address.constant_value), + (size == RegSize_8) ? MemoryAccessSize::Byte : + ((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word), + &read_ticks); + if (ptr) + { + Value result = m_register_cache.AllocateScratch(size); + EmitLoadGlobal(result.GetHostRegister(), size, ptr); + m_delayed_cycles_add += read_ticks; + return result; + } + } + AddPendingCycles(true); if (g_settings.cpu_recompiler_memory_exceptions) @@ -1858,6 +1875,19 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value) { + if (address.IsConstant()) + { + void* ptr = GetDirectWriteMemoryPointer( + static_cast(address.constant_value), + (value.size == RegSize_8) ? MemoryAccessSize::Byte : + ((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word)); + if (ptr) + { + EmitStoreGlobal(ptr, value); + return; + } + } + AddPendingCycles(true); if (g_settings.cpu_recompiler_memory_exceptions)