CPU/Recompiler: Optimize constant reads (and some writes)

This commit is contained in:
Connor McLaughlin 2020-10-16 23:30:56 +10:00
parent 69b2c3fc2d
commit 0afdc04d88
4 changed files with 169 additions and 5 deletions

View file

@ -22,6 +22,11 @@ Log_SetChannel(Bus);
namespace Bus {
// Named tick costs used by the RAM access paths in this file.
enum : TickCount
{
// Ticks charged per RAM read: returned for data reads from RAM, added per word for
// instruction reads from RAM that are not instruction-cache reads, and reported for
// RAM by the direct-read-pointer lookup.
RAM_READ_TICKS = 4
};
union MEMDELAY
{
u32 bits;
@ -288,7 +293,7 @@ ALWAYS_INLINE static TickCount DoRAMAccess(u32 offset, u32& value)
}
}
return (type == MemoryAccessType::Read) ? 4 : 0;
return (type == MemoryAccessType::Read) ? RAM_READ_TICKS : 0;
}
template<MemoryAccessType type, MemoryAccessSize size>
@ -753,7 +758,7 @@ ALWAYS_INLINE_RELEASE void DoInstructionRead(PhysicalMemoryAddress address, void
{
std::memcpy(data, &g_ram[address & RAM_MASK], sizeof(u32) * word_count);
if constexpr (add_ticks)
g_state.pending_ticks += (icache_read ? 1 : 4) * word_count;
g_state.pending_ticks += (icache_read ? 1 : RAM_READ_TICKS) * word_count;
}
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
{
@ -776,7 +781,7 @@ TickCount GetInstructionReadTicks(VirtualMemoryAddress address)
if (address < RAM_MIRROR_END)
{
return 4;
return RAM_READ_TICKS;
}
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
{
@ -1307,6 +1312,64 @@ bool SafeWriteMemoryWord(VirtualMemoryAddress addr, u32 value)
return DoMemoryAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(addr, value) >= 0;
}
// Resolves a guest virtual address to a host pointer that can be read from directly.
//
// address:    guest virtual address to resolve.
// size:       access width; only consulted to pick the BIOS access time.
// read_ticks: optional out-parameter. When non-null and a pointer is returned, it
//             receives the tick cost of the read (RAM_READ_TICKS for RAM, 0 for the
//             dcache region, the per-size BIOS access time for BIOS).
//
// Returns nullptr when the address is outside the eligible segments, or does not land
// in RAM, the dcache region or BIOS. read_ticks is left untouched in that case.
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks)
{
  using namespace Bus;

  // Only addresses whose top three bits are 0, 4 or 5 are eligible
  // (presumably the KUSEG/KSEG0/KSEG1 regions -- TODO confirm).
  switch (address >> 29)
  {
    case 0:
    case 4:
    case 5:
      break;

    default:
      return nullptr;
  }

  const PhysicalMemoryAddress phys = address & PHYSICAL_MEMORY_ADDRESS_MASK;
  const bool wants_ticks = (read_ticks != nullptr);

  // Main RAM, including its mirrors: the offset is folded back with RAM_MASK.
  if (phys < RAM_MIRROR_END)
  {
    if (wants_ticks)
      *read_ticks = RAM_READ_TICKS;

    return &g_ram[phys & RAM_MASK];
  }

  // Data cache region: reads cost no ticks.
  const bool in_dcache = ((phys & DCACHE_LOCATION_MASK) == DCACHE_LOCATION);
  if (in_dcache)
  {
    if (wants_ticks)
      *read_ticks = 0;

    return &g_state.dcache[phys & DCACHE_OFFSET_MASK];
  }

  // BIOS: the cost depends on the access size.
  const bool in_bios = (phys >= BIOS_BASE && phys < (BIOS_BASE + BIOS_SIZE));
  if (in_bios)
  {
    if (wants_ticks)
      *read_ticks = m_bios_access_time[static_cast<u32>(size)];

    return &g_bios[phys & BIOS_MASK];
  }

  // Anything else has no directly readable backing memory.
  return nullptr;
}
// Resolves a guest virtual address to a host pointer that can be written to directly.
//
// address: guest virtual address to resolve.
// size:    access width; currently not consulted by this function.
//
// Returns a pointer into the dcache region when the address maps there, otherwise
// nullptr. Direct RAM writes are compiled out (see the #if 0 block below).
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size)
{
  using namespace Bus;

  // Only addresses whose top three bits are 0, 4 or 5 are eligible
  // (presumably the KUSEG/KSEG0/KSEG1 regions -- TODO confirm).
  switch (address >> 29)
  {
    case 0:
    case 4:
    case 5:
      break;

    default:
      return nullptr;
  }

  const PhysicalMemoryAddress phys = address & PHYSICAL_MEMORY_ADDRESS_MASK;

#if 0
  // Not enabled until we can protect code regions.
  if (phys < RAM_MIRROR_END)
    return &g_ram[phys & RAM_MASK];
#endif

  const bool in_dcache = ((phys & DCACHE_LOCATION_MASK) == DCACHE_LOCATION);
  return in_dcache ? static_cast<void*>(&g_state.dcache[phys & DCACHE_OFFSET_MASK]) : nullptr;
}
namespace Recompiler::Thunks {
u64 ReadMemoryByte(u32 address)

View file

@ -1,5 +1,6 @@
#pragma once
#include "cpu_core.h"
#include "bus.h"
namespace CPU {
@ -72,5 +73,7 @@ bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value);
bool WriteMemoryByte(VirtualMemoryAddress addr, u8 value);
bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value);
bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
// Returns a host pointer for directly reading the guest memory at `address`, or nullptr
// if the address has no direct mapping (callers must then use the regular access path).
// When a pointer is returned and `read_ticks` is non-null, it receives the tick cost of
// the read; `size` selects the cost for BIOS reads.
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks);
// Returns a host pointer for directly writing the guest memory at `address`, or nullptr
// if the address cannot be written directly (callers must then use the regular access
// path).
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size);
} // namespace CPU

View file

@ -1283,6 +1283,23 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
{
if (address.IsConstant())
{
TickCount read_ticks;
void* ptr = GetDirectReadMemoryPointer(
static_cast<u32>(address.constant_value),
(size == RegSize_8) ? MemoryAccessSize::Byte :
((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word),
&read_ticks);
if (ptr)
{
Value result = m_register_cache.AllocateScratch(size);
EmitLoadGlobal(result.GetHostRegister(), size, ptr);
m_delayed_cycles_add += read_ticks;
return result;
}
}
AddPendingCycles(true);
if (g_settings.cpu_recompiler_memory_exceptions)
@ -1405,6 +1422,19 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
{
if (address.IsConstant())
{
void* ptr = GetDirectWriteMemoryPointer(
static_cast<u32>(address.constant_value),
(value.size == RegSize_8) ? MemoryAccessSize::Byte :
((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word));
if (ptr)
{
EmitStoreGlobal(ptr, value);
return;
}
}
AddPendingCycles(true);
if (g_settings.cpu_recompiler_memory_exceptions)
@ -1480,12 +1510,50 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
// Emits host code (a64 emitter) that loads a value of width `size` from the fixed host
// address `ptr` into `host_reg`.
//   host_reg: destination host register.
//   size:     RegSize_8, RegSize_16 or RegSize_32; any other size hits UnreachableCode().
//   ptr:      host address to load from; baked into the emitted code as an immediate.
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{
// NOTE(review): this Panic precedes the implementation below. It looks like the
// pre-change stub line that this commit view shows next to its replacement -- confirm
// it is not present in the final file.
Panic("Not implemented");
// Materialise the absolute host address in the scratch register, then load through it.
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
switch (size)
{
case RegSize_8:
m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
break;
case RegSize_16:
m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
break;
case RegSize_32:
m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
break;
default:
// Other widths are not handled here.
UnreachableCode();
break;
}
}
// Emits host code (a64 emitter) that stores `value` to the fixed host address `ptr`.
//   ptr:   host address to store to; baked into the emitted code as an immediate.
//   value: value to store; its `size` (RegSize_8/16/32) selects the store width, and any
//          other size hits UnreachableCode().
void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
{
// NOTE(review): this Panic precedes the implementation below. It looks like the
// pre-change stub line that this commit view shows next to its replacement -- confirm
// it is not present in the final file.
Panic("Not implemented");
// Get the value into a host register before the address is loaded into the scratch register.
// NOTE(review): presumably value_in_hr never aliases RSCRATCH -- confirm.
Value value_in_hr = GetValueInHostRegister(value);
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
switch (value.size)
{
case RegSize_8:
m_emit->Strb(GetHostReg8(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
break;
case RegSize_16:
m_emit->Strh(GetHostReg16(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
break;
case RegSize_32:
m_emit->Str(GetHostReg32(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
break;
default:
// Other widths are not handled here.
UnreachableCode();
break;
}
}
void CodeGenerator::EmitFlushInterpreterLoadDelay()

View file

@ -1738,6 +1738,23 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
{
if (address.IsConstant())
{
TickCount read_ticks;
void* ptr = GetDirectReadMemoryPointer(
static_cast<u32>(address.constant_value),
(size == RegSize_8) ? MemoryAccessSize::Byte :
((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word),
&read_ticks);
if (ptr)
{
Value result = m_register_cache.AllocateScratch(size);
EmitLoadGlobal(result.GetHostRegister(), size, ptr);
m_delayed_cycles_add += read_ticks;
return result;
}
}
AddPendingCycles(true);
if (g_settings.cpu_recompiler_memory_exceptions)
@ -1858,6 +1875,19 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
{
if (address.IsConstant())
{
void* ptr = GetDirectWriteMemoryPointer(
static_cast<u32>(address.constant_value),
(value.size == RegSize_8) ? MemoryAccessSize::Byte :
((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word));
if (ptr)
{
EmitStoreGlobal(ptr, value);
return;
}
}
AddPendingCycles(true);
if (g_settings.cpu_recompiler_memory_exceptions)