mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-03-06 14:27:44 +00:00
CPU/Recompiler: Simplify fast map addressing
This commit is contained in:
parent
7f88cd5f9f
commit
033d85cd90
|
@ -46,29 +46,141 @@ alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8
|
|||
#endif
|
||||
|
||||
static JitCodeBuffer s_code_buffer;
|
||||
static FastMapTable s_fast_map[FAST_MAP_TABLE_COUNT];
|
||||
static std::unique_ptr<CodeBlock::HostCodePointer[]> s_fast_map_pointers;
|
||||
|
||||
std::array<CodeBlock::HostCodePointer, FAST_MAP_TOTAL_SLOT_COUNT> s_fast_map;
|
||||
DispatcherFunction s_asm_dispatcher;
|
||||
SingleBlockDispatcherFunction s_single_block_asm_dispatcher;
|
||||
|
||||
ALWAYS_INLINE static u32 GetFastMapIndex(u32 pc)
|
||||
static FastMapTable DecodeFastMapPointer(u32 slot, FastMapTable ptr)
|
||||
{
|
||||
return ((pc & PHYSICAL_MEMORY_ADDRESS_MASK) >= Bus::BIOS_BASE) ?
|
||||
(FAST_MAP_RAM_SLOT_COUNT + ((pc & Bus::BIOS_MASK) >> 2)) :
|
||||
((pc & Bus::g_ram_mask) >> 2);
|
||||
if constexpr (sizeof(void*) == 8)
|
||||
return reinterpret_cast<FastMapTable>(reinterpret_cast<u8*>(ptr) + (static_cast<u64>(slot) << 17));
|
||||
else
|
||||
return reinterpret_cast<FastMapTable>(reinterpret_cast<u8*>(ptr) + (slot << 16));
|
||||
}
|
||||
|
||||
/// Produces the biased ("fake") pointer stored in s_fast_map for `slot`.
///
/// The real table address is moved back by the slot number shifted by 17 (64-bit hosts)
/// or 16 (32-bit hosts). OffsetFastMapPointer() later adds pc << 1 (or pc), so the
/// slot-sized part of that offset cancels out and only the within-table offset remains.
/// The result is not dereferenceable on its own.
static FastMapTable EncodeFastMapPointer(u32 slot, FastMapTable ptr)
{
  u8* const table_bytes = reinterpret_cast<u8*>(ptr);

  if constexpr (sizeof(void*) == 8)
  {
    const u64 bias = static_cast<u64>(slot) << 17;
    return reinterpret_cast<FastMapTable>(table_bytes - bias);
  }
  else
  {
    const u32 bias = slot << 16;
    return reinterpret_cast<FastMapTable>(table_bytes - bias);
  }
}
|
||||
|
||||
/// Resolves the table entry for `pc`, given the encoded pointer stored for pc's slot.
///
/// @param fake_ptr Encoded table pointer as produced by EncodeFastMapPointer().
/// @param pc       Guest program counter; the whole value is used, not just the low bits.
/// @return Address of the host code pointer entry for `pc`.
static CodeBlock::HostCodePointer* OffsetFastMapPointer(FastMapTable fake_ptr, u32 pc)
{
  u8* const base = reinterpret_cast<u8*>(fake_ptr);

  if constexpr (sizeof(void*) == 8)
  {
    // Byte offset is pc * 2 on 64-bit hosts.
    const u64 byte_offset = static_cast<u64>(pc) << 1;
    return reinterpret_cast<CodeBlock::HostCodePointer*>(base + byte_offset);
  }
  else
  {
    // On 32-bit hosts pc is used directly as the byte offset.
    return reinterpret_cast<CodeBlock::HostCodePointer*>(base + pc);
  }
}
|
||||
|
||||
static void CompileDispatcher();
|
||||
static void FastCompileBlockFunction();
|
||||
static void InvalidCodeFunction();
|
||||
|
||||
/// Number of fast map tables needed to cover the guest address range [start, end].
/// The slot containing `end` is counted as well, hence the +1.
static constexpr u32 GetTableCount(u32 start, u32 end)
{
  const u32 first_slot = start >> FAST_MAP_TABLE_SHIFT;
  const u32 last_slot = end >> FAST_MAP_TABLE_SHIFT;
  return (last_slot - first_slot) + 1;
}
|
||||
|
||||
/// Points every slot covering [start, end] at its own table, carving the tables
/// sequentially out of the storage that `table_ptr` walks through.
///
/// @param start     First guest address of the range.
/// @param end       Last guest address of the range (its slot is included).
/// @param table_ptr In/out cursor into the table storage; advanced by one table per slot.
static void AllocateFastMapTables(u32 start, u32 end, FastMapTable& table_ptr)
{
  const u32 first_slot = start >> FAST_MAP_TABLE_SHIFT;
  const u32 end_slot = first_slot + GetTableCount(start, end);

  for (u32 slot = first_slot; slot != end_slot; slot++)
  {
    s_fast_map[slot] = EncodeFastMapPointer(slot, table_ptr);
    table_ptr += FAST_MAP_TABLE_SIZE;
  }
}
|
||||
|
||||
static void AllocateFastMap()
|
||||
{
|
||||
static constexpr VirtualMemoryAddress ranges[][2] = {
|
||||
{0x00000000, 0x00800000}, // RAM
|
||||
{0x1F000000, 0x1F800000}, // EXP1
|
||||
{0x1FC00000, 0x1FC80000}, // BIOS
|
||||
|
||||
{0x80000000, 0x80800000}, // RAM
|
||||
{0x9F000000, 0x9F800000}, // EXP1
|
||||
{0x9FC00000, 0x9FC80000}, // BIOS
|
||||
|
||||
{0xA0000000, 0xA0800000}, // RAM
|
||||
{0xBF000000, 0xBF800000}, // EXP1
|
||||
{0xBFC00000, 0xBFC80000} // BIOS
|
||||
};
|
||||
|
||||
u32 num_tables = 1; // unreachable table
|
||||
for (u32 i = 0; i < countof(ranges); i++)
|
||||
num_tables += GetTableCount(ranges[i][0], ranges[i][1]);
|
||||
|
||||
const u32 num_slots = FAST_MAP_TABLE_SIZE * num_tables;
|
||||
if (!s_fast_map_pointers)
|
||||
s_fast_map_pointers = std::make_unique<CodeBlock::HostCodePointer[]>(num_slots);
|
||||
|
||||
FastMapTable table_ptr = s_fast_map_pointers.get();
|
||||
FastMapTable table_ptr_end = table_ptr + num_slots;
|
||||
|
||||
// Fill the first table with invalid/unreachable.
|
||||
for (u32 i = 0; i < FAST_MAP_TABLE_SIZE; i++)
|
||||
table_ptr[i] = InvalidCodeFunction;
|
||||
|
||||
// And the remaining with block compile pointers.
|
||||
for (u32 i = FAST_MAP_TABLE_SIZE; i < num_slots; i++)
|
||||
table_ptr[i] = FastCompileBlockFunction;
|
||||
|
||||
// Mark everything as unreachable to begin with.
|
||||
for (u32 i = 0; i < FAST_MAP_TABLE_COUNT; i++)
|
||||
s_fast_map[i] = EncodeFastMapPointer(i, table_ptr);
|
||||
table_ptr += FAST_MAP_TABLE_SIZE;
|
||||
|
||||
// Allocate ranges.
|
||||
for (u32 i = 0; i < countof(ranges); i++)
|
||||
AllocateFastMapTables(ranges[i][0], ranges[i][1], table_ptr);
|
||||
|
||||
Assert(table_ptr == table_ptr_end);
|
||||
}
|
||||
|
||||
static void ResetFastMap()
|
||||
{
|
||||
s_fast_map.fill(FastCompileBlockFunction);
|
||||
if (!s_fast_map_pointers)
|
||||
return;
|
||||
|
||||
for (u32 i = 0; i < FAST_MAP_TABLE_COUNT; i++)
|
||||
{
|
||||
FastMapTable ptr = DecodeFastMapPointer(i, s_fast_map[i]);
|
||||
if (ptr == s_fast_map_pointers.get())
|
||||
continue;
|
||||
|
||||
for (u32 j = 0; j < FAST_MAP_TABLE_SIZE; j++)
|
||||
ptr[j] = FastCompileBlockFunction;
|
||||
}
|
||||
}
|
||||
|
||||
static void FreeFastMap()
|
||||
{
|
||||
std::memset(s_fast_map, 0, sizeof(s_fast_map));
|
||||
s_fast_map_pointers.reset();
|
||||
}
|
||||
|
||||
/// Installs `function` as the host code to run for the block at guest address `pc`.
/// Does nothing if the fast map has not been allocated.
///
/// @param pc       Guest program counter of the block.
/// @param function Host code pointer to store in pc's table entry.
static void SetFastMap(u32 pc, CodeBlock::HostCodePointer function)
{
  if (!s_fast_map_pointers)
    return;

  const u32 slot = pc >> FAST_MAP_TABLE_SHIFT;
  FastMapTable encoded_ptr = s_fast_map[slot];

  // Writing through the shared unreachable table (the first one in the storage) would
  // affect every unmapped address, so pc must belong to a slot with its own table.
  const FastMapTable table_ptr = DecodeFastMapPointer(slot, encoded_ptr);
  Assert(table_ptr != nullptr && table_ptr != s_fast_map_pointers.get());

  // The encoded pointer is offset by the full pc, not the pc's low bits.
  CodeBlock::HostCodePointer* ptr = OffsetFastMapPointer(encoded_ptr, pc);
  *ptr = function;
}
|
||||
|
||||
#endif
|
||||
|
@ -138,11 +250,13 @@ void Initialize()
|
|||
Panic("Failed to initialize code space");
|
||||
}
|
||||
|
||||
AllocateFastMap();
|
||||
|
||||
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
|
||||
Panic("Failed to initialize fastmem");
|
||||
|
||||
ResetFastMap();
|
||||
CompileDispatcher();
|
||||
ResetFastMap();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -169,6 +283,7 @@ void Shutdown()
|
|||
ClearState();
|
||||
#ifdef WITH_RECOMPILER
|
||||
ShutdownFastmem();
|
||||
FreeFastMap();
|
||||
s_code_buffer.Destroy();
|
||||
#endif
|
||||
}
|
||||
|
@ -305,9 +420,9 @@ void CompileDispatcher()
|
|||
s_code_buffer.WriteProtect(true);
|
||||
}
|
||||
|
||||
CodeBlock::HostCodePointer* GetFastMapPointer()
|
||||
FastMapTable* GetFastMapPointer()
|
||||
{
|
||||
return s_fast_map.data();
|
||||
return s_fast_map;
|
||||
}
|
||||
|
||||
void ExecuteRecompiler()
|
||||
|
@ -334,8 +449,7 @@ void ExecuteRecompiler()
|
|||
|
||||
const u32 pc = g_state.regs.pc;
|
||||
g_state.current_instruction_pc = pc;
|
||||
const u32 fast_map_index = GetFastMapIndex(pc);
|
||||
s_single_block_asm_dispatcher(s_fast_map[fast_map_index]);
|
||||
s_single_block_asm_dispatcher(s_fast_map[pc >> 16][pc >> 2]);
|
||||
}
|
||||
|
||||
TimingEvents::RunEvents();
|
||||
|
@ -503,7 +617,7 @@ recompile:
|
|||
if (block->recompile_count >= RECOMPILE_COUNT_TO_FALL_BACK_TO_INTERPRETER)
|
||||
{
|
||||
Log_PerfPrintf("Block 0x%08X has been recompiled %u times in %u frames, falling back to interpreter",
|
||||
block->GetPC(), block->recompile_count, frame_diff);
|
||||
block->GetPC(), block->recompile_count, frame_diff);
|
||||
|
||||
FallbackExistingBlockToInterpreter(block);
|
||||
return false;
|
||||
|
@ -683,11 +797,36 @@ void FastCompileBlockFunction()
|
|||
{
|
||||
CodeBlock* block = LookupBlock(GetNextBlockKey());
|
||||
if (block)
|
||||
{
|
||||
s_single_block_asm_dispatcher(block->host_code);
|
||||
}
|
||||
else if (g_settings.gpu_pgxp_enable)
|
||||
InterpretUncachedBlock<PGXPMode::Memory>();
|
||||
{
|
||||
if (g_settings.gpu_pgxp_cpu)
|
||||
InterpretUncachedBlock<PGXPMode::CPU>();
|
||||
else
|
||||
InterpretUncachedBlock<PGXPMode::Memory>();
|
||||
}
|
||||
else
|
||||
{
|
||||
InterpretUncachedBlock<PGXPMode::Disabled>();
|
||||
}
|
||||
}
|
||||
|
||||
void InvalidCodeFunction()
|
||||
{
|
||||
Log_ErrorPrintf("Trying to execute invalid code at 0x%08X", g_state.regs.pc);
|
||||
if (g_settings.gpu_pgxp_enable)
|
||||
{
|
||||
if (g_settings.gpu_pgxp_cpu)
|
||||
InterpretUncachedBlock<PGXPMode::CPU>();
|
||||
else
|
||||
InterpretUncachedBlock<PGXPMode::Memory>();
|
||||
}
|
||||
else
|
||||
{
|
||||
InterpretUncachedBlock<PGXPMode::Disabled>();
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -16,13 +16,6 @@
|
|||
|
||||
namespace CPU {
|
||||
|
||||
// Sizes of the flat fast map layout: one slot per 4 bytes of RAM, followed by one slot
// per 4 bytes of BIOS.
enum : u32
{
  FAST_MAP_RAM_SLOT_COUNT = Bus::RAM_8MB_SIZE / 4,
  FAST_MAP_BIOS_SLOT_COUNT = Bus::BIOS_SIZE / 4,
  FAST_MAP_TOTAL_SLOT_COUNT = FAST_MAP_RAM_SLOT_COUNT + FAST_MAP_BIOS_SLOT_COUNT,
};
|
||||
|
||||
union CodeBlockKey
|
||||
{
|
||||
u32 bits;
|
||||
|
@ -107,6 +100,15 @@ struct CodeBlock
|
|||
|
||||
namespace CodeCache {
|
||||
|
||||
// Geometry of the two-level fast map. The three values are tied together: the top
// FAST_MAP_TABLE_SHIFT..31 bits of a pc select the table, and each table holds one entry
// per 4 bytes of the region it covers.
enum : u32
{
  FAST_MAP_TABLE_SHIFT = 16,
  FAST_MAP_TABLE_COUNT = 1u << (32 - FAST_MAP_TABLE_SHIFT), // 0x10000
  FAST_MAP_TABLE_SIZE = (1u << FAST_MAP_TABLE_SHIFT) / 4,   // 16384
};
|
||||
|
||||
using FastMapTable = CodeBlock::HostCodePointer*;
|
||||
|
||||
void Initialize();
|
||||
void Shutdown();
|
||||
void Execute();
|
||||
|
@ -115,7 +117,7 @@ void Execute();
|
|||
using DispatcherFunction = void (*)();
|
||||
using SingleBlockDispatcherFunction = void(*)(const CodeBlock::HostCodePointer);
|
||||
|
||||
CodeBlock::HostCodePointer* GetFastMapPointer();
|
||||
FastMapTable* GetFastMapPointer();
|
||||
void ExecuteRecompiler();
|
||||
#endif
|
||||
|
||||
|
|
|
@ -2028,29 +2028,18 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
|
|||
|
||||
// time to lookup the block
|
||||
// r0 <- pc
|
||||
m_emit->Mov(a32::r3, Bus::BIOS_BASE);
|
||||
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, regs.pc)));
|
||||
|
||||
// current_instruction_pc <- pc (eax)
|
||||
// r1 <- s_fast_map[pc >> 16]
|
||||
EmitLoadGlobalAddress(2, CodeCache::GetFastMapPointer());
|
||||
m_emit->lsr(a32::r1, a32::r0, 16);
|
||||
m_emit->ldr(a32::r1, a32::MemOperand(a32::r2, a32::r1, a32::LSL, 2));
|
||||
|
||||
// current_instruction_pc <- pc (r0)
|
||||
m_emit->str(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, current_instruction_pc)));
|
||||
|
||||
// r1 <- (pc & RAM_MASK) >> 2
|
||||
m_emit->and_(a32::r1, a32::r0, Bus::g_ram_mask);
|
||||
m_emit->lsr(a32::r1, a32::r1, 2);
|
||||
|
||||
// r2 <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
|
||||
m_emit->and_(a32::r2, a32::r0, Bus::BIOS_MASK);
|
||||
m_emit->lsr(a32::r2, a32::r2, 2);
|
||||
m_emit->add(a32::r2, a32::r2, FAST_MAP_RAM_SLOT_COUNT);
|
||||
|
||||
// if ((r0 (pc) & PHYSICAL_MEMORY_ADDRESS_MASK) >= BIOS_BASE) { use r2 as index }
|
||||
m_emit->and_(a32::r0, a32::r0, PHYSICAL_MEMORY_ADDRESS_MASK);
|
||||
m_emit->cmp(a32::r0, a32::r3);
|
||||
m_emit->mov(a32::ge, a32::r1, a32::r2);
|
||||
|
||||
// ebx contains our index, rax <- fast_map[ebx * 8], rax(), continue
|
||||
EmitLoadGlobalAddress(0, CodeCache::GetFastMapPointer());
|
||||
m_emit->ldr(a32::r0, a32::MemOperand(a32::r0, a32::r1, a32::LSL, 2));
|
||||
// blr(r1[pc]) (fast_map[pc >> 2])
|
||||
m_emit->ldr(a32::r0, a32::MemOperand(a32::r1, a32::r0));
|
||||
m_emit->blx(a32::r0);
|
||||
|
||||
// end while
|
||||
|
|
|
@ -2239,29 +2239,18 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
|
|||
|
||||
// time to lookup the block
|
||||
// w8 <- pc
|
||||
m_emit->Mov(a64::w11, Bus::BIOS_BASE);
|
||||
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, regs.pc)));
|
||||
|
||||
// current_instruction_pc <- pc (eax)
|
||||
// x9 <- s_fast_map[pc >> 16]
|
||||
EmitLoadGlobalAddress(10, CodeCache::GetFastMapPointer());
|
||||
m_emit->lsr(a64::w9, a64::w8, 16);
|
||||
m_emit->ldr(a64::x9, a64::MemOperand(a64::x10, a64::x9, a64::LSL, 3));
|
||||
|
||||
// current_instruction_pc <- pc (w8)
|
||||
m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, current_instruction_pc)));
|
||||
|
||||
// w9 <- (pc & RAM_MASK) >> 2
|
||||
m_emit->and_(a64::w9, a64::w8, Bus::g_ram_mask);
|
||||
m_emit->lsr(a64::w9, a64::w9, 2);
|
||||
|
||||
// w10 <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
|
||||
m_emit->and_(a64::w10, a64::w8, Bus::BIOS_MASK);
|
||||
m_emit->lsr(a64::w10, a64::w10, 2);
|
||||
m_emit->add(a64::w10, a64::w10, FAST_MAP_RAM_SLOT_COUNT);
|
||||
|
||||
// if ((w8 (pc) & PHYSICAL_MEMORY_ADDRESS_MASK) >= BIOS_BASE) { use w10 as index }
|
||||
m_emit->and_(a64::w8, a64::w8, PHYSICAL_MEMORY_ADDRESS_MASK);
|
||||
m_emit->cmp(a64::w8, a64::w11);
|
||||
m_emit->csel(a64::w8, a64::w9, a64::w10, a64::lt);
|
||||
|
||||
// ebx contains our index, rax <- fast_map[ebx * 8], rax(), continue
|
||||
EmitLoadGlobalAddress(9, CodeCache::GetFastMapPointer());
|
||||
m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3));
|
||||
// blr(x9[pc * 2]) (fast_map[pc >> 2])
|
||||
m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 2));
|
||||
m_emit->blr(a64::x8);
|
||||
|
||||
// end while
|
||||
|
|
|
@ -2996,29 +2996,18 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
|
|||
// eax <- pc
|
||||
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, regs.pc)]);
|
||||
|
||||
// ebx <- (pc & RAM_MASK) >> 2
|
||||
m_emit->mov(m_emit->ebx, m_emit->eax);
|
||||
m_emit->and_(m_emit->ebx, Bus::g_ram_mask);
|
||||
m_emit->shr(m_emit->ebx, 2);
|
||||
|
||||
// ecx <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
|
||||
m_emit->mov(m_emit->ecx, m_emit->eax);
|
||||
m_emit->and_(m_emit->ecx, Bus::BIOS_MASK);
|
||||
m_emit->shr(m_emit->ecx, 2);
|
||||
m_emit->add(m_emit->ecx, FAST_MAP_RAM_SLOT_COUNT);
|
||||
|
||||
// current_instruction_pc <- pc (eax)
|
||||
m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, current_instruction_pc)], m_emit->eax);
|
||||
|
||||
// if ((eax (pc) & PHYSICAL_MEMORY_ADDRESS_MASK) >= BIOS_BASE) { use ecx as index }
|
||||
m_emit->and_(m_emit->eax, PHYSICAL_MEMORY_ADDRESS_MASK);
|
||||
m_emit->cmp(m_emit->eax, Bus::BIOS_BASE);
|
||||
m_emit->cmovge(m_emit->ebx, m_emit->ecx);
|
||||
// rcx <- s_fast_map[pc >> 16]
|
||||
EmitLoadGlobalAddress(Xbyak::Operand::RBX, CodeCache::GetFastMapPointer());
|
||||
m_emit->mov(m_emit->ecx, m_emit->eax);
|
||||
m_emit->shr(m_emit->ecx, 16);
|
||||
m_emit->mov(m_emit->rcx, m_emit->qword[m_emit->rbx + m_emit->rcx * 8]);
|
||||
|
||||
// call(rcx[pc * 2]) (fast_map[pc >> 2])
|
||||
m_emit->call(m_emit->qword[m_emit->rcx + m_emit->rax * 2]);
|
||||
|
||||
// ebx contains our index, rax <- fast_map[ebx * 8], rax(), continue
|
||||
EmitLoadGlobalAddress(Xbyak::Operand::RAX, CodeCache::GetFastMapPointer());
|
||||
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax + m_emit->rbx * 8]);
|
||||
m_emit->call(m_emit->rax);
|
||||
m_emit->jmp(main_loop);
|
||||
|
||||
// end while
|
||||
|
|
Loading…
Reference in a new issue