CPU/Recompiler: Faster ASM dispatcher

This commit is contained in:
Connor McLaughlin 2020-10-18 14:43:09 +10:00
parent cb351a7dbd
commit a6f8dde790
12 changed files with 513 additions and 137 deletions

View file

@ -1,6 +1,5 @@
#pragma once
#include "common/bitfield.h"
#include "cpu_code_cache.h"
#include "types.h"
#include <array>
#include <bitset>
@ -97,16 +96,4 @@ ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
}
/// Invalidates any code pages which overlap the specified range.
///
/// @param address    Physical address of the first word written.
/// @param word_count Number of 32-bit words written starting at `address`.
///
/// The last page examined is the one containing the final word of the range, not the byte
/// immediately after it. A range that ends exactly on a page boundary therefore no longer
/// touches (or indexes the code-bit set for) the following page.
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count)
{
  // Byte offset of the last word in the range. A zero-length range still checks the page
  // containing `address`, matching the previous behaviour for that case.
  const u32 last_word_offset = (word_count > 0) ? static_cast<u32>((word_count - 1) * sizeof(u32)) : 0u;

  const u32 start_page = address / CPU_CODE_CACHE_PAGE_SIZE;
  const u32 end_page = (address + last_word_offset) / CPU_CODE_CACHE_PAGE_SIZE;
  for (u32 page = start_page; page <= end_page; page++)
  {
    // Only pages flagged as containing compiled code need their blocks thrown away.
    if (m_ram_code_bits[page])
      CPU::CodeCache::InvalidateBlocksWithPageIndex(page);
  }
}
} // namespace Bus

View file

@ -35,14 +35,9 @@ alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8
static JitCodeBuffer s_code_buffer;
// Sizing of the fast block lookup table: one slot per 4 bytes of RAM, followed by one slot per
// 4 bytes of BIOS.
enum : u32
{
FAST_MAP_RAM_SLOT_COUNT = Bus::RAM_SIZE / 4,
FAST_MAP_BIOS_SLOT_COUNT = Bus::BIOS_SIZE / 4,
FAST_MAP_TOTAL_SLOT_COUNT = FAST_MAP_RAM_SLOT_COUNT + FAST_MAP_BIOS_SLOT_COUNT,
};
// Host code pointer for each slot; exposed to generated code through GetFastMapPointer().
std::array<CodeBlock::HostCodePointer, FAST_MAP_TOTAL_SLOT_COUNT> s_fast_map;
// Entry point of the generated dispatcher loop (assigned in CompileDispatcher()).
DispatcherFunction s_asm_dispatcher;
// Entry point of the generated stub which runs exactly one block (assigned in CompileDispatcher()).
SingleBlockDispatcherFunction s_single_block_asm_dispatcher;
ALWAYS_INLINE static u32 GetFastMapIndex(u32 pc)
{
@ -51,6 +46,7 @@ ALWAYS_INLINE static u32 GetFastMapIndex(u32 pc)
((pc & Bus::RAM_MASK) >> 2);
}
static void CompileDispatcher();
static void FastCompileBlockFunction();
static void ResetFastMap()
@ -111,6 +107,7 @@ void Initialize(bool use_recompiler)
}
ResetFastMap();
CompileDispatcher();
#else
s_use_recompiler = false;
#endif
@ -238,9 +235,27 @@ void Execute()
#ifdef WITH_RECOMPILER
void CompileDispatcher()
{
{
Recompiler::CodeGenerator cg(&s_code_buffer);
s_asm_dispatcher = cg.CompileDispatcher();
}
{
Recompiler::CodeGenerator cg(&s_code_buffer);
s_single_block_asm_dispatcher = cg.CompileSingleBlockDispatcher();
}
}
/// Returns a pointer to the first slot of the fast block lookup table (s_fast_map), so that
/// generated dispatcher code can index it directly.
CodeBlock::HostCodePointer* GetFastMapPointer()
{
return s_fast_map.data();
}
void ExecuteRecompiler()
{
g_state.frame_done = false;
#if 0
while (!g_state.frame_done)
{
if (HasPendingInterrupt())
@ -261,6 +276,9 @@ void ExecuteRecompiler()
TimingEvents::RunEvents();
}
#else
s_asm_dispatcher();
#endif
// in case we switch to interpreter...
g_state.regs.npc = g_state.regs.pc;
@ -291,6 +309,7 @@ void Flush()
#ifdef WITH_RECOMPILER
s_code_buffer.Reset();
ResetFastMap();
CompileDispatcher();
#endif
}
@ -499,7 +518,7 @@ void FastCompileBlockFunction()
{
CodeBlock* block = LookupBlock(GetNextBlockKey());
if (block)
block->host_code();
s_single_block_asm_dispatcher(block->host_code);
else
InterpretUncachedBlock();
}

View file

@ -1,4 +1,5 @@
#pragma once
#include "bus.h"
#include "common/bitfield.h"
#include "common/jit_code_buffer.h"
#include "cpu_types.h"
@ -9,6 +10,13 @@
namespace CPU {
// Sizing of the fast block lookup table: one slot per 4 bytes of RAM, followed by one slot per
// 4 bytes of BIOS. The generated dispatchers add FAST_MAP_RAM_SLOT_COUNT to reach the BIOS slots.
enum : u32
{
FAST_MAP_RAM_SLOT_COUNT = Bus::RAM_SIZE / 4,
FAST_MAP_BIOS_SLOT_COUNT = Bus::BIOS_SIZE / 4,
FAST_MAP_TOTAL_SLOT_COUNT = FAST_MAP_RAM_SLOT_COUNT + FAST_MAP_BIOS_SLOT_COUNT,
};
union CodeBlockKey
{
u32 bits;
@ -86,6 +94,10 @@ void Shutdown();
void Execute();
#ifdef WITH_RECOMPILER
using DispatcherFunction = void (*)();
using SingleBlockDispatcherFunction = void(*)(const CodeBlock::HostCodePointer);
CodeBlock::HostCodePointer* GetFastMapPointer();
void ExecuteRecompiler();
#endif
@ -102,6 +114,18 @@ template<PGXPMode pgxp_mode>
void InterpretCachedBlock(const CodeBlock& block);
void InterpretUncachedBlock();
/// Invalidates any code pages which overlap the specified range.
///
/// @param address    Physical address of the first word written.
/// @param word_count Number of 32-bit words written starting at `address`.
///
/// The last page examined is the one containing the final word of the range, not the byte
/// immediately after it. A range that ends exactly on a page boundary therefore no longer
/// touches (or indexes the code-bit set for) the following page.
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count)
{
  // Byte offset of the last word in the range. A zero-length range still checks the page
  // containing `address`, matching the previous behaviour for that case.
  const u32 last_word_offset = (word_count > 0) ? static_cast<u32>((word_count - 1) * sizeof(u32)) : 0u;

  const u32 start_page = address / CPU_CODE_CACHE_PAGE_SIZE;
  const u32 end_page = (address + last_word_offset) / CPU_CODE_CACHE_PAGE_SIZE;
  for (u32 page = start_page; page <= end_page; page++)
  {
    // Only pages flagged as containing compiled code need their blocks thrown away.
    if (Bus::m_ram_code_bits[page])
      CPU::CodeCache::InvalidateBlocksWithPageIndex(page);
  }
}
}; // namespace CodeCache
} // namespace CPU

View file

@ -1381,6 +1381,7 @@ void DispatchInterrupt()
{
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction())
GTE::ExecuteInstruction(g_state.next_instruction.bits);

View file

@ -25,6 +25,9 @@ public:
bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
CodeCache::DispatcherFunction CompileDispatcher();
CodeCache::SingleBlockDispatcherFunction CompileSingleBlockDispatcher();
//////////////////////////////////////////////////////////////////////////
// Code Generation
//////////////////////////////////////////////////////////////////////////
@ -67,6 +70,7 @@ public:
void EmitAddCPUStructField(u32 offset, const Value& value);
void EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr);
void EmitStoreGlobal(void* ptr, const Value& value);
void EmitLoadGlobalAddress(HostReg host_reg, const void* ptr);
// Automatically generates an exception handler.
Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size);
@ -86,6 +90,7 @@ public:
u32 PrepareStackForCall();
void RestoreStackAfterCall(u32 adjust_size);
void EmitCall(const void* ptr);
void EmitFunctionCallPtr(Value* return_value, const void* ptr);
void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1);
void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2);
@ -128,7 +133,9 @@ public:
// Host register saving.
void EmitPushHostReg(HostReg reg, u32 position);
void EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position);
void EmitPopHostReg(HostReg reg, u32 position);
void EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position);
// Value ops
Value AddValues(const Value& lhs, const Value& rhs, bool set_flags);

View file

@ -6,6 +6,7 @@
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
#include "settings.h"
#include "timing_event.h"
Log_SetChannel(CPU::Recompiler);
namespace a64 = vixl::aarch64;
@ -26,6 +27,16 @@ constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
constexpr u64 FUNCTION_STACK_SIZE =
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
// PC we return to after the end of the block
static void* s_dispatcher_return_address;
/// Returns the signed distance from `current` to `target`, measured in 4-byte units.
/// Both pointers are asserted to be 4-byte aligned.
static s64 GetPCDisplacement(const void* current, const void* target)
{
  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));

  const ptrdiff_t from = reinterpret_cast<ptrdiff_t>(current);
  const ptrdiff_t to = reinterpret_cast<ptrdiff_t>(target);
  const ptrdiff_t byte_delta = to - from;

  // Convert the byte distance into a word distance.
  return static_cast<s64>(byte_delta >> 2);
}
static const a64::WRegister GetHostReg8(HostReg reg)
{
return a64::WRegister(reg);
@ -172,11 +183,11 @@ void CodeGenerator::EmitBeginBlock()
// Save the link register, since we'll be calling functions.
const bool link_reg_allocated = m_register_cache.AllocateHostReg(30);
DebugAssert(link_reg_allocated);
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
// Store the CPU struct pointer. TODO: make this better.
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
DebugAssert(cpu_reg_allocated);
m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
}
void CodeGenerator::EmitEndBlock()
@ -185,6 +196,7 @@ void CodeGenerator::EmitEndBlock()
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->Ret();
}
@ -200,6 +212,7 @@ void CodeGenerator::EmitExceptionExit()
m_register_cache.PopCalleeSavedRegisters(false);
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->Ret();
}
@ -228,10 +241,10 @@ void CodeGenerator::FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32
m_far_emitter.FinalizeCode();
*out_host_code = reinterpret_cast<CodeBlock::HostCodePointer>(m_code_buffer->GetFreeCodePointer());
*out_host_code_size = m_near_emitter.GetSizeOfCodeGenerated();
*out_host_code_size = static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated());
m_code_buffer->CommitCode(m_near_emitter.GetSizeOfCodeGenerated());
m_code_buffer->CommitFarCode(m_far_emitter.GetSizeOfCodeGenerated());
m_code_buffer->CommitCode(static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated()));
m_code_buffer->CommitFarCode(static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated()));
m_near_emitter.Reset();
m_far_emitter.Reset();
@ -958,11 +971,19 @@ void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
m_register_cache.PopCallerSavedRegisters();
}
static s64 GetBranchDisplacement(const void* current, const void* target)
void CodeGenerator::EmitCall(const void* ptr)
{
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));
return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
m_emit->Blr(GetHostReg64(RSCRATCH));
}
else
{
m_emit->bl(displacement);
}
}
void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
@ -974,17 +995,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
const u32 adjust_size = PrepareStackForCall();
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
m_emit->Blr(GetHostReg64(RSCRATCH));
}
else
{
m_emit->bl(displacement);
}
EmitCall(ptr);
// shadow space release
RestoreStackAfterCall(adjust_size);
@ -1009,17 +1020,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG1, arg1);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
m_emit->Blr(GetHostReg64(RSCRATCH));
}
else
{
m_emit->bl(displacement);
}
EmitCall(ptr);
// shadow space release
RestoreStackAfterCall(adjust_size);
@ -1045,17 +1046,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG2, arg2);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
m_emit->Blr(GetHostReg64(RSCRATCH));
}
else
{
m_emit->bl(displacement);
}
EmitCall(ptr);
// shadow space release
RestoreStackAfterCall(adjust_size);
@ -1083,17 +1074,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG3, arg3);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
m_emit->Blr(GetHostReg64(RSCRATCH));
}
else
{
m_emit->bl(displacement);
}
EmitCall(ptr);
// shadow space release
RestoreStackAfterCall(adjust_size);
@ -1122,17 +1103,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG4, arg4);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
m_emit->Blr(GetHostReg64(RSCRATCH));
}
else
{
m_emit->bl(displacement);
}
EmitCall(ptr);
// shadow space release
RestoreStackAfterCall(adjust_size);
@ -1148,13 +1119,25 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position)
{
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - FUNCTION_CALL_SHADOW_SPACE - (position * 8));
m_emit->Str(GetHostReg64(reg), addr);
m_emit->str(GetHostReg64(reg), addr);
}
/// Stores two host registers into adjacent save slots with a single STP.
/// The base address is the slot for `position + 1`; `reg2` is the first STP operand and `reg`
/// the second, so `reg2` is written to slot `position + 1` and `reg` to the 8 bytes above it,
/// which is the slot EmitPushHostReg() would use for `position`.
void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position)
{
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - FUNCTION_CALL_SHADOW_SPACE - ((position + 1) * 8));
m_emit->stp(GetHostReg64(reg2), GetHostReg64(reg), addr);
}
void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position)
{
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - FUNCTION_CALL_SHADOW_SPACE - (position * 8));
m_emit->Ldr(GetHostReg64(reg), addr);
m_emit->ldr(GetHostReg64(reg), addr);
}
/// Reloads two host registers from adjacent save slots with a single LDP.
/// The base address is the slot for `position`; `reg2` is the first LDP operand and `reg` the
/// second, so `reg2` is read from slot `position` and `reg` from the 8 bytes above it
/// (the slot for `position - 1`).
void CodeGenerator::EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position)
{
const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - FUNCTION_CALL_SHADOW_SPACE - (position * 8));
m_emit->ldp(GetHostReg64(reg2), GetHostReg64(reg), addr);
}
void CodeGenerator::EmitLoadCPUStructField(HostReg host_reg, RegSize guest_size, u32 offset)
@ -1510,7 +1493,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
EmitLoadGlobalAddress(RSCRATCH, ptr);
switch (size)
{
case RegSize_8:
@ -1535,7 +1518,7 @@ void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
{
Value value_in_hr = GetValueInHostRegister(value);
m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(ptr));
EmitLoadGlobalAddress(RSCRATCH, ptr);
switch (value.size)
{
case RegSize_8:
@ -1882,4 +1865,152 @@ void CodeGenerator::EmitBindLabel(LabelType* label)
m_emit->Bind(label);
}
/// Loads the address `ptr` into `host_reg`.
/// Uses an ADRP + ORR pair when the target's 4K page is within ADRP range of the current code
/// position and the in-page offset is encodable as a logical immediate; otherwise falls back
/// to materialising the full 64-bit constant with Mov.
void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
{
  const uintptr_t in_page_mask = static_cast<uintptr_t>(0xFFF);
  const uintptr_t code_address = reinterpret_cast<uintptr_t>(GetCurrentCodePointer());
  const uintptr_t target_address = reinterpret_cast<uintptr_t>(ptr);

  const void* const code_page = reinterpret_cast<const void*>(code_address & ~in_page_mask);
  const void* const target_page = reinterpret_cast<const void*>(target_address & ~in_page_mask);

  // GetPCDisplacement() yields 4-byte units; a further shift by 10 converts that to 4K pages.
  const s64 page_displacement = GetPCDisplacement(code_page, target_page) >> 10;
  const u32 page_offset = static_cast<u32>(target_address & 0xFFFu);

  const auto dest = GetHostReg64(host_reg);
  if (!vixl::IsInt21(page_displacement) || !a64::Assembler::IsImmLogical(page_offset, 64))
  {
    // Out of ADRP range, or the offset cannot be expressed as an ORR immediate.
    m_emit->Mov(dest, target_address);
    return;
  }

  m_emit->adrp(dest, page_displacement);
  m_emit->orr(dest, dest, page_offset);
}
/// Emits the AArch64 dispatcher: a hand-written loop which services interrupts, refreshes the
/// CPU downcount from the head timing event, and then repeatedly looks up and calls compiled
/// blocks through the fast map until pending_ticks reaches the downcount. At that point it
/// runs timing events and starts over, exiting once State::frame_done is set.
///
/// @return Entry point of the generated dispatcher.
CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
{
  // Prologue: reserve the stack frame and save the callee-saved registers up front.
  m_emit->sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
  m_register_cache.ReserveCalleeSavedRegisters();
  const u32 stack_adjust = PrepareStackForCall();

  // Loaded once; every State field below is addressed relative to this register.
  EmitLoadGlobalAddress(RCPUPTR, &g_state);

  a64::Label frame_done_loop;
  a64::Label exit_dispatcher;
  m_emit->Bind(&frame_done_loop);

  // if frame_done goto exit_dispatcher
  m_emit->ldrb(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, frame_done)));
  m_emit->tbnz(a64::w8, 0, &exit_dispatcher);

  // w8 <- sr
  a64::Label no_interrupt;
  m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.sr.bits)));

  // if Iec == 0 then goto no_interrupt
  m_emit->tbz(a64::w8, 0, &no_interrupt);

  // w9 <- cause
  // w8 <- sr & cause
  m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.cause.bits)));
  m_emit->and_(a64::w8, a64::w8, a64::w9);

  // ((sr & cause) & 0xff00) == 0 goto no_interrupt
  m_emit->tst(a64::w8, 0xFF00);
  m_emit->b(&no_interrupt, a64::eq);

  // we have an interrupt
  EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));

  // no interrupt or we just serviced it
  m_emit->Bind(&no_interrupt);

  // TimingEvents::UpdateCPUDowncount:
  // x8 <- head event
  // w8 <- head event->downcount
  // downcount <- w8
  EmitLoadGlobalAddress(8, TimingEvents::GetHeadEventPtr());
  m_emit->ldr(a64::x8, a64::MemOperand(a64::x8));
  m_emit->ldr(a64::w8, a64::MemOperand(a64::x8, offsetof(TimingEvent, m_downcount)));
  m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));

  // main dispatch loop
  a64::Label main_loop;
  m_emit->Bind(&main_loop);
  s_dispatcher_return_address = GetCurrentCodePointer();

  // w8 <- pending_ticks
  // w9 <- downcount
  m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
  m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));

  // while pending_ticks < downcount
  a64::Label downcount_hit;
  m_emit->cmp(a64::w8, a64::w9);
  m_emit->b(&downcount_hit, a64::ge);

  // time to lookup the block
  // w11 <- BIOS_BASE (kept in a register for the compare below)
  // w8 <- pc
  m_emit->Mov(a64::w11, Bus::BIOS_BASE);
  m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, regs.pc)));

  // current_instruction_pc <- pc (w8)
  m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, current_instruction_pc)));

  // w9 <- (pc & RAM_MASK) >> 2
  m_emit->and_(a64::w9, a64::w8, Bus::RAM_MASK);
  m_emit->lsr(a64::w9, a64::w9, 2);

  // w10 <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
  m_emit->and_(a64::w10, a64::w8, Bus::BIOS_MASK);
  m_emit->lsr(a64::w10, a64::w10, 2);
  m_emit->add(a64::w10, a64::w10, FAST_MAP_RAM_SLOT_COUNT);

  // if ((w8 (pc) & PHYSICAL_MEMORY_ADDRESS_MASK) >= BIOS_BASE) { use w10 as index } else { use w9 }
  m_emit->and_(a64::w8, a64::w8, PHYSICAL_MEMORY_ADDRESS_MASK);
  m_emit->cmp(a64::w8, a64::w11);
  m_emit->csel(a64::w8, a64::w9, a64::w10, a64::lt);

  // w8 contains our index, x8 <- fast_map[w8 * 8], x8(), continue
  EmitLoadGlobalAddress(9, CodeCache::GetFastMapPointer());
  m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3));
  m_emit->blr(a64::x8);

  // Loop back to re-test pending_ticks against downcount. Without this branch execution fell
  // straight into downcount_hit and ran timing events after every single block.
  m_emit->b(&main_loop);

  // end while
  m_emit->Bind(&downcount_hit);

  // check events then for frame done
  EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
  m_emit->b(&frame_done_loop);

  // all done: undo the prologue and return to the caller
  m_emit->Bind(&exit_dispatcher);
  RestoreStackAfterCall(stack_adjust);
  m_register_cache.PopCalleeSavedRegisters(true);
  m_emit->add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
  m_emit->ret();

  CodeBlock::HostCodePointer ptr;
  u32 code_size;
  FinalizeBlock(&ptr, &code_size);
  Log_DevPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
  return reinterpret_cast<CodeCache::DispatcherFunction>(ptr);
}
/// Emits the AArch64 stub which runs exactly one compiled block: it sets up the same stack
/// frame and saved registers as the main dispatcher, calls through the register RARG1, then
/// tears the frame down and returns.
///
/// @return Entry point of the generated stub.
CodeCache::SingleBlockDispatcherFunction CodeGenerator::CompileSingleBlockDispatcher()
{
  // Prologue: reserve the stack frame and save the callee-saved registers up front.
  m_emit->sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
  m_register_cache.ReserveCalleeSavedRegisters();
  const u32 stack_adjust = PrepareStackForCall();

  // Call the block whose host code pointer was passed in.
  m_emit->blr(GetHostReg64(RARG1));

  // Epilogue.
  RestoreStackAfterCall(stack_adjust);
  m_register_cache.PopCalleeSavedRegisters(true);
  m_emit->add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
  m_emit->ret();

  CodeBlock::HostCodePointer ptr;
  u32 code_size;
  FinalizeBlock(&ptr, &code_size);
  // Labelled distinctly from the main dispatcher so the two log lines can be told apart.
  Log_DevPrintf("Single block dispatcher is %u bytes at %p", code_size, ptr);
  return reinterpret_cast<CodeCache::SingleBlockDispatcherFunction>(ptr);
}
} // namespace CPU::Recompiler

View file

@ -1,9 +1,12 @@
#include "common/align.h"
#include "common/log.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
#include "settings.h"
#include "timing_event.h"
Log_SetChannel(Recompiler::CodeGenerator);
namespace CPU::Recompiler {
@ -187,10 +190,12 @@ Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_
void CodeGenerator::EmitBeginBlock()
{
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
// Store the CPU struct pointer.
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
DebugAssert(cpu_reg_allocated);
m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
// m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
}
void CodeGenerator::EmitEndBlock()
@ -1392,15 +1397,8 @@ void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
m_register_cache.PopCallerSavedRegisters();
}
void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
void CodeGenerator::EmitCall(const void* ptr)
{
if (return_value)
return_value->Discard();
// shadow space allocate
const u32 adjust_size = PrepareStackForCall();
// actually call the function
if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())))
{
m_emit->call(ptr);
@ -1410,6 +1408,18 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
m_emit->mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
m_emit->call(GetHostReg64(RRETURN));
}
}
void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
{
if (return_value)
return_value->Discard();
// shadow space allocate
const u32 adjust_size = PrepareStackForCall();
// actually call the function
EmitCall(ptr);
// shadow space release
RestoreStackAfterCall(adjust_size);
@ -1434,15 +1444,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG1, arg1);
// actually call the function
if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())))
{
m_emit->call(ptr);
}
else
{
m_emit->mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
m_emit->call(GetHostReg64(RRETURN));
}
EmitCall(ptr);
// shadow space release
RestoreStackAfterCall(adjust_size);
@ -1468,15 +1470,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG2, arg2);
// actually call the function
if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())))
{
m_emit->call(ptr);
}
else
{
m_emit->mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
m_emit->call(GetHostReg64(RRETURN));
}
EmitCall(ptr);
// shadow space release
RestoreStackAfterCall(adjust_size);
@ -1504,15 +1498,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG3, arg3);
// actually call the function
if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())))
{
m_emit->call(ptr);
}
else
{
m_emit->mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
m_emit->call(GetHostReg64(RRETURN));
}
EmitCall(ptr);
// shadow space release
RestoreStackAfterCall(adjust_size);
@ -1541,15 +1527,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG4, arg4);
// actually call the function
if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())))
{
m_emit->call(ptr);
}
else
{
m_emit->mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
m_emit->call(GetHostReg64(RRETURN));
}
EmitCall(ptr);
// shadow space release
RestoreStackAfterCall(adjust_size);
@ -1567,11 +1545,23 @@ void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position)
m_emit->push(GetHostReg64(reg));
}
/// Pushes two host registers onto the stack: `reg` first, then `reg2`.
/// `position` is not used by this implementation.
void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position)
{
m_emit->push(GetHostReg64(reg));
m_emit->push(GetHostReg64(reg2));
}
/// Pops the top of the stack into `reg`.
/// `position` is not used by this implementation.
void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position)
{
m_emit->pop(GetHostReg64(reg));
}
/// Pops two host registers from the stack: `reg2` first, then `reg`, which is the reverse of
/// the order EmitPushHostRegPair() pushes them in.
/// `position` is not used by this implementation.
void CodeGenerator::EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position)
{
m_emit->pop(GetHostReg64(reg2));
m_emit->pop(GetHostReg64(reg));
}
void CodeGenerator::EmitLoadCPUStructField(HostReg host_reg, RegSize guest_size, u32 offset)
{
switch (guest_size)
@ -2516,4 +2506,140 @@ void CodeGenerator::EmitBindLabel(LabelType* label)
m_emit->L(*label);
}
/// Loads the address `ptr` into `host_reg`.
/// Emits a RIP-relative LEA when the distance from the current emit position fits in a signed
/// 32-bit value, otherwise a MOV of the full 64-bit address.
void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
{
// NOTE(review): the "+ 2" bias on the distance is not explained by the surrounding code;
// presumably it allows for part of the instruction's own length - confirm.
const s64 displacement =
static_cast<s64>(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())) + 2;
if (Xbyak::inner::IsInInt32(static_cast<u64>(displacement)))
m_emit->lea(GetHostReg64(host_reg), m_emit->dword[m_emit->rip + ptr]);
else
m_emit->mov(GetHostReg64(host_reg), reinterpret_cast<size_t>(ptr));
}
/// Emits the x64 dispatcher: a hand-written loop which services interrupts, refreshes the CPU
/// downcount from the head timing event, and then repeatedly looks up and calls compiled blocks
/// through the fast map until pending_ticks reaches the downcount. At that point it runs timing
/// events and starts over, exiting once State::frame_done is set.
/// Returns the entry point of the generated dispatcher.
CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
{
// Prologue: save the callee-saved registers up front and prepare the stack for calls.
m_register_cache.ReserveCalleeSavedRegisters();
const u32 stack_adjust = PrepareStackForCall();
// rbp <- &g_state; every State field below is addressed relative to rbp.
EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state);
Xbyak::Label frame_done_loop;
Xbyak::Label exit_dispatcher;
m_emit->L(frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->test(m_emit->byte[m_emit->rbp + offsetof(State, frame_done)], 1);
m_emit->jnz(exit_dispatcher, Xbyak::CodeGenerator::T_NEAR);
// eax <- sr
Xbyak::Label no_interrupt;
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.sr.bits)]);
// if Iec == 0 then goto no_interrupt
m_emit->test(m_emit->eax, 1);
m_emit->jz(no_interrupt);
// sr & cause
m_emit->and_(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.cause.bits)]);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->test(m_emit->eax, 0xFF00);
m_emit->jz(no_interrupt);
// we have an interrupt
EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));
// no interrupt or we just serviced it
m_emit->L(no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// eax <- head event->downcount
// downcount <- eax
EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, downcount)], m_emit->eax);
// main dispatch loop
Xbyak::Label main_loop;
m_emit->align(16);
m_emit->L(main_loop);
// eax <- pending_ticks
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
// while eax < downcount
Xbyak::Label downcount_hit;
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]);
m_emit->jge(downcount_hit);
// time to lookup the block
// eax <- pc
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, regs.pc)]);
// ebx <- (pc & RAM_MASK) >> 2
m_emit->mov(m_emit->ebx, m_emit->eax);
m_emit->and_(m_emit->ebx, Bus::RAM_MASK);
m_emit->shr(m_emit->ebx, 2);
// ecx <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
m_emit->mov(m_emit->ecx, m_emit->eax);
m_emit->and_(m_emit->ecx, Bus::BIOS_MASK);
m_emit->shr(m_emit->ecx, 2);
m_emit->add(m_emit->ecx, FAST_MAP_RAM_SLOT_COUNT);
// current_instruction_pc <- pc (eax)
m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, current_instruction_pc)], m_emit->eax);
// if ((eax (pc) & PHYSICAL_MEMORY_ADDRESS_MASK) >= BIOS_BASE) { use ecx as index }
m_emit->and_(m_emit->eax, PHYSICAL_MEMORY_ADDRESS_MASK);
m_emit->cmp(m_emit->eax, Bus::BIOS_BASE);
m_emit->cmovge(m_emit->ebx, m_emit->ecx);
// ebx contains our index, rax <- fast_map[ebx * 8], rax(), continue
EmitLoadGlobalAddress(Xbyak::Operand::RAX, CodeCache::GetFastMapPointer());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax + m_emit->rbx * 8]);
m_emit->call(m_emit->rax);
// back to the top of the loop to re-test pending_ticks against downcount
m_emit->jmp(main_loop);
// end while
m_emit->L(downcount_hit);
// check events then for frame done
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->jmp(frame_done_loop);
// all done
m_emit->L(exit_dispatcher);
RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->ret();
CodeBlock::HostCodePointer ptr;
u32 code_size;
FinalizeBlock(&ptr, &code_size);
Log_DevPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
return ptr;
}
/// Emits the x64 stub which runs exactly one compiled block: it saves the callee-saved
/// registers, points rbp at g_state, calls through the register RARG1, then restores state
/// and returns.
/// Returns the entry point of the generated stub.
CodeCache::SingleBlockDispatcherFunction CodeGenerator::CompileSingleBlockDispatcher()
{
m_register_cache.ReserveCalleeSavedRegisters();
const u32 stack_adjust = PrepareStackForCall();
// rbp <- &g_state, as in the main dispatcher.
EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state);
// presumably RARG1 holds the host code pointer passed as the stub's only argument - confirm
m_emit->call(GetHostReg64(RARG1));
RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->ret();
CodeBlock::HostCodePointer ptr;
u32 code_size;
FinalizeBlock(&ptr, &code_size);
Log_DevPrintf("Single block dispatcher is %u bytes at %p", code_size, ptr);
return reinterpret_cast<CodeCache::SingleBlockDispatcherFunction>(ptr);
}
} // namespace CPU::Recompiler

View file

@ -317,10 +317,27 @@ u32 RegisterCache::PopCallerSavedRegisters() const
{
if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
(HostRegState::CallerSaved | HostRegState::InUse))
{
u32 reg_pair;
for (reg_pair = (i - 1); reg_pair > 0 && reg_pair < HostReg_Count; reg_pair--)
{
if ((m_state.host_reg_state[reg_pair] &
(HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
(HostRegState::CallerSaved | HostRegState::InUse))
{
m_code_generator.EmitPopHostRegPair(static_cast<HostReg>(reg_pair), static_cast<HostReg>(i), position);
position -= 2;
i = reg_pair;
break;
}
}
if (reg_pair == 0)
{
m_code_generator.EmitPopHostReg(static_cast<HostReg>(i), position);
position--;
}
}
i--;
} while (i > 0);
return count;
@ -351,6 +368,56 @@ u32 RegisterCache::PopCalleeSavedRegisters(bool commit)
return count;
}
/// Saves every callee-saved host register that has not been saved yet and marks it as
/// allocated, recording the save order in m_state.callee_saved_order.
///
/// Registers are saved two at a time via EmitPushHostRegPair() whenever a second unsaved
/// callee-saved register exists after the current one; a register left without a partner is
/// saved on its own with EmitPushHostReg().
void RegisterCache::ReserveCalleeSavedRegisters()
{
  for (u32 reg = 0; reg < HostReg_Count; reg++)
  {
    if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) !=
        HostRegState::CalleeSaved)
    {
      // Not callee-saved, or already saved.
      continue;
    }

    DebugAssert(m_state.callee_saved_order_count < HostReg_Count);

    // can we find a paired register? (mainly for ARM)
    // The scan is bounded by reg_pair (not reg), so it stops at the end of the register file
    // instead of reading host_reg_state out of bounds when no partner exists.
    u32 reg_pair;
    for (reg_pair = reg + 1; reg_pair < HostReg_Count; reg_pair++)
    {
      if ((m_state.host_reg_state[reg_pair] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
          HostRegState::CalleeSaved)
      {
        m_code_generator.EmitPushHostRegPair(static_cast<HostReg>(reg), static_cast<HostReg>(reg_pair),
                                             GetActiveCalleeSavedRegisterCount());

        m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
        m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
        m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg_pair);
        m_state.host_reg_state[reg_pair] |= HostRegState::CalleeSavedAllocated;

        // Resume the outer scan after the partner; everything in between was already rejected.
        reg = reg_pair;
        break;
      }
    }

    // No partner found: save this register on its own.
    if (reg_pair == HostReg_Count)
    {
      m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg), GetActiveCalleeSavedRegisterCount());
      m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
      m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
    }
  }
}
void RegisterCache::AssumeCalleeSavedRegistersAreSaved()
{
for (u32 i = 0; i < HostReg_Count; i++)
{
if ((m_state.host_reg_state[i] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved)
{
m_state.host_reg_state[i] &= ~HostRegState::CalleeSaved;
}
}
}
void RegisterCache::PushState()
{
// need to copy this manually because of the load delay values

View file

@ -248,6 +248,12 @@ public:
/// Restore callee-saved registers. Call at the end of the function.
u32 PopCalleeSavedRegisters(bool commit);
/// Saves all callee-saved registers up front, enabling later use without further stack pushes.
void ReserveCalleeSavedRegisters();
/// Removes the callee-saved register flag from all registers. Call when compiling code blocks.
void AssumeCalleeSavedRegistersAreSaved();
/// Pushes the register allocator state, use when entering branched code.
void PushState();

View file

@ -4,6 +4,7 @@
#include "common/log.h"
#include "common/state_wrapper.h"
#include "common/string_util.h"
#include "cpu_code_cache.h"
#include "cpu_core.h"
#include "gpu.h"
#include "interrupt_controller.h"
@ -499,7 +500,7 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen
const u32 terminator = UINT32_C(0xFFFFFF);
std::memcpy(&ram_pointer[address], &terminator, sizeof(terminator));
Bus::InvalidateCodePages(address, word_count);
CPU::CodeCache::InvalidateCodePages(address, word_count);
return Bus::GetDMARAMTickCount(word_count);
}
@ -547,6 +548,6 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen
}
}
Bus::InvalidateCodePages(address, word_count);
CPU::CodeCache::InvalidateCodePages(address, word_count);
return Bus::GetDMARAMTickCount(word_count);
}

View file

@ -57,6 +57,11 @@ void UpdateCPUDowncount()
}
}
/// Returns the address of the head pointer of the active event list (s_active_events_head).
/// The generated dispatchers dereference it to read the head event's downcount.
TimingEvent** GetHeadEventPtr()
{
return &s_active_events_head;
}
static void SortEvent(TimingEvent* event)
{
const TickCount event_downcount = event->m_downcount;

View file

@ -88,6 +88,8 @@ void RunEvents();
void UpdateCPUDowncount();
TimingEvent** GetHeadEventPtr();
} // namespace TimingEvents