CPU/Recompiler: Write exception exits to far code buffer

Keeps the hot path nice and clean.
This commit is contained in:
Connor McLaughlin 2019-11-22 17:57:02 +10:00
parent 7b0978119b
commit 11966e4caf
7 changed files with 345 additions and 286 deletions

View file

@ -7,10 +7,12 @@
#include <sys/mman.h>
#endif
JitCodeBuffer::JitCodeBuffer(size_t size)
// Allocates one contiguous RWX region holding the near (hot) code area followed by
// the far (cold, e.g. exception-exit) code area.
JitCodeBuffer::JitCodeBuffer(size_t size /* = 64 * 1024 * 1024 */, size_t far_code_size /* = 0 */)
{
m_total_size = size + far_code_size;
#if defined(Y_PLATFORM_WINDOWS)
m_code_ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
m_code_ptr = VirtualAlloc(nullptr, m_total_size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#elif defined(Y_PLATFORM_LINUX) || defined(Y_PLATFORM_ANDROID)
// NOTE(review): this mmap still passes `size`, not m_total_size (it is an unchanged context
// line in the diff, while the VirtualAlloc call above was updated). On Linux/Android the far
// code region [m_code_ptr + size, m_code_ptr + m_total_size) is therefore never mapped and
// writing far code there would fault. Looks like a missed update — confirm and fix.
m_code_ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#else
@ -20,6 +22,10 @@ JitCodeBuffer::JitCodeBuffer(size_t size)
m_code_size = size;
m_code_used = 0;
// Far code lives immediately after the near region inside the single allocation.
m_far_code_ptr = static_cast<u8*>(m_code_ptr) + size;
m_far_code_size = far_code_size;
m_far_code_used = 0;
if (!m_code_ptr)
Panic("Failed to allocate code space.");
}
@ -27,34 +33,39 @@ JitCodeBuffer::JitCodeBuffer(size_t size)
// Releases the whole near+far allocation (the diff pairs show size -> m_total_size).
JitCodeBuffer::~JitCodeBuffer()
{
#if defined(Y_PLATFORM_WINDOWS)
// NOTE(review): per the WinAPI docs, VirtualFree with MEM_RELEASE requires dwSize == 0;
// passing the allocation size (old line) or m_total_size (new line) makes the call fail.
// Verify against the VirtualFree documentation.
VirtualFree(m_code_ptr, m_code_size, MEM_RELEASE);
VirtualFree(m_code_ptr, m_total_size, MEM_RELEASE);
#elif defined(Y_PLATFORM_LINUX) || defined(Y_PLATFORM_ANDROID)
// munmap takes the full mapped length; updated here to cover the far region too
// (but see the constructor note — the mmap itself may only map `size` bytes).
munmap(m_code_ptr, m_code_size);
munmap(m_code_ptr, m_total_size);
#endif
}
// Marks `length` bytes at the near free pointer as used and advances the pointer.
void JitCodeBuffer::CommitCode(size_t length)
{
// // Function alignment?
// size_t extra_bytes = ((length % 16) != 0) ? (16 - (length % 16)) : 0;
// for (size_t i = 0; i < extra_bytes; i++)
// reinterpret_cast<char*>(m_free_code_ptr)[i] = 0xCC;
Assert(length <= (m_code_size - m_code_used));
// Diff pair below: the cast type changed from char* to u8*; only one line survives in the file.
m_free_code_ptr = reinterpret_cast<char*>(m_free_code_ptr) + length;
m_free_code_ptr = reinterpret_cast<u8*>(m_free_code_ptr) + length;
m_code_used += length;
}
// Marks `length` bytes at the far free pointer as consumed, mirroring CommitCode()
// for the far (cold-path) region of the buffer.
void JitCodeBuffer::CommitFarCode(size_t length)
{
Assert(length <= (m_far_code_size - m_far_code_used));
// The usage counter and the free pointer are independent; bump both by `length`.
m_far_code_used += length;
m_free_far_code_ptr = reinterpret_cast<u8*>(m_free_far_code_ptr) + length;
}
// Discards all generated code: rewinds both the near and far free pointers and
// zeroes the usage counters. The memory itself stays mapped.
void JitCodeBuffer::Reset()
{
#if defined(Y_PLATFORM_WINDOWS)
// Flush the instruction cache over the whole region (diff pair: size -> m_total_size,
// so the far area is flushed as well) before the buffer is reused.
FlushInstructionCache(GetCurrentProcess(), m_code_ptr, m_code_size);
FlushInstructionCache(GetCurrentProcess(), m_code_ptr, m_total_size);
#elif defined(Y_PLATFORM_LINUX) || defined(Y_PLATFORM_ANDROID)
// TODO
#endif
m_free_code_ptr = m_code_ptr;
m_code_used = 0;
m_free_far_code_ptr = m_far_code_ptr;
m_far_code_used = 0;
}
void JitCodeBuffer::Align(u32 alignment, u8 padding_value)

View file

@ -4,13 +4,18 @@
// Owns a single RWX allocation split into a near (hot-path) region and an optional
// far (cold-path) region placed directly after it.
class JitCodeBuffer
{
public:
JitCodeBuffer(size_t size = 64 * 1024 * 1024);
JitCodeBuffer(size_t size = 64 * 1024 * 1024, size_t far_code_size = 0);
~JitCodeBuffer();
void Reset();
// Near (hot-path) code region accessors.
void* GetFreeCodePointer() const { return m_free_code_ptr; }
size_t GetFreeCodeSpace() const { return (m_code_size - m_code_used); }
void CommitCode(size_t length);
void Reset();
// Far code region accessors — cold paths (e.g. exception exits) kept out of the hot stream.
void* GetFreeFarCodePointer() const { return m_free_far_code_ptr; }
size_t GetFreeFarCodeSpace() const { return (m_far_code_size - m_far_code_used); }
void CommitFarCode(size_t length);
/// Adjusts the free code pointer to the specified alignment, padding with bytes.
/// Assumes alignment is a power-of-two.
@ -21,5 +26,12 @@ private:
void* m_free_code_ptr;
size_t m_code_size;
size_t m_code_used;
// Far-region bookkeeping; m_far_code_ptr points at m_code_ptr + m_code_size.
void* m_far_code_ptr;
void* m_free_far_code_ptr;
size_t m_far_code_size;
size_t m_far_code_used;
// Combined length (near + far) of the single underlying allocation.
size_t m_total_size;
};

View file

@ -9,8 +9,11 @@ Log_SetChannel(CPU::CodeCache);
namespace CPU {
bool USE_CODE_CACHE = true;
bool USE_RECOMPILER = true;
// NOTE(review): the new lines flip both defaults from true to false, which disables the code
// cache and recompiler entirely — this reads like local debugging state committed by accident
// (the same commit also adds debugbreak tick/pc checks). Confirm the intended defaults.
bool USE_CODE_CACHE = false;
bool USE_RECOMPILER = false;
// Sizes passed to JitCodeBuffer: 32 MiB near (hot) code + 32 MiB far (cold) code.
static constexpr size_t RECOMPILER_CODE_CACHE_SIZE = 32 * 1024 * 1024;
static constexpr size_t RECOMPILER_FAR_CODE_CACHE_SIZE = 32 * 1024 * 1024;
CodeCache::CodeCache() = default;
@ -22,7 +25,7 @@ void CodeCache::Initialize(System* system, Core* core, Bus* bus)
m_core = core;
m_bus = bus;
m_code_buffer = std::make_unique<JitCodeBuffer>();
m_code_buffer = std::make_unique<JitCodeBuffer>(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE);
m_asm_functions = std::make_unique<Recompiler::ASMFunctions>();
m_asm_functions->Generate(m_code_buffer.get());
}
@ -50,7 +53,7 @@ void CodeCache::Execute()
#if 0
const u32 tick = m_system->GetGlobalTickCounter() + m_core->GetPendingTicks();
if (tick == 8950812)
if (tick == 58672386)
__debugbreak();
#endif
@ -184,6 +187,11 @@ bool CodeCache::CompileBlock(CodeBlock* block)
bool is_branch_delay_slot = false;
bool is_load_delay_slot = false;
#if 0
if (pc == 0x0005aa90)
__debugbreak();
#endif
for (;;)
{
CodeBlockInstruction cbi = {};
@ -247,7 +255,10 @@ bool CodeCache::CompileBlock(CodeBlock* block)
if (USE_RECOMPILER)
{
// Ensure we're not going to run out of space while compiling this block.
if (m_code_buffer->GetFreeCodeSpace() < (block->instructions.size() * Recompiler::MAX_HOST_BYTES_PER_INSTRUCTION))
if (m_code_buffer->GetFreeCodeSpace() <
(block->instructions.size() * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
m_code_buffer->GetFreeFarCodeSpace() <
(block->instructions.size() * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION))
{
Log_WarningPrintf("Out of code space, flushing all blocks.");
Reset();

View file

@ -7,7 +7,8 @@ namespace CPU::Recompiler {
CodeGenerator::CodeGenerator(Core* cpu, JitCodeBuffer* code_buffer, const ASMFunctions& asm_functions)
: m_cpu(cpu), m_code_buffer(code_buffer), m_asm_functions(asm_functions), m_register_cache(*this),
m_emit(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer())
m_near_emitter(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer()),
m_far_emitter(code_buffer->GetFreeFarCodeSpace(), code_buffer->GetFreeFarCodePointer()), m_emit(&m_near_emitter)
{
InitHostRegs();
}
@ -608,7 +609,7 @@ void CodeGenerator::BlockPrologue()
void CodeGenerator::BlockEpilogue()
{
#if defined(_DEBUG) && defined(Y_CPU_X64)
m_emit.nop();
m_emit->nop();
#endif
m_register_cache.FlushAllGuestRegisters(true, true);
@ -632,7 +633,7 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
bool force_sync /* = false */)
{
#if defined(_DEBUG) && defined(Y_CPU_X64)
m_emit.nop();
m_emit->nop();
#endif
// reset dirty flags

View file

@ -33,9 +33,6 @@ public:
static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize);
static void AlignCodeBuffer(JitCodeBuffer* code_buffer);
RegisterCache& GetRegisterCache() { return m_register_cache; }
CodeEmitter& GetCodeEmitter() { return m_emit; }
bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
//////////////////////////////////////////////////////////////////////////
@ -146,14 +143,18 @@ private:
Value ConvertValueSize(const Value& value, RegSize size, bool sign_extend);
void ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend);
void SwitchToFarCode();
void SwitchToNearCode();
void* GetCurrentNearCodePointer() const;
void* GetCurrentFarCodePointer() const;
//////////////////////////////////////////////////////////////////////////
// Code Generation Helpers
//////////////////////////////////////////////////////////////////////////
// branch target, memory address, etc
void BlockPrologue();
void BlockEpilogue();
void InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles,
bool force_sync = false);
void InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles, bool force_sync = false);
void InstructionEpilogue(const CodeBlockInstruction& cbi);
void SyncCurrentInstructionPC();
void SyncPC();
@ -182,7 +183,9 @@ private:
const CodeBlockInstruction* m_block_start = nullptr;
const CodeBlockInstruction* m_block_end = nullptr;
RegisterCache m_register_cache;
CodeEmitter m_emit;
CodeEmitter m_near_emitter;
CodeEmitter m_far_emitter;
CodeEmitter* m_emit;
u32 m_delayed_pc_add = 0;
TickCount m_delayed_cycles_add = 0;
@ -197,4 +200,4 @@ private:
bool m_next_load_delay_dirty = false;
};
} // namespace CPU_X86::Recompiler
} // namespace CPU::Recompiler

File diff suppressed because it is too large Load diff

View file

@ -35,7 +35,8 @@ constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
constexpr RegSize HostPointerSize = RegSize_64;
// A reasonable "maximum" number of bytes per instruction.
constexpr u32 MAX_HOST_BYTES_PER_INSTRUCTION = 128;
// Split into separate near/far bounds so CompileBlock can reserve space in both
// buffers before emitting a block (see the GetFreeCodeSpace/GetFreeFarCodeSpace checks).
constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
// Are shifts implicitly masked to 0..31?
constexpr bool SHIFTS_ARE_IMPLICITLY_MASKED = true;