CPU/NewRec: Fix register allocation crash on Linux

This commit is contained in:
Stenzek 2024-05-14 13:57:35 +10:00
parent e517581041
commit 8f415a44e6
No known key found for this signature in database
4 changed files with 9 additions and 54 deletions

View file

@ -89,8 +89,6 @@ static std::vector<Block*> s_blocks;
// for compiling - reuse to avoid allocations // for compiling - reuse to avoid allocations
static BlockInstructionList s_block_instructions; static BlockInstructionList s_block_instructions;
#ifdef ENABLE_RECOMPILER_SUPPORT
static void BacklinkBlocks(u32 pc, const void* dst); static void BacklinkBlocks(u32 pc, const void* dst);
static void UnlinkBlockExits(Block* block); static void UnlinkBlockExits(Block* block);
@ -143,18 +141,12 @@ static JitCodeBuffer s_code_buffer;
static u32 s_total_instructions_compiled = 0; static u32 s_total_instructions_compiled = 0;
static u32 s_total_host_instructions_emitted = 0; static u32 s_total_host_instructions_emitted = 0;
#endif #endif
#endif // ENABLE_RECOMPILER_SUPPORT
} // namespace CPU::CodeCache } // namespace CPU::CodeCache
bool CPU::CodeCache::IsUsingAnyRecompiler() bool CPU::CodeCache::IsUsingAnyRecompiler()
{ {
#ifdef ENABLE_RECOMPILER_SUPPORT
return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler || return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler ||
g_settings.cpu_execution_mode == CPUExecutionMode::NewRec); g_settings.cpu_execution_mode == CPUExecutionMode::NewRec);
#else
return false;
#endif
} }
bool CPU::CodeCache::IsUsingFastmem() bool CPU::CodeCache::IsUsingFastmem()
@ -166,7 +158,6 @@ bool CPU::CodeCache::ProcessStartup(Error* error)
{ {
AllocateLUTs(); AllocateLUTs();
#ifdef ENABLE_RECOMPILER_SUPPORT
#ifdef USE_STATIC_CODE_BUFFER #ifdef USE_STATIC_CODE_BUFFER
const bool has_buffer = const bool has_buffer =
s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE, HOST_PAGE_SIZE); s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE, HOST_PAGE_SIZE);
@ -178,7 +169,6 @@ bool CPU::CodeCache::ProcessStartup(Error* error)
Error::SetStringView(error, "Failed to initialize code space"); Error::SetStringView(error, "Failed to initialize code space");
return false; return false;
} }
#endif
if (!PageFaultHandler::Install(error)) if (!PageFaultHandler::Install(error))
return false; return false;
@ -188,10 +178,7 @@ bool CPU::CodeCache::ProcessStartup(Error* error)
void CPU::CodeCache::ProcessShutdown() void CPU::CodeCache::ProcessShutdown()
{ {
#ifdef ENABLE_RECOMPILER_SUPPORT
s_code_buffer.Destroy(); s_code_buffer.Destroy();
#endif
DeallocateLUTs(); DeallocateLUTs();
} }
@ -199,14 +186,12 @@ void CPU::CodeCache::Initialize()
{ {
Assert(s_blocks.empty()); Assert(s_blocks.empty());
#ifdef ENABLE_RECOMPILER_SUPPORT
if (IsUsingAnyRecompiler()) if (IsUsingAnyRecompiler())
{ {
s_code_buffer.Reset(); s_code_buffer.Reset();
CompileASMFunctions(); CompileASMFunctions();
ResetCodeLUT(); ResetCodeLUT();
} }
#endif
Bus::UpdateFastmemViews(IsUsingAnyRecompiler() ? g_settings.cpu_fastmem_mode : CPUFastmemMode::Disabled); Bus::UpdateFastmemViews(IsUsingAnyRecompiler() ? g_settings.cpu_fastmem_mode : CPUFastmemMode::Disabled);
CPU::UpdateMemoryPointers(); CPU::UpdateMemoryPointers();
@ -215,10 +200,7 @@ void CPU::CodeCache::Initialize()
void CPU::CodeCache::Shutdown() void CPU::CodeCache::Shutdown()
{ {
ClearBlocks(); ClearBlocks();
#ifdef ENABLE_RECOMPILER_SUPPORT
ClearASMFunctions(); ClearASMFunctions();
#endif
Bus::UpdateFastmemViews(CPUFastmemMode::Disabled); Bus::UpdateFastmemViews(CPUFastmemMode::Disabled);
CPU::UpdateMemoryPointers(); CPU::UpdateMemoryPointers();
@ -228,7 +210,6 @@ void CPU::CodeCache::Reset()
{ {
ClearBlocks(); ClearBlocks();
#ifdef ENABLE_RECOMPILER_SUPPORT
if (IsUsingAnyRecompiler()) if (IsUsingAnyRecompiler())
{ {
ClearASMFunctions(); ClearASMFunctions();
@ -236,12 +217,10 @@ void CPU::CodeCache::Reset()
CompileASMFunctions(); CompileASMFunctions();
ResetCodeLUT(); ResetCodeLUT();
} }
#endif
} }
void CPU::CodeCache::Execute() void CPU::CodeCache::Execute()
{ {
#ifdef ENABLE_RECOMPILER_SUPPORT
if (IsUsingAnyRecompiler()) if (IsUsingAnyRecompiler())
{ {
g_enter_recompiler(); g_enter_recompiler();
@ -251,9 +230,6 @@ void CPU::CodeCache::Execute()
{ {
ExecuteCachedInterpreter(); ExecuteCachedInterpreter();
} }
#else
ExecuteCachedInterpreter();
#endif
} }
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -677,13 +653,11 @@ CPU::CodeCache::PageProtectionMode CPU::CodeCache::GetProtectionModeForBlock(con
void CPU::CodeCache::InvalidateBlock(Block* block, BlockState new_state) void CPU::CodeCache::InvalidateBlock(Block* block, BlockState new_state)
{ {
#ifdef ENABLE_RECOMPILER_SUPPORT
if (block->state == BlockState::Valid) if (block->state == BlockState::Valid)
{ {
SetCodeLUT(block->pc, g_compile_or_revalidate_block); SetCodeLUT(block->pc, g_compile_or_revalidate_block);
BacklinkBlocks(block->pc, g_compile_or_revalidate_block); BacklinkBlocks(block->pc, g_compile_or_revalidate_block);
} }
#endif
block->state = new_state; block->state = new_state;
} }
@ -723,11 +697,9 @@ void CPU::CodeCache::ClearBlocks()
ppi = {}; ppi = {};
} }
#ifdef ENABLE_RECOMPILER_SUPPORT
s_fastmem_backpatch_info.clear(); s_fastmem_backpatch_info.clear();
s_fastmem_faulting_pcs.clear(); s_fastmem_faulting_pcs.clear();
s_block_links.clear(); s_block_links.clear();
#endif
for (Block* block : s_blocks) for (Block* block : s_blocks)
{ {
@ -755,11 +727,7 @@ PageFaultHandler::HandlerResult PageFaultHandler::HandlePageFault(void* exceptio
return PageFaultHandler::HandlerResult::ContinueExecution; return PageFaultHandler::HandlerResult::ContinueExecution;
} }
#ifdef ENABLE_RECOMPILER_SUPPORT
return CPU::CodeCache::HandleFastmemException(exception_pc, fault_address, is_write); return CPU::CodeCache::HandleFastmemException(exception_pc, fault_address, is_write);
#else
return PageFaultHandler::HandlerResult::ExecuteNextHandler;
#endif
} }
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -1303,8 +1271,6 @@ void CPU::CodeCache::FillBlockRegInfo(Block* block)
// MARK: - Recompiler Glue // MARK: - Recompiler Glue
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef ENABLE_RECOMPILER_SUPPORT
void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc) void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc)
{ {
// TODO: this doesn't currently handle when the cache overflows... // TODO: this doesn't currently handle when the cache overflows...
@ -1588,7 +1554,6 @@ void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 gue
PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(void* exception_pc, void* fault_address, PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(void* exception_pc, void* fault_address,
bool is_write) bool is_write)
{ {
// TODO: Catch general RAM writes, not just fastmem
PhysicalMemoryAddress guest_address; PhysicalMemoryAddress guest_address;
#ifdef ENABLE_MMAP_FASTMEM #ifdef ENABLE_MMAP_FASTMEM
@ -1705,5 +1670,3 @@ void CPU::CodeCache::RemoveBackpatchInfoForRange(const void* host_code, u32 size
// erase the whole range at once // erase the whole range at once
s_fastmem_backpatch_info.erase(start_iter, end_iter); s_fastmem_backpatch_info.erase(start_iter, end_iter);
} }
#endif // ENABLE_RECOMPILER_SUPPORT

View file

@ -229,9 +229,6 @@ void InterpretUncachedBlock();
void LogCurrentState(); void LogCurrentState();
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
#define ENABLE_RECOMPILER_SUPPORT 1
#if defined(_DEBUG) || false #if defined(_DEBUG) || false
// Enable disassembly of host assembly code. // Enable disassembly of host assembly code.
#define ENABLE_HOST_DISASSEMBLY 1 #define ENABLE_HOST_DISASSEMBLY 1
@ -278,6 +275,4 @@ extern PerfScope MIPSPerfScope;
#endif // ENABLE_RECOMPILER_PROFILING #endif // ENABLE_RECOMPILER_PROFILING
#endif // ENABLE_RECOMPILER
} // namespace CPU::CodeCache } // namespace CPU::CodeCache

View file

@ -542,7 +542,7 @@ u32 CPU::NewRec::Compiler::GetFreeHostReg(u32 flags)
// find register with lowest counter // find register with lowest counter
u32 lowest = NUM_HOST_REGS; u32 lowest = NUM_HOST_REGS;
u16 lowest_count = std::numeric_limits<u16>::max(); u32 lowest_count = std::numeric_limits<u32>::max();
for (u32 i = 0; i < NUM_HOST_REGS; i++) for (u32 i = 0; i < NUM_HOST_REGS; i++)
{ {
const HostRegAlloc& ra = m_host_regs[i]; const HostRegAlloc& ra = m_host_regs[i];
@ -577,7 +577,7 @@ u32 CPU::NewRec::Compiler::GetFreeHostReg(u32 flags)
if (iinfo->UsedTest(ra.reg) && flags & HR_CALLEE_SAVED) if (iinfo->UsedTest(ra.reg) && flags & HR_CALLEE_SAVED)
{ {
u32 caller_saved_lowest = NUM_HOST_REGS; u32 caller_saved_lowest = NUM_HOST_REGS;
u16 caller_saved_lowest_count = std::numeric_limits<u16>::max(); u32 caller_saved_lowest_count = std::numeric_limits<u32>::max();
for (u32 i = 0; i < NUM_HOST_REGS; i++) for (u32 i = 0; i < NUM_HOST_REGS; i++)
{ {
constexpr u32 caller_req_flags = HR_USABLE; constexpr u32 caller_req_flags = HR_USABLE;
@ -1274,8 +1274,8 @@ void CPU::NewRec::Compiler::CompileInstruction()
case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break; case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break;
case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break; case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break;
case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break; case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break;
case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(false); break; case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(false); break;
case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(true); break; case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(true); break;
case InstructionOp::cop0: case InstructionOp::cop0:
{ {

View file

@ -451,8 +451,12 @@ struct Settings
static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f; static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f;
static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f; static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f;
#if defined(ENABLE_RECOMPILER) // Prefer oldrec over newrec for now. Except on RISC-V, where there is no oldrec.
#if defined(CPU_ARCH_RISCV64)
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::NewRec;
#else
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler; static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler;
#endif
// LUT still ends up faster on Apple Silicon for now, because of 16K pages. // LUT still ends up faster on Apple Silicon for now, because of 16K pages.
#if defined(ENABLE_MMAP_FASTMEM) && (!defined(__APPLE__) || !defined(__aarch64__)) #if defined(ENABLE_MMAP_FASTMEM) && (!defined(__APPLE__) || !defined(__aarch64__))
@ -460,13 +464,6 @@ struct Settings
#else #else
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::LUT; static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::LUT;
#endif #endif
#elif defined(ENABLE_NEWREC)
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::NewRec;
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::MMap;
#else
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::CachedInterpreter;
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::Disabled;
#endif
static constexpr DisplayDeinterlacingMode DEFAULT_DISPLAY_DEINTERLACING_MODE = DisplayDeinterlacingMode::Adaptive; static constexpr DisplayDeinterlacingMode DEFAULT_DISPLAY_DEINTERLACING_MODE = DisplayDeinterlacingMode::Adaptive;
static constexpr DisplayCropMode DEFAULT_DISPLAY_CROP_MODE = DisplayCropMode::Overscan; static constexpr DisplayCropMode DEFAULT_DISPLAY_CROP_MODE = DisplayCropMode::Overscan;