CPU/Recompiler: Implement block linking

This commit is contained in:
Connor McLaughlin 2021-05-22 14:55:25 +10:00
parent 29bc0c950a
commit 21938e14c6
17 changed files with 666 additions and 165 deletions

View file

@ -38,6 +38,7 @@ static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 32 * 1024 * 1024;
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 16 * 1024 * 1024; static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 16 * 1024 * 1024;
#endif #endif
static constexpr u32 CODE_WRITE_FAULT_THRESHOLD_FOR_SLOWMEM = 10; static constexpr u32 CODE_WRITE_FAULT_THRESHOLD_FOR_SLOWMEM = 10;
static constexpr u32 INVALIDATE_THRESHOLD_TO_DISABLE_LINKING = 10;
#ifdef USE_STATIC_CODE_BUFFER #ifdef USE_STATIC_CODE_BUFFER
static constexpr u32 RECOMPILER_GUARD_SIZE = 4096; static constexpr u32 RECOMPILER_GUARD_SIZE = 4096;
@ -205,8 +206,8 @@ static void RemoveReferencesToBlock(CodeBlock* block);
static void AddBlockToPageMap(CodeBlock* block); static void AddBlockToPageMap(CodeBlock* block);
static void RemoveBlockFromPageMap(CodeBlock* block); static void RemoveBlockFromPageMap(CodeBlock* block);
/// Link block from to to. /// Link block from to to. Returns the successor index.
static void LinkBlock(CodeBlock* from, CodeBlock* to); static void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
/// Unlink all blocks which point to this block, and any that this block links to. /// Unlink all blocks which point to this block, and any that this block links to.
static void UnlinkBlock(CodeBlock* block); static void UnlinkBlock(CodeBlock* block);
@ -352,8 +353,9 @@ static void ExecuteImpl()
{ {
// Try to find an already-linked block. // Try to find an already-linked block.
// TODO: Don't need to dereference the block, just store a pointer to the code. // TODO: Don't need to dereference the block, just store a pointer to the code.
for (CodeBlock* linked_block : block->link_successors) for (const CodeBlock::LinkInfo& li : block->link_successors)
{ {
CodeBlock* linked_block = li.block;
if (linked_block->key.bits == next_block_key.bits) if (linked_block->key.bits == next_block_key.bits)
{ {
if (linked_block->invalidated && !RevalidateBlock(linked_block)) if (linked_block->invalidated && !RevalidateBlock(linked_block))
@ -373,7 +375,7 @@ static void ExecuteImpl()
if (next_block) if (next_block)
{ {
// Link the previous block to this new block if we find a new block. // Link the previous block to this new block if we find a new block.
LinkBlock(block, next_block); LinkBlock(block, next_block, nullptr, nullptr, 0);
block = next_block; block = next_block;
goto reexecute_block; goto reexecute_block;
} }
@ -686,6 +688,7 @@ bool CompileBlock(CodeBlock* block)
cbi.is_store_instruction = IsMemoryStoreInstruction(cbi.instruction); cbi.is_store_instruction = IsMemoryStoreInstruction(cbi.instruction);
cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction); cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction);
cbi.can_trap = CanInstructionTrap(cbi.instruction, InUserMode()); cbi.can_trap = CanInstructionTrap(cbi.instruction, InUserMode());
cbi.is_direct_branch_instruction = IsDirectBranchInstruction(cbi.instruction);
if (g_settings.cpu_recompiler_icache) if (g_settings.cpu_recompiler_icache)
{ {
@ -718,7 +721,7 @@ bool CompileBlock(CodeBlock* block)
} }
// change the pc for the second branch's delay slot, it comes from the first branch // change the pc for the second branch's delay slot, it comes from the first branch
pc = GetBranchInstructionTarget(prev_cbi.instruction, prev_cbi.pc); pc = GetDirectBranchTarget(prev_cbi.instruction, prev_cbi.pc);
Log_DevPrintf("Double branch at %08X, using delay slot from %08X -> %08X", cbi.pc, prev_cbi.pc, pc); Log_DevPrintf("Double branch at %08X, using delay slot from %08X -> %08X", cbi.pc, prev_cbi.pc, pc);
} }
@ -840,6 +843,25 @@ void InvalidateBlocksWithPageIndex(u32 page_index)
// Invalidate forces the block to be checked again. // Invalidate forces the block to be checked again.
Log_DebugPrintf("Invalidating block at 0x%08X", block->GetPC()); Log_DebugPrintf("Invalidating block at 0x%08X", block->GetPC());
block->invalidated = true; block->invalidated = true;
if (block->can_link)
{
const u32 frame_number = System::GetFrameNumber();
const u32 frame_diff = frame_number - block->invalidate_frame_number;
if (frame_diff <= INVALIDATE_THRESHOLD_TO_DISABLE_LINKING)
{
Log_PerfPrintf("Block 0x%08X has been invalidated in %u frames, disabling linking", block->GetPC(), frame_diff);
block->can_link = false;
}
else
{
// It's been a while since this block was modified, so it's all good.
block->invalidate_frame_number = frame_number;
}
}
UnlinkBlock(block);
#ifdef WITH_RECOMPILER #ifdef WITH_RECOMPILER
SetFastMap(block->GetPC(), FastCompileBlockFunction); SetFastMap(block->GetPC(), FastCompileBlockFunction);
#endif #endif
@ -902,30 +924,80 @@ void RemoveBlockFromPageMap(CodeBlock* block)
} }
} }
void LinkBlock(CodeBlock* from, CodeBlock* to) void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size)
{ {
Log_DebugPrintf("Linking block %p(%08x) to %p(%08x)", from, from->GetPC(), to, to->GetPC()); Log_DebugPrintf("Linking block %p(%08x) to %p(%08x)", from, from->GetPC(), to, to->GetPC());
from->link_successors.push_back(to);
to->link_predecessors.push_back(from); CodeBlock::LinkInfo li;
li.block = to;
li.host_pc = host_pc;
li.host_resolve_pc = host_resolve_pc;
li.host_pc_size = host_pc_size;
from->link_successors.push_back(li);
li.block = from;
to->link_predecessors.push_back(li);
// apply in code
if (host_pc)
{
Log_ProfilePrintf("Backpatching %p(%08x) to jump to block %p (%08x)", host_pc, from->GetPC(), to, to->GetPC());
s_code_buffer.WriteProtect(false);
Recompiler::CodeGenerator::BackpatchBranch(host_pc, host_pc_size, reinterpret_cast<void*>(to->host_code));
s_code_buffer.WriteProtect(true);
}
} }
void UnlinkBlock(CodeBlock* block) void UnlinkBlock(CodeBlock* block)
{ {
for (CodeBlock* predecessor : block->link_predecessors) if (block->link_predecessors.empty() && block->link_successors.empty())
return;
#ifdef WITH_RECOMPILER
if (g_settings.IsUsingRecompiler() && g_settings.cpu_recompiler_block_linking)
s_code_buffer.WriteProtect(false);
#endif
for (CodeBlock::LinkInfo& li : block->link_predecessors)
{ {
auto iter = std::find(predecessor->link_successors.begin(), predecessor->link_successors.end(), block); auto iter = std::find_if(li.block->link_successors.begin(), li.block->link_successors.end(),
Assert(iter != predecessor->link_successors.end()); [block](const CodeBlock::LinkInfo& li) { return li.block == block; });
predecessor->link_successors.erase(iter); Assert(iter != li.block->link_successors.end());
// Restore blocks linked to this block back to the resolver
if (li.host_pc)
{
Log_ProfilePrintf("Backpatching %p(%08x) [predecessor] to jump to resolver", li.host_pc, li.block->GetPC());
Recompiler::CodeGenerator::BackpatchBranch(li.host_pc, li.host_pc_size, li.host_resolve_pc);
}
li.block->link_successors.erase(iter);
} }
block->link_predecessors.clear(); block->link_predecessors.clear();
for (CodeBlock* successor : block->link_successors) for (CodeBlock::LinkInfo& li : block->link_successors)
{ {
auto iter = std::find(successor->link_predecessors.begin(), successor->link_predecessors.end(), block); auto iter = std::find_if(li.block->link_predecessors.begin(), li.block->link_predecessors.end(),
Assert(iter != successor->link_predecessors.end()); [block](const CodeBlock::LinkInfo& li) { return li.block == block; });
successor->link_predecessors.erase(iter); Assert(iter != li.block->link_predecessors.end());
// Restore blocks we're linking to back to the resolver, since the successor won't be linked to us to backpatch if
// it changes.
if (li.host_pc)
{
Log_ProfilePrintf("Backpatching %p(%08x) [successor] to jump to resolver", li.host_pc, li.block->GetPC());
Recompiler::CodeGenerator::BackpatchBranch(li.host_pc, li.host_pc_size, li.host_resolve_pc);
}
// Don't have to do anything special for successors - just let the successor know it's no longer linked.
li.block->link_predecessors.erase(iter);
} }
block->link_successors.clear(); block->link_successors.clear();
#ifdef WITH_RECOMPILER
if (g_settings.IsUsingRecompiler() && g_settings.cpu_recompiler_block_linking)
s_code_buffer.WriteProtect(true);
#endif
} }
#ifdef WITH_RECOMPILER #ifdef WITH_RECOMPILER
@ -1104,3 +1176,39 @@ Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc,
#endif // WITH_RECOMPILER #endif // WITH_RECOMPILER
} // namespace CPU::CodeCache } // namespace CPU::CodeCache
#ifdef WITH_RECOMPILER
void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, void* host_resolve_pc, u32 host_pc_size)
{
using namespace CPU::CodeCache;
CodeBlockKey key = GetNextBlockKey();
CodeBlock* successor_block = LookupBlock(key);
if (!successor_block || (successor_block->invalidated && !RevalidateBlock(successor_block)) || !block->can_link ||
!successor_block->can_link)
{
// just turn it into a return to the dispatcher instead.
s_code_buffer.WriteProtect(false);
CodeGenerator::BackpatchReturn(host_pc, host_pc_size);
s_code_buffer.WriteProtect(true);
}
else
{
// link blocks!
LinkBlock(block, successor_block, host_pc, host_resolve_pc, host_pc_size);
}
}
void CPU::Recompiler::Thunks::LogPC(u32 pc)
{
#if 0
CPU::CodeCache::LogCurrentState();
#endif
#if 0
if (TimingEvents::GetGlobalTickCounter() + GetPendingTicks() == 382856482)
__debugbreak();
#endif
}
#endif // WITH_RECOMPILER

View file

@ -60,6 +60,14 @@ struct CodeBlock
{ {
using HostCodePointer = void (*)(); using HostCodePointer = void (*)();
struct LinkInfo
{
CodeBlock* block;
void* host_pc;
void* host_resolve_pc;
u32 host_pc_size;
};
CodeBlock(const CodeBlockKey key_) : key(key_) {} CodeBlock(const CodeBlockKey key_) : key(key_) {}
CodeBlockKey key; CodeBlockKey key;
@ -67,8 +75,8 @@ struct CodeBlock
HostCodePointer host_code = nullptr; HostCodePointer host_code = nullptr;
std::vector<CodeBlockInstruction> instructions; std::vector<CodeBlockInstruction> instructions;
std::vector<CodeBlock*> link_predecessors; std::vector<LinkInfo> link_predecessors;
std::vector<CodeBlock*> link_successors; std::vector<LinkInfo> link_successors;
TickCount uncached_fetch_ticks = 0; TickCount uncached_fetch_ticks = 0;
u32 icache_line_count = 0; u32 icache_line_count = 0;
@ -80,9 +88,11 @@ struct CodeBlock
bool contains_loadstore_instructions = false; bool contains_loadstore_instructions = false;
bool contains_double_branches = false; bool contains_double_branches = false;
bool invalidated = false; bool invalidated = false;
bool can_link = true;
u32 recompile_frame_number = 0; u32 recompile_frame_number = 0;
u32 recompile_count = 0; u32 recompile_count = 0;
u32 invalidate_frame_number = 0;
const u32 GetPC() const { return key.GetPC(); } const u32 GetPC() const { return key.GetPC(); }
const u32 GetSizeInBytes() const { return static_cast<u32>(instructions.size()) * sizeof(Instruction); } const u32 GetSizeInBytes() const { return static_cast<u32>(instructions.size()) * sizeof(Instruction); }

View file

@ -21,23 +21,20 @@ bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* o
m_block = block; m_block = block;
m_block_start = block->instructions.data(); m_block_start = block->instructions.data();
m_block_end = block->instructions.data() + block->instructions.size(); m_block_end = block->instructions.data() + block->instructions.size();
m_pc = block->GetPC(); m_pc = block->GetPC();
m_pc_valid = true; m_pc_valid = true;
EmitBeginBlock(); m_fastmem_load_base_in_register = false;
m_fastmem_store_base_in_register = false;
EmitBeginBlock(true);
BlockPrologue(); BlockPrologue();
const CodeBlockInstruction* cbi = m_block_start; m_current_instruction = m_block_start;
while (cbi != m_block_end) while (m_current_instruction != m_block_end)
{ {
#ifdef _DEBUG if (!CompileInstruction(*m_current_instruction))
SmallString disasm;
DisassembleInstruction(&disasm, cbi->pc, cbi->instruction.bits);
Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
#endif
m_current_instruction = cbi;
if (!CompileInstruction(*cbi))
{ {
m_current_instruction = nullptr; m_current_instruction = nullptr;
m_block_end = nullptr; m_block_end = nullptr;
@ -46,11 +43,14 @@ bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* o
return false; return false;
} }
cbi++; m_current_instruction++;
} }
if (!m_block_linked)
{
BlockEpilogue(); BlockEpilogue();
EmitEndBlock(); EmitEndBlock(true, true);
}
FinalizeBlock(out_host_code, out_host_code_size); FinalizeBlock(out_host_code, out_host_code_size);
Log_ProfilePrintf("JIT block 0x%08X: %zu instructions (%u bytes), %u host bytes", block->GetPC(), Log_ProfilePrintf("JIT block 0x%08X: %zu instructions (%u bytes), %u host bytes", block->GetPC(),
@ -957,6 +957,10 @@ void CodeGenerator::BlockPrologue()
EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0)); EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0));
#if 0
EmitFunctionCall(nullptr, &Thunks::LogPC, Value::FromConstantU32(m_pc));
#endif
if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0) if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0)
EmitICacheCheckAndUpdate(); EmitICacheCheckAndUpdate();
@ -2184,7 +2188,10 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
{ {
InstructionPrologue(cbi, 1); InstructionPrologue(cbi, 1);
auto DoBranch = [this](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg, Value&& branch_target) { auto DoBranch = [this, &cbi](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg,
Value&& branch_target) {
const bool can_link_block = cbi.is_direct_branch_instruction && g_settings.cpu_recompiler_block_linking;
// ensure the lr register is flushed, since we want it's correct value after the branch // ensure the lr register is flushed, since we want it's correct value after the branch
// we don't want to invalidate it yet because of "jalr r0, r0", branch_target could be the lr_reg. // we don't want to invalidate it yet because of "jalr r0, r0", branch_target could be the lr_reg.
if (lr_reg != Reg::count && lr_reg != Reg::zero) if (lr_reg != Reg::count && lr_reg != Reg::zero)
@ -2199,8 +2206,11 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
EmitCopyValue(next_pc.GetHostRegister(), CalculatePC(4)); EmitCopyValue(next_pc.GetHostRegister(), CalculatePC(4));
} }
LabelType branch_not_taken; Value take_branch;
LabelType branch_taken, branch_not_taken;
if (condition != Condition::Always) if (condition != Condition::Always)
{
if (!can_link_block)
{ {
// condition is inverted because we want the case for skipping it // condition is inverted because we want the case for skipping it
if (lhs.IsValid() && rhs.IsValid()) if (lhs.IsValid() && rhs.IsValid())
@ -2210,6 +2220,45 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
else else
EmitConditionalBranch(condition, true, &branch_not_taken); EmitConditionalBranch(condition, true, &branch_not_taken);
} }
else
{
take_branch = m_register_cache.AllocateScratch(RegSize_32);
switch (condition)
{
case Condition::NotEqual:
case Condition::Equal:
case Condition::Overflow:
case Condition::Greater:
case Condition::GreaterEqual:
case Condition::LessEqual:
case Condition::Less:
case Condition::Above:
case Condition::AboveEqual:
case Condition::Below:
case Condition::BelowEqual:
{
EmitCmp(lhs.GetHostRegister(), rhs);
EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
}
break;
case Condition::Negative:
case Condition::PositiveOrZero:
case Condition::NotZero:
case Condition::Zero:
{
Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0));
EmitTest(lhs.GetHostRegister(), lhs);
EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
}
break;
default:
UnreachableCode();
break;
}
}
}
// save the old PC if we want to // save the old PC if we want to
if (lr_reg != Reg::count && lr_reg != Reg::zero) if (lr_reg != Reg::count && lr_reg != Reg::zero)
@ -2218,6 +2267,9 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
// if we don't cancel it, at the end of the instruction the value we write can be overridden. // if we don't cancel it, at the end of the instruction the value we write can be overridden.
EmitCancelInterpreterLoadDelayForReg(lr_reg); EmitCancelInterpreterLoadDelayForReg(lr_reg);
EmitStoreGuestRegister(lr_reg, next_pc); EmitStoreGuestRegister(lr_reg, next_pc);
// now invalidate lr because it was possibly written in the branch
m_register_cache.InvalidateGuestRegister(lr_reg);
} }
// we don't need to test the address of constant branches unless they're definitely misaligned, which would be // we don't need to test the address of constant branches unless they're definitely misaligned, which would be
@ -2256,9 +2308,109 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
m_register_cache.PopState(); m_register_cache.PopState();
} }
if (can_link_block)
{
// if it's an in-block branch, compile the delay slot now
// TODO: Make this more optimal by moving the condition down if it's a nop
Assert((m_current_instruction + 1) != m_block_end);
InstructionEpilogue(cbi);
m_current_instruction++;
if (!CompileInstruction(*m_current_instruction))
return false;
// flush all regs since we're at the end of the block now
BlockEpilogue();
m_block_linked = true;
// check downcount
Value pending_ticks = m_register_cache.AllocateScratch(RegSize_32);
Value downcount = m_register_cache.AllocateScratch(RegSize_32);
EmitLoadCPUStructField(pending_ticks.GetHostRegister(), RegSize_32, offsetof(State, pending_ticks));
EmitLoadCPUStructField(downcount.GetHostRegister(), RegSize_32, offsetof(State, downcount));
// pending < downcount
LabelType return_to_dispatcher;
if (condition != Condition::Always)
{
EmitBranchIfBitClear(take_branch.GetHostRegister(), take_branch.size, 0, &branch_not_taken);
m_register_cache.PushState();
{
WriteNewPC(branch_target, false);
EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
&return_to_dispatcher);
// we're committed at this point :D
EmitStoreCPUStructField(offsetof(State, current_instruction_pc), branch_target);
EmitEndBlock(true, false);
const void* jump_pointer = GetCurrentCodePointer();
const void* resolve_pointer = GetCurrentFarCodePointer();
EmitBranch(resolve_pointer);
const u32 jump_size = static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) -
static_cast<const char*>(jump_pointer));
SwitchToFarCode();
EmitBeginBlock(true);
EmitFunctionCall(nullptr, &CPU::Recompiler::Thunks::ResolveBranch, Value::FromConstantPtr(m_block),
Value::FromConstantPtr(jump_pointer), Value::FromConstantPtr(resolve_pointer),
Value::FromConstantU32(jump_size));
EmitEndBlock(true, true);
}
m_register_cache.PopState();
SwitchToNearCode();
EmitBindLabel(&branch_not_taken);
}
m_register_cache.PushState();
if (condition != Condition::Always)
{
WriteNewPC(next_pc, true);
EmitStoreCPUStructField(offsetof(State, current_instruction_pc), next_pc);
}
else
{
WriteNewPC(branch_target, true);
EmitStoreCPUStructField(offsetof(State, current_instruction_pc), branch_target);
}
EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
&return_to_dispatcher);
if (condition != Condition::Always)
EmitStoreCPUStructField(offsetof(State, current_instruction_pc), next_pc);
else
EmitStoreCPUStructField(offsetof(State, current_instruction_pc), branch_target);
EmitEndBlock(true, false);
const void* jump_pointer = GetCurrentCodePointer();
const void* resolve_pointer = GetCurrentFarCodePointer();
EmitBranch(GetCurrentFarCodePointer());
const u32 jump_size =
static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) - static_cast<const char*>(jump_pointer));
SwitchToFarCode();
EmitBeginBlock(true);
EmitFunctionCall(nullptr, &CPU::Recompiler::Thunks::ResolveBranch, Value::FromConstantPtr(m_block),
Value::FromConstantPtr(jump_pointer), Value::FromConstantPtr(resolve_pointer),
Value::FromConstantU32(jump_size));
EmitEndBlock(true, true);
m_register_cache.PopState();
SwitchToNearCode();
EmitBindLabel(&return_to_dispatcher);
EmitEndBlock(true, true);
}
else
{
if (condition != Condition::Always) if (condition != Condition::Always)
{ {
// branch taken path - modify the next pc // branch taken path - modify the next pc
EmitBindLabel(&branch_taken);
EmitCopyValue(next_pc.GetHostRegister(), branch_target); EmitCopyValue(next_pc.GetHostRegister(), branch_target);
// converge point // converge point
@ -2271,9 +2423,10 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
WriteNewPC(branch_target, true); WriteNewPC(branch_target, true);
} }
// now invalidate lr becuase it was possibly written in the branch InstructionEpilogue(cbi);
if (lr_reg != Reg::count && lr_reg != Reg::zero) }
m_register_cache.InvalidateGuestRegister(lr_reg);
return true;
}; };
// Compute the branch target. // Compute the branch target.
@ -2287,10 +2440,9 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
Value branch_target = OrValues(AndValues(CalculatePC(), Value::FromConstantU32(0xF0000000)), Value branch_target = OrValues(AndValues(CalculatePC(), Value::FromConstantU32(0xF0000000)),
Value::FromConstantU32(cbi.instruction.j.target << 2)); Value::FromConstantU32(cbi.instruction.j.target << 2));
DoBranch(Condition::Always, Value(), Value(), (cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count, return DoBranch(Condition::Always, Value(), Value(),
std::move(branch_target)); (cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count, std::move(branch_target));
} }
break;
case InstructionOp::funct: case InstructionOp::funct:
{ {
@ -2298,7 +2450,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
{ {
// npc = rs, link to rt // npc = rs, link to rt
Value branch_target = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs); Value branch_target = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs);
DoBranch(Condition::Always, Value(), Value(), return DoBranch(Condition::Always, Value(), Value(),
(cbi.instruction.r.funct == InstructionFunct::jalr) ? cbi.instruction.r.rd : Reg::count, (cbi.instruction.r.funct == InstructionFunct::jalr) ? cbi.instruction.r.rd : Reg::count,
std::move(branch_target)); std::move(branch_target));
} }
@ -2308,13 +2460,15 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
const Exception excode = const Exception excode =
(cbi.instruction.r.funct == InstructionFunct::syscall) ? Exception::Syscall : Exception::BP; (cbi.instruction.r.funct == InstructionFunct::syscall) ? Exception::Syscall : Exception::BP;
GenerateExceptionExit(cbi, excode); GenerateExceptionExit(cbi, excode);
InstructionEpilogue(cbi);
return true;
} }
else else
{ {
UnreachableCode(); UnreachableCode();
return false;
} }
} }
break;
case InstructionOp::beq: case InstructionOp::beq:
case InstructionOp::bne: case InstructionOp::bne:
@ -2326,7 +2480,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
if (cbi.instruction.op == InstructionOp::beq && cbi.instruction.i.rs == Reg::zero && if (cbi.instruction.op == InstructionOp::beq && cbi.instruction.i.rs == Reg::zero &&
cbi.instruction.i.rt == Reg::zero) cbi.instruction.i.rt == Reg::zero)
{ {
DoBranch(Condition::Always, Value(), Value(), Reg::count, std::move(branch_target)); return DoBranch(Condition::Always, Value(), Value(), Reg::count, std::move(branch_target));
} }
else else
{ {
@ -2334,10 +2488,9 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
Value lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs, true, true); Value lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs, true, true);
Value rhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rt); Value rhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rt);
const Condition condition = (cbi.instruction.op == InstructionOp::beq) ? Condition::Equal : Condition::NotEqual; const Condition condition = (cbi.instruction.op == InstructionOp::beq) ? Condition::Equal : Condition::NotEqual;
DoBranch(condition, lhs, rhs, Reg::count, std::move(branch_target)); return DoBranch(condition, lhs, rhs, Reg::count, std::move(branch_target));
} }
} }
break;
case InstructionOp::bgtz: case InstructionOp::bgtz:
case InstructionOp::blez: case InstructionOp::blez:
@ -2350,9 +2503,8 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
const Condition condition = const Condition condition =
(cbi.instruction.op == InstructionOp::bgtz) ? Condition::Greater : Condition::LessEqual; (cbi.instruction.op == InstructionOp::bgtz) ? Condition::Greater : Condition::LessEqual;
DoBranch(condition, lhs, Value::FromConstantU32(0), Reg::count, std::move(branch_target)); return DoBranch(condition, lhs, Value::FromConstantU32(0), Reg::count, std::move(branch_target));
} }
break;
case InstructionOp::b: case InstructionOp::b:
{ {
@ -2378,17 +2530,13 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
m_register_cache.WriteGuestRegister(Reg::ra, CalculatePC(4)); m_register_cache.WriteGuestRegister(Reg::ra, CalculatePC(4));
} }
DoBranch(condition, lhs, Value(), Reg::count, std::move(branch_target)); return DoBranch(condition, lhs, Value(), Reg::count, std::move(branch_target));
} }
break;
default: default:
UnreachableCode(); UnreachableCode();
break; return false;
} }
InstructionEpilogue(cbi);
return true;
} }
bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi) bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi)

View file

@ -2,6 +2,7 @@
#include <array> #include <array>
#include <initializer_list> #include <initializer_list>
#include <utility> #include <utility>
#include <vector>
#include "common/jit_code_buffer.h" #include "common/jit_code_buffer.h"
@ -25,6 +26,8 @@ public:
static void AlignCodeBuffer(JitCodeBuffer* code_buffer); static void AlignCodeBuffer(JitCodeBuffer* code_buffer);
static bool BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi); static bool BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi);
static void BackpatchBranch(void* pc, u32 pc_size, void* target);
static void BackpatchReturn(void* pc, u32 pc_size);
bool CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size); bool CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
@ -34,8 +37,8 @@ public:
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Code Generation // Code Generation
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
void EmitBeginBlock(); void EmitBeginBlock(bool allocate_registers = true);
void EmitEndBlock(); void EmitEndBlock(bool free_registers = true, bool emit_return = true);
void EmitExceptionExit(); void EmitExceptionExit();
void EmitExceptionExitOnBool(const Value& value); void EmitExceptionExitOnBool(const Value& value);
void FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size); void FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
@ -105,6 +108,7 @@ public:
void EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs, LabelType* label); void EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs, LabelType* label);
void EmitConditionalBranch(Condition condition, bool invert, LabelType* label); void EmitConditionalBranch(Condition condition, bool invert, LabelType* label);
void EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label); void EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label);
void EmitBranchIfBitSet(HostReg reg, RegSize size, u8 bit, LabelType* label);
void EmitBindLabel(LabelType* label); void EmitBindLabel(LabelType* label);
u32 PrepareStackForCall(); u32 PrepareStackForCall();
@ -250,6 +254,7 @@ private:
u32 m_pc = 0; u32 m_pc = 0;
bool m_pc_valid = false; bool m_pc_valid = false;
bool m_block_linked = false;
// whether various flags need to be reset. // whether various flags need to be reset.
bool m_current_instruction_in_branch_delay_slot_dirty = false; bool m_current_instruction_in_branch_delay_slot_dirty = false;

View file

@ -166,10 +166,12 @@ Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool al
return new_value; return new_value;
} }
void CodeGenerator::EmitBeginBlock() void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
{ {
m_emit->sub(a32::sp, a32::sp, FUNCTION_STACK_SIZE); m_emit->sub(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
if (allocate_registers)
{
// Save the link register, since we'll be calling functions. // Save the link register, since we'll be calling functions.
const bool link_reg_allocated = m_register_cache.AllocateHostReg(14); const bool link_reg_allocated = m_register_cache.AllocateHostReg(14);
DebugAssert(link_reg_allocated); DebugAssert(link_reg_allocated);
@ -181,16 +183,25 @@ void CodeGenerator::EmitBeginBlock()
// m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state)); // m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state));
DebugAssert(cpu_reg_allocated); DebugAssert(cpu_reg_allocated);
UNREFERENCED_VARIABLE(cpu_reg_allocated); UNREFERENCED_VARIABLE(cpu_reg_allocated);
}
} }
void CodeGenerator::EmitEndBlock() void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_return /* = true */)
{ {
if (free_registers)
{
m_register_cache.FreeHostReg(RCPUPTR); m_register_cache.FreeHostReg(RCPUPTR);
m_register_cache.FreeHostReg(14);
m_register_cache.PopCalleeSavedRegisters(true); m_register_cache.PopCalleeSavedRegisters(true);
}
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE); m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
if (emit_return)
{
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address)); // m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->bx(a32::lr); m_emit->bx(a32::lr);
}
} }
void CodeGenerator::EmitExceptionExit() void CodeGenerator::EmitExceptionExit()
@ -1572,6 +1583,49 @@ bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
return true; return true;
} }
void CodeGenerator::BackpatchReturn(void* pc, u32 pc_size)
{
Log_ProfilePrintf("Backpatching %p to return", pc);
vixl::aarch32::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a32::A32);
emit.bx(a32::lr);
const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
Assert(nops >= 0);
for (s32 i = 0; i < nops; i++)
emit.nop();
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
}
void CodeGenerator::BackpatchBranch(void* pc, u32 pc_size, void* target)
{
Log_ProfilePrintf("Backpatching %p to %p [branch]", pc, target);
vixl::aarch32::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a32::A32);
// check jump distance
const s32 displacement = GetPCDisplacement(pc, target);
if (!IsPCDisplacementInImmediateRange(displacement))
{
emit.Mov(GetHostReg32(RSCRATCH), reinterpret_cast<uintptr_t>(target));
emit.bx(GetHostReg32(RSCRATCH));
}
else
{
a32::Label label(displacement + emit.GetCursorOffset());
emit.b(&label);
}
// shouldn't have any nops
const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
Assert(nops >= 0);
for (s32 i = 0; i < nops; i++)
emit.nop();
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
}
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{ {
EmitLoadGlobalAddress(RSCRATCH, ptr); EmitLoadGlobalAddress(RSCRATCH, ptr);

View file

@ -194,14 +194,17 @@ Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool al
return new_value; return new_value;
} }
void CodeGenerator::EmitBeginBlock() void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
{ {
m_emit->Sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE); m_emit->Sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
if (allocate_registers)
{
// Save the link register, since we'll be calling functions. // Save the link register, since we'll be calling functions.
const bool link_reg_allocated = m_register_cache.AllocateHostReg(30); const bool link_reg_allocated = m_register_cache.AllocateHostReg(30);
DebugAssert(link_reg_allocated); DebugAssert(link_reg_allocated);
UNREFERENCED_VARIABLE(link_reg_allocated); UNREFERENCED_VARIABLE(link_reg_allocated);
m_register_cache.AssumeCalleeSavedRegistersAreSaved(); m_register_cache.AssumeCalleeSavedRegistersAreSaved();
// Store the CPU struct pointer. TODO: make this better. // Store the CPU struct pointer. TODO: make this better.
@ -216,18 +219,25 @@ void CodeGenerator::EmitBeginBlock()
Assert(fastmem_reg_allocated); Assert(fastmem_reg_allocated);
m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base))); m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
} }
}
} }
void CodeGenerator::EmitEndBlock() void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_return /* = true */)
{ {
if (free_registers)
{
if (m_block->contains_loadstore_instructions) if (m_block->contains_loadstore_instructions)
m_register_cache.FreeHostReg(RMEMBASEPTR); m_register_cache.FreeHostReg(RMEMBASEPTR);
m_register_cache.FreeHostReg(RCPUPTR); m_register_cache.FreeHostReg(RCPUPTR);
m_register_cache.FreeHostReg(30); // lr
m_register_cache.PopCalleeSavedRegisters(true); m_register_cache.PopCalleeSavedRegisters(true);
}
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE); m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
if (emit_return)
m_emit->Ret(); m_emit->Ret();
} }
@ -1767,6 +1777,42 @@ bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
return true; return true;
} }
/// Rewrites the previously-emitted code at [pc, pc+pc_size) so it returns to the
/// caller (the dispatcher) instead of jumping to a linked block. Used when a
/// block link must be severed (e.g. the successor was invalidated).
void CodeGenerator::BackpatchReturn(void* pc, u32 pc_size)
{
  Log_ProfilePrintf("Backpatching %p to return", pc);

  // Assemble directly over the existing code. PositionDependentCode because we
  // are writing at a fixed, already-allocated address.
  vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a64::PositionDependentCode);

  emit.ret();

  // Pad the rest of the patch site with nops so no stale instructions remain.
  // AArch64 instructions are 4 bytes each, hence the division.
  const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
  Assert(nops >= 0);
  for (s32 i = 0; i < nops; i++)
    emit.nop();

  // Make sure the CPU doesn't keep executing the old code from the icache.
  JitCodeBuffer::FlushInstructionCache(pc, pc_size);
}
/// Rewrites the previously-emitted code at [pc, pc+pc_size) into a direct branch
/// to target, linking a compiled block to its successor's host code.
/// Asserts (rather than falling back) if the target is out of unconditional
/// branch immediate range — the code buffer is expected to be small enough that
/// this always holds.
void CodeGenerator::BackpatchBranch(void* pc, u32 pc_size, void* target)
{
  Log_ProfilePrintf("Backpatching %p to %p [branch]", pc, target);

  // check jump distance
  const s64 jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(target) - reinterpret_cast<intptr_t>(pc));
  Assert(Common::IsAligned(jump_distance, 4));
  // b takes the offset in units of 4-byte instructions, hence >> 2.
  Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2));

  // Assemble the branch directly over the existing code.
  vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a64::PositionDependentCode);
  emit.b(jump_distance >> 2);

  // shouldn't have any nops
  // (The patch site should normally be filled exactly; pad defensively anyway.)
  const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
  Assert(nops >= 0);
  for (s32 i = 0; i < nops; i++)
    emit.nop();

  // Evict the stale instructions from the icache before execution.
  JitCodeBuffer::FlushInstructionCache(pc, pc_size);
}
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{ {
EmitLoadGlobalAddress(RSCRATCH, ptr); EmitLoadGlobalAddress(RSCRATCH, ptr);

View file

@ -206,8 +206,10 @@ Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool al
return new_value; return new_value;
} }
void CodeGenerator::EmitBeginBlock() void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
{ {
if (allocate_registers)
{
m_register_cache.AssumeCalleeSavedRegistersAreSaved(); m_register_cache.AssumeCalleeSavedRegistersAreSaved();
// Store the CPU struct pointer. // Store the CPU struct pointer.
@ -220,20 +222,25 @@ void CodeGenerator::EmitBeginBlock()
if (m_block->contains_loadstore_instructions) if (m_block->contains_loadstore_instructions)
{ {
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR); const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
Assert(fastmem_reg_allocated); DebugAssert(fastmem_reg_allocated);
UNREFERENCED_VARIABLE(fastmem_reg_allocated); UNREFERENCED_VARIABLE(fastmem_reg_allocated);
m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + offsetof(CPU::State, fastmem_base)]); m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + offsetof(CPU::State, fastmem_base)]);
} }
}
} }
void CodeGenerator::EmitEndBlock() void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_return /* = true */)
{ {
if (free_registers)
{
m_register_cache.FreeHostReg(RCPUPTR); m_register_cache.FreeHostReg(RCPUPTR);
if (m_block->contains_loadstore_instructions) if (m_block->contains_loadstore_instructions)
m_register_cache.FreeHostReg(RMEMBASEPTR); m_register_cache.FreeHostReg(RMEMBASEPTR);
m_register_cache.PopCalleeSavedRegisters(true); m_register_cache.PopCalleeSavedRegisters(true);
}
if (emit_return)
m_emit->ret(); m_emit->ret();
} }
@ -2336,7 +2343,7 @@ void CodeGenerator::EmitUpdateFastmemBase()
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi) bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
{ {
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem", lbi.host_pc, lbi.guest_pc); Log_ProfilePrintf("Backpatching %p (guest PC 0x%08X) to slowmem", lbi.host_pc, lbi.guest_pc);
// turn it into a jump to the slowmem handler // turn it into a jump to the slowmem handler
Xbyak::CodeGenerator cg(lbi.host_code_size, lbi.host_pc); Xbyak::CodeGenerator cg(lbi.host_code_size, lbi.host_pc);
@ -2352,6 +2359,39 @@ bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
return true; return true;
} }
void CodeGenerator::BackpatchReturn(void* pc, u32 pc_size)
{
Log_ProfilePrintf("Backpatching %p to return", pc);
Xbyak::CodeGenerator cg(pc_size, pc);
cg.ret();
const s32 nops =
static_cast<s32>(pc_size) - static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(pc)));
Assert(nops >= 0);
for (s32 i = 0; i < nops; i++)
cg.nop();
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
}
void CodeGenerator::BackpatchBranch(void* pc, u32 pc_size, void* target)
{
Log_ProfilePrintf("Backpatching %p to %p [branch]", pc, target);
Xbyak::CodeGenerator cg(pc_size, pc);
cg.jmp(target);
// shouldn't have any nops
const s32 nops =
static_cast<s32>(pc_size) - static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(pc)));
Assert(nops >= 0);
for (s32 i = 0; i < nops; i++)
cg.nop();
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
}
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{ {
const s64 displacement = const s64 displacement =
@ -2851,6 +2891,59 @@ void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, Labe
} }
} }
/// Emits a branch to label taken when bit `bit` of `reg` (of width `size`) is set.
/// Counterpart to EmitBranchIfBitClear. Used by the block-linking epilogue, e.g.
/// to test downcount/interrupt flags before jumping to a linked block.
void CodeGenerator::EmitBranchIfBitSet(HostReg reg, RegSize size, u8 bit, LabelType* label)
{
  if (bit < 8)
  {
    // same size, probably faster
    // Low bits fit in a test immediate: test sets ZF, branch on not-zero.
    switch (size)
    {
      case RegSize_8:
        m_emit->test(GetHostReg8(reg), (1u << bit));
        m_emit->jnz(*label);
        break;

      case RegSize_16:
        m_emit->test(GetHostReg16(reg), (1u << bit));
        m_emit->jnz(*label);
        break;

      case RegSize_32:
        m_emit->test(GetHostReg32(reg), (1u << bit));
        m_emit->jnz(*label);
        break;

      default:
        UnreachableCode();
        break;
    }
  }
  else
  {
    // Higher bit positions: bt copies the selected bit into CF, branch on carry.
    switch (size)
    {
      case RegSize_8:
        m_emit->bt(GetHostReg8(reg), bit);
        m_emit->jc(*label);
        break;

      case RegSize_16:
        m_emit->bt(GetHostReg16(reg), bit);
        m_emit->jc(*label);
        break;

      case RegSize_32:
        m_emit->bt(GetHostReg32(reg), bit);
        m_emit->jc(*label);
        break;

      default:
        UnreachableCode();
        break;
    }
  }
}
void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label) void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label)
{ {
if (bit < 8) if (bit < 8)

View file

@ -280,6 +280,21 @@ Value RegisterCache::AllocateScratch(RegSize size, HostReg reg /* = HostReg_Inva
return Value::FromScratch(this, reg, size); return Value::FromScratch(this, reg, size);
} }
/// Pushes every not-yet-allocated callee-saved host register and marks it as
/// allocated, so later code can use those registers without emitting further
/// stack pushes (e.g. before emitting a linked-block epilogue).
/// NOTE(review): the name says "CallerSaved" but the flags tested/set here are
/// CalleeSaved/CalleeSavedAllocated — presumably the name is a slip for
/// "callee saved"; renaming would change the declared interface, so it is only
/// flagged here. TODO confirm against the header declaration.
void RegisterCache::ReserveCallerSavedRegisters()
{
  for (u32 reg = 0; reg < HostReg_Count; reg++)
  {
    // A register that is callee-saved but not yet saved/allocated by this block.
    if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
        HostRegState::CalleeSaved)
    {
      DebugAssert(m_state.callee_saved_order_count < HostReg_Count);
      // Push it at the next stack slot and record the order so the epilogue can
      // pop in reverse.
      m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg), GetActiveCalleeSavedRegisterCount());
      m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
      m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
    }
  }
}
u32 RegisterCache::PushCallerSavedRegisters() const u32 RegisterCache::PushCallerSavedRegisters() const
{ {
u32 position = GetActiveCalleeSavedRegisterCount(); u32 position = GetActiveCalleeSavedRegisterCount();

View file

@ -193,6 +193,16 @@ struct Value
static Value FromConstantU32(u32 value) { return FromConstant(ZeroExtend64(value), RegSize_32); } static Value FromConstantU32(u32 value) { return FromConstant(ZeroExtend64(value), RegSize_32); }
static Value FromConstantS32(s32 value) { return FromConstant(ZeroExtend64(static_cast<u32>(value)), RegSize_32); } static Value FromConstantS32(s32 value) { return FromConstant(ZeroExtend64(static_cast<u32>(value)), RegSize_32); }
static Value FromConstantU64(u64 value) { return FromConstant(value, RegSize_64); } static Value FromConstantU64(u64 value) { return FromConstant(value, RegSize_64); }
/// Wraps a host pointer as a constant Value of the native pointer width:
/// 64-bit on AArch64/x64, 32-bit on AArch32. On unknown architectures the
/// pointer is dropped and a zero 32-bit constant is returned (no recompiler
/// backend exists there, so the value is never emitted).
static Value FromConstantPtr(const void* pointer)
{
#if defined(CPU_AARCH64) || defined(CPU_X64)
  return FromConstant(static_cast<u64>(reinterpret_cast<uintptr_t>(pointer)), RegSize_64);
#elif defined(CPU_AARCH32)
  return FromConstant(static_cast<u32>(reinterpret_cast<uintptr_t>(pointer)), RegSize_32);
#else
  return FromConstant(0, RegSize_32);
#endif
}
private: private:
void Release(); void Release();
@ -241,6 +251,9 @@ public:
/// Ensures a host register is free, removing any value cached. /// Ensures a host register is free, removing any value cached.
void EnsureHostRegFree(HostReg reg); void EnsureHostRegFree(HostReg reg);
/// Preallocates caller saved registers, enabling later use without stack pushes.
void ReserveCallerSavedRegisters();
/// Push/pop volatile host registers. Returns the number of registers pushed/popped. /// Push/pop volatile host registers. Returns the number of registers pushed/popped.
u32 PushCallerSavedRegisters() const; u32 PushCallerSavedRegisters() const;
u32 PopCallerSavedRegisters() const; u32 PopCallerSavedRegisters() const;

View file

@ -3,6 +3,7 @@
#include "cpu_types.h" #include "cpu_types.h"
namespace CPU { namespace CPU {
struct CodeBlock;
struct CodeBlockInstruction; struct CodeBlockInstruction;
namespace Recompiler::Thunks { namespace Recompiler::Thunks {
@ -32,6 +33,9 @@ void UncheckedWriteMemoryByte(u32 address, u32 value);
void UncheckedWriteMemoryHalfWord(u32 address, u32 value); void UncheckedWriteMemoryHalfWord(u32 address, u32 value);
void UncheckedWriteMemoryWord(u32 address, u32 value); void UncheckedWriteMemoryWord(u32 address, u32 value);
void ResolveBranch(CodeBlock* block, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
void LogPC(u32 pc);
} // namespace Recompiler::Thunks } // namespace Recompiler::Thunks
} // namespace CPU } // namespace CPU

View file

@ -104,24 +104,25 @@ bool IsDirectBranchInstruction(const Instruction& instruction)
} }
} }
u32 GetBranchInstructionTarget(const Instruction& instruction, u32 instruction_pc) VirtualMemoryAddress GetDirectBranchTarget(const Instruction& instruction, VirtualMemoryAddress instruction_pc)
{ {
const VirtualMemoryAddress pc = instruction_pc + 4;
switch (instruction.op) switch (instruction.op)
{ {
case InstructionOp::j: case InstructionOp::j:
case InstructionOp::jal: case InstructionOp::jal:
return ((instruction_pc + 4) & UINT32_C(0xF0000000)) | (instruction.j.target << 2); return (pc & UINT32_C(0xF0000000)) | (instruction.j.target << 2);
case InstructionOp::b: case InstructionOp::b:
case InstructionOp::beq: case InstructionOp::beq:
case InstructionOp::bgtz: case InstructionOp::bgtz:
case InstructionOp::blez: case InstructionOp::blez:
case InstructionOp::bne: case InstructionOp::bne:
return instruction_pc + 4 + (instruction.i.imm_sext32() << 2); return (pc + (instruction.i.imm_sext32() << 2));
default: default:
Panic("Trying to get branch target of indirect or invalid branch"); return pc;
return instruction_pc;
} }
} }

View file

@ -223,7 +223,7 @@ bool IsNopInstruction(const Instruction& instruction);
bool IsBranchInstruction(const Instruction& instruction); bool IsBranchInstruction(const Instruction& instruction);
bool IsUnconditionalBranchInstruction(const Instruction& instruction); bool IsUnconditionalBranchInstruction(const Instruction& instruction);
bool IsDirectBranchInstruction(const Instruction& instruction); bool IsDirectBranchInstruction(const Instruction& instruction);
u32 GetBranchInstructionTarget(const Instruction& instruction, u32 instruction_pc); VirtualMemoryAddress GetDirectBranchTarget(const Instruction& instruction, VirtualMemoryAddress instruction_pc);
bool IsCallInstruction(const Instruction& instruction); bool IsCallInstruction(const Instruction& instruction);
bool IsReturnInstruction(const Instruction& instruction); bool IsReturnInstruction(const Instruction& instruction);
bool IsMemoryLoadInstruction(const Instruction& instruction); bool IsMemoryLoadInstruction(const Instruction& instruction);

View file

@ -511,6 +511,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
si.SetIntValue("CPU", "OverclockDenominator", 1); si.SetIntValue("CPU", "OverclockDenominator", 1);
si.SetBoolValue("CPU", "OverclockEnable", false); si.SetBoolValue("CPU", "OverclockEnable", false);
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false); si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false);
si.SetBoolValue("CPU", "RecompilerBlockLinking", true);
si.SetBoolValue("CPU", "ICache", false); si.SetBoolValue("CPU", "ICache", false);
si.SetBoolValue("CPU", "FastmemMode", Settings::GetCPUFastmemModeName(Settings::DEFAULT_CPU_FASTMEM_MODE)); si.SetBoolValue("CPU", "FastmemMode", Settings::GetCPUFastmemModeName(Settings::DEFAULT_CPU_FASTMEM_MODE));
@ -772,23 +773,14 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
} }
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler && if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler &&
g_settings.cpu_recompiler_memory_exceptions != old_settings.cpu_recompiler_memory_exceptions) (g_settings.cpu_recompiler_memory_exceptions != old_settings.cpu_recompiler_memory_exceptions ||
g_settings.cpu_recompiler_block_linking != old_settings.cpu_recompiler_block_linking ||
g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache))
{ {
AddOSDMessage(g_settings.cpu_recompiler_memory_exceptions ? AddOSDMessage(TranslateStdString("OSDMessage", "Recompiler options changed, flushing all blocks."), 5.0f);
TranslateStdString("OSDMessage", "CPU memory exceptions enabled, flushing all blocks.") :
TranslateStdString("OSDMessage", "CPU memory exceptions disabled, flushing all blocks."),
5.0f);
CPU::CodeCache::Flush(); CPU::CodeCache::Flush();
}
if (g_settings.cpu_execution_mode != CPUExecutionMode::Interpreter && if (g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache)
g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache)
{
AddOSDMessage(g_settings.cpu_recompiler_icache ?
TranslateStdString("OSDMessage", "CPU ICache enabled, flushing all blocks.") :
TranslateStdString("OSDMessage", "CPU ICache disabled, flushing all blocks."),
5.0f);
CPU::CodeCache::Flush();
CPU::ClearICache(); CPU::ClearICache();
} }

View file

@ -172,6 +172,7 @@ void Settings::Load(SettingsInterface& si)
cpu_overclock_enable = si.GetBoolValue("CPU", "OverclockEnable", false); cpu_overclock_enable = si.GetBoolValue("CPU", "OverclockEnable", false);
UpdateOverclockActive(); UpdateOverclockActive();
cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false); cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false);
cpu_recompiler_block_linking = si.GetBoolValue("CPU", "RecompilerBlockLinking", true);
cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false); cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false);
cpu_fastmem_mode = ParseCPUFastmemMode( cpu_fastmem_mode = ParseCPUFastmemMode(
si.GetStringValue("CPU", "FastmemMode", GetCPUFastmemModeName(DEFAULT_CPU_FASTMEM_MODE)).c_str()) si.GetStringValue("CPU", "FastmemMode", GetCPUFastmemModeName(DEFAULT_CPU_FASTMEM_MODE)).c_str())
@ -363,6 +364,7 @@ void Settings::Save(SettingsInterface& si) const
si.SetIntValue("CPU", "OverclockNumerator", cpu_overclock_numerator); si.SetIntValue("CPU", "OverclockNumerator", cpu_overclock_numerator);
si.SetIntValue("CPU", "OverclockDenominator", cpu_overclock_denominator); si.SetIntValue("CPU", "OverclockDenominator", cpu_overclock_denominator);
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions); si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions);
si.SetBoolValue("CPU", "RecompilerBlockLinking", cpu_recompiler_block_linking);
si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache); si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache);
si.SetStringValue("CPU", "FastmemMode", GetCPUFastmemModeName(cpu_fastmem_mode)); si.SetStringValue("CPU", "FastmemMode", GetCPUFastmemModeName(cpu_fastmem_mode));

View file

@ -78,6 +78,7 @@ struct Settings
bool cpu_overclock_enable = false; bool cpu_overclock_enable = false;
bool cpu_overclock_active = false; bool cpu_overclock_active = false;
bool cpu_recompiler_memory_exceptions = false; bool cpu_recompiler_memory_exceptions = false;
bool cpu_recompiler_block_linking = true;
bool cpu_recompiler_icache = false; bool cpu_recompiler_icache = false;
CPUFastmemMode cpu_fastmem_mode = CPUFastmemMode::Disabled; CPUFastmemMode cpu_fastmem_mode = CPUFastmemMode::Disabled;

View file

@ -159,6 +159,8 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface,
addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Memory Exceptions"), "CPU", addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Memory Exceptions"), "CPU",
"RecompilerMemoryExceptions", false); "RecompilerMemoryExceptions", false);
addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Block Linking"), "CPU",
"RecompilerBlockLinking", true);
addChoiceTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Fast Memory Access"), "CPU", addChoiceTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Fast Memory Access"), "CPU",
"FastmemMode", Settings::ParseCPUFastmemMode, Settings::GetCPUFastmemModeName, "FastmemMode", Settings::ParseCPUFastmemMode, Settings::GetCPUFastmemModeName,
Settings::GetCPUFastmemModeDisplayName, "CPUFastmemMode", Settings::GetCPUFastmemModeDisplayName, "CPUFastmemMode",
@ -226,20 +228,21 @@ void AdvancedSettingsWidget::onResetToDefaultClicked()
setFloatRangeTweakOption(m_ui.tweakOptionTable, 4, -1.0f); setFloatRangeTweakOption(m_ui.tweakOptionTable, 4, -1.0f);
setFloatRangeTweakOption(m_ui.tweakOptionTable, 5, Settings::DEFAULT_GPU_PGXP_DEPTH_THRESHOLD); setFloatRangeTweakOption(m_ui.tweakOptionTable, 5, Settings::DEFAULT_GPU_PGXP_DEPTH_THRESHOLD);
setBooleanTweakOption(m_ui.tweakOptionTable, 6, false); setBooleanTweakOption(m_ui.tweakOptionTable, 6, false);
setChoiceTweakOption(m_ui.tweakOptionTable, 7, Settings::DEFAULT_CPU_FASTMEM_MODE); setBooleanTweakOption(m_ui.tweakOptionTable, 7, true);
setBooleanTweakOption(m_ui.tweakOptionTable, 8, false); setChoiceTweakOption(m_ui.tweakOptionTable, 8, Settings::DEFAULT_CPU_FASTMEM_MODE);
setBooleanTweakOption(m_ui.tweakOptionTable, 9, false); setBooleanTweakOption(m_ui.tweakOptionTable, 9, false);
setBooleanTweakOption(m_ui.tweakOptionTable, 10, false); setBooleanTweakOption(m_ui.tweakOptionTable, 10, false);
setBooleanTweakOption(m_ui.tweakOptionTable, 11, false); setBooleanTweakOption(m_ui.tweakOptionTable, 11, false);
setBooleanTweakOption(m_ui.tweakOptionTable, 12, false); setBooleanTweakOption(m_ui.tweakOptionTable, 12, false);
setIntRangeTweakOption(m_ui.tweakOptionTable, 13, Settings::DEFAULT_VRAM_WRITE_DUMP_WIDTH_THRESHOLD); setBooleanTweakOption(m_ui.tweakOptionTable, 13, false);
setIntRangeTweakOption(m_ui.tweakOptionTable, 14, Settings::DEFAULT_VRAM_WRITE_DUMP_HEIGHT_THRESHOLD); setIntRangeTweakOption(m_ui.tweakOptionTable, 14, Settings::DEFAULT_VRAM_WRITE_DUMP_WIDTH_THRESHOLD);
setIntRangeTweakOption(m_ui.tweakOptionTable, 15, static_cast<int>(Settings::DEFAULT_DMA_MAX_SLICE_TICKS)); setIntRangeTweakOption(m_ui.tweakOptionTable, 15, Settings::DEFAULT_VRAM_WRITE_DUMP_HEIGHT_THRESHOLD);
setIntRangeTweakOption(m_ui.tweakOptionTable, 16, static_cast<int>(Settings::DEFAULT_DMA_HALT_TICKS)); setIntRangeTweakOption(m_ui.tweakOptionTable, 16, static_cast<int>(Settings::DEFAULT_DMA_MAX_SLICE_TICKS));
setIntRangeTweakOption(m_ui.tweakOptionTable, 17, static_cast<int>(Settings::DEFAULT_GPU_FIFO_SIZE)); setIntRangeTweakOption(m_ui.tweakOptionTable, 17, static_cast<int>(Settings::DEFAULT_DMA_HALT_TICKS));
setIntRangeTweakOption(m_ui.tweakOptionTable, 18, static_cast<int>(Settings::DEFAULT_GPU_MAX_RUN_AHEAD)); setIntRangeTweakOption(m_ui.tweakOptionTable, 18, static_cast<int>(Settings::DEFAULT_GPU_FIFO_SIZE));
setBooleanTweakOption(m_ui.tweakOptionTable, 19, false); setIntRangeTweakOption(m_ui.tweakOptionTable, 19, static_cast<int>(Settings::DEFAULT_GPU_MAX_RUN_AHEAD));
setBooleanTweakOption(m_ui.tweakOptionTable, 20, true); setBooleanTweakOption(m_ui.tweakOptionTable, 20, false);
setBooleanTweakOption(m_ui.tweakOptionTable, 21, false); setBooleanTweakOption(m_ui.tweakOptionTable, 21, true);
setBooleanTweakOption(m_ui.tweakOptionTable, 22, false); setBooleanTweakOption(m_ui.tweakOptionTable, 22, false);
setBooleanTweakOption(m_ui.tweakOptionTable, 23, false);
} }

View file

@ -2588,6 +2588,10 @@ void DrawSettingsWindow()
settings_changed |= ToggleButton("Enable Recompiler Memory Exceptions", settings_changed |= ToggleButton("Enable Recompiler Memory Exceptions",
"Enables alignment and bus exceptions. Not needed for any known games.", "Enables alignment and bus exceptions. Not needed for any known games.",
&s_settings_copy.cpu_recompiler_memory_exceptions); &s_settings_copy.cpu_recompiler_memory_exceptions);
settings_changed |= ToggleButton(
"Enable Recompiler Block Linking",
"Performance enhancement - jumps directly between blocks instead of returning to the dispatcher.",
&s_settings_copy.cpu_recompiler_block_linking);
settings_changed |= EnumChoiceButton("Recompiler Fast Memory Access", settings_changed |= EnumChoiceButton("Recompiler Fast Memory Access",
"Avoids calls to C++ code, significantly speeding up the recompiler.", "Avoids calls to C++ code, significantly speeding up the recompiler.",
&s_settings_copy.cpu_fastmem_mode, &Settings::GetCPUFastmemModeDisplayName, &s_settings_copy.cpu_fastmem_mode, &Settings::GetCPUFastmemModeDisplayName,
@ -3902,6 +3906,8 @@ void DrawDebugSettingsMenu()
settings_changed |= settings_changed |=
ImGui::MenuItem("Recompiler Memory Exceptions", nullptr, &s_settings_copy.cpu_recompiler_memory_exceptions); ImGui::MenuItem("Recompiler Memory Exceptions", nullptr, &s_settings_copy.cpu_recompiler_memory_exceptions);
settings_changed |=
ImGui::MenuItem("Recompiler Block Linking", nullptr, &s_settings_copy.cpu_recompiler_block_linking);
if (ImGui::BeginMenu("Recompiler Fastmem")) if (ImGui::BeginMenu("Recompiler Fastmem"))
{ {
for (u32 i = 0; i < static_cast<u32>(CPUFastmemMode::Count); i++) for (u32 i = 0; i < static_cast<u32>(CPUFastmemMode::Count); i++)