mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-23 14:25:37 +00:00
CPU/Recompiler: Implement block linking
This commit is contained in:
parent
29bc0c950a
commit
21938e14c6
|
@ -38,6 +38,7 @@ static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 32 * 1024 * 1024;
|
|||
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 16 * 1024 * 1024;
|
||||
#endif
|
||||
static constexpr u32 CODE_WRITE_FAULT_THRESHOLD_FOR_SLOWMEM = 10;
|
||||
static constexpr u32 INVALIDATE_THRESHOLD_TO_DISABLE_LINKING = 10;
|
||||
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
static constexpr u32 RECOMPILER_GUARD_SIZE = 4096;
|
||||
|
@ -205,8 +206,8 @@ static void RemoveReferencesToBlock(CodeBlock* block);
|
|||
static void AddBlockToPageMap(CodeBlock* block);
|
||||
static void RemoveBlockFromPageMap(CodeBlock* block);
|
||||
|
||||
/// Links block `from` to block `to`.
|
||||
static void LinkBlock(CodeBlock* from, CodeBlock* to);
|
||||
/// Links block `from` to block `to`. If host_pc is non-null, the branch at host_pc is backpatched to jump to `to`.
|
||||
static void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
|
||||
|
||||
/// Unlink all blocks which point to this block, and any that this block links to.
|
||||
static void UnlinkBlock(CodeBlock* block);
|
||||
|
@ -352,8 +353,9 @@ static void ExecuteImpl()
|
|||
{
|
||||
// Try to find an already-linked block.
|
||||
// TODO: Don't need to dereference the block, just store a pointer to the code.
|
||||
for (CodeBlock* linked_block : block->link_successors)
|
||||
for (const CodeBlock::LinkInfo& li : block->link_successors)
|
||||
{
|
||||
CodeBlock* linked_block = li.block;
|
||||
if (linked_block->key.bits == next_block_key.bits)
|
||||
{
|
||||
if (linked_block->invalidated && !RevalidateBlock(linked_block))
|
||||
|
@ -373,7 +375,7 @@ static void ExecuteImpl()
|
|||
if (next_block)
|
||||
{
|
||||
// Link the previous block to this new block if we find a new block.
|
||||
LinkBlock(block, next_block);
|
||||
LinkBlock(block, next_block, nullptr, nullptr, 0);
|
||||
block = next_block;
|
||||
goto reexecute_block;
|
||||
}
|
||||
|
@ -686,6 +688,7 @@ bool CompileBlock(CodeBlock* block)
|
|||
cbi.is_store_instruction = IsMemoryStoreInstruction(cbi.instruction);
|
||||
cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction);
|
||||
cbi.can_trap = CanInstructionTrap(cbi.instruction, InUserMode());
|
||||
cbi.is_direct_branch_instruction = IsDirectBranchInstruction(cbi.instruction);
|
||||
|
||||
if (g_settings.cpu_recompiler_icache)
|
||||
{
|
||||
|
@ -718,7 +721,7 @@ bool CompileBlock(CodeBlock* block)
|
|||
}
|
||||
|
||||
// change the pc for the second branch's delay slot, it comes from the first branch
|
||||
pc = GetBranchInstructionTarget(prev_cbi.instruction, prev_cbi.pc);
|
||||
pc = GetDirectBranchTarget(prev_cbi.instruction, prev_cbi.pc);
|
||||
Log_DevPrintf("Double branch at %08X, using delay slot from %08X -> %08X", cbi.pc, prev_cbi.pc, pc);
|
||||
}
|
||||
|
||||
|
@ -840,6 +843,25 @@ void InvalidateBlocksWithPageIndex(u32 page_index)
|
|||
// Invalidate forces the block to be checked again.
|
||||
Log_DebugPrintf("Invalidating block at 0x%08X", block->GetPC());
|
||||
block->invalidated = true;
|
||||
|
||||
if (block->can_link)
|
||||
{
|
||||
const u32 frame_number = System::GetFrameNumber();
|
||||
const u32 frame_diff = frame_number - block->invalidate_frame_number;
|
||||
if (frame_diff <= INVALIDATE_THRESHOLD_TO_DISABLE_LINKING)
|
||||
{
|
||||
Log_PerfPrintf("Block 0x%08X has been invalidated in %u frames, disabling linking", block->GetPC(), frame_diff);
|
||||
block->can_link = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
// It's been a while since this block was modified, so it's all good.
|
||||
block->invalidate_frame_number = frame_number;
|
||||
}
|
||||
}
|
||||
|
||||
UnlinkBlock(block);
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
SetFastMap(block->GetPC(), FastCompileBlockFunction);
|
||||
#endif
|
||||
|
@ -902,30 +924,80 @@ void RemoveBlockFromPageMap(CodeBlock* block)
|
|||
}
|
||||
}
|
||||
|
||||
void LinkBlock(CodeBlock* from, CodeBlock* to)
|
||||
void LinkBlock(CodeBlock* from, CodeBlock* to, void* host_pc, void* host_resolve_pc, u32 host_pc_size)
|
||||
{
|
||||
Log_DebugPrintf("Linking block %p(%08x) to %p(%08x)", from, from->GetPC(), to, to->GetPC());
|
||||
from->link_successors.push_back(to);
|
||||
to->link_predecessors.push_back(from);
|
||||
|
||||
CodeBlock::LinkInfo li;
|
||||
li.block = to;
|
||||
li.host_pc = host_pc;
|
||||
li.host_resolve_pc = host_resolve_pc;
|
||||
li.host_pc_size = host_pc_size;
|
||||
from->link_successors.push_back(li);
|
||||
|
||||
li.block = from;
|
||||
to->link_predecessors.push_back(li);
|
||||
|
||||
// apply in code
|
||||
if (host_pc)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p(%08x) to jump to block %p (%08x)", host_pc, from->GetPC(), to, to->GetPC());
|
||||
s_code_buffer.WriteProtect(false);
|
||||
Recompiler::CodeGenerator::BackpatchBranch(host_pc, host_pc_size, reinterpret_cast<void*>(to->host_code));
|
||||
s_code_buffer.WriteProtect(true);
|
||||
}
|
||||
}
|
||||
|
||||
void UnlinkBlock(CodeBlock* block)
|
||||
{
|
||||
for (CodeBlock* predecessor : block->link_predecessors)
|
||||
if (block->link_predecessors.empty() && block->link_successors.empty())
|
||||
return;
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
if (g_settings.IsUsingRecompiler() && g_settings.cpu_recompiler_block_linking)
|
||||
s_code_buffer.WriteProtect(false);
|
||||
#endif
|
||||
|
||||
for (CodeBlock::LinkInfo& li : block->link_predecessors)
|
||||
{
|
||||
auto iter = std::find(predecessor->link_successors.begin(), predecessor->link_successors.end(), block);
|
||||
Assert(iter != predecessor->link_successors.end());
|
||||
predecessor->link_successors.erase(iter);
|
||||
auto iter = std::find_if(li.block->link_successors.begin(), li.block->link_successors.end(),
|
||||
[block](const CodeBlock::LinkInfo& li) { return li.block == block; });
|
||||
Assert(iter != li.block->link_successors.end());
|
||||
|
||||
// Restore blocks linked to this block back to the resolver
|
||||
if (li.host_pc)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p(%08x) [predecessor] to jump to resolver", li.host_pc, li.block->GetPC());
|
||||
Recompiler::CodeGenerator::BackpatchBranch(li.host_pc, li.host_pc_size, li.host_resolve_pc);
|
||||
}
|
||||
|
||||
li.block->link_successors.erase(iter);
|
||||
}
|
||||
block->link_predecessors.clear();
|
||||
|
||||
for (CodeBlock* successor : block->link_successors)
|
||||
for (CodeBlock::LinkInfo& li : block->link_successors)
|
||||
{
|
||||
auto iter = std::find(successor->link_predecessors.begin(), successor->link_predecessors.end(), block);
|
||||
Assert(iter != successor->link_predecessors.end());
|
||||
successor->link_predecessors.erase(iter);
|
||||
auto iter = std::find_if(li.block->link_predecessors.begin(), li.block->link_predecessors.end(),
|
||||
[block](const CodeBlock::LinkInfo& li) { return li.block == block; });
|
||||
Assert(iter != li.block->link_predecessors.end());
|
||||
|
||||
// Restore blocks we're linking to back to the resolver, since the successor won't be linked to us to backpatch if
|
||||
// it changes.
|
||||
if (li.host_pc)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p(%08x) [successor] to jump to resolver", li.host_pc, li.block->GetPC());
|
||||
Recompiler::CodeGenerator::BackpatchBranch(li.host_pc, li.host_pc_size, li.host_resolve_pc);
|
||||
}
|
||||
|
||||
// Don't have to do anything special for successors - just let the successor know it's no longer linked.
|
||||
li.block->link_predecessors.erase(iter);
|
||||
}
|
||||
block->link_successors.clear();
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
if (g_settings.IsUsingRecompiler() && g_settings.cpu_recompiler_block_linking)
|
||||
s_code_buffer.WriteProtect(true);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
|
@ -1104,3 +1176,39 @@ Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc,
|
|||
#endif // WITH_RECOMPILER
|
||||
|
||||
} // namespace CPU::CodeCache
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
|
||||
/// Thunk that resolves a linkable branch. Looks up the block for the next block key and either links `block` to it
/// (LinkBlock backpatches the jump at host_pc) or rewrites the jump at host_pc into a plain return.
///   block           - block that contains the branch being resolved.
///   host_pc         - address of the patchable jump inside the host code.
///   host_resolve_pc - forwarded to LinkBlock and stored with the link.
///   host_pc_size    - size in bytes of the patchable region at host_pc.
void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, void* host_resolve_pc, u32 host_pc_size)
|
||||
{
|
||||
using namespace CPU::CodeCache;
|
||||
|
||||
CodeBlockKey key = GetNextBlockKey();
|
||||
CodeBlock* successor_block = LookupBlock(key);
|
||||
// Linking is refused when no block exists for the key, when the block is invalidated and RevalidateBlock() fails,
// or when either the source or the successor block has can_link cleared.
if (!successor_block || (successor_block->invalidated && !RevalidateBlock(successor_block)) || !block->can_link ||
|
||||
!successor_block->can_link)
|
||||
{
|
||||
// just turn it into a return to the dispatcher instead.
|
||||
// The code buffer is made writable only for the duration of the patch.
s_code_buffer.WriteProtect(false);
|
||||
CodeGenerator::BackpatchReturn(host_pc, host_pc_size);
|
||||
s_code_buffer.WriteProtect(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
// link blocks!
|
||||
// LinkBlock records the link on both blocks and, since host_pc is passed, patches the jump itself.
LinkBlock(block, successor_block, host_pc, host_resolve_pc, host_pc_size);
|
||||
}
|
||||
}
|
||||
|
||||
/// Debugging thunk. Both bodies below are compiled out with `#if 0`, so as written this function does nothing and
/// `pc` is unused.
void CPU::Recompiler::Thunks::LogPC(u32 pc)
|
||||
{
|
||||
// Disabled: would log the current state via CPU::CodeCache::LogCurrentState().
#if 0
|
||||
CPU::CodeCache::LogCurrentState();
|
||||
#endif
|
||||
// Disabled: would break into the debugger when the global tick counter plus pending ticks hits a hard-coded value.
#if 0
|
||||
if (TimingEvents::GetGlobalTickCounter() + GetPendingTicks() == 382856482)
|
||||
__debugbreak();
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // WITH_RECOMPILER
|
||||
|
|
|
@ -60,6 +60,14 @@ struct CodeBlock
|
|||
{
|
||||
using HostCodePointer = void (*)();
|
||||
|
||||
/// Describes one link between two code blocks. The same record type is stored in both link_successors and
/// link_predecessors.
struct LinkInfo
|
||||
{
|
||||
// Block at the other end of the link: the successor when stored in link_successors, the predecessor when stored
// in link_predecessors.
CodeBlock* block;
|
||||
// Address of the patchable branch in host code; null when the link was made without a host branch to patch.
void* host_pc;
|
||||
// Branch target that the branch at host_pc is patched back to when the link is removed.
void* host_resolve_pc;
|
||||
// Size in bytes of the patchable region at host_pc.
u32 host_pc_size;
|
||||
};
|
||||
|
||||
CodeBlock(const CodeBlockKey key_) : key(key_) {}
|
||||
|
||||
CodeBlockKey key;
|
||||
|
@ -67,8 +75,8 @@ struct CodeBlock
|
|||
HostCodePointer host_code = nullptr;
|
||||
|
||||
std::vector<CodeBlockInstruction> instructions;
|
||||
std::vector<CodeBlock*> link_predecessors;
|
||||
std::vector<CodeBlock*> link_successors;
|
||||
std::vector<LinkInfo> link_predecessors;
|
||||
std::vector<LinkInfo> link_successors;
|
||||
|
||||
TickCount uncached_fetch_ticks = 0;
|
||||
u32 icache_line_count = 0;
|
||||
|
@ -80,9 +88,11 @@ struct CodeBlock
|
|||
bool contains_loadstore_instructions = false;
|
||||
bool contains_double_branches = false;
|
||||
bool invalidated = false;
|
||||
bool can_link = true;
|
||||
|
||||
u32 recompile_frame_number = 0;
|
||||
u32 recompile_count = 0;
|
||||
u32 invalidate_frame_number = 0;
|
||||
|
||||
const u32 GetPC() const { return key.GetPC(); }
|
||||
const u32 GetSizeInBytes() const { return static_cast<u32>(instructions.size()) * sizeof(Instruction); }
|
||||
|
|
|
@ -21,23 +21,20 @@ bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* o
|
|||
m_block = block;
|
||||
m_block_start = block->instructions.data();
|
||||
m_block_end = block->instructions.data() + block->instructions.size();
|
||||
|
||||
m_pc = block->GetPC();
|
||||
m_pc_valid = true;
|
||||
|
||||
EmitBeginBlock();
|
||||
m_fastmem_load_base_in_register = false;
|
||||
m_fastmem_store_base_in_register = false;
|
||||
|
||||
EmitBeginBlock(true);
|
||||
BlockPrologue();
|
||||
|
||||
const CodeBlockInstruction* cbi = m_block_start;
|
||||
while (cbi != m_block_end)
|
||||
m_current_instruction = m_block_start;
|
||||
while (m_current_instruction != m_block_end)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
SmallString disasm;
|
||||
DisassembleInstruction(&disasm, cbi->pc, cbi->instruction.bits);
|
||||
Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
|
||||
#endif
|
||||
|
||||
m_current_instruction = cbi;
|
||||
if (!CompileInstruction(*cbi))
|
||||
if (!CompileInstruction(*m_current_instruction))
|
||||
{
|
||||
m_current_instruction = nullptr;
|
||||
m_block_end = nullptr;
|
||||
|
@ -46,11 +43,14 @@ bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* o
|
|||
return false;
|
||||
}
|
||||
|
||||
cbi++;
|
||||
m_current_instruction++;
|
||||
}
|
||||
|
||||
BlockEpilogue();
|
||||
EmitEndBlock();
|
||||
if (!m_block_linked)
|
||||
{
|
||||
BlockEpilogue();
|
||||
EmitEndBlock(true, true);
|
||||
}
|
||||
|
||||
FinalizeBlock(out_host_code, out_host_code_size);
|
||||
Log_ProfilePrintf("JIT block 0x%08X: %zu instructions (%u bytes), %u host bytes", block->GetPC(),
|
||||
|
@ -957,6 +957,10 @@ void CodeGenerator::BlockPrologue()
|
|||
|
||||
EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0));
|
||||
|
||||
#if 0
|
||||
EmitFunctionCall(nullptr, &Thunks::LogPC, Value::FromConstantU32(m_pc));
|
||||
#endif
|
||||
|
||||
if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0)
|
||||
EmitICacheCheckAndUpdate();
|
||||
|
||||
|
@ -2184,7 +2188,10 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
{
|
||||
InstructionPrologue(cbi, 1);
|
||||
|
||||
auto DoBranch = [this](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg, Value&& branch_target) {
|
||||
auto DoBranch = [this, &cbi](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg,
|
||||
Value&& branch_target) {
|
||||
const bool can_link_block = cbi.is_direct_branch_instruction && g_settings.cpu_recompiler_block_linking;
|
||||
|
||||
// ensure the lr register is flushed, since we want its correct value after the branch
|
||||
// we don't want to invalidate it yet because of "jalr r0, r0", branch_target could be the lr_reg.
|
||||
if (lr_reg != Reg::count && lr_reg != Reg::zero)
|
||||
|
@ -2199,16 +2206,58 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
EmitCopyValue(next_pc.GetHostRegister(), CalculatePC(4));
|
||||
}
|
||||
|
||||
LabelType branch_not_taken;
|
||||
Value take_branch;
|
||||
LabelType branch_taken, branch_not_taken;
|
||||
if (condition != Condition::Always)
|
||||
{
|
||||
// condition is inverted because we want the case for skipping it
|
||||
if (lhs.IsValid() && rhs.IsValid())
|
||||
EmitConditionalBranch(condition, true, lhs.host_reg, rhs, &branch_not_taken);
|
||||
else if (lhs.IsValid())
|
||||
EmitConditionalBranch(condition, true, lhs.host_reg, lhs.size, &branch_not_taken);
|
||||
if (!can_link_block)
|
||||
{
|
||||
// condition is inverted because we want the case for skipping it
|
||||
if (lhs.IsValid() && rhs.IsValid())
|
||||
EmitConditionalBranch(condition, true, lhs.host_reg, rhs, &branch_not_taken);
|
||||
else if (lhs.IsValid())
|
||||
EmitConditionalBranch(condition, true, lhs.host_reg, lhs.size, &branch_not_taken);
|
||||
else
|
||||
EmitConditionalBranch(condition, true, &branch_not_taken);
|
||||
}
|
||||
else
|
||||
EmitConditionalBranch(condition, true, &branch_not_taken);
|
||||
{
|
||||
take_branch = m_register_cache.AllocateScratch(RegSize_32);
|
||||
switch (condition)
|
||||
{
|
||||
case Condition::NotEqual:
|
||||
case Condition::Equal:
|
||||
case Condition::Overflow:
|
||||
case Condition::Greater:
|
||||
case Condition::GreaterEqual:
|
||||
case Condition::LessEqual:
|
||||
case Condition::Less:
|
||||
case Condition::Above:
|
||||
case Condition::AboveEqual:
|
||||
case Condition::Below:
|
||||
case Condition::BelowEqual:
|
||||
{
|
||||
EmitCmp(lhs.GetHostRegister(), rhs);
|
||||
EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
|
||||
}
|
||||
break;
|
||||
|
||||
case Condition::Negative:
|
||||
case Condition::PositiveOrZero:
|
||||
case Condition::NotZero:
|
||||
case Condition::Zero:
|
||||
{
|
||||
Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0));
|
||||
EmitTest(lhs.GetHostRegister(), lhs);
|
||||
EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// save the old PC if we want to
|
||||
|
@ -2218,6 +2267,9 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
// if we don't cancel it, at the end of the instruction the value we write can be overridden.
|
||||
EmitCancelInterpreterLoadDelayForReg(lr_reg);
|
||||
EmitStoreGuestRegister(lr_reg, next_pc);
|
||||
|
||||
// now invalidate lr because it was possibly written in the branch
|
||||
m_register_cache.InvalidateGuestRegister(lr_reg);
|
||||
}
|
||||
|
||||
// we don't need to test the address of constant branches unless they're definitely misaligned, which would be
|
||||
|
@ -2256,24 +2308,125 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
m_register_cache.PopState();
|
||||
}
|
||||
|
||||
if (condition != Condition::Always)
|
||||
if (can_link_block)
|
||||
{
|
||||
// branch taken path - modify the next pc
|
||||
EmitCopyValue(next_pc.GetHostRegister(), branch_target);
|
||||
// if it's an in-block branch, compile the delay slot now
|
||||
// TODO: Make this more optimal by moving the condition down if it's a nop
|
||||
Assert((m_current_instruction + 1) != m_block_end);
|
||||
InstructionEpilogue(cbi);
|
||||
m_current_instruction++;
|
||||
if (!CompileInstruction(*m_current_instruction))
|
||||
return false;
|
||||
|
||||
// converge point
|
||||
EmitBindLabel(&branch_not_taken);
|
||||
WriteNewPC(next_pc, true);
|
||||
// flush all regs since we're at the end of the block now
|
||||
BlockEpilogue();
|
||||
m_block_linked = true;
|
||||
|
||||
// check downcount
|
||||
Value pending_ticks = m_register_cache.AllocateScratch(RegSize_32);
|
||||
Value downcount = m_register_cache.AllocateScratch(RegSize_32);
|
||||
EmitLoadCPUStructField(pending_ticks.GetHostRegister(), RegSize_32, offsetof(State, pending_ticks));
|
||||
EmitLoadCPUStructField(downcount.GetHostRegister(), RegSize_32, offsetof(State, downcount));
|
||||
|
||||
// pending < downcount
|
||||
LabelType return_to_dispatcher;
|
||||
|
||||
if (condition != Condition::Always)
|
||||
{
|
||||
EmitBranchIfBitClear(take_branch.GetHostRegister(), take_branch.size, 0, &branch_not_taken);
|
||||
m_register_cache.PushState();
|
||||
{
|
||||
WriteNewPC(branch_target, false);
|
||||
EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
|
||||
&return_to_dispatcher);
|
||||
|
||||
// we're committed at this point :D
|
||||
EmitStoreCPUStructField(offsetof(State, current_instruction_pc), branch_target);
|
||||
EmitEndBlock(true, false);
|
||||
|
||||
const void* jump_pointer = GetCurrentCodePointer();
|
||||
const void* resolve_pointer = GetCurrentFarCodePointer();
|
||||
EmitBranch(resolve_pointer);
|
||||
const u32 jump_size = static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) -
|
||||
static_cast<const char*>(jump_pointer));
|
||||
SwitchToFarCode();
|
||||
|
||||
EmitBeginBlock(true);
|
||||
EmitFunctionCall(nullptr, &CPU::Recompiler::Thunks::ResolveBranch, Value::FromConstantPtr(m_block),
|
||||
Value::FromConstantPtr(jump_pointer), Value::FromConstantPtr(resolve_pointer),
|
||||
Value::FromConstantU32(jump_size));
|
||||
EmitEndBlock(true, true);
|
||||
}
|
||||
m_register_cache.PopState();
|
||||
|
||||
SwitchToNearCode();
|
||||
EmitBindLabel(&branch_not_taken);
|
||||
}
|
||||
|
||||
m_register_cache.PushState();
|
||||
|
||||
if (condition != Condition::Always)
|
||||
{
|
||||
WriteNewPC(next_pc, true);
|
||||
EmitStoreCPUStructField(offsetof(State, current_instruction_pc), next_pc);
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteNewPC(branch_target, true);
|
||||
EmitStoreCPUStructField(offsetof(State, current_instruction_pc), branch_target);
|
||||
}
|
||||
|
||||
EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
|
||||
&return_to_dispatcher);
|
||||
|
||||
if (condition != Condition::Always)
|
||||
EmitStoreCPUStructField(offsetof(State, current_instruction_pc), next_pc);
|
||||
else
|
||||
EmitStoreCPUStructField(offsetof(State, current_instruction_pc), branch_target);
|
||||
|
||||
EmitEndBlock(true, false);
|
||||
|
||||
const void* jump_pointer = GetCurrentCodePointer();
|
||||
const void* resolve_pointer = GetCurrentFarCodePointer();
|
||||
EmitBranch(GetCurrentFarCodePointer());
|
||||
const u32 jump_size =
|
||||
static_cast<u32>(static_cast<const char*>(GetCurrentCodePointer()) - static_cast<const char*>(jump_pointer));
|
||||
SwitchToFarCode();
|
||||
|
||||
EmitBeginBlock(true);
|
||||
EmitFunctionCall(nullptr, &CPU::Recompiler::Thunks::ResolveBranch, Value::FromConstantPtr(m_block),
|
||||
Value::FromConstantPtr(jump_pointer), Value::FromConstantPtr(resolve_pointer),
|
||||
Value::FromConstantU32(jump_size));
|
||||
EmitEndBlock(true, true);
|
||||
|
||||
m_register_cache.PopState();
|
||||
|
||||
SwitchToNearCode();
|
||||
EmitBindLabel(&return_to_dispatcher);
|
||||
EmitEndBlock(true, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
// next_pc is not used for unconditional branches
|
||||
WriteNewPC(branch_target, true);
|
||||
if (condition != Condition::Always)
|
||||
{
|
||||
// branch taken path - modify the next pc
|
||||
EmitBindLabel(&branch_taken);
|
||||
EmitCopyValue(next_pc.GetHostRegister(), branch_target);
|
||||
|
||||
// converge point
|
||||
EmitBindLabel(&branch_not_taken);
|
||||
WriteNewPC(next_pc, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
// next_pc is not used for unconditional branches
|
||||
WriteNewPC(branch_target, true);
|
||||
}
|
||||
|
||||
InstructionEpilogue(cbi);
|
||||
}
|
||||
|
||||
// now invalidate lr because it was possibly written in the branch
|
||||
if (lr_reg != Reg::count && lr_reg != Reg::zero)
|
||||
m_register_cache.InvalidateGuestRegister(lr_reg);
|
||||
return true;
|
||||
};
|
||||
|
||||
// Compute the branch target.
|
||||
|
@ -2287,10 +2440,9 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
Value branch_target = OrValues(AndValues(CalculatePC(), Value::FromConstantU32(0xF0000000)),
|
||||
Value::FromConstantU32(cbi.instruction.j.target << 2));
|
||||
|
||||
DoBranch(Condition::Always, Value(), Value(), (cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count,
|
||||
std::move(branch_target));
|
||||
return DoBranch(Condition::Always, Value(), Value(),
|
||||
(cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count, std::move(branch_target));
|
||||
}
|
||||
break;
|
||||
|
||||
case InstructionOp::funct:
|
||||
{
|
||||
|
@ -2298,9 +2450,9 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
{
|
||||
// npc = rs, link to rt
|
||||
Value branch_target = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs);
|
||||
DoBranch(Condition::Always, Value(), Value(),
|
||||
(cbi.instruction.r.funct == InstructionFunct::jalr) ? cbi.instruction.r.rd : Reg::count,
|
||||
std::move(branch_target));
|
||||
return DoBranch(Condition::Always, Value(), Value(),
|
||||
(cbi.instruction.r.funct == InstructionFunct::jalr) ? cbi.instruction.r.rd : Reg::count,
|
||||
std::move(branch_target));
|
||||
}
|
||||
else if (cbi.instruction.r.funct == InstructionFunct::syscall ||
|
||||
cbi.instruction.r.funct == InstructionFunct::break_)
|
||||
|
@ -2308,13 +2460,15 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
const Exception excode =
|
||||
(cbi.instruction.r.funct == InstructionFunct::syscall) ? Exception::Syscall : Exception::BP;
|
||||
GenerateExceptionExit(cbi, excode);
|
||||
InstructionEpilogue(cbi);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
UnreachableCode();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case InstructionOp::beq:
|
||||
case InstructionOp::bne:
|
||||
|
@ -2326,7 +2480,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
if (cbi.instruction.op == InstructionOp::beq && cbi.instruction.i.rs == Reg::zero &&
|
||||
cbi.instruction.i.rt == Reg::zero)
|
||||
{
|
||||
DoBranch(Condition::Always, Value(), Value(), Reg::count, std::move(branch_target));
|
||||
return DoBranch(Condition::Always, Value(), Value(), Reg::count, std::move(branch_target));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2334,10 +2488,9 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
Value lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs, true, true);
|
||||
Value rhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rt);
|
||||
const Condition condition = (cbi.instruction.op == InstructionOp::beq) ? Condition::Equal : Condition::NotEqual;
|
||||
DoBranch(condition, lhs, rhs, Reg::count, std::move(branch_target));
|
||||
return DoBranch(condition, lhs, rhs, Reg::count, std::move(branch_target));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case InstructionOp::bgtz:
|
||||
case InstructionOp::blez:
|
||||
|
@ -2350,9 +2503,8 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
|
||||
const Condition condition =
|
||||
(cbi.instruction.op == InstructionOp::bgtz) ? Condition::Greater : Condition::LessEqual;
|
||||
DoBranch(condition, lhs, Value::FromConstantU32(0), Reg::count, std::move(branch_target));
|
||||
return DoBranch(condition, lhs, Value::FromConstantU32(0), Reg::count, std::move(branch_target));
|
||||
}
|
||||
break;
|
||||
|
||||
case InstructionOp::b:
|
||||
{
|
||||
|
@ -2378,17 +2530,13 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
|
|||
m_register_cache.WriteGuestRegister(Reg::ra, CalculatePC(4));
|
||||
}
|
||||
|
||||
DoBranch(condition, lhs, Value(), Reg::count, std::move(branch_target));
|
||||
return DoBranch(condition, lhs, Value(), Reg::count, std::move(branch_target));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
|
||||
InstructionEpilogue(cbi);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#include <array>
|
||||
#include <initializer_list>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/jit_code_buffer.h"
|
||||
|
||||
|
@ -25,6 +26,8 @@ public:
|
|||
static void AlignCodeBuffer(JitCodeBuffer* code_buffer);
|
||||
|
||||
static bool BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi);
|
||||
static void BackpatchBranch(void* pc, u32 pc_size, void* target);
|
||||
static void BackpatchReturn(void* pc, u32 pc_size);
|
||||
|
||||
bool CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
|
||||
|
||||
|
@ -34,8 +37,8 @@ public:
|
|||
//////////////////////////////////////////////////////////////////////////
|
||||
// Code Generation
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void EmitBeginBlock();
|
||||
void EmitEndBlock();
|
||||
void EmitBeginBlock(bool allocate_registers = true);
|
||||
void EmitEndBlock(bool free_registers = true, bool emit_return = true);
|
||||
void EmitExceptionExit();
|
||||
void EmitExceptionExitOnBool(const Value& value);
|
||||
void FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
|
||||
|
@ -105,6 +108,7 @@ public:
|
|||
void EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs, LabelType* label);
|
||||
void EmitConditionalBranch(Condition condition, bool invert, LabelType* label);
|
||||
void EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label);
|
||||
void EmitBranchIfBitSet(HostReg reg, RegSize size, u8 bit, LabelType* label);
|
||||
void EmitBindLabel(LabelType* label);
|
||||
|
||||
u32 PrepareStackForCall();
|
||||
|
@ -250,6 +254,7 @@ private:
|
|||
|
||||
u32 m_pc = 0;
|
||||
bool m_pc_valid = false;
|
||||
bool m_block_linked = false;
|
||||
|
||||
// whether various flags need to be reset.
|
||||
bool m_current_instruction_in_branch_delay_slot_dirty = false;
|
||||
|
|
|
@ -166,31 +166,42 @@ Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool al
|
|||
return new_value;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitBeginBlock()
|
||||
void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
||||
{
|
||||
m_emit->sub(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
|
||||
|
||||
// Save the link register, since we'll be calling functions.
|
||||
const bool link_reg_allocated = m_register_cache.AllocateHostReg(14);
|
||||
DebugAssert(link_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(link_reg_allocated);
|
||||
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
|
||||
if (allocate_registers)
|
||||
{
|
||||
// Save the link register, since we'll be calling functions.
|
||||
const bool link_reg_allocated = m_register_cache.AllocateHostReg(14);
|
||||
DebugAssert(link_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(link_reg_allocated);
|
||||
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
|
||||
|
||||
// Store the CPU struct pointer. TODO: make this better.
|
||||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
// m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state));
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||
// Store the CPU struct pointer. TODO: make this better.
|
||||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
// m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state));
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitEndBlock()
|
||||
void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_return /* = true */)
|
||||
{
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
if (free_registers)
|
||||
{
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
m_register_cache.FreeHostReg(14);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
}
|
||||
|
||||
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
|
||||
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
|
||||
m_emit->bx(a32::lr);
|
||||
|
||||
if (emit_return)
|
||||
{
|
||||
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
|
||||
m_emit->bx(a32::lr);
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitExceptionExit()
|
||||
|
@ -1572,6 +1583,49 @@ bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Overwrites the patchable region at `pc` with a `bx lr` return, pads the remainder with nops, and flushes the
/// instruction cache for the region.
///   pc      - start of the region to overwrite.
///   pc_size - size of the region in bytes.
/// NOTE(review): this function does not change write protection itself; the visible caller (ResolveBranch)
/// disables it on the code buffer around this call.
void CodeGenerator::BackpatchReturn(void* pc, u32 pc_size)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to return", pc);
|
||||
|
||||
// Assemble directly over the existing code at pc.
vixl::aarch32::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a32::A32);
|
||||
emit.bx(a32::lr);
|
||||
|
||||
// Fill whatever is left of the region with nops, counted in 4-byte units. The Assert fails if more than pc_size
// bytes were emitted.
const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
}
|
||||
|
||||
/// Overwrites the patchable region at `pc` with a branch to `target`, pads the remainder with nops, and flushes
/// the instruction cache for the region.
///   pc      - start of the region to overwrite.
///   pc_size - size of the region in bytes.
///   target  - address the new branch jumps to.
/// NOTE(review): this function does not change write protection itself; the visible callers (LinkBlock,
/// UnlinkBlock) toggle it on the code buffer around their calls.
void CodeGenerator::BackpatchBranch(void* pc, u32 pc_size, void* target)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to %p [branch]", pc, target);
|
||||
|
||||
// Assemble directly over the existing code at pc.
vixl::aarch32::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a32::A32);
|
||||
|
||||
// check jump distance
|
||||
const s32 displacement = GetPCDisplacement(pc, target);
|
||||
if (!IsPCDisplacementInImmediateRange(displacement))
|
||||
{
|
||||
// Displacement out of immediate range: load the absolute target address into RSCRATCH and branch through it.
emit.Mov(GetHostReg32(RSCRATCH), reinterpret_cast<uintptr_t>(target));
|
||||
emit.bx(GetHostReg32(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Displacement in immediate range: emit a direct branch to a label built from the displacement plus the
// current cursor offset.
a32::Label label(displacement + emit.GetCursorOffset());
|
||||
emit.b(&label);
|
||||
}
|
||||
|
||||
// shouldn't have any nops
|
||||
// Any remaining space is still padded, counted in 4-byte units. The Assert fails if more than pc_size bytes
// were emitted.
const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
{
|
||||
EmitLoadGlobalAddress(RSCRATCH, ptr);
|
||||
|
|
|
@ -194,41 +194,51 @@ Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool al
|
|||
return new_value;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitBeginBlock()
|
||||
void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
||||
{
|
||||
m_emit->Sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
|
||||
// Save the link register, since we'll be calling functions.
|
||||
const bool link_reg_allocated = m_register_cache.AllocateHostReg(30);
|
||||
DebugAssert(link_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(link_reg_allocated);
|
||||
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
|
||||
|
||||
// Store the CPU struct pointer. TODO: make this better.
|
||||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
if (allocate_registers)
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
Assert(fastmem_reg_allocated);
|
||||
m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||
// Save the link register, since we'll be calling functions.
|
||||
const bool link_reg_allocated = m_register_cache.AllocateHostReg(30);
|
||||
DebugAssert(link_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(link_reg_allocated);
|
||||
|
||||
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
|
||||
|
||||
// Store the CPU struct pointer. TODO: make this better.
|
||||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
Assert(fastmem_reg_allocated);
|
||||
m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitEndBlock()
|
||||
void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_return /* = true */)
|
||||
{
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
if (free_registers)
|
||||
{
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
m_register_cache.FreeHostReg(30); // lr
|
||||
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
}
|
||||
|
||||
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
|
||||
m_emit->Ret();
|
||||
|
||||
if (emit_return)
|
||||
m_emit->Ret();
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitExceptionExit()
|
||||
|
@ -1767,6 +1777,42 @@ bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
|||
return true;
|
||||
}
|
||||
|
||||
void CodeGenerator::BackpatchReturn(void* pc, u32 pc_size)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to return", pc);
|
||||
|
||||
vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a64::PositionDependentCode);
|
||||
emit.ret();
|
||||
|
||||
const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::BackpatchBranch(void* pc, u32 pc_size, void* target)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to %p [branch]", pc, target);
|
||||
|
||||
// check jump distance
|
||||
const s64 jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(target) - reinterpret_cast<intptr_t>(pc));
|
||||
Assert(Common::IsAligned(jump_distance, 4));
|
||||
Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2));
|
||||
|
||||
vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(pc), pc_size, a64::PositionDependentCode);
|
||||
emit.b(jump_distance >> 2);
|
||||
|
||||
// shouldn't have any nops
|
||||
const s32 nops = (static_cast<s32>(pc_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
{
|
||||
EmitLoadGlobalAddress(RSCRATCH, ptr);
|
||||
|
|
|
@ -206,35 +206,42 @@ Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool al
|
|||
return new_value;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitBeginBlock()
|
||||
void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
||||
{
|
||||
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
|
||||
|
||||
// Store the CPU struct pointer.
|
||||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||
// m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
if (allocate_registers)
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
Assert(fastmem_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(fastmem_reg_allocated);
|
||||
m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + offsetof(CPU::State, fastmem_base)]);
|
||||
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
|
||||
|
||||
// Store the CPU struct pointer.
|
||||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||
// m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
DebugAssert(fastmem_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(fastmem_reg_allocated);
|
||||
m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + offsetof(CPU::State, fastmem_base)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitEndBlock()
|
||||
void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_return /* = true */)
|
||||
{
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
if (free_registers)
|
||||
{
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
}
|
||||
|
||||
m_emit->ret();
|
||||
if (emit_return)
|
||||
m_emit->ret();
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitExceptionExit()
|
||||
|
@ -2336,7 +2343,7 @@ void CodeGenerator::EmitUpdateFastmemBase()
|
|||
|
||||
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
||||
{
|
||||
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem", lbi.host_pc, lbi.guest_pc);
|
||||
Log_ProfilePrintf("Backpatching %p (guest PC 0x%08X) to slowmem", lbi.host_pc, lbi.guest_pc);
|
||||
|
||||
// turn it into a jump to the slowmem handler
|
||||
Xbyak::CodeGenerator cg(lbi.host_code_size, lbi.host_pc);
|
||||
|
@ -2352,6 +2359,39 @@ bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
|||
return true;
|
||||
}
|
||||
|
||||
void CodeGenerator::BackpatchReturn(void* pc, u32 pc_size)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to return", pc);
|
||||
|
||||
Xbyak::CodeGenerator cg(pc_size, pc);
|
||||
cg.ret();
|
||||
|
||||
const s32 nops =
|
||||
static_cast<s32>(pc_size) - static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(pc)));
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
cg.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::BackpatchBranch(void* pc, u32 pc_size, void* target)
|
||||
{
|
||||
Log_ProfilePrintf("Backpatching %p to %p [branch]", pc, target);
|
||||
|
||||
Xbyak::CodeGenerator cg(pc_size, pc);
|
||||
cg.jmp(target);
|
||||
|
||||
// shouldn't have any nops
|
||||
const s32 nops =
|
||||
static_cast<s32>(pc_size) - static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(pc)));
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
cg.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(pc, pc_size);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
{
|
||||
const s64 displacement =
|
||||
|
@ -2851,6 +2891,59 @@ void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, Labe
|
|||
}
|
||||
}
|
||||
|
||||
/// Emits a branch to `label` that is taken when bit index `bit` of host register `reg`
/// (viewed at width `size`) is set.
///
/// Bits 0-7 are checked with `test` against a single-bit mask followed by `jnz`; higher
/// bit indices use `bt` followed by `jc`. Only 8/16/32-bit sizes are handled; any other
/// size reaches UnreachableCode().
void CodeGenerator::EmitBranchIfBitSet(HostReg reg, RegSize size, u8 bit, LabelType* label)
{
  // same size, probably faster
  const bool use_test_mask = (bit < 8);

  // First emit the instruction that sets the flags...
  switch (size)
  {
    case RegSize_8:
      if (use_test_mask)
        m_emit->test(GetHostReg8(reg), (1u << bit));
      else
        // NOTE(review): x86 BT has no 8-bit operand form, and bit >= 8 does not fit in an
        // 8-bit register anyway - confirm callers never reach this combination.
        m_emit->bt(GetHostReg8(reg), bit);
      break;

    case RegSize_16:
      if (use_test_mask)
        m_emit->test(GetHostReg16(reg), (1u << bit));
      else
        m_emit->bt(GetHostReg16(reg), bit);
      break;

    case RegSize_32:
      if (use_test_mask)
        m_emit->test(GetHostReg32(reg), (1u << bit));
      else
        m_emit->bt(GetHostReg32(reg), bit);
      break;

    default:
      // Unsupported size: nothing is emitted.
      UnreachableCode();
      return;
  }

  // ...then the conditional jump matching that instruction: `test` reports through ZF,
  // `bt` through CF.
  if (use_test_mask)
    m_emit->jnz(*label);
  else
    m_emit->jc(*label);
}
|
||||
|
||||
void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label)
|
||||
{
|
||||
if (bit < 8)
|
||||
|
|
|
@ -280,6 +280,21 @@ Value RegisterCache::AllocateScratch(RegSize size, HostReg reg /* = HostReg_Inva
|
|||
return Value::FromScratch(this, reg, size);
|
||||
}
|
||||
|
||||
void RegisterCache::ReserveCallerSavedRegisters()
|
||||
{
|
||||
for (u32 reg = 0; reg < HostReg_Count; reg++)
|
||||
{
|
||||
if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
|
||||
HostRegState::CalleeSaved)
|
||||
{
|
||||
DebugAssert(m_state.callee_saved_order_count < HostReg_Count);
|
||||
m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg), GetActiveCalleeSavedRegisterCount());
|
||||
m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
|
||||
m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u32 RegisterCache::PushCallerSavedRegisters() const
|
||||
{
|
||||
u32 position = GetActiveCalleeSavedRegisterCount();
|
||||
|
|
|
@ -193,6 +193,16 @@ struct Value
|
|||
static Value FromConstantU32(u32 value) { return FromConstant(ZeroExtend64(value), RegSize_32); }
|
||||
static Value FromConstantS32(s32 value) { return FromConstant(ZeroExtend64(static_cast<u32>(value)), RegSize_32); }
|
||||
static Value FromConstantU64(u64 value) { return FromConstant(value, RegSize_64); }
|
||||
/// Builds a constant Value holding the numeric address of `pointer`.
/// On CPU_X64 / CPU_AARCH64 builds the constant is 64 bits wide, on CPU_AARCH32 builds it
/// is 32 bits wide. When none of those macros is defined, a 32-bit zero constant is returned.
static Value FromConstantPtr(const void* pointer)
{
#if defined(CPU_X64) || defined(CPU_AARCH64)
  const u64 address = static_cast<u64>(reinterpret_cast<uintptr_t>(pointer));
  return FromConstant(address, RegSize_64);
#elif defined(CPU_AARCH32)
  const u32 address = static_cast<u32>(reinterpret_cast<uintptr_t>(pointer));
  return FromConstant(address, RegSize_32);
#else
  // Unrecognised host architecture: the pointer is ignored.
  return FromConstant(0, RegSize_32);
#endif
}
|
||||
|
||||
private:
|
||||
void Release();
|
||||
|
@ -241,6 +251,9 @@ public:
|
|||
/// Ensures a host register is free, removing any value cached.
|
||||
void EnsureHostRegFree(HostReg reg);
|
||||
|
||||
/// Pushes and marks as allocated every callee-saved host register that has not been saved yet, enabling later use without further stack pushes.
|
||||
void ReserveCallerSavedRegisters();
|
||||
|
||||
/// Push/pop volatile host registers. Returns the number of registers pushed/popped.
|
||||
u32 PushCallerSavedRegisters() const;
|
||||
u32 PopCallerSavedRegisters() const;
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "cpu_types.h"
|
||||
|
||||
namespace CPU {
|
||||
struct CodeBlock;
|
||||
struct CodeBlockInstruction;
|
||||
|
||||
namespace Recompiler::Thunks {
|
||||
|
@ -32,6 +33,9 @@ void UncheckedWriteMemoryByte(u32 address, u32 value);
|
|||
void UncheckedWriteMemoryHalfWord(u32 address, u32 value);
|
||||
void UncheckedWriteMemoryWord(u32 address, u32 value);
|
||||
|
||||
void ResolveBranch(CodeBlock* block, void* host_pc, void* host_resolve_pc, u32 host_pc_size);
|
||||
void LogPC(u32 pc);
|
||||
|
||||
} // namespace Recompiler::Thunks
|
||||
|
||||
} // namespace CPU
|
||||
|
|
|
@ -104,24 +104,25 @@ bool IsDirectBranchInstruction(const Instruction& instruction)
|
|||
}
|
||||
}
|
||||
|
||||
u32 GetBranchInstructionTarget(const Instruction& instruction, u32 instruction_pc)
|
||||
VirtualMemoryAddress GetDirectBranchTarget(const Instruction& instruction, VirtualMemoryAddress instruction_pc)
|
||||
{
|
||||
const VirtualMemoryAddress pc = instruction_pc + 4;
|
||||
|
||||
switch (instruction.op)
|
||||
{
|
||||
case InstructionOp::j:
|
||||
case InstructionOp::jal:
|
||||
return ((instruction_pc + 4) & UINT32_C(0xF0000000)) | (instruction.j.target << 2);
|
||||
return (pc & UINT32_C(0xF0000000)) | (instruction.j.target << 2);
|
||||
|
||||
case InstructionOp::b:
|
||||
case InstructionOp::beq:
|
||||
case InstructionOp::bgtz:
|
||||
case InstructionOp::blez:
|
||||
case InstructionOp::bne:
|
||||
return instruction_pc + 4 + (instruction.i.imm_sext32() << 2);
|
||||
return (pc + (instruction.i.imm_sext32() << 2));
|
||||
|
||||
default:
|
||||
Panic("Trying to get branch target of indirect or invalid branch");
|
||||
return instruction_pc;
|
||||
return pc;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -223,7 +223,7 @@ bool IsNopInstruction(const Instruction& instruction);
|
|||
bool IsBranchInstruction(const Instruction& instruction);
|
||||
bool IsUnconditionalBranchInstruction(const Instruction& instruction);
|
||||
bool IsDirectBranchInstruction(const Instruction& instruction);
|
||||
u32 GetBranchInstructionTarget(const Instruction& instruction, u32 instruction_pc);
|
||||
VirtualMemoryAddress GetDirectBranchTarget(const Instruction& instruction, VirtualMemoryAddress instruction_pc);
|
||||
bool IsCallInstruction(const Instruction& instruction);
|
||||
bool IsReturnInstruction(const Instruction& instruction);
|
||||
bool IsMemoryLoadInstruction(const Instruction& instruction);
|
||||
|
|
|
@ -511,6 +511,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
|
|||
si.SetIntValue("CPU", "OverclockDenominator", 1);
|
||||
si.SetBoolValue("CPU", "OverclockEnable", false);
|
||||
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false);
|
||||
si.SetBoolValue("CPU", "RecompilerBlockLinking", true);
|
||||
si.SetBoolValue("CPU", "ICache", false);
|
||||
si.SetBoolValue("CPU", "FastmemMode", Settings::GetCPUFastmemModeName(Settings::DEFAULT_CPU_FASTMEM_MODE));
|
||||
|
||||
|
@ -772,24 +773,15 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
|
|||
}
|
||||
|
||||
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler &&
|
||||
g_settings.cpu_recompiler_memory_exceptions != old_settings.cpu_recompiler_memory_exceptions)
|
||||
(g_settings.cpu_recompiler_memory_exceptions != old_settings.cpu_recompiler_memory_exceptions ||
|
||||
g_settings.cpu_recompiler_block_linking != old_settings.cpu_recompiler_block_linking ||
|
||||
g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache))
|
||||
{
|
||||
AddOSDMessage(g_settings.cpu_recompiler_memory_exceptions ?
|
||||
TranslateStdString("OSDMessage", "CPU memory exceptions enabled, flushing all blocks.") :
|
||||
TranslateStdString("OSDMessage", "CPU memory exceptions disabled, flushing all blocks."),
|
||||
5.0f);
|
||||
AddOSDMessage(TranslateStdString("OSDMessage", "Recompiler options changed, flushing all blocks."), 5.0f);
|
||||
CPU::CodeCache::Flush();
|
||||
}
|
||||
|
||||
if (g_settings.cpu_execution_mode != CPUExecutionMode::Interpreter &&
|
||||
g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache)
|
||||
{
|
||||
AddOSDMessage(g_settings.cpu_recompiler_icache ?
|
||||
TranslateStdString("OSDMessage", "CPU ICache enabled, flushing all blocks.") :
|
||||
TranslateStdString("OSDMessage", "CPU ICache disabled, flushing all blocks."),
|
||||
5.0f);
|
||||
CPU::CodeCache::Flush();
|
||||
CPU::ClearICache();
|
||||
if (g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache)
|
||||
CPU::ClearICache();
|
||||
}
|
||||
|
||||
m_audio_stream->SetOutputVolume(GetAudioOutputVolume());
|
||||
|
|
|
@ -172,6 +172,7 @@ void Settings::Load(SettingsInterface& si)
|
|||
cpu_overclock_enable = si.GetBoolValue("CPU", "OverclockEnable", false);
|
||||
UpdateOverclockActive();
|
||||
cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false);
|
||||
cpu_recompiler_block_linking = si.GetBoolValue("CPU", "RecompilerBlockLinking", true);
|
||||
cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false);
|
||||
cpu_fastmem_mode = ParseCPUFastmemMode(
|
||||
si.GetStringValue("CPU", "FastmemMode", GetCPUFastmemModeName(DEFAULT_CPU_FASTMEM_MODE)).c_str())
|
||||
|
@ -363,6 +364,7 @@ void Settings::Save(SettingsInterface& si) const
|
|||
si.SetIntValue("CPU", "OverclockNumerator", cpu_overclock_numerator);
|
||||
si.SetIntValue("CPU", "OverclockDenominator", cpu_overclock_denominator);
|
||||
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions);
|
||||
si.SetBoolValue("CPU", "RecompilerBlockLinking", cpu_recompiler_block_linking);
|
||||
si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache);
|
||||
si.SetStringValue("CPU", "FastmemMode", GetCPUFastmemModeName(cpu_fastmem_mode));
|
||||
|
||||
|
|
|
@ -78,6 +78,7 @@ struct Settings
|
|||
bool cpu_overclock_enable = false;
|
||||
bool cpu_overclock_active = false;
|
||||
bool cpu_recompiler_memory_exceptions = false;
|
||||
bool cpu_recompiler_block_linking = true;
|
||||
bool cpu_recompiler_icache = false;
|
||||
CPUFastmemMode cpu_fastmem_mode = CPUFastmemMode::Disabled;
|
||||
|
||||
|
|
|
@ -159,6 +159,8 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface,
|
|||
|
||||
addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Memory Exceptions"), "CPU",
|
||||
"RecompilerMemoryExceptions", false);
|
||||
addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Block Linking"), "CPU",
|
||||
"RecompilerBlockLinking", true);
|
||||
addChoiceTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Fast Memory Access"), "CPU",
|
||||
"FastmemMode", Settings::ParseCPUFastmemMode, Settings::GetCPUFastmemModeName,
|
||||
Settings::GetCPUFastmemModeDisplayName, "CPUFastmemMode",
|
||||
|
@ -226,20 +228,21 @@ void AdvancedSettingsWidget::onResetToDefaultClicked()
|
|||
setFloatRangeTweakOption(m_ui.tweakOptionTable, 4, -1.0f);
|
||||
setFloatRangeTweakOption(m_ui.tweakOptionTable, 5, Settings::DEFAULT_GPU_PGXP_DEPTH_THRESHOLD);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 6, false);
|
||||
setChoiceTweakOption(m_ui.tweakOptionTable, 7, Settings::DEFAULT_CPU_FASTMEM_MODE);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 8, false);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 7, true);
|
||||
setChoiceTweakOption(m_ui.tweakOptionTable, 8, Settings::DEFAULT_CPU_FASTMEM_MODE);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 9, false);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 10, false);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 11, false);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 12, false);
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 13, Settings::DEFAULT_VRAM_WRITE_DUMP_WIDTH_THRESHOLD);
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 14, Settings::DEFAULT_VRAM_WRITE_DUMP_HEIGHT_THRESHOLD);
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 15, static_cast<int>(Settings::DEFAULT_DMA_MAX_SLICE_TICKS));
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 16, static_cast<int>(Settings::DEFAULT_DMA_HALT_TICKS));
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 17, static_cast<int>(Settings::DEFAULT_GPU_FIFO_SIZE));
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 18, static_cast<int>(Settings::DEFAULT_GPU_MAX_RUN_AHEAD));
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 19, false);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 20, true);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 21, false);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 13, false);
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 14, Settings::DEFAULT_VRAM_WRITE_DUMP_WIDTH_THRESHOLD);
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 15, Settings::DEFAULT_VRAM_WRITE_DUMP_HEIGHT_THRESHOLD);
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 16, static_cast<int>(Settings::DEFAULT_DMA_MAX_SLICE_TICKS));
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 17, static_cast<int>(Settings::DEFAULT_DMA_HALT_TICKS));
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 18, static_cast<int>(Settings::DEFAULT_GPU_FIFO_SIZE));
|
||||
setIntRangeTweakOption(m_ui.tweakOptionTable, 19, static_cast<int>(Settings::DEFAULT_GPU_MAX_RUN_AHEAD));
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 20, false);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 21, true);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 22, false);
|
||||
setBooleanTweakOption(m_ui.tweakOptionTable, 23, false);
|
||||
}
|
||||
|
|
|
@ -2588,6 +2588,10 @@ void DrawSettingsWindow()
|
|||
settings_changed |= ToggleButton("Enable Recompiler Memory Exceptions",
|
||||
"Enables alignment and bus exceptions. Not needed for any known games.",
|
||||
&s_settings_copy.cpu_recompiler_memory_exceptions);
|
||||
settings_changed |= ToggleButton(
|
||||
"Enable Recompiler Block Linking",
|
||||
"Performance enhancement - jumps directly between blocks instead of returning to the dispatcher.",
|
||||
&s_settings_copy.cpu_recompiler_block_linking);
|
||||
settings_changed |= EnumChoiceButton("Recompiler Fast Memory Access",
|
||||
"Avoids calls to C++ code, significantly speeding up the recompiler.",
|
||||
&s_settings_copy.cpu_fastmem_mode, &Settings::GetCPUFastmemModeDisplayName,
|
||||
|
@ -3902,6 +3906,8 @@ void DrawDebugSettingsMenu()
|
|||
|
||||
settings_changed |=
|
||||
ImGui::MenuItem("Recompiler Memory Exceptions", nullptr, &s_settings_copy.cpu_recompiler_memory_exceptions);
|
||||
settings_changed |=
|
||||
ImGui::MenuItem("Recompiler Block Linking", nullptr, &s_settings_copy.cpu_recompiler_block_linking);
|
||||
if (ImGui::BeginMenu("Recompiler Fastmem"))
|
||||
{
|
||||
for (u32 i = 0; i < static_cast<u32>(CPUFastmemMode::Count); i++)
|
||||
|
|
Loading…
Reference in a new issue