CPU/Recompiler: Support pushing/popping the register cache state

This commit is contained in:
Connor McLaughlin 2019-12-12 16:47:31 +10:00
parent 1905d22a9a
commit 20c7aaf74b
3 changed files with 181 additions and 109 deletions

View file

@ -219,9 +219,13 @@ void CodeGenerator::EmitExceptionExitOnBool(const Value& value)
m_emit->test(GetHostReg8(value), GetHostReg8(value)); m_emit->test(GetHostReg8(value), GetHostReg8(value));
m_emit->jnz(GetCurrentFarCodePointer()); m_emit->jnz(GetCurrentFarCodePointer());
m_register_cache.PushState();
SwitchToFarCode(); SwitchToFarCode();
EmitExceptionExit(); EmitExceptionExit();
SwitchToNearCode(); SwitchToNearCode();
m_register_cache.PopState();
} }
void CodeGenerator::FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size) void CodeGenerator::FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size)
@ -1572,11 +1576,15 @@ Value CodeGenerator::EmitLoadGuestMemory(const Value& address, RegSize size)
m_emit->test(GetHostReg64(result.host_reg), GetHostReg64(result.host_reg)); m_emit->test(GetHostReg64(result.host_reg), GetHostReg64(result.host_reg));
m_emit->js(GetCurrentFarCodePointer()); m_emit->js(GetCurrentFarCodePointer());
m_register_cache.PushState();
// load exception path // load exception path
SwitchToFarCode(); SwitchToFarCode();
EmitExceptionExit(); EmitExceptionExit();
SwitchToNearCode(); SwitchToNearCode();
m_register_cache.PopState();
// Downcast to ignore upper 56/48/32 bits. This should be a noop. // Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size) switch (size)
{ {
@ -1623,7 +1631,7 @@ void CodeGenerator::EmitStoreGuestMemory(const Value& address, const Value& valu
break; break;
} }
Xbyak::Label store_okay; m_register_cache.PushState();
m_emit->test(GetHostReg8(result), GetHostReg8(result)); m_emit->test(GetHostReg8(result), GetHostReg8(result));
m_emit->jz(GetCurrentFarCodePointer()); m_emit->jz(GetCurrentFarCodePointer());
@ -1632,6 +1640,8 @@ void CodeGenerator::EmitStoreGuestMemory(const Value& address, const Value& valu
SwitchToFarCode(); SwitchToFarCode();
EmitExceptionExit(); EmitExceptionExit();
SwitchToNearCode(); SwitchToNearCode();
m_register_cache.PopState();
} }
void CodeGenerator::EmitFlushInterpreterLoadDelay() void CodeGenerator::EmitFlushInterpreterLoadDelay()
@ -1767,10 +1777,14 @@ static void EmitConditionalJump(Condition condition, bool invert, Xbyak::CodeGen
void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_target) void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_target)
{ {
// we have to always read the old PC.. when we can push/pop the register cache state this won't be needed // allocate scratch register for reading npc - we return to the main path, so this could cause a reg flush
Value old_npc; Value old_npc = m_register_cache.AllocateScratch(RegSize_32);
// npc gets modified by the branch, so we can't trust it on returning. same for lr_reg, which might contain a dirty
// value
m_register_cache.FlushGuestRegister(Reg::npc, true, true);
if (lr_reg != Reg::count) if (lr_reg != Reg::count)
old_npc = m_register_cache.ReadGuestRegister(Reg::npc, false, true); m_register_cache.FlushGuestRegister(lr_reg, true, true);
// condition is inverted because we want the case for skipping it // condition is inverted because we want the case for skipping it
Xbyak::Label skip_branch; Xbyak::Label skip_branch;
@ -1783,8 +1797,8 @@ void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_t
// Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards, // Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards,
// if we don't cancel it, at the end of the instruction the value we write can be overridden. // if we don't cancel it, at the end of the instruction the value we write can be overridden.
EmitCancelInterpreterLoadDelayForReg(lr_reg); EmitCancelInterpreterLoadDelayForReg(lr_reg);
m_register_cache.WriteGuestRegister(lr_reg, std::move(old_npc)); EmitLoadGuestRegister(old_npc.host_reg, Reg::npc);
m_register_cache.FlushGuestRegister(lr_reg, true, true); EmitStoreGuestRegister(lr_reg, old_npc);
} }
// we don't need to test the address of constant branches unless they're definitely misaligned, which would be // we don't need to test the address of constant branches unless they're definitely misaligned, which would be
@ -1803,17 +1817,20 @@ void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_t
m_emit->jnz(GetCurrentFarCodePointer()); m_emit->jnz(GetCurrentFarCodePointer());
} }
m_register_cache.PushState();
// exception exit for misaligned target // exception exit for misaligned target
SwitchToFarCode(); SwitchToFarCode();
EmitFunctionCall(nullptr, &Thunks::RaiseAddressException, m_register_cache.GetCPUPtr(), branch_target, EmitFunctionCall(nullptr, &Thunks::RaiseAddressException, m_register_cache.GetCPUPtr(), branch_target,
Value::FromConstantU8(0), Value::FromConstantU8(1)); Value::FromConstantU8(0), Value::FromConstantU8(1));
EmitExceptionExit(); EmitExceptionExit();
SwitchToNearCode(); SwitchToNearCode();
m_register_cache.PopState();
} }
// branch taken path - write new PC and flush it, since two branches // branch taken path - write new PC and flush it, since two branches
m_register_cache.WriteGuestRegister(Reg::npc, std::move(branch_target)); EmitStoreGuestRegister(Reg::npc, branch_target);
m_register_cache.FlushGuestRegister(Reg::npc, true, true);
EmitStoreCPUStructField(offsetof(Core, m_current_instruction_was_branch_taken), Value::FromConstantU8(1)); EmitStoreCPUStructField(offsetof(Core, m_current_instruction_was_branch_taken), Value::FromConstantU8(1));
// converge point // converge point
@ -1836,6 +1853,8 @@ void CodeGenerator::EmitRaiseException(Exception excode, Condition condition /*
return; return;
} }
m_register_cache.PushState();
const void* far_code_ptr = GetCurrentFarCodePointer(); const void* far_code_ptr = GetCurrentFarCodePointer();
EmitConditionalJump(condition, false, m_emit, far_code_ptr); EmitConditionalJump(condition, false, m_emit, far_code_ptr);
@ -1844,6 +1863,8 @@ void CodeGenerator::EmitRaiseException(Exception excode, Condition condition /*
Value::FromConstantU8(static_cast<u8>(excode))); Value::FromConstantU8(static_cast<u8>(excode)));
EmitExceptionExit(); EmitExceptionExit();
SwitchToNearCode(); SwitchToNearCode();
m_register_cache.PopState();
} }
#if 0 #if 0

View file

@ -101,32 +101,35 @@ void Value::Undiscard()
// Constructs the register cache bound to the given code generator and fills every slot of the
// guest register order list with Reg::count (used elsewhere in this file as the "no register" value).
RegisterCache::RegisterCache(CodeGenerator& code_generator) : m_code_generator(code_generator) RegisterCache::RegisterCache(CodeGenerator& code_generator) : m_code_generator(code_generator)
{ {
m_guest_register_order.fill(Reg::count); m_state.guest_reg_order.fill(Reg::count);
} }
// Destructor. Asserts that the saved-state stack is empty, i.e. that every PushState() was
// matched by a PopState() before the cache is destroyed.
RegisterCache::~RegisterCache() = default; RegisterCache::~RegisterCache()
{
Assert(m_state_stack.empty());
}
// Records the order in which host registers are tried by the allocator.
// Each listed register's state is overwritten with exactly HostRegState::Usable (plain assignment,
// so any flags set earlier are dropped), and the number of listed registers becomes the available count.
// NOTE(review): no bounds check of `index` against the allocation order array is visible here.
void RegisterCache::SetHostRegAllocationOrder(std::initializer_list<HostReg> regs) void RegisterCache::SetHostRegAllocationOrder(std::initializer_list<HostReg> regs)
{ {
size_t index = 0; size_t index = 0;
for (HostReg reg : regs) for (HostReg reg : regs)
{ {
m_host_register_state[reg] = HostRegState::Usable; m_state.host_reg_state[reg] = HostRegState::Usable;
m_host_register_allocation_order[index++] = reg; m_host_register_allocation_order[index++] = reg;
} }
m_host_register_available_count = static_cast<u32>(index); m_state.available_count = static_cast<u32>(index);
} }
// Adds the CallerSaved flag to each listed host register; existing flags are preserved (bitwise OR).
void RegisterCache::SetCallerSavedHostRegs(std::initializer_list<HostReg> regs) void RegisterCache::SetCallerSavedHostRegs(std::initializer_list<HostReg> regs)
{ {
for (HostReg reg : regs) for (HostReg reg : regs)
m_host_register_state[reg] |= HostRegState::CallerSaved; m_state.host_reg_state[reg] |= HostRegState::CallerSaved;
} }
// Adds the CalleeSaved flag to each listed host register; existing flags are preserved (bitwise OR).
void RegisterCache::SetCalleeSavedHostRegs(std::initializer_list<HostReg> regs) void RegisterCache::SetCalleeSavedHostRegs(std::initializer_list<HostReg> regs)
{ {
for (HostReg reg : regs) for (HostReg reg : regs)
m_host_register_state[reg] |= HostRegState::CalleeSaved; m_state.host_reg_state[reg] |= HostRegState::CalleeSaved;
} }
void RegisterCache::SetCPUPtrHostReg(HostReg reg) void RegisterCache::SetCPUPtrHostReg(HostReg reg)
@ -136,17 +139,17 @@ void RegisterCache::SetCPUPtrHostReg(HostReg reg)
// Returns true when the Usable flag is set in the state of the given host register.
bool RegisterCache::IsUsableHostReg(HostReg reg) const bool RegisterCache::IsUsableHostReg(HostReg reg) const
{ {
return (m_host_register_state[reg] & HostRegState::Usable) != HostRegState::None; return (m_state.host_reg_state[reg] & HostRegState::Usable) != HostRegState::None;
} }
// Returns true when the InUse flag is set in the state of the given host register.
bool RegisterCache::IsHostRegInUse(HostReg reg) const bool RegisterCache::IsHostRegInUse(HostReg reg) const
{ {
return (m_host_register_state[reg] & HostRegState::InUse) != HostRegState::None; return (m_state.host_reg_state[reg] & HostRegState::InUse) != HostRegState::None;
} }
bool RegisterCache::HasFreeHostRegister() const bool RegisterCache::HasFreeHostRegister() const
{ {
for (const HostRegState state : m_host_register_state) for (const HostRegState state : m_state.host_reg_state)
{ {
if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable)) if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable))
return true; return true;
@ -158,7 +161,7 @@ bool RegisterCache::HasFreeHostRegister() const
u32 RegisterCache::GetUsedHostRegisters() const u32 RegisterCache::GetUsedHostRegisters() const
{ {
u32 count = 0; u32 count = 0;
for (const HostRegState state : m_host_register_state) for (const HostRegState state : m_state.host_reg_state)
{ {
if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable | HostRegState::InUse)) if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable | HostRegState::InUse))
count++; count++;
@ -170,7 +173,7 @@ u32 RegisterCache::GetUsedHostRegisters() const
u32 RegisterCache::GetFreeHostRegisters() const u32 RegisterCache::GetFreeHostRegisters() const
{ {
u32 count = 0; u32 count = 0;
for (const HostRegState state : m_host_register_state) for (const HostRegState state : m_state.host_reg_state)
{ {
if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable)) if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable))
count++; count++;
@ -182,10 +185,10 @@ u32 RegisterCache::GetFreeHostRegisters() const
HostReg RegisterCache::AllocateHostReg(HostRegState state /* = HostRegState::InUse */) HostReg RegisterCache::AllocateHostReg(HostRegState state /* = HostRegState::InUse */)
{ {
// try for a free register in allocation order // try for a free register in allocation order
for (u32 i = 0; i < m_host_register_available_count; i++) for (u32 i = 0; i < m_state.available_count; i++)
{ {
const HostReg reg = m_host_register_allocation_order[i]; const HostReg reg = m_host_register_allocation_order[i];
if ((m_host_register_state[reg] & (HostRegState::Usable | HostRegState::InUse)) == HostRegState::Usable) if ((m_state.host_reg_state[reg] & (HostRegState::Usable | HostRegState::InUse)) == HostRegState::Usable)
{ {
if (AllocateHostReg(reg, state)) if (AllocateHostReg(reg, state))
return reg; return reg;
@ -201,19 +204,19 @@ HostReg RegisterCache::AllocateHostReg(HostRegState state /* = HostRegState::InU
bool RegisterCache::AllocateHostReg(HostReg reg, HostRegState state /*= HostRegState::InUse*/) bool RegisterCache::AllocateHostReg(HostReg reg, HostRegState state /*= HostRegState::InUse*/)
{ {
if ((m_host_register_state[reg] & HostRegState::InUse) == HostRegState::InUse) if ((m_state.host_reg_state[reg] & HostRegState::InUse) == HostRegState::InUse)
return false; return false;
m_host_register_state[reg] |= state; m_state.host_reg_state[reg] |= state;
if ((m_host_register_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved) HostRegState::CalleeSaved)
{ {
// new register we need to save.. // new register we need to save..
DebugAssert(m_host_register_callee_saved_order_count < HostReg_Count); DebugAssert(m_state.callee_saved_order_count < HostReg_Count);
m_code_generator.EmitPushHostReg(reg, GetActiveCalleeSavedRegisterCount()); m_code_generator.EmitPushHostReg(reg, GetActiveCalleeSavedRegisterCount());
m_host_register_callee_saved_order[m_host_register_callee_saved_order_count++] = reg; m_state.callee_saved_order[m_state.callee_saved_order_count++] = reg;
m_host_register_state[reg] |= HostRegState::CalleeSavedAllocated; m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
} }
return reg; return reg;
@ -223,21 +226,21 @@ void RegisterCache::DiscardHostReg(HostReg reg)
{ {
DebugAssert(IsHostRegInUse(reg)); DebugAssert(IsHostRegInUse(reg));
Log_DebugPrintf("Discarding host register %s", m_code_generator.GetHostRegName(reg)); Log_DebugPrintf("Discarding host register %s", m_code_generator.GetHostRegName(reg));
m_host_register_state[reg] |= HostRegState::Discarded; m_state.host_reg_state[reg] |= HostRegState::Discarded;
} }
// Clears the Discarded flag on a host register. The register must currently be in use
// (checked with DebugAssert); all other flags are left untouched.
void RegisterCache::UndiscardHostReg(HostReg reg) void RegisterCache::UndiscardHostReg(HostReg reg)
{ {
DebugAssert(IsHostRegInUse(reg)); DebugAssert(IsHostRegInUse(reg));
Log_DebugPrintf("Undiscarding host register %s", m_code_generator.GetHostRegName(reg)); Log_DebugPrintf("Undiscarding host register %s", m_code_generator.GetHostRegName(reg));
m_host_register_state[reg] &= ~HostRegState::Discarded; m_state.host_reg_state[reg] &= ~HostRegState::Discarded;
} }
// Marks a host register as no longer in use by clearing its InUse flag. The register must
// currently be in use (checked with DebugAssert). Only InUse is cleared; other flags
// (for example Discarded) keep whatever value they had.
void RegisterCache::FreeHostReg(HostReg reg) void RegisterCache::FreeHostReg(HostReg reg)
{ {
DebugAssert(IsHostRegInUse(reg)); DebugAssert(IsHostRegInUse(reg));
Log_DebugPrintf("Freeing host register %s", m_code_generator.GetHostRegName(reg)); Log_DebugPrintf("Freeing host register %s", m_code_generator.GetHostRegName(reg));
m_host_register_state[reg] &= ~HostRegState::InUse; m_state.host_reg_state[reg] &= ~HostRegState::InUse;
} }
void RegisterCache::EnsureHostRegFree(HostReg reg) void RegisterCache::EnsureHostRegFree(HostReg reg)
@ -247,7 +250,7 @@ void RegisterCache::EnsureHostRegFree(HostReg reg)
for (u8 i = 0; i < static_cast<u8>(Reg::count); i++) for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
{ {
if (m_guest_reg_cache[i].IsInHostRegister() && m_guest_reg_cache[i].GetHostRegister() == reg) if (m_state.guest_reg_state[i].IsInHostRegister() && m_state.guest_reg_state[i].GetHostRegister() == reg)
FlushGuestRegister(static_cast<Reg>(i), true, true); FlushGuestRegister(static_cast<Reg>(i), true, true);
} }
} }
@ -280,7 +283,7 @@ u32 RegisterCache::PushCallerSavedRegisters() const
u32 count = 0; u32 count = 0;
for (u32 i = 0; i < HostReg_Count; i++) for (u32 i = 0; i < HostReg_Count; i++)
{ {
if ((m_host_register_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
(HostRegState::CallerSaved | HostRegState::InUse)) (HostRegState::CallerSaved | HostRegState::InUse))
{ {
m_code_generator.EmitPushHostReg(static_cast<HostReg>(i), position + count); m_code_generator.EmitPushHostReg(static_cast<HostReg>(i), position + count);
@ -296,7 +299,7 @@ u32 RegisterCache::PopCallerSavedRegisters() const
u32 count = 0; u32 count = 0;
for (u32 i = 0; i < HostReg_Count; i++) for (u32 i = 0; i < HostReg_Count; i++)
{ {
if ((m_host_register_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
(HostRegState::CallerSaved | HostRegState::InUse)) (HostRegState::CallerSaved | HostRegState::InUse))
{ {
count++; count++;
@ -304,12 +307,12 @@ u32 RegisterCache::PopCallerSavedRegisters() const
} }
if (count == 0) if (count == 0)
return 0; return 0;
u32 position = GetActiveCalleeSavedRegisterCount() + count - 1; u32 position = GetActiveCalleeSavedRegisterCount() + count - 1;
u32 i = (HostReg_Count - 1); u32 i = (HostReg_Count - 1);
do do
{ {
if ((m_host_register_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
(HostRegState::CallerSaved | HostRegState::InUse)) (HostRegState::CallerSaved | HostRegState::InUse))
{ {
m_code_generator.EmitPopHostReg(static_cast<HostReg>(i), position); m_code_generator.EmitPopHostReg(static_cast<HostReg>(i), position);
@ -322,29 +325,65 @@ u32 RegisterCache::PopCallerSavedRegisters() const
// Emits pops for every callee-saved host register that was pushed, walking the saved-order list
// from its last entry back to its first (the reverse of the order in which they were recorded).
// Returns the number of pops emitted (0 when nothing was saved).
// When `commit` is true the CalleeSavedAllocated flag is cleared on each register and the
// saved-order count is reset to zero; when false only code is emitted and the tracking state is kept.
u32 RegisterCache::PopCalleeSavedRegisters(bool commit) u32 RegisterCache::PopCalleeSavedRegisters(bool commit)
{ {
if (m_host_register_callee_saved_order_count == 0) if (m_state.callee_saved_order_count == 0)
return 0; return 0;
u32 count = 0; u32 count = 0;
u32 i = m_host_register_callee_saved_order_count; u32 i = m_state.callee_saved_order_count;
do do
{ {
const HostReg reg = m_host_register_callee_saved_order[i - 1]; const HostReg reg = m_state.callee_saved_order[i - 1];
DebugAssert((m_host_register_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == DebugAssert((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
(HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)); (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated));
// the second argument is the zero-based position of this register in the saved-order list
m_code_generator.EmitPopHostReg(reg, i - 1); m_code_generator.EmitPopHostReg(reg, i - 1);
if (commit) if (commit)
m_host_register_state[reg] &= ~HostRegState::CalleeSavedAllocated; m_state.host_reg_state[reg] &= ~HostRegState::CalleeSavedAllocated;
count++; count++;
i--; i--;
} while (i > 0); } while (i > 0);
if (commit) if (commit)
m_host_register_callee_saved_order_count = 0; m_state.callee_saved_order_count = 0;
return count; return count;
} }
void RegisterCache::PushState()
{
// need to copy this manually because of the load delay values
RegAllocState save_state;
save_state.host_reg_state = m_state.host_reg_state;
save_state.callee_saved_order = m_state.callee_saved_order;
save_state.guest_reg_state = m_state.guest_reg_state;
save_state.guest_reg_order = m_state.guest_reg_order;
save_state.available_count = m_state.available_count;
save_state.callee_saved_order_count = m_state.callee_saved_order_count;
save_state.guest_reg_order_count = m_state.guest_reg_order_count;
save_state.load_delay_register = m_state.load_delay_register;
save_state.load_delay_value.regcache = m_state.load_delay_value.regcache;
save_state.load_delay_value.host_reg = m_state.load_delay_value.host_reg;
save_state.load_delay_value.size = m_state.load_delay_value.size;
save_state.load_delay_value.flags = m_state.load_delay_value.flags;
save_state.next_load_delay_register = m_state.next_load_delay_register;
save_state.next_load_delay_value.regcache = m_state.next_load_delay_value.regcache;
save_state.next_load_delay_value.host_reg = m_state.next_load_delay_value.host_reg;
save_state.next_load_delay_value.size = m_state.next_load_delay_value.size;
save_state.next_load_delay_value.flags = m_state.next_load_delay_value.flags;
m_state_stack.push(std::move(save_state));
}
// Restores the allocator state most recently saved by PushState() and removes it from the stack.
// Asserts if the stack is empty. Any changes made to the live state since the matching PushState()
// are overwritten; no host code is emitted here.
void RegisterCache::PopState()
{
Assert(!m_state_stack.empty());
// prevent destructor -> freeing of host reg
// (the live load delay values are cleared first so that replacing them below does not release
// a host register that the restored state still refers to; order matters here)
m_state.load_delay_value.Clear();
m_state.next_load_delay_value.Clear();
// take the saved state back, then drop the now moved-from stack entry
m_state = std::move(m_state_stack.top());
m_state_stack.pop();
}
Value RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, bool force_host_register /* = false */, Value RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, bool force_host_register /* = false */,
HostReg forced_host_reg /* = HostReg_Invalid */) HostReg forced_host_reg /* = HostReg_Invalid */)
{ {
@ -362,7 +401,7 @@ Value RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, b
return Value::FromConstantU32(0); return Value::FromConstantU32(0);
} }
Value& cache_value = m_guest_reg_cache[static_cast<u8>(guest_reg)]; Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
if (cache_value.IsValid()) if (cache_value.IsValid())
{ {
if (cache_value.IsInHostRegister()) if (cache_value.IsInHostRegister())
@ -454,14 +493,14 @@ Value RegisterCache::WriteGuestRegister(Reg guest_reg, Value&& value)
return std::move(value); return std::move(value);
// cancel any pending load delay // cancel any pending load delay
if (m_load_delay_register == guest_reg) if (m_state.load_delay_register == guest_reg)
{ {
Log_DebugPrintf("Cancelling load delay of register %s because of non-delayed write", GetRegName(guest_reg)); Log_DebugPrintf("Cancelling load delay of register %s because of non-delayed write", GetRegName(guest_reg));
m_load_delay_register = Reg::count; m_state.load_delay_register = Reg::count;
m_load_delay_value.ReleaseAndClear(); m_state.load_delay_value.ReleaseAndClear();
} }
Value& cache_value = m_guest_reg_cache[static_cast<u8>(guest_reg)]; Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
if (cache_value.IsInHostRegister() && value.IsInHostRegister() && cache_value.host_reg == value.host_reg) if (cache_value.IsInHostRegister() && value.IsInHostRegister() && cache_value.host_reg == value.host_reg)
{ {
// updating the register value. // updating the register value.
@ -518,20 +557,20 @@ void RegisterCache::WriteGuestRegisterDelayed(Reg guest_reg, Value&& value)
return; return;
// two load delays in a row? cancel the first one. // two load delays in a row? cancel the first one.
if (guest_reg == m_load_delay_register) if (guest_reg == m_state.load_delay_register)
{ {
Log_DebugPrintf("Cancelling load delay of register %s due to new load delay", GetRegName(guest_reg)); Log_DebugPrintf("Cancelling load delay of register %s due to new load delay", GetRegName(guest_reg));
m_load_delay_register = Reg::count; m_state.load_delay_register = Reg::count;
m_load_delay_value.ReleaseAndClear(); m_state.load_delay_value.ReleaseAndClear();
} }
// two load delay case with interpreter load delay // two load delay case with interpreter load delay
m_code_generator.EmitCancelInterpreterLoadDelayForReg(guest_reg); m_code_generator.EmitCancelInterpreterLoadDelayForReg(guest_reg);
// set up the load delay at the end of this instruction // set up the load delay at the end of this instruction
Value& cache_value = m_next_load_delay_value; Value& cache_value = m_state.next_load_delay_value;
Assert(m_next_load_delay_register == Reg::count); Assert(m_state.next_load_delay_register == Reg::count);
m_next_load_delay_register = guest_reg; m_state.next_load_delay_register = guest_reg;
// If it's a temporary, we can bind that to the guest register. // If it's a temporary, we can bind that to the guest register.
if (value.IsScratch()) if (value.IsScratch())
@ -555,61 +594,61 @@ void RegisterCache::WriteGuestRegisterDelayed(Reg guest_reg, Value&& value)
// Advances the load delay tracking by one step:
//  1. if a load delay is pending, its value is written to the target guest register via
//     WriteGuestRegister() and the pending slot is emptied;
//  2. if a "next" load delay was scheduled, it is promoted into the pending slot and the
//     "next" slot is emptied.
void RegisterCache::UpdateLoadDelay() void RegisterCache::UpdateLoadDelay()
{ {
// flush current load delay // flush current load delay
if (m_load_delay_register != Reg::count) if (m_state.load_delay_register != Reg::count)
{ {
// have to clear first because otherwise it'll release the value // have to clear first because otherwise it'll release the value
Reg reg = m_load_delay_register; Reg reg = m_state.load_delay_register;
Value value = std::move(m_load_delay_value); Value value = std::move(m_state.load_delay_value);
m_load_delay_register = Reg::count; m_state.load_delay_register = Reg::count;
WriteGuestRegister(reg, std::move(value)); WriteGuestRegister(reg, std::move(value));
} }
// next load delay -> load delay // next load delay -> load delay
if (m_next_load_delay_register != Reg::count) if (m_state.next_load_delay_register != Reg::count)
{ {
m_load_delay_register = m_next_load_delay_register; m_state.load_delay_register = m_state.next_load_delay_register;
m_load_delay_value = std::move(m_next_load_delay_value); m_state.load_delay_value = std::move(m_state.next_load_delay_value);
m_next_load_delay_register = Reg::count; m_state.next_load_delay_register = Reg::count;
} }
} }
// If a load delay is pending, asks the code generator to store it as the interpreter's load delay
// (EmitStoreInterpreterLoadDelay). Asserts that no "next" load delay is scheduled.
// When `clear` is true the pending slot is emptied and its value released afterwards;
// when false the cache keeps tracking the load delay.
void RegisterCache::WriteLoadDelayToCPU(bool clear) void RegisterCache::WriteLoadDelayToCPU(bool clear)
{ {
// There shouldn't be a flush at the same time as there's a new load delay. // There shouldn't be a flush at the same time as there's a new load delay.
Assert(m_next_load_delay_register == Reg::count); Assert(m_state.next_load_delay_register == Reg::count);
if (m_load_delay_register != Reg::count) if (m_state.load_delay_register != Reg::count)
{ {
Log_DebugPrintf("Flushing pending load delay of %s", GetRegName(m_load_delay_register)); Log_DebugPrintf("Flushing pending load delay of %s", GetRegName(m_state.load_delay_register));
m_code_generator.EmitStoreInterpreterLoadDelay(m_load_delay_register, m_load_delay_value); m_code_generator.EmitStoreInterpreterLoadDelay(m_state.load_delay_register, m_state.load_delay_value);
if (clear) if (clear)
{ {
m_load_delay_register = Reg::count; m_state.load_delay_register = Reg::count;
m_load_delay_value.ReleaseAndClear(); m_state.load_delay_value.ReleaseAndClear();
} }
} }
} }
// If a load delay is pending, emits a store of its value directly to the target guest register
// (EmitStoreGuestRegister). Asserts that no "next" load delay is scheduled.
// When `clear` is true the pending slot is emptied and its value released afterwards;
// when false the cache keeps tracking the load delay.
// NOTE(review): the TODO inside refers to push/pop of the whole state, which PushState()/PopState()
// in this file now provide - check whether the `clear == false` workaround is still needed.
void RegisterCache::FlushLoadDelay(bool clear) void RegisterCache::FlushLoadDelay(bool clear)
{ {
Assert(m_next_load_delay_register == Reg::count); Assert(m_state.next_load_delay_register == Reg::count);
if (m_load_delay_register != Reg::count) if (m_state.load_delay_register != Reg::count)
{ {
// if this is an exception exit, write the new value to the CPU register file, but keep it tracked for the next // if this is an exception exit, write the new value to the CPU register file, but keep it tracked for the next
// non-exception-raised path. TODO: push/pop whole state would avoid this issue // non-exception-raised path. TODO: push/pop whole state would avoid this issue
m_code_generator.EmitStoreGuestRegister(m_load_delay_register, m_load_delay_value); m_code_generator.EmitStoreGuestRegister(m_state.load_delay_register, m_state.load_delay_value);
if (clear) if (clear)
{ {
m_load_delay_register = Reg::count; m_state.load_delay_register = Reg::count;
m_load_delay_value.ReleaseAndClear(); m_state.load_delay_value.ReleaseAndClear();
} }
} }
} }
void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty) void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty)
{ {
Value& cache_value = m_guest_reg_cache[static_cast<u8>(guest_reg)]; Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
if (cache_value.IsDirty()) if (cache_value.IsDirty())
{ {
if (cache_value.IsInHostRegister()) if (cache_value.IsInHostRegister())
@ -633,7 +672,7 @@ void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clea
void RegisterCache::InvalidateGuestRegister(Reg guest_reg) void RegisterCache::InvalidateGuestRegister(Reg guest_reg)
{ {
Value& cache_value = m_guest_reg_cache[static_cast<u8>(guest_reg)]; Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
if (!cache_value.IsValid()) if (!cache_value.IsValid())
return; return;
@ -651,7 +690,7 @@ void RegisterCache::InvalidateAllNonDirtyGuestRegisters()
{ {
for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++) for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
{ {
Value& cache_value = m_guest_reg_cache[reg]; Value& cache_value = m_state.guest_reg_state[reg];
if (cache_value.IsValid() && !cache_value.IsDirty()) if (cache_value.IsValid() && !cache_value.IsDirty())
InvalidateGuestRegister(static_cast<Reg>(reg)); InvalidateGuestRegister(static_cast<Reg>(reg));
} }
@ -665,11 +704,11 @@ void RegisterCache::FlushAllGuestRegisters(bool invalidate, bool clear_dirty)
bool RegisterCache::EvictOneGuestRegister() bool RegisterCache::EvictOneGuestRegister()
{ {
if (m_guest_register_order_count == 0) if (m_state.guest_reg_order_count == 0)
return false; return false;
// evict the register used the longest time ago // evict the register used the longest time ago
Reg evict_reg = m_guest_register_order[m_guest_register_order_count - 1]; Reg evict_reg = m_state.guest_reg_order[m_state.guest_reg_order_count - 1];
Log_ProfilePrintf("Evicting guest register %s", GetRegName(evict_reg)); Log_ProfilePrintf("Evicting guest register %s", GetRegName(evict_reg));
FlushGuestRegister(evict_reg, true, true); FlushGuestRegister(evict_reg, true, true);
@ -678,18 +717,18 @@ bool RegisterCache::EvictOneGuestRegister()
void RegisterCache::ClearRegisterFromOrder(Reg reg) void RegisterCache::ClearRegisterFromOrder(Reg reg)
{ {
for (u32 i = 0; i < m_guest_register_order_count; i++) for (u32 i = 0; i < m_state.guest_reg_order_count; i++)
{ {
if (m_guest_register_order[i] == reg) if (m_state.guest_reg_order[i] == reg)
{ {
// move the registers after backwards into this spot // move the registers after backwards into this spot
const u32 count_after = m_guest_register_order_count - i - 1; const u32 count_after = m_state.guest_reg_order_count - i - 1;
if (count_after > 0) if (count_after > 0)
std::memmove(&m_guest_register_order[i], &m_guest_register_order[i + 1], sizeof(Reg) * count_after); std::memmove(&m_state.guest_reg_order[i], &m_state.guest_reg_order[i + 1], sizeof(Reg) * count_after);
else else
m_guest_register_order[i] = Reg::count; m_state.guest_reg_order[i] = Reg::count;
m_guest_register_order_count--; m_state.guest_reg_order_count--;
return; return;
} }
} }
@ -699,16 +738,16 @@ void RegisterCache::ClearRegisterFromOrder(Reg reg)
void RegisterCache::PushRegisterToOrder(Reg reg) void RegisterCache::PushRegisterToOrder(Reg reg)
{ {
for (u32 i = 0; i < m_guest_register_order_count; i++) for (u32 i = 0; i < m_state.guest_reg_order_count; i++)
{ {
if (m_guest_register_order[i] == reg) if (m_state.guest_reg_order[i] == reg)
{ {
// move the registers after backwards into this spot // move the registers after backwards into this spot
const u32 count_before = i; const u32 count_before = i;
if (count_before > 0) if (count_before > 0)
std::memmove(&m_guest_register_order[1], &m_guest_register_order[0], sizeof(Reg) * count_before); std::memmove(&m_state.guest_reg_order[1], &m_state.guest_reg_order[0], sizeof(Reg) * count_before);
m_guest_register_order[0] = reg; m_state.guest_reg_order[0] = reg;
return; return;
} }
} }
@ -718,11 +757,11 @@ void RegisterCache::PushRegisterToOrder(Reg reg)
// Inserts `reg` at index 0 of the guest register order list, shifting all existing entries back
// by one slot, and increments the entry count. The list must have room (DebugAssert against
// HostReg_Count). No check for `reg` already being present is made here.
void RegisterCache::AppendRegisterToOrder(Reg reg) void RegisterCache::AppendRegisterToOrder(Reg reg)
{ {
DebugAssert(m_guest_register_order_count < HostReg_Count); DebugAssert(m_state.guest_reg_order_count < HostReg_Count);
if (m_guest_register_order_count > 0) if (m_state.guest_reg_order_count > 0)
std::memmove(&m_guest_register_order[1], &m_guest_register_order[0], sizeof(Reg) * m_guest_register_order_count); std::memmove(&m_state.guest_reg_order[1], &m_state.guest_reg_order[0], sizeof(Reg) * m_state.guest_reg_order_count);
m_guest_register_order[0] = reg; m_state.guest_reg_order[0] = reg;
m_guest_register_order_count++; m_state.guest_reg_order_count++;
} }
} // namespace CPU::Recompiler } // namespace CPU::Recompiler

View file

@ -5,6 +5,7 @@
#include <array> #include <array>
#include <optional> #include <optional>
#include <stack>
#include <tuple> #include <tuple>
namespace CPU::Recompiler { namespace CPU::Recompiler {
@ -184,7 +185,7 @@ public:
RegisterCache(CodeGenerator& code_generator); RegisterCache(CodeGenerator& code_generator);
~RegisterCache(); ~RegisterCache();
/// Returns the number of entries currently recorded in the callee-saved register order list.
u32 GetActiveCalleeSavedRegisterCount() const { return m_host_register_callee_saved_order_count; } u32 GetActiveCalleeSavedRegisterCount() const { return m_state.callee_saved_order_count; }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Register Allocation // Register Allocation
@ -228,6 +229,12 @@ public:
/// Restore callee-saved registers. Call at the end of the function. /// Restore callee-saved registers. Call at the end of the function.
/// NOTE(review): the effect of `commit` and the meaning of the returned u32 are defined by the implementation,
/// which is not visible from this declaration - confirm before relying on either.
u32 PopCalleeSavedRegisters(bool commit); u32 PopCalleeSavedRegisters(bool commit);
/// Pushes the register allocator state, use when entering branched code.
/// Every call is expected to be paired with a later PopState() once the branched code has been emitted.
/// NOTE(review): presumably this stores a copy of m_state on m_state_stack - confirm against the definition.
void PushState();
/// Pops the register allocator state, use when leaving branched code.
/// NOTE(review): presumably this restores m_state from the top of m_state_stack, discarding any allocation changes
/// made since the matching PushState() - confirm against the definition.
void PopState();
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Scratch Register Allocation // Scratch Register Allocation
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
@ -241,20 +248,20 @@ public:
/// Returns true if the specified guest register is cached. /// Returns true if the specified guest register is cached.
/// A register counts as cached when its entry in the guest register table is either a constant or is currently
/// held in a host register.
bool IsGuestRegisterCached(Reg guest_reg) const bool IsGuestRegisterCached(Reg guest_reg) const
{ {
// The guest register enum value is used directly as the table index.
const Value& cache_value = m_guest_reg_cache[static_cast<u8>(guest_reg)]; const Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
return cache_value.IsConstant() || cache_value.IsInHostRegister(); return cache_value.IsConstant() || cache_value.IsInHostRegister();
} }
/// Returns the host register if the guest register is cached. /// Returns the host register if the guest register is cached.
/// Only entries that are held in a host register produce a result; everything else yields std::nullopt.
/// NOTE(review): IsGuestRegisterCached() also accepts constant entries, so a register that is cached only as a
/// constant presumably returns std::nullopt here - callers should not treat "cached" and "has host register" as
/// equivalent.
std::optional<HostReg> GetHostRegisterForGuestRegister(Reg guest_reg) const std::optional<HostReg> GetHostRegisterForGuestRegister(Reg guest_reg) const
{ {
if (!m_guest_reg_cache[static_cast<u8>(guest_reg)].IsInHostRegister()) if (!m_state.guest_reg_state[static_cast<u8>(guest_reg)].IsInHostRegister())
return std::nullopt; return std::nullopt;
return m_guest_reg_cache[static_cast<u8>(guest_reg)].GetHostRegister(); return m_state.guest_reg_state[static_cast<u8>(guest_reg)].GetHostRegister();
} }
/// Returns true if there is a load delay which will be stored at the end of the instruction. /// Returns true if there is a load delay which will be stored at the end of the instruction.
/// Reg::count acts as the "no register" sentinel: the load delay register is initialised to it, and any other
/// value means a load delay is pending.
bool HasLoadDelay() const { return m_load_delay_register != Reg::count; } bool HasLoadDelay() const { return m_state.load_delay_register != Reg::count; }
/// Reads the given guest register and returns it as a Value.
/// Defaults: cache = true, force_host_register = false, forced_host_reg = HostReg_Invalid.
/// NOTE(review): presumably `cache` controls whether the value is kept in the register cache,
/// `force_host_register` requests a host-register-backed result, and `forced_host_reg` names a specific host
/// register to use - the definition is not visible here, confirm against it.
Value ReadGuestRegister(Reg guest_reg, bool cache = true, bool force_host_register = false, Value ReadGuestRegister(Reg guest_reg, bool cache = true, bool force_host_register = false,
HostReg forced_host_reg = HostReg_Invalid); HostReg forced_host_reg = HostReg_Invalid);
@ -288,24 +295,29 @@ private:
// Data members.
//
// The allocator's mutable bookkeeping is grouped into RegAllocState so that the whole of it can be held by value:
// m_state is the live state, and m_state_stack holds RegAllocState values (presumably the snapshots saved by
// PushState() and restored by PopState()).
//
// RegAllocState fields:
//   host_reg_state            - one HostRegState per host register (HostReg_Count entries).
//   callee_saved_order        - callee-saved host register list; callee_saved_order_count is its entry count.
//   guest_reg_state           - one Value per guest register, indexed by static_cast<u8>(Reg).
//   guest_reg_order           - guest register order list; guest_reg_order_count is its entry count and the
//                               newest entry is stored at index 0.
//   available_count           - NOTE(review): presumably the number of allocatable host registers - confirm.
//   load_delay_register/value - pending load delay; Reg::count means no load delay is pending.
//   next_load_delay_*         - NOTE(review): presumably the load delay queued for the following instruction -
//                               confirm against the implementation.
//
// m_code_generator, m_host_register_allocation_order and m_cpu_ptr_host_register stay outside RegAllocState and
// are therefore not part of the state held on m_state_stack.
CodeGenerator& m_code_generator; CodeGenerator& m_code_generator;
HostReg m_cpu_ptr_host_register = {};
std::array<HostRegState, HostReg_Count> m_host_register_state{};
std::array<HostReg, HostReg_Count> m_host_register_allocation_order{}; std::array<HostReg, HostReg_Count> m_host_register_allocation_order{};
u32 m_host_register_available_count = 0;
std::array<Value, static_cast<u8>(Reg::count)> m_guest_reg_cache{}; HostReg m_cpu_ptr_host_register = {};
std::array<Reg, HostReg_Count> m_guest_register_order{}; struct RegAllocState
u32 m_guest_register_order_count = 0; {
std::array<HostRegState, HostReg_Count> host_reg_state{};
std::array<HostReg, HostReg_Count> callee_saved_order{};
std::array<Value, static_cast<u8>(Reg::count)> guest_reg_state{};
std::array<Reg, HostReg_Count> guest_reg_order{};
std::array<HostReg, HostReg_Count> m_host_register_callee_saved_order{}; u32 available_count = 0;
u32 m_host_register_callee_saved_order_count = 0; u32 callee_saved_order_count = 0;
u32 guest_reg_order_count = 0;
// Reg::count is the "none" sentinel for both load delay register fields.
Reg m_load_delay_register = Reg::count; Reg load_delay_register = Reg::count;
Value m_load_delay_value{}; Value load_delay_value{};
Reg m_next_load_delay_register = Reg::count; Reg next_load_delay_register = Reg::count;
Value m_next_load_delay_value{}; Value next_load_delay_value{};
} m_state;
// Stack of saved allocator states; std::stack uses std::deque as its default underlying container.
std::stack<RegAllocState> m_state_stack;
}; };
} // namespace CPU::Recompiler } // namespace CPU::Recompiler