diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index e133ea913..4957b0d56 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -219,9 +219,13 @@ void CodeGenerator::EmitExceptionExitOnBool(const Value& value) m_emit->test(GetHostReg8(value), GetHostReg8(value)); m_emit->jnz(GetCurrentFarCodePointer()); + m_register_cache.PushState(); + SwitchToFarCode(); EmitExceptionExit(); SwitchToNearCode(); + + m_register_cache.PopState(); } void CodeGenerator::FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size) @@ -1572,11 +1576,15 @@ Value CodeGenerator::EmitLoadGuestMemory(const Value& address, RegSize size) m_emit->test(GetHostReg64(result.host_reg), GetHostReg64(result.host_reg)); m_emit->js(GetCurrentFarCodePointer()); + m_register_cache.PushState(); + // load exception path SwitchToFarCode(); EmitExceptionExit(); SwitchToNearCode(); + m_register_cache.PopState(); + // Downcast to ignore upper 56/48/32 bits. This should be a noop. switch (size) { @@ -1623,7 +1631,7 @@ void CodeGenerator::EmitStoreGuestMemory(const Value& address, const Value& valu break; } - Xbyak::Label store_okay; + m_register_cache.PushState(); m_emit->test(GetHostReg8(result), GetHostReg8(result)); m_emit->jz(GetCurrentFarCodePointer()); @@ -1632,6 +1640,8 @@ void CodeGenerator::EmitStoreGuestMemory(const Value& address, const Value& valu SwitchToFarCode(); EmitExceptionExit(); SwitchToNearCode(); + + m_register_cache.PopState(); } void CodeGenerator::EmitFlushInterpreterLoadDelay() @@ -1767,10 +1777,14 @@ static void EmitConditionalJump(Condition condition, bool invert, Xbyak::CodeGen void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_target) { - // we have to always read the old PC.. when we can push/pop the register cache state this won't be needed - Value old_npc; + // allocate scratch register for reading npc - we return to the main path, so this could cause a reg flush + Value old_npc = m_register_cache.AllocateScratch(RegSize_32); + + // npc gets modified by the branch, so we can't trust it on returning. same for lr_reg, which might contain a dirty + // value + m_register_cache.FlushGuestRegister(Reg::npc, true, true); if (lr_reg != Reg::count) - old_npc = m_register_cache.ReadGuestRegister(Reg::npc, false, true); + m_register_cache.FlushGuestRegister(lr_reg, true, true); // condition is inverted because we want the case for skipping it Xbyak::Label skip_branch; @@ -1783,8 +1797,8 @@ void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_t // Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards, // if we don't cancel it, at the end of the instruction the value we write can be overridden. EmitCancelInterpreterLoadDelayForReg(lr_reg); - m_register_cache.WriteGuestRegister(lr_reg, std::move(old_npc)); - m_register_cache.FlushGuestRegister(lr_reg, true, true); + EmitLoadGuestRegister(old_npc.host_reg, Reg::npc); + EmitStoreGuestRegister(lr_reg, old_npc); } // we don't need to test the address of constant branches unless they're definitely misaligned, which would be @@ -1803,17 +1817,20 @@ void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_t m_emit->jnz(GetCurrentFarCodePointer()); } + m_register_cache.PushState(); + // exception exit for misaligned target SwitchToFarCode(); EmitFunctionCall(nullptr, &Thunks::RaiseAddressException, m_register_cache.GetCPUPtr(), branch_target, Value::FromConstantU8(0), Value::FromConstantU8(1)); EmitExceptionExit(); SwitchToNearCode(); + + m_register_cache.PopState(); } // branch taken path - write new PC and flush it, since two branches - m_register_cache.WriteGuestRegister(Reg::npc, std::move(branch_target)); - m_register_cache.FlushGuestRegister(Reg::npc, true, true); + EmitStoreGuestRegister(Reg::npc, branch_target); EmitStoreCPUStructField(offsetof(Core, m_current_instruction_was_branch_taken), Value::FromConstantU8(1)); // converge point @@ -1836,6 +1853,8 @@ void CodeGenerator::EmitRaiseException(Exception excode, Condition condition /* return; } + m_register_cache.PushState(); + const void* far_code_ptr = GetCurrentFarCodePointer(); EmitConditionalJump(condition, false, m_emit, far_code_ptr); @@ -1844,6 +1863,8 @@ void CodeGenerator::EmitRaiseException(Exception excode, Condition condition /* Value::FromConstantU8(static_cast(excode))); EmitExceptionExit(); SwitchToNearCode(); + + m_register_cache.PopState(); } #if 0 diff --git a/src/core/cpu_recompiler_register_cache.cpp b/src/core/cpu_recompiler_register_cache.cpp index 0635896fc..3306ef8ff 100644 --- a/src/core/cpu_recompiler_register_cache.cpp +++ b/src/core/cpu_recompiler_register_cache.cpp @@ -101,32 +101,35 @@ void Value::Undiscard() RegisterCache::RegisterCache(CodeGenerator& code_generator) : m_code_generator(code_generator) { - m_guest_register_order.fill(Reg::count); + m_state.guest_reg_order.fill(Reg::count); } -RegisterCache::~RegisterCache() = default; +RegisterCache::~RegisterCache() +{ + Assert(m_state_stack.empty()); +} void RegisterCache::SetHostRegAllocationOrder(std::initializer_list regs) { size_t index = 0; for (HostReg reg : regs) { - m_host_register_state[reg] = HostRegState::Usable; + m_state.host_reg_state[reg] = HostRegState::Usable; m_host_register_allocation_order[index++] = reg; } - m_host_register_available_count = static_cast(index); + m_state.available_count = static_cast(index); } void RegisterCache::SetCallerSavedHostRegs(std::initializer_list regs) { for (HostReg reg : regs) - m_host_register_state[reg] |= HostRegState::CallerSaved; + m_state.host_reg_state[reg] |= HostRegState::CallerSaved; } void RegisterCache::SetCalleeSavedHostRegs(std::initializer_list regs) { for (HostReg reg : regs) - m_host_register_state[reg] |= HostRegState::CalleeSaved; + m_state.host_reg_state[reg] |= HostRegState::CalleeSaved; } void RegisterCache::SetCPUPtrHostReg(HostReg reg) @@ -136,17 +139,17 @@ void RegisterCache::SetCPUPtrHostReg(HostReg reg) bool RegisterCache::IsUsableHostReg(HostReg reg) const { - return (m_host_register_state[reg] & HostRegState::Usable) != HostRegState::None; + return (m_state.host_reg_state[reg] & HostRegState::Usable) != HostRegState::None; } bool RegisterCache::IsHostRegInUse(HostReg reg) const { - return (m_host_register_state[reg] & HostRegState::InUse) != HostRegState::None; + return (m_state.host_reg_state[reg] & HostRegState::InUse) != HostRegState::None; } bool RegisterCache::HasFreeHostRegister() const { - for (const HostRegState state : m_host_register_state) + for (const HostRegState state : m_state.host_reg_state) { if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable)) return true; @@ -158,7 +161,7 @@ bool RegisterCache::HasFreeHostRegister() const u32 RegisterCache::GetUsedHostRegisters() const { u32 count = 0; - for (const HostRegState state : m_host_register_state) + for (const HostRegState state : m_state.host_reg_state) { if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable | HostRegState::InUse)) count++; @@ -170,7 +173,7 @@ u32 RegisterCache::GetUsedHostRegisters() const u32 RegisterCache::GetFreeHostRegisters() const { u32 count = 0; - for (const HostRegState state : m_host_register_state) + for (const HostRegState state : m_state.host_reg_state) { if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable)) count++; @@ -182,10 +185,10 @@ u32 RegisterCache::GetFreeHostRegisters() const HostReg RegisterCache::AllocateHostReg(HostRegState state /* = HostRegState::InUse */) { // try for a free register in allocation order - for (u32 i = 0; i < m_host_register_available_count; i++) + for (u32 i = 0; i < m_state.available_count; i++) { const HostReg reg = m_host_register_allocation_order[i]; - if ((m_host_register_state[reg] & (HostRegState::Usable | HostRegState::InUse)) == HostRegState::Usable) + if ((m_state.host_reg_state[reg] & (HostRegState::Usable | HostRegState::InUse)) == HostRegState::Usable) { if (AllocateHostReg(reg, state)) return reg; @@ -201,19 +204,19 @@ HostReg RegisterCache::AllocateHostReg(HostRegState state /* = HostRegState::InU bool RegisterCache::AllocateHostReg(HostReg reg, HostRegState state /*= HostRegState::InUse*/) { - if ((m_host_register_state[reg] & HostRegState::InUse) == HostRegState::InUse) + if ((m_state.host_reg_state[reg] & HostRegState::InUse) == HostRegState::InUse) return false; - m_host_register_state[reg] |= state; + m_state.host_reg_state[reg] |= state; - if ((m_host_register_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == + if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == HostRegState::CalleeSaved) { // new register we need to save.. - DebugAssert(m_host_register_callee_saved_order_count < HostReg_Count); + DebugAssert(m_state.callee_saved_order_count < HostReg_Count); m_code_generator.EmitPushHostReg(reg, GetActiveCalleeSavedRegisterCount()); - m_host_register_callee_saved_order[m_host_register_callee_saved_order_count++] = reg; - m_host_register_state[reg] |= HostRegState::CalleeSavedAllocated; + m_state.callee_saved_order[m_state.callee_saved_order_count++] = reg; + m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated; } return reg; @@ -223,21 +226,21 @@ void RegisterCache::DiscardHostReg(HostReg reg) { DebugAssert(IsHostRegInUse(reg)); Log_DebugPrintf("Discarding host register %s", m_code_generator.GetHostRegName(reg)); - m_host_register_state[reg] |= HostRegState::Discarded; + m_state.host_reg_state[reg] |= HostRegState::Discarded; } void RegisterCache::UndiscardHostReg(HostReg reg) { DebugAssert(IsHostRegInUse(reg)); Log_DebugPrintf("Undiscarding host register %s", m_code_generator.GetHostRegName(reg)); - m_host_register_state[reg] &= ~HostRegState::Discarded; + m_state.host_reg_state[reg] &= ~HostRegState::Discarded; } void RegisterCache::FreeHostReg(HostReg reg) { DebugAssert(IsHostRegInUse(reg)); Log_DebugPrintf("Freeing host register %s", m_code_generator.GetHostRegName(reg)); - m_host_register_state[reg] &= ~HostRegState::InUse; + m_state.host_reg_state[reg] &= ~HostRegState::InUse; } void RegisterCache::EnsureHostRegFree(HostReg reg) @@ -247,7 +250,7 @@ void RegisterCache::EnsureHostRegFree(HostReg reg) for (u8 i = 0; i < static_cast(Reg::count); i++) { - if (m_guest_reg_cache[i].IsInHostRegister() && m_guest_reg_cache[i].GetHostRegister() == reg) + if (m_state.guest_reg_state[i].IsInHostRegister() && m_state.guest_reg_state[i].GetHostRegister() == reg) FlushGuestRegister(static_cast(i), true, true); } } @@ -280,7 +283,7 @@ u32 RegisterCache::PushCallerSavedRegisters() const u32 count = 0; for (u32 i = 0; i < HostReg_Count; i++) { - if ((m_host_register_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == + if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == (HostRegState::CallerSaved | HostRegState::InUse)) { m_code_generator.EmitPushHostReg(static_cast(i), position + count); @@ -296,7 +299,7 @@ u32 RegisterCache::PopCallerSavedRegisters() const u32 count = 0; for (u32 i = 0; i < HostReg_Count; i++) { - if ((m_host_register_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == + if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == (HostRegState::CallerSaved | HostRegState::InUse)) { count++; @@ -304,12 +307,12 @@ u32 RegisterCache::PopCallerSavedRegisters() const } if (count == 0) return 0; - + u32 position = GetActiveCalleeSavedRegisterCount() + count - 1; u32 i = (HostReg_Count - 1); do { - if ((m_host_register_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == + if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == (HostRegState::CallerSaved | HostRegState::InUse)) { m_code_generator.EmitPopHostReg(static_cast(i), position); @@ -322,29 +325,65 @@ u32 RegisterCache::PopCallerSavedRegisters() const u32 RegisterCache::PopCalleeSavedRegisters(bool commit) { - if (m_host_register_callee_saved_order_count == 0) + if (m_state.callee_saved_order_count == 0) return 0; u32 count = 0; - u32 i = m_host_register_callee_saved_order_count; + u32 i = m_state.callee_saved_order_count; do { - const HostReg reg = m_host_register_callee_saved_order[i - 1]; - DebugAssert((m_host_register_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == + const HostReg reg = m_state.callee_saved_order[i - 1]; + DebugAssert((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)); m_code_generator.EmitPopHostReg(reg, i - 1); if (commit) - m_host_register_state[reg] &= ~HostRegState::CalleeSavedAllocated; + m_state.host_reg_state[reg] &= ~HostRegState::CalleeSavedAllocated; count++; i--; } while (i > 0); if (commit) - m_host_register_callee_saved_order_count = 0; - + m_state.callee_saved_order_count = 0; + return count; } +void RegisterCache::PushState() +{ + // need to copy this manually because of the load delay values + RegAllocState save_state; + save_state.host_reg_state = m_state.host_reg_state; + save_state.callee_saved_order = m_state.callee_saved_order; + save_state.guest_reg_state = m_state.guest_reg_state; + save_state.guest_reg_order = m_state.guest_reg_order; + save_state.available_count = m_state.available_count; + save_state.callee_saved_order_count = m_state.callee_saved_order_count; + save_state.guest_reg_order_count = m_state.guest_reg_order_count; + save_state.load_delay_register = m_state.load_delay_register; + save_state.load_delay_value.regcache = m_state.load_delay_value.regcache; + save_state.load_delay_value.host_reg = m_state.load_delay_value.host_reg; + save_state.load_delay_value.size = m_state.load_delay_value.size; + save_state.load_delay_value.flags = m_state.load_delay_value.flags; + save_state.next_load_delay_register = m_state.next_load_delay_register; + save_state.next_load_delay_value.regcache = m_state.next_load_delay_value.regcache; + save_state.next_load_delay_value.host_reg = m_state.next_load_delay_value.host_reg; + save_state.next_load_delay_value.size = m_state.next_load_delay_value.size; + save_state.next_load_delay_value.flags = m_state.next_load_delay_value.flags; + m_state_stack.push(std::move(save_state)); +} + +void RegisterCache::PopState() +{ + Assert(!m_state_stack.empty()); + + // prevent destructor -> freeing of host reg + m_state.load_delay_value.Clear(); + m_state.next_load_delay_value.Clear(); + + m_state = std::move(m_state_stack.top()); + m_state_stack.pop(); +} + Value RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, bool force_host_register /* = false */, HostReg forced_host_reg /* = HostReg_Invalid */) { @@ -362,7 +401,7 @@ Value RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, b return Value::FromConstantU32(0); } - Value& cache_value = m_guest_reg_cache[static_cast(guest_reg)]; + Value& cache_value = m_state.guest_reg_state[static_cast(guest_reg)]; if (cache_value.IsValid()) { if (cache_value.IsInHostRegister()) @@ -454,14 +493,14 @@ Value RegisterCache::WriteGuestRegister(Reg guest_reg, Value&& value) return std::move(value); // cancel any load delay delay - if (m_load_delay_register == guest_reg) + if (m_state.load_delay_register == guest_reg) { Log_DebugPrintf("Cancelling load delay of register %s because of non-delayed write", GetRegName(guest_reg)); - m_load_delay_register = Reg::count; - m_load_delay_value.ReleaseAndClear(); + m_state.load_delay_register = Reg::count; + m_state.load_delay_value.ReleaseAndClear(); } - Value& cache_value = m_guest_reg_cache[static_cast(guest_reg)]; + Value& cache_value = m_state.guest_reg_state[static_cast(guest_reg)]; if (cache_value.IsInHostRegister() && value.IsInHostRegister() && cache_value.host_reg == value.host_reg) { // updating the register value. @@ -518,20 +557,20 @@ void RegisterCache::WriteGuestRegisterDelayed(Reg guest_reg, Value&& value) return; // two load delays in a row? cancel the first one. - if (guest_reg == m_load_delay_register) + if (guest_reg == m_state.load_delay_register) { Log_DebugPrintf("Cancelling load delay of register %s due to new load delay", GetRegName(guest_reg)); - m_load_delay_register = Reg::count; - m_load_delay_value.ReleaseAndClear(); + m_state.load_delay_register = Reg::count; + m_state.load_delay_value.ReleaseAndClear(); } // two load delay case with interpreter load delay m_code_generator.EmitCancelInterpreterLoadDelayForReg(guest_reg); // set up the load delay at the end of this instruction - Value& cache_value = m_next_load_delay_value; - Assert(m_next_load_delay_register == Reg::count); - m_next_load_delay_register = guest_reg; + Value& cache_value = m_state.next_load_delay_value; + Assert(m_state.next_load_delay_register == Reg::count); + m_state.next_load_delay_register = guest_reg; // If it's a temporary, we can bind that to the guest register. if (value.IsScratch()) @@ -555,61 +594,61 @@ void RegisterCache::WriteGuestRegisterDelayed(Reg guest_reg, Value&& value) void RegisterCache::UpdateLoadDelay() { // flush current load delay - if (m_load_delay_register != Reg::count) + if (m_state.load_delay_register != Reg::count) { // have to clear first because otherwise it'll release the value - Reg reg = m_load_delay_register; - Value value = std::move(m_load_delay_value); - m_load_delay_register = Reg::count; + Reg reg = m_state.load_delay_register; + Value value = std::move(m_state.load_delay_value); + m_state.load_delay_register = Reg::count; WriteGuestRegister(reg, std::move(value)); } // next load delay -> load delay - if (m_next_load_delay_register != Reg::count) + if (m_state.next_load_delay_register != Reg::count) { - m_load_delay_register = m_next_load_delay_register; - m_load_delay_value = std::move(m_next_load_delay_value); - m_next_load_delay_register = Reg::count; + m_state.load_delay_register = m_state.next_load_delay_register; + m_state.load_delay_value = std::move(m_state.next_load_delay_value); + m_state.next_load_delay_register = Reg::count; } } void RegisterCache::WriteLoadDelayToCPU(bool clear) { // There shouldn't be a flush at the same time as there's a new load delay. - Assert(m_next_load_delay_register == Reg::count); - if (m_load_delay_register != Reg::count) + Assert(m_state.next_load_delay_register == Reg::count); + if (m_state.load_delay_register != Reg::count) { - Log_DebugPrintf("Flushing pending load delay of %s", GetRegName(m_load_delay_register)); - m_code_generator.EmitStoreInterpreterLoadDelay(m_load_delay_register, m_load_delay_value); + Log_DebugPrintf("Flushing pending load delay of %s", GetRegName(m_state.load_delay_register)); + m_code_generator.EmitStoreInterpreterLoadDelay(m_state.load_delay_register, m_state.load_delay_value); if (clear) { - m_load_delay_register = Reg::count; - m_load_delay_value.ReleaseAndClear(); + m_state.load_delay_register = Reg::count; + m_state.load_delay_value.ReleaseAndClear(); } } } void RegisterCache::FlushLoadDelay(bool clear) { - Assert(m_next_load_delay_register == Reg::count); + Assert(m_state.next_load_delay_register == Reg::count); - if (m_load_delay_register != Reg::count) + if (m_state.load_delay_register != Reg::count) { // if this is an exception exit, write the new value to the CPU register file, but keep it tracked for the next // non-exception-raised path. TODO: push/pop whole state would avoid this issue - m_code_generator.EmitStoreGuestRegister(m_load_delay_register, m_load_delay_value); + m_code_generator.EmitStoreGuestRegister(m_state.load_delay_register, m_state.load_delay_value); if (clear) { - m_load_delay_register = Reg::count; - m_load_delay_value.ReleaseAndClear(); + m_state.load_delay_register = Reg::count; + m_state.load_delay_value.ReleaseAndClear(); } } } void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty) { - Value& cache_value = m_guest_reg_cache[static_cast(guest_reg)]; + Value& cache_value = m_state.guest_reg_state[static_cast(guest_reg)]; if (cache_value.IsDirty()) { if (cache_value.IsInHostRegister()) @@ -633,7 +672,7 @@ void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clea void RegisterCache::InvalidateGuestRegister(Reg guest_reg) { - Value& cache_value = m_guest_reg_cache[static_cast(guest_reg)]; + Value& cache_value = m_state.guest_reg_state[static_cast(guest_reg)]; if (!cache_value.IsValid()) return; @@ -651,7 +690,7 @@ void RegisterCache::InvalidateAllNonDirtyGuestRegisters() { for (u8 reg = 0; reg < static_cast(Reg::count); reg++) { - Value& cache_value = m_guest_reg_cache[reg]; + Value& cache_value = m_state.guest_reg_state[reg]; if (cache_value.IsValid() && !cache_value.IsDirty()) InvalidateGuestRegister(static_cast(reg)); } @@ -665,11 +704,11 @@ void RegisterCache::FlushAllGuestRegisters(bool invalidate, bool clear_dirty) bool RegisterCache::EvictOneGuestRegister() { - if (m_guest_register_order_count == 0) + if (m_state.guest_reg_order_count == 0) return false; // evict the register used the longest time ago - Reg evict_reg = m_guest_register_order[m_guest_register_order_count - 1]; + Reg evict_reg = m_state.guest_reg_order[m_state.guest_reg_order_count - 1]; Log_ProfilePrintf("Evicting guest register %s", GetRegName(evict_reg)); FlushGuestRegister(evict_reg, true, true); @@ -678,18 +717,18 @@ bool RegisterCache::EvictOneGuestRegister() void RegisterCache::ClearRegisterFromOrder(Reg reg) { - for (u32 i = 0; i < m_guest_register_order_count; i++) + for (u32 i = 0; i < m_state.guest_reg_order_count; i++) { - if (m_guest_register_order[i] == reg) + if (m_state.guest_reg_order[i] == reg) { // move the registers after backwards into this spot - const u32 count_after = m_guest_register_order_count - i - 1; + const u32 count_after = m_state.guest_reg_order_count - i - 1; if (count_after > 0) - std::memmove(&m_guest_register_order[i], &m_guest_register_order[i + 1], sizeof(Reg) * count_after); + std::memmove(&m_state.guest_reg_order[i], &m_state.guest_reg_order[i + 1], sizeof(Reg) * count_after); else - m_guest_register_order[i] = Reg::count; + m_state.guest_reg_order[i] = Reg::count; - m_guest_register_order_count--; + m_state.guest_reg_order_count--; return; } } @@ -699,16 +738,16 @@ void RegisterCache::ClearRegisterFromOrder(Reg reg) void RegisterCache::PushRegisterToOrder(Reg reg) { - for (u32 i = 0; i < m_guest_register_order_count; i++) + for (u32 i = 0; i < m_state.guest_reg_order_count; i++) { - if (m_guest_register_order[i] == reg) + if (m_state.guest_reg_order[i] == reg) { // move the registers after backwards into this spot const u32 count_before = i; if (count_before > 0) - std::memmove(&m_guest_register_order[1], &m_guest_register_order[0], sizeof(Reg) * count_before); + std::memmove(&m_state.guest_reg_order[1], &m_state.guest_reg_order[0], sizeof(Reg) * count_before); - m_guest_register_order[0] = reg; + m_state.guest_reg_order[0] = reg; return; } } @@ -718,11 +757,11 @@ void RegisterCache::PushRegisterToOrder(Reg reg) void RegisterCache::AppendRegisterToOrder(Reg reg) { - DebugAssert(m_guest_register_order_count < HostReg_Count); - if (m_guest_register_order_count > 0) - std::memmove(&m_guest_register_order[1], &m_guest_register_order[0], sizeof(Reg) * m_guest_register_order_count); - m_guest_register_order[0] = reg; - m_guest_register_order_count++; + DebugAssert(m_state.guest_reg_order_count < HostReg_Count); + if (m_state.guest_reg_order_count > 0) + std::memmove(&m_state.guest_reg_order[1], &m_state.guest_reg_order[0], sizeof(Reg) * m_state.guest_reg_order_count); + m_state.guest_reg_order[0] = reg; + m_state.guest_reg_order_count++; } } // namespace CPU::Recompiler diff --git a/src/core/cpu_recompiler_register_cache.h b/src/core/cpu_recompiler_register_cache.h index 9ed86037d..67406cf8e 100644 --- a/src/core/cpu_recompiler_register_cache.h +++ b/src/core/cpu_recompiler_register_cache.h @@ -5,6 +5,7 @@ #include #include +#include #include namespace CPU::Recompiler { @@ -184,7 +185,7 @@ public: RegisterCache(CodeGenerator& code_generator); ~RegisterCache(); - u32 GetActiveCalleeSavedRegisterCount() const { return m_host_register_callee_saved_order_count; } + u32 GetActiveCalleeSavedRegisterCount() const { return m_state.callee_saved_order_count; } ////////////////////////////////////////////////////////////////////////// // Register Allocation @@ -228,6 +229,12 @@ public: /// Restore callee-saved registers. Call at the end of the function. u32 PopCalleeSavedRegisters(bool commit); + /// Pushes the register allocator state, use when entering branched code. + void PushState(); + + /// Pops the register allocator state, use when leaving branched code. + void PopState(); + ////////////////////////////////////////////////////////////////////////// // Scratch Register Allocation ////////////////////////////////////////////////////////////////////////// @@ -241,20 +248,20 @@ public: /// Returns true if the specified guest register is cached. bool IsGuestRegisterCached(Reg guest_reg) const { - const Value& cache_value = m_guest_reg_cache[static_cast(guest_reg)]; + const Value& cache_value = m_state.guest_reg_state[static_cast(guest_reg)]; return cache_value.IsConstant() || cache_value.IsInHostRegister(); } /// Returns the host register if the guest register is cached. std::optional GetHostRegisterForGuestRegister(Reg guest_reg) const { - if (!m_guest_reg_cache[static_cast(guest_reg)].IsInHostRegister()) + if (!m_state.guest_reg_state[static_cast(guest_reg)].IsInHostRegister()) return std::nullopt; - return m_guest_reg_cache[static_cast(guest_reg)].GetHostRegister(); + return m_state.guest_reg_state[static_cast(guest_reg)].GetHostRegister(); } /// Returns true if there is a load delay which will be stored at the end of the instruction. - bool HasLoadDelay() const { return m_load_delay_register != Reg::count; } + bool HasLoadDelay() const { return m_state.load_delay_register != Reg::count; } Value ReadGuestRegister(Reg guest_reg, bool cache = true, bool force_host_register = false, HostReg forced_host_reg = HostReg_Invalid); @@ -288,24 +295,29 @@ private: CodeGenerator& m_code_generator; - HostReg m_cpu_ptr_host_register = {}; - std::array m_host_register_state{}; std::array m_host_register_allocation_order{}; - u32 m_host_register_available_count = 0; - std::array(Reg::count)> m_guest_reg_cache{}; + HostReg m_cpu_ptr_host_register = {}; - std::array m_guest_register_order{}; - u32 m_guest_register_order_count = 0; + struct RegAllocState + { + std::array host_reg_state{}; + std::array callee_saved_order{}; + std::array(Reg::count)> guest_reg_state{}; + std::array guest_reg_order{}; - std::array m_host_register_callee_saved_order{}; - u32 m_host_register_callee_saved_order_count = 0; + u32 available_count = 0; + u32 callee_saved_order_count = 0; + u32 guest_reg_order_count = 0; - Reg m_load_delay_register = Reg::count; - Value m_load_delay_value{}; + Reg load_delay_register = Reg::count; + Value load_delay_value{}; - Reg m_next_load_delay_register = Reg::count; - Value m_next_load_delay_value{}; + Reg next_load_delay_register = Reg::count; + Value next_load_delay_value{}; + } m_state; + + std::stack m_state_stack; }; } // namespace CPU::Recompiler \ No newline at end of file