From f6e88353ebacf2e63171a67ce1011a649a85253a Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 8 Aug 2020 15:15:56 +1000 Subject: [PATCH] CPU/Recompiler: Make generated code invariant to virtual PC --- src/core/CMakeLists.txt | 1 + src/core/bus.cpp | 95 +++++------- src/core/core.vcxproj | 1 + src/core/core.vcxproj.filters | 1 + src/core/cpu_code_cache.cpp | 5 + src/core/cpu_core.cpp | 74 ++++------ src/core/cpu_core_private.h | 19 +++ src/core/cpu_recompiler_code_generator.cpp | 135 +++++++++++------- src/core/cpu_recompiler_code_generator.h | 13 +- .../cpu_recompiler_code_generator_aarch64.cpp | 39 +++-- .../cpu_recompiler_code_generator_x64.cpp | 41 ++++-- src/core/cpu_recompiler_thunks.h | 36 +---- src/core/cpu_types.h | 11 ++ 13 files changed, 258 insertions(+), 213 deletions(-) create mode 100644 src/core/cpu_core_private.h diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 73b7af22c..6b68305eb 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -15,6 +15,7 @@ add_library(core cpu_code_cache.h cpu_core.cpp cpu_core.h + cpu_core_private.h cpu_disasm.cpp cpu_disasm.h cpu_types.cpp diff --git a/src/core/bus.cpp b/src/core/bus.cpp index 3334c75bc..886f78da2 100644 --- a/src/core/bus.cpp +++ b/src/core/bus.cpp @@ -6,6 +6,7 @@ #include "common/state_wrapper.h" #include "cpu_code_cache.h" #include "cpu_core.h" +#include "cpu_core_private.h" #include "cpu_disasm.h" #include "dma.h" #include "gpu.h" @@ -741,10 +742,6 @@ ALWAYS_INLINE static TickCount DoDMAAccess(u32 offset, u32& value) namespace CPU { -// defined in cpu_core.cpp -void RaiseException(Exception excode); -void RaiseException(Exception excode, u32 EPC, bool BD, bool BT, u8 CE); - static void WriteCacheControl(u32 value) { Log_WarningPrintf("Cache control <- 0x%08X", value); @@ -962,7 +959,7 @@ bool FetchInstruction() 0) { // Bus errors don't set BadVaddr. 
- RaiseException(Exception::IBE, g_state.regs.npc, false, false, 0); + RaiseException(Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0), g_state.regs.npc); return false; } @@ -1107,111 +1104,93 @@ bool SafeWriteMemoryWord(VirtualMemoryAddress addr, u32 value) namespace Recompiler::Thunks { -u64 ReadMemoryByte(u32 pc, u32 address) +u64 ReadMemoryByte(u32 address) { - g_state.current_instruction_pc = pc; - u32 temp = 0; const TickCount cycles = DoMemoryAccess(address, temp); if (cycles < 0) - { - RaiseException(Exception::DBE); - return UINT64_C(0xFFFFFFFFFFFFFFFF); - } + return static_cast(-static_cast(Exception::DBE)); g_state.pending_ticks += cycles; return ZeroExtend64(temp); } -u64 ReadMemoryHalfWord(u32 pc, u32 address) +u64 ReadMemoryHalfWord(u32 address) { - g_state.current_instruction_pc = pc; - - if (!DoAlignmentCheck(address)) - return UINT64_C(0xFFFFFFFFFFFFFFFF); + if (!Common::IsAlignedPow2(address, 2)) + { + g_state.cop0_regs.BadVaddr = address; + return static_cast(-static_cast(Exception::AdEL)); + } u32 temp = 0; const TickCount cycles = DoMemoryAccess(address, temp); if (cycles < 0) - { - RaiseException(Exception::DBE); - return UINT64_C(0xFFFFFFFFFFFFFFFF); - } + return static_cast(-static_cast(Exception::DBE)); g_state.pending_ticks += cycles; return ZeroExtend64(temp); } -u64 ReadMemoryWord(u32 pc, u32 address) +u64 ReadMemoryWord(u32 address) { - g_state.current_instruction_pc = pc; - - if (!DoAlignmentCheck(address)) - return UINT64_C(0xFFFFFFFFFFFFFFFF); + if (!Common::IsAlignedPow2(address, 4)) + { + g_state.cop0_regs.BadVaddr = address; + return static_cast(-static_cast(Exception::AdEL)); + } u32 temp = 0; const TickCount cycles = DoMemoryAccess(address, temp); if (cycles < 0) - { - RaiseException(Exception::DBE); - return UINT64_C(0xFFFFFFFFFFFFFFFF); - } + return static_cast(-static_cast(Exception::DBE)); g_state.pending_ticks += cycles; return ZeroExtend64(temp); } -bool WriteMemoryByte(u32 pc, u32 address, u8 
value) +u32 WriteMemoryByte(u32 address, u8 value) { - g_state.current_instruction_pc = pc; - u32 temp = ZeroExtend32(value); const TickCount cycles = DoMemoryAccess(address, temp); if (cycles < 0) - { - RaiseException(Exception::DBE); - return false; - } + return static_cast(Exception::DBE); DebugAssert(cycles == 0); - return true; + return 0; } -bool WriteMemoryHalfWord(u32 pc, u32 address, u16 value) +u32 WriteMemoryHalfWord(u32 address, u16 value) { - g_state.current_instruction_pc = pc; - - if (!DoAlignmentCheck(address)) - return false; + if (!Common::IsAlignedPow2(address, 2)) + { + g_state.cop0_regs.BadVaddr = address; + return static_cast(Exception::AdES); + } u32 temp = ZeroExtend32(value); const TickCount cycles = DoMemoryAccess(address, temp); if (cycles < 0) - { - RaiseException(Exception::DBE); - return false; - } + return static_cast(Exception::DBE); DebugAssert(cycles == 0); - return true; + return 0; } -bool WriteMemoryWord(u32 pc, u32 address, u32 value) +u32 WriteMemoryWord(u32 address, u32 value) { - g_state.current_instruction_pc = pc; - - if (!DoAlignmentCheck(address)) - return false; + if (!Common::IsAlignedPow2(address, 4)) + { + g_state.cop0_regs.BadVaddr = address; + return static_cast(Exception::AdES); + } const TickCount cycles = DoMemoryAccess(address, value); if (cycles < 0) - { - RaiseException(Exception::DBE); - return false; - } + return static_cast(Exception::DBE); DebugAssert(cycles == 0); - return true; + return 0; } } // namespace Recompiler::Thunks diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index d8b9669ec..c475a1e5d 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -98,6 +98,7 @@ + diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 775cff690..9acf9dc28 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -96,5 +96,6 @@ + \ No newline at end of file diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index 
0558c1a40..54bdfc378 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -119,9 +119,14 @@ void Execute() #endif if (s_use_recompiler) + { + g_state.current_instruction_pc = g_state.regs.pc; block->host_code(); + } else + { InterpretCachedBlock(*block); + } if (g_state.pending_ticks >= g_state.downcount) break; diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index c489fb38e..9e06c3cb9 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -3,6 +3,7 @@ #include "common/file_system.h" #include "common/log.h" #include "common/state_wrapper.h" +#include "cpu_core_private.h" #include "cpu_disasm.h" #include "cpu_recompiler_thunks.h" #include "gte.h" @@ -26,22 +27,9 @@ static void ExecuteCop0Instruction(); static void ExecuteCop2Instruction(); static void Branch(u32 target); -// exceptions -void RaiseException(Exception excode); -void RaiseException(Exception excode, u32 EPC, bool BD, bool BT, u8 CE); - // clears pipeline of load/branch delays static void FlushPipeline(); -// defined in cpu_memory.cpp - memory access functions which return false if an exception was thrown. -bool FetchInstruction(); -bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value); -bool ReadMemoryHalfWord(VirtualMemoryAddress addr, u16* value); -bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value); -bool WriteMemoryByte(VirtualMemoryAddress addr, u8 value); -bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value); -bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value); - State g_state; bool TRACE_EXECUTION = false; bool LOG_EXECUTION = false; @@ -164,7 +152,7 @@ void Branch(u32 target) { // The BadVaddr and EPC must be set to the fetching address, not the instruction about to execute. 
g_state.cop0_regs.BadVaddr = target; - RaiseException(Exception::AdEL, target, false, false, 0); + RaiseException(Cop0Registers::CAUSE::MakeValueForException(Exception::AdEL, false, false, 0), target); return; } @@ -193,33 +181,38 @@ ALWAYS_INLINE static u32 GetExceptionVector(Exception excode) void RaiseException(Exception excode) { - RaiseException(excode, g_state.current_instruction_pc, g_state.current_instruction_in_branch_delay_slot, - g_state.current_instruction_was_branch_taken, g_state.current_instruction.cop.cop_n); + RaiseException(Cop0Registers::CAUSE::MakeValueForException(excode, g_state.current_instruction_in_branch_delay_slot, + g_state.current_instruction_was_branch_taken, + g_state.current_instruction.cop.cop_n), + g_state.current_instruction_pc); } -void RaiseException(Exception excode, u32 EPC, bool BD, bool BT, u8 CE) +void RaiseException(u32 CAUSE_bits, u32 EPC) { + g_state.cop0_regs.EPC = EPC; + g_state.cop0_regs.cause.bits = (g_state.cop0_regs.cause.bits & ~Cop0Registers::CAUSE::EXCEPTION_WRITE_MASK) | + (CAUSE_bits & Cop0Registers::CAUSE::EXCEPTION_WRITE_MASK); + #ifdef _DEBUG - if (excode != Exception::INT && excode != Exception::Syscall && excode != Exception::BP) + if (g_state.cop0_regs.cause.Excode != Exception::INT && g_state.cop0_regs.cause.Excode != Exception::Syscall && + g_state.cop0_regs.cause.Excode != Exception::BP) { - Log_DebugPrintf("Exception %u at 0x%08X (epc=0x%08X, BD=%s, CE=%u)", static_cast(excode), - g_state.current_instruction_pc, EPC, BD ? "true" : "false", ZeroExtend32(CE)); + Log_DebugPrintf("Exception %u at 0x%08X (epc=0x%08X, BD=%s, CE=%u)", + static_cast(g_state.cop0_regs.cause.Excode.GetValue()), g_state.current_instruction_pc, + g_state.cop0_regs.EPC, g_state.cop0_regs.cause.BD ? 
"true" : "false", + g_state.cop0_regs.cause.CE.GetValue()); DisassembleAndPrint(g_state.current_instruction_pc, 4, 0); if (LOG_EXECUTION) { - CPU::WriteToExecutionLog("Exception %u at 0x%08X (epc=0x%08X, BD=%s, CE=%u)\n", static_cast(excode), - g_state.current_instruction_pc, EPC, BD ? "true" : "false", ZeroExtend32(CE)); + CPU::WriteToExecutionLog("Exception %u at 0x%08X (epc=0x%08X, BD=%s, CE=%u)\n", + static_cast(g_state.cop0_regs.cause.Excode.GetValue()), + g_state.current_instruction_pc, g_state.cop0_regs.EPC, + g_state.cop0_regs.cause.BD ? "true" : "false", g_state.cop0_regs.cause.CE.GetValue()); } } #endif - g_state.cop0_regs.EPC = EPC; - g_state.cop0_regs.cause.Excode = excode; - g_state.cop0_regs.cause.BD = BD; - g_state.cop0_regs.cause.BT = BT; - g_state.cop0_regs.cause.CE = CE; - - if (BD) + if (g_state.cop0_regs.cause.BD) { // TAR is set to the address which was being fetched in this instruction, or the next instruction to execute if the // exception hadn't occurred in the delay slot. @@ -231,7 +224,7 @@ void RaiseException(Exception excode, u32 EPC, bool BD, bool BT, u8 CE) g_state.cop0_regs.sr.mode_bits <<= 2; // flush the pipeline - we don't want to execute the previously fetched instruction - g_state.regs.npc = GetExceptionVector(excode); + g_state.regs.npc = GetExceptionVector(g_state.cop0_regs.cause.Excode); g_state.exception_raised = true; FlushPipeline(); } @@ -268,8 +261,10 @@ void DispatchInterrupt() return; // Interrupt raising occurs before the start of the instruction. 
- RaiseException(Exception::INT, g_state.regs.pc, g_state.next_instruction_is_branch_delay_slot, - g_state.branch_was_taken, g_state.next_instruction.cop.cop_n); + RaiseException( + Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot, + g_state.branch_was_taken, g_state.next_instruction.cop.cop_n), + g_state.regs.pc); } void UpdateLoadDelay() @@ -1423,21 +1418,6 @@ bool InterpretInstruction() return g_state.exception_raised; } -void RaiseException(u32 epc, u32 ri_bits) -{ - const RaiseExceptionInfo ri{ri_bits}; - RaiseException(static_cast(ri.excode), epc, ri.BD, g_state.current_instruction_was_branch_taken, ri.CE); -} - -void RaiseAddressException(u32 address, bool store, bool branch) -{ - g_state.cop0_regs.BadVaddr = address; - if (branch) - RaiseException(Exception::AdEL, address, false, false, 0); - else - RaiseException(store ? Exception::AdES : Exception::AdEL); -} - } // namespace Recompiler::Thunks } // namespace CPU \ No newline at end of file diff --git a/src/core/cpu_core_private.h b/src/core/cpu_core_private.h new file mode 100644 index 000000000..2887a181f --- /dev/null +++ b/src/core/cpu_core_private.h @@ -0,0 +1,19 @@ +#pragma once +#include "cpu_core.h" + +namespace CPU { + +// exceptions +void RaiseException(Exception excode); +void RaiseException(u32 CAUSE_bits, u32 EPC); + +// defined in bus.cpp - memory access functions which return false if an exception was thrown. 
+bool FetchInstruction(); +bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value); +bool ReadMemoryHalfWord(VirtualMemoryAddress addr, u16* value); +bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value); +bool WriteMemoryByte(VirtualMemoryAddress addr, u8 value); +bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value); +bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value); + +} // namespace CPU \ No newline at end of file diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 8a949e1e9..bf161fffe 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -1,6 +1,7 @@ #include "cpu_recompiler_code_generator.h" #include "common/log.h" #include "cpu_core.h" +#include "cpu_core_private.h" #include "cpu_disasm.h" #include "gte.h" #include "pgxp.h" @@ -803,17 +804,17 @@ Value CodeGenerator::NotValue(const Value& val) void CodeGenerator::GenerateExceptionExit(const CodeBlockInstruction& cbi, Exception excode, Condition condition /* = Condition::Always */) { - const Value epc = Value::FromConstantU32(cbi.pc); - const Value ri_bits = Value::FromConstantU32(Thunks::MakeRaiseExceptionInfo(excode, cbi)); + const Value CAUSE_bits = Value::FromConstantU32( + Cop0Registers::CAUSE::MakeValueForException(excode, cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n)); if (condition == Condition::Always) { // no need to use far code if we're always raising the exception - m_register_cache.InvalidateGuestRegister(Reg::pc); m_register_cache.FlushAllGuestRegisters(true, true); m_register_cache.FlushLoadDelay(true); - EmitFunctionCall(nullptr, &Thunks::RaiseException, epc, ri_bits); + EmitFunctionCall(nullptr, static_cast(&CPU::RaiseException), CAUSE_bits, + GetCurrentInstructionPC()); return; } @@ -825,7 +826,8 @@ void CodeGenerator::GenerateExceptionExit(const CodeBlockInstruction& cbi, Excep EmitBranch(GetCurrentFarCodePointer()); SwitchToFarCode(); - 
EmitFunctionCall(nullptr, &Thunks::RaiseException, epc, ri_bits); + EmitFunctionCall(nullptr, static_cast(&CPU::RaiseException), CAUSE_bits, + GetCurrentInstructionPC()); EmitExceptionExit(); SwitchToNearCode(); @@ -844,6 +846,10 @@ void CodeGenerator::BlockPrologue() m_branch_was_taken_dirty = true; m_current_instruction_was_branch_taken_dirty = false; m_load_delay_dirty = true; + + m_pc_offset = 0; + m_current_instruction_pc_offset = 0; + m_next_pc_offset = 4; } void CodeGenerator::BlockEpilogue() @@ -866,6 +872,11 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou m_emit->nop(); #endif + // move instruction offsets forward + m_current_instruction_pc_offset = m_pc_offset; + m_pc_offset = m_next_pc_offset; + m_next_pc_offset += 4; + // reset dirty flags if (m_branch_was_taken_dirty) { @@ -888,13 +899,6 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou m_current_instruction_in_branch_delay_slot_dirty = false; } - // increment PC, except if we're in the branch delay slot where it was just changed - if (!cbi.is_branch_delay_slot) - { - Assert(!m_register_cache.IsGuestRegisterInHostRegister(Reg::pc)); - m_register_cache.WriteGuestRegister(Reg::pc, Value::FromConstantU32(cbi.pc + 4)); - } - if (!force_sync) { // Defer updates for non-faulting instructions. 
@@ -910,7 +914,6 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou } m_delayed_cycles_add += cycles; - SetCurrentInstructionPC(cbi); AddPendingCycles(true); } @@ -948,9 +951,47 @@ void CodeGenerator::AddPendingCycles(bool commit) m_delayed_cycles_add = 0; } -void CodeGenerator::SetCurrentInstructionPC(const CodeBlockInstruction& cbi) +Value CodeGenerator::CalculatePC(u32 offset /* = 0 */) { - EmitStoreCPUStructField(offsetof(State, current_instruction_pc), Value::FromConstantU32(cbi.pc)); + Value value = m_register_cache.AllocateScratch(RegSize_32); + EmitLoadGuestRegister(value.GetHostRegister(), Reg::pc); + + const u32 apply_offset = m_pc_offset + offset; + if (apply_offset > 0) + EmitAdd(value.GetHostRegister(), value.GetHostRegister(), Value::FromConstantU32(apply_offset), false); + + return value; +} + +Value CodeGenerator::GetCurrentInstructionPC(u32 offset /* = 0 */) +{ + Value value = m_register_cache.AllocateScratch(RegSize_32); + EmitLoadCPUStructField(value.GetHostRegister(), RegSize_32, offsetof(State, current_instruction_pc)); + + const u32 apply_offset = m_current_instruction_pc_offset + offset; + if (apply_offset > 0) + EmitAdd(value.GetHostRegister(), value.GetHostRegister(), Value::FromConstantU32(apply_offset), false); + + return value; +} + +void CodeGenerator::UpdateCurrentInstructionPC(bool commit) +{ + if (m_current_instruction_pc_offset > 0) + { + EmitAddCPUStructField(offsetof(State, current_instruction_pc), + Value::FromConstantU32(m_current_instruction_pc_offset)); + + if (commit) + m_current_instruction_pc_offset = 0; + } +} + +void CodeGenerator::WriteNewPC(const Value& value) +{ + // TODO: This _could_ be moved into the register cache, but would it gain anything? 
+ EmitStoreGuestRegister(Reg::pc, value); + m_next_pc_offset = 0; } bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi) @@ -1558,24 +1599,20 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) m_register_cache.FlushGuestRegister(lr_reg, false, true); // compute return address, which is also set as the new pc when the branch isn't taken - Value new_pc; + Value next_pc; if (condition != Condition::Always || lr_reg != Reg::count) - { - new_pc = AddValues(m_register_cache.ReadGuestRegister(Reg::pc), Value::FromConstantU32(4), false); - if (!new_pc.IsInHostRegister()) - new_pc = GetValueInHostRegister(new_pc); - } + next_pc = CalculatePC(4); - LabelType skip_branch; + LabelType branch_not_taken; if (condition != Condition::Always) { // condition is inverted because we want the case for skipping it if (lhs.IsValid() && rhs.IsValid()) - EmitConditionalBranch(condition, true, lhs.host_reg, rhs, &skip_branch); + EmitConditionalBranch(condition, true, lhs.host_reg, rhs, &branch_not_taken); else if (lhs.IsValid()) - EmitConditionalBranch(condition, true, lhs.host_reg, lhs.size, &skip_branch); + EmitConditionalBranch(condition, true, lhs.host_reg, lhs.size, &branch_not_taken); else - EmitConditionalBranch(condition, true, &skip_branch); + EmitConditionalBranch(condition, true, &branch_not_taken); } // save the old PC if we want to @@ -1584,7 +1621,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) // Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards, // if we don't cancel it, at the end of the instruction the value we write can be overridden. 
EmitCancelInterpreterLoadDelayForReg(lr_reg); - EmitStoreGuestRegister(lr_reg, new_pc); + EmitStoreGuestRegister(lr_reg, next_pc); } // we don't need to test the address of constant branches unless they're definitely misaligned, which would be @@ -1611,28 +1648,33 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) EmitBindLabel(&branch_okay); SwitchToFarCode(); - EmitFunctionCall(nullptr, &Thunks::RaiseAddressException, branch_target, Value::FromConstantU8(0), - Value::FromConstantU8(1)); + EmitStoreCPUStructField(offsetof(State, cop0_regs.BadVaddr), branch_target); + EmitFunctionCall( + nullptr, static_cast(&CPU::RaiseException), + Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(Exception::AdEL, false, false, 0)), + branch_target); EmitExceptionExit(); SwitchToNearCode(); m_register_cache.PopState(); } - // branch taken path - change the return address/new pc if (condition != Condition::Always) - EmitCopyValue(new_pc.GetHostRegister(), branch_target); + { + // branch taken path - modify the next pc + EmitCopyValue(next_pc.GetHostRegister(), branch_target); - // converge point - EmitBindLabel(&skip_branch); - - // update pc - if (condition != Condition::Always) - m_register_cache.WriteGuestRegister(Reg::pc, std::move(new_pc)); + // converge point + EmitBindLabel(&branch_not_taken); + WriteNewPC(next_pc); + } else - m_register_cache.WriteGuestRegister(Reg::pc, std::move(branch_target)); + { + // next_pc is not used for unconditional branches + WriteNewPC(branch_target); + } - // now invalidate lr becuase it was possibly written in the branch, and we don't need branch_target anymore + // now invalidate lr because it was possibly written in the branch if (lr_reg != Reg::count && lr_reg != Reg::zero) m_register_cache.InvalidateGuestRegister(lr_reg); }; @@ -1645,9 +1687,8 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) case InstructionOp::jal: { // npc = (pc & 0xF0000000) | (target << 2) - Value branch_target 
= - OrValues(AndValues(m_register_cache.ReadGuestRegister(Reg::pc), Value::FromConstantU32(0xF0000000)), - Value::FromConstantU32(cbi.instruction.j.target << 2)); + Value branch_target = OrValues(AndValues(CalculatePC(), Value::FromConstantU32(0xF0000000)), + Value::FromConstantU32(cbi.instruction.j.target << 2)); DoBranch(Condition::Always, Value(), Value(), (cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count, std::move(branch_target)); @@ -1682,8 +1723,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) case InstructionOp::bne: { // npc = pc + (sext(imm) << 2) - Value branch_target = AddValues(m_register_cache.ReadGuestRegister(Reg::pc), - Value::FromConstantU32(cbi.instruction.i.imm_sext32() << 2), false); + Value branch_target = CalculatePC(cbi.instruction.i.imm_sext32() << 2); // branch <- rs op rt Value lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs, true, true); @@ -1697,8 +1737,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) case InstructionOp::blez: { // npc = pc + (sext(imm) << 2) - Value branch_target = AddValues(m_register_cache.ReadGuestRegister(Reg::pc), - Value::FromConstantU32(cbi.instruction.i.imm_sext32() << 2), false); + Value branch_target = CalculatePC(cbi.instruction.i.imm_sext32() << 2); // branch <- rs op 0 Value lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs, true, true); @@ -1712,8 +1751,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) case InstructionOp::b: { // npc = pc + (sext(imm) << 2) - Value branch_target = AddValues(m_register_cache.ReadGuestRegister(Reg::pc), - Value::FromConstantU32(cbi.instruction.i.imm_sext32() << 2), false); + Value branch_target = CalculatePC(cbi.instruction.i.imm_sext32() << 2); const u8 rt = static_cast(cbi.instruction.i.rt.GetValue()); const bool bgez = ConvertToBoolUnchecked(rt & u8(1)); @@ -1727,8 +1765,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) if (link) { 
EmitCancelInterpreterLoadDelayForReg(Reg::ra); - m_register_cache.WriteGuestRegister( - Reg::ra, AddValues(m_register_cache.ReadGuestRegister(Reg::pc), Value::FromConstantU32(4), false)); + m_register_cache.WriteGuestRegister(Reg::ra, CalculatePC(4)); } DoBranch(condition, lhs, Value(), Reg::count, std::move(branch_target)); diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h index c5b79edad..9e0dca5b4 100644 --- a/src/core/cpu_recompiler_code_generator.h +++ b/src/core/cpu_recompiler_code_generator.h @@ -41,7 +41,8 @@ public: void EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags); void EmitCmp(HostReg to_reg, const Value& value); void EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, bool signed_multiply); - void EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, bool signed_divide); + void EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, + bool signed_divide); void EmitInc(HostReg to_reg, RegSize size); void EmitDec(HostReg to_reg, RegSize size); void EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value); @@ -167,9 +168,14 @@ private: void BlockEpilogue(); void InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles, bool force_sync = false); void InstructionEpilogue(const CodeBlockInstruction& cbi); - void SetCurrentInstructionPC(const CodeBlockInstruction& cbi); void AddPendingCycles(bool commit); + Value CalculatePC(u32 offset = 0); + void CalculatePC(Value* dest_value, u32 offset = 0); + Value GetCurrentInstructionPC(u32 offset = 0); + void UpdateCurrentInstructionPC(bool commit); + void WriteNewPC(const Value& value); + Value DoGTERegisterRead(u32 index); void DoGTERegisterWrite(u32 index, const Value& value); @@ -204,6 +210,9 @@ private: CodeEmitter* m_emit; TickCount m_delayed_cycles_add = 0; + TickCount 
m_pc_offset = 0; + TickCount m_current_instruction_pc_offset = 0; + TickCount m_next_pc_offset = 0; // whether various flags need to be reset. bool m_current_instruction_in_branch_delay_slot_dirty = false; diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp index 457c8700e..8dd93e73c 100644 --- a/src/core/cpu_recompiler_code_generator_aarch64.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp @@ -2,6 +2,7 @@ #include "common/assert.h" #include "common/log.h" #include "cpu_core.h" +#include "cpu_core_private.h" #include "cpu_recompiler_code_generator.h" #include "cpu_recompiler_thunks.h" Log_SetChannel(CPU::Recompiler); @@ -187,9 +188,6 @@ void CodeGenerator::EmitEndBlock() void CodeGenerator::EmitExceptionExit() { - // toss away our PC value since we're jumping to the exception handler - m_register_cache.InvalidateGuestRegister(Reg::pc); - // ensure all unflushed registers are written back m_register_cache.FlushAllGuestRegisters(false, false); @@ -1268,7 +1266,6 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size) { - const Value pc = Value::FromConstantU32(cbi.pc); AddPendingCycles(true); // We need to use the full 64 bits here since we test the sign bit result. 
@@ -1278,15 +1275,15 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const switch (size) { case RegSize_8: - EmitFunctionCall(&result, &Thunks::ReadMemoryByte, pc, address); + EmitFunctionCall(&result, &Thunks::ReadMemoryByte, address); break; case RegSize_16: - EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, pc, address); + EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, address); break; case RegSize_32: - EmitFunctionCall(&result, &Thunks::ReadMemoryWord, pc, address); + EmitFunctionCall(&result, &Thunks::ReadMemoryWord, address); break; default: @@ -1303,6 +1300,15 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const // load exception path SwitchToFarCode(); + + // cause_bits = (-result << 2) | BD | cop_n + m_emit->neg(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg)); + m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2); + EmitOr(result.host_reg, result.host_reg, + Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException( + static_cast(0), cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n))); + EmitFunctionCall(nullptr, static_cast(&CPU::RaiseException), result, GetCurrentInstructionPC()); + EmitExceptionExit(); SwitchToNearCode(); @@ -1333,23 +1339,22 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value) { - const Value pc = Value::FromConstantU32(cbi.pc); AddPendingCycles(true); - Value result = m_register_cache.AllocateScratch(RegSize_8); + Value result = m_register_cache.AllocateScratch(RegSize_32); switch (value.size) { case RegSize_8: - EmitFunctionCall(&result, &Thunks::WriteMemoryByte, pc, address, value); + EmitFunctionCall(&result, &Thunks::WriteMemoryByte, address, value); break; case RegSize_16: - EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, pc, address, value); + 
EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value); break; case RegSize_32: - EmitFunctionCall(&result, &Thunks::WriteMemoryWord, pc, address, value); + EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value); break; default: @@ -1360,12 +1365,20 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const m_register_cache.PushState(); a64::Label store_okay; - m_emit->Cbnz(GetHostReg64(result.host_reg), &store_okay); + m_emit->Cbz(GetHostReg64(result.host_reg), &store_okay); EmitBranch(GetCurrentFarCodePointer()); m_emit->Bind(&store_okay); // store exception path SwitchToFarCode(); + + // cause_bits = (result << 2) | BD | cop_n + m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2); + EmitOr(result.host_reg, result.host_reg, + Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException( + static_cast(0), cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n))); + EmitFunctionCall(nullptr, static_cast(&CPU::RaiseException), result, GetCurrentInstructionPC()); + EmitExceptionExit(); SwitchToNearCode(); diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index 67c79225f..f620872ca 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -1,5 +1,6 @@ #include "common/align.h" #include "cpu_core.h" +#include "cpu_core_private.h" #include "cpu_recompiler_code_generator.h" #include "cpu_recompiler_thunks.h" @@ -203,9 +204,6 @@ void CodeGenerator::EmitExceptionExit() { AddPendingCycles(false); - // toss away our PC value since we're jumping to the exception handler - m_register_cache.InvalidateGuestRegister(Reg::pc); - // ensure all unflushed registers are written back m_register_cache.FlushAllGuestRegisters(false, false); @@ -1739,7 +1737,6 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) Value CodeGenerator::EmitLoadGuestMemory(const 
CodeBlockInstruction& cbi, const Value& address, RegSize size) { - const Value pc = Value::FromConstantU32(cbi.pc); AddPendingCycles(true); // We need to use the full 64 bits here since we test the sign bit result. @@ -1749,15 +1746,15 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const switch (size) { case RegSize_8: - EmitFunctionCall(&result, &Thunks::ReadMemoryByte, pc, address); + EmitFunctionCall(&result, &Thunks::ReadMemoryByte, address); break; case RegSize_16: - EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, pc, address); + EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, address); break; case RegSize_32: - EmitFunctionCall(&result, &Thunks::ReadMemoryWord, pc, address); + EmitFunctionCall(&result, &Thunks::ReadMemoryWord, address); break; default: @@ -1772,6 +1769,15 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const // load exception path SwitchToFarCode(); + + // cause_bits = (-result << 2) | BD | cop_n + m_emit->neg(GetHostReg32(result.host_reg)); + m_emit->shl(GetHostReg32(result.host_reg), 2); + m_emit->or_(GetHostReg32(result.host_reg), + Cop0Registers::CAUSE::MakeValueForException(static_cast(0), cbi.is_branch_delay_slot, false, + cbi.instruction.cop.cop_n)); + EmitFunctionCall(nullptr, static_cast(&CPU::RaiseException), result, GetCurrentInstructionPC()); + EmitExceptionExit(); SwitchToNearCode(); @@ -1802,23 +1808,22 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value) { - const Value pc = Value::FromConstantU32(cbi.pc); AddPendingCycles(true); - Value result = m_register_cache.AllocateScratch(RegSize_8); + Value result = m_register_cache.AllocateScratch(RegSize_32); switch (value.size) { case RegSize_8: - EmitFunctionCall(&result, &Thunks::WriteMemoryByte, pc, address, value); + EmitFunctionCall(&result, 
&Thunks::WriteMemoryByte, address, value); break; case RegSize_16: - EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, pc, address, value); + EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value); break; case RegSize_32: - EmitFunctionCall(&result, &Thunks::WriteMemoryWord, pc, address, value); + EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value); break; default: @@ -1828,11 +1833,19 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const m_register_cache.PushState(); - m_emit->test(GetHostReg8(result), GetHostReg8(result)); - m_emit->jz(GetCurrentFarCodePointer()); + m_emit->test(GetHostReg32(result), GetHostReg32(result)); + m_emit->jnz(GetCurrentFarCodePointer()); // store exception path SwitchToFarCode(); + + // cause_bits = (result << 2) | BD | cop_n + m_emit->shl(GetHostReg32(result.host_reg), 2); + m_emit->or_(GetHostReg32(result.host_reg), + Cop0Registers::CAUSE::MakeValueForException(static_cast(0), cbi.is_branch_delay_slot, false, + cbi.instruction.cop.cop_n)); + EmitFunctionCall(nullptr, static_cast(&CPU::RaiseException), result, GetCurrentInstructionPC()); + EmitExceptionExit(); SwitchToNearCode(); diff --git a/src/core/cpu_recompiler_thunks.h b/src/core/cpu_recompiler_thunks.h index 8aeaef620..2ea1817dd 100644 --- a/src/core/cpu_recompiler_thunks.h +++ b/src/core/cpu_recompiler_thunks.h @@ -7,44 +7,20 @@ struct CodeBlockInstruction; namespace Recompiler::Thunks { -union RaiseExceptionInfo -{ - u32 bits; - - struct - { - u8 excode; - bool BD; - u8 CE; - u8 unused; - }; -}; - -ALWAYS_INLINE u32 MakeRaiseExceptionInfo(Exception excode, const CodeBlockInstruction& cbi) -{ - RaiseExceptionInfo ri = {}; - ri.excode = static_cast(excode); - ri.BD = cbi.is_branch_delay_slot; - ri.CE = cbi.instruction.cop.cop_n; - return ri.bits; -} - ////////////////////////////////////////////////////////////////////////// // Trampolines for calling back from the JIT // Needed because we can't cast member 
functions to void*... // TODO: Abuse carry flag or something else for exception ////////////////////////////////////////////////////////////////////////// bool InterpretInstruction(); -void RaiseException(u32 epc, u32 ri_bits); -void RaiseAddressException(u32 address, bool store, bool branch); // Memory access functions for the JIT - MSB is set on exception. -u64 ReadMemoryByte(u32 pc, u32 address); -u64 ReadMemoryHalfWord(u32 pc, u32 address); -u64 ReadMemoryWord(u32 pc, u32 address); -bool WriteMemoryByte(u32 pc, u32 address, u8 value); -bool WriteMemoryHalfWord(u32 pc, u32 address, u16 value); -bool WriteMemoryWord(u32 pc, u32 address, u32 value); +u64 ReadMemoryByte(u32 address); +u64 ReadMemoryHalfWord(u32 address); +u64 ReadMemoryWord(u32 address); +u32 WriteMemoryByte(u32 address, u8 value); +u32 WriteMemoryHalfWord(u32 address, u16 value); +u32 WriteMemoryWord(u32 address, u32 value); } // namespace Recompiler::Thunks diff --git a/src/core/cpu_types.h b/src/core/cpu_types.h index 48ed2f19d..48021af6d 100644 --- a/src/core/cpu_types.h +++ b/src/core/cpu_types.h @@ -352,6 +352,17 @@ struct Cop0Registers BitField BD; // exception occurred in branch delay slot, but pushed IP is for branch static constexpr u32 WRITE_MASK = 0b0000'0000'0000'0000'0000'0011'0000'0000; + static constexpr u32 EXCEPTION_WRITE_MASK = 0b1111'0000'0000'0000'0000'0000'0111'1100; + + static u32 MakeValueForException(Exception excode, bool BD, bool BT, u8 CE) + { + CAUSE c = {}; + c.Excode = excode; + c.BD = BD; + c.BT = BT; + c.CE = CE; + return c.bits; + } } cause; union DCIC