diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp
index 0291e6287..ad8b394ec 100644
--- a/src/core/cpu_recompiler_code_generator.cpp
+++ b/src/core/cpu_recompiler_code_generator.cpp
@@ -91,6 +91,15 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi)
       result = Compile_Store(cbi);
       break;
 
+    case InstructionOp::j:
+    case InstructionOp::jal:
+    case InstructionOp::beq:
+    case InstructionOp::bne:
+    case InstructionOp::bgtz:
+    case InstructionOp::blez:
+      result = Compile_Branch(cbi);
+      break;
+
     case InstructionOp::lui:
       result = Compile_lui(cbi);
       break;
@@ -127,6 +136,11 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi)
           result = Compile_Multiply(cbi);
           break;
 
+        case InstructionFunct::jr:
+        case InstructionFunct::jalr:
+          result = Compile_Branch(cbi);
+          break;
+
         default:
           result = Compile_Fallback(cbi);
           break;
@@ -998,6 +1012,87 @@ bool CodeGenerator::Compile_Multiply(const CodeBlockInstruction& cbi)
   return true;
 }
 
+bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
+{
+  // Force sync since branches are PC-relative.
+  InstructionPrologue(cbi, 1, true);
+
+  // Compute the branch target.
+  // This depends on the form of the instruction.
+  switch (cbi.instruction.op)
+  {
+    case InstructionOp::j:
+    case InstructionOp::jal:
+    {
+      // npc = (pc & 0xF0000000) | (target << 2)
+      Value branch_target =
+        OrValues(AndValues(m_register_cache.ReadGuestRegister(Reg::pc, false), Value::FromConstantU32(0xF0000000)),
+                 Value::FromConstantU32(cbi.instruction.j.target << 2));
+
+      EmitBranch(Condition::Always, (cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count,
+                 std::move(branch_target));
+    }
+    break;
+
+    case InstructionOp::funct:
+    {
+      Assert(cbi.instruction.r.funct == InstructionFunct::jr || cbi.instruction.r.funct == InstructionFunct::jalr);
+
+      // npc = rs, link to rd
+      Value branch_target = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs);
+      EmitBranch(Condition::Always,
+                 (cbi.instruction.r.funct == InstructionFunct::jalr) ? cbi.instruction.r.rd : Reg::count,
+                 std::move(branch_target));
+    }
+    break;
+
+    case InstructionOp::beq:
+    case InstructionOp::bne:
+    case InstructionOp::bgtz:
+    case InstructionOp::blez:
+    {
+      // npc = pc + (sext(imm) << 2)
+      Value branch_target = AddValues(m_register_cache.ReadGuestRegister(Reg::pc, false),
+                                      Value::FromConstantU32(cbi.instruction.i.imm_sext32() << 2));
+
+      // branch <- rs op rt
+      Value lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs, true, true);
+      Value rhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rt);
+      EmitCmp(lhs.host_reg, rhs);
+
+      Condition condition;
+      switch (cbi.instruction.op)
+      {
+        case InstructionOp::beq:
+          condition = Condition::Equal;
+          break;
+        case InstructionOp::bne:
+          condition = Condition::NotEqual;
+          break;
+        case InstructionOp::bgtz:
+          condition = Condition::GreaterThanZero;
+          break;
+        case InstructionOp::blez:
+          condition = Condition::LessOrEqualToZero;
+          break;
+        default:
+          condition = Condition::Always;
+          break;
+      }
+
+      EmitBranch(condition, Reg::count, std::move(branch_target));
+    }
+    break;
+
+    default:
+      UnreachableCode();
+      break;
+  }
+
+  InstructionEpilogue(cbi);
+  return true;
+}
+
 bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi)
 {
   InstructionPrologue(cbi, 1);
diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h
index de25fb7b6..ab9ac980b 100644
--- a/src/core/cpu_recompiler_code_generator.h
+++ b/src/core/cpu_recompiler_code_generator.h
@@ -49,8 +49,8 @@ public:
   void EmitCopyValue(HostReg to_reg, const Value& value);
   void EmitAdd(HostReg to_reg, const Value& value);
   void EmitSub(HostReg to_reg, const Value& value);
-  void EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, bool signed_multiply);
   void EmitCmp(HostReg to_reg, const Value& value);
+  void EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, bool signed_multiply);
   void EmitInc(HostReg to_reg, RegSize size);
   void EmitDec(HostReg to_reg, RegSize size);
   void EmitShl(HostReg to_reg, RegSize size, const Value& amount_value);
@@ -73,6 +73,9 @@ public:
   Value EmitLoadGuestMemory(const Value& address, RegSize size);
   void EmitStoreGuestMemory(const Value& address, const Value& value);
 
+  // Branching, generates two paths.
+  void EmitBranch(Condition condition, Reg lr_reg, Value&& branch_target);
+
   u32 PrepareStackForCall();
   void RestoreStackAfterCall(u32 adjust_size);
 
@@ -173,6 +176,7 @@ private:
   bool Compile_Store(const CodeBlockInstruction& cbi);
   bool Compile_MoveHiLo(const CodeBlockInstruction& cbi);
   bool Compile_Multiply(const CodeBlockInstruction& cbi);
+  bool Compile_Branch(const CodeBlockInstruction& cbi);
   bool Compile_lui(const CodeBlockInstruction& cbi);
   bool Compile_addiu(const CodeBlockInstruction& cbi);
 
diff --git a/src/core/cpu_recompiler_code_generator_generic.cpp b/src/core/cpu_recompiler_code_generator_generic.cpp
index d9db3007f..1436cdef1 100644
--- a/src/core/cpu_recompiler_code_generator_generic.cpp
+++ b/src/core/cpu_recompiler_code_generator_generic.cpp
@@ -39,4 +39,11 @@ void CodeGenerator::EmitStoreLoadDelay(Reg reg, const Value& value)
   m_load_delay_dirty = true;
 }
 
+#if !defined(Y_CPU_X64)
+void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_target)
+{
+  Panic("Not implemented");
+}
+#endif
+
 } // namespace CPU::Recompiler
\ No newline at end of file
diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp
index b291bfd25..9b1c09a02 100644
--- a/src/core/cpu_recompiler_code_generator_x64.cpp
+++ b/src/core/cpu_recompiler_code_generator_x64.cpp
@@ -1,5 +1,7 @@
+#include "YBaseLib/Log.h"
 #include "cpu_recompiler_code_generator.h"
 #include "cpu_recompiler_thunks.h"
+Log_SetChannel(CPU::Recompiler);
 
 namespace CPU::Recompiler {
 
@@ -451,6 +453,63 @@ void CodeGenerator::EmitSub(HostReg to_reg, const Value& value)
   }
 }
 
+void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value)
+{
+  DebugAssert(value.IsConstant() || value.IsInHostRegister());
+
+  switch (value.size)
+  {
+    case RegSize_8:
+    {
+      if (value.IsConstant())
+        m_emit->cmp(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value)));
+      else
+        m_emit->cmp(GetHostReg8(to_reg), GetHostReg8(value.host_reg));
+    }
+    break;
+
+    case RegSize_16:
+    {
+      if (value.IsConstant())
+        m_emit->cmp(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value)));
+      else
+        m_emit->cmp(GetHostReg16(to_reg), GetHostReg16(value.host_reg));
+    }
+    break;
+
+    case RegSize_32:
+    {
+      if (value.IsConstant())
+        m_emit->cmp(GetHostReg32(to_reg), Truncate32(value.constant_value));
+      else
+        m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
+    }
+    break;
+
+    case RegSize_64:
+    {
+      if (value.IsConstant())
+      {
+        if (!Xbyak::inner::IsInInt32(value.constant_value))
+        {
+          Value temp = m_register_cache.AllocateScratch(RegSize_64);
+          m_emit->mov(GetHostReg64(temp.host_reg), value.constant_value);
+          m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(temp.host_reg));
+        }
+        else
+        {
+          m_emit->cmp(GetHostReg64(to_reg), Truncate32(value.constant_value));
+        }
+      }
+      else
+      {
+        m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(value.host_reg));
+      }
+    }
+    break;
+  }
+}
+
 void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs,
                             bool signed_multiply)
 {
@@ -567,63 +626,6 @@ void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& l
   m_emit->pop(m_emit->rax);
 }
 
-void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value)
-{
-  DebugAssert(value.IsConstant() || value.IsInHostRegister());
-
-  switch (value.size)
-  {
-    case RegSize_8:
-    {
-      if (value.IsConstant())
-        m_emit->cmp(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value)));
-      else
-        m_emit->cmp(GetHostReg8(to_reg), GetHostReg8(value.host_reg));
-    }
-    break;
-
-    case RegSize_16:
-    {
-      if (value.IsConstant())
-        m_emit->cmp(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value)));
-      else
-        m_emit->cmp(GetHostReg16(to_reg), GetHostReg16(value.host_reg));
-    }
-    break;
-
-    case RegSize_32:
-    {
-      if (value.IsConstant())
-        m_emit->cmp(GetHostReg32(to_reg), Truncate32(value.constant_value));
-      else
-        m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
-    }
-    break;
-
-    case RegSize_64:
-    {
-      if (value.IsConstant())
-      {
-        if (!Xbyak::inner::IsInInt32(value.constant_value))
-        {
-          Value temp = m_register_cache.AllocateScratch(RegSize_64);
-          m_emit->mov(GetHostReg64(temp.host_reg), value.constant_value);
-          m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(temp.host_reg));
-        }
-        else
-        {
-          m_emit->cmp(GetHostReg64(to_reg), Truncate32(value.constant_value));
-        }
-      }
-      else
-      {
-        m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(value.host_reg));
-      }
-    }
-    break;
-  }
-}
-
 void CodeGenerator::EmitInc(HostReg to_reg, RegSize size)
 {
   switch (size)
@@ -1599,6 +1601,94 @@ void CodeGenerator::EmitDelaySlotUpdate(bool skip_check_for_delay, bool skip_che
   }
 }
 
+static void EmitConditionalJump(Condition condition, bool invert, Xbyak::CodeGenerator* emit, const Xbyak::Label& label)
+{
+  switch (condition)
+  {
+    case Condition::Always:
+      emit->jmp(label);
+      break;
+
+    case Condition::NotEqual:
+      invert ? emit->je(label) : emit->jne(label);
+      break;
+
+    case Condition::Equal:
+      invert ? emit->jne(label) : emit->je(label);
+      break;
+
+    case Condition::Overflow:
+      invert ? emit->jno(label) : emit->jo(label);
+      break;
+
+    case Condition::GreaterThanZero:
+      invert ? emit->jng(label) : emit->jg(label);
+      break;
+
+    case Condition::LessOrEqualToZero:
+      invert ? emit->jnle(label) : emit->jle(label);
+      break;
+
+    default:
+      UnreachableCode();
+      break;
+  }
+}
+
+void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_target)
+{
+  Xbyak::Label skip_branch;
+
+  // we have to always read the old PC.. when we can push/pop the register cache state this won't be needed
+  Value old_npc;
+  if (lr_reg != Reg::count)
+    old_npc = m_register_cache.ReadGuestRegister(Reg::npc, false, true);
+
+  // condition is inverted because we want the case for skipping it
+  if (condition != Condition::Always)
+    EmitConditionalJump(condition, true, m_emit, skip_branch);
+
+  // save the old PC if we want to
+  if (lr_reg != Reg::count)
+  {
+    // can't cache because we have two branches
+    m_register_cache.WriteGuestRegister(lr_reg, std::move(old_npc));
+    m_register_cache.FlushGuestRegister(lr_reg, true, true);
+  }
+
+  // we don't need to test the address of constant branches unless they're definitely misaligned, which would be
+  // strange.
+  if (!branch_target.IsConstant() || (branch_target.constant_value & 0x3) != 0)
+  {
+    if (branch_target.IsConstant())
+    {
+      Log_WarningPrintf("Misaligned constant target branch 0x%08X, this is strange",
+                        Truncate32(branch_target.constant_value));
+    }
+    else
+    {
+      // check the alignment of the target
+      m_emit->test(GetHostReg32(branch_target), 0x3);
+      m_emit->jnz(GetCurrentFarCodePointer());
+    }
+
+    // exception exit for misaligned target
+    SwitchToFarCode();
+    EmitFunctionCall(nullptr, &Thunks::RaiseAddressException, m_register_cache.GetCPUPtr(), branch_target,
+                     Value::FromConstantU8(0), Value::FromConstantU8(1));
+    EmitExceptionExit();
+    SwitchToNearCode();
+  }
+
+  // branch taken path - write new PC and flush it, since two branches
+  m_register_cache.WriteGuestRegister(Reg::npc, std::move(branch_target));
+  m_register_cache.FlushGuestRegister(Reg::npc, true, true);
+  EmitStoreCPUStructField(offsetof(Core, m_current_instruction_was_branch_taken), Value::FromConstantU8(1));
+
+  // converge point
+  m_emit->L(skip_branch);
+}
+
 #if 0
 class ThunkGenerator
 {
diff --git a/src/core/cpu_recompiler_register_cache.cpp b/src/core/cpu_recompiler_register_cache.cpp
index a3ae22481..e33999896 100644
--- a/src/core/cpu_recompiler_register_cache.cpp
+++ b/src/core/cpu_recompiler_register_cache.cpp
@@ -334,7 +334,17 @@ Value RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, b
 {
   // register zero is always zero
   if (guest_reg == Reg::zero)
+  {
+    // return a scratch value of zero if it's forced
+    if (force_host_register)
+    {
+      Value temp = AllocateScratch(RegSize_32, forced_host_reg);
+      m_code_generator.EmitXor(temp.host_reg, temp);
+      return temp;
+    }
+
     return Value::FromConstantU32(0);
+  }
 
   Value& cache_value = m_guest_reg_cache[static_cast<u8>(guest_reg)];
   if (cache_value.IsValid())
diff --git a/src/core/cpu_recompiler_thunks.cpp b/src/core/cpu_recompiler_thunks.cpp
index c2eec38a2..77652fceb 100644
--- a/src/core/cpu_recompiler_thunks.cpp
+++ b/src/core/cpu_recompiler_thunks.cpp
@@ -78,4 +78,13 @@ void Thunks::UpdateLoadDelay(Core* cpu)
   cpu->UpdateLoadDelay();
 }
 
+void Thunks::RaiseAddressException(Core* cpu, u32 address, bool store, bool branch)
+{
+  cpu->m_cop0_regs.BadVaddr = address;
+  if (branch)
+    cpu->RaiseException(Exception::AdEL, address, false, false, 0);
+  else
+    cpu->RaiseException(store ? Exception::AdES : Exception::AdEL);
+}
+
 } // namespace CPU::Recompiler
\ No newline at end of file
diff --git a/src/core/cpu_recompiler_thunks.h b/src/core/cpu_recompiler_thunks.h
index 0523d050f..e5c903d7f 100644
--- a/src/core/cpu_recompiler_thunks.h
+++ b/src/core/cpu_recompiler_thunks.h
@@ -21,6 +21,7 @@ public:
   static bool WriteMemoryWord(Core* cpu, u32 address, u32 value);
   static bool InterpretInstruction(Core* cpu);
   static void UpdateLoadDelay(Core* cpu);
+  static void RaiseAddressException(Core* cpu, u32 address, bool store, bool branch);
 };
 
 class ASMFunctions
diff --git a/src/core/cpu_recompiler_types.h b/src/core/cpu_recompiler_types.h
index be802be52..b48edc46b 100644
--- a/src/core/cpu_recompiler_types.h
+++ b/src/core/cpu_recompiler_types.h
@@ -24,6 +24,19 @@ enum RegSize : u8
   RegSize_64,
 };
 
+enum class Condition: u8
+{
+  Always,
+  NotEqual,
+  Equal,
+  Overflow,
+  GreaterThanZero,
+  LessOrEqualToZero,
+
+  NotZero = NotEqual,
+  Zero = Equal
+};
+
 #if defined(Y_CPU_X64)
 using HostReg = Xbyak::Operand::Code;
 using CodeEmitter = Xbyak::CodeGenerator;