diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 47fbc8afd..8a949e1e9 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -167,6 +167,14 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi) result = Compile_Multiply(cbi); break; + case InstructionFunct::div: + result = Compile_SignedDivide(cbi); + break; + + case InstructionFunct::divu: + result = Compile_Divide(cbi); + break; + case InstructionFunct::slt: case InstructionFunct::sltu: result = Compile_SetLess(cbi); @@ -1317,6 +1325,186 @@ bool CodeGenerator::Compile_Multiply(const CodeBlockInstruction& cbi) return true; } +static std::tuple MIPSDivide(u32 num, u32 denom) +{ + u32 lo, hi; + + if (denom == 0) + { + // divide by zero + lo = UINT32_C(0xFFFFFFFF); + hi = static_cast(num); + } + else + { + lo = num / denom; + hi = num % denom; + } + + return std::tie(lo, hi); +} + +static std::tuple MIPSDivide(s32 num, s32 denom) +{ + s32 lo, hi; + if (denom == 0) + { + // divide by zero + lo = (num >= 0) ? 
UINT32_C(0xFFFFFFFF) : UINT32_C(1); + hi = static_cast(num); + } + else if (static_cast(num) == UINT32_C(0x80000000) && denom == -1) + { + // unrepresentable + lo = UINT32_C(0x80000000); + hi = 0; + } + else + { + lo = num / denom; + hi = num % denom; + } + + return std::tie(lo, hi); +} + +bool CodeGenerator::Compile_Divide(const CodeBlockInstruction& cbi) +{ + InstructionPrologue(cbi, 1); + + const bool signed_divide = (cbi.instruction.r.funct == InstructionFunct::div); + + Value num = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs); + Value denom = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt); + if (num.IsConstant() && denom.IsConstant()) + { + const auto [lo, hi] = MIPSDivide(static_cast(num.constant_value), static_cast(denom.constant_value)); + m_register_cache.WriteGuestRegister(Reg::lo, Value::FromConstantU32(lo)); + m_register_cache.WriteGuestRegister(Reg::hi, Value::FromConstantU32(hi)); + } + else + { + Value num_reg = GetValueInHostRegister(num, false); + Value denom_reg = GetValueInHostRegister(denom, false); + + m_register_cache.InvalidateGuestRegister(Reg::lo); + m_register_cache.InvalidateGuestRegister(Reg::hi); + + Value lo = m_register_cache.AllocateScratch(RegSize_32); + Value hi = m_register_cache.AllocateScratch(RegSize_32); + + LabelType do_divide, done; + + if (!denom.IsConstant() || denom.HasConstantValue(0)) + { + // if (denom == 0) + EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantU32(0), + &do_divide); + { + // divide by zero + EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0xFFFFFFFF)); + EmitCopyValue(hi.GetHostRegister(), num_reg); + EmitBranch(&done); + } + } + + // else + { + EmitBindLabel(&do_divide); + EmitDiv(lo.GetHostRegister(), hi.GetHostRegister(), num_reg.GetHostRegister(), denom_reg.GetHostRegister(), + RegSize_32, false); + } + + EmitBindLabel(&done); + + m_register_cache.WriteGuestRegister(Reg::lo, std::move(lo)); + 
m_register_cache.WriteGuestRegister(Reg::hi, std::move(hi)); + } + + InstructionEpilogue(cbi); + return true; +} + +bool CodeGenerator::Compile_SignedDivide(const CodeBlockInstruction& cbi) +{ + InstructionPrologue(cbi, 1); + + Value num = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs); + Value denom = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt); + if (num.IsConstant() && denom.IsConstant()) + { + const auto [lo, hi] = MIPSDivide(num.GetS32ConstantValue(), denom.GetS32ConstantValue()); + m_register_cache.WriteGuestRegister(Reg::lo, Value::FromConstantU32(static_cast(lo))); + m_register_cache.WriteGuestRegister(Reg::hi, Value::FromConstantU32(static_cast(hi))); + } + else + { + Value num_reg = GetValueInHostRegister(num, false); + Value denom_reg = GetValueInHostRegister(denom, false); + + m_register_cache.InvalidateGuestRegister(Reg::lo); + m_register_cache.InvalidateGuestRegister(Reg::hi); + + Value lo = m_register_cache.AllocateScratch(RegSize_32); + Value hi = m_register_cache.AllocateScratch(RegSize_32); + + // we need this in a register on ARM because it won't fit in an immediate + EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0x80000000u)); + + LabelType do_divide, done; + + LabelType not_zero; + if (!denom.IsConstant() || denom.HasConstantValue(0)) + { + // if (denom == 0) + EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantU32(0), + &not_zero); + { + // hi = static_cast(num); + EmitCopyValue(hi.GetHostRegister(), num_reg); + + // lo = (num >= 0) ? 
UINT32_C(0xFFFFFFFF) : UINT32_C(1); + LabelType greater_equal_zero; + EmitConditionalBranch(Condition::GreaterEqual, false, num_reg.GetHostRegister(), Value::FromConstantU32(0), + &greater_equal_zero); + EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(1)); + EmitBranch(&done); + EmitBindLabel(&greater_equal_zero); + EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0xFFFFFFFFu)); + EmitBranch(&done); + } + } + + // else if (static_cast(num) == UINT32_C(0x80000000) && denom == -1) + { + EmitBindLabel(&not_zero); + EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), + Value::FromConstantU32(0xFFFFFFFFu), &do_divide); + EmitConditionalBranch(Condition::NotEqual, false, num_reg.GetHostRegister(), lo, &do_divide); + + // unrepresentable + // EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0x80000000u)); // done above + EmitCopyValue(hi.GetHostRegister(), Value::FromConstantU32(0)); + EmitBranch(&done); + } + + // else + { + EmitBindLabel(&do_divide); + EmitDiv(lo.GetHostRegister(), hi.GetHostRegister(), num_reg.GetHostRegister(), denom_reg.GetHostRegister(), + RegSize_32, true); + } + + EmitBindLabel(&done); + + m_register_cache.WriteGuestRegister(Reg::lo, std::move(lo)); + m_register_cache.WriteGuestRegister(Reg::hi, std::move(hi)); + } + + InstructionEpilogue(cbi); + return true; +} + bool CodeGenerator::Compile_SetLess(const CodeBlockInstruction& cbi) { InstructionPrologue(cbi, 1); diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h index d0b9b4236..c5b79edad 100644 --- a/src/core/cpu_recompiler_code_generator.h +++ b/src/core/cpu_recompiler_code_generator.h @@ -41,6 +41,7 @@ public: void EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags); void EmitCmp(HostReg to_reg, const Value& value); void EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, bool signed_multiply); + void EmitDiv(HostReg to_reg_quotient, 
HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, bool signed_divide); void EmitInc(HostReg to_reg, RegSize size); void EmitDec(HostReg to_reg, RegSize size); void EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value); @@ -71,6 +72,7 @@ public: // Unconditional branch to pointer. May allocate a scratch register. void EmitBranch(const void* address, bool allow_scratch = true); + void EmitBranch(LabelType* label); // Branching, generates two paths. void EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size, LabelType* label); @@ -149,7 +151,7 @@ private: Value ConvertValueSize(const Value& value, RegSize size, bool sign_extend); void ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend); - Value GetValueInHostRegister(const Value& value); + Value GetValueInHostRegister(const Value& value, bool allow_zero_register = true); void SwitchToFarCode(); void SwitchToNearCode(); @@ -184,6 +186,8 @@ private: bool Compile_Add(const CodeBlockInstruction& cbi); bool Compile_Subtract(const CodeBlockInstruction& cbi); bool Compile_Multiply(const CodeBlockInstruction& cbi); + bool Compile_Divide(const CodeBlockInstruction& cbi); + bool Compile_SignedDivide(const CodeBlockInstruction& cbi); bool Compile_SetLess(const CodeBlockInstruction& cbi); bool Compile_Branch(const CodeBlockInstruction& cbi); bool Compile_lui(const CodeBlockInstruction& cbi); diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp index eb0e2431f..457c8700e 100644 --- a/src/core/cpu_recompiler_code_generator_aarch64.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp @@ -23,7 +23,10 @@ constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 constexpr u64 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE; -static const a64::WRegister GetHostReg8(HostReg 
reg) { return a64::WRegister(reg); } +static const a64::WRegister GetHostReg8(HostReg reg) +{ + return a64::WRegister(reg); +} static const a64::WRegister GetHostReg8(const Value& value) { @@ -31,7 +34,10 @@ static const a64::WRegister GetHostReg8(const Value& value) return a64::WRegister(value.host_reg); } -static const a64::WRegister GetHostReg16(HostReg reg) { return a64::WRegister(reg); } +static const a64::WRegister GetHostReg16(HostReg reg) +{ + return a64::WRegister(reg); +} static const a64::WRegister GetHostReg16(const Value& value) { @@ -39,7 +45,10 @@ static const a64::WRegister GetHostReg16(const Value& value) return a64::WRegister(value.host_reg); } -static const a64::WRegister GetHostReg32(HostReg reg) { return a64::WRegister(reg); } +static const a64::WRegister GetHostReg32(HostReg reg) +{ + return a64::WRegister(reg); +} static const a64::WRegister GetHostReg32(const Value& value) { @@ -47,7 +56,10 @@ static const a64::WRegister GetHostReg32(const Value& value) return a64::WRegister(value.host_reg); } -static const a64::XRegister GetHostReg64(HostReg reg) { return a64::XRegister(reg); } +static const a64::XRegister GetHostReg64(HostReg reg) +{ + return a64::XRegister(reg); +} static const a64::XRegister GetHostReg64(const Value& value) { @@ -55,7 +67,10 @@ static const a64::XRegister GetHostReg64(const Value& value) return a64::XRegister(value.host_reg); } -static const a64::XRegister GetCPUPtrReg() { return GetHostReg64(RCPUPTR); } +static const a64::XRegister GetCPUPtrReg() +{ + return GetHostReg64(RCPUPTR); +} CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer) : m_code_buffer(code_buffer), m_register_cache(*this), @@ -98,7 +113,10 @@ const char* CodeGenerator::GetHostRegName(HostReg reg, RegSize size /*= HostPoin } } -void CodeGenerator::AlignCodeBuffer(JitCodeBuffer* code_buffer) { code_buffer->Align(16, 0x90); } +void CodeGenerator::AlignCodeBuffer(JitCodeBuffer* code_buffer) +{ + code_buffer->Align(16, 0x90); +} void 
CodeGenerator::InitHostRegs() { @@ -111,9 +129,15 @@ void CodeGenerator::InitHostRegs() m_register_cache.SetCPUPtrHostReg(RCPUPTR); } -void CodeGenerator::SwitchToFarCode() { m_emit = &m_far_emitter; } +void CodeGenerator::SwitchToFarCode() +{ + m_emit = &m_far_emitter; +} -void CodeGenerator::SwitchToNearCode() { m_emit = &m_near_emitter; } +void CodeGenerator::SwitchToNearCode() +{ + m_emit = &m_near_emitter; +} void* CodeGenerator::GetCurrentNearCodePointer() const { @@ -125,12 +149,12 @@ void* CodeGenerator::GetCurrentFarCodePointer() const return static_cast(m_code_buffer->GetFreeFarCodePointer()) + m_far_emitter.GetCursorOffset(); } -Value CodeGenerator::GetValueInHostRegister(const Value& value) +Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */) { if (value.IsInHostRegister()) return Value::FromHostReg(&m_register_cache, value.host_reg, value.size); - if (value.HasConstantValue(0)) + if (value.HasConstantValue(0) && allow_zero_register) return Value::FromHostReg(&m_register_cache, static_cast(31), value.size); Value new_value = m_register_cache.AllocateScratch(value.size); @@ -487,6 +511,38 @@ void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& l } } +void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, + bool signed_divide) +{ + // only 32-bit supported for now.. 
+ Assert(size == RegSize_32); + + Value quotient_value; + if (to_reg_quotient == HostReg_Count) + quotient_value = m_register_cache.AllocateScratch(size); + else + quotient_value.SetHostReg(&m_register_cache, to_reg_quotient, size); + + if (signed_divide) + { + m_emit->sdiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom)); + if (to_reg_remainder != HostReg_Count) + { + m_emit->msub(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom), + GetHostReg32(num)); + } + } + else + { + m_emit->udiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom)); + if (to_reg_remainder != HostReg_Count) + { + m_emit->msub(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom), + GetHostReg32(num)); + } + } +} + void CodeGenerator::EmitInc(HostReg to_reg, RegSize size) { Panic("Not implemented"); @@ -882,7 +938,10 @@ u32 CodeGenerator::PrepareStackForCall() return 0; } -void CodeGenerator::RestoreStackAfterCall(u32 adjust_size) { m_register_cache.PopCallerSavedRegisters(); } +void CodeGenerator::RestoreStackAfterCall(u32 adjust_size) +{ + m_register_cache.PopCallerSavedRegisters(); +} static s64 GetBranchDisplacement(const void* current, const void* target) { @@ -1038,7 +1097,6 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co if (return_value) return_value->Discard(); - // shadow space allocate const u32 adjust_size = PrepareStackForCall(); @@ -1314,9 +1372,15 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const m_register_cache.PopState(); } -void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) { Panic("Not implemented"); } +void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) +{ + Panic("Not implemented"); +} -void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value) { Panic("Not implemented"); } +void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& 
value) +{ + Panic("Not implemented"); +} void CodeGenerator::EmitFlushInterpreterLoadDelay() { @@ -1411,6 +1475,11 @@ void CodeGenerator::EmitBranch(const void* address, bool allow_scratch) m_emit->br(GetHostReg64(temp)); } +void CodeGenerator::EmitBranch(LabelType* label) +{ + m_emit->B(label); +} + static a64::Condition TranslateCondition(Condition condition, bool invert) { switch (condition) @@ -1634,6 +1703,9 @@ void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, Labe } } -void CodeGenerator::EmitBindLabel(LabelType* label) { m_emit->Bind(label); } +void CodeGenerator::EmitBindLabel(LabelType* label) +{ + m_emit->Bind(label); +} } // namespace CPU::Recompiler diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index a69b49b85..67c79225f 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -1,7 +1,7 @@ +#include "common/align.h" #include "cpu_core.h" #include "cpu_recompiler_code_generator.h" #include "cpu_recompiler_thunks.h" -#include "common/align.h" namespace CPU::Recompiler { @@ -173,7 +173,7 @@ void* CodeGenerator::GetCurrentFarCodePointer() const return m_far_emitter.getCurr(); } -Value CodeGenerator::GetValueInHostRegister(const Value& value) +Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */) { if (value.IsInHostRegister()) return Value(value.regcache, value.host_reg, value.size, ValueFlags::Valid | ValueFlags::InHostRegister); @@ -679,6 +679,102 @@ void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& l m_emit->pop(m_emit->rax); } +void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, + bool signed_divide) +{ + const bool save_eax = (to_reg_quotient != Xbyak::Operand::RAX && to_reg_remainder != Xbyak::Operand::RAX); + const bool save_edx = (to_reg_quotient != 
Xbyak::Operand::RDX && to_reg_remainder != Xbyak::Operand::RDX); + + if (save_eax) + m_emit->push(m_emit->rax); + + if (save_edx) + m_emit->push(m_emit->rdx); + + // unsupported cases.. for now + Assert(num != Xbyak::Operand::RDX && num != Xbyak::Operand::RAX); + if (num != Xbyak::Operand::RAX) + EmitCopyValue(Xbyak::Operand::RAX, Value::FromHostReg(&m_register_cache, num, size)); + + if (size == RegSize_8) + { + if (signed_divide) + { + m_emit->cbw(); + m_emit->idiv(GetHostReg8(denom)); + } + else + { + m_emit->xor_(m_emit->dx, m_emit->dx); + m_emit->div(GetHostReg8(denom)); + } + } + else if (size == RegSize_16) + { + if (signed_divide) + { + m_emit->cwd(); + m_emit->idiv(GetHostReg16(denom)); + } + else + { + m_emit->xor_(m_emit->edx, m_emit->edx); + m_emit->div(GetHostReg16(denom)); + } + } + else if (size == RegSize_32) + { + if (signed_divide) + { + m_emit->cdq(); + m_emit->idiv(GetHostReg32(denom)); + } + else + { + m_emit->xor_(m_emit->rdx, m_emit->edx); + m_emit->div(GetHostReg32(denom)); + } + } + else + { + if (signed_divide) + m_emit->idiv(GetHostReg64(denom)); + else + m_emit->div(GetHostReg64(denom)); + } + + if (to_reg_quotient == Xbyak::Operand::RAX && to_reg_remainder == Xbyak::Operand::RDX) + { + // ideal case: registers are the ones we want: don't have to do anything + } + else if (to_reg_quotient == Xbyak::Operand::RDX && to_reg_remainder == Xbyak::Operand::RAX) + { + // what we want, but swapped, so exchange them + m_emit->xchg(m_emit->rax, m_emit->rdx); + } + else + { + // store to the registers we want.. 
this could be optimized better + if (to_reg_quotient != HostReg_Count) + { + m_emit->push(m_emit->rax); + m_emit->pop(GetHostReg64(to_reg_quotient)); + } + if (to_reg_remainder != HostReg_Count) + { + m_emit->push(m_emit->rdx); + m_emit->pop(GetHostReg64(to_reg_remainder)); + } + } + + // restore original contents + if (save_edx) + m_emit->pop(m_emit->rdx); + + if (save_eax) + m_emit->pop(m_emit->rax); +} + void CodeGenerator::EmitInc(HostReg to_reg, RegSize size) { switch (size) @@ -2018,6 +2114,11 @@ void CodeGenerator::EmitBranch(const void* address, bool allow_scratch) m_emit->jmp(GetHostReg64(temp)); } +void CodeGenerator::EmitBranch(LabelType* label) +{ + m_emit->jmp(*label); +} + void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size, LabelType* label) { diff --git a/src/core/cpu_recompiler_register_cache.h b/src/core/cpu_recompiler_register_cache.h index b5bee1503..cbb24c962 100644 --- a/src/core/cpu_recompiler_register_cache.h +++ b/src/core/cpu_recompiler_register_cache.h @@ -138,6 +138,24 @@ struct Value return Value(); } + /// Returns the constant value as a signed 32-bit integer, suitable as an immediate. + s32 GetS32ConstantValue() const + { + switch (size) + { + case RegSize_8: + return static_cast(SignExtend32(Truncate8(constant_value))); + + case RegSize_16: + return static_cast(SignExtend32(Truncate16(constant_value))); + + case RegSize_32: + case RegSize_64: + default: + return static_cast(constant_value); + } + } + /// Returns the constant value as a signed 64-bit integer, suitable as an immediate. s64 GetS64ConstantValue() const {