CPU/Recompiler: Implement div/divu

This commit is contained in:
Connor McLaughlin 2020-08-07 18:08:14 +10:00
parent a2f271c505
commit 8d4216068a
5 changed files with 401 additions and 18 deletions

View file

@ -167,6 +167,14 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi)
result = Compile_Multiply(cbi);
break;
case InstructionFunct::div:
result = Compile_SignedDivide(cbi);
break;
case InstructionFunct::divu:
result = Compile_Divide(cbi);
break;
case InstructionFunct::slt:
case InstructionFunct::sltu:
result = Compile_SetLess(cbi);
@ -1317,6 +1325,186 @@ bool CodeGenerator::Compile_Multiply(const CodeBlockInstruction& cbi)
return true;
}
// Evaluates an unsigned 32-bit divide at compile time, returning {lo, hi} = {quotient, remainder}.
// A zero denominator yields lo = 0xFFFFFFFF and hi = num instead of performing a host division.
static std::tuple<u32, u32> MIPSDivide(u32 num, u32 denom)
{
  if (denom != 0)
    return std::tuple<u32, u32>(num / denom, num % denom);

  // divide by zero
  return std::tuple<u32, u32>(UINT32_C(0xFFFFFFFF), num);
}
// Evaluates a signed 32-bit divide at compile time, returning {lo, hi} = {quotient, remainder}.
// Two inputs are special-cased so that no host division is attempted for them:
//   - denom == 0:                     lo = -1 when num >= 0, otherwise 1; hi = num
//   - num == 0x80000000, denom == -1: lo = 0x80000000; hi = 0
static std::tuple<s32, s32> MIPSDivide(s32 num, s32 denom)
{
  if (denom == 0)
  {
    // divide by zero
    const s32 quotient = static_cast<s32>((num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1));
    return std::tuple<s32, s32>(quotient, num);
  }

  if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
  {
    // unrepresentable
    return std::tuple<s32, s32>(static_cast<s32>(UINT32_C(0x80000000)), 0);
  }

  return std::tuple<s32, s32>(num / denom, num % denom);
}
// Compiles the unsigned divide (the instruction dispatcher routes InstructionFunct::divu here):
// lo = rs / rt, hi = rs % rt.
//
// When both operands are compile-time constants the result is folded with MIPSDivide(). Otherwise host code is
// emitted that handles a zero denominator explicitly (lo = 0xFFFFFFFF, hi = numerator) and falls back to EmitDiv()
// for every other input. Always returns true.
bool CodeGenerator::Compile_Divide(const CodeBlockInstruction& cbi)
{
  InstructionPrologue(cbi, 1);

  Value num = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs);
  Value denom = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt);
  if (num.IsConstant() && denom.IsConstant())
  {
    // Both inputs known: compute the result now and store it as constants.
    const auto [lo, hi] = MIPSDivide(static_cast<u32>(num.constant_value), static_cast<u32>(denom.constant_value));
    m_register_cache.WriteGuestRegister(Reg::lo, Value::FromConstantU32(lo));
    m_register_cache.WriteGuestRegister(Reg::hi, Value::FromConstantU32(hi));
  }
  else
  {
    // allow_zero_register = false: request a real host register even for a constant zero operand.
    Value num_reg = GetValueInHostRegister(num, false);
    Value denom_reg = GetValueInHostRegister(denom, false);

    // lo/hi are about to be overwritten, so whatever the cache holds for them is stale.
    m_register_cache.InvalidateGuestRegister(Reg::lo);
    m_register_cache.InvalidateGuestRegister(Reg::hi);

    Value lo = m_register_cache.AllocateScratch(RegSize_32);
    Value hi = m_register_cache.AllocateScratch(RegSize_32);

    LabelType do_divide, done;

    // The zero check is skipped only when the denominator is a constant known to be non-zero.
    if (!denom.IsConstant() || denom.HasConstantValue(0))
    {
      // if (denom == 0)
      EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantU32(0),
                            &do_divide);
      {
        // divide by zero: lo = 0xFFFFFFFF, hi = num
        EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0xFFFFFFFF));
        EmitCopyValue(hi.GetHostRegister(), num_reg);
        EmitBranch(&done);
      }
    }

    // else
    {
      EmitBindLabel(&do_divide);
      EmitDiv(lo.GetHostRegister(), hi.GetHostRegister(), num_reg.GetHostRegister(), denom_reg.GetHostRegister(),
              RegSize_32, false);
    }

    EmitBindLabel(&done);

    m_register_cache.WriteGuestRegister(Reg::lo, std::move(lo));
    m_register_cache.WriteGuestRegister(Reg::hi, std::move(hi));
  }

  InstructionEpilogue(cbi);
  return true;
}
bool CodeGenerator::Compile_SignedDivide(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1);
Value num = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs);
Value denom = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt);
if (num.IsConstant() && denom.IsConstant())
{
const auto [lo, hi] = MIPSDivide(num.GetS32ConstantValue(), denom.GetS32ConstantValue());
m_register_cache.WriteGuestRegister(Reg::lo, Value::FromConstantU32(static_cast<u32>(lo)));
m_register_cache.WriteGuestRegister(Reg::hi, Value::FromConstantU32(static_cast<u32>(hi)));
}
else
{
Value num_reg = GetValueInHostRegister(num, false);
Value denom_reg = GetValueInHostRegister(denom, false);
m_register_cache.InvalidateGuestRegister(Reg::lo);
m_register_cache.InvalidateGuestRegister(Reg::hi);
Value lo = m_register_cache.AllocateScratch(RegSize_32);
Value hi = m_register_cache.AllocateScratch(RegSize_32);
// we need this in a register on ARM because it won't fit in an immediate
EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0x80000000u));
LabelType do_divide, done;
LabelType not_zero;
if (!denom.IsConstant() || denom.HasConstantValue(0))
{
// if (denom == 0)
EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantU32(0),
&not_zero);
{
// hi = static_cast<u32>(num);
EmitCopyValue(hi.GetHostRegister(), num_reg);
// lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
LabelType greater_equal_zero;
EmitConditionalBranch(Condition::GreaterEqual, false, num_reg.GetHostRegister(), Value::FromConstantU32(0),
&greater_equal_zero);
EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(1));
EmitBranch(&done);
EmitBindLabel(&greater_equal_zero);
EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0xFFFFFFFFu));
EmitBranch(&done);
}
}
// else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
{
EmitBindLabel(&not_zero);
EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(),
Value::FromConstantU32(0xFFFFFFFFu), &do_divide);
EmitConditionalBranch(Condition::NotEqual, false, num_reg.GetHostRegister(), lo, &do_divide);
// unrepresentable
// EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0x80000000u)); // done above
EmitCopyValue(hi.GetHostRegister(), Value::FromConstantU32(0));
EmitBranch(&done);
}
// else
{
EmitBindLabel(&do_divide);
EmitDiv(lo.GetHostRegister(), hi.GetHostRegister(), num_reg.GetHostRegister(), denom_reg.GetHostRegister(),
RegSize_32, true);
}
EmitBindLabel(&done);
m_register_cache.WriteGuestRegister(Reg::lo, std::move(lo));
m_register_cache.WriteGuestRegister(Reg::hi, std::move(hi));
}
InstructionEpilogue(cbi);
return true;
}
bool CodeGenerator::Compile_SetLess(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1);

View file

@ -41,6 +41,7 @@ public:
void EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags);
void EmitCmp(HostReg to_reg, const Value& value);
void EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, bool signed_multiply);
void EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, bool signed_divide);
void EmitInc(HostReg to_reg, RegSize size);
void EmitDec(HostReg to_reg, RegSize size);
void EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value);
@ -71,6 +72,7 @@ public:
// Unconditional branch to pointer. May allocate a scratch register.
void EmitBranch(const void* address, bool allow_scratch = true);
void EmitBranch(LabelType* label);
// Branching, generates two paths.
void EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size, LabelType* label);
@ -149,7 +151,7 @@ private:
Value ConvertValueSize(const Value& value, RegSize size, bool sign_extend);
void ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend);
Value GetValueInHostRegister(const Value& value);
Value GetValueInHostRegister(const Value& value, bool allow_zero_register = true);
void SwitchToFarCode();
void SwitchToNearCode();
@ -184,6 +186,8 @@ private:
bool Compile_Add(const CodeBlockInstruction& cbi);
bool Compile_Subtract(const CodeBlockInstruction& cbi);
bool Compile_Multiply(const CodeBlockInstruction& cbi);
bool Compile_Divide(const CodeBlockInstruction& cbi);
bool Compile_SignedDivide(const CodeBlockInstruction& cbi);
bool Compile_SetLess(const CodeBlockInstruction& cbi);
bool Compile_Branch(const CodeBlockInstruction& cbi);
bool Compile_lui(const CodeBlockInstruction& cbi);

View file

@ -23,7 +23,10 @@ constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
constexpr u64 FUNCTION_STACK_SIZE =
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
// Wraps a host register index in an a64::WRegister operand for 8-bit accesses.
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
static const a64::WRegister GetHostReg8(HostReg reg) { return a64::WRegister(reg); }
static const a64::WRegister GetHostReg8(HostReg reg)
{
return a64::WRegister(reg);
}
static const a64::WRegister GetHostReg8(const Value& value)
{
@ -31,7 +34,10 @@ static const a64::WRegister GetHostReg8(const Value& value)
return a64::WRegister(value.host_reg);
}
// Wraps a host register index in an a64::WRegister operand for 16-bit accesses.
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
static const a64::WRegister GetHostReg16(HostReg reg) { return a64::WRegister(reg); }
static const a64::WRegister GetHostReg16(HostReg reg)
{
return a64::WRegister(reg);
}
static const a64::WRegister GetHostReg16(const Value& value)
{
@ -39,7 +45,10 @@ static const a64::WRegister GetHostReg16(const Value& value)
return a64::WRegister(value.host_reg);
}
// Wraps a host register index in an a64::WRegister operand for 32-bit accesses.
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
static const a64::WRegister GetHostReg32(HostReg reg) { return a64::WRegister(reg); }
static const a64::WRegister GetHostReg32(HostReg reg)
{
return a64::WRegister(reg);
}
static const a64::WRegister GetHostReg32(const Value& value)
{
@ -47,7 +56,10 @@ static const a64::WRegister GetHostReg32(const Value& value)
return a64::WRegister(value.host_reg);
}
// Wraps a host register index in an a64::XRegister operand for 64-bit accesses.
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
static const a64::XRegister GetHostReg64(HostReg reg) { return a64::XRegister(reg); }
static const a64::XRegister GetHostReg64(HostReg reg)
{
return a64::XRegister(reg);
}
static const a64::XRegister GetHostReg64(const Value& value)
{
@ -55,7 +67,10 @@ static const a64::XRegister GetHostReg64(const Value& value)
return a64::XRegister(value.host_reg);
}
// Returns the 64-bit operand for RCPUPTR, the host register that InitHostRegs() registers as the CPU pointer register.
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
static const a64::XRegister GetCPUPtrReg() { return GetHostReg64(RCPUPTR); }
static const a64::XRegister GetCPUPtrReg()
{
return GetHostReg64(RCPUPTR);
}
CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
: m_code_buffer(code_buffer), m_register_cache(*this),
@ -98,7 +113,10 @@ const char* CodeGenerator::GetHostRegName(HostReg reg, RegSize size /*= HostPoin
}
}
// Aligns the given code buffer by calling Align(16, 0x90) on it.
// NOTE(review): the second argument is presumably a padding byte; 0x90 is the x86 NOP opcode, so confirm it is the
// intended value for this a64 backend.
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
void CodeGenerator::AlignCodeBuffer(JitCodeBuffer* code_buffer) { code_buffer->Align(16, 0x90); }
void CodeGenerator::AlignCodeBuffer(JitCodeBuffer* code_buffer)
{
code_buffer->Align(16, 0x90);
}
void CodeGenerator::InitHostRegs()
{
@ -111,9 +129,15 @@ void CodeGenerator::InitHostRegs()
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
}
// Redirects subsequent m_emit-> calls to the far-code emitter.
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
void CodeGenerator::SwitchToFarCode() { m_emit = &m_far_emitter; }
void CodeGenerator::SwitchToFarCode()
{
m_emit = &m_far_emitter;
}
// Redirects subsequent m_emit-> calls to the near-code emitter.
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
void CodeGenerator::SwitchToNearCode() { m_emit = &m_near_emitter; }
void CodeGenerator::SwitchToNearCode()
{
m_emit = &m_near_emitter;
}
void* CodeGenerator::GetCurrentNearCodePointer() const
{
@ -125,12 +149,12 @@ void* CodeGenerator::GetCurrentFarCodePointer() const
return static_cast<u8*>(m_code_buffer->GetFreeFarCodePointer()) + m_far_emitter.GetCursorOffset();
}
Value CodeGenerator::GetValueInHostRegister(const Value& value)
Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */)
{
if (value.IsInHostRegister())
return Value::FromHostReg(&m_register_cache, value.host_reg, value.size);
if (value.HasConstantValue(0))
if (value.HasConstantValue(0) && allow_zero_register)
return Value::FromHostReg(&m_register_cache, static_cast<HostReg>(31), value.size);
Value new_value = m_register_cache.AllocateScratch(value.size);
@ -487,6 +511,38 @@ void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& l
}
}
// Emits a 32-bit divide for the a64 backend.
//   to_reg_quotient  - receives num / denom; pass HostReg_Count to have a scratch register used instead.
//   to_reg_remainder - receives the remainder; pass HostReg_Count to skip computing it.
//   signed_divide    - selects sdiv over udiv.
// The remainder is derived from the quotient with msub (num - quotient * denom), so the quotient register must not
// be the same register as num or denom when a remainder is requested.
void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size,
                            bool signed_divide)
{
  // only 32-bit supported for now..
  Assert(size == RegSize_32);

  Value quotient_value;
  if (to_reg_quotient != HostReg_Count)
    quotient_value.SetHostReg(&m_register_cache, to_reg_quotient, size);
  else
    quotient_value = m_register_cache.AllocateScratch(size);

  const a64::WRegister quotient = GetHostReg32(quotient_value);
  const a64::WRegister dividend = GetHostReg32(num);
  const a64::WRegister divisor = GetHostReg32(denom);

  if (signed_divide)
    m_emit->sdiv(quotient, dividend, divisor);
  else
    m_emit->udiv(quotient, dividend, divisor);

  // The remainder step is identical for signed and unsigned division.
  const bool want_remainder = (to_reg_remainder != HostReg_Count);
  if (want_remainder)
    m_emit->msub(GetHostReg32(to_reg_remainder), quotient, divisor, dividend);
}
void CodeGenerator::EmitInc(HostReg to_reg, RegSize size)
{
Panic("Not implemented");
@ -882,7 +938,10 @@ u32 CodeGenerator::PrepareStackForCall()
return 0;
}
// Undoes the call preparation by popping the caller-saved registers through the register cache.
// adjust_size is accepted but not used by this implementation.
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
void CodeGenerator::RestoreStackAfterCall(u32 adjust_size) { m_register_cache.PopCallerSavedRegisters(); }
void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
{
m_register_cache.PopCallerSavedRegisters();
}
static s64 GetBranchDisplacement(const void* current, const void* target)
{
@ -1038,7 +1097,6 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
if (return_value)
return_value->Discard();
// shadow space allocate
const u32 adjust_size = PrepareStackForCall();
@ -1314,9 +1372,15 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
m_register_cache.PopState();
}
// Not implemented in this backend: calling it panics with "Not implemented".
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) { Panic("Not implemented"); }
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{
Panic("Not implemented");
}
// Not implemented in this backend: calling it panics with "Not implemented".
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value) { Panic("Not implemented"); }
void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
{
Panic("Not implemented");
}
void CodeGenerator::EmitFlushInterpreterLoadDelay()
{
@ -1411,6 +1475,11 @@ void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
m_emit->br(GetHostReg64(temp));
}
// Emits an unconditional branch to `label` through the current emitter's B(). The label may be bound before or
// after this call (Compile_Divide branches to `done` before binding it).
void CodeGenerator::EmitBranch(LabelType* label)
{
m_emit->B(label);
}
static a64::Condition TranslateCondition(Condition condition, bool invert)
{
switch (condition)
@ -1634,6 +1703,9 @@ void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, Labe
}
}
// Binds `label` at the current position of the active emitter, making it the target of branches that reference it.
// The one-line and the expanded definition below are the same function before and after a formatting-only change.
void CodeGenerator::EmitBindLabel(LabelType* label) { m_emit->Bind(label); }
void CodeGenerator::EmitBindLabel(LabelType* label)
{
m_emit->Bind(label);
}
} // namespace CPU::Recompiler

View file

@ -1,7 +1,7 @@
#include "common/align.h"
#include "cpu_core.h"
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
#include "common/align.h"
namespace CPU::Recompiler {
@ -173,7 +173,7 @@ void* CodeGenerator::GetCurrentFarCodePointer() const
return m_far_emitter.getCurr<void*>();
}
Value CodeGenerator::GetValueInHostRegister(const Value& value)
Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */)
{
if (value.IsInHostRegister())
return Value(value.regcache, value.host_reg, value.size, ValueFlags::Valid | ValueFlags::InHostRegister);
@ -679,6 +679,102 @@ void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& l
m_emit->pop(m_emit->rax);
}
// Emits an x86-64 div/idiv of `num` by `denom` at the given operand size.
//   to_reg_quotient  - register that receives the quotient, or HostReg_Count to discard it.
//   to_reg_remainder - register that receives the remainder, or HostReg_Count to discard it.
//   signed_divide    - selects idiv (with sign extension of the dividend) over div (with zero extension).
//
// div/idiv take their dividend from rDX:rAX (AX for 8-bit) and write the results back to rAX/rDX (AL/AH for 8-bit),
// so RAX and RDX are saved on the stack around the operation unless the caller requested a result in them.
// Neither `num` nor `denom` may currently live in RAX or RDX. The two result registers must be distinct.
void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size,
                            bool signed_divide)
{
  const bool save_eax = (to_reg_quotient != Xbyak::Operand::RAX && to_reg_remainder != Xbyak::Operand::RAX);
  const bool save_edx = (to_reg_quotient != Xbyak::Operand::RDX && to_reg_remainder != Xbyak::Operand::RDX);

  if (save_eax)
    m_emit->push(m_emit->rax);

  if (save_edx)
    m_emit->push(m_emit->rdx);

  // unsupported cases.. for now
  Assert(num != Xbyak::Operand::RDX && num != Xbyak::Operand::RAX);

  // The divisor is read only after RAX holds the dividend and RDX its extension, so a divisor living in either
  // register would already have been overwritten by the time div/idiv executes.
  Assert(denom != Xbyak::Operand::RDX && denom != Xbyak::Operand::RAX);

  if (num != Xbyak::Operand::RAX)
    EmitCopyValue(Xbyak::Operand::RAX, Value::FromHostReg(&m_register_cache, num, size));

  if (size == RegSize_8)
  {
    // 8-bit form: dividend is AX, quotient goes to AL and remainder to AH.
    if (signed_divide)
    {
      m_emit->cbw();
      m_emit->idiv(GetHostReg8(denom));
    }
    else
    {
      // zero AH (the upper half of the dividend), not DX
      m_emit->movzx(m_emit->eax, m_emit->al);
      m_emit->div(GetHostReg8(denom));
    }

    // Move the results to where the wider forms leave them. EDX must be written first, since rewriting EAX
    // destroys AH.
    m_emit->movzx(m_emit->edx, m_emit->ah);
    m_emit->movzx(m_emit->eax, m_emit->al);
  }
  else if (size == RegSize_16)
  {
    if (signed_divide)
    {
      m_emit->cwd();
      m_emit->idiv(GetHostReg16(denom));
    }
    else
    {
      m_emit->xor_(m_emit->edx, m_emit->edx);
      m_emit->div(GetHostReg16(denom));
    }
  }
  else if (size == RegSize_32)
  {
    if (signed_divide)
    {
      m_emit->cdq();
      m_emit->idiv(GetHostReg32(denom));
    }
    else
    {
      m_emit->xor_(m_emit->edx, m_emit->edx);
      m_emit->div(GetHostReg32(denom));
    }
  }
  else
  {
    // 64-bit form: the dividend is RDX:RAX, so RDX has to be initialised here as well.
    if (signed_divide)
    {
      m_emit->cqo();
      m_emit->idiv(GetHostReg64(denom));
    }
    else
    {
      // a 32-bit xor clears the whole 64-bit register
      m_emit->xor_(m_emit->edx, m_emit->edx);
      m_emit->div(GetHostReg64(denom));
    }
  }

  if (to_reg_quotient == Xbyak::Operand::RAX && to_reg_remainder == Xbyak::Operand::RDX)
  {
    // ideal case: registers are the ones we want: don't have to do anything
  }
  else if (to_reg_quotient == Xbyak::Operand::RDX && to_reg_remainder == Xbyak::Operand::RAX)
  {
    // what we want, but swapped, so exchange them
    m_emit->xchg(m_emit->rax, m_emit->rdx);
  }
  else
  {
    // store to the registers we want.. this could be optimized better
    // If the quotient is destined for RDX, the remainder has to be copied out of RDX before it is overwritten.
    const bool remainder_first = (to_reg_quotient == Xbyak::Operand::RDX);

    if (remainder_first && to_reg_remainder != HostReg_Count)
    {
      m_emit->push(m_emit->rdx);
      m_emit->pop(GetHostReg64(to_reg_remainder));
    }

    if (to_reg_quotient != HostReg_Count)
    {
      m_emit->push(m_emit->rax);
      m_emit->pop(GetHostReg64(to_reg_quotient));
    }

    if (!remainder_first && to_reg_remainder != HostReg_Count)
    {
      m_emit->push(m_emit->rdx);
      m_emit->pop(GetHostReg64(to_reg_remainder));
    }
  }

  // restore original contents
  if (save_edx)
    m_emit->pop(m_emit->rdx);

  if (save_eax)
    m_emit->pop(m_emit->rax);
}
void CodeGenerator::EmitInc(HostReg to_reg, RegSize size)
{
switch (size)
@ -2018,6 +2114,11 @@ void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
m_emit->jmp(GetHostReg64(temp));
}
// Emits an unconditional jmp to `label` through the current emitter. The label may be bound before or after this
// call (Compile_Divide branches to `done` before binding it).
void CodeGenerator::EmitBranch(LabelType* label)
{
m_emit->jmp(*label);
}
void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size,
LabelType* label)
{

View file

@ -138,6 +138,24 @@ struct Value
return Value();
}
/// Returns the constant value as a signed 32-bit integer, suitable as an immediate.
/// 8-bit and 16-bit constants are truncated to their own width and then sign-extended to 32 bits; every other
/// size is converted with a plain cast of constant_value.
s32 GetS32ConstantValue() const
{
  if (size == RegSize_8)
    return static_cast<s32>(SignExtend32(Truncate8(constant_value)));

  if (size == RegSize_16)
    return static_cast<s32>(SignExtend32(Truncate16(constant_value)));

  // RegSize_32, RegSize_64 and any other size
  return static_cast<s32>(constant_value);
}
/// Returns the constant value as a signed 64-bit integer, suitable as an immediate.
s64 GetS64ConstantValue() const
{