mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-03-06 14:27:44 +00:00
CPU/Recompiler: Implement mult/multu
This commit is contained in:
parent
e5c0d28fdc
commit
f46160ac46
|
@ -9,8 +9,8 @@ Log_SetChannel(CPU::CodeCache);
|
|||
|
||||
namespace CPU {
|
||||
|
||||
bool USE_CODE_CACHE = false;
|
||||
bool USE_RECOMPILER = false;
|
||||
bool USE_CODE_CACHE = true;
|
||||
bool USE_RECOMPILER = true;
|
||||
|
||||
CodeCache::CodeCache() = default;
|
||||
|
||||
|
@ -48,13 +48,21 @@ void CodeCache::Execute()
|
|||
continue;
|
||||
}
|
||||
|
||||
#if 0
|
||||
const u32 tick = m_system->GetGlobalTickCounter() + m_core->GetPendingTicks();
|
||||
if (tick == 8950812)
|
||||
__debugbreak();
|
||||
#endif
|
||||
|
||||
reexecute_block:
|
||||
if (USE_RECOMPILER)
|
||||
block->host_code(m_core);
|
||||
else
|
||||
InterpretCachedBlock(*block);
|
||||
|
||||
// LogCurrentState();
|
||||
#if 0
|
||||
LogCurrentState();
|
||||
#endif
|
||||
|
||||
next_block_key = GetNextBlockKey();
|
||||
if (next_block_key.bits == block->key.bits)
|
||||
|
@ -91,10 +99,10 @@ void CodeCache::LogCurrentState()
|
|||
"tick=%u pc=%08X npc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
|
||||
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
|
||||
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X\n",
|
||||
m_system->GetGlobalTickCounter(), regs.pc, regs.npc, regs.zero, regs.at, regs.v0, regs.v1, regs.a0, regs.a1,
|
||||
regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7, regs.s0, regs.s1, regs.s2,
|
||||
regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp, regs.sp, regs.fp,
|
||||
regs.ra);
|
||||
m_system->GetGlobalTickCounter() + m_core->GetPendingTicks(), regs.pc, regs.npc, regs.zero, regs.at, regs.v0,
|
||||
regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7,
|
||||
regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp,
|
||||
regs.sp, regs.fp, regs.ra);
|
||||
}
|
||||
|
||||
CodeBlockKey CodeCache::GetNextBlockKey() const
|
||||
|
|
|
@ -121,6 +121,11 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi)
|
|||
result = Compile_MoveHiLo(cbi);
|
||||
break;
|
||||
|
||||
case InstructionFunct::mult:
|
||||
case InstructionFunct::multu:
|
||||
result = Compile_Multiply(cbi);
|
||||
break;
|
||||
|
||||
default:
|
||||
result = Compile_Fallback(cbi);
|
||||
break;
|
||||
|
@ -285,6 +290,73 @@ Value CodeGenerator::AddValues(const Value& lhs, const Value& rhs)
|
|||
}
|
||||
}
|
||||
|
||||
std::pair<Value, Value> CodeGenerator::MulValues(const Value& lhs, const Value& rhs, bool signed_multiply)
|
||||
{
|
||||
DebugAssert(lhs.size == rhs.size);
|
||||
if (lhs.IsConstant() && rhs.IsConstant())
|
||||
{
|
||||
// compile-time
|
||||
switch (lhs.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
{
|
||||
u16 res;
|
||||
if (signed_multiply)
|
||||
res = u16(s16(s8(lhs.constant_value)) * s16(s8(rhs.constant_value)));
|
||||
else
|
||||
res = u16(u8(lhs.constant_value)) * u16(u8(rhs.constant_value));
|
||||
|
||||
return std::make_pair(Value::FromConstantU8(Truncate8(res >> 8)), Value::FromConstantU8(Truncate8(res)));
|
||||
}
|
||||
|
||||
case RegSize_16:
|
||||
{
|
||||
u32 res;
|
||||
if (signed_multiply)
|
||||
res = u32(s32(s16(lhs.constant_value)) * s32(s16(rhs.constant_value)));
|
||||
else
|
||||
res = u32(u16(lhs.constant_value)) * u32(u16(rhs.constant_value));
|
||||
|
||||
return std::make_pair(Value::FromConstantU16(Truncate16(res >> 16)), Value::FromConstantU16(Truncate16(res)));
|
||||
}
|
||||
|
||||
case RegSize_32:
|
||||
{
|
||||
u64 res;
|
||||
if (signed_multiply)
|
||||
res = u64(s64(s32(lhs.constant_value)) * s64(s32(rhs.constant_value)));
|
||||
else
|
||||
res = u64(u32(lhs.constant_value)) * u64(u32(rhs.constant_value));
|
||||
|
||||
return std::make_pair(Value::FromConstantU32(Truncate32(res >> 32)), Value::FromConstantU32(Truncate32(res)));
|
||||
}
|
||||
break;
|
||||
|
||||
case RegSize_64:
|
||||
{
|
||||
u64 res;
|
||||
if (signed_multiply)
|
||||
res = u64(s64(lhs.constant_value) * s64(rhs.constant_value));
|
||||
else
|
||||
res = lhs.constant_value * rhs.constant_value;
|
||||
|
||||
// TODO: 128-bit multiply...
|
||||
Panic("128-bit multiply");
|
||||
return std::make_pair(Value::FromConstantU64(0), Value::FromConstantU64(res));
|
||||
}
|
||||
|
||||
default:
|
||||
return std::make_pair(Value::FromConstantU64(0), Value::FromConstantU64(0));
|
||||
}
|
||||
}
|
||||
|
||||
// We need two registers for both components.
|
||||
Value hi = m_register_cache.AllocateScratch(lhs.size);
|
||||
Value lo = m_register_cache.AllocateScratch(lhs.size);
|
||||
EmitMul(hi.host_reg, lo.host_reg, lhs, rhs, signed_multiply);
|
||||
return std::make_pair(std::move(hi), std::move(lo));
|
||||
}
|
||||
|
||||
Value CodeGenerator::ShlValues(const Value& lhs, const Value& rhs)
|
||||
{
|
||||
DebugAssert(lhs.size == rhs.size);
|
||||
|
@ -911,6 +983,20 @@ bool CodeGenerator::Compile_MoveHiLo(const CodeBlockInstruction& cbi)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool CodeGenerator::Compile_Multiply(const CodeBlockInstruction& cbi)
|
||||
{
|
||||
InstructionPrologue(cbi, 1);
|
||||
|
||||
const bool signed_multiply = (cbi.instruction.r.funct == InstructionFunct::mult);
|
||||
std::pair<Value, Value> result = MulValues(m_register_cache.ReadGuestRegister(cbi.instruction.r.rs),
|
||||
m_register_cache.ReadGuestRegister(cbi.instruction.r.rt), signed_multiply);
|
||||
m_register_cache.WriteGuestRegister(Reg::hi, std::move(result.first));
|
||||
m_register_cache.WriteGuestRegister(Reg::lo, std::move(result.second));
|
||||
|
||||
InstructionEpilogue(cbi);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi)
|
||||
{
|
||||
InstructionPrologue(cbi, 1);
|
||||
|
|
|
@ -52,6 +52,7 @@ public:
|
|||
void EmitCopyValue(HostReg to_reg, const Value& value);
|
||||
void EmitAdd(HostReg to_reg, const Value& value);
|
||||
void EmitSub(HostReg to_reg, const Value& value);
|
||||
void EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, bool signed_multiply);
|
||||
void EmitCmp(HostReg to_reg, const Value& value);
|
||||
void EmitInc(HostReg to_reg, RegSize size);
|
||||
void EmitDec(HostReg to_reg, RegSize size);
|
||||
|
@ -130,7 +131,7 @@ public:
|
|||
|
||||
// Value ops
|
||||
Value AddValues(const Value& lhs, const Value& rhs);
|
||||
Value MulValues(const Value& lhs, const Value& rhs);
|
||||
std::pair<Value, Value> MulValues(const Value& lhs, const Value& rhs, bool signed_multiply);
|
||||
Value ShlValues(const Value& lhs, const Value& rhs);
|
||||
Value ShrValues(const Value& lhs, const Value& rhs);
|
||||
Value SarValues(const Value& lhs, const Value& rhs);
|
||||
|
@ -170,6 +171,7 @@ private:
|
|||
bool Compile_Load(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Store(const CodeBlockInstruction& cbi);
|
||||
bool Compile_MoveHiLo(const CodeBlockInstruction& cbi);
|
||||
bool Compile_Multiply(const CodeBlockInstruction& cbi);
|
||||
bool Compile_lui(const CodeBlockInstruction& cbi);
|
||||
bool Compile_addiu(const CodeBlockInstruction& cbi);
|
||||
|
||||
|
|
|
@ -427,6 +427,122 @@ void CodeGenerator::EmitSub(HostReg to_reg, const Value& value)
|
|||
}
|
||||
}
|
||||
|
||||
// Emits a widening multiply of lhs by rhs using the one-operand x86 MUL/IMUL form.
//
// That form implicitly multiplies the accumulator (AL/AX/EAX/RAX) by its operand and leaves the
// product in DX:AX / EDX:EAX / RDX:RAX (AH:AL for 8-bit operands). The function therefore:
//   1. saves RAX and/or RDX on the stack when they are not one of the destination registers,
//   2. arranges for one operand to be in RAX and multiplies by the other,
//   3. moves the product halves into to_reg_hi / to_reg_lo,
//   4. restores the saved RAX/RDX.
//
//   to_reg_hi        host register receiving the upper half of the product
//   to_reg_lo        host register receiving the lower half of the product
//   lhs, rhs         operands; the operation size is taken from lhs.size
//   signed_multiply  true emits IMUL, false emits MUL
void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs,
                            bool signed_multiply)
{
  // RAX/RDX are clobbered by the multiply; preserve them unless they are a destination anyway.
  const bool save_eax = (to_reg_hi != Xbyak::Operand::RAX && to_reg_lo != Xbyak::Operand::RAX);
  const bool save_edx = (to_reg_hi != Xbyak::Operand::RDX && to_reg_lo != Xbyak::Operand::RDX);

  if (save_eax)
    m_emit.push(m_emit.rax);

  if (save_edx)
    m_emit.push(m_emit.rdx);

  // Multiplies the accumulator by a 64-bit host register narrowed to the operand size.
  const auto emit_mul_by_reg = [&](const auto& src) {
    int bits = 64;
    if (lhs.size == RegSize_8)
      bits = 8;
    else if (lhs.size == RegSize_16)
      bits = 16;
    else if (lhs.size == RegSize_32)
      bits = 32;

    if (signed_multiply)
      m_emit.imul(src.changeBit(bits));
    else
      m_emit.mul(src.changeBit(bits));
  };

  // Multiplies the accumulator by the host register that holds `src`, at the operand size.
  const auto emit_mul_by_value = [&](const Value& src) {
    if (lhs.size == RegSize_8)
      signed_multiply ? m_emit.imul(GetHostReg8(src)) : m_emit.mul(GetHostReg8(src));
    else if (lhs.size == RegSize_16)
      signed_multiply ? m_emit.imul(GetHostReg16(src)) : m_emit.mul(GetHostReg16(src));
    else if (lhs.size == RegSize_32)
      signed_multiply ? m_emit.imul(GetHostReg32(src)) : m_emit.mul(GetHostReg32(src));
    else
      signed_multiply ? m_emit.imul(GetHostReg64(src)) : m_emit.mul(GetHostReg64(src));
  };

  const bool lhs_in_reg = lhs.IsInHostRegister();
  const bool rhs_in_reg = rhs.IsInHostRegister();

  if (lhs_in_reg && rhs_in_reg && lhs.GetHostRegister() == rhs.GetHostRegister())
  {
    // x*x: both operands share one register, so get it into RAX and square it
    if (lhs.GetHostRegister() != Xbyak::Operand::RAX)
      EmitCopyValue(Xbyak::Operand::RAX, lhs);

    emit_mul_by_reg(m_emit.rax);
  }
  else if (lhs_in_reg && lhs.GetHostRegister() == Xbyak::Operand::RAX)
  {
    // lhs is already the accumulator; rhs is used in place, or staged through RDX
    if (rhs_in_reg)
    {
      emit_mul_by_reg(GetHostReg64(rhs));
    }
    else
    {
      EmitCopyValue(Xbyak::Operand::RDX, rhs);
      emit_mul_by_reg(m_emit.rdx);
    }
  }
  else if (rhs_in_reg && rhs.GetHostRegister() == Xbyak::Operand::RAX)
  {
    // rhs is already the accumulator; lhs is used in place, or staged through RDX
    if (lhs_in_reg)
    {
      emit_mul_by_reg(GetHostReg64(lhs));
    }
    else
    {
      EmitCopyValue(Xbyak::Operand::RDX, lhs);
      emit_mul_by_reg(m_emit.rdx);
    }
  }
  else if (lhs_in_reg)
  {
    // neither operand is in RAX: load rhs into the accumulator and multiply by lhs in place
    EmitCopyValue(Xbyak::Operand::RAX, rhs);
    emit_mul_by_value(lhs);
  }
  else if (rhs_in_reg)
  {
    // load lhs into the accumulator and multiply by rhs in place
    EmitCopyValue(Xbyak::Operand::RAX, lhs);
    emit_mul_by_value(rhs);
  }
  else
  {
    // neither operand is in a host register: stage them in RAX and RDX
    EmitCopyValue(Xbyak::Operand::RAX, lhs);
    EmitCopyValue(Xbyak::Operand::RDX, rhs);
    emit_mul_by_reg(m_emit.rdx);
  }

  // 8-bit MUL/IMUL leave the product in AX (AH:AL) and do not write DX. Move the high byte into
  // DL so the DX:AX distribution below is correct for every operand size.
  if (lhs.size == RegSize_8)
    m_emit.mov(m_emit.dl, m_emit.ah);

  if (to_reg_hi == Xbyak::Operand::RDX && to_reg_lo == Xbyak::Operand::RAX)
  {
    // ideal case: registers are the ones we want: don't have to do anything
  }
  else if (to_reg_hi == Xbyak::Operand::RAX && to_reg_lo == Xbyak::Operand::RDX)
  {
    // what we want, but swapped, so exchange them
    m_emit.xchg(m_emit.rax, m_emit.rdx);
  }
  else
  {
    // store to the registers we want.. this could be optimized better
    // (going through the stack stays correct even when a destination is itself RAX or RDX)
    m_emit.push(m_emit.rdx);
    m_emit.push(m_emit.rax);
    m_emit.pop(GetHostReg64(to_reg_lo));
    m_emit.pop(GetHostReg64(to_reg_hi));
  }

  // restore original contents, in reverse order of the pushes above
  if (save_edx)
    m_emit.pop(m_emit.rdx);

  if (save_eax)
    m_emit.pop(m_emit.rax);
}
|
||||
|
||||
void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value)
|
||||
{
|
||||
DebugAssert(value.IsConstant() || value.IsInHostRegister());
|
||||
|
|
Loading…
Reference in a new issue