CPU/Recompiler: Implement mult/multu

This commit is contained in:
Connor McLaughlin 2019-11-22 16:45:13 +10:00
parent e5c0d28fdc
commit f46160ac46
4 changed files with 220 additions and 8 deletions

View file

@ -9,8 +9,8 @@ Log_SetChannel(CPU::CodeCache);
namespace CPU {
bool USE_CODE_CACHE = false;
bool USE_RECOMPILER = false;
bool USE_CODE_CACHE = true;
bool USE_RECOMPILER = true;
CodeCache::CodeCache() = default;
@ -48,13 +48,21 @@ void CodeCache::Execute()
continue;
}
#if 0
const u32 tick = m_system->GetGlobalTickCounter() + m_core->GetPendingTicks();
if (tick == 8950812)
__debugbreak();
#endif
reexecute_block:
if (USE_RECOMPILER)
block->host_code(m_core);
else
InterpretCachedBlock(*block);
// LogCurrentState();
#if 0
LogCurrentState();
#endif
next_block_key = GetNextBlockKey();
if (next_block_key.bits == block->key.bits)
@ -91,10 +99,10 @@ void CodeCache::LogCurrentState()
"tick=%u pc=%08X npc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X\n",
m_system->GetGlobalTickCounter(), regs.pc, regs.npc, regs.zero, regs.at, regs.v0, regs.v1, regs.a0, regs.a1,
regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7, regs.s0, regs.s1, regs.s2,
regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp, regs.sp, regs.fp,
regs.ra);
m_system->GetGlobalTickCounter() + m_core->GetPendingTicks(), regs.pc, regs.npc, regs.zero, regs.at, regs.v0,
regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7,
regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp,
regs.sp, regs.fp, regs.ra);
}
CodeBlockKey CodeCache::GetNextBlockKey() const

View file

@ -121,6 +121,11 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi)
result = Compile_MoveHiLo(cbi);
break;
case InstructionFunct::mult:
case InstructionFunct::multu:
result = Compile_Multiply(cbi);
break;
default:
result = Compile_Fallback(cbi);
break;
@ -285,6 +290,73 @@ Value CodeGenerator::AddValues(const Value& lhs, const Value& rhs)
}
}
// Multiplies lhs by rhs and returns the double-width product as a {high half, low half} pair, where each half has
// the same size as the operands.
//
// lhs, rhs:        operands; both must have the same size (checked with DebugAssert).
// signed_multiply: true to treat the operands as signed, false to treat them as unsigned.
//
// When both operands are constants the product is folded here and returned as constants; otherwise two scratch
// registers are allocated and EmitMul() is asked to produce the product into them.
std::pair<Value, Value> CodeGenerator::MulValues(const Value& lhs, const Value& rhs, bool signed_multiply)
{
  DebugAssert(lhs.size == rhs.size);

  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time: widen to twice the operand size, multiply, then split into halves
    switch (lhs.size)
    {
      case RegSize_8:
      {
        u16 res;
        if (signed_multiply)
          res = u16(s16(s8(lhs.constant_value)) * s16(s8(rhs.constant_value)));
        else
          res = u16(u8(lhs.constant_value)) * u16(u8(rhs.constant_value));

        return std::make_pair(Value::FromConstantU8(Truncate8(res >> 8)), Value::FromConstantU8(Truncate8(res)));
      }

      case RegSize_16:
      {
        u32 res;
        if (signed_multiply)
          res = u32(s32(s16(lhs.constant_value)) * s32(s16(rhs.constant_value)));
        else
          res = u32(u16(lhs.constant_value)) * u32(u16(rhs.constant_value));

        return std::make_pair(Value::FromConstantU16(Truncate16(res >> 16)), Value::FromConstantU16(Truncate16(res)));
      }

      case RegSize_32:
      {
        u64 res;
        if (signed_multiply)
          res = u64(s64(s32(lhs.constant_value)) * s64(s32(rhs.constant_value)));
        else
          res = u64(u32(lhs.constant_value)) * u64(u32(rhs.constant_value));

        return std::make_pair(Value::FromConstantU32(Truncate32(res >> 32)), Value::FromConstantU32(Truncate32(res)));
      }

      case RegSize_64:
      {
        // The low 64 bits of a product are the same whether the operands are interpreted as signed or unsigned,
        // so always multiply as unsigned: a signed 64-bit multiply that overflows is undefined behaviour.
        const u64 res = u64(lhs.constant_value) * u64(rhs.constant_value);

        // TODO: 128-bit multiply...
        // The high half cannot be computed yet, so this path is reported via Panic().
        Panic("128-bit multiply");
        return std::make_pair(Value::FromConstantU64(0), Value::FromConstantU64(res));
      }

      default:
        // Unknown operand size: fall back to a zero product.
        return std::make_pair(Value::FromConstantU64(0), Value::FromConstantU64(0));
    }
  }

  // We need two registers for both components.
  Value hi = m_register_cache.AllocateScratch(lhs.size);
  Value lo = m_register_cache.AllocateScratch(lhs.size);
  EmitMul(hi.host_reg, lo.host_reg, lhs, rhs, signed_multiply);
  return std::make_pair(std::move(hi), std::move(lo));
}
Value CodeGenerator::ShlValues(const Value& lhs, const Value& rhs)
{
DebugAssert(lhs.size == rhs.size);
@ -911,6 +983,20 @@ bool CodeGenerator::Compile_MoveHiLo(const CodeBlockInstruction& cbi)
return true;
}
bool CodeGenerator::Compile_Multiply(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1);
const bool signed_multiply = (cbi.instruction.r.funct == InstructionFunct::mult);
std::pair<Value, Value> result = MulValues(m_register_cache.ReadGuestRegister(cbi.instruction.r.rs),
m_register_cache.ReadGuestRegister(cbi.instruction.r.rt), signed_multiply);
m_register_cache.WriteGuestRegister(Reg::hi, std::move(result.first));
m_register_cache.WriteGuestRegister(Reg::lo, std::move(result.second));
InstructionEpilogue(cbi);
return true;
}
bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1);

View file

@ -52,6 +52,7 @@ public:
void EmitCopyValue(HostReg to_reg, const Value& value);
void EmitAdd(HostReg to_reg, const Value& value);
void EmitSub(HostReg to_reg, const Value& value);
void EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, bool signed_multiply);
void EmitCmp(HostReg to_reg, const Value& value);
void EmitInc(HostReg to_reg, RegSize size);
void EmitDec(HostReg to_reg, RegSize size);
@ -130,7 +131,7 @@ public:
// Value ops
Value AddValues(const Value& lhs, const Value& rhs);
Value MulValues(const Value& lhs, const Value& rhs);
std::pair<Value, Value> MulValues(const Value& lhs, const Value& rhs, bool signed_multiply);
Value ShlValues(const Value& lhs, const Value& rhs);
Value ShrValues(const Value& lhs, const Value& rhs);
Value SarValues(const Value& lhs, const Value& rhs);
@ -170,6 +171,7 @@ private:
bool Compile_Load(const CodeBlockInstruction& cbi);
bool Compile_Store(const CodeBlockInstruction& cbi);
bool Compile_MoveHiLo(const CodeBlockInstruction& cbi);
bool Compile_Multiply(const CodeBlockInstruction& cbi);
bool Compile_lui(const CodeBlockInstruction& cbi);
bool Compile_addiu(const CodeBlockInstruction& cbi);

View file

@ -427,6 +427,122 @@ void CodeGenerator::EmitSub(HostReg to_reg, const Value& value)
}
}
// Emits a widening multiply of lhs by rhs using the one-operand mul/imul forms, leaving the upper half of the
// product in to_reg_hi and the lower half in to_reg_lo.
//
// to_reg_hi, to_reg_lo: destination host registers for the high and low halves.
// lhs, rhs:             operands; lhs.size selects the operand width of the emitted instruction.
// signed_multiply:      true emits imul, false emits mul.
//
// The one-operand forms implicitly use RAX (and RDX for the high half), so whichever of those two is not a
// destination is pushed before the multiply and popped afterwards, preserving its previous contents.
void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs,
                            bool signed_multiply)
{
  // Only save RAX/RDX when they are not going to be overwritten with a result anyway.
  const bool save_eax = (to_reg_hi != Xbyak::Operand::RAX && to_reg_lo != Xbyak::Operand::RAX);
  const bool save_edx = (to_reg_hi != Xbyak::Operand::RDX && to_reg_lo != Xbyak::Operand::RDX);

  if (save_eax)
    m_emit.push(m_emit.rax);

  if (save_edx)
    m_emit.push(m_emit.rdx);

// Emits mul/imul with `src` narrowed to the operand width given by lhs.size.
#define DO_MUL(src)                                                                                                    \
  if (lhs.size == RegSize_8)                                                                                           \
    signed_multiply ? m_emit.imul(src.changeBit(8)) : m_emit.mul(src.changeBit(8));                                    \
  else if (lhs.size == RegSize_16)                                                                                     \
    signed_multiply ? m_emit.imul(src.changeBit(16)) : m_emit.mul(src.changeBit(16));                                  \
  else if (lhs.size == RegSize_32)                                                                                     \
    signed_multiply ? m_emit.imul(src.changeBit(32)) : m_emit.mul(src.changeBit(32));                                  \
  else                                                                                                                 \
    signed_multiply ? m_emit.imul(src.changeBit(64)) : m_emit.mul(src.changeBit(64));

  // x*x
  if (lhs.IsInHostRegister() && rhs.IsInHostRegister() && lhs.GetHostRegister() == rhs.GetHostRegister())
  {
    if (lhs.GetHostRegister() != Xbyak::Operand::RAX)
      EmitCopyValue(Xbyak::Operand::RAX, lhs);

    DO_MUL(m_emit.rax);
  }
  else if (lhs.IsInHostRegister() && lhs.GetHostRegister() == Xbyak::Operand::RAX)
  {
    // lhs is already the implicit operand; rhs only needs to be in some register.
    if (!rhs.IsInHostRegister())
    {
      EmitCopyValue(Xbyak::Operand::RDX, rhs);
      DO_MUL(m_emit.rdx);
    }
    else
    {
      DO_MUL(GetHostReg64(rhs));
    }
  }
  else if (rhs.IsInHostRegister() && rhs.GetHostRegister() == Xbyak::Operand::RAX)
  {
    // rhs is already the implicit operand; lhs only needs to be in some register.
    if (!lhs.IsInHostRegister())
    {
      EmitCopyValue(Xbyak::Operand::RDX, lhs);
      DO_MUL(m_emit.rdx);
    }
    else
    {
      DO_MUL(GetHostReg64(lhs));
    }
  }
  else
  {
    // Neither operand is in RAX: load one of them there and multiply by the other.
    if (lhs.IsInHostRegister())
    {
      EmitCopyValue(Xbyak::Operand::RAX, rhs);
      if (lhs.size == RegSize_8)
        signed_multiply ? m_emit.imul(GetHostReg8(lhs)) : m_emit.mul(GetHostReg8(lhs));
      else if (lhs.size == RegSize_16)
        signed_multiply ? m_emit.imul(GetHostReg16(lhs)) : m_emit.mul(GetHostReg16(lhs));
      else if (lhs.size == RegSize_32)
        signed_multiply ? m_emit.imul(GetHostReg32(lhs)) : m_emit.mul(GetHostReg32(lhs));
      else
        signed_multiply ? m_emit.imul(GetHostReg64(lhs)) : m_emit.mul(GetHostReg64(lhs));
    }
    else if (rhs.IsInHostRegister())
    {
      EmitCopyValue(Xbyak::Operand::RAX, lhs);
      if (lhs.size == RegSize_8)
        signed_multiply ? m_emit.imul(GetHostReg8(rhs)) : m_emit.mul(GetHostReg8(rhs));
      else if (lhs.size == RegSize_16)
        signed_multiply ? m_emit.imul(GetHostReg16(rhs)) : m_emit.mul(GetHostReg16(rhs));
      else if (lhs.size == RegSize_32)
        signed_multiply ? m_emit.imul(GetHostReg32(rhs)) : m_emit.mul(GetHostReg32(rhs));
      else
        signed_multiply ? m_emit.imul(GetHostReg64(rhs)) : m_emit.mul(GetHostReg64(rhs));
    }
    else
    {
      // Neither operand is in a register: use RAX and RDX for them.
      EmitCopyValue(Xbyak::Operand::RAX, lhs);
      EmitCopyValue(Xbyak::Operand::RDX, rhs);
      DO_MUL(m_emit.rdx);
    }
  }

#undef DO_MUL

  // The byte-sized mul/imul forms place the whole 16-bit product in AX (high byte in AH) and do not write DX,
  // unlike the wider forms which use (R/E)DX:(R/E)AX. Move the high byte into DL so the code below can treat
  // RDX as the high half for every operand size.
  if (lhs.size == RegSize_8)
    m_emit.mov(m_emit.dl, m_emit.ah);

  if (to_reg_hi == Xbyak::Operand::RDX && to_reg_lo == Xbyak::Operand::RAX)
  {
    // ideal case: registers are the ones we want: don't have to do anything
  }
  else if (to_reg_hi == Xbyak::Operand::RAX && to_reg_lo == Xbyak::Operand::RDX)
  {
    // what we want, but swapped, so exchange them
    m_emit.xchg(m_emit.rax, m_emit.rdx);
  }
  else
  {
    // store to the registers we want.. this could be optimized better
    // (going through the stack lets either destination alias RAX/RDX without clobbering the other half;
    // RAX is pushed last so it is popped first, into the low destination)
    m_emit.push(m_emit.rdx);
    m_emit.push(m_emit.rax);
    m_emit.pop(GetHostReg64(to_reg_lo));
    m_emit.pop(GetHostReg64(to_reg_hi));
  }

  // restore original contents (reverse order of the pushes at the top)
  if (save_edx)
    m_emit.pop(m_emit.rdx);

  if (save_eax)
    m_emit.pop(m_emit.rax);
}
void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value)
{
DebugAssert(value.IsConstant() || value.IsInHostRegister());