From d2d0d5287bd933f7216bdc306d8ec999fd8c7013 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 23 Nov 2019 01:20:59 +1000 Subject: [PATCH] CPU/Recompiler: Implement slt/sltu/slti/sltiu --- src/core/cpu_recompiler_code_generator.cpp | 58 +++++++++++-- src/core/cpu_recompiler_code_generator.h | 2 + .../cpu_recompiler_code_generator_x64.cpp | 87 ++++++++++++++++++- src/core/cpu_recompiler_types.h | 8 +- 4 files changed, 147 insertions(+), 8 deletions(-) diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 882c6f3ac..a43ab7031 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -101,15 +101,20 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi) result = Compile_Branch(cbi); break; - case InstructionOp::lui: - result = Compile_lui(cbi); - break; - case InstructionOp::addi: case InstructionOp::addiu: result = Compile_Add(cbi); break; + case InstructionOp::slti: + case InstructionOp::sltiu: + result = Compile_SetLess(cbi); + break; + + case InstructionOp::lui: + result = Compile_lui(cbi); + break; + case InstructionOp::funct: { switch (cbi.instruction.r.funct) @@ -151,6 +156,11 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi) result = Compile_Multiply(cbi); break; + case InstructionFunct::slt: + case InstructionFunct::sltu: + result = Compile_SetLess(cbi); + break; + case InstructionFunct::jr: case InstructionFunct::jalr: result = Compile_Branch(cbi); @@ -1148,6 +1158,43 @@ bool CodeGenerator::Compile_Multiply(const CodeBlockInstruction& cbi) return true; } +bool CodeGenerator::Compile_SetLess(const CodeBlockInstruction& cbi) +{ + InstructionPrologue(cbi, 1); + + const bool signed_comparison = + (cbi.instruction.op == InstructionOp::slti || + (cbi.instruction.op == InstructionOp::funct && cbi.instruction.r.funct == InstructionFunct::slt)); + + Reg dest; + Value lhs, rhs; + if (cbi.instruction.op == InstructionOp::slti || cbi.instruction.op == InstructionOp::sltiu) + { + // rt <- rs < {z,s}ext(imm) + dest = cbi.instruction.i.rt; + lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs, true, true); + rhs = Value::FromConstantU32(cbi.instruction.i.imm_sext32()); + } + else + { + // rd <- rs < rt + dest = cbi.instruction.r.rd; + lhs = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs, true, true); + rhs = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt); + } + + // flush the old value which might free up a register + m_register_cache.InvalidateGuestRegister(dest); + + Value result = m_register_cache.AllocateScratch(RegSize_32); + EmitCmp(lhs.host_reg, rhs); + EmitSetConditionResult(result.host_reg, result.size, signed_comparison ? Condition::Less : Condition::Below); + m_register_cache.WriteGuestRegister(dest, std::move(result)); + + InstructionEpilogue(cbi); + return true; +} + bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) { // Force sync since we branches are PC-relative. @@ -1211,7 +1258,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) EmitCmp(lhs.host_reg, Value::FromConstantU32(0)); const Condition condition = - (cbi.instruction.op == InstructionOp::bgtz) ? Condition::Greater : Condition::LessOrEqual; + (cbi.instruction.op == InstructionOp::bgtz) ? Condition::Greater : Condition::LessEqual; EmitBranch(condition, Reg::count, false, std::move(branch_target)); } break; @@ -1253,4 +1300,5 @@ bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi) InstructionEpilogue(cbi); return true; } + } // namespace CPU::Recompiler diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h index c21668d11..31202a801 100644 --- a/src/core/cpu_recompiler_code_generator.h +++ b/src/core/cpu_recompiler_code_generator.h @@ -61,6 +61,7 @@ public: void EmitXor(HostReg to_reg, const Value& value); void EmitTest(HostReg to_reg, const Value& value); void EmitNot(HostReg to_reg, RegSize size); + void EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition); void EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg); void EmitStoreGuestRegister(Reg guest_reg, const Value& value); @@ -181,6 +182,7 @@ private: bool Compile_Add(const CodeBlockInstruction& cbi); bool Compile_Subtract(const CodeBlockInstruction& cbi); bool Compile_Multiply(const CodeBlockInstruction& cbi); + bool Compile_SetLess(const CodeBlockInstruction& cbi); bool Compile_Branch(const CodeBlockInstruction& cbi); bool Compile_lui(const CodeBlockInstruction& cbi); diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index ac23c8dfb..9b16468b5 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -1081,6 +1081,75 @@ void CodeGenerator::EmitNot(HostReg to_reg, RegSize size) } } +void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition) +{ + switch (condition) + { + case Condition::Always: + m_emit->mov(GetHostReg8(to_reg), 1); + break; + + case Condition::NotEqual: + m_emit->setne(GetHostReg8(to_reg)); + break; + + case Condition::Equal: + m_emit->sete(GetHostReg8(to_reg)); + break; + + case Condition::Overflow: + m_emit->seto(GetHostReg8(to_reg)); + break; + + case Condition::Greater: + m_emit->setg(GetHostReg8(to_reg)); + break; + + case Condition::GreaterEqual: + m_emit->setge(GetHostReg8(to_reg)); + break; + + case Condition::Less: + m_emit->setl(GetHostReg8(to_reg)); + break; + + case Condition::LessEqual: + m_emit->setle(GetHostReg8(to_reg)); + break; + + case Condition::Negative: + m_emit->sets(GetHostReg8(to_reg)); + break; + + case Condition::PositiveOrZero: + m_emit->setns(GetHostReg8(to_reg)); + break; + + case Condition::Above: + m_emit->seta(GetHostReg8(to_reg)); + break; + + case Condition::AboveEqual: + m_emit->setae(GetHostReg8(to_reg)); + break; + + case Condition::Below: + m_emit->setb(GetHostReg8(to_reg)); + break; + + case Condition::BelowEqual: + m_emit->setbe(GetHostReg8(to_reg)); + break; + + default: + UnreachableCode(); + break; + } + + if (to_size != RegSize_8) + EmitZeroExtend(to_reg, to_size, to_reg, RegSize_8); +} + u32 CodeGenerator::PrepareStackForCall() { // we assume that the stack is unaligned at this point @@ -1634,7 +1703,7 @@ static void EmitConditionalJump(Condition condition, bool invert, Xbyak::CodeGen invert ? emit->jnl(label) : emit->jl(label); break; - case Condition::LessOrEqual: + case Condition::LessEqual: invert ? emit->jnle(label) : emit->jle(label); break; @@ -1646,6 +1715,22 @@ static void EmitConditionalJump(Condition condition, bool invert, Xbyak::CodeGen invert ? emit->js(label) : emit->jns(label); break; + case Condition::Above: + invert ? emit->jna(label) : emit->ja(label); + break; + + case Condition::AboveEqual: + invert ? emit->jnae(label) : emit->jae(label); + break; + + case Condition::Below: + invert ? emit->jnb(label) : emit->jb(label); + break; + + case Condition::BelowEqual: + invert ? emit->jnbe(label) : emit->jbe(label); + break; + default: UnreachableCode(); break; diff --git a/src/core/cpu_recompiler_types.h b/src/core/cpu_recompiler_types.h index bdb6680b6..e1c84fb4f 100644 --- a/src/core/cpu_recompiler_types.h +++ b/src/core/cpu_recompiler_types.h @@ -24,7 +24,7 @@ enum RegSize : u8 RegSize_64, }; -enum class Condition: u8 +enum class Condition : u8 { Always, NotEqual, @@ -32,10 +32,14 @@ enum class Condition: u8 Overflow, Greater, GreaterEqual, - LessOrEqual, + LessEqual, Less, Negative, PositiveOrZero, + Above, // unsigned variant of Greater + AboveEqual, // unsigned variant of GreaterEqual + Below, // unsigned variant of Less + BelowEqual, // unsigned variant of LessEqual NotZero = NotEqual, Zero = Equal