From e3965d9be3da698e55f23e1be5b0f01d4b7b8840 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sun, 24 Nov 2019 16:47:18 +1000 Subject: [PATCH] CPU/Recompiler: Fix b{ltz,gez}al when using a load delayed register --- src/core/cpu_recompiler_code_generator.cpp | 23 ++++++++++++++----- src/core/cpu_recompiler_code_generator.h | 2 +- .../cpu_recompiler_code_generator_x64.cpp | 14 ++--------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 952ffdafa..d6fea1c79 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -93,7 +93,7 @@ bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi) case InstructionOp::j: case InstructionOp::jal: - //case InstructionOp::b: + case InstructionOp::b: case InstructionOp::beq: case InstructionOp::bne: case InstructionOp::bgtz: @@ -1253,7 +1253,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) OrValues(AndValues(m_register_cache.ReadGuestRegister(Reg::pc, false), Value::FromConstantU32(0xF0000000)), Value::FromConstantU32(cbi.instruction.j.target << 2)); - EmitBranch(Condition::Always, (cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count, false, + EmitBranch(Condition::Always, (cbi.instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count, std::move(branch_target)); } break; @@ -1265,7 +1265,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) // npc = rs, link to rt Value branch_target = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs); EmitBranch(Condition::Always, - (cbi.instruction.r.funct == InstructionFunct::jalr) ? cbi.instruction.r.rd : Reg::count, false, + (cbi.instruction.r.funct == InstructionFunct::jalr) ? cbi.instruction.r.rd : Reg::count, std::move(branch_target)); } else if (cbi.instruction.r.funct == InstructionFunct::syscall || @@ -1295,7 +1295,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) EmitCmp(lhs.host_reg, rhs); const Condition condition = (cbi.instruction.op == InstructionOp::beq) ? Condition::Equal : Condition::NotEqual; - EmitBranch(condition, Reg::count, false, std::move(branch_target)); + EmitBranch(condition, Reg::count, std::move(branch_target)); } break; @@ -1312,7 +1312,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) const Condition condition = (cbi.instruction.op == InstructionOp::bgtz) ? Condition::Greater : Condition::LessEqual; - EmitBranch(condition, Reg::count, false, std::move(branch_target)); + EmitBranch(condition, Reg::count, std::move(branch_target)); } break; @@ -1327,9 +1327,20 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) const Condition condition = bgez ? Condition::PositiveOrZero : Condition::Negative; const bool link = (rt & u8(0x1E)) == u8(0x10); + // Read has to happen before the link as the compare can use ra. + // This is a little dangerous since lhs can get freed, but there aren't any allocations inbetween here and the + // test so it shouldn't be an issue. Value lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs, true, true); + + // The return address is always written if link is set, regardless of whether the branch is taken. + if (link) + { + EmitCancelInterpreterLoadDelayForReg(Reg::ra); + m_register_cache.WriteGuestRegister(Reg::ra, m_register_cache.ReadGuestRegister(Reg::npc, false)); + } + EmitTest(lhs.host_reg, lhs); - EmitBranch(condition, link ? Reg::ra : Reg::count, link, std::move(branch_target)); + EmitBranch(condition, Reg::count, std::move(branch_target)); } break; diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h index 05f639d5c..cfaac15fb 100644 --- a/src/core/cpu_recompiler_code_generator.h +++ b/src/core/cpu_recompiler_code_generator.h @@ -78,7 +78,7 @@ public: void EmitStoreGuestMemory(const Value& address, const Value& value); // Branching, generates two paths. - void EmitBranch(Condition condition, Reg lr_reg, bool always_link, Value&& branch_target); + void EmitBranch(Condition condition, Reg lr_reg, Value&& branch_target); // Raising exception if condition is true. void EmitRaiseException(Exception excode, Condition condition = Condition::Always); diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index c304fc6c2..68672a877 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -1742,22 +1742,12 @@ static void EmitConditionalJump(Condition condition, bool invert, Xbyak::CodeGen } } -void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, bool always_link, Value&& branch_target) +void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, Value&& branch_target) { // we have to always read the old PC.. when we can push/pop the register cache state this won't be needed Value old_npc; if (lr_reg != Reg::count) - { old_npc = m_register_cache.ReadGuestRegister(Reg::npc, false, true); - if (always_link) - { - // Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards, - // if we don't cancel it, at the end of the instruction the value we write can be overridden. - EmitCancelInterpreterLoadDelayForReg(lr_reg); - m_register_cache.WriteGuestRegister(lr_reg, std::move(old_npc)); - m_register_cache.FlushGuestRegister(lr_reg, true, true); - } - } // condition is inverted because we want the case for skipping it Xbyak::Label skip_branch; @@ -1765,7 +1755,7 @@ void CodeGenerator::EmitBranch(Condition condition, Reg lr_reg, bool always_link EmitConditionalJump(condition, true, m_emit, skip_branch); // save the old PC if we want to - if (lr_reg != Reg::count && !always_link) + if (lr_reg != Reg::count) { // Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards, // if we don't cancel it, at the end of the instruction the value we write can be overridden.