CPU/Recompiler: Reduced forced PC flushes

This commit is contained in:
Connor McLaughlin 2019-12-28 14:04:15 +10:00
parent 620284f7d1
commit d118377c9e
7 changed files with 174 additions and 67 deletions

View file

@ -1,5 +1,6 @@
#include "cpu_recompiler_code_generator.h"
#include "YBaseLib/Log.h"
#include "cpu_core.h"
#include "cpu_disasm.h"
Log_SetChannel(CPU::Recompiler);
@ -788,8 +789,12 @@ Value CodeGenerator::NotValue(const Value& val)
return res;
}
void CodeGenerator::GenerateExceptionExit(Exception excode, Condition condition /* = Condition::Always */)
void CodeGenerator::GenerateExceptionExit(const CodeBlockInstruction& cbi, Exception excode,
Condition condition /* = Condition::Always */)
{
const Value epc = Value::FromConstantU32(cbi.pc);
const Value ri_bits = Value::FromConstantU32(Thunks::MakeRaiseExceptionInfo(excode, cbi));
if (condition == Condition::Always)
{
// no need to use far code if we're always raising the exception
@ -797,8 +802,7 @@ void CodeGenerator::GenerateExceptionExit(Exception excode, Condition condition
m_register_cache.FlushAllGuestRegisters(true, true);
m_register_cache.FlushLoadDelay(true);
EmitFunctionCall(nullptr, &Thunks::RaiseException, m_register_cache.GetCPUPtr(),
Value::FromConstantU8(static_cast<u8>(excode)));
EmitFunctionCall(nullptr, &Thunks::RaiseException, m_register_cache.GetCPUPtr(), epc, ri_bits);
return;
}
@ -810,8 +814,7 @@ void CodeGenerator::GenerateExceptionExit(Exception excode, Condition condition
EmitBranch(GetCurrentFarCodePointer());
SwitchToFarCode();
EmitFunctionCall(nullptr, &Thunks::RaiseException, m_register_cache.GetCPUPtr(),
Value::FromConstantU8(static_cast<u8>(excode)));
EmitFunctionCall(nullptr, &Thunks::RaiseException, m_register_cache.GetCPUPtr(), epc, ri_bits);
EmitExceptionExit();
SwitchToNearCode();
@ -842,7 +845,7 @@ void CodeGenerator::BlockEpilogue()
if (m_register_cache.HasLoadDelay())
m_register_cache.WriteLoadDelayToCPU(true);
AddPendingCycles();
AddPendingCycles(true);
}
void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles,
@ -881,7 +884,7 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
m_register_cache.WriteGuestRegister(Reg::pc, Value::FromConstantU32(cbi.pc + 4));
}
if (!CanInstructionTrap(cbi.instruction, m_block->key.user_mode) && !force_sync)
if (!force_sync)
{
// Defer updates for non-faulting instructions.
m_delayed_cycles_add += cycles;
@ -895,11 +898,9 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
m_current_instruction_in_branch_delay_slot_dirty = true;
}
// Sync current instruction PC
EmitStoreCPUStructField(offsetof(Core, m_current_instruction_pc), Value::FromConstantU32(cbi.pc));
m_delayed_cycles_add += cycles;
AddPendingCycles();
SetCurrentInstructionPC(cbi);
AddPendingCycles(true);
}
void CodeGenerator::InstructionEpilogue(const CodeBlockInstruction& cbi)
@ -925,13 +926,20 @@ void CodeGenerator::InstructionEpilogue(const CodeBlockInstruction& cbi)
}
}
// Emits code that adds the accumulated m_delayed_cycles_add to Core::m_pending_ticks.
// Nothing is emitted when no cycles are pending.
// commit: when true, the accumulator is cleared after the add has been emitted; when false it is
//         left untouched (EmitExceptionExit passes false).
// NOTE(review): this listing interleaves the previous and the updated lines of the change; the
// description above follows the updated version that takes `commit`.
void CodeGenerator::AddPendingCycles()
void CodeGenerator::AddPendingCycles(bool commit)
{
if (m_delayed_cycles_add == 0)
return;
EmitAddCPUStructField(offsetof(Core, m_pending_ticks), Value::FromConstantU32(m_delayed_cycles_add));
m_delayed_cycles_add = 0;
if (commit)
m_delayed_cycles_add = 0;
}
void CodeGenerator::SetCurrentInstructionPC(const CodeBlockInstruction& cbi)
{
EmitStoreCPUStructField(offsetof(Core, m_current_instruction_pc), Value::FromConstantU32(cbi.pc));
}
bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi)
@ -1107,18 +1115,18 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
{
case InstructionOp::lb:
case InstructionOp::lbu:
result = EmitLoadGuestMemory(address, RegSize_8);
result = EmitLoadGuestMemory(cbi, address, RegSize_8);
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb));
break;
case InstructionOp::lh:
case InstructionOp::lhu:
result = EmitLoadGuestMemory(address, RegSize_16);
result = EmitLoadGuestMemory(cbi, address, RegSize_16);
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh));
break;
case InstructionOp::lw:
result = EmitLoadGuestMemory(address, RegSize_32);
result = EmitLoadGuestMemory(cbi, address, RegSize_32);
break;
default:
@ -1145,15 +1153,15 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
switch (cbi.instruction.op)
{
case InstructionOp::sb:
EmitStoreGuestMemory(address, value.ViewAsSize(RegSize_8));
EmitStoreGuestMemory(cbi, address, value.ViewAsSize(RegSize_8));
break;
case InstructionOp::sh:
EmitStoreGuestMemory(address, value.ViewAsSize(RegSize_16));
EmitStoreGuestMemory(cbi, address, value.ViewAsSize(RegSize_16));
break;
case InstructionOp::sw:
EmitStoreGuestMemory(address, value);
EmitStoreGuestMemory(cbi, address, value);
break;
default:
@ -1234,7 +1242,7 @@ bool CodeGenerator::Compile_Add(const CodeBlockInstruction& cbi)
Value result = AddValues(lhs, rhs, check_overflow);
if (check_overflow)
GenerateExceptionExit(Exception::Ov, Condition::Overflow);
GenerateExceptionExit(cbi, Exception::Ov, Condition::Overflow);
m_register_cache.WriteGuestRegister(dest, std::move(result));
@ -1254,7 +1262,7 @@ bool CodeGenerator::Compile_Subtract(const CodeBlockInstruction& cbi)
Value result = SubValues(lhs, rhs, check_overflow);
if (check_overflow)
GenerateExceptionExit(Exception::Ov, Condition::Overflow);
GenerateExceptionExit(cbi, Exception::Ov, Condition::Overflow);
m_register_cache.WriteGuestRegister(cbi.instruction.r.rd, std::move(result));
@ -1440,7 +1448,7 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
{
const Exception excode =
(cbi.instruction.r.funct == InstructionFunct::syscall) ? Exception::Syscall : Exception::BP;
GenerateExceptionExit(excode);
GenerateExceptionExit(cbi, excode);
}
else
{
@ -1785,13 +1793,13 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
Value::FromConstantU32(cbi.instruction.i.imm_sext32()), false);
if (cbi.instruction.op == InstructionOp::lwc2)
{
Value value = EmitLoadGuestMemory(address, RegSize_32);
Value value = EmitLoadGuestMemory(cbi, address, RegSize_32);
DoGTERegisterWrite(reg, value);
}
else
{
Value value = DoGTERegisterRead(reg);
EmitStoreGuestMemory(address, value);
EmitStoreGuestMemory(cbi, address, value);
}
InstructionEpilogue(cbi);

View file

@ -75,8 +75,8 @@ public:
void EmitAddCPUStructField(u32 offset, const Value& value);
// Automatically generates an exception handler.
Value EmitLoadGuestMemory(const Value& address, RegSize size);
void EmitStoreGuestMemory(const Value& address, const Value& value);
Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size);
void EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value);
// Unconditional branch to pointer. May allocate a scratch register.
void EmitBranch(const void* address, bool allow_scratch = true);
@ -148,7 +148,8 @@ public:
Value NotValue(const Value& val);
// Raising exception if condition is true.
void GenerateExceptionExit(Exception excode, Condition condition = Condition::Always);
void GenerateExceptionExit(const CodeBlockInstruction& cbi, Exception excode,
Condition condition = Condition::Always);
private:
// Host register setup
@ -173,7 +174,8 @@ private:
void BlockEpilogue();
void InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles, bool force_sync = false);
void InstructionEpilogue(const CodeBlockInstruction& cbi);
void AddPendingCycles();
void SetCurrentInstructionPC(const CodeBlockInstruction& cbi);
void AddPendingCycles(bool commit);
Value DoGTERegisterRead(u32 index);
void DoGTERegisterWrite(u32 index, const Value& value);

View file

@ -1,6 +1,7 @@
#include "YBaseLib/Log.h"
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
#include "cpu_core.h"
Log_SetChannel(CPU::Recompiler);
namespace a64 = vixl::aarch64;
@ -1209,8 +1210,11 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
}
}
Value CodeGenerator::EmitLoadGuestMemory(const Value& address, RegSize size)
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
{
const Value pc = Value::FromConstantU32(cbi.pc);
AddPendingCycles(true);
// We need to use the full 64 bits here since we test the sign bit result.
Value result = m_register_cache.AllocateScratch(RegSize_64);
@ -1218,15 +1222,15 @@ Value CodeGenerator::EmitLoadGuestMemory(const Value& address, RegSize size)
switch (size)
{
case RegSize_8:
EmitFunctionCall(&result, &Thunks::ReadMemoryByte, m_register_cache.GetCPUPtr(), address);
EmitFunctionCall(&result, &Thunks::ReadMemoryByte, m_register_cache.GetCPUPtr(), pc, address);
break;
case RegSize_16:
EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, m_register_cache.GetCPUPtr(), address);
EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, m_register_cache.GetCPUPtr(), pc, address);
break;
case RegSize_32:
EmitFunctionCall(&result, &Thunks::ReadMemoryWord, m_register_cache.GetCPUPtr(), address);
EmitFunctionCall(&result, &Thunks::ReadMemoryWord, m_register_cache.GetCPUPtr(), pc, address);
break;
default:
@ -1271,22 +1275,25 @@ Value CodeGenerator::EmitLoadGuestMemory(const Value& address, RegSize size)
return result;
}
void CodeGenerator::EmitStoreGuestMemory(const Value& address, const Value& value)
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
{
const Value pc = Value::FromConstantU32(cbi.pc);
AddPendingCycles(true);
Value result = m_register_cache.AllocateScratch(RegSize_8);
switch (value.size)
{
case RegSize_8:
EmitFunctionCall(&result, &Thunks::WriteMemoryByte, m_register_cache.GetCPUPtr(), address, value);
EmitFunctionCall(&result, &Thunks::WriteMemoryByte, m_register_cache.GetCPUPtr(), pc, address, value);
break;
case RegSize_16:
EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, m_register_cache.GetCPUPtr(), address, value);
EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, m_register_cache.GetCPUPtr(), pc, address, value);
break;
case RegSize_32:
EmitFunctionCall(&result, &Thunks::WriteMemoryWord, m_register_cache.GetCPUPtr(), address, value);
EmitFunctionCall(&result, &Thunks::WriteMemoryWord, m_register_cache.GetCPUPtr(), pc, address, value);
break;
default:

View file

@ -1,3 +1,4 @@
#include "cpu_core.h"
#include "cpu_recompiler_code_generator.h"
namespace CPU::Recompiler {

View file

@ -1,6 +1,7 @@
#include "YBaseLib/Log.h"
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
#include "cpu_core.h"
Log_SetChannel(CPU::Recompiler);
namespace CPU::Recompiler {
@ -201,6 +202,8 @@ void CodeGenerator::EmitEndBlock()
void CodeGenerator::EmitExceptionExit()
{
AddPendingCycles(false);
// toss away our PC value since we're jumping to the exception handler
m_register_cache.InvalidateGuestRegister(Reg::pc);
@ -1632,8 +1635,11 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
}
}
Value CodeGenerator::EmitLoadGuestMemory(const Value& address, RegSize size)
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
{
const Value pc = Value::FromConstantU32(cbi.pc);
AddPendingCycles(true);
// We need to use the full 64 bits here since we test the sign bit result.
Value result = m_register_cache.AllocateScratch(RegSize_64);
@ -1641,15 +1647,15 @@ Value CodeGenerator::EmitLoadGuestMemory(const Value& address, RegSize size)
switch (size)
{
case RegSize_8:
EmitFunctionCall(&result, &Thunks::ReadMemoryByte, m_register_cache.GetCPUPtr(), address);
EmitFunctionCall(&result, &Thunks::ReadMemoryByte, m_register_cache.GetCPUPtr(), pc, address);
break;
case RegSize_16:
EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, m_register_cache.GetCPUPtr(), address);
EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, m_register_cache.GetCPUPtr(), pc, address);
break;
case RegSize_32:
EmitFunctionCall(&result, &Thunks::ReadMemoryWord, m_register_cache.GetCPUPtr(), address);
EmitFunctionCall(&result, &Thunks::ReadMemoryWord, m_register_cache.GetCPUPtr(), pc, address);
break;
default:
@ -1692,22 +1698,25 @@ Value CodeGenerator::EmitLoadGuestMemory(const Value& address, RegSize size)
return result;
}
void CodeGenerator::EmitStoreGuestMemory(const Value& address, const Value& value)
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
{
const Value pc = Value::FromConstantU32(cbi.pc);
AddPendingCycles(true);
Value result = m_register_cache.AllocateScratch(RegSize_8);
switch (value.size)
{
case RegSize_8:
EmitFunctionCall(&result, &Thunks::WriteMemoryByte, m_register_cache.GetCPUPtr(), address, value);
EmitFunctionCall(&result, &Thunks::WriteMemoryByte, m_register_cache.GetCPUPtr(), pc, address, value);
break;
case RegSize_16:
EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, m_register_cache.GetCPUPtr(), address, value);
EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, m_register_cache.GetCPUPtr(), pc, address, value);
break;
case RegSize_32:
EmitFunctionCall(&result, &Thunks::WriteMemoryWord, m_register_cache.GetCPUPtr(), address, value);
EmitFunctionCall(&result, &Thunks::WriteMemoryWord, m_register_cache.GetCPUPtr(), pc, address, value);
break;
default:

View file

@ -1,11 +1,24 @@
#include "cpu_recompiler_thunks.h"
#include "cpu_code_cache.h"
#include "cpu_core.h"
namespace CPU::Recompiler {
// Packs the arguments that Thunks::RaiseException later unpacks into a single u32:
// the exception code (narrowed to u8), the instruction's branch-delay-slot flag,
// and the instruction's cop_n field. All remaining bits are zero.
u32 Thunks::MakeRaiseExceptionInfo(Exception excode, const CodeBlockInstruction& cbi)
{
  RaiseExceptionInfo packed;
  packed.bits = 0; // start from all-zero so the unused byte is deterministic
  packed.CE = cbi.instruction.cop.cop_n;
  packed.BD = cbi.is_branch_delay_slot;
  packed.excode = static_cast<u8>(excode);
  return packed.bits;
}
// TODO: Port thunks to "ASM routines", i.e. code in the jit buffer.
u64 Thunks::ReadMemoryByte(Core* cpu, u32 address)
u64 Thunks::ReadMemoryByte(Core* cpu, u32 pc, u32 address)
{
cpu->m_current_instruction_pc = pc;
u32 temp = 0;
const TickCount cycles = cpu->DoMemoryAccess<MemoryAccessType::Read, MemoryAccessSize::Byte>(address, temp);
if (cycles < 0)
@ -18,8 +31,10 @@ u64 Thunks::ReadMemoryByte(Core* cpu, u32 address)
return ZeroExtend64(temp);
}
u64 Thunks::ReadMemoryHalfWord(Core* cpu, u32 address)
u64 Thunks::ReadMemoryHalfWord(Core* cpu, u32 pc, u32 address)
{
cpu->m_current_instruction_pc = pc;
if (!cpu->DoAlignmentCheck<MemoryAccessType::Read, MemoryAccessSize::HalfWord>(address))
return UINT64_C(0xFFFFFFFFFFFFFFFF);
@ -35,8 +50,10 @@ u64 Thunks::ReadMemoryHalfWord(Core* cpu, u32 address)
return ZeroExtend64(temp);
}
u64 Thunks::ReadMemoryWord(Core* cpu, u32 address)
u64 Thunks::ReadMemoryWord(Core* cpu, u32 pc, u32 address)
{
cpu->m_current_instruction_pc = pc;
if (!cpu->DoAlignmentCheck<MemoryAccessType::Read, MemoryAccessSize::Word>(address))
return UINT64_C(0xFFFFFFFFFFFFFFFF);
@ -52,19 +69,57 @@ u64 Thunks::ReadMemoryWord(Core* cpu, u32 address)
return ZeroExtend64(temp);
}
// Thunk called from generated code to store a single byte to guest memory.
// Stores `pc` into cpu->m_current_instruction_pc, zero-extends `value` to 32 bits and performs
// the write through Core::DoMemoryAccess. A negative cycle count from DoMemoryAccess raises
// Exception::DBE and returns false; otherwise returns true.
// NOTE(review): this listing interleaves the previous and the updated lines of the change; the
// description above follows the updated version that takes `pc`.
bool Thunks::WriteMemoryByte(Core* cpu, u32 address, u8 value)
bool Thunks::WriteMemoryByte(Core* cpu, u32 pc, u32 address, u8 value)
{
return cpu->WriteMemoryByte(address, value);
cpu->m_current_instruction_pc = pc;
u32 temp = ZeroExtend32(value);
const TickCount cycles = cpu->DoMemoryAccess<MemoryAccessType::Write, MemoryAccessSize::Byte>(address, temp);
if (cycles < 0)
{
cpu->RaiseException(Exception::DBE);
return false;
}
// Writes are expected to report zero cycles here (checked in debug builds only).
DebugAssert(cycles == 0);
return true;
}
// Thunk called from generated code to store a half-word to guest memory.
// Stores `pc` into cpu->m_current_instruction_pc, then returns false if Core::DoAlignmentCheck
// rejects `address` (presumably that check raises the address exception itself — confirm in Core).
// Otherwise the zero-extended value is written through Core::DoMemoryAccess; a negative cycle
// count raises Exception::DBE and returns false. Returns true on success.
// NOTE(review): this listing interleaves the previous and the updated lines of the change; the
// description above follows the updated version that takes `pc`.
bool Thunks::WriteMemoryHalfWord(Core* cpu, u32 address, u16 value)
bool Thunks::WriteMemoryHalfWord(Core* cpu, u32 pc, u32 address, u16 value)
{
return cpu->WriteMemoryHalfWord(address, value);
cpu->m_current_instruction_pc = pc;
if (!cpu->DoAlignmentCheck<MemoryAccessType::Write, MemoryAccessSize::HalfWord>(address))
return false;
u32 temp = ZeroExtend32(value);
const TickCount cycles = cpu->DoMemoryAccess<MemoryAccessType::Write, MemoryAccessSize::HalfWord>(address, temp);
if (cycles < 0)
{
cpu->RaiseException(Exception::DBE);
return false;
}
// Writes are expected to report zero cycles here (checked in debug builds only).
DebugAssert(cycles == 0);
return true;
}
// Thunk called from generated code to store a 32-bit word to guest memory.
// Stores `pc` into cpu->m_current_instruction_pc, then returns false if Core::DoAlignmentCheck
// rejects `address` (presumably that check raises the address exception itself — confirm in Core).
// Otherwise `value` is written through Core::DoMemoryAccess; a negative cycle count raises
// Exception::DBE and returns false. Returns true on success.
// NOTE(review): this listing interleaves the previous and the updated lines of the change; the
// description above follows the updated version that takes `pc`.
bool Thunks::WriteMemoryWord(Core* cpu, u32 address, u32 value)
bool Thunks::WriteMemoryWord(Core* cpu, u32 pc, u32 address, u32 value)
{
return cpu->WriteMemoryWord(address, value);
cpu->m_current_instruction_pc = pc;
if (!cpu->DoAlignmentCheck<MemoryAccessType::Write, MemoryAccessSize::Word>(address))
return false;
const TickCount cycles = cpu->DoMemoryAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(address, value);
if (cycles < 0)
{
cpu->RaiseException(Exception::DBE);
return false;
}
// Writes are expected to report zero cycles here (checked in debug builds only).
DebugAssert(cycles == 0);
return true;
}
bool Thunks::InterpretInstruction(Core* cpu)
@ -78,9 +133,11 @@ void Thunks::UpdateLoadDelay(Core* cpu)
cpu->UpdateLoadDelay();
}
// Thunk called from generated code to raise a CPU exception.
// epc:     passed through unchanged as the second argument of Core::RaiseException.
// ri_bits: a value produced by MakeRaiseExceptionInfo; it is reinterpreted as a
//          RaiseExceptionInfo to recover the exception code, BD flag and CE value.
// The branch-taken argument is read from cpu->m_current_instruction_was_branch_taken at call time.
// NOTE(review): this listing interleaves the previous and the updated lines of the change; the
// description above follows the updated version that takes `epc` and `ri_bits`.
void Thunks::RaiseException(Core* cpu, u8 excode)
void Thunks::RaiseException(Core* cpu, u32 epc, u32 ri_bits)
{
cpu->RaiseException(static_cast<Exception>(excode));
const RaiseExceptionInfo ri{ri_bits};
cpu->RaiseException(static_cast<Exception>(ri.excode), epc, ri.BD, cpu->m_current_instruction_was_branch_taken,
ri.CE);
}
void Thunks::RaiseAddressException(Core* cpu, u32 address, bool store, bool branch)

View file

@ -1,27 +1,48 @@
#pragma once
#include "common/jit_code_buffer.h"
#include "cpu_core.h"
#include <array>
#include "cpu_types.h"
namespace CPU::Recompiler {
class JitCodeBuffer;
namespace CPU {
struct CodeBlockInstruction;
class Core;
namespace Recompiler {
class Thunks
{
public:
// Exception parameters packed into one 32-bit value. MakeRaiseExceptionInfo fills the fields and
// returns `bits`; Thunks::RaiseException rebuilds the union from those bits and reads the fields.
union RaiseExceptionInfo
{
u32 bits; // all fields viewed as a single 32-bit value
struct
{
u8 excode; // exception code, narrowed from Exception with static_cast<u8>
bool BD; // filled from CodeBlockInstruction::is_branch_delay_slot
u8 CE; // filled from the instruction's cop.cop_n field
u8 unused; // filler; presumably rounds the struct up to the 32 bits of `bits` — confirm sizeof(bool) == 1
};
};
static u32 MakeRaiseExceptionInfo(Exception excode, const CodeBlockInstruction& cbi);
//////////////////////////////////////////////////////////////////////////
// Trampolines for calling back from the JIT
// Needed because we can't cast member functions to void*...
// TODO: Abuse carry flag or something else for exception
//////////////////////////////////////////////////////////////////////////
static u64 ReadMemoryByte(Core* cpu, u32 address);
static u64 ReadMemoryHalfWord(Core* cpu, u32 address);
static u64 ReadMemoryWord(Core* cpu, u32 address);
static bool WriteMemoryByte(Core* cpu, u32 address, u8 value);
static bool WriteMemoryHalfWord(Core* cpu, u32 address, u16 value);
static bool WriteMemoryWord(Core* cpu, u32 address, u32 value);
static u64 ReadMemoryByte(Core* cpu, u32 pc, u32 address);
static u64 ReadMemoryHalfWord(Core* cpu, u32 pc, u32 address);
static u64 ReadMemoryWord(Core* cpu, u32 pc, u32 address);
static bool WriteMemoryByte(Core* cpu, u32 pc, u32 address, u8 value);
static bool WriteMemoryHalfWord(Core* cpu, u32 pc, u32 address, u16 value);
static bool WriteMemoryWord(Core* cpu, u32 pc, u32 address, u32 value);
static bool InterpretInstruction(Core* cpu);
static void UpdateLoadDelay(Core* cpu);
static void RaiseException(Core* cpu, u8 excode);
static void RaiseException(Core* cpu, u32 epc, u32 ri_bits);
static void RaiseAddressException(Core* cpu, u32 address, bool store, bool branch);
static void ExecuteGTEInstruction(Core* cpu, u32 instruction_bits);
static u32 ReadGTERegister(Core* cpu, u32 reg);
@ -41,4 +62,6 @@ public:
void Generate(JitCodeBuffer* code_buffer);
};
} // namespace CPU_X86::Recompiler
} // namespace Recompiler
} // namespace CPU