mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-29 17:15:40 +00:00
CPU: Refactor load delay handling
Now works when mixing interpreter and recompiler code.
This commit is contained in:
parent
1d6c4a3af1
commit
5217088d82
|
@ -4,6 +4,9 @@
|
||||||
#include "cpu_disasm.h"
|
#include "cpu_disasm.h"
|
||||||
#include "cpu_recompiler_code_generator.h"
|
#include "cpu_recompiler_code_generator.h"
|
||||||
#include "cpu_recompiler_thunks.h"
|
#include "cpu_recompiler_thunks.h"
|
||||||
|
#include "system.h"
|
||||||
|
#include <thread>
|
||||||
|
#include <chrono>
|
||||||
Log_SetChannel(CPU::CodeCache);
|
Log_SetChannel(CPU::CodeCache);
|
||||||
|
|
||||||
namespace CPU {
|
namespace CPU {
|
||||||
|
@ -15,8 +18,9 @@ CodeCache::CodeCache() = default;
|
||||||
|
|
||||||
CodeCache::~CodeCache() = default;
|
CodeCache::~CodeCache() = default;
|
||||||
|
|
||||||
void CodeCache::Initialize(Core* core, Bus* bus)
|
void CodeCache::Initialize(System* system, Core* core, Bus* bus)
|
||||||
{
|
{
|
||||||
|
m_system = system;
|
||||||
m_core = core;
|
m_core = core;
|
||||||
m_bus = bus;
|
m_bus = bus;
|
||||||
|
|
||||||
|
@ -122,9 +126,12 @@ bool CodeCache::CompileBlock(CodeBlock* block)
|
||||||
}
|
}
|
||||||
|
|
||||||
cbi.pc = pc;
|
cbi.pc = pc;
|
||||||
cbi.is_branch = IsBranchInstruction(cbi.instruction);
|
|
||||||
cbi.is_branch_delay_slot = is_branch_delay_slot;
|
cbi.is_branch_delay_slot = is_branch_delay_slot;
|
||||||
cbi.is_load_delay_slot = is_load_delay_slot;
|
cbi.is_load_delay_slot = is_load_delay_slot;
|
||||||
|
cbi.is_branch_instruction = IsBranchInstruction(cbi.instruction);
|
||||||
|
cbi.is_load_instruction = IsMemoryLoadInstruction(cbi.instruction);
|
||||||
|
cbi.is_store_instruction = IsMemoryStoreInstruction(cbi.instruction);
|
||||||
|
cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction);
|
||||||
cbi.can_trap = CanInstructionTrap(cbi.instruction, m_core->InUserMode());
|
cbi.can_trap = CanInstructionTrap(cbi.instruction, m_core->InUserMode());
|
||||||
|
|
||||||
// instruction is decoded now
|
// instruction is decoded now
|
||||||
|
@ -133,11 +140,14 @@ bool CodeCache::CompileBlock(CodeBlock* block)
|
||||||
|
|
||||||
// if we're in a branch delay slot, the block is now done
|
// if we're in a branch delay slot, the block is now done
|
||||||
// except if this is a branch in a branch delay slot, then we grab the one after that, and so on...
|
// except if this is a branch in a branch delay slot, then we grab the one after that, and so on...
|
||||||
if (is_branch_delay_slot && !cbi.is_branch)
|
if (is_branch_delay_slot && !cbi.is_branch_instruction)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// if this is a branch, we grab the next instruction (delay slot), and then exit
|
// if this is a branch, we grab the next instruction (delay slot), and then exit
|
||||||
is_branch_delay_slot = cbi.is_branch;
|
is_branch_delay_slot = cbi.is_branch_instruction;
|
||||||
|
|
||||||
|
// same for load delay
|
||||||
|
is_load_delay_slot = cbi.has_load_delay;
|
||||||
|
|
||||||
// is this a non-branchy exit? (e.g. syscall)
|
// is this a non-branchy exit? (e.g. syscall)
|
||||||
if (IsExitBlockInstruction(cbi.instruction))
|
if (IsExitBlockInstruction(cbi.instruction))
|
||||||
|
@ -254,10 +264,7 @@ void CodeCache::InterpretCachedBlock(const CodeBlock& block)
|
||||||
m_core->ExecuteInstruction();
|
m_core->ExecuteInstruction();
|
||||||
|
|
||||||
// next load delay
|
// next load delay
|
||||||
m_core->m_load_delay_reg = m_core->m_next_load_delay_reg;
|
m_core->UpdateLoadDelay();
|
||||||
m_core->m_next_load_delay_reg = Reg::count;
|
|
||||||
m_core->m_load_delay_old_value = m_core->m_next_load_delay_old_value;
|
|
||||||
m_core->m_next_load_delay_old_value = 0;
|
|
||||||
|
|
||||||
if (m_core->m_exception_raised)
|
if (m_core->m_exception_raised)
|
||||||
break;
|
break;
|
||||||
|
@ -294,10 +301,7 @@ void CodeCache::InterpretUncachedBlock()
|
||||||
m_core->ExecuteInstruction();
|
m_core->ExecuteInstruction();
|
||||||
|
|
||||||
// next load delay
|
// next load delay
|
||||||
m_core->m_load_delay_reg = m_core->m_next_load_delay_reg;
|
m_core->UpdateLoadDelay();
|
||||||
m_core->m_next_load_delay_reg = Reg::count;
|
|
||||||
m_core->m_load_delay_old_value = m_core->m_next_load_delay_old_value;
|
|
||||||
m_core->m_next_load_delay_old_value = 0;
|
|
||||||
|
|
||||||
const bool branch = IsBranchInstruction(m_core->m_current_instruction);
|
const bool branch = IsBranchInstruction(m_core->m_current_instruction);
|
||||||
if (m_core->m_exception_raised || (!branch && in_branch_delay_slot) ||
|
if (m_core->m_exception_raised || (!branch && in_branch_delay_slot) ||
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
class JitCodeBuffer;
|
class JitCodeBuffer;
|
||||||
|
|
||||||
class Bus;
|
class Bus;
|
||||||
|
class System;
|
||||||
|
|
||||||
namespace CPU {
|
namespace CPU {
|
||||||
class Core;
|
class Core;
|
||||||
|
@ -23,7 +24,7 @@ public:
|
||||||
CodeCache();
|
CodeCache();
|
||||||
~CodeCache();
|
~CodeCache();
|
||||||
|
|
||||||
void Initialize(Core* core, Bus* bus);
|
void Initialize(System* system, Core* core, Bus* bus);
|
||||||
void Reset();
|
void Reset();
|
||||||
void Execute();
|
void Execute();
|
||||||
|
|
||||||
|
@ -39,6 +40,7 @@ private:
|
||||||
void InterpretCachedBlock(const CodeBlock& block);
|
void InterpretCachedBlock(const CodeBlock& block);
|
||||||
void InterpretUncachedBlock();
|
void InterpretUncachedBlock();
|
||||||
|
|
||||||
|
System* m_system;
|
||||||
Core* m_core;
|
Core* m_core;
|
||||||
Bus* m_bus;
|
Bus* m_bus;
|
||||||
|
|
||||||
|
|
|
@ -97,8 +97,10 @@ bool Core::DoState(StateWrapper& sw)
|
||||||
sw.Do(&m_next_instruction_is_branch_delay_slot);
|
sw.Do(&m_next_instruction_is_branch_delay_slot);
|
||||||
sw.Do(&m_branch_was_taken);
|
sw.Do(&m_branch_was_taken);
|
||||||
sw.Do(&m_load_delay_reg);
|
sw.Do(&m_load_delay_reg);
|
||||||
|
sw.Do(&m_load_delay_value);
|
||||||
sw.Do(&m_load_delay_old_value);
|
sw.Do(&m_load_delay_old_value);
|
||||||
sw.Do(&m_next_load_delay_reg);
|
sw.Do(&m_next_load_delay_reg);
|
||||||
|
sw.Do(&m_next_load_delay_value);
|
||||||
sw.Do(&m_next_load_delay_old_value);
|
sw.Do(&m_next_load_delay_old_value);
|
||||||
sw.Do(&m_cache_control);
|
sw.Do(&m_cache_control);
|
||||||
sw.DoBytes(m_dcache.data(), m_dcache.size());
|
sw.DoBytes(m_dcache.data(), m_dcache.size());
|
||||||
|
@ -354,18 +356,15 @@ void Core::DispatchInterrupt()
|
||||||
m_next_instruction.cop.cop_n);
|
m_next_instruction.cop.cop_n);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Core::FlushLoadDelay()
|
|
||||||
{
|
|
||||||
m_load_delay_reg = Reg::count;
|
|
||||||
m_load_delay_old_value = 0;
|
|
||||||
m_next_load_delay_reg = Reg::count;
|
|
||||||
m_next_load_delay_old_value = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Core::FlushPipeline()
|
void Core::FlushPipeline()
|
||||||
{
|
{
|
||||||
// loads are flushed
|
// loads are flushed
|
||||||
FlushLoadDelay();
|
m_next_load_delay_reg = Reg::count;
|
||||||
|
if (m_load_delay_reg != Reg::count)
|
||||||
|
{
|
||||||
|
m_regs.r[static_cast<u8>(m_load_delay_reg)] = m_load_delay_value;
|
||||||
|
m_load_delay_reg = Reg::count;
|
||||||
|
}
|
||||||
|
|
||||||
// not in a branch delay slot
|
// not in a branch delay slot
|
||||||
m_branch_was_taken = false;
|
m_branch_was_taken = false;
|
||||||
|
@ -383,13 +382,15 @@ void Core::FlushPipeline()
|
||||||
|
|
||||||
/// Returns the value currently stored in the register file for guest register `rs`.
/// The read goes straight to m_regs; no load-delay state is consulted here.
u32 Core::ReadReg(Reg rs)
{
  const auto index = static_cast<u8>(rs);
  return m_regs.r[index];
}
|
||||||
|
|
||||||
/// Stores `value` into guest register `rd`.
/// The store is unconditional; $zero is then forced back to 0 so a write targeting it has no lasting effect.
void Core::WriteReg(Reg rd, u32 value)
{
  const auto index = static_cast<u8>(rd);
  m_regs.r[index] = value;

  // Re-zeroing after every write is used instead of testing rd (avoids a branch/cmov).
  m_regs.zero = 0;
}
|
||||||
|
|
||||||
void Core::WriteRegDelayed(Reg rd, u32 value)
|
void Core::WriteRegDelayed(Reg rd, u32 value)
|
||||||
|
@ -398,10 +399,14 @@ void Core::WriteRegDelayed(Reg rd, u32 value)
|
||||||
if (rd == Reg::zero)
|
if (rd == Reg::zero)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// save the old value, this will be returned if the register is read in the next instruction
|
// double load delays ignore the first value
|
||||||
|
if (m_load_delay_reg == rd)
|
||||||
|
m_load_delay_reg = Reg::count;
|
||||||
|
|
||||||
|
// save the old value, if something else overwrites this reg we want to preserve it
|
||||||
m_next_load_delay_reg = rd;
|
m_next_load_delay_reg = rd;
|
||||||
m_next_load_delay_old_value = ReadReg(rd);
|
m_next_load_delay_value = value;
|
||||||
m_regs.r[static_cast<u8>(rd)] = value;
|
m_next_load_delay_old_value = m_regs.r[static_cast<u8>(rd)];
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<u32> Core::ReadCop0Reg(Cop0Reg reg)
|
std::optional<u32> Core::ReadCop0Reg(Cop0Reg reg)
|
||||||
|
@ -608,10 +613,7 @@ void Core::Execute()
|
||||||
ExecuteInstruction();
|
ExecuteInstruction();
|
||||||
|
|
||||||
// next load delay
|
// next load delay
|
||||||
m_load_delay_reg = m_next_load_delay_reg;
|
UpdateLoadDelay();
|
||||||
m_next_load_delay_reg = Reg::count;
|
|
||||||
m_load_delay_old_value = m_next_load_delay_old_value;
|
|
||||||
m_next_load_delay_old_value = 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1039,8 +1041,8 @@ void Core::ExecuteInstruction()
|
||||||
if (!ReadMemoryWord(aligned_addr, &aligned_value))
|
if (!ReadMemoryWord(aligned_addr, &aligned_value))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// note: bypasses load delay on the read
|
// Bypasses load delay. No need to check the old value since this is the delay slot or it's not relevant.
|
||||||
const u32 existing_value = m_regs.r[static_cast<u8>(inst.i.rt.GetValue())];
|
const u32 existing_value = (inst.i.rt == m_load_delay_reg) ? m_load_delay_value : ReadReg(inst.i.rt);
|
||||||
const u8 shift = (Truncate8(addr) & u8(3)) * u8(8);
|
const u8 shift = (Truncate8(addr) & u8(3)) * u8(8);
|
||||||
u32 new_value;
|
u32 new_value;
|
||||||
if (inst.op == InstructionOp::lwl)
|
if (inst.op == InstructionOp::lwl)
|
||||||
|
|
|
@ -14,11 +14,10 @@ namespace CPU {
|
||||||
|
|
||||||
class CodeCache;
|
class CodeCache;
|
||||||
|
|
||||||
namespace Recompiler
|
namespace Recompiler {
|
||||||
{
|
|
||||||
class CodeGenerator;
|
class CodeGenerator;
|
||||||
class Thunks;
|
class Thunks;
|
||||||
}
|
} // namespace Recompiler
|
||||||
|
|
||||||
class Core
|
class Core
|
||||||
{
|
{
|
||||||
|
@ -103,6 +102,19 @@ private:
|
||||||
void DisassembleAndLog(u32 addr);
|
void DisassembleAndLog(u32 addr);
|
||||||
void DisassembleAndPrint(u32 addr, u32 instructions_before, u32 instructions_after);
|
void DisassembleAndPrint(u32 addr, u32 instructions_before, u32 instructions_after);
|
||||||
|
|
||||||
|
// Updates load delays - call after each instruction
|
||||||
|
ALWAYS_INLINE void UpdateLoadDelay()
|
||||||
|
{
|
||||||
|
// the old value is needed in case the delay slot instruction overwrites the same register
|
||||||
|
if (m_load_delay_reg != Reg::count && m_regs.r[static_cast<u8>(m_load_delay_reg)] == m_load_delay_old_value)
|
||||||
|
m_regs.r[static_cast<u8>(m_load_delay_reg)] = m_load_delay_value;
|
||||||
|
|
||||||
|
m_load_delay_reg = m_next_load_delay_reg;
|
||||||
|
m_load_delay_value = m_next_load_delay_value;
|
||||||
|
m_load_delay_old_value = m_next_load_delay_old_value;
|
||||||
|
m_next_load_delay_reg = Reg::count;
|
||||||
|
}
|
||||||
|
|
||||||
// Fetches the instruction at m_regs.npc
|
// Fetches the instruction at m_regs.npc
|
||||||
bool FetchInstruction();
|
bool FetchInstruction();
|
||||||
void ExecuteInstruction();
|
void ExecuteInstruction();
|
||||||
|
@ -117,9 +129,6 @@ private:
|
||||||
bool HasPendingInterrupt();
|
bool HasPendingInterrupt();
|
||||||
void DispatchInterrupt();
|
void DispatchInterrupt();
|
||||||
|
|
||||||
// flushes any load delays if present
|
|
||||||
void FlushLoadDelay();
|
|
||||||
|
|
||||||
// clears pipeline of load/branch delays
|
// clears pipeline of load/branch delays
|
||||||
void FlushPipeline();
|
void FlushPipeline();
|
||||||
|
|
||||||
|
@ -158,8 +167,10 @@ private:
|
||||||
|
|
||||||
// load delays
|
// load delays
|
||||||
Reg m_load_delay_reg = Reg::count;
|
Reg m_load_delay_reg = Reg::count;
|
||||||
|
u32 m_load_delay_value = 0;
|
||||||
u32 m_load_delay_old_value = 0;
|
u32 m_load_delay_old_value = 0;
|
||||||
Reg m_next_load_delay_reg = Reg::count;
|
Reg m_next_load_delay_reg = Reg::count;
|
||||||
|
u32 m_next_load_delay_value = 0;
|
||||||
u32 m_next_load_delay_old_value = 0;
|
u32 m_next_load_delay_old_value = 0;
|
||||||
|
|
||||||
u32 m_cache_control = 0;
|
u32 m_cache_control = 0;
|
||||||
|
|
|
@ -28,11 +28,6 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
|
||||||
m_block_start = block->instructions.data();
|
m_block_start = block->instructions.data();
|
||||||
m_block_end = block->instructions.data() + block->instructions.size();
|
m_block_end = block->instructions.data() + block->instructions.size();
|
||||||
|
|
||||||
m_current_instruction_in_branch_delay_slot_dirty = true;
|
|
||||||
m_branch_was_taken_dirty = true;
|
|
||||||
m_current_instruction_was_branch_taken_dirty = false;
|
|
||||||
m_load_delay_dirty = true;
|
|
||||||
|
|
||||||
EmitBeginBlock();
|
EmitBeginBlock();
|
||||||
BlockPrologue();
|
BlockPrologue();
|
||||||
|
|
||||||
|
@ -340,7 +335,11 @@ void CodeGenerator::BlockPrologue()
|
||||||
{
|
{
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_exception_raised), Value::FromConstantU8(0));
|
EmitStoreCPUStructField(offsetof(Core, m_exception_raised), Value::FromConstantU8(0));
|
||||||
|
|
||||||
// fetching of the first instruction...
|
// we don't know the state of the last block, so assume load delays might be in progress
|
||||||
|
m_current_instruction_in_branch_delay_slot_dirty = true;
|
||||||
|
m_branch_was_taken_dirty = true;
|
||||||
|
m_current_instruction_was_branch_taken_dirty = false;
|
||||||
|
m_load_delay_dirty = true;
|
||||||
|
|
||||||
// sync m_current_instruction_pc so we can simply add to it
|
// sync m_current_instruction_pc so we can simply add to it
|
||||||
SyncCurrentInstructionPC();
|
SyncCurrentInstructionPC();
|
||||||
|
@ -435,7 +434,7 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
|
||||||
m_delayed_pc_add = 0;
|
m_delayed_pc_add = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cbi.is_branch)
|
if (!cbi.is_branch_instruction)
|
||||||
m_delayed_pc_add = INSTRUCTION_SIZE;
|
m_delayed_pc_add = INSTRUCTION_SIZE;
|
||||||
|
|
||||||
m_delayed_cycles_add += cycles;
|
m_delayed_cycles_add += cycles;
|
||||||
|
@ -445,38 +444,18 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
|
||||||
void CodeGenerator::InstructionEpilogue(const CodeBlockInstruction& cbi)
|
void CodeGenerator::InstructionEpilogue(const CodeBlockInstruction& cbi)
|
||||||
{
|
{
|
||||||
// copy if the previous instruction was a load, reset the current value on the next instruction
|
// copy if the previous instruction was a load, reset the current value on the next instruction
|
||||||
if (m_load_delay_dirty)
|
if (m_next_load_delay_dirty)
|
||||||
{
|
{
|
||||||
// cpu->m_load_delay_reg = cpu->m_next_load_delay_reg;
|
Log_DebugPrint("Emitting delay slot flush (with move next)");
|
||||||
// cpu->m_next_load_delay_reg = Reg::count;
|
EmitDelaySlotUpdate(false, false, true);
|
||||||
{
|
|
||||||
Value temp = m_register_cache.AllocateScratch(RegSize_8);
|
|
||||||
EmitLoadCPUStructField(temp.host_reg, RegSize_8, offsetof(Core, m_next_load_delay_reg));
|
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_next_load_delay_reg),
|
|
||||||
Value::FromConstantU8(static_cast<u8>(Reg::count)));
|
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_reg), temp);
|
|
||||||
}
|
|
||||||
|
|
||||||
// cpu->m_load_delay_old_value = cpu->m_next_load_delay_old_value;
|
|
||||||
// cpu->m_next_load_delay_old_value = 0;
|
|
||||||
{
|
|
||||||
Value temp = m_register_cache.AllocateScratch(RegSize_32);
|
|
||||||
EmitLoadCPUStructField(temp.host_reg, RegSize_32, offsetof(Core, m_next_load_delay_old_value));
|
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_next_load_delay_old_value), Value::FromConstantU32(0));
|
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), temp);
|
|
||||||
}
|
|
||||||
|
|
||||||
m_load_delay_dirty = false;
|
|
||||||
m_next_load_delay_dirty = true;
|
|
||||||
}
|
|
||||||
else if (m_next_load_delay_dirty)
|
|
||||||
{
|
|
||||||
// cpu->m_load_delay_reg = Reg::count;
|
|
||||||
// cpu->m_load_delay_old_value = 0;
|
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_reg), Value::FromConstantU8(static_cast<u8>(Reg::count)));
|
|
||||||
EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), Value::FromConstantU32(0));
|
|
||||||
|
|
||||||
m_next_load_delay_dirty = false;
|
m_next_load_delay_dirty = false;
|
||||||
|
m_load_delay_dirty = true;
|
||||||
|
}
|
||||||
|
else if (m_load_delay_dirty)
|
||||||
|
{
|
||||||
|
Log_DebugPrint("Emitting delay slot flush");
|
||||||
|
EmitDelaySlotUpdate(true, false, false);
|
||||||
|
m_load_delay_dirty = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -528,9 +507,9 @@ bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi)
|
||||||
EmitFunctionCall(nullptr, &Thunks::InterpretInstruction, m_register_cache.GetCPUPtr());
|
EmitFunctionCall(nullptr, &Thunks::InterpretInstruction, m_register_cache.GetCPUPtr());
|
||||||
}
|
}
|
||||||
|
|
||||||
m_current_instruction_in_branch_delay_slot_dirty = cbi.is_branch;
|
m_current_instruction_in_branch_delay_slot_dirty = cbi.is_branch_instruction;
|
||||||
m_branch_was_taken_dirty = cbi.is_branch;
|
m_branch_was_taken_dirty = cbi.is_branch_instruction;
|
||||||
m_load_delay_dirty = true;
|
m_next_load_delay_dirty = cbi.has_load_delay;
|
||||||
InstructionEpilogue(cbi);
|
InstructionEpilogue(cbi);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -147,6 +147,7 @@ private:
|
||||||
void SyncCurrentInstructionPC();
|
void SyncCurrentInstructionPC();
|
||||||
void SyncPC();
|
void SyncPC();
|
||||||
void AddPendingCycles();
|
void AddPendingCycles();
|
||||||
|
void EmitDelaySlotUpdate(bool skip_check_for_delay, bool skip_check_old_value, bool move_next);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Instruction Code Generators
|
// Instruction Code Generators
|
||||||
|
@ -172,14 +173,12 @@ private:
|
||||||
|
|
||||||
std::array<Value, 3> m_operand_memory_addresses{};
|
std::array<Value, 3> m_operand_memory_addresses{};
|
||||||
|
|
||||||
Xbyak::Label m_block_exit_label;
|
|
||||||
|
|
||||||
// whether various flags need to be reset.
|
// whether various flags need to be reset.
|
||||||
bool m_current_instruction_in_branch_delay_slot_dirty = false;
|
bool m_current_instruction_in_branch_delay_slot_dirty = false;
|
||||||
bool m_branch_was_taken_dirty = false;
|
bool m_branch_was_taken_dirty = false;
|
||||||
bool m_current_instruction_was_branch_taken_dirty = false;
|
bool m_current_instruction_was_branch_taken_dirty = false;
|
||||||
bool m_next_load_delay_dirty = false;
|
|
||||||
bool m_load_delay_dirty = false;
|
bool m_load_delay_dirty = false;
|
||||||
|
bool m_next_load_delay_dirty = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace CPU_X86::Recompiler
|
} // namespace CPU_X86::Recompiler
|
||||||
|
|
|
@ -151,9 +151,8 @@ void CodeGenerator::EmitBeginBlock()
|
||||||
|
|
||||||
void CodeGenerator::EmitEndBlock()
|
void CodeGenerator::EmitEndBlock()
|
||||||
{
|
{
|
||||||
m_emit.L(m_block_exit_label);
|
|
||||||
m_register_cache.FreeHostReg(RCPUPTR);
|
m_register_cache.FreeHostReg(RCPUPTR);
|
||||||
m_register_cache.PopCalleeSavedRegisters();
|
m_register_cache.PopCalleeSavedRegisters(true);
|
||||||
|
|
||||||
m_emit.ret();
|
m_emit.ret();
|
||||||
}
|
}
|
||||||
|
@ -166,9 +165,10 @@ void CodeGenerator::EmitBlockExitOnBool(const Value& value)
|
||||||
m_emit.test(GetHostReg8(value), GetHostReg8(value));
|
m_emit.test(GetHostReg8(value), GetHostReg8(value));
|
||||||
m_emit.jz(continue_label);
|
m_emit.jz(continue_label);
|
||||||
|
|
||||||
// flush current state
|
// flush current state and return
|
||||||
m_register_cache.FlushAllGuestRegisters(false, false);
|
m_register_cache.FlushAllGuestRegisters(false, false);
|
||||||
m_emit.jmp(m_block_exit_label, Xbyak::CodeGenerator::T_NEAR);
|
m_register_cache.PopCalleeSavedRegisters(false);
|
||||||
|
m_emit.ret();
|
||||||
|
|
||||||
m_emit.L(continue_label);
|
m_emit.L(continue_label);
|
||||||
}
|
}
|
||||||
|
@ -1303,6 +1303,62 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CodeGenerator::EmitDelaySlotUpdate(bool skip_check_for_delay, bool skip_check_old_value, bool move_next)
|
||||||
|
{
|
||||||
|
Value reg = m_register_cache.AllocateScratch(RegSize_8);
|
||||||
|
Value value = m_register_cache.AllocateScratch(RegSize_32);
|
||||||
|
|
||||||
|
Xbyak::Label skip_flush;
|
||||||
|
|
||||||
|
auto load_delay_reg = m_emit.byte[GetCPUPtrReg() + offsetof(Core, m_load_delay_reg)];
|
||||||
|
auto load_delay_old_value = m_emit.dword[GetCPUPtrReg() + offsetof(Core, m_load_delay_old_value)];
|
||||||
|
auto load_delay_value = m_emit.dword[GetCPUPtrReg() + offsetof(Core, m_load_delay_value)];
|
||||||
|
auto reg_ptr = m_emit.dword[GetCPUPtrReg() + offsetof(Core, m_regs.r[0]) + GetHostReg64(reg.host_reg) * 4];
|
||||||
|
|
||||||
|
// reg = load_delay_reg
|
||||||
|
m_emit.movzx(GetHostReg32(reg.host_reg), load_delay_reg);
|
||||||
|
if (!skip_check_old_value)
|
||||||
|
m_emit.mov(GetHostReg32(value), load_delay_old_value);
|
||||||
|
|
||||||
|
if (!skip_check_for_delay)
|
||||||
|
{
|
||||||
|
// if load_delay_reg == Reg::count goto skip_flush
|
||||||
|
m_emit.cmp(GetHostReg32(reg.host_reg), static_cast<u8>(Reg::count));
|
||||||
|
m_emit.je(skip_flush);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!skip_check_old_value)
|
||||||
|
{
|
||||||
|
// if r[reg] != load_delay_old_value goto skip_flush
|
||||||
|
m_emit.cmp(GetHostReg32(value), reg_ptr);
|
||||||
|
m_emit.jne(skip_flush);
|
||||||
|
}
|
||||||
|
|
||||||
|
// r[reg] = load_delay_value
|
||||||
|
m_emit.mov(GetHostReg32(value), load_delay_value);
|
||||||
|
m_emit.mov(reg_ptr, GetHostReg32(value));
|
||||||
|
|
||||||
|
// if !move_next load_delay_reg = Reg::count
|
||||||
|
if (!move_next)
|
||||||
|
m_emit.mov(load_delay_reg, static_cast<u8>(Reg::count));
|
||||||
|
|
||||||
|
m_emit.L(skip_flush);
|
||||||
|
|
||||||
|
if (move_next)
|
||||||
|
{
|
||||||
|
auto next_load_delay_reg = m_emit.byte[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_reg)];
|
||||||
|
auto next_load_delay_old_value = m_emit.dword[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_old_value)];
|
||||||
|
auto next_load_delay_value = m_emit.dword[GetCPUPtrReg() + offsetof(Core, m_next_load_delay_value)];
|
||||||
|
m_emit.mov(GetHostReg32(value), next_load_delay_value);
|
||||||
|
m_emit.mov(GetHostReg8(reg), next_load_delay_reg);
|
||||||
|
m_emit.mov(load_delay_value, GetHostReg32(value));
|
||||||
|
m_emit.mov(GetHostReg32(value), next_load_delay_old_value);
|
||||||
|
m_emit.mov(load_delay_reg, GetHostReg8(reg));
|
||||||
|
m_emit.mov(load_delay_old_value, GetHostReg32(value));
|
||||||
|
m_emit.mov(next_load_delay_reg, static_cast<u8>(Reg::count));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
class ThunkGenerator
|
class ThunkGenerator
|
||||||
{
|
{
|
||||||
|
|
|
@ -307,7 +307,7 @@ u32 RegisterCache::PopCallerSavedRegisters() const
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 RegisterCache::PopCalleeSavedRegisters()
|
u32 RegisterCache::PopCalleeSavedRegisters(bool commit)
|
||||||
{
|
{
|
||||||
if (m_host_register_callee_saved_order_count == 0)
|
if (m_host_register_callee_saved_order_count == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -321,6 +321,7 @@ u32 RegisterCache::PopCalleeSavedRegisters()
|
||||||
(HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated));
|
(HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated));
|
||||||
|
|
||||||
m_code_generator.EmitPopHostReg(reg);
|
m_code_generator.EmitPopHostReg(reg);
|
||||||
|
if (commit)
|
||||||
m_host_register_state[reg] &= ~HostRegState::CalleeSavedAllocated;
|
m_host_register_state[reg] &= ~HostRegState::CalleeSavedAllocated;
|
||||||
count++;
|
count++;
|
||||||
i--;
|
i--;
|
||||||
|
|
|
@ -176,7 +176,7 @@ public:
|
||||||
u32 PopCallerSavedRegisters() const;
|
u32 PopCallerSavedRegisters() const;
|
||||||
|
|
||||||
/// Restore callee-saved registers. Call at the end of the function.
|
/// Restore callee-saved registers. Call at the end of the function.
|
||||||
u32 PopCalleeSavedRegisters();
|
u32 PopCalleeSavedRegisters(bool commit);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Scratch Register Allocation
|
// Scratch Register Allocation
|
||||||
|
|
|
@ -40,4 +40,9 @@ bool Thunks::InterpretInstruction(Core* cpu)
|
||||||
return cpu->m_exception_raised;
|
return cpu->m_exception_raised;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Static forwarder: runs Core::UpdateLoadDelay() on the given CPU instance.
void Thunks::UpdateLoadDelay(Core* cpu)
{
  cpu->UpdateLoadDelay();
}
|
||||||
|
|
||||||
} // namespace CPU::Recompiler
|
} // namespace CPU::Recompiler
|
|
@ -20,6 +20,7 @@ public:
|
||||||
static bool WriteMemoryHalfWord(Core* cpu, u32 address, u16 value);
|
static bool WriteMemoryHalfWord(Core* cpu, u32 address, u16 value);
|
||||||
static bool WriteMemoryWord(Core* cpu, u32 address, u32 value);
|
static bool WriteMemoryWord(Core* cpu, u32 address, u32 value);
|
||||||
static bool InterpretInstruction(Core* cpu);
|
static bool InterpretInstruction(Core* cpu);
|
||||||
|
static void UpdateLoadDelay(Core* cpu);
|
||||||
};
|
};
|
||||||
|
|
||||||
class ASMFunctions
|
class ASMFunctions
|
||||||
|
|
|
@ -44,6 +44,76 @@ bool IsBranchInstruction(const Instruction& instruction)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsMemoryLoadInstruction(const Instruction& instruction)
|
||||||
|
{
|
||||||
|
switch (instruction.op)
|
||||||
|
{
|
||||||
|
case InstructionOp::lb:
|
||||||
|
case InstructionOp::lh:
|
||||||
|
case InstructionOp::lw:
|
||||||
|
case InstructionOp::lbu:
|
||||||
|
case InstructionOp::lhu:
|
||||||
|
case InstructionOp::lwl:
|
||||||
|
case InstructionOp::lwr:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
case InstructionOp::lwc2:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsMemoryStoreInstruction(const Instruction& instruction)
|
||||||
|
{
|
||||||
|
switch (instruction.op)
|
||||||
|
{
|
||||||
|
case InstructionOp::sb:
|
||||||
|
case InstructionOp::sh:
|
||||||
|
case InstructionOp::sw:
|
||||||
|
case InstructionOp::swl:
|
||||||
|
case InstructionOp::swr:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
case InstructionOp::swc2:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool InstructionHasLoadDelay(const Instruction& instruction)
|
||||||
|
{
|
||||||
|
switch (instruction.op)
|
||||||
|
{
|
||||||
|
case InstructionOp::lb:
|
||||||
|
case InstructionOp::lh:
|
||||||
|
case InstructionOp::lw:
|
||||||
|
case InstructionOp::lbu:
|
||||||
|
case InstructionOp::lhu:
|
||||||
|
case InstructionOp::lwl:
|
||||||
|
case InstructionOp::lwr:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
case InstructionOp::cop0:
|
||||||
|
case InstructionOp::cop2:
|
||||||
|
{
|
||||||
|
if (instruction.cop.IsCommonInstruction())
|
||||||
|
{
|
||||||
|
const CopCommonInstruction common_op = instruction.cop.CommonOp();
|
||||||
|
return (common_op == CopCommonInstruction::cfcn || common_op == CopCommonInstruction::mfcn);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool IsExitBlockInstruction(const Instruction& instruction)
|
bool IsExitBlockInstruction(const Instruction& instruction)
|
||||||
{
|
{
|
||||||
switch (instruction.op)
|
switch (instruction.op)
|
||||||
|
@ -167,26 +237,6 @@ bool CanInstructionTrap(const Instruction& instruction, bool in_user_mode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsLoadDelayingInstruction(const Instruction& instruction)
|
|
||||||
{
|
|
||||||
switch (instruction.op)
|
|
||||||
{
|
|
||||||
case InstructionOp::lb:
|
|
||||||
case InstructionOp::lh:
|
|
||||||
case InstructionOp::lw:
|
|
||||||
case InstructionOp::lbu:
|
|
||||||
case InstructionOp::lhu:
|
|
||||||
return true;
|
|
||||||
|
|
||||||
case InstructionOp::lwl:
|
|
||||||
case InstructionOp::lwr:
|
|
||||||
return false;
|
|
||||||
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool IsInvalidInstruction(const Instruction& instruction)
|
bool IsInvalidInstruction(const Instruction& instruction)
|
||||||
{
|
{
|
||||||
// TODO
|
// TODO
|
||||||
|
|
|
@ -208,9 +208,11 @@ union Instruction
|
||||||
|
|
||||||
// Instruction helpers.
|
// Instruction helpers.
|
||||||
bool IsBranchInstruction(const Instruction& instruction);
|
bool IsBranchInstruction(const Instruction& instruction);
|
||||||
|
bool IsMemoryLoadInstruction(const Instruction& instruction);
|
||||||
|
bool IsMemoryStoreInstruction(const Instruction& instruction);
|
||||||
|
bool InstructionHasLoadDelay(const Instruction& instruction);
|
||||||
bool IsExitBlockInstruction(const Instruction& instruction);
|
bool IsExitBlockInstruction(const Instruction& instruction);
|
||||||
bool CanInstructionTrap(const Instruction& instruction, bool in_user_mode);
|
bool CanInstructionTrap(const Instruction& instruction, bool in_user_mode);
|
||||||
bool IsLoadDelayingInstruction(const Instruction& instruction);
|
|
||||||
bool IsInvalidInstruction(const Instruction& instruction);
|
bool IsInvalidInstruction(const Instruction& instruction);
|
||||||
|
|
||||||
struct Registers
|
struct Registers
|
||||||
|
@ -396,10 +398,13 @@ struct CodeBlockInstruction
|
||||||
Instruction instruction;
|
Instruction instruction;
|
||||||
u32 pc;
|
u32 pc;
|
||||||
|
|
||||||
bool is_branch : 1;
|
bool is_branch_instruction : 1;
|
||||||
bool is_branch_delay_slot : 1;
|
bool is_branch_delay_slot : 1;
|
||||||
|
bool is_load_instruction : 1;
|
||||||
|
bool is_store_instruction : 1;
|
||||||
bool is_load_delay_slot : 1;
|
bool is_load_delay_slot : 1;
|
||||||
bool is_last_instruction : 1;
|
bool is_last_instruction : 1;
|
||||||
|
bool has_load_delay : 1;
|
||||||
bool can_trap : 1;
|
bool can_trap : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -171,7 +171,7 @@ bool System::Boot(const char* filename)
|
||||||
void System::InitializeComponents()
|
void System::InitializeComponents()
|
||||||
{
|
{
|
||||||
m_cpu->Initialize(m_bus.get());
|
m_cpu->Initialize(m_bus.get());
|
||||||
m_cpu_code_cache->Initialize(m_cpu.get(), m_bus.get());
|
m_cpu_code_cache->Initialize(this, m_cpu.get(), m_bus.get());
|
||||||
m_bus->Initialize(m_cpu.get(), m_cpu_code_cache.get(), m_dma.get(), m_interrupt_controller.get(), m_gpu.get(),
|
m_bus->Initialize(m_cpu.get(), m_cpu_code_cache.get(), m_dma.get(), m_interrupt_controller.get(), m_gpu.get(),
|
||||||
m_cdrom.get(), m_pad.get(), m_timers.get(), m_spu.get(), m_mdec.get());
|
m_cdrom.get(), m_pad.get(), m_timers.get(), m_spu.get(), m_mdec.get());
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue