CPU: Implement instruction cache simulation

Implemented for all execution modes. Disabled by default in the cached
interpreter and recompiler, always enabled in the pure interpreter.
This commit is contained in:
Connor McLaughlin 2020-08-29 22:07:33 +10:00
parent efc00a2d0e
commit 19d6037b99
19 changed files with 449 additions and 75 deletions

View file

@ -742,10 +742,153 @@ ALWAYS_INLINE static TickCount DoDMAAccess(u32 offset, u32& value)
namespace CPU {
template<bool add_ticks, bool icache_read = false, u32 word_count = 1>
ALWAYS_INLINE_RELEASE void DoInstructionRead(PhysicalMemoryAddress address, void* data)
{
using namespace Bus;
address &= PHYSICAL_MEMORY_ADDRESS_MASK;
if (address < RAM_MIRROR_END)
{
std::memcpy(data, &g_ram[address & RAM_MASK], sizeof(u32) * word_count);
if constexpr (add_ticks)
g_state.pending_ticks += (icache_read ? 1 : 4) * word_count;
}
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
{
std::memcpy(data, &g_bios[(address - BIOS_BASE) & BIOS_MASK], sizeof(u32));
if constexpr (add_ticks)
g_state.pending_ticks += m_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)] * word_count;
}
else
{
CPU::RaiseException(address, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0));
std::memset(data, 0, sizeof(u32) * word_count);
}
}
TickCount GetInstructionReadTicks(VirtualMemoryAddress address)
{
using namespace Bus;
address &= PHYSICAL_MEMORY_ADDRESS_MASK;
if (address < RAM_MIRROR_END)
{
return 4;
}
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
{
return m_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)];
}
else
{
return 0;
}
}
TickCount GetICacheFillTicks(VirtualMemoryAddress address)
{
using namespace Bus;
address &= PHYSICAL_MEMORY_ADDRESS_MASK;
if (address < RAM_MIRROR_END)
{
return 1 * (ICACHE_LINE_SIZE / sizeof(u32));
}
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
{
return m_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)] * (ICACHE_LINE_SIZE / sizeof(u32));
}
else
{
return 0;
}
}
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks)
{
VirtualMemoryAddress current_pc = g_state.regs.pc & ICACHE_TAG_ADDRESS_MASK;
if (IsCachedAddress(current_pc))
{
TickCount ticks = 0;
TickCount cached_ticks_per_line = GetICacheFillTicks(current_pc);
for (u32 i = 0; i < line_count; i++, current_pc += ICACHE_LINE_SIZE)
{
const u32 line = GetICacheLine(current_pc);
if (g_state.icache_tags[line] != current_pc)
{
g_state.icache_tags[line] = current_pc;
ticks += cached_ticks_per_line;
}
}
g_state.pending_ticks += ticks;
}
else
{
g_state.pending_ticks += uncached_ticks;
}
}
u32 FillICache(VirtualMemoryAddress address)
{
const u32 line = GetICacheLine(address);
g_state.icache_tags[line] = GetICacheTagForAddress(address);
u8* line_data = &g_state.icache_data[line * ICACHE_LINE_SIZE];
DoInstructionRead<true, true, 4>(address & ~(ICACHE_LINE_SIZE - 1u), line_data);
const u32 offset = GetICacheLineOffset(address);
u32 result;
std::memcpy(&result, &line_data[offset], sizeof(result));
return result;
}
void ClearICache()
{
std::memset(g_state.icache_data.data(), 0, ICACHE_SIZE);
g_state.icache_tags.fill(ICACHE_INVALD_BIT | ICACHE_DISABLED_BIT);
}
ALWAYS_INLINE_RELEASE static u32 ReadICache(VirtualMemoryAddress address)
{
const u32 line = GetICacheLine(address);
const u8* line_data = &g_state.icache_data[line * ICACHE_LINE_SIZE];
const u32 offset = GetICacheLineOffset(address);
u32 result;
std::memcpy(&result, &line_data[offset], sizeof(result));
return result;
}
ALWAYS_INLINE_RELEASE static void WriteICache(VirtualMemoryAddress address, u32 value)
{
const u32 line = GetICacheLine(address);
const u32 offset = GetICacheLineOffset(address);
g_state.icache_tags[line] = GetICacheTagForAddress(address) | ICACHE_INVALD_BIT;
std::memcpy(&g_state.icache_data[line * ICACHE_LINE_SIZE + offset], &value, sizeof(value));
}
static void WriteCacheControl(u32 value)
{
Log_WarningPrintf("Cache control <- 0x%08X", value);
g_state.cache_control = value;
CacheControl changed_bits{g_state.cache_control.bits ^ value};
g_state.cache_control.bits = value;
if (changed_bits.icache_enable)
{
if (g_state.cache_control.icache_enable)
{
for (u32 i = 0; i < ICACHE_LINES; i++)
g_state.icache_tags[i] &= ~ICACHE_DISABLED_BIT;
}
else
{
for (u32 i = 0; i < ICACHE_LINES; i++)
g_state.icache_tags[i] |= ICACHE_DISABLED_BIT;
}
}
}
template<MemoryAccessType type, MemoryAccessSize size>
@ -797,8 +940,11 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32&
if constexpr (type == MemoryAccessType::Write)
{
if (g_state.cop0_regs.sr.Isc)
{
WriteICache(address, value);
return 0;
}
}
address &= PHYSICAL_MEMORY_ADDRESS_MASK;
if ((address & DCACHE_LOCATION_MASK) == DCACHE_LOCATION)
@ -829,7 +975,7 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32&
if (address == 0xFFFE0130)
{
if constexpr (type == MemoryAccessType::Read)
value = g_state.cache_control;
value = g_state.cache_control.bits;
else
WriteCacheControl(value);
@ -849,6 +995,10 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32&
{
return DoRAMAccess<type, size>(address, value);
}
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
{
return DoBIOSAccess<type, size>(static_cast<u32>(address - BIOS_BASE), value);
}
else if (address < EXP1_BASE)
{
return DoInvalidAccess(type, size, address, value);
@ -921,14 +1071,6 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32&
{
return DoEXP2Access<type, size>(address & EXP2_MASK, value);
}
else if (address < BIOS_BASE)
{
return DoInvalidAccess(type, size, address, value);
}
else if (address < (BIOS_BASE + BIOS_SIZE))
{
return DoBIOSAccess<type, size>(static_cast<u32>(address - BIOS_BASE), value);
}
else
{
return DoInvalidAccess(type, size, address, value);
@ -961,19 +1103,76 @@ static bool DoAlignmentCheck(VirtualMemoryAddress address)
bool FetchInstruction()
{
DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4));
if (DoMemoryAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(g_state.regs.npc, g_state.next_instruction.bits) <
0)
using namespace Bus;
PhysicalMemoryAddress address = g_state.regs.npc;
switch (address >> 29)
{
// Bus errors don't set BadVaddr.
RaiseException(g_state.regs.npc, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0));
case 0x00: // KUSEG 0M-512M
case 0x04: // KSEG0 - physical memory cached
{
#if 0
// TODO: icache
TickCount cycles;
DoInstructionRead(address, cycles, g_state.next_instruction.bits);
#else
if (CompareICacheTag(address))
g_state.next_instruction.bits = ReadICache(address);
else
g_state.next_instruction.bits = FillICache(address);
#endif
}
break;
case 0x05: // KSEG1 - physical memory uncached
{
DoInstructionRead<true, false, 1>(address, &g_state.next_instruction.bits);
}
break;
case 0x01: // KUSEG 512M-1024M
case 0x02: // KUSEG 1024M-1536M
case 0x03: // KUSEG 1536M-2048M
case 0x06: // KSEG2
case 0x07: // KSEG2
default:
{
CPU::RaiseException(address, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0));
return false;
}
}
g_state.regs.pc = g_state.regs.npc;
g_state.regs.npc += sizeof(g_state.next_instruction.bits);
return true;
}
bool SafeReadInstruction(VirtualMemoryAddress addr, u32* value)
{
switch (addr >> 29)
{
case 0x00: // KUSEG 0M-512M
case 0x04: // KSEG0 - physical memory cached
case 0x05: // KSEG1 - physical memory uncached
{
DoInstructionRead<false, false, 1>(addr, value);
return true;
}
case 0x01: // KUSEG 512M-1024M
case 0x02: // KUSEG 1024M-1536M
case 0x03: // KUSEG 1536M-2048M
case 0x06: // KSEG2
case 0x07: // KSEG2
default:
{
return false;
}
}
}
bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value)
{
u32 temp = 0;

View file

@ -78,41 +78,6 @@ extern std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits;
extern u8 g_ram[RAM_SIZE]; // 2MB RAM
extern u8 g_bios[BIOS_SIZE]; // 512K BIOS ROM
/// Returns the address which should be used for code caching (i.e. removes mirrors).
ALWAYS_INLINE PhysicalMemoryAddress UnmirrorAddress(PhysicalMemoryAddress address)
{
// RAM
if (address < 0x800000)
return address & UINT32_C(0x1FFFFF);
else
return address;
}
/// Returns true if the address specified is cacheable (RAM or BIOS).
ALWAYS_INLINE bool IsCacheableAddress(PhysicalMemoryAddress address)
{
return (address < RAM_MIRROR_END) || (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE));
}
/// Reads a cachable address (RAM or BIOS).
ALWAYS_INLINE u32 ReadCacheableAddress(PhysicalMemoryAddress address)
{
u32 value;
if (address < RAM_MIRROR_END)
{
std::memcpy(&value, &g_ram[address & RAM_MASK], sizeof(value));
return value;
}
else
{
std::memcpy(&value, &g_bios[address & BIOS_MASK], sizeof(value));
return value;
}
}
/// Returns true if the address specified is writable (RAM).
ALWAYS_INLINE bool IsRAMAddress(PhysicalMemoryAddress address) { return address < RAM_MIRROR_END; }
/// Flags a RAM region as code, so we know when to invalidate blocks.
ALWAYS_INLINE void SetRAMCodePage(u32 index) { m_ram_code_bits[index] = true; }

View file

@ -139,8 +139,7 @@ static void ExecuteImpl()
{
if (HasPendingInterrupt())
{
// TODO: Fill in m_next_instruction...
SafeReadMemoryWord(g_state.regs.pc, &g_state.next_instruction.bits);
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
DispatchInterrupt();
next_block_key = GetNextBlockKey();
}
@ -165,6 +164,9 @@ static void ExecuteImpl()
LogCurrentState();
#endif
if (g_settings.cpu_recompiler_icache)
CheckAndUpdateICacheTags(block->icache_line_count, block->uncached_fetch_ticks);
InterpretCachedBlock<pgxp_mode>(*block);
if (g_state.pending_ticks >= g_state.downcount)
@ -247,7 +249,7 @@ void ExecuteRecompiler()
{
if (HasPendingInterrupt())
{
SafeReadMemoryWord(g_state.regs.pc, &g_state.next_instruction.bits);
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
DispatchInterrupt();
}
@ -351,7 +353,8 @@ bool RevalidateBlock(CodeBlock* block)
{
for (const CodeBlockInstruction& cbi : block->instructions)
{
u32 new_code = Bus::ReadCacheableAddress(cbi.pc & PHYSICAL_MEMORY_ADDRESS_MASK);
u32 new_code = 0;
SafeReadInstruction(cbi.pc, &new_code);
if (cbi.instruction.bits != new_code)
{
Log_DebugPrintf("Block 0x%08X changed at PC 0x%08X - %08X to %08X - recompiling.", block->GetPC(), cbi.pc,
@ -395,16 +398,12 @@ bool CompileBlock(CodeBlock* block)
__debugbreak();
#endif
u32 last_cache_line = ICACHE_LINES;
for (;;)
{
CodeBlockInstruction cbi = {};
const PhysicalMemoryAddress phys_addr = pc & PHYSICAL_MEMORY_ADDRESS_MASK;
if (!Bus::IsCacheableAddress(phys_addr))
break;
cbi.instruction.bits = Bus::ReadCacheableAddress(phys_addr);
if (!IsInvalidInstruction(cbi.instruction))
if (!SafeReadInstruction(pc, &cbi.instruction.bits) || !IsInvalidInstruction(cbi.instruction))
break;
cbi.pc = pc;
@ -416,6 +415,18 @@ bool CompileBlock(CodeBlock* block)
cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction);
cbi.can_trap = CanInstructionTrap(cbi.instruction, InUserMode());
if (g_settings.cpu_recompiler_icache)
{
const u32 icache_line = GetICacheLine(pc);
if (icache_line != last_cache_line)
{
block->icache_line_count++;
block->icache_line_count = GetICacheFillTicks(pc);
last_cache_line = icache_line;
}
block->uncached_fetch_ticks += GetInstructionReadTicks(pc);
}
// instruction is decoded now
block->instructions.push_back(cbi);
pc += sizeof(cbi.instruction.bits);

View file

@ -61,6 +61,8 @@ struct CodeBlock
std::vector<CodeBlock*> link_predecessors;
std::vector<CodeBlock*> link_successors;
TickCount uncached_fetch_ticks = 0;
u32 icache_line_count = 0;
bool invalidated = false;
const u32 GetPC() const { return key.GetPC(); }

View file

@ -80,6 +80,8 @@ void Reset()
g_state.cop0_regs.sr.bits = 0;
g_state.cop0_regs.cause.bits = 0;
ClearICache();
GTE::Reset();
SetPC(RESET_VECTOR);
@ -117,14 +119,17 @@ bool DoState(StateWrapper& sw)
sw.Do(&g_state.load_delay_value);
sw.Do(&g_state.next_load_delay_reg);
sw.Do(&g_state.next_load_delay_value);
sw.Do(&g_state.cache_control);
sw.Do(&g_state.cache_control.bits);
sw.DoBytes(g_state.dcache.data(), g_state.dcache.size());
if (!GTE::DoState(sw))
return false;
if (sw.IsReading())
{
ClearICache();
PGXP::Initialize();
}
return !sw.HasError();
}
@ -1416,7 +1421,6 @@ void InterpretCachedBlock(const CodeBlock& block)
{
// set up the state so we've already fetched the instruction
DebugAssert(g_state.regs.pc == block.GetPC());
g_state.regs.npc = block.GetPC() + 4;
for (const CodeBlockInstruction& cbi : block.instructions)

View file

@ -19,7 +19,32 @@ enum : PhysicalMemoryAddress
DCACHE_LOCATION = UINT32_C(0x1F800000),
DCACHE_LOCATION_MASK = UINT32_C(0xFFFFFC00),
DCACHE_OFFSET_MASK = UINT32_C(0x000003FF),
DCACHE_SIZE = UINT32_C(0x00000400)
DCACHE_SIZE = UINT32_C(0x00000400),
ICACHE_SIZE = UINT32_C(0x00001000),
ICACHE_SLOTS = ICACHE_SIZE / sizeof(u32),
ICACHE_LINE_SIZE = 16,
ICACHE_LINES = ICACHE_SIZE / ICACHE_LINE_SIZE,
ICACHE_SLOTS_PER_LINE = ICACHE_SLOTS / ICACHE_LINES,
ICACHE_TAG_ADDRESS_MASK = 0xFFFFFFF0u
};
enum : u32
{
ICACHE_DISABLED_BIT = 0x01,
ICACHE_INVALD_BIT = 0x02,
};
union CacheControl
{
u32 bits;
BitField<u32, bool, 0, 1> lock_mode;
BitField<u32, bool, 1, 1> invalidate_mode;
BitField<u32, bool, 2, 1> tag_test_mode;
BitField<u32, bool, 3, 1> dcache_scratchpad;
BitField<u32, bool, 7, 1> dcache_enable;
BitField<u32, u8, 8, 2> icache_fill_size; // actually dcache? icache always fills to 16 bytes
BitField<u32, bool, 11, 1> icache_enable;
};
struct State
@ -49,13 +74,15 @@ struct State
Reg next_load_delay_reg = Reg::count;
u32 next_load_delay_value = 0;
u32 cache_control = 0;
CacheControl cache_control{ 0 };
// GTE registers are stored here so we can access them on ARM with a single instruction
GTE::Regs gte_regs = {};
// data cache (used as scratchpad)
std::array<u8, DCACHE_SIZE> dcache = {};
std::array<u32, ICACHE_LINES> icache_tags = {};
std::array<u8, ICACHE_SIZE> icache_data = {};
};
extern State g_state;
@ -64,6 +91,7 @@ void Initialize();
void Shutdown();
void Reset();
bool DoState(StateWrapper& sw);
void ClearICache();
/// Executes interpreter loop.
void Execute();

View file

@ -34,8 +34,38 @@ ALWAYS_INLINE static void DispatchInterrupt()
g_state.regs.pc);
}
// icache stuff
ALWAYS_INLINE bool IsCachedAddress(VirtualMemoryAddress address)
{
// KUSEG, KSEG0
return (address >> 29) <= 4;
}
ALWAYS_INLINE u32 GetICacheLine(VirtualMemoryAddress address)
{
return ((address >> 4) & 0xFFu);
}
ALWAYS_INLINE u32 GetICacheLineOffset(VirtualMemoryAddress address)
{
return (address & (ICACHE_LINE_SIZE - 1));
}
ALWAYS_INLINE u32 GetICacheTagForAddress(VirtualMemoryAddress address)
{
return (address & ICACHE_TAG_ADDRESS_MASK);
}
ALWAYS_INLINE bool CompareICacheTag(VirtualMemoryAddress address)
{
const u32 line = GetICacheLine(address);
return (g_state.icache_tags[line] == GetICacheTagForAddress(address));
}
TickCount GetInstructionReadTicks(VirtualMemoryAddress address);
TickCount GetICacheFillTicks(VirtualMemoryAddress address);
u32 FillICache(VirtualMemoryAddress address);
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks);
// defined in cpu_memory.cpp - memory access functions which return false if an exception was thrown.
bool FetchInstruction();
bool SafeReadInstruction(VirtualMemoryAddress addr, u32* value);
bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value);
bool ReadMemoryHalfWord(VirtualMemoryAddress addr, u16* value);
bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value);

View file

@ -34,7 +34,7 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
const CodeBlockInstruction* cbi = m_block_start;
while (cbi != m_block_end)
{
#ifndef Y_BUILD_CONFIG_RELEASE
#ifdef _DEBUG
SmallString disasm;
DisassembleInstruction(&disasm, cbi->pc, cbi->instruction.bits, nullptr);
Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
@ -840,6 +840,9 @@ void CodeGenerator::BlockPrologue()
{
EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0));
if (m_block->uncached_fetch_ticks > 0)
EmitICacheCheckAndUpdate();
// we don't know the state of the last block, so assume load delays might be in progress
// TODO: Pull load delay into register cache
m_current_instruction_in_branch_delay_slot_dirty = true;

View file

@ -61,6 +61,7 @@ public:
void EmitFlushInterpreterLoadDelay();
void EmitMoveNextInterpreterLoadDelay();
void EmitCancelInterpreterLoadDelayForReg(Reg reg);
void EmitICacheCheckAndUpdate();
void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset);
void EmitStoreCPUStructField(u32 offset, const Value& value);
void EmitAddCPUStructField(u32 offset, const Value& value);

View file

@ -22,4 +22,48 @@ void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value)
m_load_delay_dirty = true;
}
#ifndef CPU_X64
void CodeGenerator::EmitICacheCheckAndUpdate()
{
Value pc = CalculatePC();
Value temp = m_register_cache.AllocateScratch(RegSize_32);
m_register_cache.InhibitAllocation();
EmitShr(temp.GetHostRegister(), pc.GetHostRegister(), RegSize_32, Value::FromConstantU32(29));
LabelType is_cached;
LabelType ready_to_execute;
EmitConditionalBranch(Condition::LessEqual, false, temp.GetHostRegister(), Value::FromConstantU32(4), &is_cached);
EmitAddCPUStructField(offsetof(State, pending_ticks),
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
EmitBranch(&ready_to_execute);
EmitBindLabel(&is_cached);
// cached path
EmitAnd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_TAG_ADDRESS_MASK));
VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK);
for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE)
{
const TickCount fill_ticks = GetICacheFillTicks(current_address);
if (fill_ticks <= 0)
continue;
const u32 line = GetICacheLine(current_address);
const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32));
LabelType cache_hit;
EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offset);
EmitConditionalBranch(Condition::Equal, false, temp.GetHostRegister(), pc, &cache_hit);
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(fill_ticks)));
EmitStoreCPUStructField(offset, pc);
EmitBindLabel(&cache_hit);
EmitAdd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_LINE_SIZE), false);
}
EmitBindLabel(&ready_to_execute);
m_register_cache.UnunhibitAllocation();
}
#endif
} // namespace CPU::Recompiler

View file

@ -2187,6 +2187,52 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
m_emit->L(skip_cancel);
}
void CodeGenerator::EmitICacheCheckAndUpdate()
{
Value pc = CalculatePC();
Value seg = m_register_cache.AllocateScratch(RegSize_32);
m_register_cache.InhibitAllocation();
m_emit->mov(GetHostReg32(seg), GetHostReg32(pc));
m_emit->shr(GetHostReg32(seg), 29);
Xbyak::Label is_cached;
m_emit->cmp(GetHostReg32(seg), 4);
m_emit->jle(is_cached);
// uncached
Xbyak::Label done;
m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)],
static_cast<u32>(m_block->uncached_fetch_ticks));
m_emit->jmp(done, Xbyak::CodeGenerator::T_NEAR);
// cached
m_emit->L(is_cached);
m_emit->and_(GetHostReg32(pc), ICACHE_TAG_ADDRESS_MASK);
VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK);
for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE)
{
const TickCount fill_ticks = GetICacheFillTicks(current_address);
if (fill_ticks <= 0)
continue;
const u32 line = GetICacheLine(current_address);
const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32));
Xbyak::Label cache_hit;
m_emit->cmp(GetHostReg32(pc), m_emit->dword[GetCPUPtrReg() + offset]);
m_emit->je(cache_hit);
m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], GetHostReg32(pc));
m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], static_cast<u32>(fill_ticks));
m_emit->L(cache_hit);
m_emit->add(GetHostReg32(pc), ICACHE_LINE_SIZE);
}
m_emit->L(done);
m_register_cache.UnunhibitAllocation();
}
void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
{
const s64 jump_distance =

View file

@ -14,6 +14,7 @@ namespace Recompiler::Thunks {
//////////////////////////////////////////////////////////////////////////
bool InterpretInstruction();
bool InterpretInstructionPGXP();
void CheckAndUpdateICache(u32 pc, u32 line_count);
// Memory access functions for the JIT - MSB is set on exception.
u64 ReadMemoryByte(u32 address);

View file

@ -362,6 +362,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
si.SetStringValue("CPU", "ExecutionMode", Settings::GetCPUExecutionModeName(Settings::DEFAULT_CPU_EXECUTION_MODE));
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false);
si.SetBoolValue("CPU", "ICache", false);
si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER));
si.SetIntValue("GPU", "ResolutionScale", 1);
@ -452,7 +453,8 @@ void HostInterface::FixIncompatibleSettings(bool display_osd_messages)
{
if (display_osd_messages)
{
AddOSDMessage(TranslateStdString("OSDMessage", "PGXP is incompatible with the software renderer, disabling PGXP."), 10.0f);
AddOSDMessage(
TranslateStdString("OSDMessage", "PGXP is incompatible with the software renderer, disabling PGXP."), 10.0f);
}
g_settings.gpu_pgxp_enable = false;
}
@ -510,6 +512,8 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode.",
Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode));
CPU::CodeCache::SetUseRecompiler(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
CPU::CodeCache::Flush();
CPU::ClearICache();
}
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler &&
@ -520,6 +524,15 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
CPU::CodeCache::Flush();
}
if (g_settings.cpu_execution_mode != CPUExecutionMode::Interpreter &&
g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache)
{
AddFormattedOSDMessage(5.0f, "CPU ICache %s, flushing all blocks.",
g_settings.cpu_recompiler_icache ? "enabled" : "disabled");
CPU::CodeCache::Flush();
CPU::ClearICache();
}
m_audio_stream->SetOutputVolume(g_settings.audio_output_muted ? 0 : g_settings.audio_output_volume);
if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale ||

View file

@ -92,6 +92,7 @@ void Settings::Load(SettingsInterface& si)
si.GetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(DEFAULT_CPU_EXECUTION_MODE)).c_str())
.value_or(DEFAULT_CPU_EXECUTION_MODE);
cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false);
cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false);
gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str())
.value_or(DEFAULT_GPU_RENDERER);
@ -206,6 +207,7 @@ void Settings::Save(SettingsInterface& si) const
si.SetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(cpu_execution_mode));
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions);
si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache);
si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer));
si.SetStringValue("GPU", "Adapter", gpu_adapter.c_str());

View file

@ -69,6 +69,7 @@ struct Settings
CPUExecutionMode cpu_execution_mode = CPUExecutionMode::Interpreter;
bool cpu_recompiler_memory_exceptions = false;
bool cpu_recompiler_icache = false;
float emulation_speed = 1.0f;
bool speed_limiter_enabled = true;

View file

@ -370,7 +370,7 @@ void LibretroHostInterface::OnSystemDestroyed()
m_using_hardware_renderer = false;
}
static std::array<retro_core_option_definition, 31> s_option_definitions = {{
static std::array<retro_core_option_definition, 32> s_option_definitions = {{
{"duckstation_Console.Region",
"Console Region",
"Determines which region/hardware to emulate. Auto-Detect will use the region of the disc inserted.",
@ -406,6 +406,12 @@ static std::array<retro_core_option_definition, 31> s_option_definitions = {{
"Which mode to use for CPU emulation. Recompiler provides the best performance.",
{{"Interpreter", "Interpreter"}, {"CachedIntepreter", "Cached Interpreter"}, {"Recompiler", "Recompiler"}},
"Recompiler"},
{"duckstation_CPU.RecompilerICache",
"CPU Recompiler ICache",
"Determines whether the CPU's instruction cache is simulated in the recompiler. Improves accuracy at a small cost "
"to performance. If games are running too fast, try enabling this option.",
{{"true", "Enabled"}, {"false", "Disabled"}},
"false"},
{"duckstation_GPU.Renderer",
"GPU Renderer",
"Which renderer to use to emulate the GPU",

View file

@ -27,6 +27,8 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface,
SettingWidgetBinder::BindWidgetToIntSetting(m_host_interface, m_ui.gpuMaxRunAhead, "Hacks", "GPUMaxRunAhead");
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.cpuRecompilerMemoryExceptions, "CPU",
"RecompilerMemoryExceptions", false);
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.cpuRecompilerICache, "CPU", "RecompilerICache",
false);
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showDebugMenu, "Main", "ShowDebugMenu");
SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.gpuUseDebugDevice, "GPU", "UseDebugDevice");
@ -38,6 +40,10 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface,
dialog->registerWidgetHelp(m_ui.gpuUseDebugDevice, tr("Use Debug Host GPU Device"), tr("Unchecked"),
tr("Enables the usage of debug devices and shaders for rendering APIs which support them. "
"Should only be used when debugging the emulator."));
dialog->registerWidgetHelp(
m_ui.cpuRecompilerICache, tr("Enable Recompiler ICache"), tr("Unchecked"),
tr("Determines whether the CPU's instruction cache is simulated in the recompiler. Improves accuracy at a small "
"cost to performance. If games are running too fast, try enabling this option."));
}
AdvancedSettingsWidget::~AdvancedSettingsWidget() = default;

View file

@ -184,6 +184,20 @@
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="cpuRecompilerMemoryExceptions">
<property name="text">
<string>Enable Recompiler Memory Exceptions</string>
</property>
</widget>
</item>
<item row="5" column="1">
<widget class="QCheckBox" name="cpuRecompilerICache">
<property name="text">
<string>Enable Recompiler ICache</string>
</property>
</widget>
</item>
<item row="6" column="0" colspan="2">
<widget class="QPushButton" name="resetToDefaultButton">
<property name="text">
@ -191,13 +205,6 @@
</property>
</widget>
</item>
<item row="5" column="0" colspan="2">
<widget class="QCheckBox" name="cpuRecompilerMemoryExceptions">
<property name="text">
<string>Enable Recompiler Memory Exceptions</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>

View file

@ -950,6 +950,11 @@ void SDLHostInterface::DrawDebugMenu()
settings_changed |= ImGui::MenuItem("Show Timers State", nullptr, &debug_settings.show_timers_state);
settings_changed |= ImGui::MenuItem("Show MDEC State", nullptr, &debug_settings.show_mdec_state);
ImGui::Separator();
settings_changed |= ImGui::MenuItem("Recompiler Memory Exceptions", nullptr, &m_settings_copy.cpu_recompiler_memory_exceptions);
settings_changed |= ImGui::MenuItem("Recompiler ICache", nullptr, &m_settings_copy.cpu_recompiler_icache);
if (settings_changed)
{
// have to apply it to the copy too, otherwise it won't save