diff --git a/src/core/bus.cpp b/src/core/bus.cpp index e6fa98296..7b8a70c0a 100644 --- a/src/core/bus.cpp +++ b/src/core/bus.cpp @@ -742,10 +742,153 @@ ALWAYS_INLINE static TickCount DoDMAAccess(u32 offset, u32& value) namespace CPU { +template +ALWAYS_INLINE_RELEASE void DoInstructionRead(PhysicalMemoryAddress address, void* data) +{ + using namespace Bus; + + address &= PHYSICAL_MEMORY_ADDRESS_MASK; + + if (address < RAM_MIRROR_END) + { + std::memcpy(data, &g_ram[address & RAM_MASK], sizeof(u32) * word_count); + if constexpr (add_ticks) + g_state.pending_ticks += (icache_read ? 1 : 4) * word_count; + } + else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) + { + std::memcpy(data, &g_bios[(address - BIOS_BASE) & BIOS_MASK], sizeof(u32) * word_count); /* copy all word_count words, as the RAM and fault paths do */ + if constexpr (add_ticks) + g_state.pending_ticks += m_bios_access_time[static_cast(MemoryAccessSize::Word)] * word_count; + } + else + { + CPU::RaiseException(address, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0)); + std::memset(data, 0, sizeof(u32) * word_count); + } +} + +TickCount GetInstructionReadTicks(VirtualMemoryAddress address) +{ + using namespace Bus; + + address &= PHYSICAL_MEMORY_ADDRESS_MASK; + + if (address < RAM_MIRROR_END) + { + return 4; + } + else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) + { + return m_bios_access_time[static_cast(MemoryAccessSize::Word)]; + } + else + { + return 0; + } +} + +TickCount GetICacheFillTicks(VirtualMemoryAddress address) +{ + using namespace Bus; + + address &= PHYSICAL_MEMORY_ADDRESS_MASK; + + if (address < RAM_MIRROR_END) + { + return 1 * (ICACHE_LINE_SIZE / sizeof(u32)); + } + else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) + { + return m_bios_access_time[static_cast(MemoryAccessSize::Word)] * (ICACHE_LINE_SIZE / sizeof(u32)); + } + else + { + return 0; + } +} + +void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks) +{ + VirtualMemoryAddress current_pc = g_state.regs.pc & 
ICACHE_TAG_ADDRESS_MASK; + if (IsCachedAddress(current_pc)) + { + TickCount ticks = 0; + TickCount cached_ticks_per_line = GetICacheFillTicks(current_pc); + for (u32 i = 0; i < line_count; i++, current_pc += ICACHE_LINE_SIZE) + { + const u32 line = GetICacheLine(current_pc); + if (g_state.icache_tags[line] != current_pc) + { + g_state.icache_tags[line] = current_pc; + ticks += cached_ticks_per_line; + } + } + + g_state.pending_ticks += ticks; + } + else + { + g_state.pending_ticks += uncached_ticks; + } +} + +u32 FillICache(VirtualMemoryAddress address) +{ + const u32 line = GetICacheLine(address); + g_state.icache_tags[line] = GetICacheTagForAddress(address); + u8* line_data = &g_state.icache_data[line * ICACHE_LINE_SIZE]; + DoInstructionRead(address & ~(ICACHE_LINE_SIZE - 1u), line_data); + + const u32 offset = GetICacheLineOffset(address); + u32 result; + std::memcpy(&result, &line_data[offset], sizeof(result)); + return result; +} + +void ClearICache() +{ + std::memset(g_state.icache_data.data(), 0, ICACHE_SIZE); + g_state.icache_tags.fill(ICACHE_INVALD_BIT | ICACHE_DISABLED_BIT); +} + +ALWAYS_INLINE_RELEASE static u32 ReadICache(VirtualMemoryAddress address) +{ + const u32 line = GetICacheLine(address); + const u8* line_data = &g_state.icache_data[line * ICACHE_LINE_SIZE]; + const u32 offset = GetICacheLineOffset(address); + u32 result; + std::memcpy(&result, &line_data[offset], sizeof(result)); + return result; +} + +ALWAYS_INLINE_RELEASE static void WriteICache(VirtualMemoryAddress address, u32 value) +{ + const u32 line = GetICacheLine(address); + const u32 offset = GetICacheLineOffset(address); + g_state.icache_tags[line] = GetICacheTagForAddress(address) | ICACHE_INVALD_BIT; + std::memcpy(&g_state.icache_data[line * ICACHE_LINE_SIZE + offset], &value, sizeof(value)); +} + static void WriteCacheControl(u32 value) { Log_WarningPrintf("Cache control <- 0x%08X", value); - g_state.cache_control = value; + + CacheControl changed_bits{g_state.cache_control.bits 
^ value}; + g_state.cache_control.bits = value; + if (changed_bits.icache_enable) + { + if (g_state.cache_control.icache_enable) + { + for (u32 i = 0; i < ICACHE_LINES; i++) + g_state.icache_tags[i] &= ~ICACHE_DISABLED_BIT; + } + else + { + for (u32 i = 0; i < ICACHE_LINES; i++) + g_state.icache_tags[i] |= ICACHE_DISABLED_BIT; + } + } } template @@ -797,7 +940,10 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& if constexpr (type == MemoryAccessType::Write) { if (g_state.cop0_regs.sr.Isc) + { + WriteICache(address, value); return 0; + } } address &= PHYSICAL_MEMORY_ADDRESS_MASK; @@ -829,7 +975,7 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& if (address == 0xFFFE0130) { if constexpr (type == MemoryAccessType::Read) - value = g_state.cache_control; + value = g_state.cache_control.bits; else WriteCacheControl(value); @@ -849,6 +995,10 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& { return DoRAMAccess(address, value); } + else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) + { + return DoBIOSAccess(static_cast(address - BIOS_BASE), value); + } else if (address < EXP1_BASE) { return DoInvalidAccess(type, size, address, value); @@ -921,14 +1071,6 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& { return DoEXP2Access(address & EXP2_MASK, value); } - else if (address < BIOS_BASE) - { - return DoInvalidAccess(type, size, address, value); - } - else if (address < (BIOS_BASE + BIOS_SIZE)) - { - return DoBIOSAccess(static_cast(address - BIOS_BASE), value); - } else { return DoInvalidAccess(type, size, address, value); @@ -961,12 +1103,45 @@ static bool DoAlignmentCheck(VirtualMemoryAddress address) bool FetchInstruction() { DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4)); - if (DoMemoryAccess(g_state.regs.npc, g_state.next_instruction.bits) < - 0) + + using namespace Bus; + + PhysicalMemoryAddress address = 
g_state.regs.npc; + switch (address >> 29) { - // Bus errors don't set BadVaddr. - RaiseException(g_state.regs.npc, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0)); - return false; + case 0x00: // KUSEG 0M-512M + case 0x04: // KSEG0 - physical memory cached + { +#if 0 + // TODO: icache + TickCount cycles; + DoInstructionRead(address, cycles, g_state.next_instruction.bits); +#else + if (CompareICacheTag(address)) + g_state.next_instruction.bits = ReadICache(address); + else + g_state.next_instruction.bits = FillICache(address); + +#endif + } + break; + + case 0x05: // KSEG1 - physical memory uncached + { + DoInstructionRead(address, &g_state.next_instruction.bits); + } + break; + + case 0x01: // KUSEG 512M-1024M + case 0x02: // KUSEG 1024M-1536M + case 0x03: // KUSEG 1536M-2048M + case 0x06: // KSEG2 + case 0x07: // KSEG2 + default: + { + CPU::RaiseException(address, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0)); + return false; + } } g_state.regs.pc = g_state.regs.npc; @@ -974,6 +1149,30 @@ bool FetchInstruction() return true; } +bool SafeReadInstruction(VirtualMemoryAddress addr, u32* value) +{ + switch (addr >> 29) + { + case 0x00: // KUSEG 0M-512M + case 0x04: // KSEG0 - physical memory cached + case 0x05: // KSEG1 - physical memory uncached + { + DoInstructionRead(addr, value); + return true; + } + + case 0x01: // KUSEG 512M-1024M + case 0x02: // KUSEG 1024M-1536M + case 0x03: // KUSEG 1536M-2048M + case 0x06: // KSEG2 + case 0x07: // KSEG2 + default: + { + return false; + } + } +} + bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value) { u32 temp = 0; diff --git a/src/core/bus.h b/src/core/bus.h index a18ba1274..10c44f90e 100644 --- a/src/core/bus.h +++ b/src/core/bus.h @@ -78,41 +78,6 @@ extern std::bitset m_ram_code_bits; extern u8 g_ram[RAM_SIZE]; // 2MB RAM extern u8 g_bios[BIOS_SIZE]; // 512K BIOS ROM -/// Returns the address which should be used for code caching (i.e. removes mirrors). 
-ALWAYS_INLINE PhysicalMemoryAddress UnmirrorAddress(PhysicalMemoryAddress address) -{ - // RAM - if (address < 0x800000) - return address & UINT32_C(0x1FFFFF); - else - return address; -} - -/// Returns true if the address specified is cacheable (RAM or BIOS). -ALWAYS_INLINE bool IsCacheableAddress(PhysicalMemoryAddress address) -{ - return (address < RAM_MIRROR_END) || (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)); -} - -/// Reads a cachable address (RAM or BIOS). -ALWAYS_INLINE u32 ReadCacheableAddress(PhysicalMemoryAddress address) -{ - u32 value; - if (address < RAM_MIRROR_END) - { - std::memcpy(&value, &g_ram[address & RAM_MASK], sizeof(value)); - return value; - } - else - { - std::memcpy(&value, &g_bios[address & BIOS_MASK], sizeof(value)); - return value; - } -} - -/// Returns true if the address specified is writable (RAM). -ALWAYS_INLINE bool IsRAMAddress(PhysicalMemoryAddress address) { return address < RAM_MIRROR_END; } - /// Flags a RAM region as code, so we know when to invalidate blocks. ALWAYS_INLINE void SetRAMCodePage(u32 index) { m_ram_code_bits[index] = true; } diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index b4fc78ebb..727f449d8 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -139,8 +139,7 @@ static void ExecuteImpl() { if (HasPendingInterrupt()) { - // TODO: Fill in m_next_instruction... 
- SafeReadMemoryWord(g_state.regs.pc, &g_state.next_instruction.bits); + SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits); DispatchInterrupt(); next_block_key = GetNextBlockKey(); } @@ -165,6 +164,9 @@ static void ExecuteImpl() LogCurrentState(); #endif + if (g_settings.cpu_recompiler_icache) + CheckAndUpdateICacheTags(block->icache_line_count, block->uncached_fetch_ticks); + InterpretCachedBlock(*block); if (g_state.pending_ticks >= g_state.downcount) @@ -247,7 +249,7 @@ void ExecuteRecompiler() { if (HasPendingInterrupt()) { - SafeReadMemoryWord(g_state.regs.pc, &g_state.next_instruction.bits); + SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits); DispatchInterrupt(); } @@ -351,7 +353,8 @@ bool RevalidateBlock(CodeBlock* block) { for (const CodeBlockInstruction& cbi : block->instructions) { - u32 new_code = Bus::ReadCacheableAddress(cbi.pc & PHYSICAL_MEMORY_ADDRESS_MASK); + u32 new_code = 0; + SafeReadInstruction(cbi.pc, &new_code); if (cbi.instruction.bits != new_code) { Log_DebugPrintf("Block 0x%08X changed at PC 0x%08X - %08X to %08X - recompiling.", block->GetPC(), cbi.pc, @@ -395,16 +398,12 @@ bool CompileBlock(CodeBlock* block) __debugbreak(); #endif + u32 last_cache_line = ICACHE_LINES; + for (;;) { CodeBlockInstruction cbi = {}; - - const PhysicalMemoryAddress phys_addr = pc & PHYSICAL_MEMORY_ADDRESS_MASK; - if (!Bus::IsCacheableAddress(phys_addr)) - break; - - cbi.instruction.bits = Bus::ReadCacheableAddress(phys_addr); - if (!IsInvalidInstruction(cbi.instruction)) + if (!SafeReadInstruction(pc, &cbi.instruction.bits) || !IsInvalidInstruction(cbi.instruction)) break; cbi.pc = pc; @@ -416,6 +415,18 @@ bool CompileBlock(CodeBlock* block) cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction); cbi.can_trap = CanInstructionTrap(cbi.instruction, InUserMode()); + if (g_settings.cpu_recompiler_icache) + { + const u32 icache_line = GetICacheLine(pc); + if (icache_line != last_cache_line) + { + 
block->icache_line_count++; + /* icache_line_count is a number of cache lines (used as a loop bound by CheckAndUpdateICacheTags); it must not be overwritten with a tick value */ + last_cache_line = icache_line; + } + block->uncached_fetch_ticks += GetInstructionReadTicks(pc); + } + // instruction is decoded now block->instructions.push_back(cbi); pc += sizeof(cbi.instruction.bits); diff --git a/src/core/cpu_code_cache.h b/src/core/cpu_code_cache.h index eec01ac3b..068e6706e 100644 --- a/src/core/cpu_code_cache.h +++ b/src/core/cpu_code_cache.h @@ -61,6 +61,8 @@ struct CodeBlock std::vector link_predecessors; std::vector link_successors; + TickCount uncached_fetch_ticks = 0; + u32 icache_line_count = 0; bool invalidated = false; const u32 GetPC() const { return key.GetPC(); } diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index ea90a1fb5..3dd02cd70 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -80,6 +80,8 @@ void Reset() g_state.cop0_regs.sr.bits = 0; g_state.cop0_regs.cause.bits = 0; + ClearICache(); + GTE::Reset(); SetPC(RESET_VECTOR); @@ -117,14 +119,17 @@ bool DoState(StateWrapper& sw) sw.Do(&g_state.load_delay_value); sw.Do(&g_state.next_load_delay_reg); sw.Do(&g_state.next_load_delay_value); - sw.Do(&g_state.cache_control); + sw.Do(&g_state.cache_control.bits); sw.DoBytes(g_state.dcache.data(), g_state.dcache.size()); if (!GTE::DoState(sw)) return false; if (sw.IsReading()) + { + ClearICache(); PGXP::Initialize(); + } return !sw.HasError(); } @@ -1416,7 +1421,6 @@ void InterpretCachedBlock(const CodeBlock& block) { // set up the state so we've already fetched the instruction DebugAssert(g_state.regs.pc == block.GetPC()); - g_state.regs.npc = block.GetPC() + 4; for (const CodeBlockInstruction& cbi : block.instructions) diff --git a/src/core/cpu_core.h b/src/core/cpu_core.h index 660596353..43c14c99a 100644 --- a/src/core/cpu_core.h +++ b/src/core/cpu_core.h @@ -19,7 +19,32 @@ enum : PhysicalMemoryAddress DCACHE_LOCATION = UINT32_C(0x1F800000), DCACHE_LOCATION_MASK = UINT32_C(0xFFFFFC00), DCACHE_OFFSET_MASK = 
UINT32_C(0x000003FF), - DCACHE_SIZE = UINT32_C(0x00000400) + DCACHE_SIZE = UINT32_C(0x00000400), + ICACHE_SIZE = UINT32_C(0x00001000), + ICACHE_SLOTS = ICACHE_SIZE / sizeof(u32), + ICACHE_LINE_SIZE = 16, + ICACHE_LINES = ICACHE_SIZE / ICACHE_LINE_SIZE, + ICACHE_SLOTS_PER_LINE = ICACHE_SLOTS / ICACHE_LINES, + ICACHE_TAG_ADDRESS_MASK = 0xFFFFFFF0u +}; + +enum : u32 +{ + ICACHE_DISABLED_BIT = 0x01, + ICACHE_INVALD_BIT = 0x02, +}; + +union CacheControl +{ + u32 bits; + + BitField lock_mode; + BitField invalidate_mode; + BitField tag_test_mode; + BitField dcache_scratchpad; + BitField dcache_enable; + BitField icache_fill_size; // actually dcache? icache always fills to 16 bytes + BitField icache_enable; }; struct State @@ -49,13 +74,15 @@ struct State Reg next_load_delay_reg = Reg::count; u32 next_load_delay_value = 0; - u32 cache_control = 0; + CacheControl cache_control{ 0 }; // GTE registers are stored here so we can access them on ARM with a single instruction GTE::Regs gte_regs = {}; // data cache (used as scratchpad) std::array dcache = {}; + std::array icache_tags = {}; + std::array icache_data = {}; }; extern State g_state; @@ -64,6 +91,7 @@ void Initialize(); void Shutdown(); void Reset(); bool DoState(StateWrapper& sw); +void ClearICache(); /// Executes interpreter loop. 
void Execute(); diff --git a/src/core/cpu_core_private.h b/src/core/cpu_core_private.h index 41ad24ec5..9f74fd7f0 100644 --- a/src/core/cpu_core_private.h +++ b/src/core/cpu_core_private.h @@ -34,8 +34,38 @@ ALWAYS_INLINE static void DispatchInterrupt() g_state.regs.pc); } +// icache stuff +ALWAYS_INLINE bool IsCachedAddress(VirtualMemoryAddress address) +{ + // KUSEG, KSEG0 + return (address >> 29) <= 4; +} +ALWAYS_INLINE u32 GetICacheLine(VirtualMemoryAddress address) +{ + return ((address >> 4) & 0xFFu); +} +ALWAYS_INLINE u32 GetICacheLineOffset(VirtualMemoryAddress address) +{ + return (address & (ICACHE_LINE_SIZE - 1)); +} +ALWAYS_INLINE u32 GetICacheTagForAddress(VirtualMemoryAddress address) +{ + return (address & ICACHE_TAG_ADDRESS_MASK); +} +ALWAYS_INLINE bool CompareICacheTag(VirtualMemoryAddress address) +{ + const u32 line = GetICacheLine(address); + return (g_state.icache_tags[line] == GetICacheTagForAddress(address)); +} + +TickCount GetInstructionReadTicks(VirtualMemoryAddress address); +TickCount GetICacheFillTicks(VirtualMemoryAddress address); +u32 FillICache(VirtualMemoryAddress address); +void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks); + // defined in cpu_memory.cpp - memory access functions which return false if an exception was thrown. 
bool FetchInstruction(); +bool SafeReadInstruction(VirtualMemoryAddress addr, u32* value); bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value); bool ReadMemoryHalfWord(VirtualMemoryAddress addr, u16* value); bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value); diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 1c7ae0db4..f350844a3 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -34,7 +34,7 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin const CodeBlockInstruction* cbi = m_block_start; while (cbi != m_block_end) { -#ifndef Y_BUILD_CONFIG_RELEASE +#ifdef _DEBUG SmallString disasm; DisassembleInstruction(&disasm, cbi->pc, cbi->instruction.bits, nullptr); Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray()); @@ -840,6 +840,9 @@ void CodeGenerator::BlockPrologue() { EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0)); + if (m_block->uncached_fetch_ticks > 0) + EmitICacheCheckAndUpdate(); + // we don't know the state of the last block, so assume load delays might be in progress // TODO: Pull load delay into register cache m_current_instruction_in_branch_delay_slot_dirty = true; diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h index b59992330..438786bd3 100644 --- a/src/core/cpu_recompiler_code_generator.h +++ b/src/core/cpu_recompiler_code_generator.h @@ -61,6 +61,7 @@ public: void EmitFlushInterpreterLoadDelay(); void EmitMoveNextInterpreterLoadDelay(); void EmitCancelInterpreterLoadDelayForReg(Reg reg); + void EmitICacheCheckAndUpdate(); void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset); void EmitStoreCPUStructField(u32 offset, const Value& value); void EmitAddCPUStructField(u32 offset, const Value& value); diff --git a/src/core/cpu_recompiler_code_generator_generic.cpp 
b/src/core/cpu_recompiler_code_generator_generic.cpp index b652cb24b..c9e9a7ee6 100644 --- a/src/core/cpu_recompiler_code_generator_generic.cpp +++ b/src/core/cpu_recompiler_code_generator_generic.cpp @@ -22,4 +22,48 @@ void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value) m_load_delay_dirty = true; } +#ifndef CPU_X64 + +void CodeGenerator::EmitICacheCheckAndUpdate() +{ + Value pc = CalculatePC(); + Value temp = m_register_cache.AllocateScratch(RegSize_32); + m_register_cache.InhibitAllocation(); + + EmitShr(temp.GetHostRegister(), pc.GetHostRegister(), RegSize_32, Value::FromConstantU32(29)); + LabelType is_cached; + LabelType ready_to_execute; + EmitConditionalBranch(Condition::LessEqual, false, temp.GetHostRegister(), Value::FromConstantU32(4), &is_cached); + EmitAddCPUStructField(offsetof(State, pending_ticks), + Value::FromConstantU32(static_cast(m_block->uncached_fetch_ticks))); + EmitBranch(&ready_to_execute); + EmitBindLabel(&is_cached); + + // cached path + EmitAnd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_TAG_ADDRESS_MASK)); + VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK); + for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE) + { + const TickCount fill_ticks = GetICacheFillTicks(current_address); + if (fill_ticks <= 0) + continue; + + const u32 line = GetICacheLine(current_address); + const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32)); + LabelType cache_hit; + + EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offset); + EmitConditionalBranch(Condition::Equal, false, temp.GetHostRegister(), pc, &cache_hit); + EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast(fill_ticks))); + EmitStoreCPUStructField(offset, pc); + EmitBindLabel(&cache_hit); + EmitAdd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_LINE_SIZE), false); + } 
+ + EmitBindLabel(&ready_to_execute); + m_register_cache.UnunhibitAllocation(); +} + +#endif + } // namespace CPU::Recompiler diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index e6f85e3e6..fd2f34035 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -2187,6 +2187,52 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg) m_emit->L(skip_cancel); } +void CodeGenerator::EmitICacheCheckAndUpdate() +{ + Value pc = CalculatePC(); + Value seg = m_register_cache.AllocateScratch(RegSize_32); + m_register_cache.InhibitAllocation(); + + m_emit->mov(GetHostReg32(seg), GetHostReg32(pc)); + m_emit->shr(GetHostReg32(seg), 29); + + Xbyak::Label is_cached; + m_emit->cmp(GetHostReg32(seg), 4); + m_emit->jle(is_cached); + + // uncached + Xbyak::Label done; + m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], + static_cast(m_block->uncached_fetch_ticks)); + m_emit->jmp(done, Xbyak::CodeGenerator::T_NEAR); + + // cached + m_emit->L(is_cached); + m_emit->and_(GetHostReg32(pc), ICACHE_TAG_ADDRESS_MASK); + + VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK); + for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE) + { + const TickCount fill_ticks = GetICacheFillTicks(current_address); + if (fill_ticks <= 0) + continue; + + const u32 line = GetICacheLine(current_address); + const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32)); + Xbyak::Label cache_hit; + + m_emit->cmp(GetHostReg32(pc), m_emit->dword[GetCPUPtrReg() + offset]); + m_emit->je(cache_hit); + m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], GetHostReg32(pc)); + m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], static_cast(fill_ticks)); + m_emit->L(cache_hit); + m_emit->add(GetHostReg32(pc), ICACHE_LINE_SIZE); + } + + m_emit->L(done); + 
m_register_cache.UnunhibitAllocation(); +} + void CodeGenerator::EmitBranch(const void* address, bool allow_scratch) { const s64 jump_distance = diff --git a/src/core/cpu_recompiler_thunks.h b/src/core/cpu_recompiler_thunks.h index 602f522af..f698a859d 100644 --- a/src/core/cpu_recompiler_thunks.h +++ b/src/core/cpu_recompiler_thunks.h @@ -14,6 +14,7 @@ namespace Recompiler::Thunks { ////////////////////////////////////////////////////////////////////////// bool InterpretInstruction(); bool InterpretInstructionPGXP(); +void CheckAndUpdateICache(u32 pc, u32 line_count); // Memory access functions for the JIT - MSB is set on exception. u64 ReadMemoryByte(u32 address); diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index db8efc625..9322a5f62 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -362,6 +362,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) si.SetStringValue("CPU", "ExecutionMode", Settings::GetCPUExecutionModeName(Settings::DEFAULT_CPU_EXECUTION_MODE)); si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false); + si.SetBoolValue("CPU", "RecompilerICache", false); /* same key that Settings::Load and Settings::Save use */ si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER)); si.SetIntValue("GPU", "ResolutionScale", 1); @@ -452,7 +453,8 @@ void HostInterface::FixIncompatibleSettings(bool display_osd_messages) { if (display_osd_messages) { - AddOSDMessage(TranslateStdString("OSDMessage", "PGXP is incompatible with the software renderer, disabling PGXP."), 10.0f); + AddOSDMessage( + TranslateStdString("OSDMessage", "PGXP is incompatible with the software renderer, disabling PGXP."), 10.0f); } g_settings.gpu_pgxp_enable = false; } @@ -510,6 +512,8 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode.", Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode)); CPU::CodeCache::SetUseRecompiler(g_settings.cpu_execution_mode 
== CPUExecutionMode::Recompiler); + CPU::CodeCache::Flush(); + CPU::ClearICache(); } if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler && @@ -520,6 +524,15 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) CPU::CodeCache::Flush(); } + if (g_settings.cpu_execution_mode != CPUExecutionMode::Interpreter && + g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache) + { + AddFormattedOSDMessage(5.0f, "CPU ICache %s, flushing all blocks.", + g_settings.cpu_recompiler_icache ? "enabled" : "disabled"); + CPU::CodeCache::Flush(); + CPU::ClearICache(); + } + m_audio_stream->SetOutputVolume(g_settings.audio_output_muted ? 0 : g_settings.audio_output_volume); if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale || diff --git a/src/core/settings.cpp b/src/core/settings.cpp index e0254e3c8..21cdbc4b1 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -92,6 +92,7 @@ void Settings::Load(SettingsInterface& si) si.GetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(DEFAULT_CPU_EXECUTION_MODE)).c_str()) .value_or(DEFAULT_CPU_EXECUTION_MODE); cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false); + cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false); gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str()) .value_or(DEFAULT_GPU_RENDERER); @@ -206,6 +207,7 @@ void Settings::Save(SettingsInterface& si) const si.SetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(cpu_execution_mode)); si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions); + si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache); si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer)); si.SetStringValue("GPU", "Adapter", gpu_adapter.c_str()); diff --git a/src/core/settings.h b/src/core/settings.h index 91e26d5b4..5b98bde8e 100644 
--- a/src/core/settings.h +++ b/src/core/settings.h @@ -69,6 +69,7 @@ struct Settings CPUExecutionMode cpu_execution_mode = CPUExecutionMode::Interpreter; bool cpu_recompiler_memory_exceptions = false; + bool cpu_recompiler_icache = false; float emulation_speed = 1.0f; bool speed_limiter_enabled = true; diff --git a/src/duckstation-libretro/libretro_host_interface.cpp b/src/duckstation-libretro/libretro_host_interface.cpp index a8fc3660b..8388ae868 100644 --- a/src/duckstation-libretro/libretro_host_interface.cpp +++ b/src/duckstation-libretro/libretro_host_interface.cpp @@ -370,7 +370,7 @@ void LibretroHostInterface::OnSystemDestroyed() m_using_hardware_renderer = false; } -static std::array s_option_definitions = {{ +static std::array s_option_definitions = {{ {"duckstation_Console.Region", "Console Region", "Determines which region/hardware to emulate. Auto-Detect will use the region of the disc inserted.", @@ -406,6 +406,12 @@ static std::array s_option_definitions = {{ "Which mode to use for CPU emulation. Recompiler provides the best performance.", {{"Interpreter", "Interpreter"}, {"CachedIntepreter", "Cached Interpreter"}, {"Recompiler", "Recompiler"}}, "Recompiler"}, + {"duckstation_CPU.RecompilerICache", + "CPU Recompiler ICache", + "Determines whether the CPU's instruction cache is simulated in the recompiler. Improves accuracy at a small cost " + "to performance. 
If games are running too fast, try enabling this option.", + {{"true", "Enabled"}, {"false", "Disabled"}}, + "false"}, {"duckstation_GPU.Renderer", "GPU Renderer", "Which renderer to use to emulate the GPU", diff --git a/src/duckstation-qt/advancedsettingswidget.cpp b/src/duckstation-qt/advancedsettingswidget.cpp index 2b4d93a32..837485555 100644 --- a/src/duckstation-qt/advancedsettingswidget.cpp +++ b/src/duckstation-qt/advancedsettingswidget.cpp @@ -27,6 +27,8 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface, SettingWidgetBinder::BindWidgetToIntSetting(m_host_interface, m_ui.gpuMaxRunAhead, "Hacks", "GPUMaxRunAhead"); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.cpuRecompilerMemoryExceptions, "CPU", "RecompilerMemoryExceptions", false); + SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.cpuRecompilerICache, "CPU", "RecompilerICache", + false); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showDebugMenu, "Main", "ShowDebugMenu"); SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.gpuUseDebugDevice, "GPU", "UseDebugDevice"); @@ -38,6 +40,10 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface, dialog->registerWidgetHelp(m_ui.gpuUseDebugDevice, tr("Use Debug Host GPU Device"), tr("Unchecked"), tr("Enables the usage of debug devices and shaders for rendering APIs which support them. " "Should only be used when debugging the emulator.")); + dialog->registerWidgetHelp( + m_ui.cpuRecompilerICache, tr("Enable Recompiler ICache"), tr("Unchecked"), + tr("Determines whether the CPU's instruction cache is simulated in the recompiler. Improves accuracy at a small " + "cost to performance. 
If games are running too fast, try enabling this option.")); } AdvancedSettingsWidget::~AdvancedSettingsWidget() = default; diff --git a/src/duckstation-qt/advancedsettingswidget.ui b/src/duckstation-qt/advancedsettingswidget.ui index d1ba75000..d75529513 100644 --- a/src/duckstation-qt/advancedsettingswidget.ui +++ b/src/duckstation-qt/advancedsettingswidget.ui @@ -184,6 +184,20 @@ + + + + Enable Recompiler Memory Exceptions + + + + + + + Enable Recompiler ICache + + + @@ -191,13 +205,6 @@ - - - - Enable Recompiler Memory Exceptions - - - diff --git a/src/duckstation-sdl/sdl_host_interface.cpp b/src/duckstation-sdl/sdl_host_interface.cpp index 84b20e750..23d49de29 100644 --- a/src/duckstation-sdl/sdl_host_interface.cpp +++ b/src/duckstation-sdl/sdl_host_interface.cpp @@ -950,6 +950,11 @@ void SDLHostInterface::DrawDebugMenu() settings_changed |= ImGui::MenuItem("Show Timers State", nullptr, &debug_settings.show_timers_state); settings_changed |= ImGui::MenuItem("Show MDEC State", nullptr, &debug_settings.show_mdec_state); + ImGui::Separator(); + + settings_changed |= ImGui::MenuItem("Recompiler Memory Exceptions", nullptr, &m_settings_copy.cpu_recompiler_memory_exceptions); + settings_changed |= ImGui::MenuItem("Recompiler ICache", nullptr, &m_settings_copy.cpu_recompiler_icache); + if (settings_changed) { // have to apply it to the copy too, otherwise it won't save