mirror of
				https://github.com/RetroDECK/Duckstation.git
				synced 2025-04-10 19:15:14 +00:00 
			
		
		
		
	CPU: Implement instruction cache simulation
Implemented for all execution modes. Disabled by default in the cached interpreter and recompiler, always enabled in the pure interpreter.
This commit is contained in:
		
							parent
							
								
									efc00a2d0e
								
							
						
					
					
						commit
						19d6037b99
					
				
							
								
								
									
										229
									
								
								src/core/bus.cpp
									
									
									
									
									
								
							
							
						
						
									
										229
									
								
								src/core/bus.cpp
									
									
									
									
									
								
							|  | @ -742,10 +742,153 @@ ALWAYS_INLINE static TickCount DoDMAAccess(u32 offset, u32& value) | |||
| 
 | ||||
| namespace CPU { | ||||
| 
 | ||||
| template<bool add_ticks, bool icache_read = false, u32 word_count = 1> | ||||
| ALWAYS_INLINE_RELEASE void DoInstructionRead(PhysicalMemoryAddress address, void* data) | ||||
| { | ||||
|   using namespace Bus; | ||||
| 
 | ||||
|   address &= PHYSICAL_MEMORY_ADDRESS_MASK; | ||||
| 
 | ||||
|   if (address < RAM_MIRROR_END) | ||||
|   { | ||||
|     std::memcpy(data, &g_ram[address & RAM_MASK], sizeof(u32) * word_count); | ||||
|     if constexpr (add_ticks) | ||||
|       g_state.pending_ticks += (icache_read ? 1 : 4) * word_count; | ||||
|   } | ||||
|   else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) | ||||
|   { | ||||
|     std::memcpy(data, &g_bios[(address - BIOS_BASE) & BIOS_MASK], sizeof(u32)); | ||||
|     if constexpr (add_ticks) | ||||
|       g_state.pending_ticks += m_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)] * word_count; | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|     CPU::RaiseException(address, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0)); | ||||
|     std::memset(data, 0, sizeof(u32) * word_count); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| TickCount GetInstructionReadTicks(VirtualMemoryAddress address) | ||||
| { | ||||
|   using namespace Bus; | ||||
| 
 | ||||
|   address &= PHYSICAL_MEMORY_ADDRESS_MASK; | ||||
| 
 | ||||
|   if (address < RAM_MIRROR_END) | ||||
|   { | ||||
|     return 4; | ||||
|   } | ||||
|   else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) | ||||
|   { | ||||
|     return m_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)]; | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|     return 0; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| TickCount GetICacheFillTicks(VirtualMemoryAddress address) | ||||
| { | ||||
|   using namespace Bus; | ||||
| 
 | ||||
|   address &= PHYSICAL_MEMORY_ADDRESS_MASK; | ||||
| 
 | ||||
|   if (address < RAM_MIRROR_END) | ||||
|   { | ||||
|     return 1 * (ICACHE_LINE_SIZE / sizeof(u32)); | ||||
|   } | ||||
|   else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) | ||||
|   { | ||||
|     return m_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)] * (ICACHE_LINE_SIZE / sizeof(u32)); | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|     return 0; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks) | ||||
| { | ||||
|   VirtualMemoryAddress current_pc = g_state.regs.pc & ICACHE_TAG_ADDRESS_MASK; | ||||
|   if (IsCachedAddress(current_pc)) | ||||
|   { | ||||
|     TickCount ticks = 0; | ||||
|     TickCount cached_ticks_per_line = GetICacheFillTicks(current_pc); | ||||
|     for (u32 i = 0; i < line_count; i++, current_pc += ICACHE_LINE_SIZE) | ||||
|     { | ||||
|       const u32 line = GetICacheLine(current_pc); | ||||
|       if (g_state.icache_tags[line] != current_pc) | ||||
|       { | ||||
|         g_state.icache_tags[line] = current_pc; | ||||
|         ticks += cached_ticks_per_line; | ||||
|       } | ||||
|     } | ||||
| 
 | ||||
|     g_state.pending_ticks += ticks; | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|     g_state.pending_ticks += uncached_ticks; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| u32 FillICache(VirtualMemoryAddress address) | ||||
| { | ||||
|   const u32 line = GetICacheLine(address); | ||||
|   g_state.icache_tags[line] = GetICacheTagForAddress(address); | ||||
|   u8* line_data = &g_state.icache_data[line * ICACHE_LINE_SIZE]; | ||||
|   DoInstructionRead<true, true, 4>(address & ~(ICACHE_LINE_SIZE - 1u), line_data); | ||||
| 
 | ||||
|   const u32 offset = GetICacheLineOffset(address); | ||||
|   u32 result; | ||||
|   std::memcpy(&result, &line_data[offset], sizeof(result)); | ||||
|   return result; | ||||
| } | ||||
| 
 | ||||
| void ClearICache() | ||||
| { | ||||
|   std::memset(g_state.icache_data.data(), 0, ICACHE_SIZE); | ||||
|   g_state.icache_tags.fill(ICACHE_INVALD_BIT | ICACHE_DISABLED_BIT); | ||||
| } | ||||
| 
 | ||||
| ALWAYS_INLINE_RELEASE static u32 ReadICache(VirtualMemoryAddress address) | ||||
| { | ||||
|   const u32 line = GetICacheLine(address); | ||||
|   const u8* line_data = &g_state.icache_data[line * ICACHE_LINE_SIZE]; | ||||
|   const u32 offset = GetICacheLineOffset(address); | ||||
|   u32 result; | ||||
|   std::memcpy(&result, &line_data[offset], sizeof(result)); | ||||
|   return result; | ||||
| } | ||||
| 
 | ||||
| ALWAYS_INLINE_RELEASE static void WriteICache(VirtualMemoryAddress address, u32 value) | ||||
| { | ||||
|   const u32 line = GetICacheLine(address); | ||||
|   const u32 offset = GetICacheLineOffset(address); | ||||
|   g_state.icache_tags[line] = GetICacheTagForAddress(address) | ICACHE_INVALD_BIT; | ||||
|   std::memcpy(&g_state.icache_data[line * ICACHE_LINE_SIZE + offset], &value, sizeof(value)); | ||||
| } | ||||
| 
 | ||||
| static void WriteCacheControl(u32 value) | ||||
| { | ||||
|   Log_WarningPrintf("Cache control <- 0x%08X", value); | ||||
|   g_state.cache_control = value; | ||||
| 
 | ||||
|   CacheControl changed_bits{g_state.cache_control.bits ^ value}; | ||||
|   g_state.cache_control.bits = value; | ||||
|   if (changed_bits.icache_enable) | ||||
|   { | ||||
|     if (g_state.cache_control.icache_enable) | ||||
|     { | ||||
|       for (u32 i = 0; i < ICACHE_LINES; i++) | ||||
|         g_state.icache_tags[i] &= ~ICACHE_DISABLED_BIT; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|       for (u32 i = 0; i < ICACHE_LINES; i++) | ||||
|         g_state.icache_tags[i] |= ICACHE_DISABLED_BIT; | ||||
|     } | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| template<MemoryAccessType type, MemoryAccessSize size> | ||||
|  | @ -797,7 +940,10 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& | |||
|       if constexpr (type == MemoryAccessType::Write) | ||||
|       { | ||||
|         if (g_state.cop0_regs.sr.Isc) | ||||
|         { | ||||
|           WriteICache(address, value); | ||||
|           return 0; | ||||
|         } | ||||
|       } | ||||
| 
 | ||||
|       address &= PHYSICAL_MEMORY_ADDRESS_MASK; | ||||
|  | @ -829,7 +975,7 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& | |||
|       if (address == 0xFFFE0130) | ||||
|       { | ||||
|         if constexpr (type == MemoryAccessType::Read) | ||||
|           value = g_state.cache_control; | ||||
|           value = g_state.cache_control.bits; | ||||
|         else | ||||
|           WriteCacheControl(value); | ||||
| 
 | ||||
|  | @ -849,6 +995,10 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& | |||
|   { | ||||
|     return DoRAMAccess<type, size>(address, value); | ||||
|   } | ||||
|   else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)) | ||||
|   { | ||||
|     return DoBIOSAccess<type, size>(static_cast<u32>(address - BIOS_BASE), value); | ||||
|   } | ||||
|   else if (address < EXP1_BASE) | ||||
|   { | ||||
|     return DoInvalidAccess(type, size, address, value); | ||||
|  | @ -921,14 +1071,6 @@ static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& | |||
|   { | ||||
|     return DoEXP2Access<type, size>(address & EXP2_MASK, value); | ||||
|   } | ||||
|   else if (address < BIOS_BASE) | ||||
|   { | ||||
|     return DoInvalidAccess(type, size, address, value); | ||||
|   } | ||||
|   else if (address < (BIOS_BASE + BIOS_SIZE)) | ||||
|   { | ||||
|     return DoBIOSAccess<type, size>(static_cast<u32>(address - BIOS_BASE), value); | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|     return DoInvalidAccess(type, size, address, value); | ||||
|  | @ -961,12 +1103,45 @@ static bool DoAlignmentCheck(VirtualMemoryAddress address) | |||
| bool FetchInstruction() | ||||
| { | ||||
|   DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4)); | ||||
|   if (DoMemoryAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(g_state.regs.npc, g_state.next_instruction.bits) < | ||||
|       0) | ||||
| 
 | ||||
|   using namespace Bus; | ||||
| 
 | ||||
|   PhysicalMemoryAddress address = g_state.regs.npc; | ||||
|   switch (address >> 29) | ||||
|   { | ||||
|     // Bus errors don't set BadVaddr.
 | ||||
|     RaiseException(g_state.regs.npc, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0)); | ||||
|     return false; | ||||
|     case 0x00: // KUSEG 0M-512M
 | ||||
|     case 0x04: // KSEG0 - physical memory cached
 | ||||
|     { | ||||
| #if 0 | ||||
|       // TODO: icache
 | ||||
|       TickCount cycles; | ||||
|       DoInstructionRead(address, cycles, g_state.next_instruction.bits); | ||||
| #else | ||||
|       if (CompareICacheTag(address)) | ||||
|         g_state.next_instruction.bits = ReadICache(address); | ||||
|       else | ||||
|         g_state.next_instruction.bits = FillICache(address); | ||||
| 
 | ||||
| #endif | ||||
|     } | ||||
|     break; | ||||
| 
 | ||||
|     case 0x05: // KSEG1 - physical memory uncached
 | ||||
|     { | ||||
|       DoInstructionRead<true, false, 1>(address, &g_state.next_instruction.bits); | ||||
|     } | ||||
|     break; | ||||
| 
 | ||||
|     case 0x01: // KUSEG 512M-1024M
 | ||||
|     case 0x02: // KUSEG 1024M-1536M
 | ||||
|     case 0x03: // KUSEG 1536M-2048M
 | ||||
|     case 0x06: // KSEG2
 | ||||
|     case 0x07: // KSEG2
 | ||||
|     default: | ||||
|     { | ||||
|       CPU::RaiseException(address, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0)); | ||||
|       return false; | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   g_state.regs.pc = g_state.regs.npc; | ||||
|  | @ -974,6 +1149,30 @@ bool FetchInstruction() | |||
|   return true; | ||||
| } | ||||
| 
 | ||||
| bool SafeReadInstruction(VirtualMemoryAddress addr, u32* value) | ||||
| { | ||||
|   switch (addr >> 29) | ||||
|   { | ||||
|     case 0x00: // KUSEG 0M-512M
 | ||||
|     case 0x04: // KSEG0 - physical memory cached
 | ||||
|     case 0x05: // KSEG1 - physical memory uncached
 | ||||
|     { | ||||
|       DoInstructionRead<false, false, 1>(addr, value); | ||||
|       return true; | ||||
|     } | ||||
| 
 | ||||
|     case 0x01: // KUSEG 512M-1024M
 | ||||
|     case 0x02: // KUSEG 1024M-1536M
 | ||||
|     case 0x03: // KUSEG 1536M-2048M
 | ||||
|     case 0x06: // KSEG2
 | ||||
|     case 0x07: // KSEG2
 | ||||
|     default: | ||||
|     { | ||||
|       return false; | ||||
|     } | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value) | ||||
| { | ||||
|   u32 temp = 0; | ||||
|  |  | |||
|  | @ -78,41 +78,6 @@ extern std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits; | |||
| extern u8 g_ram[RAM_SIZE];   // 2MB RAM
 | ||||
| extern u8 g_bios[BIOS_SIZE]; // 512K BIOS ROM
 | ||||
| 
 | ||||
| /// Returns the address which should be used for code caching (i.e. removes mirrors).
 | ||||
| ALWAYS_INLINE PhysicalMemoryAddress UnmirrorAddress(PhysicalMemoryAddress address) | ||||
| { | ||||
|   // RAM
 | ||||
|   if (address < 0x800000) | ||||
|     return address & UINT32_C(0x1FFFFF); | ||||
|   else | ||||
|     return address; | ||||
| } | ||||
| 
 | ||||
| /// Returns true if the address specified is cacheable (RAM or BIOS).
 | ||||
| ALWAYS_INLINE bool IsCacheableAddress(PhysicalMemoryAddress address) | ||||
| { | ||||
|   return (address < RAM_MIRROR_END) || (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE)); | ||||
| } | ||||
| 
 | ||||
| /// Reads a cachable address (RAM or BIOS).
 | ||||
| ALWAYS_INLINE u32 ReadCacheableAddress(PhysicalMemoryAddress address) | ||||
| { | ||||
|   u32 value; | ||||
|   if (address < RAM_MIRROR_END) | ||||
|   { | ||||
|     std::memcpy(&value, &g_ram[address & RAM_MASK], sizeof(value)); | ||||
|     return value; | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|     std::memcpy(&value, &g_bios[address & BIOS_MASK], sizeof(value)); | ||||
|     return value; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| /// Returns true if the address specified is writable (RAM).
 | ||||
| ALWAYS_INLINE bool IsRAMAddress(PhysicalMemoryAddress address) { return address < RAM_MIRROR_END; } | ||||
| 
 | ||||
| /// Flags a RAM region as code, so we know when to invalidate blocks.
 | ||||
| ALWAYS_INLINE void SetRAMCodePage(u32 index) { m_ram_code_bits[index] = true; } | ||||
| 
 | ||||
|  |  | |||
|  | @ -139,8 +139,7 @@ static void ExecuteImpl() | |||
|     { | ||||
|       if (HasPendingInterrupt()) | ||||
|       { | ||||
|         // TODO: Fill in m_next_instruction...
 | ||||
|         SafeReadMemoryWord(g_state.regs.pc, &g_state.next_instruction.bits); | ||||
|         SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits); | ||||
|         DispatchInterrupt(); | ||||
|         next_block_key = GetNextBlockKey(); | ||||
|       } | ||||
|  | @ -165,6 +164,9 @@ static void ExecuteImpl() | |||
|       LogCurrentState(); | ||||
| #endif | ||||
| 
 | ||||
|       if (g_settings.cpu_recompiler_icache) | ||||
|         CheckAndUpdateICacheTags(block->icache_line_count, block->uncached_fetch_ticks); | ||||
| 
 | ||||
|       InterpretCachedBlock<pgxp_mode>(*block); | ||||
| 
 | ||||
|       if (g_state.pending_ticks >= g_state.downcount) | ||||
|  | @ -247,7 +249,7 @@ void ExecuteRecompiler() | |||
|     { | ||||
|       if (HasPendingInterrupt()) | ||||
|       { | ||||
|         SafeReadMemoryWord(g_state.regs.pc, &g_state.next_instruction.bits); | ||||
|         SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits); | ||||
|         DispatchInterrupt(); | ||||
|       } | ||||
| 
 | ||||
|  | @ -351,7 +353,8 @@ bool RevalidateBlock(CodeBlock* block) | |||
| { | ||||
|   for (const CodeBlockInstruction& cbi : block->instructions) | ||||
|   { | ||||
|     u32 new_code = Bus::ReadCacheableAddress(cbi.pc & PHYSICAL_MEMORY_ADDRESS_MASK); | ||||
|     u32 new_code = 0; | ||||
|     SafeReadInstruction(cbi.pc, &new_code); | ||||
|     if (cbi.instruction.bits != new_code) | ||||
|     { | ||||
|       Log_DebugPrintf("Block 0x%08X changed at PC 0x%08X - %08X to %08X - recompiling.", block->GetPC(), cbi.pc, | ||||
|  | @ -395,16 +398,12 @@ bool CompileBlock(CodeBlock* block) | |||
|     __debugbreak(); | ||||
| #endif | ||||
| 
 | ||||
|   u32 last_cache_line = ICACHE_LINES; | ||||
| 
 | ||||
|   for (;;) | ||||
|   { | ||||
|     CodeBlockInstruction cbi = {}; | ||||
| 
 | ||||
|     const PhysicalMemoryAddress phys_addr = pc & PHYSICAL_MEMORY_ADDRESS_MASK; | ||||
|     if (!Bus::IsCacheableAddress(phys_addr)) | ||||
|       break; | ||||
| 
 | ||||
|     cbi.instruction.bits = Bus::ReadCacheableAddress(phys_addr); | ||||
|     if (!IsInvalidInstruction(cbi.instruction)) | ||||
|     if (!SafeReadInstruction(pc, &cbi.instruction.bits) || !IsInvalidInstruction(cbi.instruction)) | ||||
|       break; | ||||
| 
 | ||||
|     cbi.pc = pc; | ||||
|  | @ -416,6 +415,18 @@ bool CompileBlock(CodeBlock* block) | |||
|     cbi.has_load_delay = InstructionHasLoadDelay(cbi.instruction); | ||||
|     cbi.can_trap = CanInstructionTrap(cbi.instruction, InUserMode()); | ||||
| 
 | ||||
|     if (g_settings.cpu_recompiler_icache) | ||||
|     { | ||||
|       const u32 icache_line = GetICacheLine(pc); | ||||
|       if (icache_line != last_cache_line) | ||||
|       { | ||||
|         block->icache_line_count++; | ||||
|         block->icache_line_count = GetICacheFillTicks(pc); | ||||
|         last_cache_line = icache_line; | ||||
|       } | ||||
|       block->uncached_fetch_ticks += GetInstructionReadTicks(pc); | ||||
|     } | ||||
| 
 | ||||
|     // instruction is decoded now
 | ||||
|     block->instructions.push_back(cbi); | ||||
|     pc += sizeof(cbi.instruction.bits); | ||||
|  |  | |||
|  | @ -61,6 +61,8 @@ struct CodeBlock | |||
|   std::vector<CodeBlock*> link_predecessors; | ||||
|   std::vector<CodeBlock*> link_successors; | ||||
| 
 | ||||
|   TickCount uncached_fetch_ticks = 0; | ||||
|   u32 icache_line_count = 0; | ||||
|   bool invalidated = false; | ||||
| 
 | ||||
|   const u32 GetPC() const { return key.GetPC(); } | ||||
|  |  | |||
|  | @ -80,6 +80,8 @@ void Reset() | |||
|   g_state.cop0_regs.sr.bits = 0; | ||||
|   g_state.cop0_regs.cause.bits = 0; | ||||
| 
 | ||||
|   ClearICache(); | ||||
| 
 | ||||
|   GTE::Reset(); | ||||
| 
 | ||||
|   SetPC(RESET_VECTOR); | ||||
|  | @ -117,14 +119,17 @@ bool DoState(StateWrapper& sw) | |||
|   sw.Do(&g_state.load_delay_value); | ||||
|   sw.Do(&g_state.next_load_delay_reg); | ||||
|   sw.Do(&g_state.next_load_delay_value); | ||||
|   sw.Do(&g_state.cache_control); | ||||
|   sw.Do(&g_state.cache_control.bits); | ||||
|   sw.DoBytes(g_state.dcache.data(), g_state.dcache.size()); | ||||
| 
 | ||||
|   if (!GTE::DoState(sw)) | ||||
|     return false; | ||||
| 
 | ||||
|   if (sw.IsReading()) | ||||
|   { | ||||
|     ClearICache(); | ||||
|     PGXP::Initialize(); | ||||
|   } | ||||
| 
 | ||||
|   return !sw.HasError(); | ||||
| } | ||||
|  | @ -1416,7 +1421,6 @@ void InterpretCachedBlock(const CodeBlock& block) | |||
| { | ||||
|   // set up the state so we've already fetched the instruction
 | ||||
|   DebugAssert(g_state.regs.pc == block.GetPC()); | ||||
| 
 | ||||
|   g_state.regs.npc = block.GetPC() + 4; | ||||
| 
 | ||||
|   for (const CodeBlockInstruction& cbi : block.instructions) | ||||
|  |  | |||
|  | @ -19,7 +19,32 @@ enum : PhysicalMemoryAddress | |||
|   DCACHE_LOCATION = UINT32_C(0x1F800000), | ||||
|   DCACHE_LOCATION_MASK = UINT32_C(0xFFFFFC00), | ||||
|   DCACHE_OFFSET_MASK = UINT32_C(0x000003FF), | ||||
|   DCACHE_SIZE = UINT32_C(0x00000400) | ||||
|   DCACHE_SIZE = UINT32_C(0x00000400), | ||||
|   ICACHE_SIZE = UINT32_C(0x00001000), | ||||
|   ICACHE_SLOTS = ICACHE_SIZE / sizeof(u32), | ||||
|   ICACHE_LINE_SIZE = 16, | ||||
|   ICACHE_LINES = ICACHE_SIZE / ICACHE_LINE_SIZE, | ||||
|   ICACHE_SLOTS_PER_LINE = ICACHE_SLOTS / ICACHE_LINES, | ||||
|   ICACHE_TAG_ADDRESS_MASK = 0xFFFFFFF0u | ||||
| }; | ||||
| 
 | ||||
| enum : u32 | ||||
| { | ||||
|   ICACHE_DISABLED_BIT = 0x01, | ||||
|   ICACHE_INVALD_BIT = 0x02, | ||||
| }; | ||||
| 
 | ||||
| union CacheControl | ||||
| { | ||||
|   u32 bits; | ||||
| 
 | ||||
|   BitField<u32, bool, 0, 1> lock_mode; | ||||
|   BitField<u32, bool, 1, 1> invalidate_mode; | ||||
|   BitField<u32, bool, 2, 1> tag_test_mode; | ||||
|   BitField<u32, bool, 3, 1> dcache_scratchpad; | ||||
|   BitField<u32, bool, 7, 1> dcache_enable; | ||||
|   BitField<u32, u8, 8, 2> icache_fill_size;   // actually dcache? icache always fills to 16 bytes
 | ||||
|   BitField<u32, bool, 11, 1> icache_enable; | ||||
| }; | ||||
| 
 | ||||
| struct State | ||||
|  | @ -49,13 +74,15 @@ struct State | |||
|   Reg next_load_delay_reg = Reg::count; | ||||
|   u32 next_load_delay_value = 0; | ||||
| 
 | ||||
|   u32 cache_control = 0; | ||||
|   CacheControl cache_control{ 0 }; | ||||
| 
 | ||||
|   // GTE registers are stored here so we can access them on ARM with a single instruction
 | ||||
|   GTE::Regs gte_regs = {}; | ||||
| 
 | ||||
|   // data cache (used as scratchpad)
 | ||||
|   std::array<u8, DCACHE_SIZE> dcache = {}; | ||||
|   std::array<u32, ICACHE_LINES> icache_tags = {}; | ||||
|   std::array<u8, ICACHE_SIZE> icache_data = {}; | ||||
| }; | ||||
| 
 | ||||
| extern State g_state; | ||||
|  | @ -64,6 +91,7 @@ void Initialize(); | |||
| void Shutdown(); | ||||
| void Reset(); | ||||
| bool DoState(StateWrapper& sw); | ||||
| void ClearICache(); | ||||
| 
 | ||||
| /// Executes interpreter loop.
 | ||||
| void Execute(); | ||||
|  |  | |||
|  | @ -34,8 +34,38 @@ ALWAYS_INLINE static void DispatchInterrupt() | |||
|     g_state.regs.pc); | ||||
| } | ||||
| 
 | ||||
| // icache stuff
 | ||||
| ALWAYS_INLINE bool IsCachedAddress(VirtualMemoryAddress address) | ||||
| { | ||||
|   // KUSEG, KSEG0
 | ||||
|   return (address >> 29) <= 4; | ||||
| } | ||||
| ALWAYS_INLINE u32 GetICacheLine(VirtualMemoryAddress address) | ||||
| { | ||||
|   return ((address >> 4) & 0xFFu); | ||||
| } | ||||
| ALWAYS_INLINE u32 GetICacheLineOffset(VirtualMemoryAddress address) | ||||
| { | ||||
|   return (address & (ICACHE_LINE_SIZE - 1)); | ||||
| } | ||||
| ALWAYS_INLINE u32 GetICacheTagForAddress(VirtualMemoryAddress address) | ||||
| { | ||||
|   return (address & ICACHE_TAG_ADDRESS_MASK); | ||||
| } | ||||
| ALWAYS_INLINE bool CompareICacheTag(VirtualMemoryAddress address) | ||||
| { | ||||
|   const u32 line = GetICacheLine(address); | ||||
|   return (g_state.icache_tags[line] == GetICacheTagForAddress(address)); | ||||
| } | ||||
| 
 | ||||
| TickCount GetInstructionReadTicks(VirtualMemoryAddress address); | ||||
| TickCount GetICacheFillTicks(VirtualMemoryAddress address); | ||||
| u32 FillICache(VirtualMemoryAddress address); | ||||
| void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks); | ||||
| 
 | ||||
| // defined in cpu_memory.cpp - memory access functions which return false if an exception was thrown.
 | ||||
| bool FetchInstruction(); | ||||
| bool SafeReadInstruction(VirtualMemoryAddress addr, u32* value); | ||||
| bool ReadMemoryByte(VirtualMemoryAddress addr, u8* value); | ||||
| bool ReadMemoryHalfWord(VirtualMemoryAddress addr, u16* value); | ||||
| bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value); | ||||
|  |  | |||
|  | @ -34,7 +34,7 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin | |||
|   const CodeBlockInstruction* cbi = m_block_start; | ||||
|   while (cbi != m_block_end) | ||||
|   { | ||||
| #ifndef Y_BUILD_CONFIG_RELEASE | ||||
| #ifdef _DEBUG | ||||
|     SmallString disasm; | ||||
|     DisassembleInstruction(&disasm, cbi->pc, cbi->instruction.bits, nullptr); | ||||
|     Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray()); | ||||
|  | @ -840,6 +840,9 @@ void CodeGenerator::BlockPrologue() | |||
| { | ||||
|   EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0)); | ||||
| 
 | ||||
|   if (m_block->uncached_fetch_ticks > 0) | ||||
|     EmitICacheCheckAndUpdate(); | ||||
| 
 | ||||
|   // we don't know the state of the last block, so assume load delays might be in progress
 | ||||
|   // TODO: Pull load delay into register cache
 | ||||
|   m_current_instruction_in_branch_delay_slot_dirty = true; | ||||
|  |  | |||
|  | @ -61,6 +61,7 @@ public: | |||
|   void EmitFlushInterpreterLoadDelay(); | ||||
|   void EmitMoveNextInterpreterLoadDelay(); | ||||
|   void EmitCancelInterpreterLoadDelayForReg(Reg reg); | ||||
|   void EmitICacheCheckAndUpdate(); | ||||
|   void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset); | ||||
|   void EmitStoreCPUStructField(u32 offset, const Value& value); | ||||
|   void EmitAddCPUStructField(u32 offset, const Value& value); | ||||
|  |  | |||
|  | @ -22,4 +22,48 @@ void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value) | |||
|   m_load_delay_dirty = true; | ||||
| } | ||||
| 
 | ||||
| #ifndef CPU_X64 | ||||
| 
 | ||||
| void CodeGenerator::EmitICacheCheckAndUpdate() | ||||
| { | ||||
|   Value pc = CalculatePC(); | ||||
|   Value temp = m_register_cache.AllocateScratch(RegSize_32); | ||||
|   m_register_cache.InhibitAllocation(); | ||||
| 
 | ||||
|   EmitShr(temp.GetHostRegister(), pc.GetHostRegister(), RegSize_32, Value::FromConstantU32(29)); | ||||
|   LabelType is_cached; | ||||
|   LabelType ready_to_execute; | ||||
|   EmitConditionalBranch(Condition::LessEqual, false, temp.GetHostRegister(), Value::FromConstantU32(4), &is_cached); | ||||
|   EmitAddCPUStructField(offsetof(State, pending_ticks), | ||||
|                         Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks))); | ||||
|   EmitBranch(&ready_to_execute); | ||||
|   EmitBindLabel(&is_cached); | ||||
| 
 | ||||
|   // cached path
 | ||||
|   EmitAnd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_TAG_ADDRESS_MASK)); | ||||
|   VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK); | ||||
|   for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE) | ||||
|   { | ||||
|     const TickCount fill_ticks = GetICacheFillTicks(current_address); | ||||
|     if (fill_ticks <= 0) | ||||
|       continue; | ||||
| 
 | ||||
|     const u32 line = GetICacheLine(current_address); | ||||
|     const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32)); | ||||
|     LabelType cache_hit; | ||||
| 
 | ||||
|     EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offset); | ||||
|     EmitConditionalBranch(Condition::Equal, false, temp.GetHostRegister(), pc, &cache_hit); | ||||
|     EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(fill_ticks))); | ||||
|     EmitStoreCPUStructField(offset, pc); | ||||
|     EmitBindLabel(&cache_hit); | ||||
|     EmitAdd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_LINE_SIZE), false); | ||||
|   } | ||||
| 
 | ||||
|   EmitBindLabel(&ready_to_execute); | ||||
|   m_register_cache.UnunhibitAllocation(); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| } // namespace CPU::Recompiler
 | ||||
|  |  | |||
|  | @ -2187,6 +2187,52 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg) | |||
|   m_emit->L(skip_cancel); | ||||
| } | ||||
| 
 | ||||
| void CodeGenerator::EmitICacheCheckAndUpdate() | ||||
| { | ||||
|   Value pc = CalculatePC(); | ||||
|   Value seg = m_register_cache.AllocateScratch(RegSize_32); | ||||
|   m_register_cache.InhibitAllocation(); | ||||
| 
 | ||||
|   m_emit->mov(GetHostReg32(seg), GetHostReg32(pc)); | ||||
|   m_emit->shr(GetHostReg32(seg), 29); | ||||
| 
 | ||||
|   Xbyak::Label is_cached; | ||||
|   m_emit->cmp(GetHostReg32(seg), 4); | ||||
|   m_emit->jle(is_cached); | ||||
| 
 | ||||
|   // uncached
 | ||||
|   Xbyak::Label done; | ||||
|   m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], | ||||
|               static_cast<u32>(m_block->uncached_fetch_ticks)); | ||||
|   m_emit->jmp(done, Xbyak::CodeGenerator::T_NEAR); | ||||
| 
 | ||||
|   // cached
 | ||||
|   m_emit->L(is_cached); | ||||
|   m_emit->and_(GetHostReg32(pc), ICACHE_TAG_ADDRESS_MASK); | ||||
| 
 | ||||
|   VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK); | ||||
|   for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE) | ||||
|   { | ||||
|     const TickCount fill_ticks = GetICacheFillTicks(current_address); | ||||
|     if (fill_ticks <= 0) | ||||
|       continue; | ||||
| 
 | ||||
|     const u32 line = GetICacheLine(current_address); | ||||
|     const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32)); | ||||
|     Xbyak::Label cache_hit; | ||||
| 
 | ||||
|     m_emit->cmp(GetHostReg32(pc), m_emit->dword[GetCPUPtrReg() + offset]); | ||||
|     m_emit->je(cache_hit); | ||||
|     m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], GetHostReg32(pc)); | ||||
|     m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], static_cast<u32>(fill_ticks)); | ||||
|     m_emit->L(cache_hit); | ||||
|     m_emit->add(GetHostReg32(pc), ICACHE_LINE_SIZE); | ||||
|   } | ||||
| 
 | ||||
|   m_emit->L(done); | ||||
|   m_register_cache.UnunhibitAllocation(); | ||||
| } | ||||
| 
 | ||||
| void CodeGenerator::EmitBranch(const void* address, bool allow_scratch) | ||||
| { | ||||
|   const s64 jump_distance = | ||||
|  |  | |||
|  | @ -14,6 +14,7 @@ namespace Recompiler::Thunks { | |||
| //////////////////////////////////////////////////////////////////////////
 | ||||
| bool InterpretInstruction(); | ||||
| bool InterpretInstructionPGXP(); | ||||
| void CheckAndUpdateICache(u32 pc, u32 line_count); | ||||
| 
 | ||||
| // Memory access functions for the JIT - MSB is set on exception.
 | ||||
| u64 ReadMemoryByte(u32 address); | ||||
|  |  | |||
|  | @ -362,6 +362,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) | |||
| 
 | ||||
|   si.SetStringValue("CPU", "ExecutionMode", Settings::GetCPUExecutionModeName(Settings::DEFAULT_CPU_EXECUTION_MODE)); | ||||
|   si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false); | ||||
|   si.SetBoolValue("CPU", "ICache", false); | ||||
| 
 | ||||
|   si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER)); | ||||
|   si.SetIntValue("GPU", "ResolutionScale", 1); | ||||
|  | @ -452,7 +453,8 @@ void HostInterface::FixIncompatibleSettings(bool display_osd_messages) | |||
|     { | ||||
|       if (display_osd_messages) | ||||
|       { | ||||
|         AddOSDMessage(TranslateStdString("OSDMessage", "PGXP is incompatible with the software renderer, disabling PGXP."), 10.0f); | ||||
|         AddOSDMessage( | ||||
|           TranslateStdString("OSDMessage", "PGXP is incompatible with the software renderer, disabling PGXP."), 10.0f); | ||||
|       } | ||||
|       g_settings.gpu_pgxp_enable = false; | ||||
|     } | ||||
|  | @ -510,6 +512,8 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) | |||
|       AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode.", | ||||
|                              Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode)); | ||||
|       CPU::CodeCache::SetUseRecompiler(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler); | ||||
|       CPU::CodeCache::Flush(); | ||||
|       CPU::ClearICache(); | ||||
|     } | ||||
| 
 | ||||
|     if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler && | ||||
|  | @ -520,6 +524,15 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) | |||
|       CPU::CodeCache::Flush(); | ||||
|     } | ||||
| 
 | ||||
|     if (g_settings.cpu_execution_mode != CPUExecutionMode::Interpreter && | ||||
|         g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache) | ||||
|     { | ||||
|       AddFormattedOSDMessage(5.0f, "CPU ICache %s, flushing all blocks.", | ||||
|                              g_settings.cpu_recompiler_icache ? "enabled" : "disabled"); | ||||
|       CPU::CodeCache::Flush(); | ||||
|       CPU::ClearICache(); | ||||
|     } | ||||
| 
 | ||||
|     m_audio_stream->SetOutputVolume(g_settings.audio_output_muted ? 0 : g_settings.audio_output_volume); | ||||
| 
 | ||||
|     if (g_settings.gpu_resolution_scale != old_settings.gpu_resolution_scale || | ||||
|  |  | |||
|  | @ -92,6 +92,7 @@ void Settings::Load(SettingsInterface& si) | |||
|       si.GetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(DEFAULT_CPU_EXECUTION_MODE)).c_str()) | ||||
|       .value_or(DEFAULT_CPU_EXECUTION_MODE); | ||||
|   cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false); | ||||
|   cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false); | ||||
| 
 | ||||
|   gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str()) | ||||
|                    .value_or(DEFAULT_GPU_RENDERER); | ||||
|  | @ -206,6 +207,7 @@ void Settings::Save(SettingsInterface& si) const | |||
| 
 | ||||
|   si.SetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(cpu_execution_mode)); | ||||
|   si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions); | ||||
|   si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache); | ||||
| 
 | ||||
|   si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer)); | ||||
|   si.SetStringValue("GPU", "Adapter", gpu_adapter.c_str()); | ||||
|  |  | |||
|  | @ -69,6 +69,7 @@ struct Settings | |||
| 
 | ||||
|   CPUExecutionMode cpu_execution_mode = CPUExecutionMode::Interpreter; | ||||
|   bool cpu_recompiler_memory_exceptions = false; | ||||
|   bool cpu_recompiler_icache = false; | ||||
| 
 | ||||
|   float emulation_speed = 1.0f; | ||||
|   bool speed_limiter_enabled = true; | ||||
|  |  | |||
|  | @ -370,7 +370,7 @@ void LibretroHostInterface::OnSystemDestroyed() | |||
|   m_using_hardware_renderer = false; | ||||
| } | ||||
| 
 | ||||
| static std::array<retro_core_option_definition, 31> s_option_definitions = {{ | ||||
| static std::array<retro_core_option_definition, 32> s_option_definitions = {{ | ||||
|   {"duckstation_Console.Region", | ||||
|    "Console Region", | ||||
|    "Determines which region/hardware to emulate. Auto-Detect will use the region of the disc inserted.", | ||||
|  | @ -406,6 +406,12 @@ static std::array<retro_core_option_definition, 31> s_option_definitions = {{ | |||
|    "Which mode to use for CPU emulation. Recompiler provides the best performance.", | ||||
|    {{"Interpreter", "Interpreter"}, {"CachedIntepreter", "Cached Interpreter"}, {"Recompiler", "Recompiler"}}, | ||||
|    "Recompiler"}, | ||||
|   {"duckstation_CPU.RecompilerICache", | ||||
|    "CPU Recompiler ICache", | ||||
|    "Determines whether the CPU's instruction cache is simulated in the recompiler. Improves accuracy at a small cost " | ||||
|    "to performance. If games are running too fast, try enabling this option.", | ||||
|    {{"true", "Enabled"}, {"false", "Disabled"}}, | ||||
|    "false"}, | ||||
|   {"duckstation_GPU.Renderer", | ||||
|    "GPU Renderer", | ||||
|    "Which renderer to use to emulate the GPU", | ||||
|  |  | |||
|  | @ -27,6 +27,8 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface, | |||
|   SettingWidgetBinder::BindWidgetToIntSetting(m_host_interface, m_ui.gpuMaxRunAhead, "Hacks", "GPUMaxRunAhead"); | ||||
|   SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.cpuRecompilerMemoryExceptions, "CPU", | ||||
|                                                "RecompilerMemoryExceptions", false); | ||||
|   SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.cpuRecompilerICache, "CPU", "RecompilerICache", | ||||
|                                                false); | ||||
| 
 | ||||
|   SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.showDebugMenu, "Main", "ShowDebugMenu"); | ||||
|   SettingWidgetBinder::BindWidgetToBoolSetting(m_host_interface, m_ui.gpuUseDebugDevice, "GPU", "UseDebugDevice"); | ||||
|  | @ -38,6 +40,10 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface, | |||
|   dialog->registerWidgetHelp(m_ui.gpuUseDebugDevice, tr("Use Debug Host GPU Device"), tr("Unchecked"), | ||||
|                              tr("Enables the usage of debug devices and shaders for rendering APIs which support them. " | ||||
|                                 "Should only be used when debugging the emulator.")); | ||||
|   dialog->registerWidgetHelp( | ||||
|     m_ui.cpuRecompilerICache, tr("Enable Recompiler ICache"), tr("Unchecked"), | ||||
|     tr("Determines whether the CPU's instruction cache is simulated in the recompiler. Improves accuracy at a small " | ||||
|        "cost to performance. If games are running too fast, try enabling this option.")); | ||||
| } | ||||
| 
 | ||||
| AdvancedSettingsWidget::~AdvancedSettingsWidget() = default; | ||||
|  |  | |||
|  | @ -184,6 +184,20 @@ | |||
|         </property> | ||||
|        </widget> | ||||
|       </item> | ||||
|       <item row="5" column="0"> | ||||
|        <widget class="QCheckBox" name="cpuRecompilerMemoryExceptions"> | ||||
|         <property name="text"> | ||||
|          <string>Enable Recompiler Memory Exceptions</string> | ||||
|         </property> | ||||
|        </widget> | ||||
|       </item> | ||||
|       <item row="5" column="1"> | ||||
|        <widget class="QCheckBox" name="cpuRecompilerICache"> | ||||
|         <property name="text"> | ||||
|          <string>Enable Recompiler ICache</string> | ||||
|         </property> | ||||
|        </widget> | ||||
|       </item> | ||||
|       <item row="6" column="0" colspan="2"> | ||||
|        <widget class="QPushButton" name="resetToDefaultButton"> | ||||
|         <property name="text"> | ||||
|  | @ -191,13 +205,6 @@ | |||
|         </property> | ||||
|        </widget> | ||||
|       </item> | ||||
|       <item row="5" column="0" colspan="2"> | ||||
|        <widget class="QCheckBox" name="cpuRecompilerMemoryExceptions"> | ||||
|         <property name="text"> | ||||
|          <string>Enable Recompiler Memory Exceptions</string> | ||||
|         </property> | ||||
|        </widget> | ||||
|       </item> | ||||
|      </layout> | ||||
|     </widget> | ||||
|    </item> | ||||
|  |  | |||
|  | @ -950,6 +950,11 @@ void SDLHostInterface::DrawDebugMenu() | |||
|   settings_changed |= ImGui::MenuItem("Show Timers State", nullptr, &debug_settings.show_timers_state); | ||||
|   settings_changed |= ImGui::MenuItem("Show MDEC State", nullptr, &debug_settings.show_mdec_state); | ||||
| 
 | ||||
|   ImGui::Separator(); | ||||
| 
 | ||||
|   settings_changed |= ImGui::MenuItem("Recompiler Memory Exceptions", nullptr, &m_settings_copy.cpu_recompiler_memory_exceptions); | ||||
|   settings_changed |= ImGui::MenuItem("Recompiler ICache", nullptr, &m_settings_copy.cpu_recompiler_icache); | ||||
| 
 | ||||
|   if (settings_changed) | ||||
|   { | ||||
|     // have to apply it to the copy too, otherwise it won't save
 | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Connor McLaughlin
						Connor McLaughlin