diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index 18b3b20ba..7d8ae9086 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -66,6 +66,7 @@ static void FillBlockRegInfo(Block* block); static void CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src); static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write); static void AddBlockToPageList(Block* block); +static void RemoveBlockFromPageList(Block* block); static Common::PageFaultHandler::HandlerResult ExceptionHandler(void* exception_pc, void* fault_address, bool is_write); @@ -526,7 +527,7 @@ bool CPU::CodeCache::IsBlockCodeCurrent(const Block* block) bool CPU::CodeCache::RevalidateBlock(Block* block) { DebugAssert(block->state != BlockState::Valid); - DebugAssert(AddressInRAM(block->pc)); + DebugAssert(AddressInRAM(block->pc) || block->state == BlockState::NeedsRecompile); if (block->state >= BlockState::NeedsRecompile) return false; @@ -569,6 +570,39 @@ void CPU::CodeCache::AddBlockToPageList(Block* block) } } +void CPU::CodeCache::RemoveBlockFromPageList(Block* block) +{ + DebugAssert(block->size > 0); + if (!AddressInRAM(block->pc) || block->protection != PageProtectionMode::WriteProtected) + return; + + const u32 page_idx = block->StartPageIndex(); + PageProtectionInfo& entry = s_page_protection[page_idx]; + + // unlink from list + Block* prev_block = nullptr; + Block* cur_block = entry.first_block_in_page; + while (cur_block) + { + if (cur_block != block) + { + prev_block = cur_block; + cur_block = cur_block->next_block_in_page; + continue; + } + + if (prev_block) + prev_block->next_block_in_page = cur_block->next_block_in_page; + else + entry.first_block_in_page = cur_block->next_block_in_page; + if (!cur_block->next_block_in_page) + entry.last_block_in_page = prev_block; + + cur_block->next_block_in_page = nullptr; + break; + } +} + void CPU::CodeCache::InvalidateBlocksWithPageIndex(u32 index) { DebugAssert(index < Bus::RAM_8MB_CODE_PAGE_COUNT); @@ -1480,13 +1514,14 @@ void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 gue LoadstoreBackpatchInfo info; info.thunk_address = thunk_address; info.guest_pc = guest_pc; + info.guest_block = 0; info.code_size = static_cast<u8>(code_size); s_fastmem_backpatch_info.emplace(code_address, info); } -void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles, - u32 gpr_bitmask, u8 address_register, u8 data_register, MemoryAccessSize size, - bool is_signed, bool is_load) +void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, u32 guest_block, + TickCount cycles, u32 gpr_bitmask, u8 address_register, u8 data_register, + MemoryAccessSize size, bool is_signed, bool is_load) { DebugAssert(code_size < std::numeric_limits<u8>::max()); DebugAssert(cycles >= 0 && cycles < std::numeric_limits<u16>::max()); @@ -1498,6 +1533,7 @@ void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 gue LoadstoreBackpatchInfo info; info.thunk_address = nullptr; info.guest_pc = guest_pc; + info.guest_block = guest_block; info.gpr_bitmask = gpr_bitmask; info.cycles = static_cast<u16>(cycles); info.address_register = address_register; @@ -1562,7 +1598,22 @@ Common::PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(v BackpatchLoadStore(exception_pc, info); - // TODO: queue block for recompilation later + // queue block for recompilation later + if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec) + { + Block* block = 
LookupBlock(info.guest_block); + if (block) + { + // This is a bit annoying, we have to remove it from the page list if it's a RAM block. + Log_DevFmt("Queuing block {:08X} for recompilation due to backpatch", block->pc); + RemoveBlockFromPageList(block); + InvalidateBlock(block, BlockState::NeedsRecompile); + + // Need to reset the recompile count, otherwise it'll get trolled into an interpreter fallback. + block->compile_frame = System::GetFrameNumber(); + block->compile_count = 1; + } + } // and store the pc in the faulting list, so that we don't emit another fastmem loadstore s_fastmem_faulting_pcs.insert(info.guest_pc); @@ -1570,6 +1621,11 @@ Common::PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(v return Common::PageFaultHandler::HandlerResult::ContinueExecution; } +bool CPU::CodeCache::HasPreviouslyFaultedOnPC(u32 guest_pc) +{ + return (s_fastmem_faulting_pcs.find(guest_pc) != s_fastmem_faulting_pcs.end()); +} + void CPU::CodeCache::BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info) { s_code_buffer.WriteProtect(false); diff --git a/src/core/cpu_code_cache_private.h b/src/core/cpu_code_cache_private.h index 341fde10f..4d5c6b62c 100644 --- a/src/core/cpu_code_cache_private.h +++ b/src/core/cpu_code_cache_private.h @@ -130,7 +130,7 @@ struct alignas(16) Block // links to previous/next block within page Block* next_block_in_page; - + BlockLinkMap::iterator exit_links[MAX_BLOCK_EXIT_LINKS]; u8 num_exit_links; @@ -196,12 +196,17 @@ struct LoadstoreBackpatchInfo }; u32 guest_pc; + u32 guest_block; u8 code_size; MemoryAccessSize AccessSize() const { return static_cast<MemoryAccessSize>(size); } u32 AccessSizeInBytes() const { return 1u << size; } -static_assert(sizeof(LoadstoreBackpatchInfo) == 16); +#ifdef CPU_ARCH_ARM32 +static_assert(sizeof(LoadstoreBackpatchInfo) == 20); +#else +static_assert(sizeof(LoadstoreBackpatchInfo) == 24); +#endif static inline bool AddressInRAM(VirtualMemoryAddress pc) { @@ -248,8 +253,10 @@ void DiscardAndRecompileBlock(u32 start_pc); const void* CreateBlockLink(Block* from_block, void* code, u32 newpc); void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, const void* thunk_address); -void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles, u32 gpr_bitmask, - u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, bool is_load); +void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, u32 guest_block, TickCount cycles, + u32 gpr_bitmask, u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, + bool is_load); +bool HasPreviouslyFaultedOnPC(u32 guest_pc); u32 EmitASMFunctions(void* code, u32 code_size); u32 EmitJump(void* code, const void* dst, bool flush_icache); diff --git a/src/core/cpu_newrec_compiler.cpp b/src/core/cpu_newrec_compiler.cpp index 5a3fb9b42..ae2acddeb 100644 --- a/src/core/cpu_newrec_compiler.cpp +++ b/src/core/cpu_newrec_compiler.cpp @@ -59,6 +59,8 @@ void CPU::NewRec::Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 m_load_delay_dirty = EMULATE_LOAD_DELAYS; m_load_delay_register = Reg::count; m_load_delay_value_register = NUM_HOST_REGS; + + InitSpeculativeRegs(); } void CPU::NewRec::Compiler::BeginBlock() @@ -133,6 +135,7 @@ const void* CPU::NewRec::Compiler::CompileBlock(CodeCache::Block* block, u32* ho DebugAssert(!IsHostRegAllocated(i)); for (u32 i = 1; i < static_cast<u32>(Reg::count); i++) DebugAssert(!m_constant_regs_dirty.test(i) && !m_constant_regs_valid.test(i)); + 
m_speculative_constants.memory.clear(); u32 code_size, far_code_size; const void* code = EndCompile(&code_size, &far_code_size); @@ -494,7 +497,7 @@ bool CPU::NewRec::Compiler::TrySwapDelaySlot(Reg rs, Reg rt, Reg rd) is_safe: #ifdef _DEBUG - Log_DevFmt("Swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm); + Log_DebugFmt("Swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm); #endif CompileBranchDelaySlot(); @@ -506,7 +509,7 @@ is_safe: is_unsafe: #ifdef _DEBUG - Log_DevFmt("NOT swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm); + Log_DebugFmt("NOT swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm); #endif return false; @@ -1079,6 +1082,9 @@ void CPU::NewRec::Compiler::Flush(u32 flags) FlushConstantRegs(false); } } + + if (flags & FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS) + InvalidateSpeculativeValues(); } void CPU::NewRec::Compiler::FlushConstantReg(Reg r) @@ -1161,9 +1167,9 @@ void CPU::NewRec::Compiler::AddLoadStoreInfo(void* code_address, u32 code_size, gpr_bitmask |= (1u << i); } - CPU::CodeCache::AddLoadStoreInfo(code_address, code_size, m_current_instruction_pc, m_cycles, gpr_bitmask, - static_cast<u8>(address_register), static_cast<u8>(data_register), size, is_signed, - is_load); + CPU::CodeCache::AddLoadStoreInfo(code_address, code_size, m_current_instruction_pc, m_block->pc, m_cycles, + gpr_bitmask, static_cast<u8>(address_register), static_cast<u8>(data_register), size, + is_signed, is_load); } void CPU::NewRec::Compiler::CompileInstruction() @@ -1194,34 +1200,34 @@ void CPU::NewRec::Compiler::CompileInstruction() { switch (inst->r.funct) { - case InstructionFunct::sll: CompileTemplate(&Compiler::Compile_sll_const, &Compiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); break; - case InstructionFunct::srl: CompileTemplate(&Compiler::Compile_srl_const, &Compiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); break; - case InstructionFunct::sra: CompileTemplate(&Compiler::Compile_sra_const, &Compiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); break; - case InstructionFunct::sllv: CompileTemplate(&Compiler::Compile_sllv_const, &Compiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); break; - case InstructionFunct::srlv: CompileTemplate(&Compiler::Compile_srlv_const, &Compiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); break; - case InstructionFunct::srav: CompileTemplate(&Compiler::Compile_srav_const, &Compiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); break; + case InstructionFunct::sll: CompileTemplate(&Compiler::Compile_sll_const, &Compiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); SpecExec_sll(); break; + case InstructionFunct::srl: CompileTemplate(&Compiler::Compile_srl_const, &Compiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); SpecExec_srl(); break; + case InstructionFunct::sra: CompileTemplate(&Compiler::Compile_sra_const, &Compiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); SpecExec_sra(); break; + case InstructionFunct::sllv: CompileTemplate(&Compiler::Compile_sllv_const, &Compiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_sllv(); break; + case InstructionFunct::srlv: CompileTemplate(&Compiler::Compile_srlv_const, &Compiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srlv(); break; + case InstructionFunct::srav: CompileTemplate(&Compiler::Compile_srav_const, 
&Compiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srav(); break; case InstructionFunct::jr: CompileTemplate(&Compiler::Compile_jr_const, &Compiler::Compile_jr, nullptr, TF_READS_S); break; - case InstructionFunct::jalr: CompileTemplate(&Compiler::Compile_jalr_const, &Compiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); break; + case InstructionFunct::jalr: CompileTemplate(&Compiler::Compile_jalr_const, &Compiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); SpecExec_jalr(); break; case InstructionFunct::syscall: Compile_syscall(); break; case InstructionFunct::break_: Compile_break(); break; - case InstructionFunct::mfhi: CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break; - case InstructionFunct::mthi: CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break; - case InstructionFunct::mflo: CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break; - case InstructionFunct::mtlo: CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break; - case InstructionFunct::mult: CompileTemplate(&Compiler::Compile_mult_const, &Compiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); break; - case InstructionFunct::multu: CompileTemplate(&Compiler::Compile_multu_const, &Compiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); break; - case InstructionFunct::div: CompileTemplate(&Compiler::Compile_div_const, &Compiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); break; - case InstructionFunct::divu: CompileTemplate(&Compiler::Compile_divu_const, &Compiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); break; - case InstructionFunct::add: CompileTemplate(&Compiler::Compile_add_const, &Compiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); break; - case InstructionFunct::addu: CompileTemplate(&Compiler::Compile_addu_const, &Compiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); break; - case InstructionFunct::sub: CompileTemplate(&Compiler::Compile_sub_const, &Compiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); break; - case InstructionFunct::subu: CompileTemplate(&Compiler::Compile_subu_const, &Compiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); break; - case InstructionFunct::and_: CompileTemplate(&Compiler::Compile_and_const, &Compiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); break; - case InstructionFunct::or_: CompileTemplate(&Compiler::Compile_or_const, &Compiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); break; - case InstructionFunct::xor_: CompileTemplate(&Compiler::Compile_xor_const, &Compiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); break; - case InstructionFunct::nor: CompileTemplate(&Compiler::Compile_nor_const, &Compiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); break; - case InstructionFunct::slt: 
CompileTemplate(&Compiler::Compile_slt_const, &Compiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); break; - case InstructionFunct::sltu: CompileTemplate(&Compiler::Compile_sltu_const, &Compiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); break; + case InstructionFunct::mfhi: SpecCopyReg(inst->r.rd, Reg::hi); CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break; + case InstructionFunct::mthi: SpecCopyReg(Reg::hi, inst->r.rs); CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break; + case InstructionFunct::mflo: SpecCopyReg(inst->r.rd, Reg::lo); CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break; + case InstructionFunct::mtlo: SpecCopyReg(Reg::lo, inst->r.rs); CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break; + case InstructionFunct::mult: CompileTemplate(&Compiler::Compile_mult_const, &Compiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_mult(); break; + case InstructionFunct::multu: CompileTemplate(&Compiler::Compile_multu_const, &Compiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_multu(); break; + case InstructionFunct::div: CompileTemplate(&Compiler::Compile_div_const, &Compiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_div(); break; + case InstructionFunct::divu: CompileTemplate(&Compiler::Compile_divu_const, &Compiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_divu(); break; + case InstructionFunct::add: CompileTemplate(&Compiler::Compile_add_const, &Compiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_add(); break; + case InstructionFunct::addu: CompileTemplate(&Compiler::Compile_addu_const, &Compiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_addu(); break; + case InstructionFunct::sub: CompileTemplate(&Compiler::Compile_sub_const, &Compiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_sub(); break; + case InstructionFunct::subu: CompileTemplate(&Compiler::Compile_subu_const, &Compiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); SpecExec_subu(); break; + case InstructionFunct::and_: CompileTemplate(&Compiler::Compile_and_const, &Compiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_and(); break; + case InstructionFunct::or_: CompileTemplate(&Compiler::Compile_or_const, &Compiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_or(); break; + case InstructionFunct::xor_: CompileTemplate(&Compiler::Compile_xor_const, &Compiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_xor(); break; + case InstructionFunct::nor: CompileTemplate(&Compiler::Compile_nor_const, &Compiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_nor(); break; + case InstructionFunct::slt: CompileTemplate(&Compiler::Compile_slt_const, &Compiler::Compile_slt, 
PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_slt(); break; + case InstructionFunct::sltu: CompileTemplate(&Compiler::Compile_sltu_const, &Compiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_sltu(); break; default: Panic("fixme funct"); break; } @@ -1229,35 +1235,35 @@ void CPU::NewRec::Compiler::CompileInstruction() break; case InstructionOp::j: Compile_j(); break; - case InstructionOp::jal: Compile_jal(); break; + case InstructionOp::jal: Compile_jal(); SpecExec_jal(); break; - case InstructionOp::b: CompileTemplate(&Compiler::Compile_b_const, &Compiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break; + case InstructionOp::b: CompileTemplate(&Compiler::Compile_b_const, &Compiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); SpecExec_b(); break; case InstructionOp::blez: CompileTemplate(&Compiler::Compile_blez_const, &Compiler::Compile_blez, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break; case InstructionOp::bgtz: CompileTemplate(&Compiler::Compile_bgtz_const, &Compiler::Compile_bgtz, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break; case InstructionOp::beq: CompileTemplate(&Compiler::Compile_beq_const, &Compiler::Compile_beq, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break; case InstructionOp::bne: CompileTemplate(&Compiler::Compile_bne_const, &Compiler::Compile_bne, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break; - case InstructionOp::addi: CompileTemplate(&Compiler::Compile_addi_const, &Compiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); break; - case InstructionOp::addiu: CompileTemplate(&Compiler::Compile_addiu_const, &Compiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); break; - case InstructionOp::slti: CompileTemplate(&Compiler::Compile_slti_const, &Compiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); break; - case InstructionOp::sltiu: CompileTemplate(&Compiler::Compile_sltiu_const, &Compiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); break; - case InstructionOp::andi: CompileTemplate(&Compiler::Compile_andi_const, &Compiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); break; - case InstructionOp::ori: CompileTemplate(&Compiler::Compile_ori_const, &Compiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); break; - case InstructionOp::xori: CompileTemplate(&Compiler::Compile_xori_const, &Compiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); break; - case InstructionOp::lui: Compile_lui(); break; + case InstructionOp::addi: CompileTemplate(&Compiler::Compile_addi_const, &Compiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); SpecExec_addi(); break; + case InstructionOp::addiu: CompileTemplate(&Compiler::Compile_addiu_const, &Compiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_addiu(); break; + case InstructionOp::slti: CompileTemplate(&Compiler::Compile_slti_const, &Compiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); SpecExec_slti(); break; + case InstructionOp::sltiu: CompileTemplate(&Compiler::Compile_sltiu_const, &Compiler::Compile_sltiu, 
PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); SpecExec_sltiu(); break; + case InstructionOp::andi: CompileTemplate(&Compiler::Compile_andi_const, &Compiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); SpecExec_andi(); break; + case InstructionOp::ori: CompileTemplate(&Compiler::Compile_ori_const, &Compiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_ori(); break; + case InstructionOp::xori: CompileTemplate(&Compiler::Compile_xori_const, &Compiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_xori(); break; + case InstructionOp::lui: Compile_lui(); SpecExec_lui(); break; - case InstructionOp::lb: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break; - case InstructionOp::lbu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break; - case InstructionOp::lh: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break; - case InstructionOp::lhu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break; - case InstructionOp::lw: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break; - case InstructionOp::lwl: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break; - case InstructionOp::lwr: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break; - case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); break; - case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); break; - case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); break; - case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break; - case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break; + case InstructionOp::lb: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, true); break; + case InstructionOp::lbu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, false); break; + case InstructionOp::lh: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, true); break; + case InstructionOp::lhu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); 
SpecExec_lxx(MemoryAccessSize::HalfWord, false); break; + case InstructionOp::lw: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Word, false); break; + case InstructionOp::lwl: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(false); break; + case InstructionOp::lwr: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(true); break; + case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break; + case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break; + case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break; + case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(false); break; + case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(true); break; case InstructionOp::cop0: { @@ -1265,8 +1271,8 @@ void CPU::NewRec::Compiler::CompileInstruction() { switch (inst->cop.CommonOp()) { - case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc0, nullptr, TF_WRITES_T | TF_LOAD_DELAY); } break; - case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); break; + case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc0, nullptr, TF_WRITES_T | TF_LOAD_DELAY); } SpecExec_mfc0(); break; + case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); SpecExec_mtc0(); break; default: Compile_Fallback(); break; } } @@ -1274,7 +1280,7 @@ void CPU::NewRec::Compiler::CompileInstruction() { switch (inst->cop.Cop0Op()) { - case Cop0Instruction::rfe: CompileTemplate(nullptr, &Compiler::Compile_rfe, nullptr, 0); break; + case Cop0Instruction::rfe: CompileTemplate(nullptr, &Compiler::Compile_rfe, nullptr, 0); SpecExec_rfe(); break; default: Compile_Fallback(); break; } } @@ -1303,7 +1309,7 @@ void CPU::NewRec::Compiler::CompileInstruction() break; case InstructionOp::lwc2: CompileLoadStoreTemplate(&Compiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_GTE_STALL | TF_READS_S | TF_LOAD_DELAY); break; - case InstructionOp::swc2: CompileLoadStoreTemplate(&Compiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); break; + case InstructionOp::swc2: CompileLoadStoreTemplate(&Compiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); SpecExec_swc2(); break; default: Panic("Fixme"); break; // clang-format on @@ -1567,7 +1573,7 @@ void CPU::NewRec::Compiler::CompileTemplate(void (Compiler::*const_func)(Compile } } -void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void 
(Compiler::*func)(CompileFlags, MemoryAccessSize, bool, +void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool, const std::optional<VirtualMemoryAddress>&), MemoryAccessSize size, bool store, bool sign, u32 tflags) { @@ -1595,13 +1601,28 @@ void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(Comp // constant address? std::optional<VirtualMemoryAddress> addr; + bool use_fastmem = CodeCache::IsUsingFastmem() && !g_settings.cpu_recompiler_memory_exceptions && + !SpecIsCacheIsolated() && !CodeCache::HasPreviouslyFaultedOnPC(m_current_instruction_pc); if (HasConstantReg(rs)) { addr = GetConstantRegU32(rs) + inst->i.imm_sext32(); cf.const_s = true; + + if (!Bus::CanUseFastmemForAddress(addr.value())) + { + Log_DebugFmt("Not using fastmem for {:08X}", addr.value()); + use_fastmem = false; + } } else { + const std::optional<VirtualMemoryAddress> spec_addr = SpecExec_LoadStoreAddr(); + if (use_fastmem && spec_addr.has_value() && !Bus::CanUseFastmemForAddress(spec_addr.value())) + { + Log_DebugFmt("Not using fastmem for speculative {:08X}", spec_addr.value()); + use_fastmem = false; + } + if constexpr (HAS_MEMORY_OPERANDS) { // don't bother caching it since we're going to flush anyway @@ -1648,12 +1669,13 @@ void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(Comp } } - (this->*func)(cf, size, sign, addr); + (this->*func)(cf, size, sign, use_fastmem, addr); } -void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store) +void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store, + bool use_fastmem) { - if (CodeCache::IsUsingFastmem() && !g_settings.cpu_recompiler_memory_exceptions) + if (use_fastmem) return; // TODO: Stores don't need to flush GTE cycles... 
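Note on the CompileLoadStoreTemplate hunk above: the fastmem decision now happens at compile time. An access uses fastmem only when the recompiler is in fastmem mode, memory exceptions are off, the cache is not speculated as isolated, and this PC has never faulted before; a known (constant or speculated) address that Bus::CanUseFastmemForAddress() rejects also forces the slow path. A minimal standalone sketch of that policy follows — names are illustrative and the address check is a stand-in, not the real Bus/CodeCache API:

```cpp
#include <cstdint>
#include <optional>
#include <unordered_set>

// Compile-time fastmem gating, modelled on the diff's use_fastmem logic.
struct FastmemPolicy
{
  std::unordered_set<uint32_t> faulting_pcs; // plays the role of s_fastmem_faulting_pcs

  // Stand-in for Bus::CanUseFastmemForAddress(): pretend only the mirrored RAM
  // range is mappable; the real check consults the memory map.
  static bool CanUseFastmemForAddress(uint32_t addr) { return (addr & 0x1FFFFFFFu) < 0x00800000u; }

  bool ShouldUseFastmem(uint32_t pc, std::optional<uint32_t> known_addr, bool memory_exceptions,
                        bool cache_isolated) const
  {
    if (memory_exceptions || cache_isolated || faulting_pcs.count(pc) != 0)
      return false;
    // A known address that cannot be mapped forces the C-call path up front.
    return !known_addr.has_value() || CanUseFastmemForAddress(*known_addr);
  }

  // Fault-handler path: after backpatching, blacklist the PC so the next
  // compilation of the block emits a plain memory access here.
  void OnFastmemFault(uint32_t pc) { faulting_pcs.insert(pc); }
};
```

The payoff is in HandleFastmemException() earlier in the diff: the first fault backpatches the access and flags the owning block NeedsRecompile (after unlinking it from the page list), so each faulting site pays the page-fault cost only once.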
@@ -2275,3 +2297,480 @@ void CPU::NewRec::BackpatchLoadStore(void* exception_pc, const CodeCache::Loadst buffer.CommitFarCode(thunk_size); } + +void CPU::NewRec::Compiler::InitSpeculativeRegs() +{ + for (u8 i = 0; i < static_cast<u8>(Reg::count); i++) + m_speculative_constants.regs[i] = g_state.regs.r[i]; + + m_speculative_constants.cop0_sr = g_state.cop0_regs.sr.bits; + m_speculative_constants.memory.clear(); +} + +void CPU::NewRec::Compiler::InvalidateSpeculativeValues() +{ + m_speculative_constants.regs.fill(std::nullopt); + m_speculative_constants.memory.clear(); + m_speculative_constants.cop0_sr.reset(); +} + +CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadReg(Reg reg) +{ + return m_speculative_constants.regs[static_cast<u8>(reg)]; +} + +void CPU::NewRec::Compiler::SpecWriteReg(Reg reg, SpecValue value) +{ + if (reg == Reg::zero) + return; + + m_speculative_constants.regs[static_cast<u8>(reg)] = value; +} + +void CPU::NewRec::Compiler::SpecInvalidateReg(Reg reg) +{ + if (reg == Reg::zero) + return; + + m_speculative_constants.regs[static_cast<u8>(reg)].reset(); +} + +void CPU::NewRec::Compiler::SpecCopyReg(Reg dst, Reg src) +{ + if (dst == Reg::zero) + return; + + m_speculative_constants.regs[static_cast<u8>(dst)] = m_speculative_constants.regs[static_cast<u8>(src)]; +} + +CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadMem(VirtualMemoryAddress address) +{ + auto it = m_speculative_constants.memory.find(address); + if (it != m_speculative_constants.memory.end()) + return it->second; + + u32 value; + if ((address & DCACHE_LOCATION_MASK) == DCACHE_LOCATION) + { + u32 scratchpad_offset = address & DCACHE_OFFSET_MASK; + std::memcpy(&value, &CPU::g_state.dcache[scratchpad_offset], sizeof(value)); + return value; + } + + const PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK; + if (Bus::IsRAMAddress(phys_addr)) + { + u32 ram_offset = phys_addr & Bus::g_ram_mask; + std::memcpy(&value, &Bus::g_ram[ram_offset], sizeof(value)); + return value; + } + + return std::nullopt; +} + +void CPU::NewRec::Compiler::SpecWriteMem(u32 address, SpecValue value) +{ + auto it = m_speculative_constants.memory.find(address); + if (it != m_speculative_constants.memory.end()) + { + it->second = value; + return; + } + + const PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK; + if ((address & DCACHE_LOCATION_MASK) == DCACHE_LOCATION || Bus::IsRAMAddress(phys_addr)) + m_speculative_constants.memory.emplace(address, value); +} + +void CPU::NewRec::Compiler::SpecInvalidateMem(VirtualMemoryAddress address) +{ + SpecWriteMem(address, std::nullopt); +} + +bool CPU::NewRec::Compiler::SpecIsCacheIsolated() +{ + if (!m_speculative_constants.cop0_sr.has_value()) + return false; + + const Cop0Registers::SR sr{m_speculative_constants.cop0_sr.value()}; + return sr.Isc; +} + +void CPU::NewRec::Compiler::SpecExec_b() +{ + const bool link = (static_cast<u8>(inst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10); + if (link) + SpecWriteReg(Reg::ra, m_compiler_pc); +} + +void CPU::NewRec::Compiler::SpecExec_jal() +{ + SpecWriteReg(Reg::ra, m_compiler_pc); +} + +void CPU::NewRec::Compiler::SpecExec_jalr() +{ + SpecWriteReg(inst->r.rd, m_compiler_pc); +} + +void CPU::NewRec::Compiler::SpecExec_sll() +{ + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rt.has_value()) + SpecWriteReg(inst->r.rd, rt.value() << inst->r.shamt); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_srl() +{ + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rt.has_value()) + 
SpecWriteReg(inst->r.rd, rt.value() >> inst->r.shamt); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_sra() +{ + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rt.has_value()) + SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> inst->r.shamt)); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_sllv() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, rt.value() << (rs.value() & 0x1F)); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_srlv() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, rt.value() >> (rs.value() & 0x1F)); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_srav() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> (rs.value() & 0x1F))); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_mult() +{ + // TODO + SpecInvalidateReg(Reg::hi); + SpecInvalidateReg(Reg::lo); +} + +void CPU::NewRec::Compiler::SpecExec_multu() +{ + // TODO + SpecInvalidateReg(Reg::hi); + SpecInvalidateReg(Reg::lo); +} + +void CPU::NewRec::Compiler::SpecExec_div() +{ + // TODO + SpecInvalidateReg(Reg::hi); + SpecInvalidateReg(Reg::lo); +} + +void CPU::NewRec::Compiler::SpecExec_divu() +{ + // TODO + SpecInvalidateReg(Reg::hi); + SpecInvalidateReg(Reg::lo); +} + +void CPU::NewRec::Compiler::SpecExec_add() +{ + SpecExec_addu(); +} + +void CPU::NewRec::Compiler::SpecExec_addu() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, rs.value() + rt.value()); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_sub() +{ + SpecExec_subu(); +} + +void CPU::NewRec::Compiler::SpecExec_subu() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, rs.value() - rt.value()); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_and() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, rs.value() & rt.value()); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_or() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, rs.value() | rt.value()); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_xor() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, rs.value() ^ rt.value()); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_nor() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, ~(rs.value() | rt.value())); + else + 
SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_slt() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(rt.value()))); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_sltu() +{ + const SpecValue rs = SpecReadReg(inst->r.rs); + const SpecValue rt = SpecReadReg(inst->r.rt); + if (rs.has_value() && rt.has_value()) + SpecWriteReg(inst->r.rd, BoolToUInt32(rs.value() < rt.value())); + else + SpecInvalidateReg(inst->r.rd); +} + +void CPU::NewRec::Compiler::SpecExec_addi() +{ + SpecExec_addiu(); +} + +void CPU::NewRec::Compiler::SpecExec_addiu() +{ + const SpecValue rs = SpecReadReg(inst->i.rs); + if (rs.has_value()) + SpecWriteReg(inst->i.rt, rs.value() + inst->i.imm_sext32()); + else + SpecInvalidateReg(inst->i.rt); +} + +void CPU::NewRec::Compiler::SpecExec_slti() +{ + const SpecValue rs = SpecReadReg(inst->i.rs); + if (rs.has_value()) + SpecWriteReg(inst->i.rt, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(inst->i.imm_sext32()))); + else + SpecInvalidateReg(inst->i.rt); +} + +void CPU::NewRec::Compiler::SpecExec_sltiu() +{ + const SpecValue rs = SpecReadReg(inst->i.rs); + if (rs.has_value()) + SpecWriteReg(inst->i.rt, BoolToUInt32(rs.value() < inst->i.imm_sext32())); + else + SpecInvalidateReg(inst->i.rt); +} + +void CPU::NewRec::Compiler::SpecExec_andi() +{ + const SpecValue rs = SpecReadReg(inst->i.rs); + if (rs.has_value()) + SpecWriteReg(inst->i.rt, rs.value() & inst->i.imm_zext32()); + else + SpecInvalidateReg(inst->i.rt); +} + +void CPU::NewRec::Compiler::SpecExec_ori() +{ + const SpecValue rs = SpecReadReg(inst->i.rs); + if (rs.has_value()) + SpecWriteReg(inst->i.rt, rs.value() | inst->i.imm_zext32()); + else + SpecInvalidateReg(inst->i.rt); +} + +void CPU::NewRec::Compiler::SpecExec_xori() +{ + const SpecValue rs = SpecReadReg(inst->i.rs); + if (rs.has_value()) + SpecWriteReg(inst->i.rt, rs.value() ^ inst->i.imm_zext32()); + else + SpecInvalidateReg(inst->i.rt); +} + +void CPU::NewRec::Compiler::SpecExec_lui() +{ + SpecWriteReg(inst->i.rt, inst->i.imm_zext32() << 16); +} + +CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecExec_LoadStoreAddr() +{ + const SpecValue rs = SpecReadReg(inst->i.rs); + return rs.has_value() ? (rs.value() + inst->i.imm_sext32()) : rs; +} + +void CPU::NewRec::Compiler::SpecExec_lxx(MemoryAccessSize size, bool sign) +{ + const SpecValue addr = SpecExec_LoadStoreAddr(); + SpecValue val; + if (!addr.has_value() || !(val = SpecReadMem(addr.value())).has_value()) + { + SpecInvalidateReg(inst->i.rt); + return; + } + + switch (size) + { + case MemoryAccessSize::Byte: + val = sign ? SignExtend32(static_cast<u8>(val.value())) : ZeroExtend32(static_cast<u8>(val.value())); + break; + + case MemoryAccessSize::HalfWord: + val = sign ? 
SignExtend32(static_cast<u16>(val.value())) : ZeroExtend32(static_cast<u16>(val.value())); + break; + + case MemoryAccessSize::Word: + break; + + default: + UnreachableCode(); + } + + SpecWriteReg(inst->r.rt, val); +} + +void CPU::NewRec::Compiler::SpecExec_lwx(bool lwr) +{ + // TODO + SpecInvalidateReg(inst->i.rt); +} + +void CPU::NewRec::Compiler::SpecExec_sxx(MemoryAccessSize size) +{ + const SpecValue addr = SpecExec_LoadStoreAddr(); + if (!addr.has_value()) + return; + + SpecValue rt = SpecReadReg(inst->i.rt); + if (rt.has_value()) + { + switch (size) + { + case MemoryAccessSize::Byte: + rt = ZeroExtend32(static_cast<u8>(rt.value())); + break; + + case MemoryAccessSize::HalfWord: + rt = ZeroExtend32(static_cast<u16>(rt.value())); + break; + + case MemoryAccessSize::Word: + break; + + default: + UnreachableCode(); + } + } + + SpecWriteMem(addr.value(), rt); +} + +void CPU::NewRec::Compiler::SpecExec_swx(bool swr) +{ + const SpecValue addr = SpecExec_LoadStoreAddr(); + if (addr.has_value()) + SpecInvalidateMem(addr.value() & ~3u); +} + +void CPU::NewRec::Compiler::SpecExec_swc2() +{ + const SpecValue addr = SpecExec_LoadStoreAddr(); + if (addr.has_value()) + SpecInvalidateMem(addr.value()); +} + +void CPU::NewRec::Compiler::SpecExec_mfc0() +{ + const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue()); + if (rd != Cop0Reg::SR) + { + SpecInvalidateReg(inst->r.rt); + return; + } + + SpecWriteReg(inst->r.rt, m_speculative_constants.cop0_sr); +} + +void CPU::NewRec::Compiler::SpecExec_mtc0() +{ + const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue()); + if (rd != Cop0Reg::SR || !m_speculative_constants.cop0_sr.has_value()) + return; + + SpecValue val = SpecReadReg(inst->r.rt); + if (val.has_value()) + { + constexpr u32 mask = Cop0Registers::SR::WRITE_MASK; + val = (m_speculative_constants.cop0_sr.value() & ~mask) | (val.value() & mask); + } + + m_speculative_constants.cop0_sr = val; +} + +void CPU::NewRec::Compiler::SpecExec_rfe() +{ + if (!m_speculative_constants.cop0_sr.has_value()) + return; + + const u32 val = m_speculative_constants.cop0_sr.value(); + m_speculative_constants.cop0_sr = (val & UINT32_C(0b110000)) | ((val & UINT32_C(0b111111)) >> 2); +} diff --git a/src/core/cpu_newrec_compiler.h b/src/core/cpu_newrec_compiler.h index 5f0d2dbf0..21924237c 100644 --- a/src/core/cpu_newrec_compiler.h +++ b/src/core/cpu_newrec_compiler.h @@ -56,17 +56,18 @@ protected: FLUSH_LOAD_DELAY_FROM_STATE = (1 << 9), FLUSH_GTE_DONE_CYCLE = (1 << 10), FLUSH_GTE_STALL_FROM_STATE = (1 << 11), + FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS = (1 << 12), FLUSH_FOR_C_CALL = (FLUSH_FREE_CALLER_SAVED_REGISTERS), FLUSH_FOR_LOADSTORE = (FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_CYCLES), FLUSH_FOR_BRANCH = (FLUSH_FLUSH_MIPS_REGISTERS), FLUSH_FOR_EXCEPTION = (FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE), // GTE cycles needed because it stalls when a GTE instruction is next. 
- FLUSH_FOR_INTERPRETER = - (FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_INVALIDATE_MIPS_REGISTERS | FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_PC | - FLUSH_CYCLES | FLUSH_INSTRUCTION_BITS | FLUSH_LOAD_DELAY | FLUSH_GTE_DONE_CYCLE), + FLUSH_FOR_INTERPRETER = (FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_INVALIDATE_MIPS_REGISTERS | + FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_PC | FLUSH_CYCLES | FLUSH_INSTRUCTION_BITS | + FLUSH_LOAD_DELAY | FLUSH_GTE_DONE_CYCLE | FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS), FLUSH_END_BLOCK = 0xFFFFFFFFu & ~(FLUSH_PC | FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE | FLUSH_INSTRUCTION_BITS | - FLUSH_GTE_STALL_FROM_STATE), + FLUSH_GTE_STALL_FROM_STATE | FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS), }; union CompileFlags @@ -267,10 +268,10 @@ protected: void CompileTemplate(void (Compiler::*const_func)(CompileFlags), void (Compiler::*func)(CompileFlags), const void* pgxp_cpu_func, u32 tflags); - void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, + void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool, const std::optional<VirtualMemoryAddress>&), MemoryAccessSize size, bool store, bool sign, u32 tflags); - void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store); + void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store, bool use_fastmem); void CompileMoveRegTemplate(Reg dst, Reg src, bool pgxp_move); virtual void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count, @@ -357,17 +358,17 @@ protected: virtual void Compile_xori(CompileFlags cf) = 0; void Compile_lui(); - virtual void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, + virtual void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) = 0; - virtual void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, + virtual void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) = 0; // lwl/lwr - virtual void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, + virtual void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) = 0; - virtual void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, + virtual void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) = 0; - virtual void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, + virtual void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) = 0; // swl/swr - virtual void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, + virtual void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) = 0; static u32* GetCop0RegPtr(Cop0Reg reg); @@ -454,6 +455,71 @@ protected: std::array<HostStateBackup, 2> m_host_state_backup = {}; u32 m_host_state_backup_count = 0; + ////////////////////////////////////////////////////////////////////////// + // Speculative Constants + ////////////////////////////////////////////////////////////////////////// + using SpecValue = std::optional<u32>; + struct SpeculativeConstants + { + std::array<SpecValue, static_cast<size_t>(Reg::count)> regs; + std::unordered_map<PhysicalMemoryAddress, SpecValue> memory; + SpecValue cop0_sr; + }; + + void InitSpeculativeRegs(); + void InvalidateSpeculativeValues(); + SpecValue SpecReadReg(Reg reg); + void SpecWriteReg(Reg reg, SpecValue value); + void SpecInvalidateReg(Reg reg); + void 
SpecCopyReg(Reg dst, Reg src); + SpecValue SpecReadMem(u32 address); + void SpecWriteMem(VirtualMemoryAddress address, SpecValue value); + void SpecInvalidateMem(VirtualMemoryAddress address); + bool SpecIsCacheIsolated(); + + SpeculativeConstants m_speculative_constants; + + void SpecExec_b(); + void SpecExec_jal(); + void SpecExec_jalr(); + void SpecExec_sll(); + void SpecExec_srl(); + void SpecExec_sra(); + void SpecExec_sllv(); + void SpecExec_srlv(); + void SpecExec_srav(); + void SpecExec_mult(); + void SpecExec_multu(); + void SpecExec_div(); + void SpecExec_divu(); + void SpecExec_add(); + void SpecExec_addu(); + void SpecExec_sub(); + void SpecExec_subu(); + void SpecExec_and(); + void SpecExec_or(); + void SpecExec_xor(); + void SpecExec_nor(); + void SpecExec_slt(); + void SpecExec_sltu(); + void SpecExec_addi(); + void SpecExec_addiu(); + void SpecExec_slti(); + void SpecExec_sltiu(); + void SpecExec_andi(); + void SpecExec_ori(); + void SpecExec_xori(); + void SpecExec_lui(); + SpecValue SpecExec_LoadStoreAddr(); + void SpecExec_lxx(MemoryAccessSize size, bool sign); + void SpecExec_lwx(bool lwr); // lwl/lwr + void SpecExec_sxx(MemoryAccessSize size); + void SpecExec_swx(bool swr); // swl/swr + void SpecExec_swc2(); + void SpecExec_mfc0(); + void SpecExec_mtc0(); + void SpecExec_rfe(); + // PGXP memory callbacks static const std::array<std::array<const void*, 2>, 3> s_pgxp_mem_load_functions; static const std::array<const void*, 3> s_pgxp_mem_store_functions; diff --git a/src/core/cpu_newrec_compiler_aarch32.cpp b/src/core/cpu_newrec_compiler_aarch32.cpp index 9bf4bd646..3a63cc8bd 100644 --- a/src/core/cpu_newrec_compiler_aarch32.cpp +++ b/src/core/cpu_newrec_compiler_aarch32.cpp @@ -1340,11 +1340,10 @@ CPU::NewRec::AArch32Compiler::ComputeLoadStoreAddressArg(CompileFlags cf, template<typename RegAllocFn> vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::aarch32::Register& addr_reg, - MemoryAccessSize size, bool sign, + MemoryAccessSize size, bool sign, bool use_fastmem, const RegAllocFn& dst_reg_alloc) { - const bool checked = g_settings.cpu_recompiler_memory_exceptions; - if (!checked && CodeCache::IsUsingFastmem()) + if (use_fastmem) { DebugAssert(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT); m_cycles += Bus::RAM_READ_TICKS; @@ -1379,6 +1378,7 @@ vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::a if (addr_reg.GetCode() != RARG1.GetCode()) armAsm->mov(RARG1, addr_reg); + const bool checked = g_settings.cpu_recompiler_memory_exceptions; switch (size) { case MemoryAccessSize::Byte: @@ -1452,10 +1452,10 @@ vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::a } void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register& addr_reg, - const vixl::aarch32::Register& value_reg, MemoryAccessSize size) + const vixl::aarch32::Register& value_reg, MemoryAccessSize size, + bool use_fastmem) { - const bool checked = g_settings.cpu_recompiler_memory_exceptions; - if (!checked && CodeCache::IsUsingFastmem()) + if (use_fastmem) { DebugAssert(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT); DebugAssert(addr_reg.GetCode() != RARG3.GetCode()); @@ -1488,6 +1488,7 @@ void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register& if (value_reg.GetCode() != RARG2.GetCode()) armAsm->mov(RARG2, value_reg); + const bool checked = g_settings.cpu_recompiler_memory_exceptions; switch (size) { case MemoryAccessSize::Byte: @@ -1536,15 +1537,15 @@ void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register& } } -void 
CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, +void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) { const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ? std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<Register>(); - FlushForLoadStore(address, false); + FlushForLoadStore(address, false, use_fastmem); const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg); - const Register data = GenerateLoad(addr, size, sign, [this, cf]() { + const Register data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() { if (cf.MipsT() == Reg::zero) return RRET; @@ -1564,11 +1565,11 @@ void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, +void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); - FlushForLoadStore(address, false); + FlushForLoadStore(address, false, use_fastmem); // TODO: if address is constant, this can be simplified.. @@ -1580,7 +1581,7 @@ void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize const Register addr = Register(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP)); ComputeLoadStoreAddressArg(cf, address, addr); armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u)); - GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; }); + GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; }); if (inst->r.rt == Reg::zero) { @@ -1648,15 +1649,15 @@ void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize FreeHostReg(addr.GetCode()); } -void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, +void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) { const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ? std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<Register>(); - FlushForLoadStore(address, false); + FlushForLoadStore(address, false, use_fastmem); const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg); - GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RRET; }); + GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; }); const u32 index = static_cast<u32>(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -1728,7 +1729,7 @@ void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz } } -void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, +void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) { AssertRegOrConstS(cf); @@ -1737,13 +1738,13 @@ void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ? 
std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<Register>(); - FlushForLoadStore(address, true); + FlushForLoadStore(address, true, use_fastmem); const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg); const Register data = cf.valid_host_t ? CFGetRegT(cf) : RARG2; if (!cf.valid_host_t) MoveTToReg(RARG2, cf); - GenerateStore(addr, data, size); + GenerateStore(addr, data, size, use_fastmem); if (g_settings.gpu_pgxp_enable) { @@ -1756,18 +1757,18 @@ void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, +void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); - FlushForLoadStore(address, true); + FlushForLoadStore(address, true, use_fastmem); // TODO: if address is constant, this can be simplified.. // We'd need to be careful here if we weren't overwriting it.. const Register addr = Register(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP)); ComputeLoadStoreAddressArg(cf, address, addr); armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u)); - GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; }); + GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; }); // TODO: this can take over rt's value if it's no longer needed // NOTE: can't trust T in cf because of the flush @@ -1813,13 +1814,13 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize FreeHostReg(addr.GetCode()); armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u)); - GenerateStore(RARG1, value, MemoryAccessSize::Word); + GenerateStore(RARG1, value, MemoryAccessSize::Word, use_fastmem); } -void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, +void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) { - FlushForLoadStore(address, true); + FlushForLoadStore(address, true, use_fastmem); const u32 index = static_cast<u32>(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, false); @@ -1852,17 +1853,17 @@ void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz if (!g_settings.gpu_pgxp_enable) { const Register addr = ComputeLoadStoreAddressArg(cf, address); - GenerateStore(addr, RARG2, size); + GenerateStore(addr, RARG2, size, use_fastmem); return; } // TODO: This can be simplified because we don't need to validate in PGXP.. 
const Register addr_reg = Register(AllocateTempHostReg(HR_CALLEE_SAVED)); const Register data_backup = Register(AllocateTempHostReg(HR_CALLEE_SAVED)); - FlushForLoadStore(address, true); + FlushForLoadStore(address, true, use_fastmem); ComputeLoadStoreAddressArg(cf, address, addr_reg); armAsm->mov(data_backup, RARG2); - GenerateStore(addr_reg, RARG2, size); + GenerateStore(addr_reg, RARG2, size, use_fastmem); Flush(FLUSH_FOR_C_CALL); armAsm->mov(RARG3, data_backup); diff --git a/src/core/cpu_newrec_compiler_aarch32.h b/src/core/cpu_newrec_compiler_aarch32.h index 1b5f84273..cffc40069 100644 --- a/src/core/cpu_newrec_compiler_aarch32.h +++ b/src/core/cpu_newrec_compiler_aarch32.h @@ -96,20 +96,20 @@ protected: const std::optional<Register>& reg = std::nullopt); template<typename RegAllocFn> vixl::aarch32::Register GenerateLoad(const vixl::aarch32::Register& addr_reg, MemoryAccessSize size, bool sign, - const RegAllocFn& dst_reg_alloc); + bool use_fastmem, const RegAllocFn& dst_reg_alloc); void GenerateStore(const vixl::aarch32::Register& addr_reg, const vixl::aarch32::Register& value_reg, - MemoryAccessSize size); - void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, + MemoryAccessSize size, bool use_fastmem); + void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) override; - void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, + void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) override; - void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, + void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) override; - void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, + void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) override; - void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, + void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) override; - void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, + void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional<VirtualMemoryAddress>& address) override; void TestInterrupts(const vixl::aarch32::Register& sr); diff --git a/src/core/cpu_newrec_compiler_aarch64.cpp b/src/core/cpu_newrec_compiler_aarch64.cpp index a19f32ae1..af195b314 100644 --- a/src/core/cpu_newrec_compiler_aarch64.cpp +++ b/src/core/cpu_newrec_compiler_aarch64.cpp @@ -37,8 +37,7 @@ Compiler* g_compiler = &s_instance; } // namespace CPU::NewRec CPU::NewRec::AArch64Compiler::AArch64Compiler() - : m_emitter(PositionDependentCode) , m_far_emitter(PositionIndependentCode) + : m_emitter(PositionDependentCode), m_far_emitter(PositionIndependentCode) { } @@ -1314,11 +1313,10 @@ CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf, template<typename RegAllocFn> vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::aarch64::WRegister& addr_reg, - MemoryAccessSize size, bool sign, + MemoryAccessSize size, bool sign, bool use_fastmem, const RegAllocFn& dst_reg_alloc) { - const bool checked = g_settings.cpu_recompiler_memory_exceptions; - if (!checked && CodeCache::IsUsingFastmem()) + if (use_fastmem) { m_cycles += Bus::RAM_READ_TICKS; @@ -1356,6 +1354,7 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl:: if (addr_reg.GetCode() != 
RWARG1.GetCode()) armAsm->mov(RWARG1, addr_reg); + const bool checked = g_settings.cpu_recompiler_memory_exceptions; switch (size) { case MemoryAccessSize::Byte: @@ -1429,10 +1428,10 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl:: } void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister& addr_reg, - const vixl::aarch64::WRegister& value_reg, MemoryAccessSize size) + const vixl::aarch64::WRegister& value_reg, MemoryAccessSize size, + bool use_fastmem) { - const bool checked = g_settings.cpu_recompiler_memory_exceptions; - if (!checked && CodeCache::IsUsingFastmem()) + if (use_fastmem) { if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) { @@ -1467,6 +1466,7 @@ void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister& if (value_reg.GetCode() != RWARG2.GetCode()) armAsm->mov(RWARG2, value_reg); + const bool checked = g_settings.cpu_recompiler_memory_exceptions; switch (size) { case MemoryAccessSize::Byte: @@ -1515,15 +1515,15 @@ void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister& } } -void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, +void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional& address) { const std::optional addr_reg = g_settings.gpu_pgxp_enable ? std::optional(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional(); - FlushForLoadStore(address, false); + FlushForLoadStore(address, false, use_fastmem); const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg); - const WRegister data = GenerateLoad(addr, size, sign, [this, cf]() { + const WRegister data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() { if (cf.MipsT() == Reg::zero) return RWRET; @@ -1544,11 +1544,11 @@ void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, +void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); - FlushForLoadStore(address, false); + FlushForLoadStore(address, false, use_fastmem); // TODO: if address is constant, this can be simplified.. @@ -1560,7 +1560,7 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize const WRegister addr = WRegister(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP)); ComputeLoadStoreAddressArg(cf, address, addr); armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u)); - GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; }); + GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; }); if (inst->r.rt == Reg::zero) { @@ -1628,15 +1628,15 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize FreeHostReg(addr.GetCode()); } -void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, +void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, const std::optional& address) { const std::optional addr_reg = g_settings.gpu_pgxp_enable ? 
                                               std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                               std::optional<WRegister>();
-  FlushForLoadStore(address, false);
+  FlushForLoadStore(address, false, use_fastmem);
   const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
-  GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RWRET; });
+  GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });

   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
   const auto [ptr, action] = GetGTERegisterPointer(index, true);
@@ -1708,7 +1708,7 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
   }
 }

-void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
 {
   AssertRegOrConstS(cf);
@@ -1717,13 +1717,13 @@ void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
   const std::optional<WRegister> addr_reg = g_settings.gpu_pgxp_enable ?
                                               std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                               std::optional<WRegister>();
-  FlushForLoadStore(address, true);
+  FlushForLoadStore(address, true, use_fastmem);
   const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
   const WRegister data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
   if (!cf.valid_host_t)
     MoveTToReg(RWARG2, cf);
-  GenerateStore(addr, data, size);
+  GenerateStore(addr, data, size, use_fastmem);

   if (g_settings.gpu_pgxp_enable)
   {
@@ -1736,18 +1736,18 @@ void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
   }
 }

-void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
 {
   DebugAssert(size == MemoryAccessSize::Word && !sign);
-  FlushForLoadStore(address, true);
+  FlushForLoadStore(address, true, use_fastmem);

   // TODO: if address is constant, this can be simplified..
   // We'd need to be careful here if we weren't overwriting it..
   const WRegister addr = WRegister(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
   ComputeLoadStoreAddressArg(cf, address, addr);
   armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
-  GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
+  GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });

   // TODO: this can take over rt's value if it's no longer needed
   // NOTE: can't trust T in cf because of the flush
@@ -1793,13 +1793,13 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
     FreeHostReg(addr.GetCode());

   armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
-  GenerateStore(RWARG1, value, MemoryAccessSize::Word);
+  GenerateStore(RWARG1, value, MemoryAccessSize::Word, use_fastmem);
 }

-void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                 const std::optional<VirtualMemoryAddress>& address)
 {
-  FlushForLoadStore(address, true);
+  FlushForLoadStore(address, true, use_fastmem);
   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
   const auto [ptr, action] = GetGTERegisterPointer(index, false);
@@ -1832,17 +1832,17 @@ void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
   if (!g_settings.gpu_pgxp_enable)
   {
     const WRegister addr = ComputeLoadStoreAddressArg(cf, address);
-    GenerateStore(addr, RWARG2, size);
+    GenerateStore(addr, RWARG2, size, use_fastmem);
     return;
   }

   // TODO: This can be simplified because we don't need to validate in PGXP..
   const WRegister addr_reg = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
   const WRegister data_backup = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
-  FlushForLoadStore(address, true);
+  FlushForLoadStore(address, true, use_fastmem);
   ComputeLoadStoreAddressArg(cf, address, addr_reg);
   armAsm->mov(data_backup, RWARG2);
-  GenerateStore(addr_reg, RWARG2, size);
+  GenerateStore(addr_reg, RWARG2, size, use_fastmem);

   Flush(FLUSH_FOR_C_CALL);
   armAsm->mov(RWARG3, data_backup);
diff --git a/src/core/cpu_newrec_compiler_aarch64.h b/src/core/cpu_newrec_compiler_aarch64.h
index a9477ba42..46a27ba64 100644
--- a/src/core/cpu_newrec_compiler_aarch64.h
+++ b/src/core/cpu_newrec_compiler_aarch64.h
@@ -26,7 +26,8 @@ protected:
   void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
   void CopyHostReg(u32 dst, u32 src) override;

-  void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space) override;
+  void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
+             u32 far_code_space) override;
   void BeginBlock() override;
   void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
   void GenerateICacheCheckAndUpdate() override;
@@ -97,20 +98,20 @@ protected:
                                                  const std::optional<vixl::aarch64::WRegister>& reg = std::nullopt);
   template<typename RegAllocFn>
   vixl::aarch64::WRegister GenerateLoad(const vixl::aarch64::WRegister& addr_reg, MemoryAccessSize size, bool sign,
-                                        const RegAllocFn& dst_reg_alloc);
+                                        bool use_fastmem, const RegAllocFn& dst_reg_alloc);
   void GenerateStore(const vixl::aarch64::WRegister& addr_reg, const vixl::aarch64::WRegister& value_reg,
-                     MemoryAccessSize size);
-  void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+                     MemoryAccessSize size, bool use_fastmem);
+  void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                     const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                     const std::optional<VirtualMemoryAddress>& address) override;

   void TestInterrupts(const vixl::aarch64::WRegister& sr);
diff --git a/src/core/cpu_newrec_compiler_riscv64.cpp b/src/core/cpu_newrec_compiler_riscv64.cpp
index 88ad4783e..e4700b241 100644
--- a/src/core/cpu_newrec_compiler_riscv64.cpp
+++ b/src/core/cpu_newrec_compiler_riscv64.cpp
@@ -1595,10 +1595,9 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::ComputeLoadStoreAddressArg(
 template<typename RegAllocFn>
 void CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign,
-                                                const RegAllocFn& dst_reg_alloc)
+                                                bool use_fastmem, const RegAllocFn& dst_reg_alloc)
 {
-  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
-  if (!checked && CodeCache::IsUsingFastmem())
+  if (use_fastmem)
   {
     m_cycles += Bus::RAM_READ_TICKS;

@@ -1648,6 +1647,7 @@ void CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, Me
   if (addr_reg.Index() != RARG1.Index())
     rvAsm->MV(RARG1, addr_reg);

+  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
   switch (size)
   {
     case MemoryAccessSize::Byte:
@@ -1723,10 +1723,9 @@ void CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, Me
 }

 void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,
-                                                 MemoryAccessSize size)
+                                                 MemoryAccessSize size, bool use_fastmem)
 {
-  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
-  if (!checked && CodeCache::IsUsingFastmem())
+  if (use_fastmem)
   {
     DebugAssert(value_reg != RSCRATCH);
     rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
@@ -1774,6 +1773,7 @@ void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, c
   if (value_reg.Index() != RARG2.Index())
     rvAsm->MV(RARG2, value_reg);

+  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
   switch (size)
   {
     case MemoryAccessSize::Byte:
@@ -1822,12 +1822,12 @@ void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, c
   }
 }

-void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
 {
-  FlushForLoadStore(address, false);
+  FlushForLoadStore(address, false, use_fastmem);
   const GPR addr = ComputeLoadStoreAddressArg(cf, address);
-  GenerateLoad(addr, size, sign, [this, cf]() {
+  GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
     if (cf.MipsT() == Reg::zero)
       return RRET;

@@ -1836,11 +1836,11 @@ void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
   });
 }

-void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
 {
   DebugAssert(size == MemoryAccessSize::Word && !sign);
-  FlushForLoadStore(address, false);
+  FlushForLoadStore(address, false, use_fastmem);

   // TODO: if address is constant, this can be simplified..
@@ -1852,7 +1852,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
   const GPR addr = GPR(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
   ComputeLoadStoreAddressArg(cf, address, addr);
   rvAsm->ANDI(RARG1, addr, ~0x3u);
-  GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; });
+  GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });

   if (inst->r.rt == Reg::zero)
   {
@@ -1920,12 +1920,12 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
     FreeHostReg(addr.Index());
 }

-void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                 const std::optional<VirtualMemoryAddress>& address)
 {
-  FlushForLoadStore(address, false);
+  FlushForLoadStore(address, false, use_fastmem);
   const GPR addr = ComputeLoadStoreAddressArg(cf, address);
-  GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RRET; });
+  GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });

   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
   const auto [ptr, action] = GetGTERegisterPointer(index, true);
@@ -1987,32 +1987,32 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
   }
 }

-void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
-  FlushForLoadStore(address, true);
+  FlushForLoadStore(address, true, use_fastmem);
   const GPR addr = ComputeLoadStoreAddressArg(cf, address);
   if (!cf.valid_host_t)
     MoveTToReg(RARG2, cf);
-  GenerateStore(addr, cf.valid_host_t ? CFGetRegT(cf) : RARG2, size);
+  GenerateStore(addr, cf.valid_host_t ? CFGetRegT(cf) : RARG2, size, use_fastmem);
 }

-void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
 {
   DebugAssert(size == MemoryAccessSize::Word && !sign);
-  FlushForLoadStore(address, true);
+  FlushForLoadStore(address, true, use_fastmem);

   // TODO: if address is constant, this can be simplified..
   // We'd need to be careful here if we weren't overwriting it..
   const GPR addr = GPR(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
   ComputeLoadStoreAddressArg(cf, address, addr);
   rvAsm->ANDI(RARG1, addr, ~0x3u);
-  GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; });
+  GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });

   // TODO: this can take over rt's value if it's no longer needed
   // NOTE: can't trust T in cf because of the flush
@@ -2058,13 +2058,13 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
     FreeHostReg(addr.Index());

   rvAsm->ANDI(RARG1, addr, ~0x3u);
-  GenerateStore(RARG1, value, MemoryAccessSize::Word);
+  GenerateStore(RARG1, value, MemoryAccessSize::Word, use_fastmem);
 }

-void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                 const std::optional<VirtualMemoryAddress>& address)
 {
-  FlushForLoadStore(address, true);
+  FlushForLoadStore(address, true, use_fastmem);
   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
   const auto [ptr, action] = GetGTERegisterPointer(index, false);
@@ -2094,7 +2094,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
   }

   const GPR addr = ComputeLoadStoreAddressArg(cf, address);
-  GenerateStore(addr, RARG2, size);
+  GenerateStore(addr, RARG2, size, use_fastmem);
 }

 void CPU::NewRec::RISCV64Compiler::Compile_mtc0(CompileFlags cf)
diff --git a/src/core/cpu_newrec_compiler_riscv64.h b/src/core/cpu_newrec_compiler_riscv64.h
index 96a265e33..a91af4745 100644
--- a/src/core/cpu_newrec_compiler_riscv64.h
+++ b/src/core/cpu_newrec_compiler_riscv64.h
@@ -88,19 +88,21 @@ protected:
   biscuit::GPR ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
                                           const std::optional<biscuit::GPR>& reg = std::nullopt);
   template<typename RegAllocFn>
-  void GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, const RegAllocFn& dst_reg_alloc);
-  void GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, MemoryAccessSize size);
-  void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, bool use_fastmem,
+                    const RegAllocFn& dst_reg_alloc);
+  void GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, MemoryAccessSize size,
+                     bool use_fastmem);
+  void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                     const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                     const std::optional<VirtualMemoryAddress>& address) override;

   void TestInterrupts(const biscuit::GPR& sr);
diff --git a/src/core/cpu_newrec_compiler_x64.cpp b/src/core/cpu_newrec_compiler_x64.cpp
index 07913c14e..fde0d9b7a 100644
--- a/src/core/cpu_newrec_compiler_x64.cpp
+++ b/src/core/cpu_newrec_compiler_x64.cpp
@@ -1239,10 +1239,9 @@ CPU::NewRec::X64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
 template<typename RegAllocFn>
 Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign,
-                                                    const RegAllocFn& dst_reg_alloc)
+                                                    bool use_fastmem, const RegAllocFn& dst_reg_alloc)
 {
-  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
-  if (CodeCache::IsUsingFastmem() && !checked)
+  if (use_fastmem)
   {
     m_cycles += Bus::RAM_READ_TICKS;

@@ -1296,6 +1295,7 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg
   if (addr_reg != RWARG1)
     cg->mov(RWARG1, addr_reg);

+  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
   switch (size)
   {
     case MemoryAccessSize::Byte:
@@ -1370,10 +1370,9 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg
 }

 void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg,
-                                             MemoryAccessSize size)
+                                             MemoryAccessSize size, bool use_fastmem)
 {
-  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
-  if (CodeCache::IsUsingFastmem() && !checked)
+  if (use_fastmem)
  {
     if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
     {
@@ -1417,6 +1416,7 @@ void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const
   if (value_reg != RWARG2)
     cg->mov(RWARG2, value_reg);

+  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
   switch (size)
   {
     case MemoryAccessSize::Byte:
@@ -1466,16 +1466,16 @@ void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const
   }
 }

-void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                            const std::optional<VirtualMemoryAddress>& address)
 {
   const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
                                           std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                           std::optional<Reg32>();
-  FlushForLoadStore(address, false);
+  FlushForLoadStore(address, false, use_fastmem);
   const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
-  const Reg32 data = GenerateLoad(addr, size, sign, [this, cf]() {
+  const Reg32 data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
     if (cf.MipsT() == Reg::zero)
       return RWRET;

@@ -1495,11 +1495,11 @@ void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize siz
   }
 }

-void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                            const std::optional<VirtualMemoryAddress>& address)
 {
   DebugAssert(size == MemoryAccessSize::Word && !sign);
-  FlushForLoadStore(address, false);
+  FlushForLoadStore(address, false, use_fastmem);

   // TODO: if address is constant, this can be simplified..
@@ -1512,7 +1512,7 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz
   ComputeLoadStoreAddressArg(cf, address, addr);
   cg->mov(RWARG1, addr);
   cg->and_(RWARG1, ~0x3u);
-  GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
+  GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });

   if (inst->r.rt == Reg::zero)
   {
@@ -1586,15 +1586,15 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz
     FreeHostReg(addr.getIdx());
 }

-void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                             const std::optional<VirtualMemoryAddress>& address)
 {
   const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
                                           std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                           std::optional<Reg32>();
-  FlushForLoadStore(address, false);
+  FlushForLoadStore(address, false, use_fastmem);
   const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
-  GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RWRET; });
+  GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });

   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
   const auto [ptr, action] = GetGTERegisterPointer(index, true);
@@ -1666,19 +1666,19 @@ void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize si
   }
 }

-void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                            const std::optional<VirtualMemoryAddress>& address)
 {
   const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
                                           std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                           std::optional<Reg32>();
-  FlushForLoadStore(address, true);
+  FlushForLoadStore(address, true, use_fastmem);
   const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
   const Reg32 data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
   if (!cf.valid_host_t)
     MoveTToReg(RWARG2, cf);
-  GenerateStore(addr, data, size);
+  GenerateStore(addr, data, size, use_fastmem);

   if (g_settings.gpu_pgxp_enable)
   {
@@ -1691,11 +1691,11 @@ void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize siz
   }
 }

-void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                            const std::optional<VirtualMemoryAddress>& address)
 {
   DebugAssert(size == MemoryAccessSize::Word && !sign);
-  FlushForLoadStore(address, true);
+  FlushForLoadStore(address, true, use_fastmem);

   // TODO: if address is constant, this can be simplified..
   // We'd need to be careful here if we weren't overwriting it..
@@ -1703,7 +1703,7 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
   ComputeLoadStoreAddressArg(cf, address, addr);
   cg->mov(RWARG1, addr);
   cg->and_(RWARG1, ~0x3u);
-  GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
+  GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });

   // TODO: this can take over rt's value if it's no longer needed
   // NOTE: can't trust T in cf because of the flush
@@ -1755,10 +1755,10 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
   cg->mov(RWARG1, addr);
   cg->and_(RWARG1, ~0x3u);
-  GenerateStore(RWARG1, value, MemoryAccessSize::Word);
+  GenerateStore(RWARG1, value, MemoryAccessSize::Word, use_fastmem);
 }

-void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                             const std::optional<VirtualMemoryAddress>& address)
 {
   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
@@ -1791,19 +1791,19 @@ void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize si
   // PGXP makes this a giant pain.
   if (!g_settings.gpu_pgxp_enable)
   {
-    FlushForLoadStore(address, true);
+    FlushForLoadStore(address, true, use_fastmem);
     const Reg32 addr = ComputeLoadStoreAddressArg(cf, address);
-    GenerateStore(addr, RWARG2, size);
+    GenerateStore(addr, RWARG2, size, use_fastmem);
     return;
   }

   // TODO: This can be simplified because we don't need to validate in PGXP..
   const Reg32 addr_reg = Reg32(AllocateTempHostReg(HR_CALLEE_SAVED));
   const Reg32 data_backup = Reg32(AllocateTempHostReg(HR_CALLEE_SAVED));
-  FlushForLoadStore(address, true);
+  FlushForLoadStore(address, true, use_fastmem);
   ComputeLoadStoreAddressArg(cf, address, addr_reg);
   cg->mov(data_backup, RWARG2);
-  GenerateStore(addr_reg, RWARG2, size);
+  GenerateStore(addr_reg, RWARG2, size, use_fastmem);

   Flush(FLUSH_FOR_C_CALL);
   cg->mov(RWARG3, data_backup);
@@ -2066,9 +2066,9 @@ void CPU::NewRec::X64Compiler::Compile_cop2(CompileFlags cf)
 }

 u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
-                                        TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
-                                        u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
-                                        bool is_load)
+                                       TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
+                                       u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
+                                       bool is_load)
 {
   CodeGenerator acg(thunk_space, thunk_code);
   CodeGenerator* cg = &acg;
diff --git a/src/core/cpu_newrec_compiler_x64.h b/src/core/cpu_newrec_compiler_x64.h
index e9af43398..e99ba1993 100644
--- a/src/core/cpu_newrec_compiler_x64.h
+++ b/src/core/cpu_newrec_compiler_x64.h
@@ -87,20 +87,21 @@ protected:
   Xbyak::Reg32 ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
                                           const std::optional<Xbyak::Reg32>& reg = std::nullopt);
   template<typename RegAllocFn>
-  Xbyak::Reg32 GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign,
+  Xbyak::Reg32 GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign, bool use_fastmem,
                             const RegAllocFn& dst_reg_alloc);
-  void GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, MemoryAccessSize size);
-  void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, MemoryAccessSize size,
+                     bool use_fastmem);
+  void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                     const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                    const std::optional<VirtualMemoryAddress>& address) override;
-  void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+  void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                     const std::optional<VirtualMemoryAddress>& address) override;

   void TestInterrupts(const Xbyak::Reg32& sr);
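
Note: the per-instruction use_fastmem flag threaded through every GenerateLoad/GenerateStore and Compile_* override above replaces the old per-emitter IsUsingFastmem()/cpu_recompiler_memory_exceptions test. A minimal sketch of how the shared NewRec front-end could derive that flag before dispatching to the per-architecture overrides; ComputeLoadStoreUseFastmem is a hypothetical helper name for illustration, while IsUsingFastmem(), HasPreviouslyFaultedOnPC() and the settings fields are the ones this patch uses:

// Sketch only, not the patch's exact code: one plausible way the common
// compiler code might compute the use_fastmem argument per load/store.
static bool ComputeLoadStoreUseFastmem(u32 instruction_pc) // hypothetical helper
{
  // Fastmem is only usable at all when it is enabled and guest memory
  // exceptions don't need to be precise.
  if (!CPU::CodeCache::IsUsingFastmem() || g_settings.cpu_recompiler_memory_exceptions)
    return false;

  // If a fastmem access at this PC has already faulted and been backpatched
  // to a slow-path call, emit the slow path directly when the block is
  // recompiled instead of taking (and patching) the same fault again.
  return !CPU::CodeCache::HasPreviouslyFaultedOnPC(instruction_pc);
}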