// SPDX-FileCopyrightText: 2023 Connor McLaughlin
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "cpu_newrec_compiler.h"
#include "common/assert.h"
#include "common/log.h"
#include "common/small_string.h"
#include "cpu_code_cache.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "cpu_pgxp.h"
#include "settings.h"
#include <limits>
#include <optional>

Log_SetChannel(NewRec::Compiler);

// TODO: direct link skip delay slot check
// TODO: speculative constants
// TODO: std::bitset in msvc has bounds checks even in release...

const std::array<std::array<const void*, 2>, 3> CPU::NewRec::Compiler::s_pgxp_mem_load_functions = {
  {{{reinterpret_cast<const void*>(&PGXP::CPU_LBx), reinterpret_cast<const void*>(&PGXP::CPU_LBx)}},
   {{reinterpret_cast<const void*>(&PGXP::CPU_LHU), reinterpret_cast<const void*>(&PGXP::CPU_LH)}},
   {{reinterpret_cast<const void*>(&PGXP::CPU_LW)}}}};
const std::array<const void*, 3> CPU::NewRec::Compiler::s_pgxp_mem_store_functions = {
  {reinterpret_cast<const void*>(&PGXP::CPU_SB), reinterpret_cast<const void*>(&PGXP::CPU_SH),
   reinterpret_cast<const void*>(&PGXP::CPU_SW)}};

CPU::NewRec::Compiler::Compiler() = default;

CPU::NewRec::Compiler::~Compiler() = default;

void CPU::NewRec::Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
                                  u8* far_code_buffer, u32 far_code_space)
{
  m_block = block;
  m_compiler_pc = block->pc;
  m_cycles = 0;
  m_gte_done_cycle = 0;
  inst = nullptr;
  iinfo = nullptr;
  m_current_instruction_pc = 0;
  m_current_instruction_branch_delay_slot = false;
  m_dirty_pc = false;
  m_dirty_instruction_bits = false;
  m_dirty_gte_done_cycle = true;
  m_block_ended = false;
  m_constant_reg_values.fill(0);
  m_constant_regs_valid.reset();
  m_constant_regs_dirty.reset();

  for (u32 i = 0; i < NUM_HOST_REGS; i++)
    ClearHostReg(i);
  m_register_alloc_counter = 0;

  m_constant_reg_values[static_cast<u8>(Reg::zero)] = 0;
  m_constant_regs_valid.set(static_cast<u8>(Reg::zero));

  m_load_delay_dirty = EMULATE_LOAD_DELAYS;
  m_load_delay_register = Reg::count;
  m_load_delay_value_register = NUM_HOST_REGS;

  InitSpeculativeRegs();
}

void CPU::NewRec::Compiler::BeginBlock()
{
#if 0
  GenerateCall(reinterpret_cast<const void*>(&CPU::CodeCache::LogCurrentState));
#endif

  if (m_block->protection == CodeCache::PageProtectionMode::ManualCheck)
  {
    Log_DebugPrintf("Generate manual protection for PC %08X", m_block->pc);
    const u8* ram_ptr = Bus::g_ram + VirtualAddressToPhysical(m_block->pc);
    const u8* shadow_ptr = reinterpret_cast<const u8*>(m_block->Instructions());
    GenerateBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction));
  }

  if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0)
    GenerateICacheCheckAndUpdate();

  if (g_settings.bios_tty_logging)
  {
    if (m_block->pc == 0xa0)
      GenerateCall(reinterpret_cast<const void*>(&CPU::HandleA0Syscall));
    else if (m_block->pc == 0xb0)
      GenerateCall(reinterpret_cast<const void*>(&CPU::HandleB0Syscall));
  }

  inst = m_block->Instructions();
  iinfo = m_block->InstructionsInfo();
  m_current_instruction_pc = m_block->pc;
  m_current_instruction_branch_delay_slot = false;
  m_compiler_pc += sizeof(Instruction);
  m_dirty_pc = true;
  m_dirty_instruction_bits = true;
}

const void* CPU::NewRec::Compiler::CompileBlock(CodeCache::Block* block, u32* host_code_size, u32* host_far_code_size)
{
  JitCodeBuffer& buffer = CodeCache::GetCodeBuffer();
  Reset(block, buffer.GetFreeCodePointer(), buffer.GetFreeCodeSpace(), buffer.GetFreeFarCodePointer(),
        buffer.GetFreeFarCodeSpace());

  Log_DebugPrintf("Block range: %08X -> %08X", block->pc, block->pc + block->size * 4);

  BeginBlock();

  for (;;)
  {
    CompileInstruction();

    if (m_block_ended || iinfo->is_last_instruction)
    {
      if (!m_block_ended)
      {
        // Block was truncated. Link it.
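        // (Reached when analysis ended the block on a non-branch instruction,
        // including via TruncateBlock(); execution continues at the next
        // sequential PC, so we link straight to m_compiler_pc, e.g.:
        //   0x80001000  addiu $t0, $t0, 1   <- last compiled instruction
        //   0x80001004  ...                 <- EndBlock(m_compiler_pc) target)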
        EndBlock(m_compiler_pc, false);
      }

      break;
    }

    inst++;
    iinfo++;
    m_current_instruction_pc += sizeof(Instruction);
    m_compiler_pc += sizeof(Instruction);
    m_dirty_pc = true;
    m_dirty_instruction_bits = true;
  }

  // Nothing should be valid anymore
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
    DebugAssert(!IsHostRegAllocated(i));
  for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
    DebugAssert(!m_constant_regs_dirty.test(i) && !m_constant_regs_valid.test(i));
  m_speculative_constants.memory.clear();

  u32 code_size, far_code_size;
  const void* code = EndCompile(&code_size, &far_code_size);
  *host_code_size = code_size;
  *host_far_code_size = far_code_size;
  buffer.CommitCode(code_size);
  buffer.CommitFarCode(far_code_size);

  return code;
}

void CPU::NewRec::Compiler::SetConstantReg(Reg r, u32 v)
{
  DebugAssert(r < Reg::count && r != Reg::zero);

  // There might still be an incoming load delay which we need to cancel.
  CancelLoadDelaysToReg(r);

  if (m_constant_regs_valid.test(static_cast<u8>(r)) && m_constant_reg_values[static_cast<u8>(r)] == v)
  {
    // Shouldn't be any host regs though.
    DebugAssert(!CheckHostReg(0, HR_TYPE_CPU_REG, r).has_value());
    return;
  }

  m_constant_reg_values[static_cast<u8>(r)] = v;
  m_constant_regs_valid.set(static_cast<u8>(r));
  m_constant_regs_dirty.set(static_cast<u8>(r));

  if (const std::optional<u32> hostreg = CheckHostReg(0, HR_TYPE_CPU_REG, r); hostreg.has_value())
  {
    Log_DebugPrintf("Discarding guest register %s in host register %s due to constant set", GetRegName(r),
                    GetHostRegName(hostreg.value()));
    FreeHostReg(hostreg.value());
  }
}

void CPU::NewRec::Compiler::CancelLoadDelaysToReg(Reg reg)
{
  if (m_load_delay_register != reg)
    return;

  Log_DebugPrintf("Cancelling load delay to %s", GetRegName(reg));
  m_load_delay_register = Reg::count;
  if (m_load_delay_value_register != NUM_HOST_REGS)
    ClearHostReg(m_load_delay_value_register);
}

void CPU::NewRec::Compiler::UpdateLoadDelay()
{
  if (m_load_delay_dirty)
  {
    // we shouldn't have a static load delay.
    DebugAssert(!HasLoadDelay());

    // have to invalidate registers, we might have one of them cached
    // TODO: double check the order here, will we trash a new value? we shouldn't...
    // thankfully since this only happens on the first instruction, we can get away with just killing anything which
    // isn't in write mode, because nothing could've been written before it, and the new value overwrites any
    // load-delayed value
    Log_DebugPrintf("Invalidating non-dirty registers, and flushing load delay from state");

    constexpr u32 req_flags = (HR_ALLOCATED | HR_MODE_WRITE);

    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      HostRegAlloc& ra = m_host_regs[i];
      if (ra.type != HR_TYPE_CPU_REG || !IsHostRegAllocated(i) || ((ra.flags & req_flags) == req_flags))
        continue;

      Log_DebugPrintf("Freeing non-dirty cached register %s in %s", GetRegName(ra.reg), GetHostRegName(i));
      DebugAssert(!(ra.flags & HR_MODE_WRITE));
      ClearHostReg(i);
    }

    // remove any non-dirty constants too
    for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
    {
      if (!HasConstantReg(static_cast<Reg>(i)) || HasDirtyConstantReg(static_cast<Reg>(i)))
        continue;

      Log_DebugPrintf("Clearing non-dirty constant %s", GetRegName(static_cast<Reg>(i)));
      ClearConstantReg(static_cast<Reg>(i));
    }

    Flush(FLUSH_LOAD_DELAY_FROM_STATE);
  }

  // commit the delayed register load
  FinishLoadDelay();

  // move next load delay forward
  if (m_next_load_delay_register != Reg::count)
  {
    // if it somehow got flushed, read it back in.
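    // (Background, as a sketch of the hardware being modelled: on the R3000A a
    // load's result is not visible to the very next instruction, e.g.
    //   lw   $t0, 0($a0)
    //   addu $t1, $t0, $t2   ; still sees the OLD $t0 (load delay slot)
    //   or   $t3, $t0, $zero ; first instruction to see the loaded value
    // which is why delayed loads are tracked in two stages here: the "next"
    // load delay becomes the "current" one once the following instruction has
    // been compiled.)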
    if (m_next_load_delay_value_register == NUM_HOST_REGS)
    {
      AllocateHostReg(HR_MODE_READ, HR_TYPE_NEXT_LOAD_DELAY_VALUE, m_next_load_delay_register);
      DebugAssert(m_next_load_delay_value_register != NUM_HOST_REGS);
    }

    HostRegAlloc& ra = m_host_regs[m_next_load_delay_value_register];
    ra.flags |= HR_MODE_WRITE;
    ra.type = HR_TYPE_LOAD_DELAY_VALUE;

    m_load_delay_register = m_next_load_delay_register;
    m_load_delay_value_register = m_next_load_delay_value_register;
    m_next_load_delay_register = Reg::count;
    m_next_load_delay_value_register = NUM_HOST_REGS;
  }
}

void CPU::NewRec::Compiler::FinishLoadDelay()
{
  DebugAssert(!m_load_delay_dirty);
  if (!HasLoadDelay())
    return;

  // we may need to reload the value..
  if (m_load_delay_value_register == NUM_HOST_REGS)
  {
    AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, m_load_delay_register);
    DebugAssert(m_load_delay_value_register != NUM_HOST_REGS);
  }

  // kill any (old) cached value for this register
  DeleteMIPSReg(m_load_delay_register, false);

  Log_DebugPrintf("Finished delayed load to %s in host register %s", GetRegName(m_load_delay_register),
                  GetHostRegName(m_load_delay_value_register));

  // and swap the mode over so it gets written back later
  HostRegAlloc& ra = m_host_regs[m_load_delay_value_register];
  DebugAssert(ra.reg == m_load_delay_register);
  ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | HR_ALLOCATED | HR_MODE_READ | HR_MODE_WRITE;
  ra.counter = m_register_alloc_counter++;
  ra.type = HR_TYPE_CPU_REG;

  // constants are gone
  Log_DebugPrintf("Clearing constant in %s due to load delay", GetRegName(m_load_delay_register));
  ClearConstantReg(m_load_delay_register);

  m_load_delay_register = Reg::count;
  m_load_delay_value_register = NUM_HOST_REGS;
}

void CPU::NewRec::Compiler::FinishLoadDelayToReg(Reg reg)
{
  if (m_load_delay_dirty)
  {
    // inter-block :(
    UpdateLoadDelay();
    return;
  }

  if (m_load_delay_register != reg)
    return;

  FinishLoadDelay();
}

u32 CPU::NewRec::Compiler::GetFlagsForNewLoadDelayedReg() const
{
  return g_settings.gpu_pgxp_enable ? (HR_MODE_WRITE | HR_CALLEE_SAVED) : (HR_MODE_WRITE);
}

void CPU::NewRec::Compiler::ClearConstantReg(Reg r)
{
  DebugAssert(r < Reg::count && r != Reg::zero);
  m_constant_reg_values[static_cast<u8>(r)] = 0;
  m_constant_regs_valid.reset(static_cast<u8>(r));
  m_constant_regs_dirty.reset(static_cast<u8>(r));
}

void CPU::NewRec::Compiler::FlushConstantRegs(bool invalidate)
{
  for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
  {
    if (m_constant_regs_dirty.test(static_cast<u8>(i)))
      FlushConstantReg(static_cast<Reg>(i));
    if (invalidate)
      ClearConstantReg(static_cast<Reg>(i));
  }
}

CPU::Reg CPU::NewRec::Compiler::MipsD() const
{
  return inst->r.rd;
}

u32 CPU::NewRec::Compiler::GetConditionalBranchTarget(CompileFlags cf) const
{
  // compiler pc has already been advanced when swapping branch delay slots
  const u32 current_pc = m_compiler_pc - (cf.delay_slot_swapped ? sizeof(Instruction) : 0);
  return current_pc + (inst->i.imm_sext32() << 2);
}

u32 CPU::NewRec::Compiler::GetBranchReturnAddress(CompileFlags cf) const
{
  // compiler pc has already been advanced when swapping branch delay slots
  return m_compiler_pc + (cf.delay_slot_swapped ? 0 : sizeof(Instruction));
}

bool CPU::NewRec::Compiler::TrySwapDelaySlot(Reg rs, Reg rt, Reg rd)
{
  if constexpr (!SWAP_BRANCH_DELAY_SLOTS)
    return false;

  const Instruction* next_instruction = inst + 1;
  DebugAssert(next_instruction < (m_block->Instructions() + m_block->size));

  const Reg opcode_rs = next_instruction->r.rs;
  const Reg opcode_rt = next_instruction->r.rt;
  const Reg opcode_rd = next_instruction->r.rd;

#ifdef _DEBUG
  TinyString disasm;
  DisassembleInstruction(&disasm, m_current_instruction_pc + 4, next_instruction->bits);
#endif

  // Just in case we read it in the instruction.. but the block should end after this.
  const Instruction* const backup_instruction = inst;
  const u32 backup_instruction_pc = m_current_instruction_pc;
  const bool backup_instruction_delay_slot = m_current_instruction_branch_delay_slot;

  if (next_instruction->bits == 0)
  {
    // nop
    goto is_safe;
  }

  // can't swap when the branch is the first instruction because of bloody load delays
  if ((EMULATE_LOAD_DELAYS && m_block->pc == m_current_instruction_pc) || m_load_delay_dirty ||
      (HasLoadDelay() && (m_load_delay_register == rs || m_load_delay_register == rt || m_load_delay_register == rd)))
  {
    goto is_unsafe;
  }

  switch (next_instruction->op)
  {
    case InstructionOp::addi:
    case InstructionOp::addiu:
    case InstructionOp::slti:
    case InstructionOp::sltiu:
    case InstructionOp::andi:
    case InstructionOp::ori:
    case InstructionOp::xori:
    case InstructionOp::lui:
    case InstructionOp::lb:
    case InstructionOp::lh:
    case InstructionOp::lwl:
    case InstructionOp::lw:
    case InstructionOp::lbu:
    case InstructionOp::lhu:
    case InstructionOp::lwr:
    case InstructionOp::sb:
    case InstructionOp::sh:
    case InstructionOp::swl:
    case InstructionOp::sw:
    case InstructionOp::swr:
    {
      if ((rs != Reg::zero && rs == opcode_rt) || (rt != Reg::zero && rt == opcode_rt) ||
          (rd != Reg::zero && (rd == opcode_rs || rd == opcode_rt)) ||
          (HasLoadDelay() && (m_load_delay_register == opcode_rs || m_load_delay_register == opcode_rt)))
      {
        goto is_unsafe;
      }
    }
    break;

    case InstructionOp::lwc2: // LWC2
    case InstructionOp::swc2: // SWC2
      break;

    case InstructionOp::funct: // SPECIAL
    {
      switch (next_instruction->r.funct)
      {
        case InstructionFunct::sll:
        case InstructionFunct::srl:
        case InstructionFunct::sra:
        case InstructionFunct::sllv:
        case InstructionFunct::srlv:
        case InstructionFunct::srav:
        case InstructionFunct::add:
        case InstructionFunct::addu:
        case InstructionFunct::sub:
        case InstructionFunct::subu:
        case InstructionFunct::and_:
        case InstructionFunct::or_:
        case InstructionFunct::xor_:
        case InstructionFunct::nor:
        case InstructionFunct::slt:
        case InstructionFunct::sltu:
        {
          if ((rs != Reg::zero && rs == opcode_rd) || (rt != Reg::zero && rt == opcode_rd) ||
              (rd != Reg::zero && (rd == opcode_rs || rd == opcode_rt)) ||
              (HasLoadDelay() && (m_load_delay_register == opcode_rs || m_load_delay_register == opcode_rt ||
                                  m_load_delay_register == opcode_rd)))
          {
            goto is_unsafe;
          }
        }
        break;

        case InstructionFunct::mult:
        case InstructionFunct::multu:
        case InstructionFunct::div:
        case InstructionFunct::divu:
        {
          if (HasLoadDelay() && (m_load_delay_register == opcode_rs || m_load_delay_register == opcode_rt))
            goto is_unsafe;
        }
        break;

        default:
          goto is_unsafe;
      }
    }
    break;

    case InstructionOp::cop0: // COP0
    case InstructionOp::cop1: // COP1
    case InstructionOp::cop2: // COP2
    case InstructionOp::cop3: // COP3
    {
      if (next_instruction->cop.IsCommonInstruction())
      {
        switch (next_instruction->cop.CommonOp())
        {
          case CopCommonInstruction::mfcn: // MFC0
          case CopCommonInstruction::cfcn: // CFC0
          {
            if ((rs != Reg::zero && rs == opcode_rt) || (rt != Reg::zero && rt == opcode_rt) ||
                (rd != Reg::zero && rd == opcode_rt) || (HasLoadDelay() && m_load_delay_register == opcode_rt))
            {
              goto is_unsafe;
            }
          }
          break;

          case CopCommonInstruction::mtcn: // MTC0
          case CopCommonInstruction::ctcn: // CTC0
            break;
        }
      }
      else
      {
        // swap when it's GTE
        if (next_instruction->op != InstructionOp::cop2)
          goto is_unsafe;
      }
    }
    break;

    default:
      goto is_unsafe;
  }

is_safe:
#ifdef _DEBUG
  Log_DebugFmt("Swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
#endif

  CompileBranchDelaySlot();

  inst = backup_instruction;
  m_current_instruction_pc = backup_instruction_pc;
  m_current_instruction_branch_delay_slot = backup_instruction_delay_slot;
  return true;

is_unsafe:
#ifdef _DEBUG
  Log_DebugFmt("NOT swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
#endif

  return false;
}

void CPU::NewRec::Compiler::SetCompilerPC(u32 newpc)
{
  m_compiler_pc = newpc;
  m_dirty_pc = true;
}

u32 CPU::NewRec::Compiler::GetFreeHostReg(u32 flags)
{
  const u32 req_flags = HR_USABLE | (flags & HR_CALLEE_SAVED);

  u32 fallback = NUM_HOST_REGS;
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    if ((m_host_regs[i].flags & (req_flags | HR_NEEDED | HR_ALLOCATED)) == req_flags)
    {
      // Prefer callee-saved registers.
      if (m_host_regs[i].flags & HR_CALLEE_SAVED)
        return i;
      else if (fallback == NUM_HOST_REGS)
        fallback = i;
    }
  }
  if (fallback != NUM_HOST_REGS)
    return fallback;

  // find register with lowest counter
  u32 lowest = NUM_HOST_REGS;
  u16 lowest_count = std::numeric_limits<u16>::max();
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    const HostRegAlloc& ra = m_host_regs[i];
    if ((ra.flags & (req_flags | HR_NEEDED)) != req_flags)
      continue;

    DebugAssert(ra.flags & HR_ALLOCATED);
    if (ra.type == HR_TYPE_TEMP)
    {
      // can't punt temps
      continue;
    }

    if (ra.counter < lowest_count)
    {
      lowest = i;
      lowest_count = ra.counter;
    }
  }

  // AssertMsg(lowest != NUM_HOST_REGS, "Register allocation failed.");

  const HostRegAlloc& ra = m_host_regs[lowest];
  switch (ra.type)
  {
    case HR_TYPE_CPU_REG:
    {
      // If the register is needed later, and we're allocating a callee-saved register, try moving it to a
      // caller-saved register.
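      // (Illustrative example: on x86-64, rbx is callee-saved and survives a C
      // call such as a memory handler, while rax/rcx are caller-saved and would
      // have to be spilled and reloaded around it. Punting a soon-needed guest
      // register into a caller-saved host register keeps the scarcer
      // callee-saved ones free for values that must live across calls.)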
      if (iinfo->UsedTest(ra.reg) && flags & HR_CALLEE_SAVED)
      {
        u32 caller_saved_lowest = NUM_HOST_REGS;
        u16 caller_saved_lowest_count = std::numeric_limits<u16>::max();
        for (u32 i = 0; i < NUM_HOST_REGS; i++)
        {
          constexpr u32 caller_req_flags = HR_USABLE;
          constexpr u32 caller_req_mask = HR_USABLE | HR_NEEDED | HR_CALLEE_SAVED;
          const HostRegAlloc& caller_ra = m_host_regs[i];
          if ((caller_ra.flags & caller_req_mask) != caller_req_flags)
            continue;

          if (!(caller_ra.flags & HR_ALLOCATED))
          {
            caller_saved_lowest = i;
            caller_saved_lowest_count = 0;
            break;
          }

          if (caller_ra.type == HR_TYPE_TEMP)
            continue;

          if (caller_ra.counter < caller_saved_lowest_count)
          {
            caller_saved_lowest = i;
            caller_saved_lowest_count = caller_ra.counter;
          }
        }

        if (caller_saved_lowest_count < lowest_count)
        {
          Log_DebugPrintf("Moving caller-saved host register %s with MIPS register %s to %s for allocation",
                          GetHostRegName(lowest), GetRegName(ra.reg), GetHostRegName(caller_saved_lowest));
          if (IsHostRegAllocated(caller_saved_lowest))
            FreeHostReg(caller_saved_lowest);
          CopyHostReg(caller_saved_lowest, lowest);
          SwapHostRegAlloc(caller_saved_lowest, lowest);
          DebugAssert(!IsHostRegAllocated(lowest));
          return lowest;
        }
      }

      Log_DebugPrintf("Freeing register %s in host register %s for allocation", GetRegName(ra.reg),
                      GetHostRegName(lowest));
    }
    break;

    case HR_TYPE_LOAD_DELAY_VALUE:
    {
      Log_DebugPrintf("Freeing load delay register %s in host register %s for allocation", GetHostRegName(lowest),
                      GetRegName(ra.reg));
    }
    break;

    case HR_TYPE_NEXT_LOAD_DELAY_VALUE:
    {
      Log_DebugPrintf("Freeing next load delay register %s in host register %s due for allocation",
                      GetRegName(ra.reg), GetHostRegName(lowest));
    }
    break;

    default:
    {
      Panic("Unknown type freed");
    }
    break;
  }

  FreeHostReg(lowest);
  return lowest;
}

const char* CPU::NewRec::Compiler::GetReadWriteModeString(u32 flags)
{
  if ((flags & (HR_MODE_READ | HR_MODE_WRITE)) == (HR_MODE_READ | HR_MODE_WRITE))
    return "read-write";
  else if (flags & HR_MODE_READ)
    return "read-only";
  else if (flags & HR_MODE_WRITE)
    return "write-only";
  else
    return "UNKNOWN";
}

u32 CPU::NewRec::Compiler::AllocateHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */,
                                           Reg reg /* = Reg::count */)
{
  // Cancel any load delays before booting anything out
  if (flags & HR_MODE_WRITE && (type == HR_TYPE_CPU_REG || type == HR_TYPE_NEXT_LOAD_DELAY_VALUE))
    CancelLoadDelaysToReg(reg);

  // Already have a matching type?
  if (type != HR_TYPE_TEMP)
  {
    const std::optional<u32> check_reg = CheckHostReg(flags, type, reg);

    // shouldn't be allocating >1 load delay in a single instruction..
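    // (A single MIPS instruction introduces at most one delayed load -- even
    // LWL/LWR only write one target register -- so finding an existing
    // allocation of the same load-delay type here would indicate a compiler
    // bug.)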
    // TODO: prefer callee saved registers for load delay
    DebugAssert((type != HR_TYPE_LOAD_DELAY_VALUE && type != HR_TYPE_NEXT_LOAD_DELAY_VALUE) ||
                !check_reg.has_value());

    if (check_reg.has_value())
      return check_reg.value();
  }

  const u32 hreg = GetFreeHostReg(flags);
  HostRegAlloc& ra = m_host_regs[hreg];
  ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | (flags & ALLOWED_HR_FLAGS) | HR_ALLOCATED | HR_NEEDED;
  ra.type = type;
  ra.reg = reg;
  ra.counter = m_register_alloc_counter++;

  switch (type)
  {
    case HR_TYPE_CPU_REG:
    {
      DebugAssert(reg != Reg::zero);

      Log_DebugPrintf("Allocate host reg %s to guest reg %s in %s mode", GetHostRegName(hreg), GetRegName(reg),
                      GetReadWriteModeString(flags));

      if (flags & HR_MODE_READ)
      {
        DebugAssert(ra.reg > Reg::zero && ra.reg < Reg::count);

        if (HasConstantReg(reg))
        {
          // may as well flush it now
          Log_DebugPrintf("Flush constant register in guest reg %s to host reg %s", GetRegName(reg),
                          GetHostRegName(hreg));
          LoadHostRegWithConstant(hreg, GetConstantRegU32(reg));
          m_constant_regs_dirty.reset(static_cast<u8>(reg));
          ra.flags |= HR_MODE_WRITE;
        }
        else
        {
          LoadHostRegFromCPUPointer(hreg, &g_state.regs.r[static_cast<u8>(reg)]);
        }
      }

      if (flags & HR_MODE_WRITE && HasConstantReg(reg))
      {
        DebugAssert(reg != Reg::zero);
        Log_DebugPrintf("Clearing constant register in guest reg %s due to write mode in %s", GetRegName(reg),
                        GetHostRegName(hreg));
        ClearConstantReg(reg);
      }
    }
    break;

    case HR_TYPE_LOAD_DELAY_VALUE:
    {
      DebugAssert(!m_load_delay_dirty && (!HasLoadDelay() || !(flags & HR_MODE_WRITE)));
      Log_DebugPrintf("Allocating load delayed guest register %s in host reg %s in %s mode", GetRegName(reg),
                      GetHostRegName(hreg), GetReadWriteModeString(flags));
      m_load_delay_register = reg;
      m_load_delay_value_register = hreg;
      if (flags & HR_MODE_READ)
        LoadHostRegFromCPUPointer(hreg, &g_state.load_delay_value);
    }
    break;

    case HR_TYPE_NEXT_LOAD_DELAY_VALUE:
    {
      Log_DebugPrintf("Allocating next load delayed guest register %s in host reg %s in %s mode", GetRegName(reg),
                      GetHostRegName(hreg), GetReadWriteModeString(flags));
      m_next_load_delay_register = reg;
      m_next_load_delay_value_register = hreg;
      if (flags & HR_MODE_READ)
        LoadHostRegFromCPUPointer(hreg, &g_state.next_load_delay_value);
    }
    break;

    case HR_TYPE_TEMP:
    {
      DebugAssert(!(flags & (HR_MODE_READ | HR_MODE_WRITE)));
      Log_DebugPrintf("Allocate host reg %s as temporary", GetHostRegName(hreg));
    }
    break;

    default:
      Panic("Unknown type");
      break;
  }

  return hreg;
}

std::optional<u32> CPU::NewRec::Compiler::CheckHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */,
                                                       Reg reg /* = Reg::count */)
{
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& ra = m_host_regs[i];
    if (!(ra.flags & HR_ALLOCATED) || ra.type != type || ra.reg != reg)
      continue;

    DebugAssert(ra.flags & HR_MODE_READ);
    if (flags & HR_MODE_WRITE)
    {
      DebugAssert(type == HR_TYPE_CPU_REG);
      if (!(ra.flags & HR_MODE_WRITE))
      {
        Log_DebugPrintf("Switch guest reg %s from read to read-write in host reg %s", GetRegName(reg),
                        GetHostRegName(i));
      }

      if (HasConstantReg(reg))
      {
        DebugAssert(reg != Reg::zero);
        Log_DebugPrintf("Clearing constant register in guest reg %s due to write mode in %s", GetRegName(reg),
                        GetHostRegName(i));
        ClearConstantReg(reg);
      }
    }

    ra.flags |= (flags & ALLOWED_HR_FLAGS) | HR_NEEDED;
    ra.counter = m_register_alloc_counter++;

    // Need a callee saved reg?
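    // e.g. the value has to survive an upcoming C call (PGXP or a memory
    // handler); see GetFlagsForNewLoadDelayedReg(), which requests
    // HR_CALLEE_SAVED for load delays when PGXP is enabled.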
    if (flags & HR_CALLEE_SAVED && !(ra.flags & HR_CALLEE_SAVED))
    {
      // Need to move it to one which is callee saved.
      const u32 new_reg = GetFreeHostReg(HR_CALLEE_SAVED);
      Log_DebugPrintf("Rename host reg %s to %s for callee saved", GetHostRegName(i), GetHostRegName(new_reg));

      CopyHostReg(new_reg, i);
      SwapHostRegAlloc(i, new_reg);
      DebugAssert(!IsHostRegAllocated(i));
      return new_reg;
    }

    return i;
  }

  return std::nullopt;
}

u32 CPU::NewRec::Compiler::AllocateTempHostReg(u32 flags)
{
  return AllocateHostReg(flags, HR_TYPE_TEMP);
}

void CPU::NewRec::Compiler::SwapHostRegAlloc(u32 lhs, u32 rhs)
{
  HostRegAlloc& lra = m_host_regs[lhs];
  HostRegAlloc& rra = m_host_regs[rhs];

  const u8 lra_flags = lra.flags;
  lra.flags = (lra.flags & IMMUTABLE_HR_FLAGS) | (rra.flags & ~IMMUTABLE_HR_FLAGS);
  rra.flags = (rra.flags & IMMUTABLE_HR_FLAGS) | (lra_flags & ~IMMUTABLE_HR_FLAGS);
  std::swap(lra.type, rra.type);
  std::swap(lra.reg, rra.reg);
  std::swap(lra.counter, rra.counter);
}

void CPU::NewRec::Compiler::FlushHostReg(u32 reg)
{
  HostRegAlloc& ra = m_host_regs[reg];
  if (ra.flags & HR_MODE_WRITE)
  {
    switch (ra.type)
    {
      case HR_TYPE_CPU_REG:
      {
        DebugAssert(ra.reg > Reg::zero && ra.reg < Reg::count);
        Log_DebugPrintf("Flushing register %s in host register %s to state", GetRegName(ra.reg), GetHostRegName(reg));
        StoreHostRegToCPUPointer(reg, &g_state.regs.r[static_cast<u8>(ra.reg)]);
      }
      break;

      case HR_TYPE_LOAD_DELAY_VALUE:
      {
        DebugAssert(m_load_delay_value_register == reg);
        Log_DebugPrintf("Flushing load delayed register %s in host register %s to state", GetRegName(ra.reg),
                        GetHostRegName(reg));

        StoreHostRegToCPUPointer(reg, &g_state.load_delay_value);
        m_load_delay_value_register = NUM_HOST_REGS;
      }
      break;

      case HR_TYPE_NEXT_LOAD_DELAY_VALUE:
      {
        DebugAssert(m_next_load_delay_value_register == reg);
        Log_WarningPrintf("Flushing NEXT load delayed register %s in host register %s to state", GetRegName(ra.reg),
                          GetHostRegName(reg));

        StoreHostRegToCPUPointer(reg, &g_state.next_load_delay_value);
        m_next_load_delay_value_register = NUM_HOST_REGS;
      }
      break;

      default:
        break;
    }

    ra.flags = (ra.flags & ~HR_MODE_WRITE) | HR_MODE_READ;
  }
}

void CPU::NewRec::Compiler::FreeHostReg(u32 reg)
{
  DebugAssert(IsHostRegAllocated(reg));
  Log_DebugPrintf("Freeing host register %s", GetHostRegName(reg));
  FlushHostReg(reg);
  ClearHostReg(reg);
}

void CPU::NewRec::Compiler::ClearHostReg(u32 reg)
{
  HostRegAlloc& ra = m_host_regs[reg];
  ra.flags &= IMMUTABLE_HR_FLAGS;
  ra.type = HR_TYPE_TEMP;
  ra.counter = 0;
  ra.reg = Reg::count;
}

void CPU::NewRec::Compiler::MarkRegsNeeded(HostRegAllocType type, Reg reg)
{
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& ra = m_host_regs[i];
    if (ra.flags & HR_ALLOCATED && ra.type == type && ra.reg == reg)
      ra.flags |= HR_NEEDED;
  }
}

void CPU::NewRec::Compiler::RenameHostReg(u32 reg, u32 new_flags, HostRegAllocType new_type, Reg new_reg)
{
  // only supported for cpu regs for now
  DebugAssert(new_type == HR_TYPE_TEMP || new_type == HR_TYPE_CPU_REG || new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE);

  const std::optional<u32> old_reg = CheckHostReg(0, new_type, new_reg);
  if (old_reg.has_value())
  {
    // don't writeback
    ClearHostReg(old_reg.value());
  }

  // kill any load delay to this reg
  if (new_type == HR_TYPE_CPU_REG || new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE)
    CancelLoadDelaysToReg(new_reg);

  if (new_type == HR_TYPE_CPU_REG)
  {
    Log_DebugPrintf("Renaming host reg %s to guest reg %s", GetHostRegName(reg), GetRegName(new_reg));
  }
  else if (new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE)
  {
    Log_DebugPrintf("Renaming host reg %s to load delayed guest reg %s", GetHostRegName(reg), GetRegName(new_reg));
    DebugAssert(m_next_load_delay_register == Reg::count && m_next_load_delay_value_register == NUM_HOST_REGS);
    m_next_load_delay_register = new_reg;
    m_next_load_delay_value_register = reg;
  }
  else
  {
    Log_DebugPrintf("Renaming host reg %s to temp", GetHostRegName(reg));
  }

  HostRegAlloc& ra = m_host_regs[reg];
  ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | HR_NEEDED | HR_ALLOCATED | (new_flags & ALLOWED_HR_FLAGS);
  ra.counter = m_register_alloc_counter++;
  ra.type = new_type;
  ra.reg = new_reg;
}

void CPU::NewRec::Compiler::ClearHostRegNeeded(u32 reg)
{
  DebugAssert(reg < NUM_HOST_REGS && IsHostRegAllocated(reg));
  HostRegAlloc& ra = m_host_regs[reg];
  if (ra.flags & HR_MODE_WRITE)
    ra.flags |= HR_MODE_READ;

  ra.flags &= ~HR_NEEDED;
}

void CPU::NewRec::Compiler::ClearHostRegsNeeded()
{
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& ra = m_host_regs[i];
    if (!(ra.flags & HR_ALLOCATED))
      continue;

    // shouldn't have any temps left
    DebugAssert(ra.type != HR_TYPE_TEMP);

    if (ra.flags & HR_MODE_WRITE)
      ra.flags |= HR_MODE_READ;

    ra.flags &= ~HR_NEEDED;
  }
}

void CPU::NewRec::Compiler::DeleteMIPSReg(Reg reg, bool flush)
{
  DebugAssert(reg != Reg::zero);

  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& ra = m_host_regs[i];
    if (ra.flags & HR_ALLOCATED && ra.type == HR_TYPE_CPU_REG && ra.reg == reg)
    {
      if (flush)
        FlushHostReg(i);
      ClearHostReg(i);
      ClearConstantReg(reg);
      return;
    }
  }

  if (flush)
    FlushConstantReg(reg);
  ClearConstantReg(reg);
}

bool CPU::NewRec::Compiler::TryRenameMIPSReg(Reg to, Reg from, u32 fromhost, Reg other)
{
  // can't rename when in form Rd = Rs op Rt and Rd == Rs or Rd == Rt
  if (to == from || to == other || !iinfo->RenameTest(from))
    return false;

  Log_DebugPrintf("Renaming MIPS register %s to %s", GetRegName(from), GetRegName(to));

  if (iinfo->LiveTest(from))
    FlushHostReg(fromhost);

  // remove all references to renamed-to register
  DeleteMIPSReg(to, false);
  CancelLoadDelaysToReg(to);

  // and do the actual rename, new register has been modified.
  m_host_regs[fromhost].reg = to;
  m_host_regs[fromhost].flags |= HR_MODE_READ | HR_MODE_WRITE;
  return true;
}

void CPU::NewRec::Compiler::UpdateHostRegCounters()
{
  const CodeCache::InstructionInfo* const info_end = m_block->InstructionsInfo() + m_block->size;

  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& ra = m_host_regs[i];
    if ((ra.flags & (HR_ALLOCATED | HR_NEEDED)) != HR_ALLOCATED)
      continue;

    // Try not to punt out load delays.
    if (ra.type != HR_TYPE_CPU_REG)
    {
      ra.counter = std::numeric_limits<u16>::max();
      continue;
    }

    DebugAssert(IsHostRegAllocated(i));
    const CodeCache::InstructionInfo* cur = iinfo;
    const Reg reg = ra.reg;
    if (!(cur->reg_flags[static_cast<u8>(reg)] & CodeCache::RI_USED))
    {
      ra.counter = 0;
      continue;
    }

    // order based on the number of instructions until this register is used
    u16 counter_val = std::numeric_limits<u16>::max();
    for (; cur != info_end; cur++, counter_val--)
    {
      if (cur->ReadsReg(reg))
        break;
    }

    ra.counter = counter_val;
  }
}

void CPU::NewRec::Compiler::Flush(u32 flags)
{
  // TODO: Flush unneeded caller-saved regs (backup/replace callee-saved needed with caller-saved)

  if (flags &
      (FLUSH_FREE_UNNEEDED_CALLER_SAVED_REGISTERS | FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_FREE_ALL_REGISTERS))
  {
    const u32 req_mask =
      (flags & FLUSH_FREE_ALL_REGISTERS) ?
        HR_ALLOCATED :
        ((flags & FLUSH_FREE_CALLER_SAVED_REGISTERS) ? (HR_ALLOCATED | HR_CALLEE_SAVED) :
                                                       (HR_ALLOCATED | HR_CALLEE_SAVED | HR_NEEDED));
    constexpr u32 req_flags = HR_ALLOCATED;

    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      HostRegAlloc& ra = m_host_regs[i];
      if ((ra.flags & req_mask) == req_flags)
        FreeHostReg(i);
    }
  }

  if (flags & FLUSH_INVALIDATE_MIPS_REGISTERS)
  {
    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      HostRegAlloc& ra = m_host_regs[i];
      if (ra.flags & HR_ALLOCATED && ra.type == HR_TYPE_CPU_REG)
        FreeHostReg(i);
    }

    FlushConstantRegs(true);
  }
  else
  {
    if (flags & FLUSH_FLUSH_MIPS_REGISTERS)
    {
      for (u32 i = 0; i < NUM_HOST_REGS; i++)
      {
        HostRegAlloc& ra = m_host_regs[i];
        if ((ra.flags & (HR_ALLOCATED | HR_MODE_WRITE)) == (HR_ALLOCATED | HR_MODE_WRITE) &&
            ra.type == HR_TYPE_CPU_REG)
        {
          FlushHostReg(i);
        }
      }

      // flush any constant registers which are dirty too
      FlushConstantRegs(false);
    }
  }

  if (flags & FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS)
    InvalidateSpeculativeValues();
}

void CPU::NewRec::Compiler::FlushConstantReg(Reg r)
{
  DebugAssert(m_constant_regs_valid.test(static_cast<u8>(r)));
  Log_DebugPrintf("Writing back register %s with constant value 0x%08X", GetRegName(r),
                  m_constant_reg_values[static_cast<u8>(r)]);
  StoreConstantToCPUPointer(m_constant_reg_values[static_cast<u8>(r)], &g_state.regs.r[static_cast<u8>(r)]);
  m_constant_regs_dirty.reset(static_cast<u8>(r));
}

void CPU::NewRec::Compiler::BackupHostState()
{
  DebugAssert(m_host_state_backup_count < m_host_state_backup.size());

  // need to back up everything...
  HostStateBackup& bu = m_host_state_backup[m_host_state_backup_count];
  bu.cycles = m_cycles;
  bu.gte_done_cycle = m_gte_done_cycle;
  bu.compiler_pc = m_compiler_pc;
  bu.dirty_pc = m_dirty_pc;
  bu.dirty_instruction_bits = m_dirty_instruction_bits;
  bu.dirty_gte_done_cycle = m_dirty_gte_done_cycle;
  bu.block_ended = m_block_ended;
  bu.inst = inst;
  bu.iinfo = iinfo;
  bu.current_instruction_pc = m_current_instruction_pc;
  bu.current_instruction_delay_slot = m_current_instruction_branch_delay_slot;
  bu.const_regs_valid = m_constant_regs_valid;
  bu.const_regs_dirty = m_constant_regs_dirty;
  bu.const_regs_values = m_constant_reg_values;
  bu.host_regs = m_host_regs;
  bu.register_alloc_counter = m_register_alloc_counter;
  bu.load_delay_dirty = m_load_delay_dirty;
  bu.load_delay_register = m_load_delay_register;
  bu.load_delay_value_register = m_load_delay_value_register;
  bu.next_load_delay_register = m_next_load_delay_register;
  bu.next_load_delay_value_register = m_next_load_delay_value_register;
  m_host_state_backup_count++;
}

void CPU::NewRec::Compiler::RestoreHostState()
{
  DebugAssert(m_host_state_backup_count > 0);
  m_host_state_backup_count--;

  HostStateBackup& bu = m_host_state_backup[m_host_state_backup_count];
  m_host_regs = std::move(bu.host_regs);
  m_constant_reg_values = std::move(bu.const_regs_values);
  m_constant_regs_dirty = std::move(bu.const_regs_dirty);
  m_constant_regs_valid = std::move(bu.const_regs_valid);
  m_current_instruction_branch_delay_slot = bu.current_instruction_delay_slot;
  m_current_instruction_pc = bu.current_instruction_pc;
  inst = bu.inst;
  iinfo = bu.iinfo;
  m_block_ended = bu.block_ended;
  m_dirty_gte_done_cycle = bu.dirty_gte_done_cycle;
  m_dirty_instruction_bits = bu.dirty_instruction_bits;
  m_dirty_pc = bu.dirty_pc;
  m_compiler_pc = bu.compiler_pc;
  m_register_alloc_counter = bu.register_alloc_counter;
  m_load_delay_dirty = bu.load_delay_dirty;
  m_load_delay_register = bu.load_delay_register;
  m_load_delay_value_register = bu.load_delay_value_register;
  m_next_load_delay_register = bu.next_load_delay_register;
  m_next_load_delay_value_register = bu.next_load_delay_value_register;
  m_gte_done_cycle = bu.gte_done_cycle;
  m_cycles = bu.cycles;
}
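// Backup/restore is used when one guest instruction compiles down to several
// host code paths, e.g. (sketch of how the backends drive it around a
// conditional branch):
//   BackupHostState();
//   ... emit the taken path, ending the block ...
//   RestoreHostState();
//   ... emit the fallthrough path from the same starting allocation ...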
void CPU::NewRec::Compiler::AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register,
                                             u32 data_register, MemoryAccessSize size, bool is_signed, bool is_load)
{
  DebugAssert(CodeCache::IsUsingFastmem());
  DebugAssert(address_register < NUM_HOST_REGS);
  DebugAssert(data_register < NUM_HOST_REGS);

  u32 gpr_bitmask = 0;
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    if (IsHostRegAllocated(i))
      gpr_bitmask |= (1u << i);
  }

  CPU::CodeCache::AddLoadStoreInfo(code_address, code_size, m_current_instruction_pc, m_block->pc, m_cycles,
                                   gpr_bitmask, static_cast<u8>(address_register), static_cast<u8>(data_register),
                                   size, is_signed, is_load);
}

void CPU::NewRec::Compiler::CompileInstruction()
{
#ifdef _DEBUG
  TinyString str;
  DisassembleInstruction(&str, m_current_instruction_pc, inst->bits);
  Log_DebugFmt("Compiling{} {:08X}: {}", m_current_instruction_branch_delay_slot ? " branch delay slot" : "",
               m_current_instruction_pc, str);
#endif

  m_cycles++;

  if (IsNopInstruction(*inst))
  {
    UpdateLoadDelay();
    return;
  }

  switch (inst->op)
  {
#define PGXPFN(x) reinterpret_cast<const void*>(&PGXP::x)

      // clang-format off
    // TODO: PGXP for jalr

    case InstructionOp::funct:
    {
      switch (inst->r.funct)
      {
        case InstructionFunct::sll: CompileTemplate(&Compiler::Compile_sll_const, &Compiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); SpecExec_sll(); break;
        case InstructionFunct::srl: CompileTemplate(&Compiler::Compile_srl_const, &Compiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); SpecExec_srl(); break;
        case InstructionFunct::sra: CompileTemplate(&Compiler::Compile_sra_const, &Compiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); SpecExec_sra(); break;
        case InstructionFunct::sllv: CompileTemplate(&Compiler::Compile_sllv_const, &Compiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_sllv(); break;
        case InstructionFunct::srlv: CompileTemplate(&Compiler::Compile_srlv_const, &Compiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srlv(); break;
        case InstructionFunct::srav: CompileTemplate(&Compiler::Compile_srav_const, &Compiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srav(); break;
        case InstructionFunct::jr: CompileTemplate(&Compiler::Compile_jr_const, &Compiler::Compile_jr, nullptr, TF_READS_S); break;
        case InstructionFunct::jalr: CompileTemplate(&Compiler::Compile_jalr_const, &Compiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); SpecExec_jalr(); break;
        case InstructionFunct::syscall: Compile_syscall(); break;
        case InstructionFunct::break_: Compile_break(); break;
        case InstructionFunct::mfhi: SpecCopyReg(inst->r.rd, Reg::hi); CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mthi: SpecCopyReg(Reg::hi, inst->r.rs); CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mflo: SpecCopyReg(inst->r.rd, Reg::lo); CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mtlo: SpecCopyReg(Reg::lo, inst->r.rs); CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mult: CompileTemplate(&Compiler::Compile_mult_const, &Compiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_mult(); break;
        case InstructionFunct::multu: CompileTemplate(&Compiler::Compile_multu_const, &Compiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_multu(); break;
        case InstructionFunct::div: CompileTemplate(&Compiler::Compile_div_const, &Compiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_div(); break;
        case InstructionFunct::divu: CompileTemplate(&Compiler::Compile_divu_const, &Compiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_divu(); break;
        case InstructionFunct::add: CompileTemplate(&Compiler::Compile_add_const, &Compiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_add(); break;
        case InstructionFunct::addu: CompileTemplate(&Compiler::Compile_addu_const, &Compiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_addu(); break;
        case InstructionFunct::sub: CompileTemplate(&Compiler::Compile_sub_const, &Compiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_sub(); break;
        case InstructionFunct::subu: CompileTemplate(&Compiler::Compile_subu_const, &Compiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); SpecExec_subu(); break;
        case InstructionFunct::and_: CompileTemplate(&Compiler::Compile_and_const, &Compiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_and(); break;
        case InstructionFunct::or_: CompileTemplate(&Compiler::Compile_or_const, &Compiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_or(); break;
        case InstructionFunct::xor_: CompileTemplate(&Compiler::Compile_xor_const, &Compiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_xor(); break;
        case InstructionFunct::nor: CompileTemplate(&Compiler::Compile_nor_const, &Compiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_nor(); break;
        case InstructionFunct::slt: CompileTemplate(&Compiler::Compile_slt_const, &Compiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_slt(); break;
        case InstructionFunct::sltu: CompileTemplate(&Compiler::Compile_sltu_const, &Compiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_sltu(); break;

        default: Compile_Fallback(); InvalidateSpeculativeValues(); TruncateBlock(); break;
      }
    }
    break;

    case InstructionOp::j: Compile_j(); break;
    case InstructionOp::jal: Compile_jal(); SpecExec_jal(); break;

    case InstructionOp::b: CompileTemplate(&Compiler::Compile_b_const, &Compiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); SpecExec_b(); break;
    case InstructionOp::blez: CompileTemplate(&Compiler::Compile_blez_const, &Compiler::Compile_blez, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::bgtz: CompileTemplate(&Compiler::Compile_bgtz_const, &Compiler::Compile_bgtz, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::beq: CompileTemplate(&Compiler::Compile_beq_const, &Compiler::Compile_beq, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::bne: CompileTemplate(&Compiler::Compile_bne_const, &Compiler::Compile_bne, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;

    case InstructionOp::addi: CompileTemplate(&Compiler::Compile_addi_const, &Compiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); SpecExec_addi(); break;
    case InstructionOp::addiu: CompileTemplate(&Compiler::Compile_addiu_const, &Compiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_addiu(); break;
    case InstructionOp::slti: CompileTemplate(&Compiler::Compile_slti_const, &Compiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); SpecExec_slti(); break;
    case InstructionOp::sltiu: CompileTemplate(&Compiler::Compile_sltiu_const, &Compiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); SpecExec_sltiu(); break;
    case InstructionOp::andi: CompileTemplate(&Compiler::Compile_andi_const, &Compiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); SpecExec_andi(); break;
    case InstructionOp::ori: CompileTemplate(&Compiler::Compile_ori_const, &Compiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_ori(); break;
    case InstructionOp::xori: CompileTemplate(&Compiler::Compile_xori_const, &Compiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_xori(); break;
    case InstructionOp::lui: Compile_lui(); SpecExec_lui(); break;

    case InstructionOp::lb: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, true); break;
    case InstructionOp::lbu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, false); break;
    case InstructionOp::lh: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, true); break;
    case InstructionOp::lhu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, false); break;
    case InstructionOp::lw: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Word, false); break;
    case InstructionOp::lwl: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(false); break;
    case InstructionOp::lwr: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(true); break;
    case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break;
    case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break;
    case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break;
    case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(false); break;
    case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(true); break;

    case InstructionOp::cop0:
    {
      if (inst->cop.IsCommonInstruction())
      {
        switch (inst->cop.CommonOp())
        {
          case CopCommonInstruction::mfcn:
            if (inst->r.rt != Reg::zero)
            {
              CompileTemplate(nullptr, &Compiler::Compile_mfc0, PGXPFN(CPU_MFC0), TF_WRITES_T | TF_LOAD_DELAY);
            }
            SpecExec_mfc0();
            break;
          case CopCommonInstruction::mtcn:
            CompileTemplate(nullptr, &Compiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T);
            SpecExec_mtc0();
            break;
          default:
            Compile_Fallback();
            break;
        }
      }
      else
      {
        switch (inst->cop.Cop0Op())
        {
          case Cop0Instruction::rfe:
            CompileTemplate(nullptr, &Compiler::Compile_rfe, nullptr, 0);
            SpecExec_rfe();
            break;
          default:
            Compile_Fallback();
            break;
        }
      }
    }
    break;

    case InstructionOp::cop2:
    {
      if (inst->cop.IsCommonInstruction())
      {
        switch (inst->cop.CommonOp())
        {
          case CopCommonInstruction::mfcn:
            if (inst->r.rt != Reg::zero)
            {
              CompileTemplate(nullptr, &Compiler::Compile_mfc2, nullptr, TF_GTE_STALL);
            }
            break;
          case CopCommonInstruction::cfcn:
            if (inst->r.rt != Reg::zero)
            {
              CompileTemplate(nullptr, &Compiler::Compile_mfc2, nullptr, TF_GTE_STALL);
            }
            break;
          case CopCommonInstruction::mtcn:
            CompileTemplate(nullptr, &Compiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU);
            break;
          case CopCommonInstruction::ctcn:
            CompileTemplate(nullptr, &Compiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU);
            break;
          default:
            Compile_Fallback();
            break;
        }
      }
      else
      {
        // GTE ops
        CompileTemplate(nullptr, &Compiler::Compile_cop2, nullptr, TF_GTE_STALL);
      }
    }
    break;

    case InstructionOp::lwc2: CompileLoadStoreTemplate(&Compiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_GTE_STALL | TF_READS_S | TF_LOAD_DELAY); break;
    case InstructionOp::swc2: CompileLoadStoreTemplate(&Compiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); SpecExec_swc2(); break;

    // swc0/lwc0/cop1/cop3 are essentially no-ops
    case InstructionOp::cop1:
    case InstructionOp::cop3:
    case InstructionOp::lwc0:
    case InstructionOp::lwc1:
    case InstructionOp::lwc3:
    case InstructionOp::swc0:
    case InstructionOp::swc1:
    case InstructionOp::swc3:
      break;

    default: Compile_Fallback(); InvalidateSpeculativeValues(); TruncateBlock(); break;

      // clang-format on
#undef PGXPFN
  }

  ClearHostRegsNeeded();
  UpdateLoadDelay();

#if 0
  const void* end = GetCurrentCodePointer();
  if (start != end && !m_current_instruction_branch_delay_slot)
  {
    CodeCache::DisassembleAndLogHostCode(start,
                                         static_cast<u32>(static_cast<const u8*>(end) - static_cast<const u8*>(start)));
  }
#endif
}

void CPU::NewRec::Compiler::CompileBranchDelaySlot(bool dirty_pc /* = true */)
{
  // Update load delay at the end of the previous instruction.
  UpdateLoadDelay();

  // TODO: Move cycle add before this.
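  // The instruction following a MIPS branch (its delay slot) architecturally
  // executes whether or not the branch is taken, e.g.:
  //   beq   $t0, $t1, target
  //   addiu $sp, $sp, -16   ; runs in both cases
  // so it is compiled inline here before control leaves the block.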
  inst++;
  iinfo++;
  m_current_instruction_pc += sizeof(Instruction);
  m_current_instruction_branch_delay_slot = true;
  m_compiler_pc += sizeof(Instruction);
  m_dirty_pc = dirty_pc;
  m_dirty_instruction_bits = true;

  CompileInstruction();

  m_current_instruction_branch_delay_slot = false;
}

void CPU::NewRec::Compiler::CompileTemplate(void (Compiler::*const_func)(CompileFlags),
                                            void (Compiler::*func)(CompileFlags), const void* pgxp_cpu_func,
                                            u32 tflags)
{
  // TODO: This is where we will do memory operand optimization. Remember to kill constants!
  // TODO: Swap S and T if commutative
  // TODO: For and, treat as zeroing if imm is zero
  // TODO: Optimize slt + bne to cmp + jump
  // TODO: Prefer memory operands when load delay is dirty, since we're going to invalidate immediately after the
  // first instruction..
  // TODO: andi with zero -> zero const
  // TODO: load constant so it can be flushed if it's not overwritten later
  // TODO: inline PGXP ops.
  // TODO: don't rename on sltu.

  bool allow_constant = static_cast<bool>(const_func);
  Reg rs = inst->r.rs.GetValue();
  Reg rt = inst->r.rt.GetValue();
  Reg rd = inst->r.rd.GetValue();

  if (tflags & TF_GTE_STALL)
    StallUntilGTEComplete();

  // throw away instructions writing to $zero
  if (!(tflags & TF_NO_NOP) && (!g_settings.cpu_recompiler_memory_exceptions || !(tflags & TF_CAN_OVERFLOW)) &&
      ((tflags & TF_WRITES_T && rt == Reg::zero) || (tflags & TF_WRITES_D && rd == Reg::zero)))
  {
    Log_DebugPrintf("Skipping instruction because it writes to zero");
    return;
  }

  // handle rename operations
  if ((tflags & TF_RENAME_WITH_ZERO_T && HasConstantRegValue(rt, 0)))
  {
    DebugAssert((tflags & (TF_WRITES_D | TF_READS_S | TF_READS_T)) == (TF_WRITES_D | TF_READS_S | TF_READS_T));
    CompileMoveRegTemplate(rd, rs, true);
    return;
  }
  else if ((tflags & (TF_RENAME_WITH_ZERO_T | TF_COMMUTATIVE)) == (TF_RENAME_WITH_ZERO_T | TF_COMMUTATIVE) &&
           HasConstantRegValue(rs, 0))
  {
    DebugAssert((tflags & (TF_WRITES_D | TF_READS_S | TF_READS_T)) == (TF_WRITES_D | TF_READS_S | TF_READS_T));
    CompileMoveRegTemplate(rd, rt, true);
    return;
  }
  else if (tflags & TF_RENAME_WITH_ZERO_IMM && inst->i.imm == 0)
  {
    CompileMoveRegTemplate(rt, rs, true);
    return;
  }

  if (pgxp_cpu_func && g_settings.gpu_pgxp_enable &&
      ((tflags & TF_PGXP_WITHOUT_CPU) || g_settings.UsingPGXPCPUMode()))
  {
    std::array<Reg, 2> reg_args = {{Reg::count, Reg::count}};
    u32 num_reg_args = 0;
    if (tflags & TF_READS_S)
      reg_args[num_reg_args++] = rs;
    if (tflags & TF_READS_T)
      reg_args[num_reg_args++] = rt;
    if (tflags & TF_READS_LO)
      reg_args[num_reg_args++] = Reg::lo;
    if (tflags & TF_READS_HI)
      reg_args[num_reg_args++] = Reg::hi;

    DebugAssert(num_reg_args <= 2);
    GeneratePGXPCallWithMIPSRegs(pgxp_cpu_func, inst->bits, reg_args[0], reg_args[1]);
  }

  // if it's a commutative op, and we have one constant reg but not the other, swap them
  // TODO: make it swap when writing to T as well
  // TODO: drop the hack for rd == rt
  if (tflags & TF_COMMUTATIVE && !(tflags & TF_WRITES_T) &&
      ((HasConstantReg(rs) && !HasConstantReg(rt)) || (tflags & TF_WRITES_D && rd == rt)))
  {
    Log_DebugPrintf("Swapping S:%s and T:%s due to commutative op and constants", GetRegName(rs), GetRegName(rt));
    std::swap(rs, rt);
  }

  CompileFlags cf = {};

  if (tflags & TF_READS_S)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
    if (HasConstantReg(rs))
      cf.const_s = true;
    else
      allow_constant = false;
  }
  if (tflags & TF_READS_T)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
    if (HasConstantReg(rt))
      cf.const_t = true;
    else
      allow_constant = false;
  }
  if (tflags & TF_READS_LO)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::lo);
    if (HasConstantReg(Reg::lo))
      cf.const_lo = true;
    else
      allow_constant = false;
  }
  if (tflags & TF_READS_HI)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::hi);
    if (HasConstantReg(Reg::hi))
      cf.const_hi = true;
    else
      allow_constant = false;
  }

  // Needed because of potential swapping
  if (tflags & TF_READS_S)
    cf.mips_s = static_cast<u8>(rs);
  if (tflags & (TF_READS_T | TF_WRITES_T))
    cf.mips_t = static_cast<u8>(rt);

  if (allow_constant)
  {
    // woot, constant path
    (this->*const_func)(cf);
    return;
  }

  UpdateHostRegCounters();

  if (tflags & TF_CAN_SWAP_DELAY_SLOT && TrySwapDelaySlot(cf.MipsS(), cf.MipsT()))
    cf.delay_slot_swapped = true;

  if (tflags & TF_READS_S &&
      (tflags & TF_NEEDS_REG_S || !cf.const_s || (tflags & TF_WRITES_D && rd != Reg::zero && rd == rs)))
  {
    cf.host_s = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
    cf.const_s = false;
    cf.valid_host_s = true;
  }

  if (tflags & TF_READS_T &&
      (tflags & (TF_NEEDS_REG_T | TF_WRITES_T) || !cf.const_t ||
       (tflags & TF_WRITES_D && rd != Reg::zero && rd == rt)))
  {
    cf.host_t = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
    cf.const_t = false;
    cf.valid_host_t = true;
  }

  if (tflags & (TF_READS_LO | TF_WRITES_LO))
  {
    cf.host_lo = AllocateHostReg(((tflags & TF_READS_LO) ? HR_MODE_READ : 0u) |
                                   ((tflags & TF_WRITES_LO) ? HR_MODE_WRITE : 0u),
                                 HR_TYPE_CPU_REG, Reg::lo);
    cf.const_lo = false;
    cf.valid_host_lo = true;
  }

  if (tflags & (TF_READS_HI | TF_WRITES_HI))
  {
    cf.host_hi = AllocateHostReg(((tflags & TF_READS_HI) ? HR_MODE_READ : 0u) |
                                   ((tflags & TF_WRITES_HI) ? HR_MODE_WRITE : 0u),
                                 HR_TYPE_CPU_REG, Reg::hi);
    cf.const_hi = false;
    cf.valid_host_hi = true;
  }

  const HostRegAllocType write_type =
    (tflags & TF_LOAD_DELAY && EMULATE_LOAD_DELAYS) ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG;

  if (tflags & TF_CAN_OVERFLOW && g_settings.cpu_recompiler_memory_exceptions)
  {
    // allocate a temp register for the result, then swap it back
    const u32 tempreg = AllocateHostReg(0, HR_TYPE_TEMP);
    if (tflags & TF_WRITES_D)
    {
      cf.host_d = tempreg;
      cf.valid_host_d = true;
    }
    else if (tflags & TF_WRITES_T)
    {
      cf.host_t = tempreg;
      cf.valid_host_t = true;
    }

    (this->*func)(cf);

    if (tflags & TF_WRITES_D && rd != Reg::zero)
    {
      DeleteMIPSReg(rd, false);
      RenameHostReg(tempreg, HR_MODE_WRITE, write_type, rd);
    }
    else if (tflags & TF_WRITES_T && rt != Reg::zero)
    {
      DeleteMIPSReg(rt, false);
      RenameHostReg(tempreg, HR_MODE_WRITE, write_type, rt);
    }
    else
    {
      FreeHostReg(tempreg);
    }
  }
  else
  {
    if (tflags & TF_WRITES_D && rd != Reg::zero)
    {
      if (tflags & TF_READS_S && cf.valid_host_s && TryRenameMIPSReg(rd, rs, cf.host_s, Reg::count))
        cf.host_d = cf.host_s;
      else
        cf.host_d = AllocateHostReg(HR_MODE_WRITE, write_type, rd);
      cf.valid_host_d = true;
    }

    if (tflags & TF_WRITES_T && rt != Reg::zero)
    {
      if (tflags & TF_READS_S && cf.valid_host_s && TryRenameMIPSReg(rt, rs, cf.host_s, Reg::count))
        cf.host_t = cf.host_s;
      else
        cf.host_t = AllocateHostReg(HR_MODE_WRITE, write_type, rt);
      cf.valid_host_t = true;
    }

    (this->*func)(cf);
  }
}

void CPU::NewRec::Compiler::CompileLoadStoreTemplate(
  void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool, const std::optional<VirtualMemoryAddress>&),
  MemoryAccessSize size, bool store, bool sign, u32 tflags)
{
  const Reg rs = inst->i.rs;
  const Reg rt = inst->i.rt;

  if (tflags & TF_GTE_STALL)
    StallUntilGTEComplete();

  CompileFlags cf = {};

  if (tflags & TF_READS_S)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
    cf.mips_s = static_cast<u8>(rs);
  }

  if (tflags & (TF_READS_T | TF_WRITES_T))
  {
    if (tflags & TF_READS_T)
      MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
    cf.mips_t = static_cast<u8>(rt);
  }

  UpdateHostRegCounters();

  // constant address?
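  // If rs folded to a constant, the effective address is fully known at
  // compile time; the canonical guest pattern is e.g.:
  //   lui $t0, 0x1F80
  //   lw  $t1, 0x1070($t0)   ; 0x1F801070 (I_STAT), known statically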
  std::optional<VirtualMemoryAddress> addr;
  std::optional<VirtualMemoryAddress> spec_addr;
  bool use_fastmem = CodeCache::IsUsingFastmem() && !g_settings.cpu_recompiler_memory_exceptions &&
                     !SpecIsCacheIsolated() && !CodeCache::HasPreviouslyFaultedOnPC(m_current_instruction_pc);
  if (HasConstantReg(rs))
  {
    addr = GetConstantRegU32(rs) + inst->i.imm_sext32();
    spec_addr = addr;
    cf.const_s = true;

    if (!Bus::CanUseFastmemForAddress(addr.value()))
    {
      Log_DebugFmt("Not using fastmem for {:08X}", addr.value());
      use_fastmem = false;
    }
  }
  else
  {
    spec_addr = SpecExec_LoadStoreAddr();
    if (use_fastmem && spec_addr.has_value() && !Bus::CanUseFastmemForAddress(spec_addr.value()))
    {
      Log_DebugFmt("Not using fastmem for speculative {:08X}", spec_addr.value());
      use_fastmem = false;
    }

    if constexpr (HAS_MEMORY_OPERANDS)
    {
      // don't bother caching it since we're going to flush anyway
      // TODO: make less rubbish, if it's caller saved we don't need to flush...
      const std::optional<u32> hreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
      if (hreg.has_value())
      {
        cf.valid_host_s = true;
        cf.host_s = hreg.value();
      }
    }
    else
    {
      // need rs in a register
      cf.host_s = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
      cf.valid_host_s = true;
    }
  }

  // reads T -> store, writes T -> load
  // for now, we defer the allocation to afterwards, because C call
  if (tflags & TF_READS_T)
  {
    if (HasConstantReg(rt))
    {
      cf.const_t = true;
    }
    else
    {
      if constexpr (HAS_MEMORY_OPERANDS)
      {
        const std::optional<u32> hreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
        if (hreg.has_value())
        {
          cf.valid_host_t = true;
          cf.host_t = hreg.value();
        }
      }
      else
      {
        cf.host_t = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
        cf.valid_host_t = true;
      }
    }
  }

  (this->*func)(cf, size, sign, use_fastmem, addr);

  if (store && !m_block_ended && !m_current_instruction_branch_delay_slot && spec_addr.has_value() &&
      GetSegmentForAddress(spec_addr.value()) != Segment::KSEG2)
  {
    // Get rid of physical aliases.
    const u32 phys_spec_addr = VirtualAddressToPhysical(spec_addr.value());
    if (phys_spec_addr >= VirtualAddressToPhysical(m_block->pc) &&
        phys_spec_addr < VirtualAddressToPhysical(m_block->pc + (m_block->size * sizeof(Instruction))))
    {
      Log_WarningFmt("Instruction {:08X} speculatively writes to {:08X} inside block {:08X}-{:08X}. Truncating block.",
                     m_current_instruction_pc, phys_spec_addr, m_block->pc,
                     m_block->pc + (m_block->size * sizeof(Instruction)));
      TruncateBlock();
    }
  }
}

void CPU::NewRec::Compiler::TruncateBlock()
{
  m_block->size = ((m_current_instruction_pc - m_block->pc) / sizeof(Instruction)) + 1;
  iinfo->is_last_instruction = true;
}

void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store,
                                              bool use_fastmem)
{
  if (use_fastmem)
    return;

  // TODO: Stores don't need to flush GTE cycles...
  Flush(FLUSH_FOR_C_CALL | FLUSH_FOR_LOADSTORE);
}

void CPU::NewRec::Compiler::CompileMoveRegTemplate(Reg dst, Reg src, bool pgxp_move)
{
  if (dst == src || dst == Reg::zero)
    return;

  if (HasConstantReg(src))
  {
    DeleteMIPSReg(dst, false);
    SetConstantReg(dst, GetConstantRegU32(src));
  }
  else
  {
    const u32 srcreg = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, src);
    if (!TryRenameMIPSReg(dst, src, srcreg, Reg::count))
    {
      const u32 dstreg = AllocateHostReg(HR_MODE_WRITE, HR_TYPE_CPU_REG, dst);
      CopyHostReg(dstreg, srcreg);
      ClearHostRegNeeded(dstreg);
    }
  }

  // TODO: This could be made better if we only did it for registers where there was a previous MFC2.
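  // (PGXP carries sub-pixel precision metadata alongside GPR values; a
  // register-to-register move has to forward that metadata so e.g. an MFC2
  // result copied through another register still stores precise coordinates
  // when it is later written out with SW/SWC2.)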
if (g_settings.gpu_pgxp_enable && pgxp_move) { // might've been renamed, so use dst here GeneratePGXPCallWithMIPSRegs(reinterpret_cast(&PGXP::CPU_MOVE_Packed), PGXP::PackMoveArgs(dst, src), dst); } } void CPU::NewRec::Compiler::Compile_j() { const u32 newpc = (m_compiler_pc & UINT32_C(0xF0000000)) | (inst->j.target << 2); // TODO: Delay slot swap. // We could also move the cycle commit back. CompileBranchDelaySlot(); EndBlock(newpc, true); } void CPU::NewRec::Compiler::Compile_jr_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); const u32 newpc = GetConstantRegU32(cf.MipsS()); if (newpc & 3 && g_settings.cpu_recompiler_memory_exceptions) { EndBlockWithException(Exception::AdEL); return; } CompileBranchDelaySlot(); EndBlock(newpc, true); } void CPU::NewRec::Compiler::Compile_jal() { const u32 newpc = (m_compiler_pc & UINT32_C(0xF0000000)) | (inst->j.target << 2); SetConstantReg(Reg::ra, GetBranchReturnAddress({})); CompileBranchDelaySlot(); EndBlock(newpc, true); } void CPU::NewRec::Compiler::Compile_jalr_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); const u32 newpc = GetConstantRegU32(cf.MipsS()); if (MipsD() != Reg::zero) SetConstantReg(MipsD(), GetBranchReturnAddress({})); CompileBranchDelaySlot(); EndBlock(newpc, true); } void CPU::NewRec::Compiler::Compile_syscall() { EndBlockWithException(Exception::Syscall); } void CPU::NewRec::Compiler::Compile_break() { EndBlockWithException(Exception::BP); } void CPU::NewRec::Compiler::Compile_b_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); const u8 irt = static_cast(inst->i.rt.GetValue()); const bool bgez = ConvertToBoolUnchecked(irt & u8(1)); const bool link = (irt & u8(0x1E)) == u8(0x10); const s32 rs = GetConstantRegS32(cf.MipsS()); const bool taken = bgez ? (rs >= 0) : (rs < 0); const u32 taken_pc = GetConditionalBranchTarget(cf); if (link) SetConstantReg(Reg::ra, GetBranchReturnAddress(cf)); CompileBranchDelaySlot(); EndBlock(taken ? taken_pc : m_compiler_pc, true); } void CPU::NewRec::Compiler::Compile_b(CompileFlags cf) { const u8 irt = static_cast(inst->i.rt.GetValue()); const bool bgez = ConvertToBoolUnchecked(irt & u8(1)); const bool link = (irt & u8(0x1E)) == u8(0x10); if (link) SetConstantReg(Reg::ra, GetBranchReturnAddress(cf)); Compile_bxx(cf, bgez ? 
void CPU::NewRec::Compiler::Compile_b_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()));

  const u8 irt = static_cast<u8>(inst->i.rt.GetValue());
  const bool bgez = ConvertToBoolUnchecked(irt & u8(1));
  const bool link = (irt & u8(0x1E)) == u8(0x10);

  const s32 rs = GetConstantRegS32(cf.MipsS());
  const bool taken = bgez ? (rs >= 0) : (rs < 0);
  const u32 taken_pc = GetConditionalBranchTarget(cf);

  if (link)
    SetConstantReg(Reg::ra, GetBranchReturnAddress(cf));

  CompileBranchDelaySlot();
  EndBlock(taken ? taken_pc : m_compiler_pc, true);
}

void CPU::NewRec::Compiler::Compile_b(CompileFlags cf)
{
  const u8 irt = static_cast<u8>(inst->i.rt.GetValue());
  const bool bgez = ConvertToBoolUnchecked(irt & u8(1));
  const bool link = (irt & u8(0x1E)) == u8(0x10);

  if (link)
    SetConstantReg(Reg::ra, GetBranchReturnAddress(cf));

  Compile_bxx(cf, bgez ? BranchCondition::GreaterEqualZero : BranchCondition::LessThanZero);
}

void CPU::NewRec::Compiler::Compile_blez(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::LessEqualZero);
}

void CPU::NewRec::Compiler::Compile_blez_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::LessEqualZero);
}

void CPU::NewRec::Compiler::Compile_bgtz(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::GreaterThanZero);
}

void CPU::NewRec::Compiler::Compile_bgtz_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::GreaterThanZero);
}

void CPU::NewRec::Compiler::Compile_beq(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::Equal);
}

void CPU::NewRec::Compiler::Compile_beq_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::Equal);
}

void CPU::NewRec::Compiler::Compile_bne(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::NotEqual);
}

void CPU::NewRec::Compiler::Compile_bne_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::NotEqual);
}

void CPU::NewRec::Compiler::Compile_bxx_const(CompileFlags cf, BranchCondition cond)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));

  bool taken;
  switch (cond)
  {
    case BranchCondition::Equal:
      taken = GetConstantRegU32(cf.MipsS()) == GetConstantRegU32(cf.MipsT());
      break;

    case BranchCondition::NotEqual:
      taken = GetConstantRegU32(cf.MipsS()) != GetConstantRegU32(cf.MipsT());
      break;

    case BranchCondition::GreaterThanZero:
      taken = GetConstantRegS32(cf.MipsS()) > 0;
      break;

    case BranchCondition::GreaterEqualZero:
      taken = GetConstantRegS32(cf.MipsS()) >= 0;
      break;

    case BranchCondition::LessThanZero:
      taken = GetConstantRegS32(cf.MipsS()) < 0;
      break;

    case BranchCondition::LessEqualZero:
      taken = GetConstantRegS32(cf.MipsS()) <= 0;
      break;

    default:
      Panic("Unhandled condition");
      return;
  }

  const u32 taken_pc = GetConditionalBranchTarget(cf);
  CompileBranchDelaySlot();
  EndBlock(taken ? taken_pc : m_compiler_pc, true);
}
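// Constant folding for the ALU ops follows. It mirrors the R3000 shift semantics:
// immediate shifts take the 5-bit shamt field as-is, variable shifts mask rs to its low
// five bits, and sra/srav are arithmetic (sign-filling) shifts. Two compile-time checks
// of the intended behaviour (the signed one assumes the host performs an arithmetic
// right shift on signed values, which holds on every platform this is built for):
static_assert((UINT32_C(0x80000000) >> 4) == UINT32_C(0x08000000));
static_assert(static_cast<u32>(static_cast<s32>(UINT32_C(0x80000000)) >> 4) == UINT32_C(0xF8000000));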
void CPU::NewRec::Compiler::Compile_sll_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) << inst->r.shamt);
}

void CPU::NewRec::Compiler::Compile_srl_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) >> inst->r.shamt);
}

void CPU::NewRec::Compiler::Compile_sra_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), static_cast<u32>(GetConstantRegS32(cf.MipsT()) >> inst->r.shamt));
}

void CPU::NewRec::Compiler::Compile_sllv_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) << (GetConstantRegU32(cf.MipsS()) & 0x1Fu));
}

void CPU::NewRec::Compiler::Compile_srlv_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) >> (GetConstantRegU32(cf.MipsS()) & 0x1Fu));
}

void CPU::NewRec::Compiler::Compile_srav_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), static_cast<u32>(GetConstantRegS32(cf.MipsT()) >> (GetConstantRegU32(cf.MipsS()) & 0x1Fu)));
}

void CPU::NewRec::Compiler::Compile_and_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) & GetConstantRegU32(cf.MipsT()));
}

void CPU::NewRec::Compiler::Compile_or_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) | GetConstantRegU32(cf.MipsT()));
}

void CPU::NewRec::Compiler::Compile_xor_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) ^ GetConstantRegU32(cf.MipsT()));
}

void CPU::NewRec::Compiler::Compile_nor_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), ~(GetConstantRegU32(cf.MipsS()) | GetConstantRegU32(cf.MipsT())));
}

void CPU::NewRec::Compiler::Compile_slt_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), BoolToUInt32(GetConstantRegS32(cf.MipsS()) < GetConstantRegS32(cf.MipsT())));
}

void CPU::NewRec::Compiler::Compile_sltu_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), BoolToUInt32(GetConstantRegU32(cf.MipsS()) < GetConstantRegU32(cf.MipsT())));
}

void CPU::NewRec::Compiler::Compile_mult_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));

  const u64 res = static_cast<u64>(static_cast<s64>(GetConstantRegS32(cf.MipsS())) *
                                   static_cast<s64>(GetConstantRegS32(cf.MipsT())));
  SetConstantReg(Reg::hi, static_cast<u32>(res >> 32));
  SetConstantReg(Reg::lo, static_cast<u32>(res));
}

void CPU::NewRec::Compiler::Compile_multu_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));

  const u64 res = static_cast<u64>(GetConstantRegU32(cf.MipsS())) * static_cast<u64>(GetConstantRegU32(cf.MipsT()));
  SetConstantReg(Reg::hi, static_cast<u32>(res >> 32));
  SetConstantReg(Reg::lo, static_cast<u32>(res));
}
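// DIV/DIVU never trap on the R3000; division by zero and the one unrepresentable signed
// quotient instead produce fixed results, which the constant folders and the speculative
// executor below must reproduce exactly:
//   signed   n / 0          -> lo = (n >= 0) ? 0xFFFFFFFF : 1, hi = n
//   signed   INT32_MIN / -1 -> lo = 0x80000000, hi = 0
//   unsigned n / 0          -> lo = 0xFFFFFFFF, hi = n
// e.g. div with rs = -7, rt = 0 leaves lo = 1 and hi = 0xFFFFFFF9.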
void CPU::NewRec::Compiler::MIPSSignedDivide(s32 num, s32 denom, u32* lo, u32* hi)
{
  if (denom == 0)
  {
    // divide by zero
    *lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    *hi = static_cast<u32>(num);
  }
  else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
  {
    // unrepresentable
    *lo = UINT32_C(0x80000000);
    *hi = 0;
  }
  else
  {
    *lo = static_cast<u32>(num / denom);
    *hi = static_cast<u32>(num % denom);
  }
}

void CPU::NewRec::Compiler::MIPSUnsignedDivide(u32 num, u32 denom, u32* lo, u32* hi)
{
  if (denom == 0)
  {
    // divide by zero
    *lo = UINT32_C(0xFFFFFFFF);
    *hi = num;
  }
  else
  {
    *lo = num / denom;
    *hi = num % denom;
  }
}

void CPU::NewRec::Compiler::Compile_div_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));

  const s32 num = GetConstantRegS32(cf.MipsS());
  const s32 denom = GetConstantRegS32(cf.MipsT());

  u32 lo, hi;
  MIPSSignedDivide(num, denom, &lo, &hi);

  SetConstantReg(Reg::hi, hi);
  SetConstantReg(Reg::lo, lo);
}

void CPU::NewRec::Compiler::Compile_divu_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));

  const u32 num = GetConstantRegU32(cf.MipsS());
  const u32 denom = GetConstantRegU32(cf.MipsT());

  u32 lo, hi;
  MIPSUnsignedDivide(num, denom, &lo, &hi);

  SetConstantReg(Reg::hi, hi);
  SetConstantReg(Reg::lo, lo);
}

void CPU::NewRec::Compiler::Compile_add_const(CompileFlags cf)
{
  // TODO: Overflow
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  if (MipsD() != Reg::zero)
    SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) + GetConstantRegU32(cf.MipsT()));
}

void CPU::NewRec::Compiler::Compile_addu_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) + GetConstantRegU32(cf.MipsT()));
}

void CPU::NewRec::Compiler::Compile_sub_const(CompileFlags cf)
{
  // TODO: Overflow
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  if (MipsD() != Reg::zero)
    SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) - GetConstantRegU32(cf.MipsT()));
}

void CPU::NewRec::Compiler::Compile_subu_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
  SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) - GetConstantRegU32(cf.MipsT()));
}

void CPU::NewRec::Compiler::Compile_addi_const(CompileFlags cf)
{
  // TODO: Overflow
  DebugAssert(HasConstantReg(cf.MipsS()));
  if (cf.MipsT() != Reg::zero)
    SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) + inst->i.imm_sext32());
}

void CPU::NewRec::Compiler::Compile_addiu_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()));
  SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) + inst->i.imm_sext32());
}

void CPU::NewRec::Compiler::Compile_slti_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()));
  SetConstantReg(cf.MipsT(), BoolToUInt32(GetConstantRegS32(cf.MipsS()) < static_cast<s32>(inst->i.imm_sext32())));
}

void CPU::NewRec::Compiler::Compile_sltiu_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()));
  SetConstantReg(cf.MipsT(), BoolToUInt32(GetConstantRegU32(cf.MipsS()) < inst->i.imm_sext32()));
}

void CPU::NewRec::Compiler::Compile_andi_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()));
  SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) & inst->i.imm_zext32());
}

void CPU::NewRec::Compiler::Compile_ori_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()));
  SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) | inst->i.imm_zext32());
}

void CPU::NewRec::Compiler::Compile_xori_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()));
  SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) ^ inst->i.imm_zext32());
}

void CPU::NewRec::Compiler::Compile_lui()
{
  if (inst->i.rt == Reg::zero)
    return;

  SetConstantReg(inst->i.rt, inst->i.imm_zext32() << 16);

  if (g_settings.UsingPGXPCPUMode())
    GeneratePGXPCallWithMIPSRegs(reinterpret_cast<const void*>(&PGXP::CPU_LUI), inst->bits);
}
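// Directly-accessible COP0 registers, indexed by register number (0-15). Each entry is
// {pointer into g_state, write mask}: a null pointer means the recompiler falls back to
// the interpreter for that register, a zero mask makes it read-only from MTC0's point
// of view (TAR, BadVaddr, EPC, PRID), and SR/CAUSE/DCIC reuse the interpreter's partial
// write masks.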
static constexpr const std::array<std::pair<u32*, u32>, 16> s_cop0_table = {
  {{nullptr, 0x00000000u},
   {nullptr, 0x00000000u},
   {nullptr, 0x00000000u},
   {&CPU::g_state.cop0_regs.BPC, 0xffffffffu},
   {nullptr, 0},
   {&CPU::g_state.cop0_regs.BDA, 0xffffffffu},
   {&CPU::g_state.cop0_regs.TAR, 0x00000000u},
   {&CPU::g_state.cop0_regs.dcic.bits, CPU::Cop0Registers::DCIC::WRITE_MASK},
   {&CPU::g_state.cop0_regs.BadVaddr, 0x00000000u},
   {&CPU::g_state.cop0_regs.BDAM, 0xffffffffu},
   {nullptr, 0x00000000u},
   {&CPU::g_state.cop0_regs.BPCM, 0xffffffffu},
   {&CPU::g_state.cop0_regs.sr.bits, CPU::Cop0Registers::SR::WRITE_MASK},
   {&CPU::g_state.cop0_regs.cause.bits, CPU::Cop0Registers::CAUSE::WRITE_MASK},
   {&CPU::g_state.cop0_regs.EPC, 0x00000000u},
   {&CPU::g_state.cop0_regs.PRID, 0x00000000u}}};

u32* CPU::NewRec::Compiler::GetCop0RegPtr(Cop0Reg reg)
{
  return (static_cast<u8>(reg) < s_cop0_table.size()) ? s_cop0_table[static_cast<u8>(reg)].first : nullptr;
}

u32 CPU::NewRec::Compiler::GetCop0RegWriteMask(Cop0Reg reg)
{
  return (static_cast<u8>(reg) < s_cop0_table.size()) ? s_cop0_table[static_cast<u8>(reg)].second : 0;
}

void CPU::NewRec::Compiler::Compile_mfc0(CompileFlags cf)
{
  const Cop0Reg r = static_cast<Cop0Reg>(MipsD());
  const u32* ptr = GetCop0RegPtr(r);
  if (!ptr)
  {
    Log_ErrorPrintf("Read from unknown cop0 reg %u", static_cast<u32>(r));
    Compile_Fallback();
    return;
  }

  DebugAssert(cf.valid_host_t);
  LoadHostRegFromCPUPointer(cf.host_t, ptr);
}
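// GTE registers live in a flat 64-entry array (g_state.gte_regs.r32): indices 0-31 are
// the data registers, 32-63 the control registers. Most are written verbatim, but the
// switch below encodes the hardware quirks: reads of SXY3 mirror SXY2 while writes to
// it push the screen-XY FIFO; 16-bit signed registers (vector Z components, IR0-IR3,
// the bottom-right matrix elements RT33/L33/LR33, H, DQA, ZSF3/ZSF4) sign-extend into
// their upper halves; the unsigned depth registers (OTZ, SZ0-SZ3) zero-extend; writes
// to IRGB, LZCS and FLAG (and reads of IRGB/ORGB) go through conversion handlers; and
// ORGB/LZCR are read-only, so stores to them are ignored.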
std::pair<u32*, CPU::NewRec::Compiler::GTERegisterAccessAction>
CPU::NewRec::Compiler::GetGTERegisterPointer(u32 index, bool writing)
{
  if (!writing)
  {
    // Most GTE registers can be read directly. Handle the special cases here.
    if (index == 15) // SXY3
    {
      // mirror of SXY2
      index = 14;
    }

    switch (index)
    {
      case 28: // IRGB
      case 29: // ORGB
      {
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::CallHandler);
      }

      default:
      {
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Direct);
      }
    }
  }
  else
  {
    switch (index)
    {
      case 1:  // V0[z]
      case 3:  // V1[z]
      case 5:  // V2[z]
      case 8:  // IR0
      case 9:  // IR1
      case 10: // IR2
      case 11: // IR3
      case 36: // RT33
      case 44: // L33
      case 52: // LR33
      case 58: // H - sign-extended on read but zext on use
      case 59: // DQA
      case 61: // ZSF3
      case 62: // ZSF4
      {
        // sign-extend z component of vector registers
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::SignExtend16);
      }

      case 7:  // OTZ
      case 16: // SZ0
      case 17: // SZ1
      case 18: // SZ2
      case 19: // SZ3
      {
        // zero-extend unsigned values
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::ZeroExtend16);
      }

      case 15: // SXY3
      {
        // writing to SXYP pushes to the FIFO
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::PushFIFO);
      }

      case 28: // IRGB
      case 30: // LZCS
      case 63: // FLAG
      {
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::CallHandler);
      }

      case 29: // ORGB
      case 31: // LZCR
      {
        // read-only registers
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Ignore);
      }

      default:
      {
        // written as-is, 2x16 or 1x32 bits
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Direct);
      }
    }
  }
}

void CPU::NewRec::Compiler::AddGTETicks(TickCount ticks)
{
  // TODO: check, int has +1 here
  m_gte_done_cycle = m_cycles + ticks;
  Log_DebugPrintf("Adding %d GTE ticks", ticks);
}

void CPU::NewRec::Compiler::StallUntilGTEComplete()
{
  // TODO: hack to match old rec.. this may or may not be correct behavior
  // it's the difference between stalling before and after the current instruction's cycle
  DebugAssert(m_cycles > 0);
  m_cycles--;

  if (!m_dirty_gte_done_cycle)
  {
    // simple case - in block scheduling
    if (m_gte_done_cycle > m_cycles)
    {
      Log_DebugPrintf("Stalling for %d ticks from GTE", m_gte_done_cycle - m_cycles);
      m_cycles += (m_gte_done_cycle - m_cycles);
    }
  }
  else
  {
    // switch to in block scheduling
    Log_DebugPrintf("Flushing GTE stall from state");
    Flush(FLUSH_GTE_STALL_FROM_STATE);
  }

  m_cycles++;
}
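// Slow-path rewriting for fastmem accesses that fault at runtime. The flow is roughly:
// the host fault handler identifies the guest access from LoadstoreBackpatchInfo, this
// function compiles a thunk into the far code buffer which redoes the access through
// the normal memory handlers (undoing the cycles the inline fastmem sequence already
// charged), and the faulting instruction is then overwritten with a jump to the thunk
// so later executions bypass the fault entirely.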
void CPU::NewRec::BackpatchLoadStore(void* exception_pc, const CodeCache::LoadstoreBackpatchInfo& info)
{
  // remove the cycles we added for the memory read, then take them off again after the backpatch
  // the normal rec path will add the ram read ticks later, so we need to take them off at the end
  DebugAssert(!info.is_load || info.cycles >= Bus::RAM_READ_TICKS);
  const TickCount cycles_to_add =
    static_cast<TickCount>(static_cast<u32>(info.cycles)) - (info.is_load ? Bus::RAM_READ_TICKS : 0);
  const TickCount cycles_to_remove = static_cast<TickCount>(static_cast<u32>(info.cycles));

  JitCodeBuffer& buffer = CodeCache::GetCodeBuffer();
  void* thunk_address = buffer.GetFreeFarCodePointer();
  const u32 thunk_size = CompileLoadStoreThunk(
    thunk_address, buffer.GetFreeFarCodeSpace(), exception_pc, info.code_size, cycles_to_add, cycles_to_remove,
    info.gpr_bitmask, info.address_register, info.data_register, info.AccessSize(), info.is_signed, info.is_load);

#if 0
  Log_DebugPrintf("**Backpatch Thunk**");
  CodeCache::DisassembleAndLogHostCode(thunk_address, thunk_size);
#endif

  // backpatch to a jump to the slowmem handler
  CodeCache::EmitJump(exception_pc, thunk_address, true);

  buffer.CommitFarCode(thunk_size);
}

void CPU::NewRec::Compiler::InitSpeculativeRegs()
{
  for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
    m_speculative_constants.regs[i] = g_state.regs.r[i];

  m_speculative_constants.cop0_sr = g_state.cop0_regs.sr.bits;
  m_speculative_constants.memory.clear();
}

void CPU::NewRec::Compiler::InvalidateSpeculativeValues()
{
  m_speculative_constants.regs.fill(std::nullopt);
  m_speculative_constants.memory.clear();
  m_speculative_constants.cop0_sr.reset();
}
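// The Spec* helpers below maintain a best-effort compile-time model of the guest
// registers, SR, and recently-touched memory. Every value is a SpecValue
// (std::optional<u32>): known, or empty when nothing can be proven. A model miss should
// only cost performance (e.g. taking the slowmem path, or an unnecessary block
// truncation); correctness still rests on the runtime fault and page-protection paths.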
CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadReg(Reg reg)
{
  return m_speculative_constants.regs[static_cast<u8>(reg)];
}

void CPU::NewRec::Compiler::SpecWriteReg(Reg reg, SpecValue value)
{
  if (reg == Reg::zero)
    return;

  m_speculative_constants.regs[static_cast<u8>(reg)] = value;
}

void CPU::NewRec::Compiler::SpecInvalidateReg(Reg reg)
{
  if (reg == Reg::zero)
    return;

  m_speculative_constants.regs[static_cast<u8>(reg)].reset();
}

void CPU::NewRec::Compiler::SpecCopyReg(Reg dst, Reg src)
{
  if (dst == Reg::zero)
    return;

  m_speculative_constants.regs[static_cast<u8>(dst)] = m_speculative_constants.regs[static_cast<u8>(src)];
}

CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadMem(VirtualMemoryAddress address)
{
  auto it = m_speculative_constants.memory.find(address);
  if (it != m_speculative_constants.memory.end())
    return it->second;

  u32 value;
  if ((address & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR)
  {
    u32 scratchpad_offset = address & SCRATCHPAD_OFFSET_MASK;
    std::memcpy(&value, &CPU::g_state.scratchpad[scratchpad_offset], sizeof(value));
    return value;
  }

  const PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
  if (Bus::IsRAMAddress(phys_addr))
  {
    u32 ram_offset = phys_addr & Bus::g_ram_mask;
    std::memcpy(&value, &Bus::g_ram[ram_offset], sizeof(value));
    return value;
  }

  return std::nullopt;
}

void CPU::NewRec::Compiler::SpecWriteMem(u32 address, SpecValue value)
{
  auto it = m_speculative_constants.memory.find(address);
  if (it != m_speculative_constants.memory.end())
  {
    it->second = value;
    return;
  }

  const PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
  if ((address & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR || Bus::IsRAMAddress(phys_addr))
    m_speculative_constants.memory.emplace(address, value);
}

void CPU::NewRec::Compiler::SpecInvalidateMem(VirtualMemoryAddress address)
{
  SpecWriteMem(address, std::nullopt);
}

bool CPU::NewRec::Compiler::SpecIsCacheIsolated()
{
  if (!m_speculative_constants.cop0_sr.has_value())
    return false;

  const Cop0Registers::SR sr{m_speculative_constants.cop0_sr.value()};
  return sr.Isc;
}

void CPU::NewRec::Compiler::SpecExec_b()
{
  const bool link = (static_cast<u8>(inst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10);
  if (link)
    SpecWriteReg(Reg::ra, m_compiler_pc);
}

void CPU::NewRec::Compiler::SpecExec_jal()
{
  SpecWriteReg(Reg::ra, m_compiler_pc);
}

void CPU::NewRec::Compiler::SpecExec_jalr()
{
  SpecWriteReg(inst->r.rd, m_compiler_pc);
}

void CPU::NewRec::Compiler::SpecExec_sll()
{
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rt.has_value())
    SpecWriteReg(inst->r.rd, rt.value() << inst->r.shamt);
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_srl()
{
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rt.has_value())
    SpecWriteReg(inst->r.rd, rt.value() >> inst->r.shamt);
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_sra()
{
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rt.has_value())
    SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> inst->r.shamt));
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_sllv()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, rt.value() << (rs.value() & 0x1F));
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_srlv()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, rt.value() >> (rs.value() & 0x1F));
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_srav()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> (rs.value() & 0x1F)));
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_mult()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
  {
    const u64 result =
      static_cast<u64>(static_cast<s64>(SignExtend64(rs.value())) * static_cast<s64>(SignExtend64(rt.value())));
    SpecWriteReg(Reg::hi, Truncate32(result >> 32));
    SpecWriteReg(Reg::lo, Truncate32(result));
  }
  else
  {
    SpecInvalidateReg(Reg::hi);
    SpecInvalidateReg(Reg::lo);
  }
}

void CPU::NewRec::Compiler::SpecExec_multu()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
  {
    // multu is an unsigned multiply, so both operands are zero-extended.
    const u64 result = ZeroExtend64(rs.value()) * ZeroExtend64(rt.value());
    SpecWriteReg(Reg::hi, Truncate32(result >> 32));
    SpecWriteReg(Reg::lo, Truncate32(result));
  }
  else
  {
    SpecInvalidateReg(Reg::hi);
    SpecInvalidateReg(Reg::lo);
  }
}

void CPU::NewRec::Compiler::SpecExec_div()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
  {
    u32 lo, hi;
    MIPSSignedDivide(static_cast<s32>(rs.value()), static_cast<s32>(rt.value()), &lo, &hi);
    SpecWriteReg(Reg::hi, hi);
    SpecWriteReg(Reg::lo, lo);
  }
  else
  {
    SpecInvalidateReg(Reg::hi);
    SpecInvalidateReg(Reg::lo);
  }
}

void CPU::NewRec::Compiler::SpecExec_divu()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
  {
    u32 lo, hi;
    MIPSUnsignedDivide(rs.value(), rt.value(), &lo, &hi);
    SpecWriteReg(Reg::hi, hi);
    SpecWriteReg(Reg::lo, lo);
  }
  else
  {
    SpecInvalidateReg(Reg::hi);
    SpecInvalidateReg(Reg::lo);
  }
}

void CPU::NewRec::Compiler::SpecExec_add()
{
  SpecExec_addu();
}

void CPU::NewRec::Compiler::SpecExec_addu()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, rs.value() + rt.value());
  else
    SpecInvalidateReg(inst->r.rd);
}
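// add/sub/addi speculate through the non-trapping *u paths: a trapping overflow writes
// nothing and leaves the block via the exception path, so the wrapped value tracked
// here should only ever be consumed on the no-overflow fall-through.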
void CPU::NewRec::Compiler::SpecExec_sub()
{
  SpecExec_subu();
}

void CPU::NewRec::Compiler::SpecExec_subu()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, rs.value() - rt.value());
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_and()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, rs.value() & rt.value());
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_or()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, rs.value() | rt.value());
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_xor()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, rs.value() ^ rt.value());
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_nor()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, ~(rs.value() | rt.value()));
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_slt()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(rt.value())));
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_sltu()
{
  const SpecValue rs = SpecReadReg(inst->r.rs);
  const SpecValue rt = SpecReadReg(inst->r.rt);
  if (rs.has_value() && rt.has_value())
    SpecWriteReg(inst->r.rd, BoolToUInt32(rs.value() < rt.value()));
  else
    SpecInvalidateReg(inst->r.rd);
}

void CPU::NewRec::Compiler::SpecExec_addi()
{
  SpecExec_addiu();
}

void CPU::NewRec::Compiler::SpecExec_addiu()
{
  const SpecValue rs = SpecReadReg(inst->i.rs);
  if (rs.has_value())
    SpecWriteReg(inst->i.rt, rs.value() + inst->i.imm_sext32());
  else
    SpecInvalidateReg(inst->i.rt);
}

void CPU::NewRec::Compiler::SpecExec_slti()
{
  const SpecValue rs = SpecReadReg(inst->i.rs);
  if (rs.has_value())
    SpecWriteReg(inst->i.rt, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(inst->i.imm_sext32())));
  else
    SpecInvalidateReg(inst->i.rt);
}

void CPU::NewRec::Compiler::SpecExec_sltiu()
{
  const SpecValue rs = SpecReadReg(inst->i.rs);
  if (rs.has_value())
    SpecWriteReg(inst->i.rt, BoolToUInt32(rs.value() < inst->i.imm_sext32()));
  else
    SpecInvalidateReg(inst->i.rt);
}

void CPU::NewRec::Compiler::SpecExec_andi()
{
  const SpecValue rs = SpecReadReg(inst->i.rs);
  if (rs.has_value())
    SpecWriteReg(inst->i.rt, rs.value() & inst->i.imm_zext32());
  else
    SpecInvalidateReg(inst->i.rt);
}

void CPU::NewRec::Compiler::SpecExec_ori()
{
  const SpecValue rs = SpecReadReg(inst->i.rs);
  if (rs.has_value())
    SpecWriteReg(inst->i.rt, rs.value() | inst->i.imm_zext32());
  else
    SpecInvalidateReg(inst->i.rt);
}

void CPU::NewRec::Compiler::SpecExec_xori()
{
  const SpecValue rs = SpecReadReg(inst->i.rs);
  if (rs.has_value())
    SpecWriteReg(inst->i.rt, rs.value() ^ inst->i.imm_zext32());
  else
    SpecInvalidateReg(inst->i.rt);
}

void CPU::NewRec::Compiler::SpecExec_lui()
{
  SpecWriteReg(inst->i.rt, inst->i.imm_zext32() << 16);
}

CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecExec_LoadStoreAddr()
{
  const SpecValue rs = SpecReadReg(inst->i.rs);
  return rs.has_value() ? (rs.value() + inst->i.imm_sext32()) : rs;
}
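// Speculative loads only see what SpecReadMem models (scratchpad and RAM); anything
// else, e.g. MMIO or BIOS, comes back empty and just invalidates rt. Sub-word loads
// narrow exactly like the hardware: with the memory word 0x000080FF,
//   lb  -> 0xFFFFFFFF (bit 7 of the byte sign-extends)
//   lbu -> 0x000000FF (zero-extended)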
void CPU::NewRec::Compiler::SpecExec_lxx(MemoryAccessSize size, bool sign)
{
  const SpecValue addr = SpecExec_LoadStoreAddr();
  SpecValue val;
  if (!addr.has_value() || !(val = SpecReadMem(addr.value())).has_value())
  {
    SpecInvalidateReg(inst->i.rt);
    return;
  }

  switch (size)
  {
    case MemoryAccessSize::Byte:
      val = sign ? SignExtend32(static_cast<u8>(val.value())) : ZeroExtend32(static_cast<u8>(val.value()));
      break;

    case MemoryAccessSize::HalfWord:
      val = sign ? SignExtend32(static_cast<u16>(val.value())) : ZeroExtend32(static_cast<u16>(val.value()));
      break;

    case MemoryAccessSize::Word:
      break;

    default:
      UnreachableCode();
  }

  SpecWriteReg(inst->i.rt, val);
}

void CPU::NewRec::Compiler::SpecExec_lwx(bool lwr)
{
  // TODO
  SpecInvalidateReg(inst->i.rt);
}

void CPU::NewRec::Compiler::SpecExec_sxx(MemoryAccessSize size)
{
  const SpecValue addr = SpecExec_LoadStoreAddr();
  if (!addr.has_value())
    return;

  SpecValue rt = SpecReadReg(inst->i.rt);
  if (rt.has_value())
  {
    switch (size)
    {
      case MemoryAccessSize::Byte:
        rt = ZeroExtend32(static_cast<u8>(rt.value()));
        break;

      case MemoryAccessSize::HalfWord:
        rt = ZeroExtend32(static_cast<u16>(rt.value()));
        break;

      case MemoryAccessSize::Word:
        break;

      default:
        UnreachableCode();
    }
  }

  SpecWriteMem(addr.value(), rt);
}

void CPU::NewRec::Compiler::SpecExec_swx(bool swr)
{
  const SpecValue addr = SpecExec_LoadStoreAddr();
  if (addr.has_value())
    SpecInvalidateMem(addr.value() & ~3u);
}

void CPU::NewRec::Compiler::SpecExec_swc2()
{
  const SpecValue addr = SpecExec_LoadStoreAddr();
  if (addr.has_value())
    SpecInvalidateMem(addr.value());
}

void CPU::NewRec::Compiler::SpecExec_mfc0()
{
  const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue());
  if (rd != Cop0Reg::SR)
  {
    SpecInvalidateReg(inst->r.rt);
    return;
  }

  SpecWriteReg(inst->r.rt, m_speculative_constants.cop0_sr);
}

void CPU::NewRec::Compiler::SpecExec_mtc0()
{
  const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue());
  if (rd != Cop0Reg::SR || !m_speculative_constants.cop0_sr.has_value())
    return;

  SpecValue val = SpecReadReg(inst->r.rt);
  if (val.has_value())
  {
    constexpr u32 mask = Cop0Registers::SR::WRITE_MASK;
    // Keep the read-only bits from the old SR value; take the writable bits from rt.
    val = (m_speculative_constants.cop0_sr.value() & ~mask) | (val.value() & mask);
  }

  m_speculative_constants.cop0_sr = val;
}

void CPU::NewRec::Compiler::SpecExec_rfe()
{
  if (!m_speculative_constants.cop0_sr.has_value())
    return;

  // Pop the two-deep KU/IE mode stack held in SR's low six bits; bits above the stack
  // (interrupt mask, Isc, BEV, ...) are preserved.
  const u32 val = m_speculative_constants.cop0_sr.value();
  m_speculative_constants.cop0_sr = (val & ~UINT32_C(0b001111)) | ((val & UINT32_C(0b111111)) >> 2);
}