diff --git a/src/core/bus.cpp b/src/core/bus.cpp index 52fde0a77..425303b28 100644 --- a/src/core/bus.cpp +++ b/src/core/bus.cpp @@ -332,6 +332,12 @@ void UpdateFastmemViews(bool enabled, bool isolate_cache) //MapRAM(0xA0600000); } +bool CanUseFastmemForAddress(VirtualMemoryAddress address) +{ + const PhysicalMemoryAddress paddr = address & CPU::PHYSICAL_MEMORY_ADDRESS_MASK; + return IsRAMAddress(paddr); +} + bool IsRAMCodePage(u32 index) { return m_ram_code_bits[index]; diff --git a/src/core/bus.h b/src/core/bus.h index 9b8056f9a..4be96a83b 100644 --- a/src/core/bus.h +++ b/src/core/bus.h @@ -90,6 +90,7 @@ bool DoState(StateWrapper& sw); u8* GetFastmemBase(); void UpdateFastmemViews(bool enabled, bool isolate_cache); +bool CanUseFastmemForAddress(VirtualMemoryAddress address); void SetExpansionROM(std::vector data); void SetBIOS(const std::vector& image); diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 82e6bcbfa..340faf6da 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -850,6 +850,8 @@ void CodeGenerator::GenerateExceptionExit(const CodeBlockInstruction& cbi, Excep void CodeGenerator::BlockPrologue() { + InitSpeculativeRegs(); + EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0)); if (m_block->uncached_fetch_ticks > 0) @@ -1042,6 +1044,7 @@ bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi) m_current_instruction_in_branch_delay_slot_dirty = cbi.is_branch_instruction; m_branch_was_taken_dirty = cbi.is_branch_instruction; m_next_load_delay_dirty = cbi.has_load_delay; + InvalidateSpeculativeValues(); InstructionEpilogue(cbi); return true; } @@ -1054,54 +1057,92 @@ bool CodeGenerator::Compile_Bitwise(const CodeBlockInstruction& cbi) Value lhs; Value rhs; Reg dest; + + SpeculativeValue spec_lhs, spec_rhs; + SpeculativeValue spec_value; + if (op != InstructionOp::funct) { // rt <- rs op zext(imm) lhs 
= m_register_cache.ReadGuestRegister(cbi.instruction.i.rs); rhs = Value::FromConstantU32(cbi.instruction.i.imm_zext32()); dest = cbi.instruction.i.rt; + + spec_lhs = SpeculativeReadReg(cbi.instruction.i.rs); + spec_rhs = cbi.instruction.i.imm_zext32(); } else { lhs = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs); rhs = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt); dest = cbi.instruction.r.rd; + + spec_lhs = SpeculativeReadReg(cbi.instruction.r.rs); + spec_rhs = SpeculativeReadReg(cbi.instruction.r.rt); } Value result; switch (cbi.instruction.op) { case InstructionOp::ori: + { result = OrValues(lhs, rhs); - break; + if (spec_lhs && spec_rhs) + spec_value = *spec_lhs | *spec_rhs; + } + break; case InstructionOp::andi: + { result = AndValues(lhs, rhs); - break; + if (spec_lhs && spec_rhs) + spec_value = *spec_lhs & *spec_rhs; + } + break; case InstructionOp::xori: + { result = XorValues(lhs, rhs); - break; + if (spec_lhs && spec_rhs) + spec_value = *spec_lhs ^ *spec_rhs; + } + break; case InstructionOp::funct: { switch (cbi.instruction.r.funct) { case InstructionFunct::or_: + { result = OrValues(lhs, rhs); - break; + if (spec_lhs && spec_rhs) + spec_value = *spec_lhs | *spec_rhs; + } + break; case InstructionFunct::and_: + { result = AndValues(lhs, rhs); - break; + if (spec_lhs && spec_rhs) + spec_value = *spec_lhs & *spec_rhs; + } + break; case InstructionFunct::xor_: + { result = XorValues(lhs, rhs); - break; + if (spec_lhs && spec_rhs) + spec_value = *spec_lhs ^ *spec_rhs; + } + break; case InstructionFunct::nor: + { result = NotValue(OrValues(lhs, rhs)); - break; + if (spec_lhs && spec_rhs) + spec_value = ~(*spec_lhs | *spec_rhs); + } + break; default: UnreachableCode(); @@ -1116,6 +1157,7 @@ bool CodeGenerator::Compile_Bitwise(const CodeBlockInstruction& cbi) } m_register_cache.WriteGuestRegister(dest, std::move(result)); + SpeculativeWriteReg(dest, spec_value); InstructionEpilogue(cbi); return true; @@ -1127,37 +1169,54 @@ bool 
CodeGenerator::Compile_Shift(const CodeBlockInstruction& cbi)
 
   const InstructionFunct funct = cbi.instruction.r.funct;
   Value rt = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt);
+  SpeculativeValue rt_spec = SpeculativeReadReg(cbi.instruction.r.rt);
   Value shamt;
+  SpeculativeValue shamt_spec;
   if (funct == InstructionFunct::sll || funct == InstructionFunct::srl || funct == InstructionFunct::sra)
   {
     // rd <- rt op shamt
     shamt = Value::FromConstantU32(cbi.instruction.r.shamt);
+    shamt_spec = cbi.instruction.r.shamt;
   }
   else
   {
     // rd <- rt op (rs & 0x1F)
     shamt = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs);
+    shamt_spec = SpeculativeReadReg(cbi.instruction.r.rs);
     if constexpr (!SHIFTS_ARE_IMPLICITLY_MASKED)
       EmitAnd(shamt.host_reg, shamt.host_reg, Value::FromConstantU32(0x1F));
   }
 
   Value result;
+  SpeculativeValue result_spec;
   switch (cbi.instruction.r.funct)
   {
     case InstructionFunct::sll:
     case InstructionFunct::sllv:
+    {
       result = ShlValues(rt, shamt);
-      break;
+      if (rt_spec && shamt_spec)
+        result_spec = *rt_spec << (*shamt_spec & 0x1Fu); // guest uses only the low 5 bits; also avoids host UB
+    }
+    break;
 
     case InstructionFunct::srl:
     case InstructionFunct::srlv:
+    {
       result = ShrValues(rt, shamt);
-      break;
+      if (rt_spec && shamt_spec)
+        result_spec = *rt_spec >> (*shamt_spec & 0x1Fu); // logical shift: operand stays unsigned
+    }
+    break;
 
     case InstructionFunct::sra:
     case InstructionFunct::srav:
+    {
       result = SarValues(rt, shamt);
-      break;
+      if (rt_spec && shamt_spec)
+        result_spec = static_cast<u32>(static_cast<s32>(*rt_spec) >> (*shamt_spec & 0x1Fu)); // arithmetic RIGHT shift
+    }
+    break;
 
     default:
       UnreachableCode();
@@ -1165,6 +1224,7 @@ bool CodeGenerator::Compile_Shift(const CodeBlockInstruction& cbi)
   }
 
   m_register_cache.WriteGuestRegister(cbi.instruction.r.rd, std::move(result));
+  SpeculativeWriteReg(cbi.instruction.r.rd, result_spec);
 
   InstructionEpilogue(cbi);
   return true;
@@ -1179,35 +1239,57 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
   Value offset = Value::FromConstantU32(cbi.instruction.i.imm_sext32());
   Value address = AddValues(base, offset, false);
 
+  SpeculativeValue address_spec =
SpeculativeReadReg(cbi.instruction.i.rs);
+  SpeculativeValue value_spec;
+  if (address_spec)
+    address_spec = *address_spec + cbi.instruction.i.imm_sext32();
+
   Value result;
   switch (cbi.instruction.op)
   {
     case InstructionOp::lb:
     case InstructionOp::lbu:
     {
-      result = EmitLoadGuestMemory(cbi, address, RegSize_8);
+      result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_8);
       ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb));
       if (g_settings.gpu_pgxp_enable)
         EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), result, address);
+
+      if (address_spec)
+      {
+        value_spec = SpeculativeReadMemory(*address_spec & ~3u);
+        if (value_spec)
+          value_spec = (*value_spec >> ((*address_spec & 3u) * 8u)) & 0xFFu;
+      }
     }
     break;
 
     case InstructionOp::lh:
     case InstructionOp::lhu:
     {
-      result = EmitLoadGuestMemory(cbi, address, RegSize_16);
+      result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_16);
       ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh));
       if (g_settings.gpu_pgxp_enable)
         EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), result, address);
+
+      if (address_spec)
+      {
+        value_spec = SpeculativeReadMemory(*address_spec & ~3u); // word-aligned, same key the sb/sh store path uses
+        if (value_spec)
+          value_spec = (*value_spec >> ((*address_spec & 2u) * 8u)) & 0xFFFFu; // address bit 1 picks the halfword
+      }
     }
     break;
 
     case InstructionOp::lw:
     {
-      result = EmitLoadGuestMemory(cbi, address, RegSize_32);
+      result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_32);
       if (g_settings.gpu_pgxp_enable)
         EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), result, address);
+
+      if (address_spec)
+        value_spec = SpeculativeReadMemory(*address_spec);
     }
     break;
 
@@ -1217,6 +1299,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
   }
 
   m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.i.rt, std::move(result));
+  SpeculativeWriteReg(cbi.instruction.i.rt, value_spec);
InstructionEpilogue(cbi);
   return true;
@@ -1232,35 +1315,81 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
   Value address = AddValues(base, offset, false);
   Value value = m_register_cache.ReadGuestRegister(cbi.instruction.i.rt);
 
+  SpeculativeValue address_spec = SpeculativeReadReg(cbi.instruction.i.rs);
+  SpeculativeValue value_spec = SpeculativeReadReg(cbi.instruction.i.rt);
+  if (address_spec)
+    address_spec = *address_spec + cbi.instruction.i.imm_sext32();
+
   switch (cbi.instruction.op)
   {
     case InstructionOp::sb:
     {
-      EmitStoreGuestMemory(cbi, address, value.ViewAsSize(RegSize_8));
+      EmitStoreGuestMemory(cbi, address, address_spec, value.ViewAsSize(RegSize_8));
       if (g_settings.gpu_pgxp_enable)
       {
         EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits),
                          value.ViewAsSize(RegSize_8), address);
       }
+
+      if (address_spec)
+      {
+        const VirtualMemoryAddress aligned_addr = (*address_spec & ~3u);
+        const SpeculativeValue aligned_existing_value = SpeculativeReadMemory(aligned_addr);
+        if (aligned_existing_value)
+        {
+          if (value_spec)
+          {
+            const u32 shift = (*address_spec & 3u) * 8u; // byte lane comes from the unaligned address, not aligned_addr
+            SpeculativeWriteMemory(aligned_addr,
+                                   (*aligned_existing_value & ~(0xFFu << shift)) | ((*value_spec & 0xFFu) << shift));
+          }
+          else
+          {
+            SpeculativeWriteMemory(aligned_addr, std::nullopt);
+          }
+        }
+      }
     }
     break;
 
     case InstructionOp::sh:
     {
-      EmitStoreGuestMemory(cbi, address, value.ViewAsSize(RegSize_16));
+      EmitStoreGuestMemory(cbi, address, address_spec, value.ViewAsSize(RegSize_16));
       if (g_settings.gpu_pgxp_enable)
       {
         EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits),
                          value.ViewAsSize(RegSize_16), address);
       }
+
+      if (address_spec)
+      {
+        const VirtualMemoryAddress aligned_addr = (*address_spec & ~3u);
+        const SpeculativeValue aligned_existing_value = SpeculativeReadMemory(aligned_addr);
+        if (aligned_existing_value)
+        {
+          if (value_spec)
+          {
+            const u32 shift = (*address_spec & 2u) * 8u; // address bit 1 selects the halfword lane (0 or 16)
+
SpeculativeWriteMemory(aligned_addr, (*aligned_existing_value & ~(0xFFFFu << shift)) | + ((*value_spec & 0xFFFFu) << shift)); + } + else + { + SpeculativeWriteMemory(aligned_addr, std::nullopt); + } + } + } } break; case InstructionOp::sw: { - EmitStoreGuestMemory(cbi, address, value); + EmitStoreGuestMemory(cbi, address, address_spec, value); if (g_settings.gpu_pgxp_enable) EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), value, address); + + if (address_spec) + SpeculativeWriteMemory(*address_spec, value_spec); } break; @@ -1282,10 +1411,14 @@ bool CodeGenerator::Compile_LoadLeftRight(const CodeBlockInstruction& cbi) Value address = AddValues(base, offset, false); base.ReleaseAndClear(); + SpeculativeValue address_spec = SpeculativeReadReg(cbi.instruction.i.rs); + if (address_spec) + address_spec = *address_spec + cbi.instruction.i.imm_sext32(); + Value shift = ShlValues(AndValues(address, Value::FromConstantU32(3)), Value::FromConstantU32(3)); // * 8 address = AndValues(address, Value::FromConstantU32(~u32(3))); - Value mem = EmitLoadGuestMemory(cbi, address, RegSize_32); + Value mem = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_32); // hack to bypass load delays Value value; @@ -1323,6 +1456,9 @@ bool CodeGenerator::Compile_LoadLeftRight(const CodeBlockInstruction& cbi) m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.i.rt, std::move(mem)); + // TODO: Speculative values + SpeculativeWriteReg(cbi.instruction.r.rt, std::nullopt); + InstructionEpilogue(cbi); return true; } @@ -1336,10 +1472,18 @@ bool CodeGenerator::Compile_StoreLeftRight(const CodeBlockInstruction& cbi) Value address = AddValues(base, offset, false); base.ReleaseAndClear(); + // TODO: Speculative values + SpeculativeValue address_spec = SpeculativeReadReg(cbi.instruction.i.rs); + if (address_spec) + { + address_spec = *address_spec + cbi.instruction.i.imm_sext32(); + SpeculativeWriteMemory(*address_spec & ~3u, std::nullopt); + } + Value 
shift = ShlValues(AndValues(address, Value::FromConstantU32(3)), Value::FromConstantU32(3)); // * 8 address = AndValues(address, Value::FromConstantU32(~u32(3))); - Value mem = EmitLoadGuestMemory(cbi, address, RegSize_32); + Value mem = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_32); Value reg = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt); @@ -1359,7 +1503,7 @@ bool CodeGenerator::Compile_StoreLeftRight(const CodeBlockInstruction& cbi) shift.ReleaseAndClear(); - EmitStoreGuestMemory(cbi, address, mem); + EmitStoreGuestMemory(cbi, address, address_spec, mem); if (g_settings.gpu_pgxp_enable) EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), mem, address); @@ -1375,6 +1519,7 @@ bool CodeGenerator::Compile_MoveHiLo(const CodeBlockInstruction& cbi) { case InstructionFunct::mfhi: m_register_cache.WriteGuestRegister(cbi.instruction.r.rd, m_register_cache.ReadGuestRegister(Reg::hi)); + SpeculativeWriteReg(cbi.instruction.r.rd, std::nullopt); break; case InstructionFunct::mthi: @@ -1383,6 +1528,7 @@ bool CodeGenerator::Compile_MoveHiLo(const CodeBlockInstruction& cbi) case InstructionFunct::mflo: m_register_cache.WriteGuestRegister(cbi.instruction.r.rd, m_register_cache.ReadGuestRegister(Reg::lo)); + SpeculativeWriteReg(cbi.instruction.r.rd, std::nullopt); break; case InstructionFunct::mtlo: @@ -1408,7 +1554,9 @@ bool CodeGenerator::Compile_Add(const CodeBlockInstruction& cbi) Value lhs, rhs; Reg lhs_src; + SpeculativeValue lhs_spec, rhs_spec; Reg dest; + switch (cbi.instruction.op) { case InstructionOp::addi: @@ -1419,6 +1567,9 @@ bool CodeGenerator::Compile_Add(const CodeBlockInstruction& cbi) lhs_src = cbi.instruction.i.rs; lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs); rhs = Value::FromConstantU32(cbi.instruction.i.imm_sext32()); + + lhs_spec = SpeculativeReadReg(cbi.instruction.i.rs); + rhs_spec = cbi.instruction.i.imm_sext32(); } break; @@ -1429,6 +1580,8 @@ bool 
CodeGenerator::Compile_Add(const CodeBlockInstruction& cbi) lhs_src = cbi.instruction.r.rs; lhs = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs); rhs = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt); + lhs_spec = SpeculativeReadReg(cbi.instruction.r.rs); + rhs_spec = SpeculativeReadReg(cbi.instruction.r.rt); } break; @@ -1450,6 +1603,11 @@ bool CodeGenerator::Compile_Add(const CodeBlockInstruction& cbi) m_register_cache.WriteGuestRegister(dest, std::move(result)); + SpeculativeValue value_spec; + if (lhs_spec && rhs_spec) + value_spec = *lhs_spec + *rhs_spec; + SpeculativeWriteReg(dest, value_spec); + InstructionEpilogue(cbi); return true; } @@ -1464,12 +1622,20 @@ bool CodeGenerator::Compile_Subtract(const CodeBlockInstruction& cbi) Value lhs = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs); Value rhs = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt); + SpeculativeValue lhs_spec = SpeculativeReadReg(cbi.instruction.r.rs); + SpeculativeValue rhs_spec = SpeculativeReadReg(cbi.instruction.r.rt); + Value result = SubValues(lhs, rhs, check_overflow); if (check_overflow) GenerateExceptionExit(cbi, Exception::Ov, Condition::Overflow); m_register_cache.WriteGuestRegister(cbi.instruction.r.rd, std::move(result)); + SpeculativeValue value_spec; + if (lhs_spec && rhs_spec) + value_spec = *lhs_spec - *rhs_spec; + SpeculativeWriteReg(cbi.instruction.r.rd, value_spec); + InstructionEpilogue(cbi); return true; } @@ -1680,12 +1846,15 @@ bool CodeGenerator::Compile_SetLess(const CodeBlockInstruction& cbi) Reg dest; Value lhs, rhs; + SpeculativeValue lhs_spec, rhs_spec; if (cbi.instruction.op == InstructionOp::slti || cbi.instruction.op == InstructionOp::sltiu) { // rt <- rs < {z,s}ext(imm) dest = cbi.instruction.i.rt; lhs = m_register_cache.ReadGuestRegister(cbi.instruction.i.rs, true, true); rhs = Value::FromConstantU32(cbi.instruction.i.imm_sext32()); + lhs_spec = SpeculativeReadReg(cbi.instruction.i.rs); + rhs_spec = 
cbi.instruction.i.imm_sext32();
 
     // flush the old value which might free up a register
     if (dest != cbi.instruction.r.rs)
@@ -1697,6 +1866,8 @@ bool CodeGenerator::Compile_SetLess(const CodeBlockInstruction& cbi)
     dest = cbi.instruction.r.rd;
     lhs = m_register_cache.ReadGuestRegister(cbi.instruction.r.rs, true, true);
     rhs = m_register_cache.ReadGuestRegister(cbi.instruction.r.rt);
+    lhs_spec = SpeculativeReadReg(cbi.instruction.r.rs);
+    rhs_spec = SpeculativeReadReg(cbi.instruction.r.rt);
 
     // flush the old value which might free up a register
     if (dest != cbi.instruction.i.rs && dest != cbi.instruction.r.rt)
@@ -1708,6 +1879,14 @@ bool CodeGenerator::Compile_SetLess(const CodeBlockInstruction& cbi)
   EmitSetConditionResult(result.host_reg, result.size, signed_comparison ? Condition::Less : Condition::Below);
   m_register_cache.WriteGuestRegister(dest, std::move(result));
 
+  SpeculativeValue value_spec;
+  if (lhs_spec && rhs_spec)
+  {
+    value_spec = BoolToUInt32(signed_comparison ? (static_cast<s32>(*lhs_spec) < static_cast<s32>(*rhs_spec)) :
+                                                  (*lhs_spec < *rhs_spec));
+  }
+  SpeculativeWriteReg(dest, value_spec); // dest is rt for slti/sltiu and rd for slt/sltu
+
   InstructionEpilogue(cbi);
   return true;
 }
@@ -1920,8 +2099,9 @@ bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi)
   InstructionPrologue(cbi, 1);
 
   // rt <- (imm << 16)
-  m_register_cache.WriteGuestRegister(cbi.instruction.i.rt,
-                                      Value::FromConstantU32(cbi.instruction.i.imm_zext32() << 16));
+  const u32 value = cbi.instruction.i.imm_zext32() << 16;
+  m_register_cache.WriteGuestRegister(cbi.instruction.i.rt, Value::FromConstantU32(value));
+  SpeculativeWriteReg(cbi.instruction.i.rt, value);
 
   InstructionEpilogue(cbi);
   return true;
@@ -2005,6 +2185,7 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
         Value value = m_register_cache.AllocateScratch(RegSize_32);
         EmitLoadCPUStructField(value.host_reg, value.size, offset);
         m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, std::move(value));
+
SpeculativeWriteReg(cbi.instruction.r.rt, std::nullopt); } else { @@ -2228,9 +2409,13 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) const u32 reg = static_cast(cbi.instruction.i.rt.GetValue()); Value address = AddValues(m_register_cache.ReadGuestRegister(cbi.instruction.i.rs), Value::FromConstantU32(cbi.instruction.i.imm_sext32()), false); + SpeculativeValue spec_address = SpeculativeReadReg(cbi.instruction.i.rs); + if (spec_address) + spec_address = *spec_address + cbi.instruction.i.imm_sext32(); + if (cbi.instruction.op == InstructionOp::lwc2) { - Value value = EmitLoadGuestMemory(cbi, address, RegSize_32); + Value value = EmitLoadGuestMemory(cbi, address, spec_address, RegSize_32); DoGTERegisterWrite(reg, value); if (g_settings.gpu_pgxp_enable) @@ -2239,10 +2424,14 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) else { Value value = DoGTERegisterRead(reg); - EmitStoreGuestMemory(cbi, address, value); + EmitStoreGuestMemory(cbi, address, spec_address, value); if (g_settings.gpu_pgxp_enable) EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), value, address); + + SpeculativeValue spec_base = SpeculativeReadReg(cbi.instruction.i.rs); + if (spec_base) + SpeculativeWriteMemory(*spec_address, std::nullopt); } InstructionEpilogue(cbi); @@ -2274,6 +2463,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) } m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, std::move(value)); + SpeculativeWriteReg(cbi.instruction.r.rt, std::nullopt); InstructionEpilogue(cbi); return true; @@ -2317,4 +2507,68 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) return true; } } + +void CodeGenerator::InitSpeculativeRegs() +{ + for (u8 i = 0; i < static_cast(Reg::count); i++) + m_speculative_constants.regs[i] = g_state.regs.r[i]; +} + +void CodeGenerator::InvalidateSpeculativeValues() +{ + m_speculative_constants.regs.fill(std::nullopt); + 
m_speculative_constants.memory.clear();
+}
+
+CodeGenerator::SpeculativeValue CodeGenerator::SpeculativeReadReg(Reg reg)
+{
+  return (static_cast<u8>(reg) == 0u) ? SpeculativeValue(0u) : m_speculative_constants.regs[static_cast<u8>(reg)]; // r0 ($zero) is hardwired to 0
+}
+
+void CodeGenerator::SpeculativeWriteReg(Reg reg, SpeculativeValue value)
+{
+  m_speculative_constants.regs[static_cast<u8>(reg)] = value;
+}
+
+CodeGenerator::SpeculativeValue CodeGenerator::SpeculativeReadMemory(VirtualMemoryAddress address)
+{
+  PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
+
+  auto it = m_speculative_constants.memory.find(phys_addr); // key by physical address so segment mirrors share one entry
+  if (it != m_speculative_constants.memory.end())
+    return it->second;
+
+  u32 value;
+  if ((phys_addr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION)
+  {
+    u32 scratchpad_offset = phys_addr & DCACHE_OFFSET_MASK;
+    std::memcpy(&value, &CPU::g_state.dcache[scratchpad_offset], sizeof(value));
+    return value;
+  }
+
+  if (Bus::IsRAMAddress(phys_addr))
+  {
+    u32 ram_offset = phys_addr & Bus::RAM_MASK;
+    std::memcpy(&value, &Bus::g_ram[ram_offset], sizeof(value));
+    return value;
+  }
+
+  return std::nullopt;
+}
+
+void CodeGenerator::SpeculativeWriteMemory(u32 address, SpeculativeValue value)
+{
+  PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
+
+  auto it = m_speculative_constants.memory.find(phys_addr); // same physical key as SpeculativeReadMemory
+  if (it != m_speculative_constants.memory.end())
+  {
+    it->second = value;
+    return;
+  }
+
+  if ((phys_addr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION || Bus::IsRAMAddress(phys_addr))
+    m_speculative_constants.memory.emplace(phys_addr, value); // only scratchpad and RAM words are tracked
+}
+
 } // namespace CPU::Recompiler
diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h
index 98e22d4be..576f3556c 100644
--- a/src/core/cpu_recompiler_code_generator.h
+++ b/src/core/cpu_recompiler_code_generator.h
@@ -16,6 +16,8 @@ namespace CPU::Recompiler {
 class CodeGenerator
 {
 public:
+  using SpeculativeValue = std::optional<u32>;
+
   CodeGenerator(JitCodeBuffer* code_buffer);
   ~CodeGenerator();
 
@@ -75,12 +77,16 @@
public: void EmitLoadGlobalAddress(HostReg host_reg, const void* ptr); // Automatically generates an exception handler. - Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size); + Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const SpeculativeValue& address_spec, + RegSize size); void EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result); - void EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result, bool in_far_code); - void EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value); + void EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result, + bool in_far_code); + void EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const SpeculativeValue& address_spec, + const Value& value); void EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value); - void EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value, bool in_far_code); + void EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value, + bool in_far_code); // Unconditional branch to pointer. May allocate a scratch register. 
void EmitBranch(const void* address, bool allow_scratch = true); @@ -236,6 +242,24 @@ private: bool m_current_instruction_was_branch_taken_dirty = false; bool m_load_delay_dirty = false; bool m_next_load_delay_dirty = false; + + ////////////////////////////////////////////////////////////////////////// + // Speculative Constants + ////////////////////////////////////////////////////////////////////////// + struct SpeculativeConstants + { + std::array(Reg::count)> regs; + std::unordered_map memory; + }; + + void InitSpeculativeRegs(); + void InvalidateSpeculativeValues(); + SpeculativeValue SpeculativeReadReg(Reg reg); + void SpeculativeWriteReg(Reg reg, SpeculativeValue value); + SpeculativeValue SpeculativeReadMemory(u32 address); + void SpeculativeWriteMemory(VirtualMemoryAddress address, SpeculativeValue value); + + SpeculativeConstants m_speculative_constants; }; } // namespace CPU::Recompiler diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp index 1674b7fb5..a815c4b4f 100644 --- a/src/core/cpu_recompiler_code_generator_aarch64.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp @@ -1281,61 +1281,6 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) } } -Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size) -{ - if (address.IsConstant()) - { - TickCount read_ticks; - void* ptr = GetDirectReadMemoryPointer( - static_cast(address.constant_value), - (size == RegSize_8) ? MemoryAccessSize::Byte : - ((size == RegSize_16) ? 
MemoryAccessSize::HalfWord : MemoryAccessSize::Word), - &read_ticks); - if (ptr) - { - Value result = m_register_cache.AllocateScratch(size); - EmitLoadGlobal(result.GetHostRegister(), size, ptr); - m_delayed_cycles_add += read_ticks; - return result; - } - } - - AddPendingCycles(true); - - Value result = m_register_cache.AllocateScratch(RegSize_64); - if (g_settings.IsUsingFastmem()) - { - EmitLoadGuestMemoryFastmem(cbi, address, size, result); - } - else - { - m_register_cache.FlushCallerSavedGuestRegisters(true, true); - EmitLoadGuestMemorySlowmem(cbi, address, size, result, false); - } - - // Downcast to ignore upper 56/48/32 bits. This should be a noop. - switch (size) - { - case RegSize_8: - ConvertValueSizeInPlace(&result, RegSize_8, false); - break; - - case RegSize_16: - ConvertValueSizeInPlace(&result, RegSize_16, false); - break; - - case RegSize_32: - ConvertValueSizeInPlace(&result, RegSize_32, false); - break; - - default: - UnreachableCode(); - break; - } - - return result; -} - void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result) { @@ -1470,39 +1415,11 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, } } -void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value) -{ - if (address.IsConstant()) - { - void* ptr = GetDirectWriteMemoryPointer( - static_cast(address.constant_value), - (value.size == RegSize_8) ? MemoryAccessSize::Byte : - ((value.size == RegSize_16) ? 
MemoryAccessSize::HalfWord : MemoryAccessSize::Word)); - if (ptr) - { - EmitStoreGlobal(ptr, value); - return; - } - } - - AddPendingCycles(true); - - if (g_settings.IsUsingFastmem()) - { - // we need the value in a host register to store it - Value value_in_hr = GetValueInHostRegister(value); - EmitStoreGuestMemoryFastmem(cbi, address, value_in_hr); - } - else - { - m_register_cache.FlushCallerSavedGuestRegisters(true, true); - EmitStoreGuestMemorySlowmem(cbi, address, value, false); - } -} - void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value) { + Value value_in_hr = GetValueInHostRegister(value); + // fastmem LoadStoreBackpatchInfo bpi; bpi.host_pc = GetCurrentNearCodePointer(); @@ -1525,15 +1442,15 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, switch (value.size) { case RegSize_8: - m_emit->Strb(GetHostReg8(value), actual_address); + m_emit->Strb(GetHostReg8(value_in_hr), actual_address); break; case RegSize_16: - m_emit->Strh(GetHostReg16(value), actual_address); + m_emit->Strh(GetHostReg16(value_in_hr), actual_address); break; case RegSize_32: - m_emit->Str(GetHostReg32(value), actual_address); + m_emit->Str(GetHostReg32(value_in_hr), actual_address); break; default: @@ -1548,7 +1465,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, bpi.host_slowmem_pc = GetCurrentFarCodePointer(); SwitchToFarCode(); - EmitStoreGuestMemorySlowmem(cbi, address, value, true); + EmitStoreGuestMemorySlowmem(cbi, address, value_in_hr, true); // return to the block code EmitBranch(GetCurrentNearCodePointer(), false); @@ -1563,6 +1480,8 @@ void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, { AddPendingCycles(true); + Value value_in_hr = GetValueInHostRegister(value); + if (g_settings.cpu_recompiler_memory_exceptions) { Assert(!in_far_code); @@ -1571,15 +1490,15 @@ void 
CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, switch (value.size) { case RegSize_8: - EmitFunctionCall(&result, &Thunks::WriteMemoryByte, address, value); + EmitFunctionCall(&result, &Thunks::WriteMemoryByte, address, value_in_hr); break; case RegSize_16: - EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value); + EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value_in_hr); break; case RegSize_32: - EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value); + EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value_in_hr); break; default: @@ -1616,15 +1535,15 @@ void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, switch (value.size) { case RegSize_8: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryByte, address, value); + EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryByte, address, value_in_hr); break; case RegSize_16: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryHalfWord, address, value); + EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryHalfWord, address, value_in_hr); break; case RegSize_32: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryWord, address, value); + EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryWord, address, value_in_hr); break; default: diff --git a/src/core/cpu_recompiler_code_generator_generic.cpp b/src/core/cpu_recompiler_code_generator_generic.cpp index 60344d650..6246184c8 100644 --- a/src/core/cpu_recompiler_code_generator_generic.cpp +++ b/src/core/cpu_recompiler_code_generator_generic.cpp @@ -1,6 +1,9 @@ #include "cpu_core.h" #include "cpu_core_private.h" #include "cpu_recompiler_code_generator.h" +#include "settings.h" +#include "common/log.h" +Log_SetChannel(Recompiler::CodeGenerator); namespace CPU::Recompiler { @@ -23,6 +26,117 @@ void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value) m_load_delay_dirty = true; } +Value 
CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, + const SpeculativeValue& address_spec, RegSize size) +{ + if (address.IsConstant()) + { + TickCount read_ticks; + void* ptr = GetDirectReadMemoryPointer( + static_cast(address.constant_value), + (size == RegSize_8) ? MemoryAccessSize::Byte : + ((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word), + &read_ticks); + if (ptr) + { + Value result = m_register_cache.AllocateScratch(size); + EmitLoadGlobal(result.GetHostRegister(), size, ptr); + m_delayed_cycles_add += read_ticks; + return result; + } + } + + AddPendingCycles(true); + + const bool use_fastmem = address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true; + if (address_spec) + { + if (!use_fastmem) + Log_DevPrintf("Non-constant load at 0x%08X, speculative address 0x%08X, using fastmem = %s", cbi.pc, + *address_spec, use_fastmem ? "yes" : "no"); + } + else + { + Log_DevPrintf("Non-constant load at 0x%08X, speculative address UNKNOWN, using fastmem = %s", cbi.pc, + use_fastmem ? "yes" : "no"); + } + + Value result = m_register_cache.AllocateScratch(RegSize_64); + if (g_settings.IsUsingFastmem() && use_fastmem) + { + EmitLoadGuestMemoryFastmem(cbi, address, size, result); + } + else + { + m_register_cache.FlushCallerSavedGuestRegisters(true, true); + EmitLoadGuestMemorySlowmem(cbi, address, size, result, false); + } + + // Downcast to ignore upper 56/48/32 bits. This should be a noop. 
+ switch (size) + { + case RegSize_8: + ConvertValueSizeInPlace(&result, RegSize_8, false); + break; + + case RegSize_16: + ConvertValueSizeInPlace(&result, RegSize_16, false); + break; + + case RegSize_32: + ConvertValueSizeInPlace(&result, RegSize_32, false); + break; + + default: + UnreachableCode(); + break; + } + + return result; +} + +void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, + const SpeculativeValue& address_spec, const Value& value) +{ + if (address.IsConstant()) + { + void* ptr = GetDirectWriteMemoryPointer( + static_cast(address.constant_value), + (value.size == RegSize_8) ? MemoryAccessSize::Byte : + ((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word)); + if (ptr) + { + EmitStoreGlobal(ptr, value); + return; + } + } + + AddPendingCycles(true); + + const bool use_fastmem = address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true; + if (address_spec) + { + if (!use_fastmem) + Log_DevPrintf("Non-constant store at 0x%08X, speculative address 0x%08X, using fastmem = %s", cbi.pc, + *address_spec, use_fastmem ? "yes" : "no"); + } + else + { + Log_DevPrintf("Non-constant store at 0x%08X, speculative address UNKNOWN, using fastmem = %s", cbi.pc, + use_fastmem ? 
"yes" : "no"); + } + + if (g_settings.IsUsingFastmem() && use_fastmem) + { + EmitStoreGuestMemoryFastmem(cbi, address, value); + } + else + { + m_register_cache.FlushCallerSavedGuestRegisters(true, true); + EmitStoreGuestMemorySlowmem(cbi, address, value, false); + } +} + #ifndef CPU_X64 void CodeGenerator::EmitICacheCheckAndUpdate() diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index e56a49716..83410b070 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -1745,61 +1745,6 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) } } -Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size) -{ - if (address.IsConstant()) - { - TickCount read_ticks; - void* ptr = GetDirectReadMemoryPointer( - static_cast(address.constant_value), - (size == RegSize_8) ? MemoryAccessSize::Byte : - ((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word), - &read_ticks); - if (ptr) - { - Value result = m_register_cache.AllocateScratch(size); - EmitLoadGlobal(result.GetHostRegister(), size, ptr); - m_delayed_cycles_add += read_ticks; - return result; - } - } - - AddPendingCycles(true); - - Value result = m_register_cache.AllocateScratch(RegSize_64); - if (g_settings.IsUsingFastmem()) - { - EmitLoadGuestMemoryFastmem(cbi, address, size, result); - } - else - { - m_register_cache.FlushCallerSavedGuestRegisters(true, true); - EmitLoadGuestMemorySlowmem(cbi, address, size, result, false); - } - - // Downcast to ignore upper 56/48/32 bits. This should be a noop. 
- switch (size) - { - case RegSize_8: - ConvertValueSizeInPlace(&result, RegSize_8, false); - break; - - case RegSize_16: - ConvertValueSizeInPlace(&result, RegSize_16, false); - break; - - case RegSize_32: - ConvertValueSizeInPlace(&result, RegSize_32, false); - break; - - default: - UnreachableCode(); - break; - } - - return result; -} - void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result) { @@ -1967,34 +1912,6 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, } } -void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value) -{ - if (address.IsConstant()) - { - void* ptr = GetDirectWriteMemoryPointer( - static_cast(address.constant_value), - (value.size == RegSize_8) ? MemoryAccessSize::Byte : - ((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word)); - if (ptr) - { - EmitStoreGlobal(ptr, value); - return; - } - } - - AddPendingCycles(true); - - if (g_settings.IsUsingFastmem()) - { - EmitStoreGuestMemoryFastmem(cbi, address, value); - } - else - { - m_register_cache.FlushCallerSavedGuestRegisters(true, true); - EmitStoreGuestMemorySlowmem(cbi, address, value, false); - } -} - void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value) {