CPU/CodeCache: Dynamically compute BIOS memory access timing

The BIOS access timings can change at runtime if the game changes them. Instead of
forcing the affected blocks to recompile, we can just multiply size * word_time when the block executes.

Improves stability of Nightmare Creatures booting, and fixes corrupted
text in Formula Circus when using the cached interpreter.
This commit is contained in:
Stenzek 2024-07-19 19:31:33 +10:00
parent 5f36c2948f
commit 2e96931c32
No known key found for this signature in database
18 changed files with 294 additions and 65 deletions

View file

@ -708,6 +708,15 @@ bool Bus::HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size)
return false;
}
/// Returns a pointer to the g_bios_access_time entry for the given access size when address lies in
/// [BIOS_BASE, BIOS_BASE + BIOS_MIRROR_SIZE); returns nullptr for every other address.
const TickCount* Bus::GetMemoryAccessTimePtr(PhysicalMemoryAddress address, MemoryAccessSize size)
{
  // Currently only BIOS, but could be EXP1 as well.
  const bool in_bios_mirror = (address >= BIOS_BASE) && (address < (BIOS_BASE + BIOS_MIRROR_SIZE));
  if (!in_bios_mirror)
    return nullptr;

  return &g_bios_access_time[static_cast<size_t>(size)];
}
std::optional<Bus::MemoryRegion> Bus::GetMemoryRegionForAddress(PhysicalMemoryAddress address)
{
if (address < RAM_2MB_SIZE)

View file

@ -191,6 +191,9 @@ ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
}
/// Returns a pointer to the cycle count for a non-RAM memory access.
/// The result is nullptr when the address has no such entry, so callers must check it before dereferencing.
const TickCount* GetMemoryAccessTimePtr(PhysicalMemoryAddress address, MemoryAccessSize size);
enum class MemoryRegion
{
RAM,

View file

@ -823,8 +823,20 @@ template<PGXPMode pgxp_mode>
}
DebugAssert(!(HasPendingInterrupt()));
if (g_settings.cpu_recompiler_icache)
CheckAndUpdateICacheTags(block->icache_line_count, block->uncached_fetch_ticks);
if (block->HasFlag(BlockFlags::IsUsingICache))
{
CheckAndUpdateICacheTags(block->icache_line_count);
}
else if (block->HasFlag(BlockFlags::NeedsDynamicFetchTicks))
{
AddPendingTicks(
static_cast<TickCount>(block->size * static_cast<u32>(*Bus::GetMemoryAccessTimePtr(
block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word))));
}
else
{
AddPendingTicks(block->uncached_fetch_ticks);
}
InterpretCachedBlock<pgxp_mode>(block);
@ -893,6 +905,9 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
// TODO: Jump to other block if it exists at this pc?
const PageProtectionMode protection = GetProtectionModeForPC(start_pc);
const bool use_icache = CPU::IsCachedAddress(start_pc);
const bool dynamic_fetch_ticks = (!use_icache && Bus::GetMemoryAccessTimePtr(start_pc & PHYSICAL_MEMORY_ADDRESS_MASK,
MemoryAccessSize::Word) != nullptr);
u32 pc = start_pc;
bool is_branch_delay_slot = false;
bool is_load_delay_slot = false;
@ -905,7 +920,8 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
instructions->clear();
metadata->icache_line_count = 0;
metadata->uncached_fetch_ticks = 0;
metadata->flags = BlockFlags::None;
metadata->flags = use_icache ? BlockFlags::IsUsingICache :
(dynamic_fetch_ticks ? BlockFlags::NeedsDynamicFetchTicks : BlockFlags::None);
u32 last_cache_line = ICACHE_LINES;
u32 last_page = (protection == PageProtectionMode::WriteProtected) ? Bus::GetRAMCodePageIndex(start_pc) : 0;
@ -956,17 +972,23 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
info.is_store_instruction = IsMemoryStoreInstruction(instruction);
info.has_load_delay = InstructionHasLoadDelay(instruction);
if (g_settings.cpu_recompiler_icache)
if (use_icache)
{
const u32 icache_line = GetICacheLine(pc);
if (icache_line != last_cache_line)
if (g_settings.cpu_recompiler_icache)
{
metadata->icache_line_count++;
last_cache_line = icache_line;
const u32 icache_line = GetICacheLine(pc);
if (icache_line != last_cache_line)
{
metadata->icache_line_count++;
last_cache_line = icache_line;
}
}
}
else if (!dynamic_fetch_ticks)
{
metadata->uncached_fetch_ticks += GetInstructionReadTicks(pc);
}
metadata->uncached_fetch_ticks += GetInstructionReadTicks(pc);
if (info.is_load_instruction || info.is_store_instruction)
metadata->flags |= BlockFlags::ContainsLoadStoreInstructions;
@ -1022,6 +1044,8 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
#ifdef _DEBUG
SmallString disasm;
DEBUG_LOG("Block at 0x{:08X}", start_pc);
DEBUG_LOG(" Uncached fetch ticks: {}", metadata->uncached_fetch_ticks);
DEBUG_LOG(" ICache line count: {}", metadata->icache_line_count);
for (const auto& cbi : *instructions)
{
CPU::DisassembleInstruction(&disasm, cbi.second.pc, cbi.first.bits);

View file

@ -94,6 +94,8 @@ enum class BlockFlags : u8
ContainsLoadStoreInstructions = (1 << 0),
SpansPages = (1 << 1),
BranchDelaySpansPages = (1 << 2),
IsUsingICache = (1 << 3),
NeedsDynamicFetchTicks = (1 << 4),
};
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(BlockFlags);

View file

@ -2620,7 +2620,7 @@ TickCount CPU::GetInstructionReadTicks(VirtualMemoryAddress address)
{
return RAM_READ_TICKS;
}
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_MIRROR_SIZE))
{
return g_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)];
}
@ -2640,7 +2640,7 @@ TickCount CPU::GetICacheFillTicks(VirtualMemoryAddress address)
{
return 1 * ((ICACHE_LINE_SIZE - (address & (ICACHE_LINE_SIZE - 1))) / sizeof(u32));
}
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_MIRROR_SIZE))
{
return g_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)] *
((ICACHE_LINE_SIZE - (address & (ICACHE_LINE_SIZE - 1))) / sizeof(u32));
@ -2651,29 +2651,23 @@ TickCount CPU::GetICacheFillTicks(VirtualMemoryAddress address)
}
}
void CPU::CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks)
void CPU::CheckAndUpdateICacheTags(u32 line_count)
{
VirtualMemoryAddress current_pc = g_state.pc & ICACHE_TAG_ADDRESS_MASK;
if (IsCachedAddress(current_pc))
{
TickCount ticks = 0;
TickCount cached_ticks_per_line = GetICacheFillTicks(current_pc);
for (u32 i = 0; i < line_count; i++, current_pc += ICACHE_LINE_SIZE)
{
const u32 line = GetICacheLine(current_pc);
if (g_state.icache_tags[line] != current_pc)
{
g_state.icache_tags[line] = current_pc;
ticks += cached_ticks_per_line;
}
}
g_state.pending_ticks += ticks;
}
else
TickCount ticks = 0;
TickCount cached_ticks_per_line = GetICacheFillTicks(current_pc);
for (u32 i = 0; i < line_count; i++, current_pc += ICACHE_LINE_SIZE)
{
g_state.pending_ticks += uncached_ticks;
const u32 line = GetICacheLine(current_pc);
if (g_state.icache_tags[line] != current_pc)
{
g_state.icache_tags[line] = current_pc;
ticks += cached_ticks_per_line;
}
}
g_state.pending_ticks += ticks;
}
u32 CPU::FillICache(VirtualMemoryAddress address)

View file

@ -65,7 +65,7 @@ ALWAYS_INLINE static bool CompareICacheTag(VirtualMemoryAddress address)
TickCount GetInstructionReadTicks(VirtualMemoryAddress address);
TickCount GetICacheFillTicks(VirtualMemoryAddress address);
u32 FillICache(VirtualMemoryAddress address);
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks);
void CheckAndUpdateICacheTags(u32 line_count);
ALWAYS_INLINE static Segment GetSegmentForAddress(VirtualMemoryAddress address)
{

View file

@ -77,8 +77,7 @@ void CPU::NewRec::Compiler::BeginBlock()
GenerateBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction));
}
if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0)
GenerateICacheCheckAndUpdate();
GenerateICacheCheckAndUpdate();
if (g_settings.bios_tty_logging)
{
@ -1719,6 +1718,14 @@ void CPU::NewRec::Compiler::TruncateBlock()
iinfo->is_last_instruction = true;
}
/// Looks up the word-sized access-time pointer for the physical address of the current block's start pc.
/// Asserts that the lookup produced a non-null pointer before returning it.
const TickCount* CPU::NewRec::Compiler::GetFetchMemoryAccessTimePtr() const
{
  // Strip the segment bits so the bus lookup sees a physical address.
  const auto fetch_address = m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK;
  const TickCount* const ptr = Bus::GetMemoryAccessTimePtr(fetch_address, MemoryAccessSize::Word);
  AssertMsg(ptr, "Address has dynamic fetch ticks");
  return ptr;
}
void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store,
bool use_fastmem)
{

View file

@ -201,6 +201,8 @@ protected:
void SetCompilerPC(u32 newpc);
void TruncateBlock();
const TickCount* GetFetchMemoryAccessTimePtr() const;
virtual const void* GetCurrentCodePointer() = 0;
virtual void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,

View file

@ -28,6 +28,7 @@ using namespace vixl::aarch32;
using CPU::Recompiler::armEmitCall;
using CPU::Recompiler::armEmitCondBranch;
using CPU::Recompiler::armEmitFarLoad;
using CPU::Recompiler::armEmitJmp;
using CPU::Recompiler::armEmitMov;
using CPU::Recompiler::armGetJumpTrampoline;
@ -302,13 +303,25 @@ bool foo(const void* a, const void* b)
void CPU::NewRec::AArch32Compiler::GenerateICacheCheckAndUpdate()
{
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1)
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{
armAsm->ldr(RARG1, PTR(&g_state.pending_ticks));
armAsm->add(RARG1, RARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks)));
armAsm->str(RARG1, PTR(&g_state.pending_ticks));
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
{
armEmitFarLoad(armAsm, RARG2, GetFetchMemoryAccessTimePtr());
armAsm->ldr(RARG1, PTR(&g_state.pending_ticks));
armEmitMov(armAsm, RARG3, m_block->size);
armAsm->mul(RARG2, RARG2, RARG3);
armAsm->add(RARG1, RARG1, RARG2);
armAsm->str(RARG1, PTR(&g_state.pending_ticks));
}
else
{
armAsm->ldr(RARG1, PTR(&g_state.pending_ticks));
armAsm->add(RARG1, RARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks)));
armAsm->str(RARG1, PTR(&g_state.pending_ticks));
}
}
else
else if (m_block->icache_line_count > 0)
{
const auto& ticks_reg = RARG1;
const auto& current_tag_reg = RARG2;

View file

@ -27,6 +27,7 @@ using namespace vixl::aarch64;
using CPU::Recompiler::armEmitCall;
using CPU::Recompiler::armEmitCondBranch;
using CPU::Recompiler::armEmitFarLoad;
using CPU::Recompiler::armEmitJmp;
using CPU::Recompiler::armEmitMov;
using CPU::Recompiler::armGetJumpTrampoline;
@ -274,13 +275,25 @@ void CPU::NewRec::AArch64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr,
void CPU::NewRec::AArch64Compiler::GenerateICacheCheckAndUpdate()
{
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1)
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{
armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks)));
armAsm->str(RWARG1, PTR(&g_state.pending_ticks));
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
{
armEmitFarLoad(armAsm, RWARG2, GetFetchMemoryAccessTimePtr());
armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
armEmitMov(armAsm, RWARG3, m_block->size);
armAsm->mul(RWARG2, RWARG2, RWARG3);
armAsm->add(RWARG1, RWARG1, RWARG2);
armAsm->str(RWARG1, PTR(&g_state.pending_ticks));
}
else
{
armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks)));
armAsm->str(RWARG1, PTR(&g_state.pending_ticks));
}
}
else
else if (m_block->icache_line_count > 0)
{
const auto& ticks_reg = RWARG1;
const auto& current_tag_reg = RWARG2;

View file

@ -40,6 +40,7 @@ using namespace biscuit;
using CPU::Recompiler::rvEmitCall;
using CPU::Recompiler::rvEmitDSExtW;
using CPU::Recompiler::rvEmitDUExtW;
using CPU::Recompiler::rvEmitFarLoad;
using CPU::Recompiler::rvEmitJmp;
using CPU::Recompiler::rvEmitMov;
using CPU::Recompiler::rvEmitMov64;
@ -130,6 +131,25 @@ u32 CPU::Recompiler::rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr)
return rvEmitJmp(rvAsm, ptr, biscuit::ra);
}
/// Emits an AUIPC followed by a word load from addr into reg; reg also serves as the address register.
/// sign_extend_word selects LW instead of LWU for the load.
void CPU::Recompiler::rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
                                    bool sign_extend_word)
{
  // The immediates are computed relative to the cursor, i.e. where the AUIPC is about to be emitted.
  const auto [upper, lower] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
  rvAsm->AUIPC(reg, upper);

  if (!sign_extend_word)
  {
    rvAsm->LWU(reg, lower, reg);
    return;
  }

  rvAsm->LW(reg, lower, reg);
}
/// Emits an AUIPC into tempreg followed by an SW of reg through tempreg, storing a word to addr.
void CPU::Recompiler::rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
                                     const biscuit::GPR& tempreg)
{
  // The immediates are computed relative to the cursor, i.e. where the AUIPC is about to be emitted.
  const void* const emit_position = rvAsm->GetCursorPointer();
  const auto [upper, lower] = rvGetAddressImmediates(emit_position, addr);

  rvAsm->AUIPC(tempreg, upper);
  rvAsm->SW(reg, lower, tempreg);
}
void CPU::Recompiler::rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
rvAsm->SLLI(rd, rs, 24);
@ -525,13 +545,25 @@ void CPU::NewRec::RISCV64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr,
void CPU::NewRec::RISCV64Compiler::GenerateICacheCheckAndUpdate()
{
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1)
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks));
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
{
rvEmitFarLoad(rvAsm, RARG2, GetFetchMemoryAccessTimePtr());
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
rvEmitMov(rvAsm, RARG3, m_block->size);
rvAsm->MULW(RARG2, RARG2, RARG3);
rvAsm->ADD(RARG1, RARG1, RARG2);
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
}
else
{
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks));
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
}
}
else
else if (m_block->icache_line_count > 0)
{
const auto& ticks_reg = RARG1;
const auto& current_tag_reg = RARG2;

View file

@ -179,9 +179,18 @@ void CPU::NewRec::X64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, cons
void CPU::NewRec::X64Compiler::GenerateICacheCheckAndUpdate()
{
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1)
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{
cg->add(cg->dword[PTR(&g_state.pending_ticks)], static_cast<u32>(m_block->uncached_fetch_ticks));
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
{
cg->mov(cg->eax, m_block->size);
cg->mul(cg->dword[cg->rip + GetFetchMemoryAccessTimePtr()]);
cg->add(cg->dword[PTR(&g_state.pending_ticks)], cg->eax);
}
else
{
cg->add(cg->dword[PTR(&g_state.pending_ticks)], static_cast<u32>(m_block->uncached_fetch_ticks));
}
}
else if (m_block->icache_line_count > 0)
{

View file

@ -926,6 +926,14 @@ Value CodeGenerator::NotValue(const Value& val)
return res;
}
/// Looks up the word-sized access-time pointer for the physical address of the current block's start pc.
/// Asserts that the lookup produced a non-null pointer before returning it.
const TickCount* CodeGenerator::GetFetchMemoryAccessTimePtr() const
{
  // Strip the segment bits so the bus lookup sees a physical address.
  const auto fetch_address = m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK;
  const TickCount* const ptr = Bus::GetMemoryAccessTimePtr(fetch_address, MemoryAccessSize::Word);
  AssertMsg(ptr, "Address has dynamic fetch ticks");
  return ptr;
}
void CodeGenerator::GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info,
Exception excode, Condition condition /* = Condition::Always */)
{
@ -996,8 +1004,7 @@ void CodeGenerator::BlockPrologue()
EmitFunctionCall(nullptr, &CPU::HandleB0Syscall);
}
if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0)
EmitICacheCheckAndUpdate();
EmitICacheCheckAndUpdate();
// we don't know the state of the last block, so assume load delays might be in progress
// TODO: Pull load delay into register cache

View file

@ -198,6 +198,8 @@ public:
Value XorValues(const Value& lhs, const Value& rhs);
Value NotValue(const Value& val);
const TickCount* GetFetchMemoryAccessTimePtr() const;
// Raising exception if condition is true.
void GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info, Exception excode,
Condition condition = Condition::Always);

View file

@ -140,6 +140,20 @@ void CPU::Recompiler::armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::
}
}
/// Emits code that places addr in reg via armMoveAddressToReg and then loads through it,
/// so reg is both the address register and the destination of the ldr.
void CPU::Recompiler::armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
                                     const void* addr)
{
  armMoveAddressToReg(armAsm, reg, addr);

  const vixl::aarch32::MemOperand source(reg);
  armAsm->ldr(reg, source);
}
/// Emits code that places addr in tempreg via armMoveAddressToReg and then stores reg through it.
/// tempreg is an AArch32 register: it has to match the declaration in the shared header and is
/// used to build an aarch32 MemOperand, so an aarch64 register type cannot be used here.
void CPU::Recompiler::armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
                                      const void* addr, const vixl::aarch32::Register& tempreg)
{
  armMoveAddressToReg(armAsm, tempreg, addr);
  armAsm->str(reg, vixl::aarch32::MemOperand(tempreg));
}
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
{
#ifdef ENABLE_HOST_DISASSEMBLY
@ -1913,12 +1927,24 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
void CodeGenerator::EmitICacheCheckAndUpdate()
{
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{
EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
{
armEmitFarLoad(m_emit, RARG2, GetFetchMemoryAccessTimePtr());
m_emit->ldr(RARG1, a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
m_emit->Mov(RARG3, m_block->size);
m_emit->mul(RARG2, RARG2, RARG3);
m_emit->add(RARG1, RARG1, RARG2);
m_emit->str(RARG1, a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
}
else
{
EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
}
}
else
else if (m_block->icache_line_count > 0)
{
const auto& ticks_reg = a32::r0;
const auto& current_tag_reg = a32::r1;

View file

@ -261,6 +261,61 @@ void CPU::Recompiler::armEmitCondBranch(a64::Assembler* armAsm, a64::Condition c
}
}
/// Emits a load from addr into reg, using the 64-bit view of reg as the address register.
/// When the page displacement from the cursor fits in 21 bits, an adrp plus a page-offset
/// operand is used; otherwise the full address is moved into the register first.
/// sign_extend_word selects ldrsw instead of ldr.
void CPU::Recompiler::armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
                                     const void* addr, bool sign_extend_word)
{
  constexpr uintptr_t page_offset_mask = 0xFFF;
  const uintptr_t cursor_bits = reinterpret_cast<uintptr_t>(armAsm->GetCursorAddress<const void*>());
  const uintptr_t target_bits = reinterpret_cast<uintptr_t>(addr);

  // NOTE(review): the extra >> 10 presumably turns armGetPCDisplacement's result into a page count - confirm
  // against its definition.
  const s64 page_delta = armGetPCDisplacement(reinterpret_cast<const void*>(cursor_bits & ~page_offset_mask),
                                              reinterpret_cast<const void*>(target_bits & ~page_offset_mask)) >>
                         10;
  const u32 offset_in_page = static_cast<u32>(target_bits & page_offset_mask);

  const vixl::aarch64::Register address_reg = reg.X();
  vixl::aarch64::MemOperand source;
  if (!vixl::IsInt21(page_delta))
  {
    // Out of adrp range: materialize the whole address.
    armMoveAddressToReg(armAsm, address_reg, addr);
    source = vixl::aarch64::MemOperand(address_reg);
  }
  else
  {
    armAsm->adrp(address_reg, page_delta);
    source = vixl::aarch64::MemOperand(address_reg, static_cast<int64_t>(offset_in_page));
  }

  if (!sign_extend_word)
    armAsm->ldr(reg, source);
  else
    armAsm->ldrsw(reg, source);
}
/// Emits a store of reg to addr, using tempreg (asserted to be an X register) to hold the address.
/// When the page displacement from the cursor fits in 21 bits, an adrp plus a page-offset
/// operand is used; otherwise the full address is moved into tempreg first.
void CPU::Recompiler::armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
                                      const void* addr, const vixl::aarch64::Register& tempreg)
{
  DebugAssert(tempreg.IsX());

  constexpr uintptr_t page_offset_mask = 0xFFF;
  const uintptr_t cursor_bits = reinterpret_cast<uintptr_t>(armAsm->GetCursorAddress<const void*>());
  const uintptr_t target_bits = reinterpret_cast<uintptr_t>(addr);

  // NOTE(review): the extra >> 10 presumably turns armGetPCDisplacement's result into a page count - confirm
  // against its definition.
  const s64 page_delta = armGetPCDisplacement(reinterpret_cast<const void*>(cursor_bits & ~page_offset_mask),
                                              reinterpret_cast<const void*>(target_bits & ~page_offset_mask)) >>
                         10;

  if (!vixl::IsInt21(page_delta))
  {
    // Out of adrp range: materialize the whole address.
    armMoveAddressToReg(armAsm, tempreg, addr);
    armAsm->str(reg, vixl::aarch64::MemOperand(tempreg));
    return;
  }

  const u32 offset_in_page = static_cast<u32>(target_bits & page_offset_mask);
  armAsm->adrp(tempreg, page_delta);
  armAsm->str(reg, vixl::aarch64::MemOperand(tempreg, static_cast<int64_t>(offset_in_page)));
}
u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
{
auto it = s_trampoline_targets.find(target);
@ -2240,12 +2295,24 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
void CodeGenerator::EmitICacheCheckAndUpdate()
{
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{
EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
{
armEmitFarLoad(m_emit, RWARG2, GetFetchMemoryAccessTimePtr());
m_emit->Ldr(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
m_emit->Mov(RWARG3, m_block->size);
m_emit->Mul(RWARG2, RWARG2, RWARG3);
m_emit->Add(RWARG1, RWARG1, RWARG2);
m_emit->Str(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
}
else
{
EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
}
}
else
else if (m_block->icache_line_count > 0)
{
const auto& ticks_reg = a64::w0;
const auto& current_tag_reg = a64::w1;

View file

@ -2782,12 +2782,21 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
void CodeGenerator::EmitICacheCheckAndUpdate()
{
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{
m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)],
static_cast<u32>(m_block->uncached_fetch_ticks));
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
{
m_emit->mov(m_emit->eax, m_block->size);
m_emit->mul(m_emit->dword[m_emit->rip + GetFetchMemoryAccessTimePtr()]);
m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], m_emit->eax);
}
else
{
m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)],
static_cast<u32>(m_block->uncached_fetch_ticks));
}
}
else
else if (m_block->icache_line_count > 0)
{
VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)

View file

@ -93,6 +93,9 @@ void armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register&
void armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, const void* ptr);
/// Emits a load from the absolute address addr into reg; reg is also used to hold the address.
void armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr);
/// Emits a store of reg to the absolute address addr; tempreg holds the address and is overwritten.
void armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr,
const vixl::aarch32::Register& tempreg = RSCRATCH);
u8* armGetJumpTrampoline(const void* target);
} // namespace CPU::Recompiler
@ -129,6 +132,10 @@ void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register&
void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr);
/// Emits a load from the absolute address addr into reg; the 64-bit view of reg holds the address.
/// sign_extend_word selects a sign-extending word load (ldrsw) instead of a plain ldr.
void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
bool sign_extend_word = false);
/// Emits a store of reg to the absolute address addr; tempreg must be an X register and is overwritten
/// with the address.
void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
const vixl::aarch64::Register& tempreg = RXSCRATCH);
u8* armGetJumpTrampoline(const void* target);
} // namespace CPU::Recompiler
@ -157,8 +164,11 @@ std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target);
void rvMoveAddressToReg(biscuit::Assembler* armAsm, const biscuit::GPR& reg, const void* addr);
void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm);
void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm);
u32 rvEmitJmp(biscuit::Assembler* armAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero);
u32 rvEmitCall(biscuit::Assembler* armAsm, const void* ptr);
u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero);
u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr);
/// Emits AUIPC + LW/LWU to load a word from addr into reg; reg is also used to hold the address.
/// sign_extend_word selects LW instead of LWU.
void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, bool sign_extend_word = false);
/// Emits AUIPC + SW to store reg as a word to addr; tempreg holds the address and is overwritten.
void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
const biscuit::GPR& tempreg = RSCRATCH);
void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word