mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-01-20 07:15:38 +00:00
CPU/CodeCache: Dynamically compute BIOS memory access timing
The BIOS access timings can change at runtime if the game modifies them. Instead of forcing the affected blocks to recompile, we can just compute size * word_time each time a block executes. Improves stability of Nightmare Creatures booting, and fixes corrupted text in Formula Circus when using the cached interpreter.
This commit is contained in:
parent
5f36c2948f
commit
2e96931c32
|
@ -708,6 +708,15 @@ bool Bus::HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size)
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Looks up where the access-time tick count for `address` at the given access size is stored.
/// Returns a pointer into g_bios_access_time when the address lies in
/// [BIOS_BASE, BIOS_BASE + BIOS_MIRROR_SIZE), otherwise nullptr.
const TickCount* Bus::GetMemoryAccessTimePtr(PhysicalMemoryAddress address, MemoryAccessSize size)
{
  // Only the BIOS range is covered at the moment; EXP1 could be handled the same way.
  const bool is_bios_address = (address >= BIOS_BASE) && (address < (BIOS_BASE + BIOS_MIRROR_SIZE));
  if (!is_bios_address)
    return nullptr;

  // The table is indexed by the numeric value of the access size.
  const size_t size_index = static_cast<size_t>(size);
  return &g_bios_access_time[size_index];
}
|
||||
|
||||
std::optional<Bus::MemoryRegion> Bus::GetMemoryRegionForAddress(PhysicalMemoryAddress address)
|
||||
{
|
||||
if (address < RAM_2MB_SIZE)
|
||||
|
|
|
@ -191,6 +191,9 @@ ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
|
|||
return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
|
||||
}
|
||||
|
||||
/// Returns a pointer to the cycle count for a non-RAM memory access.
|
||||
const TickCount* GetMemoryAccessTimePtr(PhysicalMemoryAddress address, MemoryAccessSize size);
|
||||
|
||||
enum class MemoryRegion
|
||||
{
|
||||
RAM,
|
||||
|
|
|
@ -823,8 +823,20 @@ template<PGXPMode pgxp_mode>
|
|||
}
|
||||
|
||||
DebugAssert(!(HasPendingInterrupt()));
|
||||
if (g_settings.cpu_recompiler_icache)
|
||||
CheckAndUpdateICacheTags(block->icache_line_count, block->uncached_fetch_ticks);
|
||||
if (block->HasFlag(BlockFlags::IsUsingICache))
|
||||
{
|
||||
CheckAndUpdateICacheTags(block->icache_line_count);
|
||||
}
|
||||
else if (block->HasFlag(BlockFlags::NeedsDynamicFetchTicks))
|
||||
{
|
||||
AddPendingTicks(
|
||||
static_cast<TickCount>(block->size * static_cast<u32>(*Bus::GetMemoryAccessTimePtr(
|
||||
block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word))));
|
||||
}
|
||||
else
|
||||
{
|
||||
AddPendingTicks(block->uncached_fetch_ticks);
|
||||
}
|
||||
|
||||
InterpretCachedBlock<pgxp_mode>(block);
|
||||
|
||||
|
@ -893,6 +905,9 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
|
|||
// TODO: Jump to other block if it exists at this pc?
|
||||
|
||||
const PageProtectionMode protection = GetProtectionModeForPC(start_pc);
|
||||
const bool use_icache = CPU::IsCachedAddress(start_pc);
|
||||
const bool dynamic_fetch_ticks = (!use_icache && Bus::GetMemoryAccessTimePtr(start_pc & PHYSICAL_MEMORY_ADDRESS_MASK,
|
||||
MemoryAccessSize::Word) != nullptr);
|
||||
u32 pc = start_pc;
|
||||
bool is_branch_delay_slot = false;
|
||||
bool is_load_delay_slot = false;
|
||||
|
@ -905,7 +920,8 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
|
|||
instructions->clear();
|
||||
metadata->icache_line_count = 0;
|
||||
metadata->uncached_fetch_ticks = 0;
|
||||
metadata->flags = BlockFlags::None;
|
||||
metadata->flags = use_icache ? BlockFlags::IsUsingICache :
|
||||
(dynamic_fetch_ticks ? BlockFlags::NeedsDynamicFetchTicks : BlockFlags::None);
|
||||
|
||||
u32 last_cache_line = ICACHE_LINES;
|
||||
u32 last_page = (protection == PageProtectionMode::WriteProtected) ? Bus::GetRAMCodePageIndex(start_pc) : 0;
|
||||
|
@ -956,17 +972,23 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
|
|||
info.is_store_instruction = IsMemoryStoreInstruction(instruction);
|
||||
info.has_load_delay = InstructionHasLoadDelay(instruction);
|
||||
|
||||
if (g_settings.cpu_recompiler_icache)
|
||||
if (use_icache)
|
||||
{
|
||||
const u32 icache_line = GetICacheLine(pc);
|
||||
if (icache_line != last_cache_line)
|
||||
if (g_settings.cpu_recompiler_icache)
|
||||
{
|
||||
metadata->icache_line_count++;
|
||||
last_cache_line = icache_line;
|
||||
const u32 icache_line = GetICacheLine(pc);
|
||||
if (icache_line != last_cache_line)
|
||||
{
|
||||
metadata->icache_line_count++;
|
||||
last_cache_line = icache_line;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!dynamic_fetch_ticks)
|
||||
{
|
||||
metadata->uncached_fetch_ticks += GetInstructionReadTicks(pc);
|
||||
}
|
||||
|
||||
metadata->uncached_fetch_ticks += GetInstructionReadTicks(pc);
|
||||
if (info.is_load_instruction || info.is_store_instruction)
|
||||
metadata->flags |= BlockFlags::ContainsLoadStoreInstructions;
|
||||
|
||||
|
@ -1022,6 +1044,8 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
|
|||
#ifdef _DEBUG
|
||||
SmallString disasm;
|
||||
DEBUG_LOG("Block at 0x{:08X}", start_pc);
|
||||
DEBUG_LOG(" Uncached fetch ticks: {}", metadata->uncached_fetch_ticks);
|
||||
DEBUG_LOG(" ICache line count: {}", metadata->icache_line_count);
|
||||
for (const auto& cbi : *instructions)
|
||||
{
|
||||
CPU::DisassembleInstruction(&disasm, cbi.second.pc, cbi.first.bits);
|
||||
|
|
|
@ -94,6 +94,8 @@ enum class BlockFlags : u8
|
|||
ContainsLoadStoreInstructions = (1 << 0),
|
||||
SpansPages = (1 << 1),
|
||||
BranchDelaySpansPages = (1 << 2),
|
||||
IsUsingICache = (1 << 3),
|
||||
NeedsDynamicFetchTicks = (1 << 4),
|
||||
};
|
||||
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(BlockFlags);
|
||||
|
||||
|
|
|
@ -2620,7 +2620,7 @@ TickCount CPU::GetInstructionReadTicks(VirtualMemoryAddress address)
|
|||
{
|
||||
return RAM_READ_TICKS;
|
||||
}
|
||||
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
|
||||
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_MIRROR_SIZE))
|
||||
{
|
||||
return g_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)];
|
||||
}
|
||||
|
@ -2640,7 +2640,7 @@ TickCount CPU::GetICacheFillTicks(VirtualMemoryAddress address)
|
|||
{
|
||||
return 1 * ((ICACHE_LINE_SIZE - (address & (ICACHE_LINE_SIZE - 1))) / sizeof(u32));
|
||||
}
|
||||
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
|
||||
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_MIRROR_SIZE))
|
||||
{
|
||||
return g_bios_access_time[static_cast<u32>(MemoryAccessSize::Word)] *
|
||||
((ICACHE_LINE_SIZE - (address & (ICACHE_LINE_SIZE - 1))) / sizeof(u32));
|
||||
|
@ -2651,29 +2651,23 @@ TickCount CPU::GetICacheFillTicks(VirtualMemoryAddress address)
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks)
|
||||
void CPU::CheckAndUpdateICacheTags(u32 line_count)
|
||||
{
|
||||
VirtualMemoryAddress current_pc = g_state.pc & ICACHE_TAG_ADDRESS_MASK;
|
||||
if (IsCachedAddress(current_pc))
|
||||
{
|
||||
TickCount ticks = 0;
|
||||
TickCount cached_ticks_per_line = GetICacheFillTicks(current_pc);
|
||||
for (u32 i = 0; i < line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
||||
{
|
||||
const u32 line = GetICacheLine(current_pc);
|
||||
if (g_state.icache_tags[line] != current_pc)
|
||||
{
|
||||
g_state.icache_tags[line] = current_pc;
|
||||
ticks += cached_ticks_per_line;
|
||||
}
|
||||
}
|
||||
|
||||
g_state.pending_ticks += ticks;
|
||||
}
|
||||
else
|
||||
TickCount ticks = 0;
|
||||
TickCount cached_ticks_per_line = GetICacheFillTicks(current_pc);
|
||||
for (u32 i = 0; i < line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
||||
{
|
||||
g_state.pending_ticks += uncached_ticks;
|
||||
const u32 line = GetICacheLine(current_pc);
|
||||
if (g_state.icache_tags[line] != current_pc)
|
||||
{
|
||||
g_state.icache_tags[line] = current_pc;
|
||||
ticks += cached_ticks_per_line;
|
||||
}
|
||||
}
|
||||
|
||||
g_state.pending_ticks += ticks;
|
||||
}
|
||||
|
||||
u32 CPU::FillICache(VirtualMemoryAddress address)
|
||||
|
|
|
@ -65,7 +65,7 @@ ALWAYS_INLINE static bool CompareICacheTag(VirtualMemoryAddress address)
|
|||
TickCount GetInstructionReadTicks(VirtualMemoryAddress address);
|
||||
TickCount GetICacheFillTicks(VirtualMemoryAddress address);
|
||||
u32 FillICache(VirtualMemoryAddress address);
|
||||
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks);
|
||||
void CheckAndUpdateICacheTags(u32 line_count);
|
||||
|
||||
ALWAYS_INLINE static Segment GetSegmentForAddress(VirtualMemoryAddress address)
|
||||
{
|
||||
|
|
|
@ -77,8 +77,7 @@ void CPU::NewRec::Compiler::BeginBlock()
|
|||
GenerateBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction));
|
||||
}
|
||||
|
||||
if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0)
|
||||
GenerateICacheCheckAndUpdate();
|
||||
GenerateICacheCheckAndUpdate();
|
||||
|
||||
if (g_settings.bios_tty_logging)
|
||||
{
|
||||
|
@ -1719,6 +1718,14 @@ void CPU::NewRec::Compiler::TruncateBlock()
|
|||
iinfo->is_last_instruction = true;
|
||||
}
|
||||
|
||||
/// Returns the pointer to the word access time that applies to instruction fetches of the
/// block being compiled. Asserts that the bus actually provides such a pointer for the
/// block's physical address.
const TickCount* CPU::NewRec::Compiler::GetFetchMemoryAccessTimePtr() const
{
  // The bus lookup takes a physical address, so mask the block's PC first.
  const PhysicalMemoryAddress fetch_address = m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK;
  const TickCount* const ptr = Bus::GetMemoryAccessTimePtr(fetch_address, MemoryAccessSize::Word);
  AssertMsg(ptr, "Address has dynamic fetch ticks");
  return ptr;
}
|
||||
|
||||
void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store,
|
||||
bool use_fastmem)
|
||||
{
|
||||
|
|
|
@ -201,6 +201,8 @@ protected:
|
|||
void SetCompilerPC(u32 newpc);
|
||||
void TruncateBlock();
|
||||
|
||||
const TickCount* GetFetchMemoryAccessTimePtr() const;
|
||||
|
||||
virtual const void* GetCurrentCodePointer() = 0;
|
||||
|
||||
virtual void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
|
||||
|
|
|
@ -28,6 +28,7 @@ using namespace vixl::aarch32;
|
|||
|
||||
using CPU::Recompiler::armEmitCall;
|
||||
using CPU::Recompiler::armEmitCondBranch;
|
||||
using CPU::Recompiler::armEmitFarLoad;
|
||||
using CPU::Recompiler::armEmitJmp;
|
||||
using CPU::Recompiler::armEmitMov;
|
||||
using CPU::Recompiler::armGetJumpTrampoline;
|
||||
|
@ -302,13 +303,25 @@ bool foo(const void* a, const void* b)
|
|||
|
||||
void CPU::NewRec::AArch32Compiler::GenerateICacheCheckAndUpdate()
|
||||
{
|
||||
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1)
|
||||
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
|
||||
{
|
||||
armAsm->ldr(RARG1, PTR(&g_state.pending_ticks));
|
||||
armAsm->add(RARG1, RARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
armAsm->str(RARG1, PTR(&g_state.pending_ticks));
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
|
||||
{
|
||||
armEmitFarLoad(armAsm, RARG2, GetFetchMemoryAccessTimePtr());
|
||||
armAsm->ldr(RARG1, PTR(&g_state.pending_ticks));
|
||||
armEmitMov(armAsm, RARG3, m_block->size);
|
||||
armAsm->mul(RARG2, RARG2, RARG3);
|
||||
armAsm->add(RARG1, RARG1, RARG2);
|
||||
armAsm->str(RARG1, PTR(&g_state.pending_ticks));
|
||||
}
|
||||
else
|
||||
{
|
||||
armAsm->ldr(RARG1, PTR(&g_state.pending_ticks));
|
||||
armAsm->add(RARG1, RARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
armAsm->str(RARG1, PTR(&g_state.pending_ticks));
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (m_block->icache_line_count > 0)
|
||||
{
|
||||
const auto& ticks_reg = RARG1;
|
||||
const auto& current_tag_reg = RARG2;
|
||||
|
|
|
@ -27,6 +27,7 @@ using namespace vixl::aarch64;
|
|||
|
||||
using CPU::Recompiler::armEmitCall;
|
||||
using CPU::Recompiler::armEmitCondBranch;
|
||||
using CPU::Recompiler::armEmitFarLoad;
|
||||
using CPU::Recompiler::armEmitJmp;
|
||||
using CPU::Recompiler::armEmitMov;
|
||||
using CPU::Recompiler::armGetJumpTrampoline;
|
||||
|
@ -274,13 +275,25 @@ void CPU::NewRec::AArch64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr,
|
|||
|
||||
void CPU::NewRec::AArch64Compiler::GenerateICacheCheckAndUpdate()
|
||||
{
|
||||
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1)
|
||||
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
|
||||
{
|
||||
armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
|
||||
armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
armAsm->str(RWARG1, PTR(&g_state.pending_ticks));
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
|
||||
{
|
||||
armEmitFarLoad(armAsm, RWARG2, GetFetchMemoryAccessTimePtr());
|
||||
armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
|
||||
armEmitMov(armAsm, RWARG3, m_block->size);
|
||||
armAsm->mul(RWARG2, RWARG2, RWARG3);
|
||||
armAsm->add(RWARG1, RWARG1, RWARG2);
|
||||
armAsm->str(RWARG1, PTR(&g_state.pending_ticks));
|
||||
}
|
||||
else
|
||||
{
|
||||
armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
|
||||
armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
armAsm->str(RWARG1, PTR(&g_state.pending_ticks));
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (m_block->icache_line_count > 0)
|
||||
{
|
||||
const auto& ticks_reg = RWARG1;
|
||||
const auto& current_tag_reg = RWARG2;
|
||||
|
|
|
@ -40,6 +40,7 @@ using namespace biscuit;
|
|||
using CPU::Recompiler::rvEmitCall;
|
||||
using CPU::Recompiler::rvEmitDSExtW;
|
||||
using CPU::Recompiler::rvEmitDUExtW;
|
||||
using CPU::Recompiler::rvEmitFarLoad;
|
||||
using CPU::Recompiler::rvEmitJmp;
|
||||
using CPU::Recompiler::rvEmitMov;
|
||||
using CPU::Recompiler::rvEmitMov64;
|
||||
|
@ -130,6 +131,25 @@ u32 CPU::Recompiler::rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr)
|
|||
return rvEmitJmp(rvAsm, ptr, biscuit::ra);
|
||||
}
|
||||
|
||||
/// Emits an AUIPC + 32-bit load pair that reads the word at host address `addr` into `reg`.
/// `reg` is also used to hold the intermediate address. When `sign_extend_word` is set the
/// load is emitted as LW, otherwise as LWU.
void CPU::Recompiler::rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
                                    bool sign_extend_word)
{
  // Immediates are computed against the current cursor, i.e. the location of the AUIPC emitted next.
  const std::pair<s32, s32> immediates = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
  rvAsm->AUIPC(reg, immediates.first);

  if (!sign_extend_word)
  {
    rvAsm->LWU(reg, immediates.second, reg);
    return;
  }

  rvAsm->LW(reg, immediates.second, reg);
}
|
||||
|
||||
/// Emits an AUIPC + SW pair that writes the word in `reg` to host address `addr`.
/// `tempreg` is overwritten: it receives the AUIPC result and serves as the store's base register.
void CPU::Recompiler::rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
                                     const biscuit::GPR& tempreg)
{
  // Immediates are computed against the current cursor, i.e. the location of the AUIPC emitted next.
  const std::pair<s32, s32> immediates = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
  const s32 upper = immediates.first;
  const s32 lower = immediates.second;

  rvAsm->AUIPC(tempreg, upper);
  rvAsm->SW(reg, lower, tempreg);
}
|
||||
|
||||
void CPU::Recompiler::rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
|
||||
{
|
||||
rvAsm->SLLI(rd, rs, 24);
|
||||
|
@ -525,13 +545,25 @@ void CPU::NewRec::RISCV64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr,
|
|||
|
||||
void CPU::NewRec::RISCV64Compiler::GenerateICacheCheckAndUpdate()
|
||||
{
|
||||
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1)
|
||||
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
|
||||
{
|
||||
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
|
||||
SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks));
|
||||
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
|
||||
{
|
||||
rvEmitFarLoad(rvAsm, RARG2, GetFetchMemoryAccessTimePtr());
|
||||
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
|
||||
rvEmitMov(rvAsm, RARG3, m_block->size);
|
||||
rvAsm->MULW(RARG2, RARG2, RARG3);
|
||||
rvAsm->ADD(RARG1, RARG1, RARG2);
|
||||
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
|
||||
}
|
||||
else
|
||||
{
|
||||
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
|
||||
SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks));
|
||||
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (m_block->icache_line_count > 0)
|
||||
{
|
||||
const auto& ticks_reg = RARG1;
|
||||
const auto& current_tag_reg = RARG2;
|
||||
|
|
|
@ -179,9 +179,18 @@ void CPU::NewRec::X64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, cons
|
|||
|
||||
void CPU::NewRec::X64Compiler::GenerateICacheCheckAndUpdate()
|
||||
{
|
||||
if (GetSegmentForAddress(m_block->pc) >= Segment::KSEG1)
|
||||
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
|
||||
{
|
||||
cg->add(cg->dword[PTR(&g_state.pending_ticks)], static_cast<u32>(m_block->uncached_fetch_ticks));
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
|
||||
{
|
||||
cg->mov(cg->eax, m_block->size);
|
||||
cg->mul(cg->dword[cg->rip + GetFetchMemoryAccessTimePtr()]);
|
||||
cg->add(cg->dword[PTR(&g_state.pending_ticks)], cg->eax);
|
||||
}
|
||||
else
|
||||
{
|
||||
cg->add(cg->dword[PTR(&g_state.pending_ticks)], static_cast<u32>(m_block->uncached_fetch_ticks));
|
||||
}
|
||||
}
|
||||
else if (m_block->icache_line_count > 0)
|
||||
{
|
||||
|
|
|
@ -926,6 +926,14 @@ Value CodeGenerator::NotValue(const Value& val)
|
|||
return res;
|
||||
}
|
||||
|
||||
/// Returns the pointer to the word access time used for instruction fetches of the current
/// block. Asserts that the bus provides one for the block's physical address.
const TickCount* CodeGenerator::GetFetchMemoryAccessTimePtr() const
{
  // Strip the segment bits: the bus lookup expects a physical address.
  const PhysicalMemoryAddress physical_pc = m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK;
  const TickCount* const ptr = Bus::GetMemoryAccessTimePtr(physical_pc, MemoryAccessSize::Word);
  AssertMsg(ptr, "Address has dynamic fetch ticks");
  return ptr;
}
|
||||
|
||||
void CodeGenerator::GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
Exception excode, Condition condition /* = Condition::Always */)
|
||||
{
|
||||
|
@ -996,8 +1004,7 @@ void CodeGenerator::BlockPrologue()
|
|||
EmitFunctionCall(nullptr, &CPU::HandleB0Syscall);
|
||||
}
|
||||
|
||||
if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0)
|
||||
EmitICacheCheckAndUpdate();
|
||||
EmitICacheCheckAndUpdate();
|
||||
|
||||
// we don't know the state of the last block, so assume load delays might be in progress
|
||||
// TODO: Pull load delay into register cache
|
||||
|
|
|
@ -198,6 +198,8 @@ public:
|
|||
Value XorValues(const Value& lhs, const Value& rhs);
|
||||
Value NotValue(const Value& val);
|
||||
|
||||
const TickCount* GetFetchMemoryAccessTimePtr() const;
|
||||
|
||||
// Raising exception if condition is true.
|
||||
void GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info, Exception excode,
|
||||
Condition condition = Condition::Always);
|
||||
|
|
|
@ -140,6 +140,20 @@ void CPU::Recompiler::armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::
|
|||
}
|
||||
}
|
||||
|
||||
/// Emits code that loads the value stored at host address `addr` into `reg`.
/// `reg` first receives the address itself and is then overwritten by the loaded value,
/// so no scratch register is needed.
void CPU::Recompiler::armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
                                     const void* addr)
{
  armMoveAddressToReg(armAsm, reg, addr);

  const vixl::aarch32::MemOperand source(reg);
  armAsm->ldr(reg, source);
}
|
||||
|
||||
/// Emits code that stores `reg` to host address `addr`.
/// `tempreg` is overwritten with the address and used as the base register of the store.
///
/// `tempreg` is an AArch32 register, matching the header declaration of this function and
/// the AArch32 helpers it is passed to below.
void CPU::Recompiler::armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
                                      const void* addr, const vixl::aarch32::Register& tempreg)
{
  armMoveAddressToReg(armAsm, tempreg, addr);
  armAsm->str(reg, vixl::aarch32::MemOperand(tempreg));
}
|
||||
|
||||
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
|
||||
{
|
||||
#ifdef ENABLE_HOST_DISASSEMBLY
|
||||
|
@ -1913,12 +1927,24 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
|
|||
|
||||
void CodeGenerator::EmitICacheCheckAndUpdate()
|
||||
{
|
||||
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
|
||||
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
|
||||
{
|
||||
EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
|
||||
{
|
||||
armEmitFarLoad(m_emit, RARG2, GetFetchMemoryAccessTimePtr());
|
||||
m_emit->ldr(RARG1, a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
|
||||
m_emit->Mov(RARG3, m_block->size);
|
||||
m_emit->mul(RARG2, RARG2, RARG3);
|
||||
m_emit->add(RARG1, RARG1, RARG2);
|
||||
m_emit->str(RARG1, a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (m_block->icache_line_count > 0)
|
||||
{
|
||||
const auto& ticks_reg = a32::r0;
|
||||
const auto& current_tag_reg = a32::r1;
|
||||
|
|
|
@ -261,6 +261,61 @@ void CPU::Recompiler::armEmitCondBranch(a64::Assembler* armAsm, a64::Condition c
|
|||
}
|
||||
}
|
||||
|
||||
/// Emits code that loads from host address `addr` into `reg`.
/// The 64-bit view of `reg` holds the intermediate address, so no scratch register is used.
/// If the target's 4 KiB page is within ADRP range of the current code position, an ADRP plus
/// a load with the in-page offset is emitted; otherwise the full address is materialised first.
/// `sign_extend_word` selects LDRSW instead of LDR for the final load.
void CPU::Recompiler::armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
                                     const void* addr, bool sign_extend_word)
{
  // Sample the cursor before anything is emitted; the displacement is relative to this position.
  const uintptr_t code_address = reinterpret_cast<uintptr_t>(armAsm->GetCursorAddress<const void*>());
  const uintptr_t target_address = reinterpret_cast<uintptr_t>(addr);

  // Round both addresses down to their 4 KiB page.
  const void* const code_page = reinterpret_cast<const void*>(code_address & ~static_cast<uintptr_t>(0xFFF));
  const void* const target_page = reinterpret_cast<const void*>(target_address & ~static_cast<uintptr_t>(0xFFF));

  // NOTE(review): presumably armGetPCDisplacement() returns the distance in 4-byte instruction
  // units, so the extra shift by 10 converts it to a page count - confirm against the helper.
  const s64 page_displacement = armGetPCDisplacement(code_page, target_page) >> 10;
  const u32 page_offset = static_cast<u32>(target_address & 0xFFFu);

  const vixl::aarch64::Register address_reg = reg.X();
  a64::MemOperand source;
  if (!vixl::IsInt21(page_displacement))
  {
    // Too far away for ADRP: load the absolute address instead.
    armMoveAddressToReg(armAsm, address_reg, addr);
    source = vixl::aarch64::MemOperand(address_reg);
  }
  else
  {
    armAsm->adrp(address_reg, page_displacement);
    source = vixl::aarch64::MemOperand(address_reg, static_cast<int64_t>(page_offset));
  }

  if (!sign_extend_word)
  {
    armAsm->ldr(reg, source);
    return;
  }

  armAsm->ldrsw(reg, source);
}
|
||||
|
||||
/// Emits code that stores `reg` to host address `addr`.
/// `tempreg` must be a 64-bit (X) register; it is overwritten with the target address (or the
/// target's page address) and used as the base of the store. If the target's 4 KiB page is
/// within ADRP range of the current code position, ADRP plus an offset store is emitted;
/// otherwise the full address is materialised first.
void CPU::Recompiler::armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
                                      const void* addr, const vixl::aarch64::Register& tempreg)
{
  DebugAssert(tempreg.IsX());

  // Sample the cursor before anything is emitted; the displacement is relative to this position.
  const uintptr_t code_address = reinterpret_cast<uintptr_t>(armAsm->GetCursorAddress<const void*>());
  const uintptr_t target_address = reinterpret_cast<uintptr_t>(addr);

  // Round both addresses down to their 4 KiB page.
  const void* const code_page = reinterpret_cast<const void*>(code_address & ~static_cast<uintptr_t>(0xFFF));
  const void* const target_page = reinterpret_cast<const void*>(target_address & ~static_cast<uintptr_t>(0xFFF));

  // NOTE(review): presumably armGetPCDisplacement() returns the distance in 4-byte instruction
  // units, so the extra shift by 10 converts it to a page count - confirm against the helper.
  const s64 page_displacement = armGetPCDisplacement(code_page, target_page) >> 10;
  const u32 page_offset = static_cast<u32>(target_address & 0xFFFu);

  if (!vixl::IsInt21(page_displacement))
  {
    // Too far away for ADRP: load the absolute address instead.
    armMoveAddressToReg(armAsm, tempreg, addr);
    armAsm->str(reg, vixl::aarch64::MemOperand(tempreg));
    return;
  }

  armAsm->adrp(tempreg, page_displacement);
  armAsm->str(reg, vixl::aarch64::MemOperand(tempreg, static_cast<int64_t>(page_offset)));
}
|
||||
|
||||
u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
|
||||
{
|
||||
auto it = s_trampoline_targets.find(target);
|
||||
|
@ -2240,12 +2295,24 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
|
|||
|
||||
void CodeGenerator::EmitICacheCheckAndUpdate()
|
||||
{
|
||||
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
|
||||
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
|
||||
{
|
||||
EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
|
||||
{
|
||||
armEmitFarLoad(m_emit, RWARG2, GetFetchMemoryAccessTimePtr());
|
||||
m_emit->Ldr(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
|
||||
m_emit->Mov(RWARG3, m_block->size);
|
||||
m_emit->Mul(RWARG2, RWARG2, RWARG3);
|
||||
m_emit->Add(RWARG1, RWARG1, RWARG2);
|
||||
m_emit->Str(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (m_block->icache_line_count > 0)
|
||||
{
|
||||
const auto& ticks_reg = a64::w0;
|
||||
const auto& current_tag_reg = a64::w1;
|
||||
|
|
|
@ -2782,12 +2782,21 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
|
|||
|
||||
void CodeGenerator::EmitICacheCheckAndUpdate()
|
||||
{
|
||||
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
|
||||
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
|
||||
{
|
||||
m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)],
|
||||
static_cast<u32>(m_block->uncached_fetch_ticks));
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
|
||||
{
|
||||
m_emit->mov(m_emit->eax, m_block->size);
|
||||
m_emit->mul(m_emit->dword[m_emit->rip + GetFetchMemoryAccessTimePtr()]);
|
||||
m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], m_emit->eax);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)],
|
||||
static_cast<u32>(m_block->uncached_fetch_ticks));
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (m_block->icache_line_count > 0)
|
||||
{
|
||||
VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
|
||||
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
|
||||
|
|
|
@ -93,6 +93,9 @@ void armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register&
|
|||
void armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
|
||||
void armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
|
||||
void armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, const void* ptr);
|
||||
void armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr);
|
||||
void armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr,
|
||||
const vixl::aarch32::Register& tempreg = RSCRATCH);
|
||||
u8* armGetJumpTrampoline(const void* target);
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
@ -129,6 +132,10 @@ void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register&
|
|||
void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
|
||||
void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
|
||||
void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr);
|
||||
void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
|
||||
bool sign_extend_word = false);
|
||||
void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
|
||||
const vixl::aarch64::Register& tempreg = RXSCRATCH);
|
||||
u8* armGetJumpTrampoline(const void* target);
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
@ -157,8 +164,11 @@ std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target);
|
|||
void rvMoveAddressToReg(biscuit::Assembler* armAsm, const biscuit::GPR& reg, const void* addr);
|
||||
void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm);
|
||||
void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm);
|
||||
u32 rvEmitJmp(biscuit::Assembler* armAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero);
|
||||
u32 rvEmitCall(biscuit::Assembler* armAsm, const void* ptr);
|
||||
u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero);
|
||||
u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr);
|
||||
void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, bool sign_extend_word = false);
|
||||
void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
|
||||
const biscuit::GPR& tempreg = RSCRATCH);
|
||||
void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
|
||||
void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
|
||||
void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
|
||||
|
|
Loading…
Reference in a new issue