CPU/NewRec: Add speculative constants

Stenzek 2023-10-21 22:12:00 +10:00
parent b3cbe5a7ee
commit 6592cafadc
12 changed files with 858 additions and 225 deletions
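Before the per-file diffs, a rough standalone sketch of what "speculative constants" means here (illustrative names and types only, not DuckStation's): while compiling a block, the recompiler carries an optional known value for each MIPS register, propagates it through simple ALU operations, and drops it back to "unknown" whenever the result cannot be proven at compile time.

// Minimal sketch of the speculative-constant idea (illustrative only; the real
// implementation lives in CPU::NewRec::Compiler below and also tracks memory and SR).
#include <array>
#include <cstdint>
#include <optional>

using SpecValue = std::optional<uint32_t>;

struct SpecRegs
{
  // One slot per MIPS GPR; nullopt means "value unknown at compile time".
  std::array<SpecValue, 32> r{};
};

// lui rt, imm: the destination becomes a fully known constant.
void SpecLui(SpecRegs& s, unsigned rt, uint32_t imm)
{
  if (rt != 0)
    s.r[rt] = imm << 16;
}

// ori rt, rs, imm: the result is known only when the source is known.
void SpecOri(SpecRegs& s, unsigned rt, unsigned rs, uint32_t imm)
{
  if (rt != 0)
    s.r[rt] = s.r[rs].has_value() ? SpecValue(*s.r[rs] | imm) : std::nullopt;
}

// Anything the compiler cannot model (e.g. a load from an unknown address)
// invalidates the destination.
void SpecInvalidate(SpecRegs& s, unsigned rt)
{
  if (rt != 0)
    s.r[rt] = std::nullopt;
}

With that bookkeeping, a lui/ori pair that builds a hardware-register address yields a compile-time-known address, so each load/store can decide up front whether fastmem is safe instead of always emitting a fastmem access and backpatching it after the first fault.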

View file

@ -66,6 +66,7 @@ static void FillBlockRegInfo(Block* block);
static void CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src);
static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write);
static void AddBlockToPageList(Block* block);
static void RemoveBlockFromPageList(Block* block);
static Common::PageFaultHandler::HandlerResult ExceptionHandler(void* exception_pc, void* fault_address, bool is_write);
@ -526,7 +527,7 @@ bool CPU::CodeCache::IsBlockCodeCurrent(const Block* block)
bool CPU::CodeCache::RevalidateBlock(Block* block)
{
DebugAssert(block->state != BlockState::Valid);
DebugAssert(AddressInRAM(block->pc));
DebugAssert(AddressInRAM(block->pc) || block->state == BlockState::NeedsRecompile);
if (block->state >= BlockState::NeedsRecompile)
return false;
@ -569,6 +570,39 @@ void CPU::CodeCache::AddBlockToPageList(Block* block)
}
}
void CPU::CodeCache::RemoveBlockFromPageList(Block* block)
{
DebugAssert(block->size > 0);
if (!AddressInRAM(block->pc) || block->protection != PageProtectionMode::WriteProtected)
return;
const u32 page_idx = block->StartPageIndex();
PageProtectionInfo& entry = s_page_protection[page_idx];
// unlink from list
Block* prev_block = nullptr;
Block* cur_block = entry.first_block_in_page;
while (cur_block)
{
if (cur_block != block)
{
prev_block = cur_block;
cur_block = cur_block->next_block_in_page;
continue;
}
if (prev_block)
prev_block->next_block_in_page = cur_block->next_block_in_page;
else
entry.first_block_in_page = cur_block->next_block_in_page;
if (!cur_block->next_block_in_page)
entry.last_block_in_page = prev_block;
cur_block->next_block_in_page = nullptr;
break;
}
}
void CPU::CodeCache::InvalidateBlocksWithPageIndex(u32 index)
{
DebugAssert(index < Bus::RAM_8MB_CODE_PAGE_COUNT);
@ -1480,13 +1514,14 @@ void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 gue
LoadstoreBackpatchInfo info;
info.thunk_address = thunk_address;
info.guest_pc = guest_pc;
info.guest_block = 0;
info.code_size = static_cast<u8>(code_size);
s_fastmem_backpatch_info.emplace(code_address, info);
}
void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles,
u32 gpr_bitmask, u8 address_register, u8 data_register, MemoryAccessSize size,
bool is_signed, bool is_load)
void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, u32 guest_block,
TickCount cycles, u32 gpr_bitmask, u8 address_register, u8 data_register,
MemoryAccessSize size, bool is_signed, bool is_load)
{
DebugAssert(code_size < std::numeric_limits<u8>::max());
DebugAssert(cycles >= 0 && cycles < std::numeric_limits<u16>::max());
@ -1498,6 +1533,7 @@ void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 gue
LoadstoreBackpatchInfo info;
info.thunk_address = nullptr;
info.guest_pc = guest_pc;
info.guest_block = guest_block;
info.gpr_bitmask = gpr_bitmask;
info.cycles = static_cast<u16>(cycles);
info.address_register = address_register;
@ -1562,7 +1598,22 @@ Common::PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(v
BackpatchLoadStore(exception_pc, info);
// TODO: queue block for recompilation later
// queue block for recompilation later
if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
{
Block* block = LookupBlock(info.guest_block);
if (block)
{
// This is a bit annoying, we have to remove it from the page list if it's a RAM block.
Log_DevFmt("Queuing block {:08X} for recompilation due to backpatch", block->pc);
RemoveBlockFromPageList(block);
InvalidateBlock(block, BlockState::NeedsRecompile);
// Need to reset the recompile count, otherwise it'll get trolled into an interpreter fallback.
block->compile_frame = System::GetFrameNumber();
block->compile_count = 1;
}
}
// and store the pc in the faulting list, so that we don't emit another fastmem loadstore
s_fastmem_faulting_pcs.insert(info.guest_pc);
@ -1570,6 +1621,11 @@ Common::PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(v
return Common::PageFaultHandler::HandlerResult::ContinueExecution;
}
bool CPU::CodeCache::HasPreviouslyFaultedOnPC(u32 guest_pc)
{
return (s_fastmem_faulting_pcs.find(guest_pc) != s_fastmem_faulting_pcs.end());
}
void CPU::CodeCache::BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info)
{
s_code_buffer.WriteProtect(false);
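Taken together, the changes in this file let a fastmem fault do more than patch the single offending instruction: for NewRec, the guest_block PC stored in the backpatch info is used to queue the whole block for recompilation, and the faulting guest PC is remembered so the recompiled block emits a plain memory call at that spot. A condensed sketch of that flow, with stand-in helper names (the real code is HandleFastmemException / HasPreviouslyFaultedOnPC above):

// Condensed, illustrative version of the fault-handling flow (stand-in helpers).
#include <cstdint>
#include <unordered_set>

static std::unordered_set<uint32_t> s_faulting_pcs;

bool PreviouslyFaultedOnPC(uint32_t guest_pc)
{
  return s_faulting_pcs.find(guest_pc) != s_faulting_pcs.end();
}

// Stand-in for looking up the block and calling InvalidateBlock(block, NeedsRecompile).
void QueueBlockForRecompile(uint32_t /*block_pc*/) {}

// Called from the host page-fault handler after the access has been backpatched.
void OnFastmemFault(uint32_t guest_pc, uint32_t guest_block_pc)
{
  s_faulting_pcs.insert(guest_pc);        // next compile of this PC skips fastmem
  QueueBlockForRecompile(guest_block_pc); // recompile soon with the slow path inline
}

// At compile time, only emit a fastmem access if this PC has never faulted before.
bool ShouldUseFastmem(uint32_t guest_pc, bool fastmem_enabled)
{
  return fastmem_enabled && !PreviouslyFaultedOnPC(guest_pc);
}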

View file

@ -130,7 +130,7 @@ struct alignas(16) Block
// links to previous/next block within page
Block* next_block_in_page;
BlockLinkMap::iterator exit_links[MAX_BLOCK_EXIT_LINKS];
u8 num_exit_links;
@ -196,12 +196,17 @@ struct LoadstoreBackpatchInfo
};
u32 guest_pc;
u32 guest_block;
u8 code_size;
MemoryAccessSize AccessSize() const { return static_cast<MemoryAccessSize>(size); }
u32 AccessSizeInBytes() const { return 1u << size; }
};
static_assert(sizeof(LoadstoreBackpatchInfo) == 16);
#ifdef CPU_ARCH_ARM32
static_assert(sizeof(LoadstoreBackpatchInfo) == 20);
#else
static_assert(sizeof(LoadstoreBackpatchInfo) == 24);
#endif
static inline bool AddressInRAM(VirtualMemoryAddress pc)
{
@ -248,8 +253,10 @@ void DiscardAndRecompileBlock(u32 start_pc);
const void* CreateBlockLink(Block* from_block, void* code, u32 newpc);
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, const void* thunk_address);
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles, u32 gpr_bitmask,
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, bool is_load);
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, u32 guest_block, TickCount cycles,
u32 gpr_bitmask, u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
bool is_load);
bool HasPreviouslyFaultedOnPC(u32 guest_pc);
u32 EmitASMFunctions(void* code, u32 code_size);
u32 EmitJump(void* code, const void* dst, bool flush_icache);

View file

@ -59,6 +59,8 @@ void CPU::NewRec::Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32
m_load_delay_dirty = EMULATE_LOAD_DELAYS;
m_load_delay_register = Reg::count;
m_load_delay_value_register = NUM_HOST_REGS;
InitSpeculativeRegs();
}
void CPU::NewRec::Compiler::BeginBlock()
@ -133,6 +135,7 @@ const void* CPU::NewRec::Compiler::CompileBlock(CodeCache::Block* block, u32* ho
DebugAssert(!IsHostRegAllocated(i));
for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
DebugAssert(!m_constant_regs_dirty.test(i) && !m_constant_regs_valid.test(i));
m_speculative_constants.memory.clear();
u32 code_size, far_code_size;
const void* code = EndCompile(&code_size, &far_code_size);
@ -494,7 +497,7 @@ bool CPU::NewRec::Compiler::TrySwapDelaySlot(Reg rs, Reg rt, Reg rd)
is_safe:
#ifdef _DEBUG
Log_DevFmt("Swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
Log_DebugFmt("Swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
#endif
CompileBranchDelaySlot();
@ -506,7 +509,7 @@ is_safe:
is_unsafe:
#ifdef _DEBUG
Log_DevFmt("NOT swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
Log_DebugFmt("NOT swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
#endif
return false;
@ -1079,6 +1082,9 @@ void CPU::NewRec::Compiler::Flush(u32 flags)
FlushConstantRegs(false);
}
}
if (flags & FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS)
InvalidateSpeculativeValues();
}
void CPU::NewRec::Compiler::FlushConstantReg(Reg r)
@ -1161,9 +1167,9 @@ void CPU::NewRec::Compiler::AddLoadStoreInfo(void* code_address, u32 code_size,
gpr_bitmask |= (1u << i);
}
CPU::CodeCache::AddLoadStoreInfo(code_address, code_size, m_current_instruction_pc, m_cycles, gpr_bitmask,
static_cast<u8>(address_register), static_cast<u8>(data_register), size, is_signed,
is_load);
CPU::CodeCache::AddLoadStoreInfo(code_address, code_size, m_current_instruction_pc, m_block->pc, m_cycles,
gpr_bitmask, static_cast<u8>(address_register), static_cast<u8>(data_register), size,
is_signed, is_load);
}
void CPU::NewRec::Compiler::CompileInstruction()
@ -1194,34 +1200,34 @@ void CPU::NewRec::Compiler::CompileInstruction()
{
switch (inst->r.funct)
{
case InstructionFunct::sll: CompileTemplate(&Compiler::Compile_sll_const, &Compiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); break;
case InstructionFunct::srl: CompileTemplate(&Compiler::Compile_srl_const, &Compiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); break;
case InstructionFunct::sra: CompileTemplate(&Compiler::Compile_sra_const, &Compiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); break;
case InstructionFunct::sllv: CompileTemplate(&Compiler::Compile_sllv_const, &Compiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); break;
case InstructionFunct::srlv: CompileTemplate(&Compiler::Compile_srlv_const, &Compiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); break;
case InstructionFunct::srav: CompileTemplate(&Compiler::Compile_srav_const, &Compiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); break;
case InstructionFunct::sll: CompileTemplate(&Compiler::Compile_sll_const, &Compiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); SpecExec_sll(); break;
case InstructionFunct::srl: CompileTemplate(&Compiler::Compile_srl_const, &Compiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); SpecExec_srl(); break;
case InstructionFunct::sra: CompileTemplate(&Compiler::Compile_sra_const, &Compiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); SpecExec_sra(); break;
case InstructionFunct::sllv: CompileTemplate(&Compiler::Compile_sllv_const, &Compiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_sllv(); break;
case InstructionFunct::srlv: CompileTemplate(&Compiler::Compile_srlv_const, &Compiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srlv(); break;
case InstructionFunct::srav: CompileTemplate(&Compiler::Compile_srav_const, &Compiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srav(); break;
case InstructionFunct::jr: CompileTemplate(&Compiler::Compile_jr_const, &Compiler::Compile_jr, nullptr, TF_READS_S); break;
case InstructionFunct::jalr: CompileTemplate(&Compiler::Compile_jalr_const, &Compiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); break;
case InstructionFunct::jalr: CompileTemplate(&Compiler::Compile_jalr_const, &Compiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); SpecExec_jalr(); break;
case InstructionFunct::syscall: Compile_syscall(); break;
case InstructionFunct::break_: Compile_break(); break;
case InstructionFunct::mfhi: CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break;
case InstructionFunct::mthi: CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
case InstructionFunct::mflo: CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break;
case InstructionFunct::mtlo: CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
case InstructionFunct::mult: CompileTemplate(&Compiler::Compile_mult_const, &Compiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); break;
case InstructionFunct::multu: CompileTemplate(&Compiler::Compile_multu_const, &Compiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); break;
case InstructionFunct::div: CompileTemplate(&Compiler::Compile_div_const, &Compiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); break;
case InstructionFunct::divu: CompileTemplate(&Compiler::Compile_divu_const, &Compiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); break;
case InstructionFunct::add: CompileTemplate(&Compiler::Compile_add_const, &Compiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); break;
case InstructionFunct::addu: CompileTemplate(&Compiler::Compile_addu_const, &Compiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); break;
case InstructionFunct::sub: CompileTemplate(&Compiler::Compile_sub_const, &Compiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); break;
case InstructionFunct::subu: CompileTemplate(&Compiler::Compile_subu_const, &Compiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); break;
case InstructionFunct::and_: CompileTemplate(&Compiler::Compile_and_const, &Compiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); break;
case InstructionFunct::or_: CompileTemplate(&Compiler::Compile_or_const, &Compiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); break;
case InstructionFunct::xor_: CompileTemplate(&Compiler::Compile_xor_const, &Compiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); break;
case InstructionFunct::nor: CompileTemplate(&Compiler::Compile_nor_const, &Compiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); break;
case InstructionFunct::slt: CompileTemplate(&Compiler::Compile_slt_const, &Compiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); break;
case InstructionFunct::sltu: CompileTemplate(&Compiler::Compile_sltu_const, &Compiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); break;
case InstructionFunct::mfhi: SpecCopyReg(inst->r.rd, Reg::hi); CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break;
case InstructionFunct::mthi: SpecCopyReg(Reg::hi, inst->r.rs); CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
case InstructionFunct::mflo: SpecCopyReg(inst->r.rd, Reg::lo); CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break;
case InstructionFunct::mtlo: SpecCopyReg(Reg::lo, inst->r.rs); CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
case InstructionFunct::mult: CompileTemplate(&Compiler::Compile_mult_const, &Compiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_mult(); break;
case InstructionFunct::multu: CompileTemplate(&Compiler::Compile_multu_const, &Compiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_multu(); break;
case InstructionFunct::div: CompileTemplate(&Compiler::Compile_div_const, &Compiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_div(); break;
case InstructionFunct::divu: CompileTemplate(&Compiler::Compile_divu_const, &Compiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_divu(); break;
case InstructionFunct::add: CompileTemplate(&Compiler::Compile_add_const, &Compiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_add(); break;
case InstructionFunct::addu: CompileTemplate(&Compiler::Compile_addu_const, &Compiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_addu(); break;
case InstructionFunct::sub: CompileTemplate(&Compiler::Compile_sub_const, &Compiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_sub(); break;
case InstructionFunct::subu: CompileTemplate(&Compiler::Compile_subu_const, &Compiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); SpecExec_subu(); break;
case InstructionFunct::and_: CompileTemplate(&Compiler::Compile_and_const, &Compiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_and(); break;
case InstructionFunct::or_: CompileTemplate(&Compiler::Compile_or_const, &Compiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_or(); break;
case InstructionFunct::xor_: CompileTemplate(&Compiler::Compile_xor_const, &Compiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_xor(); break;
case InstructionFunct::nor: CompileTemplate(&Compiler::Compile_nor_const, &Compiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_nor(); break;
case InstructionFunct::slt: CompileTemplate(&Compiler::Compile_slt_const, &Compiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_slt(); break;
case InstructionFunct::sltu: CompileTemplate(&Compiler::Compile_sltu_const, &Compiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_sltu(); break;
default: Panic("fixme funct"); break;
}
@ -1229,35 +1235,35 @@ void CPU::NewRec::Compiler::CompileInstruction()
break;
case InstructionOp::j: Compile_j(); break;
case InstructionOp::jal: Compile_jal(); break;
case InstructionOp::jal: Compile_jal(); SpecExec_jal(); break;
case InstructionOp::b: CompileTemplate(&Compiler::Compile_b_const, &Compiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
case InstructionOp::b: CompileTemplate(&Compiler::Compile_b_const, &Compiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); SpecExec_b(); break;
case InstructionOp::blez: CompileTemplate(&Compiler::Compile_blez_const, &Compiler::Compile_blez, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
case InstructionOp::bgtz: CompileTemplate(&Compiler::Compile_bgtz_const, &Compiler::Compile_bgtz, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
case InstructionOp::beq: CompileTemplate(&Compiler::Compile_beq_const, &Compiler::Compile_beq, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;
case InstructionOp::bne: CompileTemplate(&Compiler::Compile_bne_const, &Compiler::Compile_bne, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;
case InstructionOp::addi: CompileTemplate(&Compiler::Compile_addi_const, &Compiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); break;
case InstructionOp::addiu: CompileTemplate(&Compiler::Compile_addiu_const, &Compiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); break;
case InstructionOp::slti: CompileTemplate(&Compiler::Compile_slti_const, &Compiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); break;
case InstructionOp::sltiu: CompileTemplate(&Compiler::Compile_sltiu_const, &Compiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); break;
case InstructionOp::andi: CompileTemplate(&Compiler::Compile_andi_const, &Compiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); break;
case InstructionOp::ori: CompileTemplate(&Compiler::Compile_ori_const, &Compiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); break;
case InstructionOp::xori: CompileTemplate(&Compiler::Compile_xori_const, &Compiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); break;
case InstructionOp::lui: Compile_lui(); break;
case InstructionOp::addi: CompileTemplate(&Compiler::Compile_addi_const, &Compiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); SpecExec_addi(); break;
case InstructionOp::addiu: CompileTemplate(&Compiler::Compile_addiu_const, &Compiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_addiu(); break;
case InstructionOp::slti: CompileTemplate(&Compiler::Compile_slti_const, &Compiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); SpecExec_slti(); break;
case InstructionOp::sltiu: CompileTemplate(&Compiler::Compile_sltiu_const, &Compiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); SpecExec_sltiu(); break;
case InstructionOp::andi: CompileTemplate(&Compiler::Compile_andi_const, &Compiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); SpecExec_andi(); break;
case InstructionOp::ori: CompileTemplate(&Compiler::Compile_ori_const, &Compiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_ori(); break;
case InstructionOp::xori: CompileTemplate(&Compiler::Compile_xori_const, &Compiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_xori(); break;
case InstructionOp::lui: Compile_lui(); SpecExec_lui(); break;
case InstructionOp::lb: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break;
case InstructionOp::lbu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break;
case InstructionOp::lh: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break;
case InstructionOp::lhu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break;
case InstructionOp::lw: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break;
case InstructionOp::lwl: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break;
case InstructionOp::lwr: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break;
case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); break;
case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); break;
case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); break;
case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break;
case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break;
case InstructionOp::lb: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, true); break;
case InstructionOp::lbu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, false); break;
case InstructionOp::lh: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, true); break;
case InstructionOp::lhu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, false); break;
case InstructionOp::lw: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Word, false); break;
case InstructionOp::lwl: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(false); break;
case InstructionOp::lwr: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(true); break;
case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break;
case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break;
case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break;
case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(false); break;
case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(true); break;
case InstructionOp::cop0:
{
@ -1265,8 +1271,8 @@ void CPU::NewRec::Compiler::CompileInstruction()
{
switch (inst->cop.CommonOp())
{
case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc0, nullptr, TF_WRITES_T | TF_LOAD_DELAY); } break;
case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); break;
case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc0, nullptr, TF_WRITES_T | TF_LOAD_DELAY); } SpecExec_mfc0(); break;
case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); SpecExec_mtc0(); break;
default: Compile_Fallback(); break;
}
}
@ -1274,7 +1280,7 @@ void CPU::NewRec::Compiler::CompileInstruction()
{
switch (inst->cop.Cop0Op())
{
case Cop0Instruction::rfe: CompileTemplate(nullptr, &Compiler::Compile_rfe, nullptr, 0); break;
case Cop0Instruction::rfe: CompileTemplate(nullptr, &Compiler::Compile_rfe, nullptr, 0); SpecExec_rfe(); break;
default: Compile_Fallback(); break;
}
}
@ -1303,7 +1309,7 @@ void CPU::NewRec::Compiler::CompileInstruction()
break;
case InstructionOp::lwc2: CompileLoadStoreTemplate(&Compiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_GTE_STALL | TF_READS_S | TF_LOAD_DELAY); break;
case InstructionOp::swc2: CompileLoadStoreTemplate(&Compiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); break;
case InstructionOp::swc2: CompileLoadStoreTemplate(&Compiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); SpecExec_swc2(); break;
default: Panic("Fixme"); break;
// clang-format on
@ -1567,7 +1573,7 @@ void CPU::NewRec::Compiler::CompileTemplate(void (Compiler::*const_func)(Compile
}
}
void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool,
void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool,
const std::optional<VirtualMemoryAddress>&),
MemoryAccessSize size, bool store, bool sign, u32 tflags)
{
@ -1595,13 +1601,28 @@ void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(Comp
// constant address?
std::optional<VirtualMemoryAddress> addr;
bool use_fastmem = CodeCache::IsUsingFastmem() && !g_settings.cpu_recompiler_memory_exceptions &&
!SpecIsCacheIsolated() && !CodeCache::HasPreviouslyFaultedOnPC(m_current_instruction_pc);
if (HasConstantReg(rs))
{
addr = GetConstantRegU32(rs) + inst->i.imm_sext32();
cf.const_s = true;
if (!Bus::CanUseFastmemForAddress(addr.value()))
{
Log_DebugFmt("Not using fastmem for {:08X}", addr.value());
use_fastmem = false;
}
}
else
{
const std::optional<VirtualMemoryAddress> spec_addr = SpecExec_LoadStoreAddr();
if (use_fastmem && spec_addr.has_value() && !Bus::CanUseFastmemForAddress(spec_addr.value()))
{
Log_DebugFmt("Not using fastmem for speculative {:08X}", spec_addr.value());
use_fastmem = false;
}
if constexpr (HAS_MEMORY_OPERANDS)
{
// don't bother caching it since we're going to flush anyway
@ -1648,12 +1669,13 @@ void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(Comp
}
}
(this->*func)(cf, size, sign, addr);
(this->*func)(cf, size, sign, use_fastmem, addr);
}
void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store)
void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store,
bool use_fastmem)
{
if (CodeCache::IsUsingFastmem() && !g_settings.cpu_recompiler_memory_exceptions)
if (use_fastmem)
return;
// TODO: Stores don't need to flush GTE cycles...
@ -2275,3 +2297,480 @@ void CPU::NewRec::BackpatchLoadStore(void* exception_pc, const CodeCache::Loadst
buffer.CommitFarCode(thunk_size);
}
void CPU::NewRec::Compiler::InitSpeculativeRegs()
{
for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
m_speculative_constants.regs[i] = g_state.regs.r[i];
m_speculative_constants.cop0_sr = g_state.cop0_regs.sr.bits;
m_speculative_constants.memory.clear();
}
void CPU::NewRec::Compiler::InvalidateSpeculativeValues()
{
m_speculative_constants.regs.fill(std::nullopt);
m_speculative_constants.memory.clear();
m_speculative_constants.cop0_sr.reset();
}
CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadReg(Reg reg)
{
return m_speculative_constants.regs[static_cast<u8>(reg)];
}
void CPU::NewRec::Compiler::SpecWriteReg(Reg reg, SpecValue value)
{
if (reg == Reg::zero)
return;
m_speculative_constants.regs[static_cast<u8>(reg)] = value;
}
void CPU::NewRec::Compiler::SpecInvalidateReg(Reg reg)
{
if (reg == Reg::zero)
return;
m_speculative_constants.regs[static_cast<u8>(reg)].reset();
}
void CPU::NewRec::Compiler::SpecCopyReg(Reg dst, Reg src)
{
if (dst == Reg::zero)
return;
m_speculative_constants.regs[static_cast<u8>(dst)] = m_speculative_constants.regs[static_cast<u8>(src)];
}
CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadMem(VirtualMemoryAddress address)
{
auto it = m_speculative_constants.memory.find(address);
if (it != m_speculative_constants.memory.end())
return it->second;
u32 value;
if ((address & DCACHE_LOCATION_MASK) == DCACHE_LOCATION)
{
u32 scratchpad_offset = address & DCACHE_OFFSET_MASK;
std::memcpy(&value, &CPU::g_state.dcache[scratchpad_offset], sizeof(value));
return value;
}
const PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
if (Bus::IsRAMAddress(phys_addr))
{
u32 ram_offset = phys_addr & Bus::g_ram_mask;
std::memcpy(&value, &Bus::g_ram[ram_offset], sizeof(value));
return value;
}
return std::nullopt;
}
void CPU::NewRec::Compiler::SpecWriteMem(u32 address, SpecValue value)
{
auto it = m_speculative_constants.memory.find(address);
if (it != m_speculative_constants.memory.end())
{
it->second = value;
return;
}
const PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
if ((address & DCACHE_LOCATION_MASK) == DCACHE_LOCATION || Bus::IsRAMAddress(phys_addr))
m_speculative_constants.memory.emplace(address, value);
}
void CPU::NewRec::Compiler::SpecInvalidateMem(VirtualMemoryAddress address)
{
SpecWriteMem(address, std::nullopt);
}
bool CPU::NewRec::Compiler::SpecIsCacheIsolated()
{
if (!m_speculative_constants.cop0_sr.has_value())
return false;
const Cop0Registers::SR sr{m_speculative_constants.cop0_sr.value()};
return sr.Isc;
}
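For context on why this check feeds the use_fastmem decision in CompileLoadStoreTemplate: when the IsC bit of SR is set, the R3000A isolates the data cache, so loads and stores no longer reach memory, and a raw fastmem access through the guest RAM mapping would be incorrect. A small sketch of the gating, assuming IsC is bit 16 of SR:

// Sketch only: how a speculative SR value gates fastmem (IsC assumed to be SR bit 16).
#include <cstdint>
#include <optional>

bool SpecMayUseFastmem(std::optional<uint32_t> spec_sr)
{
  if (!spec_sr.has_value())
    return true; // SR unknown: behave like SpecIsCacheIsolated() returning false
  const bool cache_isolated = (*spec_sr & (1u << 16)) != 0;
  return !cache_isolated;
}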
void CPU::NewRec::Compiler::SpecExec_b()
{
const bool link = (static_cast<u8>(inst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10);
if (link)
SpecWriteReg(Reg::ra, m_compiler_pc);
}
void CPU::NewRec::Compiler::SpecExec_jal()
{
SpecWriteReg(Reg::ra, m_compiler_pc);
}
void CPU::NewRec::Compiler::SpecExec_jalr()
{
SpecWriteReg(inst->r.rd, m_compiler_pc);
}
void CPU::NewRec::Compiler::SpecExec_sll()
{
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rt.has_value())
SpecWriteReg(inst->r.rd, rt.value() << inst->r.shamt);
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_srl()
{
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rt.has_value())
SpecWriteReg(inst->r.rd, rt.value() >> inst->r.shamt);
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_sra()
{
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rt.has_value())
SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> inst->r.shamt));
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_sllv()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, rt.value() << (rs.value() & 0x1F));
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_srlv()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, rt.value() >> (rs.value() & 0x1F));
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_srav()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> (rs.value() & 0x1F)));
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_mult()
{
// TODO
SpecInvalidateReg(Reg::hi);
SpecInvalidateReg(Reg::lo);
}
void CPU::NewRec::Compiler::SpecExec_multu()
{
// TODO
SpecInvalidateReg(Reg::hi);
SpecInvalidateReg(Reg::lo);
}
void CPU::NewRec::Compiler::SpecExec_div()
{
// TODO
SpecInvalidateReg(Reg::hi);
SpecInvalidateReg(Reg::lo);
}
void CPU::NewRec::Compiler::SpecExec_divu()
{
// TODO
SpecInvalidateReg(Reg::hi);
SpecInvalidateReg(Reg::lo);
}
void CPU::NewRec::Compiler::SpecExec_add()
{
SpecExec_addu();
}
void CPU::NewRec::Compiler::SpecExec_addu()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, rs.value() + rt.value());
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_sub()
{
SpecExec_subu();
}
void CPU::NewRec::Compiler::SpecExec_subu()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, rs.value() - rt.value());
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_and()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, rs.value() & rt.value());
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_or()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, rs.value() | rt.value());
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_xor()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, rs.value() ^ rt.value());
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_nor()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, ~(rs.value() | rt.value()));
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_slt()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(rt.value())));
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_sltu()
{
const SpecValue rs = SpecReadReg(inst->r.rs);
const SpecValue rt = SpecReadReg(inst->r.rt);
if (rs.has_value() && rt.has_value())
SpecWriteReg(inst->r.rd, BoolToUInt32(rs.value() < rt.value()));
else
SpecInvalidateReg(inst->r.rd);
}
void CPU::NewRec::Compiler::SpecExec_addi()
{
SpecExec_addiu();
}
void CPU::NewRec::Compiler::SpecExec_addiu()
{
const SpecValue rs = SpecReadReg(inst->i.rs);
if (rs.has_value())
SpecWriteReg(inst->i.rt, rs.value() + inst->i.imm_sext32());
else
SpecInvalidateReg(inst->i.rt);
}
void CPU::NewRec::Compiler::SpecExec_slti()
{
const SpecValue rs = SpecReadReg(inst->i.rs);
if (rs.has_value())
SpecWriteReg(inst->i.rt, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(inst->i.imm_sext32())));
else
SpecInvalidateReg(inst->i.rt);
}
void CPU::NewRec::Compiler::SpecExec_sltiu()
{
const SpecValue rs = SpecReadReg(inst->i.rs);
if (rs.has_value())
SpecWriteReg(inst->i.rt, BoolToUInt32(rs.value() < inst->i.imm_sext32()));
else
SpecInvalidateReg(inst->i.rt);
}
void CPU::NewRec::Compiler::SpecExec_andi()
{
const SpecValue rs = SpecReadReg(inst->i.rs);
if (rs.has_value())
SpecWriteReg(inst->i.rt, rs.value() & inst->i.imm_zext32());
else
SpecInvalidateReg(inst->i.rt);
}
void CPU::NewRec::Compiler::SpecExec_ori()
{
const SpecValue rs = SpecReadReg(inst->i.rs);
if (rs.has_value())
SpecWriteReg(inst->i.rt, rs.value() | inst->i.imm_zext32());
else
SpecInvalidateReg(inst->i.rt);
}
void CPU::NewRec::Compiler::SpecExec_xori()
{
const SpecValue rs = SpecReadReg(inst->i.rs);
if (rs.has_value())
SpecWriteReg(inst->i.rt, rs.value() ^ inst->i.imm_zext32());
else
SpecInvalidateReg(inst->i.rt);
}
void CPU::NewRec::Compiler::SpecExec_lui()
{
SpecWriteReg(inst->i.rt, inst->i.imm_zext32() << 16);
}
CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecExec_LoadStoreAddr()
{
const SpecValue rs = SpecReadReg(inst->i.rs);
return rs.has_value() ? (rs.value() + inst->i.imm_sext32()) : rs;
}
void CPU::NewRec::Compiler::SpecExec_lxx(MemoryAccessSize size, bool sign)
{
const SpecValue addr = SpecExec_LoadStoreAddr();
SpecValue val;
if (!addr.has_value() || !(val = SpecReadMem(addr.value())).has_value())
{
SpecInvalidateReg(inst->i.rt);
return;
}
switch (size)
{
case MemoryAccessSize::Byte:
val = sign ? SignExtend32(static_cast<u8>(val.value())) : ZeroExtend32(static_cast<u8>(val.value()));
break;
case MemoryAccessSize::HalfWord:
val = sign ? SignExtend32(static_cast<u16>(val.value())) : ZeroExtend32(static_cast<u16>(val.value()));
break;
case MemoryAccessSize::Word:
break;
default:
UnreachableCode();
}
SpecWriteReg(inst->r.rt, val);
}
void CPU::NewRec::Compiler::SpecExec_lwx(bool lwr)
{
// TODO
SpecInvalidateReg(inst->i.rt);
}
void CPU::NewRec::Compiler::SpecExec_sxx(MemoryAccessSize size)
{
const SpecValue addr = SpecExec_LoadStoreAddr();
if (!addr.has_value())
return;
SpecValue rt = SpecReadReg(inst->i.rt);
if (rt.has_value())
{
switch (size)
{
case MemoryAccessSize::Byte:
rt = ZeroExtend32(static_cast<u8>(rt.value()));
break;
case MemoryAccessSize::HalfWord:
rt = ZeroExtend32(static_cast<u16>(rt.value()));
break;
case MemoryAccessSize::Word:
break;
default:
UnreachableCode();
}
}
SpecWriteMem(addr.value(), rt);
}
void CPU::NewRec::Compiler::SpecExec_swx(bool swr)
{
const SpecValue addr = SpecExec_LoadStoreAddr();
if (addr.has_value())
SpecInvalidateMem(addr.value() & ~3u);
}
void CPU::NewRec::Compiler::SpecExec_swc2()
{
const SpecValue addr = SpecExec_LoadStoreAddr();
if (addr.has_value())
SpecInvalidateMem(addr.value());
}
void CPU::NewRec::Compiler::SpecExec_mfc0()
{
const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue());
if (rd != Cop0Reg::SR)
{
SpecInvalidateReg(inst->r.rt);
return;
}
SpecWriteReg(inst->r.rt, m_speculative_constants.cop0_sr);
}
void CPU::NewRec::Compiler::SpecExec_mtc0()
{
const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue());
if (rd != Cop0Reg::SR || !m_speculative_constants.cop0_sr.has_value())
return;
SpecValue val = SpecReadReg(inst->r.rt);
if (val.has_value())
{
constexpr u32 mask = Cop0Registers::SR::WRITE_MASK;
val = (m_speculative_constants.cop0_sr.value() & mask) | (val.value() & mask);
}
m_speculative_constants.cop0_sr = val;
}
void CPU::NewRec::Compiler::SpecExec_rfe()
{
if (!m_speculative_constants.cop0_sr.has_value())
return;
const u32 val = m_speculative_constants.cop0_sr.value();
m_speculative_constants.cop0_sr = (val & UINT32_C(0b110000)) | ((val & UINT32_C(0b111111)) >> 2);
}
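The payoff of this whole speculative pass is the use_fastmem decision in CompileLoadStoreTemplate further up: even when the base register is not a compile-time constant, SpecExec_LoadStoreAddr() can often predict the effective address, and addresses known to be outside fastmem-backed memory skip fastmem entirely. A toy version of that decision (CanUseFastmemForAddr_ here is a simplified stand-in for Bus::CanUseFastmemForAddress):

// Toy model of the per-instruction fastmem choice (simplified; 2MB RAM only).
#include <cstdint>
#include <optional>

static bool CanUseFastmemForAddr_(uint32_t addr)
{
  const uint32_t phys = addr & 0x1FFFFFFFu; // strip the KSEG0/KSEG1 segment bits
  return phys < 0x00200000u;                // main RAM; MMIO/BIOS take the slow path
}

bool ShouldUseFastmem(std::optional<uint32_t> spec_addr, bool fastmem_enabled)
{
  if (!fastmem_enabled)
    return false;
  // Unknown address: optimistically use fastmem and rely on the backpatch path.
  return !spec_addr.has_value() || CanUseFastmemForAddr_(*spec_addr);
}

For example, a guest sequence like lui $t0, 0x1F80 followed by sw $t1, 0x1810($t0) resolves speculatively to 0x1F801810 (GPU GP0), so the store is compiled with a direct handler call rather than a fastmem access that would fault and be backpatched on its first execution.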

View file

@ -56,17 +56,18 @@ protected:
FLUSH_LOAD_DELAY_FROM_STATE = (1 << 9),
FLUSH_GTE_DONE_CYCLE = (1 << 10),
FLUSH_GTE_STALL_FROM_STATE = (1 << 11),
FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS = (1 << 12),
FLUSH_FOR_C_CALL = (FLUSH_FREE_CALLER_SAVED_REGISTERS),
FLUSH_FOR_LOADSTORE = (FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_CYCLES),
FLUSH_FOR_BRANCH = (FLUSH_FLUSH_MIPS_REGISTERS),
FLUSH_FOR_EXCEPTION =
(FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE), // GTE cycles needed because it stalls when a GTE instruction is next.
FLUSH_FOR_INTERPRETER =
(FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_INVALIDATE_MIPS_REGISTERS | FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_PC |
FLUSH_CYCLES | FLUSH_INSTRUCTION_BITS | FLUSH_LOAD_DELAY | FLUSH_GTE_DONE_CYCLE),
FLUSH_FOR_INTERPRETER = (FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_INVALIDATE_MIPS_REGISTERS |
FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_PC | FLUSH_CYCLES | FLUSH_INSTRUCTION_BITS |
FLUSH_LOAD_DELAY | FLUSH_GTE_DONE_CYCLE | FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS),
FLUSH_END_BLOCK = 0xFFFFFFFFu & ~(FLUSH_PC | FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE | FLUSH_INSTRUCTION_BITS |
FLUSH_GTE_STALL_FROM_STATE),
FLUSH_GTE_STALL_FROM_STATE | FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS),
};
union CompileFlags
@ -267,10 +268,10 @@ protected:
void CompileTemplate(void (Compiler::*const_func)(CompileFlags), void (Compiler::*func)(CompileFlags),
const void* pgxp_cpu_func, u32 tflags);
void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool,
void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool,
const std::optional<VirtualMemoryAddress>&),
MemoryAccessSize size, bool store, bool sign, u32 tflags);
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store);
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store, bool use_fastmem);
void CompileMoveRegTemplate(Reg dst, Reg src, bool pgxp_move);
virtual void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
@ -357,17 +358,17 @@ protected:
virtual void Compile_xori(CompileFlags cf) = 0;
void Compile_lui();
virtual void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
virtual void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) = 0;
virtual void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
virtual void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) = 0; // lwl/lwr
virtual void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
virtual void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) = 0;
virtual void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
virtual void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) = 0;
virtual void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
virtual void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) = 0; // swl/swr
virtual void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
virtual void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) = 0;
static u32* GetCop0RegPtr(Cop0Reg reg);
@ -454,6 +455,71 @@ protected:
std::array<HostStateBackup, 2> m_host_state_backup = {};
u32 m_host_state_backup_count = 0;
//////////////////////////////////////////////////////////////////////////
// Speculative Constants
//////////////////////////////////////////////////////////////////////////
using SpecValue = std::optional<u32>;
struct SpeculativeConstants
{
std::array<SpecValue, static_cast<u8>(Reg::count)> regs;
std::unordered_map<PhysicalMemoryAddress, SpecValue> memory;
SpecValue cop0_sr;
};
void InitSpeculativeRegs();
void InvalidateSpeculativeValues();
SpecValue SpecReadReg(Reg reg);
void SpecWriteReg(Reg reg, SpecValue value);
void SpecInvalidateReg(Reg reg);
void SpecCopyReg(Reg dst, Reg src);
SpecValue SpecReadMem(u32 address);
void SpecWriteMem(VirtualMemoryAddress address, SpecValue value);
void SpecInvalidateMem(VirtualMemoryAddress address);
bool SpecIsCacheIsolated();
SpeculativeConstants m_speculative_constants;
void SpecExec_b();
void SpecExec_jal();
void SpecExec_jalr();
void SpecExec_sll();
void SpecExec_srl();
void SpecExec_sra();
void SpecExec_sllv();
void SpecExec_srlv();
void SpecExec_srav();
void SpecExec_mult();
void SpecExec_multu();
void SpecExec_div();
void SpecExec_divu();
void SpecExec_add();
void SpecExec_addu();
void SpecExec_sub();
void SpecExec_subu();
void SpecExec_and();
void SpecExec_or();
void SpecExec_xor();
void SpecExec_nor();
void SpecExec_slt();
void SpecExec_sltu();
void SpecExec_addi();
void SpecExec_addiu();
void SpecExec_slti();
void SpecExec_sltiu();
void SpecExec_andi();
void SpecExec_ori();
void SpecExec_xori();
void SpecExec_lui();
SpecValue SpecExec_LoadStoreAddr();
void SpecExec_lxx(MemoryAccessSize size, bool sign);
void SpecExec_lwx(bool lwr); // lwl/lwr
void SpecExec_sxx(MemoryAccessSize size);
void SpecExec_swx(bool swr); // swl/swr
void SpecExec_swc2();
void SpecExec_mfc0();
void SpecExec_mtc0();
void SpecExec_rfe();
// PGXP memory callbacks
static const std::array<std::array<const void*, 2>, 3> s_pgxp_mem_load_functions;
static const std::array<const void*, 3> s_pgxp_mem_store_functions;

View file

@ -1340,11 +1340,10 @@ CPU::NewRec::AArch32Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
template<typename RegAllocFn>
vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::aarch32::Register& addr_reg,
MemoryAccessSize size, bool sign,
MemoryAccessSize size, bool sign, bool use_fastmem,
const RegAllocFn& dst_reg_alloc)
{
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
if (!checked && CodeCache::IsUsingFastmem())
if (use_fastmem)
{
DebugAssert(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT);
m_cycles += Bus::RAM_READ_TICKS;
@ -1379,6 +1378,7 @@ vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::a
if (addr_reg.GetCode() != RARG1.GetCode())
armAsm->mov(RARG1, addr_reg);
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
switch (size)
{
case MemoryAccessSize::Byte:
@ -1452,10 +1452,10 @@ vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::a
}
void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register& addr_reg,
const vixl::aarch32::Register& value_reg, MemoryAccessSize size)
const vixl::aarch32::Register& value_reg, MemoryAccessSize size,
bool use_fastmem)
{
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
if (!checked && CodeCache::IsUsingFastmem())
if (use_fastmem)
{
DebugAssert(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT);
DebugAssert(addr_reg.GetCode() != RARG3.GetCode());
@ -1488,6 +1488,7 @@ void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register&
if (value_reg.GetCode() != RARG2.GetCode())
armAsm->mov(RARG2, value_reg);
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
switch (size)
{
case MemoryAccessSize::Byte:
@ -1536,15 +1537,15 @@ void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register&
}
}
void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ?
std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) :
std::optional<Register>();
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
const Register data = GenerateLoad(addr, size, sign, [this, cf]() {
const Register data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
if (cf.MipsT() == Reg::zero)
return RRET;
@ -1564,11 +1565,11 @@ void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
}
}
void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
DebugAssert(size == MemoryAccessSize::Word && !sign);
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
// TODO: if address is constant, this can be simplified..
@ -1580,7 +1581,7 @@ void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
const Register addr = Register(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
ComputeLoadStoreAddressArg(cf, address, addr);
armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u));
GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; });
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
if (inst->r.rt == Reg::zero)
{
@ -1648,15 +1649,15 @@ void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
FreeHostReg(addr.GetCode());
}
void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ?
std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) :
std::optional<Register>();
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RRET; });
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, true);
@ -1728,7 +1729,7 @@ void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
}
}
void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
AssertRegOrConstS(cf);
@ -1737,13 +1738,13 @@ void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ?
std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) :
std::optional<Register>();
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
const Register data = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
if (!cf.valid_host_t)
MoveTToReg(RARG2, cf);
GenerateStore(addr, data, size);
GenerateStore(addr, data, size, use_fastmem);
if (g_settings.gpu_pgxp_enable)
{
@ -1756,18 +1757,18 @@ void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
}
}
void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
DebugAssert(size == MemoryAccessSize::Word && !sign);
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
// TODO: if address is constant, this can be simplified..
// We'd need to be careful here if we weren't overwriting it..
const Register addr = Register(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
ComputeLoadStoreAddressArg(cf, address, addr);
armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u));
GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; });
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the flush
@ -1813,13 +1814,13 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
FreeHostReg(addr.GetCode());
armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u));
GenerateStore(RARG1, value, MemoryAccessSize::Word);
GenerateStore(RARG1, value, MemoryAccessSize::Word, use_fastmem);
}
void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, false);
@ -1852,17 +1853,17 @@ void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
if (!g_settings.gpu_pgxp_enable)
{
const Register addr = ComputeLoadStoreAddressArg(cf, address);
GenerateStore(addr, RARG2, size);
GenerateStore(addr, RARG2, size, use_fastmem);
return;
}
// TODO: This can be simplified because we don't need to validate in PGXP..
const Register addr_reg = Register(AllocateTempHostReg(HR_CALLEE_SAVED));
const Register data_backup = Register(AllocateTempHostReg(HR_CALLEE_SAVED));
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
ComputeLoadStoreAddressArg(cf, address, addr_reg);
armAsm->mov(data_backup, RARG2);
GenerateStore(addr_reg, RARG2, size);
GenerateStore(addr_reg, RARG2, size, use_fastmem);
Flush(FLUSH_FOR_C_CALL);
armAsm->mov(RARG3, data_backup);

View file

@ -96,20 +96,20 @@ protected:
const std::optional<const vixl::aarch32::Register>& reg = std::nullopt);
template<typename RegAllocFn>
vixl::aarch32::Register GenerateLoad(const vixl::aarch32::Register& addr_reg, MemoryAccessSize size, bool sign,
const RegAllocFn& dst_reg_alloc);
bool use_fastmem, const RegAllocFn& dst_reg_alloc);
void GenerateStore(const vixl::aarch32::Register& addr_reg, const vixl::aarch32::Register& value_reg,
MemoryAccessSize size);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
MemoryAccessSize size, bool use_fastmem);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void TestInterrupts(const vixl::aarch32::Register& sr);
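
The new use_fastmem parameter threaded through the declarations above is presumably computed once per memory instruction by the caller instead of being re-derived inside GenerateLoad/GenerateStore. A minimal sketch of that predicate, mirroring the condition the helpers evaluated before this change; the real call site may apply further per-address restrictions not visible in this diff, and the helper name is hypothetical.

bool ShouldUseFastmem(bool fastmem_active, bool memory_exceptions_enabled)
{
  // fastmem_active stands for CodeCache::IsUsingFastmem(), memory_exceptions_enabled for
  // g_settings.cpu_recompiler_memory_exceptions.
  return fastmem_active && !memory_exceptions_enabled;
}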

View file

@ -37,8 +37,7 @@ Compiler* g_compiler = &s_instance;
} // namespace CPU::NewRec
CPU::NewRec::AArch64Compiler::AArch64Compiler()
: m_emitter(PositionDependentCode)
, m_far_emitter(PositionIndependentCode)
: m_emitter(PositionDependentCode), m_far_emitter(PositionIndependentCode)
{
}
@ -1314,11 +1313,10 @@ CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
template<typename RegAllocFn>
vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::aarch64::WRegister& addr_reg,
MemoryAccessSize size, bool sign,
MemoryAccessSize size, bool sign, bool use_fastmem,
const RegAllocFn& dst_reg_alloc)
{
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
if (!checked && CodeCache::IsUsingFastmem())
if (use_fastmem)
{
m_cycles += Bus::RAM_READ_TICKS;
@ -1356,6 +1354,7 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::
if (addr_reg.GetCode() != RWARG1.GetCode())
armAsm->mov(RWARG1, addr_reg);
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
switch (size)
{
case MemoryAccessSize::Byte:
@ -1429,10 +1428,10 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::
}
void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister& addr_reg,
const vixl::aarch64::WRegister& value_reg, MemoryAccessSize size)
const vixl::aarch64::WRegister& value_reg, MemoryAccessSize size,
bool use_fastmem)
{
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
if (!checked && CodeCache::IsUsingFastmem())
if (use_fastmem)
{
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
@ -1467,6 +1466,7 @@ void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister&
if (value_reg.GetCode() != RWARG2.GetCode())
armAsm->mov(RWARG2, value_reg);
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
switch (size)
{
case MemoryAccessSize::Byte:
@ -1515,15 +1515,15 @@ void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister&
}
}
void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
const std::optional<WRegister> addr_reg =
g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
std::optional<WRegister>();
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
const WRegister data = GenerateLoad(addr, size, sign, [this, cf]() {
const WRegister data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
if (cf.MipsT() == Reg::zero)
return RWRET;
@ -1544,11 +1544,11 @@ void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
}
}
void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
DebugAssert(size == MemoryAccessSize::Word && !sign);
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
// TODO: if address is constant, this can be simplified..
@ -1560,7 +1560,7 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
const WRegister addr = WRegister(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
ComputeLoadStoreAddressArg(cf, address, addr);
armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
if (inst->r.rt == Reg::zero)
{
@ -1628,15 +1628,15 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
FreeHostReg(addr.GetCode());
}
void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
const std::optional<WRegister> addr_reg =
g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
std::optional<WRegister>();
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RWRET; });
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, true);
@ -1708,7 +1708,7 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
}
}
void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
AssertRegOrConstS(cf);
@ -1717,13 +1717,13 @@ void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
const std::optional<WRegister> addr_reg =
g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
std::optional<WRegister>();
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
const WRegister data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
if (!cf.valid_host_t)
MoveTToReg(RWARG2, cf);
GenerateStore(addr, data, size);
GenerateStore(addr, data, size, use_fastmem);
if (g_settings.gpu_pgxp_enable)
{
@ -1736,18 +1736,18 @@ void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
}
}
void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
DebugAssert(size == MemoryAccessSize::Word && !sign);
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
// TODO: if address is constant, this can be simplified..
// We'd need to be careful here if we weren't overwriting it..
const WRegister addr = WRegister(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
ComputeLoadStoreAddressArg(cf, address, addr);
armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the flush
@ -1793,13 +1793,13 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
FreeHostReg(addr.GetCode());
armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
GenerateStore(RWARG1, value, MemoryAccessSize::Word);
GenerateStore(RWARG1, value, MemoryAccessSize::Word, use_fastmem);
}
void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, false);
@ -1832,17 +1832,17 @@ void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
if (!g_settings.gpu_pgxp_enable)
{
const WRegister addr = ComputeLoadStoreAddressArg(cf, address);
GenerateStore(addr, RWARG2, size);
GenerateStore(addr, RWARG2, size, use_fastmem);
return;
}
// TODO: This can be simplified because we don't need to validate in PGXP..
const WRegister addr_reg = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
const WRegister data_backup = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
ComputeLoadStoreAddressArg(cf, address, addr_reg);
armAsm->mov(data_backup, RWARG2);
GenerateStore(addr_reg, RWARG2, size);
GenerateStore(addr_reg, RWARG2, size, use_fastmem);
Flush(FLUSH_FOR_C_CALL);
armAsm->mov(RWARG3, data_backup);
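
A compact model of the restructured control flow in GenerateLoad/GenerateStore earlier in this file's diff: the fastmem decision now arrives as a parameter, and the memory-exception flag is only read on the slow path, since a true use_fastmem already implies it was false. The two callbacks are hypothetical stand-ins for "emit the inline fastmem access" and "emit the C-call slow path".

#include <functional>

void EmitLoadOrStore(bool use_fastmem, bool memory_exceptions,
                     const std::function<void()>& emit_fastmem_access,
                     const std::function<void(bool checked)>& emit_slow_call)
{
  if (use_fastmem)
  {
    // inline, backpatchable host access; RAM read ticks are charged here at compile time
    emit_fastmem_access();
    return;
  }

  // exception checking only matters on the slow path now
  const bool checked = memory_exceptions;
  emit_slow_call(checked);
}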

View file

@ -26,7 +26,8 @@ protected:
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
void CopyHostReg(u32 dst, u32 src) override;
void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space) override;
void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
u32 far_code_space) override;
void BeginBlock() override;
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
void GenerateICacheCheckAndUpdate() override;
@ -97,20 +98,20 @@ protected:
const std::optional<const vixl::aarch64::WRegister>& reg = std::nullopt);
template<typename RegAllocFn>
vixl::aarch64::WRegister GenerateLoad(const vixl::aarch64::WRegister& addr_reg, MemoryAccessSize size, bool sign,
const RegAllocFn& dst_reg_alloc);
bool use_fastmem, const RegAllocFn& dst_reg_alloc);
void GenerateStore(const vixl::aarch64::WRegister& addr_reg, const vixl::aarch64::WRegister& value_reg,
MemoryAccessSize size);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
MemoryAccessSize size, bool use_fastmem);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void TestInterrupts(const vixl::aarch64::WRegister& sr);
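
The Compile_lwx/Compile_swx overrides declared above mask the effective address down to its containing word before performing one aligned access (the and_ with ~0x3u in the .cpp hunks). Host-side equivalents of that masking, for illustration only; the shift/merge of the partial value happens in the parts of those functions not shown in these hunks.

#include <cstdint>

constexpr uint32_t AlignedWordAddress(uint32_t addr) { return addr & ~0x3u; } // and_(RWARG1, addr, ~0x3u)
constexpr uint32_t ByteOffsetInWord(uint32_t addr)   { return addr & 0x3u; }  // selects the lwl/lwr shift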

View file

@ -1595,10 +1595,9 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::ComputeLoadStoreAddressArg(
template<typename RegAllocFn>
void CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign,
const RegAllocFn& dst_reg_alloc)
bool use_fastmem, const RegAllocFn& dst_reg_alloc)
{
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
if (!checked && CodeCache::IsUsingFastmem())
if (use_fastmem)
{
m_cycles += Bus::RAM_READ_TICKS;
@ -1648,6 +1647,7 @@ void CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, Me
if (addr_reg.Index() != RARG1.Index())
rvAsm->MV(RARG1, addr_reg);
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
switch (size)
{
case MemoryAccessSize::Byte:
@ -1723,10 +1723,9 @@ void CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, Me
}
void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,
MemoryAccessSize size)
MemoryAccessSize size, bool use_fastmem)
{
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
if (!checked && CodeCache::IsUsingFastmem())
if (use_fastmem)
{
DebugAssert(value_reg != RSCRATCH);
rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
@ -1774,6 +1773,7 @@ void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, c
if (value_reg.Index() != RARG2.Index())
rvAsm->MV(RARG2, value_reg);
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
switch (size)
{
case MemoryAccessSize::Byte:
@ -1822,12 +1822,12 @@ void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, c
}
}
void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
const GPR addr = ComputeLoadStoreAddressArg(cf, address);
GenerateLoad(addr, size, sign, [this, cf]() {
GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
if (cf.MipsT() == Reg::zero)
return RRET;
@ -1836,11 +1836,11 @@ void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
});
}
void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
DebugAssert(size == MemoryAccessSize::Word && !sign);
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
// TODO: if address is constant, this can be simplified..
@ -1852,7 +1852,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
const GPR addr = GPR(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
ComputeLoadStoreAddressArg(cf, address, addr);
rvAsm->ANDI(RARG1, addr, ~0x3u);
GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; });
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
if (inst->r.rt == Reg::zero)
{
@ -1920,12 +1920,12 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
FreeHostReg(addr.Index());
}
void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
const GPR addr = ComputeLoadStoreAddressArg(cf, address);
GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RRET; });
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, true);
@ -1987,32 +1987,32 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
}
}
void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
AssertRegOrConstS(cf);
AssertRegOrConstT(cf);
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
const GPR addr = ComputeLoadStoreAddressArg(cf, address);
if (!cf.valid_host_t)
MoveTToReg(RARG2, cf);
GenerateStore(addr, cf.valid_host_t ? CFGetRegT(cf) : RARG2, size);
GenerateStore(addr, cf.valid_host_t ? CFGetRegT(cf) : RARG2, size, use_fastmem);
}
void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
DebugAssert(size == MemoryAccessSize::Word && !sign);
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
// TODO: if address is constant, this can be simplified..
// We'd need to be careful here if we weren't overwriting it..
const GPR addr = GPR(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
ComputeLoadStoreAddressArg(cf, address, addr);
rvAsm->ANDI(RARG1, addr, ~0x3u);
GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; });
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the flush
@ -2058,13 +2058,13 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
FreeHostReg(addr.Index());
rvAsm->ANDI(RARG1, addr, ~0x3u);
GenerateStore(RARG1, value, MemoryAccessSize::Word);
GenerateStore(RARG1, value, MemoryAccessSize::Word, use_fastmem);
}
void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, false);
@ -2094,7 +2094,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
}
const GPR addr = ComputeLoadStoreAddressArg(cf, address);
GenerateStore(addr, RARG2, size);
GenerateStore(addr, RARG2, size, use_fastmem);
}
void CPU::NewRec::RISCV64Compiler::Compile_mtc0(CompileFlags cf)

View file

@ -88,19 +88,21 @@ protected:
biscuit::GPR ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
const std::optional<const biscuit::GPR>& reg = std::nullopt);
template<typename RegAllocFn>
void GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, const RegAllocFn& dst_reg_alloc);
void GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, MemoryAccessSize size);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, bool use_fastmem,
const RegAllocFn& dst_reg_alloc);
void GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, MemoryAccessSize size,
bool use_fastmem);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void TestInterrupts(const biscuit::GPR& sr);

View file

@ -1239,10 +1239,9 @@ CPU::NewRec::X64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
template<typename RegAllocFn>
Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign,
const RegAllocFn& dst_reg_alloc)
bool use_fastmem, const RegAllocFn& dst_reg_alloc)
{
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
if (CodeCache::IsUsingFastmem() && !checked)
if (use_fastmem)
{
m_cycles += Bus::RAM_READ_TICKS;
@ -1296,6 +1295,7 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg
if (addr_reg != RWARG1)
cg->mov(RWARG1, addr_reg);
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
switch (size)
{
case MemoryAccessSize::Byte:
@ -1370,10 +1370,9 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg
}
void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg,
MemoryAccessSize size)
MemoryAccessSize size, bool use_fastmem)
{
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
if (CodeCache::IsUsingFastmem() && !checked)
if (use_fastmem)
{
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
{
@ -1417,6 +1416,7 @@ void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const
if (value_reg != RWARG2)
cg->mov(RWARG2, value_reg);
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
switch (size)
{
case MemoryAccessSize::Byte:
@ -1466,16 +1466,16 @@ void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const
}
}
void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
std::optional<Reg32>();
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
const Reg32 data = GenerateLoad(addr, size, sign, [this, cf]() {
const Reg32 data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
if (cf.MipsT() == Reg::zero)
return RWRET;
@ -1495,11 +1495,11 @@ void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize siz
}
}
void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
DebugAssert(size == MemoryAccessSize::Word && !sign);
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
// TODO: if address is constant, this can be simplified..
@ -1512,7 +1512,7 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz
ComputeLoadStoreAddressArg(cf, address, addr);
cg->mov(RWARG1, addr);
cg->and_(RWARG1, ~0x3u);
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
if (inst->r.rt == Reg::zero)
{
@ -1586,15 +1586,15 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz
FreeHostReg(addr.getIdx());
}
void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
std::optional<Reg32>();
FlushForLoadStore(address, false);
FlushForLoadStore(address, false, use_fastmem);
const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RWRET; });
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, true);
@ -1666,19 +1666,19 @@ void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize si
}
}
void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
std::optional<Reg32>();
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
const Reg32 data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
if (!cf.valid_host_t)
MoveTToReg(RWARG2, cf);
GenerateStore(addr, data, size);
GenerateStore(addr, data, size, use_fastmem);
if (g_settings.gpu_pgxp_enable)
{
@ -1691,11 +1691,11 @@ void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize siz
}
}
void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
DebugAssert(size == MemoryAccessSize::Word && !sign);
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
// TODO: if address is constant, this can be simplified..
// We'd need to be careful here if we weren't overwriting it..
@ -1703,7 +1703,7 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
ComputeLoadStoreAddressArg(cf, address, addr);
cg->mov(RWARG1, addr);
cg->and_(RWARG1, ~0x3u);
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
// TODO: this can take over rt's value if it's no longer needed
// NOTE: can't trust T in cf because of the flush
@ -1755,10 +1755,10 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
cg->mov(RWARG1, addr);
cg->and_(RWARG1, ~0x3u);
GenerateStore(RWARG1, value, MemoryAccessSize::Word);
GenerateStore(RWARG1, value, MemoryAccessSize::Word, use_fastmem);
}
void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
@ -1791,19 +1791,19 @@ void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize si
// PGXP makes this a giant pain.
if (!g_settings.gpu_pgxp_enable)
{
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
const Reg32 addr = ComputeLoadStoreAddressArg(cf, address);
GenerateStore(addr, RWARG2, size);
GenerateStore(addr, RWARG2, size, use_fastmem);
return;
}
// TODO: This can be simplified because we don't need to validate in PGXP..
const Reg32 addr_reg = Reg32(AllocateTempHostReg(HR_CALLEE_SAVED));
const Reg32 data_backup = Reg32(AllocateTempHostReg(HR_CALLEE_SAVED));
FlushForLoadStore(address, true);
FlushForLoadStore(address, true, use_fastmem);
ComputeLoadStoreAddressArg(cf, address, addr_reg);
cg->mov(data_backup, RWARG2);
GenerateStore(addr_reg, RWARG2, size);
GenerateStore(addr_reg, RWARG2, size, use_fastmem);
Flush(FLUSH_FOR_C_CALL);
cg->mov(RWARG3, data_backup);
@ -2066,9 +2066,9 @@ void CPU::NewRec::X64Compiler::Compile_cop2(CompileFlags cf)
}
u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
bool is_load)
TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
bool is_load)
{
CodeGenerator acg(thunk_space, thunk_code);
CodeGenerator* cg = &acg;
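
GenerateStore above branches on g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT. A rough host-side picture of the two fastmem flavours follows; the 4 KiB page granularity and table layout are assumptions for this sketch and may not match the emulator's actual scheme.

#include <cstdint>

constexpr uint32_t kPageShift = 12;                 // assumed page size for illustration
constexpr uint32_t kPageMask = (1u << kPageShift) - 1;

// LUT mode: one extra indirection through a per-page host pointer table.
inline uint8_t* LutHostAddress(uint8_t* const* page_table, uint32_t guest_addr)
{
  return page_table[guest_addr >> kPageShift] + (guest_addr & kPageMask);
}

// Direct-mapped mode: a single base pointer covers the guest address space.
inline uint8_t* DirectHostAddress(uint8_t* base, uint32_t guest_addr)
{
  return base + guest_addr;
}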

View file

@ -87,20 +87,21 @@ protected:
Xbyak::Reg32 ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
const std::optional<const Xbyak::Reg32>& reg = std::nullopt);
template<typename RegAllocFn>
Xbyak::Reg32 GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign,
Xbyak::Reg32 GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign, bool use_fastmem,
const RegAllocFn& dst_reg_alloc);
void GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, MemoryAccessSize size);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, MemoryAccessSize size,
bool use_fastmem);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address) override;
void TestInterrupts(const Xbyak::Reg32& sr);
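
The override declarations above imply a shared set of pure-virtual memory-op hooks in the common compiler base class, each now carrying the caller-computed use_fastmem flag alongside size/sign and the optional known address. A reconstructed sketch of one such hook, with the project types stubbed out so the example stands alone; the struct and enum values here are placeholders, not the project's real definitions.

#include <cstdint>
#include <optional>

enum class MemoryAccessSize { Byte, HalfWord, Word }; // stub
using VirtualMemoryAddress = uint32_t;                // stub
struct CompileFlags { };                               // stub

struct CompilerMemoryOps // hypothetical name for the shared base interface
{
  virtual ~CompilerMemoryOps() = default;
  // one hook per memory-instruction family (lxx/lwx/lwc2/sxx/swx/swc2)
  virtual void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                           const std::optional<VirtualMemoryAddress>& address) = 0;
};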