mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-26 07:35:41 +00:00
CPU/NewRec: Add speculative constants
This commit is contained in:
parent
b3cbe5a7ee
commit
6592cafadc
|
@ -66,6 +66,7 @@ static void FillBlockRegInfo(Block* block);
|
|||
static void CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src);
|
||||
static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write);
|
||||
static void AddBlockToPageList(Block* block);
|
||||
static void RemoveBlockFromPageList(Block* block);
|
||||
|
||||
static Common::PageFaultHandler::HandlerResult ExceptionHandler(void* exception_pc, void* fault_address, bool is_write);
|
||||
|
||||
|
@ -526,7 +527,7 @@ bool CPU::CodeCache::IsBlockCodeCurrent(const Block* block)
|
|||
bool CPU::CodeCache::RevalidateBlock(Block* block)
|
||||
{
|
||||
DebugAssert(block->state != BlockState::Valid);
|
||||
DebugAssert(AddressInRAM(block->pc));
|
||||
DebugAssert(AddressInRAM(block->pc) || block->state == BlockState::NeedsRecompile);
|
||||
|
||||
if (block->state >= BlockState::NeedsRecompile)
|
||||
return false;
|
||||
|
@ -569,6 +570,39 @@ void CPU::CodeCache::AddBlockToPageList(Block* block)
|
|||
}
|
||||
}
|
||||
|
||||
// Unlinks `block` from the singly-linked block list kept for its starting code page.
// Nothing is done unless the block's pc is in RAM and its protection mode is WriteProtected.
// If the block is not present in the page's list, the list is left untouched.
void CPU::CodeCache::RemoveBlockFromPageList(Block* block)
{
  DebugAssert(block->size > 0);

  const bool is_listed = AddressInRAM(block->pc) && block->protection == PageProtectionMode::WriteProtected;
  if (!is_listed)
    return;

  PageProtectionInfo& page = s_page_protection[block->StartPageIndex()];

  // Walk the list while remembering the predecessor, so the node can be spliced out.
  Block* before = nullptr;
  for (Block* node = page.first_block_in_page; node; before = node, node = node->next_block_in_page)
  {
    if (node != block)
      continue;

    Block* const after = node->next_block_in_page;

    // Removing the head updates the page entry; otherwise the predecessor skips over the node.
    if (before)
      before->next_block_in_page = after;
    else
      page.first_block_in_page = after;

    // Removing the tail makes the predecessor (possibly null) the new tail.
    if (!after)
      page.last_block_in_page = before;

    node->next_block_in_page = nullptr;
    return;
  }
}
|
||||
|
||||
void CPU::CodeCache::InvalidateBlocksWithPageIndex(u32 index)
|
||||
{
|
||||
DebugAssert(index < Bus::RAM_8MB_CODE_PAGE_COUNT);
|
||||
|
@ -1480,13 +1514,14 @@ void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 gue
|
|||
LoadstoreBackpatchInfo info;
|
||||
info.thunk_address = thunk_address;
|
||||
info.guest_pc = guest_pc;
|
||||
info.guest_block = 0;
|
||||
info.code_size = static_cast<u8>(code_size);
|
||||
s_fastmem_backpatch_info.emplace(code_address, info);
|
||||
}
|
||||
|
||||
void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles,
|
||||
u32 gpr_bitmask, u8 address_register, u8 data_register, MemoryAccessSize size,
|
||||
bool is_signed, bool is_load)
|
||||
void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, u32 guest_block,
|
||||
TickCount cycles, u32 gpr_bitmask, u8 address_register, u8 data_register,
|
||||
MemoryAccessSize size, bool is_signed, bool is_load)
|
||||
{
|
||||
DebugAssert(code_size < std::numeric_limits<u8>::max());
|
||||
DebugAssert(cycles >= 0 && cycles < std::numeric_limits<u16>::max());
|
||||
|
@ -1498,6 +1533,7 @@ void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 gue
|
|||
LoadstoreBackpatchInfo info;
|
||||
info.thunk_address = nullptr;
|
||||
info.guest_pc = guest_pc;
|
||||
info.guest_block = guest_block;
|
||||
info.gpr_bitmask = gpr_bitmask;
|
||||
info.cycles = static_cast<u16>(cycles);
|
||||
info.address_register = address_register;
|
||||
|
@ -1562,7 +1598,22 @@ Common::PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(v
|
|||
|
||||
BackpatchLoadStore(exception_pc, info);
|
||||
|
||||
// TODO: queue block for recompilation later
|
||||
// queue block for recompilation later
|
||||
if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
|
||||
{
|
||||
Block* block = LookupBlock(info.guest_block);
|
||||
if (block)
|
||||
{
|
||||
// This is a bit annoying, we have to remove it from the page list if it's a RAM block.
|
||||
Log_DevFmt("Queuing block {:08X} for recompilation due to backpatch", block->pc);
|
||||
RemoveBlockFromPageList(block);
|
||||
InvalidateBlock(block, BlockState::NeedsRecompile);
|
||||
|
||||
// Need to reset the recompile count, otherwise it'll get trolled into an interpreter fallback.
|
||||
block->compile_frame = System::GetFrameNumber();
|
||||
block->compile_count = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// and store the pc in the faulting list, so that we don't emit another fastmem loadstore
|
||||
s_fastmem_faulting_pcs.insert(info.guest_pc);
|
||||
|
@ -1570,6 +1621,11 @@ Common::PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(v
|
|||
return Common::PageFaultHandler::HandlerResult::ContinueExecution;
|
||||
}
|
||||
|
||||
// Returns true when `guest_pc` is present in s_fastmem_faulting_pcs, i.e. the set that
// HandleFastmemException() inserts a guest pc into after backpatching a faulting load/store.
bool CPU::CodeCache::HasPreviouslyFaultedOnPC(u32 guest_pc)
{
  const auto iter = s_fastmem_faulting_pcs.find(guest_pc);
  const bool was_recorded = (iter != s_fastmem_faulting_pcs.end());
  return was_recorded;
}
|
||||
|
||||
void CPU::CodeCache::BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info)
|
||||
{
|
||||
s_code_buffer.WriteProtect(false);
|
||||
|
|
|
@ -196,12 +196,17 @@ struct LoadstoreBackpatchInfo
|
|||
};
|
||||
|
||||
u32 guest_pc;
|
||||
u32 guest_block;
|
||||
u8 code_size;
|
||||
|
||||
MemoryAccessSize AccessSize() const { return static_cast<MemoryAccessSize>(size); }
|
||||
u32 AccessSizeInBytes() const { return 1u << size; }
|
||||
};
|
||||
static_assert(sizeof(LoadstoreBackpatchInfo) == 16);
|
||||
#ifdef CPU_ARCH_ARM32
|
||||
static_assert(sizeof(LoadstoreBackpatchInfo) == 20);
|
||||
#else
|
||||
static_assert(sizeof(LoadstoreBackpatchInfo) == 24);
|
||||
#endif
|
||||
|
||||
static inline bool AddressInRAM(VirtualMemoryAddress pc)
|
||||
{
|
||||
|
@ -248,8 +253,10 @@ void DiscardAndRecompileBlock(u32 start_pc);
|
|||
const void* CreateBlockLink(Block* from_block, void* code, u32 newpc);
|
||||
|
||||
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, const void* thunk_address);
|
||||
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles, u32 gpr_bitmask,
|
||||
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, bool is_load);
|
||||
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, u32 guest_block, TickCount cycles,
|
||||
u32 gpr_bitmask, u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
|
||||
bool is_load);
|
||||
bool HasPreviouslyFaultedOnPC(u32 guest_pc);
|
||||
|
||||
u32 EmitASMFunctions(void* code, u32 code_size);
|
||||
u32 EmitJump(void* code, const void* dst, bool flush_icache);
|
||||
|
|
|
@ -59,6 +59,8 @@ void CPU::NewRec::Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32
|
|||
m_load_delay_dirty = EMULATE_LOAD_DELAYS;
|
||||
m_load_delay_register = Reg::count;
|
||||
m_load_delay_value_register = NUM_HOST_REGS;
|
||||
|
||||
InitSpeculativeRegs();
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::BeginBlock()
|
||||
|
@ -133,6 +135,7 @@ const void* CPU::NewRec::Compiler::CompileBlock(CodeCache::Block* block, u32* ho
|
|||
DebugAssert(!IsHostRegAllocated(i));
|
||||
for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
|
||||
DebugAssert(!m_constant_regs_dirty.test(i) && !m_constant_regs_valid.test(i));
|
||||
m_speculative_constants.memory.clear();
|
||||
|
||||
u32 code_size, far_code_size;
|
||||
const void* code = EndCompile(&code_size, &far_code_size);
|
||||
|
@ -494,7 +497,7 @@ bool CPU::NewRec::Compiler::TrySwapDelaySlot(Reg rs, Reg rt, Reg rd)
|
|||
|
||||
is_safe:
|
||||
#ifdef _DEBUG
|
||||
Log_DevFmt("Swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
|
||||
Log_DebugFmt("Swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
|
||||
#endif
|
||||
|
||||
CompileBranchDelaySlot();
|
||||
|
@ -506,7 +509,7 @@ is_safe:
|
|||
|
||||
is_unsafe:
|
||||
#ifdef _DEBUG
|
||||
Log_DevFmt("NOT swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
|
||||
Log_DebugFmt("NOT swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
|
||||
#endif
|
||||
|
||||
return false;
|
||||
|
@ -1079,6 +1082,9 @@ void CPU::NewRec::Compiler::Flush(u32 flags)
|
|||
FlushConstantRegs(false);
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS)
|
||||
InvalidateSpeculativeValues();
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::FlushConstantReg(Reg r)
|
||||
|
@ -1161,9 +1167,9 @@ void CPU::NewRec::Compiler::AddLoadStoreInfo(void* code_address, u32 code_size,
|
|||
gpr_bitmask |= (1u << i);
|
||||
}
|
||||
|
||||
CPU::CodeCache::AddLoadStoreInfo(code_address, code_size, m_current_instruction_pc, m_cycles, gpr_bitmask,
|
||||
static_cast<u8>(address_register), static_cast<u8>(data_register), size, is_signed,
|
||||
is_load);
|
||||
CPU::CodeCache::AddLoadStoreInfo(code_address, code_size, m_current_instruction_pc, m_block->pc, m_cycles,
|
||||
gpr_bitmask, static_cast<u8>(address_register), static_cast<u8>(data_register), size,
|
||||
is_signed, is_load);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::CompileInstruction()
|
||||
|
@ -1194,34 +1200,34 @@ void CPU::NewRec::Compiler::CompileInstruction()
|
|||
{
|
||||
switch (inst->r.funct)
|
||||
{
|
||||
case InstructionFunct::sll: CompileTemplate(&Compiler::Compile_sll_const, &Compiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); break;
|
||||
case InstructionFunct::srl: CompileTemplate(&Compiler::Compile_srl_const, &Compiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); break;
|
||||
case InstructionFunct::sra: CompileTemplate(&Compiler::Compile_sra_const, &Compiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); break;
|
||||
case InstructionFunct::sllv: CompileTemplate(&Compiler::Compile_sllv_const, &Compiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); break;
|
||||
case InstructionFunct::srlv: CompileTemplate(&Compiler::Compile_srlv_const, &Compiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); break;
|
||||
case InstructionFunct::srav: CompileTemplate(&Compiler::Compile_srav_const, &Compiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); break;
|
||||
case InstructionFunct::sll: CompileTemplate(&Compiler::Compile_sll_const, &Compiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); SpecExec_sll(); break;
|
||||
case InstructionFunct::srl: CompileTemplate(&Compiler::Compile_srl_const, &Compiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); SpecExec_srl(); break;
|
||||
case InstructionFunct::sra: CompileTemplate(&Compiler::Compile_sra_const, &Compiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); SpecExec_sra(); break;
|
||||
case InstructionFunct::sllv: CompileTemplate(&Compiler::Compile_sllv_const, &Compiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_sllv(); break;
|
||||
case InstructionFunct::srlv: CompileTemplate(&Compiler::Compile_srlv_const, &Compiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srlv(); break;
|
||||
case InstructionFunct::srav: CompileTemplate(&Compiler::Compile_srav_const, &Compiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srav(); break;
|
||||
case InstructionFunct::jr: CompileTemplate(&Compiler::Compile_jr_const, &Compiler::Compile_jr, nullptr, TF_READS_S); break;
|
||||
case InstructionFunct::jalr: CompileTemplate(&Compiler::Compile_jalr_const, &Compiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); break;
|
||||
case InstructionFunct::jalr: CompileTemplate(&Compiler::Compile_jalr_const, &Compiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); SpecExec_jalr(); break;
|
||||
case InstructionFunct::syscall: Compile_syscall(); break;
|
||||
case InstructionFunct::break_: Compile_break(); break;
|
||||
case InstructionFunct::mfhi: CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break;
|
||||
case InstructionFunct::mthi: CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
|
||||
case InstructionFunct::mflo: CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break;
|
||||
case InstructionFunct::mtlo: CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
|
||||
case InstructionFunct::mult: CompileTemplate(&Compiler::Compile_mult_const, &Compiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); break;
|
||||
case InstructionFunct::multu: CompileTemplate(&Compiler::Compile_multu_const, &Compiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); break;
|
||||
case InstructionFunct::div: CompileTemplate(&Compiler::Compile_div_const, &Compiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); break;
|
||||
case InstructionFunct::divu: CompileTemplate(&Compiler::Compile_divu_const, &Compiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); break;
|
||||
case InstructionFunct::add: CompileTemplate(&Compiler::Compile_add_const, &Compiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); break;
|
||||
case InstructionFunct::addu: CompileTemplate(&Compiler::Compile_addu_const, &Compiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); break;
|
||||
case InstructionFunct::sub: CompileTemplate(&Compiler::Compile_sub_const, &Compiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); break;
|
||||
case InstructionFunct::subu: CompileTemplate(&Compiler::Compile_subu_const, &Compiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); break;
|
||||
case InstructionFunct::and_: CompileTemplate(&Compiler::Compile_and_const, &Compiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); break;
|
||||
case InstructionFunct::or_: CompileTemplate(&Compiler::Compile_or_const, &Compiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); break;
|
||||
case InstructionFunct::xor_: CompileTemplate(&Compiler::Compile_xor_const, &Compiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); break;
|
||||
case InstructionFunct::nor: CompileTemplate(&Compiler::Compile_nor_const, &Compiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); break;
|
||||
case InstructionFunct::slt: CompileTemplate(&Compiler::Compile_slt_const, &Compiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); break;
|
||||
case InstructionFunct::sltu: CompileTemplate(&Compiler::Compile_sltu_const, &Compiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); break;
|
||||
case InstructionFunct::mfhi: SpecCopyReg(inst->r.rd, Reg::hi); CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break;
|
||||
case InstructionFunct::mthi: SpecCopyReg(Reg::hi, inst->r.rs); CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
|
||||
case InstructionFunct::mflo: SpecCopyReg(inst->r.rd, Reg::lo); CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break;
|
||||
case InstructionFunct::mtlo: SpecCopyReg(Reg::lo, inst->r.rs); CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
|
||||
case InstructionFunct::mult: CompileTemplate(&Compiler::Compile_mult_const, &Compiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_mult(); break;
|
||||
case InstructionFunct::multu: CompileTemplate(&Compiler::Compile_multu_const, &Compiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_multu(); break;
|
||||
case InstructionFunct::div: CompileTemplate(&Compiler::Compile_div_const, &Compiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_div(); break;
|
||||
case InstructionFunct::divu: CompileTemplate(&Compiler::Compile_divu_const, &Compiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_divu(); break;
|
||||
case InstructionFunct::add: CompileTemplate(&Compiler::Compile_add_const, &Compiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_add(); break;
|
||||
case InstructionFunct::addu: CompileTemplate(&Compiler::Compile_addu_const, &Compiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_addu(); break;
|
||||
case InstructionFunct::sub: CompileTemplate(&Compiler::Compile_sub_const, &Compiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_sub(); break;
|
||||
case InstructionFunct::subu: CompileTemplate(&Compiler::Compile_subu_const, &Compiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); SpecExec_subu(); break;
|
||||
case InstructionFunct::and_: CompileTemplate(&Compiler::Compile_and_const, &Compiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_and(); break;
|
||||
case InstructionFunct::or_: CompileTemplate(&Compiler::Compile_or_const, &Compiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_or(); break;
|
||||
case InstructionFunct::xor_: CompileTemplate(&Compiler::Compile_xor_const, &Compiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_xor(); break;
|
||||
case InstructionFunct::nor: CompileTemplate(&Compiler::Compile_nor_const, &Compiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_nor(); break;
|
||||
case InstructionFunct::slt: CompileTemplate(&Compiler::Compile_slt_const, &Compiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_slt(); break;
|
||||
case InstructionFunct::sltu: CompileTemplate(&Compiler::Compile_sltu_const, &Compiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_sltu(); break;
|
||||
|
||||
default: Panic("fixme funct"); break;
|
||||
}
|
||||
|
@ -1229,35 +1235,35 @@ void CPU::NewRec::Compiler::CompileInstruction()
|
|||
break;
|
||||
|
||||
case InstructionOp::j: Compile_j(); break;
|
||||
case InstructionOp::jal: Compile_jal(); break;
|
||||
case InstructionOp::jal: Compile_jal(); SpecExec_jal(); break;
|
||||
|
||||
case InstructionOp::b: CompileTemplate(&Compiler::Compile_b_const, &Compiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
|
||||
case InstructionOp::b: CompileTemplate(&Compiler::Compile_b_const, &Compiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); SpecExec_b(); break;
|
||||
case InstructionOp::blez: CompileTemplate(&Compiler::Compile_blez_const, &Compiler::Compile_blez, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
|
||||
case InstructionOp::bgtz: CompileTemplate(&Compiler::Compile_bgtz_const, &Compiler::Compile_bgtz, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
|
||||
case InstructionOp::beq: CompileTemplate(&Compiler::Compile_beq_const, &Compiler::Compile_beq, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;
|
||||
case InstructionOp::bne: CompileTemplate(&Compiler::Compile_bne_const, &Compiler::Compile_bne, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;
|
||||
|
||||
case InstructionOp::addi: CompileTemplate(&Compiler::Compile_addi_const, &Compiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); break;
|
||||
case InstructionOp::addiu: CompileTemplate(&Compiler::Compile_addiu_const, &Compiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); break;
|
||||
case InstructionOp::slti: CompileTemplate(&Compiler::Compile_slti_const, &Compiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); break;
|
||||
case InstructionOp::sltiu: CompileTemplate(&Compiler::Compile_sltiu_const, &Compiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); break;
|
||||
case InstructionOp::andi: CompileTemplate(&Compiler::Compile_andi_const, &Compiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); break;
|
||||
case InstructionOp::ori: CompileTemplate(&Compiler::Compile_ori_const, &Compiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); break;
|
||||
case InstructionOp::xori: CompileTemplate(&Compiler::Compile_xori_const, &Compiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); break;
|
||||
case InstructionOp::lui: Compile_lui(); break;
|
||||
case InstructionOp::addi: CompileTemplate(&Compiler::Compile_addi_const, &Compiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); SpecExec_addi(); break;
|
||||
case InstructionOp::addiu: CompileTemplate(&Compiler::Compile_addiu_const, &Compiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_addiu(); break;
|
||||
case InstructionOp::slti: CompileTemplate(&Compiler::Compile_slti_const, &Compiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); SpecExec_slti(); break;
|
||||
case InstructionOp::sltiu: CompileTemplate(&Compiler::Compile_sltiu_const, &Compiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); SpecExec_sltiu(); break;
|
||||
case InstructionOp::andi: CompileTemplate(&Compiler::Compile_andi_const, &Compiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); SpecExec_andi(); break;
|
||||
case InstructionOp::ori: CompileTemplate(&Compiler::Compile_ori_const, &Compiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_ori(); break;
|
||||
case InstructionOp::xori: CompileTemplate(&Compiler::Compile_xori_const, &Compiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_xori(); break;
|
||||
case InstructionOp::lui: Compile_lui(); SpecExec_lui(); break;
|
||||
|
||||
case InstructionOp::lb: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break;
|
||||
case InstructionOp::lbu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break;
|
||||
case InstructionOp::lh: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break;
|
||||
case InstructionOp::lhu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break;
|
||||
case InstructionOp::lw: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); break;
|
||||
case InstructionOp::lwl: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break;
|
||||
case InstructionOp::lwr: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break;
|
||||
case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); break;
|
||||
case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); break;
|
||||
case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); break;
|
||||
case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break;
|
||||
case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); break;
|
||||
case InstructionOp::lb: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, true); break;
|
||||
case InstructionOp::lbu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, false); break;
|
||||
case InstructionOp::lh: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, true); break;
|
||||
case InstructionOp::lhu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, false); break;
|
||||
case InstructionOp::lw: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Word, false); break;
|
||||
case InstructionOp::lwl: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(false); break;
|
||||
case InstructionOp::lwr: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(true); break;
|
||||
case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break;
|
||||
case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break;
|
||||
case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break;
|
||||
case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(false); break;
|
||||
case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_swx(true); break;
|
||||
|
||||
case InstructionOp::cop0:
|
||||
{
|
||||
|
@ -1265,8 +1271,8 @@ void CPU::NewRec::Compiler::CompileInstruction()
|
|||
{
|
||||
switch (inst->cop.CommonOp())
|
||||
{
|
||||
case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc0, nullptr, TF_WRITES_T | TF_LOAD_DELAY); } break;
|
||||
case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); break;
|
||||
case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc0, nullptr, TF_WRITES_T | TF_LOAD_DELAY); } SpecExec_mfc0(); break;
|
||||
case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); SpecExec_mtc0(); break;
|
||||
default: Compile_Fallback(); break;
|
||||
}
|
||||
}
|
||||
|
@ -1274,7 +1280,7 @@ void CPU::NewRec::Compiler::CompileInstruction()
|
|||
{
|
||||
switch (inst->cop.Cop0Op())
|
||||
{
|
||||
case Cop0Instruction::rfe: CompileTemplate(nullptr, &Compiler::Compile_rfe, nullptr, 0); break;
|
||||
case Cop0Instruction::rfe: CompileTemplate(nullptr, &Compiler::Compile_rfe, nullptr, 0); SpecExec_rfe(); break;
|
||||
default: Compile_Fallback(); break;
|
||||
}
|
||||
}
|
||||
|
@ -1303,7 +1309,7 @@ void CPU::NewRec::Compiler::CompileInstruction()
|
|||
break;
|
||||
|
||||
case InstructionOp::lwc2: CompileLoadStoreTemplate(&Compiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_GTE_STALL | TF_READS_S | TF_LOAD_DELAY); break;
|
||||
case InstructionOp::swc2: CompileLoadStoreTemplate(&Compiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); break;
|
||||
case InstructionOp::swc2: CompileLoadStoreTemplate(&Compiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); SpecExec_swc2(); break;
|
||||
|
||||
default: Panic("Fixme"); break;
|
||||
// clang-format on
|
||||
|
@ -1567,7 +1573,7 @@ void CPU::NewRec::Compiler::CompileTemplate(void (Compiler::*const_func)(Compile
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool,
|
||||
void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool,
|
||||
const std::optional<VirtualMemoryAddress>&),
|
||||
MemoryAccessSize size, bool store, bool sign, u32 tflags)
|
||||
{
|
||||
|
@ -1595,13 +1601,28 @@ void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(Comp
|
|||
|
||||
// constant address?
|
||||
std::optional<VirtualMemoryAddress> addr;
|
||||
bool use_fastmem = CodeCache::IsUsingFastmem() && !g_settings.cpu_recompiler_memory_exceptions &&
|
||||
!SpecIsCacheIsolated() && !CodeCache::HasPreviouslyFaultedOnPC(m_current_instruction_pc);
|
||||
if (HasConstantReg(rs))
|
||||
{
|
||||
addr = GetConstantRegU32(rs) + inst->i.imm_sext32();
|
||||
cf.const_s = true;
|
||||
|
||||
if (!Bus::CanUseFastmemForAddress(addr.value()))
|
||||
{
|
||||
Log_DebugFmt("Not using fastmem for {:08X}", addr.value());
|
||||
use_fastmem = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const std::optional<VirtualMemoryAddress> spec_addr = SpecExec_LoadStoreAddr();
|
||||
if (use_fastmem && spec_addr.has_value() && !Bus::CanUseFastmemForAddress(spec_addr.value()))
|
||||
{
|
||||
Log_DebugFmt("Not using fastmem for speculative {:08X}", spec_addr.value());
|
||||
use_fastmem = false;
|
||||
}
|
||||
|
||||
if constexpr (HAS_MEMORY_OPERANDS)
|
||||
{
|
||||
// don't bother caching it since we're going to flush anyway
|
||||
|
@ -1648,12 +1669,13 @@ void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(Comp
|
|||
}
|
||||
}
|
||||
|
||||
(this->*func)(cf, size, sign, addr);
|
||||
(this->*func)(cf, size, sign, use_fastmem, addr);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store)
|
||||
void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store,
|
||||
bool use_fastmem)
|
||||
{
|
||||
if (CodeCache::IsUsingFastmem() && !g_settings.cpu_recompiler_memory_exceptions)
|
||||
if (use_fastmem)
|
||||
return;
|
||||
|
||||
// TODO: Stores don't need to flush GTE cycles...
|
||||
|
@ -2275,3 +2297,480 @@ void CPU::NewRec::BackpatchLoadStore(void* exception_pc, const CodeCache::Loadst
|
|||
|
||||
buffer.CommitFarCode(thunk_size);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::InitSpeculativeRegs()
|
||||
{
|
||||
for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
|
||||
m_speculative_constants.regs[i] = g_state.regs.r[i];
|
||||
|
||||
m_speculative_constants.cop0_sr = g_state.cop0_regs.sr.bits;
|
||||
m_speculative_constants.memory.clear();
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::InvalidateSpeculativeValues()
|
||||
{
|
||||
m_speculative_constants.regs.fill(std::nullopt);
|
||||
m_speculative_constants.memory.clear();
|
||||
m_speculative_constants.cop0_sr.reset();
|
||||
}
|
||||
|
||||
CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadReg(Reg reg)
{
  // Returns the speculated value of `reg`, or an empty optional when unknown.
  const u8 slot = static_cast<u8>(reg);
  return m_speculative_constants.regs[slot];
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecWriteReg(Reg reg, SpecValue value)
{
  // Record `value` (possibly empty) for `reg`; writes targeting Reg::zero are dropped.
  if (reg != Reg::zero)
    m_speculative_constants.regs[static_cast<u8>(reg)] = value;
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecInvalidateReg(Reg reg)
{
  // Mark `reg` as unknown; the Reg::zero slot is left untouched.
  if (reg != Reg::zero)
    m_speculative_constants.regs[static_cast<u8>(reg)].reset();
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecCopyReg(Reg dst, Reg src)
{
  // Propagate the speculative state (known value or unknown) of `src` to `dst`.
  // Copies into Reg::zero are ignored.
  if (dst != Reg::zero)
  {
    auto& regs = m_speculative_constants.regs;
    regs[static_cast<u8>(dst)] = regs[static_cast<u8>(src)];
  }
}
|
||||
|
||||
CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadMem(VirtualMemoryAddress address)
{
  // Entries recorded by earlier speculative stores win over the backing memory.
  auto& tracked = m_speculative_constants.memory;
  if (const auto entry = tracked.find(address); entry != tracked.end())
    return entry->second;

  // Otherwise peek at the current contents of the data cache or RAM. A full 32-bit
  // word is copied starting at the masked offset, whatever the access size.
  // NOTE(review): confirm both buffers tolerate a 4-byte read that starts within
  // their last three bytes.
  u32 word;
  if ((address & DCACHE_LOCATION_MASK) == DCACHE_LOCATION)
  {
    std::memcpy(&word, &CPU::g_state.dcache[address & DCACHE_OFFSET_MASK], sizeof(word));
    return word;
  }

  const PhysicalMemoryAddress phys = address & PHYSICAL_MEMORY_ADDRESS_MASK;
  if (!Bus::IsRAMAddress(phys))
    return std::nullopt; // any other region is never speculated

  std::memcpy(&word, &Bus::g_ram[phys & Bus::g_ram_mask], sizeof(word));
  return word;
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecWriteMem(u32 address, SpecValue value)
{
  auto& tracked = m_speculative_constants.memory;

  // Already tracked: overwrite in place (an empty value marks it unknown).
  if (const auto entry = tracked.find(address); entry != tracked.end())
  {
    entry->second = value;
    return;
  }

  // Only data cache and RAM addresses get a new entry; other regions are ignored.
  const bool in_dcache = (address & DCACHE_LOCATION_MASK) == DCACHE_LOCATION;
  if (!in_dcache && !Bus::IsRAMAddress(address & PHYSICAL_MEMORY_ADDRESS_MASK))
    return;

  tracked.emplace(address, value);
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecInvalidateMem(VirtualMemoryAddress address)
{
  // Invalidation is a speculative write of "unknown" to the address.
  const SpecValue unknown = std::nullopt;
  SpecWriteMem(address, unknown);
}
|
||||
|
||||
bool CPU::NewRec::Compiler::SpecIsCacheIsolated()
|
||||
{
|
||||
if (!m_speculative_constants.cop0_sr.has_value())
|
||||
return false;
|
||||
|
||||
const Cop0Registers::SR sr{m_speculative_constants.cop0_sr.value()};
|
||||
return sr.Isc;
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_b()
|
||||
{
|
||||
const bool link = (static_cast<u8>(inst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10);
|
||||
if (link)
|
||||
SpecWriteReg(Reg::ra, m_compiler_pc);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_jal()
|
||||
{
|
||||
SpecWriteReg(Reg::ra, m_compiler_pc);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_jalr()
|
||||
{
|
||||
SpecWriteReg(inst->r.rd, m_compiler_pc);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_sll()
|
||||
{
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, rt.value() << inst->r.shamt);
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_srl()
|
||||
{
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, rt.value() >> inst->r.shamt);
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_sra()
|
||||
{
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> inst->r.shamt));
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_sllv()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, rt.value() << (rs.value() & 0x1F));
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_srlv()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, rt.value() >> (rs.value() & 0x1F));
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_srav()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> (rs.value() & 0x1F)));
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_mult()
|
||||
{
|
||||
// TODO
|
||||
SpecInvalidateReg(Reg::hi);
|
||||
SpecInvalidateReg(Reg::lo);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_multu()
|
||||
{
|
||||
// TODO
|
||||
SpecInvalidateReg(Reg::hi);
|
||||
SpecInvalidateReg(Reg::lo);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_div()
|
||||
{
|
||||
// TODO
|
||||
SpecInvalidateReg(Reg::hi);
|
||||
SpecInvalidateReg(Reg::lo);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_divu()
|
||||
{
|
||||
// TODO
|
||||
SpecInvalidateReg(Reg::hi);
|
||||
SpecInvalidateReg(Reg::lo);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_add()
|
||||
{
|
||||
SpecExec_addu();
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_addu()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, rs.value() + rt.value());
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_sub()
|
||||
{
|
||||
SpecExec_subu();
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_subu()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, rs.value() - rt.value());
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_and()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, rs.value() & rt.value());
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_or()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, rs.value() | rt.value());
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_xor()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, rs.value() ^ rt.value());
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_nor()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, ~(rs.value() | rt.value()));
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_slt()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(rt.value())));
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_sltu()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->r.rs);
|
||||
const SpecValue rt = SpecReadReg(inst->r.rt);
|
||||
if (rs.has_value() && rt.has_value())
|
||||
SpecWriteReg(inst->r.rd, BoolToUInt32(rs.value() < rt.value()));
|
||||
else
|
||||
SpecInvalidateReg(inst->r.rd);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_addi()
|
||||
{
|
||||
SpecExec_addiu();
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_addiu()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->i.rs);
|
||||
if (rs.has_value())
|
||||
SpecWriteReg(inst->i.rt, rs.value() + inst->i.imm_sext32());
|
||||
else
|
||||
SpecInvalidateReg(inst->i.rt);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_slti()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->i.rs);
|
||||
if (rs.has_value())
|
||||
SpecWriteReg(inst->i.rt, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(inst->i.imm_sext32())));
|
||||
else
|
||||
SpecInvalidateReg(inst->i.rt);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_sltiu()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->i.rs);
|
||||
if (rs.has_value())
|
||||
SpecWriteReg(inst->i.rt, BoolToUInt32(rs.value() < inst->i.imm_sext32()));
|
||||
else
|
||||
SpecInvalidateReg(inst->i.rt);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_andi()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->i.rs);
|
||||
if (rs.has_value())
|
||||
SpecWriteReg(inst->i.rt, rs.value() & inst->i.imm_zext32());
|
||||
else
|
||||
SpecInvalidateReg(inst->i.rt);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_ori()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->i.rs);
|
||||
if (rs.has_value())
|
||||
SpecWriteReg(inst->i.rt, rs.value() | inst->i.imm_zext32());
|
||||
else
|
||||
SpecInvalidateReg(inst->i.rt);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_xori()
|
||||
{
|
||||
const SpecValue rs = SpecReadReg(inst->i.rs);
|
||||
if (rs.has_value())
|
||||
SpecWriteReg(inst->i.rt, rs.value() ^ inst->i.imm_zext32());
|
||||
else
|
||||
SpecInvalidateReg(inst->i.rt);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_lui()
|
||||
{
|
||||
SpecWriteReg(inst->i.rt, inst->i.imm_zext32() << 16);
|
||||
}
|
||||
|
||||
CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecExec_LoadStoreAddr()
{
  // Effective address = speculated rs + sign-extended immediate.
  // An unknown base register yields an unknown address.
  const SpecValue base = SpecReadReg(inst->i.rs);
  if (!base.has_value())
    return base;

  return base.value() + inst->i.imm_sext32();
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_lxx(MemoryAccessSize size, bool sign)
{
  // Speculate a load: resolve the address, fetch the speculated word, then narrow
  // it to the access size with sign or zero extension. Any unknown along the way
  // makes rt unknown.
  const SpecValue addr = SpecExec_LoadStoreAddr();
  SpecValue loaded;
  if (addr.has_value())
    loaded = SpecReadMem(addr.value());

  if (!loaded.has_value())
  {
    SpecInvalidateReg(inst->i.rt);
    return;
  }

  switch (size)
  {
    case MemoryAccessSize::Byte:
    {
      const u8 low_byte = static_cast<u8>(loaded.value());
      loaded = sign ? SignExtend32(low_byte) : ZeroExtend32(low_byte);
    }
    break;

    case MemoryAccessSize::HalfWord:
    {
      const u16 low_half = static_cast<u16>(loaded.value());
      loaded = sign ? SignExtend32(low_half) : ZeroExtend32(low_half);
    }
    break;

    case MemoryAccessSize::Word:
      // full word: nothing to narrow
      break;

    default:
      UnreachableCode();
  }

  SpecWriteReg(inst->r.rt, loaded);
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_lwx(bool lwr)
|
||||
{
|
||||
// TODO
|
||||
SpecInvalidateReg(inst->i.rt);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_sxx(MemoryAccessSize size)
{
  // Speculate a store. Without a known address nothing is recorded.
  const SpecValue target = SpecExec_LoadStoreAddr();
  if (!target.has_value())
    return;

  // An unknown source value is still written, which marks the location unknown.
  SpecValue stored = SpecReadReg(inst->i.rt);
  if (!stored.has_value())
  {
    SpecWriteMem(target.value(), stored);
    return;
  }

  // Truncate the known value to the width of the access.
  switch (size)
  {
    case MemoryAccessSize::Byte:
      stored = ZeroExtend32(static_cast<u8>(stored.value()));
      break;

    case MemoryAccessSize::HalfWord:
      stored = ZeroExtend32(static_cast<u16>(stored.value()));
      break;

    case MemoryAccessSize::Word:
      break;

    default:
      UnreachableCode();
  }

  SpecWriteMem(target.value(), stored);
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_swx(bool swr)
|
||||
{
|
||||
const SpecValue addr = SpecExec_LoadStoreAddr();
|
||||
if (addr.has_value())
|
||||
SpecInvalidateMem(addr.value() & ~3u);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_swc2()
|
||||
{
|
||||
const SpecValue addr = SpecExec_LoadStoreAddr();
|
||||
if (addr.has_value())
|
||||
SpecInvalidateMem(addr.value());
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_mfc0()
|
||||
{
|
||||
const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue());
|
||||
if (rd != Cop0Reg::SR)
|
||||
{
|
||||
SpecInvalidateReg(inst->r.rt);
|
||||
return;
|
||||
}
|
||||
|
||||
SpecWriteReg(inst->r.rt, m_speculative_constants.cop0_sr);
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_mtc0()
|
||||
{
|
||||
const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue());
|
||||
if (rd != Cop0Reg::SR || !m_speculative_constants.cop0_sr.has_value())
|
||||
return;
|
||||
|
||||
SpecValue val = SpecReadReg(inst->r.rt);
|
||||
if (val.has_value())
|
||||
{
|
||||
constexpr u32 mask = Cop0Registers::SR::WRITE_MASK;
|
||||
val = (m_speculative_constants.cop0_sr.value() & mask) | (val.value() & mask);
|
||||
}
|
||||
|
||||
m_speculative_constants.cop0_sr = val;
|
||||
}
|
||||
|
||||
void CPU::NewRec::Compiler::SpecExec_rfe()
|
||||
{
|
||||
if (!m_speculative_constants.cop0_sr.has_value())
|
||||
return;
|
||||
|
||||
const u32 val = m_speculative_constants.cop0_sr.value();
|
||||
m_speculative_constants.cop0_sr = (val & UINT32_C(0b110000)) | ((val & UINT32_C(0b111111)) >> 2);
|
||||
}
|
||||
|
|
|
@ -56,17 +56,18 @@ protected:
|
|||
FLUSH_LOAD_DELAY_FROM_STATE = (1 << 9),
|
||||
FLUSH_GTE_DONE_CYCLE = (1 << 10),
|
||||
FLUSH_GTE_STALL_FROM_STATE = (1 << 11),
|
||||
FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS = (1 << 12),
|
||||
|
||||
FLUSH_FOR_C_CALL = (FLUSH_FREE_CALLER_SAVED_REGISTERS),
|
||||
FLUSH_FOR_LOADSTORE = (FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_CYCLES),
|
||||
FLUSH_FOR_BRANCH = (FLUSH_FLUSH_MIPS_REGISTERS),
|
||||
FLUSH_FOR_EXCEPTION =
|
||||
(FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE), // GTE cycles needed because it stalls when a GTE instruction is next.
|
||||
FLUSH_FOR_INTERPRETER =
|
||||
(FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_INVALIDATE_MIPS_REGISTERS | FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_PC |
|
||||
FLUSH_CYCLES | FLUSH_INSTRUCTION_BITS | FLUSH_LOAD_DELAY | FLUSH_GTE_DONE_CYCLE),
|
||||
FLUSH_FOR_INTERPRETER = (FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_INVALIDATE_MIPS_REGISTERS |
|
||||
FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_PC | FLUSH_CYCLES | FLUSH_INSTRUCTION_BITS |
|
||||
FLUSH_LOAD_DELAY | FLUSH_GTE_DONE_CYCLE | FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS),
|
||||
FLUSH_END_BLOCK = 0xFFFFFFFFu & ~(FLUSH_PC | FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE | FLUSH_INSTRUCTION_BITS |
|
||||
FLUSH_GTE_STALL_FROM_STATE),
|
||||
FLUSH_GTE_STALL_FROM_STATE | FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS),
|
||||
};
|
||||
|
||||
union CompileFlags
|
||||
|
@ -267,10 +268,10 @@ protected:
|
|||
|
||||
void CompileTemplate(void (Compiler::*const_func)(CompileFlags), void (Compiler::*func)(CompileFlags),
|
||||
const void* pgxp_cpu_func, u32 tflags);
|
||||
void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool,
|
||||
void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool,
|
||||
const std::optional<VirtualMemoryAddress>&),
|
||||
MemoryAccessSize size, bool store, bool sign, u32 tflags);
|
||||
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store);
|
||||
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store, bool use_fastmem);
|
||||
void CompileMoveRegTemplate(Reg dst, Reg src, bool pgxp_move);
|
||||
|
||||
virtual void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
|
||||
|
@ -357,17 +358,17 @@ protected:
|
|||
virtual void Compile_xori(CompileFlags cf) = 0;
|
||||
void Compile_lui();
|
||||
|
||||
virtual void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
virtual void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) = 0;
|
||||
virtual void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
virtual void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) = 0; // lwl/lwr
|
||||
virtual void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
virtual void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) = 0;
|
||||
virtual void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
virtual void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) = 0;
|
||||
virtual void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
virtual void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) = 0; // swl/swr
|
||||
virtual void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
virtual void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) = 0;
|
||||
|
||||
static u32* GetCop0RegPtr(Cop0Reg reg);
|
||||
|
@ -454,6 +455,71 @@ protected:
|
|||
std::array<HostStateBackup, 2> m_host_state_backup = {};
|
||||
u32 m_host_state_backup_count = 0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Speculative Constants
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
using SpecValue = std::optional<u32>;
|
||||
struct SpeculativeConstants
|
||||
{
|
||||
std::array<SpecValue, static_cast<u8>(Reg::count)> regs;
|
||||
std::unordered_map<PhysicalMemoryAddress, SpecValue> memory;
|
||||
SpecValue cop0_sr;
|
||||
};
|
||||
|
||||
void InitSpeculativeRegs();
|
||||
void InvalidateSpeculativeValues();
|
||||
SpecValue SpecReadReg(Reg reg);
|
||||
void SpecWriteReg(Reg reg, SpecValue value);
|
||||
void SpecInvalidateReg(Reg reg);
|
||||
void SpecCopyReg(Reg dst, Reg src);
|
||||
SpecValue SpecReadMem(u32 address);
|
||||
void SpecWriteMem(VirtualMemoryAddress address, SpecValue value);
|
||||
void SpecInvalidateMem(VirtualMemoryAddress address);
|
||||
bool SpecIsCacheIsolated();
|
||||
|
||||
SpeculativeConstants m_speculative_constants;
|
||||
|
||||
void SpecExec_b();
|
||||
void SpecExec_jal();
|
||||
void SpecExec_jalr();
|
||||
void SpecExec_sll();
|
||||
void SpecExec_srl();
|
||||
void SpecExec_sra();
|
||||
void SpecExec_sllv();
|
||||
void SpecExec_srlv();
|
||||
void SpecExec_srav();
|
||||
void SpecExec_mult();
|
||||
void SpecExec_multu();
|
||||
void SpecExec_div();
|
||||
void SpecExec_divu();
|
||||
void SpecExec_add();
|
||||
void SpecExec_addu();
|
||||
void SpecExec_sub();
|
||||
void SpecExec_subu();
|
||||
void SpecExec_and();
|
||||
void SpecExec_or();
|
||||
void SpecExec_xor();
|
||||
void SpecExec_nor();
|
||||
void SpecExec_slt();
|
||||
void SpecExec_sltu();
|
||||
void SpecExec_addi();
|
||||
void SpecExec_addiu();
|
||||
void SpecExec_slti();
|
||||
void SpecExec_sltiu();
|
||||
void SpecExec_andi();
|
||||
void SpecExec_ori();
|
||||
void SpecExec_xori();
|
||||
void SpecExec_lui();
|
||||
SpecValue SpecExec_LoadStoreAddr();
|
||||
void SpecExec_lxx(MemoryAccessSize size, bool sign);
|
||||
void SpecExec_lwx(bool lwr); // lwl/lwr
|
||||
void SpecExec_sxx(MemoryAccessSize size);
|
||||
void SpecExec_swx(bool swr); // swl/swr
|
||||
void SpecExec_swc2();
|
||||
void SpecExec_mfc0();
|
||||
void SpecExec_mtc0();
|
||||
void SpecExec_rfe();
|
||||
|
||||
// PGXP memory callbacks
|
||||
static const std::array<std::array<const void*, 2>, 3> s_pgxp_mem_load_functions;
|
||||
static const std::array<const void*, 3> s_pgxp_mem_store_functions;
|
||||
|
|
|
@ -1340,11 +1340,10 @@ CPU::NewRec::AArch32Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
|
|||
|
||||
template<typename RegAllocFn>
|
||||
vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::aarch32::Register& addr_reg,
|
||||
MemoryAccessSize size, bool sign,
|
||||
MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const RegAllocFn& dst_reg_alloc)
|
||||
{
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
if (!checked && CodeCache::IsUsingFastmem())
|
||||
if (use_fastmem)
|
||||
{
|
||||
DebugAssert(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT);
|
||||
m_cycles += Bus::RAM_READ_TICKS;
|
||||
|
@ -1379,6 +1378,7 @@ vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::a
|
|||
if (addr_reg.GetCode() != RARG1.GetCode())
|
||||
armAsm->mov(RARG1, addr_reg);
|
||||
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
switch (size)
|
||||
{
|
||||
case MemoryAccessSize::Byte:
|
||||
|
@ -1452,10 +1452,10 @@ vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::a
|
|||
}
|
||||
|
||||
void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register& addr_reg,
|
||||
const vixl::aarch32::Register& value_reg, MemoryAccessSize size)
|
||||
const vixl::aarch32::Register& value_reg, MemoryAccessSize size,
|
||||
bool use_fastmem)
|
||||
{
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
if (!checked && CodeCache::IsUsingFastmem())
|
||||
if (use_fastmem)
|
||||
{
|
||||
DebugAssert(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT);
|
||||
DebugAssert(addr_reg.GetCode() != RARG3.GetCode());
|
||||
|
@ -1488,6 +1488,7 @@ void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register&
|
|||
if (value_reg.GetCode() != RARG2.GetCode())
|
||||
armAsm->mov(RARG2, value_reg);
|
||||
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
switch (size)
|
||||
{
|
||||
case MemoryAccessSize::Byte:
|
||||
|
@ -1536,15 +1537,15 @@ void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register&
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ?
|
||||
std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||
std::optional<Register>();
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
const Register data = GenerateLoad(addr, size, sign, [this, cf]() {
|
||||
const Register data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
|
||||
if (cf.MipsT() == Reg::zero)
|
||||
return RRET;
|
||||
|
||||
|
@ -1564,11 +1565,11 @@ void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
DebugAssert(size == MemoryAccessSize::Word && !sign);
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
|
||||
// TODO: if address is constant, this can be simplified..
|
||||
|
||||
|
@ -1580,7 +1581,7 @@ void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
|||
const Register addr = Register(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
|
||||
ComputeLoadStoreAddressArg(cf, address, addr);
|
||||
armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u));
|
||||
GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; });
|
||||
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
|
||||
|
||||
if (inst->r.rt == Reg::zero)
|
||||
{
|
||||
|
@ -1648,15 +1649,15 @@ void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
|||
FreeHostReg(addr.GetCode());
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ?
|
||||
std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||
std::optional<Register>();
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RRET; });
|
||||
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
|
||||
|
||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
||||
|
@ -1728,7 +1729,7 @@ void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
AssertRegOrConstS(cf);
|
||||
|
@ -1737,13 +1738,13 @@ void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
|
|||
const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ?
|
||||
std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||
std::optional<Register>();
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
const Register data = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
|
||||
if (!cf.valid_host_t)
|
||||
MoveTToReg(RARG2, cf);
|
||||
|
||||
GenerateStore(addr, data, size);
|
||||
GenerateStore(addr, data, size, use_fastmem);
|
||||
|
||||
if (g_settings.gpu_pgxp_enable)
|
||||
{
|
||||
|
@ -1756,18 +1757,18 @@ void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
DebugAssert(size == MemoryAccessSize::Word && !sign);
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
|
||||
// TODO: if address is constant, this can be simplified..
|
||||
// We'd need to be careful here if we weren't overwriting it..
|
||||
const Register addr = Register(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
|
||||
ComputeLoadStoreAddressArg(cf, address, addr);
|
||||
armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u));
|
||||
GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; });
|
||||
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
|
||||
|
||||
// TODO: this can take over rt's value if it's no longer needed
|
||||
// NOTE: can't trust T in cf because of the flush
|
||||
|
@ -1813,13 +1814,13 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
|
|||
FreeHostReg(addr.GetCode());
|
||||
|
||||
armAsm->and_(RARG1, addr, armCheckLogicalConstant(~0x3u));
|
||||
GenerateStore(RARG1, value, MemoryAccessSize::Word);
|
||||
GenerateStore(RARG1, value, MemoryAccessSize::Word, use_fastmem);
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
|
||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||
const auto [ptr, action] = GetGTERegisterPointer(index, false);
|
||||
|
@ -1852,17 +1853,17 @@ void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
|
|||
if (!g_settings.gpu_pgxp_enable)
|
||||
{
|
||||
const Register addr = ComputeLoadStoreAddressArg(cf, address);
|
||||
GenerateStore(addr, RARG2, size);
|
||||
GenerateStore(addr, RARG2, size, use_fastmem);
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: This can be simplified because we don't need to validate in PGXP..
|
||||
const Register addr_reg = Register(AllocateTempHostReg(HR_CALLEE_SAVED));
|
||||
const Register data_backup = Register(AllocateTempHostReg(HR_CALLEE_SAVED));
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
armAsm->mov(data_backup, RARG2);
|
||||
GenerateStore(addr_reg, RARG2, size);
|
||||
GenerateStore(addr_reg, RARG2, size, use_fastmem);
|
||||
|
||||
Flush(FLUSH_FOR_C_CALL);
|
||||
armAsm->mov(RARG3, data_backup);
|
||||
|
|
|
@ -96,20 +96,20 @@ protected:
|
|||
const std::optional<const vixl::aarch32::Register>& reg = std::nullopt);
|
||||
template<typename RegAllocFn>
|
||||
vixl::aarch32::Register GenerateLoad(const vixl::aarch32::Register& addr_reg, MemoryAccessSize size, bool sign,
|
||||
const RegAllocFn& dst_reg_alloc);
|
||||
bool use_fastmem, const RegAllocFn& dst_reg_alloc);
|
||||
void GenerateStore(const vixl::aarch32::Register& addr_reg, const vixl::aarch32::Register& value_reg,
|
||||
MemoryAccessSize size);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
MemoryAccessSize size, bool use_fastmem);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
|
||||
void TestInterrupts(const vixl::aarch32::Register& sr);
|
||||
|
|
|
@ -37,8 +37,7 @@ Compiler* g_compiler = &s_instance;
|
|||
} // namespace CPU::NewRec
|
||||
|
||||
CPU::NewRec::AArch64Compiler::AArch64Compiler()
|
||||
: m_emitter(PositionDependentCode)
|
||||
, m_far_emitter(PositionIndependentCode)
|
||||
: m_emitter(PositionDependentCode), m_far_emitter(PositionIndependentCode)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -1314,11 +1313,10 @@ CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
|
|||
|
||||
template<typename RegAllocFn>
|
||||
vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::aarch64::WRegister& addr_reg,
|
||||
MemoryAccessSize size, bool sign,
|
||||
MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const RegAllocFn& dst_reg_alloc)
|
||||
{
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
if (!checked && CodeCache::IsUsingFastmem())
|
||||
if (use_fastmem)
|
||||
{
|
||||
m_cycles += Bus::RAM_READ_TICKS;
|
||||
|
||||
|
@ -1356,6 +1354,7 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::
|
|||
if (addr_reg.GetCode() != RWARG1.GetCode())
|
||||
armAsm->mov(RWARG1, addr_reg);
|
||||
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
switch (size)
|
||||
{
|
||||
case MemoryAccessSize::Byte:
|
||||
|
@ -1429,10 +1428,10 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::
|
|||
}
|
||||
|
||||
void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister& addr_reg,
|
||||
const vixl::aarch64::WRegister& value_reg, MemoryAccessSize size)
|
||||
const vixl::aarch64::WRegister& value_reg, MemoryAccessSize size,
|
||||
bool use_fastmem)
|
||||
{
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
if (!checked && CodeCache::IsUsingFastmem())
|
||||
if (use_fastmem)
|
||||
{
|
||||
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
|
||||
{
|
||||
|
@ -1467,6 +1466,7 @@ void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister&
|
|||
if (value_reg.GetCode() != RWARG2.GetCode())
|
||||
armAsm->mov(RWARG2, value_reg);
|
||||
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
switch (size)
|
||||
{
|
||||
case MemoryAccessSize::Byte:
|
||||
|
@ -1515,15 +1515,15 @@ void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister&
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
const std::optional<WRegister> addr_reg =
|
||||
g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||
std::optional<WRegister>();
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
const WRegister data = GenerateLoad(addr, size, sign, [this, cf]() {
|
||||
const WRegister data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
|
||||
if (cf.MipsT() == Reg::zero)
|
||||
return RWRET;
|
||||
|
||||
|
@ -1544,11 +1544,11 @@ void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
DebugAssert(size == MemoryAccessSize::Word && !sign);
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
|
||||
// TODO: if address is constant, this can be simplified..
|
||||
|
||||
|
@ -1560,7 +1560,7 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
|||
const WRegister addr = WRegister(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
|
||||
ComputeLoadStoreAddressArg(cf, address, addr);
|
||||
armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
|
||||
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
|
||||
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
|
||||
|
||||
if (inst->r.rt == Reg::zero)
|
||||
{
|
||||
|
@ -1628,15 +1628,15 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
|||
FreeHostReg(addr.GetCode());
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
const std::optional<WRegister> addr_reg =
|
||||
g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||
std::optional<WRegister>();
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RWRET; });
|
||||
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
|
||||
|
||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
||||
|
@ -1708,7 +1708,7 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
AssertRegOrConstS(cf);
|
||||
|
@ -1717,13 +1717,13 @@ void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
|
|||
const std::optional<WRegister> addr_reg =
|
||||
g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||
std::optional<WRegister>();
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
const WRegister addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
const WRegister data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
|
||||
if (!cf.valid_host_t)
|
||||
MoveTToReg(RWARG2, cf);
|
||||
|
||||
GenerateStore(addr, data, size);
|
||||
GenerateStore(addr, data, size, use_fastmem);
|
||||
|
||||
if (g_settings.gpu_pgxp_enable)
|
||||
{
|
||||
|
@ -1736,18 +1736,18 @@ void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
DebugAssert(size == MemoryAccessSize::Word && !sign);
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
|
||||
// TODO: if address is constant, this can be simplified..
|
||||
// We'd need to be careful here if we weren't overwriting it..
|
||||
const WRegister addr = WRegister(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
|
||||
ComputeLoadStoreAddressArg(cf, address, addr);
|
||||
armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
|
||||
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
|
||||
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
|
||||
|
||||
// TODO: this can take over rt's value if it's no longer needed
|
||||
// NOTE: can't trust T in cf because of the flush
|
||||
|
@ -1793,13 +1793,13 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
|
|||
FreeHostReg(addr.GetCode());
|
||||
|
||||
armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u));
|
||||
GenerateStore(RWARG1, value, MemoryAccessSize::Word);
|
||||
GenerateStore(RWARG1, value, MemoryAccessSize::Word, use_fastmem);
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
|
||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||
const auto [ptr, action] = GetGTERegisterPointer(index, false);
|
||||
|
@ -1832,17 +1832,17 @@ void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
|
|||
if (!g_settings.gpu_pgxp_enable)
|
||||
{
|
||||
const WRegister addr = ComputeLoadStoreAddressArg(cf, address);
|
||||
GenerateStore(addr, RWARG2, size);
|
||||
GenerateStore(addr, RWARG2, size, use_fastmem);
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: This can be simplified because we don't need to validate in PGXP..
|
||||
const WRegister addr_reg = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
|
||||
const WRegister data_backup = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED));
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
armAsm->mov(data_backup, RWARG2);
|
||||
GenerateStore(addr_reg, RWARG2, size);
|
||||
GenerateStore(addr_reg, RWARG2, size, use_fastmem);
|
||||
|
||||
Flush(FLUSH_FOR_C_CALL);
|
||||
armAsm->mov(RWARG3, data_backup);
|
||||
|
|
|
@ -26,7 +26,8 @@ protected:
|
|||
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
|
||||
void CopyHostReg(u32 dst, u32 src) override;
|
||||
|
||||
void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space) override;
|
||||
void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
|
||||
u32 far_code_space) override;
|
||||
void BeginBlock() override;
|
||||
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
|
||||
void GenerateICacheCheckAndUpdate() override;
|
||||
|
@ -97,20 +98,20 @@ protected:
|
|||
const std::optional<const vixl::aarch64::WRegister>& reg = std::nullopt);
|
||||
template<typename RegAllocFn>
|
||||
vixl::aarch64::WRegister GenerateLoad(const vixl::aarch64::WRegister& addr_reg, MemoryAccessSize size, bool sign,
|
||||
const RegAllocFn& dst_reg_alloc);
|
||||
bool use_fastmem, const RegAllocFn& dst_reg_alloc);
|
||||
void GenerateStore(const vixl::aarch64::WRegister& addr_reg, const vixl::aarch64::WRegister& value_reg,
|
||||
MemoryAccessSize size);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
MemoryAccessSize size, bool use_fastmem);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
|
||||
void TestInterrupts(const vixl::aarch64::WRegister& sr);
|
||||
|
|
|
@ -1595,10 +1595,9 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::ComputeLoadStoreAddressArg(
|
|||
|
||||
template<typename RegAllocFn>
|
||||
void CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign,
|
||||
const RegAllocFn& dst_reg_alloc)
|
||||
bool use_fastmem, const RegAllocFn& dst_reg_alloc)
|
||||
{
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
if (!checked && CodeCache::IsUsingFastmem())
|
||||
if (use_fastmem)
|
||||
{
|
||||
m_cycles += Bus::RAM_READ_TICKS;
|
||||
|
||||
|
@ -1648,6 +1647,7 @@ void CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, Me
|
|||
if (addr_reg.Index() != RARG1.Index())
|
||||
rvAsm->MV(RARG1, addr_reg);
|
||||
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
switch (size)
|
||||
{
|
||||
case MemoryAccessSize::Byte:
|
||||
|
@ -1723,10 +1723,9 @@ void CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, Me
|
|||
}
|
||||
|
||||
void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,
|
||||
MemoryAccessSize size)
|
||||
MemoryAccessSize size, bool use_fastmem)
|
||||
{
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
if (!checked && CodeCache::IsUsingFastmem())
|
||||
if (use_fastmem)
|
||||
{
|
||||
DebugAssert(value_reg != RSCRATCH);
|
||||
rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
|
||||
|
@ -1774,6 +1773,7 @@ void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, c
|
|||
if (value_reg.Index() != RARG2.Index())
|
||||
rvAsm->MV(RARG2, value_reg);
|
||||
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
switch (size)
|
||||
{
|
||||
case MemoryAccessSize::Byte:
|
||||
|
@ -1822,12 +1822,12 @@ void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, c
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
const GPR addr = ComputeLoadStoreAddressArg(cf, address);
|
||||
GenerateLoad(addr, size, sign, [this, cf]() {
|
||||
GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
|
||||
if (cf.MipsT() == Reg::zero)
|
||||
return RRET;
|
||||
|
||||
|
@ -1836,11 +1836,11 @@ void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
|
|||
});
|
||||
}
|
||||
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
DebugAssert(size == MemoryAccessSize::Word && !sign);
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
|
||||
// TODO: if address is constant, this can be simplified..
|
||||
|
||||
|
@ -1852,7 +1852,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
|||
const GPR addr = GPR(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
|
||||
ComputeLoadStoreAddressArg(cf, address, addr);
|
||||
rvAsm->ANDI(RARG1, addr, ~0x3u);
|
||||
GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; });
|
||||
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
|
||||
|
||||
if (inst->r.rt == Reg::zero)
|
||||
{
|
||||
|
@ -1920,12 +1920,12 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
|
|||
FreeHostReg(addr.Index());
|
||||
}
|
||||
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
const GPR addr = ComputeLoadStoreAddressArg(cf, address);
|
||||
GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RRET; });
|
||||
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
|
||||
|
||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
||||
|
@ -1987,32 +1987,32 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
AssertRegOrConstS(cf);
|
||||
AssertRegOrConstT(cf);
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
const GPR addr = ComputeLoadStoreAddressArg(cf, address);
|
||||
|
||||
if (!cf.valid_host_t)
|
||||
MoveTToReg(RARG2, cf);
|
||||
|
||||
GenerateStore(addr, cf.valid_host_t ? CFGetRegT(cf) : RARG2, size);
|
||||
GenerateStore(addr, cf.valid_host_t ? CFGetRegT(cf) : RARG2, size, use_fastmem);
|
||||
}
|
||||
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
DebugAssert(size == MemoryAccessSize::Word && !sign);
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
|
||||
// TODO: if address is constant, this can be simplified..
|
||||
// We'd need to be careful here if we weren't overwriting it..
|
||||
const GPR addr = GPR(AllocateHostReg(HR_CALLEE_SAVED, HR_TYPE_TEMP));
|
||||
ComputeLoadStoreAddressArg(cf, address, addr);
|
||||
rvAsm->ANDI(RARG1, addr, ~0x3u);
|
||||
GenerateLoad(RARG1, MemoryAccessSize::Word, false, []() { return RRET; });
|
||||
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
|
||||
|
||||
// TODO: this can take over rt's value if it's no longer needed
|
||||
// NOTE: can't trust T in cf because of the flush
|
||||
|
@ -2058,13 +2058,13 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
|
|||
FreeHostReg(addr.Index());
|
||||
|
||||
rvAsm->ANDI(RARG1, addr, ~0x3u);
|
||||
GenerateStore(RARG1, value, MemoryAccessSize::Word);
|
||||
GenerateStore(RARG1, value, MemoryAccessSize::Word, use_fastmem);
|
||||
}
|
||||
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
|
||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||
const auto [ptr, action] = GetGTERegisterPointer(index, false);
|
||||
|
@ -2094,7 +2094,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
|
|||
}
|
||||
|
||||
const GPR addr = ComputeLoadStoreAddressArg(cf, address);
|
||||
GenerateStore(addr, RARG2, size);
|
||||
GenerateStore(addr, RARG2, size, use_fastmem);
|
||||
}
|
||||
|
||||
void CPU::NewRec::RISCV64Compiler::Compile_mtc0(CompileFlags cf)
|
||||
|
|
|
@ -88,19 +88,21 @@ protected:
|
|||
biscuit::GPR ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
|
||||
const std::optional<const biscuit::GPR>& reg = std::nullopt);
|
||||
template<typename RegAllocFn>
|
||||
void GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, const RegAllocFn& dst_reg_alloc);
|
||||
void GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, MemoryAccessSize size);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const RegAllocFn& dst_reg_alloc);
|
||||
void GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, MemoryAccessSize size,
|
||||
bool use_fastmem);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
|
||||
void TestInterrupts(const biscuit::GPR& sr);
|
||||
|
|
|
@ -1239,10 +1239,9 @@ CPU::NewRec::X64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf,
|
|||
|
||||
template<typename RegAllocFn>
|
||||
Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign,
|
||||
const RegAllocFn& dst_reg_alloc)
|
||||
bool use_fastmem, const RegAllocFn& dst_reg_alloc)
|
||||
{
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
if (CodeCache::IsUsingFastmem() && !checked)
|
||||
if (use_fastmem)
|
||||
{
|
||||
m_cycles += Bus::RAM_READ_TICKS;
|
||||
|
||||
|
@ -1296,6 +1295,7 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg
|
|||
if (addr_reg != RWARG1)
|
||||
cg->mov(RWARG1, addr_reg);
|
||||
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
switch (size)
|
||||
{
|
||||
case MemoryAccessSize::Byte:
|
||||
|
@ -1370,10 +1370,9 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg
|
|||
}
|
||||
|
||||
void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg,
|
||||
MemoryAccessSize size)
|
||||
MemoryAccessSize size, bool use_fastmem)
|
||||
{
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
if (CodeCache::IsUsingFastmem() && !checked)
|
||||
if (use_fastmem)
|
||||
{
|
||||
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
|
||||
{
|
||||
|
@ -1417,6 +1416,7 @@ void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const
|
|||
if (value_reg != RWARG2)
|
||||
cg->mov(RWARG2, value_reg);
|
||||
|
||||
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
|
||||
switch (size)
|
||||
{
|
||||
case MemoryAccessSize::Byte:
|
||||
|
@ -1466,16 +1466,16 @@ void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
|
||||
std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||
std::optional<Reg32>();
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
|
||||
const Reg32 data = GenerateLoad(addr, size, sign, [this, cf]() {
|
||||
const Reg32 data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
|
||||
if (cf.MipsT() == Reg::zero)
|
||||
return RWRET;
|
||||
|
||||
|
@ -1495,11 +1495,11 @@ void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize siz
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
DebugAssert(size == MemoryAccessSize::Word && !sign);
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
|
||||
// TODO: if address is constant, this can be simplified..
|
||||
|
||||
|
@ -1512,7 +1512,7 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz
|
|||
ComputeLoadStoreAddressArg(cf, address, addr);
|
||||
cg->mov(RWARG1, addr);
|
||||
cg->and_(RWARG1, ~0x3u);
|
||||
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
|
||||
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
|
||||
|
||||
if (inst->r.rt == Reg::zero)
|
||||
{
|
||||
|
@ -1586,15 +1586,15 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz
|
|||
FreeHostReg(addr.getIdx());
|
||||
}
|
||||
|
||||
void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
|
||||
std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||
std::optional<Reg32>();
|
||||
FlushForLoadStore(address, false);
|
||||
FlushForLoadStore(address, false, use_fastmem);
|
||||
const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
GenerateLoad(addr, MemoryAccessSize::Word, false, []() { return RWRET; });
|
||||
GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
|
||||
|
||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||
const auto [ptr, action] = GetGTERegisterPointer(index, true);
|
||||
|
@ -1666,19 +1666,19 @@ void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize si
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
const std::optional<Reg32> addr_reg = g_settings.gpu_pgxp_enable ?
|
||||
std::optional<Reg32>(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) :
|
||||
std::optional<Reg32>();
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
const Reg32 addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
const Reg32 data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
|
||||
if (!cf.valid_host_t)
|
||||
MoveTToReg(RWARG2, cf);
|
||||
|
||||
GenerateStore(addr, data, size);
|
||||
GenerateStore(addr, data, size, use_fastmem);
|
||||
|
||||
if (g_settings.gpu_pgxp_enable)
|
||||
{
|
||||
|
@ -1691,11 +1691,11 @@ void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize siz
|
|||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
DebugAssert(size == MemoryAccessSize::Word && !sign);
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
|
||||
// TODO: if address is constant, this can be simplified..
|
||||
// We'd need to be careful here if we weren't overwriting it..
|
||||
|
@ -1703,7 +1703,7 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
|
|||
ComputeLoadStoreAddressArg(cf, address, addr);
|
||||
cg->mov(RWARG1, addr);
|
||||
cg->and_(RWARG1, ~0x3u);
|
||||
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, []() { return RWRET; });
|
||||
GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; });
|
||||
|
||||
// TODO: this can take over rt's value if it's no longer needed
|
||||
// NOTE: can't trust T in cf because of the flush
|
||||
|
@ -1755,10 +1755,10 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz
|
|||
|
||||
cg->mov(RWARG1, addr);
|
||||
cg->and_(RWARG1, ~0x3u);
|
||||
GenerateStore(RWARG1, value, MemoryAccessSize::Word);
|
||||
GenerateStore(RWARG1, value, MemoryAccessSize::Word, use_fastmem);
|
||||
}
|
||||
|
||||
void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address)
|
||||
{
|
||||
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
|
||||
|
@ -1791,19 +1791,19 @@ void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize si
|
|||
// PGXP makes this a giant pain.
|
||||
if (!g_settings.gpu_pgxp_enable)
|
||||
{
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
const Reg32 addr = ComputeLoadStoreAddressArg(cf, address);
|
||||
GenerateStore(addr, RWARG2, size);
|
||||
GenerateStore(addr, RWARG2, size, use_fastmem);
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: This can be simplified because we don't need to validate in PGXP..
|
||||
const Reg32 addr_reg = Reg32(AllocateTempHostReg(HR_CALLEE_SAVED));
|
||||
const Reg32 data_backup = Reg32(AllocateTempHostReg(HR_CALLEE_SAVED));
|
||||
FlushForLoadStore(address, true);
|
||||
FlushForLoadStore(address, true, use_fastmem);
|
||||
ComputeLoadStoreAddressArg(cf, address, addr_reg);
|
||||
cg->mov(data_backup, RWARG2);
|
||||
GenerateStore(addr_reg, RWARG2, size);
|
||||
GenerateStore(addr_reg, RWARG2, size, use_fastmem);
|
||||
|
||||
Flush(FLUSH_FOR_C_CALL);
|
||||
cg->mov(RWARG3, data_backup);
|
||||
|
@ -2066,9 +2066,9 @@ void CPU::NewRec::X64Compiler::Compile_cop2(CompileFlags cf)
|
|||
}
|
||||
|
||||
u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
|
||||
TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
|
||||
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
|
||||
bool is_load)
|
||||
TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
|
||||
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
|
||||
bool is_load)
|
||||
{
|
||||
CodeGenerator acg(thunk_space, thunk_code);
|
||||
CodeGenerator* cg = &acg;
|
||||
|
|
|
@ -87,20 +87,21 @@ protected:
|
|||
Xbyak::Reg32 ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
|
||||
const std::optional<const Xbyak::Reg32>& reg = std::nullopt);
|
||||
template<typename RegAllocFn>
|
||||
Xbyak::Reg32 GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign,
|
||||
Xbyak::Reg32 GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const RegAllocFn& dst_reg_alloc);
|
||||
void GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, MemoryAccessSize size);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, MemoryAccessSize size,
|
||||
bool use_fastmem);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
|
||||
void TestInterrupts(const Xbyak::Reg32& sr);
|
||||
|
|
Loading…
Reference in a new issue