CPU/Recompiler: Simplify PC addressing

Connor McLaughlin 2021-07-20 13:31:23 +10:00
parent 033d85cd90
commit 6081e2415f
5 changed files with 86 additions and 116 deletions


@@ -21,6 +21,8 @@ bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* o
   m_block = block;
   m_block_start = block->instructions.data();
   m_block_end = block->instructions.data() + block->instructions.size();
+  m_pc = block->GetPC();
+  m_pc_valid = true;
   EmitBeginBlock();
   BlockPrologue();
@@ -955,7 +957,7 @@ void CodeGenerator::BlockPrologue()
   EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0));
-  if (m_block->uncached_fetch_ticks > 0)
+  if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0)
     EmitICacheCheckAndUpdate();
 
   // we don't know the state of the last block, so assume load delays might be in progress
@@ -965,10 +967,6 @@ void CodeGenerator::BlockPrologue()
   m_current_instruction_was_branch_taken_dirty = false;
   m_load_delay_dirty = true;
   m_gte_busy_cycles_dirty = true;
-  m_pc_offset = 0;
-  m_current_instruction_pc_offset = 0;
-  m_next_pc_offset = 4;
 }
 
 void CodeGenerator::BlockEpilogue()
@@ -992,9 +990,8 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
 #endif
 
   // move instruction offsets forward
-  m_current_instruction_pc_offset = m_pc_offset;
-  m_pc_offset = m_next_pc_offset;
-  m_next_pc_offset += 4;
+  if (m_pc_valid)
+    m_pc += 4;
 
   // reset dirty flags
   if (m_branch_was_taken_dirty)
@@ -1129,38 +1126,15 @@ void CodeGenerator::StallUntilGTEComplete()
 Value CodeGenerator::CalculatePC(u32 offset /* = 0 */)
 {
-  Value value = m_register_cache.AllocateScratch(RegSize_32);
-  EmitLoadGuestRegister(value.GetHostRegister(), Reg::pc);
+  if (!m_pc_valid)
+    Panic("Attempt to get an indeterminate PC");
-  const u32 apply_offset = m_pc_offset + offset;
-  if (apply_offset > 0)
-    EmitAdd(value.GetHostRegister(), value.GetHostRegister(), Value::FromConstantU32(apply_offset), false);
-  return value;
+  return Value::FromConstantU32(m_pc + offset);
 }
 
 Value CodeGenerator::GetCurrentInstructionPC(u32 offset /* = 0 */)
 {
-  Value value = m_register_cache.AllocateScratch(RegSize_32);
-  EmitLoadCPUStructField(value.GetHostRegister(), RegSize_32, offsetof(State, current_instruction_pc));
-  const u32 apply_offset = m_current_instruction_pc_offset + offset;
-  if (apply_offset > 0)
-    EmitAdd(value.GetHostRegister(), value.GetHostRegister(), Value::FromConstantU32(apply_offset), false);
-  return value;
-}
-
-void CodeGenerator::UpdateCurrentInstructionPC(bool commit)
-{
-  if (m_current_instruction_pc_offset > 0)
-  {
-    EmitAddCPUStructField(offsetof(State, current_instruction_pc),
-                          Value::FromConstantU32(m_current_instruction_pc_offset));
-    if (commit)
-      m_current_instruction_pc_offset = 0;
-  }
+  return Value::FromConstantU32(m_current_instruction->pc);
 }
 
 void CodeGenerator::WriteNewPC(const Value& value, bool commit)
@@ -1168,7 +1142,11 @@ void CodeGenerator::WriteNewPC(const Value& value, bool commit)
   // TODO: This _could_ be moved into the register cache, but would it gain anything?
   EmitStoreGuestRegister(Reg::pc, value);
   if (commit)
-    m_next_pc_offset = 0;
+  {
+    m_pc_valid = value.IsConstant();
+    if (m_pc_valid)
+      m_pc = static_cast<u32>(value.constant_value);
+  }
 }
 
 bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi)
@@ -2209,9 +2187,13 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi)
       m_register_cache.FlushGuestRegister(lr_reg, false, true);
 
     // compute return address, which is also set as the new pc when the branch isn't taken
-    Value next_pc;
-    if (condition != Condition::Always || lr_reg != Reg::count)
-      next_pc = CalculatePC(4);
+    Value next_pc = CalculatePC(4);
+    DebugAssert(next_pc.IsConstant());
+    if (condition != Condition::Always)
+    {
+      next_pc = m_register_cache.AllocateScratch(RegSize_32);
+      EmitCopyValue(next_pc.GetHostRegister(), CalculatePC(4));
+    }
 
     LabelType branch_not_taken;
     if (condition != Condition::Always)
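
Note: the net effect of the changes above is that the PC becomes a compile-time constant while a block is being compiled, instead of being reloaded and offset at run time. A minimal standalone sketch of the idea, with illustrative names only (this is not the project's API):

    #include <cassert>
    #include <cstdint>

    // Sketch: per-block compile-time PC tracking (assumes fixed 4-byte MIPS instructions).
    struct PCTracker
    {
      uint32_t pc = 0;     // address of the instruction currently being compiled
      bool valid = false;  // false once a non-constant PC has been written

      void BeginBlock(uint32_t block_pc) { pc = block_pc; valid = true; }

      void NextInstruction()
      {
        if (valid)
          pc += 4;
      }

      // Analogue of CalculatePC(): the result can be emitted as an immediate.
      uint32_t CalculatePC(uint32_t offset = 0) const
      {
        assert(valid && "attempt to get an indeterminate PC");
        return pc + offset;
      }

      // Analogue of WriteNewPC(): only a constant write keeps the PC determinate.
      void WriteNewPC(bool is_constant, uint32_t value)
      {
        valid = is_constant;
        if (valid)
          pc = value;
      }
    };

Once the PC is constant, branch targets and return addresses fold into immediates, which is why Compile_Branch above can assert that CalculatePC(4) is constant and only materialise it in a host register when the branch is conditional.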


@@ -206,7 +206,6 @@ private:
   Value CalculatePC(u32 offset = 0);
   Value GetCurrentInstructionPC(u32 offset = 0);
-  void UpdateCurrentInstructionPC(bool commit);
   void WriteNewPC(const Value& value, bool commit);
 
   Value DoGTERegisterRead(u32 index);
@@ -248,9 +247,9 @@ private:
   TickCount m_delayed_cycles_add = 0;
   TickCount m_gte_done_cycle = 0;
-  TickCount m_pc_offset = 0;
-  TickCount m_current_instruction_pc_offset = 0;
-  TickCount m_next_pc_offset = 0;
+  u32 m_pc = 0;
+  bool m_pc_valid = false;
 
   // whether various flags need to be reset.
   bool m_current_instruction_in_branch_delay_slot_dirty = false;


@@ -2250,7 +2250,8 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
   m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, current_instruction_pc)));
 
   // blr(x9[pc * 2]) (fast_map[pc >> 2])
-  m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 2));
+  m_emit->lsr(a64::w8, a64::w8, 2);
+  m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3));
   m_emit->blr(a64::x8);
 
   // end while
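
Note: the two added instructions compute fast_map[pc >> 2] with 8-byte entries, i.e. base + (pc >> 2) * 8, which matches the comment above. In C++ terms the address calculation behaves roughly like this sketch (assuming fast_map is an array of 8-byte host code pointers indexed by word address; the typedef here is illustrative):

    #include <cstdint>

    using HostCodePointer = void (*)();

    // lsr w8, w8, 2            -> index = pc >> 2
    // ldr x8, [x9, x8, lsl 3]  -> target = fast_map[index] (8-byte pointer entries)
    inline HostCodePointer LookupDispatchTarget(HostCodePointer* fast_map, uint32_t pc)
    {
      return fast_map[pc >> 2];
    }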


@@ -164,45 +164,49 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
 void CodeGenerator::EmitICacheCheckAndUpdate()
 {
-  Value pc = CalculatePC();
   Value temp = m_register_cache.AllocateScratch(RegSize_32);
-  m_register_cache.InhibitAllocation();
-  EmitShr(temp.GetHostRegister(), pc.GetHostRegister(), RegSize_32, Value::FromConstantU32(29));
-  LabelType is_cached;
-  LabelType ready_to_execute;
-  EmitConditionalBranch(Condition::LessEqual, false, temp.GetHostRegister(), Value::FromConstantU32(4), &is_cached);
-  EmitLoadCPUStructField(temp.host_reg, RegSize_32, offsetof(State, pending_ticks));
-  EmitAdd(temp.host_reg, temp.host_reg, Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)), false);
-  EmitStoreCPUStructField(offsetof(State, pending_ticks), temp);
-  EmitBranch(&ready_to_execute);
-  EmitBindLabel(&is_cached);
-  // cached path
-  EmitAnd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_TAG_ADDRESS_MASK));
-  VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK);
-  for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE)
+  if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
   {
-    const TickCount fill_ticks = GetICacheFillTicks(current_address);
-    if (fill_ticks <= 0)
-      continue;
-    const u32 line = GetICacheLine(current_address);
-    const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32));
-    LabelType cache_hit;
-    EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offset);
-    EmitConditionalBranch(Condition::Equal, false, temp.GetHostRegister(), pc, &cache_hit);
-    EmitLoadCPUStructField(temp.host_reg, RegSize_32, offsetof(State, pending_ticks));
-    EmitStoreCPUStructField(offset, pc);
-    EmitAdd(temp.host_reg, temp.host_reg, Value::FromConstantU32(static_cast<u32>(fill_ticks)), false);
+    EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offsetof(State, pending_ticks));
+    EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(),
+            Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)), false);
     EmitStoreCPUStructField(offsetof(State, pending_ticks), temp);
-    EmitBindLabel(&cache_hit);
-    EmitAdd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_LINE_SIZE), false);
   }
-  EmitBindLabel(&ready_to_execute);
-  m_register_cache.UninhibitAllocation();
+  else
+  {
+    // cached path
+    Value temp2 = m_register_cache.AllocateScratch(RegSize_32);
+    m_register_cache.InhibitAllocation();
+    VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
+    for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
+    {
+      const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc);
+      const TickCount fill_ticks = GetICacheFillTicks(current_pc);
+      if (fill_ticks <= 0)
+        continue;
+      const u32 line = GetICacheLine(current_pc);
+      const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32));
+      LabelType cache_hit;
+      EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offset);
+      EmitCopyValue(temp2.GetHostRegister(), Value::FromConstantU32(current_pc));
+      EmitCmp(temp2.GetHostRegister(), temp);
+      EmitConditionalBranch(Condition::Equal, false, temp.GetHostRegister(), temp2, &cache_hit);
+      EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offsetof(State, pending_ticks));
+      EmitStoreCPUStructField(offset, temp2);
+      EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(static_cast<u32>(fill_ticks)),
+              false);
+      EmitStoreCPUStructField(offsetof(State, pending_ticks), temp);
+      EmitBindLabel(&cache_hit);
+    }
+    m_register_cache.UninhibitAllocation();
+  }
 }
 
 #endif
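
Note: because the block's PC is now a constant, the segment test and the per-line tags are resolved while the block is compiled; only the tag compares and tick adjustments are emitted. The run-time behaviour of the emitted code is roughly the following sketch (constants and helpers here are stand-ins for illustration, not the emulator's real definitions):

    #include <cstdint>

    constexpr uint32_t kIcacheLineSize = 16;   // assumed line size for the sketch
    constexpr uint32_t kIcacheLines = 256;     // assumed line count for the sketch

    struct CpuState
    {
      int32_t pending_ticks = 0;
      uint32_t icache_tags[kIcacheLines] = {};
    };

    // One check per compiled block. block_pc, line_count, uncached_fetch_ticks and the
    // per-line fill costs are all known at compile time; only the tag state is dynamic.
    inline void ICacheCheckAndUpdate(CpuState& state, uint32_t block_pc, uint32_t line_count,
                                     int32_t uncached_fetch_ticks, const int32_t* fill_ticks)
    {
      if ((block_pc >> 29) > 4)  // KSEG1 and above: uncached, flat per-block fetch cost
      {
        state.pending_ticks += uncached_fetch_ticks;
        return;
      }

      uint32_t current_pc = block_pc & ~(kIcacheLineSize - 1);  // stand-in for the tag mask
      for (uint32_t i = 0; i < line_count; i++, current_pc += kIcacheLineSize)
      {
        if (fill_ticks[i] <= 0)
          continue;

        const uint32_t line = (current_pc / kIcacheLineSize) % kIcacheLines;
        if (state.icache_tags[line] != current_pc)  // miss: refill the line and pay the cost
        {
          state.icache_tags[line] = current_pc;
          state.pending_ticks += fill_ticks[i];
        }
      }
    }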


@@ -2612,48 +2612,32 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
 void CodeGenerator::EmitICacheCheckAndUpdate()
 {
-  Value pc = CalculatePC();
-  Value seg = m_register_cache.AllocateScratch(RegSize_32);
-  m_register_cache.InhibitAllocation();
-  m_emit->mov(GetHostReg32(seg), GetHostReg32(pc));
-  m_emit->shr(GetHostReg32(seg), 29);
-  Xbyak::Label is_cached;
-  m_emit->cmp(GetHostReg32(seg), 4);
-  m_emit->jle(is_cached);
-  // uncached
-  Xbyak::Label done;
-  m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)],
-              static_cast<u32>(m_block->uncached_fetch_ticks));
-  m_emit->jmp(done, Xbyak::CodeGenerator::T_NEAR);
-  // cached
-  m_emit->L(is_cached);
-  m_emit->and_(GetHostReg32(pc), ICACHE_TAG_ADDRESS_MASK);
-  VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK);
-  for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE)
+  if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
   {
-    const TickCount fill_ticks = GetICacheFillTicks(current_address);
-    if (fill_ticks <= 0)
-      continue;
-    const u32 line = GetICacheLine(current_address);
-    const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32));
-    Xbyak::Label cache_hit;
-    m_emit->cmp(GetHostReg32(pc), m_emit->dword[GetCPUPtrReg() + offset]);
-    m_emit->je(cache_hit);
-    m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], GetHostReg32(pc));
-    m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], static_cast<u32>(fill_ticks));
-    m_emit->L(cache_hit);
-    m_emit->add(GetHostReg32(pc), ICACHE_LINE_SIZE);
+    m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)],
+                static_cast<u32>(m_block->uncached_fetch_ticks));
   }
-  m_emit->L(done);
-  m_register_cache.UninhibitAllocation();
+  else
+  {
+    VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
+    for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
+    {
+      const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc);
+      const TickCount fill_ticks = GetICacheFillTicks(current_pc);
+      if (fill_ticks <= 0)
+        continue;
+      const u32 line = GetICacheLine(current_pc);
+      const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32));
+      Xbyak::Label cache_hit;
+      m_emit->cmp(m_emit->dword[GetCPUPtrReg() + offset], tag);
+      m_emit->je(cache_hit);
+      m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], tag);
+      m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], static_cast<u32>(fill_ticks));
+      m_emit->L(cache_hit);
+    }
+  }
 }
 
 void CodeGenerator::EmitStallUntilGTEComplete()
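
Note: with the tag known at compile time, the x64 backend no longer needs the PC in a scratch register; each unrolled line check compares the stored tag directly against an immediate. Per line, the emitted sequence behaves like this sketch:

    #include <cstdint>

    //   cmp dword [state + offset], tag
    //   je  cache_hit
    //   mov dword [state + offset], tag
    //   add dword [state + pending_ticks], fill_ticks
    // cache_hit:
    inline void CheckICacheLine(uint32_t& stored_tag, int32_t& pending_ticks,
                                uint32_t tag, int32_t fill_ticks)
    {
      if (stored_tag != tag)  // miss: record the new tag and charge the fill cost
      {
        stored_tag = tag;
        pending_ticks += fill_ticks;
      }
    }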