From 6081e2415fa0e4cc992a506026558c7caa3d2e67 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Tue, 20 Jul 2021 13:31:23 +1000 Subject: [PATCH] CPU/Recompiler: Simplify PC addressing --- src/core/cpu_recompiler_code_generator.cpp | 60 ++++++---------- src/core/cpu_recompiler_code_generator.h | 7 +- .../cpu_recompiler_code_generator_aarch64.cpp | 3 +- .../cpu_recompiler_code_generator_generic.cpp | 70 ++++++++++--------- .../cpu_recompiler_code_generator_x64.cpp | 62 ++++++---------- 5 files changed, 86 insertions(+), 116 deletions(-) diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index c033270e8..99aedeb12 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -21,6 +21,8 @@ bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* o m_block = block; m_block_start = block->instructions.data(); m_block_end = block->instructions.data() + block->instructions.size(); + m_pc = block->GetPC(); + m_pc_valid = true; EmitBeginBlock(); BlockPrologue(); @@ -955,7 +957,7 @@ void CodeGenerator::BlockPrologue() EmitStoreCPUStructField(offsetof(State, exception_raised), Value::FromConstantU8(0)); - if (m_block->uncached_fetch_ticks > 0) + if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0) EmitICacheCheckAndUpdate(); // we don't know the state of the last block, so assume load delays might be in progress @@ -965,10 +967,6 @@ void CodeGenerator::BlockPrologue() m_current_instruction_was_branch_taken_dirty = false; m_load_delay_dirty = true; m_gte_busy_cycles_dirty = true; - - m_pc_offset = 0; - m_current_instruction_pc_offset = 0; - m_next_pc_offset = 4; } void CodeGenerator::BlockEpilogue() @@ -992,9 +990,8 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou #endif // move instruction offsets forward - m_current_instruction_pc_offset = m_pc_offset; - m_pc_offset = m_next_pc_offset; - m_next_pc_offset += 4; + if (m_pc_valid) + m_pc += 4; // reset dirty flags if (m_branch_was_taken_dirty) @@ -1129,38 +1126,15 @@ void CodeGenerator::StallUntilGTEComplete() Value CodeGenerator::CalculatePC(u32 offset /* = 0 */) { - Value value = m_register_cache.AllocateScratch(RegSize_32); - EmitLoadGuestRegister(value.GetHostRegister(), Reg::pc); + if (!m_pc_valid) + Panic("Attempt to get an indeterminate PC"); - const u32 apply_offset = m_pc_offset + offset; - if (apply_offset > 0) - EmitAdd(value.GetHostRegister(), value.GetHostRegister(), Value::FromConstantU32(apply_offset), false); - - return value; + return Value::FromConstantU32(m_pc + offset); } Value CodeGenerator::GetCurrentInstructionPC(u32 offset /* = 0 */) { - Value value = m_register_cache.AllocateScratch(RegSize_32); - EmitLoadCPUStructField(value.GetHostRegister(), RegSize_32, offsetof(State, current_instruction_pc)); - - const u32 apply_offset = m_current_instruction_pc_offset + offset; - if (apply_offset > 0) - EmitAdd(value.GetHostRegister(), value.GetHostRegister(), Value::FromConstantU32(apply_offset), false); - - return value; -} - -void CodeGenerator::UpdateCurrentInstructionPC(bool commit) -{ - if (m_current_instruction_pc_offset > 0) - { - EmitAddCPUStructField(offsetof(State, current_instruction_pc), - Value::FromConstantU32(m_current_instruction_pc_offset)); - - if (commit) - m_current_instruction_pc_offset = 0; - } + return Value::FromConstantU32(m_current_instruction->pc); } void CodeGenerator::WriteNewPC(const Value& value, bool commit) @@ -1168,7 +1142,11 @@ void CodeGenerator::WriteNewPC(const Value& value, bool commit) // TODO: This _could_ be moved into the register cache, but would it gain anything? EmitStoreGuestRegister(Reg::pc, value); if (commit) - m_next_pc_offset = 0; + { + m_pc_valid = value.IsConstant(); + if (m_pc_valid) + m_pc = static_cast(value.constant_value); + } } bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi) @@ -2209,9 +2187,13 @@ bool CodeGenerator::Compile_Branch(const CodeBlockInstruction& cbi) m_register_cache.FlushGuestRegister(lr_reg, false, true); // compute return address, which is also set as the new pc when the branch isn't taken - Value next_pc; - if (condition != Condition::Always || lr_reg != Reg::count) - next_pc = CalculatePC(4); + Value next_pc = CalculatePC(4); + DebugAssert(next_pc.IsConstant()); + if (condition != Condition::Always) + { + next_pc = m_register_cache.AllocateScratch(RegSize_32); + EmitCopyValue(next_pc.GetHostRegister(), CalculatePC(4)); + } LabelType branch_not_taken; if (condition != Condition::Always) diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h index 476efbf34..37d0d3e44 100644 --- a/src/core/cpu_recompiler_code_generator.h +++ b/src/core/cpu_recompiler_code_generator.h @@ -206,7 +206,6 @@ private: Value CalculatePC(u32 offset = 0); Value GetCurrentInstructionPC(u32 offset = 0); - void UpdateCurrentInstructionPC(bool commit); void WriteNewPC(const Value& value, bool commit); Value DoGTERegisterRead(u32 index); @@ -248,9 +247,9 @@ private: TickCount m_delayed_cycles_add = 0; TickCount m_gte_done_cycle = 0; - TickCount m_pc_offset = 0; - TickCount m_current_instruction_pc_offset = 0; - TickCount m_next_pc_offset = 0; + + u32 m_pc = 0; + bool m_pc_valid = false; // whether various flags need to be reset. bool m_current_instruction_in_branch_delay_slot_dirty = false; diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp index 96ff6f9d6..c8e27dadf 100644 --- a/src/core/cpu_recompiler_code_generator_aarch64.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp @@ -2250,7 +2250,8 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher() m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, current_instruction_pc))); // blr(x9[pc * 2]) (fast_map[pc >> 2]) - m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 2)); + m_emit->lsr(a64::w8, a64::w8, 2); + m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3)); m_emit->blr(a64::x8); // end while diff --git a/src/core/cpu_recompiler_code_generator_generic.cpp b/src/core/cpu_recompiler_code_generator_generic.cpp index 945fb8204..1bdf3215f 100644 --- a/src/core/cpu_recompiler_code_generator_generic.cpp +++ b/src/core/cpu_recompiler_code_generator_generic.cpp @@ -164,45 +164,49 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const void CodeGenerator::EmitICacheCheckAndUpdate() { - Value pc = CalculatePC(); Value temp = m_register_cache.AllocateScratch(RegSize_32); - m_register_cache.InhibitAllocation(); - EmitShr(temp.GetHostRegister(), pc.GetHostRegister(), RegSize_32, Value::FromConstantU32(29)); - LabelType is_cached; - LabelType ready_to_execute; - EmitConditionalBranch(Condition::LessEqual, false, temp.GetHostRegister(), Value::FromConstantU32(4), &is_cached); - EmitLoadCPUStructField(temp.host_reg, RegSize_32, offsetof(State, pending_ticks)); - EmitAdd(temp.host_reg, temp.host_reg, Value::FromConstantU32(static_cast(m_block->uncached_fetch_ticks)), false); - EmitStoreCPUStructField(offsetof(State, pending_ticks), temp); - EmitBranch(&ready_to_execute); - EmitBindLabel(&is_cached); - - // cached path - EmitAnd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_TAG_ADDRESS_MASK)); - VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK); - for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE) + if (GetSegmentForAddress(m_pc) >= Segment::KSEG1) { - const TickCount fill_ticks = GetICacheFillTicks(current_address); - if (fill_ticks <= 0) - continue; - - const u32 line = GetICacheLine(current_address); - const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32)); - LabelType cache_hit; - - EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offset); - EmitConditionalBranch(Condition::Equal, false, temp.GetHostRegister(), pc, &cache_hit); - EmitLoadCPUStructField(temp.host_reg, RegSize_32, offsetof(State, pending_ticks)); - EmitStoreCPUStructField(offset, pc); - EmitAdd(temp.host_reg, temp.host_reg, Value::FromConstantU32(static_cast(fill_ticks)), false); + EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offsetof(State, pending_ticks)); + EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), + Value::FromConstantU32(static_cast(m_block->uncached_fetch_ticks)), false); EmitStoreCPUStructField(offsetof(State, pending_ticks), temp); - EmitBindLabel(&cache_hit); - EmitAdd(pc.GetHostRegister(), pc.GetHostRegister(), Value::FromConstantU32(ICACHE_LINE_SIZE), false); } + else + { + // cached path + Value temp2 = m_register_cache.AllocateScratch(RegSize_32); - EmitBindLabel(&ready_to_execute); - m_register_cache.UninhibitAllocation(); + m_register_cache.InhibitAllocation(); + + VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK; + for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) + { + const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc); + const TickCount fill_ticks = GetICacheFillTicks(current_pc); + if (fill_ticks <= 0) + continue; + + const u32 line = GetICacheLine(current_pc); + const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32)); + LabelType cache_hit; + + EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offset); + EmitCopyValue(temp2.GetHostRegister(), Value::FromConstantU32(current_pc)); + EmitCmp(temp2.GetHostRegister(), temp); + EmitConditionalBranch(Condition::Equal, false, temp.GetHostRegister(), temp2, &cache_hit); + + EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offsetof(State, pending_ticks)); + EmitStoreCPUStructField(offset, temp2); + EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(static_cast(fill_ticks)), + false); + EmitStoreCPUStructField(offsetof(State, pending_ticks), temp); + EmitBindLabel(&cache_hit); + } + + m_register_cache.UninhibitAllocation(); + } } #endif diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index a65aa370e..99d374118 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -2612,48 +2612,32 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg) void CodeGenerator::EmitICacheCheckAndUpdate() { - Value pc = CalculatePC(); - Value seg = m_register_cache.AllocateScratch(RegSize_32); - m_register_cache.InhibitAllocation(); - - m_emit->mov(GetHostReg32(seg), GetHostReg32(pc)); - m_emit->shr(GetHostReg32(seg), 29); - - Xbyak::Label is_cached; - m_emit->cmp(GetHostReg32(seg), 4); - m_emit->jle(is_cached); - - // uncached - Xbyak::Label done; - m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], - static_cast(m_block->uncached_fetch_ticks)); - m_emit->jmp(done, Xbyak::CodeGenerator::T_NEAR); - - // cached - m_emit->L(is_cached); - m_emit->and_(GetHostReg32(pc), ICACHE_TAG_ADDRESS_MASK); - - VirtualMemoryAddress current_address = (m_block->instructions[0].pc & ICACHE_TAG_ADDRESS_MASK); - for (u32 i = 0; i < m_block->icache_line_count; i++, current_address += ICACHE_LINE_SIZE) + if (GetSegmentForAddress(m_pc) >= Segment::KSEG1) { - const TickCount fill_ticks = GetICacheFillTicks(current_address); - if (fill_ticks <= 0) - continue; - - const u32 line = GetICacheLine(current_address); - const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32)); - Xbyak::Label cache_hit; - - m_emit->cmp(GetHostReg32(pc), m_emit->dword[GetCPUPtrReg() + offset]); - m_emit->je(cache_hit); - m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], GetHostReg32(pc)); - m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], static_cast(fill_ticks)); - m_emit->L(cache_hit); - m_emit->add(GetHostReg32(pc), ICACHE_LINE_SIZE); + m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], + static_cast(m_block->uncached_fetch_ticks)); } + else + { + VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK; + for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) + { + const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc); + const TickCount fill_ticks = GetICacheFillTicks(current_pc); + if (fill_ticks <= 0) + continue; - m_emit->L(done); - m_register_cache.UninhibitAllocation(); + const u32 line = GetICacheLine(current_pc); + const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32)); + Xbyak::Label cache_hit; + + m_emit->cmp(m_emit->dword[GetCPUPtrReg() + offset], tag); + m_emit->je(cache_hit); + m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], tag); + m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)], static_cast(fill_ticks)); + m_emit->L(cache_hit); + } + } } void CodeGenerator::EmitStallUntilGTEComplete()