Mirror of https://github.com/RetroDECK/Duckstation.git (synced 2025-01-18 22:35:39 +00:00)
CPU/Recompiler: Elide tick flush completely where possible
This commit is contained in:
Parent: e8ac1fca80
Commit: 2113405c7a
|
@@ -1276,8 +1276,6 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
break;
|
||||
}
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
|
@@ -1286,8 +1284,17 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
// we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
|
||||
DebugAssert(m_delayed_cycles_add > 0);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
m_delayed_cycles_add += Bus::RAM_READ_TICKS;
|
||||
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
||||
// restore fastmem base state for the next instruction
|
||||
if (old_store_fastmem_base)
|
||||
fastmem_base = GetFastmemStoreBase();
|
||||
|
@@ -1436,8 +1443,15 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
DebugAssert(m_delayed_cycles_add > 0);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, size, actual_value, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
||||
// restore fastmem base state for the next instruction
|
||||
if (old_load_fastmem_base)
|
||||
fastmem_base = GetFastmemLoadBase();
|
||||
|
|
|
@@ -1463,16 +1463,23 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
}
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
// we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
|
||||
DebugAssert(m_delayed_cycles_add > 0);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
m_delayed_cycles_add += Bus::RAM_READ_TICKS;
|
||||
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
||||
// return to the block code
|
||||
EmitBranch(GetCurrentNearCodePointer(), false);
|
||||
|
||||
|
@@ -1638,8 +1645,14 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
DebugAssert(m_delayed_cycles_add > 0);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, size, value_in_hr, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
||||
// return to the block code
|
||||
EmitBranch(GetCurrentNearCodePointer(), false);
|
||||
|
||||
|
|
|
@@ -57,8 +57,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
}
|
||||
}
|
||||
|
||||
AddPendingCycles(true);
|
||||
|
||||
Value result = m_register_cache.AllocateScratch(HostPointerSize);
|
||||
|
||||
const bool use_fastmem =
|
||||
|
@@ -83,6 +81,7 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
}
|
||||
else
|
||||
{
|
||||
AddPendingCycles(true);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
|
||||
}
|
||||
|
@@ -133,8 +132,6 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
}
|
||||
}
|
||||
|
||||
AddPendingCycles(true);
|
||||
|
||||
const bool use_fastmem =
|
||||
(address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true) && !SpeculativeIsCacheIsolated();
|
||||
if (address_spec)
|
||||
|
@@ -157,6 +154,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
}
|
||||
else
|
||||
{
|
||||
AddPendingCycles(true);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, size, value, false);
|
||||
}
|
||||
|
|
|
@@ -1955,9 +1955,6 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: BIOS reads...
|
||||
EmitAddCPUStructField(offsetof(CPU::State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
|
||||
|
||||
// insert nops, we need at least 5 bytes for a relative jump
|
||||
const u32 fastmem_size =
|
||||
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
|
||||
|
@@ -1972,8 +1969,17 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
m_far_emitter.align(16);
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
// we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
|
||||
DebugAssert(m_delayed_cycles_add > 0);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
m_delayed_cycles_add += Bus::RAM_READ_TICKS;
|
||||
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
||||
// return to the block code
|
||||
m_emit->jmp(GetCurrentNearCodePointer());
|
||||
|
||||
|
@@ -2234,8 +2240,14 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
|||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
DebugAssert(m_delayed_cycles_add > 0);
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, size, value, true);
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||
|
||||
// return to the block code
|
||||
m_emit->jmp(GetCurrentNearCodePointer());
|
||||
|
||||
|
|
Loading…
Reference in a new issue