mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-27 08:05:41 +00:00
CPU/Recompiler: Elide tick flush completely where possible
This commit is contained in:
parent
e8ac1fca80
commit
2113405c7a
|
@ -1276,8 +1276,6 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
|
|
||||||
|
|
||||||
bpi.host_code_size = static_cast<u32>(
|
bpi.host_code_size = static_cast<u32>(
|
||||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||||
|
|
||||||
|
@ -1286,8 +1284,17 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
||||||
// generate slowmem fallback
|
// generate slowmem fallback
|
||||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
|
|
||||||
|
// we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
|
||||||
|
DebugAssert(m_delayed_cycles_add > 0);
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||||
|
m_delayed_cycles_add += Bus::RAM_READ_TICKS;
|
||||||
|
|
||||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||||
|
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||||
|
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||||
|
|
||||||
// restore fastmem base state for the next instruction
|
// restore fastmem base state for the next instruction
|
||||||
if (old_store_fastmem_base)
|
if (old_store_fastmem_base)
|
||||||
fastmem_base = GetFastmemStoreBase();
|
fastmem_base = GetFastmemStoreBase();
|
||||||
|
@ -1436,8 +1443,15 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
||||||
// generate slowmem fallback
|
// generate slowmem fallback
|
||||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
|
|
||||||
|
DebugAssert(m_delayed_cycles_add > 0);
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||||
|
|
||||||
EmitStoreGuestMemorySlowmem(cbi, address, size, actual_value, true);
|
EmitStoreGuestMemorySlowmem(cbi, address, size, actual_value, true);
|
||||||
|
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||||
|
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||||
|
|
||||||
// restore fastmem base state for the next instruction
|
// restore fastmem base state for the next instruction
|
||||||
if (old_load_fastmem_base)
|
if (old_load_fastmem_base)
|
||||||
fastmem_base = GetFastmemLoadBase();
|
fastmem_base = GetFastmemLoadBase();
|
||||||
|
|
|
@ -1463,16 +1463,23 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
|
|
||||||
|
|
||||||
bpi.host_code_size = static_cast<u32>(
|
bpi.host_code_size = static_cast<u32>(
|
||||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||||
|
|
||||||
// generate slowmem fallback
|
// generate slowmem fallback
|
||||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
|
|
||||||
|
// we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
|
||||||
|
DebugAssert(m_delayed_cycles_add > 0);
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||||
|
m_delayed_cycles_add += Bus::RAM_READ_TICKS;
|
||||||
|
|
||||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||||
|
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||||
|
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||||
|
|
||||||
// return to the block code
|
// return to the block code
|
||||||
EmitBranch(GetCurrentNearCodePointer(), false);
|
EmitBranch(GetCurrentNearCodePointer(), false);
|
||||||
|
|
||||||
|
@ -1638,8 +1645,14 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
||||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
|
|
||||||
|
DebugAssert(m_delayed_cycles_add > 0);
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||||
|
|
||||||
EmitStoreGuestMemorySlowmem(cbi, address, size, value_in_hr, true);
|
EmitStoreGuestMemorySlowmem(cbi, address, size, value_in_hr, true);
|
||||||
|
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||||
|
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||||
|
|
||||||
// return to the block code
|
// return to the block code
|
||||||
EmitBranch(GetCurrentNearCodePointer(), false);
|
EmitBranch(GetCurrentNearCodePointer(), false);
|
||||||
|
|
||||||
|
|
|
@ -57,8 +57,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AddPendingCycles(true);
|
|
||||||
|
|
||||||
Value result = m_register_cache.AllocateScratch(HostPointerSize);
|
Value result = m_register_cache.AllocateScratch(HostPointerSize);
|
||||||
|
|
||||||
const bool use_fastmem =
|
const bool use_fastmem =
|
||||||
|
@ -83,6 +81,7 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
AddPendingCycles(true);
|
||||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
|
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
|
||||||
}
|
}
|
||||||
|
@ -133,8 +132,6 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AddPendingCycles(true);
|
|
||||||
|
|
||||||
const bool use_fastmem =
|
const bool use_fastmem =
|
||||||
(address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true) && !SpeculativeIsCacheIsolated();
|
(address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true) && !SpeculativeIsCacheIsolated();
|
||||||
if (address_spec)
|
if (address_spec)
|
||||||
|
@ -157,6 +154,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
AddPendingCycles(true);
|
||||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||||
EmitStoreGuestMemorySlowmem(cbi, address, size, value, false);
|
EmitStoreGuestMemorySlowmem(cbi, address, size, value, false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1955,9 +1955,6 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: BIOS reads...
|
|
||||||
EmitAddCPUStructField(offsetof(CPU::State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
|
|
||||||
|
|
||||||
// insert nops, we need at least 5 bytes for a relative jump
|
// insert nops, we need at least 5 bytes for a relative jump
|
||||||
const u32 fastmem_size =
|
const u32 fastmem_size =
|
||||||
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
|
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
|
||||||
|
@ -1972,8 +1969,17 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
||||||
m_far_emitter.align(16);
|
m_far_emitter.align(16);
|
||||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
|
|
||||||
|
// we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
|
||||||
|
DebugAssert(m_delayed_cycles_add > 0);
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||||
|
m_delayed_cycles_add += Bus::RAM_READ_TICKS;
|
||||||
|
|
||||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||||
|
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||||
|
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||||
|
|
||||||
// return to the block code
|
// return to the block code
|
||||||
m_emit->jmp(GetCurrentNearCodePointer());
|
m_emit->jmp(GetCurrentNearCodePointer());
|
||||||
|
|
||||||
|
@ -2234,8 +2240,14 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
|
||||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
|
|
||||||
|
DebugAssert(m_delayed_cycles_add > 0);
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
|
||||||
|
|
||||||
EmitStoreGuestMemorySlowmem(cbi, address, size, value, true);
|
EmitStoreGuestMemorySlowmem(cbi, address, size, value, true);
|
||||||
|
|
||||||
|
EmitAddCPUStructField(offsetof(State, pending_ticks),
|
||||||
|
Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
|
||||||
|
|
||||||
// return to the block code
|
// return to the block code
|
||||||
m_emit->jmp(GetCurrentNearCodePointer());
|
m_emit->jmp(GetCurrentNearCodePointer());
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue