mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-01-20 15:25:38 +00:00
DMA: Better enforce CPU runtime during linked list
and get rid of the hack for Newman Haas.
This commit is contained in:
parent
f63f5d829d
commit
417bf0c3bc
|
@ -90470,7 +90470,7 @@ SLPS-02376:
|
|||
- DigitalController
|
||||
settings:
|
||||
dmaMaxSliceTicks: 100
|
||||
dmaHaltTicks: 150
|
||||
dmaHaltTicks: 200
|
||||
codes:
|
||||
- SLPS-02376
|
||||
- SLPS-02356
|
||||
|
|
|
@ -47,7 +47,7 @@ static constexpr PhysicalMemoryAddress LINKED_LIST_TERMINATOR = UINT32_C(0x00FFF
|
|||
|
||||
static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10;
|
||||
static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5;
|
||||
static constexpr TickCount HALT_TICKS_WHEN_TRANSMITTING_PAD = 100;
|
||||
static constexpr TickCount SLICE_SIZE_WHEN_TRANSMITTING_PAD = 10;
|
||||
|
||||
struct ChannelState
|
||||
{
|
||||
|
@ -194,7 +194,7 @@ static TickCount TransferDeviceToMemory(u32 address, u32 increment, u32 word_cou
|
|||
template<Channel channel>
|
||||
static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count);
|
||||
|
||||
|
||||
static TickCount GetMaxSliceTicks();
|
||||
|
||||
// configuration
|
||||
static TickCount s_max_slice_ticks = 1000;
|
||||
|
@ -543,6 +543,17 @@ ALWAYS_INLINE_RELEASE void DMA::CompleteTransfer(Channel channel, ChannelState&
|
|||
}
|
||||
}
|
||||
|
||||
TickCount DMA::GetMaxSliceTicks()
|
||||
{
|
||||
const TickCount max = Pad::IsTransmitting() ? SLICE_SIZE_WHEN_TRANSMITTING_PAD : s_max_slice_ticks;
|
||||
if (!TimingEvents::IsRunningEvents())
|
||||
return max;
|
||||
|
||||
const u32 current_ticks = TimingEvents::GetGlobalTickCounter();
|
||||
const u32 max_ticks = TimingEvents::GetEventRunTickCounter() + static_cast<u32>(max);
|
||||
return std::clamp(static_cast<TickCount>(max_ticks - current_ticks), 0, max);
|
||||
}
|
||||
|
||||
template<DMA::Channel channel>
|
||||
bool DMA::TransferChannel()
|
||||
{
|
||||
|
@ -586,35 +597,13 @@ bool DMA::TransferChannel()
|
|||
return true;
|
||||
}
|
||||
|
||||
if constexpr (channel == Channel::GPU)
|
||||
{
|
||||
// Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the
|
||||
// controller. Having a large slice size causes the serial transfer to complete before the silly busy wait
|
||||
// in the BIOS poll routine returns, resulting in it thinking that the controller is disconnected. Some games
|
||||
// are very sensitive to this (e.g. Newman Haas Racing), to the point that even using a slice size of 1 is
|
||||
// insufficient for avoiding the race, probably due to the linked list layout.
|
||||
//
|
||||
// Therefore, without major refactoring to ensure the CPU runs every DMA block, and the associated performance
|
||||
// penalty, we just halt the DMA until the serial transfers have completed. To reduce the chances of this
|
||||
// significantly affecting timing, we add accumulate the ticks that have been "lost", and allow them to be
|
||||
// "used up" when the transfer does happen.
|
||||
//
|
||||
if (Pad::IsTransmitting())
|
||||
{
|
||||
Log_DebugFmt("DMA transfer while transmitting pad - {} ticks are buffered", -s_halt_ticks_remaining);
|
||||
if (!s_unhalt_event->IsActive())
|
||||
s_unhalt_event->SetIntervalAndSchedule(HALT_TICKS_WHEN_TRANSMITTING_PAD);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Log_DebugFmt("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address);
|
||||
|
||||
// Prove to the compiler that nothing's going to modify these.
|
||||
const u8* const ram_ptr = Bus::g_ram;
|
||||
const u32 mask = Bus::g_ram_mask;
|
||||
|
||||
const TickCount slice_ticks = s_max_slice_ticks + -s_halt_ticks_remaining;
|
||||
const TickCount slice_ticks = GetMaxSliceTicks();
|
||||
TickCount remaining_ticks = slice_ticks;
|
||||
while (cs.request && remaining_ticks > 0)
|
||||
{
|
||||
|
@ -658,9 +647,6 @@ bool DMA::TransferChannel()
|
|||
cs.base_address = current_address;
|
||||
if (cs.request)
|
||||
{
|
||||
// don't actually delay the transfer for the buffered ticks, this variable is dual-purposed.
|
||||
s_halt_ticks_remaining = std::max(s_halt_ticks_remaining, 0);
|
||||
|
||||
// stall the transfer for a bit if we ran for too long
|
||||
HaltTransfer(s_halt_ticks);
|
||||
return false;
|
||||
|
@ -681,7 +667,7 @@ bool DMA::TransferChannel()
|
|||
|
||||
const u32 block_size = cs.block_control.request.GetBlockSize();
|
||||
u32 blocks_remaining = cs.block_control.request.GetBlockCount();
|
||||
TickCount ticks_remaining = s_max_slice_ticks;
|
||||
TickCount ticks_remaining = GetMaxSliceTicks();
|
||||
|
||||
if (copy_to_device)
|
||||
{
|
||||
|
|
|
@ -467,7 +467,6 @@ void GPU::WriteRegister(u32 offset, u32 value)
|
|||
case 0x00:
|
||||
m_fifo.Push(value);
|
||||
ExecuteCommands();
|
||||
UpdateCommandTickEvent();
|
||||
return;
|
||||
|
||||
case 0x04:
|
||||
|
@ -495,16 +494,7 @@ void GPU::DMARead(u32* words, u32 word_count)
|
|||
|
||||
void GPU::EndDMAWrite()
|
||||
{
|
||||
m_fifo_pushed = true;
|
||||
if (!m_syncing)
|
||||
{
|
||||
ExecuteCommands();
|
||||
UpdateCommandTickEvent();
|
||||
}
|
||||
else
|
||||
{
|
||||
UpdateDMARequest();
|
||||
}
|
||||
ExecuteCommands();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1029,26 +1019,24 @@ void GPU::CRTCTickEvent(TickCount ticks)
|
|||
void GPU::CommandTickEvent(TickCount ticks)
|
||||
{
|
||||
m_pending_command_ticks -= SystemTicksToGPUTicks(ticks);
|
||||
m_command_tick_event->Deactivate();
|
||||
|
||||
// we can be syncing if this came from a DMA write. recursively executing commands would be bad.
|
||||
if (!m_syncing)
|
||||
ExecuteCommands();
|
||||
|
||||
UpdateGPUIdle();
|
||||
|
||||
if (m_pending_command_ticks <= 0)
|
||||
m_pending_command_ticks = 0;
|
||||
else
|
||||
m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
|
||||
m_executing_commands = true;
|
||||
ExecuteCommands();
|
||||
UpdateCommandTickEvent();
|
||||
m_executing_commands = false;
|
||||
}
|
||||
|
||||
void GPU::UpdateCommandTickEvent()
|
||||
{
|
||||
if (m_pending_command_ticks <= 0)
|
||||
{
|
||||
m_pending_command_ticks = 0;
|
||||
m_command_tick_event->Deactivate();
|
||||
else if (!m_command_tick_event->IsActive())
|
||||
}
|
||||
else
|
||||
{
|
||||
m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
|
||||
}
|
||||
}
|
||||
|
||||
void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x,
|
||||
|
@ -1121,7 +1109,6 @@ u32 GPU::ReadGPUREAD()
|
|||
|
||||
// end of transfer, catch up on any commands which were written (unlikely)
|
||||
ExecuteCommands();
|
||||
UpdateCommandTickEvent();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -307,6 +307,7 @@ protected:
|
|||
void WriteGP1(u32 value);
|
||||
void EndCommand();
|
||||
void ExecuteCommands();
|
||||
void TryExecuteCommands();
|
||||
void HandleGetGPUInfoCommand(u32 value);
|
||||
|
||||
// Rendering in the backend
|
||||
|
@ -542,8 +543,7 @@ protected:
|
|||
u32 m_GPUREAD_latch = 0;
|
||||
|
||||
/// True if currently executing/syncing.
|
||||
bool m_syncing = false;
|
||||
bool m_fifo_pushed = false;
|
||||
bool m_executing_commands = false;
|
||||
|
||||
struct VRAMTransfer
|
||||
{
|
||||
|
|
|
@ -25,94 +25,93 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
|
|||
return value == 0 ? value_for_zero : value;
|
||||
}
|
||||
|
||||
void GPU::ExecuteCommands()
|
||||
void GPU::TryExecuteCommands()
|
||||
{
|
||||
m_syncing = true;
|
||||
|
||||
for (;;)
|
||||
while (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
|
||||
{
|
||||
if (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
|
||||
switch (m_blitter_state)
|
||||
{
|
||||
switch (m_blitter_state)
|
||||
case BlitterState::Idle:
|
||||
{
|
||||
case BlitterState::Idle:
|
||||
const u32 command = FifoPeek(0) >> 24;
|
||||
if ((this->*s_GP0_command_handler_table[command])())
|
||||
continue;
|
||||
else
|
||||
return;
|
||||
}
|
||||
|
||||
case BlitterState::WritingVRAM:
|
||||
{
|
||||
DebugAssert(m_blit_remaining_words > 0);
|
||||
const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
|
||||
m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
|
||||
for (u32 i = 0; i < words_to_copy; i++)
|
||||
m_blit_buffer.push_back(FifoPop());
|
||||
m_blit_remaining_words -= words_to_copy;
|
||||
|
||||
Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
|
||||
if (m_blit_remaining_words == 0)
|
||||
FinishVRAMWrite();
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
case BlitterState::ReadingVRAM:
|
||||
{
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case BlitterState::DrawingPolyLine:
|
||||
{
|
||||
const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
|
||||
u32 terminator_index =
|
||||
m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
|
||||
for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
|
||||
{
|
||||
const u32 command = FifoPeek(0) >> 24;
|
||||
if ((this->*s_GP0_command_handler_table[command])())
|
||||
continue;
|
||||
else
|
||||
goto batch_done;
|
||||
// polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
|
||||
// terminator is on the first word for the vertex
|
||||
if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
|
||||
break;
|
||||
}
|
||||
|
||||
case BlitterState::WritingVRAM:
|
||||
const bool found_terminator = (terminator_index < m_fifo.GetSize());
|
||||
const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
|
||||
if (words_to_copy > 0)
|
||||
{
|
||||
DebugAssert(m_blit_remaining_words > 0);
|
||||
const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
|
||||
m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
|
||||
for (u32 i = 0; i < words_to_copy; i++)
|
||||
m_blit_buffer.push_back(FifoPop());
|
||||
m_blit_remaining_words -= words_to_copy;
|
||||
|
||||
Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
|
||||
if (m_blit_remaining_words == 0)
|
||||
FinishVRAMWrite();
|
||||
}
|
||||
|
||||
Log_DebugPrintf("Added %u words to polyline", words_to_copy);
|
||||
if (found_terminator)
|
||||
{
|
||||
// drop terminator
|
||||
m_fifo.RemoveOne();
|
||||
Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
|
||||
DispatchRenderCommand();
|
||||
m_blit_buffer.clear();
|
||||
EndCommand();
|
||||
continue;
|
||||
}
|
||||
|
||||
case BlitterState::ReadingVRAM:
|
||||
{
|
||||
goto batch_done;
|
||||
}
|
||||
break;
|
||||
|
||||
case BlitterState::DrawingPolyLine:
|
||||
{
|
||||
const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
|
||||
u32 terminator_index =
|
||||
m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
|
||||
for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
|
||||
{
|
||||
// polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
|
||||
// terminator is on the first word for the vertex
|
||||
if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
|
||||
break;
|
||||
}
|
||||
|
||||
const bool found_terminator = (terminator_index < m_fifo.GetSize());
|
||||
const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
|
||||
if (words_to_copy > 0)
|
||||
{
|
||||
m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
|
||||
for (u32 i = 0; i < words_to_copy; i++)
|
||||
m_blit_buffer.push_back(FifoPop());
|
||||
}
|
||||
|
||||
Log_DebugPrintf("Added %u words to polyline", words_to_copy);
|
||||
if (found_terminator)
|
||||
{
|
||||
// drop terminator
|
||||
m_fifo.RemoveOne();
|
||||
Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
|
||||
DispatchRenderCommand();
|
||||
m_blit_buffer.clear();
|
||||
EndCommand();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
batch_done:
|
||||
m_fifo_pushed = false;
|
||||
UpdateDMARequest();
|
||||
if (!m_fifo_pushed)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GPU::ExecuteCommands()
|
||||
{
|
||||
const bool was_executing_from_event = std::exchange(m_executing_commands, true);
|
||||
|
||||
TryExecuteCommands();
|
||||
UpdateDMARequest();
|
||||
UpdateGPUIdle();
|
||||
m_syncing = false;
|
||||
|
||||
m_executing_commands = was_executing_from_event;
|
||||
if (!was_executing_from_event)
|
||||
UpdateCommandTickEvent();
|
||||
}
|
||||
|
||||
void GPU::EndCommand()
|
||||
|
|
|
@ -17,6 +17,7 @@ static TimingEvent* s_active_events_tail;
|
|||
static TimingEvent* s_current_event = nullptr;
|
||||
static u32 s_active_event_count = 0;
|
||||
static u32 s_global_tick_counter = 0;
|
||||
static u32 s_event_run_tick_counter = 0;
|
||||
static bool s_frame_done = false;
|
||||
|
||||
u32 GetGlobalTickCounter()
|
||||
|
@ -24,6 +25,11 @@ u32 GetGlobalTickCounter()
|
|||
return s_global_tick_counter;
|
||||
}
|
||||
|
||||
u32 GetEventRunTickCounter()
|
||||
{
|
||||
return s_event_run_tick_counter;
|
||||
}
|
||||
|
||||
void Initialize()
|
||||
{
|
||||
Reset();
|
||||
|
@ -293,6 +299,7 @@ void RunEvents()
|
|||
if (pending_ticks >= s_active_events_head->GetDowncount())
|
||||
{
|
||||
CPU::ResetPendingTicks();
|
||||
s_event_run_tick_counter = s_global_tick_counter + static_cast<u32>(pending_ticks);
|
||||
|
||||
do
|
||||
{
|
||||
|
|
|
@ -81,6 +81,7 @@ public:
|
|||
namespace TimingEvents {
|
||||
|
||||
u32 GetGlobalTickCounter();
|
||||
u32 GetEventRunTickCounter();
|
||||
|
||||
void Initialize();
|
||||
void Reset();
|
||||
|
|
Loading…
Reference in a new issue