DMA: Better enforce CPU runtime during linked list

and get rid of the hack for Newman Haas.
This commit is contained in:
Stenzek 2024-04-10 20:00:09 +10:00
parent f63f5d829d
commit 417bf0c3bc
No known key found for this signature in database
7 changed files with 105 additions and 125 deletions

View file

@ -90470,7 +90470,7 @@ SLPS-02376:
- DigitalController - DigitalController
settings: settings:
dmaMaxSliceTicks: 100 dmaMaxSliceTicks: 100
dmaHaltTicks: 150 dmaHaltTicks: 200
codes: codes:
- SLPS-02376 - SLPS-02376
- SLPS-02356 - SLPS-02356

View file

@ -47,7 +47,7 @@ static constexpr PhysicalMemoryAddress LINKED_LIST_TERMINATOR = UINT32_C(0x00FFF
static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10; static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10;
static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5; static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5;
static constexpr TickCount HALT_TICKS_WHEN_TRANSMITTING_PAD = 100; static constexpr TickCount SLICE_SIZE_WHEN_TRANSMITTING_PAD = 10;
struct ChannelState struct ChannelState
{ {
@ -194,7 +194,7 @@ static TickCount TransferDeviceToMemory(u32 address, u32 increment, u32 word_cou
template<Channel channel> template<Channel channel>
static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count); static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count);
static TickCount GetMaxSliceTicks();
// configuration // configuration
static TickCount s_max_slice_ticks = 1000; static TickCount s_max_slice_ticks = 1000;
@ -543,6 +543,17 @@ ALWAYS_INLINE_RELEASE void DMA::CompleteTransfer(Channel channel, ChannelState&
} }
} }
// Computes how many ticks a single DMA slice is allowed to consume right now.
//
// The base limit is SLICE_SIZE_WHEN_TRANSMITTING_PAD while the pad is transmitting, otherwise the
// configured s_max_slice_ticks. When called outside of event processing that limit is returned as-is.
// While events are being run, the limit is reduced by however far the global tick counter has already
// advanced past the event-run tick counter, and the result is kept within [0, limit].
TickCount DMA::GetMaxSliceTicks()
{
  TickCount slice_limit;
  if (Pad::IsTransmitting())
    slice_limit = SLICE_SIZE_WHEN_TRANSMITTING_PAD;
  else
    slice_limit = s_max_slice_ticks;

  if (TimingEvents::IsRunningEvents())
  {
    // Work in unsigned arithmetic and convert the difference back to a signed tick count afterwards.
    const u32 now = TimingEvents::GetGlobalTickCounter();
    const u32 deadline = TimingEvents::GetEventRunTickCounter() + static_cast<u32>(slice_limit);
    const TickCount budget = static_cast<TickCount>(deadline - now);
    return std::clamp(budget, 0, slice_limit);
  }

  return slice_limit;
}
template<DMA::Channel channel> template<DMA::Channel channel>
bool DMA::TransferChannel() bool DMA::TransferChannel()
{ {
@ -586,35 +597,13 @@ bool DMA::TransferChannel()
return true; return true;
} }
if constexpr (channel == Channel::GPU)
{
// Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the
// controller. Having a large slice size causes the serial transfer to complete before the silly busy wait
// in the BIOS poll routine returns, resulting in it thinking that the controller is disconnected. Some games
// are very sensitive to this (e.g. Newman Haas Racing), to the point that even using a slice size of 1 is
// insufficient for avoiding the race, probably due to the linked list layout.
//
// Therefore, without major refactoring to ensure the CPU runs every DMA block, and the associated performance
// penalty, we just halt the DMA until the serial transfers have completed. To reduce the chances of this
// significantly affecting timing, we accumulate the ticks that have been "lost", and allow them to be
// "used up" when the transfer does happen.
//
if (Pad::IsTransmitting())
{
Log_DebugFmt("DMA transfer while transmitting pad - {} ticks are buffered", -s_halt_ticks_remaining);
if (!s_unhalt_event->IsActive())
s_unhalt_event->SetIntervalAndSchedule(HALT_TICKS_WHEN_TRANSMITTING_PAD);
return false;
}
}
Log_DebugFmt("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address); Log_DebugFmt("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address);
// Prove to the compiler that nothing's going to modify these. // Prove to the compiler that nothing's going to modify these.
const u8* const ram_ptr = Bus::g_ram; const u8* const ram_ptr = Bus::g_ram;
const u32 mask = Bus::g_ram_mask; const u32 mask = Bus::g_ram_mask;
const TickCount slice_ticks = s_max_slice_ticks + -s_halt_ticks_remaining; const TickCount slice_ticks = GetMaxSliceTicks();
TickCount remaining_ticks = slice_ticks; TickCount remaining_ticks = slice_ticks;
while (cs.request && remaining_ticks > 0) while (cs.request && remaining_ticks > 0)
{ {
@ -658,9 +647,6 @@ bool DMA::TransferChannel()
cs.base_address = current_address; cs.base_address = current_address;
if (cs.request) if (cs.request)
{ {
// don't actually delay the transfer for the buffered ticks, this variable is dual-purposed.
s_halt_ticks_remaining = std::max(s_halt_ticks_remaining, 0);
// stall the transfer for a bit if we ran for too long // stall the transfer for a bit if we ran for too long
HaltTransfer(s_halt_ticks); HaltTransfer(s_halt_ticks);
return false; return false;
@ -681,7 +667,7 @@ bool DMA::TransferChannel()
const u32 block_size = cs.block_control.request.GetBlockSize(); const u32 block_size = cs.block_control.request.GetBlockSize();
u32 blocks_remaining = cs.block_control.request.GetBlockCount(); u32 blocks_remaining = cs.block_control.request.GetBlockCount();
TickCount ticks_remaining = s_max_slice_ticks; TickCount ticks_remaining = GetMaxSliceTicks();
if (copy_to_device) if (copy_to_device)
{ {

View file

@ -467,7 +467,6 @@ void GPU::WriteRegister(u32 offset, u32 value)
case 0x00: case 0x00:
m_fifo.Push(value); m_fifo.Push(value);
ExecuteCommands(); ExecuteCommands();
UpdateCommandTickEvent();
return; return;
case 0x04: case 0x04:
@ -495,16 +494,7 @@ void GPU::DMARead(u32* words, u32 word_count)
void GPU::EndDMAWrite() void GPU::EndDMAWrite()
{ {
m_fifo_pushed = true; ExecuteCommands();
if (!m_syncing)
{
ExecuteCommands();
UpdateCommandTickEvent();
}
else
{
UpdateDMARequest();
}
} }
/** /**
@ -1029,26 +1019,24 @@ void GPU::CRTCTickEvent(TickCount ticks)
void GPU::CommandTickEvent(TickCount ticks) void GPU::CommandTickEvent(TickCount ticks)
{ {
m_pending_command_ticks -= SystemTicksToGPUTicks(ticks); m_pending_command_ticks -= SystemTicksToGPUTicks(ticks);
m_command_tick_event->Deactivate();
// we can be syncing if this came from a DMA write. recursively executing commands would be bad. m_executing_commands = true;
if (!m_syncing) ExecuteCommands();
ExecuteCommands(); UpdateCommandTickEvent();
m_executing_commands = false;
UpdateGPUIdle();
if (m_pending_command_ticks <= 0)
m_pending_command_ticks = 0;
else
m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
} }
void GPU::UpdateCommandTickEvent() void GPU::UpdateCommandTickEvent()
{ {
if (m_pending_command_ticks <= 0) if (m_pending_command_ticks <= 0)
{
m_pending_command_ticks = 0;
m_command_tick_event->Deactivate(); m_command_tick_event->Deactivate();
else if (!m_command_tick_event->IsActive()) }
else
{
m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks)); m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
}
} }
void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x,
@ -1121,7 +1109,6 @@ u32 GPU::ReadGPUREAD()
// end of transfer, catch up on any commands which were written (unlikely) // end of transfer, catch up on any commands which were written (unlikely)
ExecuteCommands(); ExecuteCommands();
UpdateCommandTickEvent();
break; break;
} }
} }

View file

@ -307,6 +307,7 @@ protected:
void WriteGP1(u32 value); void WriteGP1(u32 value);
void EndCommand(); void EndCommand();
void ExecuteCommands(); void ExecuteCommands();
void TryExecuteCommands();
void HandleGetGPUInfoCommand(u32 value); void HandleGetGPUInfoCommand(u32 value);
// Rendering in the backend // Rendering in the backend
@ -542,8 +543,7 @@ protected:
u32 m_GPUREAD_latch = 0; u32 m_GPUREAD_latch = 0;
/// True if currently executing/syncing. /// True if currently executing/syncing.
bool m_syncing = false; bool m_executing_commands = false;
bool m_fifo_pushed = false;
struct VRAMTransfer struct VRAMTransfer
{ {

View file

@ -25,94 +25,93 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
return value == 0 ? value_for_zero : value; return value == 0 ? value_for_zero : value;
} }
void GPU::ExecuteCommands() void GPU::TryExecuteCommands()
{ {
m_syncing = true; while (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
for (;;)
{ {
if (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty()) switch (m_blitter_state)
{ {
switch (m_blitter_state) case BlitterState::Idle:
{ {
case BlitterState::Idle: const u32 command = FifoPeek(0) >> 24;
if ((this->*s_GP0_command_handler_table[command])())
continue;
else
return;
}
case BlitterState::WritingVRAM:
{
DebugAssert(m_blit_remaining_words > 0);
const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
for (u32 i = 0; i < words_to_copy; i++)
m_blit_buffer.push_back(FifoPop());
m_blit_remaining_words -= words_to_copy;
Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
if (m_blit_remaining_words == 0)
FinishVRAMWrite();
continue;
}
case BlitterState::ReadingVRAM:
{
return;
}
break;
case BlitterState::DrawingPolyLine:
{
const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
u32 terminator_index =
m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
{ {
const u32 command = FifoPeek(0) >> 24; // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
if ((this->*s_GP0_command_handler_table[command])()) // terminator is on the first word for the vertex
continue; if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
else break;
goto batch_done;
} }
case BlitterState::WritingVRAM: const bool found_terminator = (terminator_index < m_fifo.GetSize());
const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
if (words_to_copy > 0)
{ {
DebugAssert(m_blit_remaining_words > 0);
const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy); m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
for (u32 i = 0; i < words_to_copy; i++) for (u32 i = 0; i < words_to_copy; i++)
m_blit_buffer.push_back(FifoPop()); m_blit_buffer.push_back(FifoPop());
m_blit_remaining_words -= words_to_copy; }
Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
if (m_blit_remaining_words == 0)
FinishVRAMWrite();
Log_DebugPrintf("Added %u words to polyline", words_to_copy);
if (found_terminator)
{
// drop terminator
m_fifo.RemoveOne();
Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
DispatchRenderCommand();
m_blit_buffer.clear();
EndCommand();
continue; continue;
} }
case BlitterState::ReadingVRAM:
{
goto batch_done;
}
break;
case BlitterState::DrawingPolyLine:
{
const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
u32 terminator_index =
m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
{
// polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
// terminator is on the first word for the vertex
if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
break;
}
const bool found_terminator = (terminator_index < m_fifo.GetSize());
const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
if (words_to_copy > 0)
{
m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
for (u32 i = 0; i < words_to_copy; i++)
m_blit_buffer.push_back(FifoPop());
}
Log_DebugPrintf("Added %u words to polyline", words_to_copy);
if (found_terminator)
{
// drop terminator
m_fifo.RemoveOne();
Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
DispatchRenderCommand();
m_blit_buffer.clear();
EndCommand();
continue;
}
}
break;
} }
}
batch_done:
m_fifo_pushed = false;
UpdateDMARequest();
if (!m_fifo_pushed)
break; break;
}
} }
}
// Entry point for draining the GP0 FIFO.
//
// Marks the GPU as executing (remembering whether the flag was already set by an outer caller), runs
// TryExecuteCommands(), then refreshes the DMA request and GPU idle state. The previous value of
// m_executing_commands is restored afterwards, and UpdateCommandTickEvent() is only invoked when the
// flag was not already set on entry.
//
// NOTE(review): the doubled UpdateGPUIdle() call and the `m_syncing = false;` statement below look like
// leftovers from the previous implementation - confirm against the actual source before relying on them.
void GPU::ExecuteCommands()
{
// std::exchange sets the flag to true and hands back what it was before this call.
const bool was_executing_from_event = std::exchange(m_executing_commands, true);
TryExecuteCommands();
UpdateDMARequest();
UpdateGPUIdle(); UpdateGPUIdle();
m_syncing = false;
// Put the flag back to its value on entry.
m_executing_commands = was_executing_from_event;
// Skip rescheduling when the flag was already set on entry.
if (!was_executing_from_event)
UpdateCommandTickEvent();
}
void GPU::EndCommand() void GPU::EndCommand()

View file

@ -17,6 +17,7 @@ static TimingEvent* s_active_events_tail;
static TimingEvent* s_current_event = nullptr; static TimingEvent* s_current_event = nullptr;
static u32 s_active_event_count = 0; static u32 s_active_event_count = 0;
static u32 s_global_tick_counter = 0; static u32 s_global_tick_counter = 0;
static u32 s_event_run_tick_counter = 0;
static bool s_frame_done = false; static bool s_frame_done = false;
u32 GetGlobalTickCounter() u32 GetGlobalTickCounter()
@ -24,6 +25,11 @@ u32 GetGlobalTickCounter()
return s_global_tick_counter; return s_global_tick_counter;
} }
// Returns the tick value recorded by RunEvents() when it starts dispatching due events: the global tick
// counter plus the CPU ticks that were pending at that moment.
u32 GetEventRunTickCounter()
{
return s_event_run_tick_counter;
}
void Initialize() void Initialize()
{ {
Reset(); Reset();
@ -293,6 +299,7 @@ void RunEvents()
if (pending_ticks >= s_active_events_head->GetDowncount()) if (pending_ticks >= s_active_events_head->GetDowncount())
{ {
CPU::ResetPendingTicks(); CPU::ResetPendingTicks();
s_event_run_tick_counter = s_global_tick_counter + static_cast<u32>(pending_ticks);
do do
{ {

View file

@ -81,6 +81,7 @@ public:
namespace TimingEvents { namespace TimingEvents {
u32 GetGlobalTickCounter(); u32 GetGlobalTickCounter();
u32 GetEventRunTickCounter();
void Initialize(); void Initialize();
void Reset(); void Reset();