DMA: Better enforce CPU runtime during linked list

and get rid of the hack for Newman Haas.
Stenzek 2024-04-10 20:00:09 +10:00
parent f63f5d829d
commit 417bf0c3bc
7 changed files with 104 additions and 124 deletions

View file

@@ -90470,7 +90470,7 @@ SLPS-02376:
- DigitalController
settings:
dmaMaxSliceTicks: 100
-dmaHaltTicks: 150
+dmaHaltTicks: 200
codes:
- SLPS-02376
- SLPS-02356

View file

@@ -47,7 +47,7 @@ static constexpr PhysicalMemoryAddress LINKED_LIST_TERMINATOR = UINT32_C(0x00FFF
static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10;
static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5;
-static constexpr TickCount HALT_TICKS_WHEN_TRANSMITTING_PAD = 100;
+static constexpr TickCount SLICE_SIZE_WHEN_TRANSMITTING_PAD = 10;
struct ChannelState
{
@@ -194,7 +194,7 @@ static TickCount TransferDeviceToMemory(u32 address, u32 increment, u32 word_count
template<Channel channel>
static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count);
+static TickCount GetMaxSliceTicks();
// configuration
static TickCount s_max_slice_ticks = 1000;
@@ -543,6 +543,17 @@ ALWAYS_INLINE_RELEASE void DMA::CompleteTransfer(Channel channel, ChannelState&
}
}
+TickCount DMA::GetMaxSliceTicks()
+{
+const TickCount max = Pad::IsTransmitting() ? SLICE_SIZE_WHEN_TRANSMITTING_PAD : s_max_slice_ticks;
+if (!TimingEvents::IsRunningEvents())
+return max;
+const u32 current_ticks = TimingEvents::GetGlobalTickCounter();
+const u32 max_ticks = TimingEvents::GetEventRunTickCounter() + static_cast<u32>(max);
+return std::clamp(static_cast<TickCount>(max_ticks - current_ticks), 0, max);
+}
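The clamp above never grants more than max ticks, and it shrinks the slice once the global tick counter has moved past the latched event-run tick. A standalone sketch of that arithmetic, with a stand-in helper and invented tick values (not part of the codebase):

#include <algorithm>
#include <cstdint>
#include <cstdio>

using TickCount = int32_t;

// Stand-in for the clamp above: the slice may not extend more than max ticks
// past event_run_ticks, and the result never goes negative.
static TickCount ClampSliceTicks(uint32_t global_ticks, uint32_t event_run_ticks, TickCount max)
{
  const uint32_t max_ticks = event_run_ticks + static_cast<uint32_t>(max);
  return std::clamp(static_cast<TickCount>(max_ticks - global_ticks), 0, max);
}

int main()
{
  // Latched run tick 1000 with a 100-tick budget: a global counter of 1080
  // leaves 20 ticks, while 1150 is already past the boundary and clamps to 0.
  std::printf("%d %d\n", static_cast<int>(ClampSliceTicks(1080, 1000, 100)),
              static_cast<int>(ClampSliceTicks(1150, 1000, 100)));
  return 0;
}

If the global counter has run past event_run_ticks + max, the unsigned subtraction wraps; the cast back to TickCount turns that into a negative value, which the clamp pins to zero, just as in the function above.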
template<DMA::Channel channel>
bool DMA::TransferChannel()
{
@@ -586,35 +597,13 @@ bool DMA::TransferChannel()
return true;
}
-if constexpr (channel == Channel::GPU)
-{
-// Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the
-// controller. Having a large slice size causes the serial transfer to complete before the silly busy wait
-// in the BIOS poll routine returns, resulting in it thinking that the controller is disconnected. Some games
-// are very sensitive to this (e.g. Newman Haas Racing), to the point that even using a slice size of 1 is
-// insufficient for avoiding the race, probably due to the linked list layout.
-//
-// Therefore, without major refactoring to ensure the CPU runs every DMA block, and the associated performance
-// penalty, we just halt the DMA until the serial transfers have completed. To reduce the chances of this
-// significantly affecting timing, we accumulate the ticks that have been "lost", and allow them to be
-// "used up" when the transfer does happen.
-//
-if (Pad::IsTransmitting())
-{
-Log_DebugFmt("DMA transfer while transmitting pad - {} ticks are buffered", -s_halt_ticks_remaining);
-if (!s_unhalt_event->IsActive())
-s_unhalt_event->SetIntervalAndSchedule(HALT_TICKS_WHEN_TRANSMITTING_PAD);
-return false;
-}
-}
Log_DebugFmt("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address);
// Prove to the compiler that nothing's going to modify these.
const u8* const ram_ptr = Bus::g_ram;
const u32 mask = Bus::g_ram_mask;
-const TickCount slice_ticks = s_max_slice_ticks + -s_halt_ticks_remaining;
+const TickCount slice_ticks = GetMaxSliceTicks();
TickCount remaining_ticks = slice_ticks;
while (cs.request && remaining_ticks > 0)
{
@@ -658,9 +647,6 @@ bool DMA::TransferChannel()
cs.base_address = current_address;
if (cs.request)
{
-// don't actually delay the transfer for the buffered ticks, this variable is dual-purposed.
-s_halt_ticks_remaining = std::max(s_halt_ticks_remaining, 0);
// stall the transfer for a bit if we ran for too long
HaltTransfer(s_halt_ticks);
return false;
@@ -681,7 +667,7 @@ bool DMA::TransferChannel()
const u32 block_size = cs.block_control.request.GetBlockSize();
u32 blocks_remaining = cs.block_control.request.GetBlockCount();
-TickCount ticks_remaining = s_max_slice_ticks;
+TickCount ticks_remaining = GetMaxSliceTicks();
if (copy_to_device)
{

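The comment removed above describes the underlying race: a linked-list GPU DMA that never yields can starve the BIOS controller-polling loop while a serial transfer is in flight. Rather than halting the transfer outright, the new path shrinks the slice while the pad is transmitting, so the CPU gets to run between blocks. A minimal sketch of that selection; DEFAULT_MAX_SLICE_TICKS stands in for the s_max_slice_ticks default of 1000, and the bool parameter replaces Pad::IsTransmitting():

#include <cstdint>
#include <cstdio>

using TickCount = int32_t;

static constexpr TickCount SLICE_SIZE_WHEN_TRANSMITTING_PAD = 10;
static constexpr TickCount DEFAULT_MAX_SLICE_TICKS = 1000;

// Pick the slice limit before it is clamped against the event-run boundary:
// tiny while a pad transfer is active, the configured maximum otherwise.
static TickCount SliceLimit(bool pad_transmitting)
{
  return pad_transmitting ? SLICE_SIZE_WHEN_TRANSMITTING_PAD : DEFAULT_MAX_SLICE_TICKS;
}

int main()
{
  std::printf("pad idle: %d ticks, pad transmitting: %d ticks\n",
              static_cast<int>(SliceLimit(false)), static_cast<int>(SliceLimit(true)));
  return 0;
}

In the real code this value then feeds the event-boundary clamp in GetMaxSliceTicks(), and the per-game dmaMaxSliceTicks and dmaHaltTicks entries in the first hunk override the corresponding defaults.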
View file

@@ -467,7 +467,6 @@ void GPU::WriteRegister(u32 offset, u32 value)
case 0x00:
m_fifo.Push(value);
ExecuteCommands();
-UpdateCommandTickEvent();
return;
case 0x04:
@@ -495,16 +494,7 @@ void GPU::DMARead(u32* words, u32 word_count)
void GPU::EndDMAWrite()
{
-m_fifo_pushed = true;
-if (!m_syncing)
-{
ExecuteCommands();
-UpdateCommandTickEvent();
-}
-else
-{
-UpdateDMARequest();
-}
}
/**
@@ -1029,26 +1019,24 @@ void GPU::CRTCTickEvent(TickCount ticks)
void GPU::CommandTickEvent(TickCount ticks)
{
m_pending_command_ticks -= SystemTicksToGPUTicks(ticks);
m_command_tick_event->Deactivate();
-// we can be syncing if this came from a DMA write. recursively executing commands would be bad.
-if (!m_syncing)
+m_executing_commands = true;
ExecuteCommands();
UpdateGPUIdle();
-if (m_pending_command_ticks <= 0)
-m_pending_command_ticks = 0;
-else
-m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
+UpdateCommandTickEvent();
+m_executing_commands = false;
}
void GPU::UpdateCommandTickEvent()
{
if (m_pending_command_ticks <= 0)
+{
+m_pending_command_ticks = 0;
m_command_tick_event->Deactivate();
-else if (!m_command_tick_event->IsActive())
+}
+else
+{
m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
+}
}
void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x,
@@ -1121,7 +1109,6 @@ u32 GPU::ReadGPUREAD()
// end of transfer, catch up on any commands which were written (unlikely)
ExecuteCommands();
-UpdateCommandTickEvent();
break;
}
}

View file

@@ -307,6 +307,7 @@ protected:
void WriteGP1(u32 value);
void EndCommand();
void ExecuteCommands();
+void TryExecuteCommands();
void HandleGetGPUInfoCommand(u32 value);
// Rendering in the backend
@@ -542,8 +543,7 @@ protected:
u32 m_GPUREAD_latch = 0;
/// True if currently executing/syncing.
-bool m_syncing = false;
-bool m_fifo_pushed = false;
+bool m_executing_commands = false;
struct VRAMTransfer
{

View file

@@ -25,13 +25,9 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
return value == 0 ? value_for_zero : value;
}
-void GPU::ExecuteCommands()
+void GPU::TryExecuteCommands()
{
-m_syncing = true;
-for (;;)
-{
-if (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
+while (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
{
switch (m_blitter_state)
{
@@ -41,7 +37,7 @@ void GPU::ExecuteCommands()
if ((this->*s_GP0_command_handler_table[command])())
continue;
else
-goto batch_done;
+return;
}
case BlitterState::WritingVRAM:
@@ -62,7 +58,7 @@ void GPU::ExecuteCommands()
case BlitterState::ReadingVRAM:
{
-goto batch_done;
+return;
}
break;
@@ -103,16 +99,19 @@ void GPU::ExecuteCommands()
break;
}
}
}
-batch_done:
-m_fifo_pushed = false;
-UpdateDMARequest();
-if (!m_fifo_pushed)
-break;
-}
+void GPU::ExecuteCommands()
+{
+const bool was_executing_from_event = std::exchange(m_executing_commands, true);
+TryExecuteCommands();
+UpdateDMARequest();
UpdateGPUIdle();
-m_syncing = false;
+m_executing_commands = was_executing_from_event;
+if (!was_executing_from_event)
+UpdateCommandTickEvent();
}
void GPU::EndCommand()

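The std::exchange guard in the new ExecuteCommands() makes it safe to re-enter: nested calls see the flag already set, and only the outermost call ends up rescheduling the command tick event. A standalone illustration of that guard pattern, with invented names:

#include <cstdio>
#include <utility>

static bool s_inside = false;

static void Finalize()
{
  std::printf("finalize (outermost call only)\n");
}

static void Run(int depth)
{
  // Remember whether we were already inside, then mark ourselves as inside.
  const bool was_inside = std::exchange(s_inside, true);
  if (depth > 0)
    Run(depth - 1); // nested call: sees s_inside == true, so it will not finalize
  s_inside = was_inside;
  if (!was_inside)
    Finalize(); // only the outermost call reaches this with was_inside == false
}

int main()
{
  Run(2); // prints "finalize (outermost call only)" exactly once
  return 0;
}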
View file

@@ -17,6 +17,7 @@ static TimingEvent* s_active_events_tail;
static TimingEvent* s_current_event = nullptr;
static u32 s_active_event_count = 0;
static u32 s_global_tick_counter = 0;
+static u32 s_event_run_tick_counter = 0;
static bool s_frame_done = false;
u32 GetGlobalTickCounter()
@@ -24,6 +25,11 @@ u32 GetGlobalTickCounter()
return s_global_tick_counter;
}
+u32 GetEventRunTickCounter()
+{
+return s_event_run_tick_counter;
+}
void Initialize()
{
Reset();
@@ -293,6 +299,7 @@ void RunEvents()
if (pending_ticks >= s_active_events_head->GetDowncount())
{
CPU::ResetPendingTicks();
+s_event_run_tick_counter = s_global_tick_counter + static_cast<u32>(pending_ticks);
do
{

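For intuition, a toy model of the counter added above (illustrative only; the real RunEvents advances the global counter event by event): when a batch of events is dispatched, the tick the CPU has executed up to is latched, and callers such as DMA::GetMaxSliceTicks can later compare the current global tick against it.

#include <cstdint>
#include <cstdio>

// Toy model of the two counters; names and structure are invented for the example.
struct TimingModel
{
  uint32_t global_ticks = 0;     // advances as events execute
  uint32_t event_run_ticks = 0;  // latched when an event batch is dispatched

  void RunEvents(uint32_t pending_ticks)
  {
    // Latch where the CPU stopped; the real loop then steps global_ticks
    // forward event by event, while here it jumps in one go.
    event_run_ticks = global_ticks + pending_ticks;
    global_ticks += pending_ticks;
  }
};

int main()
{
  TimingModel t;
  t.RunEvents(300);
  t.RunEvents(500);
  std::printf("%u %u\n", static_cast<unsigned>(t.global_ticks),
              static_cast<unsigned>(t.event_run_ticks)); // both 800 after the second batch
  return 0;
}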
View file

@@ -81,6 +81,7 @@ public:
namespace TimingEvents {
u32 GetGlobalTickCounter();
+u32 GetEventRunTickCounter();
void Initialize();
void Reset();