From 8a1b4d8beef01334b62d19f3ee7b20b0f30ad012 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 7 Apr 2024 18:52:30 +1000 Subject: [PATCH] DMA: Halt linked list transfers during pad transfer Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the controller. Having a large slice size causes the serial transfer to complete before the silly busy wait in the BIOS poll routine returns, resulting in it thinking that the controller is disconnected. Some games are very sensitive to this (e.g. Newman Haas Racing), to the point that even using a slice size of 1 is insufficient for avoiding the race, probably due to the linked list layout. Therefore, without major refactoring to ensure the CPU runs every DMA block, and the associated performance penalty, we just halt the DMA until the serial transfers have completed. To reduce the chances of this significantly affecting timing, we add accumulate the ticks that have been "lost", and allow them to be "used up" when the transfer does happen. --- src/core/dma.cpp | 72 +++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/src/core/dma.cpp b/src/core/dma.cpp index bcd1a96e7..75e375b77 100644 --- a/src/core/dma.cpp +++ b/src/core/dma.cpp @@ -45,6 +45,10 @@ static constexpr PhysicalMemoryAddress BASE_ADDRESS_MASK = UINT32_C(0x00FFFFFF); static constexpr PhysicalMemoryAddress TRANSFER_ADDRESS_MASK = UINT32_C(0x00FFFFFC); static constexpr PhysicalMemoryAddress LINKED_LIST_TERMINATOR = UINT32_C(0x00FFFFFF); +static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10; +static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5; +static constexpr TickCount HALT_TICKS_WHEN_TRANSMITTING_PAD = 100; + struct ChannelState { u32 base_address = 0; @@ -172,10 +176,6 @@ static bool CanTransferChannel(Channel channel, bool ignore_halt); static bool IsTransferHalted(); static void UpdateIRQ(); -// returns false if the DMA should now be halted -static TickCount GetTransferSliceTicks(); -static TickCount GetTransferHaltTicks(); - static void HaltTransfer(TickCount duration); static void UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late); @@ -543,36 +543,6 @@ ALWAYS_INLINE_RELEASE void DMA::CompleteTransfer(Channel channel, ChannelState& } } -// Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the -// controller. Using a too-large slice size will result in the serial timing being off, and the game thinking -// the controller is disconnected. So we don't hurt performance too much for the general case, we reduce this -// to equal CPU and DMA time when the controller is transferring, but otherwise leave it at the higher size. -enum : TickCount -{ - SLICE_SIZE_WHEN_TRANSMITTING_PAD = 100, - HALT_TICKS_WHEN_TRANSMITTING_PAD = 100, - LINKED_LIST_HEADER_READ_TICKS = 10, - LINKED_LIST_BLOCK_SETUP_TICKS = 5, -}; - -TickCount DMA::GetTransferSliceTicks() -{ -#ifdef _DEBUG - if (Pad::IsTransmitting()) - { - Log_DebugPrintf("DMA transfer while transmitting pad - using lower slice size of %u vs %u", - SLICE_SIZE_WHEN_TRANSMITTING_PAD, s_max_slice_ticks); - } -#endif - - return Pad::IsTransmitting() ? SLICE_SIZE_WHEN_TRANSMITTING_PAD : s_max_slice_ticks; -} - -TickCount DMA::GetTransferHaltTicks() -{ - return Pad::IsTransmitting() ? HALT_TICKS_WHEN_TRANSMITTING_PAD : s_halt_ticks; -} - template bool DMA::TransferChannel() { @@ -616,13 +586,36 @@ bool DMA::TransferChannel() return true; } + if constexpr (channel == Channel::GPU) + { + // Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the + // controller. Having a large slice size causes the serial transfer to complete before the silly busy wait + // in the BIOS poll routine returns, resulting in it thinking that the controller is disconnected. Some games + // are very sensitive to this (e.g. Newman Haas Racing), to the point that even using a slice size of 1 is + // insufficient for avoiding the race, probably due to the linked list layout. + // + // Therefore, without major refactoring to ensure the CPU runs every DMA block, and the associated performance + // penalty, we just halt the DMA until the serial transfers have completed. To reduce the chances of this + // significantly affecting timing, we add accumulate the ticks that have been "lost", and allow them to be + // "used up" when the transfer does happen. + // + if (Pad::IsTransmitting()) + { + Log_DebugFmt("DMA transfer while transmitting pad - {} ticks are buffered", -s_halt_ticks_remaining); + if (!s_unhalt_event->IsActive()) + s_unhalt_event->SetIntervalAndSchedule(HALT_TICKS_WHEN_TRANSMITTING_PAD); + return false; + } + } + Log_DebugFmt("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address); // Prove to the compiler that nothing's going to modify these. const u8* const ram_ptr = Bus::g_ram; const u32 mask = Bus::g_ram_mask; - TickCount remaining_ticks = GetTransferSliceTicks(); + const TickCount slice_ticks = s_max_slice_ticks + -s_halt_ticks_remaining; + TickCount remaining_ticks = slice_ticks; while (cs.request && remaining_ticks > 0) { u32 header; @@ -665,8 +658,11 @@ bool DMA::TransferChannel() cs.base_address = current_address; if (cs.request) { + // don't actually delay the transfer for the buffered ticks, this variable is dual-purposed. + s_halt_ticks_remaining = std::max(s_halt_ticks_remaining, 0); + // stall the transfer for a bit if we ran for too long - HaltTransfer(GetTransferHaltTicks()); + HaltTransfer(s_halt_ticks); return false; } else @@ -685,7 +681,7 @@ bool DMA::TransferChannel() const u32 block_size = cs.block_control.request.GetBlockSize(); u32 blocks_remaining = cs.block_control.request.GetBlockCount(); - TickCount ticks_remaining = GetTransferSliceTicks(); + TickCount ticks_remaining = s_max_slice_ticks; if (copy_to_device) { @@ -740,7 +736,7 @@ bool DMA::TransferChannel() { // we got halted if (!s_unhalt_event->IsActive()) - HaltTransfer(GetTransferHaltTicks()); + HaltTransfer(s_halt_ticks); return false; }