DMA: Better enforce CPU runtime during linked list
and get rid of the hack for Newman Haas.
commit 417bf0c3bc
parent f63f5d829d
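In short: rather than halting GPU linked-list DMA outright whenever the pad is transmitting (the old Newman Haas hack), the DMA slice size is now clamped, both to a small fixed size during pad transmission and to the remainder of the current timing-event window, so the CPU is guaranteed to run regularly during linked-list transfers. A standalone sketch of the clamping idea, with simplified stand-in state (illustrative only; the real implementation is in the dma.cpp hunks below):

    #include <algorithm>
    #include <cstdint>

    using TickCount = int32_t;

    // Stand-ins for emulator state; values are illustrative only.
    constexpr TickCount kMaxSliceTicks = 1000; // normal slice budget
    constexpr TickCount kPadSliceTicks = 10;   // tiny slice during pad I/O

    // Bound how many ticks one DMA slice may consume before the CPU runs again.
    TickCount MaxSliceTicks(bool pad_transmitting, bool running_events,
                            uint32_t global_ticks, uint32_t event_run_ticks)
    {
      const TickCount max = pad_transmitting ? kPadSliceTicks : kMaxSliceTicks;
      if (!running_events)
        return max;

      // Never run past the end of the current event window. Unsigned
      // subtraction stays correct even if the 32-bit counters wrap.
      const uint32_t end_ticks = event_run_ticks + static_cast<uint32_t>(max);
      return std::clamp(static_cast<TickCount>(end_ticks - global_ticks), TickCount(0), max);
    }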
@@ -90470,7 +90470,7 @@ SLPS-02376:
   - DigitalController
   settings:
     dmaMaxSliceTicks: 100
-    dmaHaltTicks: 150
+    dmaHaltTicks: 200
   codes:
   - SLPS-02376
   - SLPS-02356
@@ -47,7 +47,7 @@ static constexpr PhysicalMemoryAddress LINKED_LIST_TERMINATOR = UINT32_C(0x00FFF
 
 static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10;
 static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5;
-static constexpr TickCount HALT_TICKS_WHEN_TRANSMITTING_PAD = 100;
+static constexpr TickCount SLICE_SIZE_WHEN_TRANSMITTING_PAD = 10;
 
 struct ChannelState
 {
@@ -194,7 +194,7 @@ static TickCount TransferDeviceToMemory(u32 address, u32 increment, u32 word_cou
 template<Channel channel>
 static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count);
 
+static TickCount GetMaxSliceTicks();
+
 // configuration
 static TickCount s_max_slice_ticks = 1000;
@@ -543,6 +543,17 @@ ALWAYS_INLINE_RELEASE void DMA::CompleteTransfer(Channel channel, ChannelState&
   }
 }
 
+TickCount DMA::GetMaxSliceTicks()
+{
+  const TickCount max = Pad::IsTransmitting() ? SLICE_SIZE_WHEN_TRANSMITTING_PAD : s_max_slice_ticks;
+  if (!TimingEvents::IsRunningEvents())
+    return max;
+
+  const u32 current_ticks = TimingEvents::GetGlobalTickCounter();
+  const u32 max_ticks = TimingEvents::GetEventRunTickCounter() + static_cast<u32>(max);
+  return std::clamp(static_cast<TickCount>(max_ticks - current_ticks), 0, max);
+}
+
 template<DMA::Channel channel>
 bool DMA::TransferChannel()
 {
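The unsigned arithmetic in GetMaxSliceTicks() is deliberate: the tick counters are u32 and may wrap, and the global counter keeps advancing while events run. A worked example with assumed numbers:

    // Assume max == 1000 and events are currently running, with
    //   TimingEvents::GetEventRunTickCounter() == 5000  (target of this event pass)
    //   TimingEvents::GetGlobalTickCounter()   == 5400  (events already consumed 400 ticks)
    // Then max_ticks = 5000 + 1000 = 6000, and the function returns
    //   clamp(6000 - 5400, 0, 1000) == 600 ticks for this slice.
    // Had the global counter reached 6200, the cast of (6000 - 6200) yields
    // a negative TickCount and the clamp returns 0: the DMA yields to the
    // CPU immediately. Any wrap-around in the u32 counters cancels in the
    // subtraction, so the window stays correct across the wrap.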
@@ -586,35 +597,13 @@ bool DMA::TransferChannel()
     return true;
   }
 
-  if constexpr (channel == Channel::GPU)
-  {
-    // Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the
-    // controller. Having a large slice size causes the serial transfer to complete before the silly busy wait
-    // in the BIOS poll routine returns, resulting in it thinking that the controller is disconnected. Some games
-    // are very sensitive to this (e.g. Newman Haas Racing), to the point that even using a slice size of 1 is
-    // insufficient for avoiding the race, probably due to the linked list layout.
-    //
-    // Therefore, without major refactoring to ensure the CPU runs every DMA block, and the associated performance
-    // penalty, we just halt the DMA until the serial transfers have completed. To reduce the chances of this
-    // significantly affecting timing, we add accumulate the ticks that have been "lost", and allow them to be
-    // "used up" when the transfer does happen.
-    //
-    if (Pad::IsTransmitting())
-    {
-      Log_DebugFmt("DMA transfer while transmitting pad - {} ticks are buffered", -s_halt_ticks_remaining);
-      if (!s_unhalt_event->IsActive())
-        s_unhalt_event->SetIntervalAndSchedule(HALT_TICKS_WHEN_TRANSMITTING_PAD);
-      return false;
-    }
-  }
-
   Log_DebugFmt("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address);
 
   // Prove to the compiler that nothing's going to modify these.
   const u8* const ram_ptr = Bus::g_ram;
   const u32 mask = Bus::g_ram_mask;
 
-  const TickCount slice_ticks = s_max_slice_ticks + -s_halt_ticks_remaining;
+  const TickCount slice_ticks = GetMaxSliceTicks();
   TickCount remaining_ticks = slice_ticks;
   while (cs.request && remaining_ticks > 0)
   {
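For context on the block removed above: the race is between the GPU linked-list DMA and the BIOS pad-poll busy wait, and the old fix stalled the DMA entirely while the pad was transmitting, buffering the lost ticks in s_halt_ticks_remaining. The new code keeps the transfer running but, via GetMaxSliceTicks(), shrinks each slice to SLICE_SIZE_WHEN_TRANSMITTING_PAD (10 ticks) while Pad::IsTransmitting(), so the CPU's poll loop is interleaved with the transfer instead of being starved by it. Schematically (comments only, not the emulator's actual loop structure):

    // Old: pad transmitting  => halt the whole transfer, unhalt event later
    //   if (Pad::IsTransmitting()) { schedule s_unhalt_event; return; }
    //
    // New: pad transmitting  => keep transferring, but in 10-tick slices
    //   slice = GetMaxSliceTicks();  // 10 while the pad is shifting bits
    //   ...run at most `slice` ticks of linked-list DMA...
    //   return to the CPU, which can observe serial progress, then resume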
@@ -658,9 +647,6 @@ bool DMA::TransferChannel()
     cs.base_address = current_address;
     if (cs.request)
     {
-      // don't actually delay the transfer for the buffered ticks, this variable is dual-purposed.
-      s_halt_ticks_remaining = std::max(s_halt_ticks_remaining, 0);
-
       // stall the transfer for a bit if we ran for too long
       HaltTransfer(s_halt_ticks);
       return false;
@@ -681,7 +667,7 @@ bool DMA::TransferChannel()
 
   const u32 block_size = cs.block_control.request.GetBlockSize();
   u32 blocks_remaining = cs.block_control.request.GetBlockCount();
-  TickCount ticks_remaining = s_max_slice_ticks;
+  TickCount ticks_remaining = GetMaxSliceTicks();
 
   if (copy_to_device)
   {
@@ -467,7 +467,6 @@ void GPU::WriteRegister(u32 offset, u32 value)
     case 0x00:
       m_fifo.Push(value);
       ExecuteCommands();
-      UpdateCommandTickEvent();
      return;
 
     case 0x04:
@@ -495,16 +494,7 @@ void GPU::DMARead(u32* words, u32 word_count)
 
 void GPU::EndDMAWrite()
 {
-  m_fifo_pushed = true;
-  if (!m_syncing)
-  {
-    ExecuteCommands();
-    UpdateCommandTickEvent();
-  }
-  else
-  {
-    UpdateDMARequest();
-  }
+  ExecuteCommands();
 }
 
 /**
@@ -1029,26 +1019,24 @@ void GPU::CRTCTickEvent(TickCount ticks)
 void GPU::CommandTickEvent(TickCount ticks)
 {
   m_pending_command_ticks -= SystemTicksToGPUTicks(ticks);
-  m_command_tick_event->Deactivate();
 
-  // we can be syncing if this came from a DMA write. recursively executing commands would be bad.
-  if (!m_syncing)
-    ExecuteCommands();
-
-  UpdateGPUIdle();
-
-  if (m_pending_command_ticks <= 0)
-    m_pending_command_ticks = 0;
-  else
-    m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
+  m_executing_commands = true;
+  ExecuteCommands();
+  UpdateCommandTickEvent();
+  m_executing_commands = false;
 }
 
 void GPU::UpdateCommandTickEvent()
 {
   if (m_pending_command_ticks <= 0)
+  {
+    m_pending_command_ticks = 0;
     m_command_tick_event->Deactivate();
-  else if (!m_command_tick_event->IsActive())
+  }
+  else
+  {
     m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
+  }
 }
 
 void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x,
@@ -1121,7 +1109,6 @@ u32 GPU::ReadGPUREAD()
 
       // end of transfer, catch up on any commands which were written (unlikely)
       ExecuteCommands();
-      UpdateCommandTickEvent();
       break;
     }
   }
@@ -307,6 +307,7 @@ protected:
   void WriteGP1(u32 value);
   void EndCommand();
   void ExecuteCommands();
+  void TryExecuteCommands();
   void HandleGetGPUInfoCommand(u32 value);
 
   // Rendering in the backend
@@ -542,8 +543,7 @@ protected:
   u32 m_GPUREAD_latch = 0;
 
   /// True if currently executing/syncing.
-  bool m_syncing = false;
-  bool m_fifo_pushed = false;
+  bool m_executing_commands = false;
 
   struct VRAMTransfer
   {
@@ -25,94 +25,93 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
   return value == 0 ? value_for_zero : value;
 }
 
-void GPU::ExecuteCommands()
-{
-  m_syncing = true;
-
-  for (;;)
-  {
-    if (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
-    {
-      switch (m_blitter_state)
-      {
-        case BlitterState::Idle:
-        {
-          const u32 command = FifoPeek(0) >> 24;
-          if ((this->*s_GP0_command_handler_table[command])())
-            continue;
-          else
-            goto batch_done;
-        }
-
-        case BlitterState::WritingVRAM:
-        {
-          DebugAssert(m_blit_remaining_words > 0);
-          const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
-          m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
-          for (u32 i = 0; i < words_to_copy; i++)
-            m_blit_buffer.push_back(FifoPop());
-          m_blit_remaining_words -= words_to_copy;
-
-          Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
-          if (m_blit_remaining_words == 0)
-            FinishVRAMWrite();
-
-          continue;
-        }
-
-        case BlitterState::ReadingVRAM:
-        {
-          goto batch_done;
-        }
-        break;
-
-        case BlitterState::DrawingPolyLine:
-        {
-          const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
-          u32 terminator_index =
-            m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
-          for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
-          {
-            // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
-            // terminator is on the first word for the vertex
-            if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
-              break;
-          }
-
-          const bool found_terminator = (terminator_index < m_fifo.GetSize());
-          const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
-          if (words_to_copy > 0)
-          {
-            m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
-            for (u32 i = 0; i < words_to_copy; i++)
-              m_blit_buffer.push_back(FifoPop());
-          }
-
-          Log_DebugPrintf("Added %u words to polyline", words_to_copy);
-          if (found_terminator)
-          {
-            // drop terminator
-            m_fifo.RemoveOne();
-            Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
-            DispatchRenderCommand();
-            m_blit_buffer.clear();
-            EndCommand();
-            continue;
-          }
-        }
-        break;
-      }
-    }
-
-  batch_done:
-    m_fifo_pushed = false;
-    UpdateDMARequest();
-    if (!m_fifo_pushed)
-      break;
-  }
-
-  UpdateGPUIdle();
-  m_syncing = false;
-}
+void GPU::TryExecuteCommands()
+{
+  while (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
+  {
+    switch (m_blitter_state)
+    {
+      case BlitterState::Idle:
+      {
+        const u32 command = FifoPeek(0) >> 24;
+        if ((this->*s_GP0_command_handler_table[command])())
+          continue;
+        else
+          return;
+      }
+
+      case BlitterState::WritingVRAM:
+      {
+        DebugAssert(m_blit_remaining_words > 0);
+        const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
+        m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
+        for (u32 i = 0; i < words_to_copy; i++)
+          m_blit_buffer.push_back(FifoPop());
+        m_blit_remaining_words -= words_to_copy;
+
+        Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
+        if (m_blit_remaining_words == 0)
+          FinishVRAMWrite();
+
+        continue;
+      }
+
+      case BlitterState::ReadingVRAM:
+      {
+        return;
+      }
+      break;
+
+      case BlitterState::DrawingPolyLine:
+      {
+        const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
+        u32 terminator_index =
+          m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
+        for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
+        {
+          // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
+          // terminator is on the first word for the vertex
+          if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
+            break;
+        }
+
+        const bool found_terminator = (terminator_index < m_fifo.GetSize());
+        const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
+        if (words_to_copy > 0)
+        {
+          m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
+          for (u32 i = 0; i < words_to_copy; i++)
+            m_blit_buffer.push_back(FifoPop());
+        }
+
+        Log_DebugPrintf("Added %u words to polyline", words_to_copy);
+        if (found_terminator)
+        {
+          // drop terminator
+          m_fifo.RemoveOne();
+          Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
+          DispatchRenderCommand();
+          m_blit_buffer.clear();
+          EndCommand();
+          continue;
+        }
+      }
+      break;
+    }
+  }
+}
+
+void GPU::ExecuteCommands()
+{
+  const bool was_executing_from_event = std::exchange(m_executing_commands, true);
+
+  TryExecuteCommands();
+  UpdateDMARequest();
+  UpdateGPUIdle();
+
+  m_executing_commands = was_executing_from_event;
+  if (!was_executing_from_event)
+    UpdateCommandTickEvent();
+}
 
 void GPU::EndCommand()
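The std::exchange guard in the new ExecuteCommands() wrapper is what lets it be called re-entrantly (from WriteRegister, EndDMAWrite, ReadGPUREAD, or the command tick event) while only the outermost call reschedules the timing event. The same save-and-restore pattern in isolation (a minimal sketch, not the emulator's class):

    #include <utility>

    struct CommandRunner
    {
      bool executing = false;

      void Execute()
      {
        // Record whether an outer Execute() (or the tick event) is already active.
        const bool was_executing = std::exchange(executing, true);

        RunQueuedWork(); // may call Execute() again; inner calls see executing == true

        executing = was_executing;
        if (!was_executing)
          Reschedule(); // only the outermost call touches the timing event
      }

      void RunQueuedWork() {} // stand-in for TryExecuteCommands()
      void Reschedule() {}    // stand-in for UpdateCommandTickEvent()
    };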
@@ -17,6 +17,7 @@ static TimingEvent* s_active_events_tail;
 static TimingEvent* s_current_event = nullptr;
 static u32 s_active_event_count = 0;
 static u32 s_global_tick_counter = 0;
+static u32 s_event_run_tick_counter = 0;
 static bool s_frame_done = false;
 
 u32 GetGlobalTickCounter()
@@ -24,6 +25,11 @@ u32 GetGlobalTickCounter()
   return s_global_tick_counter;
 }
 
+u32 GetEventRunTickCounter()
+{
+  return s_event_run_tick_counter;
+}
+
 void Initialize()
 {
   Reset();
@@ -293,6 +299,7 @@ void RunEvents()
     if (pending_ticks >= s_active_events_head->GetDowncount())
     {
       CPU::ResetPendingTicks();
+      s_event_run_tick_counter = s_global_tick_counter + static_cast<u32>(pending_ticks);
 
       do
       {
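s_event_run_tick_counter is latched once per event pass, before any events execute, so readers such as DMA::GetMaxSliceTicks() see a stable deadline even as s_global_tick_counter advances during the pass. Roughly (comment sketch of the assumed flow):

    // Entering RunEvents() with global == 5000 and pending_ticks == 300:
    //   s_event_run_tick_counter = 5000 + 300 = 5300;  // pass target, latched once
    // Events then advance s_global_tick_counter toward 5300. A DMA slice
    // computed mid-pass is capped at (5300 + max) - now, so no matter how
    // late in the pass the transfer starts, it cannot run more than `max`
    // ticks beyond the pass target before the CPU regains control.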
@@ -81,6 +81,7 @@ public:
 namespace TimingEvents {
 
 u32 GetGlobalTickCounter();
+u32 GetEventRunTickCounter();
 
 void Initialize();
 void Reset();