DMA: Increase precision for large transfers

Also gets rid of the delay on the GPU side for writing to VRAM: it
doesn't make sense there, and it's no longer needed now that block
transfers are sliced.

Fixes palette corruption in Vigilante 8, and the missing rider in
Championship Motocross 2001 featuring Ricky Carmichael.
Connor McLaughlin 2021-01-03 16:05:43 +10:00
parent c9ef3ec1a3
commit 0de34d7bf7
4 changed files with 80 additions and 28 deletions
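For context, the "slicing" the message refers to works roughly as sketched below: rather than accumulating a transfer's full cost into a local counter and billing the CPU once at the end, each block's cost is billed as it happens and deducted from a fixed per-slice budget, so a large transfer is broken into several bounded slices. This is a minimal standalone toy, not DuckStation's actual API; RunSlice, kMaxSliceTicks, and the flat per-word cost are invented for illustration.

// Toy sketch of a time-sliced DMA loop (standalone; names invented, not the real API).
#include <algorithm>
#include <cstdio>

using TickCount = int;

constexpr TickCount kMaxSliceTicks = 1000; // per-slice budget, like DMAMaxSliceTicks
constexpr TickCount kTicksPerWord = 1;     // assumed flat cost per word

static TickCount s_pending_cpu_ticks = 0;  // stands in for CPU::AddPendingTicks()

// Runs one slice; returns the number of words left for the next slice.
int RunSlice(int words_remaining)
{
  TickCount remaining_ticks = kMaxSliceTicks;
  while (words_remaining > 0 && remaining_ticks > 0)
  {
    const int burst = std::min(words_remaining, 16); // one "block"
    const TickCount cost = burst * kTicksPerWord;
    s_pending_cpu_ticks += cost; // bill the CPU as we go, not once at the end
    remaining_ticks -= cost;
    words_remaining -= burst;
  }
  return words_remaining; // > 0 means the transfer halted mid-way
}

int main()
{
  int left = 5000, slices = 0;
  while (left > 0)
  {
    left = RunSlice(left);
    slices++;
  }
  std::printf("finished in %d slices, %d CPU ticks billed\n", slices, s_pending_cpu_ticks);
  return 0;
}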


@@ -597,3 +597,45 @@ DMAMaxSliceTicks = 100
 # SLUS-00912 (Destruction Derby Raw (USA))
 [SLUS-00912]
 ForceInterlacing = true
+
+# SLUS-00510 (Vigilante 8 (USA) (Rev 1))
+[SLUS-00510]
+DMAMaxSliceTicks = 300
+DMAHaltTicks = 100
+
+# SLES-01212 (Vigilante 8 (Europe))
+[SLES-01212]
+DMAMaxSliceTicks = 300
+DMAHaltTicks = 100
+
+# SLES-01214 (Vigilante 8 (Germany))
+[SLES-01214]
+DMAMaxSliceTicks = 300
+DMAHaltTicks = 100
+
+# SLES-01213 (Vigilante 8 (France))
+[SLES-01213]
+DMAMaxSliceTicks = 300
+DMAHaltTicks = 100
+
+# SLES-01215 (Vigilante 8 (Italy))
+[SLES-01215]
+DMAMaxSliceTicks = 300
+DMAHaltTicks = 100
+
+# SLPS-01703 (Vigilante 8 (Japan))
+[SLPS-01703]
+DMAMaxSliceTicks = 300
+DMAHaltTicks = 100
+
+# SLPS-91182 (Vigilante 8 (Japan))
+[SLPS-91182]
+DMAMaxSliceTicks = 300
+DMAHaltTicks = 100
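These two keys feed the m_max_slice_ticks and m_halt_ticks values used in the DMA code below. As a minimal sketch of how per-serial overrides like these might be applied, assuming a hypothetical GameOverrides table and default values (DuckStation's real game-database loader is more involved):

// Hypothetical per-game override table keyed by serial (e.g. "SLUS-00510").
#include <optional>
#include <string>
#include <unordered_map>

using TickCount = int;

struct GameOverrides
{
  std::optional<TickCount> dma_max_slice_ticks; // DMAMaxSliceTicks
  std::optional<TickCount> dma_halt_ticks;      // DMAHaltTicks
};

struct DMASettings
{
  TickCount max_slice_ticks = 1000; // assumed defaults, not DuckStation's real ones
  TickCount halt_ticks = 100;
};

// Applies any overrides registered for the running game's serial.
void ApplyOverrides(const std::unordered_map<std::string, GameOverrides>& db,
                    const std::string& serial, DMASettings& settings)
{
  const auto it = db.find(serial);
  if (it == db.end())
    return;
  if (it->second.dma_max_slice_ticks)
    settings.max_slice_ticks = *it->second.dma_max_slice_ticks;
  if (it->second.dma_halt_ticks)
    settings.halt_ticks = *it->second.dma_halt_ticks;
}

The serial-keyed lookup mirrors how the [SLUS-...] and [SLES-...] section headers above select which game an entry applies to.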


@@ -284,7 +284,6 @@ bool DMA::TransferChannel(Channel channel)
     case SyncMode::LinkedList:
     {
-      TickCount used_ticks = 0;
       if (!copy_to_device)
       {
         Panic("Linked list not implemented for DMA reads");
@@ -295,12 +294,13 @@ bool DMA::TransferChannel(Channel channel)
                       current_address & ADDRESS_MASK);
 
       u8* ram_pointer = Bus::g_ram;
-      bool halt_transfer = false;
-      while (cs.request)
+      TickCount remaining_ticks = m_max_slice_ticks;
+      while (cs.request && remaining_ticks > 0)
       {
         u32 header;
         std::memcpy(&header, &ram_pointer[current_address & ADDRESS_MASK], sizeof(header));
-        used_ticks += 10;
+        CPU::AddPendingTicks(10);
+        remaining_ticks -= 10;
 
         const u32 word_count = header >> 24;
         const u32 next_address = header & UINT32_C(0x00FFFFFF);
@@ -308,35 +308,26 @@ bool DMA::TransferChannel(Channel channel)
                         word_count * UINT32_C(4), word_count, next_address);
 
         if (word_count > 0)
         {
-          used_ticks += 5;
-          used_ticks +=
+          CPU::AddPendingTicks(5);
+          remaining_ticks -= 5;
+
+          const TickCount block_ticks =
             TransferMemoryToDevice(channel, (current_address + sizeof(header)) & ADDRESS_MASK, 4, word_count);
-        }
-        else if ((current_address & ADDRESS_MASK) == (next_address & ADDRESS_MASK))
-        {
-          current_address = next_address;
-          halt_transfer = true;
-          break;
+          CPU::AddPendingTicks(block_ticks);
+          remaining_ticks -= block_ticks;
         }
 
         current_address = next_address;
         if (current_address & UINT32_C(0x800000))
           break;
-
-        if (used_ticks >= m_max_slice_ticks)
-        {
-          halt_transfer = true;
-          break;
-        }
       }
 
       cs.base_address = current_address;
-      CPU::AddPendingTicks(used_ticks);
 
       if (current_address & UINT32_C(0x800000))
         break;
 
-      if (halt_transfer)
+      if (cs.request)
       {
         // stall the transfer for a bit if we ran for too long
         HaltTransfer(m_halt_ticks);
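As a worked example of the costs in the loop above (the 10- and 5-tick constants come from the diff; the per-word block cost is a stand-in, since the real value is whatever TransferMemoryToDevice() returns):

using TickCount = int;

// Per-node cost in the new linked-list loop: 10 ticks for the header fetch,
// plus 5 setup ticks and the block transfer itself when the node carries data.
// kAssumedTicksPerWord stands in for the value TransferMemoryToDevice() returns.
constexpr TickCount kAssumedTicksPerWord = 1;

constexpr TickCount LinkedListNodeTicks(unsigned word_count)
{
  TickCount ticks = 10; // header read
  if (word_count > 0)
    ticks += 5 + static_cast<TickCount>(word_count) * kAssumedTicksPerWord;
  return ticks;
}

static_assert(LinkedListNodeTicks(0) == 10);
static_assert(LinkedListNodeTicks(16) == 10 + 5 + 16); // 31 ticks per 16-word node

Under these assumptions, the 300-tick budget set by DMAMaxSliceTicks above covers roughly nine such nodes before the channel halts.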
@@ -359,34 +350,54 @@ bool DMA::TransferChannel(Channel channel)
       const u32 block_size = cs.block_control.request.GetBlockSize();
       u32 blocks_remaining = cs.block_control.request.GetBlockCount();
-      TickCount used_ticks = 0;
+      TickCount ticks_remaining = m_max_slice_ticks;
 
       if (copy_to_device)
       {
         do
         {
           blocks_remaining--;
-          used_ticks += TransferMemoryToDevice(channel, current_address & ADDRESS_MASK, increment, block_size);
+
+          const TickCount ticks =
+            TransferMemoryToDevice(channel, current_address & ADDRESS_MASK, increment, block_size);
+          CPU::AddPendingTicks(ticks);
+          ticks_remaining -= ticks;
+
           current_address = (current_address + (increment * block_size));
-        } while (cs.request && blocks_remaining > 0);
+        } while (cs.request && blocks_remaining > 0 && ticks_remaining > 0);
       }
       else
       {
         do
         {
           blocks_remaining--;
-          used_ticks += TransferDeviceToMemory(channel, current_address & ADDRESS_MASK, increment, block_size);
+
+          const TickCount ticks =
+            TransferDeviceToMemory(channel, current_address & ADDRESS_MASK, increment, block_size);
+          CPU::AddPendingTicks(ticks);
+          ticks_remaining -= ticks;
+
           current_address = (current_address + (increment * block_size));
-        } while (cs.request && blocks_remaining > 0);
+        } while (cs.request && blocks_remaining > 0 && ticks_remaining > 0);
       }
 
       cs.base_address = current_address & BASE_ADDRESS_MASK;
       cs.block_control.request.block_count = blocks_remaining;
-      CPU::AddPendingTicks(used_ticks);
 
       // finish transfer later if the request was cleared
       if (blocks_remaining > 0)
       {
-        return false;
+        if (cs.request)
+        {
+          // we got halted
+          if (!m_unhalt_event->IsActive())
+            HaltTransfer(m_halt_ticks);
+
+          return false;
+        }
+
+        return true;
       }
     }
     break;
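When a slice ends with the request still active, the channel halts for m_halt_ticks and is later resumed via m_unhalt_event, as seen above. Below is a minimal sketch of that halt/resume pattern; the EventQueue is a hypothetical one-shot timer queue standing in for the emulator's timing events, not DuckStation's TimingEvent API.

#include <cstdio>
#include <functional>
#include <queue>
#include <utility>
#include <vector>

using TickCount = int;

// Hypothetical one-shot timer queue ordered by fire time.
struct EventQueue
{
  using Entry = std::pair<TickCount, std::function<void()>>;
  struct Later
  {
    bool operator()(const Entry& a, const Entry& b) const { return a.first > b.first; }
  };

  TickCount now = 0;
  std::priority_queue<Entry, std::vector<Entry>, Later> events;

  void Schedule(TickCount delay, std::function<void()> cb) { events.push({now + delay, std::move(cb)}); }

  void RunUntil(TickCount t)
  {
    while (!events.empty() && events.top().first <= t)
    {
      auto cb = events.top().second; // copy before pop
      now = events.top().first;
      events.pop();
      cb();
    }
    now = t;
  }
};

int main()
{
  constexpr TickCount kHaltTicks = 100; // plays the role of m_halt_ticks
  EventQueue queue;
  int blocks_left = 3;

  // Each resume transfers one block and, if work remains, halts again.
  std::function<void()> resume = [&] {
    std::printf("t=%d: transferred a block, %d left\n", queue.now, --blocks_left);
    if (blocks_left > 0)
      queue.Schedule(kHaltTicks, resume);
  };
  resume();
  queue.RunUntil(1000);
  return 0;
}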


@@ -167,7 +167,7 @@ protected:
   // The GPU internally appears to run at 2x the system clock.
   ALWAYS_INLINE static constexpr TickCount GPUTicksToSystemTicks(TickCount gpu_ticks)
   {
-    return std::max<TickCount>(gpu_ticks >> 1, 1);
+    return std::max<TickCount>((gpu_ticks + 1) >> 1, 1);
   }
 
   ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; }
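The one-character change above also serves the commit's precision goal: converting GPU ticks to system ticks now rounds up instead of down, so odd tick counts are no longer under-billed. A quick check of the two versions (OldConv/NewConv are names for comparison only):

#include <algorithm>

using TickCount = int;

constexpr TickCount OldConv(TickCount gpu_ticks) { return std::max<TickCount>(gpu_ticks >> 1, 1); }
constexpr TickCount NewConv(TickCount gpu_ticks) { return std::max<TickCount>((gpu_ticks + 1) >> 1, 1); }

static_assert(OldConv(3) == 1 && NewConv(3) == 2); // odd counts now round up
static_assert(OldConv(4) == 2 && NewConv(4) == 2); // even counts are unchanged
static_assert(OldConv(1) == 1 && NewConv(1) == 1); // the clamp to 1 still applies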


@@ -49,7 +49,6 @@ void GPU::ExecuteCommands()
       for (u32 i = 0; i < words_to_copy; i++)
         m_blit_buffer.push_back(FifoPop());
       m_blit_remaining_words -= words_to_copy;
-      AddCommandTicks(words_to_copy);
 
       Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
       if (m_blit_remaining_words == 0)