DMA: Batch multi-word transfers together

This commit is contained in:
Connor McLaughlin 2019-10-13 16:48:11 +10:00
parent 88ec178380
commit ec8c5d4bb6
12 changed files with 273 additions and 149 deletions

View file

@ -17,7 +17,7 @@
Log_SetChannel(Bus);
// Rounds a byte offset down to a multiple of four by clearing its two low bits.
#define FIXUP_WORD_READ_OFFSET(offset) ((offset) & ~u32(3))
// Shifts `value` right by eight bits for every byte that `offset` lies past a multiple of four.
// NOTE(review): this macro is listed twice, the two lines differing only in the whitespace around `&`;
// presumably they are the before/after pair of a formatting-only change rather than two intended
// definitions - confirm against the repository.
#define FIXUP_WORD_READ_VALUE(offset, value) ((value) >> (((offset) & u32(3)) * 8))
#define FIXUP_WORD_READ_VALUE(offset, value) ((value) >> (((offset)&u32(3)) * 8))
// Offset and value remapping for (w32) registers from nocash docs.
void FixupUnalignedWordAccessW32(u32& offset, u32& value)
@ -118,6 +118,58 @@ bool Bus::WriteWord(PhysicalMemoryAddress address, u32 value)
return DispatchAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(address, value);
}
// Reads `word_count` consecutive 32-bit words starting at physical `address` into `words`.
//
// Returns the number of ticks charged for the whole transfer, or -1 as soon as a dispatched word access
// reports a negative tick count (words read before that point have already been stored in `words`).
TickCount Bus::ReadWords(PhysicalMemoryAddress address, u32* words, u32 word_count)
{
  // The bulk copy is only valid when the whole range ends at or below RAM_BASE + RAM_SIZE. The word count is
  // compared against the space left after `address`, instead of adding the byte length to `address`, so the
  // check stays correct even when that sum would not fit in the type used for the addition.
  const u32 ram_end = RAM_BASE + RAM_SIZE;
  const bool entirely_in_ram = (address <= ram_end) && (word_count <= ((ram_end - address) / sizeof(u32)));
  if (!entirely_in_ram)
  {
    // Not RAM, or RAM mirrors: go through the regular dispatcher one word at a time.
    TickCount total_ticks = 0;
    for (u32 i = 0; i < word_count; i++)
    {
      const TickCount ticks = DispatchAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(address, words[i]);
      if (ticks < 0)
        return -1;

      total_ticks += ticks;
      address += sizeof(u32);
    }

    return total_ticks;
  }

  // DMA is using DRAM Hyper Page mode, allowing it to access DRAM rows at 1 clock cycle per word (effectively around 17
  // clks per 16 words, due to required row address loading, probably plus some further minimal overhead due to refresh
  // cycles). This is making DMA much faster than CPU memory accesses (CPU DRAM access takes 1 opcode cycle plus 6
  // waitstates, ie. 7 cycles in total).
  std::memcpy(words, &m_ram[address], sizeof(u32) * word_count);

  // One tick per word, plus one extra tick for every started group of 16 words.
  return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
}
// Writes `word_count` consecutive 32-bit words from `words` to physical memory starting at `address`.
//
// Returns the number of ticks charged for the whole transfer, or -1 as soon as a dispatched word access
// reports a negative tick count (words written before that point stay written).
TickCount Bus::WriteWords(PhysicalMemoryAddress address, const u32* words, u32 word_count)
{
  // The bulk copy is only valid when the whole range ends at or below RAM_BASE + RAM_SIZE. The word count is
  // compared against the space left after `address`, instead of adding the byte length to `address`, so the
  // check stays correct even when that sum would not fit in the type used for the addition.
  const u32 ram_end = RAM_BASE + RAM_SIZE;
  const bool entirely_in_ram = (address <= ram_end) && (word_count <= ((ram_end - address) / sizeof(u32)));
  if (!entirely_in_ram)
  {
    // Not RAM, or RAM mirrors: go through the regular dispatcher one word at a time.
    TickCount total_ticks = 0;
    for (u32 i = 0; i < word_count; i++)
    {
      // DispatchAccess takes the value by non-const reference, so hand it a local copy of the source word.
      u32 value = words[i];
      const TickCount ticks = DispatchAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(address, value);
      if (ticks < 0)
        return -1;

      total_ticks += ticks;
      address += sizeof(u32);
    }

    return total_ticks;
  }

  std::memcpy(&m_ram[address], words, sizeof(u32) * word_count);

  // One tick per word, plus one extra tick for every started group of 16 words.
  return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
}
void Bus::PatchBIOS(u32 address, u32 value, u32 mask /*= UINT32_C(0xFFFFFFFF)*/)
{
const u32 phys_address = address & UINT32_C(0x1FFFFFFF);

View file

@ -42,12 +42,20 @@ public:
template<MemoryAccessType type, MemoryAccessSize size>
TickCount DispatchAccess(PhysicalMemoryAddress address, u32& value);
// Optimized variant for burst/multi-word read/writing.
TickCount ReadWords(PhysicalMemoryAddress address, u32* words, u32 word_count);
TickCount WriteWords(PhysicalMemoryAddress address, const u32* words, u32 word_count);
void PatchBIOS(u32 address, u32 value, u32 mask = UINT32_C(0xFFFFFFFF));
void SetExpansionROM(std::vector<u8> data);
private:
enum : u32
{
RAM_BASE = 0x00000000,
RAM_SIZE = 0x200000,
RAM_MASK = RAM_SIZE - 1,
RAM_MIRROR_END = 0x800000,
EXP1_BASE = 0x1F000000,
EXP1_SIZE = 0x800000,
EXP1_MASK = EXP1_SIZE - 1,

View file

@ -348,30 +348,17 @@ void CDROM::WriteRegister(u32 offset, u8 value)
ZeroExtend32(m_status.index.GetValue()), ZeroExtend32(value));
}
u32 CDROM::DMARead()
// Moves data from m_data_fifo into the caller's `words` array, `word_count` 32-bit words at most.
// NOTE(review): this listing has no +/- markers and interleaves what appears to be the replaced single-word
// implementation (the statements that build and return a local `data`) with the batched one that fills
// `words`; the comments below say which is which on that assumption - confirm against the repository.
void CDROM::DMARead(u32* words, u32 word_count)
{
if (m_data_fifo.IsEmpty())
// Whole words available: GetSize() divided by 4 (so GetSize() is presumably a byte count).
const u32 words_in_fifo = m_data_fifo.GetSize() / 4;
if (words_in_fifo < word_count)
{
Log_ErrorPrintf("DMA read on empty data FIFO");
return UINT32_C(0xFFFFFFFF);
Log_ErrorPrintf("DMA read on empty/near-empty data FIFO");
// Zero every destination word the FIFO cannot supply in full; the PopRange call further down may still
// overwrite the first bytes of this zeroed region with a trailing partial word.
std::memset(words + words_in_fifo, 0, sizeof(u32) * (word_count - words_in_fifo));
}
// Presumably the pre-change path: read one word, or fewer than four remaining bytes zero-padded into `data`.
u32 data;
if (m_data_fifo.GetSize() >= sizeof(data))
{
std::memcpy(&data, m_data_fifo.GetFrontPointer(), sizeof(data));
m_data_fifo.Remove(sizeof(data));
}
else
{
Log_WarningPrintf("Unaligned DMA read on FIFO(%u)", m_data_fifo.GetSize());
data = 0;
std::memcpy(&data, m_data_fifo.GetFrontPointer(), m_data_fifo.GetSize());
m_data_fifo.Clear();
}
// Log_DebugPrintf("DMA Read -> 0x%08X (%u remaining)", data, m_data_fifo.GetSize());
return data;
// Batched path: pop the requested byte count, capped at what the FIFO currently holds.
const u32 bytes_to_read = std::min<u32>(word_count * sizeof(u32), m_data_fifo.GetSize());
m_data_fifo.PopRange(reinterpret_cast<u8*>(words), bytes_to_read);
}
void CDROM::SetInterrupt(Interrupt interrupt)

View file

@ -29,7 +29,7 @@ public:
// I/O
u8 ReadRegister(u32 offset);
void WriteRegister(u32 offset, u8 value);
u32 DMARead();
void DMARead(u32* words, u32 word_count);
void Execute(TickCount ticks);

View file

@ -24,6 +24,7 @@ bool DMA::Initialize(System* system, Bus* bus, InterruptController* interrupt_co
m_cdrom = cdrom;
m_spu = spu;
m_mdec = mdec;
m_transfer_buffer.resize(32);
return true;
}
@ -223,7 +224,7 @@ void DMA::TransferChannel(Channel channel)
// start/trigger bit is cleared on beginning of transfer
cs.channel_control.start_trigger = false;
PhysicalMemoryAddress current_address = cs.base_address & ~UINT32_C(3);
PhysicalMemoryAddress current_address = (cs.base_address & ~UINT32_C(3)) & ADDRESS_MASK;
const PhysicalMemoryAddress increment = cs.channel_control.address_step_reverse ? static_cast<u32>(-4) : UINT32_C(4);
switch (cs.channel_control.sync_mode)
{
@ -233,32 +234,9 @@ void DMA::TransferChannel(Channel channel)
Log_DebugPrintf("DMA%u: Copying %u words %s 0x%08X", static_cast<u32>(channel), word_count,
copy_to_device ? "from" : "to", current_address);
if (copy_to_device)
{
u32 words_remaining = word_count;
do
{
words_remaining--;
u32 value = 0;
m_bus->DispatchAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(current_address, value);
DMAWrite(channel, value, current_address, words_remaining);
current_address = (current_address + increment) & ADDRESS_MASK;
} while (words_remaining > 0);
}
TransferMemoryToDevice(channel, current_address, increment, word_count);
else
{
u32 words_remaining = word_count;
do
{
words_remaining--;
u32 value = DMARead(channel, current_address, words_remaining);
m_bus->DispatchAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(current_address, value);
current_address = (current_address + increment) & ADDRESS_MASK;
} while (words_remaining > 0);
}
TransferDeviceToMemory(channel, current_address, increment, word_count);
}
break;
@ -285,18 +263,7 @@ void DMA::TransferChannel(Channel channel)
current_address += sizeof(header);
if (word_count > 0)
{
u32 words_remaining = word_count;
do
{
words_remaining--;
u32 memory_value = 0;
m_bus->DispatchAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(current_address, memory_value);
DMAWrite(channel, memory_value, current_address, words_remaining);
current_address = (current_address + UINT32_C(4)) & ADDRESS_MASK;
} while (words_remaining > 0);
}
TransferMemoryToDevice(channel, current_address, 4, word_count);
if (next_address & UINT32_C(0x800000))
break;
@ -313,6 +280,7 @@ void DMA::TransferChannel(Channel channel)
cs.block_control.request.GetBlockCount(), cs.block_control.request.GetBlockSize(),
copy_to_device ? "from" : "to", current_address);
const u32 block_size = cs.block_control.request.GetBlockSize();
u32 blocks_remaining = cs.block_control.request.block_count;
if (copy_to_device)
@ -320,18 +288,8 @@ void DMA::TransferChannel(Channel channel)
do
{
blocks_remaining--;
u32 words_remaining = cs.block_control.request.block_size;
do
{
words_remaining--;
u32 value = 0;
m_bus->DispatchAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(current_address, value);
DMAWrite(channel, value, current_address, words_remaining);
current_address = (current_address + increment) & ADDRESS_MASK;
} while (words_remaining > 0);
TransferMemoryToDevice(channel, current_address, increment, block_size);
current_address = (current_address + (increment * block_size)) & ADDRESS_MASK;
} while (cs.request && blocks_remaining > 0);
}
else
@ -339,17 +297,8 @@ void DMA::TransferChannel(Channel channel)
do
{
blocks_remaining--;
u32 words_remaining = cs.block_control.request.block_size;
do
{
words_remaining--;
u32 value = DMARead(channel, current_address, words_remaining);
m_bus->DispatchAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(current_address, value);
current_address = (current_address + increment) & ADDRESS_MASK;
} while (words_remaining > 0);
TransferDeviceToMemory(channel, current_address, increment, block_size);
current_address = (current_address + (increment * block_size)) & ADDRESS_MASK;
} while (cs.request && blocks_remaining > 0);
}
@ -382,56 +331,122 @@ void DMA::TransferChannel(Channel channel)
}
}
u32 DMA::DMARead(Channel channel, PhysicalMemoryAddress dst_address, u32 remaining_words)
// Gathers `word_count` words from emulated memory into m_transfer_buffer and hands the batch to the device
// behind `channel`.
//
// `address` is the first word to read and `increment` is the byte step applied after each word. The step is
// an unsigned two's-complement value: the caller passes 4 for forward transfers and static_cast<u32>(-4)
// for reverse ones. Channels without a device-write handler hit Panic().
void DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 increment, u32 word_count)
{
  if (m_transfer_buffer.size() < word_count)
    m_transfer_buffer.resize(word_count);

  // Bus::ReadWords reads ascending, contiguous words, so it may only be used when the step is exactly one
  // word forwards. `increment` is unsigned, which means a plain `increment > 0` test is also true for the
  // reverse step (0xFFFFFFFC); a reverse transfer that wraps below address zero would then pass the
  // end-above-start test and be read forwards. Compare against the word size instead.
  const bool contiguous_forward = (increment == sizeof(u32));

  // The masked end address landing at or below the start means the range wraps (or is empty).
  const bool ends_above_start = ((address + (increment * word_count)) & ADDRESS_MASK) > address;

  if (contiguous_forward && ends_above_start)
  {
    m_bus->ReadWords(address, m_transfer_buffer.data(), word_count);
  }
  else
  {
    // Reverse, wrapping or empty transfer: read word by word, re-masking the address after every step.
    for (u32 i = 0; i < word_count; i++)
    {
      m_bus->DispatchAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(address, m_transfer_buffer[i]);
      address = (address + increment) & ADDRESS_MASK;
    }
  }

  switch (channel)
  {
    case Channel::GPU:
      m_gpu->DMAWrite(m_transfer_buffer.data(), word_count);
      break;

    case Channel::SPU:
      m_spu->DMAWrite(m_transfer_buffer.data(), word_count);
      break;

    case Channel::MDECin:
      m_mdec->DMAWrite(m_transfer_buffer.data(), word_count);
      break;

    case Channel::CDROM:
    case Channel::MDECout:
    case Channel::PIO:
    default:
      Panic("Unhandled DMA channel for device write");
      break;
  }
}
// Pulls `word_count` words from the device behind `channel` into m_transfer_buffer and stores them to
// emulated memory.
//
// `address` is the first word to write and `increment` is the byte step applied after each word. The step is
// an unsigned two's-complement value: the caller passes 4 for forward transfers and static_cast<u32>(-4)
// for reverse ones. The OTC channel has no device; it generates its own data and ignores `increment`.
// Channels without a device-read handler hit Panic() and the buffer is filled with 0xFFFFFFFF.
void DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 increment, u32 word_count)
{
  if (m_transfer_buffer.size() < word_count)
    m_transfer_buffer.resize(word_count);

  // Read from device.
  switch (channel)
  {
    case Channel::OTC:
    {
      // Clear ordering table: starting at `address` and walking downwards, every word receives the address of
      // the word below it, and the last (lowest) word receives the 0xFFFFFF terminator.
      // This always goes in reverse, so we can generate the values in reverse order and write them forwards.
      if (((address - (4 * word_count)) & ADDRESS_MASK) < address)
      {
        const u32 end_address = (address - (4 * (word_count - 1))) & ADDRESS_MASK;

        u32 value = end_address;
        m_transfer_buffer[0] = UINT32_C(0xFFFFFF);
        for (u32 i = 1; i < word_count; i++)
        {
          m_transfer_buffer[i] = value;
          value = (value + 4) & ADDRESS_MASK;
        }

        m_bus->WriteWords(end_address, m_transfer_buffer.data(), word_count);
      }
      else
      {
        // The table wraps below address zero (or is empty): write it word by word. The terminator must be the
        // same 0xFFFFFF value the bulk path stores; the seven-digit 0xFFFFFFF written here before was a typo.
        for (u32 i = 0; i < word_count; i++)
        {
          u32 value = (i == word_count - 1) ? UINT32_C(0xFFFFFF) : ((address - 4) & ADDRESS_MASK);
          m_bus->DispatchAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(address, value);
          address = (address - 4) & ADDRESS_MASK;
        }
      }

      // Memory has already been written; skip the generic write-back below.
      return;
    }

    case Channel::GPU:
      m_gpu->DMARead(m_transfer_buffer.data(), word_count);
      break;

    case Channel::CDROM:
      m_cdrom->DMARead(m_transfer_buffer.data(), word_count);
      break;

    case Channel::SPU:
      m_spu->DMARead(m_transfer_buffer.data(), word_count);
      break;

    case Channel::MDECout:
      m_mdec->DMARead(m_transfer_buffer.data(), word_count);
      break;

    case Channel::MDECin:
    case Channel::PIO:
    default:
      Panic("Unhandled DMA channel for device read");
      std::fill_n(m_transfer_buffer.begin(), word_count, UINT32_C(0xFFFFFFFF));
      break;
  }

  // Bus::WriteWords stores ascending, contiguous words, so it may only be used when the step is exactly one
  // word forwards. `increment` is unsigned, which means a plain `increment > 0` test is also true for the
  // reverse step (0xFFFFFFFC); a reverse transfer that wraps below address zero would then pass the
  // end-above-start test and be written forwards. Compare against the word size instead.
  const bool contiguous_forward = (increment == sizeof(u32));

  // The masked end address landing at or below the start means the range wraps (or is empty).
  const bool ends_above_start = ((address + (increment * word_count)) & ADDRESS_MASK) > address;

  if (contiguous_forward && ends_above_start)
  {
    m_bus->WriteWords(address, m_transfer_buffer.data(), word_count);
  }
  else
  {
    // Reverse, wrapping or empty transfer: write word by word, re-masking the address after every step.
    for (u32 i = 0; i < word_count; i++)
    {
      m_bus->DispatchAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(address, m_transfer_buffer[i]);
      address = (address + increment) & ADDRESS_MASK;
    }
  }
}

View file

@ -65,10 +65,10 @@ private:
void TransferChannel(Channel channel);
// from device -> memory
u32 DMARead(Channel channel, PhysicalMemoryAddress dst_address, u32 remaining_words);
void TransferDeviceToMemory(Channel channel, u32 address, u32 increment, u32 word_count);
// from memory -> device
void DMAWrite(Channel channel, u32 value, PhysicalMemoryAddress src_address, u32 remaining_words);
void TransferMemoryToDevice(Channel channel, u32 address, u32 increment, u32 word_count);
System* m_system = nullptr;
Bus* m_bus = nullptr;
@ -81,6 +81,8 @@ private:
TickCount m_transfer_ticks = 0;
bool m_transfer_in_progress = false;
std::vector<u32> m_transfer_buffer;
struct ChannelState
{
u32 base_address;

View file

@ -217,29 +217,56 @@ void GPU::WriteRegister(u32 offset, u32 value)
}
}
u32 GPU::DMARead()
// Fills `words` with `word_count` words taken from the front of m_GPUREAD_buffer.
//
// When the DMA direction is not GPUREADtoCPU, every output word is set to 0xFFFFFFFF and nothing is
// consumed. When the buffer holds fewer words than requested, the missing tail is zero-filled and a warning
// is logged.
void GPU::DMARead(u32* words, u32 word_count)
{
  if (m_GPUSTAT.dma_direction != DMADirection::GPUREADtoCPU)
  {
    Log_ErrorPrintf("Invalid DMA direction from GPU DMA read");
    std::fill_n(words, word_count, UINT32_C(0xFFFFFFFF));
    return;
  }

  const u32 words_to_copy = std::min(word_count, static_cast<u32>(m_GPUREAD_buffer.size()));
  if (words_to_copy > 0)
  {
    // Copy only what the buffer actually holds. Iterating up to `word_count` here would walk the iterator
    // past end() whenever the buffer is shorter than the request.
    auto it = m_GPUREAD_buffer.begin();
    for (u32 i = 0; i < words_to_copy; i++)
      words[i] = *(it++);

    // Drop the words that were just handed out.
    m_GPUREAD_buffer.erase(m_GPUREAD_buffer.begin(), it);
  }

  if (words_to_copy < word_count)
  {
    Log_WarningPrintf("Partially-empty GPUREAD buffer on GPU DMA read");
    std::fill_n(words + words_to_copy, word_count - words_to_copy, u32(0));
  }

  UpdateGPUSTAT();
}
void GPU::DMAWrite(u32 value)
// Feeds `word_count` words from `words` to the GPU according to the current m_GPUSTAT.dma_direction.
// NOTE(review): this listing has no +/- markers and interleaves what appears to be the replaced
// single-value implementation (the statements that use `value`, which is not a parameter of this
// signature) with the batched one; the comments below assume that reading - confirm against the repository.
void GPU::DMAWrite(const u32* words, u32 word_count)
{
switch (m_GPUSTAT.dma_direction)
{
case DMADirection::CPUtoGP0:
// Presumably the pre-change body of this case (single word forwarded to WriteGP0).
WriteGP0(value);
break;
{
// Grow the command buffer once for the whole batch, then append word by word, giving HandleGP0Command a
// chance to run after every appended word.
m_GP0_command.reserve(m_GP0_command.size() + word_count);
for (u32 i = 0; i < word_count; i++)
{
m_GP0_command.push_back(*(words++));
HandleGP0Command();
}
// GPUSTAT is refreshed once per batch here rather than once per word.
UpdateGPUSTAT();
}
break;
default:
// Presumably the pre-change log statement (formats the single `value`).
Log_ErrorPrintf("Unhandled GPU DMA write mode %u for value %08X",
static_cast<u32>(m_GPUSTAT.dma_direction.GetValue()), value);
break;
{
// Any other direction: the words are not consumed, only the direction and batch size are logged.
Log_ErrorPrintf("Unhandled GPU DMA write mode %u for %u words",
static_cast<u32>(m_GPUSTAT.dma_direction.GetValue()), word_count);
}
break;
}
}
@ -369,10 +396,14 @@ void GPU::WriteGP0(u32 value)
{
m_GP0_command.push_back(value);
Assert(m_GP0_command.size() <= 1048576);
HandleGP0Command();
UpdateGPUSTAT();
}
void GPU::HandleGP0Command()
{
const u8 command = Truncate8(m_GP0_command[0] >> 24);
const u32 param = m_GP0_command[0] & UINT32_C(0x00FFFFFF);
UpdateGPUSTAT();
if (command >= 0x20 && command <= 0x7F)
{
@ -432,7 +463,7 @@ void GPU::WriteGP0(u32 value)
case 0xE2: // set texture window
{
m_render_state.SetTextureWindow(value);
m_render_state.SetTextureWindow(param);
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_render_state.texture_window_mask_x,
m_render_state.texture_window_mask_y, m_render_state.texture_window_offset_x,
m_render_state.texture_window_offset_y);
@ -504,7 +535,6 @@ void GPU::WriteGP0(u32 value)
}
m_GP0_command.clear();
UpdateGPUSTAT();
}
void GPU::WriteGP1(u32 value)

View file

@ -52,8 +52,8 @@ public:
void WriteRegister(u32 offset, u32 value);
// DMA access
u32 DMARead();
void DMAWrite(u32 value);
void DMARead(u32* words, u32 word_count);
void DMAWrite(const u32* words, u32 word_count);
// gpu_hw_opengl.cpp
static std::unique_ptr<GPU> CreateHardwareOpenGLRenderer();
@ -185,6 +185,7 @@ protected:
void HandleGetGPUInfoCommand(u32 value);
// Rendering commands, returns false if not enough data is provided
void HandleGP0Command();
bool HandleRenderCommand();
bool HandleFillRectangleCommand();
bool HandleCopyRectangleCPUToVRAMCommand();

View file

@ -97,14 +97,17 @@ void MDEC::WriteRegister(u32 offset, u32 value)
}
}
u32 MDEC::DMARead()
// Fills `words` with `word_count` values obtained by calling ReadDataRegister() once per word, in order.
// NOTE(review): the bare `return ReadDataRegister();` below does not fit this void signature; it appears to
// be the replaced single-word body left in by a diff listing without +/- markers - confirm.
void MDEC::DMARead(u32* words, u32 word_count)
{
return ReadDataRegister();
// TODO: Make faster
for (u32 i= 0; i < word_count; i++)
words[i] = ReadDataRegister();
}
void MDEC::DMAWrite(u32 value)
// Passes each of the `word_count` words in `words` to WriteCommandRegister(), in order.
// NOTE(review): `WriteCommandRegister(value);` below uses a name that is not a parameter of this signature;
// it appears to be the replaced single-word body left in by a diff listing without +/- markers - confirm.
void MDEC::DMAWrite(const u32* words, u32 word_count)
{
WriteCommandRegister(value);
for (u32 i = 0; i < word_count; i++)
WriteCommandRegister(words[i]);
}
void MDEC::SoftReset()

View file

@ -23,8 +23,8 @@ public:
u32 ReadRegister(u32 offset);
void WriteRegister(u32 offset, u32 value);
u32 DMARead();
void DMAWrite(u32 value);
void DMARead(u32* words, u32 word_count);
void DMAWrite(const u32* words, u32 word_count);
void DrawDebugMenu();
void DrawDebugWindow();

View file

@ -412,18 +412,44 @@ void SPU::WriteVoiceRegister(u32 offset, u16 value)
}
}
u32 SPU::DMARead()
// Reads `word_count` 32-bit words into `words`, starting at m_transfer_address.
// NOTE(review): the first three statements of the body (two RAMTransferRead() calls and a `return` of a
// value) do not fit this void signature; they appear to be the replaced single-word body left in by a diff
// listing without +/- markers - confirm against the repository.
void SPU::DMARead(u32* words, u32 word_count)
{
const u16 lsb = RAMTransferRead();
const u16 msb = RAMTransferRead();
return ZeroExtend32(lsb) | (ZeroExtend32(msb) << 16);
// test for wrap-around
// The bits above RAM_MASK of the start address are compared with those of start + byte length, so a transfer
// whose end lands exactly one past the masked range also takes the per-halfword path.
if ((m_transfer_address & ~RAM_MASK) != ((m_transfer_address + (word_count * sizeof(u32))) & ~RAM_MASK))
{
// this could still be optimized to copy in two parts - end/start, but is unlikely.
for (u32 i = 0; i < word_count; i++)
{
// Each word is assembled from two 16-bit reads, low half first.
const u16 lsb = RAMTransferRead();
const u16 msb = RAMTransferRead();
words[i] = ZeroExtend32(lsb) | (ZeroExtend32(msb) << 16);
}
}
else
{
// No wrap: copy the bytes straight out of m_ram and advance the transfer address by the same amount.
// NOTE(review): this path does not call RAMTransferRead(); if that helper does more than read and advance,
// the two paths differ - confirm.
std::memcpy(words, &m_ram[m_transfer_address], sizeof(u32) * word_count);
m_transfer_address = (m_transfer_address + (sizeof(u32) * word_count)) & RAM_MASK;
}
}
void SPU::DMAWrite(u32 value)
// Writes the `word_count` 32-bit words in `words`, starting at m_transfer_address.
// NOTE(review): the first two RAMTransferWrite() calls in the body use `value`, which is not a parameter of
// this signature; they appear to be the replaced single-word body left in by a diff listing without +/-
// markers - confirm against the repository.
void SPU::DMAWrite(const u32* words, u32 word_count)
{
// two 16-bit writes to prevent out-of-bounds
RAMTransferWrite(Truncate16(value));
RAMTransferWrite(Truncate16(value >> 16));
// test for wrap-around
// The bits above RAM_MASK of the start address are compared with those of start + byte length, so a transfer
// whose end lands exactly one past the masked range also takes the per-halfword path.
if ((m_transfer_address & ~RAM_MASK) != ((m_transfer_address + (word_count * sizeof(u32))) & ~RAM_MASK))
{
// this could still be optimized to copy in two parts - end/start, but is unlikely.
for (u32 i = 0; i < word_count; i++)
{
// Each word is split into two 16-bit writes, low half first.
const u32 value = words[i];
RAMTransferWrite(Truncate16(value));
RAMTransferWrite(Truncate16(value >> 16));
}
}
else
{
// No wrap: copy the bytes straight into m_ram and advance the transfer address by the same amount.
// NOTE(review): this path does not call RAMTransferWrite(); if that helper does more than write and
// advance, the two paths differ - confirm.
std::memcpy(&m_ram[m_transfer_address], words, sizeof(u32) * word_count);
m_transfer_address = (m_transfer_address + (sizeof(u32) * word_count)) & RAM_MASK;
}
}
void SPU::UpdateDMARequest()

View file

@ -25,8 +25,8 @@ public:
u16 ReadRegister(u32 offset);
void WriteRegister(u32 offset, u16 value);
u32 DMARead();
void DMAWrite(u32 value);
void DMARead(u32* words, u32 word_count);
void DMAWrite(const u32* words, u32 word_count);
void Execute(TickCount ticks);