diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index e1674003f..98f0b5aa6 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -39,7 +39,7 @@ void GPU::SoftReset() m_crtc_state.regs.display_address_start = 0; m_crtc_state.regs.horizontal_display_range = 0xC60260; m_crtc_state.regs.vertical_display_range = 0x3FC10; - m_GP0_command.clear(); + m_GP0_buffer.clear(); m_GPUREAD_buffer.clear(); m_render_state = {}; m_render_state.texture_page_changed = true; @@ -107,7 +107,7 @@ bool GPU::DoState(StateWrapper& sw) if (sw.IsReading()) UpdateSliceTicks(); - sw.Do(&m_GP0_command); + sw.Do(&m_GP0_buffer); sw.Do(&m_GPUREAD_buffer); if (sw.IsReading()) @@ -250,11 +250,45 @@ void GPU::DMAWrite(const u32* words, u32 word_count) { case DMADirection::CPUtoGP0: { - m_GP0_command.reserve(m_GP0_command.size() + word_count); - for (u32 i = 0; i < word_count; i++) + // partial command buffered? have to go through the slow path + if (!m_GP0_buffer.empty()) { - m_GP0_command.push_back(*(words++)); - HandleGP0Command(); + std::copy(words, words + word_count, std::back_inserter(m_GP0_buffer)); + const u32* command_ptr = m_GP0_buffer.data(); + u32 command_size = static_cast(m_GP0_buffer.size()); + do + { + const u32* prev_command_ptr = command_ptr; + const bool result = HandleGP0Command(command_ptr, command_size); + command_size -= command_ptr - prev_command_ptr; + if (!result) + break; + } while (command_size > 0); + + if (command_size > 0 && command_size < m_GP0_buffer.size()) + m_GP0_buffer.erase(m_GP0_buffer.begin(), m_GP0_buffer.begin() + (m_GP0_buffer.size() - command_size)); + else if (command_size == 0) + m_GP0_buffer.clear(); + } + else + { + // fast path - read directly from DMA buffer + const u32* command_ptr = words; + u32 command_size = word_count; + do + { + const u32* prev_command_ptr = command_ptr; + const bool result = HandleGP0Command(command_ptr, command_size); + command_size -= command_ptr - prev_command_ptr; + if (!result) + break; + } while (command_size > 0); + + if (command_size > 0) + { + // partial command left over + std::copy(command_ptr, command_ptr + command_size, std::back_inserter(m_GP0_buffer)); + } } UpdateGPUSTAT(); @@ -394,60 +428,50 @@ u32 GPU::ReadGPUREAD() void GPU::WriteGP0(u32 value) { - m_GP0_command.push_back(value); - Assert(m_GP0_command.size() <= 1048576); - HandleGP0Command(); + m_GP0_buffer.push_back(value); + Assert(m_GP0_buffer.size() <= 1048576); + + const u32* command_ptr = m_GP0_buffer.data(); + if (HandleGP0Command(command_ptr, static_cast(m_GP0_buffer.size()))) + { + DebugAssert((command_ptr - m_GP0_buffer.data()) == m_GP0_buffer.size()); + m_GP0_buffer.clear(); + } + UpdateGPUSTAT(); } -void GPU::HandleGP0Command() +bool GPU::HandleGP0Command(const u32*& command_ptr, u32 command_size) { - const u8 command = Truncate8(m_GP0_command[0] >> 24); - const u32 param = m_GP0_command[0] & UINT32_C(0x00FFFFFF); + const u8 command = Truncate8(command_ptr[0] >> 24); if (command >= 0x20 && command <= 0x7F) { // Draw polygon - if (!HandleRenderCommand()) - return; + return HandleRenderCommand(command_ptr, command_size); } else { + const u32 param = command_ptr[0] & UINT32_C(0x00FFFFFF); + switch (command) { case 0x00: // NOP - break; - case 0x01: // Clear cache - break; + command_ptr++; + return true; case 0x02: // Fill Rectangle - { - if (!HandleFillRectangleCommand()) - return; - } - break; + return HandleFillRectangleCommand(command_ptr, command_size); case 0xA0: // Copy Rectangle CPU->VRAM - { - if (!HandleCopyRectangleCPUToVRAMCommand()) - return; - } - break; + return HandleCopyRectangleCPUToVRAMCommand(command_ptr, command_size); case 0xC0: // Copy Rectangle VRAM->CPU - { - if (!HandleCopyRectangleVRAMToCPUCommand()) - return; - } - break; + return HandleCopyRectangleVRAMToCPUCommand(command_ptr, command_size); case 0x80: // Copy Rectangle VRAM->VRAM - { - if (!HandleCopyRectangleVRAMToVRAMCommand()) - return; - } - break; + return HandleCopyRectangleVRAMToVRAMCommand(command_ptr, command_size); case 0xE1: // Set draw mode { @@ -458,8 +482,9 @@ void GPU::HandleGP0Command() m_render_state.texture_x_flip = (param & (UINT32_C(1) << 12)) != 0; m_render_state.texture_y_flip = (param & (UINT32_C(1) << 13)) != 0; Log_DebugPrintf("Set draw mode %08X", param); + command_ptr++; + return true; } - break; case 0xE2: // set texture window { @@ -467,8 +492,9 @@ void GPU::HandleGP0Command() Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_render_state.texture_window_mask_x, m_render_state.texture_window_mask_y, m_render_state.texture_window_offset_x, m_render_state.texture_window_offset_y); + command_ptr++; + return true; } - break; case 0xE3: // Set drawing area top left { @@ -483,8 +509,9 @@ void GPU::HandleGP0Command() m_drawing_area.top = top; UpdateDrawingArea(); } + command_ptr++; + return true; } - break; case 0xE4: // Set drawing area bottom right { @@ -499,8 +526,9 @@ void GPU::HandleGP0Command() m_drawing_area.bottom = bottom; UpdateDrawingArea(); } + command_ptr++; + return true; } - break; case 0xE5: // Set drawing offset { @@ -514,8 +542,9 @@ void GPU::HandleGP0Command() m_drawing_offset.x = x; m_drawing_offset.y = y; } + command_ptr++; + return true; } - break; case 0xE6: // Mask bit setting { @@ -523,18 +552,18 @@ void GPU::HandleGP0Command() m_GPUSTAT.draw_to_masked_pixels = (param & UINT32_C(0x01)) != 0; Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.draw_set_mask_bit), BoolToUInt32(m_GPUSTAT.draw_to_masked_pixels)); + command_ptr++; + return true; } - break; default: { Log_ErrorPrintf("Unimplemented GP0 command 0x%02X", command); + command_ptr++; + return true; } - break; } } - - m_GP0_command.clear(); } void GPU::WriteGP1(u32 value) @@ -545,7 +574,7 @@ void GPU::WriteGP1(u32 value) { case 0x01: // Clear FIFO { - m_GP0_command.clear(); + m_GP0_buffer.clear(); Log_DebugPrintf("GP1 clear FIFO"); UpdateGPUSTAT(); } @@ -695,11 +724,9 @@ void GPU::HandleGetGPUInfoCommand(u32 value) } } -bool GPU::HandleRenderCommand() +bool GPU::HandleRenderCommand(const u32*& command_ptr, u32 command_size) { - const u8 command = Truncate8(m_GP0_command[0] >> 24); - - const RenderCommand rc{m_GP0_command[0]}; + const RenderCommand rc{command_ptr[0]}; u8 words_per_vertex; u32 num_vertices; u32 total_words; @@ -722,10 +749,9 @@ bool GPU::HandleRenderCommand() // polyline goes until we hit the termination code num_vertices = 0; bool found_terminator = false; - for (u32 pos = BoolToUInt32(!rc.shading_enable); pos < static_cast(m_GP0_command.size()); - pos += words_per_vertex) + for (u32 pos = 1 + BoolToUInt32(!rc.shading_enable); pos < command_size; pos += words_per_vertex) { - if (m_GP0_command[pos] == 0x55555555) + if (command_ptr[pos] == 0x55555555) { found_terminator = true; break; @@ -759,7 +785,7 @@ bool GPU::HandleRenderCommand() return true; } - if (m_GP0_command.size() < total_words) + if (command_size < total_words) return false; static constexpr std::array primitive_names = {{"", "polygon", "line", "rectangle"}}; @@ -770,22 +796,24 @@ bool GPU::HandleRenderCommand() primitive_names[static_cast(rc.primitive.GetValue())], ZeroExtend32(num_vertices), ZeroExtend32(words_per_vertex)); - DispatchRenderCommand(rc, num_vertices); + DispatchRenderCommand(rc, num_vertices, command_ptr); + command_ptr += total_words; return true; } -bool GPU::HandleFillRectangleCommand() +bool GPU::HandleFillRectangleCommand(const u32*& command_ptr, u32 command_size) { - if (m_GP0_command.size() < 3) + if (command_size < 3) return false; FlushRender(); - const u32 color = m_GP0_command[0] & UINT32_C(0x00FFFFFF); - const u32 dst_x = m_GP0_command[1] & UINT32_C(0xFFFF); - const u32 dst_y = m_GP0_command[1] >> 16; - const u32 width = m_GP0_command[2] & UINT32_C(0xFFFF); - const u32 height = m_GP0_command[2] >> 16; + const u32 color = command_ptr[0] & UINT32_C(0x00FFFFFF); + const u32 dst_x = command_ptr[1] & UINT32_C(0xFFFF); + const u32 dst_y = command_ptr[1] >> 16; + const u32 width = command_ptr[2] & UINT32_C(0xFFFF); + const u32 height = command_ptr[2] >> 16; + command_ptr += 3; Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height); @@ -797,20 +825,20 @@ bool GPU::HandleFillRectangleCommand() return true; } -bool GPU::HandleCopyRectangleCPUToVRAMCommand() +bool GPU::HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 command_size) { - if (m_GP0_command.size() < 3) + if (command_size < 3) return false; - const u32 copy_width = m_GP0_command[2] & UINT32_C(0xFFFF); - const u32 copy_height = m_GP0_command[2] >> 16; + const u32 copy_width = command_ptr[2] & UINT32_C(0xFFFF); + const u32 copy_height = command_ptr[2] >> 16; const u32 num_pixels = copy_width * copy_height; const u32 num_words = 3 + ((num_pixels + 1) / 2); - if (m_GP0_command.size() < num_words) + if (command_size < num_words) return false; - const u32 dst_x = m_GP0_command[1] & UINT32_C(0xFFFF); - const u32 dst_y = m_GP0_command[1] >> 16; + const u32 dst_x = command_ptr[1] & UINT32_C(0xFFFF); + const u32 dst_y = command_ptr[1] >> 16; Log_DebugPrintf("Copy rectangle from CPU to VRAM offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, copy_width, copy_height); @@ -824,25 +852,27 @@ bool GPU::HandleCopyRectangleCPUToVRAMCommand() if (m_debug_options.dump_cpu_to_vram_copies) { DumpVRAMToFile(SmallString::FromFormat("cpu_to_vram_copy_%u.png", s_cpu_to_vram_dump_id++), copy_width, copy_height, - sizeof(u16) * copy_width, &m_GP0_command[3], true); + sizeof(u16) * copy_width, &command_ptr[3], true); } FlushRender(); - UpdateVRAM(dst_x, dst_y, copy_width, copy_height, &m_GP0_command[3]); + UpdateVRAM(dst_x, dst_y, copy_width, copy_height, &command_ptr[3]); + command_ptr += num_words; return true; } -bool GPU::HandleCopyRectangleVRAMToCPUCommand() +bool GPU::HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 command_size) { - if (m_GP0_command.size() < 3) + if (command_size < 3) return false; - const u32 width = m_GP0_command[2] & UINT32_C(0xFFFF); - const u32 height = m_GP0_command[2] >> 16; + const u32 width = command_ptr[2] & UINT32_C(0xFFFF); + const u32 height = command_ptr[2] >> 16; const u32 num_pixels = width * height; const u32 num_words = ((num_pixels + 1) / 2); - const u32 src_x = m_GP0_command[1] & UINT32_C(0xFFFF); - const u32 src_y = m_GP0_command[1] >> 16; + const u32 src_x = command_ptr[1] & UINT32_C(0xFFFF); + const u32 src_y = command_ptr[1] >> 16; + command_ptr += 3; Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", src_x, src_y, width, height); @@ -871,17 +901,18 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() return true; } -bool GPU::HandleCopyRectangleVRAMToVRAMCommand() +bool GPU::HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 command_size) { - if (m_GP0_command.size() < 4) + if (command_size < 4) return false; - const u32 src_x = m_GP0_command[1] & UINT32_C(0xFFFF); - const u32 src_y = m_GP0_command[1] >> 16; - const u32 dst_x = m_GP0_command[2] & UINT32_C(0xFFFF); - const u32 dst_y = m_GP0_command[2] >> 16; - const u32 width = m_GP0_command[3] & UINT32_C(0xFFFF); - const u32 height = m_GP0_command[3] >> 16; + const u32 src_x = command_ptr[1] & UINT32_C(0xFFFF); + const u32 src_y = command_ptr[1] >> 16; + const u32 dst_x = command_ptr[2] & UINT32_C(0xFFFF); + const u32 dst_y = command_ptr[2] >> 16; + const u32 width = command_ptr[3] & UINT32_C(0xFFFF); + const u32 height = command_ptr[3] >> 16; + command_ptr += 4; Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y, width, height); @@ -910,7 +941,7 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) {} void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) {} -void GPU::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) {} +void GPU::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) {} void GPU::FlushRender() {} diff --git a/src/core/gpu.h b/src/core/gpu.h index 0c8f99bf4..fa89e97f5 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -185,12 +185,12 @@ protected: void HandleGetGPUInfoCommand(u32 value); // Rendering commands, returns false if not enough data is provided - void HandleGP0Command(); - bool HandleRenderCommand(); - bool HandleFillRectangleCommand(); - bool HandleCopyRectangleCPUToVRAMCommand(); - bool HandleCopyRectangleVRAMToCPUCommand(); - bool HandleCopyRectangleVRAMToVRAMCommand(); + bool HandleGP0Command(const u32*& command_ptr, u32 command_size); + bool HandleRenderCommand(const u32*& command_ptr, u32 command_size); + bool HandleFillRectangleCommand(const u32*& command_ptr, u32 command_size); + bool HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 command_size); + bool HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 command_size); + bool HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 command_size); // Rendering in the backend virtual void UpdateDisplay(); @@ -199,7 +199,7 @@ protected: virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color); virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); - virtual void DispatchRenderCommand(RenderCommand rc, u32 num_vertices); + virtual void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr); virtual void FlushRender(); // Debugging @@ -353,7 +353,7 @@ protected: bool in_vblank; } m_crtc_state = {}; - std::vector m_GP0_command; + std::vector m_GP0_buffer; std::deque m_GPUREAD_buffer; DebugOptions m_debug_options; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index b8fda68b4..74d37f718 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -15,7 +15,7 @@ void GPU_HW::Reset() m_batch = {}; } -void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices) +void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr) { const u32 texpage = ZeroExtend32(m_render_state.texpage_attribute) | (ZeroExtend32(m_render_state.texlut_attribute) << 16); @@ -38,16 +38,16 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices) for (u32 i = 0; i < num_vertices; i++) { HWVertex hw_vert; - hw_vert.color = (shaded && i > 0) ? (m_GP0_command[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; + hw_vert.color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; - const VertexPosition vp{m_GP0_command[buffer_pos++]}; + const VertexPosition vp{command_ptr[buffer_pos++]}; hw_vert.x = vp.x; hw_vert.y = vp.y; hw_vert.texpage = texpage; if (textured) { - hw_vert.texcoord = Truncate16(m_GP0_command[buffer_pos++]); + hw_vert.texcoord = Truncate16(command_ptr[buffer_pos++]); // auto [u, v] = HWVertex::DecodeTexcoord(hw_vert.texcoord); // u = (u & (~(m_render_state.texture_window_mask_x * 8))) | ((m_render_state.texture_window_offset_x & // m_render_state.texture_window_mask_x) * 8); v = (v & (~(m_render_state.texture_window_mask_y * 8))) | @@ -80,11 +80,11 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices) u32 buffer_pos = 1; const bool textured = rc.texture_enable; const u32 color = rc.color_for_first_vertex; - const VertexPosition vp{m_GP0_command[buffer_pos++]}; + const VertexPosition vp{command_ptr[buffer_pos++]}; const s32 pos_left = vp.x; const s32 pos_top = vp.y; const auto [tex_left, tex_top] = - HWVertex::DecodeTexcoord(rc.texture_enable ? Truncate16(m_GP0_command[buffer_pos++]) : 0); + HWVertex::DecodeTexcoord(rc.texture_enable ? Truncate16(command_ptr[buffer_pos++]) : 0); s32 rectangle_width; s32 rectangle_height; switch (rc.rectangle_size) @@ -102,8 +102,8 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices) rectangle_height = 16; break; default: - rectangle_width = static_cast(m_GP0_command[buffer_pos] & UINT32_C(0xFFFF)); - rectangle_height = static_cast(m_GP0_command[buffer_pos] >> 16); + rectangle_width = static_cast(command_ptr[buffer_pos] & UINT32_C(0xFFFF)); + rectangle_height = static_cast(command_ptr[buffer_pos] >> 16); break; } @@ -134,8 +134,8 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices) u32 buffer_pos = 1; for (u32 i = 0; i < num_vertices; i++) { - const u32 color = (shaded && i > 0) ? (m_GP0_command[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; - const VertexPosition vp{m_GP0_command[buffer_pos++]}; + const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; + const VertexPosition vp{command_ptr[buffer_pos++]}; m_batch.vertices.push_back(HWVertex{vp.x.GetValue(), vp.y.GetValue(), color}); } } @@ -480,7 +480,7 @@ GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc void GPU_HW::InvalidateVRAMReadCache() {} -void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) +void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) { if (rc.texture_enable) { @@ -490,15 +490,15 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) case Primitive::Polygon: { if (rc.shading_enable) - m_render_state.SetFromPolygonTexcoord(m_GP0_command[2], m_GP0_command[5]); + m_render_state.SetFromPolygonTexcoord(command_ptr[2], command_ptr[5]); else - m_render_state.SetFromPolygonTexcoord(m_GP0_command[2], m_GP0_command[4]); + m_render_state.SetFromPolygonTexcoord(command_ptr[2], command_ptr[4]); } break; case Primitive::Rectangle: { - m_render_state.SetFromRectangleTexcoord(m_GP0_command[2]); + m_render_state.SetFromRectangleTexcoord(command_ptr[2]); m_render_state.SetFromPageAttribute(Truncate16(m_GPUSTAT.bits)); } break; @@ -586,5 +586,5 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) m_render_state.ClearTextureWindowChangedFlag(); } - LoadVertices(rc, num_vertices); + LoadVertices(rc, num_vertices, command_ptr); } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index d4c32d21a..0afffe79d 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -83,7 +83,7 @@ protected: bool IsFlushed() const { return m_batch.vertices.empty(); } - void DispatchRenderCommand(RenderCommand rc, u32 num_vertices) override; + void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) override; void CalcScissorRect(int* left, int* top, int* right, int* bottom); @@ -107,5 +107,5 @@ private: void GenerateShaderHeader(std::stringstream& ss); - void LoadVertices(RenderCommand rc, u32 num_vertices); + void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr); };