#include "common/assert.h" #include "common/log.h" #include "common/string_util.h" #include "gpu.h" #include "interrupt_controller.h" #include "system.h" Log_SetChannel(GPU); #define CHECK_COMMAND_SIZE(num_words) \ if (m_fifo.GetSize() < num_words) \ { \ m_command_total_words = num_words; \ return false; \ } static u32 s_cpu_to_vram_dump_id = 1; static u32 s_vram_to_cpu_dump_id = 1; static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero) { return value == 0 ? value_for_zero : value; } void GPU::ExecuteCommands() { m_syncing = true; for (;;) { if (m_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty()) { switch (m_blitter_state) { case BlitterState::Idle: { const u32 command = m_fifo.Peek(0) >> 24; if ((this->*s_GP0_command_handler_table[command])()) continue; else break; } case BlitterState::WritingVRAM: { DebugAssert(m_blit_remaining_words > 0); const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize()); const size_t old_size = m_blit_buffer.size(); m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy); m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy); m_blit_remaining_words -= words_to_copy; AddCommandTicks(words_to_copy); Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words); if (m_blit_remaining_words == 0) FinishVRAMWrite(); continue; } case BlitterState::ReadingVRAM: { Panic("shouldn't be here"); } break; case BlitterState::DrawingPolyLine: { const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1; u32 terminator_index = m_render_command.shading_enable ? ((static_cast(m_blit_buffer.size()) & 1u) ^ 1u) : 0u; for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex) { // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000. // terminator is on the first word for the vertex if ((m_fifo.Peek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000)) break; } const bool found_terminator = (terminator_index < m_fifo.GetSize()); const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize()); if (words_to_copy > 0) { const size_t old_size = m_blit_buffer.size(); m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy); m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy); } Log_DebugPrintf("Added %u words to polyline", words_to_copy); if (found_terminator) { // drop terminator m_fifo.RemoveOne(); Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount()); DispatchRenderCommand(); m_blit_buffer.clear(); EndCommand(); continue; } } break; } } m_fifo_pushed = false; UpdateDMARequest(); if (!m_fifo_pushed) break; } m_syncing = false; } void GPU::EndCommand() { m_blitter_state = BlitterState::Idle; m_command_total_words = 0; } GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable() { GP0CommandHandlerTable table = {}; for (u32 i = 0; i < static_cast(table.size()); i++) table[i] = &GPU::HandleUnknownGP0Command; table[0x00] = &GPU::HandleNOPCommand; table[0x01] = &GPU::HandleClearCacheCommand; table[0x02] = &GPU::HandleFillRectangleCommand; table[0x03] = &GPU::HandleNOPCommand; for (u32 i = 0x04; i <= 0x1E; i++) table[i] = &GPU::HandleNOPCommand; table[0x1F] = &GPU::HandleInterruptRequestCommand; for (u32 i = 0x20; i <= 0x7F; i++) { const RenderCommand rc{i << 24}; switch (rc.primitive) { case Primitive::Polygon: table[i] = &GPU::HandleRenderPolygonCommand; break; case Primitive::Line: table[i] = rc.polyline ? &GPU::HandleRenderPolyLineCommand : &GPU::HandleRenderLineCommand; break; case Primitive::Rectangle: table[i] = &GPU::HandleRenderRectangleCommand; break; default: table[i] = &GPU::HandleUnknownGP0Command; break; } } table[0xE0] = &GPU::HandleNOPCommand; table[0xE1] = &GPU::HandleSetDrawModeCommand; table[0xE2] = &GPU::HandleSetTextureWindowCommand; table[0xE3] = &GPU::HandleSetDrawingAreaTopLeftCommand; table[0xE4] = &GPU::HandleSetDrawingAreaBottomRightCommand; table[0xE5] = &GPU::HandleSetDrawingOffsetCommand; table[0xE6] = &GPU::HandleSetMaskBitCommand; for (u32 i = 0xE7; i <= 0xEF; i++) table[i] = &GPU::HandleNOPCommand; for (u32 i = 0x80; i <= 0x9F; i++) table[i] = &GPU::HandleCopyRectangleVRAMToVRAMCommand; for (u32 i = 0xA0; i <= 0xBF; i++) table[i] = &GPU::HandleCopyRectangleCPUToVRAMCommand; for (u32 i = 0xC0; i <= 0xDF; i++) table[i] = &GPU::HandleCopyRectangleVRAMToCPUCommand; return table; } bool GPU::HandleUnknownGP0Command() { const u32 command = m_fifo.Pop() >> 24; Log_ErrorPrintf("Unimplemented GP0 command 0x%02X", command); EndCommand(); return true; } bool GPU::HandleNOPCommand() { m_fifo.RemoveOne(); EndCommand(); return true; } bool GPU::HandleClearCacheCommand() { Log_DebugPrintf("GP0 clear cache"); m_fifo.RemoveOne(); AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleInterruptRequestCommand() { Log_WarningPrintf("GP0 interrupt request"); if (!m_GPUSTAT.interrupt_request) { m_GPUSTAT.interrupt_request = true; m_interrupt_controller->InterruptRequest(InterruptController::IRQ::GPU); } m_fifo.RemoveOne(); AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetDrawModeCommand() { const u32 param = m_fifo.Pop() & 0x00FFFFFFu; Log_DebugPrintf("Set draw mode %08X", param); SetDrawMode(Truncate16(param)); AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetTextureWindowCommand() { const u32 param = m_fifo.Pop() & 0x00FFFFFFu; SetTextureWindow(param); Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x, m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x, m_draw_mode.texture_window_offset_y); AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetDrawingAreaTopLeftCommand() { const u32 param = m_fifo.Pop() & 0x00FFFFFFu; const u32 left = param & 0x3FF; const u32 top = (param >> 10) & 0x1FF; Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top); if (m_drawing_area.left != left || m_drawing_area.top != top) { FlushRender(); m_drawing_area.left = left; m_drawing_area.top = top; m_drawing_area_changed = true; } AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetDrawingAreaBottomRightCommand() { const u32 param = m_fifo.Pop() & 0x00FFFFFFu; const u32 right = param & 0x3FFu; const u32 bottom = (param >> 10) & 0x1FFu; Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, m_drawing_area.bottom); if (m_drawing_area.right != right || m_drawing_area.bottom != bottom) { FlushRender(); m_drawing_area.right = right; m_drawing_area.bottom = bottom; m_drawing_area_changed = true; } AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetDrawingOffsetCommand() { const u32 param = m_fifo.Pop() & 0x00FFFFFFu; const s32 x = SignExtendN<11, s32>(param & 0x7FFu); const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FFu); Log_DebugPrintf("Set drawing offset (%d, %d)", m_drawing_offset.x, m_drawing_offset.y); if (m_drawing_offset.x != x || m_drawing_offset.y != y) { FlushRender(); m_drawing_offset.x = x; m_drawing_offset.y = y; } AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetMaskBitCommand() { const u32 param = m_fifo.Pop() & 0x00FFFFFFu; constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); const u32 gpustat_bits = (param & 0x03) << 11; if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits) { FlushRender(); m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; } Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing), BoolToUInt32(m_GPUSTAT.check_mask_before_draw)); AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleRenderPolygonCommand() { const RenderCommand rc{m_fifo.Peek(0)}; // shaded vertices use the colour from the first word for the first vertex const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable); const u32 num_vertices = rc.quad_polygon ? 4 : 3; const u32 total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable); CHECK_COMMAND_SIZE(total_words); if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending()) Synchronize(); // setup time static constexpr u16 s_setup_time[2][2][2] = {{{46, 226}, {334, 496}}, {{82, 262}, {370, 532}}}; const TickCount setup_ticks = static_cast(ZeroExtend32( s_setup_time[BoolToUInt8(rc.quad_polygon)][BoolToUInt8(rc.shading_enable)][BoolToUInt8(rc.texture_enable)])); AddCommandTicks(setup_ticks); Log_TracePrintf("Render %s %s %s %s polygon (%u verts, %u words per vert), %d setup ticks", rc.quad_polygon ? "four-point" : "three-point", rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", ZeroExtend32(num_vertices), ZeroExtend32(words_per_vertex), setup_ticks); // set draw state up if (rc.texture_enable) { const u16 texpage_attribute = Truncate16((rc.shading_enable ? m_fifo.Peek(5) : m_fifo.Peek(4)) >> 16); SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) | (m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK)); SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16)); } m_stats.num_vertices += num_vertices; m_stats.num_polygons++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); DispatchRenderCommand(); EndCommand(); return true; } bool GPU::HandleRenderRectangleCommand() { const RenderCommand rc{m_fifo.Peek(0)}; const u32 total_words = 2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == DrawRectangleSize::Variable); CHECK_COMMAND_SIZE(total_words); if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending()) Synchronize(); if (rc.texture_enable) SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16)); const TickCount setup_ticks = 16; AddCommandTicks(setup_ticks); Log_TracePrintf("Render %s %s %s rectangle (%u words), %d setup ticks", rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", total_words, setup_ticks); m_stats.num_vertices++; m_stats.num_polygons++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); DispatchRenderCommand(); EndCommand(); return true; } bool GPU::HandleRenderLineCommand() { const RenderCommand rc{m_fifo.Peek(0)}; const u32 total_words = rc.shading_enable ? 4 : 3; CHECK_COMMAND_SIZE(total_words); if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending()) Synchronize(); Log_TracePrintf("Render %s %s line (%u total words)", rc.transparency_enable ? "semi-transparent" : "opaque", rc.shading_enable ? "shaded" : "monochrome", total_words); m_stats.num_vertices += 2; m_stats.num_polygons++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); DispatchRenderCommand(); EndCommand(); return true; } bool GPU::HandleRenderPolyLineCommand() { // always read the first two vertices, we test for the terminator after that const RenderCommand rc{m_fifo.Peek(0)}; const u32 min_words = rc.shading_enable ? 3 : 4; CHECK_COMMAND_SIZE(min_words); if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending()) Synchronize(); const TickCount setup_ticks = 16; AddCommandTicks(setup_ticks); Log_TracePrintf("Render %s %s poly-line, %d setup ticks", rc.transparency_enable ? "semi-transparent" : "opaque", rc.shading_enable ? "shaded" : "monochrome", setup_ticks); m_render_command.bits = rc.bits; m_fifo.RemoveOne(); const u32 words_to_pop = min_words - 1; m_blit_buffer.resize(words_to_pop); m_fifo.PopRange(m_blit_buffer.data(), words_to_pop); // polyline goes via a different path through the blit buffer m_blitter_state = BlitterState::DrawingPolyLine; m_command_total_words = 0; return true; } bool GPU::HandleFillRectangleCommand() { CHECK_COMMAND_SIZE(3); FlushRender(); const u32 color = m_fifo.Pop() & 0x00FFFFFF; const u32 dst_x = m_fifo.Peek() & 0x3F0; const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF; const u32 width = ((m_fifo.Peek() & 0x3FF) + 0xF) & ~0xF; const u32 height = (m_fifo.Pop() >> 16) & 0x1FF; Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height); FillVRAM(dst_x, dst_y, width, height, color); m_stats.num_vram_fills++; AddCommandTicks(46 + ((width / 8) + 9) * height); EndCommand(); return true; } bool GPU::HandleCopyRectangleCPUToVRAMCommand() { CHECK_COMMAND_SIZE(3); m_fifo.RemoveOne(); const u32 dst_x = m_fifo.Peek() & 0x3FF; const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF; const u32 copy_width = ReplaceZero(m_fifo.Peek() & 0x3FF, 0x400); const u32 copy_height = ReplaceZero((m_fifo.Pop() >> 16) & 0x1FF, 0x200); const u32 num_pixels = copy_width * copy_height; const u32 num_words = ((num_pixels + 1) / 2); Log_DebugPrintf("Copy rectangle from CPU to VRAM offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, copy_width, copy_height); EndCommand(); m_blitter_state = BlitterState::WritingVRAM; m_blit_buffer.reserve(num_words); m_blit_remaining_words = num_words; m_vram_transfer.x = Truncate16(dst_x); m_vram_transfer.y = Truncate16(dst_y); m_vram_transfer.width = Truncate16(copy_width); m_vram_transfer.height = Truncate16(copy_height); return true; } void GPU::FinishVRAMWrite() { if (m_system->GetSettings().debugging.dump_cpu_to_vram_copies) { DumpVRAMToFile(StringUtil::StdStringFromFormat("cpu_to_vram_copy_%u.png", s_cpu_to_vram_dump_id++).c_str(), m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * m_vram_transfer.width, m_blit_buffer.data(), true); } FlushRender(); UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height, m_blit_buffer.data()); m_blit_buffer.clear(); m_vram_transfer = {}; m_blitter_state = BlitterState::Idle; m_stats.num_vram_writes++; } bool GPU::HandleCopyRectangleVRAMToCPUCommand() { CHECK_COMMAND_SIZE(3); m_fifo.RemoveOne(); m_vram_transfer.x = Truncate16(m_fifo.Peek() & 0x3FF); m_vram_transfer.y = Truncate16((m_fifo.Pop() >> 16) & 0x3FF); m_vram_transfer.width = ((Truncate16(m_fifo.Peek()) - 1) & 0x3FF) + 1; m_vram_transfer.height = ((Truncate16(m_fifo.Pop() >> 16) - 1) & 0x1FF) + 1; Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0); // all rendering should be done first... FlushRender(); // ensure VRAM shadow is up to date ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); if (m_system->GetSettings().debugging.dump_vram_to_cpu_copies) { DumpVRAMToFile(StringUtil::StdStringFromFormat("vram_to_cpu_copy_%u.png", s_vram_to_cpu_dump_id++).c_str(), m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH, &m_vram_ptr[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true); } // switch to pixel-by-pixel read state m_stats.num_vram_reads++; m_blitter_state = BlitterState::ReadingVRAM; m_command_total_words = 0; return true; } bool GPU::HandleCopyRectangleVRAMToVRAMCommand() { CHECK_COMMAND_SIZE(4); m_fifo.RemoveOne(); const u32 src_x = m_fifo.Peek() & 0x3FF; const u32 src_y = (m_fifo.Pop() >> 16) & 0x3FF; const u32 dst_x = m_fifo.Peek() & 0x3FF; const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF; const u32 width = ReplaceZero(m_fifo.Peek() & 0x3FF, 0x400); const u32 height = ReplaceZero((m_fifo.Pop() >> 16) & 0x1FF, 0x200); Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y, width, height); FlushRender(); CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); m_stats.num_vram_copies++; AddCommandTicks(width * height * 2); EndCommand(); return true; }