// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gpu.h" #include "dma.h" #include "gpu_shadergen.h" #include "host.h" #include "imgui.h" #include "interrupt_controller.h" #include "settings.h" #include "system.h" #include "timers.h" #include "util/gpu_device.h" #include "util/image.h" #include "util/imgui_manager.h" #include "util/postprocessing.h" #include "util/shadergen.h" #include "util/state_wrapper.h" #include "common/align.h" #include "common/file_system.h" #include "common/heap_array.h" #include "common/log.h" #include "common/path.h" #include "common/small_string.h" #include "common/string_util.h" #include "IconsFontAwesome5.h" #include "fmt/format.h" #include #include Log_SetChannel(GPU); std::unique_ptr g_gpu; alignas(HOST_PAGE_SIZE) u16 g_vram[VRAM_SIZE / sizeof(u16)]; const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable(); static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, u32 texture_data_stride, GPUTexture::Format texture_format, bool display_osd_message, bool use_thread); static void JoinScreenshotThreads(); static std::deque s_screenshot_threads; static std::mutex s_screenshot_threads_mutex; GPU::GPU() { ResetStatistics(); } GPU::~GPU() { JoinScreenshotThreads(); DestroyDeinterlaceTextures(); g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); if (g_gpu_device) g_gpu_device->SetGPUTimingEnabled(false); } bool GPU::Initialize() { m_force_progressive_scan = g_settings.gpu_disable_interlacing; m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings; m_crtc_tick_event = TimingEvents::CreateTimingEvent( "GPU CRTC Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { static_cast(param)->CRTCTickEvent(ticks); }, this, true); m_command_tick_event = TimingEvents::CreateTimingEvent( "GPU Command Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { static_cast(param)->CommandTickEvent(ticks); }, this, true); m_fifo_size = g_settings.gpu_fifo_size; m_max_run_ahead = g_settings.gpu_max_run_ahead; m_console_is_pal = System::IsPALRegion(); UpdateCRTCConfig(); if (!CompileDisplayPipelines(true, true, g_settings.gpu_24bit_chroma_smoothing)) { Host::ReportErrorAsync("Error", "Failed to compile base GPU pipelines."); return false; } g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage); return true; } void GPU::UpdateSettings(const Settings& old_settings) { FlushRender(); m_force_progressive_scan = g_settings.gpu_disable_interlacing; m_fifo_size = g_settings.gpu_fifo_size; m_max_run_ahead = g_settings.gpu_max_run_ahead; if (m_force_ntsc_timings != g_settings.gpu_force_ntsc_timings || m_console_is_pal != System::IsPALRegion()) { m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings; m_console_is_pal = System::IsPALRegion(); UpdateCRTCConfig(); } // Crop mode calls this, so recalculate the display area UpdateCRTCDisplayParameters(); if (g_settings.display_scaling != old_settings.display_scaling || g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || g_settings.gpu_24bit_chroma_smoothing != old_settings.gpu_24bit_chroma_smoothing) { // Toss buffers on mode change. if (g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode) DestroyDeinterlaceTextures(); if (!CompileDisplayPipelines(g_settings.display_scaling != old_settings.display_scaling, g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode, g_settings.gpu_24bit_chroma_smoothing != old_settings.gpu_24bit_chroma_smoothing)) { Panic("Failed to compile display pipeline on settings change."); } } g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage); } void GPU::CPUClockChanged() { UpdateCRTCConfig(); } void GPU::UpdateResolutionScale() { } std::tuple GPU::GetEffectiveDisplayResolution(bool scaled /* = true */) { return std::tie(m_crtc_state.display_vram_width, m_crtc_state.display_vram_height); } std::tuple GPU::GetFullDisplayResolution(bool scaled /* = true */) { return std::tie(m_crtc_state.display_width, m_crtc_state.display_height); } void GPU::Reset(bool clear_vram) { m_GPUSTAT.bits = 0x14802000; m_set_texture_disable_mask = false; m_GPUREAD_latch = 0; m_crtc_state.fractional_ticks = 0; m_crtc_state.fractional_dot_ticks = 0; m_crtc_state.current_tick_in_scanline = 0; m_crtc_state.current_scanline = 0; m_crtc_state.in_hblank = false; m_crtc_state.in_vblank = false; m_crtc_state.interlaced_field = 0; m_crtc_state.interlaced_display_field = 0; if (clear_vram) std::memset(g_vram, 0, sizeof(g_vram)); SoftReset(); UpdateDisplay(); } void GPU::SoftReset() { FlushRender(); if (m_blitter_state == BlitterState::WritingVRAM) FinishVRAMWrite(); m_GPUSTAT.texture_page_x_base = 0; m_GPUSTAT.texture_page_y_base = 0; m_GPUSTAT.semi_transparency_mode = GPUTransparencyMode::HalfBackgroundPlusHalfForeground; m_GPUSTAT.texture_color_mode = GPUTextureMode::Palette4Bit; m_GPUSTAT.dither_enable = false; m_GPUSTAT.draw_to_displayed_field = false; m_GPUSTAT.set_mask_while_drawing = false; m_GPUSTAT.check_mask_before_draw = false; m_GPUSTAT.reverse_flag = false; m_GPUSTAT.texture_disable = false; m_GPUSTAT.horizontal_resolution_2 = 0; m_GPUSTAT.horizontal_resolution_1 = 0; m_GPUSTAT.vertical_resolution = false; m_GPUSTAT.pal_mode = System::IsPALRegion(); m_GPUSTAT.display_area_color_depth_24 = false; m_GPUSTAT.vertical_interlace = false; m_GPUSTAT.display_disable = true; m_GPUSTAT.dma_direction = DMADirection::Off; m_drawing_area.Set(0, 0, 0, 0); m_drawing_area_changed = true; m_drawing_offset = {}; std::memset(&m_crtc_state.regs, 0, sizeof(m_crtc_state.regs)); m_crtc_state.regs.horizontal_display_range = 0xC60260; m_crtc_state.regs.vertical_display_range = 0x3FC10; m_blitter_state = BlitterState::Idle; m_pending_command_ticks = 0; m_command_total_words = 0; m_vram_transfer = {}; m_fifo.Clear(); m_blit_buffer.clear(); m_blit_remaining_words = 0; m_draw_mode.texture_window_value = 0xFFFFFFFFu; SetDrawMode(0); SetTexturePalette(0); SetTextureWindow(0); UpdateDMARequest(); UpdateCRTCConfig(); UpdateCRTCTickEvent(); UpdateCommandTickEvent(); UpdateGPUIdle(); } bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) { FlushRender(); if (sw.IsReading()) { // perform a reset to discard all pending draws/fb state Reset(host_texture == nullptr); } sw.Do(&m_GPUSTAT.bits); sw.Do(&m_draw_mode.mode_reg.bits); sw.Do(&m_draw_mode.palette_reg.bits); sw.Do(&m_draw_mode.texture_window_value); if (sw.GetVersion() < 62) { // texture_page_x, texture_page_y, texture_palette_x, texture_palette_y DebugAssert(sw.IsReading()); sw.SkipBytes(sizeof(u32) * 4); } sw.Do(&m_draw_mode.texture_window.and_x); sw.Do(&m_draw_mode.texture_window.and_y); sw.Do(&m_draw_mode.texture_window.or_x); sw.Do(&m_draw_mode.texture_window.or_y); sw.Do(&m_draw_mode.texture_x_flip); sw.Do(&m_draw_mode.texture_y_flip); sw.Do(&m_drawing_area.left); sw.Do(&m_drawing_area.top); sw.Do(&m_drawing_area.right); sw.Do(&m_drawing_area.bottom); sw.Do(&m_drawing_offset.x); sw.Do(&m_drawing_offset.y); sw.Do(&m_drawing_offset.x); sw.Do(&m_console_is_pal); sw.Do(&m_set_texture_disable_mask); sw.Do(&m_crtc_state.regs.display_address_start); sw.Do(&m_crtc_state.regs.horizontal_display_range); sw.Do(&m_crtc_state.regs.vertical_display_range); sw.Do(&m_crtc_state.dot_clock_divider); sw.Do(&m_crtc_state.display_width); sw.Do(&m_crtc_state.display_height); sw.Do(&m_crtc_state.display_origin_left); sw.Do(&m_crtc_state.display_origin_top); sw.Do(&m_crtc_state.display_vram_left); sw.Do(&m_crtc_state.display_vram_top); sw.Do(&m_crtc_state.display_vram_width); sw.Do(&m_crtc_state.display_vram_height); sw.Do(&m_crtc_state.horizontal_total); sw.Do(&m_crtc_state.horizontal_visible_start); sw.Do(&m_crtc_state.horizontal_visible_end); sw.Do(&m_crtc_state.horizontal_display_start); sw.Do(&m_crtc_state.horizontal_display_end); sw.Do(&m_crtc_state.vertical_total); sw.Do(&m_crtc_state.vertical_visible_start); sw.Do(&m_crtc_state.vertical_visible_end); sw.Do(&m_crtc_state.vertical_display_start); sw.Do(&m_crtc_state.vertical_display_end); sw.Do(&m_crtc_state.fractional_ticks); sw.Do(&m_crtc_state.current_tick_in_scanline); sw.Do(&m_crtc_state.current_scanline); sw.DoEx(&m_crtc_state.fractional_dot_ticks, 46, 0); sw.Do(&m_crtc_state.in_hblank); sw.Do(&m_crtc_state.in_vblank); sw.Do(&m_crtc_state.interlaced_field); sw.Do(&m_crtc_state.interlaced_display_field); sw.Do(&m_crtc_state.active_line_lsb); sw.Do(&m_blitter_state); sw.Do(&m_pending_command_ticks); sw.Do(&m_command_total_words); sw.Do(&m_GPUREAD_latch); sw.Do(&m_vram_transfer.x); sw.Do(&m_vram_transfer.y); sw.Do(&m_vram_transfer.width); sw.Do(&m_vram_transfer.height); sw.Do(&m_vram_transfer.col); sw.Do(&m_vram_transfer.row); sw.Do(&m_fifo); sw.Do(&m_blit_buffer); sw.Do(&m_blit_remaining_words); sw.Do(&m_render_command.bits); sw.Do(&m_max_run_ahead); sw.Do(&m_fifo_size); if (sw.IsReading()) { m_draw_mode.texture_page_changed = true; m_draw_mode.texture_window_changed = true; m_drawing_area_changed = true; UpdateDMARequest(); } if (!host_texture) { if (!sw.DoMarker("GPU-VRAM")) return false; if (sw.IsReading()) { // Still need a temporary here. FixedHeapArray temp; sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data(), false, false); } else { ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); } } if (sw.IsReading()) { UpdateCRTCConfig(); if (update_display) UpdateDisplay(); UpdateCRTCTickEvent(); UpdateCommandTickEvent(); } return !sw.HasError(); } void GPU::RestoreDeviceContext() { } void GPU::UpdateDMARequest() { switch (m_blitter_state) { case BlitterState::Idle: m_GPUSTAT.ready_to_send_vram = false; m_GPUSTAT.ready_to_recieve_dma = (m_fifo.IsEmpty() || m_fifo.GetSize() < m_command_total_words); break; case BlitterState::WritingVRAM: m_GPUSTAT.ready_to_send_vram = false; m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size); break; case BlitterState::ReadingVRAM: m_GPUSTAT.ready_to_send_vram = true; m_GPUSTAT.ready_to_recieve_dma = m_fifo.IsEmpty(); break; case BlitterState::DrawingPolyLine: m_GPUSTAT.ready_to_send_vram = false; m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size); break; default: UnreachableCode(); break; } bool dma_request; switch (m_GPUSTAT.dma_direction) { case DMADirection::Off: dma_request = false; break; case DMADirection::FIFO: dma_request = m_GPUSTAT.ready_to_recieve_dma; break; case DMADirection::CPUtoGP0: dma_request = m_GPUSTAT.ready_to_recieve_dma; break; case DMADirection::GPUREADtoCPU: dma_request = m_GPUSTAT.ready_to_send_vram; break; default: dma_request = false; break; } m_GPUSTAT.dma_data_request = dma_request; DMA::SetRequest(DMA::Channel::GPU, dma_request); } void GPU::UpdateGPUIdle() { switch (m_blitter_state) { case BlitterState::Idle: m_GPUSTAT.gpu_idle = (m_pending_command_ticks <= 0 && m_fifo.IsEmpty()); break; case BlitterState::WritingVRAM: m_GPUSTAT.gpu_idle = false; break; case BlitterState::ReadingVRAM: m_GPUSTAT.gpu_idle = false; break; case BlitterState::DrawingPolyLine: m_GPUSTAT.gpu_idle = false; break; default: UnreachableCode(); break; } } u32 GPU::ReadRegister(u32 offset) { switch (offset) { case 0x00: return ReadGPUREAD(); case 0x04: { // code can be dependent on the odd/even bit, so update the GPU state when reading. // we can mitigate this slightly by only updating when the raster is actually hitting a new line if (IsCRTCScanlinePending()) SynchronizeCRTC(); if (IsCommandCompletionPending()) m_command_tick_event->InvokeEarly(); return m_GPUSTAT.bits; } default: Log_ErrorPrintf("Unhandled register read: %02X", offset); return UINT32_C(0xFFFFFFFF); } } void GPU::WriteRegister(u32 offset, u32 value) { switch (offset) { case 0x00: m_fifo.Push(value); ExecuteCommands(); return; case 0x04: WriteGP1(value); return; default: Log_ErrorPrintf("Unhandled register write: %02X <- %08X", offset, value); return; } } void GPU::DMARead(u32* words, u32 word_count) { if (m_GPUSTAT.dma_direction != DMADirection::GPUREADtoCPU) { Log_ErrorPrintf("Invalid DMA direction from GPU DMA read"); std::fill_n(words, word_count, UINT32_C(0xFFFFFFFF)); return; } for (u32 i = 0; i < word_count; i++) words[i] = ReadGPUREAD(); } void GPU::EndDMAWrite() { ExecuteCommands(); } /** * NTSC GPU clock 53.693175 MHz * PAL GPU clock 53.203425 MHz * courtesy of @ggrtk * * NTSC - sysclk * 715909 / 451584 * PAL - sysclk * 709379 / 451584 */ TickCount GPU::GetCRTCFrequency() const { return m_console_is_pal ? 53203425 : 53693175; } TickCount GPU::CRTCTicksToSystemTicks(TickCount gpu_ticks, TickCount fractional_ticks) const { // convert to master clock, rounding up as we want to overshoot not undershoot if (!m_console_is_pal) return static_cast((u64(gpu_ticks) * u64(451584) + fractional_ticks + u64(715908)) / u64(715909)); else return static_cast((u64(gpu_ticks) * u64(451584) + fractional_ticks + u64(709378)) / u64(709379)); } TickCount GPU::SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const { u64 mul = u64(sysclk_ticks); mul *= !m_console_is_pal ? u64(715909) : u64(709379); mul += u64(*fractional_ticks); const TickCount ticks = static_cast(mul / u64(451584)); *fractional_ticks = static_cast(mul % u64(451584)); return ticks; } void GPU::AddCommandTicks(TickCount ticks) { m_pending_command_ticks += ticks; } void GPU::SynchronizeCRTC() { m_crtc_tick_event->InvokeEarly(); } float GPU::ComputeHorizontalFrequency() const { const CRTCState& cs = m_crtc_state; TickCount fractional_ticks = 0; return static_cast( static_cast(SystemTicksToCRTCTicks(System::GetTicksPerSecond(), &fractional_ticks)) / static_cast(cs.horizontal_total)); } float GPU::ComputeVerticalFrequency() const { const CRTCState& cs = m_crtc_state; const TickCount ticks_per_frame = cs.horizontal_total * cs.vertical_total; TickCount fractional_ticks = 0; return static_cast( static_cast(SystemTicksToCRTCTicks(System::GetTicksPerSecond(), &fractional_ticks)) / static_cast(ticks_per_frame)); } float GPU::ComputeDisplayAspectRatio() const { if (g_settings.display_force_4_3_for_24bit && m_GPUSTAT.display_area_color_depth_24) { return 4.0f / 3.0f; } else if (g_settings.display_aspect_ratio == DisplayAspectRatio::Auto) { const CRTCState& cs = m_crtc_state; float relative_width = static_cast(cs.horizontal_visible_end - cs.horizontal_visible_start); float relative_height = static_cast(cs.vertical_visible_end - cs.vertical_visible_start); if (relative_width <= 0 || relative_height <= 0) return 4.0f / 3.0f; if (m_GPUSTAT.pal_mode) { relative_width /= static_cast(PAL_HORIZONTAL_ACTIVE_END - PAL_HORIZONTAL_ACTIVE_START); relative_height /= static_cast(PAL_VERTICAL_ACTIVE_END - PAL_VERTICAL_ACTIVE_START); } else { relative_width /= static_cast(NTSC_HORIZONTAL_ACTIVE_END - NTSC_HORIZONTAL_ACTIVE_START); relative_height /= static_cast(NTSC_VERTICAL_ACTIVE_END - NTSC_VERTICAL_ACTIVE_START); } return (relative_width / relative_height) * (4.0f / 3.0f); } else if (g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1) { if (m_crtc_state.display_width == 0 || m_crtc_state.display_height == 0) return 4.0f / 3.0f; return static_cast(m_crtc_state.display_width) / static_cast(m_crtc_state.display_height); } else { return g_settings.GetDisplayAspectRatioValue(); } } void GPU::UpdateCRTCConfig() { static constexpr std::array dot_clock_dividers = {{10, 8, 5, 4, 7, 7, 7, 7}}; CRTCState& cs = m_crtc_state; if (m_GPUSTAT.pal_mode) { cs.vertical_total = PAL_TOTAL_LINES; cs.current_scanline %= PAL_TOTAL_LINES; cs.horizontal_total = PAL_TICKS_PER_LINE; cs.horizontal_sync_start = PAL_HSYNC_TICKS; cs.current_tick_in_scanline %= System::ScaleTicksToOverclock(PAL_TICKS_PER_LINE); } else { cs.vertical_total = NTSC_TOTAL_LINES; cs.current_scanline %= NTSC_TOTAL_LINES; cs.horizontal_total = NTSC_TICKS_PER_LINE; cs.horizontal_sync_start = NTSC_HSYNC_TICKS; cs.current_tick_in_scanline %= System::ScaleTicksToOverclock(NTSC_TICKS_PER_LINE); } cs.in_hblank = (cs.current_tick_in_scanline >= cs.horizontal_sync_start); const u8 horizontal_resolution_index = m_GPUSTAT.horizontal_resolution_1 | (m_GPUSTAT.horizontal_resolution_2 << 2); cs.dot_clock_divider = dot_clock_dividers[horizontal_resolution_index]; cs.horizontal_display_start = (std::min(cs.regs.X1, cs.horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider; cs.horizontal_display_end = (std::min(cs.regs.X2, cs.horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider; cs.vertical_display_start = std::min(cs.regs.Y1, cs.vertical_total); cs.vertical_display_end = std::min(cs.regs.Y2, cs.vertical_total); if (m_GPUSTAT.pal_mode && m_force_ntsc_timings) { // scale to NTSC parameters cs.horizontal_display_start = static_cast((static_cast(cs.horizontal_display_start) * NTSC_TICKS_PER_LINE) / PAL_TICKS_PER_LINE); cs.horizontal_display_end = static_cast( ((static_cast(cs.horizontal_display_end) * NTSC_TICKS_PER_LINE) + (PAL_TICKS_PER_LINE - 1)) / PAL_TICKS_PER_LINE); cs.vertical_display_start = static_cast((static_cast(cs.vertical_display_start) * NTSC_TOTAL_LINES) / PAL_TOTAL_LINES); cs.vertical_display_end = static_cast( ((static_cast(cs.vertical_display_end) * NTSC_TOTAL_LINES) + (PAL_TOTAL_LINES - 1)) / PAL_TOTAL_LINES); cs.vertical_total = NTSC_TOTAL_LINES; cs.current_scanline %= NTSC_TOTAL_LINES; cs.horizontal_total = NTSC_TICKS_PER_LINE; cs.current_tick_in_scanline %= NTSC_TICKS_PER_LINE; } cs.horizontal_display_start = static_cast(System::ScaleTicksToOverclock(static_cast(cs.horizontal_display_start))); cs.horizontal_display_end = static_cast(System::ScaleTicksToOverclock(static_cast(cs.horizontal_display_end))); cs.horizontal_total = static_cast(System::ScaleTicksToOverclock(static_cast(cs.horizontal_total))); System::SetThrottleFrequency(ComputeVerticalFrequency()); UpdateCRTCDisplayParameters(); UpdateCRTCTickEvent(); } void GPU::UpdateCRTCDisplayParameters() { CRTCState& cs = m_crtc_state; const DisplayCropMode crop_mode = g_settings.display_crop_mode; const u16 horizontal_total = m_GPUSTAT.pal_mode ? PAL_TICKS_PER_LINE : NTSC_TICKS_PER_LINE; const u16 vertical_total = m_GPUSTAT.pal_mode ? PAL_TOTAL_LINES : NTSC_TOTAL_LINES; const u16 horizontal_display_start = (std::min(cs.regs.X1, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider; const u16 horizontal_display_end = (std::min(cs.regs.X2, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider; const u16 vertical_display_start = std::min(cs.regs.Y1, vertical_total); const u16 vertical_display_end = std::min(cs.regs.Y2, vertical_total); if (m_GPUSTAT.pal_mode) { // TODO: Verify PAL numbers. switch (crop_mode) { case DisplayCropMode::None: cs.horizontal_visible_start = PAL_HORIZONTAL_ACTIVE_START; cs.horizontal_visible_end = PAL_HORIZONTAL_ACTIVE_END; cs.vertical_visible_start = PAL_VERTICAL_ACTIVE_START; cs.vertical_visible_end = PAL_VERTICAL_ACTIVE_END; break; case DisplayCropMode::Overscan: cs.horizontal_visible_start = static_cast(std::max(0, 628 + g_settings.display_active_start_offset)); cs.horizontal_visible_end = static_cast(std::max(cs.horizontal_visible_start, 3188 + g_settings.display_active_end_offset)); cs.vertical_visible_start = static_cast(std::max(0, 30 + g_settings.display_line_start_offset)); cs.vertical_visible_end = static_cast(std::max(cs.vertical_visible_start, 298 + g_settings.display_line_end_offset)); break; case DisplayCropMode::Borders: default: cs.horizontal_visible_start = horizontal_display_start; cs.horizontal_visible_end = horizontal_display_end; cs.vertical_visible_start = vertical_display_start; cs.vertical_visible_end = vertical_display_end; break; } cs.horizontal_visible_start = std::clamp(cs.horizontal_visible_start, PAL_HORIZONTAL_ACTIVE_START, PAL_HORIZONTAL_ACTIVE_END); cs.horizontal_visible_end = std::clamp(cs.horizontal_visible_end, cs.horizontal_visible_start, PAL_HORIZONTAL_ACTIVE_END); cs.vertical_visible_start = std::clamp(cs.vertical_visible_start, PAL_VERTICAL_ACTIVE_START, PAL_VERTICAL_ACTIVE_END); cs.vertical_visible_end = std::clamp(cs.vertical_visible_end, cs.vertical_visible_start, PAL_VERTICAL_ACTIVE_END); } else { switch (crop_mode) { case DisplayCropMode::None: cs.horizontal_visible_start = NTSC_HORIZONTAL_ACTIVE_START; cs.horizontal_visible_end = NTSC_HORIZONTAL_ACTIVE_END; cs.vertical_visible_start = NTSC_VERTICAL_ACTIVE_START; cs.vertical_visible_end = NTSC_VERTICAL_ACTIVE_END; break; case DisplayCropMode::Overscan: cs.horizontal_visible_start = static_cast(std::max(0, 608 + g_settings.display_active_start_offset)); cs.horizontal_visible_end = static_cast(std::max(cs.horizontal_visible_start, 3168 + g_settings.display_active_end_offset)); cs.vertical_visible_start = static_cast(std::max(0, 24 + g_settings.display_line_start_offset)); cs.vertical_visible_end = static_cast(std::max(cs.vertical_visible_start, 248 + g_settings.display_line_end_offset)); break; case DisplayCropMode::Borders: default: cs.horizontal_visible_start = horizontal_display_start; cs.horizontal_visible_end = horizontal_display_end; cs.vertical_visible_start = vertical_display_start; cs.vertical_visible_end = vertical_display_end; break; } cs.horizontal_visible_start = std::clamp(cs.horizontal_visible_start, NTSC_HORIZONTAL_ACTIVE_START, NTSC_HORIZONTAL_ACTIVE_END); cs.horizontal_visible_end = std::clamp(cs.horizontal_visible_end, cs.horizontal_visible_start, NTSC_HORIZONTAL_ACTIVE_END); cs.vertical_visible_start = std::clamp(cs.vertical_visible_start, NTSC_VERTICAL_ACTIVE_START, NTSC_VERTICAL_ACTIVE_END); cs.vertical_visible_end = std::clamp(cs.vertical_visible_end, cs.vertical_visible_start, NTSC_VERTICAL_ACTIVE_END); } // If force-progressive is enabled, we only double the height in 480i mode. This way non-interleaved 480i framebuffers // won't be broken when displayed. const u8 y_shift = BoolToUInt8(m_GPUSTAT.vertical_interlace && m_GPUSTAT.vertical_resolution); const u8 height_shift = m_force_progressive_scan ? y_shift : BoolToUInt8(m_GPUSTAT.vertical_interlace); // Determine screen size. cs.display_width = (cs.horizontal_visible_end - cs.horizontal_visible_start) / cs.dot_clock_divider; cs.display_height = (cs.vertical_visible_end - cs.vertical_visible_start) << height_shift; // Determine number of pixels outputted from VRAM (in general, round to 4-pixel multiple). // TODO: Verify behavior if values are outside of the active video portion of scanline. const u16 horizontal_display_ticks = (horizontal_display_end < horizontal_display_start) ? 0 : (horizontal_display_end - horizontal_display_start); const u16 horizontal_display_pixels = horizontal_display_ticks / cs.dot_clock_divider; if (horizontal_display_pixels == 1u) cs.display_vram_width = 4u; else cs.display_vram_width = (horizontal_display_pixels + 2u) & ~3u; // Determine if we need to adjust the VRAM rectangle (because the display is starting outside the visible area) or add // padding. u16 horizontal_skip_pixels; if (horizontal_display_start >= cs.horizontal_visible_start) { cs.display_origin_left = (horizontal_display_start - cs.horizontal_visible_start) / cs.dot_clock_divider; cs.display_vram_left = cs.regs.X; horizontal_skip_pixels = 0; } else { horizontal_skip_pixels = (cs.horizontal_visible_start - horizontal_display_start) / cs.dot_clock_divider; cs.display_origin_left = 0; cs.display_vram_left = (cs.regs.X + horizontal_skip_pixels) % VRAM_WIDTH; } // apply the crop from the start (usually overscan) cs.display_vram_width -= std::min(cs.display_vram_width, horizontal_skip_pixels); // Apply crop from the end by shrinking VRAM rectangle width if display would end outside the visible area. cs.display_vram_width = std::min(cs.display_vram_width, cs.display_width - cs.display_origin_left); if (vertical_display_start >= cs.vertical_visible_start) { cs.display_origin_top = (vertical_display_start - cs.vertical_visible_start) << y_shift; cs.display_vram_top = cs.regs.Y; } else { cs.display_origin_top = 0; cs.display_vram_top = (cs.regs.Y + ((cs.vertical_visible_start - vertical_display_start) << y_shift)) % VRAM_HEIGHT; } if (vertical_display_end <= cs.vertical_visible_end) { cs.display_vram_height = (vertical_display_end - std::min(vertical_display_end, std::max(vertical_display_start, cs.vertical_visible_start))) << height_shift; } else { cs.display_vram_height = (cs.vertical_visible_end - std::min(cs.vertical_visible_end, std::max(vertical_display_start, cs.vertical_visible_start))) << height_shift; } } TickCount GPU::GetPendingCRTCTicks() const { const TickCount pending_sysclk_ticks = m_crtc_tick_event->GetTicksSinceLastExecution(); TickCount fractional_ticks = m_crtc_state.fractional_ticks; return SystemTicksToCRTCTicks(pending_sysclk_ticks, &fractional_ticks); } TickCount GPU::GetPendingCommandTicks() const { if (!m_command_tick_event->IsActive()) return 0; return SystemTicksToGPUTicks(m_command_tick_event->GetTicksSinceLastExecution()); } void GPU::UpdateCRTCTickEvent() { // figure out how many GPU ticks until the next vblank or event TickCount lines_until_event; if (Timers::IsSyncEnabled(HBLANK_TIMER_INDEX)) { // when the timer sync is enabled we need to sync at vblank start and end lines_until_event = (m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end) ? (m_crtc_state.vertical_total - m_crtc_state.current_scanline + m_crtc_state.vertical_display_start) : (m_crtc_state.vertical_display_end - m_crtc_state.current_scanline); } else { lines_until_event = (m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end ? (m_crtc_state.vertical_total - m_crtc_state.current_scanline + m_crtc_state.vertical_display_end) : (m_crtc_state.vertical_display_end - m_crtc_state.current_scanline)); } if (Timers::IsExternalIRQEnabled(HBLANK_TIMER_INDEX)) lines_until_event = std::min(lines_until_event, Timers::GetTicksUntilIRQ(HBLANK_TIMER_INDEX)); TickCount ticks_until_event = lines_until_event * m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline; if (Timers::IsExternalIRQEnabled(DOT_TIMER_INDEX)) { const TickCount dots_until_irq = Timers::GetTicksUntilIRQ(DOT_TIMER_INDEX); const TickCount ticks_until_irq = (dots_until_irq * m_crtc_state.dot_clock_divider) - m_crtc_state.fractional_dot_ticks; ticks_until_event = std::min(ticks_until_event, std::max(ticks_until_irq, 0)); } #if 0 const TickCount ticks_until_hblank = (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_display_end) ? (m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline + m_crtc_state.horizontal_display_end) : (m_crtc_state.horizontal_display_end - m_crtc_state.current_tick_in_scanline); #endif m_crtc_tick_event->Schedule(CRTCTicksToSystemTicks(ticks_until_event, m_crtc_state.fractional_ticks)); } bool GPU::IsCRTCScanlinePending() const { const TickCount ticks = (GetPendingCRTCTicks() + m_crtc_state.current_tick_in_scanline); return (ticks >= (m_crtc_state.in_hblank ? m_crtc_state.horizontal_total : m_crtc_state.horizontal_sync_start)); } bool GPU::IsCommandCompletionPending() const { return (m_pending_command_ticks > 0 && GetPendingCommandTicks() >= m_pending_command_ticks); } void GPU::CRTCTickEvent(TickCount ticks) { // convert cpu/master clock to GPU ticks, accounting for partial cycles because of the non-integer divider { const TickCount gpu_ticks = SystemTicksToCRTCTicks(ticks, &m_crtc_state.fractional_ticks); m_crtc_state.current_tick_in_scanline += gpu_ticks; if (Timers::IsUsingExternalClock(DOT_TIMER_INDEX)) { m_crtc_state.fractional_dot_ticks += gpu_ticks; const TickCount dots = m_crtc_state.fractional_dot_ticks / m_crtc_state.dot_clock_divider; m_crtc_state.fractional_dot_ticks = m_crtc_state.fractional_dot_ticks % m_crtc_state.dot_clock_divider; if (dots > 0) Timers::AddTicks(DOT_TIMER_INDEX, dots); } } if (m_crtc_state.current_tick_in_scanline < m_crtc_state.horizontal_total) { // short path when we execute <1 line.. this shouldn't occur often. const bool old_hblank = m_crtc_state.in_hblank; const bool new_hblank = (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_sync_start); m_crtc_state.in_hblank = new_hblank; if (!old_hblank && new_hblank && Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX)) Timers::AddTicks(HBLANK_TIMER_INDEX, 1); UpdateCRTCTickEvent(); return; } u32 lines_to_draw = m_crtc_state.current_tick_in_scanline / m_crtc_state.horizontal_total; m_crtc_state.current_tick_in_scanline %= m_crtc_state.horizontal_total; #if 0 Log_WarningPrintf("Old line: %u, new line: %u, drawing %u", m_crtc_state.current_scanline, m_crtc_state.current_scanline + lines_to_draw, lines_to_draw); #endif const bool old_hblank = m_crtc_state.in_hblank; const bool new_hblank = (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_sync_start); m_crtc_state.in_hblank = new_hblank; if (Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX)) { const u32 hblank_timer_ticks = BoolToUInt32(!old_hblank) + BoolToUInt32(new_hblank) + (lines_to_draw - 1); Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast(hblank_timer_ticks)); } while (lines_to_draw > 0) { const u32 lines_to_draw_this_loop = std::min(lines_to_draw, m_crtc_state.vertical_total - m_crtc_state.current_scanline); const u32 prev_scanline = m_crtc_state.current_scanline; m_crtc_state.current_scanline += lines_to_draw_this_loop; DebugAssert(m_crtc_state.current_scanline <= m_crtc_state.vertical_total); lines_to_draw -= lines_to_draw_this_loop; // clear the vblank flag if the beam would pass through the display area if (prev_scanline < m_crtc_state.vertical_display_start && m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end) { Timers::SetGate(HBLANK_TIMER_INDEX, false); InterruptController::SetLineState(InterruptController::IRQ::VBLANK, false); m_crtc_state.in_vblank = false; } const bool new_vblank = m_crtc_state.current_scanline < m_crtc_state.vertical_display_start || m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end; if (m_crtc_state.in_vblank != new_vblank) { if (new_vblank) { Log_DebugPrintf("Now in v-blank"); // flush any pending draws and "scan out" the image // TODO: move present in here I guess FlushRender(); UpdateDisplay(); TimingEvents::SetFrameDone(); // switch fields early. this is needed so we draw to the correct one. if (m_GPUSTAT.InInterleaved480iMode()) m_crtc_state.interlaced_display_field = m_crtc_state.interlaced_field ^ 1u; else m_crtc_state.interlaced_display_field = 0; } Timers::SetGate(HBLANK_TIMER_INDEX, new_vblank); InterruptController::SetLineState(InterruptController::IRQ::VBLANK, new_vblank); m_crtc_state.in_vblank = new_vblank; } // past the end of vblank? if (m_crtc_state.current_scanline == m_crtc_state.vertical_total) { // start the new frame m_crtc_state.current_scanline = 0; if (m_GPUSTAT.vertical_interlace) { m_crtc_state.interlaced_field ^= 1u; m_GPUSTAT.interlaced_field = !m_crtc_state.interlaced_field; } else { m_crtc_state.interlaced_field = 0; m_GPUSTAT.interlaced_field = 0u; // new GPU = 1, old GPU = 0 } } } // alternating even line bit in 240-line mode if (m_GPUSTAT.InInterleaved480iMode()) { m_crtc_state.active_line_lsb = Truncate8((m_crtc_state.regs.Y + BoolToUInt32(m_crtc_state.interlaced_display_field)) & u32(1)); m_GPUSTAT.display_line_lsb = ConvertToBoolUnchecked( (m_crtc_state.regs.Y + (BoolToUInt8(!m_crtc_state.in_vblank) & m_crtc_state.interlaced_display_field)) & u32(1)); } else { m_crtc_state.active_line_lsb = 0; m_GPUSTAT.display_line_lsb = ConvertToBoolUnchecked((m_crtc_state.regs.Y + m_crtc_state.current_scanline) & u32(1)); } UpdateCRTCTickEvent(); } void GPU::CommandTickEvent(TickCount ticks) { m_pending_command_ticks -= SystemTicksToGPUTicks(ticks); m_executing_commands = true; ExecuteCommands(); UpdateCommandTickEvent(); m_executing_commands = false; } void GPU::UpdateCommandTickEvent() { if (m_pending_command_ticks <= 0) { m_pending_command_ticks = 0; m_command_tick_event->Deactivate(); } else { m_command_tick_event->SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks)); } } void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, float* display_y) const { const Common::Rectangle draw_rc = CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight()); // convert coordinates to active display region, then to full display region const float scaled_display_x = (window_x - static_cast(draw_rc.left)) / static_cast(draw_rc.GetWidth()); const float scaled_display_y = (window_y - static_cast(draw_rc.top)) / static_cast(draw_rc.GetHeight()); // scale back to internal resolution *display_x = scaled_display_x * static_cast(m_crtc_state.display_width); *display_y = scaled_display_y * static_cast(m_crtc_state.display_height); Log_DevPrintf("win %.0f,%.0f -> local %.0f,%.0f, disp %.2f,%.2f (size %u,%u frac %f,%f)", window_x, window_y, window_x - draw_rc.left, window_y - draw_rc.top, *display_x, *display_y, m_crtc_state.display_width, m_crtc_state.display_height, *display_x / static_cast(m_crtc_state.display_width), *display_y / static_cast(m_crtc_state.display_height)); } bool GPU::ConvertDisplayCoordinatesToBeamTicksAndLines(float display_x, float display_y, float x_scale, u32* out_tick, u32* out_line) const { if (x_scale != 1.0f) { const float dw = static_cast(m_crtc_state.display_width); float scaled_x = ((display_x / dw) * 2.0f) - 1.0f; // 0..1 -> -1..1 scaled_x *= x_scale; display_x = (((scaled_x + 1.0f) * 0.5f) * dw); // -1..1 -> 0..1 } if (display_x < 0 || static_cast(display_x) >= m_crtc_state.display_width || display_y < 0 || static_cast(display_y) >= m_crtc_state.display_height) { return false; } *out_line = (static_cast(std::round(display_y)) >> BoolToUInt8(m_GPUSTAT.vertical_interlace)) + m_crtc_state.vertical_visible_start; *out_tick = static_cast(std::round(display_x * static_cast(m_crtc_state.dot_clock_divider))) + m_crtc_state.horizontal_visible_start; return true; } u32 GPU::ReadGPUREAD() { if (m_blitter_state != BlitterState::ReadingVRAM) return m_GPUREAD_latch; // Read two pixels out of VRAM and combine them. Zero fill odd pixel counts. u32 value = 0; for (u32 i = 0; i < 2; i++) { // Read with correct wrap-around behavior. const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH; const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT; value |= ZeroExtend32(g_vram[read_y * VRAM_WIDTH + read_x]) << (i * 16); if (++m_vram_transfer.col == m_vram_transfer.width) { m_vram_transfer.col = 0; if (++m_vram_transfer.row == m_vram_transfer.height) { Log_DebugPrintf("End of VRAM->CPU transfer"); m_vram_transfer = {}; m_blitter_state = BlitterState::Idle; // end of transfer, catch up on any commands which were written (unlikely) ExecuteCommands(); break; } } } m_GPUREAD_latch = value; return value; } void GPU::WriteGP1(u32 value) { const u32 command = (value >> 24) & 0x3Fu; const u32 param = value & UINT32_C(0x00FFFFFF); switch (command) { case 0x00: // Reset GPU { Log_DebugPrintf("GP1 reset GPU"); m_command_tick_event->InvokeEarly(); SynchronizeCRTC(); SoftReset(); } break; case 0x01: // Clear FIFO { Log_DebugPrintf("GP1 clear FIFO"); m_command_tick_event->InvokeEarly(); SynchronizeCRTC(); // flush partial writes if (m_blitter_state == BlitterState::WritingVRAM) FinishVRAMWrite(); m_blitter_state = BlitterState::Idle; m_command_total_words = 0; m_vram_transfer = {}; m_fifo.Clear(); m_blit_buffer.clear(); m_blit_remaining_words = 0; m_pending_command_ticks = 0; m_command_tick_event->Deactivate(); UpdateDMARequest(); UpdateGPUIdle(); } break; case 0x02: // Acknowledge Interrupt { Log_DebugPrintf("Acknowledge interrupt"); m_GPUSTAT.interrupt_request = false; } break; case 0x03: // Display on/off { const bool disable = ConvertToBoolUnchecked(value & 0x01); Log_DebugPrintf("Display %s", disable ? "disabled" : "enabled"); SynchronizeCRTC(); if (!m_GPUSTAT.display_disable && disable && m_GPUSTAT.vertical_interlace && !m_force_progressive_scan) ClearDisplay(); m_GPUSTAT.display_disable = disable; } break; case 0x04: // DMA Direction { Log_DebugPrintf("DMA direction <- 0x%02X", static_cast(param)); if (m_GPUSTAT.dma_direction != static_cast(param)) { m_GPUSTAT.dma_direction = static_cast(param); UpdateDMARequest(); } } break; case 0x05: // Set display start address { const u32 new_value = param & CRTCState::Regs::DISPLAY_ADDRESS_START_MASK; Log_DebugPrintf("Display address start <- 0x%08X", new_value); System::IncrementInternalFrameNumber(); if (m_crtc_state.regs.display_address_start != new_value) { SynchronizeCRTC(); m_crtc_state.regs.display_address_start = new_value; UpdateCRTCDisplayParameters(); } } break; case 0x06: // Set horizontal display range { const u32 new_value = param & CRTCState::Regs::HORIZONTAL_DISPLAY_RANGE_MASK; Log_DebugPrintf("Horizontal display range <- 0x%08X", new_value); if (m_crtc_state.regs.horizontal_display_range != new_value) { SynchronizeCRTC(); m_crtc_state.regs.horizontal_display_range = new_value; UpdateCRTCConfig(); } } break; case 0x07: // Set vertical display range { const u32 new_value = param & CRTCState::Regs::VERTICAL_DISPLAY_RANGE_MASK; Log_DebugPrintf("Vertical display range <- 0x%08X", new_value); if (m_crtc_state.regs.vertical_display_range != new_value) { SynchronizeCRTC(); m_crtc_state.regs.vertical_display_range = new_value; UpdateCRTCConfig(); } } break; case 0x08: // Set display mode { union GP1_08h { u32 bits; BitField horizontal_resolution_1; BitField vertical_resolution; BitField pal_mode; BitField display_area_color_depth; BitField vertical_interlace; BitField horizontal_resolution_2; BitField reverse_flag; }; const GP1_08h dm{param}; GPUSTAT new_GPUSTAT{m_GPUSTAT.bits}; new_GPUSTAT.horizontal_resolution_1 = dm.horizontal_resolution_1; new_GPUSTAT.vertical_resolution = dm.vertical_resolution; new_GPUSTAT.pal_mode = dm.pal_mode; new_GPUSTAT.display_area_color_depth_24 = dm.display_area_color_depth; new_GPUSTAT.vertical_interlace = dm.vertical_interlace; new_GPUSTAT.horizontal_resolution_2 = dm.horizontal_resolution_2; new_GPUSTAT.reverse_flag = dm.reverse_flag; Log_DebugPrintf("Set display mode <- 0x%08X", dm.bits); if (!m_GPUSTAT.vertical_interlace && dm.vertical_interlace && !m_force_progressive_scan) { // bit of a hack, technically we should pull the previous frame in, but this may not exist anymore ClearDisplay(); } if (m_GPUSTAT.bits != new_GPUSTAT.bits) { // Have to be careful when setting this because Synchronize() can modify GPUSTAT. static constexpr u32 SET_MASK = UINT32_C(0b00000000011111110100000000000000); m_command_tick_event->InvokeEarly(); SynchronizeCRTC(); m_GPUSTAT.bits = (m_GPUSTAT.bits & ~SET_MASK) | (new_GPUSTAT.bits & SET_MASK); UpdateCRTCConfig(); } } break; case 0x09: // Allow texture disable { m_set_texture_disable_mask = ConvertToBoolUnchecked(param & 0x01); Log_DebugPrintf("Set texture disable mask <- %s", m_set_texture_disable_mask ? "allowed" : "ignored"); } break; case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: { HandleGetGPUInfoCommand(value); } break; default: Log_ErrorPrintf("Unimplemented GP1 command 0x%02X", command); break; } } void GPU::HandleGetGPUInfoCommand(u32 value) { const u8 subcommand = Truncate8(value & 0x07); switch (subcommand) { case 0x00: case 0x01: case 0x06: case 0x07: // leave GPUREAD intact break; case 0x02: // Get Texture Window { Log_DebugPrintf("Get texture window"); m_GPUREAD_latch = m_draw_mode.texture_window_value; } break; case 0x03: // Get Draw Area Top Left { Log_DebugPrintf("Get drawing area top left"); m_GPUREAD_latch = ((m_drawing_area.left & UINT32_C(0b1111111111)) | ((m_drawing_area.top & UINT32_C(0b1111111111)) << 10)); } break; case 0x04: // Get Draw Area Bottom Right { Log_DebugPrintf("Get drawing area bottom right"); m_GPUREAD_latch = ((m_drawing_area.right & UINT32_C(0b1111111111)) | ((m_drawing_area.bottom & UINT32_C(0b1111111111)) << 10)); } break; case 0x05: // Get Drawing Offset { Log_DebugPrintf("Get drawing offset"); m_GPUREAD_latch = ((m_drawing_offset.x & INT32_C(0b11111111111)) | ((m_drawing_offset.y & INT32_C(0b11111111111)) << 11)); } break; default: Log_WarningPrintf("Unhandled GetGPUInfo(0x%02X)", ZeroExtend32(subcommand)); break; } } void GPU::ClearDisplay() { ClearDisplayTexture(); // Just recycle the textures, it'll get re-fetched. DestroyDeinterlaceTextures(); } void GPU::UpdateDisplay() { } void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { } void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { const u16 color16 = VRAMRGBA8888ToRGBA5551(color); if ((x + width) <= VRAM_WIDTH && !IsInterlacedRenderingEnabled()) { for (u32 yoffs = 0; yoffs < height; yoffs++) { const u32 row = (y + yoffs) % VRAM_HEIGHT; std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16); } } else if (IsInterlacedRenderingEnabled()) { // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field. if (IsCRTCScanlinePending()) SynchronizeCRTC(); const u32 active_field = GetActiveLineLSB(); for (u32 yoffs = 0; yoffs < height; yoffs++) { const u32 row = (y + yoffs) % VRAM_HEIGHT; if ((row & u32(1)) == active_field) continue; u16* row_ptr = &g_vram[row * VRAM_WIDTH]; for (u32 xoffs = 0; xoffs < width; xoffs++) { const u32 col = (x + xoffs) % VRAM_WIDTH; row_ptr[col] = color16; } } } else { for (u32 yoffs = 0; yoffs < height; yoffs++) { const u32 row = (y + yoffs) % VRAM_HEIGHT; u16* row_ptr = &g_vram[row * VRAM_WIDTH]; for (u32 xoffs = 0; xoffs < width; xoffs++) { const u32 col = (x + xoffs) % VRAM_WIDTH; row_ptr[col] = color16; } } } } void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) { // Fast path when the copy is not oversized. if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask) { const u16* src_ptr = static_cast(data); u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x]; for (u32 yoffs = 0; yoffs < height; yoffs++) { std::copy_n(src_ptr, width, dst_ptr); src_ptr += width; dst_ptr += VRAM_WIDTH; } } else { // Slow path when we need to handle wrap-around. // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or } const u16* src_ptr = static_cast(data); const u16 mask_and = check_mask ? 0x8000 : 0; const u16 mask_or = set_mask ? 0x8000 : 0; for (u32 row = 0; row < height;) { u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; for (u32 col = 0; col < width;) { // TODO: Handle unaligned reads... u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH]; if (((*pixel_ptr) & mask_and) == 0) *pixel_ptr = *(src_ptr++) | mask_or; } } } } void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) { // Break up oversized copies. This behavior has not been verified on console. if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH) { u32 remaining_rows = height; u32 current_src_y = src_y; u32 current_dst_y = dst_y; while (remaining_rows > 0) { const u32 rows_to_copy = std::min(remaining_rows, std::min(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y)); u32 remaining_columns = width; u32 current_src_x = src_x; u32 current_dst_x = dst_x; while (remaining_columns > 0) { const u32 columns_to_copy = std::min(remaining_columns, std::min(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x)); CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy); current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH; current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH; remaining_columns -= columns_to_copy; } current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT; current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT; remaining_rows -= rows_to_copy; } return; } // This doesn't have a fast path, but do we really need one? It's not common. const u16 mask_and = m_GPUSTAT.GetMaskAND(); const u16 mask_or = m_GPUSTAT.GetMaskOR(); // Copy in reverse when src_x < dst_x, this is verified on console. if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH)) { for (u32 row = 0; row < height; row++) { const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; for (s32 col = static_cast(width - 1); col >= 0; col--) { const u16 src_pixel = src_row_ptr[(src_x + static_cast(col)) % VRAM_WIDTH]; u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast(col)) % VRAM_WIDTH]; if ((*dst_pixel_ptr & mask_and) == 0) *dst_pixel_ptr = src_pixel | mask_or; } } } else { for (u32 row = 0; row < height; row++) { const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; for (u32 col = 0; col < width; col++) { const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH]; u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH]; if ((*dst_pixel_ptr & mask_and) == 0) *dst_pixel_ptr = src_pixel | mask_or; } } } } void GPU::DispatchRenderCommand() { } void GPU::FlushRender() { } void GPU::SetDrawMode(u16 value) { GPUDrawModeReg new_mode_reg{static_cast(value & GPUDrawModeReg::MASK)}; if (!m_set_texture_disable_mask) new_mode_reg.texture_disable = false; if (new_mode_reg.bits == m_draw_mode.mode_reg.bits) return; m_draw_mode.texture_page_changed |= ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) != (m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK)); m_draw_mode.mode_reg.bits = new_mode_reg.bits; if (m_GPUSTAT.draw_to_displayed_field != new_mode_reg.draw_to_displayed_field) FlushRender(); // Bits 0..10 are returned in the GPU status register. m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) | (ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK); m_GPUSTAT.texture_disable = m_draw_mode.mode_reg.texture_disable; } void GPU::SetTexturePalette(u16 value) { value &= DrawMode::PALETTE_MASK; if (m_draw_mode.palette_reg.bits == value) return; m_draw_mode.palette_reg.bits = value; m_draw_mode.texture_page_changed = true; } void GPU::SetTextureWindow(u32 value) { value &= DrawMode::TEXTURE_WINDOW_MASK; if (m_draw_mode.texture_window_value == value) return; FlushRender(); const u8 mask_x = Truncate8(value & UINT32_C(0x1F)); const u8 mask_y = Truncate8((value >> 5) & UINT32_C(0x1F)); const u8 offset_x = Truncate8((value >> 10) & UINT32_C(0x1F)); const u8 offset_y = Truncate8((value >> 15) & UINT32_C(0x1F)); Log_DebugPrintf("Set texture window %02X %02X %02X %02X", mask_x, mask_y, offset_x, offset_y); m_draw_mode.texture_window.and_x = ~(mask_x * 8); m_draw_mode.texture_window.and_y = ~(mask_y * 8); m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u; m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u; m_draw_mode.texture_window_value = value; m_draw_mode.texture_window_changed = true; } bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing) { GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, g_gpu_device->GetFeatures().framebuffer_fetch); GPUPipeline::GraphicsConfig plconfig; plconfig.input_layout.vertex_stride = 0; plconfig.primitive = GPUPipeline::Primitive::Triangles; plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); plconfig.geometry_shader = nullptr; plconfig.depth_format = GPUTexture::Format::Unknown; plconfig.samples = 1; plconfig.per_sample_shading = false; plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; if (display) { plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; plconfig.SetTargetFormats(g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8); std::string vs = shadergen.GenerateDisplayVertexShader(); std::string fs; switch (g_settings.display_scaling) { case DisplayScalingMode::BilinearSharp: fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); break; case DisplayScalingMode::BilinearSmooth: fs = shadergen.GenerateDisplayFragmentShader(true); break; case DisplayScalingMode::Nearest: case DisplayScalingMode::NearestInteger: default: fs = shadergen.GenerateDisplayFragmentShader(false); break; } std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, vs); std::unique_ptr fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs); if (!vso || !fso) return false; GL_OBJECT_NAME(vso, "Display Vertex Shader"); GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", Settings::GetDisplayScalingName(g_settings.display_scaling)); plconfig.vertex_shader = vso.get(); plconfig.fragment_shader = fso.get(); if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", Settings::GetDisplayScalingName(g_settings.display_scaling)); } if (deinterlace) { plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader()); if (!vso) return false; GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader"); std::unique_ptr fso; if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateInterleavedFieldExtractFragmentShader()))) { return false; } GL_OBJECT_NAME(fso, "Deinterlace Field Extract Fragment Shader"); plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; plconfig.vertex_shader = vso.get(); plconfig.fragment_shader = fso.get(); if (!(m_deinterlace_extract_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; GL_OBJECT_NAME(m_deinterlace_extract_pipeline, "Deinterlace Field Extract Pipeline"); switch (g_settings.display_deinterlacing_mode) { case DisplayDeinterlacingMode::Disabled: break; case DisplayDeinterlacingMode::Weave: { if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateDeinterlaceWeaveFragmentShader()))) { return false; } GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader"); plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; plconfig.vertex_shader = vso.get(); plconfig.fragment_shader = fso.get(); if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline"); } break; case DisplayDeinterlacingMode::Blend: { if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateDeinterlaceBlendFragmentShader()))) { return false; } GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader"); plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; plconfig.vertex_shader = vso.get(); plconfig.fragment_shader = fso.get(); if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline"); } break; case DisplayDeinterlacingMode::Adaptive: { fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateFastMADReconstructFragmentShader()); if (!fso) return false; GL_OBJECT_NAME(fso, "FastMAD Reconstruct Fragment Shader"); plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; plconfig.fragment_shader = fso.get(); if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline"); } break; default: UnreachableCode(); } } if (chroma_smoothing) { m_chroma_smoothing_pipeline.reset(); g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); if (g_settings.gpu_24bit_chroma_smoothing) { plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader()); std::unique_ptr fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateChromaSmoothingFragmentShader()); if (!vso || !fso) return false; GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader"); GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader"); plconfig.vertex_shader = vso.get(); plconfig.fragment_shader = fso.get(); if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline"); } } return true; } void GPU::ClearDisplayTexture() { m_display_texture = nullptr; m_display_texture_view_x = 0; m_display_texture_view_y = 0; m_display_texture_view_width = 0; m_display_texture_view_height = 0; } void GPU::SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height) { DebugAssert(texture); m_display_texture = texture; m_display_texture_view_x = view_x; m_display_texture_view_y = view_y; m_display_texture_view_width = view_width; m_display_texture_view_height = view_height; } void GPU::SetDisplayTextureRect(s32 view_x, s32 view_y, s32 view_width, s32 view_height) { m_display_texture_view_x = view_x; m_display_texture_view_y = view_y; m_display_texture_view_width = view_width; m_display_texture_view_height = view_height; } void GPU::SetDisplayParameters(s32 display_width, s32 display_height, s32 active_left, s32 active_top, s32 active_width, s32 active_height, float display_aspect_ratio) { m_display_width = display_width; m_display_height = display_height; m_display_active_left = active_left; m_display_active_top = active_top; m_display_active_width = active_width; m_display_active_height = active_height; m_display_aspect_ratio = display_aspect_ratio; } bool GPU::PresentDisplay() { FlushRender(); if (!HasDisplayTexture()) return g_gpu_device->BeginPresent(false); const Common::Rectangle draw_rect = CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight()); return RenderDisplay(nullptr, draw_rect, true); } bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_rect, bool postfx) { GL_SCOPE_FMT("RenderDisplay: {}x{} at {},{}", draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight()); if (m_display_texture) m_display_texture->MakeReadyForSampling(); bool texture_filter_linear = false; struct Uniforms { float src_rect[4]; float src_size[4]; float clamp_rect[4]; float params[4]; } uniforms; std::memset(uniforms.params, 0, sizeof(uniforms.params)); switch (g_settings.display_scaling) { case DisplayScalingMode::Nearest: case DisplayScalingMode::NearestInteger: break; case DisplayScalingMode::BilinearSmooth: texture_filter_linear = true; break; case DisplayScalingMode::BilinearSharp: { texture_filter_linear = true; uniforms.params[0] = std::max( std::floor(static_cast(draw_rect.GetWidth()) / static_cast(m_display_texture_view_width)), 1.0f); uniforms.params[1] = std::max( std::floor(static_cast(draw_rect.GetHeight()) / static_cast(m_display_texture_view_height)), 1.0f); uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0]; uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1]; } break; default: UnreachableCode(); break; } const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetWindowFormat(); const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetWindowWidth(); const u32 target_height = target ? target->GetHeight() : g_gpu_device->GetWindowHeight(); const bool really_postfx = (postfx && HasDisplayTexture() && PostProcessing::IsActive() && !g_gpu_device->GetWindowInfo().IsSurfaceless() && hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && PostProcessing::CheckTargets(hdformat, target_width, target_height)); const Common::Rectangle real_draw_rect = g_gpu_device->UsesLowerLeftOrigin() ? GPUDevice::FlipToLowerLeft(draw_rect, target_height) : draw_rect; if (really_postfx) { g_gpu_device->ClearRenderTarget(PostProcessing::GetInputTexture(), 0); g_gpu_device->SetRenderTarget(PostProcessing::GetInputTexture()); } else { if (target) g_gpu_device->SetRenderTarget(target); else if (!g_gpu_device->BeginPresent(false)) return false; } if (!HasDisplayTexture()) return true; g_gpu_device->SetPipeline(m_display_pipeline.get()); g_gpu_device->SetTextureSampler( 0, m_display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler()); // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. This is because // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel. const float rcp_width = 1.0f / static_cast(m_display_texture->GetWidth()); const float rcp_height = 1.0f / static_cast(m_display_texture->GetHeight()); uniforms.src_rect[0] = static_cast(m_display_texture_view_x) * rcp_width; uniforms.src_rect[1] = static_cast(m_display_texture_view_y) * rcp_height; uniforms.src_rect[2] = static_cast(m_display_texture_view_width) * rcp_width; uniforms.src_rect[3] = static_cast(m_display_texture_view_height) * rcp_height; uniforms.clamp_rect[0] = (static_cast(m_display_texture_view_x) + 0.5f) * rcp_width; uniforms.clamp_rect[1] = (static_cast(m_display_texture_view_y) + 0.5f) * rcp_height; uniforms.clamp_rect[2] = (static_cast(m_display_texture_view_x + m_display_texture_view_width) - 0.5f) * rcp_width; uniforms.clamp_rect[3] = (static_cast(m_display_texture_view_y + m_display_texture_view_height) - 0.5f) * rcp_height; uniforms.src_size[0] = static_cast(m_display_texture->GetWidth()); uniforms.src_size[1] = static_cast(m_display_texture->GetHeight()); uniforms.src_size[2] = rcp_width; uniforms.src_size[3] = rcp_height; g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->SetViewportAndScissor(real_draw_rect.left, real_draw_rect.top, real_draw_rect.GetWidth(), real_draw_rect.GetHeight()); g_gpu_device->Draw(3, 0); if (really_postfx) { return PostProcessing::Apply(target, real_draw_rect.left, real_draw_rect.top, real_draw_rect.GetWidth(), real_draw_rect.GetHeight(), m_display_texture_view_width, m_display_texture_view_height); } else { return true; } } void GPU::DestroyDeinterlaceTextures() { for (std::unique_ptr& tex : m_deinterlace_buffers) g_gpu_device->RecycleTexture(std::move(tex)); g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture)); m_current_deinterlace_buffer = 0; } bool GPU::Deinterlace(GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 field, u32 line_skip) { switch (g_settings.display_deinterlacing_mode) { case DisplayDeinterlacingMode::Disabled: { if (line_skip == 0) { SetDisplayTexture(src, x, y, width, height); return true; } // Still have to extract the field. if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]] return false; SetDisplayTexture(m_deinterlace_buffers[0].get(), 0, 0, width, height); return true; } case DisplayDeinterlacingMode::Weave: { GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); const u32 full_height = height * 2; if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]] { ClearDisplayTexture(); return false; } src->MakeReadyForSampling(); g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); const u32 uniforms[] = {x, y, field, line_skip}; g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); g_gpu_device->Draw(3, 0); m_deinterlace_texture->MakeReadyForSampling(); SetDisplayTexture(m_deinterlace_texture.get(), 0, 0, width, full_height); return true; } case DisplayDeinterlacingMode::Blend: { constexpr u32 NUM_BLEND_BUFFERS = 2; GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); const u32 this_buffer = m_current_deinterlace_buffer; m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS; GL_INS_FMT("Current buffer: {}", this_buffer); if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || !DeinterlaceSetTargetSize(width, height, false)) [[unlikely]] { ClearDisplayTexture(); return false; } // TODO: could be implemented with alpha blending instead.. g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get()); g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetViewportAndScissor(0, 0, width, height); g_gpu_device->Draw(3, 0); m_deinterlace_texture->MakeReadyForSampling(); SetDisplayTexture(m_deinterlace_texture.get(), 0, 0, width, height); return true; } case DisplayDeinterlacingMode::Adaptive: { GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); const u32 full_height = height * 2; const u32 this_buffer = m_current_deinterlace_buffer; m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT; GL_INS_FMT("Current buffer: {}", this_buffer); if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || !DeinterlaceSetTargetSize(width, full_height, false)) [[unlikely]] { ClearDisplayTexture(); return false; } g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(), g_gpu_device->GetNearestSampler()); const u32 uniforms[] = {field, full_height}; g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); g_gpu_device->Draw(3, 0); m_deinterlace_texture->MakeReadyForSampling(); SetDisplayTexture(m_deinterlace_texture.get(), 0, 0, width, full_height); return true; } default: UnreachableCode(); } } bool GPU::DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip) { if (!m_deinterlace_buffers[dst_bufidx] || m_deinterlace_buffers[dst_bufidx]->GetWidth() != width || m_deinterlace_buffers[dst_bufidx]->GetHeight() != height) { if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[dst_bufidx], width, height, GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8, false)) [[unlikely]] { return false; } GL_OBJECT_NAME_FMT(m_deinterlace_buffers[dst_bufidx], "Blend Deinterlace Buffer {}", dst_bufidx); } GPUTexture* dst = m_deinterlace_buffers[dst_bufidx].get(); g_gpu_device->InvalidateRenderTarget(dst); // If we're not skipping lines, then we can simply copy the texture. if (line_skip == 0 && src->GetFormat() == dst->GetFormat()) { GL_INS_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => copy direct", x, y, width, height, line_skip); g_gpu_device->CopyTextureRegion(dst, 0, 0, 0, 0, src, x, y, 0, 0, width, height); } else { GL_SCOPE_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => shader copy", x, y, width, height, line_skip); // Otherwise, we need to extract every other line from the texture. src->MakeReadyForSampling(); g_gpu_device->SetRenderTarget(dst); g_gpu_device->SetPipeline(m_deinterlace_extract_pipeline.get()); g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); const u32 uniforms[] = {x, y, line_skip}; g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); g_gpu_device->SetViewportAndScissor(0, 0, width, height); g_gpu_device->Draw(3, 0); GL_POP(); } dst->MakeReadyForSampling(); return true; } bool GPU::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) { if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width || m_deinterlace_texture->GetHeight() != height) { if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8, preserve)) [[unlikely]] { return false; } GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture"); } return true; } bool GPU::ApplyChromaSmoothing(GPUTexture* src, u32 x, u32 y, u32 width, u32 height) { if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width || m_chroma_smoothing_texture->GetHeight() != height) { if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8, false)) { ClearDisplayTexture(); return false; } GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture"); } GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height); src->MakeReadyForSampling(); g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get()); g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get()); g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get()); g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); const u32 uniforms[] = {x, y, width - 1, height - 1}; g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); g_gpu_device->SetViewportAndScissor(0, 0, width, height); g_gpu_device->Draw(3, 0); m_chroma_smoothing_texture->MakeReadyForSampling(); SetDisplayTexture(m_chroma_smoothing_texture.get(), 0, 0, width, height); return true; } Common::Rectangle GPU::CalculateDrawRect(s32 window_width, s32 window_height, float* out_left_padding, float* out_top_padding, float* out_scale, float* out_x_scale, bool apply_aspect_ratio /* = true */) const { const float window_ratio = static_cast(window_width) / static_cast(window_height); const float x_scale = apply_aspect_ratio ? (m_display_aspect_ratio / (static_cast(m_display_width) / static_cast(m_display_height))) : 1.0f; const float display_width = g_settings.display_stretch_vertically ? static_cast(m_display_width) : static_cast(m_display_width) * x_scale; const float display_height = g_settings.display_stretch_vertically ? static_cast(m_display_height) / x_scale : static_cast(m_display_height); const float active_left = g_settings.display_stretch_vertically ? static_cast(m_display_active_left) : static_cast(m_display_active_left) * x_scale; const float active_top = g_settings.display_stretch_vertically ? static_cast(m_display_active_top) / x_scale : static_cast(m_display_active_top); const float active_width = g_settings.display_stretch_vertically ? static_cast(m_display_active_width) : static_cast(m_display_active_width) * x_scale; const float active_height = g_settings.display_stretch_vertically ? static_cast(m_display_active_height) / x_scale : static_cast(m_display_active_height); if (out_x_scale) *out_x_scale = x_scale; // now fit it within the window float scale; if ((display_width / display_height) >= window_ratio) { // align in middle vertically scale = static_cast(window_width) / display_width; if (g_settings.display_scaling == DisplayScalingMode::NearestInteger) scale = std::max(std::floor(scale), 1.0f); if (out_left_padding) { if (g_settings.display_scaling == DisplayScalingMode::NearestInteger) *out_left_padding = std::max((static_cast(window_width) - display_width * scale) / 2.0f, 0.0f); else *out_left_padding = 0.0f; } if (out_top_padding) { switch (g_settings.display_alignment) { case DisplayAlignment::RightOrBottom: *out_top_padding = std::max(static_cast(window_height) - (display_height * scale), 0.0f); break; case DisplayAlignment::Center: *out_top_padding = std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); break; case DisplayAlignment::LeftOrTop: default: *out_top_padding = 0.0f; break; } } } else { // align in middle horizontally scale = static_cast(window_height) / display_height; if (g_settings.display_scaling == DisplayScalingMode::NearestInteger) scale = std::max(std::floor(scale), 1.0f); if (out_left_padding) { switch (g_settings.display_alignment) { case DisplayAlignment::RightOrBottom: *out_left_padding = std::max(static_cast(window_width) - (display_width * scale), 0.0f); break; case DisplayAlignment::Center: *out_left_padding = std::max((static_cast(window_width) - (display_width * scale)) / 2.0f, 0.0f); break; case DisplayAlignment::LeftOrTop: default: *out_left_padding = 0.0f; break; } } if (out_top_padding) { if (g_settings.display_scaling == DisplayScalingMode::NearestInteger) *out_top_padding = std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); else *out_top_padding = 0.0f; } } if (out_scale) *out_scale = scale; return Common::Rectangle::FromExtents(active_left * scale, active_top * scale, active_width * scale, active_height * scale); } Common::Rectangle GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_aspect_ratio /* = true */) const { float left_padding, top_padding; const Common::Rectangle draw_rc = CalculateDrawRect(window_width, window_height, &left_padding, &top_padding, nullptr, nullptr, apply_aspect_ratio); // TODO: This should be a float rectangle. But because GL is lame, it only has integer viewports... return Common::Rectangle::FromExtents( static_cast(draw_rc.left + left_padding), static_cast(draw_rc.top + top_padding), static_cast(draw_rc.GetWidth()), static_cast(draw_rc.GetHeight())); } bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, u32 texture_data_stride, GPUTexture::Format texture_format, bool display_osd_message, bool use_thread) { std::string osd_key; if (display_osd_message) { // Use a 60 second timeout to give it plenty of time to actually save. osd_key = fmt::format("ScreenshotSaver_{}", filename); Host::AddIconOSDMessage(osd_key, ICON_FA_CAMERA, fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(filename)), 60.0f); } static constexpr auto proc = [](u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, u8 quality, bool clear_alpha, bool flip_y, std::vector texture_data, u32 texture_data_stride, GPUTexture::Format texture_format, std::string osd_key, bool use_thread) { bool result; const char* extension = std::strrchr(filename.c_str(), '.'); if (extension) { if (GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, texture_format)) { if (clear_alpha) { for (u32& pixel : texture_data) pixel |= 0xFF000000u; } if (flip_y) GPUTexture::FlipTextureDataRGBA8(width, height, reinterpret_cast(texture_data.data()), texture_data_stride); Assert(texture_data_stride == sizeof(u32) * width); RGBA8Image image(width, height, std::move(texture_data)); if (image.SaveToFile(filename.c_str(), fp.get(), quality)) { result = true; } else { Log_ErrorPrintf("Unknown extension in filename '%s' or save error: '%s'", filename.c_str(), extension); result = false; } } else { result = false; } } else { Log_ErrorPrintf("Unable to determine file extension for '%s'", filename.c_str()); result = false; } if (!osd_key.empty()) { Host::AddIconOSDMessage(std::move(osd_key), ICON_FA_CAMERA, fmt::format(result ? TRANSLATE_FS("GS", "Saved screenshot to '{}'.") : TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), Path::GetFileName(filename), result ? Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); } if (use_thread) { // remove ourselves from the list, if the GS thread is waiting for us, we won't be in there const auto this_id = std::this_thread::get_id(); std::unique_lock lock(s_screenshot_threads_mutex); for (auto it = s_screenshot_threads.begin(); it != s_screenshot_threads.end(); ++it) { if (it->get_id() == this_id) { it->detach(); s_screenshot_threads.erase(it); break; } } } return result; }; if (!use_thread) { return proc(width, height, std::move(filename), std::move(fp), quality, clear_alpha, flip_y, std::move(texture_data), texture_data_stride, texture_format, std::move(osd_key), use_thread); } std::thread thread(proc, width, height, std::move(filename), std::move(fp), quality, clear_alpha, flip_y, std::move(texture_data), texture_data_stride, texture_format, std::move(osd_key), use_thread); std::unique_lock lock(s_screenshot_threads_mutex); s_screenshot_threads.push_back(std::move(thread)); return true; } void JoinScreenshotThreads() { std::unique_lock lock(s_screenshot_threads_mutex); while (!s_screenshot_threads.empty()) { std::thread save_thread(std::move(s_screenshot_threads.front())); s_screenshot_threads.pop_front(); lock.unlock(); save_thread.join(); lock.lock(); } } bool GPU::WriteDisplayTextureToFile(std::string filename, bool compress_on_thread /* = false */) { if (!m_display_texture) return false; const u32 read_x = static_cast(m_display_texture_view_x); const u32 read_y = static_cast(m_display_texture_view_y); const u32 read_width = static_cast(m_display_texture_view_width); const u32 read_height = static_cast(m_display_texture_view_height); const u32 texture_data_stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(m_display_texture->GetFormat()) * read_width, 4); std::vector texture_data((texture_data_stride * read_height) / sizeof(u32)); std::unique_ptr dltex; if (g_gpu_device->GetFeatures().memory_import) { dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), texture_data.data(), texture_data.size() * sizeof(u32), texture_data_stride); } if (!dltex) { if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat()))) { Log_ErrorFmt("Failed to create {}x{} {} download texture", read_width, read_height, GPUTexture::GetFormatName(m_display_texture->GetFormat())); return false; } } dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); if (!dltex->ReadTexels(0, 0, read_width, read_height, texture_data.data(), texture_data_stride)) { RestoreDeviceContext(); return false; } RestoreDeviceContext(); auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb"); if (!fp) { Log_ErrorPrintf("Can't open file '%s': errno %d", filename.c_str(), errno); return false; } constexpr bool clear_alpha = true; const bool flip_y = g_gpu_device->UsesLowerLeftOrigin(); return CompressAndWriteTextureToFile( read_width, read_height, std::move(filename), std::move(fp), g_settings.display_screenshot_quality, clear_alpha, flip_y, std::move(texture_data), texture_data_stride, m_display_texture->GetFormat(), false, compress_on_thread); } bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangle& draw_rect, bool postfx, std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) { const GPUTexture::Format hdformat = g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8; auto render_texture = g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat); if (!render_texture) return false; g_gpu_device->ClearRenderTarget(render_texture.get(), 0); // TODO: this should use copy shader instead. RenderDisplay(render_texture.get(), draw_rect, postfx); const u32 stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(hdformat) * width, sizeof(u32)); out_pixels->resize((height * stride) / sizeof(u32)); std::unique_ptr dltex; if (g_gpu_device->GetFeatures().memory_import) { dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, out_pixels->data(), out_pixels->size() * sizeof(u32), stride); } if (!dltex) { if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat))) { Log_ErrorFmt("Failed to create {}x{} download texture", width, height); return false; } } dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); if (!dltex->ReadTexels(0, 0, width, height, out_pixels->data(), stride)) { RestoreDeviceContext(); return false; } *out_stride = stride; *out_format = hdformat; RestoreDeviceContext(); return true; } bool GPU::RenderScreenshotToFile(std::string filename, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, bool show_osd_message) { u32 width = g_gpu_device->GetWindowWidth(); u32 height = g_gpu_device->GetWindowHeight(); Common::Rectangle draw_rect = CalculateDrawRect(width, height); const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution); if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) { if (mode == DisplayScreenshotMode::InternalResolution) { const u32 draw_width = static_cast(draw_rect.GetWidth()); const u32 draw_height = static_cast(draw_rect.GetHeight()); // If internal res, scale the computed draw rectangle to the internal res. // We re-use the draw rect because it's already been AR corrected. const float sar = static_cast(m_display_texture_view_width) / static_cast(m_display_texture_view_height); const float dar = static_cast(draw_width) / static_cast(draw_height); if (sar >= dar) { // stretch height, preserve width const float scale = static_cast(m_display_texture_view_width) / static_cast(draw_width); width = m_display_texture_view_width; height = static_cast(std::round(static_cast(draw_height) * scale)); } else { // stretch width, preserve height const float scale = static_cast(m_display_texture_view_height) / static_cast(draw_height); width = static_cast(std::round(static_cast(draw_width) * scale)); height = m_display_texture_view_height; } // DX11 won't go past 16K texture size. const u32 max_texture_size = g_gpu_device->GetMaxTextureSize(); if (width > max_texture_size) { height = static_cast(static_cast(height) / (static_cast(width) / static_cast(max_texture_size))); width = max_texture_size; } if (height > max_texture_size) { height = max_texture_size; width = static_cast(static_cast(width) / (static_cast(height) / static_cast(max_texture_size))); } } else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) { width = m_display_texture_view_width; height = m_display_texture_view_height; } // Remove padding, it's not part of the framebuffer. draw_rect.Set(0, 0, static_cast(width), static_cast(height)); } if (width == 0 || height == 0) return false; std::vector pixels; u32 pixels_stride; GPUTexture::Format pixels_format; if (!RenderScreenshotToBuffer(width, height, draw_rect, !internal_resolution, &pixels, &pixels_stride, &pixels_format)) { Log_ErrorPrintf("Failed to render %ux%u screenshot", width, height); return false; } // These filenames tend to be fairly long, so remove any MAX_PATH limit. auto fp = FileSystem::OpenManagedCFile(Path::RemoveLengthLimits(filename).c_str(), "wb"); if (!fp) { Log_ErrorPrintf("Can't open file '%s': errno %d", filename.c_str(), errno); return false; } return CompressAndWriteTextureToFile(width, height, std::move(filename), std::move(fp), quality, true, g_gpu_device->UsesLowerLeftOrigin(), std::move(pixels), pixels_stride, pixels_format, show_osd_message, compress_on_thread); } bool GPU::DumpVRAMToFile(const char* filename) { ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); const char* extension = std::strrchr(filename, '.'); if (extension && StringUtil::Strcasecmp(extension, ".png") == 0) { return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, g_vram, true); } else if (extension && StringUtil::Strcasecmp(extension, ".bin") == 0) { return FileSystem::WriteBinaryFile(filename, g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); } else { Log_ErrorPrintf("Unknown extension: '%s'", filename); return false; } } bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha) { RGBA8Image image(width, height); const char* ptr_in = static_cast(buffer); for (u32 row = 0; row < height; row++) { const char* row_ptr_in = ptr_in; u32* ptr_out = image.GetRowPixels(row); for (u32 col = 0; col < width; col++) { u16 src_col; std::memcpy(&src_col, row_ptr_in, sizeof(u16)); row_ptr_in += sizeof(u16); *(ptr_out++) = VRAMRGBA5551ToRGBA8888(remove_alpha ? (src_col | u16(0x8000)) : src_col); } ptr_in += stride; } return image.SaveToFile(filename); } void GPU::DrawDebugStateWindow() { const float framebuffer_scale = Host::GetOSDScale(); ImGui::SetNextWindowSize(ImVec2(450.0f * framebuffer_scale, 550.0f * framebuffer_scale), ImGuiCond_FirstUseEver); if (!ImGui::Begin("GPU", nullptr)) { ImGui::End(); return; } DrawRendererStats(); if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen)) { static constexpr std::array state_strings = { {"Idle", "Reading VRAM", "Writing VRAM", "Drawing Polyline"}}; ImGui::Text("State: %s", state_strings[static_cast(m_blitter_state)]); ImGui::Text("Dither: %s", m_GPUSTAT.dither_enable ? "Enabled" : "Disabled"); ImGui::Text("Draw To Displayed Field: %s", m_GPUSTAT.draw_to_displayed_field ? "Enabled" : "Disabled"); ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.set_mask_while_drawing ? "Yes" : "No"); ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.check_mask_before_draw ? "Yes" : "No"); ImGui::Text("Reverse Flag: %s", m_GPUSTAT.reverse_flag ? "Yes" : "No"); ImGui::Text("Texture Disable: %s", m_GPUSTAT.texture_disable ? "Yes" : "No"); ImGui::Text("PAL Mode: %s", m_GPUSTAT.pal_mode ? "Yes" : "No"); ImGui::Text("Interrupt Request: %s", m_GPUSTAT.interrupt_request ? "Yes" : "No"); ImGui::Text("DMA Request: %s", m_GPUSTAT.dma_data_request ? "Yes" : "No"); } if (ImGui::CollapsingHeader("CRTC", ImGuiTreeNodeFlags_DefaultOpen)) { const auto& cs = m_crtc_state; ImGui::Text("Clock: %s", (m_console_is_pal ? (m_GPUSTAT.pal_mode ? "PAL-on-PAL" : "NTSC-on-PAL") : (m_GPUSTAT.pal_mode ? "PAL-on-NTSC" : "NTSC-on-NTSC"))); ImGui::Text("Horizontal Frequency: %.3f KHz", ComputeHorizontalFrequency() / 1000.0f); ImGui::Text("Vertical Frequency: %.3f Hz", ComputeVerticalFrequency()); ImGui::Text("Dot Clock Divider: %u", cs.dot_clock_divider); ImGui::Text("Vertical Interlace: %s (%s field)", m_GPUSTAT.vertical_interlace ? "Yes" : "No", cs.interlaced_field ? "odd" : "even"); ImGui::Text("Current Scanline: %u (tick %u)", cs.current_scanline, cs.current_tick_in_scanline); ImGui::Text("Display Disable: %s", m_GPUSTAT.display_disable ? "Yes" : "No"); ImGui::Text("Displaying Odd Lines: %s", cs.active_line_lsb ? "Yes" : "No"); ImGui::Text("Color Depth: %u-bit", m_GPUSTAT.display_area_color_depth_24 ? 24 : 15); ImGui::Text("Start Offset in VRAM: (%u, %u)", cs.regs.X.GetValue(), cs.regs.Y.GetValue()); ImGui::Text("Display Total: %u (%u) horizontal, %u vertical", cs.horizontal_total, cs.horizontal_total / cs.dot_clock_divider, cs.vertical_total); ImGui::Text("Configured Display Range: %u-%u (%u-%u), %u-%u", cs.regs.X1.GetValue(), cs.regs.X2.GetValue(), cs.regs.X1.GetValue() / cs.dot_clock_divider, cs.regs.X2.GetValue() / cs.dot_clock_divider, cs.regs.Y1.GetValue(), cs.regs.Y2.GetValue()); ImGui::Text("Output Display Range: %u-%u (%u-%u), %u-%u", cs.horizontal_display_start, cs.horizontal_display_end, cs.horizontal_display_start / cs.dot_clock_divider, cs.horizontal_display_end / cs.dot_clock_divider, cs.vertical_display_start, cs.vertical_display_end); ImGui::Text("Cropping: %s", Settings::GetDisplayCropModeName(g_settings.display_crop_mode)); ImGui::Text("Visible Display Range: %u-%u (%u-%u), %u-%u", cs.horizontal_visible_start, cs.horizontal_visible_end, cs.horizontal_visible_start / cs.dot_clock_divider, cs.horizontal_visible_end / cs.dot_clock_divider, cs.vertical_visible_start, cs.vertical_visible_end); ImGui::Text("Display Resolution: %ux%u", cs.display_width, cs.display_height); ImGui::Text("Display Origin: %u, %u", cs.display_origin_left, cs.display_origin_top); ImGui::Text("Displayed/Visible VRAM Portion: %ux%u @ (%u, %u)", cs.display_vram_width, cs.display_vram_height, cs.display_vram_left, cs.display_vram_top); ImGui::Text("Padding: Left=%d, Top=%d, Right=%d, Bottom=%d", cs.display_origin_left, cs.display_origin_top, cs.display_width - cs.display_vram_width - cs.display_origin_left, cs.display_height - cs.display_vram_height - cs.display_origin_top); } ImGui::End(); } void GPU::DrawRendererStats() { } void GPU::GetStatsString(SmallStringBase& str) { if (IsHardwareRenderer()) { str.format("{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives, m_stats.host_num_draws, m_stats.host_num_barriers, m_stats.host_num_render_passes, m_stats.host_num_downloads, m_stats.num_copies, m_stats.num_writes); } else { str.format("{} SW | {} P | {} R | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives, m_stats.num_reads, m_stats.num_copies, m_stats.num_writes); } } void GPU::GetMemoryStatsString(SmallStringBase& str) { const u32 vram_usage_mb = static_cast((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); const u32 stream_kb = static_cast((m_stats.host_buffer_streamed + (1024 - 1)) / 1024); str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, m_stats.host_num_copies, m_stats.host_num_uploads); } void GPU::ResetStatistics() { m_counters = {}; g_gpu_device->ResetStatistics(); } void GPU::UpdateStatistics(u32 frame_count) { const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics(); const u32 round = (frame_count - 1); #define UPDATE_COUNTER(x) m_stats.x = (m_counters.x + round) / frame_count #define UPDATE_GPU_STAT(x) m_stats.host_##x = (stats.x + round) / frame_count UPDATE_COUNTER(num_reads); UPDATE_COUNTER(num_writes); UPDATE_COUNTER(num_copies); UPDATE_COUNTER(num_vertices); UPDATE_COUNTER(num_primitives); // UPDATE_COUNTER(num_read_texture_updates); // UPDATE_COUNTER(num_ubo_updates); UPDATE_GPU_STAT(buffer_streamed); UPDATE_GPU_STAT(num_draws); UPDATE_GPU_STAT(num_barriers); UPDATE_GPU_STAT(num_render_passes); UPDATE_GPU_STAT(num_copies); UPDATE_GPU_STAT(num_downloads); UPDATE_GPU_STAT(num_uploads); #undef UPDATE_GPU_STAT #undef UPDATE_COUNTER ResetStatistics(); }