diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index 9eafa8baa..9cc7f5b18 100644 --- a/src/core/fullscreen_ui.cpp +++ b/src/core/fullscreen_ui.cpp @@ -2778,6 +2778,9 @@ void FullscreenUI::DrawInterfaceSettingsPage() FSUI_CSTR("Shows the number of frames (or v-syncs) displayed per second by the system in the top-right " "corner of the display."), "Display", "ShowFPS", false); + DrawToggleSetting(bsi, FSUI_ICONSTR(ICON_FA_BARS, "Show GPU Statistics"), + FSUI_CSTR("Shows information about the emulated GPU in the top-right corner of the display."), + "Display", "ShowGPUStatistics", false); DrawToggleSetting(bsi, FSUI_ICONSTR(ICON_FA_BATTERY_HALF, "Show CPU Usage"), FSUI_CSTR("Shows the host's CPU usage based on threads in the top-right corner of the display."), "Display", "ShowCPU", false); diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index eb289f062..5bad2255d 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -21,6 +21,7 @@ #include "common/file_system.h" #include "common/heap_array.h" #include "common/log.h" +#include "common/small_string.h" #include "common/string_util.h" #include "stb_image_resize.h" @@ -35,7 +36,10 @@ std::unique_ptr g_gpu; const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable(); -GPU::GPU() = default; +GPU::GPU() +{ + ResetStatistics(); +} GPU::~GPU() { @@ -66,7 +70,7 @@ bool GPU::Initialize() return false; } - g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu); + g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage); return true; } @@ -93,7 +97,7 @@ void GPU::UpdateSettings(const Settings& old_settings) Panic("Failed to compile display pipeline on settings change."); } - g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu); + g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage); } void GPU::CPUClockChanged() @@ -2229,59 +2233,7 @@ void GPU::DrawDebugStateWindow() return; } - const bool is_idle_frame = 
m_stats.num_polygons == 0; - if (!is_idle_frame) - { - m_last_stats = m_stats; - m_stats = {}; - } - - if (ImGui::CollapsingHeader("Statistics", ImGuiTreeNodeFlags_DefaultOpen)) - { - const Stats& stats = m_last_stats; - - ImGui::Columns(2); - ImGui::SetColumnWidth(0, 200.0f * framebuffer_scale); - - ImGui::TextUnformatted("Idle Frame: "); - ImGui::NextColumn(); - ImGui::Text("%s", is_idle_frame ? "Yes" : "No"); - ImGui::NextColumn(); - - ImGui::TextUnformatted("VRAM Reads: "); - ImGui::NextColumn(); - ImGui::Text("%u", stats.num_vram_reads); - ImGui::NextColumn(); - - ImGui::TextUnformatted("VRAM Fills: "); - ImGui::NextColumn(); - ImGui::Text("%u", stats.num_vram_fills); - ImGui::NextColumn(); - - ImGui::TextUnformatted("VRAM Writes: "); - ImGui::NextColumn(); - ImGui::Text("%u", stats.num_vram_writes); - ImGui::NextColumn(); - - ImGui::TextUnformatted("VRAM Copies: "); - ImGui::NextColumn(); - ImGui::Text("%u", stats.num_vram_copies); - ImGui::NextColumn(); - - ImGui::TextUnformatted("Vertices Processed: "); - ImGui::NextColumn(); - ImGui::Text("%u", stats.num_vertices); - ImGui::NextColumn(); - - ImGui::TextUnformatted("Polygons Drawn: "); - ImGui::NextColumn(); - ImGui::Text("%u", stats.num_polygons); - ImGui::NextColumn(); - - ImGui::Columns(1); - } - - DrawRendererStats(is_idle_frame); + DrawRendererStats(); if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen)) { @@ -2339,6 +2291,67 @@ void GPU::DrawDebugStateWindow() ImGui::End(); } -void GPU::DrawRendererStats(bool is_idle_frame) +void GPU::DrawRendererStats() { } + +void GPU::GetStatsString(SmallStringBase& str) +{ + if (IsHardwareRenderer()) + { + str.format("{} HW | {} P | {} DC | {} RP | {} RB | {} C | {} W", + GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives, + m_stats.host_num_draws, m_stats.host_num_render_passes, m_stats.num_reads, m_stats.num_copies, + m_stats.num_writes); + } + else + { + str.format("{} SW | {} P | {} R | {} C | {} W", 
GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), + m_stats.num_primitives, m_stats.num_reads, m_stats.num_copies, m_stats.num_writes); + } +} + +void GPU::GetMemoryStatsString(SmallStringBase& str) +{ + const u32 vram_usage_mb = static_cast((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); + const u32 stream_kb = static_cast((m_stats.host_buffer_streamed + (1024 - 1)) / 1024); + + str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, m_stats.host_num_copies, + m_stats.host_num_uploads); +} + +void GPU::ResetStatistics() +{ + m_counters = {}; + g_gpu_device->ResetStatistics(); +} + +void GPU::UpdateStatistics(u32 frame_count) +{ + const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics(); + const u32 round = (frame_count - 1); + +#define UPDATE_COUNTER(x) m_stats.x = (m_counters.x + round) / frame_count +#define UPDATE_GPU_STAT(x) m_stats.host_##x = (stats.x + round) / frame_count + + UPDATE_COUNTER(num_reads); + UPDATE_COUNTER(num_writes); + UPDATE_COUNTER(num_copies); + UPDATE_COUNTER(num_vertices); + UPDATE_COUNTER(num_primitives); + + // UPDATE_COUNTER(num_read_texture_updates); + // UPDATE_COUNTER(num_ubo_updates); + + UPDATE_GPU_STAT(buffer_streamed); + UPDATE_GPU_STAT(num_draws); + UPDATE_GPU_STAT(num_render_passes); + UPDATE_GPU_STAT(num_copies); + UPDATE_GPU_STAT(num_downloads); + UPDATE_GPU_STAT(num_uploads); + +#undef UPDATE_GPU_STAT +#undef UPDATE_COUNTER + + ResetStatistics(); +} diff --git a/src/core/gpu.h b/src/core/gpu.h index 360aee00c..42165fd3d 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -20,6 +20,8 @@ #include #include +class SmallStringBase; + class StateWrapper; class GPUDevice; @@ -99,6 +101,10 @@ public: // Render statistics debug window. 
void DrawDebugStateWindow(); + void GetStatsString(SmallStringBase& str); + void GetMemoryStatsString(SmallStringBase& str); + void ResetStatistics(); + void UpdateStatistics(u32 frame_count); void CPUClockChanged(); @@ -308,7 +314,7 @@ protected: virtual void DispatchRenderCommand(); virtual void ClearDisplay(); virtual void UpdateDisplay(); - virtual void DrawRendererStats(bool is_idle_frame); + virtual void DrawRendererStats(); ALWAYS_INLINE void AddDrawTriangleTicks(s32 x1, s32 y1, s32 x2, s32 y2, s32 x3, s32 y3, bool shaded, bool textured, bool semitransparent) @@ -589,17 +595,30 @@ protected: s32 m_display_texture_view_width = 0; s32 m_display_texture_view_height = 0; - struct Stats + struct Counters { - u32 num_vram_reads; - u32 num_vram_fills; - u32 num_vram_writes; - u32 num_vram_copies; + u32 num_reads; + u32 num_writes; + u32 num_copies; u32 num_vertices; - u32 num_polygons; + u32 num_primitives; + + // u32 num_read_texture_updates; + // u32 num_ubo_updates; }; + + struct Stats : Counters + { + size_t host_buffer_streamed; + u32 host_num_draws; + u32 host_num_render_passes; + u32 host_num_copies; + u32 host_num_downloads; + u32 host_num_uploads; + }; + + Counters m_counters = {}; Stats m_stats = {}; - Stats m_last_stats = {}; private: bool CompileDisplayPipeline(); diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 8ed376bd3..33b60c9bf 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -352,8 +352,8 @@ bool GPU::HandleRenderPolygonCommand() SetTexturePalette(Truncate16(FifoPeek(2) >> 16)); } - m_stats.num_vertices += num_vertices; - m_stats.num_polygons++; + m_counters.num_vertices += num_vertices; + m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); @@ -384,8 +384,8 @@ bool GPU::HandleRenderRectangleCommand() rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? 
"shaded" : "monochrome", total_words, setup_ticks); - m_stats.num_vertices++; - m_stats.num_polygons++; + m_counters.num_vertices++; + m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); @@ -406,8 +406,8 @@ bool GPU::HandleRenderLineCommand() Log_TracePrintf("Render %s %s line (%u total words)", rc.transparency_enable ? "semi-transparent" : "opaque", rc.shading_enable ? "shaded" : "monochrome", total_words); - m_stats.num_vertices += 2; - m_stats.num_polygons++; + m_counters.num_vertices += 2; + m_counters.num_primitives++; m_render_command.bits = rc.bits; m_fifo.RemoveOne(); @@ -468,7 +468,7 @@ bool GPU::HandleFillRectangleCommand() if (width > 0 && height > 0) FillVRAM(dst_x, dst_y, width, height, color); - m_stats.num_vram_fills++; + m_counters.num_writes++; AddCommandTicks(46 + ((width / 8) + 9) * height); EndCommand(); return true; @@ -552,10 +552,10 @@ void GPU::FinishVRAMWrite() } } + m_counters.num_writes++; m_blit_buffer.clear(); m_vram_transfer = {}; m_blitter_state = BlitterState::Idle; - m_stats.num_vram_writes++; } bool GPU::HandleCopyRectangleVRAMToCPUCommand() @@ -586,7 +586,6 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() } // switch to pixel-by-pixel read state - m_stats.num_vram_reads++; m_blitter_state = BlitterState::ReadingVRAM; m_command_total_words = 0; return true; @@ -612,11 +611,12 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand() width == 0 || height == 0 || (src_x == dst_x && src_y == dst_y && !m_GPUSTAT.set_mask_while_drawing); if (!skip_copy) { + m_counters.num_copies++; + FlushRender(); CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); } - m_stats.num_vram_copies++; AddCommandTicks(width * height * 2); EndCommand(); return true; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index f76e611b0..cc52c03e7 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -1262,7 +1262,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written) scaled_rect.GetWidth(), 
scaled_rect.GetHeight()); } - m_renderer_stats.num_vram_read_texture_updates++; + // m_counters.num_read_texture_updates++; rect.SetInvalid(); }; @@ -2797,7 +2797,7 @@ void GPU_HW::FlushRender() if (m_batch_ubo_dirty) { g_gpu_device->UploadUniformBuffer(&m_batch_ubo_data, sizeof(m_batch_ubo_data)); - m_renderer_stats.num_uniform_buffer_updates++; + // m_counters.num_ubo_updates++; m_batch_ubo_dirty = false; } @@ -2805,20 +2805,17 @@ void GPU_HW::FlushRender() { if (NeedsTwoPassRendering()) { - m_renderer_stats.num_batches += 2; DrawBatchVertices(BatchRenderMode::OnlyOpaque, vertex_count, m_batch_base_vertex); DrawBatchVertices(BatchRenderMode::OnlyTransparent, vertex_count, m_batch_base_vertex); } else { - m_renderer_stats.num_batches++; DrawBatchVertices(m_batch.GetRenderMode(), vertex_count, m_batch_base_vertex); } } if (m_wireframe_mode != GPUWireframeMode::Disabled) { - m_renderer_stats.num_batches++; g_gpu_device->SetPipeline(m_wireframe_pipeline.get()); g_gpu_device->Draw(vertex_count, m_batch_base_vertex); } @@ -3094,19 +3091,12 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to SetDisplayTexture(m_downsample_texture.get(), 0, 0, ds_width, ds_height); } -void GPU_HW::DrawRendererStats(bool is_idle_frame) +void GPU_HW::DrawRendererStats() { - if (!is_idle_frame) - { - m_last_renderer_stats = m_renderer_stats; - m_renderer_stats = {}; - } - if (ImGui::CollapsingHeader("Renderer Statistics", ImGuiTreeNodeFlags_DefaultOpen)) { static const ImVec4 active_color{1.0f, 1.0f, 1.0f, 1.0f}; static const ImVec4 inactive_color{0.4f, 0.4f, 0.4f, 1.0f}; - const auto& stats = m_last_renderer_stats; ImGui::Columns(2); ImGui::SetColumnWidth(0, 200.0f * Host::GetOSDScale()); @@ -3158,21 +3148,6 @@ void GPU_HW::DrawRendererStats(bool is_idle_frame) "Cache"); ImGui::NextColumn(); - ImGui::TextUnformatted("Batches Drawn:"); - ImGui::NextColumn(); - ImGui::Text("%u", stats.num_batches); - ImGui::NextColumn(); - - ImGui::TextUnformatted("VRAM Read 
Texture Updates:"); - ImGui::NextColumn(); - ImGui::Text("%u", stats.num_vram_read_texture_updates); - ImGui::NextColumn(); - - ImGui::TextUnformatted("Uniform Buffer Updates: "); - ImGui::NextColumn(); - ImGui::Text("%u", stats.num_uniform_buffer_updates); - ImGui::NextColumn(); - ImGui::Columns(1); } } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index a3a27d3cd..ad996b7b3 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -188,7 +188,7 @@ private: void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void DispatchRenderCommand() override; void FlushRender() override; - void DrawRendererStats(bool is_idle_frame) override; + void DrawRendererStats() override; bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); @@ -297,8 +297,4 @@ private: std::unique_ptr m_downsample_lod_sampler; std::unique_ptr m_downsample_composite_sampler; u32 m_downsample_scale_or_levels = 0; - - // Statistics - RendererStats m_renderer_stats = {}; - RendererStats m_last_renderer_stats = {}; }; diff --git a/src/core/imgui_overlays.cpp b/src/core/imgui_overlays.cpp index e2f931b37..0561b3553 100644 --- a/src/core/imgui_overlays.cpp +++ b/src/core/imgui_overlays.cpp @@ -253,8 +253,8 @@ void ImGuiManager::FormatProcessorStat(SmallStringBase& text, double usage, doub void ImGuiManager::DrawPerformanceOverlay() { - if (!(g_settings.display_show_fps || g_settings.display_show_speed || g_settings.display_show_resolution || - g_settings.display_show_cpu || + if (!(g_settings.display_show_fps || g_settings.display_show_speed || g_settings.display_show_gpu_stats || + g_settings.display_show_resolution || g_settings.display_show_cpu_usage || (g_settings.display_show_status_indicators && (System::IsPaused() || System::IsFastForwardEnabled() || System::IsTurboEnabled())))) { @@ -322,6 +322,15 @@ void ImGuiManager::DrawPerformanceOverlay() DRAW_LINE(fixed_font, text, color); } + if 
(g_settings.display_show_gpu_stats) + { + g_gpu->GetStatsString(text); + DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); + + g_gpu->GetMemoryStatsString(text); + DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); + } + if (g_settings.display_show_resolution) { // TODO: this seems wrong? @@ -333,7 +342,7 @@ void ImGuiManager::DrawPerformanceOverlay() DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } - if (g_settings.display_show_cpu) + if (g_settings.display_show_cpu_usage) { text.format("{:.2f}ms | {:.2f}ms | {:.2f}ms", System::GetMinimumFrameTime(), System::GetAverageFrameTime(), System::GetMaximumFrameTime()); @@ -405,16 +414,10 @@ void ImGuiManager::DrawPerformanceOverlay() #endif } - if (g_settings.display_show_gpu) + if (g_settings.display_show_gpu_usage && g_gpu_device->IsGPUTimingEnabled()) { - if (g_gpu_device->IsGPUTimingEnabled()) - { - text.assign("GPU: "); - FormatProcessorStat(text, System::GetGPUUsage(), System::GetGPUAverageTime()); - DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); - } - - text.format("VRAM: {} MB", (g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); + text.assign("GPU: "); + FormatProcessorStat(text, System::GetGPUUsage(), System::GetGPUAverageTime()); DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); } @@ -525,10 +528,14 @@ void ImGuiManager::DrawEnhancementsOverlay() { text.append_format(" {}x{}", g_settings.gpu_multisamples, g_settings.gpu_per_sample_shading ? 
"SSAA" : "MSAA"); } - if (g_settings.gpu_true_color) { - if (g_settings.gpu_debanding) { + if (g_settings.gpu_true_color) + { + if (g_settings.gpu_debanding) + { text.append(" TrueColDeband"); - } else { + } + else + { text.append(" TrueCol"); } } diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 18a6c4db5..cc9b94aaf 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -253,9 +253,10 @@ void Settings::Load(SettingsInterface& si) display_show_osd_messages = si.GetBoolValue("Display", "ShowOSDMessages", true); display_show_fps = si.GetBoolValue("Display", "ShowFPS", false); display_show_speed = si.GetBoolValue("Display", "ShowSpeed", false); + display_show_gpu_stats = si.GetBoolValue("Display", "ShowGPUStatistics", false); display_show_resolution = si.GetBoolValue("Display", "ShowResolution", false); - display_show_cpu = si.GetBoolValue("Display", "ShowCPU", false); - display_show_gpu = si.GetBoolValue("Display", "ShowGPU", false); + display_show_cpu_usage = si.GetBoolValue("Display", "ShowCPU", false); + display_show_gpu_usage = si.GetBoolValue("Display", "ShowGPU", false); display_show_frame_times = si.GetBoolValue("Display", "ShowFrameTimes", false); display_show_status_indicators = si.GetBoolValue("Display", "ShowStatusIndicators", true); display_show_inputs = si.GetBoolValue("Display", "ShowInputs", false); @@ -495,8 +496,9 @@ void Settings::Save(SettingsInterface& si) const si.SetBoolValue("Display", "ShowFPS", display_show_fps); si.SetBoolValue("Display", "ShowSpeed", display_show_speed); si.SetBoolValue("Display", "ShowResolution", display_show_resolution); - si.SetBoolValue("Display", "ShowCPU", display_show_cpu); - si.SetBoolValue("Display", "ShowGPU", display_show_gpu); + si.SetBoolValue("Display", "ShowGPUStatistics", display_show_gpu_stats); + si.SetBoolValue("Display", "ShowCPU", display_show_cpu_usage); + si.SetBoolValue("Display", "ShowGPU", display_show_gpu_usage); si.SetBoolValue("Display", "ShowFrameTimes", 
display_show_frame_times); si.SetBoolValue("Display", "ShowStatusIndicators", display_show_status_indicators); si.SetBoolValue("Display", "ShowInputs", display_show_inputs); diff --git a/src/core/settings.h b/src/core/settings.h index 7924a0ce6..0b1246142 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -144,9 +144,10 @@ struct Settings bool display_show_osd_messages = true; bool display_show_fps = false; bool display_show_speed = false; + bool display_show_gpu_stats = false; bool display_show_resolution = false; - bool display_show_cpu = false; - bool display_show_gpu = false; + bool display_show_cpu_usage = false; + bool display_show_gpu_usage = false; bool display_show_frame_times = false; bool display_show_status_indicators = true; bool display_show_inputs = false; diff --git a/src/core/system.cpp b/src/core/system.cpp index 044566e09..33cfe87b4 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -952,9 +952,10 @@ void System::SetDefaultSettings(SettingsInterface& si) temp.display_show_osd_messages = g_settings.display_show_osd_messages; temp.display_show_fps = g_settings.display_show_fps; temp.display_show_speed = g_settings.display_show_speed; + temp.display_show_gpu_stats = g_settings.display_show_gpu_stats; temp.display_show_resolution = g_settings.display_show_resolution; - temp.display_show_cpu = g_settings.display_show_cpu; - temp.display_show_gpu = g_settings.display_show_gpu; + temp.display_show_cpu_usage = g_settings.display_show_cpu_usage; + temp.display_show_gpu_usage = g_settings.display_show_gpu_usage; temp.display_show_frame_times = g_settings.display_show_frame_times; // keep controller, we reset it elsewhere @@ -2517,7 +2518,8 @@ void System::UpdatePerformanceCounters() if (time < PERFORMANCE_COUNTER_UPDATE_INTERVAL) return; - const float frames_run = static_cast(s_frame_number - s_last_frame_number); + const u32 frames_run = s_frame_number - s_last_frame_number; + const float frames_runf = static_cast(frames_run); const 
u32 global_tick_counter = TimingEvents::GetGlobalTickCounter(); // TODO: Make the math here less rubbish @@ -2525,13 +2527,13 @@ void System::UpdatePerformanceCounters() 100.0 * (1.0 / ((static_cast(ticks_diff) * static_cast(Threading::GetThreadTicksPerSecond())) / Common::Timer::GetFrequency() / 1000000000.0)); const double time_divider = 1000.0 * (1.0 / static_cast(Threading::GetThreadTicksPerSecond())) * - (1.0 / static_cast(frames_run)); + (1.0 / static_cast(frames_runf)); s_minimum_frame_time = std::exchange(s_minimum_frame_time_accumulator, 0.0f); - s_average_frame_time = std::exchange(s_average_frame_time_accumulator, 0.0f) / frames_run; + s_average_frame_time = std::exchange(s_average_frame_time_accumulator, 0.0f) / frames_runf; s_maximum_frame_time = std::exchange(s_maximum_frame_time_accumulator, 0.0f); - s_vps = static_cast(frames_run / time); + s_vps = static_cast(frames_runf / time); s_last_frame_number = s_frame_number; s_fps = static_cast(s_internal_frame_number - s_last_internal_frame_number) / time; s_last_internal_frame_number = s_internal_frame_number; @@ -2563,6 +2565,9 @@ void System::UpdatePerformanceCounters() s_accumulated_gpu_time = 0.0f; s_presents_since_last_update = 0; + if (g_settings.display_show_gpu_stats) + g_gpu->UpdateStatistics(frames_run); + Log_VerbosePrintf("FPS: %.2f VPS: %.2f CPU: %.2f GPU: %.2f Average: %.2fms Min: %.2fms Max: %.2f ms", s_fps, s_vps, s_cpu_thread_usage, s_gpu_usage, s_average_frame_time, s_minimum_frame_time, s_maximum_frame_time); @@ -3639,7 +3644,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.display_aspect_ratio != old_settings.display_aspect_ratio || g_settings.display_alignment != old_settings.display_alignment || g_settings.display_scaling != old_settings.display_scaling || - g_settings.display_show_gpu != old_settings.display_show_gpu || + g_settings.display_show_gpu_usage != old_settings.display_show_gpu_usage || g_settings.gpu_pgxp_enable != 
old_settings.gpu_pgxp_enable || g_settings.gpu_pgxp_texture_correction != old_settings.gpu_pgxp_texture_correction || g_settings.gpu_pgxp_color_correction != old_settings.gpu_pgxp_color_correction || @@ -3678,6 +3683,9 @@ void System::CheckForSettingsChanges(const Settings& old_settings) CPU::CodeCache::Reset(); } + if (g_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats) + g_gpu->ResetStatistics(); + if (g_settings.cdrom_readahead_sectors != old_settings.cdrom_readahead_sectors) CDROM::SetReadaheadSectors(g_settings.cdrom_readahead_sectors); diff --git a/src/duckstation-qt/advancedsettingswidget.cpp b/src/duckstation-qt/advancedsettingswidget.cpp index ec3395361..14d7d3140 100644 --- a/src/duckstation-qt/advancedsettingswidget.cpp +++ b/src/duckstation-qt/advancedsettingswidget.cpp @@ -273,6 +273,7 @@ void AdvancedSettingsWidget::addTweakOptions() addBooleanTweakOption(m_dialog, m_ui.tweakOptionTable, tr("Show Status Indicators"), "Display", "ShowStatusIndicators", true); addBooleanTweakOption(m_dialog, m_ui.tweakOptionTable, tr("Show Frame Times"), "Display", "ShowFrameTimes", false); + addBooleanTweakOption(m_dialog, m_ui.tweakOptionTable, tr("Show Settings Overlay"), "Display", "ShowEnhancements", false); addBooleanTweakOption(m_dialog, m_ui.tweakOptionTable, tr("Apply Compatibility Settings"), "Main", "ApplyCompatibilitySettings", true); addIntRangeTweakOption(m_dialog, m_ui.tweakOptionTable, tr("Display FPS Limit"), "Display", "MaxFPS", 0, 1000, 0); @@ -384,6 +385,7 @@ void AdvancedSettingsWidget::onResetToDefaultClicked() setBooleanTweakOption(m_ui.tweakOptionTable, i++, false); // Disable all enhancements setBooleanTweakOption(m_ui.tweakOptionTable, i++, true); // Show status indicators setBooleanTweakOption(m_ui.tweakOptionTable, i++, false); // Show frame times + setBooleanTweakOption(m_ui.tweakOptionTable, i++, false); // Show settings overlay setBooleanTweakOption(m_ui.tweakOptionTable, i++, true); // Apply compatibility settings 
setIntRangeTweakOption(m_ui.tweakOptionTable, i++, 0); // Display FPS limit setChoiceTweakOption(m_ui.tweakOptionTable, i++, Settings::DEFAULT_DISPLAY_EXCLUSIVE_FULLSCREEN_CONTROL); @@ -438,6 +440,7 @@ void AdvancedSettingsWidget::onResetToDefaultClicked() sif->DeleteValue("Display", "ShowEnhancements"); sif->DeleteValue("Display", "ShowStatusIndicators"); sif->DeleteValue("Display", "ShowFrameTimes"); + sif->DeleteValue("Display", "ShowEnhancements"); sif->DeleteValue("Main", "ApplyCompatibilitySettings"); sif->DeleteValue("Display", "MaxFPS"); sif->DeleteValue("Display", "ActiveStartOffset"); diff --git a/src/duckstation-qt/displaysettingswidget.cpp b/src/duckstation-qt/displaysettingswidget.cpp index 9ce4921b7..fb8e237d7 100644 --- a/src/duckstation-qt/displaysettingswidget.cpp +++ b/src/duckstation-qt/displaysettingswidget.cpp @@ -56,8 +56,8 @@ DisplaySettingsWidget::DisplaySettingsWidget(SettingsWindow* dialog, QWidget* pa SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.showCPU, "Display", "ShowCPU", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.showGPU, "Display", "ShowGPU", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.showInput, "Display", "ShowInputs", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.showSettings, "Display", "ShowEnhancements", false); - + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.showGPUStatistics, "Display", "ShowGPUStatistics", false); + connect(m_ui.renderer, QOverload::of(&QComboBox::currentIndexChanged), this, &DisplaySettingsWidget::populateGPUAdaptersAndResolutions); connect(m_ui.adapter, QOverload::of(&QComboBox::currentIndexChanged), this, diff --git a/src/duckstation-qt/displaysettingswidget.ui b/src/duckstation-qt/displaysettingswidget.ui index 019cda0e5..86284d7da 100644 --- a/src/duckstation-qt/displaysettingswidget.ui +++ b/src/duckstation-qt/displaysettingswidget.ui @@ -241,9 +241,9 @@ - + - Show Settings Overlay + Show GPU Statistics diff --git 
a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index a460ddfff..1a4f8fd6b 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -513,6 +513,8 @@ void D3D11Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d src11->CommitClear(m_context.Get()); + s_stats.num_copies++; + const CD3D11_BOX src_box(static_cast(src_x), static_cast(src_y), 0, static_cast(src_x + width), static_cast(src_y + height), 1); m_context->CopySubresourceRegion(dst11->GetD3DTexture(), D3D11CalcSubresource(dst_level, dst_layer, dst->GetLevels()), @@ -531,6 +533,8 @@ void D3D11Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); DebugAssert(!dst->IsMultisampled() && src->IsMultisampled()); + s_stats.num_copies++; + // DX11 can't resolve partial rects. Assert(src_x == 0 && src_y == 0 && width == src->GetWidth() && height == src->GetHeight() && dst_x == 0 && dst_y == 0 && width == dst->GetMipWidth(dst_level) && height == dst->GetMipHeight(dst_level)); @@ -635,6 +639,7 @@ bool D3D11Device::BeginPresent(bool skip_present) static constexpr float clear_color[4] = {0.0f, 0.0f, 0.0f, 1.0f}; m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), clear_color); m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr); + s_stats.num_render_passes++; m_num_current_render_targets = 0; std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); m_current_depth_target = nullptr; @@ -852,7 +857,9 @@ void D3D11Device::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ void D3D11Device::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) { - m_vertex_buffer.Unmap(m_context.Get(), vertex_size * vertex_count); + const u32 upload_size = vertex_size * vertex_count; + s_stats.buffer_streamed += upload_size; + m_vertex_buffer.Unmap(m_context.Get(), upload_size); } void D3D11Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* 
map_space, u32* map_base_index) @@ -865,6 +872,7 @@ void D3D11Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_ void D3D11Device::UnmapIndexBuffer(u32 used_index_count) { + s_stats.buffer_streamed += sizeof(DrawIndex) * used_index_count; m_index_buffer.Unmap(m_context.Get(), sizeof(DrawIndex) * used_index_count); } @@ -874,6 +882,7 @@ void D3D11Device::PushUniformBuffer(const void* data, u32 data_size) const auto res = m_uniform_buffer.Map(m_context.Get(), UNIFORM_BUFFER_ALIGNMENT, used_space); std::memcpy(res.pointer, data, data_size); m_uniform_buffer.Unmap(m_context.Get(), data_size); + s_stats.buffer_streamed += data_size; const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u; const UINT num_constants = (used_space * UNIFORM_BUFFER_ALIGNMENT) / 16u; @@ -895,6 +904,8 @@ void D3D11Device::UnmapUniformBuffer(u32 size) const UINT num_constants = used_space / 16u; m_uniform_buffer.Unmap(m_context.Get(), used_space); + s_stats.buffer_streamed += size; + m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); } @@ -944,6 +955,7 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu if (!changed) return; + s_stats.num_render_passes++; m_context->OMSetRenderTargets(num_rts, rtvs, ds ? 
static_cast(ds)->GetD3DDSV() : nullptr); } @@ -1035,10 +1047,12 @@ void D3D11Device::SetScissor(s32 x, s32 y, s32 width, s32 height) void D3D11Device::Draw(u32 vertex_count, u32 base_vertex) { + s_stats.num_draws++; m_context->Draw(vertex_count, base_vertex); } void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) { + s_stats.num_draws++; m_context->DrawIndexed(index_count, base_index, base_vertex); } diff --git a/src/util/d3d11_texture.cpp b/src/util/d3d11_texture.cpp index a929bf61a..91a43990d 100644 --- a/src/util/d3d11_texture.cpp +++ b/src/util/d3d11_texture.cpp @@ -45,6 +45,8 @@ bool D3D11Device::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, return false; } + s_stats.num_downloads++; + const u32 copy_size = tex->GetPixelSize() * width; StringUtil::StrideMemCpy(out_data, out_data_stride, sr.pData, sr.RowPitch, copy_size, height); m_context->Unmap(m_readback_staging_texture.Get(), 0); @@ -218,6 +220,10 @@ bool D3D11Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, ID3D11DeviceContext1* context = D3D11Device::GetD3DContext(); CommitClear(context); + + GPUDevice::GetStatistics().buffer_streamed += height * pitch; + GPUDevice::GetStatistics().num_uploads++; + context->UpdateSubresource(m_texture.Get(), srnum, &box, data, pitch, 0); m_state = GPUTexture::State::Dirty; return true; @@ -246,6 +252,9 @@ bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 return false; } + GPUDevice::GetStatistics().buffer_streamed += height * sr.RowPitch; + GPUDevice::GetStatistics().num_uploads++; + *map = static_cast(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize()); *map_stride = sr.RowPitch; m_mapped_subresource = srnum; @@ -323,6 +332,12 @@ std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 wid return nullptr; } + if (initial_data) + { + GPUDevice::GetStatistics().buffer_streamed += height * initial_data_stride; + GPUDevice::GetStatistics().num_uploads++; + } + ComPtr srv; 
if (bind_flags & D3D11_BIND_SHADER_RESOURCE) { @@ -412,7 +427,10 @@ void* D3D11TextureBuffer::Map(u32 required_elements) void D3D11TextureBuffer::Unmap(u32 used_elements) { - m_buffer.Unmap(D3D11Device::GetD3DContext(), used_elements * GetElementSize(m_format)); + const u32 size = used_elements * GetElementSize(m_format); + GPUDevice::GetStatistics().buffer_streamed += size; + GPUDevice::GetStatistics().num_uploads++; + m_buffer.Unmap(D3D11Device::GetD3DContext(), size); } void D3D11TextureBuffer::SetDebugName(const std::string_view& name) diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index 0b99ac017..b594cbd1f 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -118,7 +118,8 @@ D3D12Device::ComPtr D3D12Device::CreateRootSignature(const } bool D3D12Device::CreateDevice(const std::string_view& adapter, bool threaded_presentation, - std::optional exclusive_fullscreen_control, FeatureMask disabled_features, Error* error) + std::optional exclusive_fullscreen_control, FeatureMask disabled_features, + Error* error) { std::unique_lock lock(s_instance_mutex); @@ -1256,6 +1257,8 @@ void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d D->CommitClear(); } + s_stats.num_copies++; + // *now* we can do a normal image copy. 
if (InRenderPass()) EndRenderPass(); @@ -1297,6 +1300,8 @@ void D3D12Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 if (InRenderPass()) EndRenderPass(); + s_stats.num_copies++; + D3D12Texture* D = static_cast(dst); D3D12Texture* S = static_cast(src); ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); @@ -1401,7 +1406,9 @@ void D3D12Device::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ void D3D12Device::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) { - m_vertex_buffer.CommitMemory(vertex_size * vertex_count); + const u32 upload_size = vertex_size * vertex_count; + s_stats.buffer_streamed += upload_size; + m_vertex_buffer.CommitMemory(upload_size); } void D3D12Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) @@ -1421,7 +1428,9 @@ void D3D12Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_ void D3D12Device::UnmapIndexBuffer(u32 used_index_count) { - m_index_buffer.CommitMemory(sizeof(DrawIndex) * used_index_count); + const u32 upload_size = sizeof(DrawIndex) * used_index_count; + s_stats.buffer_streamed += upload_size; + m_index_buffer.CommitMemory(upload_size); } void D3D12Device::PushUniformBuffer(const void* data, u32 data_size) @@ -1441,6 +1450,7 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size) UpdateRootSignature(); } + s_stats.buffer_streamed += data_size; GetCommandList()->SetGraphicsRoot32BitConstants(push_parameter[static_cast(m_current_pipeline_layout)], data_size / 4u, data, 0); } @@ -1462,6 +1472,7 @@ void* D3D12Device::MapUniformBuffer(u32 size) void D3D12Device::UnmapUniformBuffer(u32 size) { + s_stats.buffer_streamed += size; m_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset(); m_uniform_buffer.CommitMemory(size); m_dirty_flags |= DIRTY_FLAG_CONSTANT_BUFFER; @@ -1681,6 +1692,7 @@ void D3D12Device::BeginRenderPass() // TODO: Stats m_in_render_pass = true; + s_stats.num_render_passes++; // If this is 
a new command buffer, bind the pipeline and such. if (m_dirty_flags & DIRTY_FLAG_INITIAL) @@ -1715,6 +1727,7 @@ void D3D12Device::BeginSwapChainRenderPass() m_num_current_render_targets = 0; m_current_depth_target = nullptr; m_in_render_pass = true; + s_stats.num_render_passes++; // Clear pipeline, it's likely incompatible. m_current_pipeline = nullptr; @@ -2119,11 +2132,13 @@ bool D3D12Device::UpdateRootParameters(u32 dirty) void D3D12Device::Draw(u32 vertex_count, u32 base_vertex) { PreDrawCheck(); + s_stats.num_draws++; GetCommandList()->DrawInstanced(vertex_count, 1, base_vertex, 0); } void D3D12Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) { PreDrawCheck(); + s_stats.num_draws++; GetCommandList()->DrawIndexedInstanced(index_count, 1, base_index, base_vertex, 0); } diff --git a/src/util/d3d12_texture.cpp b/src/util/d3d12_texture.cpp index 9438300c7..0ccb59286 100644 --- a/src/util/d3d12_texture.cpp +++ b/src/util/d3d12_texture.cpp @@ -443,6 +443,9 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, m_state = State::Dirty; } + GPUDevice::GetStatistics().buffer_streamed += required_size; + GPUDevice::GetStatistics().num_uploads++; + // first time the texture is used? don't leave it undefined if (m_resource_state == D3D12_RESOURCE_STATE_COMMON) TransitionToState(cmdlist, D3D12_RESOURCE_STATE_COPY_DEST); @@ -511,6 +514,9 @@ void D3D12Texture::Unmap() const u32 offset = sb.GetCurrentOffset(); sb.CommitMemory(req_size); + GPUDevice::GetStatistics().buffer_streamed += req_size; + GPUDevice::GetStatistics().num_uploads++; + ID3D12GraphicsCommandList4* cmdlist = GetCommandBufferForUpdate(); // first time the texture is used? 
don't leave it undefined @@ -907,7 +913,10 @@ void* D3D12TextureBuffer::Map(u32 required_elements) void D3D12TextureBuffer::Unmap(u32 used_elements) { - m_buffer.CommitMemory(GetElementSize(m_format) * used_elements); + const u32 size = GetElementSize(m_format) * used_elements; + GPUDevice::GetStatistics().buffer_streamed += size; + GPUDevice::GetStatistics().num_uploads++; + m_buffer.CommitMemory(size); } void D3D12TextureBuffer::SetDebugName(const std::string_view& name) diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index 810605fec..f59be0bbe 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -40,6 +40,7 @@ std::unique_ptr g_gpu_device; static std::string s_pipeline_cache_path; size_t GPUDevice::s_total_vram_usage = 0; +GPUDevice::Statistics GPUDevice::s_stats = {}; GPUSampler::GPUSampler() = default; @@ -206,6 +207,11 @@ size_t GPUFramebufferManagerBase::KeyHash::operator()(const Key& key) const return XXH32(&key, sizeof(key), 0x1337); } +GPUDevice::GPUDevice() +{ + ResetStatistics(); +} + GPUDevice::~GPUDevice() = default; RenderAPI GPUDevice::GetPreferredAPI() @@ -993,6 +999,11 @@ float GPUDevice::GetAndResetAccumulatedGPUTime() return 0.0f; } +void GPUDevice::ResetStatistics() +{ + s_stats = {}; +} + std::unique_ptr GPUDevice::CreateDeviceForAPI(RenderAPI api) { switch (api) diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index a3fefef41..33899aa6e 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -458,6 +458,16 @@ public: bool prefer_unused_textures : 1; }; + struct Statistics + { + size_t buffer_streamed; /* accumulated size of vertex/index/uniform/texture upload data */ + u32 num_draws; /* Draw()/DrawIndexed() calls */ + u32 num_render_passes; /* render passes begun (framebuffer binds on OpenGL) */ + u32 num_copies; /* CopyTextureRegion()/ResolveTextureRegion() calls */ + u32 num_downloads; /* DownloadTexture() calls */ + u32 num_uploads; /* texture and texture-buffer uploads */ + }; + struct AdapterAndModeList { std::vector adapter_names; @@ -474,6 +484,7 @@ public: static constexpr u32 MAX_RENDER_TARGETS = 4; static_assert(sizeof(GPUPipeline::GraphicsConfig::color_formats) == sizeof(GPUTexture::Format) * MAX_RENDER_TARGETS); + GPUDevice(); virtual ~GPUDevice(); /// 
Returns the default/preferred API for the system. @@ -652,6 +663,9 @@ public: /// Returns the amount of GPU time utilized since the last time this method was called. virtual float GetAndResetAccumulatedGPUTime(); + ALWAYS_INLINE static Statistics& GetStatistics() { return s_stats; } + static void ResetStatistics(); + protected: virtual bool CreateDevice(const std::string_view& adapter, bool threaded_presentation, std::optional exclusive_fullscreen_control, FeatureMask disabled_features, @@ -738,6 +752,8 @@ private: float m_display_frame_interval = 0.0f; protected: + static Statistics s_stats; + bool m_gpu_timing_enabled = false; bool m_vsync_enabled = false; bool m_debug_device = false; diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index 354abb979..ecdb550de 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -890,6 +890,9 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); const u32 req_size = height * aligned_pitch; + GPUDevice::GetStatistics().buffer_streamed += req_size; + GPUDevice::GetStatistics().num_uploads++; + MetalDevice& dev = MetalDevice::GetInstance(); MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); id actual_buffer; @@ -989,6 +992,9 @@ void MetalTexture::Unmap() const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); const u32 req_size = m_map_height * aligned_pitch; + GPUDevice::GetStatistics().buffer_streamed += req_size; + GPUDevice::GetStatistics().num_uploads++; + MetalDevice& dev = MetalDevice::GetInstance(); MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); const u32 offset = sb.GetCurrentOffset(); @@ -1206,6 +1212,8 @@ bool MetalDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, MetalTexture* T = static_cast(texture); CommitClear(T); + s_stats.num_downloads++; + @autoreleasepool { id encoder 
= GetBlitEncoder(true); @@ -1303,6 +1311,8 @@ void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d S->SetUseFenceCounter(m_current_fence_counter); D->SetUseFenceCounter(m_current_fence_counter); + s_stats.num_copies++; + @autoreleasepool { id encoder = GetBlitEncoder(true); @@ -1365,6 +1375,8 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 if (InRenderPass()) EndRenderPass(); + s_stats.num_copies++; + const u32 threadgroupHeight = resolve_pipeline.maxTotalThreadsPerThreadgroup / resolve_pipeline.threadExecutionWidth; const MTLSize intrinsicThreadgroupSize = MTLSizeMake(resolve_pipeline.threadExecutionWidth, threadgroupHeight, 1); const MTLSize threadgroupsInGrid = @@ -1472,7 +1484,10 @@ void* MetalTextureBuffer::Map(u32 required_elements) void MetalTextureBuffer::Unmap(u32 used_elements) { - m_buffer.CommitMemory(GetElementSize(m_format) * used_elements); + const u32 size = GetElementSize(m_format) * used_elements; + GPUDevice::GetStatistics().buffer_streamed += size; + GPUDevice::GetStatistics().num_uploads++; + m_buffer.CommitMemory(size); } void MetalTextureBuffer::SetDebugName(const std::string_view& name) @@ -1523,7 +1538,9 @@ void MetalDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ void MetalDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) { - m_vertex_buffer.CommitMemory(vertex_size * vertex_count); + const u32 size = vertex_size * vertex_count; + s_stats.buffer_streamed += size; + m_vertex_buffer.CommitMemory(size); } void MetalDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) @@ -1543,11 +1560,14 @@ void MetalDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_ void MetalDevice::UnmapIndexBuffer(u32 used_index_count) { - m_index_buffer.CommitMemory(sizeof(DrawIndex) * used_index_count); + const u32 size = sizeof(DrawIndex) * used_index_count; + s_stats.buffer_streamed += size; + 
m_index_buffer.CommitMemory(size); } void MetalDevice::PushUniformBuffer(const void* data, u32 data_size) { + /* buffer_streamed is counted by UnmapUniformBuffer() below; adding data_size here would count it twice. */ void* map = MapUniformBuffer(data_size); std::memcpy(map, data, data_size); UnmapUniformBuffer(data_size); @@ -1568,6 +1588,7 @@ void* MetalDevice::MapUniformBuffer(u32 size) void MetalDevice::UnmapUniformBuffer(u32 size) { + s_stats.buffer_streamed += size; m_current_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset(); m_uniform_buffer.CommitMemory(size); if (InRenderPass()) @@ -1758,6 +1779,8 @@ void MetalDevice::BeginRenderPass() m_inline_upload_encoder = nil; } + s_stats.num_render_passes++; + @autoreleasepool { MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor]; @@ -1927,6 +1950,7 @@ void MetalDevice::PreDrawCheck() void MetalDevice::Draw(u32 vertex_count, u32 base_vertex) { PreDrawCheck(); + s_stats.num_draws++; [m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:base_vertex vertexCount:vertex_count]; } @@ -1934,6 +1958,8 @@ void MetalDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) { PreDrawCheck(); + s_stats.num_draws++; + const u32 index_offset = base_index * sizeof(u16); [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive() indexCount:index_count @@ -2000,6 +2026,7 @@ bool MetalDevice::BeginPresent(bool skip_present) m_layer_pass_desc.colorAttachments[0].texture = layer_texture; m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc] retain]; + s_stats.num_render_passes++; std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); m_num_current_render_targets = 0; m_current_depth_target = nullptr; diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index 6e83b372b..acdf075ec 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -76,6
+76,8 @@ bool OpenGLDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, const u32 layer = 0; const u32 level = 0; + s_stats.num_downloads++; + if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_get_texture_sub_image) { glGetTextureSubImage(T->GetGLId(), level, x, y, layer, width, height, 1, gl_format, gl_type, @@ -116,6 +118,8 @@ void OpenGLDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 CommitClear(D); CommitClear(S); + s_stats.num_copies++; + const GLuint sid = S->GetGLId(); const GLuint did = D->GetGLId(); if (GLAD_GL_VERSION_4_3 || GLAD_GL_ARB_copy_image) @@ -192,6 +196,8 @@ void OpenGLDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u CommitClear(D); } + s_stats.num_copies++; + glDisable(GL_SCISSOR_TEST); glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height, GL_COLOR_BUFFER_BIT, GL_LINEAR); @@ -307,7 +313,8 @@ bool OpenGLDevice::HasSurface() const } bool OpenGLDevice::CreateDevice(const std::string_view& adapter, bool threaded_presentation, - std::optional exclusive_fullscreen_control, FeatureMask disabled_features, Error* error) + std::optional exclusive_fullscreen_control, FeatureMask disabled_features, + Error* error) { m_gl_context = GL::Context::Create(m_window_info, error); if (!m_gl_context) @@ -1030,6 +1037,8 @@ ALWAYS_INLINE_RELEASE void OpenGLDevice::SetVertexBufferOffsets(u32 base_vertex) void OpenGLDevice::Draw(u32 vertex_count, u32 base_vertex) { + s_stats.num_draws++; + if (glDrawElementsBaseVertex) [[likely]] { glDrawArrays(m_current_pipeline->GetTopology(), base_vertex, vertex_count); @@ -1042,6 +1051,8 @@ void OpenGLDevice::Draw(u32 vertex_count, u32 base_vertex) void OpenGLDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) { + s_stats.num_draws++; + if (glDrawElementsBaseVertex) [[likely]] { const void* indices = reinterpret_cast(static_cast(base_index) * sizeof(u16)); @@ -1066,7 +1077,9 @@ void 
OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map void OpenGLDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) { - m_vertex_buffer->Unmap(vertex_size * vertex_count); + const u32 size = vertex_size * vertex_count; + s_stats.buffer_streamed += size; + m_vertex_buffer->Unmap(size); } void OpenGLDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) @@ -1079,7 +1092,9 @@ void OpenGLDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map void OpenGLDevice::UnmapIndexBuffer(u32 used_index_count) { - m_index_buffer->Unmap(sizeof(DrawIndex) * used_index_count); + const u32 size = sizeof(DrawIndex) * used_index_count; + s_stats.buffer_streamed += size; + m_index_buffer->Unmap(size); } void OpenGLDevice::PushUniformBuffer(const void* data, u32 data_size) @@ -1087,6 +1102,7 @@ void OpenGLDevice::PushUniformBuffer(const void* data, u32 data_size) const auto res = m_uniform_buffer->Map(m_uniform_buffer_alignment, data_size); std::memcpy(res.pointer, data, data_size); m_uniform_buffer->Unmap(data_size); + s_stats.buffer_streamed += data_size; glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_buffer->GetGLBufferId(), res.buffer_offset, data_size); } @@ -1099,6 +1115,7 @@ void* OpenGLDevice::MapUniformBuffer(u32 size) void OpenGLDevice::UnmapUniformBuffer(u32 size) { const u32 pos = m_uniform_buffer->Unmap(size); + s_stats.buffer_streamed += size; /* count bytes committed; pos is the bind offset used below, not a byte count */ glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_buffer->GetGLBufferId(), pos, size); } @@ -1135,6 +1152,7 @@ void OpenGLDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText } } + s_stats.num_render_passes++; m_current_fbo = fbo; glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); } diff --git a/src/util/opengl_texture.cpp b/src/util/opengl_texture.cpp index dafafcd2e..980dc4d1d 100644 --- a/src/util/opengl_texture.cpp +++ b/src/util/opengl_texture.cpp @@ -166,6 +166,9 @@ std::unique_ptr OpenGLTexture::Create(u32 width, u32 height,
u32 const u32 alignment = ((data_pitch % 4) == 0) ? 4 : (((data_pitch % 2) == 0) ? 2 : 1); if (data) { + GPUDevice::GetStatistics().buffer_streamed += data_pitch * height; + GPUDevice::GetStatistics().num_uploads++; + glPixelStorei(GL_UNPACK_ROW_LENGTH, data_pitch / pixel_size); if (alignment != 4) glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); @@ -248,6 +251,9 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data CommitClear(); + GPUDevice::GetStatistics().buffer_streamed += map_size; + GPUDevice::GetStatistics().num_uploads++; + OpenGLDevice::BindUpdateTextureUnit(); glBindTexture(target, m_id); @@ -309,6 +315,10 @@ void OpenGLTexture::Unmap() const u32 pitch = Common::AlignUpPow2(static_cast(m_map_width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); const u32 upload_size = pitch * static_cast(m_map_height); + + GPUDevice::GetStatistics().buffer_streamed += upload_size; + GPUDevice::GetStatistics().num_uploads++; + OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); sb->Unmap(upload_size); sb->Bind(); @@ -627,7 +637,10 @@ void* OpenGLTextureBuffer::Map(u32 required_elements) void OpenGLTextureBuffer::Unmap(u32 used_elements) { - m_buffer->Unmap(used_elements * GetElementSize(m_format)); + const u32 size = used_elements * GetElementSize(m_format); + GPUDevice::GetStatistics().buffer_streamed += size; + GPUDevice::GetStatistics().num_uploads++; + m_buffer->Unmap(size); } void OpenGLTextureBuffer::SetDebugName(const std::string_view& name) diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index a7adbc707..224cd6e5d 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -2568,6 +2568,8 @@ void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 if (InRenderPass()) EndRenderPass(); + s_stats.num_copies++; + S->SetUseFenceCounter(GetCurrentFenceCounter()); D->SetUseFenceCounter(GetCurrentFenceCounter()); S->TransitionToLayout((D == S) ? 
VulkanTexture::Layout::TransferSelf : VulkanTexture::Layout::TransferSrc); @@ -2592,6 +2594,8 @@ void VulkanDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u if (InRenderPass()) EndRenderPass(); + s_stats.num_copies++; + VulkanTexture* D = static_cast(dst); VulkanTexture* S = static_cast(src); const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); @@ -2699,7 +2703,9 @@ void VulkanDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map void VulkanDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) { - m_vertex_buffer.CommitMemory(vertex_size * vertex_count); + const u32 size = vertex_size * vertex_count; + s_stats.buffer_streamed += size; + m_vertex_buffer.CommitMemory(size); } void VulkanDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) @@ -2719,12 +2725,15 @@ void VulkanDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map void VulkanDevice::UnmapIndexBuffer(u32 used_index_count) { - m_index_buffer.CommitMemory(sizeof(DrawIndex) * used_index_count); + const u32 size = sizeof(DrawIndex) * used_index_count; + s_stats.buffer_streamed += size; + m_index_buffer.CommitMemory(size); } void VulkanDevice::PushUniformBuffer(const void* data, u32 data_size) { DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE); + s_stats.buffer_streamed += data_size; vkCmdPushConstants(GetCurrentCommandBuffer(), GetCurrentVkPipelineLayout(), UNIFORM_PUSH_CONSTANTS_STAGES, 0, data_size, data); } @@ -2745,6 +2754,7 @@ void* VulkanDevice::MapUniformBuffer(u32 size) void VulkanDevice::UnmapUniformBuffer(u32 size) { + s_stats.buffer_streamed += size; m_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset(); m_uniform_buffer.CommitMemory(size); m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; @@ -3147,6 +3157,8 @@ void VulkanDevice::BeginRenderPass() vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &bi, VK_SUBPASS_CONTENTS_INLINE); } + s_stats.num_render_passes++; + // If this is a new 
command buffer, bind the pipeline and such. if (m_dirty_flags & DIRTY_FLAG_INITIAL) SetInitialPipelineState(); @@ -3212,6 +3224,7 @@ void VulkanDevice::BeginSwapChainRenderPass() vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE); } + s_stats.num_render_passes++; m_num_current_render_targets = 0; std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); m_current_depth_target = nullptr; @@ -3568,11 +3581,13 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty) void VulkanDevice::Draw(u32 vertex_count, u32 base_vertex) { PreDrawCheck(); + s_stats.num_draws++; vkCmdDraw(GetCurrentCommandBuffer(), vertex_count, 1, base_vertex, 0); } void VulkanDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) { PreDrawCheck(); + s_stats.num_draws++; vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0); } diff --git a/src/util/vulkan_texture.cpp b/src/util/vulkan_texture.cpp index 3c032b1e5..f82f810b8 100644 --- a/src/util/vulkan_texture.cpp +++ b/src/util/vulkan_texture.cpp @@ -339,6 +339,9 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data sbuffer.CommitMemory(required_size); } + GPUDevice::GetStatistics().buffer_streamed += required_size; + GPUDevice::GetStatistics().num_uploads++; + const VkCommandBuffer cmdbuf = GetCommandBufferForUpdate(); // if we're an rt and have been cleared, and the full rect isn't being uploaded, do the clear @@ -407,6 +410,9 @@ void VulkanTexture::Unmap() const u32 offset = sb.GetCurrentOffset(); sb.CommitMemory(req_size); + GPUDevice::GetStatistics().buffer_streamed += req_size; + GPUDevice::GetStatistics().num_uploads++; + // first time the texture is used? 
don't leave it undefined const VkCommandBuffer cmdbuf = GetCommandBufferForUpdate(); if (m_layout == Layout::Undefined) @@ -745,6 +751,8 @@ bool VulkanDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, return false; } + s_stats.num_downloads++; + if (InRenderPass()) EndRenderPass(); @@ -1015,7 +1023,10 @@ void* VulkanTextureBuffer::Map(u32 required_elements) void VulkanTextureBuffer::Unmap(u32 used_elements) { - m_buffer.CommitMemory(GetElementSize(m_format) * used_elements); + const u32 size = GetElementSize(m_format) * used_elements; + GPUDevice::GetStatistics().buffer_streamed += size; + GPUDevice::GetStatistics().num_uploads++; + m_buffer.CommitMemory(size); } void VulkanTextureBuffer::SetDebugName(const std::string_view& name)