From bec0d6e7df839a8155bc673e8c5c84645d3d4a30 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 3 Sep 2022 14:15:15 +1000 Subject: [PATCH] HostDisplay: Add GPU usage statistics --- src/common/d3d12/context.cpp | 109 +++++++++++++- src/common/d3d12/context.h | 14 ++ src/common/gl/context.cpp | 23 ++- src/common/gl/program.cpp | 2 +- src/common/vulkan/context.cpp | 79 +++++++++- src/common/vulkan/context.h | 9 ++ src/core/gpu.cpp | 15 +- src/core/host_display.cpp | 26 +++- src/core/host_display.h | 28 ++-- src/core/settings.cpp | 2 + src/core/settings.h | 1 + src/core/system.cpp | 49 ++++-- src/core/system.h | 2 + src/frontend-common/d3d11_host_display.cpp | 144 +++++++++++++++++- src/frontend-common/d3d11_host_display.h | 16 ++ src/frontend-common/d3d12_host_display.cpp | 16 +- src/frontend-common/d3d12_host_display.h | 47 +++--- src/frontend-common/imgui_overlays.cpp | 7 + src/frontend-common/opengl_host_display.cpp | 159 ++++++++++++++++++-- src/frontend-common/opengl_host_display.h | 17 +++ src/frontend-common/vulkan_host_display.cpp | 31 +++- src/frontend-common/vulkan_host_display.h | 3 + 22 files changed, 698 insertions(+), 101 deletions(-) diff --git a/src/common/d3d12/context.cpp b/src/common/d3d12/context.cpp index 5e3b4dc6d..619976e0b 100644 --- a/src/common/d3d12/context.cpp +++ b/src/common/d3d12/context.cpp @@ -138,7 +138,7 @@ bool Context::Create(IDXGIFactory* dxgi_factory, u32 adapter_index, bool enable_ if (!g_d3d12_context->CreateDevice(dxgi_factory, adapter_index, enable_debug_layer) || !g_d3d12_context->CreateCommandQueue() || !g_d3d12_context->CreateFence() || !g_d3d12_context->CreateDescriptorHeaps() || !g_d3d12_context->CreateCommandLists() || - !g_d3d12_context->CreateTextureStreamBuffer()) + !g_d3d12_context->CreateTimestampQuery() || !g_d3d12_context->CreateTextureStreamBuffer()) { Destroy(); return false; @@ -326,20 +326,64 @@ void Context::MoveToNextCommandList() // We may have to wait if this command list hasn't finished on the 
GPU. CommandListResources& res = m_command_lists[m_current_command_list]; WaitForFence(res.ready_fence_value); + res.ready_fence_value = m_current_fence_value; // Begin command list. res.command_allocator->Reset(); res.command_list->Reset(res.command_allocator.Get(), nullptr); + + if (res.has_timestamp_query) + { + // readback timestamp from the last time this cmdlist was used. + // we don't need to worry about disjoint in dx12, the frequency is reliable within a single cmdlist. + const u32 offset = (m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)); + const D3D12_RANGE read_range = {offset, offset + (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)}; + void* map; + HRESULT hr = m_timestamp_query_buffer->Map(0, &read_range, &map); + if (SUCCEEDED(hr)) + { + u64 timestamps[2]; + std::memcpy(timestamps, static_cast(map) + offset, sizeof(timestamps)); + m_accumulated_gpu_time += + static_cast(static_cast(timestamps[1] - timestamps[0]) / m_timestamp_frequency); + + const D3D12_RANGE write_range = {}; + m_timestamp_query_buffer->Unmap(0, &write_range); + } + else + { + Log_WarningPrintf("Map() for timestamp query failed: %08X", hr); + } + } + + res.has_timestamp_query = m_gpu_timing_enabled; + if (m_gpu_timing_enabled) + { + res.command_list->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, + m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST); + } + res.command_list->SetDescriptorHeaps(static_cast(m_gpu_descriptor_heaps.size()), m_gpu_descriptor_heaps.data()); - res.ready_fence_value = m_current_fence_value; } void Context::ExecuteCommandList(bool wait_for_completion) { CommandListResources& res = m_command_lists[m_current_command_list]; + HRESULT hr; + + if (res.has_timestamp_query) + { + // write the timestamp back at the end of the cmdlist + res.command_list->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, + (m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST) + 1); + 
res.command_list->ResolveQueryData(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, + m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST, + NUM_TIMESTAMP_QUERIES_PER_CMDLIST, m_timestamp_query_buffer.Get(), + m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)); + } // Close and queue command list. - HRESULT hr = res.command_list->Close(); + hr = res.command_list->Close(); AssertMsg(SUCCEEDED(hr), "Close command list"); const std::array execute_lists{res.command_list.Get()}; m_command_queue->ExecuteCommandLists(static_cast(execute_lists.size()), execute_lists.data()); @@ -391,6 +435,8 @@ void Context::DestroyResources() { ExecuteCommandList(true); + m_timestamp_query_buffer.Reset(); + m_timestamp_query_heap.Reset(); m_texture_stream_buffer.Destroy(false); m_descriptor_heap_manager.Free(&m_null_srv_descriptor); m_sampler_heap_manager.Destroy(); @@ -450,4 +496,61 @@ void Context::WaitForGPUIdle() index = (index + 1) % NUM_COMMAND_LISTS; } } + +bool Context::CreateTimestampQuery() +{ + constexpr u32 QUERY_COUNT = NUM_TIMESTAMP_QUERIES_PER_CMDLIST * NUM_COMMAND_LISTS; + constexpr u32 BUFFER_SIZE = sizeof(u64) * QUERY_COUNT; + + const D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_TIMESTAMP, QUERY_COUNT}; + HRESULT hr = m_device->CreateQueryHeap(&desc, IID_PPV_ARGS(m_timestamp_query_heap.ReleaseAndGetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateQueryHeap() for timestamp failed with %08X", hr); + return false; + } + + const D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_READBACK}; + const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + BUFFER_SIZE, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; + hr = m_device->CreateCommittedResource(&heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(m_timestamp_query_buffer.ReleaseAndGetAddressOf())); + if 
(FAILED(hr)) + { + Log_ErrorPrintf("CreateResource() for timestamp failed with %08X", hr); + return false; + } + + u64 frequency; + hr = m_command_queue->GetTimestampFrequency(&frequency); + if (FAILED(hr)) + { + Log_ErrorPrintf("GetTimestampFrequency() failed: %08X", hr); + return false; + } + + m_timestamp_frequency = static_cast(frequency) / 1000.0; + return true; +} + +float Context::GetAndResetAccumulatedGPUTime() +{ + const float time = m_accumulated_gpu_time; + m_accumulated_gpu_time = 0.0f; + return time; +} + +void Context::SetEnableGPUTiming(bool enabled) +{ + m_gpu_timing_enabled = enabled; +} } // namespace D3D12 diff --git a/src/common/d3d12/context.h b/src/common/d3d12/context.h index 474eaeb3d..aeb562c41 100644 --- a/src/common/d3d12/context.h +++ b/src/common/d3d12/context.h @@ -31,6 +31,9 @@ public: // Textures that don't fit into this buffer will be uploaded with a staging buffer. TEXTURE_UPLOAD_BUFFER_SIZE = 16 * 1024 * 1024, + + /// Start/End timestamp queries. + NUM_TIMESTAMP_QUERIES_PER_CMDLIST = 2, }; ~Context(); @@ -92,6 +95,9 @@ public: void DeferDescriptorDestruction(DescriptorHeapManager& manager, u32 index); void DeferDescriptorDestruction(DescriptorHeapManager& manager, DescriptorHandle* handle); + float GetAndResetAccumulatedGPUTime(); + void SetEnableGPUTiming(bool enabled); + private: struct CommandListResources { @@ -100,6 +106,7 @@ private: std::vector pending_resources; std::vector> pending_descriptors; u64 ready_fence_value = 0; + bool has_timestamp_query = false; }; Context(); @@ -110,6 +117,7 @@ private: bool CreateDescriptorHeaps(); bool CreateCommandLists(); bool CreateTextureStreamBuffer(); + bool CreateTimestampQuery(); void MoveToNextCommandList(); void DestroyPendingResources(CommandListResources& cmdlist); void DestroyResources(); @@ -126,6 +134,12 @@ private: std::array m_command_lists; u32 m_current_command_list = NUM_COMMAND_LISTS - 1; + ComPtr m_timestamp_query_heap; + ComPtr m_timestamp_query_buffer; + double 
m_timestamp_frequency = 0.0; + float m_accumulated_gpu_time = 0.0f; + bool m_gpu_timing_enabled = false; + DescriptorHeapManager m_descriptor_heap_manager; DescriptorHeapManager m_rtv_heap_manager; DescriptorHeapManager m_dsv_heap_manager; diff --git a/src/common/gl/context.cpp b/src/common/gl/context.cpp index ca29c2f4b..33f480159 100644 --- a/src/common/gl/context.cpp +++ b/src/common/gl/context.cpp @@ -55,14 +55,27 @@ static bool ShouldPreferESContext() #endif } -static void DisableBrokenExtensions(const char* gl_vendor, const char* gl_renderer) +static void DisableBrokenExtensions(const char* gl_vendor, const char* gl_renderer, const char* gl_version) { if (std::strstr(gl_vendor, "ARM")) { // GL_{EXT,OES}_copy_image seem to be implemented on the CPU in the Mali drivers... - Log_VerbosePrintf("Mali driver detected, disabling GL_{EXT,OES}_copy_image"); - GLAD_GL_EXT_copy_image = 0; - GLAD_GL_OES_copy_image = 0; + // Older drivers don't implement timer queries correctly either. + int gl_major_version, gl_minor_version, unused_version, major_version, patch_version; + if (std::sscanf(gl_version, "OpenGL ES %d.%d v%d.r%dp%d", &gl_major_version, &gl_minor_version, &unused_version, + &major_version, &patch_version) == 5 && + gl_major_version >= 3 && gl_minor_version >= 2 && major_version >= 32) + { + // r32p0 and beyond seem okay. 
+ Log_VerbosePrintf("Keeping copy_image for driver version '%s'", gl_version); + } + else + { + Log_VerbosePrintf("Older Mali driver detected, disabling GL_{EXT,OES}_copy_image, disjoint_timer_query."); + GLAD_GL_EXT_copy_image = 0; + GLAD_GL_OES_copy_image = 0; + GLAD_GL_EXT_disjoint_timer_query = 0; + } } } @@ -173,7 +186,7 @@ std::unique_ptr Context::Create(const WindowInfo& wi, const Version Log_InfoPrintf("GL_VERSION: %s", gl_version); Log_InfoPrintf("GL_SHADING_LANGUAGE_VERSION: %s", gl_shading_language_version); - DisableBrokenExtensions(gl_vendor, gl_renderer); + DisableBrokenExtensions(gl_vendor, gl_renderer, gl_version); return context; } diff --git a/src/common/gl/program.cpp b/src/common/gl/program.cpp index 65bd6924e..8c0d00c41 100644 --- a/src/common/gl/program.cpp +++ b/src/common/gl/program.cpp @@ -164,7 +164,7 @@ bool Program::GetBinary(std::vector* out_data, u32* out_data_format) } *out_data_format = static_cast(format); - Log_InfoPrintf("Program binary retrieved, %zu bytes, format %u", out_data->size(), *out_data_format); + Log_DevPrintf("Program binary retrieved, %zu bytes, format %u", out_data->size(), *out_data_format); return true; } diff --git a/src/common/vulkan/context.cpp b/src/common/vulkan/context.cpp index 84fcd6b4c..954a80e9f 100644 --- a/src/common/vulkan/context.cpp +++ b/src/common/vulkan/context.cpp @@ -622,9 +622,17 @@ bool Context::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer, c // Grab the graphics and present queues. 
vkGetDeviceQueue(m_device, m_graphics_queue_family_index, 0, &m_graphics_queue); if (surface) - { vkGetDeviceQueue(m_device, m_present_queue_family_index, 0, &m_present_queue); - } + + m_gpu_timing_supported = (m_device_properties.limits.timestampComputeAndGraphics != 0 && + queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 && + m_device_properties.limits.timestampPeriod > 0); + Log_VerbosePrintf("GPU timing is %s (TS=%u TS valid bits=%u, TS period=%f)", + m_gpu_timing_supported ? "supported" : "not supported", + static_cast(m_device_properties.limits.timestampComputeAndGraphics), + queue_family_properties[m_graphics_queue_family_index].timestampValidBits, + m_device_properties.limits.timestampPeriod); + return true; } @@ -751,6 +759,20 @@ bool Context::CreateGlobalDescriptorPool() return false; } Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_global_descriptor_pool, "Global Descriptor Pool"); + + if (m_gpu_timing_supported) + { + const VkQueryPoolCreateInfo query_create_info = { + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0, VK_QUERY_TYPE_TIMESTAMP, NUM_COMMAND_BUFFERS * 2, 0}; + res = vkCreateQueryPool(m_device, &query_create_info, nullptr, &m_timestamp_query_pool); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: "); + m_gpu_timing_supported = false; + return false; + } + } + return true; } @@ -831,6 +853,19 @@ void Context::WaitForGPUIdle() vkDeviceWaitIdle(m_device); } +float Context::GetAndResetAccumulatedGPUTime() +{ + const float time = m_accumulated_gpu_time; + m_accumulated_gpu_time = 0.0f; + return time; +} + +bool Context::SetEnableGPUTiming(bool enabled) +{ + m_gpu_timing_enabled = enabled && m_gpu_timing_supported; + return (enabled == m_gpu_timing_enabled); +} + void Context::WaitForCommandBufferCompletion(u32 index) { // Wait for this command buffer to be completed. 
@@ -868,6 +903,12 @@ void Context::SubmitCommandBuffer(VkSemaphore wait_semaphore /* = VK_NULL_HANDLE { FrameResources& resources = m_frame_resources[m_current_frame]; + if (m_gpu_timing_enabled && resources.timestamp_written) + { + vkCmdWriteTimestamp(m_current_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, + m_current_frame * 2 + 1); + } + // End the current command buffer. VkResult res = vkEndCommandBuffer(resources.command_buffer); if (res != VK_SUCCESS) @@ -1048,9 +1089,41 @@ void Context::ActivateCommandBuffer(u32 index) if (res != VK_SUCCESS) LOG_VULKAN_ERROR(res, "vkResetDescriptorPool failed: "); + if (m_gpu_timing_enabled) + { + if (resources.timestamp_written) + { + std::array timestamps; + res = + vkGetQueryPoolResults(m_device, m_timestamp_query_pool, index * 2, static_cast(timestamps.size()), + sizeof(u64) * timestamps.size(), timestamps.data(), sizeof(u64), VK_QUERY_RESULT_64_BIT); + if (res == VK_SUCCESS) + { + // if we didn't write the timestamp at the start of the cmdbuffer (just enabled timing), the first TS will be + // zero + if (timestamps[0] > 0) + { + const double ns_diff = + (timestamps[1] - timestamps[0]) * static_cast(m_device_properties.limits.timestampPeriod); + m_accumulated_gpu_time = static_cast(static_cast(m_accumulated_gpu_time) + (ns_diff / 1000000.0)); + } + } + else + { + LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: "); + } + } + + vkCmdResetQueryPool(resources.command_buffer, m_timestamp_query_pool, index * 2, 2); + vkCmdWriteTimestamp(resources.command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, + index * 2); + } + + resources.fence_counter = m_next_fence_counter++; + resources.timestamp_written = m_gpu_timing_enabled; + m_current_frame = index; m_current_command_buffer = resources.command_buffer; - resources.fence_counter = m_next_fence_counter++; } void Context::ExecuteCommandBuffer(bool wait_for_completion) diff --git a/src/common/vulkan/context.h 
b/src/common/vulkan/context.h index d49e65299..e1fe79ee9 100644 --- a/src/common/vulkan/context.h +++ b/src/common/vulkan/context.h @@ -180,6 +180,9 @@ public: void WaitForGPUIdle(); + float GetAndResetAccumulatedGPUTime(); + bool SetEnableGPUTiming(bool enabled); + private: Context(VkInstance instance, VkPhysicalDevice physical_device, bool owns_device); @@ -216,6 +219,7 @@ private: VkFence fence = VK_NULL_HANDLE; u64 fence_counter = 0; bool needs_fence_wait = false; + bool timestamp_written = false; std::vector> cleanup_resources; }; @@ -233,6 +237,11 @@ private: VkQueue m_present_queue = VK_NULL_HANDLE; u32 m_present_queue_family_index = 0; + VkQueryPool m_timestamp_query_pool = VK_NULL_HANDLE; + float m_accumulated_gpu_time = 0.0f; + bool m_gpu_timing_enabled = false; + bool m_gpu_timing_supported = false; + std::array m_frame_resources; u64 m_next_fence_counter = 1; u64 m_completed_fence_counter = 0; diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index adbe964d1..7c68d9ac1 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -22,7 +22,11 @@ const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::Genera GPU::GPU() = default; -GPU::~GPU() = default; +GPU::~GPU() +{ + if (g_host_display) + g_host_display->SetGPUTimingEnabled(false); +} bool GPU::Initialize() { @@ -41,15 +45,14 @@ bool GPU::Initialize() m_console_is_pal = System::IsPALRegion(); UpdateCRTCConfig(); - g_host_display->SetDisplayLinearFiltering(g_settings.display_linear_filtering); - g_host_display->SetDisplayIntegerScaling(g_settings.display_integer_scaling); - g_host_display->SetDisplayStretch(g_settings.display_stretch); if (g_settings.display_post_processing && !g_settings.display_post_process_chain.empty() && !g_host_display->SetPostProcessingChain(g_settings.display_post_process_chain)) { Host::AddOSDMessage(Host::TranslateStdString("OSDMessage", "Failed to load post processing shader chain."), 20.0f); } + g_host_display->SetGPUTimingEnabled(g_settings.display_show_gpu); + 
return true; } @@ -69,9 +72,7 @@ void GPU::UpdateSettings() // Crop mode calls this, so recalculate the display area UpdateCRTCDisplayParameters(); - g_host_display->SetDisplayLinearFiltering(g_settings.display_linear_filtering); - g_host_display->SetDisplayIntegerScaling(g_settings.display_integer_scaling); - g_host_display->SetDisplayStretch(g_settings.display_stretch); + g_host_display->SetGPUTimingEnabled(g_settings.display_show_gpu); } bool GPU::IsHardwareRenderer() diff --git a/src/core/host_display.cpp b/src/core/host_display.cpp index 5ecc7876b..6cc27e92a 100644 --- a/src/core/host_display.cpp +++ b/src/core/host_display.cpp @@ -5,6 +5,7 @@ #include "common/log.h" #include "common/string_util.h" #include "common/timer.h" +#include "settings.h" #include "stb_image.h" #include "stb_image_resize.h" #include "stb_image_write.h" @@ -164,6 +165,16 @@ bool HostDisplay::GetHostRefreshRate(float* refresh_rate) return WindowInfo::QueryRefreshRateForWindow(m_window_info, refresh_rate); } +bool HostDisplay::SetGPUTimingEnabled(bool enabled) +{ + return false; +} + +float HostDisplay::GetAndResetAccumulatedGPUTime() +{ + return 0.0f; +} + void HostDisplay::SetSoftwareCursor(std::unique_ptr texture, float scale /*= 1.0f*/) { m_cursor_texture = std::move(texture); @@ -216,13 +227,18 @@ void HostDisplay::ClearSoftwareCursor() m_cursor_texture_scale = 1.0f; } +bool HostDisplay::IsUsingLinearFiltering() const +{ + return g_settings.display_linear_filtering; +} + void HostDisplay::CalculateDrawRect(s32 window_width, s32 window_height, float* out_left, float* out_top, float* out_width, float* out_height, float* out_left_padding, float* out_top_padding, float* out_scale, float* out_x_scale, bool apply_aspect_ratio /* = true */) const { const float window_ratio = static_cast(window_width) / static_cast(window_height); - const float display_aspect_ratio = m_display_stretch ? window_ratio : m_display_aspect_ratio; + const float display_aspect_ratio = g_settings.display_stretch ? 
window_ratio : m_display_aspect_ratio; const float x_scale = apply_aspect_ratio ? (display_aspect_ratio / (static_cast(m_display_width) / static_cast(m_display_height))) : @@ -242,12 +258,12 @@ void HostDisplay::CalculateDrawRect(s32 window_width, s32 window_height, float* { // align in middle vertically scale = static_cast(window_width) / display_width; - if (m_display_integer_scaling) + if (g_settings.display_integer_scaling) scale = std::max(std::floor(scale), 1.0f); if (out_left_padding) { - if (m_display_integer_scaling) + if (g_settings.display_integer_scaling) *out_left_padding = std::max((static_cast(window_width) - display_width * scale) / 2.0f, 0.0f); else *out_left_padding = 0.0f; @@ -276,7 +292,7 @@ void HostDisplay::CalculateDrawRect(s32 window_width, s32 window_height, float* { // align in middle horizontally scale = static_cast(window_height) / display_height; - if (m_display_integer_scaling) + if (g_settings.display_integer_scaling) scale = std::max(std::floor(scale), 1.0f); if (out_left_padding) @@ -301,7 +317,7 @@ void HostDisplay::CalculateDrawRect(s32 window_width, s32 window_height, float* if (out_top_padding) { - if (m_display_integer_scaling) + if (g_settings.display_integer_scaling) *out_top_padding = std::max((static_cast(window_height) - (display_height * scale)) / 2.0f, 0.0f); else *out_top_padding = 0.0f; diff --git a/src/core/host_display.h b/src/core/host_display.h index 3c6b52143..8e15907f3 100644 --- a/src/core/host_display.h +++ b/src/core/host_display.h @@ -85,6 +85,13 @@ public: m_mouse_position_y = y; } + ALWAYS_INLINE const void* GetDisplayTextureHandle() const { return m_display_texture_handle; } + ALWAYS_INLINE s32 GetDisplayTopMargin() const { return m_display_top_margin; } + ALWAYS_INLINE s32 GetDisplayWidth() const { return m_display_width; } + ALWAYS_INLINE s32 GetDisplayHeight() const { return m_display_height; } + ALWAYS_INLINE float GetDisplayAspectRatio() const { return m_display_aspect_ratio; } + ALWAYS_INLINE bool 
IsGPUTimingEnabled() const { return m_gpu_timing_enabled; } + virtual RenderAPI GetRenderAPI() const = 0; virtual void* GetRenderDevice() const = 0; virtual void* GetRenderContext() const = 0; @@ -137,12 +144,6 @@ public: virtual void DestroyImGuiContext() = 0; virtual bool UpdateImGuiFontTexture() = 0; - const void* GetDisplayTextureHandle() const { return m_display_texture_handle; } - s32 GetDisplayTopMargin() const { return m_display_top_margin; } - s32 GetDisplayWidth() const { return m_display_width; } - s32 GetDisplayHeight() const { return m_display_height; } - float GetDisplayAspectRatio() const { return m_display_aspect_ratio; } - bool UsesLowerLeftOrigin() const; void SetDisplayMaxFPS(float max_fps); bool ShouldSkipDisplayingFrame(); @@ -209,11 +210,14 @@ public: virtual bool GetHostRefreshRate(float* refresh_rate); - void SetDisplayLinearFiltering(bool enabled) { m_display_linear_filtering = enabled; } + /// Enables/disables GPU frame timing. + virtual bool SetGPUTimingEnabled(bool enabled); + + /// Returns the amount of GPU time utilized since the last time this method was called. + virtual float GetAndResetAccumulatedGPUTime(); + void SetDisplayTopMargin(s32 height) { m_display_top_margin = height; } - void SetDisplayIntegerScaling(bool enabled) { m_display_integer_scaling = enabled; } void SetDisplayAlignment(Alignment alignment) { m_display_alignment = alignment; } - void SetDisplayStretch(bool stretch) { m_display_stretch = stretch; } /// Sets the software cursor to the specified texture. Ownership of the texture is transferred. 
void SetSoftwareCursor(std::unique_ptr texture, float scale = 1.0f); @@ -256,6 +260,8 @@ protected: ALWAYS_INLINE bool HasSoftwareCursor() const { return static_cast(m_cursor_texture); } ALWAYS_INLINE bool HasDisplayTexture() const { return (m_display_texture_handle != nullptr); } + bool IsUsingLinearFiltering() const; + void CalculateDrawRect(s32 window_width, s32 window_height, float* out_left, float* out_top, float* out_width, float* out_height, float* out_left_padding, float* out_top_padding, float* out_scale, float* out_x_scale, bool apply_aspect_ratio = true) const; @@ -294,10 +300,8 @@ protected: std::unique_ptr m_cursor_texture; float m_cursor_texture_scale = 1.0f; - bool m_display_linear_filtering = false; bool m_display_changed = false; - bool m_display_integer_scaling = false; - bool m_display_stretch = false; + bool m_gpu_timing_enabled = false; }; /// Returns a pointer to the current host display abstraction. Assumes AcquireHostDisplay() has been caled. diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 3db665e43..173b42c3d 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -255,6 +255,7 @@ void Settings::Load(SettingsInterface& si) display_show_speed = si.GetBoolValue("Display", "ShowSpeed", false); display_show_resolution = si.GetBoolValue("Display", "ShowResolution", false); display_show_cpu = si.GetBoolValue("Display", "ShowCPU", false); + display_show_gpu = si.GetBoolValue("Display", "ShowGPU", false); display_show_status_indicators = si.GetBoolValue("Display", "ShowStatusIndicators", true); display_show_inputs = si.GetBoolValue("Display", "ShowInputs", false); display_show_enhancements = si.GetBoolValue("Display", "ShowEnhancements", false); @@ -459,6 +460,7 @@ void Settings::Save(SettingsInterface& si) const si.SetBoolValue("Display", "ShowSpeed", display_show_speed); si.SetBoolValue("Display", "ShowResolution", display_show_resolution); si.SetBoolValue("Display", "ShowCPU", display_show_cpu); + 
si.SetBoolValue("Display", "ShowGPU", display_show_gpu); si.SetBoolValue("Display", "ShowStatusIndicators", display_show_status_indicators); si.SetBoolValue("Display", "ShowInputs", display_show_inputs); si.SetBoolValue("Display", "ShowEnhancements", display_show_enhancements); diff --git a/src/core/settings.h b/src/core/settings.h index 54721ba59..199bf805a 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -129,6 +129,7 @@ struct Settings bool display_show_speed = false; bool display_show_resolution = false; bool display_show_cpu = false; + bool display_show_gpu = false; bool display_show_status_indicators = true; bool display_show_inputs = false; bool display_show_enhancements = false; diff --git a/src/core/system.cpp b/src/core/system.cpp index 118eae605..d6364e59a 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -162,11 +162,15 @@ static float s_cpu_thread_usage = 0.0f; static float s_cpu_thread_time = 0.0f; static float s_sw_thread_usage = 0.0f; static float s_sw_thread_time = 0.0f; +static float s_average_gpu_time = 0.0f; +static float s_accumulated_gpu_time = 0.0f; +static float s_gpu_usage = 0.0f; static u32 s_last_frame_number = 0; static u32 s_last_internal_frame_number = 0; static u32 s_last_global_tick_counter = 0; static u64 s_last_cpu_time = 0; static u64 s_last_sw_time = 0; +static u32 s_presents_since_last_update = 0; static Common::Timer s_fps_timer; static Common::Timer s_frame_timer; static Threading::ThreadHandle s_cpu_thread_handle; @@ -350,6 +354,14 @@ float System::GetSWThreadAverageTime() { return s_sw_thread_time; } +float System::GetGPUUsage() +{ + return s_gpu_usage; +} +float System::GetGPUAverageTime() +{ + return s_average_gpu_time; +} bool System::IsExeFileName(const std::string_view& path) { @@ -1257,9 +1269,13 @@ bool System::Initialize(bool force_software_renderer) s_cpu_thread_time = 0.0f; s_sw_thread_usage = 0.0f; s_sw_thread_time = 0.0f; + s_average_gpu_time = 0.0f; + s_accumulated_gpu_time = 0.0f; + 
s_gpu_usage = 0.0f; s_last_frame_number = 0; s_last_internal_frame_number = 0; s_last_global_tick_counter = 0; + s_presents_since_last_update = 0; s_last_cpu_time = 0; s_fps_timer.Reset(); s_frame_timer.Reset(); @@ -1427,7 +1443,13 @@ void System::Execute() PauseSystem(true); } - Host::RenderDisplay(g_host_display->ShouldSkipDisplayingFrame()); + const bool skip_present = g_host_display->ShouldSkipDisplayingFrame(); + Host::RenderDisplay(skip_present); + if (!skip_present && g_host_display->IsGPUTimingEnabled()) + { + s_accumulated_gpu_time += g_host_display->GetAndResetAccumulatedGPUTime(); + s_presents_since_last_update++; + } System::UpdatePerformanceCounters(); @@ -2125,7 +2147,7 @@ void System::UpdatePerformanceCounters() if (time < 1.0f) return; - const float frames_presented = static_cast(s_frame_number - s_last_frame_number); + const float frames_run = static_cast(s_frame_number - s_last_frame_number); const u32 global_tick_counter = TimingEvents::GetGlobalTickCounter(); // TODO: Make the math here less rubbish @@ -2133,13 +2155,13 @@ void System::UpdatePerformanceCounters() 100.0 * (1.0 / ((static_cast(ticks_diff) * static_cast(Threading::GetThreadTicksPerSecond())) / Common::Timer::GetFrequency() / 1000000000.0)); const double time_divider = 1000.0 * (1.0 / static_cast(Threading::GetThreadTicksPerSecond())) * - (1.0 / static_cast(frames_presented)); + (1.0 / static_cast(frames_run)); s_worst_frame_time = s_worst_frame_time_accumulator; s_worst_frame_time_accumulator = 0.0f; - s_average_frame_time = s_average_frame_time_accumulator / frames_presented; + s_average_frame_time = s_average_frame_time_accumulator / frames_run; s_average_frame_time_accumulator = 0.0f; - s_vps = static_cast(frames_presented / time); + s_vps = static_cast(frames_run / time); s_last_frame_number = s_frame_number; s_fps = static_cast(s_internal_frame_number - s_last_internal_frame_number) / time; s_last_internal_frame_number = s_internal_frame_number; @@ -2163,8 +2185,16 @@ void 
System::UpdatePerformanceCounters() s_fps_timer.ResetTo(now_ticks); - Log_VerbosePrintf("FPS: %.2f VPS: %.2f CPU: %.2f Average: %.2fms Worst: %.2fms", s_fps, s_vps, s_cpu_thread_usage, - s_average_frame_time, s_worst_frame_time); + if (g_host_display->IsGPUTimingEnabled()) + { + s_average_gpu_time = s_accumulated_gpu_time / static_cast(std::max(s_presents_since_last_update, 1u)); + s_gpu_usage = s_accumulated_gpu_time / (time * 10.0f); + } + s_accumulated_gpu_time = 0.0f; + s_presents_since_last_update = 0; + + Log_VerbosePrintf("FPS: %.2f VPS: %.2f CPU: %.2f GPU: %.2f Average: %.2fms Worst: %.2fms", s_fps, s_vps, + s_cpu_thread_usage, s_gpu_usage, s_average_frame_time, s_worst_frame_time); Host::OnPerformanceCountersUpdated(); } @@ -3123,10 +3153,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.display_line_start_offset != old_settings.display_line_start_offset || g_settings.display_line_end_offset != old_settings.display_line_end_offset || g_settings.rewind_enable != old_settings.rewind_enable || - g_settings.runahead_frames != old_settings.runahead_frames || - g_settings.display_linear_filtering != old_settings.display_linear_filtering || - g_settings.display_integer_scaling != old_settings.display_integer_scaling || - g_settings.display_stretch != old_settings.display_stretch) + g_settings.runahead_frames != old_settings.runahead_frames) { g_gpu->UpdateSettings(); Host::InvalidateDisplay(); diff --git a/src/core/system.h b/src/core/system.h index 0b4f522ab..b298a25f9 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -181,6 +181,8 @@ float GetCPUThreadUsage(); float GetCPUThreadAverageTime(); float GetSWThreadUsage(); float GetSWThreadAverageTime(); +float GetGPUUsage(); +float GetGPUAverageTime(); /// Loads global settings (i.e. EmuConfig). 
void LoadSettings(bool display_osd_messages); diff --git a/src/frontend-common/d3d11_host_display.cpp b/src/frontend-common/d3d11_host_display.cpp index fea0ed2f7..5d9745794 100644 --- a/src/frontend-common/d3d11_host_display.cpp +++ b/src/frontend-common/d3d11_host_display.cpp @@ -755,6 +755,13 @@ bool D3D11HostDisplay::Render(bool skip_present) return false; } + // When using vsync, the time here seems to include the time for the buffer to become available. + // This blows out our GPU usage number considerably, so read the timestamp before the final blit + // in this configuration. It does reduce accuracy a little, but better than seeing 100% all of + // the time, when it's more like a couple of percent. + if (m_vsync && m_gpu_timing_enabled) + PopTimestampQuery(); + static constexpr std::array clear_color = {}; m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), clear_color.data()); m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr); @@ -766,11 +773,17 @@ bool D3D11HostDisplay::Render(bool skip_present) RenderSoftwareCursor(); + if (!m_vsync && m_gpu_timing_enabled) + PopTimestampQuery(); + if (!m_vsync && m_using_allow_tearing) m_swap_chain->Present(0, DXGI_PRESENT_ALLOW_TEARING); else m_swap_chain->Present(BoolToUInt32(m_vsync), 0); + if (m_gpu_timing_enabled) + KickTimestampQuery(); + return true; } @@ -806,7 +819,7 @@ bool D3D11HostDisplay::RenderScreenshot(u32 width, u32 height, std::vector* { RenderDisplay(left, top, draw_width, draw_height, m_display_texture_handle, m_display_texture_width, m_display_texture_height, m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, m_display_texture_view_height, m_display_linear_filtering); + m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); } } @@ -851,7 +864,7 @@ void D3D11HostDisplay::RenderDisplay() RenderDisplay(left, top, width, height, m_display_texture_handle, m_display_texture_width, m_display_texture_height, 
m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, m_display_linear_filtering); + m_display_texture_view_height, IsUsingLinearFiltering()); } void D3D11HostDisplay::RenderDisplay(s32 left, s32 top, s32 width, s32 height, void* texture_handle, u32 texture_width, @@ -864,8 +877,9 @@ void D3D11HostDisplay::RenderDisplay(s32 left, s32 top, s32 width, s32 height, v m_context->PSSetShaderResources(0, 1, reinterpret_cast(&texture_handle)); m_context->PSSetSamplers(0, 1, linear_filter ? m_linear_sampler.GetAddressOf() : m_point_sampler.GetAddressOf()); - const float position_adjust = m_display_linear_filtering ? 0.5f : 0.0f; - const float size_adjust = m_display_linear_filtering ? 1.0f : 0.0f; + const bool linear = IsUsingLinearFiltering(); + const float position_adjust = linear ? 0.5f : 0.0f; + const float size_adjust = linear ? 1.0f : 0.0f; const float uniforms[4] = { (static_cast(texture_view_x) + position_adjust) / static_cast(texture_width), (static_cast(texture_view_y) + position_adjust) / static_cast(texture_height), @@ -1102,7 +1116,7 @@ void D3D11HostDisplay::ApplyPostProcessingChain(ID3D11RenderTargetView* final_ta if (!CheckPostProcessingRenderTargets(target_width, target_height)) { RenderDisplay(final_left, final_top, final_width, final_height, texture_handle, texture_width, texture_height, - texture_view_x, texture_view_y, texture_view_width, texture_view_height, m_display_linear_filtering); + texture_view_x, texture_view_y, texture_view_width, texture_view_height, IsUsingLinearFiltering()); return; } @@ -1110,7 +1124,7 @@ void D3D11HostDisplay::ApplyPostProcessingChain(ID3D11RenderTargetView* final_ta m_context->ClearRenderTargetView(m_post_processing_input_texture.GetD3DRTV(), clear_color.data()); m_context->OMSetRenderTargets(1, m_post_processing_input_texture.GetD3DRTVArray(), nullptr); RenderDisplay(final_left, final_top, final_width, final_height, texture_handle, texture_width, 
texture_height, - texture_view_x, texture_view_y, texture_view_width, texture_view_height, m_display_linear_filtering); + texture_view_x, texture_view_y, texture_view_width, texture_view_height, IsUsingLinearFiltering()); texture_handle = m_post_processing_input_texture.GetD3DSRV(); texture_width = m_post_processing_input_texture.GetWidth(); @@ -1159,4 +1173,122 @@ void D3D11HostDisplay::ApplyPostProcessingChain(ID3D11RenderTargetView* final_ta m_context->PSSetShaderResources(0, 1, &null_srv); } +bool D3D11HostDisplay::CreateTimestampQueries() +{ + for (u32 i = 0; i < NUM_TIMESTAMP_QUERIES; i++) + { + for (u32 j = 0; j < 3; j++) + { + const CD3D11_QUERY_DESC qdesc((j == 0) ? D3D11_QUERY_TIMESTAMP_DISJOINT : D3D11_QUERY_TIMESTAMP); + const HRESULT hr = m_device->CreateQuery(&qdesc, m_timestamp_queries[i][j].ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + m_timestamp_queries = {}; + return false; + } + } + } + + KickTimestampQuery(); + return true; +} + +void D3D11HostDisplay::DestroyTimestampQueries() +{ + if (!m_timestamp_queries[0][0]) + return; + + if (m_timestamp_query_started) + m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get()); + + m_timestamp_queries = {}; + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = 0; +} + +void D3D11HostDisplay::PopTimestampQuery() +{ + while (m_waiting_timestamp_queries > 0) + { + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint; + const HRESULT disjoint_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][0].Get(), &disjoint, + sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (disjoint_hr != S_OK) + break; + + if (disjoint.Disjoint) + { + Log_VerbosePrintf("GPU timing disjoint, resetting."); + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = 0; + } + else + { + u64 start = 0, end = 0; + const HRESULT start_hr = 
m_context->GetData(m_timestamp_queries[m_read_timestamp_query][1].Get(), &start, + sizeof(start), D3D11_ASYNC_GETDATA_DONOTFLUSH); + const HRESULT end_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][2].Get(), &end, sizeof(end), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (start_hr == S_OK && end_hr == S_OK) + { + const float delta = static_cast(static_cast(end - start) / (static_cast(disjoint.Frequency) / 1000.0)); + m_accumulated_gpu_time += delta; + m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_waiting_timestamp_queries--; + } + } + } + + if (m_timestamp_query_started) + { + m_context->End(m_timestamp_queries[m_write_timestamp_query][2].Get()); + m_context->End(m_timestamp_queries[m_write_timestamp_query][0].Get()); + m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_timestamp_query_started = false; + m_waiting_timestamp_queries++; + } +} + +void D3D11HostDisplay::KickTimestampQuery() +{ + if (m_timestamp_query_started || !m_timestamp_queries[0][0] || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES) + return; + + m_context->Begin(m_timestamp_queries[m_write_timestamp_query][0].Get()); + m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get()); + m_timestamp_query_started = true; +} + +bool D3D11HostDisplay::SetGPUTimingEnabled(bool enabled) +{ + if (m_gpu_timing_enabled == enabled) + return true; + + m_gpu_timing_enabled = enabled; + if (m_gpu_timing_enabled) + { + if (!CreateTimestampQueries()) + return false; + + KickTimestampQuery(); + return true; + } + else + { + DestroyTimestampQueries(); + return true; + } +} + +float D3D11HostDisplay::GetAndResetAccumulatedGPUTime() +{ + const float value = m_accumulated_gpu_time; + m_accumulated_gpu_time = 0.0f; + return value; +} + } // namespace FrontendCommon diff --git a/src/frontend-common/d3d11_host_display.h b/src/frontend-common/d3d11_host_display.h index 8a44f4791..f1623e3f2 100644 --- 
a/src/frontend-common/d3d11_host_display.h +++ b/src/frontend-common/d3d11_host_display.h @@ -65,6 +65,9 @@ public: bool GetHostRefreshRate(float* refresh_rate) override; + bool SetGPUTimingEnabled(bool enabled) override; + float GetAndResetAccumulatedGPUTime() override; + void SetVSync(bool enabled) override; bool Render(bool skip_present) override; @@ -75,6 +78,7 @@ public: protected: static constexpr u32 DISPLAY_UNIFORM_BUFFER_SIZE = 16; + static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; static AdapterAndModeList GetAdapterAndModeList(IDXGIFactory* dxgi_factory); @@ -111,6 +115,11 @@ protected: s32 texture_view_x, s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, u32 target_width, u32 target_height); + bool CreateTimestampQueries(); + void DestroyTimestampQueries(); + void PopTimestampQuery(); + void KickTimestampQuery(); + ComPtr m_device; ComPtr m_context; @@ -140,6 +149,13 @@ protected: PostProcessingChain m_post_processing_chain; D3D11::Texture m_post_processing_input_texture; std::vector m_post_processing_stages; + + std::array, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; + u8 m_read_timestamp_query = 0; + u8 m_write_timestamp_query = 0; + u8 m_waiting_timestamp_queries = 0; + bool m_timestamp_query_started = false; + float m_accumulated_gpu_time = 0.0f; }; } // namespace FrontendCommon diff --git a/src/frontend-common/d3d12_host_display.cpp b/src/frontend-common/d3d12_host_display.cpp index 4d49a2424..449c0b159 100644 --- a/src/frontend-common/d3d12_host_display.cpp +++ b/src/frontend-common/d3d12_host_display.cpp @@ -712,7 +712,7 @@ bool D3D12HostDisplay::RenderScreenshot(u32 width, u32 height, std::vector* const auto [left, top, draw_width, draw_height] = CalculateDrawRect(width, height, 0); RenderDisplay(cmdlist, left, top, draw_width, draw_height, m_display_texture_handle, m_display_texture_width, m_display_texture_height, m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, 
m_display_texture_view_height, m_display_linear_filtering); + m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); } cmdlist->OMSetRenderTargets(0, nullptr, FALSE, nullptr); @@ -728,6 +728,18 @@ bool D3D12HostDisplay::RenderScreenshot(u32 width, u32 height, std::vector* return m_readback_staging_texture.ReadPixels(0, 0, width, height, out_pixels->data(), stride); } +bool D3D12HostDisplay::SetGPUTimingEnabled(bool enabled) +{ + g_d3d12_context->SetEnableGPUTiming(enabled); + m_gpu_timing_enabled = enabled; + return true; +} + +float D3D12HostDisplay::GetAndResetAccumulatedGPUTime() +{ + return g_d3d12_context->GetAndResetAccumulatedGPUTime(); +} + void D3D12HostDisplay::RenderImGui(ID3D12GraphicsCommandList* cmdlist) { ImGui::Render(); @@ -752,7 +764,7 @@ void D3D12HostDisplay::RenderDisplay(ID3D12GraphicsCommandList* cmdlist) RenderDisplay(cmdlist, left, top, width, height, m_display_texture_handle, m_display_texture_width, m_display_texture_height, m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, m_display_texture_view_height, m_display_linear_filtering); + m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); } void D3D12HostDisplay::RenderDisplay(ID3D12GraphicsCommandList* cmdlist, s32 left, s32 top, s32 width, s32 height, diff --git a/src/frontend-common/d3d12_host_display.h b/src/frontend-common/d3d12_host_display.h index 0b1b56a77..aacf33b3a 100644 --- a/src/frontend-common/d3d12_host_display.h +++ b/src/frontend-common/d3d12_host_display.h @@ -17,7 +17,7 @@ namespace FrontendCommon { -class D3D12HostDisplay : public HostDisplay +class D3D12HostDisplay final : public HostDisplay { public: template @@ -26,31 +26,31 @@ public: D3D12HostDisplay(); ~D3D12HostDisplay(); - virtual RenderAPI GetRenderAPI() const override; - virtual void* GetRenderDevice() const override; - virtual void* GetRenderContext() const override; + RenderAPI GetRenderAPI() const override; 
+ void* GetRenderDevice() const override; + void* GetRenderContext() const override; - virtual bool HasRenderDevice() const override; - virtual bool HasRenderSurface() const override; + bool HasRenderDevice() const override; + bool HasRenderSurface() const override; - virtual bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, + bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, bool debug_device, bool threaded_presentation) override; - virtual bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, + bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device, bool threaded_presentation) override; - virtual void DestroyRenderDevice() override; + void DestroyRenderDevice() override; - virtual bool MakeRenderContextCurrent() override; - virtual bool DoneRenderContextCurrent() override; + bool MakeRenderContextCurrent() override; + bool DoneRenderContextCurrent() override; - virtual bool ChangeRenderWindow(const WindowInfo& new_wi) override; - virtual void ResizeRenderWindow(s32 new_window_width, s32 new_window_height) override; - virtual bool SupportsFullscreen() const override; - virtual bool IsFullscreen() override; - virtual bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override; - virtual AdapterAndModeList GetAdapterAndModeList() override; - virtual void DestroyRenderSurface() override; + bool ChangeRenderWindow(const WindowInfo& new_wi) override; + void ResizeRenderWindow(s32 new_window_width, s32 new_window_height) override; + bool SupportsFullscreen() const override; + bool IsFullscreen() override; + bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override; + AdapterAndModeList GetAdapterAndModeList() override; + void DestroyRenderSurface() override; - virtual bool SetPostProcessingChain(const std::string_view& config) override; + bool SetPostProcessingChain(const 
std::string_view& config) override; std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, HostDisplayPixelFormat format, const void* data, u32 data_stride, @@ -66,12 +66,15 @@ public: bool GetHostRefreshRate(float* refresh_rate) override; - virtual void SetVSync(bool enabled) override; + void SetVSync(bool enabled) override; - virtual bool Render(bool skip_present) override; - virtual bool RenderScreenshot(u32 width, u32 height, std::vector* out_pixels, u32* out_stride, + bool Render(bool skip_present) override; + bool RenderScreenshot(u32 width, u32 height, std::vector* out_pixels, u32* out_stride, HostDisplayPixelFormat* out_format) override; + bool SetGPUTimingEnabled(bool enabled) override; + float GetAndResetAccumulatedGPUTime() override; + static AdapterAndModeList StaticGetAdapterAndModeList(); protected: diff --git a/src/frontend-common/imgui_overlays.cpp b/src/frontend-common/imgui_overlays.cpp index c46b06005..39bf73ef3 100644 --- a/src/frontend-common/imgui_overlays.cpp +++ b/src/frontend-common/imgui_overlays.cpp @@ -186,6 +186,13 @@ void ImGuiManager::DrawPerformanceOverlay() #endif } + if (g_settings.display_show_gpu && g_host_display->IsGPUTimingEnabled()) + { + text.Assign("GPU: "); + FormatProcessorStat(text, System::GetGPUUsage(), System::GetGPUAverageTime()); + DRAW_LINE(fixed_font, text, IM_COL32(255, 255, 255, 255)); + } + if (g_settings.display_show_status_indicators) { const bool rewinding = System::IsRewinding(); diff --git a/src/frontend-common/opengl_host_display.cpp b/src/frontend-common/opengl_host_display.cpp index 7a23139c7..33ea0c3f8 100644 --- a/src/frontend-common/opengl_host_display.cpp +++ b/src/frontend-common/opengl_host_display.cpp @@ -178,14 +178,15 @@ void OpenGLHostDisplay::BindDisplayPixelsTexture() { if (m_display_pixels_texture_id == 0) { + const bool linear = IsUsingLinearFiltering(); glGenTextures(1, &m_display_pixels_texture_id); glBindTexture(GL_TEXTURE_2D, 
m_display_pixels_texture_id); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, m_display_linear_filtering ? GL_LINEAR : GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, m_display_linear_filtering ? GL_LINEAR : GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, linear ? GL_LINEAR : GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, linear ? GL_LINEAR : GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); - m_display_texture_is_linear_filtered = m_display_linear_filtering; + m_display_texture_is_linear_filtered = linear; } else { @@ -195,12 +196,13 @@ void OpenGLHostDisplay::BindDisplayPixelsTexture() void OpenGLHostDisplay::UpdateDisplayPixelsTextureFilter() { - if (m_display_linear_filtering == m_display_texture_is_linear_filtered) + const bool linear = IsUsingLinearFiltering(); + if (linear == m_display_texture_is_linear_filtered) return; - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, m_display_linear_filtering ? GL_LINEAR : GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, m_display_linear_filtering ? GL_LINEAR : GL_NEAREST); - m_display_texture_is_linear_filtered = m_display_linear_filtering; + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, linear ? GL_LINEAR : GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, linear ? 
GL_LINEAR : GL_NEAREST); + m_display_texture_is_linear_filtered = linear; } bool OpenGLHostDisplay::SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const @@ -435,7 +437,7 @@ bool OpenGLHostDisplay::InitializeRenderDevice(std::string_view shader_cache_dir glDebugMessageCallback(GLDebugCallback, nullptr); glEnable(GL_DEBUG_OUTPUT); - // glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); } if (!CreateResources()) @@ -520,8 +522,7 @@ HostDisplay::AdapterAndModeList OpenGLHostDisplay::GetAdapterAndModeList() { for (const GL::Context::FullscreenModeInfo& fmi : m_gl_context->EnumerateFullscreenModes()) { - aml.fullscreen_modes.push_back( - GetFullscreenModeString(fmi.width, fmi.height, fmi.refresh_rate)); + aml.fullscreen_modes.push_back(GetFullscreenModeString(fmi.width, fmi.height, fmi.refresh_rate)); } } @@ -760,7 +761,14 @@ bool OpenGLHostDisplay::Render(bool skip_present) RenderSoftwareCursor(); + if (m_gpu_timing_enabled) + PopTimestampQuery(); + m_gl_context->SwapBuffers(); + + if (m_gpu_timing_enabled) + KickTimestampQuery(); + return true; } @@ -792,7 +800,7 @@ bool OpenGLHostDisplay::RenderScreenshot(u32 width, u32 height, std::vector RenderDisplay(left, height - top - draw_height, draw_width, draw_height, m_display_texture_handle, m_display_texture_width, m_display_texture_height, m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - m_display_linear_filtering); + IsUsingLinearFiltering()); } } @@ -830,7 +838,7 @@ void OpenGLHostDisplay::RenderDisplay() RenderDisplay(left, GetWindowHeight() - top - height, width, height, m_display_texture_handle, m_display_texture_width, m_display_texture_height, m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, m_display_texture_view_height, m_display_linear_filtering); + m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); } static void 
DrawFullscreenQuadES2(s32 tex_view_x, s32 tex_view_y, s32 tex_view_width, s32 tex_view_height, @@ -871,8 +879,9 @@ void OpenGLHostDisplay::RenderDisplay(s32 left, s32 bottom, s32 width, s32 heigh if (!m_use_gles2_draw_path) { - const float position_adjust = m_display_linear_filtering ? 0.5f : 0.0f; - const float size_adjust = m_display_linear_filtering ? 1.0f : 0.0f; + const bool linear = IsUsingLinearFiltering(); + const float position_adjust = linear ? 0.5f : 0.0f; + const float size_adjust = linear ? 1.0f : 0.0f; const float flip_adjust = (texture_view_height < 0) ? -1.0f : 1.0f; m_display_program.Uniform4f( 0, (static_cast(texture_view_x) + position_adjust) / static_cast(texture_width), @@ -1041,7 +1050,7 @@ void OpenGLHostDisplay::ApplyPostProcessingChain(GLuint final_target, s32 final_ { RenderDisplay(final_left, target_height - final_top - final_height, final_width, final_height, texture_handle, texture_width, texture_height, texture_view_x, texture_view_y, texture_view_width, - texture_view_height, m_display_linear_filtering); + texture_view_height, IsUsingLinearFiltering()); return; } @@ -1050,7 +1059,7 @@ void OpenGLHostDisplay::ApplyPostProcessingChain(GLuint final_target, s32 final_ glClear(GL_COLOR_BUFFER_BIT); RenderDisplay(final_left, target_height - final_top - final_height, final_width, final_height, texture_handle, texture_width, texture_height, texture_view_x, texture_view_y, texture_view_width, texture_view_height, - m_display_linear_filtering); + IsUsingLinearFiltering()); texture_handle = reinterpret_cast(static_cast(m_post_processing_input_texture.GetGLId())); texture_width = m_post_processing_input_texture.GetWidth(); @@ -1099,4 +1108,122 @@ void OpenGLHostDisplay::ApplyPostProcessingChain(GLuint final_target, s32 final_ m_post_processing_ubo->Unbind(); } +void OpenGLHostDisplay::CreateTimestampQueries() +{ + const bool gles = m_gl_context->IsGLES(); + const auto GenQueries = gles ? 
glGenQueriesEXT : glGenQueries; + + GenQueries(static_cast(m_timestamp_queries.size()), m_timestamp_queries.data()); + KickTimestampQuery(); +} + +void OpenGLHostDisplay::DestroyTimestampQueries() +{ + if (m_timestamp_queries[0] == 0) + return; + + const bool gles = m_gl_context->IsGLES(); + const auto DeleteQueries = gles ? glDeleteQueriesEXT : glDeleteQueries; + + if (m_timestamp_query_started) + { + const auto EndQuery = gles ? glEndQueryEXT : glEndQuery; + EndQuery(m_timestamp_queries[m_write_timestamp_query]); + } + + DeleteQueries(static_cast(m_timestamp_queries.size()), m_timestamp_queries.data()); + m_timestamp_queries.fill(0); + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = false; +} + +void OpenGLHostDisplay::PopTimestampQuery() +{ + const bool gles = m_gl_context->IsGLES(); + + if (gles) + { + GLint disjoint = 0; + glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint); + if (disjoint) + { + Log_VerbosePrintf("GPU timing disjoint, resetting."); + if (m_timestamp_query_started) + glEndQueryEXT(GL_TIME_ELAPSED); + + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = false; + } + } + + while (m_waiting_timestamp_queries > 0) + { + const auto GetQueryObjectiv = gles ? glGetQueryObjectivEXT : glGetQueryObjectiv; + const auto GetQueryObjectui64v = gles ? 
glGetQueryObjectui64vEXT : glGetQueryObjectui64v; + + GLint available = 0; + GetQueryObjectiv(m_timestamp_queries[m_read_timestamp_query], GL_QUERY_RESULT_AVAILABLE, &available); + DebugAssert(m_read_timestamp_query != m_write_timestamp_query); + + if (!available) + break; + + u64 result = 0; + GetQueryObjectui64v(m_timestamp_queries[m_read_timestamp_query], GL_QUERY_RESULT, &result); + m_accumulated_gpu_time += static_cast(static_cast(result) / 1000000.0); + m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_waiting_timestamp_queries--; + } + + if (m_timestamp_query_started) + { + const auto EndQuery = gles ? glEndQueryEXT : glEndQuery; + EndQuery(GL_TIME_ELAPSED); + + m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_timestamp_query_started = false; + m_waiting_timestamp_queries++; + } +} + +void OpenGLHostDisplay::KickTimestampQuery() +{ + if (m_timestamp_query_started || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES) + return; + + const bool gles = m_gl_context->IsGLES(); + const auto BeginQuery = gles ? 
glBeginQueryEXT : glBeginQuery; + + BeginQuery(GL_TIME_ELAPSED, m_timestamp_queries[m_write_timestamp_query]); + m_timestamp_query_started = true; +} + +bool OpenGLHostDisplay::SetGPUTimingEnabled(bool enabled) +{ + if (m_gpu_timing_enabled == enabled) + return true; + + if (enabled && m_gl_context->IsGLES() && !GLAD_GL_EXT_disjoint_timer_query) + return false; + + m_gpu_timing_enabled = enabled; + if (m_gpu_timing_enabled) + CreateTimestampQueries(); + else + DestroyTimestampQueries(); + + return true; +} + +float OpenGLHostDisplay::GetAndResetAccumulatedGPUTime() +{ + const float value = m_accumulated_gpu_time; + m_accumulated_gpu_time = 0.0f; + return value; +} + } // namespace FrontendCommon diff --git a/src/frontend-common/opengl_host_display.h b/src/frontend-common/opengl_host_display.h index 3f0d965cc..89b3557e2 100644 --- a/src/frontend-common/opengl_host_display.h +++ b/src/frontend-common/opengl_host_display.h @@ -62,7 +62,12 @@ public: bool RenderScreenshot(u32 width, u32 height, std::vector* out_pixels, u32* out_stride, HostDisplayPixelFormat* out_format) override; + bool SetGPUTimingEnabled(bool enabled) override; + float GetAndResetAccumulatedGPUTime() override; + protected: + static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; + const char* GetGLSLVersionString() const; std::string GetGLSLVersionHeader() const; @@ -98,6 +103,11 @@ protected: s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, u32 target_width, u32 target_height); + void CreateTimestampQueries(); + void DestroyTimestampQueries(); + void PopTimestampQuery(); + void KickTimestampQuery(); + std::unique_ptr m_gl_context; GL::Program m_display_program; @@ -118,6 +128,13 @@ protected: std::unique_ptr m_post_processing_ubo; std::vector m_post_processing_stages; + std::array m_timestamp_queries = {}; + float m_accumulated_gpu_time = 0.0f; + u8 m_read_timestamp_query = 0; + u8 m_write_timestamp_query = 0; + u8 m_waiting_timestamp_queries = 0; + bool m_timestamp_query_started = 
false; + bool m_display_texture_is_linear_filtered = false; bool m_use_gles2_draw_path = false; bool m_use_pbo_for_pixels = false; diff --git a/src/frontend-common/vulkan_host_display.cpp b/src/frontend-common/vulkan_host_display.cpp index 8011aa4ae..415cc9c6c 100644 --- a/src/frontend-common/vulkan_host_display.cpp +++ b/src/frontend-common/vulkan_host_display.cpp @@ -732,7 +732,7 @@ bool VulkanHostDisplay::RenderScreenshot(u32 width, u32 height, std::vector BeginSwapChainRenderPass(fb, width, height); RenderDisplay(left, top, draw_width, draw_height, m_display_texture_handle, m_display_texture_width, m_display_texture_height, m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, m_display_texture_view_height, m_display_linear_filtering); + m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); } vkCmdEndRenderPass(g_vulkan_context->GetCurrentCommandBuffer()); @@ -789,7 +789,7 @@ void VulkanHostDisplay::RenderDisplay() BeginSwapChainRenderPass(m_swap_chain->GetCurrentFramebuffer(), m_swap_chain->GetWidth(), m_swap_chain->GetHeight()); RenderDisplay(left, top, width, height, m_display_texture_handle, m_display_texture_width, m_display_texture_height, m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, m_display_linear_filtering); + m_display_texture_view_height, IsUsingLinearFiltering()); } void VulkanHostDisplay::RenderDisplay(s32 left, s32 top, s32 width, s32 height, void* texture_handle, u32 texture_width, @@ -816,8 +816,8 @@ void VulkanHostDisplay::RenderDisplay(s32 left, s32 top, s32 width, s32 height, dsupdate.Update(g_vulkan_context->GetDevice()); } - const float position_adjust = m_display_linear_filtering ? 0.5f : 0.0f; - const float size_adjust = m_display_linear_filtering ? 1.0f : 0.0f; + const float position_adjust = IsUsingLinearFiltering() ? 0.5f : 0.0f; + const float size_adjust = IsUsingLinearFiltering() ? 
1.0f : 0.0f; const PushConstants pc{(static_cast(texture_view_x) + position_adjust) / static_cast(texture_width), (static_cast(texture_view_y) + position_adjust) / static_cast(texture_height), (static_cast(texture_view_width) - size_adjust) / static_cast(texture_width), @@ -874,6 +874,22 @@ void VulkanHostDisplay::RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 h vkCmdDraw(cmdbuffer, 3, 1, 0, 0); } +bool VulkanHostDisplay::SetGPUTimingEnabled(bool enabled) +{ + if (g_vulkan_context->SetEnableGPUTiming(enabled)) + { + m_gpu_timing_enabled = enabled; + return true; + } + + return false; +} + +float VulkanHostDisplay::GetAndResetAccumulatedGPUTime() +{ + return g_vulkan_context->GetAndResetAccumulatedGPUTime(); +} + HostDisplay::AdapterAndModeList VulkanHostDisplay::StaticGetAdapterAndModeList(const WindowInfo* wi) { AdapterAndModeList ret; @@ -907,8 +923,7 @@ HostDisplay::AdapterAndModeList VulkanHostDisplay::StaticGetAdapterAndModeList(c ret.fullscreen_modes.reserve(fsmodes.size()); for (const Vulkan::SwapChain::FullscreenModeInfo& fmi : fsmodes) { - ret.fullscreen_modes.push_back( - GetFullscreenModeString(fmi.width, fmi.height, fmi.refresh_rate)); + ret.fullscreen_modes.push_back(GetFullscreenModeString(fmi.width, fmi.height, fmi.refresh_rate)); } } @@ -1096,7 +1111,7 @@ void VulkanHostDisplay::ApplyPostProcessingChain(VkFramebuffer target_fb, s32 fi { BeginSwapChainRenderPass(target_fb, target_width, target_height); RenderDisplay(final_left, final_top, final_width, final_height, texture_handle, texture_width, texture_height, - texture_view_x, texture_view_y, texture_view_width, texture_view_height, m_display_linear_filtering); + texture_view_x, texture_view_y, texture_view_width, texture_view_height, IsUsingLinearFiltering()); return; } @@ -1104,7 +1119,7 @@ void VulkanHostDisplay::ApplyPostProcessingChain(VkFramebuffer target_fb, s32 fi m_post_processing_input_texture.TransitionToLayout(cmdbuffer, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); 
BeginSwapChainRenderPass(m_post_processing_input_framebuffer, target_width, target_height); RenderDisplay(final_left, final_top, final_width, final_height, texture_handle, texture_width, texture_height, - texture_view_x, texture_view_y, texture_view_width, texture_view_height, m_display_linear_filtering); + texture_view_x, texture_view_y, texture_view_width, texture_view_height, IsUsingLinearFiltering()); vkCmdEndRenderPass(cmdbuffer); Vulkan::Util::EndDebugScope(g_vulkan_context->GetCurrentCommandBuffer()); m_post_processing_input_texture.TransitionToLayout(cmdbuffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); diff --git a/src/frontend-common/vulkan_host_display.h b/src/frontend-common/vulkan_host_display.h index 9dd2b12ed..14be1132d 100644 --- a/src/frontend-common/vulkan_host_display.h +++ b/src/frontend-common/vulkan_host_display.h @@ -67,6 +67,9 @@ public: bool RenderScreenshot(u32 width, u32 height, std::vector* out_pixels, u32* out_stride, HostDisplayPixelFormat* out_format) override; + bool SetGPUTimingEnabled(bool enabled) override; + float GetAndResetAccumulatedGPUTime() override; + static AdapterAndModeList StaticGetAdapterAndModeList(const WindowInfo* wi); protected: