From 73446618011de02abefc2adab7b8a16daa5db940 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 11 Apr 2024 22:29:17 +1000 Subject: [PATCH] GPUDevice: Split submission and presentation --- src/core/imgui_overlays.cpp | 2 +- src/core/system.cpp | 40 ++++++++++++++++++++++------------- src/core/system.h | 2 +- src/duckstation-qt/qthost.cpp | 2 +- src/util/d3d11_device.cpp | 9 +++++++- src/util/d3d11_device.h | 3 ++- src/util/d3d12_device.cpp | 14 +++++++++--- src/util/d3d12_device.h | 3 ++- src/util/gpu_device.h | 4 +++- src/util/metal_device.h | 3 ++- src/util/metal_device.mm | 11 +++++++++- src/util/opengl_device.cpp | 9 +++++++- src/util/opengl_device.h | 3 ++- src/util/vulkan_device.cpp | 23 +++++++++++++------- src/util/vulkan_device.h | 5 +++-- 15 files changed, 94 insertions(+), 39 deletions(-) diff --git a/src/core/imgui_overlays.cpp b/src/core/imgui_overlays.cpp index c5d0d1625..aea39da2e 100644 --- a/src/core/imgui_overlays.cpp +++ b/src/core/imgui_overlays.cpp @@ -207,7 +207,7 @@ void Host::DisplayLoadingScreen(const char* message, int progress_min /*= -1*/, if (g_gpu_device->BeginPresent(false)) { g_gpu_device->RenderImGui(); - g_gpu_device->EndPresent(); + g_gpu_device->EndPresent(false); } ImGui::NewFrame(); diff --git a/src/core/system.cpp b/src/core/system.cpp index 95aac2905..a161992c5 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -112,7 +112,7 @@ static void WarnAboutUnsafeSettings(); static void LogUnsafeSettingsToConsole(const std::string& messages); /// Throttles the system, i.e. sleeps until it's time to execute the next frame. 
-static void Throttle(); +static void Throttle(Common::Timer::Value current_time); static void SetRewinding(bool enabled); static bool SaveRewindState(); @@ -1861,24 +1861,35 @@ void System::FrameDone() SaveRunaheadState(); } - // TODO: Kick cmdbuffer early - if (s_optimal_frame_pacing && s_throttler_enabled && !IsExecutionInterrupted()) - Throttle(); - const Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); if (current_time < s_next_frame_time || s_syncing_to_host || s_optimal_frame_pacing || s_last_frame_skipped) { - s_last_frame_skipped = !PresentDisplay(true); + const bool throttle_before_present = (s_optimal_frame_pacing && s_throttler_enabled && !IsExecutionInterrupted()); + const bool explicit_present = (throttle_before_present && g_gpu_device->GetFeatures().explicit_present); + if (explicit_present) + { + s_last_frame_skipped = !PresentDisplay(!throttle_before_present, true); + Throttle(current_time); + g_gpu_device->SubmitPresent(); + } + else + { + if (throttle_before_present) + Throttle(current_time); + + s_last_frame_skipped = !PresentDisplay(!throttle_before_present, false); + + if (!throttle_before_present && s_throttler_enabled && !IsExecutionInterrupted()) + Throttle(current_time); + } } else if (current_time >= s_next_frame_time) { Log_DebugPrintf("Skipping displaying frame"); s_last_frame_skipped = true; + Throttle(current_time); } - if (!s_optimal_frame_pacing && s_throttler_enabled && !IsExecutionInterrupted()) - Throttle(); - // Input poll already done above if (s_runahead_frames == 0) { @@ -1931,12 +1942,11 @@ void System::ResetThrottler() s_next_frame_time = Common::Timer::GetCurrentValue() + s_frame_period; } -void System::Throttle() +void System::Throttle(Common::Timer::Value current_time) { // If we're running too slow, advance the next frame time based on the time we lost. 
Effectively skips // running those frames at the intended time, because otherwise if we pause in the debugger, we'll run // hundreds of frames when we resume. - Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); if (current_time > s_next_frame_time) { const Common::Timer::Value diff = static_cast(current_time) - static_cast(s_next_frame_time); @@ -4108,7 +4118,7 @@ void System::DoRewind() Host::PumpMessagesOnCPUThread(); Internal::IdlePollUpdate(); - Throttle(); + Throttle(Common::Timer::GetCurrentValue()); } void System::SaveRunaheadState() @@ -4803,7 +4813,7 @@ void System::HostDisplayResized() g_gpu->UpdateResolutionScale(); } -bool System::PresentDisplay(bool allow_skip_present) +bool System::PresentDisplay(bool allow_skip_present, bool explicit_present) { const bool skip_present = allow_skip_present && g_gpu_device->ShouldSkipDisplayingFrame(); @@ -4835,7 +4845,7 @@ bool System::PresentDisplay(bool allow_skip_present) if (do_present) { g_gpu_device->RenderImGui(); - g_gpu_device->EndPresent(); + g_gpu_device->EndPresent(explicit_present); if (g_gpu_device->IsGPUTimingEnabled()) { @@ -4856,7 +4866,7 @@ bool System::PresentDisplay(bool allow_skip_present) void System::InvalidateDisplay() { - PresentDisplay(false); + PresentDisplay(false, false); if (g_gpu) g_gpu->RestoreDeviceContext(); diff --git a/src/core/system.h b/src/core/system.h index 5b6d0ee07..953eaca18 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -466,7 +466,7 @@ void RequestDisplaySize(float scale = 0.0f); void HostDisplayResized(); /// Renders the display. 
-bool PresentDisplay(bool allow_skip_present); +bool PresentDisplay(bool allow_skip_present, bool explicit_present); void InvalidateDisplay(); ////////////////////////////////////////////////////////////////////////// diff --git a/src/duckstation-qt/qthost.cpp b/src/duckstation-qt/qthost.cpp index 49f331395..3d640aff0 100644 --- a/src/duckstation-qt/qthost.cpp +++ b/src/duckstation-qt/qthost.cpp @@ -1567,7 +1567,7 @@ void EmuThread::run() System::Internal::IdlePollUpdate(); if (g_gpu_device) { - System::PresentDisplay(false); + System::PresentDisplay(false, false); if (!g_gpu_device->IsVSyncEnabled()) g_gpu_device->ThrottlePresentation(); } diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index 1e7ee2fbb..dd0872458 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -190,6 +190,7 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features) m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS); m_features.partial_msaa_resolve = false; m_features.memory_import = false; + m_features.explicit_present = false; m_features.gpu_timing = true; m_features.shader_cache = true; m_features.pipeline_cache = false; @@ -644,8 +645,9 @@ bool D3D11Device::BeginPresent(bool skip_present) return true; } -void D3D11Device::EndPresent() +void D3D11Device::EndPresent(bool explicit_present) { + DebugAssert(!explicit_present); DebugAssert(m_num_current_render_targets == 0 && !m_current_depth_target); if (m_vsync_enabled && m_gpu_timing_enabled) @@ -665,6 +667,11 @@ void D3D11Device::EndPresent() TrimTexturePool(); } +void D3D11Device::SubmitPresent() +{ + Panic("Not supported by this API."); +} + GPUDevice::AdapterAndModeList D3D11Device::StaticGetAdapterAndModeList() { AdapterAndModeList ret; diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index 3e35ced45..d98861f10 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -102,7 +102,8 @@ public: float GetAndResetAccumulatedGPUTime() 
override; bool BeginPresent(bool skip_present) override; - void EndPresent() override; + void EndPresent(bool explicit_present) override; + void SubmitPresent() override; void UnbindPipeline(D3D11Pipeline* pl); void UnbindTexture(D3D11Texture* tex); diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index 04605e35a..cf2fae52d 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1093,7 +1093,7 @@ bool D3D12Device::BeginPresent(bool frame_skip) return true; } -void D3D12Device::EndPresent() +void D3D12Device::EndPresent(bool explicit_present) { DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target); EndRenderPass(); @@ -1106,6 +1106,15 @@ void D3D12Device::EndPresent() D3D12_RESOURCE_STATE_PRESENT); SubmitCommandList(false); + TrimTexturePool(); + + if (!explicit_present) + SubmitPresent(); +} + +void D3D12Device::SubmitPresent() +{ + DebugAssert(m_swap_chain); // DirectX has no concept of tear-or-sync. I guess if we measured times ourselves, we could implement it. 
if (m_vsync_enabled) @@ -1114,8 +1123,6 @@ void D3D12Device::EndPresent() m_swap_chain->Present(0, DXGI_PRESENT_ALLOW_TEARING); else m_swap_chain->Present(0, 0); - - TrimTexturePool(); } #ifdef _DEBUG @@ -1194,6 +1201,7 @@ void D3D12Device::SetFeatures(FeatureMask disabled_features) m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS); m_features.partial_msaa_resolve = true; m_features.memory_import = false; + m_features.explicit_present = true; m_features.gpu_timing = true; m_features.shader_cache = true; m_features.pipeline_cache = true; diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h index 0d6fc7652..668dff0a8 100644 --- a/src/util/d3d12_device.h +++ b/src/util/d3d12_device.h @@ -122,7 +122,8 @@ public: float GetAndResetAccumulatedGPUTime() override; bool BeginPresent(bool skip_present) override; - void EndPresent() override; + void EndPresent(bool explicit_present) override; + void SubmitPresent() override; // Global state accessors ALWAYS_INLINE static D3D12Device& GetInstance() { return *static_cast(g_gpu_device.get()); } diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index e76cf4528..53fb74f79 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -471,6 +471,7 @@ public: bool geometry_shaders : 1; bool partial_msaa_resolve : 1; bool memory_import : 1; + bool explicit_present : 1; bool gpu_timing : 1; bool shader_cache : 1; bool pipeline_cache : 1; @@ -665,7 +666,8 @@ public: /// Returns false if the window was completely occluded. virtual bool BeginPresent(bool skip_present) = 0; - virtual void EndPresent() = 0; + virtual void EndPresent(bool explicit_present) = 0; + virtual void SubmitPresent() = 0; /// Renders ImGui screen elements. Call before EndPresent(). 
void RenderImGui(); diff --git a/src/util/metal_device.h b/src/util/metal_device.h index 8f447db6e..be51cb798 100644 --- a/src/util/metal_device.h +++ b/src/util/metal_device.h @@ -268,7 +268,8 @@ public: void SetVSyncEnabled(bool enabled) override; bool BeginPresent(bool skip_present) override; - void EndPresent() override; + void EndPresent(bool explicit_present) override; + void SubmitPresent() override; void WaitForFenceCounter(u64 counter); diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index b49cd7e2f..cc631fe9b 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -241,6 +241,7 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features) m_features.geometry_shaders = false; m_features.partial_msaa_resolve = false; m_features.memory_import = true; + m_features.explicit_present = false; m_features.shader_cache = true; m_features.pipeline_cache = false; m_features.prefer_unused_textures = true; @@ -2206,8 +2207,11 @@ bool MetalDevice::BeginPresent(bool skip_present) } } -void MetalDevice::EndPresent() +void MetalDevice::EndPresent(bool explicit_present) { + DebugAssert(!explicit_present); + + // TODO: Explicit present DebugAssert(m_num_current_render_targets == 0 && !m_current_depth_target); EndAnyEncoding(); @@ -2218,6 +2222,11 @@ void MetalDevice::EndPresent() TrimTexturePool(); } +void MetalDevice::SubmitPresent() +{ + Panic("Not supported by this API."); +} + void MetalDevice::CreateCommandBuffer() { @autoreleasepool diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index fe8453972..ff8163ceb 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -479,6 +479,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features) (!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT)); m_features.partial_msaa_resolve = true; m_features.memory_import = true; + m_features.explicit_present = false; m_features.shader_cache = false; @@ -774,8 +775,9 @@ 
bool OpenGLDevice::BeginPresent(bool skip_present) return true; } -void OpenGLDevice::EndPresent() +void OpenGLDevice::EndPresent(bool explicit_present) { + DebugAssert(!explicit_present); DebugAssert(m_current_fbo == 0); if (m_gpu_timing_enabled) @@ -789,6 +791,11 @@ void OpenGLDevice::EndPresent() TrimTexturePool(); } +void OpenGLDevice::SubmitPresent() +{ + Panic("Not supported by this API."); +} + void OpenGLDevice::CreateTimestampQueries() { const bool gles = m_gl_context->IsGLES(); diff --git a/src/util/opengl_device.h b/src/util/opengl_device.h index e07c0abfc..0d3051d74 100644 --- a/src/util/opengl_device.h +++ b/src/util/opengl_device.h @@ -103,7 +103,8 @@ public: void SetVSyncEnabled(bool enabled) override; bool BeginPresent(bool skip_present) override; - void EndPresent() override; + void EndPresent(bool explicit_present) override; + void SubmitPresent() override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 17977551f..794641afa 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -1233,8 +1233,8 @@ void VulkanDevice::WaitForCommandBufferCompletion(u32 index) } } -void VulkanDevice::SubmitCommandBuffer(VulkanSwapChain* present_swap_chain /* = nullptr */, - bool submit_on_thread /* = false */) +void VulkanDevice::EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain, bool explicit_present, + bool submit_on_thread) { if (m_last_submit_failed.load(std::memory_order_acquire)) return; @@ -1272,10 +1272,10 @@ void VulkanDevice::SubmitCommandBuffer(VulkanSwapChain* present_swap_chain /* = std::unique_lock lock(m_present_mutex); WaitForPresentComplete(lock); - if (!submit_on_thread || !m_present_thread.joinable()) + if (!submit_on_thread || explicit_present || !m_present_thread.joinable()) { DoSubmitCommandBuffer(m_current_frame, present_swap_chain); - if (present_swap_chain) + if (present_swap_chain && 
!explicit_present) DoPresent(present_swap_chain); return; } @@ -1471,7 +1471,7 @@ void VulkanDevice::SubmitCommandBuffer(bool wait_for_completion) DebugAssert(!InRenderPass()); const u32 current_frame = m_current_frame; - SubmitCommandBuffer(); + EndAndSubmitCommandBuffer(nullptr, false, false); MoveToNextCommandBuffer(); if (wait_for_completion) @@ -2409,7 +2409,7 @@ bool VulkanDevice::BeginPresent(bool frame_skip) return true; } -void VulkanDevice::EndPresent() +void VulkanDevice::EndPresent(bool explicit_present) { DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target); EndRenderPass(); @@ -2418,12 +2418,18 @@ void VulkanDevice::EndPresent() VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, m_swap_chain->GetCurrentImage(), GPUTexture::Type::RenderTarget, 0, 1, 0, 1, VulkanTexture::Layout::ColorAttachment, VulkanTexture::Layout::PresentSrc); - SubmitCommandBuffer(m_swap_chain.get(), !m_swap_chain->IsPresentModeSynchronizing()); + EndAndSubmitCommandBuffer(m_swap_chain.get(), explicit_present, !m_swap_chain->IsPresentModeSynchronizing()); MoveToNextCommandBuffer(); InvalidateCachedState(); TrimTexturePool(); } +void VulkanDevice::SubmitPresent() +{ + DebugAssert(m_swap_chain); + DoPresent(m_swap_chain.get()); +} + #ifdef _DEBUG static std::array Palette(float phase, const std::array& a, const std::array& b, const std::array& c, const std::array& d) @@ -2543,6 +2549,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features) m_features.partial_msaa_resolve = true; m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host; + m_features.explicit_present = true; m_features.shader_cache = true; m_features.pipeline_cache = true; m_features.prefer_unused_textures = true; @@ -3032,7 +3039,7 @@ void VulkanDevice::RenderBlankFrame() VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1, VulkanTexture::Layout::TransferDst, VulkanTexture::Layout::PresentSrc); 
- SubmitCommandBuffer(m_swap_chain.get(), !m_swap_chain->IsPresentModeSynchronizing()); + EndAndSubmitCommandBuffer(m_swap_chain.get(), false, !m_swap_chain->IsPresentModeSynchronizing()); MoveToNextCommandBuffer(); InvalidateCachedState(); diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h index b681ed48c..0c41b2d4d 100644 --- a/src/util/vulkan_device.h +++ b/src/util/vulkan_device.h @@ -131,7 +131,8 @@ public: void SetVSyncEnabled(bool enabled) override; bool BeginPresent(bool skip_present) override; - void EndPresent() override; + void EndPresent(bool explicit_present) override; + void SubmitPresent() override; // Global state accessors ALWAYS_INLINE static VulkanDevice& GetInstance() { return *static_cast(g_gpu_device.get()); } @@ -306,7 +307,7 @@ private: bool IsDeviceImgTec() const; bool IsBrokenMobileDriver() const; - void SubmitCommandBuffer(VulkanSwapChain* present_swap_chain = nullptr, bool submit_on_thread = false); + void EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain, bool explicit_present, bool submit_on_thread); void MoveToNextCommandBuffer(); void WaitForPresentComplete();