GPUDevice: Split submission and presentation

This commit is contained in:
Stenzek 2024-04-11 22:29:17 +10:00
parent 3e9ac99d54
commit 7344661801
No known key found for this signature in database
15 changed files with 94 additions and 39 deletions

View file

@ -207,7 +207,7 @@ void Host::DisplayLoadingScreen(const char* message, int progress_min /*= -1*/,
if (g_gpu_device->BeginPresent(false))
{
g_gpu_device->RenderImGui();
g_gpu_device->EndPresent();
g_gpu_device->EndPresent(false);
}
ImGui::NewFrame();

View file

@ -112,7 +112,7 @@ static void WarnAboutUnsafeSettings();
static void LogUnsafeSettingsToConsole(const std::string& messages);
/// Throttles the system, i.e. sleeps until it's time to execute the next frame.
static void Throttle();
static void Throttle(Common::Timer::Value current_time);
static void SetRewinding(bool enabled);
static bool SaveRewindState();
@ -1861,24 +1861,35 @@ void System::FrameDone()
SaveRunaheadState();
}
// TODO: Kick cmdbuffer early
if (s_optimal_frame_pacing && s_throttler_enabled && !IsExecutionInterrupted())
Throttle();
const Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time < s_next_frame_time || s_syncing_to_host || s_optimal_frame_pacing || s_last_frame_skipped)
{
s_last_frame_skipped = !PresentDisplay(true);
const bool throttle_before_present = (s_optimal_frame_pacing && s_throttler_enabled && !IsExecutionInterrupted());
const bool explicit_present = (throttle_before_present && g_gpu_device->GetFeatures().explicit_present);
if (explicit_present)
{
s_last_frame_skipped = !PresentDisplay(!throttle_before_present, true);
Throttle(current_time);
g_gpu_device->SubmitPresent();
}
else
{
if (throttle_before_present)
Throttle(current_time);
s_last_frame_skipped = !PresentDisplay(!throttle_before_present, false);
if (!throttle_before_present && s_throttler_enabled && !IsExecutionInterrupted())
Throttle(current_time);
}
}
else if (current_time >= s_next_frame_time)
{
Log_DebugPrintf("Skipping displaying frame");
s_last_frame_skipped = true;
Throttle(current_time);
}
if (!s_optimal_frame_pacing && s_throttler_enabled && !IsExecutionInterrupted())
Throttle();
// Input poll already done above
if (s_runahead_frames == 0)
{
@ -1931,12 +1942,11 @@ void System::ResetThrottler()
s_next_frame_time = Common::Timer::GetCurrentValue() + s_frame_period;
}
void System::Throttle()
void System::Throttle(Common::Timer::Value current_time)
{
// If we're running too slow, advance the next frame time based on the time we lost. Effectively skips
// running those frames at the intended time, because otherwise if we pause in the debugger, we'll run
// hundreds of frames when we resume.
Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time > s_next_frame_time)
{
const Common::Timer::Value diff = static_cast<s64>(current_time) - static_cast<s64>(s_next_frame_time);
@ -4108,7 +4118,7 @@ void System::DoRewind()
Host::PumpMessagesOnCPUThread();
Internal::IdlePollUpdate();
Throttle();
Throttle(Common::Timer::GetCurrentValue());
}
void System::SaveRunaheadState()
@ -4803,7 +4813,7 @@ void System::HostDisplayResized()
g_gpu->UpdateResolutionScale();
}
bool System::PresentDisplay(bool allow_skip_present)
bool System::PresentDisplay(bool allow_skip_present, bool explicit_present)
{
const bool skip_present = allow_skip_present && g_gpu_device->ShouldSkipDisplayingFrame();
@ -4835,7 +4845,7 @@ bool System::PresentDisplay(bool allow_skip_present)
if (do_present)
{
g_gpu_device->RenderImGui();
g_gpu_device->EndPresent();
g_gpu_device->EndPresent(explicit_present);
if (g_gpu_device->IsGPUTimingEnabled())
{
@ -4856,7 +4866,7 @@ bool System::PresentDisplay(bool allow_skip_present)
void System::InvalidateDisplay()
{
PresentDisplay(false);
PresentDisplay(false, false);
if (g_gpu)
g_gpu->RestoreDeviceContext();

View file

@ -466,7 +466,7 @@ void RequestDisplaySize(float scale = 0.0f);
void HostDisplayResized();
/// Renders the display.
bool PresentDisplay(bool allow_skip_present);
bool PresentDisplay(bool allow_skip_present, bool explicit_present);
void InvalidateDisplay();
//////////////////////////////////////////////////////////////////////////

View file

@ -1567,7 +1567,7 @@ void EmuThread::run()
System::Internal::IdlePollUpdate();
if (g_gpu_device)
{
System::PresentDisplay(false);
System::PresentDisplay(false, false);
if (!g_gpu_device->IsVSyncEnabled())
g_gpu_device->ThrottlePresentation();
}

View file

@ -190,6 +190,7 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.partial_msaa_resolve = false;
m_features.memory_import = false;
m_features.explicit_present = false;
m_features.gpu_timing = true;
m_features.shader_cache = true;
m_features.pipeline_cache = false;
@ -644,8 +645,9 @@ bool D3D11Device::BeginPresent(bool skip_present)
return true;
}
void D3D11Device::EndPresent()
void D3D11Device::EndPresent(bool explicit_present)
{
DebugAssert(!explicit_present);
DebugAssert(m_num_current_render_targets == 0 && !m_current_depth_target);
if (m_vsync_enabled && m_gpu_timing_enabled)
@ -665,6 +667,11 @@ void D3D11Device::EndPresent()
TrimTexturePool();
}
/// Explicit presentation is not implemented for this backend: SetFeatures() reports
/// explicit_present as false and EndPresent() asserts that it is never requested, so
/// reaching this function triggers Panic().
void D3D11Device::SubmitPresent()
{
Panic("Not supported by this API.");
}
GPUDevice::AdapterAndModeList D3D11Device::StaticGetAdapterAndModeList()
{
AdapterAndModeList ret;

View file

@ -102,7 +102,8 @@ public:
float GetAndResetAccumulatedGPUTime() override;
bool BeginPresent(bool skip_present) override;
void EndPresent() override;
void EndPresent(bool explicit_present) override;
void SubmitPresent() override;
void UnbindPipeline(D3D11Pipeline* pl);
void UnbindTexture(D3D11Texture* tex);

View file

@ -1093,7 +1093,7 @@ bool D3D12Device::BeginPresent(bool frame_skip)
return true;
}
void D3D12Device::EndPresent()
void D3D12Device::EndPresent(bool explicit_present)
{
DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target);
EndRenderPass();
@ -1106,6 +1106,15 @@ void D3D12Device::EndPresent()
D3D12_RESOURCE_STATE_PRESENT);
SubmitCommandList(false);
TrimTexturePool();
if (!explicit_present)
SubmitPresent();
}
void D3D12Device::SubmitPresent()
{
DebugAssert(m_swap_chain);
// DirectX has no concept of tear-or-sync. I guess if we measured times ourselves, we could implement it.
if (m_vsync_enabled)
@ -1114,8 +1123,6 @@ void D3D12Device::EndPresent()
m_swap_chain->Present(0, DXGI_PRESENT_ALLOW_TEARING);
else
m_swap_chain->Present(0, 0);
TrimTexturePool();
}
#ifdef _DEBUG
@ -1194,6 +1201,7 @@ void D3D12Device::SetFeatures(FeatureMask disabled_features)
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.partial_msaa_resolve = true;
m_features.memory_import = false;
m_features.explicit_present = true;
m_features.gpu_timing = true;
m_features.shader_cache = true;
m_features.pipeline_cache = true;

View file

@ -122,7 +122,8 @@ public:
float GetAndResetAccumulatedGPUTime() override;
bool BeginPresent(bool skip_present) override;
void EndPresent() override;
void EndPresent(bool explicit_present) override;
void SubmitPresent() override;
// Global state accessors
ALWAYS_INLINE static D3D12Device& GetInstance() { return *static_cast<D3D12Device*>(g_gpu_device.get()); }

View file

@ -471,6 +471,7 @@ public:
bool geometry_shaders : 1;
bool partial_msaa_resolve : 1;
bool memory_import : 1;
bool explicit_present : 1;
bool gpu_timing : 1;
bool shader_cache : 1;
bool pipeline_cache : 1;
@ -665,7 +666,8 @@ public:
/// Returns false if the window was completely occluded.
virtual bool BeginPresent(bool skip_present) = 0;
virtual void EndPresent() = 0;
/// Ends the frame started by BeginPresent(). When explicit_present is false the frame is also
/// presented; when it is true, presentation is left to a later SubmitPresent() call. Only pass
/// true on devices whose features report explicit_present; the other backends assert against it.
virtual void EndPresent(bool explicit_present) = 0;
/// Presents a frame previously ended with EndPresent(true). Backends that do not report the
/// explicit_present feature panic if this is called.
virtual void SubmitPresent() = 0;
/// Renders ImGui screen elements. Call before EndPresent().
void RenderImGui();

View file

@ -268,7 +268,8 @@ public:
void SetVSyncEnabled(bool enabled) override;
bool BeginPresent(bool skip_present) override;
void EndPresent() override;
/// Finishes the frame begun by BeginPresent(). The parameter is named explicit_present to match
/// the out-of-line definition of this override.
void EndPresent(bool explicit_present) override;
/// Counterpart to EndPresent(true) in the explicit-present path.
void SubmitPresent() override;
void WaitForFenceCounter(u64 counter);

View file

@ -241,6 +241,7 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features)
m_features.geometry_shaders = false;
m_features.partial_msaa_resolve = false;
m_features.memory_import = true;
m_features.explicit_present = false;
m_features.shader_cache = true;
m_features.pipeline_cache = false;
m_features.prefer_unused_textures = true;
@ -2206,8 +2207,11 @@ bool MetalDevice::BeginPresent(bool skip_present)
}
}
void MetalDevice::EndPresent()
void MetalDevice::EndPresent(bool explicit_present)
{
DebugAssert(!explicit_present);
// TODO: Explicit present
DebugAssert(m_num_current_render_targets == 0 && !m_current_depth_target);
EndAnyEncoding();
@ -2218,6 +2222,11 @@ void MetalDevice::EndPresent()
TrimTexturePool();
}
/// Explicit presentation is not implemented for this backend yet: SetFeatures() reports
/// explicit_present as false and EndPresent() asserts that it is never requested, so
/// reaching this function triggers Panic().
void MetalDevice::SubmitPresent()
{
Panic("Not supported by this API.");
}
void MetalDevice::CreateCommandBuffer()
{
@autoreleasepool

View file

@ -479,6 +479,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
(!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT));
m_features.partial_msaa_resolve = true;
m_features.memory_import = true;
m_features.explicit_present = false;
m_features.shader_cache = false;
@ -774,8 +775,9 @@ bool OpenGLDevice::BeginPresent(bool skip_present)
return true;
}
void OpenGLDevice::EndPresent()
void OpenGLDevice::EndPresent(bool explicit_present)
{
DebugAssert(!explicit_present);
DebugAssert(m_current_fbo == 0);
if (m_gpu_timing_enabled)
@ -789,6 +791,11 @@ void OpenGLDevice::EndPresent()
TrimTexturePool();
}
/// Explicit presentation is not implemented for this backend: CheckFeatures() reports
/// explicit_present as false and EndPresent() asserts that it is never requested, so
/// reaching this function triggers Panic().
void OpenGLDevice::SubmitPresent()
{
Panic("Not supported by this API.");
}
void OpenGLDevice::CreateTimestampQueries()
{
const bool gles = m_gl_context->IsGLES();

View file

@ -103,7 +103,8 @@ public:
void SetVSyncEnabled(bool enabled) override;
bool BeginPresent(bool skip_present) override;
void EndPresent() override;
void EndPresent(bool explicit_present) override;
void SubmitPresent() override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;

View file

@ -1233,8 +1233,8 @@ void VulkanDevice::WaitForCommandBufferCompletion(u32 index)
}
}
void VulkanDevice::SubmitCommandBuffer(VulkanSwapChain* present_swap_chain /* = nullptr */,
bool submit_on_thread /* = false */)
void VulkanDevice::EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain, bool explicit_present,
bool submit_on_thread)
{
if (m_last_submit_failed.load(std::memory_order_acquire))
return;
@ -1272,10 +1272,10 @@ void VulkanDevice::SubmitCommandBuffer(VulkanSwapChain* present_swap_chain /* =
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
if (!submit_on_thread || !m_present_thread.joinable())
if (!submit_on_thread || explicit_present || !m_present_thread.joinable())
{
DoSubmitCommandBuffer(m_current_frame, present_swap_chain);
if (present_swap_chain)
if (present_swap_chain && !explicit_present)
DoPresent(present_swap_chain);
return;
}
@ -1471,7 +1471,7 @@ void VulkanDevice::SubmitCommandBuffer(bool wait_for_completion)
DebugAssert(!InRenderPass());
const u32 current_frame = m_current_frame;
SubmitCommandBuffer();
EndAndSubmitCommandBuffer(nullptr, false, false);
MoveToNextCommandBuffer();
if (wait_for_completion)
@ -2409,7 +2409,7 @@ bool VulkanDevice::BeginPresent(bool frame_skip)
return true;
}
void VulkanDevice::EndPresent()
void VulkanDevice::EndPresent(bool explicit_present)
{
DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target);
EndRenderPass();
@ -2418,12 +2418,18 @@ void VulkanDevice::EndPresent()
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, m_swap_chain->GetCurrentImage(), GPUTexture::Type::RenderTarget,
0, 1, 0, 1, VulkanTexture::Layout::ColorAttachment,
VulkanTexture::Layout::PresentSrc);
SubmitCommandBuffer(m_swap_chain.get(), !m_swap_chain->IsPresentModeSynchronizing());
EndAndSubmitCommandBuffer(m_swap_chain.get(), explicit_present, !m_swap_chain->IsPresentModeSynchronizing());
MoveToNextCommandBuffer();
InvalidateCachedState();
TrimTexturePool();
}
/// Presents the swap chain by calling DoPresent(). EndPresent(true) submits the frame's
/// command buffer without presenting, leaving the present to this call.
void VulkanDevice::SubmitPresent()
{
// Callers are expected to have a swap chain at this point.
DebugAssert(m_swap_chain);
DoPresent(m_swap_chain.get());
}
#ifdef _DEBUG
static std::array<float, 3> Palette(float phase, const std::array<float, 3>& a, const std::array<float, 3>& b,
const std::array<float, 3>& c, const std::array<float, 3>& d)
@ -2543,6 +2549,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
m_features.partial_msaa_resolve = true;
m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
m_features.explicit_present = true;
m_features.shader_cache = true;
m_features.pipeline_cache = true;
m_features.prefer_unused_textures = true;
@ -3032,7 +3039,7 @@ void VulkanDevice::RenderBlankFrame()
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
VulkanTexture::Layout::TransferDst, VulkanTexture::Layout::PresentSrc);
SubmitCommandBuffer(m_swap_chain.get(), !m_swap_chain->IsPresentModeSynchronizing());
EndAndSubmitCommandBuffer(m_swap_chain.get(), false, !m_swap_chain->IsPresentModeSynchronizing());
MoveToNextCommandBuffer();
InvalidateCachedState();

View file

@ -131,7 +131,8 @@ public:
void SetVSyncEnabled(bool enabled) override;
bool BeginPresent(bool skip_present) override;
void EndPresent() override;
void EndPresent(bool explicit_present) override;
void SubmitPresent() override;
// Global state accessors
ALWAYS_INLINE static VulkanDevice& GetInstance() { return *static_cast<VulkanDevice*>(g_gpu_device.get()); }
@ -306,7 +307,7 @@ private:
bool IsDeviceImgTec() const;
bool IsBrokenMobileDriver() const;
void SubmitCommandBuffer(VulkanSwapChain* present_swap_chain = nullptr, bool submit_on_thread = false);
void EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain, bool explicit_present, bool submit_on_thread);
void MoveToNextCommandBuffer();
void WaitForPresentComplete();