From 0b3461338cc7b3be446e60367d46bffc60c3808f Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sun, 11 Sep 2022 01:54:01 +1000 Subject: [PATCH] HostDisplay: Use streaming for sw renderer display --- src/common/gl/texture.cpp | 20 +- src/common/gl/texture.h | 3 + src/common/vulkan/context.cpp | 69 ++-- src/common/vulkan/context.h | 30 +- src/core/gpu_sw.cpp | 53 ++- src/core/gpu_sw.h | 3 + src/core/host_display.cpp | 49 +-- src/core/host_display.h | 12 +- src/frontend-common/d3d11_host_display.cpp | 114 ++---- src/frontend-common/d3d11_host_display.h | 6 - src/frontend-common/d3d12_host_display.cpp | 60 +-- src/frontend-common/d3d12_host_display.h | 5 - src/frontend-common/opengl_host_display.cpp | 409 ++++++++++---------- src/frontend-common/opengl_host_display.h | 24 +- src/frontend-common/vulkan_host_display.cpp | 171 ++++---- src/frontend-common/vulkan_host_display.h | 8 - 16 files changed, 495 insertions(+), 541 deletions(-) diff --git a/src/common/gl/texture.cpp b/src/common/gl/texture.cpp index d22d5b45c..d21f016b3 100644 --- a/src/common/gl/texture.cpp +++ b/src/common/gl/texture.cpp @@ -23,6 +23,16 @@ Texture::~Texture() Destroy(); } +bool Texture::UseTextureStorage(bool multisampled) +{ + return GLAD_GL_ARB_texture_storage || (multisampled ? GLAD_GL_ES_VERSION_3_1 : GLAD_GL_ES_VERSION_3_0); +} + +bool Texture::UseTextureStorage() const +{ + return UseTextureStorage(IsMultisampled()); +} + bool Texture::Create(u32 width, u32 height, u32 samples, GLenum internal_format, GLenum format, GLenum type, const void* data, bool linear_filter, bool wrap) { @@ -37,17 +47,23 @@ bool Texture::Create(u32 width, u32 height, u32 samples, GLenum internal_format, if (samples > 1) { Assert(!data); - if (GLAD_GL_ARB_texture_storage || GLAD_GL_ES_VERSION_3_1) + if (UseTextureStorage(true)) glTexStorage2DMultisample(target, samples, internal_format, width, height, GL_FALSE); else glTexImage2DMultisample(target, samples, internal_format, width, height, GL_FALSE); } else { - if ((GLAD_GL_ARB_texture_storage || GLAD_GL_ES_VERSION_3_0) && !data) + if (UseTextureStorage(false)) + { glTexStorage2D(target, 1, internal_format, width, height); + if (data) + glTexSubImage2D(target, 0, 0, 0, width, height, format, type, data); + } else + { glTexImage2D(target, 0, internal_format, width, height, 0, format, type, data); + } glTexParameteri(target, GL_TEXTURE_MIN_FILTER, linear_filter ? GL_LINEAR : GL_NEAREST); glTexParameteri(target, GL_TEXTURE_MAG_FILTER, linear_filter ? GL_LINEAR : GL_NEAREST); diff --git a/src/common/gl/texture.h b/src/common/gl/texture.h index d275afcd2..bc22a4362 100644 --- a/src/common/gl/texture.h +++ b/src/common/gl/texture.h @@ -10,6 +10,8 @@ public: Texture(Texture&& moved); ~Texture(); + static bool UseTextureStorage(bool multisampled); + bool Create(u32 width, u32 height, u32 samples, GLenum internal_format, GLenum format, GLenum type, const void* data = nullptr, bool linear_filter = false, bool wrap = false); void Replace(u32 width, u32 height, GLenum internal_format, GLenum format, GLenum type, const void* data); @@ -17,6 +19,7 @@ public: void Destroy(); + bool UseTextureStorage() const; void SetLinearFilter(bool enabled); bool IsValid() const { return m_id != 0; } diff --git a/src/common/vulkan/context.cpp b/src/common/vulkan/context.cpp index 954a80e9f..60f39e6f6 100644 --- a/src/common/vulkan/context.cpp +++ b/src/common/vulkan/context.cpp @@ -19,8 +19,13 @@ std::unique_ptr g_vulkan_context; namespace Vulkan { +enum : u32 +{ + TEXTURE_BUFFER_SIZE = 16 * 1024 * 1024, +}; + Context::Context(VkInstance instance, VkPhysicalDevice physical_device, bool owns_device) - : m_instance(instance), m_physical_device(physical_device), m_owns_device(owns_device) + : m_instance(instance), m_physical_device(physical_device) { // Read device physical memory properties, we need it for allocating buffers vkGetPhysicalDeviceProperties(physical_device, &m_device_properties); @@ -37,29 +42,7 @@ Context::Context(VkInstance instance, VkPhysicalDevice physical_device, bool own std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast(1)); } -Context::~Context() -{ - StopPresentThread(); - - if (m_device != VK_NULL_HANDLE) - WaitForGPUIdle(); - - DestroyRenderPassCache(); - DestroyGlobalDescriptorPool(); - DestroyCommandBuffers(); - - if (m_owns_device && m_device != VK_NULL_HANDLE) - vkDestroyDevice(m_device, nullptr); - - if (m_debug_messenger_callback != VK_NULL_HANDLE) - DisableDebugUtils(); - - if (m_owns_device) - { - vkDestroyInstance(m_instance, nullptr); - Vulkan::UnloadVulkanLibrary(); - } -} +Context::~Context() = default; bool Context::CheckValidationLayerAvailablility() { @@ -369,6 +352,7 @@ bool Context::Create(std::string_view gpu_name, const WindowInfo* wi, std::uniqu // Attempt to create the device. if (!g_vulkan_context->CreateDevice(surface, enable_validation_layer, nullptr, 0, nullptr, 0, nullptr) || !g_vulkan_context->CreateGlobalDescriptorPool() || !g_vulkan_context->CreateCommandBuffers() || + !g_vulkan_context->CreateTextureStreamBuffer() || (enable_surface && (*out_swap_chain = SwapChain::Create(wi_copy, surface, true)) == nullptr)) { // Since we are destroying the instance, we're also responsible for destroying the surface. @@ -415,6 +399,29 @@ bool Context::CreateFromExistingInstance(VkInstance instance, VkPhysicalDevice g void Context::Destroy() { AssertMsg(g_vulkan_context, "Has context"); + + g_vulkan_context->StopPresentThread(); + + if (g_vulkan_context->m_device != VK_NULL_HANDLE) + g_vulkan_context->WaitForGPUIdle(); + + g_vulkan_context->m_texture_upload_buffer.Destroy(false); + + g_vulkan_context->DestroyRenderPassCache(); + g_vulkan_context->DestroyGlobalDescriptorPool(); + g_vulkan_context->DestroyCommandBuffers(); + + if (g_vulkan_context->m_device != VK_NULL_HANDLE) + vkDestroyDevice(g_vulkan_context->m_device, nullptr); + + if (g_vulkan_context->m_debug_messenger_callback != VK_NULL_HANDLE) + g_vulkan_context->DisableDebugUtils(); + + if (g_vulkan_context->m_instance != VK_NULL_HANDLE) + vkDestroyInstance(g_vulkan_context->m_instance, nullptr); + + Vulkan::UnloadVulkanLibrary(); + g_vulkan_context.reset(); } @@ -785,6 +792,17 @@ void Context::DestroyGlobalDescriptorPool() } } +bool Context::CreateTextureStreamBuffer() +{ + if (!m_texture_upload_buffer.Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, TEXTURE_BUFFER_SIZE)) + { + Log_ErrorPrintf("Failed to allocate texture upload buffer"); + return false; + } + + return true; +} + void Context::DestroyRenderPassCache() { for (auto& it : m_render_pass_cache) @@ -1105,7 +1123,8 @@ void Context::ActivateCommandBuffer(u32 index) { const double ns_diff = (timestamps[1] - timestamps[0]) * static_cast(m_device_properties.limits.timestampPeriod); - m_accumulated_gpu_time = static_cast(static_cast(m_accumulated_gpu_time) + (ns_diff / 1000000.0)); + m_accumulated_gpu_time = + static_cast(static_cast(m_accumulated_gpu_time) + (ns_diff / 1000000.0)); } } else diff --git a/src/common/vulkan/context.h b/src/common/vulkan/context.h index e1fe79ee9..037e3c30c 100644 --- a/src/common/vulkan/context.h +++ b/src/common/vulkan/context.h @@ -7,6 +7,7 @@ #include "../types.h" #include "loader.h" +#include "stream_buffer.h" #include #include #include @@ -91,22 +92,31 @@ public: ALWAYS_INLINE bool SupportsDualSourceBlend() const { return m_device_features.dualSrcBlend == VK_TRUE; } // Helpers for getting constants - ALWAYS_INLINE VkDeviceSize GetUniformBufferAlignment() const + ALWAYS_INLINE u32 GetUniformBufferAlignment() const { - return m_device_properties.limits.minUniformBufferOffsetAlignment; + return static_cast(m_device_properties.limits.minUniformBufferOffsetAlignment); } - ALWAYS_INLINE VkDeviceSize GetTexelBufferAlignment() const + ALWAYS_INLINE u32 GetTexelBufferAlignment() const { - return m_device_properties.limits.minTexelBufferOffsetAlignment; + return static_cast(m_device_properties.limits.minTexelBufferOffsetAlignment); } - ALWAYS_INLINE VkDeviceSize GetStorageBufferAlignment() const + ALWAYS_INLINE u32 GetStorageBufferAlignment() const { - return m_device_properties.limits.minStorageBufferOffsetAlignment; + return static_cast(m_device_properties.limits.minStorageBufferOffsetAlignment); } - ALWAYS_INLINE VkDeviceSize GetBufferImageGranularity() const + ALWAYS_INLINE u32 GetBufferImageGranularity() const { - return m_device_properties.limits.bufferImageGranularity; + return static_cast(m_device_properties.limits.bufferImageGranularity); } + ALWAYS_INLINE u32 GetBufferCopyOffsetAlignment() const + { + return static_cast(m_device_properties.limits.optimalBufferCopyOffsetAlignment); + } + ALWAYS_INLINE u32 GetBufferCopyRowPitchAlignment() const + { + return static_cast(m_device_properties.limits.optimalBufferCopyRowPitchAlignment); + } + ALWAYS_INLINE u32 GetMaxImageDimension2D() const { return m_device_properties.limits.maxImageDimension2D; } // Finds a memory type index for the specified memory properties and the bits returned by // vkGetImageMemoryRequirements @@ -125,6 +135,7 @@ public: // is submitted, after that you should call these functions again. ALWAYS_INLINE VkDescriptorPool GetGlobalDescriptorPool() const { return m_global_descriptor_pool; } ALWAYS_INLINE VkCommandBuffer GetCurrentCommandBuffer() const { return m_current_command_buffer; } + ALWAYS_INLINE StreamBuffer& GetTextureUploadBuffer() { return m_texture_upload_buffer; } ALWAYS_INLINE VkDescriptorPool GetCurrentDescriptorPool() const { return m_frame_resources[m_current_frame].descriptor_pool; @@ -198,6 +209,7 @@ private: void DestroyCommandBuffers(); bool CreateGlobalDescriptorPool(); void DestroyGlobalDescriptorPool(); + bool CreateTextureStreamBuffer(); void DestroyRenderPassCache(); void ActivateCommandBuffer(u32 index); @@ -247,7 +259,7 @@ private: u64 m_completed_fence_counter = 0; u32 m_current_frame; - bool m_owns_device = false; + StreamBuffer m_texture_upload_buffer; std::atomic_bool m_last_present_failed{false}; std::atomic_bool m_present_done{true}; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index a8dd47987..f4c23057e 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -103,6 +103,21 @@ void GPU_SW::UpdateSettings() m_backend.UpdateSettings(); } +HostDisplayTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, HostDisplayPixelFormat format) +{ + if (!m_display_texture || m_display_texture->GetWidth() != width || m_display_texture->GetHeight() != height || + m_display_texture->GetFormat() != format) + { + g_host_display->ClearDisplayTexture(); + m_display_texture.reset(); + m_display_texture = g_host_display->CreateTexture(width, height, 1, 1, 1, format, nullptr, 0, true); + if (!m_display_texture) + Log_ErrorPrintf("Failed to create %ux%u %u texture", width, height, static_cast(format)); + } + + return m_display_texture.get(); +} + template static void CopyOutRow16(const u16* src_ptr, out_type* dst_ptr, u32 width); @@ -240,13 +255,14 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field using OutputPixelType = std::conditional_t< display_format == HostDisplayPixelFormat::RGBA8 || display_format == HostDisplayPixelFormat::BGRA8, u32, u16>; + HostDisplayTexture* texture = GetDisplayTexture(width, height, display_format); + if (!texture) + return; + if (!interlaced) { - if (!g_host_display->BeginSetDisplayPixels(display_format, width, height, reinterpret_cast(&dst_ptr), - &dst_stride)) - { + if (!texture->BeginUpdate(width, height, reinterpret_cast(&dst_ptr), &dst_stride)) return; - } } else { @@ -293,13 +309,11 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field } if (!interlaced) - { - g_host_display->EndSetDisplayPixels(); - } + texture->EndUpdate(0, 0, width, height); else - { - g_host_display->SetDisplayPixels(display_format, width, height, m_display_texture_buffer.data(), output_stride); - } + texture->Update(0, 0, width, height, m_display_texture_buffer.data(), output_stride); + + g_host_display->SetDisplayTexture(texture->GetHandle(), display_format, width, height, 0, 0, width, height); } void GPU_SW::CopyOut15Bit(HostDisplayPixelFormat display_format, u32 src_x, u32 src_y, u32 width, u32 height, u32 field, @@ -334,13 +348,14 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh using OutputPixelType = std::conditional_t< display_format == HostDisplayPixelFormat::RGBA8 || display_format == HostDisplayPixelFormat::BGRA8, u32, u16>; + HostDisplayTexture* texture = GetDisplayTexture(width, height, display_format); + if (!texture) + return; + if (!interlaced) { - if (!g_host_display->BeginSetDisplayPixels(display_format, width, height, reinterpret_cast(&dst_ptr), - &dst_stride)) - { + if (!texture->BeginUpdate(width, height, reinterpret_cast(&dst_ptr), &dst_stride)) return; - } } else { @@ -451,13 +466,11 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh } if (!interlaced) - { - g_host_display->EndSetDisplayPixels(); - } + texture->EndUpdate(0, 0, width, height); else - { - g_host_display->SetDisplayPixels(display_format, width, height, m_display_texture_buffer.data(), output_stride); - } + texture->Update(0, 0, width, height, m_display_texture_buffer.data(), output_stride); + + g_host_display->SetDisplayTexture(texture->GetHandle(), display_format, width, height, 0, 0, width, height); } void GPU_SW::CopyOut24Bit(HostDisplayPixelFormat display_format, u32 src_x, u32 src_y, u32 skip_x, u32 width, diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 0b4f0906d..c33d1d8d2 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -55,9 +55,12 @@ protected: void FillBackendCommandParameters(GPUBackendCommand* cmd) const; void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; + HostDisplayTexture* GetDisplayTexture(u32 width, u32 height, HostDisplayPixelFormat format); + HeapArray m_display_texture_buffer; HostDisplayPixelFormat m_16bit_display_format = HostDisplayPixelFormat::RGB565; HostDisplayPixelFormat m_24bit_display_format = HostDisplayPixelFormat::RGBA8; + std::unique_ptr m_display_texture; GPU_SW_Backend m_backend; }; diff --git a/src/core/host_display.cpp b/src/core/host_display.cpp index 6cc27e92a..49bceda35 100644 --- a/src/core/host_display.cpp +++ b/src/core/host_display.cpp @@ -20,6 +20,25 @@ std::unique_ptr g_host_display; HostDisplayTexture::~HostDisplayTexture() = default; +bool HostDisplayTexture::BeginUpdate(u32 width, u32 height, void** out_buffer, u32* out_pitch) /* = 0*/ +{ + return false; +} + +void HostDisplayTexture::EndUpdate(u32 x, u32 y, u32 width, u32 height) /* = 0*/ {} + +bool HostDisplayTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch) +{ + void* map_ptr; + u32 map_pitch; + if (!BeginUpdate(width, height, &map_ptr, &map_pitch)) + return false; + + StringUtil::StrideMemCpy(map_ptr, map_pitch, data, pitch, std::min(pitch, map_pitch), height); + EndUpdate(x, y, width, height); + return true; +} + HostDisplay::~HostDisplay() = default; RenderAPI HostDisplay::GetPreferredAPI() @@ -124,36 +143,6 @@ u32 HostDisplay::GetDisplayPixelFormatSize(HostDisplayPixelFormat format) } } -bool HostDisplay::SetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, const void* buffer, u32 pitch) -{ - void* map_ptr; - u32 map_pitch; - if (!BeginSetDisplayPixels(format, width, height, &map_ptr, &map_pitch)) - return false; - - if (pitch == map_pitch) - { - std::memcpy(map_ptr, buffer, height * map_pitch); - } - else - { - const u32 copy_size = width * GetDisplayPixelFormatSize(format); - DebugAssert(pitch >= copy_size && map_pitch >= copy_size); - - const u8* src_ptr = static_cast(buffer); - u8* dst_ptr = static_cast(map_ptr); - for (u32 i = 0; i < height; i++) - { - std::memcpy(dst_ptr, src_ptr, copy_size); - src_ptr += pitch; - dst_ptr += map_pitch; - } - } - - EndSetDisplayPixels(); - return true; -} - bool HostDisplay::GetHostRefreshRate(float* refresh_rate) { if (m_window_info.surface_refresh_rate > 0.0f) diff --git a/src/core/host_display.h b/src/core/host_display.h index 8e15907f3..7226a674f 100644 --- a/src/core/host_display.h +++ b/src/core/host_display.h @@ -41,6 +41,10 @@ public: virtual u32 GetLevels() const = 0; virtual u32 GetSamples() const = 0; virtual HostDisplayPixelFormat GetFormat() const = 0; + + virtual bool BeginUpdate(u32 width, u32 height, void** out_buffer, u32* out_pitch)/* = 0*/; + virtual void EndUpdate(u32 x, u32 y, u32 width, u32 height)/* = 0*/; + virtual bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch); }; // Interface to the frontend's renderer. @@ -124,9 +128,6 @@ public: virtual std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, HostDisplayPixelFormat format, const void* data, u32 data_stride, bool dynamic = false) = 0; - virtual void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, - u32 data_stride) = 0; - virtual bool DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, u32 width, u32 height, void* out_data, u32 out_data_stride) = 0; @@ -203,11 +204,6 @@ public: virtual bool SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const = 0; - virtual bool BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, - u32* out_pitch) = 0; - virtual void EndSetDisplayPixels() = 0; - virtual bool SetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, const void* buffer, u32 pitch); - virtual bool GetHostRefreshRate(float* refresh_rate); /// Enables/disables GPU frame timing. diff --git a/src/frontend-common/d3d11_host_display.cpp b/src/frontend-common/d3d11_host_display.cpp index 5d9745794..be27a3eb1 100644 --- a/src/frontend-common/d3d11_host_display.cpp +++ b/src/frontend-common/d3d11_host_display.cpp @@ -23,7 +23,7 @@ Log_SetChannel(D3D11HostDisplay); namespace FrontendCommon { -class D3D11HostDisplayTexture : public HostDisplayTexture +class D3D11HostDisplayTexture final : public HostDisplayTexture { public: D3D11HostDisplayTexture(D3D11::Texture texture, HostDisplayPixelFormat format, bool dynamic) @@ -40,6 +40,41 @@ public: u32 GetSamples() const override { return m_texture.GetSamples(); } HostDisplayPixelFormat GetFormat() const override { return m_format; } + bool BeginUpdate(u32 width, u32 height, void** out_buffer, u32* out_pitch) override + { + if (!m_dynamic || m_texture.GetWidth() != width || m_texture.GetHeight() != height) + return false; + + D3D11_MAPPED_SUBRESOURCE sr; + HRESULT hr = static_cast(g_host_display->GetRenderContext()) + ->Map(m_texture, 0, D3D11_MAP_WRITE_DISCARD, 0, &sr); + if (FAILED(hr)) + { + Log_ErrorPrintf("Map pixels texture failed: %08X", hr); + return false; + } + + *out_buffer = sr.pData; + *out_pitch = sr.RowPitch; + return true; + } + + void EndUpdate(u32 x, u32 y, u32 width, u32 height) + { + static_cast(g_host_display->GetRenderContext())->Unmap(m_texture, 0); + } + + bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch) override + { + if (m_dynamic) + return HostDisplayTexture::Update(x, y, width, height, data, pitch); + + const CD3D11_BOX dst_box(x, y, 0, x + width, y + height, 1); + static_cast(g_host_display->GetRenderContext()) + ->UpdateSubresource(m_texture, 0, &dst_box, data, pitch, pitch * height); + return true; + } + ALWAYS_INLINE ID3D11Texture2D* GetD3DTexture() const { return m_texture.GetD3DTexture(); } ALWAYS_INLINE ID3D11ShaderResourceView* GetD3DSRV() const { return m_texture.GetD3DSRV(); } ALWAYS_INLINE ID3D11ShaderResourceView* const* GetD3DSRVArray() const { return m_texture.GetD3DSRVArray(); } @@ -107,43 +142,6 @@ std::unique_ptr D3D11HostDisplay::CreateTexture(u32 width, u return std::make_unique(std::move(tex), format, dynamic); } -void D3D11HostDisplay::UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, - const void* texture_data, u32 texture_data_stride) -{ - D3D11HostDisplayTexture* d3d11_texture = static_cast(texture); - if (!d3d11_texture->IsDynamic()) - { - const CD3D11_BOX dst_box(x, y, 0, x + width, y + height, 1); - m_context->UpdateSubresource(d3d11_texture->GetD3DTexture(), 0, &dst_box, texture_data, texture_data_stride, - texture_data_stride * height); - } - else - { - D3D11_MAPPED_SUBRESOURCE sr; - HRESULT hr = m_context->Map(d3d11_texture->GetD3DTexture(), 0, D3D11_MAP_WRITE_DISCARD, 0, &sr); - if (FAILED(hr)) - Panic("Failed to map dynamic host display texture"); - - char* dst_ptr = static_cast(sr.pData) + (y * sr.RowPitch) + (x * sizeof(u32)); - const char* src_ptr = static_cast(texture_data); - if (sr.RowPitch == texture_data_stride) - { - std::memcpy(dst_ptr, src_ptr, texture_data_stride * height); - } - else - { - for (u32 row = 0; row < height; row++) - { - std::memcpy(dst_ptr, src_ptr, width * sizeof(u32)); - src_ptr += texture_data_stride; - dst_ptr += sr.RowPitch; - } - } - - m_context->Unmap(d3d11_texture->GetD3DTexture(), 0); - } -} - bool D3D11HostDisplay::DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, u32 width, u32 height, void* out_data, u32 out_data_stride) { @@ -182,43 +180,6 @@ bool D3D11HostDisplay::SupportsDisplayPixelFormat(HostDisplayPixelFormat format) return (SUCCEEDED(m_device->CheckFormatSupport(dfmt, &support)) && ((support & required) == required)); } -bool D3D11HostDisplay::BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, - u32* out_pitch) -{ - ClearDisplayTexture(); - - const DXGI_FORMAT dxgi_format = s_display_pixel_format_mapping[static_cast(format)]; - if (m_display_pixels_texture.GetWidth() < width || m_display_pixels_texture.GetHeight() < height || - m_display_pixels_texture.GetFormat() != dxgi_format) - { - if (!m_display_pixels_texture.Create(m_device.Get(), width, height, 1, 1, dxgi_format, D3D11_BIND_SHADER_RESOURCE, - nullptr, 0, true)) - { - return false; - } - } - - D3D11_MAPPED_SUBRESOURCE sr; - HRESULT hr = m_context->Map(m_display_pixels_texture.GetD3DTexture(), 0, D3D11_MAP_WRITE_DISCARD, 0, &sr); - if (FAILED(hr)) - { - Log_ErrorPrintf("Map pixels texture failed: %08X", hr); - return false; - } - - *out_buffer = sr.pData; - *out_pitch = sr.RowPitch; - - SetDisplayTexture(m_display_pixels_texture.GetD3DSRV(), format, m_display_pixels_texture.GetWidth(), - m_display_pixels_texture.GetHeight(), 0, 0, static_cast(width), static_cast(height)); - return true; -} - -void D3D11HostDisplay::EndSetDisplayPixels() -{ - m_context->Unmap(m_display_pixels_texture.GetD3DTexture(), 0); -} - bool D3D11HostDisplay::GetHostRefreshRate(float* refresh_rate) { if (m_swap_chain && IsFullscreen()) @@ -1235,7 +1196,8 @@ void D3D11HostDisplay::PopTimestampQuery() D3D11_ASYNC_GETDATA_DONOTFLUSH); if (start_hr == S_OK && end_hr == S_OK) { - const float delta = static_cast(static_cast(end - start) / (static_cast(disjoint.Frequency) / 1000.0)); + const float delta = + static_cast(static_cast(end - start) / (static_cast(disjoint.Frequency) / 1000.0)); m_accumulated_gpu_time += delta; m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; m_waiting_timestamp_queries--; diff --git a/src/frontend-common/d3d11_host_display.h b/src/frontend-common/d3d11_host_display.h index f1623e3f2..bd15b29f5 100644 --- a/src/frontend-common/d3d11_host_display.h +++ b/src/frontend-common/d3d11_host_display.h @@ -54,14 +54,9 @@ public: std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, HostDisplayPixelFormat format, const void* data, u32 data_stride, bool dynamic = false) override; - void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* texture_data, - u32 texture_data_stride) override; bool DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, u32 width, u32 height, void* out_data, u32 out_data_stride) override; bool SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const override; - bool BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, - u32* out_pitch) override; - void EndSetDisplayPixels() override; bool GetHostRefreshRate(float* refresh_rate) override; @@ -137,7 +132,6 @@ protected: ComPtr m_point_sampler; ComPtr m_linear_sampler; - D3D11::Texture m_display_pixels_texture; D3D11::StreamBuffer m_display_uniform_buffer; D3D11::AutoStagingTexture m_readback_staging_texture; diff --git a/src/frontend-common/d3d12_host_display.cpp b/src/frontend-common/d3d12_host_display.cpp index 449c0b159..aaf01e3a3 100644 --- a/src/frontend-common/d3d12_host_display.cpp +++ b/src/frontend-common/d3d12_host_display.cpp @@ -21,7 +21,7 @@ static constexpr std::array(HostDisplayPixelFormat s_display_pixel_format_mapping = {{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM}}; -class D3D12HostDisplayTexture : public HostDisplayTexture +class D3D12HostDisplayTexture final : public HostDisplayTexture { public: D3D12HostDisplayTexture(D3D12::Texture texture) : m_texture(std::move(texture)) {} @@ -45,6 +45,16 @@ public: return HostDisplayPixelFormat::Count; } + bool BeginUpdate(u32 width, u32 height, void** out_buffer, u32* out_pitch) override + { + return m_texture.BeginStreamUpdate(0, 0, width, height, out_buffer, out_pitch); + } + + void EndUpdate(u32 x, u32 y, u32 width, u32 height) override + { + m_texture.EndStreamUpdate(x, y, width, height); + } + const D3D12::Texture& GetTexture() const { return m_texture; } D3D12::Texture& GetTexture() { return m_texture; } @@ -107,13 +117,6 @@ std::unique_ptr D3D12HostDisplay::CreateTexture(u32 width, u return std::make_unique(std::move(tex)); } -void D3D12HostDisplay::UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, - const void* texture_data, u32 texture_data_stride) -{ - static_cast(texture)->GetTexture().LoadData(x, y, width, height, texture_data, - texture_data_stride); -} - bool D3D12HostDisplay::DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, u32 width, u32 height, void* out_data, u32 out_data_stride) { @@ -139,36 +142,6 @@ bool D3D12HostDisplay::SupportsDisplayPixelFormat(HostDisplayPixelFormat format) return g_d3d12_context->SupportsTextureFormat(dfmt); } -bool D3D12HostDisplay::BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, - u32* out_pitch) -{ - ClearDisplayTexture(); - - const DXGI_FORMAT dxgi_format = s_display_pixel_format_mapping[static_cast(format)]; - if (m_display_pixels_texture.GetWidth() < width || m_display_pixels_texture.GetHeight() < height || - m_display_pixels_texture.GetFormat() != dxgi_format) - { - if (!m_display_pixels_texture.Create(width, height, 1, dxgi_format, dxgi_format, DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_NONE)) - { - return false; - } - } - - if (!m_display_pixels_texture.BeginStreamUpdate(0, 0, width, height, out_buffer, out_pitch)) - return false; - - SetDisplayTexture(&m_display_pixels_texture, format, m_display_pixels_texture.GetWidth(), - m_display_pixels_texture.GetHeight(), 0, 0, static_cast(width), static_cast(height)); - return true; -} - -void D3D12HostDisplay::EndSetDisplayPixels() -{ - m_display_pixels_texture.EndStreamUpdate(0, 0, static_cast(m_display_texture_view_width), - static_cast(m_display_texture_view_height)); -} - bool D3D12HostDisplay::GetHostRefreshRate(float* refresh_rate) { if (m_swap_chain && IsFullscreen()) @@ -772,10 +745,13 @@ void D3D12HostDisplay::RenderDisplay(ID3D12GraphicsCommandList* cmdlist, s32 lef s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter) { - const float uniforms[4] = {static_cast(texture_view_x) / static_cast(texture_width), - static_cast(texture_view_y) / static_cast(texture_height), - (static_cast(texture_view_width) - 0.5f) / static_cast(texture_width), - (static_cast(texture_view_height) - 0.5f) / static_cast(texture_height)}; + const float position_adjust = linear_filter ? 0.5f : 0.0f; + const float size_adjust = linear_filter ? 1.0f : 0.0f; + const float uniforms[4] = { + (static_cast(texture_view_x) + position_adjust) / static_cast(texture_width), + (static_cast(texture_view_y) + position_adjust) / static_cast(texture_height), + (static_cast(texture_view_width) - size_adjust) / static_cast(texture_width), + (static_cast(texture_view_height) - size_adjust) / static_cast(texture_height)}; if (!m_display_uniform_buffer.ReserveMemory(sizeof(uniforms), D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT)) Panic("Failed to reserve UBO space"); diff --git a/src/frontend-common/d3d12_host_display.h b/src/frontend-common/d3d12_host_display.h index aacf33b3a..5f59e1cd1 100644 --- a/src/frontend-common/d3d12_host_display.h +++ b/src/frontend-common/d3d12_host_display.h @@ -55,14 +55,9 @@ public: std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, HostDisplayPixelFormat format, const void* data, u32 data_stride, bool dynamic = false) override; - void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* texture_data, - u32 texture_data_stride) override; bool DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, u32 width, u32 height, void* out_data, u32 out_data_stride) override; bool SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const override; - bool BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, - u32* out_pitch) override; - void EndSetDisplayPixels() override; bool GetHostRefreshRate(float* refresh_rate) override; diff --git a/src/frontend-common/opengl_host_display.cpp b/src/frontend-common/opengl_host_display.cpp index 33ea0c3f8..59c65656a 100644 --- a/src/frontend-common/opengl_host_display.cpp +++ b/src/frontend-common/opengl_host_display.cpp @@ -13,28 +13,35 @@ Log_SetChannel(OpenGLHostDisplay); namespace FrontendCommon { -class OpenGLHostDisplayTexture : public HostDisplayTexture +enum : u32 +{ + TEXTURE_STREAM_BUFFER_SIZE = 16 * 1024 * 1024, +}; + +class OpenGLHostDisplayTexture final : public HostDisplayTexture { public: - OpenGLHostDisplayTexture(GL::Texture texture, HostDisplayPixelFormat format) - : m_texture(std::move(texture)), m_format(format) - { - } - ~OpenGLHostDisplayTexture() override = default; + OpenGLHostDisplayTexture(GL::Texture texture, HostDisplayPixelFormat format); + ~OpenGLHostDisplayTexture() override; - void* GetHandle() const override { return reinterpret_cast(static_cast(m_texture.GetGLId())); } - u32 GetWidth() const override { return m_texture.GetWidth(); } - u32 GetHeight() const override { return m_texture.GetHeight(); } - u32 GetLayers() const override { return 1; } - u32 GetLevels() const override { return 1; } - u32 GetSamples() const override { return m_texture.GetSamples(); } - HostDisplayPixelFormat GetFormat() const override { return m_format; } + void* GetHandle() const override; + u32 GetWidth() const override; + u32 GetHeight() const override; + u32 GetLayers() const override; + u32 GetLevels() const override; + u32 GetSamples() const override; + HostDisplayPixelFormat GetFormat() const override; - GLuint GetGLID() const { return m_texture.GetGLId(); } + GLuint GetGLID() const; + + bool BeginUpdate(u32 width, u32 height, void** out_buffer, u32* out_pitch) override; + void EndUpdate(u32 x, u32 y, u32 width, u32 height) override; + bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch) override; private: GL::Texture m_texture; HostDisplayPixelFormat m_format; + u32 m_map_offset = 0; }; OpenGLHostDisplay::OpenGLHostDisplay() = default; @@ -105,43 +112,6 @@ std::unique_ptr OpenGLHostDisplay::CreateTexture(u32 width, return std::make_unique(std::move(tex), format); } -void OpenGLHostDisplay::UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, - const void* texture_data, u32 texture_data_stride) -{ - OpenGLHostDisplayTexture* tex = static_cast(texture); - const auto [gl_internal_format, gl_format, gl_type] = - GetPixelFormatMapping(m_gl_context->IsGLES(), texture->GetFormat()); - - GLint alignment; - if (texture_data_stride & 1) - alignment = 1; - else if (texture_data_stride & 2) - alignment = 2; - else - alignment = 4; - - GLint old_texture_binding = 0, old_alignment = 0, old_row_length = 0; - glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_texture_binding); - glBindTexture(GL_TEXTURE_2D, tex->GetGLID()); - - glGetIntegerv(GL_UNPACK_ALIGNMENT, &old_alignment); - glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); - - if (!m_use_gles2_draw_path) - { - glGetIntegerv(GL_UNPACK_ROW_LENGTH, &old_row_length); - glPixelStorei(GL_UNPACK_ROW_LENGTH, texture_data_stride / GetDisplayPixelFormatSize(texture->GetFormat())); - } - - glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, gl_format, gl_type, texture_data); - - if (!m_use_gles2_draw_path) - glPixelStorei(GL_UNPACK_ROW_LENGTH, old_row_length); - - glPixelStorei(GL_UNPACK_ALIGNMENT, old_alignment); - glBindTexture(GL_TEXTURE_2D, old_texture_binding); -} - bool OpenGLHostDisplay::DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, u32 width, u32 height, void* out_data, u32 out_data_stride) { @@ -174,152 +144,12 @@ bool OpenGLHostDisplay::DownloadTexture(const void* texture_handle, HostDisplayP return true; } -void OpenGLHostDisplay::BindDisplayPixelsTexture() -{ - if (m_display_pixels_texture_id == 0) - { - const bool linear = IsUsingLinearFiltering(); - glGenTextures(1, &m_display_pixels_texture_id); - glBindTexture(GL_TEXTURE_2D, m_display_pixels_texture_id); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, linear ? GL_LINEAR : GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, linear ? GL_LINEAR : GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); - m_display_texture_is_linear_filtered = linear; - } - else - { - glBindTexture(GL_TEXTURE_2D, m_display_pixels_texture_id); - } -} - -void OpenGLHostDisplay::UpdateDisplayPixelsTextureFilter() -{ - const bool linear = IsUsingLinearFiltering(); - if (linear == m_display_texture_is_linear_filtered) - return; - - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, linear ? GL_LINEAR : GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, linear ? GL_LINEAR : GL_NEAREST); - m_display_texture_is_linear_filtered = linear; -} - bool OpenGLHostDisplay::SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const { const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_gl_context->IsGLES(), format); return (gl_internal_format != static_cast(0)); } -bool OpenGLHostDisplay::BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, - u32* out_pitch) -{ - const u32 pixel_size = GetDisplayPixelFormatSize(format); - const u32 stride = Common::AlignUpPow2(width * pixel_size, 4); - const u32 size_required = stride * height * pixel_size; - - if (m_use_pbo_for_pixels) - { - const u32 buffer_size = Common::AlignUpPow2(size_required * 2, 4 * 1024 * 1024); - if (!m_display_pixels_texture_pbo || m_display_pixels_texture_pbo->GetSize() < buffer_size) - { - m_display_pixels_texture_pbo.reset(); - m_display_pixels_texture_pbo = GL::StreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, buffer_size); - if (!m_display_pixels_texture_pbo) - return false; - } - - const auto map = m_display_pixels_texture_pbo->Map(GetDisplayPixelFormatSize(format), size_required); - m_display_texture_format = format; - m_display_pixels_texture_pbo_map_offset = map.buffer_offset; - m_display_pixels_texture_pbo_map_size = size_required; - *out_buffer = map.pointer; - *out_pitch = stride; - } - else - { - if (m_gles_pixels_repack_buffer.size() < size_required) - m_gles_pixels_repack_buffer.resize(size_required); - - *out_buffer = m_gles_pixels_repack_buffer.data(); - *out_pitch = stride; - } - - BindDisplayPixelsTexture(); - SetDisplayTexture(reinterpret_cast(static_cast(m_display_pixels_texture_id)), format, width, height, - 0, 0, width, height); - return true; -} - -void OpenGLHostDisplay::EndSetDisplayPixels() -{ - const u32 width = static_cast(m_display_texture_view_width); - const u32 height = static_cast(m_display_texture_view_height); - - const auto [gl_internal_format, gl_format, gl_type] = - GetPixelFormatMapping(m_gl_context->IsGLES(), m_display_texture_format); - - glBindTexture(GL_TEXTURE_2D, m_display_pixels_texture_id); - if (m_use_pbo_for_pixels) - { - m_display_pixels_texture_pbo->Unmap(m_display_pixels_texture_pbo_map_size); - m_display_pixels_texture_pbo->Bind(); - glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, - reinterpret_cast(static_cast(m_display_pixels_texture_pbo_map_offset))); - m_display_pixels_texture_pbo->Unbind(); - - m_display_pixels_texture_pbo_map_offset = 0; - m_display_pixels_texture_pbo_map_size = 0; - } - else - { - // glTexImage2D should be quicker on Mali... - glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, - m_gles_pixels_repack_buffer.data()); - } - - glBindTexture(GL_TEXTURE_2D, 0); -} - -bool OpenGLHostDisplay::SetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, const void* buffer, - u32 pitch) -{ - BindDisplayPixelsTexture(); - - const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_gl_context->IsGLES(), format); - const u32 pixel_size = GetDisplayPixelFormatSize(format); - const bool is_packed_tightly = (pitch == (pixel_size * width)); - - // If we have GLES3, we can set row_length. - if (!m_use_gles2_draw_path || is_packed_tightly) - { - if (!is_packed_tightly) - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / pixel_size); - - glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, buffer); - - if (!is_packed_tightly) - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - else - { - // Otherwise, we need to repack the image. - const u32 packed_pitch = width * pixel_size; - const u32 repack_size = packed_pitch * height; - if (m_gles_pixels_repack_buffer.size() < repack_size) - m_gles_pixels_repack_buffer.resize(repack_size); - StringUtil::StrideMemCpy(m_gles_pixels_repack_buffer.data(), packed_pitch, buffer, pitch, packed_pitch, height); - glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, - m_gles_pixels_repack_buffer.data()); - } - - glBindTexture(GL_TEXTURE_2D, 0); - - SetDisplayTexture(reinterpret_cast(static_cast(m_display_pixels_texture_id)), format, width, height, - 0, 0, width, height); - return true; -} - void OpenGLHostDisplay::SetVSync(bool enabled) { if (m_gl_context->GetWindowInfo().type == WindowInfo::Type::Surfaceless) @@ -420,9 +250,9 @@ bool OpenGLHostDisplay::InitializeRenderDevice(std::string_view shader_cache_dir m_use_pbo_for_pixels = !m_use_gles2_draw_path; if (GetRenderAPI() == RenderAPI::OpenGLES) { - // Adreno seems to corrupt textures through PBOs... + // Adreno seems to corrupt textures through PBOs... and Mali is slow. const char* gl_vendor = reinterpret_cast(glGetString(GL_VENDOR)); - if (std::strstr(gl_vendor, "Qualcomm") || std::strstr(gl_vendor, "Broadcom")) + if (std::strstr(gl_vendor, "Qualcomm") || std::strstr(gl_vendor, "ARM") || std::strstr(gl_vendor, "Broadcom")) m_use_pbo_for_pixels = false; } @@ -713,12 +543,6 @@ void OpenGLHostDisplay::DestroyResources() m_post_processing_ubo.reset(); m_post_processing_stages.clear(); - if (m_display_pixels_texture_id != 0) - { - glDeleteTextures(1, &m_display_pixels_texture_id); - m_display_pixels_texture_id = 0; - } - if (m_display_vao != 0) { glDeleteVertexArrays(1, &m_display_vao); @@ -877,9 +701,10 @@ void OpenGLHostDisplay::RenderDisplay(s32 left, s32 bottom, s32 width, s32 heigh glBindTexture(GL_TEXTURE_2D, static_cast(reinterpret_cast(texture_handle))); m_display_program.Bind(); + const bool linear = IsUsingLinearFiltering(); + if (!m_use_gles2_draw_path) { - const bool linear = IsUsingLinearFiltering(); const float position_adjust = linear ? 0.5f : 0.0f; const float size_adjust = linear ? 1.0f : 0.0f; const float flip_adjust = (texture_view_height < 0) ? -1.0f : 1.0f; @@ -895,8 +720,9 @@ void OpenGLHostDisplay::RenderDisplay(s32 left, s32 bottom, s32 width, s32 heigh } else { - if (static_cast(reinterpret_cast(texture_handle)) == m_display_pixels_texture_id) - UpdateDisplayPixelsTextureFilter(); + // TODO: This sucks. + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, linear ? GL_LINEAR : GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, linear ? GL_LINEAR : GL_NEAREST); DrawFullscreenQuadES2(m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, m_display_texture_width, m_display_texture_height); @@ -1207,8 +1033,11 @@ bool OpenGLHostDisplay::SetGPUTimingEnabled(bool enabled) if (m_gpu_timing_enabled == enabled) return true; - if (enabled && m_gl_context->IsGLES() && !GLAD_GL_EXT_disjoint_timer_query) + if (enabled && m_gl_context->IsGLES() && + (!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT)) + { return false; + } m_gpu_timing_enabled = enabled; if (m_gpu_timing_enabled) @@ -1226,4 +1055,178 @@ float OpenGLHostDisplay::GetAndResetAccumulatedGPUTime() return value; } +GL::StreamBuffer* OpenGLHostDisplay::GetTextureStreamBuffer() +{ + if (m_use_gles2_draw_path || m_texture_stream_buffer) + return m_texture_stream_buffer.get(); + + m_texture_stream_buffer = GL::StreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, TEXTURE_STREAM_BUFFER_SIZE); + return m_texture_stream_buffer.get(); +} + +OpenGLHostDisplayTexture::OpenGLHostDisplayTexture(GL::Texture texture, HostDisplayPixelFormat format) + : m_texture(std::move(texture)), m_format(format) +{ +} + +OpenGLHostDisplayTexture::~OpenGLHostDisplayTexture() = default; + +void* OpenGLHostDisplayTexture::GetHandle() const +{ + return reinterpret_cast(static_cast(m_texture.GetGLId())); +} + +u32 OpenGLHostDisplayTexture::GetWidth() const +{ + return m_texture.GetWidth(); +} + +u32 OpenGLHostDisplayTexture::GetHeight() const +{ + return m_texture.GetHeight(); +} + +u32 OpenGLHostDisplayTexture::GetLayers() const +{ + return 1; +} + +u32 OpenGLHostDisplayTexture::GetLevels() const +{ + return 1; +} + +u32 OpenGLHostDisplayTexture::GetSamples() const +{ + return m_texture.GetSamples(); +} + +HostDisplayPixelFormat OpenGLHostDisplayTexture::GetFormat() const +{ + return m_format; +} + +GLuint OpenGLHostDisplayTexture::GetGLID() const +{ + return m_texture.GetGLId(); +} + +bool OpenGLHostDisplayTexture::BeginUpdate(u32 width, u32 height, void** out_buffer, u32* out_pitch) +{ + const u32 pixel_size = HostDisplay::GetDisplayPixelFormatSize(m_format); + const u32 stride = Common::AlignUpPow2(width * pixel_size, 4); + const u32 size_required = stride * height; + OpenGLHostDisplay* display = static_cast(g_host_display.get()); + GL::StreamBuffer* buffer = display->UsePBOForUploads() ? display->GetTextureStreamBuffer() : nullptr; + + if (buffer && size_required < buffer->GetSize()) + { + auto map = buffer->Map(4096, size_required); + m_map_offset = map.buffer_offset; + *out_buffer = map.pointer; + *out_pitch = stride; + } + else + { + std::vector& repack_buffer = display->GetTextureRepackBuffer(); + if (repack_buffer.size() < size_required) + repack_buffer.resize(size_required); + + *out_buffer = repack_buffer.data(); + *out_pitch = stride; + } + + return true; +} + +void OpenGLHostDisplayTexture::EndUpdate(u32 x, u32 y, u32 width, u32 height) +{ + const u32 pixel_size = HostDisplay::GetDisplayPixelFormatSize(m_format); + const u32 stride = Common::AlignUpPow2(width * pixel_size, 4); + const u32 size_required = stride * height; + OpenGLHostDisplay* display = static_cast(g_host_display.get()); + GL::StreamBuffer* buffer = display->UsePBOForUploads() ? display->GetTextureStreamBuffer() : nullptr; + + const auto [gl_internal_format, gl_format, gl_type] = + GetPixelFormatMapping(display->GetGLContext()->IsGLES(), m_format); + const bool whole_texture = (!m_texture.UseTextureStorage() && x == 0 && y == 0 && width == m_texture.GetWidth() && + height == m_texture.GetHeight()); + + m_texture.Create(width, height, 1, gl_internal_format, gl_format, gl_type, nullptr, false, false); + m_texture.Bind(); + if (buffer && size_required < buffer->GetSize()) + { + buffer->Unmap(size_required); + buffer->Bind(); + + if (whole_texture) + { + glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, + reinterpret_cast(static_cast(m_map_offset))); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, gl_format, gl_type, + reinterpret_cast(static_cast(m_map_offset))); + } + + buffer->Unbind(); + } + else + { + std::vector& repack_buffer = display->GetTextureRepackBuffer(); + + if (whole_texture) + glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, repack_buffer.data()); + else + glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, gl_format, gl_type, repack_buffer.data()); + } +} + +bool OpenGLHostDisplayTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch) +{ + OpenGLHostDisplay* display = static_cast(g_host_display.get()); + const auto [gl_internal_format, gl_format, gl_type] = + GetPixelFormatMapping(display->GetGLContext()->IsGLES(), m_format); + const u32 pixel_size = HostDisplay::GetDisplayPixelFormatSize(m_format); + const bool is_packed_tightly = (pitch == (pixel_size * width)); + + const bool whole_texture = (!m_texture.UseTextureStorage() && x == 0 && y == 0 && width == m_texture.GetWidth() && + height == m_texture.GetHeight()); + m_texture.Bind(); + + // If we have GLES3, we can set row_length. + if (!display->UseGLES3DrawPath() || is_packed_tightly) + { + if (!is_packed_tightly) + glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / pixel_size); + + if (whole_texture) + glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, data); + else + glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, gl_format, gl_type, data); + + if (!is_packed_tightly) + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + } + else + { + // Otherwise, we need to repack the image. + std::vector& repack_buffer = display->GetTextureRepackBuffer(); + const u32 packed_pitch = width * pixel_size; + const u32 repack_size = packed_pitch * height; + if (repack_buffer.size() < repack_size) + repack_buffer.resize(repack_size); + + StringUtil::StrideMemCpy(repack_buffer.data(), packed_pitch, data, pitch, packed_pitch, height); + + if (whole_texture) + glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, width, height, 0, gl_format, gl_type, repack_buffer.data()); + else + glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, gl_format, gl_type, repack_buffer.data()); + } + + return true; +} + } // namespace FrontendCommon diff --git a/src/frontend-common/opengl_host_display.h b/src/frontend-common/opengl_host_display.h index 89b3557e2..8bc7d58e4 100644 --- a/src/frontend-common/opengl_host_display.h +++ b/src/frontend-common/opengl_host_display.h @@ -46,15 +46,9 @@ public: std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, HostDisplayPixelFormat format, const void* data, u32 data_stride, bool dynamic = false) override; - void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* texture_data, - u32 texture_data_stride) override; bool DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, u32 width, u32 height, void* out_data, u32 out_data_stride) override; bool SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const override; - bool BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, - u32* out_pitch) override; - void EndSetDisplayPixels() override; - bool SetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, const void* buffer, u32 pitch) override; void SetVSync(bool enabled) override; @@ -65,6 +59,13 @@ public: bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; + ALWAYS_INLINE GL::Context* GetGLContext() const { return m_gl_context.get(); } + ALWAYS_INLINE bool UsePBOForUploads() const { return m_use_pbo_for_pixels; } + ALWAYS_INLINE bool UseGLES3DrawPath() const { return m_use_gles2_draw_path; } + ALWAYS_INLINE std::vector& GetTextureRepackBuffer() { return m_texture_repack_buffer; } + + GL::StreamBuffer* GetTextureStreamBuffer(); + protected: static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; @@ -78,9 +79,6 @@ protected: void DestroyImGuiContext() override; bool UpdateImGuiFontTexture() override; - void BindDisplayPixelsTexture(); - void UpdateDisplayPixelsTextureFilter(); - void RenderDisplay(); void RenderImGui(); void RenderSoftwareCursor(); @@ -117,11 +115,8 @@ protected: GLuint m_display_linear_sampler = 0; GLuint m_uniform_buffer_alignment = 1; - GLuint m_display_pixels_texture_id = 0; - std::unique_ptr m_display_pixels_texture_pbo; - u32 m_display_pixels_texture_pbo_map_offset = 0; - u32 m_display_pixels_texture_pbo_map_size = 0; - std::vector m_gles_pixels_repack_buffer; + std::unique_ptr m_texture_stream_buffer; + std::vector m_texture_repack_buffer; PostProcessingChain m_post_processing_chain; GL::Texture m_post_processing_input_texture; @@ -135,7 +130,6 @@ protected: u8 m_waiting_timestamp_queries = 0; bool m_timestamp_query_started = false; - bool m_display_texture_is_linear_filtered = false; bool m_use_gles2_draw_path = false; bool m_use_pbo_for_pixels = false; }; diff --git a/src/frontend-common/vulkan_host_display.cpp b/src/frontend-common/vulkan_host_display.cpp index 415cc9c6c..c7a26019b 100644 --- a/src/frontend-common/vulkan_host_display.cpp +++ b/src/frontend-common/vulkan_host_display.cpp @@ -1,7 +1,9 @@ #include "vulkan_host_display.h" +#include "common/align.h" #include "common/assert.h" #include "common/log.h" #include "common/scoped_guard.h" +#include "common/string_util.h" #include "common/vulkan/builders.h" #include "common/vulkan/context.h" #include "common/vulkan/shader_cache.h" @@ -22,9 +24,8 @@ namespace FrontendCommon { class VulkanHostDisplayTexture : public HostDisplayTexture { public: - VulkanHostDisplayTexture(Vulkan::Texture texture, Vulkan::StagingTexture staging_texture, - HostDisplayPixelFormat format) - : m_texture(std::move(texture)), m_staging_texture(std::move(staging_texture)), m_format(format) + VulkanHostDisplayTexture(Vulkan::Texture texture, HostDisplayPixelFormat format) + : m_texture(std::move(texture)), m_format(format) { } ~VulkanHostDisplayTexture() override = default; @@ -37,13 +38,51 @@ public: u32 GetSamples() const override { return m_texture.GetSamples(); } HostDisplayPixelFormat GetFormat() const override { return m_format; } + u32 CalcUpdatePitch(u32 width) const + { + return Common::AlignUp(width * HostDisplay::GetDisplayPixelFormatSize(m_format), + g_vulkan_context->GetBufferCopyRowPitchAlignment()); + } + + bool BeginUpdate(u32 width, u32 height, void** out_buffer, u32* out_pitch) + { + const u32 pitch = CalcUpdatePitch(width); + const u32 required_size = pitch * height; + Vulkan::StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer(); + if (required_size > buffer.GetCurrentSize()) + return false; + + // TODO: allocate temporary buffer if this fails... + if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment())) + { + g_vulkan_context->ExecuteCommandBuffer(false); + if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment())) + return false; + } + + *out_buffer = buffer.GetCurrentHostPointer(); + *out_pitch = pitch; + return true; + } + + void EndUpdate(u32 x, u32 y, u32 width, u32 height) + { + const u32 pitch = CalcUpdatePitch(width); + const u32 required_size = pitch * height; + + Vulkan::StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer(); + const u32 buffer_offset = buffer.GetCurrentOffset(); + buffer.CommitMemory(required_size); + + m_texture.UpdateFromBuffer(g_vulkan_context->GetCurrentCommandBuffer(), 0, 0, x, y, width, height, + buffer.GetBuffer(), buffer_offset); + } + const Vulkan::Texture& GetTexture() const { return m_texture; } Vulkan::Texture& GetTexture() { return m_texture; } - Vulkan::StagingTexture& GetStagingTexture() { return m_staging_texture; } private: Vulkan::Texture m_texture; - Vulkan::StagingTexture m_staging_texture; HostDisplayPixelFormat m_format; }; @@ -168,6 +207,9 @@ std::unique_ptr VulkanHostDisplay::CreateTexture(u32 width, static constexpr VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + const Vulkan::Util::DebugScope debugScope(g_vulkan_context->GetCurrentCommandBuffer(), + "VulkanHostDisplay::CreateTexture"); + Vulkan::Texture texture; if (!texture.Create(width, height, levels, layers, vk_format, static_cast(samples), (layers > 1) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, @@ -176,24 +218,41 @@ std::unique_ptr VulkanHostDisplay::CreateTexture(u32 width, return {}; } - Vulkan::StagingTexture staging_texture; - if (data || dynamic) - { - if (!staging_texture.Create(dynamic ? Vulkan::StagingBuffer::Type::Mutable : Vulkan::StagingBuffer::Type::Upload, - vk_format, width, height)) - { - return {}; - } - } - const Vulkan::Util::DebugScope debugScope(g_vulkan_context->GetCurrentCommandBuffer(), - "VulkanHostDisplay::CreateTexture"); texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); if (data) { - staging_texture.WriteTexels(0, 0, width, height, data, data_stride); - staging_texture.CopyToTexture(g_vulkan_context->GetCurrentCommandBuffer(), 0, 0, texture, 0, 0, 0, 0, width, - height); + const u32 row_size = width * GetDisplayPixelFormatSize(format); + const u32 data_upload_pitch = Common::AlignUp(row_size, g_vulkan_context->GetBufferCopyRowPitchAlignment()); + const u32 data_size = data_upload_pitch * height; + Vulkan::StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer(); + + if (data_size < buffer.GetCurrentSize()) + { + if (!buffer.ReserveMemory(data_size, g_vulkan_context->GetBufferCopyOffsetAlignment())) + { + g_vulkan_context->ExecuteCommandBuffer(false); + if (!buffer.ReserveMemory(data_size, g_vulkan_context->GetBufferCopyOffsetAlignment())) + goto use_staging; + } + + StringUtil::StrideMemCpy(buffer.GetCurrentHostPointer(), data_upload_pitch, data, data_stride, row_size, height); + const u32 buffer_offset = buffer.GetCurrentOffset(); + buffer.CommitMemory(data_size); + texture.UpdateFromBuffer(g_vulkan_context->GetCurrentCommandBuffer(), 0, 0, 0, 0, width, height, + buffer.GetBuffer(), buffer_offset); + } + else + { + use_staging: + Vulkan::StagingTexture staging_texture; + if (!staging_texture.Create(Vulkan::StagingBuffer::Type::Upload, vk_format, width, height)) + return {}; + + staging_texture.WriteTexels(0, 0, width, height, data, data_stride); + staging_texture.CopyToTexture(g_vulkan_context->GetCurrentCommandBuffer(), 0, 0, texture, 0, 0, 0, 0, width, + height); + } } else { @@ -206,40 +265,7 @@ std::unique_ptr VulkanHostDisplay::CreateTexture(u32 width, texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - // don't need to keep the staging texture around if we're not dynamic - if (!dynamic) - staging_texture.Destroy(true); - - return std::make_unique(std::move(texture), std::move(staging_texture), format); -} - -void VulkanHostDisplay::UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, - const void* data, u32 data_stride) -{ - VulkanHostDisplayTexture* vk_texture = static_cast(texture); - - Vulkan::StagingTexture* staging_texture; - if (vk_texture->GetStagingTexture().IsValid()) - { - staging_texture = &vk_texture->GetStagingTexture(); - } - else - { - // TODO: This should use a stream buffer instead for speed. - if (m_upload_staging_texture.IsValid()) - m_upload_staging_texture.Flush(); - - if ((m_upload_staging_texture.GetWidth() < width || m_upload_staging_texture.GetHeight() < height) && - !m_upload_staging_texture.Create(Vulkan::StagingBuffer::Type::Upload, VK_FORMAT_R8G8B8A8_UNORM, width, height)) - { - Panic("Failed to create upload staging texture"); - } - - staging_texture = &m_upload_staging_texture; - } - - staging_texture->WriteTexels(0, 0, width, height, data, data_stride); - staging_texture->CopyToTexture(0, 0, vk_texture->GetTexture(), x, y, 0, 0, width, height); + return std::make_unique(std::move(texture), format); } bool VulkanHostDisplay::DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, @@ -271,43 +297,6 @@ bool VulkanHostDisplay::SupportsDisplayPixelFormat(HostDisplayPixelFormat format return ((fp.optimalTilingFeatures & required) == required); } -bool VulkanHostDisplay::BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, - u32* out_pitch) -{ - const VkFormat vk_format = s_display_pixel_format_mapping[static_cast(format)]; - - if (m_display_pixels_texture.GetWidth() < width || m_display_pixels_texture.GetHeight() < height || - m_display_pixels_texture.GetFormat() != vk_format) - { - if (!m_display_pixels_texture.Create(width, height, 1, 1, vk_format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, - VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT)) - { - return false; - } - } - - if ((m_upload_staging_texture.GetWidth() < width || m_upload_staging_texture.GetHeight() < height) && - !m_upload_staging_texture.Create(Vulkan::StagingBuffer::Type::Upload, vk_format, width, height)) - { - return false; - } - - SetDisplayTexture(&m_display_pixels_texture, format, m_display_pixels_texture.GetWidth(), - m_display_pixels_texture.GetHeight(), 0, 0, width, height); - - *out_buffer = m_upload_staging_texture.GetMappedPointer(); - *out_pitch = m_upload_staging_texture.GetMappedStride(); - return true; -} - -void VulkanHostDisplay::EndSetDisplayPixels() -{ - m_upload_staging_texture.CopyToTexture(0, 0, m_display_pixels_texture, 0, 0, 0, 0, - static_cast(m_display_texture_view_width), - static_cast(m_display_texture_view_height)); -} - void VulkanHostDisplay::SetVSync(bool enabled) { if (!m_swap_chain) @@ -518,9 +507,7 @@ void VulkanHostDisplay::DestroyResources() m_post_processing_ubo.Destroy(true); m_post_processing_chain.ClearStages(); - m_display_pixels_texture.Destroy(false); m_readback_staging_texture.Destroy(false); - m_upload_staging_texture.Destroy(false); Vulkan::Util::SafeDestroyPipeline(m_display_pipeline); Vulkan::Util::SafeDestroyPipeline(m_cursor_pipeline); diff --git a/src/frontend-common/vulkan_host_display.h b/src/frontend-common/vulkan_host_display.h index 14be1132d..531381955 100644 --- a/src/frontend-common/vulkan_host_display.h +++ b/src/frontend-common/vulkan_host_display.h @@ -51,15 +51,9 @@ public: std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, HostDisplayPixelFormat format, const void* data, u32 data_stride, bool dynamic = false) override; - void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* texture_data, - u32 texture_data_stride) override; bool DownloadTexture(const void* texture_handle, HostDisplayPixelFormat texture_format, u32 x, u32 y, u32 width, u32 height, void* out_data, u32 out_data_stride) override; - bool SupportsDisplayPixelFormat(HostDisplayPixelFormat format) const override; - bool BeginSetDisplayPixels(HostDisplayPixelFormat format, u32 width, u32 height, void** out_buffer, - u32* out_pitch) override; - void EndSetDisplayPixels() override; void SetVSync(bool enabled) override; @@ -128,8 +122,6 @@ protected: VkSampler m_point_sampler = VK_NULL_HANDLE; VkSampler m_linear_sampler = VK_NULL_HANDLE; - Vulkan::Texture m_display_pixels_texture; - Vulkan::StagingTexture m_upload_staging_texture; Vulkan::StagingTexture m_readback_staging_texture; VkDescriptorSetLayout m_post_process_descriptor_set_layout = VK_NULL_HANDLE;