diff --git a/src/common/intrin.h b/src/common/intrin.h index 7d5f18968..795a7f950 100644 --- a/src/common/intrin.h +++ b/src/common/intrin.h @@ -5,6 +5,7 @@ #pragma once +#include "align.h" #include "types.h" #include <type_traits> @@ -27,6 +28,16 @@ #include <malloc.h> // alloca #endif +/// Only currently using 128-bit vectors at max. +static constexpr u32 VECTOR_ALIGNMENT = 16; + +/// Aligns allocation/pitch size to preferred host size. +template<typename T> +ALWAYS_INLINE static T VectorAlign(T value) +{ + return Common::AlignUpPow2(value, VECTOR_ALIGNMENT); +} + template<typename T> ALWAYS_INLINE_RELEASE static void MemsetPtrs(T* ptr, T value, u32 count) { diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index f41447d0b..64cff9f10 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1907,7 +1907,7 @@ Common::Rectangle<s32> GPU::CalculateDrawRect(s32 window_width, s32 window_heigh static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, bool clear_alpha, bool flip_y, u32 resize_width, u32 resize_height, - std::vector<u32> texture_data, u32 texture_data_stride, + std::vector<u8> texture_data, u32 texture_data_stride, GPUTexture::Format texture_format) { @@ -1923,8 +1923,18 @@ static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string fil if (clear_alpha) { - for (u32& pixel : texture_data) - pixel |= 0xFF000000; + for (u32 y = 0; y < height; y++) + { + u8* pixels = &texture_data[y * texture_data_stride]; + for (u32 x = 0; x < width; x++) + { + u32 pixel; + std::memcpy(&pixel, pixels, sizeof(pixel)); + pixel |= 0xFF000000u; + std::memcpy(pixels, &pixel, sizeof(pixel)); + pixels += sizeof(pixel); + } + } } if (flip_y) @@ -1932,11 +1942,10 @@ static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string fil if (resize_width > 0 && resize_height > 0 && (resize_width != width || resize_height != height)) { - std::vector<u32> resized_texture_data(resize_width * 
resize_height); + std::vector<u8> resized_texture_data(resize_width * resize_height * sizeof(u32)); u32 resized_texture_stride = sizeof(u32) * resize_width; - if (!stbir_resize_uint8(reinterpret_cast<u8*>(texture_data.data()), width, height, texture_data_stride, - reinterpret_cast<u8*>(resized_texture_data.data()), resize_width, resize_height, - resized_texture_stride, 4)) + if (!stbir_resize_uint8(texture_data.data(), width, height, texture_data_stride, resized_texture_data.data(), + resize_width, resize_height, resized_texture_stride, 4)) { Log_ErrorPrintf("Failed to resize texture data from %ux%u to %ux%u", width, height, resize_width, resize_height); return false; @@ -2022,13 +2031,29 @@ bool GPU::WriteDisplayTextureToFile(std::string filename, bool full_resolution / const u32 read_width = static_cast<u32>(m_display_texture_view_width); const u32 read_height = static_cast<u32>(m_display_texture_view_height); - std::vector<u32> texture_data(read_width * read_height); const u32 texture_data_stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(m_display_texture->GetFormat()) * read_width, 4); - if (!g_gpu_device->DownloadTexture(m_display_texture, read_x, read_y, read_width, read_height, texture_data.data(), - texture_data_stride)) + std::vector<u8> texture_data(texture_data_stride * read_height); + + std::unique_ptr<GPUDownloadTexture> dltex; + if (g_gpu_device->GetFeatures().memory_import) + { + dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), + texture_data.data(), texture_data.size(), texture_data_stride); + } + if (!dltex) + { + if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat()))) + { + Log_ErrorFmt("Failed to create {}x{} {} download texture", read_width, read_height, + GPUTexture::GetFormatName(m_display_texture->GetFormat())); + return false; + } + } + + dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, 
!dltex->IsImported()); + if (!dltex->ReadTexels(0, 0, read_width, read_height, texture_data.data(), texture_data_stride)) { - Log_ErrorPrintf("Texture download failed"); RestoreDeviceContext(); return false; } @@ -2060,7 +2085,7 @@ bool GPU::WriteDisplayTextureToFile(std::string filename, bool full_resolution / } bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect, bool postfx, - std::vector<u32>* out_pixels, u32* out_stride, GPUTexture::Format* out_format) + std::vector<u8>* out_pixels, u32* out_stride, GPUTexture::Format* out_format) { const GPUTexture::Format hdformat = g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8; @@ -2076,8 +2101,25 @@ bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangl RenderDisplay(render_texture.get(), draw_rect, postfx); const u32 stride = GPUTexture::GetPixelSize(hdformat) * width; - out_pixels->resize(width * height); - if (!g_gpu_device->DownloadTexture(render_texture.get(), 0, 0, width, height, out_pixels->data(), stride)) + out_pixels->resize(height * stride); + + std::unique_ptr<GPUDownloadTexture> dltex; + if (g_gpu_device->GetFeatures().memory_import) + { + dltex = + g_gpu_device->CreateDownloadTexture(width, height, hdformat, out_pixels->data(), out_pixels->size(), stride); + } + if (!dltex) + { + if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat))) + { + Log_ErrorFmt("Failed to create {}x{} download texture", width, height); + return false; + } + } + + dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); + if (!dltex->ReadTexels(0, 0, width, height, out_pixels->data(), stride)) { RestoreDeviceContext(); return false; @@ -2142,7 +2184,7 @@ bool GPU::RenderScreenshotToFile(std::string filename, bool internal_resolution if (width == 0 || height == 0) return false; - std::vector<u32> pixels; + std::vector<u8> pixels; u32 pixels_stride; GPUTexture::Format 
pixels_format; if (!RenderScreenshotToBuffer(width, height, draw_rect, !internal_resolution, &pixels, &pixels_stride, diff --git a/src/core/gpu.h b/src/core/gpu.h index 702711e53..9e5ad40b9 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -206,7 +206,7 @@ public: /// Renders the display, optionally with postprocessing to the specified image. bool RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect, bool postfx, - std::vector<u32>* out_pixels, u32* out_stride, GPUTexture::Format* out_format); + std::vector<u8>* out_pixels, u32* out_stride, GPUTexture::Format* out_format); /// Helper function to save screenshot to PNG. bool RenderScreenshotToFile(std::string filename, bool internal_resolution = false, bool compress_on_thread = false); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index bbb1e4a72..aa2e50fa9 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -663,6 +663,26 @@ bool GPU_HW::CreateBuffers() GL_OBJECT_NAME(m_vram_read_texture, "VRAM Read Texture"); GL_OBJECT_NAME(m_vram_readback_texture, "VRAM Readback Texture"); + if (g_gpu_device->GetFeatures().memory_import) + { + Log_DevPrint("Trying to import guest VRAM buffer for downloads..."); + m_vram_readback_download_texture = g_gpu_device->CreateDownloadTexture( + m_vram_readback_texture->GetWidth(), m_vram_readback_texture->GetHeight(), m_vram_readback_texture->GetFormat(), + g_vram, sizeof(g_vram), VRAM_WIDTH * sizeof(u16)); + if (!m_vram_readback_download_texture) + Log_ErrorPrint("Failed to create imported readback buffer"); + } + if (!m_vram_readback_download_texture) + { + m_vram_readback_download_texture = g_gpu_device->CreateDownloadTexture( + m_vram_readback_texture->GetWidth(), m_vram_readback_texture->GetHeight(), m_vram_readback_texture->GetFormat()); + if (!m_vram_readback_download_texture) + { + Log_ErrorPrint("Failed to create readback download texture"); + return false; + } + } + if (g_gpu_device->GetFeatures().supports_texture_buffers) 
{ if (!(m_vram_upload_buffer = @@ -703,6 +723,7 @@ void GPU_HW::DestroyBuffers() ClearDisplayTexture(); m_vram_upload_buffer.reset(); + m_vram_readback_download_texture.reset(); g_gpu_device->RecycleTexture(std::move(m_downsample_texture)); g_gpu_device->RecycleTexture(std::move(m_vram_read_texture)); g_gpu_device->RecycleTexture(std::move(m_vram_depth_texture)); @@ -2405,8 +2426,18 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) } // Get bounds with wrap-around handled. - const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height); - const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; + Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height); + + // Has to be aligned to an even pixel for the download, due to 32-bit packing. + if (copy_rect.left & 1) + copy_rect.left--; + if (copy_rect.right & 1) + copy_rect.right++; + + DebugAssert((copy_rect.left % 2) == 0 && (copy_rect.GetWidth() % 2) == 0); + const u32 encoded_left = copy_rect.left / 2; + const u32 encoded_top = copy_rect.top; + const u32 encoded_width = copy_rect.GetWidth() / 2; const u32 encoded_height = copy_rect.GetHeight(); // Encode the 24-bit texture as 16-bit. @@ -2421,9 +2452,22 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) GL_POP(); // Stage the readback and copy it into our shadow buffer. - g_gpu_device->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height, - reinterpret_cast<u32*>(&g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left]), - VRAM_WIDTH * sizeof(u16)); + if (m_vram_readback_download_texture->IsImported()) + { + // Fast path, read directly. + m_vram_readback_download_texture->CopyFromTexture(encoded_left, encoded_top, m_vram_readback_texture.get(), 0, 0, + encoded_width, encoded_height, 0, 0, false); + m_vram_readback_download_texture->Flush(); + } + else + { + // Copy to staging buffer, then to VRAM. 
+ m_vram_readback_download_texture->CopyFromTexture(0, 0, m_vram_readback_texture.get(), 0, 0, encoded_width, + encoded_height, 0, 0, true); + m_vram_readback_download_texture->ReadTexels(0, 0, encoded_width, encoded_height, + &g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left], + VRAM_WIDTH * sizeof(u16)); + } RestoreDeviceContext(); } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 53d960b2e..659b7fcff 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -216,6 +216,7 @@ private: std::unique_ptr<GPUTexture> m_vram_depth_texture; std::unique_ptr<GPUTexture> m_vram_read_texture; std::unique_ptr<GPUTexture> m_vram_readback_texture; + std::unique_ptr<GPUDownloadTexture> m_vram_readback_download_texture; std::unique_ptr<GPUTexture> m_vram_replacement_texture; std::unique_ptr<GPUTexture> m_display_private_texture; // TODO: Move to base. diff --git a/src/core/system.cpp b/src/core/system.cpp index bed6219e5..a5ba0a227 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -265,7 +265,7 @@ bool System::Internal::ProcessStartup() InitializeDiscordPresence(); #endif -return true; + return true; } void System::Internal::ProcessShutdown() @@ -2430,7 +2430,7 @@ bool System::SaveStateToStream(ByteStream* state, u32 screenshot_size /* = 256 * ((display_aspect_ratio > 0.0f) ? 
display_aspect_ratio : 1.0f))); Log_VerbosePrintf("Saving %ux%u screenshot for state", screenshot_width, screenshot_height); - std::vector<u32> screenshot_buffer; + std::vector<u8> screenshot_buffer; u32 screenshot_stride; GPUTexture::Format screenshot_format; if (g_gpu->RenderScreenshotToBuffer(screenshot_width, screenshot_height, @@ -2454,7 +2454,7 @@ bool System::SaveStateToStream(ByteStream* state, u32 screenshot_size /* = 256 * header.offset_to_screenshot = static_cast<u32>(state->GetPosition()); header.screenshot_width = screenshot_width; header.screenshot_height = screenshot_height; - header.screenshot_size = static_cast<u32>(screenshot_buffer.size() * sizeof(u32)); + header.screenshot_size = static_cast<u32>(screenshot_buffer.size()); if (!state->Write2(screenshot_buffer.data(), header.screenshot_size)) return false; } diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index 1a4f8fd6b..20c5efeab 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -155,7 +155,6 @@ void D3D11Device::DestroyDevice() { std::unique_lock lock(s_instance_mutex); - DestroyStagingBuffer(); DestroyBuffers(); m_context.Reset(); m_device.Reset(); @@ -187,6 +186,7 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features) m_features.texture_buffers_emulated_with_ssbo = false; m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS); m_features.partial_msaa_resolve = false; + m_features.memory_import = false; m_features.gpu_timing = true; m_features.shader_cache = true; m_features.pipeline_cache = false; diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index e2bd15dad..39d5ff459 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -53,8 +53,11 @@ public: 
std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override; std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; - bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) override; + std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override; + std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, + u32 memory_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; @@ -128,9 +131,6 @@ private: void SetFeatures(FeatureMask disabled_features); - bool CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format); - void DestroyStagingBuffer(); - bool CreateSwapChain(); bool CreateSwapChainRTV(); void DestroySwapChain(); @@ -163,11 +163,6 @@ private: BlendStateMap m_blend_states; InputLayoutMap m_input_layouts; - ComPtr<ID3D11Texture2D> m_readback_staging_texture; - DXGI_FORMAT m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; - u32 m_readback_staging_texture_width = 0; - u32 m_readback_staging_texture_height = 0; - bool m_allow_tearing_supported = false; bool m_using_flip_model_swap_chain = true; bool m_using_allow_tearing = false; diff --git a/src/util/d3d11_texture.cpp b/src/util/d3d11_texture.cpp index 91a43990d..44cd2ce4a 100644 --- a/src/util/d3d11_texture.cpp +++ b/src/util/d3d11_texture.cpp @@ -1,16 +1,11 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include 
"d3d11_texture.h" #include "d3d11_device.h" #include "d3d_common.h" -// #include "common/align.h" -// #include "common/assert.h" -// #include "common/file_system.h" #include "common/log.h" -// #include "common/path.h" -// #include "common/rectangle.h" #include "common/string_util.h" #include "fmt/format.h" @@ -26,60 +21,6 @@ std::unique_ptr<GPUTexture> D3D11Device::CreateTexture(u32 width, u32 height, u3 return D3D11Texture::Create(m_device.Get(), width, height, layers, levels, samples, type, format, data, data_stride); } -bool D3D11Device::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) -{ - const D3D11Texture* tex = static_cast<const D3D11Texture*>(texture); - if (!CheckStagingBufferSize(width, height, tex->GetDXGIFormat())) - return false; - - const CD3D11_BOX box(static_cast<LONG>(x), static_cast<LONG>(y), 0, static_cast<LONG>(x + width), - static_cast<LONG>(y + height), 1); - m_context->CopySubresourceRegion(m_readback_staging_texture.Get(), 0, 0, 0, 0, tex->GetD3DTexture(), 0, &box); - - D3D11_MAPPED_SUBRESOURCE sr; - HRESULT hr = m_context->Map(m_readback_staging_texture.Get(), 0, D3D11_MAP_READ, 0, &sr); - if (FAILED(hr)) - { - Log_ErrorPrintf("Map() failed with HRESULT %08X", hr); - return false; - } - - s_stats.num_downloads++; - - const u32 copy_size = tex->GetPixelSize() * width; - StringUtil::StrideMemCpy(out_data, out_data_stride, sr.pData, sr.RowPitch, copy_size, height); - m_context->Unmap(m_readback_staging_texture.Get(), 0); - return true; -} - -bool D3D11Device::CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format) -{ - if (m_readback_staging_texture_width >= width && m_readback_staging_texture_width >= height && - m_readback_staging_texture_format == format) - return true; - - DestroyStagingBuffer(); - - CD3D11_TEXTURE2D_DESC desc(format, width, height, 1, 1, 0, D3D11_USAGE_STAGING, D3D11_CPU_ACCESS_READ); - HRESULT hr = m_device->CreateTexture2D(&desc, nullptr, 
m_readback_staging_texture.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateTexture2D() failed with HRESULT %08X", hr); - return false; - } - - return true; -} - -void D3D11Device::DestroyStagingBuffer() -{ - m_readback_staging_texture.Reset(); - m_readback_staging_texture_width = 0; - m_readback_staging_texture_height = 0; - m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; -} - bool D3D11Device::SupportsTextureFormat(GPUTexture::Format format) const { const DXGI_FORMAT dfmt = D3DCommon::GetFormatMapping(format).resource_format; @@ -447,3 +388,132 @@ std::unique_ptr<GPUTextureBuffer> D3D11Device::CreateTextureBuffer(GPUTextureBuf return tb; } + +D3D11DownloadTexture::D3D11DownloadTexture(Microsoft::WRL::ComPtr<ID3D11Texture2D> tex, u32 width, u32 height, + GPUTexture::Format format) + : GPUDownloadTexture(width, height, format, false), m_texture(std::move(tex)) +{ +} + +D3D11DownloadTexture::~D3D11DownloadTexture() +{ + if (IsMapped()) + D3D11DownloadTexture::Unmap(); +} + +std::unique_ptr<D3D11DownloadTexture> D3D11DownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format) +{ + D3D11_TEXTURE2D_DESC desc = {}; + desc.Width = width; + desc.Height = height; + desc.Format = D3DCommon::GetFormatMapping(format).srv_format; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D11_USAGE_STAGING; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + + Microsoft::WRL::ComPtr<ID3D11Texture2D> tex; + HRESULT hr = D3D11Device::GetD3DDevice()->CreateTexture2D(&desc, nullptr, tex.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorFmt("CreateTexture2D() failed: {:08X}", hr); + return {}; + } + + return std::unique_ptr<D3D11DownloadTexture>(new D3D11DownloadTexture(std::move(tex), width, height, format)); +} + +void D3D11DownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, + u32 height, u32 src_layer, u32 src_level, bool 
use_transfer_pitch) +{ + D3D11Texture* src11 = static_cast<D3D11Texture*>(src); + + DebugAssert(src11->GetFormat() == m_format); + DebugAssert(src_level < src11->GetLevels()); + DebugAssert((src_x + width) <= src11->GetMipWidth(src_level) && (src_y + height) <= src11->GetMipHeight(src_level)); + DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height); + DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch); + + ID3D11DeviceContext1* const ctx = D3D11Device::GetD3DContext(); + src11->CommitClear(ctx); + + D3D11Device::GetStatistics().num_downloads++; + + if (IsMapped()) + Unmap(); + + // depth textures need to copy the whole thing.. + const u32 subresource = D3D11CalcSubresource(src_level, src_layer, src11->GetLevels()); + if (GPUTexture::IsDepthFormat(src11->GetFormat())) + { + ctx->CopySubresourceRegion(m_texture.Get(), 0, 0, 0, 0, src11->GetD3DTexture(), subresource, nullptr); + } + else + { + const CD3D11_BOX sbox(src_x, src_y, 0, src_x + width, src_y + height, 1); + ctx->CopySubresourceRegion(m_texture.Get(), 0, dst_x, dst_y, 0, src11->GetD3DTexture(), subresource, &sbox); + } + + m_needs_flush = true; +} + +bool D3D11DownloadTexture::Map(u32 x, u32 y, u32 width, u32 height) +{ + if (IsMapped()) + return true; + + D3D11_MAPPED_SUBRESOURCE sr; + HRESULT hr = D3D11Device::GetD3DContext()->Map(m_texture.Get(), 0, D3D11_MAP_READ, 0, &sr); + if (FAILED(hr)) + { + Log_ErrorFmt("Map() failed: {:08X}", hr); + return false; + } + + m_map_pointer = static_cast<u8*>(sr.pData); + m_current_pitch = sr.RowPitch; + return true; +} + +void D3D11DownloadTexture::Unmap() +{ + if (!IsMapped()) + return; + + D3D11Device::GetD3DContext()->Unmap(m_texture.Get(), 0); + m_map_pointer = nullptr; +} + +void D3D11DownloadTexture::Flush() +{ + if (!m_needs_flush) + return; + + if (IsMapped()) + Unmap(); + + // Handled when mapped. 
+} + +void D3D11DownloadTexture::SetDebugName(std::string_view name) +{ + if (name.empty()) + return; + + SetD3DDebugObjectName(m_texture.Get(), name); +} + +std::unique_ptr<GPUDownloadTexture> D3D11Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) +{ + return D3D11DownloadTexture::Create(width, height, format); +} + +std::unique_ptr<GPUDownloadTexture> D3D11Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, + u32 memory_stride) +{ + Log_ErrorPrint("D3D11 cannot import memory for download textures"); + return {}; +} diff --git a/src/util/d3d11_texture.h b/src/util/d3d11_texture.h index 5175af611..e7ae14118 100644 --- a/src/util/d3d11_texture.h +++ b/src/util/d3d11_texture.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -119,3 +119,26 @@ private: D3D11StreamBuffer m_buffer; Microsoft::WRL::ComPtr<ID3D11ShaderResourceView> m_srv; }; + +class D3D11DownloadTexture final : public GPUDownloadTexture +{ +public: + ~D3D11DownloadTexture() override; + + static std::unique_ptr<D3D11DownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format); + + void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height, + u32 src_layer, u32 src_level, bool use_transfer_pitch) override; + + bool Map(u32 x, u32 y, u32 width, u32 height) override; + void Unmap() override; + + void Flush() override; + + void SetDebugName(std::string_view name) override; + +private: + D3D11DownloadTexture(Microsoft::WRL::ComPtr<ID3D11Texture2D> tex, u32 width, u32 height, GPUTexture::Format format); + + Microsoft::WRL::ComPtr<ID3D11Texture2D> m_texture; +}; diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index b594cbd1f..0b75a8117 100644 --- 
a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -261,7 +261,6 @@ void D3D12Device::DestroyDevice() WaitForGPUIdle(); DestroyDeferredObjects(m_current_fence_value); - DestroyDownloadBuffer(); DestroySamplers(); DestroyTimestampQuery(); DestroyBuffers(); @@ -1195,6 +1194,7 @@ void D3D12Device::SetFeatures(FeatureMask disabled_features) m_features.texture_buffers_emulated_with_ssbo = false; m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS); m_features.partial_msaa_resolve = true; + m_features.memory_import = false; m_features.gpu_timing = true; m_features.shader_cache = true; m_features.pipeline_cache = true; diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h index c338b13bb..92aa052c7 100644 --- a/src/util/d3d12_device.h +++ b/src/util/d3d12_device.h @@ -30,6 +30,7 @@ class D3D12Pipeline; class D3D12SwapChain; class D3D12Texture; class D3D12TextureBuffer; +class D3D12DownloadTexture; namespace D3D12MA { class Allocator; @@ -39,6 +40,7 @@ class D3D12Device final : public GPUDevice { public: friend D3D12Texture; + friend D3D12DownloadTexture; template<typename T> using ComPtr = Microsoft::WRL::ComPtr<T>; @@ -74,8 +76,11 @@ public: std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override; std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; - bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) override; + std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override; + std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, + u32 memory_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 
dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; @@ -244,9 +249,6 @@ private: bool IsRenderTargetBound(const GPUTexture* tex) const; - bool CheckDownloadBufferSize(u32 required_size); - void DestroyDownloadBuffer(); - /// Set dirty flags on everything to force re-bind at next draw time. void InvalidateCachedState(); void SetVertexBuffer(ID3D12GraphicsCommandList4* cmdlist); @@ -321,10 +323,6 @@ private: SamplerMap m_sampler_map; ComPtr<ID3D12PipelineLibrary> m_pipeline_library; - ComPtr<D3D12MA::Allocation> m_download_buffer_allocation; - ComPtr<ID3D12Resource> m_download_buffer; - u32 m_download_buffer_size = 0; - // Which bindings/state has to be updated before the next draw. u32 m_dirty_flags = ALL_DIRTY_STATE; diff --git a/src/util/d3d12_texture.cpp b/src/util/d3d12_texture.cpp index 0ccb59286..f85b153a4 100644 --- a/src/util/d3d12_texture.cpp +++ b/src/util/d3d12_texture.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "d3d12_texture.h" @@ -664,112 +664,6 @@ void D3D12Texture::MakeReadyForSampling() TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } -bool D3D12Device::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) -{ - D3D12Texture* T = static_cast<D3D12Texture*>(texture); - T->CommitClear(); - - const u32 pitch = Common::AlignUp(width * T->GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 size = pitch * height; - const u32 subresource = 0; - if (!CheckDownloadBufferSize(size)) - { - Log_ErrorPrintf("Can't read back %ux%u", width, height); - return false; - } - - if (InRenderPass()) - EndRenderPass(); - - ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); - - D3D12_TEXTURE_COPY_LOCATION srcloc; - srcloc.pResource 
= T->GetResource(); - srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - srcloc.SubresourceIndex = subresource; - - D3D12_TEXTURE_COPY_LOCATION dstloc; - dstloc.pResource = m_download_buffer.Get(); - dstloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dstloc.PlacedFootprint.Offset = 0; - dstloc.PlacedFootprint.Footprint.Format = T->GetDXGIFormat(); - dstloc.PlacedFootprint.Footprint.Width = width; - dstloc.PlacedFootprint.Footprint.Height = height; - dstloc.PlacedFootprint.Footprint.Depth = 1; - dstloc.PlacedFootprint.Footprint.RowPitch = pitch; - - const D3D12_RESOURCE_STATES old_layout = T->GetResourceState(); - if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE) - T->TransitionSubresourceToState(cmdlist, subresource, old_layout, D3D12_RESOURCE_STATE_COPY_SOURCE); - - // TODO: Rules for depth buffers here? - const D3D12_BOX srcbox{static_cast<UINT>(x), static_cast<UINT>(y), 0u, - static_cast<UINT>(x + width), static_cast<UINT>(y + height), 1u}; - cmdlist->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox); - - if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE) - T->TransitionSubresourceToState(cmdlist, subresource, D3D12_RESOURCE_STATE_COPY_SOURCE, old_layout); - - SubmitCommandList(true); - - u8* map_pointer; - const D3D12_RANGE read_range{0u, size}; - const HRESULT hr = m_download_buffer->Map(0, &read_range, reinterpret_cast<void**>(const_cast<u8**>(&map_pointer))); - if (FAILED(hr)) - { - Log_ErrorPrintf("Map() failed with HRESULT %08X", hr); - return false; - } - - StringUtil::StrideMemCpy(out_data, out_data_stride, map_pointer, pitch, width * T->GetPixelSize(), height); - m_download_buffer->Unmap(0, nullptr); - return true; -} - -bool D3D12Device::CheckDownloadBufferSize(u32 required_size) -{ - if (m_download_buffer_size >= required_size) - return true; - - DestroyDownloadBuffer(); - - D3D12MA::ALLOCATION_DESC allocation_desc = {}; - allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK; - - const D3D12_RESOURCE_DESC resource_desc = 
{D3D12_RESOURCE_DIMENSION_BUFFER, - 0, - required_size, - 1, - 1, - 1, - DXGI_FORMAT_UNKNOWN, - {1, 0}, - D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - D3D12_RESOURCE_FLAG_NONE}; - - HRESULT hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - m_download_buffer_allocation.ReleaseAndGetAddressOf(), - IID_PPV_ARGS(m_download_buffer.ReleaseAndGetAddressOf())); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateResource() failed with HRESULT %08X", hr); - return false; - } - - return true; -} - -void D3D12Device::DestroyDownloadBuffer() -{ - if (!m_download_buffer) - return; - - m_download_buffer.Reset(); - m_download_buffer_allocation.Reset(); - m_download_buffer_size = 0; -} - D3D12Sampler::D3D12Sampler(D3D12DescriptorHandle descriptor) : m_descriptor(descriptor) { } @@ -934,3 +828,184 @@ std::unique_ptr<GPUTextureBuffer> D3D12Device::CreateTextureBuffer(GPUTextureBuf return tb; } + +D3D12DownloadTexture::D3D12DownloadTexture(u32 width, u32 height, GPUTexture::Format format, + ComPtr<D3D12MA::Allocation> allocation, ComPtr<ID3D12Resource> buffer, + size_t buffer_size) + : GPUDownloadTexture(width, height, format, false), m_allocation(std::move(allocation)), m_buffer(std::move(buffer)), + m_buffer_size(buffer_size) +{ +} + +D3D12DownloadTexture::~D3D12DownloadTexture() +{ + if (IsMapped()) + D3D12DownloadTexture::Unmap(); + + if (m_buffer) + D3D12Device::GetInstance().DeferResourceDestruction(m_allocation.Get(), m_buffer.Get()); +} + +std::unique_ptr<D3D12DownloadTexture> D3D12DownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format) +{ + const u32 buffer_size = GetBufferSize(width, height, format, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + + D3D12MA::ALLOCATION_DESC allocation_desc = {}; + allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK; + + const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + buffer_size, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + 
D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; + + ComPtr<D3D12MA::Allocation> allocation; + ComPtr<ID3D12Resource> buffer; + + HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource( + &allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, allocation.GetAddressOf(), + IID_PPV_ARGS(buffer.GetAddressOf())); + if (FAILED(hr)) + { + Log_ErrorFmt("CreateResource() failed with HRESULT {:08X}", hr); + return {}; + } + + return std::unique_ptr<D3D12DownloadTexture>( + new D3D12DownloadTexture(width, height, format, std::move(allocation), std::move(buffer), buffer_size)); +} + +void D3D12DownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, + u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch) +{ + D3D12Texture* const src12 = static_cast<D3D12Texture*>(src); + D3D12Device& dev = D3D12Device::GetInstance(); + + DebugAssert(src12->GetFormat() == m_format); + DebugAssert(src_level < src12->GetLevels()); + DebugAssert((src_x + width) <= src12->GetMipWidth(src_level) && (src_y + height) <= src12->GetMipHeight(src_level)); + DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height); + DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch); + + u32 copy_offset, copy_size, copy_rows; + m_current_pitch = GetTransferPitch(use_transfer_pitch ? 
width : m_width, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows); + + dev.GetStatistics().num_downloads++; + if (dev.InRenderPass()) + dev.EndRenderPass(); + src12->CommitClear(); + + if (IsMapped()) + Unmap(); + + ID3D12GraphicsCommandList* cmdlist = dev.GetCommandList(); + GL_INS_FMT("ReadbackTexture: {{{},{}}} {}x{} => {{{},{}}}", src_x, src_y, width, height, dst_x, dst_y); + + D3D12_TEXTURE_COPY_LOCATION srcloc; + srcloc.pResource = src12->GetResource(); + srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcloc.SubresourceIndex = src12->CalculateSubresource(src_layer, src_level); + + D3D12_TEXTURE_COPY_LOCATION dstloc; + dstloc.pResource = m_buffer.Get(); + dstloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstloc.PlacedFootprint.Offset = copy_offset; + dstloc.PlacedFootprint.Footprint.Format = src12->GetDXGIFormat(); + dstloc.PlacedFootprint.Footprint.Width = width; + dstloc.PlacedFootprint.Footprint.Height = height; + dstloc.PlacedFootprint.Footprint.Depth = 1; + dstloc.PlacedFootprint.Footprint.RowPitch = m_current_pitch; + + const D3D12_RESOURCE_STATES old_layout = src12->GetResourceState(); + if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE) + src12->TransitionSubresourceToState(cmdlist, src_level, old_layout, D3D12_RESOURCE_STATE_COPY_SOURCE); + + // TODO: Rules for depth buffers here?
+ const D3D12_BOX srcbox{static_cast<UINT>(src_x), static_cast<UINT>(src_y), 0u, + static_cast<UINT>(src_x + width), static_cast<UINT>(src_y + height), 1u}; + cmdlist->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox); + + if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE) + src12->TransitionSubresourceToState(cmdlist, src_level, D3D12_RESOURCE_STATE_COPY_SOURCE, old_layout); + + m_copy_fence_value = dev.GetCurrentFenceValue(); + m_needs_flush = true; +} + +bool D3D12DownloadTexture::Map(u32 x, u32 y, u32 width, u32 height) +{ + if (IsMapped()) + return true; + + // Never populated? + if (!m_current_pitch) + return false; + + u32 copy_offset, copy_size, copy_rows; + GetTransferSize(x, y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows); + + const D3D12_RANGE read_range{copy_offset, copy_offset + m_current_pitch * copy_rows}; + const HRESULT hr = m_buffer->Map(0, &read_range, reinterpret_cast<void**>(const_cast<u8**>(&m_map_pointer))); + if (FAILED(hr)) + { + Log_ErrorFmt("Map() failed with HRESULT {:08X}", hr); + return false; + } + + return true; +} + +void D3D12DownloadTexture::Unmap() +{ + if (!IsMapped()) + return; + + const D3D12_RANGE write_range = {}; + m_buffer->Unmap(0, &write_range); + m_map_pointer = nullptr; +} + +void D3D12DownloadTexture::Flush() +{ + if (!m_needs_flush) + return; + + m_needs_flush = false; + + D3D12Device& dev = D3D12Device::GetInstance(); + if (dev.GetCompletedFenceValue() >= m_copy_fence_value) + return; + + // Need to execute command buffer.
+ if (dev.GetCurrentFenceValue() == m_copy_fence_value) + dev.SubmitCommandList(true); + else + dev.WaitForFence(m_copy_fence_value); +} + +void D3D12DownloadTexture::SetDebugName(std::string_view name) +{ + if (name.empty()) + return; + + D3D12::SetObjectName(m_buffer.Get(), name); +} + +std::unique_ptr<GPUDownloadTexture> D3D12Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) +{ + return D3D12DownloadTexture::Create(width, height, format); +} + +std::unique_ptr<GPUDownloadTexture> D3D12Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, + u32 memory_stride) +{ + Log_ErrorPrint("D3D12 cannot import memory for download textures"); + return {}; +} diff --git a/src/util/d3d12_texture.h b/src/util/d3d12_texture.h index 277e22c73..4186f4ad3 100644 --- a/src/util/d3d12_texture.h +++ b/src/util/d3d12_texture.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -146,3 +146,34 @@ private: D3D12StreamBuffer m_buffer; D3D12DescriptorHandle m_descriptor; }; + +class D3D12DownloadTexture final : public GPUDownloadTexture +{ +public: + template<typename T> + using ComPtr = Microsoft::WRL::ComPtr<T>; + + ~D3D12DownloadTexture() override; + + static std::unique_ptr<D3D12DownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format); + + void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height, + u32 src_layer, u32 src_level, bool use_transfer_pitch) override; + + bool Map(u32 x, u32 y, u32 width, u32 height) override; + void Unmap() override; + + void Flush() override; + + void SetDebugName(std::string_view name) override; + +private: + D3D12DownloadTexture(u32 width, u32 height, GPUTexture::Format format, ComPtr<D3D12MA::Allocation> 
allocation, + ComPtr<ID3D12Resource> buffer, size_t buffer_size); + + ComPtr<D3D12MA::Allocation> m_allocation; + ComPtr<ID3D12Resource> m_buffer; + + u64 m_copy_fence_value = 0; + size_t m_buffer_size = 0; +}; diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 33899aa6e..4c74e5746 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -439,6 +439,7 @@ public: FEATURE_MASK_TEXTURE_BUFFERS = (1 << 2), FEATURE_MASK_GEOMETRY_SHADERS = (1 << 3), FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 4), + FEATURE_MASK_MEMORY_IMPORT = (1 << 5), }; struct Features @@ -452,6 +453,7 @@ public: bool texture_buffers_emulated_with_ssbo : 1; bool geometry_shaders : 1; bool partial_msaa_resolve : 1; + bool memory_import : 1; bool gpu_timing : 1; bool shader_cache : 1; bool pipeline_cache : 1; @@ -583,8 +585,12 @@ public: void RecycleTexture(std::unique_ptr<GPUTexture> texture); void PurgeTexturePool(); - virtual bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) = 0; + virtual std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, + GPUTexture::Format format) = 0; + virtual std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, + u32 memory_stride) = 0; + virtual void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) = 0; virtual void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, diff --git a/src/util/gpu_texture.cpp b/src/util/gpu_texture.cpp index afb582548..e6786ecd1 100644 --- a/src/util/gpu_texture.cpp +++ b/src/util/gpu_texture.cpp @@ -1,9 +1,10 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // 
SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "gpu_texture.h" #include "gpu_device.h" +#include "common/align.h" #include "common/bitutils.h" #include "common/log.h" #include "common/string_util.h" @@ -52,6 +53,68 @@ const char* GPUTexture::GetFormatName(Format format) return format_names[static_cast<u8>(format)]; } +u32 GPUTexture::GetCompressedBytesPerBlock() const +{ + return GetCompressedBytesPerBlock(m_format); +} + +u32 GPUTexture::GetCompressedBytesPerBlock(Format format) +{ + // TODO: Implement me + return GetPixelSize(format); +} + +u32 GPUTexture::GetCompressedBlockSize() const +{ + return GetCompressedBlockSize(m_format); +} + +u32 GPUTexture::GetCompressedBlockSize(Format format) +{ + // TODO: Implement me + /*if (format >= Format::BC1 && format <= Format::BC7) + return 4; + else*/ + return 1; +} + +u32 GPUTexture::CalcUploadPitch(Format format, u32 width) +{ + /* + if (format >= Format::BC1 && format <= Format::BC7) + width = Common::AlignUpPow2(width, 4) / 4; + */ + return width * GetCompressedBytesPerBlock(format); +} + +u32 GPUTexture::CalcUploadPitch(u32 width) const +{ + return CalcUploadPitch(m_format, width); +} + +u32 GPUTexture::CalcUploadRowLengthFromPitch(u32 pitch) const +{ + return CalcUploadRowLengthFromPitch(m_format, pitch); +} + +u32 GPUTexture::CalcUploadRowLengthFromPitch(Format format, u32 pitch) +{ + const u32 block_size = GetCompressedBlockSize(format); + const u32 bytes_per_block = GetCompressedBytesPerBlock(format); + return ((pitch + (bytes_per_block - 1)) / bytes_per_block) * block_size; +} + +u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const +{ + return CalcUploadSize(m_format, height, pitch); +} + +u32 GPUTexture::CalcUploadSize(Format format, u32 height, u32 pitch) +{ + const u32 block_size = GetCompressedBlockSize(format); + return pitch * ((static_cast<u32>(height) + (block_size - 1)) / block_size); +} + std::array<float, 4> GPUTexture::GetUNormClearColor() const { return 
GPUDevice::RGBA8ToFloat(m_clear_value.color); @@ -117,6 +180,12 @@ bool GPUTexture::IsDepthStencilFormat(Format format) return false; } +bool GPUTexture::IsCompressedFormat(Format format) +{ + // TODO: Implement me + return false; +} + bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format) { if (width > MAX_WIDTH || height > MAX_HEIGHT || layers > MAX_LAYERS || levels > MAX_LEVELS || samples > MAX_SAMPLES) @@ -161,7 +230,7 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u return true; } -bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u32>& texture_data, +bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u8>& texture_data, u32& texture_data_stride, GPUTexture::Format format) { switch (format) @@ -170,9 +239,15 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3 { for (u32 y = 0; y < height; y++) { - u32* pixels = reinterpret_cast<u32*>(reinterpret_cast<u8*>(texture_data.data()) + (y * texture_data_stride)); + u8* pixels = texture_data.data() + (y * texture_data_stride); for (u32 x = 0; x < width; x++) - pixels[x] = (pixels[x] & 0xFF00FF00) | ((pixels[x] & 0xFF) << 16) | ((pixels[x] >> 16) & 0xFF); + { + u32 pixel; + std::memcpy(&pixel, pixels, sizeof(pixel)); + pixel = (pixel & 0xFF00FF00) | ((pixel & 0xFF) << 16) | ((pixel >> 16) & 0xFF); + std::memcpy(pixels, &pixel, sizeof(pixel)); + pixels += sizeof(pixel); + } } return true; @@ -183,12 +258,12 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3 case Format::RGB565: { - std::vector<u32> temp(width * height); + std::vector<u8> temp(width * height * sizeof(u32)); for (u32 y = 0; y < height; y++) { - const u8* pixels_in = reinterpret_cast<u8*>(texture_data.data()) + (y * texture_data_stride); - u32* pixels_out = &temp[y * width]; + const u8* pixels_in = texture_data.data() + (y * 
texture_data_stride); + u8* pixels_out = &temp[y * width * sizeof(u32)]; for (u32 x = 0; x < width; x++) { @@ -199,8 +274,10 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3 const u8 r5 = Truncate8(pixel_in >> 11); const u8 g6 = Truncate8((pixel_in >> 5) & 0x3F); const u8 b5 = Truncate8(pixel_in & 0x1F); - *(pixels_out++) = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 2) | (g6 & 3)) << 8) | + const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 2) | (g6 & 3)) << 8) | (ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (0xFF000000u); + std::memcpy(pixels_out, &rgba8, sizeof(u32)); + pixels_out += sizeof(u32); } } @@ -211,12 +288,12 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3 case Format::RGBA5551: { - std::vector<u32> temp(width * height); + std::vector<u8> temp(width * height * sizeof(u32)); for (u32 y = 0; y < height; y++) { - const u8* pixels_in = reinterpret_cast<u8*>(texture_data.data()) + (y * texture_data_stride); - u32* pixels_out = &temp[y * width]; + const u8* pixels_in = texture_data.data() + (y * texture_data_stride); + u8* pixels_out = &temp[y * width * sizeof(u32)]; for (u32 x = 0; x < width; x++) { @@ -228,8 +305,10 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3 const u8 r5 = Truncate8((pixel_in >> 10) & 0x1F); const u8 g6 = Truncate8((pixel_in >> 5) & 0x1F); const u8 b5 = Truncate8(pixel_in & 0x1F); - *(pixels_out++) = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 3) | (g6 & 7)) << 8) | + const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 3) | (g6 & 7)) << 8) | (ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (a1 ?
0xFF000000u : 0u); + std::memcpy(pixels_out, &rgba8, sizeof(u32)); + pixels_out += sizeof(u32); } } @@ -244,13 +323,13 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3 } } -void GPUTexture::FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u32>& texture_data, u32 texture_data_stride) +void GPUTexture::FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u8>& texture_data, u32 texture_data_stride) { - std::vector<u32> temp(width); + std::vector<u8> temp(texture_data_stride); for (u32 flip_row = 0; flip_row < (height / 2); flip_row++) { - u32* top_ptr = &texture_data[flip_row * width]; - u32* bottom_ptr = &texture_data[((height - 1) - flip_row) * width]; + u8* top_ptr = &texture_data[flip_row * texture_data_stride]; + u8* bottom_ptr = &texture_data[((height - 1) - flip_row) * texture_data_stride]; std::memcpy(temp.data(), top_ptr, texture_data_stride); std::memcpy(top_ptr, bottom_ptr, texture_data_stride); std::memcpy(bottom_ptr, temp.data(), texture_data_stride); @@ -260,3 +339,56 @@ void GPUTexture::FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u32>& t void GPUTexture::MakeReadyForSampling() { } + +GPUDownloadTexture::GPUDownloadTexture(u32 width, u32 height, GPUTexture::Format format, bool is_imported) + : m_width(width), m_height(height), m_format(format), m_is_imported(is_imported) +{ +} + +GPUDownloadTexture::~GPUDownloadTexture() = default; + +u32 GPUDownloadTexture::GetBufferSize(u32 width, u32 height, GPUTexture::Format format, u32 pitch_align /* = 1 */) +{ + DebugAssert(std::has_single_bit(pitch_align)); + const u32 bytes_per_pixel = GPUTexture::GetPixelSize(format); + const u32 pitch = Common::AlignUpPow2(width * bytes_per_pixel, pitch_align); + return (pitch * height); +} + +u32 GPUDownloadTexture::GetTransferPitch(u32 width, u32 pitch_align) const +{ + DebugAssert(std::has_single_bit(pitch_align)); + const u32 bytes_per_pixel = GPUTexture::GetPixelSize(m_format); + return Common::AlignUpPow2(width *
bytes_per_pixel, pitch_align); +} + +void GPUDownloadTexture::GetTransferSize(u32 x, u32 y, u32 width, u32 height, u32 pitch, u32* copy_offset, + u32* copy_size, u32* copy_rows) const +{ + const u32 bytes_per_pixel = GPUTexture::GetPixelSize(m_format); + *copy_offset = (y * pitch) + (x * bytes_per_pixel); + *copy_size = width * bytes_per_pixel; + *copy_rows = height; +} + +bool GPUDownloadTexture::ReadTexels(u32 x, u32 y, u32 width, u32 height, void* out_ptr, u32 out_stride) +{ + if (m_needs_flush) + Flush(); + + // if we're imported, and this is the same buffer, bail out + if (m_map_pointer == out_ptr) + { + // but stride should match + DebugAssert(x == 0 && y == 0 && width <= m_width && height <= m_height && out_stride == m_current_pitch); + return true; + } + + if (!Map(x, y, width, height)) + return false; + + u32 copy_offset, copy_size, copy_rows; + GetTransferSize(x, y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows); + StringUtil::StrideMemCpy(out_ptr, out_stride, m_map_pointer + copy_offset, m_current_pitch, copy_size, copy_rows); + return true; +} diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index 9dec6e654..d0369b946 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -80,11 +80,18 @@ public: static u32 GetPixelSize(GPUTexture::Format format); static bool IsDepthFormat(GPUTexture::Format format); static bool IsDepthStencilFormat(GPUTexture::Format format); + static bool IsCompressedFormat(Format format); + static u32 GetCompressedBytesPerBlock(Format format); + static u32 GetCompressedBlockSize(Format format); + static u32 CalcUploadPitch(Format format, u32 width); + static u32 CalcUploadRowLengthFromPitch(Format format, u32 pitch); + static u32 CalcUploadSize(Format format, u32 height, u32 pitch); + static bool ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format); - static bool ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u32>& texture_data, u32&
texture_data_stride, + static bool ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u8>& texture_data, u32& texture_data_stride, GPUTexture::Format format); - static void FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u32>& texture_data, u32 texture_data_stride); + static void FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u8>& texture_data, u32 texture_data_stride); ALWAYS_INLINE u32 GetWidth() const { return m_width; } ALWAYS_INLINE u32 GetHeight() const { return m_height; } @@ -133,6 +140,12 @@ public: size_t GetVRAMUsage() const; + u32 GetCompressedBytesPerBlock() const; + u32 GetCompressedBlockSize() const; + u32 CalcUploadPitch(u32 width) const; + u32 CalcUploadRowLengthFromPitch(u32 pitch) const; + u32 CalcUploadSize(u32 height, u32 pitch) const; + GPUTexture& operator=(const GPUTexture&) = delete; virtual bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, @@ -160,3 +173,71 @@ protected: ClearValue m_clear_value = {}; }; + +class GPUDownloadTexture +{ +public: + GPUDownloadTexture(u32 width, u32 height, GPUTexture::Format format, bool is_imported); + virtual ~GPUDownloadTexture(); + + /// Basically, this has dimensions only because of DX11. + ALWAYS_INLINE u32 GetWidth() const { return m_width; } + ALWAYS_INLINE u32 GetHeight() const { return m_height; } + ALWAYS_INLINE GPUTexture::Format GetFormat() const { return m_format; } + ALWAYS_INLINE bool NeedsFlush() const { return m_needs_flush; } + ALWAYS_INLINE bool IsMapped() const { return (m_map_pointer != nullptr); } + ALWAYS_INLINE bool IsImported() const { return m_is_imported; } + ALWAYS_INLINE const u8* GetMapPointer() const { return m_map_pointer; } + ALWAYS_INLINE u32 GetMapPitch() const { return m_current_pitch; } + + /// Calculates the pitch of a transfer. + u32 GetTransferPitch(u32 width, u32 pitch_align) const; + + /// Calculates the size of the data you should transfer. 
+ void GetTransferSize(u32 x, u32 y, u32 width, u32 height, u32 pitch, u32* copy_offset, u32* copy_size, + u32* copy_rows) const; + + /// Queues a copy from the specified texture to this buffer. + /// Does not complete immediately, you should flush before accessing the buffer. + /// use_transfer_pitch should be true if there's only a single texture being copied to this buffer before + /// it will be used. This allows the image to be packed tighter together, and buffer reuse. + virtual void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height, + u32 src_layer, u32 src_level, bool use_transfer_pitch = true) = 0; + + /// Maps the texture into the CPU address space, enabling it to read the contents. + /// The Map call may not perform synchronization. If the contents of the staging texture + /// has been updated by a CopyFromTexture() call, you must call Flush() first. + /// If persistent mapping is supported in the backend, this may be a no-op. + virtual bool Map(u32 x, u32 y, u32 width, u32 height) = 0; + + /// Unmaps the CPU-readable copy of the texture. May be a no-op on backends which + /// support persistent-mapped buffers. + virtual void Unmap() = 0; + + /// Flushes pending writes from the CPU to the GPU, and reads from the GPU to the CPU. + /// This may cause a command buffer submit depending on if one has occurred between the last + /// call to CopyFromTexture() and the Flush() call. + virtual void Flush() = 0; + + /// Sets object name that will be displayed in graphics debuggers. + virtual void SetDebugName(std::string_view name) = 0; + + /// Reads the specified rectangle from the staging texture to out_ptr, with the specified stride + /// (length in bytes of each row). CopyFromTexture() must be called first. The contents of any + /// texels outside of the rectangle used for CopyFromTexture is undefined. 
+ bool ReadTexels(u32 x, u32 y, u32 width, u32 height, void* out_ptr, u32 out_stride); + + /// Returns what the size of the specified texture would be, in bytes. + static u32 GetBufferSize(u32 width, u32 height, GPUTexture::Format format, u32 pitch_align = 1); + +protected: + u32 m_width; + u32 m_height; + GPUTexture::Format m_format; + + const u8* m_map_pointer = nullptr; + u32 m_current_pitch = 0; + + bool m_is_imported = false; + bool m_needs_flush = false; +}; diff --git a/src/util/metal_device.h b/src/util/metal_device.h index 5df1242a2..d4f35b182 100644 --- a/src/util/metal_device.h +++ b/src/util/metal_device.h @@ -137,6 +137,34 @@ private: u8 m_map_level = 0; }; +class MetalDownloadTexture final : public GPUDownloadTexture +{ +public: + ~MetalDownloadTexture() override; + + static std::unique_ptr<MetalDownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format, void* memory, + size_t memory_size, u32 memory_stride); + + void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height, + u32 src_layer, u32 src_level, bool use_transfer_pitch) override; + + bool Map(u32 x, u32 y, u32 width, u32 height) override; + void Unmap() override; + + void Flush() override; + + void SetDebugName(std::string_view name) override; + +private: + MetalDownloadTexture(u32 width, u32 height, GPUTexture::Format format, u8* import_buffer, size_t buffer_offset, + id<MTLBuffer> buffer, const u8* map_ptr, u32 map_pitch); + + size_t m_buffer_offset = 0; + id<MTLBuffer> m_buffer = nil; + + u64 m_copy_fence_counter = 0; +}; + class MetalTextureBuffer final : public GPUTextureBuffer { public: @@ -160,6 +188,7 @@ private: class MetalDevice final : public GPUDevice { friend MetalTexture; + friend MetalDownloadTexture; public: ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); } @@ -188,8 +217,11 @@ public: std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) 
override; std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; - bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) override; + std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override; + std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, + u32 memory_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; @@ -302,8 +334,6 @@ private: void SetViewportInRenderEncoder(); void SetScissorInRenderEncoder(); - bool CheckDownloadBufferSize(u32 required_size); - bool CreateLayer(); void DestroyLayer(); void RenderBlankFrame(); @@ -327,9 +357,6 @@ private: DepthStateMap m_depth_states; - id<MTLBuffer> m_download_buffer = nil; - u32 m_download_buffer_size = 0; - MetalStreamBuffer m_vertex_buffer; MetalStreamBuffer m_index_buffer; MetalStreamBuffer m_uniform_buffer; diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index 3757377b7..e3bd58b13 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -234,6 +234,7 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features) m_features.texture_buffers_emulated_with_ssbo = true; m_features.geometry_shaders = false; m_features.partial_msaa_resolve = false; + m_features.memory_import = true; m_features.shader_cache = true; m_features.pipeline_cache = false; m_features.prefer_unused_textures = true; @@ -499,13 +500,6 @@ bool MetalDevice::CreateBuffers() void MetalDevice::DestroyBuffers() { - if (m_download_buffer != nil) - { - [m_download_buffer release]; - m_download_buffer = nil; - 
m_download_buffer_size = 0; - } - m_texture_upload_buffer.Destroy(); m_uniform_buffer.Destroy(); m_vertex_buffer.Destroy(); @@ -759,17 +753,17 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap static constexpr u32 MAX_COMPONENTS = 4; static constexpr const MTLVertexFormat format_mapping[static_cast<u8>(GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = { - {MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4}, // Float - {MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4}, // UInt8 - {MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4}, // SInt8 + {MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4}, // Float + {MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4}, // UInt8 + {MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4}, // SInt8 {MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized, MTLVertexFormatUChar4Normalized}, // UNorm8 {MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4}, // UInt16 {MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4}, // SInt16 {MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized, - MTLVertexFormatUShort4Normalized}, // UNorm16 - {MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4}, // UInt32 - {MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4}, // SInt32 + MTLVertexFormatUShort4Normalized}, // UNorm16 + {MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4}, // UInt32 + {MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4}, // SInt32 }; static constexpr 
std::array<MTLCullMode, static_cast<u32>(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{ @@ -1132,6 +1126,166 @@ std::unique_ptr<GPUTexture> MetalDevice::CreateTexture(u32 width, u32 height, u3 } } +MetalDownloadTexture::MetalDownloadTexture(u32 width, u32 height, GPUTexture::Format format, u8* import_buffer, + size_t buffer_offset, id<MTLBuffer> buffer, const u8* map_ptr, u32 map_pitch) + : GPUDownloadTexture(width, height, format, (import_buffer != nullptr)), m_buffer_offset(buffer_offset), + m_buffer(buffer) +{ + m_map_pointer = map_ptr; + m_current_pitch = map_pitch; +} + +MetalDownloadTexture::~MetalDownloadTexture() +{ + [m_buffer release]; +} + +std::unique_ptr<MetalDownloadTexture> MetalDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, u32 memory_stride) +{ + @autoreleasepool + { + MetalDevice& dev = MetalDevice::GetInstance(); + id<MTLBuffer> buffer = nil; + size_t memory_offset = 0; + const u8* map_ptr = nullptr; + u32 map_pitch = 0; + u32 buffer_size = 0; + + constexpr MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache; + + // not importing memory? + if (!memory) + { + map_pitch = Common::AlignUpPow2(GPUTexture::CalcUploadPitch(format, width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + buffer_size = height * map_pitch; + buffer = [[dev.m_device newBufferWithLength:buffer_size options:options] retain]; + if (buffer == nil) + { + Log_ErrorFmt("Failed to create {} byte buffer", buffer_size); + return {}; + } + + map_ptr = static_cast<u8*>([buffer contents]); + } + else + { + map_pitch = memory_stride; + buffer_size = height * map_pitch; + Assert(buffer_size <= memory_size); + + // Importing memory, we need to page align the buffer. 
+ void* page_aligned_memory = + reinterpret_cast<void*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(memory), HOST_PAGE_SIZE)); + memory_offset = static_cast<size_t>(static_cast<u8*>(memory) - static_cast<u8*>(page_aligned_memory)); + const size_t page_aligned_size = Common::AlignUpPow2(memory_offset + memory_size, HOST_PAGE_SIZE); + Log_DevFmt("Trying to import {} bytes of memory at {} for download texture", page_aligned_size, + page_aligned_memory); + + buffer = [[dev.m_device newBufferWithBytesNoCopy:page_aligned_memory + length:page_aligned_size + options:options + deallocator:nil] retain]; + if (buffer == nil) + { + Log_ErrorFmt("Failed to import {} byte buffer", page_aligned_size); + return {}; + } + + map_ptr = static_cast<u8*>(memory); + } + + return std::unique_ptr<MetalDownloadTexture>(new MetalDownloadTexture( + width, height, format, static_cast<u8*>(memory), memory_offset, buffer, map_ptr, map_pitch)); + } +} + +void MetalDownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, + u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch) +{ + MetalTexture* const mtlTex = static_cast<MetalTexture*>(src); + MetalDevice& dev = MetalDevice::GetInstance(); + + DebugAssert(mtlTex->GetFormat() == m_format); + DebugAssert(src_level < mtlTex->GetLevels()); + DebugAssert((src_x + width) <= mtlTex->GetMipWidth(src_level) && (src_y + height) <= mtlTex->GetMipHeight(src_level)); + DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height); + DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch); + DebugAssert(!m_is_imported || !use_transfer_pitch); + + u32 copy_offset, copy_size, copy_rows; + if (!m_is_imported) + m_current_pitch = GetTransferPitch(use_transfer_pitch ?
width : m_width, TEXTURE_UPLOAD_PITCH_ALIGNMENT); + GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows); + + dev.GetStatistics().num_downloads++; + + dev.CommitClear(mtlTex); + + id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(true); + [encoder copyFromTexture:mtlTex->GetMTLTexture() + sourceSlice:src_layer + sourceLevel:src_level + sourceOrigin:MTLOriginMake(src_x, src_y, 0) + sourceSize:MTLSizeMake(width, height, 1) + toBuffer:m_buffer + destinationOffset:m_buffer_offset + copy_offset + destinationBytesPerRow:m_current_pitch + destinationBytesPerImage:0]; + + m_copy_fence_counter = dev.m_current_fence_counter; + m_needs_flush = true; +} + +bool MetalDownloadTexture::Map(u32 x, u32 y, u32 width, u32 height) +{ + // Always mapped. + return true; +} + +void MetalDownloadTexture::Unmap() +{ + // Always mapped. +} + +void MetalDownloadTexture::Flush() +{ + if (!m_needs_flush) + return; + + m_needs_flush = false; + + MetalDevice& dev = MetalDevice::GetInstance(); + if (dev.m_completed_fence_counter >= m_copy_fence_counter) + return; + + // Need to execute command buffer.
+ if (dev.GetCurrentFenceCounter() == m_copy_fence_counter) + dev.SubmitCommandBuffer(true); + else + dev.WaitForFenceCounter(m_copy_fence_counter); +} + +void MetalDownloadTexture::SetDebugName(std::string_view name) +{ + @autoreleasepool + { + [m_buffer setLabel:StringViewToNSString(name)]; + } +} + +std::unique_ptr<GPUDownloadTexture> MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) +{ + return MetalDownloadTexture::Create(width, height, format, nullptr, 0, 0); +} + +std::unique_ptr<GPUDownloadTexture> MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, + u32 memory_stride) +{ + return MetalDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride); +} + MetalSampler::MetalSampler(id<MTLSamplerState> ss) : m_ss(ss) { } @@ -1218,71 +1372,6 @@ std::unique_ptr<GPUSampler> MetalDevice::CreateSampler(const GPUSampler::Config& } } -bool MetalDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) -{ - constexpr u32 src_layer = 0; - constexpr u32 src_level = 0; - - const u32 copy_size = width * texture->GetPixelSize(); - const u32 pitch = Common::AlignUpPow2(copy_size, TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 required_size = pitch * height; - if (!CheckDownloadBufferSize(required_size)) - return false; - - MetalTexture* T = static_cast<MetalTexture*>(texture); - CommitClear(T); - - s_stats.num_downloads++; - - @autoreleasepool - { - id<MTLBlitCommandEncoder> encoder = GetBlitEncoder(true); - - [encoder copyFromTexture:T->GetMTLTexture() - sourceSlice:src_layer - sourceLevel:src_level - sourceOrigin:MTLOriginMake(x, y, 0) - sourceSize:MTLSizeMake(width, height, 1) - toBuffer:m_download_buffer - destinationOffset:0 - destinationBytesPerRow:pitch - destinationBytesPerImage:0]; - - SubmitCommandBuffer(true); - - StringUtil::StrideMemCpy(out_data, out_data_stride, 
[m_download_buffer contents], pitch, copy_size, height);
-  }
-
-  return true;
-}
-
-bool MetalDevice::CheckDownloadBufferSize(u32 required_size)
-{
-  if (m_download_buffer_size >= required_size)
-    return true;
-
-  @autoreleasepool
-  {
-    // We don't need to defer releasing this one, it's not going to be used.
-    if (m_download_buffer != nil)
-      [m_download_buffer release];
-
-    constexpr MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
-    m_download_buffer = [[m_device newBufferWithLength:required_size options:options] retain];
-    if (m_download_buffer == nil)
-    {
-      Log_ErrorPrintf("Failed to create %u byte download buffer", required_size);
-      m_download_buffer_size = 0;
-      return false;
-    }
-
-    m_download_buffer_size = required_size;
-  }
-
-  return true;
-}
-
 bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const
 {
   if (format == GPUTexture::Format::RGB565 || format == GPUTexture::Format::RGBA5551)
diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp
index 61ffddb85..cef210663 100644
--- a/src/util/opengl_device.cpp
+++ b/src/util/opengl_device.cpp
@@ -43,6 +43,14 @@ void OpenGLDevice::BindUpdateTextureUnit()
   GetInstance().SetActiveTexture(UPDATE_TEXTURE_UNIT - GL_TEXTURE0);
 }
 
+/// Reports whether texture downloads may go through pixel buffer objects: false as soon as either
+/// PBOs or asynchronous downloads have been disabled on the device instance.
+bool OpenGLDevice::ShouldUsePBOsForDownloads()
+{
+  const OpenGLDevice& dev = GetInstance();
+  return !(dev.m_disable_pbo || dev.m_disable_async_download);
+}
+
 RenderAPI OpenGLDevice::GetRenderAPI() const
 {
   return m_gl_context->IsGLES() ? 
RenderAPI::OpenGLES : RenderAPI::OpenGL; @@ -55,53 +60,6 @@ std::unique_ptr<GPUTexture> OpenGLDevice::CreateTexture(u32 width, u32 height, u return OpenGLTexture::Create(width, height, layers, levels, samples, type, format, data, data_stride); } -bool OpenGLDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) -{ - OpenGLTexture* T = static_cast<OpenGLTexture*>(texture); - - GLint alignment; - if (out_data_stride & 1) - alignment = 1; - else if (out_data_stride & 2) - alignment = 2; - else - alignment = 4; - - glPixelStorei(GL_PACK_ALIGNMENT, alignment); - glPixelStorei(GL_PACK_ROW_LENGTH, out_data_stride / T->GetPixelSize()); - - const auto [gl_internal_format, gl_format, gl_type] = - OpenGLTexture::GetPixelFormatMapping(T->GetFormat(), m_gl_context->IsGLES()); - const u32 layer = 0; - const u32 level = 0; - - s_stats.num_downloads++; - - if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_get_texture_sub_image) - { - glGetTextureSubImage(T->GetGLId(), level, x, y, layer, width, height, 1, gl_format, gl_type, - height * out_data_stride, out_data); - } - else - { - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_read_fbo); - - if (T->GetLayers() > 1) - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, T->GetGLId(), level, layer); - else - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, T->GetGLTarget(), T->GetGLId(), level); - - DebugAssert(glCheckFramebufferStatus(GL_READ_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - glReadPixels(x, y, width, height, gl_format, gl_type, out_data); - - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - } - - return true; -} - bool OpenGLDevice::SupportsTextureFormat(GPUTexture::Format format) const { const auto [gl_internal_format, gl_format, gl_type] = @@ -362,11 +320,10 @@ bool OpenGLDevice::CreateDevice(const std::string_view& adapter, bool threaded_p glObjectLabel 
= nullptr; } - bool buggy_pbo; - if (!CheckFeatures(&buggy_pbo, disabled_features)) + if (!CheckFeatures(disabled_features)) return false; - if (!CreateBuffers(buggy_pbo)) + if (!CreateBuffers()) return false; // Scissor test should always be enabled. @@ -375,7 +332,7 @@ bool OpenGLDevice::CreateDevice(const std::string_view& adapter, bool threaded_p return true; } -bool OpenGLDevice::CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features) +bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features) { const bool is_gles = m_gl_context->IsGLES(); @@ -424,10 +381,9 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features) // using the normal texture update routines and letting the driver take care of it. PBOs are also completely // broken on mobile drivers. const bool is_shitty_mobile_driver = (vendor_id_powervr || vendor_id_qualcomm || vendor_id_arm); - const bool is_buggy_pbo = + m_disable_pbo = (!GLAD_GL_VERSION_4_4 && !GLAD_GL_ARB_buffer_storage && !GLAD_GL_EXT_buffer_storage) || is_shitty_mobile_driver; - *buggy_pbo = is_buggy_pbo; - if (is_buggy_pbo && !is_shitty_mobile_driver) + if (m_disable_pbo && !is_shitty_mobile_driver) Log_WarningPrint("Not using PBOs for texture uploads because buffer_storage is unavailable."); GLint max_texture_size = 1024; @@ -517,6 +473,7 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features) m_features.gpu_timing = !(m_gl_context->IsGLES() && (!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT)); m_features.partial_msaa_resolve = true; + m_features.memory_import = true; m_features.shader_cache = false; @@ -539,6 +496,13 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features) // Mobile drivers prefer textures to not be updated mid-frame. 
m_features.prefer_unused_textures = is_gles || vendor_id_arm || vendor_id_powervr || vendor_id_qualcomm; + if (vendor_id_intel) + { + // Intel drivers corrupt image on readback when syncs are used for downloads. + Log_WarningPrint("Disabling async downloads with PBOs due to it being broken on Intel drivers."); + m_disable_async_download = true; + } + return true; } @@ -711,7 +675,7 @@ void OpenGLDevice::DestroySurface() Log_ErrorPrintf("Failed to switch to surfaceless"); } -bool OpenGLDevice::CreateBuffers(bool buggy_pbo) +bool OpenGLDevice::CreateBuffers() { if (!(m_vertex_buffer = OpenGLStreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE)) || !(m_index_buffer = OpenGLStreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE)) || @@ -727,7 +691,7 @@ bool OpenGLDevice::CreateBuffers(bool buggy_pbo) glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast<GLint*>(&m_uniform_buffer_alignment)); - if (!buggy_pbo) + if (!m_disable_pbo) { if (!(m_texture_stream_buffer = OpenGLStreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, TEXTURE_STREAM_BUFFER_SIZE))) { diff --git a/src/util/opengl_device.h b/src/util/opengl_device.h index 8e22b8284..6f4f102ba 100644 --- a/src/util/opengl_device.h +++ b/src/util/opengl_device.h @@ -20,9 +20,13 @@ class OpenGLPipeline; class OpenGLStreamBuffer; class OpenGLTexture; +class OpenGLDownloadTexture; class OpenGLDevice final : public GPUDevice { + friend OpenGLTexture; + friend OpenGLDownloadTexture; + public: OpenGLDevice(); ~OpenGLDevice(); @@ -34,6 +38,7 @@ public: } ALWAYS_INLINE static bool IsGLES() { return GetInstance().m_gl_context->IsGLES(); } static void BindUpdateTextureUnit(); + static bool ShouldUsePBOsForDownloads(); RenderAPI GetRenderAPI() const override; @@ -53,8 +58,11 @@ public: std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override; std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; - bool 
DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) override; + std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override; + std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, + u32 memory_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; @@ -137,8 +145,8 @@ private: static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; - bool CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features); - bool CreateBuffers(bool buggy_pbo); + bool CheckFeatures(FeatureMask disabled_features); + bool CreateBuffers(); void DestroyBuffers(); void SetSwapInterval(); @@ -215,4 +223,7 @@ private: std::string m_pipeline_disk_cache_filename; u32 m_pipeline_disk_cache_data_end = 0; bool m_pipeline_disk_cache_changed = false; + + bool m_disable_pbo = false; + bool m_disable_async_download = false; }; diff --git a/src/util/opengl_texture.cpp b/src/util/opengl_texture.cpp index 980dc4d1d..96795a38a 100644 --- a/src/util/opengl_texture.cpp +++ b/src/util/opengl_texture.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "opengl_texture.h" @@ -7,6 +7,7 @@ #include "common/align.h" #include "common/assert.h" +#include "common/intrin.h" #include "common/log.h" #include "common/string_util.h" @@ -696,3 +697,207 @@ std::unique_ptr<GPUTextureBuffer> OpenGLDevice::CreateTextureBuffer(GPUTextureBu return 
std::unique_ptr<GPUTextureBuffer>(
     new OpenGLTextureBuffer(format, size_in_elements, std::move(buffer), texture_id));
 }
+
+/// Wraps either a persistently mapped pixel-pack buffer (buffer_id != 0) or a CPU buffer (cpu_buffer != nullptr).
+OpenGLDownloadTexture::OpenGLDownloadTexture(u32 width, u32 height, GPUTexture::Format format, bool imported,
+                                             GLuint buffer_id, u8* cpu_buffer, u32 buffer_size, const u8* map_ptr,
+                                             u32 map_pitch)
+  : GPUDownloadTexture(width, height, format, imported), m_buffer_id(buffer_id), m_buffer_size(buffer_size),
+    m_cpu_buffer(cpu_buffer)
+{
+  m_map_pointer = map_ptr;
+  m_current_pitch = map_pitch;
+}
+
+/// Destroys the GL buffer and any pending sync object, or frees the CPU buffer unless it was imported.
+OpenGLDownloadTexture::~OpenGLDownloadTexture()
+{
+  if (m_buffer_id != 0)
+  {
+    if (m_sync)
+      glDeleteSync(m_sync);
+    if (m_map_pointer)
+    {
+      glBindBuffer(GL_PIXEL_PACK_BUFFER, m_buffer_id);
+      glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
+      glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+    }
+    glDeleteBuffers(1, &m_buffer_id);
+  }
+  else if (m_cpu_buffer && !m_is_imported)
+  {
+    Common::AlignedFree(m_cpu_buffer);
+  }
+}
+
+/// Creates a download texture. Caller-supplied memory is used directly as the CPU destination; otherwise a
+/// persistently mapped PBO is created when supported and allowed, else an aligned CPU buffer is allocated.
+std::unique_ptr<OpenGLDownloadTexture> OpenGLDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format,
+                                                                     void* memory, size_t memory_size, u32 memory_pitch)
+{
+  const u32 buffer_pitch =
+    memory ? memory_pitch :
+             Common::AlignUpPow2(GPUTexture::CalcUploadPitch(format, width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
+  const u32 buffer_size = memory ? static_cast<u32>(memory_size) : (height * buffer_pitch);
+  DebugAssert(!memory || (static_cast<size_t>(height) * memory_pitch) <= memory_size);
+  const bool use_buffer_storage = (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage) &&
+                                  !memory && OpenGLDevice::ShouldUsePBOsForDownloads();
+  if (use_buffer_storage)
+  {
+    GLuint buffer_id;
+    glGenBuffers(1, &buffer_id);
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_id);
+
+    const u32 flags = GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
+    const u32 map_flags = GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT;
+
+    if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage)
+      glBufferStorage(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, flags);
+    else if (GLAD_GL_EXT_buffer_storage)
+      glBufferStorageEXT(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, flags);
+
+    u8* buffer_map = static_cast<u8*>(glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, buffer_size, map_flags));
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+    if (!buffer_map)
+    {
+      Log_ErrorPrint("Failed to map persistent download buffer");
+      glDeleteBuffers(1, &buffer_id);
+      return {};
+    }
+    return std::unique_ptr<OpenGLDownloadTexture>(new OpenGLDownloadTexture(
+      width, height, format, false, buffer_id, nullptr, buffer_size, buffer_map, buffer_pitch));
+  }
+
+  // Fallback to glReadPixels() + CPU buffer.
+  const bool imported = (memory != nullptr);
+  u8* cpu_buffer =
+    imported ? static_cast<u8*>(memory) : static_cast<u8*>(Common::AlignedMalloc(buffer_size, VECTOR_ALIGNMENT));
+  if (!cpu_buffer)
+    return {};
+
+  return std::unique_ptr<OpenGLDownloadTexture>(
+    new OpenGLDownloadTexture(width, height, format, imported, 0, cpu_buffer, buffer_size, cpu_buffer, buffer_pitch));
+}
+
+/// Reads a rectangle of `src` (layer src_layer, mip src_level) into this download texture at (dst_x, dst_y).
+/// PBO-backed reads are fenced and completed by Flush(); reads into a CPU buffer need no flush afterwards.
+void OpenGLDownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width,
+                                            u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch)
+{
+  OpenGLTexture* const srcgl = static_cast<OpenGLTexture*>(src);
+  OpenGLDevice& dev = OpenGLDevice::GetInstance();
+
+  DebugAssert(srcgl->GetFormat() == m_format);
+  DebugAssert(src_level < srcgl->GetLevels());
+  DebugAssert((src_x + width) <= srcgl->GetMipWidth(src_level) && (src_y + height) <= srcgl->GetMipHeight(src_level));
+  DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height);
+  DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch);
+  DebugAssert(!m_is_imported || !use_transfer_pitch);
+
+  dev.CommitClear(srcgl);
+  u32 copy_offset, copy_size, copy_rows;
+  if (!m_is_imported)
+    m_current_pitch = GetTransferPitch(use_transfer_pitch ? width : m_width, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
+  GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows);
+  dev.GetStatistics().num_downloads++;
+
+  GLint alignment;
+  if (m_current_pitch & 1)
+    alignment = 1;
+  else if (m_current_pitch & 2)
+    alignment = 2;
+  else
+    alignment = 4;
+
+  glPixelStorei(GL_PACK_ALIGNMENT, alignment);
+  glPixelStorei(GL_PACK_ROW_LENGTH, GPUTexture::CalcUploadRowLengthFromPitch(m_format, m_current_pitch));
+  if (!m_cpu_buffer)
+  {
+    // Read to PBO.
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, m_buffer_id);
+  }
+
+  const auto [gl_internal_format, gl_format, gl_type] =
+    OpenGLTexture::GetPixelFormatMapping(srcgl->GetFormat(), dev.IsGLES());
+  if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_get_texture_sub_image)
+  {
+    glGetTextureSubImage(srcgl->GetGLId(), src_level, src_x, src_y, src_layer, width, height, 1, gl_format, gl_type,
+                         m_current_pitch * height, m_cpu_buffer + copy_offset);
+  }
+  else
+  {
+    // Attach the requested mip level (and layer, for layered textures) instead of always level 0 of a 2D target.
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, dev.m_read_fbo);
+    if (srcgl->GetLayers() > 1)
+      glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcgl->GetGLId(), src_level, src_layer);
+    else
+      glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcgl->GetGLTarget(), srcgl->GetGLId(), src_level);
+    glReadPixels(src_x, src_y, width, height, gl_format, gl_type, m_cpu_buffer + copy_offset);
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
+  }
+
+  if (m_cpu_buffer)
+  {
+    // If using CPU buffers, we never need to flush.
+    m_needs_flush = false;
+  }
+  else
+  {
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+    // Create a sync object so we know when the GPU is done copying.
+    if (m_sync)
+      glDeleteSync(m_sync);
+    m_sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+    m_needs_flush = true;
+  }
+
+  glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+}
+
+bool OpenGLDownloadTexture::Map(u32 x, u32 y, u32 width, u32 height)
+{
+  // Either always mapped, or CPU buffer.
+  return true;
+}
+
+void OpenGLDownloadTexture::Unmap()
+{
+  // Either always mapped, or CPU buffer.
+}
+
+/// Waits on the fence inserted by the last PBO download, then releases it; does nothing otherwise.
+void OpenGLDownloadTexture::Flush()
+{
+  // If we're using CPU buffers, we did the readback synchronously...
+  if (!m_needs_flush || !m_sync)
+    return;
+  m_needs_flush = false;
+
+  glClientWaitSync(m_sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
+  glDeleteSync(m_sync);
+  m_sync = {};
+}
+
+void OpenGLDownloadTexture::SetDebugName(std::string_view name)
+{
+  // Nothing to label without a name, or when reading into a CPU buffer (no GL buffer object exists then).
+  if (name.empty() || m_buffer_id == 0)
+    return;
+  if (glObjectLabel)
+    glObjectLabel(GL_BUFFER, m_buffer_id, static_cast<GLsizei>(name.length()), name.data());
+}
+
+/// Creates a download texture without caller-provided memory (nullptr/0/0 is passed to Create()).
+std::unique_ptr<GPUDownloadTexture> OpenGLDevice::CreateDownloadTexture(u32 width, u32 height,
+                                                                        GPUTexture::Format format)
+{
+  return OpenGLDownloadTexture::Create(width, height, format, nullptr, 0, 0);
+}
+
+std::unique_ptr<GPUDownloadTexture> OpenGLDevice::CreateDownloadTexture(u32 width, u32 height,
+                                                                        GPUTexture::Format format, void* memory,
+                                                                        size_t memory_size, u32 memory_stride)
+{
+  // not _really_ memory importing, but PBOs are broken on Intel....
+  return OpenGLDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride);
+}
diff --git a/src/util/opengl_texture.h b/src/util/opengl_texture.h
index b8fc3e382..f0bd6c11c 100644
--- a/src/util/opengl_texture.h
+++ b/src/util/opengl_texture.h
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
+// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
 
 #pragma once
@@ -99,3 +99,34 @@ private:
 
   GLuint m_id;
 };
+
+/// Download (readback) texture for the OpenGL backend, backed by a persistently mapped PBO or a CPU buffer.
+class OpenGLDownloadTexture final : public GPUDownloadTexture
+{
+public:
+  ~OpenGLDownloadTexture() override;
+
+  static std::unique_ptr<OpenGLDownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format, void* memory,
+                                                       size_t memory_size, u32 memory_pitch);
+
+  void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height,
+                       u32 src_layer, u32 src_level, bool use_transfer_pitch) override;
+
+  bool Map(u32 x, u32 y, u32 width, u32 height) override;
+  void Unmap() override;
+  void Flush() override;
+
+  void SetDebugName(std::string_view name) override;
+
+private:
+  OpenGLDownloadTexture(u32 width, u32 height, GPUTexture::Format format, bool imported, GLuint buffer_id,
+                        u8* cpu_buffer, u32 buffer_size, const u8* map_ptr, u32 map_pitch);
+
+  GLuint m_buffer_id = 0;
+  u32 m_buffer_size = 0;
+
+  GLsync m_sync = {};
+
+  // used when reading back into CPU memory instead of a PBO (imported memory, or PBO downloads not in use)
+  u8* m_cpu_buffer = nullptr;
+};
diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp
index 41c43d841..2a23b2951 100644
--- a/src/util/vulkan_device.cpp
+++ b/src/util/vulkan_device.cpp
@@ -1515,6 +1515,14 @@ void VulkanDevice::DeferBufferDestruction(VkBuffer object, VmaAllocation allocat
                                  [this, object, allocation]() { vmaDestroyBuffer(m_allocator, object, allocation); });
 }
 
+void VulkanDevice::DeferBufferDestruction(VkBuffer object, VkDeviceMemory memory)
+{
+  m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), [this, object, memory]() {
+    vkDestroyBuffer(m_device, object, nullptr);
+    vkFreeMemory(m_device, memory, nullptr);
+  });
+}
+
 void VulkanDevice::DeferFramebufferDestruction(VkFramebuffer object)
 {
   m_cleanup_objects.emplace_back(GetCurrentFenceCounter(),
@@ -2067,7 +2075,6 @@ void VulkanDevice::DestroyDevice()
   for (auto& it : m_cleanup_objects)
     it.second();
   m_cleanup_objects.clear();
-  DestroyDownloadBuffer();
   DestroyPersistentDescriptorSets();
   DestroyBuffers();
   DestroySamplers();
@@ -2528,6 +2535,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
     !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && m_device_features.geometryShader;
 
   m_features.partial_msaa_resolve = true;
+  m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
   m_features.shader_cache = true;
   m_features.pipeline_cache = true;
   m_features.prefer_unused_textures = true;
@@ -2981,21 +2989,21 @@ void VulkanDevice::RenderBlankFrame()
   InvalidateCachedState();
 }
 
-bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBufferUsageFlags buffer_usage,
-                                       
VkDeviceMemory* out_memory, VkBuffer* out_buffer, u32* out_offset) +bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsageFlags buffer_usage, + VkDeviceMemory* out_memory, VkBuffer* out_buffer, VkDeviceSize* out_offset) { if (!m_optional_extensions.vk_ext_external_memory_host) return false; // Align to the nearest page - const void* data_aligned = - reinterpret_cast<const void*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(data), HOST_PAGE_SIZE)); + void* data_aligned = + reinterpret_cast<void*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(data), HOST_PAGE_SIZE)); // Offset to the start of the data within the page - const u32 data_offset = reinterpret_cast<uintptr_t>(data) & (HOST_PAGE_SIZE - 1); + const size_t data_offset = reinterpret_cast<uintptr_t>(data) & static_cast<uintptr_t>(HOST_PAGE_MASK); // Full amount of data that must be imported, including the pages - const u32 data_size_aligned = Common::AlignUpPow2(data_offset + data_size, HOST_PAGE_SIZE); + const size_t data_size_aligned = Common::AlignUpPow2(data_offset + data_size, HOST_PAGE_SIZE); VkMemoryHostPointerPropertiesEXT pointer_properties = {VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, nullptr, 0}; @@ -3003,6 +3011,7 @@ bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBuffer data_aligned, &pointer_properties); if (res != VK_SUCCESS || pointer_properties.memoryTypeBits == 0) { + LOG_VULKAN_ERROR(res, "vkGetMemoryHostPointerPropertiesEXT() failed: "); return false; } @@ -3015,6 +3024,7 @@ bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBuffer res = vmaFindMemoryTypeIndex(m_allocator, pointer_properties.memoryTypeBits, &vma_alloc_info, &memory_index); if (res != VK_SUCCESS) { + LOG_VULKAN_ERROR(res, "vmaFindMemoryTypeIndex() failed: "); return false; } @@ -3030,6 +3040,7 @@ bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBuffer res = vkAllocateMemory(m_device, &alloc_info, nullptr, 
&imported_memory); if (res != VK_SUCCESS) { + LOG_VULKAN_ERROR(res, "vkAllocateMemory() failed: "); return false; } @@ -3049,10 +3060,10 @@ bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBuffer res = vkCreateBuffer(m_device, &buffer_info, nullptr, &imported_buffer); if (res != VK_SUCCESS) { + LOG_VULKAN_ERROR(res, "vkCreateBuffer() failed: "); if (imported_memory != VK_NULL_HANDLE) - { vkFreeMemory(m_device, imported_memory, nullptr); - } + return false; } @@ -3061,7 +3072,7 @@ bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBuffer *out_memory = imported_memory; *out_buffer = imported_buffer; *out_offset = data_offset; - + Log_DevFmt("Imported {} byte buffer covering {} bytes at {}", data_size, data_size_aligned, data); return true; } diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h index d06fb8179..ef3e00f94 100644 --- a/src/util/vulkan_device.h +++ b/src/util/vulkan_device.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -25,6 +25,7 @@ class VulkanPipeline; class VulkanSwapChain; class VulkanTexture; class VulkanTextureBuffer; +class VulkanDownloadTexture; struct VK_PIPELINE_CACHE_HEADER; @@ -32,6 +33,7 @@ class VulkanDevice final : public GPUDevice { public: friend VulkanTexture; + friend VulkanDownloadTexture; enum : u32 { @@ -81,8 +83,11 @@ public: std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override; std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; - bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) override; + std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override; + 
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, + u32 memory_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; @@ -196,6 +201,7 @@ public: // Schedule a vulkan resource for destruction later on. This will occur when the command buffer // is next re-used, and the GPU has finished working with the specified resource. void DeferBufferDestruction(VkBuffer object, VmaAllocation allocation); + void DeferBufferDestruction(VkBuffer object, VkDeviceMemory memory); void DeferFramebufferDestruction(VkFramebuffer object); void DeferImageDestruction(VkImage object, VmaAllocation allocation); void DeferImageViewDestruction(VkImageView object); @@ -341,11 +347,8 @@ private: void RenderBlankFrame(); - bool TryImportHostMemory(const void* data, u32 data_size, VkBufferUsageFlags buffer_usage, VkDeviceMemory* out_memory, - VkBuffer* out_buffer, u32* out_offset); - - bool CheckDownloadBufferSize(u32 required_size); - void DestroyDownloadBuffer(); + bool TryImportHostMemory(void* data, size_t data_size, VkBufferUsageFlags buffer_usage, VkDeviceMemory* out_memory, + VkBuffer* out_buffer, VkDeviceSize* out_offset); /// Set dirty flags on everything to force re-bind at next draw time. void InvalidateCachedState(); @@ -454,11 +457,6 @@ private: SamplerMap m_sampler_map; - VmaAllocation m_download_buffer_allocation = VK_NULL_HANDLE; - VkBuffer m_download_buffer = VK_NULL_HANDLE; - u8* m_download_buffer_map = nullptr; - u32 m_download_buffer_size = 0; - // Which bindings/state has to be updated before the next draw. 
u32 m_dirty_flags = ALL_DIRTY_STATE; diff --git a/src/util/vulkan_texture.cpp b/src/util/vulkan_texture.cpp index f82f810b8..5626c9e06 100644 --- a/src/util/vulkan_texture.cpp +++ b/src/util/vulkan_texture.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "vulkan_texture.h" @@ -736,126 +736,6 @@ std::unique_ptr<GPUTexture> VulkanDevice::CreateTexture(u32 width, u32 height, u return tex; } -bool VulkanDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) -{ - VulkanTexture* T = static_cast<VulkanTexture*>(texture); - T->CommitClear(); - - const u32 pitch = Common::AlignUp(width * T->GetPixelSize(), GetBufferCopyRowPitchAlignment()); - const u32 size = pitch * height; - const u32 level = 0; - if (!CheckDownloadBufferSize(size)) - { - Log_ErrorPrintf("Can't read back %ux%u", width, height); - return false; - } - - s_stats.num_downloads++; - - if (InRenderPass()) - EndRenderPass(); - - const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); - - VulkanTexture::Layout old_layout = T->GetLayout(); - if (old_layout != VulkanTexture::Layout::TransferSrc) - T->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, old_layout, VulkanTexture::Layout::TransferSrc); - - VkBufferImageCopy image_copy = {}; - const VkImageAspectFlags aspect = T->IsDepthStencil() ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; - image_copy.bufferOffset = 0; - image_copy.bufferRowLength = pitch / T->GetPixelSize(); - image_copy.bufferImageHeight = 0; - image_copy.imageSubresource = {aspect, level, 0u, 1u}; - image_copy.imageOffset = {static_cast<s32>(x), static_cast<s32>(y), 0}; - image_copy.imageExtent = {width, height, 1u}; - - // do the copy - vkCmdCopyImageToBuffer(cmdbuf, T->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_download_buffer, 1, - &image_copy); - - // flush gpu cache - const VkBufferMemoryBarrier buffer_info = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType - nullptr, // const void* pNext - VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags srcAccessMask - VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask - VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex - VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex - m_download_buffer, // VkBuffer buffer - 0, // VkDeviceSize offset - size // VkDeviceSize size - }; - vkCmdPipelineBarrier(cmdbuf, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1, &buffer_info, - 0, nullptr); - - if (old_layout != VulkanTexture::Layout::TransferSrc) - T->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, VulkanTexture::Layout::TransferSrc, old_layout); - - SubmitCommandBuffer(true); - - // invalidate cpu cache before reading - VkResult res = vmaInvalidateAllocation(m_allocator, m_download_buffer_allocation, 0, size); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vmaInvalidateAllocation() failed, readback may be incorrect: "); - - StringUtil::StrideMemCpy(out_data, out_data_stride, m_download_buffer_map, pitch, width * T->GetPixelSize(), height); - return true; -} - -bool VulkanDevice::CheckDownloadBufferSize(u32 required_size) -{ - if (m_download_buffer_size >= required_size) - return true; - - DestroyDownloadBuffer(); - - // Adreno has slow coherent cached reads. 
- const bool is_adreno = (m_device_properties.vendorID == 0x5143 || - m_device_driver_properties.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY); - - const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - nullptr, - 0u, - required_size, - VK_BUFFER_USAGE_TRANSFER_DST_BIT, - VK_SHARING_MODE_EXCLUSIVE, - 0u, - nullptr}; - - VmaAllocationCreateInfo aci = {}; - aci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; - aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - aci.preferredFlags = is_adreno ? (VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) : - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - - VmaAllocationInfo ai = {}; - VkResult res = vmaCreateBuffer(m_allocator, &bci, &aci, &m_download_buffer, &m_download_buffer_allocation, &ai); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: "); - return false; - } - - m_download_buffer_map = static_cast<u8*>(ai.pMappedData); - return true; -} - -void VulkanDevice::DestroyDownloadBuffer() -{ - if (m_download_buffer == VK_NULL_HANDLE) - return; - - vmaDestroyBuffer(m_allocator, m_download_buffer, m_download_buffer_allocation); - - // unmapped as part of the buffer destroy - m_download_buffer = VK_NULL_HANDLE; - m_download_buffer_allocation = VK_NULL_HANDLE; - m_download_buffer_map = nullptr; - m_download_buffer_size = 0; -} - VulkanSampler::VulkanSampler(VkSampler sampler) : m_sampler(sampler) { } @@ -1081,3 +961,218 @@ std::unique_ptr<GPUTextureBuffer> VulkanDevice::CreateTextureBuffer(GPUTextureBu return tb; } + +VulkanDownloadTexture::VulkanDownloadTexture(u32 width, u32 height, GPUTexture::Format format, VmaAllocation allocation, + VkDeviceMemory memory, VkBuffer buffer, VkDeviceSize memory_offset, + VkDeviceSize buffer_size, const u8* map_ptr, u32 map_pitch) + : GPUDownloadTexture(width, height, format, (memory != VK_NULL_HANDLE)), m_allocation(allocation), m_memory(memory), + m_buffer(buffer), m_memory_offset(memory_offset), m_buffer_size(buffer_size) +{ + m_map_pointer = 
map_ptr; + m_current_pitch = map_pitch; +} + +VulkanDownloadTexture::~VulkanDownloadTexture() +{ + if (m_allocation != VK_NULL_HANDLE) + { + // Buffer was created mapped, no need to manually unmap. + VulkanDevice::GetInstance().DeferBufferDestruction(m_buffer, m_allocation); + } + else + { + // imported + DebugAssert(m_is_imported && m_memory != VK_NULL_HANDLE); + VulkanDevice::GetInstance().DeferBufferDestruction(m_buffer, m_memory); + } +} + +std::unique_ptr<VulkanDownloadTexture> VulkanDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format, + void* memory, size_t memory_size, + u32 memory_stride) +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + VmaAllocation allocation = VK_NULL_HANDLE; + VkDeviceMemory dev_memory = VK_NULL_HANDLE; + VkBuffer buffer = VK_NULL_HANDLE; + VkDeviceSize memory_offset = 0; + const u8* map_ptr = nullptr; + u32 map_pitch = 0; + u32 buffer_size = 0; + + // not importing memory? + if (!memory) + { + map_pitch = Common::AlignUpPow2(GPUTexture::CalcUploadPitch(format, width), dev.GetBufferCopyRowPitchAlignment()); + buffer_size = height * map_pitch; + + const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + nullptr, + 0u, + buffer_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_SHARING_MODE_EXCLUSIVE, + 0u, + nullptr}; + + VmaAllocationCreateInfo aci = {}; + aci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; + aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + aci.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + VmaAllocationInfo ai = {}; + VkResult res = vmaCreateBuffer(VulkanDevice::GetInstance().GetAllocator(), &bci, &aci, &buffer, &allocation, &ai); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: "); + return {}; + } + + DebugAssert(ai.pMappedData); + map_ptr = static_cast<u8*>(ai.pMappedData); + } + else + { + map_pitch = memory_stride; + buffer_size = height * map_pitch; + Assert(buffer_size <= memory_size); + + if (!dev.TryImportHostMemory(memory, memory_size, 
VK_BUFFER_USAGE_TRANSFER_DST_BIT, &dev_memory, &buffer, + &memory_offset)) + { + return {}; + } + + map_ptr = static_cast<u8*>(memory); + } + + return std::unique_ptr<VulkanDownloadTexture>(new VulkanDownloadTexture( + width, height, format, allocation, dev_memory, buffer, memory_offset, buffer_size, map_ptr, map_pitch)); +} + +void VulkanDownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, + u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch) +{ + VulkanTexture* const vkTex = static_cast<VulkanTexture*>(src); + VulkanDevice& dev = VulkanDevice::GetInstance(); + + DebugAssert(vkTex->GetFormat() == m_format); + DebugAssert(src_level < vkTex->GetLevels()); + DebugAssert((src_x + width) <= src->GetMipWidth(src_level) && (src_y + height) <= src->GetMipHeight(src_level)); + DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height); + DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch); + DebugAssert(!m_is_imported || !use_transfer_pitch); + + u32 copy_offset, copy_size, copy_rows; + if (!m_is_imported) + m_current_pitch = GetTransferPitch(use_transfer_pitch ? 
width : m_width, dev.GetBufferCopyRowPitchAlignment()); + GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows); + + dev.GetStatistics().num_downloads++; + if (dev.InRenderPass()) + dev.EndRenderPass(); + vkTex->CommitClear(); + + const VkCommandBuffer cmdbuf = dev.GetCurrentCommandBuffer(); + GL_INS_FMT("VulkanDownloadTexture::CopyFromTexture: {{{},{}}} {}x{} => {{{},{}}}", src_x, src_y, width, height, dst_x, + dst_y); + + VulkanTexture::Layout old_layout = vkTex->GetLayout(); + if (old_layout == VulkanTexture::Layout::Undefined) + vkTex->TransitionToLayout(cmdbuf, VulkanTexture::Layout::TransferSrc); + else if (old_layout != VulkanTexture::Layout::TransferSrc) + vkTex->TransitionSubresourcesToLayout(cmdbuf, 0, 1, src_level, 1, old_layout, VulkanTexture::Layout::TransferSrc); + + VkBufferImageCopy image_copy = {}; + const VkImageAspectFlags aspect = vkTex->IsDepthStencil() ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + image_copy.bufferOffset = m_memory_offset + copy_offset; + image_copy.bufferRowLength = GPUTexture::CalcUploadRowLengthFromPitch(m_format, m_current_pitch); + image_copy.bufferImageHeight = 0; + image_copy.imageSubresource = {aspect, src_level, src_layer, 1u}; + image_copy.imageOffset = {static_cast<s32>(src_x), static_cast<s32>(src_y), 0}; + image_copy.imageExtent = {width, height, 1u}; + + // do the copy + vkCmdCopyImageToBuffer(cmdbuf, vkTex->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_buffer, 1, &image_copy); + + // flush gpu cache + const VkBufferMemoryBarrier buffer_info = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags srcAccessMask + VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_buffer, // VkBuffer buffer + 0, // VkDeviceSize offset + copy_size //
VkDeviceSize size + }; + vkCmdPipelineBarrier(cmdbuf, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1, &buffer_info, + 0, nullptr); + + if (old_layout != VulkanTexture::Layout::TransferSrc && old_layout != VulkanTexture::Layout::Undefined) + vkTex->TransitionSubresourcesToLayout(cmdbuf, 0, 1, src_level, 1, VulkanTexture::Layout::TransferSrc, old_layout); + + m_copy_fence_counter = dev.GetCurrentFenceCounter(); + m_needs_cache_invalidate = true; + m_needs_flush = true; +} + +bool VulkanDownloadTexture::Map(u32 x, u32 y, u32 width, u32 height) +{ + // Always mapped, but we might need to invalidate the cache. + if (m_needs_cache_invalidate) + { + u32 copy_offset, copy_size, copy_rows; + GetTransferSize(x, y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows); + vmaInvalidateAllocation(VulkanDevice::GetInstance().GetAllocator(), m_allocation, copy_offset, + m_current_pitch * copy_rows); + m_needs_cache_invalidate = false; + } + + return true; +} + +void VulkanDownloadTexture::Unmap() +{ + // Always mapped. +} + +void VulkanDownloadTexture::Flush() +{ + if (!m_needs_flush) + return; + + m_needs_flush = false; + + VulkanDevice& dev = VulkanDevice::GetInstance(); + if (dev.GetCompletedFenceCounter() >= m_copy_fence_counter) + return; + + // Need to execute command buffer.
+ if (dev.GetCurrentFenceCounter() == m_copy_fence_counter) + dev.SubmitCommandBuffer(true); + else + dev.WaitForFenceCounter(m_copy_fence_counter); +} + +void VulkanDownloadTexture::SetDebugName(std::string_view name) +{ + if (name.empty()) + return; + + Vulkan::SetObjectName(VulkanDevice::GetInstance().GetVulkanDevice(), m_buffer, name); +} + +std::unique_ptr<GPUDownloadTexture> VulkanDevice::CreateDownloadTexture(u32 width, u32 height, + GPUTexture::Format format) +{ + return VulkanDownloadTexture::Create(width, height, format, nullptr, 0, 0); +} + +std::unique_ptr<GPUDownloadTexture> VulkanDevice::CreateDownloadTexture(u32 width, u32 height, + GPUTexture::Format format, void* memory, + size_t memory_size, u32 memory_stride) +{ + return VulkanDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride); +} diff --git a/src/util/vulkan_texture.h b/src/util/vulkan_texture.h index f33b8c472..7e8a0b684 100644 --- a/src/util/vulkan_texture.h +++ b/src/util/vulkan_texture.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com> +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -150,3 +150,37 @@ private: VkBufferView m_buffer_view = VK_NULL_HANDLE; VkDescriptorSet m_descriptor_set = VK_NULL_HANDLE; }; + +class VulkanDownloadTexture final : public GPUDownloadTexture +{ +public: + ~VulkanDownloadTexture() override; + + static std::unique_ptr<VulkanDownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format, void* memory, + size_t memory_size, u32 memory_stride); + + void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height, + u32 src_layer, u32 src_level, bool use_transfer_pitch) override; + + bool Map(u32 x, u32 y, u32 width, u32 height) override; + void Unmap() override; + + void Flush() override; + + void SetDebugName(std::string_view name) override; + 
+private: + VulkanDownloadTexture(u32 width, u32 height, GPUTexture::Format format, VmaAllocation allocation, + VkDeviceMemory memory, VkBuffer buffer, VkDeviceSize memory_offset, VkDeviceSize buffer_size, + const u8* map_ptr, u32 map_pitch); + + VmaAllocation m_allocation = VK_NULL_HANDLE; + VkDeviceMemory m_memory = VK_NULL_HANDLE; + VkBuffer m_buffer = VK_NULL_HANDLE; + + u64 m_copy_fence_counter = 0; + VkDeviceSize m_memory_offset = 0; + VkDeviceSize m_buffer_size = 0; + + bool m_needs_cache_invalidate = false; +};