From 8a0033dfb3bd8a555467790ecdbcd4ea4a882759 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 24 Aug 2023 19:00:01 +1000 Subject: [PATCH] MetalDevice: Avoid inline texture uploads when unnecessary --- src/util/metal_device.h | 13 +++++++++--- src/util/metal_device.mm | 37 +++++++++++++++++++++------------ src/util/metal_stream_buffer.mm | 9 ++++---- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/src/util/metal_device.h b/src/util/metal_device.h index fd3daff96..576901e0d 100644 --- a/src/util/metal_device.h +++ b/src/util/metal_device.h @@ -121,12 +121,19 @@ public: void SetDebugName(const std::string_view& name) override; + // Call when the texture is bound to the pipeline, or read from in a copy. + ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; } + private: MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, Format format); id m_texture; + // Contains the fence counter when the texture was last used. + // When this matches the current fence counter, the texture was used this command buffer. + u64 m_use_fence_counter = 0; + u16 m_map_x = 0; u16 m_map_y = 0; u16 m_map_width = 0; @@ -179,9 +186,9 @@ class MetalDevice final : public GPUDevice { public: ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast(g_gpu_device.get()); } - ALWAYS_INLINE static id GetMTLDevice() { return GetInstance().m_device; } - ALWAYS_INLINE static u64 GetCurrentFenceCounter() { return GetInstance().m_current_fence_counter; } - ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; } + ALWAYS_INLINE id GetMTLDevice() { return m_device; } + ALWAYS_INLINE u64 GetCurrentFenceCounter() { return m_current_fence_counter; } + ALWAYS_INLINE u64 GetCompletedFenceCounter() { return m_completed_fence_counter; } MetalDevice(); ~MetalDevice(); diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index a2a845d96..58fd2d343 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -130,7 +130,7 @@ bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_pr id device = nil; if (!adapter.empty()) { - NSArray> *devices = [MTLCopyAllDevices() autorelease]; + NSArray>* devices = [MTLCopyAllDevices() autorelease]; const u32 count = static_cast([devices count]); for (u32 i = 0; i < count; i++) { @@ -140,11 +140,11 @@ bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_pr break; } } - + if (device == nil) Log_ErrorPrint(fmt::format("Failed to find device named '{}'. Trying default.", adapter).c_str()); } - + if (device == nil) { device = [MTLCreateSystemDefaultDevice() autorelease]; @@ -416,14 +416,15 @@ void MetalDevice::DestroyBuffers() GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList() { AdapterAndModeList ret; - @autoreleasepool { - NSArray> *devices = [MTLCopyAllDevices() autorelease]; + @autoreleasepool + { + NSArray>* devices = [MTLCopyAllDevices() autorelease]; const u32 count = static_cast([devices count]); ret.adapter_names.reserve(count); for (u32 i = 0; i < count; i++) ret.adapter_names.emplace_back([devices[i].name UTF8String]); } - + return ret; } @@ -914,7 +915,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, Panic("Failed to allocate temporary buffer."); return false; } - + dev.DeferRelease(actual_buffer); } else @@ -928,19 +929,19 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, return false; } } - + actual_offset = sb.GetCurrentOffset(); StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height); sb.CommitMemory(req_size); actual_buffer = sb.GetBuffer(); actual_pitch = aligned_pitch; } - + if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) dev.CommitClear(this); - // TODO: track this - const bool is_inline = true; + const bool is_inline = (m_use_fence_counter == dev.GetCurrentFenceCounter()); + id encoder = dev.GetBlitEncoder(is_inline); [encoder copyFromBuffer:actual_buffer sourceOffset:actual_offset @@ -1423,6 +1424,9 @@ void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d CommitClear(S); + S->SetUseFenceCounter(m_current_fence_counter); + D->SetUseFenceCounter(m_current_fence_counter); + @autoreleasepool { id encoder = GetBlitEncoder(true); @@ -1743,7 +1747,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s DebugAssert(slot < MAX_TEXTURE_SAMPLERS); id T = texture ? static_cast(texture)->GetMTLTexture() : nil; - id S = sampler ? static_cast(sampler)->GetSamplerState() : nil; + if (texture) + static_cast(texture)->SetUseFenceCounter(m_current_fence_counter); if (m_current_textures[slot] != T) { @@ -1752,6 +1757,7 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s [m_render_encoder setFragmentTexture:T atIndex:slot]; } + id S = sampler ? static_cast(sampler)->GetSamplerState() : nil; if (m_current_samplers[slot] != S) { m_current_samplers[slot] = S; @@ -1829,7 +1835,8 @@ void MetalDevice::BeginRenderPass() m_inline_upload_encoder = nil; } - @autoreleasepool { + @autoreleasepool + { MTLRenderPassDescriptor* desc; if (!m_current_framebuffer) { @@ -1841,6 +1848,10 @@ void MetalDevice::BeginRenderPass() else { desc = m_current_framebuffer->GetDescriptor(); + if (MetalTexture* RT = static_cast(m_current_framebuffer->GetRT())) + RT->SetUseFenceCounter(m_current_fence_counter); + if (MetalTexture* DS = static_cast(m_current_framebuffer->GetDS())) + DS->SetUseFenceCounter(m_current_fence_counter); } m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain]; diff --git a/src/util/metal_stream_buffer.mm b/src/util/metal_stream_buffer.mm index 7d6bce3b6..1c7581d1b 100644 --- a/src/util/metal_stream_buffer.mm +++ b/src/util/metal_stream_buffer.mm @@ -138,7 +138,7 @@ void MetalStreamBuffer::CommitMemory(u32 final_num_bytes) void MetalStreamBuffer::UpdateCurrentFencePosition() { // Has the offset changed since the last fence? - const u64 counter = MetalDevice::GetCurrentFenceCounter(); + const u64 counter = MetalDevice::GetInstance().GetCurrentFenceCounter(); if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter) { // Still haven't executed a command buffer, so just update the offset. @@ -155,7 +155,7 @@ void MetalStreamBuffer::UpdateGPUPosition() auto start = m_tracked_fences.begin(); auto end = start; - const u64 completed_counter = MetalDevice::GetCompletedFenceCounter(); + const u64 completed_counter = MetalDevice::GetInstance().GetCompletedFenceCounter(); while (end != m_tracked_fences.end() && completed_counter >= end->first) { m_current_gpu_position = end->second; @@ -242,11 +242,12 @@ bool MetalStreamBuffer::WaitForClearSpace(u32 num_bytes) // Did any fences satisfy this condition? // Has the command buffer been executed yet? If not, the caller should execute it. - if (iter == m_tracked_fences.end() || iter->first == MetalDevice::GetCurrentFenceCounter()) + MetalDevice& dev = MetalDevice::GetInstance(); + if (iter == m_tracked_fences.end() || iter->first == dev.GetCurrentFenceCounter()) return false; // Wait until this fence is signaled. This will fire the callback, updating the GPU position. - MetalDevice::GetInstance().WaitForFenceCounter(iter->first); + dev.WaitForFenceCounter(iter->first); m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); m_current_offset = new_offset; m_current_space = new_space;