MetalDevice: Avoid inline texture uploads when unnecessary

This commit is contained in:
Stenzek 2023-08-24 19:00:01 +10:00
parent 1b9e72e3a6
commit 8a0033dfb3
3 changed files with 39 additions and 20 deletions

View file

@ -121,12 +121,19 @@ public:
void SetDebugName(const std::string_view& name) override;
// Call when the texture is bound to the pipeline, or read from in a copy.
// Stores `counter` in m_use_fence_counter as this texture's last-use marker.
// MetalTexture::Update() compares that value with the device's current fence counter
// and passes the result as `is_inline` to GetBlitEncoder().
ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }
private:
MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
Format format);
id<MTLTexture> m_texture;
// Holds the fence counter value from the last time the texture was used.
// When this matches the current fence counter, the texture has been used in the current command buffer.
u64 m_use_fence_counter = 0;
u16 m_map_x = 0;
u16 m_map_y = 0;
u16 m_map_width = 0;
@ -179,9 +186,9 @@ class MetalDevice final : public GPUDevice
{
public:
// Returns the global GPU device (g_gpu_device) as a MetalDevice reference.
// The downcast is an unchecked static_cast and the pointer is dereferenced directly.
// NOTE(review): assumes g_gpu_device is non-null and actually holds a MetalDevice - confirm at call sites.
ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); }
ALWAYS_INLINE static id<MTLDevice> GetMTLDevice() { return GetInstance().m_device; }
ALWAYS_INLINE static u64 GetCurrentFenceCounter() { return GetInstance().m_current_fence_counter; }
ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; }
// Returns the id<MTLDevice> stored in m_device for this instance.
ALWAYS_INLINE id<MTLDevice> GetMTLDevice() { return m_device; }
// Returns m_current_fence_counter.
// Callers compare stored counters against this value to tell whether something was used
// in the command buffer that has not been executed yet.
ALWAYS_INLINE u64 GetCurrentFenceCounter() { return m_current_fence_counter; }
// Returns m_completed_fence_counter.
// MetalStreamBuffer treats tracked fences with a counter <= this value as finished
// when it advances its GPU position.
ALWAYS_INLINE u64 GetCompletedFenceCounter() { return m_completed_fence_counter; }
MetalDevice();
~MetalDevice();

View file

@ -130,7 +130,7 @@ bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_pr
id<MTLDevice> device = nil;
if (!adapter.empty())
{
NSArray<id<MTLDevice>> *devices = [MTLCopyAllDevices() autorelease];
NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease];
const u32 count = static_cast<u32>([devices count]);
for (u32 i = 0; i < count; i++)
{
@ -416,8 +416,9 @@ void MetalDevice::DestroyBuffers()
GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList()
{
AdapterAndModeList ret;
@autoreleasepool {
NSArray<id<MTLDevice>> *devices = [MTLCopyAllDevices() autorelease];
@autoreleasepool
{
NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease];
const u32 count = static_cast<u32>([devices count]);
ret.adapter_names.reserve(count);
for (u32 i = 0; i < count; i++)
@ -939,8 +940,8 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height))
dev.CommitClear(this);
// TODO: track this
const bool is_inline = true;
const bool is_inline = (m_use_fence_counter == dev.GetCurrentFenceCounter());
id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(is_inline);
[encoder copyFromBuffer:actual_buffer
sourceOffset:actual_offset
@ -1423,6 +1424,9 @@ void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d
CommitClear(S);
S->SetUseFenceCounter(m_current_fence_counter);
D->SetUseFenceCounter(m_current_fence_counter);
@autoreleasepool
{
id<MTLBlitCommandEncoder> encoder = GetBlitEncoder(true);
@ -1743,7 +1747,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
DebugAssert(slot < MAX_TEXTURE_SAMPLERS);
id<MTLTexture> T = texture ? static_cast<MetalTexture*>(texture)->GetMTLTexture() : nil;
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
if (texture)
static_cast<MetalTexture*>(texture)->SetUseFenceCounter(m_current_fence_counter);
if (m_current_textures[slot] != T)
{
@ -1752,6 +1757,7 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
[m_render_encoder setFragmentTexture:T atIndex:slot];
}
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
if (m_current_samplers[slot] != S)
{
m_current_samplers[slot] = S;
@ -1829,7 +1835,8 @@ void MetalDevice::BeginRenderPass()
m_inline_upload_encoder = nil;
}
@autoreleasepool {
@autoreleasepool
{
MTLRenderPassDescriptor* desc;
if (!m_current_framebuffer)
{
@ -1841,6 +1848,10 @@ void MetalDevice::BeginRenderPass()
else
{
desc = m_current_framebuffer->GetDescriptor();
if (MetalTexture* RT = static_cast<MetalTexture*>(m_current_framebuffer->GetRT()))
RT->SetUseFenceCounter(m_current_fence_counter);
if (MetalTexture* DS = static_cast<MetalTexture*>(m_current_framebuffer->GetDS()))
DS->SetUseFenceCounter(m_current_fence_counter);
}
m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain];

View file

@ -138,7 +138,7 @@ void MetalStreamBuffer::CommitMemory(u32 final_num_bytes)
void MetalStreamBuffer::UpdateCurrentFencePosition()
{
// Has the offset changed since the last fence?
const u64 counter = MetalDevice::GetCurrentFenceCounter();
const u64 counter = MetalDevice::GetInstance().GetCurrentFenceCounter();
if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
{
// Still haven't executed a command buffer, so just update the offset.
@ -155,7 +155,7 @@ void MetalStreamBuffer::UpdateGPUPosition()
auto start = m_tracked_fences.begin();
auto end = start;
const u64 completed_counter = MetalDevice::GetCompletedFenceCounter();
const u64 completed_counter = MetalDevice::GetInstance().GetCompletedFenceCounter();
while (end != m_tracked_fences.end() && completed_counter >= end->first)
{
m_current_gpu_position = end->second;
@ -242,11 +242,12 @@ bool MetalStreamBuffer::WaitForClearSpace(u32 num_bytes)
// Did any fences satisfy this condition?
// Has the command buffer been executed yet? If not, the caller should execute it.
if (iter == m_tracked_fences.end() || iter->first == MetalDevice::GetCurrentFenceCounter())
MetalDevice& dev = MetalDevice::GetInstance();
if (iter == m_tracked_fences.end() || iter->first == dev.GetCurrentFenceCounter())
return false;
// Wait until this fence is signaled. This will fire the callback, updating the GPU position.
MetalDevice::GetInstance().WaitForFenceCounter(iter->first);
dev.WaitForFenceCounter(iter->first);
m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
m_current_offset = new_offset;
m_current_space = new_space;