MetalDevice: Implement GPU timing

This commit is contained in:
Stenzek 2023-09-10 14:45:19 +10:00
parent b30c86ed75
commit 6fbea12ed3
2 changed files with 23 additions and 132 deletions

View file

@ -306,7 +306,7 @@ private:
id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds); id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
void CreateCommandBuffer(); void CreateCommandBuffer();
void CommandBufferCompletedOffThread(u64 fence_counter); void CommandBufferCompletedOffThread(id<MTLCommandBuffer> buffer, u64 fence_counter);
void WaitForPreviousCommandBuffers(); void WaitForPreviousCommandBuffers();
void CleanupObjects(); void CleanupObjects();
@ -332,11 +332,6 @@ private:
bool CreateBuffers(); bool CreateBuffers();
void DestroyBuffers(); void DestroyBuffers();
bool CreateTimestampQueries();
void DestroyTimestampQueries();
void PopTimestampQuery();
void KickTimestampQuery();
id<MTLDevice> m_device; id<MTLDevice> m_device;
id<MTLCommandQueue> m_queue; id<MTLCommandQueue> m_queue;
@ -381,10 +376,6 @@ private:
bool m_vsync_enabled = false; bool m_vsync_enabled = false;
// std::array<std::array<ComPtr<IMetalQuery>, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; double m_accumulated_gpu_time = 0;
// u8 m_read_timestamp_query = 0; double m_last_gpu_time_end = 0;
// u8 m_write_timestamp_query = 0;
// u8 m_waiting_timestamp_queries = 0;
// bool m_timestamp_query_started = false;
// float m_accumulated_gpu_time = 0.0f;
}; };

View file

@ -434,133 +434,22 @@ GPUDevice::AdapterAndModeList MetalDevice::GetAdapterAndModeList()
return StaticGetAdapterAndModeList(); return StaticGetAdapterAndModeList();
} }
#if 0
bool MetalDevice::CreateTimestampQueries()
{
for (u32 i = 0; i < NUM_TIMESTAMP_QUERIES; i++)
{
for (u32 j = 0; j < 3; j++)
{
const CMetal_QUERY_DESC qdesc((j == 0) ? Metal_QUERY_TIMESTAMP_DISJOINT : Metal_QUERY_TIMESTAMP);
const HRESULT hr = m_device->CreateQuery(&qdesc, m_timestamp_queries[i][j].ReleaseAndGetAddressOf());
if (FAILED(hr))
{
m_timestamp_queries = {};
return false;
}
}
}
KickTimestampQuery();
return true;
}
void MetalDevice::DestroyTimestampQueries()
{
if (!m_timestamp_queries[0][0])
return;
if (m_timestamp_query_started)
m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get());
m_timestamp_queries = {};
m_read_timestamp_query = 0;
m_write_timestamp_query = 0;
m_waiting_timestamp_queries = 0;
m_timestamp_query_started = 0;
}
void MetalDevice::PopTimestampQuery()
{
while (m_waiting_timestamp_queries > 0)
{
Metal_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
const HRESULT disjoint_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][0].Get(), &disjoint,
sizeof(disjoint), Metal_ASYNC_GETDATA_DONOTFLUSH);
if (disjoint_hr != S_OK)
break;
if (disjoint.Disjoint)
{
Log_VerbosePrintf("GPU timing disjoint, resetting.");
m_read_timestamp_query = 0;
m_write_timestamp_query = 0;
m_waiting_timestamp_queries = 0;
m_timestamp_query_started = 0;
}
else
{
u64 start = 0, end = 0;
const HRESULT start_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][1].Get(), &start,
sizeof(start), Metal_ASYNC_GETDATA_DONOTFLUSH);
const HRESULT end_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][2].Get(), &end, sizeof(end),
Metal_ASYNC_GETDATA_DONOTFLUSH);
if (start_hr == S_OK && end_hr == S_OK)
{
const float delta =
static_cast<float>(static_cast<double>(end - start) / (static_cast<double>(disjoint.Frequency) / 1000.0));
m_accumulated_gpu_time += delta;
m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES;
m_waiting_timestamp_queries--;
}
}
}
if (m_timestamp_query_started)
{
m_context->End(m_timestamp_queries[m_write_timestamp_query][2].Get());
m_context->End(m_timestamp_queries[m_write_timestamp_query][0].Get());
m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES;
m_timestamp_query_started = false;
m_waiting_timestamp_queries++;
}
}
void MetalDevice::KickTimestampQuery()
{
if (m_timestamp_query_started || !m_timestamp_queries[0][0] || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES)
return;
m_context->Begin(m_timestamp_queries[m_write_timestamp_query][0].Get());
m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get());
m_timestamp_query_started = true;
}
#endif
bool MetalDevice::SetGPUTimingEnabled(bool enabled) bool MetalDevice::SetGPUTimingEnabled(bool enabled)
{ {
#if 0
if (m_gpu_timing_enabled == enabled) if (m_gpu_timing_enabled == enabled)
return true; return true;
std::unique_lock lock(m_fence_mutex);
m_gpu_timing_enabled = enabled; m_gpu_timing_enabled = enabled;
if (m_gpu_timing_enabled) m_accumulated_gpu_time = 0.0;
{ m_last_gpu_time_end = 0.0;
if (!CreateTimestampQueries())
return false;
KickTimestampQuery();
return true; return true;
}
else
{
DestroyTimestampQueries();
return true;
}
#else
return false;
#endif
} }
float MetalDevice::GetAndResetAccumulatedGPUTime() float MetalDevice::GetAndResetAccumulatedGPUTime()
{ {
#if 0 std::unique_lock lock(m_fence_mutex);
const float value = m_accumulated_gpu_time; return std::exchange(m_accumulated_gpu_time, 0.0) * 1000.0;
m_accumulated_gpu_time = 0.0f;
return value;
#else
return 0.0f;
#endif
} }
MetalShader::MetalShader(GPUShaderStage stage, id<MTLLibrary> library, id<MTLFunction> function) MetalShader::MetalShader(GPUShaderStage stage, id<MTLLibrary> library, id<MTLFunction> function)
@ -2042,19 +1931,30 @@ void MetalDevice::CreateCommandBuffer()
DebugAssert(m_render_cmdbuf == nil); DebugAssert(m_render_cmdbuf == nil);
const u64 fence_counter = ++m_current_fence_counter; const u64 fence_counter = ++m_current_fence_counter;
m_render_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain]; m_render_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain];
[m_render_cmdbuf addCompletedHandler:[this, fence_counter](id<MTLCommandBuffer>) { [m_render_cmdbuf addCompletedHandler:[this, fence_counter](id<MTLCommandBuffer> buffer) {
CommandBufferCompletedOffThread(fence_counter); CommandBufferCompletedOffThread(buffer, fence_counter);
}]; }];
} }
CleanupObjects(); CleanupObjects();
} }
void MetalDevice::CommandBufferCompletedOffThread(u64 fence_counter) void MetalDevice::CommandBufferCompletedOffThread(id<MTLCommandBuffer> buffer, u64 fence_counter)
{ {
std::unique_lock lock(m_fence_mutex); std::unique_lock lock(m_fence_mutex);
m_completed_fence_counter.store(std::max(m_completed_fence_counter.load(std::memory_order_acquire), fence_counter), m_completed_fence_counter.store(std::max(m_completed_fence_counter.load(std::memory_order_acquire), fence_counter),
std::memory_order_release); std::memory_order_release);
if (m_gpu_timing_enabled)
{
const double begin = std::max(m_last_gpu_time_end, [buffer GPUStartTime]);
const double end = [buffer GPUEndTime];
if (end > begin)
{
m_accumulated_gpu_time += end - begin;
m_last_gpu_time_end = end;
}
}
} }
void MetalDevice::SubmitCommandBuffer(bool wait_for_completion) void MetalDevice::SubmitCommandBuffer(bool wait_for_completion)