mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-22 13:55:38 +00:00
MetalDevice: Implement GPU timing
This commit is contained in:
parent
b30c86ed75
commit
6fbea12ed3
|
@ -306,7 +306,7 @@ private:
|
||||||
id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
|
id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
|
||||||
|
|
||||||
void CreateCommandBuffer();
|
void CreateCommandBuffer();
|
||||||
void CommandBufferCompletedOffThread(u64 fence_counter);
|
void CommandBufferCompletedOffThread(id<MTLCommandBuffer> buffer, u64 fence_counter);
|
||||||
void WaitForPreviousCommandBuffers();
|
void WaitForPreviousCommandBuffers();
|
||||||
void CleanupObjects();
|
void CleanupObjects();
|
||||||
|
|
||||||
|
@ -332,11 +332,6 @@ private:
|
||||||
bool CreateBuffers();
|
bool CreateBuffers();
|
||||||
void DestroyBuffers();
|
void DestroyBuffers();
|
||||||
|
|
||||||
bool CreateTimestampQueries();
|
|
||||||
void DestroyTimestampQueries();
|
|
||||||
void PopTimestampQuery();
|
|
||||||
void KickTimestampQuery();
|
|
||||||
|
|
||||||
id<MTLDevice> m_device;
|
id<MTLDevice> m_device;
|
||||||
id<MTLCommandQueue> m_queue;
|
id<MTLCommandQueue> m_queue;
|
||||||
|
|
||||||
|
@ -381,10 +376,6 @@ private:
|
||||||
|
|
||||||
bool m_vsync_enabled = false;
|
bool m_vsync_enabled = false;
|
||||||
|
|
||||||
// std::array<std::array<ComPtr<IMetalQuery>, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {};
|
double m_accumulated_gpu_time = 0;
|
||||||
// u8 m_read_timestamp_query = 0;
|
double m_last_gpu_time_end = 0;
|
||||||
// u8 m_write_timestamp_query = 0;
|
|
||||||
// u8 m_waiting_timestamp_queries = 0;
|
|
||||||
// bool m_timestamp_query_started = false;
|
|
||||||
// float m_accumulated_gpu_time = 0.0f;
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -434,133 +434,22 @@ GPUDevice::AdapterAndModeList MetalDevice::GetAdapterAndModeList()
|
||||||
return StaticGetAdapterAndModeList();
|
return StaticGetAdapterAndModeList();
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
bool MetalDevice::CreateTimestampQueries()
|
|
||||||
{
|
|
||||||
for (u32 i = 0; i < NUM_TIMESTAMP_QUERIES; i++)
|
|
||||||
{
|
|
||||||
for (u32 j = 0; j < 3; j++)
|
|
||||||
{
|
|
||||||
const CMetal_QUERY_DESC qdesc((j == 0) ? Metal_QUERY_TIMESTAMP_DISJOINT : Metal_QUERY_TIMESTAMP);
|
|
||||||
const HRESULT hr = m_device->CreateQuery(&qdesc, m_timestamp_queries[i][j].ReleaseAndGetAddressOf());
|
|
||||||
if (FAILED(hr))
|
|
||||||
{
|
|
||||||
m_timestamp_queries = {};
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
KickTimestampQuery();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MetalDevice::DestroyTimestampQueries()
|
|
||||||
{
|
|
||||||
if (!m_timestamp_queries[0][0])
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (m_timestamp_query_started)
|
|
||||||
m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get());
|
|
||||||
|
|
||||||
m_timestamp_queries = {};
|
|
||||||
m_read_timestamp_query = 0;
|
|
||||||
m_write_timestamp_query = 0;
|
|
||||||
m_waiting_timestamp_queries = 0;
|
|
||||||
m_timestamp_query_started = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MetalDevice::PopTimestampQuery()
|
|
||||||
{
|
|
||||||
while (m_waiting_timestamp_queries > 0)
|
|
||||||
{
|
|
||||||
Metal_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
|
|
||||||
const HRESULT disjoint_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][0].Get(), &disjoint,
|
|
||||||
sizeof(disjoint), Metal_ASYNC_GETDATA_DONOTFLUSH);
|
|
||||||
if (disjoint_hr != S_OK)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (disjoint.Disjoint)
|
|
||||||
{
|
|
||||||
Log_VerbosePrintf("GPU timing disjoint, resetting.");
|
|
||||||
m_read_timestamp_query = 0;
|
|
||||||
m_write_timestamp_query = 0;
|
|
||||||
m_waiting_timestamp_queries = 0;
|
|
||||||
m_timestamp_query_started = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
u64 start = 0, end = 0;
|
|
||||||
const HRESULT start_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][1].Get(), &start,
|
|
||||||
sizeof(start), Metal_ASYNC_GETDATA_DONOTFLUSH);
|
|
||||||
const HRESULT end_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][2].Get(), &end, sizeof(end),
|
|
||||||
Metal_ASYNC_GETDATA_DONOTFLUSH);
|
|
||||||
if (start_hr == S_OK && end_hr == S_OK)
|
|
||||||
{
|
|
||||||
const float delta =
|
|
||||||
static_cast<float>(static_cast<double>(end - start) / (static_cast<double>(disjoint.Frequency) / 1000.0));
|
|
||||||
m_accumulated_gpu_time += delta;
|
|
||||||
m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES;
|
|
||||||
m_waiting_timestamp_queries--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_timestamp_query_started)
|
|
||||||
{
|
|
||||||
m_context->End(m_timestamp_queries[m_write_timestamp_query][2].Get());
|
|
||||||
m_context->End(m_timestamp_queries[m_write_timestamp_query][0].Get());
|
|
||||||
m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES;
|
|
||||||
m_timestamp_query_started = false;
|
|
||||||
m_waiting_timestamp_queries++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void MetalDevice::KickTimestampQuery()
|
|
||||||
{
|
|
||||||
if (m_timestamp_query_started || !m_timestamp_queries[0][0] || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES)
|
|
||||||
return;
|
|
||||||
|
|
||||||
m_context->Begin(m_timestamp_queries[m_write_timestamp_query][0].Get());
|
|
||||||
m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get());
|
|
||||||
m_timestamp_query_started = true;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
bool MetalDevice::SetGPUTimingEnabled(bool enabled)
|
bool MetalDevice::SetGPUTimingEnabled(bool enabled)
|
||||||
{
|
{
|
||||||
#if 0
|
|
||||||
if (m_gpu_timing_enabled == enabled)
|
if (m_gpu_timing_enabled == enabled)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
std::unique_lock lock(m_fence_mutex);
|
||||||
m_gpu_timing_enabled = enabled;
|
m_gpu_timing_enabled = enabled;
|
||||||
if (m_gpu_timing_enabled)
|
m_accumulated_gpu_time = 0.0;
|
||||||
{
|
m_last_gpu_time_end = 0.0;
|
||||||
if (!CreateTimestampQueries())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
KickTimestampQuery();
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
DestroyTimestampQueries();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
return false;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
float MetalDevice::GetAndResetAccumulatedGPUTime()
|
float MetalDevice::GetAndResetAccumulatedGPUTime()
|
||||||
{
|
{
|
||||||
#if 0
|
std::unique_lock lock(m_fence_mutex);
|
||||||
const float value = m_accumulated_gpu_time;
|
return std::exchange(m_accumulated_gpu_time, 0.0) * 1000.0;
|
||||||
m_accumulated_gpu_time = 0.0f;
|
|
||||||
return value;
|
|
||||||
#else
|
|
||||||
return 0.0f;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MetalShader::MetalShader(GPUShaderStage stage, id<MTLLibrary> library, id<MTLFunction> function)
|
MetalShader::MetalShader(GPUShaderStage stage, id<MTLLibrary> library, id<MTLFunction> function)
|
||||||
|
@ -2042,19 +1931,30 @@ void MetalDevice::CreateCommandBuffer()
|
||||||
DebugAssert(m_render_cmdbuf == nil);
|
DebugAssert(m_render_cmdbuf == nil);
|
||||||
const u64 fence_counter = ++m_current_fence_counter;
|
const u64 fence_counter = ++m_current_fence_counter;
|
||||||
m_render_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain];
|
m_render_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain];
|
||||||
[m_render_cmdbuf addCompletedHandler:[this, fence_counter](id<MTLCommandBuffer>) {
|
[m_render_cmdbuf addCompletedHandler:[this, fence_counter](id<MTLCommandBuffer> buffer) {
|
||||||
CommandBufferCompletedOffThread(fence_counter);
|
CommandBufferCompletedOffThread(buffer, fence_counter);
|
||||||
}];
|
}];
|
||||||
}
|
}
|
||||||
|
|
||||||
CleanupObjects();
|
CleanupObjects();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalDevice::CommandBufferCompletedOffThread(u64 fence_counter)
|
void MetalDevice::CommandBufferCompletedOffThread(id<MTLCommandBuffer> buffer, u64 fence_counter)
|
||||||
{
|
{
|
||||||
std::unique_lock lock(m_fence_mutex);
|
std::unique_lock lock(m_fence_mutex);
|
||||||
m_completed_fence_counter.store(std::max(m_completed_fence_counter.load(std::memory_order_acquire), fence_counter),
|
m_completed_fence_counter.store(std::max(m_completed_fence_counter.load(std::memory_order_acquire), fence_counter),
|
||||||
std::memory_order_release);
|
std::memory_order_release);
|
||||||
|
|
||||||
|
if (m_gpu_timing_enabled)
|
||||||
|
{
|
||||||
|
const double begin = std::max(m_last_gpu_time_end, [buffer GPUStartTime]);
|
||||||
|
const double end = [buffer GPUEndTime];
|
||||||
|
if (end > begin)
|
||||||
|
{
|
||||||
|
m_accumulated_gpu_time += end - begin;
|
||||||
|
m_last_gpu_time_end = end;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalDevice::SubmitCommandBuffer(bool wait_for_completion)
|
void MetalDevice::SubmitCommandBuffer(bool wait_for_completion)
|
||||||
|
|
Loading…
Reference in a new issue