2021-07-10 11:37:08 +00:00
|
|
|
// Copyright 2019 Dolphin Emulator Project
|
|
|
|
// Licensed under GPLv2+
|
|
|
|
// Refer to the license.txt file included.
|
|
|
|
|
|
|
|
#include "stream_buffer.h"
|
|
|
|
#include "../align.h"
|
|
|
|
#include "../assert.h"
|
|
|
|
#include "../log.h"
|
|
|
|
#include "context.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include <functional>
|
|
|
|
Log_SetChannel(D3D12::StreamBuffer);
|
|
|
|
|
|
|
|
namespace D3D12 {
|
|
|
|
// Defaulted constructor: it creates no D3D12 resource itself; Create() allocates and maps the buffer.
StreamBuffer::StreamBuffer() = default;
|
|
|
|
|
|
|
|
// Tears the buffer down through Destroy(), using that function's default `defer` argument.
StreamBuffer::~StreamBuffer()
{
  // All cleanup (unmap, resource release, tracking reset) lives in Destroy().
  Destroy();
}
|
|
|
|
|
|
|
|
/// Allocates an upload-heap buffer of `size` bytes, maps it, and makes it this object's
/// backing storage, replacing whatever buffer was held before.
///
/// The new resource is created and mapped *before* the previous one is released, so a
/// failure leaves the existing buffer (if any) untouched. Failures are logged together
/// with the HRESULT and reported through the return value, so the caller can decide how
/// to react.
///
/// @param size Capacity of the new buffer, in bytes.
/// @return true if the buffer was created and mapped; false otherwise.
bool StreamBuffer::Create(u32 size)
{
  static const D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_UPLOAD};
  const D3D12_RESOURCE_DESC resource_desc = {
    D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
    D3D12_RESOURCE_FLAG_NONE};

  Microsoft::WRL::ComPtr<ID3D12Resource> buffer;
  HRESULT hr = g_d3d12_context->GetDevice()->CreateCommittedResource(&heap_properties, D3D12_HEAP_FLAG_NONE,
                                                                     &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ,
                                                                     nullptr, IID_PPV_ARGS(buffer.GetAddressOf()));
  if (FAILED(hr))
  {
    Log_ErrorPrintf("CreateCommittedResource() failed for a %u byte stream buffer: 0x%08X", size,
                    static_cast<unsigned>(hr));
    return false;
  }

  // An empty read range (End <= Begin) tells D3D12 the CPU will not read from the mapping.
  static const D3D12_RANGE read_range = {};
  u8* host_pointer = nullptr;
  hr = buffer->Map(0, &read_range, reinterpret_cast<void**>(&host_pointer));
  if (FAILED(hr))
  {
    // `buffer` is released automatically when the ComPtr goes out of scope.
    Log_ErrorPrintf("Map() failed for a %u byte stream buffer: 0x%08X", size, static_cast<unsigned>(hr));
    return false;
  }

  // Only now drop the previous buffer; pass defer=true to DeferResourceDestruction() path in Destroy().
  Destroy(true);

  m_buffer = std::move(buffer);
  m_host_pointer = host_pointer;
  m_size = size;
  m_gpu_pointer = m_buffer->GetGPUVirtualAddress();
  return true;
}
|
|
|
|
|
|
|
|
/// Finds room in the ring buffer for an upcoming write of `num_bytes`, aligned to `alignment`.
///
/// On success m_current_offset points at an aligned position and m_current_space holds the
/// number of bytes usable from there; the caller then writes its data and calls CommitMemory().
///
/// @param num_bytes Number of bytes the caller intends to write.
/// @param alignment Required alignment of the start of the allocation, in bytes.
/// @return true if space was reserved. false if no space could be found even after waiting on
///         tracked fences; in that case the space is held by the command buffer currently
///         being recorded, so the caller has to execute it and try again.
bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
{
  // Worst case, a full `alignment` worth of padding is lost in front of the allocation.
  const u32 padded_bytes = num_bytes + alignment;

  // A request larger than the whole buffer can never be satisfied.
  if (num_bytes > m_size)
  {
    Log_ErrorPrintf("Attempting to allocate %u bytes from a %u byte stream buffer", static_cast<u32>(num_bytes),
                    static_cast<u32>(m_size));
    Panic("Stream buffer overflow");
    return false;
  }

  UpdateCurrentFencePosition();

  if (m_current_offset >= m_current_gpu_position)
  {
    // The GPU is behind us (or level with us): the tail [m_current_offset, m_size) is free.
    // At offset zero no padding can be needed, so only the raw size has to fit.
    const u32 tail_bytes = m_size - m_current_offset;
    const u32 tail_request = (m_current_offset == 0) ? num_bytes : padded_bytes;
    if (tail_request <= tail_bytes)
    {
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_size - m_current_offset;
      return true;
    }

    // Otherwise try wrapping to the start of the buffer, behind the GPU. The comparison is
    // strict so that the write offset can never end up equal to the GPU position, which the
    // branch above would misread as "the GPU has caught up".
    if (padded_bytes < m_current_gpu_position)
    {
      m_current_offset = 0;
      m_current_space = m_current_gpu_position;
      return true;
    }
  }
  else
  {
    // The GPU is ahead of us: only the gap [m_current_offset, m_current_gpu_position) is free.
    const u32 gap_bytes = m_current_gpu_position - m_current_offset;
    if (padded_bytes < gap_bytes)
    {
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_current_gpu_position - m_current_offset;
      return true;
    }
  }

  // Nothing is free right now. See whether waiting on one of the tracked fences helps; if not,
  // the caller must execute the command buffer being recorded and wait for it to finish.
  if (!WaitForClearSpace(padded_bytes))
    return false;

  // WaitForClearSpace() updated offset/space; apply the alignment padding on top of that.
  const u32 aligned_offset = Common::AlignUp(m_current_offset, alignment);
  const u32 padding = aligned_offset - m_current_offset;
  m_current_offset = aligned_offset;
  m_current_space -= padding;
  return true;
}
|
|
|
|
|
|
|
|
/// Marks `final_num_bytes` of the current reservation as written by advancing the write
/// offset and shrinking the remaining reserved space by the same amount.
///
/// @param final_num_bytes Bytes actually written; asserted to fit both inside the buffer and
///                        inside the space left in the current reservation.
void StreamBuffer::CommitMemory(u32 final_num_bytes)
{
  Assert((m_current_offset + final_num_bytes) <= m_size);
  Assert(final_num_bytes <= m_current_space);

  // Consume the committed bytes from the reservation and move the write cursor past them.
  m_current_space = m_current_space - final_num_bytes;
  m_current_offset = m_current_offset + final_num_bytes;
}
|
|
|
|
|
|
|
|
void StreamBuffer::Destroy(bool defer)
|
|
|
|
{
|
|
|
|
if (m_host_pointer)
|
|
|
|
{
|
|
|
|
const D3D12_RANGE written_range = {0, m_size};
|
|
|
|
m_buffer->Unmap(0, &written_range);
|
|
|
|
m_host_pointer = nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (m_buffer && defer)
|
|
|
|
g_d3d12_context->DeferResourceDestruction(m_buffer.Get());
|
|
|
|
m_buffer.Reset();
|
|
|
|
|
|
|
|
m_current_offset = 0;
|
|
|
|
m_current_space = 0;
|
|
|
|
m_current_gpu_position = 0;
|
|
|
|
m_tracked_fences.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
void StreamBuffer::UpdateCurrentFencePosition()
|
|
|
|
{
|
|
|
|
// Don't create a tracking entry if the GPU is caught up with the buffer.
|
|
|
|
if (m_current_offset == m_current_gpu_position)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Has the offset changed since the last fence?
|
|
|
|
const u64 fence = g_d3d12_context->GetCurrentFenceValue();
|
|
|
|
if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence)
|
|
|
|
{
|
|
|
|
// Still haven't executed a command buffer, so just update the offset.
|
|
|
|
m_tracked_fences.back().second = m_current_offset;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
UpdateGPUPosition();
|
|
|
|
m_tracked_fences.emplace_back(fence, m_current_offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
void StreamBuffer::UpdateGPUPosition()
|
|
|
|
{
|
|
|
|
auto start = m_tracked_fences.begin();
|
|
|
|
auto end = start;
|
|
|
|
|
|
|
|
const u64 completed_counter = g_d3d12_context->GetCompletedFenceValue();
|
|
|
|
while (end != m_tracked_fences.end() && completed_counter >= end->first)
|
|
|
|
{
|
|
|
|
m_current_gpu_position = end->second;
|
|
|
|
++end;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (start != end)
|
|
|
|
m_tracked_fences.erase(start, end);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Looks for the earliest tracked fence whose completion would free at least `num_bytes`,
/// waits for it, and updates offset / space / GPU position accordingly.
///
/// @param num_bytes Number of bytes that must become available.
/// @return true if a fence was waited on and the allocation state was updated. false if no
///         tracked fence would free enough space, or if the fence that would do so carries
///         the context's current fence value (the original comment's reading: its command
///         buffer has not been executed yet, so the caller should execute it).
bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
{
  // State to adopt once the chosen fence has been waited on.
  u32 next_offset = 0;
  u32 next_space = 0;
  u32 next_gpu_position = 0;

  auto fence_it = m_tracked_fences.begin();
  while (fence_it != m_tracked_fences.end())
  {
    // Buffer offset recorded for this fence, i.e. where the GPU will be once it completes.
    const u32 fence_position = fence_it->second;

    // "Last resort" case: this fence lines the GPU up with our write offset. That happens when
    // a command buffer execution was forced without any further data being written, so once
    // the fence is signaled the whole buffer counts as consumed.
    if (fence_position == m_current_offset)
    {
      next_offset = 0;
      next_space = m_size;
      next_gpu_position = 0;
      break;
    }

    if (fence_position < m_current_offset)
    {
      // After this fence the GPU would sit behind us, so both the tail
      // [m_current_offset, m_size) and the head [0, fence_position) would be free.
      const u32 tail_bytes = m_size - m_current_offset;
      if (num_bytes <= tail_bytes)
      {
        // Keep allocating in front of the GPU, using the remainder of the buffer.
        next_offset = m_current_offset;
        next_space = m_size - m_current_offset;
        next_gpu_position = fence_position;
        break;
      }

      // Otherwise wrap to the start, behind the GPU. The comparison is strict: ending up level
      // with the GPU would make the allocator believe the GPU consumed what we just wrote.
      if (num_bytes < fence_position)
      {
        next_offset = 0;
        next_space = fence_position;
        next_gpu_position = fence_position;
        break;
      }
    }
    else
    {
      // We are allocating behind the GPU; this fence would open up the gap between the write
      // offset and the new GPU position. Strict comparison for the same reason as above.
      const u32 gap_bytes = fence_position - m_current_offset;
      if (num_bytes < gap_bytes)
      {
        // The offset stays where it is; only the GPU position moves.
        next_offset = m_current_offset;
        next_space = fence_position - m_current_offset;
        next_gpu_position = fence_position;
        break;
      }
    }

    ++fence_it;
  }

  // No tracked fence frees enough space.
  if (fence_it == m_tracked_fences.end())
    return false;

  // The fence that would help carries the current fence value; leave it to the caller.
  if (fence_it->first == g_d3d12_context->GetCurrentFenceValue())
    return false;

  g_d3d12_context->WaitForFence(fence_it->first);

  // Drop the entries up to and including the fence we waited on. In the "last resort" case
  // the whole buffer is free, so every entry goes.
  if (m_current_offset == fence_it->second)
    m_tracked_fences.clear();
  else
    m_tracked_fences.erase(m_tracked_fences.begin(), ++fence_it);

  m_current_offset = next_offset;
  m_current_space = next_space;
  m_current_gpu_position = next_gpu_position;
  return true;
}
|
|
|
|
|
|
|
|
} // namespace D3D12
|