D3D11Device: Add MAP_DISCARD fallback path for uniform streaming

This commit is contained in:
Stenzek 2024-03-28 13:30:50 +10:00
parent de1e5b24fb
commit fe35542c76
No known key found for this signature in database
8 changed files with 146 additions and 46 deletions

View file

@ -79,6 +79,7 @@ bool D3D11Device::CreateDevice(const std::string_view& adapter, bool threaded_pr
return false;
ComPtr<IDXGIAdapter1> dxgi_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter);
m_max_feature_level = D3DCommon::GetDeviceMaxFeatureLevel(dxgi_adapter.Get());
static constexpr std::array<D3D_FEATURE_LEVEL, 3> requested_feature_levels = {
{D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0}};
@ -128,6 +129,7 @@ bool D3D11Device::CreateDevice(const std::string_view& adapter, bool threaded_pr
Log_InfoPrintf("D3D Adapter: %s", D3DCommon::GetAdapterName(dxgi_adapter.Get()).c_str());
else
Log_ErrorPrint("Failed to obtain D3D adapter name.");
Log_InfoFmt("Max device feature level: {}", D3DCommon::GetFeatureLevelString(m_max_feature_level));
BOOL allow_tearing_supported = false;
hr = m_dxgi_factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported,
@ -233,8 +235,7 @@ bool D3D11Device::CreateSwapChain()
swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
swap_chain_desc.SwapEffect = m_using_flip_model_swap_chain ? DXGI_SWAP_EFFECT_FLIP_DISCARD : DXGI_SWAP_EFFECT_DISCARD;
m_using_allow_tearing =
(m_allow_tearing_supported && m_using_flip_model_swap_chain && !m_is_exclusive_fullscreen);
m_using_allow_tearing = (m_allow_tearing_supported && m_using_flip_model_swap_chain && !m_is_exclusive_fullscreen);
if (m_using_allow_tearing)
swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
@ -456,9 +457,9 @@ std::string D3D11Device::GetDriverInfo() const
bool D3D11Device::CreateBuffers()
{
if (!m_vertex_buffer.Create(m_device.Get(), D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE) ||
!m_index_buffer.Create(m_device.Get(), D3D11_BIND_INDEX_BUFFER, INDEX_BUFFER_SIZE) ||
!m_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, UNIFORM_BUFFER_SIZE))
if (!m_vertex_buffer.Create(D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE, VERTEX_BUFFER_SIZE) ||
!m_index_buffer.Create(D3D11_BIND_INDEX_BUFFER, INDEX_BUFFER_SIZE, INDEX_BUFFER_SIZE) ||
!m_uniform_buffer.Create(D3D11_BIND_CONSTANT_BUFFER, MIN_UNIFORM_BUFFER_SIZE, MAX_UNIFORM_BUFFER_SIZE))
{
Log_ErrorPrintf("Failed to create vertex/index/uniform buffers.");
return false;
@ -877,36 +878,61 @@ void D3D11Device::UnmapIndexBuffer(u32 used_index_count)
// Copies data_size bytes from data into the streaming uniform buffer and binds that buffer to
// constant buffer slot 0 of both the vertex and pixel shader stages.
// NOTE(review): this listing looks like a diff rendered without +/- markers - the used_space based lines
// sit directly above the req_align/req_size based lines that replace them (res is declared twice, Unmap is
// called twice). Confirm against the real file before treating every line as live code.
void D3D11Device::PushUniformBuffer(const void* data, u32 data_size)
{
const u32 used_space = Common::AlignUpPow2(data_size, UNIFORM_BUFFER_ALIGNMENT);
const auto res = m_uniform_buffer.Map(m_context.Get(), UNIFORM_BUFFER_ALIGNMENT, used_space);
// The alignment depends on the buffer's mode: UNIFORM_BUFFER_ALIGNMENT when it uses map-no-overwrite,
// UNIFORM_BUFFER_ALIGNMENT_DISCARD otherwise. The upload size is rounded up to that alignment.
const u32 req_align =
m_uniform_buffer.IsUsingMapNoOverwrite() ? UNIFORM_BUFFER_ALIGNMENT : UNIFORM_BUFFER_ALIGNMENT_DISCARD;
const u32 req_size = Common::AlignUpPow2(data_size, req_align);
const auto res = m_uniform_buffer.Map(m_context.Get(), req_align, req_size);
// Only data_size bytes are copied, even though req_size bytes were requested from Map().
std::memcpy(res.pointer, data, data_size);
m_uniform_buffer.Unmap(m_context.Get(), data_size);
m_uniform_buffer.Unmap(m_context.Get(), req_size);
// The statistic counts the caller's byte count, not the aligned size.
s_stats.buffer_streamed += data_size;
const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u;
const UINT num_constants = (used_space * UNIFORM_BUFFER_ALIGNMENT) / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
if (m_uniform_buffer.IsUsingMapNoOverwrite())
{
// Bind just the range that was written: byte offset and byte size are divided by 16 to form the
// first-constant / constant-count arguments of *SetConstantBuffers1.
const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u;
const UINT num_constants = req_size / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
else
{
// Without map-no-overwrite the mapping is expected to begin at index 0, and the buffer is bound
// without an offset or range.
DebugAssert(res.index_aligned == 0);
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
}
// Maps room for size bytes (rounded up to the required alignment) in the streaming uniform buffer and
// returns the pointer handed back by the stream buffer's Map().
// Presumably paired with a later UnmapUniformBuffer(size) call - verify against callers.
// NOTE(review): the used_space lines and the req_align/req_size lines look like the before/after sides of a
// diff shown together (res is declared twice) - confirm against the real file.
void* D3D11Device::MapUniformBuffer(u32 size)
{
const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT);
const auto res = m_uniform_buffer.Map(m_context.Get(), UNIFORM_BUFFER_ALIGNMENT, used_space);
// UNIFORM_BUFFER_ALIGNMENT when the buffer uses map-no-overwrite, UNIFORM_BUFFER_ALIGNMENT_DISCARD otherwise.
const u32 req_align =
m_uniform_buffer.IsUsingMapNoOverwrite() ? UNIFORM_BUFFER_ALIGNMENT : UNIFORM_BUFFER_ALIGNMENT_DISCARD;
const u32 req_size = Common::AlignUpPow2(size, req_align);
const auto res = m_uniform_buffer.Map(m_context.Get(), req_align, req_size);
return res.pointer;
}
// Unmaps the streaming uniform buffer after size bytes were written and binds it to constant buffer
// slot 0 of both the vertex and pixel shader stages.
// NOTE(review): the used_space based lines and the pos/req_size based lines look like the before/after sides
// of a diff shown together (Unmap and the *SetConstantBuffers1 pair each appear twice) - confirm against the
// real file.
void D3D11Device::UnmapUniformBuffer(u32 size)
{
const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT);
const UINT first_constant = m_uniform_buffer.GetPosition() / 16u;
const UINT num_constants = used_space / 16u;
// The buffer position is read before Unmap() is called; it is used below as the start of the written range.
const u32 pos = m_uniform_buffer.GetPosition();
// Same alignment selection as the map side: UNIFORM_BUFFER_ALIGNMENT with map-no-overwrite,
// UNIFORM_BUFFER_ALIGNMENT_DISCARD otherwise.
const u32 req_align =
m_uniform_buffer.IsUsingMapNoOverwrite() ? UNIFORM_BUFFER_ALIGNMENT : UNIFORM_BUFFER_ALIGNMENT_DISCARD;
const u32 req_size = Common::AlignUpPow2(size, req_align);
m_uniform_buffer.Unmap(m_context.Get(), used_space);
m_uniform_buffer.Unmap(m_context.Get(), req_size);
// The statistic counts the caller's byte count, not the aligned size.
s_stats.buffer_streamed += size;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
if (m_uniform_buffer.IsUsingMapNoOverwrite())
{
// Bind just the written range: byte position and byte size are divided by 16 to form the
// first-constant / constant-count arguments of *SetConstantBuffers1.
const UINT first_constant = pos / 16u;
const UINT num_constants = req_size / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
else
{
// Without map-no-overwrite the write is expected to have started at position 0, and the buffer is
// bound without an offset or range.
DebugAssert(pos == 0);
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
}
void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds)

View file

@ -34,6 +34,7 @@ public:
// Returns the global g_gpu_device downcast to D3D11Device; the cast is unchecked (static_cast).
ALWAYS_INLINE static D3D11Device& GetInstance() { return *static_cast<D3D11Device*>(g_gpu_device.get()); }
// Raw ID3D11Device pointer held by the current instance.
ALWAYS_INLINE static ID3D11Device* GetD3DDevice() { return GetInstance().m_device.Get(); }
// Raw ID3D11DeviceContext1 pointer held by the current instance.
ALWAYS_INLINE static ID3D11DeviceContext1* GetD3DContext() { return GetInstance().m_context.Get(); }
// Feature level stored in the current instance's m_max_feature_level.
ALWAYS_INLINE static D3D_FEATURE_LEVEL GetMaxFeatureLevel() { return GetInstance().m_max_feature_level; }
RenderAPI GetRenderAPI() const override;
@ -121,8 +122,10 @@ private:
// Byte sizes handed to the vertex and index stream buffers on creation.
static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024;
static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024;
// NOTE(review): appears to be superseded by the MIN/MAX pair below (this listing shows removed and added
// lines together) - confirm against the real header.
static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
// Uniform buffer size bounds, passed to the uniform stream buffer's Create() as max_size / min_size.
static constexpr u32 MAX_UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr u32 MIN_UNIFORM_BUFFER_SIZE = 16;
// Alignment applied to uniform uploads when the uniform stream buffer reports IsUsingMapNoOverwrite().
static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256;
// Alignment applied to uniform uploads when it does not.
static constexpr u32 UNIFORM_BUFFER_ALIGNMENT_DISCARD = 16;
static constexpr u8 NUM_TIMESTAMP_QUERIES = 3;
static void GetAdapterAndModeList(AdapterAndModeList* ret, IDXGIFactory5* factory);
@ -161,6 +164,7 @@ private:
BlendStateMap m_blend_states;
InputLayoutMap m_input_layouts;
D3D_FEATURE_LEVEL m_max_feature_level = D3D_FEATURE_LEVEL_10_0;
bool m_allow_tearing_supported = false;
bool m_using_flip_model_swap_chain = true;
bool m_using_allow_tearing = false;

View file

@ -1,7 +1,8 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "d3d11_stream_buffer.h"
#include "d3d11_device.h"
#include "common/align.h"
#include "common/assert.h"
@ -25,27 +26,27 @@ D3D11StreamBuffer::~D3D11StreamBuffer()
Destroy();
}
bool D3D11StreamBuffer::Create(ID3D11Device* device, D3D11_BIND_FLAG bind_flags, u32 size)
bool D3D11StreamBuffer::Create(D3D11_BIND_FLAG bind_flags, u32 min_size, u32 max_size)
{
CD3D11_BUFFER_DESC desc(size, bind_flags, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE, 0, 0);
ComPtr<ID3D11Buffer> buffer;
HRESULT hr = device->CreateBuffer(&desc, nullptr, &buffer);
if (FAILED(hr))
{
Log_ErrorPrintf("Creating buffer failed: 0x%08X", hr);
return false;
}
m_buffer = std::move(buffer);
m_size = size;
m_position = 0;
D3D11_FEATURE_DATA_D3D11_OPTIONS options = {};
hr = device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options));
HRESULT hr = D3D11Device::GetD3DDevice()->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options));
if (SUCCEEDED(hr))
{
if (bind_flags & D3D11_BIND_CONSTANT_BUFFER)
{
// Older Intel drivers go absolutely bananas with CPU usage when using offset constant buffers.
// NVIDIA seems to be okay, I don't know about AMD. So let's be safe and limit it to feature level 12+.
m_use_map_no_overwrite = options.MapNoOverwriteOnDynamicConstantBuffer;
if (m_use_map_no_overwrite && D3D11Device::GetMaxFeatureLevel() < D3D_FEATURE_LEVEL_12_0)
{
Log_WarningPrint("Ignoring MapNoOverwriteOnDynamicConstantBuffer on driver due to feature level.");
m_use_map_no_overwrite = false;
}
// should be 16 byte aligned
min_size = Common::AlignUpPow2(min_size, 16);
max_size = Common::AlignUpPow2(max_size, 16);
}
else if (bind_flags & D3D11_BIND_SHADER_RESOURCE)
m_use_map_no_overwrite = options.MapNoOverwriteOnDynamicBufferSRV;
else
@ -64,6 +65,21 @@ bool D3D11StreamBuffer::Create(ID3D11Device* device, D3D11_BIND_FLAG bind_flags,
m_use_map_no_overwrite = false;
}
const u32 create_size = m_use_map_no_overwrite ? max_size : min_size;
const CD3D11_BUFFER_DESC desc(create_size, bind_flags, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE, 0, 0);
ComPtr<ID3D11Buffer> buffer;
hr = D3D11Device::GetD3DDevice()->CreateBuffer(&desc, nullptr, &buffer);
if (FAILED(hr))
{
Log_ErrorPrintf("Creating buffer failed: 0x%08X", hr);
return false;
}
m_buffer = std::move(buffer);
m_size = create_size;
m_max_size = max_size;
m_position = 0;
return true;
}
@ -74,6 +90,7 @@ void D3D11StreamBuffer::Destroy()
D3D11StreamBuffer::MappingResult D3D11StreamBuffer::Map(ID3D11DeviceContext1* context, u32 alignment, u32 min_size)
{
HRESULT hr;
DebugAssert(!m_mapped);
m_position = Common::AlignUp(m_position, alignment);
@ -81,11 +98,33 @@ D3D11StreamBuffer::MappingResult D3D11StreamBuffer::Map(ID3D11DeviceContext1* co
{
// wrap around
m_position = 0;
// grow buffer if needed
if (min_size > m_size) [[unlikely]]
{
Assert(min_size < m_max_size);
const u32 new_size = std::min(m_max_size, Common::AlignUp(std::max(m_size * 2, min_size), alignment));
Log_WarningFmt("Growing buffer from {} bytes to {} bytes", m_size, new_size);
D3D11_BUFFER_DESC new_desc;
m_buffer->GetDesc(&new_desc);
new_desc.ByteWidth = new_size;
hr = D3D11Device::GetD3DDevice()->CreateBuffer(&new_desc, nullptr, m_buffer.ReleaseAndGetAddressOf());
if (FAILED(hr))
{
Log_ErrorFmt("Creating buffer failed: 0x{:08X}", static_cast<unsigned>(hr));
Panic("Failed to grow buffer");
}
m_size = new_size;
}
}
D3D11_MAPPED_SUBRESOURCE sr;
const D3D11_MAP map_type = (m_position == 0) ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE;
const HRESULT hr = context->Map(m_buffer.Get(), 0, map_type, 0, &sr);
hr = context->Map(m_buffer.Get(), 0, map_type, 0, &sr);
if (FAILED(hr))
{
Log_ErrorPrintf("Map failed: 0x%08X (alignment %u, minsize %u, size %u, position %u, map type %u)", hr, alignment,

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -24,8 +24,9 @@ public:
// Current value of m_size (the byte size the buffer was created or last re-created with).
ALWAYS_INLINE u32 GetSize() const { return m_size; }
// Current value of m_position (the write position within the buffer).
ALWAYS_INLINE u32 GetPosition() const { return m_position; }
// Current value of m_mapped.
ALWAYS_INLINE bool IsMapped() const { return m_mapped; }
// Whether Create() decided this buffer may use map-no-overwrite; callers use it to pick alignment and
// the constant buffer binding path.
ALWAYS_INLINE bool IsUsingMapNoOverwrite() const { return m_use_map_no_overwrite; }
bool Create(ID3D11Device* device, D3D11_BIND_FLAG bind_flags, u32 size);
bool Create(D3D11_BIND_FLAG bind_flags, u32 min_size, u32 max_size);
void Destroy();
struct MappingResult
@ -42,6 +43,7 @@ public:
private:
ComPtr<ID3D11Buffer> m_buffer;
u32 m_size;
u32 m_max_size;
u32 m_position;
bool m_use_map_no_overwrite = false;
bool m_mapped = false;

View file

@ -337,9 +337,10 @@ D3D11TextureBuffer::D3D11TextureBuffer(Format format, u32 size_in_elements) : GP
D3D11TextureBuffer::~D3D11TextureBuffer() = default;
bool D3D11TextureBuffer::CreateBuffer(ID3D11Device* device)
bool D3D11TextureBuffer::CreateBuffer()
{
if (!m_buffer.Create(device, D3D11_BIND_SHADER_RESOURCE, GetSizeInBytes()))
const u32 size_in_bytes = GetSizeInBytes();
if (!m_buffer.Create(D3D11_BIND_SHADER_RESOURCE, size_in_bytes, size_in_bytes))
return false;
static constexpr std::array<DXGI_FORMAT, static_cast<u32>(Format::MaxCount)> dxgi_formats = {{
@ -348,7 +349,8 @@ bool D3D11TextureBuffer::CreateBuffer(ID3D11Device* device)
CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(m_buffer.GetD3DBuffer(), dxgi_formats[static_cast<u32>(m_format)], 0,
m_size_in_elements);
const HRESULT hr = device->CreateShaderResourceView(m_buffer.GetD3DBuffer(), &srv_desc, m_srv.GetAddressOf());
const HRESULT hr =
D3D11Device::GetD3DDevice()->CreateShaderResourceView(m_buffer.GetD3DBuffer(), &srv_desc, m_srv.GetAddressOf());
if (FAILED(hr))
{
Log_ErrorPrintf("CreateShaderResourceView() failed: %08X", hr);
@ -383,7 +385,7 @@ std::unique_ptr<GPUTextureBuffer> D3D11Device::CreateTextureBuffer(GPUTextureBuf
u32 size_in_elements)
{
std::unique_ptr<D3D11TextureBuffer> tb = std::make_unique<D3D11TextureBuffer>(format, size_in_elements);
if (!tb->CreateBuffer(m_device.Get()))
if (!tb->CreateBuffer())
tb.reset();
return tb;

View file

@ -107,7 +107,7 @@ public:
ALWAYS_INLINE ID3D11ShaderResourceView* GetSRV() const { return m_srv.Get(); }
ALWAYS_INLINE ID3D11ShaderResourceView* const* GetSRVArray() const { return m_srv.GetAddressOf(); }
bool CreateBuffer(ID3D11Device* device);
bool CreateBuffer();
// Inherited via GPUTextureBuffer
void* Map(u32 required_elements) override;

View file

@ -12,6 +12,7 @@
#include "fmt/format.h"
#include <d3d11.h>
#include <d3dcompiler.h>
#include <dxgi1_5.h>
@ -21,11 +22,18 @@ static unsigned s_next_bad_shader_id = 1;
const char* D3DCommon::GetFeatureLevelString(D3D_FEATURE_LEVEL feature_level)
{
static constexpr std::array<std::tuple<D3D_FEATURE_LEVEL, const char*>, 4> feature_level_names = {{
static constexpr std::array<std::tuple<D3D_FEATURE_LEVEL, const char*>, 11> feature_level_names = {{
{D3D_FEATURE_LEVEL_1_0_CORE, "D3D_FEATURE_LEVEL_1_0_CORE"},
{D3D_FEATURE_LEVEL_9_1, "D3D_FEATURE_LEVEL_9_1"},
{D3D_FEATURE_LEVEL_9_2, "D3D_FEATURE_LEVEL_9_2"},
{D3D_FEATURE_LEVEL_9_3, "D3D_FEATURE_LEVEL_9_3"},
{D3D_FEATURE_LEVEL_10_0, "D3D_FEATURE_LEVEL_10_0"},
{D3D_FEATURE_LEVEL_10_1, "D3D_FEATURE_LEVEL_10_1"},
{D3D_FEATURE_LEVEL_11_0, "D3D_FEATURE_LEVEL_11_0"},
{D3D_FEATURE_LEVEL_11_1, "D3D_FEATURE_LEVEL_11_1"},
{D3D_FEATURE_LEVEL_12_0, "D3D_FEATURE_LEVEL_12_0"},
{D3D_FEATURE_LEVEL_12_1, "D3D_FEATURE_LEVEL_12_1"},
{D3D_FEATURE_LEVEL_12_2, "D3D_FEATURE_LEVEL_12_2"},
}};
for (const auto& [fl, name] : feature_level_names)
@ -55,6 +63,22 @@ const char* D3DCommon::GetFeatureLevelShaderModelString(D3D_FEATURE_LEVEL featur
return "unk";
}
// Asks D3D11CreateDevice() which of the candidate feature levels it selects for `adapter`, without
// requesting a device or context object (both output pointers are null). When `adapter` is null the
// hardware driver type is used instead of an explicit adapter.
// The result starts out as the lowest candidate (D3D_FEATURE_LEVEL_10_0); a failed call is logged as a
// warning and whatever the result variable holds at that point is returned.
D3D_FEATURE_LEVEL D3DCommon::GetDeviceMaxFeatureLevel(IDXGIAdapter1* adapter)
{
  // Candidates ordered from the highest level down to the lowest.
  static constexpr std::array candidate_levels = {
    D3D_FEATURE_LEVEL_12_2, D3D_FEATURE_LEVEL_12_1, D3D_FEATURE_LEVEL_12_0, D3D_FEATURE_LEVEL_11_1,
    D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0};

  // An explicit adapter must be paired with the UNKNOWN driver type.
  const D3D_DRIVER_TYPE driver_type = adapter ? D3D_DRIVER_TYPE_UNKNOWN : D3D_DRIVER_TYPE_HARDWARE;
  const UINT num_candidates = static_cast<UINT>(candidate_levels.size());

  D3D_FEATURE_LEVEL detected_level = candidate_levels.back();
  const HRESULT hr = D3D11CreateDevice(adapter, driver_type, nullptr, 0, candidate_levels.data(), num_candidates,
                                       D3D11_SDK_VERSION, nullptr, &detected_level, nullptr);
  if (FAILED(hr))
  {
    Log_WarningFmt("D3D11CreateDevice() for getting max feature level failed: 0x{:08X}", static_cast<unsigned>(hr));
  }

  return detected_level;
}
Microsoft::WRL::ComPtr<IDXGIFactory5> D3DCommon::CreateFactory(bool debug, Error* error)
{
UINT flags = 0;

View file

@ -28,6 +28,9 @@ namespace D3DCommon {
const char* GetFeatureLevelString(D3D_FEATURE_LEVEL feature_level);
const char* GetFeatureLevelShaderModelString(D3D_FEATURE_LEVEL feature_level);
// Returns the feature level D3D11CreateDevice() selects for the given adapter out of 12_2 down to 10_0.
// A null adapter selects the hardware driver type. The result is initialised to D3D_FEATURE_LEVEL_10_0,
// and a failed query is only logged as a warning.
D3D_FEATURE_LEVEL GetDeviceMaxFeatureLevel(IDXGIAdapter1* adapter);
// create a dxgi factory
Microsoft::WRL::ComPtr<IDXGIFactory5> CreateFactory(bool debug, Error* error);