diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index b78119af5..1c346558c 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -79,6 +79,7 @@ bool D3D11Device::CreateDevice(const std::string_view& adapter, bool threaded_pr return false; ComPtr dxgi_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter); + m_max_feature_level = D3DCommon::GetDeviceMaxFeatureLevel(dxgi_adapter.Get()); static constexpr std::array requested_feature_levels = { {D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0}}; @@ -128,6 +129,7 @@ bool D3D11Device::CreateDevice(const std::string_view& adapter, bool threaded_pr Log_InfoPrintf("D3D Adapter: %s", D3DCommon::GetAdapterName(dxgi_adapter.Get()).c_str()); else Log_ErrorPrint("Failed to obtain D3D adapter name."); + Log_InfoFmt("Max device feature level: {}", D3DCommon::GetFeatureLevelString(m_max_feature_level)); BOOL allow_tearing_supported = false; hr = m_dxgi_factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, @@ -233,8 +235,7 @@ bool D3D11Device::CreateSwapChain() swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; swap_chain_desc.SwapEffect = m_using_flip_model_swap_chain ? DXGI_SWAP_EFFECT_FLIP_DISCARD : DXGI_SWAP_EFFECT_DISCARD; - m_using_allow_tearing = - (m_allow_tearing_supported && m_using_flip_model_swap_chain && !m_is_exclusive_fullscreen); + m_using_allow_tearing = (m_allow_tearing_supported && m_using_flip_model_swap_chain && !m_is_exclusive_fullscreen); if (m_using_allow_tearing) swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; @@ -456,9 +457,9 @@ std::string D3D11Device::GetDriverInfo() const bool D3D11Device::CreateBuffers() { - if (!m_vertex_buffer.Create(m_device.Get(), D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE) || - !m_index_buffer.Create(m_device.Get(), D3D11_BIND_INDEX_BUFFER, INDEX_BUFFER_SIZE) || - !m_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, UNIFORM_BUFFER_SIZE)) + if (!m_vertex_buffer.Create(D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE, VERTEX_BUFFER_SIZE) || + !m_index_buffer.Create(D3D11_BIND_INDEX_BUFFER, INDEX_BUFFER_SIZE, INDEX_BUFFER_SIZE) || + !m_uniform_buffer.Create(D3D11_BIND_CONSTANT_BUFFER, MIN_UNIFORM_BUFFER_SIZE, MAX_UNIFORM_BUFFER_SIZE)) { Log_ErrorPrintf("Failed to create vertex/index/uniform buffers."); return false; @@ -877,36 +878,61 @@ void D3D11Device::UnmapIndexBuffer(u32 used_index_count) void D3D11Device::PushUniformBuffer(const void* data, u32 data_size) { - const u32 used_space = Common::AlignUpPow2(data_size, UNIFORM_BUFFER_ALIGNMENT); - const auto res = m_uniform_buffer.Map(m_context.Get(), UNIFORM_BUFFER_ALIGNMENT, used_space); + const u32 req_align = + m_uniform_buffer.IsUsingMapNoOverwrite() ? UNIFORM_BUFFER_ALIGNMENT : UNIFORM_BUFFER_ALIGNMENT_DISCARD; + const u32 req_size = Common::AlignUpPow2(data_size, req_align); + const auto res = m_uniform_buffer.Map(m_context.Get(), req_align, req_size); std::memcpy(res.pointer, data, data_size); - m_uniform_buffer.Unmap(m_context.Get(), data_size); + m_uniform_buffer.Unmap(m_context.Get(), req_size); s_stats.buffer_streamed += data_size; - const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u; - const UINT num_constants = (used_space * UNIFORM_BUFFER_ALIGNMENT) / 16u; - m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); - m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + if (m_uniform_buffer.IsUsingMapNoOverwrite()) + { + const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u; + const UINT num_constants = req_size / 16u; + m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + } + else + { + DebugAssert(res.index_aligned == 0); + m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + } } void* D3D11Device::MapUniformBuffer(u32 size) { - const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT); - const auto res = m_uniform_buffer.Map(m_context.Get(), UNIFORM_BUFFER_ALIGNMENT, used_space); + const u32 req_align = + m_uniform_buffer.IsUsingMapNoOverwrite() ? UNIFORM_BUFFER_ALIGNMENT : UNIFORM_BUFFER_ALIGNMENT_DISCARD; + const u32 req_size = Common::AlignUpPow2(size, req_align); + const auto res = m_uniform_buffer.Map(m_context.Get(), req_align, req_size); return res.pointer; } void D3D11Device::UnmapUniformBuffer(u32 size) { - const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT); - const UINT first_constant = m_uniform_buffer.GetPosition() / 16u; - const UINT num_constants = used_space / 16u; + const u32 pos = m_uniform_buffer.GetPosition(); + const u32 req_align = + m_uniform_buffer.IsUsingMapNoOverwrite() ? UNIFORM_BUFFER_ALIGNMENT : UNIFORM_BUFFER_ALIGNMENT_DISCARD; + const u32 req_size = Common::AlignUpPow2(size, req_align); - m_uniform_buffer.Unmap(m_context.Get(), used_space); + m_uniform_buffer.Unmap(m_context.Get(), req_size); s_stats.buffer_streamed += size; - m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); - m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + if (m_uniform_buffer.IsUsingMapNoOverwrite()) + { + const UINT first_constant = pos / 16u; + const UINT num_constants = req_size / 16u; + m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); + } + else + { + DebugAssert(pos == 0); + m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + } } void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index 466d957d0..73c9a53bf 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -34,6 +34,7 @@ public: ALWAYS_INLINE static D3D11Device& GetInstance() { return *static_cast(g_gpu_device.get()); } ALWAYS_INLINE static ID3D11Device* GetD3DDevice() { return GetInstance().m_device.Get(); } ALWAYS_INLINE static ID3D11DeviceContext1* GetD3DContext() { return GetInstance().m_context.Get(); } + ALWAYS_INLINE static D3D_FEATURE_LEVEL GetMaxFeatureLevel() { return GetInstance().m_max_feature_level; } RenderAPI GetRenderAPI() const override; @@ -121,8 +122,10 @@ private: static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; - static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; + static constexpr u32 MAX_UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; + static constexpr u32 MIN_UNIFORM_BUFFER_SIZE = 16; static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256; + static constexpr u32 UNIFORM_BUFFER_ALIGNMENT_DISCARD = 16; static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; static void GetAdapterAndModeList(AdapterAndModeList* ret, IDXGIFactory5* factory); @@ -161,6 +164,7 @@ private: BlendStateMap m_blend_states; InputLayoutMap m_input_layouts; + D3D_FEATURE_LEVEL m_max_feature_level = D3D_FEATURE_LEVEL_10_0; bool m_allow_tearing_supported = false; bool m_using_flip_model_swap_chain = true; bool m_using_allow_tearing = false; diff --git a/src/util/d3d11_stream_buffer.cpp b/src/util/d3d11_stream_buffer.cpp index 6dfc85c2d..8361b76f0 100644 --- a/src/util/d3d11_stream_buffer.cpp +++ b/src/util/d3d11_stream_buffer.cpp @@ -1,7 +1,8 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "d3d11_stream_buffer.h" +#include "d3d11_device.h" #include "common/align.h" #include "common/assert.h" @@ -25,27 +26,27 @@ D3D11StreamBuffer::~D3D11StreamBuffer() Destroy(); } -bool D3D11StreamBuffer::Create(ID3D11Device* device, D3D11_BIND_FLAG bind_flags, u32 size) +bool D3D11StreamBuffer::Create(D3D11_BIND_FLAG bind_flags, u32 min_size, u32 max_size) { - CD3D11_BUFFER_DESC desc(size, bind_flags, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE, 0, 0); - ComPtr buffer; - HRESULT hr = device->CreateBuffer(&desc, nullptr, &buffer); - if (FAILED(hr)) - { - Log_ErrorPrintf("Creating buffer failed: 0x%08X", hr); - return false; - } - - m_buffer = std::move(buffer); - m_size = size; - m_position = 0; - D3D11_FEATURE_DATA_D3D11_OPTIONS options = {}; - hr = device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options)); + HRESULT hr = D3D11Device::GetD3DDevice()->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS, &options, sizeof(options)); if (SUCCEEDED(hr)) { if (bind_flags & D3D11_BIND_CONSTANT_BUFFER) + { + // Older Intel drivers go absolutely bananas with CPU usage when using offset constant buffers. + // NVIDIA seems to be okay, I don't know about AMD. So let's be safe and limit it to feature level 12+. m_use_map_no_overwrite = options.MapNoOverwriteOnDynamicConstantBuffer; + if (m_use_map_no_overwrite && D3D11Device::GetMaxFeatureLevel() < D3D_FEATURE_LEVEL_12_0) + { + Log_WarningPrint("Ignoring MapNoOverwriteOnDynamicConstantBuffer on driver due to feature level."); + m_use_map_no_overwrite = false; + } + + // should be 16 byte aligned + min_size = Common::AlignUpPow2(min_size, 16); + max_size = Common::AlignUpPow2(max_size, 16); + } else if (bind_flags & D3D11_BIND_SHADER_RESOURCE) m_use_map_no_overwrite = options.MapNoOverwriteOnDynamicBufferSRV; else @@ -64,6 +65,21 @@ bool D3D11StreamBuffer::Create(ID3D11Device* device, D3D11_BIND_FLAG bind_flags, m_use_map_no_overwrite = false; } + const u32 create_size = m_use_map_no_overwrite ? max_size : min_size; + const CD3D11_BUFFER_DESC desc(create_size, bind_flags, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE, 0, 0); + ComPtr buffer; + hr = D3D11Device::GetD3DDevice()->CreateBuffer(&desc, nullptr, &buffer); + if (FAILED(hr)) + { + Log_ErrorPrintf("Creating buffer failed: 0x%08X", hr); + return false; + } + + m_buffer = std::move(buffer); + m_size = create_size; + m_max_size = max_size; + m_position = 0; + return true; } @@ -74,6 +90,7 @@ void D3D11StreamBuffer::Destroy() D3D11StreamBuffer::MappingResult D3D11StreamBuffer::Map(ID3D11DeviceContext1* context, u32 alignment, u32 min_size) { + HRESULT hr; DebugAssert(!m_mapped); m_position = Common::AlignUp(m_position, alignment); @@ -81,11 +98,33 @@ D3D11StreamBuffer::MappingResult D3D11StreamBuffer::Map(ID3D11DeviceContext1* co { // wrap around m_position = 0; + + // grow buffer if needed + if (min_size > m_size) [[unlikely]] + { + Assert(min_size < m_max_size); + + const u32 new_size = std::min(m_max_size, Common::AlignUp(std::max(m_size * 2, min_size), alignment)); + Log_WarningFmt("Growing buffer from {} bytes to {} bytes", m_size, new_size); + + D3D11_BUFFER_DESC new_desc; + m_buffer->GetDesc(&new_desc); + new_desc.ByteWidth = new_size; + + hr = D3D11Device::GetD3DDevice()->CreateBuffer(&new_desc, nullptr, m_buffer.ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorFmt("Creating buffer failed: 0x{:08X}", static_cast(hr)); + Panic("Failed to grow buffer"); + } + + m_size = new_size; + } } D3D11_MAPPED_SUBRESOURCE sr; const D3D11_MAP map_type = (m_position == 0) ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE; - const HRESULT hr = context->Map(m_buffer.Get(), 0, map_type, 0, &sr); + hr = context->Map(m_buffer.Get(), 0, map_type, 0, &sr); if (FAILED(hr)) { Log_ErrorPrintf("Map failed: 0x%08X (alignment %u, minsize %u, size %u, position %u, map type %u)", hr, alignment, diff --git a/src/util/d3d11_stream_buffer.h b/src/util/d3d11_stream_buffer.h index b21a34022..7b5c7263d 100644 --- a/src/util/d3d11_stream_buffer.h +++ b/src/util/d3d11_stream_buffer.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -24,8 +24,9 @@ public: ALWAYS_INLINE u32 GetSize() const { return m_size; } ALWAYS_INLINE u32 GetPosition() const { return m_position; } ALWAYS_INLINE bool IsMapped() const { return m_mapped; } + ALWAYS_INLINE bool IsUsingMapNoOverwrite() const { return m_use_map_no_overwrite; } - bool Create(ID3D11Device* device, D3D11_BIND_FLAG bind_flags, u32 size); + bool Create(D3D11_BIND_FLAG bind_flags, u32 min_size, u32 max_size); void Destroy(); struct MappingResult @@ -42,6 +43,7 @@ public: private: ComPtr m_buffer; u32 m_size; + u32 m_max_size; u32 m_position; bool m_use_map_no_overwrite = false; bool m_mapped = false; diff --git a/src/util/d3d11_texture.cpp b/src/util/d3d11_texture.cpp index 44cd2ce4a..ae91c0b28 100644 --- a/src/util/d3d11_texture.cpp +++ b/src/util/d3d11_texture.cpp @@ -337,9 +337,10 @@ D3D11TextureBuffer::D3D11TextureBuffer(Format format, u32 size_in_elements) : GP D3D11TextureBuffer::~D3D11TextureBuffer() = default; -bool D3D11TextureBuffer::CreateBuffer(ID3D11Device* device) +bool D3D11TextureBuffer::CreateBuffer() { - if (!m_buffer.Create(device, D3D11_BIND_SHADER_RESOURCE, GetSizeInBytes())) + const u32 size_in_bytes = GetSizeInBytes(); + if (!m_buffer.Create(D3D11_BIND_SHADER_RESOURCE, size_in_bytes, size_in_bytes)) return false; static constexpr std::array(Format::MaxCount)> dxgi_formats = {{ @@ -348,7 +349,8 @@ bool D3D11TextureBuffer::CreateBuffer(ID3D11Device* device) CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(m_buffer.GetD3DBuffer(), dxgi_formats[static_cast(m_format)], 0, m_size_in_elements); - const HRESULT hr = device->CreateShaderResourceView(m_buffer.GetD3DBuffer(), &srv_desc, m_srv.GetAddressOf()); + const HRESULT hr = + D3D11Device::GetD3DDevice()->CreateShaderResourceView(m_buffer.GetD3DBuffer(), &srv_desc, m_srv.GetAddressOf()); if (FAILED(hr)) { Log_ErrorPrintf("CreateShaderResourceView() failed: %08X", hr); @@ -383,7 +385,7 @@ std::unique_ptr D3D11Device::CreateTextureBuffer(GPUTextureBuf u32 size_in_elements) { std::unique_ptr tb = std::make_unique(format, size_in_elements); - if (!tb->CreateBuffer(m_device.Get())) + if (!tb->CreateBuffer()) tb.reset(); return tb; diff --git a/src/util/d3d11_texture.h b/src/util/d3d11_texture.h index e7ae14118..d7fa6659b 100644 --- a/src/util/d3d11_texture.h +++ b/src/util/d3d11_texture.h @@ -107,7 +107,7 @@ public: ALWAYS_INLINE ID3D11ShaderResourceView* GetSRV() const { return m_srv.Get(); } ALWAYS_INLINE ID3D11ShaderResourceView* const* GetSRVArray() const { return m_srv.GetAddressOf(); } - bool CreateBuffer(ID3D11Device* device); + bool CreateBuffer(); // Inherited via GPUTextureBuffer void* Map(u32 required_elements) override; diff --git a/src/util/d3d_common.cpp b/src/util/d3d_common.cpp index e66f3a5d4..846158b39 100644 --- a/src/util/d3d_common.cpp +++ b/src/util/d3d_common.cpp @@ -12,6 +12,7 @@ #include "fmt/format.h" +#include #include #include @@ -21,11 +22,18 @@ static unsigned s_next_bad_shader_id = 1; const char* D3DCommon::GetFeatureLevelString(D3D_FEATURE_LEVEL feature_level) { - static constexpr std::array, 4> feature_level_names = {{ + static constexpr std::array, 11> feature_level_names = {{ + {D3D_FEATURE_LEVEL_1_0_CORE, "D3D_FEATURE_LEVEL_1_0_CORE"}, + {D3D_FEATURE_LEVEL_9_1, "D3D_FEATURE_LEVEL_9_1"}, + {D3D_FEATURE_LEVEL_9_2, "D3D_FEATURE_LEVEL_9_2"}, + {D3D_FEATURE_LEVEL_9_3, "D3D_FEATURE_LEVEL_9_3"}, {D3D_FEATURE_LEVEL_10_0, "D3D_FEATURE_LEVEL_10_0"}, {D3D_FEATURE_LEVEL_10_1, "D3D_FEATURE_LEVEL_10_1"}, {D3D_FEATURE_LEVEL_11_0, "D3D_FEATURE_LEVEL_11_0"}, {D3D_FEATURE_LEVEL_11_1, "D3D_FEATURE_LEVEL_11_1"}, + {D3D_FEATURE_LEVEL_12_0, "D3D_FEATURE_LEVEL_12_0"}, + {D3D_FEATURE_LEVEL_12_1, "D3D_FEATURE_LEVEL_12_1"}, + {D3D_FEATURE_LEVEL_12_2, "D3D_FEATURE_LEVEL_12_2"}, }}; for (const auto& [fl, name] : feature_level_names) @@ -55,6 +63,22 @@ const char* D3DCommon::GetFeatureLevelShaderModelString(D3D_FEATURE_LEVEL featur return "unk"; } +D3D_FEATURE_LEVEL D3DCommon::GetDeviceMaxFeatureLevel(IDXGIAdapter1* adapter) +{ + static constexpr std::array requested_feature_levels = { + D3D_FEATURE_LEVEL_12_2, D3D_FEATURE_LEVEL_12_1, D3D_FEATURE_LEVEL_12_0, D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0}; + + D3D_FEATURE_LEVEL max_supported_level = requested_feature_levels.back(); + HRESULT hr = D3D11CreateDevice(adapter, adapter ? D3D_DRIVER_TYPE_UNKNOWN : D3D_DRIVER_TYPE_HARDWARE, nullptr, 0, + requested_feature_levels.data(), static_cast(requested_feature_levels.size()), + D3D11_SDK_VERSION, nullptr, &max_supported_level, nullptr); + if (FAILED(hr)) + Log_WarningFmt("D3D11CreateDevice() for getting max feature level failed: 0x{:08X}", static_cast(hr)); + + return max_supported_level; +} + Microsoft::WRL::ComPtr D3DCommon::CreateFactory(bool debug, Error* error) { UINT flags = 0; diff --git a/src/util/d3d_common.h b/src/util/d3d_common.h index a4666139e..f34f33cb9 100644 --- a/src/util/d3d_common.h +++ b/src/util/d3d_common.h @@ -28,6 +28,9 @@ namespace D3DCommon { const char* GetFeatureLevelString(D3D_FEATURE_LEVEL feature_level); const char* GetFeatureLevelShaderModelString(D3D_FEATURE_LEVEL feature_level); +// returns max feature level of a device +D3D_FEATURE_LEVEL GetDeviceMaxFeatureLevel(IDXGIAdapter1* adapter); + // create a dxgi factory Microsoft::WRL::ComPtr CreateFactory(bool debug, Error* error);