diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index 2e604c4a0..b77cae55c 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -80,8 +80,8 @@ bool D3D11Device::CreateDevice(std::string_view adapter, bool threaded_presentat ComPtr dxgi_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter); m_max_feature_level = D3DCommon::GetDeviceMaxFeatureLevel(dxgi_adapter.Get()); - static constexpr std::array requested_feature_levels = { - {D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0}}; + static constexpr std::array requested_feature_levels = { + {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0}}; ComPtr temp_device; ComPtr temp_context; @@ -194,6 +194,14 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features) m_features.shader_cache = true; m_features.pipeline_cache = false; m_features.prefer_unused_textures = false; + m_features.raster_order_views = false; + if (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS)) + { + D3D11_FEATURE_DATA_D3D11_OPTIONS2 data = {}; + m_features.raster_order_views = + (SUCCEEDED(m_device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &data, sizeof(data))) && + data.ROVsSupported); + } } u32 D3D11Device::GetSwapChainBufferCount() const @@ -567,12 +575,15 @@ void D3D11Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 src11->GetD3DTexture(), 0, dst11->GetDXGIFormat()); } -bool D3D11Device::IsRenderTargetBound(const GPUTexture* tex) const +bool D3D11Device::IsRenderTargetBound(const D3D11Texture* tex) const { - for (u32 i = 0; i < m_num_current_render_targets; i++) + if (tex->IsRenderTarget() || tex->IsRWTexture()) { - if (m_current_render_targets[i] == tex) - return true; + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + return true; + } } return false; @@ -580,23 +591,26 @@ bool D3D11Device::IsRenderTargetBound(const GPUTexture* 
tex) const void D3D11Device::ClearRenderTarget(GPUTexture* t, u32 c) { - GPUDevice::ClearRenderTarget(t, c); - if (IsRenderTargetBound(t)) - static_cast(t)->CommitClear(m_context.Get()); + D3D11Texture* const T = static_cast(t); + GPUDevice::ClearRenderTarget(T, c); + if (IsRenderTargetBound(T)) + T->CommitClear(m_context.Get()); } void D3D11Device::ClearDepth(GPUTexture* t, float d) { - GPUDevice::ClearDepth(t, d); - if (m_current_depth_target == t) - static_cast(t)->CommitClear(m_context.Get()); + D3D11Texture* const T = static_cast(t); + GPUDevice::ClearDepth(T, d); + if (T == m_current_depth_target) + T->CommitClear(m_context.Get()); } void D3D11Device::InvalidateRenderTarget(GPUTexture* t) { - GPUDevice::InvalidateRenderTarget(t); - if (t->IsRenderTarget() ? IsRenderTargetBound(t) : (m_current_depth_target == t)) - static_cast(t)->CommitClear(m_context.Get()); + D3D11Texture* const T = static_cast(t); + GPUDevice::InvalidateRenderTarget(T); + if (T->IsDepthStencil() ? (m_current_depth_target == T) : IsRenderTargetBound(T)) + T->CommitClear(m_context.Get()); } void D3D11Device::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle) @@ -662,6 +676,7 @@ bool D3D11Device::BeginPresent(bool skip_present) m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr); s_stats.num_render_passes++; m_num_current_render_targets = 0; + m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags; std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); m_current_depth_target = nullptr; return true; @@ -934,15 +949,20 @@ void D3D11Device::UnmapUniformBuffer(u32 size) } void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, - GPUPipeline::RenderPassFlag feedback_loop) + GPUPipeline::RenderPassFlag flags) { - ID3D11RenderTargetView* rtvs[MAX_RENDER_TARGETS]; - DebugAssert(!feedback_loop); - - bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); - 
m_current_depth_target = static_cast(ds); - - // Make sure textures aren't bound. + DebugAssert( + !(flags & (GPUPipeline::RenderPassFlag::ColorFeedbackLoop | GPUPipeline::RenderPassFlag::SampleDepthBuffer))); + + // Make sure DSV isn't bound. + D3D11Texture* DS = static_cast(ds); + if (DS) + DS->CommitClear(m_context.Get()); + + bool changed = + (m_num_current_render_targets != num_rts || m_current_depth_target != DS || m_current_render_pass_flags != flags); + m_current_render_pass_flags = flags; + m_current_depth_target = DS; if (ds) { const ID3D11ShaderResourceView* srv = static_cast(ds)->GetD3DSRV(); @@ -958,13 +978,12 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu for (u32 i = 0; i < num_rts; i++) { - D3D11Texture* const dt = static_cast(rts[i]); - changed |= m_current_render_targets[i] != dt; - m_current_render_targets[i] = dt; - rtvs[i] = dt->GetD3DRTV(); - dt->CommitClear(m_context.Get()); + D3D11Texture* const RT = static_cast(rts[i]); + changed |= m_current_render_targets[i] != RT; + m_current_render_targets[i] = RT; + RT->CommitClear(m_context.Get()); - const ID3D11ShaderResourceView* srv = dt->GetD3DSRV(); + const ID3D11ShaderResourceView* srv = RT->GetD3DSRV(); for (u32 j = 0; j < MAX_TEXTURE_SAMPLERS; j++) { if (m_current_textures[j] && m_current_textures[j] == srv) @@ -981,7 +1000,27 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu return; s_stats.num_render_passes++; - m_context->OMSetRenderTargets(num_rts, rtvs, ds ? static_cast(ds)->GetD3DDSV() : nullptr); + + if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) + { + std::array uavs; + for (u32 i = 0; i < m_num_current_render_targets; i++) + uavs[i] = m_current_render_targets[i]->GetD3DUAV(); + + m_context->OMSetRenderTargetsAndUnorderedAccessViews( + 0, nullptr, m_current_depth_target ? 
m_current_depth_target->GetD3DDSV() : nullptr, 0, + m_num_current_render_targets, uavs.data(), nullptr); + } + else + { + std::array rtvs; + for (u32 i = 0; i < m_num_current_render_targets; i++) + rtvs[i] = m_current_render_targets[i]->GetD3DRTV(); + + m_context->OMSetRenderTargets(m_num_current_render_targets, + (m_num_current_render_targets > 0) ? rtvs.data() : nullptr, + m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr); + } } void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) @@ -1000,7 +1039,11 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s ID3D11SamplerState* S = sampler ? static_cast(sampler)->GetSamplerState() : nullptr; // Runtime will null these if we don't... - DebugAssert(!texture || !IsRenderTargetBound(texture) || m_current_depth_target != texture); + DebugAssert(!texture || + !((texture->IsRenderTarget() || texture->IsRWTexture()) && + IsRenderTargetBound(static_cast(texture))) || + !(texture->IsDepthStencil() && + (!m_current_depth_target || m_current_depth_target != static_cast(texture)))); if (m_current_textures[slot] != T) { @@ -1038,7 +1081,7 @@ void D3D11Device::UnbindTexture(D3D11Texture* tex) } } - if (tex->IsRenderTarget()) + if (tex->IsRenderTarget() || tex->IsRWTexture()) { for (u32 i = 0; i < m_num_current_render_targets; i++) { @@ -1050,7 +1093,7 @@ void D3D11Device::UnbindTexture(D3D11Texture* tex) } } } - else if (m_current_depth_target == tex) + else if (tex->IsDepthStencil() && m_current_depth_target == tex) { WARNING_LOG("Unbinding current DS"); SetRenderTargets(nullptr, 0, nullptr); diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index fd0e044e6..ea556fd36 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -89,7 +89,7 @@ public: void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, - 
GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override; + GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -142,7 +142,7 @@ private: bool CreateBuffers(); void DestroyBuffers(); - bool IsRenderTargetBound(const GPUTexture* tex) const; + bool IsRenderTargetBound(const D3D11Texture* tex) const; ComPtr GetRasterizationState(const GPUPipeline::RasterizationState& rs, Error* error); ComPtr GetDepthState(const GPUPipeline::DepthState& ds, Error* error); @@ -180,6 +180,7 @@ private: D3D11Pipeline* m_current_pipeline = nullptr; std::array m_current_render_targets = {}; u32 m_num_current_render_targets = 0; + GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags; D3D11Texture* m_current_depth_target = nullptr; ID3D11InputLayout* m_current_input_layout = nullptr; diff --git a/src/util/d3d11_texture.cpp b/src/util/d3d11_texture.cpp index a9a4f5f4c..f7cb6ae61 100644 --- a/src/util/d3d11_texture.cpp +++ b/src/util/d3d11_texture.cpp @@ -95,19 +95,16 @@ std::unique_ptr D3D11Device::CreateSampler(const GPUSampler::Config& D3D11Texture::D3D11Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, ComPtr texture, ComPtr srv, - ComPtr rtv_dsv) + ComPtr rtv_dsv, ComPtr uav) : GPUTexture(static_cast(width), static_cast(height), static_cast(layers), static_cast(levels), static_cast(samples), type, format), - m_texture(std::move(texture)), m_srv(std::move(srv)), m_rtv_dsv(std::move(rtv_dsv)) + m_texture(std::move(texture)), m_srv(std::move(srv)), m_rtv_dsv(std::move(rtv_dsv)), m_uav(std::move(uav)) { } D3D11Texture::~D3D11Texture() { D3D11Device::GetInstance().UnbindTexture(this); - m_rtv_dsv.Reset(); - m_srv.Reset(); - m_texture.Reset(); } 
D3D11_TEXTURE2D_DESC D3D11Texture::GetDesc() const @@ -247,7 +244,7 @@ std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 wid cpu_access = D3D11_CPU_ACCESS_WRITE; break; case Type::RWTexture: - bind_flags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; + bind_flags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; break; default: break; @@ -327,8 +324,23 @@ std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 wid rtv_dsv = std::move(dsv); } + ComPtr uav; + if (bind_flags & D3D11_BIND_UNORDERED_ACCESS) + { + const D3D11_UAV_DIMENSION uav_dimension = + (desc.ArraySize > 1 ? D3D11_UAV_DIMENSION_TEXTURE2DARRAY : D3D11_UAV_DIMENSION_TEXTURE2D); + const CD3D11_UNORDERED_ACCESS_VIEW_DESC uav_desc(uav_dimension, fm.srv_format, 0, 0, desc.ArraySize); + const HRESULT hr = device->CreateUnorderedAccessView(texture.Get(), &uav_desc, uav.GetAddressOf()); + if (FAILED(hr)) [[unlikely]] + { + ERROR_LOG("Create UAV for texture failed: 0x{:08X}", static_cast(hr)); + return nullptr; + } + } + return std::unique_ptr(new D3D11Texture(width, height, layers, levels, samples, type, format, - std::move(texture), std::move(srv), std::move(rtv_dsv))); + std::move(texture), std::move(srv), std::move(rtv_dsv), + std::move(uav))); } D3D11TextureBuffer::D3D11TextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements) diff --git a/src/util/d3d11_texture.h b/src/util/d3d11_texture.h index 55ceb8d65..5f459161f 100644 --- a/src/util/d3d11_texture.h +++ b/src/util/d3d11_texture.h @@ -60,6 +60,7 @@ public: { return reinterpret_cast(m_rtv_dsv.GetAddressOf()); } + ALWAYS_INLINE ID3D11UnorderedAccessView* GetD3DUAV() const { return m_uav.Get(); } DXGI_FORMAT GetDXGIFormat() const; ALWAYS_INLINE operator ID3D11Texture2D*() const { return m_texture.Get(); } @@ -72,6 +73,7 @@ public: { return static_cast(m_rtv_dsv.Get()); } + ALWAYS_INLINE operator ID3D11UnorderedAccessView*() const { return 
m_uav.Get(); } ALWAYS_INLINE operator bool() const { return static_cast(m_texture); } static std::unique_ptr Create(ID3D11Device* device, u32 width, u32 height, u32 layers, u32 levels, @@ -89,11 +91,13 @@ public: private: D3D11Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, - ComPtr texture, ComPtr srv, ComPtr rtv_dsv); + ComPtr texture, ComPtr srv, ComPtr rtv_dsv, + ComPtr uav); ComPtr m_texture; ComPtr m_srv; ComPtr m_rtv_dsv; + ComPtr m_uav; u32 m_mapped_subresource = 0; }; diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index 2ac35d497..b9f454868 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -130,7 +130,7 @@ bool D3D12Device::CreateDevice(std::string_view adapter, bool threaded_presentat m_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter); - HRESULT hr; + HRESULT hr = S_OK; // Enabling the debug layer will fail if the Graphics Tools feature is not installed. if (m_debug_device) @@ -149,8 +149,15 @@ bool D3D12Device::CreateDevice(std::string_view adapter, bool threaded_presentat } // Create the actual device. - m_feature_level = D3D_FEATURE_LEVEL_11_0; - hr = D3D12CreateDevice(m_adapter.Get(), m_feature_level, IID_PPV_ARGS(&m_device)); + for (D3D_FEATURE_LEVEL try_feature_level : {D3D_FEATURE_LEVEL_11_0}) + { + hr = D3D12CreateDevice(m_adapter.Get(), try_feature_level, IID_PPV_ARGS(&m_device)); + if (SUCCEEDED(hr)) + { + m_feature_level = try_feature_level; + break; + } + } if (FAILED(hr)) { Error::SetHResult(error, "Failed to create D3D12 device: ", hr); @@ -479,15 +486,23 @@ bool D3D12Device::CreateDescriptorHeaps(Error* error) // Allocate null SRV descriptor for unbound textures. 
static constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = { DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, {}}; - if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor)) { Error::SetStringView(error, "Failed to allocate null SRV descriptor"); return false; } - m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle); + // Same for UAVs. + static constexpr D3D12_UNORDERED_ACCESS_VIEW_DESC null_uav_desc = { + DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_UAV_DIMENSION_TEXTURE2D, {}}; + if (!m_descriptor_heap_manager.Allocate(&m_null_uav_descriptor)) + { + Error::SetStringView(error, "Failed to allocate null UAV descriptor"); + return false; + } + m_device->CreateUnorderedAccessView(nullptr, nullptr, &null_uav_desc, m_null_uav_descriptor.cpu_handle); + // Same for samplers. m_point_sampler = GetSampler(GPUSampler::GetNearestConfig()); for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) @@ -497,6 +512,8 @@ bool D3D12Device::CreateDescriptorHeaps(Error* error) void D3D12Device::DestroyDescriptorHeaps() { + if (m_null_uav_descriptor) + m_descriptor_heap_manager.Free(&m_null_uav_descriptor); if (m_null_srv_descriptor) m_descriptor_heap_manager.Free(&m_null_srv_descriptor); m_sampler_heap_manager.Destroy(); @@ -1248,6 +1265,15 @@ void D3D12Device::SetFeatures(FeatureMask disabled_features) HRESULT hr = m_dxgi_factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported, sizeof(allow_tearing_supported)); m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE); + + m_features.raster_order_views = false; + if (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS)) + { + D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; + m_features.raster_order_views = + SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) && + options.ROVsSupported; + } } void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 
dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, @@ -1479,7 +1505,7 @@ void D3D12Device::UnmapIndexBuffer(u32 used_index_count) void D3D12Device::PushUniformBuffer(const void* data, u32 data_size) { - static constexpr std::array(GPUPipeline::Layout::MaxCount)> push_parameter = { + static constexpr std::array(GPUPipeline::Layout::MaxCount)> push_parameters = { 0, // SingleTextureAndUBO 2, // SingleTextureAndPushConstants 1, // SingleTextureBufferAndPushConstants @@ -1495,8 +1521,10 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size) } s_stats.buffer_streamed += data_size; - GetCommandList()->SetGraphicsRoot32BitConstants(push_parameter[static_cast(m_current_pipeline_layout)], - data_size / 4u, data, 0); + + const u32 push_param = + push_parameters[static_cast(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature()); + GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0); } void* D3D12Device::MapUniformBuffer(u32 size) @@ -1526,63 +1554,96 @@ bool D3D12Device::CreateRootSignatures(Error* error) { D3D12::RootSignatureBuilder rsb; + for (u32 rov = 0; rov < 2; rov++) { - auto& rs = m_root_signatures[static_cast(GPUPipeline::Layout::SingleTextureAndUBO)]; + if (rov && !m_features.raster_order_views) + break; - rsb.SetInputAssemblerFlag(); - rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); - if (!(rs = rsb.Create(error, true))) - return false; - D3D12::SetObjectName(rs.Get(), "Single Texture + UBO Pipeline Layout"); - } + { + auto& rs = m_root_signatures[rov][static_cast(GPUPipeline::Layout::SingleTextureAndUBO)]; - { - auto& rs = m_root_signatures[static_cast(GPUPipeline::Layout::SingleTextureAndPushConstants)]; + rsb.SetInputAssemblerFlag(); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, 
D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); + if (rov) + { + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, + D3D12_SHADER_VISIBILITY_PIXEL); + } + if (!(rs = rsb.Create(error, true))) + return false; + D3D12::SetObjectName(rs.Get(), "Single Texture + UBO Pipeline Layout"); + } - rsb.SetInputAssemblerFlag(); - rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); - if (!(rs = rsb.Create(error, true))) - return false; - D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout"); - } + { + auto& rs = m_root_signatures[rov][static_cast(GPUPipeline::Layout::SingleTextureAndPushConstants)]; - { - auto& rs = m_root_signatures[static_cast(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)]; + rsb.SetInputAssemblerFlag(); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + if (rov) + { + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, + D3D12_SHADER_VISIBILITY_PIXEL); + } + rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); + if (!(rs = rsb.Create(error, true))) + return false; + D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout"); + } - rsb.SetInputAssemblerFlag(); - rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); - rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); - if (!(rs = rsb.Create(error, true))) - return 
false; - D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + UBO Pipeline Layout"); - } + { + auto& rs = m_root_signatures[rov][static_cast(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)]; - { - auto& rs = m_root_signatures[static_cast(GPUPipeline::Layout::MultiTextureAndUBO)]; + rsb.SetInputAssemblerFlag(); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL); + if (rov) + { + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, + D3D12_SHADER_VISIBILITY_PIXEL); + } + rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); + if (!(rs = rsb.Create(error, true))) + return false; + D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + UBO Pipeline Layout"); + } - rsb.SetInputAssemblerFlag(); - rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); - rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); - rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); - if (!(rs = rsb.Create(error, true))) - return false; - D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO Pipeline Layout"); - } + { + auto& rs = m_root_signatures[rov][static_cast(GPUPipeline::Layout::MultiTextureAndUBO)]; + + rsb.SetInputAssemblerFlag(); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, + D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); + if (rov) + { + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, + D3D12_SHADER_VISIBILITY_PIXEL); + } + if (!(rs = rsb.Create(error, true))) + return false; + D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO Pipeline Layout"); + } - { - auto& rs = 
m_root_signatures[static_cast(GPUPipeline::Layout::MultiTextureAndPushConstants)]; + { + auto& rs = m_root_signatures[rov][static_cast(GPUPipeline::Layout::MultiTextureAndPushConstants)]; - rsb.SetInputAssemblerFlag(); - rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); - rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); - rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); - if (!(rs = rsb.Create(error, true))) - return false; - D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout"); + rsb.SetInputAssemblerFlag(); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, + D3D12_SHADER_VISIBILITY_PIXEL); + if (rov) + { + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, + D3D12_SHADER_VISIBILITY_PIXEL); + } + rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL); + if (!(rs = rsb.Create(error, true))) + return false; + D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout"); + } } return true; @@ -1590,29 +1651,30 @@ bool D3D12Device::CreateRootSignatures(Error* error) void D3D12Device::DestroyRootSignatures() { - for (auto it = m_root_signatures.rbegin(); it != m_root_signatures.rend(); ++it) - it->Reset(); + m_root_signatures.enumerate([](auto& it) { it.Reset(); }); } void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, - GPUPipeline::RenderPassFlag feedback_loop) + GPUPipeline::RenderPassFlag flags) { - DebugAssert(!feedback_loop); + DebugAssert( + !(flags & (GPUPipeline::RenderPassFlag::ColorFeedbackLoop | GPUPipeline::RenderPassFlag::SampleDepthBuffer))); if (InRenderPass()) EndRenderPass(); - 
ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); - m_current_depth_target = static_cast(ds); - for (u32 i = 0; i < num_rts; i++) - { - D3D12Texture* const dt = static_cast(rts[i]); - m_current_render_targets[i] = dt; - dt->CommitClear(cmdlist); - } + if (num_rts > 0) + std::memcpy(m_current_render_targets.data(), rts, sizeof(D3D12Texture*) * num_rts); for (u32 i = num_rts; i < m_num_current_render_targets; i++) m_current_render_targets[i] = nullptr; m_num_current_render_targets = num_rts; + + // Need a root signature change if switching to UAVs. + m_dirty_flags |= + ((m_current_render_pass_flags ^ flags) & GPUPipeline::BindRenderTargetsAsImages) ? LAYOUT_DEPENDENT_DIRTY_STATE : 0; + m_dirty_flags = (flags & GPUPipeline::BindRenderTargetsAsImages) ? (m_dirty_flags | DIRTY_FLAG_RT_UAVS) : + (m_dirty_flags & ~DIRTY_FLAG_RT_UAVS); + m_current_render_pass_flags = flags; } void D3D12Device::BeginRenderPass() @@ -1630,48 +1692,62 @@ void D3D12Device::BeginRenderPass() if (m_num_current_render_targets > 0 || m_current_depth_target) [[likely]] { - for (u32 i = 0; i < m_num_current_render_targets; i++) + if (!IsUsingROVRootSignature()) [[likely]] { - D3D12Texture* const rt = m_current_render_targets[i]; - rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET); - rt->SetUseFenceValue(GetCurrentFenceValue()); + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + D3D12Texture* const rt = m_current_render_targets[i]; + rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET); + rt->SetUseFenceValue(GetCurrentFenceValue()); - D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i]; - desc.cpuDescriptor = rt->GetWriteDescriptor(); - desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE; + D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i]; + desc.cpuDescriptor = rt->GetWriteDescriptor(); + desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE; - switch (rt->GetState()) - { - case 
GPUTexture::State::Cleared: + switch (rt->GetState()) { - desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR; - std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(), - sizeof(desc.BeginningAccess.Clear.ClearValue.Color)); - rt->SetState(GPUTexture::State::Dirty); - } - break; + case GPUTexture::State::Cleared: + { + desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR; + std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(), + sizeof(desc.BeginningAccess.Clear.ClearValue.Color)); + rt->SetState(GPUTexture::State::Dirty); + } + break; - case GPUTexture::State::Invalidated: - { - desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD; - rt->SetState(GPUTexture::State::Dirty); - } - break; + case GPUTexture::State::Invalidated: + { + desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD; + rt->SetState(GPUTexture::State::Dirty); + } + break; - case GPUTexture::State::Dirty: - { - desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE; + case GPUTexture::State::Dirty: + { + desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE; + } + break; + + default: + UnreachableCode(); + break; } - break; + } - default: - UnreachableCode(); - break; + rt_desc_p = (m_num_current_render_targets > 0) ? rt_desc.data() : nullptr; + num_rt_descs = m_num_current_render_targets; + } + else + { + // Still need to clear the RTs. + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + D3D12Texture* const rt = m_current_render_targets[i]; + rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + rt->SetUseFenceValue(GetCurrentFenceValue()); + rt->CommitClear(cmdlist); } } - - rt_desc_p = (m_num_current_render_targets > 0) ? 
rt_desc.data() : nullptr; - num_rt_descs = m_num_current_render_targets; if (m_current_depth_target) { D3D12Texture* const ds = m_current_depth_target; @@ -1733,7 +1809,7 @@ void D3D12Device::BeginRenderPass() m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } - DebugAssert(rt_desc_p || ds_desc_p); + DebugAssert(rt_desc_p || ds_desc_p || IsUsingROVRootSignature()); cmdlist->BeginRenderPass(num_rt_descs, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE); // TODO: Stats @@ -1771,6 +1847,9 @@ void D3D12Device::BeginSwapChainRenderPass() std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); m_num_current_render_targets = 0; + m_dirty_flags = + (m_dirty_flags & ~DIRTY_FLAG_RT_UAVS) | ((IsUsingROVRootSignature()) ? DIRTY_FLAG_PIPELINE_LAYOUT : 0); + m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags; m_current_depth_target = nullptr; m_in_render_pass = true; s_stats.num_render_passes++; @@ -1839,8 +1918,7 @@ void D3D12Device::SetPipeline(GPUPipeline* pipeline) if (GPUPipeline::Layout layout = m_current_pipeline->GetLayout(); m_current_pipeline_layout != layout) { m_current_pipeline_layout = layout; - m_dirty_flags |= - DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS; + m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE & (IsUsingROVRootSignature() ? ~0u : ~DIRTY_FLAG_RT_UAVS); } } @@ -1865,7 +1943,8 @@ bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const void D3D12Device::InvalidateCachedState() { - m_dirty_flags = ALL_DIRTY_STATE; + m_dirty_flags = ALL_DIRTY_STATE & + ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) ? 
~0u : ~DIRTY_FLAG_RT_UAVS); m_in_render_pass = false; m_current_pipeline = nullptr; m_current_vertex_stride = 0; @@ -2053,7 +2132,7 @@ void D3D12Device::PreDrawCheck() return; } } - else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS)) + else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS)) { if (!UpdateRootParameters(dirty)) { @@ -2068,9 +2147,15 @@ void D3D12Device::PreDrawCheck() BeginRenderPass(); } +bool D3D12Device::IsUsingROVRootSignature() const +{ + return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0); +} + void D3D12Device::UpdateRootSignature() { - GetCommandList()->SetGraphicsRootSignature(m_root_signatures[static_cast(m_current_pipeline_layout)].Get()); + GetCommandList()->SetGraphicsRootSignature( + m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast(m_current_pipeline_layout)].Get()); } template @@ -2145,6 +2230,35 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty) cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle); } + if (dirty & DIRTY_FLAG_RT_UAVS) + { + DebugAssert(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages); + + D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator; + D3D12DescriptorHandle gpu_handle; + if (!allocator.Allocate(MAX_IMAGE_RENDER_TARGETS, &gpu_handle)) + return false; + + D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_IMAGE_RENDER_TARGETS]; + UINT src_sizes[MAX_IMAGE_RENDER_TARGETS]; + const UINT dst_size = MAX_IMAGE_RENDER_TARGETS; + for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++) + { + src_handles[i] = + m_current_render_targets[i] ? 
m_current_render_targets[i]->GetSRVDescriptor() : m_null_srv_descriptor; + src_sizes[i] = 1; + } + m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &dst_size, MAX_IMAGE_RENDER_TARGETS, src_handles, src_sizes, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + constexpr u32 rov_param = + (layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ? + 1 : + ((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 : + 2); + cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle); + } + return true; } diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h index 22927a134..040a6e290 100644 --- a/src/util/d3d12_device.h +++ b/src/util/d3d12_device.h @@ -8,6 +8,7 @@ #include "gpu_device.h" #include "gpu_texture.h" +#include "common/dimensional_array.h" #include "common/windows_headers.h" #include @@ -110,7 +111,7 @@ public: void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, - GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override; + GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -200,9 +201,11 @@ private: DIRTY_FLAG_CONSTANT_BUFFER = (1 << 2), DIRTY_FLAG_TEXTURES = (1 << 3), DIRTY_FLAG_SAMPLERS = (1 << 3), + DIRTY_FLAG_RT_UAVS = (1 << 4), - ALL_DIRTY_STATE = DIRTY_FLAG_INITIAL | DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_CONSTANT_BUFFER | - DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS, + LAYOUT_DEPENDENT_DIRTY_STATE = DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | + DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS, + ALL_DIRTY_STATE = DIRTY_FLAG_INITIAL | (LAYOUT_DEPENDENT_DIRTY_STATE & 
~DIRTY_FLAG_RT_UAVS), }; struct CommandList @@ -264,6 +267,7 @@ private: void SetInitialPipelineState(); void PreDrawCheck(); + bool IsUsingROVRootSignature() const; void UpdateRootSignature(); template bool UpdateParametersForLayout(u32 dirty); @@ -303,6 +307,7 @@ private: D3D12DescriptorHeapManager m_dsv_heap_manager; D3D12DescriptorHeapManager m_sampler_heap_manager; D3D12DescriptorHandle m_null_srv_descriptor; + D3D12DescriptorHandle m_null_uav_descriptor; D3D12DescriptorHandle m_point_sampler; ComPtr m_timestamp_query_heap; @@ -314,7 +319,8 @@ private: std::deque>> m_cleanup_resources; std::deque>> m_cleanup_descriptors; - std::array, static_cast(GPUPipeline::Layout::MaxCount)> m_root_signatures = {}; + DimensionalArray, static_cast(GPUPipeline::Layout::MaxCount), 2> m_root_signatures = + {}; D3D12StreamBuffer m_vertex_buffer; D3D12StreamBuffer m_index_buffer; @@ -333,6 +339,7 @@ private: D3D12Pipeline* m_current_pipeline = nullptr; D3D12_PRIMITIVE_TOPOLOGY m_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; u32 m_num_current_render_targets = 0; + GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags; std::array m_current_render_targets = {}; D3D12Texture* m_current_depth_target = nullptr; u32 m_current_vertex_stride = 0; diff --git a/src/util/d3d12_pipeline.cpp b/src/util/d3d12_pipeline.cpp index 0cd394110..459b7fa2a 100644 --- a/src/util/d3d12_pipeline.cpp +++ b/src/util/d3d12_pipeline.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "d3d12_pipeline.h" @@ -7,6 +7,7 @@ #include "d3d_common.h" #include "common/assert.h" +#include "common/bitutils.h" #include "common/log.h" #include "common/sha1_digest.h" #include "common/string_util.h" @@ -180,8 +181,16 @@ std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::Grap D3D12_BLEND_OP_MAX, // Max }}; + if 
(config.render_pass_flags & GPUPipeline::BindRenderTargetsAsImages && !m_features.raster_order_views) + { + ERROR_LOG("Attempting to create ROV pipeline without ROV feature."); + return {}; + } + D3D12::GraphicsPipelineBuilder gpb; - gpb.SetRootSignature(m_root_signatures[static_cast(config.layout)].Get()); + gpb.SetRootSignature(m_root_signatures[BoolToUInt8( + (config.render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))][static_cast(config.layout)] + .Get()); gpb.SetVertexShader(static_cast(config.vertex_shader)->GetBytecodeData(), static_cast(config.vertex_shader)->GetBytecodeSize()); gpb.SetPixelShader(static_cast(config.fragment_shader)->GetBytecodeData(), diff --git a/src/util/d3d12_texture.cpp b/src/util/d3d12_texture.cpp index 8dce4cee8..c5f7b8c6c 100644 --- a/src/util/d3d12_texture.cpp +++ b/src/util/d3d12_texture.cpp @@ -44,8 +44,6 @@ std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u3 const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(format); - const DXGI_FORMAT uav_format = (type == GPUTexture::Type::RWTexture) ? 
fm.resource_format : DXGI_FORMAT_UNKNOWN; - D3D12_RESOURCE_DESC desc = {}; desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; desc.Width = width; @@ -98,7 +96,9 @@ std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u3 { DebugAssert(levels == 1); allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED; - state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + optimized_clear_value.Format = fm.rtv_format; + state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; } break; @@ -106,9 +106,6 @@ std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u3 return {}; } - if (uav_format != DXGI_FORMAT_UNKNOWN) - desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - ComPtr resource; ComPtr allocation; HRESULT hr = m_allocator->CreateResource( @@ -157,18 +154,28 @@ std::unique_ptr D3D12Device::CreateTexture(u32 width, u32 height, u3 } break; + case GPUTexture::Type::RWTexture: + { + write_descriptor_type = D3D12Texture::WriteDescriptorType::RTV; + if (!CreateRTVDescriptor(resource.Get(), samples, fm.rtv_format, &write_descriptor)) + { + m_descriptor_heap_manager.Free(&srv_descriptor); + return {}; + } + + if (!CreateUAVDescriptor(resource.Get(), samples, fm.srv_format, &uav_descriptor)) + { + m_descriptor_heap_manager.Free(&write_descriptor); + m_descriptor_heap_manager.Free(&srv_descriptor); + return {}; + } + } + break; + default: break; } - if (uav_format != DXGI_FORMAT_UNKNOWN && - !CreateUAVDescriptor(resource.Get(), samples, fm.dsv_format, &uav_descriptor)) - { - m_descriptor_heap_manager.Free(&write_descriptor); - m_descriptor_heap_manager.Free(&srv_descriptor); - return {}; - } - std::unique_ptr tex(new D3D12Texture( width, height, layers, levels, samples, type, format, fm.resource_format, std::move(resource), std::move(allocation), srv_descriptor, write_descriptor, uav_descriptor, write_descriptor_type, state)); diff --git 
a/src/util/d3d_common.cpp b/src/util/d3d_common.cpp index 212387ebf..4e199cda7 100644 --- a/src/util/d3d_common.cpp +++ b/src/util/d3d_common.cpp @@ -50,7 +50,7 @@ const char* D3DCommon::GetFeatureLevelShaderModelString(D3D_FEATURE_LEVEL featur {D3D_FEATURE_LEVEL_10_0, "sm40"}, {D3D_FEATURE_LEVEL_10_1, "sm41"}, {D3D_FEATURE_LEVEL_11_0, "sm50"}, - {D3D_FEATURE_LEVEL_11_1, "sm51"}, + {D3D_FEATURE_LEVEL_11_1, "sm50"}, }}; for (const auto& [fl, name] : feature_level_names) @@ -390,11 +390,9 @@ u32 D3DCommon::GetShaderModelForFeatureLevel(D3D_FEATURE_LEVEL feature_level) return 41; case D3D_FEATURE_LEVEL_11_0: - return 50; - case D3D_FEATURE_LEVEL_11_1: default: - return 51; + return 50; } } @@ -429,14 +427,6 @@ std::optional> D3DCommon::CompileShader(u32 shader_model, b } break; - case 51: - { - static constexpr std::array(GPUShaderStage::MaxCount)> targets = { - {"vs_5_1", "ps_5_1", "gs_5_1", "cs_5_1"}}; - target = targets[static_cast(stage)]; - } - break; - default: Error::SetStringFmt(error, "Unknown shader model: {}", shader_model); return {}; diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index b93e985ee..b3dbc6691 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -168,6 +168,7 @@ public: NoRenderPassFlags = 0, ColorFeedbackLoop = (1 << 0), SampleDepthBuffer = (1 << 1), + BindRenderTargetsAsImages = (1 << 2), }; enum class Primitive : u8 @@ -469,6 +470,7 @@ public: FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4), FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5), FEATURE_MASK_MEMORY_IMPORT = (1 << 6), + FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 7), }; enum class DrawBarrier : u32 @@ -496,6 +498,7 @@ public: bool shader_cache : 1; bool pipeline_cache : 1; bool prefer_unused_textures : 1; + bool raster_order_views : 1; }; struct Statistics @@ -527,6 +530,7 @@ public: static constexpr u32 MAX_TEXTURE_SAMPLERS = 8; static constexpr u32 MIN_TEXEL_BUFFER_ELEMENTS = 4 * 1024 * 512; static constexpr u32 MAX_RENDER_TARGETS = 4; + static constexpr u32 
MAX_IMAGE_RENDER_TARGETS = 2; static_assert(sizeof(GPUPipeline::GraphicsConfig::color_formats) == sizeof(GPUTexture::Format) * MAX_RENDER_TARGETS); GPUDevice(); @@ -676,14 +680,14 @@ public: /// Drawing setup abstraction. virtual void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, - GPUPipeline::RenderPassFlag render_pass_flags = GPUPipeline::NoRenderPassFlags) = 0; + GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) = 0; virtual void SetPipeline(GPUPipeline* pipeline) = 0; virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) = 0; virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0; virtual void SetViewport(const GSVector4i rc) = 0; virtual void SetScissor(const GSVector4i rc) = 0; void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr, - GPUPipeline::RenderPassFlag render_pass_flags = GPUPipeline::NoRenderPassFlags); + GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags); void SetViewport(s32 x, s32 y, s32 width, s32 height); void SetScissor(s32 x, s32 y, s32 width, s32 height); void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height); diff --git a/src/util/gpu_texture.cpp b/src/util/gpu_texture.cpp index 52897c8e5..95669d26d 100644 --- a/src/util/gpu_texture.cpp +++ b/src/util/gpu_texture.cpp @@ -213,10 +213,18 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u return false; } - if (samples > 1 && levels > 1) + if (samples > 1) { - ERROR_LOG("Multisampled textures can't have mip levels."); - return false; + if (levels > 1) + { + ERROR_LOG("Multisampled textures can't have mip levels."); + return false; + } + else if (type != Type::RenderTarget && type != Type::DepthStencil) + { + ERROR_LOG("Multisampled textures must be render targets or depth stencil targets."); + return false; + } } if (layers > 1 && type != Type::Texture && type != Type::DynamicTexture) diff --git a/src/util/gpu_texture.h 
b/src/util/gpu_texture.h index d9f29133b..3af1196bb 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -129,6 +129,7 @@ public: ALWAYS_INLINE bool IsDepthStencil() const { return (m_type == Type::DepthStencil); } ALWAYS_INLINE bool IsTexture() const { return (m_type == Type::Texture || m_type == Type::DynamicTexture); } ALWAYS_INLINE bool IsDynamicTexture() const { return (m_type == Type::DynamicTexture); } + ALWAYS_INLINE bool IsRWTexture() const { return (m_type == Type::RWTexture); } ALWAYS_INLINE const ClearValue& GetClearValue() const { return m_clear_value; } ALWAYS_INLINE u32 GetClearColor() const { return m_clear_value.color; } diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index 5947baf58..63e25c207 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -481,7 +481,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features) // So, blit from the shadow texture, like in the other renderers. m_features.texture_copy_to_self = !vendor_id_arm && !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF); - m_features.feedback_loops = m_features.framebuffer_fetch; + m_features.feedback_loops = false; m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2); diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 2a24533f0..7bfd09e70 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -385,19 +385,21 @@ GPUDevice::AdapterInfoList VulkanDevice::GetAdapterList() return ret; } -bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface) +bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface, Error* error) { u32 extension_count = 0; VkResult res = vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, nullptr); if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, 
"vkEnumerateDeviceExtensionProperties failed: "); + Vulkan::SetErrorObject(error, "vkEnumerateDeviceExtensionProperties failed: ", res); return false; } if (extension_count == 0) { - ERROR_LOG("Vulkan: No extensions supported by device."); + ERROR_LOG("No extensions supported by device."); + Error::SetStringView(error, "No extensions supported by device."); return false; } @@ -423,7 +425,10 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en } if (required) + { ERROR_LOG("Vulkan: Missing required extension {}.", name); + Error::SetStringFmt(error, "Missing required extension {}.", name); + } return false; }; @@ -466,6 +471,11 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en m_optional_extensions.vk_ext_swapchain_maintenance1 && SupportsExtension(VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME, false); + // Dynamic rendering isn't strictly needed for FSI, but we want it with framebufferless rendering. + m_optional_extensions.vk_ext_fragment_shader_interlock = + m_optional_extensions.vk_khr_dynamic_rendering && + SupportsExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME, false); + #ifdef _WIN32 m_optional_extensions.vk_ext_full_screen_exclusive = enable_surface && SupportsExtension(VK_EXT_FULL_SCREEN_EXCLUSIVE_EXTENSION_NAME, false); @@ -480,6 +490,7 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en { m_optional_extensions.vk_khr_dynamic_rendering = false; m_optional_extensions.vk_khr_dynamic_rendering_local_read = false; + m_optional_extensions.vk_ext_fragment_shader_interlock = false; WARNING_LOG("Disabling VK_KHR_dynamic_rendering on broken mobile driver."); } if (m_optional_extensions.vk_khr_push_descriptor) @@ -501,29 +512,15 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en return true; } -bool VulkanDevice::SelectDeviceFeatures() -{ - VkPhysicalDeviceFeatures available_features; - 
vkGetPhysicalDeviceFeatures(m_physical_device, &available_features); - - // Enable the features we use. - m_device_features.dualSrcBlend = available_features.dualSrcBlend; - m_device_features.largePoints = available_features.largePoints; - m_device_features.wideLines = available_features.wideLines; - m_device_features.samplerAnisotropy = available_features.samplerAnisotropy; - m_device_features.sampleRateShading = available_features.sampleRateShading; - m_device_features.geometryShader = available_features.geometryShader; - - return true; -} - -bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer) +bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer, FeatureMask disabled_features, + Error* error) { u32 queue_family_count; vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, nullptr); if (queue_family_count == 0) { ERROR_LOG("No queue families found on specified vulkan physical device."); + Error::SetStringView(error, "No queue families found on specified vulkan physical device."); return false; } @@ -554,6 +551,7 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: "); + Vulkan::SetErrorObject(error, "vkGetPhysicalDeviceSurfaceSupportKHR failed: ", res); return false; } @@ -572,11 +570,13 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay if (m_graphics_queue_family_index == queue_family_count) { ERROR_LOG("Vulkan: Failed to find an acceptable graphics queue."); + Error::SetStringView(error, "Vulkan: Failed to find an acceptable graphics queue."); return false; } if (surface != VK_NULL_HANDLE && m_present_queue_family_index == queue_family_count) { ERROR_LOG("Vulkan: Failed to find an acceptable present queue."); + Error::SetStringView(error, "Vulkan: Failed to find an acceptable present queue."); return false; } @@ -610,17 
+610,26 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay device_info.pQueueCreateInfos = queue_infos.data(); ExtensionList enabled_extensions; - if (!SelectDeviceExtensions(&enabled_extensions, surface != VK_NULL_HANDLE)) + if (!SelectDeviceExtensions(&enabled_extensions, surface != VK_NULL_HANDLE, error)) return false; device_info.enabledExtensionCount = static_cast(enabled_extensions.size()); device_info.ppEnabledExtensionNames = enabled_extensions.data(); // Check for required features before creating. - if (!SelectDeviceFeatures()) - return false; + VkPhysicalDeviceFeatures available_features; + vkGetPhysicalDeviceFeatures(m_physical_device, &available_features); - device_info.pEnabledFeatures = &m_device_features; + // Enable the features we use. + VkPhysicalDeviceFeatures enabled_features = {}; + enabled_features.dualSrcBlend = available_features.dualSrcBlend; + enabled_features.largePoints = available_features.largePoints; + enabled_features.wideLines = available_features.wideLines; + enabled_features.samplerAnisotropy = available_features.samplerAnisotropy; + enabled_features.sampleRateShading = available_features.sampleRateShading; + enabled_features.geometryShader = available_features.geometryShader; + enabled_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics; + device_info.pEnabledFeatures = &enabled_features; // Enable debug layer on debug builds if (enable_validation_layer) @@ -639,6 +648,8 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_TRUE}; VkPhysicalDeviceSwapchainMaintenance1FeaturesEXT swapchain_maintenance1_feature = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SWAPCHAIN_MAINTENANCE_1_FEATURES_EXT, nullptr, VK_TRUE}; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_feature = { + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT, nullptr, VK_FALSE, VK_TRUE, VK_FALSE}; if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) Vulkan::AddPointerToChain(&device_info, &rasterization_order_access_feature); @@ -649,12 +660,15 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_feature); if (m_optional_extensions.vk_khr_dynamic_rendering_local_read) Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_local_read_feature); + if (m_optional_extensions.vk_ext_fragment_shader_interlock) + Vulkan::AddPointerToChain(&device_info, &fragment_shader_interlock_feature); } VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device); if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, "vkCreateDevice failed: "); + Vulkan::SetErrorObject(error, "vkCreateDevice failed: ", res); return false; } @@ -677,6 +691,7 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay m_device_properties.limits.timestampPeriod); ProcessDeviceExtensions(); + SetFeatures(disabled_features, enabled_features); return true; } @@ -693,6 +708,8 @@ void VulkanDevice::ProcessDeviceExtensions() VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_FALSE}; VkPhysicalDeviceSwapchainMaintenance1FeaturesEXT swapchain_maintenance1_feature = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SWAPCHAIN_MAINTENANCE_1_FEATURES_EXT, nullptr, VK_FALSE}; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT, nullptr, VK_FALSE, VK_FALSE, VK_FALSE}; // add in optional feature structs if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) @@ -704,6 +721,8 @@ void VulkanDevice::ProcessDeviceExtensions() Vulkan::AddPointerToChain(&features2, &dynamic_rendering_feature); if 
(m_optional_extensions.vk_khr_dynamic_rendering_local_read) Vulkan::AddPointerToChain(&features2, &dynamic_rendering_local_read_feature); + if (m_optional_extensions.vk_ext_fragment_shader_interlock) + Vulkan::AddPointerToChain(&features2, &fragment_shader_interlock_feature); } // we might not have VK_KHR_get_physical_device_properties2... @@ -738,6 +757,9 @@ void VulkanDevice::ProcessDeviceExtensions() m_optional_extensions.vk_khr_dynamic_rendering &= (dynamic_rendering_feature.dynamicRendering == VK_TRUE); m_optional_extensions.vk_khr_dynamic_rendering_local_read &= (dynamic_rendering_local_read_feature.dynamicRenderingLocalRead == VK_TRUE); + m_optional_extensions.vk_ext_fragment_shader_interlock &= + (m_optional_extensions.vk_khr_dynamic_rendering && + fragment_shader_interlock_feature.fragmentShaderPixelInterlock == VK_TRUE); VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, nullptr, {}}; VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor_properties = { @@ -769,6 +791,8 @@ void VulkanDevice::ProcessDeviceExtensions() INFO_LOG("VK_EXT_external_memory_host is {}", m_optional_extensions.vk_ext_external_memory_host ? "supported" : "NOT supported"); INFO_LOG("VK_EXT_memory_budget is {}", m_optional_extensions.vk_ext_memory_budget ? "supported" : "NOT supported"); + INFO_LOG("VK_EXT_fragment_shader_interlock is {}", + m_optional_extensions.vk_ext_fragment_shader_interlock ? "supported" : "NOT supported"); INFO_LOG("VK_EXT_rasterization_order_attachment_access is {}", m_optional_extensions.vk_ext_rasterization_order_attachment_access ? "supported" : "NOT supported"); INFO_LOG("VK_EXT_swapchain_maintenance1 is {}", @@ -2046,14 +2070,8 @@ bool VulkanDevice::CreateDevice(std::string_view adapter, bool threaded_presenta } // Attempt to create the device. 
- if (!CreateDevice(surface, enable_validation_layer)) - return false; - - if (!CheckFeatures(disabled_features)) - { - Error::SetStringView(error, "Your GPU does not support the required Vulkan features."); + if (!CreateDevice(surface, enable_validation_layer, disabled_features, error)) return false; - } // And critical resources. if (!CreateAllocator() || !CreatePersistentDescriptorPool() || !CreateCommandBuffers() || !CreatePipelineLayouts()) @@ -2576,14 +2594,13 @@ u32 VulkanDevice::GetMaxMultisamples(VkPhysicalDevice physical_device, const VkP return 1; } -bool VulkanDevice::CheckFeatures(FeatureMask disabled_features) +void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDeviceFeatures& vk_features) { m_max_texture_size = std::min(m_device_properties.limits.maxImageDimension2D, m_device_properties.limits.maxFramebufferWidth); m_max_multisamples = GetMaxMultisamples(m_physical_device, m_device_properties); - m_features.dual_source_blend = - !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && m_device_features.dualSrcBlend; + m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && vk_features.dualSrcBlend; m_features.framebuffer_fetch = !(disabled_features & (FEATURE_MASK_FEEDBACK_LOOPS | FEATURE_MASK_FRAMEBUFFER_FETCH)) && m_optional_extensions.vk_ext_rasterization_order_attachment_access; @@ -2593,7 +2610,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features) m_features.noperspective_interpolation = true; m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF); - m_features.per_sample_shading = m_device_features.sampleRateShading; + m_features.per_sample_shading = vk_features.sampleRateShading; m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS); m_features.feedback_loops = !(disabled_features & FEATURE_MASK_FEEDBACK_LOOPS); @@ -2612,8 +2629,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features) if 
(m_features.texture_buffers_emulated_with_ssbo) WARNING_LOG("Emulating texture buffers with SSBOs."); - m_features.geometry_shaders = - !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && m_device_features.geometryShader; + m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader; m_features.partial_msaa_resolve = true; m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host; @@ -2621,8 +2637,9 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features) m_features.shader_cache = true; m_features.pipeline_cache = true; m_features.prefer_unused_textures = true; - - return true; + m_features.raster_order_views = + (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS) && vk_features.fragmentStoresAndAtomics && + m_optional_extensions.vk_ext_fragment_shader_interlock); } void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, @@ -2928,7 +2945,7 @@ void VulkanDevice::UnmapUniformBuffer(u32 size) bool VulkanDevice::CreateNullTexture() { - m_null_texture = VulkanTexture::Create(1, 1, 1, 1, 1, GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8, + m_null_texture = VulkanTexture::Create(1, 1, 1, 1, 1, GPUTexture::Type::RWTexture, GPUTexture::Format::RGBA8, VK_FORMAT_R8G8B8A8_UNORM); if (!m_null_texture) return false; @@ -2948,10 +2965,7 @@ bool VulkanDevice::CreateNullTexture() return false; for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) - { - m_current_textures[i] = m_null_texture.get(); m_current_samplers[i] = point_sampler; - } return true; } @@ -3005,59 +3019,89 @@ bool VulkanDevice::CreatePipelineLayouts() Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout"); } + if (m_features.raster_order_views) { - VkPipelineLayout& pl = m_pipeline_layouts[static_cast(GPUPipeline::Layout::SingleTextureAndUBO)]; - plb.AddDescriptorSet(m_ubo_ds_layout); - 
plb.AddDescriptorSet(m_single_texture_ds_layout); - // TODO: REMOVE ME - if (m_features.feedback_loops) - plb.AddDescriptorSet(m_feedback_loop_ds_layout); - if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++) + dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) return false; - Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout"); + Vulkan::SetObjectName(m_device, m_rov_ds_layout, "ROV Descriptor Set Layout"); } + for (u32 type = 0; type < 3; type++) { - VkPipelineLayout& pl = m_pipeline_layouts[static_cast(GPUPipeline::Layout::SingleTextureAndPushConstants)]; - plb.AddDescriptorSet(m_single_texture_ds_layout); - // TODO: REMOVE ME - if (m_features.feedback_loops) - plb.AddDescriptorSet(m_feedback_loop_ds_layout); - plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); - if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) - return false; - Vulkan::SetObjectName(m_device, pl, "Single Texture Pipeline Layout"); - } + const bool feedback_loop = (type == 1); + const bool rov = (type == 2); + if ((feedback_loop && !m_features.feedback_loops) || (rov && !m_features.raster_order_views)) + continue; - { - VkPipelineLayout& pl = - m_pipeline_layouts[static_cast(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)]; - plb.AddDescriptorSet(m_single_texture_buffer_ds_layout); - // TODO: REMOVE ME - if (m_features.feedback_loops) - plb.AddDescriptorSet(m_feedback_loop_ds_layout); - plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); - if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) - return false; - Vulkan::SetObjectName(m_device, pl, "Single Texture Buffer + UBO Pipeline Layout"); - } + { + VkPipelineLayout& pl = m_pipeline_layouts[type][static_cast(GPUPipeline::Layout::SingleTextureAndUBO)]; + 
plb.AddDescriptorSet(m_ubo_ds_layout); + plb.AddDescriptorSet(m_single_texture_ds_layout); + if (feedback_loop) + plb.AddDescriptorSet(m_feedback_loop_ds_layout); + else if (rov) + plb.AddDescriptorSet(m_rov_ds_layout); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout"); + } - { - VkPipelineLayout& pl = m_pipeline_layouts[static_cast(GPUPipeline::Layout::MultiTextureAndUBO)]; - plb.AddDescriptorSet(m_ubo_ds_layout); - plb.AddDescriptorSet(m_multi_texture_ds_layout); - if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) - return false; - Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout"); - } + { + VkPipelineLayout& pl = + m_pipeline_layouts[type][static_cast(GPUPipeline::Layout::SingleTextureAndPushConstants)]; + plb.AddDescriptorSet(m_single_texture_ds_layout); + if (feedback_loop) + plb.AddDescriptorSet(m_feedback_loop_ds_layout); + else if (rov) + plb.AddDescriptorSet(m_rov_ds_layout); + plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Single Texture Pipeline Layout"); + } - { - VkPipelineLayout& pl = m_pipeline_layouts[static_cast(GPUPipeline::Layout::MultiTextureAndPushConstants)]; - plb.AddDescriptorSet(m_multi_texture_ds_layout); - plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); - if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) - return false; - Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout"); + { + VkPipelineLayout& pl = + m_pipeline_layouts[type][static_cast(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)]; + plb.AddDescriptorSet(m_single_texture_buffer_ds_layout); + if (feedback_loop) + plb.AddDescriptorSet(m_feedback_loop_ds_layout); + else if (rov) + plb.AddDescriptorSet(m_rov_ds_layout); + 
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Single Texture Buffer + UBO Pipeline Layout"); + } + + { + VkPipelineLayout& pl = m_pipeline_layouts[type][static_cast(GPUPipeline::Layout::MultiTextureAndUBO)]; + plb.AddDescriptorSet(m_ubo_ds_layout); + plb.AddDescriptorSet(m_multi_texture_ds_layout); + if (feedback_loop) + plb.AddDescriptorSet(m_feedback_loop_ds_layout); + else if (rov) + plb.AddDescriptorSet(m_rov_ds_layout); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout"); + } + + { + VkPipelineLayout& pl = + m_pipeline_layouts[type][static_cast(GPUPipeline::Layout::MultiTextureAndPushConstants)]; + plb.AddDescriptorSet(m_multi_texture_ds_layout); + plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); + if (feedback_loop) + plb.AddDescriptorSet(m_feedback_loop_ds_layout); + else if (rov) + plb.AddDescriptorSet(m_rov_ds_layout); + if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout"); + } } return true; @@ -3065,14 +3109,13 @@ bool VulkanDevice::CreatePipelineLayouts() void VulkanDevice::DestroyPipelineLayouts() { - for (VkPipelineLayout& pl : m_pipeline_layouts) - { + m_pipeline_layouts.enumerate([this](auto& pl) { if (pl != VK_NULL_HANDLE) { vkDestroyPipelineLayout(m_device, pl, nullptr); pl = VK_NULL_HANDLE; } - } + }); auto destroy_dsl = [this](VkDescriptorSetLayout& l) { if (l != VK_NULL_HANDLE) @@ -3081,6 +3124,7 @@ void VulkanDevice::DestroyPipelineLayouts() l = VK_NULL_HANDLE; } }; + destroy_dsl(m_rov_ds_layout); destroy_dsl(m_feedback_loop_ds_layout); destroy_dsl(m_multi_texture_ds_layout); destroy_dsl(m_single_texture_buffer_ds_layout); @@ -3222,10 +3266,13 @@ bool 
VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsa } void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, - GPUPipeline::RenderPassFlag feedback_loop) + GPUPipeline::RenderPassFlag flags) { - bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds || - m_current_feedback_loop != feedback_loop); + const bool changed_layout = + (m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) != + (flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)); + bool changed = + (m_num_current_render_targets != num_rts || m_current_depth_target != ds || m_current_render_pass_flags != flags); bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); bool needs_rt_clear = false; @@ -3240,7 +3287,7 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText for (u32 i = num_rts; i < m_num_current_render_targets; i++) m_current_render_targets[i] = nullptr; m_num_current_render_targets = Truncate8(num_rts); - m_current_feedback_loop = feedback_loop; + m_current_render_pass_flags = flags; if (changed) { @@ -3253,12 +3300,12 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText return; } - if (!m_optional_extensions.vk_khr_dynamic_rendering || ((feedback_loop & GPUPipeline::ColorFeedbackLoop) && - !m_optional_extensions.vk_khr_dynamic_rendering_local_read)) + if (!m_optional_extensions.vk_khr_dynamic_rendering || + ((flags & GPUPipeline::ColorFeedbackLoop) && !m_optional_extensions.vk_khr_dynamic_rendering_local_read)) { m_current_framebuffer = m_framebuffer_manager.Lookup( (m_num_current_render_targets > 0) ? 
reinterpret_cast(m_current_render_targets.data()) : nullptr, - m_num_current_render_targets, m_current_depth_target, feedback_loop); + m_num_current_render_targets, m_current_depth_target, flags); if (m_current_framebuffer == VK_NULL_HANDLE) { ERROR_LOG("Failed to create framebuffer"); @@ -3266,8 +3313,10 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText } } - m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_INPUT_ATTACHMENT) | - ((feedback_loop & GPUPipeline::ColorFeedbackLoop) ? DIRTY_FLAG_INPUT_ATTACHMENT : 0); + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_INPUT_ATTACHMENT) | (changed_layout ? DIRTY_FLAG_PIPELINE_LAYOUT : 0) | + ((flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) ? + DIRTY_FLAG_INPUT_ATTACHMENT : + 0); } // TODO: This could use vkCmdClearAttachments() instead. @@ -3285,11 +3334,14 @@ void VulkanDevice::BeginRenderPass() // All textures should be in shader read only optimal already, but just in case.. const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); for (u32 i = 0; i < num_textures; i++) - m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); + { + if (m_current_textures[i]) + m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); + } // NVIDIA drivers appear to return random garbage when sampling the RT via a feedback loop, if the load op for // the render pass is CLEAR. Using vkCmdClearAttachments() doesn't work, so we have to clear the image instead. 
- if (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) + if (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop && IsDeviceNVIDIA()) { for (u32 i = 0; i < m_num_current_render_targets; i++) { @@ -3298,8 +3350,9 @@ void VulkanDevice::BeginRenderPass() } } - if (m_optional_extensions.vk_khr_dynamic_rendering && (m_optional_extensions.vk_khr_dynamic_rendering_local_read || - !(m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))) + if (m_optional_extensions.vk_khr_dynamic_rendering && + (m_optional_extensions.vk_khr_dynamic_rendering_local_read || + !(m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop))) { VkRenderingInfoKHR ri = { VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr}; @@ -3309,35 +3362,51 @@ void VulkanDevice::BeginRenderPass() if (m_num_current_render_targets > 0 || m_current_depth_target) { - ri.colorAttachmentCount = m_num_current_render_targets; - ri.pColorAttachments = (m_num_current_render_targets > 0) ? attachments.data() : nullptr; - - // set up clear values and transition targets - for (u32 i = 0; i < m_num_current_render_targets; i++) + if (!(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)) { - VulkanTexture* const rt = static_cast(m_current_render_targets[i]); - rt->TransitionToLayout((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ? - VulkanTexture::Layout::FeedbackLoop : - VulkanTexture::Layout::ColorAttachment); - rt->SetUseFenceCounter(GetCurrentFenceCounter()); + ri.colorAttachmentCount = m_num_current_render_targets; + ri.pColorAttachments = (m_num_current_render_targets > 0) ? 
attachments.data() : nullptr; - VkRenderingAttachmentInfo& ai = attachments[i]; - ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; - ai.pNext = nullptr; - ai.imageView = rt->GetView(); - ai.imageLayout = rt->GetVkLayout(); - ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR; - ai.resolveImageView = VK_NULL_HANDLE; - ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; - ai.loadOp = GetLoadOpForTexture(rt); - ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - - if (rt->GetState() == GPUTexture::State::Cleared) + // set up clear values and transition targets + for (u32 i = 0; i < m_num_current_render_targets; i++) { - std::memcpy(ai.clearValue.color.float32, rt->GetUNormClearColor().data(), - sizeof(ai.clearValue.color.float32)); + VulkanTexture* const rt = static_cast(m_current_render_targets[i]); + rt->TransitionToLayout((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ? + VulkanTexture::Layout::FeedbackLoop : + VulkanTexture::Layout::ColorAttachment); + rt->SetUseFenceCounter(GetCurrentFenceCounter()); + + VkRenderingAttachmentInfo& ai = attachments[i]; + ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; + ai.pNext = nullptr; + ai.imageView = rt->GetView(); + ai.imageLayout = rt->GetVkLayout(); + ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR; + ai.resolveImageView = VK_NULL_HANDLE; + ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + ai.loadOp = GetLoadOpForTexture(rt); + ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + + if (rt->GetState() == GPUTexture::State::Cleared) + { + std::memcpy(ai.clearValue.color.float32, rt->GetUNormClearColor().data(), + sizeof(ai.clearValue.color.float32)); + } + rt->SetState(GPUTexture::State::Dirty); + } + } + else + { + // Binding as image, but we still need to clear it. 
+ for (u32 i = 0; i < m_num_current_render_targets; i++) + { + VulkanTexture* rt = m_current_render_targets[i]; + if (rt->GetState() == GPUTexture::State::Cleared) + rt->CommitClear(m_current_command_buffer); + rt->SetState(GPUTexture::State::Dirty); + rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage); + rt->SetUseFenceCounter(GetCurrentFenceCounter()); } - rt->SetState(GPUTexture::State::Dirty); } if (VulkanTexture* const ds = m_current_depth_target) @@ -3396,8 +3465,9 @@ void VulkanDevice::BeginRenderPass() if (m_current_framebuffer != VK_NULL_HANDLE) { bi.framebuffer = m_current_framebuffer; - bi.renderPass = m_current_render_pass = GetRenderPass( - m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target, m_current_feedback_loop); + bi.renderPass = m_current_render_pass = + GetRenderPass(m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target, + m_current_render_pass_flags); if (bi.renderPass == VK_NULL_HANDLE) { ERROR_LOG("Failed to create render pass"); @@ -3416,7 +3486,7 @@ void VulkanDevice::BeginRenderPass() bi.clearValueCount = i + 1; } rt->SetState(GPUTexture::State::Dirty); - rt->TransitionToLayout((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ? + rt->TransitionToLayout((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ? VulkanTexture::Layout::FeedbackLoop : VulkanTexture::Layout::ColorAttachment); rt->SetUseFenceCounter(GetCurrentFenceCounter()); @@ -3473,7 +3543,10 @@ void VulkanDevice::BeginSwapChainRenderPass() // All textures should be in shader read only optimal already, but just in case.. 
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); for (u32 i = 0; i < num_textures; i++) - m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); + { + if (m_current_textures[i]) + m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); + } if (m_optional_extensions.vk_khr_dynamic_rendering) { @@ -3518,15 +3591,16 @@ void VulkanDevice::BeginSwapChainRenderPass() vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE); } + m_dirty_flags |= + (m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) ? + DIRTY_FLAG_PIPELINE_LAYOUT : + 0; s_stats.num_render_passes++; m_num_current_render_targets = 0; - m_current_feedback_loop = GPUPipeline::NoRenderPassFlags; + m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags; std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); m_current_depth_target = nullptr; m_current_framebuffer = VK_NULL_HANDLE; - - // Clear pipeline, it's likely incompatible. - m_current_pipeline = nullptr; } bool VulkanDevice::InRenderPass() @@ -3584,8 +3658,8 @@ void VulkanDevice::UnbindPipeline(VulkanPipeline* pl) void VulkanDevice::InvalidateCachedState() { - m_dirty_flags = - ALL_DIRTY_STATE | ((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ? DIRTY_FLAG_INPUT_ATTACHMENT : 0); + m_dirty_flags = ALL_DIRTY_STATE | + ((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ? DIRTY_FLAG_INPUT_ATTACHMENT : 0); m_current_render_pass = VK_NULL_HANDLE; m_current_pipeline = nullptr; } @@ -3601,9 +3675,18 @@ s32 VulkanDevice::IsRenderTargetBoundIndex(const GPUTexture* tex) const return -1; } +VulkanDevice::PipelineLayoutType VulkanDevice::GetPipelineLayoutType(GPUPipeline::RenderPassFlag flags) +{ + return (flags & GPUPipeline::BindRenderTargetsAsImages) ? + PipelineLayoutType::BindRenderTargetsAsImages : + ((flags & GPUPipeline::ColorFeedbackLoop) ? 
PipelineLayoutType::ColorFeedbackLoop : + PipelineLayoutType::Normal); +} + VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const { - return m_pipeline_layouts[static_cast(m_current_pipeline_layout)]; + return m_pipeline_layouts[static_cast(GetPipelineLayoutType(m_current_render_pass_flags))] + [static_cast(m_current_pipeline_layout)]; } void VulkanDevice::SetInitialPipelineState() @@ -3634,7 +3717,7 @@ void VulkanDevice::SetInitialPipelineState() void VulkanDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) { - VulkanTexture* T = texture ? static_cast(texture) : m_null_texture.get(); + VulkanTexture* T = static_cast(texture); const VkSampler vsampler = static_cast(sampler ? sampler : m_nearest_sampler.get())->GetSampler(); if (m_current_textures[slot] != T || m_current_samplers[slot] != vsampler) { @@ -3643,7 +3726,7 @@ void VulkanDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; } - if (texture) + if (T) { T->CommitClear(); T->SetUseFenceCounter(GetCurrentFenceCounter()); @@ -3673,7 +3756,7 @@ void VulkanDevice::UnbindTexture(VulkanTexture* tex) { if (m_current_textures[i] == tex) { - m_current_textures[i] = m_null_texture.get(); + m_current_textures[i] = nullptr; m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; } } @@ -3754,7 +3837,7 @@ void VulkanDevice::PreDrawCheck() BeginRenderPass(); DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); - const u32 update_mask = (m_current_feedback_loop ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT); + const u32 update_mask = (m_current_render_pass_flags ? 
~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT); const u32 dirty = m_dirty_flags & update_mask; m_dirty_flags = m_dirty_flags & ~update_mask; @@ -3774,6 +3857,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) { [[maybe_unused]] bool new_dynamic_offsets = false; + VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout(); std::array ds; u32 first_ds = 0; u32 num_ds = 0; @@ -3796,8 +3880,9 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::SingleTextureAndPushConstants) { - DebugAssert(m_current_textures[0] && m_current_samplers[0] != VK_NULL_HANDLE); - ds[num_ds++] = m_current_textures[0]->GetDescriptorSetWithSampler(m_current_samplers[0]); + VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get(); + DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE); + ds[num_ds++] = tex->GetDescriptorSetWithSampler(m_current_samplers[0]); } else if constexpr (layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) { @@ -3813,14 +3898,14 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) { for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) { - DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE); - dsub.AddCombinedImageSamplerDescriptorWrite(VK_NULL_HANDLE, i, m_current_textures[i]->GetView(), - m_current_samplers[i], m_current_textures[i]->GetVkLayout()); + VulkanTexture* const tex = m_current_textures[i] ? m_current_textures[i] : m_null_texture.get(); + DebugAssert(tex && m_current_samplers[i] != VK_NULL_HANDLE); + dsub.AddCombinedImageSamplerDescriptorWrite(VK_NULL_HANDLE, i, tex->GetView(), m_current_samplers[i], + tex->GetVkLayout()); } const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 
1 : 0; - dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, - m_pipeline_layouts[static_cast(m_current_pipeline_layout)], set); + dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set); if (num_ds == 0) return true; } @@ -3834,21 +3919,42 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) { - DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE); - dsub.AddCombinedImageSamplerDescriptorWrite(tds, i, m_current_textures[i]->GetView(), m_current_samplers[i], - m_current_textures[i]->GetVkLayout()); + VulkanTexture* const tex = m_current_textures[i] ? m_current_textures[i] : m_null_texture.get(); + DebugAssert(tex && m_current_samplers[i] != VK_NULL_HANDLE); + dsub.AddCombinedImageSamplerDescriptorWrite(tds, i, tex->GetView(), m_current_samplers[i], tex->GetVkLayout()); } dsub.Update(m_device, false); } } - if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || - layout == GPUPipeline::Layout::SingleTextureAndPushConstants || - layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) + if (m_num_current_render_targets > 0 && + ((dirty & DIRTY_FLAG_INPUT_ATTACHMENT) || + (dirty & DIRTY_FLAG_PIPELINE_LAYOUT && + (m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages))))) { - if ((dirty & DIRTY_FLAG_INPUT_ATTACHMENT) || - (dirty & DIRTY_FLAG_PIPELINE_LAYOUT && (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))) + if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) + { + VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout); + if (ids == VK_NULL_HANDLE) + return false; + + ds[num_ds++] = ids; + + Vulkan::DescriptorSetUpdateBuilder dsub; + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + dsub.AddStorageImageDescriptorWrite(ids, i, m_current_render_targets[i]->GetView(), + 
m_current_render_targets[i]->GetVkLayout()); + } + + // Annoyingly, have to update all slots... + for (u32 i = m_num_current_render_targets; i < MAX_IMAGE_RENDER_TARGETS; i++) + dsub.AddStorageImageDescriptorWrite(ids, i, m_null_texture->GetView(), m_null_texture->GetVkLayout()); + + dsub.Update(m_device, false); + } + else { VkDescriptorSet ids = AllocateDescriptorSet(m_feedback_loop_ds_layout); if (ids == VK_NULL_HANDLE) @@ -3864,9 +3970,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) } DebugAssert(num_ds > 0); - vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, - m_pipeline_layouts[static_cast(m_current_pipeline_layout)], first_ds, num_ds, ds.data(), - static_cast(new_dynamic_offsets), + vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds, + num_ds, ds.data(), static_cast(new_dynamic_offsets), new_dynamic_offsets ? &m_uniform_buffer_position : nullptr); return true; diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h index 059c2f8cb..8934b003f 100644 --- a/src/util/vulkan_device.h +++ b/src/util/vulkan_device.h @@ -9,6 +9,8 @@ #include "vulkan_loader.h" #include "vulkan_stream_buffer.h" +#include "common/dimensional_array.h" + #include #include #include @@ -43,6 +45,7 @@ public: struct OptionalExtensions { bool vk_ext_external_memory_host : 1; + bool vk_ext_fragment_shader_interlock : 1; bool vk_ext_full_screen_exclusive : 1; bool vk_ext_memory_budget : 1; bool vk_ext_rasterization_order_attachment_access : 1; @@ -124,7 +127,7 @@ public: void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, - GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override; + GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 
slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -250,6 +253,14 @@ private: DIRTY_FLAG_TEXTURES_OR_SAMPLERS | DIRTY_FLAG_INPUT_ATTACHMENT, }; + enum class PipelineLayoutType : u8 + { + Normal, + ColorFeedbackLoop, + BindRenderTargetsAsImages, + MaxCount, + }; + struct RenderPassCacheKey { struct RenderTarget @@ -324,12 +335,10 @@ private: using ExtensionList = std::vector; static bool SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo& wi, OptionalExtensions* oe, bool enable_debug_utils); - bool SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface); - bool SelectDeviceFeatures(); - bool CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer); + bool SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface, Error* error); + bool CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer, FeatureMask disabled_features, Error* error); void ProcessDeviceExtensions(); - - bool CheckFeatures(FeatureMask disabled_features); + void SetFeatures(FeatureMask disabled_features, const VkPhysicalDeviceFeatures& vk_features); static u32 GetMaxMultisamples(VkPhysicalDevice physical_device, const VkPhysicalDeviceProperties& properties); @@ -360,6 +369,7 @@ private: s32 IsRenderTargetBoundIndex(const GPUTexture* tex) const; /// Applies any changed state. + static PipelineLayoutType GetPipelineLayoutType(GPUPipeline::RenderPassFlag flags); VkPipelineLayout GetCurrentVkPipelineLayout() const; void SetInitialPipelineState(); void PreDrawCheck(); @@ -437,7 +447,6 @@ private: // TODO: Move to static? 
VkDebugUtilsMessengerEXT m_debug_messenger_callback = VK_NULL_HANDLE; - VkPhysicalDeviceFeatures m_device_features = {}; VkPhysicalDeviceProperties m_device_properties = {}; VkPhysicalDeviceDriverPropertiesKHR m_device_driver_properties = {}; OptionalExtensions m_optional_extensions = {}; @@ -451,7 +460,10 @@ private: VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE; VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE; VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE; - std::array(GPUPipeline::Layout::MaxCount)> m_pipeline_layouts = {}; + VkDescriptorSetLayout m_rov_ds_layout = VK_NULL_HANDLE; + DimensionalArray(GPUPipeline::Layout::MaxCount), + static_cast(PipelineLayoutType::MaxCount)> + m_pipeline_layouts = {}; VulkanStreamBuffer m_vertex_buffer; VulkanStreamBuffer m_index_buffer; @@ -466,8 +478,8 @@ private: // Which bindings/state has to be updated before the next draw. u32 m_dirty_flags = ALL_DIRTY_STATE; - u8 m_num_current_render_targets = 0; - GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags; + u32 m_num_current_render_targets = 0; + GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags; std::array m_current_render_targets = {}; VulkanTexture* m_current_depth_target = nullptr; VkFramebuffer m_current_framebuffer = VK_NULL_HANDLE; @@ -479,6 +491,6 @@ private: std::array m_current_textures = {}; std::array m_current_samplers = {}; VulkanTextureBuffer* m_current_texture_buffer = nullptr; - GSVector4i m_current_viewport = {}; + GSVector4i m_current_viewport = GSVector4i::cxpr(0, 0, 1, 1); GSVector4i m_current_scissor = GSVector4i::cxpr(0, 0, 1, 1); }; diff --git a/src/util/vulkan_pipeline.cpp b/src/util/vulkan_pipeline.cpp index 4a4f1c43c..cda46ecc8 100644 --- a/src/util/vulkan_pipeline.cpp +++ b/src/util/vulkan_pipeline.cpp @@ -207,7 +207,8 @@ std::unique_ptr VulkanDevice::CreatePipeline(const GPUPipeline::Gra 
gpb.AddDynamicState(VK_DYNAMIC_STATE_VIEWPORT); gpb.AddDynamicState(VK_DYNAMIC_STATE_SCISSOR); - gpb.SetPipelineLayout(m_pipeline_layouts[static_cast(config.layout)]); + gpb.SetPipelineLayout(m_pipeline_layouts[static_cast(GetPipelineLayoutType(config.render_pass_flags))] + [static_cast(config.layout)]); if (m_optional_extensions.vk_khr_dynamic_rendering && (m_optional_extensions.vk_khr_dynamic_rendering_local_read || !(config.render_pass_flags & GPUPipeline::ColorFeedbackLoop))) diff --git a/src/util/vulkan_texture.cpp b/src/util/vulkan_texture.cpp index 2a2e9b8b8..77a6e48c5 100644 --- a/src/util/vulkan_texture.cpp +++ b/src/util/vulkan_texture.cpp @@ -124,7 +124,8 @@ std::unique_ptr VulkanTexture::Create(u32 width, u32 height, u32 { DebugAssert(levels == 1); ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT; + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; } break;