GPUDevice: Add support for Raster Ordered Views

This commit is contained in:
Stenzek 2024-07-22 01:01:57 +10:00
parent e743c5d1b1
commit 1006fa00da
No known key found for this signature in database
17 changed files with 679 additions and 360 deletions

View file

@ -80,8 +80,8 @@ bool D3D11Device::CreateDevice(std::string_view adapter, bool threaded_presentat
ComPtr<IDXGIAdapter1> dxgi_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter);
m_max_feature_level = D3DCommon::GetDeviceMaxFeatureLevel(dxgi_adapter.Get());
static constexpr std::array<D3D_FEATURE_LEVEL, 3> requested_feature_levels = {
{D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0}};
static constexpr std::array<D3D_FEATURE_LEVEL, 4> requested_feature_levels = {
{D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0}};
ComPtr<ID3D11Device> temp_device;
ComPtr<ID3D11DeviceContext> temp_context;
@ -194,6 +194,14 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
m_features.shader_cache = true;
m_features.pipeline_cache = false;
m_features.prefer_unused_textures = false;
m_features.raster_order_views = false;
if (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS))
{
D3D11_FEATURE_DATA_D3D11_OPTIONS2 data = {};
m_features.raster_order_views =
(SUCCEEDED(m_device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &data, sizeof(data))) &&
data.ROVsSupported);
}
}
u32 D3D11Device::GetSwapChainBufferCount() const
@ -567,12 +575,15 @@ void D3D11Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
src11->GetD3DTexture(), 0, dst11->GetDXGIFormat());
}
bool D3D11Device::IsRenderTargetBound(const GPUTexture* tex) const
bool D3D11Device::IsRenderTargetBound(const D3D11Texture* tex) const
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
if (tex->IsRenderTarget() || tex->IsRWTexture())
{
if (m_current_render_targets[i] == tex)
return true;
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
if (m_current_render_targets[i] == tex)
return true;
}
}
return false;
@ -580,23 +591,26 @@ bool D3D11Device::IsRenderTargetBound(const GPUTexture* tex) const
// Forwards the clear request for `t` with value `c` to the base GPUDevice implementation and,
// when the texture is one of the currently-bound render targets, calls CommitClear() on it with
// the device context.
// NOTE(review): this span is taken from a rendered diff without +/- markers. The statements that
// use `t` directly and the ones that use the downcast pointer `T` appear to be the before/after
// revisions of the same logic shown together - confirm against the real file.
void D3D11Device::ClearRenderTarget(GPUTexture* t, u32 c)
{
// Variant operating on the GPUTexture* parameter, downcasting only for CommitClear().
GPUDevice::ClearRenderTarget(t, c);
if (IsRenderTargetBound(t))
static_cast<D3D11Texture*>(t)->CommitClear(m_context.Get());
// Variant that downcasts once up front and passes the D3D11Texture* to IsRenderTargetBound().
D3D11Texture* const T = static_cast<D3D11Texture*>(t);
GPUDevice::ClearRenderTarget(T, c);
if (IsRenderTargetBound(T))
T->CommitClear(m_context.Get());
}
// Forwards the depth clear for `t` with depth value `d` to the base GPUDevice implementation and,
// when `t` is the currently-bound depth target, calls CommitClear() on it with the device context.
// NOTE(review): rendered-diff span; the `t`-based and `T`-based statements look like the old and
// new revisions of the same body listed back to back - verify before treating both as live code.
void D3D11Device::ClearDepth(GPUTexture* t, float d)
{
// Variant comparing the raw GPUTexture* against the bound depth target.
GPUDevice::ClearDepth(t, d);
if (m_current_depth_target == t)
static_cast<D3D11Texture*>(t)->CommitClear(m_context.Get());
// Variant comparing the downcast D3D11Texture* against the bound depth target.
D3D11Texture* const T = static_cast<D3D11Texture*>(t);
GPUDevice::ClearDepth(T, d);
if (T == m_current_depth_target)
T->CommitClear(m_context.Get());
}
// Forwards the invalidation of `t` to the base GPUDevice implementation and calls CommitClear()
// on the texture if it is currently bound, either as a colour target or as the depth target.
// NOTE(review): rendered-diff span; the two if-statements below appear to be the before/after
// forms of the same check rather than two checks meant to run in sequence - confirm.
void D3D11Device::InvalidateRenderTarget(GPUTexture* t)
{
GPUDevice::InvalidateRenderTarget(t);
// This form keys on IsRenderTarget(): anything that is not a render target is compared against
// the depth target.
if (t->IsRenderTarget() ? IsRenderTargetBound(t) : (m_current_depth_target == t))
static_cast<D3D11Texture*>(t)->CommitClear(m_context.Get());
D3D11Texture* const T = static_cast<D3D11Texture*>(t);
GPUDevice::InvalidateRenderTarget(T);
// This form keys on IsDepthStencil() instead: anything that is not a depth-stencil texture is
// looked up through IsRenderTargetBound().
if (T->IsDepthStencil() ? (m_current_depth_target == T) : IsRenderTargetBound(T))
T->CommitClear(m_context.Get());
}
void D3D11Device::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle)
@ -662,6 +676,7 @@ bool D3D11Device::BeginPresent(bool skip_present)
m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr);
s_stats.num_render_passes++;
m_num_current_render_targets = 0;
m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_current_depth_target = nullptr;
return true;
@ -934,15 +949,20 @@ void D3D11Device::UnmapUniformBuffer(u32 size)
}
void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop)
GPUPipeline::RenderPassFlag flags)
{
ID3D11RenderTargetView* rtvs[MAX_RENDER_TARGETS];
DebugAssert(!feedback_loop);
DebugAssert(
!(flags & (GPUPipeline::RenderPassFlag::ColorFeedbackLoop | GPUPipeline::RenderPassFlag::SampleDepthBuffer)));
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds);
m_current_depth_target = static_cast<D3D11Texture*>(ds);
// Make sure DSV isn't bound.
D3D11Texture* DS = static_cast<D3D11Texture*>(ds);
if (DS)
DS->CommitClear(m_context.Get());
// Make sure textures aren't bound.
bool changed =
(m_num_current_render_targets != num_rts || m_current_depth_target != DS || m_current_render_pass_flags != flags);
m_current_render_pass_flags = flags;
m_current_depth_target = DS;
if (ds)
{
const ID3D11ShaderResourceView* srv = static_cast<D3D11Texture*>(ds)->GetD3DSRV();
@ -958,13 +978,12 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
for (u32 i = 0; i < num_rts; i++)
{
D3D11Texture* const dt = static_cast<D3D11Texture*>(rts[i]);
changed |= m_current_render_targets[i] != dt;
m_current_render_targets[i] = dt;
rtvs[i] = dt->GetD3DRTV();
dt->CommitClear(m_context.Get());
D3D11Texture* const RT = static_cast<D3D11Texture*>(rts[i]);
changed |= m_current_render_targets[i] != RT;
m_current_render_targets[i] = RT;
RT->CommitClear(m_context.Get());
const ID3D11ShaderResourceView* srv = dt->GetD3DSRV();
const ID3D11ShaderResourceView* srv = RT->GetD3DSRV();
for (u32 j = 0; j < MAX_TEXTURE_SAMPLERS; j++)
{
if (m_current_textures[j] && m_current_textures[j] == srv)
@ -981,7 +1000,27 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
return;
s_stats.num_render_passes++;
m_context->OMSetRenderTargets(num_rts, rtvs, ds ? static_cast<D3D11Texture*>(ds)->GetD3DDSV() : nullptr);
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
{
std::array<ID3D11UnorderedAccessView*, MAX_RENDER_TARGETS> uavs;
for (u32 i = 0; i < m_num_current_render_targets; i++)
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
m_num_current_render_targets, uavs.data(), nullptr);
}
else
{
std::array<ID3D11RenderTargetView*, MAX_RENDER_TARGETS> rtvs;
for (u32 i = 0; i < m_num_current_render_targets; i++)
rtvs[i] = m_current_render_targets[i]->GetD3DRTV();
m_context->OMSetRenderTargets(m_num_current_render_targets,
(m_num_current_render_targets > 0) ? rtvs.data() : nullptr,
m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr);
}
}
void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
@ -1000,7 +1039,11 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
ID3D11SamplerState* S = sampler ? static_cast<D3D11Sampler*>(sampler)->GetSamplerState() : nullptr;
// Runtime will null these if we don't...
DebugAssert(!texture || !IsRenderTargetBound(texture) || m_current_depth_target != texture);
DebugAssert(!texture ||
!((texture->IsRenderTarget() || texture->IsRWTexture()) &&
IsRenderTargetBound(static_cast<D3D11Texture*>(texture))) ||
!(texture->IsDepthStencil() &&
(!m_current_depth_target || m_current_depth_target != static_cast<D3D11Texture*>(texture))));
if (m_current_textures[slot] != T)
{
@ -1038,7 +1081,7 @@ void D3D11Device::UnbindTexture(D3D11Texture* tex)
}
}
if (tex->IsRenderTarget())
if (tex->IsRenderTarget() || tex->IsRWTexture())
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
@ -1050,7 +1093,7 @@ void D3D11Device::UnbindTexture(D3D11Texture* tex)
}
}
}
else if (m_current_depth_target == tex)
else if (tex->IsDepthStencil() && m_current_depth_target == tex)
{
WARNING_LOG("Unbinding current DS");
SetRenderTargets(nullptr, 0, nullptr);

View file

@ -89,7 +89,7 @@ public:
void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override;
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) override;
void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -142,7 +142,7 @@ private:
bool CreateBuffers();
void DestroyBuffers();
bool IsRenderTargetBound(const GPUTexture* tex) const;
bool IsRenderTargetBound(const D3D11Texture* tex) const;
ComPtr<ID3D11RasterizerState> GetRasterizationState(const GPUPipeline::RasterizationState& rs, Error* error);
ComPtr<ID3D11DepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds, Error* error);
@ -180,6 +180,7 @@ private:
D3D11Pipeline* m_current_pipeline = nullptr;
std::array<D3D11Texture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
u32 m_num_current_render_targets = 0;
GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
D3D11Texture* m_current_depth_target = nullptr;
ID3D11InputLayout* m_current_input_layout = nullptr;

View file

@ -95,19 +95,16 @@ std::unique_ptr<GPUSampler> D3D11Device::CreateSampler(const GPUSampler::Config&
// Constructs the texture wrapper: the dimensions, layer/level/sample counts, type and format are
// narrowed and passed to the GPUTexture base, and the supplied COM pointers are moved into the
// corresponding members.
// NOTE(review): rendered-diff span; the parameter list and member-initializer list each appear
// twice (without and with the `uav` / `m_uav` entry). The shorter lines look like the pre-change
// revision - confirm against the real file.
D3D11Texture::D3D11Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format,
ComPtr<ID3D11Texture2D> texture, ComPtr<ID3D11ShaderResourceView> srv,
ComPtr<ID3D11View> rtv_dsv)
ComPtr<ID3D11View> rtv_dsv, ComPtr<ID3D11UnorderedAccessView> uav)
: GPUTexture(static_cast<u16>(width), static_cast<u16>(height), static_cast<u8>(layers), static_cast<u8>(levels),
static_cast<u8>(samples), type, format),
m_texture(std::move(texture)), m_srv(std::move(srv)), m_rtv_dsv(std::move(rtv_dsv))
m_texture(std::move(texture)), m_srv(std::move(srv)), m_rtv_dsv(std::move(rtv_dsv)), m_uav(std::move(uav))
{
}
// Destructor: asks the device singleton to unbind this texture before the views are released.
D3D11Texture::~D3D11Texture()
{
D3D11Device::GetInstance().UnbindTexture(this);
// NOTE(review): judging by the hunk header line counts, these explicit Reset() calls look like
// the lines this commit removes - confirm against the real file. They drop the DSV/RTV view,
// then the SRV, then the texture resource, in that order.
m_rtv_dsv.Reset();
m_srv.Reset();
m_texture.Reset();
}
D3D11_TEXTURE2D_DESC D3D11Texture::GetDesc() const
@ -247,7 +244,7 @@ std::unique_ptr<D3D11Texture> D3D11Texture::Create(ID3D11Device* device, u32 wid
cpu_access = D3D11_CPU_ACCESS_WRITE;
break;
case Type::RWTexture:
bind_flags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
bind_flags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
break;
default:
break;
@ -327,8 +324,23 @@ std::unique_ptr<D3D11Texture> D3D11Texture::Create(ID3D11Device* device, u32 wid
rtv_dsv = std::move(dsv);
}
ComPtr<ID3D11UnorderedAccessView> uav;
if (bind_flags & D3D11_BIND_UNORDERED_ACCESS)
{
const D3D11_UAV_DIMENSION uav_dimension =
(desc.ArraySize > 1 ? D3D11_UAV_DIMENSION_TEXTURE2DARRAY : D3D11_UAV_DIMENSION_TEXTURE2D);
const CD3D11_UNORDERED_ACCESS_VIEW_DESC uav_desc(uav_dimension, fm.srv_format, 0, 0, desc.ArraySize);
const HRESULT hr = device->CreateUnorderedAccessView(texture.Get(), &uav_desc, uav.GetAddressOf());
if (FAILED(hr)) [[unlikely]]
{
ERROR_LOG("Create UAV for texture failed: 0x{:08X}", static_cast<unsigned>(hr));
return nullptr;
}
}
return std::unique_ptr<D3D11Texture>(new D3D11Texture(width, height, layers, levels, samples, type, format,
std::move(texture), std::move(srv), std::move(rtv_dsv)));
std::move(texture), std::move(srv), std::move(rtv_dsv),
std::move(uav)));
}
D3D11TextureBuffer::D3D11TextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements)

View file

@ -60,6 +60,7 @@ public:
{
return reinterpret_cast<ID3D11RenderTargetView* const*>(m_rtv_dsv.GetAddressOf());
}
ALWAYS_INLINE ID3D11UnorderedAccessView* GetD3DUAV() const { return m_uav.Get(); }
DXGI_FORMAT GetDXGIFormat() const;
ALWAYS_INLINE operator ID3D11Texture2D*() const { return m_texture.Get(); }
@ -72,6 +73,7 @@ public:
{
return static_cast<ID3D11DepthStencilView*>(m_rtv_dsv.Get());
}
ALWAYS_INLINE operator ID3D11UnorderedAccessView*() const { return m_uav.Get(); }
ALWAYS_INLINE operator bool() const { return static_cast<bool>(m_texture); }
static std::unique_ptr<D3D11Texture> Create(ID3D11Device* device, u32 width, u32 height, u32 layers, u32 levels,
@ -89,11 +91,13 @@ public:
private:
D3D11Texture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format,
ComPtr<ID3D11Texture2D> texture, ComPtr<ID3D11ShaderResourceView> srv, ComPtr<ID3D11View> rtv_dsv);
ComPtr<ID3D11Texture2D> texture, ComPtr<ID3D11ShaderResourceView> srv, ComPtr<ID3D11View> rtv_dsv,
ComPtr<ID3D11UnorderedAccessView> uav);
ComPtr<ID3D11Texture2D> m_texture;
ComPtr<ID3D11ShaderResourceView> m_srv;
ComPtr<ID3D11View> m_rtv_dsv;
ComPtr<ID3D11UnorderedAccessView> m_uav;
u32 m_mapped_subresource = 0;
};

View file

@ -130,7 +130,7 @@ bool D3D12Device::CreateDevice(std::string_view adapter, bool threaded_presentat
m_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter);
HRESULT hr;
HRESULT hr = S_OK;
// Enabling the debug layer will fail if the Graphics Tools feature is not installed.
if (m_debug_device)
@ -149,8 +149,15 @@ bool D3D12Device::CreateDevice(std::string_view adapter, bool threaded_presentat
}
// Create the actual device.
m_feature_level = D3D_FEATURE_LEVEL_11_0;
hr = D3D12CreateDevice(m_adapter.Get(), m_feature_level, IID_PPV_ARGS(&m_device));
for (D3D_FEATURE_LEVEL try_feature_level : {D3D_FEATURE_LEVEL_11_0})
{
hr = D3D12CreateDevice(m_adapter.Get(), try_feature_level, IID_PPV_ARGS(&m_device));
if (SUCCEEDED(hr))
{
m_feature_level = try_feature_level;
break;
}
}
if (FAILED(hr))
{
Error::SetHResult(error, "Failed to create D3D12 device: ", hr);
@ -479,15 +486,23 @@ bool D3D12Device::CreateDescriptorHeaps(Error* error)
// Allocate null SRV descriptor for unbound textures.
static constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {
DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, {}};
if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor))
{
Error::SetStringView(error, "Failed to allocate null SRV descriptor");
return false;
}
m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle);
// Same for UAVs.
static constexpr D3D12_UNORDERED_ACCESS_VIEW_DESC null_uav_desc = {
DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_UAV_DIMENSION_TEXTURE2D, {}};
if (!m_descriptor_heap_manager.Allocate(&m_null_uav_descriptor))
{
Error::SetStringView(error, "Failed to allocate null UAV descriptor");
return false;
}
m_device->CreateUnorderedAccessView(nullptr, nullptr, &null_uav_desc, m_null_uav_descriptor.cpu_handle);
// Same for samplers.
m_point_sampler = GetSampler(GPUSampler::GetNearestConfig());
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
@ -497,6 +512,8 @@ bool D3D12Device::CreateDescriptorHeaps(Error* error)
void D3D12Device::DestroyDescriptorHeaps()
{
if (m_null_uav_descriptor)
m_descriptor_heap_manager.Free(&m_null_uav_descriptor);
if (m_null_srv_descriptor)
m_descriptor_heap_manager.Free(&m_null_srv_descriptor);
m_sampler_heap_manager.Destroy();
@ -1248,6 +1265,15 @@ void D3D12Device::SetFeatures(FeatureMask disabled_features)
HRESULT hr = m_dxgi_factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported,
sizeof(allow_tearing_supported));
m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE);
m_features.raster_order_views = false;
if (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS))
{
D3D12_FEATURE_DATA_D3D12_OPTIONS options = {};
m_features.raster_order_views =
SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) &&
options.ROVsSupported;
}
}
void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
@ -1479,7 +1505,7 @@ void D3D12Device::UnmapIndexBuffer(u32 used_index_count)
void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
{
static constexpr std::array<u8, static_cast<u8>(GPUPipeline::Layout::MaxCount)> push_parameter = {
static constexpr std::array<u8, static_cast<u8>(GPUPipeline::Layout::MaxCount)> push_parameters = {
0, // SingleTextureAndUBO
2, // SingleTextureAndPushConstants
1, // SingleTextureBufferAndPushConstants
@ -1495,8 +1521,10 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
}
s_stats.buffer_streamed += data_size;
GetCommandList()->SetGraphicsRoot32BitConstants(push_parameter[static_cast<u8>(m_current_pipeline_layout)],
data_size / 4u, data, 0);
const u32 push_param =
push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
}
void* D3D12Device::MapUniformBuffer(u32 size)
@ -1526,63 +1554,96 @@ bool D3D12Device::CreateRootSignatures(Error* error)
{
D3D12::RootSignatureBuilder rsb;
for (u32 rov = 0; rov < 2; rov++)
{
auto& rs = m_root_signatures[static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)];
if (rov && !m_features.raster_order_views)
break;
rsb.SetInputAssemblerFlag();
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Single Texture + UBO Pipeline Layout");
}
{
auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)];
{
auto& rs = m_root_signatures[static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)];
rsb.SetInputAssemblerFlag();
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
if (rov)
{
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
D3D12_SHADER_VISIBILITY_PIXEL);
}
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Single Texture + UBO Pipeline Layout");
}
rsb.SetInputAssemblerFlag();
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout");
}
{
auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)];
{
auto& rs = m_root_signatures[static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];
rsb.SetInputAssemblerFlag();
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
if (rov)
{
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
D3D12_SHADER_VISIBILITY_PIXEL);
}
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout");
}
rsb.SetInputAssemblerFlag();
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + UBO Pipeline Layout");
}
{
auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];
{
auto& rs = m_root_signatures[static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBO)];
rsb.SetInputAssemblerFlag();
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
if (rov)
{
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
D3D12_SHADER_VISIBILITY_PIXEL);
}
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + UBO Pipeline Layout");
}
rsb.SetInputAssemblerFlag();
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO Pipeline Layout");
}
{
auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBO)];
{
auto& rs = m_root_signatures[static_cast<u8>(GPUPipeline::Layout::MultiTextureAndPushConstants)];
rsb.SetInputAssemblerFlag();
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
if (rov)
{
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
D3D12_SHADER_VISIBILITY_PIXEL);
}
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO Pipeline Layout");
}
rsb.SetInputAssemblerFlag();
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout");
{
auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndPushConstants)];
rsb.SetInputAssemblerFlag();
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
D3D12_SHADER_VISIBILITY_PIXEL);
if (rov)
{
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
D3D12_SHADER_VISIBILITY_PIXEL);
}
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout");
}
}
return true;
@ -1590,29 +1651,30 @@ bool D3D12Device::CreateRootSignatures(Error* error)
// Releases every root signature held in m_root_signatures by calling Reset() on each element.
// NOTE(review): rendered-diff span; the reverse-iterator loop and the enumerate() call appear to
// be the old and new ways of doing the same thing, shown together - confirm.
void D3D12Device::DestroyRootSignatures()
{
// Walks the container back to front, resetting each entry.
for (auto it = m_root_signatures.rbegin(); it != m_root_signatures.rend(); ++it)
it->Reset();
// Visits each element through the container's enumerate() helper, resetting it.
m_root_signatures.enumerate([](auto& it) { it.Reset(); });
}
// Records the colour targets `rts[0..num_rts)`, the depth target `ds` and the render pass flags
// as the current render target state. Any active render pass is ended first.
// NOTE(review): rendered-diff span; both the `feedback_loop` and `flags` spellings of the last
// parameter are present, and some statements below look like before/after revisions of each
// other (e.g. the per-target loop vs. the memcpy) - confirm which lines are live in the real file.
void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop)
GPUPipeline::RenderPassFlag flags)
{
// Debug-only checks: one form rejects any flag, the other rejects only ColorFeedbackLoop and
// SampleDepthBuffer.
DebugAssert(!feedback_loop);
DebugAssert(
!(flags & (GPUPipeline::RenderPassFlag::ColorFeedbackLoop | GPUPipeline::RenderPassFlag::SampleDepthBuffer)));
if (InRenderPass())
EndRenderPass();
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
m_current_depth_target = static_cast<D3D12Texture*>(ds);
// Per-target form: stores each target and calls CommitClear() on it with the command list.
for (u32 i = 0; i < num_rts; i++)
{
D3D12Texture* const dt = static_cast<D3D12Texture*>(rts[i]);
m_current_render_targets[i] = dt;
dt->CommitClear(cmdlist);
}
// Bulk form: copies the num_rts pointers in one go; no CommitClear() is issued here.
if (num_rts > 0)
std::memcpy(m_current_render_targets.data(), rts, sizeof(D3D12Texture*) * num_rts);
// Null out slots that were in use by the previous binding but are beyond the new count.
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
m_current_render_targets[i] = nullptr;
m_num_current_render_targets = num_rts;
// Need a root signature change if switching to UAVs.
// The XOR isolates a change in the BindRenderTargetsAsImages bit between the previous and the
// new flags; only then is the layout-dependent state marked dirty.
m_dirty_flags |=
((m_current_render_pass_flags ^ flags) & GPUPipeline::BindRenderTargetsAsImages) ? LAYOUT_DEPENDENT_DIRTY_STATE : 0;
// DIRTY_FLAG_RT_UAVS is set whenever targets are bound as images and cleared otherwise.
m_dirty_flags = (flags & GPUPipeline::BindRenderTargetsAsImages) ? (m_dirty_flags | DIRTY_FLAG_RT_UAVS) :
(m_dirty_flags & ~DIRTY_FLAG_RT_UAVS);
m_current_render_pass_flags = flags;
}
void D3D12Device::BeginRenderPass()
@ -1630,48 +1692,62 @@ void D3D12Device::BeginRenderPass()
if (m_num_current_render_targets > 0 || m_current_depth_target) [[likely]]
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
if (!IsUsingROVRootSignature()) [[likely]]
{
D3D12Texture* const rt = m_current_render_targets[i];
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
rt->SetUseFenceValue(GetCurrentFenceValue());
D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i];
desc.cpuDescriptor = rt->GetWriteDescriptor();
desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
switch (rt->GetState())
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
case GPUTexture::State::Cleared:
{
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(),
sizeof(desc.BeginningAccess.Clear.ClearValue.Color));
rt->SetState(GPUTexture::State::Dirty);
}
break;
D3D12Texture* const rt = m_current_render_targets[i];
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
rt->SetUseFenceValue(GetCurrentFenceValue());
case GPUTexture::State::Invalidated:
{
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
rt->SetState(GPUTexture::State::Dirty);
}
break;
D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i];
desc.cpuDescriptor = rt->GetWriteDescriptor();
desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
case GPUTexture::State::Dirty:
switch (rt->GetState())
{
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
}
break;
default:
UnreachableCode();
case GPUTexture::State::Cleared:
{
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(),
sizeof(desc.BeginningAccess.Clear.ClearValue.Color));
rt->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Invalidated:
{
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
rt->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Dirty:
{
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
}
break;
default:
UnreachableCode();
break;
}
}
rt_desc_p = (m_num_current_render_targets > 0) ? rt_desc.data() : nullptr;
num_rt_descs = m_num_current_render_targets;
}
else
{
// Still need to clear the RTs.
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
D3D12Texture* const rt = m_current_render_targets[i];
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
rt->SetUseFenceValue(GetCurrentFenceValue());
rt->CommitClear(cmdlist);
}
}
rt_desc_p = (m_num_current_render_targets > 0) ? rt_desc.data() : nullptr;
num_rt_descs = m_num_current_render_targets;
if (m_current_depth_target)
{
D3D12Texture* const ds = m_current_depth_target;
@ -1733,7 +1809,7 @@ void D3D12Device::BeginRenderPass()
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
DebugAssert(rt_desc_p || ds_desc_p);
DebugAssert(rt_desc_p || ds_desc_p || IsUsingROVRootSignature());
cmdlist->BeginRenderPass(num_rt_descs, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE);
// TODO: Stats
@ -1771,6 +1847,9 @@ void D3D12Device::BeginSwapChainRenderPass()
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_num_current_render_targets = 0;
m_dirty_flags =
(m_dirty_flags & ~DIRTY_FLAG_RT_UAVS) | ((IsUsingROVRootSignature()) ? DIRTY_FLAG_PIPELINE_LAYOUT : 0);
m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
m_current_depth_target = nullptr;
m_in_render_pass = true;
s_stats.num_render_passes++;
@ -1839,8 +1918,7 @@ void D3D12Device::SetPipeline(GPUPipeline* pipeline)
if (GPUPipeline::Layout layout = m_current_pipeline->GetLayout(); m_current_pipeline_layout != layout)
{
m_current_pipeline_layout = layout;
m_dirty_flags |=
DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS;
m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE & (IsUsingROVRootSignature() ? ~0u : ~DIRTY_FLAG_RT_UAVS);
}
}
@ -1865,7 +1943,8 @@ bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const
void D3D12Device::InvalidateCachedState()
{
m_dirty_flags = ALL_DIRTY_STATE;
m_dirty_flags = ALL_DIRTY_STATE &
((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) ? ~0u : ~DIRTY_FLAG_RT_UAVS);
m_in_render_pass = false;
m_current_pipeline = nullptr;
m_current_vertex_stride = 0;
@ -2053,7 +2132,7 @@ void D3D12Device::PreDrawCheck()
return;
}
}
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS))
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
{
if (!UpdateRootParameters(dirty))
{
@ -2068,9 +2147,15 @@ void D3D12Device::PreDrawCheck()
BeginRenderPass();
}
// Reports whether the current render pass flags request binding render targets as images,
// i.e. whether the BindRenderTargetsAsImages bit is set in m_current_render_pass_flags.
bool D3D12Device::IsUsingROVRootSignature() const
{
const auto image_binding_bits = (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages);
return (image_binding_bits != 0);
}
// Sets the graphics root signature on the current command list to the one matching the current
// pipeline layout.
// NOTE(review): rendered-diff span; the two SetGraphicsRootSignature calls appear to be the
// before/after revisions of one statement - confirm against the real file.
void D3D12Device::UpdateRootSignature()
{
// Indexes m_root_signatures by pipeline layout only.
GetCommandList()->SetGraphicsRootSignature(m_root_signatures[static_cast<u8>(m_current_pipeline_layout)].Get());
// Indexes m_root_signatures first by IsUsingROVRootSignature() (0 or 1), then by pipeline layout.
GetCommandList()->SetGraphicsRootSignature(
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
}
template<GPUPipeline::Layout layout>
@ -2145,6 +2230,35 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
}
if (dirty & DIRTY_FLAG_RT_UAVS)
{
DebugAssert(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages);
D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
D3D12DescriptorHandle gpu_handle;
if (!allocator.Allocate(MAX_IMAGE_RENDER_TARGETS, &gpu_handle))
return false;
D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_IMAGE_RENDER_TARGETS];
UINT src_sizes[MAX_IMAGE_RENDER_TARGETS];
const UINT dst_size = MAX_IMAGE_RENDER_TARGETS;
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
{
src_handles[i] =
m_current_render_targets[i] ? m_current_render_targets[i]->GetSRVDescriptor() : m_null_srv_descriptor;
src_sizes[i] = 1;
}
m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &dst_size, MAX_IMAGE_RENDER_TARGETS, src_handles, src_sizes,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
constexpr u32 rov_param =
(layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ?
1 :
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 :
2);
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
}
return true;
}

View file

@ -8,6 +8,7 @@
#include "gpu_device.h"
#include "gpu_texture.h"
#include "common/dimensional_array.h"
#include "common/windows_headers.h"
#include <array>
@ -110,7 +111,7 @@ public:
void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override;
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) override;
void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -200,9 +201,11 @@ private:
DIRTY_FLAG_CONSTANT_BUFFER = (1 << 2),
DIRTY_FLAG_TEXTURES = (1 << 3),
DIRTY_FLAG_SAMPLERS = (1 << 3),
DIRTY_FLAG_RT_UAVS = (1 << 4),
ALL_DIRTY_STATE = DIRTY_FLAG_INITIAL | DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_CONSTANT_BUFFER |
DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS,
LAYOUT_DEPENDENT_DIRTY_STATE = DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES |
DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS,
ALL_DIRTY_STATE = DIRTY_FLAG_INITIAL | (LAYOUT_DEPENDENT_DIRTY_STATE & ~DIRTY_FLAG_RT_UAVS),
};
struct CommandList
@ -264,6 +267,7 @@ private:
void SetInitialPipelineState();
void PreDrawCheck();
bool IsUsingROVRootSignature() const;
void UpdateRootSignature();
template<GPUPipeline::Layout layout>
bool UpdateParametersForLayout(u32 dirty);
@ -303,6 +307,7 @@ private:
D3D12DescriptorHeapManager m_dsv_heap_manager;
D3D12DescriptorHeapManager m_sampler_heap_manager;
D3D12DescriptorHandle m_null_srv_descriptor;
D3D12DescriptorHandle m_null_uav_descriptor;
D3D12DescriptorHandle m_point_sampler;
ComPtr<ID3D12QueryHeap> m_timestamp_query_heap;
@ -314,7 +319,8 @@ private:
std::deque<std::pair<u64, std::pair<D3D12MA::Allocation*, ID3D12Object*>>> m_cleanup_resources;
std::deque<std::pair<u64, std::pair<D3D12DescriptorHeapManager*, D3D12DescriptorHandle>>> m_cleanup_descriptors;
std::array<ComPtr<ID3D12RootSignature>, static_cast<u8>(GPUPipeline::Layout::MaxCount)> m_root_signatures = {};
DimensionalArray<ComPtr<ID3D12RootSignature>, static_cast<u8>(GPUPipeline::Layout::MaxCount), 2> m_root_signatures =
{};
D3D12StreamBuffer m_vertex_buffer;
D3D12StreamBuffer m_index_buffer;
@ -333,6 +339,7 @@ private:
D3D12Pipeline* m_current_pipeline = nullptr;
D3D12_PRIMITIVE_TOPOLOGY m_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
u32 m_num_current_render_targets = 0;
GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
std::array<D3D12Texture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
D3D12Texture* m_current_depth_target = nullptr;
u32 m_current_vertex_stride = 0;

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "d3d12_pipeline.h"
@ -7,6 +7,7 @@
#include "d3d_common.h"
#include "common/assert.h"
#include "common/bitutils.h"
#include "common/log.h"
#include "common/sha1_digest.h"
#include "common/string_util.h"
@ -180,8 +181,16 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
D3D12_BLEND_OP_MAX, // Max
}};
if (config.render_pass_flags & GPUPipeline::BindRenderTargetsAsImages && !m_features.raster_order_views)
{
ERROR_LOG("Attempting to create ROV pipeline without ROV feature.");
return {};
}
D3D12::GraphicsPipelineBuilder gpb;
gpb.SetRootSignature(m_root_signatures[static_cast<u8>(config.layout)].Get());
gpb.SetRootSignature(m_root_signatures[BoolToUInt8(
(config.render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))][static_cast<u8>(config.layout)]
.Get());
gpb.SetVertexShader(static_cast<const D3D12Shader*>(config.vertex_shader)->GetBytecodeData(),
static_cast<const D3D12Shader*>(config.vertex_shader)->GetBytecodeSize());
gpb.SetPixelShader(static_cast<const D3D12Shader*>(config.fragment_shader)->GetBytecodeData(),

View file

@ -44,8 +44,6 @@ std::unique_ptr<GPUTexture> D3D12Device::CreateTexture(u32 width, u32 height, u3
const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(format);
const DXGI_FORMAT uav_format = (type == GPUTexture::Type::RWTexture) ? fm.resource_format : DXGI_FORMAT_UNKNOWN;
D3D12_RESOURCE_DESC desc = {};
desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
desc.Width = width;
@ -98,7 +96,9 @@ std::unique_ptr<GPUTexture> D3D12Device::CreateTexture(u32 width, u32 height, u3
{
DebugAssert(levels == 1);
allocationDesc.Flags |= D3D12MA::ALLOCATION_FLAG_COMMITTED;
state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
optimized_clear_value.Format = fm.rtv_format;
state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
}
break;
@ -106,9 +106,6 @@ std::unique_ptr<GPUTexture> D3D12Device::CreateTexture(u32 width, u32 height, u3
return {};
}
if (uav_format != DXGI_FORMAT_UNKNOWN)
desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
ComPtr<ID3D12Resource> resource;
ComPtr<D3D12MA::Allocation> allocation;
HRESULT hr = m_allocator->CreateResource(
@ -157,18 +154,28 @@ std::unique_ptr<GPUTexture> D3D12Device::CreateTexture(u32 width, u32 height, u3
}
break;
case GPUTexture::Type::RWTexture:
{
write_descriptor_type = D3D12Texture::WriteDescriptorType::RTV;
if (!CreateRTVDescriptor(resource.Get(), samples, fm.rtv_format, &write_descriptor))
{
m_descriptor_heap_manager.Free(&srv_descriptor);
return {};
}
if (!CreateUAVDescriptor(resource.Get(), samples, fm.srv_format, &uav_descriptor))
{
m_descriptor_heap_manager.Free(&write_descriptor);
m_descriptor_heap_manager.Free(&srv_descriptor);
return {};
}
}
break;
default:
break;
}
if (uav_format != DXGI_FORMAT_UNKNOWN &&
!CreateUAVDescriptor(resource.Get(), samples, fm.dsv_format, &uav_descriptor))
{
m_descriptor_heap_manager.Free(&write_descriptor);
m_descriptor_heap_manager.Free(&srv_descriptor);
return {};
}
std::unique_ptr<D3D12Texture> tex(new D3D12Texture(
width, height, layers, levels, samples, type, format, fm.resource_format, std::move(resource),
std::move(allocation), srv_descriptor, write_descriptor, uav_descriptor, write_descriptor_type, state));

View file

@ -50,7 +50,7 @@ const char* D3DCommon::GetFeatureLevelShaderModelString(D3D_FEATURE_LEVEL featur
{D3D_FEATURE_LEVEL_10_0, "sm40"},
{D3D_FEATURE_LEVEL_10_1, "sm41"},
{D3D_FEATURE_LEVEL_11_0, "sm50"},
{D3D_FEATURE_LEVEL_11_1, "sm51"},
{D3D_FEATURE_LEVEL_11_1, "sm50"},
}};
for (const auto& [fl, name] : feature_level_names)
@ -390,11 +390,9 @@ u32 D3DCommon::GetShaderModelForFeatureLevel(D3D_FEATURE_LEVEL feature_level)
return 41;
case D3D_FEATURE_LEVEL_11_0:
return 50;
case D3D_FEATURE_LEVEL_11_1:
default:
return 51;
return 50;
}
}
@ -429,14 +427,6 @@ std::optional<DynamicHeapArray<u8>> D3DCommon::CompileShader(u32 shader_model, b
}
break;
case 51:
{
static constexpr std::array<const char*, static_cast<u32>(GPUShaderStage::MaxCount)> targets = {
{"vs_5_1", "ps_5_1", "gs_5_1", "cs_5_1"}};
target = targets[static_cast<int>(stage)];
}
break;
default:
Error::SetStringFmt(error, "Unknown shader model: {}", shader_model);
return {};

View file

@ -168,6 +168,7 @@ public:
NoRenderPassFlags = 0,
ColorFeedbackLoop = (1 << 0),
SampleDepthBuffer = (1 << 1),
BindRenderTargetsAsImages = (1 << 2),
};
enum class Primitive : u8
@ -469,6 +470,7 @@ public:
FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4),
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5),
FEATURE_MASK_MEMORY_IMPORT = (1 << 6),
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 7),
};
enum class DrawBarrier : u32
@ -496,6 +498,7 @@ public:
bool shader_cache : 1;
bool pipeline_cache : 1;
bool prefer_unused_textures : 1;
bool raster_order_views : 1;
};
struct Statistics
@ -527,6 +530,7 @@ public:
static constexpr u32 MAX_TEXTURE_SAMPLERS = 8;
static constexpr u32 MIN_TEXEL_BUFFER_ELEMENTS = 4 * 1024 * 512;
static constexpr u32 MAX_RENDER_TARGETS = 4;
static constexpr u32 MAX_IMAGE_RENDER_TARGETS = 2;
static_assert(sizeof(GPUPipeline::GraphicsConfig::color_formats) == sizeof(GPUTexture::Format) * MAX_RENDER_TARGETS);
GPUDevice();
@ -676,14 +680,14 @@ public:
/// Drawing setup abstraction.
virtual void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag render_pass_flags = GPUPipeline::NoRenderPassFlags) = 0;
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) = 0;
virtual void SetPipeline(GPUPipeline* pipeline) = 0;
virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) = 0;
virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0;
virtual void SetViewport(const GSVector4i rc) = 0;
virtual void SetScissor(const GSVector4i rc) = 0;
void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr,
GPUPipeline::RenderPassFlag render_pass_flags = GPUPipeline::NoRenderPassFlags);
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags);
void SetViewport(s32 x, s32 y, s32 width, s32 height);
void SetScissor(s32 x, s32 y, s32 width, s32 height);
void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height);

View file

@ -213,10 +213,18 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u
return false;
}
if (samples > 1 && levels > 1)
if (samples > 1)
{
ERROR_LOG("Multisampled textures can't have mip levels.");
return false;
if (levels > 1)
{
ERROR_LOG("Multisampled textures can't have mip levels.");
return false;
}
else if (type != Type::RenderTarget && type != Type::DepthStencil)
{
ERROR_LOG("Multisampled textures must be render targets or depth stencil targets.");
return false;
}
}
if (layers > 1 && type != Type::Texture && type != Type::DynamicTexture)

View file

@ -129,6 +129,7 @@ public:
/// Returns true when this texture's type is DepthStencil.
ALWAYS_INLINE bool IsDepthStencil() const { return Type::DepthStencil == m_type; }
/// Returns true when this texture's type is either Texture or DynamicTexture.
ALWAYS_INLINE bool IsTexture() const { return !(m_type != Type::Texture && m_type != Type::DynamicTexture); }
/// Returns true when this texture's type is DynamicTexture.
ALWAYS_INLINE bool IsDynamicTexture() const { return Type::DynamicTexture == m_type; }
/// Returns true when this texture's type is RWTexture.
ALWAYS_INLINE bool IsRWTexture() const { return Type::RWTexture == m_type; }
/// Returns a reference to the clear value stored on this texture.
ALWAYS_INLINE const ClearValue& GetClearValue() const { return m_clear_value; }
/// Returns the colour member of the stored clear value.
ALWAYS_INLINE u32 GetClearColor() const { return m_clear_value.color; }

View file

@ -481,7 +481,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
// So, blit from the shadow texture, like in the other renderers.
m_features.texture_copy_to_self = !vendor_id_arm && !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
m_features.feedback_loops = m_features.framebuffer_fetch;
m_features.feedback_loops = false;
m_features.geometry_shaders =
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2);

View file

@ -385,19 +385,21 @@ GPUDevice::AdapterInfoList VulkanDevice::GetAdapterList()
return ret;
}
bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface)
bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface, Error* error)
{
u32 extension_count = 0;
VkResult res = vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, nullptr);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkEnumerateDeviceExtensionProperties failed: ");
Vulkan::SetErrorObject(error, "vkEnumerateDeviceExtensionProperties failed: ", res);
return false;
}
if (extension_count == 0)
{
ERROR_LOG("Vulkan: No extensions supported by device.");
ERROR_LOG("No extensions supported by device.");
Error::SetStringView(error, "No extensions supported by device.");
return false;
}
@ -423,7 +425,10 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en
}
if (required)
{
ERROR_LOG("Vulkan: Missing required extension {}.", name);
Error::SetStringFmt(error, "Missing required extension {}.", name);
}
return false;
};
@ -466,6 +471,11 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en
m_optional_extensions.vk_ext_swapchain_maintenance1 &&
SupportsExtension(VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME, false);
// Dynamic rendering isn't strictly needed for FSI, but we want it with framebufferless rendering.
m_optional_extensions.vk_ext_fragment_shader_interlock =
m_optional_extensions.vk_khr_dynamic_rendering &&
SupportsExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME, false);
#ifdef _WIN32
m_optional_extensions.vk_ext_full_screen_exclusive =
enable_surface && SupportsExtension(VK_EXT_FULL_SCREEN_EXCLUSIVE_EXTENSION_NAME, false);
@ -480,6 +490,7 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en
{
m_optional_extensions.vk_khr_dynamic_rendering = false;
m_optional_extensions.vk_khr_dynamic_rendering_local_read = false;
m_optional_extensions.vk_ext_fragment_shader_interlock = false;
WARNING_LOG("Disabling VK_KHR_dynamic_rendering on broken mobile driver.");
}
if (m_optional_extensions.vk_khr_push_descriptor)
@ -501,29 +512,15 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en
return true;
}
bool VulkanDevice::SelectDeviceFeatures()
{
VkPhysicalDeviceFeatures available_features;
vkGetPhysicalDeviceFeatures(m_physical_device, &available_features);
// Enable the features we use.
m_device_features.dualSrcBlend = available_features.dualSrcBlend;
m_device_features.largePoints = available_features.largePoints;
m_device_features.wideLines = available_features.wideLines;
m_device_features.samplerAnisotropy = available_features.samplerAnisotropy;
m_device_features.sampleRateShading = available_features.sampleRateShading;
m_device_features.geometryShader = available_features.geometryShader;
return true;
}
bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer)
bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer, FeatureMask disabled_features,
Error* error)
{
u32 queue_family_count;
vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, nullptr);
if (queue_family_count == 0)
{
ERROR_LOG("No queue families found on specified vulkan physical device.");
Error::SetStringView(error, "No queue families found on specified vulkan physical device.");
return false;
}
@ -554,6 +551,7 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: ");
Vulkan::SetErrorObject(error, "vkGetPhysicalDeviceSurfaceSupportKHR failed: ", res);
return false;
}
@ -572,11 +570,13 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
if (m_graphics_queue_family_index == queue_family_count)
{
ERROR_LOG("Vulkan: Failed to find an acceptable graphics queue.");
Error::SetStringView(error, "Vulkan: Failed to find an acceptable graphics queue.");
return false;
}
if (surface != VK_NULL_HANDLE && m_present_queue_family_index == queue_family_count)
{
ERROR_LOG("Vulkan: Failed to find an acceptable present queue.");
Error::SetStringView(error, "Vulkan: Failed to find an acceptable present queue.");
return false;
}
@ -610,17 +610,26 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
device_info.pQueueCreateInfos = queue_infos.data();
ExtensionList enabled_extensions;
if (!SelectDeviceExtensions(&enabled_extensions, surface != VK_NULL_HANDLE))
if (!SelectDeviceExtensions(&enabled_extensions, surface != VK_NULL_HANDLE, error))
return false;
device_info.enabledExtensionCount = static_cast<uint32_t>(enabled_extensions.size());
device_info.ppEnabledExtensionNames = enabled_extensions.data();
// Check for required features before creating.
if (!SelectDeviceFeatures())
return false;
VkPhysicalDeviceFeatures available_features;
vkGetPhysicalDeviceFeatures(m_physical_device, &available_features);
device_info.pEnabledFeatures = &m_device_features;
// Enable the features we use.
VkPhysicalDeviceFeatures enabled_features = {};
enabled_features.dualSrcBlend = available_features.dualSrcBlend;
enabled_features.largePoints = available_features.largePoints;
enabled_features.wideLines = available_features.wideLines;
enabled_features.samplerAnisotropy = available_features.samplerAnisotropy;
enabled_features.sampleRateShading = available_features.sampleRateShading;
enabled_features.geometryShader = available_features.geometryShader;
enabled_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics;
device_info.pEnabledFeatures = &enabled_features;
// Enable debug layer on debug builds
if (enable_validation_layer)
@ -639,6 +648,8 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_TRUE};
VkPhysicalDeviceSwapchainMaintenance1FeaturesEXT swapchain_maintenance1_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SWAPCHAIN_MAINTENANCE_1_FEATURES_EXT, nullptr, VK_TRUE};
VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT, nullptr, VK_FALSE, VK_TRUE, VK_FALSE};
if (m_optional_extensions.vk_ext_rasterization_order_attachment_access)
Vulkan::AddPointerToChain(&device_info, &rasterization_order_access_feature);
@ -649,12 +660,15 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_feature);
if (m_optional_extensions.vk_khr_dynamic_rendering_local_read)
Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_local_read_feature);
if (m_optional_extensions.vk_ext_fragment_shader_interlock)
Vulkan::AddPointerToChain(&device_info, &fragment_shader_interlock_feature);
}
VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateDevice failed: ");
Vulkan::SetErrorObject(error, "vkCreateDevice failed: ", res);
return false;
}
@ -677,6 +691,7 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
m_device_properties.limits.timestampPeriod);
ProcessDeviceExtensions();
SetFeatures(disabled_features, enabled_features);
return true;
}
@ -693,6 +708,8 @@ void VulkanDevice::ProcessDeviceExtensions()
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_FALSE};
VkPhysicalDeviceSwapchainMaintenance1FeaturesEXT swapchain_maintenance1_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SWAPCHAIN_MAINTENANCE_1_FEATURES_EXT, nullptr, VK_FALSE};
VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT, nullptr, VK_FALSE, VK_FALSE, VK_FALSE};
// add in optional feature structs
if (m_optional_extensions.vk_ext_rasterization_order_attachment_access)
@ -704,6 +721,8 @@ void VulkanDevice::ProcessDeviceExtensions()
Vulkan::AddPointerToChain(&features2, &dynamic_rendering_feature);
if (m_optional_extensions.vk_khr_dynamic_rendering_local_read)
Vulkan::AddPointerToChain(&features2, &dynamic_rendering_local_read_feature);
if (m_optional_extensions.vk_ext_fragment_shader_interlock)
Vulkan::AddPointerToChain(&features2, &fragment_shader_interlock_feature);
}
// we might not have VK_KHR_get_physical_device_properties2...
@ -738,6 +757,9 @@ void VulkanDevice::ProcessDeviceExtensions()
m_optional_extensions.vk_khr_dynamic_rendering &= (dynamic_rendering_feature.dynamicRendering == VK_TRUE);
m_optional_extensions.vk_khr_dynamic_rendering_local_read &=
(dynamic_rendering_local_read_feature.dynamicRenderingLocalRead == VK_TRUE);
m_optional_extensions.vk_ext_fragment_shader_interlock &=
(m_optional_extensions.vk_khr_dynamic_rendering &&
fragment_shader_interlock_feature.fragmentShaderPixelInterlock == VK_TRUE);
VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, nullptr, {}};
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor_properties = {
@ -769,6 +791,8 @@ void VulkanDevice::ProcessDeviceExtensions()
INFO_LOG("VK_EXT_external_memory_host is {}",
m_optional_extensions.vk_ext_external_memory_host ? "supported" : "NOT supported");
INFO_LOG("VK_EXT_memory_budget is {}", m_optional_extensions.vk_ext_memory_budget ? "supported" : "NOT supported");
INFO_LOG("VK_EXT_fragment_shader_interlock is {}",
m_optional_extensions.vk_ext_fragment_shader_interlock ? "supported" : "NOT supported");
INFO_LOG("VK_EXT_rasterization_order_attachment_access is {}",
m_optional_extensions.vk_ext_rasterization_order_attachment_access ? "supported" : "NOT supported");
INFO_LOG("VK_EXT_swapchain_maintenance1 is {}",
@ -2046,15 +2070,9 @@ bool VulkanDevice::CreateDevice(std::string_view adapter, bool threaded_presenta
}
// Attempt to create the device.
if (!CreateDevice(surface, enable_validation_layer))
if (!CreateDevice(surface, enable_validation_layer, disabled_features, error))
return false;
if (!CheckFeatures(disabled_features))
{
Error::SetStringView(error, "Your GPU does not support the required Vulkan features.");
return false;
}
// And critical resources.
if (!CreateAllocator() || !CreatePersistentDescriptorPool() || !CreateCommandBuffers() || !CreatePipelineLayouts())
return false;
@ -2576,14 +2594,13 @@ u32 VulkanDevice::GetMaxMultisamples(VkPhysicalDevice physical_device, const VkP
return 1;
}
bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDeviceFeatures& vk_features)
{
m_max_texture_size =
std::min(m_device_properties.limits.maxImageDimension2D, m_device_properties.limits.maxFramebufferWidth);
m_max_multisamples = GetMaxMultisamples(m_physical_device, m_device_properties);
m_features.dual_source_blend =
!(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && m_device_features.dualSrcBlend;
m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && vk_features.dualSrcBlend;
m_features.framebuffer_fetch =
!(disabled_features & (FEATURE_MASK_FEEDBACK_LOOPS | FEATURE_MASK_FRAMEBUFFER_FETCH)) &&
m_optional_extensions.vk_ext_rasterization_order_attachment_access;
@ -2593,7 +2610,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
m_features.noperspective_interpolation = true;
m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
m_features.per_sample_shading = m_device_features.sampleRateShading;
m_features.per_sample_shading = vk_features.sampleRateShading;
m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
m_features.feedback_loops = !(disabled_features & FEATURE_MASK_FEEDBACK_LOOPS);
@ -2612,8 +2629,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
if (m_features.texture_buffers_emulated_with_ssbo)
WARNING_LOG("Emulating texture buffers with SSBOs.");
m_features.geometry_shaders =
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && m_device_features.geometryShader;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader;
m_features.partial_msaa_resolve = true;
m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
@ -2621,8 +2637,9 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
m_features.shader_cache = true;
m_features.pipeline_cache = true;
m_features.prefer_unused_textures = true;
return true;
m_features.raster_order_views =
(!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS) && vk_features.fragmentStoresAndAtomics &&
m_optional_extensions.vk_ext_fragment_shader_interlock);
}
void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
@ -2928,7 +2945,7 @@ void VulkanDevice::UnmapUniformBuffer(u32 size)
bool VulkanDevice::CreateNullTexture()
{
m_null_texture = VulkanTexture::Create(1, 1, 1, 1, 1, GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8,
m_null_texture = VulkanTexture::Create(1, 1, 1, 1, 1, GPUTexture::Type::RWTexture, GPUTexture::Format::RGBA8,
VK_FORMAT_R8G8B8A8_UNORM);
if (!m_null_texture)
return false;
@ -2948,10 +2965,7 @@ bool VulkanDevice::CreateNullTexture()
return false;
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
{
m_current_textures[i] = m_null_texture.get();
m_current_samplers[i] = point_sampler;
}
return true;
}
@ -3005,59 +3019,89 @@ bool VulkanDevice::CreatePipelineLayouts()
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout");
}
if (m_features.raster_order_views)
{
VkPipelineLayout& pl = m_pipeline_layouts[static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)];
plb.AddDescriptorSet(m_ubo_ds_layout);
plb.AddDescriptorSet(m_single_texture_ds_layout);
// TODO: REMOVE ME
if (m_features.feedback_loops)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
// Label the ROV descriptor set layout that was just created. Passing the feedback-loop layout here would
// leave the ROV layout unnamed and overwrite the feedback-loop layout's debug name.
Vulkan::SetObjectName(m_device, m_rov_ds_layout, "ROV Descriptor Set Layout");
}
for (u32 type = 0; type < 3; type++)
{
VkPipelineLayout& pl = m_pipeline_layouts[static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)];
plb.AddDescriptorSet(m_single_texture_ds_layout);
// TODO: REMOVE ME
if (m_features.feedback_loops)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Single Texture Pipeline Layout");
}
const bool feedback_loop = (type == 1);
const bool rov = (type == 2);
if ((feedback_loop && !m_features.feedback_loops) || (rov && !m_features.raster_order_views))
continue;
{
VkPipelineLayout& pl =
m_pipeline_layouts[static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];
plb.AddDescriptorSet(m_single_texture_buffer_ds_layout);
// TODO: REMOVE ME
if (m_features.feedback_loops)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Single Texture Buffer + UBO Pipeline Layout");
}
{
VkPipelineLayout& pl = m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)];
plb.AddDescriptorSet(m_ubo_ds_layout);
plb.AddDescriptorSet(m_single_texture_ds_layout);
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
}
{
VkPipelineLayout& pl = m_pipeline_layouts[static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBO)];
plb.AddDescriptorSet(m_ubo_ds_layout);
plb.AddDescriptorSet(m_multi_texture_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout");
}
{
VkPipelineLayout& pl =
m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)];
plb.AddDescriptorSet(m_single_texture_ds_layout);
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Single Texture Pipeline Layout");
}
{
VkPipelineLayout& pl = m_pipeline_layouts[static_cast<u8>(GPUPipeline::Layout::MultiTextureAndPushConstants)];
plb.AddDescriptorSet(m_multi_texture_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout");
{
VkPipelineLayout& pl =
m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];
plb.AddDescriptorSet(m_single_texture_buffer_ds_layout);
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Single Texture Buffer + UBO Pipeline Layout");
}
{
VkPipelineLayout& pl = m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBO)];
plb.AddDescriptorSet(m_ubo_ds_layout);
plb.AddDescriptorSet(m_multi_texture_ds_layout);
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout");
}
{
VkPipelineLayout& pl =
m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndPushConstants)];
plb.AddDescriptorSet(m_multi_texture_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout");
}
}
return true;
@ -3065,14 +3109,13 @@ bool VulkanDevice::CreatePipelineLayouts()
void VulkanDevice::DestroyPipelineLayouts()
{
for (VkPipelineLayout& pl : m_pipeline_layouts)
{
m_pipeline_layouts.enumerate([this](auto& pl) {
if (pl != VK_NULL_HANDLE)
{
vkDestroyPipelineLayout(m_device, pl, nullptr);
pl = VK_NULL_HANDLE;
}
}
});
auto destroy_dsl = [this](VkDescriptorSetLayout& l) {
if (l != VK_NULL_HANDLE)
@ -3081,6 +3124,7 @@ void VulkanDevice::DestroyPipelineLayouts()
l = VK_NULL_HANDLE;
}
};
destroy_dsl(m_rov_ds_layout);
destroy_dsl(m_feedback_loop_ds_layout);
destroy_dsl(m_multi_texture_ds_layout);
destroy_dsl(m_single_texture_buffer_ds_layout);
@ -3222,10 +3266,13 @@ bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsa
}
void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop)
GPUPipeline::RenderPassFlag flags)
{
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds ||
m_current_feedback_loop != feedback_loop);
const bool changed_layout =
(m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) !=
(flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages));
bool changed =
(m_num_current_render_targets != num_rts || m_current_depth_target != ds || m_current_render_pass_flags != flags);
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
bool needs_rt_clear = false;
@ -3240,7 +3287,7 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
m_current_render_targets[i] = nullptr;
m_num_current_render_targets = Truncate8(num_rts);
m_current_feedback_loop = feedback_loop;
m_current_render_pass_flags = flags;
if (changed)
{
@ -3253,12 +3300,12 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText
return;
}
if (!m_optional_extensions.vk_khr_dynamic_rendering || ((feedback_loop & GPUPipeline::ColorFeedbackLoop) &&
!m_optional_extensions.vk_khr_dynamic_rendering_local_read))
if (!m_optional_extensions.vk_khr_dynamic_rendering ||
((flags & GPUPipeline::ColorFeedbackLoop) && !m_optional_extensions.vk_khr_dynamic_rendering_local_read))
{
m_current_framebuffer = m_framebuffer_manager.Lookup(
(m_num_current_render_targets > 0) ? reinterpret_cast<GPUTexture**>(m_current_render_targets.data()) : nullptr,
m_num_current_render_targets, m_current_depth_target, feedback_loop);
m_num_current_render_targets, m_current_depth_target, flags);
if (m_current_framebuffer == VK_NULL_HANDLE)
{
ERROR_LOG("Failed to create framebuffer");
@ -3266,8 +3313,10 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText
}
}
m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_INPUT_ATTACHMENT) |
((feedback_loop & GPUPipeline::ColorFeedbackLoop) ? DIRTY_FLAG_INPUT_ATTACHMENT : 0);
m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_INPUT_ATTACHMENT) | (changed_layout ? DIRTY_FLAG_PIPELINE_LAYOUT : 0) |
((flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) ?
DIRTY_FLAG_INPUT_ATTACHMENT :
0);
}
// TODO: This could use vkCmdClearAttachments() instead.
@ -3285,11 +3334,14 @@ void VulkanDevice::BeginRenderPass()
// All textures should be in shader read only optimal already, but just in case..
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
for (u32 i = 0; i < num_textures; i++)
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
{
if (m_current_textures[i])
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
}
// NVIDIA drivers appear to return random garbage when sampling the RT via a feedback loop, if the load op for
// the render pass is CLEAR. Using vkCmdClearAttachments() doesn't work, so we have to clear the image instead.
if (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop)
if (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop && IsDeviceNVIDIA())
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
@ -3298,8 +3350,9 @@ void VulkanDevice::BeginRenderPass()
}
}
if (m_optional_extensions.vk_khr_dynamic_rendering && (m_optional_extensions.vk_khr_dynamic_rendering_local_read ||
!(m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop)))
if (m_optional_extensions.vk_khr_dynamic_rendering &&
(m_optional_extensions.vk_khr_dynamic_rendering_local_read ||
!(m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop)))
{
VkRenderingInfoKHR ri = {
VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr};
@ -3309,35 +3362,51 @@ void VulkanDevice::BeginRenderPass()
if (m_num_current_render_targets > 0 || m_current_depth_target)
{
ri.colorAttachmentCount = m_num_current_render_targets;
ri.pColorAttachments = (m_num_current_render_targets > 0) ? attachments.data() : nullptr;
// set up clear values and transition targets
for (u32 i = 0; i < m_num_current_render_targets; i++)
if (!(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
{
VulkanTexture* const rt = static_cast<VulkanTexture*>(m_current_render_targets[i]);
rt->TransitionToLayout((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ?
VulkanTexture::Layout::FeedbackLoop :
VulkanTexture::Layout::ColorAttachment);
rt->SetUseFenceCounter(GetCurrentFenceCounter());
ri.colorAttachmentCount = m_num_current_render_targets;
ri.pColorAttachments = (m_num_current_render_targets > 0) ? attachments.data() : nullptr;
VkRenderingAttachmentInfo& ai = attachments[i];
ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
ai.pNext = nullptr;
ai.imageView = rt->GetView();
ai.imageLayout = rt->GetVkLayout();
ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR;
ai.resolveImageView = VK_NULL_HANDLE;
ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
ai.loadOp = GetLoadOpForTexture(rt);
ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
if (rt->GetState() == GPUTexture::State::Cleared)
// set up clear values and transition targets
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
std::memcpy(ai.clearValue.color.float32, rt->GetUNormClearColor().data(),
sizeof(ai.clearValue.color.float32));
VulkanTexture* const rt = static_cast<VulkanTexture*>(m_current_render_targets[i]);
rt->TransitionToLayout((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ?
VulkanTexture::Layout::FeedbackLoop :
VulkanTexture::Layout::ColorAttachment);
rt->SetUseFenceCounter(GetCurrentFenceCounter());
VkRenderingAttachmentInfo& ai = attachments[i];
ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
ai.pNext = nullptr;
ai.imageView = rt->GetView();
ai.imageLayout = rt->GetVkLayout();
ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR;
ai.resolveImageView = VK_NULL_HANDLE;
ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
ai.loadOp = GetLoadOpForTexture(rt);
ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
if (rt->GetState() == GPUTexture::State::Cleared)
{
std::memcpy(ai.clearValue.color.float32, rt->GetUNormClearColor().data(),
sizeof(ai.clearValue.color.float32));
}
rt->SetState(GPUTexture::State::Dirty);
}
}
else
{
// Binding as image, but we still need to clear it.
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
VulkanTexture* rt = m_current_render_targets[i];
if (rt->GetState() == GPUTexture::State::Cleared)
rt->CommitClear(m_current_command_buffer);
rt->SetState(GPUTexture::State::Dirty);
rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage);
rt->SetUseFenceCounter(GetCurrentFenceCounter());
}
rt->SetState(GPUTexture::State::Dirty);
}
if (VulkanTexture* const ds = m_current_depth_target)
@ -3396,8 +3465,9 @@ void VulkanDevice::BeginRenderPass()
if (m_current_framebuffer != VK_NULL_HANDLE)
{
bi.framebuffer = m_current_framebuffer;
bi.renderPass = m_current_render_pass = GetRenderPass(
m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target, m_current_feedback_loop);
bi.renderPass = m_current_render_pass =
GetRenderPass(m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target,
m_current_render_pass_flags);
if (bi.renderPass == VK_NULL_HANDLE)
{
ERROR_LOG("Failed to create render pass");
@ -3416,7 +3486,7 @@ void VulkanDevice::BeginRenderPass()
bi.clearValueCount = i + 1;
}
rt->SetState(GPUTexture::State::Dirty);
rt->TransitionToLayout((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ?
rt->TransitionToLayout((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ?
VulkanTexture::Layout::FeedbackLoop :
VulkanTexture::Layout::ColorAttachment);
rt->SetUseFenceCounter(GetCurrentFenceCounter());
@ -3473,7 +3543,10 @@ void VulkanDevice::BeginSwapChainRenderPass()
// All textures should be in shader read only optimal already, but just in case..
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
for (u32 i = 0; i < num_textures; i++)
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
{
if (m_current_textures[i])
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
}
if (m_optional_extensions.vk_khr_dynamic_rendering)
{
@ -3518,15 +3591,16 @@ void VulkanDevice::BeginSwapChainRenderPass()
vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE);
}
m_dirty_flags |=
(m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) ?
DIRTY_FLAG_PIPELINE_LAYOUT :
0;
s_stats.num_render_passes++;
m_num_current_render_targets = 0;
m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_current_depth_target = nullptr;
m_current_framebuffer = VK_NULL_HANDLE;
// Clear pipeline, it's likely incompatible.
m_current_pipeline = nullptr;
}
bool VulkanDevice::InRenderPass()
@ -3584,8 +3658,8 @@ void VulkanDevice::UnbindPipeline(VulkanPipeline* pl)
/// Discards the cached render pass and pipeline and flags state for re-application.
/// Every bit in ALL_DIRTY_STATE is set; DIRTY_FLAG_INPUT_ATTACHMENT is additionally set when the
/// current render pass flags contain GPUPipeline::ColorFeedbackLoop.
void VulkanDevice::InvalidateCachedState()
{
  // The dirty mask is computed once from m_current_render_pass_flags. The earlier assignment based on
  // the old m_current_feedback_loop member was overwritten immediately and has been dropped.
  const bool uses_feedback_loop = (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop);
  m_dirty_flags = ALL_DIRTY_STATE | (uses_feedback_loop ? DIRTY_FLAG_INPUT_ATTACHMENT : 0);

  // Forget the cached render pass handle and the bound pipeline.
  m_current_render_pass = VK_NULL_HANDLE;
  m_current_pipeline = nullptr;
}
@ -3601,9 +3675,18 @@ s32 VulkanDevice::IsRenderTargetBoundIndex(const GPUTexture* tex) const
return -1;
}
/// Maps a set of render pass flags onto the pipeline layout variant used for them.
/// BindRenderTargetsAsImages is checked first, so it wins when both flags are present;
/// with neither flag set the result is PipelineLayoutType::Normal.
VulkanDevice::PipelineLayoutType VulkanDevice::GetPipelineLayoutType(GPUPipeline::RenderPassFlag flags)
{
  if (flags & GPUPipeline::BindRenderTargetsAsImages)
    return PipelineLayoutType::BindRenderTargetsAsImages;

  if (flags & GPUPipeline::ColorFeedbackLoop)
    return PipelineLayoutType::ColorFeedbackLoop;

  return PipelineLayoutType::Normal;
}
/// Returns the VkPipelineLayout matching the current render pass flags and the current pipeline layout.
/// m_pipeline_layouts is two-dimensional: the first index is the PipelineLayoutType derived from
/// m_current_render_pass_flags, the second is the GPUPipeline::Layout in m_current_pipeline_layout.
VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const
{
  // The former single-index lookup returned before this one could run, so it has been removed.
  const size_t type_index = static_cast<size_t>(GetPipelineLayoutType(m_current_render_pass_flags));
  const size_t layout_index = static_cast<size_t>(m_current_pipeline_layout);
  return m_pipeline_layouts[type_index][layout_index];
}
void VulkanDevice::SetInitialPipelineState()
@ -3634,7 +3717,7 @@ void VulkanDevice::SetInitialPipelineState()
void VulkanDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
{
VulkanTexture* T = texture ? static_cast<VulkanTexture*>(texture) : m_null_texture.get();
VulkanTexture* T = static_cast<VulkanTexture*>(texture);
const VkSampler vsampler = static_cast<VulkanSampler*>(sampler ? sampler : m_nearest_sampler.get())->GetSampler();
if (m_current_textures[slot] != T || m_current_samplers[slot] != vsampler)
{
@ -3643,7 +3726,7 @@ void VulkanDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler*
m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS;
}
if (texture)
if (T)
{
T->CommitClear();
T->SetUseFenceCounter(GetCurrentFenceCounter());
@ -3673,7 +3756,7 @@ void VulkanDevice::UnbindTexture(VulkanTexture* tex)
{
if (m_current_textures[i] == tex)
{
m_current_textures[i] = m_null_texture.get();
m_current_textures[i] = nullptr;
m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS;
}
}
@ -3754,7 +3837,7 @@ void VulkanDevice::PreDrawCheck()
BeginRenderPass();
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
const u32 update_mask = (m_current_feedback_loop ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT);
const u32 update_mask = (m_current_render_pass_flags ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT);
const u32 dirty = m_dirty_flags & update_mask;
m_dirty_flags = m_dirty_flags & ~update_mask;
@ -3774,6 +3857,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
{
[[maybe_unused]] bool new_dynamic_offsets = false;
VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout();
std::array<VkDescriptorSet, 3> ds;
u32 first_ds = 0;
u32 num_ds = 0;
@ -3796,8 +3880,9 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants)
{
DebugAssert(m_current_textures[0] && m_current_samplers[0] != VK_NULL_HANDLE);
ds[num_ds++] = m_current_textures[0]->GetDescriptorSetWithSampler(m_current_samplers[0]);
VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get();
DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE);
ds[num_ds++] = tex->GetDescriptorSetWithSampler(m_current_samplers[0]);
}
else if constexpr (layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
{
@ -3813,14 +3898,14 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
{
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
{
DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE);
dsub.AddCombinedImageSamplerDescriptorWrite(VK_NULL_HANDLE, i, m_current_textures[i]->GetView(),
m_current_samplers[i], m_current_textures[i]->GetVkLayout());
VulkanTexture* const tex = m_current_textures[i] ? m_current_textures[i] : m_null_texture.get();
DebugAssert(tex && m_current_samplers[i] != VK_NULL_HANDLE);
dsub.AddCombinedImageSamplerDescriptorWrite(VK_NULL_HANDLE, i, tex->GetView(), m_current_samplers[i],
tex->GetVkLayout());
}
const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0;
dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS,
m_pipeline_layouts[static_cast<u8>(m_current_pipeline_layout)], set);
dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set);
if (num_ds == 0)
return true;
}
@ -3834,21 +3919,42 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
{
DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE);
dsub.AddCombinedImageSamplerDescriptorWrite(tds, i, m_current_textures[i]->GetView(), m_current_samplers[i],
m_current_textures[i]->GetVkLayout());
VulkanTexture* const tex = m_current_textures[i] ? m_current_textures[i] : m_null_texture.get();
DebugAssert(tex && m_current_samplers[i] != VK_NULL_HANDLE);
dsub.AddCombinedImageSamplerDescriptorWrite(tds, i, tex->GetView(), m_current_samplers[i], tex->GetVkLayout());
}
dsub.Update(m_device, false);
}
}
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants ||
layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
if (m_num_current_render_targets > 0 &&
((dirty & DIRTY_FLAG_INPUT_ATTACHMENT) ||
(dirty & DIRTY_FLAG_PIPELINE_LAYOUT &&
(m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)))))
{
if ((dirty & DIRTY_FLAG_INPUT_ATTACHMENT) ||
(dirty & DIRTY_FLAG_PIPELINE_LAYOUT && (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop)))
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
{
VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout);
if (ids == VK_NULL_HANDLE)
return false;
ds[num_ds++] = ids;
Vulkan::DescriptorSetUpdateBuilder dsub;
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
dsub.AddStorageImageDescriptorWrite(ids, i, m_current_render_targets[i]->GetView(),
m_current_render_targets[i]->GetVkLayout());
}
// Annoyingly, have to update all slots...
for (u32 i = m_num_current_render_targets; i < MAX_IMAGE_RENDER_TARGETS; i++)
dsub.AddStorageImageDescriptorWrite(ids, i, m_null_texture->GetView(), m_null_texture->GetVkLayout());
dsub.Update(m_device, false);
}
else
{
VkDescriptorSet ids = AllocateDescriptorSet(m_feedback_loop_ds_layout);
if (ids == VK_NULL_HANDLE)
@ -3864,9 +3970,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
DebugAssert(num_ds > 0);
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS,
m_pipeline_layouts[static_cast<u8>(m_current_pipeline_layout)], first_ds, num_ds, ds.data(),
static_cast<u32>(new_dynamic_offsets),
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds,
num_ds, ds.data(), static_cast<u32>(new_dynamic_offsets),
new_dynamic_offsets ? &m_uniform_buffer_position : nullptr);
return true;

View file

@ -9,6 +9,8 @@
#include "vulkan_loader.h"
#include "vulkan_stream_buffer.h"
#include "common/dimensional_array.h"
#include <array>
#include <atomic>
#include <condition_variable>
@ -43,6 +45,7 @@ public:
struct OptionalExtensions
{
bool vk_ext_external_memory_host : 1;
bool vk_ext_fragment_shader_interlock : 1;
bool vk_ext_full_screen_exclusive : 1;
bool vk_ext_memory_budget : 1;
bool vk_ext_rasterization_order_attachment_access : 1;
@ -124,7 +127,7 @@ public:
void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override;
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags) override;
void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -250,6 +253,14 @@ private:
DIRTY_FLAG_TEXTURES_OR_SAMPLERS | DIRTY_FLAG_INPUT_ATTACHMENT,
};
// Variant of the pipeline layouts to use for a given set of render pass flags.
// GetPipelineLayoutType() derives the value from GPUPipeline::RenderPassFlag, and the
// value is used as one of the two indices into m_pipeline_layouts.
enum class PipelineLayoutType : u8
{
// Returned when neither of the flags below is set.
Normal,
// Returned when GPUPipeline::ColorFeedbackLoop is set (and BindRenderTargetsAsImages is not).
ColorFeedbackLoop,
// Returned when GPUPipeline::BindRenderTargetsAsImages is set; checked before ColorFeedbackLoop.
BindRenderTargetsAsImages,
// Number of variants; sizes one dimension of m_pipeline_layouts.
MaxCount,
};
struct RenderPassCacheKey
{
struct RenderTarget
@ -324,12 +335,10 @@ private:
using ExtensionList = std::vector<const char*>;
static bool SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo& wi, OptionalExtensions* oe,
bool enable_debug_utils);
bool SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface);
bool SelectDeviceFeatures();
bool CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer);
bool SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface, Error* error);
bool CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer, FeatureMask disabled_features, Error* error);
void ProcessDeviceExtensions();
bool CheckFeatures(FeatureMask disabled_features);
void SetFeatures(FeatureMask disabled_features, const VkPhysicalDeviceFeatures& vk_features);
static u32 GetMaxMultisamples(VkPhysicalDevice physical_device, const VkPhysicalDeviceProperties& properties);
@ -360,6 +369,7 @@ private:
s32 IsRenderTargetBoundIndex(const GPUTexture* tex) const;
/// Applies any changed state.
static PipelineLayoutType GetPipelineLayoutType(GPUPipeline::RenderPassFlag flags);
VkPipelineLayout GetCurrentVkPipelineLayout() const;
void SetInitialPipelineState();
void PreDrawCheck();
@ -437,7 +447,6 @@ private:
// TODO: Move to static?
VkDebugUtilsMessengerEXT m_debug_messenger_callback = VK_NULL_HANDLE;
VkPhysicalDeviceFeatures m_device_features = {};
VkPhysicalDeviceProperties m_device_properties = {};
VkPhysicalDeviceDriverPropertiesKHR m_device_driver_properties = {};
OptionalExtensions m_optional_extensions = {};
@ -451,7 +460,10 @@ private:
VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE;
std::array<VkPipelineLayout, static_cast<u8>(GPUPipeline::Layout::MaxCount)> m_pipeline_layouts = {};
VkDescriptorSetLayout m_rov_ds_layout = VK_NULL_HANDLE;
DimensionalArray<VkPipelineLayout, static_cast<size_t>(GPUPipeline::Layout::MaxCount),
static_cast<size_t>(PipelineLayoutType::MaxCount)>
m_pipeline_layouts = {};
VulkanStreamBuffer m_vertex_buffer;
VulkanStreamBuffer m_index_buffer;
@ -466,8 +478,8 @@ private:
// Which bindings/state has to be updated before the next draw.
u32 m_dirty_flags = ALL_DIRTY_STATE;
u8 m_num_current_render_targets = 0;
GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
u32 m_num_current_render_targets = 0;
GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
std::array<VulkanTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
VulkanTexture* m_current_depth_target = nullptr;
VkFramebuffer m_current_framebuffer = VK_NULL_HANDLE;
@ -479,6 +491,6 @@ private:
std::array<VulkanTexture*, MAX_TEXTURE_SAMPLERS> m_current_textures = {};
std::array<VkSampler, MAX_TEXTURE_SAMPLERS> m_current_samplers = {};
VulkanTextureBuffer* m_current_texture_buffer = nullptr;
GSVector4i m_current_viewport = {};
GSVector4i m_current_viewport = GSVector4i::cxpr(0, 0, 1, 1);
GSVector4i m_current_scissor = GSVector4i::cxpr(0, 0, 1, 1);
};

View file

@ -207,7 +207,8 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
gpb.AddDynamicState(VK_DYNAMIC_STATE_VIEWPORT);
gpb.AddDynamicState(VK_DYNAMIC_STATE_SCISSOR);
gpb.SetPipelineLayout(m_pipeline_layouts[static_cast<u8>(config.layout)]);
gpb.SetPipelineLayout(m_pipeline_layouts[static_cast<size_t>(GetPipelineLayoutType(config.render_pass_flags))]
[static_cast<size_t>(config.layout)]);
if (m_optional_extensions.vk_khr_dynamic_rendering && (m_optional_extensions.vk_khr_dynamic_rendering_local_read ||
!(config.render_pass_flags & GPUPipeline::ColorFeedbackLoop)))

View file

@ -124,7 +124,8 @@ std::unique_ptr<VulkanTexture> VulkanTexture::Create(u32 width, u32 height, u32
{
DebugAssert(levels == 1);
ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT;
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
}
break;