diff --git a/src/core/core.props b/src/core/core.props index 9bd5357f4..3d34ca16e 100644 --- a/src/core/core.props +++ b/src/core/core.props @@ -10,7 +10,7 @@ ENABLE_MMAP_FASTMEM=1;%(PreprocessorDefinitions) ENABLE_NEWREC=1;%(PreprocessorDefinitions) - %(AdditionalIncludeDirectories);$(SolutionDir)dep\xxhash\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include;$(SolutionDir)dep\rapidjson\include;$(SolutionDir)dep\discord-rpc\include + %(AdditionalIncludeDirectories);$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include;$(SolutionDir)dep\rapidjson\include;$(SolutionDir)dep\discord-rpc\include %(AdditionalIncludeDirectories);$(SolutionDir)dep\rainterface %(AdditionalIncludeDirectories);$(SolutionDir)dep\xbyak\xbyak diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 7a9ea6b04..7dfde3390 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1579,7 +1579,7 @@ bool GPU::CompileDisplayPipeline() plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - plconfig.color_format = g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8; + plconfig.SetTargetFormats(g_gpu_device->HasSurface() ? 
g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8); plconfig.depth_format = GPUTexture::Format::Unknown; plconfig.samples = 1; plconfig.per_sample_shading = false; @@ -1670,9 +1670,10 @@ bool GPU::PresentDisplay() return RenderDisplay(nullptr, draw_rect, true); } -bool GPU::RenderDisplay(GPUFramebuffer* target, const Common::Rectangle& draw_rect, bool postfx) +bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_rect, bool postfx) { - GL_SCOPE_FMT("RenderDisplay: {}x{} at {},{}", draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight()); + GL_SCOPE_FMT("RenderDisplay: {}x{} at {},{}", draw_rect.left, draw_rect.top, draw_rect.GetWidth(), + draw_rect.GetHeight()); if (m_display_texture) m_display_texture->MakeReadyForSampling(); @@ -1716,8 +1717,7 @@ bool GPU::RenderDisplay(GPUFramebuffer* target, const Common::Rectangle& dr break; } - const GPUTexture::Format hdformat = - (target && target->GetRT()) ? target->GetRT()->GetFormat() : g_gpu_device->GetWindowFormat(); + const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetWindowFormat(); const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetWindowWidth(); const u32 target_height = target ? 
target->GetHeight() : g_gpu_device->GetWindowHeight(); const bool really_postfx = (postfx && HasDisplayTexture() && PostProcessing::IsActive() && @@ -1725,12 +1725,12 @@ bool GPU::RenderDisplay(GPUFramebuffer* target, const Common::Rectangle& dr if (really_postfx) { g_gpu_device->ClearRenderTarget(PostProcessing::GetInputTexture(), 0); - g_gpu_device->SetFramebuffer(PostProcessing::GetInputFramebuffer()); + g_gpu_device->SetRenderTarget(PostProcessing::GetInputTexture()); } else { if (target) - g_gpu_device->SetFramebuffer(target); + g_gpu_device->SetRenderTarget(target); else if (!g_gpu_device->BeginPresent(false)) return false; } @@ -2059,16 +2059,10 @@ bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangl if (!render_texture) return false; - std::unique_ptr render_fb = g_gpu_device->CreateFramebuffer(render_texture.get()); - if (!render_fb) - return false; - g_gpu_device->ClearRenderTarget(render_texture.get(), 0); // TODO: this should use copy shader instead. 
- RenderDisplay(render_fb.get(), draw_rect, postfx); - - g_gpu_device->SetFramebuffer(nullptr); + RenderDisplay(render_texture.get(), draw_rect, postfx); const u32 stride = GPUTexture::GetPixelSize(hdformat) * width; out_pixels->resize(width * height); diff --git a/src/core/gpu.h b/src/core/gpu.h index f73cbd880..8864705c7 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -23,7 +23,6 @@ class StateWrapper; class GPUDevice; -class GPUFramebuffer; class GPUTexture; class GPUPipeline; @@ -585,7 +584,7 @@ protected: float* out_top_padding, float* out_scale, float* out_x_scale, bool apply_aspect_ratio = true) const; - bool RenderDisplay(GPUFramebuffer* target, const Common::Rectangle& draw_rect, bool postfx); + bool RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_rect, bool postfx); s32 m_display_width = 0; s32 m_display_height = 0; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 4a423d3aa..f43e147d1 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -317,7 +317,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di void GPU_HW::RestoreDeviceContext() { g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetFramebuffer(m_vram_framebuffer.get()); + g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); g_gpu_device->SetViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); SetScissor(); m_batch_ubo_dirty = true; @@ -649,20 +649,6 @@ bool GPU_HW::CreateBuffers() GL_OBJECT_NAME(m_display_private_texture, "Display Texture"); GL_OBJECT_NAME(m_vram_readback_texture, "VRAM Readback Texture"); - // vram framebuffer has both colour and depth - if (!(m_vram_framebuffer = g_gpu_device->CreateFramebuffer(m_vram_texture.get(), m_vram_depth_texture.get())) || - !(m_vram_update_depth_framebuffer = g_gpu_device->CreateFramebuffer(m_vram_depth_texture.get())) || - !(m_vram_readback_framebuffer = 
g_gpu_device->CreateFramebuffer(m_vram_readback_texture.get())) || - !(m_display_framebuffer = g_gpu_device->CreateFramebuffer(m_display_private_texture.get()))) - { - return false; - } - - GL_OBJECT_NAME(m_vram_framebuffer, "VRAM Framebuffer"); - GL_OBJECT_NAME(m_vram_update_depth_framebuffer, "VRAM Update Depth Framebuffer"); - GL_OBJECT_NAME(m_vram_readback_framebuffer, "VRAM Readback Framebuffer"); - GL_OBJECT_NAME(m_display_framebuffer, "Display Framebuffer"); - if (!(m_vram_upload_buffer = g_gpu_device->CreateTextureBuffer(GPUTextureBuffer::Format::R16UI, GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS))) { @@ -679,11 +665,9 @@ bool GPU_HW::CreateBuffers() GPUTexture::Type::Texture, VRAM_RT_FORMAT)) || !(m_downsample_render_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || - !(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get())) || !(m_downsample_weight_texture = g_gpu_device->CreateTexture(texture_width >> (levels - 1), texture_height >> (levels - 1), 1, 1, 1, - GPUTexture::Type::RenderTarget, GPUTexture::Format::R8)) || - !(m_downsample_weight_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_weight_texture.get()))) + GPUTexture::Type::RenderTarget, GPUTexture::Format::R8))) { return false; } @@ -693,14 +677,13 @@ bool GPU_HW::CreateBuffers() const u32 downsample_scale = GetBoxDownsampleScale(m_resolution_scale); if (!(m_downsample_render_texture = g_gpu_device->CreateTexture(VRAM_WIDTH * downsample_scale, VRAM_HEIGHT * downsample_scale, 1, 1, 1, - GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || - !(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get()))) + GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT))) { return false; } } - g_gpu_device->SetFramebuffer(m_vram_framebuffer.get()); + g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); SetFullVRAMDirtyRectangle(); 
return true; } @@ -719,15 +702,9 @@ void GPU_HW::DestroyBuffers() ClearDisplayTexture(); m_vram_upload_buffer.reset(); - m_downsample_weight_framebuffer.reset(); m_downsample_weight_texture.reset(); - m_downsample_framebuffer.reset(); m_downsample_render_texture.reset(); m_downsample_texture.reset(); - m_display_framebuffer.reset(); - m_vram_readback_framebuffer.reset(); - m_vram_update_depth_framebuffer.reset(); - m_vram_framebuffer.reset(); m_vram_read_texture.reset(); m_vram_depth_texture.reset(); m_vram_texture.reset(); @@ -833,8 +810,7 @@ bool GPU_HW::CompilePipelines() plconfig.input_layout.vertex_stride = sizeof(BatchVertex); plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.primitive = GPUPipeline::Primitive::Triangles; - plconfig.color_format = VRAM_RT_FORMAT; - plconfig.depth_format = VRAM_DS_FORMAT; + plconfig.SetTargetFormats(VRAM_RT_FORMAT, VRAM_DS_FORMAT); plconfig.samples = m_multisamples; plconfig.per_sample_shading = m_per_sample_shading; plconfig.geometry_shader = nullptr; @@ -1080,8 +1056,7 @@ bool GPU_HW::CompilePipelines() return false; plconfig.fragment_shader = fs.get(); - plconfig.color_format = GPUTexture::Format::Unknown; - plconfig.depth_format = VRAM_DS_FORMAT; + plconfig.SetTargetFormats(GPUTexture::Format::Unknown, VRAM_DS_FORMAT); plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState(); plconfig.blend.write_mask = 0; @@ -1093,8 +1068,7 @@ bool GPU_HW::CompilePipelines() progress.Increment(); } - plconfig.color_format = VRAM_RT_FORMAT; - plconfig.depth_format = GPUTexture::Format::Unknown; + plconfig.SetTargetFormats(VRAM_RT_FORMAT); plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); plconfig.samples = 1; @@ -1181,7 +1155,7 @@ bool GPU_HW::CompilePipelines() return false; GL_OBJECT_NAME(fs, "Downsample Blur Pass Fragment Shader"); plconfig.fragment_shader = fs.get(); - plconfig.color_format = GPUTexture::Format::R8; + 
plconfig.SetTargetFormats(GPUTexture::Format::R8); if (!(m_downsample_blur_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; GL_OBJECT_NAME(m_downsample_blur_pass_pipeline, "Downsample Blur Pass Pipeline"); @@ -1193,7 +1167,7 @@ bool GPU_HW::CompilePipelines() GL_OBJECT_NAME(fs, "Downsample Composite Pass Fragment Shader"); plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; plconfig.fragment_shader = fs.get(); - plconfig.color_format = VRAM_RT_FORMAT; + plconfig.SetTargetFormats(VRAM_RT_FORMAT); if (!(m_downsample_composite_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; GL_OBJECT_NAME(m_downsample_composite_pass_pipeline, "Downsample Blur Pass Pipeline"); @@ -1310,14 +1284,15 @@ void GPU_HW::UpdateDepthBufferFromMaskBit() // Viewport should already be set full, only need to fudge the scissor. g_gpu_device->SetScissor(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); - g_gpu_device->SetFramebuffer(m_vram_update_depth_framebuffer.get()); + g_gpu_device->InvalidateRenderTarget(m_vram_depth_texture.get()); + g_gpu_device->SetRenderTargets(nullptr, 0, m_vram_depth_texture.get()); g_gpu_device->SetPipeline(m_vram_update_depth_pipeline.get()); g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->Draw(3, 0); // Restore. g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetFramebuffer(m_vram_framebuffer.get()); + g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); SetScissor(); } @@ -2061,7 +2036,7 @@ bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u3 } } - g_gpu_device->SetFramebuffer(m_vram_framebuffer.get()); // TODO: needed? + g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); // TODO: needed? 
g_gpu_device->SetTextureSampler(0, m_vram_replacement_texture.get(), g_gpu_device->GetLinearSampler()); g_gpu_device->SetPipeline(m_copy_pipeline.get()); g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height); @@ -2364,7 +2339,7 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) // Encode the 24-bit texture as 16-bit. const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; - g_gpu_device->SetFramebuffer(m_vram_readback_framebuffer.get()); + g_gpu_device->SetRenderTarget(m_vram_readback_texture.get()); g_gpu_device->SetPipeline(m_vram_readback_pipeline.get()); g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetViewportAndScissor(0, 0, encoded_width, encoded_height); @@ -2811,7 +2786,7 @@ void GPU_HW::UpdateDisplay() if (interlaced == InterlacedRenderMode::None) g_gpu_device->InvalidateRenderTarget(m_display_private_texture.get()); - g_gpu_device->SetFramebuffer(m_display_framebuffer.get()); + g_gpu_device->SetRenderTarget(m_display_private_texture.get()); g_gpu_device->SetPipeline( m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].get()); g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); @@ -2883,7 +2858,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top uniforms.lod = static_cast(level - 1); g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0); - g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get()); + g_gpu_device->SetRenderTarget(m_downsample_render_texture.get()); g_gpu_device->SetViewportAndScissor(0, 0, level_width, level_height); g_gpu_device->SetPipeline((level == 1) ? 
m_downsample_first_pass_pipeline.get() : m_downsample_mid_pass_pipeline.get()); @@ -2912,7 +2887,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top m_downsample_render_texture->MakeReadyForSampling(); g_gpu_device->ClearRenderTarget(m_downsample_weight_texture.get(), 0); - g_gpu_device->SetFramebuffer(m_downsample_weight_framebuffer.get()); + g_gpu_device->SetRenderTarget(m_downsample_weight_texture.get()); g_gpu_device->SetTextureSampler(0, m_downsample_render_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetViewportAndScissor(0, 0, last_width, last_height); g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get()); @@ -2926,7 +2901,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top GL_SCOPE("Composite"); g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0); - g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get()); + g_gpu_device->SetRenderTarget(m_downsample_render_texture.get()); g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_composite_sampler.get()); g_gpu_device->SetTextureSampler(1, m_downsample_weight_texture.get(), m_downsample_lod_sampler.get()); g_gpu_device->SetViewportAndScissor(0, 0, width, height); @@ -2953,7 +2928,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to source->MakeReadyForSampling(); g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0); - g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get()); + g_gpu_device->SetRenderTarget(m_downsample_render_texture.get()); g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get()); g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler()); g_gpu_device->SetViewportAndScissor(ds_left, ds_top, ds_width, ds_height); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 4ab7943a4..64857c82c 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -213,11 +213,6 @@ private: 
std::unique_ptr m_vram_replacement_texture; std::unique_ptr m_display_private_texture; // TODO: Move to base. - std::unique_ptr m_vram_framebuffer; - std::unique_ptr m_vram_update_depth_framebuffer; - std::unique_ptr m_vram_readback_framebuffer; - std::unique_ptr m_display_framebuffer; - std::unique_ptr m_vram_upload_buffer; std::unique_ptr m_vram_write_texture; @@ -288,9 +283,7 @@ private: std::unique_ptr m_downsample_texture; std::unique_ptr m_downsample_render_texture; - std::unique_ptr m_downsample_framebuffer; std::unique_ptr m_downsample_weight_texture; - std::unique_ptr m_downsample_weight_framebuffer; std::unique_ptr m_downsample_first_pass_pipeline; std::unique_ptr m_downsample_mid_pass_pipeline; std::unique_ptr m_downsample_blur_pass_pipeline; diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index a867c1459..29437323a 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -4,4 +4,4 @@ #pragma once #include "common/types.h" -static constexpr u32 SHADER_CACHE_VERSION = 10; \ No newline at end of file +static constexpr u32 SHADER_CACHE_VERSION = 11; \ No newline at end of file diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 22034bf24..35c765fad 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -23,6 +23,7 @@ add_library(util cue_parser.h gpu_device.cpp gpu_device.h + gpu_framebuffer_manager.h gpu_shader_cache.cpp gpu_shader_cache.h gpu_texture.cpp @@ -72,7 +73,7 @@ target_precompile_headers(util PRIVATE "pch.h") target_include_directories(util PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..") target_include_directories(util PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") target_link_libraries(util PUBLIC common simpleini imgui) -target_link_libraries(util PRIVATE stb libchdr zlib soundtouch Zstd::Zstd reshadefx) +target_link_libraries(util PRIVATE stb libchdr zlib soundtouch xxhash Zstd::Zstd reshadefx) if(ENABLE_CUBEB) target_sources(util PRIVATE diff --git 
a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index 137bf56ee..460aa0445 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -536,24 +536,35 @@ void D3D11Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 src11->GetD3DTexture(), 0, dst11->GetDXGIFormat()); } +bool D3D11Device::IsRenderTargetBound(const GPUTexture* tex) const +{ + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + return true; + } + + return false; +} + void D3D11Device::ClearRenderTarget(GPUTexture* t, u32 c) { GPUDevice::ClearRenderTarget(t, c); - if (m_current_framebuffer && m_current_framebuffer->GetRT() == t) + if (IsRenderTargetBound(t)) static_cast(t)->CommitClear(m_context.Get()); } void D3D11Device::ClearDepth(GPUTexture* t, float d) { GPUDevice::ClearDepth(t, d); - if (m_current_framebuffer && m_current_framebuffer->GetDS() == t) + if (m_current_depth_target == t) static_cast(t)->CommitClear(m_context.Get()); } void D3D11Device::InvalidateRenderTarget(GPUTexture* t) { GPUDevice::InvalidateRenderTarget(t); - if (m_current_framebuffer && (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t)) + if (t->IsRenderTarget() ? 
IsRenderTargetBound(t) : (m_current_depth_target == t)) static_cast(t)->CommitClear(m_context.Get()); } @@ -613,13 +624,15 @@ bool D3D11Device::BeginPresent(bool skip_present) static constexpr float clear_color[4] = {0.0f, 0.0f, 0.0f, 1.0f}; m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), clear_color); m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr); - m_current_framebuffer = nullptr; + m_num_current_render_targets = 0; + std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); + m_current_depth_target = nullptr; return true; } void D3D11Device::EndPresent() { - DebugAssert(!m_current_framebuffer); + DebugAssert(m_num_current_render_targets == 0 && !m_current_depth_target); if (!m_vsync_enabled && m_gpu_timing_enabled) PopTimestampQuery(); @@ -873,34 +886,17 @@ void D3D11Device::UnmapUniformBuffer(u32 size) m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); } -void D3D11Device::SetFramebuffer(GPUFramebuffer* fb) +void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) { - if (m_current_framebuffer == fb) - return; + ID3D11RenderTargetView* rtvs[MAX_RENDER_TARGETS]; - m_current_framebuffer = static_cast(fb); - if (!m_current_framebuffer) - { - m_context->OMSetRenderTargets(0, nullptr, nullptr); - return; - } + bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); + m_current_depth_target = static_cast(ds); // Make sure textures aren't bound. 
- if (D3D11Texture* rt = static_cast(fb->GetRT()); rt) + if (ds) { - const ID3D11ShaderResourceView* srv = rt->GetD3DSRV(); - for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) - { - if (m_current_textures[i] == srv) - { - m_current_textures[i] = nullptr; - m_context->PSSetShaderResources(i, 1, &m_current_textures[i]); - } - } - } - if (D3D11Texture* ds = static_cast(fb->GetDS()); ds) - { - const ID3D11ShaderResourceView* srv = ds->GetD3DSRV(); + const ID3D11ShaderResourceView* srv = static_cast(ds)->GetD3DSRV(); for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) { if (m_current_textures[i] == srv) @@ -911,18 +907,31 @@ void D3D11Device::SetFramebuffer(GPUFramebuffer* fb) } } - m_current_framebuffer->CommitClear(m_context.Get()); - m_context->OMSetRenderTargets(m_current_framebuffer->GetNumRTVs(), m_current_framebuffer->GetRTVArray(), - m_current_framebuffer->GetDSV()); -} + for (u32 i = 0; i < num_rts; i++) + { + D3D11Texture* const dt = static_cast(rts[i]); + changed |= m_current_render_targets[i] != dt; + m_current_render_targets[i] = dt; + rtvs[i] = dt->GetD3DRTV(); + dt->CommitClear(m_context.Get()); -void D3D11Device::UnbindFramebuffer(D3D11Framebuffer* fb) -{ - if (m_current_framebuffer != fb) + const ID3D11ShaderResourceView* srv = dt->GetD3DSRV(); + for (u32 j = 0; j < MAX_TEXTURE_SAMPLERS; j++) + { + if (m_current_textures[j] == srv) + { + m_current_textures[j] = nullptr; + m_context->PSSetShaderResources(j, 1, &m_current_textures[j]); + } + } + } + for (u32 i = num_rts; i < m_num_current_render_targets; i++) + m_current_render_targets[i] = nullptr; + m_num_current_render_targets = num_rts; + if (!changed) return; - m_current_framebuffer = nullptr; - m_context->OMSetRenderTargets(0, nullptr, nullptr); + m_context->OMSetRenderTargets(num_rts, rtvs, ds ? 
static_cast(ds)->GetD3DDSV() : nullptr); } void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) @@ -931,8 +940,7 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s ID3D11SamplerState* S = sampler ? static_cast(sampler)->GetSamplerState() : nullptr; // Runtime will null these if we don't... - DebugAssert(!m_current_framebuffer || !texture || - (m_current_framebuffer->GetRT() != texture && m_current_framebuffer->GetDS() != texture)); + DebugAssert(!texture || (!IsRenderTargetBound(texture) && m_current_depth_target != texture)); if (m_current_textures[slot] != T) { @@ -970,8 +978,23 @@ void D3D11Device::UnbindTexture(D3D11Texture* tex) } } - if (m_current_framebuffer && m_current_framebuffer->GetRT() == tex) - SetFramebuffer(nullptr); + if (tex->IsRenderTarget()) + { + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + { + Log_WarningPrint("Unbinding current RT"); + SetRenderTargets(nullptr, 0, m_current_depth_target); + break; + } + } + } + else if (m_current_depth_target == tex) + { + Log_WarningPrint("Unbinding current DS"); + SetRenderTargets(nullptr, 0, nullptr); + } } void D3D11Device::SetViewport(s32 x, s32 y, s32 width, s32 height) diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index 67b12364a..d41c3deb3 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -17,7 +17,6 @@ #include #include -class D3D11Framebuffer; class D3D11Pipeline; class D3D11Shader; class D3D11Texture; @@ -66,8 +65,6 @@ public: void ClearDepth(GPUTexture* t, float d) override; void InvalidateRenderTarget(GPUTexture* t) override; - std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override; - std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, std::span data) override; std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, const char* entry_point, DynamicHeapArray*
binary) override; @@ -85,7 +82,7 @@ public: void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; - void SetFramebuffer(GPUFramebuffer* fb) override; + void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -104,14 +101,14 @@ public: bool BeginPresent(bool skip_present) override; void EndPresent() override; - void UnbindFramebuffer(D3D11Framebuffer* fb); void UnbindPipeline(D3D11Pipeline* pl); void UnbindTexture(D3D11Texture* tex); static AdapterAndModeList StaticGetAdapterAndModeList(); protected: - bool CreateDevice(const std::string_view& adapter, bool threaded_presentation, FeatureMask disabled_features) override; + bool CreateDevice(const std::string_view& adapter, bool threaded_presentation, + FeatureMask disabled_features) override; void DestroyDevice() override; private: @@ -141,6 +138,8 @@ private: bool CreateBuffers(); void DestroyBuffers(); + bool IsRenderTargetBound(const GPUTexture* tex) const; + ComPtr GetRasterizationState(const GPUPipeline::RasterizationState& rs); ComPtr GetDepthState(const GPUPipeline::DepthState& ds); ComPtr GetBlendState(const GPUPipeline::BlendState& bs); @@ -178,8 +177,10 @@ private: D3D11StreamBuffer m_index_buffer; D3D11StreamBuffer m_uniform_buffer; - D3D11Framebuffer* m_current_framebuffer = nullptr; D3D11Pipeline* m_current_pipeline = nullptr; + std::array m_current_render_targets = {}; + u32 m_num_current_render_targets = 0; + D3D11Texture* m_current_depth_target = nullptr; ID3D11InputLayout* m_current_input_layout = nullptr; ID3D11VertexShader* m_current_vertex_shader = nullptr; diff --git a/src/util/d3d11_texture.cpp b/src/util/d3d11_texture.cpp index f6ec3f63b..e5b280e3d 100644 --- 
a/src/util/d3d11_texture.cpp +++ b/src/util/d3d11_texture.cpp @@ -93,73 +93,6 @@ bool D3D11Device::SupportsTextureFormat(GPUTexture::Format format) const return (SUCCEEDED(m_device->CheckFormatSupport(dfmt, &support)) && ((support & required) == required)); } -D3D11Framebuffer::D3D11Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, - ComPtr rtv, ComPtr dsv) - : GPUFramebuffer(rt, ds, width, height), m_rtv(std::move(rtv)), m_dsv(std::move(dsv)) -{ -} - -D3D11Framebuffer::~D3D11Framebuffer() -{ - D3D11Device::GetInstance().UnbindFramebuffer(this); -} - -void D3D11Framebuffer::SetDebugName(const std::string_view& name) -{ - if (m_rtv) - SetD3DDebugObjectName(m_rtv.Get(), fmt::format("{} RTV", name)); - if (m_dsv) - SetD3DDebugObjectName(m_dsv.Get(), fmt::format("{} DSV", name)); -} - -void D3D11Framebuffer::CommitClear(ID3D11DeviceContext1* context) -{ - if (m_rt && m_rt->GetState() != GPUTexture::State::Dirty) [[unlikely]] - { - if (m_rt->GetState() == GPUTexture::State::Invalidated) - context->DiscardView(m_rtv.Get()); - else - context->ClearRenderTargetView(m_rtv.Get(), m_rt->GetUNormClearColor().data()); - - m_rt->SetState(GPUTexture::State::Dirty); - } - - if (m_ds && m_ds->GetState() != GPUTexture::State::Dirty) [[unlikely]] - { - if (m_ds->GetState() == GPUTexture::State::Invalidated) - context->DiscardView(m_dsv.Get()); - else - context->ClearDepthStencilView(m_dsv.Get(), D3D11_CLEAR_DEPTH, m_ds->GetClearDepth(), 0); - - m_ds->SetState(GPUTexture::State::Dirty); - } -} - -std::unique_ptr D3D11Device::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds) -{ - DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds))); - D3D11Texture* RT = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds); - D3D11Texture* DS = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? 
rt_or_ds : ds); - - ComPtr rtv; - if (RT) - { - rtv = RT->GetD3DRTV(); - Assert(rtv); - } - - ComPtr dsv; - if (DS) - { - dsv = DS->GetD3DDSV(); - Assert(dsv); - } - - return std::unique_ptr(new D3D11Framebuffer(RT, DS, RT ? RT->GetWidth() : DS->GetWidth(), - RT ? RT->GetHeight() : DS->GetHeight(), std::move(rtv), - std::move(dsv))); -} - D3D11Sampler::D3D11Sampler(ComPtr ss) : m_ss(std::move(ss)) { } diff --git a/src/util/d3d11_texture.h b/src/util/d3d11_texture.h index a93c614f8..97c8c025a 100644 --- a/src/util/d3d11_texture.h +++ b/src/util/d3d11_texture.h @@ -13,32 +13,6 @@ class D3D11Device; -class D3D11Framebuffer final : public GPUFramebuffer -{ - friend D3D11Device; - - template - using ComPtr = Microsoft::WRL::ComPtr; - -public: - ~D3D11Framebuffer() override; - - ALWAYS_INLINE u32 GetNumRTVs() const { return m_rtv ? 1 : 0; } - ALWAYS_INLINE ID3D11RenderTargetView* GetRTV() const { return m_rtv.Get(); } - ALWAYS_INLINE ID3D11RenderTargetView* const* GetRTVArray() const { return m_rtv.GetAddressOf(); } - ALWAYS_INLINE ID3D11DepthStencilView* GetDSV() const { return m_dsv.Get(); } - - void SetDebugName(const std::string_view& name) override; - void CommitClear(ID3D11DeviceContext1* context); - -private: - D3D11Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, ComPtr rtv, - ComPtr dsv); - - ComPtr m_rtv; - ComPtr m_dsv; -}; - class D3D11Sampler final : public GPUSampler { friend D3D11Device; diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index 508ca998d..c51c5b53e 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -570,12 +570,9 @@ void D3D12Device::SubmitCommandListAndRestartRenderPass(const char* reason) if (InRenderPass()) EndRenderPass(); - D3D12Framebuffer* fb = m_current_framebuffer; D3D12Pipeline* pl = m_current_pipeline; SubmitCommandList(false, "%s", reason); - if (fb) - SetFramebuffer(fb); SetPipeline(pl); BeginRenderPass(); } @@ -1091,7 +1088,7 @@ bool D3D12Device::BeginPresent(bool 
frame_skip) void D3D12Device::EndPresent() { - DebugAssert(InRenderPass() && !m_current_framebuffer); + DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target); EndRenderPass(); const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer]; @@ -1316,25 +1313,22 @@ void D3D12Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 void D3D12Device::ClearRenderTarget(GPUTexture* t, u32 c) { GPUDevice::ClearRenderTarget(t, c); - if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetRT() == t) + if (InRenderPass() && IsRenderTargetBound(t)) EndRenderPass(); } void D3D12Device::ClearDepth(GPUTexture* t, float d) { GPUDevice::ClearDepth(t, d); - if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetDS() == t) + if (InRenderPass() && m_current_depth_target == t) EndRenderPass(); } void D3D12Device::InvalidateRenderTarget(GPUTexture* t) { GPUDevice::InvalidateRenderTarget(t); - if (InRenderPass() && m_current_framebuffer && - (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t)) - { + if (InRenderPass() && (t->IsRenderTarget() ? 
IsRenderTargetBound(t) : (m_current_depth_target == t))) EndRenderPass(); - } } bool D3D12Device::CreateBuffers() @@ -1530,60 +1524,71 @@ void D3D12Device::DestroyRootSignatures() it->Reset(); } -void D3D12Device::SetFramebuffer(GPUFramebuffer* fb) +void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) { - if (m_current_framebuffer == fb) - return; - if (InRenderPass()) EndRenderPass(); - m_current_framebuffer = static_cast(fb); + ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); + + m_current_depth_target = static_cast(ds); + for (u32 i = 0; i < num_rts; i++) + { + D3D12Texture* const dt = static_cast(rts[i]); + m_current_render_targets[i] = dt; + dt->CommitClear(cmdlist); + } + for (u32 i = num_rts; i < m_num_current_render_targets; i++) + m_current_render_targets[i] = nullptr; + m_num_current_render_targets = num_rts; } void D3D12Device::BeginRenderPass() { DebugAssert(!InRenderPass()); - D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc; + std::array rt_desc; D3D12_RENDER_PASS_DEPTH_STENCIL_DESC ds_desc; - const D3D12_RENDER_PASS_RENDER_TARGET_DESC* rt_desc_p = nullptr; - const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC* ds_desc_p = nullptr; + + D3D12_RENDER_PASS_RENDER_TARGET_DESC* rt_desc_p = nullptr; + D3D12_RENDER_PASS_DEPTH_STENCIL_DESC* ds_desc_p = nullptr; + u32 num_rt_descs = 0; ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); - if (m_current_framebuffer) [[likely]] + if (m_num_current_render_targets > 0 || m_current_depth_target) [[likely]] { - D3D12Texture* rt = static_cast(m_current_framebuffer->GetRT()); - if (rt) + for (u32 i = 0; i < m_num_current_render_targets; i++) { + D3D12Texture* const rt = m_current_render_targets[i]; rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET); rt->SetUseFenceValue(GetCurrentFenceValue()); - rt_desc_p = &rt_desc; - rt_desc.cpuDescriptor = rt->GetWriteDescriptor(); - rt_desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE; + + 
D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i]; + desc.cpuDescriptor = rt->GetWriteDescriptor(); + desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE; switch (rt->GetState()) { case GPUTexture::State::Cleared: { - rt_desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR; - std::memcpy(rt_desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(), - sizeof(rt_desc.BeginningAccess.Clear.ClearValue.Color)); + desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR; + std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(), + sizeof(desc.BeginningAccess.Clear.ClearValue.Color)); rt->SetState(GPUTexture::State::Dirty); } break; case GPUTexture::State::Invalidated: { - rt_desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD; + desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD; rt->SetState(GPUTexture::State::Dirty); } break; case GPUTexture::State::Dirty: { - rt_desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE; + desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE; } break; @@ -1593,9 +1598,11 @@ void D3D12Device::BeginRenderPass() } } - D3D12Texture* ds = static_cast(m_current_framebuffer->GetDS()); - if (ds) + rt_desc_p = (m_num_current_render_targets > 0) ? rt_desc.data() : nullptr; + num_rt_descs = m_num_current_render_targets; + if (m_current_depth_target) { + D3D12Texture* const ds = m_current_depth_target; ds->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE); ds->SetUseFenceValue(GetCurrentFenceValue()); ds_desc_p = &ds_desc; @@ -1631,16 +1638,19 @@ void D3D12Device::BeginRenderPass() UnreachableCode(); break; } + + ds_desc_p = &ds_desc; } } else { // Re-rendering to swap chain. 
const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer]; - rt_desc = {swap_chain_buf.second, - {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, {}}, - {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}}; - rt_desc_p = &rt_desc; + rt_desc[0] = {swap_chain_buf.second, + {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, {}}, + {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}}; + rt_desc_p = &rt_desc[0]; + num_rt_descs = 1; } // All textures should be in shader read only optimal already, but just in case.. @@ -1652,7 +1662,7 @@ void D3D12Device::BeginRenderPass() } DebugAssert(rt_desc_p || ds_desc_p); - cmdlist->BeginRenderPass(rt_desc_p ? 1 : 0, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE); + cmdlist->BeginRenderPass(num_rt_descs, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE); // TODO: Stats m_in_render_pass = true; @@ -1686,7 +1696,9 @@ void D3D12Device::BeginSwapChainRenderPass() {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}}; cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE); - m_current_framebuffer = nullptr; + std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); + m_num_current_render_targets = 0; + m_current_depth_target = nullptr; m_in_render_pass = true; // Clear pipeline, it's likely incompatible. @@ -1708,29 +1720,6 @@ void D3D12Device::EndRenderPass() GetCommandList()->EndRenderPass(); } -void D3D12Device::UnbindFramebuffer(D3D12Framebuffer* fb) -{ - if (m_current_framebuffer != fb) - return; - - if (InRenderPass()) - EndRenderPass(); - m_current_framebuffer = nullptr; -} - -void D3D12Device::UnbindFramebuffer(D3D12Texture* tex) -{ - if (!m_current_framebuffer) - return; - - if (m_current_framebuffer->GetRT() != tex && m_current_framebuffer->GetDS() != tex) - return; - - if (InRenderPass()) - EndRenderPass(); - m_current_framebuffer = nullptr; -} - void D3D12Device::SetPipeline(GPUPipeline* pipeline) { // First draw? Bind everything. 
@@ -1789,11 +1778,21 @@ void D3D12Device::UnbindPipeline(D3D12Pipeline* pl) m_current_pipeline = nullptr; } +bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const +{ + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + return true; + } + + return false; +} + void D3D12Device::InvalidateCachedState() { m_dirty_flags = ALL_DIRTY_STATE; m_in_render_pass = false; - m_current_framebuffer = nullptr; m_current_pipeline = nullptr; m_current_vertex_stride = 0; m_current_blend_constant = 0; @@ -1901,6 +1900,28 @@ void D3D12Device::UnbindTexture(D3D12Texture* tex) m_dirty_flags |= DIRTY_FLAG_TEXTURES; } } + + if (tex->IsRenderTarget()) + { + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + { + if (InRenderPass()) + EndRenderPass(); + m_current_render_targets[i] = nullptr; + } + } + } + else if (tex->IsDepthStencil()) + { + if (m_current_depth_target == tex) + { + if (InRenderPass()) + EndRenderPass(); + m_current_depth_target = nullptr; + } + } } void D3D12Device::UnbindTextureBuffer(D3D12TextureBuffer* buf) diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h index d64ca4641..177ccd5cb 100644 --- a/src/util/d3d12_device.h +++ b/src/util/d3d12_device.h @@ -86,8 +86,6 @@ public: void ClearDepth(GPUTexture* t, float d) override; void InvalidateRenderTarget(GPUTexture* t) override; - std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override; - std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, std::span data) override; std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, const char* entry_point, DynamicHeapArray* out_binary) override; @@ -105,7 +103,7 @@ public: void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; - void SetFramebuffer(GPUFramebuffer* fb) override; + void 
SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -172,8 +170,6 @@ public: void SubmitCommandList(bool wait_for_completion, const char* reason, ...); void SubmitCommandListAndRestartRenderPass(const char* reason); - void UnbindFramebuffer(D3D12Framebuffer* fb); - void UnbindFramebuffer(D3D12Texture* tex); void UnbindPipeline(D3D12Pipeline* pl); void UnbindTexture(D3D12Texture* tex); void UnbindTextureBuffer(D3D12TextureBuffer* buf); @@ -246,6 +242,8 @@ private: bool CreateDSVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh); bool CreateUAVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh); + bool IsRenderTargetBound(const GPUTexture* tex) const; + bool CheckDownloadBufferSize(u32 required_size); void DestroyDownloadBuffer(); @@ -330,10 +328,11 @@ private: // Which bindings/state has to be updated before the next draw. 
u32 m_dirty_flags = ALL_DIRTY_STATE; - D3D12Framebuffer* m_current_framebuffer = nullptr; - D3D12Pipeline* m_current_pipeline = nullptr; D3D12_PRIMITIVE_TOPOLOGY m_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + u32 m_num_current_render_targets = 0; + std::array m_current_render_targets = {}; + D3D12Texture* m_current_depth_target = nullptr; u32 m_current_vertex_stride = 0; u32 m_current_blend_constant = 0; GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants; diff --git a/src/util/d3d12_pipeline.cpp b/src/util/d3d12_pipeline.cpp index f5ba720a4..318afb564 100644 --- a/src/util/d3d12_pipeline.cpp +++ b/src/util/d3d12_pipeline.cpp @@ -88,7 +88,7 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config) hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize()); if (const D3D12Shader* shader = static_cast(config.geometry_shader)) hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize()); - hash.Update(&config.color_format, sizeof(config.color_format)); + hash.Update(&config.color_formats, sizeof(config.color_formats)); hash.Update(&config.depth_format, sizeof(config.depth_format)); hash.Update(&config.samples, sizeof(config.samples)); hash.Update(&config.per_sample_shading, sizeof(config.per_sample_shading)); @@ -212,8 +212,11 @@ std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::Grap blend_mapping[static_cast(config.blend.dst_alpha_blend.GetValue())], op_mapping[static_cast(config.blend.alpha_blend_op.GetValue())], config.blend.write_mask); - if (config.color_format != GPUTexture::Format::Unknown) - gpb.SetRenderTarget(0, D3DCommon::GetFormatMapping(config.color_format).rtv_format); + for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) + { + if (config.color_formats[i] != GPUTexture::Format::Unknown) + gpb.SetRenderTarget(i, D3DCommon::GetFormatMapping(config.color_formats[i]).rtv_format); + } if (config.depth_format != GPUTexture::Format::Unknown) 
gpb.SetDepthStencilFormat(D3DCommon::GetFormatMapping(config.depth_format).dsv_format); diff --git a/src/util/d3d12_texture.cpp b/src/util/d3d12_texture.cpp index 59310e7a8..3528adcef 100644 --- a/src/util/d3d12_texture.cpp +++ b/src/util/d3d12_texture.cpp @@ -847,43 +847,6 @@ std::unique_ptr D3D12Device::CreateSampler(const GPUSampler::Config& return std::unique_ptr(new D3D12Sampler(std::move(handle))); } -D3D12Framebuffer::D3D12Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, D3D12DescriptorHandle rtv, - D3D12DescriptorHandle dsv) - : GPUFramebuffer(rt, ds, width, height), m_rtv(std::move(rtv)), m_dsv(std::move(dsv)) -{ -} - -D3D12Framebuffer::~D3D12Framebuffer() -{ - D3D12Device& dev = D3D12Device::GetInstance(); - if (m_rtv) - D3D12Device::GetInstance().DeferDescriptorDestruction(dev.GetRTVHeapManager(), &m_rtv); - if (m_dsv) - D3D12Device::GetInstance().DeferDescriptorDestruction(dev.GetDSVHeapManager(), &m_dsv); -} - -void D3D12Framebuffer::SetDebugName(const std::string_view& name) -{ -} - -std::unique_ptr D3D12Device::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds /*= nullptr*/) -{ - DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds))); - D3D12Texture* RT = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds); - D3D12Texture* DS = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? rt_or_ds : ds); - - const u32 width = RT ? RT->GetWidth() : DS->GetWidth(); - const u32 height = RT ? 
RT->GetHeight() : DS->GetHeight(); - - D3D12DescriptorHandle rtv, dsv; - if (RT) - rtv = RT->GetWriteDescriptor(); - if (DS) - dsv = DS->GetWriteDescriptor(); - - return std::unique_ptr(new D3D12Framebuffer(RT, DS, width, height, std::move(rtv), std::move(dsv))); -} - D3D12TextureBuffer::D3D12TextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements) { } diff --git a/src/util/d3d12_texture.h b/src/util/d3d12_texture.h index a5183117f..b170d126c 100644 --- a/src/util/d3d12_texture.h +++ b/src/util/d3d12_texture.h @@ -124,26 +124,6 @@ private: D3D12DescriptorHandle m_descriptor; }; -class D3D12Framebuffer final : public GPUFramebuffer -{ - friend D3D12Device; - -public: - ~D3D12Framebuffer() override; - - ALWAYS_INLINE const D3D12DescriptorHandle& GetRTV() const { return m_rtv; } - ALWAYS_INLINE const D3D12DescriptorHandle& GetDSV() const { return m_dsv; } - - void SetDebugName(const std::string_view& name) override; - -private: - D3D12Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, D3D12DescriptorHandle rtv, - D3D12DescriptorHandle dsv); - - D3D12DescriptorHandle m_rtv; - D3D12DescriptorHandle m_dsv; -}; - class D3D12TextureBuffer final : public GPUTextureBuffer { friend D3D12Device; diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index 80f66044c..ac47d96fa 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -4,6 +4,7 @@ #include "gpu_device.h" #include "core/host.h" // TODO: Remove, needed for getting fullscreen mode. #include "core/settings.h" // TODO: Remove, needed for dump directory. 
+#include "gpu_framebuffer_manager.h" #include "shadergen.h" #include "common/assert.h" @@ -15,6 +16,7 @@ #include "fmt/format.h" #include "imgui.h" +#include "xxhash.h" Log_SetChannel(GPUDevice); @@ -37,13 +39,6 @@ std::unique_ptr g_gpu_device; static std::string s_pipeline_cache_path; -GPUFramebuffer::GPUFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height) - : m_rt(rt), m_ds(ds), m_width(width), m_height(height) -{ -} - -GPUFramebuffer::~GPUFramebuffer() = default; - GPUSampler::GPUSampler() = default; GPUSampler::~GPUSampler() = default; @@ -156,6 +151,15 @@ GPUPipeline::BlendState GPUPipeline::BlendState::GetAlphaBlendingState() return ret; } +void GPUPipeline::GraphicsConfig::SetTargetFormats(GPUTexture::Format color_format, + GPUTexture::Format depth_format_ /* = GPUTexture::Format::Unknown */) +{ + color_formats[0] = color_format; + for (size_t i = 1; i < std::size(color_formats); i++) + color_formats[i] = GPUTexture::Format::Unknown; + depth_format = depth_format_; +} + GPUTextureBuffer::GPUTextureBuffer(Format format, u32 size) : m_format(format), m_size_in_elements(size) { } @@ -171,6 +175,35 @@ u32 GPUTextureBuffer::GetElementSize(Format format) return element_size[static_cast(format)]; } +bool GPUFramebufferManagerBase::Key::operator==(const Key& rhs) const +{ + return (std::memcmp(this, &rhs, sizeof(*this)) == 0); +} + +bool GPUFramebufferManagerBase::Key::operator!=(const Key& rhs) const +{ + return (std::memcmp(this, &rhs, sizeof(*this)) != 0); +} + +bool GPUFramebufferManagerBase::Key::ContainsRT(const GPUTexture* tex) const +{ + // num_rts is worse for predictability. 
+ for (u32 i = 0; i < GPUDevice::MAX_RENDER_TARGETS; i++) + { + if (rts[i] == tex) + return true; + } + return false; +} + +size_t GPUFramebufferManagerBase::KeyHash::operator()(const Key& key) const +{ + if constexpr (sizeof(void*) == 8) + return XXH3_64bits(&key, sizeof(key)); + else + return XXH32(&key, sizeof(key), 0x1337); +} + GPUDevice::~GPUDevice() = default; RenderAPI GPUDevice::GetPreferredAPI() @@ -438,8 +471,7 @@ bool GPUDevice::CreateResources() plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetAlphaBlendingState(); plconfig.blend.write_mask = 0x7; - plconfig.color_format = HasSurface() ? m_window_info.surface_format : GPUTexture::Format::RGBA8; - plconfig.depth_format = GPUTexture::Format::Unknown; + plconfig.SetTargetFormats(HasSurface() ? m_window_info.surface_format : GPUTexture::Format::RGBA8); plconfig.samples = 1; plconfig.per_sample_shading = false; plconfig.vertex_shader = imgui_vs.get(); @@ -547,6 +579,11 @@ void GPUDevice::UploadUniformBuffer(const void* data, u32 data_size) UnmapUniformBuffer(data_size); } +void GPUDevice::SetRenderTarget(GPUTexture* rt, GPUTexture* ds /*= nullptr*/) +{ + SetRenderTargets(rt ? &rt : nullptr, rt ? 
1 : 0, ds); +} + void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height) { SetViewport(x, y, width, height); diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 6f856bb68..dbd841d1b 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -32,27 +32,6 @@ enum class RenderAPI : u32 Metal }; -class GPUFramebuffer -{ -public: - GPUFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height); - virtual ~GPUFramebuffer(); - - ALWAYS_INLINE GPUTexture* GetRT() const { return m_rt; } - ALWAYS_INLINE GPUTexture* GetDS() const { return m_ds; } - - ALWAYS_INLINE u32 GetWidth() const { return m_width; } - ALWAYS_INLINE u32 GetHeight() const { return m_height; } - - virtual void SetDebugName(const std::string_view& name) = 0; - -protected: - GPUTexture* m_rt; - GPUTexture* m_ds; - u32 m_width; - u32 m_height; -}; - class GPUSampler { public: @@ -394,10 +373,13 @@ public: GPUShader* geometry_shader; GPUShader* fragment_shader; - GPUTexture::Format color_format; + GPUTexture::Format color_formats[4]; GPUTexture::Format depth_format; u32 samples; bool per_sample_shading; + + void SetTargetFormats(GPUTexture::Format color_format, + GPUTexture::Format depth_format_ = GPUTexture::Format::Unknown); }; GPUPipeline(); @@ -477,6 +459,8 @@ public: static constexpr u32 MAX_TEXTURE_SAMPLERS = 8; static constexpr u32 MIN_TEXEL_BUFFER_ELEMENTS = 4 * 1024 * 512; + static constexpr u32 MAX_RENDER_TARGETS = 4; + static_assert(sizeof(GPUPipeline::GraphicsConfig::color_formats) == sizeof(GPUTexture::Format) * MAX_RENDER_TARGETS); virtual ~GPUDevice(); @@ -576,9 +560,6 @@ public: virtual void ClearDepth(GPUTexture* t, float d); virtual void InvalidateRenderTarget(GPUTexture* t); - /// Framebuffer abstraction. 
- virtual std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) = 0; - /// Shader abstraction. std::unique_ptr CreateShader(GPUShaderStage stage, const std::string_view& source, const char* entry_point = "main"); @@ -606,12 +587,13 @@ public: void UploadUniformBuffer(const void* data, u32 data_size); /// Drawing setup abstraction. - virtual void SetFramebuffer(GPUFramebuffer* fb) = 0; + virtual void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) = 0; virtual void SetPipeline(GPUPipeline* pipeline) = 0; virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) = 0; virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0; virtual void SetViewport(s32 x, s32 y, s32 width, s32 height) = 0; // TODO: Rectangle virtual void SetScissor(s32 x, s32 y, s32 width, s32 height) = 0; + void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr); void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height); // Drawing abstraction. 
@@ -647,7 +629,8 @@ public: virtual float GetAndResetAccumulatedGPUTime(); protected: - virtual bool CreateDevice(const std::string_view& adapter, bool threaded_presentation, FeatureMask disabled_features) = 0; + virtual bool CreateDevice(const std::string_view& adapter, bool threaded_presentation, + FeatureMask disabled_features) = 0; virtual void DestroyDevice() = 0; std::string GetShaderCacheBaseName(const std::string_view& type) const; diff --git a/src/util/gpu_framebuffer_manager.h b/src/util/gpu_framebuffer_manager.h new file mode 100644 index 000000000..9b4d199d2 --- /dev/null +++ b/src/util/gpu_framebuffer_manager.h @@ -0,0 +1,142 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "gpu_device.h" +#include "gpu_texture.h" + +#include + +class GPUFramebufferManagerBase +{ +protected: + struct Key + { + GPUTexture* rts[GPUDevice::MAX_RENDER_TARGETS]; + GPUTexture* ds; + u32 num_rts; + u32 flags; + + bool operator==(const Key& rhs) const; + bool operator!=(const Key& rhs) const; + + bool ContainsRT(const GPUTexture* tex) const; + }; + + struct KeyHash + { + size_t operator()(const Key& key) const; + }; +}; + +template +class GPUFramebufferManager : public GPUFramebufferManagerBase +{ +public: + GPUFramebufferManager() = default; + ~GPUFramebufferManager(); + + FBOType Lookup(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags); + + void RemoveReferences(const GPUTexture* tex); + void RemoveRTReferences(const GPUTexture* tex); + void RemoveDSReferences(const GPUTexture* tex); + + void Clear(); + +private: + using MapType = std::unordered_map; + + MapType m_map; +}; + +template +GPUFramebufferManager::~GPUFramebufferManager() +{ + Clear(); +} + +template +FBOType GPUFramebufferManager::Lookup(GPUTexture* const* rts, u32 num_rts, + GPUTexture* ds, u32 flags) +{ + Key key; + for (u32 i = 0; i < num_rts; i++) + key.rts[i] = rts[i]; + for (u32 i = num_rts; i < 
GPUDevice::MAX_RENDER_TARGETS; i++) + key.rts[i] = nullptr; + key.ds = ds; + key.num_rts = num_rts; + key.flags = flags; + + auto it = m_map.find(key); + if (it == m_map.end()) + { + FBOType fbo = FactoryFunc(rts, num_rts, ds, flags); + if (!fbo) + return fbo; + + it = m_map.emplace(key, fbo).first; + } + + return it->second; +} + +template +void GPUFramebufferManager::RemoveRTReferences(const GPUTexture* tex) +{ + DebugAssert(tex->IsRenderTarget()); + for (auto it = m_map.begin(); it != m_map.end();) + { + if (!it->first.ContainsRT(tex)) + { + ++it; + continue; + } + + DestroyFunc(it->second); + it = m_map.erase(it); + } +} + +template +void GPUFramebufferManager::RemoveDSReferences(const GPUTexture* tex) +{ + DebugAssert(tex->IsDepthStencil()); + for (auto it = m_map.begin(); it != m_map.end();) + { + if (it->first.ds != tex) + { + ++it; + continue; + } + + DestroyFunc(it->second); + it = m_map.erase(it); + } +} + +template +void GPUFramebufferManager::RemoveReferences(const GPUTexture* tex) +{ + if (tex->IsRenderTarget()) + RemoveRTReferences(tex); + else if (tex->IsDepthStencil()) + RemoveDSReferences(tex); +} + +template +void GPUFramebufferManager::Clear() +{ + for (const auto& it : m_map) + DestroyFunc(it.second); + m_map.clear(); +} diff --git a/src/util/gpu_texture.cpp b/src/util/gpu_texture.cpp index f5f635ea6..ea3126cf1 100644 --- a/src/util/gpu_texture.cpp +++ b/src/util/gpu_texture.cpp @@ -91,6 +91,12 @@ bool GPUTexture::IsDepthFormat(Format format) return (format == Format::D16); } +bool GPUTexture::IsDepthStencilFormat(Format format) +{ + // None needed yet. 
+ return false; +} + bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format) { if (width > MAX_WIDTH || height > MAX_HEIGHT || layers > MAX_LAYERS || levels > MAX_LEVELS || samples > MAX_SAMPLES) diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index c4db7a267..1f81ca0d1 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -91,6 +91,8 @@ public: ALWAYS_INLINE State GetState() const { return m_state; } ALWAYS_INLINE void SetState(State state) { m_state = state; } + ALWAYS_INLINE bool IsDirty() const { return (m_state == State::Dirty); } + ALWAYS_INLINE bool IsClearedOrInvalidated() const { return (m_state != State::Dirty); } ALWAYS_INLINE bool IsRenderTargetOrDepthStencil() const { @@ -118,6 +120,7 @@ public: static u32 GetPixelSize(GPUTexture::Format format); static bool IsDepthFormat(GPUTexture::Format format); + static bool IsDepthStencilFormat(GPUTexture::Format format); static bool ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format); static bool ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector& texture_data, u32& texture_data_stride, diff --git a/src/util/metal_device.h b/src/util/metal_device.h index 520d48cb8..186bcaa60 100644 --- a/src/util/metal_device.h +++ b/src/util/metal_device.h @@ -33,7 +33,6 @@ #include class MetalDevice; -class MetalFramebuffer; class MetalPipeline; class MetalTexture; @@ -161,28 +160,10 @@ private: MetalStreamBuffer m_buffer; }; -class MetalFramebuffer final : public GPUFramebuffer -{ - friend MetalDevice; - -public: - ~MetalFramebuffer() override; - - MTLRenderPassDescriptor* GetDescriptor() const; - - void SetDebugName(const std::string_view& name) override; - -private: - MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id rt_tex, id ds_tex, - MTLRenderPassDescriptor* descriptor); - - id m_rt_tex; - id m_ds_tex; - MTLRenderPassDescriptor* m_descriptor; -}; - 
class MetalDevice final : public GPUDevice { + friend MetalTexture; + public: ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast(g_gpu_device.get()); } ALWAYS_INLINE id GetMTLDevice() { return m_device; } @@ -222,8 +203,6 @@ public: void ClearDepth(GPUTexture* t, float d) override; void InvalidateRenderTarget(GPUTexture* t) override; - std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override; - std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, std::span data) override; std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, const char* entry_point, @@ -242,7 +221,7 @@ public: void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; - void SetFramebuffer(GPUFramebuffer* fb) override; + void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -271,8 +250,6 @@ public: void CommitClear(MetalTexture* tex); - void UnbindFramebuffer(MetalFramebuffer* fb); - void UnbindFramebuffer(MetalTexture* tex); void UnbindPipeline(MetalPipeline* pl); void UnbindTexture(MetalTexture* tex); void UnbindTextureBuffer(MetalTextureBuffer* buf); @@ -283,7 +260,8 @@ public: static AdapterAndModeList StaticGetAdapterAndModeList(); protected: - bool CreateDevice(const std::string_view& adapter, bool threaded_presentation, FeatureMask disabled_features) override; + bool CreateDevice(const std::string_view& adapter, bool threaded_presentation, + FeatureMask disabled_features) override; void DestroyDevice() override; private: @@ -336,6 +314,8 @@ private: bool CreateBuffers(); void DestroyBuffers(); + bool IsRenderTargetBound(const GPUTexture* tex) const; + id m_device; id 
m_queue; @@ -369,7 +349,9 @@ private: id m_render_cmdbuf = nil; id m_render_encoder = nil; - MetalFramebuffer* m_current_framebuffer = nullptr; + std::array m_current_render_targets = {}; + u32 m_num_current_render_targets = 0; + MetalTexture* m_current_depth_target = nullptr; MetalPipeline* m_current_pipeline = nullptr; id m_current_depth_state = nil; diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index 824581c07..16d1c0bf1 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -123,7 +123,8 @@ void MetalDevice::SetVSync(bool enabled) [m_layer setDisplaySyncEnabled:enabled]; } -bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_presentation, FeatureMask disabled_features) +bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_presentation, + FeatureMask disabled_features) { @autoreleasepool { @@ -493,6 +494,17 @@ void MetalDevice::DestroyBuffers() m_depth_states.clear(); } +bool MetalDevice::IsRenderTargetBound(const GPUTexture* tex) const +{ + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + return true; + } + + return false; +} + GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList() { AdapterAndModeList ret; @@ -770,7 +782,12 @@ std::unique_ptr MetalDevice::CreatePipeline(const GPUPipeline::Grap desc.vertexFunction = static_cast(config.vertex_shader)->GetFunction(); desc.fragmentFunction = static_cast(config.fragment_shader)->GetFunction(); - desc.colorAttachments[0].pixelFormat = s_pixel_format_mapping[static_cast(config.color_format)]; + for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) + { + if (config.color_formats[i] == GPUTexture::Format::Unknown) + break; + desc.colorAttachments[i].pixelFormat = s_pixel_format_mapping[static_cast(config.color_formats[i])]; + } desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast(config.depth_format)]; // Input assembly. 
@@ -998,7 +1015,12 @@ void MetalTexture::Unmap() void MetalTexture::MakeReadyForSampling() { - MetalDevice::GetInstance().UnbindFramebuffer(this); + MetalDevice& dev = MetalDevice::GetInstance(); + if (!dev.InRenderPass()) + return; + + if (IsRenderTarget() ? dev.IsRenderTargetBound(this) : (dev.m_current_depth_target == this)) + dev.EndRenderPass(); } void MetalTexture::SetDebugName(const std::string_view& name) @@ -1088,130 +1110,6 @@ std::unique_ptr MetalDevice::CreateTexture(u32 width, u32 height, u3 } } -MetalFramebuffer::MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id rt_tex, - id ds_tex, MTLRenderPassDescriptor* descriptor) - : GPUFramebuffer(rt, ds, width, height), m_rt_tex(rt_tex), m_ds_tex(ds_tex), m_descriptor(descriptor) -{ -} - -MetalFramebuffer::~MetalFramebuffer() -{ - // TODO: safe deleting? - if (m_rt_tex != nil) - [m_rt_tex release]; - if (m_ds_tex != nil) - [m_ds_tex release]; - [m_descriptor release]; -} - -void MetalFramebuffer::SetDebugName(const std::string_view& name) -{ -} - -MTLRenderPassDescriptor* MetalFramebuffer::GetDescriptor() const -{ - if (m_rt) - { - switch (m_rt->GetState()) - { - case GPUTexture::State::Cleared: - { - const auto clear_color = m_rt->GetUNormClearColor(); - m_descriptor.colorAttachments[0].loadAction = MTLLoadActionClear; - m_descriptor.colorAttachments[0].clearColor = - MTLClearColorMake(clear_color[0], clear_color[1], clear_color[2], clear_color[3]); - m_rt->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Invalidated: - { - m_descriptor.colorAttachments[0].loadAction = MTLLoadActionDontCare; - m_rt->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Dirty: - { - m_descriptor.colorAttachments[0].loadAction = MTLLoadActionLoad; - } - break; - - default: - UnreachableCode(); - break; - } - } - - if (m_ds) - { - switch (m_ds->GetState()) - { - case GPUTexture::State::Cleared: - { - m_descriptor.depthAttachment.loadAction = 
MTLLoadActionClear; - m_descriptor.depthAttachment.clearDepth = m_ds->GetClearDepth(); - m_ds->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Invalidated: - { - m_descriptor.depthAttachment.loadAction = MTLLoadActionDontCare; - m_ds->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Dirty: - { - m_descriptor.depthAttachment.loadAction = MTLLoadActionLoad; - } - break; - - default: - UnreachableCode(); - break; - } - } - - return m_descriptor; -} - -std::unique_ptr MetalDevice::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds) -{ - DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds))); - MetalTexture* RT = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds); - MetalTexture* DS = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? rt_or_ds : ds); - - @autoreleasepool - { - MTLRenderPassDescriptor* desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain]; - id rt_tex = RT ? [RT->GetMTLTexture() retain] : nil; - id ds_tex = DS ? [DS->GetMTLTexture() retain] : nil; - - if (RT) - { - desc.colorAttachments[0].texture = rt_tex; - desc.colorAttachments[0].loadAction = MTLLoadActionLoad; - desc.colorAttachments[0].storeAction = MTLStoreActionStore; - } - - if (DS) - { - desc.depthAttachment.texture = ds_tex; - desc.depthAttachment.loadAction = MTLLoadActionLoad; - desc.depthAttachment.storeAction = MTLStoreActionStore; - } - - const u32 width = RT ? RT->GetWidth() : DS->GetWidth(); - const u32 height = RT ? 
RT->GetHeight() : DS->GetHeight(); - desc.renderTargetWidth = width; - desc.renderTargetHeight = height; - - return std::unique_ptr(new MetalFramebuffer(RT, DS, width, height, rt_tex, ds_tex, desc)); - } -} - MetalSampler::MetalSampler(id ss) : m_ss(ss) { } @@ -1489,25 +1387,22 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c) { GPUDevice::ClearRenderTarget(t, c); - if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetRT() == t) + if (InRenderPass() && IsRenderTargetBound(t)) EndRenderPass(); } void MetalDevice::ClearDepth(GPUTexture* t, float d) { GPUDevice::ClearDepth(t, d); - if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetDS() == t) + if (InRenderPass() && m_current_depth_target == t) EndRenderPass(); } void MetalDevice::InvalidateRenderTarget(GPUTexture* t) { GPUDevice::InvalidateRenderTarget(t); - if (InRenderPass() && m_current_framebuffer && - (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t)) - { + if (InRenderPass() && (t->IsRenderTarget() ? 
IsRenderTargetBound(t) : (m_current_depth_target == t))) EndRenderPass(); - } } void MetalDevice::CommitClear(MetalTexture* tex) @@ -1689,44 +1584,29 @@ void MetalDevice::UnmapUniformBuffer(u32 size) } } -void MetalDevice::SetFramebuffer(GPUFramebuffer* fb) +void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) { - if (m_current_framebuffer == fb) - return; + bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); + bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); + bool needs_rt_clear = false; - if (InRenderPass()) - EndRenderPass(); + m_current_depth_target = static_cast(ds); + for (u32 i = 0; i < num_rts; i++) + { + MetalTexture* const RT = static_cast(rts[i]); + changed |= m_current_render_targets[i] != RT; + m_current_render_targets[i] = RT; + needs_rt_clear |= RT->IsClearedOrInvalidated(); + } + for (u32 i = num_rts; i < m_num_current_render_targets; i++) + m_current_render_targets[i] = nullptr; + m_num_current_render_targets = num_rts; - m_current_framebuffer = static_cast(fb); - - // Current pipeline might be incompatible, so unbind it. - // Otherwise it'll get bound to the new render encoder. 
- // TODO: we shouldn't need to do this now - m_current_pipeline = nullptr; - m_current_depth_state = nil; -} - -void MetalDevice::UnbindFramebuffer(MetalFramebuffer* fb) -{ - if (m_current_framebuffer != fb) - return; - - if (InRenderPass()) - EndRenderPass(); - m_current_framebuffer = nullptr; -} - -void MetalDevice::UnbindFramebuffer(MetalTexture* tex) -{ - if (!m_current_framebuffer) - return; - - if (m_current_framebuffer->GetRT() != tex && m_current_framebuffer->GetDS() != tex) - return; - - if (InRenderPass()) - EndRenderPass(); - m_current_framebuffer = nullptr; + if (changed || needs_rt_clear || needs_ds_clear) + { + if (InRenderPass()) + EndRenderPass(); + } } void MetalDevice::SetPipeline(GPUPipeline* pipeline) @@ -1815,6 +1695,27 @@ void MetalDevice::UnbindTexture(MetalTexture* tex) [m_render_encoder setFragmentTexture:nil atIndex:i]; } } + + if (tex->IsRenderTarget()) + { + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + { + Log_WarningPrint("Unbinding current RT"); + SetRenderTargets(nullptr, 0, m_current_depth_target); + break; + } + } + } + else if (tex->IsDepthStencil()) + { + if (m_current_depth_target == tex) + { + Log_WarningPrint("Unbinding current DS"); + SetRenderTargets(nullptr, 0, nullptr); + } + } } void MetalDevice::UnbindTextureBuffer(MetalTextureBuffer* buf) @@ -1863,21 +1764,87 @@ void MetalDevice::BeginRenderPass() @autoreleasepool { - MTLRenderPassDescriptor* desc; - if (!m_current_framebuffer) + MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor]; + if (m_num_current_render_targets == 0 && !m_current_depth_target) { // Rendering to view, but we got interrupted... 
- desc = [MTLRenderPassDescriptor renderPassDescriptor]; desc.colorAttachments[0].texture = [m_layer_drawable texture]; desc.colorAttachments[0].loadAction = MTLLoadActionLoad; } else { - desc = m_current_framebuffer->GetDescriptor(); - if (MetalTexture* RT = static_cast(m_current_framebuffer->GetRT())) + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + MetalTexture* const RT = m_current_render_targets[i]; + desc.colorAttachments[i].texture = RT->GetMTLTexture(); RT->SetUseFenceCounter(m_current_fence_counter); - if (MetalTexture* DS = static_cast(m_current_framebuffer->GetDS())) + + switch (RT->GetState()) + { + case GPUTexture::State::Cleared: + { + const auto clear_color = RT->GetUNormClearColor(); + desc.colorAttachments[i].loadAction = MTLLoadActionClear; + desc.colorAttachments[i].clearColor = + MTLClearColorMake(clear_color[0], clear_color[1], clear_color[2], clear_color[3]); + RT->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Invalidated: + { + desc.colorAttachments[i].loadAction = MTLLoadActionDontCare; + RT->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + { + desc.colorAttachments[i].loadAction = MTLLoadActionLoad; + } + break; + + default: + UnreachableCode(); + break; + } + } + + if (MetalTexture* DS = m_current_depth_target) + { + desc.depthAttachment.texture = m_current_depth_target->GetMTLTexture(); + // Depth attachments default to MTLStoreActionDontCare; store explicitly so depth survives across passes. 
+ desc.depthAttachment.storeAction = MTLStoreActionStore; DS->SetUseFenceCounter(m_current_fence_counter); + + switch (DS->GetState()) + { + case GPUTexture::State::Cleared: + { + desc.depthAttachment.loadAction = MTLLoadActionClear; + desc.depthAttachment.clearDepth = DS->GetClearDepth(); + DS->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Invalidated: + { + desc.depthAttachment.loadAction = MTLLoadActionDontCare; + DS->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + { + desc.depthAttachment.loadAction = MTLLoadActionLoad; + } + break; + + default: + 
UnreachableCode(); + break; + } + } } m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain]; @@ -1948,8 +1915,10 @@ void MetalDevice::SetScissorInRenderEncoder() Common::Rectangle MetalDevice::ClampToFramebufferSize(const Common::Rectangle& rc) const { - const s32 clamp_width = m_current_framebuffer ? m_current_framebuffer->GetWidth() : m_window_info.surface_width; - const s32 clamp_height = m_current_framebuffer ? m_current_framebuffer->GetHeight() : m_window_info.surface_height; + const MetalTexture* rt_or_ds = + (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target; + const s32 clamp_width = rt_or_ds ? rt_or_ds->GetWidth() : m_window_info.surface_width; + const s32 clamp_height = rt_or_ds ? rt_or_ds->GetHeight() : m_window_info.surface_height; return rc.ClampedSize(clamp_width, clamp_height); } @@ -2023,10 +1992,12 @@ bool MetalDevice::BeginPresent(bool skip_present) // Set up rendering to layer. id layer_texture = [m_layer_drawable texture]; - m_current_framebuffer = nullptr; m_layer_pass_desc.colorAttachments[0].texture = layer_texture; m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc] retain]; + std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); + m_num_current_render_targets = 0; + m_current_depth_target = nullptr; m_current_pipeline = nullptr; m_current_depth_state = nil; SetInitialEncoderState(); @@ -2036,7 +2007,7 @@ bool MetalDevice::BeginPresent(bool skip_present) void MetalDevice::EndPresent() { - DebugAssert(!m_current_framebuffer); + DebugAssert(m_num_current_render_targets == 0 && !m_current_depth_target); EndAnyEncoding(); [m_render_cmdbuf presentDrawable:m_layer_drawable]; diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index 1de8f9a1f..2199415ae 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp 
@@ -155,9 +155,9 @@ void OpenGLDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 GL_COLOR_BUFFER_BIT, GL_NEAREST); glEnable(GL_SCISSOR_TEST); - if (m_current_framebuffer) + if (m_current_fbo) { - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); } else @@ -201,9 +201,9 @@ void OpenGLDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u GL_COLOR_BUFFER_BIT, GL_LINEAR); glEnable(GL_SCISSOR_TEST); - if (m_current_framebuffer) + if (m_current_fbo) { - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); } else @@ -215,22 +215,31 @@ void OpenGLDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u void OpenGLDevice::ClearRenderTarget(GPUTexture* t, u32 c) { GPUDevice::ClearRenderTarget(t, c); - if (m_current_framebuffer && m_current_framebuffer->GetRT() == t) - CommitClear(m_current_framebuffer); + if (const s32 idx = IsRenderTargetBound(t); idx >= 0) + CommitRTClearInFB(static_cast(t), static_cast(idx)); } void OpenGLDevice::ClearDepth(GPUTexture* t, float d) { GPUDevice::ClearDepth(t, d); - if (m_current_framebuffer && m_current_framebuffer->GetDS() == t) - CommitClear(m_current_framebuffer); + if (m_current_depth_target == t) + CommitDSClearInFB(static_cast(t)); } void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t) { GPUDevice::InvalidateRenderTarget(t); - if (m_current_framebuffer && (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t)) - CommitClear(m_current_framebuffer); + if (t->IsRenderTarget()) + { + if (const s32 idx = IsRenderTargetBound(t); idx >= 0) + CommitRTClearInFB(static_cast(t), static_cast(idx)); + } + else + { + DebugAssert(t->IsDepthStencil()); + if (m_current_depth_target 
== t) + CommitDSClearInFB(static_cast(t)); + } } void OpenGLDevice::PushDebugGroup(const char* name) @@ -617,7 +626,56 @@ void OpenGLDevice::RenderBlankFrame() m_last_blend_state.write_a); glEnable(GL_SCISSOR_TEST); m_gl_context->SwapBuffers(); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo); +} + +s32 OpenGLDevice::IsRenderTargetBound(const GPUTexture* tex) const +{ + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + return static_cast(i); + } + + return -1; +} + +GLuint OpenGLDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags) +{ + glGetError(); + + GLuint fbo_id; + glGenFramebuffers(1, &fbo_id); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_id); + + for (u32 i = 0; i < num_rts; i++) + { + OpenGLTexture* const RT = static_cast(rts[i]); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, RT->GetGLTarget(), RT->GetGLId(), 0); + } + + if (ds) + { + OpenGLTexture* const DS = static_cast(ds); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, DS->GetGLTarget(), DS->GetGLId(), 0); + } + + if (glGetError() != GL_NO_ERROR || glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) + { + Log_ErrorFmt("Failed to create GL framebuffer: {}", static_cast(glGetError())); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, OpenGLDevice::GetInstance().m_current_fbo); + glDeleteFramebuffers(1, &fbo_id); + return {}; + } + + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, OpenGLDevice::GetInstance().m_current_fbo); + return fbo_id; +} + +void OpenGLDevice::DestroyFramebuffer(GLuint fbo) +{ + if (fbo != 0) + glDeleteFramebuffers(1, &fbo); } GPUDevice::AdapterAndModeList OpenGLDevice::GetAdapterAndModeList() @@ -720,7 +778,12 @@ bool OpenGLDevice::BeginPresent(bool skip_present) const Common::Rectangle window_rc = Common::Rectangle::FromExtents(0, 0, 
m_window_info.surface_width, m_window_info.surface_height); - m_current_framebuffer = nullptr; + + m_current_fbo = 0; + m_num_current_render_targets = 0; + std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); + m_current_depth_target = nullptr; + m_last_viewport = window_rc; m_last_scissor = window_rc; UpdateViewport(); @@ -730,7 +793,7 @@ bool OpenGLDevice::BeginPresent(bool skip_present) void OpenGLDevice::EndPresent() { - DebugAssert(!m_current_framebuffer); + DebugAssert(m_current_fbo == 0); if (m_gpu_timing_enabled) PopTimestampQuery(); @@ -886,6 +949,36 @@ void OpenGLDevice::UnbindTexture(GLuint id) } } +void OpenGLDevice::UnbindTexture(OpenGLTexture* tex) +{ + UnbindTexture(tex->GetGLId()); + + if (tex->IsRenderTarget()) + { + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + { + Log_WarningPrint("Unbinding current RT"); + SetRenderTargets(nullptr, 0, m_current_depth_target); + break; + } + } + + m_framebuffer_manager.RemoveRTReferences(tex); + } + else if (tex->IsDepthStencil()) + { + if (m_current_depth_target == tex) + { + Log_WarningPrint("Unbinding current DS"); + SetRenderTargets(nullptr, 0, nullptr); + } + + m_framebuffer_manager.RemoveDSReferences(tex); + } +} + void OpenGLDevice::UnbindSSBO(GLuint id) { if (m_last_ssbo != id) @@ -908,15 +1001,6 @@ void OpenGLDevice::UnbindSampler(GLuint id) } } -void OpenGLDevice::UnbindFramebuffer(const OpenGLFramebuffer* fb) -{ - if (m_current_framebuffer == fb) - { - m_current_framebuffer = nullptr; - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); - } -} - void OpenGLDevice::UnbindPipeline(const OpenGLPipeline* pl) { if (m_current_pipeline == pl) @@ -984,25 +1068,55 @@ void OpenGLDevice::UnmapUniformBuffer(u32 size) glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_buffer->GetGLBufferId(), pos, size); } -void OpenGLDevice::SetFramebuffer(GPUFramebuffer* fb) +void OpenGLDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, 
GPUTexture* ds) { - if (m_current_framebuffer == fb) - return; + bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); + bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); + bool needs_rt_clear = false; - OpenGLFramebuffer* FB = static_cast(fb); - const bool prev_was_window = (m_current_framebuffer == nullptr); - const bool new_is_window = (FB == nullptr); - m_current_framebuffer = FB; - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FB ? FB->GetGLId() : 0); - if (prev_was_window != new_is_window) + m_current_depth_target = static_cast(ds); + for (u32 i = 0; i < num_rts; i++) { - UpdateViewport(); - UpdateScissor(); + OpenGLTexture* const dt = static_cast(rts[i]); + changed |= m_current_render_targets[i] != dt; + m_current_render_targets[i] = dt; + needs_rt_clear |= dt->IsClearedOrInvalidated(); + } + for (u32 i = num_rts; i < m_num_current_render_targets; i++) + m_current_render_targets[i] = nullptr; + m_num_current_render_targets = num_rts; + if (changed) + { + GLuint fbo = 0; + if (m_num_current_render_targets > 0 || m_current_depth_target) + { + if ((fbo = m_framebuffer_manager.Lookup(rts, num_rts, ds, 0)) == 0) + { + Log_ErrorFmt("Failed to get FBO for {} render targets", num_rts); + m_current_fbo = 0; + std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); + m_num_current_render_targets = 0; + m_current_depth_target = nullptr; + return; + } + } + + m_current_fbo = fbo; + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); } - if (FB) - CommitClear(FB); + if (needs_rt_clear) + { + for (u32 i = 0; i < num_rts; i++) + { + OpenGLTexture* const dt = static_cast(rts[i]); + if (dt->IsClearedOrInvalidated()) + CommitRTClearInFB(dt, i); + } + } + + if (needs_ds_clear) + CommitDSClearInFB(static_cast(ds)); } void OpenGLDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) @@ -1078,7 +1192,7 @@ std::tuple OpenGLDevice::GetFlippedViewportScissor(const Com // Only when rendering to window 
framebuffer. // We draw everything else upside-down. s32 x, y, width, height; - if (!m_current_framebuffer) + if (m_current_fbo == 0) { const s32 sh = static_cast(m_window_info.surface_height); const s32 rh = rc.GetHeight(); diff --git a/src/util/opengl_device.h b/src/util/opengl_device.h index fd0a36ceb..e609d109d 100644 --- a/src/util/opengl_device.h +++ b/src/util/opengl_device.h @@ -5,6 +5,7 @@ #include "gl/context.h" #include "gpu_device.h" +#include "gpu_framebuffer_manager.h" #include "gpu_shader_cache.h" #include "opengl_loader.h" #include "opengl_pipeline.h" @@ -16,7 +17,6 @@ #include #include -class OpenGLFramebuffer; class OpenGLPipeline; class OpenGLStreamBuffer; class OpenGLTexture; @@ -65,8 +65,6 @@ public: void ClearDepth(GPUTexture* t, float d) override; void InvalidateRenderTarget(GPUTexture* t) override; - std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override; - std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, std::span data) override; std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, const char* entry_point, DynamicHeapArray* out_binary) override; @@ -84,7 +82,7 @@ public: void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; - void SetFramebuffer(GPUFramebuffer* fb) override; + void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -102,7 +100,8 @@ public: float GetAndResetAccumulatedGPUTime() override; void CommitClear(OpenGLTexture* tex); - void CommitClear(OpenGLFramebuffer* fb); // Assumes the FB has been bound. 
+ void CommitRTClearInFB(OpenGLTexture* tex, u32 idx); + void CommitDSClearInFB(OpenGLTexture* tex); GLuint LookupProgramCache(const OpenGLPipeline::ProgramCacheKey& key, const GPUPipeline::GraphicsConfig& plconfig); GLuint CompileProgram(const GPUPipeline::GraphicsConfig& plconfig); @@ -115,9 +114,9 @@ public: void SetActiveTexture(u32 slot); void UnbindTexture(GLuint id); + void UnbindTexture(OpenGLTexture* tex); void UnbindSSBO(GLuint id); void UnbindSampler(GLuint id); - void UnbindFramebuffer(const OpenGLFramebuffer* fb); void UnbindPipeline(const OpenGLPipeline* pl); protected: @@ -145,6 +144,10 @@ private: void SetSwapInterval(); void RenderBlankFrame(); + s32 IsRenderTargetBound(const GPUTexture* tex) const; + static GLuint CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags); + static void DestroyFramebuffer(GLuint fbo); + std::tuple GetFlippedViewportScissor(const Common::Rectangle& rc) const; void UpdateViewport(); void UpdateScissor(); @@ -165,7 +168,6 @@ private: void ApplyBlendState(GPUPipeline::BlendState bs); std::unique_ptr m_gl_context; - std::unique_ptr m_window_framebuffer; std::unique_ptr m_vertex_buffer; std::unique_ptr m_index_buffer; @@ -175,6 +177,7 @@ private: // TODO: pass in file instead of blob for pipeline cache OpenGLPipeline::VertexArrayCache m_vao_cache; OpenGLPipeline::ProgramCache m_program_cache; + GPUFramebufferManager m_framebuffer_manager; // VAO cache - fixed max as key GPUPipeline::BlendState m_last_blend_state = {}; @@ -193,7 +196,11 @@ private: GLuint m_read_fbo = 0; GLuint m_write_fbo = 0; - OpenGLFramebuffer* m_current_framebuffer = nullptr; + GLuint m_current_fbo = 0; + u32 m_num_current_render_targets = 0; + std::array m_current_render_targets = {}; + OpenGLTexture* m_current_depth_target = nullptr; + OpenGLPipeline* m_current_pipeline = nullptr; std::array m_timestamp_queries = {}; diff --git a/src/util/opengl_texture.cpp b/src/util/opengl_texture.cpp index 898aa160a..6e18b7f1e 100644 --- 
a/src/util/opengl_texture.cpp +++ b/src/util/opengl_texture.cpp @@ -205,7 +205,7 @@ void OpenGLTexture::Destroy() { if (m_id != 0) { - OpenGLDevice::GetInstance().UnbindTexture(m_id); + OpenGLDevice::GetInstance().UnbindTexture(this); glDeleteTextures(1, &m_id); m_id = 0; } @@ -410,58 +410,6 @@ std::unique_ptr OpenGLDevice::CreateSampler(const GPUSampler::Config ////////////////////////////////////////////////////////////////////////// -OpenGLFramebuffer::OpenGLFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, GLuint id) - : GPUFramebuffer(rt, ds, width, height), m_id(id) -{ -} - -OpenGLFramebuffer::~OpenGLFramebuffer() -{ - OpenGLDevice::GetInstance().UnbindFramebuffer(this); -} - -void OpenGLFramebuffer::SetDebugName(const std::string_view& name) -{ -#ifdef _DEBUG - if (glObjectLabel) - glObjectLabel(GL_FRAMEBUFFER, m_id, static_cast(name.length()), static_cast(name.data())); -#endif -} - -void OpenGLFramebuffer::Bind(GLenum target) -{ - glBindFramebuffer(target, m_id); -} - -std::unique_ptr OpenGLDevice::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds /* = nullptr */) -{ - glGetError(); - - GLuint fbo_id; - glGenFramebuffers(1, &fbo_id); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_id); - - DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds))); - OpenGLTexture* RT = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds); - OpenGLTexture* DS = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? 
rt_or_ds : ds); - if (RT) - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, RT->GetGLTarget(), RT->GetGLId(), 0); - if (DS) - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, DS->GetGLTarget(), DS->GetGLId(), 0); - - if (glGetError() != GL_NO_ERROR || glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) - { - Log_ErrorPrintf("Failed to create GL framebuffer: %u", glGetError()); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0); - glDeleteFramebuffers(1, &fbo_id); - return {}; - } - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0); - return std::unique_ptr(new OpenGLFramebuffer(RT, DS, RT ? RT->GetWidth() : DS->GetWidth(), - RT ? RT->GetHeight() : DS->GetHeight(), fbo_id)); -} - void OpenGLDevice::CommitClear(OpenGLTexture* tex) { switch (tex->GetState()) @@ -484,7 +432,7 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex) glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, GL_TEXTURE_2D, 0, 0); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo); } } break; @@ -525,7 +473,7 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex) } glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, GL_TEXTURE_2D, 0, 0); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? 
m_current_framebuffer->GetGLId() : 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo); } } break; @@ -539,74 +487,68 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex) } } -void OpenGLDevice::CommitClear(OpenGLFramebuffer* fb) +void OpenGLDevice::CommitRTClearInFB(OpenGLTexture* tex, u32 idx) { - GLenum invalidate_attachments[2]; - GLuint num_invalidate_attachments = 0; - - if (OpenGLTexture* FB = static_cast(fb->GetRT())) + switch (tex->GetState()) { - switch (FB->GetState()) + case GPUTexture::State::Invalidated: { - case GPUTexture::State::Invalidated: - { - invalidate_attachments[num_invalidate_attachments++] = GL_COLOR_ATTACHMENT0; - FB->SetState(GPUTexture::State::Dirty); - } + const GLenum attachment = GL_COLOR_ATTACHMENT0 + idx; + glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); + tex->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Cleared: + { + const auto color = tex->GetUNormClearColor(); + glDisable(GL_SCISSOR_TEST); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glClearBufferfv(GL_COLOR, static_cast(idx), color.data()); + glColorMask(m_last_blend_state.write_r, m_last_blend_state.write_g, m_last_blend_state.write_b, + m_last_blend_state.write_a); + glEnable(GL_SCISSOR_TEST); + tex->SetState(GPUTexture::State::Dirty); + } + + case GPUTexture::State::Dirty: break; - case GPUTexture::State::Cleared: - { - const auto color = FB->GetUNormClearColor(); - glDisable(GL_SCISSOR_TEST); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glClearBufferfv(GL_COLOR, 0, color.data()); - glColorMask(m_last_blend_state.write_r, m_last_blend_state.write_g, m_last_blend_state.write_b, - m_last_blend_state.write_a); - glEnable(GL_SCISSOR_TEST); - FB->SetState(GPUTexture::State::Dirty); - } - - case GPUTexture::State::Dirty: - break; - - default: - UnreachableCode(); - break; - } + default: + UnreachableCode(); + break; } - if (OpenGLTexture* DS = static_cast(fb->GetDS())) +} + +void 
OpenGLDevice::CommitDSClearInFB(OpenGLTexture* tex) +{ + switch (tex->GetState()) { - switch (DS->GetState()) + case GPUTexture::State::Invalidated: { - case GPUTexture::State::Invalidated: - { - invalidate_attachments[num_invalidate_attachments++] = GL_DEPTH_ATTACHMENT; - DS->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Cleared: - { - const float depth = DS->GetClearDepth(); - glDisable(GL_SCISSOR_TEST); - glClearBufferfv(GL_DEPTH, 0, &depth); - glEnable(GL_SCISSOR_TEST); - DS->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Dirty: - break; - - default: - UnreachableCode(); - break; + const GLenum attachment = GL_DEPTH_ATTACHMENT; + glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); + tex->SetState(GPUTexture::State::Dirty); } - } + break; - if (num_invalidate_attachments > 0 && glInvalidateFramebuffer) - glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, num_invalidate_attachments, invalidate_attachments); + case GPUTexture::State::Cleared: + { + const float depth = tex->GetClearDepth(); + glDisable(GL_SCISSOR_TEST); + glClearBufferfv(GL_DEPTH, 0, &depth); + glEnable(GL_SCISSOR_TEST); + tex->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + break; + + default: + UnreachableCode(); + break; + } } ////////////////////////////////////////////////////////////////////////// diff --git a/src/util/opengl_texture.h b/src/util/opengl_texture.h index 9c5e9fb3e..336c49487 100644 --- a/src/util/opengl_texture.h +++ b/src/util/opengl_texture.h @@ -57,25 +57,6 @@ private: u8 m_map_level = 0; }; -class OpenGLFramebuffer final : public GPUFramebuffer -{ - friend OpenGLDevice; - -public: - ~OpenGLFramebuffer() override; - - ALWAYS_INLINE GLuint GetGLId() const { return m_id; } - - void SetDebugName(const std::string_view& name) override; - - void Bind(GLenum target); - -private: - OpenGLFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, GLuint id); - - GLuint 
m_id; -}; - class OpenGLTextureBuffer final : public GPUTextureBuffer { friend OpenGLDevice; diff --git a/src/util/postprocessing.cpp b/src/util/postprocessing.cpp index e7cf301e7..fdb164c07 100644 --- a/src/util/postprocessing.cpp +++ b/src/util/postprocessing.cpp @@ -53,10 +53,8 @@ static u32 s_target_height = 0; static Common::Timer s_timer; static std::unique_ptr s_input_texture; -static std::unique_ptr s_input_framebuffer; static std::unique_ptr s_output_texture; -static std::unique_ptr s_output_framebuffer; static std::unordered_map> s_samplers; static std::unique_ptr s_dummy_texture; @@ -602,11 +600,6 @@ GPUTexture* PostProcessing::GetInputTexture() return s_input_texture.get(); } -GPUFramebuffer* PostProcessing::GetInputFramebuffer() -{ - return s_input_framebuffer.get(); -} - const Common::Timer& PostProcessing::GetTimer() { return s_timer; @@ -650,14 +643,8 @@ bool PostProcessing::CheckTargets(GPUTexture::Format target_format, u32 target_w if (!(s_input_texture = g_gpu_device->CreateTexture(target_width, target_height, 1, 1, 1, GPUTexture::Type::RenderTarget, target_format)) || - !(s_input_framebuffer = g_gpu_device->CreateFramebuffer(s_input_texture.get()))) - { - return false; - } - - if (!(s_output_texture = g_gpu_device->CreateTexture(target_width, target_height, 1, 1, 1, - GPUTexture::Type::RenderTarget, target_format)) || - !(s_output_framebuffer = g_gpu_device->CreateFramebuffer(s_output_texture.get()))) + !(s_output_texture = g_gpu_device->CreateTexture(target_width, target_height, 1, 1, 1, + GPUTexture::Type::RenderTarget, target_format))) { return false; } @@ -688,36 +675,30 @@ void PostProcessing::DestroyTextures() s_target_width = 0; s_target_height = 0; - s_output_framebuffer.reset(); s_output_texture.reset(); - - s_input_framebuffer.reset(); s_input_texture.reset(); } -bool PostProcessing::Apply(GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width, - s32 final_height, s32 orig_width, s32 orig_height) +bool 
PostProcessing::Apply(GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height, + s32 orig_width, s32 orig_height) { GL_SCOPE("PostProcessing Apply"); const u32 target_width = final_target ? final_target->GetWidth() : g_gpu_device->GetWindowWidth(); const u32 target_height = final_target ? final_target->GetHeight() : g_gpu_device->GetWindowHeight(); - const GPUTexture::Format target_format = - final_target ? final_target->GetRT()->GetFormat() : g_gpu_device->GetWindowFormat(); + const GPUTexture::Format target_format = final_target ? final_target->GetFormat() : g_gpu_device->GetWindowFormat(); if (!CheckTargets(target_format, target_width, target_height)) return false; GPUTexture* input = s_input_texture.get(); - GPUFramebuffer* input_fb = s_input_framebuffer.get(); GPUTexture* output = s_output_texture.get(); - GPUFramebuffer* output_fb = s_output_framebuffer.get(); input->MakeReadyForSampling(); for (const std::unique_ptr& stage : s_stages) { const bool is_final = (stage.get() == s_stages.back().get()); - if (!stage->Apply(input, is_final ? final_target : output_fb, final_left, final_top, final_width, final_height, + if (!stage->Apply(input, is_final ? 
final_target : output, final_left, final_top, final_width, final_height, orig_width, orig_height, s_target_width, s_target_height)) { return false; @@ -727,7 +708,6 @@ bool PostProcessing::Apply(GPUFramebuffer* final_target, s32 final_left, s32 fin { output->MakeReadyForSampling(); std::swap(input, output); - std::swap(input_fb, output_fb); } } diff --git a/src/util/postprocessing.h b/src/util/postprocessing.h index cc082ec0c..a51c900d6 100644 --- a/src/util/postprocessing.h +++ b/src/util/postprocessing.h @@ -10,13 +10,11 @@ #include #include -namespace Common -{ +namespace Common { class Timer; } class GPUSampler; -class GPUFramebuffer; class GPUTexture; class Error; @@ -123,13 +121,12 @@ bool ReloadShaders(); void Shutdown(); GPUTexture* GetInputTexture(); -GPUFramebuffer* GetInputFramebuffer(); const Common::Timer& GetTimer(); bool CheckTargets(GPUTexture::Format target_format, u32 target_width, u32 target_height); -bool Apply(GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height, - s32 orig_width, s32 orig_height); +bool Apply(GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height, s32 orig_width, + s32 orig_height); GPUSampler* GetSampler(const GPUSampler::Config& config); GPUTexture* GetDummyTexture(); diff --git a/src/util/postprocessing_shader.h b/src/util/postprocessing_shader.h index 79eadcbce..9358cf1a1 100644 --- a/src/util/postprocessing_shader.h +++ b/src/util/postprocessing_shader.h @@ -47,7 +47,7 @@ public: virtual bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height) = 0; - virtual bool Apply(GPUTexture* input, GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width, + virtual bool Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, u32 target_width, u32 target_height) = 0; protected: diff --git a/src/util/postprocessing_shader_fx.cpp 
b/src/util/postprocessing_shader_fx.cpp index 2d44b2736..ebe0c2fd6 100644 --- a/src/util/postprocessing_shader_fx.cpp +++ b/src/util/postprocessing_shader_fx.cpp @@ -1080,7 +1080,7 @@ const char* PostProcessing::ReShadeFXShader::GetTextureNameForID(TextureID id) c } GPUTexture* PostProcessing::ReShadeFXShader::GetTextureByID(TextureID id, GPUTexture* input, - GPUFramebuffer* final_target) const + GPUTexture* final_target) const { if (id < 0) { @@ -1093,29 +1093,6 @@ GPUTexture* PostProcessing::ReShadeFXShader::GetTextureByID(TextureID id, GPUTex return PostProcessing::GetDummyTexture(); } else if (id == OUTPUT_COLOR_TEXTURE) - { - Panic("Wrong state for final target"); - return nullptr; - } - else - { - Panic("Unexpected reserved texture ID"); - return nullptr; - } - } - - if (static_cast(id) >= m_textures.size()) - Panic("Unexpected texture ID"); - - return m_textures[static_cast(id)].texture.get(); -} - -GPUFramebuffer* PostProcessing::ReShadeFXShader::GetFramebufferByID(TextureID id, GPUTexture* input, - GPUFramebuffer* final_target) const -{ - if (id < 0) - { - if (id == OUTPUT_COLOR_TEXTURE) { return final_target; } @@ -1129,9 +1106,7 @@ GPUFramebuffer* PostProcessing::ReShadeFXShader::GetFramebufferByID(TextureID id if (static_cast(id) >= m_textures.size()) Panic("Unexpected texture ID"); - const Texture& tex = m_textures[static_cast(id)]; - Assert(tex.framebuffer); - return tex.framebuffer.get(); + return m_textures[static_cast(id)].texture.get(); } bool PostProcessing::ReShadeFXShader::CompilePipeline(GPUTexture::Format format, u32 width, u32 height) @@ -1243,7 +1218,7 @@ bool PostProcessing::ReShadeFXShader::CompilePipeline(GPUTexture::Format format, if (!vs || !fs) return false; - plconfig.color_format = (pass.render_target >= 0) ? m_textures[pass.render_target].format : format; + plconfig.SetTargetFormats((pass.render_target >= 0) ? 
m_textures[pass.render_target].format : format); plconfig.blend = MapBlendState(info); plconfig.primitive = MapPrimitive(info.topology); plconfig.vertex_shader = vs.get(); @@ -1274,7 +1249,6 @@ bool PostProcessing::ReShadeFXShader::ResizeOutput(GPUTexture::Format format, u3 if (tex.rt_scale == 0.0f) continue; - tex.framebuffer.reset(); tex.texture.reset(); const u32 t_width = std::max(static_cast(static_cast(width) * tex.rt_scale), 1u); @@ -1285,22 +1259,15 @@ bool PostProcessing::ReShadeFXShader::ResizeOutput(GPUTexture::Format format, u3 Log_ErrorPrintf("Failed to create %ux%u texture", t_width, t_height); return {}; } - - tex.framebuffer = g_gpu_device->CreateFramebuffer(tex.texture.get()); - if (!tex.framebuffer) - { - Log_ErrorPrintf("Failed to create %ux%u texture framebuffer", t_width, t_height); - return {}; - } } m_valid = true; return true; } -bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUFramebuffer* final_target, s32 final_left, - s32 final_top, s32 final_width, s32 final_height, s32 orig_width, - s32 orig_height, u32 target_width, u32 target_height) +bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, + s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, + u32 target_width, u32 target_height) { GL_PUSH_FMT("PostProcessingShaderFX {}", m_name); @@ -1461,9 +1428,9 @@ bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUFramebuffer* f { GL_SCOPE_FMT("Draw pass {}", pass.name.c_str()); GL_INS_FMT("Render Target: ID {} [{}]", pass.render_target, GetTextureNameForID(pass.render_target)); - GPUFramebuffer* output_fb = GetFramebufferByID(pass.render_target, input, final_target); + GPUTexture* output = GetTextureByID(pass.render_target, input, final_target); - if (!output_fb) + if (!output) { // Drawing to final buffer. 
if (!g_gpu_device->BeginPresent(false)) @@ -1474,7 +1441,7 @@ bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUFramebuffer* f } else { - g_gpu_device->SetFramebuffer(output_fb); + g_gpu_device->SetRenderTargets(&output, 1, nullptr); } g_gpu_device->SetPipeline(pass.pipeline.get()); diff --git a/src/util/postprocessing_shader_fx.h b/src/util/postprocessing_shader_fx.h index 4aabcbcc9..04b9e7794 100644 --- a/src/util/postprocessing_shader_fx.h +++ b/src/util/postprocessing_shader_fx.h @@ -29,7 +29,7 @@ public: bool ResizeOutput(GPUTexture::Format format, u32 width, u32 height) override; bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height) override; - bool Apply(GPUTexture* input, GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width, + bool Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, u32 target_width, u32 target_height) override; private: @@ -80,15 +80,13 @@ private: bool CreatePasses(GPUTexture::Format backbuffer_format, reshadefx::module& mod, Error* error); const char* GetTextureNameForID(TextureID id) const; - GPUTexture* GetTextureByID(TextureID id, GPUTexture* input, GPUFramebuffer* final_target) const; - GPUFramebuffer* GetFramebufferByID(TextureID id, GPUTexture* input, GPUFramebuffer* final_target) const; + GPUTexture* GetTextureByID(TextureID id, GPUTexture* input, GPUTexture* final_target) const; std::string m_filename; struct Texture { std::unique_ptr texture; - std::unique_ptr framebuffer; std::string reshade_name; // TODO: we might be able to drop this GPUTexture::Format format; float rt_scale; diff --git a/src/util/postprocessing_shader_glsl.cpp b/src/util/postprocessing_shader_glsl.cpp index 3833d7085..bba75b3f4 100644 --- a/src/util/postprocessing_shader_glsl.cpp +++ b/src/util/postprocessing_shader_glsl.cpp @@ -130,8 +130,7 @@ bool 
PostProcessing::GLSLShader::CompilePipeline(GPUTexture::Format format, u32 GPUPipeline::GraphicsConfig plconfig; plconfig.layout = GPUPipeline::Layout::SingleTextureAndUBO; plconfig.primitive = GPUPipeline::Primitive::Triangles; - plconfig.color_format = format; - plconfig.depth_format = GPUTexture::Format::Unknown; + plconfig.SetTargetFormats(format); plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); @@ -157,7 +156,7 @@ bool PostProcessing::GLSLShader::CompilePipeline(GPUTexture::Format format, u32 return true; } -bool PostProcessing::GLSLShader::Apply(GPUTexture* input, GPUFramebuffer* final_target, s32 final_left, s32 final_top, +bool PostProcessing::GLSLShader::Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, u32 target_width, u32 target_height) { @@ -171,8 +170,8 @@ bool PostProcessing::GLSLShader::Apply(GPUTexture* input, GPUFramebuffer* final_ } else { - g_gpu_device->SetFramebuffer(final_target); - g_gpu_device->ClearRenderTarget(final_target->GetRT(), 0); // TODO: Could use an invalidate here too. + g_gpu_device->SetRenderTargets(&final_target, 1, nullptr); + g_gpu_device->ClearRenderTarget(final_target, 0); // TODO: Could use an invalidate here too. 
} g_gpu_device->SetPipeline(m_pipeline.get()); diff --git a/src/util/postprocessing_shader_glsl.h b/src/util/postprocessing_shader_glsl.h index 0c7953000..f4c2bf3bf 100644 --- a/src/util/postprocessing_shader_glsl.h +++ b/src/util/postprocessing_shader_glsl.h @@ -23,7 +23,7 @@ public: bool ResizeOutput(GPUTexture::Format format, u32 width, u32 height) override; bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height) override; - bool Apply(GPUTexture* input, GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width, + bool Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, u32 target_width, u32 target_height) override; private: diff --git a/src/util/util.props b/src/util/util.props index 25f1acc24..8f73a291a 100644 --- a/src/util/util.props +++ b/src/util/util.props @@ -8,7 +8,7 @@ ENABLE_CUBEB=1;ENABLE_SDL2=1;%(PreprocessorDefinitions) %(PreprocessorDefinitions);ENABLE_OPENGL=1;ENABLE_VULKAN=1 %(PreprocessorDefinitions);SOUNDTOUCH_USE_NEON - %(AdditionalIncludeDirectories);$(SolutionDir)dep\soundtouch\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\libchdr\include;$(SolutionDir)dep\cubeb\include;$(SolutionDir)dep\d3d12ma\include;$(SolutionDir)dep\zstd\lib + %(AdditionalIncludeDirectories);$(SolutionDir)dep\xxhash\include;$(SolutionDir)dep\soundtouch\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\libchdr\include;$(SolutionDir)dep\cubeb\include;$(SolutionDir)dep\d3d12ma\include;$(SolutionDir)dep\zstd\lib %(AdditionalIncludeDirectories);$(SolutionDir)dep\glad\include;$(SolutionDir)dep\vulkan\include;$(SolutionDir)dep\glslang diff --git a/src/util/util.vcxproj b/src/util/util.vcxproj index 51175a815..fec2bba06 100644 --- a/src/util/util.vcxproj +++ b/src/util/util.vcxproj @@ -25,6 +25,7 @@ true + @@ -245,6 +246,19 @@ 
{ee054e08-3799-4a59-a422-18259c105ffd} + + + true + Document + + + true + + + true + Document + + {57F6206D-F264-4B07-BAF8-11B9BBE1F455} diff --git a/src/util/util.vcxproj.filters b/src/util/util.vcxproj.filters index 7e54e528c..23995ae6c 100644 --- a/src/util/util.vcxproj.filters +++ b/src/util/util.vcxproj.filters @@ -70,6 +70,7 @@ + @@ -149,10 +150,15 @@ + + {e637fc5b-2483-4a31-abc3-89a16d45c223} + + + \ No newline at end of file diff --git a/src/util/vulkan_builders.cpp b/src/util/vulkan_builders.cpp index ce7d1a757..92fb6f97f 100644 --- a/src/util/vulkan_builders.cpp +++ b/src/util/vulkan_builders.cpp @@ -264,6 +264,9 @@ void Vulkan::GraphicsPipelineBuilder::Clear() m_line_rasterization_state = {}; m_line_rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT; + m_rendering = {}; + m_rendering.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR; + // set defaults SetNoCullRasterizationState(); SetNoDepthTestState(); @@ -569,6 +572,29 @@ void Vulkan::GraphicsPipelineBuilder::SetProvokingVertex(VkProvokingVertexModeEX m_provoking_vertex.provokingVertexMode = mode; } +void Vulkan::GraphicsPipelineBuilder::SetDynamicRendering() +{ + AddPointerToChain(&m_ci, &m_rendering); +} + +void Vulkan::GraphicsPipelineBuilder::AddDynamicRenderingColorAttachment(VkFormat format) +{ + SetDynamicRendering(); + + DebugAssert(m_rendering.colorAttachmentCount < MAX_ATTACHMENTS); + m_rendering_color_formats[m_rendering.colorAttachmentCount++] = format; + + m_rendering.pColorAttachmentFormats = m_rendering_color_formats.data(); +} + +void Vulkan::GraphicsPipelineBuilder::SetDynamicRenderingDepthAttachment(VkFormat depth_format, VkFormat stencil_format) +{ + SetDynamicRendering(); + + m_rendering.depthAttachmentFormat = depth_format; + m_rendering.stencilAttachmentFormat = stencil_format; +} + Vulkan::ComputePipelineBuilder::ComputePipelineBuilder() { Clear(); diff --git a/src/util/vulkan_builders.h b/src/util/vulkan_builders.h index 
3709dcc19..a7ddf21b7 100644 --- a/src/util/vulkan_builders.h +++ b/src/util/vulkan_builders.h @@ -3,6 +3,7 @@ #pragma once +#include "gpu_device.h" #include "vulkan_loader.h" #include "common/string_util.h" @@ -79,7 +80,7 @@ public: MAX_SHADER_STAGES = 3, MAX_VERTEX_ATTRIBUTES = 16, MAX_VERTEX_BUFFERS = 8, - MAX_ATTACHMENTS = 2, + MAX_ATTACHMENTS = GPUDevice::MAX_RENDER_TARGETS + 1, MAX_DYNAMIC_STATE = 8 }; @@ -140,6 +141,10 @@ public: void SetProvokingVertex(VkProvokingVertexModeEXT mode); + void SetDynamicRendering(); + void AddDynamicRenderingColorAttachment(VkFormat format); + void SetDynamicRenderingDepthAttachment(VkFormat depth_format, VkFormat stencil_format); + private: VkGraphicsPipelineCreateInfo m_ci; std::array m_shader_stages; @@ -167,6 +172,9 @@ private: VkPipelineRasterizationProvokingVertexStateCreateInfoEXT m_provoking_vertex; VkPipelineRasterizationLineStateCreateInfoEXT m_line_rasterization_state; + + VkPipelineRenderingCreateInfoKHR m_rendering; + std::array m_rendering_color_formats; }; class ComputePipelineBuilder @@ -271,7 +279,7 @@ class FramebufferBuilder { enum : u32 { - MAX_ATTACHMENTS = 2, + MAX_ATTACHMENTS = GPUDevice::MAX_RENDER_TARGETS + 1, }; public: diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 2e52f6cb8..4add90897 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -20,6 +20,7 @@ #include "common/small_string.h" #include "fmt/format.h" +#include "xxhash.h" #include #include @@ -39,6 +40,13 @@ struct VK_PIPELINE_CACHE_HEADER }; #pragma pack(pop) +static VkAttachmentLoadOp GetLoadOpForTexture(const GPUTexture* tex) +{ + static constexpr VkAttachmentLoadOp ops[3] = {VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_CLEAR, + VK_ATTACHMENT_LOAD_OP_DONT_CARE}; + return ops[static_cast(tex->GetState())]; +} + // Tweakables enum : u32 { @@ -84,6 +92,9 @@ const std::array(GPUTexture::Format::MaxCount)> Vulka static constexpr VkClearValue s_present_clear_color = {{{0.0f, 0.0f, 0.0f, 
1.0f}}}; +// Handles are always 64-bit, even on 32-bit platforms. +static const VkRenderPass DYNAMIC_RENDERING_RENDER_PASS = reinterpret_cast(static_cast(-1LL)); + #ifdef _DEBUG static u32 s_debug_scope_depth = 0; #endif @@ -355,6 +366,10 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en m_optional_extensions.vk_ext_attachment_feedback_loop_layout = SupportsExtension(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME, false); m_optional_extensions.vk_khr_driver_properties = SupportsExtension(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, false); + m_optional_extensions.vk_khr_dynamic_rendering = + SupportsExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME, false) && + SupportsExtension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false) && + SupportsExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false); m_optional_extensions.vk_khr_push_descriptor = SupportsExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false); #ifdef _WIN32 @@ -501,11 +516,15 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay VK_FALSE}; VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT attachment_feedback_loop_feature = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT, nullptr, VK_TRUE}; + VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_TRUE}; if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) Vulkan::AddPointerToChain(&device_info, &rasterization_order_access_feature); if (m_optional_extensions.vk_ext_attachment_feedback_loop_layout) Vulkan::AddPointerToChain(&device_info, &attachment_feedback_loop_feature); + if (m_optional_extensions.vk_khr_dynamic_rendering) + Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_feature); VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device); if (res != VK_SUCCESS) @@ -545,12 +564,16 @@ void 
VulkanDevice::ProcessDeviceExtensions() VK_FALSE}; VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT attachment_feedback_loop_feature = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT, nullptr, VK_FALSE}; + VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_TRUE}; // add in optional feature structs if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) Vulkan::AddPointerToChain(&features2, &rasterization_order_access_feature); if (m_optional_extensions.vk_ext_attachment_feedback_loop_layout) Vulkan::AddPointerToChain(&features2, &attachment_feedback_loop_feature); + if (m_optional_extensions.vk_khr_dynamic_rendering) + Vulkan::AddPointerToChain(&features2, &dynamic_rendering_feature); // query vkGetPhysicalDeviceFeatures2(m_physical_device, &features2); @@ -560,6 +583,7 @@ void VulkanDevice::ProcessDeviceExtensions() (rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess == VK_TRUE); m_optional_extensions.vk_ext_attachment_feedback_loop_layout &= (attachment_feedback_loop_feature.attachmentFeedbackLoopLayout == VK_TRUE); + m_optional_extensions.vk_khr_dynamic_rendering &= (dynamic_rendering_feature.dynamicRendering == VK_TRUE); VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, nullptr, {}}; VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor_properties = { @@ -584,6 +608,8 @@ void VulkanDevice::ProcessDeviceExtensions() m_optional_extensions.vk_ext_rasterization_order_attachment_access ? "supported" : "NOT supported"); Log_InfoPrintf("VK_EXT_attachment_feedback_loop_layout is %s", m_optional_extensions.vk_ext_attachment_feedback_loop_layout ? "supported" : "NOT supported"); + Log_InfoPrintf("VK_KHR_dynamic_rendering is %s", + m_optional_extensions.vk_khr_dynamic_rendering ? 
"supported" : "NOT supported"); Log_InfoPrintf("VK_KHR_push_descriptor is %s", m_optional_extensions.vk_khr_push_descriptor ? "supported" : "NOT supported"); } @@ -794,33 +820,110 @@ void VulkanDevice::DestroyPersistentDescriptorPool() vkDestroyDescriptorPool(m_device, m_global_descriptor_pool, nullptr); } -VkRenderPass VulkanDevice::GetRenderPass(VkFormat color_format, VkFormat depth_format, VkSampleCountFlagBits samples, - VkAttachmentLoadOp color_load_op /* = VK_ATTACHMENT_LOAD_OP_LOAD */, - VkAttachmentStoreOp color_store_op /* = VK_ATTACHMENT_STORE_OP_STORE */, - VkAttachmentLoadOp depth_load_op /* = VK_ATTACHMENT_LOAD_OP_LOAD */, - VkAttachmentStoreOp depth_store_op /* = VK_ATTACHMENT_STORE_OP_STORE */, - VkAttachmentLoadOp stencil_load_op /* = VK_ATTACHMENT_LOAD_OP_DONT_CARE */, - VkAttachmentStoreOp stencil_store_op /* = VK_ATTACHMENT_STORE_OP_DONT_CARE */, +bool VulkanDevice::RenderPassCacheKey::operator==(const RenderPassCacheKey& rhs) const +{ + return (std::memcmp(this, &rhs, sizeof(*this)) == 0); +} + +bool VulkanDevice::RenderPassCacheKey::operator!=(const RenderPassCacheKey& rhs) const +{ + return (std::memcmp(this, &rhs, sizeof(*this)) != 0); +} + +size_t VulkanDevice::RenderPassCacheKeyHash::operator()(const RenderPassCacheKey& rhs) const +{ + if constexpr (sizeof(void*) == 8) + return XXH3_64bits(&rhs, sizeof(rhs)); + else + return XXH32(&rhs, sizeof(rhs), 0x1337); +} + +VkRenderPass VulkanDevice::GetRenderPass(const GPUPipeline::GraphicsConfig& config) +{ + RenderPassCacheKey key; + std::memset(&key, 0, sizeof(key)); + + for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) + { + if (config.color_formats[i] == GPUTexture::Format::Unknown) + break; + + key.color[i].format = static_cast(config.color_formats[i]); + key.color[i].load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + key.color[i].store_op = VK_ATTACHMENT_STORE_OP_STORE; + } + + if (config.depth_format != GPUTexture::Format::Unknown) + { + key.depth_format = static_cast(config.depth_format); + 
key.depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + key.depth_store_op = VK_ATTACHMENT_STORE_OP_STORE; + + const bool stencil = GPUTexture::IsDepthStencilFormat(config.depth_format); + key.stencil_load_op = stencil ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE; + key.stencil_store_op = stencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE; + } + + // key.color_feedback_loop = false; + // key.depth_sampling = false; + + key.samples = static_cast(config.samples); + + const auto it = m_render_pass_cache.find(key); + return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key); +} + +VkRenderPass VulkanDevice::GetRenderPass(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, bool color_feedback_loop /* = false */, bool depth_sampling /* = false */) { - RenderPassCacheKey key = {}; - key.color_format = color_format; - key.depth_format = depth_format; - key.samples = samples; - key.color_load_op = color_load_op; - key.color_store_op = color_store_op; - key.depth_load_op = depth_load_op; - key.depth_store_op = depth_store_op; - key.stencil_load_op = stencil_load_op; - key.stencil_store_op = stencil_store_op; + RenderPassCacheKey key; + std::memset(&key, 0, sizeof(key)); + + static_assert(static_cast(GPUTexture::Format::Unknown) == 0); + + for (u32 i = 0; i < num_rts; i++) + { + key.color[i].format = static_cast(rts[i]->GetFormat()); + key.color[i].load_op = GetLoadOpForTexture(rts[i]); + key.color[i].store_op = VK_ATTACHMENT_STORE_OP_STORE; + key.samples = static_cast(rts[i]->GetSamples()); + } + + if (ds) + { + const VkAttachmentLoadOp load_op = GetLoadOpForTexture(ds); + key.depth_format = static_cast(ds->GetFormat()); + key.depth_load_op = load_op; + key.depth_store_op = VK_ATTACHMENT_STORE_OP_STORE; + + const bool stencil = GPUTexture::IsDepthStencilFormat(ds->GetFormat()); + key.stencil_load_op = stencil ? load_op : VK_ATTACHMENT_LOAD_OP_DONT_CARE; + key.stencil_store_op = stencil ? 
VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE; + + key.samples = static_cast(ds->GetSamples()); + } + key.color_feedback_loop = color_feedback_loop; key.depth_sampling = depth_sampling; - auto it = m_render_pass_cache.find(key.key); - if (it != m_render_pass_cache.end()) - return it->second; + const auto it = m_render_pass_cache.find(key); + return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key); +} - return CreateCachedRenderPass(key); +VkRenderPass VulkanDevice::GetSwapChainRenderPass(GPUTexture::Format format, VkAttachmentLoadOp load_op) +{ + DebugAssert(format != GPUTexture::Format::Unknown); + + RenderPassCacheKey key; + std::memset(&key, 0, sizeof(key)); + + key.color[0].format = static_cast(format); + key.color[0].load_op = load_op; + key.color[0].store_op = VK_ATTACHMENT_STORE_OP_STORE; + key.samples = 1; + + const auto it = m_render_pass_cache.find(key); + return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key); } VkRenderPass VulkanDevice::GetRenderPassForRestarting(VkRenderPass pass) @@ -830,19 +933,22 @@ VkRenderPass VulkanDevice::GetRenderPassForRestarting(VkRenderPass pass) if (it.second != pass) continue; - RenderPassCacheKey modified_key; - modified_key.key = it.first; - if (modified_key.color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) - modified_key.color_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + RenderPassCacheKey modified_key = it.first; + for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) + { + if (modified_key.color[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + modified_key.color[i].load_op = VK_ATTACHMENT_LOAD_OP_LOAD; + } + if (modified_key.depth_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) modified_key.depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; if (modified_key.stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) modified_key.stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; - if (modified_key.key == it.first) + if (modified_key == it.first) return pass; - auto fit = 
m_render_pass_cache.find(modified_key.key); + auto fit = m_render_pass_cache.find(modified_key); if (fit != m_render_pass_cache.end()) return fit->second; @@ -1261,12 +1367,9 @@ void VulkanDevice::SubmitCommandBufferAndRestartRenderPass(const char* reason) if (InRenderPass()) EndRenderPass(); - VulkanFramebuffer* fb = m_current_framebuffer; VulkanPipeline* pl = m_current_pipeline; SubmitCommandBuffer(false, "%s", reason); - if (fb) - SetFramebuffer(fb); SetPipeline(pl); BeginRenderPass(); } @@ -1404,19 +1507,25 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) VkAttachmentReference* input_reference_ptr = nullptr; VkSubpassDependency subpass_dependency; VkSubpassDependency* subpass_dependency_ptr = nullptr; - std::array attachments; + std::array attachments; u32 num_attachments = 0; - if (key.color_format != VK_FORMAT_UNDEFINED) + + for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) { + if (key.color[i].format == static_cast(GPUTexture::Format::Unknown)) + break; + const VkImageLayout layout = key.color_feedback_loop ? (UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL) : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - attachments[num_attachments] = {0, - static_cast(key.color_format), + + const RenderPassCacheKey::RenderTarget key_rt = key.color[i]; + attachments[num_attachments] = {i, + TEXTURE_FORMAT_MAPPING[key_rt.format], static_cast(key.samples), - static_cast(key.color_load_op), - static_cast(key.color_store_op), + static_cast(key_rt.load_op), + static_cast(key_rt.store_op), VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE, layout, @@ -1453,14 +1562,17 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) num_attachments++; } - if (key.depth_format != VK_FORMAT_UNDEFINED) + + const u32 num_rts = num_attachments; + + if (key.depth_format != static_cast(GPUTexture::Format::Unknown)) { const VkImageLayout layout = key.depth_sampling ? 
(UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL) : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; attachments[num_attachments] = {0, - static_cast(key.depth_format), + static_cast(TEXTURE_FORMAT_MAPPING[key.depth_format]), static_cast(key.samples), static_cast(key.depth_load_op), static_cast(key.depth_store_op), @@ -1480,10 +1592,10 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) 0; const VkSubpassDescription subpass = {subpass_flags, VK_PIPELINE_BIND_POINT_GRAPHICS, - input_reference_ptr ? 1u : 0u, - input_reference_ptr ? input_reference_ptr : nullptr, - color_reference_ptr ? 1u : 0u, - color_reference_ptr ? color_reference_ptr : nullptr, + input_reference_ptr ? num_rts : 0u, + input_reference_ptr, + num_rts, + color_reference_ptr, nullptr, depth_reference_ptr, 0, @@ -1506,10 +1618,37 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) return VK_NULL_HANDLE; } - m_render_pass_cache.emplace(key.key, pass); + m_render_pass_cache.emplace(key, pass); return pass; } +VkFramebuffer VulkanDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags) +{ + VulkanDevice& dev = VulkanDevice::GetInstance(); + VkRenderPass render_pass = dev.GetRenderPass(rts, num_rts, ds, false, false); + + const GPUTexture* rt_or_ds = (num_rts > 0) ? 
rts[0] : ds; + DebugAssert(rt_or_ds); + + Vulkan::FramebufferBuilder fbb; + fbb.SetRenderPass(render_pass); + fbb.SetSize(rt_or_ds->GetWidth(), rt_or_ds->GetHeight(), 1); + for (u32 i = 0; i < num_rts; i++) + fbb.AddAttachment(static_cast(rts[i])->GetView()); + if (ds) + fbb.AddAttachment(static_cast(ds)->GetView()); + + return fbb.Create(dev.m_device, false); +} + +void VulkanDevice::DestroyFramebuffer(VkFramebuffer fbo) +{ + if (fbo == VK_NULL_HANDLE) + return; + + VulkanDevice::GetInstance().DeferFramebufferDestruction(fbo); +} + void VulkanDevice::GetAdapterAndModeList(AdapterAndModeList* ret, VkInstance instance) { GPUList gpus = EnumerateGPUs(instance); @@ -2098,7 +2237,7 @@ bool VulkanDevice::BeginPresent(bool frame_skip) void VulkanDevice::EndPresent() { - DebugAssert(InRenderPass() && !m_current_framebuffer); + DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target); EndRenderPass(); VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); @@ -2196,7 +2335,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features) m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && m_device_features.dualSrcBlend; - m_features.framebuffer_fetch = /*!(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && */false; + m_features.framebuffer_fetch = /*!(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && */ false; if (!m_features.dual_source_blend) Log_WarningPrintf("Vulkan driver is missing dual-source blending. 
This will have an impact on performance."); @@ -2354,25 +2493,22 @@ void VulkanDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u void VulkanDevice::ClearRenderTarget(GPUTexture* t, u32 c) { GPUDevice::ClearRenderTarget(t, c); - if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetRT() == t) + if (InRenderPass() && IsRenderTargetBound(t)) EndRenderPass(); } void VulkanDevice::ClearDepth(GPUTexture* t, float d) { GPUDevice::ClearDepth(t, d); - if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetDS() == t) + if (InRenderPass() && m_current_depth_target == t) EndRenderPass(); } void VulkanDevice::InvalidateRenderTarget(GPUTexture* t) { GPUDevice::InvalidateRenderTarget(t); - if (InRenderPass() && m_current_framebuffer && - (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t)) - { + if (InRenderPass() && (t->IsRenderTarget() ? IsRenderTargetBound(t) : (m_current_depth_target == t))) EndRenderPass(); - } } bool VulkanDevice::CreateBuffers() @@ -2670,143 +2806,213 @@ void VulkanDevice::RenderBlankFrame() InvalidateCachedState(); } -void VulkanDevice::SetFramebuffer(GPUFramebuffer* fb) +void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) { - if (m_current_framebuffer == fb) - return; + bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); + bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); + bool needs_rt_clear = false; - if (InRenderPass()) - EndRenderPass(); + m_current_depth_target = ds; + for (u32 i = 0; i < num_rts; i++) + { + VulkanTexture* const RT = static_cast(rts[i]); + changed |= m_current_render_targets[i] != RT; + m_current_render_targets[i] = RT; + needs_rt_clear |= RT->IsClearedOrInvalidated(); + } + for (u32 i = num_rts; i < m_num_current_render_targets; i++) + m_current_render_targets[i] = nullptr; + m_num_current_render_targets = num_rts; - m_current_framebuffer = static_cast(fb); + if 
(changed) + { + if (InRenderPass()) + EndRenderPass(); + + if (m_num_current_render_targets == 0 && !m_current_depth_target) + { + m_current_framebuffer = VK_NULL_HANDLE; + return; + } + + if (!m_optional_extensions.vk_khr_dynamic_rendering) + { + m_current_framebuffer = + m_framebuffer_manager.Lookup((m_num_current_render_targets > 0) ? m_current_render_targets.data() : nullptr, + m_num_current_render_targets, m_current_depth_target, 0); + if (m_current_framebuffer == VK_NULL_HANDLE) + { + Log_ErrorPrint("Failed to create framebuffer"); + return; + } + } + } + + // TODO: This could use vkCmdClearAttachments() instead. + if (needs_rt_clear || needs_ds_clear) + { + if (InRenderPass()) + EndRenderPass(); + } } void VulkanDevice::BeginRenderPass() { + // TODO: Stats DebugAssert(!InRenderPass()); - VkRenderPassBeginInfo bi = { - VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, VK_NULL_HANDLE, VK_NULL_HANDLE, {}, 0u, nullptr}; - std::array clear_values; - - if (m_current_framebuffer) [[likely]] - { - VkFormat rt_format = VK_FORMAT_UNDEFINED; - VkFormat ds_format = VK_FORMAT_UNDEFINED; - VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; - VkAttachmentLoadOp rt_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - VkAttachmentStoreOp rt_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; - VkAttachmentLoadOp ds_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - VkAttachmentStoreOp ds_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE; - - VulkanTexture* rt = static_cast(m_current_framebuffer->GetRT()); - if (rt) - { - samples = static_cast(rt->GetSamples()); - rt_format = rt->GetVkFormat(); - rt_store_op = VK_ATTACHMENT_STORE_OP_STORE; - - switch (rt->GetState()) - { - case GPUTexture::State::Cleared: - { - std::memcpy(clear_values[0].color.float32, rt->GetUNormClearColor().data(), - sizeof(clear_values[0].color.float32)); - rt_load_op = VK_ATTACHMENT_LOAD_OP_CLEAR; - rt->SetState(GPUTexture::State::Dirty); - bi.pClearValues = clear_values.data(); - bi.clearValueCount = 1; - } - break; - - 
case GPUTexture::State::Invalidated: - { - // already DONT_CARE - rt->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Dirty: - { - rt_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; - } - break; - - default: - UnreachableCode(); - break; - } - - rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment); - rt->SetUseFenceCounter(GetCurrentFenceCounter()); - } - - VulkanTexture* ds = static_cast(m_current_framebuffer->GetDS()); - if (ds) - { - samples = static_cast(ds->GetSamples()); - ds_format = ds->GetVkFormat(); - ds_store_op = VK_ATTACHMENT_STORE_OP_STORE; - - switch (ds->GetState()) - { - case GPUTexture::State::Cleared: - { - const u32 idx = rt ? 1 : 0; - clear_values[idx].depthStencil = {ds->GetClearDepth(), 0u}; - ds_load_op = VK_ATTACHMENT_LOAD_OP_CLEAR; - ds->SetState(GPUTexture::State::Dirty); - bi.pClearValues = clear_values.data(); - bi.clearValueCount = idx + 1; - } - break; - - case GPUTexture::State::Invalidated: - { - // already DONT_CARE - ds->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Dirty: - { - ds_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; - } - break; - - default: - UnreachableCode(); - break; - } - - ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment); - ds->SetUseFenceCounter(GetCurrentFenceCounter()); - } - - bi.framebuffer = m_current_framebuffer->GetFramebuffer(); - bi.renderPass = m_current_render_pass = - GetRenderPass(rt_format, ds_format, samples, rt_load_op, rt_store_op, ds_load_op, ds_store_op); - bi.renderArea.extent = {m_current_framebuffer->GetWidth(), m_current_framebuffer->GetHeight()}; - } - else - { - // Re-rendering to swap chain. 
- bi.framebuffer = m_swap_chain->GetCurrentFramebuffer(); - bi.renderPass = m_current_render_pass = - GetRenderPass(m_swap_chain->GetImageFormat(), VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, - VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE); - bi.renderArea.extent = {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}; - } - - DebugAssert(m_current_render_pass); - // All textures should be in shader read only optimal already, but just in case.. const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); for (u32 i = 0; i < num_textures; i++) m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); - // TODO: Stats - vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &bi, VK_SUBPASS_CONTENTS_INLINE); + if (m_optional_extensions.vk_khr_dynamic_rendering) + { + VkRenderingInfoKHR ri = { + VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr}; + + std::array attachments; + VkRenderingAttachmentInfoKHR depth_attachment; + + if (m_num_current_render_targets > 0 || m_current_depth_target) + { + ri.colorAttachmentCount = m_num_current_render_targets; + ri.pColorAttachments = (m_num_current_render_targets > 0) ? 
attachments.data() : nullptr; + + // set up clear values and transition targets + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + VulkanTexture* const rt = static_cast(m_current_render_targets[i]); + rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment); + rt->SetUseFenceCounter(GetCurrentFenceCounter()); + + VkRenderingAttachmentInfo& ai = attachments[i]; + ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; + ai.pNext = nullptr; + ai.imageView = rt->GetView(); + ai.imageLayout = rt->GetVkLayout(); + ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR; + ai.resolveImageView = VK_NULL_HANDLE; + ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + ai.loadOp = GetLoadOpForTexture(rt); + ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + + if (rt->GetState() == GPUTexture::State::Cleared) + { + std::memcpy(ai.clearValue.color.float32, rt->GetUNormClearColor().data(), + sizeof(ai.clearValue.color.float32)); + } + rt->SetState(GPUTexture::State::Dirty); + } + + if (VulkanTexture* const ds = static_cast(m_current_depth_target)) + { + ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment); + ds->SetUseFenceCounter(GetCurrentFenceCounter()); + + depth_attachment.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; + depth_attachment.pNext = nullptr; + depth_attachment.imageView = ds->GetView(); + depth_attachment.imageLayout = ds->GetVkLayout(); + depth_attachment.resolveMode = VK_RESOLVE_MODE_NONE_KHR; + depth_attachment.resolveImageView = VK_NULL_HANDLE; + depth_attachment.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + depth_attachment.loadOp = GetLoadOpForTexture(ds); + depth_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + ri.pDepthAttachment = &depth_attachment; + + if (ds->GetState() == GPUTexture::State::Cleared) + depth_attachment.clearValue.depthStencil = {ds->GetClearDepth(), 0u}; + + ds->SetState(GPUTexture::State::Dirty); + } + + const VulkanTexture* const rt_or_ds = static_cast( + (m_num_current_render_targets > 0) ? 
m_current_render_targets[0] : m_current_depth_target); + ri.renderArea = {{}, {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()}}; + } + else + { + VkRenderingAttachmentInfo& ai = attachments[0]; + ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; + ai.pNext = nullptr; + ai.imageView = m_swap_chain->GetCurrentImageView(); + ai.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR; + ai.resolveImageView = VK_NULL_HANDLE; + ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + ai.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + + ri.colorAttachmentCount = 1; + ri.pColorAttachments = attachments.data(); + ri.renderArea = {{}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}}; + } + + m_current_render_pass = DYNAMIC_RENDERING_RENDER_PASS; + vkCmdBeginRenderingKHR(GetCurrentCommandBuffer(), &ri); + } + else + { + VkRenderPassBeginInfo bi = { + VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, VK_NULL_HANDLE, VK_NULL_HANDLE, {}, 0u, nullptr}; + std::array clear_values; + + if (m_current_framebuffer != VK_NULL_HANDLE) + { + bi.framebuffer = m_current_framebuffer; + bi.renderPass = m_current_render_pass = GetRenderPass( + m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target, false, false); + if (bi.renderPass == VK_NULL_HANDLE) + { + Log_ErrorPrint("Failed to create render pass"); + return; + } + + // set up clear values and transition targets + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + VulkanTexture* const rt = static_cast(m_current_render_targets[i]); + if (rt->GetState() == GPUTexture::State::Cleared) + { + std::memcpy(clear_values[i].color.float32, rt->GetUNormClearColor().data(), + sizeof(clear_values[i].color.float32)); + bi.pClearValues = clear_values.data(); + bi.clearValueCount = i + 1; + } + rt->SetState(GPUTexture::State::Dirty); + rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment); + 
rt->SetUseFenceCounter(GetCurrentFenceCounter()); + } + if (VulkanTexture* const ds = static_cast(m_current_depth_target)) + { + if (ds->GetState() == GPUTexture::State::Cleared) + { + clear_values[m_num_current_render_targets].depthStencil = {ds->GetClearDepth(), 0u}; + bi.pClearValues = clear_values.data(); + bi.clearValueCount = m_num_current_render_targets + 1; + } + ds->SetState(GPUTexture::State::Dirty); + ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment); + ds->SetUseFenceCounter(GetCurrentFenceCounter()); + } + + const VulkanTexture* const rt_or_ds = static_cast( + (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target); + bi.renderArea.extent = {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()}; + } + else + { + // Re-rendering to swap chain. + bi.framebuffer = m_swap_chain->GetCurrentFramebuffer(); + bi.renderPass = m_current_render_pass = + GetSwapChainRenderPass(m_swap_chain->GetWindowInfo().surface_format, VK_ATTACHMENT_LOAD_OP_LOAD); + bi.renderArea.extent = {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}; + } + + DebugAssert(m_current_render_pass); + vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &bi, VK_SUBPASS_CONTENTS_INLINE); + } // If this is a new command buffer, bind the pipeline and such. 
if (m_dirty_flags & DIRTY_FLAG_INITIAL) @@ -2830,21 +3036,53 @@ void VulkanDevice::BeginSwapChainRenderPass() for (u32 i = 0; i < num_textures; i++) m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); - const VkRenderPass render_pass = - GetRenderPass(m_swap_chain->GetImageFormat(), VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, - VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE); - DebugAssert(render_pass); + if (m_optional_extensions.vk_khr_dynamic_rendering) + { + const VkRenderingAttachmentInfo ai = {VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR, + nullptr, + m_swap_chain->GetCurrentImageView(), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_RESOLVE_MODE_NONE_KHR, + VK_NULL_HANDLE, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_ATTACHMENT_LOAD_OP_LOAD, + VK_ATTACHMENT_STORE_OP_STORE, + {}}; - const VkRenderPassBeginInfo rp = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - nullptr, - render_pass, - m_swap_chain->GetCurrentFramebuffer(), - {{0, 0}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}}, - 1u, - &s_present_clear_color}; - vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE); - m_current_render_pass = render_pass; - m_current_framebuffer = nullptr; + const VkRenderingInfoKHR ri = {VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, + nullptr, + 0u, + {{}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}}, + 1u, + 0u, + 1u, + &ai, + nullptr, + nullptr}; + + m_current_render_pass = DYNAMIC_RENDERING_RENDER_PASS; + vkCmdBeginRenderingKHR(GetCurrentCommandBuffer(), &ri); + } + else + { + m_current_render_pass = + GetSwapChainRenderPass(m_swap_chain->GetWindowInfo().surface_format, VK_ATTACHMENT_LOAD_OP_CLEAR); + DebugAssert(m_current_render_pass); + + const VkRenderPassBeginInfo rp = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + nullptr, + m_current_render_pass, + m_swap_chain->GetCurrentFramebuffer(), + {{0, 0}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}}, + 1u, + &s_present_clear_color}; + 
vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE); + } + + m_num_current_render_targets = 0; + std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); + m_current_depth_target = nullptr; + m_current_framebuffer = VK_NULL_HANDLE; // Clear pipeline, it's likely incompatible. m_current_pipeline = nullptr; @@ -2860,32 +3098,11 @@ void VulkanDevice::EndRenderPass() DebugAssert(m_current_render_pass != VK_NULL_HANDLE); // TODO: stats - m_current_render_pass = VK_NULL_HANDLE; - - vkCmdEndRenderPass(GetCurrentCommandBuffer()); -} - -void VulkanDevice::UnbindFramebuffer(VulkanFramebuffer* fb) -{ - if (m_current_framebuffer != fb) - return; - - if (InRenderPass()) - EndRenderPass(); - m_current_framebuffer = nullptr; -} - -void VulkanDevice::UnbindFramebuffer(VulkanTexture* tex) -{ - if (!m_current_framebuffer) - return; - - if (m_current_framebuffer->GetRT() != tex && m_current_framebuffer->GetDS() != tex) - return; - - if (InRenderPass()) - EndRenderPass(); - m_current_framebuffer = nullptr; + VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); + if (std::exchange(m_current_render_pass, VK_NULL_HANDLE) == DYNAMIC_RENDERING_RENDER_PASS) + vkCmdEndRenderingKHR(cmdbuf); + else + vkCmdEndRenderPass(GetCurrentCommandBuffer()); } void VulkanDevice::SetPipeline(GPUPipeline* pipeline) @@ -2928,10 +3145,20 @@ void VulkanDevice::InvalidateCachedState() { m_dirty_flags = ALL_DIRTY_STATE; m_current_render_pass = VK_NULL_HANDLE; - m_current_framebuffer = nullptr; m_current_pipeline = nullptr; } +bool VulkanDevice::IsRenderTargetBound(const GPUTexture* tex) const +{ + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + return true; + } + + return false; +} + VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const { return m_pipeline_layouts[static_cast(m_current_pipeline_layout)]; @@ -3008,6 +3235,31 @@ void VulkanDevice::UnbindTexture(VulkanTexture* tex) m_dirty_flags 
|= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; } } + + if (tex->IsRenderTarget()) + { + for (u32 i = 0; i < m_num_current_render_targets; i++) + { + if (m_current_render_targets[i] == tex) + { + Log_WarningPrint("Unbinding current RT"); + SetRenderTargets(nullptr, 0, m_current_depth_target); + break; + } + } + + m_framebuffer_manager.RemoveRTReferences(tex); + } + else if (tex->IsDepthStencil()) + { + if (m_current_depth_target == tex) + { + Log_WarningPrint("Unbinding current DS"); + SetRenderTargets(nullptr, 0, nullptr); + } + + m_framebuffer_manager.RemoveDSReferences(tex); + } } void VulkanDevice::UnbindTextureBuffer(VulkanTextureBuffer* buf) diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h index 20b3d49cd..06f6cb0e2 100644 --- a/src/util/vulkan_device.h +++ b/src/util/vulkan_device.h @@ -4,6 +4,7 @@ #pragma once #include "gpu_device.h" +#include "gpu_framebuffer_manager.h" #include "gpu_texture.h" #include "vulkan_loader.h" #include "vulkan_stream_buffer.h" @@ -20,7 +21,6 @@ #include #include -class VulkanFramebuffer; class VulkanPipeline; class VulkanSwapChain; class VulkanTexture; @@ -45,6 +45,7 @@ public: bool vk_ext_attachment_feedback_loop_layout : 1; bool vk_ext_full_screen_exclusive : 1; bool vk_khr_driver_properties : 1; + bool vk_khr_dynamic_rendering : 1; bool vk_khr_push_descriptor : 1; }; @@ -87,8 +88,6 @@ public: void ClearDepth(GPUTexture* t, float d) override; void InvalidateRenderTarget(GPUTexture* t) override; - std::unique_ptr CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override; - std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, std::span data) override; std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, const char* entry_point, DynamicHeapArray* out_binary) override; @@ -106,7 +105,7 @@ public: void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; - void 
SetFramebuffer(GPUFramebuffer* fb) override; + void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -156,14 +155,10 @@ public: void WaitForGPUIdle(); // Creates a simple render pass. - VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format, VkSampleCountFlagBits samples, - VkAttachmentLoadOp color_load_op = VK_ATTACHMENT_LOAD_OP_LOAD, - VkAttachmentStoreOp color_store_op = VK_ATTACHMENT_STORE_OP_STORE, - VkAttachmentLoadOp depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD, - VkAttachmentStoreOp depth_store_op = VK_ATTACHMENT_STORE_OP_STORE, - VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - VkAttachmentStoreOp stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE, - bool color_feedback_loop = false, bool depth_sampling = false); + VkRenderPass GetRenderPass(const GPUPipeline::GraphicsConfig& config); + VkRenderPass GetRenderPass(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, bool color_feedback_loop = false, + bool depth_sampling = false); + VkRenderPass GetSwapChainRenderPass(GPUTexture::Format format, VkAttachmentLoadOp load_op); // Gets a non-clearing version of the specified render pass. Slow, don't call in hot path. 
VkRenderPass GetRenderPassForRestarting(VkRenderPass pass); @@ -213,7 +208,6 @@ public: void SubmitCommandBuffer(bool wait_for_completion, const char* reason, ...); void SubmitCommandBufferAndRestartRenderPass(const char* reason); - void UnbindFramebuffer(VulkanFramebuffer* fb); void UnbindFramebuffer(VulkanTexture* tex); void UnbindPipeline(VulkanPipeline* pl); void UnbindTexture(VulkanTexture* tex); @@ -239,24 +233,32 @@ private: DIRTY_FLAG_INITIAL | DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_TEXTURES_OR_SAMPLERS, }; - union RenderPassCacheKey + struct RenderPassCacheKey { - struct + struct RenderTarget { - u32 color_format : 8; - u32 depth_format : 8; - u32 samples : 4; - u32 color_load_op : 2; - u32 color_store_op : 1; - u32 depth_load_op : 2; - u32 depth_store_op : 1; - u32 stencil_load_op : 2; - u32 stencil_store_op : 1; - u32 color_feedback_loop : 1; - u32 depth_sampling : 1; + u8 format : 5; + u8 load_op : 2; + u8 store_op : 1; }; + RenderTarget color[MAX_RENDER_TARGETS]; - u32 key; + u8 depth_format : 5; + u8 depth_load_op : 2; + u8 depth_store_op : 1; + u8 stencil_load_op : 2; + u8 stencil_store_op : 1; + u8 depth_sampling : 1; + u8 color_feedback_loop : 1; + u8 samples; + + bool operator==(const RenderPassCacheKey& rhs) const; + bool operator!=(const RenderPassCacheKey& rhs) const; + }; + + struct RenderPassCacheKeyHash + { + size_t operator()(const RenderPassCacheKey& rhs) const; }; struct CommandBuffer @@ -332,6 +334,8 @@ private: /// Set dirty flags on everything to force re-bind at next draw time. void InvalidateCachedState(); + bool IsRenderTargetBound(const GPUTexture* tex) const; + /// Applies any changed state. 
VkPipelineLayout GetCurrentVkPipelineLayout() const; void SetInitialPipelineState(); @@ -349,6 +353,8 @@ private: bool InRenderPass(); VkRenderPass CreateCachedRenderPass(RenderPassCacheKey key); + static VkFramebuffer CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags); + static void DestroyFramebuffer(VkFramebuffer fbo); void BeginCommandBuffer(u32 index); void WaitForCommandBufferCompletion(u32 index); @@ -400,7 +406,8 @@ private: QueuedPresent m_queued_present = {}; - std::unordered_map m_render_pass_cache; + std::unordered_map m_render_pass_cache; + GPUFramebufferManager m_framebuffer_manager; VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE; // TODO: Move to static? @@ -438,7 +445,10 @@ private: // Which bindings/state has to be updated before the next draw. u32 m_dirty_flags = ALL_DIRTY_STATE; - VulkanFramebuffer* m_current_framebuffer = nullptr; + u32 m_num_current_render_targets = 0; + std::array m_current_render_targets = {}; + GPUTexture* m_current_depth_target = nullptr; + VkFramebuffer m_current_framebuffer = VK_NULL_HANDLE; VkRenderPass m_current_render_pass = VK_NULL_HANDLE; VulkanPipeline* m_current_pipeline = nullptr; diff --git a/src/util/vulkan_entry_points.inl b/src/util/vulkan_entry_points.inl index 9fa2692a6..f74f1e154 100644 --- a/src/util/vulkan_entry_points.inl +++ b/src/util/vulkan_entry_points.inl @@ -236,6 +236,10 @@ VULKAN_DEVICE_ENTRY_POINT(vkAcquireFullScreenExclusiveModeEXT, false) VULKAN_DEVICE_ENTRY_POINT(vkReleaseFullScreenExclusiveModeEXT, false) #endif +// VK_KHR_dynamic_rendering +VULKAN_DEVICE_ENTRY_POINT(vkCmdBeginRenderingKHR, false) +VULKAN_DEVICE_ENTRY_POINT(vkCmdEndRenderingKHR, false) + // VK_KHR_push_descriptor VULKAN_DEVICE_ENTRY_POINT(vkCmdPushDescriptorSetKHR, false) diff --git a/src/util/vulkan_pipeline.cpp b/src/util/vulkan_pipeline.cpp index 744e1a824..38c34c847 100644 --- a/src/util/vulkan_pipeline.cpp +++ b/src/util/vulkan_pipeline.cpp @@ -199,11 +199,31 @@ std::unique_ptr 
VulkanDevice::CreatePipeline(const GPUPipeline::Gra gpb.SetPipelineLayout(m_pipeline_layouts[static_cast(config.layout)]); - const VkRenderPass render_pass = GetRenderPass(TEXTURE_FORMAT_MAPPING[static_cast(config.color_format)], - TEXTURE_FORMAT_MAPPING[static_cast(config.depth_format)], - static_cast(config.samples)); - DebugAssert(render_pass); - gpb.SetRenderPass(render_pass, 0); + if (m_optional_extensions.vk_khr_dynamic_rendering) + { + gpb.SetDynamicRendering(); + + for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) + { + if (config.color_formats[i] == GPUTexture::Format::Unknown) + break; + + gpb.AddDynamicRenderingColorAttachment( + VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast(config.color_formats[i])]); + } + + if (config.depth_format != GPUTexture::Format::Unknown) + { + gpb.SetDynamicRenderingDepthAttachment(VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast(config.depth_format)], + VK_FORMAT_UNDEFINED); + } + } + else + { + const VkRenderPass render_pass = GetRenderPass(config); + DebugAssert(render_pass != VK_NULL_HANDLE); + gpb.SetRenderPass(render_pass, 0); + } const VkPipeline pipeline = gpb.Create(m_device, m_pipeline_cache, false); if (!pipeline) diff --git a/src/util/vulkan_swap_chain.cpp b/src/util/vulkan_swap_chain.cpp index 98df41e28..e12ecc873 100644 --- a/src/util/vulkan_swap_chain.cpp +++ b/src/util/vulkan_swap_chain.cpp @@ -488,8 +488,7 @@ bool VulkanSwapChain::CreateSwapChain() res = vkGetSwapchainImagesKHR(dev.GetVulkanDevice(), m_swap_chain, &image_count, images.data()); Assert(res == VK_SUCCESS); - VkRenderPass render_pass = - dev.GetRenderPass(m_format, VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR); + VkRenderPass render_pass = dev.GetSwapChainRenderPass(m_window_info.surface_format, VK_ATTACHMENT_LOAD_OP_CLEAR); if (render_pass == VK_NULL_HANDLE) return false; diff --git a/src/util/vulkan_swap_chain.h b/src/util/vulkan_swap_chain.h index e4090989f..8f55a6856 100644 --- a/src/util/vulkan_swap_chain.h +++ 
b/src/util/vulkan_swap_chain.h @@ -40,6 +40,7 @@ public: ALWAYS_INLINE u32 GetImageCount() const { return static_cast(m_images.size()); } ALWAYS_INLINE VkFormat GetImageFormat() const { return m_format; } ALWAYS_INLINE VkImage GetCurrentImage() const { return m_images[m_current_image].image; } + ALWAYS_INLINE VkImageView GetCurrentImageView() const { return m_images[m_current_image].view; } ALWAYS_INLINE VkFramebuffer GetCurrentFramebuffer() const { return m_images[m_current_image].framebuffer; } ALWAYS_INLINE VkSemaphore GetImageAvailableSemaphore() const { diff --git a/src/util/vulkan_texture.cpp b/src/util/vulkan_texture.cpp index e447bc68d..74593d801 100644 --- a/src/util/vulkan_texture.cpp +++ b/src/util/vulkan_texture.cpp @@ -961,54 +961,6 @@ std::unique_ptr VulkanDevice::CreateSampler(const GPUSampler::Config return std::unique_ptr(new VulkanSampler(vsampler)); } -VulkanFramebuffer::VulkanFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, VkFramebuffer fb) - : GPUFramebuffer(rt, ds, width, height), m_framebuffer(fb) -{ -} - -VulkanFramebuffer::~VulkanFramebuffer() -{ - VulkanDevice::GetInstance().DeferFramebufferDestruction(m_framebuffer); -} - -void VulkanFramebuffer::SetDebugName(const std::string_view& name) -{ - Vulkan::SetObjectName(VulkanDevice::GetInstance().GetVulkanDevice(), m_framebuffer, name); -} - -std::unique_ptr VulkanDevice::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds /*= nullptr*/) -{ - DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds))); - VulkanTexture* RT = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds); - VulkanTexture* DS = static_cast((rt_or_ds && rt_or_ds->IsDepthStencil()) ? rt_or_ds : ds); - - const u32 width = RT ? RT->GetWidth() : DS->GetWidth(); - const u32 height = RT ? RT->GetHeight() : DS->GetHeight(); - - const VkRenderPass render_pass = - GetRenderPass(RT ? RT->GetVkFormat() : VK_FORMAT_UNDEFINED, DS ? 
DS->GetVkFormat() : VK_FORMAT_UNDEFINED, - static_cast(RT ? RT->GetSamples() : DS->GetSamples()), - RT ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE, - RT ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE, - DS ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE, - DS ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE); - DebugAssert(render_pass != VK_NULL_HANDLE); - - Vulkan::FramebufferBuilder fbb; - fbb.SetRenderPass(render_pass); - fbb.SetSize(width, height, 1); - if (RT) - fbb.AddAttachment(RT->GetView()); - if (DS) - fbb.AddAttachment(DS->GetView()); - - const VkFramebuffer fb = fbb.Create(m_device, false); - if (fb == VK_NULL_HANDLE) - return {}; - - return std::unique_ptr(new VulkanFramebuffer(RT, DS, width, height, fb)); -} - VulkanTextureBuffer::VulkanTextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements) { diff --git a/src/util/vulkan_texture.h b/src/util/vulkan_texture.h index 8cfc58f9f..0dc84670e 100644 --- a/src/util/vulkan_texture.h +++ b/src/util/vulkan_texture.h @@ -126,25 +126,6 @@ private: VkSampler m_sampler; }; -class VulkanFramebuffer final : public GPUFramebuffer -{ - friend VulkanDevice; - -public: - ~VulkanFramebuffer() override; - - ALWAYS_INLINE VkFramebuffer GetFramebuffer() const { return m_framebuffer; } - - void SetDebugName(const std::string_view& name) override; - - // TODO: Maybe render passes should be in here to avoid the map lookup... - -private: - VulkanFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, VkFramebuffer fb); - - VkFramebuffer m_framebuffer; -}; - class VulkanTextureBuffer final : public GPUTextureBuffer { friend VulkanDevice;