GPUDevice: Get rid of framebuffer abstraction

This commit is contained in:
Stenzek 2023-12-04 15:47:18 +10:00
parent a9ee2a34d8
commit 3b2c70cda5
No known key found for this signature in database
47 changed files with 1404 additions and 1165 deletions

View file

@ -10,7 +10,7 @@
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_MMAP_FASTMEM=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_NEWREC=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\xxhash\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include;$(SolutionDir)dep\rapidjson\include;$(SolutionDir)dep\discord-rpc\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include;$(SolutionDir)dep\rapidjson\include;$(SolutionDir)dep\discord-rpc\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(Platform)'!='ARM64'">%(AdditionalIncludeDirectories);$(SolutionDir)dep\rainterface</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(Platform)'=='x64'">%(AdditionalIncludeDirectories);$(SolutionDir)dep\xbyak\xbyak</AdditionalIncludeDirectories>

View file

@ -1579,7 +1579,7 @@ bool GPU::CompileDisplayPipeline()
plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
plconfig.color_format = g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8;
plconfig.SetTargetFormats(g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8);
plconfig.depth_format = GPUTexture::Format::Unknown;
plconfig.samples = 1;
plconfig.per_sample_shading = false;
@ -1670,9 +1670,10 @@ bool GPU::PresentDisplay()
return RenderDisplay(nullptr, draw_rect, true);
}
bool GPU::RenderDisplay(GPUFramebuffer* target, const Common::Rectangle<s32>& draw_rect, bool postfx)
bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle<s32>& draw_rect, bool postfx)
{
GL_SCOPE_FMT("RenderDisplay: {}x{} at {},{}", draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight());
// Argument order follows the label: the "{}x{}" pair is the rectangle's size and the "{},{}" pair is
// its top-left origin, so the debug scope text reads "<width>x<height> at <left>,<top>".
GL_SCOPE_FMT("RenderDisplay: {}x{} at {},{}", draw_rect.GetWidth(), draw_rect.GetHeight(), draw_rect.left,
             draw_rect.top);
if (m_display_texture)
m_display_texture->MakeReadyForSampling();
@ -1716,8 +1717,7 @@ bool GPU::RenderDisplay(GPUFramebuffer* target, const Common::Rectangle<s32>& dr
break;
}
const GPUTexture::Format hdformat =
(target && target->GetRT()) ? target->GetRT()->GetFormat() : g_gpu_device->GetWindowFormat();
const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetWindowFormat();
const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetWindowWidth();
const u32 target_height = target ? target->GetHeight() : g_gpu_device->GetWindowHeight();
const bool really_postfx = (postfx && HasDisplayTexture() && PostProcessing::IsActive() &&
@ -1725,12 +1725,12 @@ bool GPU::RenderDisplay(GPUFramebuffer* target, const Common::Rectangle<s32>& dr
if (really_postfx)
{
g_gpu_device->ClearRenderTarget(PostProcessing::GetInputTexture(), 0);
g_gpu_device->SetFramebuffer(PostProcessing::GetInputFramebuffer());
g_gpu_device->SetRenderTarget(PostProcessing::GetInputTexture());
}
else
{
if (target)
g_gpu_device->SetFramebuffer(target);
g_gpu_device->SetRenderTarget(target);
else if (!g_gpu_device->BeginPresent(false))
return false;
}
@ -2059,16 +2059,10 @@ bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangl
if (!render_texture)
return false;
std::unique_ptr<GPUFramebuffer> render_fb = g_gpu_device->CreateFramebuffer(render_texture.get());
if (!render_fb)
return false;
g_gpu_device->ClearRenderTarget(render_texture.get(), 0);
// TODO: this should use copy shader instead.
RenderDisplay(render_fb.get(), draw_rect, postfx);
g_gpu_device->SetFramebuffer(nullptr);
RenderDisplay(render_texture.get(), draw_rect, postfx);
const u32 stride = GPUTexture::GetPixelSize(hdformat) * width;
out_pixels->resize(width * height);

View file

@ -23,7 +23,6 @@
class StateWrapper;
class GPUDevice;
class GPUFramebuffer;
class GPUTexture;
class GPUPipeline;
@ -585,7 +584,7 @@ protected:
float* out_top_padding, float* out_scale, float* out_x_scale,
bool apply_aspect_ratio = true) const;
bool RenderDisplay(GPUFramebuffer* target, const Common::Rectangle<s32>& draw_rect, bool postfx);
bool RenderDisplay(GPUTexture* target, const Common::Rectangle<s32>& draw_rect, bool postfx);
s32 m_display_width = 0;
s32 m_display_height = 0;

View file

@ -317,7 +317,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di
void GPU_HW::RestoreDeviceContext()
{
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetFramebuffer(m_vram_framebuffer.get());
g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get());
g_gpu_device->SetViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight());
SetScissor();
m_batch_ubo_dirty = true;
@ -649,20 +649,6 @@ bool GPU_HW::CreateBuffers()
GL_OBJECT_NAME(m_display_private_texture, "Display Texture");
GL_OBJECT_NAME(m_vram_readback_texture, "VRAM Readback Texture");
// vram framebuffer has both colour and depth
if (!(m_vram_framebuffer = g_gpu_device->CreateFramebuffer(m_vram_texture.get(), m_vram_depth_texture.get())) ||
!(m_vram_update_depth_framebuffer = g_gpu_device->CreateFramebuffer(m_vram_depth_texture.get())) ||
!(m_vram_readback_framebuffer = g_gpu_device->CreateFramebuffer(m_vram_readback_texture.get())) ||
!(m_display_framebuffer = g_gpu_device->CreateFramebuffer(m_display_private_texture.get())))
{
return false;
}
GL_OBJECT_NAME(m_vram_framebuffer, "VRAM Framebuffer");
GL_OBJECT_NAME(m_vram_update_depth_framebuffer, "VRAM Update Depth Framebuffer");
GL_OBJECT_NAME(m_vram_readback_framebuffer, "VRAM Readback Framebuffer");
GL_OBJECT_NAME(m_display_framebuffer, "Display Framebuffer");
if (!(m_vram_upload_buffer =
g_gpu_device->CreateTextureBuffer(GPUTextureBuffer::Format::R16UI, GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS)))
{
@ -679,11 +665,9 @@ bool GPU_HW::CreateBuffers()
GPUTexture::Type::Texture, VRAM_RT_FORMAT)) ||
!(m_downsample_render_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1,
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
!(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get())) ||
!(m_downsample_weight_texture =
g_gpu_device->CreateTexture(texture_width >> (levels - 1), texture_height >> (levels - 1), 1, 1, 1,
GPUTexture::Type::RenderTarget, GPUTexture::Format::R8)) ||
!(m_downsample_weight_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_weight_texture.get())))
GPUTexture::Type::RenderTarget, GPUTexture::Format::R8)))
{
return false;
}
@ -693,14 +677,13 @@ bool GPU_HW::CreateBuffers()
const u32 downsample_scale = GetBoxDownsampleScale(m_resolution_scale);
if (!(m_downsample_render_texture =
g_gpu_device->CreateTexture(VRAM_WIDTH * downsample_scale, VRAM_HEIGHT * downsample_scale, 1, 1, 1,
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
!(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get())))
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)))
{
return false;
}
}
g_gpu_device->SetFramebuffer(m_vram_framebuffer.get());
g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get());
SetFullVRAMDirtyRectangle();
return true;
}
@ -719,15 +702,9 @@ void GPU_HW::DestroyBuffers()
ClearDisplayTexture();
m_vram_upload_buffer.reset();
m_downsample_weight_framebuffer.reset();
m_downsample_weight_texture.reset();
m_downsample_framebuffer.reset();
m_downsample_render_texture.reset();
m_downsample_texture.reset();
m_display_framebuffer.reset();
m_vram_readback_framebuffer.reset();
m_vram_update_depth_framebuffer.reset();
m_vram_framebuffer.reset();
m_vram_read_texture.reset();
m_vram_depth_texture.reset();
m_vram_texture.reset();
@ -833,8 +810,7 @@ bool GPU_HW::CompilePipelines()
plconfig.input_layout.vertex_stride = sizeof(BatchVertex);
plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
plconfig.primitive = GPUPipeline::Primitive::Triangles;
plconfig.color_format = VRAM_RT_FORMAT;
plconfig.depth_format = VRAM_DS_FORMAT;
plconfig.SetTargetFormats(VRAM_RT_FORMAT, VRAM_DS_FORMAT);
plconfig.samples = m_multisamples;
plconfig.per_sample_shading = m_per_sample_shading;
plconfig.geometry_shader = nullptr;
@ -1080,8 +1056,7 @@ bool GPU_HW::CompilePipelines()
return false;
plconfig.fragment_shader = fs.get();
plconfig.color_format = GPUTexture::Format::Unknown;
plconfig.depth_format = VRAM_DS_FORMAT;
plconfig.SetTargetFormats(GPUTexture::Format::Unknown, VRAM_DS_FORMAT);
plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState();
plconfig.blend.write_mask = 0;
@ -1093,8 +1068,7 @@ bool GPU_HW::CompilePipelines()
progress.Increment();
}
plconfig.color_format = VRAM_RT_FORMAT;
plconfig.depth_format = GPUTexture::Format::Unknown;
plconfig.SetTargetFormats(VRAM_RT_FORMAT);
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
plconfig.samples = 1;
@ -1181,7 +1155,7 @@ bool GPU_HW::CompilePipelines()
return false;
GL_OBJECT_NAME(fs, "Downsample Blur Pass Fragment Shader");
plconfig.fragment_shader = fs.get();
plconfig.color_format = GPUTexture::Format::R8;
plconfig.SetTargetFormats(GPUTexture::Format::R8);
if (!(m_downsample_blur_pass_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false;
GL_OBJECT_NAME(m_downsample_blur_pass_pipeline, "Downsample Blur Pass Pipeline");
@ -1193,7 +1167,7 @@ bool GPU_HW::CompilePipelines()
GL_OBJECT_NAME(fs, "Downsample Composite Pass Fragment Shader");
plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants;
plconfig.fragment_shader = fs.get();
plconfig.color_format = VRAM_RT_FORMAT;
plconfig.SetTargetFormats(VRAM_RT_FORMAT);
if (!(m_downsample_composite_pass_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false;
// Give the composite pipeline its own label; it previously reused the blur pass pipeline's name,
// which made the two objects indistinguishable by name.
GL_OBJECT_NAME(m_downsample_composite_pass_pipeline, "Downsample Composite Pass Pipeline");
@ -1310,14 +1284,15 @@ void GPU_HW::UpdateDepthBufferFromMaskBit()
// Viewport should already be set full, only need to fudge the scissor.
g_gpu_device->SetScissor(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight());
g_gpu_device->SetFramebuffer(m_vram_update_depth_framebuffer.get());
g_gpu_device->InvalidateRenderTarget(m_vram_depth_texture.get());
g_gpu_device->SetRenderTargets(nullptr, 0, m_vram_depth_texture.get());
g_gpu_device->SetPipeline(m_vram_update_depth_pipeline.get());
g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->Draw(3, 0);
// Restore.
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetFramebuffer(m_vram_framebuffer.get());
g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get());
SetScissor();
}
@ -2061,7 +2036,7 @@ bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u3
}
}
g_gpu_device->SetFramebuffer(m_vram_framebuffer.get()); // TODO: needed?
g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); // TODO: needed?
g_gpu_device->SetTextureSampler(0, m_vram_replacement_texture.get(), g_gpu_device->GetLinearSampler());
g_gpu_device->SetPipeline(m_copy_pipeline.get());
g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height);
@ -2364,7 +2339,7 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
g_gpu_device->SetFramebuffer(m_vram_readback_framebuffer.get());
g_gpu_device->SetRenderTarget(m_vram_readback_texture.get());
g_gpu_device->SetPipeline(m_vram_readback_pipeline.get());
g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetViewportAndScissor(0, 0, encoded_width, encoded_height);
@ -2811,7 +2786,7 @@ void GPU_HW::UpdateDisplay()
if (interlaced == InterlacedRenderMode::None)
g_gpu_device->InvalidateRenderTarget(m_display_private_texture.get());
g_gpu_device->SetFramebuffer(m_display_framebuffer.get());
g_gpu_device->SetRenderTarget(m_display_private_texture.get());
g_gpu_device->SetPipeline(
m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)].get());
g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler());
@ -2883,7 +2858,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
uniforms.lod = static_cast<float>(level - 1);
g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0);
g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get());
g_gpu_device->SetRenderTarget(m_downsample_render_texture.get());
g_gpu_device->SetViewportAndScissor(0, 0, level_width, level_height);
g_gpu_device->SetPipeline((level == 1) ? m_downsample_first_pass_pipeline.get() :
m_downsample_mid_pass_pipeline.get());
@ -2912,7 +2887,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
m_downsample_render_texture->MakeReadyForSampling();
g_gpu_device->ClearRenderTarget(m_downsample_weight_texture.get(), 0);
g_gpu_device->SetFramebuffer(m_downsample_weight_framebuffer.get());
g_gpu_device->SetRenderTarget(m_downsample_weight_texture.get());
g_gpu_device->SetTextureSampler(0, m_downsample_render_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetViewportAndScissor(0, 0, last_width, last_height);
g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get());
@ -2926,7 +2901,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
GL_SCOPE("Composite");
g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0);
g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get());
g_gpu_device->SetRenderTarget(m_downsample_render_texture.get());
g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_composite_sampler.get());
g_gpu_device->SetTextureSampler(1, m_downsample_weight_texture.get(), m_downsample_lod_sampler.get());
g_gpu_device->SetViewportAndScissor(0, 0, width, height);
@ -2953,7 +2928,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to
source->MakeReadyForSampling();
g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0);
g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get());
g_gpu_device->SetRenderTarget(m_downsample_render_texture.get());
g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get());
g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler());
g_gpu_device->SetViewportAndScissor(ds_left, ds_top, ds_width, ds_height);

View file

@ -213,11 +213,6 @@ private:
std::unique_ptr<GPUTexture> m_vram_replacement_texture;
std::unique_ptr<GPUTexture> m_display_private_texture; // TODO: Move to base.
std::unique_ptr<GPUFramebuffer> m_vram_framebuffer;
std::unique_ptr<GPUFramebuffer> m_vram_update_depth_framebuffer;
std::unique_ptr<GPUFramebuffer> m_vram_readback_framebuffer;
std::unique_ptr<GPUFramebuffer> m_display_framebuffer;
std::unique_ptr<GPUTextureBuffer> m_vram_upload_buffer;
std::unique_ptr<GPUTexture> m_vram_write_texture;
@ -288,9 +283,7 @@ private:
std::unique_ptr<GPUTexture> m_downsample_texture;
std::unique_ptr<GPUTexture> m_downsample_render_texture;
std::unique_ptr<GPUFramebuffer> m_downsample_framebuffer;
std::unique_ptr<GPUTexture> m_downsample_weight_texture;
std::unique_ptr<GPUFramebuffer> m_downsample_weight_framebuffer;
std::unique_ptr<GPUPipeline> m_downsample_first_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_mid_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_blur_pass_pipeline;

View file

@ -4,4 +4,4 @@
#pragma once
#include "common/types.h"
static constexpr u32 SHADER_CACHE_VERSION = 10;
static constexpr u32 SHADER_CACHE_VERSION = 11;

View file

@ -23,6 +23,7 @@ add_library(util
cue_parser.h
gpu_device.cpp
gpu_device.h
gpu_framebuffer_manager.h
gpu_shader_cache.cpp
gpu_shader_cache.h
gpu_texture.cpp
@ -72,7 +73,7 @@ target_precompile_headers(util PRIVATE "pch.h")
target_include_directories(util PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
target_include_directories(util PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
target_link_libraries(util PUBLIC common simpleini imgui)
target_link_libraries(util PRIVATE stb libchdr zlib soundtouch Zstd::Zstd reshadefx)
target_link_libraries(util PRIVATE stb libchdr zlib soundtouch xxhash Zstd::Zstd reshadefx)
if(ENABLE_CUBEB)
target_sources(util PRIVATE

View file

@ -536,24 +536,35 @@ void D3D11Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
src11->GetD3DTexture(), 0, dst11->GetDXGIFormat());
}
bool D3D11Device::IsRenderTargetBound(const GPUTexture* tex) const
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
if (m_current_render_targets[i] == tex)
return true;
}
return false;
}
// Hands the clear request for texture `t` with packed colour `c` to the base GPUDevice
// implementation, then, if `t` is one of the render targets bound right now, calls CommitClear()
// on it with the device context.
// Diff note: the m_current_framebuffer condition is the replaced line; the IsRenderTargetBound()
// condition directly below it is its replacement.
void D3D11Device::ClearRenderTarget(GPUTexture* t, u32 c)
{
GPUDevice::ClearRenderTarget(t, c);
if (m_current_framebuffer && m_current_framebuffer->GetRT() == t)
if (IsRenderTargetBound(t))
static_cast<D3D11Texture*>(t)->CommitClear(m_context.Get());
}
// Hands the depth clear for texture `t` with depth value `d` to the base GPUDevice implementation,
// then, if `t` is the depth target bound right now, calls CommitClear() on it with the device
// context.
// Diff note: the m_current_framebuffer condition is the replaced line; the m_current_depth_target
// comparison directly below it is its replacement.
void D3D11Device::ClearDepth(GPUTexture* t, float d)
{
GPUDevice::ClearDepth(t, d);
if (m_current_framebuffer && m_current_framebuffer->GetDS() == t)
if (m_current_depth_target == t)
static_cast<D3D11Texture*>(t)->CommitClear(m_context.Get());
}
// Hands the invalidation of texture `t` to the base GPUDevice implementation, then calls
// CommitClear() on it with the device context if `t` is currently bound.
// "Bound" is decided by texture type: a render target is looked up among the bound colour targets,
// anything else is compared against the bound depth target.
// Diff note: the m_current_framebuffer condition is the replaced line; the IsRenderTarget()
// ternary directly below it is its replacement.
void D3D11Device::InvalidateRenderTarget(GPUTexture* t)
{
GPUDevice::InvalidateRenderTarget(t);
if (m_current_framebuffer && (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t))
if (t->IsRenderTarget() ? IsRenderTargetBound(t) : (m_current_depth_target == t))
static_cast<D3D11Texture*>(t)->CommitClear(m_context.Get());
}
@ -613,13 +624,15 @@ bool D3D11Device::BeginPresent(bool skip_present)
static constexpr float clear_color[4] = {0.0f, 0.0f, 0.0f, 1.0f};
m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), clear_color);
m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr);
m_current_framebuffer = nullptr;
m_num_current_render_targets = 0;
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_current_depth_target = nullptr;
return true;
}
void D3D11Device::EndPresent()
{
DebugAssert(!m_current_framebuffer);
DebugAssert(m_num_current_render_targets == 0 && !m_current_depth_target);
if (!m_vsync_enabled && m_gpu_timing_enabled)
PopTimestampQuery();
@ -873,34 +886,17 @@ void D3D11Device::UnmapUniformBuffer(u32 size)
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
void D3D11Device::SetFramebuffer(GPUFramebuffer* fb)
void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds)
{
if (m_current_framebuffer == fb)
return;
ID3D11RenderTargetView* rtvs[MAX_RENDER_TARGETS];
m_current_framebuffer = static_cast<D3D11Framebuffer*>(fb);
if (!m_current_framebuffer)
{
m_context->OMSetRenderTargets(0, nullptr, nullptr);
return;
}
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds);
m_current_depth_target = static_cast<D3D11Texture*>(ds);
// Make sure textures aren't bound.
if (D3D11Texture* rt = static_cast<D3D11Texture*>(fb->GetRT()); rt)
if (ds)
{
const ID3D11ShaderResourceView* srv = rt->GetD3DSRV();
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
{
if (m_current_textures[i] == srv)
{
m_current_textures[i] = nullptr;
m_context->PSSetShaderResources(i, 1, &m_current_textures[i]);
}
}
}
if (D3D11Texture* ds = static_cast<D3D11Texture*>(fb->GetDS()); ds)
{
const ID3D11ShaderResourceView* srv = ds->GetD3DSRV();
const ID3D11ShaderResourceView* srv = static_cast<D3D11Texture*>(ds)->GetD3DSRV();
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
{
if (m_current_textures[i] == srv)
@ -911,18 +907,31 @@ void D3D11Device::SetFramebuffer(GPUFramebuffer* fb)
}
}
m_current_framebuffer->CommitClear(m_context.Get());
m_context->OMSetRenderTargets(m_current_framebuffer->GetNumRTVs(), m_current_framebuffer->GetRTVArray(),
m_current_framebuffer->GetDSV());
}
for (u32 i = 0; i < num_rts; i++)
{
D3D11Texture* const dt = static_cast<D3D11Texture*>(rts[i]);
changed |= m_current_render_targets[i] != dt;
m_current_render_targets[i] = dt;
rtvs[i] = dt->GetD3DRTV();
dt->CommitClear(m_context.Get());
void D3D11Device::UnbindFramebuffer(D3D11Framebuffer* fb)
{
if (m_current_framebuffer != fb)
const ID3D11ShaderResourceView* srv = dt->GetD3DSRV();
for (u32 j = 0; j < MAX_TEXTURE_SAMPLERS; j++)
{
if (m_current_textures[j] == srv)
{
m_current_textures[j] = nullptr;
m_context->PSSetShaderResources(j, 1, &m_current_textures[j]);
}
}
}
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
m_current_render_targets[i] = nullptr;
m_num_current_render_targets = num_rts;
if (!changed)
return;
m_current_framebuffer = nullptr;
m_context->OMSetRenderTargets(0, nullptr, nullptr);
m_context->OMSetRenderTargets(num_rts, rtvs, ds ? static_cast<D3D11Texture*>(ds)->GetD3DDSV() : nullptr);
}
void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
@ -931,8 +940,7 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
ID3D11SamplerState* S = sampler ? static_cast<D3D11Sampler*>(sampler)->GetSamplerState() : nullptr;
// Runtime will null these if we don't...
DebugAssert(!m_current_framebuffer || !texture ||
(m_current_framebuffer->GetRT() != texture && m_current_framebuffer->GetDS() != texture));
// A texture being bound for sampling must be neither a currently-bound colour target nor the
// currently-bound depth target. Both conditions have to hold together; joining them with `||`
// would let the assertion pass whenever either one is true, so it could never catch a hazard.
DebugAssert(!texture || (!IsRenderTargetBound(texture) && m_current_depth_target != texture));
if (m_current_textures[slot] != T)
{
@ -970,8 +978,23 @@ void D3D11Device::UnbindTexture(D3D11Texture* tex)
}
}
if (m_current_framebuffer && m_current_framebuffer->GetRT() == tex)
SetFramebuffer(nullptr);
if (tex->IsRenderTarget())
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
if (m_current_render_targets[i] == tex)
{
Log_WarningPrint("Unbinding current RT");
SetRenderTargets(nullptr, 0, m_current_depth_target);
break;
}
}
}
else if (m_current_depth_target == tex)
{
Log_WarningPrint("Unbinding current DS");
SetRenderTargets(nullptr, 0, nullptr);
}
}
void D3D11Device::SetViewport(s32 x, s32 y, s32 width, s32 height)

View file

@ -17,7 +17,6 @@
#include <vector>
#include <wrl/client.h>
class D3D11Framebuffer;
class D3D11Pipeline;
class D3D11Shader;
class D3D11Texture;
@ -66,8 +65,6 @@ public:
void ClearDepth(GPUTexture* t, float d) override;
void InvalidateRenderTarget(GPUTexture* t) override;
std::unique_ptr<GPUFramebuffer> CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override;
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data) override;
std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source,
const char* entry_point, DynamicHeapArray<u8>* binary) override;
@ -85,7 +82,7 @@ public:
void PushUniformBuffer(const void* data, u32 data_size) override;
void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override;
void SetFramebuffer(GPUFramebuffer* fb) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override;
void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -104,14 +101,14 @@ public:
bool BeginPresent(bool skip_present) override;
void EndPresent() override;
void UnbindFramebuffer(D3D11Framebuffer* fb);
void UnbindPipeline(D3D11Pipeline* pl);
void UnbindTexture(D3D11Texture* tex);
static AdapterAndModeList StaticGetAdapterAndModeList();
protected:
bool CreateDevice(const std::string_view& adapter, bool threaded_presentation, FeatureMask disabled_features) override;
bool CreateDevice(const std::string_view& adapter, bool threaded_presentation,
FeatureMask disabled_features) override;
void DestroyDevice() override;
private:
@ -141,6 +138,8 @@ private:
bool CreateBuffers();
void DestroyBuffers();
bool IsRenderTargetBound(const GPUTexture* tex) const;
ComPtr<ID3D11RasterizerState> GetRasterizationState(const GPUPipeline::RasterizationState& rs);
ComPtr<ID3D11DepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
ComPtr<ID3D11BlendState> GetBlendState(const GPUPipeline::BlendState& bs);
@ -178,8 +177,10 @@ private:
D3D11StreamBuffer m_index_buffer;
D3D11StreamBuffer m_uniform_buffer;
D3D11Framebuffer* m_current_framebuffer = nullptr;
D3D11Pipeline* m_current_pipeline = nullptr;
std::array<D3D11Texture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
u32 m_num_current_render_targets = 0;
D3D11Texture* m_current_depth_target = nullptr;
ID3D11InputLayout* m_current_input_layout = nullptr;
ID3D11VertexShader* m_current_vertex_shader = nullptr;

View file

@ -93,73 +93,6 @@ bool D3D11Device::SupportsTextureFormat(GPUTexture::Format format) const
return (SUCCEEDED(m_device->CheckFormatSupport(dfmt, &support)) && ((support & required) == required));
}
D3D11Framebuffer::D3D11Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height,
ComPtr<ID3D11RenderTargetView> rtv, ComPtr<ID3D11DepthStencilView> dsv)
: GPUFramebuffer(rt, ds, width, height), m_rtv(std::move(rtv)), m_dsv(std::move(dsv))
{
}
D3D11Framebuffer::~D3D11Framebuffer()
{
D3D11Device::GetInstance().UnbindFramebuffer(this);
}
void D3D11Framebuffer::SetDebugName(const std::string_view& name)
{
if (m_rtv)
SetD3DDebugObjectName(m_rtv.Get(), fmt::format("{} RTV", name));
if (m_dsv)
SetD3DDebugObjectName(m_dsv.Get(), fmt::format("{} DSV", name));
}
void D3D11Framebuffer::CommitClear(ID3D11DeviceContext1* context)
{
if (m_rt && m_rt->GetState() != GPUTexture::State::Dirty) [[unlikely]]
{
if (m_rt->GetState() == GPUTexture::State::Invalidated)
context->DiscardView(m_rtv.Get());
else
context->ClearRenderTargetView(m_rtv.Get(), m_rt->GetUNormClearColor().data());
m_rt->SetState(GPUTexture::State::Dirty);
}
if (m_ds && m_ds->GetState() != GPUTexture::State::Dirty) [[unlikely]]
{
if (m_ds->GetState() == GPUTexture::State::Invalidated)
context->DiscardView(m_dsv.Get());
else
context->ClearDepthStencilView(m_dsv.Get(), D3D11_CLEAR_DEPTH, m_ds->GetClearDepth(), 0);
m_ds->SetState(GPUTexture::State::Dirty);
}
}
std::unique_ptr<GPUFramebuffer> D3D11Device::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds)
{
DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds)));
D3D11Texture* RT = static_cast<D3D11Texture*>((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds);
D3D11Texture* DS = static_cast<D3D11Texture*>((rt_or_ds && rt_or_ds->IsDepthStencil()) ? rt_or_ds : ds);
ComPtr<ID3D11RenderTargetView> rtv;
if (RT)
{
rtv = RT->GetD3DRTV();
Assert(rtv);
}
ComPtr<ID3D11DepthStencilView> dsv;
if (DS)
{
dsv = DS->GetD3DDSV();
Assert(dsv);
}
return std::unique_ptr<GPUFramebuffer>(new D3D11Framebuffer(RT, DS, RT ? RT->GetWidth() : DS->GetWidth(),
RT ? RT->GetHeight() : DS->GetHeight(), std::move(rtv),
std::move(dsv)));
}
D3D11Sampler::D3D11Sampler(ComPtr<ID3D11SamplerState> ss) : m_ss(std::move(ss))
{
}

View file

@ -13,32 +13,6 @@
class D3D11Device;
class D3D11Framebuffer final : public GPUFramebuffer
{
friend D3D11Device;
template<typename T>
using ComPtr = Microsoft::WRL::ComPtr<T>;
public:
~D3D11Framebuffer() override;
ALWAYS_INLINE u32 GetNumRTVs() const { return m_rtv ? 1 : 0; }
ALWAYS_INLINE ID3D11RenderTargetView* GetRTV() const { return m_rtv.Get(); }
ALWAYS_INLINE ID3D11RenderTargetView* const* GetRTVArray() const { return m_rtv.GetAddressOf(); }
ALWAYS_INLINE ID3D11DepthStencilView* GetDSV() const { return m_dsv.Get(); }
void SetDebugName(const std::string_view& name) override;
void CommitClear(ID3D11DeviceContext1* context);
private:
D3D11Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, ComPtr<ID3D11RenderTargetView> rtv,
ComPtr<ID3D11DepthStencilView> dsv);
ComPtr<ID3D11RenderTargetView> m_rtv;
ComPtr<ID3D11DepthStencilView> m_dsv;
};
class D3D11Sampler final : public GPUSampler
{
friend D3D11Device;

View file

@ -570,12 +570,9 @@ void D3D12Device::SubmitCommandListAndRestartRenderPass(const char* reason)
if (InRenderPass())
EndRenderPass();
D3D12Framebuffer* fb = m_current_framebuffer;
D3D12Pipeline* pl = m_current_pipeline;
SubmitCommandList(false, "%s", reason);
if (fb)
SetFramebuffer(fb);
SetPipeline(pl);
BeginRenderPass();
}
@ -1091,7 +1088,7 @@ bool D3D12Device::BeginPresent(bool frame_skip)
void D3D12Device::EndPresent()
{
DebugAssert(InRenderPass() && !m_current_framebuffer);
DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target);
EndRenderPass();
const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer];
@ -1316,25 +1313,22 @@ void D3D12Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
// Hands the clear request for texture `t` with packed colour `c` to the base GPUDevice
// implementation, then ends the active render pass if `t` is one of its bound render targets.
// NOTE(review): presumably the pass is ended so the next BeginRenderPass() can pick the clear up
// from the texture's state as the pass's beginning access - confirm.
// Diff note: the m_current_framebuffer condition is the replaced line; the IsRenderTargetBound()
// condition directly below it is its replacement.
void D3D12Device::ClearRenderTarget(GPUTexture* t, u32 c)
{
GPUDevice::ClearRenderTarget(t, c);
if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetRT() == t)
if (InRenderPass() && IsRenderTargetBound(t))
EndRenderPass();
}
// Hands the depth clear for texture `t` with depth value `d` to the base GPUDevice implementation,
// then ends the active render pass if `t` is its bound depth target.
// Diff note: the m_current_framebuffer condition is the replaced line; the m_current_depth_target
// comparison directly below it is its replacement.
void D3D12Device::ClearDepth(GPUTexture* t, float d)
{
GPUDevice::ClearDepth(t, d);
if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetDS() == t)
if (InRenderPass() && m_current_depth_target == t)
EndRenderPass();
}
// Hands the invalidation of texture `t` to the base GPUDevice implementation, then ends the active
// render pass if `t` is currently bound to it. A render target is looked up among the bound colour
// targets; any other texture is compared against the bound depth target.
// Diff note: the two-line m_current_framebuffer condition and its braces are the replaced lines;
// the single-line IsRenderTarget() condition is their replacement.
void D3D12Device::InvalidateRenderTarget(GPUTexture* t)
{
GPUDevice::InvalidateRenderTarget(t);
if (InRenderPass() && m_current_framebuffer &&
(m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t))
{
if (InRenderPass() && (t->IsRenderTarget() ? IsRenderTargetBound(t) : (m_current_depth_target == t)))
EndRenderPass();
}
}
bool D3D12Device::CreateBuffers()
@ -1530,60 +1524,71 @@ void D3D12Device::DestroyRootSignatures()
it->Reset();
}
// Diff note: this hunk interleaves the replaced SetFramebuffer() lines (the first signature, the
// early-out on an unchanged framebuffer, and the m_current_framebuffer assignment) with the
// SetRenderTargets() lines that supersede them.
//
// SetRenderTargets() records `num_rts` colour targets from `rts` plus the optional depth target
// `ds` as the current targets. Any active render pass is ended first; no new pass is begun here.
void D3D12Device::SetFramebuffer(GPUFramebuffer* fb)
void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds)
{
if (m_current_framebuffer == fb)
return;
if (InRenderPass())
EndRenderPass();
m_current_framebuffer = static_cast<D3D12Framebuffer*>(fb);
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
m_current_depth_target = static_cast<D3D12Texture*>(ds);
// Track each new colour target and call CommitClear() on it with the current command list.
for (u32 i = 0; i < num_rts; i++)
{
D3D12Texture* const dt = static_cast<D3D12Texture*>(rts[i]);
m_current_render_targets[i] = dt;
dt->CommitClear(cmdlist);
}
// Null out slots that were in use by the previous binding but are beyond the new count.
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
m_current_render_targets[i] = nullptr;
m_num_current_render_targets = num_rts;
}
void D3D12Device::BeginRenderPass()
{
DebugAssert(!InRenderPass());
D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc;
std::array<D3D12_RENDER_PASS_RENDER_TARGET_DESC, MAX_RENDER_TARGETS> rt_desc;
D3D12_RENDER_PASS_DEPTH_STENCIL_DESC ds_desc;
const D3D12_RENDER_PASS_RENDER_TARGET_DESC* rt_desc_p = nullptr;
const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC* ds_desc_p = nullptr;
D3D12_RENDER_PASS_RENDER_TARGET_DESC* rt_desc_p = nullptr;
D3D12_RENDER_PASS_DEPTH_STENCIL_DESC* ds_desc_p = nullptr;
u32 num_rt_descs = 0;
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
if (m_current_framebuffer) [[likely]]
if (m_num_current_render_targets > 0 || m_current_depth_target) [[likely]]
{
D3D12Texture* rt = static_cast<D3D12Texture*>(m_current_framebuffer->GetRT());
if (rt)
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
D3D12Texture* const rt = m_current_render_targets[i];
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
rt->SetUseFenceValue(GetCurrentFenceValue());
rt_desc_p = &rt_desc;
rt_desc.cpuDescriptor = rt->GetWriteDescriptor();
rt_desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i];
desc.cpuDescriptor = rt->GetWriteDescriptor();
desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
switch (rt->GetState())
{
case GPUTexture::State::Cleared:
{
rt_desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
std::memcpy(rt_desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(),
sizeof(rt_desc.BeginningAccess.Clear.ClearValue.Color));
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(),
sizeof(desc.BeginningAccess.Clear.ClearValue.Color));
rt->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Invalidated:
{
rt_desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
rt->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Dirty:
{
rt_desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
}
break;
@ -1593,9 +1598,11 @@ void D3D12Device::BeginRenderPass()
}
}
D3D12Texture* ds = static_cast<D3D12Texture*>(m_current_framebuffer->GetDS());
if (ds)
rt_desc_p = (m_num_current_render_targets > 0) ? rt_desc.data() : nullptr;
num_rt_descs = m_num_current_render_targets;
if (m_current_depth_target)
{
D3D12Texture* const ds = m_current_depth_target;
ds->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE);
ds->SetUseFenceValue(GetCurrentFenceValue());
ds_desc_p = &ds_desc;
@ -1631,16 +1638,19 @@ void D3D12Device::BeginRenderPass()
UnreachableCode();
break;
}
ds_desc_p = &ds_desc;
}
}
else
{
// Re-rendering to swap chain.
const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer];
rt_desc = {swap_chain_buf.second,
{D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, {}},
{D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
rt_desc_p = &rt_desc;
rt_desc[0] = {swap_chain_buf.second,
{D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, {}},
{D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
rt_desc_p = &rt_desc[0];
num_rt_descs = 1;
}
// All textures should be in shader read only optimal already, but just in case..
@ -1652,7 +1662,7 @@ void D3D12Device::BeginRenderPass()
}
DebugAssert(rt_desc_p || ds_desc_p);
cmdlist->BeginRenderPass(rt_desc_p ? 1 : 0, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE);
cmdlist->BeginRenderPass(num_rt_descs, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE);
// TODO: Stats
m_in_render_pass = true;
@ -1686,7 +1696,9 @@ void D3D12Device::BeginSwapChainRenderPass()
{D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE);
m_current_framebuffer = nullptr;
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_num_current_render_targets = 0;
m_current_depth_target = nullptr;
m_in_render_pass = true;
// Clear pipeline, it's likely incompatible.
@ -1708,29 +1720,6 @@ void D3D12Device::EndRenderPass()
GetCommandList()->EndRenderPass();
}
// Drops `fb` as the current framebuffer, ending any render pass in flight first.
// Does nothing when `fb` is not the framebuffer that is currently bound.
void D3D12Device::UnbindFramebuffer(D3D12Framebuffer* fb)
{
  if (m_current_framebuffer == fb)
  {
    if (InRenderPass())
      EndRenderPass();

    m_current_framebuffer = nullptr;
  }
}
// Drops the current framebuffer when it uses `tex` as its colour or depth attachment, ending any
// render pass in flight first. Does nothing when no framebuffer is bound or `tex` is not attached.
void D3D12Device::UnbindFramebuffer(D3D12Texture* tex)
{
  const bool references_texture =
    m_current_framebuffer && (m_current_framebuffer->GetRT() == tex || m_current_framebuffer->GetDS() == tex);
  if (!references_texture)
    return;

  if (InRenderPass())
    EndRenderPass();

  m_current_framebuffer = nullptr;
}
void D3D12Device::SetPipeline(GPUPipeline* pipeline)
{
// First draw? Bind everything.
@ -1789,11 +1778,21 @@ void D3D12Device::UnbindPipeline(D3D12Pipeline* pl)
m_current_pipeline = nullptr;
}
bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
if (m_current_render_targets[i] == tex)
return true;
}
return false;
}
void D3D12Device::InvalidateCachedState()
{
m_dirty_flags = ALL_DIRTY_STATE;
m_in_render_pass = false;
m_current_framebuffer = nullptr;
m_current_pipeline = nullptr;
m_current_vertex_stride = 0;
m_current_blend_constant = 0;
@ -1901,6 +1900,28 @@ void D3D12Device::UnbindTexture(D3D12Texture* tex)
m_dirty_flags |= DIRTY_FLAG_TEXTURES;
}
}
if (tex->IsRenderTarget())
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
if (m_current_render_targets[i] == tex)
{
if (InRenderPass())
EndRenderPass();
m_current_render_targets[i] = nullptr;
}
}
}
else if (tex->IsDepthStencil())
{
if (m_current_depth_target == tex)
{
if (InRenderPass())
EndRenderPass();
m_current_depth_target = nullptr;
}
}
}
void D3D12Device::UnbindTextureBuffer(D3D12TextureBuffer* buf)

View file

@ -86,8 +86,6 @@ public:
void ClearDepth(GPUTexture* t, float d) override;
void InvalidateRenderTarget(GPUTexture* t) override;
std::unique_ptr<GPUFramebuffer> CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override;
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data) override;
std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source,
const char* entry_point, DynamicHeapArray<u8>* out_binary) override;
@ -105,7 +103,7 @@ public:
void PushUniformBuffer(const void* data, u32 data_size) override;
void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override;
void SetFramebuffer(GPUFramebuffer* fb) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override;
void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -172,8 +170,6 @@ public:
void SubmitCommandList(bool wait_for_completion, const char* reason, ...);
void SubmitCommandListAndRestartRenderPass(const char* reason);
void UnbindFramebuffer(D3D12Framebuffer* fb);
void UnbindFramebuffer(D3D12Texture* tex);
void UnbindPipeline(D3D12Pipeline* pl);
void UnbindTexture(D3D12Texture* tex);
void UnbindTextureBuffer(D3D12TextureBuffer* buf);
@ -246,6 +242,8 @@ private:
bool CreateDSVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh);
bool CreateUAVDescriptor(ID3D12Resource* resource, u32 samples, DXGI_FORMAT format, D3D12DescriptorHandle* dh);
bool IsRenderTargetBound(const GPUTexture* tex) const;
bool CheckDownloadBufferSize(u32 required_size);
void DestroyDownloadBuffer();
@ -330,10 +328,11 @@ private:
// Which bindings/state has to be updated before the next draw.
u32 m_dirty_flags = ALL_DIRTY_STATE;
D3D12Framebuffer* m_current_framebuffer = nullptr;
D3D12Pipeline* m_current_pipeline = nullptr;
D3D12_PRIMITIVE_TOPOLOGY m_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
u32 m_num_current_render_targets = 0;
std::array<D3D12Texture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
D3D12Texture* m_current_depth_target = nullptr;
u32 m_current_vertex_stride = 0;
u32 m_current_blend_constant = 0;
GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants;

View file

@ -88,7 +88,7 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config)
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
if (const D3D12Shader* shader = static_cast<const D3D12Shader*>(config.geometry_shader))
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
hash.Update(&config.color_format, sizeof(config.color_format));
hash.Update(&config.color_formats, sizeof(config.color_formats));
hash.Update(&config.depth_format, sizeof(config.depth_format));
hash.Update(&config.samples, sizeof(config.samples));
hash.Update(&config.per_sample_shading, sizeof(config.per_sample_shading));
@ -212,8 +212,11 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
blend_mapping[static_cast<u8>(config.blend.dst_alpha_blend.GetValue())],
op_mapping[static_cast<u8>(config.blend.alpha_blend_op.GetValue())], config.blend.write_mask);
if (config.color_format != GPUTexture::Format::Unknown)
gpb.SetRenderTarget(0, D3DCommon::GetFormatMapping(config.color_format).rtv_format);
for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
{
if (config.color_formats[i] != GPUTexture::Format::Unknown)
gpb.SetRenderTarget(i, D3DCommon::GetFormatMapping(config.color_formats[i]).rtv_format);
}
if (config.depth_format != GPUTexture::Format::Unknown)
gpb.SetDepthStencilFormat(D3DCommon::GetFormatMapping(config.depth_format).dsv_format);

View file

@ -847,43 +847,6 @@ std::unique_ptr<GPUSampler> D3D12Device::CreateSampler(const GPUSampler::Config&
return std::unique_ptr<GPUSampler>(new D3D12Sampler(std::move(handle)));
}
// Passes the attachments and dimensions to the GPUFramebuffer base and takes over the two descriptor
// handles by move. The body is intentionally empty.
D3D12Framebuffer::D3D12Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, D3D12DescriptorHandle rtv,
D3D12DescriptorHandle dsv)
: GPUFramebuffer(rt, ds, width, height), m_rtv(std::move(rtv)), m_dsv(std::move(dsv))
{
}
// Hands each descriptor that is set to the device for deferred destruction, using the heap manager
// that matches its kind (RTV or DSV).
D3D12Framebuffer::~D3D12Framebuffer()
{
  D3D12Device& device = D3D12Device::GetInstance();

  if (m_rtv)
    device.DeferDescriptorDestruction(device.GetRTVHeapManager(), &m_rtv);

  if (m_dsv)
    device.DeferDescriptorDestruction(device.GetDSVHeapManager(), &m_dsv);
}
// Intentionally a no-op: `name` is ignored and nothing is labelled.
void D3D12Framebuffer::SetDebugName(const std::string_view& name)
{
}
// Builds a framebuffer from a colour texture, a depth texture, or both.
// The first argument may itself be a depth-stencil texture, in which case `ds` must be null and the
// framebuffer has no colour attachment. Width and height come from the colour texture when there is
// one, otherwise from the depth texture.
std::unique_ptr<GPUFramebuffer> D3D12Device::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds /*= nullptr*/)
{
  DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds)));

  // Decide which role the first argument plays.
  const bool first_is_depth = (rt_or_ds && rt_or_ds->IsDepthStencil());
  D3D12Texture* const color_tex = static_cast<D3D12Texture*>(first_is_depth ? nullptr : rt_or_ds);
  D3D12Texture* const depth_tex = static_cast<D3D12Texture*>(first_is_depth ? rt_or_ds : ds);

  D3D12Texture* const size_source = color_tex ? color_tex : depth_tex;
  const u32 fb_width = size_source->GetWidth();
  const u32 fb_height = size_source->GetHeight();

  D3D12DescriptorHandle color_view;
  D3D12DescriptorHandle depth_view;
  if (color_tex)
    color_view = color_tex->GetWriteDescriptor();
  if (depth_tex)
    depth_view = depth_tex->GetWriteDescriptor();

  // The constructor is private (D3D12Device is a friend), hence the explicit new.
  return std::unique_ptr<GPUFramebuffer>(
    new D3D12Framebuffer(color_tex, depth_tex, fb_width, fb_height, std::move(color_view), std::move(depth_view)));
}
// Forwards the element format and element count to the GPUTextureBuffer base; no other work is done here.
D3D12TextureBuffer::D3D12TextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements)
{
}

View file

@ -124,26 +124,6 @@ private:
D3D12DescriptorHandle m_descriptor;
};
// D3D12 framebuffer object: a GPUFramebuffer that additionally stores one RTV and one DSV descriptor
// handle. The constructor is private; D3D12Device is a friend and is the type that can create it.
class D3D12Framebuffer final : public GPUFramebuffer
{
friend D3D12Device;
public:
~D3D12Framebuffer() override;
// Descriptor handle supplied as `rtv` at construction.
ALWAYS_INLINE const D3D12DescriptorHandle& GetRTV() const { return m_rtv; }
// Descriptor handle supplied as `dsv` at construction.
ALWAYS_INLINE const D3D12DescriptorHandle& GetDSV() const { return m_dsv; }
void SetDebugName(const std::string_view& name) override;
private:
D3D12Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, D3D12DescriptorHandle rtv,
D3D12DescriptorHandle dsv);
D3D12DescriptorHandle m_rtv;
D3D12DescriptorHandle m_dsv;
};
class D3D12TextureBuffer final : public GPUTextureBuffer
{
friend D3D12Device;

View file

@ -4,6 +4,7 @@
#include "gpu_device.h"
#include "core/host.h" // TODO: Remove, needed for getting fullscreen mode.
#include "core/settings.h" // TODO: Remove, needed for dump directory.
#include "gpu_framebuffer_manager.h"
#include "shadergen.h"
#include "common/assert.h"
@ -15,6 +16,7 @@
#include "fmt/format.h"
#include "imgui.h"
#include "xxhash.h"
Log_SetChannel(GPUDevice);
@ -37,13 +39,6 @@ std::unique_ptr<GPUDevice> g_gpu_device;
static std::string s_pipeline_cache_path;
// Stores the two attachment pointers and the dimensions as given; no validation is performed.
GPUFramebuffer::GPUFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height)
: m_rt(rt), m_ds(ds), m_width(width), m_height(height)
{
}
// Compiler-generated special members, defined out of line in this translation unit.
GPUFramebuffer::~GPUFramebuffer() = default;
GPUSampler::GPUSampler() = default;
GPUSampler::~GPUSampler() = default;
@ -156,6 +151,15 @@ GPUPipeline::BlendState GPUPipeline::BlendState::GetAlphaBlendingState()
return ret;
}
// Configures the pipeline for a single colour target: slot 0 receives `color_format`, every other
// colour slot is reset to Unknown, and the depth format is set to `depth_format_`.
void GPUPipeline::GraphicsConfig::SetTargetFormats(GPUTexture::Format color_format,
                                                   GPUTexture::Format depth_format_ /* = GPUTexture::Format::Unknown */)
{
  depth_format = depth_format_;

  size_t slot = 0;
  color_formats[slot++] = color_format;

  // Remaining colour slots are unused.
  while (slot < std::size(color_formats))
    color_formats[slot++] = GPUTexture::Format::Unknown;
}
// Records the element format and the buffer size in elements; the body is intentionally empty.
GPUTextureBuffer::GPUTextureBuffer(Format format, u32 size) : m_format(format), m_size_in_elements(size)
{
}
@ -171,6 +175,35 @@ u32 GPUTextureBuffer::GetElementSize(Format format)
return element_size[static_cast<u32>(format)];
}
// Two keys are equal when their object representations match byte for byte.
bool GPUFramebufferManagerBase::Key::operator==(const Key& rhs) const
{
  const int difference = std::memcmp(this, &rhs, sizeof(Key));
  return (difference == 0);
}
// Exact negation of operator==, which compares the two keys bytewise.
bool GPUFramebufferManagerBase::Key::operator!=(const Key& rhs) const
{
  return !(*this == rhs);
}
bool GPUFramebufferManagerBase::Key::ContainsRT(const GPUTexture* tex) const
{
// num_rts is worse for predictability.
for (u32 i = 0; i < GPUDevice::MAX_RENDER_TARGETS; i++)
{
if (rts[i] == tex)
return true;
}
return false;
}
// Hashes the raw bytes of the key with xxHash, choosing the variant by pointer width:
// XXH3 64-bit when pointers are 8 bytes, otherwise XXH32 with seed 0x1337.
size_t GPUFramebufferManagerBase::KeyHash::operator()(const Key& key) const
{
  if constexpr (sizeof(void*) != 8)
    return XXH32(&key, sizeof(key), 0x1337);
  else
    return XXH3_64bits(&key, sizeof(key));
}
GPUDevice::~GPUDevice() = default;
RenderAPI GPUDevice::GetPreferredAPI()
@ -438,8 +471,7 @@ bool GPUDevice::CreateResources()
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.blend = GPUPipeline::BlendState::GetAlphaBlendingState();
plconfig.blend.write_mask = 0x7;
plconfig.color_format = HasSurface() ? m_window_info.surface_format : GPUTexture::Format::RGBA8;
plconfig.depth_format = GPUTexture::Format::Unknown;
plconfig.SetTargetFormats(HasSurface() ? m_window_info.surface_format : GPUTexture::Format::RGBA8);
plconfig.samples = 1;
plconfig.per_sample_shading = false;
plconfig.vertex_shader = imgui_vs.get();
@ -547,6 +579,11 @@ void GPUDevice::UploadUniformBuffer(const void* data, u32 data_size)
UnmapUniformBuffer(data_size);
}
// Single-target convenience wrapper around SetRenderTargets(): a null `rt` binds zero colour
// targets, otherwise exactly one. `ds` is passed through unchanged.
void GPUDevice::SetRenderTarget(GPUTexture* rt, GPUTexture* ds /*= nullptr*/)
{
  if (rt)
    SetRenderTargets(&rt, 1, ds);
  else
    SetRenderTargets(nullptr, 0, ds);
}
void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height)
{
SetViewport(x, y, width, height);

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -32,27 +32,6 @@ enum class RenderAPI : u32
Metal
};
// Backend-independent framebuffer base: pairs a colour texture pointer and a depth texture pointer
// with a width and height. Backends derive from it and must implement SetDebugName().
class GPUFramebuffer
{
public:
GPUFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height);
virtual ~GPUFramebuffer();
// Colour attachment pointer as supplied at construction.
ALWAYS_INLINE GPUTexture* GetRT() const { return m_rt; }
// Depth attachment pointer as supplied at construction.
ALWAYS_INLINE GPUTexture* GetDS() const { return m_ds; }
ALWAYS_INLINE u32 GetWidth() const { return m_width; }
ALWAYS_INLINE u32 GetHeight() const { return m_height; }
virtual void SetDebugName(const std::string_view& name) = 0;
protected:
// Raw pointers; nothing in this class releases them.
GPUTexture* m_rt;
GPUTexture* m_ds;
u32 m_width;
u32 m_height;
};
class GPUSampler
{
public:
@ -394,10 +373,13 @@ public:
GPUShader* geometry_shader;
GPUShader* fragment_shader;
GPUTexture::Format color_format;
GPUTexture::Format color_formats[4];
GPUTexture::Format depth_format;
u32 samples;
bool per_sample_shading;
void SetTargetFormats(GPUTexture::Format color_format,
GPUTexture::Format depth_format_ = GPUTexture::Format::Unknown);
};
GPUPipeline();
@ -477,6 +459,8 @@ public:
static constexpr u32 MAX_TEXTURE_SAMPLERS = 8;
static constexpr u32 MIN_TEXEL_BUFFER_ELEMENTS = 4 * 1024 * 512;
static constexpr u32 MAX_RENDER_TARGETS = 4;
static_assert(sizeof(GPUPipeline::GraphicsConfig::color_formats) == sizeof(GPUTexture::Format) * MAX_RENDER_TARGETS);
virtual ~GPUDevice();
@ -576,9 +560,6 @@ public:
virtual void ClearDepth(GPUTexture* t, float d);
virtual void InvalidateRenderTarget(GPUTexture* t);
/// Framebuffer abstraction.
virtual std::unique_ptr<GPUFramebuffer> CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) = 0;
/// Shader abstraction.
std::unique_ptr<GPUShader> CreateShader(GPUShaderStage stage, const std::string_view& source,
const char* entry_point = "main");
@ -606,12 +587,13 @@ public:
void UploadUniformBuffer(const void* data, u32 data_size);
/// Drawing setup abstraction.
virtual void SetFramebuffer(GPUFramebuffer* fb) = 0;
virtual void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) = 0;
virtual void SetPipeline(GPUPipeline* pipeline) = 0;
virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) = 0;
virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0;
virtual void SetViewport(s32 x, s32 y, s32 width, s32 height) = 0; // TODO: Rectangle
virtual void SetScissor(s32 x, s32 y, s32 width, s32 height) = 0;
void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr);
void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height);
// Drawing abstraction.
@ -647,7 +629,8 @@ public:
virtual float GetAndResetAccumulatedGPUTime();
protected:
virtual bool CreateDevice(const std::string_view& adapter, bool threaded_presentation, FeatureMask disabled_features) = 0;
virtual bool CreateDevice(const std::string_view& adapter, bool threaded_presentation,
FeatureMask disabled_features) = 0;
virtual void DestroyDevice() = 0;
std::string GetShaderCacheBaseName(const std::string_view& type) const;

View file

@ -0,0 +1,142 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "gpu_device.h"
#include "gpu_texture.h"
#include <unordered_map>
// Non-template part of the framebuffer cache: the lookup key and its hash functor.
class GPUFramebufferManagerBase
{
protected:
// Identifies one attachment combination. Equality and hashing operate on the raw bytes of this
// struct (std::memcmp / xxHash over sizeof(Key) in the out-of-line definitions), so every field,
// including unused rts[] slots, must hold a defined value before a key is compared or hashed.
struct Key
{
GPUTexture* rts[GPUDevice::MAX_RENDER_TARGETS];
GPUTexture* ds;
u32 num_rts;
u32 flags;
bool operator==(const Key& rhs) const;
bool operator!=(const Key& rhs) const;
// True when `tex` is present in any rts[] slot.
bool ContainsRT(const GPUTexture* tex) const;
};
// Hash functor for Key, used as the hasher of the unordered_map in GPUFramebufferManager.
struct KeyHash
{
size_t operator()(const Key& key) const;
};
};
// Cache of backend framebuffer objects keyed by their attachment set.
//   FBOType     - backend handle type stored in the map; Lookup() tests it with `!fbo` to detect failure.
//   FactoryFunc - creates a handle for a given attachment set and flags.
//   DestroyFunc - releases a handle when its entry is removed or the cache is cleared.
template<typename FBOType, FBOType (*FactoryFunc)(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags),
void (*DestroyFunc)(FBOType fbo)>
class GPUFramebufferManager : public GPUFramebufferManagerBase
{
public:
GPUFramebufferManager() = default;
~GPUFramebufferManager();
// Returns the cached handle for this attachment set, creating it through FactoryFunc on a miss.
FBOType Lookup(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags);
// Destroys and removes every entry that references `tex` (dispatches on the texture's type).
void RemoveReferences(const GPUTexture* tex);
void RemoveRTReferences(const GPUTexture* tex);
void RemoveDSReferences(const GPUTexture* tex);
// Destroys every cached handle and empties the map.
void Clear();
private:
using MapType = std::unordered_map<Key, FBOType, KeyHash>;
MapType m_map;
};
// Destroys all cached handles through Clear() so that none outlive the manager.
template<typename FBOType, FBOType (*FactoryFunc)(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags),
void (*DestroyFunc)(FBOType fbo)>
GPUFramebufferManager<FBOType, FactoryFunc, DestroyFunc>::~GPUFramebufferManager()
{
Clear();
}
// Returns the handle cached for the given attachment set and flags. On a miss the handle is created
// with FactoryFunc and cached; when creation fails the falsy handle is returned and nothing is cached.
template<typename FBOType, FBOType (*FactoryFunc)(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags),
         void (*DestroyFunc)(FBOType fbo)>
FBOType GPUFramebufferManager<FBOType, FactoryFunc, DestroyFunc>::Lookup(GPUTexture* const* rts, u32 num_rts,
                                                                         GPUTexture* ds, u32 flags)
{
  // Fill every field: the key is compared and hashed as raw bytes.
  Key lookup_key;
  lookup_key.flags = flags;
  lookup_key.num_rts = num_rts;
  lookup_key.ds = ds;
  for (u32 slot = 0; slot < num_rts; slot++)
    lookup_key.rts[slot] = rts[slot];
  for (u32 slot = num_rts; slot < GPUDevice::MAX_RENDER_TARGETS; slot++)
    lookup_key.rts[slot] = nullptr;

  const auto cached = m_map.find(lookup_key);
  if (cached != m_map.end())
    return cached->second;

  FBOType created = FactoryFunc(rts, num_rts, ds, flags);
  if (!created)
    return created;

  return m_map.emplace(lookup_key, created).first->second;
}
// Destroys and erases every cached entry whose key lists `tex` as a colour target.
// `tex` is asserted (debug builds) to be a render target.
template<typename FBOType, FBOType (*FactoryFunc)(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags),
         void (*DestroyFunc)(FBOType fbo)>
void GPUFramebufferManager<FBOType, FactoryFunc, DestroyFunc>::RemoveRTReferences(const GPUTexture* tex)
{
  DebugAssert(tex->IsRenderTarget());

  auto entry = m_map.begin();
  while (entry != m_map.end())
  {
    if (entry->first.ContainsRT(tex))
    {
      DestroyFunc(entry->second);
      // erase() returns the iterator following the removed element.
      entry = m_map.erase(entry);
    }
    else
    {
      ++entry;
    }
  }
}
// Destroys and erases every cached entry whose key uses `tex` as its depth target.
// `tex` is asserted (debug builds) to be a depth-stencil texture.
template<typename FBOType, FBOType (*FactoryFunc)(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags),
         void (*DestroyFunc)(FBOType fbo)>
void GPUFramebufferManager<FBOType, FactoryFunc, DestroyFunc>::RemoveDSReferences(const GPUTexture* tex)
{
  DebugAssert(tex->IsDepthStencil());

  auto entry = m_map.begin();
  while (entry != m_map.end())
  {
    if (entry->first.ds == tex)
    {
      DestroyFunc(entry->second);
      // erase() returns the iterator following the removed element.
      entry = m_map.erase(entry);
    }
    else
    {
      ++entry;
    }
  }
}
// Removes all cached entries that reference `tex`, choosing the colour or depth path from the
// texture's own type. Textures that are neither are ignored.
template<typename FBOType, FBOType (*FactoryFunc)(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags),
         void (*DestroyFunc)(FBOType fbo)>
void GPUFramebufferManager<FBOType, FactoryFunc, DestroyFunc>::RemoveReferences(const GPUTexture* tex)
{
  if (tex->IsRenderTarget())
  {
    RemoveRTReferences(tex);
    return;
  }

  if (tex->IsDepthStencil())
    RemoveDSReferences(tex);
}
// Destroys every cached handle through DestroyFunc, then empties the map.
template<typename FBOType, FBOType (*FactoryFunc)(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags),
         void (*DestroyFunc)(FBOType fbo)>
void GPUFramebufferManager<FBOType, FactoryFunc, DestroyFunc>::Clear()
{
  // Iterate by const reference: iterating by value would copy each (Key, FBOType) pair.
  for (const auto& entry : m_map)
    DestroyFunc(entry.second);

  m_map.clear();
}

View file

@ -91,6 +91,12 @@ bool GPUTexture::IsDepthFormat(Format format)
return (format == Format::D16);
}
// Currently returns false for every format; `format` is not inspected.
bool GPUTexture::IsDepthStencilFormat(Format format)
{
// None needed yet.
return false;
}
bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format)
{
if (width > MAX_WIDTH || height > MAX_HEIGHT || layers > MAX_LAYERS || levels > MAX_LEVELS || samples > MAX_SAMPLES)

View file

@ -91,6 +91,8 @@ public:
ALWAYS_INLINE State GetState() const { return m_state; }
ALWAYS_INLINE void SetState(State state) { m_state = state; }
ALWAYS_INLINE bool IsDirty() const { return (m_state == State::Dirty); }
ALWAYS_INLINE bool IsClearedOrInvalidated() const { return (m_state != State::Dirty); }
ALWAYS_INLINE bool IsRenderTargetOrDepthStencil() const
{
@ -118,6 +120,7 @@ public:
static u32 GetPixelSize(GPUTexture::Format format);
static bool IsDepthFormat(GPUTexture::Format format);
static bool IsDepthStencilFormat(GPUTexture::Format format);
static bool ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format);
static bool ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u32>& texture_data, u32& texture_data_stride,

View file

@ -33,7 +33,6 @@
#include <vector>
class MetalDevice;
class MetalFramebuffer;
class MetalPipeline;
class MetalTexture;
@ -161,28 +160,10 @@ private:
MetalStreamBuffer m_buffer;
};
// Metal framebuffer object: a GPUFramebuffer that additionally holds the colour and depth MTLTexture
// handles and a render pass descriptor. The constructor is private; MetalDevice is a friend.
class MetalFramebuffer final : public GPUFramebuffer
{
friend MetalDevice;
public:
~MetalFramebuffer() override;
// Returns the stored render pass descriptor. The out-of-line definition, not this declaration,
// determines what else the call does.
MTLRenderPassDescriptor* GetDescriptor() const;
void SetDebugName(const std::string_view& name) override;
private:
MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id<MTLTexture> rt_tex, id<MTLTexture> ds_tex,
MTLRenderPassDescriptor* descriptor);
id<MTLTexture> m_rt_tex;
id<MTLTexture> m_ds_tex;
MTLRenderPassDescriptor* m_descriptor;
};
class MetalDevice final : public GPUDevice
{
friend MetalTexture;
public:
ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); }
ALWAYS_INLINE id<MTLDevice> GetMTLDevice() { return m_device; }
@ -222,8 +203,6 @@ public:
void ClearDepth(GPUTexture* t, float d) override;
void InvalidateRenderTarget(GPUTexture* t) override;
std::unique_ptr<GPUFramebuffer> CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override;
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data) override;
std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source,
const char* entry_point,
@ -242,7 +221,7 @@ public:
void PushUniformBuffer(const void* data, u32 data_size) override;
void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override;
void SetFramebuffer(GPUFramebuffer* fb) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override;
void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -271,8 +250,6 @@ public:
void CommitClear(MetalTexture* tex);
void UnbindFramebuffer(MetalFramebuffer* fb);
void UnbindFramebuffer(MetalTexture* tex);
void UnbindPipeline(MetalPipeline* pl);
void UnbindTexture(MetalTexture* tex);
void UnbindTextureBuffer(MetalTextureBuffer* buf);
@ -283,7 +260,8 @@ public:
static AdapterAndModeList StaticGetAdapterAndModeList();
protected:
bool CreateDevice(const std::string_view& adapter, bool threaded_presentation, FeatureMask disabled_features) override;
bool CreateDevice(const std::string_view& adapter, bool threaded_presentation,
FeatureMask disabled_features) override;
void DestroyDevice() override;
private:
@ -336,6 +314,8 @@ private:
bool CreateBuffers();
void DestroyBuffers();
bool IsRenderTargetBound(const GPUTexture* tex) const;
id<MTLDevice> m_device;
id<MTLCommandQueue> m_queue;
@ -369,7 +349,9 @@ private:
id<MTLCommandBuffer> m_render_cmdbuf = nil;
id<MTLRenderCommandEncoder> m_render_encoder = nil;
MetalFramebuffer* m_current_framebuffer = nullptr;
std::array<MetalTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
u32 m_num_current_render_targets = 0;
MetalTexture* m_current_depth_target = nullptr;
MetalPipeline* m_current_pipeline = nullptr;
id<MTLDepthStencilState> m_current_depth_state = nil;

View file

@ -123,7 +123,8 @@ void MetalDevice::SetVSync(bool enabled)
[m_layer setDisplaySyncEnabled:enabled];
}
bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_presentation, FeatureMask disabled_features)
bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_presentation,
FeatureMask disabled_features)
{
@autoreleasepool
{
@ -493,6 +494,17 @@ void MetalDevice::DestroyBuffers()
m_depth_states.clear();
}
bool MetalDevice::IsRenderTargetBound(const GPUTexture* tex) const
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
if (m_current_render_targets[i] == tex)
return true;
}
return false;
}
GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList()
{
AdapterAndModeList ret;
@ -770,7 +782,12 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
desc.vertexFunction = static_cast<const MetalShader*>(config.vertex_shader)->GetFunction();
desc.fragmentFunction = static_cast<const MetalShader*>(config.fragment_shader)->GetFunction();
desc.colorAttachments[0].pixelFormat = s_pixel_format_mapping[static_cast<u8>(config.color_format)];
for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
{
  // Stop at the first colour slot whose format is Unknown.
  if (config.color_formats[i] == GPUTexture::Format::Unknown)
    break;

  // Each format goes to its own attachment slot. Writing slot 0 on every iteration would leave
  // attachments 1..N unconfigured and overwrite slot 0 with the last format seen.
  desc.colorAttachments[i].pixelFormat = s_pixel_format_mapping[static_cast<u8>(config.color_formats[i])];
}
desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast<u8>(config.depth_format)];
// Input assembly.
@ -998,7 +1015,12 @@ void MetalTexture::Unmap()
// Ends the device's active render pass when this texture is attached to it, as a colour target or as
// the depth target. Returns early when no pass is active.
// NOTE(review): the UnbindFramebuffer(this) call and the InRenderPass()/IsRenderTargetBound() logic
// both appear here; this looks like a diff with its +/- markers stripped -- confirm which lines are live.
void MetalTexture::MakeReadyForSampling()
{
MetalDevice::GetInstance().UnbindFramebuffer(this);
MetalDevice& dev = MetalDevice::GetInstance();
if (!dev.InRenderPass())
return;
// Textures that are not render targets are only compared against the depth slot.
if (IsRenderTarget() ? dev.IsRenderTargetBound(this) : (dev.m_current_depth_target == this))
dev.EndRenderPass();
}
void MetalTexture::SetDebugName(const std::string_view& name)
@ -1088,130 +1110,6 @@ std::unique_ptr<GPUTexture> MetalDevice::CreateTexture(u32 width, u32 height, u3
}
}
// Passes the attachments and dimensions to the GPUFramebuffer base and stores the Metal texture
// handles and descriptor as given. No retain happens here; the destructor releases all three.
MetalFramebuffer::MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id<MTLTexture> rt_tex,
id<MTLTexture> ds_tex, MTLRenderPassDescriptor* descriptor)
: GPUFramebuffer(rt, ds, width, height), m_rt_tex(rt_tex), m_ds_tex(ds_tex), m_descriptor(descriptor)
{
}
// Releases the colour texture and depth texture handles (when non-nil) and then the descriptor.
MetalFramebuffer::~MetalFramebuffer()
{
// TODO: safe deleting?
if (m_rt_tex != nil)
[m_rt_tex release];
if (m_ds_tex != nil)
[m_ds_tex release];
[m_descriptor release];
}
// Intentionally a no-op: `name` is ignored and nothing is labelled.
void MetalFramebuffer::SetDebugName(const std::string_view& name)
{
}
// Returns the stored render pass descriptor after updating its load actions from the current state of
// the attached textures:
//   Cleared     -> MTLLoadActionClear with the texture's clear value
//   Invalidated -> MTLLoadActionDontCare
//   Dirty       -> MTLLoadActionLoad
// Although declared const, the call has side effects: it rewrites the descriptor's load actions and
// moves Cleared/Invalidated textures to the Dirty state.
MTLRenderPassDescriptor* MetalFramebuffer::GetDescriptor() const
{
// Colour attachment (slot 0 only).
if (m_rt)
{
switch (m_rt->GetState())
{
case GPUTexture::State::Cleared:
{
const auto clear_color = m_rt->GetUNormClearColor();
m_descriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
m_descriptor.colorAttachments[0].clearColor =
MTLClearColorMake(clear_color[0], clear_color[1], clear_color[2], clear_color[3]);
m_rt->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Invalidated:
{
m_descriptor.colorAttachments[0].loadAction = MTLLoadActionDontCare;
m_rt->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Dirty:
{
m_descriptor.colorAttachments[0].loadAction = MTLLoadActionLoad;
}
break;
default:
UnreachableCode();
break;
}
}
// Depth attachment.
if (m_ds)
{
switch (m_ds->GetState())
{
case GPUTexture::State::Cleared:
{
m_descriptor.depthAttachment.loadAction = MTLLoadActionClear;
m_descriptor.depthAttachment.clearDepth = m_ds->GetClearDepth();
m_ds->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Invalidated:
{
m_descriptor.depthAttachment.loadAction = MTLLoadActionDontCare;
m_ds->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Dirty:
{
m_descriptor.depthAttachment.loadAction = MTLLoadActionLoad;
}
break;
default:
UnreachableCode();
break;
}
}
return m_descriptor;
}
// Builds a framebuffer from a colour texture, a depth texture, or both. The first argument may itself
// be a depth-stencil texture, in which case `ds` must be null and there is no colour attachment.
// Width and height come from the colour texture when present, otherwise from the depth texture.
std::unique_ptr<GPUFramebuffer> MetalDevice::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds)
{
DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds)));
MetalTexture* RT = static_cast<MetalTexture*>((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds);
MetalTexture* DS = static_cast<MetalTexture*>((rt_or_ds && rt_or_ds->IsDepthStencil()) ? rt_or_ds : ds);
@autoreleasepool
{
// The descriptor and both texture handles are explicitly retained here; ~MetalFramebuffer releases them.
MTLRenderPassDescriptor* desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain];
id<MTLTexture> rt_tex = RT ? [RT->GetMTLTexture() retain] : nil;
id<MTLTexture> ds_tex = DS ? [DS->GetMTLTexture() retain] : nil;
// Attachments start out as load/store; GetDescriptor() adjusts the load action per use.
if (RT)
{
desc.colorAttachments[0].texture = rt_tex;
desc.colorAttachments[0].loadAction = MTLLoadActionLoad;
desc.colorAttachments[0].storeAction = MTLStoreActionStore;
}
if (DS)
{
desc.depthAttachment.texture = ds_tex;
desc.depthAttachment.loadAction = MTLLoadActionLoad;
desc.depthAttachment.storeAction = MTLStoreActionStore;
}
const u32 width = RT ? RT->GetWidth() : DS->GetWidth();
const u32 height = RT ? RT->GetHeight() : DS->GetHeight();
desc.renderTargetWidth = width;
desc.renderTargetHeight = height;
// The constructor is private (MetalDevice is a friend), hence the explicit new.
return std::unique_ptr<GPUFramebuffer>(new MetalFramebuffer(RT, DS, width, height, rt_tex, ds_tex, desc));
}
}
// Stores the given sampler state handle; no retain is performed in this constructor.
MetalSampler::MetalSampler(id<MTLSamplerState> ss) : m_ss(ss)
{
}
@ -1489,25 +1387,22 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
// Forwards the clear request for `t` (packed colour `c`) to the GPUDevice base implementation, then
// ends the active render pass when `t` is one of the colour targets that pass is using.
// NOTE(review): two consecutive `if` headers follow (the m_current_framebuffer form and the
// IsRenderTargetBound() form). This text reads like a diff whose +/- markers were stripped --
// confirm which line is live.
void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c)
{
GPUDevice::ClearRenderTarget(t, c);
if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetRT() == t)
if (InRenderPass() && IsRenderTargetBound(t))
EndRenderPass();
}
// Forwards the depth clear for `t` (clear value `d`) to the GPUDevice base implementation, then ends
// the active render pass when `t` is the depth target that pass is using.
// NOTE(review): both the m_current_framebuffer check and the m_current_depth_target check appear
// back to back; this looks like a diff with its +/- markers stripped -- confirm which line is live.
void MetalDevice::ClearDepth(GPUTexture* t, float d)
{
GPUDevice::ClearDepth(t, d);
if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetDS() == t)
if (InRenderPass() && m_current_depth_target == t)
EndRenderPass();
}
// Forwards the request to GPUDevice::InvalidateRenderTarget(), then ends the active render pass when `t` is
// currently bound as either a colour or a depth target.
// NOTE(review): the two conditions below come from one diff hunk - the two-line m_current_framebuffer test
// looks like the removed code and the IsRenderTarget()/IsRenderTargetBound() test its replacement; confirm
// against the commit.
void MetalDevice::InvalidateRenderTarget(GPUTexture* t)
{
GPUDevice::InvalidateRenderTarget(t);
if (InRenderPass() && m_current_framebuffer &&
(m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t))
{
// Colour targets are looked up in the bound target list, anything else is compared to the depth target.
if (InRenderPass() && (t->IsRenderTarget() ? IsRenderTargetBound(t) : (m_current_depth_target == t)))
EndRenderPass();
}
}
void MetalDevice::CommitClear(MetalTexture* tex)
@ -1689,44 +1584,29 @@ void MetalDevice::UnmapUniformBuffer(u32 size)
}
}
// NOTE(review): this span is a flattened diff hunk. It carries two signatures and statements from both the
// framebuffer-based SetFramebuffer() and the texture-based SetRenderTargets(); which lines were removed is
// inferred from their m_current_framebuffer / fb references and should be confirmed against the commit.
void MetalDevice::SetFramebuffer(GPUFramebuffer* fb)
void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds)
{
if (m_current_framebuffer == fb)
return;
// `changed` starts from the target count and the depth target; per-slot colour differences are OR-ed in below.
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds);
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
bool needs_rt_clear = false;
if (InRenderPass())
EndRenderPass();
m_current_depth_target = static_cast<MetalTexture*>(ds);
for (u32 i = 0; i < num_rts; i++)
{
MetalTexture* const RT = static_cast<MetalTexture*>(rts[i]);
changed |= m_current_render_targets[i] != RT;
m_current_render_targets[i] = RT;
needs_rt_clear |= RT->IsClearedOrInvalidated();
}
// Null out slots that were in use before but lie beyond the new count.
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
m_current_render_targets[i] = nullptr;
m_num_current_render_targets = num_rts;
m_current_framebuffer = static_cast<MetalFramebuffer*>(fb);
// Current pipeline might be incompatible, so unbind it.
// Otherwise it'll get bound to the new render encoder.
// TODO: we shouldn't need to do this now
m_current_pipeline = nullptr;
m_current_depth_state = nil;
}
// Detaches `fb` from the device if it is the framebuffer currently bound; otherwise does nothing.
void MetalDevice::UnbindFramebuffer(MetalFramebuffer* fb)
{
  if (m_current_framebuffer == fb)
  {
    // Close any open render pass before dropping the binding.
    if (InRenderPass())
      EndRenderPass();

    m_current_framebuffer = nullptr;
  }
}
// NOTE(review): flattened diff hunk. The UnbindFramebuffer(MetalTexture*) statements reference
// m_current_framebuffer, while the trailing `changed || needs_rt_clear || needs_ds_clear` block uses locals
// that only SetRenderTargets() declares, so it is presumably the tail of that function; confirm against the
// commit.
void MetalDevice::UnbindFramebuffer(MetalTexture* tex)
{
if (!m_current_framebuffer)
return;
if (m_current_framebuffer->GetRT() != tex && m_current_framebuffer->GetDS() != tex)
return;
if (InRenderPass())
EndRenderPass();
m_current_framebuffer = nullptr;
// The render pass is only ended when the bindings differ or a bound texture is in the cleared/invalidated state.
if (changed || needs_rt_clear || needs_ds_clear)
{
if (InRenderPass())
EndRenderPass();
}
}
void MetalDevice::SetPipeline(GPUPipeline* pipeline)
@ -1815,6 +1695,27 @@ void MetalDevice::UnbindTexture(MetalTexture* tex)
[m_render_encoder setFragmentTexture:nil atIndex:i];
}
}
if (tex->IsRenderTarget())
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
if (m_current_render_targets[i] == tex)
{
Log_WarningPrint("Unbinding current RT");
SetRenderTargets(nullptr, 0, m_current_depth_target);
break;
}
}
}
else if (tex->IsDepthStencil())
{
if (m_current_depth_target == tex)
{
Log_WarningPrint("Unbinding current DS");
SetRenderTargets(nullptr, 0, nullptr);
}
}
}
void MetalDevice::UnbindTextureBuffer(MetalTextureBuffer* buf)
@ -1863,21 +1764,85 @@ void MetalDevice::BeginRenderPass()
@autoreleasepool
{
MTLRenderPassDescriptor* desc;
if (!m_current_framebuffer)
MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor];
if (m_num_current_render_targets == 0 && !m_current_depth_target)
{
// Rendering to view, but we got interrupted...
desc = [MTLRenderPassDescriptor renderPassDescriptor];
desc.colorAttachments[0].texture = [m_layer_drawable texture];
desc.colorAttachments[0].loadAction = MTLLoadActionLoad;
}
else
{
desc = m_current_framebuffer->GetDescriptor();
if (MetalTexture* RT = static_cast<MetalTexture*>(m_current_framebuffer->GetRT()))
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
MetalTexture* const RT = m_current_render_targets[i];
desc.colorAttachments[i].texture = RT->GetMTLTexture();
RT->SetUseFenceCounter(m_current_fence_counter);
if (MetalTexture* DS = static_cast<MetalTexture*>(m_current_framebuffer->GetDS()))
switch (RT->GetState())
{
case GPUTexture::State::Cleared:
{
const auto clear_color = RT->GetUNormClearColor();
desc.colorAttachments[i].loadAction = MTLLoadActionClear;
desc.colorAttachments[i].clearColor =
MTLClearColorMake(clear_color[0], clear_color[1], clear_color[2], clear_color[3]);
RT->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Invalidated:
{
desc.colorAttachments[i].loadAction = MTLLoadActionDontCare;
RT->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Dirty:
{
desc.colorAttachments[i].loadAction = MTLLoadActionLoad;
}
break;
default:
UnreachableCode();
break;
}
}
if (MetalTexture* DS = m_current_depth_target)
{
desc.depthAttachment.texture = m_current_depth_target->GetMTLTexture();
DS->SetUseFenceCounter(m_current_fence_counter);
switch (DS->GetState())
{
case GPUTexture::State::Cleared:
{
desc.depthAttachment.loadAction = MTLLoadActionClear;
desc.depthAttachment.clearDepth = DS->GetClearDepth();
DS->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Invalidated:
{
desc.depthAttachment.loadAction = MTLLoadActionDontCare;
DS->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Dirty:
{
desc.depthAttachment.loadAction = MTLLoadActionLoad;
}
break;
default:
UnreachableCode();
break;
}
}
}
m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain];
@ -1948,8 +1913,10 @@ void MetalDevice::SetScissorInRenderEncoder()
// Returns rc.ClampedSize() using the dimensions of the current render target, or the window surface
// dimensions when nothing is bound.
// NOTE(review): flattened diff hunk - clamp_width/clamp_height appear twice because the m_current_framebuffer
// based pair and the rt_or_ds based pair are both listed; confirm against the commit which pair was removed.
Common::Rectangle<s32> MetalDevice::ClampToFramebufferSize(const Common::Rectangle<s32>& rc) const
{
const s32 clamp_width = m_current_framebuffer ? m_current_framebuffer->GetWidth() : m_window_info.surface_width;
const s32 clamp_height = m_current_framebuffer ? m_current_framebuffer->GetHeight() : m_window_info.surface_height;
// Size source: the first colour target when any are bound, otherwise the depth target (which may be null).
const MetalTexture* rt_or_ds =
(m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target;
const s32 clamp_width = rt_or_ds ? rt_or_ds->GetWidth() : m_window_info.surface_width;
const s32 clamp_height = rt_or_ds ? rt_or_ds->GetHeight() : m_window_info.surface_height;
return rc.ClampedSize(clamp_width, clamp_height);
}
@ -2023,10 +1990,12 @@ bool MetalDevice::BeginPresent(bool skip_present)
// Set up rendering to layer.
id<MTLTexture> layer_texture = [m_layer_drawable texture];
m_current_framebuffer = nullptr;
m_layer_pass_desc.colorAttachments[0].texture = layer_texture;
m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear;
m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc] retain];
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_num_current_render_targets = 0;
m_current_depth_target = nullptr;
m_current_pipeline = nullptr;
m_current_depth_state = nil;
SetInitialEncoderState();
@ -2036,7 +2005,7 @@ bool MetalDevice::BeginPresent(bool skip_present)
void MetalDevice::EndPresent()
{
DebugAssert(!m_current_framebuffer);
DebugAssert(m_num_current_render_targets == 0 && !m_current_depth_target);
EndAnyEncoding();
[m_render_cmdbuf presentDrawable:m_layer_drawable];

View file

@ -155,9 +155,9 @@ void OpenGLDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32
GL_COLOR_BUFFER_BIT, GL_NEAREST);
glEnable(GL_SCISSOR_TEST);
if (m_current_framebuffer)
if (m_current_fbo)
{
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo);
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
}
else
@ -201,9 +201,9 @@ void OpenGLDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u
GL_COLOR_BUFFER_BIT, GL_LINEAR);
glEnable(GL_SCISSOR_TEST);
if (m_current_framebuffer)
if (m_current_fbo)
{
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo);
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
}
else
@ -215,22 +215,31 @@ void OpenGLDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u
// Forwards the request to GPUDevice::ClearRenderTarget(), then commits the clear straight away when `t` is
// currently bound.
// NOTE(review): flattened diff hunk - the m_current_framebuffer/CommitClear pair and the
// IsRenderTargetBound/CommitRTClearInFB pair are both listed; confirm against the commit which was removed.
void OpenGLDevice::ClearRenderTarget(GPUTexture* t, u32 c)
{
GPUDevice::ClearRenderTarget(t, c);
if (m_current_framebuffer && m_current_framebuffer->GetRT() == t)
CommitClear(m_current_framebuffer);
// IsRenderTargetBound() yields the bound slot index, or -1 when the texture is not bound.
if (const s32 idx = IsRenderTargetBound(t); idx >= 0)
CommitRTClearInFB(static_cast<OpenGLTexture*>(t), static_cast<u32>(idx));
}
// Forwards the request to GPUDevice::ClearDepth(), then commits the clear straight away when `t` is the
// currently bound depth target.
// NOTE(review): flattened diff hunk - the m_current_framebuffer/CommitClear pair and the
// m_current_depth_target/CommitDSClearInFB pair are both listed; confirm against the commit which was removed.
void OpenGLDevice::ClearDepth(GPUTexture* t, float d)
{
GPUDevice::ClearDepth(t, d);
if (m_current_framebuffer && m_current_framebuffer->GetDS() == t)
CommitClear(m_current_framebuffer);
if (m_current_depth_target == t)
CommitDSClearInFB(static_cast<OpenGLTexture*>(t));
}
// Forwards the request to GPUDevice::InvalidateRenderTarget(), then commits the invalidation straight away
// when `t` is currently bound as a colour or depth target.
// NOTE(review): flattened diff hunk - the m_current_framebuffer/CommitClear pair looks like the removed code
// and the IsRenderTarget() branch its replacement; confirm against the commit.
void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t)
{
GPUDevice::InvalidateRenderTarget(t);
if (m_current_framebuffer && (m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t))
CommitClear(m_current_framebuffer);
if (t->IsRenderTarget())
{
// Colour target: commit against the slot it is bound to, if any.
if (const s32 idx = IsRenderTargetBound(t); idx >= 0)
CommitRTClearInFB(static_cast<OpenGLTexture*>(t), static_cast<u32>(idx));
}
else
{
// Anything that is not a render target is expected to be a depth-stencil texture.
DebugAssert(t->IsDepthStencil());
if (m_current_depth_target == t)
CommitDSClearInFB(static_cast<OpenGLTexture*>(t));
}
}
void OpenGLDevice::PushDebugGroup(const char* name)
@ -617,7 +626,56 @@ void OpenGLDevice::RenderBlankFrame()
m_last_blend_state.write_a);
glEnable(GL_SCISSOR_TEST);
m_gl_context->SwapBuffers();
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo);
}
// Looks `tex` up among the currently bound colour targets.
// Returns the slot index of the first match, or -1 when the texture is not bound (note: an index, not a bool).
s32 OpenGLDevice::IsRenderTargetBound(const GPUTexture* tex) const
{
  s32 found_slot = -1;
  for (u32 slot = 0; slot < m_num_current_render_targets && found_slot < 0; slot++)
  {
    if (m_current_render_targets[slot] == tex)
      found_slot = static_cast<s32>(slot);
  }

  return found_slot;
}
// Creates a GL framebuffer object with the given colour attachments and optional depth attachment.
// rts/num_rts: colour targets, attached to GL_COLOR_ATTACHMENT0 + i.
// ds:          depth target, attached to GL_DEPTH_ATTACHMENT when non-null.
// flags:       not used by this implementation.
// Returns the new framebuffer id, or 0 when creation failed. The draw framebuffer binding is restored to the
// device's current FBO before returning.
GLuint OpenGLDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags)
{
  // Discard a stale error so the check below reflects the calls made here.
  glGetError();

  GLuint fbo_id = 0;
  glGenFramebuffers(1, &fbo_id);
  glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_id);

  for (u32 i = 0; i < num_rts; i++)
  {
    OpenGLTexture* const RT = static_cast<OpenGLTexture*>(rts[i]);
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, RT->GetGLTarget(), RT->GetGLId(), 0);
  }

  if (ds)
  {
    OpenGLTexture* const DS = static_cast<OpenGLTexture*>(ds);
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, DS->GetGLTarget(), DS->GetGLId(), 0);
  }

  // glGetError() resets the flag it returns, so capture it once. Querying it again inside the log call would
  // report GL_NO_ERROR and hide the real failure. The completeness status is captured too, so a failure caused
  // by an incomplete framebuffer is visible in the log.
  const GLenum gl_error = glGetError();
  const GLenum fbo_status = glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER);

  // Restore the binding the rest of the device expects, on success and failure alike.
  glBindFramebuffer(GL_DRAW_FRAMEBUFFER, OpenGLDevice::GetInstance().m_current_fbo);

  if (gl_error != GL_NO_ERROR || fbo_status != GL_FRAMEBUFFER_COMPLETE)
  {
    Log_ErrorFmt("Failed to create GL framebuffer: error 0x{:X}, status 0x{:X}", static_cast<u32>(gl_error),
                 static_cast<u32>(fbo_status));
    glDeleteFramebuffers(1, &fbo_id);
    return {};
  }

  return fbo_id;
}
// Deletes the GL framebuffer object `fbo`; id 0 is ignored.
void OpenGLDevice::DestroyFramebuffer(GLuint fbo)
{
  if (fbo == 0)
    return;

  glDeleteFramebuffers(1, &fbo);
}
GPUDevice::AdapterAndModeList OpenGLDevice::GetAdapterAndModeList()
@ -720,7 +778,12 @@ bool OpenGLDevice::BeginPresent(bool skip_present)
const Common::Rectangle<s32> window_rc =
Common::Rectangle<s32>::FromExtents(0, 0, m_window_info.surface_width, m_window_info.surface_height);
m_current_framebuffer = nullptr;
m_current_fbo = 0;
m_num_current_render_targets = 0;
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_current_depth_target = nullptr;
m_last_viewport = window_rc;
m_last_scissor = window_rc;
UpdateViewport();
@ -730,7 +793,7 @@ bool OpenGLDevice::BeginPresent(bool skip_present)
void OpenGLDevice::EndPresent()
{
DebugAssert(!m_current_framebuffer);
DebugAssert(m_current_fbo == 0);
if (m_gpu_timing_enabled)
PopTimestampQuery();
@ -886,6 +949,36 @@ void OpenGLDevice::UnbindTexture(GLuint id)
}
}
// Removes every reference the device holds to `tex`: runs the id-based UnbindTexture() overload, detaches it
// from the current render targets if it is bound there, and drops it from the framebuffer manager.
void OpenGLDevice::UnbindTexture(OpenGLTexture* tex)
{
  UnbindTexture(tex->GetGLId());

  if (tex->IsRenderTarget())
  {
    bool bound_as_rt = false;
    for (u32 slot = 0; slot < m_num_current_render_targets && !bound_as_rt; slot++)
      bound_as_rt = (m_current_render_targets[slot] == tex);

    if (bound_as_rt)
    {
      // All colour targets are unbound; the depth target is kept.
      Log_WarningPrint("Unbinding current RT");
      SetRenderTargets(nullptr, 0, m_current_depth_target);
    }

    m_framebuffer_manager.RemoveRTReferences(tex);
    return;
  }

  if (!tex->IsDepthStencil())
    return;

  if (m_current_depth_target == tex)
  {
    // Colour targets are unbound together with the depth target.
    Log_WarningPrint("Unbinding current DS");
    SetRenderTargets(nullptr, 0, nullptr);
  }

  m_framebuffer_manager.RemoveDSReferences(tex);
}
void OpenGLDevice::UnbindSSBO(GLuint id)
{
if (m_last_ssbo != id)
@ -908,15 +1001,6 @@ void OpenGLDevice::UnbindSampler(GLuint id)
}
}
// If `fb` is the framebuffer currently tracked by the device, forgets it and binds draw framebuffer 0.
void OpenGLDevice::UnbindFramebuffer(const OpenGLFramebuffer* fb)
{
  if (m_current_framebuffer != fb)
    return;

  m_current_framebuffer = nullptr;
  glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
}
void OpenGLDevice::UnbindPipeline(const OpenGLPipeline* pl)
{
if (m_current_pipeline == pl)
@ -984,25 +1068,55 @@ void OpenGLDevice::UnmapUniformBuffer(u32 size)
glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_buffer->GetGLBufferId(), pos, size);
}
// NOTE(review): this span is a flattened diff hunk. It carries two signatures and statements from both the
// framebuffer-based SetFramebuffer() and the texture-based SetRenderTargets(); which lines were removed is
// inferred from their fb / FB / m_current_framebuffer references and should be confirmed against the commit.
// NOTE(review): UpdateViewport()/UpdateScissor() appear only next to the prev_was_window/new_is_window lines,
// while GetFlippedViewportScissor() keys off m_current_fbo == 0 - confirm the texture-based path still
// refreshes them when switching to or from the window framebuffer.
void OpenGLDevice::SetFramebuffer(GPUFramebuffer* fb)
void OpenGLDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds)
{
if (m_current_framebuffer == fb)
return;
// `changed` starts from the target count and the depth target; per-slot colour differences are OR-ed in below.
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds);
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
bool needs_rt_clear = false;
OpenGLFramebuffer* FB = static_cast<OpenGLFramebuffer*>(fb);
const bool prev_was_window = (m_current_framebuffer == nullptr);
const bool new_is_window = (FB == nullptr);
m_current_framebuffer = FB;
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FB ? FB->GetGLId() : 0);
if (prev_was_window != new_is_window)
m_current_depth_target = static_cast<OpenGLTexture*>(ds);
for (u32 i = 0; i < num_rts; i++)
{
UpdateViewport();
UpdateScissor();
OpenGLTexture* const dt = static_cast<OpenGLTexture*>(rts[i]);
changed |= m_current_render_targets[i] != dt;
m_current_render_targets[i] = dt;
needs_rt_clear |= dt->IsClearedOrInvalidated();
}
// Null out slots that were in use before but lie beyond the new count.
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
m_current_render_targets[i] = nullptr;
m_num_current_render_targets = num_rts;
if (changed)
{
// FBO 0 is used when there are no colour targets and no depth target.
GLuint fbo = 0;
if (m_num_current_render_targets > 0 || m_current_depth_target)
{
if ((fbo = m_framebuffer_manager.Lookup(rts, num_rts, ds, 0)) == 0)
{
Log_ErrorFmt("Failed to get FBO for {} render targets", num_rts);
// NOTE(review): the tracked state is reset to "no targets" here, but glBindFramebuffer() is not called on
// this path, so the GL binding is left as it was while m_current_fbo says 0 - confirm this is intended.
m_current_fbo = 0;
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_num_current_render_targets = 0;
m_current_depth_target = nullptr;
return;
}
}
m_current_fbo = fbo;
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
}
if (FB)
CommitClear(FB);
// Pending clears/invalidations are committed after the framebuffer bind above.
if (needs_rt_clear)
{
for (u32 i = 0; i < num_rts; i++)
{
OpenGLTexture* const dt = static_cast<OpenGLTexture*>(rts[i]);
if (dt->IsClearedOrInvalidated())
CommitRTClearInFB(dt, i);
}
}
if (needs_ds_clear)
CommitDSClearInFB(static_cast<OpenGLTexture*>(ds));
}
void OpenGLDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
@ -1078,7 +1192,7 @@ std::tuple<s32, s32, s32, s32> OpenGLDevice::GetFlippedViewportScissor(const Com
// Only when rendering to window framebuffer.
// We draw everything else upside-down.
s32 x, y, width, height;
if (!m_current_framebuffer)
if (m_current_fbo == 0)
{
const s32 sh = static_cast<s32>(m_window_info.surface_height);
const s32 rh = rc.GetHeight();

View file

@ -5,6 +5,7 @@
#include "gl/context.h"
#include "gpu_device.h"
#include "gpu_framebuffer_manager.h"
#include "gpu_shader_cache.h"
#include "opengl_loader.h"
#include "opengl_pipeline.h"
@ -16,7 +17,6 @@
#include <memory>
#include <tuple>
class OpenGLFramebuffer;
class OpenGLPipeline;
class OpenGLStreamBuffer;
class OpenGLTexture;
@ -65,8 +65,6 @@ public:
void ClearDepth(GPUTexture* t, float d) override;
void InvalidateRenderTarget(GPUTexture* t) override;
std::unique_ptr<GPUFramebuffer> CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override;
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data) override;
std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source,
const char* entry_point, DynamicHeapArray<u8>* out_binary) override;
@ -84,7 +82,7 @@ public:
void PushUniformBuffer(const void* data, u32 data_size) override;
void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override;
void SetFramebuffer(GPUFramebuffer* fb) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override;
void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -102,7 +100,8 @@ public:
float GetAndResetAccumulatedGPUTime() override;
void CommitClear(OpenGLTexture* tex);
void CommitClear(OpenGLFramebuffer* fb); // Assumes the FB has been bound.
void CommitRTClearInFB(OpenGLTexture* tex, u32 idx);
void CommitDSClearInFB(OpenGLTexture* tex);
GLuint LookupProgramCache(const OpenGLPipeline::ProgramCacheKey& key, const GPUPipeline::GraphicsConfig& plconfig);
GLuint CompileProgram(const GPUPipeline::GraphicsConfig& plconfig);
@ -115,9 +114,9 @@ public:
void SetActiveTexture(u32 slot);
void UnbindTexture(GLuint id);
void UnbindTexture(OpenGLTexture* tex);
void UnbindSSBO(GLuint id);
void UnbindSampler(GLuint id);
void UnbindFramebuffer(const OpenGLFramebuffer* fb);
void UnbindPipeline(const OpenGLPipeline* pl);
protected:
@ -145,6 +144,10 @@ private:
void SetSwapInterval();
void RenderBlankFrame();
s32 IsRenderTargetBound(const GPUTexture* tex) const;
static GLuint CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags);
static void DestroyFramebuffer(GLuint fbo);
std::tuple<s32, s32, s32, s32> GetFlippedViewportScissor(const Common::Rectangle<s32>& rc) const;
void UpdateViewport();
void UpdateScissor();
@ -165,7 +168,6 @@ private:
void ApplyBlendState(GPUPipeline::BlendState bs);
std::unique_ptr<GL::Context> m_gl_context;
std::unique_ptr<OpenGLFramebuffer> m_window_framebuffer;
std::unique_ptr<OpenGLStreamBuffer> m_vertex_buffer;
std::unique_ptr<OpenGLStreamBuffer> m_index_buffer;
@ -175,6 +177,7 @@ private:
// TODO: pass in file instead of blob for pipeline cache
OpenGLPipeline::VertexArrayCache m_vao_cache;
OpenGLPipeline::ProgramCache m_program_cache;
GPUFramebufferManager<GLuint, CreateFramebuffer, DestroyFramebuffer> m_framebuffer_manager;
// VAO cache - fixed max as key
GPUPipeline::BlendState m_last_blend_state = {};
@ -193,7 +196,11 @@ private:
GLuint m_read_fbo = 0;
GLuint m_write_fbo = 0;
OpenGLFramebuffer* m_current_framebuffer = nullptr;
GLuint m_current_fbo = 0;
u32 m_num_current_render_targets = 0;
std::array<OpenGLTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
OpenGLTexture* m_current_depth_target = nullptr;
OpenGLPipeline* m_current_pipeline = nullptr;
std::array<GLuint, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {};

View file

@ -205,7 +205,7 @@ void OpenGLTexture::Destroy()
{
if (m_id != 0)
{
OpenGLDevice::GetInstance().UnbindTexture(m_id);
OpenGLDevice::GetInstance().UnbindTexture(this);
glDeleteTextures(1, &m_id);
m_id = 0;
}
@ -410,58 +410,6 @@ std::unique_ptr<GPUSampler> OpenGLDevice::CreateSampler(const GPUSampler::Config
//////////////////////////////////////////////////////////////////////////
// Forwards the attachments and dimensions to the GPUFramebuffer base and stores the GL framebuffer id in m_id.
OpenGLFramebuffer::OpenGLFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, GLuint id)
: GPUFramebuffer(rt, ds, width, height), m_id(id)
{
}
// Asks the device to drop this framebuffer if it is the one currently bound.
// NOTE(review): no glDeleteFramebuffers() call is made for m_id here - confirm where the GL object is released.
OpenGLFramebuffer::~OpenGLFramebuffer()
{
OpenGLDevice::GetInstance().UnbindFramebuffer(this);
}
// Labels the GL framebuffer object with `name`. Only compiled in when _DEBUG is defined, and only acts when
// the glObjectLabel entry point is non-null.
void OpenGLFramebuffer::SetDebugName(const std::string_view& name)
{
#ifdef _DEBUG
  if (!glObjectLabel)
    return;

  const GLsizei label_length = static_cast<GLsizei>(name.length());
  const GLchar* label_chars = static_cast<const GLchar*>(name.data());
  glObjectLabel(GL_FRAMEBUFFER, m_id, label_length, label_chars);
#endif
}
// Binds this framebuffer object to the given GL framebuffer target.
void OpenGLFramebuffer::Bind(GLenum target)
{
glBindFramebuffer(target, m_id);
}
// Creates an OpenGLFramebuffer for the given attachments.
// rt_or_ds: colour target, or the depth target when it is a depth-stencil texture.
// ds:       depth target to pair with a colour target (may be null).
// Returns an empty pointer when GL reports an error or the framebuffer is incomplete. The draw framebuffer
// binding is restored to the device's current framebuffer before returning.
std::unique_ptr<GPUFramebuffer> OpenGLDevice::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds /* = nullptr */)
{
  // Discard a stale error so the check below reflects the calls made here.
  glGetError();

  GLuint fbo_id = 0;
  glGenFramebuffers(1, &fbo_id);
  glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_id);

  DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds)));

  // The first argument doubles as the depth attachment when it is a depth-stencil texture.
  OpenGLTexture* RT = static_cast<OpenGLTexture*>((rt_or_ds && rt_or_ds->IsDepthStencil()) ? nullptr : rt_or_ds);
  OpenGLTexture* DS = static_cast<OpenGLTexture*>((rt_or_ds && rt_or_ds->IsDepthStencil()) ? rt_or_ds : ds);
  if (RT)
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, RT->GetGLTarget(), RT->GetGLId(), 0);
  if (DS)
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, DS->GetGLTarget(), DS->GetGLId(), 0);

  // glGetError() resets the flag it returns, so capture it once. Querying it again inside the log call would
  // report GL_NO_ERROR and hide the real failure. The completeness status is captured as well.
  const GLenum gl_error = glGetError();
  const GLenum fbo_status = glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER);

  // Restore the binding the rest of the device expects, on success and failure alike.
  glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0);

  if (gl_error != GL_NO_ERROR || fbo_status != GL_FRAMEBUFFER_COMPLETE)
  {
    Log_ErrorPrintf("Failed to create GL framebuffer: error 0x%X, status 0x%X", gl_error, fbo_status);
    glDeleteFramebuffers(1, &fbo_id);
    return {};
  }

  return std::unique_ptr<GPUFramebuffer>(new OpenGLFramebuffer(RT, DS, RT ? RT->GetWidth() : DS->GetWidth(),
                                                               RT ? RT->GetHeight() : DS->GetHeight(), fbo_id));
}
void OpenGLDevice::CommitClear(OpenGLTexture* tex)
{
switch (tex->GetState())
@ -484,7 +432,7 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex)
glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, GL_TEXTURE_2D, 0, 0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo);
}
}
break;
@ -525,7 +473,7 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex)
}
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, GL_TEXTURE_2D, 0, 0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_framebuffer ? m_current_framebuffer->GetGLId() : 0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo);
}
}
break;
@ -539,74 +487,68 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex)
}
}
// NOTE(review): this span is a flattened diff hunk and is not brace-balanced. Statements of the
// framebuffer-based CommitClear(OpenGLFramebuffer*) (those using fb / FB / invalidate_attachments) are
// interleaved with the per-texture CommitRTClearInFB(tex, idx); confirm against the commit which lines remain.
// From the tex/idx lines: CommitRTClearInFB() switches on the texture state, invalidates or clears colour
// attachment `idx` of the bound draw framebuffer, and then marks the texture Dirty.
void OpenGLDevice::CommitClear(OpenGLFramebuffer* fb)
void OpenGLDevice::CommitRTClearInFB(OpenGLTexture* tex, u32 idx)
{
GLenum invalidate_attachments[2];
GLuint num_invalidate_attachments = 0;
if (OpenGLTexture* FB = static_cast<OpenGLTexture*>(fb->GetRT()))
switch (tex->GetState())
{
switch (FB->GetState())
case GPUTexture::State::Invalidated:
{
case GPUTexture::State::Invalidated:
{
invalidate_attachments[num_invalidate_attachments++] = GL_COLOR_ATTACHMENT0;
FB->SetState(GPUTexture::State::Dirty);
}
// NOTE(review): glInvalidateFramebuffer is called unconditionally here, whereas the framebuffer-based code
// in this same diff tests the function pointer (`&& glInvalidateFramebuffer`) before calling it - confirm
// the entry point is always available on the supported GL/GLES versions.
const GLenum attachment = GL_COLOR_ATTACHMENT0 + idx;
glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment);
tex->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Cleared:
{
const auto color = tex->GetUNormClearColor();
// Scissor test is disabled and all colour channels are enabled around the clear; the colour mask is then
// restored from m_last_blend_state and the scissor test re-enabled.
glDisable(GL_SCISSOR_TEST);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), color.data());
glColorMask(m_last_blend_state.write_r, m_last_blend_state.write_g, m_last_blend_state.write_b,
m_last_blend_state.write_a);
glEnable(GL_SCISSOR_TEST);
tex->SetState(GPUTexture::State::Dirty);
}
// No `break` after the Cleared case: it falls through to Dirty, which only breaks.
case GPUTexture::State::Dirty:
break;
case GPUTexture::State::Cleared:
{
const auto color = FB->GetUNormClearColor();
glDisable(GL_SCISSOR_TEST);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glClearBufferfv(GL_COLOR, 0, color.data());
glColorMask(m_last_blend_state.write_r, m_last_blend_state.write_g, m_last_blend_state.write_b,
m_last_blend_state.write_a);
glEnable(GL_SCISSOR_TEST);
FB->SetState(GPUTexture::State::Dirty);
}
case GPUTexture::State::Dirty:
break;
default:
UnreachableCode();
break;
}
default:
UnreachableCode();
break;
}
if (OpenGLTexture* DS = static_cast<OpenGLTexture*>(fb->GetDS()))
}
// NOTE(review): this span is a flattened diff hunk and is not brace-balanced. Statements from the depth half
// of the framebuffer-based CommitClear() (those using DS / invalidate_attachments) are interleaved with the
// per-texture CommitDSClearInFB(tex); confirm against the commit which lines remain.
// From the tex lines: CommitDSClearInFB() switches on the texture state, invalidates or clears the depth
// attachment of the bound draw framebuffer, and then marks the texture Dirty.
void OpenGLDevice::CommitDSClearInFB(OpenGLTexture* tex)
{
switch (tex->GetState())
{
switch (DS->GetState())
case GPUTexture::State::Invalidated:
{
case GPUTexture::State::Invalidated:
{
invalidate_attachments[num_invalidate_attachments++] = GL_DEPTH_ATTACHMENT;
DS->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Cleared:
{
const float depth = DS->GetClearDepth();
glDisable(GL_SCISSOR_TEST);
glClearBufferfv(GL_DEPTH, 0, &depth);
glEnable(GL_SCISSOR_TEST);
DS->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Dirty:
break;
default:
UnreachableCode();
break;
// NOTE(review): glInvalidateFramebuffer is called unconditionally here, whereas the framebuffer-based line
// further down tests the function pointer first - confirm the entry point is always available.
const GLenum attachment = GL_DEPTH_ATTACHMENT;
glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment);
tex->SetState(GPUTexture::State::Dirty);
}
}
break;
if (num_invalidate_attachments > 0 && glInvalidateFramebuffer)
glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, num_invalidate_attachments, invalidate_attachments);
case GPUTexture::State::Cleared:
{
const float depth = tex->GetClearDepth();
// Scissor test is disabled around the depth clear and re-enabled afterwards.
glDisable(GL_SCISSOR_TEST);
glClearBufferfv(GL_DEPTH, 0, &depth);
glEnable(GL_SCISSOR_TEST);
tex->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Dirty:
break;
default:
UnreachableCode();
break;
}
}
//////////////////////////////////////////////////////////////////////////

View file

@ -57,25 +57,6 @@ private:
u8 m_map_level = 0;
};
// GPUFramebuffer implementation that carries an OpenGL framebuffer object id.
class OpenGLFramebuffer final : public GPUFramebuffer
{
friend OpenGLDevice;
public:
~OpenGLFramebuffer() override;
// Returns the stored GL framebuffer object id.
ALWAYS_INLINE GLuint GetGLId() const { return m_id; }
void SetDebugName(const std::string_view& name) override;
void Bind(GLenum target);
private:
// Private constructor: OpenGLDevice is declared a friend above, so it can create instances.
OpenGLFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, GLuint id);
// GL framebuffer object id.
GLuint m_id;
};
class OpenGLTextureBuffer final : public GPUTextureBuffer
{
friend OpenGLDevice;

View file

@ -53,10 +53,8 @@ static u32 s_target_height = 0;
static Common::Timer s_timer;
static std::unique_ptr<GPUTexture> s_input_texture;
static std::unique_ptr<GPUFramebuffer> s_input_framebuffer;
static std::unique_ptr<GPUTexture> s_output_texture;
static std::unique_ptr<GPUFramebuffer> s_output_framebuffer;
static std::unordered_map<u64, std::unique_ptr<GPUSampler>> s_samplers;
static std::unique_ptr<GPUTexture> s_dummy_texture;
@ -602,11 +600,6 @@ GPUTexture* PostProcessing::GetInputTexture()
return s_input_texture.get();
}
// Returns a non-owning pointer to the framebuffer held in s_input_framebuffer (null when it is not set).
GPUFramebuffer* PostProcessing::GetInputFramebuffer()
{
return s_input_framebuffer.get();
}
const Common::Timer& PostProcessing::GetTimer()
{
return s_timer;
@ -650,14 +643,8 @@ bool PostProcessing::CheckTargets(GPUTexture::Format target_format, u32 target_w
if (!(s_input_texture = g_gpu_device->CreateTexture(target_width, target_height, 1, 1, 1,
GPUTexture::Type::RenderTarget, target_format)) ||
!(s_input_framebuffer = g_gpu_device->CreateFramebuffer(s_input_texture.get())))
{
return false;
}
if (!(s_output_texture = g_gpu_device->CreateTexture(target_width, target_height, 1, 1, 1,
GPUTexture::Type::RenderTarget, target_format)) ||
!(s_output_framebuffer = g_gpu_device->CreateFramebuffer(s_output_texture.get())))
!(s_output_texture = g_gpu_device->CreateTexture(target_width, target_height, 1, 1, 1,
GPUTexture::Type::RenderTarget, target_format)))
{
return false;
}
@ -688,36 +675,30 @@ void PostProcessing::DestroyTextures()
s_target_width = 0;
s_target_height = 0;
s_output_framebuffer.reset();
s_output_texture.reset();
s_input_framebuffer.reset();
s_input_texture.reset();
}
bool PostProcessing::Apply(GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width,
s32 final_height, s32 orig_width, s32 orig_height)
bool PostProcessing::Apply(GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height,
s32 orig_width, s32 orig_height)
{
GL_SCOPE("PostProcessing Apply");
const u32 target_width = final_target ? final_target->GetWidth() : g_gpu_device->GetWindowWidth();
const u32 target_height = final_target ? final_target->GetHeight() : g_gpu_device->GetWindowHeight();
const GPUTexture::Format target_format =
final_target ? final_target->GetRT()->GetFormat() : g_gpu_device->GetWindowFormat();
const GPUTexture::Format target_format = final_target ? final_target->GetFormat() : g_gpu_device->GetWindowFormat();
if (!CheckTargets(target_format, target_width, target_height))
return false;
GPUTexture* input = s_input_texture.get();
GPUFramebuffer* input_fb = s_input_framebuffer.get();
GPUTexture* output = s_output_texture.get();
GPUFramebuffer* output_fb = s_output_framebuffer.get();
input->MakeReadyForSampling();
for (const std::unique_ptr<Shader>& stage : s_stages)
{
const bool is_final = (stage.get() == s_stages.back().get());
if (!stage->Apply(input, is_final ? final_target : output_fb, final_left, final_top, final_width, final_height,
if (!stage->Apply(input, is_final ? final_target : output, final_left, final_top, final_width, final_height,
orig_width, orig_height, s_target_width, s_target_height))
{
return false;
@ -727,7 +708,6 @@ bool PostProcessing::Apply(GPUFramebuffer* final_target, s32 final_left, s32 fin
{
output->MakeReadyForSampling();
std::swap(input, output);
std::swap(input_fb, output_fb);
}
}

View file

@ -10,13 +10,11 @@
#include <string_view>
#include <vector>
namespace Common
{
namespace Common {
class Timer;
}
class GPUSampler;
class GPUFramebuffer;
class GPUTexture;
class Error;
@ -123,13 +121,12 @@ bool ReloadShaders();
void Shutdown();
GPUTexture* GetInputTexture();
GPUFramebuffer* GetInputFramebuffer();
const Common::Timer& GetTimer();
bool CheckTargets(GPUTexture::Format target_format, u32 target_width, u32 target_height);
bool Apply(GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height,
s32 orig_width, s32 orig_height);
bool Apply(GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, s32 final_height, s32 orig_width,
s32 orig_height);
GPUSampler* GetSampler(const GPUSampler::Config& config);
GPUTexture* GetDummyTexture();

View file

@ -47,7 +47,7 @@ public:
virtual bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height) = 0;
virtual bool Apply(GPUTexture* input, GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width,
virtual bool Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width,
s32 final_height, s32 orig_width, s32 orig_height, u32 target_width, u32 target_height) = 0;
protected:

View file

@ -1080,7 +1080,7 @@ const char* PostProcessing::ReShadeFXShader::GetTextureNameForID(TextureID id) c
}
GPUTexture* PostProcessing::ReShadeFXShader::GetTextureByID(TextureID id, GPUTexture* input,
GPUFramebuffer* final_target) const
GPUTexture* final_target) const
{
if (id < 0)
{
@ -1093,29 +1093,6 @@ GPUTexture* PostProcessing::ReShadeFXShader::GetTextureByID(TextureID id, GPUTex
return PostProcessing::GetDummyTexture();
}
else if (id == OUTPUT_COLOR_TEXTURE)
{
Panic("Wrong state for final target");
return nullptr;
}
else
{
Panic("Unexpected reserved texture ID");
return nullptr;
}
}
if (static_cast<size_t>(id) >= m_textures.size())
Panic("Unexpected texture ID");
return m_textures[static_cast<size_t>(id)].texture.get();
}
GPUFramebuffer* PostProcessing::ReShadeFXShader::GetFramebufferByID(TextureID id, GPUTexture* input,
GPUFramebuffer* final_target) const
{
if (id < 0)
{
if (id == OUTPUT_COLOR_TEXTURE)
{
return final_target;
}
@ -1129,9 +1106,7 @@ GPUFramebuffer* PostProcessing::ReShadeFXShader::GetFramebufferByID(TextureID id
if (static_cast<size_t>(id) >= m_textures.size())
Panic("Unexpected texture ID");
const Texture& tex = m_textures[static_cast<size_t>(id)];
Assert(tex.framebuffer);
return tex.framebuffer.get();
return m_textures[static_cast<size_t>(id)].texture.get();
}
bool PostProcessing::ReShadeFXShader::CompilePipeline(GPUTexture::Format format, u32 width, u32 height)
@ -1243,7 +1218,7 @@ bool PostProcessing::ReShadeFXShader::CompilePipeline(GPUTexture::Format format,
if (!vs || !fs)
return false;
plconfig.color_format = (pass.render_target >= 0) ? m_textures[pass.render_target].format : format;
plconfig.SetTargetFormats((pass.render_target >= 0) ? m_textures[pass.render_target].format : format);
plconfig.blend = MapBlendState(info);
plconfig.primitive = MapPrimitive(info.topology);
plconfig.vertex_shader = vs.get();
@ -1274,7 +1249,6 @@ bool PostProcessing::ReShadeFXShader::ResizeOutput(GPUTexture::Format format, u3
if (tex.rt_scale == 0.0f)
continue;
tex.framebuffer.reset();
tex.texture.reset();
const u32 t_width = std::max(static_cast<u32>(static_cast<float>(width) * tex.rt_scale), 1u);
@ -1285,22 +1259,15 @@ bool PostProcessing::ReShadeFXShader::ResizeOutput(GPUTexture::Format format, u3
Log_ErrorPrintf("Failed to create %ux%u texture", t_width, t_height);
return {};
}
tex.framebuffer = g_gpu_device->CreateFramebuffer(tex.texture.get());
if (!tex.framebuffer)
{
Log_ErrorPrintf("Failed to create %ux%u texture framebuffer", t_width, t_height);
return {};
}
}
m_valid = true;
return true;
}
bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUFramebuffer* final_target, s32 final_left,
s32 final_top, s32 final_width, s32 final_height, s32 orig_width,
s32 orig_height, u32 target_width, u32 target_height)
bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top,
s32 final_width, s32 final_height, s32 orig_width, s32 orig_height,
u32 target_width, u32 target_height)
{
GL_PUSH_FMT("PostProcessingShaderFX {}", m_name);
@ -1461,9 +1428,9 @@ bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUFramebuffer* f
{
GL_SCOPE_FMT("Draw pass {}", pass.name.c_str());
GL_INS_FMT("Render Target: ID {} [{}]", pass.render_target, GetTextureNameForID(pass.render_target));
GPUFramebuffer* output_fb = GetFramebufferByID(pass.render_target, input, final_target);
GPUTexture* output = GetTextureByID(pass.render_target, input, final_target);
if (!output_fb)
if (!output)
{
// Drawing to final buffer.
if (!g_gpu_device->BeginPresent(false))
@ -1474,7 +1441,7 @@ bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUFramebuffer* f
}
else
{
g_gpu_device->SetFramebuffer(output_fb);
g_gpu_device->SetRenderTargets(&output, 1, nullptr);
}
g_gpu_device->SetPipeline(pass.pipeline.get());

View file

@ -29,7 +29,7 @@ public:
bool ResizeOutput(GPUTexture::Format format, u32 width, u32 height) override;
bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height) override;
bool Apply(GPUTexture* input, GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width,
bool Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width,
s32 final_height, s32 orig_width, s32 orig_height, u32 target_width, u32 target_height) override;
private:
@ -80,15 +80,13 @@ private:
bool CreatePasses(GPUTexture::Format backbuffer_format, reshadefx::module& mod, Error* error);
const char* GetTextureNameForID(TextureID id) const;
GPUTexture* GetTextureByID(TextureID id, GPUTexture* input, GPUFramebuffer* final_target) const;
GPUFramebuffer* GetFramebufferByID(TextureID id, GPUTexture* input, GPUFramebuffer* final_target) const;
GPUTexture* GetTextureByID(TextureID id, GPUTexture* input, GPUTexture* final_target) const;
std::string m_filename;
struct Texture
{
std::unique_ptr<GPUTexture> texture;
std::unique_ptr<GPUFramebuffer> framebuffer;
std::string reshade_name; // TODO: we might be able to drop this
GPUTexture::Format format;
float rt_scale;

View file

@ -130,8 +130,7 @@ bool PostProcessing::GLSLShader::CompilePipeline(GPUTexture::Format format, u32
GPUPipeline::GraphicsConfig plconfig;
plconfig.layout = GPUPipeline::Layout::SingleTextureAndUBO;
plconfig.primitive = GPUPipeline::Primitive::Triangles;
plconfig.color_format = format;
plconfig.depth_format = GPUTexture::Format::Unknown;
plconfig.SetTargetFormats(format);
plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
@ -157,7 +156,7 @@ bool PostProcessing::GLSLShader::CompilePipeline(GPUTexture::Format format, u32
return true;
}
bool PostProcessing::GLSLShader::Apply(GPUTexture* input, GPUFramebuffer* final_target, s32 final_left, s32 final_top,
bool PostProcessing::GLSLShader::Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top,
s32 final_width, s32 final_height, s32 orig_width, s32 orig_height,
u32 target_width, u32 target_height)
{
@ -171,8 +170,8 @@ bool PostProcessing::GLSLShader::Apply(GPUTexture* input, GPUFramebuffer* final_
}
else
{
g_gpu_device->SetFramebuffer(final_target);
g_gpu_device->ClearRenderTarget(final_target->GetRT(), 0); // TODO: Could use an invalidate here too.
g_gpu_device->SetRenderTargets(&final_target, 1, nullptr);
g_gpu_device->ClearRenderTarget(final_target, 0); // TODO: Could use an invalidate here too.
}
g_gpu_device->SetPipeline(m_pipeline.get());

View file

@ -23,7 +23,7 @@ public:
bool ResizeOutput(GPUTexture::Format format, u32 width, u32 height) override;
bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height) override;
bool Apply(GPUTexture* input, GPUFramebuffer* final_target, s32 final_left, s32 final_top, s32 final_width,
bool Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width,
s32 final_height, s32 orig_width, s32 orig_height, u32 target_width, u32 target_height) override;
private:

View file

@ -8,7 +8,7 @@
<PreprocessorDefinitions>ENABLE_CUBEB=1;ENABLE_SDL2=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(Platform)'!='ARM64'">%(PreprocessorDefinitions);ENABLE_OPENGL=1;ENABLE_VULKAN=1</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(Platform)'=='ARM64'">%(PreprocessorDefinitions);SOUNDTOUCH_USE_NEON</PreprocessorDefinitions>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\soundtouch\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\libchdr\include;$(SolutionDir)dep\cubeb\include;$(SolutionDir)dep\d3d12ma\include;$(SolutionDir)dep\zstd\lib</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\xxhash\include;$(SolutionDir)dep\soundtouch\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\libchdr\include;$(SolutionDir)dep\cubeb\include;$(SolutionDir)dep\d3d12ma\include;$(SolutionDir)dep\zstd\lib</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(Platform)'!='ARM64'">%(AdditionalIncludeDirectories);$(SolutionDir)dep\glad\include;$(SolutionDir)dep\vulkan\include;$(SolutionDir)dep\glslang</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>

View file

@ -25,6 +25,7 @@
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="gpu_device.h" />
<ClInclude Include="gpu_framebuffer_manager.h" />
<ClInclude Include="gpu_shader_cache.h" />
<ClInclude Include="gpu_texture.h" />
<ClInclude Include="host.h" />
@ -245,6 +246,19 @@
<Project>{ee054e08-3799-4a59-a422-18259c105ffd}</Project>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<ClCompile Include="metal_device.mm">
<ExcludedFromBuild>true</ExcludedFromBuild>
<FileType>Document</FileType>
</ClCompile>
<None Include="metal_shaders.metal">
<ExcludedFromBuild>true</ExcludedFromBuild>
</None>
<ClCompile Include="metal_stream_buffer.mm">
<ExcludedFromBuild>true</ExcludedFromBuild>
<FileType>Document</FileType>
</ClCompile>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{57F6206D-F264-4B07-BAF8-11B9BBE1F455}</ProjectGuid>
</PropertyGroup>

View file

@ -70,6 +70,7 @@
<ClInclude Include="http_downloader_curl.h" />
<ClInclude Include="http_downloader_winhttp.h" />
<ClInclude Include="http_downloader.h" />
<ClInclude Include="gpu_framebuffer_manager.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="jit_code_buffer.cpp" />
@ -149,10 +150,15 @@
<ClCompile Include="http_downloader_curl.cpp" />
<ClCompile Include="http_downloader_winhttp.cpp" />
<ClCompile Include="http_downloader.cpp" />
<ClCompile Include="metal_device.mm" />
<ClCompile Include="metal_stream_buffer.mm" />
</ItemGroup>
<ItemGroup>
<Filter Include="gl">
<UniqueIdentifier>{e637fc5b-2483-4a31-abc3-89a16d45c223}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include="metal_shaders.metal" />
</ItemGroup>
</Project>

View file

@ -264,6 +264,9 @@ void Vulkan::GraphicsPipelineBuilder::Clear()
m_line_rasterization_state = {};
m_line_rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT;
m_rendering = {};
m_rendering.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR;
// set defaults
SetNoCullRasterizationState();
SetNoDepthTestState();
@ -569,6 +572,29 @@ void Vulkan::GraphicsPipelineBuilder::SetProvokingVertex(VkProvokingVertexModeEX
m_provoking_vertex.provokingVertexMode = mode;
}
// Attaches m_rendering (the pipeline rendering create info) to m_ci (the graphics pipeline
// create info) via AddPointerToChain().
// NOTE(review): AddPointerToChain() is defined elsewhere; presumably it appends to the pNext
// chain and tolerates the same struct being added more than once, since the attachment
// helpers call this on every invocation - confirm.
void Vulkan::GraphicsPipelineBuilder::SetDynamicRendering()
{
AddPointerToChain(&m_ci, &m_rendering);
}
// Appends one colour attachment format to the dynamic rendering description.
// Formats are stored in m_rendering_color_formats in call order, and m_rendering is pointed
// at that storage after every addition.
void Vulkan::GraphicsPipelineBuilder::AddDynamicRenderingColorAttachment(VkFormat format)
{
  SetDynamicRendering();

  // The next free slot is the current attachment count; it must fit in the fixed-size array.
  const u32 slot = m_rendering.colorAttachmentCount;
  DebugAssert(slot < MAX_ATTACHMENTS);

  m_rendering_color_formats[slot] = format;
  m_rendering.colorAttachmentCount = slot + 1;
  m_rendering.pColorAttachmentFormats = m_rendering_color_formats.data();
}
// Records the depth and stencil attachment formats for dynamic rendering.
// Both formats are stored as given; nothing here validates that they describe the same image.
void Vulkan::GraphicsPipelineBuilder::SetDynamicRenderingDepthAttachment(VkFormat depth_format, VkFormat stencil_format)
{
  SetDynamicRendering();

  auto& rendering_info = m_rendering;
  rendering_info.depthAttachmentFormat = depth_format;
  rendering_info.stencilAttachmentFormat = stencil_format;
}
Vulkan::ComputePipelineBuilder::ComputePipelineBuilder()
{
Clear();

View file

@ -3,6 +3,7 @@
#pragma once
#include "gpu_device.h"
#include "vulkan_loader.h"
#include "common/string_util.h"
@ -79,7 +80,7 @@ public:
MAX_SHADER_STAGES = 3,
MAX_VERTEX_ATTRIBUTES = 16,
MAX_VERTEX_BUFFERS = 8,
MAX_ATTACHMENTS = 2,
MAX_ATTACHMENTS = GPUDevice::MAX_RENDER_TARGETS + 1,
MAX_DYNAMIC_STATE = 8
};
@ -140,6 +141,10 @@ public:
void SetProvokingVertex(VkProvokingVertexModeEXT mode);
void SetDynamicRendering();
void AddDynamicRenderingColorAttachment(VkFormat format);
void SetDynamicRenderingDepthAttachment(VkFormat depth_format, VkFormat stencil_format);
private:
VkGraphicsPipelineCreateInfo m_ci;
std::array<VkPipelineShaderStageCreateInfo, MAX_SHADER_STAGES> m_shader_stages;
@ -167,6 +172,9 @@ private:
VkPipelineRasterizationProvokingVertexStateCreateInfoEXT m_provoking_vertex;
VkPipelineRasterizationLineStateCreateInfoEXT m_line_rasterization_state;
VkPipelineRenderingCreateInfoKHR m_rendering;
std::array<VkFormat, MAX_ATTACHMENTS> m_rendering_color_formats;
};
class ComputePipelineBuilder
@ -271,7 +279,7 @@ class FramebufferBuilder
{
enum : u32
{
MAX_ATTACHMENTS = 2,
MAX_ATTACHMENTS = GPUDevice::MAX_RENDER_TARGETS + 1,
};
public:

View file

@ -20,6 +20,7 @@
#include "common/small_string.h"
#include "fmt/format.h"
#include "xxhash.h"
#include <limits>
#include <mutex>
@ -39,6 +40,13 @@ struct VK_PIPELINE_CACHE_HEADER
};
#pragma pack(pop)
// Maps a texture's current state to the load op that should be used when it is bound as an
// attachment. The table is indexed by the numeric value of GPUTexture::State.
// NOTE(review): the table order assumes State is declared as Dirty, Cleared, Invalidated
// (LOAD, CLEAR, DONT_CARE respectively) - confirm against the enum declaration.
static VkAttachmentLoadOp GetLoadOpForTexture(const GPUTexture* tex)
{
  static constexpr std::array<VkAttachmentLoadOp, 3> ops = {
    {VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_LOAD_OP_DONT_CARE}};

  // Catch a state value outside the table in debug builds instead of silently reading past it.
  const size_t index = static_cast<u8>(tex->GetState());
  DebugAssert(index < ops.size());
  return ops[index];
}
// Tweakables
enum : u32
{
@ -84,6 +92,9 @@ const std::array<VkFormat, static_cast<u32>(GPUTexture::Format::MaxCount)> Vulka
static constexpr VkClearValue s_present_clear_color = {{{0.0f, 0.0f, 0.0f, 1.0f}}};
// Handles are always 64-bit, even on 32-bit platforms.
static const VkRenderPass DYNAMIC_RENDERING_RENDER_PASS = reinterpret_cast<VkRenderPass>(static_cast<s64>(-1LL));
#ifdef _DEBUG
static u32 s_debug_scope_depth = 0;
#endif
@ -355,6 +366,10 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en
m_optional_extensions.vk_ext_attachment_feedback_loop_layout =
SupportsExtension(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME, false);
m_optional_extensions.vk_khr_driver_properties = SupportsExtension(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, false);
m_optional_extensions.vk_khr_dynamic_rendering =
SupportsExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME, false) &&
SupportsExtension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false) &&
SupportsExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false);
m_optional_extensions.vk_khr_push_descriptor = SupportsExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false);
#ifdef _WIN32
@ -501,11 +516,15 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
VK_FALSE};
VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT attachment_feedback_loop_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT, nullptr, VK_TRUE};
VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_TRUE};
if (m_optional_extensions.vk_ext_rasterization_order_attachment_access)
Vulkan::AddPointerToChain(&device_info, &rasterization_order_access_feature);
if (m_optional_extensions.vk_ext_attachment_feedback_loop_layout)
Vulkan::AddPointerToChain(&device_info, &attachment_feedback_loop_feature);
if (m_optional_extensions.vk_khr_dynamic_rendering)
Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_feature);
VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device);
if (res != VK_SUCCESS)
@ -545,12 +564,16 @@ void VulkanDevice::ProcessDeviceExtensions()
VK_FALSE};
VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT attachment_feedback_loop_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT, nullptr, VK_FALSE};
VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_TRUE};
// add in optional feature structs
if (m_optional_extensions.vk_ext_rasterization_order_attachment_access)
Vulkan::AddPointerToChain(&features2, &rasterization_order_access_feature);
if (m_optional_extensions.vk_ext_attachment_feedback_loop_layout)
Vulkan::AddPointerToChain(&features2, &attachment_feedback_loop_feature);
if (m_optional_extensions.vk_khr_dynamic_rendering)
Vulkan::AddPointerToChain(&features2, &dynamic_rendering_feature);
// query
vkGetPhysicalDeviceFeatures2(m_physical_device, &features2);
@ -560,6 +583,7 @@ void VulkanDevice::ProcessDeviceExtensions()
(rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess == VK_TRUE);
m_optional_extensions.vk_ext_attachment_feedback_loop_layout &=
(attachment_feedback_loop_feature.attachmentFeedbackLoopLayout == VK_TRUE);
m_optional_extensions.vk_khr_dynamic_rendering &= (dynamic_rendering_feature.dynamicRendering == VK_TRUE);
VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, nullptr, {}};
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor_properties = {
@ -584,6 +608,8 @@ void VulkanDevice::ProcessDeviceExtensions()
m_optional_extensions.vk_ext_rasterization_order_attachment_access ? "supported" : "NOT supported");
Log_InfoPrintf("VK_EXT_attachment_feedback_loop_layout is %s",
m_optional_extensions.vk_ext_attachment_feedback_loop_layout ? "supported" : "NOT supported");
Log_InfoPrintf("VK_KHR_dynamic_rendering is %s",
m_optional_extensions.vk_khr_dynamic_rendering ? "supported" : "NOT supported");
Log_InfoPrintf("VK_KHR_push_descriptor is %s",
m_optional_extensions.vk_khr_push_descriptor ? "supported" : "NOT supported");
}
@ -794,33 +820,110 @@ void VulkanDevice::DestroyPersistentDescriptorPool()
vkDestroyDescriptorPool(m_device, m_global_descriptor_pool, nullptr);
}
VkRenderPass VulkanDevice::GetRenderPass(VkFormat color_format, VkFormat depth_format, VkSampleCountFlagBits samples,
VkAttachmentLoadOp color_load_op /* = VK_ATTACHMENT_LOAD_OP_LOAD */,
VkAttachmentStoreOp color_store_op /* = VK_ATTACHMENT_STORE_OP_STORE */,
VkAttachmentLoadOp depth_load_op /* = VK_ATTACHMENT_LOAD_OP_LOAD */,
VkAttachmentStoreOp depth_store_op /* = VK_ATTACHMENT_STORE_OP_STORE */,
VkAttachmentLoadOp stencil_load_op /* = VK_ATTACHMENT_LOAD_OP_DONT_CARE */,
VkAttachmentStoreOp stencil_store_op /* = VK_ATTACHMENT_STORE_OP_DONT_CARE */,
// Equality is a raw byte comparison of the whole key, so every byte (padding included)
// participates in the result.
bool VulkanDevice::RenderPassCacheKey::operator==(const RenderPassCacheKey& rhs) const
{
  const int difference = std::memcmp(this, &rhs, sizeof(RenderPassCacheKey));
  return difference == 0;
}
// Inequality is a raw byte comparison of the whole key: true when any byte differs.
bool VulkanDevice::RenderPassCacheKey::operator!=(const RenderPassCacheKey& rhs) const
{
  const int difference = std::memcmp(this, &rhs, sizeof(RenderPassCacheKey));
  return difference != 0;
}
// Hashes the raw bytes of a render pass cache key with xxHash, choosing the hash width at
// compile time from the pointer size: XXH3 64-bit when pointers are 8 bytes, seeded XXH32
// otherwise.
size_t VulkanDevice::RenderPassCacheKeyHash::operator()(const RenderPassCacheKey& rhs) const
{
  constexpr bool pointers_are_64bit = (sizeof(void*) == 8);
  if constexpr (pointers_are_64bit)
  {
    return XXH3_64bits(&rhs, sizeof(rhs));
  }
  else
  {
    return XXH32(&rhs, sizeof(rhs), 0x1337);
  }
}
// Returns a render pass matching the attachment formats and sample count of a pipeline
// configuration, creating and caching it on first use.
// Every attachment that is present uses LOAD/STORE ops; stencil ops are LOAD/STORE only when
// the depth format also carries stencil, and DONT_CARE otherwise.
VkRenderPass VulkanDevice::GetRenderPass(const GPUPipeline::GraphicsConfig& config)
{
  // Zero the whole key first: it is compared and hashed byte-wise.
  RenderPassCacheKey key;
  std::memset(&key, 0, sizeof(key));

  // Colour targets are taken in order; the first Unknown format ends the list.
  for (u32 slot = 0; slot < MAX_RENDER_TARGETS && config.color_formats[slot] != GPUTexture::Format::Unknown; slot++)
  {
    auto& target = key.color[slot];
    target.format = static_cast<u8>(config.color_formats[slot]);
    target.load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
    target.store_op = VK_ATTACHMENT_STORE_OP_STORE;
  }

  const GPUTexture::Format depth_format = config.depth_format;
  if (depth_format != GPUTexture::Format::Unknown)
  {
    key.depth_format = static_cast<u8>(depth_format);
    key.depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
    key.depth_store_op = VK_ATTACHMENT_STORE_OP_STORE;

    if (GPUTexture::IsDepthStencilFormat(depth_format))
    {
      key.stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      key.stencil_store_op = VK_ATTACHMENT_STORE_OP_STORE;
    }
    else
    {
      key.stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
      key.stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    }
  }

  // color_feedback_loop and depth_sampling stay at their zeroed (false) values here.
  key.samples = static_cast<u8>(config.samples);

  if (const auto cached = m_render_pass_cache.find(key); cached != m_render_pass_cache.end())
    return cached->second;

  return CreateCachedRenderPass(key);
}
VkRenderPass VulkanDevice::GetRenderPass(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
bool color_feedback_loop /* = false */, bool depth_sampling /* = false */)
{
RenderPassCacheKey key = {};
key.color_format = color_format;
key.depth_format = depth_format;
key.samples = samples;
key.color_load_op = color_load_op;
key.color_store_op = color_store_op;
key.depth_load_op = depth_load_op;
key.depth_store_op = depth_store_op;
key.stencil_load_op = stencil_load_op;
key.stencil_store_op = stencil_store_op;
RenderPassCacheKey key;
std::memset(&key, 0, sizeof(key));
static_assert(static_cast<u8>(GPUTexture::Format::Unknown) == 0);
for (u32 i = 0; i < num_rts; i++)
{
key.color[i].format = static_cast<u8>(rts[i]->GetFormat());
key.color[i].load_op = GetLoadOpForTexture(rts[i]);
key.color[i].store_op = VK_ATTACHMENT_STORE_OP_STORE;
key.samples = static_cast<u8>(rts[i]->GetSamples());
}
if (ds)
{
const VkAttachmentLoadOp load_op = GetLoadOpForTexture(ds);
key.depth_format = static_cast<u8>(ds->GetFormat());
key.depth_load_op = load_op;
key.depth_store_op = VK_ATTACHMENT_STORE_OP_STORE;
const bool stencil = GPUTexture::IsDepthStencilFormat(ds->GetFormat());
key.stencil_load_op = stencil ? load_op : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
key.stencil_store_op = stencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
key.samples = static_cast<u8>(ds->GetSamples());
}
key.color_feedback_loop = color_feedback_loop;
key.depth_sampling = depth_sampling;
auto it = m_render_pass_cache.find(key.key);
if (it != m_render_pass_cache.end())
return it->second;
const auto it = m_render_pass_cache.find(key);
return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key);
}
return CreateCachedRenderPass(key);
// Returns a single-colour-attachment, single-sample render pass for the given format, with
// the caller-chosen load op and a STORE store op. The pass is created and cached on first use.
VkRenderPass VulkanDevice::GetSwapChainRenderPass(GPUTexture::Format format, VkAttachmentLoadOp load_op)
{
  DebugAssert(format != GPUTexture::Format::Unknown);

  // Zero the whole key first: it is compared and hashed byte-wise.
  RenderPassCacheKey key;
  std::memset(&key, 0, sizeof(key));

  auto& target = key.color[0];
  target.format = static_cast<u8>(format);
  target.load_op = load_op;
  target.store_op = VK_ATTACHMENT_STORE_OP_STORE;
  key.samples = 1;

  if (const auto cached = m_render_pass_cache.find(key); cached != m_render_pass_cache.end())
    return cached->second;

  return CreateCachedRenderPass(key);
}
VkRenderPass VulkanDevice::GetRenderPassForRestarting(VkRenderPass pass)
@ -830,19 +933,22 @@ VkRenderPass VulkanDevice::GetRenderPassForRestarting(VkRenderPass pass)
if (it.second != pass)
continue;
RenderPassCacheKey modified_key;
modified_key.key = it.first;
if (modified_key.color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
modified_key.color_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
RenderPassCacheKey modified_key = it.first;
for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
{
if (modified_key.color[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
modified_key.color[i].load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
}
if (modified_key.depth_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
modified_key.depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
if (modified_key.stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
modified_key.stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
if (modified_key.key == it.first)
if (modified_key == it.first)
return pass;
auto fit = m_render_pass_cache.find(modified_key.key);
auto fit = m_render_pass_cache.find(modified_key);
if (fit != m_render_pass_cache.end())
return fit->second;
@ -1261,12 +1367,9 @@ void VulkanDevice::SubmitCommandBufferAndRestartRenderPass(const char* reason)
if (InRenderPass())
EndRenderPass();
VulkanFramebuffer* fb = m_current_framebuffer;
VulkanPipeline* pl = m_current_pipeline;
SubmitCommandBuffer(false, "%s", reason);
if (fb)
SetFramebuffer(fb);
SetPipeline(pl);
BeginRenderPass();
}
@ -1404,19 +1507,25 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
VkAttachmentReference* input_reference_ptr = nullptr;
VkSubpassDependency subpass_dependency;
VkSubpassDependency* subpass_dependency_ptr = nullptr;
std::array<VkAttachmentDescription, 2> attachments;
std::array<VkAttachmentDescription, MAX_RENDER_TARGETS + 1> attachments;
u32 num_attachments = 0;
if (key.color_format != VK_FORMAT_UNDEFINED)
for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
{
if (key.color[i].format == static_cast<u8>(GPUTexture::Format::Unknown))
break;
const VkImageLayout layout =
key.color_feedback_loop ?
(UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL) :
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
attachments[num_attachments] = {0,
static_cast<VkFormat>(key.color_format),
const RenderPassCacheKey::RenderTarget key_rt = key.color[i];
attachments[num_attachments] = {i,
TEXTURE_FORMAT_MAPPING[key_rt.format],
static_cast<VkSampleCountFlagBits>(key.samples),
static_cast<VkAttachmentLoadOp>(key.color_load_op),
static_cast<VkAttachmentStoreOp>(key.color_store_op),
static_cast<VkAttachmentLoadOp>(key_rt.load_op),
static_cast<VkAttachmentStoreOp>(key_rt.store_op),
VK_ATTACHMENT_LOAD_OP_DONT_CARE,
VK_ATTACHMENT_STORE_OP_DONT_CARE,
layout,
@ -1453,14 +1562,17 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
num_attachments++;
}
if (key.depth_format != VK_FORMAT_UNDEFINED)
const u32 num_rts = num_attachments;
if (key.depth_format != static_cast<u8>(GPUTexture::Format::Unknown))
{
const VkImageLayout layout =
key.depth_sampling ?
(UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL) :
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[num_attachments] = {0,
static_cast<VkFormat>(key.depth_format),
static_cast<VkFormat>(TEXTURE_FORMAT_MAPPING[key.depth_format]),
static_cast<VkSampleCountFlagBits>(key.samples),
static_cast<VkAttachmentLoadOp>(key.depth_load_op),
static_cast<VkAttachmentStoreOp>(key.depth_store_op),
@ -1480,10 +1592,10 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
0;
const VkSubpassDescription subpass = {subpass_flags,
VK_PIPELINE_BIND_POINT_GRAPHICS,
input_reference_ptr ? 1u : 0u,
input_reference_ptr ? input_reference_ptr : nullptr,
color_reference_ptr ? 1u : 0u,
color_reference_ptr ? color_reference_ptr : nullptr,
input_reference_ptr ? num_rts : 0u,
input_reference_ptr,
num_rts,
color_reference_ptr,
nullptr,
depth_reference_ptr,
0,
@ -1506,10 +1618,37 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
return VK_NULL_HANDLE;
}
m_render_pass_cache.emplace(key.key, pass);
m_render_pass_cache.emplace(key, pass);
return pass;
}
// Creates a VkFramebuffer for the given colour targets and optional depth target.
//
// rts/num_rts: colour attachments, added in order.
// ds:          depth attachment, added after the colour attachments; may be null.
// flags:       not consulted by this implementation.
//
// The framebuffer size is taken from the first colour target, or from the depth target when
// there are no colour targets. Returns VK_NULL_HANDLE when there is no target at all, when no
// compatible render pass could be obtained, or when framebuffer creation fails.
VkFramebuffer VulkanDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags)
{
  // Validate up-front: DebugAssert is not a release-build safeguard, and dereferencing a null
  // texture below would crash.
  const GPUTexture* rt_or_ds = (num_rts > 0) ? rts[0] : ds;
  DebugAssert(rt_or_ds);
  if (!rt_or_ds)
    return VK_NULL_HANDLE;

  VulkanDevice& dev = VulkanDevice::GetInstance();
  VkRenderPass render_pass = dev.GetRenderPass(rts, num_rts, ds, false, false);

  // Render pass creation can fail; do not hand a null pass to the framebuffer builder.
  if (render_pass == VK_NULL_HANDLE)
    return VK_NULL_HANDLE;

  Vulkan::FramebufferBuilder fbb;
  fbb.SetRenderPass(render_pass);
  fbb.SetSize(rt_or_ds->GetWidth(), rt_or_ds->GetHeight(), 1);
  for (u32 i = 0; i < num_rts; i++)
    fbb.AddAttachment(static_cast<VulkanTexture*>(rts[i])->GetView());
  if (ds)
    fbb.AddAttachment(static_cast<VulkanTexture*>(ds)->GetView());
  return fbb.Create(dev.m_device, false);
}
// Hands a framebuffer to the device instance for deferred destruction.
// A null handle is ignored.
void VulkanDevice::DestroyFramebuffer(VkFramebuffer fbo)
{
  if (fbo != VK_NULL_HANDLE)
    VulkanDevice::GetInstance().DeferFramebufferDestruction(fbo);
}
void VulkanDevice::GetAdapterAndModeList(AdapterAndModeList* ret, VkInstance instance)
{
GPUList gpus = EnumerateGPUs(instance);
@ -2098,7 +2237,7 @@ bool VulkanDevice::BeginPresent(bool frame_skip)
void VulkanDevice::EndPresent()
{
DebugAssert(InRenderPass() && !m_current_framebuffer);
DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target);
EndRenderPass();
VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
@ -2196,7 +2335,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
m_features.dual_source_blend =
!(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && m_device_features.dualSrcBlend;
m_features.framebuffer_fetch = /*!(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && */false;
m_features.framebuffer_fetch = /*!(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && */ false;
if (!m_features.dual_source_blend)
Log_WarningPrintf("Vulkan driver is missing dual-source blending. This will have an impact on performance.");
@ -2354,25 +2493,22 @@ void VulkanDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u
void VulkanDevice::ClearRenderTarget(GPUTexture* t, u32 c)
{
GPUDevice::ClearRenderTarget(t, c);
if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetRT() == t)
if (InRenderPass() && IsRenderTargetBound(t))
EndRenderPass();
}
void VulkanDevice::ClearDepth(GPUTexture* t, float d)
{
GPUDevice::ClearDepth(t, d);
if (InRenderPass() && m_current_framebuffer && m_current_framebuffer->GetDS() == t)
if (InRenderPass() && m_current_depth_target == t)
EndRenderPass();
}
void VulkanDevice::InvalidateRenderTarget(GPUTexture* t)
{
GPUDevice::InvalidateRenderTarget(t);
if (InRenderPass() && m_current_framebuffer &&
(m_current_framebuffer->GetRT() == t || m_current_framebuffer->GetDS() == t))
{
if (InRenderPass() && (t->IsRenderTarget() ? IsRenderTargetBound(t) : (m_current_depth_target == t)))
EndRenderPass();
}
}
bool VulkanDevice::CreateBuffers()
@ -2670,143 +2806,213 @@ void VulkanDevice::RenderBlankFrame()
InvalidateCachedState();
}
void VulkanDevice::SetFramebuffer(GPUFramebuffer* fb)
void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds)
{
if (m_current_framebuffer == fb)
return;
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds);
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
bool needs_rt_clear = false;
if (InRenderPass())
EndRenderPass();
m_current_depth_target = ds;
for (u32 i = 0; i < num_rts; i++)
{
VulkanTexture* const RT = static_cast<VulkanTexture*>(rts[i]);
changed |= m_current_render_targets[i] != RT;
m_current_render_targets[i] = RT;
needs_rt_clear |= RT->IsClearedOrInvalidated();
}
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
m_current_render_targets[i] = nullptr;
m_num_current_render_targets = num_rts;
m_current_framebuffer = static_cast<VulkanFramebuffer*>(fb);
if (changed)
{
if (InRenderPass())
EndRenderPass();
if (m_num_current_render_targets == 0 && !m_current_depth_target)
{
m_current_framebuffer = VK_NULL_HANDLE;
return;
}
if (!m_optional_extensions.vk_khr_dynamic_rendering)
{
m_current_framebuffer =
m_framebuffer_manager.Lookup((m_num_current_render_targets > 0) ? m_current_render_targets.data() : nullptr,
m_num_current_render_targets, m_current_depth_target, 0);
if (m_current_framebuffer == VK_NULL_HANDLE)
{
Log_ErrorPrint("Failed to create framebuffer");
return;
}
}
}
// TODO: This could use vkCmdClearAttachments() instead.
if (needs_rt_clear || needs_ds_clear)
{
if (InRenderPass())
EndRenderPass();
}
}
void VulkanDevice::BeginRenderPass()
{
// TODO: Stats
DebugAssert(!InRenderPass());
VkRenderPassBeginInfo bi = {
VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, VK_NULL_HANDLE, VK_NULL_HANDLE, {}, 0u, nullptr};
std::array<VkClearValue, 2> clear_values;
if (m_current_framebuffer) [[likely]]
{
VkFormat rt_format = VK_FORMAT_UNDEFINED;
VkFormat ds_format = VK_FORMAT_UNDEFINED;
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
VkAttachmentLoadOp rt_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
VkAttachmentStoreOp rt_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
VkAttachmentLoadOp ds_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
VkAttachmentStoreOp ds_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
VulkanTexture* rt = static_cast<VulkanTexture*>(m_current_framebuffer->GetRT());
if (rt)
{
samples = static_cast<VkSampleCountFlagBits>(rt->GetSamples());
rt_format = rt->GetVkFormat();
rt_store_op = VK_ATTACHMENT_STORE_OP_STORE;
switch (rt->GetState())
{
case GPUTexture::State::Cleared:
{
std::memcpy(clear_values[0].color.float32, rt->GetUNormClearColor().data(),
sizeof(clear_values[0].color.float32));
rt_load_op = VK_ATTACHMENT_LOAD_OP_CLEAR;
rt->SetState(GPUTexture::State::Dirty);
bi.pClearValues = clear_values.data();
bi.clearValueCount = 1;
}
break;
case GPUTexture::State::Invalidated:
{
// already DONT_CARE
rt->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Dirty:
{
rt_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
}
break;
default:
UnreachableCode();
break;
}
rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment);
rt->SetUseFenceCounter(GetCurrentFenceCounter());
}
VulkanTexture* ds = static_cast<VulkanTexture*>(m_current_framebuffer->GetDS());
if (ds)
{
samples = static_cast<VkSampleCountFlagBits>(ds->GetSamples());
ds_format = ds->GetVkFormat();
ds_store_op = VK_ATTACHMENT_STORE_OP_STORE;
switch (ds->GetState())
{
case GPUTexture::State::Cleared:
{
const u32 idx = rt ? 1 : 0;
clear_values[idx].depthStencil = {ds->GetClearDepth(), 0u};
ds_load_op = VK_ATTACHMENT_LOAD_OP_CLEAR;
ds->SetState(GPUTexture::State::Dirty);
bi.pClearValues = clear_values.data();
bi.clearValueCount = idx + 1;
}
break;
case GPUTexture::State::Invalidated:
{
// already DONT_CARE
ds->SetState(GPUTexture::State::Dirty);
}
break;
case GPUTexture::State::Dirty:
{
ds_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
}
break;
default:
UnreachableCode();
break;
}
ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment);
ds->SetUseFenceCounter(GetCurrentFenceCounter());
}
bi.framebuffer = m_current_framebuffer->GetFramebuffer();
bi.renderPass = m_current_render_pass =
GetRenderPass(rt_format, ds_format, samples, rt_load_op, rt_store_op, ds_load_op, ds_store_op);
bi.renderArea.extent = {m_current_framebuffer->GetWidth(), m_current_framebuffer->GetHeight()};
}
else
{
// Re-rendering to swap chain.
bi.framebuffer = m_swap_chain->GetCurrentFramebuffer();
bi.renderPass = m_current_render_pass =
GetRenderPass(m_swap_chain->GetImageFormat(), VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT,
VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE);
bi.renderArea.extent = {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()};
}
DebugAssert(m_current_render_pass);
// All textures should be in shader read only optimal already, but just in case..
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
for (u32 i = 0; i < num_textures; i++)
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
// TODO: Stats
vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &bi, VK_SUBPASS_CONTENTS_INLINE);
if (m_optional_extensions.vk_khr_dynamic_rendering)
{
VkRenderingInfoKHR ri = {
VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr};
std::array<VkRenderingAttachmentInfoKHR, MAX_RENDER_TARGETS> attachments;
VkRenderingAttachmentInfoKHR depth_attachment;
if (m_num_current_render_targets > 0 || m_current_depth_target)
{
ri.colorAttachmentCount = m_num_current_render_targets;
ri.pColorAttachments = (m_num_current_render_targets > 0) ? attachments.data() : nullptr;
// set up clear values and transition targets
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
VulkanTexture* const rt = static_cast<VulkanTexture*>(m_current_render_targets[i]);
rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment);
rt->SetUseFenceCounter(GetCurrentFenceCounter());
VkRenderingAttachmentInfo& ai = attachments[i];
ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
ai.pNext = nullptr;
ai.imageView = rt->GetView();
ai.imageLayout = rt->GetVkLayout();
ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR;
ai.resolveImageView = VK_NULL_HANDLE;
ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
ai.loadOp = GetLoadOpForTexture(rt);
ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
if (rt->GetState() == GPUTexture::State::Cleared)
{
std::memcpy(ai.clearValue.color.float32, rt->GetUNormClearColor().data(),
sizeof(ai.clearValue.color.float32));
}
rt->SetState(GPUTexture::State::Dirty);
}
if (VulkanTexture* const ds = static_cast<VulkanTexture*>(m_current_depth_target))
{
ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment);
ds->SetUseFenceCounter(GetCurrentFenceCounter());
depth_attachment.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
depth_attachment.pNext = nullptr;
depth_attachment.imageView = ds->GetView();
depth_attachment.imageLayout = ds->GetVkLayout();
depth_attachment.resolveMode = VK_RESOLVE_MODE_NONE_KHR;
depth_attachment.resolveImageView = VK_NULL_HANDLE;
depth_attachment.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
depth_attachment.loadOp = GetLoadOpForTexture(ds);
depth_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
ri.pDepthAttachment = &depth_attachment;
if (ds->GetState() == GPUTexture::State::Cleared)
depth_attachment.clearValue.depthStencil = {ds->GetClearDepth(), 0u};
ds->SetState(GPUTexture::State::Dirty);
}
const VulkanTexture* const rt_or_ds = static_cast<const VulkanTexture*>(
(m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target);
ri.renderArea = {{}, {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()}};
}
else
{
VkRenderingAttachmentInfo& ai = attachments[0];
ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
ai.pNext = nullptr;
ai.imageView = m_swap_chain->GetCurrentImageView();
ai.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR;
ai.resolveImageView = VK_NULL_HANDLE;
ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
ai.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
ri.colorAttachmentCount = 1;
ri.pColorAttachments = attachments.data();
ri.renderArea = {{}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}};
}
m_current_render_pass = DYNAMIC_RENDERING_RENDER_PASS;
vkCmdBeginRenderingKHR(GetCurrentCommandBuffer(), &ri);
}
else
{
VkRenderPassBeginInfo bi = {
VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, VK_NULL_HANDLE, VK_NULL_HANDLE, {}, 0u, nullptr};
std::array<VkClearValue, MAX_RENDER_TARGETS + 1> clear_values;
if (m_current_framebuffer != VK_NULL_HANDLE)
{
bi.framebuffer = m_current_framebuffer;
bi.renderPass = m_current_render_pass = GetRenderPass(
m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target, false, false);
if (bi.renderPass == VK_NULL_HANDLE)
{
Log_ErrorPrint("Failed to create render pass");
return;
}
// set up clear values and transition targets
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
VulkanTexture* const rt = static_cast<VulkanTexture*>(m_current_render_targets[i]);
if (rt->GetState() == GPUTexture::State::Cleared)
{
std::memcpy(clear_values[i].color.float32, rt->GetUNormClearColor().data(),
sizeof(clear_values[i].color.float32));
bi.pClearValues = clear_values.data();
bi.clearValueCount = i + 1;
}
rt->SetState(GPUTexture::State::Dirty);
rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment);
rt->SetUseFenceCounter(GetCurrentFenceCounter());
}
if (VulkanTexture* const ds = static_cast<VulkanTexture*>(m_current_depth_target))
{
if (ds->GetState() == GPUTexture::State::Cleared)
{
clear_values[m_num_current_render_targets].depthStencil = {ds->GetClearDepth(), 0u};
bi.pClearValues = clear_values.data();
bi.clearValueCount = m_num_current_render_targets + 1;
}
ds->SetState(GPUTexture::State::Dirty);
ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment);
ds->SetUseFenceCounter(GetCurrentFenceCounter());
}
const VulkanTexture* const rt_or_ds = static_cast<const VulkanTexture*>(
(m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target);
bi.renderArea.extent = {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()};
}
else
{
// Re-rendering to swap chain.
bi.framebuffer = m_swap_chain->GetCurrentFramebuffer();
bi.renderPass = m_current_render_pass =
GetSwapChainRenderPass(m_swap_chain->GetWindowInfo().surface_format, VK_ATTACHMENT_LOAD_OP_LOAD);
bi.renderArea.extent = {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()};
}
DebugAssert(m_current_render_pass);
vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &bi, VK_SUBPASS_CONTENTS_INLINE);
}
// If this is a new command buffer, bind the pipeline and such.
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
@ -2830,21 +3036,53 @@ void VulkanDevice::BeginSwapChainRenderPass()
for (u32 i = 0; i < num_textures; i++)
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
const VkRenderPass render_pass =
GetRenderPass(m_swap_chain->GetImageFormat(), VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT,
VK_ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_STORE_OP_STORE);
DebugAssert(render_pass);
if (m_optional_extensions.vk_khr_dynamic_rendering)
{
const VkRenderingAttachmentInfo ai = {VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR,
nullptr,
m_swap_chain->GetCurrentImageView(),
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_RESOLVE_MODE_NONE_KHR,
VK_NULL_HANDLE,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_ATTACHMENT_LOAD_OP_LOAD,
VK_ATTACHMENT_STORE_OP_STORE,
{}};
const VkRenderPassBeginInfo rp = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
nullptr,
render_pass,
m_swap_chain->GetCurrentFramebuffer(),
{{0, 0}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}},
1u,
&s_present_clear_color};
vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE);
m_current_render_pass = render_pass;
m_current_framebuffer = nullptr;
const VkRenderingInfoKHR ri = {VK_STRUCTURE_TYPE_RENDERING_INFO_KHR,
nullptr,
0u,
{{}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}},
1u,
0u,
1u,
&ai,
nullptr,
nullptr};
m_current_render_pass = DYNAMIC_RENDERING_RENDER_PASS;
vkCmdBeginRenderingKHR(GetCurrentCommandBuffer(), &ri);
}
else
{
m_current_render_pass =
GetSwapChainRenderPass(m_swap_chain->GetWindowInfo().surface_format, VK_ATTACHMENT_LOAD_OP_CLEAR);
DebugAssert(m_current_render_pass);
const VkRenderPassBeginInfo rp = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
nullptr,
m_current_render_pass,
m_swap_chain->GetCurrentFramebuffer(),
{{0, 0}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}},
1u,
&s_present_clear_color};
vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE);
}
m_num_current_render_targets = 0;
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_current_depth_target = nullptr;
m_current_framebuffer = VK_NULL_HANDLE;
// Clear pipeline, it's likely incompatible.
m_current_pipeline = nullptr;
@ -2860,32 +3098,11 @@ void VulkanDevice::EndRenderPass()
DebugAssert(m_current_render_pass != VK_NULL_HANDLE);
// TODO: stats
m_current_render_pass = VK_NULL_HANDLE;
vkCmdEndRenderPass(GetCurrentCommandBuffer());
}
// Clears the current-framebuffer binding when `fb` is the framebuffer that is bound.
// Does nothing for any other framebuffer.
void VulkanDevice::UnbindFramebuffer(VulkanFramebuffer* fb)
{
  const bool is_bound = (m_current_framebuffer == fb);
  if (is_bound)
  {
    // End any render pass in progress before dropping the binding.
    if (InRenderPass())
      EndRenderPass();

    m_current_framebuffer = nullptr;
  }
}
void VulkanDevice::UnbindFramebuffer(VulkanTexture* tex)
{
if (!m_current_framebuffer)
return;
if (m_current_framebuffer->GetRT() != tex && m_current_framebuffer->GetDS() != tex)
return;
if (InRenderPass())
EndRenderPass();
m_current_framebuffer = nullptr;
VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
if (std::exchange(m_current_render_pass, VK_NULL_HANDLE) == DYNAMIC_RENDERING_RENDER_PASS)
vkCmdEndRenderingKHR(cmdbuf);
else
vkCmdEndRenderPass(GetCurrentCommandBuffer());
}
void VulkanDevice::SetPipeline(GPUPipeline* pipeline)
@ -2928,10 +3145,20 @@ void VulkanDevice::InvalidateCachedState()
{
m_dirty_flags = ALL_DIRTY_STATE;
m_current_render_pass = VK_NULL_HANDLE;
m_current_framebuffer = nullptr;
m_current_pipeline = nullptr;
}
bool VulkanDevice::IsRenderTargetBound(const GPUTexture* tex) const
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
if (m_current_render_targets[i] == tex)
return true;
}
return false;
}
VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const
{
return m_pipeline_layouts[static_cast<u8>(m_current_pipeline_layout)];
@ -3008,6 +3235,31 @@ void VulkanDevice::UnbindTexture(VulkanTexture* tex)
m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS;
}
}
if (tex->IsRenderTarget())
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
if (m_current_render_targets[i] == tex)
{
Log_WarningPrint("Unbinding current RT");
SetRenderTargets(nullptr, 0, m_current_depth_target);
break;
}
}
m_framebuffer_manager.RemoveRTReferences(tex);
}
else if (tex->IsDepthStencil())
{
if (m_current_depth_target == tex)
{
Log_WarningPrint("Unbinding current DS");
SetRenderTargets(nullptr, 0, nullptr);
}
m_framebuffer_manager.RemoveDSReferences(tex);
}
}
void VulkanDevice::UnbindTextureBuffer(VulkanTextureBuffer* buf)

View file

@ -4,6 +4,7 @@
#pragma once
#include "gpu_device.h"
#include "gpu_framebuffer_manager.h"
#include "gpu_texture.h"
#include "vulkan_loader.h"
#include "vulkan_stream_buffer.h"
@ -20,7 +21,6 @@
#include <unordered_map>
#include <vector>
class VulkanFramebuffer;
class VulkanPipeline;
class VulkanSwapChain;
class VulkanTexture;
@ -45,6 +45,7 @@ public:
bool vk_ext_attachment_feedback_loop_layout : 1;
bool vk_ext_full_screen_exclusive : 1;
bool vk_khr_driver_properties : 1;
bool vk_khr_dynamic_rendering : 1;
bool vk_khr_push_descriptor : 1;
};
@ -87,8 +88,6 @@ public:
void ClearDepth(GPUTexture* t, float d) override;
void InvalidateRenderTarget(GPUTexture* t) override;
std::unique_ptr<GPUFramebuffer> CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds = nullptr) override;
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data) override;
std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source,
const char* entry_point, DynamicHeapArray<u8>* out_binary) override;
@ -106,7 +105,7 @@ public:
void PushUniformBuffer(const void* data, u32 data_size) override;
void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override;
void SetFramebuffer(GPUFramebuffer* fb) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override;
void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -156,14 +155,10 @@ public:
void WaitForGPUIdle();
// Creates a simple render pass.
VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format, VkSampleCountFlagBits samples,
VkAttachmentLoadOp color_load_op = VK_ATTACHMENT_LOAD_OP_LOAD,
VkAttachmentStoreOp color_store_op = VK_ATTACHMENT_STORE_OP_STORE,
VkAttachmentLoadOp depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD,
VkAttachmentStoreOp depth_store_op = VK_ATTACHMENT_STORE_OP_STORE,
VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
VkAttachmentStoreOp stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE,
bool color_feedback_loop = false, bool depth_sampling = false);
VkRenderPass GetRenderPass(const GPUPipeline::GraphicsConfig& config);
VkRenderPass GetRenderPass(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, bool color_feedback_loop = false,
bool depth_sampling = false);
VkRenderPass GetSwapChainRenderPass(GPUTexture::Format format, VkAttachmentLoadOp load_op);
// Gets a non-clearing version of the specified render pass. Slow, don't call in hot path.
VkRenderPass GetRenderPassForRestarting(VkRenderPass pass);
@ -213,7 +208,6 @@ public:
void SubmitCommandBuffer(bool wait_for_completion, const char* reason, ...);
void SubmitCommandBufferAndRestartRenderPass(const char* reason);
void UnbindFramebuffer(VulkanFramebuffer* fb);
void UnbindFramebuffer(VulkanTexture* tex);
void UnbindPipeline(VulkanPipeline* pl);
void UnbindTexture(VulkanTexture* tex);
@ -239,24 +233,32 @@ private:
DIRTY_FLAG_INITIAL | DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_TEXTURES_OR_SAMPLERS,
};
union RenderPassCacheKey
struct RenderPassCacheKey
{
struct
struct RenderTarget
{
u32 color_format : 8;
u32 depth_format : 8;
u32 samples : 4;
u32 color_load_op : 2;
u32 color_store_op : 1;
u32 depth_load_op : 2;
u32 depth_store_op : 1;
u32 stencil_load_op : 2;
u32 stencil_store_op : 1;
u32 color_feedback_loop : 1;
u32 depth_sampling : 1;
u8 format : 5;
u8 load_op : 2;
u8 store_op : 1;
};
RenderTarget color[MAX_RENDER_TARGETS];
u32 key;
u8 depth_format : 5;
u8 depth_load_op : 2;
u8 depth_store_op : 1;
u8 stencil_load_op : 2;
u8 stencil_store_op : 1;
u8 depth_sampling : 1;
u8 color_feedback_loop : 1;
u8 samples;
bool operator==(const RenderPassCacheKey& rhs) const;
bool operator!=(const RenderPassCacheKey& rhs) const;
};
/// Hash functor for RenderPassCacheKey, supplied as the hasher of the m_render_pass_cache unordered_map.
struct RenderPassCacheKeyHash
{
/// Returns the hash value for the given key (defined out of line).
size_t operator()(const RenderPassCacheKey& rhs) const;
};
struct CommandBuffer
@ -332,6 +334,8 @@ private:
/// Set dirty flags on everything to force re-bind at next draw time.
void InvalidateCachedState();
bool IsRenderTargetBound(const GPUTexture* tex) const;
/// Applies any changed state.
VkPipelineLayout GetCurrentVkPipelineLayout() const;
void SetInitialPipelineState();
@ -349,6 +353,8 @@ private:
bool InRenderPass();
VkRenderPass CreateCachedRenderPass(RenderPassCacheKey key);
static VkFramebuffer CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags);
static void DestroyFramebuffer(VkFramebuffer fbo);
void BeginCommandBuffer(u32 index);
void WaitForCommandBufferCompletion(u32 index);
@ -400,7 +406,8 @@ private:
QueuedPresent m_queued_present = {};
std::unordered_map<u32, VkRenderPass> m_render_pass_cache;
std::unordered_map<RenderPassCacheKey, VkRenderPass, RenderPassCacheKeyHash> m_render_pass_cache;
GPUFramebufferManager<VkFramebuffer, CreateFramebuffer, DestroyFramebuffer> m_framebuffer_manager;
VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE;
// TODO: Move to static?
@ -438,7 +445,10 @@ private:
// Which bindings/state has to be updated before the next draw.
u32 m_dirty_flags = ALL_DIRTY_STATE;
VulkanFramebuffer* m_current_framebuffer = nullptr;
u32 m_num_current_render_targets = 0;
std::array<GPUTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
GPUTexture* m_current_depth_target = nullptr;
VkFramebuffer m_current_framebuffer = VK_NULL_HANDLE;
VkRenderPass m_current_render_pass = VK_NULL_HANDLE;
VulkanPipeline* m_current_pipeline = nullptr;

View file

@ -236,6 +236,10 @@ VULKAN_DEVICE_ENTRY_POINT(vkAcquireFullScreenExclusiveModeEXT, false)
VULKAN_DEVICE_ENTRY_POINT(vkReleaseFullScreenExclusiveModeEXT, false)
#endif
// VK_KHR_dynamic_rendering
VULKAN_DEVICE_ENTRY_POINT(vkCmdBeginRenderingKHR, false)
VULKAN_DEVICE_ENTRY_POINT(vkCmdEndRenderingKHR, false)
// VK_KHR_push_descriptor
VULKAN_DEVICE_ENTRY_POINT(vkCmdPushDescriptorSetKHR, false)

View file

@ -199,11 +199,31 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
gpb.SetPipelineLayout(m_pipeline_layouts[static_cast<u8>(config.layout)]);
const VkRenderPass render_pass = GetRenderPass(TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.color_format)],
TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.depth_format)],
static_cast<VkSampleCountFlagBits>(config.samples));
DebugAssert(render_pass);
gpb.SetRenderPass(render_pass, 0);
if (m_optional_extensions.vk_khr_dynamic_rendering)
{
gpb.SetDynamicRendering();
for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
{
if (config.color_formats[i] == GPUTexture::Format::Unknown)
break;
gpb.AddDynamicRenderingColorAttachment(
VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.color_formats[i])]);
}
if (config.depth_format != GPUTexture::Format::Unknown)
{
gpb.SetDynamicRenderingDepthAttachment(VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.depth_format)],
VK_FORMAT_UNDEFINED);
}
}
else
{
const VkRenderPass render_pass = GetRenderPass(config);
DebugAssert(render_pass != VK_NULL_HANDLE);
gpb.SetRenderPass(render_pass, 0);
}
const VkPipeline pipeline = gpb.Create(m_device, m_pipeline_cache, false);
if (!pipeline)

View file

@ -488,8 +488,7 @@ bool VulkanSwapChain::CreateSwapChain()
res = vkGetSwapchainImagesKHR(dev.GetVulkanDevice(), m_swap_chain, &image_count, images.data());
Assert(res == VK_SUCCESS);
VkRenderPass render_pass =
dev.GetRenderPass(m_format, VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR);
VkRenderPass render_pass = dev.GetSwapChainRenderPass(m_window_info.surface_format, VK_ATTACHMENT_LOAD_OP_CLEAR);
if (render_pass == VK_NULL_HANDLE)
return false;

View file

@ -40,6 +40,7 @@ public:
/// Returns the number of entries in m_images, narrowed to u32.
ALWAYS_INLINE u32 GetImageCount() const { return static_cast<u32>(m_images.size()); }
/// Returns the stored VkFormat (m_format).
ALWAYS_INLINE VkFormat GetImageFormat() const { return m_format; }
/// Returns the VkImage of the entry selected by m_current_image.
ALWAYS_INLINE VkImage GetCurrentImage() const { return m_images[m_current_image].image; }
/// Returns the VkImageView of the entry selected by m_current_image.
ALWAYS_INLINE VkImageView GetCurrentImageView() const { return m_images[m_current_image].view; }
/// Returns the VkFramebuffer of the entry selected by m_current_image.
ALWAYS_INLINE VkFramebuffer GetCurrentFramebuffer() const { return m_images[m_current_image].framebuffer; }
ALWAYS_INLINE VkSemaphore GetImageAvailableSemaphore() const
{

View file

@ -961,54 +961,6 @@ std::unique_ptr<GPUSampler> VulkanDevice::CreateSampler(const GPUSampler::Config
return std::unique_ptr<GPUSampler>(new VulkanSampler(vsampler));
}
// Wraps an existing VkFramebuffer handle. The attachments and dimensions are forwarded to the
// GPUFramebuffer base; only the handle itself is stored in this class.
VulkanFramebuffer::VulkanFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, VkFramebuffer fb)
: GPUFramebuffer(rt, ds, width, height), m_framebuffer(fb)
{
}
// Passes the VkFramebuffer to the device's DeferFramebufferDestruction() instead of destroying it here.
VulkanFramebuffer::~VulkanFramebuffer()
{
VulkanDevice::GetInstance().DeferFramebufferDestruction(m_framebuffer);
}
// Attaches `name` to the underlying VkFramebuffer via Vulkan::SetObjectName().
void VulkanFramebuffer::SetDebugName(const std::string_view& name)
{
Vulkan::SetObjectName(VulkanDevice::GetInstance().GetVulkanDevice(), m_framebuffer, name);
}
// Creates a framebuffer object for a colour target, a depth-stencil target, or both.
//   rt_or_ds - colour target, or the depth-stencil target when no colour target is wanted (then `ds` must be null).
//   ds       - optional depth-stencil target paired with a colour target.
// Returns an empty pointer when the VkFramebuffer could not be created.
std::unique_ptr<GPUFramebuffer> VulkanDevice::CreateFramebuffer(GPUTexture* rt_or_ds, GPUTexture* ds /*= nullptr*/)
{
  DebugAssert((rt_or_ds || ds) && (!rt_or_ds || rt_or_ds->IsRenderTarget() || (rt_or_ds->IsDepthStencil() && !ds)));

  // Decide which slot the first argument actually fills.
  const bool first_is_depth = (rt_or_ds && rt_or_ds->IsDepthStencil());
  VulkanTexture* const color_tex = static_cast<VulkanTexture*>(first_is_depth ? nullptr : rt_or_ds);
  VulkanTexture* const depth_tex = static_cast<VulkanTexture*>(first_is_depth ? rt_or_ds : ds);

  // Size and sample count come from the colour target when there is one, otherwise from the depth target.
  VulkanTexture* const primary = color_tex ? color_tex : depth_tex;
  const u32 width = primary->GetWidth();
  const u32 height = primary->GetHeight();
  const VkSampleCountFlagBits samples = static_cast<VkSampleCountFlagBits>(primary->GetSamples());

  // Present attachments use LOAD/STORE; absent ones use UNDEFINED format with DONT_CARE ops.
  const auto color_format = color_tex ? color_tex->GetVkFormat() : VK_FORMAT_UNDEFINED;
  const auto depth_format = depth_tex ? depth_tex->GetVkFormat() : VK_FORMAT_UNDEFINED;
  const auto color_load_op = color_tex ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
  const auto color_store_op = color_tex ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
  const auto depth_load_op = depth_tex ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
  const auto depth_store_op = depth_tex ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;

  const VkRenderPass render_pass =
    GetRenderPass(color_format, depth_format, samples, color_load_op, color_store_op, depth_load_op, depth_store_op);
  DebugAssert(render_pass != VK_NULL_HANDLE);

  // Attachment order: colour first (if any), then depth-stencil (if any).
  Vulkan::FramebufferBuilder builder;
  builder.SetRenderPass(render_pass);
  builder.SetSize(width, height, 1);
  if (color_tex)
    builder.AddAttachment(color_tex->GetView());
  if (depth_tex)
    builder.AddAttachment(depth_tex->GetView());

  const VkFramebuffer handle = builder.Create(m_device, false);
  if (handle == VK_NULL_HANDLE)
    return {};

  return std::unique_ptr<GPUFramebuffer>(new VulkanFramebuffer(color_tex, depth_tex, width, height, handle));
}
VulkanTextureBuffer::VulkanTextureBuffer(Format format, u32 size_in_elements)
: GPUTextureBuffer(format, size_in_elements)
{

View file

@ -126,25 +126,6 @@ private:
VkSampler m_sampler;
};
/// Vulkan implementation of GPUFramebuffer, holding a VkFramebuffer handle.
/// The constructor is private and VulkanDevice is a friend, so instances are created through the device.
class VulkanFramebuffer final : public GPUFramebuffer
{
friend VulkanDevice;
public:
~VulkanFramebuffer() override;
/// Returns the wrapped VkFramebuffer handle.
ALWAYS_INLINE VkFramebuffer GetFramebuffer() const { return m_framebuffer; }
/// Overrides the base-class hook for naming the object.
void SetDebugName(const std::string_view& name) override;
// TODO: Maybe render passes should be in here to avoid the map lookup...
private:
/// Takes the attachments, dimensions, and an existing VkFramebuffer handle.
VulkanFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, VkFramebuffer fb);
/// The wrapped framebuffer handle returned by GetFramebuffer().
VkFramebuffer m_framebuffer;
};
class VulkanTextureBuffer final : public GPUTextureBuffer
{
friend VulkanDevice;