diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 3c25aac2d..50b15e9b0 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1770,7 +1770,6 @@ bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_sm GL_OBJECT_NAME(vso, "Display Vertex Shader"); GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", Settings::GetDisplayScalingName(g_settings.display_scaling)); - plconfig.vertex_shader = vso.get(); plconfig.fragment_shader = fso.get(); if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig))) @@ -1913,10 +1912,12 @@ void GPU::ClearDisplayTexture() m_display_texture_view_height = 0; } -void GPU::SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height) +void GPU::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, s32 view_width, + s32 view_height) { DebugAssert(texture); m_display_texture = texture; + m_display_depth_buffer = depth_buffer; m_display_texture_view_x = view_x; m_display_texture_view_y = view_y; m_display_texture_view_width = view_width; @@ -1957,10 +1958,10 @@ bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_r // Now we can apply the post chain. 
GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture(); - if (PostProcessing::InternalChain.Apply(display_texture, post_output_texture, 0, 0, display_texture_view_width, - display_texture_view_height, display_texture_view_width, - display_texture_view_height, m_crtc_state.display_width, - m_crtc_state.display_height)) + if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture, 0, 0, + display_texture_view_width, display_texture_view_height, + display_texture_view_width, display_texture_view_height, + m_crtc_state.display_width, m_crtc_state.display_height)) { display_texture_view_x = 0; display_texture_view_y = 0; @@ -2075,7 +2076,7 @@ bool GPU::RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_r const s32 orig_width = static_cast(std::ceil(static_cast(m_crtc_state.display_width) * upscale_x)); const s32 orig_height = static_cast(std::ceil(static_cast(m_crtc_state.display_height) * upscale_y)); - return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), target, + return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target, real_draw_rect.left, real_draw_rect.top, real_draw_rect.GetWidth(), real_draw_rect.GetHeight(), orig_width, orig_height, m_crtc_state.display_width, m_crtc_state.display_height); @@ -2113,7 +2114,7 @@ bool GPU::Deinterlace(u32 field, u32 line_skip) if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]] return false; - SetDisplayTexture(m_deinterlace_buffers[0].get(), 0, 0, width, height); + SetDisplayTexture(m_deinterlace_buffers[0].get(), m_display_depth_buffer, 0, 0, width, height); return true; } @@ -2139,7 +2140,7 @@ bool GPU::Deinterlace(u32 field, u32 line_skip) g_gpu_device->Draw(3, 0); m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), 0, 0, width, full_height); + 
SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); return true; } @@ -2171,7 +2172,7 @@ bool GPU::Deinterlace(u32 field, u32 line_skip) g_gpu_device->Draw(3, 0); m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), 0, 0, width, height); + SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height); return true; } @@ -2206,7 +2207,7 @@ bool GPU::Deinterlace(u32 field, u32 line_skip) g_gpu_device->Draw(3, 0); m_deinterlace_texture->MakeReadyForSampling(); - SetDisplayTexture(m_deinterlace_texture.get(), 0, 0, width, full_height); + SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); return true; } @@ -2309,7 +2310,7 @@ bool GPU::ApplyChromaSmoothing() g_gpu_device->Draw(3, 0); m_chroma_smoothing_texture->MakeReadyForSampling(); - SetDisplayTexture(m_chroma_smoothing_texture.get(), 0, 0, width, height); + SetDisplayTexture(m_chroma_smoothing_texture.get(), m_display_depth_buffer, 0, 0, width, height); return true; } diff --git a/src/core/gpu.h b/src/core/gpu.h index c6e2cc491..0c432fcd3 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -603,7 +603,8 @@ protected: u32 m_fifo_size = 128; void ClearDisplayTexture(); - void SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height); + void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_texture, s32 view_x, s32 view_y, s32 view_width, + s32 view_height); bool RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_rect, bool postfx); @@ -624,6 +625,7 @@ protected: std::unique_ptr m_display_pipeline; GPUTexture* m_display_texture = nullptr; + GPUTexture* m_display_depth_buffer = nullptr; s32 m_display_texture_view_x = 0; s32 m_display_texture_view_y = 0; s32 m_display_texture_view_width = 0; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index f72de5203..ee683fb33 100644 --- 
a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -34,6 +34,7 @@ Log_SetChannel(GPU_HW); static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8; static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D16; static constexpr GPUTexture::Format VRAM_DS_DEPTH_FORMAT = GPUTexture::Format::D32F; +static constexpr GPUTexture::Format VRAM_DS_EXTRACT_FORMAT = GPUTexture::Format::R32F; #ifdef _DEBUG static u32 s_draw_number = 0; @@ -414,6 +415,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) { m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); m_batch.use_depth_buffer = false; + m_depth_was_copied = false; // might be null when resizing if (m_vram_depth_texture) @@ -722,8 +724,10 @@ bool GPU_HW::CreateBuffers() if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || (needs_depth_buffer && - !(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, - GPUTexture::Type::DepthStencil, GetDepthBufferFormat()))) || + (!(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, + GPUTexture::Type::DepthStencil, GetDepthBufferFormat())) || + !(m_vram_depth_copy_texture = g_gpu_device->FetchTexture( + texture_width, texture_height, 1, 1, samples, GPUTexture::Type::RenderTarget, VRAM_DS_EXTRACT_FORMAT)))) || !(m_vram_read_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) || !(m_vram_readback_texture = g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, @@ -802,8 +806,10 @@ void GPU_HW::DestroyBuffers() m_vram_upload_buffer.reset(); m_vram_readback_download_texture.reset(); g_gpu_device->RecycleTexture(std::move(m_downsample_texture)); + g_gpu_device->RecycleTexture(std::move(m_vram_extract_depth_texture)); g_gpu_device->RecycleTexture(std::move(m_vram_extract_texture)); 
g_gpu_device->RecycleTexture(std::move(m_vram_read_texture)); + g_gpu_device->RecycleTexture(std::move(m_vram_depth_copy_texture)); g_gpu_device->RecycleTexture(std::move(m_vram_depth_texture)); g_gpu_device->RecycleTexture(std::move(m_vram_texture)); g_gpu_device->RecycleTexture(std::move(m_vram_readback_texture)); @@ -1289,23 +1295,50 @@ bool GPU_HW::CompilePipelines() // Display { - for (u8 depth_24 = 0; depth_24 < 2; depth_24++) + for (u8 shader = 0; shader < 3; shader++) { + // 24-bit doesn't give you a depth buffer. + const bool color_24bit = (shader == 1); + const bool depth_extract = (shader == 2); + if (depth_extract && !m_pgxp_depth_buffer) + continue; + std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateVRAMExtractFragmentShader(ConvertToBoolUnchecked(depth_24))); + shadergen.GenerateVRAMExtractFragmentShader(color_24bit, depth_extract)); if (!fs) return false; plconfig.fragment_shader = fs.get(); - if (!(m_vram_extract_pipeline[depth_24] = g_gpu_device->CreatePipeline(plconfig))) + plconfig.layout = depth_extract ? GPUPipeline::Layout::MultiTextureAndPushConstants : + GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.color_formats[1] = depth_extract ? 
VRAM_DS_EXTRACT_FORMAT : GPUTexture::Format::Unknown; + + if (!(m_vram_extract_pipeline[shader] = g_gpu_device->CreatePipeline(plconfig))) return false; progress.Increment(); } } + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + + if (m_pgxp_depth_buffer) + { + std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateCopyFragmentShader()); + if (!fs) + return false; + + plconfig.fragment_shader = fs.get(); + plconfig.SetTargetFormats(VRAM_DS_EXTRACT_FORMAT); + if (!(m_copy_depth_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + } + + plconfig.SetTargetFormats(VRAM_RT_FORMAT); + if (m_downsample_mode == GPUDownsampleMode::Adaptive) { std::unique_ptr vs = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), @@ -1419,6 +1452,8 @@ void GPU_HW::DestroyPipelines() destroy(m_downsample_blur_pass_pipeline); destroy(m_downsample_composite_pass_pipeline); m_downsample_composite_sampler.reset(); + + m_copy_depth_pipeline.reset(); } GPU_HW::BatchRenderMode GPU_HW::BatchConfig::GetRenderMode() const @@ -1515,10 +1550,40 @@ void GPU_HW::UpdateDepthBufferFromMaskBit() SetScissor(); } +void GPU_HW::CopyAndClearDepthBuffer() +{ + if (!m_depth_was_copied) + { + // Take a copy of the current depth buffer so it can be used when the previous frame/buffer gets scanned out. + // Don't bother when we're not postprocessing, it'd just be a wasted copy. + if (PostProcessing::InternalChain.NeedsDepthBuffer()) + { + // TODO: Shrink this to only the active area. 
+ GL_SCOPE("Copy Depth Buffer"); + + m_vram_texture->MakeReadyForSampling(); + g_gpu_device->InvalidateRenderTarget(m_vram_depth_copy_texture.get()); + g_gpu_device->SetRenderTarget(m_vram_depth_copy_texture.get()); + g_gpu_device->SetViewportAndScissor(0, 0, m_vram_depth_texture->GetWidth(), m_vram_depth_texture->GetHeight()); + g_gpu_device->SetTextureSampler(0, m_vram_depth_texture.get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetPipeline(m_copy_depth_pipeline.get()); + + const float uniforms[4] = {0.0f, 0.0f, 1.0f, 1.0f}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->Draw(3, 0); + RestoreDeviceContext(); + } + + m_depth_was_copied = true; + } + + ClearDepthBuffer(); +} + void GPU_HW::ClearDepthBuffer() { + GL_SCOPE("GPU_HW::ClearDepthBuffer()"); DebugAssert(m_pgxp_depth_buffer); - g_gpu_device->ClearDepth(m_vram_depth_texture.get(), 1.0f); m_last_depth_z = 1.0f; } @@ -1911,13 +1976,9 @@ void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices) if ((average_z - m_last_depth_z) >= g_settings.gpu_pgxp_depth_clear_threshold) { - if (m_batch_index_count > 0) - { - FlushRender(); - EnsureVertexBufferSpaceForCurrentCommand(); - } - - ClearDepthBuffer(); + FlushRender(); + CopyAndClearDepthBuffer(); + EnsureVertexBufferSpaceForCurrentCommand(); } m_last_depth_z = average_z; @@ -3204,7 +3265,11 @@ void GPU_HW::DispatchRenderCommand() SetScissor(); if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) - ClearDepthBuffer(); + { + FlushRender(); + CopyAndClearDepthBuffer(); + EnsureVertexBufferSpaceForCurrentCommand(); + } if (m_sw_renderer) { @@ -3292,12 +3357,12 @@ void GPU_HW::UpdateDisplay() if (IsUsingMultisampling()) { UpdateVRAMReadTexture(true, true); - SetDisplayTexture(m_vram_read_texture.get(), 0, 0, m_vram_read_texture->GetWidth(), + SetDisplayTexture(m_vram_read_texture.get(), nullptr, 0, 0, m_vram_read_texture->GetWidth(), m_vram_read_texture->GetHeight()); } else { - 
SetDisplayTexture(m_vram_texture.get(), 0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + SetDisplayTexture(m_vram_texture.get(), nullptr, 0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); } return; @@ -3315,6 +3380,12 @@ void GPU_HW::UpdateDisplay() const u32 line_skip = BoolToUInt32(interlaced && m_GPUSTAT.vertical_resolution); bool drew_anything = false; + // Don't bother grabbing depth if postfx doesn't need it. + GPUTexture* depth_source = (!m_GPUSTAT.display_area_color_depth_24 && m_pgxp_depth_buffer && + PostProcessing::InternalChain.NeedsDepthBuffer()) ? + (m_depth_was_copied ? m_vram_depth_copy_texture.get() : m_vram_depth_texture.get()) : + nullptr; + if (IsDisplayDisabled()) { ClearDisplayTexture(); @@ -3325,8 +3396,8 @@ void GPU_HW::UpdateDisplay() (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight() && !PostProcessing::InternalChain.IsActive()) { - SetDisplayTexture(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, - read_height); + SetDisplayTexture(m_vram_texture.get(), depth_source, scaled_vram_offset_x, scaled_vram_offset_y, + scaled_display_width, read_height); // Fast path if no copies are needed. 
if (interlaced) @@ -3353,14 +3424,39 @@ void GPU_HW::UpdateDisplay() } } + m_vram_texture->MakeReadyForSampling(); g_gpu_device->InvalidateRenderTarget(m_vram_extract_texture.get()); - g_gpu_device->SetRenderTarget(m_vram_extract_texture.get()); - g_gpu_device->SetPipeline(m_vram_extract_pipeline[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)].get()); - g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); + + if (depth_source && + ((m_vram_extract_depth_texture && m_vram_extract_depth_texture->GetWidth() == scaled_display_width && + m_vram_extract_depth_texture->GetHeight() == scaled_display_height) || + /* NOTE(review): assumes ResizeTexture() returns true on success - confirm */ g_gpu_device->ResizeTexture(&m_vram_extract_depth_texture, scaled_display_width, scaled_display_height, + GPUTexture::Type::RenderTarget, VRAM_DS_EXTRACT_FORMAT))) + { + depth_source->MakeReadyForSampling(); + g_gpu_device->InvalidateRenderTarget(m_vram_extract_depth_texture.get()); + + GPUTexture* targets[] = {m_vram_extract_texture.get(), m_vram_extract_depth_texture.get()}; + g_gpu_device->SetRenderTargets(targets, static_cast(std::size(targets)), nullptr); + g_gpu_device->SetPipeline(m_vram_extract_pipeline[2].get()); + + g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(1, depth_source, g_gpu_device->GetNearestSampler()); + } + else + { + g_gpu_device->SetRenderTarget(m_vram_extract_texture.get()); + g_gpu_device->SetPipeline(m_vram_extract_pipeline[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)].get()); + g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); + } const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; const u32 skip_x = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; - GL_INS_FMT("Convert 16bpp to 24bpp, skip_x = {}, line_skip = {}", skip_x, line_skip); + GL_INS_FMT("VRAM extract, depth = {}, 24bpp = {}, skip_x = {}, line_skip = {}", 
depth_source ? "yes" : "no", + m_GPUSTAT.display_area_color_depth_24.GetValue(), skip_x, line_skip); + GL_INS_FMT("Source: {},{} => {},{} ({}x{})", reinterpret_start_x, scaled_vram_offset_y, + reinterpret_start_x + scaled_display_width, scaled_vram_offset_y + read_height, scaled_display_width, + read_height); const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y, skip_x, line_skip}; g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); @@ -3369,9 +3465,17 @@ void GPU_HW::UpdateDisplay() g_gpu_device->Draw(3, 0); m_vram_extract_texture->MakeReadyForSampling(); + if (depth_source) + { + // Thanks DX11... + m_vram_extract_depth_texture->MakeReadyForSampling(); + g_gpu_device->SetTextureSampler(1, nullptr, nullptr); + } + drew_anything = true; - SetDisplayTexture(m_vram_extract_texture.get(), 0, 0, scaled_display_width, read_height); + SetDisplayTexture(m_vram_extract_texture.get(), depth_source ? m_vram_extract_depth_texture.get() : nullptr, 0, 0, + scaled_display_width, read_height); if (g_settings.gpu_24bit_chroma_smoothing) { if (ApplyChromaSmoothing()) @@ -3425,6 +3529,7 @@ void GPU_HW::UpdateDownsamplingLevels() void GPU_HW::OnBufferSwapped() { GL_INS("OnBufferSwapped()"); + m_depth_was_copied = false; } void GPU_HW::DownsampleFramebuffer() @@ -3556,7 +3661,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top RestoreDeviceContext(); - SetDisplayTexture(m_downsample_texture.get(), 0, 0, width, height); + SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, width, height); } void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) @@ -3594,7 +3699,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to RestoreDeviceContext(); - SetDisplayTexture(m_downsample_texture.get(), 0, 0, ds_width, ds_height); + SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, ds_width, ds_height); } void 
GPU_HW::DrawRendererStats() diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 08351d5e6..d47ce2cd4 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -156,6 +156,7 @@ private: void SetClampedDrawingArea(); void UpdateVRAMReadTexture(bool drawn, bool written); void UpdateDepthBufferFromMaskBit(); + void CopyAndClearDepthBuffer(); void ClearDepthBuffer(); void SetScissor(); void SetVRAMRenderTarget(); @@ -229,6 +230,7 @@ private: std::unique_ptr m_vram_texture; std::unique_ptr m_vram_depth_texture; + std::unique_ptr m_vram_depth_copy_texture; std::unique_ptr m_vram_read_texture; std::unique_ptr m_vram_readback_texture; std::unique_ptr m_vram_readback_download_texture; @@ -269,6 +271,7 @@ private: bool m_allow_shader_blend : 1 = false; u8 m_texpage_dirty = 0; + bool m_depth_was_copied = false; BatchConfig m_batch; @@ -295,8 +298,10 @@ private: std::unique_ptr m_vram_update_depth_pipeline; std::unique_ptr m_vram_write_replacement_pipeline; - std::array, 2> m_vram_extract_pipeline; // [24bit] + std::array, 3> m_vram_extract_pipeline; // [24bit, 2=depth] std::unique_ptr m_vram_extract_texture; + std::unique_ptr m_vram_extract_depth_texture; + std::unique_ptr m_copy_depth_pipeline; std::unique_ptr m_downsample_texture; std::unique_ptr m_downsample_first_pass_pipeline; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 65d989a7c..a01945ba5 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1031,16 +1031,19 @@ float3 ApplyDebanding(float2 frag_coord) return ss.str(); } -std::string GPU_HW_ShaderGen::GenerateVRAMExtractFragmentShader(bool depth_24bit) +std::string GPU_HW_ShaderGen::GenerateVRAMExtractFragmentShader(bool color_24bit, bool depth_buffer) { std::stringstream ss; WriteHeader(ss); - DefineMacro(ss, "DEPTH_24BIT", depth_24bit); + DefineMacro(ss, "COLOR_24BIT", color_24bit); + DefineMacro(ss, "DEPTH_BUFFER", depth_buffer); DefineMacro(ss, "MULTISAMPLED", UsingMSAA()); 
WriteCommonFunctions(ss); DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_skip_x", "uint u_line_skip"}, true); DeclareTexture(ss, "samp0", 0, UsingMSAA()); + if (depth_buffer) + DeclareTexture(ss, "samp1", 1, UsingMSAA()); ss << R"( float4 LoadVRAM(int2 coords) @@ -1056,6 +1059,22 @@ float4 LoadVRAM(int2 coords) #endif } +#if DEPTH_BUFFER +float LoadDepth(int2 coords) +{ + // Need to duplicate because different types in different languages... +#if MULTISAMPLING + float value = LOAD_TEXTURE_MS(samp1, coords, 0u).r; + FOR_UNROLL (uint sample_index = 1u; sample_index < MULTISAMPLES; sample_index++) + value += LOAD_TEXTURE_MS(samp1, coords, sample_index).r; + value /= float(MULTISAMPLES); + return value; +#else + return LOAD_TEXTURE(samp1, coords, 0).r; +#endif +} +#endif + float3 SampleVRAM24(uint2 icoords) { // load adjacent 16-bit texels @@ -1075,15 +1094,20 @@ float3 SampleVRAM24(uint2 icoords) } )"; - DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1); + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, depth_buffer ? 
2 : 1); ss << R"( { uint2 icoords = uint2(uint(v_pos.x) + u_skip_x, uint(v_pos.y) << u_line_skip); + int2 wrapped_coords = int2((icoords + u_vram_offset) % VRAM_SIZE); - #if DEPTH_24BIT + #if COLOR_24BIT o_col0 = float4(SampleVRAM24(icoords), 1.0); #else - o_col0 = float4(LoadVRAM(int2((icoords + u_vram_offset) % VRAM_SIZE)).rgb, 1.0); + o_col0 = float4(LoadVRAM(wrapped_coords).rgb, 1.0); + #endif + + #if DEPTH_BUFFER + o_col1 = float4(LoadDepth(wrapped_coords), 0.0, 0.0, 0.0); #endif } )"; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index e5fdf81d3..f87bf2bcf 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -25,7 +25,7 @@ public: std::string GenerateVRAMCopyFragmentShader(); std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced); std::string GenerateVRAMUpdateDepthFragmentShader(); - std::string GenerateVRAMExtractFragmentShader(bool depth_24bit); + std::string GenerateVRAMExtractFragmentShader(bool color_24bit, bool depth_buffer); std::string GenerateAdaptiveDownsampleVertexShader(); std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass); diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index eefba1a15..85e94a5a8 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -486,7 +486,7 @@ void GPU_SW::UpdateDisplay() const u32 line_skip = m_GPUSTAT.vertical_resolution; if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, line_skip, is_24bit)) { - SetDisplayTexture(m_upload_texture.get(), 0, 0, read_width, read_height); + SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, read_width, read_height); if (is_24bit && g_settings.gpu_24bit_chroma_smoothing) { if (ApplyChromaSmoothing()) @@ -502,7 +502,7 @@ void GPU_SW::UpdateDisplay() { if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, 0, is_24bit)) { - SetDisplayTexture(m_upload_texture.get(), 0, 0, read_width, read_height); + SetDisplayTexture(m_upload_texture.get(), 
nullptr, 0, 0, read_width, read_height); if (is_24bit && g_settings.gpu_24bit_chroma_smoothing) ApplyChromaSmoothing(); } @@ -511,7 +511,7 @@ void GPU_SW::UpdateDisplay() else { if (CopyOut(0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 0, false)) - SetDisplayTexture(m_upload_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT); + SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, VRAM_WIDTH, VRAM_HEIGHT); } } diff --git a/src/util/postprocessing.cpp b/src/util/postprocessing.cpp index 2437a28d7..af2b1ea81 100644 --- a/src/util/postprocessing.cpp +++ b/src/util/postprocessing.cpp @@ -396,6 +396,7 @@ void PostProcessing::Chain::LoadStages() SettingsInterface& si = GetLoadSettingsInterface(m_section); m_enabled = si.GetBoolValue(m_section, "Enabled", false); + m_wants_depth_buffer = false; const u32 stage_count = Config::GetStageCount(si, m_section); if (stage_count == 0) @@ -441,6 +442,13 @@ void PostProcessing::Chain::LoadStages() CheckTargets(g_gpu_device->GetWindowFormat(), g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight(), &progress); } + + // must be down here, because we need to compile first, triggered by CheckTargets() + for (std::unique_ptr& shader : m_stages) + m_wants_depth_buffer |= shader->WantsDepthBuffer(); + m_needs_depth_buffer = m_enabled && m_wants_depth_buffer; + if (m_wants_depth_buffer) + DEV_LOG("Depth buffer is needed."); } void PostProcessing::Chain::ClearStages() @@ -469,6 +477,7 @@ void PostProcessing::Chain::UpdateSettings(std::unique_lock& setting progress.SetProgressRange(stage_count); const GPUTexture::Format prev_format = m_target_format; + m_wants_depth_buffer = false; for (u32 i = 0; i < stage_count; i++) { @@ -516,6 +525,13 @@ void PostProcessing::Chain::UpdateSettings(std::unique_lock& setting s_timer.Reset(); DEV_LOG("Loaded {} post-processing stages.", stage_count); } + + // must be down here, because we need to compile first, triggered by CheckTargets() + for (std::unique_ptr& shader : m_stages) + m_wants_depth_buffer |= 
shader->WantsDepthBuffer(); + m_needs_depth_buffer = m_enabled && m_wants_depth_buffer; + if (m_wants_depth_buffer) + DEV_LOG("Depth buffer is needed."); } void PostProcessing::Chain::Toggle() @@ -534,12 +550,13 @@ void PostProcessing::Chain::Toggle() TRANSLATE_STR("OSDMessage", "Post-processing is now disabled."), Host::OSD_QUICK_DURATION); m_enabled = new_enabled; + m_needs_depth_buffer = new_enabled && m_wants_depth_buffer; if (m_enabled) s_timer.Reset(); } bool PostProcessing::Chain::CheckTargets(GPUTexture::Format target_format, u32 target_width, u32 target_height, - ProgressCallback* progress) + ProgressCallback* progress /* = nullptr */) { if (m_target_format == target_format && m_target_width == target_width && m_target_height == target_height) return true; @@ -562,6 +579,8 @@ bool PostProcessing::Chain::CheckTargets(GPUTexture::Format target_format, u32 t progress->SetProgressRange(static_cast(m_stages.size())); progress->SetProgressValue(0); + m_wants_depth_buffer = false; + for (size_t i = 0; i < m_stages.size(); i++) { Shader* const shader = m_stages[i].get(); @@ -580,11 +599,13 @@ bool PostProcessing::Chain::CheckTargets(GPUTexture::Format target_format, u32 t } progress->SetProgressValue(static_cast(i + 1)); + m_wants_depth_buffer |= shader->WantsDepthBuffer(); } m_target_format = target_format; m_target_width = target_width; m_target_height = target_height; + m_needs_depth_buffer = m_enabled && m_wants_depth_buffer; return true; } @@ -598,21 +619,24 @@ void PostProcessing::Chain::DestroyTextures() g_gpu_device->RecycleTexture(std::move(m_input_texture)); } -bool PostProcessing::Chain::Apply(GPUTexture* input_color, GPUTexture* final_target, s32 final_left, s32 final_top, - s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, - s32 native_height) +bool PostProcessing::Chain::Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, + s32 final_left, s32 final_top, s32 final_width, s32 
final_height, s32 orig_width, + s32 orig_height, s32 native_width, s32 native_height) { GL_SCOPE_FMT("{} Apply", m_section); GPUTexture* output = m_output_texture.get(); input_color->MakeReadyForSampling(); + if (input_depth) + input_depth->MakeReadyForSampling(); for (const std::unique_ptr& stage : m_stages) { const bool is_final = (stage.get() == m_stages.back().get()); - if (!stage->Apply(input_color, is_final ? final_target : output, final_left, final_top, final_width, final_height, - orig_width, orig_height, native_width, native_height, m_target_width, m_target_height)) + if (!stage->Apply(input_color, input_depth, is_final ? final_target : output, final_left, final_top, final_width, + final_height, orig_width, orig_height, native_width, native_height, m_target_width, + m_target_height)) { return false; } diff --git a/src/util/postprocessing.h b/src/util/postprocessing.h index 335f4c012..af4f0bb9d 100644 --- a/src/util/postprocessing.h +++ b/src/util/postprocessing.h @@ -115,6 +115,7 @@ public: ~Chain(); ALWAYS_INLINE bool HasStages() const { return m_stages.empty(); } + ALWAYS_INLINE bool NeedsDepthBuffer() const { return m_needs_depth_buffer; } ALWAYS_INLINE GPUTexture* GetInputTexture() const { return m_input_texture.get(); } ALWAYS_INLINE GPUTexture* GetOutputTexture() const { return m_output_texture.get(); } @@ -133,8 +134,8 @@ public: bool CheckTargets(GPUTexture::Format target_format, u32 target_width, u32 target_height, ProgressCallback* progress = nullptr); - bool Apply(GPUTexture* input_color, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, - s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, s32 native_height); + bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, s32 final_left, s32 final_top, + s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, s32 native_height); private: void ClearStagesWithError(const Error& error); @@ -145,6 +146,8 
@@ private: u32 m_target_width = 0; u32 m_target_height = 0; bool m_enabled = false; + bool m_wants_depth_buffer = false; + bool m_needs_depth_buffer = false; std::vector> m_stages; std::unique_ptr m_input_texture; diff --git a/src/util/postprocessing_shader.h b/src/util/postprocessing_shader.h index 5dabda31e..3930d4629 100644 --- a/src/util/postprocessing_shader.h +++ b/src/util/postprocessing_shader.h @@ -37,6 +37,7 @@ public: ALWAYS_INLINE bool HasOptions() const { return !m_options.empty(); } virtual bool IsValid() const = 0; + virtual bool WantsDepthBuffer() const = 0; std::vector TakeOptions(); void LoadOptions(const SettingsInterface& si, const char* section); @@ -48,9 +49,9 @@ public: virtual bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height, ProgressCallback* progress) = 0; - virtual bool Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, - s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, - u32 target_width, u32 target_height) = 0; + virtual bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, s32 final_left, + s32 final_top, s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, + s32 native_width, s32 native_height, u32 target_width, u32 target_height) = 0; protected: static void ParseKeyValue(std::string_view line, std::string_view* key, std::string_view* value); diff --git a/src/util/postprocessing_shader_fx.cpp b/src/util/postprocessing_shader_fx.cpp index 961497764..28def57d8 100644 --- a/src/util/postprocessing_shader_fx.cpp +++ b/src/util/postprocessing_shader_fx.cpp @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "postprocessing_shader_fx.h" @@ -12,6 +11,7 @@ #include "core/settings.h" #include "common/assert.h" +#include "common/bitutils.h" #include 
"common/error.h" #include "common/file_system.h" #include "common/log.h" @@ -368,6 +368,11 @@ bool PostProcessing::ReShadeFXShader::IsValid() const return m_valid; } +bool PostProcessing::ReShadeFXShader::WantsDepthBuffer() const +{ + return m_wants_depth_buffer; +} + bool PostProcessing::ReShadeFXShader::CreateModule(s32 buffer_width, s32 buffer_height, reshadefx::module* mod, std::string code, Error* error) { @@ -396,6 +401,10 @@ bool PostProcessing::ReShadeFXShader::CreateModule(s32 buffer_width, s32 buffer_ pp.add_macro_definition("BUFFER_RCP_WIDTH", std::to_string(1.0f / static_cast(buffer_width))); pp.add_macro_definition("BUFFER_RCP_HEIGHT", std::to_string(1.0f / static_cast(buffer_height))); pp.add_macro_definition("BUFFER_COLOR_BIT_DEPTH", "32"); + pp.add_macro_definition("RESHADE_DEPTH_INPUT_IS_UPSIDE_DOWN", "0"); + pp.add_macro_definition("RESHADE_DEPTH_INPUT_IS_LOGARITHMIC", "0"); + pp.add_macro_definition("RESHADE_DEPTH_LINEARIZATION_FAR_PLANE", "1000.0"); + pp.add_macro_definition("RESHADE_DEPTH_INPUT_IS_REVERSED", "0"); switch (GetRenderAPI()) { @@ -435,7 +444,7 @@ bool PostProcessing::ReShadeFXShader::CreateModule(s32 buffer_width, s32 buffer_ cg->write_result(*mod); - // FileSystem::WriteBinaryFile("D:\\out.txt", mod->code.data(), mod->code.size()); + FileSystem::WriteBinaryFile("D:\\out.txt", mod->code.data(), mod->code.size()); return true; } @@ -837,11 +846,16 @@ bool PostProcessing::ReShadeFXShader::GetSourceOption(const reshadefx::uniform_i *si = (ui.type.base == reshadefx::type::t_float) ? SourceOptionType::RandomF : SourceOptionType::Random; return true; } - else if (source == "overlay_active" || source == "has_depth") + else if (source == "overlay_active") { *si = SourceOptionType::Zero; return true; } + else if (source == "has_depth") + { + *si = SourceOptionType::HasDepth; + return true; + } else if (source == "bufferwidth") { *si = (ui.type.base == reshadefx::type::t_float) ? 
SourceOptionType::BufferWidthF : SourceOptionType::BufferWidth; @@ -1185,8 +1199,8 @@ bool PostProcessing::ReShadeFXShader::CreatePasses(GPUTexture::Format backbuffer } else if (ti.semantic == "DEPTH") { - WARNING_LOG("Shader '{}' uses input depth as '{}' which is not supported.", m_name, si.texture_name); sampler.texture_id = INPUT_DEPTH_TEXTURE; + m_wants_depth_buffer = true; break; } else if (!ti.semantic.empty()) @@ -1252,18 +1266,18 @@ const char* PostProcessing::ReShadeFXShader::GetTextureNameForID(TextureID id) c return m_textures[static_cast(id)].reshade_name.c_str(); } -GPUTexture* PostProcessing::ReShadeFXShader::GetTextureByID(TextureID id, GPUTexture* input, - GPUTexture* final_target) const +GPUTexture* PostProcessing::ReShadeFXShader::GetTextureByID(TextureID id, GPUTexture* input_color, + GPUTexture* input_depth, GPUTexture* final_target) const { if (id < 0) { if (id == INPUT_COLOR_TEXTURE) { - return input; + return input_color; } else if (id == INPUT_DEPTH_TEXTURE) { - return PostProcessing::GetDummyTexture(); + return input_depth ? 
input_depth : GetDummyTexture(); } else if (id == OUTPUT_COLOR_TEXTURE) { @@ -1291,6 +1305,7 @@ bool PostProcessing::ReShadeFXShader::CompilePipeline(GPUTexture::Format format, m_valid = false; m_textures.clear(); m_passes.clear(); + m_wants_depth_buffer = false; std::string fxcode; if (!PreprocessorReadFileCallback(m_filename, fxcode)) @@ -1474,9 +1489,10 @@ bool PostProcessing::ReShadeFXShader::ResizeOutput(GPUTexture::Format format, u3 return true; } -bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, - s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, - s32 native_width, s32 native_height, u32 target_width, u32 target_height) +bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, + s32 final_left, s32 final_top, s32 final_width, s32 final_height, + s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, + u32 target_width, u32 target_height) { GL_PUSH_FMT("PostProcessingShaderFX {}", m_name); @@ -1507,6 +1523,13 @@ bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUTexture* final } break; + case SourceOptionType::HasDepth: + { + const u32 value = BoolToUInt32(input_depth != nullptr); + std::memcpy(dst, &value, sizeof(value)); + } + break; + case SourceOptionType::Timer: { const float value = static_cast(PostProcessing::GetTimer().GetTimeMilliseconds()); @@ -1750,7 +1773,7 @@ bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUTexture* final // Sucks doing this twice, but we need to set the RT first (for DX11), and transition layouts (for VK). 
for (const Sampler& sampler : pass.samplers) { - GPUTexture* const tex = GetTextureByID(sampler.texture_id, input, final_target); + GPUTexture* const tex = GetTextureByID(sampler.texture_id, input_color, input_depth, final_target); if (tex) tex->MakeReadyForSampling(); } @@ -1771,7 +1794,7 @@ bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUTexture* final { GL_INS_FMT("Render Target {}: ID {} [{}]", i, pass.render_targets[i], GetTextureNameForID(pass.render_targets[i])); - render_targets[i] = GetTextureByID(pass.render_targets[i], input, final_target); + render_targets[i] = GetTextureByID(pass.render_targets[i], input_color, input_depth, final_target); DebugAssert(render_targets[i]); } @@ -1795,8 +1818,8 @@ bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUTexture* final GL_INS_FMT("Texture Sampler {}: ID {} [{}]", sampler.slot, sampler.texture_id, GetTextureNameForID(sampler.texture_id)); - g_gpu_device->SetTextureSampler(sampler.slot, GetTextureByID(sampler.texture_id, input, final_target), - sampler.sampler); + g_gpu_device->SetTextureSampler( + sampler.slot, GetTextureByID(sampler.texture_id, input_color, input_depth, final_target), sampler.sampler); bound_textures[sampler.slot] = true; } @@ -1811,6 +1834,10 @@ bool PostProcessing::ReShadeFXShader::Apply(GPUTexture* input, GPUTexture* final g_gpu_device->Draw(pass.num_vertices, 0); } + // Don't leave any textures bound. 
+ for (u32 i = 0; i < GPUDevice::MAX_TEXTURE_SAMPLERS; i++) + g_gpu_device->SetTextureSampler(i, nullptr, nullptr); + GL_POP(); m_frame_timer.Reset(); return true; diff --git a/src/util/postprocessing_shader_fx.h b/src/util/postprocessing_shader_fx.h index d28fdd83d..0ff054b75 100644 --- a/src/util/postprocessing_shader_fx.h +++ b/src/util/postprocessing_shader_fx.h @@ -24,15 +24,16 @@ public: ~ReShadeFXShader(); bool IsValid() const override; + bool WantsDepthBuffer() const override; bool LoadFromFile(std::string name, std::string filename, bool only_config, Error* error); bool LoadFromString(std::string name, std::string filename, std::string code, bool only_config, Error* error); bool ResizeOutput(GPUTexture::Format format, u32 width, u32 height) override; bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height, ProgressCallback* progress) override; - bool Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, - s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, u32 target_width, - u32 target_height) override; + bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, s32 final_left, s32 final_top, + s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, + u32 target_width, u32 target_height) override; private: using TextureID = s32; @@ -45,6 +46,7 @@ private: { None, Zero, + HasDepth, Timer, FrameTime, FrameCount, @@ -98,7 +100,8 @@ private: bool CreatePasses(GPUTexture::Format backbuffer_format, reshadefx::module& mod, Error* error); const char* GetTextureNameForID(TextureID id) const; - GPUTexture* GetTextureByID(TextureID id, GPUTexture* input, GPUTexture* final_target) const; + GPUTexture* GetTextureByID(TextureID id, GPUTexture* input_color, GPUTexture* input_depth, + GPUTexture* final_target) const; std::string m_filename; @@ -135,6 +138,7 @@ private: std::vector m_source_options; 
u32 m_uniforms_size = 0; bool m_valid = false; + bool m_wants_depth_buffer = false; Common::Timer m_frame_timer; u32 m_frame_count = 0; diff --git a/src/util/postprocessing_shader_glsl.cpp b/src/util/postprocessing_shader_glsl.cpp index 8b9482cab..0861d3490 100644 --- a/src/util/postprocessing_shader_glsl.cpp +++ b/src/util/postprocessing_shader_glsl.cpp @@ -62,6 +62,11 @@ bool PostProcessing::GLSLShader::IsValid() const return !m_name.empty() && !m_code.empty(); } +bool PostProcessing::GLSLShader::WantsDepthBuffer() const +{ + return false; +} + u32 PostProcessing::GLSLShader::GetUniformsSize() const { // lazy packing. todo improve. @@ -162,9 +167,10 @@ bool PostProcessing::GLSLShader::CompilePipeline(GPUTexture::Format format, u32 return true; } -bool PostProcessing::GLSLShader::Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, - s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, - s32 native_width, s32 native_height, u32 target_width, u32 target_height) +bool PostProcessing::GLSLShader::Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, + s32 final_left, s32 final_top, s32 final_width, s32 final_height, s32 orig_width, + s32 orig_height, s32 native_width, s32 native_height, u32 target_width, + u32 target_height) { GL_SCOPE_FMT("GLSL Shader {}", m_name); @@ -181,7 +187,7 @@ bool PostProcessing::GLSLShader::Apply(GPUTexture* input, GPUTexture* final_targ } g_gpu_device->SetPipeline(m_pipeline.get()); - g_gpu_device->SetTextureSampler(0, input, m_sampler.get()); + g_gpu_device->SetTextureSampler(0, input_color, m_sampler.get()); g_gpu_device->SetViewportAndScissor(final_left, final_top, final_width, final_height); const u32 uniforms_size = GetUniformsSize(); diff --git a/src/util/postprocessing_shader_glsl.h b/src/util/postprocessing_shader_glsl.h index d4203171f..108015c7a 100644 --- a/src/util/postprocessing_shader_glsl.h +++ b/src/util/postprocessing_shader_glsl.h @@ -17,15 +17,16 @@ 
public: ALWAYS_INLINE const std::string& GetCode() const { return m_code; } bool IsValid() const override; + bool WantsDepthBuffer() const override; bool LoadFromFile(std::string name, const char* filename, Error* error); bool LoadFromString(std::string name, std::string code, Error* error); bool ResizeOutput(GPUTexture::Format format, u32 width, u32 height) override; bool CompilePipeline(GPUTexture::Format format, u32 width, u32 height, ProgressCallback* progress) override; - bool Apply(GPUTexture* input, GPUTexture* final_target, s32 final_left, s32 final_top, s32 final_width, - s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, u32 target_width, - u32 target_height) override; + bool Apply(GPUTexture* input_color, GPUTexture* input_depth, GPUTexture* final_target, s32 final_left, s32 final_top, + s32 final_width, s32 final_height, s32 orig_width, s32 orig_height, s32 native_width, s32 native_height, + u32 target_width, u32 target_height) override; private: struct CommonUniforms