diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 77be92b64..7a9ea6b04 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1569,7 +1569,8 @@ void GPU::SetTextureWindow(u32 value) bool GPU::CompileDisplayPipeline() { - GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend); + GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, + g_gpu_device->GetFeatures().framebuffer_fetch); GPUPipeline::GraphicsConfig plconfig; plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index b65cb3cf3..42107cf5f 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -105,9 +105,9 @@ public: } ~ShaderCompileProgressTracker() = default; - void Increment() + void Increment(u32 progress = 1) { - m_progress++; + m_progress += progress; const u64 tv = Common::Timer::GetCurrentValue(); if ((tv - m_start_time) >= m_min_time && (tv - m_last_update_time) >= m_update_interval) @@ -209,6 +209,7 @@ bool GPU_HW::Initialize() m_resolution_scale = CalculateResolutionScale(); m_multisamples = std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples()); m_supports_dual_source_blend = features.dual_source_blend; + m_supports_framebuffer_fetch = features.framebuffer_fetch; m_per_sample_shading = g_settings.gpu_per_sample_shading && features.per_sample_shading; m_true_color = g_settings.gpu_true_color; m_scaled_dithering = g_settings.gpu_scaled_dithering; @@ -457,7 +458,7 @@ void GPU_HW::CheckSettings() TRANSLATE_STR("GPU_HW", "SSAA is not supported, using MSAA instead."), Host::OSD_ERROR_DURATION); } - if (!features.dual_source_blend && IsBlendedTextureFiltering(m_texture_filtering)) + if (!features.dual_source_blend && !features.framebuffer_fetch && IsBlendedTextureFiltering(m_texture_filtering)) { Host::AddIconOSDMessage( "TextureFilterUnsupported", ICON_FA_EXCLAMATION_TRIANGLE, @@ -738,16 +739,17 @@ bool GPU_HW::CompilePipelines() const GPUDevice::Features features = g_gpu_device->GetFeatures(); GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend); + m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend, + m_supports_framebuffer_fetch); - ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + - (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); + ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 5 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + + 2 + (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); // vertex shaders - [textured] // fragment shaders - [render_mode][texture_mode][dithering][interlacing] static constexpr auto destroy_shader = [](std::unique_ptr& s) { s.reset(); }; DimensionalArray, 2> batch_vertex_shaders{}; - DimensionalArray, 2, 2, 9, 4> batch_fragment_shaders{}; + DimensionalArray, 2, 2, 9, 5, 4> batch_fragment_shaders{}; ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { batch_vertex_shaders.enumerate(destroy_shader); batch_fragment_shaders.enumerate(destroy_shader); @@ -764,23 +766,47 @@ bool GPU_HW::CompilePipelines() for (u8 render_mode = 0; render_mode < 4; render_mode++) { - for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) { - for (u8 dithering = 0; dithering < 2; dithering++) + if (m_supports_framebuffer_fetch) { - for (u8 interlacing = 0; interlacing < 2; interlacing++) + // Don't need multipass shaders. + if (render_mode != static_cast(BatchRenderMode::TransparencyDisabled) && + render_mode != static_cast(BatchRenderMode::TransparentAndOpaque)) { - const std::string fs = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(texture_mode), - ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); + progress.Increment(2 * 2 * 9); + continue; + } + } + else + { + // Can't generate shader blending. + if (transparency_mode != static_cast(GPUTransparencyMode::Disabled)) + { + progress.Increment(2 * 2 * 9); + continue; + } + } - if (!(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing] = - g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs))) + for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) { - return false; - } + const std::string fs = shadergen.GenerateBatchFragmentShader( + static_cast(render_mode), static_cast(transparency_mode), + static_cast(texture_mode), ConvertToBoolUnchecked(dithering), + ConvertToBoolUnchecked(interlacing)); - progress.Increment(); + if (!(batch_fragment_shaders[render_mode][transparency_mode][texture_mode][dithering][interlacing] = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs))) + { + return false; + } + + progress.Increment(); + } } } } @@ -818,6 +844,17 @@ bool GPU_HW::CompilePipelines() { for (u8 render_mode = 0; render_mode < 4; render_mode++) { + if (m_supports_framebuffer_fetch) + { + // Don't need multipass shaders. + if (render_mode != static_cast(BatchRenderMode::TransparencyDisabled) && + render_mode != static_cast(BatchRenderMode::TransparentAndOpaque)) + { + progress.Increment(2 * 2 * 9 * 5); + continue; + } + } + for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) { for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) @@ -830,6 +867,8 @@ bool GPU_HW::CompilePipelines() GPUPipeline::DepthFunc::Always, GPUPipeline::DepthFunc::GreaterEqual, GPUPipeline::DepthFunc::LessEqual}; const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); + const bool use_shader_blending = + (textured && NeedsShaderBlending(static_cast(transparency_mode))); plconfig.input_layout.vertex_attributes = textured ? @@ -841,16 +880,21 @@ bool GPU_HW::CompilePipelines() plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get(); plconfig.fragment_shader = - batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing].get(); + batch_fragment_shaders[render_mode] + [use_shader_blending ? transparency_mode : + static_cast(GPUTransparencyMode::Disabled)] + [texture_mode][dithering][interlacing] + .get(); plconfig.depth.depth_test = depth_test_values[depth_test]; plconfig.depth.depth_write = !m_pgxp_depth_buffer || depth_test != 0; plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - if ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && - (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || - IsBlendedTextureFiltering(m_texture_filtering)) + if (!use_shader_blending && + ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && + (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || + (textured && IsBlendedTextureFiltering(m_texture_filtering)))) { plconfig.blend.enable = true; plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One; @@ -2051,17 +2095,24 @@ GPU_HW::InterlacedRenderMode GPU_HW::GetInterlacedRenderMode() const } } -ALWAYS_INLINE bool GPU_HW::NeedsTwoPassRendering() const +ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsTwoPassRendering() const { // We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled // on a per-pixel basis, and the opaque pixels shouldn't be blended at all. - // TODO: see if there's a better way we can do this. definitely can with fbfetch. - return (m_batch.texture_mode != GPUTextureMode::Disabled && + return (m_batch.texture_mode != GPUTextureMode::Disabled && !m_supports_framebuffer_fetch && (m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground || (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled))); } +ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency) const +{ + return (m_supports_framebuffer_fetch && + (transparency == GPUTransparencyMode::BackgroundMinusForeground || + (!m_supports_dual_source_blend && + (transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering))))); +} + ALWAYS_INLINE u32 GPU_HW::GetBatchVertexSpace() const { return static_cast(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); @@ -2484,7 +2535,8 @@ void GPU_HW::DispatchRenderCommand() EnsureVertexBufferSpaceForCurrentCommand(); // transparency mode change - if (m_batch.transparency_mode != transparency_mode && transparency_mode != GPUTransparencyMode::Disabled) + if (transparency_mode != GPUTransparencyMode::Disabled && + (texture_mode == GPUTextureMode::Disabled || !NeedsShaderBlending(transparency_mode))) { static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 414ee75db..dac266249 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -168,6 +168,9 @@ private: /// Returns if the draw needs to be broken into opaque/transparent passes. bool NeedsTwoPassRendering() const; + /// Returns true if the draw is going to use shader blending/framebuffer fetch. + bool NeedsShaderBlending(GPUTransparencyMode transparency) const; + void FillBackendCommandParameters(GPUBackendCommand* cmd) const; void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; void UpdateSoftwareRenderer(bool copy_vram_from_hw); @@ -235,10 +238,11 @@ private: union { BitField m_supports_dual_source_blend; - BitField m_per_sample_shading; - BitField m_scaled_dithering; - BitField m_chroma_smoothing; - BitField m_disable_color_perspective; + BitField m_supports_framebuffer_fetch; + BitField m_per_sample_shading; + BitField m_scaled_dithering; + BitField m_chroma_smoothing; + BitField m_disable_color_perspective; u8 bits = 0; }; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index cb94eb431..6c8800567 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -8,8 +8,9 @@ GPU_HW_ShaderGen::GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, bool pgxp_depth, - bool disable_color_perspective, bool supports_dual_source_blend) - : ShaderGen(render_api, supports_dual_source_blend), m_resolution_scale(resolution_scale), + bool disable_color_perspective, bool supports_dual_source_blend, + bool supports_framebuffer_fetch) + : ShaderGen(render_api, supports_dual_source_blend, supports_framebuffer_fetch), m_resolution_scale(resolution_scale), m_multisamples(multisamples), m_per_sample_shading(per_sample_shading), m_true_color(true_color), m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits), m_pgxp_depth(pgxp_depth), m_disable_color_perspective(disable_color_perspective) @@ -629,22 +630,29 @@ void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits, } } -std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, - GPUTextureMode texture_mode, bool dithering, bool interlacing) +std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, + GPUTransparencyMode transparency, GPUTextureMode texture_mode, + bool dithering, bool interlacing) { + // Shouldn't be using shader blending without fbfetch. + DebugAssert(m_supports_framebuffer_fetch || transparency == GPUTransparencyMode::Disabled); + const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit; const bool raw_texture = (texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit; const bool textured = (texture_mode != GPUTextureMode::Disabled); - const bool use_dual_source = - m_supports_dual_source_blend && ((transparency != GPU_HW::BatchRenderMode::TransparencyDisabled && - transparency != GPU_HW::BatchRenderMode::OnlyOpaque) || - m_texture_filter != GPUTextureFilter::Nearest); + const bool use_framebuffer_fetch = (m_supports_framebuffer_fetch && transparency != GPUTransparencyMode::Disabled); + const bool use_dual_source = !use_framebuffer_fetch && m_supports_dual_source_blend && + ((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled && + render_mode != GPU_HW::BatchRenderMode::OnlyOpaque) || + m_texture_filter != GPUTextureFilter::Nearest); std::stringstream ss; WriteHeader(ss); - DefineMacro(ss, "TRANSPARENCY", transparency != GPU_HW::BatchRenderMode::TransparencyDisabled); - DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == GPU_HW::BatchRenderMode::OnlyOpaque); - DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", transparency == GPU_HW::BatchRenderMode::OnlyTransparent); + DefineMacro(ss, "TRANSPARENCY", render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled); + DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", render_mode == GPU_HW::BatchRenderMode::OnlyOpaque); + DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", render_mode == GPU_HW::BatchRenderMode::OnlyTransparent); + DefineMacro(ss, "TRANSPARENCY_MODE", static_cast(transparency)); + DefineMacro(ss, "SHADER_BLENDING", use_framebuffer_fetch); DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "PALETTE", actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit); @@ -771,19 +779,19 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), - false, m_disable_color_perspective); + false, m_disable_color_perspective, use_framebuffer_fetch); } else { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, - !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), false, - m_disable_color_perspective); + !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), false, m_disable_color_perspective, + use_framebuffer_fetch); } } else { DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), - UsingPerSampleShading(), false, m_disable_color_perspective); + UsingPerSampleShading(), false, m_disable_color_perspective, use_framebuffer_fetch); } ss << R"( @@ -883,7 +891,7 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) // Premultiply alpha so we don't need to use a colour output for it. float premultiply_alpha = ialpha; - #if TRANSPARENCY + #if TRANSPARENCY && !SHADER_BLENDING premultiply_alpha = ialpha * (semitransparent ? u_src_alpha_factor : 1.0); #endif @@ -897,7 +905,34 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) color = (float3(icolor) * premultiply_alpha) / float3(255.0, 255.0, 255.0); #endif - #if TRANSPARENCY && TEXTURED + #if SHADER_BLENDING + float4 bg_col = LAST_FRAG_COLOR; + float4 fg_col = float4(color, oalpha); + + #if TEXTURE_FILTERING + #if TRANSPARENCY_MODE == 0 || TRANSPARENCY_MODE == 3 + bg_col.rgb /= ialpha; + #endif + fg_col.rgb *= ialpha; + #endif + + o_col0.a = fg_col.a; + #if TRANSPARENCY_MODE == 0 // Half BG + Half FG. + o_col0.rgb = (bg_col.rgb * 0.5) + (fg_col.rgb * 0.5); + #elif TRANSPARENCY_MODE == 1 // BG + FG + o_col0.rgb = bg_col.rgb + fg_col.rgb; + #elif TRANSPARENCY_MODE == 2 // BG - FG + o_col0.rgb = bg_col.rgb - fg_col.rgb; + #elif TRANSPARENCY_MODE == 3 // BG + 1/4 FG. + o_col0.rgb = bg_col.rgb + (fg_col.rgb * 0.25); + #else + o_col0.rgb = fg_col.rgb; + #endif + #if TRANSPARENCY + // If pixel isn't marked as semitransparent, replace with previous colour. + o_col0 = semitransparent ? o_col0 : fg_col; + #endif + #elif TRANSPARENCY && TEXTURED // Apply semitransparency. If not a semitransparent texel, destination alpha is ignored. if (semitransparent) { diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 96274dac3..cda16ce22 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -10,12 +10,13 @@ class GPU_HW_ShaderGen : public ShaderGen public: GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, - bool pgxp_depth, bool disable_color_perspective, bool supports_dual_source_blend); + bool pgxp_depth, bool disable_color_perspective, bool supports_dual_source_blend, + bool supports_framebuffer_fetch); ~GPU_HW_ShaderGen(); std::string GenerateBatchVertexShader(bool textured); - std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode, - bool dithering, bool interlacing); + std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, + GPUTextureMode texture_mode, bool dithering, bool interlacing); std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode, bool smooth_chroma); std::string GenerateWireframeGeometryShader(); diff --git a/src/core/gpu_shadergen.cpp b/src/core/gpu_shadergen.cpp index 138b26a38..76583063b 100644 --- a/src/core/gpu_shadergen.cpp +++ b/src/core/gpu_shadergen.cpp @@ -3,8 +3,8 @@ #include "gpu_shadergen.h" -GPUShaderGen::GPUShaderGen(RenderAPI render_api, bool supports_dual_source_blend) - : ShaderGen(render_api, supports_dual_source_blend) +GPUShaderGen::GPUShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch) + : ShaderGen(render_api, supports_dual_source_blend, supports_framebuffer_fetch) { } @@ -62,7 +62,8 @@ std::string GPUShaderGen::GenerateDisplaySharpBilinearFragmentShader() WriteDisplayUniformBuffer(ss); DeclareTexture(ss, "samp0", 0, false); - // Based on https://github.com/rsn8887/Sharp-Bilinear-Shaders/blob/master/Copy_To_RetroPie/shaders/sharp-bilinear-simple.glsl + // Based on + // https://github.com/rsn8887/Sharp-Bilinear-Shaders/blob/master/Copy_To_RetroPie/shaders/sharp-bilinear-simple.glsl DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1, false, false, false, false); ss << R"( { diff --git a/src/core/gpu_shadergen.h b/src/core/gpu_shadergen.h index 2ccd13b7a..8a4d0cac7 100644 --- a/src/core/gpu_shadergen.h +++ b/src/core/gpu_shadergen.h @@ -8,7 +8,7 @@ class GPUShaderGen : public ShaderGen { public: - GPUShaderGen(RenderAPI render_api, bool supports_dual_source_blend); + GPUShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch); ~GPUShaderGen(); std::string GenerateDisplayVertexShader(); diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index 147705006..948e16d2e 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -170,6 +170,7 @@ void D3D11Device::SetFeatures() } m_features.dual_source_blend = true; + m_features.framebuffer_fetch = false; m_features.per_sample_shading = (feature_level >= D3D_FEATURE_LEVEL_10_1); m_features.noperspective_interpolation = true; m_features.supports_texture_buffers = true; diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index 0f05e9759..001e11c0a 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1173,6 +1173,7 @@ void D3D12Device::SetFeatures() } m_features.dual_source_blend = true; + m_features.framebuffer_fetch = false; m_features.noperspective_interpolation = true; m_features.per_sample_shading = true; m_features.supports_texture_buffers = true; diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index dd40cf36a..def074a0a 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -410,7 +410,7 @@ bool GPUDevice::CreateResources() if (!(m_linear_sampler = CreateSampler(GPUSampler::GetLinearConfig()))) return false; - ShaderGen shadergen(GetRenderAPI(), m_features.dual_source_blend); + ShaderGen shadergen(GetRenderAPI(), m_features.dual_source_blend, m_features.framebuffer_fetch); std::unique_ptr imgui_vs = CreateShader(GPUShaderStage::Vertex, shadergen.GenerateImGuiVertexShader()); std::unique_ptr imgui_fs = CreateShader(GPUShaderStage::Fragment, shadergen.GenerateImGuiFragmentShader()); diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index da8972974..a659e4c88 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -446,6 +446,7 @@ public: struct Features { bool dual_source_blend : 1; + bool framebuffer_fetch : 1; bool per_sample_shading : 1; bool noperspective_interpolation : 1; bool supports_texture_buffers : 1; diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index e1bb8424c..d95f3189e 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -212,6 +212,7 @@ void MetalDevice::SetFeatures() } m_features.dual_source_blend = true; + m_features.framebuffer_fetch = false; // TODO m_features.per_sample_shading = true; m_features.noperspective_interpolation = true; m_features.supports_texture_buffers = true; diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index a3d961462..02b6c9026 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -259,7 +259,7 @@ void OpenGLDevice::InsertDebugMessage(const char* msg) if (msg[0] != '\0') { - glDebugMessageInsert(GL_DEBUG_SOURCE_APPLICATION, GL_DEBUG_TYPE_OTHER, 0, GL_DEBUG_SEVERITY_NOTIFICATION, + glDebugMessageInsert(GL_DEBUG_SOURCE_APPLICATION, GL_DEBUG_TYPE_MARKER, 0, GL_DEBUG_SEVERITY_NOTIFICATION, static_cast(std::strlen(msg)), msg); } #endif @@ -430,6 +430,8 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo) (max_dual_source_draw_buffers > 0) && (GLAD_GL_VERSION_3_3 || GLAD_GL_ARB_blend_func_extended || GLAD_GL_EXT_blend_func_extended); + m_features.framebuffer_fetch = (GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch); + #ifdef __APPLE__ // Partial texture buffer uploads appear to be broken in macOS's OpenGL driver. m_features.supports_texture_buffers = false; diff --git a/src/util/postprocessing_shader_glsl.cpp b/src/util/postprocessing_shader_glsl.cpp index 6845fe501..3833d7085 100644 --- a/src/util/postprocessing_shader_glsl.cpp +++ b/src/util/postprocessing_shader_glsl.cpp @@ -18,7 +18,7 @@ namespace { class PostProcessingGLSLShaderGen : public ShaderGen { public: - PostProcessingGLSLShaderGen(RenderAPI render_api, bool supports_dual_source_blend); + PostProcessingGLSLShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch); ~PostProcessingGLSLShaderGen(); std::string GeneratePostProcessingVertexShader(const PostProcessing::GLSLShader& shader); @@ -117,7 +117,8 @@ bool PostProcessing::GLSLShader::CompilePipeline(GPUTexture::Format format, u32 if (m_pipeline) m_pipeline.reset(); - PostProcessingGLSLShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend); + PostProcessingGLSLShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, + g_gpu_device->GetFeatures().framebuffer_fetch); std::unique_ptr vs = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GeneratePostProcessingVertexShader(*this)); @@ -320,8 +321,9 @@ void PostProcessing::GLSLShader::LoadOptions() } } -PostProcessingGLSLShaderGen::PostProcessingGLSLShaderGen(RenderAPI render_api, bool supports_dual_source_blend) - : ShaderGen(render_api, supports_dual_source_blend) +PostProcessingGLSLShaderGen::PostProcessingGLSLShaderGen(RenderAPI render_api, bool supports_dual_source_blend, + bool supports_framebuffer_fetch) + : ShaderGen(render_api, supports_dual_source_blend, supports_framebuffer_fetch) { } diff --git a/src/util/shadergen.cpp b/src/util/shadergen.cpp index f8d0c95f1..03ec50a7b 100644 --- a/src/util/shadergen.cpp +++ b/src/util/shadergen.cpp @@ -14,10 +14,11 @@ Log_SetChannel(ShaderGen); -ShaderGen::ShaderGen(RenderAPI render_api, bool supports_dual_source_blend) +ShaderGen::ShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch) : m_render_api(render_api), m_glsl(render_api != RenderAPI::D3D11 && render_api != RenderAPI::D3D12), m_spirv(render_api == RenderAPI::Vulkan || render_api == RenderAPI::Metal), - m_supports_dual_source_blend(supports_dual_source_blend), m_use_glsl_interface_blocks(false) + m_supports_dual_source_blend(supports_dual_source_blend), m_supports_framebuffer_fetch(supports_framebuffer_fetch), + m_use_glsl_interface_blocks(false) { #if defined(ENABLE_OPENGL) || defined(ENABLE_VULKAN) || defined(__APPLE__) if (m_glsl) @@ -62,6 +63,11 @@ void ShaderGen::DefineMacro(std::stringstream& ss, const char* name, bool enable ss << "#define " << name << " " << BoolToUInt32(enabled) << "\n"; } +void ShaderGen::DefineMacro(std::stringstream& ss, const char* name, s32 value) +{ + ss << "#define " << name << " " << value << "\n"; +} + #ifdef ENABLE_OPENGL void ShaderGen::SetGLSLVersionString() { @@ -123,6 +129,8 @@ void ShaderGen::WriteHeader(std::stringstream& ss) ss << "#extension GL_EXT_blend_func_extended : require\n"; if (GLAD_GL_ARB_blend_func_extended) ss << "#extension GL_ARB_blend_func_extended : require\n"; + if (GLAD_GL_EXT_shader_framebuffer_fetch) + ss << "#extension GL_EXT_shader_framebuffer_fetch : require\n"; // Test for V3D driver - we have to fudge coordinates slightly. if (std::strstr(reinterpret_cast(glGetString(GL_VENDOR)), "Broadcom") && @@ -151,6 +159,11 @@ void ShaderGen::WriteHeader(std::stringstream& ss) // Enable SSBOs if it's not required by the version. if (!GLAD_GL_VERSION_4_3 && !GLAD_GL_ES_VERSION_3_1 && GLAD_GL_ARB_shader_storage_buffer_object) ss << "#extension GL_ARB_shader_storage_buffer_object : require\n"; + + if (GLAD_GL_EXT_shader_framebuffer_fetch) + ss << "#extension GL_EXT_shader_framebuffer_fetch : require\n"; + else if (GLAD_GL_ARM_shader_framebuffer_fetch) + ss << "#extension GL_ARM_shader_framebuffer_fetch : require\n"; } #endif @@ -486,7 +499,7 @@ void ShaderGen::DeclareFragmentEntryPoint( const std::initializer_list>& additional_inputs, bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool depth_output /* = false */, bool msaa /* = false */, bool ssaa /* = false */, bool declare_sample_id /* = false */, - bool noperspective_color /* = false */) + bool noperspective_color /* = false */, bool framebuffer_fetch /* = false */) { if (m_glsl) { @@ -540,24 +553,43 @@ void ShaderGen::DeclareFragmentEntryPoint( if (depth_output) ss << "#define o_depth gl_FragDepth\n"; + const char* target_0_qualifier = "out"; +#ifdef ENABLE_OPENGL + if ((m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES) && m_supports_framebuffer_fetch && + framebuffer_fetch) + { + if (GLAD_GL_EXT_shader_framebuffer_fetch) + { + target_0_qualifier = "inout"; + ss << "#define LAST_FRAG_COLOR o_col0\n"; + } + else if (GLAD_GL_ARM_shader_framebuffer_fetch) + { + ss << "#define LAST_FRAG_COLOR gl_LastFragColorARM\n"; + } + } +#endif + if (m_use_glsl_binding_layout) { if (m_supports_dual_source_blend) { for (u32 i = 0; i < num_color_outputs; i++) - ss << "layout(location = 0, index = " << i << ") out float4 o_col" << i << ";\n"; + { + ss << "layout(location = 0, index = " << i << ") " << ((i == 0) ? target_0_qualifier : "out") + << " float4 o_col" << i << ";\n"; + } } else { Assert(num_color_outputs <= 1); - for (u32 i = 0; i < num_color_outputs; i++) - ss << "layout(location = " << i << ") out float4 o_col" << i << ";\n"; + ss << "layout(location = 0) " << target_0_qualifier << " float4 o_col0;\n"; } } else { for (u32 i = 0; i < num_color_outputs; i++) - ss << "out float4 o_col" << i << ";\n"; + ss << ((i == 0) ? target_0_qualifier : "out") << " float4 o_col" << i << ";\n"; } ss << "\n"; diff --git a/src/util/shadergen.h b/src/util/shadergen.h index 9d67a17e9..8cd4f119f 100644 --- a/src/util/shadergen.h +++ b/src/util/shadergen.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -11,7 +11,7 @@ class ShaderGen { public: - ShaderGen(RenderAPI render_api, bool supports_dual_source_blend); + ShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch); ~ShaderGen(); static bool UseGLSLBindingLayout(); @@ -36,6 +36,7 @@ protected: #endif void DefineMacro(std::stringstream& ss, const char* name, bool enabled); + void DefineMacro(std::stringstream& ss, const char* name, s32 value); void WriteHeader(std::stringstream& ss); void WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant_on_vulkan); void DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list& members, @@ -51,12 +52,13 @@ protected: const std::initializer_list>& additional_inputs, bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false, bool msaa = false, bool ssaa = false, bool declare_sample_id = false, - bool noperspective_color = false); + bool noperspective_color = false, bool framebuffer_fetch = false); RenderAPI m_render_api; bool m_glsl; bool m_spirv; bool m_supports_dual_source_blend; + bool m_supports_framebuffer_fetch; bool m_use_glsl_interface_blocks; bool m_use_glsl_binding_layout; bool m_has_uniform_buffer = false; diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 5f88e86bb..29a131135 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -2194,6 +2194,7 @@ bool VulkanDevice::CheckFeatures() m_max_multisamples = 1; m_features.dual_source_blend = m_device_features.dualSrcBlend; // TODO: Option to disable + m_features.framebuffer_fetch = false; // TODO: Option to disable if (!m_features.dual_source_blend) Log_WarningPrintf("Vulkan driver is missing dual-source blending. This will have an impact on performance.");