mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2024-11-30 09:35:40 +00:00
320 lines
16 KiB
HLSL
320 lines
16 KiB
HLSL
#ifndef _BLOOM_FUNCTIONS_H
|
|
#define _BLOOM_FUNCTIONS_H
|
|
|
|
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
|
|
|
// crt-royale: A full-featured CRT shader, with cheese.
|
|
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify it
|
|
// under the terms of the GNU General Public License as published by the Free
|
|
// Software Foundation; either version 2 of the License, or any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful, but WITHOUT
|
|
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
// more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License along with
|
|
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
// Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
|
|
///////////////////////////////// DESCRIPTION ////////////////////////////////
|
|
|
|
// These utility functions and constants help several passes determine the
|
|
// size and center texel weight of the phosphor bloom in a uniform manner.
|
|
|
|
|
|
////////////////////////////////// INCLUDES //////////////////////////////////
|
|
|
|
// We need to calculate the correct blur sigma using some .cgp constants:
|
|
//#include "../user-settings.h"
|
|
|
|
|
|
#include "user-settings.fxh"
|
|
#include "derived-settings-and-constants.fxh"
|
|
#include "bind-shader-params.fxh"
|
|
#include "blur-functions.fxh"
|
|
|
|
/////////////////////////////// BLOOM CONSTANTS //////////////////////////////
|
|
|
|
// Compute constants with manual inlines of the functions below:
|
|
//  Default blur threshold: the value passed as "thresh" to
//  get_min_sigma_to_blur_triad() by the sigma helpers in this file, i.e. the
//  max desired pixel difference left in a blurred phosphor triad.
static const float bloom_diff_thresh = 1.0/256.0;
|
|
|
|
|
|
|
|
/////////////////////////////////// HELPERS //////////////////////////////////
|
|
|
|
float get_min_sigma_to_blur_triad(const float triad_size,
    const float thresh)
{
    //  Purpose:    Estimate the smallest Gaussian sigma that blurs a phosphor
    //              triad to an even color.
    //  Requires:   1.) triad_size is the final phosphor triad size in pixels.
    //              2.) thresh is the max desired pixel difference in the
    //                  blurred triad (e.g. 1.0/256.0).
    //  Returns:    The minimum sigma that fully blurs a phosphor triad on the
    //              screen to an even color, within thresh.
    //  Notes:      This closed form was found by curve-fitting data
    //              (max error = ~0.086036, mean sq. error = ~0.0013387).
    //              A coarser alternative fit (max error = ~0.16486,
    //              mean sq. error = ~0.0041041) is:
    //                  0.5985*triad_size - triad_size*sqrt(thresh)
    const float size_term = 0.6113*triad_size;
    const float thresh_term = 1.122*triad_size*sqrt(0.000416 + thresh);
    return -0.05168 + size_term - thresh_term;
}
|
|
|
|
float get_absolute_scale_blur_sigma(const float thresh)
{
    //  Purpose:    Pick a blur sigma for an absolutely-scaled BLOOM_APPROX
    //              pass without knowing the real viewport size.
    //  Requires:   1.) min_allowed_viewport_triads.x must be visible here.
    //                  The number of horizontal phosphor triads in the final
    //                  image must be >= min_allowed_viewport_triads.x for
    //                  realistic results.
    //              2.) bloom_approx_scale_x must be a global float equal to
    //                  the absolute horizontal scale of BLOOM_APPROX.
    //              3.) bloom_approx_scale_x/min_allowed_viewport_triads.x
    //                  should be <= 1.1658025090 to keep the final result <
    //                  0.62666015625 (the largest sigma ensuring the largest
    //                  unused texel weight stays < 1.0/256.0 for a 3x3 blur).
    //              4.) thresh is the max desired pixel difference in the
    //                  blurred triad (e.g. 1.0/256.0).
    //              5.) max_viewport_size_x must be visible at this scope
    //                  (presumably from an included settings header; confirm).
    //  Returns:    The minimum Gaussian sigma that blurs the pass output as
    //              much as it would take to blur away
    //              min_allowed_viewport_triads.x horizontal phosphor triads,
    //              rescaled to a pass that is bloom_approx_scale_x wide.
    //  Description:
    //  BLOOM_APPROX should look like a downscaled phosphor blur.  Ideally,
    //  we'd use the same blur sigma as the actual phosphor bloom and scale it
    //  down to the current resolution with
    //  (bloom_approx_scale_x/viewport_size_x), but we don't know the viewport
    //  size in this pass.  Instead, blur as much as it would take to blur
    //  away min_allowed_viewport_triads.x.  This blurs "more than necessary"
    //  if the user actually uses more triads, but that's not terrible,
    //  because blurring a constant fraction of the viewport may better
    //  resemble a true optical bloom anyway (the viewport will generally be
    //  about the same fraction of each player's field of view, regardless of
    //  screen size and resolution).

    //  Assume an extremely large viewport size for asymptotic results:
    const float asymptotic_triad_size =
        max_viewport_size_x/min_allowed_viewport_triads.x;
    const float asymptotic_sigma =
        get_min_sigma_to_blur_triad(asymptotic_triad_size, thresh);
    //  Scale the asymptotic sigma down to the BLOOM_APPROX resolution:
    return bloom_approx_scale_x/max_viewport_size_x * asymptotic_sigma;
}
|
|
|
|
float get_center_weight(const float sigma)
{
    //  Purpose:    Given a Gaussian blur sigma, get the blur weight for the
    //              center texel.
    //  Requires:   sigma is the Gaussian standard deviation used by the blur.
    //  Returns:    With _RUNTIME_PHOSPHOR_BLOOM_SIGMA enabled, whatever
    //              get_fast_gaussian_weight_sum_inv(sigma) returns.
    //              Otherwise, the square of the inverse 1D weight sum for the
    //              blur size selected by PHOSPHOR_BLOOM_TRIAD_SIZE_MODE
    //              (squared presumably because the blur is applied in two
    //              dimensions; confirm against the blur passes).
    #if _RUNTIME_PHOSPHOR_BLOOM_SIGMA
        return get_fast_gaussian_weight_sum_inv(sigma);
    #else
        //  Unnormalized tap weight at integer offset k is
        //  exp(-k*k/(2*sigma*sigma)); the center tap (k = 0) is exactly 1.
        const float denom_inv = 0.5/(sigma*sigma);
        const float w0 = 1.0;
        const float w1 = exp(-1.0 * denom_inv);
        const float w2 = exp(-4.0 * denom_inv);
        const float w3 = exp(-9.0 * denom_inv);
        const float w4 = exp(-16.0 * denom_inv);
        const float w5 = exp(-25.0 * denom_inv);
        const float w6 = exp(-36.0 * denom_inv);
        const float w7 = exp(-49.0 * denom_inv);
        const float w8 = exp(-64.0 * denom_inv);
        const float w9 = exp(-81.0 * denom_inv);
        const float w10 = exp(-100.0 * denom_inv);
        const float w11 = exp(-121.0 * denom_inv);
        const float w12 = exp(-144.0 * denom_inv);
        const float w13 = exp(-169.0 * denom_inv);
        const float w14 = exp(-196.0 * denom_inv);
        const float w15 = exp(-225.0 * denom_inv);
        const float w16 = exp(-256.0 * denom_inv);
        const float w17 = exp(-289.0 * denom_inv);
        const float w18 = exp(-324.0 * denom_inv);
        const float w19 = exp(-361.0 * denom_inv);
        const float w20 = exp(-400.0 * denom_inv);
        const float w21 = exp(-441.0 * denom_inv);
        //  Running one-sided sums, accumulated left to right so each blur
        //  size adds its taps in the same order as a single long sum would:
        const float side_sum_4 = w1 + w2 + w3 + w4;
        const float side_sum_8 = side_sum_4 + w5 + w6 + w7 + w8;
        const float side_sum_12 = side_sum_8 + w9 + w10 + w11 + w12;
        const float side_sum_15 = side_sum_12 + w13 + w14 + w15;
        const float side_sum_21 = side_sum_15 +
            w16 + w17 + w18 + w19 + w20 + w21;
        //  Note: If the implementation uses a smaller blur than the max
        //  allowed, the worst case is that the center weight will be
        //  overestimated, so a bit more energy goes into the brightpass...no
        //  huge deal.  Then again, if the implementation uses a larger blur
        //  than the max "allowed" because of dynamic branching, the center
        //  weight could be underestimated, which is more of a problem.
        //  (The original note ended mid-sentence at "consider always using".)
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
            //  43x blur:
            const float side_sum = side_sum_21;
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
            //  31x blur:
            const float side_sum = side_sum_15;
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
            //  25x blur:
            const float side_sum = side_sum_12;
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
            //  17x blur:
            const float side_sum = side_sum_8;
        #else
            //  9x blur:
            const float side_sum = side_sum_4;
        #endif
        //  The kernel is symmetric, so every non-center tap counts twice:
        const float weight_sum_inv = 1.0 / (w0 + 2.0 * side_sum);
        return weight_sum_inv * weight_sum_inv;
    #endif
}
|
|
|
|
float3 tex2DblurNfast(const sampler2D tex, const float2 tex_uv,
    const float2 dxdy, const float sigma,
    const float input_gamma)
{
    //  Purpose:    Blur tex at tex_uv with the smallest "fast" blur that is
    //              big enough for sigma.
    //  Requires:   1.) tex, tex_uv, dxdy, sigma, and input_gamma are forwarded
    //                  unchanged to the selected tex2DblurXXfast() function
    //                  (defined outside this file section).
    //              2.) blur9_std_dev, blur17_std_dev, blur25_std_dev, and
    //                  blur31_std_dev are the largest sigmas each blur size
    //                  is trusted with (defined outside this file section).
    //  Returns:    The float3 result of the selected blur function.
    //  Side note:  This function #defines PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
    //              when it decides to branch; the macro stays defined for the
    //              rest of the translation unit.

    //  If sigma is static, we can safely branch and use the smallest blur
    //  that's big enough.  Ignore #define hints, because we'll only use a
    //  large blur if we actually need it, and the branches cost nothing.
    //  With a runtime sigma it's still worth branching if the profile
    //  supports dynamic branches:  It's much faster than using a hugely
    //  excessive blur, but each branch eats ~1% FPS.
    #if !_RUNTIME_PHOSPHOR_BLOOM_SIGMA || _DRIVERS_ALLOW_DYNAMIC_BRANCHES
        #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
    #endif
    //  Failed optimization notes:
    //  I originally created a same-size mipmapped 5-tap separable blur10 that
    //  could handle any sigma by reaching into lower mip levels.  It was
    //  as fast as blur25fast for runtime sigmas and a tad faster than
    //  blur31fast for static sigmas, but mipmapping two viewport-size passes
    //  ate 10% of FPS across all codepaths, so it wasn't worth it.
    #ifdef PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
        if(sigma <= blur9_std_dev)
        {
            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else if(sigma <= blur17_std_dev)
        {
            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else if(sigma <= blur25_std_dev)
        {
            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else if(sigma <= blur31_std_dev)
        {
            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else
        {
            return tex2Dblur43fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
    #else
        //  If we can't afford to branch, we can only guess at what blur
        //  size we need.  Therefore, use the largest blur allowed.
        //  The size is selected with the same PHOSPHOR_BLOOM_TRIAD_SIZE_MODE
        //  comparisons that get_center_weight() uses, so the blur that runs
        //  here matches the blur size assumed for the center weight.  (The
        //  previous checks tested un-prefixed
        //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_*_PIXELS names with a mix of
        //  #ifdef and #if, which did not follow that scheme.)
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
            return tex2Dblur43fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #else
            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #endif  //  PHOSPHOR_BLOOM_TRIAD_SIZE_MODE
    #endif  //  PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
}
|
|
|
|
float get_bloom_approx_sigma(const float output_size_x_runtime,
    const float estimated_viewport_size_x)
{
    //  Purpose:    Choose the blur sigma for the BLOOM_APPROX pass.
    //  Requires:   1.) output_size_x_runtime == BLOOM_APPROX.output_size.x.
    //                  This is included for dynamic codepaths just in case the
    //                  following two globals are incorrect:
    //              2.) bloom_approx_size_x_for_fake should == the same
    //                  if PHOSPHOR_BLOOM_FAKE is #defined
    //              3.) bloom_approx_size_x should == the same otherwise
    //              4.) estimated_viewport_size_x is divided by
    //                  mask_triad_width to estimate the triad count on the
    //                  dynamic (gaussian4x4) path only.
    //  Returns:    For gaussian4x4, return a dynamic small bloom sigma that's
    //              as close to optimal as possible given available information.
    //              For blur3x3, return a static small bloom sigma that
    //              works well for typical cases.  Otherwise, we're using simple
    //              bilinear filtering, so use static calculations.

    //  Assume an extremely large viewport size for asymptotic results:
    static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);

    if(bloom_approx_filter > 1.5)   //  4x4 true Gaussian resize
    {
        //  Use the runtime num triads and output size.  The triad count
        //  blends the size-derived estimate with mask_num_triads_across by
        //  mask_size_param, and never drops below the allowed minimum:
        const float num_triads_from_size =
            estimated_viewport_size_x/mask_triad_width;
        const float num_triads_runtime = max(min_allowed_viewport_triads.x,
            lerp(num_triads_from_size, mask_num_triads_across,
                mask_size_param));
        const float runtime_triad_size =
            max_viewport_size_x/num_triads_runtime;
        const float runtime_asymptotic_sigma = get_min_sigma_to_blur_triad(
            runtime_triad_size, bloom_diff_thresh);
        const float runtime_sigma_x =
            runtime_asymptotic_sigma * output_size_x_runtime/max_viewport_size_x;
        //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
        //  account for the Gaussian scanline sigma from the last pass too.
        //  The bloom will be too wide horizontally but tall enough vertically.
        return length(float2(runtime_sigma_x, gaussian_beam_max_sigma));
    }
    else    //  3x3 blur resize (the bilinear resize doesn't need a sigma)
    {
        //  We're either using blur3x3 or bilinear filtering.  The biggest
        //  reason to choose blur3x3 is to avoid dynamic weights, so use a
        //  static calculation.
        //  Assume the default static triad count.  This is a compromise that
        //  ensures typical triads are blurred, even if unusually large ones
        //  aren't.
        static const float num_triads_static =
            max(min_allowed_viewport_triads.x, mask_num_triads_across_static);
        #ifdef PHOSPHOR_BLOOM_FAKE
            static const float static_output_size_x =
                bloom_approx_size_x_for_fake;
        #else
            static const float static_output_size_x = bloom_approx_size_x;
        #endif
        static const float static_triad_size =
            max_viewport_size_x/num_triads_static;
        const float static_asymptotic_sigma = get_min_sigma_to_blur_triad(
            static_triad_size, bloom_diff_thresh);
        const float static_sigma_x =
            static_asymptotic_sigma * static_output_size_x/max_viewport_size_x;
        //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
        //  try accounting for the Gaussian scanline sigma from the last pass
        //  too; use the static default value:
        return length(float2(static_sigma_x, gaussian_beam_max_sigma_static));
    }
}
|
|
|
|
float get_final_bloom_sigma(const float bloom_sigma_runtime)
{
    //  Purpose:    Select the sigma used for the final phosphor bloom.
    //  Requires:   1.) bloom_sigma_runtime is a precalculated sigma that's
    //                  optimal for the [known] triad size.
    //              2.) Call this from a fragment shader, NOT a vertex shader,
    //                  or blurring with static sigmas won't be
    //                  constant-folded.
    //  Returns:    Return the optimistic static sigma if the triad size is
    //              known at compile time.  Otherwise return the optimal runtime
    //              sigma (10% slower) or an implementation-specific compromise
    //              between an optimistic or pessimistic static sigma.
    #if _RUNTIME_PHOSPHOR_BLOOM_SIGMA
        return bloom_sigma_runtime;
    #else
        //  Overblurring looks as bad as underblurring, so assume average-size
        //  triads (mask_triad_width_static), not worst-case huge triads:
        return get_min_sigma_to_blur_triad(
            mask_triad_width_static, bloom_diff_thresh);
    #endif
}
|
|
|
|
|
|
#endif // _BLOOM_FUNCTIONS_H |