mirror of
				https://github.com/RetroDECK/Duckstation.git
				synced 2025-04-10 19:15:14 +00:00 
			
		
		
		
	
		
			
	
	
		
			320 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			HLSL
		
	
	
	
	
	
		
		
			
		
	
	
			320 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			HLSL
		
	
	
	
	
	
|   | #ifndef _BLOOM_FUNCTIONS_H | ||
|  | #define _BLOOM_FUNCTIONS_H | ||
|  | 
 | ||
|  | /////////////////////////////  GPL LICENSE NOTICE  ///////////////////////////// | ||
|  | 
 | ||
|  | //  crt-royale: A full-featured CRT shader, with cheese. | ||
|  | //  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com> | ||
|  | // | ||
|  | //  This program is free software; you can redistribute it and/or modify it | ||
|  | //  under the terms of the GNU General Public License as published by the Free | ||
|  | //  Software Foundation; either version 2 of the License, or any later version. | ||
|  | // | ||
|  | //  This program is distributed in the hope that it will be useful, but WITHOUT | ||
|  | //  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
|  | //  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
|  | //  more details. | ||
|  | // | ||
|  | //  You should have received a copy of the GNU General Public License along with | ||
|  | //  this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
|  | //  Place, Suite 330, Boston, MA 02111-1307 USA | ||
|  | 
 | ||
|  | 
 | ||
|  | /////////////////////////////////  DESCRIPTION  //////////////////////////////// | ||
|  | 
 | ||
|  | //  These utility functions and constants help several passes determine the | ||
|  | //  size and center texel weight of the phosphor bloom in a uniform manner. | ||
|  | 
 | ||
|  | 
 | ||
|  | //////////////////////////////////  INCLUDES  ////////////////////////////////// | ||
|  | 
 | ||
|  | //  We need to calculate the correct blur sigma using some .cgp constants: | ||
|  | //#include "../user-settings.h" | ||
|  | 
 | ||
|  | 
 | ||
|  | #include "user-settings.fxh" | ||
|  | #include "derived-settings-and-constants.fxh" | ||
|  | #include "bind-shader-params.fxh" | ||
|  | #include "blur-functions.fxh" | ||
|  | 
 | ||
|  | ///////////////////////////////  BLOOM CONSTANTS  ////////////////////////////// | ||
|  | 
 | ||
//  Threshold handed to get_min_sigma_to_blur_triad() by the sigma helpers in
//  this file: the max desired pixel difference in a blurred phosphor triad.
static const float bloom_diff_thresh = 1.0 / 256.0;
|  | 
 | ||
|  | 
 | ||
|  | 
 | ||
|  | ///////////////////////////////////  HELPERS  ////////////////////////////////// | ||
|  | 
 | ||
float get_min_sigma_to_blur_triad(const float triad_size,
    const float thresh)
{
    //  Requires:   1.) triad_size is the final phosphor triad size in pixels.
    //              2.) thresh is the max desired pixel difference in the
    //                  blurred triad (e.g. 1.0/256.0).
    //  Returns:    The minimum sigma that fully blurs a phosphor triad on the
    //              screen to an even color, within thresh.
    //  Notes:      The closed form below was found by curve-fitting data:
    //              max error = ~0.086036, mean sq. error = ~0.0013387.
    //              A simpler fit with larger error (max error = ~0.16486,
    //              mean sq. error = ~0.0041041) would be:
    //                  0.5985*triad_size - triad_size*sqrt(thresh)
    const float fit_offset = -0.05168;
    const float fit_linear = 0.6113*triad_size;
    const float fit_root = 1.122*triad_size*sqrt(0.000416 + thresh);
    //  Keep the original evaluation order: (offset + linear) - root.
    return fit_offset + fit_linear - fit_root;
}
|  | 
 | ||
float get_absolute_scale_blur_sigma(const float thresh)
{
    //  Requires:   1.) min_allowed_viewport_triads.x must be a global float.
    //                  The number of horizontal phosphor triads in the final
    //                  image must be >= this value for realistic results.
    //              2.) bloom_approx_scale_x must be a global float equal to the
    //                  absolute horizontal scale of BLOOM_APPROX.
    //              3.) bloom_approx_scale_x/min_allowed_viewport_triads.x
    //                  should be <= 1.1658025090 to keep the final result <
    //                  0.62666015625 (the largest sigma ensuring the largest
    //                  unused texel weight stays < 1.0/256.0 for a 3x3 blur).
    //              4.) thresh is the max desired pixel difference in the
    //                  blurred triad (e.g. 1.0/256.0).
    //              5.) max_viewport_size_x must be visible at global scope;
    //                  it is not declared in this file, so it presumably comes
    //                  from one of the included headers (TODO confirm).
    //  Returns:    The minimum Gaussian sigma that will blur the pass output
    //              as much as it would have taken to blur away
    //              bloom_approx_scale_x horizontal phosphor triads.
    //  Description:
    //  BLOOM_APPROX should look like a downscaled phosphor blur.  Ideally the
    //  real phosphor bloom sigma would be scaled to this pass's resolution,
    //  but the viewport size is unknown here.  Instead, blur as much as it
    //  would take to blur away min_allowed_viewport_triads.x triads.  That
    //  blurs "more than necessary" when the user actually uses more triads,
    //  which is acceptable: blurring a constant fraction of the viewport may
    //  better resemble a true optical bloom anyway.

    //  Triad width in pixels on an extremely large (asymptotic) viewport:
    const float asymptotic_triad_size =
        max_viewport_size_x/min_allowed_viewport_triads.x;
    //  Sigma needed to blur such a triad at that asymptotic resolution:
    const float asymptotic_sigma =
        get_min_sigma_to_blur_triad(asymptotic_triad_size, thresh);
    //  Rescale from the asymptotic viewport down to the BLOOM_APPROX scale:
    return bloom_approx_scale_x/max_viewport_size_x * asymptotic_sigma;
}
|  | 
 | ||
float get_center_weight(const float sigma)
{
    //  Requires:   sigma is a Gaussian blur sigma.
    //  Returns:    The blur weight for the center texel.
    #if _RUNTIME_PHOSPHOR_BLOOM_SIGMA
        //  Runtime sigma: defer to the helper.
        return get_fast_gaussian_weight_sum_inv(sigma);
    #else
        //  Static sigma: each texel at integer distance d from the center has
        //  the unnormalized weight exp(-d*d/(2*sigma*sigma)); the center
        //  texel's own unnormalized weight is 1.0.
        const float exp_scale = 0.5/(sigma*sigma);
        //  One-sided partial sums, accumulated strictly in order of
        //  increasing distance so each blur size reuses the previous sum.
        const float side_sum_4 = exp(-1.0 * exp_scale) +
            exp(-4.0 * exp_scale) + exp(-9.0 * exp_scale) +
            exp(-16.0 * exp_scale);
        const float side_sum_8 = side_sum_4 + exp(-25.0 * exp_scale) +
            exp(-36.0 * exp_scale) + exp(-49.0 * exp_scale) +
            exp(-64.0 * exp_scale);
        const float side_sum_12 = side_sum_8 + exp(-81.0 * exp_scale) +
            exp(-100.0 * exp_scale) + exp(-121.0 * exp_scale) +
            exp(-144.0 * exp_scale);
        const float side_sum_15 = side_sum_12 + exp(-169.0 * exp_scale) +
            exp(-196.0 * exp_scale) + exp(-225.0 * exp_scale);
        const float side_sum_21 = side_sum_15 + exp(-256.0 * exp_scale) +
            exp(-289.0 * exp_scale) + exp(-324.0 * exp_scale) +
            exp(-361.0 * exp_scale) + exp(-400.0 * exp_scale) +
            exp(-441.0 * exp_scale);
        //  Pick the sum matching the largest blur the triad size mode allows.
        //  If the implementation ends up using a smaller blur than this, the
        //  center weight is overestimated and a bit more energy goes into the
        //  brightpass (no huge deal).  If it uses a larger blur because of
        //  dynamic branching, the center weight could be underestimated,
        //  which is more of a problem.
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
            //  43x blur:
            const float side_sum = side_sum_21;
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
            //  31x blur:
            const float side_sum = side_sum_15;
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
            //  25x blur:
            const float side_sum = side_sum_12;
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
            //  17x blur:
            const float side_sum = side_sum_8;
        #else
            //  9x blur:
            const float side_sum = side_sum_4;
        #endif
        //  Normalize: center texel (1.0) plus both mirrored sides.
        const float weight_sum_inv = 1.0 / (1.0 + 2.0 * side_sum);
        //  The returned center weight is the square of the normalized 1D
        //  center weight.
        return weight_sum_inv * weight_sum_inv;
    #endif
}
|  | 
 | ||
float3 tex2DblurNfast(const sampler2D tex, const float2 tex_uv,
    const float2 dxdy, const float sigma,
    const float input_gamma)
{
    //  Requires:   1.) sigma is the Gaussian sigma to blur with.
    //              2.) tex, tex_uv, dxdy, and input_gamma are forwarded
    //                  unchanged to the selected tex2Dblur*fast() function.
    //              3.) blur9_std_dev ... blur31_std_dev and the
    //                  tex2Dblur*fast() functions are not declared in this
    //                  file; presumably they come from blur-functions.fxh
    //                  (TODO confirm).
    //  Returns:    The result of the 9-, 17-, 25-, 31-, or 43-tap fast blur,
    //              chosen either by comparing sigma against the blur*_std_dev
    //              limits or, when branching is disabled, by the triad size
    //              mode.
    //  If sigma is static, we can safely branch and use the smallest blur
    //  that's big enough.  Ignore #define hints, because we'll only use a
    //  large blur if we actually need it, and the branches cost nothing.
    #if !_RUNTIME_PHOSPHOR_BLOOM_SIGMA
        #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
    #else
        //  It's still worth branching if the profile supports dynamic branches:
        //  It's much faster than using a hugely excessive blur, but each branch
        //  eats ~1% FPS.
        #if _DRIVERS_ALLOW_DYNAMIC_BRANCHES
            #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
        #endif
    #endif
    //  Failed optimization notes:
    //  I originally created a same-size mipmapped 5-tap separable blur10 that
    //  could handle any sigma by reaching into lower mip levels.  It was
    //  as fast as blur25fast for runtime sigmas and a tad faster than
    //  blur31fast for static sigmas, but mipmapping two viewport-size passes
    //  ate 10% of FPS across all codepaths, so it wasn't worth it.
    #ifdef PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
        //  Use the smallest blur whose limit still covers sigma.
        if(sigma <= blur9_std_dev)
        {
            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else if(sigma <= blur17_std_dev)
        {
            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else if(sigma <= blur25_std_dev)
        {
            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else if(sigma <= blur31_std_dev)
        {
            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else
        {
            return tex2Dblur43fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
    #else
        //  If we can't afford to branch, we can only guess at what blur
        //  size we need.  Therefore, use the largest blur allowed.
        //  The size is selected with the same PHOSPHOR_BLOOM_TRIAD_SIZE_MODE
        //  comparisons that get_center_weight() uses, so both functions agree
        //  on the blur size.  (The previous legacy
        //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_*_PIXELS #ifdef tests did not
        //  match that scheme.)
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
            return tex2Dblur43fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #else
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #else
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #else
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #else
            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #endif  //  _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
        #endif  //  _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
        #endif  //  _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
        #endif  //  _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
    #endif  //  PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
}
|  | 
 | ||
float get_bloom_approx_sigma(const float output_size_x_runtime,
    const float estimated_viewport_size_x)
{
    //  Requires:   1.) output_size_x_runtime == BLOOM_APPROX.output_size.x.
    //                  This is included for dynamic codepaths just in case the
    //                  following two globals are incorrect:
    //              2.) bloom_approx_size_x_for_fake should == the same
    //                  if PHOSPHOR_BLOOM_FAKE is #defined
    //              3.) bloom_approx_size_x should == the same otherwise
    //              4.) estimated_viewport_size_x is divided by
    //                  mask_triad_width to estimate the number of triads.
    //  Returns:    For gaussian4x4 (bloom_approx_filter > 1.5), return a
    //              dynamic small bloom sigma that's as close to optimal as
    //              possible given available information.  For blur3x3, return
    //              a static small bloom sigma that works well for typical
    //              cases.  Otherwise, we're using simple bilinear filtering,
    //              so the same static calculation is used.

    //  Assume an extremely large viewport size for asymptotic results:
    static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);

    if(bloom_approx_filter > 1.5)   //  4x4 true Gaussian resize
    {
        //  Use the runtime num triads and output size.  The triad count is
        //  blended between the size-derived count and mask_num_triads_across
        //  by mask_size_param, then clamped from below:
        const float triads_from_size =
            estimated_viewport_size_x/mask_triad_width;
        const float triads_runtime = max(min_allowed_viewport_triads.x,
            lerp(triads_from_size, mask_num_triads_across, mask_size_param));
        const float triad_size_runtime = max_viewport_size_x/triads_runtime;
        const float full_sigma_runtime = get_min_sigma_to_blur_triad(
            triad_size_runtime, bloom_diff_thresh);
        //  Scale the asymptotic sigma down to the actual output width:
        const float scaled_sigma_runtime =
            full_sigma_runtime * output_size_x_runtime/max_viewport_size_x;
        //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
        //  account for the Gaussian scanline sigma from the last pass too.
        //  The bloom will be too wide horizontally but tall enough vertically.
        return length(float2(scaled_sigma_runtime, gaussian_beam_max_sigma));
    }

    //  3x3 blur resize (the bilinear resize doesn't need a sigma):
    //  We're either using blur3x3 or bilinear filtering.  The biggest reason
    //  to choose blur3x3 is to avoid dynamic weights, so use a static
    //  calculation.  Assume the default static triad count; this is a
    //  compromise that ensures typical triads are blurred, even if unusually
    //  large ones aren't.
    static const float triads_static =
        max(min_allowed_viewport_triads.x, mask_num_triads_across_static);
    #ifdef PHOSPHOR_BLOOM_FAKE
        static const float output_size_x_static =
            bloom_approx_size_x_for_fake;
    #else
        static const float output_size_x_static = bloom_approx_size_x;
    #endif
    static const float triad_size_static = max_viewport_size_x/triads_static;
    const float full_sigma_static = get_min_sigma_to_blur_triad(
        triad_size_static, bloom_diff_thresh);
    const float scaled_sigma_static =
        full_sigma_static * output_size_x_static/max_viewport_size_x;
    //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
    //  try accounting for the Gaussian scanline sigma from the last pass
    //  too; use the static default value:
    return length(float2(scaled_sigma_static, gaussian_beam_max_sigma_static));
}
|  | 
 | ||
float get_final_bloom_sigma(const float bloom_sigma_runtime)
{
    //  Requires:   1.) bloom_sigma_runtime is a precalculated sigma that's
    //                  optimal for the [known] triad size.
    //              2.) Call this from a fragment shader (not a vertex shader),
    //                  or blurring with static sigmas won't be constant-folded.
    //  Returns:    bloom_sigma_runtime unchanged when
    //              _RUNTIME_PHOSPHOR_BLOOM_SIGMA is enabled.  Otherwise, the
    //              optimistic static sigma computed from
    //              mask_triad_width_static and bloom_diff_thresh.
    #if _RUNTIME_PHOSPHOR_BLOOM_SIGMA
        return bloom_sigma_runtime;
    #else
        //  Overblurring looks as bad as underblurring, so assume average-size
        //  triads, not worst-case huge triads:
        return get_min_sigma_to_blur_triad(
            mask_triad_width_static, bloom_diff_thresh);
    #endif
}
|  | 
 | ||
|  | 
 | ||
|  | #endif  //  _BLOOM_FUNCTIONS_H |