mirror of
https://github.com/RetroDECK/Duckstation.git
synced 2025-01-06 01:15:39 +00:00
bf1b023f12
- A new port of crt-royale. More faithful to original. It uses the same mask textures. - The only thing not ported is the original geometry pass. It was replaced by geom curvature code. - It's configured for 1080p displays. 4k displays need to adjust param mask_triad_size_desired from 3.0 to 4.0. OBS: It's up to you decide if the two versions should be maintained.
300 lines
13 KiB
HLSL
300 lines
13 KiB
HLSL
#ifndef DERIVED_SETTINGS_AND_CONSTANTS_H
|
|
#define DERIVED_SETTINGS_AND_CONSTANTS_H
|
|
|
|
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
|
|
|
// crt-royale: A full-featured CRT shader, with cheese.
|
|
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify it
|
|
// under the terms of the GNU General Public License as published by the Free
|
|
// Software Foundation; either version 2 of the License, or any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful, but WITHOUT
|
|
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
// more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License along with
|
|
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
// Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
|
|
///////////////////////////////// DESCRIPTION ////////////////////////////////
|
|
|
|
// These macros and constants can be used across the whole codebase.
|
|
// Unlike the values in user-settings.fxh, end users shouldn't modify these.
|
|
|
|
|
|
////////////////////////////////// INCLUDES //////////////////////////////////
|
|
|
|
#include "user-settings.fxh"
|
|
#include "user-cgp-constants.fxh"
|
|
|
|
|
|
/////////////////////////////// FIXED SETTINGS ///////////////////////////////
|
|
|
|
// Avoid dividing by zero; using a macro overloads for float, float2, etc.:
|
|
//#define FIX_ZERO(c) (max(abs(c), 0.0000152587890625)) // 2^-16
|
|
|
|
// Ensure the first pass decodes CRT gamma and the last encodes LCD gamma.
// The #ifndef guard leaves an earlier definition of the flag untouched.
#ifndef SIMULATE_CRT_ON_LCD
    #define SIMULATE_CRT_ON_LCD
#endif
|
|
|
|
// Manually tiling a manually resized texture creates texture coord derivative
// discontinuities and confuses anisotropic filtering, causing discolored tile
// seams in the phosphor mask. Workarounds:
// a.) Using tex2Dlod disables anisotropic filtering for tiled masks. It's
//     downgraded to tex2Dbias without DRIVERS_ALLOW_TEX2DLOD #defined and
//     disabled without DRIVERS_ALLOW_TEX2DBIAS #defined either.
// b.) "Tile flat twice" requires drawing two full tiles without border padding
//     to the resized mask FBO, and it's incompatible with same-pass curvature.
//     (Same-pass curvature isn't used but could be in the future...maybe.)
// c.) "Fix discontinuities" requires derivatives and drawing one tile with
//     border padding to the resized mask FBO, but it works with same-pass
//     curvature. It's disabled without DRIVERS_ALLOW_DERIVATIVES #defined.
// Precedence: a, then b, then c (if multiple strategies are #defined).
// All three are requested here; later sections of this header remove the
// ones that are unavailable or lower priority.
#define ANISOTROPIC_TILING_COMPAT_TEX2DLOD              // 129.7 FPS, 4x, flat; 101.8 at fullscreen
#define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE       // 128.1 FPS, 4x, flat; 101.5 at fullscreen
#define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES   // 124.4 FPS, 4x, flat; 97.4 at fullscreen
// Also, manually resampling the phosphor mask is slightly blurrier with
// anisotropic filtering. (Resampling with mipmapping is even worse: It
// creates artifacts, but only with the fully bloomed shader.) The difference
// is subtle with small triads, but you can fix it for a small cost by
// uncommenting the following line:
//#define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
|
|
|
|
|
|
////////////////////////////// DERIVED SETTINGS //////////////////////////////
|
|
|
|
// Intel HD 4000 GPUs can't handle manual mask resizing (for now), setting the
// geometry mode at runtime, or a 4x4 true Gaussian resize. Disable
// incompatible settings ASAP. (INTEGRATED_GRAPHICS_COMPATIBILITY_MODE may be
// #defined before this point, e.g. by the user settings header or by a
// wrapper that #includes the current pass.)
#ifdef INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
        #undef PHOSPHOR_MASK_MANUALLY_RESIZE
    #endif
    #ifdef RUNTIME_GEOMETRY_MODE
        #undef RUNTIME_GEOMETRY_MODE
    #endif
    // Mode 2 (4x4 Gaussian resize) won't work, and mode 1 (3x3 blur) is
    // inferior in most cases, so replace 2.0 with 0.0. Any static value
    // above 1.5 is treated as mode 2; other values pass through unchanged.
    static const float bloom_approx_filter =
        bloom_approx_filter_static > 1.5 ? 0.0 : bloom_approx_filter_static;
#else
    // No compatibility restrictions: use the static setting as-is.
    static const float bloom_approx_filter = bloom_approx_filter_static;
#endif
|
|
|
|
// Disable slow runtime paths if static parameters are used. Most of these
// won't be a problem anyway once the params are disabled, but some will.
// Every RUNTIME_* switch below (and the FORCE_* mask-mode override) is
// removed when RUNTIME_SHADER_PARAMS_ENABLE is not #defined.
#ifndef RUNTIME_SHADER_PARAMS_ENABLE
    #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA
        #undef RUNTIME_PHOSPHOR_BLOOM_SIGMA
    #endif
    #ifdef RUNTIME_ANTIALIAS_WEIGHTS
        #undef RUNTIME_ANTIALIAS_WEIGHTS
    #endif
    #ifdef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
        #undef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
    #endif
    #ifdef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
        #undef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
    #endif
    #ifdef RUNTIME_GEOMETRY_TILT
        #undef RUNTIME_GEOMETRY_TILT
    #endif
    #ifdef RUNTIME_GEOMETRY_MODE
        #undef RUNTIME_GEOMETRY_MODE
    #endif
    #ifdef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
        #undef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
    #endif
#endif
|
|
|
|
// Make tex2Dbias a backup for tex2Dlod for wider compatibility: requesting
// a tex2Dlod strategy also requests the matching tex2Dbias strategy.
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
    #define ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
#endif
#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
    #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
#endif
|
|
// Rule out unavailable anisotropic compatibility strategies. Each
// DRIVERS_ALLOW_* capability that is NOT #defined removes the strategies
// that depend on it.
// Without derivatives, "fix discontinuities" is unavailable:
#ifndef DRIVERS_ALLOW_DERIVATIVES
    #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#endif
// Without tex2Dlod, drop both tex2Dlod strategies and
// ANTIALIAS_DISABLE_ANISOTROPIC:
#ifndef DRIVERS_ALLOW_TEX2DLOD
    #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
        #undef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
    #endif
    #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
        #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
    #endif
    #ifdef ANTIALIAS_DISABLE_ANISOTROPIC
        #undef ANTIALIAS_DISABLE_ANISOTROPIC
    #endif
#endif
// Without tex2Dbias, drop both tex2Dbias strategies:
#ifndef DRIVERS_ALLOW_TEX2DBIAS
    #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
        #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
    #endif
    #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
        #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
    #endif
#endif
|
|
// Prioritize anisotropic tiling compatibility strategies by performance and
// disable unused strategies. At most one strategy stays #defined, chosen in
// the order: tex2Dlod, tex2Dbias, "tile flat twice", "fix discontinuities".
#if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
    // tex2Dlod wins: drop every lower-priority strategy.
    #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
        #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
    #endif
    #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
        #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
    #endif
    #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#elif defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
    // tex2Dbias wins: drop the two non-tex2D* strategies.
    #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
        #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
    #endif
    #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#elif defined(ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE)
    // ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE is only compatible with
    // flat texture coords in the same pass, but that's all we use.
    #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#endif
|
|
// The tex2Dlod and tex2Dbias strategies share a lot in common, and we can
// reduce some #ifdef nesting in the next section by OR'ing them into a
// single "family" flag that is set when either strategy is active:
#if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD) || defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
    #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
#endif
|
|
// Prioritize anisotropic resampling compatibility strategies the same way:
// when the tex2Dlod strategy is active, its tex2Dbias backup is dropped.
#ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
    #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
        #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
    #endif
#endif
|
|
|
|
|
|
/////////////////////// DERIVED PHOSPHOR MASK CONSTANTS //////////////////////
|
|
|
|
// If we can use the large mipmapped LUT without mipmapping artifacts, we
// should: It gives us more options for using fewer samples. That requires
// both tex2Dlod driver support and the tex2Dlod resampling strategy;
// otherwise fall back to the small LUT.
#if defined(DRIVERS_ALLOW_TEX2DLOD) && defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD)
    // TODO: Take advantage of this!
    #define PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
    static const float2 mask_resize_src_lut_size = mask_texture_large_size;
#else
    static const float2 mask_resize_src_lut_size = mask_texture_small_size;
#endif
|
|
|
|
|
|
// tex2D's sampler2D parameter MUST be a uniform global, a uniform input to
// main_fragment, or a static alias of one of the above. This makes it hard
// to select the phosphor mask at runtime: We can't even assign to a uniform
// global in the vertex shader or select a sampler2D in the vertex shader and
// pass it to the fragment shader (even with explicit TEXUNIT# bindings),
// because it just gives us the input texture or a black screen. However, we
// can get around these limitations by calling tex2D three times with different
// uniform samplers (or resizing the phosphor mask three times altogether).
// With dynamic branches, we can process only one of these branches on top of
// quickly discarding fragments we don't need (cgc seems able to overcome
// limitations around dependent texture fetches inside of branches). Without
// dynamic branches, we have to process every branch for every fragment...which
// is slower. Runtime sampling mode selection is slower without dynamic
// branches as well. Let the user's static #defines decide if it's worth it:
// runtime selection is enabled when dynamic branches are allowed, or when
// the user explicitly forces it.
#if defined(DRIVERS_ALLOW_DYNAMIC_BRANCHES) || defined(FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT)
    #define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
#endif
|
|
|
|
// We need to render some minimum number of tiles in the resize passes.
// We need at least 1.0 just to repeat a single tile, and we need extra
// padding beyond that for anisotropic filtering, discontinuity fixing,
// antialiasing, same-pass curvature (not currently used), etc. First
// determine how many border texels and tiles we need, based on how the result
// will be sampled:
#ifdef GEOMETRY_EARLY
    static const float max_subpixel_offset = aa_subpixel_r_offset_static.x;
    // Most antialiasing filters have a base radius of 4.0 pixels:
    static const float max_aa_base_pixel_border = 4.0 +
        max_subpixel_offset;
#else
    // Without GEOMETRY_EARLY, no antialiasing border is reserved.
    static const float max_aa_base_pixel_border = 0.0;
#endif
|
|
// Anisotropic filtering adds about 0.5 to the pixel border. The extra
// padding is only added when neither tex2Dlod-family strategy is active:
#ifndef ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
    static const float max_aniso_pixel_border = max_aa_base_pixel_border + 0.5;
#else
    static const float max_aniso_pixel_border = max_aa_base_pixel_border;
#endif
|
|
// Fixing discontinuities adds 1.0 more to the pixel border:
#ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    static const float max_tiled_pixel_border = max_aniso_pixel_border + 1.0;
#else
    static const float max_tiled_pixel_border = max_aniso_pixel_border;
#endif
|
|
// Convert the pixel border to an integer texel border. Assume same-pass
// curvature about triples the texel frequency, so the border is scaled by
// 3.0 before rounding up when GEOMETRY_EARLY is #defined:
#ifdef GEOMETRY_EARLY
    static const float max_mask_texel_border =
        macro_ceil(max_tiled_pixel_border * 3.0);
#else
    static const float max_mask_texel_border = macro_ceil(max_tiled_pixel_border);
#endif
|
|
// Convert the texel border to a tile border using worst-case assumptions:
// divide by the texel width of a tile built from the smallest allowed triads.
static const float max_mask_tile_border = max_mask_texel_border /
    (mask_min_allowed_triad_size * mask_triads_per_tile);
|
|
|
|
// Finally, set the number of resized tiles to render to MASK_RESIZE, and set
// the starting texel (inside borders) for sampling it.
#if !defined(GEOMETRY_EARLY) && defined(ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE)
    // Special case: Render two tiles without borders. Anisotropic
    // filtering doesn't seem to be a problem here.
    static const float mask_resize_num_tiles = 1.0 + 1.0;
    static const float mask_start_texels = 0.0;
#else
    // General case (always used when GEOMETRY_EARLY is #defined): one tile
    // plus a border on each side, sampled starting just inside the border.
    static const float mask_resize_num_tiles = 1.0 +
        2.0 * max_mask_tile_border;
    static const float mask_start_texels = max_mask_texel_border;
#endif
|
|
|
|
// We have to fit mask_resize_num_tiles into an FBO with a viewport scale of
// mask_resize_viewport_scale. This limits the maximum final triad size.
// Estimate the minimum number of triads we can split the screen into in each
// dimension (we'll be as correct as mask_resize_viewport_scale is):
// Total triads across all rendered tiles:
static const float mask_resize_num_triads =
    mask_resize_num_tiles * mask_triads_per_tile;
// The scalar count is broadcast to both axes, then divided per axis:
static const float2 min_allowed_viewport_triads =
    mask_resize_num_triads.xx / mask_resize_viewport_scale;
|
|
|
|
#endif // DERIVED_SETTINGS_AND_CONSTANTS_H
|
|
|