#ifndef DERIVED_SETTINGS_AND_CONSTANTS_H #define DERIVED_SETTINGS_AND_CONSTANTS_H ///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// // crt-royale: A full-featured CRT shader, with cheese. // Copyright (C) 2014 TroggleMonkey // // This program is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by the Free // Software Foundation; either version 2 of the License, or any later version. // // This program is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for // more details. // // You should have received a copy of the GNU General Public License along with // this program; if not, write to the Free Software Foundation, Inc., 59 Temple // Place, Suite 330, Boston, MA 02111-1307 USA ///////////////////////////////// DESCRIPTION //////////////////////////////// // These macros and constants can be used across the whole codebase. // Unlike the values in user-settings.cgh, end users shouldn't modify these. ////////////////////////////////// INCLUDES ////////////////////////////////// #include "user-settings.fxh" #include "user-cgp-constants.fxh" /////////////////////////////// FIXED SETTINGS /////////////////////////////// // Avoid dividing by zero; using a macro overloads for float, float2, etc.: //#define FIX_ZERO(c) (max(abs(c), 0.0000152587890625)) // 2^-16 // Ensure the first pass decodes CRT gamma and the last encodes LCD gamma. #ifndef SIMULATE_CRT_ON_LCD #define SIMULATE_CRT_ON_LCD #endif // Manually tiling a manually resized texture creates texture coord derivative // discontinuities and confuses anisotropic filtering, causing discolored tile // seams in the phosphor mask. Workarounds: // a.) Using tex2Dlod disables anisotropic filtering for tiled masks. It's // downgraded to tex2Dbias without DRIVERS_ALLOW_TEX2DLOD #defined and // disabled without DRIVERS_ALLOW_TEX2DBIAS #defined either. // b.) "Tile flat twice" requires drawing two full tiles without border padding // to the resized mask FBO, and it's incompatible with same-pass curvature. // (Same-pass curvature isn't used but could be in the future...maybe.) // c.) "Fix discontinuities" requires derivatives and drawing one tile with // border padding to the resized mask FBO, but it works with same-pass // curvature. It's disabled without DRIVERS_ALLOW_DERIVATIVES #defined. // Precedence: a, then, b, then c (if multiple strategies are #defined). #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD // 129.7 FPS, 4x, flat; 101.8 at fullscreen #define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE // 128.1 FPS, 4x, flat; 101.5 at fullscreen #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES // 124.4 FPS, 4x, flat; 97.4 at fullscreen // Also, manually resampling the phosphor mask is slightly blurrier with // anisotropic filtering. (Resampling with mipmapping is even worse: It // creates artifacts, but only with the fully bloomed shader.) The difference // is subtle with small triads, but you can fix it for a small cost. //#define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD ////////////////////////////// DERIVED SETTINGS ////////////////////////////// // Intel HD 4000 GPU's can't handle manual mask resizing (for now), setting the // geometry mode at runtime, or a 4x4 true Gaussian resize. Disable // incompatible settings ASAP. (INTEGRATED_GRAPHICS_COMPATIBILITY_MODE may be // #defined by either user-settings.h or a wrapper .cg that #includes the // current .cg pass.) #ifdef INTEGRATED_GRAPHICS_COMPATIBILITY_MODE #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE #undef PHOSPHOR_MASK_MANUALLY_RESIZE #endif #ifdef RUNTIME_GEOMETRY_MODE #undef RUNTIME_GEOMETRY_MODE #endif // Mode 2 (4x4 Gaussian resize) won't work, and mode 1 (3x3 blur) is // inferior in most cases, so replace 2.0 with 0.0: static const float bloom_approx_filter = bloom_approx_filter_static > 1.5 ? 0.0 : bloom_approx_filter_static; #else static const float bloom_approx_filter = bloom_approx_filter_static; #endif // Disable slow runtime paths if static parameters are used. Most of these // won't be a problem anyway once the params are disabled, but some will. #ifndef RUNTIME_SHADER_PARAMS_ENABLE #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA #undef RUNTIME_PHOSPHOR_BLOOM_SIGMA #endif #ifdef RUNTIME_ANTIALIAS_WEIGHTS #undef RUNTIME_ANTIALIAS_WEIGHTS #endif #ifdef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS #undef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS #endif #ifdef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE #undef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE #endif #ifdef RUNTIME_GEOMETRY_TILT #undef RUNTIME_GEOMETRY_TILT #endif #ifdef RUNTIME_GEOMETRY_MODE #undef RUNTIME_GEOMETRY_MODE #endif #ifdef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT #undef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT #endif #endif // Make tex2Dbias a backup for tex2Dlod for wider compatibility. #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD #define ANISOTROPIC_TILING_COMPAT_TEX2DBIAS #endif #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS #endif // Rule out unavailable anisotropic compatibility strategies: #ifndef DRIVERS_ALLOW_DERIVATIVES #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES #endif #endif #ifndef DRIVERS_ALLOW_TEX2DLOD #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD #undef ANISOTROPIC_TILING_COMPAT_TEX2DLOD #endif #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD #endif #ifdef ANTIALIAS_DISABLE_ANISOTROPIC #undef ANTIALIAS_DISABLE_ANISOTROPIC #endif #endif #ifndef DRIVERS_ALLOW_TEX2DBIAS #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS #endif #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS #endif #endif // Prioritize anisotropic tiling compatibility strategies by performance and // disable unused strategies. This concentrates all the nesting in one place. #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS #endif #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE #endif #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES #endif #else #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE #endif #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES #endif #else // ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE is only compatible with // flat texture coords in the same pass, but that's all we use. #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES #endif #endif #endif #endif // The tex2Dlod and tex2Dbias strategies share a lot in common, and we can // reduce some #ifdef nesting in the next section by essentially OR'ing them: #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY #endif #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY #endif // Prioritize anisotropic resampling compatibility strategies the same way: #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS #endif #endif /////////////////////// DERIVED PHOSPHOR MASK CONSTANTS ////////////////////// // If we can use the large mipmapped LUT without mipmapping artifacts, we // should: It gives us more options for using fewer samples. #ifdef DRIVERS_ALLOW_TEX2DLOD #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD // TODO: Take advantage of this! #define PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT static const float2 mask_resize_src_lut_size = mask_texture_large_size; #else static const float2 mask_resize_src_lut_size = mask_texture_small_size; #endif #else static const float2 mask_resize_src_lut_size = mask_texture_small_size; #endif // tex2D's sampler2D parameter MUST be a uniform global, a uniform input to // main_fragment, or a static alias of one of the above. This makes it hard // to select the phosphor mask at runtime: We can't even assign to a uniform // global in the vertex shader or select a sampler2D in the vertex shader and // pass it to the fragment shader (even with explicit TEXUNIT# bindings), // because it just gives us the input texture or a black screen. However, we // can get around these limitations by calling tex2D three times with different // uniform samplers (or resizing the phosphor mask three times altogether). // With dynamic branches, we can process only one of these branches on top of // quickly discarding fragments we don't need (cgc seems able to overcome // limigations around dependent texture fetches inside of branches). Without // dynamic branches, we have to process every branch for every fragment...which // is slower. Runtime sampling mode selection is slower without dynamic // branches as well. Let the user's static #defines decide if it's worth it. #ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES #define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT #else #ifdef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT #define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT #endif #endif // We need to render some minimum number of tiles in the resize passes. // We need at least 1.0 just to repeat a single tile, and we need extra // padding beyond that for anisotropic filtering, discontinuitity fixing, // antialiasing, same-pass curvature (not currently used), etc. First // determine how many border texels and tiles we need, based on how the result // will be sampled: #ifdef GEOMETRY_EARLY static const float max_subpixel_offset = aa_subpixel_r_offset_static.x; // Most antialiasing filters have a base radius of 4.0 pixels: static const float max_aa_base_pixel_border = 4.0 + max_subpixel_offset; #else static const float max_aa_base_pixel_border = 0.0; #endif // Anisotropic filtering adds about 0.5 to the pixel border: #ifndef ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY static const float max_aniso_pixel_border = max_aa_base_pixel_border + 0.5; #else static const float max_aniso_pixel_border = max_aa_base_pixel_border; #endif // Fixing discontinuities adds 1.0 more to the pixel border: #ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES static const float max_tiled_pixel_border = max_aniso_pixel_border + 1.0; #else static const float max_tiled_pixel_border = max_aniso_pixel_border; #endif // Convert the pixel border to an integer texel border. Assume same-pass // curvature about triples the texel frequency: #ifdef GEOMETRY_EARLY static const float max_mask_texel_border = macro_ceil(max_tiled_pixel_border * 3.0); #else static const float max_mask_texel_border = macro_ceil(max_tiled_pixel_border); #endif // Convert the texel border to a tile border using worst-case assumptions: static const float max_mask_tile_border = max_mask_texel_border/ (mask_min_allowed_triad_size * mask_triads_per_tile); // Finally, set the number of resized tiles to render to MASK_RESIZE, and set // the starting texel (inside borders) for sampling it. #ifndef GEOMETRY_EARLY #ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE // Special case: Render two tiles without borders. Anisotropic // filtering doesn't seem to be a problem here. static const float mask_resize_num_tiles = 1.0 + 1.0; static const float mask_start_texels = 0.0; #else static const float mask_resize_num_tiles = 1.0 + 2.0 * max_mask_tile_border; static const float mask_start_texels = max_mask_texel_border; #endif #else static const float mask_resize_num_tiles = 1.0 + 2.0*max_mask_tile_border; static const float mask_start_texels = max_mask_texel_border; #endif // We have to fit mask_resize_num_tiles into an FBO with a viewport scale of // mask_resize_viewport_scale. This limits the maximum final triad size. // Estimate the minimum number of triads we can split the screen into in each // dimension (we'll be as correct as mask_resize_viewport_scale is): static const float mask_resize_num_triads = mask_resize_num_tiles * mask_triads_per_tile; static const float2 min_allowed_viewport_triads = mask_resize_num_triads.xx / mask_resize_viewport_scale; #endif // DERIVED_SETTINGS_AND_CONSTANTS_H