Data: Remove license-incompatible shaders

This commit is contained in:
Stenzek 2024-09-01 20:01:34 +10:00
parent 6d3b177714
commit e78539d7f9
No known key found for this signature in database
90 changed files with 47 additions and 30555 deletions

View file

@ -1,238 +0,0 @@
/*===============================================================================*\
|######################## [Dolphin FX Suite 2.20] #######################|
|########################## By Asmodean ##########################|
|| ||
|| This program is free software; you can redistribute it and/or ||
|| modify it under the terms of the GNU General Public License ||
|| as published by the Free Software Foundation; either version 2 ||
|| of the License, or (at your option) any later version. ||
|| ||
|| This program is distributed in the hope that it will be useful, ||
|| but WITHOUT ANY WARRANTY; without even the implied warranty of ||
|| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ||
|| GNU General Public License for more details. (C)2015 ||
|| ||
|#################################################################################|
\*===============================================================================*/
// Sourced from https://raw.githubusercontent.com/Asmodean-/dolphin/89d640cd557189bb5f921fc219150c74c39bdc55/Data/Sys/Shaders/DolphinFX.glsl with modifications.
/*
[configuration]
[OptionRangeInteger]
GUIName = BloomType
OptionName = A_BLOOM_TYPE
MinValue = 0
MaxValue = 5
StepAmount = 1
DefaultValue = 0
[OptionRangeFloat]
GUIName = BloomStrength
OptionName = B_BLOOM_STRENGTH
MinValue = 0.000
MaxValue = 1.000
StepAmount = 0.001
DefaultValue = 0.220
[OptionRangeFloat]
GUIName = BlendStrength
OptionName = C_BLEND_STRENGTH
MinValue = 0.000
MaxValue = 1.200
StepAmount = 0.010
DefaultValue = 1.000
[OptionRangeFloat]
GUIName = BloomDefocus
OptionName = D_B_DEFOCUS
MinValue = 1.000
MaxValue = 4.000
StepAmount = 0.100
DefaultValue = 2.000
[OptionRangeFloat]
GUIName = BloomWidth
OptionName = D_BLOOM_WIDTH
MinValue = 1.000
MaxValue = 8.000
StepAmount = 0.100
DefaultValue = 3.200
[OptionRangeFloat]
GUIName = BloomReds
OptionName = E_BLOOM_REDS
MinValue = 0.000
MaxValue = 0.500
StepAmount = 0.001
DefaultValue = 0.020
[OptionRangeFloat]
GUIName = BloomGreens
OptionName = F_BLOOM_GREENS
MinValue = 0.000
MaxValue = 0.500
StepAmount = 0.001
DefaultValue = 0.010
[OptionRangeFloat]
GUIName = BloomBlues
OptionName = G_BLOOM_BLUES
MinValue = 0.000
MaxValue = 0.500
StepAmount = 0.001
DefaultValue = 0.010
[/configuration]
*/
// Per-channel (R, G, B) weights used for the luminance estimate in AvgLuminance().
// The values match the luminance (Y) row of the linear-sRGB (D65) to CIE XYZ matrix.
CONSTANT float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750);
// Returns the root of the lumCoeff-weighted sum of the squared colour channels.
float AvgLuminance(float3 color)
{
    // Square each channel and apply its weight, then sum in x, y, z order.
    float3 weighted = color * color * lumCoeff;
    return sqrt(weighted.x + weighted.y + weighted.z);
}
// Quintic smooth interpolation: maps x from [a, b] to [0, 1] (clamped) and
// applies the polynomial 6t^5 - 15t^4 + 10t^3.
float smootherstep(float a, float b, float x)
{
    float t = saturate((x - a) / (b - a));
    float poly = t * (t * 6.0 - 15.0) + 10.0;
    return t * t * t * poly;
}
// Additive blend: sums both layers and clamps the result to [0, 1].
float3 BlendAddLight(float3 bloom, float3 blend)
{
    float3 added = bloom + blend;
    return saturate(added);
}
// Screen blend: a + b - a*b, evaluated per channel.
float3 BlendScreen(float3 bloom, float3 blend)
{
    float3 total = bloom + blend;
    float3 product = bloom * blend;
    return total - product;
}
// Interpolates from a clamped additive blend towards `blend` screened with
// itself, weighted by the smoothed luminance of `bloom`.
float3 BlendAddGlow(float3 bloom, float3 blend)
{
    float weight = smootherstep(0.0, 1.0, AvgLuminance(bloom));
    float3 additive = saturate(bloom + blend);
    float3 selfScreen = (blend + blend) - (blend * blend);
    return lerp(additive, selfScreen, weight);
}
// Interpolates from a screen blend of both layers towards `blend` screened
// with itself, weighted by the smoothed luminance of `bloom`.
float3 BlendGlow(float3 bloom, float3 blend)
{
    float weight = smootherstep(0.0, 1.0, AvgLuminance(bloom));
    float3 screened = (bloom + blend) - (bloom * blend);
    float3 selfScreen = (blend + blend) - (blend * blend);
    return lerp(screened, selfScreen, weight);
}
// Interpolates from a multiply blend to a screen blend, weighted by the
// smoothed luminance of the summed layers.
float3 BlendLuma(float3 bloom, float3 blend)
{
    float weight = smootherstep(0.0, 1.0, AvgLuminance(bloom + blend));
    float3 multiplied = bloom * blend;
    float3 screened = 1.0 - ((1.0 - bloom) * (1.0 - blend));
    return lerp(multiplied, screened, weight);
}
// Overlay blend: per channel, uses the multiply form where bloom < 0.5 and the
// screen form where bloom >= 0.5.
float3 BlendOverlay(float3 bloom, float3 blend)
{
    // step() yields 0 or 1 per channel, so the lerp below acts as a selector.
    float3 useScreen = step(0.5, bloom);
    float3 darkHalf = bloom * blend * 2.0;
    float3 lightHalf = 1.0 - (2.0 * (1.0 - bloom) * (1.0 - blend));
    return lerp(darkHalf, lightHalf, useScreen);
}
// Adds a per-channel tint to the colour. Each channel receives
// option * (2/3) * (1 - c^2), so darker channels are lifted more, and the
// result is clamped to [0, 1].
float3 BloomCorrection(float3 color)
{
    float3 curve = 2.0 / 3.0 * (1.0 - (color * color));
    float3 tint = float3(GetOption(E_BLOOM_REDS),
                         GetOption(F_BLOOM_GREENS),
                         GetOption(G_BLOOM_BLUES));
    // One clamp is enough: clamping an already clamped value changes nothing.
    return saturate(color + tint * curve);
}
// Averages four samples taken half a `width` step away from texcoord along
// each diagonal.
float4 PyramidFilter(float2 texcoord, float2 width)
{
    float2 h = 0.5 * width;
    float4 total = SampleLocation(texcoord + h);
    total += SampleLocation(texcoord + float2(-h.x, h.y));
    total += SampleLocation(texcoord + float2(h.x, -h.y));
    total += SampleLocation(texcoord - h);
    return total / 4.0;
}
// Dispatches to the blend function selected by the A_BLOOM_TYPE option.
// Any value other than 0..4 falls through to the overlay blend.
float3 Blend(float3 bloom, float3 blend)
{
    if (GetOption(A_BLOOM_TYPE) == 0)
        return BlendGlow(bloom, blend);
    if (GetOption(A_BLOOM_TYPE) == 1)
        return BlendAddGlow(bloom, blend);
    if (GetOption(A_BLOOM_TYPE) == 2)
        return BlendAddLight(bloom, blend);
    if (GetOption(A_BLOOM_TYPE) == 3)
        return BlendScreen(bloom, blend);
    if (GetOption(A_BLOOM_TYPE) == 4)
        return BlendLuma(bloom, blend);
    return BlendOverlay(bloom, blend);
}
// Bloom pass entry point: builds a blurred copy of the image from a 5x5 grid
// of weighted samples, blends it with a defocused sample using the selected
// blend mode, applies the per-channel tint, and mixes the result with the
// source colour according to B_BLOOM_STRENGTH.
void main()
{
float4 color = Sample();
float2 texcoord = GetCoordinates();
float2 pixelSize = GetInvResolution();
// Multiplier applied to the bloom alpha just before the final mix.
float anflare = 4.0;
float2 defocus = float2(GetOption(D_B_DEFOCUS), GetOption(D_B_DEFOCUS));
// Centre tap: a 4-sample average whose footprint grows with the defocus option.
float4 bloom = PyramidFilter(texcoord, pixelSize * defocus);
// One-step (dx, dy) and two-step (mdx, mdy) offsets, scaled by D_BLOOM_WIDTH.
float2 dx = float2(pixelSize.x * GetOption(D_BLOOM_WIDTH), 0.0);
float2 dy = float2(0.0, pixelSize.y * GetOption(D_BLOOM_WIDTH));
float2 mdx = mul(dx, 2.0);
float2 mdy = mul(dy, 2.0);
// Weighted 5x5 accumulation. The defocused sample stands in for the centre
// tap; together with the 24 taps below the weights sum to approximately 1.
float4 blend = bloom * 0.22520613262190495;
// Row at +2 steps in Y.
blend += 0.002589001911021066 * SampleLocation(texcoord - mdx + mdy);
blend += 0.010778807494659370 * SampleLocation(texcoord - dx + mdy);
blend += 0.024146616900339800 * SampleLocation(texcoord + mdy);
blend += 0.010778807494659370 * SampleLocation(texcoord + dx + mdy);
blend += 0.002589001911021066 * SampleLocation(texcoord + mdx + mdy);
// Row at +1 step in Y.
blend += 0.010778807494659370 * SampleLocation(texcoord - mdx + dy);
blend += 0.044875475183061630 * SampleLocation(texcoord - dx + dy);
blend += 0.100529757860782610 * SampleLocation(texcoord + dy);
blend += 0.044875475183061630 * SampleLocation(texcoord + dx + dy);
blend += 0.010778807494659370 * SampleLocation(texcoord + mdx + dy);
// Centre row (the centre tap itself was added above).
blend += 0.024146616900339800 * SampleLocation(texcoord - mdx);
blend += 0.100529757860782610 * SampleLocation(texcoord - dx);
blend += 0.100529757860782610 * SampleLocation(texcoord + dx);
blend += 0.024146616900339800 * SampleLocation(texcoord + mdx);
// Row at -1 step in Y.
blend += 0.010778807494659370 * SampleLocation(texcoord - mdx - dy);
blend += 0.044875475183061630 * SampleLocation(texcoord - dx - dy);
blend += 0.100529757860782610 * SampleLocation(texcoord - dy);
blend += 0.044875475183061630 * SampleLocation(texcoord + dx - dy);
blend += 0.010778807494659370 * SampleLocation(texcoord + mdx - dy);
// Row at -2 steps in Y.
blend += 0.002589001911021066 * SampleLocation(texcoord - mdx - mdy);
blend += 0.010778807494659370 * SampleLocation(texcoord - dx - mdy);
blend += 0.024146616900339800 * SampleLocation(texcoord - mdy);
blend += 0.010778807494659370 * SampleLocation(texcoord + dx - mdy);
blend += 0.002589001911021066 * SampleLocation(texcoord + mdx - mdy);
// C_BLEND_STRENGTH may exceed 1.0 (its range tops out at 1.2), which
// extrapolates past the blurred value.
blend = lerp(color, blend, GetOption(C_BLEND_STRENGTH));
bloom.xyz = Blend(bloom.xyz, blend.xyz);
bloom.xyz = BloomCorrection(bloom.xyz);
// Alpha carries the luminance estimate of each layer.
color.a = AvgLuminance(color.xyz);
bloom.a = AvgLuminance(bloom.xyz);
bloom.a *= anflare;
SetOutput(lerp(color, bloom, GetOption(B_BLOOM_STRENGTH)));
}

View file

@ -1,174 +0,0 @@
/*===============================================================================*\
|######################## [Dolphin FX Suite 2.20] #######################|
|########################## By Asmodean ##########################|
|| ||
|| This program is free software; you can redistribute it and/or ||
|| modify it under the terms of the GNU General Public License ||
|| as published by the Free Software Foundation; either version 2 ||
|| of the License, or (at your option) any later version. ||
|| ||
|| This program is distributed in the hope that it will be useful, ||
|| but WITHOUT ANY WARRANTY; without even the implied warranty of ||
|| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ||
|| GNU General Public License for more details. (C)2015 ||
|| ||
|#################################################################################|
\*===============================================================================*/
// Sourced from https://raw.githubusercontent.com/Asmodean-/dolphin/89d640cd557189bb5f921fc219150c74c39bdc55/Data/Sys/Shaders/DolphinFX.glsl with modifications.
/*
[configuration]
[OptionRangeFloat]
GUIName = EdgeStrength
OptionName = A_EDGE_STRENGTH
MinValue = 0.00
MaxValue = 4.00
StepAmount = 0.01
DefaultValue = 1.00
[OptionRangeFloat]
GUIName = EdgeFilter
OptionName = B_EDGE_FILTER
MinValue = 0.25
MaxValue = 1.00
StepAmount = 0.01
DefaultValue = 0.60
[OptionRangeFloat]
GUIName = EdgeThickness
OptionName = C_EDGE_THICKNESS
MinValue = 0.25
MaxValue = 2.00
StepAmount = 0.01
DefaultValue = 1.00
[OptionRangeInteger]
GUIName = PaletteType
OptionName = D_PALETTE_TYPE
MinValue = 0
MaxValue = 2
StepAmount = 1
DefaultValue = 1
[OptionRangeInteger]
GUIName = UseYuvLuma
OptionName = E_YUV_LUMA
MinValue = 0
MaxValue = 1
StepAmount = 1
DefaultValue = 0
[OptionRangeInteger]
GUIName = ColourRounding
OptionName = G_COLOR_ROUNDING
MinValue = 0
MaxValue = 1
StepAmount = 1
DefaultValue = 1
[/configuration]
*/
// Per-channel (R, G, B) luminance weights, used by AvgLuminance() and by the
// edge detector in main(). The values match the luminance (Y) row of the
// linear-sRGB (D65) to CIE XYZ matrix.
CONSTANT float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750);
// Returns the root of the lumCoeff-weighted sum of the squared colour channels.
float AvgLuminance(float3 color)
{
    // Square each channel and apply its weight, then sum in x, y, z order.
    float3 weighted = color * color * lumCoeff;
    return sqrt(weighted.x + weighted.y + weighted.z);
}
// Converts a YUV triple to RGB by multiplying with a fixed 3x3 matrix.
float3 YUVtoRGB(float3 YUV)
{
    const float3x3 yuvToRgb = float3x3(
        1.000,  0.000,    1.28033,
        1.000, -0.21482, -0.38059,
        1.000,  2.12798,  0.000);
    float3 rgb = mul(yuvToRgb, YUV);
    return rgb;
}
// Converts an RGB triple to YUV by multiplying with a fixed 3x3 matrix.
float3 RGBtoYUV(float3 RGB)
{
    const float3x3 rgbToYuv = float3x3(
         0.2126,   0.7152,   0.0722,
        -0.09991, -0.33609,  0.436,
         0.615,   -0.55861, -0.05639);
    float3 yuv = mul(rgbToYuv, RGB);
    return yuv;
}
// Cel-shading pass entry point.
//  1. Builds a posterised "shaded" colour from a 3x3 neighbourhood, with
//     optional RGB rounding and luma quantisation in YUV space.
//  2. Estimates edge strength from luminance differences along both diagonals.
//  3. Combines the source colour, the shaded colour and the edge term
//     according to D_PALETTE_TYPE.
void main()
{
    float4 color = Sample();
    float2 texcoord = GetCoordinates();
    float2 texSize = GetResolution();

    const int NUM = 9;
    const float2 RoundingOffset = float2(0.25, 0.25);
    // Number of quantisation levels per channel; .r is also used for luma.
    const float3 thresholds = float3(9.0, 8.0, 6.0);

    // 3x3 grid of sample offsets, scaled by RoundingOffset at the point of use.
    float2 set[NUM] = BEGIN_ARRAY(float2, NUM)
        float2(-0.0078125, -0.0078125),
        float2(0.00, -0.0078125),
        float2(0.0078125, -0.0078125),
        float2(-0.0078125, 0.00),
        float2(0.00, 0.00),
        float2(0.0078125, 0.00),
        float2(-0.0078125, 0.0078125),
        float2(0.00, 0.0078125),
        float2(0.0078125, 0.0078125) END_ARRAY;

    // NOTE(review): the sum is seeded with the source colour and then receives
    // NUM more terms, yet is divided by NUM below, so shadedColor is slightly
    // brighter than a true mean. Kept as-is to preserve the existing look.
    float3 sum = color.rgb;
    for (int i = 0; i < NUM; i++)
    {
        float3 tap = SampleLocation(texcoord + set[i] * RoundingOffset).rgb;
        if (GetOption(G_COLOR_ROUNDING) == 1)
        {
            tap.r = round(tap.r * thresholds.r) / thresholds.r;
            tap.g = round(tap.g * thresholds.g) / thresholds.g;
            tap.b = round(tap.b * thresholds.b) / thresholds.b;
        }
        float tapLum = AvgLuminance(tap.xyz);

        // Quantise the luma channel only, then convert back to RGB.
        float3 yuv = RGBtoYUV(tap);
        if (GetOption(E_YUV_LUMA) == 0)
        {
            yuv.r = round(yuv.r * thresholds.r) / thresholds.r;
        }
        else
        {
            yuv.r = saturate(round(yuv.r * tapLum) / thresholds.r + tapLum);
        }
        sum += YUVtoRGB(yuv);
    }
    float3 shadedColor = (sum / NUM);

    // Edge detection: luminance difference across each diagonal, sampled
    // C_EDGE_THICKNESS pixels away from the current one.
    float2 pixel = float2((1.0/texSize.x) * GetOption(C_EDGE_THICKNESS),
                          (1.0/texSize.y) * GetOption(C_EDGE_THICKNESS));
    float edgeX = dot(SampleLocation(texcoord + pixel).rgb, lumCoeff);
    // The 4-component dot with weight -1.0 subtracts the previous luminance.
    edgeX = dot(float4(SampleLocation(texcoord - pixel).rgb, edgeX), float4(lumCoeff, -1.0));
    float edgeY = dot(SampleLocation(texcoord + float2(pixel.x, -pixel.y)).rgb, lumCoeff);
    edgeY = dot(float4(SampleLocation(texcoord + float2(-pixel.x, pixel.y)).rgb, edgeY), float4(lumCoeff, -1.0));
    // Squared magnitude of the two diagonal differences.
    float edge = dot(float2(edgeX, edgeY), float2(edgeX, edgeY));

    if (GetOption(D_PALETTE_TYPE) == 0)
    {
        // Outlines only: darken the source colour by the edge term.
        color.rgb = lerp(color.rgb, color.rgb + pow(edge, GetOption(B_EDGE_FILTER)) * -GetOption(A_EDGE_STRENGTH), GetOption(A_EDGE_STRENGTH));
    }
    else if (GetOption(D_PALETTE_TYPE) == 1)
    {
        // Outlined source colour with 25% of the shaded colour mixed in.
        color.rgb = lerp(color.rgb + pow(edge, GetOption(B_EDGE_FILTER)) * -GetOption(A_EDGE_STRENGTH), shadedColor, 0.25);
    }
    else if (GetOption(D_PALETTE_TYPE) == 2)
    {
        // Even mix of the outlined shaded colour and the outlined source colour.
        color.rgb = lerp(shadedColor + edge * -GetOption(A_EDGE_STRENGTH), pow(edge, GetOption(B_EDGE_FILTER)) * -GetOption(A_EDGE_STRENGTH) + color.rgb, 0.50);
    }

    color.a = AvgLuminance(color.rgb);
    SetOutput(saturate(color));
}

View file

@ -1,277 +0,0 @@
// CRT Shader by EasyMode
// License: GPL
// A flat CRT shader ideally for 1080p or higher displays.
// Recommended Settings:
// Video
// - Aspect Ratio: 4:3
// - Integer Scale: Off
// Shader
// - Filter: Nearest
// - Scale: Don't Care
// Example RGB Mask Parameter Settings:
// Aperture Grille (Default)
// - Dot Width: 1
// - Dot Height: 1
// - Stagger: 0
// Lottes' Shadow Mask
// - Dot Width: 2
// - Dot Height: 1
// - Stagger: 3
/*
[configuration]
[OptionRangeFloat]
GUIName = Sharpness Horizontal
OptionName = SHARPNESS_H
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = 0.5
[OptionRangeFloat]
GUIName = Sharpness Vertical
OptionName = SHARPNESS_V
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Mask Strength
OptionName = MASK_STRENGTH
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.01
DefaultValue = 0.3
[OptionRangeFloat]
GUIName = Mask Dot Width
OptionName = MASK_DOT_WIDTH
MinValue = 1.0
MaxValue = 100.0
StepAmount = 1.0
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Mask Dot Height
OptionName = MASK_DOT_HEIGHT
MinValue = 1.0
MaxValue = 100.0
StepAmount = 1.0
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Mask Stagger
OptionName = MASK_STAGGER
MinValue = 0.0
MaxValue = 100.0
StepAmount = 1.0
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Mask Size
OptionName = MASK_SIZE
MinValue = 1.0
MaxValue = 100.0
StepAmount = 1.0
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Scanline Strength
OptionName = SCANLINE_STRENGTH
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Scanline Beam Width Min.
OptionName = SCANLINE_BEAM_WIDTH_MIN
MinValue = 0.5
MaxValue = 5.0
StepAmount = 0.5
DefaultValue = 1.5
[OptionRangeFloat]
GUIName = Scanline Beam Width Max.
OptionName = SCANLINE_BEAM_WIDTH_MAX
MinValue = 0.5
MaxValue = 5.0
StepAmount = 0.5
DefaultValue = 1.5
[OptionRangeFloat]
GUIName = Scanline Brightness Min.
OptionName = SCANLINE_BRIGHT_MIN
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = 0.35
[OptionRangeFloat]
GUIName = Scanline Brightness Max.
OptionName = SCANLINE_BRIGHT_MAX
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = 0.65
[OptionRangeFloat]
GUIName = Scanline Cutoff
OptionName = SCANLINE_CUTOFF
MinValue = 1.0
MaxValue = 1000.0
StepAmount = 1.0
DefaultValue = 400.0
[OptionRangeFloat]
GUIName = Gamma Input
OptionName = GAMMA_INPUT
MinValue = 0.1
MaxValue = 5.0
StepAmount = 0.1
DefaultValue = 2.0
[OptionRangeFloat]
GUIName = Gamma Output
OptionName = GAMMA_OUTPUT
MinValue = 0.1
MaxValue = 5.0
StepAmount = 0.1
DefaultValue = 1.8
[OptionRangeFloat]
GUIName = Brightness Boost
OptionName = BRIGHT_BOOST
MinValue = 1.0
MaxValue = 2.0
StepAmount = 0.01
DefaultValue = 1.2
[OptionRangeFloat]
GUIName = Dilation
OptionName = DILATION
MinValue = 0.0
MaxValue = 1.0
StepAmount = 1.0
DefaultValue = 1.0
[/configuration]
*/
// Forces a value to be positive and at least 1e-5, so it is never zero.
#define FIX(c) max(abs(c), 1e-5)
#define PI 3.141592653589
// Texture fetch used throughout this shader; every sample goes through dilate().
#define TEX2D(c) dilate(SampleLocation(c))
// Set to 0 to use linear filter and gain speed
#define ENABLE_LANCZOS 1
// Multiplies the colour by a gain that moves from 1.0 towards the colour
// itself as DILATION goes from 0 to 1, so DILATION = 1 squares the colour.
vec4 dilate(vec4 col)
{
    vec4 gain = mix(vec4(1.0), col, GetOption(DILATION));
    return col * gain;
}
// Reshapes a fractional distance x with a half-circle s-curve for sharper
// (more pixelated) interpolation; `sharp` blends between the linear distance
// (0) and the fully curved one (1).
// Single-line formula for Graph Toy:
//   0.5 - sqrt(0.25 - (x - step(0.5, x)) * (x - step(0.5, x))) * sign(0.5 - x)
float curve_distance(float x, float sharp)
{
    // Measure the distance from whichever end (0 or 1) x has reached or passed.
    float nearest = step(0.5, x);
    float offset = x - nearest;
    float arc = sqrt(0.25 - offset * offset);
    float curve = 0.5 - arc * sign(0.5 - x);
    return mix(x, curve, sharp);
}
// Fetches four taps spaced dx apart, at co - dx, co, co + dx and co + 2*dx,
// and packs them as the columns of a matrix.
mat4x4 get_color_matrix(vec2 co, vec2 dx)
{
    vec4 tapPrev = TEX2D(co - dx);
    vec4 tapHere = TEX2D(co);
    vec4 tapNext = TEX2D(co + dx);
    vec4 tapFar = TEX2D(co + 2.0 * dx);
    return mat4x4(tapPrev, tapHere, tapNext, tapFar);
}
// Applies the four filter coefficients to the four taps and clamps the result
// to the range spanned by the two middle taps.
vec3 filter_lanczos(vec4 coeffs, mat4x4 color_matrix)
{
    vec4 lowest = min(color_matrix[1], color_matrix[2]);
    vec4 highest = max(color_matrix[1], color_matrix[2]);
    vec4 filtered = clamp(color_matrix * coeffs, lowest, highest);
    return filtered.rgb;
}
// CRT pass entry point: resamples the image at native resolution (Lanczos or
// linear horizontally, curved linear vertically), then applies input gamma,
// scanlines, an RGB dot mask, output gamma and a brightness boost.
void main()
{
vec2 vTexCoord = GetCoordinates();
vec2 nativeSize = 1.0 / GetInvNativePixelSize();
// xy = native size in pixels, zw = size of one native pixel in UV space.
vec4 SourceSize = vec4(nativeSize, 1.0/nativeSize);
vec2 dx = vec2(SourceSize.z, 0.0);
vec2 dy = vec2(0.0, SourceSize.w);
// Position in native pixels, shifted so integer values fall on pixel centres.
vec2 pix_co = vTexCoord * SourceSize.xy - vec2(0.5, 0.5);
// UV of the centre of the native pixel at floor(pix_co).
vec2 tex_co = (floor(pix_co) + vec2(0.5, 0.5)) * SourceSize.zw;
// Fractional distance from that pixel centre to the next one.
vec2 dist = fract(pix_co);
float curve_x;
vec3 col, col2;
#if ENABLE_LANCZOS
// The horizontal sharpness option is squared in this path.
curve_x = curve_distance(dist.x, GetOption(SHARPNESS_H) * GetOption(SHARPNESS_H));
// Distances (times PI) from the sample point to the four horizontal taps.
vec4 coeffs = PI * vec4(1.0 + curve_x, curve_x, 1.0 - curve_x, 2.0 - curve_x);
// Keep the distances away from zero before dividing by them.
coeffs = FIX(coeffs);
coeffs = 2.0 * sin(coeffs) * sin(coeffs * 0.5) / (coeffs * coeffs);
// Normalise so the four weights sum to 1.
coeffs /= dot(coeffs, vec4(1.0));
// Filter the current row (col) and the row below it (col2).
col = filter_lanczos(coeffs, get_color_matrix(tex_co, dx));
col2 = filter_lanczos(coeffs, get_color_matrix(tex_co + dy, dx));
#else
curve_x = curve_distance(dist.x, GetOption(SHARPNESS_H));
col = mix(TEX2D(tex_co).rgb, TEX2D(tex_co + dx).rgb, curve_x);
col2 = mix(TEX2D(tex_co + dy).rgb, TEX2D(tex_co + dx + dy).rgb, curve_x);
#endif
// Vertical interpolation between the two filtered rows.
col = mix(col, col2, curve_distance(dist.y, GetOption(SHARPNESS_V)));
// dilate() already squared the samples when DILATION is 1, so the input gamma
// exponent is divided by (DILATION + 1).
col = pow(col, vec3(GetOption(GAMMA_INPUT) / (GetOption(DILATION) + 1.0)));
float luma = dot(vec3(0.2126, 0.7152, 0.0722), col);
// Brightness estimate: mean of the largest channel and the luma.
float bright = (max(col.r, max(col.g, col.b)) + luma) * 0.5;
float scan_bright = clamp(bright, GetOption(SCANLINE_BRIGHT_MIN), GetOption(SCANLINE_BRIGHT_MAX));
float scan_beam = clamp(bright * GetOption(SCANLINE_BEAM_WIDTH_MAX), GetOption(SCANLINE_BEAM_WIDTH_MIN), GetOption(SCANLINE_BEAM_WIDTH_MAX));
// One cosine period per native line; scan_beam is the exponent shaping the profile.
float scan_weight = 1.0 - pow(cos(vTexCoord.y * 2.0 * PI * SourceSize.y) * 0.5 + 0.5, scan_beam) * GetOption(SCANLINE_STRENGTH);
float mask = 1.0 - GetOption(MASK_STRENGTH);
// Index of the mask cell under this output pixel (SourceSize.xy cancels out here).
vec2 mod_fac = floor(vTexCoord * GetWindowSize().xy * SourceSize.xy / (SourceSize.xy * vec2(GetOption(MASK_SIZE), GetOption(MASK_DOT_HEIGHT) * GetOption(MASK_SIZE))));
// Which of the three phosphor colours this cell belongs to; odd rows are
// shifted by MASK_STAGGER.
int dot_no = int(mod((mod_fac.x + mod(mod_fac.y, 2.0) * GetOption(MASK_STAGGER)) / GetOption(MASK_DOT_WIDTH), 3.0));
vec3 mask_weight;
if (dot_no == 0) mask_weight = vec3(1.0, mask, mask);
else if (dot_no == 1) mask_weight = vec3(mask, 1.0, mask);
else mask_weight = vec3(mask, mask, 1.0);
// Scanlines are disabled once the native height reaches the cutoff.
if (SourceSize.y >= GetOption(SCANLINE_CUTOFF))
scan_weight = 1.0;
col2 = col.rgb;
col *= vec3(scan_weight);
// Brighter pixels keep more of the colour without scanlines applied.
col = mix(col, col2, scan_bright);
col *= mask_weight;
col = pow(col, vec3(1.0 / GetOption(GAMMA_OUTPUT)));
SetOutput(vec4(col * GetOption(BRIGHT_BOOST), 1.0));
}

View file

@ -1,180 +0,0 @@
// zfast_crt - A very simple CRT shader.
// Copyright (C) 2017 Greg Hogan (SoltanGris42)
// edited by metallic 77.
// ported to slang by gregoricavichioli & hunterk.
// ported to dolphinfx by Hyllian.
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or (at your option)
// any later version.
/*
[configuration]
[OptionRangeFloat]
GUIName = Curvature
OptionName = Curvature
MinValue = 0.0
MaxValue = 1.0
StepAmount = 1.0
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Convergence X-Axis
OptionName = blurx
MinValue = -1.0
MaxValue = 2.0
StepAmount = 0.05
DefaultValue = 0.85
[OptionRangeFloat]
GUIName = Convergence Y-Axis
OptionName = blury
MinValue = -1.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = -0.10
[OptionRangeFloat]
GUIName = Scanline Amount (Low)
OptionName = HIGHSCANAMOUNT1
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = 0.4
[OptionRangeFloat]
GUIName = Scanline Amount (High)
OptionName = HIGHSCANAMOUNT2
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = 0.3
[OptionRangeFloat]
GUIName = Mask Type
OptionName = TYPE
MinValue = 0.0
MaxValue = 1.0
StepAmount = 1.0
DefaultValue = 0.0
[OptionRangeFloat]
GUIName = Mask Effect Amount
OptionName = MASK_DARK
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = 0.3
[OptionRangeFloat]
GUIName = Mask/Scanline Fade
OptionName = MASK_FADE
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.05
DefaultValue = 0.7
[OptionRangeFloat]
GUIName = Saturation
OptionName = sat
MinValue = 0.0
MaxValue = 3.0
StepAmount = 0.05
DefaultValue = 1.0
[OptionRangeFloat]
GUIName = Flicker
OptionName = FLICK
MinValue = 0.0
MaxValue = 50.0
StepAmount = 1.0
DefaultValue = 10.0
[/configuration]
*/
#define pi 3.14159
// The expression macros below are fully parenthesised so they expand safely
// inside larger expressions (for example as a divisor or multiplicand).
// They expect a `SourceSize` vector to be in scope at the point of use.
// Per-axis sample offset: the blur option divided by twice the source size.
#define blur_y (GetOption(blury)/(SourceSize.y*2.0))
#define blur_x (GetOption(blurx)/(SourceSize.x*2.0))
// Time value that drives the flicker oscillation.
#define iTimer (float(GetTime())*2.0)
// Flicker amplitude: the FLICK option scaled down by 1000.
#define flicker (GetOption(FLICK)/1000.0)
// Screen-curvature distortion. Each axis is stretched by an amount that grows
// with the squared distance from the centre along the other axis.
vec2 Warp(vec2 pos)
{
    // Work in [-1, 1] space so the centre of the screen is the origin.
    vec2 centred = pos*2.0 - 1.0;
    vec2 stretch = vec2(1.0 + (centred.y*centred.y)*0.03,
                        1.0 + (centred.x*centred.x)*0.05);
    centred *= stretch;
    // Back to [0, 1] texture space.
    return centred*0.5 + 0.5;
}
// zfast_crt entry point: optional screen curvature, a three-tap convergence
// blur with flicker, a saturation control, scanlines (for sources up to 400
// lines), and a two- or three-pixel dot mask.
void main()
{
    vec2 vTexCoord = GetCoordinates();
    vec2 texSize = 1.0 / GetInvNativePixelSize();
    // xy = native size in pixels, zw = size of one native pixel in UV space.
    // The name is required by the blur_x / blur_y macros.
    vec4 SourceSize = vec4(texSize, 1.0 / texSize);
    float maskFade = 0.3333*GetOption(MASK_FADE);
    bool curved = (GetOption(Curvature) == 1.0);

    vec2 pos = vTexCoord;
    // Only meaningful when curvature is on; initialised so it is never read
    // while undefined.
    vec2 corn = vec2(0.0);
    if (curved)
    {
        pos = Warp(vTexCoord.xy);
        corn = min(pos, vec2(1.0) - pos); // distance to the nearest edge, used
        corn.x = 0.00001/corn.x;          // to mask the rounded corners below
    }

    // Pull the sample row 60% of the way towards the centre of its source line.
    float OGL2Pos = pos.y*SourceSize.y;
    float cent = floor(OGL2Pos)+0.5;
    float ycoord = cent*SourceSize.w;
    ycoord = mix(pos.y, ycoord, 0.6);
    pos = vec2(pos.x, ycoord);

    // Three taps: the outer two are offset in opposite directions and carry the
    // time-varying flicker term.
    vec3 sample1 = sin(iTimer)*flicker + SampleLocation(vec2(pos.x + blur_x, pos.y - blur_y)).rgb;
    vec3 sample2 = 0.5*SampleLocation(pos).rgb;
    vec3 sample3 = sin(iTimer)*flicker + SampleLocation(vec2(pos.x - blur_x, pos.y + blur_y)).rgb;

    // Red leans on the first tap, blue on the third, and green on both, which
    // separates the colour channels slightly.
    vec3 colour = vec3(sample1.r*0.5 + sample2.r,
                       sample1.g*0.25 + sample2.g + sample3.g*0.25,
                       sample2.b + sample3.b*0.5);

    // Saturation: mix between the grey level and the colour. The option is read
    // through GetOption() like every other option in this shader.
    vec3 lumweight = vec3(0.22, 0.71, 0.07);
    float lumsat = dot(colour, lumweight);
    vec3 graycolour = vec3(lumsat);
    colour = vec3(mix(graycolour, colour.rgb, GetOption(sat)));

    // The scanline amount moves from the "low" to the "high" option as the
    // brightest channel increases.
    float SCANAMOUNT = mix(GetOption(HIGHSCANAMOUNT1), GetOption(HIGHSCANAMOUNT2), max(max(colour.r, colour.g), colour.b));

    // Scanlines are skipped for sources taller than 400 lines.
    if (SourceSize.y <= 400.0)
    {
        colour *= SCANAMOUNT * sin(fract(OGL2Pos)*3.14159)+1.0-SCANAMOUNT;
        colour *= SCANAMOUNT * sin(fract(1.0-OGL2Pos)*3.14159)+1.0-SCANAMOUNT;
        colour *= SCANAMOUNT * sin(fract(1.0+OGL2Pos)*3.14159)+1.0-SCANAMOUNT;
    }

    // Mask period in output pixels: 2 for type 0, 3 otherwise.
    float steps = (GetOption(TYPE) == 0.0) ? 0.5 : 0.3333;
    float whichmask = fract(vTexCoord.x*GetWindowSize().x*steps);
    float mask = 1.0 + float(whichmask < steps) * (-GetOption(MASK_DARK));
    // The mask fades out as the pixel gets brighter.
    colour.rgb = mix(mask*colour, colour, dot(colour.rgb, vec3(maskFade)));

    // Black out the rounded corners and anything outside the warped image.
    if (curved && (corn.y < corn.x || corn.x < 0.00001))
        colour = vec3(0.0);

    SetOutput(vec4(colour.rgb, 1.0));
}

View file

@ -1,144 +0,0 @@
// Hyllian's jinc windowed-jinc 2-lobe with anti-ringing Shader
// Copyright (C) 2011-2024 Hyllian - sergiogdb@gmail.com
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
/*
[configuration]
[OptionRangeFloat]
GUIName = Window Sinc Param
OptionName = JINC2_WINDOW_SINC
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.01
DefaultValue = 0.50
[OptionRangeFloat]
GUIName = Sinc Param
OptionName = JINC2_SINC
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.01
DefaultValue = 0.88
[OptionRangeFloat]
GUIName = Anti-ringing Strength
OptionName = JINC2_AR_STRENGTH
MinValue = 0.0
MaxValue = 1.0
StepAmount = 0.1
DefaultValue = 0.5
[/configuration]
*/
// NOTE(review): halfpi is not referenced by the code visible in this file.
#define halfpi 1.5707963267948966192313216916398
#define pi 3.1415926535897932384626433832795
// Angular scales of the two sine terms in resampler(): the window and the
// kernel options, each multiplied by pi. The option names are used directly.
#define wa (JINC2_WINDOW_SINC*pi)
#define wb (JINC2_SINC*pi)
// Euclidean distance between two points.
float d(vec2 pt1, vec2 pt2)
{
    vec2 delta = pt2 - pt1;
    float squared = dot(delta, delta);
    return sqrt(squared);
}
// Component-wise minimum of four vectors.
vec3 min4(vec3 a, vec3 b, vec3 c, vec3 d)
{
    vec3 lowest = min(c, d);
    lowest = min(b, lowest);
    lowest = min(a, lowest);
    return lowest;
}
// Component-wise maximum of four vectors.
vec3 max4(vec3 a, vec3 b, vec3 c, vec3 d)
{
    vec3 highest = max(c, d);
    highest = max(b, highest);
    highest = max(a, highest);
    return highest;
}
// Kernel weight for one tap at distance r: sin(r*wa) * sin(r*wb) / r^2, with
// the limit value wa*wb substituted at r == 0 to avoid dividing by zero.
float resampler_weight(float r)
{
    return (r == 0.0) ? wa*wb : sin(r*wa)*sin(r*wb)/(r*r);
}
// Evaluates the kernel weight for four tap distances at once.
vec4 resampler(vec4 x)
{
    return vec4(resampler_weight(x.x),
                resampler_weight(x.y),
                resampler_weight(x.z),
                resampler_weight(x.w));
}
// Resampling entry point: weights a 4x4 neighbourhood of native texels by
// their distance to the sample position, normalises the sum, and limits
// ringing by clamping towards the range of the four central texels.
void main()
{
vec2 SourceSize = 1.0 / GetInvNativePixelSize();
vec2 invSourceSize = 1.0 / SourceSize;
vec2 vTexCoord = GetCoordinates();
vec3 color;
mat4x4 weights;
// Unit steps in texel space; they are rescaled to UV space further down.
vec2 dx = vec2(1.0, 0.0);
vec2 dy = vec2(0.0, 1.0);
// Sample position in texel units.
vec2 pc = vTexCoord*SourceSize;
// Centre of the texel at floor(pc - 0.5), in texel units.
vec2 tc = (floor(pc-vec2(0.5,0.5))+vec2(0.5,0.5));
// weights[j] holds the four tap weights for the row at vertical offset j-1;
// within a row the taps run from horizontal offset -1 to +2.
weights[0] = resampler(vec4(d(pc, tc -dx -dy), d(pc, tc -dy), d(pc, tc +dx -dy), d(pc, tc+2.0*dx -dy)));
weights[1] = resampler(vec4(d(pc, tc -dx ), d(pc, tc ), d(pc, tc +dx ), d(pc, tc+2.0*dx )));
weights[2] = resampler(vec4(d(pc, tc -dx +dy), d(pc, tc +dy), d(pc, tc +dx +dy), d(pc, tc+2.0*dx +dy)));
weights[3] = resampler(vec4(d(pc, tc -dx+2.0*dy), d(pc, tc +2.0*dy), d(pc, tc +dx+2.0*dy), d(pc, tc+2.0*dx+2.0*dy)));
// Convert the steps and the anchor from texel units to UV space.
dx = dx * invSourceSize;
dy = dy * invSourceSize;
tc = tc * invSourceSize;
// reading the texels (cXY: X = column 0..3, Y = row 0..3)
vec3 c00 = SampleLocation(tc -dx -dy).xyz;
vec3 c10 = SampleLocation(tc -dy).xyz;
vec3 c20 = SampleLocation(tc +dx -dy).xyz;
vec3 c30 = SampleLocation(tc+2.0*dx -dy).xyz;
vec3 c01 = SampleLocation(tc -dx ).xyz;
vec3 c11 = SampleLocation(tc ).xyz;
vec3 c21 = SampleLocation(tc +dx ).xyz;
vec3 c31 = SampleLocation(tc+2.0*dx ).xyz;
vec3 c02 = SampleLocation(tc -dx +dy).xyz;
vec3 c12 = SampleLocation(tc +dy).xyz;
vec3 c22 = SampleLocation(tc +dx +dy).xyz;
vec3 c32 = SampleLocation(tc+2.0*dx +dy).xyz;
vec3 c03 = SampleLocation(tc -dx+2.0*dy).xyz;
vec3 c13 = SampleLocation(tc +2.0*dy).xyz;
vec3 c23 = SampleLocation(tc +dx+2.0*dy).xyz;
vec3 c33 = SampleLocation(tc+2.0*dx+2.0*dy).xyz;
// Get min/max samples of the four central texels
vec3 min_sample = min4(c11, c21, c12, c22);
vec3 max_sample = max4(c11, c21, c12, c22);
// Weighted sum, one row at a time: each mat4x3 * vec4 product adds the four
// texels of a row scaled by their weights.
color = mat4x3(c00, c10, c20, c30) * weights[0];
color+= mat4x3(c01, c11, c21, c31) * weights[1];
color+= mat4x3(c02, c12, c22, c32) * weights[2];
color+= mat4x3(c03, c13, c23, c33) * weights[3];
// Normalise by the sum of all sixteen weights.
color = color/(dot(weights * vec4(1.0), vec4(1.0)));
// Anti-ringing: blend between the raw result and the result clamped to the
// central min/max, according to JINC2_AR_STRENGTH.
vec3 aux = color;
color = clamp(color, min_sample, max_sample);
color = mix(aux, color, JINC2_AR_STRENGTH);
// write the filtered colour with opaque alpha
SetOutput(vec4(color, 1.0));
}

View file

@ -1,120 +0,0 @@
/*===============================================================================*\
|######################## [Dolphin FX Suite 2.20] #######################|
|########################## By Asmodean ##########################|
|| ||
|| This program is free software; you can redistribute it and/or ||
|| modify it under the terms of the GNU General Public License ||
|| as published by the Free Software Foundation; either version 2 ||
|| of the License, or (at your option) any later version. ||
|| ||
|| This program is distributed in the hope that it will be useful, ||
|| but WITHOUT ANY WARRANTY; without even the implied warranty of ||
|| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ||
|| GNU General Public License for more details. (C)2015 ||
|| ||
|#################################################################################|
\*===============================================================================*/
// Sourced from https://raw.githubusercontent.com/Asmodean-/dolphin/89d640cd557189bb5f921fc219150c74c39bdc55/Data/Sys/Shaders/DolphinFX.glsl with modifications.
/*
[configuration]
[OptionRangeInteger]
GUIName = ScanlineType
OptionName = A_SCANLINE_TYPE
MinValue = 0
MaxValue = 2
StepAmount = 1
DefaultValue = 0
[OptionRangeFloat]
GUIName = ScanlineIntensity
OptionName = B_SCANLINE_INTENSITY
MinValue = 0.15
MaxValue = 0.30
StepAmount = 0.01
DefaultValue = 0.18
[OptionRangeFloat]
GUIName = ScanlineThickness
OptionName = B_SCANLINE_THICKNESS
MinValue = 0.20
MaxValue = 0.80
StepAmount = 0.01
DefaultValue = 0.50
[OptionRangeFloat]
GUIName = ScanlineBrightness
OptionName = B_SCANLINE_BRIGHTNESS
MinValue = 0.50
MaxValue = 2.00
StepAmount = 0.01
DefaultValue = 1.10
[OptionRangeFloat]
GUIName = ScanlineSpacing
OptionName = B_SCANLINE_SPACING
MinValue = 0.10
MaxValue = 0.99
StepAmount = 0.01
DefaultValue = 0.25
[/configuration]
*/
// Per-channel (R, G, B) weights used for the luminance estimate in AvgLuminance().
// The values match the luminance (Y) row of the linear-sRGB (D65) to CIE XYZ matrix.
CONSTANT float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750);
// Returns the root of the lumCoeff-weighted sum of the squared colour channels.
float AvgLuminance(float3 color)
{
    // Square each channel and apply its weight, then sum in x, y, z order.
    float3 weighted = color * color * lumCoeff;
    return sqrt(weighted.x + weighted.y + weighted.z);
}
// Intensity contributed by a pixel that lies on a lit scanline: a smoothstep
// of the colour plus the unit-length (rgb, luminance) vector.
float ScanlineIntensity(float4 color)
{
    float4 direction = float4(color.xyz, AvgLuminance(color.xyz));
    // normalize() is undefined for a zero-length vector, which is exactly what
    // a pure black pixel produces; return zero there instead of letting NaNs
    // reach the output.
    float len = length(direction);
    float4 unit = (len > 0.0) ? (direction / len) : float4(0.0, 0.0, 0.0, 0.0);
    return smoothstep(0.2, GetOption(B_SCANLINE_BRIGHTNESS), color) + unit;
}

// Scanline pass entry point. Depending on A_SCANLINE_TYPE the pattern runs as
// horizontal lines (0), vertical lines (1) or a grid (2); any other value
// leaves the intensity term at zero.
void main()
{
    float4 color = Sample();

    // A pixel is in a "gap" on an axis when the fractional part of its scaled
    // window coordinate exceeds the thickness option.
    bool gapX = fract(gl_FragCoord.x * GetOption(B_SCANLINE_SPACING)) > GetOption(B_SCANLINE_THICKNESS);
    bool gapY = fract(gl_FragCoord.y * GetOption(B_SCANLINE_SPACING)) > GetOption(B_SCANLINE_THICKNESS);

    bool lit = false;
    if (GetOption(A_SCANLINE_TYPE) == 0)
    {
        // Pattern varies with the Y coordinate (horizontal lines).
        lit = !gapY;
    }
    else if (GetOption(A_SCANLINE_TYPE) == 1)
    {
        // Pattern varies with the X coordinate (vertical lines).
        lit = !gapX;
    }
    else if (GetOption(A_SCANLINE_TYPE) == 2)
    {
        // Grid: only pixels in a gap on both axes stay dark.
        lit = !(gapX && gapY);
    }

    float4 intensity = float4(0.0, 0.0, 0.0, 0.0);
    if (lit)
    {
        intensity = ScanlineIntensity(color);
    }

    float level = (4.0-GetCoordinates().x) * GetOption(B_SCANLINE_INTENSITY);
    color = intensity * (0.5 - level) + color * 1.1;
    SetOutput(saturate(color));
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,84 +0,0 @@
#include "ReShade.fxh"
// CrashGG presents
// 'XY-Pos-free'
// A super-simple shader derived from the super-fast crt-cyclon.fx. It only provides
// free pixel stretching and position translation along the X and Y axes.
// Suitable for users who only want to fine-tune the screen zoom and position and do not want the bundled CRT-like effects.
// Fixes some bugs in the original version and adjusts the step sizes and ranges.
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or (at your option)
// any later version.
// Horizontal zoom: the X texture coordinate is scaled by (1.0 - zoomx).
uniform float zoomx <
ui_type = "drag";
ui_min = -0.3000;
ui_max = 0.3000;
ui_step = 0.0005;
ui_label = "Zoom Image X";
> = 0.0000;
// Vertical zoom: the Y texture coordinate is scaled by (1.0 - zoomy).
uniform float zoomy <
ui_type = "drag";
ui_min = -0.3000;
ui_max = 0.3000;
ui_step = 0.0005;
ui_label = "Zoom Image Y";
> = 0.0000;
// Horizontal shift: centerx / 100 is subtracted from the X texture coordinate.
uniform float centerx <
ui_type = "drag";
ui_min = -9.99;
ui_max = 9.99;
ui_step = 0.01;
ui_label = "Image Center X";
> = 0.00;
// Vertical shift: centery / 100 is subtracted from the Y texture coordinate.
uniform float centery <
ui_type = "drag";
ui_min = -9.99;
ui_max = 9.99;
ui_step = 0.01;
ui_label = "Image Center Y";
> = 0.00;
// Curvature hook kept from the shader this one derives from. Both curvature
// coefficients are multiplied by 0, so the scale factor is (1, 1) and the
// function only converts to [-1, 1] space and back.
float2 Warp(float2 pos)
{
    float2 centred = pos*2.0-1.0;
    float2 scale = float2(1.0+centred.y*centred.y*0, 1.0+centred.x*centred.x*0);
    centred *= scale;
    return centred*0.5+0.5;
}
// Pixel shader: samples the back buffer at a zoomed and shifted coordinate and
// returns the colour with opaque alpha. No CRT effects are applied.
float4 CRT_CYCLON_PS(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    // Zoom: scale the coordinate by (1 - zoom) on each axis.
    float2 scaled = vTexCoord*float2(1.0-zoomx,1.0-zoomy);
    // Centre: the offsets are given in hundredths of the screen.
    float2 shift = float2(centerx,centery)/100.0;
    float2 pos = Warp(scaled-shift);

    float3 res = tex2D(ReShade::BackBuffer,pos).rgb;
    return float4(res, 1.0);
}
// Single-pass technique: runs CRT_CYCLON_PS with the PostProcessVS vertex
// shader. The pass sets no RenderTarget.
technique CRT_CYCLON
{
pass PS_CRT_CYCLON
{
VertexShader = PostProcessVS;
PixelShader = CRT_CYCLON_PS;
}
}

View file

@ -1,104 +0,0 @@
#include "ReShade.fxh"
/*
Copyright (C) 2016 guest(r) - guest.r@gmail.com
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// Filled in by the host from the "normalized_native_pixel_size" source.
// NOTE(review): presumably the size of one native-resolution pixel in texture-coordinate units — confirm against the host's uniform sources.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
// Back-buffer sampler with clamped addressing and point (nearest) min/mag filtering.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// All-ones vector: dot(abs(a - b), dt) sums the absolute per-channel differences of two colours.
static const float3 dt = float3(1.0,1.0,1.0);
// Edge-aware fetch: samples the four diagonal neighbours of `coord` and blends
// the two diagonal pairs, giving more weight to the pair whose colours differ
// less.
//   tex   - sampler to read from.
//   coord - centre texture coordinate.
//   yx    - offsets; PS_aa_shader_40 passes (+dx, +dy, -dx, -dy).
// Returns the blended RGB colour.
float3 texture2d(sampler2D tex, float2 coord, float4 yx)
{
    // With the caller's packing: zw = (-x,-y), xw = (+x,-y), xy = (+x,+y), zy = (-x,+y).
    float3 cornerNN = tex2D(tex, coord + yx.zw).xyz;
    float3 cornerPN = tex2D(tex, coord + yx.xw).xyz;
    float3 cornerPP = tex2D(tex, coord + yx.xy).xyz;
    float3 cornerNP = tex2D(tex, coord + yx.zy).xyz;

    // Colour difference along each diagonal; the 0.001 bias keeps the divisor non-zero.
    float diffMain = dot(abs(cornerNN - cornerPP), dt) + 0.001;
    float diffAnti = dot(abs(cornerNP - cornerPN), dt) + 0.001;

    // Each pair is weighted by the difference measured across the other pair.
    float3 weighted = diffAnti*(cornerNN + cornerPP) + diffMain*(cornerNP + cornerPN);
    return 0.5*weighted/(diffMain + diffAnti);
}
// Blends two sample pairs, weighting each pair by the colour difference
// measured across the other pair (a 0.001 bias keeps the divisor non-zero).
// The result is a weighted sum of pair sums, i.e. about twice a colour; the
// caller applies the final 0.5.
float3 aa40_pair_mix(float3 a0, float3 a1, float3 b0, float3 b1)
{
    float wa = dot(abs(a0 - a1), dt) + 0.001;
    float wb = dot(abs(b0 - b1), dt) + 0.001;
    return (wb*(a0 + a1) + wa*(b0 + b1))/(wa + wb);
}

// Pixel shader for the aa-shader-4.0 anti-aliasing filter.
//   vpos      - SV_Position of the pixel (not used).
//   vTexCoord - texture coordinate of the pixel being shaded.
// Returns the filtered RGB colour with alpha set to 1.0.
float4 PS_aa_shader_40(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    // Working grid: 4.0 / NormalizedNativePixelSize cells per axis.
    float2 gridSize = 4.0 / NormalizedNativePixelSize;
    float2 cell = 1.0 / gridSize;
    float4 yx = float4(cell, -cell);

    // Position on the grid and the fractional part inside the current cell.
    float2 gridPos = vTexCoord * gridSize;
    float2 fp = frac(gridPos);

    // One-cell offsets along the axes and the two diagonals.
    float2 stepX = float2(cell.x, 0.0);
    float2 stepY = float2(0.0, cell.y);
    float2 diagA = float2(cell.x, cell.y);
    float2 diagB = float2(-cell.x, cell.y);

    // Coordinate of the centre sample (the 1.0001 factor is kept from the original).
    float2 pC4 = floor(gridPos) * 1.0001 * cell;

    // 3x3 neighbourhood, row by row:
    //   C0 C1 C2
    //   C3 C4 C5
    //   C6 C7 C8
    float3 C0 = texture2d(sBackBuffer, pC4 - diagA, yx);
    float3 C1 = texture2d(sBackBuffer, pC4 - stepY, yx);
    float3 C2 = texture2d(sBackBuffer, pC4 - diagB, yx);
    float3 C3 = texture2d(sBackBuffer, pC4 - stepX, yx);
    float3 C4 = texture2d(sBackBuffer, pC4        , yx);
    float3 C5 = texture2d(sBackBuffer, pC4 + stepX, yx);
    float3 C6 = texture2d(sBackBuffer, pC4 + diagB, yx);
    float3 C7 = texture2d(sBackBuffer, pC4 + stepY, yx);
    float3 C8 = texture2d(sBackBuffer, pC4 + diagA, yx);

    // One blended value per 2x2 quadrant around the centre sample.
    float3 ul = aa40_pair_mix(C0, C4, C1, C3);
    float3 ur = aa40_pair_mix(C1, C5, C2, C4);
    float3 dl = aa40_pair_mix(C3, C7, C6, C4);
    float3 dr = aa40_pair_mix(C4, C8, C5, C7);

    // Bilinear mix of the four quadrant values; 0.5 undoes the pair-sum doubling.
    float3 rowD = dr*fp.x + dl*(1-fp.x);
    float3 rowU = ur*fp.x + ul*(1-fp.x);
    float3 c11 = 0.5*(rowD*fp.y + rowU*(1-fp.y));
    return float4(c11, 1.0);
}
// Single unnamed pass: runs PS_aa_shader_40 with the PostProcessVS vertex
// shader. The pass sets no RenderTarget.
technique aa_shader_40
{
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_aa_shader_40;
}
}

View file

@ -1,163 +0,0 @@
#include "ReShade.fxh"
/*
G-sharp resampler 2.0 - dynamic range (upscaler, downsampler)
Copyright (C) 2024 guest(r)
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// Filter range: gsharp2 samples up to ceil(2 * GSHARP0) texels on each side of the centre.
uniform float GSHARP0 <
ui_type = "drag";
ui_min = 0.75;
ui_max = 8.0;
ui_step = 0.05;
ui_label = "Filter Range";
> = 2.45;
// Divisor applied to the normalized distance inside getw before the Gaussian falloff.
uniform float GBOOST <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.5;
ui_step = 0.05;
ui_label = "Filter Boost (same range, speedup)";
> = 1.75;
// Raw sharpness setting; the GMAXSHARP macro below scales it before use.
uniform float GMAXSHARP <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.25;
ui_step = 0.01;
ui_label = "Filter Sharpness";
> = 0.1;
// Anti-ringing amount: lerp factor between the plain result and the result clamped to the local min/max.
uniform float GPAR <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.10;
ui_label = "Anti-Ringing";
> = 0.50;
// Host-provided uniforms, identified by their "source" annotation.
// NormalizedNativePixelSize is not referenced by the shader code visible in this file.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
// Back-buffer sampler with clamped addressing and point filtering.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// Intermediate RGBA8 target: written by the horizontal pass, read by the vertical pass through sGSHARP2_H.
texture2D tGSHARP2_H{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
sampler2D sGSHARP2_H{Texture=tGSHARP2_H;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// From here on GMAXSHARP expands to the uniform of the same name scaled by 0.25 * GBOOST^2.
// NOTE(review): relies on the preprocessor not re-expanding a macro's own name inside its
// replacement list (standard C preprocessor behaviour) — confirm for the ReShade FX preprocessor.
#define GMAXSHARP (0.25*GBOOST*GBOOST*GMAXSHARP)
// Gaussian-shaped falloff exp(-2.33 * x^2): returns 1.0 at x = 0 and decays
// symmetrically towards 0 as |x| grows. (Despite the name, this is not the
// HLSL smoothstep intrinsic.)
float smothstep(float x)
{
    const float falloff = exp(-2.33*x*x);
    return falloff;
}
// Filter weight for a sample at normalized distance x.
// Takes the larger of
//   - the squared Gaussian falloff of x / GBOOST, lowered by GMAXSHARP, and
//   - a linear ramp that equals -GMAXSHARP at x = 1 and 0 at x = 2.
// The result can be negative.
float getw(float x)
{
    float gauss = smothstep(x/GBOOST);
    float lobe = gauss*gauss - GMAXSHARP;
    float ramp = lerp(-GMAXSHARP, 0.0, x-1.0);
    return max(lobe, ramp);
}
// One-dimensional G-sharp resampling along the direction `dx`.
//   tex    - texture coordinate of the centre texel.
//   dx     - one-texel step along the filter axis.
//   f      - negated fractional offset of the output position within the texel.
//   Source - sampler to read from.
// Returns the weighted average of the taps, blended by GPAR towards the value
// clamped to the min/max of the taps that had a positive weight (anti-ringing).
float3 gsharp2(float2 tex, float2 dx, float f, sampler2D Source)
{
    float3 color = 0.0.xxx;
    float w, fp;
    float wsum = 0.0;
    float3 pixel;
    // Running min/max over positively weighted taps, used for anti-ringing.
    float3 cmax = 0.0.xxx;
    float3 cmin = 1.0.xxx;
    float FPR = GSHARP0;
    float FPR2 = 2.0*FPR;
    // Taps run from -LOOPSIZE+1 to LOOPSIZE inclusive.
    float LOOPSIZE = ceil(FPR2);
    float x = -LOOPSIZE+1.0;
    do
    {
        // Distance to the tap, capped at the filter radius and normalized by the range.
        fp = min(abs(x+f),FPR2);
        pixel = tex2D(Source, tex + x*dx).rgb;
        fp = fp/FPR;
        w = getw(fp);
        if (w > 0.0) { cmin = min(cmin, pixel); cmax = max(cmax, pixel); }
        color = color + w * pixel;
        wsum = wsum + w;
        x = x + 1.0;
    } while (x <= LOOPSIZE);
    // Normalize by the total weight.
    color = color / wsum;
    return lerp(clamp(color, 0.0, 1.0), clamp(color, cmin, cmax), GPAR);
}
// Horizontal pass: filters the back buffer along X with gsharp2.
//   vpos      - SV_Position of the pixel (not used).
//   vTexCoord - texture coordinate of the pixel being shaded.
// Returns the filtered RGB colour with alpha set to 1.0.
float4 PS_GSHARP2_H(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    float2 texelSize = NormalizedInternalPixelSize;
    float2 texelCount = 1.0 / NormalizedInternalPixelSize;

    // Position in texel units, shifted so integer values fall on texel centres.
    float2 texelPos = vTexCoord * texelCount-0.5;

    // Texture coordinate of the centre of the texel containing texelPos.
    float2 centre = (floor(texelPos) + 0.5)*texelSize;
    float2 stepX = float2(texelSize.x, 0.0);

    return float4(gsharp2(centre, stepX, -frac(texelPos.x), sBackBuffer), 1.0);
}
// Vertical pass: filters the horizontal pass output (sGSHARP2_H) along Y.
//   vpos      - SV_Position of the pixel (not used).
//   vTexCoord - texture coordinate of the pixel being shaded.
// Returns the filtered RGB colour with alpha set to 1.0.
float4 PS_GSHARP2_V(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    // NOTE(review): presumably the pixel width of the intermediate target — confirm.
    float scaledWidth = ViewportSize.x*BufferToViewportRatio.x;

    // X uses the scaled width, Y uses the internal resolution.
    float2 texelCount = float2(scaledWidth, 1.0/NormalizedInternalPixelSize.y);
    float2 texelSize = float2(1.0/scaledWidth, NormalizedInternalPixelSize.y);

    // Position in texel units, shifted so integer values fall on texel centres.
    float2 texelPos = vTexCoord * texelCount-0.5;
    float2 centre = (floor(texelPos) + 0.5)*texelSize;
    float2 stepY = float2(0.0, texelSize.y);

    return float4(gsharp2(centre, stepY, -frac(texelPos.y), sGSHARP2_H), 1.0);
}
// Two-pass separable resampler:
//   pass 1 runs PS_GSHARP2_H and renders into tGSHARP2_H;
//   pass 2 runs PS_GSHARP2_V, which reads that texture, and sets no RenderTarget.
technique GSHARP2
{
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_GSHARP2_H;
RenderTarget = tGSHARP2_H;
}
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_GSHARP2_V;
}
}

View file

@ -1,145 +0,0 @@
#include "ReShade.fxh"
/*
G-sharp resampler 2.0 - dynamic range (upscaler, downsampler)
Copyright (C) 2024 guest(r)
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// Filter range: PS_GSHARP2 samples up to ceil(2 * GSHARP0) texels on each side of the centre, per axis.
uniform float GSHARP0 <
ui_type = "drag";
ui_min = 0.75;
ui_max = 8.0;
ui_step = 0.05;
ui_label = "Filter Range";
> = 2.45;
// Divisor applied to the normalized distance inside getw before the Gaussian falloff.
uniform float GBOOST <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.5;
ui_step = 0.05;
ui_label = "Filter Boost (same range, speedup)";
> = 1.75;
// Raw sharpness setting; the GMAXSHARP macro below scales it before use.
uniform float GMAXSHARP <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.25;
ui_step = 0.01;
ui_label = "Filter Sharpness";
> = 0.1;
// Anti-ringing amount: lerp factor between the plain result and the result clamped to the local min/max.
uniform float GPAR <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.10;
ui_label = "Anti-Ringing";
> = 0.50;
// Host-provided uniforms, identified by their "source" annotation.
// NormalizedNativePixelSize is not referenced by the shader code visible in this file.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
// Back-buffer sampler with clamped addressing and point filtering.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// From here on GMAXSHARP expands to the uniform of the same name scaled by 0.25 * GBOOST^2.
// NOTE(review): relies on the preprocessor not re-expanding a macro's own name inside its
// replacement list (standard C preprocessor behaviour) — confirm for the ReShade FX preprocessor.
#define GMAXSHARP (0.25*GBOOST*GBOOST*GMAXSHARP)
// Gaussian-shaped falloff exp(-2.33 * x^2): returns 1.0 at x = 0 and decays
// symmetrically towards 0 as |x| grows. (Despite the name, this is not the
// HLSL smoothstep intrinsic.)
float smothstep(float x)
{
    const float falloff = exp(-2.33*x*x);
    return falloff;
}
// Filter weight for a sample at normalized distance x.
// Takes the larger of
//   - the squared Gaussian falloff of x / GBOOST, lowered by GMAXSHARP, and
//   - a linear ramp that equals -GMAXSHARP at x = 1 and 0 at x = 2.
// The result can be negative.
float getw(float x)
{
    float gauss = smothstep(x/GBOOST);
    float lobe = gauss*gauss - GMAXSHARP;
    float ramp = lerp(-GMAXSHARP, 0.0, x-1.0);
    return max(lobe, ramp);
}
// Single-pass two-dimensional G-sharp resampler.
//   vpos      - SV_Position of the pixel (not used).
//   vTexCoord - texture coordinate of the pixel being shaded.
// Returns the weighted average of the taps within the filter radius, blended
// by GPAR towards the value clamped to the min/max of the non-negatively
// weighted taps (anti-ringing), with alpha set to 1.0.
float4 PS_GSHARP2(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
    // xy = texel count per axis, zw = texel size.
    float4 SourceSize = float4(1.0 / NormalizedInternalPixelSize, NormalizedInternalPixelSize);
    // Position in texel units, shifted so integer values fall on texel centres.
    float2 pos = vTexCoord * SourceSize.xy-0.5;
    float2 f = -frac(pos);
    // Texture coordinate of the centre of the texel containing pos.
    float2 tex = floor(pos)*SourceSize.zw + 0.5*SourceSize.zw;
    float3 color = 0.0.xxx;
    float2 dx = float2(SourceSize.z, 0.0);
    float2 dy = float2(0.0, SourceSize.w);
    float w, fp;
    float wsum = 0.0;
    float3 pixel;
    // Running min/max over accepted taps, used for anti-ringing.
    float3 cmax = 0.0.xxx;
    float3 cmin = 1.0.xxx;
    float FPR = GSHARP0;
    float FPR2 = 2.0*FPR;
    // Squared filter radius, compared against squared distances below.
    float FPR3 = FPR2*FPR2;
    // Taps run from -LOOPSIZE+1 to LOOPSIZE inclusive on each axis.
    float LOOPSIZE = ceil(FPR2);
    float y = -LOOPSIZE+1.0;
    float x = 0.0;
    do
    {
        x = -LOOPSIZE + 1.0;
        do
        {
            // Squared distance from the output position to this tap.
            fp = dot(float2(x+f.x,y+f.y),float2(x+f.x,y+f.y));
            if (fp >= FPR3) w = 0.0; // outside the filter radius: skip the fetch
            else
            {
                pixel = tex2D(sBackBuffer, tex + x*dx + y*dy).rgb;
                fp = sqrt(fp)/FPR;
                w = getw(fp);
                // NOTE(review): the two-pass variant of this shader tests "w > 0.0" here — confirm which is intended.
                if (w >= 0.0) { cmin = min(cmin, pixel); cmax = max(cmax, pixel); }
                color = color + w * pixel;
                wsum = wsum + w;
            }
            x = x + 1.0;
        } while (x <= LOOPSIZE);
        y = y + 1.0;
    } while (y <= LOOPSIZE);
    // Normalize by the total weight.
    color = color / wsum;
    color = lerp(clamp(color, 0.0, 1.0), clamp(color, cmin, cmax), GPAR);
    return float4(color, 1.0);
}
// Single unnamed pass: runs PS_GSHARP2 with the PostProcessVS vertex shader.
// The pass sets no RenderTarget.
technique GSHARP2
{
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_GSHARP2;
}
}

View file

@ -1,244 +0,0 @@
#include "ReShade.fxh"
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade.
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
// CONTENT_BOX_VISIBLE selects what the CRT_Royale technique does:
//   0 (default) - include the CRT shader headers and run the full pass chain;
//   non-zero    - skip them and draw only the content-box overlay pass.
#ifndef CONTENT_BOX_VISIBLE
#define CONTENT_BOX_VISIBLE 0
#endif
// content-box.fxh is included unconditionally; both branches of the technique use it.
#include "crt-royale/shaders/content-box.fxh"
// The remaining headers are only included when the CRT passes are active.
#if !CONTENT_BOX_VISIBLE
#include "crt-royale/shaders/input-blurring.fxh"
#include "crt-royale/shaders/electron-beams.fxh"
#include "crt-royale/shaders/blurring.fxh"
#include "crt-royale/shaders/deinterlace.fxh"
#include "crt-royale/shaders/phosphor-mask.fxh"
#include "crt-royale/shaders/brightpass.fxh"
#include "crt-royale/shaders/bloom.fxh"
#include "crt-royale/shaders/geometry-aa-last-pass.fxh"
#endif
// Full crt-royale pass chain.
// When CONTENT_BOX_VISIBLE is non-zero only contentBoxPass is emitted.
// Otherwise the passes below are declared in this order: optional pre-blur,
// electron beams, bloom approximation, blur, deinterlace, phosphor mask,
// brightpass, bloom, geometry, and finally uncrop. Every pass except the last
// one (uncropPass) renders into its own RenderTarget texture.
technique CRT_Royale
{
// Toggle the content box to help users configure it
#if CONTENT_BOX_VISIBLE
pass contentBoxPass
{
// content-box.fxh
// Draw a box that displays the crop we'll perform.
VertexShader = PostProcessVS;
PixelShader = contentBoxPixelShader;
}
#else
#if ENABLE_PREBLUR
pass PreblurVert
{
// input-blurring.fxh
// Optionally blur the input buffer a little
VertexShader = contentCropVS;
PixelShader = preblurVertPS;
RenderTarget = texPreblurVert;
PrimitiveTopology = TRIANGLESTRIP;
VertexCount = 4;
}
pass PreblurHoriz
{
// input-blurring.fxh
VertexShader = PostProcessVS;
PixelShader = preblurHorizPS;
RenderTarget = texPreblurHoriz;
}
#endif
pass beamDistPass
{
// electron-beams.fxh
// Simulate emission of the interlaced video as electron beams.
VertexShader = calculateBeamDistsVS;
PixelShader = calculateBeamDistsPS;
RenderTarget = texBeamDist;
// This lets us improve performance by only computing the mask every k frames
// NOTE(review): the comment above says "mask", but this pass writes texBeamDist;
// it may have been copied from generatePhosphorMask — confirm.
ClearRenderTargets = false;
}
pass electronBeamPass
{
// electron-beams.fxh
// Simulate emission of the interlaced video as electron beams.
VertexShader = simulateEletronBeamsVS;
PixelShader = simulateEletronBeamsPS;
RenderTarget = texElectronBeams;
// If the preblur passes are disabled, we have to crop in this pass
#if !ENABLE_PREBLUR
PrimitiveTopology = TRIANGLESTRIP;
VertexCount = 4;
#endif
}
pass beamConvergencePass
{
// electron-beams.fxh
// Simulate beam convergence miscalibration
// Not to be confused with beam purity
VertexShader = beamConvergenceVS;
PixelShader = beamConvergencePS;
RenderTarget = texBeamConvergence;
}
pass bloomApproxPassVert
{
// bloom.fxh
VertexShader = PostProcessVS;
PixelShader = approximateBloomVertPS;
RenderTarget = texBloomApproxVert;
}
pass bloomApproxPassHoriz
{
// bloom.fxh
VertexShader = PostProcessVS;
PixelShader = approximateBloomHorizPS;
RenderTarget = texBloomApproxHoriz;
}
pass blurVerticalPass
{
// blurring.fxh
// Vertically blur the approx bloom
VertexShader = blurVerticalVS;
PixelShader = blurVerticalPS;
RenderTarget = texBlurVertical;
}
pass blurHorizontalPass
{
// blurring.fxh
// Horizontally blur the approx bloom
VertexShader = blurHorizontalVS;
PixelShader = blurHorizontalPS;
RenderTarget = texBlurHorizontal;
}
pass deinterlacePass
{
// deinterlace.fxh
// Optionally deinterlace the video if interlacing is enabled.
// Can help approximate the original crt-royale's appearance
// without some issues like image retention.
VertexShader = deinterlaceVS;
PixelShader = deinterlacePS;
RenderTarget = texDeinterlace;
}
pass freezeFramePass
{
// deinterlace.fxh
// Capture the current frame, so we can use it in the next
// frame's deinterlacing pass.
VertexShader = freezeFrameVS;
PixelShader = freezeFramePS;
RenderTarget = texFreezeFrame;
// Explicitly disable clearing render targets
// scanlineBlendPass will not work properly if this ever defaults to true
// NOTE(review): no pass named scanlineBlendPass is declared in this technique;
// the name above may be stale — confirm which pass depends on this.
ClearRenderTargets = false;
}
pass generatePhosphorMask
{
// phosphor-mask.fxh
VertexShader = generatePhosphorMaskVS;
PixelShader = generatePhosphorMaskPS;
RenderTarget = texPhosphorMask;
// This lets us improve performance by only computing the mask every k frames
ClearRenderTargets = false;
PrimitiveTopology = TRIANGLESTRIP;
VertexCount = 4;
}
pass applyPhosphormask
{
// phosphor-mask.fxh
// Tile the scaled phosphor mask and apply it to
// the deinterlaced image.
VertexShader = PostProcessVS;
PixelShader = applyComputedPhosphorMaskPS;
RenderTarget = texMaskedScanlines;
// RenderTarget = texGeometry;
}
pass brightpassPass
{
// brightpass.fxh
// Apply a brightpass filter for the bloom effect
VertexShader = brightpassVS;
PixelShader = brightpassPS;
RenderTarget = texBrightpass;
}
pass bloomVerticalPass
{
// bloom.fxh
// Blur vertically for the bloom effect
VertexShader = bloomVerticalVS;
PixelShader = bloomVerticalPS;
RenderTarget = texBloomVertical;
}
pass bloomHorizontalPass
{
// bloom.fxh
// Blur horizontally for the bloom effect.
// Also apply various color changes and effects.
VertexShader = bloomHorizontalVS;
PixelShader = bloomHorizontalPS;
RenderTarget = texBloomHorizontal;
}
pass geometryPass
{
// geometry-aa-last-pass.fxh
// Apply screen geometry and anti-aliasing.
VertexShader = geometryVS;
PixelShader = geometryPS;
RenderTarget = texGeometry;
}
pass uncropPass
{
// content-box.fxh
// Uncrop the video, so we draw the game's content
// in the same position it started in.
// This is the only pass without a RenderTarget.
VertexShader = contentUncropVS;
PixelShader = uncropContentPixelShader;
PrimitiveTopology = TRIANGLESTRIP;
VertexCount = 4;
}
#endif
}

View file

@ -1,908 +0,0 @@
#ifndef _BIND_SHADER_PARAMS_H
#define _BIND_SHADER_PARAMS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade.
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
/////////////////////////////// BEGIN INCLUDES ///////////////////////////////
#include "helper-functions-and-macros.fxh"
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
#include "../version-number.fxh"
//////////////////////////////// END INCLUDES ////////////////////////////////
// Override some parameters for gamma-management.h and tex2Dantialias.h:
// Each macro below is only given a default when it has not been defined already.
#ifndef _OVERRIDE_DEVICE_GAMMA
#define _OVERRIDE_DEVICE_GAMMA 1
#endif
// _DX9_ACTIVE is 1 only when __RENDERER__ equals 0x9000.
// NOTE(review): presumably 0x9000 is the Direct3D 9 renderer id — confirm against the ReShade docs.
#if __RENDERER__ != 0x9000
#define _DX9_ACTIVE 0
#else
#define _DX9_ACTIVE 1
#endif
// #ifndef ANTIALIAS_OVERRIDE_BASICS
// #define ANTIALIAS_OVERRIDE_BASICS 1
// #endif
// #ifndef ANTIALIAS_OVERRIDE_PARAMETERS
// #define ANTIALIAS_OVERRIDE_PARAMETERS 1
// #endif
// Set to 1 to enable the extra preprocessor settings below and to show uniforms marked "hidden = !ADVANCED_SETTINGS".
#ifndef ADVANCED_SETTINGS
#define ADVANCED_SETTINGS 0
#endif
// The width of the game's content
#ifndef CONTENT_WIDTH
#define CONTENT_WIDTH BUFFER_WIDTH
#endif
// The height of the game's content
#ifndef CONTENT_HEIGHT
#define CONTENT_HEIGHT BUFFER_HEIGHT
#endif
// Advanced tunables. With ADVANCED_SETTINGS == 1 each value can be overridden
// through a preprocessor definition; otherwise fixed constants are used.
#if ADVANCED_SETTINGS == 1
// Using vertex uncropping is marginally faster, but vulnerable to DX9 weirdness.
// Most users will likely prefer the slower algorithm.
#ifndef USE_VERTEX_UNCROPPING
#define USE_VERTEX_UNCROPPING 0
#endif
#ifndef NUM_BEAMDIST_COLOR_SAMPLES
#define NUM_BEAMDIST_COLOR_SAMPLES 1024
#endif
#ifndef NUM_BEAMDIST_DIST_SAMPLES
#define NUM_BEAMDIST_DIST_SAMPLES 120
#endif
#ifndef BLOOMAPPROX_DOWNSIZING_FACTOR
#define BLOOMAPPROX_DOWNSIZING_FACTOR 4.0
#endif
// Define these internal values so that ADVANCED_SETTINGS == 0 doesn't cause a redefinition error when
// NUM_BEAMDIST_COLOR_SAMPLES is defined in the preset file. This also makes it easy to avoid bugs
// related to parentheses and order-of-operations when the user defines this arithmetically.
static const uint num_beamdist_color_samples = uint(NUM_BEAMDIST_COLOR_SAMPLES);
static const uint num_beamdist_dist_samples = uint(NUM_BEAMDIST_DIST_SAMPLES);
static const float bloomapprox_downsizing_factor = float(BLOOMAPPROX_DOWNSIZING_FACTOR);
#else
// NOTE(review): this constant is spelled USE_VERTEX_CROPPING, while the advanced branch defines
// the macro USE_VERTEX_UNCROPPING. A static const cannot be tested with #if, so code that checks
// "#if USE_VERTEX_UNCROPPING" would see an undefined macro here — confirm which name users expect.
static const uint USE_VERTEX_CROPPING = 0;
static const uint num_beamdist_color_samples = 1024;
static const uint num_beamdist_dist_samples = 120;
static const float bloomapprox_downsizing_factor = 4.0;
#endif
// Set to 1 to hide the help sections and the introductory ui_text of each category.
#ifndef HIDE_HELP_SECTIONS
#define HIDE_HELP_SECTIONS 0
#endif
// Offset the center of the game's content (horizontal)
#ifndef CONTENT_CENTER_X
#define CONTENT_CENTER_X 0
#endif
// Offset the center of the game's content (vertical)
#ifndef CONTENT_CENTER_Y
#define CONTENT_CENTER_Y 0
#endif
// Wrap the content size in parenthesis for internal use, so the user doesn't have to
static const float2 content_size = float2(int(CONTENT_WIDTH), int(CONTENT_HEIGHT));
// Pre-blur passes are enabled unless ENABLE_PREBLUR is defined as 0 beforehand.
#ifndef ENABLE_PREBLUR
#define ENABLE_PREBLUR 1
#endif
static const float2 buffer_size = float2(BUFFER_WIDTH, BUFFER_HEIGHT);
// The normalized center is 0.5 plus the normalized offset
static const float2 content_center = float2(CONTENT_CENTER_X, CONTENT_CENTER_Y) / buffer_size + 0.5;
// The content's normalized diameter d is its size divided by the buffer's size. The radius is d/2.
static const float2 content_radius = content_size / (2.0 * buffer_size);
static const float2 content_scale = content_size / buffer_size;
// Normalized edges of the content box: center minus/plus radius on each axis.
static const float content_left = content_center.x - content_radius.x;
static const float content_right = content_center.x + content_radius.x;
static const float content_upper = content_center.y - content_radius.y;
static const float content_lower = content_center.y + content_radius.y;
// The xy-offset of the top-left pixel in the content box
static const float2 content_offset = float2(content_left, content_upper);
// The (right, lower) corner of the content box
static const float2 content_offset_from_right = float2(content_right, content_lower);
// Host-provided values, identified by their "source" annotation.
uniform uint frame_count < source = "framecount"; >;
uniform int overlay_active < source = "overlay_active"; >;
static const float gba_gamma = 3.5; // Irrelevant but necessary to define.
// === HELP AND INFO ===
// These int uniforms have no default value and carry only UI annotations
// (ui_text); they appear to exist purely to show text in the settings panel.
// The two help sections are hidden when HIDE_HELP_SECTIONS is non-zero.

// Version banner; the uniform name and text are built by macros not defined in this chunk.
uniform int APPEND_VERSION_SUFFIX(version) <
ui_text = "Version: " DOT_VERSION_STR;
ui_label = " ";
ui_type = "radio";
>;
// Step-by-step overview of the recommended setup order.
uniform int basic_setup_help <
ui_text = "1. Configure the Content Box if your game has letter-boxing.\n"
"2. Configure the Phosphor Mask.\n"
"3. Configure the Scanlines.\n"
"4. Configure the Colors and Effects.\n"
"5. Configure the Screen Geometry.\n"
"6. Configure or disable Preblur\n\n"
"- In Preprocessor Definitions, set ADVANCED_SETTINGS to 1 to access more settings.\n";
ui_category = "Basic Setup Instructions";
ui_category_closed = true;
ui_label = " ";
ui_type = "radio";
hidden = HIDE_HELP_SECTIONS;
>;
// Instructions for aligning the content box using the CONTENT_* preprocessor definitions.
uniform int content_box_help <
ui_text = "1. Expand the Preprocessor Definitions section.\n"
"2. Set CONTENT_BOX_VISIBLE to 1.\n"
"3. Use the \"CONTENT_\" parameters to configure the Content Box.\n"
"4. Align the content box with the border of your game.\n"
"5. Set CONTENT_BOX_VISIBLE to 0 when you're done.\n\n"
"Parameters to focus on:\n"
"- CONTENT_HEIGHT and CONTENT_WIDTH\n"
"- CONTENT_CENTER_X and CONTENT_CENTER_Y\n"
"- CONTENT_BOX_INSCRIBED\n\n"
"Fancy Trick 1:\n"
"\tCONTENT_HEIGHT = BUFFER_HEIGHT\n"
"\tCONTENT_WIDTH = CONTENT_HEIGHT * 4.0 / 3.0\n"
"- Good if your game fills the screen vertically and has a 4:3 aspect ratio.\n"
"- Will also rescale automatically if you resize the window.\n\n"
"Fancy Trick 2:\n"
"\tCONTENT_HEIGHT = CONTENT_WIDTH * 9.0 / 16.0\n"
"\tCONTENT_WIDTH = 1500\n"
"- Good if your game is 1500 pixels wide with a 16:9 aspect ratio.\n"
"- Won't rescale automatically, but you'd only have to change the width.\n";
ui_category = "Content Box Instructions";
ui_category_closed = true;
ui_label = " ";
ui_type = "radio";
hidden = HIDE_HELP_SECTIONS;
>;
// ==== PHOSPHOR MASK ====
// Uniforms in the "Phosphor Mask" UI category. Defaults ending in "_static"
// are defined outside this chunk. Entries marked "hidden = !ADVANCED_SETTINGS"
// are only shown when ADVANCED_SETTINGS is non-zero.

// Combo index into the six mask shapes listed in ui_items (0 = Grille ... 5 = LowRes Shadow).
uniform int mask_type <
#if !HIDE_HELP_SECTIONS
ui_text = "Choose which kind of CRT you want.\n\n";
#endif
ui_label = "Mask Type";
ui_tooltip = "Selects the phosphor shape";
ui_type = "combo";
ui_items = "Grille\0"
"Slot\0"
"Shadow\0"
"LowRes Grille\0"
"LowRes Slot\0"
"LowRes Shadow\0";
ui_category = "Phosphor Mask";
ui_category_closed = true;
> = mask_type_static;
// Combo index: 0 = size the mask by triad width, 1 = by number of triads across.
uniform uint mask_size_param <
ui_label = "Mask Size Param";
ui_tooltip = "Switch between using Mask Triad Size or Mask Num Triads";
ui_type = "combo";
ui_items = "Triad Width\0"
"Num Triads Across\0";
hidden = !ADVANCED_SETTINGS;
ui_spacing = 2;
ui_category = "Phosphor Mask";
> = mask_size_param_static;
// Triad width in pixels (slider range 1.0 to 60.0).
uniform float mask_triad_width <
ui_label = "Mask Triad Width";
ui_tooltip = "The width of a triad in pixels";
ui_type = "slider";
ui_min = 1.0;
ui_max = 60.0;
ui_step = 0.1;
ui_category = "Phosphor Mask";
> = mask_triad_width_static;
// Horizontal triad count (range 1.0 to 1280.0).
uniform float mask_num_triads_across <
ui_label = "Mask Num Triads Across";
ui_tooltip = "The number of triads in the viewport (horizontally)";
ui_type = "drag";
ui_min = 1.0;
ui_max = 1280.0;
ui_step = 1.0;
hidden = !ADVANCED_SETTINGS;
ui_category = "Phosphor Mask";
> = mask_num_triads_across_static;
// Multiplier for the triad height; defaults to 1.0.
uniform float scale_triad_height<
ui_label = "Scale Triad Height";
ui_tooltip = "Scales the height of a triad";
ui_type = "drag";
ui_min = 0.01;
ui_max = 10.0;
ui_step = 0.001;
ui_spacing = 2;
ui_category = "Phosphor Mask";
> = 1.0;
// Per-axis (XY) phosphor thickness; a float2 initialised from the scalar 0.2.
uniform float2 phosphor_thickness <
ui_label = "Phosphor Thickness XY";
ui_tooltip = "Makes the phosphors appear thicker in each direction";
ui_type = "drag";
ui_min = 0.01;
ui_max = 0.99;
ui_step = 0.01;
// hidden = !ADVANCED_SETTINGS;
ui_category = "Phosphor Mask";
> = 0.2;
// Per-axis (XY) phosphor sharpness; a float2 initialised from the scalar 50.
uniform float2 phosphor_sharpness <
ui_label = "Phosphor Sharpness XY";
ui_tooltip = "Makes the phosphors appear more crisp in each direction";
ui_type = "drag";
ui_min = 1;
ui_max = 100;
ui_step = 1;
// hidden = !ADVANCED_SETTINGS;
ui_category = "Phosphor Mask";
> = 50;
// Per-channel (RGB) horizontal mask offset; a float3 initialised from the scalar 0.
uniform float3 phosphor_offset_x <
ui_label = "Phosphor Offset RGB X";
ui_tooltip = "Very slightly shifts the phosphor mask. Can help with subpixel alignment.";
ui_type = "drag";
ui_min = -1;
ui_max = 1;
ui_step = 0.01;
// hidden = !ADVANCED_SETTINGS;
ui_spacing = 2;
ui_category = "Phosphor Mask";
> = 0;
// Per-channel (RGB) vertical mask offset; a float3 initialised from the scalar 0.
uniform float3 phosphor_offset_y <
ui_label = "Phosphor Offset RGB Y";
ui_tooltip = "Very slightly shifts the phosphor mask. Can help with subpixel alignment.";
ui_type = "drag";
ui_min = -1;
ui_max = 1;
ui_step = 0.01;
// hidden = !ADVANCED_SETTINGS;
ui_category = "Phosphor Mask";
> = 0;
// static const uint pixel_grid_mode = 0;
// static const float2 pixel_size = 1;
/*
// ==== PIXELATION ===
uniform uint pixel_grid_mode <
#if !HIDE_HELP_SECTIONS
ui_text = "- Fix issues displaying pixel art.\n"
"- Force high-res games to look low-res.\n\n";
#endif
ui_label = "Pixel Grid Param";
ui_tooltip = "Switch between using Pixel Size or Num Pixels";
ui_type = "combo";
ui_items = "Pixel Size\0"
"Content Resolution\0";
hidden = !ADVANCED_SETTINGS;
ui_category = "Pixelation";
ui_category_closed = true;
> = 0;
uniform float2 pixel_size <
#if !HIDE_HELP_SECTIONS && !ADVANCED_SETTINGS
ui_text = "- Fix issues displaying pixel art.\n"
"- Force high-res games to look low-res.\n\n";
#endif
ui_label = "Pixel Size";
ui_tooltip = "The size of an in-game pixel on screen, in real-world pixels";
ui_type = "slider";
ui_min = 1.0;
ui_max = 30.0;
ui_step = 1.0;
ui_category = "Pixelation";
ui_category_closed = true;
> = float2(1, 1);
uniform float2 pixel_grid_resolution <
ui_label = "Num Pixels";
ui_tooltip = "The number of in-game pixels displayed on-screen in each direction";
ui_type = "drag";
ui_min = 1.0;
ui_max = 10000.0;
ui_step = 1.0;
hidden = !ADVANCED_SETTINGS;
ui_category = "Pixelation";
> = content_size;
uniform float2 pixel_grid_offset <
ui_label = "Pixel Grid Offset";
ui_tooltip = "Shifts the pixel-grid to help with alignment";
ui_type = "slider";
ui_min = -15.0;
ui_max = 15.0;
ui_step = 1.0;
#if ADVANCED_SETTINGS
ui_spacing = 2;
#endif
ui_category = "Pixelation";
> = float2(0, 0);
*/
// ==== SCANLINES ====
// Uniforms in the "Scanlines" UI category. Defaults ending in "_static" are
// defined outside this chunk. Entries marked "hidden = !ADVANCED_SETTINGS"
// are only shown when ADVANCED_SETTINGS is non-zero.

// Scanline height (slider range 1 to 30, default 2).
uniform uint scanline_thickness <
#if !HIDE_HELP_SECTIONS
ui_text = "Configure the electron beams and interlacing.\n\n";
#endif
ui_label = "Scanline Thickness";
ui_tooltip = "Sets the height of each scanline";
ui_type = "slider";
ui_min = 1;
ui_max = 30;
ui_step = 1;
ui_category = "Scanlines";
ui_category_closed = true;
> = 2;
// Vertical shift applied to the scanlines (range -30 to 30, default 0).
uniform float scanline_offset <
ui_label = "Scanline Offset";
ui_tooltip = "Vertically shifts the scanlines to help with alignment";
ui_type = "slider";
ui_min = -30;
ui_max = 30;
ui_step = 1;
hidden = !ADVANCED_SETTINGS;
ui_category = "Scanlines";
> = 0;
// Combo index into ui_items: 0 = Digital, 1 = Linear (default), 2 = Gaussian, 3 = Multi-Source Gaussian.
uniform uint beam_shape_mode <
ui_label = "Beam Shape Mode";
ui_tooltip = "Select the kind of beam to use.";
ui_type = "combo";
ui_items = "Digital (Fast)\0"
"Linear (Simple)\0"
"Gaussian (Realistic)\0"
"Multi-Source Gaussian (Expensive)\0";
ui_category = "Scanlines";
> = 1;
// Interlacing toggle; off by default.
uniform bool enable_interlacing <
ui_label = "Enable Interlacing";
ui_spacing = 5;
ui_category = "Scanlines";
> = false;
// Field-order toggle for interlacing.
uniform bool interlace_back_field_first <
ui_label = "Draw Back-Field First";
ui_tooltip = "Draw odd-numbered scanlines first (often has no effect)";
ui_category = "Scanlines";
> = interlace_back_field_first_static;
// Combo index into ui_items: 0 = None, 1 = Fake-Progressive (default), 2 = Weaving, 3 = Blended Weaving.
uniform uint scanline_deinterlacing_mode <
ui_label = "Deinterlacing Mode";
ui_tooltip = "Selects the deinterlacing algorithm, if any.";
ui_type = "combo";
ui_items = "None\0"
"Fake-Progressive\0"
"Weaving\0"
"Blended Weaving\0";
ui_category = "Scanlines";
> = 1;
// Gamma used when blending during deinterlacing (range 0.01 to 5.0, default 1.0).
uniform float deinterlacing_blend_gamma <
ui_label = "Deinterlacing Blend Gamma";
ui_tooltip = "Nudge this if deinterlacing changes your colors too much";
ui_type = "slider";
ui_min = 0.01;
ui_max = 5.0;
ui_step = 0.01;
ui_category = "Scanlines";
> = 1.0;
// Thickness factor for the linear beam shape (range 0.01 to 3.0, default 1.0).
uniform float linear_beam_thickness <
ui_label = "Linear Beam Thickness";
ui_tooltip = "Linearly widens or narrows the beam";
ui_type = "slider";
ui_min = 0.01;
ui_max = 3.0;
ui_step = 0.01;
ui_spacing = 5;
ui_category = "Scanlines";
> = 1.0;
// Gaussian beam parameters. These drag controls declare a ui_min of 0.0 and no ui_max.
uniform float gaussian_beam_min_sigma <
ui_label = "Gaussian Beam Min Sigma";
ui_tooltip = "For Gaussian Beam Shape, sets thickness of dim pixels";
ui_type = "drag";
ui_min = 0.0;
ui_step = 0.01;
ui_spacing = 5;
ui_category = "Scanlines";
> = gaussian_beam_min_sigma_static;
uniform float gaussian_beam_max_sigma <
ui_label = "Gaussian Beam Max Sigma";
ui_tooltip = "For Gaussian Beam Shape, sets thickness of bright pixels";
ui_type = "drag";
ui_min = 0.0;
ui_step = 0.01;
ui_category = "Scanlines";
> = gaussian_beam_max_sigma_static;
uniform float gaussian_beam_spot_power <
ui_label = "Gaussian Beam Spot Power";
ui_tooltip = "For Gaussian Beam Shape, balances between Min and Max Sigma";
ui_type = "drag";
ui_min = 0.0;
ui_step = 0.01;
ui_category = "Scanlines";
> = gaussian_beam_spot_power_static;
uniform float gaussian_beam_min_shape <
ui_label = "Gaussian Beam Min Shape";
ui_tooltip = "For Gaussian Beam Shape, sets sharpness of dim pixels";
ui_type = "drag";
ui_min = 0.0;
ui_step = 0.01;
hidden = !ADVANCED_SETTINGS;
ui_spacing = 2;
ui_category = "Scanlines";
> = gaussian_beam_min_shape_static;
uniform float gaussian_beam_max_shape <
ui_label = "Gaussian Beam Max Shape";
ui_tooltip = "For Gaussian Beam Shape, sets sharpness of bright pixels";
ui_type = "drag";
ui_min = 0.0;
ui_step = 0.01;
hidden = !ADVANCED_SETTINGS;
ui_category = "Scanlines";
> = gaussian_beam_max_shape_static;
uniform float gaussian_beam_shape_power <
ui_label = "Gaussian Beam Shape Power";
ui_tooltip = "For Gaussian Beam Shape, balances between Min and Max Shape";
ui_type = "drag";
ui_min = 0.0;
ui_step = 0.01;
hidden = !ADVANCED_SETTINGS;
ui_category = "Scanlines";
> = gaussian_beam_shape_power_static;
// Per-channel (RGB) horizontal convergence offset; a float3 initialised from the scalar 0.
uniform float3 convergence_offset_x <
ui_label = "Convergence Offset X RGB";
ui_tooltip = "Shift the color channels horizontally";
ui_type = "drag";
ui_min = -10;
ui_max = 10;
ui_step = 0.05;
hidden = !ADVANCED_SETTINGS;
ui_spacing = 5;
ui_category = "Scanlines";
> = 0;
// Per-channel (RGB) vertical convergence offset; a float3 initialised from the scalar 0.
uniform float3 convergence_offset_y <
ui_label = "Convergence Offset Y RGB";
ui_tooltip = "Shift the color channels vertically";
ui_type = "drag";
ui_min = -10;
ui_max = 10;
ui_step = 0.05;
hidden = !ADVANCED_SETTINGS;
ui_category = "Scanlines";
> = 0;
// Horizontal beam filter settings: plain statics (not uniforms, no UI annotations),
// initialised from the *_static values.
static uint beam_horiz_filter = beam_horiz_filter_static;
static float beam_horiz_sigma = beam_horiz_sigma_static;
static float beam_horiz_linear_rgb_weight = beam_horiz_linear_rgb_weight_static;
// ==== IMAGE COLORIZATION ====
// First uniforms of the "Colors and Effects" UI category. Defaults ending in
// "_static" are defined outside this chunk.

// Gamma of the source content (slider range 1.0 to 5.0).
uniform float crt_gamma <
#if !HIDE_HELP_SECTIONS
ui_text = "Apply gamma, contrast, and blurring.\n\n";
#endif
ui_label = "CRT Gamma";
ui_tooltip = "The gamma-level of the original content";
ui_type = "slider";
ui_min = 1.0;
ui_max = 5.0;
ui_step = 0.01;
ui_category = "Colors and Effects";
ui_category_closed = true;
> = crt_gamma_static;
// Gamma of the user's display (slider range 1.0 to 5.0).
uniform float lcd_gamma <
ui_label = "LCD Gamma";
ui_tooltip = "The gamma-level of your display";
ui_type = "slider";
ui_min = 1.0;
ui_max = 5.0;
ui_step = 0.01;
ui_category = "Colors and Effects";
> = lcd_gamma_static;
// Contrast level (slider range 0.0 to 4.0).
uniform float levels_contrast <
ui_label = "Levels Contrast";
ui_tooltip = "Sets the contrast of the CRT";
ui_type = "slider";
ui_min = 0.0;
ui_max = 4.0;
ui_step = 0.01;
ui_spacing = 5;
ui_category = "Colors and Effects";
> = levels_contrast_static;
// Halation strength. Slider range [0.0, 1.0]; defaults to
// halation_weight_static. (The tooltip previously misspelled "electrons".)
uniform float halation_weight <
    ui_label = "Halation";
    ui_tooltip = "Desaturation due to electrons exciting the wrong phosphors";
    ui_type = "slider";
    ui_min = 0.0;
    ui_max = 1.0;
    ui_step = 0.01;
    ui_spacing = 2;
    ui_category = "Colors and Effects";
> = halation_weight_static;
// Diffusion strength. Slider range [0.0, 1.0]; defaults to
// diffusion_weight_static.
uniform float diffusion_weight <
ui_label = "Diffusion";
ui_tooltip = "Blurring due to refraction from the screen's glass";
ui_type = "slider";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_category = "Colors and Effects";
> = diffusion_weight_static;
// Radius scale shared by the halation and diffusion effects (per the tooltip).
// Slider range [0.01, 5.0], default 1.0. Hidden unless ADVANCED_SETTINGS is
// nonzero.
uniform float blur_radius <
ui_label = "Blur Radius";
ui_tooltip = "Scales the radius of the halation and diffusion effects";
ui_type = "slider";
ui_min = 0.01;
ui_max = 5.0;
ui_step = 0.01;
hidden = !ADVANCED_SETTINGS;
ui_category = "Colors and Effects";
> = 1.0;
// Bloom intensity scale. Drag widget with no ui_max; defaults to
// bloom_underestimate_levels_static.
// NOTE(review): the lower bound is wrapped in FIX_ZERO(...), which is defined
// outside this chunk -- presumably it keeps the bound from being exactly zero;
// confirm against its definition.
uniform float bloom_underestimate_levels <
ui_label = "Bloom Underestimation";
ui_tooltip = "Scale the bloom effect's intensity";
ui_type = "drag";
ui_min = FIX_ZERO(0.0);
ui_step = 0.01;
ui_spacing = 2;
ui_category = "Colors and Effects";
> = bloom_underestimate_levels_static;
// Extra bloom applied to all colors (per the tooltip). Slider range
// [0.0, 1.0]; defaults to bloom_excess_static.
uniform float bloom_excess <
ui_label = "Bloom Excess";
ui_tooltip = "Extra bloom applied to all colors";
ui_type = "slider";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_category = "Colors and Effects";
> = bloom_excess_static;
// Runtime XY subpixel offset of the red channel used for antialiasing.
// Range [-0.5, 0.5] per component; defaults to aa_subpixel_r_offset_static.
// Only shown when both ADVANCED_SETTINGS and
// _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS are nonzero.
// (The label previously misspelled "Offset".)
uniform float2 aa_subpixel_r_offset_runtime <
    ui_label = "AA Subpixel R Offset XY";
    ui_type = "drag";
    ui_min = -0.5;
    ui_max = 0.5;
    ui_step = 0.01;
    hidden = !ADVANCED_SETTINGS || !_RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS;
    ui_category = "Colors and Effects";
> = aa_subpixel_r_offset_static;
// Antialiasing filter parameters. These are compile-time constants copied from
// their *_static settings rather than runtime uniforms.
static const float aa_cubic_c = aa_cubic_c_static;
static const float aa_gauss_sigma = aa_gauss_sigma_static;
// ==== GEOMETRY ====
// Screen rotation selector: combo index 0..3 for 0/90/180/270 degrees,
// default 0. First uniform of the "Screen Geometry" category, so it also
// carries the category help text (omitted when HIDE_HELP_SECTIONS is nonzero)
// and starts the category closed.
uniform uint geom_rotation_mode <
#if !HIDE_HELP_SECTIONS
ui_text = "Change the geometry of the screen's glass.\n\n";
#endif
ui_label = "Rotate Screen";
ui_type = "combo";
ui_items = "0 degrees\0"
"90 degrees\0"
"180 degrees\0"
"270 degrees\0";
ui_category = "Screen Geometry";
ui_category_closed = true;
> = 0;
// Screen curvature selector: combo index 0 = Flat, 1 = Spherical,
// 2 = Spherical (Alt), 3 = Cylindrical (Trinitron). Defaults to
// geom_mode_static.
uniform uint geom_mode_runtime <
ui_label = "Geometry Mode";
ui_tooltip = "Select screen curvature type";
ui_type = "combo";
ui_items = "Flat\0"
"Spherical\0"
"Spherical (Alt)\0"
"Cylindrical (Trinitron)\0";
ui_category = "Screen Geometry";
> = geom_mode_static;
// Screen curvature radius. Slider range [1/(2*pi), 1024]; defaults to
// geom_radius_static.
uniform float geom_radius <
ui_label = "Geometry Radius";
ui_tooltip = "Select screen curvature radius";
ui_type = "slider";
ui_min = 1.0 / (2.0 * pi);
ui_max = 1024;
ui_step = 0.01;
ui_category = "Screen Geometry";
> = geom_radius_static;
// Viewing distance. Slider range [0.5, 1024]; defaults to
// geom_view_dist_static. Hidden unless ADVANCED_SETTINGS is nonzero.
uniform float geom_view_dist <
ui_label = "View Distance";
ui_type = "slider";
ui_min = 0.5;
ui_max = 1024;
ui_step = 0.01;
hidden = !ADVANCED_SETTINGS;
ui_spacing = 2;
ui_category = "Screen Geometry";
> = geom_view_dist_static;
// Two screen tilt angles, each in [-pi, pi]; defaults to
// geom_tilt_angle_static. Hidden unless ADVANCED_SETTINGS is nonzero.
uniform float2 geom_tilt_angle <
ui_label = "Screen Tilt Angles";
ui_type = "drag";
ui_min = -pi;
ui_max = pi;
ui_step = 0.01;
hidden = !ADVANCED_SETTINGS;
ui_category = "Screen Geometry";
> = geom_tilt_angle_static;
// Screen aspect ratio as a two-component vector; the default pairs
// geom_aspect_ratio_static with 1. Each component has a lower bound of 1.0 and
// no ui_max. Hidden unless ADVANCED_SETTINGS is nonzero.
uniform float2 geom_aspect_ratio <
ui_label = "Screen Aspect Ratios";
ui_type = "drag";
ui_min = 1.0;
ui_step = 0.01;
hidden = !ADVANCED_SETTINGS;
ui_category = "Screen Geometry";
> = float2(geom_aspect_ratio_static, 1);
// Two-component overscan; defaults to geom_overscan_static. Hidden unless
// ADVANCED_SETTINGS is nonzero.
// NOTE(review): the lower bound is wrapped in FIX_ZERO(...), defined outside
// this chunk -- confirm its effect.
uniform float2 geom_overscan <
ui_label = "Geom Overscan";
ui_type = "drag";
ui_min = FIX_ZERO(0.0);
ui_step = 0.01;
hidden = !ADVANCED_SETTINGS;
ui_spacing = 2;
ui_category = "Screen Geometry";
> = geom_overscan_static;
// ==== BORDER ====
// Border size. First uniform of the "Screen Border" category, so it also
// carries the category help text (omitted when HIDE_HELP_SECTIONS is nonzero)
// and starts the category closed. Slider range [0.0, 0.5]; defaults to
// border_size_static.
uniform float border_size <
#if !HIDE_HELP_SECTIONS
ui_text = "Apply a thin vignette to the edge of the screen.\n\n";
#endif
ui_label = "Border Size";
ui_category_closed = true;
ui_type = "slider";
ui_min = 0.0;
ui_max = 0.5;
ui_step = 0.01;
ui_category = "Screen Border";
> = border_size_static;
// Border darkness. Drag widget bounded below by 0.0 (no ui_max); defaults to
// border_darkness_static.
uniform float border_darkness <
ui_label = "Border Darkness";
ui_type = "drag";
ui_min = 0.0;
ui_step = 0.01;
ui_category = "Screen Border";
> = border_darkness_static;
// Border compression. Drag widget bounded below by 0.0 (no ui_max); defaults
// to border_compress_static.
uniform float border_compress <
ui_label = "Border Compress";
ui_type = "drag";
ui_min = 0.0;
ui_step = 0.01;
ui_category = "Screen Border";
> = border_compress_static;
// ==== PREBLUR ====
// When ENABLE_PREBLUR is nonzero the two pre-blur settings are runtime
// uniforms in the "Pre-Blur" category; otherwise they are compile-time
// constants fixed at 0, so code that reads them compiles either way.
#if ENABLE_PREBLUR
// XY radius of the visible blur, in pixels (per the tooltip). Range [0, 100],
// default 0. Also carries the category help text unless HIDE_HELP_SECTIONS is
// nonzero.
uniform float2 preblur_effect_radius <
#if !HIDE_HELP_SECTIONS
ui_text = "- Apply a linear blur to the input image. Kind of like an NTSC/Composite shader, but much faster.\n"
"- If you want to use an NTSC shader or don't like this effect, disable it by setting ENABLE_PREBLUR to 0\n"
"- If you leave all of these set to 0, then they don't do anything. Consider disabling the effect to improve performance.\n\n";
#endif
ui_type = "drag";
ui_min = 0;
ui_max = 100;
ui_step = 1;
ui_label = "Effect Radius XY";
ui_tooltip = "The radius of the effect visible on the screen (measured in pixels)";
ui_category = "Pre-Blur";
ui_category_closed = true;
> = 0;
// XY number of samples taken on either side of each pixel (per the tooltip).
// Range [0, 100], default 0.
uniform uint2 preblur_sampling_radius <
ui_type = "drag";
ui_min = 0;
ui_max = 100;
ui_step = 1;
ui_label = "Sampling Radius XY";
ui_tooltip = "The number of samples to take on either side of each pixel";
ui_category = "Pre-Blur";
> = 0;
#else
// Pre-blur disabled: same names, constant zero.
static const float2 preblur_effect_radius = 0;
static const uint2 preblur_sampling_radius = 0;
#endif
// Provide accessors for vector constants that pack scalar uniforms:
float2 get_aspect_vector(const float geom_aspect_ratio)
{
    //  Requires:   geom_aspect_ratio is a scalar width/height ratio.
    //  Returns:    A unit-length float2 (width, height) direction.
    //  The ratio is capped at geom_max_aspect_ratio, and the result is
    //  normalized so only the proportions -- not the absolute scale --
    //  affect the uv-mapping for curvature.
    const float capped_ratio = min(geom_aspect_ratio, geom_max_aspect_ratio);
    return normalize(float2(capped_ratio, 1.0));
}
// Returns the geom_overscan uniform unchanged.
float2 get_geom_overscan_vector()
{
return geom_overscan;
}
// Returns the geom_tilt_angle uniform unchanged.
float2 get_geom_tilt_angle_vector()
{
return geom_tilt_angle;
}
// Returns the horizontal convergence offsets for all three color channels
// (the convergence_offset_x uniform, unchanged).
float3 get_convergence_offsets_x_vector()
{
return convergence_offset_x;
}
// Returns the vertical convergence offsets for all three color channels
// (the convergence_offset_y uniform, unchanged).
float3 get_convergence_offsets_y_vector()
{
return convergence_offset_y;
}
//  Each accessor below pairs one color channel's horizontal and vertical
//  convergence offsets into a single (x, y) vector.
float2 get_convergence_offsets_r_vector()
{
    const float red_shift_x = convergence_offset_x.r;
    const float red_shift_y = convergence_offset_y.r;
    return float2(red_shift_x, red_shift_y);
}
float2 get_convergence_offsets_g_vector()
{
    const float green_shift_x = convergence_offset_x.g;
    const float green_shift_y = convergence_offset_y.g;
    return float2(green_shift_x, green_shift_y);
}
float2 get_convergence_offsets_b_vector()
{
    const float blue_shift_x = convergence_offset_x.b;
    const float blue_shift_y = convergence_offset_y.b;
    return float2(blue_shift_x, blue_shift_y);
}
float2 get_aa_subpixel_r_offset()
{
    //  Returns the red subpixel offset used for antialiasing.  The runtime
    //  uniform is used only when BOTH runtime antialias weights and runtime
    //  subpixel offsets are enabled; every other combination falls back to
    //  the static setting.
    #if _RUNTIME_ANTIALIAS_WEIGHTS && _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
        //  WARNING: THIS IS EXTREMELY EXPENSIVE.
        return aa_subpixel_r_offset_runtime;
    #else
        return aa_subpixel_r_offset_static;
    #endif
}
// Provide accessors for settings which still need "cooking":
float get_mask_amplify()
{
    //  Returns the reciprocal of the average color of the phosphor mask
    //  selected by mask_type:
    //      0 or 3          -> aperture grille
    //      1 or 4          -> slot mask
    //      2 or any other  -> shadow mask
    static const float grille_gain = 1.0/mask_grille_avg_color;
    static const float slot_gain = 1.0/mask_slot_avg_color;
    static const float shadow_gain = 1.0/mask_shadow_avg_color;

    float gain;
    [flatten]
    switch (mask_type) {
        case 0:
        case 3:
            gain = grille_gain;
            break;
        case 1:
        case 4:
            gain = slot_gain;
            break;
        default:    //  Covers 2 as well as any out-of-range value.
            gain = shadow_gain;
            break;
    }
    return gain;
}
#endif // _BIND_SHADER_PARAMS_H

View file

@ -1,320 +0,0 @@
#ifndef _BLOOM_FUNCTIONS_H
#define _BLOOM_FUNCTIONS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////////// DESCRIPTION ////////////////////////////////
// These utility functions and constants help several passes determine the
// size and center texel weight of the phosphor bloom in a uniform manner.
////////////////////////////////// INCLUDES //////////////////////////////////
// We need to calculate the correct blur sigma using some .cgp constants:
//#include "../user-settings.h"
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
#include "bind-shader-params.fxh"
#include "blur-functions.fxh"
/////////////////////////////// BLOOM CONSTANTS //////////////////////////////
// Compute constants with manual inlines of the functions below:
// Threshold (1/256) passed as `thresh` to get_min_sigma_to_blur_triad() by the
// sigma helpers later in this file.
static const float bloom_diff_thresh = 1.0/256.0;
/////////////////////////////////// HELPERS //////////////////////////////////
float get_min_sigma_to_blur_triad(const float triad_size,
    const float thresh)
{
    //  Requires:   1.) triad_size is the final phosphor triad size in pixels
    //              2.) thresh is the max desired pixel difference in the
    //                  blurred triad (e.g. 1.0/256.0).
    //  Returns:    The minimum sigma that will fully blur a phosphor triad
    //              on the screen to an even color, within thresh.
    //  The closed form below was found by curve-fitting data.
    //  Estimate: max error = ~0.086036, mean sq. error = ~0.0013387.
    const float linear_term = 0.6113*triad_size;
    const float thresh_term = 1.122*triad_size*sqrt(0.000416 + thresh);
    return -0.05168 + linear_term - thresh_term;
    //  Simpler alternative fit, kept for reference only
    //  (max error = ~0.16486, mean sq. error = ~0.0041041):
    //      0.5985*triad_size - triad_size*sqrt(thresh)
}
float get_absolute_scale_blur_sigma(const float thresh)
{
    //  Requires:   1.) min_allowed_viewport_triads must be a global whose .x
    //                  is the minimum number of horizontal phosphor triads;
    //                  the final image must contain at least that many for
    //                  realistic results.
    //              2.) bloom_approx_scale_x must be a global float equal to
    //                  the absolute horizontal scale of BLOOM_APPROX.
    //              3.) bloom_approx_scale_x/min_allowed_viewport_triads.x
    //                  should be <= 1.1658025090 to keep the final result <
    //                  0.62666015625 (the largest sigma ensuring the largest
    //                  unused texel weight stays < 1.0/256.0 for a 3x3 blur).
    //              4.) thresh is the max desired pixel difference in the
    //                  blurred triad (e.g. 1.0/256.0).
    //  Returns:    The minimum Gaussian sigma that will blur the pass output
    //              as much as it would have taken to blur away
    //              bloom_approx_scale_x horizontal phosphor triads.
    //  Description:
    //  BLOOM_APPROX should look like a downscaled phosphor blur.  Ideally
    //  we'd take the real phosphor bloom sigma and scale it down to the
    //  current resolution with (bloom_approx_scale_x/viewport_size_x), but
    //  the viewport size is unknown in this pass.  Instead, blur as much as
    //  it would take to blur away min_allowed_viewport_triads.x triads.  That
    //  blurs "more than necessary" when the user actually uses more triads,
    //  which isn't terrible: blurring a constant fraction of the viewport may
    //  better resemble a true optical bloom anyway (the viewport is generally
    //  about the same fraction of each player's field of view, regardless of
    //  screen size and resolution).
    //  Assume an extremely large viewport size for asymptotic results:
    const float asymptotic_triad_size =
        max_viewport_size_x/min_allowed_viewport_triads.x;
    const float asymptotic_sigma =
        get_min_sigma_to_blur_triad(asymptotic_triad_size, thresh);
    return bloom_approx_scale_x/max_viewport_size_x * asymptotic_sigma;
}
float get_center_weight(const float sigma)
{
    //  Given a Gaussian blur sigma, return the blur weight of the center
    //  texel.  With a runtime sigma this defers to
    //  get_fast_gaussian_weight_sum_inv(); otherwise the 1D weights are
    //  written out so a static sigma can be folded at compile time.
    #if _RUNTIME_PHOSPHOR_BLOOM_SIGMA
        return get_fast_gaussian_weight_sum_inv(sigma);
    #else
        //  Unnormalized weight at texel offset k is exp(-k*k/(2*sigma^2)):
        const float half_inv_variance = 0.5/(sigma*sigma);
        const float w0 = 1.0;
        const float w1 = exp(-1.0 * half_inv_variance);
        const float w2 = exp(-4.0 * half_inv_variance);
        const float w3 = exp(-9.0 * half_inv_variance);
        const float w4 = exp(-16.0 * half_inv_variance);
        const float w5 = exp(-25.0 * half_inv_variance);
        const float w6 = exp(-36.0 * half_inv_variance);
        const float w7 = exp(-49.0 * half_inv_variance);
        const float w8 = exp(-64.0 * half_inv_variance);
        const float w9 = exp(-81.0 * half_inv_variance);
        const float w10 = exp(-100.0 * half_inv_variance);
        const float w11 = exp(-121.0 * half_inv_variance);
        const float w12 = exp(-144.0 * half_inv_variance);
        const float w13 = exp(-169.0 * half_inv_variance);
        const float w14 = exp(-196.0 * half_inv_variance);
        const float w15 = exp(-225.0 * half_inv_variance);
        const float w16 = exp(-256.0 * half_inv_variance);
        const float w17 = exp(-289.0 * half_inv_variance);
        const float w18 = exp(-324.0 * half_inv_variance);
        const float w19 = exp(-361.0 * half_inv_variance);
        const float w20 = exp(-400.0 * half_inv_variance);
        const float w21 = exp(-441.0 * half_inv_variance);
        //  Note: If the implementation uses a smaller blur than the max
        //  allowed, the worst case is that the center weight is
        //  overestimated, putting a bit more energy into the brightpass --
        //  no huge deal.  If the implementation instead uses a larger blur
        //  than the max "allowed" (because of dynamic branching), the center
        //  weight could be underestimated, which is more of a problem.
        //  The tap count is picked from PHOSPHOR_BLOOM_TRIAD_SIZE_MODE; the
        //  first matching threshold wins.
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
            //  43x blur:
            const float inv_weight_sum = 1.0 /
                (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 +
                w11 + w12 + w13 + w14 + w15 + w16 + w17 + w18 + w19 + w20 + w21));
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
            //  31x blur:
            const float inv_weight_sum = 1.0 /
                (w0 + 2.0 * (w1 + w2 + w3 + w4 + w5 + w6 + w7 +
                w8 + w9 + w10 + w11 + w12 + w13 + w14 + w15));
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
            //  25x blur:
            const float inv_weight_sum = 1.0 / (w0 + 2.0 * (
                w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 + w11 + w12));
        #elif PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
            //  17x blur:
            const float inv_weight_sum = 1.0 / (w0 + 2.0 * (
                w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8));
        #else
            //  9x blur:
            const float inv_weight_sum = 1.0 / (w0 + 2.0 * (w1 + w2 + w3 + w4));
        #endif
        //  The returned center weight is the square of the normalized 1D
        //  center weight (w0 == 1, so that is just inv_weight_sum).
        return inv_weight_sum * inv_weight_sum;
    #endif
}
float3 tex2DblurNfast(const sampler2D tex, const float2 tex_uv,
    const float2 dxdy, const float sigma,
    const float input_gamma)
{
    //  Requires:   tex, tex_uv, dxdy, sigma and input_gamma are forwarded
    //              unchanged to one of the tex2Dblur{9,17,25,31,43}fast
    //              helpers (defined outside this file section).
    //  Returns:    The result of whichever fixed-size blur is selected:
    //              a.) When branching is allowed, the smallest blur whose
    //                  blurN_std_dev limit is >= sigma.
    //              b.) Otherwise, the blur size implied by
    //                  PHOSPHOR_BLOOM_TRIAD_SIZE_MODE -- the same selection
    //                  rule get_center_weight() uses, so the two agree.
    //  If sigma is static, we can safely branch and use the smallest blur
    //  that's big enough.  Ignore #define hints, because we'll only use a
    //  large blur if we actually need it, and the branches cost nothing.
    #if !_RUNTIME_PHOSPHOR_BLOOM_SIGMA
        #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
    #else
        //  It's still worth branching if the profile supports dynamic
        //  branches: It's much faster than using a hugely excessive blur,
        //  but each branch eats ~1% FPS.
        #if _DRIVERS_ALLOW_DYNAMIC_BRANCHES
            #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
        #endif
    #endif
    //  Failed optimization notes:
    //  I originally created a same-size mipmapped 5-tap separable blur10 that
    //  could handle any sigma by reaching into lower mip levels.  It was
    //  as fast as blur25fast for runtime sigmas and a tad faster than
    //  blur31fast for static sigmas, but mipmapping two viewport-size passes
    //  ate 10% of FPS across all codepaths, so it wasn't worth it.
    #ifdef PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
        if(sigma <= blur9_std_dev)
        {
            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else if(sigma <= blur17_std_dev)
        {
            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else if(sigma <= blur25_std_dev)
        {
            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else if(sigma <= blur31_std_dev)
        {
            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
        else
        {
            return tex2Dblur43fast(tex, tex_uv, dxdy, sigma, input_gamma);
        }
    #else
        //  If we can't afford to branch, we can only guess at what blur
        //  size we need.  Therefore, use the largest blur allowed by
        //  PHOSPHOR_BLOOM_TRIAD_SIZE_MODE.  This previously tested the
        //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_*_PIXELS names (no leading
        //  underscore, mixed #ifdef/#if), which get_center_weight() does not
        //  use; the mode comparison below mirrors get_center_weight().
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
            return tex2Dblur43fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #else
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #else
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #else
        #if PHOSPHOR_BLOOM_TRIAD_SIZE_MODE >= _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #else
            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma, input_gamma);
        #endif  //  _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
        #endif  //  _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
        #endif  //  _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
        #endif  //  _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
    #endif  //  PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
}
float get_bloom_approx_sigma(const float output_size_x_runtime,
    const float estimated_viewport_size_x)
{
    //  Requires:   1.) output_size_x_runtime == BLOOM_APPROX.output_size.x.
    //                  This is included for dynamic codepaths just in case
    //                  the following two globals are incorrect:
    //              2.) bloom_approx_size_x_for_fake should == the same
    //                  if PHOSPHOR_BLOOM_FAKE is #defined
    //              3.) bloom_approx_size_x should == the same otherwise
    //  Returns:    For gaussian4x4 (bloom_approx_filter > 1.5), a dynamic
    //              small bloom sigma that's as close to optimal as possible
    //              given available information.  Otherwise (blur3x3 or
    //              bilinear filtering), a static small bloom sigma that works
    //              well for typical cases.
    //  Assume an extremely large viewport size for asymptotic results:
    static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);

    if(bloom_approx_filter > 1.5)   //  4x4 true Gaussian resize
    {
        //  Use the runtime num triads and output size:
        const float triads_from_size =
            estimated_viewport_size_x/mask_triad_width;
        const float num_triads_runtime = max(min_allowed_viewport_triads.x,
            lerp(triads_from_size, mask_num_triads_across, mask_size_param));
        const float triad_size_runtime =
            max_viewport_size_x/num_triads_runtime;
        const float sigma_at_max_size = get_min_sigma_to_blur_triad(
            triad_size_runtime, bloom_diff_thresh);
        const float scaled_sigma =
            sigma_at_max_size * output_size_x_runtime/max_viewport_size_x;
        //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire,
        //  but account for the Gaussian scanline sigma from the last pass
        //  too.  The bloom will be too wide horizontally but tall enough
        //  vertically.
        return length(float2(scaled_sigma, gaussian_beam_max_sigma));
    }

    //  3x3 blur resize (the bilinear resize doesn't need a sigma):
    //  We're either using blur3x3 or bilinear filtering.  The biggest reason
    //  to choose blur3x3 is to avoid dynamic weights, so use a static
    //  calculation.  Assume the default static triad count -- a compromise
    //  that ensures typical triads are blurred, even if unusually large ones
    //  aren't.
    static const float num_triads_static =
        max(min_allowed_viewport_triads.x, mask_num_triads_across_static);
    #ifdef PHOSPHOR_BLOOM_FAKE
        static const float output_size_x_static =
            bloom_approx_size_x_for_fake;
    #else
        static const float output_size_x_static = bloom_approx_size_x;
    #endif
    static const float triad_size_static =
        max_viewport_size_x/num_triads_static;
    const float sigma_at_max_size_static = get_min_sigma_to_blur_triad(
        triad_size_static, bloom_diff_thresh);
    const float scaled_sigma_static =
        sigma_at_max_size_static * output_size_x_static/max_viewport_size_x;
    //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
    //  try accounting for the Gaussian scanline sigma from the last pass
    //  too; use the static default value:
    return length(float2(scaled_sigma_static, gaussian_beam_max_sigma_static));
}
float get_final_bloom_sigma(const float bloom_sigma_runtime)
{
    //  Requires:   1.) bloom_sigma_runtime is a precalculated sigma that's
    //                  optimal for the [known] triad size.
    //              2.) Call this from a fragment shader (not a vertex
    //                  shader), or blurring with static sigmas won't be
    //                  constant-folded.
    //  Returns:    With _RUNTIME_PHOSPHOR_BLOOM_SIGMA enabled, the runtime
    //              sigma passed in (optimal, but 10% slower).  Otherwise the
    //              optimistic static sigma computed from
    //              mask_triad_width_static.
    #if _RUNTIME_PHOSPHOR_BLOOM_SIGMA
        return bloom_sigma_runtime;
    #else
        //  Overblurring looks as bad as underblurring, so assume
        //  average-size triads, not worst-case huge triads:
        return get_min_sigma_to_blur_triad(
            mask_triad_width_static, bloom_diff_thresh);
    #endif
}
#endif // _BLOOM_FUNCTIONS_H

View file

@ -1,405 +0,0 @@
#ifndef _DERIVED_SETTINGS_AND_CONSTANTS_H
#define _DERIVED_SETTINGS_AND_CONSTANTS_H
#include "helper-functions-and-macros.fxh"
#include "user-settings.fxh"
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade.
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////////// DESCRIPTION ////////////////////////////////
// These macros and constants can be used across the whole codebase.
// Unlike the values in user-settings.cgh, end users shouldn't modify these.
/////////////////////////////// BEGIN INCLUDES ///////////////////////////////
//#include "../user-settings.h"
//#include "user-cgp-constants.h"
///////////////////////// BEGIN USER-CGP-CONSTANTS /////////////////////////
#ifndef _USER_CGP_CONSTANTS_H
#define _USER_CGP_CONSTANTS_H
// IMPORTANT:
// These constants MUST be set appropriately for the settings in crt-royale.cgp
// (or whatever related .cgp file you're using). If they aren't, you're likely
// to get artifacts, the wrong phosphor mask size, etc. I wish these could be
// set directly in the .cgp file to make things easier, but...they can't.
// PASS SCALES AND RELATED CONSTANTS:
// Copy the absolute scale_x for BLOOM_APPROX. There are two major versions of
// this shader: One does a viewport-scale bloom, and the other skips it. The
// latter benefits from a higher bloom_approx_scale_x, so save both separately:
// Absolute horizontal scale of the BLOOM_APPROX pass.
static const float bloom_approx_scale_x = 4.0 / 3.0;
// Deliberately huge viewport width used when computing asymptotic blur sigmas.
static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
// NOTE(review): same value as bloom_diff_thresh in bloom-functions; no use of
// this trailing-underscore name is visible in this chunk -- confirm whether it
// is still needed.
static const float bloom_diff_thresh_ = 1.0/256.0;
// BLOOM_APPROX output widths: the regular one, and the one used when
// PHOSPHOR_BLOOM_FAKE is defined.
static const float bloom_approx_size_x = 320.0;
static const float bloom_approx_size_x_for_fake = 400.0;
// Copy the viewport-relative scales of the phosphor mask resize passes
// (MASK_RESIZE and the pass immediately preceding it):
static const float2 mask_resize_viewport_scale = float2(0.0625, 0.0625);
// Copy the geom_max_aspect_ratio used to calculate the MASK_RESIZE scales, etc.:
static const float geom_max_aspect_ratio = 4.0/3.0;
// PHOSPHOR MASK TEXTURE CONSTANTS:
// Set the following constants to reflect the properties of the phosphor mask
// texture named in crt-royale.cgp. The shader optionally resizes a mask tile
// based on user settings, then repeats a single tile until filling the screen.
// The shader must know the input texture size (default 64x64), and to manually
// resize, it must also know the horizontal triads per tile (default 8).
// Pixel dimensions of the small (64x64) and large (512x512) phosphor mask
// textures.
static const float2 mask_texture_small_size = float2(64.0, 64.0);
static const float2 mask_texture_large_size = float2(512.0, 512.0);
// Horizontal phosphor triads contained in one mask tile.
static const float mask_triads_per_tile = 8.0;
// We need the average brightness of the phosphor mask to compensate for the
// dimming it causes. The following four values are roughly correct for the
// masks included with the shader. Update the value for any LUT texture you
// change. [Un]comment "#define PHOSPHOR_MASK_GRILLE14" depending on whether
// the loaded aperture grille uses 14-pixel or 15-pixel stripes (default 15).
// #ifndef PHOSPHOR_MASK_GRILLE14
// #define PHOSPHOR_MASK_GRILLE14 0
// #endif
// Average brightness of each bundled mask, normalized from 8-bit values
// (x/255). The file names listed under each constant are the textures the
// value applies to.
static const float mask_grille14_avg_color = 50.6666666/255.0;
// TileableLinearApertureGrille14Wide7d33Spacing*.png
// TileableLinearApertureGrille14Wide10And6Spacing*.png
static const float mask_grille15_avg_color = 53.0/255.0;
// TileableLinearApertureGrille15Wide6d33Spacing*.png
// TileableLinearApertureGrille15Wide8And5d5Spacing*.png
static const float mask_slot_avg_color = 46.0/255.0;
// TileableLinearSlotMask15Wide9And4d5Horizontal8VerticalSpacing*.png
// TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing*.png
static const float mask_shadow_avg_color = 41.0/255.0;
// TileableLinearShadowMask*.png
// TileableLinearShadowMaskEDP*.png
// The PHOSPHOR_MASK_GRILLE14 switch is commented out, so the aperture grille
// average is always the 15-pixel value and mask_grille14_avg_color is unused
// by this selection.
// #if PHOSPHOR_MASK_GRILLE14
// static const float mask_grille_avg_color = mask_grille14_avg_color;
// #else
static const float mask_grille_avg_color = mask_grille15_avg_color;
// #endif
// #endif
#endif // _USER_CGP_CONSTANTS_H
////////////////////////// END USER-CGP-CONSTANTS //////////////////////////
//////////////////////////////// END INCLUDES ////////////////////////////////
/////////////////////////////// FIXED SETTINGS ///////////////////////////////
// Identifiers for the available gamma simulation modes.
#define _SIMULATE_CRT_ON_LCD 1
#define _SIMULATE_GBA_ON_LCD 2
#define _SIMULATE_LCD_ON_CRT 3
#define _SIMULATE_GBA_ON_CRT 4
// Ensure the first pass decodes CRT gamma and the last encodes LCD gamma.
// The mode is fixed here to CRT-on-LCD.
#define GAMMA_SIMULATION_MODE _SIMULATE_CRT_ON_LCD
// Manually tiling a manually resized texture creates texture coord derivative
// discontinuities and confuses anisotropic filtering, causing discolored tile
// seams in the phosphor mask. Workarounds:
// a.) Using tex2Dlod disables anisotropic filtering for tiled masks. It's
// downgraded to tex2Dbias without _DRIVERS_ALLOW_TEX2DLOD #defined and
// disabled without _DRIVERS_ALLOW_TEX2DBIAS #defined either.
// b.) "Tile flat twice" requires drawing two full tiles without border padding
// to the resized mask FBO, and it's incompatible with same-pass curvature.
// (Same-pass curvature isn't used but could be in the future...maybe.)
// c.) "Fix discontinuities" requires derivatives and drawing one tile with
// border padding to the resized mask FBO, but it works with same-pass
// curvature. It's disabled without _DRIVERS_ALLOW_DERIVATIVES #defined.
// Precedence: a, then, b, then c (if multiple strategies are #defined).
// #ifndef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
// #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD 1 // 129.7 FPS, 4x, flat; 101.8 at fullscreen
// #endif
// #ifndef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
// #define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE 1 // 128.1 FPS, 4x, flat; 101.5 at fullscreen
// #endif
// #ifndef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
// #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES 1 // 124.4 FPS, 4x, flat; 97.4 at fullscreen
// #endif
// Also, manually resampling the phosphor mask is slightly blurrier with
// anisotropic filtering. (Resampling with mipmapping is even worse: It
// creates artifacts, but only with the fully bloomed shader.) The difference
// is subtle with small triads, but you can fix it for a small cost.
// #ifndef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
// #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD 0
// #endif
////////////////////////////// DERIVED SETTINGS //////////////////////////////
// Intel HD 4000 GPU's can't handle manual mask resizing (for now), setting the
// geometry mode at runtime, or a 4x4 true Gaussian resize. Disable
// incompatible settings ASAP. (_INTEGRATED_GRAPHICS_COMPATIBILITY_MODE may be
// #defined by either user-settings.h or a wrapper .cg that #includes the
// current .cg pass.)
// In integrated-graphics compatibility mode, force the incompatible features
// off (each macro is only redefined if it is currently nonzero) and derive
// bloom_approx_filter from the static setting with mode 2 remapped to 0.
// Outside compatibility mode, bloom_approx_filter is the static setting as-is.
#if _INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
#if _PHOSPHOR_MASK_MANUALLY_RESIZE
#undef _PHOSPHOR_MASK_MANUALLY_RESIZE
#define _PHOSPHOR_MASK_MANUALLY_RESIZE 0
#endif
#if _RUNTIME_GEOMETRY_MODE
#undef _RUNTIME_GEOMETRY_MODE
#define _RUNTIME_GEOMETRY_MODE 0
#endif
// Mode 2 (4x4 Gaussian resize) won't work, and mode 1 (3x3 blur) is
// inferior in most cases, so replace 2.0 with 0.0:
// NOTE(review): macro_cond is defined outside this chunk; presumably
// macro_cond(c, a, b) yields a when c holds and b otherwise -- confirm.
static const float bloom_approx_filter = macro_cond(
bloom_approx_filter_static > 1.5,
0.0,
bloom_approx_filter_static
);
#else
static const float bloom_approx_filter = bloom_approx_filter_static;
#endif
// Disable slow runtime paths if static parameters are used. Most of these
// won't be a problem anyway once the params are disabled, but some will.
// When runtime shader parameters are disabled, force every _RUNTIME_* feature
// toggle below to 0. Each one is only redefined if it is currently nonzero, so
// toggles that are already 0 are left untouched.
#if !_RUNTIME_SHADER_PARAMS_ENABLE
#if _RUNTIME_PHOSPHOR_BLOOM_SIGMA
#undef _RUNTIME_PHOSPHOR_BLOOM_SIGMA
#define _RUNTIME_PHOSPHOR_BLOOM_SIGMA 0
#endif
#if _RUNTIME_ANTIALIAS_WEIGHTS
#undef _RUNTIME_ANTIALIAS_WEIGHTS
#define _RUNTIME_ANTIALIAS_WEIGHTS 0
#endif
#if _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
#undef _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
#define _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS 0
#endif
#if _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
#undef _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
#define _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE 0
#endif
#if _RUNTIME_GEOMETRY_TILT
#undef _RUNTIME_GEOMETRY_TILT
#define _RUNTIME_GEOMETRY_TILT 0
#endif
#if _RUNTIME_GEOMETRY_MODE
#undef _RUNTIME_GEOMETRY_MODE
#define _RUNTIME_GEOMETRY_MODE 0
#endif
// The mask mode/type select override below is commented out and has no effect.
// #if FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
// #undef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
// #define FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT 0
// #endif
#endif
// Make tex2Dbias a backup for tex2Dlod for wider compatibility.
// #if ANISOTROPIC_TILING_COMPAT_TEX2DLOD
// #define ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
// #endif
// #if ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
// #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
// #endif
// Rule out unavailable anisotropic compatibility strategies:
// The body of this conditional is entirely commented out, so the #if/#endif
// pair currently has no effect regardless of _DRIVERS_ALLOW_DERIVATIVES.
#if !_DRIVERS_ALLOW_DERIVATIVES
// #if ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
// #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
// #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES 0
// #endif
#endif
// #if !_DRIVERS_ALLOW_TEX2DLOD
// #if ANISOTROPIC_TILING_COMPAT_TEX2DLOD
// #undef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
// #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD 0
// #endif
// #if ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
// #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
// #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD 0
// #endif
// #ifdef ANTIALIAS_DISABLE_ANISOTROPIC
// #undef ANTIALIAS_DISABLE_ANISOTROPIC
// #endif
// #endif
// #if !_DRIVERS_ALLOW_TEX2DBIAS
// #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
// #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
// #endif
// #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
// #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
// #endif
// #endif
// Prioritize anisotropic tiling compatibility strategies by performance and
// disable unused strategies. This concentrates all the nesting in one place.
// #if ANISOTROPIC_TILING_COMPAT_TEX2DLOD
// #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
// #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
// #endif
// #if ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
// #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
// #define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE 0
// #endif
// #if ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
// #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
// #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES 0
// #endif
// #else
// #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
// #if ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
// #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
// #define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE 0
// #endif
// #if ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
// #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
// #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES 0
// #endif
// #else
// // ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE is only compatible with
// // flat texture coords in the same pass, but that's all we use.
// #if ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
// #if ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
// #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
// #define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES 0
// #endif
// #endif
// #endif
// #endif
// The tex2Dlod and tex2Dbias strategies share a lot in common, and we can
// reduce some #ifdef nesting in the next section by essentially OR'ing them:
// #if ANISOTROPIC_TILING_COMPAT_TEX2DLOD
// #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
// #endif
// #ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
// #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
// #endif
// Prioritize anisotropic resampling compatibility strategies the same way:
// #if ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
// #ifdef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
// #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
// #endif
// #endif
/////////////////////// DERIVED PHOSPHOR MASK CONSTANTS //////////////////////
// If we can use the large mipmapped LUT without mipmapping artifacts, we
// should: It gives us more options for using fewer samples.
// #if USE_LARGE_PHOSPHOR_MASK
// #if ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
// // TODO: Take advantage of this!
// #define PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
// static const float2 mask_resize_src_lut_size = mask_texture_large_size;
// #else
// With the surrounding conditionals commented out, the resize source LUT size
// is unconditionally the large mask texture's size (mask_texture_large_size).
static const float2 mask_resize_src_lut_size = mask_texture_large_size;
// #endif
// #else
// static const float2 mask_resize_src_lut_size = mask_texture_small_size;
// #endif
// Height-over-width ratio of the source LUT (y / x of the size above).
static const float tile_aspect_inv = mask_resize_src_lut_size.y/mask_resize_src_lut_size.x;
// tex2D's sampler2D parameter MUST be a uniform global, a uniform input to
// main_fragment, or a static alias of one of the above. This makes it hard
// to select the phosphor mask at runtime: We can't even assign to a uniform
// global in the vertex shader or select a sampler2D in the vertex shader and
// pass it to the fragment shader (even with explicit TEXUNIT# bindings),
// because it just gives us the input texture or a black screen. However, we
// can get around these limitations by calling tex2D three times with different
// uniform samplers (or resizing the phosphor mask three times altogether).
// With dynamic branches, we can process only one of these branches on top of
// quickly discarding fragments we don't need (cgc seems able to overcome
// limitations around dependent texture fetches inside of branches). Without
// dynamic branches, we have to process every branch for every fragment...which
// is slower. Runtime sampling mode selection is slower without dynamic
// branches as well. Let the user's static #defines decide if it's worth it.
// Define _RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT only when
// _DRIVERS_ALLOW_DYNAMIC_BRANCHES evaluates to nonzero. The
// FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT fallback below is commented
// out, so there is currently no way to force the define without dynamic
// branches.
#if _DRIVERS_ALLOW_DYNAMIC_BRANCHES
#define _RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
// #else
// #if FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
// #define _RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
// #endif
#endif
// We need to render some minimum number of tiles in the resize passes.
// We need at least 1.0 just to repeat a single tile, and we need extra
// padding beyond that for anisotropic filtering, discontinuity fixing,
// antialiasing, same-pass curvature (not currently used), etc. First
// determine how many border texels and tiles we need, based on how the result
// will be sampled:
// Pixel-border budget, built up in three steps:
//   max_aa_base_pixel_border -> max_aniso_pixel_border -> max_tiled_pixel_border
// Each constant is defined in terms of the previous one, so the declaration
// order below matters.
#ifdef GEOMETRY_EARLY
// Uses the x component of aa_subpixel_r_offset_static as the subpixel offset.
static const float max_subpixel_offset = aa_subpixel_r_offset_static.x;
// Most antialiasing filters have a base radius of 4.0 pixels:
static const float max_aa_base_pixel_border = 4.0 +
max_subpixel_offset;
#else
// Without GEOMETRY_EARLY no antialiasing border is reserved.
static const float max_aa_base_pixel_border = 0.0;
#endif
// Anisotropic filtering adds about 0.5 to the pixel border:
// (The TEX2DLOD_FAMILY alternative is commented out, so the 0.5 is always
// added.)
// #ifndef ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
static const float max_aniso_pixel_border = max_aa_base_pixel_border + 0.5;
// #else
// static const float max_aniso_pixel_border = max_aa_base_pixel_border;
// #endif
// Fixing discontinuities adds 1.0 more to the pixel border:
// (That path is commented out, so the tiled border currently equals the
// anisotropic border.)
// #if ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
// static const float max_tiled_pixel_border = max_aniso_pixel_border + 1.0;
// #else
static const float max_tiled_pixel_border = max_aniso_pixel_border;
// #endif
// Convert the pixel border to an integer texel border. Assume same-pass
// curvature about triples the texel frequency:
// max_mask_texel_border is a macro (not a static const) that expands to
// macro_ceil() of the pixel border; with GEOMETRY_EARLY the pixel border is
// first multiplied by 3.0 (see the "about triples" note above).
#ifdef GEOMETRY_EARLY
#define max_mask_texel_border macro_ceil(max_tiled_pixel_border * 3.0f)
#else
#define max_mask_texel_border macro_ceil(max_tiled_pixel_border)
#endif
// Convert the texel border to a tile border using worst-case assumptions:
// the texel border is divided by the texel width of one tile at the minimum
// allowed triad size (mask_min_allowed_triad_size * mask_triads_per_tile).
static const float max_mask_tile_border = max_mask_texel_border/
(mask_min_allowed_triad_size * mask_triads_per_tile);
// Finally, set the number of resized tiles to render to MASK_RESIZE, and set
// the starting texel (inside borders) for sampling it.
// NOTE(review): with the ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE special
// case commented out, both branches of this conditional define the same two
// values: one tile plus a border on each side, and a start offset equal to
// the texel border.
#ifndef GEOMETRY_EARLY
// #if ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
// Special case: Render two tiles without borders. Anisotropic
// filtering doesn't seem to be a problem here.
// static const float mask_resize_num_tiles = 1.0 + 1.0;
// static const float mask_start_texels = 0.0;
// #else
static const float mask_resize_num_tiles = 1.0 + 2.0 * max_mask_tile_border;
static const float mask_start_texels = max_mask_texel_border;
// #endif
#else
static const float mask_resize_num_tiles = 1.0 + 2.0*max_mask_tile_border;
static const float mask_start_texels = max_mask_texel_border;
#endif
// We have to fit mask_resize_num_tiles into an FBO with a viewport scale of
// mask_resize_viewport_scale. This limits the maximum final triad size.
// Estimate the minimum number of triads we can split the screen into in each
// dimension (we'll be as correct as mask_resize_viewport_scale is):
// Total triads across the resized tiles (tiles * triads per tile).
static const float mask_resize_num_triads = mask_resize_num_tiles * mask_triads_per_tile;
// The same triad count in both dimensions, divided by
// mask_resize_viewport_scale.
static const float2 min_allowed_viewport_triads =
float2(mask_resize_num_triads, mask_resize_num_triads) / mask_resize_viewport_scale;
#endif // _DERIVED_SETTINGS_AND_CONSTANTS_H

View file

@ -1,84 +0,0 @@
#ifndef _DOWNSAMPLING_FUNCTIONS_H
#define _DOWNSAMPLING_FUNCTIONS_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//  Box-filter downsample along a line of taps.
//  Samples 2*num_pairs + 1 taps spaced delta_uv apart and centered on
//  texcoord, then returns the unweighted mean of their rgb values.
//  Alpha is ignored (only .rgb is read).
float3 opaque_linear_downsample(
    const sampler2D tex,
    const float2 texcoord,
    const uint num_pairs,
    const float2 delta_uv
) {
    //  One center tap plus num_pairs taps on each side.
    const uint tap_count = num_pairs * 2 + 1;
    //  Coordinate of the first (left-most) tap.
    const float2 first_tap_uv = texcoord - delta_uv * num_pairs;

    float3 rgb_sum = 0;
    for(int tap = 0; tap < tap_count; tap++) {
        rgb_sum += tex2D_nograd(tex, first_tap_uv + tap * delta_uv).rgb;
    }

    return rgb_sum / tap_count;
}
//  Lanczos-weighted downsample along a line of taps.
//  Samples 2*num_pairs + 1 taps spaced delta_uv apart and centered on
//  texcoord. Each tap is weighted by the Lanczos kernel
//      L(x) = a * sin(pi*x) * sin(pi*x/a) / (pi*x)^2,   a = num_sinc_lobes
//  where x is the tap's signed distance from the center tap in kernel
//  units, so the outermost taps land at x = -a and x = +a.
//  The center tap (x == 0, where the formula is 0/0) uses weight_at_center.
//  Returns the weighted rgb sum divided by the sum of weights; alpha is
//  ignored.
//
//  Fix: the kernel coordinate used to be i * sinc_dx, which ran over
//  [0, 2a] instead of [-a, +a]. That evaluated 0/0 at the first tap and
//  produced a window that was not symmetric about texcoord.
float3 opaque_lanczos_downsample(
    const sampler2D tex,
    const float2 texcoord,
    const uint num_pairs,
    const float2 delta_uv,
    const float num_sinc_lobes,
    const float weight_at_center
) {
    const uint total_num_samples = num_pairs * 2 + 1;
    const float2 coord_left = texcoord - delta_uv * num_pairs;
    //  Kernel-space distance between adjacent taps:
    //  2 * num_sinc_lobes / (total_num_samples - 1).
    //  Only read for off-center taps, which exist only if num_pairs > 0.
    const float sinc_dx = num_sinc_lobes / num_pairs;

    float3 acc = 0;
    float w_sum = 0;
    for(uint i = 0; i < total_num_samples; i++) {
        const float2 coord = coord_left + i * delta_uv;

        float weight = weight_at_center;
        if(i != num_pairs) {
            //  Convert to float before subtracting: num_pairs is unsigned,
            //  so an integer subtraction would wrap for taps left of center.
            const float sinc_x = (float(i) - float(num_pairs)) * sinc_dx;
            const float pi_x = pi * sinc_x;
            weight = num_sinc_lobes * sin(pi_x) * sin(pi_x / num_sinc_lobes) / (pi_x * pi_x);
        }

        acc += weight * tex2D_nograd(tex, coord).rgb;
        w_sum += weight;
    }

    return acc / w_sum;
}
//  Convenience overload: same as the six-argument
//  opaque_lanczos_downsample() with weight_at_center fixed to 1.
float3 opaque_lanczos_downsample(
    const sampler2D tex,
    const float2 texcoord,
    const uint num_pairs,
    const float2 delta_uv,
    const float num_sinc_lobes
) {
    const float3 filtered_rgb = opaque_lanczos_downsample(
        tex, texcoord, num_pairs, delta_uv, num_sinc_lobes, 1);
    return filtered_rgb;
}
#endif // _DOWNSAMPLING_FUNCTIONS_H

View file

@ -1,225 +0,0 @@
#ifndef _GAMMA_MANAGEMENT_H
#define _GAMMA_MANAGEMENT_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
// Copyright (C) 2020 Alex Gunter
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
#include "helper-functions-and-macros.fxh"
/////////////////////////////// BASE CONSTANTS ///////////////////////////////
// Set standard gamma constants, but allow users to override them:
// Default gamma constants. Defining OVERRIDE_STANDARD_GAMMA skips this whole
// section; the getter functions later in this file still reference
// ntsc_gamma, crt_reference_gamma_high and lcd_office_gamma, so an overriding
// user presumably has to supply those names - TODO confirm.
#ifndef OVERRIDE_STANDARD_GAMMA
// Standard encoding gammas:
static const float ntsc_gamma = 2.2; // Best to use NTSC for PAL too?
static const float pal_gamma = 2.8; // Never actually 2.8 in practice
// Typical device decoding gammas (only use for emulating devices):
// CRT/LCD reference gammas are higher than NTSC and Rec.709 video standard
// gammas: The standards purposely undercorrected for an analog CRT's
// assumed 2.5 reference display gamma to maintain contrast in assumed
// [dark] viewing conditions: http://www.poynton.com/PDFs/GammaFAQ.pdf
// These unstated assumptions about display gamma and perceptual rendering
// intent caused a lot of confusion, and more modern CRT's seemed to target
// NTSC 2.2 gamma with circuitry. LCD displays seem to have followed suit
// (they struggle near black with 2.5 gamma anyway), especially PC/laptop
// displays designed to view sRGB in bright environments. (Standards are
// also in flux again with BT.1886, but it's underspecified for displays.)
static const float crt_reference_gamma_high = 2.5; // In (2.35, 2.55)
static const float crt_reference_gamma_low = 2.35; // In (2.35, 2.55)
static const float lcd_reference_gamma = 2.5; // To match CRT
static const float crt_office_gamma = 2.2; // Circuitry-adjusted for NTSC
static const float lcd_office_gamma = 2.2; // Approximates sRGB
#endif // OVERRIDE_STANDARD_GAMMA
// Assuming alpha == 1.0 might make it easier for users to avoid some bugs,
// but only if they're aware of it.
// Defaults to false unless the user defines OVERRIDE_ALPHA_ASSUMPTIONS, in
// which case this declaration is skipped entirely.
#ifndef OVERRIDE_ALPHA_ASSUMPTIONS
static const bool assume_opaque_alpha = false;
#endif
/////////////////////// DERIVED CONSTANTS AS FUNCTIONS ///////////////////////
// gamma-management.h should be compatible with overriding gamma values with
// runtime user parameters, but we can only define other global constants in
// terms of static constants, not uniform user parameters. To get around this
// limitation, we need to define derived constants using functions.
// Set device gamma constants, but allow users to override them:
#if _OVERRIDE_DEVICE_GAMMA
    //  Override path: crt_gamma, gba_gamma and lcd_gamma must be defined
    //  globally by the user.
    float get_crt_gamma()
    {
        return crt_gamma;
    }
    float get_gba_gamma()
    {
        return gba_gamma;
    }
    float get_lcd_gamma()
    {
        return lcd_gamma;
    }
#else
    //  Default path: built-in reference values.
    float get_crt_gamma()
    {
        return crt_reference_gamma_high;
    }
    float get_gba_gamma()
    {
        //  Game Boy Advance; in (3.0, 4.0)
        return 3.5;
    }
    float get_lcd_gamma()
    {
        return lcd_office_gamma;
    }
#endif  //  _OVERRIDE_DEVICE_GAMMA
// Set decoding/encoding gammas for the first/last passes, but allow overrides:
#ifdef OVERRIDE_FINAL_GAMMA
    //  Override path: intermediate_gamma, input_gamma and output_gamma must
    //  be defined globally by the user.
    float get_intermediate_gamma()
    {
        return intermediate_gamma;
    }
    float get_input_gamma()
    {
        return input_gamma;
    }
    float get_output_gamma()
    {
        return output_gamma;
    }
#else
    //  If we gamma-correct every pass, always use ntsc_gamma between passes to
    //  ensure middle passes don't need to care if anything is being simulated:
    //  TODO: Figure out the correct way to configure this now that intermediate
    //      FBOs all use get_intermediate_gamma() directly. Also refer to the
    //      original code to confirm when a shader uses ntsc_gamma despite
    //      GAMMA_ENCODE_EVERY_FBO being undefined.
    //  float get_intermediate_gamma() { return ntsc_gamma; }
    float get_intermediate_gamma()
    {
        return 1.0;
    }

    //  Pick the input (decoding) and output (encoding) device pair from
    //  GAMMA_SIMULATION_MODE. The first matching mode wins; an unmatched
    //  mode falls back to ntsc_gamma on both ends.
#if GAMMA_SIMULATION_MODE == _SIMULATE_CRT_ON_LCD
    float get_input_gamma()
    {
        return get_crt_gamma();
    }
    float get_output_gamma()
    {
        return get_lcd_gamma();
    }
#elif GAMMA_SIMULATION_MODE == _SIMULATE_GBA_ON_LCD
    float get_input_gamma()
    {
        return get_gba_gamma();
    }
    float get_output_gamma()
    {
        return get_lcd_gamma();
    }
#elif GAMMA_SIMULATION_MODE == _SIMULATE_LCD_ON_CRT
    float get_input_gamma()
    {
        return get_lcd_gamma();
    }
    float get_output_gamma()
    {
        return get_crt_gamma();
    }
#elif GAMMA_SIMULATION_MODE == _SIMULATE_GBA_ON_CRT
    float get_input_gamma()
    {
        return get_gba_gamma();
    }
    float get_output_gamma()
    {
        return get_crt_gamma();
    }
#else   //  Don't simulate anything:
    float get_input_gamma()
    {
        return ntsc_gamma;
    }
    float get_output_gamma()
    {
        return ntsc_gamma;
    }
#endif  //  GAMMA_SIMULATION_MODE
#endif  //  OVERRIDE_FINAL_GAMMA
// Set decoding/encoding gammas for the current pass. Use static constants for
// linearize_input and gamma_encode_output, because they aren't derived, and
// they let the compiler do dead-code elimination.
// #ifndef GAMMA_ENCODE_EVERY_FBO
// #ifdef FIRST_PASS
// static const bool linearize_input = true;
// float get_pass_input_gamma() { return get_input_gamma(); }
// #else
// static const bool linearize_input = false;
// float get_pass_input_gamma() { return 1.0; }
// #endif
// #ifdef LAST_PASS
// static const bool gamma_encode_output = true;
// float get_pass_output_gamma() { return get_output_gamma(); }
// #else
// static const bool gamma_encode_output = false;
// float get_pass_output_gamma() { return 1.0; }
// #endif
// #else
// static const bool linearize_input = true;
// static const bool gamma_encode_output = true;
// #ifdef FIRST_PASS
// float get_pass_input_gamma() { return get_input_gamma(); }
// #else
// float get_pass_input_gamma() { return get_intermediate_gamma(); }
// #endif
// #ifdef LAST_PASS
// float get_pass_output_gamma() { return get_output_gamma(); }
// #else
// float get_pass_output_gamma() { return get_intermediate_gamma(); }
// #endif
// #endif
// Users might want to know if bilinear filtering will be gamma-correct:
// static const bool gamma_aware_bilinear = !linearize_input;
////////////////////// COLOR ENCODING/DECODING FUNCTIONS /////////////////////
//  Gamma-encode a color and force it opaque.
//  Returns float4(color.rgb ^ (1/gamma), 1); the input alpha is discarded.
float4 encode_output_opaque(const float4 color, const float gamma)
{
    //  Deliberately NOT static: the exponent depends on the gamma argument,
    //  and a static local is initialized only once and then persists between
    //  calls, so a later call with a different gamma could reuse a stale
    //  exponent.
    const float3 g = 1.0 / float3(gamma, gamma, gamma);
    return float4(pow(color.rgb, g), 1);
}
//  Gamma-decode (linearize) a color and force it opaque.
//  Returns float4(color.rgb ^ gamma, 1); the input alpha is discarded.
float4 decode_input_opaque(const float4 color, const float gamma)
{
    //  Deliberately NOT static: the exponent depends on the gamma argument,
    //  and a static local is initialized only once and then persists between
    //  calls, so a later call with a different gamma could reuse a stale
    //  exponent.
    const float3 g = float3(gamma, gamma, gamma);
    return float4(pow(color.rgb, g), 1);
}
//  Gamma-encode a color, preserving its alpha.
//  Returns float4(color.rgb ^ (1/gamma), color.a).
float4 encode_output(const float4 color, const float gamma)
{
    //  Deliberately NOT static: the exponent depends on the gamma argument,
    //  and a static local is initialized only once and then persists between
    //  calls, so a later call with a different gamma could reuse a stale
    //  exponent.
    const float3 g = 1.0 / float3(gamma, gamma, gamma);
    return float4(pow(color.rgb, g), color.a);
}
//  Gamma-decode (linearize) a color, preserving its alpha.
//  Returns float4(color.rgb ^ gamma, color.a).
float4 decode_input(const float4 color, const float gamma)
{
    //  Deliberately NOT static: the exponent depends on the gamma argument,
    //  and a static local is initialized only once and then persists between
    //  calls, so a later call with a different gamma could reuse a stale
    //  exponent.
    const float3 g = float3(gamma, gamma, gamma);
    return float4(pow(color.rgb, g), color.a);
}
/////////////////////////// TEXTURE LOOKUP WRAPPERS //////////////////////////
// "SMART" LINEARIZING TEXTURE LOOKUP FUNCTIONS:
// Provide a wide array of linearizing texture lookup wrapper functions. The
// Cg shader spec Retroarch uses only allows for 2D textures, but 1D and 3D
// lookups are provided for completeness in case that changes someday. Nobody
// is likely to use the *fetch and *proj functions, but they're included just
// in case. The only tex*D texture sampling functions omitted are:
// - tex*Dcmpbias
// - tex*Dcmplod
// - tex*DARRAY*
// - tex*DMS*
// - Variants returning integers
// Standard line length restrictions are ignored below for vertical brevity.
// tex2D:
//  Sample tex at tex_coords with tex2D() and linearize the result with
//  decode_input() using the given gamma.
float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const float gamma)
{
    const float4 encoded_texel = tex2D(tex, tex_coords);
    return decode_input(encoded_texel, gamma);
}
//  float3 overload: only tex_coords.xy is used; the z component is ignored.
float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float gamma)
{
    const float4 encoded_texel = tex2D(tex, tex_coords.xy);
    return decode_input(encoded_texel, gamma);
}
// float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const int texel_off, const float gamma)
// { return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), texel_off), gamma); }
// float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const int texel_off, const float gamma)
// { return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), texel_off), gamma); }
// tex2Dlod:
//  Sample tex with tex2Dlod() at (tex_coords, 0, 0) and linearize the result
//  with decode_input() using the given gamma.
float4 tex2Dlod_linearize(const sampler2D tex, const float2 tex_coords, const float gamma)
{
    const float4 lod_coords = float4(tex_coords, 0, 0);
    const float4 encoded_texel = tex2Dlod(tex, lod_coords, 0.0);
    return decode_input(encoded_texel, gamma);
}
//  float4 overload: only tex_coords.xy is used. The caller's z/w components
//  are replaced with 0 before sampling.
//  NOTE(review): if w is meant to carry a mip level, it is dropped here -
//  confirm this is intentional.
float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const float gamma)
{
    const float4 lod_coords = float4(tex_coords.xy, 0, 0);
    const float4 encoded_texel = tex2Dlod(tex, lod_coords, 0.0);
    return decode_input(encoded_texel, gamma);
}
// float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off, const float gamma)
// { return decode_input(tex2Dlod(tex, float4(tex_coords.x, tex_coords.y, 0, 0), texel_off), gamma); }
#endif // _GAMMA_MANAGEMENT_H

View file

@ -1,715 +0,0 @@
#ifndef _GEOMETRY_FUNCTIONS_H
#define _GEOMETRY_FUNCTIONS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////// INCLUDES //////////////////////////////////
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
#include "bind-shader-params.fxh"
//////////////////////////// MACROS AND CONSTANTS ////////////////////////////
// Curvature-related constants:
#define MAX_POINT_CLOUD_SIZE 9
///////////////////////////// CURVATURE FUNCTIONS /////////////////////////////
float2 quadratic_solve(const float a, const float b_over_2, const float c)
{
    //  Solves a*t^2 + b*t + c = 0 for one root.
    //  Requires:   1.) a and c are the quadratic coefficients, and
    //                  b_over_2 = b/2.0 (factoring the 2 out simplifies
    //                  the terms)
    //              2.) b_over_2 is guaranteed < 0.0 by the caller, which
    //                  avoids a branch here
    //  Returns:    float2(first_solution, discriminant). The discriminant
    //              is handed back so the caller decides how to treat the
    //              "no intersection" case. The root is computed in the
    //              Kahan/Citardauq form c / (-b/2 + sqrt(disc)) for
    //              numerical robustness.
    const float disc = b_over_2 * b_over_2 - a * c;
    const float denom = sqrt(disc) - b_over_2;
    return float2(c / denom, disc);
}
float2 intersect_sphere(const float3 view_vec, const float3 eye_pos_vec)
{
    //  Ray/sphere intersection against a sphere of radius geom_radius
    //  (a global) centered at the local origin.
    //  Requires:   view_vec (ray direction) and eye_pos_vec (ray origin,
    //              i.e. the vector from the origin to the eye/camera) are
    //              expressed in the sphere's local coordinate frame.
    //  Returns:    quadratic_solve()'s float2(distance, discriminant),
    //              where distance is to the first intersection in units of
    //              length(view_vec).
    //  Reference:  http://wiki.cgsociety.org/index.php/Ray_Sphere_Intersection
    const float radius_sq = geom_radius * geom_radius;
    //  Quadratic coefficients, with the factor 2.0 taken out of b. The
    //  original author notes that the b/2 term is guaranteed negative.
    const float quad_a = dot(view_vec, view_vec);
    const float quad_half_b = dot(view_vec, eye_pos_vec);
    const float quad_c = dot(eye_pos_vec, eye_pos_vec) - radius_sq;
    return quadratic_solve(quad_a, quad_half_b, quad_c);
}
float2 intersect_cylinder(const float3 view_vec, const float3 eye_pos_vec)
{
    //  Ray/cylinder intersection against an infinite cylinder of radius
    //  geom_radius (a global) whose axis is the local y axis.
    //  Requires:   view_vec (ray direction) and eye_pos_vec (ray origin,
    //              i.e. the vector from the origin to the eye/camera) are
    //              expressed in the cylinder's local coordinate frame.
    //  Returns:    quadratic_solve()'s float2(distance, discriminant),
    //              where distance is to the first intersection in units of
    //              length(view_vec).
    //  Reference:  Coefficients are derived in Christer Ericson's Real-Time
    //              Collision Detection, p. 195-196; this version uses
    //              Lagrange's identity to reduce operations.
    //  Unit axis direction and an arbitrary "top" reference point on it:
    const float3 axis_dir = float3(0.0, 1.0, 0.0);
    const float3 axis_ref_point = float3(0.0, geom_radius, 0.0);
    const float3 ref_to_eye = eye_pos_vec - axis_ref_point;
    //  Crossing with the axis removes each vector's component along it.
    const float3 view_perp = cross(axis_dir, view_vec);
    const float3 eye_perp = cross(axis_dir, ref_to_eye);
    //  Quadratic coefficients, with the factor 2.0 taken out of b. The
    //  original author notes that the b/2 term is guaranteed negative.
    const float quad_a = dot(view_perp, view_perp);
    const float quad_half_b = dot(eye_perp, view_perp);
    const float quad_c = dot(eye_perp, eye_perp) - geom_radius * geom_radius;
    return quadratic_solve(quad_a, quad_half_b, quad_c);
}
float2 cylinder_xyz_to_uv(const float3 intersection_pos_local,
    const float2 geom_aspect)
{
    //  Requires:   An xyz intersection position on a cylinder.
    //  Returns:    video_uv coords mapped to range [-0.5, 0.5]
    //  Mapping:    u is the signed arc length around the cylinder measured
    //              in the xz plane; v is -y (+v = -y). Both are then
    //              divided by geom_aspect to stretch the "square" mapping
    //              to the video's proportions.
    //  atan2 gives a numerically robust angle from the image center:
    const float center_angle = atan2(intersection_pos_local.x,
        intersection_pos_local.z);
    const float2 square_uv = float2(center_angle * geom_radius,
        -intersection_pos_local.y);
    return square_uv / geom_aspect;
}
float3 cylinder_uv_to_xyz(const float2 video_uv, const float2 geom_aspect)
{
    //  Requires:   video_uv coords mapped to range [-0.5, 0.5]
    //  Returns:    An xyz intersection position on a cylinder. This is the
    //              inverse of cylinder_xyz_to_uv().
    //  Scale by the aspect ratio to get proportionate x/y lengths; the x
    //  length is the arc length around the cylinder.
    const float2 square_uv = video_uv * geom_aspect;
    const float center_angle = square_uv.x / geom_radius;
    //  Alternatives for z: sqrt(geom_radius**2 - x**2), or
    //  geom_radius/sqrt(1.0 + tan(angle)**2) with x = z * tan(angle).
    return float3(
        sin(center_angle) * geom_radius,
        -square_uv.y,
        cos(center_angle) * geom_radius);
}
float2 sphere_xyz_to_uv(const float3 intersection_pos_local,
    const float2 geom_aspect)
{
    //  Requires:   An xyz intersection position on a sphere.
    //  Returns:    video_uv coords mapped to range [-0.5, 0.5]
    //  Mapping:    The direction of square_uv matches the direction of
    //              (x, -y) of the intersection, and length(square_uv) is
    //              the great-circle arc length from the image center at
    //              (0.0, 0.0, geom_radius) to the intersection. The
    //              original author credits cgwg for the idea of basing the
    //              mapping on great circle distances.
    //  The angle is computed as atan2(|cross|, dot), a numerically robust
    //  method posted by Roger Stafford on comp.soft-sys.matlab:
    //  https://groups.google.com/d/msg/comp.soft-sys.matlab/zNbUui3bjcA/c0HV_bHSx9cJ
    const float3 center_pos = float3(0.0, 0.0, geom_radius);
    const float cross_len = length(cross(intersection_pos_local, center_pos));
    const float dot_val = dot(intersection_pos_local, center_pos);
    const float arc_len = atan2(cross_len, dot_val) * geom_radius;
    //  Unit direction in the "square" uv plane (+v = -y):
    const float2 uv_dir = normalize(float2(intersection_pos_local.x,
        -intersection_pos_local.y));
    //  Divide by the aspect ratio to stretch the square mapping:
    const float2 square_uv = arc_len * uv_dir;
    return square_uv / geom_aspect;
}
float3 sphere_uv_to_xyz(const float2 video_uv, const float2 geom_aspect)
{
    //  Requires:   video_uv coords mapped to range [-0.5, 0.5]
    //  Returns:    An xyz intersection position on a sphere of radius
    //              geom_radius. This is the inverse of sphere_xyz_to_uv().
    //  Fix:        The previous version special-cased video_uv.x == 0 and
    //              video_uv.y == 0 to dodge a 0/0, but in doing so returned
    //              the image center for EVERY point on either axis (e.g.
    //              (0.5, 0) or (0, -0.5)). That made the mapping
    //              discontinuous and no longer the inverse of
    //              sphere_xyz_to_uv(). Only the exact center needs special
    //              handling.
    //  Expand video_uv by the aspect ratio to get proportionate x/y
    //  lengths; the length of that vector is the great-circle arc length
    //  from the image center.
    const float2 square_uv = video_uv * geom_aspect;
    const float arc_len = length(square_uv);
    //  At the exact center there is no direction to normalize. The guard is
    //  written as !(x > 0) so a NaN arc length also falls back to the
    //  center.
    if (!(arc_len > 0.0)) {
        return float3(0.0, 0.0, geom_radius);
    }
    const float2 square_uv_unit = square_uv / arc_len;
    const float angle_from_image_center = arc_len / geom_radius;
    const float xy_dist_from_sphere_center =
        sin(angle_from_image_center) * geom_radius;
    const float2 xy_pos = xy_dist_from_sphere_center * square_uv_unit;
    const float z_pos = cos(angle_from_image_center) * geom_radius;
    //  +v = -y, matching sphere_xyz_to_uv().
    return float3(xy_pos.x, -xy_pos.y, z_pos);
}
float2 sphere_alt_xyz_to_uv(const float3 intersection_pos_local,
    const float2 geom_aspect)
{
    //  Requires:   An xyz intersection position on the curved surface.
    //  Returns:    video_uv coords mapped to range [-0.5, 0.5]
    //  Mapping:    u is the signed arc length in xz-space and v is the
    //              signed arc length in yz-space (with +v = -y). Compare
    //              cylinder_xyz_to_uv(), which treats u the same way.
    //  Component-wise atan2 of (x, -y) against z:
    const float2 xy_flipped = float2(intersection_pos_local.x,
        -intersection_pos_local.y);
    const float2 center_angles = atan2(xy_flipped, intersection_pos_local.zz);
    return (center_angles * geom_radius) / geom_aspect;
}
float3 sphere_alt_uv_to_xyz(const float2 video_uv, const float2 geom_aspect)
{
    //  Requires:   video_uv coords mapped to range [-0.5, 0.5]
    //  Returns:    An xyz intersection position on a sphere. This is the
    //              inverse of sphere_alt_xyz_to_uv().
    //  Each aspect-scaled uv component is an arc length; dividing by the
    //  radius gives the angle per axis (compare cylinder_uv_to_xyz()).
    const float2 center_angles = (video_uv * geom_aspect) / geom_radius;
    const float2 xy_pos = sin(center_angles) * geom_radius;
    //  z is whatever is left of the radius once x and y are fixed.
    const float z_sq = geom_radius * geom_radius - dot(xy_pos, xy_pos);
    return float3(xy_pos.x, -xy_pos.y, sqrt(z_sq));
}
float2 intersect(const float3 view_vec_local, const float3 eye_pos_local,
    const float geom_mode)
{
    //  Dispatch on geom_mode: values below 2.5 use the sphere, everything
    //  else uses the cylinder. Returns float2(distance, discriminant).
    if (geom_mode < 2.5) {
        return intersect_sphere(view_vec_local, eye_pos_local);
    }
    return intersect_cylinder(view_vec_local, eye_pos_local);
}
float2 xyz_to_uv(const float3 intersection_pos_local,
    const float2 geom_aspect, const float geom_mode)
{
    //  Dispatch on geom_mode:
    //      < 1.5   sphere mapping
    //      < 2.5   alternative sphere mapping
    //      else    cylinder mapping
    if (geom_mode < 1.5) {
        return sphere_xyz_to_uv(intersection_pos_local, geom_aspect);
    }
    if (geom_mode < 2.5) {
        return sphere_alt_xyz_to_uv(intersection_pos_local, geom_aspect);
    }
    return cylinder_xyz_to_uv(intersection_pos_local, geom_aspect);
}
float3 uv_to_xyz(const float2 uv, const float2 geom_aspect,
    const float geom_mode)
{
    //  Inverse dispatch of xyz_to_uv(), using the same geom_mode thresholds:
    //      < 1.5   sphere mapping
    //      < 2.5   alternative sphere mapping
    //      else    cylinder mapping
    if (geom_mode < 1.5) {
        return sphere_uv_to_xyz(uv, geom_aspect);
    }
    if (geom_mode < 2.5) {
        return sphere_alt_uv_to_xyz(uv, geom_aspect);
    }
    return cylinder_uv_to_xyz(uv, geom_aspect);
}
float2 view_vec_to_uv(const float3 view_vec_local, const float3 eye_pos_local,
    const float2 geom_aspect, const float geom_mode, out float3 intersection_pos)
{
    //  Cast a ray from eye_pos_local along view_vec_local (both already in
    //  the primitive's local frame) and convert the hit point to uv coords.
    //  Outputs:    intersection_pos receives the hit position. It is
    //              written even when the ray is judged to miss.
    //  Returns:    The uv coords of the hit, or the out-of-range value
    //              float2(1.0, 1.0) when the discriminant is not above
    //              0.005 (the view ray doesn't intersect the primitive).
    const float2 dist_and_disc = intersect(view_vec_local, eye_pos_local,
        geom_mode);
    const float3 hit_pos = eye_pos_local + view_vec_local * dist_and_disc.x;
    intersection_pos = hit_pos;
    if (dist_and_disc.y > 0.005) {
        return xyz_to_uv(hit_pos, geom_aspect, geom_mode);
    }
    return float2(1.0, 1.0);
}
float3 get_ideal_global_eye_pos_for_points(float3 eye_pos,
const float2 geom_aspect, const float3 global_coords[MAX_POINT_CLOUD_SIZE],
const int num_points)
{
// Purpose: Starting from a camera position that sees every point in
// global_coords[], slide the camera laterally and then forward
// so the point cloud fills as much of the view as possible.
// Only the first num_points entries of global_coords[] are read.
// Requires: Parameters:
// 1.) Starting eye_pos is a global 3D position at which the
// camera contains all points in global_coords[] in its FOV
// 2.) geom_aspect = get_aspect_vector(
// IN.output_size.x / IN.output_size.y);
// 3.) global_coords is a point cloud containing global xyz
// coords of extreme points on the simulated CRT screen.
// Globals:
// 1.) geom_view_dist must be > 0.0. It controls the "near
// plane" used to interpret flat_video_uv as a view
// vector, which controls the field of view (FOV).
// 2.) geom_radius scales the initial min/max sentinels below.
// Eyespace coordinate frame: +x = right, +y = up, +z = back
// Returns: Return an eye position at which the point cloud spans as
// much of the screen as possible (given the FOV controlled by
// geom_view_dist) without being cropped or sheared.
// Algorithm:
// 1.) Move the eye laterally to a point which attempts to maximize
// the amount we can move forward without clipping the CRT screen.
// 2.) Move forward by as much as possible without clipping the CRT.
// Get the allowed movement range by solving for the eye_pos offsets
// that result in each point being projected to a screen edge/corner in
// pseudo-normalized device coords (where xy ranges from [-0.5, 0.5]
// and z = eyespace z):
// pndc_coord = float3(float2(eyespace_xyz.x, -eyespace_xyz.y)*
// geom_view_dist / (geom_aspect * -eyespace_xyz.z), eyespace_xyz.z);
// Notes:
// The field of view is controlled by geom_view_dist's magnitude relative to
// the view vector's x and y components:
// view_vec.xy ranges from [-0.5, 0.5] * geom_aspect
// view_vec.z = -geom_view_dist
// But for the purposes of perspective divide, it should be considered:
// view_vec.xy ranges from [-0.5, 0.5] * geom_aspect / geom_view_dist
// view_vec.z = -1.0
// With the limit set to 1, the centering loop body below runs exactly once.
static const int max_centering_iters = 1; // Keep for easy testing.
for(int iter = 0; iter < max_centering_iters; iter++)
{
// 0.) Get the eyespace coordinates of our point cloud:
float3 eyespace_coords[MAX_POINT_CLOUD_SIZE];
for(int i = 0; i < num_points; i++)
{
eyespace_coords[i] = global_coords[i] - eye_pos;
}
// 1a.)For each point, find out how far we can move eye_pos in each
// lateral direction without the point clipping the frustum.
// Eyespace +y = up, screenspace +y = down, so flip y after
// applying the eyespace offset (on the way to "clip space").
// Solve for two offsets per point based on:
// (eyespace_xyz.xy - offset_dr) * float2(1.0, -1.0) *
// geom_view_dist / (geom_aspect * -eyespace_xyz.z) = float2(-0.5)
// (eyespace_xyz.xy - offset_ul) * float2(1.0, -1.0) *
// geom_view_dist / (geom_aspect * -eyespace_xyz.z) = float2(0.5)
// offset_ul and offset_dr represent the farthest we can move the
// eye_pos up-left and down-right. Save the min of all offset_dr's
// and the max of all offset_ul's (since it's negative).
// The +/-10*radius values are starting sentinels for the min/max
// reductions in the loop that follows.
float abs_radius = abs(geom_radius); // In case anyone gets ideas. ;)
float2 offset_dr_min = float2(10.0 * abs_radius, 10.0 * abs_radius);
float2 offset_ul_max = float2(-10.0 * abs_radius, -10.0 * abs_radius);
for(int i = 0; i < num_points; i++)
{
static const float2 flipy = float2(1.0, -1.0);
float3 eyespace_xyz = eyespace_coords[i];
float2 offset_dr = eyespace_xyz.xy - float2(-0.5, -0.5) *
(geom_aspect * -eyespace_xyz.z) / (geom_view_dist * flipy);
float2 offset_ul = eyespace_xyz.xy - float2(0.5, 0.5) *
(geom_aspect * -eyespace_xyz.z) / (geom_view_dist * flipy);
offset_dr_min = min(offset_dr_min, offset_dr);
offset_ul_max = max(offset_ul_max, offset_ul);
}
// 1b.)Update eye_pos: Adding the average of offset_ul_max and
// offset_dr_min gives it equal leeway on the top vs. bottom
// and left vs. right. Recalculate eyespace_coords accordingly.
float2 center_offset = 0.5 * (offset_ul_max + offset_dr_min);
eye_pos.xy += center_offset;
for(int i = 0; i < num_points; i++)
{
eyespace_coords[i] = global_coords[i] - eye_pos;
}
// 2a.)For each point, find out how far we can move eye_pos forward
// without the point clipping the frustum. Flip the y
// direction in advance (matters for a later step, not here).
// Solve for four offsets per point based on:
// eyespace_xyz_flipy.x * geom_view_dist /
// (geom_aspect.x * (offset_z - eyespace_xyz_flipy.z)) =-0.5
// eyespace_xyz_flipy.y * geom_view_dist /
// (geom_aspect.y * (offset_z - eyespace_xyz_flipy.z)) =-0.5
// eyespace_xyz_flipy.x * geom_view_dist /
// (geom_aspect.x * (offset_z - eyespace_xyz_flipy.z)) = 0.5
// eyespace_xyz_flipy.y * geom_view_dist /
// (geom_aspect.y * (offset_z - eyespace_xyz_flipy.z)) = 0.5
// We'll vectorize the actual computation. Take the maximum of
// these four for a single offset, and continue taking the max
// for every point (use max because offset.z is negative).
// NOTE(review): this sentinel uses geom_radius directly, whereas the
// lateral sentinels above use abs(geom_radius). The two only agree for
// geom_radius >= 0 -- confirm whether abs_radius was intended here.
float offset_z_max = -10.0 * geom_radius * geom_view_dist;
for(int i = 0; i < num_points; i++)
{
float3 eyespace_xyz_flipy = eyespace_coords[i] *
float3(1.0, -1.0, 1.0);
// Components are ordered (x @ -0.5, y @ -0.5, x @ +0.5, y @ +0.5):
float4 offset_zzzz = eyespace_xyz_flipy.zzzz +
(eyespace_xyz_flipy.xyxy * geom_view_dist) /
(float4(-0.5, -0.5, 0.5, 0.5) * float4(geom_aspect, geom_aspect));
// Ignore offsets that push positive x/y values to opposite
// boundaries, and vice versa, and don't let the camera move
// past a point in the dead center of the screen:
offset_z_max = (eyespace_xyz_flipy.x < 0.0) ?
max(offset_z_max, offset_zzzz.x) : offset_z_max;
offset_z_max = (eyespace_xyz_flipy.y < 0.0) ?
max(offset_z_max, offset_zzzz.y) : offset_z_max;
offset_z_max = (eyespace_xyz_flipy.x > 0.0) ?
max(offset_z_max, offset_zzzz.z) : offset_z_max;
offset_z_max = (eyespace_xyz_flipy.y > 0.0) ?
max(offset_z_max, offset_zzzz.w) : offset_z_max;
offset_z_max = max(offset_z_max, eyespace_xyz_flipy.z);
}
// 2b.)Update eye_pos: Add the maximum (smallest negative) z offset.
eye_pos.z += offset_z_max;
}
return eye_pos;
}
float3 get_ideal_global_eye_pos(const float3x3 local_to_global,
    const float2 geom_aspect, const float geom_mode)
{
    //  Requires:   1.) local_to_global is applied (via mul) to local-space
    //                  surface positions to obtain global positions.
    //              2.) geom_aspect is the aspect vector used by the
    //                  uv<=>xyz mappings.
    //              3.) geom_mode selects the primitive exactly as in
    //                  uv_to_xyz() (< 1.5 sphere, < 2.5 sphere alt.,
    //                  otherwise cylinder).
    //              Globals: geom_radius, geom_view_dist
    //  Returns:    A global eye position from which the simulated CRT fills
    //              the view.  If any sampled surface point has a negative
    //              global z, the conservative initial eye position is
    //              returned unoptimized.
    //  Start with an initial eye_pos that includes the entire primitive
    //  (sphere or cylinder) in its field-of-view:
    const float3 high_view = float3(0.0, geom_aspect.y, -geom_view_dist);
    const float3 low_view = high_view * float3(1.0, -1.0, 1.0);
    const float len_sq = dot(high_view, high_view);
    const float fov = abs(acos(dot(high_view, low_view)/len_sq));
    //  Trigonometry/similar triangles say distance = geom_radius/sin(fov/2):
    const float eye_z_spherical = geom_radius/sin(fov*0.5);
    const float3 eye_pos = geom_mode < 2.5 ?
        float3(0.0, 0.0, eye_z_spherical) :
        float3(0.0, 0.0, max(geom_view_dist, eye_z_spherical));
    //  Get global xyz coords of extreme sample points on the simulated CRT
    //  screen.  Start with the center, edge centers, and corners of the
    //  video image.  We can't ignore backfacing points: They're occluded
    //  by closer points on the primitive, but they may NOT be occluded by
    //  the convex hull of the remaining samples (i.e. the remaining convex
    //  hull might not envelope points that do occlude a back-facing point.)
    static const int num_points = MAX_POINT_CLOUD_SIZE;
    float3 global_coords[MAX_POINT_CLOUD_SIZE];
    global_coords[0] = mul(local_to_global, uv_to_xyz(float2(0.0, 0.0), geom_aspect, geom_mode));
    global_coords[1] = mul(local_to_global, uv_to_xyz(float2(0.0, -0.5), geom_aspect, geom_mode));
    global_coords[2] = mul(local_to_global, uv_to_xyz(float2(0.0, 0.5), geom_aspect, geom_mode));
    global_coords[3] = mul(local_to_global, uv_to_xyz(float2(-0.5, 0.0), geom_aspect, geom_mode));
    global_coords[4] = mul(local_to_global, uv_to_xyz(float2(0.5, 0.0), geom_aspect, geom_mode));
    global_coords[5] = mul(local_to_global, uv_to_xyz(float2(-0.5, -0.5), geom_aspect, geom_mode));
    global_coords[6] = mul(local_to_global, uv_to_xyz(float2(0.5, -0.5), geom_aspect, geom_mode));
    global_coords[7] = mul(local_to_global, uv_to_xyz(float2(-0.5, 0.5), geom_aspect, geom_mode));
    global_coords[8] = mul(local_to_global, uv_to_xyz(float2(0.5, 0.5), geom_aspect, geom_mode));
    //  Adding more inner image points could help in extreme cases, but too many
    //  points will kill the framerate.  For safety, default to the initial
    //  eye_pos if any z coords are negative.  Every sample must be inspected
    //  (index i, not a fixed index), otherwise only the center point would
    //  ever be tested and negative-z edge/corner samples would slip through:
    float num_negative_z_coords = 0.0;
    for(int i = 0; i < num_points; i++)
    {
        num_negative_z_coords += float(global_coords[i].z < 0.0);
    }
    //  Outsource the optimized eye_pos calculation:
    return num_negative_z_coords > 0.5 ? eye_pos :
        get_ideal_global_eye_pos_for_points(eye_pos, geom_aspect,
            global_coords, num_points);
}
float3x3 get_pixel_to_object_matrix(const float3x3 global_to_local,
    const float3 eye_pos_local, const float3 view_vec_global,
    const float3 intersection_pos_local, const float3 normal,
    const float2 output_size_inv)
{
    //  Requires:   See get_curved_video_uv_coords_and_tangent_matrix for
    //              descriptions of each parameter.
    //  Returns:    A matrix taking 2D pixel-space offsets ((+1.0, +1.0) is
    //              one pixel down-right, matching uv texel direction) to 3D
    //              object-space vectors in the CRT's local right-handed
    //              frame that lie in the plane tangent to the CRT surface
    //              at the intersection position.  The result is later
    //              combined with an object-to-tangent matrix to produce uv
    //              offsets.
    //  Approach:   Piecewise-linear approximation; intersecting two
    //              neighboring view rays with the tangent plane is cheaper
    //              than three full view/primitive intersections.
    //  1.) Build the view rays for the pixels one step to the right and one
    //      step down (global -y is down), and bring them into local space:
    const float3 ray_right_local = mul(global_to_local,
        view_vec_global + float3(output_size_inv.x, 0.0, 0.0));
    const float3 ray_down_local = mul(global_to_local,
        view_vec_global + float3(0.0, -output_size_inv.y, 0.0));
    //  2.) Intersect both rays with the tangent plane through the true hit
    //      point.  The eye-to-plane offset along the normal is shared:
    const float plane_offset =
        dot(intersection_pos_local - eye_pos_local, normal);
    const float3 hit_right = eye_pos_local +
        (plane_offset / dot(ray_right_local, normal)) * ray_right_local;
    const float3 hit_down = eye_pos_local +
        (plane_offset / dot(ray_down_local, normal)) * ray_down_local;
    //  3.) The displacements from the true hit point are the object-space
    //      images of the (1, 0) and (0, 1) pixel offsets.  They form the
    //      first two basis vectors (columns); the mapping is 2D -> 3D, so
    //      the third column is zero.
    const float3 basis_right = hit_right - intersection_pos_local;
    const float3 basis_down = hit_down - intersection_pos_local;
    return float3x3(
        basis_right.x, basis_down.x, 0.0,
        basis_right.y, basis_down.y, 0.0,
        basis_right.z, basis_down.z, 0.0);
}
float3x3 get_object_to_tangent_matrix(const float3 intersection_pos_local,
    const float3 normal, const float2 geom_aspect, const float geom_mode)
{
    //  Requires:   See get_curved_video_uv_coords_and_tangent_matrix for
    //              descriptions of each parameter.
    //  Returns:    A matrix taking 3D object-space vectors in the CRT's
    //              local frame (right-handed, +y = up) to 2D video_uv
    //              vectors (+v = down).
    //  Background:
    //  [tangent, bitangent, normal] (TBN) maps ordinary vectors from
    //  tangent to object space.  [cotangent, cobitangent, normal] maps
    //  covectors (normals) the same way and is the inverse-transpose of TBN.
    //  What we need is the inverse of TBN, i.e. the transpose of the
    //  cotangent frame, which maps ordinary vectors object -> tangent.
    //  The construction follows Christian Schüler's "Followup: Normal
    //  Mapping Without Precomputed Tangents":
    //      http://www.thetenthplanet.de/archives/1180
    //  For the cylindrical and ordinary spherical mappings the u/v scale
    //  depends only on the aspect ratio, so tangent/bitangent lengths do
    //  too (the alternate mapping is more involved).  The cotangent and
    //  cobitangent therefore need matching lengths, derived from each
    //  primitive's uv<=>xyz mapping.
    static const float3 axis_x = float3(1.0, 0.0, 0.0);
    static const float3 axis_y = float3(0.0, 1.0, 0.0);
    //  Tangent/bitangent follow increasing u/v.  Where possible the
    //  cotangent/cobitangent are written down directly instead of being
    //  derived from them.
    float3 cotan_raw, cobitan_raw;
    //  geom_mode should be constant-folded without _RUNTIME_GEOMETRY_MODE.
    if(geom_mode < 1.5)
    {
        //  Sphere:
        //  tangent   = normalize(cross(normal, cross(x, pos))) * aspect.x
        //  bitangent = normalize(cross(cross(y, pos), normal)) * aspect.y
        //  inv_det   = 1.0/length(cross(bitangent, tangent))
        //  cotangent   = cross(normal, bitangent) * inv_det
        //              == normalize(cross(y, pos)) * aspect.y * inv_det
        //  cobitangent = cross(tangent, normal) * inv_det
        //              == normalize(cross(x, pos)) * aspect.x * inv_det
        //  Only the simplified forms are needed (inv_det is applied below):
        cotan_raw = normalize(cross(axis_y, intersection_pos_local)) *
            geom_aspect.y;
        cobitan_raw = normalize(cross(axis_x, intersection_pos_local)) *
            geom_aspect.x;
    }
    else if(geom_mode < 2.5)
    {
        //  Sphere, alternate mapping:
        //  This behaves somewhat like a cylindrical mapping in both
        //  directions, so lengths and directions are less regular and no
        //  real shortcut is available; build the tangent and bitangent
        //  explicitly and cross them with the normal.
        const float3 pos_xz = float3(
            intersection_pos_local.x, 0.0, intersection_pos_local.z);
        const float3 pos_yz = float3(0.0, intersection_pos_local.yz);
        const float3 tangent_u =
            normalize(cross(axis_y, pos_xz)) * geom_aspect.x;
        const float3 tangent_v =
            normalize(cross(axis_x, pos_yz)) * geom_aspect.y;
        cotan_raw = cross(normal, tangent_v);
        cobitan_raw = cross(tangent_u, normal);
    }
    else
    {
        //  Cylinder:
        //  tangent   = normalize(cross(y, normal)) * aspect.x
        //  bitangent = float3(0.0, -aspect.y, 0.0)
        //  inv_det   = 1.0/length(cross(bitangent, tangent))
        //  cotangent   = cross(normal, bitangent) * inv_det
        //              == normalize(cross(y, pos)) * aspect.y * inv_det
        //  cobitangent = cross(tangent, normal) * inv_det
        //              == float3(0.0, -aspect.x, 0.0) * inv_det
        cotan_raw = cross(axis_y, normal) * geom_aspect.y;
        cobitan_raw = float3(0.0, -geom_aspect.x, 0.0);
    }
    //  Normalize both vectors by the shared inverse determinant, obtained
    //  from the length of their cross product:
    const float3 frame_normal = cross(cobitan_raw, cotan_raw);
    const float inv_det = rsqrt(dot(frame_normal, frame_normal));
    //  The [cotangent, cobitangent, normal] column vectors form the
    //  cotangent frame (inverse-transpose TBN).  Passing them to the
    //  float3x3 constructor yields its transpose, which is what we return:
    return float3x3(cotan_raw * inv_det, cobitan_raw * inv_det, normal);
}
float2 get_curved_video_uv_coords_and_tangent_matrix(
const float2 flat_video_uv, const float3 eye_pos_local,
const float2 output_size_inv, const float2 geom_aspect,
const float geom_mode, const float3x3 global_to_local,
out float2x2 pixel_to_tangent_video_uv)
{
// Purpose: Map a flat screen uv coordinate onto the curved (sphere or
// cylinder) CRT surface and report how pixel-sized steps map
// to uv steps at that location.
// Requires: Parameters:
// 1.) flat_video_uv coords are in range [0.0, 1.0], where
// (0.0, 0.0) is the top-left corner of the screen and
// (1.0, 1.0) is the bottom-right corner.
// 2.) eye_pos_local is the 3D camera position in the simulated
// CRT's local coordinate frame. For best results, it must
// be computed based on the same geom_view_dist used here.
// 3.) output_size_inv = float2(1.0)/IN.output_size
// 4.) geom_aspect = get_aspect_vector(
// IN.output_size.x / IN.output_size.y);
// 5.) geom_mode is a static or runtime mode setting:
// 0 = off, 1 = sphere, 2 = sphere alt., 3 = cylinder
// 6.) global_to_local is a 3x3 matrix transforming (ordinary)
// worldspace vectors to the CRT's local coordinate frame
// Globals:
// 1.) geom_view_dist must be > 0.0. It controls the "near
// plane" used to interpret flat_video_uv as a view
// vector, which controls the field of view (FOV).
// 2.) geom_force_correct_tangent_matrix selects the manual
// matrix path when derivatives are not used.
// Returns: Return final uv coords in [0.0, 1.0], and return a pixel-
// space to video_uv tangent-space matrix in the out parameter.
// (This matrix assumes pixel-space +y = down, like +v = down.)
// When the view ray misses the primitive, view_vec_to_uv()
// yields float2(1.0, 1.0), so the returned uv is outside
// [0.0, 1.0] after re-centering.
// We'll transform flat_video_uv into a view vector, project
// the view vector from the camera/eye, intersect with a sphere
// or cylinder representing the simulated CRT, and convert the
// intersection position into final uv coords and a local
// transformation matrix.
// First get the 3D view vector (geom_aspect and geom_view_dist are globals):
// 1.) Center uv around (0.0, 0.0) and make (-0.5, -0.5) and (0.5, 0.5)
// correspond to the top-left/bottom-right output screen corners.
// 2.) Multiply by geom_aspect to preemptively "undo" Retroarch's screen-
// space 2D aspect correction. We'll reapply it in uv-space.
// 3.) (x, y) = (u, -v), because +v is down in 2D screenspace, but +y
// is up in 3D worldspace (enforce a right-handed system).
// 4.) The view vector z controls the "near plane" distance and FOV.
// For the effect of "looking through a window" at a CRT, it should be
// set equal to the user's distance from their physical screen, in
// units of the viewport's physical diagonal size.
const float2 view_uv = (flat_video_uv - float2(0.5, 0.5)) * geom_aspect;
const float3 view_vec_global =
float3(view_uv.x, -view_uv.y, -geom_view_dist);
// Transform the view vector into the CRT's local coordinate frame, convert
// to video_uv coords, and get the local 3D intersection position:
const float3 view_vec_local = mul(global_to_local, view_vec_global);
float3 pos;
const float2 centered_uv = view_vec_to_uv(
view_vec_local, eye_pos_local, geom_aspect, geom_mode, pos);
// Shift from centered coords back to the [0.0, 1.0] convention:
const float2 video_uv = centered_uv + float2(0.5, 0.5);
// Get a pixel-to-tangent-video-uv matrix. The caller could deal with
// all but one of these cases, but that would be more complicated.
#if _DRIVERS_ALLOW_DERIVATIVES
// Derivatives obtain a matrix very fast, but the direction of pixel-
// space +y seems to depend on the pass. Enforce the correct direction
// on a best-effort basis (but it shouldn't matter for antialiasing).
const float2 duv_dx = ddx(video_uv);
const float2 duv_dy = ddy(video_uv);
#ifdef LAST_PASS
// Same matrix as the #else branch, with the second row negated:
pixel_to_tangent_video_uv = float2x2(
duv_dx.x, duv_dy.x,
-duv_dx.y, -duv_dy.y);
#else
pixel_to_tangent_video_uv = float2x2(
duv_dx.x, duv_dy.x,
duv_dx.y, duv_dy.y);
#endif
#else
// Manually define a transformation matrix. We'll assume pixel-space
// +y = down, just like +v = down.
if(geom_force_correct_tangent_matrix)
{
// Get the surface normal based on the local intersection position.
// Sphere modes use the position itself; the cylinder drops the y
// component before normalizing:
const float3 normal_base = geom_mode < 2.5 ? pos :
float3(pos.x, 0.0, pos.z);
const float3 normal = normalize(normal_base);
// Get pixel-to-object and object-to-tangent matrices and combine
// them into a 2x2 pixel-to-tangent matrix for video_uv offsets:
const float3x3 pixel_to_object = get_pixel_to_object_matrix(
global_to_local, eye_pos_local, view_vec_global, pos, normal,
output_size_inv);
const float3x3 object_to_tangent = get_object_to_tangent_matrix(
pos, normal, geom_aspect, geom_mode);
const float3x3 pixel_to_tangent3x3 =
mul(object_to_tangent, pixel_to_object);
// Keep only the upper-left 2x2 of the combined matrix:
pixel_to_tangent_video_uv = float2x2(
pixel_to_tangent3x3[0][0], pixel_to_tangent3x3[0][1], pixel_to_tangent3x3[1][0], pixel_to_tangent3x3[1][1]);//._m00_m01_m10_m11);
}
else
{
// Ignore curvature, and just consider flat scaling. The
// difference is only apparent with strong curvature:
pixel_to_tangent_video_uv = float2x2(
output_size_inv.x, 0.0, 0.0, output_size_inv.y);
}
#endif
return video_uv;
}
float get_border_dim_factor(const float2 video_uv, const float2 geom_aspect)
{
    //  COPYRIGHT NOTE FOR THIS FUNCTION:
    //  Copyright (C) 2010-2012 cgwg, 2014 TroggleMonkey
    //  This function uses an algorithm first coded in several of cgwg's GPL-
    //  licensed lines in crt-geom-curved.cg and its ancestors.  The line
    //  between algorithm and code is nearly indistinguishable here, so it's
    //  unclear whether I could even release this project under a non-GPL
    //  license with this function included.
    //  Returns:    A brightness multiplier, capped at 1.0, that falls off
    //              as video_uv gets within border_size of an image edge.
    //  Globals:    border_size, border_darkness, border_compress
    //  Aspect-corrected distance from video_uv to the nearest edge per axis:
    const float2 dist_to_edge =
        min(video_uv, float2(1.0, 1.0) - video_uv) * geom_aspect;
    //  How far into the border band we are on each axis (0 when outside it):
    const float2 overlap = max(
        float2(border_size, border_size) - dist_to_edge, float2(0.0, 0.0));
    //  Normalize by the band width; a non-positive border_size disables the
    //  band altogether (and avoids dividing by zero):
    float overlap_ratio = 0;
    if(border_size > 0)
    {
        overlap_ratio = length(overlap)/border_size;
    }
    const float clear_ratio = max(1.0 - overlap_ratio, 0.0);
    //  Shape the falloff with border_darkness; the compress term can only
    //  scale the result up, and the final min() caps it at 1.0:
    const float dim_factor =
        pow(clear_ratio, border_darkness) * max(1.0, border_compress);
    return min(dim_factor, 1.0);
}
#endif // _GEOMETRY_FUNCTIONS_H

View file

@ -1,76 +0,0 @@
#ifndef _HELPER_FUNCTIONS_AND_MACROS_H
#define _HELPER_FUNCTIONS_AND_MACROS_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2020 Alex Gunter
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
float4 tex2D_nograd(sampler2D tex, float2 tex_coords)
{
    //  Sample tex at tex_coords through tex2Dlod, with the z and w
    //  components of the lookup coordinate (w being the requested LOD)
    //  fixed at zero.  The trailing 0.0 is forwarded as tex2Dlod's third
    //  argument unchanged.
    const float4 lod0_coords = float4(tex_coords, 0, 0);
    return tex2Dlod(tex, lod0_coords, 0.0);
}
// ReShade 4 does not permit the use of functions or the ternary operator
// outside of a function definition. This is a problem for this port
// because the original crt-royale shader makes heavy use of these
// constructs at the root level.
// These preprocessor definitions are a workaround for this limitation.
// Note that they are strictly intended for defining complex global
// constants. I doubt they're more performant than the built-in
// equivalents, so I recommend using the built-ins whenever you can.
// Every macro evaluates its arguments more than once, so only pass
// side-effect-free expressions. Each expansion is wrapped in parentheses
// so it can be embedded in a larger expression without changing meaning.
// macro_sign: -1 for negative, 0 for zero, +1 for positive input.
#define macro_sign(c) (((int) ((c) > 0)) - ((int) ((c) < 0)))
// macro_abs: magnitude of c (relies on macro_sign returning -1 for c < 0).
#define macro_abs(c) ((c) * macro_sign(c))
// macro_min / macro_max: select one operand by multiplying with 0/1 masks.
#define macro_min(c, d) ((c) * ((int) ((c) <= (d))) + (d) * ((int) ((c) > (d))))
#define macro_max(c, d) ((c) * ((int) ((c) >= (d))) + (d) * ((int) ((c) < (d))))
// macro_clamp: limit c to [l, u].
#define macro_clamp(c, l, u) macro_min(macro_max(c, l), u)
// macro_ceil: truncate toward zero, then add 1 if that rounded downward.
#define macro_ceil(c) ((float) ((int) (c) + (int) (((int) (c)) < (c))))
// macro_cond: arithmetic stand-in for (c ? a : b); both a and b are evaluated.
#define macro_cond(c, a, b) (float(c) * (a) + float(!(c)) * (b))
//////////////////////// COMMON MATHEMATICAL CONSTANTS ///////////////////////
// Pi, written out to 12 decimal places.
static const float pi = 3.141592653589;
// We often want to find the location of the previous texel, e.g.:
// const float2 curr_texel = uv * texture_size;
// const float2 prev_texel = floor(curr_texel - float2(0.5)) + float2(0.5);
// const float2 prev_texel_uv = prev_texel / texture_size;
// However, many GPU drivers round incorrectly around exact texel locations.
// We need to subtract a little less than 0.5 before flooring, and some GPU's
// require this value to be farther from 0.5 than others; define it here.
// const float2 prev_texel =
// floor(curr_texel - float2(under_half)) + float2(0.5);
static const float under_half = 0.4995;
// Avoid dividing by zero; using a macro overloads for float, float2, etc.:
// The result is max(macro_abs(c), 2^-16), so it is always at least 2^-16 and
// the sign of c is not preserved.
#define FIX_ZERO(c) (macro_max(macro_abs(c), 0.0000152587890625)) // 2^-16
// Earlier floor-based fmod replacement, kept for reference:
// #define fmod(x, y) ((x) - (y) * floor((x)/(y) + FIX_ZERO(0.0)))
// Replaces fmod with a frac-based version that evaluates y twice.
// NOTE(review): frac() is floor-based, so for negative x/y this presumably
// differs from the built-in truncating fmod -- confirm callers expect that.
#define fmod(x, y) (frac((x) / (y)) * (y))
#endif // _HELPER_FUNCTIONS_AND_MACROS_H

View file

@ -1,624 +0,0 @@
#ifndef _PHOSHOR_MASK_CALCULATIONS_H
#define _PHOSHOR_MASK_CALCULATIONS_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2020 Alex Gunter
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
/*
* Our goal is to use arithmetic to generate the phosphor mask.
* Phosphor masks are regular patterns, so we want something periodic.
* We need to avoid integer arithmetic because it tends to cause rounding errors.
*
* For all masks, we want to approximate a pulse wave in at least one dimension. This pulse wave
* will have narrow peaks, wide troughs, and constant periodicity.
* GRILLE will have a pulse wave along the x-axis and will be constant along the y-axis.
* SLOT and SHADOW will likely have a superposition of two out-of-phase pulse waves along each axis.
* For SHADOW, the width of the peaks will vary such that they generate ellipsoids on the screen.
*
* We can get a periodic function by starting with a triangle wave: T(t, f) = abs(1 - 2*frac(t * f)).
* This function gives us a triangle wave with f cycles in the domain [0, 1].
* Note that T(0, f) = 1.
*
* Then we can compose this with a sigmoid curve to squish the triangle wave into a pulse wave.
* P(s, p, q) = exp(q s - q/2) / (exp(q s - q/2) + exp(-p))
* s(t, f, o) = T(t*f - o, 1)
*
* f is the number of pulses to render along the given axis.
* o is the channel's horizontal offset along the given axis, normalized via the quotient raw_offset / raw_triad_width.
* p and q control how closely P resembles an ideal pulse wave and also how wide the peaks and troughs are.
*
* The interaction between p and q is rather complicated and difficult to describe, so they're not a good pair
* of parameters for users. But we have the info necessary to solve for p in terms of q.
* We know the width of a phosphor and the width of a triad, and we know the domain and range of P.
* We can choose a coordinate (t0, y0) that will denote the edge of the phosphor.
* Note that y0 = P(t0, p, q) for some p and q.
* We let t0 = raw_phosphor_width / raw_triad_width, since we need to respect the shape of the phosphor.
* We let the user define P(t0).
* Technically, this means the user is defining the brightness of the phosphor's furthest edge.
* Visually, this looks like the user is defining the width of the phosphor.
* We'll call this the Phosphor Thickness.
* We let the user define q.
* Technically, this means the user is defining the squareness of the pulse wave.
* Visually, this looks like the user is defining the sharpness of the phosphor.
* We'll call this the Phosphor Sharpness.
*
* We can solve for p in terms of q very efficiently.
* p = (ln(y0 / (1 - y0)) - q) / (0.5 - 2 t0)
*
* Note that, if you work through the algebra, you get a denominator of (t0 - 0.5).
* Using (0.5 - 2 t0) actually works better. It also matches up when you try plotting P and (t0, y0).
*
* For the GRILLE and SLOT masks, we can compute p once and recycle it.
* For the SHADOW mask, we can either compute p on each iteration or find a way to interpolate between min_p and max_p.
*
* One might expect it'd be way better to use a clamped triangle wave rather than a sigmoid or exponentiated cosine wave.
* As far as I can tell, this ends up being incorrect surprisingly enough. Although it's a good bit faster,
* it has terrible aliasing artifacts at small scales. The other implementations are slower, but they produce
* evenly-sized RGB phosphors for a variety of configurations even when the triad width is 3 pixels. At that
* scale, the triangle wave approach produces triads where one of the phosphors is thicker than the others.
* Taking into account the compute_mask_factor trick, the triangle wave approach would be a negligible
* performance improvement at the cost of a large drop in visual quality and user friendliness.
*/
#include "bind-shader-params.fxh"
#include "scanline-functions.fxh"
/*
* The GRILLE mask consists of an array of vertical stripes, so each channel will vary along the x-axis and will be constant
* along the y-axis.
*
* It has the following dimensions:
* Phosphors are 18 units wide with unbounded height.
* Phosphors in a triad are 2 units apart.
* Triads are 6 units apart.
* Triad centers are 64 units apart.
* The phosphors follow an RGB pattern.
* The left-most phosphor is red and offset by 3 units to the right.
*/
// Raw GRILLE dimensions, in the mask's own units.
static const float grille_raw_phosphor_width = 18;
static const float grille_raw_phosphor_gap = 2;
static const float grille_raw_triad_horiz_gap = 6;
// One full triad period: 3*18 + 2*2 + 6 = 64 units.
static const float grille_raw_triad_width = 3*grille_raw_phosphor_width + 2*grille_raw_phosphor_gap + grille_raw_triad_horiz_gap;
// Phosphor centers measured from the left edge of the triad. Red sits half
// a triad gap plus half a phosphor width in: (6 + 18) / 2 = 12. Green and
// blue each follow one phosphor width plus one gap later: 32 and 52.
static const float grille_raw_r_offset = (grille_raw_triad_horiz_gap + grille_raw_phosphor_width) / 2;
static const float grille_raw_g_offset = grille_raw_r_offset + grille_raw_phosphor_width + grille_raw_phosphor_gap;
static const float grille_raw_b_offset = grille_raw_g_offset + grille_raw_phosphor_width + grille_raw_phosphor_gap;
// The same RGB center offsets expressed as fractions of the triad width.
static const float3 grille_norm_center_offsets = float3(
grille_raw_r_offset,
grille_raw_g_offset,
grille_raw_b_offset
) / grille_raw_triad_width;
// Distance from a phosphor's center to its edge: 9 raw units, or 9 / 64 of
// a triad once normalized.
static const float grille_edge_t = grille_raw_phosphor_width / 2;
static const float grille_edge_norm_t = grille_edge_t / grille_raw_triad_width;
/*
* The SLOT mask consists of an array of rectangles, so each channel will vary along both the x- and y-axes.
*
* It has the following dimensions:
* Phosphors are 18 units wide and 66 units tall.
* Phosphors in a triad are 2 units apart.
* Triads are 6 units apart horizontally and 6 units apart vertically.
* Triad centers are 64 units apart horizontally and 72 units apart vertically (66-unit phosphor height + 6-unit vertical gap).
* The phosphors follow an RGB pattern.
* The upper-left-most phosphor is red and offset by 3 units to the right and 3 units down.
*/
// Raw SLOT dimensions, in the same arbitrary units as the description above.
static const float slot_raw_phosphor_width = 18;
static const float slot_raw_phosphor_gap = 2;
static const float slot_raw_triad_horiz_gap = 6;
// Triad width: three phosphors, the two gaps between them and the horizontal gap to the next triad
// (3*18 + 2*2 + 6 = 64).
static const float slot_raw_triad_width = 3*slot_raw_phosphor_width + 2*slot_raw_phosphor_gap + slot_raw_triad_horiz_gap;
static const float slot_raw_phosphor_height = 66;
static const float slot_raw_triad_vert_gap = 6;
// Triad height: one phosphor plus the vertical gap (66 + 6 = 72).
static const float slot_raw_triad_height = slot_raw_phosphor_height + slot_raw_triad_vert_gap;
// Height-to-width ratio of one triad (72 / 64 = 1.125).
static const float slot_aspect_ratio = slot_raw_triad_height / slot_raw_triad_width;
// Horizontal center of each phosphor measured from the left edge of the triad: R = 12, G = 32, B = 52.
static const float slot_raw_r_offset_x = (slot_raw_triad_horiz_gap + slot_raw_phosphor_width) / 2;
static const float slot_raw_g_offset_x = slot_raw_r_offset_x + slot_raw_phosphor_width + slot_raw_phosphor_gap;
static const float slot_raw_b_offset_x = slot_raw_g_offset_x + slot_raw_phosphor_width + slot_raw_phosphor_gap;
// The same centers expressed as fractions of the triad width, packed as (R, G, B).
static const float3 slot_norm_center_offsets_x = float3(
slot_raw_r_offset_x,
slot_raw_g_offset_x,
slot_raw_b_offset_x
) / slot_raw_triad_width;
// All three channels share the same vertical center, halfway down the triad.
static const float3 slot_norm_center_offsets_y = float3(0.5, 0.5, 0.5);
// Half the phosphor width: distance from a phosphor's center to its left/right edge.
static const float slot_edge_tx = slot_raw_phosphor_width / 2;
// We draw the slot mask as two sets of columns. To do that, we have to pretend the horizontal gap is the size of a whole triad.
// Then we need to halve the position of the phosphor edge.
static const float slot_edge_norm_tx = 0.5 * slot_edge_tx / slot_raw_triad_width;
// Half the phosphor height, raw and as a fraction of the triad height.
static const float slot_edge_ty = slot_raw_phosphor_height / 2;
static const float slot_edge_norm_ty = slot_edge_ty / slot_raw_triad_height;
/*
* The SHADOW mask consists of an array of circles, so each channel will vary along both the x- and y-axes.
*
* It has the following dimensions:
* Phosphors are 21 units in diameter.
* All phosphors are 0 units apart.
* Triad centers are 63 units apart horizontally and 21 units apart vertically.
* The phosphors follow a GBR pattern on odd rows and RBG on even rows.
* The upper-left-most phosphor is green and centered on the corner of the screen.
*/
// Raw SHADOW dimensions, in the same arbitrary units as the description above.
static const float shadow_raw_phosphor_diam = 21;
// The circles touch each other, so every gap is zero.
static const float shadow_raw_phosphor_gap = 0;
static const float shadow_raw_triad_horiz_gap = 0;
static const float shadow_raw_triad_vert_gap = 0;
// Triad width: three phosphor diameters plus the (zero) gaps (3*21 = 63).
static const float shadow_raw_triad_width = 3*shadow_raw_phosphor_diam + 2*shadow_raw_phosphor_gap + shadow_raw_triad_horiz_gap;
// Triad height: one phosphor diameter plus the (zero) vertical gap (21).
static const float shadow_raw_triad_height = shadow_raw_phosphor_diam + shadow_raw_triad_vert_gap;
// Height-to-width ratio of one triad (21 / 63 = 1/3).
static const float shadow_aspect_ratio = shadow_raw_triad_height / shadow_raw_triad_width;
// Horizontal centers in G, B, R order, starting at the left edge of the triad: G = 0, B = 21, R = 42.
static const float shadow_raw_g_offset_x = 0;
static const float shadow_raw_b_offset_x = shadow_raw_g_offset_x + shadow_raw_phosphor_diam + shadow_raw_phosphor_gap;
static const float shadow_raw_r_offset_x = shadow_raw_b_offset_x + shadow_raw_phosphor_diam + shadow_raw_phosphor_gap;
// The same centers as fractions of the triad width. Note the vector is still packed as (R, G, B).
static const float3 shadow_norm_center_offsets_x = float3(
shadow_raw_r_offset_x,
shadow_raw_g_offset_x,
shadow_raw_b_offset_x
) / shadow_raw_triad_width;
// No vertical offset for any channel.
static const float3 shadow_norm_center_offsets_y = float3(0.0, 0.0, 0.0);
// Phosphor radius, raw and as a fraction of the triad width.
static const float shadow_edge_tx = shadow_raw_phosphor_diam / 2;
static const float shadow_edge_norm_tx = shadow_edge_tx / shadow_raw_triad_width;
static const float shadow_edge_ty = shadow_raw_phosphor_diam / 2;
// We draw the shadow mask as two sets of rows. To do that, we have to pretend the vertical gap is the size of a whole triad.
// Then we need to halve the position of the phosphor edge.
static const float shadow_edge_norm_ty = 0.5 * shadow_edge_ty / shadow_raw_triad_height;
// Phosphor radius as a fraction of the triad width (same value as shadow_edge_norm_tx).
static const float shadow_norm_phosphor_rad = (shadow_raw_phosphor_diam/2) / shadow_raw_triad_width;
/*
* The SMALL GRILLE mask is composed of magenta and green stripes.
* Sourced from http://filthypants.blogspot.com/2020/02/crt-shader-masks.html
*
* It has the following dimensions:
* Stripes are 32 units wide.
* Stripes in a triad are 0 units apart.
* Triads are 0 units apart horizontally.
*
* Each triad has two quads, side-by-side and aligned.
* Neighboring triads are offset vertically.
* Below is an array of 2 triads.
* x's denote magenta stripes, and o's denote green ones.
*
* xxooxxoo
* xxooxxoo
* xxooxxoo
* xxooxxoo
* xxooxxoo
* xxooxxoo
*
* The phosphors follow a MG pattern.
* The left-most phosphor is magenta and offset by 16 units to the right.
*/
// Raw SMALL GRILLE dimensions, in the same arbitrary units as the description above.
static const float smallgrille_raw_stripe_width = 32;
// A triad is one magenta stripe plus one green stripe (2*32 = 64).
static const float smallgrille_raw_triad_width = 2*smallgrille_raw_stripe_width;
// Stripe centers measured from the left edge of the triad: 16 for red, 48 for green.
static const float smallgrille_raw_r_offset_x = 0.5 * smallgrille_raw_stripe_width;
static const float smallgrille_raw_g_offset_x = smallgrille_raw_r_offset_x + smallgrille_raw_stripe_width;
// Blue reuses the red offset, so red and blue share one stripe (the magenta one).
static const float smallgrille_raw_b_offset_x = smallgrille_raw_r_offset_x;
// The same centers as fractions of the triad width, packed as (R, G, B) = (0.25, 0.75, 0.25).
static const float3 smallgrille_norm_center_offsets_x = float3(
smallgrille_raw_r_offset_x,
smallgrille_raw_g_offset_x,
smallgrille_raw_b_offset_x
) / smallgrille_raw_triad_width;
// Half the stripe width, raw and as a fraction of the triad width.
static const float smallgrille_edge_t = 0.5 * smallgrille_raw_stripe_width;
static const float smallgrille_edge_norm_t = smallgrille_edge_t / smallgrille_raw_triad_width;
/*
* The SMALL SLOT mask is composed of magenta and green quads.
* Sourced from http://filthypants.blogspot.com/2020/02/crt-shader-masks.html
*
* It has the following dimensions:
* Quads are 32 units wide and 48 units tall.
* Quads in a triad are 0 units apart.
* Triads are 0 units apart horizontally and 16 units apart vertically.
*
* Each triad has two quads, side-by-side and aligned.
* Neighboring triads are offset vertically.
* Below is a 2x2 matrix of 4 triads.
* x's denote magenta quads, and o's denote green ones.
*
* xxoo
* xxooxxoo
* xxooxxoo
* xxoo
* xxoo
* xxooxxoo
* xxooxxoo
* xxoo
*
* The phosphors follow a MG pattern.
* The upper-left-most phosphor is magenta and offset by 16 units to the right and 16 units down.
*/
// Raw SMALL SLOT dimensions, in the same arbitrary units as the description above.
static const float smallslot_raw_quad_width = 32;
// A triad is one magenta quad plus one green quad (2*32 = 64).
static const float smallslot_raw_triad_width = 2*smallslot_raw_quad_width;
// Quad height and vertical gap are both derived from the quad width (1.5*32 = 48 and 0.5*32 = 16).
static const float smallslot_raw_quad_height = 1.5 * smallslot_raw_quad_width;
static const float smallslot_raw_triad_vert_gap = 0.5 * smallslot_raw_quad_width;
// Triad height: one quad plus the vertical gap (48 + 16 = 64).
static const float smallslot_raw_triad_height = smallslot_raw_quad_height + smallslot_raw_triad_vert_gap;
// Height-to-width ratio of one triad (64 / 64 = 1).
static const float smallslot_aspect_ratio = smallslot_raw_triad_height / smallslot_raw_triad_width;
// Horizontal quad centers measured from the left edge of the triad: 16 for red, 48 for green.
static const float smallslot_raw_r_offset_x = 0.5 * smallslot_raw_quad_width;
static const float smallslot_raw_g_offset_x = smallslot_raw_r_offset_x + smallslot_raw_quad_width;
// Blue reuses the red offset, so red and blue share one quad (the magenta one).
static const float smallslot_raw_b_offset_x = smallslot_raw_r_offset_x;
// The same centers as fractions of the triad width, packed as (R, G, B) = (0.25, 0.75, 0.25).
static const float3 smallslot_norm_center_offsets_x = float3(
smallslot_raw_r_offset_x,
smallslot_raw_g_offset_x,
smallslot_raw_b_offset_x
) / smallslot_raw_triad_width;
// Two vertical centers as fractions of the triad height; the scalar is broadcast to all three channels.
// y1 is the middle of a quad (24 / 64 = 0.375); y2 is y1 moved down by the vertical gap (0.375 + 16/64 = 0.625).
static const float3 smallslot_norm_center_offsets_y1 = 0.5 * smallslot_raw_quad_height / smallslot_raw_triad_height;
static const float3 smallslot_norm_center_offsets_y2 = smallslot_norm_center_offsets_y1 + smallslot_raw_triad_vert_gap / smallslot_raw_triad_height;
// Half the quad width: distance from a quad's center to its left/right edge.
static const float smallslot_edge_tx = 0.5 * smallslot_raw_quad_width;
// We draw the slot mask as two sets of columns. To do that, we have to pretend the horizontal gap is the size of a whole triad.
// Then we need to halve the position of the phosphor edge.
static const float smallslot_edge_norm_tx = 0.5 * smallslot_edge_tx / smallslot_raw_triad_width;
// Half the quad height, raw and as a fraction of the triad height.
static const float smallslot_edge_ty = smallslot_raw_quad_height / 2;
static const float smallslot_edge_norm_ty = smallslot_edge_ty / smallslot_raw_triad_height;
/*
* The SMALL SHADOW mask is composed of magenta and green quads.
* Sourced from http://filthypants.blogspot.com/2020/02/crt-shader-masks.html
*
* It has the following dimensions:
* Quads are 17 units wide and 17 units tall.
* Quads in a triad are 0 units apart.
* Triads are 0 units apart horizontally and 0 units apart vertically.
*
* Each triad has two quads, side-by-side and aligned.
* Neighboring triads are offset vertically.
* Below is a 2x2 matrix of 4 triads.
* x's denote magenta quads, and o's denote green ones.
*
* xxooxxoo
* xxooxxoo
* ooxxooxx
* ooxxooxx
*
* The phosphors follow a MG pattern.
* The upper-left-most phosphor is magenta and offset by 8.5 units to the right and 8.5 units down.
*/
// Raw SMALL SHADOW dimensions, in the same arbitrary units as the description above.
static const float smallshadow_raw_quad_width = 17;
// A triad is one magenta quad plus one green quad (2*17 = 34).
static const float smallshadow_raw_triad_width = 2 * smallshadow_raw_quad_width;
static const float smallshadow_raw_quad_height = 17;
// There is no vertical gap, so the triad is exactly one quad tall.
static const float smallshadow_raw_triad_height = smallshadow_raw_quad_height;
// Height-to-width ratio of one triad (17 / 34 = 0.5).
static const float smallshadow_aspect_ratio = smallshadow_raw_triad_height / smallshadow_raw_triad_width;
// Horizontal quad centers measured from the left edge of the triad: 8.5 for red, 25.5 for green.
static const float smallshadow_raw_r_offset_x = 0.5 * smallshadow_raw_quad_width;
static const float smallshadow_raw_g_offset_x = smallshadow_raw_r_offset_x + smallshadow_raw_quad_width;
// Blue reuses the red offset, so red and blue share one quad (the magenta one).
static const float smallshadow_raw_b_offset_x = smallshadow_raw_r_offset_x;
// The same centers as fractions of the triad width, packed as (R, G, B) = (0.25, 0.75, 0.25).
static const float3 smallshadow_norm_center_offsets_x = float3(
    smallshadow_raw_r_offset_x,
    smallshadow_raw_g_offset_x,
    smallshadow_raw_b_offset_x
) / smallshadow_raw_triad_width;
// Vertical center of a quad as a fraction of the triad height (8.5 / 17 = 0.5), broadcast to all channels.
// This used to hold the raw value 8.5. The mask came out the same only because 8.5 and 0.5 differ by a
// whole number of periods of the vertical triangle waves; normalizing removes that coincidence and keeps
// the offset in the same units as every other *_norm_* constant.
static const float3 smallshadow_norm_center_offsets_y = 0.5 * smallshadow_raw_quad_height / smallshadow_raw_triad_height;
// Half the quad width, raw and as a fraction of the triad width.
static const float smallshadow_edge_tx = 0.5 * smallshadow_raw_quad_width;
static const float smallshadow_edge_norm_tx = smallshadow_edge_tx / smallshadow_raw_triad_width;
// Half the quad height.
static const float smallshadow_edge_ty = 0.5 * smallshadow_raw_quad_height;
// We draw the shadow mask as two sets of rows. To do that, we have to pretend the vertical gap is the size of a whole triad.
// Then we need to halve the position of the phosphor edge.
static const float smallshadow_edge_norm_ty = 0.5 * smallshadow_edge_ty / smallshadow_raw_triad_height;
/*
 * Returns the height-to-width ratio of one triad for the selected mask_type, multiplied by
 * scale_triad_height.
 *
 *   mask_type 0 and 3: scale_triad_height on its own
 *   mask_type 1:       slot_aspect_ratio
 *   mask_type 2:       shadow_aspect_ratio
 *   mask_type 4:       smallslot_aspect_ratio
 *   anything else:     smallshadow_aspect_ratio
 *
 * NOTE(review): 0 and 3 are presumably the grille and small grille masks, which have no triad
 * height of their own - confirm against the mask_type uniform.
 *
 * An if/else chain used to run before the switch. Its result was always overwritten, because
 * the switch has a default label, and it still used the large-mask ratios for types 4 and 5.
 * It has been removed.
 */
float get_selected_aspect_ratio() {
    float aspect_ratio;

    // The default label guarantees aspect_ratio is assigned on every path.
    [flatten]
    switch (mask_type) {
        case 0:
            aspect_ratio = scale_triad_height;
            break;
        case 1:
            aspect_ratio = scale_triad_height * slot_aspect_ratio;
            break;
        case 2:
            aspect_ratio = scale_triad_height * shadow_aspect_ratio;
            break;
        case 3:
            aspect_ratio = scale_triad_height;
            break;
        case 4:
            aspect_ratio = scale_triad_height * smallslot_aspect_ratio;
            break;
        default:
            aspect_ratio = scale_triad_height * smallshadow_aspect_ratio;
            break;
    }

    return aspect_ratio;
}
/*
 * Returns the size of one triad as (width, height).
 *
 * The width is mask_triad_width when mask_size_param is 0; otherwise it is derived from
 * content_size.x and mask_num_triads_across. The height is the width times the aspect ratio
 * of the selected mask.
 */
float2 calc_triad_size() {
    const float2 triad_shape = float2(1, get_selected_aspect_ratio());

    float width_px;
    [branch]
    if (mask_size_param != 0) {
        // Fit the requested number of triads across the content.
        width_px = content_size.x * rcp(mask_num_triads_across);
    }
    else {
        // The triad width was given directly.
        width_px = mask_triad_width;
    }

    return triad_shape * width_px;
}
/*
 * Returns the per-axis factor that the mask generators multiply texcoord by.
 *
 * Two candidates are computed: one from mask_triad_width (used when mask_size_param is 0)
 * and one from mask_num_triads_across (used otherwise). For geom_rotation_mode values other
 * than 0 and 2, the x and y entries of the triad-derived vectors are swapped before use.
 */
float2 calc_phosphor_viewport_frequency_factor() {
    const float aspect_ratio = get_selected_aspect_ratio();

    // Unrotated building blocks.
    const float2 inv_triad_size = rcp(mask_triad_width * float2(1, aspect_ratio));
    const float2 triads_per_axis = float2(1, content_size.y * rcp(content_size.x) * rcp(aspect_ratio));

    const bool axes_swapped = (geom_rotation_mode != 0 && geom_rotation_mode != 2);

    const float2 triad_size_factor = content_size * (axes_swapped ? inv_triad_size.yx : inv_triad_size);
    const float2 num_triads_factor = mask_num_triads_across * (axes_swapped ? triads_per_axis.yx : triads_per_axis);

    return ((mask_size_param == 0) ? triad_size_factor : num_triads_factor);
}
/*
 * We have a pulse wave f(t0_norm, p, q) = y0 with unknown p.
 * This function solves for p.
 *
 * Expands to log(y0 / (1 - y0)) - q * (0.5 - 2*t0_norm).
 * In this file the result is passed through exp(-p) and used as the term added to alpha in
 * alpha * rcp(alpha + term), where alpha = exp((theta - 0.5) * q).
 * Every argument is parenthesized in the expansion, so compound expressions can be passed in.
 * NOTE(review): presumably y0 has to lie strictly between 0 and 1, because the log argument is
 * y0 / (1 - y0) - confirm against the parameter ranges used by the callers.
 */
#define calculate_phosphor_p_value(t0_norm, y0, q) (log((y0) * rcp(1 - (y0))) - (q) * (0.5 - 2*(t0_norm)))
/*
 * If we don't rescale the phosphor_thickness parameter, it has a logarithmic effect on the phosphor shape.
 * Rescaling it makes it look closer to a linear effect.
 *
 * Expands to 1 - exp(-p): 0 maps to 0 and larger values approach 1.
 * Being a macro, it works on scalars and vectors alike.
 */
#define linearize_phosphor_thickness_param(p) (1 - exp(-(p)))
/*
 * Evaluates the grille mask at texcoord and returns one intensity per color channel.
 *
 * Only the x axis is used: texcoord.x is scaled by viewport_frequency_factor.x and shifted by
 * each channel's center offset before being fed to a triangle wave.
 * grille_pq.y scales the exponent of the pulse and grille_pq.x is the term added in its denominator.
 * For geom_rotation_mode 2 or 3 the channel offsets are applied in reverse (BGR) order.
 */
float3 get_phosphor_intensity_grille(
    const float2 texcoord,
    const float2 viewport_frequency_factor,
    const float2 grille_pq
) {
    const bool reverse_channels = (geom_rotation_mode == 2 || geom_rotation_mode == 3);
    const float3 base_offsets = reverse_channels ? grille_norm_center_offsets.bgr : grille_norm_center_offsets;
    const float3 shifted_offsets = base_offsets + phosphor_offset_x * 0.5;

    const float3 wave = triangle_wave(texcoord.x * viewport_frequency_factor.x - shifted_offsets, 1);
    const float3 pulse = exp((wave - 0.5) * grille_pq.y);

    return pulse * rcp(pulse + grille_pq.x);
}
/*
 * Evaluates the slot mask at texcoord and returns one intensity per color channel.
 *
 * The mask is the sum of two column/row products. The second set of columns is evaluated one
 * unit further along x (at half frequency) and its rows half a unit further along y, which
 * staggers neighboring columns vertically.
 * slot_pq_x and slot_pq_y hold the pulse parameters per axis: .y scales the exponent and .x is
 * the term added in the denominator.
 * For geom_rotation_mode 2 or 3 the channel offsets are applied in reverse (BGR) order.
 */
float3 get_phosphor_intensity_slot(
    const float2 texcoord,
    const float2 viewport_frequency_factor,
    const float2 slot_pq_x,
    const float2 slot_pq_y
) {
    const bool reverse_channels = (geom_rotation_mode == 2 || geom_rotation_mode == 3);
    const float3 offs_x = (reverse_channels ? slot_norm_center_offsets_x.bgr : slot_norm_center_offsets_x)
        + phosphor_offset_x * 0.5;
    const float3 offs_y = (reverse_channels ? slot_norm_center_offsets_y.bgr : slot_norm_center_offsets_y)
        + phosphor_offset_y * 0.5;

    // Position of this fragment relative to each channel's phosphor center.
    const float3 pos_x = texcoord.x * viewport_frequency_factor.x - offs_x;
    const float3 pos_y = texcoord.y * viewport_frequency_factor.y - offs_y;

    // First and second set of columns.
    const float3 pulse_x_a = exp((triangle_wave(pos_x, 0.5) - 0.5) * slot_pq_x.y);
    const float3 col_a = pulse_x_a * rcp(pulse_x_a + slot_pq_x.x);
    const float3 pulse_x_b = exp((triangle_wave(pos_x + 1, 0.5) - 0.5) * slot_pq_x.y);
    const float3 col_b = pulse_x_b * rcp(pulse_x_b + slot_pq_x.x);

    // Rows belonging to each set of columns.
    const float3 pulse_y_a = exp((triangle_wave(pos_y, 1) - 0.5) * slot_pq_y.y);
    const float3 row_a = pulse_y_a * rcp(pulse_y_a + slot_pq_y.x);
    const float3 pulse_y_b = exp((triangle_wave(pos_y + 0.5, 1) - 0.5) * slot_pq_y.y);
    const float3 row_b = pulse_y_b * rcp(pulse_y_b + slot_pq_y.x);

    return col_a * row_a + col_b * row_b;
}
/*
 * Evaluates the shadow mask at texcoord and returns one intensity per color channel.
 *
 * The mask is the sum of two column/row products, with the second set evaluated half a unit
 * further along x and one unit further along y (at half frequency).
 * Unlike the other masks, the denominator terms are computed here: the horizontal one from
 * shadow_edge_norm_tx, and the vertical ones from sqrt(radius^2 - dx^2), so they change with
 * the horizontal position inside a phosphor. Both use the linearized phosphor_thickness.
 * shadow_q holds the exponent scale per axis (.x horizontal, .y vertical).
 * For geom_rotation_mode 2 or 3 the channel offsets are applied in reverse (BGR) order.
 */
float3 get_phosphor_intensity_shadow(
    const float2 texcoord,
    const float2 viewport_frequency_factor,
    const float2 shadow_q
) {
    const bool reverse_channels = (geom_rotation_mode == 2 || geom_rotation_mode == 3);
    const float3 offs_x = (reverse_channels ? shadow_norm_center_offsets_x.bgr : shadow_norm_center_offsets_x)
        + phosphor_offset_x * 0.5;
    const float3 offs_y = (reverse_channels ? shadow_norm_center_offsets_y.bgr : shadow_norm_center_offsets_y)
        + phosphor_offset_y * 0.5;

    // Position of this fragment relative to each channel's phosphor center.
    const float3 pos_x = texcoord.x * viewport_frequency_factor.x - offs_x;
    const float3 pos_y = texcoord.y * viewport_frequency_factor.y - offs_y;

    const float2 thickness = linearize_phosphor_thickness_param(phosphor_thickness);

    // Horizontal distance term for each set of rows, then the matching vertical extent.
    const float rad_sq = shadow_norm_phosphor_rad * shadow_norm_phosphor_rad;
    const float3 dx_a = shadow_norm_phosphor_rad * triangle_wave(pos_x * 3 - 0.5, 1.0);
    const float3 dx_b = shadow_norm_phosphor_rad * triangle_wave(pos_x * 3, 1.0);
    const float3 extent_a = sqrt(rad_sq - dx_a * dx_a);
    const float3 extent_b = sqrt(rad_sq - dx_b * dx_b);

    // Denominator terms of the pulses.
    const float term_x = exp(-calculate_phosphor_p_value(shadow_edge_norm_tx, thickness.x, shadow_q.x));
    const float3 term_y_a = exp(-calculate_phosphor_p_value(extent_a * 0.5 * rcp(shadow_aspect_ratio), thickness.y, shadow_q.y));
    const float3 term_y_b = exp(-calculate_phosphor_p_value(extent_b * 0.5 * rcp(shadow_aspect_ratio), thickness.y, shadow_q.y));

    // First and second set of columns.
    const float3 pulse_x_a = exp((triangle_wave(pos_x, 1) - 0.5) * shadow_q.x);
    const float3 col_a = pulse_x_a * rcp(pulse_x_a + term_x);
    const float3 pulse_x_b = exp((triangle_wave(pos_x + 0.5, 1) - 0.5) * shadow_q.x);
    const float3 col_b = pulse_x_b * rcp(pulse_x_b + term_x);

    // Rows belonging to each set of columns.
    const float3 pulse_y_a = exp((triangle_wave(pos_y, 0.5) - 0.5) * shadow_q.y);
    const float3 row_a = pulse_y_a * rcp(pulse_y_a + term_y_a);
    const float3 pulse_y_b = exp((triangle_wave(pos_y + 1, 0.5) - 0.5) * shadow_q.y);
    const float3 row_b = pulse_y_b * rcp(pulse_y_b + term_y_b);

    return col_a * row_a + col_b * row_b;
}
/*
 * Evaluates the small (magenta/green) grille mask at texcoord and returns one intensity per
 * color channel.
 *
 * Only the x axis is used. grille_pq_x.y scales the exponent of the pulse and grille_pq_x.x is
 * the term added in its denominator.
 * For geom_rotation_mode 2 or 3 the offsets are swizzled .grg, which exchanges the magenta and
 * green stripe positions.
 */
float3 get_phosphor_intensity_grille_small(
    const float2 texcoord,
    const float2 viewport_frequency_factor,
    const float2 grille_pq_x
) {
    const bool swap_stripes = (geom_rotation_mode == 2 || geom_rotation_mode == 3);
    const float3 offs_x = (swap_stripes ? smallgrille_norm_center_offsets_x.grg : smallgrille_norm_center_offsets_x)
        + phosphor_offset_x * 0.5;

    const float3 wave = triangle_wave(texcoord.x * viewport_frequency_factor.x - offs_x, 1);
    const float3 pulse = exp((wave - 0.5) * grille_pq_x.y);
    const float3 intensity = pulse * rcp(pulse + grille_pq_x.x);

    // The square root brightens the result, which helps hide the gaps between pixels when the triad is small.
    return sqrt(intensity);
}
/*
 * Evaluates the small (magenta/green) slot mask at texcoord and returns one intensity per
 * color channel.
 *
 * The mask is the sum of two column/row products. The second set of columns is evaluated one
 * unit further along x (at half frequency), and its rows use the y2 center offsets plus an
 * extra half-unit shift.
 * slot_pq_x and slot_pq_y hold the pulse parameters per axis: .y scales the exponent and .x is
 * the term added in the denominator.
 * For geom_rotation_mode 2 or 3 the offsets are swizzled .grg.
 */
float3 get_phosphor_intensity_slot_small(
    const float2 texcoord,
    const float2 viewport_frequency_factor,
    const float2 slot_pq_x,
    const float2 slot_pq_y
) {
    const bool swap_quads = (geom_rotation_mode == 2 || geom_rotation_mode == 3);
    const float3 offs_x = (swap_quads ? smallslot_norm_center_offsets_x.grg : smallslot_norm_center_offsets_x)
        + phosphor_offset_x * 0.5;
    const float3 offs_y1 = (swap_quads ? smallslot_norm_center_offsets_y1.grg : smallslot_norm_center_offsets_y1)
        + phosphor_offset_y * 0.5;
    const float3 offs_y2 = (swap_quads ? smallslot_norm_center_offsets_y2.grg : smallslot_norm_center_offsets_y2)
        + phosphor_offset_y * 0.5;

    const float3 pos_x = texcoord.x * viewport_frequency_factor.x - offs_x;
    const float scaled_y = texcoord.y * viewport_frequency_factor.y;

    // First and second set of columns.
    const float3 pulse_x_a = exp((triangle_wave(pos_x, 0.5) - 0.5) * slot_pq_x.y);
    const float3 col_a = pulse_x_a * rcp(pulse_x_a + slot_pq_x.x);
    const float3 pulse_x_b = exp((triangle_wave(pos_x + 1, 0.5) - 0.5) * slot_pq_x.y);
    const float3 col_b = pulse_x_b * rcp(pulse_x_b + slot_pq_x.x);

    // Rows belonging to each set of columns.
    const float3 pulse_y_a = exp((triangle_wave(scaled_y - offs_y1, 1) - 0.5) * slot_pq_y.y);
    const float3 row_a = pulse_y_a * rcp(pulse_y_a + slot_pq_y.x);
    const float3 pulse_y_b = exp((triangle_wave(scaled_y - offs_y2 + 0.5, 1) - 0.5) * slot_pq_y.y);
    const float3 row_b = pulse_y_b * rcp(pulse_y_b + slot_pq_y.x);

    // NOTE(review): unlike the small grille and small shadow masks, no sqrt is applied to this
    // result, although an earlier comment here described one - confirm which is intended.
    return (col_a * row_a + col_b * row_b);
}
/*
 * Evaluates the small (magenta/green) shadow mask at texcoord and returns one intensity per
 * color channel.
 *
 * The mask is the sum of two column/row products, with the second set evaluated half a unit
 * further along x and one unit further along y (at half frequency).
 * shadow_pq_x and shadow_pq_y hold the pulse parameters per axis: .y scales the exponent and
 * .x is the term added in the denominator.
 * For geom_rotation_mode 2 or 3 the offsets are swizzled .grg.
 */
float3 get_phosphor_intensity_shadow_small(
    const float2 texcoord,
    const float2 viewport_frequency_factor,
    const float2 shadow_pq_x,
    const float2 shadow_pq_y
) {
    const bool swap_quads = (geom_rotation_mode == 2 || geom_rotation_mode == 3);
    const float3 offs_x = (swap_quads ? smallshadow_norm_center_offsets_x.grg : smallshadow_norm_center_offsets_x)
        + phosphor_offset_x * 0.5;
    const float3 offs_y = (swap_quads ? smallshadow_norm_center_offsets_y.grg : smallshadow_norm_center_offsets_y)
        + phosphor_offset_y * 0.5;

    // Position of this fragment relative to each channel's quad center.
    const float3 pos_x = texcoord.x * viewport_frequency_factor.x - offs_x;
    const float3 pos_y = texcoord.y * viewport_frequency_factor.y - offs_y;

    // First and second set of columns.
    const float3 pulse_x_a = exp((triangle_wave(pos_x, 1) - 0.5) * shadow_pq_x.y);
    const float3 col_a = pulse_x_a * rcp(pulse_x_a + shadow_pq_x.x);
    const float3 pulse_x_b = exp((triangle_wave(pos_x + 0.5, 1) - 0.5) * shadow_pq_x.y);
    const float3 col_b = pulse_x_b * rcp(pulse_x_b + shadow_pq_x.x);

    // Rows belonging to each set of columns.
    const float3 pulse_y_a = exp((triangle_wave(pos_y, 0.5) - 0.5) * shadow_pq_y.y);
    const float3 row_a = pulse_y_a * rcp(pulse_y_a + shadow_pq_y.x);
    const float3 pulse_y_b = exp((triangle_wave(pos_y + 1, 0.5) - 0.5) * shadow_pq_y.y);
    const float3 row_b = pulse_y_b * rcp(pulse_y_b + shadow_pq_y.x);

    // The square root brightens the result, which helps hide the gaps between pixels when the triad is small.
    return sqrt(col_a * row_a + col_b * row_b);
}
#endif // _PHOSHOR_MASK_CALCULATIONS_H

View file

@ -1,243 +0,0 @@
#ifndef _QUAD_PIXEL_COMMUNICATION_H
#define _QUAD_PIXEL_COMMUNICATION_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey*
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////////// DISCLAIMER /////////////////////////////////
// *This code was inspired by "Shader Amortization using Pixel Quad Message
// Passing" by Eric Penner, published in GPU Pro 2, Chapter VI.2. My intent
// is not to plagiarize his fundamentally similar code and assert my own
// copyright, but the algorithmic helper functions require so little code that
// implementations can't vary by much except bugfixes and conventions. I just
// wanted to license my own particular code here to avoid ambiguity and make it
// clear that as far as I'm concerned, people can do as they please with it.
///////////////////////////////// DESCRIPTION ////////////////////////////////
// Given screen pixel numbers, derive a "quad vector" describing a fragment's
// position in its 2x2 pixel quad. Given that vector, obtain the values of any
// variable at neighboring fragments.
// Requires: Using this file in general requires:
// 1.) ddx() and ddy() are present in the current Cg profile.
// 2.) The GPU driver is using fine/high-quality derivatives.
// Functions will give incorrect results if this is not true,
// so a test function is included.
///////////////////// QUAD-PIXEL COMMUNICATION PRIMITIVES ////////////////////
float4 get_quad_vector_naive(float4 output_pixel_num_wrt_uvxy)
{
    //  Requires:   Two measures of the current fragment's output pixel number,
    //              each in ([0, output_size.x), [0, output_size.y)):
    //              1.) output_pixel_num_wrt_uvxy.xy grows with the uv coords.
    //              2.) output_pixel_num_wrt_uvxy.zw grows with screen xy.
    //  Returns:    The fragment's place in its 2x2 quad, measured two ways:
    //              1.) .xy is the placement along the uv directions, with the
    //                  origin (0, 0) at the top-left:
    //                      top-left    = (-1.0, -1.0)  top-right    = ( 1.0, -1.0)
    //                      bottom-left = (-1.0,  1.0)  bottom-right = ( 1.0,  1.0)
    //                  Use this to arrange/weight shared texture samples.
    //              2.) .zw is the placement along the screen xy directions
    //                  (position); its origin varies.
    //                  quad_gather() depends on this measure.
    //              The two are related by:
    //                  quad_vector.zw = quad_vector.xy * float2(
    //                      ddx(output_pixel_num_wrt_uvxy.x),
    //                      ddy(output_pixel_num_wrt_uvxy.y));
    //  Caveats:    Assumes the GPU driver always begins 2x2 pixel quads on even
    //              pixel numbers.  That can be false (nondeterministically) at
    //              odd output resolutions.
    // Twice the fractional part of half the pixel number, then rescaled
    // linearly with 2x - 1:
    const float4 parity = 2.0 * frac(0.5 * output_pixel_num_wrt_uvxy);
    return 2.0 * parity - float4(1.0, 1.0, 1.0, 1.0);
}
float4 get_quad_vector(float4 output_pixel_num_wrt_uvxy)
{
    //  Requires:   Same inputs as get_quad_vector_naive() (read that first).
    //  Returns:    Same result as get_quad_vector_naive(), except that it
    //              stays correct when the 2x2 pixel quad begins on an odd
    //              pixel, which can happen at odd resolutions.
    const float4 guess = get_quad_vector_naive(output_pixel_num_wrt_uvxy);
    //  When guess.zw fails to increase with screen xy, the quad must start
    //  on an odd pixel.  Half of each screen-space derivative gives the
    //  per-axis factor that corrects the guess:
    const float mirror_x = 0.5 * ddx(guess.z);
    const float mirror_y = 0.5 * ddy(guess.w);
    return guess * float4(mirror_x, mirror_y, mirror_x, mirror_y);
}
float4 get_quad_vector(float2 output_pixel_num_wrt_uv)
{
    //  Requires:   1.) ddx() and ddy() exist in the current Cg profile.
    //              2.) output_pixel_num_wrt_uv grows with the uv coords and
    //                  gives the current fragment's output pixel number in:
    //                      ([0, output_size.x), [0, output_size.y))
    //  Returns:    Same result as get_quad_vector_naive() (read that first),
    //              except that it stays correct when the 2x2 pixel quad begins
    //              on an odd pixel, which can happen at odd resolutions.
    //  Caveats:    Needs less information than the float4 overload, but may
    //              be slower.
    //  Placement within the quad along the uv directions:
    const float2 uv_guess =
        (frac(output_pixel_num_wrt_uv * 0.5) * 2.0 - float2(0.5, 0.5)) * 2.0;
    //  Do screen coords run with or against uv?  These derivatives give the
    //  direction of (screen.x, screen.y) relative to (uv.x, uv.y), in {-1, 1}.
    const float2 uv_to_screen = float2(ddx(output_pixel_num_wrt_uv.x),
        ddy(output_pixel_num_wrt_uv.y));
    const float2 screen_guess = uv_guess * uv_to_screen;
    //  When screen_guess fails to increase with screen xy, the quad must
    //  start on an odd pixel:
    const float2 odd_start_mirror = float2(ddx(screen_guess.x),
        ddy(screen_guess.y)) * 0.5;
    return float4(uv_guess * odd_start_mirror, screen_guess * odd_start_mirror);
}
void quad_gather(float4 quad_vector, float4 curr,
    out float4 adjx, out float4 adjy, out float4 diag)
{
    //  Requires:   1.) ddx() and ddy() exist in the current Cg profile.
    //              2.) The GPU driver uses fine/high-quality derivatives.
    //              3.) quad_vector gives the current fragment's location in
    //                  its 2x2 pixel quad, following get_quad_vector().
    //              4.) curr is whatever vector you want neighboring values of.
    //  Returns:    Through the out parameters, the value curr has at the
    //              x-adjacent, y-adjacent and diagonal fragments.
    const float step_x = quad_vector.z;
    const float step_y = quad_vector.w;
    adjx = curr - ddx(curr) * step_x;
    //  The diagonal value is the y-neighbor of the x-neighbor:
    diag = adjx - ddy(adjx) * step_y;
    adjy = curr - ddy(curr) * step_y;
}
void quad_gather(float4 quad_vector, float3 curr,
    out float3 adjx, out float3 adjy, out float3 diag)
{
    //  float3 overload: writes the value of curr at the x-adjacent,
    //  y-adjacent and diagonal fragments of the quad to the out parameters.
    const float step_x = quad_vector.z;
    const float step_y = quad_vector.w;
    adjx = curr - ddx(curr) * step_x;
    //  The diagonal value is the y-neighbor of the x-neighbor:
    diag = adjx - ddy(adjx) * step_y;
    adjy = curr - ddy(curr) * step_y;
}
void quad_gather(float4 quad_vector, float2 curr,
    out float2 adjx, out float2 adjy, out float2 diag)
{
    //  float2 overload: writes the value of curr at the x-adjacent,
    //  y-adjacent and diagonal fragments of the quad to the out parameters.
    const float step_x = quad_vector.z;
    const float step_y = quad_vector.w;
    adjx = curr - ddx(curr) * step_x;
    //  The diagonal value is the y-neighbor of the x-neighbor:
    diag = adjx - ddy(adjx) * step_y;
    adjy = curr - ddy(curr) * step_y;
}
float4 quad_gather(float4 quad_vector, float curr)
{
    //  Scalar overload.
    //  Returns:    return.x == current
    //              return.y == adjacent x
    //              return.z == adjacent y
    //              return.w == diagonal
    const float adj_x = curr - ddx(curr) * quad_vector.z;
    //  Step the (current, adjacent x) pair across y in a single operation to
    //  obtain (adjacent y, diagonal):
    const float2 this_row = float2(curr, adj_x);
    const float2 other_row = this_row - ddy(this_row) * quad_vector.w;
    return float4(this_row, other_row);
}
float4 quad_gather_sum(float4 quad_vector, float4 curr)
{
    //  Requires:   Same as quad_gather()
    //  Returns:    curr summed over all four fragments of the quad.
    float4 adjx, adjy, diag;
    quad_gather(quad_vector, curr, adjx, adjy, diag);
    float4 total = curr;
    total += adjx;
    total += adjy;
    total += diag;
    return total;
}
float3 quad_gather_sum(float4 quad_vector, float3 curr)
{
    //  float3 overload: curr summed over all four fragments of the quad.
    float3 adjx, adjy, diag;
    quad_gather(quad_vector, curr, adjx, adjy, diag);
    float3 total = curr;
    total += adjx;
    total += adjy;
    total += diag;
    return total;
}
float2 quad_gather_sum(float4 quad_vector, float2 curr)
{
    //  float2 overload: curr summed over all four fragments of the quad.
    float2 adjx, adjy, diag;
    quad_gather(quad_vector, curr, adjx, adjy, diag);
    float2 total = curr;
    total += adjx;
    total += adjy;
    total += diag;
    return total;
}
float quad_gather_sum(float4 quad_vector, float curr)
{
    //  Scalar overload: curr summed over all four fragments of the quad.
    //  The scalar quad_gather() packs (current, adjacent x, adjacent y,
    //  diagonal) into one float4.
    const float4 gathered = quad_gather(quad_vector, curr);
    float total = gathered.x;
    total += gathered.y;
    total += gathered.z;
    total += gathered.w;
    return total;
}
bool fine_derivatives_working(float4 quad_vector, float4 curr)
{
    //  Requires:   1.) ddx() and ddy() exist in the current Cg profile.
    //              2.) quad_vector gives the current fragment's location in
    //                  its 2x2 pixel quad, following get_quad_vector().
    //              3.) curr is a test vector with non-constant derivatives
    //                  (it should change nonlinearly across fragments).
    //  Returns:    true if fine/hybrid/high-quality derivatives are in use,
    //              false if coarse derivatives are in use or the test is
    //              inconclusive.
    //  Usage:      Checks whether quad-pixel communication works at all.
    //  Method:     Fine derivatives are confirmed as soon as either of these
    //              holds (for any component, at any fragment):
    //                  (ddy(curr) != ddy(adjx)) or (ddx(curr) != ddx(adjy))
    //              Testing more values (e.g. a float4, in both directions)
    //              makes a positive result easier to obtain.
    //  TODO:       Check for floating point exact comparison issues!
    const float4 dx_here = ddx(curr);
    const float4 dy_here = ddy(curr);
    //  Values at the x- and y-adjacent fragments, and their derivatives
    //  along the other axis:
    const float4 neighbor_x = curr - dx_here * quad_vector.z;
    const float4 neighbor_y = curr - dy_here * quad_vector.w;
    const float4 dy_at_neighbor_x = ddy(neighbor_x);
    const float4 dx_at_neighbor_y = ddx(neighbor_y);
    const bool4 dy_mismatch = bool4(
        dy_here.x != dy_at_neighbor_x.x,
        dy_here.y != dy_at_neighbor_x.y,
        dy_here.z != dy_at_neighbor_x.z,
        dy_here.w != dy_at_neighbor_x.w);
    const bool4 dx_mismatch = bool4(
        dx_here.x != dx_at_neighbor_y.x,
        dx_here.y != dx_at_neighbor_y.y,
        dx_here.z != dx_at_neighbor_y.z,
        dx_here.w != dx_at_neighbor_y.w);
    return any(dy_mismatch) || any(dx_mismatch);
}
bool fine_derivatives_working_fast(float4 quad_vector, float curr)
{
    //  Requires:   Same as fine_derivatives_working()
    //  Returns:    Same as fine_derivatives_working()
    //  Usage:      Cheaper than fine_derivatives_working(), but more prone to
    //              false negatives, which makes it less useful for offline
    //              testing/debugging.  As of May 2014 it is also no basis for
    //              dynamic runtime branching, because derivatives (and with
    //              them quad-pixel communication) are disallowed in branches.
    //              Future GPU's may allow them in dynamic branches if you
    //              promise the branch condition is the same for every
    //              fragment in the quad (and/or if the driver enforces that
    //              by letting a single fragment decide).  Should that happen,
    //              this version may become the more economical choice.
    //  Only one scalar is tested, and only in one direction: the y
    //  derivative here against the y derivative at the x-adjacent fragment.
    const float neighbor_x = curr - ddx(curr) * quad_vector.z;
    const float dy_here = ddy(curr);
    const float dy_at_neighbor = ddy(neighbor_x);
    return (dy_here != dy_at_neighbor);
}
#endif // _QUAD_PIXEL_COMMUNICATION_H

View file

@ -1,501 +0,0 @@
#ifndef _SCANLINE_FUNCTIONS_H
#define _SCANLINE_FUNCTIONS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade.
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
/////////////////////////////// BEGIN INCLUDES ///////////////////////////////
#include "bind-shader-params.fxh"
#include "gamma-management.fxh"
#include "special-functions.fxh"
//////////////////////////////// END INCLUDES ////////////////////////////////
///////////////////////////// SCANLINE FUNCTIONS /////////////////////////////
// Re-position coordinate c inside a grid of bin_size-sized bins anchored at
// starting_position: the fmod() remainder of c within its bin is removed and
// half a bin is added back.
float2 round_coord(
    const float2 c,
    const float2 starting_position,
    const float2 bin_size
) {
    const float2 offset_in_bin = fmod(c - starting_position, bin_size);
    const float2 bin_origin = c - offset_in_bin;
    return bin_origin + bin_size * 0.5;
}
// Periodic helper waves of time/position t and frequency f.
// These are preprocessor macros so they work for arbitrary choices of
// float1/2/3/4. Every expansion is wrapped in parentheses so the macro can be
// embedded in a larger expression (e.g. `wave(t, f) * 2`) without operator
// precedence splitting it apart.
#define triangle_wave(t, f) (abs(1 - 2*frac((t) * (f))))
#define sawtooth_incr_wave(t, f) (frac((t) * (f)))
// using fmod(-t*f, 1.0) outputs 0 at t == 0, but I want it to output 1
#define sawtooth_decr_wave(t, f) (1 - frac((t) * (f)))
// Interlacing/scanline state filled in by precalc_interpolation_field_data()
// and calc_interpolation_field_data().
struct InterpolationFieldData {
// Frequency fed to triangle_wave() when deriving scanline_parity.
float triangle_wave_freq;
// Field chosen for the current frame; calc_interpolation_field_data() derives
// it from frame_count, scanline_deinterlacing_mode and
// interlace_back_field_first.
bool field_parity;
// True when the field wave evaluated at texcoord.y is >= 0.
bool scanline_parity;
// True when interlacing is enabled and scanline_parity differs from
// field_parity.
bool wrong_field;
};
// Build the field data with a fixed triangle-wave frequency of 2.
// Only triangle_wave_freq and scanline_parity carry information here; the
// remaining members are given defined values (false) so the returned struct
// never exposes uninitialized data to the caller.
InterpolationFieldData precalc_interpolation_field_data(float2 texcoord) {
    InterpolationFieldData data;
    data.triangle_wave_freq = 2;
    // No frame/interlacing inputs are consulted in this variant.
    data.field_parity = false;
    data.wrong_field = false;
    // Remap the triangle wave from [0, 1] to [-1, 1]; its sign selects the parity.
    const float field_wave = triangle_wave(texcoord.y + rcp(2*data.triangle_wave_freq), data.triangle_wave_freq * 0.5) * 2 - 1;
    data.scanline_parity = field_wave >= 0;
    return data;
}
// Compute the full interlacing state for texcoord.
//   scale: multiplied by 1/scanline_thickness to obtain the wave frequency
//          (an earlier variant used content_size.y in its place).
InterpolationFieldData calc_interpolation_field_data(float2 texcoord, float scale) {
    InterpolationFieldData result;
    result.triangle_wave_freq = scale * rcp(scanline_thickness);

    // Odd frames flip the field unless deinterlacing mode 1 is selected.
    const bool odd_frame = (frame_count % 2 == 1) && (scanline_deinterlacing_mode != 1);
    if (odd_frame)
    {
        result.field_parity = !interlace_back_field_first;
    }
    else
    {
        result.field_parity = interlace_back_field_first;
    }

    // Triangle wave remapped from [0, 1] to [-1, 1]; its sign is the parity.
    const float wave_phase = texcoord.y + rcp(2*result.triangle_wave_freq);
    const float field_wave = triangle_wave(wave_phase, result.triangle_wave_freq * 0.5) * 2 - 1;
    result.scanline_parity = field_wave >= 0;

    // The scanline belongs to the "wrong" field when the two parities disagree.
    const bool parity_mismatch = (result.scanline_parity != result.field_parity);
    result.wrong_field = enable_interlacing && parity_mismatch;
    return result;
}
float get_gaussian_sigma(const float color, const float sigma_range)
{
    //  Purpose:    Standard deviation of the Gaussian beam for "color":
    //                  sigma = gaussian_beam_min_sigma + sigma_range * f(color)
    //  Globals required:
    //              1.) gaussian_beam_min_sigma / gaussian_beam_max_sigma: global
    //                  floats with the desired min/max beam standard deviations
    //                  (dim and bright colors respectively).
    //              2.) gaussian_beam_max_sigma > 0.0
    //              3.) gaussian_beam_min_sigma in (0.0, gaussian_beam_max_sigma]
    //              4.) gaussian_beam_spot_power is a global float.
    //  Parameters: 1.) color: the underlying source color along a scanline.
    //              2.) sigma_range = gaussian_beam_max_sigma -
    //                  gaussian_beam_min_sigma, passed in so it is not
    //                  recomputed when the sigmas are runtime parameters.
    //  Shape function f(color), selected by beam_spot_shape_function:
    //              < 0.5:      f(color) = pow(color, gaussian_beam_spot_power)
    //              otherwise:  f(color) = sqrt(1.0 - (color - 1.0)*(color - 1.0))
    //                          (users select this by setting the option to 1)
    //  Discussion:
    //  The spot shape vaguely resembles an aspect-corrected f() in [0, 1]
    //  (related, not identical). f(color) = color gives diamond-shaped spots;
    //  a spherical function or cube balances variable width against a
    //  soft/realistic shape. gaussian_beam_spot_power > 1.0 can give an ugly
    //  spot and more initial clipping, though the final shape also depends on
    //  the horizontal resampling filter and phosphor bloom: resampling in
    //  nonlinear light and/or with a sharp (e.g. Lanczos) filter sharpens the
    //  spot, while a sixth root is still quite soft. The power function
    //  (default power 1.0/3.0) is the most flexible; the fixed spherical curve
    //  has the highest variability without an awful spot shape.
    //
    //  gaussian_beam_min_sigma controls scanline sharpness/aliasing in dim
    //  areas, and its distance from gaussian_beam_max_sigma controls beam width
    //  variability. It only affects clipping [for pure Gaussians] when
    //  gaussian_beam_spot_power > 1.0 (a conservative estimate of a more
    //  complex constraint).
    //
    //  gaussian_beam_max_sigma controls clipping and how scanline
    //  width/softness grows with color. The wider it is, the more scanlines
    //  must be evaluated to avoid distortion. For a pure Gaussian, the
    //  max_beam_sigma at which the first unused scanline always has a weight
    //  < 1.0/255.0 is:
    //      num scanlines = 2, max_beam_sigma = 0.2089; distortions begin ~0.34
    //      num scanlines = 3, max_beam_sigma = 0.3879; distortions begin ~0.52
    //      num scanlines = 4, max_beam_sigma = 0.5723; distortions begin ~0.70
    //      num scanlines = 5, max_beam_sigma = 0.7591; distortions begin ~0.89
    //      num scanlines = 6, max_beam_sigma = 0.9483; distortions begin ~1.08
    //  Generalized Gaussians permit more leeway here as steepness increases.
    float shape_value;
    if(beam_spot_shape_function < 0.5)
    {
        // Power curve:
        shape_value = pow(color, gaussian_beam_spot_power);
    }
    else
    {
        // Spherical curve:
        const float dist_from_white = color - 1;
        shape_value = sqrt(1.0 - dist_from_white*dist_from_white);
    }
    return gaussian_beam_min_sigma + sigma_range * shape_value;
}
float get_generalized_gaussian_beta(const float color, const float shape_range)
{
    //  Purpose:    Type-I generalized Gaussian "shape" parameter beta for the
    //              given color:
    //                  beta = gaussian_beam_min_shape +
    //                         shape_range * pow(color, gaussian_beam_shape_power)
    //  Globals required:
    //              1.) gaussian_beam_min_shape / gaussian_beam_max_shape: global
    //                  floats with the desired min/max beta (dim and bright
    //                  colors respectively).
    //              2.) gaussian_beam_max_shape >= 2.0
    //              3.) gaussian_beam_min_shape in [2.0, gaussian_beam_max_shape]
    //              4.) gaussian_beam_shape_power is a global float.
    //  Parameters: 1.) color: the underlying source color along a scanline.
    //              2.) shape_range = gaussian_beam_max_shape -
    //                  gaussian_beam_min_shape, passed in so it is not
    //                  recomputed when the shapes are runtime parameters.
    //  Discussion: How beta changes the scanline distribution:
    //              a.) beta < 2.0 narrows the peak to a spike with a
    //                  discontinuous slope
    //              b.) beta == 2.0 degenerates to a plain Gaussian
    //              c.) beta > 2.0 flattens and widens the peak, then falls off
    //                  more steeply than a Gaussian. High sigmas widen and
    //                  soften peaks; high betas widen and sharpen them, at the
    //                  risk of aliasing.
    //              Unlike high gaussian_beam_spot_powers, high
    //              gaussian_beam_shape_powers soften shape transitions, and
    //              lower ones sharpen them (at the risk of aliasing).
    const float shape_curve = pow(color, gaussian_beam_shape_power);
    return gaussian_beam_min_shape + shape_range * shape_curve;
}
// Weighted sum of four colors (weights.x..w apply to color0..color3), computed
// as a vector-matrix product and clamped below at zero.
float3 get_raw_interpolated_color(const float3 color0,
    const float3 color1, const float3 color2, const float3 color3,
    const float4 weights)
{
    const float3 blended = mul(weights, float4x3(color0, color1, color2, color3));
    // Clamp with max(): negative colors cause bizarre artifacts downstream.
    return max(blended, 0.0);
}
float3 get_interpolated_linear_color(const float3 color0, const float3 color1,
const float3 color2, const float3 color3, const float4 weights)
{
// Purpose: Blend four horizontally neighbouring samples with "weights",
// mixing a linear-RGB blend and a gamma-space blend according to
// beam_horiz_linear_rgb_weight.
// Requires: 1.) Requirements of the gamma-management include must be met:
// intermediate_gamma must be globally defined, and input
// colors are interpreted as linear RGB unless you #define
// GAMMA_ENCODE_EVERY_FBO (in which case they are
// interpreted as gamma-encoded with intermediate_gamma).
// 2.) color0-3 are colors sampled from a texture with tex2D().
// They are interpreted as defined in requirement 1.
// 3.) weights contains weights for each color, summing to 1.0.
// 4.) beam_horiz_linear_rgb_weight must be defined as a global
// float in [0.0, 1.0] describing how much blending should
// be done in linear RGB (rest is gamma-corrected RGB).
// 5.) _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE must be #defined
// if beam_horiz_linear_rgb_weight is anything other than a
// static constant, or we may try branching at runtime
// without dynamic branches allowed (slow).
// Returns: Return an interpolated color lookup between the four input
// colors based on the weights in weights. The final color will
// be a linear RGB value, but the blending will be done as
// indicated above.
// NOTE(review): in the two paths compiled without GAMMA_ENCODE_EVERY_FBO,
// gamma_mixed_color is blended from pow(color, inv_intermediate_gamma) inputs
// and is not raised back to intermediate_gamma afterwards, unlike the
// GAMMA_ENCODE_EVERY_FBO paths which do linearize their gamma-space blend.
// That looks inconsistent with the "linear RGB" return contract - confirm
// against upstream crt-royale before changing.
const float intermediate_gamma = get_intermediate_gamma();
const float inv_intermediate_gamma = 1.0 / intermediate_gamma;
// Branch if beam_horiz_linear_rgb_weight is static (for free) or if the
// profile allows dynamic branches (faster than computing extra pows).
// Note: this macro is defined inside the function body, so it stays defined
// for the rest of the translation unit.
#if !_RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
#define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
#else
#if _DRIVERS_ALLOW_DYNAMIC_BRANCHES
#define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
#endif
#endif
#ifdef SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
// beam_horiz_linear_rgb_weight is static (or dynamic branches are allowed),
// so we can branch and skip the blend that would receive zero weight:
#ifdef GAMMA_ENCODE_EVERY_FBO
// Inputs are gamma-encoded: blend in gamma space, then linearize the result.
const float3 gamma_mixed_color = pow(
get_raw_interpolated_color(color0, color1, color2, color3, weights),
intermediate_gamma);
if(beam_horiz_linear_rgb_weight > 0.0)
{
// Linearize each input first, then blend in linear space.
const float3 linear_mixed_color = get_raw_interpolated_color(
pow(color0, intermediate_gamma),
pow(color1, intermediate_gamma),
pow(color2, intermediate_gamma),
pow(color3, intermediate_gamma),
weights);
return lerp(gamma_mixed_color, linear_mixed_color, beam_horiz_linear_rgb_weight);
}
else
{
return gamma_mixed_color;
}
#else
// Inputs are linear: the linear blend needs no conversion.
const float3 linear_mixed_color = get_raw_interpolated_color(
color0, color1, color2, color3, weights);
if(beam_horiz_linear_rgb_weight < 1.0)
{
// Gamma-encode each input, then blend in gamma space.
const float3 gamma_mixed_color = get_raw_interpolated_color(
pow(color0, inv_intermediate_gamma),
pow(color1, inv_intermediate_gamma),
pow(color2, inv_intermediate_gamma),
pow(color3, inv_intermediate_gamma),
weights);
return lerp(gamma_mixed_color, linear_mixed_color, beam_horiz_linear_rgb_weight);
}
else
{
return linear_mixed_color;
}
#endif // GAMMA_ENCODE_EVERY_FBO
#else
// No branching available: always compute both blends and lerp between them.
#ifdef GAMMA_ENCODE_EVERY_FBO
// Inputs: color0-3 are colors in gamma-encoded RGB.
const float3 gamma_mixed_color = pow(get_raw_interpolated_color(
color0, color1, color2, color3, weights), intermediate_gamma);
const float3 linear_mixed_color = get_raw_interpolated_color(
pow(color0, intermediate_gamma),
pow(color1, intermediate_gamma),
pow(color2, intermediate_gamma),
pow(color3, intermediate_gamma),
weights);
return lerp(gamma_mixed_color, linear_mixed_color, beam_horiz_linear_rgb_weight);
#else
// Inputs: color0-3 are colors in linear RGB.
const float3 linear_mixed_color = get_raw_interpolated_color(
color0, color1, color2, color3, weights);
const float3 gamma_mixed_color = get_raw_interpolated_color(
pow(color0, inv_intermediate_gamma),
pow(color1, inv_intermediate_gamma),
pow(color2, inv_intermediate_gamma),
pow(color3, inv_intermediate_gamma),
weights);
// FIXME (left unexplained by the port author): a hard-coded weight of 1.0
// was tried here at some point and remains disabled below.
// const float beam_horiz_linear_rgb_weight1 = 1.0;
return lerp(gamma_mixed_color, linear_mixed_color,
beam_horiz_linear_rgb_weight);
#endif // GAMMA_ENCODE_EVERY_FBO
#endif // SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
}
float3 get_scanline_color(const sampler2D tex, const float2 scanline_uv,
    const float2 uv_step_x, const float4 weights)
{
    //  Purpose:    Horizontally interpolated texture lookup using 2-4 nearby
    //              texels, combined by get_interpolated_linear_color().
    //  Requires:   1.) scanline_uv is vertically snapped to the caller's
    //                  desired line or scanline and horizontally snapped to
    //                  the texel just left of the output pixel (color1).
    //              2.) uv_step_x holds the horizontal uv distance between
    //                  texels.
    //              3.) weights holds the interpolation filter weights for
    //                  color0..color3, where color1 is just left of the
    //                  output pixel.
    //  The outer pair (color0/color3) is only fetched when
    //  beam_horiz_filter > 0.5; otherwise both stay black.
    float3 outer_left = 0.0;
    float3 outer_right = 0.0;
    if(beam_horiz_filter > 0.5)
    {
        outer_left = tex2D_linearize(tex, scanline_uv - uv_step_x, get_input_gamma()).rgb;
        outer_right = tex2D_linearize(tex, scanline_uv + 2.0 * uv_step_x, get_input_gamma()).rgb;
    }
    // The inner pair is always needed:
    const float3 inner_left = tex2D_linearize(tex, scanline_uv, get_input_gamma()).rgb;
    const float3 inner_right = tex2D_linearize(tex, scanline_uv + uv_step_x, get_input_gamma()).rgb;
    // Samples come from tex2D_linearize(); the blend itself is delegated:
    return get_interpolated_linear_color(
        outer_left, inner_left, inner_right, outer_right, weights);
}
float3 sample_single_scanline_horizontal(const sampler2D tex,
    const float2 tex_uv, const float2 tex_size,
    const float2 texture_size_inv)
{
    //  Purpose:    Sample one scanline at tex_uv with the horizontal filter
    //              selected by beam_horiz_filter:
    //                  < 0.5: none, < 1.5: Quilez, < 2.5: Gaussian,
    //                  otherwise: Lanczos2.
    //  TODO: Add function requirements.
    // Position in texel units, and the texel center just left of it.
    const float2 texel_pos = tex_uv * tex_size;
    // under_half (instead of 0.5) fixes a rounding bug right around exact
    // texel locations.
    const float2 left_texel = floor(texel_pos - under_half) + 0.5;
    // Snap horizontally only; keep the vertical position untouched.
    const float2 snapped_texel = float2(left_texel.x, texel_pos.y);
    const float2 snapped_uv = snapped_texel * texture_size_inv;
    // Horizontal distances from the output position to the 4 nearby texels.
    const float left_dist = texel_pos.x - snapped_texel.x;
    const float4 texel_dists = float4(1.0 + left_dist, left_dist,
        1.0 - left_dist, 2.0 - left_dist);

    float4 raw_weights;
    if (beam_horiz_filter < 0.5)
    {
        // None: take the left texel only.
        raw_weights = float4(0, 1, 0, 0);
    }
    else if(beam_horiz_filter < 1.5)
    {
        // Quilez: quintic smoothstep between the two inner texels.
        const float d = texel_dists.y;
        const float right_weight = d*d*d*(d*(d*6.0 - 15.0) + 10.0);
        raw_weights = float4(0.0, 1.0 - right_weight, right_weight, 0.0);
    }
    else if(beam_horiz_filter < 2.5)
    {
        // Gaussian with standard deviation beam_horiz_sigma.
        float inv_two_sigma_sq = 1.0/(2.0*beam_horiz_sigma*beam_horiz_sigma);
        raw_weights = exp(-(texel_dists*texel_dists)*inv_two_sigma_sq);
    }
    else
    {
        // Lanczos2; FIX_ZERO guards the division below.
        const float4 scaled_dists = FIX_ZERO(texel_dists * pi);
        raw_weights = 2.0 * sin(scaled_dists) * sin(scaled_dists * 0.5) /
            (scaled_dists * scaled_dists);
    }

    // Normalize so the weights sum to 1.0.
    const float4 unit_weights = raw_weights/dot(raw_weights, float4(1.0, 1.0, 1.0, 1.0));
    // Fetch and blend the texels along the scanline.
    const float2 texel_step_x = float2(texture_size_inv.x, 0.0);
    return get_scanline_color(tex, snapped_uv, texel_step_x, unit_weights);
}
// Sample a scanline color at tex_uv. With beam_misconvergence enabled, the R,
// G and B channels are each read at their own 2D convergence offset.
float3 sample_rgb_scanline(
    const sampler2D tex,
    const float2 tex_uv, const float2 tex_size,
    const float2 texture_size_inv
) {
    if (!beam_misconvergence) {
        // No per-channel offsets: a single filtered lookup is enough.
        return sample_single_scanline_horizontal(tex, tex_uv, tex_size, texture_size_inv);
    }

    // Per-channel offsets converted from texels to uv units.
    const float3 offsets_u = get_convergence_offsets_x_vector() * texture_size_inv.x;
    const float3 offsets_v = get_convergence_offsets_y_vector() * texture_size_inv.y;
    const float2 uv_red = tex_uv - float2(offsets_u.r, offsets_v.r);
    const float2 uv_green = tex_uv - float2(offsets_u.g, offsets_v.g);
    const float2 uv_blue = tex_uv - float2(offsets_u.b, offsets_v.b);

    // Plain tex2D() reads are used here; the original also carried a disabled
    // variant that went through sample_single_scanline_horizontal() instead.
    const float4 texel_red = tex2D(tex, uv_red);
    const float4 texel_green = tex2D(tex, uv_green);
    const float4 texel_blue = tex2D(tex, uv_blue);
    return float3(texel_red.r, texel_green.g, texel_blue.b);
}
float3 sample_rgb_scanline_horizontal(const sampler2D tex,
    const float2 tex_uv, const float2 tex_size,
    const float2 texture_size_inv)
{
    //  Purpose:    Horizontally filtered scanline sample. With
    //              beam_misconvergence enabled, each channel is sampled at its
    //              own horizontal convergence offset.
    //  TODO: Add function requirements.
    if(!beam_misconvergence)
    {
        return sample_single_scanline_horizontal(tex, tex_uv, tex_size, texture_size_inv);
    }

    // Horizontal offsets per channel, converted from texels to uv units.
    const float3 offsets_u = get_convergence_offsets_x_vector() * texture_size_inv.xxx;
    const float2 uv_red = tex_uv - float2(offsets_u.r, 0.0);
    const float2 uv_green = tex_uv - float2(offsets_u.g, 0.0);
    const float2 uv_blue = tex_uv - float2(offsets_u.b, 0.0);

    // The single-scanline helper does the filtering; keep one channel of each.
    const float3 filtered_red = sample_single_scanline_horizontal(
        tex, uv_red, tex_size, texture_size_inv);
    const float3 filtered_green = sample_single_scanline_horizontal(
        tex, uv_green, tex_size, texture_size_inv);
    const float3 filtered_blue = sample_single_scanline_horizontal(
        tex, uv_blue, tex_size, texture_size_inv);
    return float3(filtered_red.r, filtered_green.g, filtered_blue.b);
}
// Average `scanline_thickness` vertically-contiguous samples, starting at
// scanline_start_y and advancing v_step_y per row, all at column texcoord.x.
float3 get_averaged_scanline_sample(
    sampler2D tex, const float2 texcoord,
    const float scanline_start_y, const float v_step_y,
    const float input_gamma
) {
    float3 accumulated = 0.0;
    for (int row = 0; row < scanline_thickness; row++) {
        const float sample_y = scanline_start_y + row * v_step_y;
        accumulated += tex2Dlod_linearize(tex, float4(texcoord.x, sample_y, 0, 0), input_gamma).rgb;
    }
    accumulated /= float(scanline_thickness);
    return accumulated;
}
float get_beam_strength(float dist, float color,
    const float sigma_range, const float shape_range)
{
    //  Purpose:    Sampled Gaussian beam contribution of "color" at distance
    //              "dist" from the scanline:
    //                  color * 1/(sigma*sqrt(2*pi)) * e**(-(x**2)/(2*sigma**2))
    //  Based on scanline_gaussian_sampled_contrib() from the original shader
    //  (entry point there: scanline_contrib()); see
    //  scanline_gaussian_integral_contrib() for detailed comments.
    //  shape_range is accepted but not used by this variant.
    const float inv_sigma = 1.0 / get_gaussian_sigma(color, sigma_range);
    // Precompute reciprocals to avoid repeated divides:
    const float exponent_scale = 0.5 * inv_sigma * inv_sigma;
    const float normalization = inv_sigma/sqrt(2.0*pi);
    return color*exp(-(dist*dist)*exponent_scale)*normalization;
}
float get_gaussian_beam_strength(
    float dist,
    float color,
    const float sigma_range,
    const float shape_range
) {
    //  Purpose:    Sampled generalized-Gaussian beam contribution of "color"
    //              at distance "dist":
    //                  color * beta/(2*alpha*gamma(1/beta)) *
    //                      e**(-(|x|/alpha)**beta)
    //  Based on scanline_generalized_gaussian_sampled_contrib() from the
    //  original shader (entry point there: scanline_contrib()); see
    //  scanline_generalized_gaussian_integral_contrib() for details.
    const float alpha = sqrt(2.0) * get_gaussian_sigma(color, sigma_range);
    const float beta = get_generalized_gaussian_beta(color, shape_range);
    // Reciprocals, computed once:
    const float inv_alpha = 1.0 / alpha;
    const float inv_beta = 1.0 / beta;
    // gamma_impl(s, 1/s) evaluated at s = 1/beta, so the precomputed inverse
    // is beta itself.
    const float amplitude = color * beta * 0.5 * inv_alpha / gamma_impl(inv_beta, beta);
    const float falloff = exp(-pow(abs(dist*inv_alpha), beta));
    return amplitude * falloff;
}
// Linear (tent-shaped) beam falloff: color scaled by (1 - |dist|), limited to
// the range [0, color]. num_pixels and interlaced are accepted but unused.
float get_linear_beam_strength(
    const float dist,
    const float color,
    const float num_pixels,
    const bool interlaced
) {
    const float falloff = 1 - abs(dist);
    return clamp(color * falloff, 0, color);
}
#endif // _SCANLINE_FUNCTIONS_H

View file

@ -1,504 +0,0 @@
#ifndef _SPECIAL_FUNCTIONS_H
#define _SPECIAL_FUNCTIONS_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////////// DESCRIPTION ////////////////////////////////
// This file implements the following mathematical special functions:
// 1.) erf() = 2/sqrt(pi) * indefinite_integral(e**(-x**2))
// 2.) gamma(s), a real-numbered extension of the integer factorial function
// It also implements normalized_ligamma(s, z), a normalized lower incomplete
// gamma function for s < 0.5 only. Both gamma() and normalized_ligamma() can
// be called with an _impl suffix to use an implementation version with a few
// extra precomputed parameters (which may be useful for the caller to reuse).
// See below for details.
//
// Design Rationale:
// Pretty much every line of code in this file is duplicated four times for
// different input types (float4/float3/float2/float). This is unfortunate,
// but Cg doesn't allow function templates. Macros would be far less verbose,
// but they would make the code harder to document and read. I don't expect
// these functions will require a whole lot of maintenance changes unless
// someone ever has need for more robust incomplete gamma functions, so code
// duplication seems to be the lesser evil in this case.
/////////////////////////// GAUSSIAN ERROR FUNCTION //////////////////////////
float4 erf6(float4 x)
{
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    An Abramowitz/Stegun approximation of
    //                  erf(x) = 2/sqrt(pi) * integral(e**(-x**2))
    //              with a max absolute error of 2.5*10**-5, solid numerical
    //              robustness and good efficiency. See:
    //              https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions
    const float4 t = 1.0/(1.0 + 0.47047*abs(x));
    // Cubic in t, evaluated in Horner form:
    const float4 poly = t*(0.3480242 + t*(-0.0958798 + t*0.7478556));
    const float4 magnitude = 1.0 - poly*
        exp(-(x*x));
    // The approximation is for |x|; restore the sign (erf is odd).
    return magnitude * sign(x);
}
float3 erf6(const float3 x)
{
    // Float3 overload of the Abramowitz/Stegun erf() approximation.
    const float3 t = 1.0/(1.0 + 0.47047*abs(x));
    const float3 poly = t*(0.3480242 + t*(-0.0958798 + t*0.7478556));
    const float3 magnitude = 1.0 - poly*
        exp(-(x*x));
    return magnitude * sign(x);
}
float2 erf6(const float2 x)
{
    // Float2 overload of the Abramowitz/Stegun erf() approximation.
    const float2 t = 1.0/(1.0 + 0.47047*abs(x));
    const float2 poly = t*(0.3480242 + t*(-0.0958798 + t*0.7478556));
    const float2 magnitude = 1.0 - poly*
        exp(-(x*x));
    return magnitude * sign(x);
}
float erf6(const float x)
{
    // Scalar overload of the Abramowitz/Stegun erf() approximation.
    const float t = 1.0/(1.0 + 0.47047*abs(x));
    const float poly = t*(0.3480242 + t*(-0.0958798 + t*0.7478556));
    const float magnitude = 1.0 - poly*
        exp(-(x*x));
    return magnitude * sign(x);
}
float4 erft(const float4 x)
{
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    erf() approximated with the hyperbolic tangent. The error
    //              is visually noticeable, but it's blazing fast and
    //              perceptually close...at least on ATI hardware. See:
    //              http://www.maplesoft.com/applications/view.aspx?SID=5525&view=html
    //  Warning:    Only use this if your hardware drivers implement tanh()
    //              correctly: the original author's nVidia 8800GTS returned
    //              garbage output.
    const float4 scaled_x = 1.202760580 * x;
    return tanh(scaled_x);
}
float3 erft(const float3 x)
{
    // Float3 overload of the tanh-based erf() approximation.
    const float3 scaled_x = 1.202760580 * x;
    return tanh(scaled_x);
}
float2 erft(const float2 x)
{
    // Float2 overload of the tanh-based erf() approximation.
    const float2 scaled_x = 1.202760580 * x;
    return tanh(scaled_x);
}
float erft(const float x)
{
    // Scalar overload of the tanh-based erf() approximation.
    const float scaled_x = 1.202760580 * x;
    return tanh(scaled_x);
}
float4 erf(const float4 x)
{
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    An approximation of erf(x): erft() when
    //              ERF_FAST_APPROXIMATION is defined, erf6() otherwise.
    #ifndef ERF_FAST_APPROXIMATION
    return erf6(x);
    #else
    return erft(x);
    #endif
}
float3 erf(const float3 x)
{
    // Float3 overload: erft() if ERF_FAST_APPROXIMATION is defined, else erf6().
    #ifndef ERF_FAST_APPROXIMATION
    return erf6(x);
    #else
    return erft(x);
    #endif
}
float2 erf(const float2 x)
{
    // Float2 overload: erft() if ERF_FAST_APPROXIMATION is defined, else erf6().
    #ifndef ERF_FAST_APPROXIMATION
    return erf6(x);
    #else
    return erft(x);
    #endif
}
float erf(const float x)
{
    // Scalar overload: erft() if ERF_FAST_APPROXIMATION is defined, else erf6().
    #ifndef ERF_FAST_APPROXIMATION
    return erf6(x);
    #else
    return erft(x);
    #endif
}
/////////////////////////// COMPLETE GAMMA FUNCTION //////////////////////////
float4 gamma_impl(const float4 s, const float4 s_inv)
{
    //  Requires:   1.) s is the standard parameter to the gamma function and
    //                  should lie in the [0, 36] range.
    //              2.) s_inv = 1.0/s, precomputed by the caller so it can be
    //                  reused elsewhere.
    //  Returns:    Approximate gamma function (real-numbered factorial)
    //              output from the Lanczos approximation with two
    //              coefficients, calculated using Paul Godfrey's method:
    //              http://my.fit.edu/~gabdo/gamma.txt
    //              An optimal g value for s in [0, 36] is ~1.12906830989, with
    //              a maximum relative error of 0.000463 over 2**16 equally
    //              spaced evaluations. Three coefficients would reach
    //              0.0000346 error without hurting latency, but two leave more
    //              parallelism for surrounding instructions.
    static const float g = 1.12906830989;
    static const float c0 = 0.8109119309638332633713423362694399653724431;
    static const float c1 = 0.4808354605142681877121661197951496120000040;
    static const float e = 2.71828182845904523536028747135266249775724709;
    const float4 s_plus_half = s + 0.5;
    const float4 series = c0 + c1/(s + 1.0);
    const float4 power_base = (s_plus_half + g)/e;
    // power_base**s_plus_half * series approximates gamma(s + 1); multiplying
    // by 1/s yields gamma(s). This has less error for small s than
    // decrementing s up front.
    const float4 gamma_s_plus_1 = pow(power_base, s_plus_half) * series;
    return gamma_s_plus_1 * s_inv;
}
float3 gamma_impl(const float3 s, const float3 s_inv)
{
    // Float3 overload of the two-coefficient Lanczos gamma approximation;
    // s_inv must equal 1.0/s.
    static const float g = 1.12906830989;
    static const float c0 = 0.8109119309638332633713423362694399653724431;
    static const float c1 = 0.4808354605142681877121661197951496120000040;
    static const float e = 2.71828182845904523536028747135266249775724709;
    const float3 s_plus_half = s + 0.5;
    const float3 series = c0 + c1/(s + 1.0);
    const float3 power_base = (s_plus_half + g)/e;
    // gamma(s + 1), then scaled by 1/s to obtain gamma(s).
    const float3 gamma_s_plus_1 = pow(power_base, s_plus_half) * series;
    return gamma_s_plus_1 * s_inv;
}
float2 gamma_impl(const float2 s, const float2 s_inv)
{
    // Float2 overload of the two-coefficient Lanczos gamma approximation;
    // s_inv must equal 1.0/s.
    static const float g = 1.12906830989;
    static const float c0 = 0.8109119309638332633713423362694399653724431;
    static const float c1 = 0.4808354605142681877121661197951496120000040;
    static const float e = 2.71828182845904523536028747135266249775724709;
    const float2 s_plus_half = s + 0.5;
    const float2 series = c0 + c1/(s + 1.0);
    const float2 power_base = (s_plus_half + g)/e;
    // gamma(s + 1), then scaled by 1/s to obtain gamma(s).
    const float2 gamma_s_plus_1 = pow(power_base, s_plus_half) * series;
    return gamma_s_plus_1 * s_inv;
}
float gamma_impl(const float s, const float s_inv)
{
    // Scalar overload of the two-coefficient Lanczos gamma approximation;
    // s_inv must equal 1.0/s.
    static const float g = 1.12906830989;
    static const float c0 = 0.8109119309638332633713423362694399653724431;
    static const float c1 = 0.4808354605142681877121661197951496120000040;
    static const float e = 2.71828182845904523536028747135266249775724709;
    const float s_plus_half = s + 0.5;
    const float series = c0 + c1/(s + 1.0);
    const float power_base = (s_plus_half + g)/e;
    // gamma(s + 1), then scaled by 1/s to obtain gamma(s).
    const float gamma_s_plus_1 = pow(power_base, s_plus_half) * series;
    return gamma_s_plus_1 * s_inv;
}
float4 gamma(const float4 s)
{
    //  Requires:   s is the standard parameter to the gamma function and
    //              should lie in the [0, 36] range.
    //  Returns:    Approximate gamma function output with a maximum relative
    //              error of 0.000463. See gamma_impl for details.
    const float4 s_inv = 1.0/s;
    return gamma_impl(s, s_inv);
}
float3 gamma(const float3 s)
{
    // Float3 overload: forwards to gamma_impl with the reciprocal of s.
    const float3 s_inv = 1.0/s;
    return gamma_impl(s, s_inv);
}
float2 gamma(const float2 s)
{
    // Float2 overload: forwards to gamma_impl with the reciprocal of s.
    const float2 s_inv = 1.0/s;
    return gamma_impl(s, s_inv);
}
float gamma(const float s)
{
    // Scalar overload: forwards to gamma_impl with the reciprocal of s.
    const float s_inv = 1.0/s;
    return gamma_impl(s, s_inv);
}
//////////////// INCOMPLETE GAMMA FUNCTIONS (RESTRICTED INPUT) ///////////////
// Lower incomplete gamma function for small s and z (implementation):
float4 ligamma_small_z_impl(const float4 s, const float4 z, const float4 s_inv)
{
    //  Requires:   1.) s < ~0.5
    //              2.) z <= ~0.775075
    //              3.) s_inv = 1.0/s (precomputed for outside reuse)
    //  Returns:    A 4-term series representation of the lower incomplete
    //              gamma function for small s and small z:
    //                  z**s * sum over k = 0..3 of
    //                      (-1)**k * z**k / ((s + k) * k!)
    //  The sum is unrolled with constants folded into the denominators,
    //  arranged for madds and parallelism:
    //      k = 1: (s + 1) * 1!     = s + 1
    //      k = 2: (s + 2) * 2!     = 2s + 4
    //      k = 3: (s + 3) * 3!     = 6s + 18
    //      (k = 4 would be 24s + 96; that term is intentionally left out.)
    const float4 z_squared = z*z;
    const float4 term1_denom = s + 1.0;
    const float4 term2_denom = 2.0*s + 4.0;
    const float4 term3_denom = 6.0*s + 18.0;
    float4 series = s_inv;              // k = 0 term
    series -= z/term1_denom;
    series += z_squared/term2_denom;
    series -= z * z_squared/term3_denom;
    // Apply the z**s prefactor:
    return pow(z, s) * series;
}
float3 ligamma_small_z_impl(const float3 s, const float3 z, const float3 s_inv)
{
    // Float3 overload of the 4-term small-z lower incomplete gamma series;
    // s_inv must equal 1.0/s.
    const float3 z_squared = z*z;
    const float3 term1_denom = s + 1.0;
    const float3 term2_denom = 2.0*s + 4.0;
    const float3 term3_denom = 6.0*s + 18.0;
    float3 series = s_inv;              // k = 0 term
    series -= z/term1_denom;
    series += z_squared/term2_denom;
    series -= z * z_squared/term3_denom;
    return pow(z, s) * series;
}
float2 ligamma_small_z_impl(const float2 s, const float2 z, const float2 s_inv)
{
    // Float2 overload of the 4-term small-z lower incomplete gamma series;
    // s_inv must equal 1.0/s.
    const float2 z_squared = z*z;
    const float2 term1_denom = s + 1.0;
    const float2 term2_denom = 2.0*s + 4.0;
    const float2 term3_denom = 6.0*s + 18.0;
    float2 series = s_inv;              // k = 0 term
    series -= z/term1_denom;
    series += z_squared/term2_denom;
    series -= z * z_squared/term3_denom;
    return pow(z, s) * series;
}
float ligamma_small_z_impl(const float s, const float z, const float s_inv)
{
    // Scalar overload of the 4-term small-z lower incomplete gamma series;
    // s_inv must equal 1.0/s.
    const float z_squared = z*z;
    const float term1_denom = s + 1.0;
    const float term2_denom = 2.0*s + 4.0;
    const float term3_denom = 6.0*s + 18.0;
    float series = s_inv;               // k = 0 term
    series -= z/term1_denom;
    series += z_squared/term2_denom;
    series -= z * z_squared/term3_denom;
    return pow(z, s) * series;
}
// Upper incomplete gamma function for small s and large z (implementation):
float4 uigamma_large_z_impl(const float4 s, const float4 z)
{
    //  Requires:   1.) s < ~0.5
    //              2.) z > ~0.775075
    //  Returns:    Gauss's continued fraction representation of the upper
    //              incomplete gamma function, truncated to 4 levels:
    //                  z**s * e**(-z) / denominator
    //  Rolled up, the denominator is built from the bottom (i = 4) upwards,
    //  starting from an infinite denominator:
    //      denom = (2*i - 1) + z - s + (i * (s - i))/denom
    //  Below, the levels are unrolled with constants folded for madds and
    //  parallelism (i * (s - i) becomes 3s - 9, 2s - 4, s - 1).
    const float4 level4 = 7.0 + z - s;
    const float4 level3 = 5.0 + z - s + (3.0*s - 9.0)/level4;
    const float4 level2 = 3.0 + z - s + (2.0*s - 4.0)/level3;
    const float4 level1 = 1.0 + z - s + (s - 1.0)/level2;
    const float4 prefactor = pow(z, s) * exp(-z);
    return prefactor / level1;
}
float3 uigamma_large_z_impl(const float3 s, const float3 z)
{
    // Float3 overload of the 4-level continued fraction for the upper
    // incomplete gamma function (deepest level first).
    const float3 level4 = 7.0 + z - s;
    const float3 level3 = 5.0 + z - s + (3.0*s - 9.0)/level4;
    const float3 level2 = 3.0 + z - s + (2.0*s - 4.0)/level3;
    const float3 level1 = 1.0 + z - s + (s - 1.0)/level2;
    const float3 prefactor = pow(z, s) * exp(-z);
    return prefactor / level1;
}
float2 uigamma_large_z_impl(const float2 s, const float2 z)
{
    // Float2 overload of the 4-level continued fraction for the upper
    // incomplete gamma function (deepest level first).
    const float2 level4 = 7.0 + z - s;
    const float2 level3 = 5.0 + z - s + (3.0*s - 9.0)/level4;
    const float2 level2 = 3.0 + z - s + (2.0*s - 4.0)/level3;
    const float2 level1 = 1.0 + z - s + (s - 1.0)/level2;
    const float2 prefactor = pow(z, s) * exp(-z);
    return prefactor / level1;
}
float uigamma_large_z_impl(const float s, const float z)
{
    // Scalar overload of the 4-level continued fraction for the upper
    // incomplete gamma function (deepest level first).
    const float level4 = 7.0 + z - s;
    const float level3 = 5.0 + z - s + (3.0*s - 9.0)/level4;
    const float level2 = 3.0 + z - s + (2.0*s - 4.0)/level3;
    const float level1 = 1.0 + z - s + (s - 1.0)/level2;
    const float prefactor = pow(z, s) * exp(-z);
    return prefactor / level1;
}
// Normalized lower incomplete gamma function for small s (implementation):
float4 normalized_ligamma_impl(const float4 s, const float4 z,
    const float4 s_inv, const float4 gamma_s_inv)
{
    //  Requires:   1.) s < ~0.5
    //              2.) s_inv = 1/s (precomputed for outside reuse)
    //              3.) gamma_s_inv = 1/gamma(s) (precomputed for outside reuse)
    //  Returns:    Approximate the normalized lower incomplete gamma function
    //              for s < 0.5. Since we only care about s < 0.5, we only need
    //              to evaluate two branches (not four) based on z. Each branch
    //              uses four terms, with a max relative error of ~0.00182. The
    //              branch threshold and specifics were adapted for fewer terms
    //              from Gil/Segura/Temme's paper here:
    //              http://oai.cwi.nl/oai/asset/20433/20433B.pdf
    // Evaluate both branches: Real branches test slower even when available.
    static const float thresh = 0.775075;
    // Per-component selector: 1 where z is above the threshold, else 0.
    int4 z_is_large;
    z_is_large.x = int(z.x > thresh);
    z_is_large.y = int(z.y > thresh);
    z_is_large.z = int(z.z > thresh);
    z_is_large.w = int(z.w > thresh);
    const float4 large_z = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float4 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    // Complement of the 0/1 selector. This must be arithmetic: bitwise ~ maps
    // 0 -> -1 and 1 -> -2, which saturate() clamps to 0 in both cases and
    // would silently discard the small-z branch.
    const int4 z_is_small = 1 - z_is_large;
    // Combine the results from both branches:
    return large_z * float4(z_is_large) + small_z * float4(z_is_small);
}
float3 normalized_ligamma_impl(const float3 s, const float3 z,
    const float3 s_inv, const float3 gamma_s_inv)
{
    //  Float3 version.  Requires s < ~0.5, s_inv = 1/s and
    //  gamma_s_inv = 1/gamma(s); see the float4 overload for details.
    static const float thresh = 0.775075;
    //  Per-component selection mask: 1 where z > thresh, 0 elsewhere.
    int3 z_is_large;
    z_is_large.x = int(z.x > thresh);
    z_is_large.y = int(z.y > thresh);
    z_is_large.z = int(z.z > thresh);
    //  Both branches are always evaluated and then blended by the masks.
    const float3 large_z = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float3 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    //  The complementary mask has to be the LOGICAL inverse (1 - mask).  A
    //  bitwise NOT maps 0 -> -1 and 1 -> -2, and saturate() clamps both to 0,
    //  so the small-z branch would never contribute.  This matches the scalar
    //  overload, which uses !z_is_large.
    const int3 z_is_small = int3(1, 1, 1) - z_is_large;
    return large_z * float3(z_is_large) + small_z * float3(z_is_small);
}
float2 normalized_ligamma_impl(const float2 s, const float2 z,
    const float2 s_inv, const float2 gamma_s_inv)
{
    //  Float2 version.  Requires s < ~0.5, s_inv = 1/s and
    //  gamma_s_inv = 1/gamma(s); see the float4 overload for details.
    static const float thresh = 0.775075;
    //  Per-component selection mask: 1 where z > thresh, 0 elsewhere.
    int2 z_is_large;
    z_is_large.x = int(z.x > thresh);
    z_is_large.y = int(z.y > thresh);
    //  Both branches are always evaluated and then blended by the masks.
    const float2 large_z = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float2 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    //  The complementary mask has to be the LOGICAL inverse (1 - mask).  A
    //  bitwise NOT maps 0 -> -1 and 1 -> -2, and saturate() clamps both to 0,
    //  so the small-z branch would never contribute.  This matches the scalar
    //  overload, which uses !z_is_large.
    const int2 z_is_small = int2(1, 1) - z_is_large;
    return large_z * float2(z_is_large) + small_z * float2(z_is_small);
}
float normalized_ligamma_impl(const float s, const float z,
    const float s_inv, const float gamma_s_inv)
{
    //  Scalar version.  Both candidate results are computed unconditionally
    //  and then blended with a pair of complementary 0/1 weights, so no real
    //  branch is emitted.
    static const float thresh = 0.775075;
    const float result_for_large_z =
        1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float result_for_small_z =
        ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    //  Weight is 1.0 for the branch that applies to this z, 0.0 for the other:
    const bool above_thresh = z > thresh;
    const float weight_large = float(above_thresh);
    const float weight_small = float(!above_thresh);
    return result_for_large_z * weight_large + result_for_small_z * weight_small;
}
// Normalized lower incomplete gamma function for small s:
float4 normalized_ligamma(const float4 s, const float4 z)
{
    //  Requires:   s < ~0.5
    //  Returns:    Approximate the normalized lower incomplete gamma function
    //              for s < 0.5.  This wrapper only derives the two reciprocals
    //              that normalized_ligamma_impl() expects and forwards to it.
    const float4 reciprocal_s = 1.0/s;
    const float4 reciprocal_gamma_s = 1.0/gamma_impl(s, reciprocal_s);
    return normalized_ligamma_impl(s, z, reciprocal_s, reciprocal_gamma_s);
}
float3 normalized_ligamma(const float3 s, const float3 z)
{
    //  Float3 version: derive 1/s and 1/gamma(s), then defer to the
    //  implementation overload.
    const float3 reciprocal_s = 1.0/s;
    const float3 reciprocal_gamma_s = 1.0/gamma_impl(s, reciprocal_s);
    return normalized_ligamma_impl(s, z, reciprocal_s, reciprocal_gamma_s);
}
float2 normalized_ligamma(const float2 s, const float2 z)
{
    //  Float2 version: derive 1/s and 1/gamma(s), then defer to the
    //  implementation overload.
    const float2 reciprocal_s = 1.0/s;
    const float2 reciprocal_gamma_s = 1.0/gamma_impl(s, reciprocal_s);
    return normalized_ligamma_impl(s, z, reciprocal_s, reciprocal_gamma_s);
}
float normalized_ligamma(const float s, const float z)
{
    //  Scalar version: derive 1/s and 1/gamma(s), then defer to the
    //  implementation overload.
    const float reciprocal_s = 1.0/s;
    const float reciprocal_gamma_s = 1.0/gamma_impl(s, reciprocal_s);
    return normalized_ligamma_impl(s, z, reciprocal_s, reciprocal_gamma_s);
}
#endif // _SPECIAL_FUNCTIONS_H

View file

@ -1,428 +0,0 @@
#ifndef _USER_SETTINGS_H
#define _USER_SETTINGS_H
///////////////////////////// DRIVER CAPABILITIES ////////////////////////////
// The Cg compiler uses different "profiles" with different capabilities.
// This shader requires a Cg compilation profile >= arbfp1, but a few options
// require higher profiles like fp30 or fp40. The shader can't detect profile
// or driver capabilities, so instead you must comment or uncomment the lines
// below with "//" before "#define." Disable an option if you get compilation
// errors resembling those listed. Generally speaking, all of these options
// will run on nVidia cards, but only _DRIVERS_ALLOW_TEX2DBIAS (if that) is
// likely to run on ATI/AMD, due to the Cg compiler's profile limitations.
// Derivatives: Unsupported on fp20, ps_1_1, ps_1_2, ps_1_3, and arbfp1.
// Among other things, derivatives help us fix anisotropic filtering artifacts
// with curved manually tiled phosphor mask coords. Related errors:
// error C3004: function "float2 ddx(float2);" not supported in this profile
// error C3004: function "float2 ddy(float2);" not supported in this profile
#ifndef _DRIVERS_ALLOW_DERIVATIVES
#define _DRIVERS_ALLOW_DERIVATIVES 0
#endif
// Fine derivatives: Unsupported on older ATI cards.
// Fine derivatives enable 2x2 fragment block communication, letting us perform
// fast single-pass blur operations. If your card uses coarse derivatives and
// these are enabled, blurs could look broken. Derivatives are a prerequisite.
// NOTE(review): Unlike the neighboring capability switches, which are always
// defined to 0 or 1, this macro is defined WITHOUT a value and only when
// _DRIVERS_ALLOW_DERIVATIVES is nonzero. It presumably has to be tested with
// #ifdef / defined() rather than a bare #if -- TODO confirm at the use sites.
#if _DRIVERS_ALLOW_DERIVATIVES
#define _DRIVERS_ALLOW_FINE_DERIVATIVES
#endif
// Dynamic looping: Requires an fp30 or newer profile.
// This makes phosphor mask resampling faster in some cases. Related errors:
// error C5013: profile does not support "for" statements and "for" could not
// be unrolled
#ifndef _DRIVERS_ALLOW_DYNAMIC_BRANCHES
#define _DRIVERS_ALLOW_DYNAMIC_BRANCHES 0
#endif
// Without _DRIVERS_ALLOW_DYNAMIC_BRANCHES, we need to use unrollable loops.
// Using one static loop avoids overhead if the user is right, but if the user
// is wrong (loops are allowed), breaking a loop into if-blocked pieces with a
// binary search can potentially save some iterations. However, it may fail:
// error C6001: Temporary register limit of 32 exceeded; 35 registers
// needed to compile program
#ifndef _ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS
#define _ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS 0
#endif
// tex2Dlod: Requires an fp40 or newer profile. This can be used to disable
// anisotropic filtering, thereby fixing related artifacts. Related errors:
// error C3004: function "float4 tex2Dlod(sampler2D, float4);" not supported in
// this profile
// #ifndef _DRIVERS_ALLOW_TEX2DLOD
// #define _DRIVERS_ALLOW_TEX2DLOD 1
// #endif
// tex2Dbias: Requires an fp30 or newer profile. This can be used to alleviate
// artifacts from anisotropic filtering and mipmapping. Related errors:
// error C3004: function "float4 tex2Dbias(sampler2D, float4);" not supported
// in this profile
// #ifndef _DRIVERS_ALLOW_TEX2DBIAS
// #define _DRIVERS_ALLOW_TEX2DBIAS 0
// #endif
// Integrated graphics compatibility: Integrated graphics like Intel HD 4000
// impose stricter limitations on register counts and instructions. Enable
// _INTEGRATED_GRAPHICS_COMPATIBILITY_MODE if you still see error C6001 or:
// error C6002: Instruction limit of 1024 exceeded: 1523 instructions needed
// to compile program.
// Enabling integrated graphics compatibility mode will automatically disable:
// 1.) _PHOSPHOR_MASK_MANUALLY_RESIZE: The phosphor mask will be softer.
// (This may be reenabled in a later release.)
// 2.) _RUNTIME_GEOMETRY_MODE
// 3.) The high-quality 4x4 Gaussian resize for the bloom approximation
#ifndef _INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
#define _INTEGRATED_GRAPHICS_COMPATIBILITY_MODE 0
#endif
//////////////////////////// USER CODEPATH OPTIONS ///////////////////////////
// To disable a #define option, turn its line into a comment with "//."
// RUNTIME VS. COMPILE-TIME OPTIONS (Major Performance Implications):
// Enable runtime shader parameters in the Retroarch (etc.) GUI? They override
// many of the options in this file and allow real-time tuning, but many of
// them are slower. Disabling them and using this text file will boost FPS.
#ifndef _RUNTIME_SHADER_PARAMS_ENABLE
#define _RUNTIME_SHADER_PARAMS_ENABLE 1
#endif
// Specify the phosphor bloom sigma at runtime? This option is 10% slower, but
// it's the only way to do a wide-enough full bloom with a runtime dot pitch.
#ifndef _RUNTIME_PHOSPHOR_BLOOM_SIGMA
#define _RUNTIME_PHOSPHOR_BLOOM_SIGMA 1
#endif
// Specify antialiasing weight parameters at runtime? (Costs ~20% with cubics)
#ifndef _RUNTIME_ANTIALIAS_WEIGHTS
#define _RUNTIME_ANTIALIAS_WEIGHTS 1
#endif
// Specify subpixel offsets at runtime? (WARNING: EXTREMELY EXPENSIVE!)
#ifndef _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
#define _RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS 0
#endif
// Make beam_horiz_filter and beam_horiz_linear_rgb_weight into runtime shader
// parameters? This will require more math or dynamic branching.
#ifndef _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
#define _RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE 1
#endif
// Specify the tilt at runtime? This makes things about 3% slower.
// akgunter:
// This is used in crt-royale-geometry-aa-last-pass.fxh.
// I've hard-coded it to 1 and hidden it from the UI in the ReShade version because
// I don't know a good way to port that logic. If anyone ever does figure that
// out, we can uncomment and port that logic and then unhide this definition.
#define _RUNTIME_GEOMETRY_TILT 1
// Specify the geometry mode at runtime?
#ifndef _RUNTIME_GEOMETRY_MODE
#define _RUNTIME_GEOMETRY_MODE 1
#endif
// Specify the phosphor mask type (aperture grille, slot mask, shadow mask) and
// mode (Lanczos-resize, hardware resize, or tile 1:1) at runtime, even without
// dynamic branches? This is cheap if mask_resize_viewport_scale is small.
// #ifndef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
// #define FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT 1
// #endif
// PHOSPHOR MASK:
// Choose between a 64x64 or 512x512 source for the phosphor mask
// Mainly affects Sample Mode 1
// #ifndef USE_LARGE_PHOSPHOR_MASK
// #define USE_LARGE_PHOSPHOR_MASK 1
// #endif
// Manually resize the phosphor mask for best results (slower)? Disabling this
// removes the option to do so, but it may be faster without dynamic branches.
#ifndef _PHOSPHOR_MASK_MANUALLY_RESIZE
#define _PHOSPHOR_MASK_MANUALLY_RESIZE 1
#endif
// If we sinc-resize the mask, should we Lanczos-window it (slower but better)?
// #ifndef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
// #define PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW 1
// #endif
// Larger blurs are expensive, but we need them to blur larger triads. We can
// detect the right blur if the triad size is static or our profile allows
// dynamic branches, but otherwise we use the largest blur the user indicates
// they might need:
#define _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS 1
#define _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS 2
#define _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS 3
#define _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS 4
#if !_RUNTIME_PHOSPHOR_BLOOM_SIGMA
#ifndef PHOSPHOR_BLOOM_TRIAD_SIZE_MODE
#define PHOSPHOR_BLOOM_TRIAD_SIZE_MODE _PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS // [0 - 4]
#endif
#endif
// Here's a helpful chart:
// MaxTriadSize BlurSize MinTriadCountsByResolution
// 3.0 9.0 480/640/960/1920 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 6.0 17.0 240/320/480/960 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 9.0 25.0 160/213/320/640 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 12.0 31.0 120/160/240/480 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 18.0 43.0 80/107/160/320 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
/////////////////////////////// USER PARAMETERS //////////////////////////////
// Note: Many of these static parameters are overridden by runtime shader
// parameters when those are enabled. However, many others are static codepath
// options that were cleaner or more convenient to code as static constants.
// GAMMA:
static const float crt_gamma_static = 2.5; // range [1, 5]
static const float lcd_gamma_static = 2.2; // range [1, 5]
// LEVELS MANAGEMENT:
// Control the final multiplicative image contrast:
static const float levels_contrast_static = 1.0; // range [0, 4)
// We auto-dim to avoid clipping between passes and restore brightness
// later. Control the dim factor here: Lower values clip less but crush
// blacks more (static only for now).
static const float levels_autodim_temp = 0.5; // range (0, 1]; 0.5 is the default and the value in effect here (the port author found it unnecessarily dark and preferred 1.0)
// HALATION/DIFFUSION/BLOOM:
// Halation weight: How much energy should be lost to electrons bouncing
// around under the CRT glass and exciting random phosphors?
static const float halation_weight_static = 0.0; // range [0, 1]
// Refractive diffusion weight: How much light should spread/diffuse from
// refracting through the CRT glass?
static const float diffusion_weight_static = 0.075; // range [0, 1]
// Underestimate brightness: Bright areas bloom more, but we can base the
// bloom brightpass on a lower brightness to sharpen phosphors, or a higher
// brightness to soften them. Low values clip, but >= 0.8 looks okay.
static const float bloom_underestimate_levels_static = 0.8; // range [0, 5]
// Blur all colors more than necessary for a softer phosphor bloom?
static const float bloom_excess_static = 0.0; // range [0, 1]
// The BLOOM_APPROX pass approximates a phosphor blur early on with a small
// blurred resize of the input (convergence offsets are applied as well).
// There are three filter options (static option only for now):
// 0.) Bilinear resize: A fast, close approximation to a 4x4 resize
// if min_allowed_viewport_triads and the BLOOM_APPROX resolution are sane
// and gaussian_beam_max_sigma is low.
// 1.) 3x3 resize blur: Medium speed, soft/smeared from bilinear blurring,
// always uses a static sigma regardless of gaussian_beam_max_sigma or
// mask_num_triads_across.
// 2.) True 4x4 Gaussian resize: Slowest, technically correct.
// These options are more pronounced for the fast, unbloomed shader version.
#ifndef RADEON_FIX
#define RADEON_FIX 0
#endif
#if !RADEON_FIX
static const float bloom_approx_filter_static = 2.0;
#else
static const float bloom_approx_filter_static = 1.0;
#endif
// ELECTRON BEAM SCANLINE DISTRIBUTION:
// How many scanlines should contribute light to each pixel? Using more
// scanlines is slower (especially for a generalized Gaussian) but less
// distorted with larger beam sigmas (especially for a pure Gaussian). The
// max_beam_sigma at which the closest unused weight is guaranteed <
// 1.0/255.0 (for a 3x antialiased pure Gaussian) is:
// 2 scanlines: max_beam_sigma = 0.2089; distortions begin ~0.34; 141.7 FPS pure, 131.9 FPS generalized
// 3 scanlines, max_beam_sigma = 0.3879; distortions begin ~0.52; 137.5 FPS pure; 123.8 FPS generalized
// 4 scanlines, max_beam_sigma = 0.5723; distortions begin ~0.70; 134.7 FPS pure; 117.2 FPS generalized
// 5 scanlines, max_beam_sigma = 0.7591; distortions begin ~0.89; 131.6 FPS pure; 112.1 FPS generalized
// 6 scanlines, max_beam_sigma = 0.9483; distortions begin ~1.08; 127.9 FPS pure; 105.6 FPS generalized
static const float beam_num_scanlines = 3.0; // range [2, 6]
// A generalized Gaussian beam varies shape with color too, not just width.
// It's slower but more flexible (static option only for now).
static const bool beam_generalized_gaussian = true;
// What kind of scanline antialiasing do you want?
// 0: Sample weights at 1x; 1: Sample weights at 3x; 2: Compute an integral
// Integrals are slow (especially for generalized Gaussians) and rarely any
// better than 3x antialiasing (static option only for now).
static const float beam_antialias_level = 1.0; // range [0, 2]
// Min/max standard deviations for scanline beams: Higher values widen and
// soften scanlines. Depending on other options, low min sigmas can alias.
static const float gaussian_beam_min_sigma_static = 0.02; // range (0, 1]
static const float gaussian_beam_max_sigma_static = 0.3; // range (0, 1]
// Beam width varies as a function of color: A power function (0) is more
// configurable, but a spherical function (1) gives the widest beam
// variability without aliasing (static option only for now).
static const float beam_spot_shape_function = 0.0;
// Spot shape power: Powers <= 1 give smoother spot shapes but lower
// sharpness. Powers >= 1.0 are awful unless min/max sigmas are close.
static const float gaussian_beam_spot_power_static = 1.0/3.0; // range (0, 16]
// Generalized Gaussian max shape parameters: Higher values give flatter
// scanline plateaus and steeper dropoffs, simultaneously widening and
// sharpening scanlines at the cost of aliasing. 2.0 is pure Gaussian, and
// values > ~40.0 cause artifacts with integrals.
static const float gaussian_beam_min_shape_static = 2.0; // range [2, 32]
static const float gaussian_beam_max_shape_static = 4.0; // range [2, 32]
// Generalized Gaussian shape power: Affects how quickly the distribution
// changes shape from Gaussian to steep/plateaued as color increases from 0
// to 1.0. Higher powers appear softer for most colors, and lower powers
// appear sharper for most colors.
static const float gaussian_beam_shape_power_static = 1.0/4.0; // range (0, 16]
// What filter should be used to sample scanlines horizontally?
// 0: Quilez (fast), 1: Gaussian (configurable), 2: Lanczos2 (sharp)
static const float beam_horiz_filter_static = 0.0;
// Standard deviation for horizontal Gaussian resampling:
static const float beam_horiz_sigma_static = 0.35; // range (0, 2/3]
// Do horizontal scanline sampling in linear RGB (correct light mixing),
// gamma-encoded RGB (darker, hard spot shape, may better match bandwidth-
// limiting circuitry in some CRT's), or a weighted avg.?
static const float beam_horiz_linear_rgb_weight_static = 1.0; // range [0, 1]
// Simulate scanline misconvergence? This needs 3x horizontal texture
// samples and 3x texture samples of BLOOM_APPROX and HALATION_BLUR in
// later passes (static option only for now).
static const bool beam_misconvergence = true;
// Convergence offsets in x/y directions for R/G/B scanline beams in units
// of scanlines. Positive offsets go right/down; ranges [-2, 2]
static const float2 convergence_offsets_r_static = float2(0.1, 0.2);
static const float2 convergence_offsets_g_static = float2(0.3, 0.4);
static const float2 convergence_offsets_b_static = float2(0.5, 0.6);
// Detect interlacing (static option only for now)?
static const bool interlace_detect = true;
// Assume 1080-line sources are interlaced?
static const bool interlace_1080i_static = false;
// For interlaced sources, assume TFF (top-field first) or BFF order?
// (Whether this matters depends on the nature of the interlaced input.)
static const bool interlace_back_field_first_static = false;
// ANTIALIASING:
// What AA level do you want for curvature/overscan/subpixels? Options:
// 0x (none), 1x (sample subpixels), 4x, 5x, 6x, 7x, 8x, 12x, 16x, 20x, 24x
// (Static option only for now)
#ifndef antialias_level
#define antialias_level 0.0
#endif
// static const float aa_level = 12.0; // range [0, 24]
// static const float aa_level = 0.0; // range [0, 24]
// What antialiasing filter do you want (static option only)? Options:
// 0: Box (separable), 1: Box (cylindrical),
// 2: Tent (separable), 3: Tent (cylindrical),
// 4: Gaussian (separable), 5: Gaussian (cylindrical),
// 6: Cubic* (separable), 7: Cubic* (cylindrical, poor)
// 8: Lanczos Sinc (separable), 9: Lanczos Jinc (cylindrical, poor)
// * = Especially slow with _RUNTIME_ANTIALIAS_WEIGHTS
#ifndef antialias_filter
#define antialias_filter 6
#endif
static const float aa_filter = antialias_filter; // range [0, 9]
// Flip the sample grid on odd/even frames (static option only for now)?
#ifndef antialias_temporal
#define antialias_temporal false
#endif
static const bool aa_temporal = antialias_temporal;
// Use RGB subpixel offsets for antialiasing? The pixel is at green, and
// the blue offset is the negative r offset; range [0, 0.5]
static const float2 aa_subpixel_r_offset_static = float2(-1.0/3.0, 0.0);//float2(0.0);
// Cubics: See http://www.imagemagick.org/Usage/filter/#mitchell
// 1.) "Keys cubics" with B = 1 - 2C are considered the highest quality.
// 2.) C = 0.5 (default) is Catmull-Rom; higher C's apply sharpening.
// 3.) C = 1.0/3.0 is the Mitchell-Netravali filter.
// 4.) C = 0.0 is a soft spline filter.
static const float aa_cubic_c_static = 0.5; // range [0, 4]
// Standard deviation for Gaussian antialiasing: Try 0.5/aa_pixel_diameter.
static const float aa_gauss_sigma_static = 0.5; // range [0.0625, 1.0]
// PHOSPHOR MASK:
// Mask type: 0 = aperture grille, 1 = slot mask, 2 = shadow mask
// 3 = lowres grille, 4 = lowres slot, 5 = lowres shadow
static const float mask_type_static = 4.0; // range [0, 5]
// We can sample the mask three ways. Pick 2/3 from: Pretty/Fast/Flexible.
// 0.) Sinc-resize to the desired dot pitch manually (pretty/slow/flexible).
// This requires _PHOSPHOR_MASK_MANUALLY_RESIZE to be #defined.
// 1.) Hardware-resize to the desired dot pitch (ugly/fast/flexible). This
// is halfway decent with LUT mipmapping but atrocious without it.
// 2.) Tile it without resizing at a 1:1 texel:pixel ratio for flat coords
// (pretty/fast/inflexible). Each input LUT has a fixed dot pitch.
// This mode reuses the same masks, so triads will be enormous unless
// you change the mask LUT filenames in your .cgp file.
static const float mask_sample_mode_static = 0.0; // range [0, 2]
// Prefer setting the triad size (0.0) or number on the screen (1.0)?
// If _RUNTIME_PHOSPHOR_BLOOM_SIGMA isn't #defined, the specified triad size
// will always be used to calculate the full bloom sigma statically.
static const float mask_size_param_static = 0.0; // range [0, 1]
// Specify the phosphor triad size, in pixels. Each tile (usually with 8
// triads) will be rounded to the nearest integer tile size and clamped to
// obey minimum size constraints (imposed to reduce downsize taps) and
// maximum size constraints (imposed to have a sane MASK_RESIZE FBO size).
// To increase the size limit, double the viewport-relative scales for the
// two MASK_RESIZE passes in crt-royale.cgp and user-cgp-constants.h.
// range [1, mask_texture_small_size/mask_triads_per_tile]
static const float mask_triad_width_static = 24.0 / 8.0;
// If mask_size_param is 1.0/true, we'll go by this instead (the
// final size will be rounded and constrained as above); default 480.0
static const float mask_num_triads_across_static = 480.0;
// How many lobes should the sinc/Lanczos resizer use? More lobes require
// more samples and avoid moire a bit better, but some is unavoidable
// depending on the destination size (static option for now).
static const float mask_sinc_lobes = 3.0; // range [2, 4]
// The mask is resized using a variable number of taps in each dimension,
// but some Cg profiles always fetch a constant number of taps no matter
// what (no dynamic branching). We can limit the maximum number of taps if
// we statically limit the minimum phosphor triad size. Larger values are
// faster, but the limit IS enforced (static option only, forever);
// range [1, mask_texture_small_size/mask_triads_per_tile]
// TODO: Make this 1.0 and compensate with smarter sampling!
static const float mask_min_allowed_triad_size = 2.0;
// GEOMETRY:
// Geometry mode:
// 0: Off (default), 1: Spherical mapping (like cgwg's),
// 2: Alt. spherical mapping (more bulbous), 3: Cylindrical/Trinitron
static const float geom_mode_static = 0.0; // range [0, 3]
// Radius of curvature: Measured in units of your viewport's diagonal size.
static const float geom_radius_static = 2.0; // range [1/(2*pi), 1024]
// View dist is the distance from the player to their physical screen, in
// units of the viewport's diagonal size. It controls the field of view.
static const float geom_view_dist_static = 2.0; // range [0.5, 1024]
// Tilt angle in radians (clockwise around up and right vectors):
static const float2 geom_tilt_angle_static = float2(0.0, 0.0); // range [-pi, pi]
// Aspect ratio: When the true viewport size is unknown, this value is used
// to help convert between the phosphor triad size and count, along with
// the mask_resize_viewport_scale constant from user-cgp-constants.h. Set
// this equal to Retroarch's display aspect ratio (DAR) for best results;
// range [1, geom_max_aspect_ratio from user-cgp-constants.h];
// default (256/224)*(54/47) = 1.313069909 (see below)
static const float geom_aspect_ratio_static = 1.313069909;
// Before getting into overscan, here's some general aspect ratio info:
// - DAR = display aspect ratio = SAR * PAR; as in your Retroarch setting
// - SAR = storage aspect ratio = DAR / PAR; square pixel emulator frame AR
// - PAR = pixel aspect ratio = DAR / SAR; holds regardless of cropping
// Geometry processing has to "undo" the screen-space 2D DAR to calculate
// 3D view vectors, then reapplies the aspect ratio to the simulated CRT in
// uv-space. To ensure the source SAR is intended for a ~4:3 DAR, either:
// a.) Enable Retroarch's "Crop Overscan"
// b.) Readd horizontal padding: Set overscan to e.g. N*(1.0, 240.0/224.0)
// Real consoles use horizontal black padding in the signal, but emulators
// often crop this without cropping the vertical padding; a 256x224 [S]NES
// frame (8:7 SAR) is intended for a ~4:3 DAR, but a 256x240 frame is not.
// The correct [S]NES PAR is 54:47, found by blargg and NewRisingSun:
// http://board.zsnes.com/phpBB3/viewtopic.php?f=22&t=11928&start=50
// http://forums.nesdev.com/viewtopic.php?p=24815#p24815
// For flat output, it's okay to set DAR = [existing] SAR * [correct] PAR
// without doing a. or b., but horizontal image borders will be tighter
// than vertical ones, messing up curvature and overscan. Fixing the
// padding first corrects this.
// Overscan: Amount to "zoom in" before cropping. You can zoom uniformly
// or adjust x/y independently to e.g. readd horizontal padding, as noted
// above: Values < 1.0 zoom out; range (0, inf)
static const float2 geom_overscan_static = float2(1.0, 1.0);// * 1.005 * (1.0, 240/224.0)
// Compute a proper pixel-space to texture-space matrix even without ddx()/
// ddy()? This is ~8.5% slower but improves antialiasing/subpixel filtering
// with strong curvature (static option only for now).
static const bool geom_force_correct_tangent_matrix = true;
// BORDERS:
// Rounded border size in texture uv coords:
static const float border_size_static = 0.015; // range [0, 0.5]
// Border darkness: Moderate values darken the border smoothly, and high
// values make the image very dark just inside the border:
static const float border_darkness_static = 2.0; // range [0, inf)
// Border compression: High numbers compress border transitions, narrowing
// the dark border area.
static const float border_compress_static = 2.5; // range [1, inf)
// TODO: Nuke this
#define mask_size_xy float2(512, 512)
#endif // _USER_SETTINGS_H

View file

@ -1,149 +0,0 @@
#ifndef _BLOOM_H
#define _BLOOM_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade.
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
#include "../lib/user-settings.fxh"
#include "../lib/derived-settings-and-constants.fxh"
#include "../lib/bind-shader-params.fxh"
#include "../lib/gamma-management.fxh"
#include "../lib/downsampling-functions.fxh"
#include "../lib/blur-functions.fxh"
#include "../lib/bloom-functions.fxh"
#include "shared-objects.fxh"
void approximateBloomVertPS(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    out float4 color : SV_Target
) {
    // Vertical pass of the bloom approximation: downsample the
    // beam-convergence texture along y and write an opaque result.
    // The uv step between taps is one source texel in y, scaled by blur_radius.
    const float2 vertical_step = blur_radius * float2(0.0, rcp(TEX_BEAMCONVERGENCE_HEIGHT));
    // Integer half-extent derived from the downsizing factor; its exact
    // meaning is defined by opaque_linear_downsample() -- TODO confirm.
    const uint half_extent = uint((bloomapprox_downsizing_factor - 1)/2);
    color = float4(
        opaque_linear_downsample(samplerBeamConvergence, texcoord, half_extent, vertical_step),
        1);
}
void approximateBloomHorizPS(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    out float4 color : SV_Target
) {
    // Horizontal pass of the bloom approximation: downsample the output of
    // the vertical pass along x and write an opaque result.
    // The uv step between taps is one beam-convergence texel in x, scaled by
    // blur_radius.
    const float2 horizontal_step = blur_radius * float2(rcp(TEX_BEAMCONVERGENCE_WIDTH), 0.0);
    // Integer half-extent derived from the downsizing factor; its exact
    // meaning is defined by opaque_linear_downsample() -- TODO confirm.
    const uint half_extent = uint((bloomapprox_downsizing_factor - 1)/2);
    color = float4(
        opaque_linear_downsample(samplerBloomApproxVert, texcoord, half_extent, horizontal_step),
        1);
}
void bloomHorizontalVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0,
    out float bloom_sigma_runtime : TEXCOORD1
) {
    // Emit the standard post-process vertex for this vertex id.
    PostProcessVS(id, position, texcoord);
    // Compute the runtime bloom sigma once per vertex from the horizontal
    // triad size and pass it to the pixel shader through TEXCOORD1.
    const float triad_width = calc_triad_size().x;
    bloom_sigma_runtime = get_min_sigma_to_blur_triad(triad_width, bloom_diff_thresh_);
}
void bloomHorizontalPS(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    in float bloom_sigma_runtime : TEXCOORD1,
    out float4 color : SV_Target
) {
    // Step 1: blur the vertically blurred brightpass along x
    // (9/17/25/43x kernel, chosen by the blur helper from the sigma).
    const float2 texel_step_x = float2(rcp(TEX_BLOOMVERTICAL_WIDTH), 0);
    const float sigma = get_final_bloom_sigma(bloom_sigma_runtime);
    const float3 brightpass_blurred = tex2DblurNfast(samplerBloomVertical,
        texcoord, texel_step_x, sigma, get_intermediate_gamma());

    // Step 2: fetch the (auto-dimmed) masked scanlines and the unblurred
    // brightpass, both linearized with the intermediate gamma.
    const float3 scanlines_dimmed = tex2D_linearize(samplerMaskedScanlines, texcoord, get_intermediate_gamma()).rgb;
    const float3 brightpass_sharp = tex2D_linearize(samplerBrightpass, texcoord, get_intermediate_gamma()).rgb;

    // Step 3: the dimpass is whatever the brightpass did not take.  Add the
    // blurred brightpass back, then amplify (mask dim), undo the scanline
    // auto-dim (static levels_autodim_temp) and apply contrast.
    const float undo_autodim = 1.0/levels_autodim_temp;
    const float amplify = get_mask_amplify();
    const float3 dimpass = scanlines_dimmed - brightpass_sharp;
    const float3 phosphor_bloom = (dimpass + brightpass_blurred) *
        amplify * undo_autodim * levels_contrast;

    // Step 4: sample the halation texture and let some light bleed into
    // refractive diffusion.  Conceptually this happens before the phosphor
    // bloom, but adding it in earlier passes causes black crush in the
    // diffusion colors.  Halation is the channel average of the diffusion.
    const float3 diffusion_raw = tex2D_linearize(samplerBlurHorizontal, texcoord, get_intermediate_gamma()).rgb;
    const float diffusion_mean = dot(diffusion_raw, float3(1, 1, 1)) / 3.0;
    const float3 halation_raw = float3(diffusion_mean, diffusion_mean, diffusion_mean);
    const float3 diffusion_final = levels_contrast * lerp(diffusion_raw, halation_raw, halation_weight);

    // Step 5: mix bloom with diffusion, then encode the bloomed image.
    const float3 bloomed = lerp(phosphor_bloom, diffusion_final, diffusion_weight);
    color = encode_output(float4(bloomed, 1.0), get_intermediate_gamma());
}
void bloomVerticalVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0,
    out float bloom_sigma_runtime : TEXCOORD1
) {
    // Emit the standard post-process vertex for this vertex id.
    PostProcessVS(id, position, texcoord);
    // The vertical pass also derives its sigma from the triad size's x
    // component, exactly like the horizontal pass, and hands it to the pixel
    // shader through TEXCOORD1.
    const float triad_width = calc_triad_size().x;
    bloom_sigma_runtime = get_min_sigma_to_blur_triad(triad_width, bloom_diff_thresh_);
}
void bloomVerticalPS(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    in float bloom_sigma_runtime : TEXCOORD1,
    out float4 color : SV_Target
) {
    // Blur the brightpass vertically with a 9/17/25/43x blur: the tap step
    // is one texel along y (x component is zero).
    const float2 texel_step_y = float2(0, rcp(TEX_BLOOMVERTICAL_HEIGHT));
    const float sigma = get_final_bloom_sigma(bloom_sigma_runtime);
    const float3 blurred = tex2DblurNfast(samplerBrightpass, texcoord,
        texel_step_y, sigma, get_intermediate_gamma());
    // Encode and output the blurred image as an opaque color:
    color = encode_output(float4(blurred, 1.0), get_intermediate_gamma());
}
#endif // _BLOOM_H

View file

@ -1,131 +0,0 @@
#ifndef _BLURRING_H
#define _BLURRING_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade.
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
#include "../lib/gamma-management.fxh"
#include "../lib/blur-functions.fxh"
#include "shared-objects.fxh"
// Vertex shader for the horizontal blur pass.
//   id                 - vertex index, handed to PostProcessVS (defined elsewhere)
//   position, texcoord - filled in by PostProcessVS
//   blur_dxdy          - uv step between blur taps: (1 / width, 0)
void blurHorizontalVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0,
    out float2 blur_dxdy : TEXCOORD1
) {
    PostProcessVS(id, position, texcoord);
    // Get the uv sample distance between output pixels. Blurs are not generic
    // Gaussian resizers, and correct blurs require:
    // 1.) OutputSize == InputSize * 2^m, where m is an integer <= 0.
    // 2.) mipmap_inputN = "true" for this pass in the preset if m != 0
    // 3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs
    // Gaussian resizers would upsize using the distance between input texels
    // (not output pixels), but we avoid this and consistently blur at the
    // destination size. Otherwise, combining statically calculated weights
    // with bilinear sample exploitation would result in terrible artifacts.
    static const float2 output_size = TEX_BLURHORIZONTAL_SIZE;
    static const float2 dxdy = 1.0 / output_size;
    // This blur is horizontal-only, so zero out the vertical offset:
    blur_dxdy = float2(dxdy.x, 0.0);
}
// Pixel shader for the horizontal blur pass.
//   pos       - unused
//   texcoord  - uv at which samplerBlurVertical is sampled
//   blur_dxdy - uv step between blur taps, received through TEXCOORD1
//   color     - blurred colour, encoded with the intermediate gamma
void blurHorizontalPS(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    in float2 blur_dxdy : TEXCOORD1,
    out float4 color : SV_Target
) {
    // The result depends on per-pixel inputs, so it is a plain const local
    // rather than a `static` one.
    const float3 blur_color = tex2Dblur9fast(samplerBlurVertical, texcoord, blur_dxdy, get_intermediate_gamma());
    // Encode and output the blurred image:
    color = encode_output(float4(blur_color, 1.0), get_intermediate_gamma());
}
// Vertex shader for the vertical blur pass.
//   id                 - vertex index, handed to PostProcessVS (defined elsewhere)
//   position, texcoord - filled in by PostProcessVS
//   blur_dxdy          - uv step between blur taps: (0, 1 / height)
void blurVerticalVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0,
    out float2 blur_dxdy : TEXCOORD1
) {
    PostProcessVS(id, position, texcoord);

    // The tap spacing is the uv distance between *output* pixels. These blurs
    // are not general-purpose Gaussian resizers; they are only correct when:
    //   1.) OutputSize == InputSize * 2^m, with m an integer <= 0,
    //   2.) mipmap_inputN = "true" for this pass in the preset whenever m != 0,
    //   3.) filter_linearN = "true", except for 1x-scale nearest-neighbor blurs.
    // A Gaussian resizer would instead step by the distance between input
    // texels when upsizing. Blurring at the destination size throughout avoids
    // the artifacts that statically calculated weights combined with bilinear
    // sample exploitation would otherwise produce.
    static const float2 pass_size = TEX_BLURVERTICAL_SIZE;
    static const float2 texel_step = 1.0 / pass_size;

    // Vertical-only blur: keep the y step and drop the x step.
    blur_dxdy = float2(0.0, texel_step.y);
}
// Pixel shader for the vertical blur pass.
//   pos       - unused
//   texcoord  - uv at which samplerBloomApproxHoriz is sampled
//   blur_dxdy - uv step between blur taps, received through TEXCOORD1
//   color     - blurred colour, encoded with the intermediate gamma
void blurVerticalPS(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    in float2 blur_dxdy : TEXCOORD1,
    out float4 color : SV_Target
) {
    // The result depends on per-pixel inputs, so it is a plain const local
    // rather than a `static` one.
    const float3 blur_color = tex2Dblur9fast(samplerBloomApproxHoriz, texcoord, blur_dxdy, get_intermediate_gamma());
    // Encode and output the blurred image:
    color = encode_output(float4(blur_color, 1.0), get_intermediate_gamma());
}
#endif // _BLURRING_H

View file

@ -1,90 +0,0 @@
#ifndef _BRIGHTPASS_H
#define _BRIGHTPASS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade.
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
#include "../lib/user-settings.fxh"
#include "../lib/derived-settings-and-constants.fxh"
#include "../lib/bind-shader-params.fxh"
#include "../lib/gamma-management.fxh"
#include "../lib/phosphor-mask-calculations.fxh"
#include "../lib/scanline-functions.fxh"
#include "../lib/bloom-functions.fxh"
#include "../lib/blur-functions.fxh"
// Vertex shader for the brightpass.
//   id                  - vertex index, handed to PostProcessVS (defined elsewhere)
//   position, texcoord  - filled in by PostProcessVS
//   bloom_sigma_runtime - blur sigma derived from the triad width, passed on
//                         to the pixel stage through TEXCOORD1
void brightpassVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0,
    out float bloom_sigma_runtime : TEXCOORD1
) {
    PostProcessVS(id, position, texcoord);

    // Only the x component of the triad size feeds the sigma estimate.
    const float triad_width = calc_triad_size().x;
    bloom_sigma_runtime = get_min_sigma_to_blur_triad(triad_width, bloom_diff_thresh_);
}
// Brightpass pixel shader: scales the masked scanlines by the fraction of
// their intensity that should be handed to the bloom blur.
//   pos                 - unused
//   texcoord            - uv used for both texture lookups
//   bloom_sigma_runtime - sigma received through TEXCOORD1
//   color               - brightpass colour, encoded with the intermediate gamma
void brightpassPS(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    in float bloom_sigma_runtime : TEXCOORD1,
    out float4 color : SV_Target
) {
    const float3 ones = float3(1.0, 1.0, 1.0);
    const float3 zeros = float3(0.0, 0.0, 0.0);

    // Linearized masked scanlines, still auto-dimmed and unamplified.
    const float3 dim_sample = tex2D_linearize(samplerMaskedScanlines, texcoord, get_intermediate_gamma()).rgb;

    // Full intensity: undo the auto-dimming, then apply the mask
    // amplification and the contrast level.
    const float amplify = get_mask_amplify();
    const float undim = rcp(levels_autodim_temp);
    const float3 full_intensity = dim_sample * undim * amplify * levels_contrast;

    // BLOOM_APPROX stands in for a straight blur of the masked scanlines; it
    // is used to estimate the energy arriving from blooming neighbors.
    const float3 blur_estimate = levels_contrast * tex2D_linearize(samplerBloomApproxHoriz, texcoord, get_intermediate_gamma()).rgb;

    // Weight of the center texel in the blur, and the most energy the
    // neighbors are expected to contribute.
    const float sigma = get_final_bloom_sigma(bloom_sigma_runtime);
    const float w_center = get_center_weight(sigma);
    const float3 neighbor_energy = max(zeros, blur_estimate - w_center * full_intensity);

    // Neighbors are assumed to blur all of their intensity (blur_ratio = 1.0);
    // both terms are scaled by the user's underestimate factor.
    const float3 neighbor_under = bloom_underestimate_levels * neighbor_energy;
    const float3 self_under = bloom_underestimate_levels * full_intensity;

    // blur_ratio: the share of this texel's intensity that should be blurred.
    const float3 headroom = (ones - neighbor_under) / self_under;
    const float3 ratio_raw = (headroom - ones) / (w_center - 1.0);
    const float3 ratio = saturate(ratio_raw);

    // The brightpass is built from the auto-dimmed, unamplified sample;
    // bloom_excess blends the ratio toward 1.
    const float3 result = dim_sample * lerp(ratio, ones, bloom_excess);
    color = encode_output(float4(result, 1.0), get_intermediate_gamma());
}
#endif // _BRIGHTPASS_H

View file

@ -1,221 +0,0 @@
#ifndef _CONTENT_BOX_H
#define _CONTENT_BOX_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2020 Alex Gunter
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
#include "shared-objects.fxh"
// Vertex shader that maps the content rectangle onto the full clip-space quad.
//   id       - vertex index; its two low bits select the corner
//   position - clip-space corner at (+/-1, +/-1), with z = w = 1
//   texcoord - matching corner of the content_left/right/upper/lower rectangle
void contentCropVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0
) {
    // The DX9 path selects corners with comparisons instead of bitwise tests
    // (presumably because that target lacks integer bit operations).
#if _DX9_ACTIVE
    const bool bit0 = (id == 1 || id == 3);
    const bool bit1 = (id > 1);
#else
    const bool bit0 = (id & 1) != 0;
    const bool bit1 = (id & 2) != 0;
#endif

    texcoord = float2(
        bit0 ? content_right : content_left,
        bit1 ? content_lower : content_upper
    );
    position = float4(
        bit0 ? 1 : -1,
        bit1 ? -1 : 1,
        1,
        1
    );
}
#if USE_VERTEX_UNCROPPING
/*
* Using the vertex shader for uncropping can save about 0.1ms in some apps.
* However, some apps like SNES9X w/ DX9 don't trigger a refresh of the entire screen,
* which in turn causes the ReShade UI to "stick around" after it's closed.
*
* The slower algorithm forces the entire screen to refresh, which forces the
* area outside the content box to be black. I assume most users will prefer
* the results of the slower algorithm and won't notice the 0.1ms. Users who
* need that 0.1ms can use a preprocessor def to recover that time.
*/
// Vertex-stage uncropping: emits a quad scaled by content_scale.
//   id       - vertex index; its two low bits select the corner
//   position - clip-space corner at (+/-content_scale.x, +/-content_scale.y),
//              with z = w = 1
//   texcoord - 0 or 1 per axis
void contentUncropVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0
) {
    // The DX9 path selects corners with comparisons instead of bitwise tests.
#if _DX9_ACTIVE
    const bool bit0 = (id == 1 || id == 3);
    const bool bit1 = (id > 1);
#else
    const bool bit0 = (id & 1) != 0;
    const bool bit1 = (id & 2) != 0;
#endif

    // texcoord.y is the inverse of bit1: 1 while bit1 is clear, 0 once set.
    texcoord = float2(
        bit0 ? 1.0 : 0.0,
        bit1 ? 0.0 : 1.0
    );
    position = float4(
        bit0 ? content_scale.x : -content_scale.x,
        bit1 ? content_scale.y : -content_scale.y,
        1,
        1
    );
}
// Pixel stage used with vertex-stage uncropping: a straight copy of
// samplerGeometry at the interpolated texcoord.
//   pos      - unused
//   texcoord - uv to sample
//   color    - the sampled texel, unmodified
void uncropContentPixelShader(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    out float4 color : SV_Target
) {
    const float4 sampled = tex2D(samplerGeometry, texcoord);
    color = sampled;
}
#else
// Full-screen variant of the uncrop vertex shader: the quad always spans
// (+/-1, +/-1) and the pixel stage handles the content box.
//   id       - vertex index; its two low bits select the corner
//   position - clip-space corner with z = w = 1
//   texcoord - 0 or 1 per axis
void contentUncropVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0
) {
    // The DX9 path selects corners with comparisons instead of bitwise tests.
#if _DX9_ACTIVE
    const bool bit0 = (id == 1 || id == 3);
    const bool bit1 = (id > 1);
#else
    const bool bit0 = (id & 1) != 0;
    const bool bit1 = (id & 2) != 0;
#endif

    // texcoord.y is the inverse of bit1: 1 while bit1 is clear, 0 once set.
    texcoord = float2(
        bit0 ? 1.0 : 0.0,
        bit1 ? 0.0 : 1.0
    );
    position = float4(
        bit0 ? 1 : -1,
        bit1 ? 1 : -1,
        1,
        1
    );
}
// Pixel-stage uncropping: remaps the full-screen texcoord into the geometry
// texture and blacks out the rgb of everything outside the content box.
//   pos      - unused
//   texcoord - full-screen uv
//   color    - rgb is the sampled colour inside the content box and zero
//              outside it; alpha is always taken from the sampled texel
void uncropContentPixelShader(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    out float4 color : SV_Target
) {
    // 1.0 inside the content rectangle (edges included), 0.0 elsewhere. Kept
    // as a float because it is used as a multiplicative mask below.
    const float is_in_boundary = float(
        texcoord.x >= content_left && texcoord.x <= content_right &&
        texcoord.y >= content_upper && texcoord.y <= content_lower
    );
    // Shift by the content offset, then rescale from buffer to content size.
    const float2 texcoord_uncropped = ((texcoord - content_offset) * buffer_size) / content_size;
    const float4 raw_color = tex2D(samplerGeometry, texcoord_uncropped);
    color = float4(is_in_boundary * raw_color.rgb, raw_color.a);
}
#endif
#if CONTENT_BOX_VISIBLE
// Defaults for the content-box overlay. Each macro can be predefined before
// this point to override the value below.
// CONTENT_BOX_INSCRIBED: non-zero draws the border inside the content edge,
// zero draws it outside.
#ifndef CONTENT_BOX_INSCRIBED
#define CONTENT_BOX_INSCRIBED 1
#endif
// CONTENT_BOX_THICKNESS: border thickness, divided by the buffer dimensions
// below to obtain a uv-space width.
#ifndef CONTENT_BOX_THICKNESS
#define CONTENT_BOX_THICKNESS 5
#endif
// CONTENT_BOX_COLOR_{R,G,B}: border colour components (default 1, 0, 0).
#ifndef CONTENT_BOX_COLOR_R
#define CONTENT_BOX_COLOR_R 1.0
#endif
#ifndef CONTENT_BOX_COLOR_G
#define CONTENT_BOX_COLOR_G 0.0
#endif
#ifndef CONTENT_BOX_COLOR_B
#define CONTENT_BOX_COLOR_B 0.0
#endif
// Border thickness as a fraction of the buffer width / height.
static const float vert_line_thickness = float(CONTENT_BOX_THICKNESS) / BUFFER_WIDTH;
static const float horiz_line_thickness = float(CONTENT_BOX_THICKNESS) / BUFFER_HEIGHT;
// For every side, *_line_1 is the smaller coordinate and *_line_2 the larger
// one; the band between them is the border on that side.
#if CONTENT_BOX_INSCRIBED
// Set the outer borders to the edge of the content
static const float left_line_1 = content_left;
static const float left_line_2 = left_line_1 + vert_line_thickness;
static const float right_line_2 = content_right;
static const float right_line_1 = right_line_2 - vert_line_thickness;
static const float upper_line_1 = content_upper;
static const float upper_line_2 = upper_line_1 + horiz_line_thickness;
static const float lower_line_2 = content_lower;
static const float lower_line_1 = lower_line_2 - horiz_line_thickness;
#else
// Set the inner borders to the edge of the content
static const float left_line_2 = content_left;
static const float left_line_1 = left_line_2 - vert_line_thickness;
static const float right_line_1 = content_right;
static const float right_line_2 = right_line_1 + vert_line_thickness;
static const float upper_line_2 = content_upper;
static const float upper_line_1 = upper_line_2 - horiz_line_thickness;
static const float lower_line_1 = content_lower;
static const float lower_line_2 = lower_line_1 + horiz_line_thickness;
#endif
// Opaque border colour assembled from the three colour macros.
static const float4 box_color = float4(
CONTENT_BOX_COLOR_R,
CONTENT_BOX_COLOR_G,
CONTENT_BOX_COLOR_B,
1.0
);
// Draws the content-box border over the back buffer.
//   pos      - unused
//   texcoord - uv of the current pixel
//   color    - box_color inside the border band, otherwise the back-buffer
//              texel at texcoord
void contentBoxPixelShader(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    out float4 color : SV_Target
) {
    // Inside (or on) the outer rectangle of the border.
    const bool within_outer_rect = (
        texcoord.x >= left_line_1 && texcoord.x <= right_line_2 &&
        texcoord.y >= upper_line_1 && texcoord.y <= lower_line_2
    );
    // On or beyond at least one edge of the inner rectangle.
    const bool beyond_inner_rect = (
        texcoord.x <= left_line_2 || texcoord.x >= right_line_1 ||
        texcoord.y <= upper_line_2 || texcoord.y >= lower_line_1
    );

    // Anything that is not in the band between the two rectangles shows the
    // back buffer unchanged.
    if (!within_outer_rect || !beyond_inner_rect) {
        color = tex2D(ReShade::BackBuffer, texcoord);
        return;
    }

    color = box_color;
}
#endif // CONTENT_BOX_VISIBLE
#endif // _CONTENT_BOX_H

View file

@ -1,137 +0,0 @@
#ifndef _DEINTERLACE_H
#define _DEINTERLACE_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2020 Alex Gunter
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
#include "../lib/user-settings.fxh"
#include "../lib/derived-settings-and-constants.fxh"
#include "../lib/bind-shader-params.fxh"
#include "../lib/gamma-management.fxh"
#include "../lib/scanline-functions.fxh"
// Vertex shader for the freeze-frame pass.
//   id       - vertex index
//   position - clip-space position derived from texcoord, with z = 0, w = 1
//   texcoord - (0,0), (0,2) or (2,0) depending on id
// When interlacing is off, or the deinterlacing mode is neither 2 nor 3, the
// x coordinate of vertex 2 collapses to 0, so every vertex shares x = 0 and
// the triangle has zero area.
void freezeFrameVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0
) {
    // 1.0 when the freeze-frame texture is needed, 0.0 otherwise.
    float needs_prev_frame = enable_interlacing && (
        scanline_deinterlacing_mode == 2 || scanline_deinterlacing_mode == 3
    );

    texcoord = float2(
        (id == 2) ? needs_prev_frame*2.0 : 0.0,
        (id == 1) ? 2.0 : 0.0
    );

    // uv -> clip space: x * 2 - 1, and y flipped via -2 * y + 1.
    position = float4(texcoord * float2(2, -2) + float2(-1, 1), 0, 1);
}
// Pixel shader for the freeze-frame pass: copies samplerBeamConvergence
// unchanged.
//   pos      - unused
//   texcoord - uv to sample
//   color    - the sampled texel
void freezeFramePS(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    out float4 color : SV_Target
) {
    const float4 sampled = tex2D(samplerBeamConvergence, texcoord);
    color = sampled;
}
// Vertex shader for the deinterlacing pass.
//   id                 - vertex index, forwarded to freezeFrameVS
//   position, texcoord - produced by freezeFrameVS
//   v_step             - uv offset of one scanline along y:
//                        (0, scanline_thickness / TEX_FREEZEFRAME_HEIGHT)
void deinterlaceVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0,
    out float2 v_step : TEXCOORD1
) {
    freezeFrameVS(id, position, texcoord);

    const float scanline_step = scanline_thickness * rcp(TEX_FREEZEFRAME_HEIGHT);
    v_step = float2(0.0, scanline_step);
}
// Pixel shader for the deinterlacing pass. Depending on
// scanline_deinterlacing_mode it blends the current frame
// (samplerBeamConvergence) with the stored previous frame (samplerFreezeFrame):
//   mode 2 (with interlacing enabled) - weaving
//   mode 3 (with interlacing enabled) - blended weaving
//   anything else                     - current frame passed through
//   pos      - unused
//   texcoord - uv of the current pixel
//   v_step   - uv offset of one scanline along y, received through TEXCOORD1
//   color    - resulting colour; the two blending modes encode it with
//              deinterlacing_blend_gamma, the pass-through does not encode
void deinterlacePS(
in float4 pos : SV_Position,
in float2 texcoord : TEXCOORD0,
in float2 v_step : TEXCOORD1,
out float4 color : SV_Target
) {
// float2 scanline_offset_norm;
// float triangle_wave_freq;
// bool field_parity;
// bool wrong_field;
// calc_wrong_field(texcoord, scanline_offset_norm, triangle_wave_freq, field_parity, wrong_field);
// Rotation modes 0 and 2 keep texcoord as-is and scale by the content height;
// other modes swap the axes and scale by the content width. The comparison
// result is used as the lerp weight, so lerp acts as a select here.
float2 rotated_coord = lerp(texcoord.yx, texcoord, geom_rotation_mode == 0 || geom_rotation_mode == 2);
float scale = lerp(CONTENT_WIDTH, CONTENT_HEIGHT, geom_rotation_mode == 0 || geom_rotation_mode == 2);
// Only wrong_field and scanline_parity of the returned data are used below.
InterpolationFieldData interpolation_data = calc_interpolation_field_data(rotated_coord, scale);
// TODO: add scanline_parity to calc_wrong_field()
// Weaving
// Sample texcoord from this frame and the previous frame
// If we're in the correct field, use the current sample
// If we're in the wrong field, average the current and prev samples
// In this case, we're probably averaging a color with 0 and producing a brightness of 0.5.
[branch]
if (enable_interlacing && scanline_deinterlacing_mode == 2) {
// const float cur_scanline_idx = get_curr_scanline_idx(texcoord.y, content_size.y);
// const float wrong_field = curr_line_is_wrong_field(cur_scanline_idx);
const float4 cur_line_color = tex2D_nograd(samplerBeamConvergence, texcoord);
const float4 cur_line_prev_color = tex2D_nograd(samplerFreezeFrame, texcoord);
const float4 avg_color = (cur_line_color + cur_line_prev_color) / 2.0;
// Multiply by 1.5, so each pair of scanlines has total brightness 2
const float4 raw_out_color = lerp(1.5*cur_line_color, avg_color, interpolation_data.wrong_field);
color = encode_output(raw_out_color, deinterlacing_blend_gamma);
}
// Blended Weaving
// Sample texcoord from this frame
// From the previous frame, sample the current scanline's sibling
// Do this by shifting up or down by a line
// If we're in the correct field, use the current sample
// If we're in the wrong field, average the current and prev samples
// In this case, we're averaging two fully illuminated colors
else if (enable_interlacing && scanline_deinterlacing_mode == 3) {
// One scanline step along y, with its sign chosen by scanline_parity.
const float2 raw_offset = lerp(1, -1, interpolation_data.scanline_parity) * v_step;
// Exactly one of the two samples is shifted: the current frame when in the
// wrong field, the previous frame otherwise.
const float2 curr_offset = lerp(0, raw_offset, interpolation_data.wrong_field);
const float2 prev_offset = lerp(raw_offset, 0, interpolation_data.wrong_field);
const float4 cur_line_color = tex2D_nograd(samplerBeamConvergence, texcoord + curr_offset);
const float4 prev_line_color = tex2D_nograd(samplerFreezeFrame, texcoord + prev_offset);
const float4 avg_color = (cur_line_color + prev_line_color) / 2.0;
const float4 raw_out_color = lerp(cur_line_color, avg_color, interpolation_data.wrong_field);
color = encode_output(raw_out_color, deinterlacing_blend_gamma);
}
// No temporal blending
else {
color = tex2D_nograd(samplerBeamConvergence, texcoord);
}
}
#endif // _DEINTERLACE_H

View file

@ -1,347 +0,0 @@
#ifndef _ELECTRON_BEAMS_H
#define _ELECTRON_BEAMS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade.
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
#include "../lib/bind-shader-params.fxh"
#include "../lib/gamma-management.fxh"
#include "../lib/scanline-functions.fxh"
#include "content-box.fxh"
#include "shared-objects.fxh"
// Vertex shader for the beam-distribution pass.
//   id       - vertex index
//   position - clip-space position derived from texcoord, with z = 0, w = 1
//   texcoord - (0,0), (0,2) or (2,0) depending on id
// The pass only covers any area on every 60th frame or while overlay_active
// is positive; otherwise the x coordinate of vertex 2 collapses to 0 and the
// triangle has zero area.
void calculateBeamDistsVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0
) {
    const bool is_refresh_frame = (frame_count % 60 == 0);
    const bool overlay_is_open = (overlay_active > 0);
    // 1.0 when the pass should run, 0.0 otherwise.
    const float should_draw = is_refresh_frame || overlay_is_open;

    texcoord = float2(
        (id == 2) ? should_draw*2.0 : 0.0,
        (id == 1) ? 2.0 : 0.0
    );

    // uv -> clip space: x * 2 - 1, and y flipped via -2 * y + 1.
    position = float4(texcoord * float2(2, -2) + float2(-1, 1), 0, 1);
}
// Pixel shader that fills the beam-distribution texture. texcoord.x (after
// removing a one-texel offset) is used as the input colour level and
// texcoord.y as the position relative to the scanline; the output is the
// beam strength for that combination, shaped according to beam_shape_mode:
//   0    - digital (rectangular) beam
//   1    - linear falloff
//   2    - Gaussian, nearest scanline only
//   else - Gaussian, current scanline plus its two in-field neighbors
//   position      - unused
//   texcoord      - lookup coordinate as described above
//   beam_strength - modes 0-2 write the strength to .x (y = z = 0); the last
//                   mode writes (current, upper, lower) to .xyz; w is always 1
void calculateBeamDistsPS(
in float4 position : SV_Position,
in float2 texcoord : TEXCOORD0,
out float4 beam_strength : SV_Target
) {
InterpolationFieldData interpolation_data = precalc_interpolation_field_data(texcoord);
// We have to subtract off the texcoord offset to make sure we're using domain [0, 1]
const float color_corrected = texcoord.x - 1.0 / TEX_BEAMDIST_WIDTH;
// Digital shape
// Beam will be perfectly rectangular
[branch]
if (beam_shape_mode == 0) {
// Double the intensity when interlacing to maintain the same apparent brightness
// (the boost is skipped for deinterlacing modes 2 and 3)
const float interlacing_brightness_factor = 1 + float(
enable_interlacing &&
(scanline_deinterlacing_mode != 2) &&
(scanline_deinterlacing_mode != 3)
);
// When interlacing, lines with scanline_parity set get zero strength.
const float raw_beam_strength = (1 - interpolation_data.scanline_parity * enable_interlacing) * interlacing_brightness_factor * levels_autodim_temp;
beam_strength = float4(color_corrected * raw_beam_strength, 0, 0, 1);
}
// Linear shape
// Beam intensity will drop off linearly with distance from center
// Works better than gaussian with narrow scanlines (about 1-6 pixels wide)
// Will only consider contribution from nearest scanline
else if (beam_shape_mode == 1) {
const float beam_dist_y = triangle_wave(texcoord.y, interpolation_data.triangle_wave_freq);
const bool scanline_is_wider_than_1 = scanline_thickness > 1;
const bool deinterlacing_mode_requires_boost = (
enable_interlacing &&
(scanline_deinterlacing_mode != 2) &&
(scanline_deinterlacing_mode != 3)
);
// Each condition that holds doubles the brightness (1x, 2x or 4x overall).
const float interlacing_brightness_factor = (1 + scanline_is_wider_than_1) * (1 + deinterlacing_mode_requires_boost);
// const float raw_beam_strength = (1 - beam_dist_y) * (1 - interpolation_data.scanline_parity * enable_interlacing) * interlacing_brightness_factor * levels_autodim_temp;
// const float raw_beam_strength = (1 - beam_dist_y);
// 1 at the beam center, falling to 0 at a distance of linear_beam_thickness.
const float raw_beam_strength = saturate(-beam_dist_y * rcp(linear_beam_thickness) + 1);
const float adj_beam_strength = raw_beam_strength * (1 - interpolation_data.scanline_parity * enable_interlacing) * interlacing_brightness_factor * levels_autodim_temp;
beam_strength = float4(color_corrected * adj_beam_strength, 0, 0, 1);
}
// Gaussian Shape
// Beam will be a distorted Gaussian, dependent on color brightness and hyperparameters
// Will only consider contribution from nearest scanline
else if (beam_shape_mode == 2) {
// Calculate {sigma, shape}_range outside of scanline_contrib so it's only
// done once per pixel (not 6 times) with runtime params. Don't reuse the
// vertex shader calculations, so static versions can be constant-folded.
const float sigma_range = max(gaussian_beam_max_sigma, gaussian_beam_min_sigma) - gaussian_beam_min_sigma;
const float shape_range = max(gaussian_beam_max_shape, gaussian_beam_min_shape) - gaussian_beam_min_shape;
// The triangle-wave frequency is halved when interlacing is enabled.
const float beam_dist_factor = 1 + float(enable_interlacing);
const float freq_adj = interpolation_data.triangle_wave_freq * rcp(beam_dist_factor);
// The conditional 0.25*f offset ensures the interlaced scanlines align with the non-interlaced ones as in the other beam shapes
const float frame_offset = enable_interlacing * (!interpolation_data.field_parity * 0.5 + 0.25) * rcp(freq_adj);
const float beam_dist_y = triangle_wave((texcoord.y - frame_offset), freq_adj) * rcp(linear_beam_thickness);
// The two conditions are mutually exclusive, so the factor is 1 or 2.
const float interlacing_brightness_factor = 1 + float(
!enable_interlacing &&
(scanline_thickness > 1)
) + float(
enable_interlacing &&
(scanline_deinterlacing_mode != 2) &&
(scanline_deinterlacing_mode != 3)
);
const float raw_beam_strength = get_gaussian_beam_strength(
beam_dist_y, color_corrected,
sigma_range, shape_range
) * interlacing_brightness_factor * levels_autodim_temp;
beam_strength = float4(raw_beam_strength, 0, 0, 1);
}
// Gaussian Shape
// Beam will be a distorted Gaussian, dependent on color brightness and hyperparameters
// Will consider contributions from current scanline and two neighboring in-field scanlines
else {
// Calculate {sigma, shape}_range outside of scanline_contrib so it's only
// done once per pixel (not 6 times) with runtime params. Don't reuse the
// vertex shader calculations, so static versions can be constant-folded.
const float sigma_range = max(gaussian_beam_max_sigma, gaussian_beam_min_sigma) - gaussian_beam_min_sigma;
const float shape_range = max(gaussian_beam_max_shape, gaussian_beam_min_shape) - gaussian_beam_min_shape;
const float beam_dist_factor = (1 + float(enable_interlacing));
const float freq_adj = interpolation_data.triangle_wave_freq * rcp(beam_dist_factor);
// The conditional 0.25*f offset ensures the interlaced scanlines align with the non-interlaced ones as in the other beam shapes
const float frame_offset = enable_interlacing * (!interpolation_data.field_parity * 0.5 + 0.25) * rcp(freq_adj);
const float curr_beam_dist_y = triangle_wave(texcoord.y - frame_offset, freq_adj) * rcp(linear_beam_thickness);
// Distances to the two neighboring scanlines; they always sum to
// 4 / linear_beam_thickness.
const float upper_beam_dist_y = (sawtooth_incr_wave(texcoord.y - frame_offset, freq_adj)*2 + 1) * rcp(linear_beam_thickness);
const float lower_beam_dist_y = 4 * rcp(linear_beam_thickness) - upper_beam_dist_y;
const float upper_beam_strength = get_gaussian_beam_strength(
upper_beam_dist_y, color_corrected,
sigma_range, shape_range
);
const float curr_beam_strength = get_gaussian_beam_strength(
curr_beam_dist_y, color_corrected,
sigma_range, shape_range
);
const float lower_beam_strength = get_gaussian_beam_strength(
lower_beam_dist_y, color_corrected,
sigma_range, shape_range
);
// The two conditions are mutually exclusive, so the factor is 1 or 2.
const float interlacing_brightness_factor = 1 + float(
!enable_interlacing &&
(scanline_thickness > 1)
) + float(
enable_interlacing &&
(scanline_deinterlacing_mode != 2) &&
(scanline_deinterlacing_mode != 3)
);
// Channel layout: x = current, y = upper, z = lower scanline.
const float3 raw_beam_strength = float3(curr_beam_strength, upper_beam_strength, lower_beam_strength) * interlacing_brightness_factor * levels_autodim_temp;
beam_strength = float4(raw_beam_strength, 1);
}
}
// Vertex shader for the electron-beam pass.
//   id                 - vertex index
//   position, texcoord - from PostProcessVS when ENABLE_PREBLUR is set,
//                        otherwise from contentCropVS
//   runtime_bin_shapes - xy: 1 / content_size
//                        zw: scanline extent divided by content_size
void simulateEletronBeamsVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0,
    out float4 runtime_bin_shapes : TEXCOORD1
) {
#if ENABLE_PREBLUR
    PostProcessVS(id, position, texcoord);
#else
    contentCropVS(id, position, texcoord);
#endif

    const float2 inv_content_size = rcp(content_size);
    const bool is_landscape = (geom_rotation_mode == 0 || geom_rotation_mode == 2);

    // Scanlines are scanline_thickness tall in rotation modes 0 and 2, and
    // scanline_thickness wide otherwise. The bool weight makes lerp a select.
    const float2 scanline_extent = lerp(
        float2(scanline_thickness, 1),
        float2(1, scanline_thickness),
        is_landscape
    );

    runtime_bin_shapes = float4(inv_content_size, scanline_extent * inv_content_size);
}
// Pixel shader for the electron-beam pass. It snaps the texcoord to its
// scanline, samples the source image there, and converts each colour channel
// into a beam strength by looking it up in samplerBeamDist at
// (channel value, ypos).
//   position           - unused
//   texcoord           - uv of the current pixel
//   runtime_bin_shapes - only .zw (the scanline extent in uv) is read here
//   color              - per-channel beam strength in rgb, alpha = 1
void simulateEletronBeamsPS(
in float4 position : SV_Position,
in float2 texcoord : TEXCOORD0,
in float4 runtime_bin_shapes : TEXCOORD1,
out float4 color : SV_Target
) {
// Rotation modes 0 and 2 keep the axes as-is and scale by the content
// height; other modes swap the axes and scale by the content width.
bool screen_is_landscape = geom_rotation_mode == 0 || geom_rotation_mode == 2;
float2 rotated_coord = lerp(texcoord.yx, texcoord, screen_is_landscape);
float scale = lerp(CONTENT_WIDTH, CONTENT_HEIGHT, screen_is_landscape);
// InterpolationFieldData interpolation_data = precalc_interpolation_field_data(rotated_coord);
// // We have to subtract off the texcoord offset to make sure we're using domain [0, 1]
// const float color_corrected = rotated_coord.x - 1.0 / scale;
InterpolationFieldData interpolation_data = calc_interpolation_field_data(rotated_coord, scale);
// Second coordinate used for every samplerBeamDist lookup below.
const float ypos = (rotated_coord.y * interpolation_data.triangle_wave_freq + interpolation_data.field_parity) * 0.5;
// Snap the coordinate to its scanline bin.
float2 texcoord_scanlined = round_coord(texcoord, 0, runtime_bin_shapes.zw);
// Sample from the neighboring scanline when in the wrong field
[branch]
if (interpolation_data.wrong_field && screen_is_landscape) {
// Move one scanline further in the direction the rounding already went:
// +1 when the snapped coordinate is <= the original, -1 otherwise.
const float coord_moved_up = texcoord_scanlined.y <= texcoord.y;
const float direction = lerp(-1, 1, coord_moved_up);
texcoord_scanlined.y += direction * scanline_thickness * rcp(content_size.y);
}
else if (interpolation_data.wrong_field) {
// Same adjustment along x for the non-landscape rotation modes.
const float coord_moved_up = texcoord_scanlined.x <= texcoord.x;
const float direction = lerp(-1, 1, coord_moved_up);
texcoord_scanlined.x += direction * scanline_thickness * rcp(content_size.x);
}
// Now we apply pixellation and cropping
// float2 texcoord_pixellated = round_coord(
// texcoord_scanlined,
// pixel_grid_offset * rcp(content_size),
// runtime_bin_shapes.xy
// );
// Pixellation is currently disabled, so both names below alias the
// scanlined coordinate.
float2 texcoord_pixellated = texcoord_scanlined;
const float2 texcoord_uncropped = texcoord_pixellated;
// NOTE(review): source_sampler is #defined inside the function body and is
// never #undef'd, so it stays defined for the rest of the translation unit.
#if ENABLE_PREBLUR
// If the pre-blur pass ran, then it's already handled cropping.
// const float2 texcoord_uncropped = texcoord_pixellated;
#define source_sampler samplerPreblurHoriz
#else
// const float2 texcoord_uncropped = texcoord_pixellated * content_scale + content_offset;
#define source_sampler ReShade::BackBuffer
#endif
[branch]
if (beam_shape_mode < 3) {
// Single-scanline modes: one source sample, and the beam strength is taken
// from the x channel of samplerBeamDist.
const float4 scanline_color = tex2Dlod_linearize(
source_sampler,
texcoord_uncropped,
get_input_gamma()
);
const float beam_strength_r = tex2D_nograd(samplerBeamDist, float2(scanline_color.r, ypos)).x;
const float beam_strength_g = tex2D_nograd(samplerBeamDist, float2(scanline_color.g, ypos)).x;
const float beam_strength_b = tex2D_nograd(samplerBeamDist, float2(scanline_color.b, ypos)).x;
const float4 beam_strength = float4(beam_strength_r, beam_strength_g, beam_strength_b, 1);
color = beam_strength;
}
else {
// Three-scanline mode: sample the current scanline and the ones one offset
// above and below it; the offset doubles when interlacing is enabled.
// NOTE(review): the offset is always applied along y, even when
// screen_is_landscape is false - confirm this is intended.
const float2 offset = float2(0, scanline_thickness) * (1 + enable_interlacing) * rcp(content_size);
const float4 curr_scanline_color = tex2Dlod_linearize(
source_sampler,
texcoord_uncropped,
get_input_gamma()
);
const float4 upper_scanline_color = tex2Dlod_linearize(
source_sampler,
texcoord_uncropped - offset,
get_input_gamma()
);
const float4 lower_scanline_color = tex2Dlod_linearize(
source_sampler,
texcoord_uncropped + offset,
get_input_gamma()
);
// samplerBeamDist channels: x = current, y = upper, z = lower scanline.
const float curr_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.r, ypos)).x;
const float curr_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.g, ypos)).x;
const float curr_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(curr_scanline_color.b, ypos)).x;
const float upper_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.r, ypos)).y;
const float upper_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.g, ypos)).y;
const float upper_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(upper_scanline_color.b, ypos)).y;
const float lower_beam_strength_r = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.r, ypos)).z;
const float lower_beam_strength_g = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.g, ypos)).z;
const float lower_beam_strength_b = tex2D_nograd(samplerBeamDist, float2(lower_scanline_color.b, ypos)).z;
// Sum the three contributions per channel.
color = float4(
curr_beam_strength_r + upper_beam_strength_r + lower_beam_strength_r,
curr_beam_strength_g + upper_beam_strength_g + lower_beam_strength_g,
curr_beam_strength_b + upper_beam_strength_b + lower_beam_strength_b,
1
);
}
}
// Vertex shader for the beam-convergence pass.
// Position and texcoord are produced by PostProcessVS. In addition, the
// pixel shader is handed `run_convergence`: the number of components of
// convergence_offset_x / convergence_offset_y that are non-zero. It is 0
// exactly when every convergence offset is zero.
void beamConvergenceVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0,
    out float run_convergence : TEXCOORD1
) {
    PostProcessVS(id, position, texcoord);

    // Per-component 0/1 markers: 1 where the corresponding offset is non-zero.
    const uint3 nonzero_x = convergence_offset_x != 0;
    const uint3 nonzero_y = convergence_offset_y != 0;

    // dot(v, 1) sums the components, so this counts the non-zero offsets.
    run_convergence = dot(nonzero_x, 1) + dot(nonzero_y, 1);
}
// Pixel shader for the beam-convergence pass.
// Reads the electron-beam buffer shifted vertically by `scanline_offset`
// (expressed in content rows). When the vertex shader reported no non-zero
// convergence offsets the texel is copied as-is; otherwise the RGB value is
// fetched through sample_rgb_scanline and written with alpha = 1.
void beamConvergencePS(
    in float4 position : SV_Position,
    in float2 texcoord : TEXCOORD0,
    in float run_convergence : TEXCOORD1,
    out float4 color : SV_TARGET
) {
    // Both paths sample at the same vertically shifted coordinate.
    const float2 shifted_coord = texcoord - float2(0, scanline_offset * rcp(content_size.y));

    // [branch]
    if (!run_convergence) {
        // No convergence offsets configured: plain fetch.
        color = tex2D(samplerElectronBeams, shifted_coord);
    }
    else {
        const float3 converged_rgb = sample_rgb_scanline(
            samplerElectronBeams, shifted_coord,
            TEX_ELECTRONBEAMS_SIZE, rcp(TEX_ELECTRONBEAMS_SIZE)
        );
        color = float4(converged_rgb, 1);
    }
}
#endif // _ELECTRON_BEAMS_H

View file

@ -1,220 +0,0 @@
#ifndef _GEOMETRY_AA_LAST_PASS_H
#define _GEOMETRY_AA_LAST_PASS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade.
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
#include "../lib/user-settings.fxh"
#include "../lib/derived-settings-and-constants.fxh"
#include "../lib/bind-shader-params.fxh"
#include "../lib/gamma-management.fxh"
#include "../lib/tex2Dantialias.fxh"
#include "../lib/geometry-functions.fxh"
// Disabled in the ReShade port because I don't know a good way to make these
// static AND global AND defined with sin(), cos(), or pow().
// #if !_RUNTIME_GEOMETRY_TILT
// // Create a local-to-global rotation matrix for the CRT's coordinate frame
// // and its global-to-local inverse. See the vertex shader for details.
// // It's faster to compute these statically if possible.
// static const float2 sin_tilt = sin(geom_tilt_angle_static);
// static const float2 cos_tilt = cos(geom_tilt_angle_static);
// static const float3x3 geom_local_to_global_static = float3x3(
// cos_tilt.x, sin_tilt.y*sin_tilt.x, cos_tilt.y*sin_tilt.x,
// 0.0, cos_tilt.y, -sin_tilt.y,
// -sin_tilt.x, sin_tilt.y*cos_tilt.x, cos_tilt.y*cos_tilt.x);
// static const float3x3 geom_global_to_local_static = float3x3(
// cos_tilt.x, 0.0, -sin_tilt.x,
// sin_tilt.y*sin_tilt.x, cos_tilt.y, sin_tilt.y*cos_tilt.x,
// cos_tilt.y*sin_tilt.x, -sin_tilt.y, cos_tilt.y*cos_tilt.x);
// #endif
// Scales the rows of a 2x2 matrix: row 0 is multiplied by scale.x and
// row 1 by scale.y. Returns the scaled matrix; the inputs are not modified.
float2x2 mul_scale(float2 scale, float2x2 mtrx)
{
    const float2 scaled_row0 = float2(mtrx[0][0], mtrx[0][1]) * scale.x;
    const float2 scaled_row1 = float2(mtrx[1][0], mtrx[1][1]) * scale.y;
    return float2x2(scaled_row0.x, scaled_row0.y, scaled_row1.x, scaled_row1.y);
}
// Vertex shader for the final geometry pass.
//
// Besides the position/texcoord produced by PostProcessVS, it precomputes
// per-frame values for the pixel shader:
//   output_size_inv          - 1 / content_size
//   geom_aspect_and_overscan - xy = aspect vector, zw = overscan vector
//   eye_pos_local            - ideal eye position, in the CRT's local frame
//   global_to_local_row0..2  - the three rows of the global-to-local matrix
//
// The matrix rows are written in every preprocessor configuration so that
// no `out` parameter is ever left unassigned.
void geometryVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0,
    out float2 output_size_inv : TEXCOORD1,
    out float4 geom_aspect_and_overscan : TEXCOORD2,
    out float3 eye_pos_local : TEXCOORD3,
    out float3 global_to_local_row0 : TEXCOORD4,
    out float3 global_to_local_row1 : TEXCOORD5,
    out float3 global_to_local_row2 : TEXCOORD6
) {
    PostProcessVS(id, position, texcoord);
    output_size_inv = 1.0 / content_size;

    // Get aspect/overscan vectors from scalar parameters (likely uniforms).
    // (1/height) / (1/width) == width / height.
    const float viewport_aspect_ratio = output_size_inv.y / output_size_inv.x;
    const float2 geom_aspect = get_aspect_vector(viewport_aspect_ratio);
    const float2 geom_overscan = get_geom_overscan_vector();
    geom_aspect_and_overscan = float4(geom_aspect, geom_overscan);

#if _RUNTIME_GEOMETRY_TILT
    // Create a local-to-global rotation matrix for the CRT's coordinate
    // frame and its global-to-local inverse. Rotate around the x axis
    // first (pitch) and then the y axis (yaw) with yucky Euler angles.
    // Positive angles go clockwise around the right-vec and up-vec.
    // Runtime shader parameters prevent us from computing these globally.
    // Note that matrices fill row1 first, then row2, etc. (row-major order).
    const float2 geom_tilt_angle = get_geom_tilt_angle_vector();
    const float2 sin_tilt = sin(geom_tilt_angle);
    const float2 cos_tilt = cos(geom_tilt_angle);
    // Conceptual breakdown:
    static const float3x3 rot_x_matrix = float3x3(
        1.0, 0.0, 0.0,
        0.0, cos_tilt.y, -sin_tilt.y,
        0.0, sin_tilt.y, cos_tilt.y);
    static const float3x3 rot_y_matrix = float3x3(
        cos_tilt.x, 0.0, sin_tilt.x,
        0.0, 1.0, 0.0,
        -sin_tilt.x, 0.0, cos_tilt.x);
    static const float3x3 local_to_global =
        mul(rot_y_matrix, rot_x_matrix);
    /* Disabled hand-expanded alternative, kept for reference.
       NOTE(review): its signs differ from mul(rot_y_matrix, rot_x_matrix)
       (row 1 col 2 and row 2 col 0 should be negative), so do not re-enable
       it without correcting them.
        static const float3x3 global_to_local =
            transpose(local_to_global);
        const float3x3 local_to_global = float3x3(
            cos_tilt.x, sin_tilt.y*sin_tilt.x, cos_tilt.y*sin_tilt.x,
            0.0, cos_tilt.y, sin_tilt.y,
            sin_tilt.x, sin_tilt.y*cos_tilt.x, cos_tilt.y*cos_tilt.x);
    */
    // This is a pure rotation, so transpose = inverse:
    const float3x3 global_to_local = transpose(local_to_global);
#else
    // NOTE(review): the geom_*_static matrices are commented out near the
    // top of this file in the ReShade port; this path needs them defined
    // elsewhere to compile - confirm before disabling runtime tilt.
    static const float3x3 global_to_local = geom_global_to_local_static;
    static const float3x3 local_to_global = geom_local_to_global_static;
#endif

    // Decompose the matrix into 3 float3's for output (equivalent to the
    // _m00_m01_m02 / _m10_m11_m12 / _m20_m21_m22 swizzles). This is done
    // after the #if so the outputs are assigned in both configurations.
    global_to_local_row0 = float3(global_to_local[0][0], global_to_local[0][1], global_to_local[0][2]);
    global_to_local_row1 = float3(global_to_local[1][0], global_to_local[1][1], global_to_local[1][2]);
    global_to_local_row2 = float3(global_to_local[2][0], global_to_local[2][1], global_to_local[2][2]);

    // Get an optimal eye position based on geom_view_dist, viewport_aspect,
    // and CRT radius/rotation:
#if _RUNTIME_GEOMETRY_MODE
    const float geom_mode = geom_mode_runtime;
#else
    static const float geom_mode = geom_mode_static;
#endif
    const float3 eye_pos_global = get_ideal_global_eye_pos(local_to_global, geom_aspect, geom_mode);
    eye_pos_local = mul(global_to_local, eye_pos_global);
}
// Pixel shader for the final geometry pass.
// For each output pixel it:
//   1. derives flat or curved video coordinates plus a matrix that maps
//      pixel offsets to video-uv offsets,
//   2. applies overscan around the frame centre,
//   3. samples samplerBloomHorizontal, antialiased when enabled and needed,
//   4. dims the borders and encodes the result with the output gamma.
void geometryPS(
    in float4 position : SV_Position,
    in float2 texcoord : TEXCOORD0,
    in float2 output_size_inv : TEXCOORD1,
    in float4 geom_aspect_and_overscan : TEXCOORD2,
    in float3 eye_pos_local : TEXCOORD3,
    in float3 global_to_local_row0 : TEXCOORD4,
    in float3 global_to_local_row1 : TEXCOORD5,
    in float3 global_to_local_row2 : TEXCOORD6,
    out float4 color : SV_Target
) {
    // Unpack the interpolants written by the vertex shader.
    const float2 geom_aspect = geom_aspect_and_overscan.xy;
    const float2 geom_overscan = geom_aspect_and_overscan.zw;
#if _RUNTIME_GEOMETRY_TILT
    const float3x3 global_to_local = float3x3(
        global_to_local_row0,
        global_to_local_row1,
        global_to_local_row2);
#else
    static const float3x3 global_to_local = geom_global_to_local_static;
#endif
#if _RUNTIME_GEOMETRY_MODE
    const float geom_mode = geom_mode_runtime;
#else
    static const float geom_mode = geom_mode_static;
#endif

    // Feature switches used by the decisions below.
    const bool curvature_enabled = geom_mode > 0.5;
    const bool overscan_active = any(bool2((geom_overscan.x != 1.0), (geom_overscan.y != 1.0)));
    const bool aa_enabled = antialias_level > 0.5;

    // video uv = relative position in the video frame, in [0.0, 1.0].
    // px_to_video_uv transforms pixel offsets into video-uv offsets.
    float2x2 px_to_video_uv;
    float2 uv_before_overscan;
    if (curvature_enabled)
    {
        uv_before_overscan =
            get_curved_video_uv_coords_and_tangent_matrix(texcoord,
                eye_pos_local, output_size_inv, geom_aspect,
                geom_mode, global_to_local, px_to_video_uv);
    }
    else
    {
        // Flat screen: coordinates pass through and one pixel is simply
        // one output texel along each axis.
        uv_before_overscan = texcoord;
        px_to_video_uv = float2x2(
            output_size_inv.x, 0.0, 0.0, output_size_inv.y);
    }

    // Overscan is applied here (not in the curvature code), scaling about
    // the centre of the frame. The same scale applies to the offset matrix.
    const float2 uv_center = float2(0.5, 0.5);
    const float2 video_uv = (uv_before_overscan - uv_center)/geom_overscan + uv_center;
    const float2x2 px_to_tex_uv =
        mul_scale(1.0 / geom_overscan, px_to_video_uv);

    // The subpixel-AA check is disabled: the original expression reportedly
    // always evaluated to true, so it is hard-wired to false for now.
    const float2 abs_aa_r_offset = abs(get_aa_subpixel_r_offset());
    const bool need_subpixel_aa = false;//abs_aa_r_offset.x + abs_aa_r_offset.y > 0.0;

    // Full antialiasing is only used when AA is on and either curvature or
    // a non-unit overscan is in effect; otherwise a plain fetch is sharper.
    float3 sampled_rgb;
    if (aa_enabled && (curvature_enabled || overscan_active))
    {
        sampled_rgb = tex2Daa(samplerBloomHorizontal, video_uv, px_to_tex_uv, float(frame_count), get_intermediate_gamma());
    }
    else if (aa_enabled && need_subpixel_aa)
    {
        sampled_rgb = tex2Daa_subpixel_weights_only(
            samplerBloomHorizontal, video_uv, px_to_tex_uv, get_intermediate_gamma());
    }
    else
    {
        sampled_rgb = tex2D_linearize(samplerBloomHorizontal, video_uv, get_intermediate_gamma()).rgb;
    }

    // Dim borders and output the final result:
    const float border_dim = get_border_dim_factor(video_uv, geom_aspect);
    const float3 dimmed_rgb = sampled_rgb * border_dim;
    color = encode_output(float4(dimmed_rgb, 1.0), get_output_gamma());
}
#endif // _GEOMETRY_AA_LAST_PASS_H

View file

@ -1,74 +0,0 @@
#ifndef _INPUT_BLURRING_H
#define _INPUT_BLURRING_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2022 Alex Gunter
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// Theoretically this could go in blurring.fxh
// But that file has a bunch of GPL stuff in it.
// Keeping it separate makes it easier to communicate that this portion is
// available under the MIT license.
#include "../lib/downsampling-functions.fxh"
#include "content-box.fxh"
#include "shared-objects.fxh"
// Vertical pre-blur pass.
// Downsamples the back buffer along Y with opaque_linear_downsample and
// writes the result with alpha = 1.
void preblurVertPS(
    in const float4 pos : SV_Position,
    in const float2 texcoord : TEXCOORD0,
    out float4 color : SV_Target
) {
    // One content row in UV units, stretched to the configured blur radius.
    const float2 one_row_uv = float2(0.0, rcp(content_size.y));
    const float2 max_delta_uv = one_row_uv * preblur_effect_radius;

    // Spread the radius over the sample count; max(.., 1) guards against
    // dividing by zero when the sampling radius is 0.
    const float2 delta_uv = max_delta_uv * rcp(max(preblur_sampling_radius.y, 1));

    const float3 blurred_rgb = opaque_linear_downsample(
        ReShade::BackBuffer,
        texcoord,
        preblur_sampling_radius.y,
        delta_uv
    );
    color = float4(blurred_rgb, 1);
}
// Horizontal pre-blur pass.
// Downsamples the vertical pre-blur result (samplerPreblurVert) along X
// with opaque_linear_downsample and writes the result with alpha = 1.
void preblurHorizPS(
    in const float4 pos : SV_Position,
    in const float2 texcoord : TEXCOORD0,
    out float4 color : SV_Target
) {
    // One content column in UV units, stretched to the configured blur radius.
    const float2 one_column_uv = float2(rcp(content_size.x), 0.0);
    const float2 max_delta_uv = one_column_uv * preblur_effect_radius;

    // Spread the radius over the sample count; max(.., 1) guards against
    // dividing by zero when the sampling radius is 0.
    const float2 delta_uv = max_delta_uv * rcp(max(preblur_sampling_radius.x, 1));

    const float3 blurred_rgb = opaque_linear_downsample(
        samplerPreblurVert,
        texcoord,
        preblur_sampling_radius.x,
        delta_uv
    );
    color = float4(blurred_rgb, 1);
}
#endif // _INPUT_BLURRING_H

View file

@ -1,211 +0,0 @@
#ifndef _PHOSPHOR_MASK_H
#define _PHOSPHOR_MASK_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2022 Alex Gunter
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
#include "../lib/bind-shader-params.fxh"
#include "../lib/phosphor-mask-calculations.fxh"
#include "shared-objects.fxh"
// The mask is drawn one vertical strip ("segment") at a time; the vertex
// shader below picks the strip from frame_count % num_segments.
// Split into 64 segments that overlap a little bit
static const float num_segments = 64;
static const float segment_offset = 0.015625; // 1/64: spacing between the left edges of consecutive segments
static const float segment_width = 0.0234375; // 1.5/64 (= 3/128): wider than the spacing, so neighbouring segments overlap
// Vertex shader for phosphor-mask generation.
// Emits a quad covering one vertical segment of the mask texture, chosen
// from frame_count % num_segments, or the whole texture while
// overlay_active > 0. It also precomputes, for the pixel shader:
//   viewport_frequency_factor - from calc_phosphor_viewport_frequency_factor()
//   mask_pq_x / mask_pq_y     - (p, q) pairs per axis, where p is
//                               exp(-calculate_phosphor_p_value(...)) and
//                               q is the matching phosphor_sharpness component
void generatePhosphorMaskVS(
    in uint id : SV_VertexID,
    out float4 position : SV_Position,
    out float2 texcoord : TEXCOORD0,
    out float2 viewport_frequency_factor: TEXCOORD1,
    out float2 mask_pq_x : TEXCOORD2,
    out float2 mask_pq_y : TEXCOORD3
) {
    // Horizontal extent of this frame's segment in texture space. The
    // boolean lerp weight selects the full [0, 1] range when the overlay
    // is active.
    const float segment_idx = frame_count % num_segments;
    const float uv_left = lerp(segment_offset * segment_idx, 0, overlay_active > 0);
    const float uv_right = lerp(uv_left + segment_width, 1, overlay_active > 0);

    // The same extent in clip space ([-1, 1], i.e. twice the uv width).
    const float clip_center = 2 * (uv_left + 0.5 * segment_width - 0.5);
    const float clip_left = lerp(clip_center - segment_width, -1, overlay_active > 0);
    const float clip_right = lerp(clip_center + segment_width, 1, overlay_active > 0);

    // Odd vertex ids sit on the right edge, ids 2 and 3 on the bottom edge.
    // The DX9 path spells this out with comparisons instead of bit tests.
#if _DX9_ACTIVE
    texcoord.x = (id == 1 || id == 3) ? uv_right : uv_left;
    texcoord.y = (id > 1) ? 1 : 0;
    position.x = (id == 1 || id == 3) ? clip_right : clip_left;
    position.y = (id > 1) ? -1 : 1;
    position.zw = 1;
#else
    texcoord.x = (id & 1) ? uv_right : uv_left;
    texcoord.y = (id & 2) ? 1 : 0;
    position.x = (id & 1) ? clip_right : clip_left;
    position.y = (id & 2) ? -1 : 1;
    position.zw = 1;
#endif

    viewport_frequency_factor = calc_phosphor_viewport_frequency_factor();

    // Pick the edge-norm parameters for the selected mask type. These are
    // not altered for screen rotation because they are independent of the
    // screen dimensions.
    // NOTE(review): the grille types (0 and 3) never assign norm_ty, yet it
    // still feeds mask_pq_y below. The pixel shader in this file ignores
    // mask_pq_y for those types - confirm that is the intent.
    float norm_tx;
    float norm_ty;
    [flatten]
    switch (mask_type) {
        case 0:
            norm_tx = grille_edge_norm_t;
            break;
        case 1:
            norm_tx = slot_edge_norm_tx;
            norm_ty = slot_edge_norm_ty;
            break;
        case 2:
            norm_tx = shadow_edge_norm_tx;
            norm_ty = shadow_edge_norm_ty;
            break;
        case 3:
            norm_tx = smallgrille_edge_norm_t;
            break;
        case 4:
            norm_tx = smallslot_edge_norm_tx;
            norm_ty = smallslot_edge_norm_ty;
            break;
        default:
            norm_tx = smallshadow_edge_norm_tx;
            norm_ty = smallshadow_edge_norm_ty;
            break;
    }

    const float2 thickness = linearize_phosphor_thickness_param(phosphor_thickness);
    const float p_x = exp(-calculate_phosphor_p_value(norm_tx, thickness.x, phosphor_sharpness.x));
    const float p_y = exp(-calculate_phosphor_p_value(norm_ty, thickness.y, phosphor_sharpness.y));
    mask_pq_x = float2(p_x, phosphor_sharpness.x);
    mask_pq_y = float2(p_y, phosphor_sharpness.y);
}
// Pixel shader for phosphor-mask generation.
// Evaluates the phosphor intensity function that matches mask_type at this
// texel and writes it with alpha = 1. For rotation modes 1 and 3 the
// coordinate and frequency factor are transposed first.
void generatePhosphorMaskPS(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    in float2 viewport_frequency_factor: TEXCOORD1,
    in float2 mask_pq_x : TEXCOORD2,
    in float2 mask_pq_y : TEXCOORD3,
    out float4 color : SV_Target
) {
    // Work on local copies so the axis swap does not touch the inputs.
    float2 mask_uv = texcoord;
    float2 freq_factor = viewport_frequency_factor;
    [branch]
    if (geom_rotation_mode == 1 || geom_rotation_mode == 3) {
        mask_uv = mask_uv.yx;
        freq_factor = freq_factor.yx;
    }

    // mask_type: 0 grille, 1 slot, 2 shadow, 3 small grille, 4 small slot,
    // anything else small shadow.
    float3 mask_rgb;
    [branch]
    if (mask_type == 0) {
        mask_rgb = get_phosphor_intensity_grille(mask_uv, freq_factor, mask_pq_x);
    }
    else if (mask_type == 1) {
        mask_rgb = get_phosphor_intensity_slot(mask_uv, freq_factor, mask_pq_x, mask_pq_y);
    }
    else if (mask_type == 2) {
        // The shadow variant receives only the two q (second) components.
        mask_rgb = get_phosphor_intensity_shadow(
            mask_uv,
            freq_factor,
            float2(mask_pq_x.y, mask_pq_y.y)
        );
    }
    else if (mask_type == 3) {
        mask_rgb = get_phosphor_intensity_grille_small(mask_uv, freq_factor, mask_pq_x);
    }
    else if (mask_type == 4) {
        mask_rgb = get_phosphor_intensity_slot_small(mask_uv, freq_factor, mask_pq_x, mask_pq_y);
    }
    else {
        mask_rgb = get_phosphor_intensity_shadow_small(mask_uv, freq_factor, mask_pq_x, mask_pq_y);
    }

    color = float4(mask_rgb, 1.0);
}
// Applies the precomputed phosphor mask to the scanline image.
// The scanline colour is blended toward a grey halation term by
// halation_weight, then multiplied by the mask colour. Alpha is 1.
void applyComputedPhosphorMaskPS(
    in float4 pos : SV_Position,
    in float2 texcoord : TEXCOORD0,
    out float4 color : SV_Target
) {
    // Deinterlacing modes 2 and 3 read from the deinterlace buffer; every
    // other configuration reads the beam-convergence output.
    const bool mode_uses_deinterlace_tex =
        scanline_deinterlacing_mode == 2 || scanline_deinterlacing_mode == 3;
    bool use_deinterlacing_tex = enable_interlacing && mode_uses_deinterlace_tex;

    float3 scanline_rgb;
    [branch]
    if (use_deinterlacing_tex) {
        scanline_rgb = tex2D(samplerDeinterlace, texcoord).rgb;
    }
    else {
        scanline_rgb = tex2D(samplerBeamConvergence, texcoord).rgb;
    }

    const float3 mask_rgb = tex2D(samplerPhosphorMask, texcoord).rgb;

    // Halation models electrons flying around under the glass and hitting
    // the wrong phosphors (of any colour). It desaturates, so the blurred
    // sample is averaged to a scalar and broadcast to all three channels.
    const float3 halation_rgb = tex2D_linearize(samplerBlurHorizontal, texcoord, get_intermediate_gamma()).rgb;
    const float halation_mean = dot(halation_rgb, float3(1, 1, 1)) / 3.0;
    const float3 halation_grey = halation_mean;

    // Blending (rather than adding) reduces the local scanline intensity
    // by the same share that halation contributes.
    const float3 electron_rgb = lerp(scanline_rgb, halation_grey, halation_weight);

    // Apply the phosphor mask:
    const float3 emitted_rgb = electron_rgb * mask_rgb;
    color = float4(emitted_rgb, 1.0);
}
#endif // _PHOSPHOR_MASK_H

View file

@ -1,370 +0,0 @@
#ifndef _SHARED_OBJECTS_H
#define _SHARED_OBJECTS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale-reshade: A port of TroggleMonkey's crt-royale from libretro to ReShade.
// Copyright (C) 2020 Alex Gunter <akg7634@gmail.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
#include "../lib/helper-functions-and-macros.fxh"
#include "../lib/derived-settings-and-constants.fxh"
#include "../lib/bind-shader-params.fxh"
// Yes, the WIDTH/HEIGHT/SIZE defines are kinda weird.
// Yes, we have to have them or something similar. This is for D3D11 which
// returns (0, 0) when you call tex2Dsize() on the pass's render target.
// Pass 0 Buffer (cropPass)
// Cannot be conditioned on __RENDERER__ b/c there are no
// available buffers of the same size
// Last usage is in interlacingPass
// electronBeamPass -> beamConvergencePass
// deinterlacePass -> phosphorMaskPass
// brightpassPass -> bloomHorizontalPass
// #define TEX_CROP_WIDTH content_size.x
// #define TEX_CROP_HEIGHT content_size.y
// #define TEX_CROP_SIZE int2(TEX_CROP_WIDTH, TEX_CROP_HEIGHT)
// texture2D texCrop {
// Width = TEX_CROP_WIDTH;
// Height = TEX_CROP_HEIGHT;
// Format = RGBA16;
// };
// sampler2D samplerCrop { Texture = texCrop; };
// Pass 1 Buffer (interlacingPass)
// Cannot be conditioned on __RENDERER__ b/c there are no
// available buffers of the same size
// Last usage is in electronBeamPass
// beamConvergencePass -> freezeFramePass
// phosphorMaskPass -> bloomHorizontalPass
// #define TEX_INTERLACED_WIDTH content_size.x
// #define TEX_INTERLACED_HEIGHT content_size.y
// #define TEX_INTERLACED_SIZE int2(TEX_INTERLACED_WIDTH, TEX_INTERLACED_HEIGHT)
// texture2D texInterlaced {
// Width = TEX_INTERLACED_WIDTH;
// Height = TEX_INTERLACED_HEIGHT;
// Format = RGBA16;
// };
// sampler2D samplerInterlaced { Texture = texInterlaced; };
// Pre-blur Buffers (vertical result, then horizontal result)
// texPreblurVert is read back through samplerPreblurVert by the horizontal
// pre-blur shader; texPreblurHoriz is exposed through samplerPreblurHoriz.
// Both are content-sized, pooled RGBA16 targets.
#define TEX_PREBLUR_VERT_WIDTH content_size.x
#define TEX_PREBLUR_VERT_HEIGHT content_size.y
// Both pre-blur targets have identical dimensions, so one size constant
// serves both. It must be declared only once: a second declaration with
// the same name is a redefinition.
static const uint2 TEX_PREBLUR_SIZE = uint2(TEX_PREBLUR_VERT_WIDTH, TEX_PREBLUR_VERT_HEIGHT);
texture2D texPreblurVert < pooled = true; > {
    Width = TEX_PREBLUR_VERT_WIDTH;
    Height = TEX_PREBLUR_VERT_HEIGHT;
    Format = RGBA16;
};
sampler2D samplerPreblurVert { Texture = texPreblurVert; };

#define TEX_PREBLUR_HORIZ_WIDTH content_size.x
#define TEX_PREBLUR_HORIZ_HEIGHT content_size.y
texture2D texPreblurHoriz < pooled = true; > {
    Width = TEX_PREBLUR_HORIZ_WIDTH;
    Height = TEX_PREBLUR_HORIZ_HEIGHT;
    Format = RGBA16;
};
sampler2D samplerPreblurHoriz { Texture = texPreblurHoriz; };
// Beam distribution texture.
// Sized num_beamdist_color_samples x num_beamdist_dist_samples and declared
// with pooled = false, unlike the pooled scratch buffers in this file.
// NOTE(review): judging by the macro names, U presumably indexes a colour
// intensity and V a distance - confirm against the pass that fills it.
#define TEX_BEAMDIST_WIDTH num_beamdist_color_samples
#define TEX_BEAMDIST_HEIGHT num_beamdist_dist_samples
#define TEX_BEAMDIST_SIZE int2(TEX_BEAMDIST_WIDTH, TEX_BEAMDIST_HEIGHT)
texture2D texBeamDist < pooled = false; > {
Width = TEX_BEAMDIST_WIDTH;
Height = TEX_BEAMDIST_HEIGHT;
Format = RGB10A2;
};
// Only V wraps; U keeps the sampler's default addressing mode.
sampler2D samplerBeamDist {
Texture = texBeamDist;
AddressV = WRAP;
};
// Pass 2 Buffer (electronBeamPass)
// Last usage is in beamConvergencePass
// Content-sized, pooled RGBA16 target. When _DX9_ACTIVE is non-zero this
// texture is also reused under the names texDeinterlace, texBrightpass and
// texGeometry (see the #else branches further down in this file).
#define TEX_ELECTRONBEAMS_WIDTH content_size.x
#define TEX_ELECTRONBEAMS_HEIGHT content_size.y
#define TEX_ELECTRONBEAMS_SIZE int2(TEX_ELECTRONBEAMS_WIDTH, TEX_ELECTRONBEAMS_HEIGHT)
texture2D texElectronBeams < pooled = true; > {
Width = TEX_ELECTRONBEAMS_WIDTH;
Height = TEX_ELECTRONBEAMS_HEIGHT;
Format = RGBA16;
};
// BORDER addressing on both axes: reads outside [0, 1] return the border
// colour instead of clamping or wrapping.
sampler2D samplerElectronBeams {
Texture = texElectronBeams;
AddressU = BORDER;
AddressV = BORDER;
};
// #define texElectronBeams texCrop
// #define samplerElectronBeams samplerCrop
// Pass 3 Buffer (beamConvergencePass)
// Last usage is freezeFramePass
// Content-sized, pooled RGBA16 target. When _DX9_ACTIVE is non-zero it is
// also reused under the name texMaskedScanlines (see further down).
#define TEX_BEAMCONVERGENCE_WIDTH content_size.x
#define TEX_BEAMCONVERGENCE_HEIGHT content_size.y
#define TEX_BEAMCONVERGENCE_SIZE int2(TEX_BEAMCONVERGENCE_WIDTH, TEX_BEAMCONVERGENCE_HEIGHT)
texture2D texBeamConvergence < pooled = true; > {
Width = TEX_BEAMCONVERGENCE_WIDTH;
Height = TEX_BEAMCONVERGENCE_HEIGHT;
Format = RGBA16;
};
sampler2D samplerBeamConvergence { Texture = texBeamConvergence; };
// #define texBeamConvergence texInterlaced
// #define samplerBeamConvergence samplerInterlaced
/*
// Pass 4 Buffer (bloomApproxPass)
// Cannot be conditioned on __RENDERER__ b/c there are no
// available buffers of the same size
// Last usage is in brightpassPass
#define TEX_BLOOMAPPROX_WIDTH 320
#define TEX_BLOOMAPPROX_HEIGHT 240
#define TEX_BLOOMAPPROX_SIZE int2(TEX_BLOOMAPPROX_WIDTH, TEX_BLOOMAPPROX_HEIGHT)
texture2D texBloomApprox {
Width = TEX_BLOOMAPPROX_WIDTH;
Height = TEX_BLOOMAPPROX_HEIGHT;
Format = RGBA16;
};
sampler2D samplerBloomApprox { Texture = texBloomApprox; };
*/
// Pass 4a Buffer (bloomApproxVerticalPass)
// Cannot be conditioned on __RENDERER__ b/c there are no
// available buffers of the same size
// Last usage is in brightpassPass
// Only the height is reduced here: full content width, and content height
// divided by bloomapprox_downsizing_factor, truncated by int().
#define TEX_BLOOMAPPROXVERT_WIDTH content_size.x
// #define TEX_BLOOMAPPROXVERT_HEIGHT 240
#define TEX_BLOOMAPPROXVERT_HEIGHT int(content_size.y / bloomapprox_downsizing_factor)
#define TEX_BLOOMAPPROXVERT_SIZE int2(TEX_BLOOMAPPROXVERT_WIDTH, TEX_BLOOMAPPROXVERT_HEIGHT)
texture2D texBloomApproxVert < pooled = true; > {
Width = TEX_BLOOMAPPROXVERT_WIDTH;
Height = TEX_BLOOMAPPROXVERT_HEIGHT;
Format = RGBA16;
};
sampler2D samplerBloomApproxVert { Texture = texBloomApproxVert; };
// Pass 4b Buffer (bloomApproxHorizontalPass)
// Cannot be conditioned on __RENDERER__ b/c there are no
// available buffers of the same size
// Last usage is in brightpassPass
// Both axes are reduced here: the width is content width divided by
// bloomapprox_downsizing_factor (truncated by int()), and the height
// reuses the already-reduced TEX_BLOOMAPPROXVERT_HEIGHT.
// #define TEX_BLOOMAPPROXHORIZ_WIDTH 320
// #define TEX_BLOOMAPPROXHORIZ_HEIGHT 240
#define TEX_BLOOMAPPROXHORIZ_WIDTH int(content_size.x / bloomapprox_downsizing_factor)
#define TEX_BLOOMAPPROXHORIZ_HEIGHT TEX_BLOOMAPPROXVERT_HEIGHT
#define TEX_BLOOMAPPROXHORIZ_SIZE int2(TEX_BLOOMAPPROXHORIZ_WIDTH, TEX_BLOOMAPPROXHORIZ_HEIGHT)
texture2D texBloomApproxHoriz < pooled = true; > {
Width = TEX_BLOOMAPPROXHORIZ_WIDTH;
Height = TEX_BLOOMAPPROXHORIZ_HEIGHT;
Format = RGBA16;
};
sampler2D samplerBloomApproxHoriz { Texture = texBloomApproxHoriz; };
// Pass 5 Buffer (blurVerticalPass)
// Cannot be conditioned on __RENDERER__ b/c there are no
// available buffers of the same size
// Last usage is blurHorizontalPass
// Same (downsized) dimensions as texBloomApproxHoriz.
#define TEX_BLURVERTICAL_WIDTH TEX_BLOOMAPPROXHORIZ_WIDTH
#define TEX_BLURVERTICAL_HEIGHT TEX_BLOOMAPPROXHORIZ_HEIGHT
#define TEX_BLURVERTICAL_SIZE int2(TEX_BLURVERTICAL_WIDTH, TEX_BLURVERTICAL_HEIGHT)
texture2D texBlurVertical < pooled = true; > {
Width = TEX_BLURVERTICAL_WIDTH;
Height = TEX_BLURVERTICAL_HEIGHT;
Format = RGBA16;
};
sampler2D samplerBlurVertical { Texture = texBlurVertical; };
// Pass 6 Buffer (blurHorizontalPass)
// Cannot be conditioned on __RENDERER__ b/c there are no
// available buffers of the same size
// Last usage is bloomHorizontalPass
// Same (downsized) dimensions as texBloomApproxHoriz. The phosphor-mask
// application shader samples this buffer as its halation source.
#define TEX_BLURHORIZONTAL_WIDTH TEX_BLOOMAPPROXHORIZ_WIDTH
#define TEX_BLURHORIZONTAL_HEIGHT TEX_BLOOMAPPROXHORIZ_HEIGHT
#define TEX_BLURHORIZONTAL_SIZE int2(TEX_BLURHORIZONTAL_WIDTH, TEX_BLURHORIZONTAL_HEIGHT)
texture2D texBlurHorizontal < pooled = true; > {
Width = TEX_BLURHORIZONTAL_WIDTH;
Height = TEX_BLURHORIZONTAL_HEIGHT;
Format = RGBA16;
};
sampler2D samplerBlurHorizontal { Texture = texBlurHorizontal; };
// Pass 7 (deinterlacePass)
// Last usage is phosphorMaskPass
// Content-sized. A dedicated pooled RGBA16 texture is declared only when
// _DX9_ACTIVE == 0; otherwise the names alias texElectronBeams and its
// sampler (which uses BORDER addressing) instead of allocating a texture.
#define TEX_DEINTERLACE_WIDTH content_size.x
#define TEX_DEINTERLACE_HEIGHT content_size.y
#define TEX_DEINTERLACE_SIZE int2(TEX_DEINTERLACE_WIDTH, TEX_DEINTERLACE_HEIGHT)
#if _DX9_ACTIVE == 0
texture2D texDeinterlace < pooled = true; > {
Width = TEX_DEINTERLACE_WIDTH;
Height = TEX_DEINTERLACE_HEIGHT;
Format = RGBA16;
};
sampler2D samplerDeinterlace { Texture = texDeinterlace; };
#else
#define texDeinterlace texElectronBeams
#define samplerDeinterlace samplerElectronBeams
#endif
// Pass 8 (freezeFramePass)
// Do not condition this on __RENDERER__. It will not work if another
// pass corrupts it.
// Content-sized RGBA16 target, declared with pooled = false.
#define TEX_FREEZEFRAME_WIDTH content_size.x
#define TEX_FREEZEFRAME_HEIGHT content_size.y
// The closing parenthesis of int2(...) was missing, which made every
// expansion of this macro unbalanced.
#define TEX_FREEZEFRAME_SIZE int2(TEX_FREEZEFRAME_WIDTH, TEX_FREEZEFRAME_HEIGHT)
texture2D texFreezeFrame < pooled = false; > {
    Width = TEX_FREEZEFRAME_WIDTH;
    Height = TEX_FREEZEFRAME_HEIGHT;
    Format = RGBA16;
};
sampler2D samplerFreezeFrame { Texture = texFreezeFrame; };
// Pass 10 Mask Texture (phosphorMaskResizeHorizontalPass)
// Cannot be conditioned on __RENDERER__ b/c there are no
// available buffers of the same size
// Content-sized RGBA16 texture holding the generated phosphor mask,
// declared with pooled = false.
#define TEX_PHOSPHORMASK_WIDTH content_size.x
#define TEX_PHOSPHORMASK_HEIGHT content_size.y
#define TEX_PHOSPHORMASK_SIZE int2(TEX_PHOSPHORMASK_WIDTH, TEX_PHOSPHORMASK_HEIGHT)
// Misspelled legacy name, kept as an alias so that any existing reference
// to it continues to compile.
#define TEX_PHOSPHORMASKL_SIZE TEX_PHOSPHORMASK_SIZE
texture2D texPhosphorMask < pooled = false; > {
    Width = TEX_PHOSPHORMASK_WIDTH;
    Height = TEX_PHOSPHORMASK_HEIGHT;
    Format = RGBA16;
};
sampler2D samplerPhosphorMask { Texture = texPhosphorMask; };
// Pass 11 Buffer (phosphorMaskPass)
// Last usage is bloomHorizontalPass
// Content-sized. A dedicated pooled RGBA16 texture is declared only when
// _DX9_ACTIVE == 0; otherwise the names alias texBeamConvergence and its
// sampler instead of allocating a texture.
#define TEX_MASKEDSCANLINES_WIDTH content_size.x
#define TEX_MASKEDSCANLINES_HEIGHT content_size.y
#define TEX_MASKEDSCANLINES_SIZE int2(TEX_MASKEDSCANLINES_WIDTH, TEX_MASKEDSCANLINES_HEIGHT)
#if _DX9_ACTIVE == 0
texture2D texMaskedScanlines < pooled = true; > {
Width = TEX_MASKEDSCANLINES_WIDTH;
Height = TEX_MASKEDSCANLINES_HEIGHT;
Format = RGBA16;
};
sampler2D samplerMaskedScanlines { Texture = texMaskedScanlines; };
#else
#define texMaskedScanlines texBeamConvergence
#define samplerMaskedScanlines samplerBeamConvergence
#endif
// Pass 12 Buffer (brightpassPass)
// Last usage is bloomHorizontalPass
// Content-sized. A dedicated pooled RGBA16 texture is declared only when
// _DX9_ACTIVE == 0; otherwise the names alias texElectronBeams and its
// sampler (which uses BORDER addressing) instead of allocating a texture.
#define TEX_BRIGHTPASS_WIDTH content_size.x
#define TEX_BRIGHTPASS_HEIGHT content_size.y
#define TEX_BRIGHTPASS_SIZE int2(TEX_BRIGHTPASS_WIDTH, TEX_BRIGHTPASS_HEIGHT)
#if _DX9_ACTIVE == 0
texture2D texBrightpass < pooled = true; > {
Width = TEX_BRIGHTPASS_WIDTH;
Height = TEX_BRIGHTPASS_HEIGHT;
Format = RGBA16;
};
sampler2D samplerBrightpass { Texture = texBrightpass; };
#else
#define texBrightpass texElectronBeams
#define samplerBrightpass samplerElectronBeams
#endif
// Pass 13 Buffer (bloomVerticalPass)
// Cannot be conditioned on __RENDERER__ b/c there are no
// available buffers of the same size
// Last usage is bloomHorizontalPass
// Content-sized, pooled RGBA16 target.
#define TEX_BLOOMVERTICAL_WIDTH content_size.x
#define TEX_BLOOMVERTICAL_HEIGHT content_size.y
#define TEX_BLOOMVERTICAL_SIZE int2(TEX_BLOOMVERTICAL_WIDTH, TEX_BLOOMVERTICAL_HEIGHT)
texture2D texBloomVertical < pooled = true; > {
Width = TEX_BLOOMVERTICAL_WIDTH;
Height = TEX_BLOOMVERTICAL_HEIGHT;
Format = RGBA16;
};
sampler2D samplerBloomVertical { Texture = texBloomVertical; };
// Pass 14 Buffer (bloomHorizontalPass)
// Cannot be conditioned on __RENDERER__ b/c there are no
// available buffers of the same size
// Last usage is geometryPass
// Content-sized, pooled RGBA16 target. The geometry pixel shader samples
// it through samplerBloomHorizontal.
#define TEX_BLOOMHORIZONTAL_WIDTH content_size.x
#define TEX_BLOOMHORIZONTAL_HEIGHT content_size.y
#define TEX_BLOOMHORIZONTAL_SIZE int2(TEX_BLOOMHORIZONTAL_WIDTH, TEX_BLOOMHORIZONTAL_HEIGHT)
texture2D texBloomHorizontal < pooled = true; > {
Width = TEX_BLOOMHORIZONTAL_WIDTH;
Height = TEX_BLOOMHORIZONTAL_HEIGHT;
Format = RGBA16;
};
sampler2D samplerBloomHorizontal { Texture = texBloomHorizontal; };
// Pass 15 Buffer (geometryPass)
// Last usage is uncropPass
// The buffer is sized to content_size. When _DX9_ACTIVE is 0 a dedicated
// RGBA16 texture and sampler are declared; otherwise both names are aliased
// to texElectronBeams / samplerElectronBeams (the same alias target that the
// pass 12 buffer uses in that configuration).
#define TEX_GEOMETRY_WIDTH content_size.x
#define TEX_GEOMETRY_HEIGHT content_size.y
#define TEX_GEOMETRY_SIZE int2(TEX_GEOMETRY_WIDTH, TEX_GEOMETRY_HEIGHT)
#if _DX9_ACTIVE == 0
texture2D texGeometry < pooled = true; > {
Width = TEX_GEOMETRY_WIDTH;
Height = TEX_GEOMETRY_HEIGHT;
Format = RGBA16;
};
sampler2D samplerGeometry { Texture = texGeometry; };
#else
// Reuse the electron-beams buffer under this pass's names.
#define texGeometry texElectronBeams
#define samplerGeometry samplerElectronBeams
#endif
#endif // _SHARED_OBJECTS_H

View file

@ -1,44 +0,0 @@
#ifndef _VERSION_NUMBER_H
#define _VERSION_NUMBER_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2022 Alex Gunter
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// Version components. Every macro below is derived from these three values.
#define MAJOR_VERSION 2
#define MINOR_VERSION 1
#define PATCH_VERSION 0
// Yes, both sibling preprocessor functions are necessary.
// Don't "simplify" this, or the substitution won't work.
// The # operator stringizes its operand without macro-expanding it first, so
// the outer macro exists only to expand MAJOR_VERSION etc. into their values
// before the inner (underscore-suffixed) macro applies #.
#define BUILD_DOT_VERSION_(mav, miv, pav) #mav "." #miv "." #pav
#define BUILD_DOT_VERSION(mav, miv, pav) BUILD_DOT_VERSION_(mav, miv, pav)
// With the values above this expands to the tokens "2" "." "1" "." "0".
#define DOT_VERSION_STR BUILD_DOT_VERSION(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
// Again, yes, both sibling preprocessor functions are necessary.
// Don't "simplify" this, or the substitution won't work.
// The same reasoning applies to ##: operands of token pasting are not
// expanded, so the extra level is needed to paste the numeric values.
#define BUILD_UNDERSCORE_VERSION_(prefix, mav, miv, pav) prefix ## _ ## mav ## _ ## miv ## _ ## pav
#define BUILD_UNDERSCORE_VERSION(p, mav, miv, pav) BUILD_UNDERSCORE_VERSION_(p, mav, miv, pav)
// APPEND_VERSION_SUFFIX(foo) yields the identifier foo_2_1_0 with the values above.
#define APPEND_VERSION_SUFFIX(prefix) BUILD_UNDERSCORE_VERSION(prefix, MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
#endif // _VERSION_NUMBER_H

View file

@ -1,797 +0,0 @@
#include "ReShade.fxh"
/*
CRT-Consumer
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// ---- Sampling and screen-geometry controls ----
// PRE_SCALE becomes `scale` in PS_CRT_CONSUMER, where it sets the clamp range
// (0.5 - 0.5 / scale) of the pre-scaled bilinear fetch.
uniform float PRE_SCALE <
ui_type = "drag";
ui_min = 1.0;
ui_max = 4.0;
ui_step = 0.1;
ui_label = "Pre-Scale Sharpening";
> = 1.5;
// blurx / blury offset the two extra colour taps in PS_CRT_CONSUMER in
// opposite directions; they are multiplied by SourceSize.zw there.
uniform float blurx <
ui_type = "drag";
ui_min = -4.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Convergence X";
> = 0.25;
uniform float blury <
ui_type = "drag";
ui_min = -4.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Convergence Y";
> = -0.1;
// warpx / warpy are the distortion coefficients read by Warp().
uniform float warpx <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.12;
ui_step = 0.01;
ui_label = " Curvature X";
> = 0.03;
uniform float warpy <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.12;
ui_step = 0.01;
ui_label = " Curvature Y";
> = 0.04;
// corner / smoothness are read by corner0(); a corner of 0.0 skips that step
// entirely in PS_CRT_CONSUMER.
uniform float corner <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.10;
ui_step = 0.01;
ui_label = " Corner size";
> = 0.03;
uniform float smoothness <
ui_type = "drag";
ui_min = 100.0;
ui_max = 600.0;
ui_step = 5.0;
ui_label = " Border Smoothness";
> = 400.0;
// ---- Interlacing and scanline controls ----
// `inter` only matters when the source is taller than 400 lines (see the
// tex_size.y > 400.0 checks in PS_CRT_CONSUMER): when true the scanline pass
// is skipped and alternate frames are dimmed; when false scanlines are drawn
// at a vertical rate divided by Downscale.
uniform bool inter <
ui_type = "radio";
ui_label = "Interlacing Toggle";
> = true;
uniform float Downscale <
ui_type = "drag";
ui_min = 1.0;
ui_max = 8.0;
ui_step = 1.;
ui_label = "Interlacing Downscale Scanlines";
> = 2.0;
// In sw(), scanlow/scanhigh are interpolated by the distance argument to form
// the exponent factor (`beam`), while beamlow/beamhigh are interpolated by
// luminance to scale that distance (`scan`). Note the UI labels below use the
// opposite wording to the variable names.
uniform float scanlow <
ui_type = "drag";
ui_min = 1.0;
ui_max = 15.0;
ui_step = 1.0;
ui_label = "Beam low";
> = 6.0;
uniform float scanhigh <
ui_type = "drag";
ui_min = 1.0;
ui_max = 15.0;
ui_step = 1.0;
ui_label = "Beam high";
> = 8.0;
uniform float beamlow <
ui_type = "drag";
ui_min = 0.5;
ui_max = 2.5;
ui_step = 0.05;
ui_label = "Scanlines dark";
> = 1.45;
uniform float beamhigh <
ui_type = "drag";
ui_min = 0.5;
ui_max = 2.5;
ui_step = 0.05;
ui_label = "Scanlines bright";
> = 1.05;
// Blend weight (multiplied by luminance) that pulls the phosphor mask toward
// (1,1,1) in PS_CRT_CONSUMER, so brighter pixels are masked less.
uniform float preserve <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "Protect White On Masks";
> = 0.98;
// ---- Brightness, glow and noise controls ----
// PS_CRT_CONSUMER multiplies the colour by lerp(brightboost1, brightboost2, m)
// where m is the largest colour channel.
uniform float brightboost1 <
ui_type = "drag";
ui_min = 0.0;
ui_max = 3.0;
ui_step = 0.05;
ui_label = "Bright boost dark pixels";
> = 1.25;
uniform float brightboost2 <
ui_type = "drag";
ui_min = 0.0;
ui_max = 3.0;
ui_step = 0.05;
ui_label = "Bright boost bright pixels";
> = 1.0;
// glow / quality / glow_str are read by glow0(): `glow` is the loop radius in
// taps per axis, `quality` divides the tap spacing, `glow_str` scales the sum.
// glow0() is skipped when glow_str is exactly 0.0.
uniform float glow <
ui_type = "drag";
ui_min = 1.0;
ui_max = 6.0;
ui_step = 1.0;
ui_label = "Glow pixels per axis";
> = 3.0;
uniform float quality <
ui_type = "drag";
ui_min = 0.25;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Glow quality";
> = 1.0;
uniform float glow_str <
ui_type = "drag";
ui_min = 0.0001;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Glow intensity";
> = 0.3;
// Used as a divisor of noise() in PS_CRT_CONSUMER; 0.0 disables the noise step.
uniform float nois <
ui_type = "drag";
ui_min = 0.0;
ui_max = 32.0;
ui_step = 1.0;
ui_label = "Add Noise";
> = 0.0;
// Final gain: the colour is multiplied by lerp(1.0, postbr, lum).
uniform float postbr <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.5;
ui_step = 0.02;
ui_label = "Post Brightness";
> = 1.0;
// 0 = off, 1 = multiply the sampled colour by 1.0667, 2 = multiply it by 2.0.
uniform float palette_fix <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 1.0;
ui_label = "Palette Fixes. Sega, PUAE Atari ST dark colors";
> = 0.0;
// ---- Phosphor mask and slot mask controls ----
// Shadowmask selects the branch taken in mask(); values without a matching
// branch (including -1) make mask() return (1,1,1).
uniform float Shadowmask <
ui_type = "drag";
ui_min = -1.0;
ui_max = 8.0;
ui_step = 1.;
ui_label = "Mask Type";
> = 0.0;
// mask() divides the pixel coordinate by masksize before flooring it.
uniform float masksize <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.0;
ui_step = 1.0;
ui_label = "Mask Size";
> = 1.0;
// Multipliers mask() assigns to unlit (MaskDark) and lit (MaskLight) channels.
uniform float MaskDark <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 0.1;
ui_label = "Mask dark";
> = 0.2;
uniform float MaskLight <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 0.1;
ui_label = "Mask light";
> = 1.5;
// The remaining four are read by SlotMask(); slotmask == 0.0 disables it.
uniform float slotmask <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Slot Mask Strength";
> = 0.0;
uniform float slotwidth <
ui_type = "drag";
ui_min = 1.0;
ui_max = 6.0;
ui_step = 0.5;
ui_label = "Slot Mask Width";
> = 2.0;
uniform float double_slot <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.0;
ui_step = 1.0;
ui_label = "Slot Mask Height: 2x1 or 4x1";
> = 1.0;
uniform float slotms <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.0;
ui_step = 1.0;
ui_label = "Slot Mask Size";
> = 1.0;
// ---- Colour, vignette and bypass controls ----
// PS_CRT_CONSUMER raises the colour to the power 1.0 / GAMMA_OUT.
// NOTE(review): ui_min is 0.0, which makes that exponent a division by zero
// at the slider's lower end - confirm whether the minimum should be positive.
uniform float GAMMA_OUT <
ui_type = "drag";
ui_min = 0.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Gamma Out";
> = 2.25;
// Blend factor between greyscale and the original colour in saturation();
// the call is skipped when sat is exactly 1.0.
uniform float sat <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Saturation";
> = 1.0;
// Passed to contrastMatrix(); the multiply is skipped when exactly 1.0.
uniform float contrast <
ui_type = "drag";
ui_min = 0.00;
ui_max = 2.00;
ui_step = 0.05;
ui_label = "Contrast, 1.0:Off";
> = 1.0;
// Sign selects which converted colour is used (negative -> `cooler`, positive
// -> `warmer`); abs(WP) / 100 is the blend amount. 0.0 skips the step.
uniform float WP <
ui_type = "drag";
ui_min = -100.0;
ui_max = 100.0;
ui_step = 5.;
ui_label = "Color Temperature %";
> = 0.0;
// rg, rb, gr, gb, br, bg are the off-diagonal entries of the `hue` matrix
// built in PS_CRT_CONSUMER (the diagonal is 1).
uniform float rg <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Red-Green Tint";
> = 0.0;
uniform float rb <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Red-Blue Tint";
> = 0.0;
uniform float gr <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Green-Red Tint";
> = 0.0;
uniform float gb <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Green-Blue Tint";
> = 0.0;
uniform float br <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Blue-Red Tint";
> = 0.0;
uniform float bg <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Blue-Green Tint";
> = 0.0;
// vignette / vpower / vstr are read by vign(); with vignette == false the
// vignette factor is forced to 1.0.
uniform bool vignette <
ui_type = "radio";
ui_label = "Vignette On/Off";
> = false;
uniform float vpower <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "Vignette Power";
> = 0.15;
uniform float vstr <
ui_type = "drag";
ui_min = 0.0;
ui_max = 50.0;
ui_step = 1.0;
ui_label = "Vignette strength";
> = 40.0;
// When true, PS_CRT_CONSUMER returns a plain back-buffer sample (still at the
// warped coordinate) and skips every other effect.
uniform bool alloff <
ui_type = "radio";
ui_label = "Switch off shader";
> = false;
// ---- Values supplied by the host through `source` annotations ----
// Only FrameCount, BufferToViewportRatio, NormalizedNativePixelSize and
// ViewportSize are referenced by the macros and functions of this shader; the
// other uniforms in this list are declared but not read anywhere in this file.
uniform float FrameCount < source = "framecount"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportX < source = "viewportx"; >;
uniform float ViewportY < source = "viewporty"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
uniform float2 ViewportOffset < source = "viewportoffset"; >;
uniform float BufferWidth < source = "bufferwidth"; >;
uniform float BufferHeight < source = "bufferheight"; >;
uniform float NativeWidth < source = "nativewidth"; >;
uniform float NativeHeight < source = "nativeheight"; >;
uniform float InternalWidth < source = "internalwidth"; >;
uniform float InternalHeight < source = "internalheight"; >;
// Point-sampled, clamped view of the ReShade back buffer.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// iTime advances by 0.5 per frame, so frac(iTime) alternates between 0.0 and
// 0.5 on successive frames; iTimer advances by 1/60 per frame.
#define iTime (float(FrameCount)/2.0)
#define iTimer (float(FrameCount)/60.0)
// SourceSize.xy = 1 / NormalizedNativePixelSize, SourceSize.zw = NormalizedNativePixelSize.
#define SourceSize (float4(1.0/NormalizedNativePixelSize,NormalizedNativePixelSize))
#define OutputSize (ViewportSize*BufferToViewportRatio)
// Distorts a [0,1] texture coordinate for the curvature effect.
// Each axis is stretched by a factor that grows with the square of the
// distance along the other axis, using the warpx / warpy uniforms.
float2 Warp(float2 pos)
{
    // Re-centre so that (0,0) is the middle of the screen and edges are at +/-1.
    float2 centered = pos * 2.0 - 1.0;
    float2 stretch = float2(1.0 + (centered.y * centered.y) * warpx,
                            1.0 + (centered.x * centered.x) * warpy);
    centered *= stretch;
    // Back to the [0,1] range expected by the sampler.
    return centered * 0.5 + 0.5;
}
// Scanline weight for a pixel at distance `y` from a scanline, given
// luminance `l`. Returns exp2(-beam * (y * scan)^2), where `beam` is
// interpolated from scanlow/scanhigh by y and `scan` from beamlow/beamhigh by l.
float sw(float y, float l)
{
    float falloff = lerp(scanlow, scanhigh, y);
    float stretch = lerp(beamlow, beamhigh, l);
    float scaled_dist = y * stretch;
    return exp2(-falloff * scaled_dist * scaled_dist);
}
// Builds the phosphor-mask colour multiplier selected by the Shadowmask uniform.
//   x   - output-pixel coordinate; divided by masksize and floored here
//   col - current pixel colour; only read by mask types 4 and 7
//   l   - pixel luminance; scales the unlit rows of mask types 5 and 6
// Returns an RGB multiplier. Mask types with no matching branch (including
// -1) return (1,1,1), i.e. no mask.
float3 mask(float2 x, float3 col, float l)
{
    x = floor(x / masksize);
    if (Shadowmask == 0.0)
    {
        // Two-pixel pattern alternating magenta-lit and green-lit columns.
        float m = frac(x.x * 0.4999);
        if (m < 0.4999) return float3(1.0, MaskDark, 1.0);
        else return float3(MaskDark, 1.0, MaskDark);
    }
    else if (Shadowmask == 1.0)
    {
        // Three-column B/G/R pattern with a row pattern that is offset by one
        // row every three columns.
        float3 Mask = float3(MaskDark, MaskDark, MaskDark);
        float line = MaskLight;
        float odd = 0.0;
        if (frac(x.x / 6.0) < 0.5) odd = 1.0;
        if (frac((x.y + odd) / 2.0) < 0.5) line = MaskDark;
        float m = frac(x.x / 3.0);
        if (m < 0.333) Mask.b = MaskLight;
        else if (m < 0.666) Mask.g = MaskLight;
        else Mask.r = MaskLight;
        Mask *= line;
        return Mask;
    }
    else if (Shadowmask == 2.0)
    {
        // Three-column B/G/R stripes.
        float m = frac(x.x*0.3333);
        if (m < 0.3333) return float3(MaskDark, MaskDark, MaskLight);
        if (m < 0.6666) return float3(MaskDark, MaskLight, MaskDark);
        else return float3(MaskLight, MaskDark, MaskDark);
    }
    else if (Shadowmask == 3.0)
    {
        // Two-pixel pattern alternating full-bright and dark columns.
        float m = frac(x.x * 0.5);
        if (m < 0.5) return float3(1.0, 1.0, 1.0);
        else return float3(MaskDark, MaskDark, MaskDark);
    }
    else if (Shadowmask == 4.0)
    {
        // Starts from the pixel colour, forces magenta or green channels to 1
        // on alternating columns, then applies the offset row pattern.
        float3 Mask = float3(col.rgb);
        float line = MaskLight;
        float odd = 0.0;
        if (frac(x.x / 4.0) < 0.5) odd = 1.0;
        if (frac((x.y + odd) / 2.0) < 0.5) line = MaskDark;
        float m = frac(x.x / 2.0);
        if (m < 0.5) { Mask.r = 1.0; Mask.b = 1.0; }
        else Mask.g = 1.0;
        Mask *= line;
        return Mask;
    }
    else if (Shadowmask == 5.0)
    {
        // Magenta/green columns with a luminance-scaled gap row every third
        // row; the gap row differs between the two halves of each 4-column group.
        float3 Mask = float3(1.0, 1.0, 1.0);
        if (frac(x.x / 4.0) < 0.5)
        {
            if (frac(x.y / 3.0) < 0.666)
            {
                if (frac(x.x / 2.0) < 0.5) Mask = float3(1.0, MaskDark, 1.0);
                else Mask = float3(MaskDark, 1.0, MaskDark);
            }
            else Mask *= l;
        }
        else if (frac(x.x / 4.0) >= 0.5)
        {
            if (frac(x.y / 3.0) > 0.333)
            {
                if (frac(x.x / 2.0) < 0.5) Mask = float3(1.0, MaskDark, 1.0);
                else Mask = float3(MaskDark, 1.0, MaskDark);
            }
            else Mask *= l;
        }
        return Mask;
    }
    else if (Shadowmask == 6.0)
    {
        // R/G/B columns with a luminance-scaled gap row every fourth row; the
        // gap row differs between the two halves of each 6-column group.
        float3 Mask = float3(MaskDark, MaskDark, MaskDark);
        if (frac(x.x / 6.0) < 0.5)
        {
            if (frac(x.y / 4.0) < 0.75)
            {
                if (frac(x.x / 3.0) < 0.3333) Mask.r = MaskLight;
                else if (frac(x.x / 3.0) < 0.6666) Mask.g = MaskLight;
                else Mask.b = MaskLight;
            }
            // Was `Mask * l * 0.9;`, an expression statement that discarded
            // its result; the gap row is now actually scaled, as in mask 5.
            else Mask *= l * 0.9;
        }
        else if (frac(x.x / 6.0) >= 0.5)
        {
            if (frac(x.y / 4.0) >= 0.5 || frac(x.y / 4.0) < 0.25)
            {
                if (frac(x.x / 3.0) < 0.3333) Mask.r = MaskLight;
                else if (frac(x.x / 3.0) < 0.6666) Mask.g = MaskLight;
                else Mask.b = MaskLight;
            }
            // Same fix as above: apply the scale instead of discarding it.
            else Mask *= l * 0.9;
        }
        return Mask;
    }
    else if (Shadowmask == 7.0)
    {
        float m = frac(x.x * 0.3333);
        if (m < 0.3333) return float3(MaskDark, MaskLight, MaskLight * col.b); //Cyan
        if (m < 0.6666) return float3(MaskLight * col.r, MaskDark, MaskLight); //Magenta
        else return float3(MaskLight, MaskLight * col.g, MaskDark); //Yellow
    }
    else if (Shadowmask == 8.0)
    {
        // B/G/R columns at a fixed 0.9 level; rows are brightened by MaskLight
        // in a checker arrangement keyed on row parity and 6-column half.
        float3 Mask = float3(MaskDark, MaskDark, MaskDark);
        float bright = MaskLight;
        float left = 0.0;
        if (frac(x.x / 6.0) < 0.5) left = 1.0;
        float m = frac(x.x / 3.0);
        if (m < 0.333) Mask.b = 0.9;
        else if (m < 0.666) Mask.g = 0.9;
        else Mask.r = 0.9;
        if ((x.y % 2.0) == 1.0 && left == 1.0 || (x.y % 2.0) == 0.0 && left == 0.0)
            Mask *= bright;
        return Mask;
    }
    else return float3(1.0, 1.0, 1.0);
}
// Slot-mask brightness multiplier for output pixel `pos` and colour `c`.
// Returns 1.0 when the slotmask uniform is 0.0. Otherwise pixels inside a
// slot gap are darkened (less so for bright colours) and all other pixels
// are brightened (more so for dark colours).
float SlotMask(float2 pos, float3 c)
{
    if (slotmask == 0.0)
        return 1.0;

    float2 cell = floor(pos / slotms);
    float peak = pow(max(max(c.r, c.g), c.b), 1.33);
    float period_x = slotwidth * 2.0;
    float column_phase = frac(cell.x / period_x);
    // Row index inside one vertical period of 2 * double_slot rows.
    float row = floor(frac(cell.y / (2.0 * double_slot)) * 2.0 * double_slot);

    // Gaps sit in the left half of the period on row 0 and in the right half
    // on row `double_slot`, which produces the staggered layout.
    bool in_gap = (row == 0.0 && column_phase < 0.5) ||
                  (row == double_slot && column_phase >= 0.5);
    if (in_gap)
        return lerp(1.0 - slotmask, 1.0 - 0.80 * slotmask, peak);

    return 1.0 + 0.7 * slotmask * (1.0 - peak);
}
// Returns a 4x4 matrix with `contrast` on the first three diagonal entries
// and (1 - contrast) / 2 in the first three entries of the last row, so that
// mul(float4(rgb, 1), M) scales each channel and re-centres it around 0.5.
float4x4 contrastMatrix(float contrast)
{
    float offset = (1.0 - contrast) / 2.0;
    float4x4 m = float4x4(
        contrast, 0,        0,        0,
        0,        contrast, 0,        0,
        0,        0,        contrast, 0,
        offset,   offset,   offset,   1);
    return m;
}
// Returns a uniform-scale 3x3 matrix holding the vignette factor for texture
// coordinate `tex`. The factor is min((x(1-x) * y(1-y) * vstr)^vpower, 1) and
// is forced to 1.0 when the vignette uniform is false. `l` is not used.
float3x3 vign(float l, float2 tex)
{
    float2 falloff = tex * (1.0 - tex.xy);
    float gain = min(pow(falloff.x * falloff.y * vstr, vpower), 1.0);
    if (!vignette)
        gain = 1.0;

    return float3x3(gain, 0,    0,
                    0,    gain, 0,
                    0,    0,    gain);
}
// Blends `textureColor` with its weighted greyscale value using the `sat`
// uniform (0 = grey, 1 = unchanged, above 1 = oversaturated).
// For dark colours (scaled vector length below 0.5) the luma weights are
// replaced by twice their squares before the greyscale value is computed.
float3 saturation(float3 textureColor)
{
    float3 weights = float3(0.4, 0.5, 0.1);
    float brightness = length(textureColor.rgb) * 0.5775;
    if (brightness < 0.5)
    {
        float3 squared = weights * weights;
        weights = squared + squared;
    }
    float grey = dot(textureColor.rgb, weights);
    return lerp(float3(grey, grey, grey), textureColor.rgb, sat);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Glow term for `texcoord`: sums the squared, equally weighted back-buffer
// samples over a square grid of taps running from -glow to +glow on each
// axis, then scales the sum by glow_str / glow^2. `col` is not used.
float3 glow0 (float2 texcoord, float3 col)
{
    // Higher `quality` shrinks the tap spacing, which tightens the glow.
    float2 size = SourceSize.zw/quality;
    // Every tap has the same weight; it does not fall off with distance.
    // It is loop-invariant, so it is computed once here rather than per row.
    float factor = 1.0/glow;
    float3 sum = float3(0.0, 0.0, 0.0);
    // glow = taps per axis on each side of the centre; cost grows quadratically.
    for (float x = -glow; x <= glow; x = x+1.0)
    {
        for (float y = -glow; y <= glow; y = y+1.0)
        {
            float2 offset = float2(x, y) * size;
            float3 c01 = tex2D(sBackBuffer, texcoord + offset).rgb*factor;
            sum += c01*c01;
        }
    }
    return (glow_str * sum / (glow * glow ));
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Hash-style pseudo-random value in [0,1) for coordinate `co`; the iTimer
// factor makes the pattern change from frame to frame.
float noise(float2 co)
{
    float phase = iTimer * dot(co.xy, float2(12.9898, 78.233));
    return frac(sin(phase) * 43758.5453);
}
// Rounded-corner factor in [0,1] for texture coordinate `coord`: 1 inside
// the screen, falling to 0 across the rounded corner region. The radius comes
// from the `corner` uniform and the edge steepness from `smoothness`.
float corner0(float2 coord)
{
    const float2 mid = float2(0.5, 0.5);
    const float2 radius = float2(corner, corner);

    float2 p = (coord - mid) * 1.0 + mid;
    // Distance to the nearest edge per axis, with y rescaled by SourceSize.y / SourceSize.x.
    float2 edge = min(p, float2(1.0, 1.0) - p) * float2(1.0, SourceSize.y / SourceSize.x);
    // How far the point reaches into the corner square (zero outside of it).
    float2 inset = radius - min(edge, radius);
    float reach = sqrt(dot(inset, inset));
    return clamp((radius.x - reach) * smoothness, 0.0, 1.0);
}
// Matrices used by the colour-temperature step of PS_CRT_CONSUMER. They are
// applied as mul(color, M), i.e. with the colour as a row vector, and are
// chained in pairs: D50_to_XYZ then XYZ_to_D65 produces `warmer`, D65_to_XYZ
// then XYZ_to_D50 produces `cooler`.
// NOTE(review): going by the names these convert between RGB at a D65 / D50
// white point and CIE XYZ - the coefficients were not verified here.
static const float3x3 D65_to_XYZ = float3x3(
0.4306190, 0.2220379, 0.0201853,
0.3415419, 0.7066384, 0.1295504,
0.1783091, 0.0713236, 0.9390944);
static const float3x3 XYZ_to_D65 = float3x3(
3.0628971, -0.9692660, 0.0678775,
-1.3931791, 1.8760108, -0.2288548,
-0.4757517, 0.0415560, 1.0693490);
static const float3x3 D50_to_XYZ = float3x3(
0.4552773, 0.2323025, 0.0145457,
0.3675500, 0.7077956, 0.1049154,
0.1413926, 0.0599019, 0.7057489);
static const float3x3 XYZ_to_D50 = float3x3(
2.9603944, -0.9787684, 0.0844874,
-1.4678519, 1.9161415, -0.2545973,
-0.4685105, 0.0334540, 1.4216174);
// Pixel shader for the CRT_CONSUMER technique.
// Processing order: curvature warp -> three-tap colour fetch with per-channel
// convergence offsets -> optional palette / colour-temperature / tint
// adjustments -> gamma-like curve -> scanlines -> phosphor and slot masks ->
// bright boost -> output gamma -> glow, saturation, corners, noise ->
// post brightness, contrast, interlace dimming and vignette.
// With `alloff` set, only the warped back-buffer sample is returned.
float4 PS_CRT_CONSUMER(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
float2 pos = Warp(vTexCoord.xy);
float2 tex_size = SourceSize.xy;
// Warped coordinate nudged by half a source texel; used for the bypass
// sample and for the corner mask.
float2 pC4 = (pos + 0.5/tex_size);
// Fractional position inside the current source texel.
float2 fp = frac(pos * tex_size);
// Tall sources with interlacing disabled: draw scanlines at a vertical rate divided by Downscale.
if (inter == false && tex_size.y > 400.0){ fp.y = frac(pos.y * tex_size.y*1.0/Downscale);}
float4 res = float4(1.0, 1.0, 1.0, 1.0);
if (alloff == true)
res = tex2D(sBackBuffer, pC4);
else
{
float2 texel = pos * tex_size;
float2 texel_floored = floor(texel);
float scale = PRE_SCALE;
float region_range = 0.5 - 0.5 / scale;
// Figure out where in the texel to sample to get correct pre-scaled bilinear.
// Uses the hardware bilinear interpolator to avoid having to sample 4 times manually.
float2 center_dist = fp - 0.5;
float2 fpp = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5;
float2 mod_texel = texel_floored + fpp;
float2 coords = mod_texel / SourceSize.xy;
// Three taps: the centre plus two taps offset in opposite directions by (blurx, blury) texels.
float3 sample1 = tex2D(sBackBuffer, float2(coords.x + blurx*SourceSize.z, coords.y - blury*SourceSize.w)).rgb;
float3 sample2 = tex2D(sBackBuffer, coords).rgb;
float3 sample3 = tex2D(sBackBuffer, float2(coords.x - blurx*SourceSize.z, coords.y + blury*SourceSize.w )).rgb;
// Red mixes taps 1 and 2, blue mixes taps 2 and 3, green mixes all three.
float3 color = float3(sample1.r * 0.5 + sample2.r * 0.5,
sample1.g * 0.25 + sample2.g * 0.5 + sample3.g * 0.25,
sample2.b * 0.5 + sample3.b * 0.5);
if (palette_fix != 0.0)
{
if (palette_fix == 1.0) color = color* 1.0667;
else if (palette_fix == 2.0) color = color * 2.0;
}
//COLOR TEMPERATURE FROM GUEST.R-DR.VENOM
if (WP != 0.0)
{
float3 warmer = mul(color, D50_to_XYZ);
warmer = mul(warmer, XYZ_to_D65);
float3 cooler = mul(color, D65_to_XYZ);
cooler = mul(cooler, XYZ_to_D50);
float m = abs(WP) / 100.0;
float3 comp = (WP < 0.0) ? cooler : warmer;
comp = clamp(comp, 0.0, 1.0);
color = float3(lerp(color, comp, m));
}
float3x3 hue = float3x3 (1., rg, rb, //red tint
gr, 1., gb, //green tint
br, bg, 1.); //blue tint
color = mul(color, hue);
// Fixed input curve built from two power terms (2 * c^2.8 - c^3.6).
color = (2.0*pow(color,float3(2.8, 2.8, 2.8))) - pow(color,float3(3.6, 3.6, 3.6));
float lum = color.r * 0.3 + color.g * 0.6 + color.b * 0.1;
float f = frac(fp.y -0.5);
// Interlaced tall sources skip the scanline pass (the self-assignment is a no-op);
// otherwise the weights of the two nearest scanlines are summed.
if (inter == true && tex_size.y > 400.0) color = color;
else
{color = color * sw(f,lum) + color * sw (1.0-f,lum);}
float lum1 = color.r * 0.3 + color.g * 0.6 + color.b * 0.1;
// Phosphor mask, faded toward (1,1,1) for bright pixels via `preserve`.
color *= lerp(mask((vTexCoord * OutputSize.xy), color,lum1), float3(1.0, 1.0, 1.0), lum1*preserve);
if (slotmask != 0.0) color *= SlotMask((vTexCoord * OutputSize.xy) * 1.0001, color);
color *= lerp(brightboost1, brightboost2, max(max(color.r, color.g), color.b));
color = pow(color,float3(1.0 / GAMMA_OUT, 1.0 / GAMMA_OUT, 1.0 / GAMMA_OUT));
if (glow_str != 0.0) color += glow0(coords,color);
if (sat != 1.0) color = saturation(color);
if (corner != 0.0) color *= corner0(pC4);
if (nois != 0.0) color *= 1.0 + noise(coords * 2.0) / nois;
color *= lerp(1.0, postbr, lum);
res = float4(color, 1.0);
if (contrast != 1.0) res = mul(res, contrastMatrix(contrast));
// frac(iTime) is 0.0 or 0.5 on alternating frames, so every other frame is dimmed by 5%.
if (inter == true && SourceSize.y > 400.0 && frac(iTime) < 0.5) res = res * 0.95;
res.rgb = mul(res.rgb, vign(lum, vTexCoord));
}
return res;
}
// Single-pass technique: PostProcessVS (not defined in this file) feeds the
// PS_CRT_CONSUMER pixel shader.
technique CRT_CONSUMER
{
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_CRT_CONSUMER;
}
}

View file

@ -1,545 +0,0 @@
#include "ReShade.fxh"
// DariusG presents
// 'crt-Cyclon'
// Why? Because it's speedy!
// A super-fast shader based on the magnificent crt-Geom, optimized for full speed
// on a Xiaomi Note 3 Pro cellphone (around 170(?) gflops gpu or so)
// This shader uses parts from:
// crt-Geom (scanlines)
// Quilez (main filter)
// Grade (some primaries)
// Dogway's inverse Gamma
// Masks-slot-color handling, tricks etc are mine.
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or (at your option)
// any later version.
// Base beam width used by scanlineWeights(); it also appears in the `pwr`
// macro that feeds inv_gamma().
uniform float SCANLINE <
ui_type = "drag";
ui_min = 0.2;
ui_max = 0.6;
ui_step = 0.05;
ui_label = "Scanline Weight";
> = 0.3;
// When true, sources taller than 400 lines get a half-line scanline offset on
// alternating frames (see the FrameCount test in CRT_CYCLON_PS).
// The default is spelled `true` rather than the float literal 1.0 because the
// uniform is a bool and is compared against `true` in the shader.
uniform bool INTERLACE <
ui_type = "radio";
ui_label = "Interlacing On/Off";
> = true;
// Not read by the shader: min, max and step are all 0, so this entry only
// contributes its label to the settings UI as a section heading.
uniform float bogus_msk <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.0;
ui_step = 0.0;
ui_label = " [ MASK SETTINGS ] ";
> = 0.0;
// Selects the branch taken in Mask(): 0 = two-pixel pattern, 1 = three-pixel
// pattern, anything else (e.g. -1) = no mask.
uniform float M_TYPE <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Mask Type: -1:None, 0:CGWG, 1:RGB";
> = 1.0;
// Divides the output-pixel coordinate that is passed to Mask() and slot().
uniform float MSIZE <
ui_type = "drag";
ui_min = 1.0;
ui_max = 2.0;
ui_step = 1.0;
ui_label = "Mask Size";
> = 1.0;
// When true, CRT_CYCLON_PS multiplies the result by the slot() pattern on top
// of the main mask. The default is spelled `true` rather than the float
// literal 1.0 because the uniform is a bool.
uniform bool SLOT <
ui_type = "radio";
ui_label = "Slot Mask On/Off";
> = true;
// Horizontal period divisor used by slot().
uniform float SLOTW <
ui_type = "drag";
ui_min = 2.0;
ui_max = 3.0;
ui_step = 1.0;
ui_label = "Slot Mask Width";
> = 3.0;
// In Mask() type 1, a value of 0.0 lights blue in the first third of each
// period and red in the last third; any other value swaps the two.
uniform float BGR <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Subpixels BGR/RGB";
> = 0.0;
// CRT_CYCLON_PS interpolates between Maskl and Maskh by pixel brightness to
// get the dark level (`CGWG`) handed to Mask().
uniform float Maskl <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Mask Brightness Dark";
> = 0.3;
uniform float Maskh <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Mask Brightness Bright";
> = 0.75;
// Not read by the shader: section heading only (min, max and step are all 0).
uniform float bogus_geom <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.0;
ui_step = 0.0;
ui_label = " [ GEOMETRY SETTINGS ] ";
> = 0.0;
// When true, CRT_CYCLON_PS samples the bezel texture and blends it over the
// image. The default is spelled `true` rather than the float literal 1.0
// because the uniform is a bool and is compared against `true` in the shader.
uniform bool bzl <
ui_type = "radio";
ui_label = "Bezel On/Off";
> = true;
// Grey level that the bezel colour is blended 50/50 with in CRT_CYCLON_PS.
uniform float ambient <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Ambient Light";
> = 0.40;
// The texture coordinate is multiplied by (1 - zoomx, 1 - zoomy) before warping.
uniform float zoomx <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Zoom Image X";
> = 0.0;
uniform float zoomy <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Zoom Image Y";
> = 0.0;
// (centerx, centery) / 100 is subtracted from the texture coordinate before warping.
uniform float centerx <
ui_type = "drag";
ui_min = -5.0;
ui_max = 5.0;
ui_step = 0.05;
ui_label = "Image Center X";
> = 0.0;
uniform float centery <
ui_type = "drag";
ui_min = -5.0;
ui_max = 5.0;
ui_step = 0.05;
ui_label = "Image Center Y";
> = 0.0;
// Distortion coefficients read by Warp().
uniform float WARPX <
ui_type = "drag";
ui_min = 0.00;
ui_max = 0.25;
ui_step = 0.01;
ui_label = "Curvature Horizontal";
> = 0.02;
uniform float WARPY <
ui_type = "drag";
ui_min = 0.00;
ui_max = 0.25;
ui_step = 0.01;
ui_label = "Curvature Vertical";
> = 0.01;
// When true, CRT_CYCLON_PS derives a squared horizontal distance from the
// screen centre and passes it to scanlineWeights(), narrowing the beam toward
// the left and right edges. The default is spelled `true` rather than the
// float literal 1.0 because the uniform is a bool.
uniform bool vig <
ui_type = "radio";
ui_label = "Vignette On/Off";
> = true;
// Not read by the shader: section heading only (min, max and step are all 0).
uniform float bogus_col <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.0;
ui_step = 0.0;
ui_label = " [ COLOR SETTINGS ] ";
> = 0.0;
// Per-channel weight used to compute the brightness `l` that drives the mask
// dark level (and the POTATO-mode gain) in CRT_CYCLON_PS.
uniform float BR_DEP <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.333;
ui_step = 0.01;
ui_label = "Scan/Mask Brightness Dependence";
> = 0.2;
// 0 = no conversion, 1 = PAL matrix, 2 = NTSC matrix, 3 = NTSC_J matrix.
uniform float c_space <
ui_type = "drag";
ui_min = 0.0;
ui_max = 3.0;
ui_step = 1.0;
ui_label = "Color Space: sRGB,PAL,NTSC-U,NTSC-J";
> = 0.0;
// Unless this is exactly 1.0, the sampled colour is squared before further processing.
uniform float EXT_GAMMA <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "External Gamma In (Glow etc)";
> = 0.0;
// Blend factor between the greyscale value and the colour.
uniform float SATURATION <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Saturation";
> = 1.0;
// Plain multiplier applied after saturation.
uniform float BRIGHTNESS_ <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 0.01;
ui_label = "Brightness, Sega fix:1.06";
> = 1.0;
// Subtracted from the colour, which is then rescaled by the `blck` macro, 1 / (1 - BLACK).
uniform float BLACK <
ui_type = "drag";
ui_min = -0.20;
ui_max = 0.20;
ui_step = 0.01;
ui_label = "Black Level";
> = 0.0;
// RG, RB and GB fill the antisymmetric off-diagonal entries of the `hue`
// matrix built in CRT_CYCLON_PS.
uniform float RG <
ui_type = "drag";
ui_min = -0.25;
ui_max = 0.25;
ui_step = 0.01;
ui_label = "Green <-to-> Red Hue";
> = 0.0;
uniform float RB <
ui_type = "drag";
ui_min = -0.25;
ui_max = 0.25;
ui_step = 0.01;
ui_label = "Blue <-to-> Red Hue";
> = 0.0;
uniform float GB <
ui_type = "drag";
ui_min = -0.25;
ui_max = 0.25;
ui_step = 0.01;
ui_label = "Blue <-to-> Green Hue";
> = 0.0;
// Not read by the shader: section heading only (min, max and step are all 0).
uniform float bogus_con <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.0;
ui_step = 0.0;
ui_label = " [ CONVERGENCE SETTINGS ] ";
> = 0.0;
// Blend weight between the centre sample and the per-channel shifted samples.
uniform float C_STR <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.5;
ui_step = 0.05;
ui_label = "Convergence Overall Strength";
> = 0.0;
// Horizontal offsets, in source texels, of the shifted red / green / blue samples.
uniform float CONV_R <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Convergence Red X-Axis";
> = 0.0;
uniform float CONV_G <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Convergence Green X-axis";
> = 0.0;
uniform float CONV_B <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Convergence Blue X-Axis";
> = 0.0;
// 1.0 selects the cheaper paths: a sine-based mask in Mask() and a plain
// sqrt output curve instead of inv_gamma().
uniform float POTATO <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Potato Boost(Simple Gamma, adjust Mask)";
> = 0.0;
// Rescale factor applied after BLACK has been subtracted from the colour.
#define blck ((1.0)/(1.0-BLACK))
#define pi 3.1415926535897932384626433
// Values supplied by the host through `source` annotations. Only
// BufferViewportRatio, NormalizedNativePixelSize, ViewportSize and FrameCount
// are read by this shader; the remaining ones in this list are declared but unused.
uniform float2 BufferViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 InternalPixelSize < source = "internal_pixel_size"; >;
uniform float2 NativePixelSize < source = "native_pixel_size"; >;
uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float UpscaleMultiplier < source = "upscale_multiplier"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform int FrameCount < source = "framecount"; >;
// Linear-filtered back buffer with BORDER addressing on all axes.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;MipFilter=LINEAR;};
// Bezel overlay image, loaded from a file and declared at back-buffer size.
texture tBezel < source = "crt-cyclon/bezel.png"; >
{
Width = BUFFER_WIDTH;
Height = BUFFER_HEIGHT;
MipLevels = 1;
};
sampler sBezel { Texture = tBezel; AddressU = BORDER; AddressV = BORDER; MinFilter = LINEAR; MagFilter = LINEAR;};
// Phosphor mask multiplier for mask-space coordinate `pos`.
//   CGWG - level given to the unlit channels (lit channels are 1.0)
// M_TYPE 0 gives a two-pixel magenta/green pattern, M_TYPE 1 a three-pixel
// pattern whose outer columns are swapped when BGR is non-zero. With
// POTATO == 1.0 both types use a grey sine wave instead. Any other M_TYPE
// returns (1,1,1).
float3 Mask(float2 pos, float CGWG)
{
    if (M_TYPE == 0.0)
    {
        if (POTATO == 1.0)
        {
            float wave = (1.0 - CGWG) * sin(pos.x * pi) + CGWG;
            return float3(wave, wave, wave);
        }
        float phase2 = frac(pos.x * 0.5);
        return (phase2 < 0.5) ? float3(1.0, CGWG, 1.0)
                              : float3(CGWG, 1.0, CGWG);
    }

    if (M_TYPE == 1.0)
    {
        if (POTATO == 1.0)
        {
            float wave = (1.0 - CGWG) * sin(pos.x * pi * 0.6667) + CGWG;
            return float3(wave, wave, wave);
        }
        float3 blue_lit = float3(CGWG, CGWG, 1.0);
        float3 red_lit = float3(1.0, CGWG, CGWG);
        bool bgr_is_zero = (BGR == 0.0);
        float phase3 = frac(pos.x * 0.3333);
        if (phase3 < 0.3333)
            return bgr_is_zero ? blue_lit : red_lit;
        if (phase3 < 0.6666)
            return float3(CGWG, 1.0, CGWG);
        return bgr_is_zero ? red_lit : blue_lit;
    }

    return float3(1.0, 1.0, 1.0);
}
// Beam intensity at `distance` from the vertical centre of a scanline.
//   color - pixel colour; brighter pixels widen the beam
//   x     - squared horizontal distance from the screen centre (0 when the
//           vignette is off); larger values narrow the beam
// The profile is 0.4 * exp(-(distance / wid)^2) / wid, so a narrower beam
// has a higher peak at its centre than a wider one.
float scanlineWeights(float distance, float3 color, float x)
{
    // 0.8 is the vignette strength.
    float edge_term = 0.25 - 0.8 * x;
    float wid = SCANLINE + 0.15 * dot(color, float3(edge_term, edge_term, edge_term));
    float ratio = distance / wid;
    return 0.4 * exp(-ratio * ratio) / wid;
}
// Blend exponent handed to inv_gamma(): 1 / ((1 - SCANLINE) * (1 - 0.8 * CGWG)) - 1.2,
// replicated across all three components. The macro expands to an expression
// that uses a variable named CGWG, so one must be in scope wherever it is used
// (CRT_CYCLON_PS declares it as a local).
#define pwr float3(1.0/((-1.0*SCANLINE+1.0)*(-0.8*CGWG+1.0))-1.2,1.0/((-1.0*SCANLINE+1.0)*(-0.8*CGWG+1.0))-1.2,1.0/((-1.0*SCANLINE+1.0)*(-0.8*CGWG+1.0))-1.2)
// Output curve that compensates for the darkening introduced by the scanline
// and mask stages: blends between sqrt(col) and the steeper quarter-circle
// curve sqrt(1 - (col - 1)^2), using `power` as the per-channel blend weight.
float3 inv_gamma(float3 col, float3 power)
{
    float3 below_one = col - 1.0;
    float3 circle_curve = sqrt(1.0 - below_one * below_one);
    return lerp(sqrt(col), circle_curve, power);
}
// Colour-space matrices selected by the c_space uniform in CRT_CYCLON_PS
// (1 = PAL, 2 = NTSC, 3 = NTSC_J). They are applied as mul(M, res), i.e. with
// the colour as a column vector.
// standard 6500k
static const float3x3 PAL = float3x3(
1.0740 , -0.0574 , -0.0119 ,
0.0384 , 0.9699 , -0.0059 ,
-0.0079 , 0.0204 , 0.9884 );
// standard 6500k
static const float3x3 NTSC = float3x3(
0.9318 , 0.0412 , 0.0217 ,
0.0135 , 0.9711 , 0.0148 ,
0.0055 , -0.0143 , 1.0085 );
// standard 8500k
static const float3x3 NTSC_J = float3x3(
0.9501 , -0.0431 , 0.0857 ,
0.0265 , 0.9278 , 0.0432 ,
0.0011 , -0.0206 , 1.3153 );
// Slot-mask pattern for mask-space coordinate `pos`: a grey multiplier of
// either 0.5 or 1.5 arranged as a checkerboard whose horizontal period is
// SLOTW and whose vertical period is 1.
// The original if / else-if chain had no return on its final (unreachable)
// path, so not every control path returned a value; this form returns on all paths.
float3 slot(float2 pos)
{
    bool left_half = frac(pos.x / SLOTW) < 0.5;
    bool top_half = frac(pos.y) < 0.5;
    // Dark where both halves agree (top-left, bottom-right), bright elsewhere.
    float level = (left_half == top_half) ? 0.5 : 1.5;
    return float3(level, level, level);
}
// Distorts a [0,1] texture coordinate for the curvature effect: each axis is
// stretched by a factor that grows with the square of the distance along the
// other axis, using the WARPX / WARPY uniforms.
float2 Warp(float2 pos)
{
    // Work in [-1, 1] with the origin at the screen centre.
    float2 centered = pos * 2.0 - 1.0;
    float2 stretch = float2(1.0 + centered.y * centered.y * WARPX,
                            1.0 + centered.x * centered.x * WARPY);
    centered *= stretch;
    return centered * 0.5 + 0.5;
}
// NOTE(review): declared as float2 although the "bufferheight" source name
// suggests a single value, and it is not read anywhere in this shader -
// confirm the intended type or whether the declaration is needed at all.
uniform float2 BufferHeight < source = "bufferheight"; >;
// Pixel shader for the CRT_CYCLON technique.
// Processing order: zoom / centre / warp -> bezel fetch -> smoothed vertical
// filter -> per-channel convergence -> colour-space conversion -> scanlines
// (with interlace handling) -> mask and slot mask -> output curve ->
// saturation, brightness, hue and black level -> bezel composite.
float4 CRT_CYCLON_PS(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0) : SV_Target
{
// SourceSize.xy is the native resolution in pixels, .zw the size of one native pixel.
float4 SourceSize = float4(1.0 / NormalizedNativePixelSize, NormalizedNativePixelSize);
float2 OutputSize = ViewportSize;
float2 scale = BufferViewportRatio.xy;
// Texture coordinate rescaled around the centre by the buffer/viewport ratio; used for the bezel lookup.
float2 warpcoords = (vTexCoord-float2(0.5,0.5)) * BufferViewportRatio + float2(0.5,0.5);
// Hue matrix inside main() to avoid GLES error
float3x3 hue = float3x3(
1.0, -RG, -RB,
RG, 1.0, -GB,
RB, GB, 1.0
);
// zoom in and center screen for bezel
float2 pos = Warp((vTexCoord*float2(1.0-zoomx,1.0-zoomy)-float2(centerx,centery)/100.0));
float4 bez = float4(0.0,0.0,0.0,0.0);
// if (bzl == 1.0) bez = tex2D(sBezel,vTexCoord*SourceSize.xy/OriginalSize.xy*0.97+float2(0.015,0.015));
// if (bzl == 1.0) bez = tex2D(sBezel,vTexCoord*scale*0.97+float2(0.015,0.015));
if (bzl == true) bez = tex2D(sBezel,warpcoords*0.97+float2(0.015,0.015)); // Makes the bezel follow the game's aspect ratio.
// Mix the bezel colour 50/50 with the ambient grey level.
bez.rgb = lerp(bez.rgb, float3(ambient,ambient,ambient),0.5);
// Keep the unfiltered warped position for the scanline phase below.
float2 bpos = pos;
float2 ps = SourceSize.zw;
float2 dx = float2(ps.x,0.0);
// Quilez
float2 ogl2 = pos*SourceSize.xy;
float2 i = floor(pos*SourceSize.xy) + 0.5;
float f = ogl2.y - i.y;
pos.y = (i.y + 4.0*f*f*f)*ps.y; // smooth
// Horizontally the position is only pulled 20% toward the texel centre.
pos.x = lerp(pos.x, i.x*ps.x, 0.2);
// Convergence
float3 res0 = tex2D(sBackBuffer,pos).rgb;
float resr = tex2D(sBackBuffer,pos + dx*CONV_R).r;
float resb = tex2D(sBackBuffer,pos + dx*CONV_B).b;
float resg = tex2D(sBackBuffer,pos + dx*CONV_G).g;
float3 res = float3( res0.r*(1.0-C_STR) + resr*C_STR,
res0.g*(1.0-C_STR) + resg*C_STR,
res0.b*(1.0-C_STR) + resb*C_STR
);
// Vignette
// x ends up as the squared horizontal distance from the screen centre (0 when the vignette is off).
float x = 0.0;
if (vig == true){
x = vTexCoord.x*scale.x-0.5;
// x = vTexCoord.x-0.5;
x = x*x;}
// Brightness estimate that drives the mask dark level further down.
float l = dot(float3(BR_DEP,BR_DEP,BR_DEP),res);
// Color Spaces
if(EXT_GAMMA != 1.0) res *= res;
if (c_space != 0.0) {
if (c_space == 1.0) res = mul(PAL,res);
if (c_space == 2.0) res = mul(NTSC,res);
if (c_space == 3.0) res = mul(NTSC_J,res);
// Apply CRT-like luminances
res /= float3(0.24,0.69,0.07);
res *= float3(0.29,0.6,0.11);
res = clamp(res,0.0,1.0);
}
// Vertical phase within the current scanline.
float s = frac(bpos.y*SourceSize.y-0.5);
// handle interlacing
if (SourceSize.y > 400.0)
{
// Tall sources use half the scanline rate.
s = frac(bpos.y*SourceSize.y/2.0-0.5);
// if (INTERLACE == 1.0) s = mod(float(FrameCount),2.0) < 1.0 ? s: s+0.5;
// Odd frames shift the phase by half a line.
if (INTERLACE == true) s = (float(FrameCount) % 2.0) < 1.0 ? s: s+0.5;
}
// Calculate CRT-Geom scanlines weight and apply
float weight = scanlineWeights(s, res, x);
float weight2 = scanlineWeights(1.0-s, res, x);
res *= weight + weight2;
// Masks
float2 xy = vTexCoord*OutputSize.xy*scale/MSIZE;
// float2 xy = vTexCoord*OutputSize.xy/MSIZE;
// The name CGWG is also required by the `pwr` macro used below.
float CGWG = lerp(Maskl, Maskh, l);
res *= Mask(xy, CGWG);
// Apply slot mask on top of Trinitron-like mask
if (SLOT == true) res *= lerp(slot(xy/2.0),float3(1.0,1.0,1.0),CGWG);
// Output curve: full inverse gamma, or a plain sqrt with a brightness-dependent gain in POTATO mode.
if (POTATO == 0.0) res = inv_gamma(res,pwr);
else {res = sqrt(res); res *= lerp(1.3,1.1,l);}
// Saturation
float lum = dot(float3(0.29,0.60,0.11),res);
res = lerp(float3(lum,lum,lum),res,SATURATION);
// Brightness, Hue and Black Level
res *= BRIGHTNESS_;
res = mul(hue,res);
res -= float3(BLACK,BLACK,BLACK);
res *= blck;
// Apply bezel code, adapted from New-Pixie
// The outer lerp has a weight of 1, so the result is the inner lerp: the
// clamped image blended toward the bezel colour by the squared bezel alpha.
if (bzl == true)
res.rgb = lerp(res.rgb, lerp(max(res.rgb, 0.0), pow( abs(bez.rgb), float3( 1.4,1.4,1.4 ) ), bez.w * bez.w), float3( 1.0,1.0,1.0 ) );
return float4(res, 1.0);
}
// Technique entry point: one pass that pairs the PostProcessVS vertex shader
// with the CRT_CYCLON_PS pixel shader.
technique CRT_CYCLON
{
pass PS_CRT_CYCLON
{
VertexShader = PostProcessVS;
PixelShader = CRT_CYCLON_PS;
}
}

View file

@ -1,150 +0,0 @@
#include "ReShade.fxh"
/*
zfast_crt_geo - A simple, fast CRT shader.
Copyright (C) 2017 Greg Hogan (SoltanGris42)
Copyright (C) 2023 Jose Linares (Dogway)
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
Notes: This shader does scaling with a weighted linear filter
based on the algorithm by Iñigo Quilez here:
https://iquilezles.org/articles/texture/
but modified to be somewhat sharper. Then a scanline effect that varies
based on pixel brightness is applied along with a monochrome aperture mask.
This shader runs at ~60fps on the Chromecast HD (10GFlops) on a 1080p display.
(https://forums.libretro.com/t/android-googletv-compatible-shaders-nitpicky)
Dogway: I modified zfast_crt.glsl shader to include screen curvature,
vignetting, round corners and phosphor*temperature. Horizontal pixel is left out
from the Quilez' algo (read above) to provide a more S-Video like horizontal blur.
The scanlines and mask are also now performed in the recommended linear light.
For this to run smoothly on GPU deprived platforms like the Chromecast and
older consoles, I had to remove several parameters and hardcode them into the shader.
Another POV is to run the shader on handhelds like the Switch or SteamDeck so they consume less battery.
*/
// User-tunable scanline strength (0..15, default 7). Feeds the scanLineWeight
// term in the pixel shader and the `pwr` gamma compensation macro.
uniform float SCANLINE_WEIGHT <
ui_type = "drag";
ui_min = 0.0;
ui_max = 15.0;
ui_step = 0.5;
ui_label = "Scanline Amount";
> = 7.0;
// User-tunable aperture-mask darkening (0..1, default 0.5). Also feeds `pwr`.
uniform float MASK_DARK <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Mask Effect Amount";
> = 0.5;
// Values filled in by the host through the `source` annotation.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float BufferWidth < source = "bufferwidth"; >;
uniform float BufferHeight < source = "bufferheight"; >;
// Back buffer sampler: clamped addressing, linear min/mag filtering.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=LINEAR;MinFilter=LINEAR;};
// Extra vertex-to-pixel payload for the zfast shader.
struct ST_VertexOut
{
// Written by the vertex shader with NormalizedNativePixelSize.
float2 invDims : TEXCOORD1;
};
// Full-screen vertex shader: builds one oversized triangle from the vertex
// index alone and forwards the native pixel size to the pixel shader.
//   id       - vertex index (0, 1 or 2)
//   position - clip-space position of the vertex
//   texcoord - texture coordinate, 0 or 2 on each axis
//   vVARS    - receives NormalizedNativePixelSize in invDims
void VS_CRT_Geo_zFast(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
{
    // Vertex 2 pushes U out to 2.0 and vertex 1 pushes V out to 2.0.
    texcoord = float2((id == 2) ? 2.0 : 0.0,
                      (id == 1) ? 2.0 : 0.0);

    // Remap [0,2] texture space onto clip space, flipping the Y axis.
    float2 clipXY = texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0);
    position = float4(clipXY, 0.0, 1.0);

    vVARS.invDims = NormalizedNativePixelSize;
}
// Mask scale: 0.3333 when the buffer is taller than 1499 pixels, otherwise 0.5.
#define MSCL (BufferHeight > 1499.0 ? 0.3333 : 0.5)
// This compensates the scanline+mask embedded gamma from the beam dynamics
// (a float3 splat derived from SCANLINE_WEIGHT and MASK_DARK).
#define pwr ((1.0/((-0.0325*SCANLINE_WEIGHT+1.0)*(-0.311*MASK_DARK+1.0))-1.2).xxx)
// NTSC-J (D93) -> Rec709 D65 Joint Matrix (with D93 simulation)
// This is compensated for a linearization hack (RGB*RGB and then sqrt())
// The pixel shader multiplies this matrix by the vignette factor and applies
// it to the squared colour.
static const float3x3 P22D93 = float3x3(
1.00000, 0.00000, -0.06173,
0.07111, 0.96887, -0.01136,
0.00000, 0.08197, 1.07280);
// Gamma-encodes a colour, blending between a plain square root and a
// circular curve sqrt(1 - (col-1)^2).
//   col   - colour to encode
//   power - per-channel blend factor (0 = sqrt curve, 1 = circular curve)
// Returns the encoded colour.
float3 inv_gamma(float3 col, float3 power)
{
    float3 offset = col - 1.0;
    offset *= offset;
    float3 circular = sqrt(1.0 - offset);
    return lerp(sqrt(col), circular, power);
}
// Applies a fixed barrel distortion to a [0,1] texture coordinate.
// Each axis is stretched in proportion to the squared distance from the
// centre along the other axis (0.0276 horizontally, 0.0414 vertically).
float2 Warp(float2 pos)
{
    // Work in [-1,1] so the centre of the screen is the origin.
    float2 centred = pos*2.0-1.0;
    float2 bulge = float2(1.0 + (centred.y*centred.y)*0.0276,
                          1.0 + (centred.x*centred.x)*0.0414);
    centred *= bulge;
    // Back to [0,1] texture space.
    return centred*0.5 + 0.5;
}
// Pixel shader for zfast_crt_geo.
// Warps the coordinate, samples the back buffer with a sharpened vertical
// filter, applies the colour matrix with vignette, blends in scanline and
// mask weights, blacks out the rounded corners and gamma-encodes the result.
//   vpos      - pixel position (unused)
//   vTexCoord - unwarped texture coordinate
//   vVARS     - carries the native pixel size in invDims
// Returns the final colour with alpha 1.
float4 PS_CRT_Geo_zFast(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD0, in ST_VertexOut vVARS) : SV_Target
{
    float2 xy = Warp(vTexCoord);

    // Distance to the nearest edge on each axis; the X term is inverted so
    // the comparison further down carves out rounded corners.
    float2 corn = min(xy,1.0-xy);
    corn.x = 0.0001/corn.x;

    // Vignette from the unwarped coordinate, capped at 1.
    float2 falloff = vTexCoord;
    falloff *= (1.0 - falloff.xy);
    float vig = falloff.x * falloff.y * 46.0;
    vig = min(sqrt(vig), 1.0);

    // Vertical position measured in source texels.
    float ratio_scale = xy.y / NormalizedNativePixelSize.y - 0.5;
    // Centre of the texel row underneath.
    float rowCentre = floor(ratio_scale) + 0.5;
    // Like "Quilez scaling" but sharper: cubic in the offset from the centre.
    float offset = ratio_scale - rowCentre;
    float offsetSq = offset*offset;
    float sampleV = (rowCentre + 4.0*offsetSq*offset)*vVARS.invDims.y;

    // Monochrome aperture mask keyed on the output column.
    float whichmask = floor(vTexCoord.x*BufferWidth)*(-MSCL);
    float mask = 1.0 + float(frac(whichmask) < MSCL)*(-MASK_DARK);

    float3 colour = tex2D(sBackBuffer, float2(xy.x,sampleV)).rgb;
    // Square the colour before the matrix; negative results are clamped to 0.
    colour = max(mul(P22D93 * vig, colour*colour), 0.0.xxx);

    float scanLineWeight = (1.5 - SCANLINE_WEIGHT*(offsetSq - offsetSq*offsetSq));

    // Outside the rounded-corner region the output is black.
    if (corn.y <= corn.x || corn.x < 0.0001 )
    {
        colour = 0.0.xxx;
    }

    // Brighter pixels receive less of the scanline/mask attenuation.
    float brightness = colour.r*0.26667+colour.g*0.26667+colour.b*0.26667;
    float3 shaded = colour.rgb*lerp(scanLineWeight*mask, 1.0, brightness);
    return float4(inv_gamma(shaded,pwr),1.0);
}
// Technique entry point: one unnamed pass that pairs VS_CRT_Geo_zFast with
// PS_CRT_Geo_zFast.
technique CRT_Geo_zFast
{
pass
{
VertexShader = VS_CRT_Geo_zFast;
PixelShader = PS_CRT_Geo_zFast;
}
}

View file

@ -1,654 +0,0 @@
#include "ReShade.fxh"
/*
CRT-interlaced
Copyright (C) 2010-2012 cgwg, Themaister and DOLLS
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
(cgwg gave their consent to have the original version of this shader
distributed under the GPL in this message:
http://board.byuu.org/viewtopic.php?p=26075#p26075
"Feel free to distribute my shaders under the GPL. After all, the
barrel distortion code was taken from the Curvature shader, which is
under the GPL."
)
This shader variant is pre-configured with screen curvature
*/
// ---- User-tunable parameters -------------------------------------------
// Each uniform below is exposed in the UI; range, step and default are given
// by its annotations and initialiser.

// Gamma applied to sampled texels (see the TEX2D macro).
uniform float CRTgamma <
ui_type = "drag";
ui_min = 0.1;
ui_max = 5.0;
ui_step = 0.1;
ui_label = "CRTGeom Target Gamma";
> = 2.4;
// Gamma of the output device; the final colour is raised to 1/monitorgamma.
uniform float monitorgamma <
ui_type = "drag";
ui_min = 0.1;
ui_max = 5.0;
ui_step = 0.1;
ui_label = "CRTGeom Monitor Gamma";
> = 2.2;
// Curvature: distance term used by the intersect/fwtrans geometry helpers.
uniform float d <
ui_type = "drag";
ui_category = "Curvature";
ui_min = 0.1;
ui_max = 3.0;
ui_step = 0.1;
ui_label = "CRTGeom Distance";
> = 1.5;
// Curvature: when false the pixel shader uses the texture coordinate as-is.
uniform bool CURVATURE <
ui_category = "Curvature";
ui_type = "radio";
ui_label = "CRTGeom Curvature Toggle";
> = true;
// Curvature: selects which axis of the `aspect` macro is scaled.
uniform bool invert_aspect <
ui_type = "radio";
ui_category = "Curvature";
ui_label = "CRTGeom Curvature Aspect Inversion";
> = false;
// Curvature: radius term used by the geometry helpers.
uniform float R <
ui_type = "drag";
ui_category = "Curvature";
ui_min = 0.1;
ui_max = 10.0;
ui_step = 0.1;
ui_label = "CRTGeom Curvature Radius";
> = 2.0;
// Curvature: size of the rounded corner (see corner()).
uniform float cornersize <
ui_type = "drag";
ui_category = "Curvature";
ui_min = 0.001;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "CRTGeom Corner Size";
> = 0.03;
// Curvature: multiplier that sharpens the corner edge (see corner()).
uniform float cornersmooth <
ui_type = "drag";
ui_category = "Curvature";
ui_min = 80.0;
ui_max = 2000.0;
ui_step = 100.0;
ui_label = "CRTGeom Corner Smoothness";
> = 1000.0;
// Curvature: tilt angles; the vertex shader takes their sine and cosine.
uniform float x_tilt <
ui_type = "drag";
ui_category = "Curvature";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "CRTGeom Horizontal Tilt";
> = 0.0;
uniform float y_tilt <
ui_type = "drag";
ui_category = "Curvature";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "CRTGeom Vertical Tilt";
> = 0.0;
// Overscan percentages; transform() divides by these values / 100.
uniform float overscan_x <
ui_type = "drag";
ui_min = -125.0;
ui_max = 125.0;
ui_step = 0.5;
ui_label = "CRTGeom Horiz. Overscan %";
> = 100.0;
uniform float overscan_y <
ui_type = "drag";
ui_min = -125.0;
ui_max = 125.0;
ui_step = 0.5;
ui_label = "CRTGeom Vert. Overscan %";
> = 100.0;
// Image centre offset; the vertex shader subtracts these values / 100 from
// the texture coordinate.
uniform float centerx <
ui_type = "drag";
ui_min = -100.0;
ui_max = 100.0;
ui_step = 0.1;
ui_label = "Image Center X";
> = 0.00;
uniform float centery <
ui_type = "drag";
ui_min = -100.0;
ui_max = 100.0;
ui_step = 0.1;
ui_label = "Image Center Y";
> = 0.00;
// Strength of the alternating green/magenta dot mask.
uniform float DOTMASK <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "CRTGeom Dot Mask Strength";
> = 0.3;
// Multiplier applied to the texture size along the scan direction.
uniform float SHARPER <
ui_type = "drag";
ui_min = 1.0;
ui_max = 3.0;
ui_step = 1.0;
ui_label = "CRTGeom Sharpness";
> = 1.0;
// Divisor applied to the scanline distance in scanlineWeights().
uniform float scanline_weight <
ui_type = "drag";
ui_min = 0.1;
ui_max = 0.5;
ui_step = 0.05;
ui_label = "CRTGeom Scanline Weight";
> = 0.3;
// Swaps the roles of X and Y throughout the shader.
uniform bool vertical_scanlines <
ui_type = "radio";
ui_label = "CRTGeom Vertical Scanlines";
> = false;
// Additive brightness term in scanlineWeights().
uniform float lum <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "CRTGeom Luminance";
> = 0.0;
// 0 or 1; selects the line-count threshold (200 vs 1000) used to build ilfac
// and gates the per-frame field offset.
uniform float interlace_detect <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "CRTGeom Interlacing Simulation";
> = 1.0;
// ---- Host-supplied values (bound through the `source` annotation) ---------
uniform float FrameCount < source = "framecount"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 InternalPixelSize < source = "internal_pixel_size"; >;
uniform float2 NativePixelSize < source = "native_pixel_size"; >;
uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float UpscaleMultiplier < source = "upscale_multiplier"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
// Back buffer sampler: border addressing, point min/mag filtering.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
// Comment the next line to disable interpolation in linear gamma (and
// gain speed).
#define LINEAR_PROCESSING
// Enable 3x oversampling of the beam profile; improves moire effect caused by scanlines+curvature
#define OVERSAMPLE
// Use the older, purely gaussian beam profile; uncomment for speed
//#define USEGAUSSIAN
// Macros.
// FIX keeps a magnitude away from zero so it is safe to divide by.
// The expansion deliberately has no trailing semicolon, so FIX(x) behaves
// like an ordinary expression and can be nested inside larger ones.
#define FIX(c) max(abs(c), 1e-5)
#define PI 3.141592653589
// TEX2D samples the back buffer, raising the texel to CRTgamma when
// interpolation in linear gamma is enabled.
#ifdef LINEAR_PROCESSING
# define TEX2D(c) pow(tex2D(sBackBuffer, (c)), float4(CRTgamma,CRTgamma,CRTgamma,CRTgamma))
#else
# define TEX2D(c) tex2D(sBackBuffer, (c))
#endif
// aspect ratio
#define aspect (invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
// Fixed overscan factor (expression macro, no trailing semicolon).
#define overscan (float2(1.01,1.01))
// Values precomputed by VS_CRT_Geom and handed to PS_CRT_Geom.
struct ST_VertexOut
{
// sin/cos of (x_tilt, y_tilt).
float2 sinangle : TEXCOORD1;
float2 cosangle : TEXCOORD2;
// Result of vs_maxscale(): xy offset plus a scale factor in z.
float3 stretch : TEXCOORD3;
// Interlacing factor per axis, clamped to [1, 2].
float2 ilfac : TEXCOORD4;
// The size of one texel, in texture-coordinates (ilfac / TextureSize).
float2 one : TEXCOORD5;
// Output pixel coordinate along the mask axis, used for the dot mask.
float mod_factor : TEXCOORD6;
// Source size with SHARPER applied along one axis.
float2 TextureSize : TEXCOORD7;
};
// Vertex-stage helper: solves the quadratic A*t^2 + B*t + C = 0 built from
// the curvature parameters R and d, and returns the root taken with the
// negative square root.
float vs_intersect(float2 xy, float2 sinangle, float2 cosangle)
{
    float qa = dot(xy,xy) + d*d;
    float qb = 2.0*(R*(dot(xy,sinangle)-d*cosangle.x*cosangle.y)-d*d);
    float qc = d*d + 2.0*R*d*cosangle.x*cosangle.y;
    float discriminant = qb*qb-4.0*qa*qc;
    return (-qb-sqrt(discriminant))/(2.0*qa);
}
// Vertex-stage backward transform: maps a screen-plane position to
// coordinates on the curved surface, using vs_intersect for the hit point.
float2 vs_bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
    float hit = vs_intersect(xy, sinangle, cosangle);
    float2 pt = (float2(hit, hit)*xy - float2(-R, -R)*sinangle) / float2(R, R);

    float2 ptOverCos = pt/cosangle;
    float2 tanAngle = sinangle/cosangle;

    // Second quadratic, this time taking the root with the positive sqrt.
    float qa = dot(tanAngle, tanAngle) + 1.0;
    float qb = -2.0*dot(ptOverCos, tanAngle);
    float qc = dot(ptOverCos, ptOverCos) - 1.0;
    float root = (-qb + sqrt(qb*qb - 4.0*qa*qc))/(2.0*qa);

    float2 uv = (pt - root*sinangle)/cosangle;
    // FIX keeps the arc length away from zero before the division below.
    float arc = FIX(R*acos(root));
    return uv*arc/sin(arc/R);
}
// Vertex-stage forward transform: maps coordinates on the curved surface
// back to the screen plane.
float2 vs_fwtrans(float2 uv, float2 sinangle, float2 cosangle)
{
    // FIX keeps the radius away from zero before it is used as a divisor.
    float radius = FIX(sqrt(dot(uv,uv)));
    uv *= sin(radius/R)/radius;
    float depth = 1.0-cos(radius/R);
    float denom = d/R + depth*cosangle.x*cosangle.y+dot(uv,sinangle);
    return d*(uv*cosangle-depth*sinangle)/denom;
}
// Vertex-stage helper returning the offset (xy) and scale (z) that
// transform() applies before the backward transform.
float3 vs_maxscale(float2 sinangle, float2 cosangle)
{
    float2 centre = vs_bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y), sinangle, cosangle);
    float2 halfExtent = float2(0.5,0.5)*aspect;

    // Forward-transform the low and high edge midpoints on each axis.
    float loX = vs_fwtrans(float2(-halfExtent.x, centre.y), sinangle, cosangle).x;
    float loY = vs_fwtrans(float2( centre.x, -halfExtent.y), sinangle, cosangle).y;
    float hiX = vs_fwtrans(float2(+halfExtent.x, centre.y), sinangle, cosangle).x;
    float hiY = vs_fwtrans(float2( centre.x, +halfExtent.y), sinangle, cosangle).y;

    float2 lo = float2(loX, loY)/aspect;
    float2 hi = float2(hiX, hiY)/aspect;

    return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y));
}
// Code snippet borrowed from crt-cyclon. (credits to DariusG)
// Both distortion coefficients are 0 here, so the scale factor is 1 on each
// axis; the function only converts to [-1,1] and back.
float2 Warp(float2 pos)
{
    float2 centred = pos*2.0 - 1.0;
    float2 factor = float2(1.0 + centred.y*centred.y*0, 1.0 + centred.x*centred.x*0);
    centred *= factor;
    return centred*0.5 + 0.5;
}
// Full-screen vertex shader for CRT-Geom.
// Builds one oversized triangle from the vertex index, recentres the texture
// coordinate, and precomputes the per-frame values the pixel shader needs.
//   id       - vertex index (0, 1 or 2)
//   position - clip-space position of the vertex
//   texcoord - recentred texture coordinate
//   vVARS    - precomputed angles, stretch, interlace factor, texel size,
//              mask coordinate and texture size
void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
{
    texcoord = float2((id == 2) ? 2.0 : 0.0,
                      (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // center screen
    texcoord = Warp(texcoord - float2(centerx,centery)/100.0);

    float2 SourceSize = 1.0/NormalizedNativePixelSize;
    float2 OutputSize = ViewportSize*BufferToViewportRatio;

    // Precalculate a bunch of useful values we'll need in the fragment
    // shader.
    float2 tilt = float2(x_tilt, y_tilt);
    vVARS.sinangle = sin(tilt);
    vVARS.cosangle = cos(tilt);
    vVARS.stretch = vs_maxscale(vVARS.sinangle, vVARS.cosangle);

    if (vertical_scanlines)
    {
        vVARS.TextureSize = float2(SourceSize.x, SHARPER * SourceSize.y);
        vVARS.ilfac = float2(clamp(floor(SourceSize.x/(interlace_detect > 0.5 ? 200.0 : 1000)), 1.0, 2.0), 1.0);
        // Resulting Y pixel-coordinate of the pixel we're drawing.
        vVARS.mod_factor = texcoord.y * SourceSize.y * OutputSize.y / SourceSize.y;
    }
    else
    {
        vVARS.TextureSize = float2(SHARPER * SourceSize.x, SourceSize.y);
        vVARS.ilfac = float2(1.0, clamp(floor(SourceSize.y/(interlace_detect > 0.5 ? 200.0 : 1000)), 1.0, 2.0));
        // Resulting X pixel-coordinate of the pixel we're drawing.
        vVARS.mod_factor = texcoord.x * SourceSize.x * OutputSize.x / SourceSize.x;
    }

    // The size of one texel, in texture-coordinates.
    vVARS.one = vVARS.ilfac / vVARS.TextureSize;
}
// Pixel-stage counterpart of vs_intersect: solves the same quadratic and
// returns the root taken with the negative square root. The two orientations
// differ only in the order the cosine components are multiplied.
float intersect(float2 xy, float2 sinangle, float2 cosangle)
{
    float qa = dot(xy,xy) + d*d;
    float qb;
    float qc;
    if (vertical_scanlines)
    {
        qb = 2.0*(R*(dot(xy,sinangle) - d*cosangle.y*cosangle.x) - d*d);
        qc = d*d + 2.0*R*d*cosangle.y*cosangle.x;
    }
    else
    {
        qb = 2.0*(R*(dot(xy,sinangle) - d*cosangle.x*cosangle.y) - d*d);
        qc = d*d + 2.0*R*d*cosangle.x*cosangle.y;
    }
    float discriminant = qb*qb - 4.0*qa*qc;
    return (-qb-sqrt(discriminant))/(2.0*qa);
}
// Pixel-stage backward transform: maps a screen-plane position to
// coordinates on the curved surface, using intersect() for the hit point.
float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
    float hit = intersect(xy, sinangle, cosangle);
    float2 pt = (float2(hit, hit)*xy - float2(-R, -R)*sinangle) / float2(R, R);

    float2 ptOverCos = pt/cosangle;
    float2 tanAngle = sinangle/cosangle;

    // Second quadratic, this time taking the root with the positive sqrt.
    float qa = dot(tanAngle, tanAngle) + 1.0;
    float qb = -2.0*dot(ptOverCos, tanAngle);
    float qc = dot(ptOverCos, ptOverCos) - 1.0;
    float root = (-qb + sqrt(qb*qb - 4.0*qa*qc)) / (2.0*qa);

    float2 uv = (pt - root*sinangle) / cosangle;
    // FIX keeps the arc length away from zero before the division below.
    float arc = FIX(R*acos(root));
    return uv*arc/sin(arc/R);
}
// Pixel-stage forward transform: maps coordinates on the curved surface back
// to the screen plane. The two orientations differ only in the order the
// cosine components are multiplied.
float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
{
    // FIX keeps the radius away from zero before it is used as a divisor.
    float radius = FIX(sqrt(dot(uv, uv)));
    uv *= sin(radius/R)/radius;
    float depth = 1.0 - cos(radius/R);

    float denom;
    if (vertical_scanlines)
    {
        denom = d/R + depth*cosangle.y*cosangle.x + dot(uv,sinangle);
    }
    else
    {
        denom = d/R + depth*cosangle.x*cosangle.y + dot(uv,sinangle);
    }
    return d*(uv*cosangle - depth*sinangle)/denom;
}
// Pixel-stage counterpart of vs_maxscale: returns an offset (xy) and a
// scale (z). The vertical-scanline variant swaps the X and Y roles.
float3 maxscale(float2 sinangle, float2 cosangle)
{
    float2 halfExtent = float2(0.5, 0.5)*aspect;
    float2 lo;
    float2 hi;
    float span;

    if (vertical_scanlines)
    {
        float2 centre = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.y*cosangle.x), sinangle, cosangle);
        lo = float2(fwtrans(float2(-halfExtent.y, centre.x), sinangle, cosangle).y,
                    fwtrans(float2( centre.y, -halfExtent.x), sinangle, cosangle).x)/aspect;
        hi = float2(fwtrans(float2(+halfExtent.y, centre.x), sinangle, cosangle).y,
                    fwtrans(float2( centre.y, +halfExtent.x), sinangle, cosangle).x)/aspect;
        span = max(hi.y-lo.y, hi.x-lo.x);
    }
    else
    {
        float2 centre = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y), sinangle, cosangle);
        lo = float2(fwtrans(float2(-halfExtent.x, centre.y), sinangle, cosangle).x,
                    fwtrans(float2( centre.x, -halfExtent.y), sinangle, cosangle).y)/aspect;
        hi = float2(fwtrans(float2(+halfExtent.x, centre.y), sinangle, cosangle).x,
                    fwtrans(float2( centre.x, +halfExtent.y), sinangle, cosangle).y)/aspect;
        span = max(hi.x-lo.x, hi.y-lo.y);
    }

    return float3((hi+lo)*aspect*0.5, span);
}
// Calculate the influence of a scanline on the current pixel.
//
// 'distance' is the distance in texture coordinates from the current
// pixel to the scanline in question.
// 'color' is the colour of the scanline at the horizontal location of
// the current pixel.
//
// The beam width is computed per channel from the colour. The profile is the
// intensity as a function of distance from the centre of the scanline: it is
// gaussian when the width is 2 and becomes non-gaussian for larger widths.
// Ideally the profile would be normalised so that its integral does not
// depend on the width, i.e. a narrower beam should peak higher at the centre
// than a wider one.
float4 scanlineWeights(float distance, float4 color)
{
    float scaled = distance / scanline_weight;
    float4 spread = float4(scaled, scaled, scaled, scaled);
#ifdef USEGAUSSIAN
    float4 beamWidth = 0.3 + 0.1 * pow(color, float4(3.0, 3.0, 3.0, 3.0));
    return (lum + 0.4) * exp(-spread * spread) / beamWidth;
#else
    float4 beamWidth = 2.0 + 2.0 * pow(color, float4(4.0, 4.0, 4.0, 4.0));
    return (lum + 1.4) * exp(-pow(spread * rsqrt(0.5 * beamWidth), beamWidth)) / (0.6 + 0.2 * beamWidth);
#endif
}
// Maps a flat texture coordinate onto the curved screen.
// The coordinate is centred, scaled by the aspect and stretch.z, offset by
// stretch.xy, pushed through bkwtrans(), then divided by the overscan
// percentages and the aspect before being re-centred on 0.5.
float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
{
    float2 scaled = (coord - float2(0.5, 0.5))*aspect*stretch.z + stretch.xy;
    float2 curved = bkwtrans(scaled, sinangle, cosangle);
    float2 overscanScale = float2(overscan_x / 100.0, overscan_y / 100.0);
    return (curved / overscanScale/aspect + float2(0.5, 0.5));
}
// Returns a 0..1 coverage factor that fades to 0 inside the rounded corners.
//   coord - texture coordinate in [0,1]
float corner(float2 coord)
{
    // Aspect-corrected distance to the nearest edge on each axis.
    float2 edgeDist = min(coord, float2(1.0, 1.0) - coord) * aspect;
    float2 radius = float2(cornersize, cornersize);
    // How far the point sits inside the corner square on each axis.
    float2 inset = radius - min(edgeDist, radius);
    float reach = sqrt(dot(inset, inset));
    // Both components of radius hold cornersize; the axis follows orientation.
    float limit = vertical_scanlines ? radius.y : radius.x;
    return clamp((limit - reach)*cornersmooth, 0.0, 1.0);
}
// Sum of the absolute screen-space derivatives of a scalar.
float fwidth(float value){
    float horizontal = abs(ddx(value));
    float vertical = abs(ddy(value));
    return horizontal + vertical;
}
// Pixel shader for CRT-Geom.
// Steps: optionally curve the coordinate, compute the corner mask, snap to
// the texel centre, filter four texels along the scan direction on the
// current and next scanline, weight both by the beam profile, then apply the
// corner mask, the dot mask and the output gamma.
//   vpos      - pixel position (unused)
//   vTexCoord - texture coordinate from the vertex shader
//   vVARS     - values precomputed by VS_CRT_Geom
// Returns the final colour with alpha 1.
float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
{
// Here's a helpful diagram to keep in mind while trying to
// understand the code:
//
// | | | | |
// -------------------------------
// | | | | |
// | 01 | 11 | 21 | 31 | <-- current scanline
// | | @ | | |
// -------------------------------
// | | | | |
// | 02 | 12 | 22 | 32 | <-- next scanline
// | | | | |
// -------------------------------
// | | | | |
//
// Each character-cell represents a pixel on the output
// surface, "@" represents the current pixel (always somewhere
// in the bottom half of the current scan-line, or the top-half
// of the next scanline). The grid of lines represents the
// edges of the texels of the underlying texture.
// Texture coordinates of the texel containing the active pixel.
float2 xy;
if (CURVATURE == true)
xy = transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch);
else
xy = vTexCoord;
// Corner coverage, evaluated on the coordinate rescaled about the centre by
// BufferToViewportRatio.
float cval = corner((xy-float2(0.5,0.5)) * BufferToViewportRatio + float2(0.5,0.5));
// Of all the pixels that are mapped onto the texel we are
// currently rendering, which pixel are we currently rendering?
// ilvec is the per-frame field offset: FrameCount % 2 along the scan axis
// when ilfac * interlace_detect exceeds 1.5, otherwise zero.
float2 ilvec;
if(vertical_scanlines == false)
ilvec = float2(0.0, vVARS.ilfac.y * interlace_detect > 1.5 ? (float(FrameCount) % 2.0) : 0.0);
else
ilvec = float2(vVARS.ilfac.x * interlace_detect > 1.5 ? (float(FrameCount) % 2.0) : 0.0, 0.0);
float2 ratio_scale = (xy * vVARS.TextureSize - float2(0.5, 0.5) + ilvec) / vVARS.ilfac;
float2 uv_ratio = frac(ratio_scale);
// Snap to the center of the underlying texel.
xy = (floor(ratio_scale)*vVARS.ilfac + float2(0.5, 0.5) - ilvec) / vVARS.TextureSize;
// Calculate Lanczos scaling coefficients describing the effect
// of various neighbour texels in a scanline on the current
// pixel.
float4 coeffs;
if(vertical_scanlines == false)
coeffs = PI * float4(1.0 + uv_ratio.x, uv_ratio.x, 1.0 - uv_ratio.x, 2.0 - uv_ratio.x);
else
coeffs = PI * float4(1.0 + uv_ratio.y, uv_ratio.y, 1.0 - uv_ratio.y, 2.0 - uv_ratio.y);
// Prevent division by zero.
coeffs = FIX(coeffs);
// Lanczos2 kernel.
coeffs = 2.0 * sin(coeffs) * sin(coeffs / 2.0) / (coeffs * coeffs);
// Normalize.
coeffs /= dot(coeffs, float4(1.0, 1.0, 1.0, 1.0));
// Calculate the effective colour of the current and next
// scanlines at the horizontal location of the current pixel,
// using the Lanczos coefficients above.
float4 col, col2;
if(vertical_scanlines == false)
{
col = clamp(
mul(coeffs, float4x4(
TEX2D(xy + float2(-vVARS.one.x, 0.0)),
TEX2D(xy),
TEX2D(xy + float2(vVARS.one.x, 0.0)),
TEX2D(xy + float2(2.0 * vVARS.one.x, 0.0))
)),
0.0, 1.0
);
col2 = clamp(
mul(coeffs, float4x4(
TEX2D(xy + float2(-vVARS.one.x, vVARS.one.y)),
TEX2D(xy + float2(0.0, vVARS.one.y)),
TEX2D(xy + vVARS.one),
TEX2D(xy + float2(2.0 * vVARS.one.x, vVARS.one.y))
)),
0.0, 1.0
);
}else{
// Vertical scanlines: the four taps run along Y and the "next scanline"
// is one texel to the right.
col = clamp(
mul(coeffs, float4x4(
TEX2D(xy + float2(0.0, -vVARS.one.y)),
TEX2D(xy),
TEX2D(xy + float2(0.0, vVARS.one.y)),
TEX2D(xy + float2(0.0, 2.0 * vVARS.one.y))
)),
0.0, 1.0
);
col2 = clamp(
mul(coeffs, float4x4(
TEX2D(xy + float2(vVARS.one.x, -vVARS.one.y)),
TEX2D(xy + float2(vVARS.one.x, 0.0)),
TEX2D(xy + vVARS.one),
TEX2D(xy + float2(vVARS.one.x, 2.0 * vVARS.one.y))
)),
0.0, 1.0
);
}
// Without LINEAR_PROCESSING the texels were filtered in gamma space, so the
// CRT gamma is applied to the filtered result instead.
#ifndef LINEAR_PROCESSING
col = pow(col , float4(CRTgamma, CRTgamma, CRTgamma, CRTgamma));
col2 = pow(col2, float4(CRTgamma, CRTgamma, CRTgamma, CRTgamma));
#endif
// Calculate the influence of the current and next scanlines on
// the current pixel.
float4 weights, weights2;
if(vertical_scanlines == false)
{
weights = scanlineWeights(uv_ratio.y, col);
weights2 = scanlineWeights(1.0 - uv_ratio.y, col2);
#ifdef OVERSAMPLE
// Three-tap oversampling of the beam profile. uv_ratio.y is shifted by
// +1/3 and then by -2/3 of the filter width; the statement order matters
// because each step reads the value left by the previous one.
float filter = fwidth(ratio_scale.y);
uv_ratio.y = uv_ratio.y + 1.0/3.0*filter;
weights = (weights + scanlineWeights(uv_ratio.y, col))/3.0;
weights2 = (weights2 + scanlineWeights(abs(1.0 - uv_ratio.y), col2))/3.0;
uv_ratio.y = uv_ratio.y - 2.0/3.0*filter;
weights = weights + scanlineWeights(abs(uv_ratio.y), col)/3.0;
weights2 = weights2 + scanlineWeights(abs(1.0 - uv_ratio.y), col2)/3.0;
#endif
}else{
weights = scanlineWeights(uv_ratio.x, col);
weights2 = scanlineWeights(1.0 - uv_ratio.x, col2);
#ifdef OVERSAMPLE
// Same three-tap oversampling as above, along X.
float filter = fwidth(ratio_scale.x);
uv_ratio.x = uv_ratio.x + 1.0/3.0*filter;
weights = (weights + scanlineWeights(uv_ratio.x, col))/3.0;
weights2 = (weights2 + scanlineWeights(abs(1.0 - uv_ratio.x), col2))/3.0;
uv_ratio.x = uv_ratio.x - 2.0/3.0*filter;
weights = weights + scanlineWeights(abs(uv_ratio.x), col)/3.0;
weights2 = weights2 + scanlineWeights(abs(1.0 - uv_ratio.x), col2)/3.0;
#endif
}
// Combine both scanlines and apply the corner mask.
float3 mul_res = (col * weights + col2 * weights2).rgb;
mul_res *= float3(cval, cval, cval);
// dot-mask emulation:
// Output pixels are alternately tinted green and magenta.
float3 dotMaskWeights = lerp(
float3(1.0, 1.0 - DOTMASK, 1.0),
float3(1.0 - DOTMASK, 1.0, 1.0 - DOTMASK),
floor((vVARS.mod_factor % 2.0))
);
mul_res *= dotMaskWeights;
// Convert the image gamma for display on our output device.
mul_res = pow(mul_res, float3(1.0 / monitorgamma, 1.0 / monitorgamma, 1.0 / monitorgamma));
return float4(mul_res, 1.0);
}
// Technique entry point: one unnamed pass that pairs VS_CRT_Geom with
// PS_CRT_Geom.
technique CRT_Geom
{
pass
{
VertexShader = VS_CRT_Geom;
PixelShader = PS_CRT_Geom;
}
}

View file

@ -1,521 +0,0 @@
#include "ReShade.fxh"
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
// Ported to Duckstation (ReShade specs) by Hyllian (2024).
// Set shader params for all passes here:
// ---- "Display Settings" category: gamma of the simulated CRT, gamma of the
// user's display, and a contrast multiplier. ----
uniform float crt_gamma <
ui_type = "drag";
ui_min = 1.0;
ui_max = 5.0;
ui_step = 0.025;
ui_label = "Simulated CRT Gamma";
ui_category = "Display Settings";
> = 2.5;
uniform float lcd_gamma <
ui_type = "drag";
ui_min = 1.0;
ui_max = 5.0;
ui_step = 0.025;
ui_label = "Your Display Gamma";
ui_category = "Display Settings";
> = 2.2;
uniform float levels_contrast <
ui_type = "drag";
ui_min = 0.0;
ui_max = 4.0;
ui_step = 0.015625;
ui_label = "Contrast";
ui_category = "Display Settings";
> = 1.0;
// ---- "Effects" category: halation, diffusion and bloom weights. ----
uniform float halation_weight <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Halation Weight";
ui_category = "Effects";
> = 0.0;
uniform float diffusion_weight <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Diffusion Weight";
ui_category = "Effects";
> = 0.075;
uniform float bloom_underestimate_levels <
ui_type = "drag";
ui_min = 0.0;
ui_max = 5.0;
ui_step = 0.01;
ui_label = "Bloom - Underestimate Levels";
ui_category = "Effects";
> = 0.8;
uniform float bloom_excess <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Bloom - Excess";
ui_category = "Effects";
> = 0.0;
// ---- "Beam Dynamics" category: min/max sigma and shape of the beam, the
// powers applied to them, and the horizontal filter settings. ----
uniform float beam_min_sigma <
ui_type = "drag";
ui_min = 0.005;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Min Sigma";
ui_category = "Beam Dynamics";
> = 0.02;
uniform float beam_max_sigma <
ui_type = "drag";
ui_min = 0.005;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Max Sigma";
ui_category = "Beam Dynamics";
> = 0.3;
uniform float beam_spot_power <
ui_type = "drag";
ui_min = 0.01;
ui_max = 16.0;
ui_step = 0.01;
ui_label = "Spot Power";
ui_category = "Beam Dynamics";
> = 0.33;
uniform float beam_min_shape <
ui_type = "drag";
ui_min = 2.0;
ui_max = 32.0;
ui_step = 0.1;
ui_label = "Min Shape";
ui_category = "Beam Dynamics";
> = 2.0;
uniform float beam_max_shape <
ui_type = "drag";
ui_min = 2.0;
ui_max = 32.0;
ui_step = 0.1;
ui_label = "Max Shape";
ui_category = "Beam Dynamics";
> = 4.0;
uniform float beam_shape_power <
ui_type = "drag";
ui_min = 0.01;
ui_max = 16.0;
ui_step = 0.01;
ui_label = "Shape Power";
ui_category = "Beam Dynamics";
> = 0.25;
// Combo index: 0 = Quilez, 1 = Gaussian, 2 = Lanczos.
uniform int beam_horiz_filter <
ui_type = "combo";
ui_items = "Quilez\0Gaussian\0Lanczos\0";
ui_label = "Horizontal Filter";
ui_category = "Beam Dynamics";
> = 0;
uniform float beam_horiz_sigma <
ui_type = "drag";
ui_min = 0.0;
ui_max = 0.67;
ui_step = 0.005;
ui_label = "Horizontal Sigma";
ui_category = "Beam Dynamics";
> = 0.35;
uniform float beam_horiz_linear_rgb_weight <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_label = "Horiz Linear RGB Weight";
ui_category = "Beam Dynamics";
> = 1.0;
// ---- "Convergence" category: per-channel X offsets (-4..4) and Y offsets
// (-2..2), all defaulting to 0. ----
uniform float convergence_offset_x_r <
ui_type = "drag";
ui_min = -4.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Offset X Red";
ui_category = "Convergence";
> = 0.0;
uniform float convergence_offset_x_g <
ui_type = "drag";
ui_min = -4.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Offset X Green";
ui_category = "Convergence";
> = 0.0;
uniform float convergence_offset_x_b <
ui_type = "drag";
ui_min = -4.0;
ui_max = 4.0;
ui_step = 0.05;
ui_label = "Offset X Blue";
ui_category = "Convergence";
> = 0.0;
uniform float convergence_offset_y_r <
ui_type = "drag";
ui_min = -2.0;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Offset Y Red";
ui_category = "Convergence";
> = 0.0;
uniform float convergence_offset_y_g <
ui_type = "drag";
ui_min = -2.0;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Offset Y Green";
ui_category = "Convergence";
> = 0.0;
uniform float convergence_offset_y_b <
ui_type = "drag";
ui_min = -2.0;
ui_max = 2.0;
ui_step = 0.05;
ui_label = "Offset Y Blue";
ui_category = "Convergence";
> = 0.0;
// ---- "Mask" category: mask type, sampling mode and triad sizing. ----
// Combo index: 0 = Aperture Grille, 1 = Slot Mask, 2 = Shadow Mask.
uniform int mask_type <
ui_type = "combo";
ui_items = "Aperture Grille\0Slot Mask\0Shadow Mask\0";
ui_label = "Type";
ui_category = "Mask";
> = 0;
// Whole-number selector (0, 1 or 2) stored as a float.
uniform float mask_sample_mode_desired <
ui_type = "drag";
ui_min = 0.0;
ui_max = 2.0;
ui_step = 1.;
ui_label = "Sample Mode";
ui_category = "Mask";
> = 0.0;
// 0/1 toggle stored as a float.
uniform float mask_specify_num_triads <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 1.0;
ui_label = "Specify Number of Triads";
ui_category = "Mask";
> = 0.0;
uniform float mask_triad_size_desired <
ui_type = "drag";
ui_min = 1.0;
ui_max = 18.0;
ui_step = 0.125;
ui_label = "Triad Size Desired";
ui_category = "Mask";
> = 3.0;
uniform float mask_num_triads_desired <
ui_type = "drag";
ui_min = 342.0;
ui_max = 1920.0;
ui_step = 1.0;
ui_label = "Number of Triads Desired";
ui_category = "Mask";
> = 480.0;
// ---- "Interlacing" category: detection toggle, field order and 1080i
// detection. ----
uniform bool interlace_detect <
ui_type = "radio";
ui_label = "Enable Interlacing Detection";
ui_category = "Interlacing";
> = true;
uniform bool interlace_bff <
ui_type = "radio";
ui_label = "Bottom Field First";
ui_category = "Interlacing";
> = false;
uniform bool interlace_1080i <
ui_type = "radio";
ui_label = "Detect 1080i";
ui_category = "Interlacing";
> = false;
// ---- Host-supplied values (bound through the `source` annotation) ---------
uniform float FrameCount < source = "framecount"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 InternalPixelSize < source = "internal_pixel_size"; >;
uniform float2 NativePixelSize < source = "native_pixel_size"; >;
uniform float2 NormalizedInternalPixelSize < source = "normalized_internal_pixel_size"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float UpscaleMultiplier < source = "upscale_multiplier"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
#include "../misc/include/geom.fxh"
// Viewport size scaled by the buffer-to-viewport ratio.
#define VIEWPORT_SIZE (ViewportSize*BufferToViewportRatio)
// Source size, taken as the reciprocal of the normalized native pixel size.
#define TEXTURE_SIZE (1.0/NormalizedNativePixelSize)
// Per-pass texture sizes. The first group works at source size.
#define ORIG_LINEARIZED_texture_size TEXTURE_SIZE
#define VERTICAL_SCANLINES_texture_size TEXTURE_SIZE
#define BLOOM_APPROX_texture_size TEXTURE_SIZE
#define BLUR9FAST_VERTICAL_texture_size TEXTURE_SIZE
#define HALATION_BLUR_texture_size TEXTURE_SIZE
#define MASK_RESIZE_VERT_texture_size TEXTURE_SIZE
// Mask resize passes use sizes derived from 1/16 of the viewport.
#define MASK_RESIZE_texture_size float2(64.0,0.0625*((VIEWPORT_SIZE).y))
#define MASKED_SCANLINES_texture_size (0.0625*VIEWPORT_SIZE)
// The remaining passes work at viewport size.
#define BRIGHTPASS_texture_size VIEWPORT_SIZE
#define BLOOM_VERTICAL_texture_size VIEWPORT_SIZE
#define BLOOM_HORIZONTAL_texture_size VIEWPORT_SIZE
// Each pass's video size is an alias for its texture size.
#define ORIG_LINEARIZED_video_size ORIG_LINEARIZED_texture_size
#define VERTICAL_SCANLINES_video_size VERTICAL_SCANLINES_texture_size
#define BLOOM_APPROX_video_size BLOOM_APPROX_texture_size
#define BLUR9FAST_VERTICAL_video_size BLUR9FAST_VERTICAL_texture_size
#define HALATION_BLUR_video_size HALATION_BLUR_texture_size
#define MASK_RESIZE_VERT_video_size MASK_RESIZE_VERT_texture_size
#define MASK_RESIZE_video_size MASK_RESIZE_texture_size
#define MASKED_SCANLINES_video_size MASKED_SCANLINES_texture_size
#define BRIGHTPASS_video_size BRIGHTPASS_texture_size
#define BLOOM_VERTICAL_video_size BLOOM_VERTICAL_texture_size
#define BLOOM_HORIZONTAL_video_size BLOOM_HORIZONTAL_texture_size
// Generic alias: any use of video_size resolves to texture_size.
#define video_size texture_size
// Phosphor mask images loaded from PNG files under crt-royale/.
// "small" variants are declared 64x64 with MipLevels=0; "large" variants are
// declared 512x512 with MipLevels=4. One texture per mask kind
// (grille, slot, shadow) and size.
texture2D tmask_grille_texture_small < source = "crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;};
texture2D tmask_slot_texture_small < source = "crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;};
texture2D tmask_shadow_texture_small < source = "crt-royale/TileableLinearShadowMaskEDPResizeTo64.png"; > {Width=64.0;Height=64.0;MipLevels=0;};
texture2D tmask_grille_texture_large < source = "crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png"; > {Width=512.0;Height=512.0;MipLevels=4;};
texture2D tmask_slot_texture_large < source = "crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png"; > {Width=512.0;Height=512.0;MipLevels=4;};
texture2D tmask_shadow_texture_large < source = "crt-royale/TileableLinearShadowMaskEDP.png"; > {Width=512.0;Height=512.0;MipLevels=4;};
// Samplers for the mask textures above. All six tile in both directions
// (AddressU/AddressV = REPEAT) and use POINT filtering for both minification
// and magnification.
sampler2D mask_grille_texture_small { Texture = tmask_grille_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
sampler2D mask_slot_texture_small { Texture = tmask_slot_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
sampler2D mask_shadow_texture_small { Texture = tmask_shadow_texture_small; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
sampler2D mask_grille_texture_large { Texture = tmask_grille_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
sampler2D mask_slot_texture_large { Texture = tmask_slot_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
sampler2D mask_shadow_texture_large { Texture = tmask_shadow_texture_large; AddressU = REPEAT; AddressV = REPEAT; MinFilter = POINT; MagFilter = POINT;};
// DEBUG_PASSES selects how many passes of the pipeline are built. It defaults
// to 11 unless it was already defined before this point. Every resource,
// include and pass for step N (N >= 2) is guarded by `DEBUG_PASSES > N-1`.
#ifndef DEBUG_PASSES
#define DEBUG_PASSES 11
#endif
// Intermediate render targets, one texture/sampler pair per pass output.
// Unless noted otherwise they are BUFFER_WIDTH x BUFFER_HEIGHT, RGBA16f,
// sampled with LINEAR filtering and BORDER addressing.
// Target of the first pass; declared unconditionally.
texture2D tORIG_LINEARIZED{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D ORIG_LINEARIZED{Texture=tORIG_LINEARIZED;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#if (DEBUG_PASSES > 1)
texture2D tVERTICAL_SCANLINES{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D VERTICAL_SCANLINES{Texture=tVERTICAL_SCANLINES;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 2)
texture2D tBLOOM_APPROX{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D BLOOM_APPROX{Texture=tBLOOM_APPROX;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 3)
// TODO: check whether this pass really needs its own render target.
texture2D tBLUR9FAST_VERTICAL{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D BLUR9FAST_VERTICAL{Texture=tBLUR9FAST_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 4)
texture2D tHALATION_BLUR{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D HALATION_BLUR{Texture=tHALATION_BLUR;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 5)
// Mask-resize targets differ from the rest: RGBA8, POINT filtering, and a
// reduced size. This one is 64 wide and BUFFER_HEIGHT*0.0625 (1/16) tall.
texture2D tMASK_RESIZE_VERTICAL{Width=64.0;Height=BUFFER_HEIGHT*0.0625;Format=RGBA8;};
sampler2D MASK_RESIZE_VERTICAL{Texture=tMASK_RESIZE_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
#endif
#if (DEBUG_PASSES > 6)
// 1/16 of the buffer size in both dimensions; RGBA8 with POINT filtering.
texture2D tMASK_RESIZE{Width=BUFFER_WIDTH*0.0625;Height=BUFFER_HEIGHT*0.0625;Format=RGBA8;};
sampler2D MASK_RESIZE{Texture=tMASK_RESIZE;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
#endif
#if (DEBUG_PASSES > 7)
texture2D tMASKED_SCANLINES{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D MASKED_SCANLINES{Texture=tMASKED_SCANLINES;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 8)
texture2D tBRIGHTPASS{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D BRIGHTPASS{Texture=tBRIGHTPASS;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#if (DEBUG_PASSES > 9)
texture2D tBLOOM_VERTICAL{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
sampler2D BLOOM_VERTICAL{Texture=tBLOOM_VERTICAL;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
#endif
#include "crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh"
#if (DEBUG_PASSES > 1)
#include "crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh"
#endif
#if (DEBUG_PASSES > 2)
#include "crt-royale/src/crt-royale-bloom-approx.fxh"
#endif
#if (DEBUG_PASSES > 3)
#include "crt-royale/src/blur9fast-vertical.fxh"
#endif
#if (DEBUG_PASSES > 4)
#include "crt-royale/src/blur9fast-horizontal.fxh"
#endif
#if (DEBUG_PASSES > 5)
#include "crt-royale/src/crt-royale-mask-resize-vertical.fxh"
#endif
#if (DEBUG_PASSES > 6)
#include "crt-royale/src/crt-royale-mask-resize-horizontal.fxh"
#endif
#if (DEBUG_PASSES > 7)
#include "crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh"
#endif
#if (DEBUG_PASSES > 8)
#include "crt-royale/src/crt-royale-brightpass.fxh"
#endif
#if (DEBUG_PASSES > 9)
#include "crt-royale/src/crt-royale-bloom-vertical.fxh"
#endif
#if (DEBUG_PASSES > 10)
#include "crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh"
#endif
// Technique CRT_Royale: up to eleven passes executed in the order listed.
// The first pass is always present; each later pass N is compiled only when
// DEBUG_PASSES > N-1, so lowering DEBUG_PASSES truncates the chain.
// Passes 1-10 each write to a named intermediate texture; only the last pass
// sets no RenderTarget.
// NOTE(review): per the ReShade FX reference a pass without a RenderTarget
// draws to the back buffer, which would mean nothing reaches the screen when
// DEBUG_PASSES <= 10 -- confirm this is the intended debug behaviour.
technique CRT_Royale
{
// Pass 1 (always enabled): linearize -> tORIG_LINEARIZED.
pass
{
VertexShader = VS_Linearize;
PixelShader = PS_Linearize;
RenderTarget = tORIG_LINEARIZED;
}
#if (DEBUG_PASSES > 1)
// Pass 2: vertical scanlines / interlacing -> tVERTICAL_SCANLINES.
pass
{
VertexShader = VS_Scanlines_Vertical_Interlacing;
PixelShader = PS_Scanlines_Vertical_Interlacing;
RenderTarget = tVERTICAL_SCANLINES;
}
#endif
#if (DEBUG_PASSES > 2)
// Pass 3: bloom approximation -> tBLOOM_APPROX.
pass
{
VertexShader = VS_Bloom_Approx;
PixelShader = PS_Bloom_Approx;
RenderTarget = tBLOOM_APPROX;
}
#endif
#if (DEBUG_PASSES > 3)
// Pass 4: vertical 9-tap blur -> tBLUR9FAST_VERTICAL.
pass
{
VertexShader = VS_Blur9Fast_Vertical;
PixelShader = PS_Blur9Fast_Vertical;
RenderTarget = tBLUR9FAST_VERTICAL;
}
#endif
#if (DEBUG_PASSES > 4)
// Pass 5: horizontal 9-tap blur -> tHALATION_BLUR.
pass
{
VertexShader = VS_Blur9Fast_Horizontal;
PixelShader = PS_Blur9Fast_Horizontal;
RenderTarget = tHALATION_BLUR;
}
#endif
#if (DEBUG_PASSES > 5)
// Pass 6: vertical mask resize -> tMASK_RESIZE_VERTICAL.
pass
{
VertexShader = VS_Mask_Resize_Vertical;
PixelShader = PS_Mask_Resize_Vertical;
RenderTarget = tMASK_RESIZE_VERTICAL;
}
#endif
#if (DEBUG_PASSES > 6)
// Pass 7: horizontal mask resize -> tMASK_RESIZE.
pass
{
VertexShader = VS_Mask_Resize_Horizontal;
PixelShader = PS_Mask_Resize_Horizontal;
RenderTarget = tMASK_RESIZE;
}
#endif
#if (DEBUG_PASSES > 7)
// Pass 8: horizontal scanlines with mask applied -> tMASKED_SCANLINES.
pass
{
VertexShader = VS_Scanlines_Horizontal_Apply_Mask;
PixelShader = PS_Scanlines_Horizontal_Apply_Mask;
RenderTarget = tMASKED_SCANLINES;
}
#endif
#if (DEBUG_PASSES > 8)
// Pass 9: brightpass -> tBRIGHTPASS.
pass
{
VertexShader = VS_Brightpass;
PixelShader = PS_Brightpass;
RenderTarget = tBRIGHTPASS;
}
#endif
#if (DEBUG_PASSES > 9)
// Pass 10: vertical bloom -> tBLOOM_VERTICAL.
pass
{
VertexShader = VS_Bloom_Vertical;
PixelShader = PS_Bloom_Vertical;
RenderTarget = tBLOOM_VERTICAL;
}
#endif
#if (DEBUG_PASSES > 10)
// Pass 11: horizontal bloom; no RenderTarget is assigned for this pass.
pass
{
VertexShader = VS_Bloom_Horizontal;
PixelShader = PS_Bloom_Horizontal;
}
#endif
}

View file

@ -1,280 +0,0 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS

View file

@ -1,249 +0,0 @@
#ifndef BIND_SHADER_PARAMS_H
#define BIND_SHADER_PARAMS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "helper-functions-and-macros.fxh"
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
// Override some parameters for gamma-management.h and tex2Dantialias.h:
// NOTE(review): the includes in this file use the .fxh extension, so these
// names presumably refer to the .fxh counterparts -- confirm.
#define OVERRIDE_DEVICE_GAMMA
static const float gba_gamma = 3.5; // Irrelevant but necessary to define.
#define ANTIALIAS_OVERRIDE_BASICS
#define ANTIALIAS_OVERRIDE_PARAMETERS
// Disable runtime shader params if the user doesn't explicitly want them.
// Static constants will be defined in place of uniforms of the same name.
// Mechanism: without RUNTIME_SHADER_PARAMS_ENABLE, PARAMETER_UNIFORM is
// undefined here, which selects the #else side of the binding section below.
#ifndef RUNTIME_SHADER_PARAMS_ENABLE
#undef PARAMETER_UNIFORM
#endif
// Bind option names to shader parameter uniforms or static constants.
// With PARAMETER_UNIFORM defined, every option below is declared as a plain
// `uniform float`, except the three groups guarded by their own RUNTIME_*
// macro, which fall back to a static constant derived from the matching
// *_static setting when that macro is not defined.
#ifdef PARAMETER_UNIFORM
uniform float crt_gamma;
uniform float lcd_gamma;
uniform float levels_contrast;
uniform float halation_weight;
uniform float diffusion_weight;
uniform float bloom_underestimate_levels;
uniform float bloom_excess;
uniform float beam_min_sigma;
uniform float beam_max_sigma;
uniform float beam_spot_power;
uniform float beam_min_shape;
uniform float beam_max_shape;
uniform float beam_shape_power;
uniform float beam_horiz_sigma;
// Horizontal beam filter options: runtime uniforms only on request, otherwise
// constants clamped to [0, 2] and [0, 1] respectively.
#ifdef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
uniform float beam_horiz_filter;
uniform float beam_horiz_linear_rgb_weight;
#else
static const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
static const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
#endif
uniform float convergence_offset_x_r;
uniform float convergence_offset_x_g;
uniform float convergence_offset_x_b;
uniform float convergence_offset_y_r;
uniform float convergence_offset_y_g;
uniform float convergence_offset_y_b;
// Mask type: runtime uniform only on request, otherwise a constant clamped
// to [0, 2].
#ifdef RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
uniform float mask_type;
#else
static const float mask_type = clamp(mask_type_static, 0.0, 2.0);
#endif
uniform float mask_sample_mode_desired;
uniform float mask_specify_num_triads;
uniform float mask_triad_size_desired;
uniform float mask_num_triads_desired;
uniform float aa_subpixel_r_offset_x_runtime;
uniform float aa_subpixel_r_offset_y_runtime;
// Antialiasing weights: runtime uniforms only on request, otherwise constants
// (aa_gauss_sigma is kept at or above FIX_ZERO(0.0)).
#ifdef RUNTIME_ANTIALIAS_WEIGHTS
uniform float aa_cubic_c;
uniform float aa_gauss_sigma;
#else
static const float aa_cubic_c = aa_cubic_c_static; // Clamp to [0, 4]?
static const float aa_gauss_sigma = max(FIX_ZERO(0.0), aa_gauss_sigma_static); // Clamp to [FIXZERO(0), 1]?
#endif
uniform float geom_mode_runtime;
uniform float geom_radius;
uniform float geom_view_dist;
uniform float geom_tilt_angle_x;
uniform float geom_tilt_angle_y;
uniform float geom_aspect_ratio_x;
uniform float geom_aspect_ratio_y;
uniform float geom_overscan_x;
uniform float geom_overscan_y;
uniform float border_size;
uniform float border_darkness;
uniform float border_compress;
uniform float interlace_bff;
uniform float interlace_1080i;
#else
// Intended fallback: use the *_static constants from the user settings
// (included above as "user-settings.fxh") and limit their ranges.
// NOTE: every definition in this branch is commented out, so when
// PARAMETER_UNIFORM is undefined this file binds none of these names; the
// accessor functions further down then rely on them being declared elsewhere.
// NOTE(review): presumably the including .fx file declares them -- confirm.
/* static const float crt_gamma = macro_max(0.0, crt_gamma_static);
static const float lcd_gamma = macro_max(0.0, lcd_gamma_static);
static const float levels_contrast = macro_clamp(levels_contrast_static, 0.0, 4.0);
static const float halation_weight = macro_clamp(halation_weight_static, 0.0, 1.0);
static const float diffusion_weight = macro_clamp(diffusion_weight_static, 0.0, 1.0);
static const float bloom_underestimate_levels = macro_max(FIX_ZERO(0.0), bloom_underestimate_levels_static);
static const float bloom_excess = macro_clamp(bloom_excess_static, 0.0, 1.0);
static const float beam_min_sigma = macro_max(FIX_ZERO(0.0), beam_min_sigma_static);
static const float beam_max_sigma = macro_max(beam_min_sigma, beam_max_sigma_static);
static const float beam_spot_power = macro_max(beam_spot_power_static, 0.0);
static const float beam_min_shape = macro_max(2.0, beam_min_shape_static);
static const float beam_max_shape = macro_max(beam_min_shape, beam_max_shape_static);
static const float beam_shape_power = macro_max(0.0, beam_shape_power_static);
static const float beam_horiz_filter = macro_clamp(beam_horiz_filter_static, 0.0, 2.0);
static const float beam_horiz_sigma = macro_max(FIX_ZERO(0.0), beam_horiz_sigma_static);
static const float beam_horiz_linear_rgb_weight = macro_clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
*/ // Unpack static vector elements to match scalar uniforms:
/* static const float convergence_offset_x_r = macro_clamp(convergence_offsets_r_static.x, -4.0, 4.0);
static const float convergence_offset_x_g = macro_clamp(convergence_offsets_g_static.x, -4.0, 4.0);
static const float convergence_offset_x_b = macro_clamp(convergence_offsets_b_static.x, -4.0, 4.0);
static const float convergence_offset_y_r = macro_clamp(convergence_offsets_r_static.y, -4.0, 4.0);
static const float convergence_offset_y_g = macro_clamp(convergence_offsets_g_static.y, -4.0, 4.0);
static const float convergence_offset_y_b = macro_clamp(convergence_offsets_b_static.y, -4.0, 4.0);
static const float mask_type = macro_clamp(mask_type_static, 0.0, 2.0);
static const float mask_sample_mode_desired = macro_clamp(mask_sample_mode_static, 0.0, 2.0);
static const float mask_specify_num_triads = macro_clamp(mask_specify_num_triads_static, 0.0, 1.0);
static const float mask_triad_size_desired = macro_clamp(mask_triad_size_desired_static, 1.0, 18.0);
static const float mask_num_triads_desired = macro_clamp(mask_num_triads_desired_static, 342.0, 1920.0);
static const float aa_subpixel_r_offset_x_runtime = macro_clamp(aa_subpixel_r_offset_static.x, -0.5, 0.5);
static const float aa_subpixel_r_offset_y_runtime = macro_clamp(aa_subpixel_r_offset_static.y, -0.5, 0.5);
static const float aa_cubic_c = aa_cubic_c_static; // Clamp to [0, 4]?
static const float aa_gauss_sigma = macro_max(FIX_ZERO(0.0), aa_gauss_sigma_static); // Clamp to [FIXZERO(0), 1]?
static const float geom_mode_runtime = macro_clamp(geom_mode_static, 0.0, 3.0);
static const float geom_radius = macro_max(1.0/(2.0*pi), geom_radius_static); // Clamp to [1/(2*pi), 1024]?
static const float geom_view_dist = macro_max(0.5, geom_view_dist_static); // Clamp to [0.5, 1024]?
static const float geom_tilt_angle_x = macro_clamp(geom_tilt_angle_static.x, -pi, pi);
static const float geom_tilt_angle_y = macro_clamp(geom_tilt_angle_static.y, -pi, pi);
static const float geom_aspect_ratio_x = geom_aspect_ratio_static; // Force >= 1?
static const float geom_aspect_ratio_y = 1.0;
static const float geom_overscan_x = macro_max(FIX_ZERO(0.0), geom_overscan_static.x);
static const float geom_overscan_y = macro_max(FIX_ZERO(0.0), geom_overscan_static.y);
static const float border_size = macro_clamp(border_size_static, 0.0, 0.5); // 0.5 reaches to image center
static const float border_darkness = macro_max(0.0, border_darkness_static);
static const float border_compress = macro_max(1.0, border_compress_static); // < 1.0 darkens whole image
static const float interlace_bff = float(interlace_bff_static);
static const float interlace_1080i = float(interlace_1080i_static);
*/
#endif
/*
// Provide accessors for vector constants that pack scalar uniforms:
float2 get_aspect_vector(const float geom_aspect_ratio)
{
// Get an aspect ratio vector. Enforce geom_max_aspect_ratio, and prevent
// the absolute scale from affecting the uv-mapping for curvature:
const float geom_clamped_aspect_ratio =
min(geom_aspect_ratio, geom_max_aspect_ratio);
const float2 geom_aspect =
normalize(float2(geom_clamped_aspect_ratio, 1.0));
return geom_aspect;
}
float2 get_geom_overscan_vector()
{
return float2(geom_overscan_x, geom_overscan_y);
}
float2 get_geom_tilt_angle_vector()
{
return float2(geom_tilt_angle_x, geom_tilt_angle_y);
}
*/
// Packs the three scalar x convergence offsets into one vector, ordered
// (r, g, b).
float3 get_convergence_offsets_x_vector()
{
    const float red_x = convergence_offset_x_r;
    const float green_x = convergence_offset_x_g;
    const float blue_x = convergence_offset_x_b;
    return float3(red_x, green_x, blue_x);
}
// Packs the three scalar y convergence offsets into one vector, ordered
// (r, g, b).
float3 get_convergence_offsets_y_vector()
{
    const float red_y = convergence_offset_y_r;
    const float green_y = convergence_offset_y_g;
    const float blue_y = convergence_offset_y_b;
    return float3(red_y, green_y, blue_y);
}
// Returns the red channel's convergence offset as an (x, y) pair.
float2 get_convergence_offsets_r_vector()
{
    const float2 red_offset = float2(convergence_offset_x_r,
                                     convergence_offset_y_r);
    return red_offset;
}
// Returns the green channel's convergence offset as an (x, y) pair.
float2 get_convergence_offsets_g_vector()
{
    const float2 green_offset = float2(convergence_offset_x_g,
                                       convergence_offset_y_g);
    return green_offset;
}
// Returns the blue channel's convergence offset as an (x, y) pair.
float2 get_convergence_offsets_b_vector()
{
    const float2 blue_offset = float2(convergence_offset_x_b,
                                      convergence_offset_y_b);
    return blue_offset;
}
/*
float2 get_aa_subpixel_r_offset()
{
#ifdef RUNTIME_ANTIALIAS_WEIGHTS
#ifdef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
// WARNING: THIS IS EXTREMELY EXPENSIVE.
return float2(aa_subpixel_r_offset_x_runtime,
aa_subpixel_r_offset_y_runtime);
#else
return aa_subpixel_r_offset_static;
#endif
#else
return aa_subpixel_r_offset_static;
#endif
}
*/
// Provide accessors settings which still need "cooking:"
// Returns the reciprocal of the average-color constant belonging to the mask
// selected by mask_type: below 0.5 -> grille, below 1.5 -> slot, anything
// else -> shadow.
float get_mask_amplify()
{
    static const float grille_gain = 1.0/mask_grille_avg_color;
    static const float slot_gain = 1.0/mask_slot_avg_color;
    static const float shadow_gain = 1.0/mask_shadow_avg_color;
    if(mask_type < 0.5)
    {
        return grille_gain;
    }
    if(mask_type < 1.5)
    {
        return slot_gain;
    }
    return shadow_gain;
}
// Returns the phosphor mask sample mode to use.
// The value comes from the runtime option mask_sample_mode_desired when
// RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT is defined and from
// mask_sample_mode_static otherwise. It is returned unchanged when
// PHOSPHOR_MASK_MANUALLY_RESIZE is defined, and clamped to [1.0, 2.0] when
// it is not.
float get_mask_sample_mode()
{
    // Step 1: pick the source of the requested mode.
    #ifdef RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
        const float requested_mode = mask_sample_mode_desired;
    #else
        const float requested_mode = mask_sample_mode_static;
    #endif
    // Step 2: restrict the range unless manual resizing is enabled.
    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
        return requested_mode;
    #else
        return clamp(requested_mode, 1.0, 2.0);
    #endif
}
#endif // BIND_SHADER_PARAMS_H

View file

@ -1,317 +0,0 @@
#ifndef BLOOM_FUNCTIONS_H
#define BLOOM_FUNCTIONS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////////// DESCRIPTION ////////////////////////////////
// These utility functions and constants help several passes determine the
// size and center texel weight of the phosphor bloom in a uniform manner.
////////////////////////////////// INCLUDES //////////////////////////////////
// We need to calculate the correct blur sigma using some .cgp constants:
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
#include "blur-functions.fxh"
/////////////////////////////// BLOOM CONSTANTS //////////////////////////////
// Compute constants with manual inlines of the functions below:
// bloom_diff_thresh is the value the bloom sigma helpers in this file pass as
// the `thresh` argument of get_min_sigma_to_blur_triad(), i.e. the max desired
// pixel difference in a blurred triad.
static const float bloom_diff_thresh = 1.0/256.0;
/////////////////////////////////// HELPERS //////////////////////////////////
float get_min_sigma_to_blur_triad(const float triad_size,
    const float thresh)
{
    //  Requires:   1.) triad_size is the final phosphor triad size in pixels.
    //              2.) thresh is the max desired pixel difference in the
    //                  blurred triad (e.g. 1.0/256.0).
    //  Returns:    The minimum sigma that fully blurs a phosphor triad on
    //              the screen to an even color, within thresh.
    //  The closed form below came from curve-fitting data.
    //  Estimate: max error = ~0.086036, mean sq. error = ~0.0013387.
    const float size_term = -0.05168 + 0.6113*triad_size;
    const float thresh_term = 1.122*triad_size*sqrt(0.000416 + thresh);
    return size_term - thresh_term;
    //  Simpler alternative fit, kept for reference.
    //  Estimate: max error = ~0.16486, mean sq. error = ~0.0041041:
    //return 0.5985*triad_size - triad_size*sqrt(thresh)
}
float get_absolute_scale_blur_sigma(const float thresh)
{
    //  Requires:   1.) min_allowed_viewport_triads.x is a global lower bound
    //                  on the number of horizontal phosphor triads in the
    //                  final image; realistic results need at least that
    //                  many triads.
    //              2.) bloom_approx_scale_x is a global float equal to the
    //                  absolute horizontal scale of BLOOM_APPROX.
    //              3.) bloom_approx_scale_x/min_allowed_viewport_triads.x
    //                  should be <= 1.1658025090 so the result stays below
    //                  0.62666015625 (the largest sigma that keeps the
    //                  largest unused texel weight < 1.0/256.0 in a 3x3
    //                  blur).
    //              4.) thresh is the max desired pixel difference in the
    //                  blurred triad (e.g. 1.0/256.0).
    //  Returns:    The minimum Gaussian sigma that blurs the pass output as
    //              much as it would take to blur away bloom_approx_scale_x
    //              horizontal phosphor triads.
    //  Rationale:
    //  BLOOM_APPROX should resemble a downscaled phosphor blur.  The ideal
    //  would be the real phosphor bloom sigma scaled by
    //  (bloom_approx_scale_x/viewport_size_x), but the viewport size is
    //  unknown in this pass.  So blur as much as is needed to erase
    //  min_allowed_viewport_triads.x triads instead.  That over-blurs when
    //  the user picks more triads, which is tolerable: blurring a constant
    //  fraction of the viewport may resemble a true optical bloom better
    //  anyway, since the viewport usually fills about the same fraction of
    //  a player's field of view regardless of screen size and resolution.
    //  An extremely large viewport size is assumed for asymptotic results.
    const float asymptotic_triad_size =
        max_viewport_size_x/min_allowed_viewport_triads.x;
    const float asymptotic_sigma =
        get_min_sigma_to_blur_triad(asymptotic_triad_size, thresh);
    return bloom_approx_scale_x/max_viewport_size_x * asymptotic_sigma;
}
float get_center_weight(const float sigma)
{
    //  Given a Gaussian blur sigma, get the blur weight for the center texel.
    //  With RUNTIME_PHOSPHOR_BLOOM_SIGMA #defined the work is delegated to
    //  get_fast_gaussian_weight_sum_inv().  Otherwise the weights of the
    //  largest blur permitted by the PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_*
    //  hints are summed here and the squared inverse sum is returned.
    #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA
        return get_fast_gaussian_weight_sum_inv(sigma);
    #else
        //  The unnormalized weight at distance d texels is
        //  exp(-d*d * inv_two_var); the center tap (d = 0) is exactly 1.0.
        const float inv_two_var = 0.5/(sigma*sigma);
        const float w0 = 1.0;
        //  Note: If the implementation uses a smaller blur than the max
        //  allowed, the worst case is an overestimated center weight, which
        //  puts a bit more energy into the brightpass...no huge deal.  If
        //  the implementation uses a larger blur than the max "allowed"
        //  because of dynamic branching, the center weight could be
        //  underestimated, which is more of a problem.
        //  Taps are appended tier by tier, in the same left-to-right order
        //  as one long sum, so the rounding matches a single expression.
        //  9x blur (always present):
        const float w1 = exp(-1.0 * inv_two_var);
        const float w2 = exp(-4.0 * inv_two_var);
        const float w3 = exp(-9.0 * inv_two_var);
        const float w4 = exp(-16.0 * inv_two_var);
        float one_side_sum = w1 + w2 + w3 + w4;
        #if defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS) || defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS) || defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS) || defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS)
            //  17x blur or larger:
            const float w5 = exp(-25.0 * inv_two_var);
            const float w6 = exp(-36.0 * inv_two_var);
            const float w7 = exp(-49.0 * inv_two_var);
            const float w8 = exp(-64.0 * inv_two_var);
            one_side_sum = one_side_sum + w5 + w6 + w7 + w8;
        #endif
        #if defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS) || defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS) || defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS)
            //  25x blur or larger:
            const float w9 = exp(-81.0 * inv_two_var);
            const float w10 = exp(-100.0 * inv_two_var);
            const float w11 = exp(-121.0 * inv_two_var);
            const float w12 = exp(-144.0 * inv_two_var);
            one_side_sum = one_side_sum + w9 + w10 + w11 + w12;
        #endif
        #if defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS) || defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS)
            //  31x blur or larger:
            const float w13 = exp(-169.0 * inv_two_var);
            const float w14 = exp(-196.0 * inv_two_var);
            const float w15 = exp(-225.0 * inv_two_var);
            one_side_sum = one_side_sum + w13 + w14 + w15;
        #endif
        #if defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS)
            //  43x blur:
            const float w16 = exp(-256.0 * inv_two_var);
            const float w17 = exp(-289.0 * inv_two_var);
            const float w18 = exp(-324.0 * inv_two_var);
            const float w19 = exp(-361.0 * inv_two_var);
            const float w20 = exp(-400.0 * inv_two_var);
            const float w21 = exp(-441.0 * inv_two_var);
            one_side_sum = one_side_sum + w16 + w17 + w18 + w19 + w20 + w21;
        #endif
        //  Every off-center tap occurs once on each side of the center:
        const float weight_sum_inv = 1.0 / (w0 + 2.0 * one_side_sum);
        return weight_sum_inv * weight_sum_inv;
    #endif
}
float3 tex2DblurNfast(const sampler2D tex, const float2 tex_uv,
    const float2 dxdy, const float sigma)
{
    //  Blur tex at tex_uv with one of the tex2Dblur{9,17,25,31,43}fast
    //  helpers, forwarding dxdy and sigma unchanged.
    //  When sigma is static we can safely branch and use the smallest blur
    //  that's big enough; the #define hints are ignored because a large blur
    //  is only used if needed and the branches cost nothing.  With a runtime
    //  sigma, branching is still worthwhile if the profile supports dynamic
    //  branches: it's much faster than a hugely excessive blur, but each
    //  branch eats ~1% FPS.
    #if !defined(RUNTIME_PHOSPHOR_BLOOM_SIGMA) || defined(DRIVERS_ALLOW_DYNAMIC_BRANCHES)
        #define PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
    #endif
    //  Failed optimization notes:
    //  A same-size mipmapped 5-tap separable blur10 that reached into lower
    //  mip levels could handle any sigma.  It matched blur25fast for runtime
    //  sigmas and slightly beat blur31fast for static sigmas, but mipmapping
    //  two viewport-size passes ate 10% of FPS across all codepaths, so it
    //  wasn't worth it.
    #if defined(PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE)
        //  Pick the smallest blur whose std_dev limit covers sigma:
        if(sigma <= blur9_std_dev)
        {
            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma);
        }
        if(sigma <= blur17_std_dev)
        {
            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma);
        }
        if(sigma <= blur25_std_dev)
        {
            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma);
        }
        if(sigma <= blur31_std_dev)
        {
            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma);
        }
        return tex2Dblur43fast(tex, tex_uv, dxdy, sigma);
    //  If we can't afford to branch, we can only guess at the blur size we
    //  need.  Therefore, use the largest blur the hints allow:
    #elif defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS)
        return tex2Dblur43fast(tex, tex_uv, dxdy, sigma);
    #elif defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS)
        return tex2Dblur31fast(tex, tex_uv, dxdy, sigma);
    #elif defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS)
        return tex2Dblur25fast(tex, tex_uv, dxdy, sigma);
    #elif defined(PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS)
        return tex2Dblur17fast(tex, tex_uv, dxdy, sigma);
    #else
        return tex2Dblur9fast(tex, tex_uv, dxdy, sigma);
    #endif
}
float get_bloom_approx_sigma(const float output_size_x_runtime,
    const float estimated_viewport_size_x)
{
    //  Requires:   1.) output_size_x_runtime == BLOOM_APPROX.output_size.x.
    //                  It is passed in for the dynamic codepath in case the
    //                  static globals below are incorrect.
    //              2.) bloom_approx_size_x_for_fake should equal the same
    //                  value if PHOSPHOR_BLOOM_FAKE is #defined.
    //              3.) bloom_approx_size_x should equal it otherwise.
    //  Returns:    For the 4x4 Gaussian resize (bloom_approx_filter > 1.5),
    //              a dynamic small bloom sigma that's as close to optimal as
    //              the available information allows.  Otherwise a static
    //              small bloom sigma that works well for typical cases (used
    //              by blur3x3; the bilinear resize doesn't need a sigma).
    //  Assume an extremely large viewport size for asymptotic results:
    static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
    //  Static default triad count.  This is a compromise that ensures
    //  typical triads are blurred, even if unusually large ones aren't.
    static const float mask_num_triads_static =
        max(min_allowed_viewport_triads.x, mask_num_triads_desired_static);
    if(bloom_approx_filter > 1.5)   //  4x4 true Gaussian resize
    {
        //  Use the runtime num triads and output size:
        const float triads_from_viewport =
            estimated_viewport_size_x/mask_triad_size_desired;
        const float triads_runtime = max(min_allowed_viewport_triads.x,
            lerp(triads_from_viewport, mask_num_triads_desired,
                mask_specify_num_triads));
        const float runtime_triad_size = max_viewport_size_x/triads_runtime;
        const float runtime_asymptotic_sigma = get_min_sigma_to_blur_triad(
            runtime_triad_size, bloom_diff_thresh);
        const float runtime_sigma = runtime_asymptotic_sigma *
            output_size_x_runtime/max_viewport_size_x;
        //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire,
        //  but account for the Gaussian scanline sigma from the last pass
        //  too.  The bloom will be too wide horizontally but tall enough
        //  vertically.
        return length(float2(runtime_sigma, beam_max_sigma));
    }
    //  3x3 blur resize or bilinear filtering.  The biggest reason to choose
    //  blur3x3 is to avoid dynamic weights, so use a static calculation.
    #ifdef PHOSPHOR_BLOOM_FAKE
        static const float output_size_x_static =
            bloom_approx_size_x_for_fake;
    #else
        static const float output_size_x_static = bloom_approx_size_x;
    #endif
    static const float static_triad_size =
        max_viewport_size_x/mask_num_triads_static;
    const float static_asymptotic_sigma = get_min_sigma_to_blur_triad(
        static_triad_size, bloom_diff_thresh);
    const float static_sigma = static_asymptotic_sigma *
        output_size_x_static/max_viewport_size_x;
    //  As above, fold in the Gaussian scanline sigma from the last pass,
    //  this time using the static default value:
    return length(float2(static_sigma, beam_max_sigma_static));
}
float get_final_bloom_sigma(const float bloom_sigma_runtime)
{
    //  Requires:   1.) bloom_sigma_runtime is a precalculated sigma that's
    //                  optimal for the [known] triad size.
    //              2.) This is called from a fragment shader, NOT a vertex
    //                  shader, or blurring with static sigmas won't be
    //                  constant-folded.
    //  Returns:    bloom_sigma_runtime unchanged when
    //              RUNTIME_PHOSPHOR_BLOOM_SIGMA is #defined (the optimal
    //              runtime sigma, 10% slower).  Otherwise the optimistic
    //              static sigma computed from
    //              mask_triad_size_desired_static.
    #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA
        return bloom_sigma_runtime;
    #else
        //  Overblurring looks as bad as underblurring, so assume
        //  average-size triads, not worst-case huge triads:
        const float static_sigma = get_min_sigma_to_blur_triad(
            mask_triad_size_desired_static, bloom_diff_thresh);
        return static_sigma;
    #endif
}
#endif // BLOOM_FUNCTIONS_H

View file

@ -1,299 +0,0 @@
#ifndef DERIVED_SETTINGS_AND_CONSTANTS_H
#define DERIVED_SETTINGS_AND_CONSTANTS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////////// DESCRIPTION ////////////////////////////////
// These macros and constants can be used across the whole codebase.
// Unlike the values in user-settings.fxh, end users shouldn't modify these.
////////////////////////////////// INCLUDES //////////////////////////////////
#include "user-settings.fxh"
#include "user-cgp-constants.fxh"
/////////////////////////////// FIXED SETTINGS ///////////////////////////////
// Avoid dividing by zero; using a macro overloads for float, float2, etc.:
//#define FIX_ZERO(c) (max(abs(c), 0.0000152587890625)) // 2^-16
//  Force SIMULATE_CRT_ON_LCD on unless something already #defined it, so the
//  first pass decodes CRT gamma and the last pass encodes LCD gamma.
#if !defined(SIMULATE_CRT_ON_LCD)
    #define SIMULATE_CRT_ON_LCD
#endif
// Manually tiling a manually resized texture creates texture coord derivative
// discontinuities and confuses anisotropic filtering, causing discolored tile
// seams in the phosphor mask. Workarounds:
// a.) Using tex2Dlod disables anisotropic filtering for tiled masks. It's
// downgraded to tex2Dbias without DRIVERS_ALLOW_TEX2DLOD #defined and
// disabled without DRIVERS_ALLOW_TEX2DBIAS #defined either.
// b.) "Tile flat twice" requires drawing two full tiles without border padding
// to the resized mask FBO, and it's incompatible with same-pass curvature.
// (Same-pass curvature isn't used but could be in the future...maybe.)
// c.) "Fix discontinuities" requires derivatives and drawing one tile with
// border padding to the resized mask FBO, but it works with same-pass
// curvature. It's disabled without DRIVERS_ALLOW_DERIVATIVES #defined.
// Precedence: a, then b, then c (if multiple strategies are #defined).
// All three tiling strategies are requested here; the derived settings
// further down #undef the ones that are unavailable or lower-priority.
#define ANISOTROPIC_TILING_COMPAT_TEX2DLOD // 129.7 FPS, 4x, flat; 101.8 at fullscreen
#define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE // 128.1 FPS, 4x, flat; 101.5 at fullscreen
#define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES // 124.4 FPS, 4x, flat; 97.4 at fullscreen
// Also, manually resampling the phosphor mask is slightly blurrier with
// anisotropic filtering. (Resampling with mipmapping is even worse: It
// creates artifacts, but only with the fully bloomed shader.) The difference
// is subtle with small triads, but you can fix it for a small cost.
//#define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
////////////////////////////// DERIVED SETTINGS //////////////////////////////
// Intel HD 4000 GPU's can't handle manual mask resizing (for now), setting the
// geometry mode at runtime, or a 4x4 true Gaussian resize. Disable
// incompatible settings ASAP. (INTEGRATED_GRAPHICS_COMPATIBILITY_MODE may be
// #defined by either user-settings.h or a wrapper .cg that #includes the
// current .cg pass.)
#if defined(INTEGRATED_GRAPHICS_COMPATIBILITY_MODE)
    //  Drop the features this mode can't run:
    #if defined(PHOSPHOR_MASK_MANUALLY_RESIZE)
        #undef PHOSPHOR_MASK_MANUALLY_RESIZE
    #endif
    #if defined(RUNTIME_GEOMETRY_MODE)
        #undef RUNTIME_GEOMETRY_MODE
    #endif
    //  Filter mode 2 (4x4 Gaussian resize) won't work, and mode 1 (3x3 blur)
    //  is inferior in most cases, so a static 2.0 is replaced with 0.0:
    static const float bloom_approx_filter =
        (bloom_approx_filter_static > 1.5) ? 0.0 : bloom_approx_filter_static;
#else
    //  No restrictions: use the static setting as-is.
    static const float bloom_approx_filter = bloom_approx_filter_static;
#endif
//  Without RUNTIME_SHADER_PARAMS_ENABLE the parameters are static, so turn
//  off the slow runtime paths.  Most of them would be harmless once the
//  params are disabled, but some would still be a problem.
#if !defined(RUNTIME_SHADER_PARAMS_ENABLE)
    #if defined(RUNTIME_PHOSPHOR_BLOOM_SIGMA)
        #undef RUNTIME_PHOSPHOR_BLOOM_SIGMA
    #endif
    #if defined(RUNTIME_ANTIALIAS_WEIGHTS)
        #undef RUNTIME_ANTIALIAS_WEIGHTS
    #endif
    #if defined(RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS)
        #undef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
    #endif
    #if defined(RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE)
        #undef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
    #endif
    #if defined(RUNTIME_GEOMETRY_TILT)
        #undef RUNTIME_GEOMETRY_TILT
    #endif
    #if defined(RUNTIME_GEOMETRY_MODE)
        #undef RUNTIME_GEOMETRY_MODE
    #endif
    #if defined(FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT)
        #undef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
    #endif
#endif
//  Requesting a tex2Dlod strategy also requests its tex2Dbias counterpart,
//  which serves as a backup for wider compatibility.
#if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
    #define ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
#endif
#if defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD)
    #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
#endif
//  Remove the anisotropic compatibility strategies that the DRIVERS_ALLOW_*
//  capabilities can't support.
//  No derivatives: "fix discontinuities" is unavailable.
#if !defined(DRIVERS_ALLOW_DERIVATIVES) && defined(ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES)
    #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
#endif
//  No tex2Dlod: drop both tex2Dlod strategies and
//  ANTIALIAS_DISABLE_ANISOTROPIC.
#if !defined(DRIVERS_ALLOW_TEX2DLOD)
    #if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
        #undef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
    #endif
    #if defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD)
        #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
    #endif
    #if defined(ANTIALIAS_DISABLE_ANISOTROPIC)
        #undef ANTIALIAS_DISABLE_ANISOTROPIC
    #endif
#endif
//  No tex2Dbias: drop both tex2Dbias strategies.
#if !defined(DRIVERS_ALLOW_TEX2DBIAS)
    #if defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
        #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
    #endif
    #if defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS)
        #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
    #endif
#endif
//  Keep only the highest-priority anisotropic tiling strategy that is still
//  #defined and disable the rest.  Order: tex2Dlod, tex2Dbias, tile flat
//  twice, fix discontinuities.  This concentrates the nesting in one place.
#if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
    #if defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
        #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
    #endif
    #if defined(ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE)
        #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
    #endif
    #if defined(ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES)
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#elif defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
    #if defined(ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE)
        #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
    #endif
    #if defined(ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES)
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#elif defined(ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE)
    //  ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE is only compatible with
    //  flat texture coords in the same pass, but that's all we use.
    #if defined(ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES)
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#endif
//  The tex2Dlod and tex2Dbias tiling strategies have a lot in common, so one
//  "family" macro is #defined when either is active.  This reduces #ifdef
//  nesting in the next section.
#if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD) || defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
    #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
#endif
//  Anisotropic resampling strategies are prioritized the same way: when the
//  tex2Dlod strategy is active, its tex2Dbias backup is disabled.
#if defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD) && defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS)
    #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
#endif
/////////////////////// DERIVED PHOSPHOR MASK CONSTANTS //////////////////////
//  Choose the source LUT size for mask resizing.  The large mipmapped LUT
//  is preferred whenever it can be used without mipmapping artifacts,
//  because it allows more options for using fewer samples.  That requires
//  both tex2Dlod support and the tex2Dlod resampling strategy; every other
//  configuration uses the small LUT.
#if defined(DRIVERS_ALLOW_TEX2DLOD) && defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD)
    //  TODO: Take advantage of this!
    #define PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
    static const float2 mask_resize_src_lut_size = mask_texture_large_size;
#else
    static const float2 mask_resize_src_lut_size = mask_texture_small_size;
#endif
// tex2D's sampler2D parameter MUST be a uniform global, a uniform input to
// main_fragment, or a static alias of one of the above. This makes it hard
// to select the phosphor mask at runtime: We can't even assign to a uniform
// global in the vertex shader or select a sampler2D in the vertex shader and
// pass it to the fragment shader (even with explicit TEXUNIT# bindings),
// because it just gives us the input texture or a black screen. However, we
// can get around these limitations by calling tex2D three times with different
// uniform samplers (or resizing the phosphor mask three times altogether).
// With dynamic branches, we can process only one of these branches on top of
// quickly discarding fragments we don't need (cgc seems able to overcome
// limitations around dependent texture fetches inside of branches). Without
// dynamic branches, we have to process every branch for every fragment...which
// is slower. Runtime sampling mode selection is slower without dynamic
// branches as well. Let the user's static #defines decide if it's worth it.
//  Runtime mask mode/type selection is enabled when dynamic branches are
//  allowed, or when the user explicitly forces it.
#if defined(DRIVERS_ALLOW_DYNAMIC_BRANCHES) || defined(FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT)
    #define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
#endif
// We need to render some minimum number of tiles in the resize passes.
// We need at least 1.0 just to repeat a single tile, and we need extra
// padding beyond that for anisotropic filtering, discontinuity fixing,
// antialiasing, same-pass curvature (not currently used), etc. First
// determine how many border texels and tiles we need, based on how the result
// will be sampled:
#if defined(GEOMETRY_EARLY)
    //  Most antialiasing filters have a base radius of 4.0 pixels; the
    //  static red subpixel x offset is added on top of that:
    static const float max_subpixel_offset = aa_subpixel_r_offset_static.x;
    static const float max_aa_base_pixel_border =
        4.0 + max_subpixel_offset;
#else
    //  No antialiasing border is reserved without GEOMETRY_EARLY.
    static const float max_aa_base_pixel_border = 0.0;
#endif
//  Anisotropic filtering widens the pixel border by about 0.5, except when a
//  tex2Dlod-family tiling strategy is active:
#if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY)
    static const float max_aniso_pixel_border = max_aa_base_pixel_border;
#else
    static const float max_aniso_pixel_border =
        max_aa_base_pixel_border + 0.5;
#endif
//  The "fix discontinuities" strategy needs 1.0 more pixel of border:
#if defined(ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES)
    static const float max_tiled_pixel_border =
        max_aniso_pixel_border + 1.0;
#else
    static const float max_tiled_pixel_border = max_aniso_pixel_border;
#endif
//  Round the pixel border up to an integer texel border.  With
//  GEOMETRY_EARLY, same-pass curvature is assumed to about triple the texel
//  frequency, so the border is tripled before rounding:
#if defined(GEOMETRY_EARLY)
    static const float max_mask_texel_border =
        macro_ceil(max_tiled_pixel_border * 3.0);
#else
    static const float max_mask_texel_border =
        macro_ceil(max_tiled_pixel_border);
#endif
// Convert the texel border to a tile border using worst-case assumptions:
// the texel border is divided by the smallest allowed triad size times the
// number of triads per tile (presumably the smallest possible tile size in
// texels -- confirm against the definitions of those constants).
static const float max_mask_tile_border = max_mask_texel_border/
(mask_min_allowed_triad_size * mask_triads_per_tile);
//  Finally, set the number of resized tiles to render to MASK_RESIZE, and
//  the starting texel (inside borders) for sampling it.
#if !defined(GEOMETRY_EARLY) && defined(ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE)
    //  Special case: Render two tiles without borders.  Anisotropic
    //  filtering doesn't seem to be a problem here.
    static const float mask_resize_num_tiles = 1.0 + 1.0;
    static const float mask_start_texels = 0.0;
#else
    //  General case: one tile plus a border on each side, with sampling
    //  starting just past the leading texel border.
    static const float mask_resize_num_tiles =
        1.0 + 2.0 * max_mask_tile_border;
    static const float mask_start_texels = max_mask_texel_border;
#endif
//  mask_resize_num_tiles must fit into an FBO whose viewport scale is
//  mask_resize_viewport_scale, which limits the maximum final triad size.
//  Estimate the minimum number of triads the screen can be split into along
//  each dimension (only as accurate as mask_resize_viewport_scale is):
static const float mask_resize_num_triads =
    mask_triads_per_tile * mask_resize_num_tiles;
static const float2 min_allowed_viewport_triads =
    float2(mask_resize_num_triads, mask_resize_num_triads) /
    mask_resize_viewport_scale;
#endif // DERIVED_SETTINGS_AND_CONSTANTS_H

View file

@ -1,545 +0,0 @@
#ifndef GAMMA_MANAGEMENT_H
#define GAMMA_MANAGEMENT_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////////// DESCRIPTION ////////////////////////////////
// This file provides gamma-aware tex*D*() and encode_output() functions.
// Requires: Before #include-ing this file, the including file must #define
// the following macros when applicable and follow their rules:
// 1.) #define FIRST_PASS if this is the first pass.
// 2.) #define LAST_PASS if this is the last pass.
// 3.) If sRGB is available, set srgb_framebufferN = "true" for
// every pass except the last in your .cgp preset.
// 4.) If sRGB isn't available but you want gamma-correctness with
// no banding, #define GAMMA_ENCODE_EVERY_FBO each pass.
// 5.) #define SIMULATE_CRT_ON_LCD if desired (precedence over 6-8)
// 6.) #define SIMULATE_GBA_ON_LCD if desired (precedence over 7-8)
// 7.) #define SIMULATE_LCD_ON_CRT if desired (precedence over 8)
// 8.) #define SIMULATE_GBA_ON_CRT if desired (precedence over -)
// If an option in [5, 8] is #defined in the first or last pass, it
// should be #defined for both. It shouldn't make a difference
// whether it's #defined for intermediate passes or not.
// Optional: The including file (or an earlier included file) may optionally
// #define a number of macros indicating it will override certain
// static constants with either static or uniform constants. The
// macros and associated constants are as follows:
// 1.) OVERRIDE_STANDARD_GAMMA: The user must first define:
// static const float ntsc_gamma
// static const float pal_gamma
// static const float crt_reference_gamma_high
// static const float crt_reference_gamma_low
// static const float lcd_reference_gamma
// static const float crt_office_gamma
// static const float lcd_office_gamma
// 2.) OVERRIDE_DEVICE_GAMMA: The user must first define:
// static const float crt_gamma
// static const float gba_gamma
// static const float lcd_gamma
// 3.) OVERRIDE_FINAL_GAMMA: The user must first define:
// static const float input_gamma
// static const float intermediate_gamma
// static const float output_gamma
// (intermediate_gamma is for GAMMA_ENCODE_EVERY_FBO.)
// 4.) OVERRIDE_ALPHA_ASSUMPTIONS: The user must first define:
// static const bool assume_opaque_alpha
// The gamma constant overrides must be used in every pass or none,
// and OVERRIDE_FINAL_GAMMA bypasses all of the SIMULATE* macros.
// OVERRIDE_ALPHA_ASSUMPTIONS may be set on a per-pass basis.
// Usage: After setting macros appropriately, ignore gamma correction and
// replace all tex*D*() calls with equivalent gamma-aware
// tex*D*_linearize calls, except:
// 1.) When you read an LUT, use regular tex*D or a gamma-specified
// function, depending on its gamma encoding:
// tex*D*_linearize_gamma (takes a runtime gamma parameter)
// 2.) If you must read pass0's original input in a later pass, use
// tex2D_linearize_ntsc_gamma. If you want to read pass0's
// input with gamma-corrected bilinear filtering, consider
// creating a first linearizing pass and reading from the input
// of pass1 later.
// Then, return encode_output(color) from every fragment shader.
// Finally, use the global gamma_aware_bilinear boolean if you want
// to statically branch based on whether bilinear filtering is
// gamma-correct or not (e.g. for placing Gaussian blur samples).
//
// Detailed Policy:
// tex*D*_linearize() functions enforce a consistent gamma-management policy
// based on the FIRST_PASS and GAMMA_ENCODE_EVERY_FBO settings. They assume
// their input texture has the same encoding characteristics as the input for
// the current pass (which doesn't apply to the exceptions listed above).
// Similarly, encode_output() enforces a policy based on the LAST_PASS and
// GAMMA_ENCODE_EVERY_FBO settings. Together, they result in one of the
// following two pipelines.
// Typical pipeline with intermediate sRGB framebuffers:
// linear_color = pow(pass0_encoded_color, input_gamma);
// intermediate_output = linear_color; // Automatic sRGB encoding
// linear_color = intermediate_output; // Automatic sRGB decoding
// final_output = pow(linear_color, 1.0/output_gamma);
// Typical pipeline without intermediate sRGB framebuffers:
// linear_color = pow(pass0_encoded_color, input_gamma);
// intermediate_output = pow(linear_color, 1.0/intermediate_gamma);
// linear_color = pow(intermediate_output, intermediate_gamma);
// final_output = pow(linear_color, 1.0/output_gamma);
// Using GAMMA_ENCODE_EVERY_FBO is much slower, but it's provided as a way to
// easily get gamma-correctness without banding on devices where sRGB isn't
// supported.
//
// Use This Header to Maximize Code Reuse:
// The purpose of this header is to provide a consistent interface for texture
// reads and output gamma-encoding that localizes and abstracts away all the
// annoying details. This greatly reduces the amount of code in each shader
// pass that depends on the pass number in the .cgp preset or whether sRGB
// FBO's are being used: You can trivially change the gamma behavior of your
// whole pass by commenting or uncommenting 1-3 #defines. To reuse the same
// code in your first, Nth, and last passes, you can even put it all in another
// header file and #include it from skeleton .cg files that #define the
// appropriate pass-specific settings.
//
// Rationale for Using Three Macros:
// This file uses GAMMA_ENCODE_EVERY_FBO instead of an opposite macro like
// SRGB_PIPELINE to ensure sRGB is assumed by default, which hopefully imposes
// a lower maintenance burden on each pass. At first glance it seems we could
// accomplish everything with two macros: GAMMA_CORRECT_IN / GAMMA_CORRECT_OUT.
// This works for simple use cases where input_gamma == output_gamma, but it
// breaks down for more complex scenarios like CRT simulation, where the pass
// number determines the gamma encoding of the input and output.
/////////////////////////////// BASE CONSTANTS ///////////////////////////////
// Set standard gamma constants, but allow users to override them:
// Baseline gamma values. When OVERRIDE_STANDARD_GAMMA is defined, none of the
// constants in this group are declared here.
#ifndef OVERRIDE_STANDARD_GAMMA
    // Encoding gammas taken from the video standards:
    static const float pal_gamma = 2.8;     // Never actually 2.8 in practice
    static const float ntsc_gamma = 2.2;    // Best to use NTSC for PAL too?

    // Decoding gammas of typical devices (only use these to emulate devices).
    // Reference CRT/LCD gammas sit above the NTSC and Rec.709 video standard
    // gammas: the standards deliberately undercorrected for an analog CRT's
    // assumed 2.5 reference display gamma, to keep contrast in the assumed
    // [dark] viewing conditions: http://www.poynton.com/PDFs/GammaFAQ.pdf
    // Those unstated assumptions about display gamma and perceptual rendering
    // intent caused a lot of confusion, and more modern CRT's seemed to target
    // NTSC 2.2 gamma with circuitry. LCD displays seem to have followed suit
    // (they struggle near black with 2.5 gamma anyway), especially PC/laptop
    // displays designed to view sRGB in bright environments. (Standards are
    // in flux again with BT.1886, but it's underspecified for displays.)
    static const float crt_reference_gamma_low = 2.35;  // In (2.35, 2.55)
    static const float crt_reference_gamma_high = 2.5;  // In (2.35, 2.55)
    static const float lcd_reference_gamma = 2.5;       // To match CRT
    static const float lcd_office_gamma = 2.2;          // Approximates sRGB
    static const float crt_office_gamma = 2.2;          // Circuitry-adjusted for NTSC
#endif // OVERRIDE_STANDARD_GAMMA

// Treating alpha as 1.0 everywhere might help users dodge some bugs, but only
// if they know it is happening. Skipped when OVERRIDE_ALPHA_ASSUMPTIONS is
// defined.
#ifndef OVERRIDE_ALPHA_ASSUMPTIONS
    static const bool assume_opaque_alpha = false;
#endif
/////////////////////// DERIVED CONSTANTS AS FUNCTIONS ///////////////////////
// gamma-management.h should be compatible with overriding gamma values with
// runtime user parameters, but we can only define other global constants in
// terms of static constants, not uniform user parameters. To get around this
// limitation, we need to define derived constants using functions.
// Set device gamma constants, but allow users to override them:
// Device gamma accessors. With OVERRIDE_DEVICE_GAMMA defined, each accessor
// returns a global (crt_gamma, gba_gamma, lcd_gamma) that the user promises to
// define; otherwise the defaults below are returned.
float get_crt_gamma()
{
#ifdef OVERRIDE_DEVICE_GAMMA
    return crt_gamma;
#else
    return crt_reference_gamma_high;
#endif
}
float get_gba_gamma()
{
#ifdef OVERRIDE_DEVICE_GAMMA
    return gba_gamma;
#else
    return 3.5; // Game Boy Advance; in (3.0, 4.0)
#endif
}
float get_lcd_gamma()
{
#ifdef OVERRIDE_DEVICE_GAMMA
    return lcd_gamma;
#else
    return lcd_office_gamma;
#endif
}
// Set decoding/encoding gammas for the first/last passes, but allow overrides:
#ifdef OVERRIDE_FINAL_GAMMA
    // The user promises to define intermediate_gamma, input_gamma and
    // output_gamma globally; they are returned as-is.
    float get_intermediate_gamma() { return intermediate_gamma; }
    float get_input_gamma() { return input_gamma; }
    float get_output_gamma() { return output_gamma; }
#else
    // When every pass is gamma-corrected, ntsc_gamma is always used between
    // passes so middle passes need not care whether anything is simulated.
    float get_intermediate_gamma() { return ntsc_gamma; }
    // The first SIMULATE_* macro that is defined, in the order listed, picks
    // the input/output gamma pair. With none defined, nothing is simulated.
    #if defined(SIMULATE_CRT_ON_LCD)
        float get_input_gamma() { return get_crt_gamma(); }
        float get_output_gamma() { return get_lcd_gamma(); }
    #elif defined(SIMULATE_GBA_ON_LCD)
        float get_input_gamma() { return get_gba_gamma(); }
        float get_output_gamma() { return get_lcd_gamma(); }
    #elif defined(SIMULATE_LCD_ON_CRT)
        float get_input_gamma() { return get_lcd_gamma(); }
        float get_output_gamma() { return get_crt_gamma(); }
    #elif defined(SIMULATE_GBA_ON_CRT)
        float get_input_gamma() { return get_gba_gamma(); }
        float get_output_gamma() { return get_crt_gamma(); }
    #else
        float get_input_gamma() { return ntsc_gamma; }
        float get_output_gamma() { return ntsc_gamma; }
    #endif
#endif // OVERRIDE_FINAL_GAMMA
// Set decoding/encoding gammas for the current pass. Use static constants for
// linearize_input and gamma_encode_output, because they aren't derived, and
// they let the compiler do dead-code elimination.
#ifdef GAMMA_ENCODE_EVERY_FBO
    // Every pass both decodes its input and encodes its output. Only the
    // first pass input and last pass output use the final gammas; everything
    // in between uses the intermediate gamma.
    static const bool linearize_input = true;
    static const bool gamma_encode_output = true;
    #ifndef FIRST_PASS
        float get_pass_input_gamma() { return get_intermediate_gamma(); }
    #else
        float get_pass_input_gamma() { return get_input_gamma(); }
    #endif
    #ifndef LAST_PASS
        float get_pass_output_gamma() { return get_intermediate_gamma(); }
    #else
        float get_pass_output_gamma() { return get_output_gamma(); }
    #endif
#else
    // Only the first pass linearizes and only the last pass encodes; other
    // passes report a gamma of 1.0.
    #ifndef FIRST_PASS
        static const bool linearize_input = false;
        float get_pass_input_gamma() { return 1.0; }
    #else
        static const bool linearize_input = true;
        float get_pass_input_gamma() { return get_input_gamma(); }
    #endif
    #ifndef LAST_PASS
        static const bool gamma_encode_output = false;
        float get_pass_output_gamma() { return 1.0; }
    #else
        static const bool gamma_encode_output = true;
        float get_pass_output_gamma() { return get_output_gamma(); }
    #endif
#endif
// Lets users check whether bilinear filtering will be gamma-correct; it is
// simply the negation of linearize_input.
static const bool gamma_aware_bilinear = !linearize_input;
////////////////////// COLOR ENCODING/DECODING FUNCTIONS /////////////////////
// Gamma-encodes a pass's output color.
// Returns color untouched when gamma_encode_output is false. Otherwise the
// rgb channels are raised to 1.0/get_pass_output_gamma(); alpha is forced to
// 1.0 when assume_opaque_alpha is set and copied from the input otherwise.
float4 encode_output(const float4 color)
{
    if(!gamma_encode_output)
    {
        return color;
    }
    const float3 encoded_rgb = pow(color.rgb, 1.0/get_pass_output_gamma());
    if(assume_opaque_alpha)
    {
        return float4(encoded_rgb, 1.0);
    }
    return float4(encoded_rgb, color.a);
}
// Identity pass-through: returns the sample exactly as given. Unlike
// decode_input_first() and decode_gamma_input(), no pow() is applied here and
// neither linearize_input nor assume_opaque_alpha is consulted.
// NOTE(review): presumably callers only use this on inputs that are already
// linear - confirm against the passes that call tex2D_linearize().
float4 decode_input(const float4 color)
{
return color;
}
// Linearizes a sample with the pipeline input gamma: rgb is raised to
// get_input_gamma(). Alpha is forced to 1.0 when assume_opaque_alpha is set
// and copied from the input otherwise.
float4 decode_input_first(const float4 color)
{
    const float3 linear_rgb = pow(color.rgb, get_input_gamma());
    if(assume_opaque_alpha)
    {
        return float4(linear_rgb, 1.0);
    }
    return float4(linear_rgb, color.a);
}
// Linearizes a sample with a caller-supplied per-channel gamma: rgb is raised
// to gamma. Alpha is forced to 1.0 when assume_opaque_alpha is set and copied
// from the input otherwise.
float4 decode_gamma_input(const float4 color, const float3 gamma)
{
    const float3 linear_rgb = pow(color.rgb, gamma);
    if(assume_opaque_alpha)
    {
        return float4(linear_rgb, 1.0);
    }
    return float4(linear_rgb, color.a);
}
/////////////////////////// TEXTURE LOOKUP WRAPPERS //////////////////////////
// "SMART" LINEARIZING TEXTURE LOOKUP FUNCTIONS:
// Provide a wide array of linearizing texture lookup wrapper functions. The
// Cg shader spec Retroarch uses only allows for 2D textures, but 1D and 3D
// lookups are provided for completeness in case that changes someday. Nobody
// is likely to use the *fetch and *proj functions, but they're included just
// in case. The only tex*D texture sampling functions omitted are:
// - tex*Dcmpbias
// - tex*Dcmplod
// - tex*DARRAY*
// - tex*DMS*
// - Variants returning integers
// Standard line length restrictions are ignored below for vertical brevity.
/*
// tex1D:
float4 tex1D_linearize(const sampler1D tex, const float tex_coords)
{ return decode_input(tex1D(tex, tex_coords)); }
float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords)
{ return decode_input(tex1D(tex, tex_coords)); }
float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const int texel_off)
{ return decode_input(tex1D(tex, tex_coords, texel_off)); }
float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const int texel_off)
{ return decode_input(tex1D(tex, tex_coords, texel_off)); }
float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const float dx, const float dy)
{ return decode_input(tex1D(tex, tex_coords, dx, dy)); }
float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const float dx, const float dy)
{ return decode_input(tex1D(tex, tex_coords, dx, dy)); }
float4 tex1D_linearize(const sampler1D tex, const float tex_coords, const float dx, const float dy, const int texel_off)
{ return decode_input(tex1D(tex, tex_coords, dx, dy, texel_off)); }
float4 tex1D_linearize(const sampler1D tex, const float2 tex_coords, const float dx, const float dy, const int texel_off)
{ return decode_input(tex1D(tex, tex_coords, dx, dy, texel_off)); }
// tex1Dbias:
float4 tex1Dbias_linearize(const sampler1D tex, const float4 tex_coords)
{ return decode_input(tex1Dbias(tex, tex_coords)); }
float4 tex1Dbias_linearize(const sampler1D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex1Dbias(tex, tex_coords, texel_off)); }
// tex1Dfetch:
float4 tex1Dfetch_linearize(const sampler1D tex, const int4 tex_coords)
{ return decode_input(tex1Dfetch(tex, tex_coords)); }
float4 tex1Dfetch_linearize(const sampler1D tex, const int4 tex_coords, const int texel_off)
{ return decode_input(tex1Dfetch(tex, tex_coords, texel_off)); }
// tex1Dlod:
float4 tex1Dlod_linearize(const sampler1D tex, const float4 tex_coords)
{ return decode_input(tex1Dlod(tex, tex_coords)); }
float4 tex1Dlod_linearize(const sampler1D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex1Dlod(tex, tex_coords, texel_off)); }
// tex1Dproj:
float4 tex1Dproj_linearize(const sampler1D tex, const float2 tex_coords)
{ return decode_input(tex1Dproj(tex, tex_coords)); }
float4 tex1Dproj_linearize(const sampler1D tex, const float3 tex_coords)
{ return decode_input(tex1Dproj(tex, tex_coords)); }
float4 tex1Dproj_linearize(const sampler1D tex, const float2 tex_coords, const int texel_off)
{ return decode_input(tex1Dproj(tex, tex_coords, texel_off)); }
float4 tex1Dproj_linearize(const sampler1D tex, const float3 tex_coords, const int texel_off)
{ return decode_input(tex1Dproj(tex, tex_coords, texel_off)); }
*/
// tex2D:
// Samples tex with tex2D() at tex_coords and routes the texel through
// decode_input().
float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords)
{
    const float4 texel = tex2D(tex, tex_coords);
    return decode_input(texel);
}
// Samples tex with tex2D() at tex_coords and routes the texel through
// decode_input_first(), which applies the pipeline input gamma.
float4 tex2D_linearize_first(const sampler2D tex, const float2 tex_coords)
{
    const float4 texel = tex2D(tex, tex_coords);
    return decode_input_first(texel);
}
// float3 overload: only the xy components of tex_coords are used for the
// tex2D() lookup; the texel is then routed through decode_input().
float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords)
{
    const float4 texel = tex2D(tex, tex_coords.xy);
    return decode_input(texel);
}
//float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const int texel_off)
//{ return decode_input(tex2D(tex, tex_coords, texel_off)); }
//float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const int texel_off)
//{ return decode_input(tex2D(tex, tex_coords.xy, texel_off)); }
/*
float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy)
{ return decode_input(tex2D(tex, tex_coords, dx, dy)); }
float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy)
{ return decode_input(tex2D(tex, tex_coords, dx, dy)); }
float4 tex2D_linearize(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const int texel_off)
{ return decode_input(tex2D(tex, tex_coords, dx, dy, texel_off)); }
float4 tex2D_linearize(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const int texel_off)
{ return decode_input(tex2D(tex, tex_coords, dx, dy, texel_off)); }
// tex2Dbias:
float4 tex2Dbias_linearize(const sampler2D tex, const float4 tex_coords)
{ return decode_input(tex2Dbias(tex, tex_coords)); }
float4 tex2Dbias_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex2Dbias(tex, tex_coords, texel_off)); }
// tex2Dfetch:
float4 tex2Dfetch_linearize(const sampler2D tex, const int4 tex_coords)
{ return decode_input(tex2Dfetch(tex, tex_coords)); }
float4 tex2Dfetch_linearize(const sampler2D tex, const int4 tex_coords, const int texel_off)
{ return decode_input(tex2Dfetch(tex, tex_coords, texel_off)); }
*/
// tex2Dlod:
// Samples tex with tex2Dlod() using the caller's float4 coordinates and routes
// the texel through decode_input().
float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords)
{
    const float4 texel = tex2Dlod(tex, tex_coords);
    return decode_input(texel);
}
//float4 tex2Dlod_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off)
//{ return decode_input(tex2Dlod(tex, tex_coords, texel_off)); }
/*
// tex2Dproj:
float4 tex2Dproj_linearize(const sampler2D tex, const float3 tex_coords)
{ return decode_input(tex2Dproj(tex, tex_coords)); }
float4 tex2Dproj_linearize(const sampler2D tex, const float4 tex_coords)
{ return decode_input(tex2Dproj(tex, tex_coords)); }
float4 tex2Dproj_linearize(const sampler2D tex, const float3 tex_coords, const int texel_off)
{ return decode_input(tex2Dproj(tex, tex_coords, texel_off)); }
float4 tex2Dproj_linearize(const sampler2D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex2Dproj(tex, tex_coords, texel_off)); }
// tex3D:
float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords)
{ return decode_input(tex3D(tex, tex_coords)); }
float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const int texel_off)
{ return decode_input(tex3D(tex, tex_coords, texel_off)); }
float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const float3 dx, const float3 dy)
{ return decode_input(tex3D(tex, tex_coords, dx, dy)); }
float4 tex3D_linearize(const sampler3D tex, const float3 tex_coords, const float3 dx, const float3 dy, const int texel_off)
{ return decode_input(tex3D(tex, tex_coords, dx, dy, texel_off)); }
// tex3Dbias:
float4 tex3Dbias_linearize(const sampler3D tex, const float4 tex_coords)
{ return decode_input(tex3Dbias(tex, tex_coords)); }
float4 tex3Dbias_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex3Dbias(tex, tex_coords, texel_off)); }
// tex3Dfetch:
float4 tex3Dfetch_linearize(const sampler3D tex, const int4 tex_coords)
{ return decode_input(tex3Dfetch(tex, tex_coords)); }
float4 tex3Dfetch_linearize(const sampler3D tex, const int4 tex_coords, const int texel_off)
{ return decode_input(tex3Dfetch(tex, tex_coords, texel_off)); }
// tex3Dlod:
float4 tex3Dlod_linearize(const sampler3D tex, const float4 tex_coords)
{ return decode_input(tex3Dlod(tex, tex_coords)); }
float4 tex3Dlod_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex3Dlod(tex, tex_coords, texel_off)); }
// tex3Dproj:
float4 tex3Dproj_linearize(const sampler3D tex, const float4 tex_coords)
{ return decode_input(tex3Dproj(tex, tex_coords)); }
float4 tex3Dproj_linearize(const sampler3D tex, const float4 tex_coords, const int texel_off)
{ return decode_input(tex3Dproj(tex, tex_coords, texel_off)); }
// NONSTANDARD "SMART" LINEARIZING TEXTURE LOOKUP FUNCTIONS:
// This narrow selection of nonstandard tex2D* functions can be useful:
// tex2Dlod0: Automatically fill in the tex2D LOD parameter for mip level 0.
float4 tex2Dlod0_linearize(const sampler2D tex, const float2 tex_coords)
{ return decode_input(tex2Dlod(tex, float4(tex_coords, 0.0, 0.0))); }
float4 tex2Dlod0_linearize(const sampler2D tex, const float2 tex_coords, const int texel_off)
{ return decode_input(tex2Dlod(tex, float4(tex_coords, 0.0, 0.0), texel_off)); }
// MANUALLY LINEARIZING TEXTURE LOOKUP FUNCTIONS:
// Provide a narrower selection of tex2D* wrapper functions that decode an
// input sample with a specified gamma value. These are useful for reading
// LUT's and for reading the input of pass0 in a later pass.
// tex2D:
float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, texel_off), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, texel_off), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float2 tex_coords, const float2 dx, const float2 dy, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy, texel_off), gamma); }
float4 tex2D_linearize_gamma(const sampler2D tex, const float3 tex_coords, const float2 dx, const float2 dy, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2D(tex, tex_coords, dx, dy, texel_off), gamma); }
// tex2Dbias:
float4 tex2Dbias_linearize_gamma(const sampler2D tex, const float4 tex_coords, const float3 gamma)
{ return decode_gamma_input(tex2Dbias(tex, tex_coords), gamma); }
float4 tex2Dbias_linearize_gamma(const sampler2D tex, const float4 tex_coords, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2Dbias(tex, tex_coords, texel_off), gamma); }
// tex2Dfetch:
float4 tex2Dfetch_linearize_gamma(const sampler2D tex, const int4 tex_coords, const float3 gamma)
{ return decode_gamma_input(tex2Dfetch(tex, tex_coords), gamma); }
float4 tex2Dfetch_linearize_gamma(const sampler2D tex, const int4 tex_coords, const int texel_off, const float3 gamma)
{ return decode_gamma_input(tex2Dfetch(tex, tex_coords, texel_off), gamma); }
*/
// tex2Dlod:
// Samples tex with tex2Dlod() using the caller's float4 coordinates and
// decodes the texel with the supplied per-channel gamma via
// decode_gamma_input().
float4 tex2Dlod_linearize_gamma(const sampler2D tex, const float4 tex_coords, const float3 gamma)
{
    const float4 texel = tex2Dlod(tex, tex_coords);
    return decode_gamma_input(texel, gamma);
}
//float4 tex2Dlod_linearize_gamma(const sampler2D tex, const float4 tex_coords, const int texel_off, const float3 gamma)
//{ return decode_gamma_input(tex2Dlod(tex, tex_coords, texel_off), gamma); }
#endif // GAMMA_MANAGEMENT_H

View file

@ -1,76 +0,0 @@
#ifndef _HELPER_FUNCTIONS_AND_MACROS_H
#define _HELPER_FUNCTIONS_AND_MACROS_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2020 Alex Gunter
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// Samples tex at tex_coords through tex2Dlod() with the last two coordinate
// components set to 0 and a trailing 0.0 argument.
float4 tex2D_nograd(sampler2D tex, float2 tex_coords)
{
    const float4 lod_coords = float4(tex_coords, 0, 0);
    return tex2Dlod(tex, lod_coords, 0.0);
}
// ReShade 4 does not permit the use of functions or the ternary operator
// outside of a function definition. This is a problem for this port
// because the original crt-royale shader makes heavy use of these
// constructs at the root level.
// These preprocessor definitions are a workaround for this limitation.
// Note that they are strictly intended for defining complex global
// constants. I doubt they're more performant than the built-in
// equivalents, so I recommend using the built-ins whenever you can.
// Preprocessor-only stand-ins for sign/abs/min/max/clamp/ceil and the ternary
// operator, built from comparisons cast to int/float so they can appear in
// global constant initializers.
// Every expansion is fully parenthesized, so a macro can be embedded in a
// larger expression (e.g. 2 * macro_min(a, b)) without precedence surprises.
// Arguments are expanded more than once: pass side-effect-free expressions.
//   macro_sign(c):        1 for c > 0, -1 for c < 0, 0 for c == 0
//   macro_abs(c):         c * macro_sign(c), i.e. |c|
//   macro_min/macro_max:  the smaller/larger of c and d
//   macro_clamp(c, l, u): macro_min(macro_max(c, l), u)
//   macro_ceil(c):        truncates c to int and adds 1 if that is below c
//   macro_cond(c, a, b):  a when c is true, b when c is false; both a and b
//                         are always evaluated
#define macro_sign(c) (((int) ((c) > 0)) - ((int) ((c) < 0)))
#define macro_abs(c) ((c) * macro_sign(c))
#define macro_min(c, d) ((c) * ((int) ((c) <= (d))) + (d) * ((int) ((c) > (d))))
#define macro_max(c, d) ((c) * ((int) ((c) >= (d))) + (d) * ((int) ((c) < (d))))
#define macro_clamp(c, l, u) macro_min(macro_max(c, l), u)
#define macro_ceil(c) ((float) ((int) (c) + (int) (((int) (c)) < (c))))
#define macro_cond(c, a, b) (float(c) * (a) + float(!(c)) * (b))
//////////////////////// COMMON MATHEMATICAL CONSTANTS ///////////////////////
// Pi, written out to 12 decimal places.
static const float pi = 3.141592653589;
// We often want to find the location of the previous texel, e.g.:
// const float2 curr_texel = uv * texture_size;
// const float2 prev_texel = floor(curr_texel - float2(0.5)) + float2(0.5);
// const float2 prev_texel_uv = prev_texel / texture_size;
// However, many GPU drivers round incorrectly around exact texel locations.
// We need to subtract a little less than 0.5 before flooring, and some GPU's
// require this value to be farther from 0.5 than others; define it here.
// const float2 prev_texel =
// floor(curr_texel - float2(under_half)) + float2(0.5);
static const float under_half = 0.4995;
// Avoid dividing by zero; using a macro overloads for float, float2, etc.:
// Expands to macro_max(macro_abs(c), 0.0000152587890625), so the result is
// never below 2^-16 and the sign of c is not preserved.
#define FIX_ZERO(c) (macro_max(macro_abs(c), 0.0000152587890625)) // 2^-16
// Alternative floor-based definition, kept for reference (disabled):
// #define fmod(x, y) ((x) - (y) * floor((x)/(y) + FIX_ZERO(0.0)))
// Active definition: fmod(x, y) expands to frac(x / y) * y. Both x and y are
// expanded more than once.
// NOTE(review): any intrinsic fmod would be shadowed by this macro from here
// on, and results for negative x follow frac() - confirm that is intended.
#define fmod(x, y) (frac((x) / (y)) * (y))
#endif // _HELPER_FUNCTIONS_AND_MACROS_H

View file

@ -1,676 +0,0 @@
#ifndef PHOSPHOR_MASK_RESIZING_H
#define PHOSPHOR_MASK_RESIZING_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////// INCLUDES //////////////////////////////////
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
///////////////////////////// CODEPATH SELECTION /////////////////////////////
// Choose a looping strategy based on what's allowed:
// Dynamic loops not allowed: Use a flat static loop.
// Dynamic loops accommodated: Coarsely branch around static loops.
// Dynamic loops assumed allowed: Use a flat dynamic loop.
// Pick the looping strategy. Nothing is defined when dynamic branches are
// allowed (dynamic loops are then assumed). Otherwise exactly one of
// USE_SINGLE_STATIC_LOOP or BREAK_LOOPS_INTO_PIECES is defined.
#ifndef DRIVERS_ALLOW_DYNAMIC_BRANCHES
    #ifndef ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS
        #define USE_SINGLE_STATIC_LOOP
    #else
        #define BREAK_LOOPS_INTO_PIECES
    #endif
#endif // DRIVERS_ALLOW_DYNAMIC_BRANCHES
////////////////////////////////// CONSTANTS /////////////////////////////////
// Static bounds used to size the sinc-resize loops below.
// NOTE(review): mask_min_allowed_triad_size, mask_triads_per_tile,
// mask_sinc_lobes and mask_resize_src_lut_size are not defined in this file;
// presumably they come from the settings headers included above - confirm.
// The larger the resized tile, the fewer samples we'll need for downsizing.
// See if we can get a static min tile size > mask_min_allowed_tile_size:
// (macro_ceil is used instead of ceil() because this is a global initializer.)
static const float mask_min_allowed_tile_size = macro_ceil(
mask_min_allowed_triad_size * mask_triads_per_tile);
// Currently identical to the minimum allowed tile size.
static const float mask_min_expected_tile_size =
mask_min_allowed_tile_size;
// Limit the number of sinc resize taps by the maximum minification factor:
static const float pi_over_lobes = pi/mask_sinc_lobes;
// 2 * lobes, scaled by (source LUT width / smallest expected tile size).
static const float max_sinc_resize_samples_float = 2.0 * mask_sinc_lobes *
mask_resize_src_lut_size.x/mask_min_expected_tile_size;
// Vectorized loops sample in multiples of 4. Round up to be safe:
static const float max_sinc_resize_samples_m4 = macro_ceil(
max_sinc_resize_samples_float * 0.25) * 4.0;
///////////////////////// RESAMPLING FUNCTION HELPERS ////////////////////////
// Requires:    These global constants must be defined:
//              1.) mask_sinc_lobes
//              2.) max_sinc_resize_samples_m4
// Returns:     The number of texture samples to take for a downsize at
//              magnification_scale: the required count rounded up to a
//              multiple of 4, limited by the applicable maximum.
float get_dynamic_loop_size(const float magnification_scale)
{
    // Upper limit on the sample count for the active codepath:
    #ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES
        const float sample_cap_m4 = max_sinc_resize_samples_m4;
    #else
        // Simulating loops with branches imposes a 128-sample limit.
        const float sample_cap_m4 = min(128.0, max_sinc_resize_samples_m4);
    #endif
    // We're downsizing, so the filter spans 2*lobes output pixels (not
    // 2*lobes input texels); that sets how many input samples are needed.
    const float needed_samples = 2.0 * mask_sinc_lobes / magnification_scale;
    // Round up to the next multiple of 4:
    const float needed_samples_m4 = ceil(needed_samples * 0.25) * 4.0;
    return min(needed_samples_m4, sample_cap_m4);
}
// Locates the first (topmost or leftmost) sample of a 1D resize.
// Requires:    1.) dr == du == 1.0/texture_size.x or
//                  dr == dv == 1.0/texture_size.y
//                  (whichever direction we're resampling in).
//                  It's a scalar to save register space.
//              2.) input_tiles_per_texture_r is the number of input tiles
//                  that fit in the input texture along the resampled axis.
//              3.) vertical selects the vertical (true) or horizontal (false)
//                  axis.
// Returns:     float2(first sample's wrapped tile_uv coord along the resized
//              axis, its texel distance from the destination pixel).
// NOTE(review): the original comment describes the tile_uv coord as lying in
// [0, 1]; confirm that holds for negative wrapped coords, since 1.0 is added
// on top of frac() there.
float2 get_first_texel_tile_uv_and_dist(const float2 tex_uv,
    const float2 texture_size, const float dr,
    const float input_tiles_per_texture_r, const float samples,
    const bool vertical)
{
    // Both axes are computed as if this were a one-pass 2D resize; only the
    // resampled axis is meaningful and the other one is dropped on return.
    const float2 dest_texel = tex_uv * texture_size;
    const float2 nearest_prev_texel =
        floor(dest_texel - float2(under_half, under_half)) + float2(0.5, 0.5);
    // Step back to the first of the `samples` texels:
    const float2 texels_back =
        float2(samples, samples)/float2(2.0, 2.0) - float2(1.0, 1.0);
    const float2 start_texel = nearest_prev_texel - float2(texels_back);
    const float2 start_dist_2D = dest_texel - start_texel;
    // Convert texel -> tex_uv -> tile_uv so later code can sub fracs for fmods.
    const float2 start_uv_wrapped = start_texel * dr;
    const float2 start_tile_uv_wrapped =
        start_uv_wrapped * input_tiles_per_texture_r;
    // Wrap the coordinate. This is done for all samples, but the first texel
    // is special, since it might be negative.
    const float2 is_negative =
        float2(start_tile_uv_wrapped < float2(0.0, 0.0));
    const float2 start_tile_uv = frac(start_tile_uv_wrapped) + is_negative;
    // Pack the tile_uv coord and texel distance of the requested axis:
    if(vertical)
    {
        return float2(start_tile_uv.y, start_dist_2D.y);
    }
    return float2(start_tile_uv.x, start_dist_2D.x);
}
// Samples tex at tex_uv for sinc resampling. Mipmapping and anisotropic
// filtering get confused by sinc-resampling, so one [slow] workaround is to
// select the lowest mip level. The lookup used depends on which compatibility
// macro is defined; the TEX2DLOD macro takes priority over TEX2DBIAS.
float4 tex2Dlod0try(const sampler2D tex, const float2 tex_uv)
{
#if defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD)
    // Explicit LOD lookup with the last two coordinate components at 0.0:
    return tex2Dlod(tex, float4(tex_uv, 0.0, 0.0));
#elif defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS)
    // Biased lookup with a bias component of -16.0:
    return tex2Dbias(tex, float4(tex_uv, 0.0, -16.0));
#else
    // No workaround requested: plain lookup.
    return tex2D(tex, tex_uv);
#endif
}
////////////////////////////// LOOP BODY MACROS //////////////////////////////
// Using functions can exceed the temporary register limit, so we're
// stuck with #define macros (I'm TRULY sorry). They're declared here instead
// of above to be closer to the actual invocation sites. Steps:
// 1.) Get the exact texel location.
// 2.) Sample the phosphor mask (already assumed encoded in linear RGB).
// 3.) Get the distance from the current pixel and sinc weight:
// sinc(dist) = sin(pi * dist)/(pi * dist)
// We can also use the slower/smoother Lanczos instead:
// L(x) = sinc(dist) * sinc(dist / lobes)
// 4.) Accumulate the weight sum in weights, and accumulate the weighted texels
// in pixel_color (we'll normalize outside the loop at the end).
// We vectorize the loop to help reduce the Lanczos window's cost.
// The r coord is the coord in the dimension we're resizing along (u or v),
// and first_texel_tile_uv_rrrr is a float4 of the first texel's u or v
// tile_uv coord in [0, 1]. tex_uv_r will contain the tile_uv u or v coord
// for four new texel samples.
// NOTE: These macros declare const locals and read names from the scope they
// are expanded in, so they only work where those names exist. No comments are
// placed inside the continuation lines, since a trailing backslash would
// splice the next line into the comment.
//
// CALCULATE_R_COORD_FOR_4_SAMPLES
// Reads:    i_base, i, first_texel_tile_uv_rrrr, tile_dr, tile_size_uv_r
// Declares: true_i    (sample indices i_base + i + {0, 1, 2, 3})
//           tile_uv_r (frac-wrapped tile coords of the four samples)
//           tex_uv_r  (tile_uv_r scaled by tile_size_uv_r)
#define CALCULATE_R_COORD_FOR_4_SAMPLES \
const float4 true_i = float4(i_base + i,i_base + i,i_base + i,i_base + i) + float4(0.0, 1.0, 2.0, 3.0); \
const float4 tile_uv_r = frac( \
first_texel_tile_uv_rrrr + true_i * tile_dr); \
const float4 tex_uv_r = tile_uv_r * tile_size_uv_r;
// CALCULATE_SINC_RESAMPLE_WEIGHTS
// Reads:    pi_dist (and, for Lanczos, dist and pi_over_lobes)
// Declares: weights, capped at 1.0 per component.
// NOTE(review): the min(..., 1.0) presumably also covers the dist == 0 case,
// where the quotient is 0/0 - confirm the intrinsic's NaN behavior.
#ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
// Lanczos window: sinc(dist) * sinc(dist / lobes).
#define CALCULATE_SINC_RESAMPLE_WEIGHTS \
const float4 pi_dist_over_lobes = pi_over_lobes * dist; \
const float4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) /\
(pi_dist*pi_dist_over_lobes), 1.0.xxxx);
#else
// Plain sinc: sin(pi * dist)/(pi * dist).
#define CALCULATE_SINC_RESAMPLE_WEIGHTS \
const float4 weights = min(sin(pi_dist)/pi_dist, 1.0.xxxx);
#endif
// UPDATE_COLOR_AND_WEIGHT_SUMS
// Reads:    magnification_scale, first_dist_unscaled, true_i, pi,
//           new_sample0..new_sample3
// Declares: dist, pi_dist (plus whatever CALCULATE_SINC_RESAMPLE_WEIGHTS does)
// Updates:  pixel_color (adds the four weighted samples) and weight_sum (adds
//           the four weights); normalization is left to the caller.
#define UPDATE_COLOR_AND_WEIGHT_SUMS \
const float4 dist = magnification_scale * \
abs(first_dist_unscaled - true_i); \
const float4 pi_dist = pi * dist; \
CALCULATE_SINC_RESAMPLE_WEIGHTS; \
pixel_color += new_sample0 * weights.xxx; \
pixel_color += new_sample1 * weights.yyy; \
pixel_color += new_sample2 * weights.zzz; \
pixel_color += new_sample3 * weights.www; \
weight_sum += weights;
// VERTICAL_SINC_RESAMPLE_LOOP_BODY
// One loop iteration of a vertical resize: takes four samples from tex at
// (tex_uv.x, tex_uv_r.{x,y,z,w}) through tex2Dlod0try() and accumulates them.
#define VERTICAL_SINC_RESAMPLE_LOOP_BODY \
CALCULATE_R_COORD_FOR_4_SAMPLES; \
const float3 new_sample0 = tex2Dlod0try(tex, \
float2(tex_uv.x, tex_uv_r.x)).rgb; \
const float3 new_sample1 = tex2Dlod0try(tex, \
float2(tex_uv.x, tex_uv_r.y)).rgb; \
const float3 new_sample2 = tex2Dlod0try(tex, \
float2(tex_uv.x, tex_uv_r.z)).rgb; \
const float3 new_sample3 = tex2Dlod0try(tex, \
float2(tex_uv.x, tex_uv_r.w)).rgb; \
UPDATE_COLOR_AND_WEIGHT_SUMS;
// HORIZONTAL_SINC_RESAMPLE_LOOP_BODY
// One loop iteration of a horizontal resize: takes four samples from tex at
// (tex_uv_r.{x,y,z,w}, tex_uv.y) through tex2Dlod0try() and accumulates them.
#define HORIZONTAL_SINC_RESAMPLE_LOOP_BODY \
CALCULATE_R_COORD_FOR_4_SAMPLES; \
const float3 new_sample0 = tex2Dlod0try(tex, \
float2(tex_uv_r.x, tex_uv.y)).rgb; \
const float3 new_sample1 = tex2Dlod0try(tex, \
float2(tex_uv_r.y, tex_uv.y)).rgb; \
const float3 new_sample2 = tex2Dlod0try(tex, \
float2(tex_uv_r.z, tex_uv.y)).rgb; \
const float3 new_sample3 = tex2Dlod0try(tex, \
float2(tex_uv_r.w, tex_uv.y)).rgb; \
UPDATE_COLOR_AND_WEIGHT_SUMS;
//////////////////////////// RESAMPLING FUNCTIONS ////////////////////////////
float3 downsample_vertical_sinc_tiled(const sampler2D tex,
    const float2 tex_uv, const float2 texture_size, const float dr,
    const float magnification_scale, const float tile_size_uv_r)
{
    //  Requires:   1.) dr == du == 1.0/texture_size.x or
    //                  dr == dv == 1.0/texture_size.y
    //                  (whichever direction we're resampling in).
    //                  It's a scalar to save register space.
    //              2.) tile_size_uv_r is the number of texels an input tile
    //                  takes up in the input texture, in the direction we're
    //                  resampling this pass.
    //              3.) magnification_scale must be <= 1.0.
    //  Returns:    Return a [Lanczos] sinc-resampled pixel of a vertically
    //              downsized input tile embedded in an input texture.
    //  Notes:      The parameters are ordinary runtime values (not static),
    //              so everything derived from them below is a plain const
    //              local, exactly as in downsample_horizontal_sinc_tiled.
    //              The local names used here (i, i_base, tile_dr,
    //              first_texel_tile_uv_rrrr, first_dist_unscaled, weight_sum,
    //              pixel_color, ...) are consumed by the loop body macros, so
    //              do not rename them without updating the macros.
    //  The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension
    //  we're resizing along, e.g. "dy" in this case.
#ifdef USE_SINGLE_STATIC_LOOP
    //  A static loop can be faster, but it might blur too much from using
    //  more samples than it should.
    static const int samples = int(max_sinc_resize_samples_m4);
#else
    const int samples = int(get_dynamic_loop_size(magnification_scale));
#endif
    //  Get the first sample location (scalar tile uv coord along the resized
    //  dimension) and distance from the output location (in texels).
    //  Not static: this depends on the tile_size_uv_r argument and has to be
    //  evaluated for the values passed to this particular call.
    const float input_tiles_per_texture_r = 1.0/tile_size_uv_r;
    //  true = vertical resize:
    const float2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist(
        tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, true);
    const float4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx;
    const float4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy;
    //  Get the tile sample offset (also derived from arguments, so not static):
    const float tile_dr = dr * input_tiles_per_texture_r;
    //  Sum up each weight and weighted sample color, varying the looping
    //  strategy based on our expected dynamic loop capabilities.  See the
    //  loop body macros above.
    int i_base = 0;
    float4 weight_sum = 0.0.xxxx;
    float3 pixel_color = 0.0.xxx;
    static const int i_step = 4;
#ifdef BREAK_LOOPS_INTO_PIECES
    //  Fixed-size pieces of 64, 32, 16, 8 and 4 samples; i_base tracks how
    //  many samples the earlier pieces already consumed.
    if(samples - i_base >= 64)
    {
        for(int i = 0; i < 64; i += i_step)
        {
            VERTICAL_SINC_RESAMPLE_LOOP_BODY;
        }
        i_base += 64;
    }
    if(samples - i_base >= 32)
    {
        for(int i = 0; i < 32; i += i_step)
        {
            VERTICAL_SINC_RESAMPLE_LOOP_BODY;
        }
        i_base += 32;
    }
    if(samples - i_base >= 16)
    {
        for(int i = 0; i < 16; i += i_step)
        {
            VERTICAL_SINC_RESAMPLE_LOOP_BODY;
        }
        i_base += 16;
    }
    if(samples - i_base >= 8)
    {
        for(int i = 0; i < 8; i += i_step)
        {
            VERTICAL_SINC_RESAMPLE_LOOP_BODY;
        }
        i_base += 8;
    }
    if(samples - i_base >= 4)
    {
        for(int i = 0; i < 4; i += i_step)
        {
            VERTICAL_SINC_RESAMPLE_LOOP_BODY;
        }
        i_base += 4;
    }
    //  Do another 4-sample block for a total of 128 max samples.
    if(samples - i_base > 0)
    {
        for(int i = 0; i < 4; i += i_step)
        {
            VERTICAL_SINC_RESAMPLE_LOOP_BODY;
        }
    }
#else
    for(int i = 0; i < samples; i += i_step)
    {
        VERTICAL_SINC_RESAMPLE_LOOP_BODY;
    }
#endif
    //  Normalize so the weight_sum == 1.0, and return:
    const float2 weight_sum_reduce = weight_sum.xy + weight_sum.zw;
    const float3 scalar_weight_sum = float3(weight_sum_reduce.xxx +
        weight_sum_reduce.yyy);
    return (pixel_color/scalar_weight_sum);
}
float3 downsample_horizontal_sinc_tiled(const sampler2D tex,
const float2 tex_uv, const float2 texture_size, const float dr,
const float magnification_scale, const float tile_size_uv_r)
{
// Horizontal counterpart of downsample_vertical_sinc_tiled: accumulates
// sinc-weighted samples of "tex" along x and returns the weighted color
// divided by the total weight.
// Differences from downsample_vertical_sinc_tiled:
// 1.) The dr and tile_size_uv_r parameters are not static consts, so the
// values derived from them (input_tiles_per_texture_r, tile_dr) are
// plain const locals.
// 2.) The "vertical" parameter to get_first_texel_tile_uv_and_dist is
// set to false instead of true.
// 3.) The horizontal version of the loop body is used.
// NOTE: The local names below (i, i_base, tile_dr, first_texel_tile_uv_rrrr,
// first_dist_unscaled, weight_sum, pixel_color, ...) are consumed by the
// loop body macros; do not rename them without updating the macros.
// TODO: If we can get guaranteed compile-time dead code elimination,
// we can combine the vertical/horizontal downsampling functions by:
// 1.) Add an extra static const bool parameter called "vertical."
// 2.) Supply it with the result of get_first_texel_tile_uv_and_dist().
// 3.) Use a conditional assignment in the loop body macro. This is the
// tricky part: We DO NOT want to incur the extra conditional
// assignment in the inner loop at runtime!
// The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension
// we're resizing along, e.g. "dx" in this case.
#ifdef USE_SINGLE_STATIC_LOOP
// If we have to load all samples, we might as well use them.
static const int samples = int(max_sinc_resize_samples_m4);
#else
const int samples = int(get_dynamic_loop_size(magnification_scale));
#endif
// Get the first sample location (scalar tile uv coord along resized
// dimension) and distance from the output location (in texels):
const float input_tiles_per_texture_r = 1.0/tile_size_uv_r;
// false = horizontal resize:
const float2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist(
tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, false);
const float4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx;
const float4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy;
// Get the tile sample offset:
const float tile_dr = dr * input_tiles_per_texture_r;
// Sum up each weight and weighted sample color, varying the looping
// strategy based on our expected dynamic loop capabilities. See the
// loop body macros above.
int i_base = 0;
float4 weight_sum = 0.0.xxxx;
float3 pixel_color = 0.0.xxx;
static const int i_step = 4;
#ifdef BREAK_LOOPS_INTO_PIECES
// Fixed-size pieces of 64, 32, 16, 8 and 4 samples; i_base tracks how many
// samples the earlier pieces already consumed.
if(samples - i_base >= 64)
{
for(int i = 0; i < 64; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 64;
}
if(samples - i_base >= 32)
{
for(int i = 0; i < 32; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 32;
}
if(samples - i_base >= 16)
{
for(int i = 0; i < 16; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 16;
}
if(samples - i_base >= 8)
{
for(int i = 0; i < 8; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 8;
}
if(samples - i_base >= 4)
{
for(int i = 0; i < 4; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
i_base += 4;
}
// Do another 4-sample block for a total of 128 max samples.
if(samples - i_base > 0)
{
for(int i = 0; i < 4; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
}
#else
for(int i = 0; i < samples; i += i_step)
{
HORIZONTAL_SINC_RESAMPLE_LOOP_BODY;
}
#endif
// Normalize so the weight_sum == 1.0, and return.  The four per-lane weight
// totals are reduced pairwise (xy + zw, then x + y) into one scalar that is
// broadcast to all three color channels:
const float2 weight_sum_reduce = weight_sum.xy + weight_sum.zw;
const float3 scalar_weight_sum = float3(weight_sum_reduce.xxx +
weight_sum_reduce.yyy);
return (pixel_color/scalar_weight_sum);
}
//////////////////////////// TILE SIZE CALCULATION ///////////////////////////
float2 get_resized_mask_tile_size(const float2 estimated_viewport_size,
    const float2 estimated_mask_resize_output_size,
    const bool solemnly_swear_same_inputs_for_every_pass)
{
    //  Purpose:    Compute the final pixel size of one manually resized
    //              phosphor mask tile, after constraining the user's desired
    //              size so later sampling stays artifact-free.
    //  Globals:    The following must be defined consistently:
    //              1.) mask_resize_num_triads: High enough that the mask
    //                  sampling method won't show artifacts later (see
    //                  derived-settings-and-constants.h).
    //              2.) mask_resize_src_lut_size: Texel size of the mask LUT.
    //              3.) mask_triads_per_tile: Horizontal triads in the LUT.
    //              4.) mask_min_allowed_triad_size: User setting (the more
    //                  restrictive, the faster the resize).
    //              5.) mask_min_allowed_tile_size_x < mask_resize_src_lut_size.x
    //              6.) mask_triad_size_desired_{runtime, static}
    //              7.) mask_num_triads_desired_{runtime, static}
    //              8.) mask_specify_num_triads must be 0.0/1.0 (false/true).
    //  Parameters: 1.) estimated_viewport_size: The final viewport size.  If
    //                  mask_specify_num_triads is 1.0 and this estimate is
    //                  off, the triad count will be off from the user's
    //                  preference by about the same factor.
    //              2.) estimated_mask_resize_output_size: Output size of the
    //                  MASK_RESIZE pass.  The x component may be garbage if
    //                  and only if the caller discards the x result.
    //              3.) solemnly_swear_same_inputs_for_every_pass: Pass false
    //                  unless every call in every pass is guaranteed to use
    //                  the same sizes for the other parameters.  Across
    //                  passes, always use the same y viewport size/scale, and
    //                  the same x viewport size/scale when using the x result.
    //  Returns:    The constrained, integer-valued tile size.  In unusual
    //              setups the tile may end up stretched vertically (below).

    //  Aspect helpers derived from the LUT dimensions; xy_from_x turns an
    //  x size into an (x, y) size with the LUT's proportions:
    static const float lut_height_over_width =
        mask_resize_src_lut_size.y/mask_resize_src_lut_size.x;
    static const float lut_width_over_height = 1.0/lut_height_over_width;
    static const float2 xy_from_x = float2(1.0, lut_height_over_width);

    //  The user asks either for a triad size or for a triad count; blend by
    //  mask_specify_num_triads.  A wrong estimated_viewport_size.x skews the
    //  triad-count interpretation:
    const float wanted_size_x = mask_triads_per_tile * lerp(
        mask_triad_size_desired,
        estimated_viewport_size.x / mask_num_triads_desired,
        mask_specify_num_triads);

    //  Constraints only matter when MASK_RESIZE itself is being sampled:
    if(get_mask_sample_mode() > 0.5)
    {
        return wanted_size_x * xy_from_x;
    }

    //  Never upsize past the LUT, then clamp both dimensions into the
    //  allowed [smallest, largest] range:
    static const float2 smallest_allowed =
        mask_min_allowed_tile_size * xy_from_x;
    const float2 largest_allowed =
        estimated_mask_resize_output_size / mask_resize_num_tiles;
    const float2 bounded_size = clamp(
        min(wanted_size_x, mask_resize_src_lut_size.x) * xy_from_x,
        smallest_allowed, largest_allowed);

    //  Restore the aspect ratio where that is safe.  While resizing along y,
    //  the x components may be MEANINGLESS (a bogus
    //  estimated_mask_resize_output_size.x poisons largest_allowed.x and
    //  bounded_size.x), so y must not be derived from x: a clamp that
    //  happens here but not in a later pass with correct sizes would break
    //  the sampling coords in MASKED_SCANLINES.  Therefore x is limited by
    //  y, and y is limited by x only if the caller swore the inputs match in
    //  every pass.  Consequently triads can look vertically stretched when:
    //  a.) mask_resize_src_lut_size.x > mask_resize_src_lut_size.y: Wide
    //      LUT's might clamp x more than y (all provided LUT's are square)
    //  b.) true_viewport_size.x < true_viewport_size.y: Vertically oriented
    //      screen (not accounted for anyway)
    //  c.) mask_resize_viewport_scale.x < mask_resize_viewport_scale.y:
    //      Viewport scales are equal by default.
    //  Any of these can be fixed by setting:
    //      mask_resize_viewport_scale.x = mask_resize_viewport_scale.y *
    //          (1.0 / min_expected_aspect_ratio) *
    //          (mask_resize_src_lut_size.x / mask_resize_src_lut_size.y)
    const float2 aspect_limit = float2(
        bounded_size.y * lut_width_over_height,
        lerp(bounded_size.y, bounded_size.x * lut_height_over_width,
            float(solemnly_swear_same_inputs_for_every_pass)));
    const float2 aspect_limited_size = min(bounded_size, aspect_limit);

    //  Tiled sampling needs whole-number sizes.  Round down, nudging by
    //  FIX_ZERO first so floor can't knock an exact whole number down:
    return floor(aspect_limited_size +
        float2(FIX_ZERO(0.0), FIX_ZERO(0.0)));
}
///////////////////////// FINAL MASK SAMPLING HELPERS ////////////////////////
float4 get_mask_sampling_parameters(const float2 mask_resize_texture_size,
    const float2 mask_resize_video_size, const float2 true_viewport_size,
    out float2 mask_tiles_per_screen)
{
    //  Purpose:    Describe where the mask tile sits in the texture that will
    //              be sampled, and how many tiles cover the screen.
    //  Requires:   The requirements of get_resized_mask_tile_size(),
    //              particularly regarding global constants, plus:
    //              1.) mask_resize_texture_size == MASK_RESIZE.texture_size
    //                  if get_mask_sample_mode() is 0 (otherwise anything)
    //              2.) mask_resize_video_size == MASK_RESIZE.video_size
    //                  if get_mask_sample_mode() is 0 (otherwise anything)
    //              3.) true_viewport_size == IN.output_size for a pass set to
    //                  1.0 viewport scale (i.e. it must be correct)
    //  Returns:    float4 with
    //                  xy: tex_uv coords for the start of the mask tile
    //                  zw: tex_uv size of the mask tile from start to end
    //              mask_tiles_per_screen (out) receives the number of mask
    //              tiles that fit on the screen.
    //  The resized tile size is requested WITHOUT swearing that both mask
    //  resize passes saw identical inputs: a false promise would produce
    //  different sizes in different passes and break texture coordinates.
    const float sample_mode = get_mask_sample_mode();
    const float2 resized_tile_size = get_resized_mask_tile_size(
        true_viewport_size, mask_resize_video_size, false);

    if(sample_mode < 0.5)
    {
        //  Sampling MASK_RESIZE: The tile covers only part of the texture
        //  and begins past mask_start_texels worth of border texels.
        const float2 tile_uv_extent =
            resized_tile_size / mask_resize_texture_size;
        const float2 tile_uv_origin =
            (mask_start_texels/resized_tile_size) * tile_uv_extent;
        //  The tile count must use the *true* viewport size:
        mask_tiles_per_screen = true_viewport_size / resized_tile_size;
        return float4(tile_uv_origin, tile_uv_extent);
    }

    //  Otherwise the "tile" is the entire texture: either the full LUT is
    //  repeated 1:1 (many triads per texture), or the hardware resamples a
    //  texture holding a single unresized tile.
    if(sample_mode > 1.5)
    {
        //  Full LUT repeated at a 1:1 pixel:texel ratio, no resizing:
        mask_tiles_per_screen = true_viewport_size/mask_texture_large_size;
    }
    else
    {
        //  Original LUT resized by the hardware:
        mask_tiles_per_screen = true_viewport_size / resized_tile_size;
    }
    //  Start at uv (0, 0) with a uv size of (1, 1):
    return float4(0.0.xx, 1.0.xx);
}
float2 fix_tiling_discontinuities_normalized(const float2 tile_uv,
    float2 duv_dx, float2 duv_dy)
{
    //  Purpose:    Remove wraparound jumps from tile-relative uv coords so a
    //              2x2 pixel quad sees continuous values.
    //  Requires:   1.) duv_dx == ddx(tile_uv)
    //              2.) duv_dy == ddy(tile_uv)
    //                  (Passed in so the caller can reuse them.)
    //              3.) tile_uv holds tile-relative coords in [0, 1], with
    //                  (0.5, 0.5) at the tile center.  A "tile" may be a
    //                  whole texture, the video embedded in it, or any other
    //                  region embedded in a texture.
    //  Returns:    tile_uv, with 1.0 added along each axis where a jump was
    //              detected and this fragment lies in the first half of the
    //              tile.  Such coords exceed 1.0 and land in the padding
    //              beyond the tile.
    //  Method:     A wrap from 1.0 to 0.0 shows up as a difference of more
    //              than about half a tile between fragments of the quad.
    //  Fold in the cross derivative so diagonally opposite fragments also
    //  see each other when high-quality (nVidia-style) derivatives are used:
    const float2 quad_delta_x = abs(duv_dx) + abs(ddy(duv_dx));
    const float2 quad_delta_y = abs(duv_dy) + abs(ddx(duv_dy));
    //  1.0 per axis where the quad straddles a wrap, else 0.0:
    const float2 wrap_in_quad = float2(quad_delta_x + quad_delta_y > 0.5.xx);
    //  1.0 per axis where this fragment is the one that wrapped back to the
    //  start of the tile, else 0.0:
    const float2 in_first_half = float2(tile_uv < 0.5.xx);
    return tile_uv + wrap_in_quad * in_first_half;
}
float2 convert_phosphor_tile_uv_wrap_to_tex_uv(const float2 tile_uv_wrap,
    const float4 mask_tile_start_uv_and_size)
{
    //  Purpose:    Map repeating tile-relative coords to the tex_uv coords
    //              used to sample the phosphor mask.
    //  Requires:   1.) tile_uv_wrap holds tile-relative coords where one
    //                  tile spans [0, 1] and (0.5, 0.5) is its center.
    //                  Values may range over [0, inf]; the fractional part
    //                  selects the position inside the repeated tile.  A
    //                  "tile" may be a texture, the video embedded in it, or
    //                  another region embedded in a texture.
    //              2.) mask_tile_start_uv_and_size.xy: tex_uv coords of the
    //                  start of the embedded tile in the full texture.
    //              3.) mask_tile_start_uv_and_size.zw: [fractional] tex_uv
    //                  size of the embedded tile in the full texture.
    //  Returns:    tex_uv coords corresponding to tile_uv_wrap.
    if(get_mask_sample_mode() < 0.5)
    {
        //  The resized tile is repeated by hand.  Taking frac/fmod of the
        //  coords confuses anisotropic filtering, so apply whichever
        //  workaround the user options select
        //  (derived-settings-and-constants.h disables incompatible options).
#ifdef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
        float2 repeat_uv = frac(tile_uv_wrap * 0.5) * 2.0;
#else
        float2 repeat_uv = frac(tile_uv_wrap);
#endif
#ifdef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
        repeat_uv = fix_tiling_discontinuities_normalized(repeat_uv,
            ddx(repeat_uv), ddy(repeat_uv));
#endif
        //  The tile lives inside a padded FBO and may begin at a nonzero
        //  offset when border texels are used to avoid artifacts:
        const float2 tile_origin_uv = mask_tile_start_uv_and_size.xy;
        const float2 tile_extent_uv = mask_tile_start_uv_and_size.zw;
        return tile_origin_uv + repeat_uv * tile_extent_uv;
    }
    //  Hardware tiling of the input mask texture.  In mode 2 the "tile" is
    //  the whole texture (many triads at 1:1 pixel:texel); otherwise the
    //  texture holds one unresized tile.  tile_uv_wrap is already correct
    //  for both cases.
    return tile_uv_wrap;
}
#endif // PHOSPHOR_MASK_RESIZING_H

View file

@ -1,243 +0,0 @@
#ifndef QUAD_PIXEL_COMMUNICATION_H
#define QUAD_PIXEL_COMMUNICATION_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey*
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////////// DISCLAIMER /////////////////////////////////
// *This code was inspired by "Shader Amortization using Pixel Quad Message
// Passing" by Eric Penner, published in GPU Pro 2, Chapter VI.2. My intent
// is not to plagiarize his fundamentally similar code and assert my own
// copyright, but the algorithmic helper functions require so little code that
// implementations can't vary by much except bugfixes and conventions. I just
// wanted to license my own particular code here to avoid ambiguity and make it
// clear that as far as I'm concerned, people can do as they please with it.
///////////////////////////////// DESCRIPTION ////////////////////////////////
// Given screen pixel numbers, derive a "quad vector" describing a fragment's
// position in its 2x2 pixel quad. Given that vector, obtain the values of any
// variable at neighboring fragments.
// Requires: Using this file in general requires:
// 1.) ddx() and ddy() are present in the current Cg profile.
// 2.) The GPU driver is using fine/high-quality derivatives.
// Functions will give incorrect results if this is not true,
// so a test function is included.
///////////////////// QUAD-PIXEL COMMUNICATION PRIMITIVES ////////////////////
float4 get_quad_vector_naive(const float4 output_pixel_num_wrt_uvxy)
{
    //  Requires:   Two measures of this fragment's output pixel number, each
    //              in ([0, IN.output_size.x), [0, IN.output_size.y)):
    //              1.) output_pixel_num_wrt_uvxy.xy increase with uv coords.
    //              2.) output_pixel_num_wrt_uvxy.zw increase with screen xy.
    //  Returns:    Two measures of the fragment's place in its 2x2 quad:
    //              1.) .xy: placement with respect to uv direction, with the
    //                  origin (0, 0) at the top-left:
    //                      top-left     = (-1.0, -1.0)
    //                      top-right    = ( 1.0, -1.0)
    //                      bottom-left  = (-1.0,  1.0)
    //                      bottom-right = ( 1.0,  1.0)
    //                  Needed to arrange/weight shared texture samples.
    //              2.) .zw: placement with respect to screen xy direction
    //                  (IN.position); the origin varies.  quad_gather needs
    //                  this measure to work correctly.
    //              Note: quad_vector.zw = quad_vector.xy * float2(
    //                      ddx(output_pixel_num_wrt_uvxy.x),
    //                      ddy(output_pixel_num_wrt_uvxy.y));
    //  Caveats:    Assumes the GPU driver always starts 2x2 pixel quads at
    //              even pixel numbers, which can be wrong for odd output
    //              resolutions (nondeterministically so).
    //  0.0 for even pixel numbers, 1.0 for odd ones:
    const float4 pixel_parity = frac(output_pixel_num_wrt_uvxy * 0.5) * 2.0;
    //  Remap {0, 1} to {-1, 1}:
    return pixel_parity * 2.0 - 1.0.xxxx;
}
float4 get_quad_vector(const float4 output_pixel_num_wrt_uvxy)
{
    //  Requires:   Same as get_quad_vector_naive() (see that first).
    //  Returns:    Same as get_quad_vector_naive() (see that first), except
    //              the result stays correct when the 2x2 pixel quad starts
    //              at an odd pixel, which can occur at odd resolutions.
    const float4 naive_vector =
        get_quad_vector_naive(output_pixel_num_wrt_uvxy);
    //  If the naive .zw doesn't increase with screen xy, the quad starts at
    //  an odd pixel; half the screen-space derivative of the naive guess is
    //  the per-axis factor that mirrors it back:
    const float mirror_x = 0.5 * ddx(naive_vector.z);
    const float mirror_y = 0.5 * ddy(naive_vector.w);
    return naive_vector * float4(mirror_x, mirror_y, mirror_x, mirror_y);
}
float4 get_quad_vector(const float2 output_pixel_num_wrt_uv)
{
    //  Requires:   1.) ddx() and ddy() are present in the current Cg profile.
    //              2.) output_pixel_num_wrt_uv must increase with uv coords
    //                  and measure this fragment's output pixel number in:
    //                      ([0, IN.output_size.x), [0, IN.output_size.y))
    //  Returns:    Same as get_quad_vector_naive() (see that first), except
    //              the result stays correct when the 2x2 pixel quad starts
    //              at an odd pixel, which can occur at odd resolutions.
    //  Caveats:    Needs less information than the float4 overload, but is
    //              potentially slower.
    //  Naive uv-relative guess in {-1, 1} from the pixel number's parity:
    const float2 uv_guess =
        (frac(output_pixel_num_wrt_uv * 0.5) * 2.0 - 0.5.xx) * 2.0;
    //  Screen coords may run with or against uv; the derivatives of the
    //  pixel number give that direction per axis, turning the uv guess into
    //  a screen-relative guess:
    const float2 screen_guess = uv_guess * float2(
        ddx(output_pixel_num_wrt_uv.x), ddy(output_pixel_num_wrt_uv.y));
    //  If the screen guess doesn't increase with screen xy, the quad starts
    //  at an odd pixel; these factors mirror both guesses accordingly:
    const float2 odd_start_flip = float2(
        0.5 * ddx(screen_guess.x), 0.5 * ddy(screen_guess.y));
    return float4(uv_guess * odd_start_flip, screen_guess * odd_start_flip);
}
void quad_gather(const float4 quad_vector, const float4 curr,
    out float4 adjx, out float4 adjy, out float4 diag)
{
    //  Requires:   1.) ddx() and ddy() are present in the current Cg profile.
    //              2.) The GPU driver is using fine/high-quality derivatives.
    //              3.) quad_vector describes this fragment's location in its
    //                  2x2 pixel quad using get_quad_vector()'s conventions.
    //              4.) curr is any vector whose neighboring values you want.
    //  Returns:    Via out parameters, the value of curr at the fragment
    //              adjacent in x (adjx), adjacent in y (adjy), and
    //              diagonally opposite (diag).
    //  Screen-relative quad position of this fragment, per axis:
    const float side_x = quad_vector.z;
    const float side_y = quad_vector.w;
    const float4 across_x = curr - ddx(curr) * side_x;
    adjx = across_x;
    adjy = curr - ddy(curr) * side_y;
    //  The diagonal is the y neighbor's own x-adjacent value:
    diag = across_x - ddy(across_x) * side_y;
}
void quad_gather(const float4 quad_vector, const float3 curr,
    out float3 adjx, out float3 adjy, out float3 diag)
{
    //  float3 overload: outputs curr as seen by the x-adjacent, y-adjacent
    //  and diagonal fragments of the 2x2 quad.  quad_vector.zw holds this
    //  fragment's screen-relative quad position.
    const float side_x = quad_vector.z;
    const float side_y = quad_vector.w;
    const float3 across_x = curr - ddx(curr) * side_x;
    adjx = across_x;
    adjy = curr - ddy(curr) * side_y;
    //  The diagonal is the y neighbor's own x-adjacent value:
    diag = across_x - ddy(across_x) * side_y;
}
void quad_gather(const float4 quad_vector, const float2 curr,
    out float2 adjx, out float2 adjy, out float2 diag)
{
    //  float2 overload: outputs curr as seen by the x-adjacent, y-adjacent
    //  and diagonal fragments of the 2x2 quad.  quad_vector.zw holds this
    //  fragment's screen-relative quad position.
    const float side_x = quad_vector.z;
    const float side_y = quad_vector.w;
    const float2 across_x = curr - ddx(curr) * side_x;
    adjx = across_x;
    adjy = curr - ddy(curr) * side_y;
    //  The diagonal is the y neighbor's own x-adjacent value:
    diag = across_x - ddy(across_x) * side_y;
}
float4 quad_gather(const float4 quad_vector, const float curr)
{
    //  Scalar overload.
    //  Returns:    return.x == current
    //              return.y == adjacent x
    //              return.z == adjacent y
    //              return.w == diagonal
    //  Pair this fragment's value with its x neighbor's...
    const float across_x = curr - ddx(curr) * quad_vector.z;
    const float2 this_row = float2(curr, across_x);
    //  ...then fetch the same pair from the y neighbor in one step:
    const float2 other_row = this_row - ddy(this_row) * quad_vector.w;
    return float4(this_row, other_row);
}
float4 quad_gather_sum(const float4 quad_vector, const float4 curr)
{
    //  Requires:   Same as quad_gather()
    //  Returns:    Sum of the input vector (curr) over all four fragments
    //              of the 2x2 quad.
    float4 neighbor_x, neighbor_y, neighbor_diag;
    quad_gather(quad_vector, curr, neighbor_x, neighbor_y, neighbor_diag);
    float4 quad_total = curr + neighbor_x;
    quad_total += neighbor_y;
    quad_total += neighbor_diag;
    return quad_total;
}
float3 quad_gather_sum(const float4 quad_vector, const float3 curr)
{
    //  float3 overload: sum of curr over all four fragments of the 2x2 quad.
    float3 neighbor_x, neighbor_y, neighbor_diag;
    quad_gather(quad_vector, curr, neighbor_x, neighbor_y, neighbor_diag);
    float3 quad_total = curr + neighbor_x;
    quad_total += neighbor_y;
    quad_total += neighbor_diag;
    return quad_total;
}
float2 quad_gather_sum(const float4 quad_vector, const float2 curr)
{
    //  float2 overload: sum of curr over all four fragments of the 2x2 quad.
    float2 neighbor_x, neighbor_y, neighbor_diag;
    quad_gather(quad_vector, curr, neighbor_x, neighbor_y, neighbor_diag);
    float2 quad_total = curr + neighbor_x;
    quad_total += neighbor_y;
    quad_total += neighbor_diag;
    return quad_total;
}
float quad_gather_sum(const float4 quad_vector, const float curr)
{
    //  Scalar overload: the float quad_gather() packs all four quad values
    //  (current, adjacent x, adjacent y, diagonal) into one float4; add
    //  them up in that order.
    const float4 quad_values = quad_gather(quad_vector, curr);
    float quad_total = quad_values.x + quad_values.y;
    quad_total += quad_values.z;
    quad_total += quad_values.w;
    return quad_total;
}
bool fine_derivatives_working(const float4 quad_vector, float4 curr)
{
    //  Requires:   1.) ddx() and ddy() are present in the current Cg profile.
    //              2.) quad_vector describes this fragment's location in its
    //                  2x2 pixel quad using get_quad_vector()'s conventions.
    //              3.) curr must be a test vector with non-constant
    //                  derivatives (its value should change nonlinearly
    //                  across fragments).
    //  Returns:    true if fine/hybrid/high-quality derivatives are used;
    //              false if coarse derivatives are used or the test is
    //              inconclusive.
    //  Usage:      Test whether quad-pixel communication is working!
    //  Method:     Fine derivatives are confirmed if (ever, for any value at
    //              any fragment):
    //                  (ddy(curr) != ddy(adjx)) or (ddx(curr) != ddx(adjy))
    //              Testing more values (e.g. a float4, two ways) makes a
    //              positive result easier to demonstrate.
    //  TODO:       Check for floating point exact comparison issues!
    const float4 dx_here = ddx(curr);
    const float4 dy_here = ddy(curr);
    //  curr as seen by the x-adjacent and y-adjacent fragments:
    const float4 x_neighbor = curr - dx_here * quad_vector.z;
    const float4 y_neighbor = curr - dy_here * quad_vector.w;
    //  Does a neighbor report a different derivative along the other axis?
    const bool dy_varies = any(dy_here != ddy(x_neighbor));
    const bool dx_varies = any(dx_here != ddx(y_neighbor));
    return dy_varies || dx_varies;
}
bool fine_derivatives_working_fast(const float4 quad_vector, float curr)
{
    //  Requires:   Same as fine_derivatives_working()
    //  Returns:    Same as fine_derivatives_working()
    //  Usage:      Cheaper than fine_derivatives_working(), but more prone
    //              to false negatives, so less useful for offline
    //              testing/debugging.  It is also useless as the basis for
    //              dynamic runtime branching as of May 2014, because
    //              derivatives (and quad-pixel communication) are currently
    //              disallowed in branches.  Future GPU's may allow them in
    //              dynamic branches if you promise the condition evaluates
    //              the same for every fragment in the quad (and/or if the
    //              driver enforces that by letting a single fragment control
    //              branch decisions).  If that ever happens, this version
    //              may become the more economical choice.
    //  Single check: does the x neighbor report a different y derivative?
    const float x_neighbor = curr - ddx(curr) * quad_vector.z;
    return (ddy(curr) != ddy(x_neighbor));
}
#endif // QUAD_PIXEL_COMMUNICATION_H

View file

@ -1,569 +0,0 @@
#ifndef SCANLINE_FUNCTIONS_H
#define SCANLINE_FUNCTIONS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////// INCLUDES //////////////////////////////////
#include "user-settings.fxh"
#include "derived-settings-and-constants.fxh"
#include "special-functions.fxh"
#include "gamma-management.fxh"
///////////////////////////// SCANLINE FUNCTIONS /////////////////////////////
float3 get_gaussian_sigma(const float3 color, const float sigma_range)
{
    //  Purpose:    Per-channel standard deviation of the Gaussian beam for a
    //              given source color:
    //                  sigma = beam_min_sigma + sigma_range * f(color)
    //  Globals:    1.) beam_min_sigma and beam_max_sigma: Desired minimum and
    //                  maximum beam standard deviations, for dim and bright
    //                  colors respectively.
    //              2.) beam_max_sigma must be > 0.0
    //              3.) beam_min_sigma must be in (0.0, beam_max_sigma]
    //              4.) beam_spot_power must be defined as a global float.
    //  Parameters: 1.) color: The underlying source color along a scanline.
    //              2.) sigma_range = beam_max_sigma - beam_min_sigma; passed
    //                  in to avoid recomputing it when beam_{min, max}_sigma
    //                  are runtime shader parameters.
    //  Shape f():  beam_spot_shape_function < 0.5 selects
    //                  f(color) = pow(color, beam_spot_power)
    //              otherwise (users set it to 1) the spherical curve
    //                  f(color) = sqrt(1.0 - (color - 1.0)*(color - 1.0))
    //  Discussion:
    //  The spot shape vaguely resembles an aspect-corrected f() over [0, 1]
    //  (not quite, but it's related).  f(color) = color gives diamond-like
    //  spots; a spherical function or cube balances variable width against a
    //  soft/realistic shape.  beam_spot_power > 1.0 can produce an ugly spot
    //  and more initial clipping, though the final shape also depends on the
    //  horizontal resampling filter and the phosphor bloom: resampling
    //  horizontally in nonlinear light and/or with a sharp (e.g. Lanczos)
    //  filter sharpens the spot, but a sixth root is still quite soft.  The
    //  power function (default beam_spot_power 1.0/3.0) is the most
    //  flexible; the fixed spherical curve has the highest variability
    //  without an awful spot shape.
    //
    //  beam_min_sigma controls scanline sharpness/aliasing in dim areas, and
    //  its distance from beam_max_sigma controls beam width variability.  It
    //  only affects clipping [for pure Gaussians] if beam_spot_power > 1.0
    //  (a conservative estimate for a more complex constraint).
    //
    //  beam_max_sigma controls clipping and how much scanlines widen/soften
    //  as color increases.  The wider it is, the more scanlines must be
    //  evaluated to avoid distortion.  For a pure Gaussian, the
    //  beam_max_sigma at which the first unused scanline always has a weight
    //  < 1.0/255.0 is:
    //      num scanlines = 2, beam_max_sigma = 0.2089; distortions begin ~0.34
    //      num scanlines = 3, beam_max_sigma = 0.3879; distortions begin ~0.52
    //      num scanlines = 4, beam_max_sigma = 0.5723; distortions begin ~0.70
    //      num scanlines = 5, beam_max_sigma = 0.7591; distortions begin ~0.89
    //      num scanlines = 6, beam_max_sigma = 0.9483; distortions begin ~1.08
    //  Generalized Gaussians permit more leeway here as steepness increases.
    float3 shape_term;
    if(beam_spot_shape_function < 0.5)
    {
        //  Power curve:
        shape_term = pow(color, beam_spot_power);
    }
    else
    {
        //  Spherical curve:
        const float3 offset_from_white = color - 1.0.xxx;
        shape_term = sqrt(1.0.xxx - offset_from_white*offset_from_white);
    }
    return beam_min_sigma.xxx + sigma_range * shape_term;
}
float3 get_generalized_gaussian_beta(const float3 color,
    const float shape_range)
{
    //  Purpose:    Pick the type-I generalized Gaussian "shape" parameter
    //              (beta) for a given source color.
    //  Requires:   Globals:
    //              1.) beam_min_shape and beam_max_shape are global floats
    //                  holding the desired min/max beta values, for dim and
    //                  bright colors respectively.
    //              2.) beam_max_shape must be >= 2.0
    //              3.) beam_min_shape must be in [2.0, beam_max_shape]
    //              4.) beam_shape_power must be defined as a global float.
    //              Parameters:
    //              1.) color is the underlying source color along a scanline
    //              2.) shape_range = beam_max_shape - beam_min_shape; it is
    //                  passed in so callers compute it once when the
    //                  min/max shapes are runtime shader parameters.
    //  Returns:    beta = beam_min_shape + shape_range * color**beam_shape_power
    //  Discussion:
    //  How beta changes the scanline distribution:
    //  a.) beta < 2.0 narrows the peak to a spike with a discontinuous slope
    //  b.) beta == 2.0 degenerates to a plain Gaussian
    //  c.) beta > 2.0 flattens and widens the peak, then falls off more
    //      steeply than a Gaussian.  High sigmas widen and soften peaks,
    //      whereas high betas widen and sharpen them (risking aliasing).
    //  Unlike high beam_spot_powers, high beam_shape_powers soften shape
    //  transitions, and lower ones sharpen them (again risking aliasing).
    const float3 shape_curve = pow(color, beam_shape_power);
    return beam_min_shape + shape_range * shape_curve;
}
float3 scanline_gaussian_integral_contrib(const float3 dist,
    const float3 color, const float pixel_height, const float sigma_range)
{
    //  Purpose:    Integrate a pure Gaussian beam profile across one output
    //              pixel to get a scanline's light contribution.
    //  Requires:   1.) dist is the distance of the [potentially separate
    //                  R/G/B] point(s) from a scanline in units of
    //                  scanlines, where 1.0 means the sample point straddles
    //                  the next scanline.
    //              2.) color is the underlying source color along a scanline.
    //              3.) pixel_height is the output pixel height in scanlines.
    //              4.) Requirements of get_gaussian_sigma() must be met.
    //  Returns:    A scanline's light output over a given pixel.
    //  Details:
    //  The beam profile is roughly Gaussian and wider for bright colors than
    //  dark ones.  A Gaussian always integrates to 1.0 over its full range,
    //  so the standard deviation can vary without affecting brightness.
    //  With 'x' as the distance:
    //      gaussian sample = 1/(sigma*sqrt(2*pi)) * e**(-(x**2)/(2*sigma**2))
    //      gaussian integral = 0.5 (1.0 + erf(x/(sigma * sqrt(2))))
    //  The definite integral over the pixel is the difference of that
    //  expression at the pixel's two edges, using erf() (a numerical
    //  approximation of the error function).  A flat scalar pixel height is
    //  used rather than one derived from a full pixel-space to
    //  scanline-space matrix.
    const float3 half_height = (pixel_height.xxx) * 0.5;
    const float3 beam_sigma = get_gaussian_sigma(color, sigma_range);
    // Precompute 1/(sigma*sqrt(2)) once for both pixel edges:
    const float3 erf_scale = 1.0/(beam_sigma*sqrt(2.0));
    const float3 upper_edge = (dist + half_height)*erf_scale;
    const float3 lower_edge = (dist - half_height)*erf_scale;
    const float3 erf_span = erf(upper_edge) - erf(lower_edge);
    // Dividing by pixel_height turns the integral into an average:
    return color * 0.5*erf_span/pixel_height;
}
float3 scanline_generalized_gaussian_integral_contrib(const float3 dist,
    const float3 color, const float pixel_height, const float sigma_range,
    const float shape_range)
{
    //  Purpose:    Integrate a generalized Gaussian beam profile across one
    //              output pixel to get a scanline's light contribution.
    //  Requires:   1.) Requirements of scanline_gaussian_integral_contrib()
    //                  must be met.
    //              2.) Requirements of get_gaussian_sigma() must be met.
    //              3.) Requirements of get_generalized_gaussian_beta() must
    //                  be met.
    //  Returns:    A scanline's light output over a given pixel.
    //  Details:
    //  A generalized Gaussian lets the shape (beta) vary as well as the
    //  width (alpha).  With "gamma" as the gamma function:
    //      generalized sample =
    //          beta/(2*alpha*gamma(1/beta)) * e**(-(|x|/alpha)**beta)
    //  ligamma(s, z) is the lower incomplete gamma function, for which only
    //  two of four branches are implemented (because 1/beta <= 0.5):
    //      generalized integral = 0.5 + 0.5* sign(x) *
    //          ligamma(1/beta, (|x|/alpha)**beta)/gamma(1/beta)
    //  See get_generalized_gaussian_beta() for a discussion of beta.
    //  alpha is based on the intended Gaussian sigma, but it only strictly
    //  models the standard deviation at beta == 2, because the standard
    //  deviation depends on both alpha and beta (keeping alpha independent
    //  is faster and preserves intuitive behavior and a full spectrum of
    //  results).
    const float3 width = sqrt(2.0) * get_gaussian_sigma(color, sigma_range);
    const float3 shape = get_generalized_gaussian_beta(color, shape_range);
    const float3 width_inv = 1.0.xxx/width;
    const float3 shape_inv = 1.0.xxx/shape;
    const float3 half_height = (pixel_height.xxx) * 0.5;
    // shape is 1/shape_inv, so hand it to gamma_impl and
    // normalized_ligamma_impl as their precomputed reciprocal argument,
    // along with 1/gamma(shape_inv), to avoid repeated divides.
    const float3 gamma_norm = 1.0.xxx/gamma_impl(shape_inv, shape);
    // Signed distances to the far and near pixel edges:
    const float3 edge_hi = dist + half_height;
    const float3 edge_lo = dist - half_height;
    const float3 z_hi = pow(abs(edge_hi)*width_inv, shape);
    const float3 z_lo = pow(abs(edge_lo)*width_inv, shape);
    const float3 cdf_hi = sign(edge_hi) * normalized_ligamma_impl(
        shape_inv, z_hi, shape, gamma_norm);
    const float3 cdf_lo = sign(edge_lo) * normalized_ligamma_impl(
        shape_inv, z_lo, shape, gamma_norm);
    return color * 0.5*(cdf_hi - cdf_lo)/pixel_height;
}
float3 scanline_gaussian_sampled_contrib(const float3 dist, const float3 color,
    const float pixel_height, const float sigma_range)
{
    //  Purpose:    Point-sample a pure Gaussian beam profile (1 or 3 taps)
    //              instead of integrating it over the pixel.
    //  See scanline_gaussian_integral_contrib() for detailed comments!
    //  gaussian sample = 1/(sigma*sqrt(2*pi)) * e**(-(x**2)/(2*sigma**2))
    const float3 beam_sigma = get_gaussian_sigma(color, sigma_range);
    // Turn the divides into multiplies up front:
    const float3 inv_sigma = 1.0.xxx/beam_sigma;
    const float3 exp_scale = 0.5 * inv_sigma * inv_sigma;
    const float3 norm_scale = inv_sigma/sqrt(2.0*pi);
    // The center tap is needed by both code paths:
    const float3 center_weight = exp(-(dist*dist)*exp_scale);
    if(beam_antialias_level > 0.5)
    {
        // Add taps 1/3 pixel to either side of the center and average all
        // three pure Gaussian samples:
        const float3 tap_step = pixel_height.xxx/3.0;
        const float3 far_dist = dist + tap_step;
        const float3 near_dist = abs(dist - tap_step);
        const float3 far_weight = exp(-(far_dist*far_dist)*exp_scale);
        const float3 near_weight = exp(-(near_dist*near_dist)*exp_scale);
        const float3 avg_scale = color/3.0 * norm_scale;
        return avg_scale * (center_weight + far_weight + near_weight);
    }
    else
    {
        return color*center_weight*norm_scale;
    }
}
float3 scanline_generalized_gaussian_sampled_contrib(const float3 dist,
    const float3 color, const float pixel_height, const float sigma_range,
    const float shape_range)
{
    //  Purpose:    Point-sample a generalized Gaussian beam profile (1 or 3
    //              taps) instead of integrating it over the pixel.
    //  See scanline_generalized_gaussian_integral_contrib() for details!
    //  generalized sample =
    //      beta/(2*alpha*gamma(1/beta)) * e**(-(|x|/alpha)**beta)
    const float3 width = sqrt(2.0) * get_gaussian_sigma(color, sigma_range);
    const float3 shape = get_generalized_gaussian_beta(color, shape_range);
    // Turn the divides into multiplies up front:
    const float3 width_inv = 1.0.xxx/width;
    const float3 shape_inv = 1.0.xxx/shape;
    // color * beta/(2*alpha*gamma(1/beta)):
    const float3 amplitude = color * shape * 0.5 * width_inv /
        gamma_impl(shape_inv, shape);
    // The center tap is needed by both code paths:
    const float3 center_weight = exp(-pow(abs(dist*width_inv), shape));
    if(beam_antialias_level > 0.5)
    {
        // Add taps 1/3 pixel farther from and closer to the scanline, then
        // average all three generalized Gaussian samples:
        const float3 tap_step = pixel_height.xxx/3.0;
        const float3 far_dist = dist + tap_step;
        const float3 near_dist = abs(dist - tap_step);
        const float3 far_weight = exp(-pow(abs(far_dist*width_inv), shape));
        const float3 near_weight = exp(-pow(abs(near_dist*width_inv), shape));
        return amplitude/3.0 * (center_weight + far_weight + near_weight);
    }
    else
    {
        return amplitude * center_weight;
    }
}
float3 scanline_contrib(float3 dist, float3 color,
    float pixel_height, const float sigma_range, const float shape_range)
{
    //  Purpose:    Dispatch to one of the four scanline contribution
    //              functions according to the user's codepath choices.
    //  Requires:   1.) Requirements of scanline_gaussian_integral_contrib()
    //                  must be met.
    //              2.) Requirements of get_gaussian_sigma() must be met.
    //              3.) Requirements of get_generalized_gaussian_beta() must
    //                  be met.
    //  Returns:    A scanline's light output over a given pixel, using a
    //              generalized or pure Gaussian distribution
    //              (beam_generalized_gaussian) and integrals or sampling
    //              (beam_antialias_level above or below 1.5).
    const bool use_integral = (beam_antialias_level > 1.5);
    if(beam_generalized_gaussian)
    {
        if(use_integral)
        {
            return scanline_generalized_gaussian_integral_contrib(
                dist, color, pixel_height, sigma_range, shape_range);
        }
        return scanline_generalized_gaussian_sampled_contrib(
            dist, color, pixel_height, sigma_range, shape_range);
    }
    // Pure Gaussian beam:
    if(use_integral)
    {
        return scanline_gaussian_integral_contrib(
            dist, color, pixel_height, sigma_range);
    }
    return scanline_gaussian_sampled_contrib(
        dist, color, pixel_height, sigma_range);
}
float3 get_raw_interpolated_color(const float3 color0,
    const float3 color1, const float3 color2, const float3 color3,
    const float4 weights)
{
    //  Purpose:    Blend four colors with the given weights, with no
    //              colorspace conversion.
    //  Returns:    The weighted sum of color0-3, clamped below at 0.0.
    // Stack the colors as matrix rows so a single mul() does the blend:
    const float4x3 color_rows = float4x3(color0, color1, color2, color3);
    const float3 blended = mul(weights, color_rows);
    // Clamp with max to avoid bizarre artifacts from negative colors:
    return max(blended, 0.0);
}
float3 get_interpolated_linear_color(const float3 color0, const float3 color1,
const float3 color2, const float3 color3, const float4 weights)
{
// Purpose: Blend four horizontally adjacent texel colors with the given
// weights, mixing a blend done on gamma-encoded values with a
// blend done on linear values according to
// beam_horiz_linear_rgb_weight.
// Requires: 1.) Requirements of include/gamma-management.h must be met:
// intermediate_gamma must be globally defined, and input
// colors are interpreted as linear RGB unless you #define
// GAMMA_ENCODE_EVERY_FBO (in which case they are
// interpreted as gamma-encoded with intermediate_gamma).
// 2.) color0-3 are colors sampled from a texture with tex2D().
// They are interpreted as defined in requirement 1.
// 3.) weights contains weights for each color, summing to 1.0.
// 4.) beam_horiz_linear_rgb_weight must be defined as a global
// float in [0.0, 1.0] describing how much blending should
// be done in linear RGB (rest is gamma-corrected RGB).
// 5.) RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE must be #defined
// if beam_horiz_linear_rgb_weight is anything other than a
// static constant, or we may try branching at runtime
// without dynamic branches allowed (slow).
// Returns: Return an interpolated color lookup between the four input
// colors based on the weights in weights. The final color will
// be a linear RGB value, but the blending will be done as
// indicated above.
// Structure: Four preprocessor-selected code paths follow, chosen by
// (branching allowed?) x (GAMMA_ENCODE_EVERY_FBO defined?).
const float intermediate_gamma = get_intermediate_gamma();
// Branch if beam_horiz_linear_rgb_weight is static (for free) or if the
// profile allows dynamic branches (faster than computing extra pows):
#ifndef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
#define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
#else
#ifdef DRIVERS_ALLOW_DYNAMIC_BRANCHES
#define SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
#endif
#endif
#ifdef SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
// beam_horiz_linear_rgb_weight is static or dynamic branches are allowed,
// so skip the second blend whenever its weight would be zero:
#ifdef GAMMA_ENCODE_EVERY_FBO
// Inputs are gamma-encoded: blend them as-is, then decode the result.
const float3 gamma_mixed_color = pow(get_raw_interpolated_color(
color0, color1, color2, color3, weights), intermediate_gamma);
if(beam_horiz_linear_rgb_weight > 0.0)
{
// Decode each input first, then blend in linear light:
const float3 linear_mixed_color = get_raw_interpolated_color(
pow(color0, intermediate_gamma),
pow(color1, intermediate_gamma),
pow(color2, intermediate_gamma),
pow(color3, intermediate_gamma),
weights);
return lerp(gamma_mixed_color, linear_mixed_color,
beam_horiz_linear_rgb_weight);
}
else
{
return gamma_mixed_color;
}
#else
// Inputs are linear: blending them as-is is the linear-light blend.
const float3 linear_mixed_color = get_raw_interpolated_color(
color0, color1, color2, color3, weights);
if(beam_horiz_linear_rgb_weight < 1.0)
{
// Encode each input with 1/intermediate_gamma, then blend.
// NOTE(review): unlike the GAMMA_ENCODE_EVERY_FBO path above, this
// blend is not raised back to intermediate_gamma before the lerp, so
// a gamma-encoded value is mixed with a linear one - confirm intended.
const float3 gamma_mixed_color = get_raw_interpolated_color(
pow(color0, 1.0/intermediate_gamma),
pow(color1, 1.0/intermediate_gamma),
pow(color2, 1.0/intermediate_gamma),
pow(color3, 1.0/intermediate_gamma),
weights);
return lerp(gamma_mixed_color, linear_mixed_color,
beam_horiz_linear_rgb_weight);
}
else
{
return linear_mixed_color;
}
#endif // GAMMA_ENCODE_EVERY_FBO
#else
// No branching: always compute both blends and lerp between them.
#ifdef GAMMA_ENCODE_EVERY_FBO
// Inputs: color0-3 are colors in gamma-encoded RGB.
const float3 gamma_mixed_color = pow(get_raw_interpolated_color(
color0, color1, color2, color3, weights), intermediate_gamma);
const float3 linear_mixed_color = get_raw_interpolated_color(
pow(color0, intermediate_gamma),
pow(color1, intermediate_gamma),
pow(color2, intermediate_gamma),
pow(color3, intermediate_gamma),
weights);
return lerp(gamma_mixed_color, linear_mixed_color,
beam_horiz_linear_rgb_weight);
#else
// Inputs: color0-3 are colors in linear RGB.
const float3 linear_mixed_color = get_raw_interpolated_color(
color0, color1, color2, color3, weights);
// NOTE(review): as in the branching path, gamma_mixed_color stays
// gamma-encoded when it is lerped with the linear blend - confirm intended.
const float3 gamma_mixed_color = get_raw_interpolated_color(
pow(color0, 1.0/intermediate_gamma),
pow(color1, 1.0/intermediate_gamma),
pow(color2, 1.0/intermediate_gamma),
pow(color3, 1.0/intermediate_gamma),
weights);
return lerp(gamma_mixed_color, linear_mixed_color,
beam_horiz_linear_rgb_weight);
#endif // GAMMA_ENCODE_EVERY_FBO
#endif // SCANLINES_BRANCH_FOR_LINEAR_RGB_WEIGHT
}
float3 get_scanline_color(const sampler2D Source, const float2 scanline_uv,
    const float2 uv_step_x, const float4 weights)
{
    //  Purpose:    Fetch 2 or 4 horizontally adjacent texels and blend them.
    //  Requires:   1.) scanline_uv must be vertically snapped to the caller's
    //                  desired line or scanline and horizontally snapped to
    //                  the texel just left of the output pixel (color1)
    //              2.) uv_step_x must contain the horizontal uv distance
    //                  between texels.
    //              3.) weights must contain interpolation filter weights for
    //                  color0, color1, color2, and color3, where color1 is
    //                  just left of the output pixel.
    //  Returns:    A horizontally interpolated texture lookup using 2-4
    //              nearby texels, according to weights and the conventions
    //              of get_interpolated_linear_color().
    // The two texels adjacent to the output pixel are always needed:
    const float3 inner_left = tex2D(Source, scanline_uv).rgb;
    const float3 inner_right = tex2D(Source, scanline_uv + uv_step_x).rgb;
    // Quilez resampling (beam_horiz_filter <= 0.5) ignores the outer texels,
    // so they stay black and their lookups are skipped in that case:
    float3 outer_left = 0.0.xxx;
    float3 outer_right = 0.0.xxx;
    if(beam_horiz_filter > 0.5)
    {
        outer_left = tex2D(Source, scanline_uv - uv_step_x).rgb;
        outer_right = tex2D(Source, scanline_uv + 2.0 * uv_step_x).rgb;
    }
    // The texels are passed along as sampled, linear or gamma-encoded:
    // get_interpolated_linear_color() handles the difference.
    return get_interpolated_linear_color(
        outer_left, inner_left, inner_right, outer_right, weights);
}
float3 sample_single_scanline_horizontal(const sampler2D Source,
    const float2 tex_uv, const float2 texture_size,
    const float2 texture_size_inv)
{
    //  Purpose:    Horizontally resample Source at tex_uv with the filter
    //              selected by beam_horiz_filter:
    //                  < 0.5: Quilez (2 texels)
    //                  < 1.5: Gaussian using beam_horiz_sigma (4 texels)
    //                  else:  Lanczos2 (4 texels)
    //  Parameters: texture_size and texture_size_inv convert between uv
    //              coords and texel coords (tex_uv * texture_size and
    //              texel * texture_size_inv respectively).
    //  Returns:    The interpolated color from get_scanline_color().
    // Locate the texel center just left of the sample point.  under_half
    // fixes a rounding bug right around exact texel locations.
    const float2 sample_texel = tex_uv * texture_size;
    const float2 left_texel =
        floor(sample_texel - under_half.xx) + 0.5.xx;
    // Snap only horizontally; the vertical coord is kept as given:
    const float2 left_texel_hor = float2(left_texel.x, sample_texel.y);
    const float2 left_texel_hor_uv = left_texel_hor * texture_size_inv;
    // Horizontal distances from the sample to each of the 4 nearby texels:
    const float left_dist = sample_texel.x - left_texel_hor.x;
    const float4 tap_dists = float4(1.0 + left_dist, left_dist,
        1.0 - left_dist, 2.0 - left_dist);
    float4 raw_weights;
    if(beam_horiz_filter < 0.5)
    {
        // Quilez: a quintic smoothstep between the two inner texels.
        const float t = tap_dists.y;
        const float right_weight = t*t*t*(t*(t*6.0 - 15.0) + 10.0);
        raw_weights = float4(0.0, 1.0 - right_weight, right_weight, 0.0);
    }
    else if(beam_horiz_filter < 1.5)
    {
        // Gaussian: e**(-(x**2)/(2*sigma**2)) for each tap.
        float exp_scale = 1.0/(2.0*beam_horiz_sigma*beam_horiz_sigma);
        raw_weights = exp(-(tap_dists*tap_dists)*exp_scale);
    }
    else
    {
        // Lanczos2: 2*sin(pi*x)*sin(pi*x/2)/(pi*x)**2 for each tap.
        const float4 pi_taps = FIX_ZERO(tap_dists * pi);
        raw_weights = 2.0 * sin(pi_taps) * sin(pi_taps * 0.5) /
            (pi_taps * pi_taps);
    }
    // Normalize so the weights sum to 1.0:
    const float4 unit_weights = raw_weights/dot(raw_weights, 1.0.xxxx);
    // Step one texel horizontally per tap and blend:
    const float2 texel_step_x = float2(texture_size_inv.x, 0.0);
    return get_scanline_color(
        Source, left_texel_hor_uv, texel_step_x, unit_weights);
}
float3 sample_rgb_scanline_horizontal(const sampler2D Source,
    const float2 tex_uv, const float2 texture_size,
    const float2 texture_size_inv)
{
    //  Purpose:    Horizontally resample a scanline, optionally offsetting
    //              the lookup separately for R, G and B.
    //  Returns:    If beam_misconvergence is set, each output channel comes
    //              from its own sample_single_scanline_horizontal() call at
    //              a horizontally shifted uv; otherwise a single call at
    //              tex_uv supplies all three channels.
    float3 scanline_rgb;
    if(beam_misconvergence)
    {
        // Convert the per-channel x offsets to uv units:
        const float3 texel_offsets_rgb = get_convergence_offsets_x_vector();
        const float3 uv_offsets_rgb =
            texel_offsets_rgb * texture_size_inv.xxx;
        // Sample once per channel and keep only that channel's component:
        const float3 red_sample = sample_single_scanline_horizontal(
            Source, tex_uv - float2(uv_offsets_rgb.r, 0.0),
            texture_size, texture_size_inv);
        const float3 green_sample = sample_single_scanline_horizontal(
            Source, tex_uv - float2(uv_offsets_rgb.g, 0.0),
            texture_size, texture_size_inv);
        const float3 blue_sample = sample_single_scanline_horizontal(
            Source, tex_uv - float2(uv_offsets_rgb.b, 0.0),
            texture_size, texture_size_inv);
        scanline_rgb = float3(red_sample.r, green_sample.g, blue_sample.b);
    }
    else
    {
        scanline_rgb = sample_single_scanline_horizontal(Source, tex_uv,
            texture_size, texture_size_inv);
    }
    return scanline_rgb;
}
float2 get_last_scanline_uv(const float2 tex_uv, const float2 texture_size,
    const float2 texture_size_inv, const float2 il_step_multiple,
    const float frame_count, out float dist)
{
    //  Purpose:    Compute texture coords for the last/upper scanline,
    //              accounting for interlacing.
    //  Outputs:    dist receives the sample's distance from that scanline,
    //              in units of scanlines.
    //  Returns:    The uv coords of the scanline.
    //  Details:
    //  With interlacing, only even/odd scanlines are considered every other
    //  frame.  Top-field first (TFF) order puts even scanlines on even
    //  frames, and BFF order puts them on odd frames.  Texels are centered
    //  at:  frac(tex_uv * texture_size) == x.5
    //  Caution: If these coordinates ever seem incorrect, first make sure
    //  it's not because anisotropic filtering is blurring across field
    //  boundaries.
    //  Note: TFF/BFF won't matter for sources that double-weave or similar.
    const float frame_parity = fmod(frame_count + float(interlace_bff), 2.0);
    const float field_offset = floor(il_step_multiple.y * 0.75) * frame_parity;
    const float2 sample_texel = tex_uv * texture_size;
    // Upstream snapped to the previous texel here with
    //     floor(sample_texel - under_half.xx)
    // to fix a rounding bug right around exact texel locations.  That causes
    // an insane bug on duckstation, so it's disabled here (Hyllian, 2024)
    // and the unsnapped texel coords are used instead.
    const float2 base_texel_num = sample_texel;
    // How far the base texel lies past a scanline of the current field:
    const float lines_past_field = fmod(
        base_texel_num.y + field_offset, il_step_multiple.y);
    const float2 field_line_num =
        base_texel_num - float2(0.0, lines_past_field);
    // Move to the center of that scanline:
    const float2 field_line_center = field_line_num + 0.5.xx;
    dist = (sample_texel.y - field_line_center.y)/il_step_multiple.y;
    return field_line_center * texture_size_inv;
}
bool is_interlaced(float num_lines)
{
    //  Purpose:    Guess whether the source is interlaced from its number of
    //              lines.
    //  Returns:    false whenever interlace_detect is off; otherwise true
    //              for line counts in (288.5, 576.5), and also for counts in
    //              (1079.5, 1080.5) when interlace_1080i is set.
    //  Background:
    //  NTSC: 525 lines, 262.5/field; 486 active (2 half-lines), 243/field
    //  NTSC Emulators: Typically 224 or 240 lines
    //  PAL: 625 lines, 312.5/field; 576 active (typical), 288/field
    //  PAL Emulators: ?
    //  ATSC: 720p, 1080i, 1080p
    //  Assumptions behind the cutoffs:
    //  1.) Only active lines matter.
    //  2.) Anything > 288 and <= 576 lines is probably interlaced.
    //  3.) Anything > 576 lines is probably not interlaced...
    //  4.) ...except 1080 lines, which is a crapshoot (user decision).
    //  5.) The float thresholds are nudged by half a line in case the main
    //      program uses calculated video sizes.
    bool interlaced = false;
    if(interlace_detect)
    {
        const bool sd_range = ((num_lines > 288.5) && (num_lines < 576.5));
        bool hd_range = false;
        if(interlace_1080i)
        {
            hd_range = ((num_lines > 1079.5) && (num_lines < 1080.5));
        }
        interlaced = (sd_range || hd_range);
    }
    return interlaced;
}
#endif // SCANLINE_FUNCTIONS_H

View file

@ -1,498 +0,0 @@
#ifndef SPECIAL_FUNCTIONS_H
#define SPECIAL_FUNCTIONS_H
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////////// DESCRIPTION ////////////////////////////////
// This file implements the following mathematical special functions:
// 1.) erf() = 2/sqrt(pi) * indefinite_integral(e**(-x**2))
// 2.) gamma(s), a real-numbered extension of the integer factorial function
// It also implements normalized_ligamma(s, z), a normalized lower incomplete
// gamma function for s < 0.5 only. Both gamma() and normalized_ligamma() can
// be called with an _impl suffix to use an implementation version with a few
// extra precomputed parameters (which may be useful for the caller to reuse).
// See below for details.
//
// Design Rationale:
// Pretty much every line of code in this file is duplicated four times for
// different input types (float4/float3/float2/float). This is unfortunate,
// but Cg doesn't allow function templates. Macros would be far less verbose,
// but they would make the code harder to document and read. I don't expect
// these functions will require a whole lot of maintenance changes unless
// someone ever has need for more robust incomplete gamma functions, so code
// duplication seems to be the lesser evil in this case.
/////////////////////////// GAUSSIAN ERROR FUNCTION //////////////////////////
float4 erf6(float4 x)
{
    //  Purpose:    Approximate the Gaussian error function per component.
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    An Abramowitz/Stegun approximation of erf(), where:
    //                  erf(x) = 2/sqrt(pi) * integral(e**(-x**2))
    //              This approximation has a max absolute error of 2.5*10**-5
    //              with solid numerical robustness and efficiency.  See:
    //                  https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions
    static const float4 unit = 1.0.xxxx;
    // The formula is evaluated on |x| and the sign is restored at the end:
    const float4 x_sign = sign(x);
    const float4 t = unit/(unit + 0.47047*abs(x));
    // Cubic in t, evaluated in Horner form:
    const float4 poly = t*(0.3480242 + t*(-0.0958798 + t*0.7478556));
    const float4 erf_abs = unit - poly*exp(-(x*x));
    return erf_abs * x_sign;
}
float3 erf6(const float3 x)
{
    //  Float3 version: per-component Abramowitz/Stegun approximation of
    //  erf(x), evaluated on |x| with the sign restored at the end.
    static const float3 unit = 1.0.xxx;
    const float3 x_sign = sign(x);
    const float3 t = unit/(unit + 0.47047*abs(x));
    // Cubic in t, evaluated in Horner form:
    const float3 poly = t*(0.3480242 + t*(-0.0958798 + t*0.7478556));
    const float3 erf_abs = unit - poly*exp(-(x*x));
    return erf_abs * x_sign;
}
float2 erf6(const float2 x)
{
    //  Float2 version: per-component Abramowitz/Stegun approximation of
    //  erf(x), evaluated on |x| with the sign restored at the end.
    static const float2 unit = 1.0.xx;
    const float2 x_sign = sign(x);
    const float2 t = unit/(unit + 0.47047*abs(x));
    // Cubic in t, evaluated in Horner form:
    const float2 poly = t*(0.3480242 + t*(-0.0958798 + t*0.7478556));
    const float2 erf_abs = unit - poly*exp(-(x*x));
    return erf_abs * x_sign;
}
float erf6(const float x)
{
    //  Float version: Abramowitz/Stegun approximation of erf(x), evaluated
    //  on |x| with the sign restored at the end.
    const float x_sign = sign(x);
    const float t = 1.0/(1.0 + 0.47047*abs(x));
    // Cubic in t, evaluated in Horner form:
    const float poly = t*(0.3480242 + t*(-0.0958798 + t*0.7478556));
    const float erf_abs = 1.0 - poly*exp(-(x*x));
    return erf_abs * x_sign;
}
float4 erft(const float4 x)
{
    //  Purpose:    Fast per-component approximation of erf() using tanh().
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    tanh(1.202760580 * x).  The error is visually noticeable,
    //              but it's blazing fast and perceptually close...at least
    //              on ATI hardware.  See:
    //              http://www.maplesoft.com/applications/view.aspx?SID=5525&view=html
    //  Warning:    Only use this if your hardware drivers correctly
    //              implement tanh(): The original author's nVidia 8800GTS
    //              returned garbage output.
    const float4 scaled_x = 1.202760580 * x;
    return tanh(scaled_x);
}
float3 erft(const float3 x)
{
    //  Float3 version: fast tanh()-based approximation of erf(x).
    const float3 scaled_x = 1.202760580 * x;
    return tanh(scaled_x);
}
float2 erft(const float2 x)
{
    //  Float2 version: fast tanh()-based approximation of erf(x).
    const float2 scaled_x = 1.202760580 * x;
    return tanh(scaled_x);
}
float erft(const float x)
{
    //  Float version: fast tanh()-based approximation of erf(x).
    const float scaled_x = 1.202760580 * x;
    return tanh(scaled_x);
}
float4 erf(const float4 x)
{
    //  Purpose:    Public erf() entry point (float4).
    //  Requires:   x is the standard parameter to erf().
    //  Returns:    Some approximation of erf(x), depending on user settings:
    //              erf6() by default, or the faster tanh()-based erft() if
    //              ERF_FAST_APPROXIMATION is #defined.
    #ifndef ERF_FAST_APPROXIMATION
        return erf6(x);
    #else
        return erft(x);
    #endif
}
float3 erf(const float3 x)
{
    //  Float3 version: erf6() by default, or erft() if
    //  ERF_FAST_APPROXIMATION is #defined.
    #ifndef ERF_FAST_APPROXIMATION
        return erf6(x);
    #else
        return erft(x);
    #endif
}
float2 erf(const float2 x)
{
    //  Float2 version: erf6() by default, or erft() if
    //  ERF_FAST_APPROXIMATION is #defined.
    #ifndef ERF_FAST_APPROXIMATION
        return erf6(x);
    #else
        return erft(x);
    #endif
}
float erf(const float x)
{
    //  Float version: erf6() by default, or erft() if
    //  ERF_FAST_APPROXIMATION is #defined.
    #ifndef ERF_FAST_APPROXIMATION
        return erf6(x);
    #else
        return erft(x);
    #endif
}
/////////////////////////// COMPLETE GAMMA FUNCTION //////////////////////////
float4 gamma_impl(const float4 s, const float4 s_inv)
{
    //  Purpose:    Approximate the gamma function per component.
    //  Requires:   1.) s is the standard parameter to the gamma function, and
    //                  it should lie in the [0, 36] range.
    //              2.) s_inv = 1.0/s.  The caller must precompute this value,
    //                  which gives it the opportunity to reuse it.
    //  Returns:    Approximate gamma function (real-numbered factorial)
    //              output using the Lanczos approximation with two
    //              coefficients calculated using Paul Godfrey's method here:
    //                  http://my.fit.edu/~gabdo/gamma.txt
    //              An optimal g value for s in [0, 36] is ~1.12906830989,
    //              with a maximum relative error of 0.000463 over 2**16
    //              equally spaced evaluations.  Three coefficients
    //              (0.0000346 error) would not hurt latency, but two allow
    //              more parallelism with outside instructions.
    static const float4 lanczos_g = 1.12906830989.xxxx;
    static const float4 coeff0 = 0.8109119309638332633713423362694399653724431.xxxx;
    static const float4 coeff1 = 0.4808354605142681877121661197951496120000040.xxxx;
    static const float4 euler_e = 2.71828182845904523536028747135266249775724709.xxxx;
    const float4 s_plus_half = s + 0.5.xxxx;
    const float4 series = coeff0 + coeff1/(s + 1.0.xxxx);
    const float4 power_base = (s_plus_half + lanczos_g)/euler_e;
    // gamma(s + 1) = power_base**s_plus_half * series.  Dividing by s
    // afterward gives gamma(s), which has less error for small s than
    // shifting s down by 1.0 at the beginning.
    const float4 gamma_s_plus_1 = pow(power_base, s_plus_half) * series;
    return gamma_s_plus_1 * s_inv;
}
float3 gamma_impl(const float3 s, const float3 s_inv)
{
    //  Float3 version: two-coefficient Lanczos approximation of gamma(s),
    //  with s_inv = 1.0/s precomputed by the caller.
    static const float3 lanczos_g = 1.12906830989.xxx;
    static const float3 coeff0 = 0.8109119309638332633713423362694399653724431.xxx;
    static const float3 coeff1 = 0.4808354605142681877121661197951496120000040.xxx;
    static const float3 euler_e = 2.71828182845904523536028747135266249775724709.xxx;
    const float3 s_plus_half = s + 0.5.xxx;
    const float3 series = coeff0 + coeff1/(s + 1.0.xxx);
    const float3 power_base = (s_plus_half + lanczos_g)/euler_e;
    // Compute gamma(s + 1) first, then divide by s for gamma(s):
    const float3 gamma_s_plus_1 = pow(power_base, s_plus_half) * series;
    return gamma_s_plus_1 * s_inv;
}
float2 gamma_impl(const float2 s, const float2 s_inv)
{
    //  Float2 version: two-coefficient Lanczos approximation of gamma(s),
    //  with s_inv = 1.0/s precomputed by the caller.
    static const float2 lanczos_g = 1.12906830989.xx;
    static const float2 coeff0 = 0.8109119309638332633713423362694399653724431.xx;
    static const float2 coeff1 = 0.4808354605142681877121661197951496120000040.xx;
    static const float2 euler_e = 2.71828182845904523536028747135266249775724709.xx;
    const float2 s_plus_half = s + 0.5.xx;
    const float2 series = coeff0 + coeff1/(s + 1.0.xx);
    const float2 power_base = (s_plus_half + lanczos_g)/euler_e;
    // Compute gamma(s + 1) first, then divide by s for gamma(s):
    const float2 gamma_s_plus_1 = pow(power_base, s_plus_half) * series;
    return gamma_s_plus_1 * s_inv;
}
float gamma_impl(const float s, const float s_inv)
{
    //  Float version: two-coefficient Lanczos approximation of gamma(s),
    //  with s_inv = 1.0/s precomputed by the caller.
    static const float lanczos_g = 1.12906830989;
    static const float coeff0 = 0.8109119309638332633713423362694399653724431;
    static const float coeff1 = 0.4808354605142681877121661197951496120000040;
    static const float euler_e = 2.71828182845904523536028747135266249775724709;
    const float s_plus_half = s + 0.5;
    const float series = coeff0 + coeff1/(s + 1.0);
    const float power_base = (s_plus_half + lanczos_g)/euler_e;
    // Compute gamma(s + 1) first, then divide by s for gamma(s):
    const float gamma_s_plus_1 = pow(power_base, s_plus_half) * series;
    return gamma_s_plus_1 * s_inv;
}
float4 gamma(const float4 s)
{
    //  Purpose:    Convenience wrapper around gamma_impl() that computes the
    //              reciprocal of s itself.
    //  Requires:   s is the standard parameter to the gamma function, and it
    //              should lie in the [0, 36] range.
    //  Returns:    Approximate gamma function output with a maximum
    //              relative error of 0.000463.  See gamma_impl for details.
    const float4 s_inv = 1.0.xxxx/s;
    return gamma_impl(s, s_inv);
}
float3 gamma(const float3 s)
{
    //  Float3 version: computes 1/s and defers to gamma_impl().
    const float3 s_inv = 1.0.xxx/s;
    return gamma_impl(s, s_inv);
}
float2 gamma(const float2 s)
{
    //  Float2 version: computes 1/s and defers to gamma_impl().
    const float2 s_inv = 1.0.xx/s;
    return gamma_impl(s, s_inv);
}
float gamma(const float s)
{
    //  Float version: computes 1/s and defers to gamma_impl().
    const float s_inv = 1.0/s;
    return gamma_impl(s, s_inv);
}
//////////////// INCOMPLETE GAMMA FUNCTIONS (RESTRICTED INPUT) ///////////////
// Lower incomplete gamma function for small s and z (implementation):
float4 ligamma_small_z_impl(const float4 s, const float4 z, const float4 s_inv)
{
    //  Purpose:    Lower incomplete gamma function for small s and small z.
    //  Requires:   1.) s < ~0.5
    //              2.) z <= ~0.775075
    //              3.) s_inv = 1.0/s (precomputed for outside reuse)
    //  Returns:    A 4-term series representation of the lower incomplete
    //              gamma function:
    //                  z**s * sum over k = 0..3 of (-z)**k / (k! * (s + k))
    //  The sum is unrolled below with the factorials folded into the
    //  denominators: k! * (s + k) = s, s + 1, 2*s + 4, 6*s + 18.
    //  A fifth term (z**4/(24*s + 96)) is intentionally left out.
    const float4 z_pow_s = pow(z, s);
    const float4 z_squared = z*z;
    // Terms for k = 1, 2, 3 (the k = 0 term is just s_inv):
    const float4 term1 = z/(s + 1.0.xxxx);
    const float4 term2 = z_squared/(2.0*s + 4.0.xxxx);
    const float4 term3 = z * z_squared/(6.0*s + 18.0.xxxx);
    // Alternate signs, accumulating in series order:
    const float4 series = ((s_inv - term1) + term2) - term3;
    return z_pow_s * series;
}
float3 ligamma_small_z_impl(const float3 s, const float3 z, const float3 s_inv)
{
    //  Float3 version: 4-term series for the lower incomplete gamma
    //  function, z**s * sum over k = 0..3 of (-z)**k / (k! * (s + k)),
    //  with s_inv = 1.0/s precomputed by the caller.
    const float3 z_pow_s = pow(z, s);
    const float3 z_squared = z*z;
    // Terms for k = 1, 2, 3 (the k = 0 term is just s_inv):
    const float3 term1 = z/(s + 1.0.xxx);
    const float3 term2 = z_squared/(2.0*s + 4.0.xxx);
    const float3 term3 = z * z_squared/(6.0*s + 18.0.xxx);
    const float3 series = ((s_inv - term1) + term2) - term3;
    return z_pow_s * series;
}
float2 ligamma_small_z_impl(const float2 s, const float2 z, const float2 s_inv)
{
    //  Float2 version: 4-term series for the lower incomplete gamma
    //  function, z**s * sum over k = 0..3 of (-z)**k / (k! * (s + k)),
    //  with s_inv = 1.0/s precomputed by the caller.
    const float2 z_pow_s = pow(z, s);
    const float2 z_squared = z*z;
    // Terms for k = 1, 2, 3 (the k = 0 term is just s_inv):
    const float2 term1 = z/(s + 1.0.xx);
    const float2 term2 = z_squared/(2.0*s + 4.0.xx);
    const float2 term3 = z * z_squared/(6.0*s + 18.0.xx);
    const float2 series = ((s_inv - term1) + term2) - term3;
    return z_pow_s * series;
}
float ligamma_small_z_impl(const float s, const float z, const float s_inv)
{
    //  Float version: 4-term series for the lower incomplete gamma
    //  function, z**s * sum over k = 0..3 of (-z)**k / (k! * (s + k)),
    //  with s_inv = 1.0/s precomputed by the caller.
    const float z_pow_s = pow(z, s);
    const float z_squared = z*z;
    // Terms for k = 1, 2, 3 (the k = 0 term is just s_inv):
    const float term1 = z/(s + 1.0);
    const float term2 = z_squared/(2.0*s + 4.0);
    const float term3 = z * z_squared/(6.0*s + 18.0);
    const float series = ((s_inv - term1) + term2) - term3;
    return z_pow_s * series;
}
// Upper incomplete gamma function for small s and large z (implementation):
float4 uigamma_large_z_impl(const float4 s, const float4 z)
{
    //  Purpose:    Upper incomplete gamma function for small s and large z.
    //  Requires:   1.) s < ~0.5
    //              2.) z > ~0.775075
    //  Returns:    Gauss's continued fraction representation for the upper
    //              incomplete gamma function (4 terms).
    //  The truncated denominator is built "from the bottom up."  With the
    //  level above the innermost one treated as infinite, each level i (from
    //  4 down to 1) is:
    //      level_i = (2*i - 1) + z - s + (i * (s - i))/level_(i+1)
    //  The levels are unrolled below with the constants folded in.
    const float4 z_pow_s_exp = pow(z, s) * exp(-z);
    const float4 level4 = 7.0.xxxx + z - s;
    const float4 level3 = 5.0.xxxx + z - s + (3.0*s - 9.0.xxxx)/level4;
    const float4 level2 = 3.0.xxxx + z - s + (2.0*s - 4.0.xxxx)/level3;
    const float4 level1 = 1.0.xxxx + z - s + (s - 1.0.xxxx)/level2;
    return z_pow_s_exp / level1;
}
float3 uigamma_large_z_impl(const float3 s, const float3 z)
{
    // Float3 overload: pow(z, s) * exp(-z) divided by a 4-level continued
    // fraction. Levels are computed innermost (level4) to outermost (level1),
    // each one feeding the denominator of the next.
    const float3 level4 = 7.0.xxx + z - s;
    const float3 level3 = 5.0.xxx + z - s + (3.0*s - 9.0.xxx)/level4;
    const float3 level2 = 3.0.xxx + z - s + (2.0*s - 4.0.xxx)/level3;
    const float3 level1 = 1.0.xxx + z - s + (s - 1.0.xxx)/level2;
    const float3 top = pow(z, s) * exp(-z);
    return top / level1;
}
float2 uigamma_large_z_impl(const float2 s, const float2 z)
{
    // Float2 overload: pow(z, s) * exp(-z) divided by a 4-level continued
    // fraction. Levels are computed innermost (level4) to outermost (level1),
    // each one feeding the denominator of the next.
    const float2 level4 = 7.0.xx + z - s;
    const float2 level3 = 5.0.xx + z - s + (3.0*s - 9.0.xx)/level4;
    const float2 level2 = 3.0.xx + z - s + (2.0*s - 4.0.xx)/level3;
    const float2 level1 = 1.0.xx + z - s + (s - 1.0.xx)/level2;
    const float2 top = pow(z, s) * exp(-z);
    return top / level1;
}
float uigamma_large_z_impl(const float s, const float z)
{
    // Scalar overload: pow(z, s) * exp(-z) divided by a 4-level continued
    // fraction. Levels are computed innermost (level4) to outermost (level1),
    // each one feeding the denominator of the next.
    const float level4 = 7.0 + z - s;
    const float level3 = 5.0 + z - s + (3.0*s - 9.0)/level4;
    const float level2 = 3.0 + z - s + (2.0*s - 4.0)/level3;
    const float level1 = 1.0 + z - s + (s - 1.0)/level2;
    const float top = pow(z, s) * exp(-z);
    return top / level1;
}
// Normalized lower incomplete gamma function for small s (implementation):
float4 normalized_ligamma_impl(const float4 s, const float4 z,
    const float4 s_inv, const float4 gamma_s_inv)
{
    // Requires: 1.) s < ~0.5
    //           2.) s_inv = 1/s (precomputed for outside reuse)
    //           3.) gamma_s_inv = 1/gamma(s) (precomputed for outside reuse)
    // Returns:  Approximate the normalized lower incomplete gamma function
    //           for s < 0.5. Since we only care about s < 0.5, we only need
    //           to evaluate two branches (not four) based on z. Each branch
    //           uses four terms, with a max relative error of ~0.00182. The
    //           branch threshold and specifics were adapted for fewer terms
    //           from Gil/Segura/Temme's paper here:
    //           http://oai.cwi.nl/oai/asset/20433/20433B.pdf
    // Evaluate both branches: Real branches test slower even when available.
    static const float4 thresh = 0.775075.xxxx;
    const bool4 z_is_large = z > thresh;
    const float4 large_z = 1.0.xxxx - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float4 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    // Combine the results from both branches, selecting PER COMPONENT. The
    // mask must not be swizzled here: z_is_large.xxxx would broadcast the x
    // lane's decision to every component and pick the wrong branch whenever
    // the components of z fall on different sides of the threshold.
    return large_z * float4(z_is_large) + small_z * float4(!z_is_large);
}
float3 normalized_ligamma_impl(const float3 s, const float3 z,
    const float3 s_inv, const float3 gamma_s_inv)
{
    // Float3 version:
    // Requires: s_inv = 1/s and gamma_s_inv = 1/gamma(s), both precomputed by
    //           the caller.
    // Returns:  Per component, the continued-fraction result
    //           (1 - upper * gamma_s_inv) where z > 0.775075, and the series
    //           result (lower * gamma_s_inv) elsewhere. Both are always
    //           evaluated and then blended with 0/1 weights.
    static const float3 thresh = 0.775075.xxx;
    const bool3 z_is_large = z > thresh;
    const float3 large_z = 1.0.xxx - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float3 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    // Select PER COMPONENT. The mask must not be swizzled: z_is_large.xxx
    // would apply the x lane's decision to all three components.
    return large_z * float3(z_is_large) + small_z * float3(!z_is_large);
}
float2 normalized_ligamma_impl(const float2 s, const float2 z,
    const float2 s_inv, const float2 gamma_s_inv)
{
    // Float2 version:
    // Requires: s_inv = 1/s and gamma_s_inv = 1/gamma(s), both precomputed by
    //           the caller.
    // Returns:  Per component, the continued-fraction result
    //           (1 - upper * gamma_s_inv) where z > 0.775075, and the series
    //           result (lower * gamma_s_inv) elsewhere. Both are always
    //           evaluated and then blended with 0/1 weights.
    static const float2 thresh = 0.775075.xx;
    const bool2 z_is_large = z > thresh;
    const float2 large_z = 1.0.xx - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float2 small_z = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    // Select PER COMPONENT. The mask must not be swizzled: z_is_large.xx
    // would apply the x lane's decision to both components.
    return large_z * float2(z_is_large) + small_z * float2(!z_is_large);
}
float normalized_ligamma_impl(const float s, const float z,
    const float s_inv, const float gamma_s_inv)
{
    // Scalar version:
    // Requires: s_inv = 1/s and gamma_s_inv = 1/gamma(s), both precomputed by
    //           the caller.
    // Returns:  The continued-fraction result when z > 0.775075, otherwise
    //           the series result. Both are always evaluated and blended with
    //           0/1 weights rather than branching.
    static const float branch_cutoff = 0.775075;
    const bool use_fraction = z > branch_cutoff;
    const float from_fraction = 1.0 - uigamma_large_z_impl(s, z) * gamma_s_inv;
    const float from_series = ligamma_small_z_impl(s, z, s_inv) * gamma_s_inv;
    // Exactly one of the two weights is 1.0; the other is 0.0:
    const float fraction_weight = float(use_fraction);
    const float series_weight = float(!use_fraction);
    return from_fraction * fraction_weight + from_series * series_weight;
}
// Normalized lower incomplete gamma function for small s:
float4 normalized_ligamma(const float4 s, const float4 z)
{
    // Requires: s < ~0.5
    // Returns:  Approximate the normalized lower incomplete gamma function
    //           for s < 0.5. This wrapper only precomputes the reciprocals
    //           that normalized_ligamma_impl() expects; see that function
    //           for details.
    const float4 recip_s = 1.0.xxxx/s;
    const float4 recip_gamma_s = 1.0.xxxx/gamma_impl(s, recip_s);
    return normalized_ligamma_impl(s, z, recip_s, recip_gamma_s);
}
float3 normalized_ligamma(const float3 s, const float3 z)
{
    // Float3 version: Precompute 1/s and 1/gamma_impl(s, 1/s), then defer to
    // normalized_ligamma_impl().
    const float3 recip_s = 1.0.xxx/s;
    const float3 recip_gamma_s = 1.0.xxx/gamma_impl(s, recip_s);
    return normalized_ligamma_impl(s, z, recip_s, recip_gamma_s);
}
float2 normalized_ligamma(const float2 s, const float2 z)
{
    // Float2 version: Precompute 1/s and 1/gamma_impl(s, 1/s), then defer to
    // normalized_ligamma_impl().
    const float2 recip_s = 1.0.xx/s;
    const float2 recip_gamma_s = 1.0.xx/gamma_impl(s, recip_s);
    return normalized_ligamma_impl(s, z, recip_s, recip_gamma_s);
}
float normalized_ligamma(const float s, const float z)
{
    // Scalar version: Precompute 1/s and 1/gamma_impl(s, 1/s), then defer to
    // normalized_ligamma_impl().
    const float recip_s = 1.0/s;
    const float recip_gamma_s = 1.0/gamma_impl(s, recip_s);
    return normalized_ligamma_impl(s, z, recip_s, recip_gamma_s);
}
#endif // SPECIAL_FUNCTIONS_H

View file

@ -1,58 +0,0 @@
#ifndef USER_CGP_CONSTANTS_H
#define USER_CGP_CONSTANTS_H
// IMPORTANT:
// These constants MUST be set appropriately for the settings in crt-royale.cgp
// (or whatever related .cgp file you're using). If they aren't, you're likely
// to get artifacts, the wrong phosphor mask size, etc. I wish these could be
// set directly in the .cgp file to make things easier, but...they can't.
// PASS SCALES AND RELATED CONSTANTS:
// Copy the absolute scale_x for BLOOM_APPROX. There are two major versions of
// this shader: One does a viewport-scale bloom, and the other skips it. The
// latter benefits from a higher scale_x, so save both values separately:
static const float bloom_approx_size_x = 320.0;
static const float bloom_approx_size_x_for_fake = 400.0;
// Copy the viewport-relative scales of the phosphor mask resize passes
// (MASK_RESIZE and the pass immediately preceding it):
static const float2 mask_resize_viewport_scale = float2(0.0625, 0.0625);
// Copy the geom_max_aspect_ratio used to calculate the MASK_RESIZE scales, etc.:
static const float geom_max_aspect_ratio = 4.0/3.0;
// PHOSPHOR MASK TEXTURE CONSTANTS:
// Set the following constants to reflect the properties of the phosphor mask
// texture named in crt-royale.cgp. The shader optionally resizes a mask tile
// based on user settings, then repeats a single tile until filling the screen.
// The shader must know the input texture size (default 64x64), and to manually
// resize, it must also know the horizontal triads per tile (default 8).
static const float2 mask_texture_small_size = 64.0.xx;
static const float2 mask_texture_large_size = 512.0.xx;
static const float mask_triads_per_tile = 8.0;
// We need the average brightness of the phosphor mask to compensate for the
// dimming it causes. The following four values are roughly correct for the
// masks included with the shader. Update the value for any LUT texture you
// change. [Un]comment "#define PHOSPHOR_MASK_GRILLE14" depending on whether
// the loaded aperture grille uses 14-pixel or 15-pixel stripes (default 15).
//#define PHOSPHOR_MASK_GRILLE14
static const float mask_grille14_avg_color = 50.6666666/255.0;
// TileableLinearApertureGrille14Wide7d33Spacing*.png
// TileableLinearApertureGrille14Wide10And6Spacing*.png
static const float mask_grille15_avg_color = 53.0/255.0;
// TileableLinearApertureGrille15Wide6d33Spacing*.png
// TileableLinearApertureGrille15Wide8And5d5Spacing*.png
static const float mask_slot_avg_color = 46.0/255.0;
// TileableLinearSlotMask15Wide9And4d5Horizontal8VerticalSpacing*.png
// TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing*.png
static const float mask_shadow_avg_color = 41.0/255.0;
// TileableLinearShadowMask*.png
// TileableLinearShadowMaskEDP*.png
#ifdef PHOSPHOR_MASK_GRILLE14
static const float mask_grille_avg_color = mask_grille14_avg_color;
#else
static const float mask_grille_avg_color = mask_grille15_avg_color;
#endif
#endif // USER_CGP_CONSTANTS_H

View file

@ -1,359 +0,0 @@
#ifndef USER_SETTINGS_H
#define USER_SETTINGS_H
///////////////////////////// DRIVER CAPABILITIES ////////////////////////////
// The Cg compiler uses different "profiles" with different capabilities.
// This shader requires a Cg compilation profile >= arbfp1, but a few options
// require higher profiles like fp30 or fp40. The shader can't detect profile
// or driver capabilities, so instead you must comment or uncomment the lines
// below with "//" before "#define." Disable an option if you get compilation
// errors resembling those listed. Generally speaking, all of these options
// will run on nVidia cards, but only DRIVERS_ALLOW_TEX2DBIAS (if that) is
// likely to run on ATI/AMD, due to the Cg compiler's profile limitations.
// Derivatives: Unsupported on fp20, ps_1_1, ps_1_2, ps_1_3, and arbfp1.
// Among other things, derivatives help us fix anisotropic filtering artifacts
// with curved manually tiled phosphor mask coords. Related errors:
// error C3004: function "float2 ddx(float2);" not supported in this profile
// error C3004: function "float2 ddy(float2);" not supported in this profile
//#define DRIVERS_ALLOW_DERIVATIVES
// Fine derivatives: Unsupported on older ATI cards.
// Fine derivatives enable 2x2 fragment block communication, letting us perform
// fast single-pass blur operations. If your card uses coarse derivatives and
// these are enabled, blurs could look broken. Derivatives are a prerequisite.
#ifdef DRIVERS_ALLOW_DERIVATIVES
#define DRIVERS_ALLOW_FINE_DERIVATIVES
#endif
// Dynamic looping: Requires an fp30 or newer profile.
// This makes phosphor mask resampling faster in some cases. Related errors:
// error C5013: profile does not support "for" statements and "for" could not
// be unrolled
//#define DRIVERS_ALLOW_DYNAMIC_BRANCHES
// Without DRIVERS_ALLOW_DYNAMIC_BRANCHES, we need to use unrollable loops.
// Using one static loop avoids overhead if the user is right, but if the user
// is wrong (loops are allowed), breaking a loop into if-blocked pieces with a
// binary search can potentially save some iterations. However, it may fail:
// error C6001: Temporary register limit of 32 exceeded; 35 registers
// needed to compile program
//#define ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS
// tex2Dlod: Requires an fp40 or newer profile. This can be used to disable
// anisotropic filtering, thereby fixing related artifacts. Related errors:
// error C3004: function "float4 tex2Dlod(sampler2D, float4);" not supported in
// this profile
//#define DRIVERS_ALLOW_TEX2DLOD
// tex2Dbias: Requires an fp30 or newer profile. This can be used to alleviate
// artifacts from anisotropic filtering and mipmapping. Related errors:
// error C3004: function "float4 tex2Dbias(sampler2D, float4);" not supported
// in this profile
//#define DRIVERS_ALLOW_TEX2DBIAS
// Integrated graphics compatibility: Integrated graphics like Intel HD 4000
// impose stricter limitations on register counts and instructions. Enable
// INTEGRATED_GRAPHICS_COMPATIBILITY_MODE if you still see error C6001 or:
// error C6002: Instruction limit of 1024 exceeded: 1523 instructions needed
// to compile program.
// Enabling integrated graphics compatibility mode will automatically disable:
// 1.) PHOSPHOR_MASK_MANUALLY_RESIZE: The phosphor mask will be softer.
// (This may be reenabled in a later release.)
// 2.) RUNTIME_GEOMETRY_MODE
// 3.) The high-quality 4x4 Gaussian resize for the bloom approximation
//#define INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
//////////////////////////// USER CODEPATH OPTIONS ///////////////////////////
// To disable a #define option, turn its line into a comment with "//."
// RUNTIME VS. COMPILE-TIME OPTIONS (Major Performance Implications):
// Enable runtime shader parameters in the Retroarch (etc.) GUI? They override
// many of the options in this file and allow real-time tuning, but many of
// them are slower. Disabling them and using this text file will boost FPS.
#define RUNTIME_SHADER_PARAMS_ENABLE
// Specify the phosphor bloom sigma at runtime? This option is 10% slower, but
// it's the only way to do a wide-enough full bloom with a runtime dot pitch.
#define RUNTIME_PHOSPHOR_BLOOM_SIGMA
// Specify antialiasing weight parameters at runtime? (Costs ~20% with cubics)
#define RUNTIME_ANTIALIAS_WEIGHTS
// Specify subpixel offsets at runtime? (WARNING: EXTREMELY EXPENSIVE!)
//#define RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
// Make beam_horiz_filter and beam_horiz_linear_rgb_weight into runtime shader
// parameters? This will require more math or dynamic branching.
#define RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
// Specify the tilt at runtime? This makes things about 3% slower.
#define RUNTIME_GEOMETRY_TILT
// Specify the geometry mode at runtime?
#define RUNTIME_GEOMETRY_MODE
// Specify the phosphor mask type (aperture grille, slot mask, shadow mask) and
// mode (Lanczos-resize, hardware resize, or tile 1:1) at runtime, even without
// dynamic branches? This is cheap if mask_resize_viewport_scale is small.
#define FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
// PHOSPHOR MASK:
// Manually resize the phosphor mask for best results (slower)? Disabling this
// removes the option to do so, but it may be faster without dynamic branches.
#define PHOSPHOR_MASK_MANUALLY_RESIZE
// If we sinc-resize the mask, should we Lanczos-window it (slower but better)?
#define PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
// Larger blurs are expensive, but we need them to blur larger triads. We can
// detect the right blur if the triad size is static or our profile allows
// dynamic branches, but otherwise we use the largest blur the user indicates
// they might need:
#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
//#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
//#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
//#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
// Here's a helpful chart:
// MaxTriadSize BlurSize MinTriadCountsByResolution
// 3.0 9.0 480/640/960/1920 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 6.0 17.0 240/320/480/960 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 9.0 25.0 160/213/320/640 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 12.0 31.0 120/160/240/480 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
// 18.0 43.0 80/107/160/320 triads at 1080p/1440p/2160p/4320p, 4:3 aspect
/////////////////////////////// USER PARAMETERS //////////////////////////////
// Note: Many of these static parameters are overridden by runtime shader
// parameters when those are enabled. However, many others are static codepath
// options that were cleaner or more convenient to code as static constants.
// GAMMA:
static const float crt_gamma_static = 2.5; // range [1, 5]
static const float lcd_gamma_static = 2.2; // range [1, 5]
// LEVELS MANAGEMENT:
// Control the final multiplicative image contrast:
static const float levels_contrast_static = 1.0; // range [0, 4)
// We auto-dim to avoid clipping between passes and restore brightness
// later. Control the dim factor here: Lower values clip less but crush
// blacks more (static only for now).
static const float levels_autodim_temp = 0.5; // range (0, 1]
// HALATION/DIFFUSION/BLOOM:
// Halation weight: How much energy should be lost to electrons bouncing
// around under the CRT glass and exciting random phosphors?
static const float halation_weight_static = 0.0; // range [0, 1]
// Refractive diffusion weight: How much light should spread/diffuse from
// refracting through the CRT glass?
static const float diffusion_weight_static = 0.075; // range [0, 1]
// Underestimate brightness: Bright areas bloom more, but we can base the
// bloom brightpass on a lower brightness to sharpen phosphors, or a higher
// brightness to soften them. Low values clip, but >= 0.8 looks okay.
static const float bloom_underestimate_levels_static = 0.8; // range [0, 5]
// Blur all colors more than necessary for a softer phosphor bloom?
static const float bloom_excess_static = 0.0; // range [0, 1]
// The BLOOM_APPROX pass approximates a phosphor blur early on with a small
// blurred resize of the input (convergence offsets are applied as well).
// There are three filter options (static option only for now):
// 0.) Bilinear resize: A fast, close approximation to a 4x4 resize
// if min_allowed_viewport_triads and the BLOOM_APPROX resolution are sane
// and beam_max_sigma is low.
// 1.) 3x3 resize blur: Medium speed, soft/smeared from bilinear blurring,
// always uses a static sigma regardless of beam_max_sigma or
// mask_num_triads_desired.
// 2.) True 4x4 Gaussian resize: Slowest, technically correct.
// These options are more pronounced for the fast, unbloomed shader version.
static const float bloom_approx_filter_static = 2.0;
// ELECTRON BEAM SCANLINE DISTRIBUTION:
// How many scanlines should contribute light to each pixel? Using more
// scanlines is slower (especially for a generalized Gaussian) but less
// distorted with larger beam sigmas (especially for a pure Gaussian). The
// max_beam_sigma at which the closest unused weight is guaranteed <
// 1.0/255.0 (for a 3x antialiased pure Gaussian) is:
// 2 scanlines: max_beam_sigma = 0.2089; distortions begin ~0.34; 141.7 FPS pure, 131.9 FPS generalized
// 3 scanlines, max_beam_sigma = 0.3879; distortions begin ~0.52; 137.5 FPS pure; 123.8 FPS generalized
// 4 scanlines, max_beam_sigma = 0.5723; distortions begin ~0.70; 134.7 FPS pure; 117.2 FPS generalized
// 5 scanlines, max_beam_sigma = 0.7591; distortions begin ~0.89; 131.6 FPS pure; 112.1 FPS generalized
// 6 scanlines, max_beam_sigma = 0.9483; distortions begin ~1.08; 127.9 FPS pure; 105.6 FPS generalized
static const float beam_num_scanlines = 3.0; // range [2, 6]
// A generalized Gaussian beam varies shape with color too, not just width.
// It's slower but more flexible (static option only for now).
static const bool beam_generalized_gaussian = true;
// What kind of scanline antialiasing do you want?
// 0: Sample weights at 1x; 1: Sample weights at 3x; 2: Compute an integral
// Integrals are slow (especially for generalized Gaussians) and rarely any
// better than 3x antialiasing (static option only for now).
static const float beam_antialias_level = 1.0; // range [0, 2]
// Min/max standard deviations for scanline beams: Higher values widen and
// soften scanlines. Depending on other options, low min sigmas can alias.
static const float beam_min_sigma_static = 0.02; // range (0, 1]
static const float beam_max_sigma_static = 0.3; // range (0, 1]
// Beam width varies as a function of color: A power function (0) is more
// configurable, but a spherical function (1) gives the widest beam
// variability without aliasing (static option only for now).
static const float beam_spot_shape_function = 0.0;
// Spot shape power: Powers <= 1 give smoother spot shapes but lower
// sharpness. Powers >= 1.0 are awful unless min/max sigmas are close.
static const float beam_spot_power_static = 1.0/3.0; // range (0, 16]
// Generalized Gaussian max shape parameters: Higher values give flatter
// scanline plateaus and steeper dropoffs, simultaneously widening and
// sharpening scanlines at the cost of aliasing. 2.0 is pure Gaussian, and
// values > ~40.0 cause artifacts with integrals.
static const float beam_min_shape_static = 2.0; // range [2, 32]
static const float beam_max_shape_static = 4.0; // range [2, 32]
// Generalized Gaussian shape power: Affects how quickly the distribution
// changes shape from Gaussian to steep/plateaued as color increases from 0
// to 1.0. Higher powers appear softer for most colors, and lower powers
// appear sharper for most colors.
static const float beam_shape_power_static = 1.0/4.0; // range (0, 16]
// What filter should be used to sample scanlines horizontally?
// 0: Quilez (fast), 1: Gaussian (configurable), 2: Lanczos2 (sharp)
static const float beam_horiz_filter_static = 0.0;
// Standard deviation for horizontal Gaussian resampling:
static const float beam_horiz_sigma_static = 0.35; // range (0, 2/3]
// Do horizontal scanline sampling in linear RGB (correct light mixing),
// gamma-encoded RGB (darker, hard spot shape, may better match bandwidth-
// limiting circuitry in some CRT's), or a weighted avg.?
static const float beam_horiz_linear_rgb_weight_static = 1.0; // range [0, 1]
// Simulate scanline misconvergence? This needs 3x horizontal texture
// samples and 3x texture samples of BLOOM_APPROX and HALATION_BLUR in
// later passes (static option only for now).
static const bool beam_misconvergence = true;
// Convergence offsets in x/y directions for R/G/B scanline beams in units
// of scanlines. Positive offsets go right/down; ranges [-2, 2]
static const float2 convergence_offsets_r_static = float2(0.1, 0.2);
static const float2 convergence_offsets_g_static = float2(0.3, 0.4);
static const float2 convergence_offsets_b_static = float2(0.5, 0.6);
// Detect interlacing (static option only for now)?
static const bool interlace_detect_static = true;
// Assume 1080-line sources are interlaced?
static const bool interlace_1080i_static = false;
// For interlaced sources, assume TFF (top-field first) or BFF order?
// (Whether this matters depends on the nature of the interlaced input.)
static const bool interlace_bff_static = false;
// ANTIALIASING:
// What AA level do you want for curvature/overscan/subpixels? Options:
// 0x (none), 1x (sample subpixels), 4x, 5x, 6x, 7x, 8x, 12x, 16x, 20x, 24x
// (Static option only for now)
static const float aa_level = 12.0; // range [0, 24]
// What antialiasing filter do you want (static option only)? Options:
// 0: Box (separable), 1: Box (cylindrical),
// 2: Tent (separable), 3: Tent (cylindrical),
// 4: Gaussian (separable), 5: Gaussian (cylindrical),
// 6: Cubic* (separable), 7: Cubic* (cylindrical, poor)
// 8: Lanczos Sinc (separable), 9: Lanczos Jinc (cylindrical, poor)
// * = Especially slow with RUNTIME_ANTIALIAS_WEIGHTS
static const float aa_filter = 6.0; // range [0, 9]
// Flip the sample grid on odd/even frames (static option only for now)?
static const bool aa_temporal = false;
// Use RGB subpixel offsets for antialiasing? The pixel is at green, and
// the blue offset is the negative r offset; range [0, 0.5]
static const float2 aa_subpixel_r_offset_static = float2(-1.0/3.0, 0.0);//float2(0.0);
// Cubics: See http://www.imagemagick.org/Usage/filter/#mitchell
// 1.) "Keys cubics" with B = 1 - 2C are considered the highest quality.
// 2.) C = 0.5 (default) is Catmull-Rom; higher C's apply sharpening.
// 3.) C = 1.0/3.0 is the Mitchell-Netravali filter.
// 4.) C = 0.0 is a soft spline filter.
static const float aa_cubic_c_static = 0.5; // range [0, 4]
// Standard deviation for Gaussian antialiasing: Try 0.5/aa_pixel_diameter.
static const float aa_gauss_sigma_static = 0.5; // range [0.0625, 1.0]
// PHOSPHOR MASK:
// Mask type: 0 = aperture grille, 1 = slot mask, 2 = EDP shadow mask
static const float mask_type_static = 1.0; // range [0, 2]
// We can sample the mask three ways. Pick 2/3 from: Pretty/Fast/Flexible.
// 0.) Sinc-resize to the desired dot pitch manually (pretty/slow/flexible).
// This requires PHOSPHOR_MASK_MANUALLY_RESIZE to be #defined.
// 1.) Hardware-resize to the desired dot pitch (ugly/fast/flexible). This
// is halfway decent with LUT mipmapping but atrocious without it.
// 2.) Tile it without resizing at a 1:1 texel:pixel ratio for flat coords
// (pretty/fast/inflexible). Each input LUT has a fixed dot pitch.
// This mode reuses the same masks, so triads will be enormous unless
// you change the mask LUT filenames in your .cgp file.
static const float mask_sample_mode_static = 0.0; // range [0, 2]
// Prefer setting the triad size (0.0) or number on the screen (1.0)?
// If RUNTIME_PHOSPHOR_BLOOM_SIGMA isn't #defined, the specified triad size
// will always be used to calculate the full bloom sigma statically.
static const float mask_specify_num_triads_static = 0.0; // range [0, 1]
// Specify the phosphor triad size, in pixels. Each tile (usually with 8
// triads) will be rounded to the nearest integer tile size and clamped to
// obey minimum size constraints (imposed to reduce downsize taps) and
// maximum size constraints (imposed to have a sane MASK_RESIZE FBO size).
// To increase the size limit, double the viewport-relative scales for the
// two MASK_RESIZE passes in crt-royale.cgp and user-cgp-constants.h.
// range [1, mask_texture_small_size/mask_triads_per_tile]
static const float mask_triad_size_desired_static = 24.0 / 8.0;
// If mask_specify_num_triads is 1.0/true, we'll go by this instead (the
// final size will be rounded and constrained as above); default 480.0
static const float mask_num_triads_desired_static = 480.0;
// How many lobes should the sinc/Lanczos resizer use? More lobes require
// more samples and avoid moire a bit better, but some is unavoidable
// depending on the destination size (static option for now).
static const float mask_sinc_lobes = 3.0; // range [2, 4]
// The mask is resized using a variable number of taps in each dimension,
// but some Cg profiles always fetch a constant number of taps no matter
// what (no dynamic branching). We can limit the maximum number of taps if
// we statically limit the minimum phosphor triad size. Larger values are
// faster, but the limit IS enforced (static option only, forever);
// range [1, mask_texture_small_size/mask_triads_per_tile]
// TODO: Make this 1.0 and compensate with smarter sampling!
static const float mask_min_allowed_triad_size = 2.0;
// GEOMETRY:
// Geometry mode:
// 0: Off (default), 1: Spherical mapping (like cgwg's),
// 2: Alt. spherical mapping (more bulbous), 3: Cylindrical/Trinitron
static const float geom_mode_static = 0.0; // range [0, 3]
// Radius of curvature: Measured in units of your viewport's diagonal size.
static const float geom_radius_static = 2.0; // range [1/(2*pi), 1024]
// View dist is the distance from the player to their physical screen, in
// units of the viewport's diagonal size. It controls the field of view.
static const float geom_view_dist_static = 2.0; // range [0.5, 1024]
// Tilt angle in radians (clockwise around up and right vectors):
static const float2 geom_tilt_angle_static = float2(0.0, 0.0); // range [-pi, pi]
// Aspect ratio: When the true viewport size is unknown, this value is used
// to help convert between the phosphor triad size and count, along with
// the mask_resize_viewport_scale constant from user-cgp-constants.h. Set
// this equal to Retroarch's display aspect ratio (DAR) for best results;
// range [1, geom_max_aspect_ratio from user-cgp-constants.h];
// default (256/224)*(54/47) = 1.313069909 (see below)
static const float geom_aspect_ratio_static = 1.313069909;
// Before getting into overscan, here's some general aspect ratio info:
// - DAR = display aspect ratio = SAR * PAR; as in your Retroarch setting
// - SAR = storage aspect ratio = DAR / PAR; square pixel emulator frame AR
// - PAR = pixel aspect ratio = DAR / SAR; holds regardless of cropping
// Geometry processing has to "undo" the screen-space 2D DAR to calculate
// 3D view vectors, then reapplies the aspect ratio to the simulated CRT in
// uv-space. To ensure the source SAR is intended for a ~4:3 DAR, either:
// a.) Enable Retroarch's "Crop Overscan"
// b.) Readd horizontal padding: Set overscan to e.g. N*(1.0, 240.0/224.0)
// Real consoles use horizontal black padding in the signal, but emulators
// often crop this without cropping the vertical padding; a 256x224 [S]NES
// frame (8:7 SAR) is intended for a ~4:3 DAR, but a 256x240 frame is not.
// The correct [S]NES PAR is 54:47, found by blargg and NewRisingSun:
// http://board.zsnes.com/phpBB3/viewtopic.php?f=22&t=11928&start=50
// http://forums.nesdev.com/viewtopic.php?p=24815#p24815
// For flat output, it's okay to set DAR = [existing] SAR * [correct] PAR
// without doing a. or b., but horizontal image borders will be tighter
// than vertical ones, messing up curvature and overscan. Fixing the
// padding first corrects this.
// Overscan: Amount to "zoom in" before cropping. You can zoom uniformly
// or adjust x/y independently to e.g. readd horizontal padding, as noted
// above: Values < 1.0 zoom out; range (0, inf)
static const float2 geom_overscan_static = float2(1.0, 1.0);// * 1.005 * (1.0, 240/224.0)
// Compute a proper pixel-space to texture-space matrix even without ddx()/
// ddy()? This is ~8.5% slower but improves antialiasing/subpixel filtering
// with strong curvature (static option only for now).
static const bool geom_force_correct_tangent_matrix = true;
// BORDERS:
// Rounded border size in texture uv coords:
static const float border_size_static = 0.015; // range [0, 0.5]
// Border darkness: Moderate values darken the border smoothly, and high
// values make the image very dark just inside the border:
static const float border_darkness_static = 2.0; // range [0, inf)
// Border compression: High numbers compress border transitions, narrowing
// the dark border area.
static const float border_compress_static = 2.5; // range [1, inf)
#endif // USER_SETTINGS_H

View file

@ -1,97 +0,0 @@
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
// PASS SETTINGS:
// gamma-management.h needs to know what kind of pipeline we're using and
// what pass this is in that pipeline. This will become obsolete if/when we
// can #define things like this in the .cgp preset file.
//#define GAMMA_ENCODE_EVERY_FBO
//#define FIRST_PASS
//#define LAST_PASS
//#define SIMULATE_CRT_ON_LCD
//#define SIMULATE_GBA_ON_LCD
//#define SIMULATE_LCD_ON_CRT
//#define SIMULATE_GBA_ON_CRT
////////////////////////////////// INCLUDES //////////////////////////////////
// #included by vertex shader:
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Extra vertex-to-pixel interpolants for the horizontal blur pass.
struct out_vertex_p4
{
// uv offset between blur samples. VS_Blur9Fast_Horizontal writes it as
// (dxdy.x, 0.0) and the pixel shader forwards it to tex2Dblur9fast().
float2 blur_dxdy : TEXCOORD1;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Blur9Fast_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p4 OUT)
{
// Outputs: position      = clip-space vertex position
//          texcoord      = uv coordinate for the pixel shader
//          OUT.blur_dxdy = horizontal-only uv step between blur samples
// Derive one oversized triangle from the vertex ID alone: ids 0/1/2 yield
// texcoords (0,0), (0,2), (2,0), i.e. clip-space (-1,1), (-1,-3), (3,1).
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
/* float2 texture_size = 1.0/NormalizedNativePixelSize;
float2 output_size = (ViewportSize*BufferToViewportRatio);
float2 video_size = 1.0/NormalizedNativePixelSize;
*/
// float2 texture_size = float2(320.0, 240.0);
float2 texture_size = HALATION_BLUR_texture_size;
float2 output_size = VIEWPORT_SIZE;
// float2 output_size = VIEWPORT_SIZE*NormalizedNativePixelSize/float2(320.0, 240.0);
// float2 output_size = float2(320.0, 240.0);
// float2 output_size = 1.0/NormalizedNativePixelSize;
// Get the uv sample distance between output pixels. Blurs are not generic
// Gaussian resizers, and correct blurs require:
// 1.) IN.output_size == IN.video_size * 2^m, where m is an integer <= 0.
// 2.) mipmap_inputN = "true" for this pass in .cgp preset if m != 0
// 3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs
// Gaussian resizers would upsize using the distance between input texels
// (not output pixels), but we avoid this and consistently blur at the
// destination size. Otherwise, combining statically calculated weights
// with bilinear sample exploitation would result in terrible artifacts.
// NOTE(review): video_size is not declared in this function (its only local
// declaration is inside the commented-out block above). Presumably it is
// provided by an included header or a macro -- confirm before renaming or
// removing any of the locals here.
const float2 dxdy_scale = video_size/output_size;
const float2 dxdy = dxdy_scale/texture_size;
// This blur is horizontal-only, so zero out the vertical offset:
OUT.blur_dxdy = float2(dxdy.x, 0.0);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Pixel shader for the horizontal 9-tap fast blur: blurs the BLUR9FAST_VERTICAL
// input along VAR.blur_dxdy and returns it through encode_output() with
// alpha 1.0. vpos is unused.
float4 PS_Blur9Fast_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p4 VAR) : SV_Target
{
    const float3 blurred = tex2Dblur9fast(BLUR9FAST_VERTICAL, vTexCoord, VAR.blur_dxdy);
    return encode_output(float4(blurred, 1.0));
}

View file

@ -1,95 +0,0 @@
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
// PASS SETTINGS:
// gamma-management.h needs to know what kind of pipeline we're using and
// what pass this is in that pipeline. This will become obsolete if/when we
// can #define things like this in the .cgp preset file.
//#define GAMMA_ENCODE_EVERY_FBO
//#define FIRST_PASS
//#define LAST_PASS
//#define SIMULATE_CRT_ON_LCD
//#define SIMULATE_GBA_ON_LCD
//#define SIMULATE_LCD_ON_CRT
//#define SIMULATE_GBA_ON_CRT
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Extra interpolant written by VS_Blur9Fast_Vertical and read by
// PS_Blur9Fast_Vertical.
struct out_vertex_p3
{
// UV offset between neighbouring blur taps. The vertex shader sets only the
// y component; x is always 0.0.
float2 blur_dxdy : TEXCOORD1;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex shader for the vertical 9-tap fast blur pass.
//   id       - vertex index (0..2) of the single screen-covering triangle
//   position - clip-space position of that vertex
//   texcoord - UV of that vertex (0..2 range so the triangle covers the screen)
//   OUT      - receives the vertical UV step between blur taps
void VS_Blur9Fast_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p3 OUT)
{
    // Vertex 1 gets v = 2, vertex 2 gets u = 2, everything else stays at 0.
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // These two locals keep their names on purpose: video_size is not declared
    // in this function and may resolve through them.
    float2 texture_size = BLUR9FAST_VERTICAL_texture_size;
    float2 output_size = VIEWPORT_SIZE;

    // UV distance between *output* pixels. The blur is not a generic Gaussian
    // resizer; it is only correct when:
    //  1.) output_size == video_size * 2^m, with integer m <= 0,
    //  2.) mipmapping is enabled for the input if m != 0,
    //  3.) linear filtering is enabled (except 1x nearest-neighbour blurs).
    // Stepping by input texels instead would combine the statically computed
    // weights with bilinear sampling tricks and produce heavy artifacts.
    const float2 uv_step = (video_size / output_size) / texture_size;

    // Vertical pass: discard the horizontal component.
    OUT.blur_dxdy = float2(0.0, uv_step.y);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Pixel shader for the vertical 9-tap fast blur: blurs the BLOOM_APPROX input
// along VAR.blur_dxdy and returns it through encode_output() with alpha 1.0.
// vpos is unused.
float4 PS_Blur9Fast_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p3 VAR) : SV_Target
{
    const float3 blurred = tex2Dblur9fast(BLOOM_APPROX, vTexCoord, VAR.blur_dxdy);
    return encode_output(float4(blurred, 1.0));
}

View file

@ -1,363 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////// INCLUDES //////////////////////////////////
// Size aliases for the source this pass samples (referred to as ORIG_LINEARIZED
// below). Both map onto VERTICAL_SCANLINES_* values defined outside this file.
// NOTE(review): the mapping looks crossed (video_size <- *_texture_size and
// texture_size <- *_video_size). That is harmless only if the two values are
// equal - confirm before relying on either alias.
#define ORIG_LINEARIZEDvideo_size VERTICAL_SCANLINES_texture_size
#define ORIG_LINEARIZEDtexture_size VERTICAL_SCANLINES_video_size
// Defined before the includes below; presumably consumed by those headers -
// TODO confirm.
#define bloom_approx_scale_x (4.0/3.0)
static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
#include "../include/scanline-functions.fxh"
#include "../include/bloom-functions.fxh"
/////////////////////////////////// HELPERS //////////////////////////////////
// Samples a 4x4 grid of taps around tex_uv, weights each tap with a Gaussian
// of its pixel-space distance from the destination pixel, and returns the
// normalized weighted sum (rgb only).
// Parameters:
//   tex                   - source sampler, read through tex2D_linearize()
//   tex_uv                - uv of the destination pixel in the source texture
//   dxdy                  - uv spacing between taps
//   texture_size          - source texture size in texels
//   texture_size_inv      - reciprocal of texture_size
//   tex_uv_to_pixel_scale - factor converting a uv delta to output pixels
//   sigma                 - Gaussian standard deviation in output pixels
float3 tex2Dresize_gaussian4x4(const sampler2D tex, const float2 tex_uv,
const float2 dxdy, const float2 texture_size, const float2 texture_size_inv,
const float2 tex_uv_to_pixel_scale, const float sigma)
{
// Requires: 1.) All requirements of gamma-management.h must be satisfied!
// 2.) filter_linearN must == "true" in your .cgp preset.
// 3.) mipmap_inputN must == "true" in your .cgp preset if
// IN.output_size << SRC.video_size.
// 4.) dxdy should contain the uv pixel spacing:
// dxdy = max(float2(1.0),
// SRC.video_size/IN.output_size)/SRC.texture_size;
// 5.) texture_size == SRC.texture_size
// 6.) texture_size_inv == float2(1.0)/SRC.texture_size
// 7.) tex_uv_to_pixel_scale == IN.output_size *
// SRC.texture_size / SRC.video_size;
// 8.) sigma is the desired Gaussian standard deviation, in
// terms of output pixels. It should be < ~0.66171875 to
// ensure the first unused sample (outside the 4x4 box) has
// a weight < 1.0/256.0.
// Returns: A true 4x4 Gaussian resize of the input.
// Description:
// Given correct inputs, this Gaussian resizer samples 4 pixel locations
// along each downsized dimension and/or 4 texel locations along each
// upsized dimension. It computes dynamic weights based on the pixel-space
// distance of each sample from the destination pixel. It is arbitrarily
// resizable and higher quality than tex2Dblur3x3_resize, but it's slower.
// TODO: Move this to a more suitable file once there are others like it.
// 1/(2*sigma^2): the factor applied to squared distance inside exp() below.
const float denom_inv = 0.5/(sigma*sigma);
// We're taking 4x4 samples, and we're snapping to texels for upsizing.
// Find texture coords for sample 5 (second row, second column):
const float2 curr_texel = tex_uv * texture_size;
// under_half is defined outside this function (presumably slightly below 0.5 -
// TODO confirm).
const float2 prev_texel =
floor(curr_texel - under_half.xx) + 0.5.xx;
const float2 prev_texel_uv = prev_texel * texture_size_inv;
// Per-axis 1.0 where the tap spacing is at most one texel, else 0.0. The lerp
// below then selects the texel-snapped coordinate on those axes.
const float2 snap = float2(dxdy <= texture_size_inv);
const float2 sample5_downsize_uv = tex_uv - 0.5 * dxdy;
const float2 sample5_uv = lerp(sample5_downsize_uv, prev_texel_uv, snap);
// Compute texture coords for other samples:
// Samples are numbered row-major: 0-3 first row, 4-7 second, 8-11 third,
// 12-15 fourth. Each row is anchored on a diagonal tap (0, 5, 10, 15) and
// stepped horizontally by dx.
const float2 dx = float2(dxdy.x, 0.0);
const float2 sample0_uv = sample5_uv - dxdy;
const float2 sample10_uv = sample5_uv + dxdy;
const float2 sample15_uv = sample5_uv + 2.0 * dxdy;
const float2 sample1_uv = sample0_uv + dx;
const float2 sample2_uv = sample0_uv + 2.0 * dx;
const float2 sample3_uv = sample0_uv + 3.0 * dx;
const float2 sample4_uv = sample5_uv - dx;
const float2 sample6_uv = sample5_uv + dx;
const float2 sample7_uv = sample5_uv + 2.0 * dx;
const float2 sample8_uv = sample10_uv - 2.0 * dx;
const float2 sample9_uv = sample10_uv - dx;
const float2 sample11_uv = sample10_uv + dx;
const float2 sample12_uv = sample15_uv - 3.0 * dx;
const float2 sample13_uv = sample15_uv - 2.0 * dx;
const float2 sample14_uv = sample15_uv - dx;
// Load each sample:
const float3 sample0 = tex2D_linearize(tex, sample0_uv).rgb;
const float3 sample1 = tex2D_linearize(tex, sample1_uv).rgb;
const float3 sample2 = tex2D_linearize(tex, sample2_uv).rgb;
const float3 sample3 = tex2D_linearize(tex, sample3_uv).rgb;
const float3 sample4 = tex2D_linearize(tex, sample4_uv).rgb;
const float3 sample5 = tex2D_linearize(tex, sample5_uv).rgb;
const float3 sample6 = tex2D_linearize(tex, sample6_uv).rgb;
const float3 sample7 = tex2D_linearize(tex, sample7_uv).rgb;
const float3 sample8 = tex2D_linearize(tex, sample8_uv).rgb;
const float3 sample9 = tex2D_linearize(tex, sample9_uv).rgb;
const float3 sample10 = tex2D_linearize(tex, sample10_uv).rgb;
const float3 sample11 = tex2D_linearize(tex, sample11_uv).rgb;
const float3 sample12 = tex2D_linearize(tex, sample12_uv).rgb;
const float3 sample13 = tex2D_linearize(tex, sample13_uv).rgb;
const float3 sample14 = tex2D_linearize(tex, sample14_uv).rgb;
const float3 sample15 = tex2D_linearize(tex, sample15_uv).rgb;
// Compute destination pixel offsets for each sample:
const float2 dest_pixel = tex_uv * tex_uv_to_pixel_scale;
const float2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel;
// Compute Gaussian sample weights:
// LENGTH_SQ is defined outside this function (presumably the squared vector
// length - TODO confirm).
const float w0 = exp(-LENGTH_SQ(sample0_offset) * denom_inv);
const float w1 = exp(-LENGTH_SQ(sample1_offset) * denom_inv);
const float w2 = exp(-LENGTH_SQ(sample2_offset) * denom_inv);
const float w3 = exp(-LENGTH_SQ(sample3_offset) * denom_inv);
const float w4 = exp(-LENGTH_SQ(sample4_offset) * denom_inv);
const float w5 = exp(-LENGTH_SQ(sample5_offset) * denom_inv);
const float w6 = exp(-LENGTH_SQ(sample6_offset) * denom_inv);
const float w7 = exp(-LENGTH_SQ(sample7_offset) * denom_inv);
const float w8 = exp(-LENGTH_SQ(sample8_offset) * denom_inv);
const float w9 = exp(-LENGTH_SQ(sample9_offset) * denom_inv);
const float w10 = exp(-LENGTH_SQ(sample10_offset) * denom_inv);
const float w11 = exp(-LENGTH_SQ(sample11_offset) * denom_inv);
const float w12 = exp(-LENGTH_SQ(sample12_offset) * denom_inv);
const float w13 = exp(-LENGTH_SQ(sample13_offset) * denom_inv);
const float w14 = exp(-LENGTH_SQ(sample14_offset) * denom_inv);
const float w15 = exp(-LENGTH_SQ(sample15_offset) * denom_inv);
// Reciprocal of the total weight, so the returned result is normalized.
const float weight_sum_inv = 1.0/(
w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 +
w8 +w9 + w10 + w11 + w12 + w13 + w14 + w15);
// Weight and sum the samples:
const float3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 +
w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 +
w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 +
w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15;
return sum * weight_sum_inv;
}
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants written by VS_Bloom_Approx and read by PS_Bloom_Approx.
struct out_vertex_p2
{
// uv used to sample the ORIG_LINEARIZED source.
float2 tex_uv : TEXCOORD1;
// uv spacing between blur/resize taps.
float2 blur_dxdy : TEXCOORD2;
// uv step per (texel, same-field scanline); scales the convergence offsets.
float2 uv_scanline_step : TEXCOORD3;
// Viewport width estimate fed to get_bloom_approx_sigma().
float estimated_viewport_size_x : TEXCOORD4;
// 1.0 / ORIG_LINEARIZEDtexture_size.
float2 texture_size_inv : TEXCOORD5;
// Converts a source uv delta into output pixels (for the 4x4 Gaussian resize).
float2 tex_uv_to_pixel_scale : TEXCOORD6;
// Copy of the vertex shader's output_size (VIEWPORT_SIZE).
float2 output_size : TEXCOORD7;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex shader for the BLOOM_APPROX pass. Emits the screen-covering triangle
// and precomputes every per-pass constant the pixel shader needs.
//   id       - vertex index (0..2)
//   position - clip-space position of that vertex
//   texcoord - UV of that vertex (0..2 range)
//   OUT      - sampling coordinates, tap spacing and scale factors
void VS_Bloom_Approx(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p2 OUT)
{
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // These two locals keep their names on purpose: video_size is not declared
    // in this function and may resolve through them.
    float2 texture_size = BLOOM_APPROX_texture_size;
    float2 output_size = VIEWPORT_SIZE;
    OUT.output_size = output_size;

    // Same idea as the one-pass resize blur vertex shader, but the source
    // image is ORIG_LINEARIZED, so convert the coordinates into its uv space.
    const float2 video_uv = texcoord * texture_size/video_size;
    OUT.tex_uv = video_uv * ORIG_LINEARIZEDvideo_size /
        ORIG_LINEARIZEDtexture_size;

    // Estimate the viewport width from the height and this pass's aspect
    // ratio; the pixel shader uses it to pick a runtime sigma.
    OUT.estimated_viewport_size_x = video_size.y * texture_size.x/texture_size.y;

    // uv distance between output pixels. A resize blur tolerates arbitrary
    // upsizing with linear filtering and arbitrary downsizing with mipmaps.
    const float2 min_step_scale = ORIG_LINEARIZEDvideo_size/output_size;
    const float2 src_texel_size = 1.0.xx/ORIG_LINEARIZEDtexture_size;
    // The true 4x4 Gaussian resize (filter > 1.5) snaps to texels when
    // upsizing, so its step is never allowed below one texel.
    const float2 step_scale = (bloom_approx_filter > 1.5)
        ? max(min_step_scale, 1.0.xx)
        : min_step_scale;
    OUT.blur_dxdy = step_scale * src_texel_size;

    // Extra data only tex2Dresize_gaussian4x4 needs:
    OUT.tex_uv_to_pixel_scale = output_size *
        ORIG_LINEARIZEDtexture_size / ORIG_LINEARIZEDvideo_size;
    OUT.texture_size_inv = src_texel_size;

    // Detect interlacing again so convergence offsets can be applied in this
    // pass: the scanline step multiple is 1 for progressive, 2 for interlaced.
    const float2 orig_video_size = ORIG_LINEARIZEDvideo_size;
    const float scanline_stride = 1.0 + float(is_interlaced(orig_video_size.y));
    // uv distance between (texels, same-field scanlines):
    OUT.uv_scanline_step = float2(1.0, scanline_stride) / ORIG_LINEARIZEDtexture_size;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Pixel shader for the BLOOM_APPROX pass. Resamples ORIG_LINEARIZED with the
// filter selected by bloom_approx_filter:
//   > 1.5 : 4x4 Gaussian resize (tex2Dresize_gaussian4x4)
//   > 0.5 : 3x3 resize blur (tex2Dblur3x3resize)
//   else  : plain bilinear sample
// When beam_misconvergence is set, each colour channel is sampled at its own
// convergence-shifted uv and the three results are merged.
// Parameters:
//   vpos      - unused
//   vTexCoord - unused (VAR.tex_uv carries the sampling coordinate)
//   VAR       - interpolants produced by VS_Bloom_Approx
// Returns: encode_output() of the blurred colour with alpha 1.0.
//
// Fix: the 3x3 branch now passes bloom_approx_sigma in the non-misconvergence
// path too. It previously used the sigma-less overload there, so the blur
// width changed whenever beam_misconvergence was toggled.
float4 PS_Bloom_Approx(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p2 VAR) : SV_Target
{
    // Would a viewport-relative size work better for this pass? (No.)
    // PROS:
    // 1.) Instead of writing an absolute size to user-cgp-constants.h, we'd
    //     write a viewport scale. That number could be used to directly scale
    //     the viewport-resolution bloom sigma and/or triad size to a smaller
    //     scale. This way, we could calculate an optimal dynamic sigma no
    //     matter how the dot pitch is specified.
    // CONS:
    // 1.) Texel smearing would be much worse at small viewport sizes, but
    //     performance would be much worse at large viewport sizes, so there
    //     would be no easy way to calculate a decent scale.
    // 2.) Worse, we could no longer get away with using a constant-size blur!
    //     Instead, we'd have to face all the same difficulties as the real
    //     phosphor bloom, which requires static #ifdefs to decide the blur
    //     size based on the expected triad size...a dynamic value.
    // 3.) Like the phosphor bloom, we'd have less control over making the blur
    //     size correct for an optical blur. That said, we likely overblur (to
    //     maintain brightness) more than the eye would do by itself: 20/20
    //     human vision distinguishes ~1 arc minute, or 1/60 of a degree. The
    //     highest viewing angle recommendation I know of is THX's 40.04 degree
    //     recommendation, at which 20/20 vision can distinguish about 2402.4
    //     lines. Assuming the "TV lines" definition, that means 1201.2
    //     distinct light lines and 1201.2 distinct dark lines can be told
    //     apart, i.e. 1201.2 pairs of lines. This would correspond to 1201.2
    //     pairs of alternating lit/unlit phosphors, so 2402.4 phosphors total
    //     (if they're alternately lit). That's a max of 800.8 triads. Using
    //     a more popular 30 degree viewing angle recommendation, 20/20 vision
    //     can distinguish 1800 lines, or 600 triads of alternately lit
    //     phosphors. In contrast, we currently blur phosphors all the way
    //     down to 341.3 triads to ensure full brightness.
    // 4.) Realistically speaking, we're usually just going to use bilinear
    //     filtering in this pass anyway, but it only works well to limit
    //     bandwidth if it's done at a small constant scale.

    // Get the constants we need to sample:
    float2 output_size = VAR.output_size;
    const float2 tex_uv = VAR.tex_uv;
    const float2 blur_dxdy = VAR.blur_dxdy;
    const float2 texture_size = ORIG_LINEARIZEDtexture_size;
    const float2 texture_size_inv = VAR.texture_size_inv;
    const float2 tex_uv_to_pixel_scale = VAR.tex_uv_to_pixel_scale;

    // Per-channel sampling coordinates; only written and read when
    // beam_misconvergence is enabled.
    float2 tex_uv_r, tex_uv_g, tex_uv_b;
    if(beam_misconvergence)
    {
        const float2 uv_scanline_step = VAR.uv_scanline_step;
        const float2 convergence_offsets_r = get_convergence_offsets_r_vector();
        const float2 convergence_offsets_g = get_convergence_offsets_g_vector();
        const float2 convergence_offsets_b = get_convergence_offsets_b_vector();
        tex_uv_r = tex_uv - convergence_offsets_r * uv_scanline_step;
        tex_uv_g = tex_uv - convergence_offsets_g * uv_scanline_step;
        tex_uv_b = tex_uv - convergence_offsets_b * uv_scanline_step;
    }

    // Get the blur sigma:
    const float bloom_approx_sigma = get_bloom_approx_sigma(output_size.x,
        VAR.estimated_viewport_size_x);

    // Sample the resized and blurred texture, and apply convergence offsets if
    // necessary. Applying convergence offsets here triples our samples from
    // 16/9/1 to 48/27/3, but faster and easier than sampling BLOOM_APPROX and
    // HALATION_BLUR 3 times at full resolution every time they're used.
    float3 color_r, color_g, color_b, color;
    if(bloom_approx_filter > 1.5)
    {
        // Use a 4x4 Gaussian resize. This is slower but technically correct.
        if(beam_misconvergence)
        {
            color_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_r,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
            color_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_g,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
            color_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_b,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
        }
        else
        {
            color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
        }
    }
    else if(bloom_approx_filter > 0.5)
    {
        // Use a 3x3 resize blur. This is the softest option, because we're
        // blurring already blurry bilinear samples. It doesn't play quite as
        // nicely with convergence offsets, but it has its charms.
        if(beam_misconvergence)
        {
            color_r = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_r,
                blur_dxdy, bloom_approx_sigma);
            color_g = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_g,
                blur_dxdy, bloom_approx_sigma);
            color_b = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_b,
                blur_dxdy, bloom_approx_sigma);
        }
        else
        {
            // Same sigma as the per-channel path above, so both paths blur
            // by the same amount.
            color = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv,
                blur_dxdy, bloom_approx_sigma);
        }
    }
    else
    {
        // Use bilinear sampling. This approximates a 4x4 Gaussian resize MUCH
        // better than tex2Dblur3x3_resize for the very small sigmas we're
        // likely to use at small output resolutions. (This estimate becomes
        // too sharp above ~400x300, but the blurs break down above that
        // resolution too, unless min_allowed_viewport_triads is high enough to
        // keep bloom_approx_scale_x/min_allowed_viewport_triads < ~1.1658025.)
        if(beam_misconvergence)
        {
            color_r = tex2D_linearize(ORIG_LINEARIZED, tex_uv_r).rgb;
            color_g = tex2D_linearize(ORIG_LINEARIZED, tex_uv_g).rgb;
            color_b = tex2D_linearize(ORIG_LINEARIZED, tex_uv_b).rgb;
        }
        else
        {
            color = tex2D_linearize(ORIG_LINEARIZED, tex_uv).rgb;
        }
    }

    // Pack the colors from the red/green/blue beams into a single vector:
    if(beam_misconvergence)
    {
        color = float3(color_r.r, color_g.g, color_b.b);
    }

    // Encode and output the blurred image:
    return encode_output(float4(color, 1.0));
}

View file

@ -1,129 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/bloom-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
#include "../include/scanline-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants written by VS_Bloom_Horizontal and read by PS_Bloom_Horizontal.
struct out_vertex_p10
{
// uv used to sample every input texture of the pass.
float2 video_uv : TEXCOORD1;
// uv step between blur taps; only the x component is non-zero.
float2 bloom_dxdy : TEXCOORD2;
// Sigma computed in the vertex shader, passed to get_final_bloom_sigma().
float bloom_sigma_runtime : TEXCOORD3;
// sin / cos of (geom_x_tilt, geom_y_tilt), consumed by transform().
float2 sinangle : TEXCOORD4;
float2 cosangle : TEXCOORD5;
// Result of maxscale(sinangle, cosangle), consumed by transform().
float3 stretch : TEXCOORD6;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex shader for the horizontal bloom pass. Emits the screen-covering
// triangle, applies the user screen-centering offset to the uv, and
// precomputes the blur step, the runtime sigma and the tilt terms.
//   id       - vertex index (0..2)
//   position - clip-space position of that vertex
//   texcoord - UV of that vertex, shifted by (centerx, centery)/100
//   OUT      - values consumed by PS_Bloom_Horizontal
void VS_Bloom_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p10 OUT)
{
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // These two locals keep their names on purpose: video_size is not declared
    // in this function and may resolve through them.
    float2 texture_size = BLOOM_HORIZONTAL_texture_size;
    float2 output_size = VIEWPORT_SIZE;

    // Screen centering: shift the uv only, after position was derived from it.
    texcoord -= float2(centerx,centery)/100.0;

    // The input textures are addressed in video uv space:
    OUT.video_uv = texcoord * texture_size/video_size;

    // This pass blurs the vertically blurred brightpass horizontally and must
    // not resize, so the step is exactly one input texel along x.
    OUT.bloom_dxdy = float2(1.0/texture_size.x, 0.0);

    // Runtime bloom sigma, in case the pixel shader needs it:
    const float tile_width = get_resized_mask_tile_size(
        output_size, output_size * mask_resize_viewport_scale, false).x;
    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
        tile_width / mask_triads_per_tile, bloom_diff_thresh);

    // Tilt terms used by the curvature transform in the pixel shader.
    const float2 tilt = float2(geom_x_tilt, geom_y_tilt);
    const float2 tilt_sin = sin(tilt);
    const float2 tilt_cos = cos(tilt);
    OUT.sinangle = tilt_sin;
    OUT.cosangle = tilt_cos;
    OUT.stretch = maxscale(tilt_sin, tilt_cos);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Pixel shader for the horizontal bloom pass. Finishes the phosphor bloom,
// mixes in the halation/diffusion colour, optionally applies the curvature
// transform and corner factor, then gamma-adjusts the result.
//   vpos, vTexCoord - unused
//   VAR             - interpolants produced by VS_Bloom_Horizontal
// Returns: encode_output() of the final colour with alpha 1.0.
float4 PS_Bloom_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p10 VAR) : SV_Target
{
    // Sampling coordinate, warped by transform() when curvature is enabled.
    float2 uv = VAR.video_uv;
    if (geom_curvature == true)
    {
        uv = transform(uv, VAR.sinangle, VAR.cosangle, VAR.stretch);
    }
    const float corner_factor = corner((uv - 0.5.xx) * BufferToViewportRatio + 0.5.xx);

    // Blur the vertically blurred brightpass horizontally by 9/17/25/43x:
    const float sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
    const float3 bright_blurred = tex2DblurNfast(BLOOM_VERTICAL,
        uv, VAR.bloom_dxdy, sigma);

    // Masked scanlines and the unblurred brightpass; their difference is the
    // dimpass.
    const float3 scanlines_dim = tex2D_linearize(MASKED_SCANLINES, uv).rgb;
    const float3 bright = tex2D_linearize(BRIGHTPASS, uv).rgb;
    const float3 dim = scanlines_dim - bright;

    // Add the blurred brightpass to the dimpass, then amplify by the mask
    // gain, undo the auto-dim factor (levels_autodim_temp) and apply contrast.
    const float undim = 1.0/levels_autodim_temp;
    const float gain = get_mask_amplify();
    const float3 phosphor = (dim + bright_blurred) *
        gain * undim * levels_contrast;

    // Let some light bleed into refractive diffusion via the halation
    // texture. Conceptually this happens before the phosphor bloom, but adding
    // it in earlier passes causes black crush in the diffusion colours.
    const float3 diffusion = levels_contrast * tex2D_linearize(
        HALATION_BLUR, uv).rgb;

    float3 result = lerp(phosphor, diffusion, diffusion_weight);
    if (geom_curvature == true)
    {
        result = result * corner_factor.xxx;
    }
    result = pow(result.rgb, 1.0/get_output_gamma());

    // Encode and output the bloomed image:
    return encode_output(float4(result, 1.0));
}

View file

@ -1,83 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/bloom-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants written by VS_Bloom_Vertical and read by PS_Bloom_Vertical.
struct out_vertex_p9
{
// uv used to sample the brightpass.
float2 tex_uv : TEXCOORD1;
// uv step between blur taps; only the y component is non-zero.
float2 bloom_dxdy : TEXCOORD2;
// Sigma computed in the vertex shader, passed to get_final_bloom_sigma().
float bloom_sigma_runtime : TEXCOORD3;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex shader for the vertical bloom pass. Emits the screen-covering
// triangle and precomputes the vertical blur step and the runtime sigma.
//   id       - vertex index (0..2)
//   position - clip-space position of that vertex
//   texcoord - UV of that vertex (0..2 range)
//   OUT      - values consumed by PS_Bloom_Vertical
void VS_Bloom_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p9 OUT)
{
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // These two locals keep their names on purpose: video_size is not declared
    // in this function and may resolve through them.
    float2 texture_size = BLOOM_VERTICAL_texture_size;
    float2 output_size = VIEWPORT_SIZE;

    OUT.tex_uv = texcoord;

    // uv distance between output pixels, computed the same way as in the fast
    // vertical blur vertex shader.
    const float2 uv_step = (video_size / output_size) / texture_size;
    // Vertical pass: discard the horizontal component.
    OUT.bloom_dxdy = float2(0.0, uv_step.y);

    // Runtime bloom sigma, in case the pixel shader needs it:
    const float tile_width = get_resized_mask_tile_size(
        output_size, output_size * mask_resize_viewport_scale, false).x;
    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
        tile_width / mask_triads_per_tile, bloom_diff_thresh);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Pixel shader for the vertical bloom pass. Blurs BRIGHTPASS along
// VAR.bloom_dxdy (which VS_Bloom_Vertical fills with a y-only step) using a
// 9/17/25/43x blur, and returns it through encode_output() with alpha 1.0.
// vpos and vTexCoord are unused.
float4 PS_Bloom_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p9 VAR) : SV_Target
{
    const float sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
    const float3 blurred = tex2DblurNfast(BRIGHTPASS, VAR.tex_uv,
        VAR.bloom_dxdy, sigma);
    return encode_output(float4(blurred, 1.0));
}

View file

@ -1,130 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
#include "../include/scanline-functions.fxh"
#include "../include/bloom-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants written by VS_Brightpass and read by PS_Brightpass.
struct out_vertex_p8
{
float2 video_uv : TEXCOORD1; // tex_uv * texture_size/video_size
float2 scanline_tex_uv : TEXCOORD2; // coords used to sample MASKED_SCANLINES
float2 blur3x3_tex_uv : TEXCOORD3; // coords used to sample BLOOM_APPROX
float bloom_sigma_runtime : TEXCOORD4; // sigma from get_min_sigma_to_blur_triad(); input to get_final_bloom_sigma()
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex stage of the brightpass: emits one oversized triangle covering the
// screen and precomputes the per-texture sample coordinates plus a runtime
// bloom sigma for PS_Brightpass.
void VS_Brightpass(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p8 OUT)
{
    // Vertex ids 0/1/2 map to uv (0,0), (0,2), (2,0):
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // NOTE(review): the local names tex_uv/texture_size/output_size are kept
    // as-is in case the included headers refer to them via macros - confirm
    // before renaming.
    float2 tex_uv = texcoord;
    float2 texture_size = BRIGHTPASS_texture_size;
    float2 output_size = VIEWPORT_SIZE;

    // Each input texture is addressed in its own coordinate space:
    const float2 uv_in_video = tex_uv * texture_size/video_size;
    OUT.video_uv = uv_in_video;
    OUT.scanline_tex_uv = uv_in_video * MASKED_SCANLINES_video_size /
        MASKED_SCANLINES_texture_size;
    OUT.blur3x3_tex_uv = uv_in_video * BLOOM_APPROX_video_size / BLOOM_APPROX_texture_size;

    // Runtime bloom sigma, derived from the width of one resized mask triad:
    const float2 resized_tile = get_resized_mask_tile_size(
        output_size, output_size * mask_resize_viewport_scale, false);
    const float triad_width = resized_tile.x / mask_triads_per_tile;
    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(triad_width, bloom_diff_thresh);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Fragment stage of the brightpass: decides what fraction of each masked
// scanline texel should be handed to the bloom blur and outputs the dimmed
// texel scaled by that fraction.
float4 PS_Brightpass(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p8 VAR) : SV_Target
{
    // Masked scanlines as stored (still auto-dimmed, not mask-amplified):
    const float3 dim_rgb =
        tex2D_linearize(MASKED_SCANLINES, VAR.scanline_tex_uv).rgb;

    // Undo the auto-dim and apply mask compensation and contrast to get the
    // full intensity:
    const float autodim = levels_autodim_temp;
    const float undim = 1.0/autodim;
    const float amplify = get_mask_amplify();
    const float3 full_rgb = dim_rgb * undim * amplify *
        levels_contrast;

    // BLOOM_APPROX stands in for a straight blur of the masked scanlines, so
    // it estimates the energy arriving from blooming neighbors:
    const float3 blur_estimate = levels_contrast * tex2D_linearize(
        BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb;

    // Weight of the center tap, and the most energy neighbors could add:
    const float sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
    const float w_center = get_center_weight(sigma);
    const float3 neighbor_energy =
        max(0.0.xxx, blur_estimate - w_center * full_rgb);

    // Scale everything by the user's underestimate factor (neighbors are
    // assumed to blur all of their intensity):
    const float3 neighbor_under =
        bloom_underestimate_levels * neighbor_energy;
    const float3 full_under =
        bloom_underestimate_levels * full_rgb;

    // Pick the intensity the ratio is solved against. The area-based variant
    // uses the larger of the texel and its amplified blurred surroundings,
    // which varies more smoothly and blurs more.
    #ifdef BRIGHTPASS_AREA_BASED
        const float3 blur_under = bloom_underestimate_levels *
            blur_estimate;
        const float3 ratio_basis = max(full_under,
            blur_under * amplify);
    #else
        const float3 ratio_basis = full_under;
    #endif

    // Fraction of the intensity to blur, limited to [0, 1]:
    const float3 ratio_raw =
        ((1.0.xxx - neighbor_under) /
        ratio_basis - 1.0.xxx) / (w_center - 1.0);
    const float3 ratio = clamp(ratio_raw, 0.0, 1.0);

    // The brightpass is built from the dimmed, unamplified texel; bloom_excess
    // pushes the ratio toward 1.0:
    const float3 bright_rgb = dim_rgb *
        lerp(ratio, 1.0.xxx, bloom_excess);
    return encode_output(float4(bright_rgb, 1.0));
}

View file

@ -1,109 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
// PASS SETTINGS:
// gamma-management.fxh needs to know what kind of pipeline we're using and
// what pass this is in that pipeline. This will become obsolete if/when we
// can #define things like this in the .cgp preset file.
#define FIRST_PASS
#define SIMULATE_CRT_ON_LCD
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/bind-shader-params.fxh"
#include "../include/gamma-management.fxh"
#include "../include/scanline-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants written by VS_Linearize and read by PS_Linearize.
struct out_vertex
{
float2 tex_uv : TEXCOORD1; // copy of the full-screen triangle's texcoord
float2 uv_step : TEXCOORD2; // NormalizedNativePixelSize: uv distance between texels
float interlaced : TEXCOORD3; // result of is_interlaced(): 1.0 = true, 0.0 = false
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex stage of the linearize pass: emits one oversized triangle covering
// the screen and forwards the texel step and interlacing flag to
// PS_Linearize.
void VS_Linearize(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex OUT)
{
    // Vertex ids 0/1/2 map to uv (0,0), (0,2), (2,0):
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    OUT.tex_uv = texcoord;
    // uv distance between neighboring texels:
    OUT.uv_step = NormalizedNativePixelSize;
    // Interlacing is detected from the reciprocal of the vertical texel step
    // (1.0 = interlaced, 0.0 = progressive):
    const float native_rows = 1.0/NormalizedNativePixelSize.y;
    OUT.interlaced = is_interlaced(native_rows);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Sampler over ReShade's back buffer with POINT min/mag filtering and BORDER
// addressing on all axes.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
// Name under which PS_Linearize refers to the sampler above.
#define input_texture sBackBuffer
// Fragment stage of the linearize pass: decodes the input through
// tex2D_linearize_first and, when interlace_detect is set, bobs interlaced
// fields by replacing lines of the wrong field with the average of their
// vertical neighbors. Bobbing lets later passes blur without artifacts.
float4 PS_Linearize(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex VAR) : SV_Target
{
    if(interlace_detect)
    {
        // Fetch this line and both vertical neighbors unconditionally rather
        // than branching on the field:
        const float2 tex_uv = VAR.tex_uv;
        const float2 row_step = float2(0.0, VAR.uv_step.y);
        const float3 this_row = tex2D_linearize_first(
            input_texture, tex_uv).rgb;
        const float3 prev_row = tex2D_linearize_first(
            input_texture, tex_uv - row_step).rgb;
        const float3 next_row = tex2D_linearize_first(
            input_texture, tex_uv + row_step).rgb;
        const float3 bobbed_row = 0.5 * (prev_row + next_row);

        // The modulus is 1.0 when VAR.interlaced is 0.0 and 2.0 when it is
        // 1.0; the frame counter (plus interlace_bff) selects the field:
        const float field_modulus = VAR.interlaced + 1.0;
        const float field_phase =
            fmod(FrameCount + float(interlace_bff), field_modulus);

        // Row index of this texel; under_half guards against a rounding bug
        // at exact texel locations:
        const float row_texel = tex_uv.y / NormalizedNativePixelSize.y;
        const float row_index = floor(row_texel - under_half);
        const float in_wrong_field = fmod(row_index + field_phase, field_modulus);

        // Blend toward the bobbed row by the wrong-field value and encode:
        return encode_output(float4(lerp(this_row, bobbed_row, in_wrong_field), 1.0));
    }
    else
    {
        // No interlace handling requested: just linearize and encode.
        return encode_output(tex2D_linearize_first(input_texture, VAR.tex_uv));
    }
}

View file

@ -1,130 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/phosphor-mask-resizing.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants written by VS_Mask_Resize_Horizontal and read by
// PS_Mask_Resize_Horizontal.
struct out_vertex_p6
{
float2 src_tex_uv_wrap : TEXCOORD1; // tile_uv_wrap * tile_size_uv; frac() is applied in the fragment shader
float2 tile_uv_wrap : TEXCOORD2; // output video uv * number of tiles rendered this pass (not wrapped)
float2 resize_magnification_scale : TEXCOORD3; // resized tile size / input tile size
float2 src_dxdy : TEXCOORD4; // (1.0/texture_size.x, 0.0)
float2 tile_size_uv : TEXCOORD5; // input tile size / texture size
float2 input_tiles_per_texture : TEXCOORD6; // texture size / input tile size
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex stage of the horizontal mask-resize pass: emits one oversized
// triangle covering the screen and derives the tile coordinates, tile sizes
// and magnification scale that PS_Mask_Resize_Horizontal needs.
void VS_Mask_Resize_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p6 OUT)
{
    // Vertex ids 0/1/2 map to uv (0,0), (0,2), (2,0):
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // NOTE(review): the local names tex_uv/texture_size/output_size are kept
    // as-is in case the included headers refer to them via macros - confirm
    // before renaming.
    float2 tex_uv = texcoord;
    float2 texture_size = MASK_RESIZE_texture_size;
    float2 output_size = 0.0625*(VIEWPORT_SIZE);

    // Estimate the viewport size. If the estimate is off and
    // mask_specify_num_triads is enabled, the triad count will be off too.
    const float2 viewport_guess =
        output_size / mask_resize_viewport_scale;
    // Final size of a resized mask tile. The previous pass may have estimated
    // the sizes differently, so nothing here is asserted to match it.
    const float2 resized_tile_size = get_resized_mask_tile_size(
        viewport_guess, output_size, false);

    // Size of one input tile in texels and in uv units:
    const float2 src_tile_size = float2(min(
        mask_resize_src_lut_size.x, video_size.x), resized_tile_size.y);
    const float2 src_tile_uv = src_tile_size / texture_size;

    // Tiles are drawn until the output is full or a limit is hit, so express
    // the output position in (unwrapped) tile units:
    const float2 tiles_drawn = output_size / resized_tile_size;
    const float2 video_uv = tex_uv * texture_size / video_size;
    const float2 wrapped_tile_uv = video_uv * tiles_drawn;

    // Emit everything; frac() of the wrapped coords is left to the fragment
    // shader:
    OUT.tile_uv_wrap = wrapped_tile_uv;
    OUT.src_tex_uv_wrap = wrapped_tile_uv * src_tile_uv;
    OUT.resize_magnification_scale = resized_tile_size / src_tile_size;
    OUT.src_dxdy = float2(1.0/texture_size.x, 0.0);
    OUT.tile_size_uv = src_tile_uv;
    OUT.input_tiles_per_texture = texture_size / src_tile_size;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Fragment stage of the horizontal mask-resize pass.
// The input holds one mask tile horizontally and several vertically. The tile
// is resized horizontally to its final on-screen size and repeated until at
// least mask_resize_num_tiles have been drawn; it is left unchanged
// vertically. Lanczos-resizing the phosphor mask gives much sharper results
// than mipmapping, and emitting >= mask_resize_num_tiles tiles makes tiled
// sampling easier later.
// Returns the resized mask color (linear RGB, alpha 1.0). Fragments that are
// not needed - or every fragment when PHOSPHOR_MASK_MANUALLY_RESIZE is not
// defined - are discarded.
float4 PS_Mask_Resize_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p6 VAR) : SV_Target
{
    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
        float2 texture_size = MASK_RESIZE_texture_size;
        const float2 tile_uv_wrap = VAR.tile_uv_wrap;
        // Only sample mode 0 uses the manually resized mask, and only the
        // first mask_resize_num_tiles tiles are ever sampled:
        if(get_mask_sample_mode() < 0.5 &&
            max(tile_uv_wrap.x, tile_uv_wrap.y) <= mask_resize_num_tiles)
        {
            const float src_dx = VAR.src_dxdy.x;
            const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap);
            const float3 pixel_color = downsample_horizontal_sinc_tiled(MASK_RESIZE_VERTICAL,
                src_tex_uv, texture_size, src_dx,
                VAR.resize_magnification_scale.x, VAR.tile_size_uv.x);
            // The input LUT was linear RGB, and so is our output:
            return float4(pixel_color, 1.0);
        }
    #endif
    // Unneeded fragment. The return after discard gives every control path of
    // this non-void function a value; the value itself is never written.
    discard;
    return 1.0.xxxx;
}

View file

@ -1,164 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/phosphor-mask-resizing.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants written by VS_Mask_Resize_Vertical and read by
// PS_Mask_Resize_Vertical.
struct out_vertex_p5
{
float2 src_tex_uv_wrap : TEXCOORD1; // unwrapped tile uv; frac() is applied in the fragment shader
float2 resize_magnification_scale : TEXCOORD2; // this pass's output tile size / mask_resize_src_lut_size
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex stage of the vertical mask-resize pass: emits one oversized triangle
// covering the screen and derives the wrapped tile coordinates and the
// magnification scale that PS_Mask_Resize_Vertical needs.
void VS_Mask_Resize_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p5 OUT)
{
    // Vertex ids 0/1/2 map to uv (0,0), (0,2), (2,0):
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // NOTE(review): the local names tex_uv/texture_size/output_size are kept
    // as-is in case the included headers refer to them via macros - confirm
    // before renaming.
    float2 tex_uv = texcoord;
    float2 texture_size = MASK_RESIZE_VERT_texture_size;
    float2 output_size = float2(64.0, 0.0625*((VIEWPORT_SIZE).y));

    // Estimate the viewport height. If the estimate is off and
    // mask_specify_num_triads is enabled, the triad count will be off too.
    const float viewport_y = output_size.y / mask_resize_viewport_scale.y;
    // The aspect ratio is taken from texture_size:
    const float aspect = texture_size.x / texture_size.y;
    const float2 viewport_guess =
        float2(viewport_y * aspect, viewport_y);
    // Estimated output size of the next (MASK_RESIZE) pass. Only the y result
    // is used below, so the x estimate should not matter.
    const float2 next_pass_size_guess =
        float2(output_size.y * aspect, output_size.y);

    // Intended final tile size, then the tile size for this pass, which only
    // resizes y:
    const float2 final_tile_size = get_resized_mask_tile_size(
        viewport_guess, next_pass_size_guess, false);
    const float2 pass_tile_size = float2(min(
        mask_resize_src_lut_size.x, output_size.x), final_tile_size.y);

    // Tiles are drawn until the output is full or a limit is hit, so express
    // the output position in (unwrapped) tile units:
    const float2 tiles_drawn = output_size / pass_tile_size;
    const float2 video_uv = tex_uv * texture_size / video_size;
    const float2 wrapped_tile_uv = video_uv * tiles_drawn;

    // The input LUT is a single tile, so texture uv equals tile uv (frac() is
    // left to the fragment shader):
    OUT.src_tex_uv_wrap = wrapped_tile_uv;
    OUT.resize_magnification_scale =
        pass_tile_size / mask_resize_src_lut_size;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Fragment stage of the vertical mask-resize pass.
// Resizes the input phosphor mask tile to the final vertical size it will
// have on screen. The vertex stage keeps the horizontal size at 1x when the
// output is at least as wide as mask_resize_src_lut_size and otherwise fits
// exactly one tile. Lanczos-resizing the phosphor mask gives much sharper
// results than mipmapping, and resizing vertically first minimizes the total
// number of taps. A number of resized tiles >= mask_resize_num_tiles is
// emitted for easier tiled sampling later.
// Returns the resized mask color (linear RGB, alpha 1.0). Fragments that are
// not needed - or every fragment when PHOSPHOR_MASK_MANUALLY_RESIZE is not
// defined - are discarded.
float4 PS_Mask_Resize_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p5 VAR) : SV_Target
{
    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
        const float2 tile_uv_wrap = VAR.src_tex_uv_wrap;
        // Only sample mode 0 uses the manually resized mask, and only the
        // first mask_resize_num_tiles tiles are ever sampled:
        if(get_mask_sample_mode() < 0.5 &&
            tile_uv_wrap.y <= mask_resize_num_tiles)
        {
            static const float src_dy = 1.0/mask_resize_src_lut_size.y;
            const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap);
            float3 pixel_color;
            // mask_type selects the source LUT: < 0.5 grille, < 1.5 slot,
            // otherwise shadow. If mask_type is static, this branch will be
            // resolved statically.
            #ifdef PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
                if(mask_type < 0.5)
                {
                    pixel_color = downsample_vertical_sinc_tiled(
                        mask_grille_texture_large, src_tex_uv, mask_resize_src_lut_size,
                        src_dy, VAR.resize_magnification_scale.y, 1.0);
                }
                else if(mask_type < 1.5)
                {
                    pixel_color = downsample_vertical_sinc_tiled(
                        mask_slot_texture_large, src_tex_uv, mask_resize_src_lut_size,
                        src_dy, VAR.resize_magnification_scale.y, 1.0);
                }
                else
                {
                    pixel_color = downsample_vertical_sinc_tiled(
                        mask_shadow_texture_large, src_tex_uv, mask_resize_src_lut_size,
                        src_dy, VAR.resize_magnification_scale.y, 1.0);
                }
            #else
                if(mask_type < 0.5)
                {
                    pixel_color = downsample_vertical_sinc_tiled(
                        mask_grille_texture_small, src_tex_uv, mask_resize_src_lut_size,
                        src_dy, VAR.resize_magnification_scale.y, 1.0);
                }
                else if(mask_type < 1.5)
                {
                    pixel_color = downsample_vertical_sinc_tiled(
                        mask_slot_texture_small, src_tex_uv, mask_resize_src_lut_size,
                        src_dy, VAR.resize_magnification_scale.y, 1.0);
                }
                else
                {
                    pixel_color = downsample_vertical_sinc_tiled(
                        mask_shadow_texture_small, src_tex_uv, mask_resize_src_lut_size,
                        src_dy, VAR.resize_magnification_scale.y, 1.0);
                }
            #endif
            // The input LUT was linear RGB, and so is our output:
            return float4(pixel_color, 1.0);
        }
    #endif
    // Unneeded fragment. The return after discard gives every control path of
    // this non-void function a value; the value itself is never written.
    discard;
    return 1.0.xxxx;
}

View file

@ -1,283 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/scanline-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
#include "../include/bloom-functions.fxh"
#include "../include/gamma-management.fxh"
/////////////////////////////////// HELPERS //////////////////////////////////
// Samples a manually tiled mask texture and linearizes the result.
// Anisotropic filtering can get confused by manual tiling, so when the mask
// is manually resized one of two compatibility paths may be selected at
// compile time: an explicit-LOD fetch or a strongly negative mip bias. In
// every other configuration this is a plain tex2D_linearize.
float4 tex2Dtiled_mask_linearize(const sampler2D tex,
    const float2 tex_uv)
{
    #if defined(PHOSPHOR_MASK_MANUALLY_RESIZE) && defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
        // TODO: Use tex2Dlod_linearize with a calculated mip level.
        return tex2Dlod_linearize(tex, float4(tex_uv, 0.0, 0.0));
    #elif defined(PHOSPHOR_MASK_MANUALLY_RESIZE) && defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
        return tex2Dbias_linearize(tex, float4(tex_uv, 0.0, -16.0));
    #else
        return tex2D_linearize(tex, tex_uv);
    #endif
}
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants written by VS_Scanlines_Horizontal_Apply_Mask and read by
// PS_Scanlines_Horizontal_Apply_Mask.
struct out_vertex_p7
{
// Use explicit semantics so COLORx doesn't clamp values outside [0, 1].
float2 video_uv : TEXCOORD1; // tex_uv * texture_size/video_size
float2 scanline_tex_uv : TEXCOORD2; // coords used to sample VERTICAL_SCANLINES
float2 blur3x3_tex_uv : TEXCOORD3; // coords used to sample BLOOM_APPROX
float2 halation_tex_uv : TEXCOORD4; // coords used to sample HALATION_BLUR
float2 scanline_texture_size_inv : TEXCOORD5; // 1.0 / VERTICAL_SCANLINES_texture_size
float4 mask_tile_start_uv_and_size : TEXCOORD6; // return value of get_mask_sampling_parameters()
float2 mask_tiles_per_screen : TEXCOORD7; // filled in by get_mask_sampling_parameters()
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
// Vertex stage of the masked-scanlines pass: emits one oversized triangle
// covering the screen and precomputes the sample coordinates for every input
// texture plus the phosphor mask tiling parameters.
void VS_Scanlines_Horizontal_Apply_Mask(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p7 OUT)
{
    // Vertex ids 0/1/2 map to uv (0,0), (0,2), (2,0):
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // NOTE(review): the local names tex_uv/texture_size/output_size are kept
    // as-is in case the included headers refer to them via macros - confirm
    // before renaming.
    float2 tex_uv = texcoord;
    float2 texture_size = MASKED_SCANLINES_texture_size;
    float2 output_size = VIEWPORT_SIZE;

    // Each input texture is addressed in its own coordinate space:
    const float2 uv_in_video = tex_uv * texture_size/video_size;
    const float2 inv_scanline_size =
        1.0.xx/VERTICAL_SCANLINES_texture_size;
    OUT.video_uv = uv_in_video;
    OUT.scanline_texture_size_inv = inv_scanline_size;
    OUT.scanline_tex_uv = uv_in_video * VERTICAL_SCANLINES_video_size *
        inv_scanline_size;
    OUT.blur3x3_tex_uv = uv_in_video * BLOOM_APPROX_video_size /
        BLOOM_APPROX_texture_size;
    OUT.halation_tex_uv = uv_in_video * HALATION_BLUR_video_size /
        HALATION_BLUR_texture_size;

    // Settle on one pair of names for the final mask texture size. Sample
    // mode 0 uses the manually resized mask; without manual resizing the large
    // mask texture is always used.
    // NOTE(review): in sample mode 0 the fragment shader samples MASK_RESIZE,
    // while the sizes chosen here are the MASKED_SCANLINES ones - confirm the
    // two are meant to be the same.
    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
        const float sample_mode = get_mask_sample_mode();
        const bool use_resized_mask = sample_mode < 0.5;
        const float2 mask_resize_texture_size = use_resized_mask ?
            MASKED_SCANLINES_texture_size : mask_texture_large_size;
        const float2 mask_resize_video_size = use_resized_mask ?
            MASKED_SCANLINES_video_size : mask_texture_large_size;
    #else
        const float2 mask_resize_texture_size = mask_texture_large_size;
        const float2 mask_resize_video_size = mask_texture_large_size;
    #endif

    // Mask tile start, size and count per screen:
    float2 tiles_per_screen;
    OUT.mask_tile_start_uv_and_size = get_mask_sampling_parameters(
        mask_resize_texture_size, mask_resize_video_size, output_size,
        tiles_per_screen);
    OUT.mask_tiles_per_screen = tiles_per_screen;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Fragment stage of the masked-scanlines pass.
// Samples VERTICAL_SCANLINES horizontally, mixes in the HALATION_BLUR sample
// by halation_weight, multiplies by a phosphor mask sample and, when
// PHOSPHOR_BLOOM_FAKE is defined, blends toward BLOOM_APPROX to fake a bloom.
// Returns encode_output() of the resulting color with alpha 1.0.
float4 PS_Scanlines_Horizontal_Apply_Mask(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p7 VAR) : SV_Target
{
// This pass: Sample (misconverged?) scanlines to the final horizontal
// resolution, apply halation (bouncing electrons), and apply the phosphor
// mask. Fake a bloom if requested. Unless we fake a bloom, the output
// will be dim from the scanline auto-dim, mask dimming, and low gamma.
// Horizontally sample the current row (a vertically interpolated scanline)
// and account for horizontal convergence offsets, given in units of texels.
// float2 VERTICAL_SCANLINES_texture_size = float2(1.0/NormalizedNativePixelSize.x, ViewportSize.y*BufferToViewportRatio.y);
// output_size is referenced explicitly only by the non-simple-blend fake
// bloom path further down.
float2 output_size = VIEWPORT_SIZE;
const float3 scanline_color_dim = sample_rgb_scanline_horizontal(
VERTICAL_SCANLINES, VAR.scanline_tex_uv,
VERTICAL_SCANLINES_texture_size, VAR.scanline_texture_size_inv);
const float auto_dim_factor = levels_autodim_temp;
// Sample the phosphor mask:
const float2 tile_uv_wrap = VAR.video_uv * VAR.mask_tiles_per_screen;
const float2 mask_tex_uv = convert_phosphor_tile_uv_wrap_to_tex_uv(
tile_uv_wrap, VAR.mask_tile_start_uv_and_size);
float3 phosphor_mask_sample;
// Without manual resizing the original LUTs are always sampled; with it,
// they are sampled only when the mask sample mode is above 0.5.
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
const bool sample_orig_luts = get_mask_sample_mode() > 0.5;
#else
static const bool sample_orig_luts = true;
#endif
if(sample_orig_luts)
{
// mask_type selects the LUT: < 0.5 grille, < 1.5 slot, otherwise shadow.
// If mask_type is static, this branch will be resolved statically.
if(mask_type < 0.5)
{
phosphor_mask_sample = tex2D_linearize(
mask_grille_texture_large, mask_tex_uv).rgb;
}
else if(mask_type < 1.5)
{
phosphor_mask_sample = tex2D_linearize(
mask_slot_texture_large, mask_tex_uv).rgb;
}
else
{
phosphor_mask_sample = tex2D_linearize(
mask_shadow_texture_large, mask_tex_uv).rgb;
}
}
else
{
// Sample the resized mask, and avoid tiling artifacts:
phosphor_mask_sample = tex2Dtiled_mask_linearize(
MASK_RESIZE, mask_tex_uv).rgb;
}
// Sample the halation texture (auto-dim to match the scanlines), and
// account for both horizontal and vertical convergence offsets, given
// in units of texels horizontally and same-field scanlines vertically:
const float3 halation_color = tex2D_linearize(
HALATION_BLUR, VAR.halation_tex_uv).rgb;
// Apply halation: Halation models electrons flying around under the glass
// and hitting the wrong phosphors (of any color). It desaturates, so
// average the halation electrons to a scalar. Reduce the local scanline
// intensity accordingly to conserve energy.
// The dot product with auto_dim_factor/3 both averages the three channels
// and applies the auto-dim in one step.
const float3 halation_intensity_dim =
dot(halation_color, auto_dim_factor.xxx/3.0).xxx;
const float3 electron_intensity_dim = lerp(scanline_color_dim,
halation_intensity_dim, halation_weight);
// Apply the phosphor mask:
const float3 phosphor_emission_dim = electron_intensity_dim *
phosphor_mask_sample;
#ifdef PHOSPHOR_BLOOM_FAKE
// The BLOOM_APPROX pass approximates a blurred version of a masked
// and scanlined image. It's usually used to compute the brightpass,
// but we can also use it to fake the bloom stage entirely. Caveats:
// 1.) A fake bloom is conceptually different, since we're mixing in a
// fully blurred low-res image, and the biggest implications are:
// 2.) If mask_amplify is incorrect, results deteriorate more quickly.
// 3.) The inaccurate blurring hurts quality in high-contrast areas.
// 4.) The bloom_underestimate_levels parameter seems less sensitive.
// Reverse the auto-dimming and amplify to compensate for mask dimming:
// The simple blend is defined unconditionally right here, so the #else
// branches of the two PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND checks below
// are compiled out.
#define PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
#ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
static const float blur_contrast = 1.05;
#else
static const float blur_contrast = 1.0;
#endif
const float mask_amplify = get_mask_amplify();
const float undim_factor = 1.0/auto_dim_factor;
const float3 phosphor_emission =
phosphor_emission_dim * undim_factor * mask_amplify;
// Get a phosphor blur estimate, accounting for convergence offsets:
const float3 electron_intensity = electron_intensity_dim * undim_factor;
const float3 phosphor_blur_approx_soft = tex2D_linearize(
BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb;
// Mix 10% of the undimmed electron intensity into the blur estimate, then
// apply blur_contrast:
const float3 phosphor_blur_approx = lerp(phosphor_blur_approx_soft,
electron_intensity, 0.1) * blur_contrast;
// We could blend between phosphor_emission and phosphor_blur_approx,
// solving for the minimum blend_ratio that avoids clipping past 1.0:
// 1.0 >= total_intensity
// 1.0 >= phosphor_emission * (1.0 - blend_ratio) +
// phosphor_blur_approx * blend_ratio
// blend_ratio = (phosphor_emission - 1.0)/
// (phosphor_emission - phosphor_blur_approx);
// However, this blurs far more than necessary, because it aims for
// full brightness, not minimal blurring. To fix it, base blend_ratio
// on a max area intensity only so it varies more smoothly:
const float3 phosphor_blur_underestimate =
phosphor_blur_approx * bloom_underestimate_levels;
const float3 area_max_underestimate =
phosphor_blur_underestimate * mask_amplify;
#ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
const float3 blend_ratio_temp =
(area_max_underestimate - 1.0.xxx) /
(area_max_underestimate - phosphor_blur_underestimate);
#else
// Try doing it like an area-based brightpass. This is nearly
// identical, but it's worth toying with the code in case I ever
// find a way to make it look more like a real bloom. (I've had
// some promising textures from combining an area-based blend ratio
// for the phosphor blur and a more brightpass-like blend-ratio for
// the phosphor emission, but I haven't found a way to make the
// brightness correct across the whole color range, especially with
// different bloom_underestimate_levels values.)
const float desired_triad_size = lerp(mask_triad_size_desired,
output_size.x/mask_num_triads_desired,
mask_specify_num_triads);
const float bloom_sigma = get_min_sigma_to_blur_triad(
desired_triad_size, bloom_diff_thresh);
const float center_weight = get_center_weight(bloom_sigma);
const float3 max_area_contribution_approx =
max(0.0.xxx, phosphor_blur_approx -
center_weight * phosphor_emission);
const float3 area_contrib_underestimate =
bloom_underestimate_levels * max_area_contribution_approx;
const float3 blend_ratio_temp =
((1.0.xxx - area_contrib_underestimate) /
area_max_underestimate - 1.0.xxx) / (center_weight - 1.0);
#endif
// Clamp blend_ratio in case it's out-of-range, but be SUPER careful:
// min/max/clamp are BIZARRELY broken with lerp (optimization bug?),
// and this redundant sequence avoids bugs, at least on nVidia cards.
// Do not simplify the max(clamp(...)) below; the redundancy is deliberate.
const float3 blend_ratio_clamped = max(clamp(blend_ratio_temp, 0.0, 1.0), 0.0);
const float3 blend_ratio = lerp(blend_ratio_clamped, 1.0.xxx, bloom_excess);
// Blend the blurred and unblurred images:
const float3 phosphor_emission_unclipped =
lerp(phosphor_emission, phosphor_blur_approx, blend_ratio);
// Simulate refractive diffusion by reusing the halation sample.
const float3 pixel_color = lerp(phosphor_emission_unclipped,
halation_color, diffusion_weight);
#else
// No fake bloom: output the dim masked emission unchanged.
const float3 pixel_color = phosphor_emission_dim;
#endif
// Encode if necessary, and output.
return encode_output(float4(pixel_color, 1.0));
}

View file

@ -1,241 +0,0 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
#undef FIRST_PASS
////////////////////////////////// INCLUDES //////////////////////////////////
//#include "../include/user-settings.fxh"
//#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
#include "../include/scanline-functions.fxh"
//#include "../include/gamma-management.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
// Interpolants written by VS_Scanlines_Vertical_Interlacing and read by
// PS_Scanlines_Vertical_Interlacing.
struct out_vertex_p1
{
// Use explicit semantics so COLORx doesn't clamp values outside [0, 1].
// Copy of the fullscreen-triangle texcoord produced by the vertex shader.
float2 tex_uv : TEXCOORD1;
float2 uv_step : TEXCOORD2; // uv size of a texel (x) and scanline (y)
float2 il_step_multiple : TEXCOORD3; // (1, 1) = progressive, (1, 2) = interlaced
float pixel_height_in_scanlines : TEXCOORD4; // Height of an output pixel in scanlines
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader: emits a single oversized triangle covering the whole screen
// and precomputes the scanline stepping values consumed by the pixel shader.
void VS_Scanlines_Vertical_Interlacing(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p1 OUT)
{
    // Vertex ids 0, 1, 2 yield uv (0, 0), (0, 2) and (2, 0) respectively.
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
    OUT.tex_uv = texcoord;

    const float2 source_size = VERTICAL_SCANLINES_texture_size;
    const float2 target_size = float2(TEXTURE_SIZE.x, VIEWPORT_SIZE.y);

    // Step multiple between lines of one field: 1 for progressive sources,
    // 2 for interlaced sources.
    const float line_step = 1.0 + float(is_interlaced(video_size.y));
    const float2 step_multiple = float2(1.0, line_step);
    OUT.il_step_multiple = step_multiple;

    // uv distance covered by one texel (x) and one scanline (y).
    OUT.uv_step = step_multiple / source_size;

    // NOTE: {sigma, shape}_range are deliberately not computed here; the pixel
    // shader derives them itself.

    // Height of an output pixel measured in scanlines, needed for
    // antialiased/integral sampling.
    OUT.pixel_height_in_scanlines = (video_size.y / target_size.y) / step_multiple.y;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
// Pixel shader for the vertical scanline pass.
// Samples between 2 and 6 neighbouring scanlines of ORIG_LINEARIZED (count is
// selected by beam_num_scanlines), weights each with scanline_contrib(), sums
// them, scales the sum by levels_autodim_temp and returns it via
// encode_output() with alpha = 1.
//   vpos      - SV_Position (unused here).
//   vTexCoord - interpolated texcoord (unused here; VAR.tex_uv is used instead).
//   VAR       - values precomputed by VS_Scanlines_Vertical_Interlacing.
float4 PS_Scanlines_Vertical_Interlacing(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p1 VAR) : SV_Target
{
// This pass: Sample multiple (misconverged?) scanlines to the final
// vertical resolution. Temporarily auto-dim the output to avoid clipping.
// Read some attributes into local variables:
const float2 texture_size = VERTICAL_SCANLINES_texture_size;
const float2 texture_size_inv = 1.0/texture_size;
const float2 uv_step = VAR.uv_step;
const float2 il_step_multiple = VAR.il_step_multiple;
const float frame_count = FrameCount;
const float ph = VAR.pixel_height_in_scanlines;
// Get the uv coords of the previous scanline (in this field), and the
// scanline's distance from this sample, in scanlines.
float dist;
const float2 scanline_uv = get_last_scanline_uv(VAR.tex_uv, texture_size,
texture_size_inv, il_step_multiple, frame_count, dist);
// Consider 2, 3, 4, or 6 scanlines numbered 0-5: The previous and next
// scanlines are numbered 2 and 3. Get scanline colors (ignore
// horizontal sampling, since IN.output_size.x = video_size.x).
// NOTE: Anisotropic filtering creates interlacing artifacts, which is why
// ORIG_LINEARIZED bobbed any interlaced input before this pass.
const float2 v_step = float2(0.0, uv_step.y);
const float3 scanline2_color = tex2D_linearize(ORIG_LINEARIZED, scanline_uv).rgb;
const float3 scanline3_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + v_step).rgb;
// The colors below are only assigned in the branch that matches
// beam_num_scanlines; the contribution branches further down use the same
// thresholds, so each value is only read where it was written.
float3 scanline0_color, scanline1_color, scanline4_color, scanline5_color,
scanline_outside_color;
float dist_round;
// Use scanlines 0, 1, 4, and 5 for a total of 6 scanlines:
if(beam_num_scanlines > 5.5)
{
scanline1_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
scanline4_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
scanline0_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - 2.0 * v_step).rgb;
scanline5_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 3.0 * v_step).rgb;
}
// Use scanlines 1, 4, and either 0 or 5 for a total of 5 scanlines:
else if(beam_num_scanlines > 4.5)
{
scanline1_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
scanline4_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
// dist is in [0, 1]
// Rounding picks the nearer outer line: 0 -> scanline 0, 1 -> scanline 5.
dist_round = round(dist);
const float2 sample_0_or_5_uv_off =
lerp(-2.0 * v_step, 3.0 * v_step, dist_round);
// Call this "scanline_outside_color" to cope with the conditional
// scanline number:
scanline_outside_color = tex2D_linearize(
ORIG_LINEARIZED, scanline_uv + sample_0_or_5_uv_off).rgb;
}
// Use scanlines 1 and 4 for a total of 4 scanlines:
else if(beam_num_scanlines > 3.5)
{
scanline1_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
scanline4_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
}
// Use scanline 1 or 4 for a total of 3 scanlines:
else if(beam_num_scanlines > 2.5)
{
// dist is in [0, 1]
// Rounding picks the nearer line: 0 -> scanline 1, 1 -> scanline 4.
dist_round = round(dist);
const float2 sample_1or4_uv_off =
lerp(-v_step, 2.0 * v_step, dist_round);
scanline_outside_color = tex2D_linearize(
ORIG_LINEARIZED, scanline_uv + sample_1or4_uv_off).rgb;
}
// Compute scanline contributions, accounting for vertical convergence.
// Vertical convergence offsets are in units of current-field scanlines.
// dist2 means "positive sample distance from scanline 2, in scanlines:"
float3 dist2 = dist.xxx;
if(beam_misconvergence)
{
const float3 convergence_offsets_vert_rgb =
get_convergence_offsets_y_vector();
dist2 = dist.xxx - convergence_offsets_vert_rgb;
}
// Calculate {sigma, shape}_range outside of scanline_contrib so it's only
// done once per pixel (not 6 times) with runtime params. Don't reuse the
// vertex shader calculations, so static versions can be constant-folded.
const float sigma_range = max(beam_max_sigma, beam_min_sigma) -
beam_min_sigma;
const float shape_range = max(beam_max_shape, beam_min_shape) -
beam_min_shape;
// Calculate and sum final scanline contributions, starting with lines 2/3.
// There is no normalization step, because we're not interpolating a
// continuous signal. Instead, each scanline is an additive light source.
const float3 scanline2_contrib = scanline_contrib(dist2,
scanline2_color, ph, sigma_range, shape_range);
const float3 scanline3_contrib = scanline_contrib(abs(1.0.xxx - dist2),
scanline3_color, ph, sigma_range, shape_range);
float3 scanline_intensity = scanline2_contrib + scanline3_contrib;
// Add the outer scanlines; thresholds mirror the sampling branches above.
if(beam_num_scanlines > 5.5)
{
const float3 scanline0_contrib =
scanline_contrib(dist2 + 2.0.xxx, scanline0_color,
ph, sigma_range, shape_range);
const float3 scanline1_contrib =
scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
ph, sigma_range, shape_range);
const float3 scanline4_contrib =
scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
ph, sigma_range, shape_range);
const float3 scanline5_contrib =
scanline_contrib(abs(3.0.xxx - dist2), scanline5_color,
ph, sigma_range, shape_range);
scanline_intensity += scanline0_contrib + scanline1_contrib +
scanline4_contrib + scanline5_contrib;
}
else if(beam_num_scanlines > 4.5)
{
const float3 scanline1_contrib =
scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
ph, sigma_range, shape_range);
const float3 scanline4_contrib =
scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
ph, sigma_range, shape_range);
// Distance to whichever of scanline 0 / 5 was sampled above.
const float3 dist0or5 = lerp(
dist2 + 2.0.xxx, 3.0.xxx - dist2, dist_round);
const float3 scanline0or5_contrib = scanline_contrib(
dist0or5, scanline_outside_color, ph, sigma_range, shape_range);
scanline_intensity += scanline1_contrib + scanline4_contrib +
scanline0or5_contrib;
}
else if(beam_num_scanlines > 3.5)
{
const float3 scanline1_contrib =
scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
ph, sigma_range, shape_range);
const float3 scanline4_contrib =
scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
ph, sigma_range, shape_range);
scanline_intensity += scanline1_contrib + scanline4_contrib;
}
else if(beam_num_scanlines > 2.5)
{
// Distance to whichever of scanline 1 / 4 was sampled above.
const float3 dist1or4 = lerp(
dist2 + 1.0.xxx, 2.0.xxx - dist2, dist_round);
const float3 scanline1or4_contrib = scanline_contrib(
dist1or4, scanline_outside_color, ph, sigma_range, shape_range);
scanline_intensity += scanline1or4_contrib;
}
// Auto-dim the image to avoid clipping, encode if necessary, and output.
// My original idea was to compute a minimal auto-dim factor and put it in
// the alpha channel, but it wasn't working, at least not reliably. This
// is faster anyway, levels_autodim_temp = 0.5 isn't causing banding.
return encode_output(float4(scanline_intensity * levels_autodim_temp, 1.0));
}

View file

@ -1,166 +0,0 @@
#include "ReShade.fxh"
/*
Bilateral - Smart
Copyright (C) 2024 guest(r)
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// Filter half-width: both passes loop over tap offsets -FRANGE..+FRANGE.
uniform float FRANGE <
ui_type = "drag";
ui_min = 1.0;
ui_max = 10.0;
ui_step = 1.0;
ui_label = "Filter Range";
> = 5.0;
// Base term of the colour-similarity weight computed in wt().
uniform float FBSMOOTH <
ui_type = "drag";
ui_min = 0.05;
ui_max = 1.0;
ui_step = 0.025;
ui_label = "Filter Base Smoothing";
> = 0.3;
// Its reciprocal (FSIGMA1) is the exponent of the distance falloff in getw().
uniform float FSIGMA <
ui_type = "drag";
ui_min = 0.15;
ui_max = 1.5;
ui_step = 0.05;
ui_label = "Filter Strength";
> = 1.0;
// Values supplied by the host through the "source" annotations.
// NormalizedNativePixelSize is only referenced from a commented-out line.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
// Point-sampled, clamped view of the back buffer (input of the X pass).
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// Intermediate target: written by the X pass, read by the Y pass.
texture2D tBilateral_P0{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
sampler2D sBilateral_P0{Texture=tBilateral_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// Exponent used by getw().
#define FSIGMA1 (1.0/FSIGMA)
// Thin alias for tex2D.
#define COMPAT_TEXTURE(c,d) tex2D(c,d)
// Colour-similarity weight of two samples: FBSMOOTH minus a scaled relative
// difference, clamped to [0, 0.25].
float wt(float3 A, float3 B)
{
    const float diff_sum = dot(abs(A - B), 1.0.xxx);
    const float level_sum = dot(A + B, 1.0.xxx) + 1.0;
    return clamp(FBSMOOTH - 2.33 * diff_sum / level_sum, 0.0, 0.25);
}
// Total weight of one tap: distance falloff (1 - x)^FSIGMA1, with the base
// clamped at 0, multiplied by the colour-similarity weight of c and p.
float getw(float x, float3 c, float3 p)
{
    const float falloff = pow(max(1.0 - x, 0.0), FSIGMA1);
    const float similarity = wt(c, p);
    return falloff * similarity;
}
// Horizontal pass of the bilateral filter. Reads the back buffer and returns
// the weighted average of the taps at offsets -FRANGE..+FRANGE along x.
float4 PS_Bilateral_X(float4 position: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
    // xy = source size in texels, zw = size of one texel in uv.
    // (An alternative based on NormalizedNativePixelSize is not used.)
    const float4 SourceSize = float4((ViewportSize*BufferToViewportRatio), 1.0/(ViewportSize*BufferToViewportRatio));

    const float2 texel_pos = vTexCoord * SourceSize.xy;
    const float sub_offset = 0.5 - frac(texel_pos.x);
    // uv of the centre of the texel that contains this fragment.
    const float2 center_uv = floor(texel_pos) * SourceSize.zw + 0.5 * SourceSize.zw;
    const float2 step_uv = float2(SourceSize.z, 0.0);

    const float reach = FRANGE;
    const float inv_reach = 1.0 / reach;

    const float3 center_rgb = COMPAT_TEXTURE(sBackBuffer, center_uv).rgb;
    float3 accum = 0.0.xxx;
    float weight_sum = 0.0;
    float tap = -reach;

    do
    {
        const float3 tap_rgb = COMPAT_TEXTURE(sBackBuffer, center_uv + tap * step_uv).rgb;
        // Normalised distance of the tap, limited to 1.
        const float tap_dist = min(abs(tap + sub_offset), reach) * inv_reach;
        const float tap_weight = getw(tap_dist, center_rgb, tap_rgb);
        accum = accum + tap_weight * tap_rgb;
        weight_sum = weight_sum + tap_weight;
        tap = tap + 1.0;
    } while (tap <= reach);

    return float4(accum / weight_sum, 1.0);
}
// Vertical pass of the bilateral filter. Reads the result of the horizontal
// pass (sBilateral_P0) and returns the weighted average of the taps at
// offsets -FRANGE..+FRANGE along y.
float4 PS_Bilateral_Y(float4 position: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
{
    // xy = source size in texels, zw = size of one texel in uv.
    const float4 SourceSize = float4((ViewportSize*BufferToViewportRatio), 1.0/(ViewportSize*BufferToViewportRatio));

    const float2 texel_pos = vTexCoord * SourceSize.xy;
    const float sub_offset = 0.5 - frac(texel_pos.y);
    // uv of the centre of the texel that contains this fragment.
    const float2 center_uv = floor(texel_pos) * SourceSize.zw + 0.5 * SourceSize.zw;
    const float2 step_uv = float2(0.0, SourceSize.w);

    const float reach = FRANGE;
    const float inv_reach = 1.0 / reach;

    const float3 center_rgb = COMPAT_TEXTURE(sBilateral_P0, center_uv).rgb;
    float3 accum = 0.0.xxx;
    float weight_sum = 0.0;
    float tap = -reach;

    do
    {
        const float3 tap_rgb = COMPAT_TEXTURE(sBilateral_P0, center_uv + tap * step_uv).rgb;
        // Normalised distance of the tap, limited to 1.
        const float tap_dist = min(abs(tap + sub_offset), reach) * inv_reach;
        const float tap_weight = getw(tap_dist, center_rgb, tap_rgb);
        accum = accum + tap_weight * tap_rgb;
        weight_sum = weight_sum + tap_weight;
        tap = tap + 1.0;
    } while (tap <= reach);

    return float4(accum / weight_sum, 1.0);
}
// Two-pass separable filter: horizontal pass into tBilateral_P0, then the
// vertical pass, which declares no RenderTarget of its own.
technique Bilateral
{
// Pass 1: horizontal filtering of the back buffer.
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_Bilateral_X;
RenderTarget = tBilateral_P0;
}
// Pass 2: vertical filtering of the pass-1 result.
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_Bilateral_Y;
}
}

View file

@ -1,146 +0,0 @@
#include "ReShade.fxh"
/*
Lanczos3 - Multipass code by Hyllian 2022.
*/
/*
Copyright (C) 2010 Team XBMC
http://www.xbmc.org
Copyright (C) 2011 Stefanos A.
http://www.opentk.com
This Program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This Program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with XBMC; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
http://www.gnu.org/copyleft/gpl.html
*/
// Divides the native pixel size used as the sampling grid in both passes.
uniform float L3_PRESCALE <
ui_type = "drag";
ui_min = 1.0;
ui_max = 8.0;
ui_step = 1.0;
ui_label = "Prescale factor";
> = 1.0;
// Enables the clamp-based anti-ringing step in lanczos3ar().
uniform bool LANCZOS3_ANTI_RINGING <
ui_type = "radio";
ui_label = "Lanczos3 Anti-Ringing";
> = true;
// Values supplied by the host through the "source" annotations.
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float BufferWidth < source = "bufferwidth"; >;
// Intermediate target: written by the X pass, read by the Y pass.
texture2D tLanczos3_P0{Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA8;};
sampler2D sLanczos3_P0{Texture=tLanczos3_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=POINT;MinFilter=POINT;};
// Blend factor between the unclamped and the clamped colour in lanczos3ar().
#define AR_STRENGTH 1.0
// Absolute value with a lower bound of 1e-5; keeps the divisor in weight3() non-zero.
#define FIX(c) (max(abs(c),1e-5))
#define PI 3.1415926535897932384626433832795
// Kernel radius used by weight3().
#define radius 3.0
// Evaluates the (unnormalised) Lanczos3 kernel at three taps spaced one unit
// apart around x. Normalisation is done by the caller, so the radius factor
// is omitted.
float3 weight3(float x)
{
    const float3 tap_offsets = float3(x - 1.5, x - 0.5, x + 0.5);
    const float3 arg = FIX(2.0 * PI * tap_offsets);
    return sin(arg) * sin(arg / radius) / (arg * arg);
}
// Six-tap Lanczos3 interpolation of C0..C5 at fractional position fp, with an
// optional anti-ringing clamp against the four inner samples.
float3 lanczos3ar(float fp, float3 C0, float3 C1, float3 C2, float3 C3, float3 C4, float3 C5)
{
    // Weights for the even taps (C0, C2, C4) and the odd taps (C1, C3, C5).
    float3 even_w = weight3(0.5 - fp * 0.5);
    float3 odd_w = weight3(1.0 - fp * 0.5);

    // Normalise so that all six weights sum to one.
    const float total = dot(even_w, 1.0.xxx) + dot(odd_w, 1.0.xxx);
    even_w /= total;
    odd_w /= total;

    float3 color = mul(even_w, float3x3( C0, C2, C4 )) + mul(odd_w, float3x3( C1, C3, C5));

    if (LANCZOS3_ANTI_RINGING)
    {
        const float3 unclamped = color;
        const float3 lo = min(min(C1, C2), min(C3, C4));
        const float3 hi = max(max(C1, C2), max(C3, C4));
        const float3 clamped = clamp(unclamped, lo, hi);
        // The clamped value is used per channel only where
        // (C1-C2)*(C3-C4) is non-negative.
        color = lerp(unclamped, clamped, AR_STRENGTH*step(0.0, (C1-C2)*(C3-C4)));
    }

    return color;
}
// Horizontal Lanczos3 pass over the back buffer.
// Both dimensions are still unfiltered, so the sampling grid is the
// (prescaled) native pixel grid in x and y.
float4 PS_Lanczos3_X(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
{
    const float2 texel = NormalizedNativePixelSize/L3_PRESCALE;
    const float2 grid_pos = uv_tx.xy/texel - float2(0.5, 0.0);
    const float2 base_uv = (floor(grid_pos) + 0.5.xx) * texel;
    const float2 phase = frac(grid_pos);

    // Six taps at texel offsets -2..+3 along x.
    const float3 t0 = tex2D(ReShade::BackBuffer, base_uv + texel*float2(-2.0, 0.0)).rgb;
    const float3 t1 = tex2D(ReShade::BackBuffer, base_uv + texel*float2(-1.0, 0.0)).rgb;
    const float3 t2 = tex2D(ReShade::BackBuffer, base_uv + texel*float2( 0.0, 0.0)).rgb;
    const float3 t3 = tex2D(ReShade::BackBuffer, base_uv + texel*float2( 1.0, 0.0)).rgb;
    const float3 t4 = tex2D(ReShade::BackBuffer, base_uv + texel*float2( 2.0, 0.0)).rgb;
    const float3 t5 = tex2D(ReShade::BackBuffer, base_uv + texel*float2( 3.0, 0.0)).rgb;

    return float4(lanczos3ar(phase.x, t0, t1, t2, t3, t4, t5), 1.0);
}
// Vertical Lanczos3 pass over the result of the horizontal pass.
// The horizontal dimension is already filtered, so x uses the buffer-width
// texel size while y still uses the (prescaled) native pixel size.
float4 PS_Lanczos3_Y(float4 vpos: SV_Position, float2 uv_tx : TEXCOORD) : SV_Target
{
    const float2 texel = float2(1.0/BufferWidth, NormalizedNativePixelSize.y/L3_PRESCALE);
    const float2 grid_pos = uv_tx.xy/texel - float2(0.0, 0.5);
    const float2 base_uv = (floor(grid_pos) + 0.5.xx) * texel;
    const float2 phase = frac(grid_pos);

    // Six taps at texel offsets -2..+3 along y.
    const float3 t0 = tex2D(sLanczos3_P0, base_uv + texel*float2(0.0, -2.0)).rgb;
    const float3 t1 = tex2D(sLanczos3_P0, base_uv + texel*float2(0.0, -1.0)).rgb;
    const float3 t2 = tex2D(sLanczos3_P0, base_uv + texel*float2(0.0, 0.0)).rgb;
    const float3 t3 = tex2D(sLanczos3_P0, base_uv + texel*float2(0.0, 1.0)).rgb;
    const float3 t4 = tex2D(sLanczos3_P0, base_uv + texel*float2(0.0, 2.0)).rgb;
    const float3 t5 = tex2D(sLanczos3_P0, base_uv + texel*float2(0.0, 3.0)).rgb;

    return float4(lanczos3ar(phase.y, t0, t1, t2, t3, t4, t5), 1.0);
}
// Two-pass separable scaler: horizontal pass into tLanczos3_P0, then the
// vertical pass, which declares no RenderTarget of its own.
technique Lanczos3
{
// Pass 1: horizontal filtering of the back buffer.
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_Lanczos3_X;
RenderTarget = tLanczos3_P0;
}
// Pass 2: vertical filtering of the pass-1 result.
pass
{
VertexShader = PostProcessVS;
PixelShader = PS_Lanczos3_Y;
}
}

View file

@ -1,151 +0,0 @@
#include "ReShade.fxh"
/*
Deblur-Luma Shader
Copyright (C) 2005 - 2024 guest(r) - guest.r@gmail.com
Luma adaptation by Hyllian
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// Distance of the neighbour taps, in viewport pixels (divided by ViewportSize
// in VS_Deblur_Luma).
uniform float OFFSET <
ui_type = "drag";
ui_min = 0.25;
ui_max = 4.0;
ui_step = 0.25;
ui_label = "Deblur offset";
> = 2.0;
// Exponent applied to the min/max luma differences in PS_Deblur_Luma.
uniform float DEBLUR <
ui_type = "drag";
ui_min = 1.0;
ui_max = 7.0;
ui_step = 0.25;
ui_label = "Deblur str.";
> = 1.75;
// Final blend factor: 0 outputs the deblurred colour, 1 the contrast-adaptive mix.
uniform float SMART <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Smart deblur";
> = 1.0;
// Supplied by the host through the "source" annotation.
uniform float2 ViewportSize < source = "viewportsize"; >;
// RGB -> luma weights.
static const float3 luma = float3(0.299,0.587,0.114);
// Small epsilon added to differences and denominators in PS_Deblur_Luma.
static const float4 res = float4(0.0001, 0.0001, 0.0001, 0.0001);
// All-ones vector; dot() with it sums the four components.
static const float4 uno = float4(1.,1.,1.,1.);
// Smallest of the eight components of a4 and b4.
float min8(float4 a4, float4 b4)
{
    const float4 pairwise = min(a4, b4);
    return min(min(pairwise.x, pairwise.z), min(pairwise.y, pairwise.w));
}
// Largest of the eight components of a4 and b4.
float max8(float4 a4, float4 b4)
{
    const float4 pairwise = max(a4, b4);
    return max(max(pairwise.x, pairwise.z), max(pairwise.y, pairwise.w));
}
// Tap coordinates passed from VS_Deblur_Luma to PS_Deblur_Luma.
// Each float4 packs three x coordinates (left, centre, right) in xyz and the
// y coordinate of one row in w.
struct ST_VertexOut
{
float4 t1 : TEXCOORD1; // row at -dy
float4 t2 : TEXCOORD2; // centre row
float4 t3 : TEXCOORD3; // row at +dy
};
// Vertex shader: emits a single oversized triangle covering the whole screen
// and precomputes the texture coordinates of the 3x3 tap neighbourhood.
void VS_Deblur_Luma(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
{
    // Vertex ids 0, 1, 2 yield uv (0, 0), (0, 2) and (2, 0) respectively.
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // Tap spacing in uv: OFFSET viewport pixels along each axis.
    const float2 tap = OFFSET / ViewportSize;
    const float4 base = texcoord.xxxy;

    vVARS.t1 = base + float4(-tap.x, 0.0, tap.x, -tap.y); // c00 c10 c20
    vVARS.t2 = base + float4(-tap.x, 0.0, tap.x, 0.0);    // c01 c11 c21
    vVARS.t3 = base + float4(-tap.x, 0.0, tap.x, tap.y);  // c02 c12 c22
}
// Luma-driven deblur over a 3x3 neighbourhood of the back buffer.
// Derives a target luma from the neighbourhood min/max, weights every tap by
// the inverse distance of its luma to that target, and blends the weighted
// average with the centre colour according to local contrast and SMART.
float4 PS_Deblur_Luma(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
{
    // 3x3 taps, named c<column><row>.
    float3 c11 = tex2D(ReShade::BackBuffer, vVARS.t2.yw).xyz;
    const float3 c00 = tex2D(ReShade::BackBuffer, vVARS.t1.xw).xyz;
    const float3 c20 = tex2D(ReShade::BackBuffer, vVARS.t1.zw).xyz;
    const float3 c22 = tex2D(ReShade::BackBuffer, vVARS.t3.zw).xyz;
    const float3 c02 = tex2D(ReShade::BackBuffer, vVARS.t3.xw).xyz;
    const float3 c10 = tex2D(ReShade::BackBuffer, vVARS.t1.yw).xyz;
    const float3 c21 = tex2D(ReShade::BackBuffer, vVARS.t2.zw).xyz;
    const float3 c12 = tex2D(ReShade::BackBuffer, vVARS.t3.yw).xyz;
    const float3 c01 = tex2D(ReShade::BackBuffer, vVARS.t2.xw).xyz;

    // Horizontal/vertical neighbours and diagonal neighbours, one per row.
    const float4x3 cross_rgb = float4x3(c10, c01, c21, c12);
    const float4x3 diag_rgb = float4x3(c00, c02, c20, c22);

    const float4 cross_luma = mul(cross_rgb, luma);
    const float4 diag_luma = mul(diag_rgb, luma);
    const float center_luma = dot(c11, luma);

    // x = neighbourhood minimum, y = neighbourhood maximum (centre included).
    const float2 mnmx = float2(min(center_luma, min8(cross_luma, diag_luma)),
                               max(center_luma, max8(cross_luma, diag_luma)));

    float2 spread = abs(float2(center_luma, center_luma) - mnmx) + res.xy;
    spread = pow(spread, float2(DEBLUR, DEBLUR));

    // Target luma: min and max blended by the opposite side's distance.
    const float target = dot(spread, mnmx.yx)/(spread.x + spread.y);

    // Inverse-distance weights of every tap relative to the target luma.
    float k_center = 1.0/(abs(center_luma - target) + res.x);
    float4 k_cross = float4(1.0/(abs(cross_luma-float4(target, target, target, target)) + res));
    float4 k_diag = float4(1.0/(abs(diag_luma-float4(target, target, target, target)) + res));

    // Subtract a mean-based threshold and drop taps that fall below it.
    const float avg = (dot(k_cross + k_diag, uno) + k_center)/10.0;
    k_cross = max(k_cross-float4(avg, avg, avg, avg), float4(0.0, 0.0, 0.0, 0.0));
    k_diag = max(k_diag-float4(avg, avg, avg, avg), float4(0.0, 0.0, 0.0, 0.0));
    k_center = max(k_center-avg, 0.0);

    const float3 d11 = (mul(k_cross, cross_rgb) + mul(k_diag, diag_rgb) + (k_center + res.x)*c11) / (dot(k_cross + k_diag, uno) + k_center + res.x);

    // Contrast-adaptive mix, then the SMART blend back towards d11.
    const float contrast = mnmx.y - mnmx.x;
    c11 = lerp(c11, d11, clamp(1.75*contrast-0.125, 0.0, 1.0));
    c11 = lerp(d11, c11, SMART);

    return float4(c11, 1.0);
}
// Single-pass technique; the custom vertex shader supplies the tap coordinates.
technique Deblur_Luma
{
pass
{
VertexShader = VS_Deblur_Luma;
PixelShader = PS_Deblur_Luma;
}
}

View file

@ -1,325 +0,0 @@
#include "ReShade.fxh"
/*
Geom Shader - a modified CRT-Geom without CRT features made to be appended/integrated
into any other shaders and provide curvature/warping/oversampling features.
Adapted by Hyllian (2024).
*/
/*
CRT-interlaced
Copyright (C) 2010-2012 cgwg, Themaister and DOLLS
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
(cgwg gave their consent to have the original version of this shader
distributed under the GPL in this message:
http://board.byuu.org/viewtopic.php?p=26075#p26075
"Feel free to distribute my shaders under the GPL. After all, the
barrel distortion code was taken from the Curvature shader, which is
under the GPL."
)
This shader variant is pre-configured with screen curvature
*/
// Enables the curvature warp in PS_CRT_Geom. On by default.
// The default is written as a bool literal to match the declared type
// (previously the float literal 1.0).
uniform bool geom_curvature <
ui_type = "radio";
ui_label = "Geom Curvature Toggle";
> = true;
// Curvature radius used by intersect(), bkwtrans() and fwtrans().
uniform float geom_R <
ui_type = "drag";
ui_min = 0.1;
ui_max = 10.0;
ui_step = 0.1;
ui_label = "Geom Curvature Radius";
> = 2.0;
// Distance term used by intersect(), fwtrans() and maxscale().
uniform float geom_d <
ui_type = "drag";
ui_min = 0.1;
ui_max = 3.0;
ui_step = 0.1;
ui_label = "Geom Distance";
> = 1.5;
// Selects which axis the `aspect` macro scales by ViewportHeight/ViewportWidth.
// Off by default. The default is written as a bool literal to match the
// declared type (previously the float literal 0.0).
uniform bool geom_invert_aspect <
ui_type = "radio";
ui_label = "Geom Curvature Aspect Inversion";
> = false;
// Corner size used by corner().
uniform float geom_cornersize <
ui_type = "drag";
ui_min = 0.001;
ui_max = 1.0;
ui_step = 0.005;
ui_label = "Geom Corner Size";
> = 0.03;
// Slope of the corner mask edge in corner().
uniform float geom_cornersmooth <
ui_type = "drag";
ui_min = 80.0;
ui_max = 2000.0;
ui_step = 100.0;
ui_label = "Geom Corner Smoothness";
> = 1000.0;
// Tilt angles; their sine and cosine are computed in VS_CRT_Geom.
uniform float geom_x_tilt <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Geom Horizontal Tilt";
> = 0.0;
uniform float geom_y_tilt <
ui_type = "drag";
ui_min = -1.0;
ui_max = 1.0;
ui_step = 0.05;
ui_label = "Geom Vertical Tilt";
> = 0.0;
// Overscan in percent; divided by 100 in transform().
uniform float geom_overscan_x <
ui_type = "drag";
ui_min = -125.0;
ui_max = 125.0;
ui_step = 0.5;
ui_label = "Geom Horiz. Overscan %";
> = 100.0;
uniform float geom_overscan_y <
ui_type = "drag";
ui_min = -125.0;
ui_max = 125.0;
ui_step = 0.5;
ui_label = "Geom Vert. Overscan %";
> = 100.0;
// Image centre shift; divided by 100 and subtracted from the texcoord in
// VS_CRT_Geom.
uniform float centerx <
ui_type = "drag";
ui_min = -100.0;
ui_max = 100.0;
ui_step = 0.1;
ui_label = "Image Center X";
> = 0.00;
uniform float centery <
ui_type = "drag";
ui_min = -100.0;
ui_max = 100.0;
ui_step = 0.1;
ui_label = "Image Center Y";
> = 0.00;
// Multiplier applied to the sampled colour in PS_CRT_Geom.
uniform float geom_lum <
ui_type = "drag";
ui_min = 0.5;
ui_max = 2.0;
ui_step = 0.01;
ui_label = "Geom Luminance";
> = 1.0;
// Exponent applied to the sampled colour (see TEX2D / PS_CRT_Geom).
uniform float geom_target_gamma <
ui_type = "drag";
ui_min = 0.1;
ui_max = 5.0;
ui_step = 0.1;
ui_label = "Geom Target Gamma";
> = 2.4;
// The final colour is raised to 1/geom_monitor_gamma in PS_CRT_Geom.
uniform float geom_monitor_gamma <
ui_type = "drag";
ui_min = 0.1;
ui_max = 5.0;
ui_step = 0.1;
ui_label = "Geom Monitor Gamma";
> = 2.2;
// Values supplied by the host through the "source" annotations.
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
// Linearly filtered back buffer with BORDER addressing on every axis.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
// Comment the next line to disable interpolation in linear gamma (and
// gain speed).
#define LINEAR_PROCESSING
// Enable 3x oversampling of the beam profile; improves moire effect caused by scanlines+curvature
#define OVERSAMPLE
// Use the older, purely gaussian beam profile; uncomment for speed
//#define USEGAUSSIAN
// Macros.
// Absolute value with a lower bound of 1e-5, used to keep divisors away from
// zero. Fully parenthesised and without a trailing ';' so that it expands to
// a plain expression usable inside larger expressions.
#define FIX(c) (max(abs(c), 1e-5))
#define PI 3.141592653589
// TEX2D samples the back buffer; with LINEAR_PROCESSING the sample is raised
// to geom_target_gamma right away.
#ifdef LINEAR_PROCESSING
# define TEX2D(c) pow(tex2D(sBackBuffer, (c)), geom_target_gamma.xxxx)
#else
# define TEX2D(c) tex2D(sBackBuffer, (c))
#endif
// aspect ratio
#define aspect (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
// Constant overscan factor (no trailing ';' so it expands to an expression).
#define overscan (1.01.xx)
// Values precomputed in VS_CRT_Geom and consumed by PS_CRT_Geom.
struct ST_VertexOut
{
float2 sinangle : TEXCOORD1; // sin of (geom_x_tilt, geom_y_tilt)
float2 cosangle : TEXCOORD2; // cos of (geom_x_tilt, geom_y_tilt)
float3 stretch : TEXCOORD3; // result of maxscale(): xy offset, z scale
float2 TextureSize : TEXCOORD4; // 1.0 / NormalizedNativePixelSize
};
// Solves the quadratic A*t^2 + B*t + C = 0 built from xy, the tilt terms,
// geom_R and geom_d, and returns the root (-B - sqrt(discriminant)) / (2A).
float intersect(float2 xy, float2 sinangle, float2 cosangle)
{
    const float dist_sq = geom_d*geom_d;
    const float qa = dot(xy,xy) + dist_sq;
    const float qb = 2.0*(geom_R*(dot(xy,sinangle) - geom_d*cosangle.x*cosangle.y) - dist_sq);
    const float qc = dist_sq + 2.0*geom_R*geom_d*cosangle.x*cosangle.y;
    const float discriminant = qb*qb - 4.0*qa*qc;
    return (-qb - sqrt(discriminant))/(2.0*qa);
}
// Backward transform used by transform() and maxscale(): maps xy through the
// intersect() root and the tilt terms to a coordinate on the curved surface.
float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
    const float hit = intersect(xy, sinangle, cosangle);
    const float2 pt = (hit.xx*xy + geom_R.xx*sinangle) / geom_R.xx;
    const float2 pt_over_cos = pt/cosangle;
    const float2 tangent = sinangle/cosangle;

    // Second quadratic; the '+' root is taken.
    const float qa = dot(tangent, tangent) + 1.0;
    const float qb = -2.0*dot(pt_over_cos, tangent);
    const float qc = dot(pt_over_cos, pt_over_cos) - 1.0;
    const float root = (-qb + sqrt(qb*qb - 4.0*qa*qc)) / (2.0*qa);

    const float2 uv = (pt - root*sinangle) / cosangle;
    // FIX keeps the arc length away from zero before it is used as a divisor.
    const float arc = FIX(geom_R*acos(root));
    return uv*arc/sin(arc/geom_R);
}
// Forward transform used by maxscale(): maps a surface coordinate uv back to
// the flat plane using geom_R, geom_d and the tilt terms.
float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
{
    // FIX keeps the length away from zero before it is used as a divisor.
    const float len = FIX(sqrt(dot(uv, uv)));
    const float2 scaled = uv * (sin(len/geom_R)/len);
    const float one_minus_cos = 1.0 - cos(len/geom_R);
    const float denom = geom_d/geom_R + one_minus_cos*cosangle.x*cosangle.y + dot(scaled,sinangle);
    return geom_d*(scaled*cosangle - one_minus_cos*sinangle)/denom;
}
// Computes the stretch applied in transform(): xy is the midpoint of the
// forward-transformed extents (scaled by aspect), z is the larger of the two
// extents.
float3 maxscale(float2 sinangle, float2 cosangle)
{
    const float2 mid = bkwtrans(-geom_R * sinangle / (1.0 + geom_R/geom_d*cosangle.x*cosangle.y), sinangle, cosangle);
    const float2 half_size = 0.5.xx*aspect;

    // Forward-transform the four edge midpoints.
    const float lo_x = fwtrans(float2(-half_size.x, mid.y), sinangle, cosangle).x;
    const float lo_y = fwtrans(float2( mid.x, -half_size.y), sinangle, cosangle).y;
    const float hi_x = fwtrans(float2(+half_size.x, mid.y), sinangle, cosangle).x;
    const float hi_y = fwtrans(float2( mid.x, +half_size.y), sinangle, cosangle).y;

    const float2 lo = float2(lo_x, lo_y)/aspect;
    const float2 hi = float2(hi_x, hi_y)/aspect;
    return float3((hi+lo)*aspect*0.5,max(hi.x-lo.x, hi.y-lo.y));
}
// Maps a screen coordinate to the warped sampling coordinate: centre and
// stretch, apply bkwtrans(), then undo the overscan percentage and aspect and
// move the origin back to the corner.
float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
{
    const float2 centered = (coord - 0.5.xx)*aspect*stretch.z + stretch.xy;
    const float2 overscan_frac = float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0);
    const float2 warped = bkwtrans(centered, sinangle, cosangle);
    return (warped / overscan_frac/aspect + 0.5.xx);
}
// Vertex shader: emits a single oversized triangle covering the whole screen
// and precomputes the tilt and stretch values needed by the fragment shader.
void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
{
    // Vertex ids 0, 1, 2 yield uv (0, 0), (0, 2) and (2, 0) respectively.
    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // Screen centering (centerx/centery are percentages).
    texcoord = texcoord - float2(centerx,centery)/100.0;

    const float2 tilt = float2(geom_x_tilt, geom_y_tilt);
    vVARS.sinangle = sin(tilt);
    vVARS.cosangle = cos(tilt);
    vVARS.stretch = maxscale(vVARS.sinangle, vVARS.cosangle);

    // Source size in native pixels.
    const float2 native_size = 1.0/NormalizedNativePixelSize;
    vVARS.TextureSize = float2(native_size.x, native_size.y);
}
// Corner mask in [0, 1]: 1 away from the corners, falling to 0 once the
// distance from the corner inset exceeds geom_cornersize; geom_cornersmooth
// sets the slope of the edge.
float corner(float2 coord)
{
    const float2 edge_dist = min(coord, 1.0.xx - coord) * aspect;
    const float2 corner_size = geom_cornersize.xx;
    const float2 inset = (corner_size - min(edge_dist, corner_size));
    const float inset_len = sqrt(dot(inset, inset));
    return clamp((corner_size.x - inset_len)*geom_cornersmooth, 0.0, 1.0);
}
// Sum of the absolute screen-space derivatives of value along x and y.
float fwidth(float value)
{
    const float along_x = abs(ddx(value));
    const float along_y = abs(ddy(value));
    return along_x + along_y;
}
// Pixel shader: optionally warps the texcoord with transform(), samples the
// back buffer, applies luminance and the corner mask, and converts the result
// with 1/geom_monitor_gamma.
float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
{
    // Texture coordinates of the texel containing the active pixel.
    float2 xy = vTexCoord;
    if (geom_curvature == true)
    {
        xy = transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch);
    }

    const float corner_mask = corner((xy-0.5.xx) * BufferToViewportRatio + 0.5.xx);
    const float2 uv_ratio = frac((xy * vVARS.TextureSize - 0.5.xx) / vVARS.TextureSize);

    float4 col = TEX2D(xy);
#ifndef LINEAR_PROCESSING
    // TEX2D did not apply the target gamma, so apply it here.
    col = pow(col, geom_target_gamma.xxxx);
#endif
    col.rgb *= (geom_lum * step(0.0, uv_ratio.y));

    const float3 masked = col.rgb * corner_mask.xxx;
    // Convert the image gamma for display on our output device.
    const float3 display_rgb = pow(masked, 1.0 / geom_monitor_gamma.xxx);
    return float4(display_rgb, 1.0);
}
// Single-pass technique; the custom vertex shader supplies the tilt/stretch
// values used by the pixel shader.
technique CRT_Geom
{
pass
{
VertexShader = VS_CRT_Geom;
PixelShader = PS_CRT_Geom;
}
}

View file

@ -1,224 +0,0 @@
#ifndef GEOM_PARAMS_H
#define GEOM_PARAMS_H
/*
Geom Shader - a modified CRT-Geom without CRT features made to be appended/integrated
into any other shaders and provide curvature/warping/oversampling features.
Adapted by Hyllian (2024).
*/
/*
CRT-interlaced
Copyright (C) 2010-2012 cgwg, Themaister and DOLLS
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
(cgwg gave their consent to have the original version of this shader
distributed under the GPL in this message:
http://board.byuu.org/viewtopic.php?p=26075#p26075
"Feel free to distribute my shaders under the GPL. After all, the
barrel distortion code was taken from the Curvature shader, which is
under the GPL."
)
This shader variant is pre-configured with screen curvature
*/
// User-tweakable parameters of the Geom curvature code. All of them appear in
// the "Geom Curvature" UI category.

// Curvature on/off toggle (off by default).
uniform bool geom_curvature <
    ui_type = "radio";
    ui_category = "Geom Curvature";
    ui_label = "Geom Curvature Toggle";
> = false;

// Curvature radius used by intersect()/bkwtrans()/fwtrans().
uniform float geom_R <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 10.0;
    ui_step = 0.1;
    ui_category = "Geom Curvature";
    ui_label = "Geom Curvature Radius";
> = 2.0;

// Distance term used by intersect()/fwtrans().
uniform float geom_d <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 3.0;
    ui_step = 0.1;
    ui_category = "Geom Curvature";
    ui_label = "Geom Distance";
> = 1.5;

// Selects which axis the `aspect` macro scales by ViewportHeight/ViewportWidth.
uniform bool geom_invert_aspect <
    ui_type = "radio";
    ui_category = "Geom Curvature";
    ui_label = "Geom Curvature Aspect Inversion";
> = false;

// Corner radius used by corner().
uniform float geom_cornersize <
    ui_type = "drag";
    ui_min = 0.001;
    ui_max = 1.0;
    ui_step = 0.005;
    ui_category = "Geom Curvature";
    ui_label = "Geom Corner Size";
> = 0.03;

// Multiplier applied to the corner falloff in corner() before clamping.
uniform float geom_cornersmooth <
    ui_type = "drag";
    ui_min = 80.0;
    ui_max = 2000.0;
    ui_step = 100.0;
    ui_category = "Geom Curvature";
    ui_label = "Geom Corner Smoothness";
> = 1000.0;

// Horizontal tilt.
uniform float geom_x_tilt <
    ui_type = "drag";
    ui_min = -1.0;
    ui_max = 1.0;
    ui_step = 0.05;
    ui_category = "Geom Curvature";
    ui_label = "Geom Horizontal Tilt";
> = 0.0;

// Vertical tilt.
uniform float geom_y_tilt <
    ui_type = "drag";
    ui_min = -1.0;
    ui_max = 1.0;
    ui_step = 0.05;
    ui_category = "Geom Curvature";
    ui_label = "Geom Vertical Tilt";
> = 0.0;

// Horizontal overscan in percent.
// NOTE: transform() divides by geom_overscan_x / 100.0, so 0 (which the UI
// range allows) results in a division by zero.
uniform float geom_overscan_x <
    ui_type = "drag";
    ui_min = -125.0;
    ui_max = 125.0;
    ui_step = 0.5;
    ui_category = "Geom Curvature";
    ui_label = "Geom Horiz. Overscan %";
> = 100.0;

// Vertical overscan in percent; the same division-by-zero caveat applies.
uniform float geom_overscan_y <
    ui_type = "drag";
    ui_min = -125.0;
    ui_max = 125.0;
    ui_step = 0.5;
    ui_category = "Geom Curvature";
    ui_label = "Geom Vert. Overscan %";
> = 100.0;

// Image centre offset along X.
uniform float centerx <
    ui_type = "drag";
    ui_min = -100.0;
    ui_max = 100.0;
    ui_step = 0.1;
    ui_category = "Geom Curvature";
    ui_label = "Image Center X";
> = 0.00;

// Image centre offset along Y.
uniform float centery <
    ui_type = "drag";
    ui_min = -100.0;
    ui_max = 100.0;
    ui_step = 0.1;
    ui_category = "Geom Curvature";
    ui_label = "Image Center Y";
> = 0.00;
// Macros.
// Keeps |c| away from zero (minimum 1e-5) so it is safe to divide by.
// The body has no trailing semicolon, so the macro also works inside larger
// expressions; existing `x = FIX(y);` uses are unaffected.
#define FIX(c) max(abs(c), 1e-5)
// aspect ratio
// Per-axis scale: (1, H/W) by default, (H/W, 1) when geom_invert_aspect is set.
#define aspect (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
// Returns the root (-B - sqrt(B*B - 4AC)) / (2A) of the quadratic built from
// `xy`, the tilt sine/cosine and the geom_R / geom_d uniforms.
// NOTE(review): presumably the ray parameter at which the view ray hits the
// curved screen - confirm against the original CRT-Geom derivation.
float intersect(float2 xy, float2 sinangle, float2 cosangle)
{
    float qa = dot(xy, xy) + geom_d * geom_d;
    float qb = 2.0 * (geom_R * (dot(xy, sinangle) - geom_d * cosangle.x * cosangle.y) - geom_d * geom_d);
    float qc = geom_d * geom_d + 2.0 * geom_R * geom_d * cosangle.x * cosangle.y;
    float disc = qb * qb - 4.0 * qa * qc;
    return (-qb - sqrt(disc)) / (2.0 * qa);
}
// Backward transform: maps the coordinate `xy` through intersect() and the
// tilt terms to a coordinate on the curved surface.
// NOTE(review): presumably flat screen space -> curved image space; confirm.
float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
    float t = intersect(xy, sinangle, cosangle);
    float2 pt = (t * xy + geom_R * sinangle) / geom_R;
    float2 pt_over_cos = pt / cosangle;
    float2 tan_tilt = sinangle / cosangle;

    // Larger root of a second quadratic in the tilt tangent.
    float qa = dot(tan_tilt, tan_tilt) + 1.0;
    float qb = -2.0 * dot(pt_over_cos, tan_tilt);
    float qc = dot(pt_over_cos, pt_over_cos) - 1.0;
    float root = (-qb + sqrt(qb * qb - 4.0 * qa * qc)) / (2.0 * qa);

    float2 uv = (pt - root * sinangle) / cosangle;
    // FIX keeps r non-zero so the division by sin(r / geom_R) stays finite.
    float r = FIX(geom_R * acos(root));
    return uv * r / sin(r / geom_R);
}
// Forward transform: the counterpart of bkwtrans(), projecting `uv` using
// geom_R, geom_d and the tilt sine/cosine.
float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
{
    // FIX keeps the length non-zero so it can be divided by below.
    float len = FIX(sqrt(dot(uv, uv)));
    float2 scaled = uv * (sin(len / geom_R) / len);
    float drop = 1.0 - cos(len / geom_R);
    float denom = geom_d / geom_R + drop * cosangle.x * cosangle.y + dot(scaled, sinangle);
    return geom_d * (scaled * cosangle - drop * sinangle) / denom;
}
// Computes the stretch term used by transform(): xy holds the aspect-scaled
// midpoint of the forward-transformed extents and z the larger of the two
// extents' sizes.
float3 maxscale(float2 sinangle, float2 cosangle)
{
    float2 centre = bkwtrans(-geom_R * sinangle / (1.0 + geom_R / geom_d * cosangle.x * cosangle.y), sinangle, cosangle);
    float2 half_ext = float2(0.5, 0.5) * aspect;

    float lo_x = fwtrans(float2(-half_ext.x, centre.y), sinangle, cosangle).x;
    float lo_y = fwtrans(float2(centre.x, -half_ext.y), sinangle, cosangle).y;
    float hi_x = fwtrans(float2(+half_ext.x, centre.y), sinangle, cosangle).x;
    float hi_y = fwtrans(float2(centre.x, +half_ext.y), sinangle, cosangle).y;

    float2 lo = float2(lo_x, lo_y) / aspect;
    float2 hi = float2(hi_x, hi_y) / aspect;

    float2 mid = (hi + lo) * aspect * 0.5;
    float span = max(hi.x - lo.x, hi.y - lo.y);
    return float3(mid, span);
}
// Applies the curvature to a [0,1] texture coordinate: recentres and scales
// it by `stretch`, runs bkwtrans(), then undoes the overscan percentage and
// aspect scaling and shifts back by 0.5.
float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
{
    float2 centred = (coord - float2(0.5, 0.5)) * aspect * stretch.z + stretch.xy;
    float2 warped = bkwtrans(centred, sinangle, cosangle);
    // Overscan uniforms are percentages.
    float2 overscan_scale = float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0);
    return warped / overscan_scale / aspect + float2(0.5, 0.5);
}
// Rounded-corner mask factor for `coord`.
// Measures how far the aspect-scaled coordinate lies inside a band of width
// geom_cornersize along the edges, and returns
// (geom_cornersize - distance) * geom_cornersmooth clamped to [0, 1].
float corner(float2 coord)
{
    // Per-axis distance to the nearer of the 0 and 1 edges, aspect-scaled.
    float2 edge = min(coord, float2(1.0, 1.0) - coord) * aspect;
    float2 radius = float2(geom_cornersize, geom_cornersize);
    // Zero on any axis where the point is at least `radius` from the edge.
    float2 inset = radius - min(edge, radius);
    float dist = sqrt(dot(inset, inset));
    return clamp((radius.x - dist) * geom_cornersmooth, 0.0, 1.0);
}
// Sum of the absolute screen-space derivatives of `value` (|ddx| + |ddy|).
float fwidth(float value)
{
    float dx = abs(ddx(value));
    float dy = abs(ddy(value));
    return dx + dy;
}
#endif // GEOM_PARAMS_H

View file

@ -1,242 +0,0 @@
#ifndef MASK_PARAMS_H
#define MASK_PARAMS_H
// Strength of the dark mask subpixels (UI range 0..1, default 0.5).
// NOTE(review): presumably passed as mask_dark_str to mask_weights() - confirm against the including shader.
uniform float MASK_DARK_STRENGTH <
ui_type = "drag";
ui_min = 0.0;
ui_max = 1.0;
ui_step = 0.01;
ui_category = "CRT Mask";
ui_label = "MASK DARK SUBPIXEL STRENGTH";
> = 0.5;
// Strength of the light mask subpixels (UI range 0..6, default 0.5).
// NOTE(review): presumably passed as mask_light_str to mask_weights() - confirm against the including shader.
uniform float MASK_LIGHT_STRENGTH <
ui_type = "drag";
ui_min = 0.0;
ui_max = 6.0;
ui_step = 0.01;
ui_category = "CRT Mask";
ui_label = "MASK LIGHT SUBPIXEL STRENGTH";
> = 0.5;
/* Mask code pasted from subpixel_masks.h. Masks 3 and 4 added. */
// Returns the per-channel (RGB) multiplier of the phosphor mask at `coord`.
//
// coord             - pixel position; each pattern repeats using coord % N.
// phosphor_layout   - pattern selector, 0..15. 0 and any unknown value
//                     return float3(1,1,1), i.e. no mask.
// monitor_subpixels - 1.0 selects the RGB ordering; any other value swaps
//                     the red/blue (and yellow/cyan) entries.
// mask_light_str    - added to 1.0 to form the weight of a lit subpixel.
// mask_dark_str     - subtracted from 1.0 to form the weight of a dark one.
float3 mask_weights(float2 coord, int phosphor_layout, float monitor_subpixels, float mask_light_str, float mask_dark_str){
    // Layout 0: mask disabled.
    if (phosphor_layout == 0)
        return float3(1., 1., 1.);

    float on = 1. + mask_light_str;
    float off = 1. - mask_dark_str;
    bool rgb_order = (monitor_subpixels == 1.0);

    float3 red     = rgb_order ? float3(on,  off, off) : float3(off, off, on );
    float3 green   = float3(off, on,  off);
    float3 blue    = rgb_order ? float3(off, off, on ) : float3(on,  off, off);
    float3 magenta = float3(on,  off, on );
    float3 yellow  = rgb_order ? float3(on,  on,  off) : float3(off, on,  on );
    float3 cyan    = rgb_order ? float3(off, on,  on ) : float3(on,  on,  off);
    float3 black   = float3(off, off, off);
    float3 white   = float3(on,  on,  on );

    if (phosphor_layout == 1) {
        // classic aperture for RGB panels; good for 1080p, too small for 4K+
        // aka aperture_1_2_bgr
        return lerp(magenta, green, floor(coord.x % 2.0));
    }

    if (phosphor_layout == 2) {
        // Classic RGB layout; good for 1080p and lower
        float3 rgb3[3] = {red, green, blue};
        int col = int(floor(coord.x % 3.0));
        return rgb3[col];
    }

    if (phosphor_layout == 3) {
        // black and white aperture; good for weird subpixel layouts and low brightness; good for 1080p and lower
        float3 bw3[3] = {black, white, black};
        int col = int(floor(coord.x % 3.0));
        return bw3[col];
    }

    if (phosphor_layout == 4) {
        // reduced TVL aperture for RGB panels. Good for 4k.
        // aperture_2_4_rgb
        float3 big_ap_rgb[4] = {red, yellow, cyan, blue};
        int col = int(floor(coord.x % 4.0));
        return big_ap_rgb[col];
    }

    if (phosphor_layout == 5) {
        // black and white aperture; good for weird subpixel layouts and low brightness; good for 4k
        float3 bw4[4] = {black, black, white, white};
        int col = int(floor(coord.x % 4.0));
        return bw4[col];
    }

    if (phosphor_layout == 6) {
        // aperture_1_4_rgb; good for simulating lower
        float3 ap4[4] = {red, green, blue, black};
        int col = int(floor(coord.x % 4.0));
        return ap4[col];
    }

    if (phosphor_layout == 7) {
        // 2x2 shadow mask for RGB panels; good for 1080p, too small for 4K+
        // aka delta_1_2x1_bgr
        float3 aperture = lerp(magenta, green, floor(coord.x % 2.0));
        float3 inverse_aperture = lerp(green, magenta, floor(coord.x % 2.0));
        return lerp(aperture, inverse_aperture, floor(coord.y % 2.0));
    }

    if (phosphor_layout == 8) {
        // delta_2_4x1_rgb
        float3 delta2[8] = {
            red,  yellow, cyan, blue,
            cyan, blue,   red,  yellow
        };
        int row = int(floor(coord.y % 2.0));
        int col = int(floor(coord.x % 4.0));
        return delta2[4 * row + col];
    }

    if (phosphor_layout == 9) {
        // delta_1_4x1_rgb; dunno why this is called 4x1 when it's obviously 4x2 /shrug
        float3 delta1[8] = {
            red,  green, blue, black,
            blue, black, red,  green
        };
        int row = int(floor(coord.y % 2.0));
        int col = int(floor(coord.x % 4.0));
        return delta1[4 * row + col];
    }

    if (phosphor_layout == 10) {
        // delta_2_4x2_rgb
        float3 delta4[16] = {
            red,  yellow, cyan, blue,
            red,  yellow, cyan, blue,
            cyan, blue,   red,  yellow,
            cyan, blue,   red,  yellow
        };
        int row = int(floor(coord.y % 4.0));
        int col = int(floor(coord.x % 4.0));
        return delta4[4 * row + col];
    }

    if (phosphor_layout == 11) {
        // slot mask for RGB panels; looks okay at 1080p, looks better at 4K
        float3 slot_rgb[24] = {
            red,   green, blue,  red,   green, blue,
            red,   green, blue,  black, black, black,
            red,   green, blue,  red,   green, blue,
            black, black, black, red,   green, blue
        };
        int row = int(floor(coord.y % 4.0));
        int col = int(floor(coord.x % 6.0));
        // use the indexes to find which color to apply to the current pixel
        return slot_rgb[6 * row + col];
    }

    if (phosphor_layout == 12) {
        // slot mask for RGB panels; looks okay at 1080p, looks better at 4K
        float3 slot_bw[24] = {
            black, white, black, black, white, black,
            black, white, black, black, black, black,
            black, white, black, black, white, black,
            black, black, black, black, white, black
        };
        int row = int(floor(coord.y % 4.0));
        int col = int(floor(coord.x % 6.0));
        // use the indexes to find which color to apply to the current pixel
        return slot_bw[6 * row + col];
    }

    if (phosphor_layout == 13) {
        // based on MajorPainInTheCactus' HDR slot mask
        float3 slot_hdr[32] = {
            red,   green, blue,  black, red,   green, blue,  black,
            red,   green, blue,  black, black, black, black, black,
            red,   green, blue,  black, red,   green, blue,  black,
            black, black, black, black, red,   green, blue,  black
        };
        int row = int(floor(coord.y % 4.0));
        int col = int(floor(coord.x % 8.0));
        return slot_hdr[8 * row + col];
    }

    if (phosphor_layout == 14) {
        // same as above but for RGB panels
        float3 slot2[40] = {
            red,   yellow, green, blue,  blue,  red,   yellow, green, blue,  blue ,
            black, green,  green, blue,  blue,  red,   red,    black, black, black,
            red,   yellow, green, blue,  blue,  red,   yellow, green, blue,  blue ,
            red,   red,    black, black, black, black, green,  green, blue,  blue
        };
        int row = int(floor(coord.y % 4.0));
        int col = int(floor(coord.x % 10.0));
        return slot2[10 * row + col];
    }

    if (phosphor_layout == 15) {
        // slot_3_7x6_rgb
        float3 slot3[84] = {
            red,   red,   yellow, green, cyan,  blue,  blue,  red,   red,   yellow, green,  cyan,  blue,  blue,
            red,   red,   yellow, green, cyan,  blue,  blue,  red,   red,   yellow, green,  cyan,  blue,  blue,
            red,   red,   yellow, green, cyan,  blue,  blue,  black, black, black,  black,  black, black, black,
            red,   red,   yellow, green, cyan,  blue,  blue,  red,   red,   yellow, green,  cyan,  blue,  blue,
            red,   red,   yellow, green, cyan,  blue,  blue,  red,   red,   yellow, green,  cyan,  blue,  blue,
            black, black, black,  black, black, black, black, black, red,   red,    yellow, green, cyan,  blue
        };
        int row = int(floor(coord.y % 6.0));
        int col = int(floor(coord.x % 14.0));
        return slot3[14 * row + col];
    }

    // Unknown layout: no mask.
    return float3(1., 1., 1.);
}
#endif // MASK_PARAMS_H

View file

@ -1,415 +0,0 @@
#include "ReShade.fxh"
/*
Geom Shader - a modified CRT-Geom without CRT features made to be appended/integrated
into any other shaders and provide curvature/warping/oversampling features.
Adapted by Hyllian (2024).
*/
/*
CRT-interlaced
Copyright (C) 2010-2012 cgwg, Themaister and DOLLS
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
(cgwg gave their consent to have the original version of this shader
distributed under the GPL in this message:
http://board.byuu.org/viewtopic.php?p=26075#p26075
"Feel free to distribute my shaders under the GPL. After all, the
barrel distortion code was taken from the Curvature shader, which is
under the GPL."
)
This shader variant is pre-configured with screen curvature
*/
// ---- User-tweakable parameters -------------------------------------------

// Curvature on/off toggle (on by default).
uniform bool geom_curvature <
    ui_type = "radio";
    ui_label = "Geom Curvature Toggle";
    ui_category = "Curvature";
    ui_tooltip = "This shader only works with Aspect Ratio: Stretch to Fill.";
> = true;

// Curvature radius used by the intersect/bkwtrans/fwtrans helpers.
uniform float geom_R <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 10.0;
    ui_step = 0.1;
    ui_label = "Geom Curvature Radius";
> = 10.0;

// Distance term used by the intersect/fwtrans helpers.
uniform float geom_d <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 10.0;
    ui_step = 0.1;
    ui_label = "Geom Distance";
> = 10.0;

// Selects which axis the `aspect` macro scales by ViewportHeight/ViewportWidth.
uniform bool geom_invert_aspect <
    ui_type = "radio";
    ui_label = "Geom Curvature Aspect Inversion";
> = false;

// Corner radius used by corner().
uniform float geom_cornersize <
    ui_type = "drag";
    ui_min = 0.001;
    ui_max = 1.0;
    ui_step = 0.005;
    ui_label = "Geom Corner Size";
> = 0.006;

// Multiplier applied to the corner falloff in corner() before clamping.
uniform float geom_cornersmooth <
    ui_type = "drag";
    ui_min = 80.0;
    ui_max = 2000.0;
    ui_step = 100.0;
    ui_label = "Geom Corner Smoothness";
> = 200.0;

// Horizontal tilt; VS_CRT_Geom takes its sine and cosine.
uniform float geom_x_tilt <
    ui_type = "drag";
    ui_min = -1.0;
    ui_max = 1.0;
    ui_step = 0.05;
    ui_label = "Geom Horizontal Tilt";
> = 0.0;

// Vertical tilt; VS_CRT_Geom takes its sine and cosine.
uniform float geom_y_tilt <
    ui_type = "drag";
    ui_min = -1.0;
    ui_max = 1.0;
    ui_step = 0.05;
    ui_label = "Geom Vertical Tilt";
> = 0.0;

// Horizontal overscan in percent.
// NOTE: transform() divides by geom_overscan_x / 100.0, so 0 (which the UI
// range allows) results in a division by zero.
uniform float geom_overscan_x <
    ui_type = "drag";
    ui_min = -125.0;
    ui_max = 125.0;
    ui_step = 0.5;
    ui_label = "Geom Horiz. Overscan %";
> = 48.5;

// Vertical overscan in percent; the same division-by-zero caveat applies.
uniform float geom_overscan_y <
    ui_type = "drag";
    ui_min = -125.0;
    ui_max = 125.0;
    ui_step = 0.5;
    ui_label = "Geom Vert. Overscan %";
> = 64.5;

// Image centre offset along X; PS_CRT_Geom subtracts centerx / 100 from the
// sampling coordinate.
uniform float centerx <
    ui_type = "drag";
    ui_min = -100.0;
    ui_max = 100.0;
    ui_step = 0.1;
    ui_label = "Image Center X";
> = 0.0;

// Image centre offset along Y; PS_CRT_Geom subtracts centery / 100 from the
// sampling coordinate.
uniform float centery <
    ui_type = "drag";
    ui_min = -100.0;
    ui_max = 100.0;
    ui_step = 0.1;
    ui_label = "Image Center Y";
> = -8.8;

// Luminance multiplier applied to the sampled colour.
uniform float geom_lum <
    ui_type = "drag";
    ui_min = 0.5;
    ui_max = 2.0;
    ui_step = 0.01;
    ui_label = "Geom Luminance";
> = 1.0;

// Exponent applied to the sampled colour (see TEX2D).
uniform float geom_target_gamma <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 5.0;
    ui_step = 0.1;
    ui_label = "Geom Target Gamma";
> = 2.4;

// The output colour is raised to 1 / geom_monitor_gamma.
uniform float geom_monitor_gamma <
    ui_type = "drag";
    ui_min = 0.1;
    ui_max = 5.0;
    ui_step = 0.1;
    ui_label = "Geom Monitor Gamma";
> = 2.2;

// ---- Values bound through the `source` annotation --------------------------
uniform float2 BufferToViewportRatio < source = "buffer_to_viewport_ratio"; >;
uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >;
uniform float2 ViewportSize < source = "viewportsize"; >;
uniform float ViewportX < source = "viewportx"; >;
uniform float ViewportY < source = "viewporty"; >;
uniform float ViewportWidth < source = "viewportwidth"; >;
uniform float ViewportHeight < source = "viewportheight"; >;
uniform float2 ViewportOffset < source = "viewportoffset"; >;
// Back buffer sampler: linear min/mag filtering, BORDER addressing on all axes.
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=LINEAR;MinFilter=LINEAR;};
// Overlay image loaded from "overlay/psx.jpg", declared at back-buffer size with a single mip level.
texture tOverlay < source = "overlay/psx.jpg"; >
{
Width = BUFFER_WIDTH;
Height = BUFFER_HEIGHT;
MipLevels = 1;
};
// Overlay sampler: linear min/mag filtering, BORDER addressing on U and V.
sampler sOverlay { Texture = tOverlay; AddressU = BORDER; AddressV = BORDER; MinFilter = LINEAR; MagFilter = LINEAR;};
// Comment the next line to disable interpolation in linear gamma (and
// gain speed).
#define LINEAR_PROCESSING
// Enable 3x oversampling of the beam profile; improves moire effect caused by scanlines+curvature
#define OVERSAMPLE
// Use the older, purely gaussian beam profile; uncomment for speed
//#define USEGAUSSIAN
// Macros.
// Keeps |c| away from zero (minimum 1e-5) so it is safe to divide by.
// The body has no trailing semicolon, so the macro also works inside larger
// expressions; existing `x = FIX(y);` uses are unaffected.
#define FIX(c) max(abs(c), 1e-5)
#define PI 3.141592653589
// TEX2D samples the back buffer; with LINEAR_PROCESSING defined the sample is
// also raised to geom_target_gamma.
#ifdef LINEAR_PROCESSING
# define TEX2D(c) pow(tex2D(sBackBuffer, (c)), float4(geom_target_gamma,geom_target_gamma,geom_target_gamma,geom_target_gamma))
#else
# define TEX2D(c) tex2D(sBackBuffer, (c))
#endif
// aspect ratio
// Per-axis scale: (1, H/W) by default, (H/W, 1) when geom_invert_aspect is set.
#define aspect (geom_invert_aspect==true?float2(ViewportHeight/ViewportWidth,1.0):float2(1.0,ViewportHeight/ViewportWidth))
// Fixed overscan factor (no trailing semicolon, so it is usable in expressions).
#define overscan (float2(1.01,1.01))
// Values VS_CRT_Geom precomputes per vertex and hands to PS_CRT_Geom.
struct ST_VertexOut
{
float2 sinangle : TEXCOORD1; // sin of (geom_x_tilt, geom_y_tilt)
float2 cosangle : TEXCOORD2; // cos of (geom_x_tilt, geom_y_tilt)
float3 stretch : TEXCOORD3; // result of vs_maxscale(sinangle, cosangle)
float2 TextureSize : TEXCOORD4; // 1.0 / NormalizedNativePixelSize
};
// Vertex-stage copy of intersect(): returns the root
// (-B - sqrt(B*B - 4AC)) / (2A) of the quadratic built from `xy`, the tilt
// sine/cosine and the geom_R / geom_d uniforms.
float vs_intersect(float2 xy, float2 sinangle, float2 cosangle)
{
    float qa = dot(xy, xy) + geom_d * geom_d;
    float qb = 2.0 * (geom_R * (dot(xy, sinangle) - geom_d * cosangle.x * cosangle.y) - geom_d * geom_d);
    float qc = geom_d * geom_d + 2.0 * geom_R * geom_d * cosangle.x * cosangle.y;
    float disc = qb * qb - 4.0 * qa * qc;
    return (-qb - sqrt(disc)) / (2.0 * qa);
}
// Vertex-stage copy of bkwtrans(): maps `xy` through vs_intersect() and the
// tilt terms to a coordinate on the curved surface.
float2 vs_bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
    float t = vs_intersect(xy, sinangle, cosangle);
    float2 pt = (t * xy + geom_R * sinangle) / geom_R;
    float2 pt_over_cos = pt / cosangle;
    float2 tan_tilt = sinangle / cosangle;

    // Larger root of a second quadratic in the tilt tangent.
    float qa = dot(tan_tilt, tan_tilt) + 1.0;
    float qb = -2.0 * dot(pt_over_cos, tan_tilt);
    float qc = dot(pt_over_cos, pt_over_cos) - 1.0;
    float root = (-qb + sqrt(qb * qb - 4.0 * qa * qc)) / (2.0 * qa);

    float2 uv = (pt - root * sinangle) / cosangle;
    // FIX keeps r non-zero so the division by sin(r / geom_R) stays finite.
    float r = FIX(geom_R * acos(root));
    return uv * r / sin(r / geom_R);
}
// Vertex-stage copy of fwtrans(): projects `uv` using geom_R, geom_d and the
// tilt sine/cosine.
float2 vs_fwtrans(float2 uv, float2 sinangle, float2 cosangle)
{
    // FIX keeps the length non-zero so it can be divided by below.
    float len = FIX(sqrt(dot(uv, uv)));
    float2 scaled = uv * (sin(len / geom_R) / len);
    float drop = 1.0 - cos(len / geom_R);
    float denom = geom_d / geom_R + drop * cosangle.x * cosangle.y + dot(scaled, sinangle);
    return geom_d * (scaled * cosangle - drop * sinangle) / denom;
}
// Vertex-stage copy of maxscale(): xy of the result holds the aspect-scaled
// midpoint of the forward-transformed extents and z the larger of the two
// extents' sizes.
float3 vs_maxscale(float2 sinangle, float2 cosangle)
{
    float2 centre = vs_bkwtrans(-geom_R * sinangle / (1.0 + geom_R / geom_d * cosangle.x * cosangle.y), sinangle, cosangle);
    float2 half_ext = float2(0.5, 0.5) * aspect;

    float lo_x = vs_fwtrans(float2(-half_ext.x, centre.y), sinangle, cosangle).x;
    float lo_y = vs_fwtrans(float2(centre.x, -half_ext.y), sinangle, cosangle).y;
    float hi_x = vs_fwtrans(float2(+half_ext.x, centre.y), sinangle, cosangle).x;
    float hi_y = vs_fwtrans(float2(centre.x, +half_ext.y), sinangle, cosangle).y;

    float2 lo = float2(lo_x, lo_y) / aspect;
    float2 hi = float2(hi_x, hi_y) / aspect;

    float2 mid = (hi + lo) * aspect * 0.5;
    float span = max(hi.x - lo.x, hi.y - lo.y);
    return float3(mid, span);
}
// Vertex shader generating a triangle covering the entire screen.
//
// id       - vertex index (0..2) of the full-screen triangle.
// position - clip-space position of the vertex.
// texcoord - vertex UV.
// vVARS    - values precomputed for the pixel shader: sine/cosine of the tilt
//            angles, the vs_maxscale() result and 1 / NormalizedNativePixelSize.
void VS_CRT_Geom(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out ST_VertexOut vVARS)
{
    // IDs 0, 1, 2 yield the UVs (0,0), (0,2), (2,0).
    float2 uv = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
    texcoord = uv;
    position = float4(uv * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);

    // Precalculate the values the fragment shader needs.
    float2 tilt = float2(geom_x_tilt, geom_y_tilt);
    float2 sin_tilt = sin(tilt);
    float2 cos_tilt = cos(tilt);
    float2 source_size = 1.0 / NormalizedNativePixelSize;

    vVARS.sinangle = sin_tilt;
    vVARS.cosangle = cos_tilt;
    vVARS.stretch = vs_maxscale(sin_tilt, cos_tilt);
    vVARS.TextureSize = float2(source_size.x, source_size.y);
}
// Returns the root (-B - sqrt(B*B - 4AC)) / (2A) of the quadratic built from
// `xy`, the tilt sine/cosine and the geom_R / geom_d uniforms.
// NOTE(review): presumably the ray parameter at which the view ray hits the
// curved screen - confirm against the original CRT-Geom derivation.
float intersect(float2 xy, float2 sinangle, float2 cosangle)
{
    float qa = dot(xy, xy) + geom_d * geom_d;
    float qb = 2.0 * (geom_R * (dot(xy, sinangle) - geom_d * cosangle.x * cosangle.y) - geom_d * geom_d);
    float qc = geom_d * geom_d + 2.0 * geom_R * geom_d * cosangle.x * cosangle.y;
    float disc = qb * qb - 4.0 * qa * qc;
    return (-qb - sqrt(disc)) / (2.0 * qa);
}
// Backward transform: maps the coordinate `xy` through intersect() and the
// tilt terms to a coordinate on the curved surface.
// NOTE(review): presumably flat screen space -> curved image space; confirm.
float2 bkwtrans(float2 xy, float2 sinangle, float2 cosangle)
{
    float t = intersect(xy, sinangle, cosangle);
    float2 pt = (t * xy + geom_R * sinangle) / geom_R;
    float2 pt_over_cos = pt / cosangle;
    float2 tan_tilt = sinangle / cosangle;

    // Larger root of a second quadratic in the tilt tangent.
    float qa = dot(tan_tilt, tan_tilt) + 1.0;
    float qb = -2.0 * dot(pt_over_cos, tan_tilt);
    float qc = dot(pt_over_cos, pt_over_cos) - 1.0;
    float root = (-qb + sqrt(qb * qb - 4.0 * qa * qc)) / (2.0 * qa);

    float2 uv = (pt - root * sinangle) / cosangle;
    // FIX keeps r non-zero so the division by sin(r / geom_R) stays finite.
    float r = FIX(geom_R * acos(root));
    return uv * r / sin(r / geom_R);
}
// Forward transform: the counterpart of bkwtrans(), projecting `uv` using
// geom_R, geom_d and the tilt sine/cosine.
float2 fwtrans(float2 uv, float2 sinangle, float2 cosangle)
{
    // FIX keeps the length non-zero so it can be divided by below.
    float len = FIX(sqrt(dot(uv, uv)));
    float2 scaled = uv * (sin(len / geom_R) / len);
    float drop = 1.0 - cos(len / geom_R);
    float denom = geom_d / geom_R + drop * cosangle.x * cosangle.y + dot(scaled, sinangle);
    return geom_d * (scaled * cosangle - drop * sinangle) / denom;
}
// Computes the stretch term used by transform(): xy holds the aspect-scaled
// midpoint of the forward-transformed extents and z the larger of the two
// extents' sizes.
float3 maxscale(float2 sinangle, float2 cosangle)
{
    float2 centre = bkwtrans(-geom_R * sinangle / (1.0 + geom_R / geom_d * cosangle.x * cosangle.y), sinangle, cosangle);
    float2 half_ext = float2(0.5, 0.5) * aspect;

    float lo_x = fwtrans(float2(-half_ext.x, centre.y), sinangle, cosangle).x;
    float lo_y = fwtrans(float2(centre.x, -half_ext.y), sinangle, cosangle).y;
    float hi_x = fwtrans(float2(+half_ext.x, centre.y), sinangle, cosangle).x;
    float hi_y = fwtrans(float2(centre.x, +half_ext.y), sinangle, cosangle).y;

    float2 lo = float2(lo_x, lo_y) / aspect;
    float2 hi = float2(hi_x, hi_y) / aspect;

    float2 mid = (hi + lo) * aspect * 0.5;
    float span = max(hi.x - lo.x, hi.y - lo.y);
    return float3(mid, span);
}
// Applies the curvature to a [0,1] texture coordinate: recentres and scales
// it by `stretch`, runs bkwtrans(), then undoes the overscan percentage and
// aspect scaling and shifts back by 0.5.
float2 transform(float2 coord, float2 sinangle, float2 cosangle, float3 stretch)
{
    float2 centred = (coord - float2(0.5, 0.5)) * aspect * stretch.z + stretch.xy;
    float2 warped = bkwtrans(centred, sinangle, cosangle);
    // Overscan uniforms are percentages.
    float2 overscan_scale = float2(geom_overscan_x / 100.0, geom_overscan_y / 100.0);
    return warped / overscan_scale / aspect + float2(0.5, 0.5);
}
// Rounded-corner mask factor for `coord`.
// Measures how far the aspect-scaled coordinate lies inside a band of width
// geom_cornersize along the edges, and returns
// (geom_cornersize - distance) * geom_cornersmooth clamped to [0, 1].
float corner(float2 coord)
{
    // Per-axis distance to the nearer of the 0 and 1 edges, aspect-scaled.
    float2 edge = min(coord, 1.0.xx - coord) * aspect;
    float2 radius = geom_cornersize.xx;
    // Zero on any axis where the point is at least `radius` from the edge.
    float2 inset = radius - min(edge, radius);
    float dist = sqrt(dot(inset, inset));
    return clamp((radius.x - dist) * geom_cornersmooth, 0.0, 1.0);
}
// Sum of the absolute screen-space derivatives of `value` (|ddx| + |ddy|).
float fwidth(float value)
{
    float dx = abs(ddx(value));
    float dy = abs(ddy(value));
    return dx + dy;
}
// Code snippet borrowed from crt-cyclon. (credits to DariusG)
// Remaps `pos` from [0,1] to [-1,1], scales each axis by
// 1 + (other axis)^2 * k, and maps back to [0,1].
// Both k coefficients are the literal 0 here, so the scale factor is 1.
float2 Warp(float2 pos)
{
    float2 centred = pos * 2.0 - 1.0;
    float2 bulge = float2(1.0 + centred.y * centred.y * 0, 1.0 + centred.x * centred.x * 0);
    centred *= bulge;
    return centred * 0.5 + 0.5;
}
// Pixel shader: samples the back buffer at the (optionally curved, recentred)
// coordinate, applies luminance, the rounded-corner mask and the monitor
// gamma, and shows the overlay texture wherever the sampling coordinate falls
// outside the viewport rectangle. Alpha is always 1.0.
float4 PS_CRT_Geom(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
{
    // Texture coordinates of the texel containing the active pixel.
    float2 xy = (geom_curvature == true)
        ? transform(vTexCoord, vVARS.sinangle, vVARS.cosangle, vVARS.stretch)
        : vTexCoord;

    // center screen
    xy = Warp(xy - float2(centerx, centery) / 100.0);

    float2 half2 = float2(0.5, 0.5);
    float cval = corner((xy - half2) * BufferToViewportRatio + half2);
    float2 uv_ratio = frac((xy * vVARS.TextureSize - half2) / vVARS.TextureSize);

    float4 col = TEX2D(xy);
#ifndef LINEAR_PROCESSING
    col = pow(col, geom_target_gamma.xxxx);
#endif
    col.rgb *= (geom_lum * step(0.0, uv_ratio.y));

    float3 shaded = col.rgb * cval.xxx;

    // Convert the image gamma for display on our output device.
    float inv_gamma = 1.0 / geom_monitor_gamma;
    shaded = pow(shaded, float3(inv_gamma, inv_gamma, inv_gamma));

    // The overlay is sampled with the untransformed coordinate.
    float4 overlay = tex2D(sOverlay, vTexCoord);

    // Viewport rectangle, normalised by ViewportSize.
    float2 vp_origin = float2(ViewportX, ViewportY);
    float2 vp_extent = float2(ViewportX + ViewportWidth, ViewportY + ViewportHeight);
    float2 top_left = (vp_origin - ViewportOffset) / ViewportSize;
    float2 bottom_right = (vp_extent - ViewportOffset) / ViewportSize;

    bool outside = xy.x < top_left.x || xy.x > bottom_right.x || xy.y < top_left.y || xy.y > bottom_right.y;
    if (outside)
        shaded = overlay.rgb;

    return float4(shaded, 1.0);
}
// Single-pass technique: VS_CRT_Geom emits the full-screen triangle and
// PS_CRT_Geom shades it.
technique CRT_Geom
{
pass
{
VertexShader = VS_CRT_Geom;
PixelShader = PS_CRT_Geom;
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 214 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 202 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.9 KiB

View file

@ -1,15 +0,0 @@
# To Use
Choose Aspect Ratio: Stretch to Fill.
# Psx.jpg Credits
Credit goes to the author: SOQUEROEU.
The "psx.jpg" background was edited from the one in the "Soqueroeu TV Backgrounds 2.0" repository: https://github.com/soqueroeu/Soqueroeu-TV-Backgrounds_V2.0/tree/main.
The material is free to use according to the agreement below:
## AGREEMENT
This pack is free. You should not pay for anything related to this graphics pack and shader preset. You may distribute and reproduce part from this content, as long as you give credit to the authors involved. You may not profit from the sale of products that contain material in this package without the author's prior permission.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 470 KiB

View file

@ -209,6 +209,29 @@ void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limi
}
else if (texture_filter == GPUTextureFilter::JINC2 || texture_filter == GPUTextureFilter::JINC2BinAlpha)
{
/*
Hyllian's jinc windowed-jinc 2-lobe sharper with anti-ringing Shader
Copyright (C) 2011-2016 Hyllian/Jararaca - sergiogdb@gmail.com
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
DefineMacro(ss, "BINALPHA", texture_filter == GPUTextureFilter::JINC2BinAlpha);
ss << R"(
CONSTANT float JINC2_WINDOW_SINC = 0.44;
@ -361,6 +384,30 @@ void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limi
}
else if (texture_filter == GPUTextureFilter::xBR || texture_filter == GPUTextureFilter::xBRBinAlpha)
{
/*
Hyllian's xBR-vertex code and texel mapping
Copyright (C) 2011/2016 Hyllian - sergiogdb@gmail.com
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
DefineMacro(ss, "BINALPHA", texture_filter == GPUTextureFilter::xBRBinAlpha);
ss << R"(
CONSTANT int BLEND_NONE = 0;