mirror of
				https://github.com/RetroDECK/Duckstation.git
				synced 2025-04-10 19:15:14 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			243 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			HLSL
		
	
	
	
	
	
			
		
		
	
	
			243 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			HLSL
		
	
	
	
	
	
| 
 | |
| #ifndef _QUAD_PIXEL_COMMUNICATION_H
 | |
| #define _QUAD_PIXEL_COMMUNICATION_H
 | |
| 
 | |
| /////////////////////////////////  MIT LICENSE  ////////////////////////////////
 | |
| 
 | |
| //  Copyright (C) 2014 TroggleMonkey*
 | |
| //
 | |
| //  Permission is hereby granted, free of charge, to any person obtaining a copy
 | |
| //  of this software and associated documentation files (the "Software"), to
 | |
| //  deal in the Software without restriction, including without limitation the
 | |
| //  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 | |
| //  sell copies of the Software, and to permit persons to whom the Software is
 | |
| //  furnished to do so, subject to the following conditions:
 | |
| //
 | |
| //  The above copyright notice and this permission notice shall be included in
 | |
| //  all copies or substantial portions of the Software.
 | |
| //
 | |
| //  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | |
| //  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | |
| //  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | |
| //  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | |
| //  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 | |
| //  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 | |
| //  IN THE SOFTWARE.
 | |
| 
 | |
| /////////////////////////////////  DISCLAIMER  /////////////////////////////////
 | |
| 
 | |
| //  *This code was inspired by "Shader Amortization using Pixel Quad Message
 | |
| //  Passing" by Eric Penner, published in GPU Pro 2, Chapter VI.2.  My intent
 | |
| //  is not to plagiarize his fundamentally similar code and assert my own
 | |
| //  copyright, but the algorithmic helper functions require so little code that
 | |
| //  implementations can't vary by much except bugfixes and conventions.  I just
 | |
| //  wanted to license my own particular code here to avoid ambiguity and make it
 | |
| //  clear that as far as I'm concerned, people can do as they please with it.
 | |
| 
 | |
| /////////////////////////////////  DESCRIPTION  ////////////////////////////////
 | |
| 
 | |
| //  Given screen pixel numbers, derive a "quad vector" describing a fragment's
 | |
| //  position in its 2x2 pixel quad.  Given that vector, obtain the values of any
 | |
| //  variable at neighboring fragments.
 | |
| //  Requires:   Using this file in general requires:
 | |
| //              1.) ddx() and ddy() are present in the current Cg profile.
 | |
| //              2.) The GPU driver is using fine/high-quality derivatives.
 | |
| //                  Functions will give incorrect results if this is not true,
 | |
| //                  so a test function is included.
 | |
| 
 | |
| 
 | |
| /////////////////////  QUAD-PIXEL COMMUNICATION PRIMITIVES  ////////////////////
 | |
| 
 | |
float4 get_quad_vector_naive(float4 output_pixel_num_wrt_uvxy)
{
    //  Purpose:    Guess where the current fragment sits inside its 2x2 pixel
    //              quad, purely from the parity of its pixel numbers.
    //  Requires:   output_pixel_num_wrt_uvxy holds two measures of the current
    //              fragment's output pixel number, each in the range
    //              ([0, output_size.x), [0, output_size.y)):
    //              1.) .xy increase along the uv direction.
    //              2.) .zw increase along the screen xy direction.
    //  Returns:    Two measures of the fragment's placement in its 2x2 quad:
    //              1.) .xy is the placement with respect to uv direction, with
    //                  the origin (0, 0) at the top-left:
    //                  top-left     = (-1.0, -1.0) top-right    = ( 1.0, -1.0)
    //                  bottom-left  = (-1.0,  1.0) bottom-right = ( 1.0,  1.0)
    //                  Use it to arrange/weight shared texture samples.
    //              2.) .zw is the placement with respect to screen xy direction
    //                  (position); its origin varies.  quad_gather() relies on
    //                  this measure.
    //              Note: quad_vector.zw = quad_vector.xy * float2(
    //                      ddx(output_pixel_num_wrt_uvxy.x),
    //                      ddy(output_pixel_num_wrt_uvxy.y));
    //  Caveats:    The result is only right if the GPU driver starts every 2x2
    //              quad on an even pixel number.  That can fail (and not
    //              deterministically) at odd output resolutions; see
    //              get_quad_vector() for a version that corrects for it.
    //  For whole pixel numbers, parity is 0.0 on even pixels and 1.0 on odd:
    float4 parity = 2.0 * frac(0.5 * output_pixel_num_wrt_uvxy);
    //  Remap parity {0, 1} onto the quad placement {-1, 1}:
    return 2.0 * parity - float4(1.0, 1.0, 1.0, 1.0);
}
 | |
| 
 | |
float4 get_quad_vector(float4 output_pixel_num_wrt_uvxy)
{
    //  Purpose:    Parity-based quad placement, corrected for quads that begin
    //              on an odd pixel.
    //  Requires:   Same inputs as get_quad_vector_naive() (read that first).
    //  Returns:    Same layout as get_quad_vector_naive(), but still correct
    //              when the 2x2 pixel quad starts at an odd pixel, which can
    //              happen at odd resolutions.
    float4 naive_vector = get_quad_vector_naive(output_pixel_num_wrt_uvxy);
    //  Inside one quad the naive .zw placement should step from -1 to +1 along
    //  screen x/y.  Half of its derivative is therefore +1 when the guess is
    //  right and -1 when the quad actually starts on an odd pixel:
    float flip_x = 0.5 * ddx(naive_vector.z);
    float flip_y = 0.5 * ddy(naive_vector.w);
    //  Apply the same per-axis sign to both the uv and the screen measures:
    return naive_vector * float4(flip_x, flip_y, flip_x, flip_y);
}
 | |
| 
 | |
float4 get_quad_vector(float2 output_pixel_num_wrt_uv)
{
    //  Purpose:    Derive the full quad vector when only the uv-ordered pixel
    //              number is known.
    //  Requires:   1.) ddx() and ddy() are available in the shader profile.
    //              2.) output_pixel_num_wrt_uv increases with uv coords and
    //                  measures the current fragment's output pixel number in:
    //                      ([0, output_size.x), [0, output_size.y))
    //  Returns:    Same layout as get_quad_vector_naive() (read that first),
    //              but still correct when the 2x2 pixel quad starts at an odd
    //              pixel, which can happen at odd resolutions.
    //  Caveats:    Needs less information than the float4 overload, but it is
    //              potentially slower.
    //  Parity-based placement guess along uv, remapped from {0, 1} to {-1, 1}:
    float2 uv_parity = 2.0 * frac(0.5 * output_pixel_num_wrt_uv);
    float2 uv_guess = 2.0 * (uv_parity - float2(0.5, 0.5));
    //  Direction of screen xy relative to uv, per axis, expected in {-1, 1}:
    float2 uv_to_screen = float2(ddx(output_pixel_num_wrt_uv.x),
                                 ddy(output_pixel_num_wrt_uv.y));
    float2 screen_guess = uv_guess * uv_to_screen;
    //  If the screen-space guess fails to increase with screen xy, the quad
    //  starts on an odd pixel and the matching axis has to be mirrored:
    float2 flip = 0.5 * float2(ddx(screen_guess.x), ddy(screen_guess.y));
    return float4(uv_guess * flip, screen_guess * flip);
}
 | |
| 
 | |
void quad_gather(float4 quad_vector, float4 curr,
    out float4 adjx, out float4 adjy, out float4 diag)
{
    //  Purpose:    Fetch the value another fragment of the same 2x2 quad holds
    //              for curr, using screen-space derivatives.
    //  Requires:   1.) ddx() and ddy() are available in the shader profile.
    //              2.) The GPU driver uses fine/high-quality derivatives.
    //              3.) quad_vector describes this fragment's location in its
    //                  2x2 pixel quad, following get_quad_vector()'s layout.
    //              4.) curr is any vector whose neighboring values you want.
    //  Returns:    Through the out parameters, the value of curr at the
    //              x-adjacent (adjx), y-adjacent (adjy), and diagonal (diag)
    //              fragments.
    float step_x = quad_vector.z;
    float step_y = quad_vector.w;
    //  Subtracting the signed derivative moves from this fragment's value to
    //  the neighbor's along that axis:
    adjx = curr - step_x * ddx(curr);
    adjy = curr - step_y * ddy(curr);
    //  The diagonal value is the y-neighbor of the x-neighbor:
    diag = adjx - step_y * ddy(adjx);
}
 | |
| 
 | |
void quad_gather(float4 quad_vector, float3 curr,
    out float3 adjx, out float3 adjy, out float3 diag)
{
    //  float3 overload: writes the value of curr at the x-adjacent,
    //  y-adjacent, and diagonal fragments of the quad to the out parameters.
    //  Only quad_vector.zw (the screen-space placement) is read.
    float step_x = quad_vector.z;
    float step_y = quad_vector.w;
    adjx = curr - step_x * ddx(curr);
    adjy = curr - step_y * ddy(curr);
    //  The diagonal value is the y-neighbor of the x-neighbor:
    diag = adjx - step_y * ddy(adjx);
}
 | |
| 
 | |
void quad_gather(float4 quad_vector, float2 curr,
    out float2 adjx, out float2 adjy, out float2 diag)
{
    //  float2 overload: writes the value of curr at the x-adjacent,
    //  y-adjacent, and diagonal fragments of the quad to the out parameters.
    //  Only quad_vector.zw (the screen-space placement) is read.
    float step_x = quad_vector.z;
    float step_y = quad_vector.w;
    adjx = curr - step_x * ddx(curr);
    adjy = curr - step_y * ddy(curr);
    //  The diagonal value is the y-neighbor of the x-neighbor:
    diag = adjx - step_y * ddy(adjx);
}
 | |
| 
 | |
float4 quad_gather(float4 quad_vector, float curr)
{
    //  Scalar overload: packs all four quad values into a single float4.
    //  Returns:    return.x == current
    //              return.y == adjacent x
    //              return.z == adjacent y
    //              return.w == diagonal
    float adjacent_x = curr - ddx(curr) * quad_vector.z;
    //  This fragment's row of the quad: (current, adjacent x).
    float2 this_row = float2(curr, adjacent_x);
    //  Stepping both entries along y yields (adjacent y, diagonal):
    float2 other_row = this_row - ddy(this_row) * quad_vector.w;
    return float4(this_row, other_row);
}
 | |
| 
 | |
float4 quad_gather_sum(float4 quad_vector, float4 curr)
{
    //  Requires:   Same preconditions as quad_gather().
    //  Returns:    The sum of curr over all four fragments in the 2x2 quad.
    float4 from_x, from_y, from_diag;
    quad_gather(quad_vector, curr, from_x, from_y, from_diag);
    //  Accumulate in a fixed order: current, adjacent x, adjacent y, diagonal.
    float4 total = curr + from_x;
    total = total + from_y;
    total = total + from_diag;
    return total;
}
 | |
| 
 | |
float3 quad_gather_sum(float4 quad_vector, float3 curr)
{
    //  float3 overload: sum of curr over all four fragments in the 2x2 quad.
    //  Requires the same preconditions as quad_gather().
    float3 from_x, from_y, from_diag;
    quad_gather(quad_vector, curr, from_x, from_y, from_diag);
    //  Accumulate in a fixed order: current, adjacent x, adjacent y, diagonal.
    float3 total = curr + from_x;
    total = total + from_y;
    total = total + from_diag;
    return total;
}
 | |
| 
 | |
float2 quad_gather_sum(float4 quad_vector, float2 curr)
{
    //  float2 overload: sum of curr over all four fragments in the 2x2 quad.
    //  Requires the same preconditions as quad_gather().
    float2 from_x, from_y, from_diag;
    quad_gather(quad_vector, curr, from_x, from_y, from_diag);
    //  Accumulate in a fixed order: current, adjacent x, adjacent y, diagonal.
    float2 total = curr + from_x;
    total = total + from_y;
    total = total + from_diag;
    return total;
}
 | |
| 
 | |
float quad_gather_sum(float4 quad_vector, float curr)
{
    //  Scalar overload: sum of curr over all four fragments in the 2x2 quad.
    //  The scalar quad_gather() returns (current, adjacent x, adjacent y,
    //  diagonal) packed into one float4, so just add its components in order.
    float4 quad_values = quad_gather(quad_vector, curr);
    float total = quad_values.x + quad_values.y;
    total = total + quad_values.z;
    total = total + quad_values.w;
    return total;
}
 | |
| 
 | |
bool fine_derivatives_working(float4 quad_vector, float4 curr)
{
    //  Purpose:    Test whether quad-pixel communication is working.
    //  Requires:   1.) ddx() and ddy() are available in the shader profile.
    //              2.) quad_vector describes this fragment's location in its
    //                  2x2 pixel quad, following get_quad_vector()'s layout.
    //              3.) curr is a test vector with non-constant derivatives
    //                  (its value should change nonlinearly across fragments).
    //  Returns:    true if fine/hybrid/high-quality derivatives are in use;
    //              false if coarse derivatives are in use or the test is
    //              inconclusive.
    //  Method:     Fine derivatives are confirmed if the following ever holds,
    //              for any component at any fragment:
    //                  (ddy(curr) != ddy(adjx)) or (ddx(curr) != ddx(adjy))
    //              Testing more values (e.g. a float4, checked both ways)
    //              makes a positive result easier to demonstrate.
    //  TODO: Check for floating point exact comparison issues!
    float4 dx_here = ddx(curr);
    float4 dy_here = ddy(curr);
    //  Reconstruct curr at the x-adjacent and y-adjacent fragments:
    float4 neighbor_x = curr - dx_here * quad_vector.z;
    float4 neighbor_y = curr - dy_here * quad_vector.w;
    //  Take each cross-derivative once instead of once per component:
    float4 dy_at_neighbor_x = ddy(neighbor_x);
    float4 dx_at_neighbor_y = ddx(neighbor_y);
    bool4 dy_mismatch = bool4(
        dy_here.x != dy_at_neighbor_x.x,
        dy_here.y != dy_at_neighbor_x.y,
        dy_here.z != dy_at_neighbor_x.z,
        dy_here.w != dy_at_neighbor_x.w);
    bool4 dx_mismatch = bool4(
        dx_here.x != dx_at_neighbor_y.x,
        dx_here.y != dx_at_neighbor_y.y,
        dx_here.z != dx_at_neighbor_y.z,
        dx_here.w != dx_at_neighbor_y.w);
    //  A mismatch in any component along either axis is enough:
    return any(dy_mismatch) || any(dx_mismatch);
}
 | |
| 
 | |
bool fine_derivatives_working_fast(float4 quad_vector, float curr)
{
    //  Requires:   Same preconditions as fine_derivatives_working().
    //  Returns:    Same meaning as fine_derivatives_working().
    //  Usage:      Cheaper than fine_derivatives_working(): it checks a single
    //              scalar along one axis only, so false negatives are more
    //              likely and it is less useful for offline testing/debugging.
    //              Per the original author's note (May 2014), it is also no
    //              basis for dynamic runtime branching, because derivatives
    //              (and quad-pixel communication) were disallowed in branches.
    //              Should future GPUs permit derivatives in branches whose
    //              condition is uniform across the quad (by promise, or by
    //              letting one fragment decide for the quad), this version
    //              may become the more economical choice.
    //  Reconstruct curr at the x-adjacent fragment:
    float neighbor_x = curr - ddx(curr) * quad_vector.z;
    //  If the neighbor reports a different y-derivative than this fragment,
    //  the derivatives are being evaluated per fragment:
    return (ddy(curr) != ddy(neighbor_x));
}
 | |
| 
 | |
| #endif  //  _QUAD_PIXEL_COMMUNICATION_H | 
