Supermodel/Src/Graphics/New3D/R3DShaderTriangles.h
Ian Curtis 40c8259130 Rewrite the whole project for GL4+. I figured if we removed the limitation of a legacy rendering API we could improve things a bit. With GL4+ we can do unsigned integer math in the shaders. This allows us to upload a direct copy of the real3d texture sheet, and texture directly from this memory given the x/y pos and type. This massively simplifies the binding and invalidation code. Also the crazy corner cases will work because it essentially works the same way as the original hardware.
The standard triangle render requires gl 4.1 core, so should work on mac. The quad renderer runs on 4.5 core. The legacy renderer should still work, and when enabled a regular opengl context will be created, which allows functions marked depreciated in the core profiles to still work. This will only work in windows/linux I think. Apple doesn't support this.

A GL 4.1 GPU is now the min required spec. Sorry if you have an OLDER gpu. GL 4.1 is over 12 years old now.

This is a big update so I apologise in advance if I accidently broke something :]
2022-11-07 21:33:01 +00:00

573 lines
16 KiB
C

#ifndef _R3DSHADERTRIANGLES_H_
#define _R3DSHADERTRIANGLES_H_
static const char *vertexShaderR3D = R"glsl(
#version 410 core
// uniforms
uniform float modelScale;
uniform mat4 modelMat;
uniform mat4 projMat;
uniform bool translatorMap;
// attributes
in vec4 inVertex;
in vec3 inNormal;
in vec2 inTexCoord;
in vec4 inColour;
in vec3 inFaceNormal; // used to emulate r3d culling
in float inFixedShade;
// outputs to fragment shader
out vec3 fsViewVertex;
out vec3 fsViewNormal; // per vertex normal vector
out vec2 fsTexCoord;
out vec4 fsColor;
out float fsDiscard; // can't have varying bool (glsl spec)
out float fsFixedShade;
vec4 GetColour(vec4 colour)
{
vec4 c = colour;
if(translatorMap) {
c.rgb *= 16.0;
}
return c;
}
float CalcBackFace(in vec3 viewVertex)
{
vec3 vt = viewVertex - vec3(0.0);
vec3 vn = (mat3(modelMat) * inFaceNormal);
// dot product of face normal with view direction
return dot(vt, vn);
}
void main(void)
{
fsViewVertex = vec3(modelMat * inVertex);
fsViewNormal = (mat3(modelMat) * inNormal) / modelScale;
fsDiscard = CalcBackFace(fsViewVertex);
fsColor = GetColour(inColour);
fsTexCoord = inTexCoord;
fsFixedShade = inFixedShade;
gl_Position = projMat * modelMat * inVertex;
}
)glsl";
static const char *fragmentShaderR3D = R"glsl(
#version 410 core
uniform usampler2D tex1; // entire texture sheet
// texturing
uniform bool textureEnabled;
uniform bool microTexture;
uniform float microTextureScale;
uniform int microTextureID;
uniform ivec4 baseTexInfo; // x/y are x,y positions in the texture sheet. z/w are with and height
uniform int baseTexType;
uniform bool textureInverted;
uniform bool textureAlpha;
uniform bool alphaTest;
uniform bool discardAlpha;
uniform ivec2 textureWrapMode;
// general
uniform vec3 fogColour;
uniform vec4 spotEllipse; // spotlight ellipse position: .x=X position (screen coordinates), .y=Y position, .z=half-width, .w=half-height)
uniform vec2 spotRange; // spotlight Z range: .x=start (viewspace coordinates), .y=limit
uniform vec3 spotColor; // spotlight RGB color
uniform vec3 spotFogColor; // spotlight RGB color on fog
uniform vec3 lighting[2]; // lighting state (lighting[0] = sun direction, lighting[1].x,y = diffuse, ambient intensities from 0-1.0)
uniform bool lightEnabled; // lighting enabled (1.0) or luminous (0.0), drawn at full intensity
uniform bool sunClamp; // not used by daytona and la machine guns
uniform bool intensityClamp; // some games such as daytona and
uniform bool specularEnabled; // specular enabled
uniform float specularValue; // specular coefficient
uniform float shininess; // specular shininess
uniform float fogIntensity;
uniform float fogDensity;
uniform float fogStart;
uniform float fogAttenuation;
uniform float fogAmbient;
uniform bool fixedShading;
uniform int hardwareStep;
//interpolated inputs from vertex shader
in vec3 fsViewVertex;
in vec3 fsViewNormal; // per vertex normal vector
in vec4 fsColor;
in vec2 fsTexCoord;
in float fsDiscard;
in float fsFixedShade;
//outputs
out vec4 outColor;
vec4 ExtractColour(int type, uint value)
{
vec4 c = vec4(0.0);
if(type==0) { // T1RGB5
c.r = float((value >> 10) & 0x1F) / 31.0;
c.g = float((value >> 5 ) & 0x1F) / 31.0;
c.b = float((value ) & 0x1F) / 31.0;
c.a = 1.0 - float((value >> 15) & 0x1);
}
else if(type==1) { // Interleaved A4L4 (low byte)
c.rgb = vec3(float(value&0xF) / 15.0);
c.a = float((value >> 4) & 0xF) / 15.0;
}
else if(type==2) {
c.a = float(value&0xF) / 15.0;
c.rgb = vec3(float((value >> 4) & 0xF) / 15.0);
}
else if(type==3) {
c.rgb = vec3(float((value>>8)&0xF) / 15.0);
c.a = float((value >> 12) & 0xF) / 15.0;
}
else if(type==4) {
c.a = float((value>>8)&0xF) / 15.0;
c.rgb = vec3(float((value >> 12) & 0xF) / 15.0);
}
else if(type==5) {
c = vec4(float(value&0xFF) / 255.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==6) {
c = vec4(float((value>>8)&0xFF) / 255.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==7) { // RGBA4
c.r = float((value>>12)&0xF) / 15.0;
c.g = float((value>> 8)&0xF) / 15.0;
c.b = float((value>> 4)&0xF) / 15.0;
c.a = float((value>> 0)&0xF) / 15.0;
}
else if(type==8) { // low byte, low nibble
c = vec4(float(value&0xF) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==9) { // low byte, high nibble
c = vec4(float((value>>4)&0xF) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==10) { // high byte, low nibble
c = vec4(float((value>>8)&0xF) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==11) { // high byte, high nibble
c = vec4(float((value>>12)&0xF) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
return c;
}
ivec2 GetTexturePosition(int level, ivec2 pos)
{
const int mipXBase[] = int[](0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047);
const int mipYBase[] = int[](0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023);
int mipDivisor = 1 << level;
int page = pos.y / 1024;
pos.y -= (page * 1024); // remove page from tex y
ivec2 retPos;
retPos.x = mipXBase[level] + (pos.x / mipDivisor);
retPos.y = mipYBase[level] + (pos.y / mipDivisor);
retPos.y += (page * 1024); // add page back to tex y
return retPos;
}
ivec2 GetTextureSize(int level, ivec2 size)
{
int mipDivisor = 1 << level;
return size / mipDivisor;
}
ivec2 GetMicroTexturePos(int id)
{
const int xCoords[8] = int[](0, 0, 128, 128, 0, 0, 128, 128);
const int yCoords[8] = int[](0, 128, 0, 128, 256, 384, 256, 384);
return ivec2(xCoords[id],yCoords[id]);
}
int GetPage(int yCoord)
{
return yCoord / 1024;
}
int GetNextPage(int yCoord)
{
return (GetPage(yCoord) + 1) & 1;
}
int GetNextPageOffset(int yCoord)
{
return GetNextPage(yCoord) * 1024;
}
// wrapping tex coords would be super easy but we combined tex sheets so have to handle wrap around between sheets
// hardware testing would be useful because i don't know exactly what happens if you try to read outside the texture sheet
// wrap around is a good guess
ivec2 WrapTexCoords(ivec2 pos, ivec2 coordinate)
{
ivec2 newCoord;
newCoord.x = coordinate.x & 2047;
newCoord.y = coordinate.y;
int page = GetPage(pos.y);
newCoord.y -= (page * 1024); // remove page
newCoord.y &= 1023; // wrap around in the same sheet
newCoord.y += (page * 1024); // add page back
return newCoord;
}
float mip_map_level(in vec2 texture_coordinate) // in texel units
{
vec2 dx_vtc = dFdx(texture_coordinate);
vec2 dy_vtc = dFdy(texture_coordinate);
float delta_max_sqr = max(dot(dx_vtc, dx_vtc), dot(dy_vtc, dy_vtc));
float mml = 0.5 * log2(delta_max_sqr);
return max( 0, mml );
}
float LinearTexLocations(int wrapMode, float size, float u, out float u0, out float u1)
{
float texelSize = 1.0 / size;
float halfTexelSize = 0.5 / size;
if(wrapMode==0) { // repeat
u = (u * size) - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u0 = fract(u0);
u1 = u0 + texelSize;
u1 = fract(u1);
return fract(u); // return weight
}
else if(wrapMode==1) { // repeat + clamp
u = fract(u); // must force into 0-1 to start
u = (u * size) - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u1 = u0 + texelSize;
if(u0 < 0.0) u0 = 0.0;
if(u1 >= 1.0) u1 = 1.0 - halfTexelSize;
return fract(u); // return weight
}
else { // mirror + mirror clamp - both are the same since the edge pixels are repeated anyway
float odd = floor(mod(u, 2.0)); // odd values are mirrored
if(odd > 0.0) {
u = 1.0 - fract(u);
}
else {
u = fract(u);
}
u = (u * size) - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u1 = u0 + texelSize;
if(u0 < 0.0) u0 = 0.0;
if(u1 >= 1.0) u1 = 1.0 - halfTexelSize;
return fract(u); // return weight
}
}
vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texPos, vec2 texCoord)
{
float tx[2], ty[2];
float a = LinearTexLocations(wrapMode.s, texSize.x, texCoord.x, tx[0], tx[1]);
float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]);
vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos)), 0).r);
vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos)), 0).r);
vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos)), 0).r);
vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos)), 0).r);
if(alphaTest) {
if(p0q0.a > p1q0.a) { p1q0.rgb = p0q0.rgb; }
if(p0q0.a > p0q1.a) { p0q1.rgb = p0q0.rgb; }
if(p1q0.a > p0q0.a) { p0q0.rgb = p1q0.rgb; }
if(p1q0.a > p1q1.a) { p1q1.rgb = p1q0.rgb; }
if(p0q1.a > p0q0.a) { p0q0.rgb = p0q1.rgb; }
if(p0q1.a > p1q1.a) { p1q1.rgb = p0q1.rgb; }
if(p1q1.a > p0q1.a) { p0q1.rgb = p1q1.rgb; }
if(p1q1.a > p1q0.a) { p1q0.rgb = p1q1.rgb; }
}
// Interpolation in X direction.
vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.
return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
}
vec4 textureR3D(usampler2D texSampler, ivec2 wrapMode, ivec2 texSize, ivec2 texPos, vec2 texCoord)
{
float numLevels = floor(log2(min(float(texSize.x), float(texSize.y)))); // r3d only generates down to 1:1 for square textures, otherwise its the min dimension
float fLevel = min(mip_map_level(texCoord * vec2(texSize)), numLevels);
if(alphaTest) fLevel *= 0.5;
else fLevel *= 0.8;
int iLevel = int(fLevel);
ivec2 texPos0 = GetTexturePosition(iLevel,texPos);
ivec2 texPos1 = GetTexturePosition(iLevel+1,texPos);
ivec2 texSize0 = GetTextureSize(iLevel, texSize);
ivec2 texSize1 = GetTextureSize(iLevel+1, texSize);
vec4 texLevel0 = texBiLinear(texSampler, wrapMode, vec2(texSize0), texPos0, texCoord);
vec4 texLevel1 = texBiLinear(texSampler, wrapMode, vec2(texSize1), texPos1, texCoord);
return mix(texLevel0, texLevel1, fract(fLevel)); // linear blend between our mipmap levels
}
vec4 GetTextureValue()
{
vec4 tex1Data = textureR3D(tex1, textureWrapMode, ivec2(baseTexInfo.zw), ivec2(baseTexInfo.xy), fsTexCoord);
if(textureInverted) {
tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb);
}
if (microTexture) {
vec2 scale = (vec2(baseTexInfo.zw) / 128.0) * microTextureScale;
ivec2 pos = GetMicroTexturePos(microTextureID);
// add page offset to microtexture position
pos.y += GetNextPageOffset(baseTexInfo.y);
vec4 tex2Data = textureR3D(tex1, ivec2(0), ivec2(128), pos, fsTexCoord * scale);
float lod = mip_map_level(fsTexCoord * scale * vec2(128.0));
float blendFactor = max(lod - 1.5, 0.0); // bias -1.5
blendFactor = min(blendFactor, 1.0); // clamp to max value 1
blendFactor = (blendFactor + 1.0) / 2.0; // 0.5 - 1 range
tex1Data = mix(tex2Data, tex1Data, blendFactor);
}
if (alphaTest) {
if (tex1Data.a < (32.0/255.0)) {
discard;
}
}
if(textureAlpha) {
if(discardAlpha) { // opaque 1st pass
if (tex1Data.a < 1.0) {
discard;
}
}
else { // transparent 2nd pass
if ((tex1Data.a * fsColor.a) >= 1.0) {
discard;
}
}
}
if (textureAlpha == false) {
tex1Data.a = 1.0;
}
return tex1Data;
}
void Step15Luminous(inout vec4 colour)
{
// luminous polys seem to behave very differently on step 1.5 hardware
// when fixed shading is enabled the colour is modulated by the vp ambient + fixed shade value
// when disabled it appears to be multiplied by 1.5, presumably to allow a higher range
if(hardwareStep==0x15) {
if(!lightEnabled && textureEnabled) {
if(fixedShading) {
colour.rgb *= 1.0 + fsFixedShade + lighting[1].y;
}
else {
colour.rgb *= vec3(1.5);
}
}
}
}
float CalcFog()
{
float z = -fsViewVertex.z;
float fog = fogIntensity * clamp(fogStart + z * fogDensity, 0.0, 1.0);
return fog;
}
void main()
{
vec4 tex1Data;
vec4 colData;
vec4 finalData;
vec4 fogData;
if(fsDiscard > 0) {
discard; //emulate back face culling here
}
fogData = vec4(fogColour.rgb * fogAmbient, CalcFog());
tex1Data = vec4(1.0, 1.0, 1.0, 1.0);
if(textureEnabled) {
tex1Data = GetTextureValue();
}
colData = fsColor;
Step15Luminous(colData); // no-op for step 2.0+
finalData = tex1Data * colData;
if (finalData.a < (1.0/16.0)) { // basically chuck out any totally transparent pixels value = 1/16 the smallest transparency level h/w supports
discard;
}
float ellipse;
ellipse = length((gl_FragCoord.xy - spotEllipse.xy) / spotEllipse.zw);
ellipse = pow(ellipse, 2.0); // decay rate = square of distance from center
ellipse = 1.0 - ellipse; // invert
ellipse = max(0.0, ellipse); // clamp
// Compute spotlight and apply lighting
float enable, absExtent, d, inv_r, range;
// start of spotlight
enable = step(spotRange.x, -fsViewVertex.z);
if (spotRange.y == 0.0) {
range = 0.0;
}
else {
absExtent = abs(spotRange.y);
d = spotRange.x + absExtent + fsViewVertex.z;
d = min(d, 0.0);
// slope of decay function
inv_r = 1.0 / (1.0 + absExtent);
// inverse-linear falloff
// Reference: https://imdoingitwrong.wordpress.com/2011/01/31/light-attenuation/
// y = 1 / (d/r + 1)^2
range = 1.0 / pow(d * inv_r - 1.0, 2.0);
range *= enable;
}
float lobeEffect = range * ellipse;
float lobeFogEffect = enable * ellipse;
if (lightEnabled) {
vec3 lightIntensity;
vec3 sunVector; // sun lighting vector (as reflecting away from vertex)
float sunFactor; // sun light projection along vertex normal (0.0 to 1.0)
// Sun angle
sunVector = lighting[0];
// Compute diffuse factor for sunlight
if(fixedShading) {
sunFactor = fsFixedShade;
}
else {
sunFactor = dot(sunVector, fsViewNormal);
}
// Clamp ceil, fix for upscaled models without "modelScale" defined
sunFactor = clamp(sunFactor,-1.0,1.0);
// Optional clamping, value is allowed to be negative
if(sunClamp) {
sunFactor = max(sunFactor,0.0);
}
// Total light intensity: sum of all components
lightIntensity = vec3(sunFactor*lighting[1].x + lighting[1].y); // diffuse + ambient
lightIntensity.rgb += spotColor*lobeEffect;
// Upper clamp is optional, step 1.5+ games will drive brightness beyond 100%
if(intensityClamp) {
lightIntensity = min(lightIntensity,1.0);
}
finalData.rgb *= lightIntensity;
// for now assume fixed shading doesn't work with specular
if (specularEnabled) {
float exponent, NdotL, specularFactor;
vec4 biasIndex, expIndex, multIndex;
// Always clamp floor to zero, we don't want deep black areas
NdotL = max(0.0,sunFactor);
expIndex = vec4(8.0, 16.0, 32.0, 64.0);
multIndex = vec4(2.0, 2.0, 3.0, 4.0);
biasIndex = vec4(0.95, 0.95, 1.05, 1.0);
exponent = expIndex[int(shininess)] / biasIndex[int(shininess)];
specularFactor = pow(NdotL, exponent);
specularFactor *= multIndex[int(shininess)];
specularFactor *= biasIndex[int(shininess)];
specularFactor *= specularValue;
specularFactor *= lighting[1].x;
if (colData.a < 1.0) {
/// Specular hi-light affects translucent polygons alpha channel ///
finalData.a = max(finalData.a, specularFactor);
}
finalData.rgb += vec3(specularFactor);
}
}
// Final clamp: we need it for proper shading in dimmed light and dark ambients
finalData.rgb = min(finalData.rgb, vec3(1.0));
// Spotlight on fog
vec3 lSpotFogColor = spotFogColor * fogAttenuation * fogColour.rgb * lobeFogEffect;
// Fog & spotlight applied
finalData.rgb = mix(finalData.rgb, fogData.rgb + lSpotFogColor, fogData.a);
outColor = finalData;
}
)glsl";
#endif