Rewrite the renderer a bit to spit out the finished graphics from the 3d chip on separate buffers. One buffer is for opaque pixels, and 2 more for translucent pixels. Before the frame was composited on the back buffer, which meant the tilegen had to have been drawn first. This way the images are now totally independant of the tilegen chip so can be drawn as soon as the register write 0xC is written to the tilegen.

Some games update the tilegen after the ping_ping bit has flipped at 66% of the frame, so we need to split the tilegen drawing up into two stages to get some effects to work. So having the tilegen draw independantly of the 3d chip can make this happen.
This commit is contained in:
Ian Curtis 2023-10-14 20:05:00 +01:00
parent d656643087
commit 6595b9320e
17 changed files with 510 additions and 1004 deletions

View file

@ -725,7 +725,7 @@ void CLegacy3D::InsertVertex(ModelCache *Cache, const Vertex *V, const Poly *P,
// Specular shininess
GLfloat specularCoefficient = (GLfloat) ((P->header[0]>>26) & 0x3F) * (1.0f/63.0f);
int shinyBits = (P->header[6] >> 5) & 3;
float shininess = std::exp2f(1 + shinyBits);
float shininess = std::exp2f(1.0f + shinyBits);
if (!(P->header[0]&0x80)) //|| (shininess == 0)) // bit 0x80 seems to enable specular lighting
{
specularCoefficient = 0.; // disable

View file

@ -9,6 +9,7 @@ NodeAttributes::NodeAttributes()
currentPage = 0;
currentClipStatus = Clip::INTERCEPT;
currentModelScale = 1.0f;
currentModelAlpha = 1.0;
}
bool NodeAttributes::Push()
@ -27,6 +28,7 @@ bool NodeAttributes::Push()
na.texOffsetY = currentTexOffsetY;
na.clip = currentClipStatus;
na.modelScale = currentModelScale;
na.modelAlpha = currentModelAlpha;
m_vecAttribs.emplace_back(na);
@ -46,6 +48,7 @@ bool NodeAttributes::Pop()
currentTexOffsetY = last.texOffsetY;
currentClipStatus = last.clip;
currentModelScale = last.modelScale;
currentModelAlpha = last.modelAlpha;
m_vecAttribs.pop_back();
@ -64,6 +67,7 @@ void NodeAttributes::Reset()
currentTexOffsetY = 0;
currentClipStatus = Clip::INTERCEPT;
currentModelScale = 1.0f;
currentModelAlpha = 1.0f;
m_vecAttribs.clear();
}

View file

@ -97,22 +97,24 @@ enum class Layer { colour, trans1, trans2, trans12 /*both 1&2*/, all, none };
struct Mesh
{
//helper funcs
bool Render(Layer layer)
bool Render(Layer layer, float nodeAlpha)
{
bool nAlpha = nodeAlpha < 1.0f;
switch (layer)
{
case Layer::colour:
if (polyAlpha) {
if (polyAlpha || nAlpha) {
return false;
}
break;
case Layer::trans1:
if ((!textureAlpha && !polyAlpha) || transLSelect) {
if ((!textureAlpha && !polyAlpha && !nAlpha) || transLSelect) {
return false;
}
break;
case Layer::trans2:
if ((!textureAlpha && !polyAlpha) || !transLSelect) {
if ((!textureAlpha && !polyAlpha && !nAlpha) || !transLSelect) {
return false;
}
break;
@ -183,6 +185,9 @@ struct Model
//model scale step 1.5+
float scale = 1.0f;
//node transparency
float alpha = 1.0f;
};
struct Viewport
@ -236,6 +241,7 @@ public:
int currentPage;
Clip currentClipStatus;
float currentModelScale;
float currentModelAlpha;
private:
@ -246,6 +252,7 @@ private:
int page;
Clip clip;
float modelScale;
float modelAlpha; // from culling node
};
std::vector<NodeAttribs> m_vecAttribs;
};

View file

@ -232,7 +232,7 @@ bool CNew3D::RenderScene(int priority, bool renderOverlay, Layer layer)
hasOverlay = true;
}
if (!mesh.Render(layer)) continue;
if (!mesh.Render(layer, m.alpha)) continue;
if (mesh.highPriority != renderOverlay) continue;
if (!matrixLoaded) {
@ -274,11 +274,13 @@ void CNew3D::SetRenderStates()
glDepthMask (GL_TRUE);
glActiveTexture (GL_TEXTURE0);
glDisable (GL_CULL_FACE); // we'll emulate this in the shader
glDisable (GL_BLEND);
glStencilFunc (GL_EQUAL, 0, 0xFF); // basically stencil test passes if the value is zero
glStencilOp (GL_KEEP, GL_INCR, GL_INCR); // if the stencil test passes, we increment the value
glStencilMask (0xFF);
glBlendFunc (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glDisable (GL_BLEND);
}
void CNew3D::DisableRenderStates()
@ -313,7 +315,6 @@ void CNew3D::RenderFrame(void)
m_nodeAttribs.Reset();
RenderViewport(0x800000); // build model structure
DrawScrollFog(); // fog layer if applicable must be drawn here
m_vbo.Bind(true);
m_vbo.BufferSubData(MAX_ROM_VERTS*sizeof(FVertex), m_polyBufferRam.size()*sizeof(FVertex), m_polyBufferRam.data()); // upload all the dynamic data to GPU in one go
@ -339,8 +340,10 @@ void CNew3D::RenderFrame(void)
}
}
m_r3dFrameBuffers.SetFBO(Layer::trans12);
glClear(GL_COLOR_BUFFER_BIT); // wipe both trans layers
m_r3dFrameBuffers.SetFBO(Layer::colour); // colour will draw to all 3 buffers. For regular opaque pixels the transparent layers will be essentially masked
glClear(GL_COLOR_BUFFER_BIT);
DrawScrollFog(); // fog layer if applicable must be drawn here
for (int pri = 0; pri <= 3; pri++) {
@ -350,33 +353,31 @@ void CNew3D::RenderFrame(void)
bool renderOverlay = (i == 1);
m_r3dFrameBuffers.SetFBO(Layer::colour);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
SetRenderStates();
m_r3dShader.DiscardAlpha(true); // discard all translucent pixels in opaque pass
m_r3dFrameBuffers.SetFBO(Layer::colour);
glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
m_r3dShader.DiscardAlpha(true);
m_r3dShader.SetLayer(Layer::colour);
bool hasOverlay = RenderScene(pri, renderOverlay, Layer::colour);
if (!renderOverlay) {
ProcessLos(pri);
}
DisableRenderStates();
glDepthFunc(GL_LESS);
m_r3dFrameBuffers.DrawOverTransLayers(); // mask trans layer with opaque pixels
m_r3dFrameBuffers.CompositeBaseLayer(); // copy opaque pixels to back buffer
m_r3dShader.DiscardAlpha(false);
SetRenderStates();
glDepthFunc(GL_LESS); // alpha polys seem to use gl_less (ocean hunter)
m_r3dShader.DiscardAlpha (false); // render only translucent pixels
m_r3dFrameBuffers.StoreDepth (); // save depth buffer for 1st trans pass
m_r3dFrameBuffers.StoreDepth();
m_r3dShader.SetLayer(Layer::trans1);
m_r3dFrameBuffers.SetFBO(Layer::trans1);
RenderScene(pri, renderOverlay, Layer::trans1);
m_r3dFrameBuffers.RestoreDepth (); // restore depth buffer, trans layers don't seem to depth test against each other
m_r3dFrameBuffers.RestoreDepth();
m_r3dShader.SetLayer(Layer::trans2);
m_r3dFrameBuffers.SetFBO(Layer::trans2);
RenderScene(pri, renderOverlay, Layer::trans2);
@ -386,7 +387,8 @@ void CNew3D::RenderFrame(void)
}
}
m_r3dFrameBuffers.CompositeAlphaLayer();
m_r3dFrameBuffers.SetFBO(Layer::none);
m_r3dFrameBuffers.Draw();
}
void CNew3D::BeginFrame(void)
@ -475,6 +477,7 @@ bool CNew3D::DrawModel(UINT32 modelAddr)
m->textureOffsetY = m_nodeAttribs.currentTexOffsetY;
m->page = m_nodeAttribs.currentPage;
m->scale = m_nodeAttribs.currentModelScale;
m->alpha = m_nodeAttribs.currentModelAlpha;
if (!cached) {
CacheModel(m, modelAddress);
@ -588,6 +591,8 @@ void CNew3D::DescendCullingNode(UINT32 addr)
}
}
m_nodeAttribs.currentModelAlpha = 1; // TODO fade out if required
// Apply matrix and translation
m_modelMat.PushMatrix();
@ -1387,23 +1392,6 @@ bool CNew3D::IsVROMModel(UINT32 modelAddr)
return modelAddr >= 0x100000;
}
void CNew3D::CalcTexOffset(int offX, int offY, int page, int x, int y, int& newX, int& newY)
{
newX = (x + offX) & 2047; // wrap around 2048, shouldn't be required
int oldPage = y / 1024;
y -= (oldPage * 1024); // remove page from tex y
// calc newY with wrap around, wraps around in the same sheet, not into another memory sheet
newY = (y + offY) & 1023;
// add page to Y
newY += ((oldPage + page) & 1) * 1024; // max page 0-1
}
void CNew3D::CalcFrustumPlanes(Plane p[5], const float* matrix)
{
// Left Plane

View file

@ -223,8 +223,6 @@ private:
void TranslateLosPosition(int inX, int inY, int& outX, int& outY);
bool ProcessLos(int priority);
void CalcTexOffset(int offX, int offY, int page, int x, int y, int& newX, int& newY);
/*
* Data
*/

View file

@ -1,7 +1,4 @@
#include "R3DFrameBuffers.h"
#include "Mat4.h"
#define countof(a) (sizeof(a)/sizeof(*(a)))
namespace New3D {
@ -23,7 +20,6 @@ R3DFrameBuffers::R3DFrameBuffers()
AllocShaderTrans();
AllocShaderBase();
AllocShaderWipe();
glGenVertexArrays(1, &m_vao);
glBindVertexArray(m_vao);
@ -36,7 +32,7 @@ R3DFrameBuffers::~R3DFrameBuffers()
DestroyFBO();
m_shaderTrans.UnloadShaders();
m_shaderBase.UnloadShaders();
m_shaderWipe.UnloadShaders();
if (m_vao) {
glDeleteVertexArrays(1, &m_vao);
m_vao = 0;
@ -165,26 +161,24 @@ void R3DFrameBuffers::SetFBO(Layer layer)
switch (layer)
{
case Layer::colour:
case Layer::trans1:
case Layer::trans2:
{
glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
GLenum buffers[] = { GL_COLOR_ATTACHMENT0 + (GLenum)layer };
glDrawBuffers(countof(buffers), buffers);
break;
}
case Layer::trans12:
{
glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
GLenum buffers[] = { GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 };
glDrawBuffers(countof(buffers), buffers);
break;
}
case Layer::all:
{
glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
GLenum buffers[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 };
glDrawBuffers(countof(buffers), buffers);
glDrawBuffers((GLsizei)std::size(buffers), buffers);
break;
}
case Layer::trans1:
{
glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
GLenum buffers[] = { GL_NONE, GL_COLOR_ATTACHMENT1, GL_NONE };
glDrawBuffers((GLsizei)std::size(buffers), buffers);
break;
}
case Layer::trans2:
{
glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
GLenum buffers[] = { GL_NONE, GL_NONE, GL_COLOR_ATTACHMENT2 };
glDrawBuffers((GLsizei)std::size(buffers), buffers);
break;
}
case Layer::none:
@ -302,58 +296,6 @@ void R3DFrameBuffers::AllocShaderTrans()
m_shaderTrans.uniformLoc[1] = m_shaderTrans.GetUniformLocation("tex2");
}
void R3DFrameBuffers::AllocShaderWipe()
{
const char *vertexShader = R"glsl(
#version 410 core
// outputs
out vec2 fsTexCoord;
void main(void)
{
const vec4 vertices[] = vec4[](vec4(-1.0, -1.0, 0.0, 1.0),
vec4(-1.0, 1.0, 0.0, 1.0),
vec4( 1.0, -1.0, 0.0, 1.0),
vec4( 1.0, 1.0, 0.0, 1.0));
fsTexCoord = (vertices[gl_VertexID % 4].xy + 1.0) / 2.0;
gl_Position = vertices[gl_VertexID % 4];
}
)glsl";
const char *fragmentShader = R"glsl(
#version 410 core
uniform sampler2D texColor; // base colour layer
in vec2 fsTexCoord;
// outputs
layout (location = 0) out vec4 fragColor0;
layout (location = 1) out vec4 fragColor1;
void main()
{
vec4 colBase = texture(texColor, fsTexCoord);
if(colBase.a == 0.0) {
discard; // no colour pixels have been written
}
fragColor0 = vec4(0.0); // wipe these parts of the alpha buffer
fragColor1 = vec4(0.0); // since they have been overwritten by the next priority layer
}
)glsl";
m_shaderWipe.LoadShaders(vertexShader, fragmentShader);
m_shaderWipe.uniformLoc[0] = m_shaderWipe.GetUniformLocation("texColor");
}
void R3DFrameBuffers::Draw()
{
SetFBO (Layer::none); // make sure to draw on the back buffer
@ -362,7 +304,7 @@ void R3DFrameBuffers::Draw()
glDisable (GL_CULL_FACE);
glDisable (GL_BLEND);
for (int i = 0; i < countof(m_texIDs); i++) { // bind our textures to correct texture units
for (int i = 0; i < (int)std::size(m_texIDs); i++) { // bind our textures to correct texture units
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, m_texIDs[i]);
}
@ -381,77 +323,10 @@ void R3DFrameBuffers::Draw()
glBindVertexArray (0);
}
void R3DFrameBuffers::CompositeBaseLayer()
{
SetFBO(Layer::none); // make sure to draw on the back buffer
glViewport(0, 0, m_width, m_height); // cover the entire screen
glDisable(GL_DEPTH_TEST); // disable depth testing / writing
glDisable(GL_CULL_FACE);
glDisable(GL_BLEND);
for (int i = 0; i < countof(m_texIDs); i++) { // bind our textures to correct texture units
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, m_texIDs[i]);
}
glActiveTexture(GL_TEXTURE0);
glBindVertexArray(m_vao);
DrawBaseLayer();
glBindVertexArray(0);
}
void R3DFrameBuffers::CompositeAlphaLayer()
{
SetFBO(Layer::none); // make sure to draw on the back buffer
glViewport(0, 0, m_width, m_height); // cover the entire screen
glDisable(GL_DEPTH_TEST); // disable depth testing / writing
glDisable(GL_CULL_FACE);
for (int i = 0; i < countof(m_texIDs); i++) { // bind our textures to correct texture units
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_2D, m_texIDs[i]);
}
glActiveTexture(GL_TEXTURE0);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glEnable(GL_BLEND);
glBindVertexArray(m_vao);
DrawAlphaLayer();
glDisable(GL_BLEND);
glBindVertexArray(0);
}
void R3DFrameBuffers::DrawOverTransLayers()
{
SetFBO(Layer::trans12); // need to write to both layers
glViewport (0, 0, m_width, m_height); // cover the entire screen
glDisable (GL_DEPTH_TEST); // disable depth testing / writing
glDisable (GL_CULL_FACE);
glDisable (GL_BLEND);
glActiveTexture (GL_TEXTURE0);
glBindTexture (GL_TEXTURE_2D, m_texIDs[0]);
glBindVertexArray(m_vao);
m_shaderWipe.EnableShader();
glUniform1i(m_shaderWipe.uniformLoc[0], 0);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
m_shaderWipe.DisableShader();
glBindVertexArray(0);
}
void R3DFrameBuffers::DrawBaseLayer()
{
m_shaderBase.EnableShader();
glUniform1i(m_shaderTrans.uniformLoc[0], 0); // to do check this
glUniform1i(m_shaderTrans.uniformLoc[0], 0);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

View file

@ -15,9 +15,6 @@ public:
~R3DFrameBuffers();
void Draw(); // draw and composite the transparent layers
void CompositeBaseLayer();
void CompositeAlphaLayer();
void DrawOverTransLayers(); // opaque pixels in next priority layer need to wipe trans pixels
bool CreateFBO(int width, int height);
void DestroyFBO();
@ -33,7 +30,6 @@ private:
GLuint CreateTexture(int width, int height);
void AllocShaderTrans();
void AllocShaderBase();
void AllocShaderWipe();
void DrawBaseLayer();
void DrawAlphaLayer();
@ -50,7 +46,6 @@ private:
// shaders
GLSLShader m_shaderBase;
GLSLShader m_shaderTrans;
GLSLShader m_shaderWipe;
// vao
GLuint m_vao; // this really needed if we don't actually use vertex attribs?

View file

@ -40,7 +40,25 @@ vec3 lFogColor;
vec4 scrollFog;
// outputs
out vec4 fragColor;
layout(location = 0) out vec4 out0; // opaque
layout(location = 1) out vec4 out1; // trans layer 1
layout(location = 2) out vec4 out2; // trans layer 2
void WriteOutputs(vec4 colour)
{
vec4 blank = vec4(0.0);
if(colour.a < 1.0) { // some transparency
out0 = blank;
out1 = colour;
out2 = blank;
}
else { // opaque
out0 = colour;
out1 = blank;
out2 = blank;
}
}
void main()
{
@ -62,7 +80,7 @@ void main()
scrollFog = vec4(lFogColor + lSpotFogColor, fogColour.a);
// Final Color
fragColor = scrollFog;
WriteOutputs(scrollFog);
}
)glsl";
@ -71,7 +89,6 @@ void main()
R3DScrollFog::R3DScrollFog(const Util::Config::Node &config)
: m_config(config),
m_vao(0)
{
m_shaderProgram = 0;
m_vertexShader = 0;

View file

@ -1,6 +1,7 @@
#include "R3DShader.h"
#include "R3DShaderQuads.h"
#include "R3DShaderTriangles.h"
#include "R3DShaderCommon.h"
// having 2 sets of shaders to maintain is really less than ideal
// but hopefully not too many breaking changes at this point
@ -31,6 +32,7 @@ void R3DShader::Start()
m_fixedShading = false;
m_translatorMap = false;
m_modelScale = 1.0f;
m_nodeAlpha = 1.0f;
m_shininess = 0;
m_specularValue = 0;
m_microTexScale = 0;
@ -62,30 +64,27 @@ bool R3DShader::LoadShader(const char* vertexShader, const char* fragmentShader)
const char* gShader = "";
const char* fShader = fragmentShaderR3D;
std::string fragmentShaderCombined;
if (quads) {
vShader = vertexShaderR3DQuads;
gShader = geometryShaderR3DQuads;
fragmentShaderCombined += fragmentShaderR3DQuads1;
fragmentShaderCombined += fragmentShaderR3DQuads2;
fShader = fragmentShaderCombined.c_str();
fShader = fragmentShaderR3DQuads;
}
m_shaderProgram = glCreateProgram();
m_vertexShader = glCreateShader(GL_VERTEX_SHADER);
m_fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(m_vertexShader, 1, (const GLchar **)&vShader, NULL);
glShaderSource(m_fragmentShader, 1, (const GLchar **)&fShader, NULL);
const char* shaderArray[] = { fShader, fragmentShaderR3DCommon };
glShaderSource(m_vertexShader, 1, (const GLchar **)&vShader, nullptr);
glShaderSource(m_fragmentShader, (GLsizei)std::size(shaderArray), shaderArray, nullptr);
glCompileShader(m_vertexShader);
glCompileShader(m_fragmentShader);
if (quads) {
m_geoShader = glCreateShader(GL_GEOMETRY_SHADER);
glShaderSource(m_geoShader, 1, (const GLchar **)&gShader, NULL);
glShaderSource(m_geoShader, 1, (const GLchar **)&gShader, nullptr);
glCompileShader(m_geoShader);
glAttachShader(m_shaderProgram, m_geoShader);
PrintShaderResult(m_geoShader);
@ -111,6 +110,7 @@ bool R3DShader::LoadShader(const char* vertexShader, const char* fragmentShader)
m_locBaseTexType = glGetUniformLocation(m_shaderProgram, "baseTexType");
m_locTextureInverted = glGetUniformLocation(m_shaderProgram, "textureInverted");
m_locTexWrapMode = glGetUniformLocation(m_shaderProgram, "textureWrapMode");
m_locColourLayer = glGetUniformLocation(m_shaderProgram, "colourLayer");
m_locFogIntensity = glGetUniformLocation(m_shaderProgram, "fogIntensity");
m_locFogDensity = glGetUniformLocation(m_shaderProgram, "fogDensity");
@ -134,6 +134,7 @@ bool R3DShader::LoadShader(const char* vertexShader, const char* fragmentShader)
m_locSpotColor = glGetUniformLocation(m_shaderProgram, "spotColor");
m_locSpotFogColor = glGetUniformLocation(m_shaderProgram, "spotFogColor");
m_locModelScale = glGetUniformLocation(m_shaderProgram, "modelScale");
m_locNodeAlpha = glGetUniformLocation(m_shaderProgram, "nodeAlpha");
m_locProjMat = glGetUniformLocation(m_shaderProgram, "projMat");
m_locModelMat = glGetUniformLocation(m_shaderProgram, "modelMat");
@ -338,6 +339,11 @@ void R3DShader::SetModelStates(const Model* model)
m_modelScale = model->scale;
}
if (m_dirtyModel || model->alpha != m_nodeAlpha) {
glUniform1f(m_locNodeAlpha, model->alpha);
m_nodeAlpha = model->alpha;
}
m_transX = model->textureOffsetX;
m_transY = model->textureOffsetY;
m_transPage = model->page;
@ -355,6 +361,11 @@ void R3DShader::DiscardAlpha(bool discard)
glUniform1i(m_locDiscardAlpha, discard);
}
void R3DShader::SetLayer(Layer layer)
{
glUniform1i(m_locColourLayer, (GLint)layer);
}
void R3DShader::PrintShaderResult(GLuint shader)
{
//===========

View file

@ -23,6 +23,7 @@ public:
void SetShader (bool enable = true);
GLint GetVertexAttribPos (const std::string& attrib);
void DiscardAlpha (bool discard); // use to remove alpha from texture alpha only polys for 1st pass
void SetLayer (Layer layer);
private:
@ -53,6 +54,7 @@ private:
GLint m_locTextureInverted;
GLint m_locTexWrapMode;
GLint m_locTranslatorMap;
GLint m_locColourLayer;
// cached mesh values
bool m_textured1;
@ -77,6 +79,7 @@ private:
// cached model values
float m_modelScale;
float m_nodeAlpha;
int m_transX;
int m_transY;
int m_transPage;
@ -111,6 +114,7 @@ private:
// model uniforms
GLint m_locModelScale;
GLint m_locNodeAlpha;
GLint m_locModelMat;
// global uniforms

View file

@ -0,0 +1,361 @@
#pragma once
// I altered this code a bit to make sure it always compiles with gl 4.1. Version 4.5 allows you to specify arrays differently.
// Ripped out most of the common code, people have been pushing changes to the shaders but we are ending up with diverging implementations
// between triangle / quad version which is less than ideal.
static const char* fragmentShaderR3DCommon = R"glsl(
#define LayerColour 0x0
#define LayerTrans0 0x1
#define LayerTrans1 0x2
vec4 ExtractColour(int type, uint value)
{
vec4 c = vec4(0.0);
if(type==0) { // T1RGB5
c.r = float((value >> 10) & 0x1Fu);
c.g = float((value >> 5 ) & 0x1Fu);
c.b = float((value ) & 0x1Fu);
c.rgb *= (1.0/31.0);
c.a = 1.0 - float((value >> 15) & 0x1u);
}
else if(type==1) { // Interleaved A4L4 (low byte)
c.rgb = vec3(float(value&0xFu));
c.a = float((value >> 4) & 0xFu);
c *= (1.0/15.0);
}
else if(type==2) {
c.a = float(value&0xFu);
c.rgb = vec3(float((value >> 4) & 0xFu));
c *= (1.0/15.0);
}
else if(type==3) {
c.rgb = vec3(float((value>>8)&0xFu));
c.a = float((value >> 12) & 0xFu);
c *= (1.0/15.0);
}
else if(type==4) {
c.a = float((value>>8)&0xFu);
c.rgb = vec3(float((value >> 12) & 0xFu));
c *= (1.0/15.0);
}
else if(type==5) {
c = vec4(float(value&0xFFu) / 255.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==6) {
c = vec4(float((value>>8)&0xFFu) / 255.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==7) { // RGBA4
c.r = float((value>>12)&0xFu);
c.g = float((value>> 8)&0xFu);
c.b = float((value>> 4)&0xFu);
c.a = float((value>> 0)&0xFu);
c *= (1.0/15.0);
}
else if(type==8) { // low byte, low nibble
c = vec4(float(value&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==9) { // low byte, high nibble
c = vec4(float((value>>4)&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==10) { // high byte, low nibble
c = vec4(float((value>>8)&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==11) { // high byte, high nibble
c = vec4(float((value>>12)&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
return c;
}
int GetPage(int yCoord)
{
return yCoord / 1024;
}
int GetNextPage(int yCoord)
{
return (GetPage(yCoord) + 1) & 1;
}
int GetNextPageOffset(int yCoord)
{
return GetNextPage(yCoord) * 1024;
}
// wrapping tex coords would be super easy but we combined tex sheets so have to handle wrap around between sheets
// hardware testing would be useful because i don't know exactly what happens if you try to read outside the texture sheet
// wrap around is a good guess
ivec2 WrapTexCoords(ivec2 pos, ivec2 coordinate)
{
ivec2 newCoord;
newCoord.x = coordinate.x & 2047;
newCoord.y = coordinate.y;
int page = GetPage(pos.y);
newCoord.y -= (page * 1024); // remove page
newCoord.y &= 1023; // wrap around in the same sheet
newCoord.y += (page * 1024); // add page back
return newCoord;
}
ivec2 GetTextureSize(int level, ivec2 size)
{
int mipDivisor = 1 << level;
return size / mipDivisor;
}
ivec2 GetTexturePosition(int level, ivec2 pos)
{
const int mipXBase[] = int[](0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047);
const int mipYBase[] = int[](0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023);
int mipDivisor = 1 << level;
int page = pos.y / 1024;
pos.y -= (page * 1024); // remove page from tex y
ivec2 retPos;
retPos.x = mipXBase[level] + (pos.x / mipDivisor);
retPos.y = mipYBase[level] + (pos.y / mipDivisor);
retPos.y += (page * 1024); // add page back to tex y
return retPos;
}
ivec2 GetMicroTexturePos(int id)
{
const int xCoords[8] = int[](0, 0, 128, 128, 0, 0, 128, 128);
const int yCoords[8] = int[](0, 128, 0, 128, 256, 384, 256, 384);
return ivec2(xCoords[id],yCoords[id]);
}
float mip_map_level(in vec2 texture_coordinate) // in texel units
{
vec2 dx_vtc = dFdx(texture_coordinate);
vec2 dy_vtc = dFdy(texture_coordinate);
float delta_max_sqr = max(dot(dx_vtc, dx_vtc), dot(dy_vtc, dy_vtc));
float mml = 0.5 * log2(delta_max_sqr);
return max( 0.0, mml );
}
float LinearTexLocations(int wrapMode, float size, float u, out float u0, out float u1)
{
float texelSize = 1.0 / size;
float halfTexelSize = 0.5 / size;
if(wrapMode==0) { // repeat
u = u * size - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u0 = fract(u0);
u1 = u0 + texelSize;
u1 = fract(u1);
return fract(u); // return weight
}
else if(wrapMode==1) { // repeat + clamp
u = fract(u); // must force into 0-1 to start
u = u * size - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u1 = u0 + texelSize;
if(u0 < 0.0) u0 = 0.0;
if(u1 >= 1.0) u1 = 1.0 - halfTexelSize;
return fract(u); // return weight
}
else { // mirror + mirror clamp - both are the same since the edge pixels are repeated anyway
float odd = floor(mod(u, 2.0)); // odd values are mirrored
if(odd > 0.0) {
u = 1.0 - fract(u);
}
else {
u = fract(u);
}
u = u * size - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u1 = u0 + texelSize;
if(u0 < 0.0) u0 = 0.0;
if(u1 >= 1.0) u1 = 1.0 - halfTexelSize;
return fract(u); // return weight
}
}
vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texPos, vec2 texCoord)
{
float tx[2], ty[2];
float a = LinearTexLocations(wrapMode.s, texSize.x, texCoord.x, tx[0], tx[1]);
float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]);
vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos)), 0).r);
vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos)), 0).r);
vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos)), 0).r);
vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos)), 0).r);
if(alphaTest) {
if(p0q0.a > p1q0.a) { p1q0.rgb = p0q0.rgb; }
if(p0q0.a > p0q1.a) { p0q1.rgb = p0q0.rgb; }
if(p1q0.a > p0q0.a) { p0q0.rgb = p1q0.rgb; }
if(p1q0.a > p1q1.a) { p1q1.rgb = p1q0.rgb; }
if(p0q1.a > p0q0.a) { p0q0.rgb = p0q1.rgb; }
if(p0q1.a > p1q1.a) { p1q1.rgb = p0q1.rgb; }
if(p1q1.a > p0q1.a) { p0q1.rgb = p1q1.rgb; }
if(p1q1.a > p1q0.a) { p1q0.rgb = p1q1.rgb; }
}
// Interpolation in X direction.
vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.
return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
}
vec4 textureR3D(usampler2D texSampler, ivec2 wrapMode, ivec2 texSize, ivec2 texPos, vec2 texCoord)
{
float numLevels = floor(log2(min(float(texSize.x), float(texSize.y)))); // r3d only generates down to 1:1 for square textures, otherwise its the min dimension
float fLevel = min(mip_map_level(texCoord * vec2(texSize)), numLevels);
if(alphaTest) fLevel *= 0.5;
else fLevel *= 0.8;
int iLevel = int(fLevel);
ivec2 texPos0 = GetTexturePosition(iLevel,texPos);
ivec2 texPos1 = GetTexturePosition(iLevel+1,texPos);
ivec2 texSize0 = GetTextureSize(iLevel, texSize);
ivec2 texSize1 = GetTextureSize(iLevel+1, texSize);
vec4 texLevel0 = texBiLinear(texSampler, wrapMode, vec2(texSize0), texPos0, texCoord);
vec4 texLevel1 = texBiLinear(texSampler, wrapMode, vec2(texSize1), texPos1, texCoord);
return mix(texLevel0, texLevel1, fract(fLevel)); // linear blend between our mipmap levels
}
vec4 GetTextureValue()
{
vec4 tex1Data = textureR3D(tex1, textureWrapMode, ivec2(baseTexInfo.zw), ivec2(baseTexInfo.xy), fsTexCoord);
if(textureInverted) {
tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb);
}
if (microTexture) {
vec2 scale = (vec2(baseTexInfo.zw) / 128.0) * microTextureScale;
ivec2 pos = GetMicroTexturePos(microTextureID);
// add page offset to microtexture position
pos.y += GetNextPageOffset(baseTexInfo.y);
vec4 tex2Data = textureR3D(tex1, ivec2(0), ivec2(128), pos, fsTexCoord * scale);
float lod = mip_map_level(fsTexCoord * scale * vec2(128.0));
float blendFactor = max(lod - 1.5, 0.0); // bias -1.5
blendFactor = min(blendFactor, 1.0); // clamp to max value 1
blendFactor = (blendFactor + 1.0) / 2.0; // 0.5 - 1 range
tex1Data = mix(tex2Data, tex1Data, blendFactor);
}
if (alphaTest) {
if (tex1Data.a < (32.0/255.0)) {
discard;
}
}
if(textureAlpha) {
if(discardAlpha) { // opaque 1st pass
if (tex1Data.a < 1.0) {
discard;
}
}
else { // transparent 2nd pass
if ((tex1Data.a * fsColor.a) >= 1.0) {
discard;
}
}
}
if (textureAlpha == false) {
tex1Data.a = 1.0;
}
return tex1Data;
}
void Step15Luminous(inout vec4 colour)
{
// luminous polys seem to behave very differently on step 1.5 hardware
// when fixed shading is enabled the colour is modulated by the vp ambient + fixed shade value
// when disabled it appears to be multiplied by 1.5, presumably to allow a higher range
if(hardwareStep==0x15) {
if(!lightEnabled && textureEnabled) {
if(fixedShading) {
colour.rgb *= 1.0 + fsFixedShade + lighting[1].y;
}
else {
colour.rgb *= 1.5;
}
}
}
}
float CalcFog()
{
float z = -fsViewVertex.z;
float fog = fogIntensity * clamp(fogStart + z * fogDensity, 0.0, 1.0);
return fog;
}
void WriteOutputs(vec4 colour, int layer)
{
vec4 blank = vec4(0.0);
if(layer==LayerColour) {
out0 = colour;
out1 = blank;
out2 = blank;
}
else if(layer==LayerTrans0) {
out0 = blank;
out1 = colour;
out2 = blank;
}
else if(layer==LayerTrans1) {
out0 = blank;
out1 = blank;
out2 = colour;
}
}
)glsl";

View file

@ -7,6 +7,7 @@ static const char *vertexShaderR3DQuads = R"glsl(
// uniforms
uniform float modelScale;
uniform float nodeAlpha;
uniform mat4 modelMat;
uniform mat4 projMat;
uniform bool translatorMap;
@ -39,6 +40,8 @@ vec4 GetColour(vec4 colour)
c.rgb *= 16.0;
}
c.a *= nodeAlpha;
return c;
}
@ -168,7 +171,7 @@ void main(void)
)glsl";
static const char *fragmentShaderR3DQuads1 = R"glsl(
static const char *fragmentShaderR3DQuads = R"glsl(
#version 450 core
@ -207,6 +210,7 @@ uniform float fogAttenuation;
uniform float fogAmbient;
uniform bool fixedShading;
uniform int hardwareStep;
uniform int colourLayer;
// matrices (shared with vertex shader)
uniform mat4 projMat;
@ -235,7 +239,16 @@ float fsFixedShade;
vec4 fsColor;
//outputs
out vec4 outColor;
layout(location = 0) out vec4 out0; // opaque
layout(location = 1) out vec4 out1; // trans layer 1
layout(location = 2) out vec4 out2; // trans layer 2
// forward declarations (see common file)
float CalcFog();
void Step15Luminous(inout vec4 colour);
vec4 GetTextureValue();
void WriteOutputs(vec4 colour, int layer);
void QuadraticInterpolation()
{
@ -341,333 +354,6 @@ void QuadraticInterpolation()
gl_FragDepth = depth * 0.5 + 0.5;
}
vec4 ExtractColour(int type, uint value)
{
vec4 c = vec4(0.0);
if(type==0) { // T1RGB5
c.r = float((value >> 10) & 0x1Fu);
c.g = float((value >> 5 ) & 0x1Fu);
c.b = float((value ) & 0x1Fu);
c.rgb *= (1.0/31.0);
c.a = 1.0 - float((value >> 15) & 0x1u);
}
else if(type==1) { // Interleaved A4L4 (low byte)
c.rgb = vec3(float(value&0xFu));
c.a = float((value >> 4) & 0xFu);
c *= (1.0/15.0);
}
else if(type==2) {
c.a = float(value&0xFu);
c.rgb = vec3(float((value >> 4) & 0xFu));
c *= (1.0/15.0);
}
else if(type==3) {
c.rgb = vec3(float((value>>8)&0xFu));
c.a = float((value >> 12) & 0xFu);
c *= (1.0/15.0);
}
else if(type==4) {
c.a = float((value>>8)&0xFu);
c.rgb = vec3(float((value >> 12) & 0xFu));
c *= (1.0/15.0);
}
else if(type==5) {
c = vec4(float(value&0xFFu) / 255.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==6) {
c = vec4(float((value>>8)&0xFFu) / 255.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==7) { // RGBA4
c.r = float((value>>12)&0xFu);
c.g = float((value>> 8)&0xFu);
c.b = float((value>> 4)&0xFu);
c.a = float((value>> 0)&0xFu);
c *= (1.0/15.0);
}
else if(type==8) { // low byte, low nibble
c = vec4(float(value&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==9) { // low byte, high nibble
c = vec4(float((value>>4)&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==10) { // high byte, low nibble
c = vec4(float((value>>8)&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==11) { // high byte, high nibble
c = vec4(float((value>>12)&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
return c;
}
ivec2 GetTexturePosition(int level, ivec2 pos)
{
const int mipXBase[] = { 0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047 };
const int mipYBase[] = { 0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023 };
int mipDivisor = 1 << level;
int page = pos.y / 1024;
pos.y -= (page * 1024); // remove page from tex y
ivec2 retPos;
retPos.x = mipXBase[level] + (pos.x / mipDivisor);
retPos.y = mipYBase[level] + (pos.y / mipDivisor);
retPos.y += (page * 1024); // add page back to tex y
return retPos;
}
ivec2 GetTextureSize(int level, ivec2 size)
{
int mipDivisor = 1 << level;
return size / mipDivisor;
}
ivec2 GetMicroTexturePos(int id)
{
int xCoords[8] = { 0, 0, 128, 128, 0, 0, 128, 128 };
int yCoords[8] = { 0, 128, 0, 128, 256, 384, 256, 384 };
return ivec2(xCoords[id],yCoords[id]);
}
int GetPage(int yCoord)
{
return yCoord / 1024;
}
int GetNextPage(int yCoord)
{
return (GetPage(yCoord) + 1) & 1;
}
int GetNextPageOffset(int yCoord)
{
return GetNextPage(yCoord) * 1024;
}
// wrapping tex coords would be super easy but we combined tex sheets so have to handle wrap around between sheets
// hardware testing would be useful because i don't know exactly what happens if you try to read outside the texture sheet
// wrap around is a good guess
ivec2 WrapTexCoords(ivec2 pos, ivec2 coordinate)
{
ivec2 newCoord;
newCoord.x = coordinate.x & 2047;
newCoord.y = coordinate.y;
int page = GetPage(pos.y);
newCoord.y -= (page * 1024); // remove page
newCoord.y &= 1023; // wrap around in the same sheet
newCoord.y += (page * 1024); // add page back
return newCoord;
}
float mip_map_level(in vec2 texture_coordinate) // in texel units
{
vec2 dx_vtc = dFdx(texture_coordinate);
vec2 dy_vtc = dFdy(texture_coordinate);
float delta_max_sqr = max(dot(dx_vtc, dx_vtc), dot(dy_vtc, dy_vtc));
float mml = 0.5 * log2(delta_max_sqr);
return max( 0.0, mml );
}
float LinearTexLocations(int wrapMode, float size, float u, out float u0, out float u1)
{
float texelSize = 1.0 / size;
float halfTexelSize = 0.5 / size;
if(wrapMode==0) { // repeat
u = u * size - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u0 = fract(u0);
u1 = u0 + texelSize;
u1 = fract(u1);
return fract(u); // return weight
}
else if(wrapMode==1) { // repeat + clamp
u = fract(u); // must force into 0-1 to start
u = u * size - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u1 = u0 + texelSize;
if(u0 < 0.0) u0 = 0.0;
if(u1 >= 1.0) u1 = 1.0 - halfTexelSize;
return fract(u); // return weight
}
else { // mirror + mirror clamp - both are the same since the edge pixels are repeated anyway
float odd = floor(mod(u, 2.0)); // odd values are mirrored
if(odd > 0.0) {
u = 1.0 - fract(u);
}
else {
u = fract(u);
}
u = u * size - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u1 = u0 + texelSize;
if(u0 < 0.0) u0 = 0.0;
if(u1 >= 1.0) u1 = 1.0 - halfTexelSize;
return fract(u); // return weight
}
}
vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texPos, vec2 texCoord)
{
float tx[2], ty[2];
float a = LinearTexLocations(wrapMode.s, texSize.x, texCoord.x, tx[0], tx[1]);
float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]);
vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos)), 0).r);
vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos)), 0).r);
vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos)), 0).r);
vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos)), 0).r);
if(alphaTest) {
if(p0q0.a > p1q0.a) { p1q0.rgb = p0q0.rgb; }
if(p0q0.a > p0q1.a) { p0q1.rgb = p0q0.rgb; }
if(p1q0.a > p0q0.a) { p0q0.rgb = p1q0.rgb; }
if(p1q0.a > p1q1.a) { p1q1.rgb = p1q0.rgb; }
if(p0q1.a > p0q0.a) { p0q0.rgb = p0q1.rgb; }
if(p0q1.a > p1q1.a) { p1q1.rgb = p0q1.rgb; }
if(p1q1.a > p0q1.a) { p0q1.rgb = p1q1.rgb; }
if(p1q1.a > p1q0.a) { p1q0.rgb = p1q1.rgb; }
}
// Interpolation in X direction.
vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.
return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
}
vec4 textureR3D(usampler2D texSampler, ivec2 wrapMode, ivec2 texSize, ivec2 texPos, vec2 texCoord)
{
float numLevels = floor(log2(min(float(texSize.x), float(texSize.y)))); // r3d only generates down to 1:1 for square textures, otherwise its the min dimension
float fLevel = min(mip_map_level(texCoord * vec2(texSize)), numLevels);
if(alphaTest) fLevel *= 0.5;
else fLevel *= 0.8;
int iLevel = int(fLevel);
ivec2 texPos0 = GetTexturePosition(iLevel,texPos);
ivec2 texPos1 = GetTexturePosition(iLevel+1,texPos);
ivec2 texSize0 = GetTextureSize(iLevel, texSize);
ivec2 texSize1 = GetTextureSize(iLevel+1, texSize);
vec4 texLevel0 = texBiLinear(texSampler, wrapMode, vec2(texSize0), texPos0, texCoord);
vec4 texLevel1 = texBiLinear(texSampler, wrapMode, vec2(texSize1), texPos1, texCoord);
return mix(texLevel0, texLevel1, fract(fLevel)); // linear blend between our mipmap levels
}
vec4 GetTextureValue()
{
vec4 tex1Data = textureR3D(tex1, textureWrapMode, ivec2(baseTexInfo.zw), ivec2(baseTexInfo.xy), fsTexCoord);
if(textureInverted) {
tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb);
}
if (microTexture) {
vec2 scale = (vec2(baseTexInfo.zw) / 128.0) * microTextureScale;
ivec2 pos = GetMicroTexturePos(microTextureID);
// add page offset to microtexture position
pos.y += GetNextPageOffset(baseTexInfo.y);
vec4 tex2Data = textureR3D(tex1, ivec2(0), ivec2(128), pos, fsTexCoord * scale);
float lod = mip_map_level(fsTexCoord * scale * vec2(128.0));
float blendFactor = max(lod - 1.5, 0.0); // bias -1.5
blendFactor = min(blendFactor, 1.0); // clamp to max value 1
blendFactor = (blendFactor + 1.0) / 2.0; // 0.5 - 1 range
tex1Data = mix(tex2Data, tex1Data, blendFactor);
}
if (alphaTest) {
if (tex1Data.a < (32.0/255.0)) {
discard;
}
}
if(textureAlpha) {
if(discardAlpha) { // opaque 1st pass
if (tex1Data.a < 1.0) {
discard;
}
}
else { // transparent 2nd pass
if ((tex1Data.a * fsColor.a) >= 1.0) {
discard;
}
}
}
if (textureAlpha == false) {
tex1Data.a = 1.0;
}
return tex1Data;
}
void Step15Luminous(inout vec4 colour)
{
// luminous polys seem to behave very differently on step 1.5 hardware
// when fixed shading is enabled the colour is modulated by the vp ambient + fixed shade value
// when disabled it appears to be multiplied by 1.5, presumably to allow a higher range
if(hardwareStep==0x15) {
if(!lightEnabled && textureEnabled) {
if(fixedShading) {
colour.rgb *= 1.0 + fsFixedShade + lighting[1].y;
}
else {
colour.rgb *= 1.5;
}
}
}
}
float CalcFog()
{
float z = -fsViewVertex.z;
float fog = fogIntensity * clamp(fogStart + z * fogDensity, 0.0, 1.0);
return fog;
}
float sqr(float a)
{
return a*a;
@ -678,10 +364,6 @@ float sqr_length(vec2 a)
return a.x*a.x + a.y*a.y;
}
)glsl";
static const char* fragmentShaderR3DQuads2 = R"glsl(
void main()
{
vec4 tex1Data;
@ -814,8 +496,8 @@ void main()
// Fog & spotlight applied
finalData.rgb = mix(finalData.rgb, fogData.rgb + lSpotFogColor, fogData.a);
// Write output
outColor = finalData;
// Write outputs to colour buffers
WriteOutputs(finalData,colourLayer);
}
)glsl";

View file

@ -7,6 +7,7 @@ static const char *vertexShaderR3D = R"glsl(
// uniforms
uniform float modelScale;
uniform float nodeAlpha;
uniform mat4 modelMat;
uniform mat4 projMat;
uniform bool translatorMap;
@ -35,6 +36,8 @@ vec4 GetColour(vec4 colour)
c.rgb *= 16.0;
}
c.a *= nodeAlpha;
return c;
}
@ -98,6 +101,7 @@ uniform float fogAttenuation;
uniform float fogAmbient;
uniform bool fixedShading;
uniform int hardwareStep;
uniform int colourLayer;
//interpolated inputs from vertex shader
in vec3 fsViewVertex;
@ -108,334 +112,16 @@ in float fsDiscard;
in float fsFixedShade;
//outputs
out vec4 outColor;
layout(location = 0) out vec4 out0; // opaque
layout(location = 1) out vec4 out1; // trans layer 1
layout(location = 2) out vec4 out2; // trans layer 2
vec4 ExtractColour(int type, uint value)
{
vec4 c = vec4(0.0);
// forward declarations (see common file)
if(type==0) { // T1RGB5
c.r = float((value >> 10) & 0x1Fu);
c.g = float((value >> 5 ) & 0x1Fu);
c.b = float((value ) & 0x1Fu);
c.rgb *= (1.0/31.0);
c.a = 1.0 - float((value >> 15) & 0x1u);
}
else if(type==1) { // Interleaved A4L4 (low byte)
c.rgb = vec3(float(value&0xFu));
c.a = float((value >> 4) & 0xFu);
c *= (1.0/15.0);
}
else if(type==2) {
c.a = float(value&0xFu);
c.rgb = vec3(float((value >> 4) & 0xFu));
c *= (1.0/15.0);
}
else if(type==3) {
c.rgb = vec3(float((value>>8)&0xFu));
c.a = float((value >> 12) & 0xFu);
c *= (1.0/15.0);
}
else if(type==4) {
c.a = float((value>>8)&0xFu);
c.rgb = vec3(float((value >> 12) & 0xFu));
c *= (1.0/15.0);
}
else if(type==5) {
c = vec4(float(value&0xFFu) / 255.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==6) {
c = vec4(float((value>>8)&0xFFu) / 255.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==7) { // RGBA4
c.r = float((value>>12)&0xFu);
c.g = float((value>> 8)&0xFu);
c.b = float((value>> 4)&0xFu);
c.a = float((value>> 0)&0xFu);
c *= (1.0/15.0);
}
else if(type==8) { // low byte, low nibble
c = vec4(float(value&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==9) { // low byte, high nibble
c = vec4(float((value>>4)&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==10) { // high byte, low nibble
c = vec4(float((value>>8)&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
else if(type==11) { // high byte, high nibble
c = vec4(float((value>>12)&0xFu) / 15.0);
if(c.a==1.0) { c.a = 0.0; }
else { c.a = 1.0; }
}
return c;
}
ivec2 GetTexturePosition(int level, ivec2 pos)
{
const int mipXBase[] = int[](0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047);
const int mipYBase[] = int[](0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023);
int mipDivisor = 1 << level;
int page = pos.y / 1024;
pos.y -= (page * 1024); // remove page from tex y
ivec2 retPos;
retPos.x = mipXBase[level] + (pos.x / mipDivisor);
retPos.y = mipYBase[level] + (pos.y / mipDivisor);
retPos.y += (page * 1024); // add page back to tex y
return retPos;
}
ivec2 GetTextureSize(int level, ivec2 size)
{
int mipDivisor = 1 << level;
return size / mipDivisor;
}
ivec2 GetMicroTexturePos(int id)
{
const int xCoords[8] = int[](0, 0, 128, 128, 0, 0, 128, 128);
const int yCoords[8] = int[](0, 128, 0, 128, 256, 384, 256, 384);
return ivec2(xCoords[id],yCoords[id]);
}
int GetPage(int yCoord)
{
return yCoord / 1024;
}
int GetNextPage(int yCoord)
{
return (GetPage(yCoord) + 1) & 1;
}
int GetNextPageOffset(int yCoord)
{
return GetNextPage(yCoord) * 1024;
}
// wrapping tex coords would be super easy but we combined tex sheets so have to handle wrap around between sheets
// hardware testing would be useful because i don't know exactly what happens if you try to read outside the texture sheet
// wrap around is a good guess
ivec2 WrapTexCoords(ivec2 pos, ivec2 coordinate)
{
ivec2 newCoord;
newCoord.x = coordinate.x & 2047;
newCoord.y = coordinate.y;
int page = GetPage(pos.y);
newCoord.y -= (page * 1024); // remove page
newCoord.y &= 1023; // wrap around in the same sheet
newCoord.y += (page * 1024); // add page back
return newCoord;
}
float mip_map_level(in vec2 texture_coordinate) // in texel units
{
vec2 dx_vtc = dFdx(texture_coordinate);
vec2 dy_vtc = dFdy(texture_coordinate);
float delta_max_sqr = max(dot(dx_vtc, dx_vtc), dot(dy_vtc, dy_vtc));
float mml = 0.5 * log2(delta_max_sqr);
return max( 0, mml );
}
float LinearTexLocations(int wrapMode, float size, float u, out float u0, out float u1)
{
float texelSize = 1.0 / size;
float halfTexelSize = 0.5 / size;
if(wrapMode==0) { // repeat
u = (u * size) - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u0 = fract(u0);
u1 = u0 + texelSize;
u1 = fract(u1);
return fract(u); // return weight
}
else if(wrapMode==1) { // repeat + clamp
u = fract(u); // must force into 0-1 to start
u = (u * size) - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u1 = u0 + texelSize;
if(u0 < 0.0) u0 = 0.0;
if(u1 >= 1.0) u1 = 1.0 - halfTexelSize;
return fract(u); // return weight
}
else { // mirror + mirror clamp - both are the same since the edge pixels are repeated anyway
float odd = floor(mod(u, 2.0)); // odd values are mirrored
if(odd > 0.0) {
u = 1.0 - fract(u);
}
else {
u = fract(u);
}
u = (u * size) - 0.5;
u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
u1 = u0 + texelSize;
if(u0 < 0.0) u0 = 0.0;
if(u1 >= 1.0) u1 = 1.0 - halfTexelSize;
return fract(u); // return weight
}
}
vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texPos, vec2 texCoord)
{
float tx[2], ty[2];
float a = LinearTexLocations(wrapMode.s, texSize.x, texCoord.x, tx[0], tx[1]);
float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]);
vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos)), 0).r);
vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos)), 0).r);
vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos)), 0).r);
vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos)), 0).r);
if(alphaTest) {
if(p0q0.a > p1q0.a) { p1q0.rgb = p0q0.rgb; }
if(p0q0.a > p0q1.a) { p0q1.rgb = p0q0.rgb; }
if(p1q0.a > p0q0.a) { p0q0.rgb = p1q0.rgb; }
if(p1q0.a > p1q1.a) { p1q1.rgb = p1q0.rgb; }
if(p0q1.a > p0q0.a) { p0q0.rgb = p0q1.rgb; }
if(p0q1.a > p1q1.a) { p1q1.rgb = p0q1.rgb; }
if(p1q1.a > p0q1.a) { p0q1.rgb = p1q1.rgb; }
if(p1q1.a > p1q0.a) { p1q0.rgb = p1q1.rgb; }
}
// Interpolation in X direction.
vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.
return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
}
vec4 textureR3D(usampler2D texSampler, ivec2 wrapMode, ivec2 texSize, ivec2 texPos, vec2 texCoord)
{
float numLevels = floor(log2(min(float(texSize.x), float(texSize.y)))); // r3d only generates down to 1:1 for square textures, otherwise its the min dimension
float fLevel = min(mip_map_level(texCoord * vec2(texSize)), numLevels);
if(alphaTest) fLevel *= 0.5;
else fLevel *= 0.8;
int iLevel = int(fLevel);
ivec2 texPos0 = GetTexturePosition(iLevel,texPos);
ivec2 texPos1 = GetTexturePosition(iLevel+1,texPos);
ivec2 texSize0 = GetTextureSize(iLevel, texSize);
ivec2 texSize1 = GetTextureSize(iLevel+1, texSize);
vec4 texLevel0 = texBiLinear(texSampler, wrapMode, vec2(texSize0), texPos0, texCoord);
vec4 texLevel1 = texBiLinear(texSampler, wrapMode, vec2(texSize1), texPos1, texCoord);
return mix(texLevel0, texLevel1, fract(fLevel)); // linear blend between our mipmap levels
}
vec4 GetTextureValue()
{
vec4 tex1Data = textureR3D(tex1, textureWrapMode, ivec2(baseTexInfo.zw), ivec2(baseTexInfo.xy), fsTexCoord);
if(textureInverted) {
tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb);
}
if (microTexture) {
vec2 scale = (vec2(baseTexInfo.zw) / 128.0) * microTextureScale;
ivec2 pos = GetMicroTexturePos(microTextureID);
// add page offset to microtexture position
pos.y += GetNextPageOffset(baseTexInfo.y);
vec4 tex2Data = textureR3D(tex1, ivec2(0), ivec2(128), pos, fsTexCoord * scale);
float lod = mip_map_level(fsTexCoord * scale * vec2(128.0));
float blendFactor = max(lod - 1.5, 0.0); // bias -1.5
blendFactor = min(blendFactor, 1.0); // clamp to max value 1
blendFactor = (blendFactor + 1.0) / 2.0; // 0.5 - 1 range
tex1Data = mix(tex2Data, tex1Data, blendFactor);
}
if (alphaTest) {
if (tex1Data.a < (32.0/255.0)) {
discard;
}
}
if(textureAlpha) {
if(discardAlpha) { // opaque 1st pass
if (tex1Data.a < 1.0) {
discard;
}
}
else { // transparent 2nd pass
if ((tex1Data.a * fsColor.a) >= 1.0) {
discard;
}
}
}
if (textureAlpha == false) {
tex1Data.a = 1.0;
}
return tex1Data;
}
void Step15Luminous(inout vec4 colour)
{
// luminous polys seem to behave very differently on step 1.5 hardware
// when fixed shading is enabled the colour is modulated by the vp ambient + fixed shade value
// when disabled it appears to be multiplied by 1.5, presumably to allow a higher range
if(hardwareStep==0x15) {
if(!lightEnabled && textureEnabled) {
if(fixedShading) {
colour.rgb *= 1.0 + fsFixedShade + lighting[1].y;
}
else {
colour.rgb *= vec3(1.5);
}
}
}
}
float CalcFog()
{
float z = -fsViewVertex.z;
float fog = fogIntensity * clamp(fogStart + z * fogDensity, 0.0, 1.0);
return fog;
}
float CalcFog();
void Step15Luminous(inout vec4 colour);
vec4 GetTextureValue();
void WriteOutputs(vec4 colour, int layer);
void main()
{
@ -572,7 +258,8 @@ void main()
// Fog & spotlight applied
finalData.rgb = mix(finalData.rgb, fogData.rgb + lSpotFogColor, fogData.a);
outColor = finalData;
// Write outputs to colour buffers
WriteOutputs(finalData,colourLayer);
}
)glsl";

View file

@ -48,6 +48,7 @@
* - For consistency, the registers should probably be byte reversed (this is a
* little endian device), forcing the Model3 Read32/Write32 handlers to
* manually reverse the data. This keeps with the convention for VRAM.
* Need to finish ripping out code that no longer does anything. Removed a lot but there's still more.
*/
#include "TileGen.h"
@ -109,9 +110,6 @@ void CTileGen::LoadState(CBlockFile *SaveState)
}
SaveState->Read(regs, sizeof(regs));
// Because regs were read after palette, must recompute
RecomputePalettes();
// If multi-threaded, update read-only snapshots too
if (m_gpuMultiThreaded)
UpdateSnapshots(true);
@ -143,34 +141,8 @@ void CTileGen::EndVBlank(void)
//
}
void CTileGen::RecomputePalettes(void)
{
// Writing the colors forces palettes to be computed
if (m_gpuMultiThreaded)
{
for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 )
{
MARK_DIRTY(palDirty[0], colorAddr);
MARK_DIRTY(palDirty[1], colorAddr);
WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]);
}
}
else
{
for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 )
WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]);
}
}
UINT32 CTileGen::SyncSnapshots(void)
{
// Good time to recompute the palettes
if (recomputePalettes)
{
RecomputePalettes();
recomputePalettes = false;
}
if (!m_gpuMultiThreaded)
return 0;
@ -279,23 +251,6 @@ void CTileGen::WriteRAM32(unsigned addr, UINT32 data)
if (m_gpuMultiThreaded)
MARK_DIRTY(vramDirty, addr);
*(UINT32 *) &vram[addr] = data;
// Update palette if required
if (addr >= 0x100000)
{
addr -= 0x100000;
unsigned color = addr/4; // color index
// Same address in both palettes must be marked dirty
if (m_gpuMultiThreaded)
{
MARK_DIRTY(palDirty[0], addr);
MARK_DIRTY(palDirty[1], addr);
}
// Both palettes will be modified simultaneously
WritePalette(color, data);
}
}
//TODO: 8- and 16-bit handlers have not been thoroughly tested
@ -330,76 +285,6 @@ void CTileGen::WriteRAM16(unsigned addr, uint16_t data)
WriteRAM32(addr & ~1, tmp);
}
void CTileGen::InitPalette(void)
{
for (int i = 0; i < 0x20000/4; i++)
{
WritePalette(i, *(UINT32 *) &vram[0x100000 + i*4]);
if (m_gpuMultiThreaded)
{
palRO[0][i] = pal[0][i];
palRO[1][i] = pal[1][i];
}
}
}
static inline UINT32 AddColorOffset(UINT8 r, UINT8 g, UINT8 b, UINT8 a, UINT32 offsetReg)
{
INT32 ir, ig, ib;
/*
* Color offsets are signed but I'm not sure whether or not their range is
* merely [-128,+127], which would mean adding to a 0 component would not
* result full intensity (only +127 at most). Alternatively, the signed
* value might have to be multiplied by 2. That is assumed here. In either
* case, the signed addition should be saturated.
*/
ib = (INT32) (INT8)((offsetReg>>16)&0xFF);
ig = (INT32) (INT8)((offsetReg>>8)&0xFF);
ir = (INT32) (INT8)((offsetReg>>0)&0xFF);
ib *= 2;
ig *= 2;
ir *= 2;
// Add with saturation
ib += (INT32) (UINT32) b;
if (ib < 0) ib = 0;
else if (ib > 0xFF) ib = 0xFF;
ig += (INT32) (UINT32) g;
if (ig < 0) ig = 0;
else if (ig > 0xFF) ig = 0xFF;
ir += (INT32) (UINT32) r;
if (ir < 0) ir = 0;
else if (ir > 0xFF) ir = 0xFF;
// Construct the final 32-bit ABGR-format color
r = (UINT8) ir;
g = (UINT8) ig;
b = (UINT8) ib;
return ((UINT32)a<<24)|((UINT32)b<<16)|((UINT32)g<<8)|(UINT32)r;
}
void CTileGen::WritePalette(unsigned color, UINT32 data)
{
UINT8 r, g, b, a;
a = 0xFF * ((data>>15)&1); // decode the RGBA (make alpha 0xFF or 0x00)
a = ~a; // invert it (set on Model 3 means clear pixel)
if ((data&0x8000))
r = g = b = 0;
else
{
b = (((data >> 10) & 0x1F) * 255) / 31;
g = (((data >> 5) & 0x1F) * 255) / 31;
r = ((data & 0x1F) * 255) / 31;
}
pal[0][color] = AddColorOffset(r, g, b, a, regs[0x40/4]); // A/A'
pal[1][color] = AddColorOffset(r, g, b, a, regs[0x44/4]); // B/B'
}
UINT32 CTileGen::ReadRegister(unsigned reg)
{
reg &= 0xFF;
@ -423,11 +308,6 @@ void CTileGen::WriteRegister(unsigned reg, UINT32 data)
break;
case 0x40: // layer A/A' color offset
case 0x44: // layer B/B' color offset
// We only have a mechanism to recompute both palettes simultaneously.
// These regs are often written together in the same frame. To avoid
// needlessly recomputing both palettes twice, we defer the operation.
if (regs[reg/4] != data) // only if changed
recomputePalettes = true;
break;
case 0x10: // IRQ acknowledge
IRQ->Deassert(data&0xFF);
@ -451,9 +331,6 @@ void CTileGen::Reset(void)
memset(regs, 0, sizeof(regs));
memset(regsRO, 0, sizeof(regsRO));
InitPalette();
recomputePalettes = false;
DebugLog("Tile Generator reset\n");
}

View file

@ -254,9 +254,6 @@ public:
private:
// Private member functions
void RecomputePalettes(void);
void InitPalette(void);
void WritePalette(unsigned color, UINT32 data);
UINT32 UpdateSnapshots(bool copyWhole);
UINT32 UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty);
@ -274,7 +271,6 @@ private:
UINT8 *memoryPool; // all memory allocated here
UINT8 *vram; // 1.125MB of VRAM
UINT32 *pal[2]; // 2 x 0x20000 byte (32K colors) palette
bool recomputePalettes; // whether to recompute palettes A/A' and B/B' during sync
// Read-only snapshots
UINT8 *vramRO; // 1.125MB of VRAM [read-only snapshot]

View file

@ -495,6 +495,7 @@ xcopy /D /Y "$(ProjectDir)..\Assets\*" "$(TargetDir)Assets"</Command>
<ClInclude Include="..\Src\Graphics\New3D\R3DFrameBuffers.h" />
<ClInclude Include="..\Src\Graphics\New3D\R3DScrollFog.h" />
<ClInclude Include="..\Src\Graphics\New3D\R3DShader.h" />
<ClInclude Include="..\Src\Graphics\New3D\R3DShaderCommon.h" />
<ClInclude Include="..\Src\Graphics\New3D\R3DShaderQuads.h" />
<ClInclude Include="..\Src\Graphics\New3D\R3DShaderTriangles.h" />
<ClInclude Include="..\Src\Graphics\New3D\VBO.h" />

View file

@ -853,6 +853,9 @@
<ClInclude Include="..\Src\Graphics\FBO.h">
<Filter>Header Files\Graphics</Filter>
</ClInclude>
<ClInclude Include="..\Src\Graphics\New3D\R3DShaderCommon.h">
<Filter>Header Files\Graphics\New</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="..\Src\Debugger\ReadMe.txt">