From 6595b9320e7332914cb0b5c1229f32c7d1840636 Mon Sep 17 00:00:00 2001 From: Ian Curtis Date: Sat, 14 Oct 2023 20:05:00 +0100 Subject: [PATCH] Rewrite the renderer a bit to spit out the finished graphics from the 3d chip on separate buffers. One buffer is for opaque pixels, and 2 more for translucent pixels. Before, the frame was composited on the back buffer, which meant the tilegen had to have been drawn first. This way the images are now totally independent of the tilegen chip so can be drawn as soon as the register write 0xC is written to the tilegen. Some games update the tilegen after the ping_pong bit has flipped at 66% of the frame, so we need to split the tilegen drawing up into two stages to get some effects to work. So having the tilegen draw independently of the 3d chip can make this happen. --- Src/Graphics/Legacy3D/Models.cpp | 2 +- Src/Graphics/New3D/Model.cpp | 4 + Src/Graphics/New3D/Model.h | 15 +- Src/Graphics/New3D/New3D.cpp | 78 +++-- Src/Graphics/New3D/New3D.h | 2 - Src/Graphics/New3D/R3DFrameBuffers.cpp | 165 ++--------- Src/Graphics/New3D/R3DFrameBuffers.h | 5 - Src/Graphics/New3D/R3DScrollFog.cpp | 23 +- Src/Graphics/New3D/R3DShader.cpp | 29 +- Src/Graphics/New3D/R3DShader.h | 4 + Src/Graphics/New3D/R3DShaderCommon.h | 361 ++++++++++++++++++++++++ Src/Graphics/New3D/R3DShaderQuads.h | 352 ++--------------------- Src/Graphics/New3D/R3DShaderTriangles.h | 341 +--------------------- Src/Model3/TileGen.cpp | 125 +------- Src/Model3/TileGen.h | 4 - VS2008/Supermodel.vcxproj | 1 + VS2008/Supermodel.vcxproj.filters | 3 + 17 files changed, 510 insertions(+), 1004 deletions(-) create mode 100644 Src/Graphics/New3D/R3DShaderCommon.h diff --git a/Src/Graphics/Legacy3D/Models.cpp b/Src/Graphics/Legacy3D/Models.cpp index 98d4fd3..4182bab 100644 --- a/Src/Graphics/Legacy3D/Models.cpp +++ b/Src/Graphics/Legacy3D/Models.cpp @@ -725,7 +725,7 @@ void CLegacy3D::InsertVertex(ModelCache *Cache, const Vertex *V, const Poly *P, // Specular shininess GLfloat 
specularCoefficient = (GLfloat) ((P->header[0]>>26) & 0x3F) * (1.0f/63.0f); int shinyBits = (P->header[6] >> 5) & 3; - float shininess = std::exp2f(1 + shinyBits); + float shininess = std::exp2f(1.0f + shinyBits); if (!(P->header[0]&0x80)) //|| (shininess == 0)) // bit 0x80 seems to enable specular lighting { specularCoefficient = 0.; // disable diff --git a/Src/Graphics/New3D/Model.cpp b/Src/Graphics/New3D/Model.cpp index 0677ec6..d30994a 100644 --- a/Src/Graphics/New3D/Model.cpp +++ b/Src/Graphics/New3D/Model.cpp @@ -9,6 +9,7 @@ NodeAttributes::NodeAttributes() currentPage = 0; currentClipStatus = Clip::INTERCEPT; currentModelScale = 1.0f; + currentModelAlpha = 1.0; } bool NodeAttributes::Push() @@ -27,6 +28,7 @@ bool NodeAttributes::Push() na.texOffsetY = currentTexOffsetY; na.clip = currentClipStatus; na.modelScale = currentModelScale; + na.modelAlpha = currentModelAlpha; m_vecAttribs.emplace_back(na); @@ -46,6 +48,7 @@ bool NodeAttributes::Pop() currentTexOffsetY = last.texOffsetY; currentClipStatus = last.clip; currentModelScale = last.modelScale; + currentModelAlpha = last.modelAlpha; m_vecAttribs.pop_back(); @@ -64,6 +67,7 @@ void NodeAttributes::Reset() currentTexOffsetY = 0; currentClipStatus = Clip::INTERCEPT; currentModelScale = 1.0f; + currentModelAlpha = 1.0f; m_vecAttribs.clear(); } diff --git a/Src/Graphics/New3D/Model.h b/Src/Graphics/New3D/Model.h index b317b66..62daf8d 100644 --- a/Src/Graphics/New3D/Model.h +++ b/Src/Graphics/New3D/Model.h @@ -97,22 +97,24 @@ enum class Layer { colour, trans1, trans2, trans12 /*both 1&2*/, all, none }; struct Mesh { //helper funcs - bool Render(Layer layer) + bool Render(Layer layer, float nodeAlpha) { + bool nAlpha = nodeAlpha < 1.0f; + switch (layer) { case Layer::colour: - if (polyAlpha) { + if (polyAlpha || nAlpha) { return false; } break; case Layer::trans1: - if ((!textureAlpha && !polyAlpha) || transLSelect) { + if ((!textureAlpha && !polyAlpha && !nAlpha) || transLSelect) { return false; } break; case 
Layer::trans2: - if ((!textureAlpha && !polyAlpha) || !transLSelect) { + if ((!textureAlpha && !polyAlpha && !nAlpha) || !transLSelect) { return false; } break; @@ -183,6 +185,9 @@ struct Model //model scale step 1.5+ float scale = 1.0f; + + //node transparency + float alpha = 1.0f; }; struct Viewport @@ -236,6 +241,7 @@ public: int currentPage; Clip currentClipStatus; float currentModelScale; + float currentModelAlpha; private: @@ -246,6 +252,7 @@ private: int page; Clip clip; float modelScale; + float modelAlpha; // from culling node }; std::vector m_vecAttribs; }; diff --git a/Src/Graphics/New3D/New3D.cpp b/Src/Graphics/New3D/New3D.cpp index ff15d63..1360c67 100644 --- a/Src/Graphics/New3D/New3D.cpp +++ b/Src/Graphics/New3D/New3D.cpp @@ -232,7 +232,7 @@ bool CNew3D::RenderScene(int priority, bool renderOverlay, Layer layer) hasOverlay = true; } - if (!mesh.Render(layer)) continue; + if (!mesh.Render(layer, m.alpha)) continue; if (mesh.highPriority != renderOverlay) continue; if (!matrixLoaded) { @@ -274,11 +274,13 @@ void CNew3D::SetRenderStates() glDepthMask (GL_TRUE); glActiveTexture (GL_TEXTURE0); glDisable (GL_CULL_FACE); // we'll emulate this in the shader - glDisable (GL_BLEND); glStencilFunc (GL_EQUAL, 0, 0xFF); // basically stencil test passes if the value is zero glStencilOp (GL_KEEP, GL_INCR, GL_INCR); // if the stencil test passes, we increment the value glStencilMask (0xFF); + + glBlendFunc (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glDisable (GL_BLEND); } void CNew3D::DisableRenderStates() @@ -313,7 +315,6 @@ void CNew3D::RenderFrame(void) m_nodeAttribs.Reset(); RenderViewport(0x800000); // build model structure - DrawScrollFog(); // fog layer if applicable must be drawn here m_vbo.Bind(true); m_vbo.BufferSubData(MAX_ROM_VERTS*sizeof(FVertex), m_polyBufferRam.size()*sizeof(FVertex), m_polyBufferRam.data()); // upload all the dynamic data to GPU in one go @@ -339,8 +340,10 @@ void CNew3D::RenderFrame(void) } } - 
m_r3dFrameBuffers.SetFBO(Layer::trans12); - glClear(GL_COLOR_BUFFER_BIT); // wipe both trans layers + m_r3dFrameBuffers.SetFBO(Layer::colour); // colour will draw to all 3 buffers. For regular opaque pixels the transparent layers will be essentially masked + glClear(GL_COLOR_BUFFER_BIT); + + DrawScrollFog(); // fog layer if applicable must be drawn here for (int pri = 0; pri <= 3; pri++) { @@ -350,35 +353,33 @@ void CNew3D::RenderFrame(void) bool renderOverlay = (i == 1); - m_r3dFrameBuffers.SetFBO(Layer::colour); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); - SetRenderStates(); - m_r3dShader.DiscardAlpha(true); // discard all translucent pixels in opaque pass + m_r3dFrameBuffers.SetFBO(Layer::colour); + + glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + + m_r3dShader.DiscardAlpha(true); + m_r3dShader.SetLayer(Layer::colour); bool hasOverlay = RenderScene(pri, renderOverlay, Layer::colour); if (!renderOverlay) { ProcessLos(pri); } - DisableRenderStates(); + glDepthFunc(GL_LESS); - m_r3dFrameBuffers.DrawOverTransLayers(); // mask trans layer with opaque pixels - m_r3dFrameBuffers.CompositeBaseLayer(); // copy opaque pixels to back buffer + m_r3dShader.DiscardAlpha(false); - SetRenderStates(); + m_r3dFrameBuffers.StoreDepth(); + m_r3dShader.SetLayer(Layer::trans1); + m_r3dFrameBuffers.SetFBO(Layer::trans1); + RenderScene(pri, renderOverlay, Layer::trans1); - glDepthFunc(GL_LESS); // alpha polys seem to use gl_less (ocean hunter) - - m_r3dShader.DiscardAlpha (false); // render only translucent pixels - m_r3dFrameBuffers.StoreDepth (); // save depth buffer for 1st trans pass - m_r3dFrameBuffers.SetFBO (Layer::trans1); - RenderScene (pri, renderOverlay, Layer::trans1); - - m_r3dFrameBuffers.RestoreDepth (); // restore depth buffer, trans layers don't seem to depth test against each other - m_r3dFrameBuffers.SetFBO (Layer::trans2); - RenderScene (pri, renderOverlay, Layer::trans2); + m_r3dFrameBuffers.RestoreDepth(); + 
m_r3dShader.SetLayer(Layer::trans2); + m_r3dFrameBuffers.SetFBO(Layer::trans2); + RenderScene(pri, renderOverlay, Layer::trans2); DisableRenderStates(); @@ -386,7 +387,8 @@ void CNew3D::RenderFrame(void) } } - m_r3dFrameBuffers.CompositeAlphaLayer(); + m_r3dFrameBuffers.SetFBO(Layer::none); + m_r3dFrameBuffers.Draw(); } void CNew3D::BeginFrame(void) @@ -471,10 +473,11 @@ bool CNew3D::DrawModel(UINT32 modelAddr) } // update texture offsets - m->textureOffsetX = m_nodeAttribs.currentTexOffsetX; - m->textureOffsetY = m_nodeAttribs.currentTexOffsetY; - m->page = m_nodeAttribs.currentPage; - m->scale = m_nodeAttribs.currentModelScale; + m->textureOffsetX = m_nodeAttribs.currentTexOffsetX; + m->textureOffsetY = m_nodeAttribs.currentTexOffsetY; + m->page = m_nodeAttribs.currentPage; + m->scale = m_nodeAttribs.currentModelScale; + m->alpha = m_nodeAttribs.currentModelAlpha; if (!cached) { CacheModel(m, modelAddress); @@ -588,6 +591,8 @@ void CNew3D::DescendCullingNode(UINT32 addr) } } + m_nodeAttribs.currentModelAlpha = 1; // TODO fade out if required + // Apply matrix and translation m_modelMat.PushMatrix(); @@ -1387,23 +1392,6 @@ bool CNew3D::IsVROMModel(UINT32 modelAddr) return modelAddr >= 0x100000; } -void CNew3D::CalcTexOffset(int offX, int offY, int page, int x, int y, int& newX, int& newY) -{ - newX = (x + offX) & 2047; // wrap around 2048, shouldn't be required - - int oldPage = y / 1024; - - y -= (oldPage * 1024); // remove page from tex y - - // calc newY with wrap around, wraps around in the same sheet, not into another memory sheet - - newY = (y + offY) & 1023; - - // add page to Y - - newY += ((oldPage + page) & 1) * 1024; // max page 0-1 -} - void CNew3D::CalcFrustumPlanes(Plane p[5], const float* matrix) { // Left Plane diff --git a/Src/Graphics/New3D/New3D.h b/Src/Graphics/New3D/New3D.h index f2e9e5d..6705a7e 100644 --- a/Src/Graphics/New3D/New3D.h +++ b/Src/Graphics/New3D/New3D.h @@ -223,8 +223,6 @@ private: void TranslateLosPosition(int inX, int inY, 
int& outX, int& outY); bool ProcessLos(int priority); - void CalcTexOffset(int offX, int offY, int page, int x, int y, int& newX, int& newY); - /* * Data */ diff --git a/Src/Graphics/New3D/R3DFrameBuffers.cpp b/Src/Graphics/New3D/R3DFrameBuffers.cpp index 347ef09..27a82fc 100644 --- a/Src/Graphics/New3D/R3DFrameBuffers.cpp +++ b/Src/Graphics/New3D/R3DFrameBuffers.cpp @@ -1,7 +1,4 @@ #include "R3DFrameBuffers.h" -#include "Mat4.h" - -#define countof(a) (sizeof(a)/sizeof(*(a))) namespace New3D { @@ -23,7 +20,6 @@ R3DFrameBuffers::R3DFrameBuffers() AllocShaderTrans(); AllocShaderBase(); - AllocShaderWipe(); glGenVertexArrays(1, &m_vao); glBindVertexArray(m_vao); @@ -36,7 +32,7 @@ R3DFrameBuffers::~R3DFrameBuffers() DestroyFBO(); m_shaderTrans.UnloadShaders(); m_shaderBase.UnloadShaders(); - m_shaderWipe.UnloadShaders(); + if (m_vao) { glDeleteVertexArrays(1, &m_vao); m_vao = 0; @@ -165,26 +161,24 @@ void R3DFrameBuffers::SetFBO(Layer layer) switch (layer) { case Layer::colour: - case Layer::trans1: - case Layer::trans2: - { - glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID); - GLenum buffers[] = { GL_COLOR_ATTACHMENT0 + (GLenum)layer }; - glDrawBuffers(countof(buffers), buffers); - break; - } - case Layer::trans12: - { - glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID); - GLenum buffers[] = { GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 }; - glDrawBuffers(countof(buffers), buffers); - break; - } - case Layer::all: { glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID); GLenum buffers[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 }; - glDrawBuffers(countof(buffers), buffers); + glDrawBuffers((GLsizei)std::size(buffers), buffers); + break; + } + case Layer::trans1: + { + glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID); + GLenum buffers[] = { GL_NONE, GL_COLOR_ATTACHMENT1, GL_NONE }; + glDrawBuffers((GLsizei)std::size(buffers), buffers); + break; + } + case Layer::trans2: + { + glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID); + GLenum 
buffers[] = { GL_NONE, GL_NONE, GL_COLOR_ATTACHMENT2 }; + glDrawBuffers((GLsizei)std::size(buffers), buffers); break; } case Layer::none: @@ -280,8 +274,8 @@ void R3DFrameBuffers::AllocShaderTrans() void main() { - vec4 colTrans1 = texture( tex1, fsTexCoord); - vec4 colTrans2 = texture( tex2, fsTexCoord); + vec4 colTrans1 = texture(tex1, fsTexCoord); + vec4 colTrans2 = texture(tex2, fsTexCoord); if(colTrans1.a+colTrans2.a > 0.0) { vec3 col1 = colTrans1.rgb * colTrans1.a; @@ -302,58 +296,6 @@ void R3DFrameBuffers::AllocShaderTrans() m_shaderTrans.uniformLoc[1] = m_shaderTrans.GetUniformLocation("tex2"); } -void R3DFrameBuffers::AllocShaderWipe() -{ - const char *vertexShader = R"glsl( - - #version 410 core - - // outputs - out vec2 fsTexCoord; - - void main(void) - { - const vec4 vertices[] = vec4[](vec4(-1.0, -1.0, 0.0, 1.0), - vec4(-1.0, 1.0, 0.0, 1.0), - vec4( 1.0, -1.0, 0.0, 1.0), - vec4( 1.0, 1.0, 0.0, 1.0)); - - fsTexCoord = (vertices[gl_VertexID % 4].xy + 1.0) / 2.0; - gl_Position = vertices[gl_VertexID % 4]; - } - - )glsl"; - - const char *fragmentShader = R"glsl( - - #version 410 core - - uniform sampler2D texColor; // base colour layer - in vec2 fsTexCoord; - - // outputs - layout (location = 0) out vec4 fragColor0; - layout (location = 1) out vec4 fragColor1; - - void main() - { - vec4 colBase = texture(texColor, fsTexCoord); - - if(colBase.a == 0.0) { - discard; // no colour pixels have been written - } - - fragColor0 = vec4(0.0); // wipe these parts of the alpha buffer - fragColor1 = vec4(0.0); // since they have been overwritten by the next priority layer - } - - )glsl"; - - m_shaderWipe.LoadShaders(vertexShader, fragmentShader); - - m_shaderWipe.uniformLoc[0] = m_shaderWipe.GetUniformLocation("texColor"); -} - void R3DFrameBuffers::Draw() { SetFBO (Layer::none); // make sure to draw on the back buffer @@ -362,7 +304,7 @@ void R3DFrameBuffers::Draw() glDisable (GL_CULL_FACE); glDisable (GL_BLEND); - for (int i = 0; i < countof(m_texIDs); i++) { // bind 
our textures to correct texture units + for (int i = 0; i < (int)std::size(m_texIDs); i++) { // bind our textures to correct texture units glActiveTexture(GL_TEXTURE0 + i); glBindTexture(GL_TEXTURE_2D, m_texIDs[i]); } @@ -381,77 +323,10 @@ void R3DFrameBuffers::Draw() glBindVertexArray (0); } -void R3DFrameBuffers::CompositeBaseLayer() -{ - SetFBO(Layer::none); // make sure to draw on the back buffer - glViewport(0, 0, m_width, m_height); // cover the entire screen - glDisable(GL_DEPTH_TEST); // disable depth testing / writing - glDisable(GL_CULL_FACE); - glDisable(GL_BLEND); - - for (int i = 0; i < countof(m_texIDs); i++) { // bind our textures to correct texture units - glActiveTexture(GL_TEXTURE0 + i); - glBindTexture(GL_TEXTURE_2D, m_texIDs[i]); - } - - glActiveTexture(GL_TEXTURE0); - glBindVertexArray(m_vao); - - DrawBaseLayer(); - - glBindVertexArray(0); -} - -void R3DFrameBuffers::CompositeAlphaLayer() -{ - SetFBO(Layer::none); // make sure to draw on the back buffer - glViewport(0, 0, m_width, m_height); // cover the entire screen - glDisable(GL_DEPTH_TEST); // disable depth testing / writing - glDisable(GL_CULL_FACE); - - for (int i = 0; i < countof(m_texIDs); i++) { // bind our textures to correct texture units - glActiveTexture(GL_TEXTURE0 + i); - glBindTexture(GL_TEXTURE_2D, m_texIDs[i]); - } - - glActiveTexture(GL_TEXTURE0); - - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glEnable(GL_BLEND); - glBindVertexArray(m_vao); - - DrawAlphaLayer(); - - glDisable(GL_BLEND); - glBindVertexArray(0); -} - -void R3DFrameBuffers::DrawOverTransLayers() -{ - SetFBO(Layer::trans12); // need to write to both layers - - glViewport (0, 0, m_width, m_height); // cover the entire screen - glDisable (GL_DEPTH_TEST); // disable depth testing / writing - glDisable (GL_CULL_FACE); - glDisable (GL_BLEND); - - glActiveTexture (GL_TEXTURE0); - glBindTexture (GL_TEXTURE_2D, m_texIDs[0]); - - glBindVertexArray(m_vao); - m_shaderWipe.EnableShader(); - 
glUniform1i(m_shaderWipe.uniformLoc[0], 0); - - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - m_shaderWipe.DisableShader(); - glBindVertexArray(0); -} - void R3DFrameBuffers::DrawBaseLayer() { m_shaderBase.EnableShader(); - glUniform1i(m_shaderTrans.uniformLoc[0], 0); // to do check this + glUniform1i(m_shaderTrans.uniformLoc[0], 0); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); diff --git a/Src/Graphics/New3D/R3DFrameBuffers.h b/Src/Graphics/New3D/R3DFrameBuffers.h index 19a8596..379f00e 100644 --- a/Src/Graphics/New3D/R3DFrameBuffers.h +++ b/Src/Graphics/New3D/R3DFrameBuffers.h @@ -15,9 +15,6 @@ public: ~R3DFrameBuffers(); void Draw(); // draw and composite the transparent layers - void CompositeBaseLayer(); - void CompositeAlphaLayer(); - void DrawOverTransLayers(); // opaque pixels in next priority layer need to wipe trans pixels bool CreateFBO(int width, int height); void DestroyFBO(); @@ -33,7 +30,6 @@ private: GLuint CreateTexture(int width, int height); void AllocShaderTrans(); void AllocShaderBase(); - void AllocShaderWipe(); void DrawBaseLayer(); void DrawAlphaLayer(); @@ -50,7 +46,6 @@ private: // shaders GLSLShader m_shaderBase; GLSLShader m_shaderTrans; - GLSLShader m_shaderWipe; // vao GLuint m_vao; // this really needed if we don't actually use vertex attribs? 
diff --git a/Src/Graphics/New3D/R3DScrollFog.cpp b/Src/Graphics/New3D/R3DScrollFog.cpp index 6305c7a..e4ca6ed 100644 --- a/Src/Graphics/New3D/R3DScrollFog.cpp +++ b/Src/Graphics/New3D/R3DScrollFog.cpp @@ -40,7 +40,25 @@ vec3 lFogColor; vec4 scrollFog; // outputs -out vec4 fragColor; +layout(location = 0) out vec4 out0; // opaque +layout(location = 1) out vec4 out1; // trans layer 1 +layout(location = 2) out vec4 out2; // trans layer 2 + +void WriteOutputs(vec4 colour) +{ + vec4 blank = vec4(0.0); + + if(colour.a < 1.0) { // some transparency + out0 = blank; + out1 = colour; + out2 = blank; + } + else { // opaque + out0 = colour; + out1 = blank; + out2 = blank; + } +} void main() { @@ -62,7 +80,7 @@ void main() scrollFog = vec4(lFogColor + lSpotFogColor, fogColour.a); // Final Color - fragColor = scrollFog; + WriteOutputs(scrollFog); } )glsl"; @@ -71,7 +89,6 @@ void main() R3DScrollFog::R3DScrollFog(const Util::Config::Node &config) : m_config(config), m_vao(0) - { m_shaderProgram = 0; m_vertexShader = 0; diff --git a/Src/Graphics/New3D/R3DShader.cpp b/Src/Graphics/New3D/R3DShader.cpp index 922eb8d..307f2bd 100644 --- a/Src/Graphics/New3D/R3DShader.cpp +++ b/Src/Graphics/New3D/R3DShader.cpp @@ -1,6 +1,7 @@ #include "R3DShader.h" #include "R3DShaderQuads.h" #include "R3DShaderTriangles.h" +#include "R3DShaderCommon.h" // having 2 sets of shaders to maintain is really less than ideal // but hopefully not too many breaking changes at this point @@ -31,6 +32,7 @@ void R3DShader::Start() m_fixedShading = false; m_translatorMap = false; m_modelScale = 1.0f; + m_nodeAlpha = 1.0f; m_shininess = 0; m_specularValue = 0; m_microTexScale = 0; @@ -62,30 +64,27 @@ bool R3DShader::LoadShader(const char* vertexShader, const char* fragmentShader) const char* gShader = ""; const char* fShader = fragmentShaderR3D; - std::string fragmentShaderCombined; - if (quads) { vShader = vertexShaderR3DQuads; gShader = geometryShaderR3DQuads; - - fragmentShaderCombined += fragmentShaderR3DQuads1; 
- fragmentShaderCombined += fragmentShaderR3DQuads2; - fShader = fragmentShaderCombined.c_str(); + fShader = fragmentShaderR3DQuads; } m_shaderProgram = glCreateProgram(); m_vertexShader = glCreateShader(GL_VERTEX_SHADER); m_fragmentShader = glCreateShader(GL_FRAGMENT_SHADER); - glShaderSource(m_vertexShader, 1, (const GLchar **)&vShader, NULL); - glShaderSource(m_fragmentShader, 1, (const GLchar **)&fShader, NULL); + const char* shaderArray[] = { fShader, fragmentShaderR3DCommon }; + + glShaderSource(m_vertexShader, 1, (const GLchar **)&vShader, nullptr); + glShaderSource(m_fragmentShader, (GLsizei)std::size(shaderArray), shaderArray, nullptr); glCompileShader(m_vertexShader); glCompileShader(m_fragmentShader); if (quads) { m_geoShader = glCreateShader(GL_GEOMETRY_SHADER); - glShaderSource(m_geoShader, 1, (const GLchar **)&gShader, NULL); + glShaderSource(m_geoShader, 1, (const GLchar **)&gShader, nullptr); glCompileShader(m_geoShader); glAttachShader(m_shaderProgram, m_geoShader); PrintShaderResult(m_geoShader); @@ -111,6 +110,7 @@ bool R3DShader::LoadShader(const char* vertexShader, const char* fragmentShader) m_locBaseTexType = glGetUniformLocation(m_shaderProgram, "baseTexType"); m_locTextureInverted = glGetUniformLocation(m_shaderProgram, "textureInverted"); m_locTexWrapMode = glGetUniformLocation(m_shaderProgram, "textureWrapMode"); + m_locColourLayer = glGetUniformLocation(m_shaderProgram, "colourLayer"); m_locFogIntensity = glGetUniformLocation(m_shaderProgram, "fogIntensity"); m_locFogDensity = glGetUniformLocation(m_shaderProgram, "fogDensity"); @@ -134,6 +134,7 @@ bool R3DShader::LoadShader(const char* vertexShader, const char* fragmentShader) m_locSpotColor = glGetUniformLocation(m_shaderProgram, "spotColor"); m_locSpotFogColor = glGetUniformLocation(m_shaderProgram, "spotFogColor"); m_locModelScale = glGetUniformLocation(m_shaderProgram, "modelScale"); + m_locNodeAlpha = glGetUniformLocation(m_shaderProgram, "nodeAlpha"); m_locProjMat = 
glGetUniformLocation(m_shaderProgram, "projMat"); m_locModelMat = glGetUniformLocation(m_shaderProgram, "modelMat"); @@ -338,6 +339,11 @@ void R3DShader::SetModelStates(const Model* model) m_modelScale = model->scale; } + if (m_dirtyModel || model->alpha != m_nodeAlpha) { + glUniform1f(m_locNodeAlpha, model->alpha); + m_nodeAlpha = model->alpha; + } + m_transX = model->textureOffsetX; m_transY = model->textureOffsetY; m_transPage = model->page; @@ -355,6 +361,11 @@ void R3DShader::DiscardAlpha(bool discard) glUniform1i(m_locDiscardAlpha, discard); } +void R3DShader::SetLayer(Layer layer) +{ + glUniform1i(m_locColourLayer, (GLint)layer); +} + void R3DShader::PrintShaderResult(GLuint shader) { //=========== diff --git a/Src/Graphics/New3D/R3DShader.h b/Src/Graphics/New3D/R3DShader.h index af74905..ddf7c9b 100644 --- a/Src/Graphics/New3D/R3DShader.h +++ b/Src/Graphics/New3D/R3DShader.h @@ -23,6 +23,7 @@ public: void SetShader (bool enable = true); GLint GetVertexAttribPos (const std::string& attrib); void DiscardAlpha (bool discard); // use to remove alpha from texture alpha only polys for 1st pass + void SetLayer (Layer layer); private: @@ -53,6 +54,7 @@ private: GLint m_locTextureInverted; GLint m_locTexWrapMode; GLint m_locTranslatorMap; + GLint m_locColourLayer; // cached mesh values bool m_textured1; @@ -77,6 +79,7 @@ private: // cached model values float m_modelScale; + float m_nodeAlpha; int m_transX; int m_transY; int m_transPage; @@ -111,6 +114,7 @@ private: // model uniforms GLint m_locModelScale; + GLint m_locNodeAlpha; GLint m_locModelMat; // global uniforms diff --git a/Src/Graphics/New3D/R3DShaderCommon.h b/Src/Graphics/New3D/R3DShaderCommon.h new file mode 100644 index 0000000..b449dee --- /dev/null +++ b/Src/Graphics/New3D/R3DShaderCommon.h @@ -0,0 +1,361 @@ +#pragma once + +// I altered this code a bit to make sure it always compiles with gl 4.1. Version 4.5 allows you to specify arrays differently. 
+// Ripped out most of the common code, people have been pushing changes to the shaders but we are ending up with diverging implementations +// between triangle / quad version which is less than ideal. + +static const char* fragmentShaderR3DCommon = R"glsl( + +#define LayerColour 0x0 +#define LayerTrans0 0x1 +#define LayerTrans1 0x2 + +vec4 ExtractColour(int type, uint value) +{ + vec4 c = vec4(0.0); + + if(type==0) { // T1RGB5 + c.r = float((value >> 10) & 0x1Fu); + c.g = float((value >> 5 ) & 0x1Fu); + c.b = float((value ) & 0x1Fu); + c.rgb *= (1.0/31.0); + c.a = 1.0 - float((value >> 15) & 0x1u); + } + else if(type==1) { // Interleaved A4L4 (low byte) + c.rgb = vec3(float(value&0xFu)); + c.a = float((value >> 4) & 0xFu); + c *= (1.0/15.0); + } + else if(type==2) { + c.a = float(value&0xFu); + c.rgb = vec3(float((value >> 4) & 0xFu)); + c *= (1.0/15.0); + } + else if(type==3) { + c.rgb = vec3(float((value>>8)&0xFu)); + c.a = float((value >> 12) & 0xFu); + c *= (1.0/15.0); + } + else if(type==4) { + c.a = float((value>>8)&0xFu); + c.rgb = vec3(float((value >> 12) & 0xFu)); + c *= (1.0/15.0); + } + else if(type==5) { + c = vec4(float(value&0xFFu) / 255.0); + if(c.a==1.0) { c.a = 0.0; } + else { c.a = 1.0; } + } + else if(type==6) { + c = vec4(float((value>>8)&0xFFu) / 255.0); + if(c.a==1.0) { c.a = 0.0; } + else { c.a = 1.0; } + } + else if(type==7) { // RGBA4 + c.r = float((value>>12)&0xFu); + c.g = float((value>> 8)&0xFu); + c.b = float((value>> 4)&0xFu); + c.a = float((value>> 0)&0xFu); + c *= (1.0/15.0); + } + else if(type==8) { // low byte, low nibble + c = vec4(float(value&0xFu) / 15.0); + if(c.a==1.0) { c.a = 0.0; } + else { c.a = 1.0; } + } + else if(type==9) { // low byte, high nibble + c = vec4(float((value>>4)&0xFu) / 15.0); + if(c.a==1.0) { c.a = 0.0; } + else { c.a = 1.0; } + } + else if(type==10) { // high byte, low nibble + c = vec4(float((value>>8)&0xFu) / 15.0); + if(c.a==1.0) { c.a = 0.0; } + else { c.a = 1.0; } + } + else if(type==11) { // high 
byte, high nibble + c = vec4(float((value>>12)&0xFu) / 15.0); + if(c.a==1.0) { c.a = 0.0; } + else { c.a = 1.0; } + } + + return c; +} + +int GetPage(int yCoord) +{ + return yCoord / 1024; +} + +int GetNextPage(int yCoord) +{ + return (GetPage(yCoord) + 1) & 1; +} + +int GetNextPageOffset(int yCoord) +{ + return GetNextPage(yCoord) * 1024; +} + +// wrapping tex coords would be super easy but we combined tex sheets so have to handle wrap around between sheets +// hardware testing would be useful because i don't know exactly what happens if you try to read outside the texture sheet +// wrap around is a good guess +ivec2 WrapTexCoords(ivec2 pos, ivec2 coordinate) +{ + ivec2 newCoord; + + newCoord.x = coordinate.x & 2047; + newCoord.y = coordinate.y; + + int page = GetPage(pos.y); + + newCoord.y -= (page * 1024); // remove page + newCoord.y &= 1023; // wrap around in the same sheet + newCoord.y += (page * 1024); // add page back + + return newCoord; +} + +ivec2 GetTextureSize(int level, ivec2 size) +{ + int mipDivisor = 1 << level; + + return size / mipDivisor; +} + +ivec2 GetTexturePosition(int level, ivec2 pos) +{ + const int mipXBase[] = int[](0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047); + const int mipYBase[] = int[](0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023); + + int mipDivisor = 1 << level; + + int page = pos.y / 1024; + pos.y -= (page * 1024); // remove page from tex y + + ivec2 retPos; + retPos.x = mipXBase[level] + (pos.x / mipDivisor); + retPos.y = mipYBase[level] + (pos.y / mipDivisor); + + retPos.y += (page * 1024); // add page back to tex y + + return retPos; +} + +ivec2 GetMicroTexturePos(int id) +{ + const int xCoords[8] = int[](0, 0, 128, 128, 0, 0, 128, 128); + const int yCoords[8] = int[](0, 128, 0, 128, 256, 384, 256, 384); + + return ivec2(xCoords[id],yCoords[id]); +} + +float mip_map_level(in vec2 texture_coordinate) // in texel units +{ + vec2 dx_vtc = dFdx(texture_coordinate); + vec2 dy_vtc = 
dFdy(texture_coordinate); + float delta_max_sqr = max(dot(dx_vtc, dx_vtc), dot(dy_vtc, dy_vtc)); + float mml = 0.5 * log2(delta_max_sqr); + return max( 0.0, mml ); +} + +float LinearTexLocations(int wrapMode, float size, float u, out float u0, out float u1) +{ + float texelSize = 1.0 / size; + float halfTexelSize = 0.5 / size; + + if(wrapMode==0) { // repeat + u = u * size - 0.5; + u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors + u0 = fract(u0); + u1 = u0 + texelSize; + u1 = fract(u1); + + return fract(u); // return weight + } + else if(wrapMode==1) { // repeat + clamp + u = fract(u); // must force into 0-1 to start + u = u * size - 0.5; + u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors + u1 = u0 + texelSize; + + if(u0 < 0.0) u0 = 0.0; + if(u1 >= 1.0) u1 = 1.0 - halfTexelSize; + + return fract(u); // return weight + } + else { // mirror + mirror clamp - both are the same since the edge pixels are repeated anyway + + float odd = floor(mod(u, 2.0)); // odd values are mirrored + + if(odd > 0.0) { + u = 1.0 - fract(u); + } + else { + u = fract(u); + } + + u = u * size - 0.5; + u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors + u1 = u0 + texelSize; + + if(u0 < 0.0) u0 = 0.0; + if(u1 >= 1.0) u1 = 1.0 - halfTexelSize; + + return fract(u); // return weight + } +} + +vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texPos, vec2 texCoord) +{ + float tx[2], ty[2]; + float a = LinearTexLocations(wrapMode.s, texSize.x, texCoord.x, tx[0], tx[1]); + float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]); + + vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos)), 0).r); + vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, 
WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos)), 0).r); + vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos)), 0).r); + vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos)), 0).r); + + if(alphaTest) { + if(p0q0.a > p1q0.a) { p1q0.rgb = p0q0.rgb; } + if(p0q0.a > p0q1.a) { p0q1.rgb = p0q0.rgb; } + + if(p1q0.a > p0q0.a) { p0q0.rgb = p1q0.rgb; } + if(p1q0.a > p1q1.a) { p1q1.rgb = p1q0.rgb; } + + if(p0q1.a > p0q0.a) { p0q0.rgb = p0q1.rgb; } + if(p0q1.a > p1q1.a) { p1q1.rgb = p0q1.rgb; } + + if(p1q1.a > p0q1.a) { p0q1.rgb = p1q1.rgb; } + if(p1q1.a > p1q0.a) { p1q0.rgb = p1q1.rgb; } + } + + // Interpolation in X direction. + vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction. + vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction. + + return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction. 
+} + +vec4 textureR3D(usampler2D texSampler, ivec2 wrapMode, ivec2 texSize, ivec2 texPos, vec2 texCoord) +{ + float numLevels = floor(log2(min(float(texSize.x), float(texSize.y)))); // r3d only generates down to 1:1 for square textures, otherwise its the min dimension + float fLevel = min(mip_map_level(texCoord * vec2(texSize)), numLevels); + + if(alphaTest) fLevel *= 0.5; + else fLevel *= 0.8; + + int iLevel = int(fLevel); + + ivec2 texPos0 = GetTexturePosition(iLevel,texPos); + ivec2 texPos1 = GetTexturePosition(iLevel+1,texPos); + + ivec2 texSize0 = GetTextureSize(iLevel, texSize); + ivec2 texSize1 = GetTextureSize(iLevel+1, texSize); + + vec4 texLevel0 = texBiLinear(texSampler, wrapMode, vec2(texSize0), texPos0, texCoord); + vec4 texLevel1 = texBiLinear(texSampler, wrapMode, vec2(texSize1), texPos1, texCoord); + + return mix(texLevel0, texLevel1, fract(fLevel)); // linear blend between our mipmap levels +} + +vec4 GetTextureValue() +{ + vec4 tex1Data = textureR3D(tex1, textureWrapMode, ivec2(baseTexInfo.zw), ivec2(baseTexInfo.xy), fsTexCoord); + + if(textureInverted) { + tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb); + } + + if (microTexture) { + vec2 scale = (vec2(baseTexInfo.zw) / 128.0) * microTextureScale; + ivec2 pos = GetMicroTexturePos(microTextureID); + + // add page offset to microtexture position + pos.y += GetNextPageOffset(baseTexInfo.y); + + vec4 tex2Data = textureR3D(tex1, ivec2(0), ivec2(128), pos, fsTexCoord * scale); + + float lod = mip_map_level(fsTexCoord * scale * vec2(128.0)); + + float blendFactor = max(lod - 1.5, 0.0); // bias -1.5 + blendFactor = min(blendFactor, 1.0); // clamp to max value 1 + blendFactor = (blendFactor + 1.0) / 2.0; // 0.5 - 1 range + + tex1Data = mix(tex2Data, tex1Data, blendFactor); + } + + if (alphaTest) { + if (tex1Data.a < (32.0/255.0)) { + discard; + } + } + + if(textureAlpha) { + if(discardAlpha) { // opaque 1st pass + if (tex1Data.a < 1.0) { + discard; + } + } + else { // transparent 2nd pass + if ((tex1Data.a 
* fsColor.a) >= 1.0) { + discard; + } + } + } + + if (textureAlpha == false) { + tex1Data.a = 1.0; + } + + return tex1Data; +} + +void Step15Luminous(inout vec4 colour) +{ + // luminous polys seem to behave very differently on step 1.5 hardware + // when fixed shading is enabled the colour is modulated by the vp ambient + fixed shade value + // when disabled it appears to be multiplied by 1.5, presumably to allow a higher range + if(hardwareStep==0x15) { + if(!lightEnabled && textureEnabled) { + if(fixedShading) { + colour.rgb *= 1.0 + fsFixedShade + lighting[1].y; + } + else { + colour.rgb *= 1.5; + } + } + } +} + +float CalcFog() +{ + float z = -fsViewVertex.z; + float fog = fogIntensity * clamp(fogStart + z * fogDensity, 0.0, 1.0); + + return fog; +} + +void WriteOutputs(vec4 colour, int layer) +{ + vec4 blank = vec4(0.0); + + if(layer==LayerColour) { + out0 = colour; + out1 = blank; + out2 = blank; + } + else if(layer==LayerTrans0) { + out0 = blank; + out1 = colour; + out2 = blank; + } + else if(layer==LayerTrans1) { + out0 = blank; + out1 = blank; + out2 = colour; + } +} + +)glsl"; diff --git a/Src/Graphics/New3D/R3DShaderQuads.h b/Src/Graphics/New3D/R3DShaderQuads.h index 442a6c7..df3387e 100644 --- a/Src/Graphics/New3D/R3DShaderQuads.h +++ b/Src/Graphics/New3D/R3DShaderQuads.h @@ -7,6 +7,7 @@ static const char *vertexShaderR3DQuads = R"glsl( // uniforms uniform float modelScale; +uniform float nodeAlpha; uniform mat4 modelMat; uniform mat4 projMat; uniform bool translatorMap; @@ -39,6 +40,8 @@ vec4 GetColour(vec4 colour) c.rgb *= 16.0; } + c.a *= nodeAlpha; + return c; } @@ -168,7 +171,7 @@ void main(void) )glsl"; -static const char *fragmentShaderR3DQuads1 = R"glsl( +static const char *fragmentShaderR3DQuads = R"glsl( #version 450 core @@ -207,6 +210,7 @@ uniform float fogAttenuation; uniform float fogAmbient; uniform bool fixedShading; uniform int hardwareStep; +uniform int colourLayer; // matrices (shared with vertex shader) uniform mat4 projMat; @@ 
-235,7 +239,16 @@ float fsFixedShade; vec4 fsColor; //outputs -out vec4 outColor; +layout(location = 0) out vec4 out0; // opaque +layout(location = 1) out vec4 out1; // trans layer 1 +layout(location = 2) out vec4 out2; // trans layer 2 + +// forward declarations (see common file) + +float CalcFog(); +void Step15Luminous(inout vec4 colour); +vec4 GetTextureValue(); +void WriteOutputs(vec4 colour, int layer); void QuadraticInterpolation() { @@ -341,333 +354,6 @@ void QuadraticInterpolation() gl_FragDepth = depth * 0.5 + 0.5; } -vec4 ExtractColour(int type, uint value) -{ - vec4 c = vec4(0.0); - - if(type==0) { // T1RGB5 - c.r = float((value >> 10) & 0x1Fu); - c.g = float((value >> 5 ) & 0x1Fu); - c.b = float((value ) & 0x1Fu); - c.rgb *= (1.0/31.0); - c.a = 1.0 - float((value >> 15) & 0x1u); - } - else if(type==1) { // Interleaved A4L4 (low byte) - c.rgb = vec3(float(value&0xFu)); - c.a = float((value >> 4) & 0xFu); - c *= (1.0/15.0); - } - else if(type==2) { - c.a = float(value&0xFu); - c.rgb = vec3(float((value >> 4) & 0xFu)); - c *= (1.0/15.0); - } - else if(type==3) { - c.rgb = vec3(float((value>>8)&0xFu)); - c.a = float((value >> 12) & 0xFu); - c *= (1.0/15.0); - } - else if(type==4) { - c.a = float((value>>8)&0xFu); - c.rgb = vec3(float((value >> 12) & 0xFu)); - c *= (1.0/15.0); - } - else if(type==5) { - c = vec4(float(value&0xFFu) / 255.0); - if(c.a==1.0) { c.a = 0.0; } - else { c.a = 1.0; } - } - else if(type==6) { - c = vec4(float((value>>8)&0xFFu) / 255.0); - if(c.a==1.0) { c.a = 0.0; } - else { c.a = 1.0; } - } - else if(type==7) { // RGBA4 - c.r = float((value>>12)&0xFu); - c.g = float((value>> 8)&0xFu); - c.b = float((value>> 4)&0xFu); - c.a = float((value>> 0)&0xFu); - c *= (1.0/15.0); - } - else if(type==8) { // low byte, low nibble - c = vec4(float(value&0xFu) / 15.0); - if(c.a==1.0) { c.a = 0.0; } - else { c.a = 1.0; } - } - else if(type==9) { // low byte, high nibble - c = vec4(float((value>>4)&0xFu) / 15.0); - if(c.a==1.0) { c.a = 0.0; } - else { 
c.a = 1.0; } - } - else if(type==10) { // high byte, low nibble - c = vec4(float((value>>8)&0xFu) / 15.0); - if(c.a==1.0) { c.a = 0.0; } - else { c.a = 1.0; } - } - else if(type==11) { // high byte, high nibble - c = vec4(float((value>>12)&0xFu) / 15.0); - if(c.a==1.0) { c.a = 0.0; } - else { c.a = 1.0; } - } - - return c; -} - -ivec2 GetTexturePosition(int level, ivec2 pos) -{ - const int mipXBase[] = { 0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047 }; - const int mipYBase[] = { 0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023 }; - - int mipDivisor = 1 << level; - - int page = pos.y / 1024; - pos.y -= (page * 1024); // remove page from tex y - - ivec2 retPos; - retPos.x = mipXBase[level] + (pos.x / mipDivisor); - retPos.y = mipYBase[level] + (pos.y / mipDivisor); - - retPos.y += (page * 1024); // add page back to tex y - - return retPos; -} - -ivec2 GetTextureSize(int level, ivec2 size) -{ - int mipDivisor = 1 << level; - - return size / mipDivisor; -} - -ivec2 GetMicroTexturePos(int id) -{ - int xCoords[8] = { 0, 0, 128, 128, 0, 0, 128, 128 }; - int yCoords[8] = { 0, 128, 0, 128, 256, 384, 256, 384 }; - - return ivec2(xCoords[id],yCoords[id]); -} - -int GetPage(int yCoord) -{ - return yCoord / 1024; -} - -int GetNextPage(int yCoord) -{ - return (GetPage(yCoord) + 1) & 1; -} - -int GetNextPageOffset(int yCoord) -{ - return GetNextPage(yCoord) * 1024; -} - -// wrapping tex coords would be super easy but we combined tex sheets so have to handle wrap around between sheets -// hardware testing would be useful because i don't know exactly what happens if you try to read outside the texture sheet -// wrap around is a good guess -ivec2 WrapTexCoords(ivec2 pos, ivec2 coordinate) -{ - ivec2 newCoord; - - newCoord.x = coordinate.x & 2047; - newCoord.y = coordinate.y; - - int page = GetPage(pos.y); - - newCoord.y -= (page * 1024); // remove page - newCoord.y &= 1023; // wrap around in the same sheet - newCoord.y += (page * 1024); // add page back 
- - return newCoord; -} - -float mip_map_level(in vec2 texture_coordinate) // in texel units -{ - vec2 dx_vtc = dFdx(texture_coordinate); - vec2 dy_vtc = dFdy(texture_coordinate); - float delta_max_sqr = max(dot(dx_vtc, dx_vtc), dot(dy_vtc, dy_vtc)); - float mml = 0.5 * log2(delta_max_sqr); - return max( 0.0, mml ); -} - -float LinearTexLocations(int wrapMode, float size, float u, out float u0, out float u1) -{ - float texelSize = 1.0 / size; - float halfTexelSize = 0.5 / size; - - if(wrapMode==0) { // repeat - u = u * size - 0.5; - u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors - u0 = fract(u0); - u1 = u0 + texelSize; - u1 = fract(u1); - - return fract(u); // return weight - } - else if(wrapMode==1) { // repeat + clamp - u = fract(u); // must force into 0-1 to start - u = u * size - 0.5; - u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors - u1 = u0 + texelSize; - - if(u0 < 0.0) u0 = 0.0; - if(u1 >= 1.0) u1 = 1.0 - halfTexelSize; - - return fract(u); // return weight - } - else { // mirror + mirror clamp - both are the same since the edge pixels are repeated anyway - - float odd = floor(mod(u, 2.0)); // odd values are mirrored - - if(odd > 0.0) { - u = 1.0 - fract(u); - } - else { - u = fract(u); - } - - u = u * size - 0.5; - u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors - u1 = u0 + texelSize; - - if(u0 < 0.0) u0 = 0.0; - if(u1 >= 1.0) u1 = 1.0 - halfTexelSize; - - return fract(u); // return weight - } -} - -vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texPos, vec2 texCoord) -{ - float tx[2], ty[2]; - float a = LinearTexLocations(wrapMode.s, texSize.x, texCoord.x, tx[0], tx[1]); - float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]); - - vec4 p0q0 = 
ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos)), 0).r); - vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos)), 0).r); - vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos)), 0).r); - vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos)), 0).r); - - if(alphaTest) { - if(p0q0.a > p1q0.a) { p1q0.rgb = p0q0.rgb; } - if(p0q0.a > p0q1.a) { p0q1.rgb = p0q0.rgb; } - - if(p1q0.a > p0q0.a) { p0q0.rgb = p1q0.rgb; } - if(p1q0.a > p1q1.a) { p1q1.rgb = p1q0.rgb; } - - if(p0q1.a > p0q0.a) { p0q0.rgb = p0q1.rgb; } - if(p0q1.a > p1q1.a) { p1q1.rgb = p0q1.rgb; } - - if(p1q1.a > p0q1.a) { p0q1.rgb = p1q1.rgb; } - if(p1q1.a > p1q0.a) { p1q0.rgb = p1q1.rgb; } - } - - // Interpolation in X direction. - vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction. - vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction. - - return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction. 
-} - -vec4 textureR3D(usampler2D texSampler, ivec2 wrapMode, ivec2 texSize, ivec2 texPos, vec2 texCoord) -{ - float numLevels = floor(log2(min(float(texSize.x), float(texSize.y)))); // r3d only generates down to 1:1 for square textures, otherwise its the min dimension - float fLevel = min(mip_map_level(texCoord * vec2(texSize)), numLevels); - - if(alphaTest) fLevel *= 0.5; - else fLevel *= 0.8; - - int iLevel = int(fLevel); - - ivec2 texPos0 = GetTexturePosition(iLevel,texPos); - ivec2 texPos1 = GetTexturePosition(iLevel+1,texPos); - - ivec2 texSize0 = GetTextureSize(iLevel, texSize); - ivec2 texSize1 = GetTextureSize(iLevel+1, texSize); - - vec4 texLevel0 = texBiLinear(texSampler, wrapMode, vec2(texSize0), texPos0, texCoord); - vec4 texLevel1 = texBiLinear(texSampler, wrapMode, vec2(texSize1), texPos1, texCoord); - - return mix(texLevel0, texLevel1, fract(fLevel)); // linear blend between our mipmap levels -} - -vec4 GetTextureValue() -{ - vec4 tex1Data = textureR3D(tex1, textureWrapMode, ivec2(baseTexInfo.zw), ivec2(baseTexInfo.xy), fsTexCoord); - - if(textureInverted) { - tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb); - } - - if (microTexture) { - vec2 scale = (vec2(baseTexInfo.zw) / 128.0) * microTextureScale; - ivec2 pos = GetMicroTexturePos(microTextureID); - - // add page offset to microtexture position - pos.y += GetNextPageOffset(baseTexInfo.y); - - vec4 tex2Data = textureR3D(tex1, ivec2(0), ivec2(128), pos, fsTexCoord * scale); - - float lod = mip_map_level(fsTexCoord * scale * vec2(128.0)); - - float blendFactor = max(lod - 1.5, 0.0); // bias -1.5 - blendFactor = min(blendFactor, 1.0); // clamp to max value 1 - blendFactor = (blendFactor + 1.0) / 2.0; // 0.5 - 1 range - - tex1Data = mix(tex2Data, tex1Data, blendFactor); - } - - if (alphaTest) { - if (tex1Data.a < (32.0/255.0)) { - discard; - } - } - - if(textureAlpha) { - if(discardAlpha) { // opaque 1st pass - if (tex1Data.a < 1.0) { - discard; - } - } - else { // transparent 2nd pass - if ((tex1Data.a 
* fsColor.a) >= 1.0) { - discard; - } - } - } - - if (textureAlpha == false) { - tex1Data.a = 1.0; - } - - return tex1Data; -} - -void Step15Luminous(inout vec4 colour) -{ - // luminous polys seem to behave very differently on step 1.5 hardware - // when fixed shading is enabled the colour is modulated by the vp ambient + fixed shade value - // when disabled it appears to be multiplied by 1.5, presumably to allow a higher range - if(hardwareStep==0x15) { - if(!lightEnabled && textureEnabled) { - if(fixedShading) { - colour.rgb *= 1.0 + fsFixedShade + lighting[1].y; - } - else { - colour.rgb *= 1.5; - } - } - } -} - -float CalcFog() -{ - float z = -fsViewVertex.z; - float fog = fogIntensity * clamp(fogStart + z * fogDensity, 0.0, 1.0); - - return fog; -} - float sqr(float a) { return a*a; @@ -678,10 +364,6 @@ float sqr_length(vec2 a) return a.x*a.x + a.y*a.y; } -)glsl"; - -static const char* fragmentShaderR3DQuads2 = R"glsl( - void main() { vec4 tex1Data; @@ -814,8 +496,8 @@ void main() // Fog & spotlight applied finalData.rgb = mix(finalData.rgb, fogData.rgb + lSpotFogColor, fogData.a); - // Write output - outColor = finalData; + // Write outputs to colour buffers + WriteOutputs(finalData,colourLayer); } )glsl"; diff --git a/Src/Graphics/New3D/R3DShaderTriangles.h b/Src/Graphics/New3D/R3DShaderTriangles.h index 3c31411..e79638a 100644 --- a/Src/Graphics/New3D/R3DShaderTriangles.h +++ b/Src/Graphics/New3D/R3DShaderTriangles.h @@ -7,6 +7,7 @@ static const char *vertexShaderR3D = R"glsl( // uniforms uniform float modelScale; +uniform float nodeAlpha; uniform mat4 modelMat; uniform mat4 projMat; uniform bool translatorMap; @@ -35,6 +36,8 @@ vec4 GetColour(vec4 colour) c.rgb *= 16.0; } + c.a *= nodeAlpha; + return c; } @@ -98,6 +101,7 @@ uniform float fogAttenuation; uniform float fogAmbient; uniform bool fixedShading; uniform int hardwareStep; +uniform int colourLayer; //interpolated inputs from vertex shader in vec3 fsViewVertex; @@ -108,334 +112,16 @@ in float 
fsDiscard; in float fsFixedShade; //outputs -out vec4 outColor; +layout(location = 0) out vec4 out0; // opaque +layout(location = 1) out vec4 out1; // trans layer 1 +layout(location = 2) out vec4 out2; // trans layer 2 -vec4 ExtractColour(int type, uint value) -{ - vec4 c = vec4(0.0); +// forward declarations (see common file) - if(type==0) { // T1RGB5 - c.r = float((value >> 10) & 0x1Fu); - c.g = float((value >> 5 ) & 0x1Fu); - c.b = float((value ) & 0x1Fu); - c.rgb *= (1.0/31.0); - c.a = 1.0 - float((value >> 15) & 0x1u); - } - else if(type==1) { // Interleaved A4L4 (low byte) - c.rgb = vec3(float(value&0xFu)); - c.a = float((value >> 4) & 0xFu); - c *= (1.0/15.0); - } - else if(type==2) { - c.a = float(value&0xFu); - c.rgb = vec3(float((value >> 4) & 0xFu)); - c *= (1.0/15.0); - } - else if(type==3) { - c.rgb = vec3(float((value>>8)&0xFu)); - c.a = float((value >> 12) & 0xFu); - c *= (1.0/15.0); - } - else if(type==4) { - c.a = float((value>>8)&0xFu); - c.rgb = vec3(float((value >> 12) & 0xFu)); - c *= (1.0/15.0); - } - else if(type==5) { - c = vec4(float(value&0xFFu) / 255.0); - if(c.a==1.0) { c.a = 0.0; } - else { c.a = 1.0; } - } - else if(type==6) { - c = vec4(float((value>>8)&0xFFu) / 255.0); - if(c.a==1.0) { c.a = 0.0; } - else { c.a = 1.0; } - } - else if(type==7) { // RGBA4 - c.r = float((value>>12)&0xFu); - c.g = float((value>> 8)&0xFu); - c.b = float((value>> 4)&0xFu); - c.a = float((value>> 0)&0xFu); - c *= (1.0/15.0); - } - else if(type==8) { // low byte, low nibble - c = vec4(float(value&0xFu) / 15.0); - if(c.a==1.0) { c.a = 0.0; } - else { c.a = 1.0; } - } - else if(type==9) { // low byte, high nibble - c = vec4(float((value>>4)&0xFu) / 15.0); - if(c.a==1.0) { c.a = 0.0; } - else { c.a = 1.0; } - } - else if(type==10) { // high byte, low nibble - c = vec4(float((value>>8)&0xFu) / 15.0); - if(c.a==1.0) { c.a = 0.0; } - else { c.a = 1.0; } - } - else if(type==11) { // high byte, high nibble - c = vec4(float((value>>12)&0xFu) / 15.0); - if(c.a==1.0) { 
c.a = 0.0; } - else { c.a = 1.0; } - } - - return c; -} - -ivec2 GetTexturePosition(int level, ivec2 pos) -{ - const int mipXBase[] = int[](0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047); - const int mipYBase[] = int[](0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023); - - int mipDivisor = 1 << level; - - int page = pos.y / 1024; - pos.y -= (page * 1024); // remove page from tex y - - ivec2 retPos; - retPos.x = mipXBase[level] + (pos.x / mipDivisor); - retPos.y = mipYBase[level] + (pos.y / mipDivisor); - - retPos.y += (page * 1024); // add page back to tex y - - return retPos; -} - -ivec2 GetTextureSize(int level, ivec2 size) -{ - int mipDivisor = 1 << level; - - return size / mipDivisor; -} - -ivec2 GetMicroTexturePos(int id) -{ - const int xCoords[8] = int[](0, 0, 128, 128, 0, 0, 128, 128); - const int yCoords[8] = int[](0, 128, 0, 128, 256, 384, 256, 384); - - return ivec2(xCoords[id],yCoords[id]); -} - -int GetPage(int yCoord) -{ - return yCoord / 1024; -} - -int GetNextPage(int yCoord) -{ - return (GetPage(yCoord) + 1) & 1; -} - -int GetNextPageOffset(int yCoord) -{ - return GetNextPage(yCoord) * 1024; -} - -// wrapping tex coords would be super easy but we combined tex sheets so have to handle wrap around between sheets -// hardware testing would be useful because i don't know exactly what happens if you try to read outside the texture sheet -// wrap around is a good guess -ivec2 WrapTexCoords(ivec2 pos, ivec2 coordinate) -{ - ivec2 newCoord; - - newCoord.x = coordinate.x & 2047; - newCoord.y = coordinate.y; - - int page = GetPage(pos.y); - - newCoord.y -= (page * 1024); // remove page - newCoord.y &= 1023; // wrap around in the same sheet - newCoord.y += (page * 1024); // add page back - - return newCoord; -} - -float mip_map_level(in vec2 texture_coordinate) // in texel units -{ - vec2 dx_vtc = dFdx(texture_coordinate); - vec2 dy_vtc = dFdy(texture_coordinate); - float delta_max_sqr = max(dot(dx_vtc, dx_vtc), dot(dy_vtc, 
dy_vtc)); - float mml = 0.5 * log2(delta_max_sqr); - return max( 0, mml ); -} - -float LinearTexLocations(int wrapMode, float size, float u, out float u0, out float u1) -{ - float texelSize = 1.0 / size; - float halfTexelSize = 0.5 / size; - - if(wrapMode==0) { // repeat - u = (u * size) - 0.5; - u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors - u0 = fract(u0); - u1 = u0 + texelSize; - u1 = fract(u1); - - return fract(u); // return weight - } - else if(wrapMode==1) { // repeat + clamp - u = fract(u); // must force into 0-1 to start - u = (u * size) - 0.5; - u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors - u1 = u0 + texelSize; - - if(u0 < 0.0) u0 = 0.0; - if(u1 >= 1.0) u1 = 1.0 - halfTexelSize; - - return fract(u); // return weight - } - else { // mirror + mirror clamp - both are the same since the edge pixels are repeated anyway - - float odd = floor(mod(u, 2.0)); // odd values are mirrored - - if(odd > 0.0) { - u = 1.0 - fract(u); - } - else { - u = fract(u); - } - - u = (u * size) - 0.5; - u0 = (floor(u) + 0.5) / size; // + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors - u1 = u0 + texelSize; - - if(u0 < 0.0) u0 = 0.0; - if(u1 >= 1.0) u1 = 1.0 - halfTexelSize; - - return fract(u); // return weight - } -} - -vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texPos, vec2 texCoord) -{ - float tx[2], ty[2]; - float a = LinearTexLocations(wrapMode.s, texSize.x, texCoord.x, tx[0], tx[1]); - float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]); - - vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos)), 0).r); - vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos)), 0).r); - vec4 
p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos)), 0).r); - vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos)), 0).r); - - if(alphaTest) { - if(p0q0.a > p1q0.a) { p1q0.rgb = p0q0.rgb; } - if(p0q0.a > p0q1.a) { p0q1.rgb = p0q0.rgb; } - - if(p1q0.a > p0q0.a) { p0q0.rgb = p1q0.rgb; } - if(p1q0.a > p1q1.a) { p1q1.rgb = p1q0.rgb; } - - if(p0q1.a > p0q0.a) { p0q0.rgb = p0q1.rgb; } - if(p0q1.a > p1q1.a) { p1q1.rgb = p0q1.rgb; } - - if(p1q1.a > p0q1.a) { p0q1.rgb = p1q1.rgb; } - if(p1q1.a > p1q0.a) { p1q0.rgb = p1q1.rgb; } - } - - // Interpolation in X direction. - vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction. - vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction. - - return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction. -} - -vec4 textureR3D(usampler2D texSampler, ivec2 wrapMode, ivec2 texSize, ivec2 texPos, vec2 texCoord) -{ - float numLevels = floor(log2(min(float(texSize.x), float(texSize.y)))); // r3d only generates down to 1:1 for square textures, otherwise its the min dimension - float fLevel = min(mip_map_level(texCoord * vec2(texSize)), numLevels); - - if(alphaTest) fLevel *= 0.5; - else fLevel *= 0.8; - - int iLevel = int(fLevel); - - ivec2 texPos0 = GetTexturePosition(iLevel,texPos); - ivec2 texPos1 = GetTexturePosition(iLevel+1,texPos); - - ivec2 texSize0 = GetTextureSize(iLevel, texSize); - ivec2 texSize1 = GetTextureSize(iLevel+1, texSize); - - vec4 texLevel0 = texBiLinear(texSampler, wrapMode, vec2(texSize0), texPos0, texCoord); - vec4 texLevel1 = texBiLinear(texSampler, wrapMode, vec2(texSize1), texPos1, texCoord); - - return mix(texLevel0, texLevel1, fract(fLevel)); // linear blend between our mipmap levels -} - -vec4 GetTextureValue() -{ - vec4 tex1Data = textureR3D(tex1, textureWrapMode, ivec2(baseTexInfo.zw), 
ivec2(baseTexInfo.xy), fsTexCoord); - - if(textureInverted) { - tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb); - } - - if (microTexture) { - vec2 scale = (vec2(baseTexInfo.zw) / 128.0) * microTextureScale; - ivec2 pos = GetMicroTexturePos(microTextureID); - - // add page offset to microtexture position - pos.y += GetNextPageOffset(baseTexInfo.y); - - vec4 tex2Data = textureR3D(tex1, ivec2(0), ivec2(128), pos, fsTexCoord * scale); - - float lod = mip_map_level(fsTexCoord * scale * vec2(128.0)); - - float blendFactor = max(lod - 1.5, 0.0); // bias -1.5 - blendFactor = min(blendFactor, 1.0); // clamp to max value 1 - blendFactor = (blendFactor + 1.0) / 2.0; // 0.5 - 1 range - - tex1Data = mix(tex2Data, tex1Data, blendFactor); - } - - if (alphaTest) { - if (tex1Data.a < (32.0/255.0)) { - discard; - } - } - - if(textureAlpha) { - if(discardAlpha) { // opaque 1st pass - if (tex1Data.a < 1.0) { - discard; - } - } - else { // transparent 2nd pass - if ((tex1Data.a * fsColor.a) >= 1.0) { - discard; - } - } - } - - if (textureAlpha == false) { - tex1Data.a = 1.0; - } - - return tex1Data; -} - -void Step15Luminous(inout vec4 colour) -{ - // luminous polys seem to behave very differently on step 1.5 hardware - // when fixed shading is enabled the colour is modulated by the vp ambient + fixed shade value - // when disabled it appears to be multiplied by 1.5, presumably to allow a higher range - if(hardwareStep==0x15) { - if(!lightEnabled && textureEnabled) { - if(fixedShading) { - colour.rgb *= 1.0 + fsFixedShade + lighting[1].y; - } - else { - colour.rgb *= vec3(1.5); - } - } - } -} - -float CalcFog() -{ - float z = -fsViewVertex.z; - float fog = fogIntensity * clamp(fogStart + z * fogDensity, 0.0, 1.0); - - return fog; -} +float CalcFog(); +void Step15Luminous(inout vec4 colour); +vec4 GetTextureValue(); +void WriteOutputs(vec4 colour, int layer); void main() { @@ -572,7 +258,8 @@ void main() // Fog & spotlight applied finalData.rgb = mix(finalData.rgb, fogData.rgb + 
lSpotFogColor, fogData.a); - outColor = finalData; + // Write outputs to colour buffers + WriteOutputs(finalData,colourLayer); } )glsl"; diff --git a/Src/Model3/TileGen.cpp b/Src/Model3/TileGen.cpp index 9cc2db5..385fa82 100644 --- a/Src/Model3/TileGen.cpp +++ b/Src/Model3/TileGen.cpp @@ -48,6 +48,7 @@ * - For consistency, the registers should probably be byte reversed (this is a * little endian device), forcing the Model3 Read32/Write32 handlers to * manually reverse the data. This keeps with the convention for VRAM. + * Need to finish ripping out code that no longer does anything. Removed a lot but there's still more. */ #include "TileGen.h" @@ -109,9 +110,6 @@ void CTileGen::LoadState(CBlockFile *SaveState) } SaveState->Read(regs, sizeof(regs)); - // Because regs were read after palette, must recompute - RecomputePalettes(); - // If multi-threaded, update read-only snapshots too if (m_gpuMultiThreaded) UpdateSnapshots(true); @@ -143,34 +141,8 @@ void CTileGen::EndVBlank(void) // } -void CTileGen::RecomputePalettes(void) -{ - // Writing the colors forces palettes to be computed - if (m_gpuMultiThreaded) - { - for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 ) - { - MARK_DIRTY(palDirty[0], colorAddr); - MARK_DIRTY(palDirty[1], colorAddr); - WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]); - } - } - else - { - for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 ) - WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]); - } -} - UINT32 CTileGen::SyncSnapshots(void) { - // Good time to recompute the palettes - if (recomputePalettes) - { - RecomputePalettes(); - recomputePalettes = false; - } - if (!m_gpuMultiThreaded) return 0; @@ -279,23 +251,6 @@ void CTileGen::WriteRAM32(unsigned addr, UINT32 data) if (m_gpuMultiThreaded) MARK_DIRTY(vramDirty, addr); *(UINT32 *) &vram[addr] = data; - - // Update palette if required - if (addr >= 0x100000) - { - addr -= 0x100000; - unsigned color = addr/4; // color index - - 
// Same address in both palettes must be marked dirty - if (m_gpuMultiThreaded) - { - MARK_DIRTY(palDirty[0], addr); - MARK_DIRTY(palDirty[1], addr); - } - - // Both palettes will be modified simultaneously - WritePalette(color, data); - } } //TODO: 8- and 16-bit handlers have not been thoroughly tested @@ -330,76 +285,6 @@ void CTileGen::WriteRAM16(unsigned addr, uint16_t data) WriteRAM32(addr & ~1, tmp); } -void CTileGen::InitPalette(void) -{ - for (int i = 0; i < 0x20000/4; i++) - { - WritePalette(i, *(UINT32 *) &vram[0x100000 + i*4]); - if (m_gpuMultiThreaded) - { - palRO[0][i] = pal[0][i]; - palRO[1][i] = pal[1][i]; - } - } -} - -static inline UINT32 AddColorOffset(UINT8 r, UINT8 g, UINT8 b, UINT8 a, UINT32 offsetReg) -{ - INT32 ir, ig, ib; - - /* - * Color offsets are signed but I'm not sure whether or not their range is - * merely [-128,+127], which would mean adding to a 0 component would not - * result full intensity (only +127 at most). Alternatively, the signed - * value might have to be multiplied by 2. That is assumed here. In either - * case, the signed addition should be saturated. 
- */ - - ib = (INT32) (INT8)((offsetReg>>16)&0xFF); - ig = (INT32) (INT8)((offsetReg>>8)&0xFF); - ir = (INT32) (INT8)((offsetReg>>0)&0xFF); - ib *= 2; - ig *= 2; - ir *= 2; - - // Add with saturation - ib += (INT32) (UINT32) b; - if (ib < 0) ib = 0; - else if (ib > 0xFF) ib = 0xFF; - ig += (INT32) (UINT32) g; - if (ig < 0) ig = 0; - else if (ig > 0xFF) ig = 0xFF; - ir += (INT32) (UINT32) r; - if (ir < 0) ir = 0; - else if (ir > 0xFF) ir = 0xFF; - - // Construct the final 32-bit ABGR-format color - r = (UINT8) ir; - g = (UINT8) ig; - b = (UINT8) ib; - return ((UINT32)a<<24)|((UINT32)b<<16)|((UINT32)g<<8)|(UINT32)r; -} - -void CTileGen::WritePalette(unsigned color, UINT32 data) -{ - UINT8 r, g, b, a; - - a = 0xFF * ((data>>15)&1); // decode the RGBA (make alpha 0xFF or 0x00) - a = ~a; // invert it (set on Model 3 means clear pixel) - - if ((data&0x8000)) - r = g = b = 0; - else - { - b = (((data >> 10) & 0x1F) * 255) / 31; - g = (((data >> 5) & 0x1F) * 255) / 31; - r = ((data & 0x1F) * 255) / 31; - } - - pal[0][color] = AddColorOffset(r, g, b, a, regs[0x40/4]); // A/A' - pal[1][color] = AddColorOffset(r, g, b, a, regs[0x44/4]); // B/B' -} - UINT32 CTileGen::ReadRegister(unsigned reg) { reg &= 0xFF; @@ -423,11 +308,6 @@ void CTileGen::WriteRegister(unsigned reg, UINT32 data) break; case 0x40: // layer A/A' color offset case 0x44: // layer B/B' color offset - // We only have a mechanism to recompute both palettes simultaneously. - // These regs are often written together in the same frame. To avoid - // needlessly recomputing both palettes twice, we defer the operation. 
- if (regs[reg/4] != data) // only if changed - recomputePalettes = true; break; case 0x10: // IRQ acknowledge IRQ->Deassert(data&0xFF); @@ -450,9 +330,6 @@ void CTileGen::Reset(void) memset(memoryPool, 0, memSize); memset(regs, 0, sizeof(regs)); memset(regsRO, 0, sizeof(regsRO)); - - InitPalette(); - recomputePalettes = false; DebugLog("Tile Generator reset\n"); } diff --git a/Src/Model3/TileGen.h b/Src/Model3/TileGen.h index c1c8a87..fa9dfb2 100644 --- a/Src/Model3/TileGen.h +++ b/Src/Model3/TileGen.h @@ -254,9 +254,6 @@ public: private: // Private member functions - void RecomputePalettes(void); - void InitPalette(void); - void WritePalette(unsigned color, UINT32 data); UINT32 UpdateSnapshots(bool copyWhole); UINT32 UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty); @@ -274,7 +271,6 @@ private: UINT8 *memoryPool; // all memory allocated here UINT8 *vram; // 1.125MB of VRAM UINT32 *pal[2]; // 2 x 0x20000 byte (32K colors) palette - bool recomputePalettes; // whether to recompute palettes A/A' and B/B' during sync // Read-only snapshots UINT8 *vramRO; // 1.125MB of VRAM [read-only snapshot] diff --git a/VS2008/Supermodel.vcxproj b/VS2008/Supermodel.vcxproj index 23fa4bb..17baa3c 100644 --- a/VS2008/Supermodel.vcxproj +++ b/VS2008/Supermodel.vcxproj @@ -495,6 +495,7 @@ xcopy /D /Y "$(ProjectDir)..\Assets\*" "$(TargetDir)Assets" + diff --git a/VS2008/Supermodel.vcxproj.filters b/VS2008/Supermodel.vcxproj.filters index a56ee68..8c2a4cd 100644 --- a/VS2008/Supermodel.vcxproj.filters +++ b/VS2008/Supermodel.vcxproj.filters @@ -853,6 +853,9 @@ Header Files\Graphics + + Header Files\Graphics\New +