Rewrite the renderer a bit to output the finished graphics from the 3D chip into separate buffers: one buffer for opaque pixels and two more for translucent pixels. Previously the frame was composited directly onto the back buffer, which meant the tilegen had to have been drawn first. Now the images are totally independent of the tilegen chip, so they can be drawn as soon as the 0xC register write is made to the tilegen.

Some games update the tilegen after the ping_pong bit has flipped at 66% of the frame, so we need to split the tilegen drawing into two stages to get some effects to work. Having the tilegen draw independently of the 3D chip makes this possible.
2024-11-22 05:45:38 +00:00 · 2023-10-14 20:05:00 +01:00 · 2023-10-14 20:05:00 +01:00 · 6595b9320e
parent d656643087
commit 6595b9320e
17 changed files with 510 additions and 1004 deletions
--- a/Src/Graphics/Legacy3D/Models.cpp
+++ b/Src/Graphics/Legacy3D/Models.cpp
@ -725,7 +725,7 @@ void CLegacy3D::InsertVertex(ModelCache *Cache, const Vertex *V, const Poly *P,
  // Specular shininess
  GLfloat specularCoefficient = (GLfloat) ((P->header[0]>>26) & 0x3F) * (1.0f/63.0f);
  int shinyBits = (P->header[6] >> 5) & 3;
-  float shininess = std::exp2f(1 + shinyBits);
+  float shininess = std::exp2f(1.0f + shinyBits);
  if (!(P->header[0]&0x80)) //|| (shininess == 0)) // bit 0x80 seems to enable specular lighting
  {
    specularCoefficient = 0.; // disable
--- a/Src/Graphics/New3D/Model.cpp
+++ b/Src/Graphics/New3D/Model.cpp
@ -9,6 +9,7 @@ NodeAttributes::NodeAttributes()
 	currentPage			= 0;
 	currentClipStatus	= Clip::INTERCEPT;
 	currentModelScale	= 1.0f;
+	currentModelAlpha	= 1.0;
 }

 bool NodeAttributes::Push()
@ -27,6 +28,7 @@ bool NodeAttributes::Push()
 	na.texOffsetY	= currentTexOffsetY;
 	na.clip			= currentClipStatus;
 	na.modelScale	= currentModelScale;
+	na.modelAlpha	= currentModelAlpha;

 	m_vecAttribs.emplace_back(na);

@ -46,6 +48,7 @@ bool NodeAttributes::Pop()
 	currentTexOffsetY	= last.texOffsetY;
 	currentClipStatus	= last.clip;
 	currentModelScale	= last.modelScale;
+	currentModelAlpha	= last.modelAlpha;

 	m_vecAttribs.pop_back();

@ -64,6 +67,7 @@ void NodeAttributes::Reset()
 	currentTexOffsetY	= 0;
 	currentClipStatus	= Clip::INTERCEPT;
 	currentModelScale	= 1.0f;
+	currentModelAlpha	= 1.0f;

 	m_vecAttribs.clear();
 }
--- a/Src/Graphics/New3D/Model.h
+++ b/Src/Graphics/New3D/Model.h
@ -97,22 +97,24 @@ enum class Layer { colour, trans1, trans2, trans12 /*both 1&2*/, all, none };
 struct Mesh
 {
 	//helper funcs
-	bool Render(Layer layer)
+	bool Render(Layer layer, float nodeAlpha)
 	{
+		bool nAlpha = nodeAlpha < 1.0f;
+
 		switch (layer)
 		{
 		case Layer::colour:
-			if (polyAlpha) {
+			if (polyAlpha || nAlpha) {
 				return false;
 			}
 			break;
 		case Layer::trans1:
-			if ((!textureAlpha && !polyAlpha) || transLSelect) {
+			if ((!textureAlpha && !polyAlpha && !nAlpha) || transLSelect) {
 				return false;
 			}
 			break;
 		case Layer::trans2:
-			if ((!textureAlpha && !polyAlpha) || !transLSelect) {
+			if ((!textureAlpha && !polyAlpha && !nAlpha) || !transLSelect) {
 				return false;
 			}
 			break;
@ -183,6 +185,9 @@ struct Model

 	//model scale step 1.5+
 	float scale = 1.0f;
+
+	//node transparency
+	float alpha = 1.0f;
 };

 struct Viewport
@ -236,6 +241,7 @@ public:
 	int currentPage;
 	Clip currentClipStatus;
 	float currentModelScale;
+	float currentModelAlpha;

 private:

@ -246,6 +252,7 @@ private:
 		int page;
 		Clip clip;
 		float modelScale;
+		float modelAlpha;	// from culling node
 	};
 	std::vector<NodeAttribs> m_vecAttribs;
 };
--- a/Src/Graphics/New3D/New3D.cpp
+++ b/Src/Graphics/New3D/New3D.cpp
@ -232,7 +232,7 @@ bool CNew3D::RenderScene(int priority, bool renderOverlay, Layer layer)
 					hasOverlay = true;
 				}

-				if (!mesh.Render(layer)) continue;
+				if (!mesh.Render(layer, m.alpha)) continue;
 				if (mesh.highPriority != renderOverlay) continue;

 				if (!matrixLoaded) {
@ -274,11 +274,13 @@ void CNew3D::SetRenderStates()
 	glDepthMask		(GL_TRUE);
 	glActiveTexture	(GL_TEXTURE0);
 	glDisable		(GL_CULL_FACE);					// we'll emulate this in the shader		
-	glDisable		(GL_BLEND);

 	glStencilFunc	(GL_EQUAL, 0, 0xFF);			// basically stencil test passes if the value is zero
 	glStencilOp		(GL_KEEP, GL_INCR, GL_INCR);	// if the stencil test passes, we increment the value
 	glStencilMask	(0xFF);
+
+	glBlendFunc		(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+	glDisable		(GL_BLEND);
 }

 void CNew3D::DisableRenderStates()
@ -313,7 +315,6 @@ void CNew3D::RenderFrame(void)
 	m_nodeAttribs.Reset();

 	RenderViewport(0x800000);						// build model structure
-	DrawScrollFog();								// fog layer if applicable must be drawn here
 	
 	m_vbo.Bind(true);
 	m_vbo.BufferSubData(MAX_ROM_VERTS*sizeof(FVertex), m_polyBufferRam.size()*sizeof(FVertex), m_polyBufferRam.data());	// upload all the dynamic data to GPU in one go
@ -339,8 +340,10 @@ void CNew3D::RenderFrame(void)
 		}
 	}

-	m_r3dFrameBuffers.SetFBO(Layer::trans12);
-	glClear(GL_COLOR_BUFFER_BIT);					// wipe both trans layers
+	m_r3dFrameBuffers.SetFBO(Layer::colour);		// colour will draw to all 3 buffers. For regular opaque pixels the transparent layers will be essentially masked
+	glClear(GL_COLOR_BUFFER_BIT);
+
+	DrawScrollFog();								// fog layer if applicable must be drawn here

 	for (int pri = 0; pri <= 3; pri++) {

@ -350,35 +353,33 @@ void CNew3D::RenderFrame(void)

 			bool renderOverlay = (i == 1);

-			m_r3dFrameBuffers.SetFBO(Layer::colour);
-			glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
-
 			SetRenderStates();

-			m_r3dShader.DiscardAlpha(true);						// discard all translucent pixels in opaque pass
+			m_r3dFrameBuffers.SetFBO(Layer::colour);
+
+			glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
+
+			m_r3dShader.DiscardAlpha(true);
+			m_r3dShader.SetLayer(Layer::colour);
 			bool hasOverlay = RenderScene(pri, renderOverlay, Layer::colour);

 			if (!renderOverlay) {
 				ProcessLos(pri);
 			}

-			DisableRenderStates();
+			glDepthFunc(GL_LESS);

-			m_r3dFrameBuffers.DrawOverTransLayers();			// mask trans layer with opaque pixels
-			m_r3dFrameBuffers.CompositeBaseLayer();				// copy opaque pixels to back buffer
+			m_r3dShader.DiscardAlpha(false);

-			SetRenderStates();
+			m_r3dFrameBuffers.StoreDepth();
+			m_r3dShader.SetLayer(Layer::trans1);
+			m_r3dFrameBuffers.SetFBO(Layer::trans1);
+			RenderScene(pri, renderOverlay, Layer::trans1);

-			glDepthFunc(GL_LESS);								// alpha polys seem to use gl_less (ocean hunter)
-
-			m_r3dShader.DiscardAlpha		(false);			// render only translucent pixels
-			m_r3dFrameBuffers.StoreDepth	();					// save depth buffer for 1st trans pass
-			m_r3dFrameBuffers.SetFBO		(Layer::trans1);
-			RenderScene						(pri, renderOverlay, Layer::trans1);
-
-			m_r3dFrameBuffers.RestoreDepth	();					// restore depth buffer, trans layers don't seem to depth test against each other
-			m_r3dFrameBuffers.SetFBO		(Layer::trans2);
-			RenderScene						(pri, renderOverlay, Layer::trans2);
+			m_r3dFrameBuffers.RestoreDepth();
+			m_r3dShader.SetLayer(Layer::trans2);
+			m_r3dFrameBuffers.SetFBO(Layer::trans2);
+			RenderScene(pri, renderOverlay, Layer::trans2);

 			DisableRenderStates();

@ -386,7 +387,8 @@ void CNew3D::RenderFrame(void)
 		}
 	}

-	m_r3dFrameBuffers.CompositeAlphaLayer();
+	m_r3dFrameBuffers.SetFBO(Layer::none);
+	m_r3dFrameBuffers.Draw();
 }

 void CNew3D::BeginFrame(void)
@ -471,10 +473,11 @@ bool CNew3D::DrawModel(UINT32 modelAddr)
 	}

 	// update texture offsets
-	m->textureOffsetX = m_nodeAttribs.currentTexOffsetX;
-	m->textureOffsetY = m_nodeAttribs.currentTexOffsetY;
-	m->page = m_nodeAttribs.currentPage;
-	m->scale = m_nodeAttribs.currentModelScale;
+	m->textureOffsetX	= m_nodeAttribs.currentTexOffsetX;
+	m->textureOffsetY	= m_nodeAttribs.currentTexOffsetY;
+	m->page				= m_nodeAttribs.currentPage;
+	m->scale			= m_nodeAttribs.currentModelScale;
+	m->alpha			= m_nodeAttribs.currentModelAlpha;

 	if (!cached) {
 		CacheModel(m, modelAddress);
@ -588,6 +591,8 @@ void CNew3D::DescendCullingNode(UINT32 addr)
 		}
 	}

+	m_nodeAttribs.currentModelAlpha = 1;	// TODO fade out if required
+
 	// Apply matrix and translation
 	m_modelMat.PushMatrix();

@ -1387,23 +1392,6 @@ bool CNew3D::IsVROMModel(UINT32 modelAddr)
 	return modelAddr >= 0x100000;
 }

-void CNew3D::CalcTexOffset(int offX, int offY, int page, int x, int y, int& newX, int& newY)
-{
-	newX = (x + offX) & 2047;	// wrap around 2048, shouldn't be required
-
-	int oldPage = y / 1024;
-
-	y -= (oldPage * 1024);	// remove page from tex y
-
-	// calc newY with wrap around, wraps around in the same sheet, not into another memory sheet
-
-	newY = (y + offY) & 1023;
-
-	// add page to Y
-
-	newY += ((oldPage + page) & 1) * 1024;		// max page 0-1
-}
-
 void CNew3D::CalcFrustumPlanes(Plane p[5], const float* matrix)
 {
 	// Left Plane
--- a/Src/Graphics/New3D/New3D.h
+++ b/Src/Graphics/New3D/New3D.h
@ -223,8 +223,6 @@ private:
 	void TranslateLosPosition(int inX, int inY, int& outX, int& outY);
 	bool ProcessLos(int priority);

-	void CalcTexOffset(int offX, int offY, int page, int x, int y, int& newX, int& newY);	
-
 	/*
 	* Data
 	*/
--- a/Src/Graphics/New3D/R3DFrameBuffers.cpp
+++ b/Src/Graphics/New3D/R3DFrameBuffers.cpp
@ -1,7 +1,4 @@
 #include "R3DFrameBuffers.h"
-#include "Mat4.h"
-
-#define countof(a) (sizeof(a)/sizeof(*(a)))

 namespace New3D {

@ -23,7 +20,6 @@ R3DFrameBuffers::R3DFrameBuffers()

 	AllocShaderTrans();
 	AllocShaderBase();
-	AllocShaderWipe();

 	glGenVertexArrays(1, &m_vao);
 	glBindVertexArray(m_vao);
@ -36,7 +32,7 @@ R3DFrameBuffers::~R3DFrameBuffers()
 	DestroyFBO();
 	m_shaderTrans.UnloadShaders();
 	m_shaderBase.UnloadShaders();
-	m_shaderWipe.UnloadShaders();
+
 	if (m_vao) {
 		glDeleteVertexArrays(1, &m_vao);
 		m_vao = 0;
@ -165,26 +161,24 @@ void R3DFrameBuffers::SetFBO(Layer layer)
 	switch (layer)
 	{
 	case Layer::colour:
-	case Layer::trans1:
-	case Layer::trans2:
-	{
-		glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
-		GLenum buffers[] = { GL_COLOR_ATTACHMENT0 + (GLenum)layer };
-		glDrawBuffers(countof(buffers), buffers);
-		break;
-	}
-	case Layer::trans12:
-	{
-		glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
-		GLenum buffers[] = { GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 };
-		glDrawBuffers(countof(buffers), buffers);
-		break;
-	}
-	case Layer::all:
 	{
 		glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
 		GLenum buffers[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2 };
-		glDrawBuffers(countof(buffers), buffers);
+		glDrawBuffers((GLsizei)std::size(buffers), buffers);
+		break;
+	}
+	case Layer::trans1:
+	{
+		glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
+		GLenum buffers[] = { GL_NONE, GL_COLOR_ATTACHMENT1, GL_NONE };
+		glDrawBuffers((GLsizei)std::size(buffers), buffers);
+		break;
+	}
+	case Layer::trans2:
+	{
+		glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
+		GLenum buffers[] = { GL_NONE, GL_NONE, GL_COLOR_ATTACHMENT2 };
+		glDrawBuffers((GLsizei)std::size(buffers), buffers);
 		break;
 	}
 	case Layer::none:
@ -280,8 +274,8 @@ void R3DFrameBuffers::AllocShaderTrans()

 	void main()
 	{
-		vec4 colTrans1 = texture( tex1, fsTexCoord);
-		vec4 colTrans2 = texture( tex2, fsTexCoord);
+		vec4 colTrans1 = texture(tex1, fsTexCoord);
+		vec4 colTrans2 = texture(tex2, fsTexCoord);

 		if(colTrans1.a+colTrans2.a > 0.0) {
 			vec3 col1 = colTrans1.rgb * colTrans1.a;
@ -302,58 +296,6 @@ void R3DFrameBuffers::AllocShaderTrans()
 	m_shaderTrans.uniformLoc[1] = m_shaderTrans.GetUniformLocation("tex2");
 }

-void R3DFrameBuffers::AllocShaderWipe()
-{
-	const char *vertexShader = R"glsl(
-
-	#version 410 core
-
-	// outputs
-	out vec2 fsTexCoord;
-
-	void main(void)
-	{
-		const vec4 vertices[] = vec4[](vec4(-1.0, -1.0, 0.0, 1.0),
-										vec4(-1.0,  1.0, 0.0, 1.0),
-										vec4( 1.0, -1.0, 0.0, 1.0),
-										vec4( 1.0,  1.0, 0.0, 1.0));
-
-		fsTexCoord = (vertices[gl_VertexID % 4].xy + 1.0) / 2.0;
-		gl_Position = vertices[gl_VertexID % 4];
-	}
-
-	)glsl";
-
-	const char *fragmentShader = R"glsl(
-
-	#version 410 core
-
-	uniform sampler2D texColor;				// base colour layer
-	in vec2 fsTexCoord;
-
-	// outputs
-	layout (location = 0) out vec4 fragColor0;
-	layout (location = 1) out vec4 fragColor1;
-
-	void main()
-	{
-		vec4 colBase = texture(texColor, fsTexCoord);
-
-		if(colBase.a == 0.0) {
-			discard;					// no colour pixels have been written
-		}
-
-		fragColor0 = vec4(0.0);			// wipe these parts of the alpha buffer
-		fragColor1 = vec4(0.0);			// since they have been overwritten by the next priority layer
-	}
-
-	)glsl";
-
-	m_shaderWipe.LoadShaders(vertexShader, fragmentShader);
-
-	m_shaderWipe.uniformLoc[0] = m_shaderWipe.GetUniformLocation("texColor");
-}
-
 void R3DFrameBuffers::Draw()
 {
 	SetFBO		(Layer::none);						// make sure to draw on the back buffer
@ -362,7 +304,7 @@ void R3DFrameBuffers::Draw()
 	glDisable	(GL_CULL_FACE);
 	glDisable	(GL_BLEND);

-	for (int i = 0; i < countof(m_texIDs); i++) {	// bind our textures to correct texture units
+	for (int i = 0; i < (int)std::size(m_texIDs); i++) {	// bind our textures to correct texture units
 		glActiveTexture(GL_TEXTURE0 + i);
 		glBindTexture(GL_TEXTURE_2D, m_texIDs[i]);
 	}
@ -381,77 +323,10 @@ void R3DFrameBuffers::Draw()
 	glBindVertexArray	(0);
 }

-void R3DFrameBuffers::CompositeBaseLayer()
-{
-	SetFBO(Layer::none);							// make sure to draw on the back buffer
-	glViewport(0, 0, m_width, m_height);			// cover the entire screen
-	glDisable(GL_DEPTH_TEST);						// disable depth testing / writing
-	glDisable(GL_CULL_FACE);
-	glDisable(GL_BLEND);
-
-	for (int i = 0; i < countof(m_texIDs); i++) {	// bind our textures to correct texture units
-		glActiveTexture(GL_TEXTURE0 + i);
-		glBindTexture(GL_TEXTURE_2D, m_texIDs[i]);
-	}
-
-	glActiveTexture(GL_TEXTURE0);
-	glBindVertexArray(m_vao);
-
-	DrawBaseLayer();
-
-	glBindVertexArray(0);
-}
-
-void R3DFrameBuffers::CompositeAlphaLayer()
-{
-	SetFBO(Layer::none);							// make sure to draw on the back buffer
-	glViewport(0, 0, m_width, m_height);			// cover the entire screen
-	glDisable(GL_DEPTH_TEST);						// disable depth testing / writing
-	glDisable(GL_CULL_FACE);
-
-	for (int i = 0; i < countof(m_texIDs); i++) {	// bind our textures to correct texture units
-		glActiveTexture(GL_TEXTURE0 + i);
-		glBindTexture(GL_TEXTURE_2D, m_texIDs[i]);
-	}
-
-	glActiveTexture(GL_TEXTURE0);
-
-	glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
-	glEnable(GL_BLEND);
-	glBindVertexArray(m_vao);
-
-	DrawAlphaLayer();
-
-	glDisable(GL_BLEND);
-	glBindVertexArray(0);
-}
-
-void R3DFrameBuffers::DrawOverTransLayers()
-{
-	SetFBO(Layer::trans12);							// need to write to both layers
-
-	glViewport	(0, 0, m_width, m_height);			// cover the entire screen
-	glDisable	(GL_DEPTH_TEST);					// disable depth testing / writing
-	glDisable	(GL_CULL_FACE);
-	glDisable	(GL_BLEND);
-
-	glActiveTexture	(GL_TEXTURE0);
-	glBindTexture	(GL_TEXTURE_2D, m_texIDs[0]);
-	
-	glBindVertexArray(m_vao);
-	m_shaderWipe.EnableShader();
-	glUniform1i(m_shaderWipe.uniformLoc[0], 0);
-
-	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
-
-	m_shaderWipe.DisableShader();
-	glBindVertexArray(0);
-}
-
 void R3DFrameBuffers::DrawBaseLayer()
 {
 	m_shaderBase.EnableShader();
-	glUniform1i(m_shaderTrans.uniformLoc[0], 0);		// to do check this
+	glUniform1i(m_shaderTrans.uniformLoc[0], 0);

 	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

--- a/Src/Graphics/New3D/R3DFrameBuffers.h
+++ b/Src/Graphics/New3D/R3DFrameBuffers.h
@ -15,9 +15,6 @@ public:
 	~R3DFrameBuffers();

 	void	Draw();					// draw and composite the transparent layers
-	void	CompositeBaseLayer();
-	void	CompositeAlphaLayer();
-	void	DrawOverTransLayers();	// opaque pixels in next priority layer need to wipe trans pixels
 	
 	bool	CreateFBO(int width, int height);
 	void	DestroyFBO();
@ -33,7 +30,6 @@ private:
 	GLuint	CreateTexture(int width, int height);
 	void	AllocShaderTrans();
 	void	AllocShaderBase();
-	void	AllocShaderWipe();

 	void	DrawBaseLayer();
 	void	DrawAlphaLayer();
@ -50,7 +46,6 @@ private:
 	// shaders
 	GLSLShader m_shaderBase;
 	GLSLShader m_shaderTrans;
-	GLSLShader m_shaderWipe;

 	// vao
 	GLuint m_vao;	// this really needed if we don't actually use vertex attribs?
--- a/Src/Graphics/New3D/R3DScrollFog.cpp
+++ b/Src/Graphics/New3D/R3DScrollFog.cpp
@ -40,7 +40,25 @@ vec3	lFogColor;
 vec4	scrollFog;

 // outputs
-out vec4 fragColor;
+layout(location = 0) out vec4 out0;		// opaque
+layout(location = 1) out vec4 out1;		// trans layer 1
+layout(location = 2) out vec4 out2;		// trans layer 2
+
+void WriteOutputs(vec4 colour)
+{
+	vec4 blank = vec4(0.0);
+	
+	if(colour.a < 1.0) {		// some transparency
+		out0 = blank;
+		out1 = colour;
+		out2 = blank;
+	}
+	else {						// opaque
+		out0 = colour;
+		out1 = blank;
+		out2 = blank;
+	}
+}

 void main()
 {
@ -62,7 +80,7 @@ void main()
 	scrollFog = vec4(lFogColor + lSpotFogColor, fogColour.a);

 	// Final Color
-	fragColor = scrollFog;
+	WriteOutputs(scrollFog);
 }

 )glsl";
@ -71,7 +89,6 @@ void main()
 R3DScrollFog::R3DScrollFog(const Util::Config::Node &config)
  : m_config(config),
 	m_vao(0)
-
 {
 	m_shaderProgram		= 0;
 	m_vertexShader		= 0;
--- a/Src/Graphics/New3D/R3DShader.cpp
+++ b/Src/Graphics/New3D/R3DShader.cpp
@ -1,6 +1,7 @@
 #include "R3DShader.h"
 #include "R3DShaderQuads.h"
 #include "R3DShaderTriangles.h"
+#include "R3DShaderCommon.h"

 // having 2 sets of shaders to maintain is really less than ideal
 // but hopefully not too many breaking changes at this point
@ -31,6 +32,7 @@ void R3DShader::Start()
 	m_fixedShading		= false;
 	m_translatorMap		= false;
 	m_modelScale		= 1.0f;
+	m_nodeAlpha			= 1.0f;
 	m_shininess			= 0;
 	m_specularValue		= 0;
 	m_microTexScale		= 0;
@ -62,30 +64,27 @@ bool R3DShader::LoadShader(const char* vertexShader, const char* fragmentShader)
 	const char* gShader = "";
 	const char* fShader = fragmentShaderR3D;

-	std::string fragmentShaderCombined;
-
 	if (quads) {
 		vShader = vertexShaderR3DQuads;
 		gShader = geometryShaderR3DQuads;
-
-		fragmentShaderCombined += fragmentShaderR3DQuads1;
-		fragmentShaderCombined += fragmentShaderR3DQuads2;
-		fShader = fragmentShaderCombined.c_str();
+		fShader = fragmentShaderR3DQuads;
 	}

 	m_shaderProgram		= glCreateProgram();
 	m_vertexShader		= glCreateShader(GL_VERTEX_SHADER);
 	m_fragmentShader	= glCreateShader(GL_FRAGMENT_SHADER);

-	glShaderSource(m_vertexShader,		1, (const GLchar **)&vShader, NULL);
-	glShaderSource(m_fragmentShader,	1, (const GLchar **)&fShader, NULL);
+	const char* shaderArray[] = { fShader, fragmentShaderR3DCommon };
+
+	glShaderSource(m_vertexShader, 1, (const GLchar **)&vShader, nullptr);
+	glShaderSource(m_fragmentShader, (GLsizei)std::size(shaderArray), shaderArray, nullptr);

 	glCompileShader(m_vertexShader);
 	glCompileShader(m_fragmentShader);

 	if (quads) {
 		m_geoShader = glCreateShader(GL_GEOMETRY_SHADER);
-		glShaderSource(m_geoShader, 1, (const GLchar **)&gShader, NULL);
+		glShaderSource(m_geoShader, 1, (const GLchar **)&gShader, nullptr);
 		glCompileShader(m_geoShader);
 		glAttachShader(m_shaderProgram, m_geoShader);
 		PrintShaderResult(m_geoShader);
@ -111,6 +110,7 @@ bool R3DShader::LoadShader(const char* vertexShader, const char* fragmentShader)
 	m_locBaseTexType		= glGetUniformLocation(m_shaderProgram, "baseTexType");
 	m_locTextureInverted	= glGetUniformLocation(m_shaderProgram, "textureInverted");
 	m_locTexWrapMode		= glGetUniformLocation(m_shaderProgram, "textureWrapMode");
+	m_locColourLayer		= glGetUniformLocation(m_shaderProgram, "colourLayer");

 	m_locFogIntensity		= glGetUniformLocation(m_shaderProgram, "fogIntensity");
 	m_locFogDensity			= glGetUniformLocation(m_shaderProgram, "fogDensity");
@ -134,6 +134,7 @@ bool R3DShader::LoadShader(const char* vertexShader, const char* fragmentShader)
 	m_locSpotColor			= glGetUniformLocation(m_shaderProgram, "spotColor");
 	m_locSpotFogColor		= glGetUniformLocation(m_shaderProgram, "spotFogColor");
 	m_locModelScale			= glGetUniformLocation(m_shaderProgram, "modelScale");
+	m_locNodeAlpha			= glGetUniformLocation(m_shaderProgram, "nodeAlpha");

 	m_locProjMat			= glGetUniformLocation(m_shaderProgram, "projMat");
 	m_locModelMat			= glGetUniformLocation(m_shaderProgram, "modelMat");
@ -338,6 +339,11 @@ void R3DShader::SetModelStates(const Model* model)
 		m_modelScale = model->scale;
 	}

+	if (m_dirtyModel || model->alpha != m_nodeAlpha) {
+		glUniform1f(m_locNodeAlpha, model->alpha);
+		m_nodeAlpha = model->alpha;
+	}
+
 	m_transX = model->textureOffsetX;
 	m_transY = model->textureOffsetY;
 	m_transPage = model->page;
@ -355,6 +361,11 @@ void R3DShader::DiscardAlpha(bool discard)
 	glUniform1i(m_locDiscardAlpha, discard);
 }

+void R3DShader::SetLayer(Layer layer)
+{
+	glUniform1i(m_locColourLayer, (GLint)layer);
+}
+
 void R3DShader::PrintShaderResult(GLuint shader)
 {
 	//===========
--- a/Src/Graphics/New3D/R3DShader.h
+++ b/Src/Graphics/New3D/R3DShader.h
@ -23,6 +23,7 @@ public:
 	void	SetShader			(bool enable = true);
 	GLint	GetVertexAttribPos	(const std::string& attrib);
 	void	DiscardAlpha		(bool discard);				// use to remove alpha from texture alpha only polys for 1st pass
+	void	SetLayer			(Layer layer);

 private:

@ -53,6 +54,7 @@ private:
 	GLint m_locTextureInverted;
 	GLint m_locTexWrapMode;
 	GLint m_locTranslatorMap;
+	GLint m_locColourLayer;

 	// cached mesh values
 	bool	m_textured1;
@ -77,6 +79,7 @@ private:

 	// cached model values
 	float	m_modelScale;
+	float	m_nodeAlpha;
 	int		m_transX;
 	int		m_transY;
 	int		m_transPage;
@ -111,6 +114,7 @@ private:

 	// model uniforms
 	GLint m_locModelScale;
+	GLint m_locNodeAlpha;
 	GLint m_locModelMat;

 	// global uniforms
--- a/Src/Graphics/New3D/R3DShaderCommon.h
+++ b/Src/Graphics/New3D/R3DShaderCommon.h
@ -0,0 +1,361 @@
+#pragma once
+
+// I altered this code a bit to make sure it always compiles with gl 4.1. Version 4.5 allows you to specify arrays differently.
+// Ripped out most of the common code, people have been pushing changes to the shaders but we are ending up with diverging implementations
+// between triangle / quad version which is less than ideal.
+
+static const char* fragmentShaderR3DCommon = R"glsl(
+
+#define LayerColour 0x0
+#define LayerTrans0 0x1
+#define LayerTrans1 0x2
+
+vec4 ExtractColour(int type, uint value)
+{
+	vec4 c = vec4(0.0);
+
+	if(type==0) {			// T1RGB5	
+		c.r		= float((value >> 10) & 0x1Fu);
+		c.g		= float((value >> 5 ) & 0x1Fu);
+		c.b		= float((value      ) & 0x1Fu);
+		c.rgb  *= (1.0/31.0);
+		c.a		= 1.0 - float((value >> 15) & 0x1u);
+	}
+	else if(type==1) {		// Interleaved A4L4 (low byte)
+		c.rgb	= vec3(float(value&0xFu));
+		c.a		= float((value >> 4) & 0xFu);
+		c      *= (1.0/15.0);
+	}
+	else if(type==2) {
+		c.a		= float(value&0xFu);
+		c.rgb   = vec3(float((value >> 4) & 0xFu));
+		c	   *= (1.0/15.0);
+	}
+	else if(type==3) {
+		c.rgb	= vec3(float((value>>8)&0xFu));
+		c.a		= float((value >> 12) & 0xFu);
+		c	   *= (1.0/15.0);
+	}
+	else if(type==4) {
+		c.a		= float((value>>8)&0xFu);
+		c.rgb   = vec3(float((value >> 12) & 0xFu));
+		c	   *= (1.0/15.0);
+	}
+	else if(type==5) {
+		c = vec4(float(value&0xFFu) / 255.0);
+		if(c.a==1.0)	{ c.a = 0.0; }
+		else			{ c.a = 1.0; }
+	}
+	else if(type==6) {
+		c = vec4(float((value>>8)&0xFFu) / 255.0);
+		if(c.a==1.0)	{ c.a = 0.0; }
+		else			{ c.a = 1.0; }
+	}
+	else if(type==7) {	// RGBA4
+		c.r = float((value>>12)&0xFu);
+		c.g = float((value>> 8)&0xFu);
+		c.b = float((value>> 4)&0xFu);
+		c.a = float((value>> 0)&0xFu);
+		c *= (1.0/15.0);
+	}
+	else if(type==8) {	// low byte, low nibble
+		c = vec4(float(value&0xFu) / 15.0);
+		if(c.a==1.0)	{ c.a = 0.0; }
+		else			{ c.a = 1.0; }
+	}
+	else if(type==9) {	// low byte, high nibble
+		c = vec4(float((value>>4)&0xFu) / 15.0);
+		if(c.a==1.0)	{ c.a = 0.0; }
+		else			{ c.a = 1.0; }
+	}
+	else if(type==10) {	// high byte, low nibble
+		c = vec4(float((value>>8)&0xFu) / 15.0);
+		if(c.a==1.0)	{ c.a = 0.0; }
+		else			{ c.a = 1.0; }
+	}
+	else if(type==11) {	// high byte, high nibble
+		c = vec4(float((value>>12)&0xFu) / 15.0);
+		if(c.a==1.0)	{ c.a = 0.0; }
+		else			{ c.a = 1.0; }
+	}
+
+	return c;
+}
+
+int GetPage(int yCoord)
+{
+	return yCoord / 1024;
+}
+
+int GetNextPage(int yCoord)
+{
+	return (GetPage(yCoord) + 1) & 1;
+}
+
+int GetNextPageOffset(int yCoord)
+{
+	return GetNextPage(yCoord) * 1024;
+}
+
+// wrapping tex coords would be super easy but we combined tex sheets so have to handle wrap around between sheets
+// hardware testing would be useful because i don't know exactly what happens if you try to read outside the texture sheet
+// wrap around is a good guess
+ivec2 WrapTexCoords(ivec2 pos, ivec2 coordinate)
+{
+	ivec2 newCoord;
+
+	newCoord.x = coordinate.x & 2047;
+	newCoord.y = coordinate.y;
+
+	int page = GetPage(pos.y);
+
+	newCoord.y -= (page * 1024);	// remove page
+	newCoord.y &= 1023;				// wrap around in the same sheet
+	newCoord.y += (page * 1024);	// add page back
+
+	return newCoord;
+}
+
+ivec2 GetTextureSize(int level, ivec2 size)
+{
+	int mipDivisor = 1 << level;
+
+	return size / mipDivisor;
+}
+
+ivec2 GetTexturePosition(int level, ivec2 pos)
+{
+	const int mipXBase[] = int[](0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047);
+	const int mipYBase[] = int[](0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023);
+
+	int mipDivisor = 1 << level;
+
+	int page = pos.y / 1024;
+	pos.y -= (page * 1024);		// remove page from tex y
+
+	ivec2 retPos;
+	retPos.x = mipXBase[level] + (pos.x / mipDivisor);
+	retPos.y = mipYBase[level] + (pos.y / mipDivisor);
+
+	retPos.y += (page * 1024);	// add page back to tex y
+
+	return retPos;
+}
+
+ivec2 GetMicroTexturePos(int id)
+{
+	const int xCoords[8] = int[](0, 0, 128, 128, 0, 0, 128, 128);
+	const int yCoords[8] = int[](0, 128, 0, 128, 256, 384, 256, 384);
+
+	return ivec2(xCoords[id],yCoords[id]);
+}
+
+float mip_map_level(in vec2 texture_coordinate) // in texel units
+{
+    vec2  dx_vtc        = dFdx(texture_coordinate);
+    vec2  dy_vtc        = dFdy(texture_coordinate);
+    float delta_max_sqr = max(dot(dx_vtc, dx_vtc), dot(dy_vtc, dy_vtc));
+    float mml = 0.5 * log2(delta_max_sqr);
+    return max( 0.0, mml );
+}
+
+float LinearTexLocations(int wrapMode, float size, float u, out float u0, out float u1)
+{
+	float texelSize		= 1.0 / size;
+	float halfTexelSize	= 0.5 / size;
+
+	if(wrapMode==0) {							// repeat
+		u	= u * size - 0.5;
+		u0	= (floor(u) + 0.5) / size;			// + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
+		u0	= fract(u0);
+		u1	= u0 + texelSize;
+		u1	= fract(u1);
+
+		return fract(u);						// return weight
+	}
+	else if(wrapMode==1) {						// repeat + clamp
+		u	= fract(u);							// must force into 0-1 to start
+		u	= u * size - 0.5;
+		u0	= (floor(u) + 0.5) / size;			// + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
+		u1	= u0 + texelSize;
+
+		if(u0 <  0.0)	u0 = 0.0;
+		if(u1 >= 1.0)	u1 = 1.0 - halfTexelSize;
+		
+		return fract(u);						// return weight
+	}
+	else {										// mirror + mirror clamp - both are the same since the edge pixels are repeated anyway
+
+		float odd = floor(mod(u, 2.0));			// odd values are mirrored
+
+		if(odd > 0.0) {
+			u = 1.0 - fract(u);
+		}
+		else {
+			u = fract(u);
+		}
+
+		u	= u * size - 0.5;
+		u0	= (floor(u) + 0.5) / size;			// + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
+		u1	= u0 + texelSize;
+
+		if(u0 <  0.0)	u0 = 0.0;
+		if(u1 >= 1.0)	u1 = 1.0 - halfTexelSize;
+		
+		return fract(u);						// return weight
+	}
+}
+
+vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texPos, vec2 texCoord)
+{
+	float tx[2], ty[2];
+	float a = LinearTexLocations(wrapMode.s, texSize.x, texCoord.x, tx[0], tx[1]);
+	float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]);
+
+	vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos)), 0).r);
+    vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos)), 0).r);
+    vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos)), 0).r);
+    vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos)), 0).r);
+
+	if(alphaTest) {
+		if(p0q0.a > p1q0.a)		{ p1q0.rgb = p0q0.rgb; }
+		if(p0q0.a > p0q1.a)		{ p0q1.rgb = p0q0.rgb; }
+
+		if(p1q0.a > p0q0.a)		{ p0q0.rgb = p1q0.rgb; }
+		if(p1q0.a > p1q1.a)		{ p1q1.rgb = p1q0.rgb; }
+
+		if(p0q1.a > p0q0.a)		{ p0q0.rgb = p0q1.rgb; }
+		if(p0q1.a > p1q1.a)		{ p1q1.rgb = p0q1.rgb; }
+
+		if(p1q1.a > p0q1.a)		{ p0q1.rgb = p1q1.rgb; }
+		if(p1q1.a > p1q0.a)		{ p1q0.rgb = p1q1.rgb; }
+	}
+
+	// Interpolation in X direction.
+    vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
+    vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.
+
+    return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
+}
+
+vec4 textureR3D(usampler2D texSampler, ivec2 wrapMode, ivec2 texSize, ivec2 texPos, vec2 texCoord)
+{
+	float numLevels	= floor(log2(min(float(texSize.x), float(texSize.y))));				// r3d only generates down to 1:1 for square textures, otherwise its the min dimension
+	float fLevel	= min(mip_map_level(texCoord * vec2(texSize)), numLevels);
+
+	if(alphaTest) fLevel *= 0.5;
+	else fLevel *= 0.8;
+
+	int iLevel = int(fLevel);
+
+	ivec2 texPos0 = GetTexturePosition(iLevel,texPos);
+	ivec2 texPos1 = GetTexturePosition(iLevel+1,texPos);
+
+	ivec2 texSize0 = GetTextureSize(iLevel, texSize);
+	ivec2 texSize1 = GetTextureSize(iLevel+1, texSize); 
+
+	vec4 texLevel0 = texBiLinear(texSampler, wrapMode, vec2(texSize0), texPos0, texCoord);
+	vec4 texLevel1 = texBiLinear(texSampler, wrapMode, vec2(texSize1), texPos1, texCoord);
+
+	return mix(texLevel0, texLevel1, fract(fLevel));	// linear blend between our mipmap levels
+}
+
+vec4 GetTextureValue()
+{
+	vec4 tex1Data = textureR3D(tex1, textureWrapMode, ivec2(baseTexInfo.zw), ivec2(baseTexInfo.xy), fsTexCoord);
+
+	if(textureInverted) {
+		tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb);
+	}
+
+	if (microTexture) {
+		vec2 scale			= (vec2(baseTexInfo.zw) / 128.0) * microTextureScale;
+		ivec2 pos			= GetMicroTexturePos(microTextureID);
+
+		// add page offset to microtexture position
+		pos.y				+= GetNextPageOffset(baseTexInfo.y);
+	
+		vec4 tex2Data		= textureR3D(tex1, ivec2(0), ivec2(128), pos, fsTexCoord * scale);
+
+		float lod			= mip_map_level(fsTexCoord * scale * vec2(128.0));
+
+		float blendFactor	= max(lod - 1.5, 0.0);			// bias -1.5
+		blendFactor			= min(blendFactor, 1.0);		// clamp to max value 1
+		blendFactor			= (blendFactor + 1.0) / 2.0;	// 0.5 - 1 range
+
+		tex1Data			= mix(tex2Data, tex1Data, blendFactor);
+	}
+
+	if (alphaTest) {
+		if (tex1Data.a < (32.0/255.0)) {
+			discard;
+		}
+	}
+
+	if(textureAlpha) {
+		if(discardAlpha) {					// opaque 1st pass
+			if (tex1Data.a < 1.0) {
+				discard;
+			}
+		}
+		else {								// transparent 2nd pass
+			if ((tex1Data.a * fsColor.a) >= 1.0) {
+				discard;
+			}
+		}
+	}
+
+	if (textureAlpha == false) {
+		tex1Data.a = 1.0;
+	}
+
+	return tex1Data;
+}
+
+void Step15Luminous(inout vec4 colour)
+{
+	// luminous polys seem to behave very differently on step 1.5 hardware
+	// when fixed shading is enabled the colour is modulated by the vp ambient + fixed shade value
+	// when disabled it appears to be multiplied by 1.5, presumably to allow a higher range
+	if(hardwareStep==0x15) {
+		if(!lightEnabled && textureEnabled) {
+			if(fixedShading) {
+				colour.rgb *= 1.0 + fsFixedShade + lighting[1].y;
+			}
+			else {
+				colour.rgb *= 1.5;
+			}
+		}
+	}
+}
+
+float CalcFog()
+{
+	float z		= -fsViewVertex.z;
+	float fog	= fogIntensity * clamp(fogStart + z * fogDensity, 0.0, 1.0);
+
+	return fog;
+}
+
+void WriteOutputs(vec4 colour, int layer)
+{
+	vec4 blank = vec4(0.0);
+
+	if(layer==LayerColour) {
+		out0 = colour;
+		out1 = blank;
+		out2 = blank;
+	}
+	else if(layer==LayerTrans0) {
+		out0 = blank;
+		out1 = colour;
+		out2 = blank;
+	}
+	else if(layer==LayerTrans1) {
+		out0 = blank;
+		out1 = blank;
+		out2 = colour;
+	}
+}
+
+)glsl";
--- a/Src/Graphics/New3D/R3DShaderQuads.h
+++ b/Src/Graphics/New3D/R3DShaderQuads.h
@ -7,6 +7,7 @@ static const char *vertexShaderR3DQuads = R"glsl(

 // uniforms
 uniform float	modelScale;
+uniform float	nodeAlpha;
 uniform mat4	modelMat;
 uniform mat4	projMat;
 uniform bool	translatorMap;
@ -39,6 +40,8 @@ vec4 GetColour(vec4 colour)
 		c.rgb *= 16.0;
 	}

+	c.a *= nodeAlpha;
+
 	return c;
 }

@ -168,7 +171,7 @@ void main(void)

 )glsl";

-static const char *fragmentShaderR3DQuads1 = R"glsl(
+static const char *fragmentShaderR3DQuads = R"glsl(

 #version 450 core

@ -207,6 +210,7 @@ uniform float	fogAttenuation;
 uniform float	fogAmbient;
 uniform bool	fixedShading;
 uniform int		hardwareStep;
+uniform int		colourLayer;

 // matrices (shared with vertex shader)
 uniform mat4	projMat;
@ -235,7 +239,16 @@ float	fsFixedShade;
 vec4	fsColor;

 //outputs
-out vec4 outColor;
+layout(location = 0) out vec4 out0;		// opaque pixels (WriteOutputs: LayerColour)
+layout(location = 1) out vec4 out1;		// first translucent layer (WriteOutputs: LayerTrans0)
+layout(location = 2) out vec4 out2;		// second translucent layer (WriteOutputs: LayerTrans1)
+
+// forward declarations only - the bodies live in the common shader source (presumably R3DShaderCommon.h)
+
+float CalcFog();							// fog factor: fogIntensity scaled by a clamped linear ramp
+void Step15Luminous(inout vec4 colour);	// step 1.5 hardware only: rescales colour.rgb of unlit textured polys
+vec4 GetTextureValue();					// returns the texture colour; may discard the fragment
+void WriteOutputs(vec4 colour, int layer);	// routes colour to out0/out1/out2 according to layer

 void QuadraticInterpolation()
 {
@ -341,333 +354,6 @@ void QuadraticInterpolation()
 	gl_FragDepth = depth * 0.5 + 0.5;
 }

-vec4 ExtractColour(int type, uint value)
-{
-	vec4 c = vec4(0.0);
-
-	if(type==0) {			// T1RGB5	
-		c.r		= float((value >> 10) & 0x1Fu);
-		c.g		= float((value >> 5 ) & 0x1Fu);
-		c.b		= float((value      ) & 0x1Fu);
-		c.rgb  *= (1.0/31.0);
-		c.a		= 1.0 - float((value >> 15) & 0x1u);
-	}
-	else if(type==1) {		// Interleaved A4L4 (low byte)
-		c.rgb	= vec3(float(value&0xFu));
-		c.a		= float((value >> 4) & 0xFu);
-		c      *= (1.0/15.0);
-	}
-	else if(type==2) {
-		c.a		= float(value&0xFu);
-		c.rgb   = vec3(float((value >> 4) & 0xFu));
-		c	   *= (1.0/15.0);
-	}
-	else if(type==3) {
-		c.rgb	= vec3(float((value>>8)&0xFu));
-		c.a		= float((value >> 12) & 0xFu);
-		c	   *= (1.0/15.0);
-	}
-	else if(type==4) {
-		c.a		= float((value>>8)&0xFu);
-		c.rgb   = vec3(float((value >> 12) & 0xFu));
-		c	   *= (1.0/15.0);
-	}
-	else if(type==5) {
-		c = vec4(float(value&0xFFu) / 255.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-	else if(type==6) {
-		c = vec4(float((value>>8)&0xFFu) / 255.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-	else if(type==7) {	// RGBA4
-		c.r = float((value>>12)&0xFu);
-		c.g = float((value>> 8)&0xFu);
-		c.b = float((value>> 4)&0xFu);
-		c.a = float((value>> 0)&0xFu);
-		c *= (1.0/15.0);
-	}
-	else if(type==8) {	// low byte, low nibble
-		c = vec4(float(value&0xFu) / 15.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-	else if(type==9) {	// low byte, high nibble
-		c = vec4(float((value>>4)&0xFu) / 15.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-	else if(type==10) {	// high byte, low nibble
-		c = vec4(float((value>>8)&0xFu) / 15.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-	else if(type==11) {	// high byte, high nibble
-		c = vec4(float((value>>12)&0xFu) / 15.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-
-	return c;
-}
-
-ivec2 GetTexturePosition(int level, ivec2 pos)
-{
-	const int mipXBase[] = { 0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047 };
-	const int mipYBase[] = { 0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023 };
-
-	int mipDivisor = 1 << level;
-
-	int page = pos.y / 1024;
-	pos.y -= (page * 1024);		// remove page from tex y
-
-	ivec2 retPos;
-	retPos.x = mipXBase[level] + (pos.x / mipDivisor);
-	retPos.y = mipYBase[level] + (pos.y / mipDivisor);
-
-	retPos.y += (page * 1024);	// add page back to tex y
-
-	return retPos;
-}
-
-ivec2 GetTextureSize(int level, ivec2 size)
-{
-	int mipDivisor = 1 << level;
-
-	return size / mipDivisor;
-}
-
-ivec2 GetMicroTexturePos(int id)
-{
-	int xCoords[8] = { 0, 0, 128, 128, 0, 0, 128, 128 };
-	int yCoords[8] = { 0, 128, 0, 128, 256, 384, 256, 384 };
-
-	return ivec2(xCoords[id],yCoords[id]);
-}
-
-int GetPage(int yCoord)
-{
-	return yCoord / 1024;
-}
-
-int GetNextPage(int yCoord)
-{
-	return (GetPage(yCoord) + 1) & 1;
-}
-
-int GetNextPageOffset(int yCoord)
-{
-	return GetNextPage(yCoord) * 1024;
-}
-
-// wrapping tex coords would be super easy but we combined tex sheets so have to handle wrap around between sheets
-// hardware testing would be useful because i don't know exactly what happens if you try to read outside the texture sheet
-// wrap around is a good guess
-ivec2 WrapTexCoords(ivec2 pos, ivec2 coordinate)
-{
-	ivec2 newCoord;
-
-	newCoord.x = coordinate.x & 2047;
-	newCoord.y = coordinate.y;
-
-	int page = GetPage(pos.y);
-
-	newCoord.y -= (page * 1024);	// remove page
-	newCoord.y &= 1023;				// wrap around in the same sheet
-	newCoord.y += (page * 1024);	// add page back
-
-	return newCoord;
-}
-
-float mip_map_level(in vec2 texture_coordinate) // in texel units
-{
-    vec2  dx_vtc        = dFdx(texture_coordinate);
-    vec2  dy_vtc        = dFdy(texture_coordinate);
-    float delta_max_sqr = max(dot(dx_vtc, dx_vtc), dot(dy_vtc, dy_vtc));
-    float mml = 0.5 * log2(delta_max_sqr);
-    return max( 0.0, mml );
-}
-
-float LinearTexLocations(int wrapMode, float size, float u, out float u0, out float u1)
-{
-	float texelSize		= 1.0 / size;
-	float halfTexelSize	= 0.5 / size;
-
-	if(wrapMode==0) {							// repeat
-		u	= u * size - 0.5;
-		u0	= (floor(u) + 0.5) / size;			// + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
-		u0	= fract(u0);
-		u1	= u0 + texelSize;
-		u1	= fract(u1);
-
-		return fract(u);						// return weight
-	}
-	else if(wrapMode==1) {						// repeat + clamp
-		u	= fract(u);							// must force into 0-1 to start
-		u	= u * size - 0.5;
-		u0	= (floor(u) + 0.5) / size;			// + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
-		u1	= u0 + texelSize;
-
-		if(u0 <  0.0)	u0 = 0.0;
-		if(u1 >= 1.0)	u1 = 1.0 - halfTexelSize;
-		
-		return fract(u);						// return weight
-	}
-	else {										// mirror + mirror clamp - both are the same since the edge pixels are repeated anyway
-
-		float odd = floor(mod(u, 2.0));			// odd values are mirrored
-
-		if(odd > 0.0) {
-			u = 1.0 - fract(u);
-		}
-		else {
-			u = fract(u);
-		}
-
-		u	= u * size - 0.5;
-		u0	= (floor(u) + 0.5) / size;			// + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
-		u1	= u0 + texelSize;
-
-		if(u0 <  0.0)	u0 = 0.0;
-		if(u1 >= 1.0)	u1 = 1.0 - halfTexelSize;
-		
-		return fract(u);						// return weight
-	}
-}
-
-vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texPos, vec2 texCoord)
-{
-	float tx[2], ty[2];
-	float a = LinearTexLocations(wrapMode.s, texSize.x, texCoord.x, tx[0], tx[1]);
-	float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]);
-
-	vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos)), 0).r);
-    vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos)), 0).r);
-    vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos)), 0).r);
-    vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos)), 0).r);
-
-	if(alphaTest) {
-		if(p0q0.a > p1q0.a)		{ p1q0.rgb = p0q0.rgb; }
-		if(p0q0.a > p0q1.a)		{ p0q1.rgb = p0q0.rgb; }
-
-		if(p1q0.a > p0q0.a)		{ p0q0.rgb = p1q0.rgb; }
-		if(p1q0.a > p1q1.a)		{ p1q1.rgb = p1q0.rgb; }
-
-		if(p0q1.a > p0q0.a)		{ p0q0.rgb = p0q1.rgb; }
-		if(p0q1.a > p1q1.a)		{ p1q1.rgb = p0q1.rgb; }
-
-		if(p1q1.a > p0q1.a)		{ p0q1.rgb = p1q1.rgb; }
-		if(p1q1.a > p1q0.a)		{ p1q0.rgb = p1q1.rgb; }
-	}
-
-	// Interpolation in X direction.
-    vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
-    vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.
-
-    return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
-}
-
-vec4 textureR3D(usampler2D texSampler, ivec2 wrapMode, ivec2 texSize, ivec2 texPos, vec2 texCoord)
-{
-	float numLevels	= floor(log2(min(float(texSize.x), float(texSize.y))));				// r3d only generates down to 1:1 for square textures, otherwise its the min dimension
-	float fLevel	= min(mip_map_level(texCoord * vec2(texSize)), numLevels);
-
-	if(alphaTest) fLevel *= 0.5;
-	else fLevel *= 0.8;
-
-	int iLevel = int(fLevel);
-
-	ivec2 texPos0 = GetTexturePosition(iLevel,texPos);
-	ivec2 texPos1 = GetTexturePosition(iLevel+1,texPos);
-
-	ivec2 texSize0 = GetTextureSize(iLevel, texSize);
-	ivec2 texSize1 = GetTextureSize(iLevel+1, texSize); 
-
-	vec4 texLevel0 = texBiLinear(texSampler, wrapMode, vec2(texSize0), texPos0, texCoord);
-	vec4 texLevel1 = texBiLinear(texSampler, wrapMode, vec2(texSize1), texPos1, texCoord);
-
-	return mix(texLevel0, texLevel1, fract(fLevel));	// linear blend between our mipmap levels
-}
-
-vec4 GetTextureValue()
-{
-	vec4 tex1Data = textureR3D(tex1, textureWrapMode, ivec2(baseTexInfo.zw), ivec2(baseTexInfo.xy), fsTexCoord);
-
-	if(textureInverted) {
-		tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb);
-	}
-
-	if (microTexture) {
-		vec2 scale			= (vec2(baseTexInfo.zw) / 128.0) * microTextureScale;
-		ivec2 pos			= GetMicroTexturePos(microTextureID);
-
-		// add page offset to microtexture position
-		pos.y				+= GetNextPageOffset(baseTexInfo.y);
-	
-		vec4 tex2Data		= textureR3D(tex1, ivec2(0), ivec2(128), pos, fsTexCoord * scale);
-
-		float lod			= mip_map_level(fsTexCoord * scale * vec2(128.0));
-
-		float blendFactor	= max(lod - 1.5, 0.0);			// bias -1.5
-		blendFactor			= min(blendFactor, 1.0);		// clamp to max value 1
-		blendFactor			= (blendFactor + 1.0) / 2.0;	// 0.5 - 1 range
-
-		tex1Data			= mix(tex2Data, tex1Data, blendFactor);
-	}
-
-	if (alphaTest) {
-		if (tex1Data.a < (32.0/255.0)) {
-			discard;
-		}
-	}
-
-	if(textureAlpha) {
-		if(discardAlpha) {					// opaque 1st pass
-			if (tex1Data.a < 1.0) {
-				discard;
-			}
-		}
-		else {								// transparent 2nd pass
-			if ((tex1Data.a * fsColor.a) >= 1.0) {
-				discard;
-			}
-		}
-	}
-
-	if (textureAlpha == false) {
-		tex1Data.a = 1.0;
-	}
-
-	return tex1Data;
-}
-
-void Step15Luminous(inout vec4 colour)
-{
-	// luminous polys seem to behave very differently on step 1.5 hardware
-	// when fixed shading is enabled the colour is modulated by the vp ambient + fixed shade value
-	// when disabled it appears to be multiplied by 1.5, presumably to allow a higher range
-	if(hardwareStep==0x15) {
-		if(!lightEnabled && textureEnabled) {
-			if(fixedShading) {
-				colour.rgb *= 1.0 + fsFixedShade + lighting[1].y;
-			}
-			else {
-				colour.rgb *= 1.5;
-			}
-		}
-	}
-}
-
-float CalcFog()
-{
-	float z		= -fsViewVertex.z;
-	float fog	= fogIntensity * clamp(fogStart + z * fogDensity, 0.0, 1.0);
-
-	return fog;
-}
-
 float sqr(float a)
 {
 	return a*a;
@ -678,10 +364,6 @@ float sqr_length(vec2 a)
 	return a.x*a.x + a.y*a.y;
 }

-)glsl";
-
-static const char* fragmentShaderR3DQuads2 = R"glsl(
-
 void main()
 {
 	vec4 tex1Data;
@ -814,8 +496,8 @@ void main()
 	 // Fog & spotlight applied
 	finalData.rgb = mix(finalData.rgb, fogData.rgb + lSpotFogColor, fogData.a);

-	// Write output
-	outColor = finalData;	
+	// Write outputs to colour buffers
+	WriteOutputs(finalData,colourLayer);
 }
 )glsl";

--- a/Src/Graphics/New3D/R3DShaderTriangles.h
+++ b/Src/Graphics/New3D/R3DShaderTriangles.h
@ -7,6 +7,7 @@ static const char *vertexShaderR3D = R"glsl(

 // uniforms
 uniform float	modelScale;
+uniform float	nodeAlpha;
 uniform mat4	modelMat;
 uniform mat4	projMat;
 uniform bool	translatorMap;
@ -35,6 +36,8 @@ vec4 GetColour(vec4 colour)
 		c.rgb *= 16.0;
 	}

+	c.a *= nodeAlpha;
+
 	return c;
 }

@ -98,6 +101,7 @@ uniform float	fogAttenuation;
 uniform float	fogAmbient;
 uniform bool	fixedShading;
 uniform int		hardwareStep;
+uniform int		colourLayer;

 //interpolated inputs from vertex shader
 in	vec3	fsViewVertex;
@ -108,334 +112,16 @@ in  float	fsDiscard;
 in  float	fsFixedShade;

 //outputs
-out vec4 outColor;
+layout(location = 0) out vec4 out0;		// opaque pixels (WriteOutputs: LayerColour)
+layout(location = 1) out vec4 out1;		// first translucent layer (WriteOutputs: LayerTrans0)
+layout(location = 2) out vec4 out2;		// second translucent layer (WriteOutputs: LayerTrans1)

-vec4 ExtractColour(int type, uint value)
-{
-	vec4 c = vec4(0.0);
+// forward declarations (see common file)

-	if(type==0) {			// T1RGB5	
-		c.r		= float((value >> 10) & 0x1Fu);
-		c.g		= float((value >> 5 ) & 0x1Fu);
-		c.b		= float((value      ) & 0x1Fu);
-		c.rgb  *= (1.0/31.0);
-		c.a		= 1.0 - float((value >> 15) & 0x1u);
-	}
-	else if(type==1) {		// Interleaved A4L4 (low byte)
-		c.rgb	= vec3(float(value&0xFu));
-		c.a		= float((value >> 4) & 0xFu);
-		c      *= (1.0/15.0);
-	}
-	else if(type==2) {
-		c.a		= float(value&0xFu);
-		c.rgb   = vec3(float((value >> 4) & 0xFu));
-		c	   *= (1.0/15.0);
-	}
-	else if(type==3) {
-		c.rgb	= vec3(float((value>>8)&0xFu));
-		c.a		= float((value >> 12) & 0xFu);
-		c	   *= (1.0/15.0);
-	}
-	else if(type==4) {
-		c.a		= float((value>>8)&0xFu);
-		c.rgb   = vec3(float((value >> 12) & 0xFu));
-		c	   *= (1.0/15.0);
-	}
-	else if(type==5) {
-		c = vec4(float(value&0xFFu) / 255.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-	else if(type==6) {
-		c = vec4(float((value>>8)&0xFFu) / 255.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-	else if(type==7) {	// RGBA4
-		c.r = float((value>>12)&0xFu);
-		c.g = float((value>> 8)&0xFu);
-		c.b = float((value>> 4)&0xFu);
-		c.a = float((value>> 0)&0xFu);
-		c *= (1.0/15.0);
-	}
-	else if(type==8) {	// low byte, low nibble
-		c = vec4(float(value&0xFu) / 15.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-	else if(type==9) {	// low byte, high nibble
-		c = vec4(float((value>>4)&0xFu) / 15.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-	else if(type==10) {	// high byte, low nibble
-		c = vec4(float((value>>8)&0xFu) / 15.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-	else if(type==11) {	// high byte, high nibble
-		c = vec4(float((value>>12)&0xFu) / 15.0);
-		if(c.a==1.0)	{ c.a = 0.0; }
-		else			{ c.a = 1.0; }
-	}
-
-	return c;
-}
-
-ivec2 GetTexturePosition(int level, ivec2 pos)
-{
-	const int mipXBase[] = int[](0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047);
-	const int mipYBase[] = int[](0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023);
-
-	int mipDivisor = 1 << level;
-
-	int page = pos.y / 1024;
-	pos.y -= (page * 1024);		// remove page from tex y
-
-	ivec2 retPos;
-	retPos.x = mipXBase[level] + (pos.x / mipDivisor);
-	retPos.y = mipYBase[level] + (pos.y / mipDivisor);
-
-	retPos.y += (page * 1024);	// add page back to tex y
-
-	return retPos;
-}
-
-ivec2 GetTextureSize(int level, ivec2 size)
-{
-	int mipDivisor = 1 << level;
-
-	return size / mipDivisor;
-}
-
-ivec2 GetMicroTexturePos(int id)
-{
-	const int xCoords[8] = int[](0, 0, 128, 128, 0, 0, 128, 128);
-	const int yCoords[8] = int[](0, 128, 0, 128, 256, 384, 256, 384);
-
-	return ivec2(xCoords[id],yCoords[id]);
-}
-
-int GetPage(int yCoord)
-{
-	return yCoord / 1024;
-}
-
-int GetNextPage(int yCoord)
-{
-	return (GetPage(yCoord) + 1) & 1;
-}
-
-int GetNextPageOffset(int yCoord)
-{
-	return GetNextPage(yCoord) * 1024;
-}
-
-// wrapping tex coords would be super easy but we combined tex sheets so have to handle wrap around between sheets
-// hardware testing would be useful because i don't know exactly what happens if you try to read outside the texture sheet
-// wrap around is a good guess
-ivec2 WrapTexCoords(ivec2 pos, ivec2 coordinate)
-{
-	ivec2 newCoord;
-
-	newCoord.x = coordinate.x & 2047;
-	newCoord.y = coordinate.y;
-
-	int page = GetPage(pos.y);
-
-	newCoord.y -= (page * 1024);	// remove page
-	newCoord.y &= 1023;				// wrap around in the same sheet
-	newCoord.y += (page * 1024);	// add page back
-
-	return newCoord;
-}
-
-float mip_map_level(in vec2 texture_coordinate) // in texel units
-{
-    vec2  dx_vtc        = dFdx(texture_coordinate);
-    vec2  dy_vtc        = dFdy(texture_coordinate);
-    float delta_max_sqr = max(dot(dx_vtc, dx_vtc), dot(dy_vtc, dy_vtc));
-    float mml = 0.5 * log2(delta_max_sqr);
-    return max( 0, mml );
-}
-
-float LinearTexLocations(int wrapMode, float size, float u, out float u0, out float u1)
-{
-	float texelSize		= 1.0 / size;
-	float halfTexelSize	= 0.5 / size;
-
-	if(wrapMode==0) {							// repeat
-		u	= (u * size) - 0.5;
-		u0	= (floor(u) + 0.5) / size;			// + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
-		u0	= fract(u0);
-		u1	= u0 + texelSize;
-		u1	= fract(u1);
-
-		return fract(u);						// return weight
-	}
-	else if(wrapMode==1) {						// repeat + clamp
-		u	= fract(u);							// must force into 0-1 to start
-		u	= (u * size) - 0.5;
-		u0	= (floor(u) + 0.5) / size;			// + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
-		u1	= u0 + texelSize;
-
-		if(u0 <  0.0)	u0 = 0.0;
-		if(u1 >= 1.0)	u1 = 1.0 - halfTexelSize;
-		
-		return fract(u);						// return weight
-	}
-	else {										// mirror + mirror clamp - both are the same since the edge pixels are repeated anyway
-
-		float odd = floor(mod(u, 2.0));			// odd values are mirrored
-
-		if(odd > 0.0) {
-			u = 1.0 - fract(u);
-		}
-		else {
-			u = fract(u);
-		}
-
-		u	= (u * size) - 0.5;
-		u0	= (floor(u) + 0.5) / size;			// + 0.5 offset added to push us into the centre of a pixel, without we'll get rounding errors
-		u1	= u0 + texelSize;
-
-		if(u0 <  0.0)	u0 = 0.0;
-		if(u1 >= 1.0)	u1 = 1.0 - halfTexelSize;
-		
-		return fract(u);						// return weight
-	}
-}
-
-vec4 texBiLinear(usampler2D texSampler, ivec2 wrapMode, vec2 texSize, ivec2 texPos, vec2 texCoord)
-{
-	float tx[2], ty[2];
-	float a = LinearTexLocations(wrapMode.s, texSize.x, texCoord.x, tx[0], tx[1]);
-	float b = LinearTexLocations(wrapMode.t, texSize.y, texCoord.y, ty[0], ty[1]);
-
-	vec4 p0q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[0]) * texSize + texPos)), 0).r);
-    vec4 p1q0 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[0]) * texSize + texPos)), 0).r);
-    vec4 p0q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[0],ty[1]) * texSize + texPos)), 0).r);
-    vec4 p1q1 = ExtractColour(baseTexType,texelFetch(texSampler, WrapTexCoords(texPos,ivec2(vec2(tx[1],ty[1]) * texSize + texPos)), 0).r);
-
-	if(alphaTest) {
-		if(p0q0.a > p1q0.a)		{ p1q0.rgb = p0q0.rgb; }
-		if(p0q0.a > p0q1.a)		{ p0q1.rgb = p0q0.rgb; }
-
-		if(p1q0.a > p0q0.a)		{ p0q0.rgb = p1q0.rgb; }
-		if(p1q0.a > p1q1.a)		{ p1q1.rgb = p1q0.rgb; }
-
-		if(p0q1.a > p0q0.a)		{ p0q0.rgb = p0q1.rgb; }
-		if(p0q1.a > p1q1.a)		{ p1q1.rgb = p0q1.rgb; }
-
-		if(p1q1.a > p0q1.a)		{ p0q1.rgb = p1q1.rgb; }
-		if(p1q1.a > p1q0.a)		{ p1q0.rgb = p1q1.rgb; }
-	}
-
-	// Interpolation in X direction.
-    vec4 pInterp_q0 = mix( p0q0, p1q0, a ); // Interpolates top row in X direction.
-    vec4 pInterp_q1 = mix( p0q1, p1q1, a ); // Interpolates bottom row in X direction.
-
-    return mix( pInterp_q0, pInterp_q1, b ); // Interpolate in Y direction.
-}
-
-vec4 textureR3D(usampler2D texSampler, ivec2 wrapMode, ivec2 texSize, ivec2 texPos, vec2 texCoord)
-{
-	float numLevels	= floor(log2(min(float(texSize.x), float(texSize.y))));				// r3d only generates down to 1:1 for square textures, otherwise its the min dimension
-	float fLevel	= min(mip_map_level(texCoord * vec2(texSize)), numLevels);
-
-	if(alphaTest) fLevel *= 0.5;
-	else fLevel *= 0.8;
-
-	int iLevel = int(fLevel);
-
-	ivec2 texPos0 = GetTexturePosition(iLevel,texPos);
-	ivec2 texPos1 = GetTexturePosition(iLevel+1,texPos);
-
-	ivec2 texSize0 = GetTextureSize(iLevel, texSize);
-	ivec2 texSize1 = GetTextureSize(iLevel+1, texSize); 
-
-	vec4 texLevel0 = texBiLinear(texSampler, wrapMode, vec2(texSize0), texPos0, texCoord);
-	vec4 texLevel1 = texBiLinear(texSampler, wrapMode, vec2(texSize1), texPos1, texCoord);
-
-	return mix(texLevel0, texLevel1, fract(fLevel));	// linear blend between our mipmap levels
-}
-
-vec4 GetTextureValue()
-{
-	vec4 tex1Data = textureR3D(tex1, textureWrapMode, ivec2(baseTexInfo.zw), ivec2(baseTexInfo.xy), fsTexCoord);
-
-	if(textureInverted) {
-		tex1Data.rgb = vec3(1.0) - vec3(tex1Data.rgb);
-	}
-
-	if (microTexture) {
-		vec2 scale			= (vec2(baseTexInfo.zw) / 128.0) * microTextureScale;
-		ivec2 pos			= GetMicroTexturePos(microTextureID);
-
-		// add page offset to microtexture position
-		pos.y				+= GetNextPageOffset(baseTexInfo.y);
-	
-		vec4 tex2Data		= textureR3D(tex1, ivec2(0), ivec2(128), pos, fsTexCoord * scale);
-
-		float lod			= mip_map_level(fsTexCoord * scale * vec2(128.0));
-
-		float blendFactor	= max(lod - 1.5, 0.0);			// bias -1.5
-		blendFactor			= min(blendFactor, 1.0);		// clamp to max value 1
-		blendFactor			= (blendFactor + 1.0) / 2.0;	// 0.5 - 1 range
-
-		tex1Data			= mix(tex2Data, tex1Data, blendFactor);
-	}
-
-	if (alphaTest) {
-		if (tex1Data.a < (32.0/255.0)) {
-			discard;
-		}
-	}
-
-	if(textureAlpha) {
-		if(discardAlpha) {					// opaque 1st pass
-			if (tex1Data.a < 1.0) {
-				discard;
-			}
-		}
-		else {								// transparent 2nd pass
-			if ((tex1Data.a * fsColor.a) >= 1.0) {
-				discard;
-			}
-		}
-	}
-
-	if (textureAlpha == false) {
-		tex1Data.a = 1.0;
-	}
-
-	return tex1Data;
-}
-
-void Step15Luminous(inout vec4 colour)
-{
-	// luminous polys seem to behave very differently on step 1.5 hardware
-	// when fixed shading is enabled the colour is modulated by the vp ambient + fixed shade value
-	// when disabled it appears to be multiplied by 1.5, presumably to allow a higher range
-	if(hardwareStep==0x15) {
-		if(!lightEnabled && textureEnabled) {
-			if(fixedShading) {
-				colour.rgb *= 1.0 + fsFixedShade + lighting[1].y;
-			}
-			else {
-				colour.rgb *= vec3(1.5);
-			}
-		}
-	}
-}
-
-float CalcFog()
-{
-	float z		= -fsViewVertex.z;
-	float fog	= fogIntensity * clamp(fogStart + z * fogDensity, 0.0, 1.0);
-
-	return fog;
-}
+float CalcFog();							// fog factor: fogIntensity scaled by a clamped linear ramp
+void Step15Luminous(inout vec4 colour);	// step 1.5 hardware only: rescales colour.rgb of unlit textured polys
+vec4 GetTextureValue();					// returns the texture colour; may discard the fragment
+void WriteOutputs(vec4 colour, int layer);	// routes colour to out0/out1/out2 according to layer

 void main()
 {
@ -572,7 +258,8 @@ void main()
 	 // Fog & spotlight applied
 	finalData.rgb = mix(finalData.rgb, fogData.rgb + lSpotFogColor, fogData.a);

-	outColor = finalData;
+	// Write outputs to colour buffers
+	WriteOutputs(finalData,colourLayer);
 }
 )glsl";

--- a/Src/Model3/TileGen.cpp
+++ b/Src/Model3/TileGen.cpp
@ -48,6 +48,7 @@
 * - For consistency, the registers should probably be byte reversed (this is a
 *   little endian device), forcing the Model3 Read32/Write32 handlers to
 *   manually reverse the data. This keeps with the convention for VRAM.
+ * - TODO: finish ripping out code that no longer does anything. A lot has been removed, but there is still more to go.
 */

 #include "TileGen.h"
@ -109,9 +110,6 @@ void CTileGen::LoadState(CBlockFile *SaveState)
 	}	
 	SaveState->Read(regs, sizeof(regs));
 	
-	// Because regs were read after palette, must recompute
-	RecomputePalettes();
-	
 	// If multi-threaded, update read-only snapshots too
 	if (m_gpuMultiThreaded)
 		UpdateSnapshots(true);
@ -143,34 +141,8 @@ void CTileGen::EndVBlank(void)
 	//
 }

-void CTileGen::RecomputePalettes(void)
-{
-	// Writing the colors forces palettes to be computed
-	if (m_gpuMultiThreaded)
-	{
-		for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 )
-		{
-			MARK_DIRTY(palDirty[0], colorAddr);
-			MARK_DIRTY(palDirty[1], colorAddr);
-			WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]);
-		}
-	}
-	else
-	{
-		for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 )
-			WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]);
-	}
-}
-
 UINT32 CTileGen::SyncSnapshots(void)
 {
-	// Good time to recompute the palettes
-	if (recomputePalettes)
-	{
-		RecomputePalettes();
-		recomputePalettes = false;
-	}
-	
 	if (!m_gpuMultiThreaded)
 		return 0;
 	
@ -279,23 +251,6 @@ void CTileGen::WriteRAM32(unsigned addr, UINT32 data)
 	if (m_gpuMultiThreaded)
 		MARK_DIRTY(vramDirty, addr);
 	*(UINT32 *) &vram[addr] = data;
-		
-	// Update palette if required
-	if (addr >= 0x100000)
-    {
-		addr -= 0x100000;
-		unsigned color = addr/4;	// color index
-		
-		// Same address in both palettes must be marked dirty
-		if (m_gpuMultiThreaded)
-		{
-			MARK_DIRTY(palDirty[0], addr);
-			MARK_DIRTY(palDirty[1], addr);
-		}
-			
-		// Both palettes will be modified simultaneously
-        WritePalette(color, data);
-    }
 }

 //TODO: 8- and 16-bit handlers have not been thoroughly tested
@ -330,76 +285,6 @@ void CTileGen::WriteRAM16(unsigned addr, uint16_t data)
  WriteRAM32(addr & ~1, tmp);
 }

-void CTileGen::InitPalette(void)
-{
-	for (int i = 0; i < 0x20000/4; i++)
-	{
-		WritePalette(i, *(UINT32 *) &vram[0x100000 + i*4]);
-		if (m_gpuMultiThreaded)
-		{
-			palRO[0][i] = pal[0][i];
-			palRO[1][i] = pal[1][i];
-		}
-	}
-}
-
-static inline UINT32 AddColorOffset(UINT8 r, UINT8 g, UINT8 b, UINT8 a, UINT32 offsetReg)
-{
-	INT32	ir, ig, ib;
-	
-	/*
-	 * Color offsets are signed but I'm not sure whether or not their range is 
-	 * merely [-128,+127], which would mean adding to a 0 component would not 
-	 * result full intensity (only +127 at most). Alternatively, the signed 
-	 * value might have to be multiplied by 2. That is assumed here. In either 
-	 * case, the signed addition should be saturated.
-	 */
-
-	ib = (INT32) (INT8)((offsetReg>>16)&0xFF);
-	ig = (INT32) (INT8)((offsetReg>>8)&0xFF);
-	ir = (INT32) (INT8)((offsetReg>>0)&0xFF);
-	ib *= 2;
-	ig *= 2;
-	ir *= 2;
-	
-	// Add with saturation
-	ib += (INT32) (UINT32) b;
-	if (ib < 0)			ib = 0;
-	else if (ib > 0xFF)	ib = 0xFF;
-	ig += (INT32) (UINT32) g;
-	if (ig < 0)			ig = 0;
-	else if (ig > 0xFF)	ig = 0xFF;
-	ir += (INT32) (UINT32) r;
-	if (ir < 0)			ir = 0;
-	else if (ir > 0xFF)	ir = 0xFF;
-	
-	// Construct the final 32-bit ABGR-format color
-	r = (UINT8) ir;
-	g = (UINT8) ig;
-	b = (UINT8) ib;
-	return ((UINT32)a<<24)|((UINT32)b<<16)|((UINT32)g<<8)|(UINT32)r;
-}
-
-void CTileGen::WritePalette(unsigned color, UINT32 data)
-{
-	UINT8		r, g, b, a;
-	
-	a = 0xFF * ((data>>15)&1); 	// decode the RGBA (make alpha 0xFF or 0x00)
-    a = ~a;                  	// invert it (set on Model 3 means clear pixel)
-	
-	if ((data&0x8000))
-    	r = g = b = 0;
-	else
-    {
-		b = (((data >> 10) & 0x1F) * 255) / 31;
-		g = (((data >> 5) & 0x1F) * 255) / 31;
-		r = ((data & 0x1F) * 255) / 31;
-	}
-
-	pal[0][color] = AddColorOffset(r, g, b, a, regs[0x40/4]);	// A/A'
-	pal[1][color] = AddColorOffset(r, g, b, a, regs[0x44/4]);	// B/B'
-}
-
 UINT32 CTileGen::ReadRegister(unsigned reg)
 {
  reg &= 0xFF;
@ -423,11 +308,6 @@ void CTileGen::WriteRegister(unsigned reg, UINT32 data)
 		break;
 	case 0x40:	// layer A/A' color offset
 	case 0x44:	// layer B/B' color offset
-		// We only have a mechanism to recompute both palettes simultaneously.
-		// These regs are often written together in the same frame. To avoid
-		// needlessly recomputing both palettes twice, we defer the operation.
-		if (regs[reg/4] != data)	// only if changed
-			recomputePalettes = true;
 		break;
 	case 0x10:	// IRQ acknowledge
 		IRQ->Deassert(data&0xFF);
@ -451,9 +331,6 @@ void CTileGen::Reset(void)
 	memset(regs, 0, sizeof(regs));
 	memset(regsRO, 0, sizeof(regsRO));

-	InitPalette();
-	recomputePalettes = false;
-
 	DebugLog("Tile Generator reset\n");
 }

--- a/Src/Model3/TileGen.h
+++ b/Src/Model3/TileGen.h
@ -254,9 +254,6 @@ public:
 	
 private:
 	// Private member functions
-	void		RecomputePalettes(void);
-	void		InitPalette(void);
-	void		WritePalette(unsigned color, UINT32 data);
 	UINT32		UpdateSnapshots(bool copyWhole);
 	UINT32		UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty);

@ -274,7 +271,6 @@ private:
 	UINT8	*memoryPool;		// all memory allocated here
 	UINT8   *vram;          	// 1.125MB of VRAM
 	UINT32	*pal[2];			// 2 x 0x20000 byte (32K colors) palette
-	bool	recomputePalettes;	// whether to recompute palettes A/A' and B/B' during sync

 	// Read-only snapshots
 	UINT8   *vramRO;        // 1.125MB of VRAM                       [read-only snapshot]	
--- a/VS2008/Supermodel.vcxproj
+++ b/VS2008/Supermodel.vcxproj
@ -495,6 +495,7 @@ xcopy /D /Y "$(ProjectDir)..\Assets\*" "$(TargetDir)Assets"</Command>
    <ClInclude Include="..\Src\Graphics\New3D\R3DFrameBuffers.h" />
    <ClInclude Include="..\Src\Graphics\New3D\R3DScrollFog.h" />
    <ClInclude Include="..\Src\Graphics\New3D\R3DShader.h" />
+    <ClInclude Include="..\Src\Graphics\New3D\R3DShaderCommon.h" />
    <ClInclude Include="..\Src\Graphics\New3D\R3DShaderQuads.h" />
    <ClInclude Include="..\Src\Graphics\New3D\R3DShaderTriangles.h" />
    <ClInclude Include="..\Src\Graphics\New3D\VBO.h" />
--- a/VS2008/Supermodel.vcxproj.filters
+++ b/VS2008/Supermodel.vcxproj.filters
@ -853,6 +853,9 @@
    <ClInclude Include="..\Src\Graphics\FBO.h">
      <Filter>Header Files\Graphics</Filter>
    </ClInclude>
+    <ClInclude Include="..\Src\Graphics\New3D\R3DShaderCommon.h">
+      <Filter>Header Files\Graphics\New</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <CustomBuild Include="..\Src\Debugger\ReadMe.txt">