#include "New3D.h"
#include "Vec.h"
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstring>
#include <limits>
#include "R3DFloat.h"
#include "Util/BitCast.h"

#define MAX_RAM_VERTS 300000
#define MAX_ROM_VERTS 1500000

#define BYTE_TO_FLOAT(B)    ((2.0f * (B) + 1.0f) * (float)(1.0/255.0))

namespace New3D {

// Builds the renderer: loads the Real3D shader, allocates the 2048x2048 16-bit
// integer texture sheet and creates the VAO/VBO that hold ROM and RAM vertices.
// Memory pointers stay null until AttachMemory() is called.
CNew3D::CNew3D(const Util::Config::Node &config, const std::string& gameName) :
    m_r3dShader(config),
    m_r3dScrollFog(config),
    m_gameName(gameName),
    m_textureBuffer(0),
    m_vao(0)
{
    m_cullingRAMLo  = nullptr;
    m_cullingRAMHi  = nullptr;
    m_polyRAM       = nullptr;
    m_vrom          = nullptr;
    m_textureRAM    = nullptr;
    m_sunClamp      = true;
    m_shadeIsSigned = true;

    // triangles by default, 4-vertex primitives when quad rendering is requested
    m_numPolyVerts  = 3;
    m_primType      = GL_TRIANGLES;

    if (config["QuadRendering"].ValueAs<bool>()) {
        m_numPolyVerts = 4;
        m_primType     = GL_LINES_ADJACENCY;
    }

    m_r3dShader.LoadShader();
    glUseProgram(0);

    // setup our texture memory
    glGenTextures(1, &m_textureBuffer);
    glBindTexture(GL_TEXTURE_2D, m_textureBuffer);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);

    glTexImage2D(GL_TEXTURE_2D, 0, GL_R16UI, 2048, 2048, 0, GL_RED_INTEGER, GL_UNSIGNED_SHORT, nullptr);    // allocate storage

    // setup up our vertex buffer memory
    glGenVertexArrays(1, &m_vao);
    glBindVertexArray(m_vao);

    m_vbo.Create(GL_ARRAY_BUFFER, GL_DYNAMIC_DRAW, sizeof(FVertex) * (MAX_RAM_VERTS + MAX_ROM_VERTS));
    m_vbo.Bind(true);

    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inVertex"));
    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inNormal"));
    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inTexCoord"));
    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inColour"));
    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inFaceNormal"));
    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inFixedShade"));

    // before draw, specify vertex and index arrays with their offsets, offsetof is maybe evil ..
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inVertex"),     4, GL_FLOAT,         GL_FALSE, sizeof(FVertex), nullptr);
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inNormal"),     3, GL_FLOAT,         GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, normal));
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inTexCoord"),   2, GL_FLOAT,         GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, texcoords));
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inColour"),     4, GL_UNSIGNED_BYTE, GL_TRUE,  sizeof(FVertex), (void*)offsetof(FVertex, faceColour));
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inFaceNormal"), 3, GL_FLOAT,         GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, faceNormal));
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inFixedShade"), 1, GL_FLOAT,         GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, fixedShade));

    glBindVertexArray(0);
    m_vbo.Bind(false);
}

// Releases the GL objects created by the constructor and unloads the shader.
CNew3D::~CNew3D()
{
    m_vbo.Destroy();

    if (m_vao) {
        glDeleteVertexArrays(1, &m_vao);
        m_vao = 0;
    }

    if (m_textureBuffer) {
        glDeleteTextures(1, &m_textureBuffer);
        m_textureBuffer = 0;
    }

    m_r3dShader.UnloadShader();
}

// Stores the emulated memory regions the renderer reads from. The pointers are
// kept as-is; nothing is copied.
void CNew3D::AttachMemory(const UINT32 *cullingRAMLoPtr, const UINT32 *cullingRAMHiPtr, const UINT32 *polyRAMPtr, const UINT32 *vromPtr, const UINT16 *textureRAMPtr)
{
    m_cullingRAMLo = cullingRAMLoPtr;
    m_cullingRAMHi = cullingRAMHiPtr;
    m_polyRAM      = polyRAMPtr;
    m_vrom         = vromPtr;
    m_textureRAM   = textureRAMPtr;
}

// Selects the hardware stepping (0x10, 0x15, 0x20 or 0x21; anything else falls
// back to 0x10) and derives the culling node word offset and vertex scale.
void CNew3D::SetStepping(int stepping)
{
    m_step = stepping;

    const bool knownStep = (m_step == 0x10) || (m_step == 0x15) || (m_step == 0x20) || (m_step == 0x21);
    if (!knownStep) {
        m_step = 0x10;
    }

    if (m_step > 0x10) {
        m_offset       = 0;                     // culling nodes are 10 words
        m_vertexFactor = (1.0f / 2048.0f);      // vertices are in 13.11 format
    }
    else {
        m_offset       = 2;                     // 8 words
        m_vertexFactor = (1.0f / 128.0f);       // 17.7
    }
}

// Records the output geometry and (re)creates the frame buffer objects at the
// total resolution. Always returns OKAY.
bool CNew3D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXResParam, unsigned totalYResParam)
{
    // Resolution and offset within physical display area
    m_xRatio    = xRes * (float)(1.0 / 496.0);
    m_yRatio    = yRes * (float)(1.0 / 384.0);
    m_xOffs     = xOffset;
    m_yOffs     = yOffset;
    m_xRes      = xRes;
    m_yRes      = yRes;
    m_totalXRes = totalXResParam;
    m_totalYRes = totalYResParam;

    m_r3dFrameBuffers.DestroyFBO();     // remove any old ones if created
    m_r3dFrameBuffers.CreateFBO(totalXResParam, totalYResParam);

    return OKAY;
}

// Copies a rectangle of texture RAM into the GL texture sheet, one row per
// call because the source rows are 2048 texels apart. 'level' is not used.
void CNew3D::UploadTextures(unsigned level, unsigned x, unsigned y, unsigned width, unsigned height)
{
    glBindTexture(GL_TEXTURE_2D, m_textureBuffer);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 2);

    for (unsigned row = 0; row < height; row++) {
        const unsigned texY = y + row;
        glTexSubImage2D(GL_TEXTURE_2D, 0, x, texY, width, 1, GL_RED_INTEGER, GL_UNSIGNED_SHORT, m_textureRAM + (texY * 2048) + x);
    }
}

void CNew3D::DrawScrollFog()
{
    // this is my best guess at the logic based upon what games are doing
    //
    // ocean hunter     - every viewport has scroll fog values set. Must start with lowest priority layers as the higher ones sometimes are garbage
    // scud race        - first viewports in priority layer missing scroll values. The latter ones all contain valid scroll values.
    // daytona          - doesn't seem to use scroll fog at all. Will set scroll values for the first viewports, the end ones contain no scroll values
    // vf3              - first viewport only has it set. But set with highest select value ?? Rest of the viewports in priority layer contain a lower select value
    // sega bassfishing - first viewport in priority 1 sets scroll value. The rest all contain the wrong value + a higher select value ..
    // spikeout final   - 2nd viewport in the priority layer has scroll values set, none of the others do. It also uses the highest select value

    float rgba[4];
    bool  haveScrollFog = false;

    // pass 1: lowest priority first, take the first viewport with a non-zero scroll fog value
    for (int pri = 0; pri < 4 && !haveScrollFog; pri++) {
        for (auto &n : m_nodes) {
            if (n.viewport.priority == pri && n.viewport.scrollFog != 0.f) {
                rgba[0] = n.viewport.fogParams[0];
                rgba[1] = n.viewport.fogParams[1];
                rgba[2] = n.viewport.fogParams[2];
                rgba[3] = n.viewport.scrollFog;
                haveScrollFog = true;
                break;
            }
        }
    }

    if (!haveScrollFog) {
        return;
    }

    // pass 2: find the first viewport that has a fog density and the same fog colour, and draw with it
    for (int pri = 0; pri < 4; pri++) {
        for (auto &n : m_nodes) {
            if (n.viewport.priority != pri) {
                continue;
            }

            // only if we have a fog density value
            if (n.viewport.fogParams[3] == 0.f) {
                continue;
            }

            const bool sameColour = rgba[0] == n.viewport.fogParams[0] &&
                                    rgba[1] == n.viewport.fogParams[1] &&
                                    rgba[2] == n.viewport.fogParams[2];
            if (sameColour) {
                glViewport(n.viewport.x, n.viewport.y, n.viewport.width, n.viewport.height);
                m_r3dScrollFog.DrawScrollFog(rgba, n.viewport.scrollAtt, n.viewport.fogParams[6], n.viewport.spotFogColor, n.viewport.spotEllipse);
                return;
            }
        }
    }
}

// Draws every mesh of the given priority that belongs to 'layer' and whose
// high-priority flag equals 'renderOverlay'. Returns true if any non-empty
// model in that priority contains a high-priority mesh.
bool CNew3D::RenderScene(int priority, bool renderOverlay, Layer layer)
{
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, m_textureBuffer);

    bool hasOverlay = false;        // (high priority polys)

    for (auto &n : m_nodes) {

        if (n.viewport.priority != priority || n.models.empty()) {
            continue;
        }

        CalcViewport(&n.viewport, std::abs(m_nfPairs[priority].zNear*0.96f), std::abs(m_nfPairs[priority].zFar*1.05f));    // make planes 5% bigger
        glViewport(n.viewport.x, n.viewport.y, n.viewport.width, n.viewport.height);

        m_r3dShader.SetViewportUniforms(&n.viewport);

        for (auto &m : n.models) {

            bool matrixLoaded = false;

            if (m.meshes->empty()) {
                continue;
            }

            for (auto &mesh : *m.meshes) {

                if (mesh.highPriority) {
                    hasOverlay = true;
                }

                if (!mesh.Render(layer)) continue;
                if (mesh.highPriority != renderOverlay) continue;

                if (!matrixLoaded) {
                    m_r3dShader.SetModelStates(&m);
                    matrixLoaded = true;        // do this here to stop loading matrices we don't need. Ie when rendering non transparent etc
                }

                m_r3dShader.SetMeshUniforms(&mesh);
                glDrawArrays(m_primType, mesh.vboOffset, mesh.vertexCount);
            }
        }
    }

    return hasOverlay;
}

// Returns true when no viewport of the given priority has any models.
bool CNew3D::SkipLayer(int layer)
{
    for (const auto &n : m_nodes) {
        if (n.viewport.priority == layer && !n.models.empty()) {
            return false;
        }
    }

    return true;
}

// Binds the vertex data and shader and sets the fixed GL state used for drawing.
void CNew3D::SetRenderStates()
{
    m_vbo.Bind(true);
    glBindVertexArray(m_vao);

    m_r3dShader.SetShader(true);

    glDepthFunc     (GL_LEQUAL);
    glEnable        (GL_DEPTH_TEST);
    glDepthMask     (GL_TRUE);
    glActiveTexture (GL_TEXTURE0);
    glDisable       (GL_CULL_FACE);                 // we'll emulate this in the shader
    glDisable       (GL_BLEND);

    glStencilFunc   (GL_EQUAL, 0, 0xFF);            // basically stencil test passes if the value is zero
    glStencilOp     (GL_KEEP, GL_INCR, GL_INCR);    // if the stencil test passes, we increment the value
    glStencilMask   (0xFF);
}

// Unbinds what SetRenderStates() bound and disables the stencil test.
void CNew3D::DisableRenderStates()
{
    m_vbo.Bind(false);
    glBindVertexArray(0);

    m_r3dShader.SetShader(false);

    glDisable(GL_STENCIL_TEST);
}

// Renders one frame: rebuilds the scene from the viewport list, uploads vertex
// data, then draws each priority layer as an opaque pass followed by two
// translucent passes, with an optional second round for high priority polys.
void CNew3D::RenderFrame(void)
{
    // reset the near/far range of every priority layer
    for (int i = 0; i < 4; i++) {
        m_nfPairs[i].zNear = -std::numeric_limits<float>::max();
        m_nfPairs[i].zFar  =  std::numeric_limits<float>::max();
    }

    {
        // swap the line of sight buffers under the lock and clear the new back buffer
        std::lock_guard guard(m_losMutex);
        std::swap(m_losBack, m_losFront);

        for (int i = 0; i < 4; i++) {
            m_losBack->value[i] = 0;
        }
    }

    // release any resources from last frame
    m_polyBufferRam.clear();        // clear dynamic model memory buffer
    m_nodes.clear();                // memory will grow during the object life time, that's fine, no need to shrink to fit
    m_modelMat.Release();           // would hope we wouldn't need this but no harm in checking
    m_nodeAttribs.Reset();

    RenderViewport(0x800000);       // build model structure

    DrawScrollFog();                // fog layer if applicable must be drawn here

    m_vbo.Bind(true);
    m_vbo.BufferSubData(MAX_ROM_VERTS*sizeof(FVertex), m_polyBufferRam.size()*sizeof(FVertex), m_polyBufferRam.data());    // upload all the dynamic data to GPU in one go

    if (!m_polyBufferRom.empty()) {

        // sync rom memory with vbo
        int romBytes = (int)m_polyBufferRom.size() * sizeof(FVertex);
        int vboBytes = m_vbo.GetSize();
        int size     = romBytes - vboBytes;

        if (size) {
            //check we haven't blown up the memory buffers
            //we will lose rom models for 1 frame is this happens, not the end of the world, as probably won't ever happen anyway
            if (m_polyBufferRom.size() >= MAX_ROM_VERTS) {
                m_polyBufferRom.clear();
                m_romMap.clear();
                m_vbo.Reset();
            }
            else {
                m_vbo.AppendData(size, &m_polyBufferRom[vboBytes / sizeof(FVertex)]);
            }
        }
    }

    m_r3dFrameBuffers.SetFBO(Layer::trans12);
    glClear(GL_COLOR_BUFFER_BIT);           // wipe both trans layers

    for (int pri = 0; pri <= 3; pri++) {

        if (SkipLayer(pri)) continue;

        // round 0 draws normal polys, round 1 draws the high priority (overlay) polys
        for (int i = 0; i < 2; i++) {

            bool renderOverlay = (i == 1);

            m_r3dFrameBuffers.SetFBO(Layer::colour);
            glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);

            SetRenderStates();

            m_r3dShader.DiscardAlpha(true);                     // discard all translucent pixels in opaque pass
            bool hasOverlay = RenderScene(pri, renderOverlay, Layer::colour);

            if (!renderOverlay) {
                ProcessLos(pri);
            }

            DisableRenderStates();

            m_r3dFrameBuffers.DrawOverTransLayers();            // mask trans layer with opaque pixels
            m_r3dFrameBuffers.CompositeBaseLayer();             // copy opaque pixels to back buffer

            SetRenderStates();

            glDepthFunc(GL_LESS);                               // alpha polys seem to use gl_less (ocean hunter)

            m_r3dShader.DiscardAlpha(false);                    // render only translucent pixels
            m_r3dFrameBuffers.StoreDepth();                     // save depth buffer for 1st trans pass
            m_r3dFrameBuffers.SetFBO(Layer::trans1);
            RenderScene(pri, renderOverlay, Layer::trans1);

            m_r3dFrameBuffers.RestoreDepth();                   // restore depth buffer, trans layers don't seem to depth test against each other
            m_r3dFrameBuffers.SetFBO(Layer::trans2);
            RenderScene(pri, renderOverlay, Layer::trans2);

            DisableRenderStates();

            if (!hasOverlay) break;                             // no high priority polys
        }
    }

    m_r3dFrameBuffers.CompositeAlphaLayer();
}

// Nothing to do at the start of a frame.
void CNew3D::BeginFrame(void)
{
}

// Nothing to do at the end of a frame.
void CNew3D::EndFrame(void)
{
}

/******************************************************************************
 Real3D Address Translation

 Functions that
interpret word-granular Real3D addresses and return pointers. ******************************************************************************/ // Translates 24-bit culling RAM addresses const UINT32* CNew3D::TranslateCullingAddress(UINT32 addr) { addr &= 0x00FFFFFF; // caller should have done this already if ((addr >= 0x800000) && (addr < 0x840000)) { return &m_cullingRAMHi[addr & 0x3FFFF]; } else if (addr < 0x100000) { return &m_cullingRAMLo[addr]; } return NULL; } // Translates model references const UINT32* CNew3D::TranslateModelAddress(UINT32 modelAddr) { modelAddr &= 0x00FFFFFF; // caller should have done this already if (modelAddr < 0x100000) { return &m_polyRAM[modelAddr]; } else { return &m_vrom[modelAddr]; } } bool CNew3D::DrawModel(UINT32 modelAddr) { const UINT32* modelAddress; bool cached = false; Model* m; modelAddress = TranslateModelAddress(modelAddr); // create a new model to push onto the vector m_nodes.back().models.emplace_back(); // get the last model in the array m = &m_nodes.back().models.back(); if (IsVROMModel(modelAddr) && !IsDynamicModel((UINT32*)modelAddress)) { // try to find meshes in the rom cache m->meshes = m_romMap[modelAddr]; // will create an entry with a null pointer if empty if (m->meshes) { cached = true; } else { m->meshes = std::make_shared>(); m_romMap[modelAddr] = m->meshes; // store meshes in our rom map here } m->dynamic = false; } else { m->meshes = std::make_shared>(); } // copy current model matrix for (int i = 0; i < 16; i++) { m->modelMat[i] = m_modelMat.currentMatrix[i]; } // update texture offsets m->textureOffsetX = m_nodeAttribs.currentTexOffsetX; m->textureOffsetY = m_nodeAttribs.currentTexOffsetY; m->page = m_nodeAttribs.currentPage; m->scale = m_nodeAttribs.currentModelScale; if (!cached) { CacheModel(m, modelAddress); } if (m_nodeAttribs.currentClipStatus != Clip::INSIDE) { ClipModel(m); // not storing clipped values, only working out the Z range } return true; } /* 0x00: x------- -------- -------- -------- Is 
UF ref -x------ -------- -------- -------- Is 3D model --x----- -------- -------- -------- Is point ---x---- -------- -------- -------- Is point ref ----x--- -------- -------- -------- Is animation -----x-- -------- -------- -------- Is billboard ------x- -------- -------- -------- Child is billboard -------x -------- -------- -------- Extra child pointer needed -------- -----xxx xxxxxx-- -------- Node ID -------- -------- -------- x------- Reset matrix -------- -------- -------- -x------ Use child pointer -------- -------- -------- --x----- Use sibling pointer -------- -------- -------- ---x---- No matrix -------- -------- -------- ----x--- Indirect child -------- -------- -------- -----x-- Valid color table -------- -------- -------- ------xx Node type(0 = viewport, 1 = root node, 2 = culling node) 0x01, 0x02 only present on Step 2 + 0x01: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx Model scale(float) 0x02 : -------- -------- x------- -------- Texture replace -------- -------- -x------ -------- Switch bank -------- -------- --xxxxxx x------- X offset -------- -------- -------- -xxxxxxx Y offset 0x03 : xxxxxxxx xxxxx--- -------- -------- Color table address 1 -------- -----xxx xxxx---- -------- LOD table pointer -------- -------- ----xxxx xxxxxxxx Node matrix */ void CNew3D::DescendCullingNode(UINT32 addr) { enum class NodeType { undefined = -1, viewport = 0, rootNode = 1, cullingNode = 2 }; const UINT32 *node, *lodTable; UINT32 matrixOffset, child1Ptr, sibling2Ptr; BBox bbox; UINT16 uCullRadius; float fCullRadius; UINT16 uBlendRadius; float fBlendRadius; UINT8 lodTablePointer; NodeType nodeType; bool resetMatrix; if (m_nodeAttribs.StackLimit()) { return; } node = TranslateCullingAddress(addr); if (NULL == node) { return; } // Extract known fields nodeType = (NodeType)(node[0x00] & 3); child1Ptr = node[0x07 - m_offset] & 0x7FFFFFF; // mask colour table bits sibling2Ptr = node[0x08 - m_offset] & 0x1FFFFFF; // mask colour table bits matrixOffset = node[0x03 - m_offset] & 
0xFFF; resetMatrix = (node[0x0] & 0x80) > 0; lodTablePointer = (node[0x03 - m_offset] >> 12) & 0x7F; // check our node type if (nodeType == NodeType::viewport) { return; // viewport nodes aren't rendered } // parse siblings if ((node[0x00] & 0x07) != 0x06) { // colour table seems to indicate no siblings if (!(sibling2Ptr & 0x1000000) && sibling2Ptr) { DescendCullingNode(sibling2Ptr); // no need to mask bit, would already be zero } } if ((node[0x00] & 0x04)) { m_colorTableAddr = ((node[0x03 - m_offset] >> 19) << 0) | ((node[0x07 - m_offset] >> 28) << 13) | ((node[0x08 - m_offset] >> 25) << 17); m_colorTableAddr &= 0x000FFFFF; // clamp to 4MB (in words) range } m_nodeAttribs.Push(); // save current attribs if (!m_offset) { // Step 1.5+ float modelScale = Util::Uint32AsFloat(node[1]); if (modelScale > std::numeric_limits::min()) { m_nodeAttribs.currentModelScale = modelScale; } // apply texture offsets, else retain current ones if ((node[0x02] & 0x8000)) { int tx = 32 * ((node[0x02] >> 7) & 0x3F); int ty = 32 * (node[0x02] & 0x1F); m_nodeAttribs.currentTexOffsetX = tx; m_nodeAttribs.currentTexOffsetY = ty; m_nodeAttribs.currentPage = (node[0x02] & 0x4000) >> 14; } } // Apply matrix and translation m_modelMat.PushMatrix(); // apply translation vector if (node[0x00] & 0x10) { float x = Util::Uint32AsFloat(node[0x04 - m_offset]); float y = Util::Uint32AsFloat(node[0x05 - m_offset]); float z = Util::Uint32AsFloat(node[0x06 - m_offset]); m_modelMat.Translate(x, y, z); } // multiply matrix, if specified else if (matrixOffset) { MultMatrix(matrixOffset,m_modelMat); } if (resetMatrix) { ResetMatrix(m_modelMat); } uCullRadius = node[9 - m_offset] & 0xFFFF; fCullRadius = R3DFloat::GetFloat16(uCullRadius); uBlendRadius = node[9 - m_offset] >> 16; fBlendRadius = R3DFloat::GetFloat16(uBlendRadius); if (m_nodeAttribs.currentClipStatus != Clip::INSIDE) { if (uCullRadius != R3DFloat::Pro16BitMax) { CalcBox(fCullRadius, bbox); TransformBox(m_modelMat, bbox); 
m_nodeAttribs.currentClipStatus = ClipBox(bbox, m_planes); if (m_nodeAttribs.currentClipStatus == Clip::INSIDE) { CalcBoxExtents(bbox); } } else { m_nodeAttribs.currentClipStatus = Clip::NOT_SET; } } if (m_nodeAttribs.currentClipStatus != Clip::OUTSIDE && fCullRadius > R3DFloat::Pro16BitFltMin) { // Descend down first link if ((node[0x00] & 0x08)) // 4-element LOD table { lodTable = TranslateCullingAddress(child1Ptr); if (NULL != lodTable) { if ((node[0x03 - m_offset] & 0x20000000)) { DescendCullingNode(lodTable[0] & 0xFFFFFF); } else { DrawModel(lodTable[0] & 0xFFFFFF); //TODO } } } else { DescendNodePtr(child1Ptr); } } m_modelMat.PopMatrix(); // Restore old texture offsets m_nodeAttribs.Pop(); } void CNew3D::DescendNodePtr(UINT32 nodeAddr) { // Ignore null links if ((nodeAddr & 0x00FFFFFF) == 0) { return; } switch ((nodeAddr >> 24) & 0x5) // pointer type encoded in upper 8 bits { case 0x00: DescendCullingNode(nodeAddr & 0xFFFFFF); break; case 0x01: DrawModel(nodeAddr & 0xFFFFFF); break; case 0x04: DescendPointerList(nodeAddr & 0xFFFFFF); break; default: break; } } void CNew3D::DescendPointerList(UINT32 addr) { const UINT32* list; UINT32 nodeAddr; int index; list = TranslateCullingAddress(addr); if (NULL == list) { return; } index = 0; while (true) { if (list[index] & 0x01000000) { break; // empty list } nodeAddr = list[index] & 0x00FFFFFF; // clear upper 8 bits to ensure this is processed as a culling node DescendCullingNode(nodeAddr); if (list[index] & 0x02000000) { break; // list end } index++; } } /****************************************************************************** Matrix Stack ******************************************************************************/ // Macro to generate column-major (OpenGL) index from y,x subscripts #define CMINDEX(y,x) (x*4+y) /* * MultMatrix(): * * Multiplies the matrix stack by the specified Real3D matrix. The matrix * index is a 12-bit number specifying a matrix number relative to the base. 
* The base matrix MUST be set up before calling this function. */ void CNew3D::MultMatrix(UINT32 matrixOffset, Mat4& mat) { GLfloat m[4*4]; const float *src = &m_matrixBasePtr[matrixOffset * 12]; if (m_matrixBasePtr == NULL) // LA Machineguns return; m[CMINDEX(0, 0)] = src[3]; m[CMINDEX(0, 1)] = src[4]; m[CMINDEX(0, 2)] = src[5]; m[CMINDEX(0, 3)] = src[0]; m[CMINDEX(1, 0)] = src[6]; m[CMINDEX(1, 1)] = src[7]; m[CMINDEX(1, 2)] = src[8]; m[CMINDEX(1, 3)] = src[1]; m[CMINDEX(2, 0)] = src[9]; m[CMINDEX(2, 1)] = src[10]; m[CMINDEX(2, 2)] = src[11]; m[CMINDEX(2, 3)] = src[2]; m[CMINDEX(3, 0)] = 0.0; m[CMINDEX(3, 1)] = 0.0; m[CMINDEX(3, 2)] = 0.0; m[CMINDEX(3, 3)] = 1.0; mat.MultMatrix(m); } /* * InitMatrixStack(): * * Initializes the modelview (model space -> view space) matrix stack and * Real3D coordinate system. These are the last transforms to be applied (and * the first to be defined on the stack) before projection. * * Model 3 games tend to define the following unusual base matrix: * * 0 0 -1 0 * 1 0 0 0 * 0 -1 0 0 * 0 0 0 1 * * When this is multiplied by a column vector, the output is: * * -Z * X * -Y * 1 * * My theory is that the Real3D GPU accepts vectors in Z,X,Y order. The games * store everything as X,Y,Z and perform the translation at the end. The Real3D * also has Y and Z coordinates opposite of the OpenGL convention. This * function inserts a compensating matrix to undo these things. * * NOTE: This function assumes we are in GL_MODELVIEW matrix mode. 
*/ void CNew3D::InitMatrixStack(UINT32 matrixBaseAddr, Mat4& mat) { GLfloat m[4 * 4]; // This matrix converts vectors back from the weird Model 3 Z,X,Y ordering // and also into OpenGL viewspace (-Y,-Z) m[CMINDEX(0, 0)] = 0.0; m[CMINDEX(0, 1)] = 1.0; m[CMINDEX(0, 2)] = 0.0; m[CMINDEX(0, 3)] = 0.0; m[CMINDEX(1, 0)] = 0.0; m[CMINDEX(1, 1)] = 0.0; m[CMINDEX(1, 2)] =-1.0; m[CMINDEX(1, 3)] = 0.0; m[CMINDEX(2, 0)] =-1.0; m[CMINDEX(2, 1)] = 0.0; m[CMINDEX(2, 2)] = 0.0; m[CMINDEX(2, 3)] = 0.0; m[CMINDEX(3, 0)] = 0.0; m[CMINDEX(3, 1)] = 0.0; m[CMINDEX(3, 2)] = 0.0; m[CMINDEX(3, 3)] = 1.0; mat.LoadMatrix(m); // Set matrix base address and apply matrix #0 (coordinate system matrix) m_matrixBasePtr = (float *)TranslateCullingAddress(matrixBaseAddr); MultMatrix(0, mat); } // what this does is to set the rotation back to zero, whilst keeping the position and scale of the current matrix void CNew3D::ResetMatrix(Mat4& mat) { float m[16]; memcpy(m, mat.currentMatrix, 16 * 4); // transpose the top 3x3 of the matrix (this effectively inverts the rotation). When we multiply our new matrix it'll effectively cancel out the rotations. 
std::swap(m[1], m[4]); std::swap(m[2], m[8]); std::swap(m[6], m[9]); // set position to zero m[12] = 0; m[13] = 0; m[14] = 0; m[15] = 1; // normalise columns, this removes the scaling, otherwise we'll apply it twice float s1 = std::sqrt((m[0] * m[0]) + (m[1] * m[1]) + (m[2] * m[2])); float s2 = std::sqrt((m[4] * m[4]) + (m[5] * m[5]) + (m[6] * m[6])); float s3 = std::sqrt((m[8] * m[8]) + (m[9] * m[9]) + (m[10] * m[10])); m[0] /= s1; m[4] /= s2; m[8] /= s3; m[1] /= s1; m[5] /= s2; m[9] /= s3; m[2] /= s1; m[6] /= s2; m[10] /= s3; mat.MultMatrix(m); } // Draws viewports of the given priority void CNew3D::RenderViewport(UINT32 addr) { static const GLfloat color[8][3] = { // RGB1 color translation { 0.0f, 0.0f, 0.0f }, // off { 0.0f, 0.0f, 1.0f }, // blue { 0.0f, 1.0f, 0.0f }, // green { 0.0f, 1.0f, 1.0f }, // cyan { 1.0f, 0.0f, 0.0f }, // red { 1.0f, 0.0f, 1.0f }, // purple { 1.0f, 1.0f, 0.0f }, // yellow { 1.0f, 1.0f, 1.0f } // white }; if ((addr & 0x00FFFFFF) == 0) { return; } // Translate address and obtain pointer const uint32_t *vpnode = TranslateCullingAddress(addr); if (NULL == vpnode) { return; } if (!(vpnode[0] & 0x20)) { // only if viewport enabled // create node object m_nodes.emplace_back(Node()); m_nodes.back().models.reserve(2048); // create space for models // get pointer to its viewport Viewport *vp = &m_nodes.back().viewport; vp->priority = (vpnode[0] >> 3) & 0x3; vp->select = (vpnode[0] >> 8) & 0x3; vp->number = (vpnode[0] >> 10); m_currentPriority = vp->priority; // Fetch viewport parameters (TO-DO: would rounding make a difference?) 
vp->vpX = (int)(((vpnode[0x1A] & 0xFFFF) * (float)(1.0 / 16.0)) + 0.5f); // viewport X (12.4 fixed point) vp->vpY = (int)(((vpnode[0x1A] >> 16) * (float)(1.0 / 16.0)) + 0.5f); // viewport Y (12.4) vp->vpWidth = (int)(((vpnode[0x14] & 0xFFFF) * (float)(1.0 / 4.0)) + 0.5f); // width (14.2) vp->vpHeight = (int)(((vpnode[0x14] >> 16) * (float)(1.0 / 4.0)) + 0.5f); // height (14.2) uint32_t matrixBase = vpnode[0x16] & 0xFFFFFF; // matrix base address m_LODBlendTable = (LODBlendTable*)TranslateCullingAddress(vpnode[0x17] & 0xFFFFFF); /* vp->angle_left = -atan2f(Util::Uint32AsFloat(vpnode[12]), Util::Uint32AsFloat(vpnode[13])); // These values work out as the normals for the clipping planes. vp->angle_right = atan2f(Util::Uint32AsFloat(vpnode[16]), -Util::Uint32AsFloat(vpnode[17])); // Sometimes these values (dirt devils,lost world) are totally wrong vp->angle_top = atan2f(Util::Uint32AsFloat(vpnode[14]), Util::Uint32AsFloat(vpnode[15])); // and don't work for the frustum values exactly. vp->angle_bottom = -atan2f(Util::Uint32AsFloat(vpnode[18]), -Util::Uint32AsFloat(vpnode[19])); // Perhaps they are just used for culling and not rendering. 
*/ float cv = Util::Uint32AsFloat(vpnode[0x8]); // 1/(left-right) float cw = Util::Uint32AsFloat(vpnode[0x9]); // 1/(top-bottom) float io = Util::Uint32AsFloat(vpnode[0xa]); // top / bottom (ratio) - ish float jo = Util::Uint32AsFloat(vpnode[0xb]); // left / right (ratio) vp->angle_left = (0.0f - jo) / cv; vp->angle_right = (1.0f - jo) / cv; vp->angle_bottom = -(1.0f - io)/ cw; vp->angle_top = -(0.0f - io)/ cw; // calculate the frustum shape, near/far pair are dummy values CalcViewport(vp, 1.f, 1000.f); // calculate frustum planes CalcFrustumPlanes(m_planes, vp->projectionMatrix); // we need to calc a 'projection matrix' to get the correct frustum planes for clipping // Lighting (note that sun vector points toward sun -- away from vertex) vp->lightingParams[0] = Util::Uint32AsFloat(vpnode[0x05]); // sun X vp->lightingParams[1] = -Util::Uint32AsFloat(vpnode[0x06]); // sun Y (- to convert to ogl cordinate system) vp->lightingParams[2] = -Util::Uint32AsFloat(vpnode[0x04]); // sun Z (- to convert to ogl cordinate system) vp->lightingParams[3] = std::max(0.f, std::min(Util::Uint32AsFloat(vpnode[0x07]), 1.0f)); // sun intensity (clamp to 0-1) vp->lightingParams[4] = (float)((vpnode[0x24] >> 8) & 0xFF) * (float)(1.0 / 255.0); // ambient intensity vp->lightingParams[5] = 0.0f; // reserved vp->sunClamp = m_sunClamp; vp->intensityClamp = (m_step == 0x10); // just step 1.0 ? 
vp->hardwareStep = m_step; // Spotlight int spotColorIdx = (vpnode[0x20] >> 11) & 7; // spotlight color index int spotFogColorIdx = (vpnode[0x20] >> 8) & 7; // spotlight on fog color index vp->spotEllipse[0] = (float)(INT16)(vpnode[0x1E] & 0xFFFF) * (float)(1.0 / 8.0);// spotlight X position (13.3 fixed point) vp->spotEllipse[1] = (float)(INT16)(vpnode[0x1D] & 0xFFFF) * (float)(1.0 / 8.0);// spotlight Y vp->spotEllipse[2] = (float)((vpnode[0x1E] >> 16) & 0xFFFF); // spotlight X size (16-bit) vp->spotEllipse[3] = (float)((vpnode[0x1D] >> 16) & 0xFFFF); // spotlight Y size vp->spotRange[0] = 1.0f / Util::Uint32AsFloat(vpnode[0x21]); // spotlight start vp->spotRange[1] = Util::Uint32AsFloat(vpnode[0x1F]); // spotlight extent vp->spotColor[0] = color[spotColorIdx][0]; // spotlight color vp->spotColor[1] = color[spotColorIdx][1]; vp->spotColor[2] = color[spotColorIdx][2]; vp->spotFogColor[0] = color[spotFogColorIdx][0]; // spotlight color on fog vp->spotFogColor[1] = color[spotFogColorIdx][1]; vp->spotFogColor[2] = color[spotFogColorIdx][2]; // spotlight is specified in terms of physical resolution vp->spotEllipse[1] = 384.0f - vp->spotEllipse[1]; // flip Y position // Avoid division by zero vp->spotEllipse[2] = std::max(1.0f, vp->spotEllipse[2]); vp->spotEllipse[3] = std::max(1.0f, vp->spotEllipse[3]); vp->spotEllipse[2] = std::roundf(2047.0f / vp->spotEllipse[2]); vp->spotEllipse[3] = std::roundf(2047.0f / vp->spotEllipse[3]); // Scale the spotlight to the OpenGL viewport vp->spotEllipse[0] = vp->spotEllipse[0] * m_xRatio + (float)m_xOffs; vp->spotEllipse[1] = vp->spotEllipse[1] * m_yRatio + (float)m_yOffs; vp->spotEllipse[2] *= m_xRatio; vp->spotEllipse[3] *= m_yRatio; // Line of sight position vp->losPosX = (int)(((vpnode[0x1c] & 0xFFFF) / 16.0f) + 0.5f); // x position vp->losPosY = (int)(((vpnode[0x1c] >> 16) / 16.0f) + 0.5f); // y position 0 starts from the top // Fog vp->fogParams[0] = (float)((vpnode[0x22] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog color R 
vp->fogParams[1] = (float)((vpnode[0x22] >> 8) & 0xFF) * (float)(1.0 / 255.0); // fog color G vp->fogParams[2] = (float)((vpnode[0x22] >> 0) & 0xFF) * (float)(1.0 / 255.0); // fog color B vp->fogParams[3] = std::abs(Util::Uint32AsFloat(vpnode[0x23])); // fog density - ocean hunter uses negative values, but looks the same vp->fogParams[4] = (float)(INT16)(vpnode[0x25] & 0xFFFF)* (float)(1.0 / 255.0); // fog start // Avoid Infinite and NaN values for Star Wars Trilogy if (std::isinf(vp->fogParams[3]) || std::isnan(vp->fogParams[3])) { for (int i = 0; i < 7; i++) vp->fogParams[i] = 0.0f; } vp->fogParams[5] = (float)((vpnode[0x24] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog attenuation vp->fogParams[6] = (float)((vpnode[0x25] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog ambient vp->scrollFog = (float)(vpnode[0x20] & 0xFF) * (float)(1.0 / 255.0); // scroll fog vp->scrollAtt = (float)(vpnode[0x24] & 0xFF) * (float)(1.0 / 255.0); // scroll attenuation // Clear texture offsets before proceeding m_nodeAttribs.Reset(); // Set up coordinate system and base matrix InitMatrixStack(matrixBase, m_modelMat); // Descend down the node link. Need to start with a culling node because that defines our culling radius. 
auto childptr = vpnode[0x02]; if (((childptr >> 24) & 0x5) == 0) { DescendNodePtr(vpnode[0x02]); } } // render next viewport if (vpnode[0x01] != 0x01000000) { RenderViewport(vpnode[0x01]); } } void CNew3D::CopyVertexData(const R3DPoly& r3dPoly, std::vector& vertexArray) { // both lemans 24 and dirt devils are rendering some totally transparent polys as the first object in each viewport // in dirt devils it's parallel to the camera so is completely invisible, but breaks our depth calculation // in lemans 24 its a sort of diamond shape, but never leaves a hole in the transparent geometry so must be being skipped by the h/w if (r3dPoly.faceColour[3] == 0) { return; } if (m_numPolyVerts==4) { if (r3dPoly.number == 4) { vertexArray.emplace_back(r3dPoly, 0); // construct directly inside container without copy vertexArray.emplace_back(r3dPoly, 1); vertexArray.emplace_back(r3dPoly, 2); vertexArray.emplace_back(r3dPoly, 3); // check for identical points (ie forced triangle) and replace with average point // if we don't do this our quad code falls apart FVertex* v = (&vertexArray.back()) - 3; for (int i = 0; i < 4; i++) { int next1 = (i + 1) % 4; int next2 = (i + 2) % 4; if (FVertex::Equal(v[i], v[next1])) { FVertex::Average(v[next1], v[next2], v[next1]); break; } } } else { vertexArray.emplace_back(r3dPoly, 0); vertexArray.emplace_back(r3dPoly, 1); vertexArray.emplace_back(r3dPoly, 2); vertexArray.emplace_back(r3dPoly, 0, 2); // last point is an average of 0 and 2 } } else { vertexArray.emplace_back(r3dPoly, 0); vertexArray.emplace_back(r3dPoly, 1); vertexArray.emplace_back(r3dPoly, 2); if (r3dPoly.number == 4) { vertexArray.emplace_back(r3dPoly, 0); vertexArray.emplace_back(r3dPoly, 2); vertexArray.emplace_back(r3dPoly, 3); } } } void CNew3D::GetCoordinates(int width, int height, UINT16 uIn, UINT16 vIn, float uvScale, float& uOut, float& vOut) { uOut = (uIn * uvScale) / width; vOut = (vIn * uvScale) / height; } int CNew3D::GetTexFormat(int originalFormat, bool contour) { 
if (!contour) { return originalFormat; // the same } switch (originalFormat) { case 1: case 2: case 3: case 4: return originalFormat + 7; // these formats are identical to 1-4, except they lose the 4 bit alpha part when contour is enabled default: return originalFormat; } } void CNew3D::SetMeshValues(SortingMesh *currentMesh, PolyHeader &ph) { //copy attributes currentMesh->textured = ph.TexEnabled(); currentMesh->alphaTest = ph.AlphaTest(); currentMesh->textureAlpha = ph.TextureAlpha(); currentMesh->polyAlpha = ph.PolyAlpha(); currentMesh->lighting = ph.LightEnabled(); currentMesh->fixedShading = ph.FixedShading() && !ph.SmoothShading(); currentMesh->highPriority = ph.HighPriority(); currentMesh->transLSelect = ph.TranslucencyPatternSelect(); currentMesh->layered = ph.Layered(); currentMesh->specular = ph.SpecularEnabled(); currentMesh->shininess = ph.Shininess(); currentMesh->specularValue = ph.SpecularValue(); currentMesh->fogIntensity = ph.LightModifier(); currentMesh->translatorMap = ph.TranslatorMap(); if (currentMesh->textured) { currentMesh->format = GetTexFormat(ph.TexFormat(), ph.AlphaTest()); if (currentMesh->format == 7) { currentMesh->alphaTest = false; // alpha test is a 1 bit test, this format needs a lower threshold, since it has 16 levels of transparency } currentMesh->x = ph.X(); currentMesh->y = ph.Y(); currentMesh->width = ph.TexWidth(); currentMesh->height = ph.TexHeight(); currentMesh->microTexture = ph.MicroTexture(); currentMesh->inverted = ph.TranslatorMapOffset() == 2; { bool smoothU = ph.TexSmoothU(); bool smoothV = ph.TexSmoothV(); if (ph.AlphaTest()) { smoothU = false; // smooth wrap makes no sense for alpha tested polys with pixel dilate smoothV = false; } if (ph.TexUMirror()) { if (smoothU) currentMesh->wrapModeU = Mesh::TexWrapMode::mirror; else currentMesh->wrapModeU = Mesh::TexWrapMode::mirrorClamp; } else { if (smoothU) currentMesh->wrapModeU = Mesh::TexWrapMode::repeat; else currentMesh->wrapModeU = 
Mesh::TexWrapMode::repeatClamp; } if (ph.TexVMirror()) { if (smoothV) currentMesh->wrapModeV = Mesh::TexWrapMode::mirror; else currentMesh->wrapModeV = Mesh::TexWrapMode::mirrorClamp; } else { if (smoothV) currentMesh->wrapModeV = Mesh::TexWrapMode::repeat; else currentMesh->wrapModeV = Mesh::TexWrapMode::repeatClamp; } } if (currentMesh->microTexture) { static const float microTexScale[] = { 2.f, 4.f, 16.f, 256.f }; currentMesh->microTextureID = ph.MicroTextureID(); currentMesh->microTextureScale = microTexScale[ph.MicroTextureMinLOD()]; } } } void CNew3D::CacheModel(Model *m, const UINT32 *data) { if (data == NULL) return; UINT16 texCoords[4][2]; PolyHeader ph; UINT64 lastHash = -1; SortingMesh* currentMesh = nullptr; std::unordered_map sMap; ph = data; int numTriangles = ph.NumTrianglesTotal(); // Cache all polygons do { R3DPoly p; // current polygon float uvScale; if (ph.header[6] == 0) { break; } // create a hash value based on poly attributes -todo add more attributes auto hash = ph.Hash(); if (hash != lastHash) { if (sMap.count(hash) == 0) { currentMesh = &sMap.insert({hash, SortingMesh()}).first->second; //make space for our vertices currentMesh->verts.reserve(numTriangles * 3); //set mesh values SetMeshValues(currentMesh, ph); } else currentMesh = &sMap[hash]; } // Obtain basic polygon parameters p.number = ph.NumVerts(); uvScale = ph.UVScale(); ph.FaceNormal(p.faceNormal); // Fetch reused vertices according to bitfield, then new verts int j = 0; for (int i = 0; i < 4; i++) // up to 4 reused vertices { if (ph.SharedVertex(i)) { p.v[j] = m_prev[i]; texCoords[j][0] = m_prevTexCoords[i][0]; texCoords[j][1] = m_prevTexCoords[i][1]; //check if we need to recalc tex coords - will only happen if tex tiles are different + sharing vertices if (hash != lastHash) { if (currentMesh->textured) { GetCoordinates(currentMesh->width, currentMesh->height, texCoords[j][0], texCoords[j][1], uvScale, p.v[j].texcoords[0], p.v[j].texcoords[1]); } } j++; } } lastHash = hash; // 
copy face attributes if (!ph.PolyColor()) { int colorIdx = ph.ColorIndex(); p.faceColour[2] = (m_polyRAM[m_colorTableAddr + colorIdx] & 0xFF); p.faceColour[1] = ((m_polyRAM[m_colorTableAddr + colorIdx] >> 8) & 0xFF); p.faceColour[0] = ((m_polyRAM[m_colorTableAddr + colorIdx] >> 16) & 0xFF); } else { p.faceColour[0] = ((ph.header[4] >> 24)); p.faceColour[1] = ((ph.header[4] >> 16) & 0xFF); p.faceColour[2] = ((ph.header[4] >> 8) & 0xFF); } p.faceColour[3] = ph.Transparency(); if (ph.Discard1() && !ph.Discard2()) { p.faceColour[3] /= 2; } // if we have flat shading, we can't re-use normals from shared vertices for (int i = 0; i < p.number && !ph.SmoothShading(); i++) { p.v[i].normal[0] = p.faceNormal[0]; p.v[i].normal[1] = p.faceNormal[1]; p.v[i].normal[2] = p.faceNormal[2]; } UINT32* vData = ph.StartOfData(); // vertex data starts here // remaining vertices are new and defined here for (; j < p.number; j++) { // Fetch vertices UINT32 ix = vData[0]; UINT32 iy = vData[1]; UINT32 iz = vData[2]; UINT32 it = vData[3]; // Decode vertices p.v[j].pos[0] = (((INT32)ix) >> 8) * m_vertexFactor; p.v[j].pos[1] = (((INT32)iy) >> 8) * m_vertexFactor; p.v[j].pos[2] = (((INT32)iz) >> 8) * m_vertexFactor; p.v[j].pos[3] = 1.0f; // Per vertex normals if (ph.SmoothShading()) { p.v[j].normal[0] = BYTE_TO_FLOAT((INT8)(ix & 0xFF)); p.v[j].normal[1] = BYTE_TO_FLOAT((INT8)(iy & 0xFF)); p.v[j].normal[2] = BYTE_TO_FLOAT((INT8)(iz & 0xFF)); } if (ph.FixedShading() && !ph.SmoothShading()) { // fixed shading seems to be disabled if actual normals are set //========== float shade; //========== if (!m_shadeIsSigned) { shade = (ix & 0xFF) * (float)(1.0 / 255.0); } else { shade = BYTE_TO_FLOAT((INT8)(ix & 0xFF)); } p.v[j].fixedShade = shade; } float texU = 0; float texV = 0; // tex coords if (currentMesh->textured) { GetCoordinates(currentMesh->width, currentMesh->height, (UINT16)(it >> 16), (UINT16)(it & 0xFFFF), uvScale, texU, texV); } p.v[j].texcoords[0] = texU; p.v[j].texcoords[1] = texV; //cache 
un-normalised tex coordinates texCoords[j][0] = (UINT16)(it >> 16); texCoords[j][1] = (UINT16)(it & 0xFFFF); vData += 4; } // check if we need to double up vertices for two sided lighting if (ph.DoubleSided() && !ph.Discard()) { R3DPoly tempP = p; // flip normals V3::inverse(tempP.faceNormal); for (int i2 = 0; i2 < tempP.number; i2++) { V3::inverse(tempP.v[i2].normal); } CopyVertexData(tempP, currentMesh->verts); } // Copy this polygon into the model buffer if (!ph.Discard()) { CopyVertexData(p, currentMesh->verts); } // Copy current vertices into previous vertex array for (int i = 0; i < 4; i++) { m_prev[i] = p.v[i]; m_prevTexCoords[i][0] = texCoords[i][0]; m_prevTexCoords[i][1] = texCoords[i][1]; } } while (ph.NextPoly()); //sorted the data, now copy to main data structures // we know how many meshes we have to reserve appropriate space m->meshes->reserve(sMap.size()); for (auto& it : sMap) { if (m->dynamic) { // calculate VBO values for current mesh it.second.vboOffset = (int)m_polyBufferRam.size() + MAX_ROM_VERTS; it.second.vertexCount = (int)it.second.verts.size(); // copy poly data to main buffer m_polyBufferRam.insert(m_polyBufferRam.end(), it.second.verts.begin(), it.second.verts.end()); } else { // calculate VBO values for current mesh it.second.vboOffset = (int)m_polyBufferRom.size(); it.second.vertexCount = (int)it.second.verts.size(); // copy poly data to main buffer m_polyBufferRom.insert(m_polyBufferRom.end(), it.second.verts.begin(), it.second.verts.end()); } //copy the temp mesh into the model structure //this will lose the associated vertex data, which is now copied to the main buffer anyway m->meshes->push_back(it.second); } } bool CNew3D::IsDynamicModel(UINT32 *data) { if (data == NULL) { return false; } PolyHeader p(data); do { if ((p.header[1] & 2) == 0) { // model has rgb colour palette return true; } if (p.header[6] == 0) { break; } } while (p.NextPoly()); return false; } bool CNew3D::IsVROMModel(UINT32 modelAddr) { return modelAddr >= 
0x100000; } void CNew3D::CalcTexOffset(int offX, int offY, int page, int x, int y, int& newX, int& newY) { newX = (x + offX) & 2047; // wrap around 2048, shouldn't be required int oldPage = y / 1024; y -= (oldPage * 1024); // remove page from tex y // calc newY with wrap around, wraps around in the same sheet, not into another memory sheet newY = (y + offY) & 1023; // add page to Y newY += ((oldPage + page) & 1) * 1024; // max page 0-1 } void CNew3D::CalcFrustumPlanes(Plane p[5], const float* matrix) { // Left Plane p[0].a = matrix[3] + matrix[0]; p[0].b = matrix[7] + matrix[4]; p[0].c = matrix[11] + matrix[8]; p[0].d = matrix[15] + matrix[12]; p[0].Normalise(); // Right Plane p[1].a = matrix[3] - matrix[0]; p[1].b = matrix[7] - matrix[4]; p[1].c = matrix[11] - matrix[8]; p[1].d = matrix[15] - matrix[12]; p[1].Normalise(); // Bottom Plane p[2].a = matrix[3] + matrix[1]; p[2].b = matrix[7] + matrix[5]; p[2].c = matrix[11] + matrix[9]; p[2].d = matrix[15] + matrix[13]; p[2].Normalise(); // Top Plane p[3].a = matrix[3] - matrix[1]; p[3].b = matrix[7] - matrix[5]; p[3].c = matrix[11] - matrix[9]; p[3].d = matrix[15] - matrix[13]; p[3].Normalise(); // Front Plane p[4].a = 0.f; p[4].b = 0.f; p[4].c = -1.f; p[4].d = 0.f; } void CNew3D::CalcBox(float distance, BBox& box) { //bottom left front box.points[0][0] = -distance; box.points[0][1] = -distance; box.points[0][2] = distance; box.points[0][3] = 1.f; //bottom left back box.points[1][0] = -distance; box.points[1][1] = -distance; box.points[1][2] = -distance; box.points[1][3] = 1.f; //bottom right back box.points[2][0] = distance; box.points[2][1] = -distance; box.points[2][2] = -distance; box.points[2][3] = 1.f; //bottom right front box.points[3][0] = distance; box.points[3][1] = -distance; box.points[3][2] = distance; box.points[3][3] = 1.f; //top left front box.points[4][0] = -distance; box.points[4][1] = distance; box.points[4][2] = distance; box.points[4][3] = 1.f; //top left back box.points[5][0] = -distance; 
box.points[5][1] = distance; box.points[5][2] = -distance; box.points[5][3] = 1.f; //top right back box.points[6][0] = distance; box.points[6][1] = distance; box.points[6][2] = -distance; box.points[6][3] = 1.f; //top right front box.points[7][0] = distance; box.points[7][1] = distance; box.points[7][2] = distance; box.points[7][3] = 1.f; } void CNew3D::MultVec(const float matrix[16], const float in[4], float out[4]) { for (int i = 0; i < 4; i++) { out[i] = in[0] * matrix[0 * 4 + i] + in[1] * matrix[1 * 4 + i] + in[2] * matrix[2 * 4 + i] + in[3] * matrix[3 * 4 + i]; } } void CNew3D::TransformBox(const float *m, BBox& box) { for (int i = 0; i < 8; i++) { float v[4]; MultVec(m, box.points[i], v); box.points[i][0] = v[0]; box.points[i][1] = v[1]; box.points[i][2] = v[2]; } } Clip CNew3D::ClipBox(const BBox& box, Plane planes[5]) { int count = 0; for (int i = 0; i < 8; i++) { int temp = 0; for (int j = 0; j < 5; j++) { if (planes[j].DistanceToPoint(box.points[i]) >= 0.f) { temp++; } } if (temp == 5) count++; // point is inside all 4 frustum planes } if (count == 8) return Clip::INSIDE; if (count > 0) return Clip::INTERCEPT; //if we got here all points are outside of the view frustum //check for all points being side same of any plane, means box outside of view for (int i = 0; i < 5; i++) { int temp = 0; for (int j = 0; j < 8; j++) { if (planes[i].DistanceToPoint(box.points[j]) >= 0.f) { temp++; } } if (temp == 0) { return Clip::OUTSIDE; } } //if we got here, box is traversing view frustum return Clip::INTERCEPT; } void CNew3D::CalcBoxExtents(const BBox& box) { for (int i = 0; i < 8; i++) { if (box.points[i][2] < 0.f) { m_nfPairs[m_currentPriority].zNear = std::max(box.points[i][2], m_nfPairs[m_currentPriority].zNear); m_nfPairs[m_currentPriority].zFar = std::min(box.points[i][2], m_nfPairs[m_currentPriority].zFar); } } } void CNew3D::ClipPolygon(ClipPoly& clipPoly, Plane planes[5]) { //============ ClipPoly temp; ClipPoly *in; ClipPoly *out; //============ in = 
&clipPoly; out = &temp; for (int i = 0; i < 4; i++) { //================= bool currentIn; float currentDot; //================= currentDot = planes[i].DotProduct(in->list[0].pos); currentIn = (currentDot + planes[i].d) >= 0.f; out->count = 0; for (int j = 0; j < in->count; j++) { if (currentIn) { out->list[out->count] = in->list[j]; out->count++; } int nextIndex = j + 1; if (nextIndex >= in->count) { nextIndex = 0; } float nextDot = planes[i].DotProduct(in->list[nextIndex].pos); bool nextIn = (nextDot + planes[i].d) >= 0.f; // we have an intersection if (currentIn != nextIn) { float u = (currentDot + planes[i].d) / (currentDot - nextDot); const float* p1 = in->list[j].pos; const float* p2 = in->list[nextIndex].pos; out->list[out->count].pos[0] = p1[0] + ((p2[0] - p1[0]) * u); out->list[out->count].pos[1] = p1[1] + ((p2[1] - p1[1]) * u); out->list[out->count].pos[2] = p1[2] + ((p2[2] - p1[2]) * u); out->count++; } currentDot = nextDot; currentIn = nextIn; } std::swap(in, out); } } void CNew3D::ClipModel(const Model *m) { //=============================== ClipPoly clipPoly; std::vector* vertices; int offset; //=============================== if (m->dynamic) { vertices = &m_polyBufferRam; offset = MAX_ROM_VERTS; } else { vertices = &m_polyBufferRom; offset = 0; } for (const auto &mesh : *m->meshes) { int start = mesh.vboOffset - offset; for (int i = 0; i < mesh.vertexCount; i += m_numPolyVerts) { // inc to next poly for (int j = 0; j < m_numPolyVerts; j++) { MultVec(m->modelMat, (*vertices)[start + i + j].pos, clipPoly.list[j].pos); // copy all 3 of 4 our transformed vertices into our clip poly struct } clipPoly.count = m_numPolyVerts; ClipPolygon(clipPoly, m_planes); for (int j = 0; j < clipPoly.count; j++) { if (clipPoly.list[j].pos[2] < 0.f) { m_nfPairs[m_currentPriority].zNear = std::max(clipPoly.list[j].pos[2], m_nfPairs[m_currentPriority].zNear); m_nfPairs[m_currentPriority].zFar = std::min(clipPoly.list[j].pos[2], m_nfPairs[m_currentPriority].zFar); } } } } } 
void CNew3D::CalcViewport(Viewport* vp, float near, float far) { if (far > 1e30f) { far = near * 1000000.f; // fix for ocean hunter which passes some FLT_MAX for a few matrices. HW must have some safe guard for these } if (near < far / 1000000.f) { near = far / 1000000.f; // if we get really close to zero somehow, we will have almost no depth precision } float l = near * vp->angle_left; // we need to calc the shape of the projection frustum for culling float r = near * vp->angle_right; float t = near * vp->angle_top; float b = near * vp->angle_bottom; vp->projectionMatrix.LoadIdentity(); // reset matrix if ((vp->vpX == 0) && (vp->vpWidth >= 495) && (vp->vpY == 0) && (vp->vpHeight >= 383)) { /* * Compute aspect ratio correction factor. "Window" refers to the full GL * viewport (i.e., totalXRes x totalYRes). "Viewable area" is the effective * Model 3 screen (xRes x yRes). In non-wide-screen, non-stretch mode, this * is intended to replicate the 496x384 display and may in general be * smaller than the window. The rest of the window appears to have a * border, which is created by a scissor box. * * In wide-screen mode, we want to expand the frustum horizontally to fill * the window. We want the aspect ratio to be correct. To accomplish this, * the viewable area is set *the same* as in non-wide-screen mode (e.g., * often smaller than the window) but glScissor() is set by the OSD layer's * screen setup code to reveal the entire window. * * In stretch mode, the window and viewable area are both set the same, * which means there will be no aspect ratio correction and the display * will stretch to fill the entire window while keeping the view frustum * the same as a 496x384 Model 3 display. The display will be distorted. 
*/ float windowAR = (float)m_totalXRes / (float)m_totalYRes; float viewableAreaAR = (float)m_xRes / (float)m_yRes; // Will expand horizontal frustum planes only in non-stretch mode (wide- // screen and non-wide-screen modes have identical resolution parameters // and only their scissor box differs) float correction = windowAR / viewableAreaAR; vp->x = 0; vp->y = m_yOffs + (int)((float)(384 - (vp->vpY + vp->vpHeight))*m_yRatio); vp->width = m_totalXRes; vp->height = (int)((float)vp->vpHeight*m_yRatio); vp->projectionMatrix.Frustum(l*correction, r*correction, b, t, near, far); } else { vp->x = m_xOffs + (int)((float)vp->vpX*m_xRatio); vp->y = m_yOffs + (int)((float)(384 - (vp->vpY + vp->vpHeight))*m_yRatio); vp->width = (int)((float)vp->vpWidth*m_xRatio); vp->height = (int)((float)vp->vpHeight*m_yRatio); vp->projectionMatrix.Frustum(l, r, b, t, near, far); } } void CNew3D::SetSunClamp(bool enable) { m_sunClamp = enable; } void CNew3D::SetSignedShade(bool enable) { m_shadeIsSigned = enable; } float CNew3D::GetLosValue(int layer) { // we always write to the 'back' buffer, and the software reads from the front // then they get swapped std::lock_guard guard(m_losMutex); return m_losFront->value[layer]; } void CNew3D::TranslateLosPosition(int inX, int inY, int& outX, int& outY) { // remap real3d 496x384 to our new viewport inY = 384 - inY; outX = m_xOffs + int(inX * m_xRatio); outY = m_yOffs + int(inY * m_yRatio); } bool CNew3D::ProcessLos(int priority) { for (const auto &n : m_nodes) { if (n.viewport.priority == priority) { if (n.viewport.losPosX || n.viewport.losPosY) { int losX, losY; TranslateLosPosition(n.viewport.losPosX, n.viewport.losPosY, losX, losY); float depth; glReadPixels(losX, losY, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT, &depth); if (depth < 0.99f || depth == 1.0f) { // kinda guess work but when depth = 1, haven't drawn anything, when 0.99~ drawing sky somewhere far return false; } depth = 2.0f * depth - 1.0f; float zNear = m_nfPairs[priority].zNear; float 
zFar = m_nfPairs[priority].zFar; float zVal = 2.0f * zNear * zFar / (zFar + zNear - depth * (zFar - zNear)); m_losBack->value[priority] = zVal; return true; } } } return false; } } // New3D