#include "New3D.h"
#include "Vec.h"
// NOTE(review): five system header names were missing from this copy of the
// file (only bare "#include" tokens remained). The headers below are the ones
// the standard-library calls in this file require; confirm against version
// control.
#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
#include <mutex>
#include "R3DFloat.h"
#include "Util/BitCast.h"

#define MAX_RAM_VERTS 300000
#define MAX_ROM_VERTS 1500000

#define BYTE_TO_FLOAT(B)    ((2.0f * (B) + 1.0f) * (float)(1.0/255.0))

namespace New3D {

// Loads the Real3D shader, allocates the 2048x2048 R16UI texture sheet and
// creates the VAO/VBO (sized for MAX_RAM_VERTS + MAX_ROM_VERTS vertices) with
// the FVertex attribute layout. The emulated memory pointers stay null until
// AttachMemory() is called.
CNew3D::CNew3D(const Util::Config::Node &config, const std::string& gameName) :
    m_r3dShader(config),
    m_r3dScrollFog(config),
    m_gameName(gameName),
    m_textureBuffer(0),
    m_vao(0)
{
    m_cullingRAMLo  = nullptr;
    m_cullingRAMHi  = nullptr;
    m_polyRAM       = nullptr;
    m_vrom          = nullptr;
    m_textureRAM    = nullptr;
    m_sunClamp      = true;
    m_shadeIsSigned = true;
    m_numPolyVerts  = 3;
    m_primType      = GL_TRIANGLES;

    // NOTE(review): the template argument was missing in this copy; bool is
    // inferred from its use as an if-condition - confirm.
    if (config["QuadRendering"].ValueAs<bool>()) {
        m_numPolyVerts = 4;
        m_primType = GL_LINES_ADJACENCY;
    }

    m_r3dShader.LoadShader();
    glUseProgram(0);

    // setup our texture memory
    glGenTextures(1, &m_textureBuffer);
    glBindTexture(GL_TEXTURE_2D, m_textureBuffer);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_R16UI, 2048, 2048, 0, GL_RED_INTEGER, GL_UNSIGNED_SHORT, nullptr);  // allocate storage

    // setup up our vertex buffer memory
    glGenVertexArrays(1, &m_vao);
    glBindVertexArray(m_vao);

    m_vbo.Create(GL_ARRAY_BUFFER, GL_DYNAMIC_DRAW, sizeof(FVertex) * (MAX_RAM_VERTS + MAX_ROM_VERTS));
    m_vbo.Bind(true);

    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inVertex"));
    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inNormal"));
    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inTexCoord"));
    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inColour"));
    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inFaceNormal"));
    glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inFixedShade"));

    // before draw, specify vertex and index arrays with their offsets, offsetof is maybe evil ..
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inVertex"), 4, GL_FLOAT, GL_FALSE, sizeof(FVertex), 0);
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inNormal"), 3, GL_FLOAT, GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, normal));
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inTexCoord"), 2, GL_FLOAT, GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, texcoords));
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inColour"), 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(FVertex), (void*)offsetof(FVertex, faceColour));
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inFaceNormal"), 3, GL_FLOAT, GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, faceNormal));
    glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inFixedShade"), 1, GL_FLOAT, GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, fixedShade));

    glBindVertexArray(0);
    m_vbo.Bind(false);
}

// Releases the VBO, VAO, texture and shader created by the constructor.
CNew3D::~CNew3D()
{
    m_vbo.Destroy();

    if (m_vao) {
        glDeleteVertexArrays(1, &m_vao);
        m_vao = 0;
    }

    if (m_textureBuffer) {
        glDeleteTextures(1, &m_textureBuffer);
        m_textureBuffer = 0;
    }

    m_r3dShader.UnloadShader();
}

// Stores the (non-owning) pointers to the emulated Real3D memory regions.
void CNew3D::AttachMemory(const UINT32 *cullingRAMLoPtr, const UINT32 *cullingRAMHiPtr, const UINT32 *polyRAMPtr, const UINT32 *vromPtr, const UINT16 *textureRAMPtr)
{
    m_cullingRAMLo  = cullingRAMLoPtr;
    m_cullingRAMHi  = cullingRAMHiPtr;
    m_polyRAM       = polyRAMPtr;
    m_vrom          = vromPtr;
    m_textureRAM    = textureRAMPtr;
}

// Selects the hardware stepping. Anything other than 0x10, 0x15, 0x20 or 0x21
// falls back to 0x10. The stepping fixes the culling node word offset and the
// fixed-point scale applied to vertices.
void CNew3D::SetStepping(int stepping)
{
    m_step = stepping;

    if ((m_step != 0x10) && (m_step != 0x15) && (m_step != 0x20) && (m_step != 0x21)) {
        m_step = 0x10;
    }

    if (m_step > 0x10) {
        m_offset = 0;                       // culling nodes are 10 words
        m_vertexFactor = (1.0f / 2048.0f);  // vertices are in 13.11 format
    }
    else {
        m_offset = 2;                       // 8 words
        m_vertexFactor = (1.0f / 128.0f);   // 17.7
    }
}

// Records the output geometry and (re)creates the frame buffer objects at the
// total resolution. Always returns OKAY.
bool CNew3D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXResParam, unsigned totalYResParam)
{
    // Resolution and offset within physical display area
    m_xRatio    = xRes * (float)(1.0 / 496.0);
    m_yRatio    = yRes * (float)(1.0 / 384.0);
    m_xOffs     = xOffset;
    m_yOffs     = yOffset;
    m_xRes      = xRes;
    m_yRes      = yRes;
    m_totalXRes = totalXResParam;
    m_totalYRes = totalYResParam;

    m_r3dFrameBuffers.DestroyFBO();     // remove any old ones if created
    m_r3dFrameBuffers.CreateFBO(totalXResParam, totalYResParam);

    return OKAY;
}

// Copies a width x height rectangle at (x, y) from texture RAM into the GL
// texture, one row per call because the source rows are 2048 texels apart.
// 'level' is not used here.
void CNew3D::UploadTextures(unsigned level, unsigned x, unsigned y, unsigned width, unsigned height)
{
    glBindTexture(GL_TEXTURE_2D, m_textureBuffer);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 2);

    for (unsigned i = 0; i < height; i++) {
        glTexSubImage2D(GL_TEXTURE_2D, 0, x, y + i, width, 1, GL_RED_INTEGER, GL_UNSIGNED_SHORT, m_textureRAM + ((y + i) * 2048) + x);
    }
}

// Picks the viewport that supplies the scroll fog value and draws the scroll
// fog once, provided some viewport with the same fog colour has a fog density
// or fog start value.
void CNew3D::DrawScrollFog()
{
    // this is my best guess at the logic based upon what games are doing
    //
    // ocean hunter     - every viewport has scroll fog values set. Must start with lowest priority layers as the higher ones sometimes are garbage
    // scud race        - first viewports in priority layer missing scroll values. The latter ones all contain valid scroll values.
    // daytona          - doesn't seem to use scroll fog at all. Will set scroll values for the first viewports, the end ones contain no scroll values. End credits have scroll fog, but constrained to the viewport
    // vf3              - first viewport only has it set. But set with highest select value ?? Rest of the viewports in priority layer contain a lower select value
    // sega bassfishing - first viewport in priority 1 sets scroll value. The rest all contain the wrong value + a higher select value ..
    // spikeout final   - 2nd viewport in the priority layer has scroll values set, none of the others do. It also uses the highest select value

    // I think the basic logic is this: the real3d picks the highest scroll fog value, starting from the lowest priority layer.
    // If it finds a value for priority layer 0 for example, it then bails out looking for any more.
    // Fogging seems to be constrained to whatever the viewport is that is set.
    // Scroll fog needs a density or start value to work, but these can come from another viewport if the fog colour is the same

    Node* nodePtr = nullptr;

    for (int i = 0; i < 4 && !nodePtr; i++) {
        for (auto &n : m_nodes) {
            if (n.viewport.priority == i) {
                if (n.viewport.scrollFog > 0.f) {

                    // check to see if we have a higher scroll fog value
                    if (nodePtr) {
                        if (nodePtr->viewport.scrollFog < n.viewport.scrollFog) {
                            nodePtr = &n;
                        }

                        continue;
                    }

                    nodePtr = &n;
                }
            }
        }
    }

    if (nodePtr) {

        // iterate nodes to see if any viewports with that fog colour actually set a fog density or start value
        // if both of these are zero fogging is effectively disabled

        for (auto& n : m_nodes) {

            if (nodePtr->viewport.fogParams[0] == n.viewport.fogParams[0] &&
                nodePtr->viewport.fogParams[1] == n.viewport.fogParams[1] &&
                nodePtr->viewport.fogParams[2] == n.viewport.fogParams[2])
            {
                // check to see if we have a fog start or density value
                if (n.viewport.fogParams[3] > 0.0f || n.viewport.fogParams[4] > 0.0f) {

                    float rgba[4];
                    auto& vp = nodePtr->viewport;
                    rgba[0] = vp.fogParams[0];
                    rgba[1] = vp.fogParams[1];
                    rgba[2] = vp.fogParams[2];
                    rgba[3] = vp.scrollFog;
                    glViewport(vp.x, vp.y, vp.width, vp.height);
                    m_r3dScrollFog.DrawScrollFog(rgba, vp.scrollAtt, vp.fogParams[6]);

                    // Every input to the draw comes from nodePtr, so further
                    // matches would only redraw the identical fog; one
                    // qualifying viewport is enough.
                    break;
                }
            }
        }
    }
}

// Darkens the frame by the lowest fog ambient value found among viewports
// that have a fog density or fog start value.
void CNew3D::DrawAmbientFog()
{
    // logic here is still not totally understood
    // some games are setting fog ambient which seems to darken the 2d background layer too when scroll fogging is not set
    // The logic is something like tileGenColour * fogAmbient
    // If fogAmbient = 1.0 it's a no-op. Lower values darken the image
    // Does this work with scroll fog? Well technically scroll fog already takes into account the fog ambient as it darkens the fog colour

    // Let's pick the lowest fog ambient value
    // Check for fog density or a fog start value, otherwise the effect seems to be disabled (lost world)

    float fogAmbient = 1.0f;
    Node* nodePtr = nullptr;

    for (auto& n : m_nodes) {

        // check to see if we have a fog density or fog start
        if (n.viewport.fogParams[3] <= 0.0f && n.viewport.fogParams[4] <= 0.0f) {
            continue;
        }

        if (n.viewport.fogParams[6] < fogAmbient) {
            nodePtr = &n;
            fogAmbient = n.viewport.fogParams[6];
        }
    }

    if (nodePtr) {
        auto& vp = nodePtr->viewport;
        float rgba[] = { 0.0f, 0.0f, 0.0f, 1.0f - fogAmbient };
        glViewport(vp.x, vp.y, vp.width, vp.height);
        m_r3dScrollFog.DrawScrollFog(rgba, 0.0f, 1.0f);
    }
}

// Draws every mesh of the given priority layer that belongs to 'layer' and
// whose highPriority flag equals renderOverlay. Returns true if any mesh in
// that priority layer is flagged high priority.
bool CNew3D::RenderScene(int priority, bool renderOverlay, Layer layer)
{
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, m_textureBuffer);

    bool hasOverlay = false;    // (high priority polys)

    for (auto &n : m_nodes) {

        if (n.viewport.priority != priority || n.models.empty()) {
            continue;
        }

        CalcViewport(&n.viewport, std::abs(m_nfPairs[priority].zNear*0.96f), std::abs(m_nfPairs[priority].zFar*1.05f));  // make planes 5% bigger
        glViewport(n.viewport.x, n.viewport.y, n.viewport.width, n.viewport.height);

        m_r3dShader.SetViewportUniforms(&n.viewport);

        for (auto &m : n.models) {

            bool matrixLoaded = false;

            if (m.meshes->empty()) {
                continue;
            }

            for (auto &mesh : *m.meshes) {

                if (mesh.highPriority) {
                    hasOverlay = true;
                }

                if (!mesh.Render(layer, m.alpha)) continue;
                if (mesh.highPriority != renderOverlay) continue;

                if (!matrixLoaded) {
                    m_r3dShader.SetModelStates(&m);
                    matrixLoaded = true;    // do this here to stop loading matrices we don't need. Ie when rendering non transparent etc
                }

                m_r3dShader.SetMeshUniforms(&mesh);
                glDrawArrays(m_primType, mesh.vboOffset, mesh.vertexCount);
            }
        }
    }

    return hasOverlay;
}

// Returns true when no viewport of the given priority layer has any models.
bool CNew3D::SkipLayer(int layer)
{
    for (const auto &n : m_nodes) {
        if (n.viewport.priority == layer) {
            if (!n.models.empty()) {
                return false;
            }
        }
    }

    return true;
}

// Binds the VBO/VAO/shader and sets the fixed GL state used for scene passes.
void CNew3D::SetRenderStates()
{
    m_vbo.Bind(true);
    glBindVertexArray(m_vao);

    m_r3dShader.SetShader(true);

    glDepthFunc     (GL_LEQUAL);
    glEnable        (GL_DEPTH_TEST);
    glDepthMask     (GL_TRUE);
    glActiveTexture (GL_TEXTURE0);
    glDisable       (GL_CULL_FACE);     // we'll emulate this in the shader

    glEnable        (GL_STENCIL_TEST);
    glStencilOp     (GL_KEEP, GL_KEEP, GL_REPLACE);
    glStencilMask   (0xFF);

    glBlendFunc     (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
    glDisable       (GL_BLEND);
}

// Undoes the bindings made by SetRenderStates() and disables the stencil test.
void CNew3D::DisableRenderStates()
{
    m_vbo.Bind(false);
    glBindVertexArray(0);

    m_r3dShader.SetShader(false);

    glDisable(GL_STENCIL_TEST);
}

// Renders one frame: resets per-frame state, walks the scene graph from
// 0x800000, uploads RAM (and any newly cached ROM) vertices, draws fog, then
// renders each priority layer in a normal pass and, if needed, an overlay pass.
void CNew3D::RenderFrame(void)
{
    // NOTE(review): the numeric_limits template argument was missing in this
    // copy; float is inferred from how zNear/zFar are used in RenderScene() - confirm.
    for (int i = 0; i < 4; i++) {
        m_nfPairs[i].zNear = -std::numeric_limits<float>::max();
        m_nfPairs[i].zFar  =  std::numeric_limits<float>::max();
    }

    {
        std::lock_guard guard(m_losMutex);
        std::swap(m_losBack, m_losFront);

        for (int i = 0; i < 4; i++) {
            m_losBack->value[i] = 0;
        }
    }

    // release any resources from last frame
    m_polyBufferRam.clear();    // clear dynamic model memory buffer
    m_nodes.clear();            // memory will grow during the object life time, that's fine, no need to shrink to fit
    m_modelMat.Release();       // would hope we wouldn't need this but no harm in checking
    m_nodeAttribs.Reset();

    RenderViewport(0x800000);   // build model structure

    m_vbo.Bind(true);
    m_vbo.BufferSubData(MAX_ROM_VERTS*sizeof(FVertex), m_polyBufferRam.size()*sizeof(FVertex), m_polyBufferRam.data());  // upload all the dynamic data to GPU in one go

    if (!m_polyBufferRom.empty()) {

        // sync rom memory with vbo
        int romBytes = (int)m_polyBufferRom.size() * sizeof(FVertex);
        int vboBytes = m_vbo.GetSize();
        int size = romBytes - vboBytes;

        if (size) {
            //check we haven't blown up the memory buffers
            //we will lose rom models for 1 frame if this happens, not the end of the world, as probably won't ever happen anyway
            if (m_polyBufferRom.size() >= MAX_ROM_VERTS) {
                m_polyBufferRom.clear();
                m_romMap.clear();
                m_vbo.Reset();
            }
            else {
                m_vbo.AppendData(size, &m_polyBufferRom[vboBytes / sizeof(FVertex)]);
            }
        }
    }

    m_r3dFrameBuffers.SetFBO(Layer::colour);    // colour will draw to all 3 buffers. For regular opaque pixels the transparent layers will be essentially masked
    glClear(GL_COLOR_BUFFER_BIT);

    DrawAmbientFog();
    DrawScrollFog();    // fog layer if applicable must be drawn here

    for (int pri = 0; pri <= 3; pri++) {

        if (SkipLayer(pri)) continue;

        for (int i = 0; i < 2; i++) {

            bool renderOverlay = (i == 1);

            SetRenderStates();

            m_r3dFrameBuffers.SetFBO(Layer::colour);
            glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
            m_r3dShader.DiscardAlpha(true);
            m_r3dShader.SetLayer(Layer::colour);
            bool hasOverlay = RenderScene(pri, renderOverlay, Layer::colour);

            if (!renderOverlay) {
                ProcessLos(pri);
            }

            glDepthFunc(GL_LESS);
            m_r3dShader.DiscardAlpha(false);
            m_r3dFrameBuffers.StoreDepth();
            m_r3dShader.SetLayer(Layer::trans1);
            m_r3dFrameBuffers.SetFBO(Layer::trans1);
            RenderScene(pri, renderOverlay, Layer::trans1);

            m_r3dFrameBuffers.RestoreDepth();
            m_r3dShader.SetLayer(Layer::trans2);
            m_r3dFrameBuffers.SetFBO(Layer::trans2);
            RenderScene(pri, renderOverlay, Layer::trans2);

            DisableRenderStates();

            if (!hasOverlay) break; // no high priority polys
        }
    }

    m_r3dFrameBuffers.SetFBO(Layer::none);
    m_r3dFrameBuffers.Draw();
}

// Intentionally empty.
void CNew3D::BeginFrame(void)
{
}

// Intentionally empty.
void CNew3D::EndFrame(void)
{
}

/******************************************************************************
 Real3D Address Translation

 Functions that interpret word-granular Real3D addresses and return pointers.
******************************************************************************/ // Translates 24-bit culling RAM addresses const UINT32* CNew3D::TranslateCullingAddress(UINT32 addr) { addr &= 0x00FFFFFF; // caller should have done this already if ((addr >= 0x800000) && (addr < 0x840000)) { return &m_cullingRAMHi[addr & 0x3FFFF]; } else if (addr < 0x100000) { return &m_cullingRAMLo[addr]; } return NULL; } // Translates model references const UINT32* CNew3D::TranslateModelAddress(UINT32 modelAddr) { modelAddr &= 0x00FFFFFF; // caller should have done this already if (modelAddr < 0x100000) { return &m_polyRAM[modelAddr]; } else { return &m_vrom[modelAddr]; } } bool CNew3D::DrawModel(UINT32 modelAddr) { const UINT32* modelAddress; bool cached = false; Model* m; modelAddress = TranslateModelAddress(modelAddr); // create a new model to push onto the vector m_nodes.back().models.emplace_back(); // get the last model in the array m = &m_nodes.back().models.back(); if (IsVROMModel(modelAddr) && !IsDynamicModel((UINT32*)modelAddress)) { // try to find meshes in the rom cache m->meshes = m_romMap[modelAddr]; // will create an entry with a null pointer if empty if (m->meshes) { cached = true; } else { m->meshes = std::make_shared>(); m_romMap[modelAddr] = m->meshes; // store meshes in our rom map here } m->dynamic = false; } else { m->meshes = std::make_shared>(); } // copy current model matrix for (int i = 0; i < 16; i++) { m->modelMat[i] = m_modelMat.currentMatrix[i]; } // update texture offsets m->textureOffsetX = m_nodeAttribs.currentTexOffsetX; m->textureOffsetY = m_nodeAttribs.currentTexOffsetY; m->page = m_nodeAttribs.currentPage; m->scale = m_nodeAttribs.currentModelScale; m->alpha = m_nodeAttribs.currentModelAlpha; if (!cached) { CacheModel(m, modelAddress); } if (m_nodeAttribs.currentClipStatus != Clip::INSIDE) { ClipModel(m); // not storing clipped values, only working out the Z range } return true; } /* 0x00: x------- -------- -------- -------- Is UF ref -x------ 
-------- -------- -------- Is 3D model --x----- -------- -------- -------- Is point ---x---- -------- -------- -------- Is point ref ----x--- -------- -------- -------- Is animation -----x-- -------- -------- -------- Is billboard ------x- -------- -------- -------- Child is billboard -------x -------- -------- -------- Extra child pointer needed -------- -----xxx xxxxxx-- -------- Node ID -------- -------- -------- x------- Reset matrix -------- -------- -------- -x------ Use child pointer -------- -------- -------- --x----- Use sibling pointer -------- -------- -------- ---x---- No matrix -------- -------- -------- ----x--- Indirect child -------- -------- -------- -----x-- Valid color table -------- -------- -------- ------xx Node type(0 = viewport, 1 = root node, 2 = culling node) 0x01, 0x02 only present on Step 1.5+ 0x01: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxx-- Model scale (float32) last 2 bits are control words -------- -------- -------- ------x- Disable culling -------- -------- -------- -------x Valid model scale 0x02 : -------- -------- x------- -------- Texture replace -------- -------- -x------ -------- Switch bank -------- -------- --xxxxxx x------- X offset -------- -------- -------- -xxxxxxx Y offset 0x03 : xxxxxxxx xxxxx--- -------- -------- Color table address 1 -------- -----xxx xxxx---- -------- LOD table pointer -------- -------- ----xxxx xxxxxxxx Node matrix 0x04: Translation X coordinate 0x05: Translation Y coordinate 0x06: Translation Z coordinate 0x07: xxxx---- -------- -------- -------- Color table address 2 -----x-- -------- -------- -------- Sibling table ------x- -------- -------- -------- Point -------x -------- -------- -------- Leaf node -------- xxxxxxxx xxxxxxxx xxxxxxxx Child pointer 0x08: xxxxxxx- -------- -------- -------- Color table address 3 -------x -------- -------- -------- Null sibling -------- xxxxxxxx xxxxxxxx xxxxxxxx Sibling pointer 0x09: xxxxxxxx xxxxxxxx -------- -------- Blend radius -------- -------- xxxxxxxx xxxxxxxx 
Culling radius */ void CNew3D::DescendCullingNode(UINT32 addr) { enum class NodeType { undefined = -1, viewport = 0, rootNode = 1, cullingNode = 2 }; const UINT32 *node, *lodPtr; UINT32 matrixOffset, child1Ptr, sibling2Ptr; BBox bbox; UINT16 uCullRadius; float fCullRadius; UINT16 uBlendRadius; float fBlendRadius; UINT8 lodTablePointer; NodeType nodeType; bool resetMatrix; if (m_nodeAttribs.StackLimit()) { return; } node = TranslateCullingAddress(addr); if (NULL == node) { return; } // Extract known fields nodeType = (NodeType)(node[0x00] & 3); child1Ptr = node[0x07 - m_offset] & 0x7FFFFFF; // mask colour table bits sibling2Ptr = node[0x08 - m_offset] & 0x1FFFFFF; // mask colour table bits matrixOffset = node[0x03 - m_offset] & 0xFFF; resetMatrix = (node[0x0] & 0x80) > 0; lodTablePointer = (node[0x03 - m_offset] >> 12) & 0x7F; // check our node type if (nodeType == NodeType::viewport) { return; // viewport nodes aren't rendered } // parse siblings if ((node[0x00] & 0x07) != 0x06) { // colour table seems to indicate no siblings if (!(sibling2Ptr & 0x1000000) && sibling2Ptr) { DescendCullingNode(sibling2Ptr); // no need to mask bit, would already be zero } } if ((node[0x00] & 0x04)) { m_colorTableAddr = ((node[0x03 - m_offset] >> 19) << 0) | ((node[0x07 - m_offset] >> 28) << 13) | ((node[0x08 - m_offset] >> 25) << 17); m_colorTableAddr &= 0x000FFFFF; // clamp to 4MB (in words) range } m_nodeAttribs.Push(); // save current attribs if (!m_offset) { // Step 1.5+ if (node[0x01] & 1) m_nodeAttribs.currentModelScale = Util::Uint32AsFloat(node[0x01] & ~3); // mask out control bits if (node[0x01] & 2) m_nodeAttribs.currentDisableCulling = true; // apply texture offsets, else retain current ones if ((node[0x02] & 0x8000)) { int tx = 32 * ((node[0x02] >> 7) & 0x3F); int ty = 32 * (node[0x02] & 0x1F); m_nodeAttribs.currentTexOffsetX = tx; m_nodeAttribs.currentTexOffsetY = ty; m_nodeAttribs.currentPage = (node[0x02] & 0x4000) >> 14; } } // Apply matrix and translation 
m_modelMat.PushMatrix(); // apply translation vector if (node[0x00] & 0x10) { float x = Util::Uint32AsFloat(node[0x04 - m_offset]); float y = Util::Uint32AsFloat(node[0x05 - m_offset]); float z = Util::Uint32AsFloat(node[0x06 - m_offset]); m_modelMat.Translate(x, y, z); } // multiply matrix, if specified else if (matrixOffset) { MultMatrix(matrixOffset,m_modelMat); } if (resetMatrix) { ResetMatrix(m_modelMat); } uCullRadius = node[9 - m_offset] & 0xFFFF; fCullRadius = R3DFloat::GetFloat16(uCullRadius); uBlendRadius = node[9 - m_offset] >> 16; fBlendRadius = R3DFloat::GetFloat16(uBlendRadius); if (m_nodeAttribs.currentClipStatus != Clip::INSIDE) { if (uCullRadius != R3DFloat::Pro16BitMax) { CalcBox(fCullRadius, bbox); TransformBox(m_modelMat, bbox); m_nodeAttribs.currentClipStatus = ClipBox(bbox, m_planes); if (m_nodeAttribs.currentClipStatus == Clip::INSIDE) { CalcBoxExtents(bbox); } } else { m_nodeAttribs.currentClipStatus = Clip::NOT_SET; } } float LODscale = fBlendRadius * m_nodeAttribs.currentModelScale / std::abs(m_modelMat.currentMatrix[14]); LODFeatureType lodTableEntry = m_LODBlendTable->table[lodTablePointer]; if (m_nodeAttribs.currentDisableCulling) { m_nodeAttribs.currentModelAlpha = 1.0f; } else { float nodeAlpha = lodTableEntry.lod[3].blendFactor * (LODscale - lodTableEntry.lod[3].deleteSize); nodeAlpha = std::clamp(nodeAlpha, 0.0f, 1.0f); m_nodeAttribs.currentModelAlpha *= nodeAlpha; // alpha of each node multiples by the alpha of its parent } if (m_nodeAttribs.currentClipStatus != Clip::OUTSIDE && m_nodeAttribs.currentModelAlpha > 0.0f) { // Descend down first link if ((node[0x00] & 0x08)) // 4-element LOD table { lodPtr = TranslateCullingAddress(child1Ptr); // determine which LOD to use; we do not currently blend between LODs int modelLOD; for (modelLOD = 0; modelLOD < 3; modelLOD++) { if (LODscale >= lodTableEntry.lod[modelLOD].deleteSize) break; } if (NULL != lodPtr) { if ((node[0x03 - m_offset] & 0x20000000)) { DescendCullingNode(lodPtr[modelLOD] 
& 0xFFFFFF); } else { DrawModel(lodPtr[modelLOD] & 0xFFFFFF); } } } else { DescendNodePtr(child1Ptr); } } m_modelMat.PopMatrix(); // Restore old texture offsets m_nodeAttribs.Pop(); } void CNew3D::DescendNodePtr(UINT32 nodeAddr) { // Ignore null links if ((nodeAddr & 0x00FFFFFF) == 0) { return; } switch ((nodeAddr >> 24) & 0x5) // pointer type encoded in upper 8 bits { case 0x00: DescendCullingNode(nodeAddr & 0xFFFFFF); break; case 0x01: DrawModel(nodeAddr & 0xFFFFFF); break; case 0x04: DescendPointerList(nodeAddr & 0xFFFFFF); break; default: break; } } void CNew3D::DescendPointerList(UINT32 addr) { const UINT32* list; UINT32 nodeAddr; int index; list = TranslateCullingAddress(addr); if (NULL == list) { return; } index = 0; while (true) { if (list[index] & 0x01000000) { break; // empty list } nodeAddr = list[index] & 0x00FFFFFF; // clear upper 8 bits to ensure this is processed as a culling node DescendCullingNode(nodeAddr); if (list[index] & 0x02000000) { break; // list end } index++; } } /****************************************************************************** Matrix Stack ******************************************************************************/ // Macro to generate column-major (OpenGL) index from y,x subscripts #define CMINDEX(y,x) (x*4+y) /* * MultMatrix(): * * Multiplies the matrix stack by the specified Real3D matrix. The matrix * index is a 12-bit number specifying a matrix number relative to the base. * The base matrix MUST be set up before calling this function. 
*/ void CNew3D::MultMatrix(UINT32 matrixOffset, Mat4& mat) { GLfloat m[4*4]; const float *src = &m_matrixBasePtr[matrixOffset * 12]; if (m_matrixBasePtr == NULL) // LA Machineguns return; m[CMINDEX(0, 0)] = src[3]; m[CMINDEX(0, 1)] = src[4]; m[CMINDEX(0, 2)] = src[5]; m[CMINDEX(0, 3)] = src[0]; m[CMINDEX(1, 0)] = src[6]; m[CMINDEX(1, 1)] = src[7]; m[CMINDEX(1, 2)] = src[8]; m[CMINDEX(1, 3)] = src[1]; m[CMINDEX(2, 0)] = src[9]; m[CMINDEX(2, 1)] = src[10]; m[CMINDEX(2, 2)] = src[11]; m[CMINDEX(2, 3)] = src[2]; m[CMINDEX(3, 0)] = 0.0; m[CMINDEX(3, 1)] = 0.0; m[CMINDEX(3, 2)] = 0.0; m[CMINDEX(3, 3)] = 1.0; mat.MultMatrix(m); } /* * InitMatrixStack(): * * Initializes the modelview (model space -> view space) matrix stack and * Real3D coordinate system. These are the last transforms to be applied (and * the first to be defined on the stack) before projection. * * Model 3 games tend to define the following unusual base matrix: * * 0 0 -1 0 * 1 0 0 0 * 0 -1 0 0 * 0 0 0 1 * * When this is multiplied by a column vector, the output is: * * -Z * X * -Y * 1 * * My theory is that the Real3D GPU accepts vectors in Z,X,Y order. The games * store everything as X,Y,Z and perform the translation at the end. The Real3D * also has Y and Z coordinates opposite of the OpenGL convention. This * function inserts a compensating matrix to undo these things. * * NOTE: This function assumes we are in GL_MODELVIEW matrix mode. 
*/ void CNew3D::InitMatrixStack(UINT32 matrixBaseAddr, Mat4& mat) { GLfloat m[4 * 4]; // This matrix converts vectors back from the weird Model 3 Z,X,Y ordering // and also into OpenGL viewspace (-Y,-Z) m[CMINDEX(0, 0)] = 0.0; m[CMINDEX(0, 1)] = 1.0; m[CMINDEX(0, 2)] = 0.0; m[CMINDEX(0, 3)] = 0.0; m[CMINDEX(1, 0)] = 0.0; m[CMINDEX(1, 1)] = 0.0; m[CMINDEX(1, 2)] =-1.0; m[CMINDEX(1, 3)] = 0.0; m[CMINDEX(2, 0)] =-1.0; m[CMINDEX(2, 1)] = 0.0; m[CMINDEX(2, 2)] = 0.0; m[CMINDEX(2, 3)] = 0.0; m[CMINDEX(3, 0)] = 0.0; m[CMINDEX(3, 1)] = 0.0; m[CMINDEX(3, 2)] = 0.0; m[CMINDEX(3, 3)] = 1.0; mat.LoadMatrix(m); // Set matrix base address and apply matrix #0 (coordinate system matrix) m_matrixBasePtr = (float *)TranslateCullingAddress(matrixBaseAddr); MultMatrix(0, mat); } // what this does is to set the rotation back to zero, whilst keeping the position and scale of the current matrix void CNew3D::ResetMatrix(Mat4& mat) { float m[16]; memcpy(m, mat.currentMatrix, 16 * 4); // transpose the top 3x3 of the matrix (this effectively inverts the rotation). When we multiply our new matrix it'll effectively cancel out the rotations. 
std::swap(m[1], m[4]); std::swap(m[2], m[8]); std::swap(m[6], m[9]); // set position to zero m[12] = 0; m[13] = 0; m[14] = 0; m[15] = 1; // normalise columns, this removes the scaling, otherwise we'll apply it twice float s1 = std::sqrt((m[0] * m[0]) + (m[1] * m[1]) + (m[2] * m[2])); float s2 = std::sqrt((m[4] * m[4]) + (m[5] * m[5]) + (m[6] * m[6])); float s3 = std::sqrt((m[8] * m[8]) + (m[9] * m[9]) + (m[10] * m[10])); m[0] /= s1; m[4] /= s2; m[8] /= s3; m[1] /= s1; m[5] /= s2; m[9] /= s3; m[2] /= s1; m[6] /= s2; m[10] /= s3; mat.MultMatrix(m); } // Draws viewports of the given priority void CNew3D::RenderViewport(UINT32 addr) { static const GLfloat color[8][3] = { // RGB1 color translation { 0.0f, 0.0f, 0.0f }, // off { 0.0f, 0.0f, 1.0f }, // blue { 0.0f, 1.0f, 0.0f }, // green { 0.0f, 1.0f, 1.0f }, // cyan { 1.0f, 0.0f, 0.0f }, // red { 1.0f, 0.0f, 1.0f }, // purple { 1.0f, 1.0f, 0.0f }, // yellow { 1.0f, 1.0f, 1.0f } // white }; if ((addr & 0x00FFFFFF) == 0) { return; } // Translate address and obtain pointer const uint32_t *vpnode = TranslateCullingAddress(addr); if (NULL == vpnode) { return; } if (!(vpnode[0] & 0x20)) { // only if viewport enabled // create node object m_nodes.emplace_back(Node()); m_nodes.back().models.reserve(2048); // create space for models // get pointer to its viewport Viewport *vp = &m_nodes.back().viewport; vp->priority = (vpnode[0] >> 3) & 0x3; vp->select = (vpnode[0] >> 8) & 0x3; vp->number = (vpnode[0] >> 10); m_currentPriority = vp->priority; // Fetch viewport parameters (TO-DO: would rounding make a difference?) 
vp->vpX = (int)(((vpnode[0x1A] & 0xFFFF) * (float)(1.0 / 16.0)) + 0.5f); // viewport X (12.4 fixed point) vp->vpY = (int)(((vpnode[0x1A] >> 16) * (float)(1.0 / 16.0)) + 0.5f); // viewport Y (12.4) vp->vpWidth = (int)(((vpnode[0x14] & 0xFFFF) * (float)(1.0 / 4.0)) + 0.5f); // width (14.2) vp->vpHeight = (int)(((vpnode[0x14] >> 16) * (float)(1.0 / 4.0)) + 0.5f); // height (14.2) uint32_t matrixBase = vpnode[0x16] & 0xFFFFFF; // matrix base address m_LODBlendTable = (LODBlendTable*)TranslateCullingAddress(vpnode[0x17] & 0xFFFFFF); /* vp->angle_left = -atan2f(Util::Uint32AsFloat(vpnode[12]), Util::Uint32AsFloat(vpnode[13])); // These values work out as the normals for the clipping planes. vp->angle_right = atan2f(Util::Uint32AsFloat(vpnode[16]), -Util::Uint32AsFloat(vpnode[17])); // Sometimes these values (dirt devils,lost world) are totally wrong vp->angle_top = atan2f(Util::Uint32AsFloat(vpnode[14]), Util::Uint32AsFloat(vpnode[15])); // and don't work for the frustum values exactly. vp->angle_bottom = -atan2f(Util::Uint32AsFloat(vpnode[18]), -Util::Uint32AsFloat(vpnode[19])); // Perhaps they are just used for culling and not rendering. 
*/ float cv = Util::Uint32AsFloat(vpnode[0x8]); // 1/(left-right) float cw = Util::Uint32AsFloat(vpnode[0x9]); // 1/(top-bottom) float io = Util::Uint32AsFloat(vpnode[0xa]); // top / bottom (ratio) - ish float jo = Util::Uint32AsFloat(vpnode[0xb]); // left / right (ratio) vp->angle_left = (0.0f - jo) / cv; vp->angle_right = (1.0f - jo) / cv; vp->angle_bottom = -(1.0f - io)/ cw; vp->angle_top = -(0.0f - io)/ cw; // calculate the frustum shape, near/far pair are dummy values CalcViewport(vp, 1.f, 1000.f); // calculate frustum planes CalcFrustumPlanes(m_planes, vp->projectionMatrix); // we need to calc a 'projection matrix' to get the correct frustum planes for clipping // Lighting (note that sun vector points toward sun -- away from vertex) vp->lightingParams[0] = Util::Uint32AsFloat(vpnode[0x05]); // sun X vp->lightingParams[1] = -Util::Uint32AsFloat(vpnode[0x06]); // sun Y (- to convert to ogl cordinate system) vp->lightingParams[2] = -Util::Uint32AsFloat(vpnode[0x04]); // sun Z (- to convert to ogl cordinate system) vp->lightingParams[3] = std::max(0.f, std::min(Util::Uint32AsFloat(vpnode[0x07]), 1.0f)); // sun intensity (clamp to 0-1) vp->lightingParams[4] = (float)((vpnode[0x24] >> 8) & 0xFF) * (float)(1.0 / 255.0); // ambient intensity vp->lightingParams[5] = 0.0f; // reserved vp->sunClamp = m_sunClamp; vp->intensityClamp = (m_step == 0x10); // just step 1.0 ? 
vp->hardwareStep = m_step; // Spotlight int spotColorIdx = (vpnode[0x20] >> 11) & 7; // spotlight color index int spotFogColorIdx = (vpnode[0x20] >> 8) & 7; // spotlight on fog color index vp->spotEllipse[0] = (float)(INT16)(vpnode[0x1E] & 0xFFFF) * (float)(1.0 / 8.0);// spotlight X position (13.3 fixed point) vp->spotEllipse[1] = (float)(INT16)(vpnode[0x1D] & 0xFFFF) * (float)(1.0 / 8.0);// spotlight Y vp->spotEllipse[2] = (float)((vpnode[0x1E] >> 16) & 0xFFFF); // spotlight X size (16-bit) vp->spotEllipse[3] = (float)((vpnode[0x1D] >> 16) & 0xFFFF); // spotlight Y size vp->spotRange[0] = 1.0f / Util::Uint32AsFloat(vpnode[0x21]); // spotlight start vp->spotRange[1] = Util::Uint32AsFloat(vpnode[0x1F]); // spotlight extent vp->spotColor[0] = color[spotColorIdx][0]; // spotlight color vp->spotColor[1] = color[spotColorIdx][1]; vp->spotColor[2] = color[spotColorIdx][2]; vp->spotFogColor[0] = color[spotFogColorIdx][0]; // spotlight color on fog vp->spotFogColor[1] = color[spotFogColorIdx][1]; vp->spotFogColor[2] = color[spotFogColorIdx][2]; // spotlight is specified in terms of physical resolution vp->spotEllipse[1] = 384.0f - vp->spotEllipse[1]; // flip Y position // Avoid division by zero vp->spotEllipse[2] = std::max(1.0f, vp->spotEllipse[2]); vp->spotEllipse[3] = std::max(1.0f, vp->spotEllipse[3]); vp->spotEllipse[2] = std::roundf(2047.0f / vp->spotEllipse[2]); vp->spotEllipse[3] = std::roundf(2047.0f / vp->spotEllipse[3]); // Scale the spotlight to the OpenGL viewport vp->spotEllipse[0] = vp->spotEllipse[0] * m_xRatio + (float)m_xOffs; vp->spotEllipse[1] = vp->spotEllipse[1] * m_yRatio + (float)m_yOffs; vp->spotEllipse[2] *= m_xRatio; vp->spotEllipse[3] *= m_yRatio; // Line of sight position vp->losPosX = (int)(((vpnode[0x1c] & 0xFFFF) / 16.0f) + 0.5f); // x position vp->losPosY = (int)(((vpnode[0x1c] >> 16) / 16.0f) + 0.5f); // y position 0 starts from the top // Fog vp->fogParams[0] = (float)((vpnode[0x22] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog color R 
vp->fogParams[1] = (float)((vpnode[0x22] >> 8) & 0xFF) * (float)(1.0 / 255.0); // fog color G vp->fogParams[2] = (float)((vpnode[0x22] >> 0) & 0xFF) * (float)(1.0 / 255.0); // fog color B vp->fogParams[3] = std::abs(Util::Uint32AsFloat(vpnode[0x23])); // fog density - ocean hunter uses negative values, but looks the same vp->fogParams[4] = (float)(INT16)(vpnode[0x25] & 0xFFFF)* (float)(1.0 / 255.0); // fog start // Avoid Infinite and NaN values for Star Wars Trilogy if (std::isinf(vp->fogParams[3]) || std::isnan(vp->fogParams[3])) { for (int i = 0; i < 7; i++) vp->fogParams[i] = 0.0f; } vp->fogParams[5] = (float)((vpnode[0x24] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog attenuation vp->fogParams[6] = (float)((vpnode[0x25] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog ambient vp->scrollFog = (float)(vpnode[0x20] & 0xFF) * (float)(1.0 / 255.0); // scroll fog vp->scrollAtt = (float)(vpnode[0x24] & 0xFF) * (float)(1.0 / 255.0); // scroll attenuation // Clear texture offsets before proceeding m_nodeAttribs.Reset(); // Set up coordinate system and base matrix InitMatrixStack(matrixBase, m_modelMat); // Descend down the node link. Need to start with a culling node because that defines our culling radius. 
auto childptr = vpnode[0x02]; if (((childptr >> 24) & 0x5) == 0) { DescendNodePtr(vpnode[0x02]); } } // render next viewport if (vpnode[0x01] != 0x01000000) { RenderViewport(vpnode[0x01]); } } void CNew3D::CopyVertexData(const R3DPoly& r3dPoly, std::vector& vertexArray) { // both lemans 24 and dirt devils are rendering some totally transparent polys as the first object in each viewport // in dirt devils it's parallel to the camera so is completely invisible, but breaks our depth calculation // in lemans 24 its a sort of diamond shape, but never leaves a hole in the transparent geometry so must be being skipped by the h/w if (r3dPoly.faceColour[3] == 0) { return; } if (m_numPolyVerts==4) { if (r3dPoly.number == 4) { vertexArray.emplace_back(r3dPoly, 0); // construct directly inside container without copy vertexArray.emplace_back(r3dPoly, 1); vertexArray.emplace_back(r3dPoly, 2); vertexArray.emplace_back(r3dPoly, 3); // check for identical points (ie forced triangle) and replace with average point // if we don't do this our quad code falls apart FVertex* v = (&vertexArray.back()) - 3; for (int i = 0; i < 4; i++) { int next1 = (i + 1) % 4; int next2 = (i + 2) % 4; if (FVertex::Equal(v[i], v[next1])) { FVertex::Average(v[next1], v[next2], v[next1]); break; } } } else { vertexArray.emplace_back(r3dPoly, 0); vertexArray.emplace_back(r3dPoly, 1); vertexArray.emplace_back(r3dPoly, 2); vertexArray.emplace_back(r3dPoly, 0, 2); // last point is an average of 0 and 2 } } else { vertexArray.emplace_back(r3dPoly, 0); vertexArray.emplace_back(r3dPoly, 1); vertexArray.emplace_back(r3dPoly, 2); if (r3dPoly.number == 4) { vertexArray.emplace_back(r3dPoly, 0); vertexArray.emplace_back(r3dPoly, 2); vertexArray.emplace_back(r3dPoly, 3); } } } void CNew3D::GetCoordinates(int width, int height, UINT16 uIn, UINT16 vIn, float uvScale, float& uOut, float& vOut) { uOut = (uIn * uvScale) / width; vOut = (vIn * uvScale) / height; } int CNew3D::GetTexFormat(int originalFormat, bool contour) { 
if (!contour) { return originalFormat; // the same } switch (originalFormat) { case 1: case 2: case 3: case 4: return originalFormat + 7; // these formats are identical to 1-4, except they lose the 4 bit alpha part when contour is enabled default: return originalFormat; } } void CNew3D::SetMeshValues(SortingMesh *currentMesh, PolyHeader &ph) { //copy attributes currentMesh->textured = ph.TexEnabled(); currentMesh->alphaTest = ph.AlphaTest(); currentMesh->textureAlpha = ph.TextureAlpha(); currentMesh->polyAlpha = ph.PolyAlpha(); currentMesh->lighting = ph.LightEnabled(); currentMesh->fixedShading = ph.FixedShading() && !ph.SmoothShading(); currentMesh->highPriority = ph.HighPriority(); currentMesh->transLSelect = ph.TranslucencyPatternSelect(); currentMesh->layered = ph.Layered(); currentMesh->specular = ph.SpecularEnabled(); currentMesh->shininess = ph.Shininess(); currentMesh->specularValue = ph.SpecularValue(); currentMesh->fogIntensity = ph.LightModifier(); currentMesh->translatorMap = ph.TranslatorMap(); currentMesh->noLosReturn = ph.NoLosReturn(); if (currentMesh->textured) { currentMesh->format = GetTexFormat(ph.TexFormat(), ph.AlphaTest()); if (currentMesh->format == 7) { currentMesh->alphaTest = false; // alpha test is a 1 bit test, this format needs a lower threshold, since it has 16 levels of transparency } currentMesh->x = ph.X(); currentMesh->y = ph.Y(); currentMesh->width = ph.TexWidth(); currentMesh->height = ph.TexHeight(); currentMesh->microTexture = ph.MicroTexture(); currentMesh->inverted = ph.TranslatorMapOffset() == 2; { bool smoothU = ph.TexSmoothU(); bool smoothV = ph.TexSmoothV(); if (ph.AlphaTest()) { smoothU = false; // smooth wrap makes no sense for alpha tested polys with pixel dilate smoothV = false; } if (ph.TexUMirror()) { if (smoothU) currentMesh->wrapModeU = Mesh::TexWrapMode::mirror; else currentMesh->wrapModeU = Mesh::TexWrapMode::mirrorClamp; } else { if (smoothU) currentMesh->wrapModeU = Mesh::TexWrapMode::repeat; else 
currentMesh->wrapModeU = Mesh::TexWrapMode::repeatClamp; } if (ph.TexVMirror()) { if (smoothV) currentMesh->wrapModeV = Mesh::TexWrapMode::mirror; else currentMesh->wrapModeV = Mesh::TexWrapMode::mirrorClamp; } else { if (smoothV) currentMesh->wrapModeV = Mesh::TexWrapMode::repeat; else currentMesh->wrapModeV = Mesh::TexWrapMode::repeatClamp; } } if (currentMesh->microTexture) { static const float microTexScale[] = { 2.f, 4.f, 16.f, 256.f }; currentMesh->microTextureID = ph.MicroTextureID(); currentMesh->microTextureScale = microTexScale[ph.MicroTextureMinLOD()]; } } } void CNew3D::CacheModel(Model *m, const UINT32 *data) { if (data == NULL) return; UINT16 texCoords[4][2]; PolyHeader ph; UINT64 lastHash = -1; SortingMesh* currentMesh = nullptr; std::unordered_map sMap; ph = data; int numTriangles = ph.NumTrianglesTotal(); // Cache all polygons do { R3DPoly p; // current polygon float uvScale; if (ph.header[6] == 0) { break; } // create a hash value based on poly attributes -todo add more attributes auto hash = ph.Hash(); if (hash != lastHash) { if (sMap.count(hash) == 0) { currentMesh = &sMap.insert({hash, SortingMesh()}).first->second; //make space for our vertices currentMesh->verts.reserve(numTriangles * 3); //set mesh values SetMeshValues(currentMesh, ph); } else currentMesh = &sMap[hash]; } // Obtain basic polygon parameters p.number = ph.NumVerts(); uvScale = ph.UVScale(); ph.FaceNormal(p.faceNormal); // Fetch reused vertices according to bitfield, then new verts int j = 0; for (int i = 0; i < 4; i++) // up to 4 reused vertices { if (ph.SharedVertex(i)) { p.v[j] = m_prev[i]; texCoords[j][0] = m_prevTexCoords[i][0]; texCoords[j][1] = m_prevTexCoords[i][1]; //check if we need to recalc tex coords - will only happen if tex tiles are different + sharing vertices if (hash != lastHash) { if (currentMesh->textured) { GetCoordinates(currentMesh->width, currentMesh->height, texCoords[j][0], texCoords[j][1], uvScale, p.v[j].texcoords[0], p.v[j].texcoords[1]); } } j++; 
} } lastHash = hash; // copy face attributes if (!ph.PolyColor()) { int colorIdx = ph.ColorIndex(); p.faceColour[2] = (m_polyRAM[m_colorTableAddr + colorIdx] & 0xFF); p.faceColour[1] = ((m_polyRAM[m_colorTableAddr + colorIdx] >> 8) & 0xFF); p.faceColour[0] = ((m_polyRAM[m_colorTableAddr + colorIdx] >> 16) & 0xFF); } else { p.faceColour[0] = ((ph.header[4] >> 24)); p.faceColour[1] = ((ph.header[4] >> 16) & 0xFF); p.faceColour[2] = ((ph.header[4] >> 8) & 0xFF); } p.faceColour[3] = ph.Transparency(); if (ph.Discard1() && !ph.Discard2()) { p.faceColour[3] /= 2; } // if we have flat shading, we can't re-use normals from shared vertices for (int i = 0; i < p.number && !ph.SmoothShading(); i++) { p.v[i].normal[0] = p.faceNormal[0]; p.v[i].normal[1] = p.faceNormal[1]; p.v[i].normal[2] = p.faceNormal[2]; } UINT32* vData = ph.StartOfData(); // vertex data starts here // remaining vertices are new and defined here for (; j < p.number; j++) { // Fetch vertices UINT32 ix = vData[0]; UINT32 iy = vData[1]; UINT32 iz = vData[2]; UINT32 it = vData[3]; // Decode vertices p.v[j].pos[0] = (((INT32)ix) >> 8) * m_vertexFactor; p.v[j].pos[1] = (((INT32)iy) >> 8) * m_vertexFactor; p.v[j].pos[2] = (((INT32)iz) >> 8) * m_vertexFactor; p.v[j].pos[3] = 1.0f; // Per vertex normals if (ph.SmoothShading()) { p.v[j].normal[0] = BYTE_TO_FLOAT((INT8)(ix & 0xFF)); p.v[j].normal[1] = BYTE_TO_FLOAT((INT8)(iy & 0xFF)); p.v[j].normal[2] = BYTE_TO_FLOAT((INT8)(iz & 0xFF)); } if (ph.FixedShading() && !ph.SmoothShading()) { // fixed shading seems to be disabled if actual normals are set //========== float shade; //========== if (!m_shadeIsSigned) { shade = (ix & 0xFF) * (float)(1.0 / 255.0); } else { shade = BYTE_TO_FLOAT((INT8)(ix & 0xFF)); } p.v[j].fixedShade = shade; } float texU = 0; float texV = 0; // tex coords if (currentMesh->textured) { GetCoordinates(currentMesh->width, currentMesh->height, (UINT16)(it >> 16), (UINT16)(it & 0xFFFF), uvScale, texU, texV); } p.v[j].texcoords[0] = texU; 
p.v[j].texcoords[1] = texV; //cache un-normalised tex coordinates texCoords[j][0] = (UINT16)(it >> 16); texCoords[j][1] = (UINT16)(it & 0xFFFF); vData += 4; } // check if we need to double up vertices for two sided lighting if (ph.DoubleSided() && !ph.Discard()) { R3DPoly tempP = p; // flip normals V3::inverse(tempP.faceNormal); for (int i2 = 0; i2 < tempP.number; i2++) { V3::inverse(tempP.v[i2].normal); } CopyVertexData(tempP, currentMesh->verts); } // Copy this polygon into the model buffer if (!ph.Discard()) { CopyVertexData(p, currentMesh->verts); } // Copy current vertices into previous vertex array for (int i = 0; i < 4; i++) { m_prev[i] = p.v[i]; m_prevTexCoords[i][0] = texCoords[i][0]; m_prevTexCoords[i][1] = texCoords[i][1]; } } while (ph.NextPoly()); //sorted the data, now copy to main data structures // we know how many meshes we have to reserve appropriate space m->meshes->reserve(sMap.size()); for (auto& it : sMap) { if (m->dynamic) { // calculate VBO values for current mesh it.second.vboOffset = (int)m_polyBufferRam.size() + MAX_ROM_VERTS; it.second.vertexCount = (int)it.second.verts.size(); // copy poly data to main buffer m_polyBufferRam.insert(m_polyBufferRam.end(), it.second.verts.begin(), it.second.verts.end()); } else { // calculate VBO values for current mesh it.second.vboOffset = (int)m_polyBufferRom.size(); it.second.vertexCount = (int)it.second.verts.size(); // copy poly data to main buffer m_polyBufferRom.insert(m_polyBufferRom.end(), it.second.verts.begin(), it.second.verts.end()); } //copy the temp mesh into the model structure //this will lose the associated vertex data, which is now copied to the main buffer anyway m->meshes->push_back(it.second); } } bool CNew3D::IsDynamicModel(UINT32 *data) { if (data == NULL) { return false; } PolyHeader p(data); do { if ((p.header[1] & 2) == 0) { // model has rgb colour palette return true; } if (p.header[6] == 0) { break; } } while (p.NextPoly()); return false; } bool CNew3D::IsVROMModel(UINT32 
modelAddr) { return modelAddr >= 0x100000; } void CNew3D::CalcFrustumPlanes(Plane p[5], const float* matrix) { // Left Plane p[0].a = matrix[3] + matrix[0]; p[0].b = matrix[7] + matrix[4]; p[0].c = matrix[11] + matrix[8]; p[0].d = matrix[15] + matrix[12]; p[0].Normalise(); // Right Plane p[1].a = matrix[3] - matrix[0]; p[1].b = matrix[7] - matrix[4]; p[1].c = matrix[11] - matrix[8]; p[1].d = matrix[15] - matrix[12]; p[1].Normalise(); // Bottom Plane p[2].a = matrix[3] + matrix[1]; p[2].b = matrix[7] + matrix[5]; p[2].c = matrix[11] + matrix[9]; p[2].d = matrix[15] + matrix[13]; p[2].Normalise(); // Top Plane p[3].a = matrix[3] - matrix[1]; p[3].b = matrix[7] - matrix[5]; p[3].c = matrix[11] - matrix[9]; p[3].d = matrix[15] - matrix[13]; p[3].Normalise(); // Front Plane p[4].a = 0.f; p[4].b = 0.f; p[4].c = -1.f; p[4].d = 0.f; } void CNew3D::CalcBox(float distance, BBox& box) { //bottom left front box.points[0][0] = -distance; box.points[0][1] = -distance; box.points[0][2] = distance; box.points[0][3] = 1.f; //bottom left back box.points[1][0] = -distance; box.points[1][1] = -distance; box.points[1][2] = -distance; box.points[1][3] = 1.f; //bottom right back box.points[2][0] = distance; box.points[2][1] = -distance; box.points[2][2] = -distance; box.points[2][3] = 1.f; //bottom right front box.points[3][0] = distance; box.points[3][1] = -distance; box.points[3][2] = distance; box.points[3][3] = 1.f; //top left front box.points[4][0] = -distance; box.points[4][1] = distance; box.points[4][2] = distance; box.points[4][3] = 1.f; //top left back box.points[5][0] = -distance; box.points[5][1] = distance; box.points[5][2] = -distance; box.points[5][3] = 1.f; //top right back box.points[6][0] = distance; box.points[6][1] = distance; box.points[6][2] = -distance; box.points[6][3] = 1.f; //top right front box.points[7][0] = distance; box.points[7][1] = distance; box.points[7][2] = distance; box.points[7][3] = 1.f; } void CNew3D::MultVec(const float matrix[16], const float 
in[4], float out[4]) { for (int i = 0; i < 4; i++) { out[i] = in[0] * matrix[0 * 4 + i] + in[1] * matrix[1 * 4 + i] + in[2] * matrix[2 * 4 + i] + in[3] * matrix[3 * 4 + i]; } } void CNew3D::TransformBox(const float *m, BBox& box) { for (int i = 0; i < 8; i++) { float v[4]; MultVec(m, box.points[i], v); box.points[i][0] = v[0]; box.points[i][1] = v[1]; box.points[i][2] = v[2]; } } Clip CNew3D::ClipBox(const BBox& box, Plane planes[5]) { int count = 0; for (int i = 0; i < 8; i++) { int temp = 0; for (int j = 0; j < 5; j++) { if (planes[j].DistanceToPoint(box.points[i]) >= 0.f) { temp++; } } if (temp == 5) count++; // point is inside all 4 frustum planes } if (count == 8) return Clip::INSIDE; if (count > 0) return Clip::INTERCEPT; //if we got here all points are outside of the view frustum //check for all points being side same of any plane, means box outside of view for (int i = 0; i < 5; i++) { int temp = 0; for (int j = 0; j < 8; j++) { if (planes[i].DistanceToPoint(box.points[j]) >= 0.f) { temp++; } } if (temp == 0) { return Clip::OUTSIDE; } } //if we got here, box is traversing view frustum return Clip::INTERCEPT; } void CNew3D::CalcBoxExtents(const BBox& box) { for (int i = 0; i < 8; i++) { if (box.points[i][2] < 0.f) { m_nfPairs[m_currentPriority].zNear = std::max(box.points[i][2], m_nfPairs[m_currentPriority].zNear); m_nfPairs[m_currentPriority].zFar = std::min(box.points[i][2], m_nfPairs[m_currentPriority].zFar); } } } void CNew3D::ClipPolygon(ClipPoly& clipPoly, Plane planes[5]) { //============ ClipPoly temp; ClipPoly *in; ClipPoly *out; //============ in = &clipPoly; out = &temp; for (int i = 0; i < 4; i++) { //================= bool currentIn; float currentDot; //================= currentDot = planes[i].DotProduct(in->list[0].pos); currentIn = (currentDot + planes[i].d) >= 0.f; out->count = 0; for (int j = 0; j < in->count; j++) { if (currentIn) { out->list[out->count] = in->list[j]; out->count++; } int nextIndex = j + 1; if (nextIndex >= in->count) { 
nextIndex = 0; } float nextDot = planes[i].DotProduct(in->list[nextIndex].pos); bool nextIn = (nextDot + planes[i].d) >= 0.f; // we have an intersection if (currentIn != nextIn) { float u = (currentDot + planes[i].d) / (currentDot - nextDot); const float* p1 = in->list[j].pos; const float* p2 = in->list[nextIndex].pos; out->list[out->count].pos[0] = p1[0] + ((p2[0] - p1[0]) * u); out->list[out->count].pos[1] = p1[1] + ((p2[1] - p1[1]) * u); out->list[out->count].pos[2] = p1[2] + ((p2[2] - p1[2]) * u); out->count++; } currentDot = nextDot; currentIn = nextIn; } std::swap(in, out); } } void CNew3D::ClipModel(const Model *m) { //=============================== ClipPoly clipPoly; std::vector* vertices; int offset; //=============================== if (m->dynamic) { vertices = &m_polyBufferRam; offset = MAX_ROM_VERTS; } else { vertices = &m_polyBufferRom; offset = 0; } for (const auto &mesh : *m->meshes) { int start = mesh.vboOffset - offset; for (int i = 0; i < mesh.vertexCount; i += m_numPolyVerts) { // inc to next poly for (int j = 0; j < m_numPolyVerts; j++) { MultVec(m->modelMat, (*vertices)[start + i + j].pos, clipPoly.list[j].pos); // copy all 3 of 4 our transformed vertices into our clip poly struct } clipPoly.count = m_numPolyVerts; ClipPolygon(clipPoly, m_planes); for (int j = 0; j < clipPoly.count; j++) { if (clipPoly.list[j].pos[2] < 0.f) { m_nfPairs[m_currentPriority].zNear = std::max(clipPoly.list[j].pos[2], m_nfPairs[m_currentPriority].zNear); m_nfPairs[m_currentPriority].zFar = std::min(clipPoly.list[j].pos[2], m_nfPairs[m_currentPriority].zFar); } } } } } void CNew3D::CalcViewport(Viewport* vp, float near, float far) { if (far > 1e30f) { far = near * 1000000.f; // fix for ocean hunter which passes some FLT_MAX for a few matrices. 
HW must have some safe guard for these } if (near < far / 1000000.f) { near = far / 1000000.f; // if we get really close to zero somehow, we will have almost no depth precision } float l = near * vp->angle_left; // we need to calc the shape of the projection frustum for culling float r = near * vp->angle_right; float t = near * vp->angle_top; float b = near * vp->angle_bottom; vp->projectionMatrix.LoadIdentity(); // reset matrix if ((vp->vpX == 0) && (vp->vpWidth >= 495) && (vp->vpY == 0) && (vp->vpHeight >= 383)) { /* * Compute aspect ratio correction factor. "Window" refers to the full GL * viewport (i.e., totalXRes x totalYRes). "Viewable area" is the effective * Model 3 screen (xRes x yRes). In non-wide-screen, non-stretch mode, this * is intended to replicate the 496x384 display and may in general be * smaller than the window. The rest of the window appears to have a * border, which is created by a scissor box. * * In wide-screen mode, we want to expand the frustum horizontally to fill * the window. We want the aspect ratio to be correct. To accomplish this, * the viewable area is set *the same* as in non-wide-screen mode (e.g., * often smaller than the window) but glScissor() is set by the OSD layer's * screen setup code to reveal the entire window. * * In stretch mode, the window and viewable area are both set the same, * which means there will be no aspect ratio correction and the display * will stretch to fill the entire window while keeping the view frustum * the same as a 496x384 Model 3 display. The display will be distorted. 
*/ float windowAR = (float)m_totalXRes / (float)m_totalYRes; float viewableAreaAR = (float)m_xRes / (float)m_yRes; // Will expand horizontal frustum planes only in non-stretch mode (wide- // screen and non-wide-screen modes have identical resolution parameters // and only their scissor box differs) float correction = windowAR / viewableAreaAR; vp->x = 0; vp->y = m_yOffs + (int)((float)(384 - (vp->vpY + vp->vpHeight))*m_yRatio); vp->width = m_totalXRes; vp->height = (int)((float)vp->vpHeight*m_yRatio); vp->projectionMatrix.Frustum(l*correction, r*correction, b, t, near, far); } else { vp->x = m_xOffs + (int)((float)vp->vpX*m_xRatio); vp->y = m_yOffs + (int)((float)(384 - (vp->vpY + vp->vpHeight))*m_yRatio); vp->width = (int)((float)vp->vpWidth*m_xRatio); vp->height = (int)((float)vp->vpHeight*m_yRatio); vp->projectionMatrix.Frustum(l, r, b, t, near, far); } } void CNew3D::SetSunClamp(bool enable) { m_sunClamp = enable; } void CNew3D::SetSignedShade(bool enable) { m_shadeIsSigned = enable; } float CNew3D::GetLosValue(int layer) { // we always write to the 'back' buffer, and the software reads from the front // then they get swapped std::lock_guard guard(m_losMutex); return m_losFront->value[layer]; } void CNew3D::TranslateLosPosition(int inX, int inY, int& outX, int& outY) { // remap real3d 496x384 to our new viewport inY = 384 - inY; outX = m_xOffs + int(inX * m_xRatio); outY = m_yOffs + int(inY * m_yRatio); } bool CNew3D::ProcessLos(int priority) { for (const auto &n : m_nodes) { if (n.viewport.priority == priority) { if (n.viewport.losPosX || n.viewport.losPosY) { int losX, losY; TranslateLosPosition(n.viewport.losPosX, n.viewport.losPosY, losX, losY); float depth; glReadPixels(losX, losY, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT, &depth); depth = 2.0f * depth - 1.0f; float zNear = m_nfPairs[priority].zNear; float zFar = m_nfPairs[priority].zFar; float zVal = 2.0f * zNear * zFar / (zFar + zNear - depth * (zFar - zNear)); // real3d test program indicates that return 
values are 1/zVal zVal = 1.0f / zVal; GLubyte stencilVal; glReadPixels(losX, losY, 1, 1, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, &stencilVal); // if the stencil val is zero that means we've hit sky or whatever, if it hits a 1 we've hit geometry // the real3d returns 1 in the top bit of the float if the line of sight test passes (ie doesn't hit geometry) auto zValP = reinterpret_cast(&zVal); // this is legal in c++, casting to int technically isn't if (stencilVal == 0) { zValP[0] |= 1; // set first bit to 1 } else { zValP[0] &= 0xFE; // set first bit to zero } m_losBack->value[priority] = zVal; return true; } } } return false; } } // New3D