Supermodel/Src/Graphics/New3D/New3D.cpp
gm-matthew ac53101214 Implement LOD blending
If two translucent polygons with opposing patterns overlap the result is always opaque
Also the LOD scale calculation depends on Euclidean distance of x, y and z, not just z
2023-11-17 15:13:27 +00:00

1921 lines
58 KiB
C++

#include "New3D.h"
#include "Vec.h"
#include <cmath>
#include <algorithm>
#include <limits>
#include <cstring>
#include <unordered_map>
#include "R3DFloat.h"
#include "Util/BitCast.h"
// Number of vertices reserved in the shared VBO for per-frame (polygon RAM) model data.
// This region is uploaded every frame at byte offset MAX_ROM_VERTS * sizeof(FVertex).
#define MAX_RAM_VERTS 300000
// Number of vertices reserved at the start of the shared VBO for cached ROM models.
// When the ROM vertex cache reaches this size it is flushed and rebuilt.
#define MAX_ROM_VERTS 1500000
// Converts a byte value B to a float using (2*B + 1) / 255.
#define BYTE_TO_FLOAT(B) ((2.0f * (B) + 1.0f) * (float)(1.0/255.0))
namespace New3D {
// Constructs the renderer: records configuration-driven options, loads the
// Real3D shader, allocates the GL texture that receives texture RAM uploads,
// and creates the VAO/VBO pair together with the FVertex attribute layout.
// Emulated memory pointers start out null and are supplied via AttachMemory().
CNew3D::CNew3D(const Util::Config::Node &config, const std::string& gameName) :
m_r3dShader(config),
m_r3dScrollFog(config),
m_gameName(gameName),
m_textureBuffer(0),
m_vao(0)
{
// no emulated memory is attached yet
m_cullingRAMLo = nullptr;
m_cullingRAMHi = nullptr;
m_polyRAM = nullptr;
m_vrom = nullptr;
m_textureRAM = nullptr;
m_sunClamp = true;
m_shadeIsSigned = true;
// default: polygons are submitted as triangles (3 vertices each)
m_numPolyVerts = 3;
m_primType = GL_TRIANGLES;
// optional quad mode: 4 vertices per polygon, submitted as GL_LINES_ADJACENCY primitives
if (config["QuadRendering"].ValueAs<bool>()) {
m_numPolyVerts = 4;
m_primType = GL_LINES_ADJACENCY;
}
m_r3dShader.LoadShader();
glUseProgram(0);
// setup our texture memory
// a single 2048x2048 texture of unfiltered 16-bit unsigned integer texels
glGenTextures(1, &m_textureBuffer);
glBindTexture(GL_TEXTURE_2D, m_textureBuffer);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_R16UI, 2048, 2048, 0, GL_RED_INTEGER, GL_UNSIGNED_SHORT, nullptr); // allocate storage
// setup up our vertex buffer memory
// one buffer holds both the ROM and the RAM vertex regions
glGenVertexArrays(1, &m_vao);
glBindVertexArray(m_vao);
m_vbo.Create(GL_ARRAY_BUFFER, GL_DYNAMIC_DRAW, sizeof(FVertex) * (MAX_RAM_VERTS + MAX_ROM_VERTS));
m_vbo.Bind(true);
glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inVertex"));
glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inNormal"));
glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inTexCoord"));
glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inColour"));
glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inFaceNormal"));
glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos("inFixedShade"));
// before draw, specify vertex and index arrays with their offsets, offsetof is maybe evil ..
// NOTE(review): inVertex uses byte offset 0 - presumably the position is the first member of FVertex; confirm against its declaration
glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inVertex"), 4, GL_FLOAT, GL_FALSE, sizeof(FVertex), 0);
glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inNormal"), 3, GL_FLOAT, GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, normal));
glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inTexCoord"), 2, GL_FLOAT, GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, texcoords));
// colour is stored as 4 unsigned bytes and normalised by GL (GL_TRUE)
glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inColour"), 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(FVertex), (void*)offsetof(FVertex, faceColour));
glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inFaceNormal"), 3, GL_FLOAT, GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, faceNormal));
glVertexAttribPointer(m_r3dShader.GetVertexAttribPos("inFixedShade"), 1, GL_FLOAT, GL_FALSE, sizeof(FVertex), (void*)offsetof(FVertex, fixedShade));
// leave with nothing bound
glBindVertexArray(0);
m_vbo.Bind(false);
}
// Tears down the GL resources created by the constructor: the vertex buffer,
// the vertex array object, the texture RAM texture and finally the shader.
CNew3D::~CNew3D()
{
    m_vbo.Destroy();

    // only delete handles that were actually created, and zero them afterwards
    if (m_vao != 0) {
        glDeleteVertexArrays(1, &m_vao);
        m_vao = 0;
    }

    if (m_textureBuffer != 0) {
        glDeleteTextures(1, &m_textureBuffer);
        m_textureBuffer = 0;
    }

    m_r3dShader.UnloadShader();
}
// Stores the pointers to the emulated Real3D memory regions. The renderer only
// keeps the pointers; nothing is copied here.
void CNew3D::AttachMemory(const UINT32 *cullingRAMLoPtr, const UINT32 *cullingRAMHiPtr, const UINT32 *polyRAMPtr, const UINT32 *vromPtr, const UINT16 *textureRAMPtr)
{
    // culling RAM (scene graph), low and high regions
    m_cullingRAMLo  = cullingRAMLoPtr;
    m_cullingRAMHi  = cullingRAMHiPtr;

    // model data: polygon RAM and VROM
    m_polyRAM       = polyRAMPtr;
    m_vrom          = vromPtr;

    // 16-bit texture memory
    m_textureRAM    = textureRAMPtr;
}
// Selects the hardware stepping (0x10, 0x15, 0x20 or 0x21). Any other value
// falls back to 0x10. The stepping decides the culling node size and the
// fixed-point format of vertices.
void CNew3D::SetStepping(int stepping)
{
    switch (stepping) {
    case 0x10:
    case 0x15:
    case 0x20:
    case 0x21:
        m_step = stepping;
        break;
    default:
        m_step = 0x10;      // unknown stepping, treat as Step 1.0
        break;
    }

    const bool laterThanStep10 = (m_step > 0x10);

    // Step 1.5+ culling nodes are 10 words; Step 1.0 nodes are 8 words, so
    // field indices are shifted down by 2
    m_offset = laterThanStep10 ? 0 : 2;

    // vertices are 13.11 fixed point on Step 1.5+, 17.7 on Step 1.0
    m_vertexFactor = laterThanStep10 ? (1.0f / 2048.0f) : (1.0f / 128.0f);
}
// Records the output geometry (offset and resolution inside the physical
// display area plus the total resolution) and recreates the frame buffer
// objects at the total resolution. Always returns OKAY.
bool CNew3D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXResParam, unsigned totalYResParam)
{
    // scale factors relative to the 496x384 reference resolution
    const float perNativePixelX = (float)(1.0 / 496.0);
    const float perNativePixelY = (float)(1.0 / 384.0);

    m_xRes      = xRes;
    m_yRes      = yRes;
    m_xOffs     = xOffset;
    m_yOffs     = yOffset;
    m_xRatio    = xRes * perNativePixelX;
    m_yRatio    = yRes * perNativePixelY;

    m_totalXRes = totalXResParam;
    m_totalYRes = totalYResParam;

    // drop any frame buffers from a previous Init() before creating new ones
    m_r3dFrameBuffers.DestroyFBO();
    m_r3dFrameBuffers.CreateFBO(totalXResParam, totalYResParam);

    return OKAY;
}
// Copies a rectangle of texture RAM into the GL texture, one row per call.
// Texture RAM rows are 2048 texels wide, so each row of the rectangle starts
// at (row * 2048) + x. The level parameter is not used.
void CNew3D::UploadTextures(unsigned level, unsigned x, unsigned y, unsigned width, unsigned height)
{
    glBindTexture(GL_TEXTURE_2D, m_textureBuffer);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 2);      // texels are 16 bits wide

    unsigned row = y;
    unsigned rowsLeft = height;

    while (rowsLeft != 0) {
        glTexSubImage2D(GL_TEXTURE_2D, 0, x, row, width, 1, GL_RED_INTEGER, GL_UNSIGNED_SHORT, m_textureRAM + (row * 2048) + x);
        ++row;
        --rowsLeft;
    }
}
void CNew3D::DrawScrollFog()
{
// this is my best guess at the logic based upon what games are doing
//
// ocean hunter - every viewport has scroll fog values set. Must start with lowest priority layers as the higher ones sometimes are garbage
// scud race - first viewports in priority layer missing scroll values. The latter ones all contain valid scroll values.
// daytona - doesn't seem to use scroll fog at all. Will set scroll values for the first viewports, the end ones contain no scroll values. End credits have scroll fog, but constrained to the viewport
// vf3 - first viewport only has it set. But set with highest select value ?? Rest of the viewports in priority layer contain a lower select value
// sega bassfishing - first viewport in priority 1 sets scroll value. The rest all contain the wrong value + a higher select value ..
// spikeout final - 2nd viewport in the priority layer has scroll values set, none of the others do. It also uses the highest select value
// I think the basic logic is this: the real3d picks the highest scroll fog value, starting from the lowest priority layer.
// If it finds a value for priority layer 0 for example, it then bails out looking for any more.
// Fogging seems to be constrained to whatever the viewport is that is set.
// Scroll fog needs a density or start value to work, but these can come from another viewport if the fog colour is the same
Node* nodePtr = nullptr;
for (int i = 0; i < 4 && !nodePtr; i++) {
for (auto &n : m_nodes) {
if (n.viewport.priority == i) {
if (n.viewport.scrollFog > 0.f) {
// check to see if we have a higher scroll fog value
if (nodePtr) {
if (nodePtr->viewport.scrollFog < n.viewport.scrollFog) {
nodePtr = &n;
}
continue;
}
nodePtr = &n;
}
}
}
}
if (nodePtr) {
// interate nodes to see if any viewports with that fog colour actually set a fog density or start value
// if both of these are zero fogging is effectively disabled
for (auto& n : m_nodes) {
if (nodePtr->viewport.fogParams[0] == n.viewport.fogParams[0] &&
nodePtr->viewport.fogParams[1] == n.viewport.fogParams[1] &&
nodePtr->viewport.fogParams[2] == n.viewport.fogParams[2])
{
// check to see if we have a fog start or density value
if (n.viewport.fogParams[3] > 0.0f || n.viewport.fogParams[4] > 0.0f || n.viewport.scrollAtt > 0.0f) {
float rgba[4];
auto& vp = nodePtr->viewport;
rgba[0] = vp.fogParams[0];
rgba[1] = vp.fogParams[1];
rgba[2] = vp.fogParams[2];
rgba[3] = vp.scrollFog;
glViewport(vp.x, vp.y, vp.width, vp.height);
m_r3dScrollFog.DrawScrollFog(rgba, n.viewport.scrollAtt, n.viewport.fogParams[6], n.viewport.spotFogColor, n.viewport.spotEllipse);
break;
}
}
}
}
}
void CNew3D::DrawAmbientFog()
{
// logic here is still not totally understood
// some games are setting fog ambient which seems to darken the 2d background layer too when scroll fogging is not set
// The logic is something like tileGenColour * fogAmbient
// If fogAmbient = 1.0 it's a no-op. Lower values darken the image
// Does this work with scroll fog? Well technically scroll fog already takes into account the fog ambient as it darkens the fog colour
// Let's pick the lowest fog ambient value
// Check for fog density or a fog start value, otherwise the effect seems to be disabled (lost world)
float fogAmbient = 1.0f;
Node* nodePtr = nullptr;
for (auto& n : m_nodes) {
// check to see if we have a fog density or fog start
if (n.viewport.fogParams[3] <= 0.0f && n.viewport.fogParams[4] <= 0.0f) {
continue;
}
if (n.viewport.fogParams[6] < fogAmbient) {
nodePtr = &n;
fogAmbient = n.viewport.fogParams[6];
}
}
if (nodePtr) {
auto& vp = nodePtr->viewport;
float rgba[] = { 0.0f, 0.0f, 0.0f, 1.0f - fogAmbient };
glViewport(vp.x, vp.y, vp.width, vp.height);
m_r3dScrollFog.DrawScrollFog(rgba, 0.0f, 1.0f, vp.spotFogColor, vp.spotEllipse); // we assume spot light is not used
}
}
bool CNew3D::RenderScene(int priority, bool renderOverlay, Layer layer)
{
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, m_textureBuffer);
bool hasOverlay = false; // (high priority polys)
for (auto &n : m_nodes) {
if (n.viewport.priority != priority || n.models.empty()) {
continue;
}
CalcViewport(&n.viewport, std::abs(m_nfPairs[priority].zNear*0.96f), std::abs(m_nfPairs[priority].zFar*1.05f)); // make planes 5% bigger
glViewport(n.viewport.x, n.viewport.y, n.viewport.width, n.viewport.height);
m_r3dShader.SetViewportUniforms(&n.viewport);
for (auto &m : n.models) {
bool matrixLoaded = false;
if (m.meshes->empty()) {
continue;
}
for (auto &mesh : *m.meshes) {
if (mesh.highPriority) {
hasOverlay = true;
}
if (!mesh.Render(layer, m.alpha)) continue;
if (mesh.highPriority != renderOverlay) continue;
if (!matrixLoaded) {
m_r3dShader.SetModelStates(&m);
matrixLoaded = true; // do this here to stop loading matrices we don't need. Ie when rendering non transparent etc
}
m_r3dShader.SetMeshUniforms(&mesh);
glDrawArrays(m_primType, mesh.vboOffset, mesh.vertexCount);
}
}
}
return hasOverlay;
}
// Returns true when no viewport of the given priority layer has any models,
// i.e. there is nothing to render for that layer.
bool CNew3D::SkipLayer(int layer)
{
    return std::none_of(m_nodes.begin(), m_nodes.end(), [layer](const auto &node) {
        return node.viewport.priority == layer && !node.models.empty();
    });
}
// Binds the vertex buffer, vertex array and shader, then puts OpenGL into the
// state used for drawing the 3D scene. Undone by DisableRenderStates().
void CNew3D::SetRenderStates()
{
m_vbo.Bind(true);
glBindVertexArray(m_vao);
m_r3dShader.SetShader(true);
// depth testing on, depth writes on
glDepthFunc (GL_LEQUAL);
glEnable (GL_DEPTH_TEST);
glDepthMask (GL_TRUE);
glActiveTexture (GL_TEXTURE0);
glDisable (GL_CULL_FACE); // we'll emulate this in the shader
// stencil value is replaced only where both the stencil and depth tests pass; all stencil bits are writable
glEnable (GL_STENCIL_TEST);
glStencilOp (GL_KEEP, GL_KEEP, GL_REPLACE);
glStencilMask (0xFF);
// the blend function is configured here, but blending itself is left disabled
glBlendFunc (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glDisable (GL_BLEND);
}
// Counterpart of SetRenderStates(): unbinds the vertex buffer, vertex array
// and shader, and switches stencil testing off again. Other GL state changed
// by SetRenderStates() (depth test, depth func, blend func) is not touched here.
void CNew3D::DisableRenderStates()
{
m_vbo.Bind(false);
glBindVertexArray(0);
m_r3dShader.SetShader(false);
glDisable(GL_STENCIL_TEST);
}
// Renders one complete frame:
//  1. resets per-frame state (near/far pairs, line-of-sight buffers, model lists)
//  2. walks the scene graph starting at culling address 0x800000 to build the node/model lists
//  3. uploads the vertex data gathered during the walk to the VBO
//  4. draws fog, then each priority layer in up to two passes (normal polys, then high priority overlay polys),
//     each pass consisting of a colour layer and two translucent layers
//  5. composites the frame buffer layers
void CNew3D::RenderFrame(void)
{
// reset the near/far pair of each of the 4 priority layers to the widest possible values
for (int i = 0; i < 4; i++) {
m_nfPairs[i].zNear = -std::numeric_limits<float>::max();
m_nfPairs[i].zFar = std::numeric_limits<float>::max();
}
{
// swap the line-of-sight buffers under the lock, then zero the new back buffer for this frame
std::lock_guard<std::mutex> guard(m_losMutex);
std::swap(m_losBack, m_losFront);
for (int i = 0; i < 4; i++) {
m_losBack->value[i] = 0;
}
}
// release any resources from last frame
m_polyBufferRam.clear(); // clear dynamic model memory buffer
m_nodes.clear(); // memory will grow during the object life time, that's fine, no need to shrink to fit
m_modelMat.Release(); // would hope we wouldn't need this but no harm in checking
m_nodeAttribs.Reset();
RenderViewport(0x800000); // build model structure
m_vbo.Bind(true);
// the RAM vertex region starts directly after the space reserved for ROM vertices
// NOTE(review): nothing visible here checks m_polyBufferRam.size() against MAX_RAM_VERTS - confirm the limit is enforced where the buffer is filled
m_vbo.BufferSubData(MAX_ROM_VERTS*sizeof(FVertex), m_polyBufferRam.size()*sizeof(FVertex), m_polyBufferRam.data()); // upload all the dynamic data to GPU in one go
if (!m_polyBufferRom.empty()) {
// sync rom memory with vbo
// size = number of bytes cached on the CPU side that have not been appended to the VBO yet
int romBytes = (int)m_polyBufferRom.size() * sizeof(FVertex);
int vboBytes = m_vbo.GetSize();
int size = romBytes - vboBytes;
if (size) {
//check we haven't blown up the memory buffers
//we will lose rom models for 1 frame is this happens, not the end of the world, as probably won't ever happen anyway
if (m_polyBufferRom.size() >= MAX_ROM_VERTS) {
// ROM cache is full: throw everything away so it gets rebuilt from scratch
m_polyBufferRom.clear();
m_romMap.clear();
m_vbo.Reset();
}
else {
// append only the vertices added since the last sync
m_vbo.AppendData(size, &m_polyBufferRom[vboBytes / sizeof(FVertex)]);
}
}
}
m_r3dFrameBuffers.SetFBO(Layer::colour); // colour will draw to all 3 buffers. For regular opaque pixels the transparent layers will be essentially masked
glClear(GL_COLOR_BUFFER_BIT);
DrawAmbientFog();
DrawScrollFog(); // fog layer if applicable must be drawn here
// draw the 4 priority layers in order, lowest first
for (int pri = 0; pri <= 3; pri++) {
if (SkipLayer(pri)) continue;
// pass 0 draws the normal polys, pass 1 (only if needed) draws the high priority overlay polys
for (int i = 0; i < 2; i++) {
bool renderOverlay = (i == 1);
SetRenderStates();
// colour layer: depth and stencil are cleared for every pass
m_r3dFrameBuffers.SetFBO(Layer::colour);
glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
m_r3dShader.DiscardAlpha(true);
m_r3dShader.SetLayer(Layer::colour);
bool hasOverlay = RenderScene(pri, renderOverlay, Layer::colour);
// line of sight is only processed for the non-overlay pass
if (!renderOverlay) {
ProcessLos(pri);
}
// translucent layers: strict depth test, no alpha discard
glDepthFunc(GL_LESS);
m_r3dShader.DiscardAlpha(false);
// the depth buffer left by the colour layer is stored, used for trans1, then restored before trans2
m_r3dFrameBuffers.StoreDepth();
m_r3dShader.SetLayer(Layer::trans1);
m_r3dFrameBuffers.SetFBO(Layer::trans1);
RenderScene(pri, renderOverlay, Layer::trans1);
m_r3dFrameBuffers.RestoreDepth();
m_r3dShader.SetLayer(Layer::trans2);
m_r3dFrameBuffers.SetFBO(Layer::trans2);
RenderScene(pri, renderOverlay, Layer::trans2);
DisableRenderStates();
if (!hasOverlay) break; // no high priority polys
}
}
// composite the layers
m_r3dFrameBuffers.SetFBO(Layer::none);
m_r3dFrameBuffers.Draw();
}
// Frame start hook. This renderer has nothing to do here.
void CNew3D::BeginFrame()
{
    // intentionally empty
}
// Frame end hook. This renderer has nothing to do here.
void CNew3D::EndFrame()
{
    // intentionally empty
}
/******************************************************************************
Real3D Address Translation
Functions that interpret word-granular Real3D addresses and return pointers.
******************************************************************************/
// Converts a 24-bit, word-granular culling RAM address into a host pointer.
// Addresses 0x800000-0x83FFFF map to high culling RAM, addresses below
// 0x100000 map to low culling RAM. Anything else yields a null pointer.
const UINT32* CNew3D::TranslateCullingAddress(UINT32 addr)
{
    const UINT32 wordAddr = addr & 0x00FFFFFF;      // caller should have done this already

    const bool inHighRAM = (wordAddr >= 0x800000) && (wordAddr < 0x840000);
    if (inHighRAM) {
        return &m_cullingRAMHi[wordAddr & 0x3FFFF];
    }

    if (wordAddr < 0x100000) {
        return &m_cullingRAMLo[wordAddr];
    }

    return nullptr;
}
// Converts a 24-bit, word-granular model address into a host pointer.
// Addresses below 0x100000 refer to polygon RAM, everything else to VROM.
const UINT32* CNew3D::TranslateModelAddress(UINT32 modelAddr)
{
    const UINT32 wordAddr = modelAddr & 0x00FFFFFF;     // caller should have done this already

    const UINT32* const region = (wordAddr < 0x100000) ? m_polyRAM : m_vrom;

    return &region[wordAddr];
}
// Appends a model to the current node's model list. Static VROM models share
// their mesh list through the ROM cache (m_romMap); all other models get a
// fresh mesh list every time. The current matrix and node attributes are
// captured into the model. Always returns true.
bool CNew3D::DrawModel(UINT32 modelAddr)
{
    const UINT32* const modelData = TranslateModelAddress(modelAddr);

    // create a new model at the end of the current node's list and work on it in place
    auto& nodeModels = m_nodes.back().models;
    nodeModels.emplace_back();
    Model* model = &nodeModels.back();

    bool mustBuildMeshes = true;

    if (IsVROMModel(modelAddr) && !IsDynamicModel((UINT32*)modelData)) {
        // look the meshes up in the rom cache; a missing entry is created holding a null pointer
        auto& cacheEntry = m_romMap[modelAddr];

        if (cacheEntry) {
            model->meshes = cacheEntry;
            mustBuildMeshes = false;
        }
        else {
            model->meshes = std::make_shared<std::vector<Mesh>>();
            cacheEntry = model->meshes;     // store meshes in our rom map here
        }

        model->dynamic = false;
    }
    else {
        model->meshes = std::make_shared<std::vector<Mesh>>();
    }

    // snapshot the current model matrix
    for (int element = 0; element != 16; ++element) {
        model->modelMat[element] = m_modelMat.currentMatrix[element];
    }

    // snapshot the current node attributes
    model->textureOffsetX   = m_nodeAttribs.currentTexOffsetX;
    model->textureOffsetY   = m_nodeAttribs.currentTexOffsetY;
    model->page             = m_nodeAttribs.currentPage;
    model->scale            = m_nodeAttribs.currentModelScale;
    model->alpha            = m_nodeAttribs.currentModelAlpha;

    if (mustBuildMeshes) {
        CacheModel(model, modelData);
    }

    // not storing clipped values, only working out the Z range
    if (m_nodeAttribs.currentClipStatus != Clip::INSIDE) {
        ClipModel(model);
    }

    return true;
}
/*
0x00: x------- -------- -------- -------- Is UF ref
-x------ -------- -------- -------- Is 3D model
--x----- -------- -------- -------- Is point
---x---- -------- -------- -------- Is point ref
----x--- -------- -------- -------- Is animation
-----x-- -------- -------- -------- Is billboard
------x- -------- -------- -------- Child is billboard
-------x -------- -------- -------- Extra child pointer needed
-------x xxxxx--- -------- -------- Spare (unknown if used)
-------- -----xxx xxxxxx-- -------- Node ID
-------- -------- ------x- -------- Discard 1
-------- -------- -------x -------- Discard 2
-------- -------- -------- x------- Reset matrix
-------- -------- -------- -x------ Use child pointer
-------- -------- -------- --x----- Use sibling pointer
-------- -------- -------- ---x---- No matrix
-------- -------- -------- ----x--- Indirect child
-------- -------- -------- -----x-- Valid color table
-------- -------- -------- ------xx Node type(0 = viewport, 1 = root node, 2 = culling node)
0x01, 0x02 only present on Step 1.5+
0x01: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxx-- Model scale (float32) last 2 bits are control words
-------- -------- -------- ------x- Disable culling
-------- -------- -------- -------x Valid model scale
0x02 : -------- -------- x------- -------- Texture replace
-------- -------- -x------ -------- Switch bank
-------- -------- --xxxxxx x------- X offset
-------- -------- -------- -xxxxxxx Y offset
0x03 : xxxxxxxx xxxxx--- -------- -------- Color table address 1
-------- -----xxx xxxx---- -------- LOD table pointer
-------- -------- ----xxxx xxxxxxxx Node matrix
0x04: Translation X coordinate
0x05: Translation Y coordinate
0x06: Translation Z coordinate
0x07: xxxx---- -------- -------- -------- Color table address 2
-----x-- -------- -------- -------- Sibling table
------x- -------- -------- -------- Point
-------x -------- -------- -------- Leaf node
-------- xxxxxxxx xxxxxxxx xxxxxxxx Child pointer
0x08: xxxxxxx- -------- -------- -------- Color table address 3
-------x -------- -------- -------- Null sibling
-------- xxxxxxxx xxxxxxxx xxxxxxxx Sibling pointer
0x09: xxxxxxxx xxxxxxxx -------- -------- Blend radius
-------- -------- xxxxxxxx xxxxxxxx Culling radius
*/
// Processes one culling node of the scene graph (see the field layout above).
//
// In order: the sibling link is followed first (recursively), then this node's
// colour table address, model scale, culling flag and texture offsets are
// applied on top of the saved attribute state, the node's translation or
// matrix is applied to the model matrix stack, the node is clipped against the
// view frustum, a level of detail is chosen, and finally the child link is
// followed (either through a 4-entry LOD table or through DescendNodePtr).
// The matrix and attribute state are restored before returning.
//
// Field indices are written as (index - m_offset) because Step 1.0 nodes lack
// words 0x01 and 0x02 (m_offset is 2 for Step 1.0 and 0 for Step 1.5+).
void CNew3D::DescendCullingNode(UINT32 addr)
{
enum class NodeType { undefined = -1, viewport = 0, rootNode = 1, cullingNode = 2 };
const UINT32 *node, *lodPtr;
UINT32 matrixOffset, child1Ptr, sibling2Ptr;
BBox bbox;
UINT16 uCullRadius;
float fCullRadius;
UINT16 uBlendRadius;
float fBlendRadius;
UINT8 lodTablePointer;
NodeType nodeType;
bool resetMatrix;
// stop recursing when the attribute stack reports its limit
if (m_nodeAttribs.StackLimit()) {
return;
}
node = TranslateCullingAddress(addr);
if (NULL == node) {
return;
}
// Extract known fields
nodeType = (NodeType)(node[0x00] & 3);
child1Ptr = node[0x07 - m_offset] & 0x7FFFFFF; // mask colour table bits
sibling2Ptr = node[0x08 - m_offset] & 0x1FFFFFF; // mask colour table bits
matrixOffset = node[0x03 - m_offset] & 0xFFF;
resetMatrix = (node[0x0] & 0x80) > 0;
lodTablePointer = (node[0x03 - m_offset] >> 12) & 0x7F;
// check our node type
if (nodeType == NodeType::viewport) {
return; // viewport nodes aren't rendered
}
// node discard
if ((0x300 & node[0]) == 0x300) { // why 2 bits for node discard? Sega rally uses this
return;
}
// parse siblings
// the sibling is visited before this node's own attributes and matrix are applied, so it sees the parent's state
if ((node[0x00] & 0x07) != 0x06) { // colour table seems to indicate no siblings
if (!(sibling2Ptr & 0x1000000) && sibling2Ptr) {
DescendCullingNode(sibling2Ptr); // no need to mask bit, would already be zero
}
}
// valid colour table: the address is assembled from the spare upper bits of words 0x03, 0x07 and 0x08
if ((node[0x00] & 0x04)) {
m_colorTableAddr = ((node[0x03 - m_offset] >> 19) << 0) | ((node[0x07 - m_offset] >> 28) << 13) | ((node[0x08 - m_offset] >> 25) << 17);
m_colorTableAddr &= 0x000FFFFF; // clamp to 4MB (in words) range
}
m_nodeAttribs.Push(); // save current attribs
if (!m_offset) { // Step 1.5+
if (node[0x01] & 1)
m_nodeAttribs.currentModelScale = Util::Uint32AsFloat(node[0x01] & ~3); // mask out control bits
if (node[0x01] & 2)
m_nodeAttribs.currentDisableCulling = true;
// apply texture offsets, else retain current ones
if ((node[0x02] & 0x8000)) {
int tx = 32 * ((node[0x02] >> 7) & 0x3F);
int ty = 32 * (node[0x02] & 0x1F);
m_nodeAttribs.currentTexOffsetX = tx;
m_nodeAttribs.currentTexOffsetY = ty;
m_nodeAttribs.currentPage = (node[0x02] & 0x4000) >> 14;
}
}
// Apply matrix and translation
m_modelMat.PushMatrix();
// apply translation vector
if (node[0x00] & 0x10) {
float x = Util::Uint32AsFloat(node[0x04 - m_offset]);
float y = Util::Uint32AsFloat(node[0x05 - m_offset]);
float z = Util::Uint32AsFloat(node[0x06 - m_offset]);
m_modelMat.Translate(x, y, z);
}
// multiply matrix, if specified
else if (matrixOffset) {
MultMatrix(matrixOffset,m_modelMat);
}
if (resetMatrix) {
ResetMatrix(m_modelMat);
}
// word 9 packs the culling radius (low 16 bits) and blend radius (high 16 bits) as 16-bit floats
uCullRadius = node[9 - m_offset] & 0xFFFF;
fCullRadius = R3DFloat::GetFloat16(uCullRadius);
uBlendRadius = node[9 - m_offset] >> 16;
fBlendRadius = R3DFloat::GetFloat16(uBlendRadius);
// once a parent is known to be fully inside the frustum, children are not clipped again
if (m_nodeAttribs.currentClipStatus != Clip::INSIDE) {
if (uCullRadius != R3DFloat::Pro16BitMax) {
CalcBox(fCullRadius, bbox);
TransformBox(m_modelMat, bbox);
m_nodeAttribs.currentClipStatus = ClipBox(bbox, m_planes);
if (m_nodeAttribs.currentClipStatus == Clip::INSIDE) {
CalcBoxExtents(bbox);
}
}
else {
// a maximum-value radius gives no usable bounding box, so the clip status is left undetermined
m_nodeAttribs.currentClipStatus = Clip::NOT_SET;
}
}
// LOD scale = blend radius * model scale / distance, where distance is the Euclidean length of the
// translation part (elements 12, 13, 14) of the current model matrix. With culling disabled the
// scale is forced to the largest float so every size test below passes.
// NOTE(review): FLT_MAX comes from <cfloat>, which is not among the includes visible in this file - presumably pulled in transitively; confirm
float LODscale;
if (m_nodeAttribs.currentDisableCulling)
LODscale = FLT_MAX;
else
{
float distance = std::hypot(m_modelMat.currentMatrix[12], m_modelMat.currentMatrix[13], m_modelMat.currentMatrix[14]);
LODscale = fBlendRadius * m_nodeAttribs.currentModelScale / distance;
}
// NOTE(review): m_LODBlendTable is dereferenced without a null check; it is assigned in RenderViewport from
// TranslateCullingAddress(), which can return NULL - confirm the table address is always valid
const LODFeatureType& lodTableEntry = m_LODBlendTable->table[lodTablePointer];
// skip the whole subtree if it is outside the frustum or too small for even the lowest level of detail
if (m_nodeAttribs.currentClipStatus != Clip::OUTSIDE && LODscale >= lodTableEntry.lod[3].deleteSize) {
// Descend down first link
if ((node[0x00] & 0x08)) // 4-element LOD table
{
lodPtr = TranslateCullingAddress(child1Ptr);
if (NULL != lodPtr)
{
// pick the first of LODs 0-2 that is large enough and whose table entry has bit 24 set; otherwise fall through to LOD 3
int modelLOD;
for (modelLOD = 0; modelLOD < 3; modelLOD++)
{
if (LODscale >= lodTableEntry.lod[modelLOD].deleteSize && lodPtr[modelLOD] & 0x1000000)
break;
}
// alpha ramps up linearly from 0 at the LOD's delete size, at a rate given by its blend factor
float tempAlpha = m_nodeAttribs.currentModelAlpha;
float nodeAlpha = lodTableEntry.lod[modelLOD].blendFactor * (LODscale - lodTableEntry.lod[modelLOD].deleteSize);
nodeAlpha = std::clamp(nodeAlpha, 0.0f, 1.0f);
if (nodeAlpha > 15.0f / 16.0f) // shader discards pixels below 1/16 alpha
nodeAlpha = 1.0f;
else if (nodeAlpha < 1.0f / 16.0f)
nodeAlpha = 0.0f;
m_nodeAttribs.currentModelAlpha *= nodeAlpha; // alpha of each node multiples by the alpha of its parent
// bit 29 of word 0x03: the LOD table entries are culling nodes rather than models
if ((node[0x03 - m_offset] & 0x20000000)) {
DescendCullingNode(lodPtr[modelLOD] & 0xFFFFFF);
// while blending, the next lower level of detail is drawn too, with the complementary alpha
if (nodeAlpha < 1.0f && modelLOD != 3)
{
m_nodeAttribs.currentModelAlpha = (1.0f - nodeAlpha) * tempAlpha;
DescendCullingNode(lodPtr[modelLOD+1] & 0xFFFFFF);
}
}
else {
DrawModel(lodPtr[modelLOD] & 0xFFFFFF);
// while blending, the next lower level of detail is drawn too, with the complementary alpha
if (nodeAlpha < 1.0f && modelLOD != 3)
{
m_nodeAttribs.currentModelAlpha = (1.0f - nodeAlpha) * tempAlpha;
DrawModel(lodPtr[modelLOD + 1] & 0xFFFFFF);
}
}
}
}
else {
// no LOD table: fade the child using the lowest LOD entry only (no snapping to 0 or 1 here)
float nodeAlpha = lodTableEntry.lod[3].blendFactor * (LODscale - lodTableEntry.lod[3].deleteSize);
nodeAlpha = std::clamp(nodeAlpha, 0.0f, 1.0f);
m_nodeAttribs.currentModelAlpha *= nodeAlpha; // alpha of each node multiples by the alpha of its parent
DescendNodePtr(child1Ptr);
}
}
m_modelMat.PopMatrix();
// Restore old texture offsets
// (this restores all the attributes saved by Push() above)
m_nodeAttribs.Pop();
}
// Follows a child link. Bits 24 and 26 of the link select what the lower
// 24 bits refer to: a culling node, a model, or a pointer list. Null links
// and links with both bits set are ignored.
void CNew3D::DescendNodePtr(UINT32 nodeAddr)
{
    const UINT32 target = nodeAddr & 0x00FFFFFF;

    // Ignore null links
    if (target == 0) {
        return;
    }

    // pointer type encoded in upper 8 bits
    const UINT32 linkType = (nodeAddr >> 24) & 0x5;

    if (linkType == 0x00) {
        DescendCullingNode(target);
    }
    else if (linkType == 0x01) {
        DrawModel(target);
    }
    else if (linkType == 0x04) {
        DescendPointerList(target);
    }
    // anything else (0x05) is not handled
}
// Walks a list of culling node pointers stored in culling RAM. An entry with
// bit 24 set terminates the list without being processed; an entry with
// bit 25 set is processed and then terminates the list.
void CNew3D::DescendPointerList(UINT32 addr)
{
    const UINT32* entry = TranslateCullingAddress(addr);

    if (entry == nullptr) {
        return;
    }

    for (;; ++entry) {
        if (*entry & 0x01000000) {
            return;     // empty list
        }

        // clear upper 8 bits to ensure this is processed as a culling node
        DescendCullingNode(*entry & 0x00FFFFFF);

        if (*entry & 0x02000000) {
            return;     // list end
        }
    }
}
/******************************************************************************
Matrix Stack
******************************************************************************/
// Macro to generate column-major (OpenGL) index from y,x subscripts (y = row, x = column).
// The arguments are parenthesised so that expressions such as CMINDEX(r + 1, c + 1) expand correctly.
#define CMINDEX(y,x) ((x)*4+(y))
/*
* MultMatrix():
*
* Multiplies the matrix stack by the specified Real3D matrix. The matrix
* index is a 12-bit number specifying a matrix number relative to the base.
* The base matrix MUST be set up before calling this function.
*/
void CNew3D::MultMatrix(UINT32 matrixOffset, Mat4& mat)
{
    // The matrix base pointer comes from TranslateCullingAddress() and may be
    // null (LA Machineguns). Check it before deriving any pointer from it:
    // indexing a null pointer is undefined behaviour even if the result is
    // never read.
    if (m_matrixBasePtr == nullptr) {
        return;
    }

    // each Real3D matrix occupies 12 floats: a translation vector (3 floats)
    // followed by a 3x3 rotation stored row by row (9 floats)
    const float *src = &m_matrixBasePtr[matrixOffset * 12];

    GLfloat m[4*4];

    // row 0
    m[CMINDEX(0, 0)] = src[3];
    m[CMINDEX(0, 1)] = src[4];
    m[CMINDEX(0, 2)] = src[5];
    m[CMINDEX(0, 3)] = src[0];

    // row 1
    m[CMINDEX(1, 0)] = src[6];
    m[CMINDEX(1, 1)] = src[7];
    m[CMINDEX(1, 2)] = src[8];
    m[CMINDEX(1, 3)] = src[1];

    // row 2
    m[CMINDEX(2, 0)] = src[9];
    m[CMINDEX(2, 1)] = src[10];
    m[CMINDEX(2, 2)] = src[11];
    m[CMINDEX(2, 3)] = src[2];

    // row 3 is fixed: the matrix is affine
    m[CMINDEX(3, 0)] = 0.0;
    m[CMINDEX(3, 1)] = 0.0;
    m[CMINDEX(3, 2)] = 0.0;
    m[CMINDEX(3, 3)] = 1.0;

    mat.MultMatrix(m);
}
/*
* InitMatrixStack():
*
* Initializes the modelview (model space -> view space) matrix stack and
* Real3D coordinate system. These are the last transforms to be applied (and
* the first to be defined on the stack) before projection.
*
* Model 3 games tend to define the following unusual base matrix:
*
* 0 0 -1 0
* 1 0 0 0
* 0 -1 0 0
* 0 0 0 1
*
* When this is multiplied by a column vector, the output is:
*
* -Z
* X
* -Y
* 1
*
* My theory is that the Real3D GPU accepts vectors in Z,X,Y order. The games
* store everything as X,Y,Z and perform the translation at the end. The Real3D
* also has Y and Z coordinates opposite of the OpenGL convention. This
* function inserts a compensating matrix to undo these things.
*
* NOTE: This function assumes we are in GL_MODELVIEW matrix mode.
*/
void CNew3D::InitMatrixStack(UINT32 matrixBaseAddr, Mat4& mat)
{
    // Compensating matrix, written out directly in column-major (OpenGL) order.
    // It converts vectors back from the weird Model 3 Z,X,Y ordering and also
    // into OpenGL viewspace (-Y,-Z). As rows it reads:
    //
    //       0  1  0  0
    //       0  0 -1  0
    //      -1  0  0  0
    //       0  0  0  1
    GLfloat compensate[4 * 4] = {
         0.0f,  0.0f, -1.0f,  0.0f,     // column 0
         1.0f,  0.0f,  0.0f,  0.0f,     // column 1
         0.0f, -1.0f,  0.0f,  0.0f,     // column 2
         0.0f,  0.0f,  0.0f,  1.0f      // column 3
    };

    mat.LoadMatrix(compensate);

    // Set matrix base address and apply matrix #0 (coordinate system matrix)
    m_matrixBasePtr = (float *)TranslateCullingAddress(matrixBaseAddr);
    MultMatrix(0, mat);
}
// Cancels the rotation of the current matrix while keeping its position and
// scale. This is done by multiplying the matrix with the normalised transpose
// of its own upper 3x3 block.
void CNew3D::ResetMatrix(Mat4& mat)
{
    float inv[16];
    std::memcpy(inv, mat.currentMatrix, sizeof(inv));

    // transpose the top 3x3 of the matrix (this effectively inverts the rotation).
    // When we multiply our new matrix it'll effectively cancel out the rotations.
    std::swap(inv[1], inv[4]);
    std::swap(inv[2], inv[8]);
    std::swap(inv[6], inv[9]);

    // set position to zero
    inv[12] = 0;
    inv[13] = 0;
    inv[14] = 0;
    inv[15] = 1;

    // normalise columns, this removes the scaling, otherwise we'll apply it twice
    for (int column = 0; column < 3; ++column) {
        float* c = &inv[column * 4];
        const float length = std::sqrt((c[0] * c[0]) + (c[1] * c[1]) + (c[2] * c[2]));
        c[0] /= length;
        c[1] /= length;
        c[2] /= length;
    }

    mat.MultMatrix(inv);
}
// Draws viewports of the given priority
void CNew3D::RenderViewport(UINT32 addr)
{
static const GLfloat color[8][3] =
{ // RGB1 color translation
{ 0.0f, 0.0f, 0.0f }, // off
{ 0.0f, 0.0f, 1.0f }, // blue
{ 0.0f, 1.0f, 0.0f }, // green
{ 0.0f, 1.0f, 1.0f }, // cyan
{ 1.0f, 0.0f, 0.0f }, // red
{ 1.0f, 0.0f, 1.0f }, // purple
{ 1.0f, 1.0f, 0.0f }, // yellow
{ 1.0f, 1.0f, 1.0f } // white
};
if ((addr & 0x00FFFFFF) == 0) {
return;
}
// Translate address and obtain pointer
const uint32_t *vpnode = TranslateCullingAddress(addr);
if (NULL == vpnode) {
return;
}
if (!(vpnode[0] & 0x20)) { // only if viewport enabled
// create node object
m_nodes.emplace_back(Node());
m_nodes.back().models.reserve(2048); // create space for models
// get pointer to its viewport
Viewport *vp = &m_nodes.back().viewport;
vp->priority = (vpnode[0] >> 3) & 0x3;
vp->select = (vpnode[0] >> 8) & 0x3;
vp->number = (vpnode[0] >> 10);
m_currentPriority = vp->priority;
// Fetch viewport parameters (TO-DO: would rounding make a difference?)
vp->vpX = (int)(((vpnode[0x1A] & 0xFFFF) * (float)(1.0 / 16.0)) + 0.5f); // viewport X (12.4 fixed point)
vp->vpY = (int)(((vpnode[0x1A] >> 16) * (float)(1.0 / 16.0)) + 0.5f); // viewport Y (12.4)
vp->vpWidth = (int)(((vpnode[0x14] & 0xFFFF) * (float)(1.0 / 4.0)) + 0.5f); // width (14.2)
vp->vpHeight = (int)(((vpnode[0x14] >> 16) * (float)(1.0 / 4.0)) + 0.5f); // height (14.2)
uint32_t matrixBase = vpnode[0x16] & 0xFFFFFF; // matrix base address
m_LODBlendTable = (LODBlendTable*)TranslateCullingAddress(vpnode[0x17] & 0xFFFFFF);
/*
vp->angle_left = -atan2f(Util::Uint32AsFloat(vpnode[12]), Util::Uint32AsFloat(vpnode[13])); // These values work out as the normals for the clipping planes.
vp->angle_right = atan2f(Util::Uint32AsFloat(vpnode[16]), -Util::Uint32AsFloat(vpnode[17])); // Sometimes these values (dirt devils,lost world) are totally wrong
vp->angle_top = atan2f(Util::Uint32AsFloat(vpnode[14]), Util::Uint32AsFloat(vpnode[15])); // and don't work for the frustum values exactly.
vp->angle_bottom = -atan2f(Util::Uint32AsFloat(vpnode[18]), -Util::Uint32AsFloat(vpnode[19])); // Perhaps they are just used for culling and not rendering.
*/
float cv = Util::Uint32AsFloat(vpnode[0x8]); // 1/(left-right)
float cw = Util::Uint32AsFloat(vpnode[0x9]); // 1/(top-bottom)
float io = Util::Uint32AsFloat(vpnode[0xa]); // top / bottom (ratio) - ish
float jo = Util::Uint32AsFloat(vpnode[0xb]); // left / right (ratio)
vp->angle_left = (0.0f - jo) / cv;
vp->angle_right = (1.0f - jo) / cv;
vp->angle_bottom = -(1.0f - io)/ cw;
vp->angle_top = -(0.0f - io)/ cw;
// calculate the frustum shape, near/far pair are dummy values
CalcViewport(vp, 1.f, 1000.f);
// calculate frustum planes
CalcFrustumPlanes(m_planes, vp->projectionMatrix); // we need to calc a 'projection matrix' to get the correct frustum planes for clipping
// Lighting (note that sun vector points toward sun -- away from vertex)
vp->lightingParams[0] = Util::Uint32AsFloat(vpnode[0x05]); // sun X
vp->lightingParams[1] = -Util::Uint32AsFloat(vpnode[0x06]); // sun Y (- to convert to ogl cordinate system)
vp->lightingParams[2] = -Util::Uint32AsFloat(vpnode[0x04]); // sun Z (- to convert to ogl cordinate system)
vp->lightingParams[3] = std::max(0.f, std::min(Util::Uint32AsFloat(vpnode[0x07]), 1.0f)); // sun intensity (clamp to 0-1)
vp->lightingParams[4] = (float)((vpnode[0x24] >> 8) & 0xFF) * (float)(1.0 / 255.0); // ambient intensity
vp->lightingParams[5] = 0.0f; // reserved
vp->sunClamp = m_sunClamp;
vp->intensityClamp = (m_step == 0x10); // just step 1.0 ?
vp->hardwareStep = m_step;
// Spotlight
int spotColorIdx = (vpnode[0x20] >> 11) & 7; // spotlight color index
int spotFogColorIdx = (vpnode[0x20] >> 8) & 7; // spotlight on fog color index
vp->spotEllipse[0] = (float)(INT16)(vpnode[0x1E] & 0xFFFF) * (float)(1.0 / 8.0);// spotlight X position (13.3 fixed point)
vp->spotEllipse[1] = (float)(INT16)(vpnode[0x1D] & 0xFFFF) * (float)(1.0 / 8.0);// spotlight Y
vp->spotEllipse[2] = (float)((vpnode[0x1E] >> 16) & 0xFFFF); // spotlight X size (16-bit)
vp->spotEllipse[3] = (float)((vpnode[0x1D] >> 16) & 0xFFFF); // spotlight Y size
vp->spotRange[0] = 1.0f / Util::Uint32AsFloat(vpnode[0x21]); // spotlight start
vp->spotRange[1] = Util::Uint32AsFloat(vpnode[0x1F]); // spotlight extent
vp->spotColor[0] = color[spotColorIdx][0]; // spotlight color
vp->spotColor[1] = color[spotColorIdx][1];
vp->spotColor[2] = color[spotColorIdx][2];
vp->spotFogColor[0] = color[spotFogColorIdx][0]; // spotlight color on fog
vp->spotFogColor[1] = color[spotFogColorIdx][1];
vp->spotFogColor[2] = color[spotFogColorIdx][2];
// spotlight is specified in terms of physical resolution
vp->spotEllipse[1] = 384.0f - vp->spotEllipse[1]; // flip Y position
// Avoid division by zero
vp->spotEllipse[2] = std::max(1.0f, vp->spotEllipse[2]);
vp->spotEllipse[3] = std::max(1.0f, vp->spotEllipse[3]);
vp->spotEllipse[2] = std::roundf(2047.0f / vp->spotEllipse[2]);
vp->spotEllipse[3] = std::roundf(2047.0f / vp->spotEllipse[3]);
// Scale the spotlight to the OpenGL viewport
vp->spotEllipse[0] = vp->spotEllipse[0] * m_xRatio + (float)m_xOffs;
vp->spotEllipse[1] = vp->spotEllipse[1] * m_yRatio + (float)m_yOffs;
vp->spotEllipse[2] *= m_xRatio;
vp->spotEllipse[3] *= m_yRatio;
// Line of sight position
vp->losPosX = (int)(((vpnode[0x1c] & 0xFFFF) / 16.0f) + 0.5f); // x position
vp->losPosY = (int)(((vpnode[0x1c] >> 16) / 16.0f) + 0.5f); // y position 0 starts from the top
// Fog
vp->fogParams[0] = (float)((vpnode[0x22] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog color R
vp->fogParams[1] = (float)((vpnode[0x22] >> 8) & 0xFF) * (float)(1.0 / 255.0); // fog color G
vp->fogParams[2] = (float)((vpnode[0x22] >> 0) & 0xFF) * (float)(1.0 / 255.0); // fog color B
vp->fogParams[3] = std::abs(Util::Uint32AsFloat(vpnode[0x23])); // fog density - ocean hunter uses negative values, but looks the same
vp->fogParams[4] = (float)(INT16)(vpnode[0x25] & 0xFFFF)* (float)(1.0 / 255.0); // fog start
// Avoid Infinite and NaN values for Star Wars Trilogy
if (std::isinf(vp->fogParams[3]) || std::isnan(vp->fogParams[3])) {
for (int i = 0; i < 7; i++) vp->fogParams[i] = 0.0f;
}
vp->fogParams[5] = (float)((vpnode[0x24] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog attenuation
vp->fogParams[6] = (float)((vpnode[0x25] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog ambient
vp->scrollFog = (float)(vpnode[0x20] & 0xFF) * (float)(1.0 / 255.0); // scroll fog
vp->scrollAtt = (float)(vpnode[0x24] & 0xFF) * (float)(1.0 / 255.0); // scroll attenuation
// Clear texture offsets before proceeding
m_nodeAttribs.Reset();
// Set up coordinate system and base matrix
InitMatrixStack(matrixBase, m_modelMat);
// Descend down the node link. Need to start with a culling node because that defines our culling radius.
auto childptr = vpnode[0x02];
if (((childptr >> 24) & 0x5) == 0) {
DescendNodePtr(vpnode[0x02]);
}
}
// render next viewport
if (vpnode[0x01] != 0x01000000) {
RenderViewport(vpnode[0x01]);
}
}
void CNew3D::CopyVertexData(const R3DPoly& r3dPoly, std::vector<FVertex>& vertexArray)
{
// both lemans 24 and dirt devils are rendering some totally transparent polys as the first object in each viewport
// in dirt devils it's parallel to the camera so is completely invisible, but breaks our depth calculation
// in lemans 24 its a sort of diamond shape, but never leaves a hole in the transparent geometry so must be being skipped by the h/w
if (r3dPoly.faceColour[3] == 0) {
return;
}
if (m_numPolyVerts==4) {
if (r3dPoly.number == 4) {
vertexArray.emplace_back(r3dPoly, 0); // construct directly inside container without copy
vertexArray.emplace_back(r3dPoly, 1);
vertexArray.emplace_back(r3dPoly, 2);
vertexArray.emplace_back(r3dPoly, 3);
// check for identical points (ie forced triangle) and replace with average point
// if we don't do this our quad code falls apart
FVertex* v = (&vertexArray.back()) - 3;
for (int i = 0; i < 4; i++) {
int next1 = (i + 1) % 4;
int next2 = (i + 2) % 4;
if (FVertex::Equal(v[i], v[next1])) {
FVertex::Average(v[next1], v[next2], v[next1]);
break;
}
}
}
else {
vertexArray.emplace_back(r3dPoly, 0);
vertexArray.emplace_back(r3dPoly, 1);
vertexArray.emplace_back(r3dPoly, 2);
vertexArray.emplace_back(r3dPoly, 0, 2); // last point is an average of 0 and 2
}
}
else {
vertexArray.emplace_back(r3dPoly, 0);
vertexArray.emplace_back(r3dPoly, 1);
vertexArray.emplace_back(r3dPoly, 2);
if (r3dPoly.number == 4) {
vertexArray.emplace_back(r3dPoly, 0);
vertexArray.emplace_back(r3dPoly, 2);
vertexArray.emplace_back(r3dPoly, 3);
}
}
}
// Converts a raw 16-bit texture coordinate pair into floating point:
// each input is multiplied by uvScale and then divided by the texture
// width (for U) or height (for V). Results are written to uOut / vOut.
void CNew3D::GetCoordinates(int width, int height, UINT16 uIn, UINT16 vIn, float uvScale, float& uOut, float& vOut)
{
    vOut = (static_cast<float>(vIn) * uvScale) / static_cast<float>(height);
    uOut = (static_cast<float>(uIn) * uvScale) / static_cast<float>(width);
}
// Maps a texture format number to the one used for rendering.
// Without contour the format is returned unchanged. With contour enabled,
// formats 1-4 are remapped to originalFormat + 7: these are identical to 1-4
// except that they lose the 4 bit alpha part. Every other format is unchanged.
int CNew3D::GetTexFormat(int originalFormat, bool contour)
{
    const bool hasContourVariant = (originalFormat >= 1) && (originalFormat <= 4);

    if (contour && hasContourVariant) {
        return originalFormat + 7;
    }

    return originalFormat;
}
// Copies the render attributes of a polygon header into a mesh.
// The general attributes are always copied; the texture related ones
// (format, position, size, wrap modes, micro texture) only when the
// polygon is textured.
void CNew3D::SetMeshValues(SortingMesh *currentMesh, PolyHeader &ph)
{
    SortingMesh& mesh = *currentMesh;

    // attributes that apply to every mesh
    mesh.textured       = ph.TexEnabled();
    mesh.alphaTest      = ph.AlphaTest();
    mesh.textureAlpha   = ph.TextureAlpha();
    mesh.polyAlpha      = ph.PolyAlpha();
    mesh.lighting       = ph.LightEnabled();
    mesh.fixedShading   = ph.FixedShading() && !ph.SmoothShading();
    mesh.highPriority   = ph.HighPriority();
    mesh.transLSelect   = ph.TranslucencyPatternSelect();
    mesh.layered        = ph.Layered();
    mesh.specular       = ph.SpecularEnabled();
    mesh.shininess      = ph.Shininess();
    mesh.specularValue  = ph.SpecularValue();
    mesh.fogIntensity   = ph.LightModifier();
    mesh.translatorMap  = ph.TranslatorMap();
    mesh.noLosReturn    = ph.NoLosReturn();

    if (!mesh.textured) {
        return;     // nothing texture related to set up
    }

    mesh.format = GetTexFormat(ph.TexFormat(), ph.AlphaTest());

    if (mesh.format == 7) {
        // alpha test is a 1 bit test, this format needs a lower threshold
        // since it has 16 levels of transparency
        mesh.alphaTest = false;
    }

    mesh.x              = ph.X();
    mesh.y              = ph.Y();
    mesh.width          = ph.TexWidth();
    mesh.height         = ph.TexHeight();
    mesh.microTexture   = ph.MicroTexture();
    mesh.inverted       = ph.TranslatorMapOffset() == 2;

    // wrap modes
    {
        bool smoothU = ph.TexSmoothU();
        bool smoothV = ph.TexSmoothV();

        // smooth wrap makes no sense for alpha tested polys with pixel dilate
        if (ph.AlphaTest()) {
            smoothU = false;
            smoothV = false;
        }

        // mirrored / repeating, each in a smooth or clamped flavour
        const auto pickWrapMode = [](bool mirrored, bool smooth) {
            if (mirrored) {
                return smooth ? Mesh::TexWrapMode::mirror : Mesh::TexWrapMode::mirrorClamp;
            }
            return smooth ? Mesh::TexWrapMode::repeat : Mesh::TexWrapMode::repeatClamp;
        };

        mesh.wrapModeU = pickWrapMode(ph.TexUMirror(), smoothU);
        mesh.wrapModeV = pickWrapMode(ph.TexVMirror(), smoothV);
    }

    if (mesh.microTexture) {
        // scale factor selected by the header's micro texture min LOD value
        static const float microTexScale[] = { 2.f, 4.f, 16.f, 256.f };

        mesh.microTextureID     = ph.MicroTextureID();
        mesh.microTextureScale  = microTexScale[ph.MicroTextureMinLOD()];
    }
}
// Decodes the polygon list starting at `data` and stores the result in model `m`.
//
// Polygons are grouped into temporary SortingMesh objects keyed by
// PolyHeader::Hash(), so polygons sharing the same attributes end up in the
// same mesh. Once every polygon is decoded, each mesh's vertices are appended
// to m_polyBufferRam (dynamic models, offset by MAX_ROM_VERTS) or
// m_polyBufferRom (everything else), and the mesh is pushed to m->meshes.
//
// Vertices can be shared with the previously decoded polygon; the previous
// polygon's vertices and raw texture coordinates are kept in m_prev and
// m_prevTexCoords between iterations.
//
// Does nothing when `data` is null.
void CNew3D::CacheModel(Model *m, const UINT32 *data)
{
    if (data == nullptr)
        return;

    // Zero-initialised on purpose: a triangle only fills entries 0-2, yet all
    // four entries are copied into m_prevTexCoords at the end of each iteration.
    UINT16 texCoords[4][2] = {};
    PolyHeader ph;
    UINT64 lastHash = -1;
    SortingMesh* currentMesh = nullptr;

    std::unordered_map<UINT64, SortingMesh> sMap;

    ph = data;

    int numTriangles = ph.NumTrianglesTotal();

    // Cache all polygons
    do {

        R3DPoly p;      // current polygon
        float uvScale;

        if (ph.header[6] == 0) {
            break;
        }

        // create a hash value based on poly attributes -todo add more attributes
        auto hash = ph.Hash();

        // The null check covers the (unlikely) case where the very first hash
        // equals the initial lastHash sentinel, which would otherwise leave
        // currentMesh unset and be dereferenced below.
        if (hash != lastHash || currentMesh == nullptr) {

            auto found = sMap.find(hash);

            if (found == sMap.end()) {

                currentMesh = &sMap.insert({hash, SortingMesh()}).first->second;

                //make space for our vertices
                currentMesh->verts.reserve(numTriangles * 3);

                //set mesh values
                SetMeshValues(currentMesh, ph);
            }
            else {
                currentMesh = &found->second;
            }
        }

        // Obtain basic polygon parameters
        p.number = ph.NumVerts();
        uvScale = ph.UVScale();

        ph.FaceNormal(p.faceNormal);

        // Fetch reused vertices according to bitfield, then new verts
        int j = 0;
        for (int i = 0; i < 4; i++)     // up to 4 reused vertices
        {
            if (ph.SharedVertex(i))
            {
                p.v[j] = m_prev[i];

                texCoords[j][0] = m_prevTexCoords[i][0];
                texCoords[j][1] = m_prevTexCoords[i][1];

                //check if we need to recalc tex coords - will only happen if tex tiles are different + sharing vertices
                if (hash != lastHash) {
                    if (currentMesh->textured) {
                        GetCoordinates(currentMesh->width, currentMesh->height, texCoords[j][0], texCoords[j][1], uvScale, p.v[j].texcoords[0], p.v[j].texcoords[1]);
                    }
                }

                j++;
            }
        }

        lastHash = hash;

        // copy face attributes

        if (!ph.PolyColor()) {
            // colour comes from the colour table in polygon RAM
            int colorIdx = ph.ColorIndex();
            p.faceColour[2] = (m_polyRAM[m_colorTableAddr + colorIdx] & 0xFF);
            p.faceColour[1] = ((m_polyRAM[m_colorTableAddr + colorIdx] >> 8) & 0xFF);
            p.faceColour[0] = ((m_polyRAM[m_colorTableAddr + colorIdx] >> 16) & 0xFF);
        }
        else {
            // colour is stored directly in header word 4
            p.faceColour[0] = ((ph.header[4] >> 24));
            p.faceColour[1] = ((ph.header[4] >> 16) & 0xFF);
            p.faceColour[2] = ((ph.header[4] >> 8) & 0xFF);
        }

        p.faceColour[3] = ph.Transparency();

        if (ph.Discard1() && !ph.Discard2()) {
            p.faceColour[3] /= 2;
        }

        // if we have flat shading, we can't re-use normals from shared vertices
        for (int i = 0; i < p.number && !ph.SmoothShading(); i++) {
            p.v[i].normal[0] = p.faceNormal[0];
            p.v[i].normal[1] = p.faceNormal[1];
            p.v[i].normal[2] = p.faceNormal[2];
        }

        UINT32* vData = ph.StartOfData();   // vertex data starts here

        // remaining vertices are new and defined here
        for (; j < p.number; j++)
        {
            // Fetch vertices (4 words per vertex: x, y, z, texture coordinates)
            UINT32 ix = vData[0];
            UINT32 iy = vData[1];
            UINT32 iz = vData[2];
            UINT32 it = vData[3];

            // Decode vertices: the upper 24 bits are the signed position
            p.v[j].pos[0] = (((INT32)ix) >> 8) * m_vertexFactor;
            p.v[j].pos[1] = (((INT32)iy) >> 8) * m_vertexFactor;
            p.v[j].pos[2] = (((INT32)iz) >> 8) * m_vertexFactor;
            p.v[j].pos[3] = 1.0f;

            // Per vertex normals: the low byte of each position word
            if (ph.SmoothShading()) {
                p.v[j].normal[0] = BYTE_TO_FLOAT((INT8)(ix & 0xFF));
                p.v[j].normal[1] = BYTE_TO_FLOAT((INT8)(iy & 0xFF));
                p.v[j].normal[2] = BYTE_TO_FLOAT((INT8)(iz & 0xFF));
            }

            if (ph.FixedShading() && !ph.SmoothShading()) {     // fixed shading seems to be disabled if actual normals are set

                float shade;

                // the low byte of the x word holds the shade, read as unsigned or signed
                if (!m_shadeIsSigned) {
                    shade = (ix & 0xFF) * (float)(1.0 / 255.0);
                }
                else {
                    shade = BYTE_TO_FLOAT((INT8)(ix & 0xFF));
                }

                p.v[j].fixedShade = shade;
            }

            float texU = 0;
            float texV = 0;

            // tex coords
            if (currentMesh->textured) {
                GetCoordinates(currentMesh->width, currentMesh->height, (UINT16)(it >> 16), (UINT16)(it & 0xFFFF), uvScale, texU, texV);
            }

            p.v[j].texcoords[0] = texU;
            p.v[j].texcoords[1] = texV;

            //cache un-normalised tex coordinates
            texCoords[j][0] = (UINT16)(it >> 16);
            texCoords[j][1] = (UINT16)(it & 0xFFFF);

            vData += 4;
        }

        // check if we need to double up vertices for two sided lighting
        if (ph.DoubleSided() && !ph.Discard()) {

            R3DPoly tempP = p;

            // flip normals
            V3::inverse(tempP.faceNormal);

            for (int i2 = 0; i2 < tempP.number; i2++) {
                V3::inverse(tempP.v[i2].normal);
            }

            CopyVertexData(tempP, currentMesh->verts);
        }

        // Copy this polygon into the model buffer
        if (!ph.Discard()) {
            CopyVertexData(p, currentMesh->verts);
        }

        // Copy current vertices into previous vertex array
        for (int i = 0; i < 4; i++) {
            m_prev[i] = p.v[i];
            m_prevTexCoords[i][0] = texCoords[i][0];
            m_prevTexCoords[i][1] = texCoords[i][1];
        }

    } while (ph.NextPoly());

    //sorted the data, now copy to main data structures

    // we know how many meshes we have to reserve appropriate space
    m->meshes->reserve(sMap.size());

    for (auto& it : sMap) {

        if (m->dynamic) {

            // calculate VBO values for current mesh
            it.second.vboOffset = (int)m_polyBufferRam.size() + MAX_ROM_VERTS;
            it.second.vertexCount = (int)it.second.verts.size();

            // copy poly data to main buffer
            m_polyBufferRam.insert(m_polyBufferRam.end(), it.second.verts.begin(), it.second.verts.end());
        }
        else {

            // calculate VBO values for current mesh
            it.second.vboOffset = (int)m_polyBufferRom.size();
            it.second.vertexCount = (int)it.second.verts.size();

            // copy poly data to main buffer
            m_polyBufferRom.insert(m_polyBufferRom.end(), it.second.verts.begin(), it.second.verts.end());
        }

        //copy the temp mesh into the model structure
        //this will lose the associated vertex data, which is now copied to the main buffer anyway
        m->meshes->push_back(it.second);
    }
}
// Walks the polygon list at `data` and returns true as soon as a polygon
// header has bit 1 of header word 1 clear (described in the original source
// as "model has rgb colour palette"). Returns false for a null pointer or
// when the end of the list is reached without finding such a polygon.
bool CNew3D::IsDynamicModel(UINT32 *data)
{
    if (data == nullptr) {
        return false;
    }

    PolyHeader poly(data);

    for (;;) {

        const bool flagClear = (poly.header[1] & 2) == 0;

        if (flagClear) {
            return true;
        }

        // a zero in header word 6 ends the scan, as does running out of polygons
        if (poly.header[6] == 0 || !poly.NextPoly()) {
            break;
        }
    }

    return false;
}
// Returns true when the model address is at or above 0x100000.
bool CNew3D::IsVROMModel(UINT32 modelAddr)
{
    const UINT32 firstVROMAddr = 0x100000;

    return !(modelAddr < firstVROMAddr);
}
// Derives five clipping planes from a 16 element matrix.
// p[0]..p[3] (left, right, bottom, top) are built by adding or subtracting
// matrix elements and are normalised. p[4] (front) is the fixed plane
// (0, 0, -1, 0) and does not depend on the matrix.
void CNew3D::CalcFrustumPlanes(Plane p[5], const float* matrix)
{
    // Builds one side plane from elements 3/7/11/15 plus (add) or minus (!add)
    // the elements at offset/4+offset/8+offset/12+offset, then normalises it.
    const auto fillSidePlane = [matrix](Plane& plane, int offset, bool add) {
        const float base[4]  = { matrix[3], matrix[7], matrix[11], matrix[15] };
        const float other[4] = { matrix[offset], matrix[4 + offset], matrix[8 + offset], matrix[12 + offset] };

        plane.a = add ? (base[0] + other[0]) : (base[0] - other[0]);
        plane.b = add ? (base[1] + other[1]) : (base[1] - other[1]);
        plane.c = add ? (base[2] + other[2]) : (base[2] - other[2]);
        plane.d = add ? (base[3] + other[3]) : (base[3] - other[3]);
        plane.Normalise();
    };

    fillSidePlane(p[0], 0, true);   // Left Plane
    fillSidePlane(p[1], 0, false);  // Right Plane
    fillSidePlane(p[2], 1, true);   // Bottom Plane
    fillSidePlane(p[3], 1, false);  // Top Plane

    // Front Plane
    Plane& front = p[4];
    front.a = 0.f;
    front.b = 0.f;
    front.c = -1.f;
    front.d = 0.f;
}
// Fills `box` with the 8 corners of a cube centred on the origin whose faces
// are `distance` away from the centre along each axis. The w component of
// every corner is set to 1.
void CNew3D::CalcBox(float distance, BBox& box)
{
    // For each corner: true selects +distance, false selects -distance (x, y, z)
    static constexpr bool positive[8][3] = {
        { false, false, true  },    // bottom left front
        { false, false, false },    // bottom left back
        { true,  false, false },    // bottom right back
        { true,  false, true  },    // bottom right front
        { false, true,  true  },    // top left front
        { false, true,  false },    // top left back
        { true,  true,  false },    // top right back
        { true,  true,  true  },    // top right front
    };

    for (int corner = 0; corner < 8; corner++) {
        for (int axis = 0; axis < 3; axis++) {
            box.points[corner][axis] = positive[corner][axis] ? distance : -distance;
        }
        box.points[corner][3] = 1.f;
    }
}
// Multiplies a 4 component vector by a 16 element matrix:
// out[i] = in[0]*matrix[i] + in[1]*matrix[4+i] + in[2]*matrix[8+i] + in[3]*matrix[12+i]
// `out` is written element by element while `in` is still being read,
// so the two must not be the same array.
void CNew3D::MultVec(const float matrix[16], const float in[4], float out[4])
{
    for (int col = 0; col < 4; ++col) {
        const float* m = matrix + col;  // m[0], m[4], m[8], m[12] are the terms for this output

        out[col] =
            in[0] * m[0] +
            in[1] * m[4] +
            in[2] * m[8] +
            in[3] * m[12];
    }
}
// Transforms all 8 corners of `box` by matrix `m`, in place.
// Only x, y and z are written back; each corner's w component is left as it was.
void CNew3D::TransformBox(const float *m, BBox& box)
{
    for (int corner = 0; corner < 8; corner++) {

        float transformed[4];
        MultVec(m, box.points[corner], transformed);

        for (int axis = 0; axis < 3; axis++) {
            box.points[corner][axis] = transformed[axis];
        }
    }
}
// Classifies a box against 5 planes.
//   INSIDE    - all 8 corners are on the non-negative side of all 5 planes
//   OUTSIDE   - all 8 corners are on the negative side of at least one plane
//   INTERCEPT - anything else
Clip CNew3D::ClipBox(const BBox& box, Plane planes[5])
{
    const int numPlanes  = 5;
    const int numCorners = 8;

    // pass 1: count the corners that lie inside every plane
    int cornersInside = 0;

    for (int corner = 0; corner < numCorners; corner++) {

        int planesPassed = 0;

        for (int plane = 0; plane < numPlanes; plane++) {
            const bool inFront = planes[plane].DistanceToPoint(box.points[corner]) >= 0.f;
            planesPassed += inFront ? 1 : 0;
        }

        if (planesPassed == numPlanes) {
            cornersInside++;    // corner is inside all 5 planes
        }
    }

    if (cornersInside == numCorners) {
        return Clip::INSIDE;
    }

    if (cornersInside > 0) {
        return Clip::INTERCEPT;
    }

    // pass 2: no corner is inside the view frustum. If every corner is on the
    // outer side of any single plane, the box is outside of the view.
    for (int plane = 0; plane < numPlanes; plane++) {

        bool anyInFront = false;

        for (int corner = 0; corner < numCorners; corner++) {
            if (planes[plane].DistanceToPoint(box.points[corner]) >= 0.f) {
                anyInFront = true;
            }
        }

        if (!anyInFront) {
            return Clip::OUTSIDE;
        }
    }

    // the box is traversing the view frustum
    return Clip::INTERCEPT;
}
// Widens the near/far pair of the current priority level so that it covers
// the z values of the box corners. Only corners with a negative z are
// considered: zNear keeps the largest such z, zFar the smallest.
void CNew3D::CalcBoxExtents(const BBox& box)
{
    auto& range = m_nfPairs[m_currentPriority];

    for (int corner = 0; corner < 8; corner++) {

        const float z = box.points[corner][2];

        if (!(z < 0.f)) {
            continue;
        }

        range.zNear = std::max(z, range.zNear);
        range.zFar  = std::min(z, range.zFar);
    }
}
// Clips a polygon in place against planes[0]..planes[3] (the fifth plane is
// not used here). Each plane is applied in turn: vertices on the non-negative
// side are kept, and wherever an edge crosses the plane a new vertex is
// inserted at the crossing point. Only x, y and z of an inserted vertex are
// written. The clipped result is left in clipPoly.
void CNew3D::ClipPolygon(ClipPoly& clipPoly, Plane planes[5])
{
    ClipPoly scratch;

    // The two buffers take turns as source and destination. With 4 planes the
    // last destination is clipPoly itself, so no copy back is needed.
    ClipPoly* const buffers[2] = { &clipPoly, &scratch };

    for (int planeIdx = 0; planeIdx < 4; planeIdx++) {

        Plane&      plane = planes[planeIdx];
        ClipPoly*   src = buffers[planeIdx & 1];
        ClipPoly*   dst = buffers[(planeIdx & 1) ^ 1];

        float dotHere    = plane.DotProduct(src->list[0].pos);
        bool  insideHere = (dotHere + plane.d) >= 0.f;

        dst->count = 0;

        for (int vert = 0; vert < src->count; vert++) {

            // keep the current vertex if it is on the visible side
            if (insideHere) {
                dst->list[dst->count] = src->list[vert];
                dst->count++;
            }

            // the edge runs to the next vertex, wrapping round to the first
            const int wrapped = (vert + 1 >= src->count) ? 0 : (vert + 1);

            const float dotNext    = plane.DotProduct(src->list[wrapped].pos);
            const bool  insideNext = (dotNext + plane.d) >= 0.f;

            // the edge crosses the plane: add the intersection point
            if (insideHere != insideNext) {

                const float u = (dotHere + plane.d) / (dotHere - dotNext);  // fraction along the edge

                const float* from = src->list[vert].pos;
                const float* to   = src->list[wrapped].pos;
                float*       hit  = dst->list[dst->count].pos;

                hit[0] = from[0] + ((to[0] - from[0]) * u);
                hit[1] = from[1] + ((to[1] - from[1]) * u);
                hit[2] = from[2] + ((to[2] - from[2]) * u);

                dst->count++;
            }

            dotHere    = dotNext;
            insideHere = insideNext;
        }
    }
}
void CNew3D::ClipModel(const Model *m)
{
//===============================
ClipPoly clipPoly;
std::vector<FVertex>* vertices;
int offset;
//===============================
if (m->dynamic) {
vertices = &m_polyBufferRam;
offset = MAX_ROM_VERTS;
}
else {
vertices = &m_polyBufferRom;
offset = 0;
}
for (const auto &mesh : *m->meshes) {
int start = mesh.vboOffset - offset;
for (int i = 0; i < mesh.vertexCount; i += m_numPolyVerts) { // inc to next poly
for (int j = 0; j < m_numPolyVerts; j++) {
MultVec(m->modelMat, (*vertices)[start + i + j].pos, clipPoly.list[j].pos); // copy all 3 of 4 our transformed vertices into our clip poly struct
}
clipPoly.count = m_numPolyVerts;
ClipPolygon(clipPoly, m_planes);
for (int j = 0; j < clipPoly.count; j++) {
if (clipPoly.list[j].pos[2] < 0.f) {
m_nfPairs[m_currentPriority].zNear = std::max(clipPoly.list[j].pos[2], m_nfPairs[m_currentPriority].zNear);
m_nfPairs[m_currentPriority].zFar = std::min(clipPoly.list[j].pos[2], m_nfPairs[m_currentPriority].zFar);
}
}
}
}
}
// Computes the GL viewport rectangle (vp->x/y/width/height) and the
// projection matrix of a viewport for the given near/far distances.
// The near/far values are sanitised first so the ratio far/near never
// exceeds 1,000,000.
void CNew3D::CalcViewport(Viewport* vp, float near, float far)
{
    // fix for ocean hunter which passes some FLT_MAX for a few matrices.
    // HW must have some safe guard for these
    if (far > 1e30f) {
        far = near * 1000000.f;
    }

    // if we get really close to zero somehow, we will have almost no depth precision
    if (near < far / 1000000.f) {
        near = far / 1000000.f;
    }

    // shape of the projection frustum at the near plane, needed for culling
    const float frustumLeft     = near * vp->angle_left;
    const float frustumRight    = near * vp->angle_right;
    const float frustumTop      = near * vp->angle_top;
    const float frustumBottom   = near * vp->angle_bottom;

    vp->projectionMatrix.LoadIdentity();    // reset matrix

    // vertical placement is computed the same way for both cases below
    // (Model 3 y is flipped relative to the 384 line display)
    vp->y       = m_yOffs + static_cast<int>(static_cast<float>(384 - (vp->vpY + vp->vpHeight)) * m_yRatio);
    vp->height  = static_cast<int>(static_cast<float>(vp->vpHeight) * m_yRatio);

    const bool coversWholeScreen =
        (vp->vpX == 0) && (vp->vpWidth >= 495) &&
        (vp->vpY == 0) && (vp->vpHeight >= 383);

    if (!coversWholeScreen) {
        // partial viewport: scale and offset horizontally, frustum is used as is
        vp->x       = m_xOffs + static_cast<int>(static_cast<float>(vp->vpX) * m_xRatio);
        vp->width   = static_cast<int>(static_cast<float>(vp->vpWidth) * m_xRatio);

        vp->projectionMatrix.Frustum(frustumLeft, frustumRight, frustumBottom, frustumTop, near, far);
        return;
    }

    /*
     * Full screen viewport: compute the aspect ratio correction factor.
     *
     * "Window" is the full GL viewport (totalXRes x totalYRes). "Viewable
     * area" is the effective Model 3 screen (xRes x yRes). In non-wide-screen,
     * non-stretch mode the viewable area replicates the 496x384 display and
     * may be smaller than the window; the remainder appears as a border
     * created by a scissor box.
     *
     * In wide-screen mode the frustum is expanded horizontally to fill the
     * window while keeping the aspect ratio correct. The viewable area is set
     * the same as in non-wide-screen mode, but glScissor() is set by the OSD
     * layer's screen setup code to reveal the entire window.
     *
     * In stretch mode the window and viewable area are set the same, so there
     * is no correction: the display stretches to fill the window with the
     * frustum of a 496x384 Model 3 display, and is distorted.
     */
    const float windowAR        = static_cast<float>(m_totalXRes) / static_cast<float>(m_totalYRes);
    const float viewableAreaAR  = static_cast<float>(m_xRes) / static_cast<float>(m_yRes);

    // Only the horizontal frustum planes are expanded, and only in non-stretch
    // mode (wide-screen and non-wide-screen modes have identical resolution
    // parameters, only their scissor box differs)
    const float correction = windowAR / viewableAreaAR;

    vp->x       = 0;
    vp->width   = m_totalXRes;

    vp->projectionMatrix.Frustum(frustumLeft * correction, frustumRight * correction, frustumBottom, frustumTop, near, far);
}
void CNew3D::SetSunClamp(bool enable)
{
m_sunClamp = enable;
}
void CNew3D::SetSignedShade(bool enable)
{
m_shadeIsSigned = enable;
}
// Returns the line of sight value stored for `layer` in the front buffer.
// The renderer always writes to the 'back' buffer while the software reads
// from the front one; the two then get swapped. The read is done while
// holding m_losMutex.
float CNew3D::GetLosValue(int layer)
{
    std::lock_guard<std::mutex> lock(m_losMutex);

    return m_losFront->value[layer];
}
// Remaps a position on the real3d 496x384 display to our viewport:
// y is flipped (384 - inY), then both coordinates are scaled by
// m_xRatio / m_yRatio and shifted by m_xOffs / m_yOffs.
void CNew3D::TranslateLosPosition(int inX, int inY, int& outX, int& outY)
{
    const int flippedY = 384 - inY;

    outX = m_xOffs + int(inX * m_xRatio);
    outY = m_yOffs + int(flippedY * m_yRatio);
}
// Performs the line of sight read-back for one priority level.
// The first viewport with a matching priority and a non-zero line of sight
// position is used: the depth and stencil values at that position are read
// from the framebuffer, converted, and stored in the back buffer for this
// priority. Returns true if a value was written, false if no such viewport
// was found.
bool CNew3D::ProcessLos(int priority)
{
    for (const auto &n : m_nodes) {

        const auto& view = n.viewport;

        if (view.priority != priority) {
            continue;
        }

        // a position of 0,0 is treated as "no line of sight test"
        if (!(view.losPosX || view.losPosY)) {
            continue;
        }

        int losX, losY;
        TranslateLosPosition(view.losPosX, view.losPosY, losX, losY);

        // read the depth buffer value and remap it from 0..1 to -1..1
        float depth;
        glReadPixels(losX, losY, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT, &depth);
        depth = 2.0f * depth - 1.0f;

        // turn the depth value back into a distance using this priority's near/far pair
        const float zNear = m_nfPairs[priority].zNear;
        const float zFar  = m_nfPairs[priority].zFar;

        float zVal = 2.0f * zNear * zFar / (zFar + zNear - depth * (zFar - zNear));

        // real3d test program indicates that return values are 1/zVal
        zVal = 1.0f / zVal;

        GLubyte stencilVal;
        glReadPixels(losX, losY, 1, 1, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, &stencilVal);

        // A stencil value of zero means we've hit sky or whatever, a 1 means we've
        // hit geometry. The flag for "test passed" (ie doesn't hit geometry) is
        // stored in bit 0 of the first byte of the float's storage.
        // NOTE(review): the original comment says the real3d returns this flag in
        // the "top bit" of the float, which is not the bit written here - confirm.
        auto bytes = reinterpret_cast<unsigned char*>(&zVal);   // this is legal in c++, casting to int technically isn't

        bytes[0] = (stencilVal == 0)
            ? (bytes[0] | 1)        // set the flag bit
            : (bytes[0] & 0xFE);    // clear the flag bit

        m_losBack->value[priority] = zVal;
        return true;
    }

    return false;
}
} // New3D