Supermodel/Src/Graphics/New3D/New3D.cpp

1793 lines
53 KiB
C++

#include "New3D.h"
#include "Vec.h"
#include <cmath>
#include <algorithm>
#include <limits>
#include <cstring>
#include <unordered_map>
#include "R3DFloat.h"
#include "Util/BitCast.h"
#define MAX_RAM_VERTS 300000
#define MAX_ROM_VERTS 1500000
#define BYTE_TO_FLOAT(B) ((2.0f * (B) + 1.0f) * (float)(1.0/255.0))
namespace New3D {
CNew3D::CNew3D(const Util::Config::Node &config, const std::string& gameName) :
    m_r3dShader(config),
    m_r3dScrollFog(config),
    m_gameName(gameName),
    m_textureBuffer(0),
    m_vao(0)
{
    // No emulated memory is known yet; the pointers are supplied later through AttachMemory()
    m_cullingRAMLo  = nullptr;
    m_cullingRAMHi  = nullptr;
    m_polyRAM       = nullptr;
    m_vrom          = nullptr;
    m_textureRAM    = nullptr;

    m_sunClamp      = true;
    m_shadeIsSigned = true;

    // Quad rendering submits 4 vertices per primitive as GL_LINES_ADJACENCY,
    // otherwise every polygon is submitted as plain triangles
    const bool quadRendering = config["QuadRendering"].ValueAs<bool>();
    m_numPolyVerts = quadRendering ? 4 : 3;
    m_primType     = quadRendering ? GL_LINES_ADJACENCY : GL_TRIANGLES;

    m_r3dShader.LoadShader();
    glUseProgram(0);

    // Texture memory: a single 2048x2048 sheet of 16-bit unsigned integer texels
    glGenTextures(1, &m_textureBuffer);
    glBindTexture(GL_TEXTURE_2D, m_textureBuffer);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_R16UI, 2048, 2048, 0, GL_RED_INTEGER, GL_UNSIGNED_SHORT, nullptr);    // allocate storage only

    // Vertex buffer memory: one VBO sized for the RAM and ROM vertex budgets combined
    glGenVertexArrays(1, &m_vao);
    glBindVertexArray(m_vao);

    m_vbo.Create(GL_ARRAY_BUFFER, GL_DYNAMIC_DRAW, sizeof(FVertex) * (MAX_RAM_VERTS + MAX_ROM_VERTS));
    m_vbo.Bind(true);

    // Layout of one FVertex as seen by the shader
    struct AttribDesc {
        const char* name;           // attribute name looked up in the R3D shader
        GLint       components;     // number of components
        GLenum      type;           // component data type
        GLboolean   normalised;     // whether integer data is normalised
        size_t      offset;         // byte offset inside FVertex
    };

    const AttribDesc attribs[] = {
        { "inVertex",     4, GL_FLOAT,         GL_FALSE, 0 },
        { "inNormal",     3, GL_FLOAT,         GL_FALSE, offsetof(FVertex, normal) },
        { "inTexCoord",   2, GL_FLOAT,         GL_FALSE, offsetof(FVertex, texcoords) },
        { "inColour",     4, GL_UNSIGNED_BYTE, GL_TRUE,  offsetof(FVertex, faceColour) },
        { "inFaceNormal", 3, GL_FLOAT,         GL_FALSE, offsetof(FVertex, faceNormal) },
        { "inFixedShade", 1, GL_FLOAT,         GL_FALSE, offsetof(FVertex, fixedShade) },
    };

    // enable every attribute array first ...
    for (const AttribDesc& a : attribs) {
        glEnableVertexAttribArray(m_r3dShader.GetVertexAttribPos(a.name));
    }

    // ... then describe where each one lives inside the interleaved vertex
    for (const AttribDesc& a : attribs) {
        glVertexAttribPointer(m_r3dShader.GetVertexAttribPos(a.name), a.components, a.type, a.normalised, sizeof(FVertex), (void*)a.offset);
    }

    glBindVertexArray(0);
    m_vbo.Bind(false);
}
CNew3D::~CNew3D()
{
    // Tear down in a fixed order: vertex buffer, vertex array object, texture sheet, shader
    m_vbo.Destroy();

    if (m_vao != 0) {
        glDeleteVertexArrays(1, &m_vao);
        m_vao = 0;      // mark as released
    }

    if (m_textureBuffer != 0) {
        glDeleteTextures(1, &m_textureBuffer);
        m_textureBuffer = 0;    // mark as released
    }

    m_r3dShader.UnloadShader();
}
void CNew3D::AttachMemory(const UINT32 *cullingRAMLoPtr, const UINT32 *cullingRAMHiPtr, const UINT32 *polyRAMPtr, const UINT32 *vromPtr, const UINT16 *textureRAMPtr)
{
    // The renderer only stores these pointers; nothing is copied here

    // scene graph memory
    m_cullingRAMHi  = cullingRAMHiPtr;
    m_cullingRAMLo  = cullingRAMLoPtr;

    // model data
    m_vrom          = vromPtr;
    m_polyRAM       = polyRAMPtr;

    // texture sheet
    m_textureRAM    = textureRAMPtr;
}
void CNew3D::SetStepping(int stepping)
{
    m_step = stepping;

    // Only steppings 1.0, 1.5, 2.0 and 2.1 are recognised; anything else is treated as 1.0
    switch (m_step) {
    case 0x10:
    case 0x15:
    case 0x20:
    case 0x21:
        break;
    default:
        m_step = 0x10;
        break;
    }

    const bool afterStep10 = (m_step > 0x10);

    // culling nodes are 10 words after step 1.0, 8 words on step 1.0 (hence the 2 word offset)
    m_offset = afterStep10 ? 0 : 2;

    // vertices are 13.11 fixed point after step 1.0, 17.7 on step 1.0
    m_vertexFactor = afterStep10 ? (1.0f / 2048.0f) : (1.0f / 128.0f);
}
bool CNew3D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXResParam, unsigned totalYResParam)
{
    // Position and size of the 3D output inside the physical display area
    m_xOffs     = xOffset;
    m_yOffs     = yOffset;
    m_xRes      = xRes;
    m_yRes      = yRes;
    m_totalXRes = totalXResParam;
    m_totalYRes = totalYResParam;

    // Scale factors relative to a 496x384 reference size
    m_xRatio = xRes * static_cast<float>(1.0 / 496.0);
    m_yRatio = yRes * static_cast<float>(1.0 / 384.0);

    // Rebuild the frame buffers at the total output size, discarding any earlier set first
    m_r3dFrameBuffers.DestroyFBO();
    m_r3dFrameBuffers.CreateFBO(totalXResParam, totalYResParam);

    return OKAY;
}
void CNew3D::UploadTextures(unsigned level, unsigned x, unsigned y, unsigned width, unsigned height)
{
glBindTexture(GL_TEXTURE_2D, m_textureBuffer);
glPixelStorei(GL_UNPACK_ALIGNMENT, 2);
for (unsigned i = 0; i < height; i++) {
glTexSubImage2D(GL_TEXTURE_2D, 0, x, y + i, width, 1, GL_RED_INTEGER, GL_UNSIGNED_SHORT, m_textureRAM + ((y + i) * 2048) + x);
}
}
void CNew3D::DrawScrollFog()
{
// this is my best guess at the logic based upon what games are doing
//
// ocean hunter - every viewport has scroll fog values set. Must start with lowest priority layers as the higher ones sometimes are garbage
// scud race - first viewports in priority layer missing scroll values. The latter ones all contain valid scroll values.
// daytona - doesn't seem to use scroll fog at all. Will set scroll values for the first viewports, the end ones contain no scroll values
// vf3 - first viewport only has it set. But set with highest select value ?? Rest of the viewports in priority layer contain a lower select value
// sega bassfishing - first viewport in priority 1 sets scroll value. The rest all contain the wrong value + a higher select value ..
// spikeout final - 2nd viewport in the priority layer has scroll values set, none of the others do. It also uses the highest select value
float rgba[4];
for (int i = 0; i < 4; i++) {
for (auto &n : m_nodes) {
if (n.viewport.priority == i) {
if (n.viewport.scrollFog != 0.f) {
rgba[0] = n.viewport.fogParams[0];
rgba[1] = n.viewport.fogParams[1];
rgba[2] = n.viewport.fogParams[2];
rgba[3] = n.viewport.scrollFog;
goto CheckScroll;
}
}
}
}
return;
CheckScroll:
for (int i = 0; i < 4; i++) {
for (auto &n : m_nodes) {
if (n.viewport.priority == i) {
//if we have a fog density value
if (n.viewport.fogParams[3] != 0.f) {
if (rgba[0] == n.viewport.fogParams[0] &&
rgba[1] == n.viewport.fogParams[1] &&
rgba[2] == n.viewport.fogParams[2]) {
glViewport(n.viewport.x, n.viewport.y, n.viewport.width, n.viewport.height);
m_r3dScrollFog.DrawScrollFog(rgba, n.viewport.scrollAtt, n.viewport.fogParams[6], n.viewport.spotFogColor, n.viewport.spotEllipse);
return;
}
}
}
}
}
}
bool CNew3D::RenderScene(int priority, bool renderOverlay, Layer layer)
{
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, m_textureBuffer);
bool hasOverlay = false; // (high priority polys)
for (auto &n : m_nodes) {
if (n.viewport.priority != priority || n.models.empty()) {
continue;
}
CalcViewport(&n.viewport, std::abs(m_nfPairs[priority].zNear*0.96f), std::abs(m_nfPairs[priority].zFar*1.05f)); // make planes 5% bigger
glViewport(n.viewport.x, n.viewport.y, n.viewport.width, n.viewport.height);
m_r3dShader.SetViewportUniforms(&n.viewport);
for (auto &m : n.models) {
bool matrixLoaded = false;
if (m.meshes->empty()) {
continue;
}
for (auto &mesh : *m.meshes) {
if (mesh.highPriority) {
hasOverlay = true;
}
if (!mesh.Render(layer)) continue;
if (mesh.highPriority != renderOverlay) continue;
if (!matrixLoaded) {
m_r3dShader.SetModelStates(&m);
matrixLoaded = true; // do this here to stop loading matrices we don't need. Ie when rendering non transparent etc
}
m_r3dShader.SetMeshUniforms(&mesh);
glDrawArrays(m_primType, mesh.vboOffset, mesh.vertexCount);
}
}
}
return hasOverlay;
}
bool CNew3D::SkipLayer(int layer)
{
    // A priority layer can be skipped when no viewport of that priority holds any models
    return std::none_of(m_nodes.begin(), m_nodes.end(), [layer](const auto &node) {
        return (node.viewport.priority == layer) && !node.models.empty();
    });
}
// Binds the vertex buffer, vertex array object and R3D shader, then sets the depth,
// culling, blend and stencil state used when drawing the scene.
// Note: this configures the stencil function/op/mask but does not itself enable GL_STENCIL_TEST.
void CNew3D::SetRenderStates()
{
m_vbo.Bind(true);
glBindVertexArray(m_vao);
m_r3dShader.SetShader(true);
glDepthFunc (GL_LEQUAL);
glEnable (GL_DEPTH_TEST);
glDepthMask (GL_TRUE);
glActiveTexture (GL_TEXTURE0);
glDisable (GL_CULL_FACE); // we'll emulate this in the shader
glDisable (GL_BLEND);
glStencilFunc (GL_EQUAL, 0, 0xFF); // basically stencil test passes if the value is zero
glStencilOp (GL_KEEP, GL_INCR, GL_INCR); // if the stencil test passes, we increment the value
glStencilMask (0xFF);
}
// Counterpart to SetRenderStates(): unbinds the vertex buffer, vertex array object
// and shader, and disables the stencil test.
void CNew3D::DisableRenderStates()
{
m_vbo.Bind(false);
glBindVertexArray(0);
m_r3dShader.SetShader(false);
glDisable(GL_STENCIL_TEST);
}
// Renders one complete frame:
//  1. resets per-frame state (near/far pairs, line of sight buffers, RAM vertex buffer, nodes)
//  2. walks the viewport list starting at culling address 0x800000 to build the node/model structure
//  3. draws scroll fog, then uploads RAM and any new ROM vertex data to the VBO
//  4. for each of the 4 priority layers, renders an opaque pass followed by two translucent passes
//  5. composites the translucent layers
void CNew3D::RenderFrame(void)
{
// reset the near/far pair of every priority layer to extreme values
// NOTE(review): presumably these are narrowed while the scene is traversed - confirm against ClipModel/CalcBoxExtents
for (int i = 0; i < 4; i++) {
m_nfPairs[i].zNear = -std::numeric_limits<float>::max();
m_nfPairs[i].zFar = std::numeric_limits<float>::max();
}
{
// swap the line of sight buffers under the lock and zero the new back buffer
std::lock_guard<std::mutex> guard(m_losMutex);
std::swap(m_losBack, m_losFront);
for (int i = 0; i < 4; i++) {
m_losBack->value[i] = 0;
}
}
// release any resources from last frame
m_polyBufferRam.clear(); // clear dynamic model memory buffer
m_nodes.clear(); // memory will grow during the object life time, that's fine, no need to shrink to fit
m_modelMat.Release(); // would hope we wouldn't need this but no harm in checking
m_nodeAttribs.Reset();
RenderViewport(0x800000); // build model structure
DrawScrollFog(); // fog layer if applicable must be drawn here
m_vbo.Bind(true);
// RAM (dynamic) vertices are stored after the region reserved for MAX_ROM_VERTS ROM vertices
m_vbo.BufferSubData(MAX_ROM_VERTS*sizeof(FVertex), m_polyBufferRam.size()*sizeof(FVertex), m_polyBufferRam.data()); // upload all the dynamic data to GPU in one go
if (!m_polyBufferRom.empty()) {
// sync rom memory with vbo
int romBytes = (int)m_polyBufferRom.size() * sizeof(FVertex);
int vboBytes = m_vbo.GetSize();
int size = romBytes - vboBytes; // difference between the ROM vertex buffer size and the size reported by the VBO, in bytes
if (size) {
//check we haven't blown up the memory buffers
//we will lose rom models for 1 frame if this happens, not the end of the world, as it probably won't ever happen anyway
if (m_polyBufferRom.size() >= MAX_ROM_VERTS) {
m_polyBufferRom.clear();
m_romMap.clear();
m_vbo.Reset();
}
else {
// append only the vertices that follow what the VBO already holds
m_vbo.AppendData(size, &m_polyBufferRom[vboBytes / sizeof(FVertex)]);
}
}
}
m_r3dFrameBuffers.SetFBO(Layer::trans12);
glClear(GL_COLOR_BUFFER_BIT); // wipe both trans layers
for (int pri = 0; pri <= 3; pri++) {
if (SkipLayer(pri)) continue;
// i == 0 draws the normal polys, i == 1 draws the high priority (overlay) polys
for (int i = 0; i < 2; i++) {
bool renderOverlay = (i == 1);
m_r3dFrameBuffers.SetFBO(Layer::colour);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
SetRenderStates();
m_r3dShader.DiscardAlpha(true); // discard all translucent pixels in opaque pass
bool hasOverlay = RenderScene(pri, renderOverlay, Layer::colour);
// line of sight is only processed after the non-overlay opaque pass
if (!renderOverlay) {
ProcessLos(pri);
}
DisableRenderStates();
m_r3dFrameBuffers.DrawOverTransLayers(); // mask trans layer with opaque pixels
m_r3dFrameBuffers.CompositeBaseLayer(); // copy opaque pixels to back buffer
SetRenderStates();
glDepthFunc(GL_LESS); // alpha polys seem to use gl_less (ocean hunter)
m_r3dShader.DiscardAlpha (false); // render only translucent pixels
m_r3dFrameBuffers.StoreDepth (); // save depth buffer for 1st trans pass
m_r3dFrameBuffers.SetFBO (Layer::trans1);
RenderScene (pri, renderOverlay, Layer::trans1);
m_r3dFrameBuffers.RestoreDepth (); // restore depth buffer, trans layers don't seem to depth test against each other
m_r3dFrameBuffers.SetFBO (Layer::trans2);
RenderScene (pri, renderOverlay, Layer::trans2);
DisableRenderStates();
if (!hasOverlay) break; // no high priority polys
}
}
m_r3dFrameBuffers.CompositeAlphaLayer();
}
void CNew3D::BeginFrame()
{
    // intentionally empty: this renderer does no work at the start of a frame
}
void CNew3D::EndFrame()
{
    // intentionally empty: this renderer does no work at the end of a frame
}
/******************************************************************************
Real3D Address Translation
Functions that interpret word-granular Real3D addresses and return pointers.
******************************************************************************/
// Translates a 24-bit culling RAM address into a pointer.
// Addresses below 0x100000 map to low culling RAM, addresses in 0x800000-0x83FFFF map
// to high culling RAM; anything else yields a null pointer.
const UINT32* CNew3D::TranslateCullingAddress(UINT32 addr)
{
    const UINT32 wordAddr = addr & 0x00FFFFFF;  // caller should have done this already

    if (wordAddr < 0x100000) {
        return &m_cullingRAMLo[wordAddr];
    }

    const bool inHighRAM = (wordAddr >= 0x800000) && (wordAddr < 0x840000);
    if (inHighRAM) {
        return &m_cullingRAMHi[wordAddr & 0x3FFFF];
    }

    return nullptr;
}
// Translates a model reference into a pointer.
// Addresses below 0x100000 index polygon RAM, everything else indexes VROM.
const UINT32* CNew3D::TranslateModelAddress(UINT32 modelAddr)
{
    const UINT32 wordAddr = modelAddr & 0x00FFFFFF; // caller should have done this already

    return (wordAddr < 0x100000) ? &m_polyRAM[wordAddr] : &m_vrom[wordAddr];
}
bool CNew3D::DrawModel(UINT32 modelAddr)
{
    // Adds the model at modelAddr to the viewport node currently being built.
    // Non-dynamic VROM models share their meshes through the rom map; all other
    // models are cached into a fresh mesh list every time. Always returns true.
    const UINT32* modelData = TranslateModelAddress(modelAddr);

    // append a new model to the current node and work on it in place
    auto& nodeModels = m_nodes.back().models;
    nodeModels.emplace_back();
    Model* model = &nodeModels.back();

    bool mustCache = true;

    if (IsVROMModel(modelAddr) && !IsDynamicModel((UINT32*)modelData)) {
        // operator[] creates an entry holding a null pointer if the address was not seen before
        auto& romEntry = m_romMap[modelAddr];

        if (romEntry) {
            mustCache = false;      // meshes already built on an earlier call
        }
        else {
            romEntry = std::make_shared<std::vector<Mesh>>();
        }

        model->meshes  = romEntry;
        model->dynamic = false;
    }
    else {
        model->meshes = std::make_shared<std::vector<Mesh>>();
    }

    // snapshot the current model matrix
    std::copy_n(&m_modelMat.currentMatrix[0], 16, &model->modelMat[0]);

    // snapshot the current node attributes
    model->textureOffsetX   = m_nodeAttribs.currentTexOffsetX;
    model->textureOffsetY   = m_nodeAttribs.currentTexOffsetY;
    model->page             = m_nodeAttribs.currentPage;
    model->scale            = m_nodeAttribs.currentModelScale;

    if (mustCache) {
        CacheModel(model, modelData);
    }

    if (m_nodeAttribs.currentClipStatus != Clip::INSIDE) {
        ClipModel(model);   // not storing clipped values, only working out the Z range
    }

    return true;
}
/*
0x00: x------- -------- -------- -------- Is UF ref
-x------ -------- -------- -------- Is 3D model
--x----- -------- -------- -------- Is point
---x---- -------- -------- -------- Is point ref
----x--- -------- -------- -------- Is animation
-----x-- -------- -------- -------- Is billboard
------x- -------- -------- -------- Child is billboard
-------x -------- -------- -------- Extra child pointer needed
-------- -----xxx xxxxxx-- -------- Node ID
-------- -------- -------- x------- Reset matrix
-------- -------- -------- -x------ Use child pointer
-------- -------- -------- --x----- Use sibling pointer
-------- -------- -------- ---x---- No matrix
-------- -------- -------- ----x--- Indirect child
-------- -------- -------- -----x-- Valid color table
-------- -------- -------- ------xx Node type(0 = viewport, 1 = root node, 2 = culling node)
0x01, 0x02 only present on Step 2 +
0x01: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx Model scale(float)
0x02 : -------- -------- x------- -------- Texture replace
-------- -------- -x------ -------- Switch bank
-------- -------- --xxxxxx x------- X offset
-------- -------- -------- -xxxxxxx Y offset
0x03 : xxxxxxxx xxxxx--- -------- -------- Color table address 1
-------- -----xxx xxxx---- -------- LOD table pointer
-------- -------- ----xxxx xxxxxxxx Node matrix
*/
// Processes the culling node at addr: recurses into its sibling first, then applies the
// node's colour table, scale, texture offsets and matrix, performs the bounding box clip test and
// finally descends into its child link (LOD table, culling node, model or pointer list).
// Node attributes and the model matrix are pushed before and popped after the child is processed.
// Word indices are adjusted by m_offset, which SetStepping() sets to 2 on step 1.0 and 0 otherwise.
void CNew3D::DescendCullingNode(UINT32 addr)
{
enum class NodeType { undefined = -1, viewport = 0, rootNode = 1, cullingNode = 2 };
const UINT32 *node, *lodTable;
UINT32 matrixOffset, child1Ptr, sibling2Ptr;
BBox bbox;
UINT16 uCullRadius;
float fCullRadius;
UINT16 uBlendRadius;
float fBlendRadius;
UINT8 lodTablePointer;
NodeType nodeType;
bool resetMatrix;
// stop descending once the attribute stack reports its limit
if (m_nodeAttribs.StackLimit()) {
return;
}
node = TranslateCullingAddress(addr);
if (NULL == node) {
return;
}
// Extract known fields
nodeType = (NodeType)(node[0x00] & 3);
child1Ptr = node[0x07 - m_offset] & 0x7FFFFFF; // mask colour table bits
sibling2Ptr = node[0x08 - m_offset] & 0x1FFFFFF; // mask colour table bits
matrixOffset = node[0x03 - m_offset] & 0xFFF;
resetMatrix = (node[0x0] & 0x80) > 0;
lodTablePointer = (node[0x03 - m_offset] >> 12) & 0x7F; // extracted but not used further in this function
// check our node type
if (nodeType == NodeType::viewport) {
return; // viewport nodes aren't rendered
}
// parse siblings
// note the sibling is visited before this node pushes its own attributes and matrix
if ((node[0x00] & 0x07) != 0x06) { // colour table seems to indicate no siblings
if (!(sibling2Ptr & 0x1000000) && sibling2Ptr) {
DescendCullingNode(sibling2Ptr); // no need to mask bit, would already be zero
}
}
// valid colour table bit: the address is assembled from the upper bits of three node words
if ((node[0x00] & 0x04)) {
m_colorTableAddr = ((node[0x03 - m_offset] >> 19) << 0) | ((node[0x07 - m_offset] >> 28) << 13) | ((node[0x08 - m_offset] >> 25) << 17);
m_colorTableAddr &= 0x000FFFFF; // clamp to 4MB (in words) range
}
m_nodeAttribs.Push(); // save current attribs
if (!m_offset) { // Step 1.5+
float modelScale = Util::Uint32AsFloat(node[1]);
// only scales above the smallest normalised float replace the inherited scale
if (modelScale > std::numeric_limits<float>::min()) {
m_nodeAttribs.currentModelScale = modelScale;
}
// apply texture offsets, else retain current ones
if ((node[0x02] & 0x8000)) {
int tx = 32 * ((node[0x02] >> 7) & 0x3F);
int ty = 32 * (node[0x02] & 0x1F);
m_nodeAttribs.currentTexOffsetX = tx;
m_nodeAttribs.currentTexOffsetY = ty;
m_nodeAttribs.currentPage = (node[0x02] & 0x4000) >> 14;
}
}
// Apply matrix and translation
m_modelMat.PushMatrix();
// apply translation vector
if (node[0x00] & 0x10) {
float x = Util::Uint32AsFloat(node[0x04 - m_offset]);
float y = Util::Uint32AsFloat(node[0x05 - m_offset]);
float z = Util::Uint32AsFloat(node[0x06 - m_offset]);
m_modelMat.Translate(x, y, z);
}
// multiply matrix, if specified
else if (matrixOffset) {
MultMatrix(matrixOffset,m_modelMat);
}
if (resetMatrix) {
ResetMatrix(m_modelMat);
}
// cull radius is the low half and blend radius the high half of the same word, both 16-bit floats
uCullRadius = node[9 - m_offset] & 0xFFFF;
fCullRadius = R3DFloat::GetFloat16(uCullRadius);
uBlendRadius = node[9 - m_offset] >> 16;
fBlendRadius = R3DFloat::GetFloat16(uBlendRadius); // computed but not used further in this function
// only re-test against the frustum while the parent was not already fully inside
if (m_nodeAttribs.currentClipStatus != Clip::INSIDE) {
if (uCullRadius != R3DFloat::Pro16BitMax) {
CalcBox(fCullRadius, bbox);
TransformBox(m_modelMat, bbox);
m_nodeAttribs.currentClipStatus = ClipBox(bbox, m_planes);
if (m_nodeAttribs.currentClipStatus == Clip::INSIDE) {
CalcBoxExtents(bbox);
}
}
else {
// a maximum radius value gives no usable box, so the clip status is left undetermined
m_nodeAttribs.currentClipStatus = Clip::NOT_SET;
}
}
if (m_nodeAttribs.currentClipStatus != Clip::OUTSIDE && fCullRadius > R3DFloat::Pro16BitFltMin) {
// Descend down first link
if ((node[0x00] & 0x08)) // 4-element LOD table
{
lodTable = TranslateCullingAddress(child1Ptr);
if (NULL != lodTable) {
// only the first LOD table entry is used; it is either another culling node or a model
if ((node[0x03 - m_offset] & 0x20000000)) {
DescendCullingNode(lodTable[0] & 0xFFFFFF);
}
else {
DrawModel(lodTable[0] & 0xFFFFFF); //TODO
}
}
}
else {
DescendNodePtr(child1Ptr);
}
}
m_modelMat.PopMatrix();
// Restore old texture offsets
m_nodeAttribs.Pop();
}
void CNew3D::DescendNodePtr(UINT32 nodeAddr)
{
    // Dispatches a node link to the right handler based on the type bits in its upper byte
    const UINT32 target = nodeAddr & 0x00FFFFFF;

    // Ignore null links
    if (target == 0) {
        return;
    }

    // pointer type encoded in upper 8 bits (only bits 0 and 2 are examined)
    const UINT32 linkType = (nodeAddr >> 24) & 0x5;

    if (linkType == 0x00) {
        DescendCullingNode(target);
    }
    else if (linkType == 0x01) {
        DrawModel(target);
    }
    else if (linkType == 0x04) {
        DescendPointerList(target);
    }
    // any other combination is ignored
}
void CNew3D::DescendPointerList(UINT32 addr)
{
    // Visits each entry of a pointer list as a culling node until a terminating flag is found
    const UINT32* const list = TranslateCullingAddress(addr);

    if (list == nullptr) {
        return;
    }

    for (const UINT32* entry = list; ; ++entry) {

        if (*entry & 0x01000000) {
            break;  // empty list
        }

        // clear upper 8 bits to ensure this is processed as a culling node
        DescendCullingNode(*entry & 0x00FFFFFF);

        if (*entry & 0x02000000) {
            break;  // list end
        }
    }
}
/******************************************************************************
Matrix Stack
******************************************************************************/
// Macro to generate column-major (OpenGL) index from y,x subscripts
#define CMINDEX(y,x) (x*4+y)
/*
* MultMatrix():
*
* Multiplies the matrix stack by the specified Real3D matrix. The matrix
* index is a 12-bit number specifying a matrix number relative to the base.
* The base matrix MUST be set up before calling this function.
*/
void CNew3D::MultMatrix(UINT32 matrixOffset, Mat4& mat)
{
    // The base pointer must be validated BEFORE it is indexed: forming
    // &m_matrixBasePtr[n] from a null pointer is undefined behaviour, even if
    // the result is never dereferenced.
    if (m_matrixBasePtr == nullptr) {   // LA Machineguns
        return;
    }

    // Each Real3D matrix occupies 12 floats: a translation vector (3 floats)
    // followed by a 3x3 matrix stored row by row (9 floats)
    const float *src = &m_matrixBasePtr[matrixOffset * 12];

    GLfloat m[4*4];

    for (int row = 0; row < 3; row++) {
        const int rowBase = 3 + (row * 3);

        m[CMINDEX(row, 0)] = src[rowBase + 0];
        m[CMINDEX(row, 1)] = src[rowBase + 1];
        m[CMINDEX(row, 2)] = src[rowBase + 2];
        m[CMINDEX(row, 3)] = src[row];      // translation component for this row
    }

    // bottom row of an affine transform
    m[CMINDEX(3, 0)] = 0.0;
    m[CMINDEX(3, 1)] = 0.0;
    m[CMINDEX(3, 2)] = 0.0;
    m[CMINDEX(3, 3)] = 1.0;

    mat.MultMatrix(m);
}
/*
* InitMatrixStack():
*
* Initializes the modelview (model space -> view space) matrix stack and
* Real3D coordinate system. These are the last transforms to be applied (and
* the first to be defined on the stack) before projection.
*
* Model 3 games tend to define the following unusual base matrix:
*
* 0 0 -1 0
* 1 0 0 0
* 0 -1 0 0
* 0 0 0 1
*
* When this is multiplied by a column vector, the output is:
*
* -Z
* X
* -Y
* 1
*
* My theory is that the Real3D GPU accepts vectors in Z,X,Y order. The games
* store everything as X,Y,Z and perform the translation at the end. The Real3D
* also has Y and Z coordinates opposite of the OpenGL convention. This
* function inserts a compensating matrix to undo these things.
*
* NOTE: This function assumes we are in GL_MODELVIEW matrix mode.
*/
void CNew3D::InitMatrixStack(UINT32 matrixBaseAddr, Mat4& mat)
{
    // This matrix converts vectors back from the weird Model 3 Z,X,Y ordering
    // and also into OpenGL viewspace (-Y,-Z).
    //
    // Written out in column-major (OpenGL) storage order; as a conventional
    // row/column matrix it reads:
    //
    //   0  1  0  0
    //   0  0 -1  0
    //  -1  0  0  0
    //   0  0  0  1
    GLfloat compensate[4 * 4] = {
         0.0f,  0.0f, -1.0f,  0.0f,     // column 0
         1.0f,  0.0f,  0.0f,  0.0f,     // column 1
         0.0f, -1.0f,  0.0f,  0.0f,     // column 2
         0.0f,  0.0f,  0.0f,  1.0f      // column 3
    };

    mat.LoadMatrix(compensate);

    // Set matrix base address and apply matrix #0 (coordinate system matrix)
    m_matrixBasePtr = (float *)TranslateCullingAddress(matrixBaseAddr);
    MultMatrix(0, mat);
}
// Sets the rotation of the current matrix back to zero, whilst keeping its position and scale.
// This is done by multiplying the current matrix by the inverse of its own (unscaled) rotation.
void CNew3D::ResetMatrix(Mat4& mat)
{
    float m[16];
    memcpy(m, mat.currentMatrix, sizeof(m));

    // transpose the top 3x3 of the matrix (this effectively inverts the rotation).
    // When we multiply our new matrix it'll effectively cancel out the rotations.
    std::swap(m[1], m[4]);
    std::swap(m[2], m[8]);
    std::swap(m[6], m[9]);

    // set position to zero
    m[12] = 0;
    m[13] = 0;
    m[14] = 0;
    m[15] = 1;

    // normalise columns, this removes the scaling, otherwise we'll apply it twice
    for (int col = 0; col < 3; col++) {
        float* c = &m[col * 4];
        const float length = std::sqrt((c[0] * c[0]) + (c[1] * c[1]) + (c[2] * c[2]));

        c[0] /= length;
        c[1] /= length;
        c[2] /= length;
    }

    mat.MultMatrix(m);
}
// Processes the viewport node at addr and then, recursively, every following viewport in the list.
// For each enabled viewport a new entry is added to m_nodes, its viewport parameters
// (dimensions, frustum, lighting, spotlight, line of sight, fog) are decoded from the node words,
// and the scene graph below it is traversed to collect its models. Nothing is drawn here.
void CNew3D::RenderViewport(UINT32 addr)
{
static const GLfloat color[8][3] =
{ // RGB1 color translation
{ 0.0f, 0.0f, 0.0f }, // off
{ 0.0f, 0.0f, 1.0f }, // blue
{ 0.0f, 1.0f, 0.0f }, // green
{ 0.0f, 1.0f, 1.0f }, // cyan
{ 1.0f, 0.0f, 0.0f }, // red
{ 1.0f, 0.0f, 1.0f }, // purple
{ 1.0f, 1.0f, 0.0f }, // yellow
{ 1.0f, 1.0f, 1.0f } // white
};
// a null link ends the traversal
if ((addr & 0x00FFFFFF) == 0) {
return;
}
// Translate address and obtain pointer
const uint32_t *vpnode = TranslateCullingAddress(addr);
if (NULL == vpnode) {
return;
}
if (!(vpnode[0] & 0x20)) { // only if viewport enabled
// create node object
m_nodes.emplace_back(Node());
m_nodes.back().models.reserve(2048); // create space for models
// get pointer to its viewport
Viewport *vp = &m_nodes.back().viewport;
vp->priority = (vpnode[0] >> 3) & 0x3;
vp->select = (vpnode[0] >> 8) & 0x3;
vp->number = (vpnode[0] >> 10);
m_currentPriority = vp->priority;
// Fetch viewport parameters (TO-DO: would rounding make a difference?)
vp->vpX = (int)(((vpnode[0x1A] & 0xFFFF) * (float)(1.0 / 16.0)) + 0.5f); // viewport X (12.4 fixed point)
vp->vpY = (int)(((vpnode[0x1A] >> 16) * (float)(1.0 / 16.0)) + 0.5f); // viewport Y (12.4)
vp->vpWidth = (int)(((vpnode[0x14] & 0xFFFF) * (float)(1.0 / 4.0)) + 0.5f); // width (14.2)
vp->vpHeight = (int)(((vpnode[0x14] >> 16) * (float)(1.0 / 4.0)) + 0.5f); // height (14.2)
uint32_t matrixBase = vpnode[0x16] & 0xFFFFFF; // matrix base address
m_LODBlendTable = (LODBlendTable*)TranslateCullingAddress(vpnode[0x17] & 0xFFFFFF);
/*
vp->angle_left = -atan2f(Util::Uint32AsFloat(vpnode[12]), Util::Uint32AsFloat(vpnode[13])); // These values work out as the normals for the clipping planes.
vp->angle_right = atan2f(Util::Uint32AsFloat(vpnode[16]), -Util::Uint32AsFloat(vpnode[17])); // Sometimes these values (dirt devils,lost world) are totally wrong
vp->angle_top = atan2f(Util::Uint32AsFloat(vpnode[14]), Util::Uint32AsFloat(vpnode[15])); // and don't work for the frustum values exactly.
vp->angle_bottom = -atan2f(Util::Uint32AsFloat(vpnode[18]), -Util::Uint32AsFloat(vpnode[19])); // Perhaps they are just used for culling and not rendering.
*/
float cv = Util::Uint32AsFloat(vpnode[0x8]); // 1/(left-right)
float cw = Util::Uint32AsFloat(vpnode[0x9]); // 1/(top-bottom)
float io = Util::Uint32AsFloat(vpnode[0xa]); // top / bottom (ratio) - ish
float jo = Util::Uint32AsFloat(vpnode[0xb]); // left / right (ratio)
// frustum edges derived from the ratio and reciprocal-extent values above
vp->angle_left = (0.0f - jo) / cv;
vp->angle_right = (1.0f - jo) / cv;
vp->angle_bottom = -(1.0f - io)/ cw;
vp->angle_top = -(0.0f - io)/ cw;
// calculate the frustum shape, near/far pair are dummy values
CalcViewport(vp, 1.f, 1000.f);
// calculate frustum planes
CalcFrustumPlanes(m_planes, vp->projectionMatrix); // we need to calc a 'projection matrix' to get the correct frustum planes for clipping
// Lighting (note that sun vector points toward sun -- away from vertex)
vp->lightingParams[0] = Util::Uint32AsFloat(vpnode[0x05]); // sun X
vp->lightingParams[1] = -Util::Uint32AsFloat(vpnode[0x06]); // sun Y (- to convert to ogl coordinate system)
vp->lightingParams[2] = -Util::Uint32AsFloat(vpnode[0x04]); // sun Z (- to convert to ogl coordinate system)
vp->lightingParams[3] = std::max(0.f, std::min(Util::Uint32AsFloat(vpnode[0x07]), 1.0f)); // sun intensity (clamp to 0-1)
vp->lightingParams[4] = (float)((vpnode[0x24] >> 8) & 0xFF) * (float)(1.0 / 255.0); // ambient intensity
vp->lightingParams[5] = 0.0f; // reserved
vp->sunClamp = m_sunClamp;
vp->intensityClamp = (m_step == 0x10); // just step 1.0 ?
vp->hardwareStep = m_step;
// Spotlight
int spotColorIdx = (vpnode[0x20] >> 11) & 7; // spotlight color index
int spotFogColorIdx = (vpnode[0x20] >> 8) & 7; // spotlight on fog color index
vp->spotEllipse[0] = (float)(INT16)(vpnode[0x1E] & 0xFFFF) * (float)(1.0 / 8.0);// spotlight X position (13.3 fixed point)
vp->spotEllipse[1] = (float)(INT16)(vpnode[0x1D] & 0xFFFF) * (float)(1.0 / 8.0);// spotlight Y
vp->spotEllipse[2] = (float)((vpnode[0x1E] >> 16) & 0xFFFF); // spotlight X size (16-bit)
vp->spotEllipse[3] = (float)((vpnode[0x1D] >> 16) & 0xFFFF); // spotlight Y size
vp->spotRange[0] = 1.0f / Util::Uint32AsFloat(vpnode[0x21]); // spotlight start
vp->spotRange[1] = Util::Uint32AsFloat(vpnode[0x1F]); // spotlight extent
vp->spotColor[0] = color[spotColorIdx][0]; // spotlight color
vp->spotColor[1] = color[spotColorIdx][1];
vp->spotColor[2] = color[spotColorIdx][2];
vp->spotFogColor[0] = color[spotFogColorIdx][0]; // spotlight color on fog
vp->spotFogColor[1] = color[spotFogColorIdx][1];
vp->spotFogColor[2] = color[spotFogColorIdx][2];
// spotlight is specified in terms of physical resolution
vp->spotEllipse[1] = 384.0f - vp->spotEllipse[1]; // flip Y position
// Avoid division by zero
vp->spotEllipse[2] = std::max(1.0f, vp->spotEllipse[2]);
vp->spotEllipse[3] = std::max(1.0f, vp->spotEllipse[3]);
// the size fields are converted to their (rounded) reciprocal relative to 2047
vp->spotEllipse[2] = std::roundf(2047.0f / vp->spotEllipse[2]);
vp->spotEllipse[3] = std::roundf(2047.0f / vp->spotEllipse[3]);
// Scale the spotlight to the OpenGL viewport
vp->spotEllipse[0] = vp->spotEllipse[0] * m_xRatio + (float)m_xOffs;
vp->spotEllipse[1] = vp->spotEllipse[1] * m_yRatio + (float)m_yOffs;
vp->spotEllipse[2] *= m_xRatio;
vp->spotEllipse[3] *= m_yRatio;
// Line of sight position
vp->losPosX = (int)(((vpnode[0x1c] & 0xFFFF) / 16.0f) + 0.5f); // x position
vp->losPosY = (int)(((vpnode[0x1c] >> 16) / 16.0f) + 0.5f); // y position 0 starts from the top
// Fog
vp->fogParams[0] = (float)((vpnode[0x22] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog color R
vp->fogParams[1] = (float)((vpnode[0x22] >> 8) & 0xFF) * (float)(1.0 / 255.0); // fog color G
vp->fogParams[2] = (float)((vpnode[0x22] >> 0) & 0xFF) * (float)(1.0 / 255.0); // fog color B
vp->fogParams[3] = std::abs(Util::Uint32AsFloat(vpnode[0x23])); // fog density - ocean hunter uses negative values, but looks the same
vp->fogParams[4] = (float)(INT16)(vpnode[0x25] & 0xFFFF)* (float)(1.0 / 255.0); // fog start
// Avoid Infinite and NaN values for Star Wars Trilogy
// (entries 5 and 6 are zeroed here too, but are assigned again immediately below)
if (std::isinf(vp->fogParams[3]) || std::isnan(vp->fogParams[3])) {
for (int i = 0; i < 7; i++) vp->fogParams[i] = 0.0f;
}
vp->fogParams[5] = (float)((vpnode[0x24] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog attenuation
vp->fogParams[6] = (float)((vpnode[0x25] >> 16) & 0xFF) * (float)(1.0 / 255.0); // fog ambient
vp->scrollFog = (float)(vpnode[0x20] & 0xFF) * (float)(1.0 / 255.0); // scroll fog
vp->scrollAtt = (float)(vpnode[0x24] & 0xFF) * (float)(1.0 / 255.0); // scroll attenuation
// Clear texture offsets before proceeding
m_nodeAttribs.Reset();
// Set up coordinate system and base matrix
InitMatrixStack(matrixBase, m_modelMat);
// Descend down the node link. Need to start with a culling node because that defines our culling radius.
// (links whose type bits mark them as a model or pointer list are therefore not followed)
auto childptr = vpnode[0x02];
if (((childptr >> 24) & 0x5) == 0) {
DescendNodePtr(vpnode[0x02]);
}
}
// render next viewport
// the value 0x01000000 in the link word marks the end of the viewport list
if (vpnode[0x01] != 0x01000000) {
RenderViewport(vpnode[0x01]);
}
}
void CNew3D::CopyVertexData(const R3DPoly& r3dPoly, std::vector<FVertex>& vertexArray)
{
// both lemans 24 and dirt devils are rendering some totally transparent polys as the first object in each viewport
// in dirt devils it's parallel to the camera so is completely invisible, but breaks our depth calculation
// in lemans 24 its a sort of diamond shape, but never leaves a hole in the transparent geometry so must be being skipped by the h/w
if (r3dPoly.faceColour[3] == 0) {
return;
}
if (m_numPolyVerts==4) {
if (r3dPoly.number == 4) {
vertexArray.emplace_back(r3dPoly, 0); // construct directly inside container without copy
vertexArray.emplace_back(r3dPoly, 1);
vertexArray.emplace_back(r3dPoly, 2);
vertexArray.emplace_back(r3dPoly, 3);
// check for identical points (ie forced triangle) and replace with average point
// if we don't do this our quad code falls apart
FVertex* v = (&vertexArray.back()) - 3;
for (int i = 0; i < 4; i++) {
int next1 = (i + 1) % 4;
int next2 = (i + 2) % 4;
if (FVertex::Equal(v[i], v[next1])) {
FVertex::Average(v[next1], v[next2], v[next1]);
break;
}
}
}
else {
vertexArray.emplace_back(r3dPoly, 0);
vertexArray.emplace_back(r3dPoly, 1);
vertexArray.emplace_back(r3dPoly, 2);
vertexArray.emplace_back(r3dPoly, 0, 2); // last point is an average of 0 and 2
}
}
else {
vertexArray.emplace_back(r3dPoly, 0);
vertexArray.emplace_back(r3dPoly, 1);
vertexArray.emplace_back(r3dPoly, 2);
if (r3dPoly.number == 4) {
vertexArray.emplace_back(r3dPoly, 0);
vertexArray.emplace_back(r3dPoly, 2);
vertexArray.emplace_back(r3dPoly, 3);
}
}
}
void CNew3D::GetCoordinates(int width, int height, UINT16 uIn, UINT16 vIn, float uvScale, float& uOut, float& vOut)
{
    // Scale the raw texture coordinates, then express them as a fraction of the texture size
    const float scaledU = uIn * uvScale;
    const float scaledV = vIn * uvScale;

    uOut = scaledU / width;
    vOut = scaledV / height;
}
int CNew3D::GetTexFormat(int originalFormat, bool contour)
{
    // Formats 1-4 have a contour variant numbered 7 higher: these formats are identical
    // to 1-4, except they lose the 4 bit alpha part when contour is enabled.
    // Every other format, and every format without contour, is returned unchanged.
    const bool hasContourVariant = (originalFormat >= 1) && (originalFormat <= 4);

    if (contour && hasContourVariant) {
        return originalFormat + 7;
    }

    return originalFormat;
}
// Fills in the render attributes of *currentMesh from the polygon header ph.
// Attributes that do not depend on texturing are always copied; the texture
// related ones (format, position, size, wrap modes, micro texture) are only
// written when the header reports texturing as enabled.
void CNew3D::SetMeshValues(SortingMesh *currentMesh, PolyHeader &ph)
{
    SortingMesh& mesh = *currentMesh;

    // attributes common to textured and untextured meshes
    mesh.textured       = ph.TexEnabled();
    mesh.alphaTest      = ph.AlphaTest();
    mesh.textureAlpha   = ph.TextureAlpha();
    mesh.polyAlpha      = ph.PolyAlpha();
    mesh.lighting       = ph.LightEnabled();
    mesh.fixedShading   = ph.FixedShading() && !ph.SmoothShading();
    mesh.highPriority   = ph.HighPriority();
    mesh.transLSelect   = ph.TranslucencyPatternSelect();
    mesh.layered        = ph.Layered();
    mesh.specular       = ph.SpecularEnabled();
    mesh.shininess      = ph.Shininess();
    mesh.specularValue  = ph.SpecularValue();
    mesh.fogIntensity   = ph.LightModifier();
    mesh.translatorMap  = ph.TranslatorMap();

    if (!mesh.textured) {
        return;     // nothing texture related to set up
    }

    mesh.format = GetTexFormat(ph.TexFormat(), ph.AlphaTest());
    if (mesh.format == 7) {
        // alpha test is a 1 bit test, this format needs a lower threshold,
        // since it has 16 levels of transparency
        mesh.alphaTest = false;
    }

    mesh.x            = ph.X();
    mesh.y            = ph.Y();
    mesh.width        = ph.TexWidth();
    mesh.height       = ph.TexHeight();
    mesh.microTexture = ph.MicroTexture();
    mesh.inverted     = ph.TranslatorMapOffset() == 2;

    // wrap modes: mirror/repeat comes from the header, the "Clamp" variants
    // are chosen whenever smooth wrapping is not in effect for that axis
    bool smoothU = ph.TexSmoothU();
    bool smoothV = ph.TexSmoothV();
    if (ph.AlphaTest()) {
        // smooth wrap makes no sense for alpha tested polys with pixel dilate
        smoothU = false;
        smoothV = false;
    }

    const auto selectWrapMode = [](bool mirrored, bool smooth) {
        if (mirrored) {
            return smooth ? Mesh::TexWrapMode::mirror : Mesh::TexWrapMode::mirrorClamp;
        }
        return smooth ? Mesh::TexWrapMode::repeat : Mesh::TexWrapMode::repeatClamp;
    };

    mesh.wrapModeU = selectWrapMode(ph.TexUMirror(), smoothU);
    mesh.wrapModeV = selectWrapMode(ph.TexVMirror(), smoothV);

    if (mesh.microTexture) {
        // scale factor looked up by the header's minimum LOD value
        // NOTE(review): assumes MicroTextureMinLOD() is always in 0..3 - confirm against PolyHeader
        static const float microTexScale[] = { 2.f, 4.f, 16.f, 256.f };

        mesh.microTextureID    = ph.MicroTextureID();
        mesh.microTextureScale = microTexScale[ph.MicroTextureMinLOD()];
    }
}
// Decodes the polygon list starting at 'data' and stores the result in *m.
//
// Polygons are grouped by PolyHeader::Hash() into temporary SortingMesh
// buckets. Once every polygon has been read, each bucket's vertices are
// appended to m_polyBufferRam (m->dynamic) or m_polyBufferRom (otherwise),
// its vboOffset / vertexCount are recorded, and the mesh is pushed onto
// m->meshes.
//
//   m    : model that receives the meshes; m->dynamic selects the target buffer
//   data : first polygon header of the model; a null pointer is ignored
//
// Vertices flagged as shared are taken from m_prev / m_prevTexCoords, which
// this function refreshes after every polygon.
void CNew3D::CacheModel(Model *m, const UINT32 *data)
{
    if (data == nullptr)
        return;

    // Zero-initialised: all four entries are copied into m_prevTexCoords after
    // every polygon, including the entries a triangle never writes. Without
    // the initialiser the first triangle would copy indeterminate values.
    UINT16 texCoords[4][2] = {};
    PolyHeader ph;
    UINT64 lastHash = -1;
    SortingMesh* currentMesh = nullptr;
    std::unordered_map<UINT64, SortingMesh> sMap;

    ph = data;
    int numTriangles = ph.NumTrianglesTotal();

    // Cache all polygons
    do {
        R3DPoly p;      // current polygon
        float uvScale;

        // a zero in header word 6 ends the polygon list
        if (ph.header[6] == 0) {
            break;
        }

        // create a hash value based on poly attributes -todo add more attributes
        auto hash = ph.Hash();

        if (hash != lastHash) {
            // single lookup: reuse the bucket if it exists, otherwise create it
            auto found = sMap.find(hash);
            if (found == sMap.end()) {
                currentMesh = &sMap.insert({hash, SortingMesh()}).first->second;

                //make space for our vertices
                currentMesh->verts.reserve(numTriangles * 3);

                //set mesh values
                SetMeshValues(currentMesh, ph);
            }
            else {
                currentMesh = &found->second;
            }
        }

        // Obtain basic polygon parameters
        p.number = ph.NumVerts();
        uvScale = ph.UVScale();
        ph.FaceNormal(p.faceNormal);

        // Fetch reused vertices according to bitfield, then new verts
        int j = 0;
        for (int i = 0; i < 4; i++)     // up to 4 reused vertices
        {
            if (ph.SharedVertex(i))
            {
                p.v[j] = m_prev[i];
                texCoords[j][0] = m_prevTexCoords[i][0];
                texCoords[j][1] = m_prevTexCoords[i][1];

                //check if we need to recalc tex coords - will only happen if tex tiles are different + sharing vertices
                if (hash != lastHash) {
                    if (currentMesh->textured) {
                        GetCoordinates(currentMesh->width, currentMesh->height, texCoords[j][0], texCoords[j][1], uvScale, p.v[j].texcoords[0], p.v[j].texcoords[1]);
                    }
                }

                j++;
            }
        }

        lastHash = hash;

        // copy face attributes
        if (!ph.PolyColor()) {
            // colour comes from the colour table held in polygon RAM
            int colorIdx = ph.ColorIndex();
            p.faceColour[2] = (m_polyRAM[m_colorTableAddr + colorIdx] & 0xFF);
            p.faceColour[1] = ((m_polyRAM[m_colorTableAddr + colorIdx] >> 8) & 0xFF);
            p.faceColour[0] = ((m_polyRAM[m_colorTableAddr + colorIdx] >> 16) & 0xFF);
        }
        else {
            // colour is stored directly in header word 4
            p.faceColour[0] = ((ph.header[4] >> 24));
            p.faceColour[1] = ((ph.header[4] >> 16) & 0xFF);
            p.faceColour[2] = ((ph.header[4] >> 8) & 0xFF);
        }

        p.faceColour[3] = ph.Transparency();

        if (ph.Discard1() && !ph.Discard2()) {
            p.faceColour[3] /= 2;
        }

        // if we have flat shading, we can't re-use normals from shared vertices
        for (int i = 0; i < p.number && !ph.SmoothShading(); i++) {
            p.v[i].normal[0] = p.faceNormal[0];
            p.v[i].normal[1] = p.faceNormal[1];
            p.v[i].normal[2] = p.faceNormal[2];
        }

        UINT32* vData = ph.StartOfData();   // vertex data starts here

        // remaining vertices are new and defined here (4 words each)
        for (; j < p.number; j++)
        {
            // Fetch vertices
            UINT32 ix = vData[0];
            UINT32 iy = vData[1];
            UINT32 iz = vData[2];
            UINT32 it = vData[3];

            // Decode vertices: the upper 24 bits of each word hold the signed position
            p.v[j].pos[0] = (((INT32)ix) >> 8) * m_vertexFactor;
            p.v[j].pos[1] = (((INT32)iy) >> 8) * m_vertexFactor;
            p.v[j].pos[2] = (((INT32)iz) >> 8) * m_vertexFactor;
            p.v[j].pos[3] = 1.0f;

            // Per vertex normals: the low byte of each word holds a signed component
            if (ph.SmoothShading()) {
                p.v[j].normal[0] = BYTE_TO_FLOAT((INT8)(ix & 0xFF));
                p.v[j].normal[1] = BYTE_TO_FLOAT((INT8)(iy & 0xFF));
                p.v[j].normal[2] = BYTE_TO_FLOAT((INT8)(iz & 0xFF));
            }

            if (ph.FixedShading() && !ph.SmoothShading()) {     // fixed shading seems to be disabled if actual normals are set
                float shade;

                if (!m_shadeIsSigned) {
                    shade = (ix & 0xFF) * (float)(1.0 / 255.0);
                }
                else {
                    shade = BYTE_TO_FLOAT((INT8)(ix & 0xFF));
                }

                p.v[j].fixedShade = shade;
            }

            float texU = 0;
            float texV = 0;

            // tex coords
            if (currentMesh->textured) {
                GetCoordinates(currentMesh->width, currentMesh->height, (UINT16)(it >> 16), (UINT16)(it & 0xFFFF), uvScale, texU, texV);
            }

            p.v[j].texcoords[0] = texU;
            p.v[j].texcoords[1] = texV;

            //cache un-normalised tex coordinates
            texCoords[j][0] = (UINT16)(it >> 16);
            texCoords[j][1] = (UINT16)(it & 0xFFFF);

            vData += 4;
        }

        // check if we need to double up vertices for two sided lighting
        if (ph.DoubleSided() && !ph.Discard()) {
            R3DPoly tempP = p;

            // flip normals
            V3::inverse(tempP.faceNormal);

            for (int i2 = 0; i2 < tempP.number; i2++) {
                V3::inverse(tempP.v[i2].normal);
            }

            CopyVertexData(tempP, currentMesh->verts);
        }

        // Copy this polygon into the model buffer
        if (!ph.Discard()) {
            CopyVertexData(p, currentMesh->verts);
        }

        // Copy current vertices into previous vertex array
        for (int i = 0; i < 4; i++) {
            m_prev[i] = p.v[i];
            m_prevTexCoords[i][0] = texCoords[i][0];
            m_prevTexCoords[i][1] = texCoords[i][1];
        }

    } while (ph.NextPoly());

    //sorted the data, now copy to main data structures

    // we know how many meshes we have to reserve appropriate space
    m->meshes->reserve(sMap.size());

    for (auto& it : sMap) {

        if (m->dynamic) {
            // calculate VBO values for current mesh
            // (RAM offsets are biased by MAX_ROM_VERTS)
            it.second.vboOffset = (int)m_polyBufferRam.size() + MAX_ROM_VERTS;
            it.second.vertexCount = (int)it.second.verts.size();

            // copy poly data to main buffer
            m_polyBufferRam.insert(m_polyBufferRam.end(), it.second.verts.begin(), it.second.verts.end());
        }
        else {
            // calculate VBO values for current mesh
            it.second.vboOffset = (int)m_polyBufferRom.size();
            it.second.vertexCount = (int)it.second.verts.size();

            // copy poly data to main buffer
            m_polyBufferRom.insert(m_polyBufferRom.end(), it.second.verts.begin(), it.second.verts.end());
        }

        //copy the temp mesh into the model structure
        //this will lose the associated vertex data, which is now copied to the main buffer anyway
        m->meshes->push_back(it.second);
    }
}
// Walks the polygon list at 'data' and reports whether the model has to be
// treated as dynamic.
// Returns true as soon as a polygon is found with bit 1 of header word 1
// clear (original note: "model has rgb colour palette"). Returns false for a
// null pointer, or when the list ends (header word 6 == 0 or NextPoly() fails)
// without such a polygon.
bool CNew3D::IsDynamicModel(UINT32 *data)
{
    if (data == nullptr) {
        return false;
    }

    PolyHeader poly(data);

    for (;;) {
        const bool paletteBitClear = (poly.header[1] & 2) == 0;
        if (paletteBitClear) {
            return true;
        }

        // end of list marker
        if (poly.header[6] == 0) {
            return false;
        }

        if (!poly.NextPoly()) {
            return false;
        }
    }
}
// Returns true when modelAddr is 0x100000 or above, false for anything lower.
bool CNew3D::IsVROMModel(UINT32 modelAddr)
{
    const UINT32 firstVromAddr = 0x100000;

    return !(modelAddr < firstVromAddr);
}
// Applies a texture offset (offX, offY, page) to the texture position (x, y)
// and writes the result to newX / newY.
// X wraps at 2048. Y is split into a 1024-line page plus a line within that
// page: the line wraps inside its own page, and the page index is offset by
// 'page' and kept in the range 0-1.
void CNew3D::CalcTexOffset(int offX, int offY, int page, int x, int y, int& newX, int& newY)
{
    const int pageHeight = 1024;

    const int sourcePage = y / pageHeight;
    const int lineInPage = y % pageHeight;      // y with the page removed

    // wrap around 2048, shouldn't be required
    newX = (x + offX) & 2047;

    // wraps around in the same sheet, not into another memory sheet
    const int wrappedLine = (lineInPage + offY) & 1023;

    // max page 0-1
    const int targetPage = (sourcePage + page) & 1;

    newY = wrappedLine + targetPage * pageHeight;
}
// Derives five clipping planes from a 16-element matrix.
//   p[0] left, p[1] right, p[2] bottom, p[3] top : built from matrix entries
//        3/7/11/15 plus or minus entries 0/4/8/12 (left/right) or 1/5/9/13
//        (bottom/top), then normalised
//   p[4] front : fixed plane (0, 0, -1, 0), independent of the matrix
void CNew3D::CalcFrustumPlanes(Plane p[5], const float* matrix)
{
    // 'offset' selects which entry of each group of four is combined with the
    // fourth one; 'add' chooses between sum and difference
    const auto buildSidePlane = [matrix](Plane& plane, int offset, bool add) {
        if (add) {
            plane.a = matrix[3]  + matrix[offset];
            plane.b = matrix[7]  + matrix[offset + 4];
            plane.c = matrix[11] + matrix[offset + 8];
            plane.d = matrix[15] + matrix[offset + 12];
        }
        else {
            plane.a = matrix[3]  - matrix[offset];
            plane.b = matrix[7]  - matrix[offset + 4];
            plane.c = matrix[11] - matrix[offset + 8];
            plane.d = matrix[15] - matrix[offset + 12];
        }
        plane.Normalise();
    };

    buildSidePlane(p[0], 0, true);      // Left Plane
    buildSidePlane(p[1], 0, false);     // Right Plane
    buildSidePlane(p[2], 1, true);      // Bottom Plane
    buildSidePlane(p[3], 1, false);     // Top Plane

    // Front Plane
    Plane& front = p[4];
    front.a = 0.f;
    front.b = 0.f;
    front.c = -1.f;
    front.d = 0.f;
}
// Fills box.points with the eight corners of a cube centred on the origin
// whose faces lie at +/- distance on every axis. The fourth component of
// every corner is set to 1.
void CNew3D::CalcBox(float distance, BBox& box)
{
    // per corner: true selects +distance, false selects -distance (x, y, z)
    static const bool positive[8][3] = {
        { false, false, true  },    // bottom left front
        { false, false, false },    // bottom left back
        { true,  false, false },    // bottom right back
        { true,  false, true  },    // bottom right front
        { false, true,  true  },    // top left front
        { false, true,  false },    // top left back
        { true,  true,  false },    // top right back
        { true,  true,  true  },    // top right front
    };

    for (int corner = 0; corner < 8; ++corner) {
        for (int axis = 0; axis < 3; ++axis) {
            box.points[corner][axis] = positive[corner][axis] ? distance : -distance;
        }
        box.points[corner][3] = 1.f;
    }
}
// Multiplies the 4-component vector 'in' by the 16-element matrix and stores
// the 4-component result in 'out'.
// Component c of the result is the sum of in[r] * matrix[r * 4 + c] over
// r = 0..3.
void CNew3D::MultVec(const float matrix[16], const float in[4], float out[4])
{
    for (int c = 0; c < 4; ++c) {
        const float* entry = matrix + c;    // entries c, c + 4, c + 8, c + 12

        out[c] =
            in[0] * entry[0] +
            in[1] * entry[4] +
            in[2] * entry[8] +
            in[3] * entry[12];
    }
}
// Transforms all eight corners of 'box' in place by the matrix m.
// Only the first three components of each corner are overwritten; the fourth
// component keeps the value it had before the call.
void CNew3D::TransformBox(const float *m, BBox& box)
{
    for (int corner = 0; corner < 8; ++corner) {
        float transformed[4];
        MultVec(m, box.points[corner], transformed);

        for (int axis = 0; axis < 3; ++axis) {
            box.points[corner][axis] = transformed[axis];
        }
    }
}
// Classifies a box against five clipping planes.
//   Clip::INSIDE    : all eight corners are on the non-negative side of all five planes
//   Clip::OUTSIDE   : no corner is fully inside and, for at least one plane,
//                     every corner is on its negative side
//   Clip::INTERCEPT : everything else
Clip CNew3D::ClipBox(const BBox& box, Plane planes[5])
{
    const auto inFrontOf = [&](int planeIdx, int cornerIdx) {
        return planes[planeIdx].DistanceToPoint(box.points[cornerIdx]) >= 0.f;
    };

    // count the corners that pass every plane test
    int cornersInside = 0;

    for (int corner = 0; corner < 8; ++corner) {
        int planesPassed = 0;

        for (int plane = 0; plane < 5; ++plane) {
            if (inFrontOf(plane, corner)) {
                ++planesPassed;
            }
        }

        if (planesPassed == 5) {    // corner is inside all 5 planes
            ++cornersInside;
        }
    }

    if (cornersInside == 8) {
        return Clip::INSIDE;
    }
    if (cornersInside != 0) {
        return Clip::INTERCEPT;
    }

    // no corner lies inside the volume
    // if all corners are behind any single plane the box cannot be visible
    for (int plane = 0; plane < 5; ++plane) {
        int cornersInFront = 0;

        for (int corner = 0; corner < 8; ++corner) {
            if (inFrontOf(plane, corner)) {
                ++cornersInFront;
            }
        }

        if (cornersInFront == 0) {
            return Clip::OUTSIDE;
        }
    }

    // the box straddles the planes without having a corner inside
    return Clip::INTERCEPT;
}
// Widens the near/far pair of the current priority level using the z values
// of the box corners. Only corners with negative z are considered: zNear
// becomes the maximum and zFar the minimum of the values seen so far.
void CNew3D::CalcBoxExtents(const BBox& box)
{
    for (int corner = 0; corner < 8; ++corner) {
        const float z = box.points[corner][2];

        if (z < 0.f) {
            auto& range = m_nfPairs[m_currentPriority];

            range.zNear = std::max(z, range.zNear);
            range.zFar  = std::min(z, range.zFar);
        }
    }
}
// Clips clipPoly in place against planes[0] to planes[3]; planes[4] is not
// used here.
// Each pass walks the edges of the source polygon, keeps the vertices on the
// non-negative side of the plane and inserts an interpolated position where
// an edge crosses it. Source and destination are swapped after every pass;
// with four passes the final result lands back in clipPoly.
// Only pos[0..2] of an inserted vertex is written.
void CNew3D::ClipPolygon(ClipPoly& clipPoly, Plane planes[5])
{
    ClipPoly scratch;
    ClipPoly* src = &clipPoly;
    ClipPoly* dst = &scratch;

    for (int planeIdx = 0; planeIdx < 4; ++planeIdx) {
        Plane& plane = planes[planeIdx];

        // state of the vertex at the start of the current edge
        float startDot = plane.DotProduct(src->list[0].pos);
        bool startInside = (startDot + plane.d) >= 0.f;

        dst->count = 0;

        for (int a = 0; a < src->count; ++a) {
            if (startInside) {
                dst->list[dst->count] = src->list[a];
                dst->count++;
            }

            // the last vertex connects back to the first one
            const int b = (a + 1 >= src->count) ? 0 : a + 1;

            float endDot = plane.DotProduct(src->list[b].pos);
            bool endInside = (endDot + plane.d) >= 0.f;

            // the edge crosses the plane: add the crossing point
            if (startInside != endInside) {
                float u = (startDot + plane.d) / (startDot - endDot);

                const float* from = src->list[a].pos;
                const float* to = src->list[b].pos;

                for (int axis = 0; axis < 3; ++axis) {
                    dst->list[dst->count].pos[axis] = from[axis] + ((to[axis] - from[axis]) * u);
                }
                dst->count++;
            }

            startDot = endDot;
            startInside = endInside;
        }

        std::swap(src, dst);
    }
}
void CNew3D::ClipModel(const Model *m)
{
//===============================
ClipPoly clipPoly;
std::vector<FVertex>* vertices;
int offset;
//===============================
if (m->dynamic) {
vertices = &m_polyBufferRam;
offset = MAX_ROM_VERTS;
}
else {
vertices = &m_polyBufferRom;
offset = 0;
}
for (const auto &mesh : *m->meshes) {
int start = mesh.vboOffset - offset;
for (int i = 0; i < mesh.vertexCount; i += m_numPolyVerts) { // inc to next poly
for (int j = 0; j < m_numPolyVerts; j++) {
MultVec(m->modelMat, (*vertices)[start + i + j].pos, clipPoly.list[j].pos); // copy all 3 of 4 our transformed vertices into our clip poly struct
}
clipPoly.count = m_numPolyVerts;
ClipPolygon(clipPoly, m_planes);
for (int j = 0; j < clipPoly.count; j++) {
if (clipPoly.list[j].pos[2] < 0.f) {
m_nfPairs[m_currentPriority].zNear = std::max(clipPoly.list[j].pos[2], m_nfPairs[m_currentPriority].zNear);
m_nfPairs[m_currentPriority].zFar = std::min(clipPoly.list[j].pos[2], m_nfPairs[m_currentPriority].zFar);
}
}
}
}
}
// Computes the GL viewport rectangle (vp->x, y, width, height) and the
// projection matrix of *vp for the given near and far distances.
// near / far are sanitised first: a far value above 1e30 is replaced by
// near * 1000000, and near is raised to at least far / 1000000.
void CNew3D::CalcViewport(Viewport* vp, float near, float far)
{
    // fix for ocean hunter which passes some FLT_MAX for a few matrices.
    // HW must have some safe guard for these
    if (far > 1e30f) {
        far = near * 1000000.f;
    }

    // if we get really close to zero somehow, we will have almost no depth precision
    const float smallestNear = far / 1000000.f;
    if (near < smallestNear) {
        near = smallestNear;
    }

    // we need to calc the shape of the projection frustum for culling
    const float left   = near * vp->angle_left;
    const float right  = near * vp->angle_right;
    const float top    = near * vp->angle_top;
    const float bottom = near * vp->angle_bottom;

    vp->projectionMatrix.LoadIdentity();    // reset matrix

    const bool coversWholeScreen =
        (vp->vpX == 0) && (vp->vpWidth >= 495) &&
        (vp->vpY == 0) && (vp->vpHeight >= 383);

    // vertical placement is the same for both kinds of viewport
    vp->y      = m_yOffs + static_cast<int>(static_cast<float>(384 - (vp->vpY + vp->vpHeight)) * m_yRatio);
    vp->height = static_cast<int>(static_cast<float>(vp->vpHeight) * m_yRatio);

    if (coversWholeScreen) {
        /*
         * Aspect ratio correction. "Window" is the full GL viewport
         * (totalXRes x totalYRes); "viewable area" is the effective Model 3
         * screen (xRes x yRes).
         *
         * Non-wide-screen, non-stretch: the viewable area replicates the
         * 496x384 display and may be smaller than the window; the remainder
         * of the window appears as a border created by a scissor box.
         *
         * Wide-screen: the frustum is expanded horizontally to fill the
         * window while keeping the aspect ratio correct. The viewable area is
         * set the same as in non-wide-screen mode, but glScissor() is set by
         * the OSD layer's screen setup code to reveal the entire window.
         *
         * Stretch: window and viewable area are set the same, so there is no
         * correction; the display stretches to fill the window with the
         * frustum of a 496x384 Model 3 display and will be distorted.
         */
        const float windowAR       = static_cast<float>(m_totalXRes) / static_cast<float>(m_totalYRes);
        const float viewableAreaAR = static_cast<float>(m_xRes) / static_cast<float>(m_yRes);

        // Only the horizontal frustum planes are expanded, and only in
        // non-stretch mode (wide-screen and non-wide-screen modes have
        // identical resolution parameters and only their scissor box differs)
        const float correction = windowAR / viewableAreaAR;

        vp->x     = 0;
        vp->width = m_totalXRes;

        vp->projectionMatrix.Frustum(left * correction, right * correction, bottom, top, near, far);
    }
    else {
        vp->x     = m_xOffs + static_cast<int>(static_cast<float>(vp->vpX) * m_xRatio);
        vp->width = static_cast<int>(static_cast<float>(vp->vpWidth) * m_xRatio);

        vp->projectionMatrix.Frustum(left, right, bottom, top, near, far);
    }
}
void CNew3D::SetSunClamp(bool enable)
{
m_sunClamp = enable;
}
void CNew3D::SetSignedShade(bool enable)
{
m_shadeIsSigned = enable;
}
// Returns the value stored for 'layer' in the front line-of-sight buffer.
// The read is performed while holding m_losMutex.
float CNew3D::GetLosValue(int layer)
{
    // we always write to the 'back' buffer, and the software reads from the
    // front, then they get swapped
    std::lock_guard<std::mutex> guard(m_losMutex);

    const auto frontValue = m_losFront->value[layer];
    return frontValue;
}
// Maps a position given in real3d 496x384 screen coordinates to our viewport:
// Y is flipped (384 - inY), both axes are scaled by m_xRatio / m_yRatio and
// shifted by m_xOffs / m_yOffs.
void CNew3D::TranslateLosPosition(int inX, int inY, int& outX, int& outY)
{
    const int flippedY = 384 - inY;

    outX = m_xOffs + static_cast<int>(inX * m_xRatio);
    outY = m_yOffs + static_cast<int>(flippedY * m_yRatio);
}
bool CNew3D::ProcessLos(int priority)
{
for (const auto &n : m_nodes) {
if (n.viewport.priority == priority) {
if (n.viewport.losPosX || n.viewport.losPosY) {
int losX, losY;
TranslateLosPosition(n.viewport.losPosX, n.viewport.losPosY, losX, losY);
float depth;
glReadPixels(losX, losY, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT, &depth);
if (depth < 0.99f || depth == 1.0f) { // kinda guess work but when depth = 1, haven't drawn anything, when 0.99~ drawing sky somewhere far
return false;
}
depth = 2.0f * depth - 1.0f;
float zNear = m_nfPairs[priority].zNear;
float zFar = m_nfPairs[priority].zFar;
float zVal = 2.0f * zNear * zFar / (zFar + zNear - depth * (zFar - zNear));
m_losBack->value[priority] = zVal;
return true;
}
}
}
return false;
}
} // New3D