diff --git a/Makefiles/Makefile.SDL.Win32.GCC b/Makefiles/Makefile.SDL.Win32.GCC index becc7cb..c9bcf80 100644 --- a/Makefiles/Makefile.SDL.Win32.GCC +++ b/Makefiles/Makefile.SDL.Win32.GCC @@ -38,7 +38,7 @@ BITS = 64 # # Include console-based debugger in emulator ('yes' or 'no') # -ENABLE_DEBUGGER = no +ENABLE_DEBUGGER = yes ############################################################################### @@ -47,6 +47,11 @@ ENABLE_DEBUGGER = no # Edit these paths as necessary. ############################################################################### +# +# Boost 1.55 +# +BOOST_INCLUDEPATH = /mingw64/boost_1_55_0 + # # SDL # @@ -90,11 +95,11 @@ OUTFILE = $(BIN_DIR)\Supermodel.exe CC = gcc CXX = g++ LD = g++ -COMPILER_FLAGS = -I$(SDL_INCLUDEPATH) -ISrc/ -ISrc/OSD/ -ISrc/OSD/SDL/ -ISrc/OSD/Windows/ -c -Wall -O3 -DSUPERMODEL_WIN32 -DGLEW_STATIC +COMPILER_FLAGS = -I$(SDL_INCLUDEPATH) -ISrc/ -ISrc/OSD/ -ISrc/OSD/SDL/ -ISrc/OSD/Windows/ -c -Wall -DSUPERMODEL_WIN32 -DGLEW_STATIC -O3 CFLAGS = $(COMPILER_FLAGS) -CPPFLAGS = $(COMPILER_FLAGS) +CPPFLAGS = $(COMPILER_FLAGS) -I$(BOOST_INCLUDEPATH) -std=c++11 #LFLAGS = -s -o $(OUTFILE) $(OBJ) -L$(SDL_LIBPATH) -lmingw32 -lSDLmain -lSDL -lopengl32 -lglu32 -ldinput8 -ldxguid -lole32 -loleaut32 -lz -l:$(WINSDK_LIBPATH)/WbemUuid.lib -LFLAGS = -s -o $(OUTFILE) $(OBJ) -L$(SDL_LIBPATH) -lmingw32 -lSDLmain -lSDL -lopengl32 -lglu32 -ldinput8 -ldxguid -lole32 -loleaut32 -lz -l:$(WINSDK_LIBPATH)/WbemUuid.lib +LFLAGS = -o $(OUTFILE) $(OBJ) -L$(SDL_LIBPATH) -lmingw32 -lSDLmain -lSDL -lopengl32 -lglu32 -ldinput8 -ldxguid -lole32 -loleaut32 -lz -l:$(WINSDK_LIBPATH)/WbemUuid.lib -s # # Build options... 
@@ -115,7 +120,9 @@ endif # OBJ = $(OBJ_DIR)/PPCDisasm.o $(OBJ_DIR)/Games.o $(OBJ_DIR)/Config.o $(OBJ_DIR)/INIFile.o $(OBJ_DIR)/BlockFile.o $(OBJ_DIR)/93C46.o \ $(OBJ_DIR)/ROMLoad.o $(OBJ_DIR)/unzip.o $(OBJ_DIR)/ioapi.o $(OBJ_DIR)/Error.o $(OBJ_DIR)/glew.o $(OBJ_DIR)/Shader.o \ - $(OBJ_DIR)/Real3D.o $(OBJ_DIR)/Render3D.o $(OBJ_DIR)/Models.o $(OBJ_DIR)/TextureRefs.o $(OBJ_DIR)/Render2D.o $(OBJ_DIR)/TileGen.o \ + $(OBJ_DIR)/Real3D.o $(OBJ_DIR)/Legacy3D.o $(OBJ_DIR)/Models.o $(OBJ_DIR)/TextureRefs.o \ + $(OBJ_DIR)/New3D.o $(OBJ_DIR)/Mat4.o $(OBJ_DIR)/Model.o $(OBJ_DIR)/PolyHeader.o $(OBJ_DIR)/Texture.o $(OBJ_DIR)/TextureSheet.o $(OBJ_DIR)/VBO.o $(OBJ_DIR)/Vec.o $(OBJ_DIR)/R3DShader.o \ + $(OBJ_DIR)/Render2D.o $(OBJ_DIR)/TileGen.o \ $(OBJ_DIR)/Model3.o $(OBJ_DIR)/ppc.o $(OBJ_DIR)/Main.o $(OBJ_DIR)/Audio.o $(OBJ_DIR)/Thread.o $(OBJ_DIR)/SoundBoard.o \ $(OBJ_DIR)/SCSP.o $(OBJ_DIR)/SCSPDSP.o $(OBJ_DIR)/68K.o $(OBJ_DIR)/m68kcpu.o $(OBJ_DIR)/m68kopnz.o $(OBJ_DIR)/m68kopdm.o \ $(OBJ_DIR)/m68kopac.o $(OBJ_DIR)/m68kops.o $(OBJ_DIR)/DSB.o $(OBJ_DIR)/Z80.o \ @@ -143,6 +150,10 @@ endif all: $(BIN_DIR) $(OBJ_DIR) $(OBJ) $(LD) $(LFLAGS) +ppcd: $(BIN_DIR) $(OBJ_DIR) + $(CXX) Src/CPU/PowerPC/PPCDisasm.cpp $(CPPFLAGS) -DSTANDALONE -o $(OBJ_DIR)/ppcd.o + $(LD) -o $(BIN_DIR)/ppcd.exe -mconsole $(OBJ_DIR)/ppcd.o + $(BIN_DIR): mkdir $(BIN_DIR) @@ -195,7 +206,13 @@ $(OBJ_DIR)/%.o: Src/%.cpp $(OBJ_DIR)/%.o: Src/Model3/%.cpp $(CXX) $< $(CPPFLAGS) -o $(OBJ_DIR)/$(*F).o -$(OBJ_DIR)/%.o: Src/Graphics/%.cpp Src/Graphics/Shaders2D.h Src/Graphics/Shaders3D.h +$(OBJ_DIR)/%.o: Src/Graphics/%.cpp Src/Graphics/Shaders2D.h + $(CXX) $< $(CPPFLAGS) -o $(OBJ_DIR)/$(*F).o + +$(OBJ_DIR)/%.o: Src/Graphics/Legacy3D/%.cpp Src/Graphics/Legacy3D/Shaders3D.h + $(CXX) $< $(CPPFLAGS) -o $(OBJ_DIR)/$(*F).o + +$(OBJ_DIR)/%.o: Src/Graphics/New3D/%.cpp Src/Graphics/New3D/%.h $(CXX) $< $(CPPFLAGS) -o $(OBJ_DIR)/$(*F).o $(OBJ_DIR)/%.o: Src/Sound/%.cpp diff --git a/Src/Graphics/IRender3D.h b/Src/Graphics/IRender3D.h new 
file mode 100644 index 0000000..40dbac0 --- /dev/null +++ b/Src/Graphics/IRender3D.h @@ -0,0 +1,27 @@ +#ifndef INCLUDED_IRENDER3D_H +#define INCLUDED_IRENDER3D_H + +#include + +/* + * IRender3D: + * + * Interface (abstract base class) for Real3D rendering engine. + */ +class IRender3D +{ +public: + virtual void RenderFrame(void) = 0; + virtual void BeginFrame(void) = 0; + virtual void EndFrame(void) = 0; + virtual void UploadTextures(unsigned x, unsigned y, unsigned width, unsigned height) = 0; + virtual void AttachMemory(const uint32_t *cullingRAMLoPtr, const uint32_t *cullingRAMHiPtr, const uint32_t *polyRAMPtr, const uint32_t *vromPtr, const uint16_t *textureRAMPtr) = 0; + virtual void SetStep(int stepID) = 0; + virtual bool Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes) = 0; + + virtual ~IRender3D() + { + } +}; + +#endif // INCLUDED_IRENDER3D_H diff --git a/Src/Graphics/Legacy3D/Error.cpp b/Src/Graphics/Legacy3D/Error.cpp new file mode 100644 index 0000000..2e5e04e --- /dev/null +++ b/Src/Graphics/Legacy3D/Error.cpp @@ -0,0 +1,67 @@ +/** + ** Supermodel + ** A Sega Model 3 Arcade Emulator. + ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** + ** This file is part of Supermodel. + ** + ** Supermodel is free software: you can redistribute it and/or modify it under + ** the terms of the GNU General Public License as published by the Free + ** Software Foundation, either version 3 of the License, or (at your option) + ** any later version. + ** + ** Supermodel is distributed in the hope that it will be useful, but WITHOUT + ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + ** more details. + ** + ** You should have received a copy of the GNU General Public License along + ** with Supermodel. If not, see . + **/ + +/* + * Error.cpp + * + * Error reporting for 3D renderer. 
There are some situations in which it is + * appropriate to inform the user of an error during rendering. However, these + * will frequently lead to an avalanche of error messages. Therefore, error + * messages are managed by this interface, ensuring that they are displayed not + * more than once per frame. + * + * Error functions can always be counted on to return FAIL, like ErrorLog(). + */ + +#include "Supermodel.h" + +namespace Legacy3D { + +// Error bit flags (must not conflict) +#define ERROR_LOCAL_VERTEX_OVERFLOW 0x1 +#define ERROR_UNABLE_TO_CACHE_MODEL 0x2 + + +// Overflow in the local vertex buffer, which holds one model +bool CLegacy3D::ErrorLocalVertexOverflow(void) +{ + if ((errorMsgFlags&ERROR_LOCAL_VERTEX_OVERFLOW)) + return FAIL; + errorMsgFlags |= ERROR_LOCAL_VERTEX_OVERFLOW; + return ErrorLog("Overflow in local vertex buffer!"); +} + +// Model could not be cached, even after dumping display list and re-caching +bool CLegacy3D::ErrorUnableToCacheModel(UINT32 modelAddr) +{ + if ((errorMsgFlags&ERROR_UNABLE_TO_CACHE_MODEL)) + return FAIL; + errorMsgFlags |= ERROR_UNABLE_TO_CACHE_MODEL; + return ErrorLog("Encountered a model that was too large to cache (at %08X)!", modelAddr); +} + +// Call this every frame to clear the error flag, allowing errors to be printed +void CLegacy3D::ClearErrors(void) +{ + errorMsgFlags = 0; +} + +} // Legacy3D diff --git a/Src/Graphics/Legacy3D/Legacy3D.cpp b/Src/Graphics/Legacy3D/Legacy3D.cpp new file mode 100644 index 0000000..ea0b726 --- /dev/null +++ b/Src/Graphics/Legacy3D/Legacy3D.cpp @@ -0,0 +1,1407 @@ +/** + ** Supermodel + ** A Sega Model 3 Arcade Emulator. + ** Copyright 2011-2016 Bart Trzynadlowski, Nik Henson + ** + ** This file is part of Supermodel. + ** + ** Supermodel is free software: you can redistribute it and/or modify it under + ** the terms of the GNU General Public License as published by the Free + ** Software Foundation, either version 3 of the License, or (at your option) + ** any later version. 
+ ** + ** Supermodel is distributed in the hope that it will be useful, but WITHOUT + ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + ** more details. + ** + ** You should have received a copy of the GNU General Public License along + ** with Supermodel. If not, see <http://www.gnu.org/licenses/>. + **/ + +/* + * Legacy3D.cpp + * + * Core module for OpenGL-based Real3D graphics engine. + * + * + * Optimization To-Do List + * ----------------------- + * + * 0. Optimize backface culling. Is it possible to compute normal matrix only + * when needed? Should also be more careful about OpenGL state info, such as + * the winding mode. + * 1. Do not store matrices in a uniform, use glLoadMatrix() in MODELVIEW mode. + * It will no longer be necessary to compute normal matrix! + * 2. Move stuff into vertex shader (division by 2048? Subtraction of 0.5,0.5 for bilinear filtering?) + * 3. Just one call to BufferSubData rather than 2 + * + * Spotlight + * --------- + * + * Spotlight illumination occurs between two Z ranges within an ellipse + * specified in coordinates that ought to be relative to the viewport. They + * actually appear to be defined in terms of physical display coordinates + * regardless of the size of the viewport, although this has not been 100% + * confirmed. + * + * The parameters that describe the ellipse in display coordinates are: + * + * cx,cy Center point. + * a,b Width (or rather, half-width) and height of spotlight. + * + * These correspond to the standard form of the ellipse equation: + * + * ((x-cx)/a)^2 + ((y-cy)/b)^2 = 1 + * + * It is trivial to test whether a point lies inside an ellipse by plugging + * it into the equation and checking to see if it is less than or equal to + * 1. The a and b parameters appear to be stored as values w and h, which + * range from 0 to 255 (according to the Scud Race debug menu) but which + * may be up to 16 bits (this has not been observed). 
They are already + * inverted, scaled by the screen size, and squared. + * + * w = (496/a)^2 -> a = 496/sqrt(w) + * h = (384/b)^2 -> b = 384/sqrt(h) + * + * This is mostly a guess. It is almost certain, however, based on + * observations of the Scud Race backfire effect that w and h are related + * to spotlight size in an inverse-square-root fashion. The spotlight in + * view 3 should be smaller than in view 4, but the values are actually + * larger. Here is some data: + * + * View 3: + * X,Y=247,342 + * W,H=24,16 + * N,F=1e-9,200 + * Car translation length: 4.93 + * View 4: + * X,Y=247,317 + * W,H=48,32 + * N,F=1e-9,200 + * Car translation length: 7.5 + * + * The translation length is the total translation vector for the car model + * extracted by applying the scene matrices. Note that sqrt(48/24) = 1.4 + * and 7.5/4.93 = 1.52, a fairly close match. + * + * It remains unknown whether the spotlight parameters are relative to the + * physical display resolution (496x384), as computed here, or the viewport + * size. What is needed is an example of a spotlight in a viewport whose + * dimensions are not 496x384. + * + * The spotlight near and far ranges are in viewspace (eye) coordinates. + * The inverse of the near range is specified and the far range is stored + * as a displacement (I think) from the near range. Color is RGB111. + * + * The spotlight should be smooth at the edges. Using the magnitude of the + * ellipse test works well -- when it is 1.0, the spotlight should be fully + * attenuated (0 intensity) and when it is 0.0, render at full intensity. + * + * Alpha Processing + * ---------------- + * When processing "alpha" (translucent) polygons, alpha values range from 0.0, + * completely transparent, to 1.0, completely opaque. This appears to be the + * same convention as for Model 3 and corresponds to a blend mode setting of: + * glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA). 
+ * + * For all texels and colors which do not include an alpha channel, for + * translucency to work properly, the alpha channel must be set to opaque. + * Contour textures use T=1 to indicate transparency, therefore their alpha + * value must be inverted. + * + * Translucent Polygons + * -------------------- + * The 32-level polygon translucency appears to be applied as follows + * + * 1. If polygon is untextured, fragment color is the polygon color and + * the translucency level becomes the alpha channel. + * 2. If contour textures are used, the translucency level becomes the + * alpha channel regardless of the contour bit. I assume that contour + * bit processing is still carried out, if enabled, however. + * 3. If the texture format is RGBA4, translucency is multiplied by texel + * alpha. + * 4. Other texture formats: ??? + * + * A simple way to handle this is to force alpha to 1.0 for polygon colors, + * discard fragments if required by the contour setting (forcing alpha to 1.0 + * otherwise), and then in the end, multiplying whatever alpha value remains by + * the translucency level. + * + * List of Safeguards + * ------------------ + * During boot-up, many games load up scene data that cannot feasibly be + * processed (way too many models). This occurs in Scud Race and Virtual On 2, + * for example. This is currently being handled by attempting to detect the + * defective scenes. + * + * 1. Scud Race: the coordinate system matrix is checked for vectors whose + * magnitudes are not 1.0. + * 2. Virtual On 2: model 0x200000 is not rendered. + * + * There are probably better ways of doing it. + * + * To-Do List + * ---------- + * - Can some of the floating point flag attribs be replaced with ints? 
+ */ + +#include +#include "Supermodel.h" +#include "Graphics/Legacy3D/Shaders3D.h" // fragment and vertex shaders + +namespace Legacy3D { + +// Microsoft doesn't provide isnan() and isinf() +#ifdef _MSC_VER + #include + #define ISNAN(x) (_isnan(x)) + #define ISINF(x) (!_finite(x)) +#else + #define ISNAN(x) (std::isnan(x)) + #define ISINF(x) (std::isinf(x)) +#endif + +/****************************************************************************** + Definitions and Constants +******************************************************************************/ + +// Shader program files +#define VERTEX_SHADER_FILE "Src/Graphics/Vertex.glsl" +#define FRAGMENT_SHADER_FILE "Src/Graphics/Fragment.glsl" + +// Model cache settings +#define NUM_STATIC_VERTS 700000 // suggested maximum number of static vertices +#define NUM_DYNAMIC_VERTS 64000 // "" dynamic vertices +#define NUM_LOCAL_VERTS 32768 // size of local vertex buffer +#define NUM_STATIC_MODELS 10000 // maximum number of unique static models to cache +#define NUM_DYNAMIC_MODELS 1024 // maximum number of unique dynamic models to cache +#define NUM_DISPLAY_LIST_ITEMS 10000 // maximum number of model instances displayed per frame + +// Scene traversal stack +#define STACK_SIZE 1024 + + +/****************************************************************************** + Texture Management +******************************************************************************/ + +// Default mapping from Model3 texture format to texture sheet. +// Currently this is just a simple 1-to-1 mapping but if/when more formats get added, sheets will start to get reused. 
+int CLegacy3D::defaultFmtToTexSheetNum[] = { + 0, // Fmt 0 -> 0 + 1, // 1 -> 1 + 2, // 2 -> 2 + 3, // 3 -> 3 + 4, // 4 -> 4 + 5, // 5 -> 5 + 6, // 6 -> 6 + 7 // 7 -> 7 + }; + +void CLegacy3D::DecodeTexture(int format, int x, int y, int width, int height) +{ + int xi, yi, i; + UINT16 texel; + GLfloat c, a; + + x &= 2047; + y &= 2047; + + if ((x+width)>2048 || (y+height)>2048) + return; + if (width > 512 || height > 512) + { + //ErrorLog("Encountered a texture that is too large (%d,%d,%d,%d)", x, y, width, height); + return; + } + + // Map Model3 format to texture sheet + TexSheet *texSheet = fmtToTexSheet[format]; + + // Check to see if ALL texture tiles have been properly decoded on texture sheet + if ((texSheet->texFormat[y/32][x/32] == format) && (texSheet->texWidth[y/32][x/32] >= width) && (texSheet->texHeight[y/32][x/32] >= height)) + return; + + //printf("Decoding texture format %u: %u x %u @ (%u, %u) sheet %u\n", format, width, height, x, y, texNum); + + // Copy and decode + i = 0; + switch (format) + { + default: // Unknown + + for (yi = y; yi < (y+height); yi++) + { + for (xi = x; xi < (x+width); xi++) + { + textureBuffer[i++] = 0.0; // R + textureBuffer[i++] = 0.0; // G + textureBuffer[i++] = 1.0f; // B + textureBuffer[i++] = 1.0f; // A + } + } + break; + + case 0: // T1RGB5 + for (yi = y; yi < (y+height); yi++) + { + for (xi = x; xi < (x+width); xi++) + { + textureBuffer[i++] = (GLfloat) ((textureRAM[yi*2048+xi]>>10)&0x1F) * (1.0f/31.0f); // R + textureBuffer[i++] = (GLfloat) ((textureRAM[yi*2048+xi]>>5)&0x1F) * (1.0f/31.0f); // G + textureBuffer[i++] = (GLfloat) ((textureRAM[yi*2048+xi]>>0)&0x1F) * (1.0f/31.0f); // B + textureBuffer[i++] = ((textureRAM[yi*2048+xi]&0x8000)?0.0f:1.0f); // T + } + } + break; + + case 7: // RGBA4 + for (yi = y; yi < (y+height); yi++) + { + for (xi = x; xi < (x+width); xi++) + { + textureBuffer[i++] = (GLfloat) ((textureRAM[yi*2048+xi]>>12)&0xF) * (1.0f/15.0f); // R + textureBuffer[i++] = (GLfloat) 
((textureRAM[yi*2048+xi]>>8)&0xF) * (1.0f/15.0f); // G + textureBuffer[i++] = (GLfloat) ((textureRAM[yi*2048+xi]>>4)&0xF) * (1.0f/15.0f); // B + textureBuffer[i++] = (GLfloat) ((textureRAM[yi*2048+xi]>>0)&0xF) * (1.0f/15.0f); // A + } + } + break; + + case 5: // 8-bit grayscale + for (yi = y; yi < (y+height); yi++) + { + for (xi = x; xi < (x+width); xi++) + { + /* + texel = textureRAM[yi*2048+xi]; + c = (GLfloat) (texel&0xFF) * (1.0f/255.0f); + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = 1.0; + */ + // Interpret as 8-bit grayscale + texel = textureRAM[yi*2048+xi]; + c = (GLfloat) texel * (1.0f/255.0f); + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = 1.0f; + } + } + + break; + + case 4: // 8-bit, L4A4 + + for (yi = y; yi < (y+height); yi++) + { + for (xi = x; xi < (x+width); xi++) + { + texel = textureRAM[yi*2048+xi]; + //c = (GLfloat) (~texel&0x0F) * (1.0f/15.0f); + //a = (GLfloat) ((texel>>4)&0xF) * (1.0f/15.0f); + c = (GLfloat) ((texel>>4)&0xF) * (1.0f/15.0f); // seems to work better in Lost World (raptor shadows) + a = (GLfloat) (texel&0xF) * (1.0f/15.0f); + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = a; + } + } + + break; + + case 6: // 8-bit grayscale? (How does this differ from format 5? Alpha values?) 
+ for (yi = y; yi < (y+height); yi++) + { + for (xi = x; xi < (x+width); xi++) + { + /* + texel = textureRAM[yi*2048+xi]; + c = (GLfloat) ((texel>>4)&0xF) * (1.0f/15.0f); + a = (GLfloat) (texel&0xF) * (1.0f/15.0f); + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = a; + */ + texel = textureRAM[yi*2048+xi]&0xFF; + c = (GLfloat) texel * (1.0f/255.0f); + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = 1.0f; + + } + } + break; + + case 2: // Unknown (all 16 bits appear present in Daytona 2, but only lower 8 bits in Le Mans 24) + for (yi = y; yi < (y+height); yi++) + { + for (xi = x; xi < (x+width); xi++) + { + texel = textureRAM[yi*2048+xi]; + a = (GLfloat) ((texel>>4)&0xF) * (1.0f/15.0f); + c = (GLfloat) (texel&0xF) * (1.0f/15.0f); + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = a; + + //printf("%04X\n", textureRAM[yi*2048+xi]); + /* + texel = textureRAM[yi*2048+xi]&0xFF; + c = (GLfloat) texel * (1.0f/255.0f); + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = 1.0f; + */ + } + } + break; + + case 3: // Interleaved A4L4 (high byte) + for (yi = y; yi < (y+height); yi++) + { + for (xi = x; xi < (x+width); xi++) + { + texel = textureRAM[yi*2048+xi]>>8; + c = (GLfloat) (texel&0xF) * (1.0f/15.0f); + a = (GLfloat) (texel>>4) * (1.0f/15.0f); + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = a; + } + } + break; + + case 1: // Interleaved A4L4 (low byte) + for (yi = y; yi < (y+height); yi++) + { + for (xi = x; xi < (x+width); xi++) + { + // Interpret as A4L4 + texel = textureRAM[yi*2048+xi]&0xFF; + c = (GLfloat) (texel&0xF) * (1.0f/15.0f); + a = (GLfloat) (texel>>4) * (1.0f/15.0f); + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = c; + textureBuffer[i++] = a; + } + } + break; + } + + // Upload texture 
to correct position within texture map + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glActiveTexture(GL_TEXTURE0 + texSheet->mapNum); // activate correct texture unit + glBindTexture(GL_TEXTURE_2D, texMapIDs[texSheet->mapNum]); // bind correct texture map + glTexSubImage2D(GL_TEXTURE_2D, 0, texSheet->xOffset + x, texSheet->yOffset + y, width, height, GL_RGBA, GL_FLOAT, textureBuffer); + + // Mark texture as decoded + texSheet->texFormat[y/32][x/32] = format; + texSheet->texWidth[y/32][x/32] = width; + texSheet->texHeight[y/32][x/32] = height; +} + +// Signals that new textures have been uploaded. Flushes model caches. Be careful not to exceed bounds! +void CLegacy3D::UploadTextures(unsigned x, unsigned y, unsigned width, unsigned height) +{ + unsigned texSheet, xi, yi; + + // Make everything red +#ifdef DEBUG + for (int i = 0; i < 512*512; ) + { + textureBuffer[i++] = 1.0f; + textureBuffer[i++] = 0.0f; + textureBuffer[i++] = 0.0f; + textureBuffer[i++] = 1.0f; + } +#endif + + // Update all texture sheets + for (texSheet = 0; texSheet < numTexSheets; texSheet++) + { + for (xi = x/32; xi < (x+width)/32; xi++) + { + for (yi = y/32; yi < (y+height)/32; yi++) + { + texSheets[texSheet].texFormat[yi][xi] = -1; + texSheets[texSheet].texWidth[yi][xi] = -1; + texSheets[texSheet].texHeight[yi][xi] = -1; + } + } + } +} + +/****************************************************************************** + Real3D Address Translation + + Functions that interpret word-granular Real3D addresses and return pointers. 
+******************************************************************************/ + +// Translates 24-bit culling RAM addresses +const UINT32 *CLegacy3D::TranslateCullingAddress(UINT32 addr) +{ + addr &= 0x00FFFFFF; // caller should have done this already + + if ((addr>=0x800000) && (addr<0x840000)) + return &cullingRAMHi[addr&0x3FFFF]; + else if (addr < 0x100000) + return &cullingRAMLo[addr]; + +#ifdef DEBUG + ErrorLog("TranslateCullingAddress(): invalid address: %06X", addr); +#endif + return NULL; +} + +// Translates model references +const UINT32 *CLegacy3D::TranslateModelAddress(UINT32 modelAddr) +{ + modelAddr &= 0x00FFFFFF; // caller should have done this already + + if (modelAddr < 0x100000) + return &polyRAM[modelAddr]; + else + return &vrom[modelAddr]; +} + + +/****************************************************************************** + Matrix Stack +******************************************************************************/ + +// Macro to generate column-major (OpenGL) index from y,x subscripts +#define CMINDEX(y,x) (x*4+y) + +/* + * MultMatrix(): + * + * Multiplies the matrix stack by the specified Real3D matrix. The matrix + * index is a 12-bit number specifying a matrix number relative to the base. + * The base matrix MUST be set up before calling this function. 
+ */ +void CLegacy3D::MultMatrix(UINT32 matrixOffset) +{ + GLfloat m[4*4]; + const float *src = &matrixBasePtr[matrixOffset*12]; + + if (matrixBasePtr==NULL) // LA Machineguns + return; + m[CMINDEX(0, 0)] = src[3]; + m[CMINDEX(0, 1)] = src[4]; + m[CMINDEX(0, 2)] = src[5]; + m[CMINDEX(0, 3)] = src[0]; + m[CMINDEX(1, 0)] = src[6]; + m[CMINDEX(1, 1)] = src[7]; + m[CMINDEX(1, 2)] = src[8]; + m[CMINDEX(1, 3)] = src[1]; + m[CMINDEX(2, 0)] = src[9]; + m[CMINDEX(2, 1)] = src[10]; + m[CMINDEX(2, 2)] = src[11]; + m[CMINDEX(2, 3)] = src[2]; + m[CMINDEX(3, 0)] = 0.0; + m[CMINDEX(3, 1)] = 0.0; + m[CMINDEX(3, 2)] = 0.0; + m[CMINDEX(3, 3)] = 1.0; + + glMultMatrixf(m); +} + +/* + * InitMatrixStack(): + * + * Initializes the modelview (model space -> view space) matrix stack and + * Real3D coordinate system. These are the last transforms to be applied (and + * the first to be defined on the stack) before projection. + * + * Model 3 games tend to define the following unusual base matrix: + * + * 0 0 -1 0 + * 1 0 0 0 + * 0 -1 0 0 + * 0 0 0 1 + * + * When this is multiplied by a column vector, the output is: + * + * -Z + * X + * -Y + * 1 + * + * My theory is that the Real3D GPU accepts vectors in Z,X,Y order. The games + * store everything as X,Y,Z and perform the translation at the end. The Real3D + * also has Y and Z coordinates opposite of the OpenGL convention. This + * function inserts a compensating matrix to undo these things. + * + * NOTE: This function assumes we are in GL_MODELVIEW matrix mode. 
+ */ + +void CLegacy3D::InitMatrixStack(UINT32 matrixBaseAddr) +{ + GLfloat m[4*4]; + + // This matrix converts vectors back from the weird Model 3 Z,X,Y ordering + // and also into OpenGL viewspace (-Y,-Z) + m[CMINDEX(0,0)]=0.0; m[CMINDEX(0,1)]=1.0; m[CMINDEX(0,2)]=0.0; m[CMINDEX(0,3)]=0.0; + m[CMINDEX(1,0)]=0.0; m[CMINDEX(1,1)]=0.0; m[CMINDEX(1,2)]=-1.0; m[CMINDEX(1,3)]=0.0; + m[CMINDEX(2,0)]=-1.0; m[CMINDEX(2,1)]=0.0; m[CMINDEX(2,2)]=0.0; m[CMINDEX(2,3)]=0.0; + m[CMINDEX(3,0)]=0.0; m[CMINDEX(3,1)]=0.0; m[CMINDEX(3,2)]=0.0; m[CMINDEX(3,3)]=1.0; + + if (step > 0x10) + glLoadMatrixf(m); + else + { + // Scaling seems to help w/ Step 1.0's extremely large coordinates + GLfloat s = 1.0f/2048.0f; + glLoadIdentity(); + glScalef(s,s,s); + glMultMatrixf(m); + } + + // Set matrix base address and apply matrix #0 (coordinate system matrix) + matrixBasePtr = (float *) TranslateCullingAddress(matrixBaseAddr); + MultMatrix(0); +} + + +/****************************************************************************** + Scene Database + + Complete scene database traversal and rendering. +******************************************************************************/ + +static bool IsVROMModel(UINT32 modelAddr) +{ + return modelAddr >= 0x100000; +} + +static bool IsDynamicModel(const UINT32 *data) +{ + if (data == NULL) + return false; + unsigned sharedVerts[16] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 }; + // VROM models are only dynamic if they reference polygon RAM via color palette indices + bool done = false; + do + { + // Check if polygon has color palette reference, if so polygon is dynamic and can return here + if ((data[1]&2) == 0) + return true; + if (data[6] == 0) + break; + // Get number of vertices + unsigned numVerts = (data[0]&0x40 ? 
4 : 3); + // Deduct number of reused verts + numVerts -= sharedVerts[data[0]&0xf]; + done = data[1]&4; + // Skip header and vertices to next polygon + data += 7 + numVerts * 4; + } + while (!done); + return false; +} + +/* + * DrawModel(): + * + * Draw the specified model (adds it to the display list). This is where vertex + * buffer overflows and display list overflows will be detected. An attempt is + * made to salvage the situation if this occurs, so if DrawModel() returns + * FAIL, it is a serious matter and rendering should be aborted for the frame. + * + * The current texture offset state, texOffset, is also used. Models are cached + * for each unique texOffset. + */ +bool CLegacy3D::DrawModel(UINT32 modelAddr) +{ + ModelCache *Cache; + const UINT32 *model; + int lutIdx; + struct VBORef *ModelRef; + + //if (modelAddr==0x7FFF00) // Fighting Vipers (this is not polygon data!) + // return; + if (modelAddr == 0x200000) // Virtual On 2 (during boot-up, causes slow-down) + return OKAY; + model = TranslateModelAddress(modelAddr); + + // Determine whether model is in polygon RAM or VROM + if (IsVROMModel(modelAddr)) + Cache = &VROMCache; + else + Cache = &PolyCache; + + // Look up the model in the LUT and cache it if necessary + lutIdx = modelAddr&0xFFFFFF; + ModelRef = LookUpModel(Cache, lutIdx, texOffset); + if (NULL == ModelRef && Cache == &VROMCache) + { + // If the model was a VROM model, it may be dynamic, so we need to try + // another lookup in the dynamic cache + ModelRef = LookUpModel(&PolyCache, lutIdx, texOffset); + if (ModelRef != NULL) + Cache = &PolyCache; + } + + if (NULL == ModelRef) + { + // Attempt to cache the model, and perform a final check to determine + // whether VROM model is in fact dynamic (this should be fixed -- models + // should be decoded to a common buffer and the cache determined + // afterwards) + if (Cache == &VROMCache && IsDynamicModel(model)) + Cache = &PolyCache; + ModelRef = CacheModel(Cache, lutIdx, texOffset, model); + if 
(NULL == ModelRef) + { + // Model could not be cached. Render what we have so far and try again. + DrawDisplayList(&VROMCache, POLY_STATE_NORMAL); + DrawDisplayList(&PolyCache, POLY_STATE_NORMAL); + DrawDisplayList(&VROMCache, POLY_STATE_ALPHA); + DrawDisplayList(&PolyCache, POLY_STATE_ALPHA); + ClearModelCache(&VROMCache); + ClearModelCache(&PolyCache); + + // Try caching again... + ModelRef = CacheModel(Cache, lutIdx, texOffset, model); + if (NULL == ModelRef) + return ErrorUnableToCacheModel(modelAddr); // nothing we can do :( + } + } + + // If cache is static then decode all the texture references contained in the cached model + // before rendering (models in dynamic cache will have been decoded already in CacheModel) + if (!Cache->dynamic) + ModelRef->texRefs.DecodeAllTextures(this); + + // Add to display list + return AppendDisplayList(Cache, false, ModelRef); +} + +// Descends into a 10-word culling node +void CLegacy3D::DescendCullingNode(UINT32 addr) +{ + const UINT32 *node, *lodTable; + UINT32 matrixOffset, node1Ptr, node2Ptr; + float x, y, z, oldTexOffsetX, oldTexOffsetY; + int tx, ty; + UINT16 oldTexOffset; + + ++stackDepth; + // Stack depth of 64 is too small for Star Wars Trilogy (Hoth) + if (stackDepth>=(512+64)) // safety (prevent overflows -- OpenGL matrix stack will still overflow by this point) + { + --stackDepth; + return; + } + + node = TranslateCullingAddress(addr); + if (NULL == node) + { + --stackDepth; + return; + } +//printf("%08x NODE %d\n", addr, stackDepth); +//for (int i = 0; i < 8; i++) +// printf(" %08x\n", node[i]); + + // Debug: texture offset? 
(NOTE: offsets 1 and 2 don't exist on step 1.0) + //if (node[0x02]&0xFFFF) + // printf("%X -> %02X %04X\n", addr, node[0x00]&0xFF, node[0x02]&0xFFFF); + + // Extract known fields + node1Ptr = node[0x07-offset]; + node2Ptr = node[0x08-offset]; + matrixOffset = node[0x03-offset]&0xFFF; + x = *(float *) &node[0x04-offset]; + y = *(float *) &node[0x05-offset]; + z = *(float *) &node[0x06-offset]; + + // Texture offset? + oldTexOffsetX = texOffsetXY[0]; // save old offsets + oldTexOffsetY = texOffsetXY[1]; + oldTexOffset = texOffset; + if (!offset) // Step 1.5+ + { + tx = 32*((node[0x02]>>7)&0x3F); + ty = 32*(node[0x02]&0x3F) + ((node[0x02]&0x4000)?1024:0); // TODO: 5 or 6 bits for Y coord? + if ((node[0x02]&0x8000)) // apply texture offsets, else retain current ones + { + texOffsetXY[0] = (GLfloat) tx; + texOffsetXY[1] = (GLfloat) ty; + texOffset = node[0x02]&0x7FFF; + //printf("Tex Offset: %d, %d (%08X %08X)\n", tx, ty, node[0x02], node1Ptr); + } + } + + // Apply matrix and translation + glPushMatrix(); + if ((node[0x00]&0x10)) // apply translation vector + glTranslatef(x,y,z); + else if (matrixOffset) // multiply matrix, if specified + MultMatrix(matrixOffset); + + // Descend down first link + if ((node[0x00]&0x08)) // 4-element LOD table + { + lodTable = TranslateCullingAddress(node1Ptr); + if (NULL != lodTable) + { + if ((node[0x03-offset]&0x20000000)) + DescendCullingNode(lodTable[0]&0xFFFFFF); + else + DrawModel(lodTable[0]&0xFFFFFF); + } + } + else + DescendNodePtr(node1Ptr); + + // Proceed to second link + glPopMatrix(); + if ((node[0x00] & 0x07) != 0x06) // seems to indicate second link is invalid (fixes circular references) + DescendNodePtr(node2Ptr); + --stackDepth; + + // Restore old texture offsets + texOffsetXY[0] = oldTexOffsetX; + texOffsetXY[1] = oldTexOffsetY; + texOffset = oldTexOffset; +} + +// A list of pointers. MAME assumes that these may only point to culling nodes. 
+void CLegacy3D::DescendPointerList(UINT32 addr) +{ + const UINT32 *list; + UINT32 nodeAddr; + int listEnd; + + if (listDepth > 2) // several Step 2.1 games require this safeguard + return; + + list = TranslateCullingAddress(addr); + if (NULL == list) + return; + + ++listDepth; +//printf("%08x LIST\n", addr); + // Traverse the list forward and print it out + listEnd = 0; + while (1) + { +//printf(" %08x\n", list[listEnd]); + if ((list[listEnd] & 0x02000000)) // end of list (?) + break; + + if ((list[listEnd] == 0) || (((list[listEnd])>>24) != 0)) + { + //printf("ATTENTION: Unknown list termination: %08X.\n", list[listEnd]); + listEnd--; // back up to last valid list element + break; + } + + ++listEnd; + } + + // Traverse the list backward and descend into each pointer + while (listEnd >= 0) + { + nodeAddr = list[listEnd]&0x00FFFFFF; // clear upper 8 bits to ensure this is processed as a culling node + if (!(list[listEnd]&0x01000000))//Fighting Vipers + { + if ((nodeAddr != 0) && (nodeAddr != 0x800800)) + { + DescendCullingNode(nodeAddr); + } + //else + // printf("Strange pointers encountered\n"); + } + --listEnd; + } + + --listDepth; +} + +/* + * DescendNodePtr(): + * + * The old scene traversal engine. Recursively descends into a node pointer. + */ +void CLegacy3D::DescendNodePtr(UINT32 nodeAddr) +{ + // Ignore null links + if ((nodeAddr&0x00FFFFFF) == 0) + return; + + switch ((nodeAddr>>24)&0xFF) // pointer type encoded in upper 8 bits + { + case 0x00: // culling node + DescendCullingNode(nodeAddr&0xFFFFFF); + break; + case 0x01: // model (perhaps bit 1 is a flag in this case?) 
+ case 0x03: + DrawModel(nodeAddr&0xFFFFFF); + break; + case 0x04: // pointer list + DescendPointerList(nodeAddr&0xFFFFFF); + break; + default: + //printf("ATTENTION: Unknown pointer format: %08X\n\n", nodeAddr); + break; + } +} + +// Draws viewports of the given priority +void CLegacy3D::RenderViewport(UINT32 addr, int pri) +{ + GLfloat color[8][3] = // RGB1 translation + { + { 0.0, 0.0, 0.0 }, // off + { 0.0, 0.0, 1.0 }, // blue + { 0.0, 1.0, 0.0 }, // green + { 0.0, 1.0, 1.0 }, // cyan + { 1.0, 0.0, 0.0 }, // red + { 1.0, 0.0, 1.0 }, // purple + { 1.0, 1.0, 0.0 }, // yellow + { 1.0, 1.0, 1.0 } // white + }; + const UINT32 *vpnode; + UINT32 nextAddr, nodeAddr, matrixBase; + int curPri; + int vpX, vpY, vpWidth, vpHeight; + int spotColorIdx; + GLfloat vpTopAngle, vpBotAngle, fovYDegrees; + GLfloat scrollFog, scrollAtt; + + // Translate address and obtain pointer + vpnode = TranslateCullingAddress(addr); + if (NULL == vpnode) + return; + + curPri = (vpnode[0x00] >> 3) & 3; // viewport priority + nextAddr = vpnode[0x01] & 0xFFFFFF; // next viewport + nodeAddr = vpnode[0x02]; // scene database node pointer + + // Recursively process next viewport + if (vpnode[0x01] == 0) // memory probably hasn't been set up yet, abort + return; + if (vpnode[0x01] != 0x01000000) + RenderViewport(vpnode[0x01],pri); + + // If the priority doesn't match, do not process + if (curPri != pri) + return; + + // Fetch viewport parameters (TO-DO: would rounding make a difference?) 
+ vpX = (vpnode[0x1A]&0xFFFF)>>4; // viewport X (12.4 fixed point) + vpY = (vpnode[0x1A]>>20)&0xFFF; // viewport Y (12.4) + vpWidth = (vpnode[0x14]&0xFFFF)>>2; // width (14.2) + vpHeight = (vpnode[0x14]>>18)&0x3FFF; // height (14.2) + matrixBase = vpnode[0x16]&0xFFFFFF; // matrix base address + + // Field of view and clipping + vpTopAngle = (float) asin(*(float *)&vpnode[0x0E]); // FOV Y upper half-angle (radians) + vpBotAngle = (float) asin(*(float *)&vpnode[0x12]); // FOV Y lower half-angle + fovYDegrees = (vpTopAngle+vpBotAngle)*(float)(180.0/3.14159265358979323846); + // TO-DO: investigate clipping planes + + // Set up viewport and projection (TO-DO: near and far clipping) + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + if (g_Config.wideScreen && (vpX==0) && (vpWidth>=495) && (vpY==0) && (vpHeight >= 383)) // only expand viewports that occupy whole screen + { + // Wide screen hack only modifies X axis and not the Y FOV + viewportX = 0; + viewportY = yOffs + (GLint) ((float)(384-(vpY+vpHeight))*yRatio); + viewportWidth = totalXRes; + viewportHeight = (GLint) ((float)vpHeight*yRatio); + gluPerspective(fovYDegrees,(GLfloat)viewportWidth/(GLfloat)viewportHeight,0.1f,1e5); // use actual full screen ratio to get proper X FOV + //printf("viewportX=%d, viewportY=%d, viewportWidth=%d, viewportHeight=%d\tvpY=%d vpHeight=%d\n", viewportX, viewportY, viewportWidth, viewportHeight, vpY,vpHeight); + } + else + { + viewportX = xOffs + (GLint) ((float)vpX*xRatio); + viewportY = yOffs + (GLint) ((float)(384-(vpY+vpHeight))*yRatio); + viewportWidth = (GLint) ((float)vpWidth*xRatio); + viewportHeight = (GLint) ((float)vpHeight*yRatio); + gluPerspective(fovYDegrees,(GLfloat)vpWidth/(GLfloat)vpHeight,0.1f,1e5); // use Model 3 viewport ratio + } + + // Lighting (note that sun vector points toward sun -- away from vertex) + lightingParams[0] = *(float *) &vpnode[0x05]; // sun X + lightingParams[1] = *(float *) &vpnode[0x06]; // sun Y + lightingParams[2] = *(float *) 
&vpnode[0x04]; // sun Z + lightingParams[3] = *(float *) &vpnode[0x07]; // sun intensity + lightingParams[4] = (float) ((vpnode[0x24]>>8)&0xFF) * (1.0f/255.0f); // ambient intensity + lightingParams[5] = 0.0; // reserved + + // Spotlight + spotColorIdx = (vpnode[0x20]>>11)&7; // spotlight color index + spotEllipse[0] = (float) ((vpnode[0x1E]>>3)&0x1FFF); // spotlight X position (fractional component?) + spotEllipse[1] = (float) ((vpnode[0x1D]>>3)&0x1FFF); // spotlight Y + spotEllipse[2] = (float) ((vpnode[0x1E]>>16)&0xFFFF); // spotlight X size (16-bit? May have fractional component below bit 16) + spotEllipse[3] = (float) ((vpnode[0x1D]>>16)&0xFFFF); // spotlight Y size + spotRange[0] = 1.0f/(*(float *) &vpnode[0x21]); // spotlight start + spotRange[1] = *(float *) &vpnode[0x1F]; // spotlight extent + spotColor[0] = color[spotColorIdx][0]; // spotlight color + spotColor[1] = color[spotColorIdx][1]; + spotColor[2] = color[spotColorIdx][2]; + //printf("(%g,%g),(%g,%g),(%g,%g) -> \n", spotEllipse[0], spotEllipse[1], spotEllipse[2], spotEllipse[3], spotRange[0], spotRange[1]); + + // Spotlight is applied on a per pixel basis, must scale its position and size to screen + spotEllipse[1] = 384.0f-spotEllipse[1]; + spotRange[1] += spotRange[0]; // limit + spotEllipse[2] = 496.0f/sqrt(spotEllipse[2]); // spotlight appears to be specified in terms of physical resolution (unconfirmed) + spotEllipse[3] = 384.0f/sqrt(spotEllipse[3]); + + // Scale the spotlight to the OpenGL viewport + spotEllipse[0] = spotEllipse[0]*xRatio + xOffs; + spotEllipse[1] = spotEllipse[1]*yRatio + yOffs; + spotEllipse[2] *= xRatio; + spotEllipse[3] *= yRatio; + + // Fog + fogParams[0] = (float) ((vpnode[0x22]>>16)&0xFF) * (1.0f/255.0f); // fog color R + fogParams[1] = (float) ((vpnode[0x22]>>8)&0xFF) * (1.0f/255.0f); // fog color G + fogParams[2] = (float) ((vpnode[0x22]>>0)&0xFF) * (1.0f/255.0f); // fog color B + fogParams[3] = *(float *) &vpnode[0x23]; // fog density + fogParams[4] = (float) 
(INT16) (vpnode[0x25]&0xFFFF)*(1.0f/255.0f); // fog start + if (ISINF(fogParams[3]) || ISNAN(fogParams[3]) || ISINF(fogParams[4]) || ISNAN(fogParams[4])) // Star Wars Trilogy + fogParams[3] = fogParams[4] = 0.0f; + + // Unknown light/fog parameters + scrollFog = (float) (vpnode[0x20]&0xFF) * (1.0f/255.0f); // scroll fog + scrollAtt = (float) (vpnode[0x24]&0xFF) * (1.0f/255.0f); // scroll attenuation + //printf("scrollFog = %g, scrollAtt = %g\n", scrollFog, scrollAtt); + //printf("Fog: R=%02X G=%02X B=%02X density=%g (%X) %d start=%g\n", ((vpnode[0x22]>>16)&0xFF), ((vpnode[0x22]>>8)&0xFF), ((vpnode[0x22]>>0)&0xFF), fogParams[3], vpnode[0x23], (fogParams[3]==fogParams[3]), fogParams[4]); + + // Clear texture offsets before proceeding + texOffsetXY[0] = 0.0; + texOffsetXY[1] = 0.0; + texOffset = 0x0000; + + // Set up coordinate system and base matrix + glMatrixMode(GL_MODELVIEW); + InitMatrixStack(matrixBase); + + // Safeguard: weird coordinate system matrices usually indicate scenes that will choke the renderer + if (NULL != matrixBasePtr) + { + float m21, m32, m13; + + // Get the three elements that are usually set and see if their magnitudes are 1 + m21 = matrixBasePtr[6]; + m32 = matrixBasePtr[10]; + m13 = matrixBasePtr[5]; + + m21 *= m21; + m32 *= m32; + m13 *= m13; + + if ((m21>1.05) || (m21<0.95)) + return; + if ((m32>1.05) || (m32<0.95)) + return; + if ((m13>1.05) || (m13<0.95)) + return; + } + + // Render + AppendDisplayList(&VROMCache, true, 0); // add a viewport display list node + AppendDisplayList(&PolyCache, true, 0); + stackDepth = 0; + listDepth = 0; + + // Descend down the node link: Use recursive traversal + DescendNodePtr(nodeAddr); +} + +void CLegacy3D::RenderFrame(void) +{ + // Begin frame + ClearErrors(); // must be cleared each frame + //printf("BEGIN FRAME\n"); + + // Z buffering (Z buffer is cleared by display list viewport nodes) + glDepthFunc(GL_LESS); + glEnable(GL_DEPTH_TEST); + + // Bind Real3D shader program and texture maps + 
glUseProgram(shaderProgram); + for (unsigned mapNum = 0; mapNum < numTexMaps; mapNum++) + { + // Map Model3 format to texture unit and texture unit to texture sheet number + glActiveTexture(GL_TEXTURE0 + mapNum); // activate correct texture unit + glBindTexture(GL_TEXTURE_2D, texMapIDs[mapNum]); // bind correct texture sheet + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); // fragment shader performs its own interpolation + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + } + + // Enable VBO client states + glEnableClientState(GL_VERTEX_ARRAY); + glEnableClientState(GL_NORMAL_ARRAY); + glEnableClientState(GL_COLOR_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + if (subTextureLoc != -1) glEnableVertexAttribArray(subTextureLoc); + if (texParamsLoc != -1) glEnableVertexAttribArray(texParamsLoc); + if (texFormatLoc != -1) glEnableVertexAttribArray(texFormatLoc); + if (texMapLoc != -1) glEnableVertexAttribArray(texMapLoc); + if (transLevelLoc != -1) glEnableVertexAttribArray(transLevelLoc); + if (lightEnableLoc != -1) glEnableVertexAttribArray(lightEnableLoc); + if (shininessLoc != -1) glEnableVertexAttribArray(shininessLoc); + if (fogIntensityLoc != -1) glEnableVertexAttribArray(fogIntensityLoc); + + // Draw + //ClearModelCache(&VROMCache); // debug + ClearModelCache(&PolyCache); + for (int pri = 0; pri <= 3; pri++) + { + glClear(GL_DEPTH_BUFFER_BIT); + //ClearModelCache(&PolyCache); + ClearDisplayList(&PolyCache); + ClearDisplayList(&VROMCache); + RenderViewport(0x800000,pri); + DrawDisplayList(&VROMCache, POLY_STATE_NORMAL); + DrawDisplayList(&PolyCache, POLY_STATE_NORMAL); + DrawDisplayList(&VROMCache, POLY_STATE_ALPHA); + DrawDisplayList(&PolyCache, POLY_STATE_ALPHA); + } + glFrontFace(GL_CW); // restore front face + + // Disable VBO client states + if (fogIntensityLoc != -1) glDisableVertexAttribArray(fogIntensityLoc); + if (shininessLoc != -1) glDisableVertexAttribArray(shininessLoc); + if (lightEnableLoc != -1) 
glDisableVertexAttribArray(lightEnableLoc); + if (transLevelLoc != -1) glDisableVertexAttribArray(transLevelLoc); + if (texMapLoc != -1) glDisableVertexAttribArray(texMapLoc); + if (texFormatLoc != -1) glDisableVertexAttribArray(texFormatLoc); + if (texParamsLoc != -1) glDisableVertexAttribArray(texParamsLoc); + if (subTextureLoc != -1) glDisableVertexAttribArray(subTextureLoc); + glDisableClientState(GL_COLOR_ARRAY); + glDisableClientState(GL_TEXTURE_COORD_ARRAY); + glDisableClientState(GL_NORMAL_ARRAY); + glDisableClientState(GL_VERTEX_ARRAY); +} + +void CLegacy3D::EndFrame(void) +{ +} + +void CLegacy3D::BeginFrame(void) +{ +//printf("--- BEGIN FRAME ---\n"); +} + + +/****************************************************************************** + Configuration, Initialization, and Shutdown +******************************************************************************/ + +void CLegacy3D::AttachMemory(const UINT32 *cullingRAMLoPtr, const UINT32 *cullingRAMHiPtr, const UINT32 *polyRAMPtr, const UINT32 *vromPtr, const UINT16 *textureRAMPtr) +{ + cullingRAMLo = cullingRAMLoPtr; + cullingRAMHi = cullingRAMHiPtr; + polyRAM = polyRAMPtr; + vrom = vromPtr; + textureRAM = textureRAMPtr; + DebugLog("Legacy3D attached Real3D memory regions\n"); +} + +void CLegacy3D::SetStep(int stepID) +{ + step = stepID; + + if ((step!=0x10) && (step!=0x15) && (step!=0x20) && (step!=0x21)) + { + DebugLog("Legacy3D: Unrecognized stepping: %d.%d\n", (step>>4)&0xF, step&0xF); + step = 0x10; + } + + if (step > 0x10) + { + offset = 0; // culling nodes are 10 words + vertexFactor = (1.0f/2048.0f); // vertices are in 13.11 format + } + else + { + offset = 2; // 8 words + vertexFactor = (1.0f/128.0f); // 17.7 + } + + DebugLog("Legacy3D set to Step %d.%d\n", (step>>4)&0xF, step&0xF); +} + +bool CLegacy3D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXResParam, unsigned totalYResParam) +{ + // Allocate memory for texture buffer + textureBuffer = 
new(std::nothrow) GLfloat[512*512*4]; + if (NULL == textureBuffer) + return ErrorLog("Insufficient memory for texture decode buffer."); + + glGetError(); // clear error flag + + // Create model caches and VBOs + if (CreateModelCache(&VROMCache, NUM_STATIC_VERTS, NUM_LOCAL_VERTS, NUM_STATIC_MODELS, 0x4000000/4, NUM_DISPLAY_LIST_ITEMS, false)) + return FAIL; + if (CreateModelCache(&PolyCache, NUM_DYNAMIC_VERTS, NUM_LOCAL_VERTS, NUM_DYNAMIC_MODELS, 0x4000000/4, NUM_DISPLAY_LIST_ITEMS, true)) + return FAIL; + + // Initialize lighting parameters (updated as viewports are traversed) + lightingParams[0] = 0.0; + lightingParams[1] = 0.0; + lightingParams[2] = 0.0; + lightingParams[3] = 0.0; + lightingParams[4] = 1.0; // full ambient intensity in case we want to render a standalone model + lightingParams[5] = 0.0; + + // Resolution and offset within physical display area + xRatio = (GLfloat) xRes / 496.0f; + yRatio = (GLfloat) yRes / 384.0f; + xOffs = xOffset; + yOffs = yOffset; + totalXRes = totalXResParam; + totalYRes = totalYResParam; + + // Get ideal number of texture sheets required by default mapping from Model3 texture format to texture sheet + unsigned idealTexSheets = 0; + for (unsigned fmt = 0; fmt < 8; fmt++) + { + int sheetNum = defaultFmtToTexSheetNum[fmt]; + idealTexSheets = max(idealTexSheets, sheetNum + 1); + } + + // Get upper limit for number of texture maps to use from max number of texture units supported by video card + GLint glMaxTexUnits; + glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &glMaxTexUnits); + unsigned maxTexMaps = max(1, min(g_Config.maxTexMaps, glMaxTexUnits)); + + // Get upper limit for extent of texture maps to use from max texture size supported by video card + GLint maxTexSize; + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTexSize); + unsigned mapExtent = max(1, min(g_Config.maxTexMapExtent, maxTexSize / 2048)); + unsigned mapSize = 2048 * mapExtent; + while (mapExtent > 1) + { + if ((mapExtent - 1) * (mapExtent - 1) < idealTexSheets) + { + // 
Use a GL proxy texture to double check max texture size returned above + glTexImage2D(GL_PROXY_TEXTURE_2D, 0, GL_RGBA8, mapSize, mapSize, 0, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1, NULL); + GLint glTexWidth; + glGetTexLevelParameteriv(GL_PROXY_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &glTexWidth); + if (glTexWidth == mapSize) + break; + } + mapExtent--; + mapSize -= 2048; + } + + // Load shaders, using multi-sheet shader if requested. + const char *vsFile = g_Config.vertexShaderFile.size() ? g_Config.vertexShaderFile.c_str() : NULL; + const char *fsFile = g_Config.fragmentShaderFile.size() ? g_Config.fragmentShaderFile.c_str() : NULL; + const char *fragmentShaderSource = (g_Config.multiTexture ? fragmentShaderMultiSheetSource : fragmentShaderSingleSheetSource); // built-in multi-sheet shader if multiTexture is set, otherwise the single-sheet one + if (OKAY != LoadShaderProgram(&shaderProgram,&vertexShader,&fragmentShader,vsFile,fsFile,vertexShaderSource,fragmentShaderSource)) + return FAIL; + + // Try locating default "textureMap" uniform in shader program + glUseProgram(shaderProgram); // bind program + textureMapLoc = glGetUniformLocation(shaderProgram, "textureMap"); + + // If it exists, bind it to the first texture unit + unsigned mapCount = 0; + if (textureMapLoc != -1) + glUniform1i(textureMapLoc, mapCount++); + + // Try locating "textureMap[0-7]" uniforms in shader program + for (unsigned mapNum = 0; mapNum < 8 && mapCount < maxTexMaps; mapNum++) + { + char uniformName[12]; // "textureMap" + one digit (mapNum < 8) + terminating NUL + sprintf(uniformName, "textureMap%u", mapNum); + textureMapLocs[mapNum] = glGetUniformLocation(shaderProgram, uniformName); + // If it exists, bind it to the next unused texture unit + if (textureMapLocs[mapNum] != -1) + glUniform1i(textureMapLocs[mapNum], mapCount++); + } + + // Check that at least one "textureMap" uniform was successfully located in the shader program + if (mapCount == 0) + return ErrorLog("Fragment shader must contain at least one 'textureMap' uniform."); + InfoLog("Located and bound %u 'textureMap' uniform(s) in fragment shader.", mapCount); + + // Readjust map extent so as 
to utilise as many texture maps found in shader program as possible + while (mapExtent > 1 && mapCount * (mapExtent - 1) * (mapExtent - 1) >= idealTexSheets) + { + mapExtent--; + mapSize -= 2048; + } + + // Create required number of GL textures for texture maps, decreasing map extent if memory is insufficient + unsigned sheetsPerMap = mapExtent * mapExtent; + while (true) + { + numTexMaps = min(mapCount, 1 + (idealTexSheets - 1) / sheetsPerMap); + const unsigned numGenerated = numTexMaps; // names requested from glGenTextures() below; numTexMaps may be lowered on failure + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glGenTextures(numTexMaps, texMapIDs); + bool okay = true; + for (unsigned mapNum = 0; mapNum < numTexMaps; mapNum++) + { + glActiveTexture(GL_TEXTURE0 + mapNum); // activate correct texture unit + glBindTexture(GL_TEXTURE_2D, texMapIDs[mapNum]); // bind correct texture sheet + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); // fragment shader performs its own interpolation + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, mapSize, mapSize, 0, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1, 0); + if (glGetError() != GL_NO_ERROR) + { + // Ran out of video memory or texture size is too large + numTexMaps = mapNum; // only the maps before the failing one are usable + okay = false; + break; + } + } + if (okay) + break; + if (mapExtent == 1) + { + // Cannot shrink any further: keep the maps that were created and release the unused names (including the one that failed) so they do not leak + glDeleteTextures(numGenerated - numTexMaps, texMapIDs + numTexMaps); + break; + } + + // Delete every texture generated in this pass (not just those before the failure), decrease extent and try again + glDeleteTextures(numGenerated, texMapIDs); + mapExtent--; + mapSize -= 2048; + sheetsPerMap = mapExtent * mapExtent; + } + + // Check successfully created at least one texture map + if (numTexMaps == 0) + return ErrorLog("OpenGL was unable to provide any 2048x2048-texel texture maps."); + InfoLog("Created %u %ux%u-texel GL texture map(s).", numTexMaps, mapSize, mapSize); + + // Create texture sheet objects and assign them to texture maps + numTexSheets = min(numTexMaps * sheetsPerMap, idealTexSheets); + texSheets = new(std::nothrow) TexSheet[numTexSheets]; 
+ if (texSheets == NULL) + return ErrorLog("Unable to assign memory for %u texture sheet objects.", numTexSheets); + for (unsigned sheetNum = 0; sheetNum < numTexSheets; sheetNum++) + { + unsigned mapNum = sheetNum / sheetsPerMap; + unsigned posInMap = sheetNum % sheetsPerMap; + texSheets[sheetNum].sheetNum = sheetNum; + texSheets[sheetNum].mapNum = mapNum; + texSheets[sheetNum].xOffset = 2048 * (posInMap % mapExtent); + texSheets[sheetNum].yOffset = 2048 * (posInMap / mapExtent); + } + + // Assign Model3 texture formats to texture sheets (cannot just use default mapping as may have ended up with fewer + // texture sheets than anticipated) + for (unsigned fmt = 0; fmt < 8; fmt++) + { + int sheetNum = defaultFmtToTexSheetNum[fmt] % numTexSheets; + fmtToTexSheet[fmt] = &texSheets[sheetNum]; + } + + InfoLog("Mapped %u Model3 texture formats to %u texture sheet(s) in %u %ux%u-texel texture map(s).", 8, numTexSheets, numTexMaps, mapSize, mapSize); + + // Get location of the rest of the uniforms + modelViewMatrixLoc = glGetUniformLocation(shaderProgram,"modelViewMatrix"); + projectionMatrixLoc = glGetUniformLocation(shaderProgram,"projectionMatrix"); + lightingLoc = glGetUniformLocation(shaderProgram, "lighting"); + mapSizeLoc = glGetUniformLocation(shaderProgram, "mapSize"); + spotEllipseLoc = glGetUniformLocation(shaderProgram, "spotEllipse"); + spotRangeLoc = glGetUniformLocation(shaderProgram, "spotRange"); + spotColorLoc = glGetUniformLocation(shaderProgram, "spotColor"); + + // Get locations of custom vertex attributes + subTextureLoc = glGetAttribLocation(shaderProgram,"subTexture"); + texParamsLoc = glGetAttribLocation(shaderProgram,"texParams"); + texFormatLoc = glGetAttribLocation(shaderProgram,"texFormat"); + texMapLoc = glGetAttribLocation(shaderProgram,"texMap"); + transLevelLoc = glGetAttribLocation(shaderProgram,"transLevel"); + lightEnableLoc = glGetAttribLocation(shaderProgram,"lightEnable"); + shininessLoc = 
glGetAttribLocation(shaderProgram,"shininess"); + fogIntensityLoc = glGetAttribLocation(shaderProgram,"fogIntensity"); + + // Set map size + if (mapSizeLoc != -1) + glUniform1f(mapSizeLoc, (GLfloat)mapSize); + + // Additional OpenGL stuff + glFrontFace(GL_CW); // polygons are uploaded w/ clockwise winding + glCullFace(GL_BACK); + glEnable(GL_CULL_FACE); + glClearDepth(1.0); + glEnable(GL_TEXTURE_2D); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + // Mark all textures as dirty + UploadTextures(0,0,2048,2048); + + DebugLog("Legacy3D initialized\n"); + return OKAY; +} + +/* + * CLegacy3D(): + * + * Puts every pointer, GL handle, and counter that the destructor touches into + * a known empty state. No OpenGL calls are made here. + */ +CLegacy3D::CLegacy3D(void) +{ + cullingRAMLo = NULL; + cullingRAMHi = NULL; + polyRAM = NULL; + vrom = NULL; + textureRAM = NULL; + textureBuffer = NULL; + texSheets = NULL; + + // Zero the GL object handles and the texture map count: the destructor passes them to DestroyShaderProgram() and glDeleteTextures() even if Init() never ran or returned early + shaderProgram = 0; + vertexShader = 0; + fragmentShader = 0; + numTexMaps = 0; + + // Clear model cache pointers so we can safely destroy them if init fails + for (int i = 0; i < 2; i++) + { + VROMCache.verts[i] = NULL; + PolyCache.verts[i] = NULL; + VROMCache.Models = NULL; + PolyCache.Models = NULL; + VROMCache.lut = NULL; + PolyCache.lut = NULL; + VROMCache.List = NULL; + PolyCache.List = NULL; + VROMCache.ListHead[i] = NULL; + PolyCache.ListHead[i] = NULL; + VROMCache.ListTail[i] = NULL; + PolyCache.ListTail[i] = NULL; + } + + DebugLog("Built Legacy3D\n"); +} + +/* + * ~CLegacy3D(): + * + * Releases the shader program, texture maps, model caches, and the buffers + * allocated by Init(). The attached memory regions are only detached. + */ +CLegacy3D::~CLegacy3D(void) +{ + DestroyShaderProgram(shaderProgram,vertexShader,fragmentShader); + if (glBindBuffer != NULL) // we may have failed earlier due to lack of OpenGL 2.0 functions + glBindBuffer(GL_ARRAY_BUFFER, 0); // disable VBOs by binding to 0 + glDeleteTextures(numTexMaps, texMapIDs); // numTexMaps is 0 until Init() has created texture maps + + DestroyModelCache(&VROMCache); + DestroyModelCache(&PolyCache); + + cullingRAMLo = NULL; + cullingRAMHi = NULL; + polyRAM = NULL; + vrom = NULL; + textureRAM = NULL; + + if (texSheets != NULL) + delete [] texSheets; + texSheets = NULL; + + if (textureBuffer != NULL) + delete [] textureBuffer; + textureBuffer = NULL; + + DebugLog("Destroyed Legacy3D\n"); +} + +} // Legacy3D diff --git a/Src/Graphics/Legacy3D/Legacy3D.h b/Src/Graphics/Legacy3D/Legacy3D.h new 
file mode 100644 index 0000000..2e3a904 --- /dev/null +++ b/Src/Graphics/Legacy3D/Legacy3D.h @@ -0,0 +1,485 @@ +/** + ** Supermodel + ** A Sega Model 3 Arcade Emulator. + ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** + ** This file is part of Supermodel. + ** + ** Supermodel is free software: you can redistribute it and/or modify it under + ** the terms of the GNU General Public License as published by the Free + ** Software Foundation, either version 3 of the License, or (at your option) + ** any later version. + ** + ** Supermodel is distributed in the hope that it will be useful, but WITHOUT + ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + ** more details. + ** + ** You should have received a copy of the GNU General Public License along + ** with Supermodel. If not, see <http://www.gnu.org/licenses/>. + **/ + +/* + * Legacy3D.h + * + * Header file defining the CLegacy3D class: Supermodel's original OpenGL + * Real3D graphics engine. + */ + +#ifndef INCLUDED_LEGACY3D_H +#define INCLUDED_LEGACY3D_H + +#include "Graphics/IRender3D.h" +#include "Pkgs/glew.h" + +namespace Legacy3D { + +/****************************************************************************** + Internal Definitions and Data Structures + + NOTE: These definitions live inside the Legacy3D namespace, which is opened + just above. +******************************************************************************/ + +// Model caches sort models by alpha (translucency) state +enum POLY_STATE +{ + POLY_STATE_NORMAL = 0, + POLY_STATE_ALPHA +}; + +struct Vertex +{ + GLfloat x,y,z; // vertex + GLfloat n[3]; // normal X, Y, Z + GLfloat u,v; // texture U, V coordinates (in texels, relative to selected texture) +}; + +struct Poly +{ + Vertex Vert[4]; + GLfloat n[3]; // polygon normal (used for backface culling) + POLY_STATE state; // alpha or normal? 
+ unsigned numVerts; // triangle (3) or quad (4) + const UINT32 *header; // pointer to Real3D 7-word polygon header +}; + +/* + * VBORef: + * + * Reference to model polygons stored in a VBO. Each reference has two sets of + * vertices: normal and alpha. Copies of the model with different texture + * offsets applied are searchable via the linked list of texture offset states. + */ + +struct VBORef +{ + unsigned index[2]; // index of model polygons in VBO + unsigned numVerts[2]; // number of vertices + unsigned lutIdx; // LUT index associated with this model (for fast LUT clearing) + + struct VBORef *nextTexOffset; // linked list of models with different texture offset states + UINT16 texOffset; // texture offset data for this model + + CTextureRefs texRefs; // unique texture references contained in this model + + /* + * Clear(): + * + * Clears the VBORef by setting all fields to 0 and clearing the texture + * references. + */ + inline void Clear(void) + { + texRefs.Clear(); + lutIdx = 0; + texOffset = 0; + nextTexOffset = NULL; + for (int i = 0; i < 2; i++) + { + index[i] = 0; + numVerts[i] = 0; + } + } +}; + +// Display list items: model instances and viewport settings +struct DisplayList +{ + bool isViewport; // if true, this is a viewport node + + union + { + // Viewport data + struct + { + GLfloat projectionMatrix[4*4]; // projection matrix + GLfloat lightingParams[6]; // lighting parameters (see RenderViewport() and vertex shader) + GLfloat spotEllipse[4]; // spotlight ellipse (see RenderViewport()) + GLfloat spotRange[2]; // Z range + GLfloat spotColor[3]; // color + GLfloat fogParams[5]; // fog parameters (...) 
+ GLint x, y; // viewport coordinates (scaled and in OpenGL format) + GLint width, height; // viewport dimensions (scaled for display surface size) + } Viewport; + + // Model data + struct + { + GLfloat modelViewMatrix[4*4]; // model-view matrix + unsigned index; // index in VBO + unsigned numVerts; // number of vertices + GLint frontFace; // GL_CW (default), GL_CCW, or -GL_CW to indicate no culling + } Model; + } Data; + + DisplayList *next; // next display list item with the same state (alpha or non-alpha) +}; + +/* + * ModelCache: + * + * A model cache tracks all models in a particular region (ie., VROM or polygon + * RAM). It contains a look-up table to quickly obtain VBO indices. Be careful + * when accessing the LUT, there are some special cases. + * + * If the model cache is marked dynamic, cached models may not necessarily be + * retained. Clearing the model cache is also much faster. The LUT entry for + * the last model cached will be valid, but because the LUT may not be + * cleared, one cannot assume a model exists because there is a LUT entry + * pointing to it. Always use NeedToCache() to determine whether caching is + * necessary before reading the LUT! + */ +struct ModelCache +{ + // Cache type + bool dynamic; + + // Vertex buffer object + unsigned vboMaxOffset; // size of VBO (in bytes) + unsigned vboCurOffset; // current offset in VBO (in bytes) + GLuint vboID; // OpenGL VBO handle + + // Local vertex buffers (enough for a single model) + unsigned maxVertIdx; // size of each local vertex buffer (in vertices) + unsigned curVertIdx[2]; // current vertex index (in vertices) + GLfloat *verts[2]; + + // Array of cached models + unsigned maxModels; // maximum number of models + unsigned numModels; // current number stored + VBORef *Models; + + /* + * Look-Up Table: + * + * Can be accessed directly with a LUT index to determine the model index. + * However, it should not be used to determine whether a model needs to be + * cached. 
Use NeedToCache() instead. A valid index, for example, may still + * have to be re-cached if the model cache is dynamic (polygon RAM). + */ + unsigned lutSize; // number of elements in LUT + INT16 *lut; // stores indices into Models[] or -1 if not yet cached + + // Display list + unsigned maxListSize; // maximum number of display list items + unsigned listSize; // number of items in display list + DisplayList *List; // holds all display list items + DisplayList *ListHead[2]; // heads of linked lists for each state + DisplayList *ListTail[2]; // current tail node for each state +}; + +struct TexSheet +{ + unsigned sheetNum; + unsigned mapNum; + unsigned xOffset; + unsigned yOffset; + + /* + * Texture Format Buffer + * + * Records the format that a texture (at a given location within the + * texture sheet) is currently stored in. A negative value indicates the + * texture has not been accessed and converted yet and non-negative values + * correspond to the texture format bits in the polygon headers. They can + * be used to determine whether a texture needs to be updated. + */ + int texWidth[2048/32][2048/32]; + int texHeight[2048/32][2048/32]; + INT8 texFormat[2048/32][2048/32]; +}; + +/****************************************************************************** + CLegacy3D Classes +******************************************************************************/ + +/* + * CLegacy3DConfig: + * + * Settings used by CLegacy3D. 
+ */ +class CLegacy3DConfig +{ +public: + string vertexShaderFile; // path to vertex shader or "" to use internal shader + string fragmentShaderFile; // fragment shader + unsigned maxTexMaps; // maximum number of texture maps to use (1-9) + unsigned maxTexMapExtent; // maximum extent of texture maps (where num of tex sheets per map = extent ^ 2) + bool multiTexture; // if enabled and no external fragment shader, select internal shader w/ multiple texture sheet support + + // Defaults + CLegacy3DConfig(void) + { + // strings will be clear to begin with + maxTexMaps = 9; + maxTexMapExtent = 4; + multiTexture = false; + } +}; + +/* + * CLegacy3D: + * + * 3D renderer. Lots of work to do here :) + */ +class CLegacy3D: public IRender3D +{ + friend class CTextureRefs; + +public: + /* + * RenderFrame(void): + * + * Renders the complete scene database. Must be called between BeginFrame() and + * EndFrame(). This function traverses the scene database and builds up display + * lists. + */ + void RenderFrame(void); + + /* + * BeginFrame(void): + * + * Prepare to render a new frame. Must be called once per frame prior to + * drawing anything. + */ + void BeginFrame(void); + + /* + * EndFrame(void): + * + * Signals the end of rendering for this frame. Must be called last during + * the frame. + */ + void EndFrame(void); + + /* + * UploadTextures(x, y, width, height): + * + * Signals that a portion of texture RAM has been updated. + * + * Parameters: + * x X position within texture RAM. + * y Y position within texture RAM. + * width Width of texture data in texels. + * height Height. + */ + void UploadTextures(unsigned x, unsigned y, unsigned width, unsigned height); + + /* + * AttachMemory(cullingRAMLoPtr, cullingRAMHiPtr, polyRAMPtr, vromPtr, + * textureRAMPtr): + * + * Attaches RAM and ROM areas. This must be done prior to any rendering + * otherwise the program may crash with an access violation. + * + * Parameters: + * cullingRAMLoPtr Pointer to low culling RAM (4 MB). 
+ * cullingRAMHiPtr Pointer to high culling RAM (1 MB). + * polyRAMPtr Pointer to polygon RAM (4 MB). + * vromPtr Pointer to video ROM (64 MB). + * textureRAMPtr Pointer to texture RAM (8 MB). + */ + void AttachMemory(const UINT32 *cullingRAMLoPtr, + const UINT32 *cullingRAMHiPtr, const UINT32 *polyRAMPtr, + const UINT32 *vromPtr, const UINT16 *textureRAMPtr); + + /* + * SetStep(stepID): + * + * Sets the Model 3 hardware stepping, which also determines the Real3D + * functionality. The default is Step 1.0. This should be called prior to + * any other emulation functions and after Init(). + * + * Parameters: + * stepID 0x10 for Step 1.0, 0x15 for Step 1.5, 0x20 for Step 2.0, + * or 0x21 for Step 2.1. Anything else defaults to 1.0. + */ + void SetStep(int stepID); + + /* + * Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes): + * + * One-time initialization of the context. Must be called before any other + * members (meaning it should be called even before being attached to any + * other objects that want to use it). + * + * External shader files are loaded according to configuration settings. + * + * Parameters: + * xOffset X offset of the viewable area within OpenGL display + * surface, in pixels. + * yOffset Y offset. + * xRes Horizontal resolution of the viewable area. + * yRes Vertical resolution. + * totalXRes Horizontal resolution of the complete display area. + * totalYRes Vertical resolution. + * + * Returns: + * OKAY is successful, otherwise FAILED if a non-recoverable error + * occurred. Any allocated memory will not be freed until the + * destructor is called. Prints own error messages. + */ + bool Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes); + + /* + * CLegacy3D(void): + * ~CLegacy3D(void): + * + * Constructor and destructor. 
+ */ + CLegacy3D(void); + ~CLegacy3D(void); + +private: + /* + * Private Members + */ + + // Real3D address translation + const UINT32 *TranslateCullingAddress(UINT32 addr); + const UINT32 *TranslateModelAddress(UINT32 addr); + + // Model caching and display list management + void DrawDisplayList(ModelCache *Cache, POLY_STATE state); + bool AppendDisplayList(ModelCache *Cache, bool isViewport, const struct VBORef *Model); + void ClearDisplayList(ModelCache *Cache); + bool InsertPolygon(ModelCache *cache, const Poly *p); + void InsertVertex(ModelCache *cache, const Vertex *v, const Poly *p, float normFlip); + struct VBORef *BeginModel(ModelCache *cache); + void EndModel(ModelCache *cache, struct VBORef *Model, int lutIdx, UINT16 texOffset); + struct VBORef *CacheModel(ModelCache *cache, int lutIdx, UINT16 texOffset, const UINT32 *data); + struct VBORef *LookUpModel(ModelCache *cache, int lutIdx, UINT16 texOffset); + void ClearModelCache(ModelCache *cache); + bool CreateModelCache(ModelCache *cache, unsigned vboMaxVerts, unsigned localMaxVerts, unsigned maxNumModels, unsigned numLUTEntries, unsigned displayListSize, bool isDynamic); + void DestroyModelCache(ModelCache *cache); + + // Texture management + void DecodeTexture(int format, int x, int y, int width, int height); + + // Matrix stack + void MultMatrix(UINT32 matrixOffset); + void InitMatrixStack(UINT32 matrixBaseAddr); + + // Scene database traversal + bool DrawModel(UINT32 modelAddr); + void DescendCullingNode(UINT32 addr); + void DescendPointerList(UINT32 addr); + void DescendNodePtr(UINT32 nodeAddr); + void RenderViewport(UINT32 addr, int pri); + + // In-frame error reporting + bool ErrorLocalVertexOverflow(void); + bool ErrorUnableToCacheModel(UINT32 modelAddr); + void ClearErrors(void); + + /* + * Data + */ + + // Stepping + int step; + int offset; // offset to subtract for words 3 and higher of culling nodes + GLfloat vertexFactor; // fixed-point conversion factor for vertices + + // Memory (passed from 
outside) + const UINT32 *cullingRAMLo; // 4 MB + const UINT32 *cullingRAMHi; // 1 MB + const UINT32 *polyRAM; // 4 MB + const UINT32 *vrom; // 64 MB + const UINT16 *textureRAM; // 8 MB + + // Error reporting + unsigned errorMsgFlags; // tracks which errors have been printed this frame + + // Real3D Base Matrix Pointer + const float *matrixBasePtr; + + // Current viewport parameters (updated as viewports are traversed) + GLfloat lightingParams[6]; + GLfloat fogParams[5]; + GLfloat spotEllipse[4]; + GLfloat spotRange[2]; + GLfloat spotColor[3]; + GLint viewportX, viewportY; + GLint viewportWidth, viewportHeight; + + // Scene graph stack + int listDepth; // how many lists have we recursed into + int stackDepth; // for debugging and error handling purposes + + // Texture offset (during scene graph processing) + GLfloat texOffsetXY[2]; // decoded X, Y offsets + UINT16 texOffset; // raw texture offset data as it appears in culling node + + // Resolution and scaling factors (to support resolutions higher than 496x384) and offsets + GLfloat xRatio, yRatio; + unsigned xOffs, yOffs; + unsigned totalXRes, totalYRes; + + // Texture details + static int defaultFmtToTexSheetNum[8]; // default mapping from Model3 texture format to texture sheet + unsigned numTexMaps; // total number of texture maps + GLuint texMapIDs[9]; // GL texture IDs of texture maps + unsigned numTexSheets; // total number of texture sheets + TexSheet *texSheets; // texture sheet objects + TexSheet *fmtToTexSheet[8]; // final mapping from Model3 texture format to texture sheet + + // Shader programs and input data locations + GLuint shaderProgram; // shader program object + GLuint vertexShader; // vertex shader handle + GLuint fragmentShader; // fragment shader + GLint textureMapLoc; // location of "textureMap" uniform (if available) + GLint textureMapLocs[8]; // location of "textureMap[0-7]" uniforms (if available) + GLint modelViewMatrixLoc; // uniform + GLint projectionMatrixLoc; // uniform + GLint 
lightingLoc; // uniform + GLint mapSizeLoc; // uniform + GLint spotEllipseLoc; // uniform + GLint spotRangeLoc; // uniform + GLint spotColorLoc; // uniform + GLint subTextureLoc; // attribute + GLint texParamsLoc; // attribute + GLint texFormatLoc; // attribute + GLint texMapLoc; // attribute + GLint transLevelLoc; // attribute + GLint lightEnableLoc; // attribute + GLint shininessLoc; // attribute + GLint fogIntensityLoc; // attribute + + // Model caching + ModelCache VROMCache; // VROM (static) models + ModelCache PolyCache; // polygon RAM (dynamic) models + + /* + * Texture Decode Buffer + * + * Textures are decoded and copied from texture RAM into this temporary buffer + * before being uploaded. Dimensions are 512x512. + */ + GLfloat *textureBuffer; // RGBA8 format +}; + +} // Legacy3D + +#endif // INCLUDED_LEGACY3D_H diff --git a/Src/Graphics/Legacy3D/Models.cpp b/Src/Graphics/Legacy3D/Models.cpp new file mode 100644 index 0000000..1de4964 --- /dev/null +++ b/Src/Graphics/Legacy3D/Models.cpp @@ -0,0 +1,1064 @@ +/** + ** Supermodel + ** A Sega Model 3 Arcade Emulator. + ** Copyright 2011-2016 Bart Trzynadlowski, Nik Henson + ** + ** This file is part of Supermodel. + ** + ** Supermodel is free software: you can redistribute it and/or modify it under + ** the terms of the GNU General Public License as published by the Free + ** Software Foundation, either version 3 of the License, or (at your option) + ** any later version. + ** + ** Supermodel is distributed in the hope that it will be useful, but WITHOUT + ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + ** more details. + ** + ** You should have received a copy of the GNU General Public License along + ** with Supermodel. If not, see . + **/ + +/* + * Models.cpp + * + * Model parsing, caching, and drawing. 
+ * + * TO-DO List: + * ----------- + * - If vertex normals aren't offset from polygon normals, would that improve + * specular lighting? + * - Check to see if vertices in LA Machineguns and Dirt Devils contain color + * values rather than normals. + * - More should be predecoded into the polygon structures, so that things like + * texture base coordinates are not re-decoded in two different places! + */ + +#include +#include +#include "Supermodel.h" + +namespace Legacy3D { + +/****************************************************************************** + Definitions and Constants +******************************************************************************/ + +/* + * VBO Vertex Layout + * + * All vertex information is stored in an array of GLfloats. Offset and size + * information is defined here for now. + */ +#define VBO_VERTEX_OFFSET_X 0 // vertex X +#define VBO_VERTEX_OFFSET_Y 1 // vertex Y +#define VBO_VERTEX_OFFSET_Z 2 // vertex Z +#define VBO_VERTEX_OFFSET_NX 3 // normal X +#define VBO_VERTEX_OFFSET_NY 4 // normal Y +#define VBO_VERTEX_OFFSET_NZ 5 // normal Z +#define VBO_VERTEX_OFFSET_R 6 // color (untextured polys) and material (textured polys) R +#define VBO_VERTEX_OFFSET_G 7 // color and material G +#define VBO_VERTEX_OFFSET_B 8 // color and material B +#define VBO_VERTEX_OFFSET_TRANSLUCENCE 9 // translucence level (0.0 fully transparent, 1.0 opaque) +#define VBO_VERTEX_OFFSET_LIGHTENABLE 10 // lighting enabled (0.0 luminous, 1.0 light enabled) +#define VBO_VERTEX_OFFSET_SHININESS 11 // shininess (if negative, disables specular lighting) +#define VBO_VERTEX_OFFSET_FOGINTENSITY 12 // fog intensity (0.0 no fog applied, 1.0 all fog applied) +#define VBO_VERTEX_OFFSET_U 13 // texture U coordinate (in texels, relative to sub-texture) +#define VBO_VERTEX_OFFSET_V 14 // texture V coordinate +#define VBO_VERTEX_OFFSET_TEXTURE_X 15 // sub-texture parameters, X (position in overall texture map, in texels) +#define VBO_VERTEX_OFFSET_TEXTURE_Y 16 // "" Y "" 
+#define VBO_VERTEX_OFFSET_TEXTURE_W 17 // sub-texture parameters, width of texture in texels +#define VBO_VERTEX_OFFSET_TEXTURE_H 18 // "" height of texture in texels +#define VBO_VERTEX_OFFSET_TEXPARAMS_EN 19 // texture parameter: ==1 texturing enabled, ==0 disabled (per-polygon) +#define VBO_VERTEX_OFFSET_TEXPARAMS_TRANS 20 // texture parameter: >=0 use transparency bit, <0 no transparency (per-polygon) +#define VBO_VERTEX_OFFSET_TEXPARAMS_UWRAP 21 // texture parameters: U wrap mode: ==1 mirrored repeat, ==0 normal repeat +#define VBO_VERTEX_OFFSET_TEXPARAMS_VWRAP 22 // "" V wrap mode "" +#define VBO_VERTEX_OFFSET_TEXFORMAT 23 // texture format 0-7 (also ==0 indicates contour texture - see also texParams.trans) +#define VBO_VERTEX_OFFSET_TEXMAP 24 // texture map number +#define VBO_VERTEX_SIZE 25 // total size (may include padding for alignment) + + +/****************************************************************************** + Math Routines +******************************************************************************/ + +// Macro to generate column-major (OpenGL) index from y,x subscripts +#define CMINDEX(y,x) (x*4+y) + +static void CrossProd(GLfloat out[3], GLfloat a[3], GLfloat b[3]) +{ + out[0] = a[1]*b[2]-a[2]*b[1]; + out[1] = a[2]*b[0]-a[0]*b[2]; + out[2] = a[0]*b[1]-a[1]*b[0]; +} + +// 3x3 matrix used (upper-left of m[]) +static void MultMat3Vec3(GLfloat out[3], GLfloat m[4*4], GLfloat v[3]) +{ + out[0] = m[CMINDEX(0,0)]*v[0]+m[CMINDEX(0,1)]*v[1]+m[CMINDEX(0,2)]*v[2]; + out[1] = m[CMINDEX(1,0)]*v[0]+m[CMINDEX(1,1)]*v[1]+m[CMINDEX(1,2)]*v[2]; + out[2] = m[CMINDEX(2,0)]*v[0]+m[CMINDEX(2,1)]*v[1]+m[CMINDEX(2,2)]*v[2]; +} + +static GLfloat Sign(GLfloat x) +{ + if (x > 0.0f) + return 1.0f; + else if (x < 0.0f) + return -1.0f; + return 0.0f; +} + +// Inverts and transposes a 3x3 matrix (upper-left of the 4x4), returning a +// 4x4 matrix with the extra components undefined (do not use them!) 
+static void InvertTransposeMat3(GLfloat out[4*4], GLfloat m[4*4]) +{ + GLfloat invDet; + GLfloat a00 = m[CMINDEX(0,0)], a01 = m[CMINDEX(0,1)], a02 = m[CMINDEX(0,2)]; + GLfloat a10 = m[CMINDEX(1,0)], a11 = m[CMINDEX(1,1)], a12 = m[CMINDEX(1,2)]; + GLfloat a20 = m[CMINDEX(2,0)], a21 = m[CMINDEX(2,1)], a22 = m[CMINDEX(2,2)]; + + invDet = 1.0f/(a00*(a22*a11-a21*a12)-a10*(a22*a01-a21*a02)+a20*(a12*a01-a11*a02)); + out[CMINDEX(0,0)] = invDet*(a22*a11-a21*a12); out[CMINDEX(1,0)] = invDet*(-(a22*a01-a21*a02)); out[CMINDEX(2,0)] = invDet*(a12*a01-a11*a02); + out[CMINDEX(0,1)] = invDet*(-(a22*a10-a20*a12)); out[CMINDEX(1,1)] = invDet*(a22*a00-a20*a02); out[CMINDEX(2,1)] = invDet*(-(a12*a00-a10*a02)); + out[CMINDEX(0,2)] = invDet*(a21*a10-a20*a11); out[CMINDEX(1,2)] = invDet*(-(a21*a00-a20*a01)); out[CMINDEX(2,2)] = invDet*(a11*a00-a10*a01); +} + +static void PrintMatrix(GLfloat m[4*4]) +{ + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 3; j++) + printf("%g\t", m[CMINDEX(i,j)]); + printf("\n"); + } +} + + +/****************************************************************************** + Display Lists + + Every instance of a model encountered in the scene database during rendering + is stored in the display list along with its current transformation matrices + and other state information. Display lists are bound to model caches for + performance: only one VBO has to be bound for an entire display list. + + Binding display lists to model caches may cause priority problems among + alpha polygons. Therefore, it may be necessary in the future to decouple them. 
+******************************************************************************/ + +// Draws the display list +void CLegacy3D::DrawDisplayList(ModelCache *Cache, POLY_STATE state) +{ + DisplayList *D; + + // Bind and activate VBO (pointers activate currently bound VBO) + glBindBuffer(GL_ARRAY_BUFFER, Cache->vboID); + glVertexPointer(3, GL_FLOAT, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_X*sizeof(GLfloat))); + glNormalPointer(GL_FLOAT, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_NX*sizeof(GLfloat))); + glTexCoordPointer(2, GL_FLOAT, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_U*sizeof(GLfloat))); + glColorPointer(3, GL_FLOAT, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_R*sizeof(GLfloat))); + if (subTextureLoc != -1) glVertexAttribPointer(subTextureLoc, 4, GL_FLOAT, GL_FALSE, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_TEXTURE_X*sizeof(GLfloat))); + if (texParamsLoc != -1) glVertexAttribPointer(texParamsLoc, 4, GL_FLOAT, GL_FALSE, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_TEXPARAMS_EN*sizeof(GLfloat))); + if (texFormatLoc != -1) glVertexAttribPointer(texFormatLoc, 1, GL_FLOAT, GL_FALSE, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_TEXFORMAT*sizeof(GLfloat))); + if (texMapLoc != -1) glVertexAttribPointer(texMapLoc, 1, GL_FLOAT, GL_FALSE, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_TEXMAP*sizeof(GLfloat))); + if (transLevelLoc != -1) glVertexAttribPointer(transLevelLoc, 1, GL_FLOAT, GL_FALSE, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_TRANSLUCENCE*sizeof(GLfloat))); + if (lightEnableLoc != -1) glVertexAttribPointer(lightEnableLoc, 1, GL_FLOAT, GL_FALSE, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_LIGHTENABLE*sizeof(GLfloat))); + if (shininessLoc != -1) glVertexAttribPointer(shininessLoc, 1, GL_FLOAT, GL_FALSE, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) 
(VBO_VERTEX_OFFSET_SHININESS*sizeof(GLfloat))); + if (fogIntensityLoc != -1) glVertexAttribPointer(fogIntensityLoc, 1, GL_FLOAT, GL_FALSE, VBO_VERTEX_SIZE*sizeof(GLfloat), (GLvoid *) (VBO_VERTEX_OFFSET_FOGINTENSITY*sizeof(GLfloat))); + + // Set up state + if (state == POLY_STATE_ALPHA) + { + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + else + { + glDisable(GL_BLEND); + } + + // Draw if there are items in the list + D = Cache->ListHead[state]; + while (D != NULL) + { + if (D->isViewport) + { + if (D->next != NULL) // if nothing follows, no point in doing this + { + if (!D->next->isViewport) + { + if (lightingLoc != -1) glUniform3fv(lightingLoc, 2, D->Data.Viewport.lightingParams); + if (projectionMatrixLoc != -1) glUniformMatrix4fv(projectionMatrixLoc, 1, GL_FALSE, D->Data.Viewport.projectionMatrix); + glFogf(GL_FOG_DENSITY, D->Data.Viewport.fogParams[3]); + glFogf(GL_FOG_START, D->Data.Viewport.fogParams[4]); + glFogfv(GL_FOG_COLOR, &(D->Data.Viewport.fogParams[0])); + if (spotEllipseLoc != -1) glUniform4fv(spotEllipseLoc, 1, D->Data.Viewport.spotEllipse); + if (spotRangeLoc != -1) glUniform2fv(spotRangeLoc, 1, D->Data.Viewport.spotRange); + if (spotColorLoc != -1) glUniform3fv(spotColorLoc, 1, D->Data.Viewport.spotColor); + glViewport(D->Data.Viewport.x, D->Data.Viewport.y, D->Data.Viewport.width, D->Data.Viewport.height); + } + } + } + else + { + GLint frontFace; + + if (D->Data.Model.frontFace == -GL_CW) // no backface culling (all normals have lost their Z component) + glDisable(GL_CULL_FACE); + else // use appropriate winding convention + { + glGetIntegerv(GL_FRONT_FACE, &frontFace); + if (frontFace != D->Data.Model.frontFace) + glFrontFace(D->Data.Model.frontFace); + } + + if (modelViewMatrixLoc != -1) + glUniformMatrix4fv(modelViewMatrixLoc, 1, GL_FALSE, D->Data.Model.modelViewMatrix); + glDrawArrays(GL_TRIANGLES, D->Data.Model.index, D->Data.Model.numVerts); + + if (D->Data.Model.frontFace == -GL_CW) + 
glEnable(GL_CULL_FACE); + } + + D = D->next; + } +} + +// Appends an instance of a model or viewport to the display list, copying over the required state information +bool CLegacy3D::AppendDisplayList(ModelCache *Cache, bool isViewport, const struct VBORef *Model) +{ + int lm, i; + + if ((Cache->listSize+2) > Cache->maxListSize) // a model may have 2 states (viewports are added to both display lists) + return FAIL; + //return ErrorLog("Display list is full."); + + // Insert states into the display list + for (i = 0; i < 2; i++) + { + if (isViewport) + { + // Get index for new display list item and advance to next one + lm = Cache->listSize++; + + // Viewport parameters + Cache->List[lm].Data.Viewport.x = viewportX; + Cache->List[lm].Data.Viewport.y = viewportY; + Cache->List[lm].Data.Viewport.width = viewportWidth; + Cache->List[lm].Data.Viewport.height = viewportHeight; + + // Copy over lighting and fog state + memcpy(Cache->List[lm].Data.Viewport.lightingParams, lightingParams, sizeof(lightingParams)); + memcpy(Cache->List[lm].Data.Viewport.fogParams, fogParams, sizeof(fogParams)); + memcpy(Cache->List[lm].Data.Viewport.spotEllipse, spotEllipse, sizeof(spotEllipse)); + memcpy(Cache->List[lm].Data.Viewport.spotRange, spotRange, sizeof(spotRange)); + memcpy(Cache->List[lm].Data.Viewport.spotColor, spotColor, sizeof(spotColor)); + + // Copy projection matrix + glGetFloatv(GL_PROJECTION_MATRIX, Cache->List[lm].Data.Viewport.projectionMatrix); + } + else if (Model->numVerts[i] > 0) // vertices exist for this state + { + // Get index for new display list item and advance to next one + lm = Cache->listSize++; + + // Point to VBO for current model and state + Cache->List[lm].Data.Model.index = Model->index[i]; + Cache->List[lm].Data.Model.numVerts = Model->numVerts[i]; + + // Copy modelview matrix + glGetFloatv(GL_MODELVIEW_MATRIX, Cache->List[lm].Data.Model.modelViewMatrix); + + /* + * Determining if winding was reversed (but not polygon normal): + * + * Real3D performs 
backface culling in view space based on the + * polygon normal unlike OpenGL, which uses the computed normal + * from the edges (in screen space) of the polygon. Consequently, + * it is possible to create a matrix that mirrors an axis without + * rotating the normal, which in turn flips the polygon winding and + * makes it invisible in OpenGL but not on Real3D, because the + * normal is still facing the right way. + * + * To detect such a situation, we create a fictitious polygon with + * edges X = [1 0 0] and Y = [0 1 0], with normal Z = [0 0 1]. We + * rotate the edges by the matrix then compute a normal P, which is + * what OpenGL would use for culling. We transform the normal Z by + * the normal matrix (normals are special and must be multiplied by + * Transpose(Inverse(M)), not M). If the Z components of P and the + * transformed Z vector have opposite signs, the OpenGL winding + * mode must be switched in order to draw correctly. The X axis may + * have been flipped, for example, changing the winding mode while + * leaving the polygon normal unaffected. OpenGL would erroneously + * discard these polygons, so we flip the winding convention, + * ensuring they are drawn correctly. + * + * We have to adjust the Z vector (fictitious normal) by the sign + * of the Z axis specified by the coordinate system matrix (#0). + * This is described further in InsertPolygon(), where the vertices + * are ordered in clockwise fashion. 
+ */ + GLfloat x[3] = { 1.0f, 0.0f, 0.0f }; + GLfloat y[3] = { 0.0f, 1.0f, 0.0f }; + GLfloat z[3] = { 0.0f, 0.0f, -1.0f*matrixBasePtr[0x5] }; + GLfloat m[4*4]; + GLfloat xT[3], yT[3], zT[3], pT[3]; + + InvertTransposeMat3(m,Cache->List[lm].Data.Model.modelViewMatrix); + MultMat3Vec3(xT,Cache->List[lm].Data.Model.modelViewMatrix,x); + MultMat3Vec3(yT,Cache->List[lm].Data.Model.modelViewMatrix,y); + MultMat3Vec3(zT,m,z); + CrossProd(pT,xT,yT); + + float s = Sign(zT[2]*pT[2]); + if (s < 0.0f) + Cache->List[lm].Data.Model.frontFace = GL_CCW; + else if (s > 0.0f) + Cache->List[lm].Data.Model.frontFace = GL_CW; + else + Cache->List[lm].Data.Model.frontFace = -GL_CW; + } + else // nothing to do, continue loop + continue; + + // Update list pointers and set list node type + Cache->List[lm].isViewport = isViewport; + Cache->List[lm].next = NULL; // current end of list + if (Cache->ListHead[i] == NULL) + { + Cache->ListHead[i] = &(Cache->List[lm]); + Cache->ListTail[i] = Cache->ListHead[i]; + } + else + { + Cache->ListTail[i]->next = &(Cache->List[lm]); + Cache->ListTail[i] = &(Cache->List[lm]); + } + } + + return OKAY; +} + +// Clears the display list in preparation for a new frame +void CLegacy3D::ClearDisplayList(ModelCache *Cache) +{ + Cache->listSize = 0; + for (int i = 0; i < 2; i++) + { + Cache->ListHead[i] = NULL; + Cache->ListTail[i] = NULL; + } +} + + +/****************************************************************************** + Model Caching + + Note that as vertices are inserted into the appropriate local vertex buffer + (sorted by polygon state -- alpha and normal), the VBO index is advanced to + reserve space and does not correspond to the actual position of each vertex. + Vertices are copied in batches sorted by state when the model is complete. +******************************************************************************/ + +// Inserts a vertex into the local vertex buffer, incrementing both the local and VBO pointers. The normal is scaled by normFlip. 
+void CLegacy3D::InsertVertex(ModelCache *Cache, const Vertex *V, const Poly *P, float normFlip) +{ + GLfloat r, g, b; + GLfloat translucence, fogIntensity, texWidth, texHeight, texBaseX, texBaseY, contourProcessing; + unsigned baseIdx, texFormat, texEnable, lightEnable, modulate, colorIdx; + TexSheet *texSheet; + int s, texPage, shininess; + + // Texture selection + texEnable = P->header[6]&0x04000000; + texFormat = (P->header[6]>>7)&7; + texWidth = (GLfloat) (32<<((P->header[3]>>3)&7)); + texHeight = (GLfloat) (32<<((P->header[3]>>0)&7)); + texPage = (P->header[4]&0x40) ? 1024 : 0; // treat texture page as Y coordinate + texSheet = fmtToTexSheet[texFormat]; // get X&Y offset of texture sheet within texture map + texBaseX = (GLfloat) (texSheet->xOffset + (((32*(((P->header[4]&0x1F)<<1)|((P->header[5]>>7)&1))) + (int)texOffsetXY[0])&2047)); + texBaseY = (GLfloat) (texSheet->yOffset + (((32*(P->header[5]&0x1F)+texPage) + (int)texOffsetXY[1])&2047)); + + /* + * Lighting and Color Modulation: + * + * It appears that there is a modulate bit which causes the polygon color + * to be multiplied by texel colors. However, if polygons are luminous, + * this appears to be disabled (not quite correct yet, though). + * + * Color Table + * ----------- + * 1. Color table base is definitely at 0x400 for most games. + * 2. There are two color indexes in header[4]. One between bits 31-20 and + * the other between bits 19-8. Sometimes they are set the same, sometimes + * they differ by 1. They must either be selectable or apply to different + * sides of the polygon. Indexed colors appear to be enabled by + * !(header[1]&2). + * 3. Bits 19-8 are needed to make Daytona 2 lights blink. They also seem to + * work well for Scud Race. + * 4. Two bits, header[4]&0x80 and header[3]&0x80, seem to affect color + * modulation (multiplication of RGB or indexed color value by texels). + * header[4] works best in Sega Rally 2 but header[3] works a bit better + * elsewhere. + * 5. 
!(header[4]&0x80) is sufficient to get blinking lights to work in + * Daytona and also fixes shadows under the overpass (spiral turn) on the + * expert course. But, it makes the waterfalls on Scud's medium course too + * dark. The waterfalls have !(header[1]&2), which seems to indicate they + * use indexed colors, but they are too dark when used. header[3]&0x80 is + * 0, which if interpreted as modulation off, makes waterfalls appear + * correctly. If !(header[4]&0x80) is used instead, it is enabled, and + * modulation fails. Blinking lights in Scud Race (medium, expert courses) + * seem to work with both. + * 6. Forcing modulation to be enabled in color index mode does not seem to + * work because of the Scud Race waterfalls (they seem to dislike being + * modulated). + * 7. A possibly important test case, in addition to waterfalls, are the red + * traffic cones at the start of the Desert course in Sega Rally 2's + * championship mode. When !header[4]&0x80 is used, colors are mostly + * correct, but cones are too dark. Need to investigate further. + */ + + lightEnable = !(P->header[6]&0x00010000); + modulate = !(P->header[4]&0x80); // makes traffic lights blink in Daytona and works best in Sega Rally 2 + //modulate = P->header[3]&0x80; // seems to work better overall (TODO: are header[3]&0x80 and header[4]&0x80 ever both set?) 
+ + // Material color + if ((P->header[1]&2) == 0) + { + colorIdx = ((P->header[4]>>20)&0x7FF) - 0; // works for Scud + colorIdx = ((P->header[4]>>8)&0x7FF) - 0; // works for Daytona2 lights + unsigned base = 0x400; + b = (GLfloat) (polyRAM[base+colorIdx]&0xFF) * (1.0f/255.0f); + g = (GLfloat) ((polyRAM[base+colorIdx]>>8)&0xFF) * (1.0f/255.0f); + r = (GLfloat) ((polyRAM[base+colorIdx]>>16)&0xFF) * (1.0f/255.0f); +//modulate=true; + } + else if ((P->header[6] & 0x02000000)) + { + r = g = b = (GLfloat) ((P->header[6]>>26)&0x1f) * (1.0f/31.0f); + } + else + { + // Colors are 8-bit (almost certainly true, see Star Wars) + r = (GLfloat) (P->header[4]>>24) * (1.0f/255.0f); + g = (GLfloat) ((P->header[4]>>16)&0xFF) * (1.0f/255.0f); + b = (GLfloat) ((P->header[4]>>8)&0xFF) * (1.0f/255.0f); + } + + // Determine modulation settings + if (texEnable) + { + //if (!lightEnable|| !modulate) + if (!modulate) + r = g = b = 1.0f; + } +if ((P->header[6] & 0x02000000)) { lightEnable=false;r=b=0;g=1.0;texEnable=false;modulate=false;} + // Specular shininess + shininess = (P->header[0]>>26)&0x3F; + //shininess = (P->header[0]>>28)&0xF; + //if (shininess) + // printf("%X\n", shininess); + if (!(P->header[0]&0x80) || (shininess == 0)) // bit 0x80 seems to enable specular lighting + shininess = -1; // disable + +#if 0 + if (texFormat==5)//texFormat==6||texFormat==2) + { + //printf("%03X\n", P->header[4]>>8); + //texEnable=0; + g=b=1.0; + r=1.0f; + } +#endif +#if 0 + int testWord = 0; + int testBit = 7; + //if ((P->header[testWord]&(1<header[0]>>24) & 0x3) != 0) + //if (!((P->header[0]>>26) & 0x3F) && (P->header[0]&0x80)) + { + texEnable = 0; + r=b=0; + g=1.0f; + g = ((P->header[0]>>26)&0x3F) * (1.0f/64.0f); + //if (!lightEnable) + // b=1.0f; + lightEnable=0; + } +#endif + + // Determine whether polygon is translucent + translucence = (GLfloat) ((P->header[6]>>18)&0x1F) * (1.0f/31.0f); + if ((P->header[6]&0x00800000)) // if set, polygon is opaque + translucence = 1.0f; + + // Fog intensity 
(for luminous polygons) + fogIntensity = (GLfloat) ((P->header[6]>>11)&0x1F) * (1.0f/31.0f); + if (!(P->header[6]&0x00010000)) // if not luminous, always use full fog intensity + fogIntensity = 1.0f; + + /* + * Contour processing. Any alpha value sufficiently close to 0 seems to + * cause pixels to be discarded entirely on Model 3 (no modification of the + * depth buffer). Strictly speaking, only T1RGB5 format textures are + * "contour textures" (in Real3D lingo), we enable contour processing for + * alpha blended texture formats as well in order to discard fully + * transparent pixels. + */ + if ((P->header[6]&0x80000000) || (texFormat==7) || // contour processing enabled or RGBA4 texture + ((texFormat==1) && (P->header[6]&2)) || // A4L4 interleaved (these formats are not being interpreted correctly, see Scud Race clock tower) + ((texFormat==3) && (P->header[6]&4))) // A4L4 interleaved + contourProcessing = 1.0f; + else + contourProcessing = -1.0f; + + // Store to local vertex buffer + s = P->state; + baseIdx = Cache->curVertIdx[s]*VBO_VERTEX_SIZE; + + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_X] = V->x; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_Y] = V->y; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_Z] = V->z; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_R] = r; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_G] = g; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_B] = b; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TRANSLUCENCE] = translucence; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_LIGHTENABLE] = lightEnable ? 
1.0f : 0.0f; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_SHININESS] = (GLfloat) shininess; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_FOGINTENSITY] = fogIntensity; + + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_NX] = V->n[0]*normFlip; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_NY] = V->n[1]*normFlip; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_NZ] = V->n[2]*normFlip; + + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_U] = V->u; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_V] = V->v; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TEXTURE_X] = texBaseX; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TEXTURE_Y] = texBaseY; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TEXTURE_W] = texWidth; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TEXTURE_H] = texHeight; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TEXPARAMS_EN] = texEnable ? 1.0f : 0.0f; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TEXPARAMS_TRANS] = contourProcessing; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TEXPARAMS_UWRAP] = (P->header[2]&2) ? 1.0f : 0.0f; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TEXPARAMS_VWRAP] = (P->header[2]&1) ? 1.0f : 0.0f; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TEXFORMAT] = (float)texFormat; + Cache->verts[s][baseIdx + VBO_VERTEX_OFFSET_TEXMAP] = (float)texSheet->mapNum; + + Cache->curVertIdx[s]++; + Cache->vboCurOffset += VBO_VERTEX_SIZE*sizeof(GLfloat); +} + +bool CLegacy3D::InsertPolygon(ModelCache *Cache, const Poly *P) +{ + GLfloat n[3], v1[3], v2[3], normZFlip; + int i; + bool doubleSided; + + // Bounds testing: up to 12 triangles will be inserted (worst case: double sided quad is 6 triangles) + if ((Cache->curVertIdx[P->state]+6*2) >= Cache->maxVertIdx) + return ErrorLocalVertexOverflow(); // local buffers are not expected to overflow + if ((Cache->vboCurOffset+6*2*VBO_VERTEX_SIZE*sizeof(GLfloat)) >= Cache->vboMaxOffset) + return FAIL; // this just indicates we may need to re-cache + + // Is the polygon double sided? 
+ doubleSided = (P->header[1]&0x10) ? true : false; + + /* + * Determine polygon winding by taking cross product of vectors formed from + * 3 polygon vertices (the middle one being the origin). In reality, back- + * face culling is determined by the polygon normal and two-sided polygons + * exist. This is just a temporary hack. + * + * If the cross product points the same way as the normal, the winding is + * clockwise and can be kept, otherwise it must be reversed. + * + * NOTE: This assumes that the Model 3 base coordinate system's Z axis + * (into the screen) is -1, like OpenGL. For some games (eg., Lost World), + * this is not the case. Assuming games consistently use the same type of + * coordinate system matrix, it seems that inverting the whole dot product + * when Z is positive helps. I don't understand exactly why... but it has + * to do with using the correct Z convention to identify a vector pointing + * toward or away from the screen. + */ + v1[0] = P->Vert[0].x-P->Vert[1].x; + v1[1] = P->Vert[0].y-P->Vert[1].y; + v1[2] = P->Vert[0].z-P->Vert[1].z; + v2[0] = P->Vert[2].x-P->Vert[1].x; + v2[1] = P->Vert[2].y-P->Vert[1].y; + v2[2] = P->Vert[2].z-P->Vert[1].z; + CrossProd(n,v1,v2); + + normZFlip = -1.0f*matrixBasePtr[0x5]; // coordinate system m13 component + + if (normZFlip*(n[0]*P->n[0]+n[1]*P->n[1]+n[2]*P->n[2]) >= 0.0) // clockwise winding confirmed + { + // Store the first triangle + for (i = 0; i < 3; i++) + { + InsertVertex(Cache, &(P->Vert[i]), P, 1.0f); + } + + if (doubleSided) // store backside as counter-clockwise + { + for (i = 2; i >=0; i--) + { + InsertVertex(Cache, &(P->Vert[i]), P, -1.0f); + } + } + + // If quad, second triangle will just be vertices 1, 3, 4 + if (P->numVerts == 4) + { + InsertVertex(Cache, &(P->Vert[0]), P, 1.0f); + InsertVertex(Cache, &(P->Vert[2]), P, 1.0f); + InsertVertex(Cache, &(P->Vert[3]), P, 1.0f); + + if (doubleSided) + { + InsertVertex(Cache, &(P->Vert[0]), P, -1.0f); + InsertVertex(Cache, &(P->Vert[3]), P, 
-1.0f); + InsertVertex(Cache, &(P->Vert[2]), P, -1.0f); + } + } + } + else // counterclockwise winding, reverse it + { + for (i = 2; i >=0; i--) + { + InsertVertex(Cache, &(P->Vert[i]), P, 1.0f); + } + + if (doubleSided) // store backside as clockwise + { + for (i = 0; i < 3; i++) + { + InsertVertex(Cache, &(P->Vert[i]), P, -1.0f); + } + } + + if (P->numVerts == 4) + { + InsertVertex(Cache, &(P->Vert[0]), P, 1.0f); + InsertVertex(Cache, &(P->Vert[3]), P, 1.0f); + InsertVertex(Cache, &(P->Vert[2]), P, 1.0f); + + if (doubleSided) + { + InsertVertex(Cache, &(P->Vert[0]), P, -1.0f); + InsertVertex(Cache, &(P->Vert[2]), P, -1.0f); + InsertVertex(Cache, &(P->Vert[3]), P, -1.0f); + } + } + } + + return OKAY; +} + +// Begins caching a new model by resetting to the start of the local vertex buffer +struct VBORef *CLegacy3D::BeginModel(ModelCache *Cache) +{ + struct VBORef *Model; + + unsigned m = Cache->numModels; + + // Determine whether we've exceeded the model cache limits (caller will have to recache) + if (m >= Cache->maxModels) + { + //ErrorLog("Too many %s models.", Cache->dynamic?"dynamic":"static"); + return NULL; + } + + Model = &(Cache->Models[m]); + + // Reset to the beginning of the local vertex buffer + for (int i = 0; i < 2; i++) + Cache->curVertIdx[i] = 0; + + // Clear the VBO reference to 0 and clear texture references + Model->Clear(); + + // Record starting index of first opaque polygon in VBO (alpha poly index will be re-set in EndModel()) + Model->index[POLY_STATE_NORMAL] = Cache->vboCurOffset/(VBO_VERTEX_SIZE*sizeof(GLfloat)); + Model->index[POLY_STATE_ALPHA] = Model->index[POLY_STATE_NORMAL]; + + return Model; +} + +// Uploads all vertices from the local vertex buffer to the VBO, sets up the VBO reference, updates the LUT +void CLegacy3D::EndModel(ModelCache *Cache, struct VBORef *Model, int lutIdx, UINT16 texOffset) +{ + int m = Cache->numModels++; + + // Record the number of vertices, completing the VBORef + for (int i = 0; i < 2; i++) + 
Model->numVerts[i] = Cache->curVertIdx[i]; + + // First alpha polygon immediately follows the normal polygons + Model->index[POLY_STATE_ALPHA] = Model->index[POLY_STATE_NORMAL] + Model->numVerts[POLY_STATE_NORMAL]; + + // Upload from local vertex buffer to real VBO + glBindBuffer(GL_ARRAY_BUFFER, Cache->vboID); + if (Model->numVerts[POLY_STATE_NORMAL] > 0) + glBufferSubData(GL_ARRAY_BUFFER, Model->index[POLY_STATE_NORMAL]*VBO_VERTEX_SIZE*sizeof(GLfloat), Cache->curVertIdx[POLY_STATE_NORMAL]*VBO_VERTEX_SIZE*sizeof(GLfloat), Cache->verts[POLY_STATE_NORMAL]); + if (Model->numVerts[POLY_STATE_ALPHA] > 0) + glBufferSubData(GL_ARRAY_BUFFER, Model->index[POLY_STATE_ALPHA]*VBO_VERTEX_SIZE*sizeof(GLfloat), Cache->curVertIdx[POLY_STATE_ALPHA]*VBO_VERTEX_SIZE*sizeof(GLfloat), Cache->verts[POLY_STATE_ALPHA]); + + // Record LUT index in the model VBORef + Model->lutIdx = lutIdx; + + // Texture offset of this model state + Model->texOffset = texOffset; + + // Update the LUT and link up to any existing model that already exists here + if (Cache->lut[lutIdx] >= 0) // another texture offset state already cached + Model->nextTexOffset = &(Cache->Models[Cache->lut[lutIdx]]); + Cache->lut[lutIdx] = m; +} + +/* + * CacheModel(): + * + * Decodes and caches a complete model. Returns NULL if any sort of overflow in + * the cache occurred. In this case, the model cache should be cleared before + * being used again because an incomplete model will be stored, wasting vertex + * buffer space. + * + * A pointer to the VBO reference for the cached model is returned when + * successful. 
+ */ + +struct VBORef *CLegacy3D::CacheModel(ModelCache *Cache, int lutIdx, UINT16 texOffset, const UINT32 *data) +{ + Vertex Prev[4]; // previous vertices + int numPolys = 0; + bool done = false; + + // Sega Rally 2 bad models + //if (lutIdx == 0x27a1 || lutIdx == 0x21e0) + // return FAIL; + + if (data == NULL) + return NULL; + + // Start constructing a new model + struct VBORef *Model = BeginModel(Cache); + if (NULL == Model) + return NULL; // too many models! + + // Cache all polygons + while (!done) + { + Poly P; // current polygon + GLfloat mag; + GLfloat uvScale; + int texEnable, texFormat, texWidth, texHeight, texPage, texBaseX, texBaseY; + unsigned i, j, vmask; + UINT32 ix, iy, iz, it; + bool validPoly; + + // Set current header pointer (header is 7 words) + P.header = data; + data += 7; // data will now point to first vertex + if (P.header[6]==0)// || P.header[0]==0) + break; + + // Sega Rally 2: dust trails often have polygons with seemingly invalid + // vertices (very large values or 0). Ignoring polygons with these bits set + // seems to fix the problem. Perhaps these polygons exist for alignment + // purposes or are another type of entity altogether? + validPoly = (P.header[0] & 0x300) != 0x300; +// if (!validPoly) + + { + //printf("Invalid poly:\n"); + //for (int i = 0; i < 7; i++) + // printf(" %d: %08x\n", i, P.header[i]); + } + + // Obtain basic polygon parameters + done = P.header[1]&4; // last polygon? + P.numVerts = (P.header[0]&0x40)?4:3; + +#if 0 +if ((P.header[0] & 0xf) && numPolys == 0) +{ + validPoly=false; + printf("LNK=%x num=%d\n", P.header[0] & 0xf, P.numVerts); +} +if (lutIdx==0x8c0955) +{ + for (int i = 0; i < 7; i++) + printf("%d %d: %08x\n", numPolys, i, P.header[i]); + +} +#endif + + // Texture data + texEnable = P.header[6]&0x04000000; + texFormat = (P.header[6]>>7)&7; + texWidth = (32<<((P.header[3]>>3)&7)); + texHeight = (32<<((P.header[3]>>0)&7)); + texPage = (P.header[4]&0x40) ? 
1024 : 0; // treat texture page as Y coordinate + texBaseX = (32*(((P.header[4]&0x1F)<<1)|((P.header[5]>>7)&1))) + (int)texOffsetXY[0]; + texBaseY = (32*(P.header[5]&0x1F)+texPage) + (int)texOffsetXY[1]; + texBaseX &= 2047; + texBaseY &= 2047; + uvScale = (P.header[1]&0x40)?1.0f:(1.0f/8.0f); + + // Determine whether this is an alpha polygon (TODO: when testing textures, test if texturing enabled? Might not matter) + if (((P.header[6]&0x00800000)==0) || // translucent polygon + (texFormat==7) || // RGBA4 texture + (texFormat==4)) // A4L4 texture + P.state = POLY_STATE_ALPHA; + else + P.state = POLY_STATE_NORMAL; + if (texFormat==1) // A4L4 interleaved + { + if ((P.header[6]&2)) + P.state = POLY_STATE_ALPHA; + else + P.state = POLY_STATE_NORMAL; + } + if (texFormat==3) // A4L4 interleaved + { + if ((P.header[6]&4)) + P.state = POLY_STATE_ALPHA; + else + P.state = POLY_STATE_NORMAL; + } + + // Decode the texture + if (texEnable) + { + // If model cache is static, record texture reference in model cache entry for later decoding. + // If cache is dynamic, or if it's not possible to record the texture reference (due to lack of + // memory) then decode the texture now. 
+ if (Cache->dynamic || !Model->texRefs.AddRef(texFormat, texBaseX, texBaseY, texWidth, texHeight)) + DecodeTexture(texFormat, texBaseX, texBaseY, texWidth, texHeight); + } + + // Polygon normal is in upper 24 bits: sign + 1.22 fixed point + P.n[0] = (GLfloat) (((INT32)P.header[1])>>8) * (1.0f/4194304.0f); + P.n[1] = (GLfloat) (((INT32)P.header[2])>>8) * (1.0f/4194304.0f); + P.n[2] = (GLfloat) (((INT32)P.header[3])>>8) * (1.0f/4194304.0f); + + // Fetch reused vertices according to bitfield, then new verts + i = 0; + j = 0; + vmask = 1; + for (i = 0; i < 4; i++) // up to 4 reused vertices + { + if ((P.header[0x00]&vmask)) + { + P.Vert[j] = Prev[i]; + ++j; + } + vmask <<= 1; + } + + for (; j < P.numVerts; j++) // remaining vertices are new and defined here + { + // Fetch vertices + ix = data[0]; + iy = data[1]; + iz = data[2]; + it = data[3]; + + /* + // Check for bad vertices (Sega Rally 2) + if (((ix>>28)==7) || ((iy>>28)==7) || ((iz>>28)==7)) + { + //printf("%X ix=%08X, iy=%08X, iz=%08X\n", lutIdx, ix, iy, iz); + goto StopDecoding; + } + */ + + // Decode vertices + P.Vert[j].x = (GLfloat) (((INT32)ix)>>8) * vertexFactor; + P.Vert[j].y = (GLfloat) (((INT32)iy)>>8) * vertexFactor; + P.Vert[j].z = (GLfloat) (((INT32)iz)>>8) * vertexFactor; + P.Vert[j].n[0] = P.n[0]+(GLfloat)(INT8)(ix&0xFF); // vertex normals are offset from polygon normal + P.Vert[j].n[1] = P.n[1]+(GLfloat)(INT8)(iy&0xFF); + P.Vert[j].n[2] = P.n[2]+(GLfloat)(INT8)(iz&0xFF); + P.Vert[j].u = (GLfloat) ((UINT16)(it>>16)) * uvScale; // TO-DO: might these be signed? 
+ P.Vert[j].v = (GLfloat) ((UINT16)(it&0xFFFF)) * uvScale; + data += 4; + + // Normalize the vertex normal. A zero-length normal is left untouched: dividing by zero would fill it with NaN/Inf values + mag = sqrt(P.Vert[j].n[0]*P.Vert[j].n[0]+P.Vert[j].n[1]*P.Vert[j].n[1]+P.Vert[j].n[2]*P.Vert[j].n[2]); + if (mag > 0.0f) + { + P.Vert[j].n[0] /= mag; + P.Vert[j].n[1] /= mag; + P.Vert[j].n[2] /= mag; + } + } + + if (validPoly) + { + // Copy current vertices into previous vertex array + for (i = 0; i < 4; i++) + Prev[i] = P.Vert[i]; + + // Copy this polygon into the model buffer + if (OKAY != InsertPolygon(Cache,&P)) + return NULL; + ++numPolys; + } + } + + // Finish model and enter it into the LUT + EndModel(Cache,Model,lutIdx,texOffset); + return Model; +} + + +/****************************************************************************** + Cache Management +******************************************************************************/ + +/* + * Look up a model. Use this to determine if a model needs to be cached + * (returns NULL if so). + */ +struct VBORef *CLegacy3D::LookUpModel(ModelCache *Cache, int lutIdx, UINT16 texOffset) +{ + int m = Cache->lut[lutIdx]; + + // Has any state associated with this model LUT index been cached at all? + if (m < 0) + return NULL; + + // Has the specified texture offset been cached? 
+ for (struct VBORef *Model = &(Cache->Models[m]); Model != NULL; Model = Model->nextTexOffset) + { + if (Model->texOffset == texOffset) + return Model; + } + + return NULL; // no match found, we must cache this new model state +} + +// Discard all models in the cache and the display list +void CLegacy3D::ClearModelCache(ModelCache *Cache) +{ + Cache->vboCurOffset = 0; + for (int i = 0; i < 2; i++) + Cache->curVertIdx[i] = 0; + for (int i = 0; i < Cache->numModels; i++) + Cache->lut[Cache->Models[i].lutIdx] = -1; + + Cache->numModels = 0; + ClearDisplayList(Cache); +} + +bool CLegacy3D::CreateModelCache(ModelCache *Cache, unsigned vboMaxVerts, + unsigned localMaxVerts, unsigned maxNumModels, unsigned numLUTEntries, + unsigned displayListSize, bool isDynamic) +{ + unsigned i; + int vboBytes, localBytes; + bool success; + + Cache->dynamic = isDynamic; + + /* + * VBO allocation: + * + * Progressively smaller VBOs, in steps of localMaxVerts are allocated + * until successful. If the size dips below localMaxVerts, localMaxVerts is + * attempted as the final try. 
+ */ + + glGetError(); // clear error flag + glGenBuffers(1, &(Cache->vboID)); + glBindBuffer(GL_ARRAY_BUFFER, Cache->vboID); + + vboBytes = vboMaxVerts*VBO_VERTEX_SIZE*sizeof(GLfloat); + localBytes = localMaxVerts*VBO_VERTEX_SIZE*sizeof(GLfloat); + + // Try allocating, shrinking the request by localBytes after every failure, until a request succeeds or the size drops below localBytes + success = false; + while (vboBytes >= localBytes) + { + glBufferData(GL_ARRAY_BUFFER, vboBytes, 0, isDynamic?GL_STREAM_DRAW:GL_STATIC_DRAW); // null data pointer: only reserves storage, nothing is uploaded + if (glGetError() == GL_NO_ERROR) + { + success = true; + break; + } + + vboBytes -= localBytes; + } + + if (!success) + { + // Last ditch attempt: try the local buffer size + vboBytes = localBytes; + glBufferData(GL_ARRAY_BUFFER, vboBytes, 0, isDynamic?GL_STREAM_DRAW:GL_STATIC_DRAW); + if (glGetError() != GL_NO_ERROR) + return ErrorLog("OpenGL was unable to provide a %s vertex buffer.", isDynamic?"dynamic":"static"); + } + + DebugLog("%s vertex buffer size: %1.2f MB", isDynamic?"Dynamic":"Static", (float)vboBytes/(float)0x100000); + InfoLog("%s vertex buffer size: %1.2f MB", isDynamic?"Dynamic":"Static", (float)vboBytes/(float)0x100000); + + // Set the VBO to the size we obtained + Cache->vboMaxOffset = vboBytes; + Cache->vboCurOffset = 0; + + // Attempt to allocate space for local VBO + for (i = 0; i < 2; i++) + { + Cache->verts[i] = new(std::nothrow) GLfloat[localMaxVerts*VBO_VERTEX_SIZE]; + Cache->curVertIdx[i] = 0; + } + Cache->maxVertIdx = localMaxVerts; + + // ... model array + Cache->Models = new(std::nothrow) VBORef[maxNumModels]; + Cache->maxModels = maxNumModels; + Cache->numModels = 0; + + // ... LUT + Cache->lut = new(std::nothrow) INT16[numLUTEntries]; + Cache->lutSize = numLUTEntries; + + // ... 
display list + Cache->List = new(std::nothrow) DisplayList[displayListSize]; + ClearDisplayList(Cache); + Cache->maxListSize = displayListSize; + + // Check if memory allocation succeeded + if ((Cache->verts[0]==NULL) || (Cache->verts[1]==NULL) || (Cache->Models==NULL) || (Cache->lut==NULL) || (Cache->List==NULL)) + { + DestroyModelCache(Cache); + return ErrorLog("Insufficient memory for model cache."); + } + + // Clear LUT (MUST be done here because ClearModelCache() won't do it for dynamic models) + for (i = 0; i < numLUTEntries; i++) + Cache->lut[i] = -1; + + // All good! + return OKAY; +} + +void CLegacy3D::DestroyModelCache(ModelCache *Cache) +{ + glDeleteBuffers(1, &(Cache->vboID)); + + for (int i = 0; i < 2; i++) + { + if (Cache->verts[i] != NULL) + delete [] Cache->verts[i]; + } + if (Cache->Models != NULL) + delete [] Cache->Models; + if (Cache->lut != NULL) + delete [] Cache->lut; + if (Cache->List != NULL) + delete [] Cache->List; + + memset(Cache, 0, sizeof(ModelCache)); +} + +} // Legacy3D diff --git a/Src/Graphics/Legacy3D/Shaders/DIR.txt b/Src/Graphics/Legacy3D/Shaders/DIR.txt new file mode 100644 index 0000000..fca6919 --- /dev/null +++ b/Src/Graphics/Legacy3D/Shaders/DIR.txt @@ -0,0 +1,2 @@ +Shader source files go here. Completed versions of the shader files should be +copied into Src/Graphics/Shaders3D.h and Src/Graphics/Shaders2D.h. \ No newline at end of file diff --git a/Src/Graphics/Legacy3D/Shaders/Fragment.glsl b/Src/Graphics/Legacy3D/Shaders/Fragment.glsl new file mode 100644 index 0000000..fb2804e --- /dev/null +++ b/Src/Graphics/Legacy3D/Shaders/Fragment.glsl @@ -0,0 +1,194 @@ +/** + ** Supermodel + ** A Sega Model 3 Arcade Emulator. + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson + ** + ** This file is part of Supermodel. 
+ ** + ** Supermodel is free software: you can redistribute it and/or modify it under + ** the terms of the GNU General Public License as published by the Free + ** Software Foundation, either version 3 of the License, or (at your option) + ** any later version. + ** + ** Supermodel is distributed in the hope that it will be useful, but WITHOUT + ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + ** more details. + ** + ** You should have received a copy of the GNU General Public License along + ** with Supermodel. If not, see <http://www.gnu.org/licenses/>. + **/ + +/* + * Fragment.glsl + * + * Fragment shader for 3D rendering. + */ + +#version 120 + +// Global uniforms +uniform sampler2D textureMap; // complete texture map, 2048x2048 texels +uniform vec4 spotEllipse; // spotlight ellipse position: .x=X position (screen coordinates), .y=Y position, .z=half-width, .w=half-height +uniform vec2 spotRange; // spotlight Z range: .x=start (viewspace coordinates), .y=limit +uniform vec3 spotColor; // spotlight RGB color +uniform vec3 lighting[2]; // lighting state (lighting[0] = sun direction, lighting[1].x,y = diffuse, ambient intensities from 0-1.0) +uniform float mapSize; // texture map size (2048,4096,6144 etc) + +// Inputs from vertex shader +varying vec4 fsSubTexture; // .x=texture X, .y=texture Y, .z=texture width, .w=texture height (all in texels) +varying vec4 fsTexParams; // .x=texture enable (if 1, else 0), .y=use transparency (if > 0), .z=U wrap mode (1=mirror, 0=repeat), .w=V wrap mode +varying float fsTexFormat; // T1RGB5 contour texture (if > 0) +varying float fsTexMap; // texture map number +varying float fsTransLevel; // translucence level, 0.0 (transparent) to 1.0 (opaque) +varying vec3 fsLightIntensity; // lighting intensity +varying float fsSpecularTerm; // specular highlight +varying float fsFogFactor; // fog factor +varying float fsViewZ; // Z distance to fragment from 
viewpoint at origin + +/* + * WrapTexelCoords(): + * + * Computes the normalized OpenGL S,T coordinates within the 2048x2048 texture + * sheet, taking into account wrapping behavior. + * + * Computing normalized OpenGL texture coordinates (0 to 1) within the + * Real3D texture sheet: + * + * If the texture is not mirrored, we simply have to clamp the + * coordinates to fit within the texture dimensions, add the texture + * X, Y position to select the appropriate one, and normalize by 2048 + * (the dimensions of the Real3D texture sheet). + * + * = [(u,v)%(w,h)+(x,y)]/(2048,2048) + * + * If mirroring is enabled, textures are mirrored every odd multiple of + * the original texture. To detect whether we are in an odd multiple, + * simply divide the coordinate by the texture dimension and check + * whether the result is odd. Then, clamp the coordinates as before but + * subtract from the last texel to mirror them: + * + * = [M*((w-1,h-1)-(u,v)%(w,h)) + (1-M)*(u,v)%(w,h) + (x,y)]/(2048,2048) + * where M is 1.0 if the texture must be mirrored. + * + * As an optimization, this function computes TWO texture coordinates + * simultaneously. The first is texCoord.xy, the second is in .zw. The other + * parameters must have .xy = .zw. + */ +vec4 WrapTexelCoords(vec4 texCoord, vec4 texOffset, vec4 texSize, vec4 mirrorEnable) +{ + vec4 clampedCoord, mirror, glTexCoord; + + clampedCoord = mod(texCoord,texSize); // clamp coordinates to within texture size + mirror = mirrorEnable * mod(floor(texCoord/texSize),2.0); // whether this texel needs to be mirrored + + glTexCoord = ( mirror*(texSize-clampedCoord) + + (vec4(1.0,1.0,1.0,1.0)-mirror)*clampedCoord + + texOffset + ) / mapSize; + return glTexCoord; +} + +/* + * main(): + * + * Fragment shader entry point. 
+ */ + +void main(void) +{ + vec4 uv_top, uv_bot, c[4]; + vec2 r; + vec4 fragColor; + vec2 ellipse; + vec3 lightIntensity; + float insideSpot; + int x; + + // Get polygon color for untextured polygons (textured polygons will overwrite) + if (fsTexParams.x < 0.5) + fragColor = gl_Color; + else + // Textured polygons: set fragment color to texel value + { + /* + * Bilinear Filtering + * + * In order to get this working on ATI, the number of operations is + * reduced by putting everything into vec4s. uv_top holds the UV + * coordinates for the top two texels (.xy=left, .zw=right) and uv_bot + * is for the lower two. + */ + + // Compute fractional blending factor, r, and lower left corner of texel 0 + uv_bot.xy = gl_TexCoord[0].st-vec2(0.5,0.5); // move into the lower left blending texel + r = uv_bot.xy-floor(uv_bot.xy); // fractional part + uv_bot.xy = floor(uv_bot.xy); // integral part + + // Compute texel coordinates + uv_bot.xy += vec2(0.5,0.5); // offset to center of pixel (should not be needed but it fixes a lot of glitches, esp. 
on Nvidia) + uv_bot.zw = uv_bot.xy + vec2(1.0,0.0); // compute coordinates of the other three neighbors + uv_top = uv_bot + vec4(0.0,1.0,0.0,1.0); + + // Compute the properly wrapped texel coordinates + uv_top = WrapTexelCoords(uv_top,vec4(fsSubTexture.xy,fsSubTexture.xy),vec4(fsSubTexture.zw,fsSubTexture.zw), vec4(fsTexParams.zw,fsTexParams.zw)); + uv_bot = WrapTexelCoords(uv_bot,vec4(fsSubTexture.xy,fsSubTexture.xy),vec4(fsSubTexture.zw,fsSubTexture.zw), vec4(fsTexParams.zw,fsTexParams.zw)); + + // Fetch the texels + c[0]=texture2D(textureMap,uv_bot.xy); // bottom-left (base texel) + c[1]=texture2D(textureMap,uv_bot.zw); // bottom-right + c[2]=texture2D(textureMap,uv_top.xy); // top-left + c[3]=texture2D(textureMap,uv_top.zw); // top-right + + // Interpolate texels and blend result with material color to determine final (unlit) fragment color + // fragColor = (c[0]*(1.0-r.s)*(1.0-r.t) + c[1]*r.s*(1.0-r.t) + c[2]*(1.0-r.s)*r.t + c[3]*r.s*r.t); + // Faster method: + c[0] += (c[1]-c[0])*r.s; // 2 alu + c[2] += (c[3]-c[2])*r.s; // 2 alu + fragColor = c[0]+(c[2]-c[0])*r.t; //2 alu + + /* + * T1RGB5: + * + * The transparency bit determines whether to discard pixels (if set). + * What is unknown is how this bit behaves when interpolated. OpenGL + * processes it as an alpha value, so it might conceivably be blended + * with neighbors. Here, an arbitrary threshold is chosen. + * + * To-do: blending could probably be enabled and this would work even + * better with a hard threshold. + * + * Contour processing also seems to be enabled for RGBA4 textures. + * When the alpha value is 0.0 (or close), pixels are discarded + * entirely. 
+ */ + if (fsTexParams.y > 0.5) // contour processing enabled + { + if (fragColor.a < 0.01) // discard anything with alpha == 0 + discard; + } + + // If contour texture and not discarded, force alpha to 1.0 because will later be modified by polygon translucency + if (fsTexFormat < 0.5) // contour (T1RGB5) texture + fragColor.a = 1.0; + } + + // Compute spotlight and apply lighting + ellipse = (gl_FragCoord.xy-spotEllipse.xy)/spotEllipse.zw; + insideSpot = dot(ellipse,ellipse); + if ((insideSpot <= 1.0) && (fsViewZ>=spotRange.x) && (fsViewZ. + **/ + +/* + * Fragment2D.glsl + * + * Fragment shader for 2D tilemap rendering. + */ + +#version 120 + +// Global uniforms +uniform sampler2D textureMap; // 512x512 layer surface + +/* + * main(): + * + * Fragment shader entry point. + */ + +void main(void) +{ + gl_FragColor = texture2D(textureMap, gl_TexCoord[0].st); +} diff --git a/Src/Graphics/Legacy3D/Shaders/Fragment_Flat.glsl b/Src/Graphics/Legacy3D/Shaders/Fragment_Flat.glsl new file mode 100644 index 0000000..d1af1c4 --- /dev/null +++ b/Src/Graphics/Legacy3D/Shaders/Fragment_Flat.glsl @@ -0,0 +1,142 @@ +/** + ** Supermodel + ** A Sega Model 3 Arcade Emulator. + ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** + ** This file is part of Supermodel. + ** + ** Supermodel is free software: you can redistribute it and/or modify it under + ** the terms of the GNU General Public License as published by the Free + ** Software Foundation, either version 3 of the License, or (at your option) + ** any later version. + ** + ** Supermodel is distributed in the hope that it will be useful, but WITHOUT + ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + ** more details. + ** + ** You should have received a copy of the GNU General Public License along + ** with Supermodel. If not, see . + **/ + +/* + * Fragment_NoSpotlight.glsl + * + * Fragment shader for 3D rendering. 
Spotlight effect removed. Fixes fragment + * shader link errors on older ATI Radeon GPUs. + * + * To load external fragment shaders, use the -frag-shader= option when + * starting Supermodel. + */ + +#version 120 + +// Global uniforms +uniform sampler2D textureMap; // complete texture map, 2048x2048 texels +uniform vec4 spotEllipse; // spotlight ellipse position: .x=X position (screen coordinates), .y=Y position, .z=half-width, .w=half-height) +uniform vec2 spotRange; // spotlight Z range: .x=start (viewspace coordinates), .y=limit +uniform vec3 spotColor; // spotlight RGB color +uniform float mapSize; // texture map size (2048,4096,6144 etc) + +// Inputs from vertex shader +varying vec4 fsSubTexture; // .x=texture X, .y=texture Y, .z=texture width, .w=texture height (all in texels) +varying vec4 fsTexParams; // .x=texture enable (if 1, else 0), .y=use transparency (if > 0), .z=U wrap mode (1=mirror, 0=repeat), .w=V wrap mode +varying float fsTexFormat; // .x=T1RGB5 contour texture (if > 0) +varying float fsTransLevel; // translucence level, 0.0 (transparent) to 1.0 (opaque) +varying vec3 fsLightIntensity; // lighting intensity +varying float fsFogFactor; // fog factor +varying float fsViewZ; // Z distance to fragment from viewpoint at origin + +/* + * WrapTexelCoords(): + * + * Computes the normalized OpenGL S,T coordinates within the 2048x2048 texture + * sheet, taking into account wrapping behavior. + * + * Computing normalized OpenGL texture coordinates (0 to 1) within the + * Real3D texture sheet: + * + * If the texture is not mirrored, we simply have to clamp the + * coordinates to fit within the texture dimensions, add the texture + * X, Y position to select the appropriate one, and normalize by 2048 + * (the dimensions of the Real3D texture sheet). + * + * = [(u,v)%(w,h)+(x,y)]/(2048,2048) + * + * If mirroring is enabled, textures are mirrored every odd multiple of + * the original texture. 
To detect whether we are in an odd multiple, + * simply divide the coordinate by the texture dimension and check + * whether the result is odd. Then, clamp the coordinates as before but + * subtract from the last texel to mirror them: + * + * = [M*((w-1,h-1)-(u,v)%(w,h)) + (1-M)*(u,v)%(w,h) + (x,y)]/(2048,2048) + * where M is 1.0 if the texture must be mirrored. + * + * As an optimization, this function computes TWO texture coordinates + * simultaneously. The first is texCoord.xy, the second is in .zw. The other + * parameters must have .xy = .zw. + */ +vec4 WrapTexelCoords(vec4 texCoord, vec4 texOffset, vec4 texSize, vec4 mirrorEnable) +{ + vec4 clampedCoord, mirror, glTexCoord; + + clampedCoord = mod(texCoord,texSize); // clamp coordinates to within texture size + mirror = mirrorEnable * mod(floor(texCoord/texSize),2.0); // whether this texel needs to be mirrored + + glTexCoord = ( mirror*(texSize-clampedCoord) + + (vec4(1.0,1.0,1.0,1.0)-mirror)*clampedCoord + + texOffset + ) / mapSize; +/* + glTexCoord = ( mirror*(texSize-vec4(1.0,1.0,1.0,1.0)-clampedCoord) + + (vec4(1.0,1.0,1.0,1.0)-mirror)*clampedCoord + + texOffset + ) / mapSize; +*/ + return glTexCoord; +} + +/* + * main(): + * + * Fragment shader entry point. + */ + +void main(void) +{ + vec4 uv_top, uv_bot, c[4]; + vec2 r; + vec4 fragColor; + vec2 ellipse; + vec3 lightIntensity; + float insideSpot; + + // Get polygon color for untextured polygons (textured polygons will overwrite) + if (fsTexParams.x < 0.5) + fragColor = gl_Color; + else + // Textured polygons: set fragment color to texel value + { + fragColor = texture2D(textureMap,(fsSubTexture.xy+fsSubTexture.zw/2.0)/mapSize); + //fragColor += texture2D(textureMap,(fsSubTexture.xy+fsSubTexture.zw)/mapSize); + + } + + // Compute spotlight and apply lighting + ellipse = (gl_FragCoord.xy-spotEllipse.xy)/spotEllipse.zw; + insideSpot = dot(ellipse,ellipse); + if ((insideSpot <= 1.0) && (fsViewZ>=spotRange.x) && (fsViewZ. 
+ **/ + +/* + * Fragment_MultiSheet.glsl + * + * Fragment shader for 3D rendering. Uses 8 texture sheets to decode the + * different possible formats. + */ + +#version 120 + +// Global uniforms +uniform sampler2D textureMap0; // complete texture map (fmt 0), 2048x2048 texels +uniform sampler2D textureMap1; // complete texture map (fmt 1), 2048x2048 texels +uniform sampler2D textureMap2; // complete texture map (fmt 2), 2048x2048 texels +uniform sampler2D textureMap3; // complete texture map (fmt 3), 2048x2048 texels +uniform sampler2D textureMap4; // complete texture map (fmt 4), 2048x2048 texels +uniform sampler2D textureMap5; // complete texture map (fmt 5), 2048x2048 texels +uniform sampler2D textureMap6; // complete texture map (fmt 6), 2048x2048 texels +uniform sampler2D textureMap7; // complete texture map (fmt 7), 2048x2048 texels +uniform vec4 spotEllipse; // spotlight ellipse position: .x=X position (screen coordinates), .y=Y position, .z=half-width, .w=half-height) +uniform vec2 spotRange; // spotlight Z range: .x=start (viewspace coordinates), .y=limit +uniform vec3 spotColor; // spotlight RGB color +uniform vec3 lighting[2]; // lighting state (lighting[0] = sun direction, lighting[1].x,y = diffuse, ambient intensities from 0-1.0) +uniform float mapSize; // texture map size (2048,4096,6144 etc) + +// Inputs from vertex shader +varying vec4 fsSubTexture; // .x=texture X, .y=texture Y, .z=texture width, .w=texture height (all in texels) +varying vec4 fsTexParams; // .x=texture enable (if 1, else 0), .y=use transparency (if > 0), .z=U wrap mode (1=mirror, 0=repeat), .w=V wrap mode +varying float fsTexFormat; // T1RGB5 contour texture (if > 0) +varying float fsTexMap; // texture map number +varying float fsTransLevel; // translucence level, 0.0 (transparent) to 1.0 (opaque) +varying vec3 fsLightIntensity; // lighting intensity +varying float fsSpecularTerm; // specular highlight +varying float fsFogFactor; // fog factor +varying float fsViewZ; // Z distance 
to fragment from viewpoint at origin + +/* + * WrapTexelCoords(): + * + * Computes the normalized OpenGL S,T coordinates within the 2048x2048 texture + * sheet, taking into account wrapping behavior. + * + * Computing normalized OpenGL texture coordinates (0 to 1) within the + * Real3D texture sheet: + * + * If the texture is not mirrored, we simply have to clamp the + * coordinates to fit within the texture dimensions, add the texture + * X, Y position to select the appropriate one, and normalize by 2048 + * (the dimensions of the Real3D texture sheet). + * + * = [(u,v)%(w,h)+(x,y)]/(2048,2048) + * + * If mirroring is enabled, textures are mirrored every odd multiple of + * the original texture. To detect whether we are in an odd multiple, + * simply divide the coordinate by the texture dimension and check + * whether the result is odd. Then, clamp the coordinates as before but + * subtract from the last texel to mirror them: + * + * = [M*((w-1,h-1)-(u,v)%(w,h)) + (1-M)*(u,v)%(w,h) + (x,y)]/(2048,2048) + * where M is 1.0 if the texture must be mirrored. + * + * As an optimization, this function computes TWO texture coordinates + * simultaneously. The first is texCoord.xy, the second is in .zw. The other + * parameters must have .xy = .zw. + */ +vec4 WrapTexelCoords(vec4 texCoord, vec4 texOffset, vec4 texSize, vec4 mirrorEnable) +{ + vec4 clampedCoord, mirror, glTexCoord; + + clampedCoord = mod(texCoord,texSize); // clamp coordinates to within texture size + mirror = mirrorEnable * mod(floor(texCoord/texSize),2.0); // whether this texel needs to be mirrored + + glTexCoord = ( mirror*(texSize-clampedCoord) + + (vec4(1.0,1.0,1.0,1.0)-mirror)*clampedCoord + + texOffset + ) / mapSize; + return glTexCoord; +} + +/* + * main(): + * + * Fragment shader entry point. 
+ */ + +void main(void) +{ + vec4 uv_top, uv_bot, c[4]; + vec2 r; + vec4 fragColor; + vec2 ellipse; + vec3 lightIntensity; + float insideSpot; + int x; + + // Get polygon color for untextured polygons (textured polygons will overwrite) + if (fsTexParams.x < 0.5) + fragColor = gl_Color; + else + // Textured polygons: set fragment color to texel value + { + /* + * Bilinear Filtering + * + * In order to get this working on ATI, the number of operations is + * reduced by putting everything into vec4s. uv_top holds the UV + * coordinates for the top two texels (.xy=left, .zw=right) and uv_bot + * is for the lower two. + */ + + // Compute fractional blending factor, r, and lower left corner of texel 0 + uv_bot.xy = gl_TexCoord[0].st-vec2(0.5,0.5); // move into the lower left blending texel + r = uv_bot.xy-floor(uv_bot.xy); // fractional part + uv_bot.xy = floor(uv_bot.xy); // integral part + + // Compute texel coordinates + uv_bot.xy += vec2(0.5,0.5); // offset to center of pixel (should not be needed but it fixes a lot of glitches, esp. 
on Nvidia) + uv_bot.zw = uv_bot.xy + vec2(1.0,0.0); // compute coordinates of the other three neighbors + uv_top = uv_bot + vec4(0.0,1.0,0.0,1.0); + + // Compute the properly wrapped texel coordinates + uv_top = WrapTexelCoords(uv_top,vec4(fsSubTexture.xy,fsSubTexture.xy),vec4(fsSubTexture.zw,fsSubTexture.zw), vec4(fsTexParams.zw,fsTexParams.zw)); + uv_bot = WrapTexelCoords(uv_bot,vec4(fsSubTexture.xy,fsSubTexture.xy),vec4(fsSubTexture.zw,fsSubTexture.zw), vec4(fsTexParams.zw,fsTexParams.zw)); + + // Fetch the texels from the given texture map + if (fsTexMap < 0.5f) { + c[0]=texture2D(textureMap0, uv_bot.xy); // bottom-left (base texel) + c[1]=texture2D(textureMap0, uv_bot.zw); // bottom-right + c[2]=texture2D(textureMap0, uv_top.xy); // top-left + c[3]=texture2D(textureMap0, uv_top.zw); // top-right + } else if (fsTexMap < 1.5f) { + c[0]=texture2D(textureMap1, uv_bot.xy); // bottom-left (base texel) + c[1]=texture2D(textureMap1, uv_bot.zw); // bottom-right + c[2]=texture2D(textureMap1, uv_top.xy); // top-left + c[3]=texture2D(textureMap1, uv_top.zw); // top-right + } else if (fsTexMap < 2.5f) { + c[0]=texture2D(textureMap2, uv_bot.xy); // bottom-left (base texel) + c[1]=texture2D(textureMap2, uv_bot.zw); // bottom-right + c[2]=texture2D(textureMap2, uv_top.xy); // top-left + c[3]=texture2D(textureMap2, uv_top.zw); // top-right + } else if (fsTexMap < 3.5f) { + c[0]=texture2D(textureMap3, uv_bot.xy); // bottom-left (base texel) + c[1]=texture2D(textureMap3, uv_bot.zw); // bottom-right + c[2]=texture2D(textureMap3, uv_top.xy); // top-left + c[3]=texture2D(textureMap3, uv_top.zw); // top-right + } else if (fsTexMap < 4.5f) { + c[0]=texture2D(textureMap4, uv_bot.xy); // bottom-left (base texel) + c[1]=texture2D(textureMap4, uv_bot.zw); // bottom-right + c[2]=texture2D(textureMap4, uv_top.xy); // top-left + c[3]=texture2D(textureMap4, uv_top.zw); // top-right + } else if (fsTexMap < 5.5f) { + c[0]=texture2D(textureMap5, uv_bot.xy); // bottom-left (base texel) + 
c[1]=texture2D(textureMap5, uv_bot.zw); // bottom-right + c[2]=texture2D(textureMap5, uv_top.xy); // top-left + c[3]=texture2D(textureMap5, uv_top.zw); // top-right + } else if (fsTexMap < 6.5f) { + c[0]=texture2D(textureMap6, uv_bot.xy); // bottom-left (base texel) + c[1]=texture2D(textureMap6, uv_bot.zw); // bottom-right + c[2]=texture2D(textureMap6, uv_top.xy); // top-left + c[3]=texture2D(textureMap6, uv_top.zw); // top-right + } else { + c[0]=texture2D(textureMap7, uv_bot.xy); // bottom-left (base texel) + c[1]=texture2D(textureMap7, uv_bot.zw); // bottom-right + c[2]=texture2D(textureMap7, uv_top.xy); // top-left + c[3]=texture2D(textureMap7, uv_top.zw); // top-right + } + + // Interpolate texels and blend result with material color to determine final (unlit) fragment color + // fragColor = (c[0]*(1.0-r.s)*(1.0-r.t) + c[1]*r.s*(1.0-r.t) + c[2]*(1.0-r.s)*r.t + c[3]*r.s*r.t); + // Faster method: + c[0] += (c[1]-c[0])*r.s; // 2 alu + c[2] += (c[3]-c[2])*r.s; // 2 alu + fragColor = c[0]+(c[2]-c[0])*r.t; // 2 alu + + /* + * T1RGB5: + * + * The transparency bit determines whether to discard pixels (if set). + * What is unknown is how this bit behaves when interpolated. OpenGL + * processes it as an alpha value, so it might conceivably be blended + * with neighbors. Here, an arbitrary threshold is chosen. + * + * To-do: blending could probably be enabled and this would work even + * better with a hard threshold. + * + * Contour processing also seems to be enabled for RGBA4 textures. + * When the alpha value is 0.0 (or close), pixels are discarded + * entirely. 
+ */ + if (fsTexParams.y > 0.5) // contour processing enabled + { + if (fragColor.a < 0.01) // discard anything with alpha == 0 + discard; + } + + // If contour texture and not discarded, force alpha to 1.0 because will later be modified by polygon translucency + if (fsTexFormat < 0.5) // contour (T1RGB5) texture map + fragColor.a = 1.0; + } + + // Compute spotlight and apply lighting + ellipse = (gl_FragCoord.xy-spotEllipse.xy)/spotEllipse.zw; + insideSpot = dot(ellipse,ellipse); + if ((insideSpot <= 1.0) && (fsViewZ>=spotRange.x) && (fsViewZ. + **/ + +/* + * Fragment_NoSpotlight.glsl + * + * Fragment shader for 3D rendering. Spotlight effect removed. Fixes fragment + * shader link errors on older ATI Radeon GPUs. + * + * To load external fragment shaders, use the -frag-shader= option when + * starting Supermodel. + */ + +#version 120 + +// Global uniforms +uniform sampler2D textureMap; // complete texture map, 2048x2048 texels +uniform vec4 spotEllipse; // spotlight ellipse position: .x=X position (screen coordinates), .y=Y position, .z=half-width, .w=half-height) +uniform vec2 spotRange; // spotlight Z range: .x=start (viewspace coordinates), .y=limit +uniform vec3 spotColor; // spotlight RGB color +uniform float mapSize; // texture map size (2048,4096,6144 etc) + +// Inputs from vertex shader +varying vec4 fsSubTexture; // .x=texture X, .y=texture Y, .z=texture width, .w=texture height (all in texels) +varying vec4 fsTexParams; // .x=texture enable (if 1, else 0), .y=use transparency (if > 0), .z=U wrap mode (1=mirror, 0=repeat), .w=V wrap mode +varying float fsTexFormat; // .x=T1RGB5 contour texture (if > 0) +varying float fsTransLevel; // translucence level, 0.0 (transparent) to 1.0 (opaque) +varying vec3 fsLightIntensity; // lighting intensity +varying float fsFogFactor; // fog factor +varying float fsViewZ; // Z distance to fragment from viewpoint at origin + +/* + * WrapTexelCoords(): + * + * Computes the normalized OpenGL S,T coordinates within the 
2048x2048 texture + * sheet, taking into account wrapping behavior. + * + * Computing normalized OpenGL texture coordinates (0 to 1) within the + * Real3D texture sheet: + * + * If the texture is not mirrored, we simply have to clamp the + * coordinates to fit within the texture dimensions, add the texture + * X, Y position to select the appropriate one, and normalize by 2048 + * (the dimensions of the Real3D texture sheet). + * + * = [(u,v)%(w,h)+(x,y)]/(2048,2048) + * + * If mirroring is enabled, textures are mirrored every odd multiple of + * the original texture. To detect whether we are in an odd multiple, + * simply divide the coordinate by the texture dimension and check + * whether the result is odd. Then, clamp the coordinates as before but + * subtract from the last texel to mirror them: + * + * = [M*((w-1,h-1)-(u,v)%(w,h)) + (1-M)*(u,v)%(w,h) + (x,y)]/(2048,2048) + * where M is 1.0 if the texture must be mirrored. + * + * As an optimization, this function computes TWO texture coordinates + * simultaneously. The first is texCoord.xy, the second is in .zw. The other + * parameters must have .xy = .zw. + */ +vec4 WrapTexelCoords(vec4 texCoord, vec4 texOffset, vec4 texSize, vec4 mirrorEnable) +{ + vec4 clampedCoord, mirror, glTexCoord; + + clampedCoord = mod(texCoord,texSize); // clamp coordinates to within texture size + mirror = mirrorEnable * mod(floor(texCoord/texSize),2.0); // whether this texel needs to be mirrored + + glTexCoord = ( mirror*(texSize-clampedCoord) + + (vec4(1.0,1.0,1.0,1.0)-mirror)*clampedCoord + + texOffset + ) / mapSize; +/* + glTexCoord = ( mirror*(texSize-vec4(1.0,1.0,1.0,1.0)-clampedCoord) + + (vec4(1.0,1.0,1.0,1.0)-mirror)*clampedCoord + + texOffset + ) / mapSize; +*/ + return glTexCoord; +} + +/* + * main(): + * + * Fragment shader entry point. 
+ */ + +void main(void) +{ + vec4 uv_top, uv_bot, c[4]; + vec2 r; + vec4 fragColor; + vec2 ellipse; + vec3 lightIntensity; + float insideSpot; + + // Get polygon color for untextured polygons (textured polygons will overwrite) + if (fsTexParams.x < 0.5) + fragColor = gl_Color; + else + // Textured polygons: set fragment color to texel value + { + /* + * Bilinear Filtering + * + * In order to get this working on ATI, the number of operations is + * reduced by putting everything into vec4s. uv_top holds the UV + * coordinates for the top two texels (.xy=left, .zw=right) and uv_bot + * is for the lower two. + */ + + // Compute fractional blending factor, r, and lower left corner of texel 0 + uv_bot.xy = gl_TexCoord[0].st-vec2(0.5,0.5); // move into the lower left blending texel + r = uv_bot.xy-floor(uv_bot.xy); // fractional part + uv_bot.xy = floor(uv_bot.xy); // integral part + + // Compute texel coordinates + uv_bot.xy += vec2(0.5,0.5); // offset to center of pixel (should not be needed but it fixes a lot of glitches, esp. 
on Nvidia) + uv_bot.zw = uv_bot.xy + vec2(1.0,0.0); // compute coordinates of the other three neighbors + uv_top = uv_bot + vec4(0.0,1.0,0.0,1.0); + + // Compute the properly wrapped texel coordinates + uv_top = WrapTexelCoords(uv_top,vec4(fsSubTexture.xy,fsSubTexture.xy),vec4(fsSubTexture.zw,fsSubTexture.zw), vec4(fsTexParams.zw,fsTexParams.zw)); + uv_bot = WrapTexelCoords(uv_bot,vec4(fsSubTexture.xy,fsSubTexture.xy),vec4(fsSubTexture.zw,fsSubTexture.zw), vec4(fsTexParams.zw,fsTexParams.zw)); + + // Fetch the texels + c[0]=texture2D(textureMap,uv_bot.xy); // bottom-left (base texel) + c[1]=texture2D(textureMap,uv_bot.zw); // bottom-right + c[2]=texture2D(textureMap,uv_top.xy); // top-left + c[3]=texture2D(textureMap,uv_top.zw); // top-right + + // Interpolate texels and blend result with material color to determine final (unlit) fragment color + // fragColor = (c[0]*(1.0-r.s)*(1.0-r.t) + c[1]*r.s*(1.0-r.t) + c[2]*(1.0-r.s)*r.t + c[3]*r.s*r.t); + // Faster method: + c[0] += (c[1]-c[0])*r.s; // 2 alu + c[2] += (c[3]-c[2])*r.s; // 2 alu + fragColor = c[0]+(c[2]-c[0])*r.t; //2 alu + + /* + * T1RGB5: + * + * The transparency bit determines whether to discard pixels (if set). + * What is unknown is how this bit behaves when interpolated. OpenGL + * processes it as an alpha value, so it might concievably be blended + * with neighbors. Here, an arbitrary threshold is chosen. + * + * To-do: blending could probably enabled and this would work even + * better with a hard threshold. + * + * Countour processing also seems to be enabled for RGBA4 textures. + * When the alpha value is 0.0 (or close), pixels are discarded + * entirely. 
+ */ + if (fsTexParams.y > 0.5) // contour processing enabled + { + if (fragColor.a < 0.01) // discard anything with alpha == 0 + discard; + } + + // If contour texture and not discarded, force alpha to 1.0 because will later be modified by polygon translucency + if (fsTexFormat < 0.5) // contour (T1RGB5) texture map + fragColor.a = 1.0; + } + + // Compute spotlight and apply lighting + /*** + ellipse = (gl_FragCoord.xy-spotEllipse.xy)/spotEllipse.zw; + insideSpot = dot(ellipse,ellipse); + if ((insideSpot <= 1.0) && (fsViewZ>=spotRange.x) && (fsViewZ. + **/ + +/* + * Vertex.glsl + * + * Vertex shader for 3D rendering. + */ + +#version 120 + +// Global uniforms +uniform mat4 modelViewMatrix; // model -> view space matrix +uniform mat4 projectionMatrix; // view space -> screen space matrix +uniform vec3 lighting[2]; // lighting state (lighting[0] = sun direction, lighting[1].x,y = diffuse, ambient intensities from 0-1.0) +uniform vec4 spotEllipse; // spotlight ellipse position: .x=X position (normalized device coordinates), .y=Y position, .z=half-width, .w=half-height) +uniform vec2 spotRange; // spotlight Z range: .x=start (viewspace coordinates), .y=limit +uniform vec3 spotColor; // spotlight RGB color + +// Custom vertex attributes +attribute vec4 subTexture; // .x=texture X, .y=texture Y, .z=texture width, .w=texture height (all in texels) +attribute vec4 texParams; // .x=texture enable (if 1, else 0), .y=use transparency (if >=0), .z=U wrap mode (1=mirror, 0=repeat), .w=V wrap mode +attribute float texFormat; // T1RGB5 contour texture (if > 0) +attribute float texMap; // texture map number +attribute float transLevel; // translucence level, 0.0 (transparent) to 1.0 (opaque). 
if less than 1.0, replace alpha value +attribute float lightEnable; // lighting enabled (1.0) or luminous (0.0), drawn at full intensity +attribute float shininess; // specular shininess (if >= 0.0) or disable specular lighting (negative) +attribute float fogIntensity; // fog intensity (1.0, full fog effect, 0.0, no fog) + +// Custom outputs to fragment shader +varying vec4 fsSubTexture; +varying vec4 fsTexParams; +varying float fsTexFormat; +varying float fsTexMap; +varying float fsTransLevel; +varying vec3 fsLightIntensity; // total light intensity for this vertex +varying float fsSpecularTerm; // specular light term (additive) +varying float fsFogFactor; // fog factor +varying float fsViewZ; + +// Gets the 3x3 matrix out of a 4x4 (because mat3(mat4matrix) does not work on ATI!) +mat3 GetLinearPart( mat4 m ) +{ + mat3 result; + + result[0][0] = m[0][0]; + result[0][1] = m[0][1]; + result[0][2] = m[0][2]; + + result[1][0] = m[1][0]; + result[1][1] = m[1][1]; + result[1][2] = m[1][2]; + + result[2][0] = m[2][0]; + result[2][1] = m[2][1]; + result[2][2] = m[2][2]; + + return result; +} + +void main(void) +{ + vec3 viewVertex; // vertex coordinates in view space + vec3 viewNormal; // vertex normal in view space + vec3 sunVector; // sun lighting vector (as reflecting away from vertex) + float sunFactor; // sun light projection along vertex normal (0.0 to 1.0) + vec3 halfway; + float specFactor; + + // Transform vertex + gl_Position = projectionMatrix * modelViewMatrix * gl_Vertex; + viewVertex = vec3(modelViewMatrix * gl_Vertex); + + /* + * Modulation + * + * Polygon color serves as material color (modulating the light intensity) + * for textured polygons. The fragment shader will ignore (overwrite) the + * the color passed to it if the fragment is textured. + * + * Untextured fragments must be set to the polygon color and the light + * intensity is initialized to 1.0 here. 
Alpha must be set to 1.0 because + * the fragment shader multiplies it by the polygon translucency setting. + * + * TO-DO: Does OpenGL set alpha to 1.0 by default if no alpha is specified + * for the vertex? If so, we can remove that line from here. + */ + + gl_FrontColor = gl_Color; // untextured polygons will use this + gl_FrontColor.a = 1.0; + fsLightIntensity = vec3(1.0,1.0,1.0); + if (texParams.x > 0.5) // textured + fsLightIntensity *= gl_Color.rgb; + + /* + * Sun Light + * + * Parallel light source and ambient lighting are only applied for non- + * luminous polygons. + */ + fsSpecularTerm = 0.0; + if (lightEnable > 0.5) // not luminous + { + // Normal -> view space + viewNormal = normalize(GetLinearPart(modelViewMatrix)*gl_Normal); + + // Real3D -> OpenGL view space convention (TO-DO: do this outside of shader) + sunVector = lighting[0]*vec3(1.0,-1.0,-1.0); + + // Compute diffuse factor for sunlight + sunFactor = max(dot(sunVector,viewNormal),0.0); + + // Total light intensity: sum of all components + fsLightIntensity *= (sunFactor*lighting[1].x+lighting[1].y); + + /* + * Specular Lighting + * + * The specular term is treated similarly to the "separate specular + * color" functionality of OpenGL: it is added as a highlight in the + * fragment shader. This allows even black textures to be lit. + * + * TO-DO: Ambient intensity viewport parameter is known but what about + * the intensity of the specular term? Always applied with full + * intensity here but this is unlikely to be correct. 
+ */ + if (shininess >= 0.0) + { + // Standard specular lighting equation + vec3 V = normalize(-viewVertex); + vec3 H = normalize(sunVector+V); // halfway vector + float s = max(10.0,64.0-shininess); // seems to look nice, but probably not correct + fsSpecularTerm = pow(max(dot(viewNormal,H),0.0),s); + if (sunFactor <= 0.0) fsSpecularTerm = 0.0; + + // Faster approximation + //float temp = max(dot(viewNormal,H),0.0); + //float s = 64.0-shininess; + //fsSpecularTerm = temp/(s-temp*s+temp); + + // Phong formula + //vec3 R = normalize(2.0*dot(sunVector,viewNormal)*viewNormal - sunVector); + //vec3 V = normalize(-viewVertex); + //float s = max(2.0,64.0-shininess); + //fsSpecularTerm = pow(max(dot(R,V),0.0),s); + } + } + + // Fog + float z = length(viewVertex); + fsFogFactor = clamp(1.0-fogIntensity*(gl_Fog.start+z*gl_Fog.density), 0.0, 1.0); + + // Pass viewspace Z coordinate (for spotlight) + fsViewZ = -viewVertex.z; // convert Z from GL->Real3D convention (want +Z to be further into screen) + + // Pass remaining parameters to fragment shader + gl_TexCoord[0] = gl_MultiTexCoord0; + fsSubTexture = subTexture; + fsTexParams = texParams; + fsTransLevel = transLevel; + fsTexFormat = texFormat; + fsTexMap = texMap; +} diff --git a/Src/Graphics/Legacy3D/Shaders/Vertex2D.glsl b/Src/Graphics/Legacy3D/Shaders/Vertex2D.glsl new file mode 100644 index 0000000..3c286df --- /dev/null +++ b/Src/Graphics/Legacy3D/Shaders/Vertex2D.glsl @@ -0,0 +1,34 @@ +/** + ** Supermodel + ** A Sega Model 3 Arcade Emulator. + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson + ** + ** This file is part of Supermodel. + ** + ** Supermodel is free software: you can redistribute it and/or modify it under + ** the terms of the GNU General Public License as published by the Free + ** Software Foundation, either version 3 of the License, or (at your option) + ** any later version. 
+ ** + ** Supermodel is distributed in the hope that it will be useful, but WITHOUT + ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + ** more details. + ** + ** You should have received a copy of the GNU General Public License along + ** with Supermodel. If not, see . + **/ + +/* + * Vertex2D.glsl + * + * Vertex shader for 2D tilemap rendering. + */ + +#version 120 + +void main(void) +{ + gl_TexCoord[0] = gl_MultiTexCoord0; + gl_Position = gl_ModelViewProjectionMatrix*gl_Vertex; +} \ No newline at end of file diff --git a/Src/Graphics/Legacy3D/Shaders3D.h b/Src/Graphics/Legacy3D/Shaders3D.h new file mode 100644 index 0000000..befcbe6 --- /dev/null +++ b/Src/Graphics/Legacy3D/Shaders3D.h @@ -0,0 +1,666 @@ +/** + ** Supermodel + ** A Sega Model 3 Arcade Emulator. + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson + ** + ** This file is part of Supermodel. + ** + ** Supermodel is free software: you can redistribute it and/or modify it under + ** the terms of the GNU General Public License as published by the Free + ** Software Foundation, either version 3 of the License, or (at your option) + ** any later version. + ** + ** Supermodel is distributed in the hope that it will be useful, but WITHOUT + ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + ** more details. + ** + ** You should have received a copy of the GNU General Public License along + ** with Supermodel. If not, see . + **/ + +/* + * Shaders3D.h + * + * Header file containing the 3D vertex and fragment shaders. 
+ */ + +#ifndef INCLUDED_SHADERS3D_H +#define INCLUDED_SHADERS3D_H + +namespace Legacy3D { + +// Vertex shader +static const char vertexShaderSource[] = +{ +"/**\n" +" ** Supermodel\n" +" ** A Sega Model 3 Arcade Emulator.\n" +" ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson \n" +" **\n" +" ** This file is part of Supermodel.\n" +" **\n" +" ** Supermodel is free software: you can redistribute it and/or modify it under\n" +" ** the terms of the GNU General Public License as published by the Free \n" +" ** Software Foundation, either version 3 of the License, or (at your option)\n" +" ** any later version.\n" +" **\n" +" ** Supermodel is distributed in the hope that it will be useful, but WITHOUT\n" +" ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n" +" ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for\n" +" ** more details.\n" +" **\n" +" ** You should have received a copy of the GNU General Public License along\n" +" ** with Supermodel. 
If not, see .\n" +" **/\n" +" \n" +"/*\n" +" * Vertex.glsl\n" +" *\n" +" * Vertex shader for 3D rendering.\n" +" */\n" +" \n" +"#version 120\n" +"\n" +"// Global uniforms\n" +"uniform mat4\tmodelViewMatrix;\t// model -> view space matrix\n" +"uniform mat4\tprojectionMatrix;\t// view space -> screen space matrix\n" +"uniform vec3\tlighting[2];\t\t// lighting state (lighting[0] = sun direction, lighting[1].x,y = diffuse, ambient intensities from 0-1.0)\n" +"uniform vec4\tspotEllipse;\t\t// spotlight ellipse position: .x=X position (normalized device coordinates), .y=Y position, .z=half-width, .w=half-height)\n" +"uniform vec2\tspotRange;\t\t\t// spotlight Z range: .x=start (viewspace coordinates), .y=limit\n" +"uniform vec3\tspotColor;\t\t\t// spotlight RGB color\n" +"\n" +"// Custom vertex attributes\n" +"attribute vec4\tsubTexture;\t\t// .x=texture X, .y=texture Y, .z=texture width, .w=texture height (all in texels)\n" +"attribute vec4\ttexParams;\t\t// .x=texture enable (if 1, else 0), .y=use transparency (if >=0), .z=U wrap mode (1=mirror, 0=repeat), .w=V wrap mode\n" +"attribute float\ttexFormat;\t\t// T1RGB5 contour texture (if > 0)\n" +"attribute float\ttexMap;\t\t// texture map number\n" +"attribute float\ttransLevel;\t\t// translucence level, 0.0 (transparent) to 1.0 (opaque). 
if less than 1.0, replace alpha value\n" +"attribute float\tlightEnable;\t// lighting enabled (1.0) or luminous (0.0), drawn at full intensity\n" +"attribute float\tshininess;\t\t// specular shininess (if >= 0.0) or disable specular lighting (negative)\n" +"attribute float\tfogIntensity;\t// fog intensity (1.0, full fog effect, 0.0, no fog) \n" +"\n" +"// Custom outputs to fragment shader\n" +"varying vec4\tfsSubTexture;\n" +"varying vec4\tfsTexParams;\n" +"varying float\tfsTexFormat;\n" +"varying float\tfsTexMap;\n" +"varying float\tfsTransLevel;\n" +"varying vec3\tfsLightIntensity;\t// total light intensity for this vertex\n" +"varying float\tfsSpecularTerm;\t\t// specular light term (additive)\n" +"varying float\tfsFogFactor;\t\t// fog factor\n" +"varying float\tfsViewZ;\n" +"\n" +"// Gets the 3x3 matrix out of a 4x4 (because mat3(mat4matrix) does not work on ATI!)\n" +"mat3 GetLinearPart( mat4 m )\n" +"{\n" +"\tmat3 result;\n" +"\t\n" +"\tresult[0][0] = m[0][0]; \n" +"\tresult[0][1] = m[0][1]; \n" +"\tresult[0][2] = m[0][2]; \n" +"\n" +"\tresult[1][0] = m[1][0]; \n" +"\tresult[1][1] = m[1][1]; \n" +"\tresult[1][2] = m[1][2]; \n" +"\t\n" +"\tresult[2][0] = m[2][0]; \n" +"\tresult[2][1] = m[2][1]; \n" +"\tresult[2][2] = m[2][2]; \n" +"\t\n" +"\treturn result;\n" +"}\n" +"\n" +"void main(void)\n" +"{\n" +"\tvec3\tviewVertex;\t\t// vertex coordinates in view space\n" +"\tvec3\tviewNormal;\t\t// vertex normal in view space\n" +"\tvec3\tsunVector;\t\t// sun lighting vector (as reflecting away from vertex)\n" +"\tfloat\tsunFactor;\t\t// sun light projection along vertex normal (0.0 to 1.0)\n" +"\tvec3\thalfway;\n" +"\tfloat\tspecFactor;\n" +"\t\n" +"\t// Transform vertex\n" +"\tgl_Position = projectionMatrix * modelViewMatrix * gl_Vertex;\n" +"\tviewVertex = vec3(modelViewMatrix * gl_Vertex);\t\n" +"\t\n" +"\t/*\n" +"\t * Modulation\n" +"\t *\n" +" \t * Polygon color serves as material color (modulating the light intensity)\n" +"\t * for textured polygons. 
The fragment shader will ignore (overwrite) the\n" +"\t * the color passed to it if the fragment is textured. \n" +"\t *\n" +"\t * Untextured fragments must be set to the polygon color and the light\n" +"\t * intensity is initialized to 1.0 here. Alpha must be set to 1.0 because\n" +"\t * the fragment shader multiplies it by the polygon translucency setting. \n" +"\t *\n" +"\t * TO-DO: Does OpenGL set alpha to 1.0 by default if no alpha is specified\n" +"\t * for the vertex? If so, we can remove that line from here.\n" +"\t */\n" +"\n" +"\tgl_FrontColor = gl_Color;\t// untextured polygons will use this\n" +"\tgl_FrontColor.a = 1.0;\t\n" +"\tfsLightIntensity = vec3(1.0,1.0,1.0);\n" +"\tif (texParams.x > 0.5)\t\t// textured\n" +"\t\tfsLightIntensity *= gl_Color.rgb;\n" +"\t\t\n" +"\t/*\n" +" \t * Sun Light\n" +"\t *\n" +"\t * Parallel light source and ambient lighting are only applied for non-\n" +"\t * luminous polygons.\n" +" \t */\n" +"\tfsSpecularTerm = 0.0;\n" +" \tif (lightEnable > 0.5)\t// not luminous\n" +"\t{\n" +"\t\t// Normal -> view space\n" +"\t\tviewNormal = normalize(GetLinearPart(modelViewMatrix)*gl_Normal);\n" +"\n" +"\t\t// Real3D -> OpenGL view space convention (TO-DO: do this outside of shader)\n" +"\t\tsunVector = lighting[0]*vec3(1.0,-1.0,-1.0);\n" +"\t\t\n" +"\t\t// Compute diffuse factor for sunlight\n" +"\t\tsunFactor = max(dot(sunVector,viewNormal),0.0);\n" +"\t\t\n" +"\t\t// Total light intensity: sum of all components\n" +"\t\tfsLightIntensity *= (sunFactor*lighting[1].x+lighting[1].y);\n" +"\t\t\n" +"\t\t/*\n" +"\t\t * Specular Lighting\n" +"\t\t *\n" +"\t\t * The specular term is treated similarly to the \"separate specular\n" +"\t\t * color\" functionality of OpenGL: it is added as a highlight in the\n" +"\t\t * fragment shader. This allows even black textures to be lit.\n" +"\t\t *\n" +"\t\t * TO-DO: Ambient intensity viewport parameter is known but what about\n" +"\t\t * the intensity of the specular term? 
Always applied with full \n" +"\t\t * intensity here but this is unlikely to be correct.\n" +"\t\t */\n" +" \t\tif (shininess >= 0.0)\n" +" \t\t{\n" +" \t\t\t// Standard specular lighting equation\n" +" \t\t\tvec3 V = normalize(-viewVertex);\n" +" \t\t\tvec3 H = normalize(sunVector+V);\t// halfway vector\n" +" \t\t\tfloat s = max(10.0,64.0-shininess);\t\t// seems to look nice, but probably not correct\n" +" \t\t\tfsSpecularTerm = pow(max(dot(viewNormal,H),0.0),s);\n" +" \t\t\tif (sunFactor <= 0.0) fsSpecularTerm = 0.0;\n" +" \t\t\t\n" +" \t\t\t// Faster approximation \t\t\t\n" +" \t\t\t//float temp = max(dot(viewNormal,H),0.0);\n" +" \t\t\t//float s = 64.0-shininess;\n" +" \t\t\t//fsSpecularTerm = temp/(s-temp*s+temp);\n" +" \t\t\t\n" +" \t\t\t// Phong formula\n" +" \t\t\t//vec3 R = normalize(2.0*dot(sunVector,viewNormal)*viewNormal - sunVector);\n" +" \t\t\t//vec3 V = normalize(-viewVertex);\n" +" \t\t\t//float s = max(2.0,64.0-shininess);\n" +" \t\t\t//fsSpecularTerm = pow(max(dot(R,V),0.0),s);\n" +" \t\t}\n" +"\t}\n" +"\t\n" +"\t// Fog\n" +"\tfloat z = length(viewVertex);\n" +"\tfsFogFactor = clamp(1.0-fogIntensity*(gl_Fog.start+z*gl_Fog.density), 0.0, 1.0);\n" +"\n" +"\t// Pass viewspace Z coordinate (for spotlight)\n" +"\tfsViewZ = -viewVertex.z;\t// convert Z from GL->Real3D convention (want +Z to be further into screen)\n" +"\n" +"\t// Pass remaining parameters to fragment shader\n" +"\tgl_TexCoord[0] = gl_MultiTexCoord0;\n" +"\tfsSubTexture = subTexture;\n" +"\tfsTexParams = texParams;\n" +"\tfsTransLevel = transLevel;\n" +"\tfsTexFormat = texFormat;\n" +"\tfsTexMap = texMap;\n" +"}\n" +}; + + +// Fragment shader (single texture sheet) +static const char fragmentShaderSingleSheetSource[] = +{ +"/**\n" +" ** Supermodel\n" +" ** A Sega Model 3 Arcade Emulator.\n" +" ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson \n" +" **\n" +" ** This file is part of Supermodel.\n" +" **\n" +" ** Supermodel is free software: you can redistribute it and/or modify it 
under\n" +" ** the terms of the GNU General Public License as published by the Free \n" +" ** Software Foundation, either version 3 of the License, or (at your option)\n" +" ** any later version.\n" +" **\n" +" ** Supermodel is distributed in the hope that it will be useful, but WITHOUT\n" +" ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n" +" ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for\n" +" ** more details.\n" +" **\n" +" ** You should have received a copy of the GNU General Public License along\n" +" ** with Supermodel. If not, see .\n" +" **/\n" +" \n" +"/*\n" +" * Fragment.glsl\n" +" *\n" +" * Fragment shader for 3D rendering.\n" +" */\n" +"\n" +"#version 120\n" +"\n" +"// Global uniforms\n" +"uniform sampler2D\ttextureMap;\t\t// complete texture map, 2048x2048 texels\n" +"uniform vec4\t\tspotEllipse;\t\t// spotlight ellipse position: .x=X position (screen coordinates), .y=Y position, .z=half-width, .w=half-height)\n" +"uniform vec2\t\tspotRange;\t\t// spotlight Z range: .x=start (viewspace coordinates), .y=limit\n" +"uniform vec3\t\tspotColor;\t\t// spotlight RGB color\n" +"uniform vec3\t\tlighting[2];\t\t// lighting state (lighting[0] = sun direction, lighting[1].x,y = diffuse, ambient intensities from 0-1.0)\n" +"uniform float\t\tmapSize;\t\t// texture map size (2048,4096,6144 etc)\n" +"\n" +"// Inputs from vertex shader \n" +"varying vec4\t\tfsSubTexture;\t// .x=texture X, .y=texture Y, .z=texture width, .w=texture height (all in texels)\n" +"varying vec4\t\tfsTexParams;\t// .x=texture enable (if 1, else 0), .y=use transparency (if > 0), .z=U wrap mode (1=mirror, 0=repeat), .w=V wrap mode\n" +"varying float\t\tfsTexFormat;\t// T1RGB5 contour texture (if > 0)\n" +"varying float\t\tfsTexMap;\t\t// texture map number\n" +"varying float\t\tfsTransLevel;\t// translucence level, 0.0 (transparent) to 1.0 (opaque)\n" +"varying vec3\t\tfsLightIntensity;\t// lighting intensity \n" +"varying 
float\t\tfsSpecularTerm;\t// specular highlight\n" +"varying float\t\tfsFogFactor;\t// fog factor\n" +"varying float\t\tfsViewZ;\t\t// Z distance to fragment from viewpoint at origin\n" +"\n" +"/*\n" +" * WrapTexelCoords():\n" +" *\n" +" * Computes the normalized OpenGL S,T coordinates within the 2048x2048 texture\n" +" * sheet, taking into account wrapping behavior.\n" +" *\n" +" * Computing normalized OpenGL texture coordinates (0 to 1) within the \n" +" * Real3D texture sheet:\n" +" *\n" +" * If the texture is not mirrored, we simply have to clamp the\n" +" * coordinates to fit within the texture dimensions, add the texture\n" +" * X, Y position to select the appropriate one, and normalize by 2048\n" +" * (the dimensions of the Real3D texture sheet).\n" +" *\n" +" *\t\t= [(u,v)%(w,h)+(x,y)]/(2048,2048)\n" +" *\n" +" * If mirroring is enabled, textures are mirrored every odd multiple of\n" +" * the original texture. To detect whether we are in an odd multiple, \n" +" * simply divide the coordinate by the texture dimension and check \n" +" * whether the result is odd. Then, clamp the coordinates as before but\n" +" * subtract from the last texel to mirror them:\n" +" *\n" +" * \t\t= [M*((w-1,h-1)-(u,v)%(w,h)) + (1-M)*(u,v)%(w,h) + (x,y)]/(2048,2048)\n" +" *\t\twhere M is 1.0 if the texture must be mirrored.\n" +" *\n" +" * As an optimization, this function computes TWO texture coordinates\n" +" * simultaneously. The first is texCoord.xy, the second is in .zw. 
The other\n" +" * parameters must have .xy = .zw.\n" +" */\n" +"vec4 WrapTexelCoords(vec4 texCoord, vec4 texOffset, vec4 texSize, vec4 mirrorEnable)\n" +"{\n" +"\tvec4\tclampedCoord, mirror, glTexCoord;\n" +"\t\n" +"\tclampedCoord = mod(texCoord,texSize);\t\t\t\t\t\t// clamp coordinates to within texture size\n" +"\tmirror = mirrorEnable * mod(floor(texCoord/texSize),2.0);\t// whether this texel needs to be mirrored\n" +"\n" +"\tglTexCoord = (\tmirror*(texSize-clampedCoord) +\n" +"\t\t\t\t\t(vec4(1.0,1.0,1.0,1.0)-mirror)*clampedCoord +\n" +"\t\t\t\t\ttexOffset\n" +"\t\t\t\t ) / mapSize;\n" +"\treturn glTexCoord;\n" +"}\n" +"\n" +"/*\n" +" * main():\n" +" *\n" +" * Fragment shader entry point.\n" +" */\n" +"\n" +"void main(void)\n" +"{\t\n" +"\tvec4\tuv_top, uv_bot, c[4];\n" +"\tvec2\tr;\n" +"\tvec4\tfragColor;\n" +"\tvec2\tellipse;\n" +"\tvec3\tlightIntensity;\n" +"\tfloat\tinsideSpot;\n" +"\tint\t\tx;\n" +"\t\n" +"\t// Get polygon color for untextured polygons (textured polygons will overwrite)\n" +"\tif (fsTexParams.x < 0.5)\n" +"\t\tfragColor = gl_Color;\t\t\n" +"\telse\n" +"\t// Textured polygons: set fragment color to texel value\n" +"\t{\t\t\t\n" +"\t\t/*\n" +"\t\t * Bilinear Filtering\n" +"\t\t *\n" +"\t\t * In order to get this working on ATI, the number of operations is\n" +"\t\t * reduced by putting everything into vec4s. uv_top holds the UV \n" +"\t\t * coordinates for the top two texels (.xy=left, .zw=right) and uv_bot\n" +"\t\t * is for the lower two.\n" +"\t\t */\n" +"\n" +"\t\t// Compute fractional blending factor, r, and lower left corner of texel 0\n" +"\t\tuv_bot.xy = gl_TexCoord[0].st-vec2(0.5,0.5);\t// move into the lower left blending texel \n" +"\t\tr = uv_bot.xy-floor(uv_bot.xy);\t\t\t\t\t// fractional part\n" +"\t\tuv_bot.xy = floor(uv_bot.xy);\t\t\t\t\t// integral part\n" +"\t\t\n" +"\t\t// Compute texel coordinates\n" +"\t\tuv_bot.xy += vec2(0.5,0.5);\t// offset to center of pixel (should not be needed but it fixes a lot of glitches, esp. 
on Nvidia)\n" +"\t\tuv_bot.zw = uv_bot.xy + vec2(1.0,0.0);\t\t\t// compute coordinates of the other three neighbors\n" +"\t\tuv_top = uv_bot + vec4(0.0,1.0,0.0,1.0);\n" +"\n" +"\t\t// Compute the properly wrapped texel coordinates\n" +"\t\tuv_top = WrapTexelCoords(uv_top,vec4(fsSubTexture.xy,fsSubTexture.xy),vec4(fsSubTexture.zw,fsSubTexture.zw), vec4(fsTexParams.zw,fsTexParams.zw));\n" +"\t\tuv_bot = WrapTexelCoords(uv_bot,vec4(fsSubTexture.xy,fsSubTexture.xy),vec4(fsSubTexture.zw,fsSubTexture.zw), vec4(fsTexParams.zw,fsTexParams.zw));\n" +"\n" +"\t\t// Fetch the texels\n" +"\t\tc[0]=texture2D(textureMap,uv_bot.xy);\t// bottom-left (base texel)\n" +"\t\tc[1]=texture2D(textureMap,uv_bot.zw);\t// bottom-right\n" +"\t\tc[2]=texture2D(textureMap,uv_top.xy);\t// top-left\n" +"\t\tc[3]=texture2D(textureMap,uv_top.zw);\t// top-right\t\t\n" +"\n" +"\t\t// Interpolate texels and blend result with material color to determine final (unlit) fragment color\n" +"\t\t// fragColor = (c[0]*(1.0-r.s)*(1.0-r.t) + c[1]*r.s*(1.0-r.t) + c[2]*(1.0-r.s)*r.t + c[3]*r.s*r.t);\n" +"\t\t// Faster method:\n" +"\t\tc[0] += (c[1]-c[0])*r.s;\t\t\t// 2 alu\n" +"\t\tc[2] += (c[3]-c[2])*r.s;\t\t\t// 2 alu\n" +"\t\tfragColor = c[0]+(c[2]-c[0])*r.t;\t//2 alu\n" +"\t\n" +"\t\t/*\n" +"\t\t * T1RGB5:\n" +"\t\t *\n" +"\t\t * The transparency bit determines whether to discard pixels (if set).\n" +"\t\t * What is unknown is how this bit behaves when interpolated. OpenGL\n" +"\t\t * processes it as an alpha value, so it might concievably be blended\n" +"\t\t * with neighbors. 
Here, an arbitrary threshold is chosen.\n" +"\t\t *\n" +"\t\t * To-do: blending could probably enabled and this would work even\n" +"\t\t * better with a hard threshold.\n" +"\t\t *\n" +"\t\t * Countour processing also seems to be enabled for RGBA4 textures.\n" +"\t\t * When the alpha value is 0.0 (or close), pixels are discarded \n" +"\t\t * entirely.\n" +"\t\t */\n" +"\t\tif (fsTexParams.y > 0.5)\t// contour processing enabled\n" +"\t\t{\n" +"\t\t\tif (fragColor.a < 0.01)\t// discard anything with alpha == 0\n" +"\t\t\t\tdiscard;\n" +"\t\t}\n" +"\t\t\n" +"\t\t// If contour texture and not discarded, force alpha to 1.0 because will later be modified by polygon translucency\n" +"\t\tif (fsTexFormat < 0.5)\t\t// contour (T1RGB5) texture\n" +"\t\t\tfragColor.a = 1.0;\n" +"\t}\n" +"\n" +"\t// Compute spotlight and apply lighting\n" +"\tellipse = (gl_FragCoord.xy-spotEllipse.xy)/spotEllipse.zw;\n" +"\tinsideSpot = dot(ellipse,ellipse);\n" +"\tif ((insideSpot <= 1.0) && (fsViewZ>=spotRange.x) && (fsViewZ.\n" +" **/\n" +" \n" +"/*\n" +" * Fragment_MultiSheet.glsl\n" +" *\n" +" * Fragment shader for 3D rendering. 
Uses 8 texture sheets to decode the \n" +" * different possible formats.\n" +" */\n" +"\n" +"#version 120\n" +"\n" +"// Global uniforms\n" +"uniform sampler2D\ttextureMap0;\t\t// complete texture map (fmt 0), 2048x2048 texels\n" +"uniform sampler2D\ttextureMap1;\t\t// complete texture map (fmt 1), 2048x2048 texels\n" +"uniform sampler2D\ttextureMap2;\t\t// complete texture map (fmt 2), 2048x2048 texels\n" +"uniform sampler2D\ttextureMap3;\t\t// complete texture map (fmt 3), 2048x2048 texels\n" +"uniform sampler2D\ttextureMap4;\t\t// complete texture map (fmt 4), 2048x2048 texels\n" +"uniform sampler2D\ttextureMap5;\t\t// complete texture map (fmt 5), 2048x2048 texels\n" +"uniform sampler2D\ttextureMap6;\t\t// complete texture map (fmt 6), 2048x2048 texels\n" +"uniform sampler2D\ttextureMap7;\t\t// complete texture map (fmt 7), 2048x2048 texels\n" +"uniform vec4\t\tspotEllipse;\t\t// spotlight ellipse position: .x=X position (screen coordinates), .y=Y position, .z=half-width, .w=half-height)\n" +"uniform vec2\t\tspotRange;\t\t\t// spotlight Z range: .x=start (viewspace coordinates), .y=limit\n" +"uniform vec3\t\tspotColor;\t\t\t// spotlight RGB color\n" +"uniform vec3\t\tlighting[2];\t\t// lighting state (lighting[0] = sun direction, lighting[1].x,y = diffuse, ambient intensities from 0-1.0)\n" +"uniform float\t\tmapSize;\t\t// texture map size (2048,4096,6144 etc)\n" +"\n" +"// Inputs from vertex shader \n" +"varying vec4\t\tfsSubTexture;\t// .x=texture X, .y=texture Y, .z=texture width, .w=texture height (all in texels)\n" +"varying vec4\t\tfsTexParams;\t// .x=texture enable (if 1, else 0), .y=use transparency (if > 0), .z=U wrap mode (1=mirror, 0=repeat), .w=V wrap mode\n" +"varying float\t\tfsTexFormat;\t// T1RGB5 contour texture (if > 0)\n" +"varying float\t\tfsTexMap;\t// texture map number\n" +"varying float\t\tfsTransLevel;\t// translucence level, 0.0 (transparent) to 1.0 (opaque)\n" +"varying vec3\t\tfsLightIntensity;\t// lighting intensity \n" +"varying 
float\t\tfsSpecularTerm;\t// specular highlight\n" +"varying float\t\tfsFogFactor;\t// fog factor\n" +"varying float\t\tfsViewZ;\t\t// Z distance to fragment from viewpoint at origin\n" +"\n" +"/*\n" +" * WrapTexelCoords():\n" +" *\n" +" * Computes the normalized OpenGL S,T coordinates within the 2048x2048 texture\n" +" * sheet, taking into account wrapping behavior.\n" +" *\n" +" * Computing normalized OpenGL texture coordinates (0 to 1) within the \n" +" * Real3D texture sheet:\n" +" *\n" +" * If the texture is not mirrored, we simply have to clamp the\n" +" * coordinates to fit within the texture dimensions, add the texture\n" +" * X, Y position to select the appropriate one, and normalize by 2048\n" +" * (the dimensions of the Real3D texture sheet).\n" +" *\n" +" *\t\t= [(u,v)%(w,h)+(x,y)]/(2048,2048)\n" +" *\n" +" * If mirroring is enabled, textures are mirrored every odd multiple of\n" +" * the original texture. To detect whether we are in an odd multiple, \n" +" * simply divide the coordinate by the texture dimension and check \n" +" * whether the result is odd. Then, clamp the coordinates as before but\n" +" * subtract from the last texel to mirror them:\n" +" *\n" +" * \t\t= [M*((w-1,h-1)-(u,v)%(w,h)) + (1-M)*(u,v)%(w,h) + (x,y)]/(2048,2048)\n" +" *\t\twhere M is 1.0 if the texture must be mirrored.\n" +" *\n" +" * As an optimization, this function computes TWO texture coordinates\n" +" * simultaneously. The first is texCoord.xy, the second is in .zw. 
The other\n" +" * parameters must have .xy = .zw.\n" +" */\n" +"vec4 WrapTexelCoords(vec4 texCoord, vec4 texOffset, vec4 texSize, vec4 mirrorEnable)\n" +"{\n" +"\tvec4\tclampedCoord, mirror, glTexCoord;\n" +"\t\n" +"\tclampedCoord = mod(texCoord,texSize);\t\t\t\t\t\t// clamp coordinates to within texture size\n" +"\tmirror = mirrorEnable * mod(floor(texCoord/texSize),2.0);\t// whether this texel needs to be mirrored\n" +"\n" +"\tglTexCoord = (\tmirror*(texSize-clampedCoord) +\n" +"\t\t\t\t\t(vec4(1.0,1.0,1.0,1.0)-mirror)*clampedCoord +\n" +"\t\t\t\t\ttexOffset\n" +"\t\t\t\t ) / mapSize;\n" +"\treturn glTexCoord;\n" +"}\n" +"\n" +"/*\n" +" * main():\n" +" *\n" +" * Fragment shader entry point.\n" +" */\n" +"\n" +"void main(void)\n" +"{\t\n" +"\tvec4\tuv_top, uv_bot, c[4];\n" +"\tvec2\tr;\n" +"\tvec4\tfragColor;\n" +"\tvec2\tellipse;\n" +"\tvec3\tlightIntensity;\n" +"\tfloat\tinsideSpot;\n" +"\tint\t\tx;\n" +"\t\n" +"\t// Get polygon color for untextured polygons (textured polygons will overwrite)\n" +"\tif (fsTexParams.x < 0.5)\n" +"\t\tfragColor = gl_Color;\t\t\n" +"\telse\n" +"\t// Textured polygons: set fragment color to texel value\n" +"\t{\t\t\t\n" +"\t\t/*\n" +"\t\t * Bilinear Filtering\n" +"\t\t *\n" +"\t\t * In order to get this working on ATI, the number of operations is\n" +"\t\t * reduced by putting everything into vec4s. uv_top holds the UV \n" +"\t\t * coordinates for the top two texels (.xy=left, .zw=right) and uv_bot\n" +"\t\t * is for the lower two.\n" +"\t\t */\n" +"\n" +"\t\t// Compute fractional blending factor, r, and lower left corner of texel 0\n" +"\t\tuv_bot.xy = gl_TexCoord[0].st-vec2(0.5,0.5);\t// move into the lower left blending texel \n" +"\t\tr = uv_bot.xy-floor(uv_bot.xy);\t\t\t\t\t// fractional part\n" +"\t\tuv_bot.xy = floor(uv_bot.xy);\t\t\t\t\t// integral part\n" +"\t\t\n" +"\t\t// Compute texel coordinates\n" +"\t\tuv_bot.xy += vec2(0.5,0.5);\t// offset to center of pixel (should not be needed but it fixes a lot of glitches, esp. 
on Nvidia)\n" +"\t\tuv_bot.zw = uv_bot.xy + vec2(1.0,0.0);\t\t\t// compute coordinates of the other three neighbors\n" +"\t\tuv_top = uv_bot + vec4(0.0,1.0,0.0,1.0);\n" +"\n" +"\t\t// Compute the properly wrapped texel coordinates\n" +"\t\tuv_top = WrapTexelCoords(uv_top,vec4(fsSubTexture.xy,fsSubTexture.xy),vec4(fsSubTexture.zw,fsSubTexture.zw), vec4(fsTexParams.zw,fsTexParams.zw));\n" +"\t\tuv_bot = WrapTexelCoords(uv_bot,vec4(fsSubTexture.xy,fsSubTexture.xy),vec4(fsSubTexture.zw,fsSubTexture.zw), vec4(fsTexParams.zw,fsTexParams.zw));\n" +"\n" +"\t\t// Fetch the texels from the given texture map\n" +"\t\tif (fsTexMap < 0.5f)\t{\n" +"\t\t\tc[0]=texture2D(textureMap0, uv_bot.xy); // bottom-left (base texel)\n" +"\t\t\tc[1]=texture2D(textureMap0, uv_bot.zw); // bottom-right\n" +"\t\t\tc[2]=texture2D(textureMap0, uv_top.xy); // top-left\n" +"\t\t\tc[3]=texture2D(textureMap0, uv_top.zw); // top-right\n" +"\t\t} else if (fsTexMap < 1.5f) {\n" +" c[0]=texture2D(textureMap1, uv_bot.xy); // bottom-left (base texel)\n" +"\t\t\tc[1]=texture2D(textureMap1, uv_bot.zw); // bottom-right\n" +"\t\t\tc[2]=texture2D(textureMap1, uv_top.xy); // top-left\n" +"\t\t\tc[3]=texture2D(textureMap1, uv_top.zw); // top-right\n" +"\t\t} else if (fsTexMap < 2.5f) {\n" +" c[0]=texture2D(textureMap2, uv_bot.xy); // bottom-left (base texel)\n" +"\t\t\tc[1]=texture2D(textureMap2, uv_bot.zw); // bottom-right\n" +"\t\t\tc[2]=texture2D(textureMap2, uv_top.xy); // top-left\n" +"\t\t\tc[3]=texture2D(textureMap2, uv_top.zw); // top-right\n" +"\t\t} else if (fsTexMap < 3.5f) {\n" +" c[0]=texture2D(textureMap3, uv_bot.xy); // bottom-left (base texel)\n" +"\t\t\tc[1]=texture2D(textureMap3, uv_bot.zw); // bottom-right\n" +"\t\t\tc[2]=texture2D(textureMap3, uv_top.xy); // top-left\n" +"\t\t\tc[3]=texture2D(textureMap3, uv_top.zw); // top-right\n" +"\t\t} else if (fsTexMap < 4.5f) {\n" +" c[0]=texture2D(textureMap4, uv_bot.xy); // bottom-left (base texel)\n" +"\t\t\tc[1]=texture2D(textureMap4, uv_bot.zw); // 
bottom-right\n" +"\t\t\tc[2]=texture2D(textureMap4, uv_top.xy); // top-left\n" +"\t\t\tc[3]=texture2D(textureMap4, uv_top.zw); // top-right\n" +"\t\t} else if (fsTexMap < 5.5f) {\n" +" c[0]=texture2D(textureMap5, uv_bot.xy); // bottom-left (base texel)\n" +"\t\t\tc[1]=texture2D(textureMap5, uv_bot.zw); // bottom-right\n" +"\t\t\tc[2]=texture2D(textureMap5, uv_top.xy); // top-left\n" +"\t\t\tc[3]=texture2D(textureMap5, uv_top.zw); // top-right\n" +"\t\t} else if (fsTexMap < 6.5f) {\n" +"\t\t\tc[0]=texture2D(textureMap6, uv_bot.xy); // bottom-left (base texel)\n" +"\t\t\tc[1]=texture2D(textureMap6, uv_bot.zw); // bottom-right\n" +"\t\t\tc[2]=texture2D(textureMap6, uv_top.xy); // top-left\n" +"\t\t\tc[3]=texture2D(textureMap6, uv_top.zw); // top-right\n" +"\t\t} else {\n" +" c[0]=texture2D(textureMap7, uv_bot.xy); // bottom-left (base texel)\n" +"\t\t\tc[1]=texture2D(textureMap7, uv_bot.zw); // bottom-right\n" +"\t\t\tc[2]=texture2D(textureMap7, uv_top.xy); // top-left\n" +"\t\t\tc[3]=texture2D(textureMap7, uv_top.zw); // top-right\n" +"\t\t} \n" +"\n" +"\t\t// Interpolate texels and blend result with material color to determine final (unlit) fragment color\n" +"\t\t// fragColor = (c[0]*(1.0-r.s)*(1.0-r.t) + c[1]*r.s*(1.0-r.t) + c[2]*(1.0-r.s)*r.t + c[3]*r.s*r.t);\n" +"\t\t// Faster method:\n" +"\t\tc[0] += (c[1]-c[0])*r.s;\t\t\t// 2 alu\n" +"\t\tc[2] += (c[3]-c[2])*r.s;\t\t\t// 2 alu\n" +"\t\tfragColor = c[0]+(c[2]-c[0])*r.t;\t// 2 alu\n" +"\t\n" +"\t\t/*\n" +"\t\t * T1RGB5:\n" +"\t\t *\n" +"\t\t * The transparency bit determines whether to discard pixels (if set).\n" +"\t\t * What is unknown is how this bit behaves when interpolated. OpenGL\n" +"\t\t * processes it as an alpha value, so it might concievably be blended\n" +"\t\t * with neighbors. 
Here, an arbitrary threshold is chosen.\n" +"\t\t *\n" +"\t\t * To-do: blending could probably enabled and this would work even\n" +"\t\t * better with a hard threshold.\n" +"\t\t *\n" +"\t\t * Countour processing also seems to be enabled for RGBA4 textures.\n" +"\t\t * When the alpha value is 0.0 (or close), pixels are discarded \n" +"\t\t * entirely.\n" +"\t\t */\n" +"\t\tif (fsTexParams.y > 0.5)\t// contour processing enabled\n" +"\t\t{\n" +"\t\t\tif (fragColor.a < 0.01)\t// discard anything with alpha == 0\n" +"\t\t\t\tdiscard;\n" +"\t\t}\n" +"\t\t\n" +"\t\t// If contour texture and not discarded, force alpha to 1.0 because will later be modified by polygon translucency\n" +"\t\tif (fsTexFormat < 0.5)\t\t// contour (T1RGB5) texture map\n" +"\t\t\tfragColor.a = 1.0;\n" +"\t}\n" +"\n" +"\t// Compute spotlight and apply lighting\n" +"\tellipse = (gl_FragCoord.xy-spotEllipse.xy)/spotEllipse.zw;\n" +"\tinsideSpot = dot(ellipse,ellipse);\n" +"\tif ((insideSpot <= 1.0) && (fsViewZ>=spotRange.x) && (fsViewZ. + **/ + +/* + * CTextureRefs.cpp + * + * Class that tracks unique texture references, eg in a cached model. + * + * Texture references are stored internally as a 27-bit field (3 bits for format, 6 bits each for x, y, width & height) to save space. + * + * A pre-allocated array is used for storing up to TEXREFS_ARRAY_SIZE texture references. When that limit is exceeded, it switches + * to using a hashset to store the texture references, but this requires extra memory allocation. 
+ */ + +#include "Supermodel.h" + +namespace Legacy3D { + +CTextureRefs::CTextureRefs() : m_size(0), m_hashCapacity(0), m_hashEntries(NULL) +{ + // +} + +CTextureRefs::~CTextureRefs() +{ + DeleteAllHashEntries(); +} + +unsigned CTextureRefs::GetSize() const +{ + return m_size; +} + +void CTextureRefs::Clear() +{ + // Delete all hash entries + DeleteAllHashEntries(); + m_size = 0; + m_hashCapacity = 0; + m_hashEntries = NULL; +} + +bool CTextureRefs::ContainsRef(unsigned fmt, unsigned x, unsigned y, unsigned width, unsigned height) +{ + // Pack texture reference into bitfield + unsigned texRef = (fmt&7)<<24|(x&0x7E0)<<13|(y&0x7E0)<<7|(width&0x7E0)<<1|(height&0x7E0)>>5; + + // Check if using array or hashset + if (m_size <= TEXREFS_ARRAY_SIZE) + { + // See if texture reference held in array + for (unsigned i = 0; i < m_size; i++) + { + if (texRef == m_array[i]) + return true; + } + return false; + } + else + // See if texture reference held in hashset + return HashContains(texRef); +} + +bool CTextureRefs::AddRef(unsigned fmt, unsigned x, unsigned y, unsigned width, unsigned height) +{ + // Pack texture reference into bitfield + unsigned texRef = (fmt&7)<<24|(x&0x7E0)<<13|(y&0x7E0)<<7|(width&0x7E0)<<1|(height&0x7E0)>>5; + + // Check if using array or hashset + if (m_size <= TEXREFS_ARRAY_SIZE) + { + // See if already held in array, if so nothing to do + for (unsigned i = 0; i < m_size; i++) + { + if (texRef == m_array[i]) + return true; + } + // If not, check if array is full + if (m_size == TEXREFS_ARRAY_SIZE) + { + // If so, set initial hashset capacity to 47 to initialize it + UpdateHashCapacity(47); + // Copy array into hashset + for (unsigned i = 0; i < TEXREFS_ARRAY_SIZE; i++) + AddToHash(m_array[i]); + // Add texture reference to hashset + AddToHash(texRef); + } + else + { + // Add texture reference to array + m_array[m_size] = texRef; + m_size++; + } + return true; + } + else + // Add texture reference to hashset + return AddToHash(texRef); +} + +bool 
CTextureRefs::RemoveRef(unsigned fmt, unsigned x, unsigned y, unsigned width, unsigned height) +{ + // Pack texture reference into bitfield + unsigned texRef = (fmt&7)<<24|(x&0x7E0)<<13|(y&0x7E0)<<7|(width&0x7E0)<<1|(height&0x7E0)>>5; + + // Check if using array or hashset + if (m_size <= TEXREFS_ARRAY_SIZE) + { + for (unsigned i = 0; i < m_size; i++) + { + if (texRef == m_array[i]) + { + for (unsigned j = i + 1; j < m_size; j++) + m_array[j - 1] = m_array[j]; + m_size--; + return true; + } + } + return false; + } + else + { + // Remove texture reference from hashset + bool removed = RemoveFromHash(texRef); + + // See if should switch back to array + if (m_size == TEXREFS_ARRAY_SIZE) + { + // Loop through all hash entries and copy texture references into array + unsigned j = 0; + for (unsigned i = 0; i < m_hashCapacity; i++) + { + for (HashEntry *entry = m_hashEntries[i]; entry; entry = entry->nextEntry) + m_array[j++] = entry->texRef; + } + // Delete all hash entries + DeleteAllHashEntries(); + } + return removed; + } +} + +void CTextureRefs::DecodeAllTextures(CLegacy3D *Render3D) +{ + // Check if using array or hashset + if (m_size <= TEXREFS_ARRAY_SIZE) + { + // Loop through elements in array and call CLegacy3D::DecodeTexture + for (unsigned i = 0; i < m_size; i++) + { + // Unpack texture reference from bitfield + unsigned texRef = m_array[i]; + unsigned fmt = texRef>>24; + unsigned x = (texRef>>13)&0x7E0; + unsigned y = (texRef>>7)&0x7E0; + unsigned width = (texRef>>1)&0x7E0; + unsigned height = (texRef<<5)&0x7E0; + Render3D->DecodeTexture(fmt, x, y, width, height); + } + } + else + { + // Loop through all hash entriesa and call CLegacy3D::DecodeTexture + for (unsigned i = 0; i < m_hashCapacity; i++) + { + for (HashEntry *entry = m_hashEntries[i]; entry; entry = entry->nextEntry) + { + // Unpack texture reference from bitfield + unsigned texRef = entry->texRef; + unsigned fmt = texRef>>24; + unsigned x = (texRef>>13)&0x7E0; + unsigned y = (texRef>>7)&0x7E0; + 
unsigned width = (texRef>>1)&0x7E0; + unsigned height = (texRef<<5)&0x7E0; + Render3D->DecodeTexture(fmt, x, y, width, height); + } + } + } +} + +bool CTextureRefs::UpdateHashCapacity(unsigned capacity) +{ + unsigned oldCapacity = m_hashCapacity; + HashEntry **oldEntries = m_hashEntries; + // Update capacity and create new empty entries array + m_hashCapacity = capacity; + m_hashEntries = new(std::nothrow) HashEntry*[capacity]; + if (!m_hashEntries) + return false; + memset(m_hashEntries, NULL, capacity * sizeof(HashEntry*)); + if (oldEntries) + { + // Redistribute entries into new entries array + for (unsigned i = 0; i < oldCapacity; i++) + { + HashEntry *entry = oldEntries[i]; + while (entry) + { + HashEntry *nextEntry = entry->nextEntry; + unsigned hash = entry->texRef % capacity; + entry->nextEntry = m_hashEntries[hash]; + m_hashEntries[hash] = entry; + entry = nextEntry; + } + } + // Delete old entries array + delete[] oldEntries; + } + return true; +} + +HashEntry *CTextureRefs::CreateHashEntry(unsigned texRef, bool &hashCapacityUpdated) +{ + // Update size and increase hash capacity if required + m_size++; + hashCapacityUpdated = m_size >= m_hashCapacity; + if (hashCapacityUpdated) + { + if (m_hashCapacity < 89) + UpdateHashCapacity(89); // Capacity of 89 gives good sequence of mostly prime capacities (89, 179, 359, 719, 1439, 2879 etc) + else + UpdateHashCapacity(2 * m_hashCapacity + 1); + } + return new(std::nothrow) HashEntry(texRef); +} + +void CTextureRefs::DeleteHashEntry(HashEntry *entry) +{ + // Update size and delete hash entry + m_size--; + delete entry; +} + +void CTextureRefs::DeleteAllHashEntries() +{ + if (!m_hashEntries) + return; + // Delete all hash entries and their storage + for (unsigned i = 0; i < m_hashCapacity; i++) + { + HashEntry *entry = m_hashEntries[i]; + if (entry) + delete entry; + } + delete[] m_hashEntries; +} + +bool CTextureRefs::AddToHash(unsigned texRef) +{ + // Convert texture reference to hash value + unsigned hash = 
texRef % m_hashCapacity; + // Loop through linked list for hash value and see if have texture reference already + HashEntry *headEntry = m_hashEntries[hash]; + HashEntry *entry = headEntry; + while (entry && texRef != entry->texRef) + entry = entry->nextEntry; + // If found, nothing to do + if (entry) + return true; + // Otherwise, create new hash entry for texture reference + bool hashCapacityUpdated; + entry = CreateHashEntry(texRef, hashCapacityUpdated); + // If couldn't create entry (ie out of memory), let caller know + if (!entry) + return false; + if (hashCapacityUpdated) + { + // If hash capacity was increased recalculate hash value + hash = texRef % m_hashCapacity; + headEntry = m_hashEntries[hash]; + } + // Store hash entry in linked list for hash value + entry->nextEntry = headEntry; + m_hashEntries[hash] = entry; + return true; +} + +bool CTextureRefs::RemoveFromHash(unsigned texRef) +{ + // Convert texture reference to hash value + unsigned hash = texRef % m_hashCapacity; + // Loop through linked list for hash value and see if have texture reference + HashEntry *entry = m_hashEntries[hash]; + HashEntry *prevEntry = NULL; + while (entry && texRef != entry->texRef) + { + prevEntry = entry; + entry = entry->nextEntry; + } + // If not found, nothing to do + if (!entry) + return false; + // Otherwise, remove entry from linked list for hash value + if (prevEntry) + prevEntry->nextEntry = entry->nextEntry; + else + m_hashEntries[hash] = entry->nextEntry; + // Delete hash entry storage + DeleteHashEntry(entry); + return true; +} + +bool CTextureRefs::HashContains(unsigned texRef) const +{ + // Convert texture reference to hash value + unsigned hash = texRef % m_hashCapacity; + // Loop through linked list for hash value and see if have texture reference + HashEntry *entry = m_hashEntries[hash]; + while (entry && texRef != entry->texRef) + entry = entry->nextEntry; + return !!entry; +} + +} // Legacy3D diff --git a/Src/Graphics/Legacy3D/TextureRefs.h 
// One node of a bucket chain in the texture reference hashset
struct HashEntry
{
	const unsigned texRef;	// Packed texture reference; fixed for the lifetime of the node
	HashEntry *nextEntry;	// Following node in the same bucket (NULL at the end of the chain)

	// Creates an unlinked node holding the given packed texture reference
	HashEntry(unsigned packedRef)
		: texRef(packedRef),
		  nextEntry(NULL)
	{
	}
};
+ */ + bool ContainsRef(unsigned fmt, unsigned x, unsigned y, unsigned width, unsigned height); + + /* + * AddRef(fmt, x, y, width, height): + * + * Adds the given texture reference. Returns false if it was not possible to add the reference (ie out of memory). + */ + bool AddRef(unsigned fmt, unsigned x, unsigned y, unsigned width, unsigned height); + + /* + * RemoveRef(fmt, x, y, width, height): + * + * Removes the given texture reference. Return true if the reference was found. + */ + bool RemoveRef(unsigned fmt, unsigned x, unsigned y, unsigned width, unsigned height); + + /* + * RemoveRef(fmt, x, y, width, height): + * + * Decodes all texture references held, calling CLegacy3D::DecodeTexture for each one. + */ + void DecodeAllTextures(CLegacy3D *Render3D); + +private: + // Number of texture references held. + unsigned m_size; + + // Pre-allocated array used to hold first TEXREFS_ARRAY_SIZE texture references. + unsigned m_array[TEXREFS_ARRAY_SIZE]; + + // Dynamically allocated hashset used to hold texture references when there are more than TEXREFS_ARRAY_SIZE. + unsigned m_hashCapacity; + HashEntry **m_hashEntries; + + /* + * UpdateHashCapacity(hashCapacity) + * + * Increases capacity of the hashset to given size. + */ + bool UpdateHashCapacity(unsigned hashCapacity); + + /* + * CreateHashEntry(texRef, hashCapacityUpdated) + * + * Creates and returns a new hash entry, updating the capacity if required (hashCapacityUpdated is set to true). + */ + HashEntry *CreateHashEntry(unsigned texRef, bool &hashCapacityUpdated); + + /* + * DeleteHashEntry(entry) + * + * Deletes the given hash entry and its storage. + */ + void DeleteHashEntry(HashEntry *entry); + + /* + * DeleteAllHashEntries() + * + * Deletes all hash entries and their storage. + */ + void DeleteAllHashEntries(); + + /* + * AddToHash(texRef) + * + * Adds the given texture reference (as a bitfield) to the hashset. 
+ */ + bool AddToHash(unsigned texRef); + + /* + * RemoveFromHash(texRef) + * + * Removes the given texture reference (as a bitfield) from the hashset. + */ + bool RemoveFromHash(unsigned texRef); + + /* + * HashContains(texRef) + * + * Returns true if given texture reference (as a bitfield) is held in the hashset. + */ + bool HashContains(unsigned texRef) const; +}; + +} // Legacy3D + +#endif // INCLUDED_TEXTUREREFS_H diff --git a/Src/Graphics/New3D/Mat4.cpp b/Src/Graphics/New3D/Mat4.cpp new file mode 100644 index 0000000..f2bbfd1 --- /dev/null +++ b/Src/Graphics/New3D/Mat4.cpp @@ -0,0 +1,353 @@ +#include "Mat4.h" +#define _USE_MATH_DEFINES +#include +#include + +#ifndef M_PI +#define M_PI 3.14159265359 +#endif + +namespace New3D { + +Mat4::Mat4() +{ + LoadIdentity(); +} + +void Mat4::LoadIdentity() +{ + float *m = currentMatrix; + + m[0] = 1; m[4] = 0; m[8 ] = 0; m[12] = 0; + m[1] = 0; m[5] = 1; m[9 ] = 0; m[13] = 0; + m[2] = 0; m[6] = 0; m[10] = 1; m[14] = 0; + m[3] = 0; m[7] = 0; m[11] = 0; m[15] = 1; +} + +void Mat4::MultiMatrices(const float a[16], const float b[16], float r[16]) { + +#define A(row,col) a[(col<<2)+row] +#define B(row,col) b[(col<<2)+row] +#define P(row,col) r[(col<<2)+row] + + int i; + for (i = 0; i < 4; i++) { + const float ai0 = A(i, 0), ai1 = A(i, 1), ai2 = A(i, 2), ai3 = A(i, 3); + P(i, 0) = ai0 * B(0, 0) + ai1 * B(1, 0) + ai2 * B(2, 0) + ai3 * B(3, 0); + P(i, 1) = ai0 * B(0, 1) + ai1 * B(1, 1) + ai2 * B(2, 1) + ai3 * B(3, 1); + P(i, 2) = ai0 * B(0, 2) + ai1 * B(1, 2) + ai2 * B(2, 2) + ai3 * B(3, 2); + P(i, 3) = ai0 * B(0, 3) + ai1 * B(1, 3) + ai2 * B(2, 3) + ai3 * B(3, 3); + } + +#undef A +#undef B +#undef p +} + +void Mat4::Copy(const float in[16], float out[16]) { + + for (int i = 0; i<16; i++) { + out[i] = in[i]; + } +} + +void Mat4::Translate(float x, float y, float z) { + + //============= + float m[16]; + float cur[16]; + //============= + + Mat4::Copy(currentMatrix, cur); + + m[0] = 1; + m[1] = 0; + m[2] = 0; + m[3] = 0; + + m[4] 
= 0; + m[5] = 1; + m[6] = 0; + m[7] = 0; + + m[8] = 0; + m[9] = 0; + m[10] = 1; + m[11] = 0; + + m[12] = x; + m[13] = y; + m[14] = z; + m[15] = 1; + + Mat4::MultiMatrices(cur, m, currentMatrix); +} + +void Mat4::Scale(float x, float y, float z) { + + //============= + float m[16]; + float cur[16]; + //============= + + Mat4::Copy(currentMatrix, cur); + + m[0] = x; + m[1] = 0; + m[2] = 0; + m[3] = 0; + + m[4] = 0; + m[5] = y; + m[6] = 0; + m[7] = 0; + + m[8] = 0; + m[9] = 0; + m[10] = z; + m[11] = 0; + + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 1; + + Mat4::MultiMatrices(cur, m, currentMatrix); +} + +void Mat4::Rotate(float angle, float x, float y, float z) { + + //============ + float c; + float s; + float m[16]; + float cur[16]; + //=========== + + Mat4::Copy(currentMatrix, cur); + + // normalise vector first + { + //======== + float len; + //======== + + len = sqrt(x * x + y * y + z * z); + + x /= len; + y /= len; + z /= len; + } + + c = cos(angle*3.14159265f / 180.0f); + s = sin(angle*3.14159265f / 180.0f); + + m[0] = (x*x) * (1 - c) + c; + m[1] = (y*x) * (1 - c) + (z*s); + m[2] = (x*z) * (1 - c) - (y*s); + m[3] = 0; + + m[4] = (x*y) * (1 - c) - (z*s); + m[5] = (y*y) * (1 - c) + c; + m[6] = (y*z) * (1 - c) + (x*s); + m[7] = 0; + + m[8] = (x*z) * (1 - c) + (y*s); + m[9] = (y*z) * (1 - c) - (x*s); + m[10] = (z*z) * (1 - c) + c; + m[11] = 0; + + m[12] = 0; + m[13] = 0; + m[14] = 0; + m[15] = 1; + + Mat4::MultiMatrices(cur, m, currentMatrix); +} + +void Mat4::Frustum(float left, float right, float bottom, float top, float nearVal, float farVal) { + + //===================== + float m[16]; + float cur[16]; + float x, y, a, b, c, d; + //===================== + + Mat4::Copy(currentMatrix, cur); + + x = (2.0F*nearVal) / (right - left); + y = (2.0F*nearVal) / (top - bottom); + a = (right + left) / (right - left); + b = (top + bottom) / (top - bottom); + c = -(farVal + nearVal) / (farVal - nearVal); + d = -(2.0F*farVal*nearVal) / (farVal - nearVal); + + m[0] = x; + 
m[1] = 0; + m[2] = 0; + m[3] = 0; + + m[4] = 0; + m[5] = y; + m[6] = 0; + m[7] = 0; + + m[8] = a; + m[9] = b; + m[10] = c; + m[11] = -1; + + m[12] = 0; + m[13] = 0; + m[14] = d; + m[15] = 0; + + Mat4::MultiMatrices(cur, m, currentMatrix); +} + +void Mat4::Perspective(float fovy, float aspect, float zNear, float zFar) +{ + //========= + float ymax; + float xmax; + //========= + + ymax = zNear * tanf(fovy * (float)M_PI / 360.0f); + xmax = ymax * aspect; + + Frustum(-xmax, xmax, -ymax, ymax, zNear, zFar); +} + +void Mat4::Ortho(float left, float right, float bottom, float top, float nearVal, float farVal) +{ + //================ + float m[16]; + float cur[16]; + float tx, ty, tz; + //================ + + Mat4::Copy(currentMatrix, cur); + + tx = -(right + left) / (right - left); + ty = -(top + bottom) / (top - bottom); + tz = -(farVal + nearVal) / (farVal - nearVal); + + m[0] = 2/(right-left); + m[1] = 0; + m[2] = 0; + m[3] = 0; + + m[4] = 0; + m[5] = 2/(top-bottom); + m[6] = 0; + m[7] = 0; + + m[8] = 0; + m[9] = 0; + m[10] = -2/(farVal-nearVal); + m[11] = 0; + + m[12] = tx; + m[13] = ty; + m[14] = tz; + m[15] = 1; + + Mat4::MultiMatrices(cur, m, currentMatrix); +} + +void Mat4::Transpose(float m[16]) { + + std::swap(m[1], m[4]); + std::swap(m[2], m[8]); + std::swap(m[3], m[12]); + std::swap(m[6], m[9]); + std::swap(m[7], m[13]); + std::swap(m[11], m[14]); +} + +void Mat4::MultMatrix(const float *m) +{ + if (!m) { + return; + } + + //============ + float cur[16]; + //============ + + Mat4::Copy(currentMatrix, cur); + Mat4::MultiMatrices(cur, m, currentMatrix); +} + +void Mat4::LoadMatrix(const float *m) +{ + if (!m) { + return; + } + + Mat4::Copy(m, currentMatrix); +} + +void Mat4::LoadTransposeMatrix(const float *m) +{ + if (!m) { + return; + } + + //============= + float copy[16]; + //============= + + Mat4::Copy(m, copy); + Mat4::Transpose(copy); + Mat4::LoadMatrix(copy); +} + +void Mat4::MultTransposeMatrix(const float *m) +{ + if (!m) { + return; + } + + 
//============= + float copy[16]; + //============= + + Mat4::Copy(m, copy); + Mat4::Transpose(copy); + Mat4::MultMatrix(copy); +} + +void Mat4::PushMatrix() +{ + //============== + Mat4Container m; + //============== + + if (m_vMat4.size() > 128) { + return; // check for overflow + } + + Mat4::Copy(currentMatrix, m.mat); + + m_vMat4.push_back(m); +} + +void Mat4::PopMatrix() +{ + if (m_vMat4.empty()) { + return; // check for underflow + } + + Mat4::Copy(m_vMat4.back().mat, currentMatrix); + + m_vMat4.pop_back(); +} + +// flush the matrix stack +void Mat4::Release() +{ + m_vMat4.clear(); +} + +} // New3D \ No newline at end of file diff --git a/Src/Graphics/New3D/Mat4.h b/Src/Graphics/New3D/Mat4.h new file mode 100644 index 0000000..4dcb1f0 --- /dev/null +++ b/Src/Graphics/New3D/Mat4.h @@ -0,0 +1,48 @@ +#ifndef _MAT4_H_ +#define _MAT4_H_ + +#include + +namespace New3D { + +class Mat4 +{ +public: + + Mat4(); + + void LoadIdentity (); + void Translate (float x, float y, float z); + void Rotate (float angle, float x, float y, float z); + void Scale (float x, float y, float z); + void Frustum (float left, float right, float bottom, float top, float nearVal, float farVal); + void Ortho (float left, float right, float bottom, float top, float nearVal, float farVal); + void Perspective (float fovy, float aspect, float zNear, float zFar); + void MultMatrix (const float *m); + void LoadMatrix (const float *m); + void LoadTransposeMatrix (const float *m); + void MultTransposeMatrix (const float *m); + void PushMatrix (); + void PopMatrix (); + void Release (); + + operator const float* () { return currentMatrix; } + + float currentMatrix[16]; +private: + + void MultiMatrices (const float a[16], const float b[16], float r[16]); + void Copy (const float in[16], float out[16]); + void Transpose (float m[16]); + + struct Mat4Container { + float mat[16]; // we must wrap the matrix inside a struct otherwise vector doesn't work + }; + + std::vector m_vMat4; +}; + +} // New3D + 
+#endif + diff --git a/Src/Graphics/New3D/Model.cpp b/Src/Graphics/New3D/Model.cpp new file mode 100644 index 0000000..ee9eae3 --- /dev/null +++ b/Src/Graphics/New3D/Model.cpp @@ -0,0 +1,60 @@ +#include "Model.h" + +namespace New3D { + +NodeAttributes::NodeAttributes() +{ + currentTexOffsetX = 0; + currentTexOffsetY = 0; + currentTexOffset = 0; +} + +bool NodeAttributes::Push() +{ + //============= + NodeAttribs na; + //============= + + // check for overflow + if (m_vecAttribs.size() >= 128) { + return false; + } + + na.texOffset = currentTexOffset; + na.texOffsetX = currentTexOffsetX; + na.texOffsetY = currentTexOffsetY; + + m_vecAttribs.push_back(na); + + return true; +} + +bool NodeAttributes::Pop() +{ + if (m_vecAttribs.empty()) { + return false; // check for underflow + } + + currentTexOffset = m_vecAttribs.back().texOffset; + currentTexOffsetX = m_vecAttribs.back().texOffsetX; + currentTexOffsetY = m_vecAttribs.back().texOffsetY; + + m_vecAttribs.pop_back(); + + return true; +} + +bool NodeAttributes::StackLimit() +{ + return m_vecAttribs.size() >= 1024; +} + +void NodeAttributes::Reset() +{ + currentTexOffset = 0; + currentTexOffsetX = 0; + currentTexOffsetY = 0; + m_vecAttribs.clear(); +} + +} // New3D diff --git a/Src/Graphics/New3D/Model.h b/Src/Graphics/New3D/Model.h new file mode 100644 index 0000000..24e4feb --- /dev/null +++ b/Src/Graphics/New3D/Model.h @@ -0,0 +1,127 @@ +#ifndef _MODEL_H_ +#define _MODEL_H_ + +#include "types.h" +#include +#include +#include +#include +#include "Texture.h" +#include "Mat4.h" + +namespace New3D { + +struct Vertex +{ + float pos[3]; + float normal[3]; + float texcoords[2]; + UINT8 color[4]; //rgba +}; + +struct Poly // our polys are always 3 triangles, unlike the real h/w +{ + Vertex p1; + Vertex p2; + Vertex p3; +}; + +struct R3DPoly +{ + Vertex v[4]; // just easier to have them as an array + float faceNormal[3]; // we need this to help work out poly winding, i assume the h/w uses this instead of calculating normals 
itself + int number = 4; +}; + +struct Mesh +{ + std::shared_ptr texture; + + // attributes + bool doubleSided = false; + bool textured = false; + bool polyAlpha = false; // specified in the rgba colour + bool textureAlpha = false; // use alpha in texture + bool alphaTest = false; // discard fragment based on alpha (ogl does this with fixed function) + bool lighting = false; + bool testBit = false; + bool clockWise = true; // we need to check if the matrix will change the winding + + float fogIntensity = 1.0f; + + // texture + bool mirrorU = false; + bool mirrorV = false; + + // opengl resources + int vboOffset = 0; // this will be calculated later + int triangleCount = 0; +}; + +struct SortingMesh : public Mesh // This struct temporarily holds the model data, before it gets copied to the main buffer +{ + std::vector polys; +}; + +struct Model +{ + std::vector meshes; + + //matrices + float modelMat[16]; + + + + // misc + int lutIdx = 0; +}; + +struct Viewport +{ + Mat4 projectionMatrix; // projection matrix + float lightingParams[6]; // lighting parameters (see RenderViewport() and vertex shader) + float spotEllipse[4]; // spotlight ellipse (see RenderViewport()) + float spotRange[2]; // Z range + float spotColor[3]; // color + float fogParams[5]; // fog parameters (...) 
+ int x, y; // viewport coordinates (scaled and in OpenGL format) + int width, height; // viewport dimensions (scaled for display surface size) + int priority; +}; + +class NodeAttributes +{ +public: + + NodeAttributes(); + + bool Push(); + bool Pop(); + bool StackLimit(); + void Reset(); + + int currentTexOffsetX; + int currentTexOffsetY; + int currentTexOffset; // raw value + +private: + + struct NodeAttribs + { + int texOffsetX; + int texOffsetY; + int texOffset; + }; + std::vector m_vecAttribs; +}; + +struct Node +{ + Viewport viewport; + std::vector models; +}; + +} // New3D + + +#endif \ No newline at end of file diff --git a/Src/Graphics/New3D/New3D.cpp b/Src/Graphics/New3D/New3D.cpp new file mode 100644 index 0000000..78fe11f --- /dev/null +++ b/Src/Graphics/New3D/New3D.cpp @@ -0,0 +1,1020 @@ +#include "New3D.h" +#include "PolyHeader.h" +#include "Texture.h" +#include "Vec.h" +#include // needed by gcc + +#ifndef M_PI +#define M_PI 3.14159265359 +#endif + +namespace New3D { + +CNew3D::CNew3D() +{ + m_cullingRAMLo = nullptr; + m_cullingRAMHi = nullptr; + m_polyRAM = nullptr; + m_vrom = nullptr; + m_textureRAM = nullptr; +} + +CNew3D::~CNew3D() +{ + m_vboDynamic.Destroy(); +} + +void CNew3D::AttachMemory(const UINT32 *cullingRAMLoPtr, const UINT32 *cullingRAMHiPtr, const UINT32 *polyRAMPtr, const UINT32 *vromPtr, const UINT16 *textureRAMPtr) +{ + m_cullingRAMLo = cullingRAMLoPtr; + m_cullingRAMHi = cullingRAMHiPtr; + m_polyRAM = polyRAMPtr; + m_vrom = vromPtr; + m_textureRAM = textureRAMPtr; +} + +void CNew3D::SetStep(int stepID) +{ + m_step = stepID; + + if ((m_step != 0x10) && (m_step != 0x15) && (m_step != 0x20) && (m_step != 0x21)) { + m_step = 0x10; + } + + if (m_step > 0x10) { + m_offset = 0; // culling nodes are 10 words + m_vertexFactor = (1.0f / 2048.0f); // vertices are in 13.11 format + } + else { + m_offset = 2; // 8 words + m_vertexFactor = (1.0f / 128.0f); // 17.7 + } + + m_vboDynamic.Create(GL_ARRAY_BUFFER, GL_DYNAMIC_DRAW, sizeof(Poly)* 
100000); // allocate space for 100k polys ~ 10meg +} + +bool CNew3D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXResParam, unsigned totalYResParam) +{ + // Resolution and offset within physical display area + m_xRatio = xRes / 496.0f; + m_yRatio = yRes / 384.0f; + m_xOffs = xOffset; + m_yOffs = yOffset; + m_totalXRes = totalXResParam; + m_totalYRes = totalYResParam; + + m_r3dShader.LoadShader(); + + glUseProgram(0); + + return OKAY; // OKAY ? wtf .. +} + +void CNew3D::UploadTextures(unsigned x, unsigned y, unsigned width, unsigned height) +{ + m_texSheet.Invalidate(x, y, width, height); +} + +void CNew3D::RenderScene(int priority, bool alpha) +{ + if (alpha) { + glEnable(GL_BLEND); + } + + for (auto &n : m_nodes) { + + if (n.viewport.priority != priority) { + continue; + } + + glViewport(n.viewport.x, n.viewport.y, n.viewport.width, n.viewport.height); + + glMatrixMode(GL_PROJECTION); + glLoadMatrixf(n.viewport.projectionMatrix); + + m_r3dShader.SetViewportUniforms(&n.viewport); + + for (auto &m : n.models) { + + if (m.meshes.empty()) { + continue; + } + + glMatrixMode(GL_MODELVIEW); + glLoadMatrixf(m.modelMat); + + for (auto &mesh : m.meshes) { + + if (alpha) { + if (!mesh.textureAlpha && !mesh.polyAlpha) { + continue; + } + } + else { + if (mesh.textureAlpha || mesh.polyAlpha) { + continue; + } + } + + if (mesh.texture) { + mesh.texture->BindTexture(); + mesh.texture->SetWrapMode(mesh.mirrorU, mesh.mirrorV); + } + + m_r3dShader.SetMeshUniforms(&mesh); + glDrawArrays(GL_TRIANGLES, mesh.vboOffset*3, mesh.triangleCount*3); // times 3 to convert triangles to vertices + } + } + } + + glDisable(GL_BLEND); + glDepthMask(GL_TRUE); +} + +void CNew3D::RenderFrame(void) +{ + // release any resources from last frame + m_polyBuffer.clear(); // clear dyanmic model memory buffer + m_nodes.clear(); // memory will grow during the object life time, that's fine, no need to shrink to fit + m_modelMat.Release(); // would hope we wouldn't need 
this but no harm in checking + m_nodeAttribs.Reset(); + + glDepthFunc (GL_LEQUAL); + glEnable (GL_DEPTH_TEST); + glActiveTexture (GL_TEXTURE0); + glEnable (GL_CULL_FACE); + glFrontFace (GL_CW); + + for (int pri = 0; pri <= 3; pri++) { + RenderViewport(0x800000, pri); // build model structure + } + + m_vboDynamic.Bind(true); + m_vboDynamic.BufferSubData(0, m_polyBuffer.size()*sizeof(Poly), m_polyBuffer.data()); // upload all the data to GPU in one go + + glEnableClientState(GL_VERTEX_ARRAY); + glEnableClientState(GL_NORMAL_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + glEnableClientState(GL_COLOR_ARRAY); + + // before draw, specify vertex and index arrays with their offsets, offsetof is maybe evil .. + glVertexPointer (3, GL_FLOAT, sizeof(Vertex), 0); + glNormalPointer (GL_FLOAT, sizeof(Vertex), (void*)offsetof(Vertex, normal)); + glTexCoordPointer (2, GL_FLOAT, sizeof(Vertex), (void*)offsetof(Vertex, texcoords)); + glColorPointer (4, GL_UNSIGNED_BYTE, sizeof(Vertex), (void*)offsetof(Vertex, color)); + + m_r3dShader.SetShader(true); + + for (int pri = 0; pri <= 3; pri++) { + glClear (GL_DEPTH_BUFFER_BIT); + RenderScene (pri, false); + RenderScene (pri, true); + } + + m_r3dShader.SetShader(false); // unbind shader + m_vboDynamic.Bind(false); + + glDisable(GL_CULL_FACE); + glDisableClientState(GL_VERTEX_ARRAY); + glDisableClientState(GL_NORMAL_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + glDisableClientState(GL_COLOR_ARRAY); +} + +void CNew3D::BeginFrame(void) +{ +} + +void CNew3D::EndFrame(void) +{ +} + +/****************************************************************************** +Real3D Address Translation + +Functions that interpret word-granular Real3D addresses and return pointers. 
+******************************************************************************/ + +// Translates 24-bit culling RAM addresses +const UINT32* CNew3D::TranslateCullingAddress(UINT32 addr) +{ + addr &= 0x00FFFFFF; // caller should have done this already + + if ((addr >= 0x800000) && (addr < 0x840000)) { + return &m_cullingRAMHi[addr & 0x3FFFF]; + } + else if (addr < 0x100000) { + return &m_cullingRAMLo[addr]; + } + + return NULL; +} + +// Translates model references +const UINT32* CNew3D::TranslateModelAddress(UINT32 modelAddr) +{ + modelAddr &= 0x00FFFFFF; // caller should have done this already + + if (modelAddr < 0x100000) { + return &m_polyRAM[modelAddr]; + } + else { + return &m_vrom[modelAddr]; + } +} + +bool CNew3D::DrawModel(UINT32 modelAddr) +{ + const UINT32 *modelAddress; + Model* m; + + modelAddress = TranslateModelAddress(modelAddr); + + // create a new model to push onto the vector + m_nodes.back().models.emplace_back(Model()); + + // get the pointer to the last element in the array + m = &m_nodes.back().models.back(); + + // copy lutidx - wtf is this + m->lutIdx = modelAddr & 0xFFFFFF; + + // copy model matrix + for (int i = 0; i < 16; i++) { + m->modelMat[i] = m_modelMat.currentMatrix[i]; + } + + CacheModel(m, modelAddress); + + return true; +} + +// Descends into a 10-word culling node +void CNew3D::DescendCullingNode(UINT32 addr) +{ + const UINT32 *node, *lodTable; + UINT32 matrixOffset, node1Ptr, node2Ptr; + float x, y, z; + int tx, ty; + + if (m_nodeAttribs.StackLimit()) { + return; + } + + node = TranslateCullingAddress(addr); + + if (NULL == node) { + return; + } + + // Extract known fields + node1Ptr = node[0x07 - m_offset]; + node2Ptr = node[0x08 - m_offset]; + matrixOffset = node[0x03 - m_offset] & 0xFFF; + + x = *(float *)&node[0x04 - m_offset]; // magic numbers everywhere ! 
+ y = *(float *)&node[0x05 - m_offset]; + z = *(float *)&node[0x06 - m_offset]; + + m_nodeAttribs.Push(); // save current attribs + + if (!m_offset) // Step 1.5+ + { + tx = 32 * ((node[0x02] >> 7) & 0x3F); + ty = 32 * (node[0x02] & 0x3F) + ((node[0x02] & 0x4000) ? 1024 : 0); // TODO: 5 or 6 bits for Y coord? + + // apply texture offsets, else retain current ones + if ((node[0x02] & 0x8000)) { + m_nodeAttribs.currentTexOffsetX = tx; + m_nodeAttribs.currentTexOffsetY = ty; + m_nodeAttribs.currentTexOffset = node[0x02] & 0x7FFF; + } + } + + // Apply matrix and translation + m_modelMat.PushMatrix(); + + // apply translation vector + if ((node[0x00] & 0x10)) { + m_modelMat.Translate(x, y, z); + } + // multiply matrix, if specified + else if (matrixOffset) { + MultMatrix(matrixOffset,m_modelMat); + } + + // Descend down first link + if ((node[0x00] & 0x08)) // 4-element LOD table + { + lodTable = TranslateCullingAddress(node1Ptr); + + if (NULL != lodTable) { + if ((node[0x03 - m_offset] & 0x20000000)) { + DescendCullingNode(lodTable[0] & 0xFFFFFF); + } + else { + DrawModel(lodTable[0] & 0xFFFFFF); //TODO + } + } + } + else { + DescendNodePtr(node1Ptr); + } + + // Proceed to second link + m_modelMat.PopMatrix(); + + // seems to indicate second link is invalid (fixes circular references) + if ((node[0x00] & 0x07) != 0x06) { + DescendNodePtr(node2Ptr); + } + + // Restore old texture offsets + m_nodeAttribs.Pop(); +} + +void CNew3D::DescendNodePtr(UINT32 nodeAddr) +{ + // Ignore null links + if ((nodeAddr & 0x00FFFFFF) == 0) { + return; + } + + switch ((nodeAddr >> 24) & 0xFF) // pointer type encoded in upper 8 bits + { + case 0x00: // culling node + DescendCullingNode(nodeAddr & 0xFFFFFF); + break; + case 0x01: // model (perhaps bit 1 is a flag in this case?) 
+ case 0x03: + DrawModel(nodeAddr & 0xFFFFFF); + break; + case 0x04: // pointer list + DescendPointerList(nodeAddr & 0xFFFFFF); + break; + default: + //printf("ATTENTION: Unknown pointer format: %08X\n\n", nodeAddr); + break; + } +} + +void CNew3D::DescendPointerList(UINT32 addr) +{ + const UINT32* list; + UINT32 nodeAddr; + int listEnd; + + if (m_listDepth > 2) { // several Step 2.1 games require this safeguard + return; + } + + list = TranslateCullingAddress(addr); + + if (NULL == list) { + return; + } + + m_listDepth++; + + // Traverse the list forward and print it out + listEnd = 0; + + while (1) + { + if ((list[listEnd] & 0x02000000)) { // end of list (?) + break; + } + + if ((list[listEnd] == 0) || (((list[listEnd]) >> 24) != 0)) { + listEnd--; // back up to last valid list element + break; + } + + listEnd++; + } + + for (int i = 0; i <= listEnd; i++) { + + nodeAddr = list[i] & 0x00FFFFFF; // clear upper 8 bits to ensure this is processed as a culling node + + if (!(list[i] & 0x01000000)) { //Fighting Vipers + + if ((nodeAddr != 0) && (nodeAddr != 0x800800)) { + DescendCullingNode(nodeAddr); + } + } + } + + + /* + // Traverse the list backward and descend into each pointer + while (listEnd >= 0) + { + nodeAddr = list[listEnd] & 0x00FFFFFF; // clear upper 8 bits to ensure this is processed as a culling node + + if (!(list[listEnd] & 0x01000000)) { //Fighting Vipers + + if ((nodeAddr != 0) && (nodeAddr != 0x800800)) { + DescendCullingNode(nodeAddr); + } + } + + listEnd--; + } + */ + + m_listDepth--; +} + + +/****************************************************************************** +Matrix Stack +******************************************************************************/ + +// Macro to generate column-major (OpenGL) index from y,x subscripts +#define CMINDEX(y,x) (x*4+y) + +/* +* MultMatrix(): +* +* Multiplies the matrix stack by the specified Real3D matrix. The matrix +* index is a 12-bit number specifying a matrix number relative to the base. 
+* The base matrix MUST be set up before calling this function. +*/ +void CNew3D::MultMatrix(UINT32 matrixOffset, Mat4& mat) +{ + GLfloat m[4*4]; + const float *src = &m_matrixBasePtr[matrixOffset * 12]; + + if (m_matrixBasePtr == NULL) // LA Machineguns + return; + + m[CMINDEX(0, 0)] = src[3]; + m[CMINDEX(0, 1)] = src[4]; + m[CMINDEX(0, 2)] = src[5]; + m[CMINDEX(0, 3)] = src[0]; + m[CMINDEX(1, 0)] = src[6]; + m[CMINDEX(1, 1)] = src[7]; + m[CMINDEX(1, 2)] = src[8]; + m[CMINDEX(1, 3)] = src[1]; + m[CMINDEX(2, 0)] = src[9]; + m[CMINDEX(2, 1)] = src[10]; + m[CMINDEX(2, 2)] = src[11]; + m[CMINDEX(2, 3)] = src[2]; + m[CMINDEX(3, 0)] = 0.0; + m[CMINDEX(3, 1)] = 0.0; + m[CMINDEX(3, 2)] = 0.0; + m[CMINDEX(3, 3)] = 1.0; + + mat.MultMatrix(m); +} + +/* +* InitMatrixStack(): +* +* Initializes the modelview (model space -> view space) matrix stack and +* Real3D coordinate system. These are the last transforms to be applied (and +* the first to be defined on the stack) before projection. +* +* Model 3 games tend to define the following unusual base matrix: +* +* 0 0 -1 0 +* 1 0 0 0 +* 0 -1 0 0 +* 0 0 0 1 +* +* When this is multiplied by a column vector, the output is: +* +* -Z +* X +* -Y +* 1 +* +* My theory is that the Real3D GPU accepts vectors in Z,X,Y order. The games +* store everything as X,Y,Z and perform the translation at the end. The Real3D +* also has Y and Z coordinates opposite of the OpenGL convention. This +* function inserts a compensating matrix to undo these things. +* +* NOTE: This function assumes we are in GL_MODELVIEW matrix mode. 
+*/ + +void CNew3D::InitMatrixStack(UINT32 matrixBaseAddr, Mat4& mat) +{ + GLfloat m[4 * 4]; + + // This matrix converts vectors back from the weird Model 3 Z,X,Y ordering + // and also into OpenGL viewspace (-Y,-Z) + m[CMINDEX(0, 0)] = 0.0; m[CMINDEX(0, 1)] = 1.0; m[CMINDEX(0, 2)] = 0.0; m[CMINDEX(0, 3)] = 0.0; + m[CMINDEX(1, 0)] = 0.0; m[CMINDEX(1, 1)] = 0.0; m[CMINDEX(1, 2)] =-1.0; m[CMINDEX(1, 3)] = 0.0; + m[CMINDEX(2, 0)] =-1.0; m[CMINDEX(2, 1)] = 0.0; m[CMINDEX(2, 2)] = 0.0; m[CMINDEX(2, 3)] = 0.0; + m[CMINDEX(3, 0)] = 0.0; m[CMINDEX(3, 1)] = 0.0; m[CMINDEX(3, 2)] = 0.0; m[CMINDEX(3, 3)] = 1.0; + + if (m_step > 0x10) { + mat.LoadMatrix(m); + } + else { + // Scaling seems to help w/ Step 1.0's extremely large coordinates + GLfloat s = 1.0f / 2048.0f; // this will fuck up normals + mat.LoadIdentity(); + mat.Scale(s, s, s); + mat.MultMatrix(m); + } + + // Set matrix base address and apply matrix #0 (coordinate system matrix) + m_matrixBasePtr = (float *)TranslateCullingAddress(matrixBaseAddr); + MultMatrix(0, mat); +} + +// Draws viewports of the given priority +void CNew3D::RenderViewport(UINT32 addr, int pri) +{ + GLfloat color[8][3] = // RGB1 translation + { + { 0.0, 0.0, 0.0 }, // off + { 0.0, 0.0, 1.0 }, // blue + { 0.0, 1.0, 0.0 }, // green + { 0.0, 1.0, 1.0 }, // cyan + { 1.0, 0.0, 0.0 }, // red + { 1.0, 0.0, 1.0 }, // purple + { 1.0, 1.0, 0.0 }, // yellow + { 1.0, 1.0, 1.0 } // white + }; + const UINT32 *vpnode; + UINT32 nextAddr, nodeAddr, matrixBase; + int curPri; + int vpX, vpY, vpWidth, vpHeight; + int spotColorIdx; + GLfloat vpTopAngle, vpBotAngle, fovYDegrees; + GLfloat scrollFog, scrollAtt; + Viewport* vp; + + // Translate address and obtain pointer + vpnode = TranslateCullingAddress(addr); + + if (NULL == vpnode) { + return; + } + + curPri = (vpnode[0x00] >> 3) & 3; // viewport priority + nextAddr = vpnode[0x01] & 0xFFFFFF; // next viewport + nodeAddr = vpnode[0x02]; // scene database node pointer + + // Recursively process next viewport + if 
(vpnode[0x01] == 0) { // memory probably hasn't been set up yet, abort + return; + } + if (vpnode[0x01] != 0x01000000) { + RenderViewport(vpnode[0x01], pri); + } + + // If the priority doesn't match, do not process + if (curPri != pri) { + return; + } + + // create node object + m_nodes.emplace_back(Node()); + m_nodes.back().models.reserve(2048); // create space for models + + // get pointer to its viewport + vp = &m_nodes.back().viewport; + + vp->priority = pri; + + // Fetch viewport parameters (TO-DO: would rounding make a difference?) + vpX = (vpnode[0x1A] & 0xFFFF) >> 4; // viewport X (12.4 fixed point) + vpY = (vpnode[0x1A] >> 20) & 0xFFF; // viewport Y (12.4) + vpWidth = (vpnode[0x14] & 0xFFFF) >> 2; // width (14.2) + vpHeight = (vpnode[0x14] >> 18) & 0x3FFF; // height (14.2) + matrixBase = vpnode[0x16] & 0xFFFFFF; // matrix base address + + // Field of view and clipping + vpTopAngle = (float)asin(*(float *)&vpnode[0x0E]); // FOV Y upper half-angle (radians) + vpBotAngle = (float)asin(*(float *)&vpnode[0x12]); // FOV Y lower half-angle + fovYDegrees = (vpTopAngle + vpBotAngle)*(float)(180.0 / 3.14159265358979323846); + + // TO-DO: investigate clipping planes + + //if (g_Config.wideScreen && (vpX == 0) && (vpWidth >= 495) && (vpY == 0) && (vpHeight >= 383)) // only expand viewports that occupy whole screen + //if (0) + if ((vpX == 0) && (vpWidth >= 495) && (vpY == 0) && (vpHeight >= 383)) + { + // Wide screen hack only modifies X axis and not the Y FOV + vp->x = 0; + vp->y = m_yOffs + (GLint)((float)(384 - (vpY + vpHeight))*m_yRatio); + vp->width = m_totalXRes; + vp->height = (GLint)((float)vpHeight*m_yRatio); + + vp->projectionMatrix.Perspective(fovYDegrees, (GLfloat)vp->width / (GLfloat)vp->height, 0.1f, 1e5); // use actual full screen ratio to get proper X FOV + } + else + { + vp->x = m_xOffs + (GLint)((float)vpX*m_xRatio); + vp->y = m_yOffs + (GLint)((float)(384 - (vpY + vpHeight))*m_yRatio); + vp->width = (GLint)((float)vpWidth*m_xRatio); + vp->height = 
(GLint)((float)vpHeight*m_yRatio); + + vp->projectionMatrix.Perspective(fovYDegrees, (GLfloat)vpWidth / (GLfloat)vpHeight, 0.1f, 1e5); // use Model 3 viewport ratio + } + + // Lighting (note that sun vector points toward sun -- away from vertex) + vp->lightingParams[0] = *(float *)&vpnode[0x05]; // sun X + vp->lightingParams[1] = *(float *)&vpnode[0x06]; // sun Y + vp->lightingParams[2] = *(float *)&vpnode[0x04]; // sun Z + vp->lightingParams[3] = *(float *)&vpnode[0x07]; // sun intensity + vp->lightingParams[4] = (float)((vpnode[0x24] >> 8) & 0xFF) * (1.0f / 255.0f); // ambient intensity + vp->lightingParams[5] = 0.0; // reserved + + // Spotlight + spotColorIdx = (vpnode[0x20] >> 11) & 7; // spotlight color index + vp->spotEllipse[0] = (float)((vpnode[0x1E] >> 3) & 0x1FFF); // spotlight X position (fractional component?) + vp->spotEllipse[1] = (float)((vpnode[0x1D] >> 3) & 0x1FFF); // spotlight Y + vp->spotEllipse[2] = (float)((vpnode[0x1E] >> 16) & 0xFFFF); // spotlight X size (16-bit? 
May have fractional component below bit 16) + vp->spotEllipse[3] = (float)((vpnode[0x1D] >> 16) & 0xFFFF); // spotlight Y size + + vp->spotRange[0] = 1.0f / (*(float *)&vpnode[0x21]); // spotlight start + vp->spotRange[1] = *(float *)&vpnode[0x1F]; // spotlight extent + vp->spotColor[0] = color[spotColorIdx][0]; // spotlight color + vp->spotColor[1] = color[spotColorIdx][1]; + vp->spotColor[2] = color[spotColorIdx][2]; + + // Spotlight is applied on a per pixel basis, must scale its position and size to screen + vp->spotEllipse[1] = 384.0f - vp->spotEllipse[1]; + vp->spotRange[1] += vp->spotRange[0]; // limit + vp->spotEllipse[2] = 496.0f / sqrt(vp->spotEllipse[2]); // spotlight appears to be specified in terms of physical resolution (unconfirmed) + vp->spotEllipse[3] = 384.0f / sqrt(vp->spotEllipse[3]); + + // Scale the spotlight to the OpenGL viewport + vp->spotEllipse[0] = vp->spotEllipse[0] * m_xRatio + m_xOffs; + vp->spotEllipse[1] = vp->spotEllipse[1] * m_yRatio + m_yOffs; + vp->spotEllipse[2] *= m_xRatio; + vp->spotEllipse[3] *= m_yRatio; + + // Fog + vp->fogParams[0] = (float)((vpnode[0x22] >> 16) & 0xFF) * (1.0f / 255.0f); // fog color R + vp->fogParams[1] = (float)((vpnode[0x22] >> 8) & 0xFF) * (1.0f / 255.0f); // fog color G + vp->fogParams[2] = (float)((vpnode[0x22] >> 0) & 0xFF) * (1.0f / 255.0f); // fog color B + vp->fogParams[3] = *(float *)&vpnode[0x23]; // fog density + vp->fogParams[4] = (float)(INT16)(vpnode[0x25] & 0xFFFF)*(1.0f / 255.0f); // fog start + + + if (std::isinf(vp->fogParams[3]) || std::isnan(vp->fogParams[3]) || std::isinf(vp->fogParams[4]) || std::isnan(vp->fogParams[4])) { // Star Wars Trilogy + vp->fogParams[3] = vp->fogParams[4] = 0.0f; + } + + // Unknown light/fog parameters + scrollFog = (float)(vpnode[0x20] & 0xFF) * (1.0f / 255.0f); // scroll fog + scrollAtt = (float)(vpnode[0x24] & 0xFF) * (1.0f / 255.0f); // scroll attenuation + + // Clear texture offsets before proceeding + m_nodeAttribs.Reset(); + + // Set up coordinate 
system and base matrix + InitMatrixStack(matrixBase, m_modelMat); + + // Safeguard: weird coordinate system matrices usually indicate scenes that will choke the renderer + if (NULL != m_matrixBasePtr) + { + float m21, m32, m13; + + // Get the three elements that are usually set and see if their magnitudes are 1 + m21 = m_matrixBasePtr[6]; + m32 = m_matrixBasePtr[10]; + m13 = m_matrixBasePtr[5]; + + m21 *= m21; + m32 *= m32; + m13 *= m13; + + if ((m21>1.05) || (m21<0.95)) + return; + if ((m32>1.05) || (m32<0.95)) + return; + if ((m13>1.05) || (m13<0.95)) + return; + } + + m_listDepth = 0; + + // Descend down the node link: Use recursive traversal + DescendNodePtr(nodeAddr); +} + +void CNew3D::CopyVertexData(R3DPoly& r3dPoly, std::vector& polyArray) +{ + //==================== + Poly p; + V3::Vec3 normal; + float dotProd; + float zFlip; + bool clockWise; + //==================== + + V3::createNormal(r3dPoly.v[0].pos, r3dPoly.v[1].pos, r3dPoly.v[2].pos, normal); + + dotProd = V3::dotProduct(normal, r3dPoly.faceNormal); + zFlip = -1.0f*m_matrixBasePtr[0x5]; // coordinate system m13 component + clockWise = (zFlip*dotProd >= 0.0); + + if (clockWise) { + p.p1 = r3dPoly.v[0]; + p.p2 = r3dPoly.v[1]; + p.p3 = r3dPoly.v[2]; + } + else { + p.p1 = r3dPoly.v[2]; + p.p2 = r3dPoly.v[1]; + p.p3 = r3dPoly.v[0]; + } + + polyArray.emplace_back(p); + + if (r3dPoly.number == 4) { + + if (clockWise) { + p.p1 = r3dPoly.v[0]; + p.p2 = r3dPoly.v[2]; + p.p3 = r3dPoly.v[3]; + } + else { + p.p1 = r3dPoly.v[0]; + p.p2 = r3dPoly.v[3]; + p.p3 = r3dPoly.v[2]; + } + + polyArray.emplace_back(p); + } +} + +void CNew3D::CacheModel(Model *m, const UINT32 *data) +{ + Vertex prev[4]; + PolyHeader ph; + int numPolys = 0; + bool done = false; + UINT64 lastHash = -1; + SortingMesh* currentMesh = nullptr; + + std::shared_ptr tex; + std::map sMap; + + if (data == NULL) + return; + + ph = data; + int numTriangles = ph.NumTrianglesTotal(); + + // Cache all polygons + while (!done) + { + R3DPoly p; // current 
polygon + GLfloat uvScale; + int i, j; + bool validPoly = true; + + ph = data; + + if (ph.header[6] == 0) { + break; + } + + if ((ph.header[0] & 0x100) && (ph.header[0] & 0x200)) { // assuming these two bits mean z and colour writes are disabled + validPoly = false; + } + else { + if (!numPolys && (ph.NumSharedVerts() != 0)) { // sharing vertices, but we haven't started the model yet + printf("incomplete data\n"); + validPoly = false; + } + } + + // Set current header pointer (header is 7 words) + data += 7; // data will now point to first vertex + + // create a hash value based on poly attributes -todo add more attributes + auto hash = ph.Hash(m_nodeAttribs.currentTexOffsetX, m_nodeAttribs.currentTexOffsetY); + + if (hash != lastHash && validPoly) { + + if (sMap.count(hash) == 0) { + + sMap[hash] = SortingMesh(); + + currentMesh = &sMap[hash]; + + //make space for our vertices + currentMesh->polys.reserve(numTriangles); + + //copy attributes + currentMesh->doubleSided = ph.DoubleSided(); + currentMesh->mirrorU = ph.TexUMirror(); + currentMesh->mirrorV = ph.TexVMirror(); + currentMesh->textured = ph.TexEnabled(); + currentMesh->alphaTest = ph.AlphaTest(); + currentMesh->textureAlpha = ph.TextureAlpha(); + currentMesh->polyAlpha = ph.PolyAlpha(); + currentMesh->lighting = ph.LightEnabled(); + + if (ph.header[6] & 0x10000) { + currentMesh->testBit = true; + } + + if (!ph.Luminous()) { + currentMesh->fogIntensity = 1.0f; + } + else { + currentMesh->fogIntensity = ph.LightModifier(); + } + + if (ph.TexEnabled()) { + currentMesh->texture = m_texSheet.BindTexture(m_textureRAM, ph.TexFormat(), ph.TexUMirror(), ph.TexVMirror(), ph.X(m_nodeAttribs.currentTexOffsetX), ph.Y(m_nodeAttribs.currentTexOffsetY), ph.TexWidth(), ph.TexHeight()); + } + } + + currentMesh = &sMap[hash]; + + if (ph.TexEnabled()) { + tex = currentMesh->texture; + } + else { + tex = nullptr; + } + } + + if (validPoly) { + lastHash = hash; + } + + // Obtain basic polygon parameters + done = ph.LastPoly(); 
+ p.number = ph.NumVerts(); + uvScale = ph.UVScale(); + + ph.FaceNormal(p.faceNormal); + + // Fetch reused vertices according to bitfield, then new verts + i = 0; + j = 0; + for (i = 0; i < 4; i++) // up to 4 reused vertices + { + if (ph.SharedVertex(i)) + { + p.v[j] = prev[i]; + ++j; + } + } + + for (; j < p.number; j++) // remaining vertices are new and defined here + { + // Fetch vertices + UINT32 ix = data[0]; + UINT32 iy = data[1]; + UINT32 iz = data[2]; + UINT32 it = data[3]; + + // Decode vertices + p.v[j].pos[0] = (GLfloat)(((INT32)ix) >> 8) * m_vertexFactor; + p.v[j].pos[1] = (GLfloat)(((INT32)iy) >> 8) * m_vertexFactor; + p.v[j].pos[2] = (GLfloat)(((INT32)iz) >> 8) * m_vertexFactor; + + p.v[j].normal[0] = p.faceNormal[0] + (GLfloat)(INT8)(ix & 0xFF); // vertex normals are offset from polygon normal - we can normalise them in the shader + p.v[j].normal[1] = p.faceNormal[1] + (GLfloat)(INT8)(iy & 0xFF); + p.v[j].normal[2] = p.faceNormal[2] + (GLfloat)(INT8)(iz & 0xFF); + + if ((ph.header[1] & 2) == 0) { + UINT32 colorIdx = ((ph.header[4] >> 20) & 0x7FF); + p.v[j].color[0] = (m_polyRAM[0x400 + colorIdx] & 0xFF); + p.v[j].color[1] = (m_polyRAM[0x400 + colorIdx] >> 8) & 0xFF; + p.v[j].color[2] = (m_polyRAM[0x400 + colorIdx] >> 16) & 0xFF; + } + else if (ph.FixedShading()) { + UINT8 shade = ph.ShadeValue(); + p.v[j].color[0] = shade; + p.v[j].color[1] = shade; + p.v[j].color[2] = shade; + } + else { + p.v[j].color[0] = (ph.header[4] >> 24); + p.v[j].color[1] = (ph.header[4] >> 16) & 0xFF; + p.v[j].color[2] = (ph.header[4] >> 8) & 0xFF; + } + + if ((ph.header[6] & 0x00800000)) { // if set, polygon is opaque + p.v[j].color[3] = 255; + } + else { + p.v[j].color[3] = ph.Transparency(); + } + + float texU, texV = 0; + + // tex coords + if (tex) { + tex->GetCoordinates((UINT16)(it >> 16), (UINT16)(it & 0xFFFF), uvScale, texU, texV); + } + + p.v[j].texcoords[0] = texU; + p.v[j].texcoords[1] = texV; + + data += 4; + } + + // Copy current vertices into previous vertex 
array + for (i = 0; i < 4 && validPoly; i++) { + prev[i] = p.v[i]; + } + + // Copy this polygon into the model buffer + if (validPoly) { + CopyVertexData(p, currentMesh->polys); + numPolys++; + } + } + + bool cw = ClockWiseWinding(); + + //sorted the data, now copy to main data structures + + // we know how many meshes we have so reserve appropriate space + m->meshes.reserve(sMap.size()); + + for (auto& it : sMap) { + + // calculate VBO values for current mesh + it.second.vboOffset = m_polyBuffer.size(); + it.second.triangleCount = it.second.polys.size(); + //it.second.clockWise = cw; + + // copy poly data to main buffer + m_polyBuffer.insert(m_polyBuffer.end(), it.second.polys.begin(), it.second.polys.end()); + + //copy the temp mesh into the model structure + //this will lose the associated vertex data, which is now copied to the main buffer anyway + m->meshes.push_back(it.second); + } +} + +// Macro to generate column-major (OpenGL) index from y,x subscripts +#define CMINDEX(y,x) (x*4+y) + +// 3x3 matrix used (upper-left of m[]) +static void MultMat3Vec3(GLfloat out[3], GLfloat m[4 * 4], GLfloat v[3]) +{ + out[0] = m[CMINDEX(0, 0)] * v[0] + m[CMINDEX(0, 1)] * v[1] + m[CMINDEX(0, 2)] * v[2]; + out[1] = m[CMINDEX(1, 0)] * v[0] + m[CMINDEX(1, 1)] * v[1] + m[CMINDEX(1, 2)] * v[2]; + out[2] = m[CMINDEX(2, 0)] * v[0] + m[CMINDEX(2, 1)] * v[1] + m[CMINDEX(2, 2)] * v[2]; +} + +static GLfloat Sign(GLfloat x) +{ + if (x > 0.0f) + return 1.0f; + else if (x < 0.0f) + return -1.0f; + return 0.0f; +} + +// Inverts and transposes a 3x3 matrix (upper-left of the 4x4), returning a +// 4x4 matrix with the extra components undefined (do not use them!) 
+static void InvertTransposeMat3(GLfloat out[4 * 4], GLfloat m[4 * 4]) +{ + GLfloat invDet; + GLfloat a00 = m[CMINDEX(0, 0)], a01 = m[CMINDEX(0, 1)], a02 = m[CMINDEX(0, 2)]; + GLfloat a10 = m[CMINDEX(1, 0)], a11 = m[CMINDEX(1, 1)], a12 = m[CMINDEX(1, 2)]; + GLfloat a20 = m[CMINDEX(2, 0)], a21 = m[CMINDEX(2, 1)], a22 = m[CMINDEX(2, 2)]; + + invDet = 1.0f / (a00*(a22*a11 - a21*a12) - a10*(a22*a01 - a21*a02) + a20*(a12*a01 - a11*a02)); + out[CMINDEX(0, 0)] = invDet*(a22*a11 - a21*a12); out[CMINDEX(1, 0)] = invDet*(-(a22*a01 - a21*a02)); out[CMINDEX(2, 0)] = invDet*(a12*a01 - a11*a02); + out[CMINDEX(0, 1)] = invDet*(-(a22*a10 - a20*a12)); out[CMINDEX(1, 1)] = invDet*(a22*a00 - a20*a02); out[CMINDEX(2, 1)] = invDet*(-(a12*a00 - a10*a02)); + out[CMINDEX(0, 2)] = invDet*(a21*a10 - a20*a11); out[CMINDEX(1, 2)] = invDet*(-(a21*a00 - a20*a01)); out[CMINDEX(2, 2)] = invDet*(a11*a00 - a10*a01); +} + +bool CNew3D::ClockWiseWinding() +{ + GLfloat x[3] = { 1.0f, 0.0f, 0.0f }; + GLfloat y[3] = { 0.0f, 1.0f, 0.0f }; + GLfloat z[3] = { 0.0f, 0.0f, -1.0f*m_matrixBasePtr[0x5] }; + GLfloat m[4 * 4]; + GLfloat xT[3], yT[3], zT[3], pT[3]; + + InvertTransposeMat3(m, m_modelMat.currentMatrix); + MultMat3Vec3(xT, m_modelMat.currentMatrix, x); + MultMat3Vec3(yT, m_modelMat.currentMatrix, y); + MultMat3Vec3(zT, m, z); + V3::crossProduct(pT, xT, yT); + + float s = Sign(zT[2] * pT[2]); + + if (s < 0.0f) { + return false; + } + else if (s > 0.0f) { + return true; + } + else { + int debugbreak = 0; + return false; + } +} + +} // New3D diff --git a/Src/Graphics/New3D/New3D.h b/Src/Graphics/New3D/New3D.h new file mode 100644 index 0000000..ae4fea4 --- /dev/null +++ b/Src/Graphics/New3D/New3D.h @@ -0,0 +1,216 @@ +/** +** Supermodel +** A Sega Model 3 Arcade Emulator. +** Copyright 2011 Bart Trzynadlowski, Nik Henson +** +** This file is part of Supermodel. 
+** +** Supermodel is free software: you can redistribute it and/or modify it under +** the terms of the GNU General Public License as published by the Free +** Software Foundation, either version 3 of the License, or (at your option) +** any later version. +** +** Supermodel is distributed in the hope that it will be useful, but WITHOUT +** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +** more details. +** +** You should have received a copy of the GNU General Public License along +** with Supermodel. If not, see . +**/ + +/* +* New3D.h +* +* Header file defining the CNew3D class: OpenGL Real3D graphics engine. +*/ + +#ifndef INCLUDED_NEW3D_H +#define INCLUDED_NEW3D_H + +#include "Pkgs/glew.h" +#include "Types.h" +#include "TextureSheet.h" +#include "Graphics/IRender3D.h" +#include "Model.h" +#include "Mat4.h" +#include "R3DShader.h" +#include "VBO.h" + +namespace New3D { + +class CNew3D : public IRender3D +{ +public: + /* + * RenderFrame(void): + * + * Renders the complete scene database. Must be called between BeginFrame() and + * EndFrame(). This function traverses the scene database and builds up display + * lists. + */ + void RenderFrame(void); + + /* + * BeginFrame(void): + * + * Prepare to render a new frame. Must be called once per frame prior to + * drawing anything. + */ + void BeginFrame(void); + + /* + * EndFrame(void): + * + * Signals the end of rendering for this frame. Must be called last during + * the frame. + */ + void EndFrame(void); + + /* + * UploadTextures(x, y, width, height): + * + * Signals that a portion of texture RAM has been updated. + * + * Parameters: + * x X position within texture RAM. + * y Y position within texture RAM. + * width Width of texture data in texels. + * height Height. 
+ */ + void UploadTextures(unsigned x, unsigned y, unsigned width, unsigned height); + + /* + * AttachMemory(cullingRAMLoPtr, cullingRAMHiPtr, polyRAMPtr, vromPtr, + * textureRAMPtr): + * + * Attaches RAM and ROM areas. This must be done prior to any rendering + * otherwise the program may crash with an access violation. + * + * Parameters: + * cullingRAMLoPtr Pointer to low culling RAM (4 MB). + * cullingRAMHiPtr Pointer to high culling RAM (1 MB). + * polyRAMPtr Pointer to polygon RAM (4 MB). + * vromPtr Pointer to video ROM (64 MB). + * textureRAMPtr Pointer to texture RAM (8 MB). + */ + void AttachMemory(const UINT32 *cullingRAMLoPtr, + const UINT32 *cullingRAMHiPtr, const UINT32 *polyRAMPtr, + const UINT32 *vromPtr, const UINT16 *textureRAMPtr); + + /* + * SetStep(stepID): + * + * Sets the Model 3 hardware stepping, which also determines the Real3D + * functionality. The default is Step 1.0. This should be called prior to + * any other emulation functions and after Init(). + * + * Parameters: + * stepID 0x10 for Step 1.0, 0x15 for Step 1.5, 0x20 for Step 2.0, + * or 0x21 for Step 2.1. Anything else defaults to 1.0. + */ + void SetStep(int stepID); + + /* + * Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes): + * + * One-time initialization of the context. Must be called before any other + * members (meaning it should be called even before being attached to any + * other objects that want to use it). + * + * External shader files are loaded according to configuration settings. + * + * Parameters: + * xOffset X offset of the viewable area within OpenGL display + * surface, in pixels. + * yOffset Y offset. + * xRes Horizontal resolution of the viewable area. + * yRes Vertical resolution. + * totalXRes Horizontal resolution of the complete display area. + * totalYRes Vertical resolution. + * + * Returns: + * OKAY is successful, otherwise FAILED if a non-recoverable error + * occurred. 
Any allocated memory will not be freed until the + * destructor is called. Prints own error messages. + */ + bool Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes); + + /* + * CRender3D(void): + * ~CRender3D(void): + * + * Constructor and destructor. + */ + CNew3D(void); + ~CNew3D(void); + +private: + /* + * Private Members + */ + + // Real3D address translation + const UINT32 *TranslateCullingAddress(UINT32 addr); + const UINT32 *TranslateModelAddress(UINT32 addr); + + // Matrix stack + void MultMatrix(UINT32 matrixOffset, Mat4& mat); + void InitMatrixStack(UINT32 matrixBaseAddr, Mat4& mat); + + // Scene database traversal + bool DrawModel(UINT32 modelAddr); + void DescendCullingNode(UINT32 addr); + void DescendPointerList(UINT32 addr); + void DescendNodePtr(UINT32 nodeAddr); + void RenderViewport(UINT32 addr, int pri); + + // building the scene + void CacheModel(Model *m, const UINT32 *data); + void CopyVertexData(R3DPoly& r3dPoly, std::vector& polyArray); + + enum class AlphaType{ none, poly, texture }; + void RenderScene(int priority, bool alpha); + bool ClockWiseWinding(); // calculate winding with current matrix + + /* + * Data + */ + + // Stepping + int m_step; + int m_offset; // offset to subtract for words 3 and higher of culling nodes + float m_vertexFactor; // fixed-point conversion factor for vertices + + // Memory (passed from outside) + const UINT32 *m_cullingRAMLo; // 4 MB + const UINT32 *m_cullingRAMHi; // 1 MB + const UINT32 *m_polyRAM; // 4 MB + const UINT32 *m_vrom; // 64 MB + const UINT16 *m_textureRAM; // 8 MB + + // Resolution and scaling factors (to support resolutions higher than 496x384) and offsets + float m_xRatio, m_yRatio; + unsigned m_xOffs, m_yOffs; + unsigned m_totalXRes, m_totalYRes; + + // Real3D Base Matrix Pointer + const float *m_matrixBasePtr; + + TextureSheet m_texSheet; + NodeAttributes m_nodeAttribs; + Mat4 m_modelMat; // current modelview matrix + int m_listDepth; 
+ + std::vector m_nodes; // build the scene + std::vector m_polyBuffer; // we actually hold the vertex data here, one buffer to send to opengl, instead of 2000+ small ones. + + VBO m_vboDynamic; // dynamic data from poly ram, rom polys can go in a different buffer + + R3DShader m_r3dShader; + int m_currentVPPriority; +}; + +} // New3D + +#endif // INCLUDED_NEW3D_H diff --git a/Src/Graphics/New3D/OglStateCache.cpp b/Src/Graphics/New3D/OglStateCache.cpp new file mode 100644 index 0000000..66f218b --- /dev/null +++ b/Src/Graphics/New3D/OglStateCache.cpp @@ -0,0 +1,2 @@ +namespace New3D { +} // New3D diff --git a/Src/Graphics/New3D/OglStateCache.h b/Src/Graphics/New3D/OglStateCache.h new file mode 100644 index 0000000..66f218b --- /dev/null +++ b/Src/Graphics/New3D/OglStateCache.h @@ -0,0 +1,2 @@ +namespace New3D { +} // New3D diff --git a/Src/Graphics/New3D/PolyHeader.cpp b/Src/Graphics/New3D/PolyHeader.cpp new file mode 100644 index 0000000..96fdea2 --- /dev/null +++ b/Src/Graphics/New3D/PolyHeader.cpp @@ -0,0 +1,320 @@ +#include "Supermodel.h" +#include "PolyHeader.h" + +namespace New3D { + +PolyHeader::PolyHeader() +{ + header = NULL; +} + +PolyHeader::PolyHeader(UINT32* h) +{ + header = h; +} + +void PolyHeader::operator = (const UINT32* h) +{ + header = (UINT32*)h; +} + +UINT32* PolyHeader::StartOfData() +{ + return header + 7; // the header is 7 32-bit words long (header is a UINT32*), data immediately follows +} + +bool PolyHeader::NextPoly() +{ + if (LastPoly()) { + return false; + } + + header += 7 + (NumVerts() - NumSharedVerts()) * 4; // skip the 7 header words plus 4 words for each vertex that is not shared from the previous polygon + + return true; +} + +int PolyHeader::NumPolysTotal() +{ + UINT32* start = header; // save start address + int count = 1; + + while (NextPoly()) { + count++; + } + + header = start; // restore start address + + return count; +} + +int PolyHeader::NumTrianglesTotal() +{ + if (header[6] == 0) { + return 0; // no poly data + } + + UINT32* start = header; // save start address + + int count = (NumVerts() == 4) ? 
2 : 1; + + while (NextPoly()) { + count += (NumVerts() == 4) ? 2 : 1; + } + + header = start; // restore start address + + return count; +} + +// +// header 0 +// + +bool PolyHeader::Specular() +{ + return (header[0] & 0x80000000) > 0; // bit 31 of word 0 is the specular enable flag; the former 9-digit mask 0x800000000 lay outside a 32-bit word, so this always returned false +} + +int PolyHeader::PolyNumber() +{ + return (header[0] & 0x000FFFC00) >> 10; // not all programs pass this, instead they are set to 0 +} + +bool PolyHeader::Disabled() +{ + if ((header[0] & 0x100) && (header[0] & 0x200)) { // assuming these two bits mean z and colour writes are disabled + return true; + } + + return false; +} + +int PolyHeader::NumVerts() +{ + return (header[0] & 0x40) ? 4 : 3; +} + +int PolyHeader::NumSharedVerts() +{ + int num = 0; + + for (int i = 0; i < 4; i++) { + if (SharedVertex(i)) { + num++; + } + } + + return num; +} + +bool PolyHeader::SharedVertex(int vertex) +{ + UINT32 mask = 1 << vertex; + + return (header[0] & mask) > 0; +} + +// +// header 1 +// + +void PolyHeader::FaceNormal(float n[3]) +{ + n[0] = (float)(((INT32)header[1]) >> 8) * (1.0f / 4194304.0f); + n[1] = (float)(((INT32)header[2]) >> 8) * (1.0f / 4194304.0f); + n[2] = (float)(((INT32)header[3]) >> 8) * (1.0f / 4194304.0f); +} + +float PolyHeader::UVScale() +{ + return (header[1] & 0x40) ? 1.0f : (1.0f / 8.0f); +} + +bool PolyHeader::DoubleSided() +{ + return (header[1] & 0x10) ? 
true : false; +} + +bool PolyHeader::LastPoly() +{ + if ((header[1] & 4) > 0 || header[6] == 0) { + return true; + } + + return false; +} + +bool PolyHeader::PolyColor() +{ + return (header[1] & 2) > 0; +} + +// +// header 2 +// + +bool PolyHeader::TexUMirror() +{ + return (header[2] & 2) > 0; +} + +bool PolyHeader::TexVMirror() +{ + return (header[2] & 1) > 0; +} + +// +// header 3 + +int PolyHeader::TexWidth() +{ + return 32 << ((header[3] >> 3) & 7); +} +int PolyHeader::TexHeight() +{ + return 32 << ((header[3] >> 0) & 7); +} + +// +// header 4 +// + +void PolyHeader::Color(UINT8& r, UINT8& g, UINT8& b) +{ + r = (header[4] >> 24); + g = (header[4] >> 16) & 0xFF; + b = (header[4] >> 8) & 0xFF; +} + +int PolyHeader::Page() +{ + return (header[4] & 0x40) >> 6; +} + +// +// header 5 +// + +int PolyHeader::X(int textureXOffset) +{ + //==== + int x; + //==== + + x = (32 * (((header[4] & 0x1F) << 1) | ((header[5] >> 7) & 1))) + textureXOffset; + x &= 2047; + return x; +} + +int PolyHeader::Y(int textureYOffset) +{ + //======= + int y; + int page; + //======= + + if (Page()) { + page = 1024; + } + else { + page = 0; + } + + y = (32 * (header[5] & 0x1F) + page) + textureYOffset; // if we hit 2nd page add 1024 to y coordinate + y &= 2047; + + return y; +} + +// +// header 6 +// + +int PolyHeader::TexFormat() +{ + return (header[6] >> 7) & 7; +} + +bool PolyHeader::TexEnabled() +{ + return (header[6] & 0x04000000) > 0; +} + +bool PolyHeader::LightEnabled() +{ + return !(header[6] & 0x00010000); +} + +bool PolyHeader::AlphaTest() +{ + return (header[6] & 0x80000000) > 0; +} + +UINT8 PolyHeader::Transparency() +{ + return (UINT8)(((header[6] >> 18) & 0x1F) * 255.f / 0x1F); +} + +bool PolyHeader::FixedShading() +{ + return (header[6] & 0x2000000) > 0; +} + +UINT8 PolyHeader::ShadeValue() +{ + return (UINT8)(((header[6] >> 26) & 0x1F) * (255.f / 0x1F)); +} + +bool PolyHeader::PolyAlpha() +{ + return (header[6] & 0x00800000) == 0; +} + +bool PolyHeader::TextureAlpha() +{ + 
return (header[6] & 0x1); +} + +bool PolyHeader::StencilPoly() +{ + return (header[6] & 0x1000000) > 0; // bit 24 of word 6 (the "possible stencil" flag); the mask was previously written as decimal 1000000 and tested unrelated bits +} + +bool PolyHeader::Luminous() +{ + return (header[6] & 0x00010000) > 0; +} + +float PolyHeader::LightModifier() +{ + return (float)((header[6] >> 11) & 0x1F) * (1.0f / 31.0f); +} + +// +// misc +// + +UINT64 PolyHeader::Hash(int textureXOffset, int textureYOffset) +{ + UINT64 hash = 0; // every field is widened to UINT64 before shifting and gets its own non-overlapping bit range, so distinct states never share a key + + hash |= (UINT64)(header[2] & 3); // bits 0-1 uv mirror bits + hash |= (UINT64)((header[3] >> 0) & 7) << 2; // bits 2-4 tex height + hash |= (UINT64)((header[3] >> 3) & 7) << 5; // bits 5-7 tex width + hash |= (UINT64)X(textureXOffset) << 8; // bits 8-18 x offset (X() is masked to 0-2047, so it needs 11 bits) + hash |= (UINT64)Y(textureYOffset) << 19; // bits 19-29 y offset (Y() is masked to 0-2047, so it needs 11 bits) + hash |= (UINT64)TexFormat() << 30; // bits 30-32 tex format + hash |= (UINT64)TexEnabled() << 33; // bit 33 textures enabled + hash |= (UINT64)LightEnabled() << 34; // bit 34 light enabled + hash |= (UINT64)DoubleSided() << 35; // bit 35 double sided + hash |= (UINT64)AlphaTest() << 36; // bit 36 contour processing + hash |= (UINT64)PolyAlpha() << 37; // bit 37 poly alpha processing + hash |= (UINT64)TextureAlpha() << 38; // bit 38 texture alpha processing + + //to do add the rest of the states + + return hash; +} + +} // New3D diff --git a/Src/Graphics/New3D/PolyHeader.h b/Src/Graphics/New3D/PolyHeader.h new file mode 100644 index 0000000..67ba32d --- /dev/null +++ b/Src/Graphics/New3D/PolyHeader.h @@ -0,0 +1,139 @@ +#ifndef _POLY_HEADER_H_ +#define _POLY_HEADER_H_ + +namespace New3D { + +/* +Polygon Data + +0x00: +x------- -------- -------- -------- Specular enable +-xxxxx-- -------- -------- -------- ? 
+------xx xxxxxxxx xxxxxx-- -------- Polygon number (not always present) +-------- -------- ------xx -------- Possibly disable z and colour writing +-------- -------- -------- -x------ 0 = Triangle, 1 = Quad +-------- -------- -------- ----x--- Vertex 3 shared from previous polygon +-------- -------- -------- -----x-- Vertex 2 shared from previous polygon +-------- -------- -------- ------x- Vertex 1 shared from previous polygon +-------- -------- -------- -------x Vertex 0 shared from previous polygon +-------- -------- -------- x-xx---- ? + +0x01: +xxxxxxxx xxxxxxxx xxxxxxxx -------- Polygon normal X coordinate (2.22 fixed point) +-------- -------- -------- -x------ UV scale (0 = 13.3, 1 = 16.0) +-------- -------- -------- ---x---- 1 = Double-sided polygon +-------- -------- -------- -----x-- If set, this is the last polygon +-------- -------- -------- ------x- Poly color, 1 = RGB, 0 = color table +-------- -------- -------- x-x-x--x ? + +0x02: +xxxxxxxx xxxxxxxx xxxxxxxx -------- Polygon normal Y coordinate (2.22 fixed point) +-------- -------- -------- ------x- Texture U mirror enable +-------- -------- -------- -------x Texture V mirror enable +-------- -------- -------- xxxxxx-- H/W also supports texture clamp/mirror, so guessing some of these bits must be that + +0x03: +xxxxxxxx xxxxxxxx xxxxxxxx -------- Polygon normal Z coordinate (2.22 fixed point) +-------- -------- -------- --xxx--- Texture width (n; width in pixels = 32 << n) +-------- -------- -------- -----xxx Texture height (n; height in pixels = 32 << n) + +0x04: +xxxxxxxx xxxxxxxx xxxxxxxx -------- Color (RGB888) +-------- -------- -------- -x------ Texture page +-------- -------- -------- ---xxxxx Upper 5 bits of texture U coordinate +-------- -------- -------- x------- ? +-------- -------- -------- --x----- ? + +0x05: +xxxxxxxx xxxxxxxx xxxxxxxx -------- Specular color ? 
+-------- -------- -------- x------- Low bit of texture U coordinate +-------- -------- -------- ---xxxxx Low 5 bits of texture V coordinate +-------- -------- -------- -xx----- ? + +0x06: +x------- -------- -------- -------- Alpha testing / contour +-xxxxx-- -------- -------- -------- Fixed shading ? +------x- -------- -------- -------- Enable fixed shading ? +-------x -------- -------- -------- Possible stencil +-------- x------- -------- -------- 1 = disable transparency ? +-------- -xxxxx-- -------- -------- Polygon translucency(0 = fully transparent) +-------- -------x -------- -------- 1 = disable lighting +-------- -------- xxxxx--- -------- Polygon light modifier(Amount that a luminous polygon will burn through fog. Valid range is 0.0 to 1.0. 0.0 is completely fogged; 1.0 has no fog.) +-------- -------- -----x-- -------- Texture enable +-------- -------- ------xx x------- Texture format +-------- -------- -------- -------x Alpha enable ? +-------- ------x - ------- -------- Never seen set ? +-------- -------- -------- -----xx- Always set ? +-------- -------- -------- -xxxx--- ? 
+*/ + +class PolyHeader +{ +public: + PolyHeader(); + PolyHeader(UINT32* h); + + void operator =(const UINT32* h); + + UINT32* StartOfData(); + bool NextPoly(); + int NumPolysTotal(); // could be quads or triangles + int NumTrianglesTotal(); + + //header 0 + bool Specular(); + int PolyNumber(); + bool Disabled(); // z & colour disabled + int NumVerts(); + int NumSharedVerts(); + bool SharedVertex(int vertex); + + //header 1 + void FaceNormal(float n[3]); + float UVScale(); + bool DoubleSided(); + bool LastPoly(); + bool PolyColor(); // if false uses LUT from ram + + //header 2 + bool TexUMirror(); + bool TexVMirror(); + + // header 3 + int TexWidth(); + int TexHeight(); + + //header 4 + void Color(UINT8& r, UINT8& g, UINT8& b); + int Page(); + + // header 5 + int X(int textureXOffset); + int Y(int textureYOffset); + + //header 6 + int TexFormat(); + bool TexEnabled(); + bool LightEnabled(); + bool AlphaTest(); + UINT8 Transparency(); // 0-255 + bool FixedShading(); + UINT8 ShadeValue(); + bool PolyAlpha(); + bool TextureAlpha(); + bool StencilPoly(); + bool Luminous(); + float LightModifier(); + + // misc + UINT64 Hash(int textureXOffset, int textureYOffset); // make a unique hash for sorting by state + + + //============= + UINT32* header; + //============= +}; + +} // New3D + +#endif diff --git a/Src/Graphics/New3D/R3DShader.cpp b/Src/Graphics/New3D/R3DShader.cpp new file mode 100644 index 0000000..da723fb --- /dev/null +++ b/Src/Graphics/New3D/R3DShader.cpp @@ -0,0 +1,263 @@ +#include "R3DShader.h" +#include "Graphics/Shader.h" + +namespace New3D { + +static char *vertexShaderBasic = + +// uniforms +"uniform float fogIntensity;\n" +"uniform float fogDensity;\n" +"uniform float fogStart;\n" + +//outputs to fragment shader +"varying float fsFogFactor;\n" +"varying float fsSpecularTerm;\n" // specular light term (additive) +"varying float fsViewZ;\n" +"varying vec3 fsViewNormal;\n" // per vertex normal vector + +"void main(void)\n" +"{\n" + "vec3 viewVertex;\n" + + 
"viewVertex = vec3(gl_ModelViewMatrix * gl_Vertex);\n" + "fsViewNormal = normalize(gl_NormalMatrix*gl_Normal);\n" + "float z = length(viewVertex);\n" + "fsFogFactor = clamp(1.0 - fogIntensity*(fogStart + z*fogDensity), 0.0, 1.0);\n" + "fsViewZ = -viewVertex.z;\n" // convert Z from GL->Real3D convention (want +Z to be further into screen) + + "gl_FrontColor = gl_Color;\n" + "gl_TexCoord[0] = gl_MultiTexCoord0;\n" + "gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n" +"}\n"; + +static char *fragmentShaderBasic = + +"uniform sampler2D tex;\n" +"uniform int textureEnabled;\n" +"uniform int alphaTest;\n" +"uniform int textureAlpha;\n" +"uniform vec3 fogColour;\n" +"uniform vec4 spotEllipse;\n" // spotlight ellipse position: .x=X position (screen coordinates), .y=Y position, .z=half-width, .w=half-height) +"uniform vec2 spotRange;\n" // spotlight Z range: .x=start (viewspace coordinates), .y=limit +"uniform vec3 spotColor;\n" // spotlight RGB color +"uniform vec3 lighting[2];\n" // lighting state (lighting[0] = sun direction, lighting[1].x,y = diffuse, ambient intensities from 0-1.0) +"uniform int lightEnable;\n" // lighting enabled (1.0) or luminous (0.0), drawn at full intensity +"uniform float shininess;\n" // specular shininess (if >= 0.0) or disable specular lighting (negative) + +//interpolated inputs from vertex shader +"varying float fsFogFactor;\n" +"varying float fsSpecularTerm;\n" // specular light term (additive) +"varying float fsViewZ;\n" +"varying vec3 fsViewNormal;\n" // per vertex normal vector + +"void main()\n" +"{\n" + "vec4 texData;\n" + "vec4 colData;\n" + "vec4 finalData;\n" + + "texData = vec4(1.0, 1.0, 1.0, 1.0);\n" + + "if(textureEnabled==1) {\n" + "texData = texture2D( tex, gl_TexCoord[0].st);\n" + + "if (alphaTest==1) {\n" // does it make any sense to do this later? 
+ "if (texData.a < (8.0/16.0)) {\n" + "discard;\n" + "}\n" + "}\n" + + "if (textureAlpha == 0) {\n" + "texData.a = 1.0;\n" + "}\n" + "}\n" + + "colData = gl_Color;\n" + + "finalData = texData * colData;\n" + "if (finalData.a < (1.0/16.0)) {\n" // basically chuck out any totally transparent pixels value = 1/16 the smallest transparency level h/w supports + "discard;\n" + "}\n" + + "vec3 lightIntensity;\n" + + "if (lightEnable==1)\n" + "{\n" + "vec3 sunVector;\n" // sun lighting vector (as reflecting away from vertex) + "float sunFactor;\n" // sun light projection along vertex normal (0.0 to 1.0) + + // Real3D -> OpenGL view space convention (TO-DO: do this outside of shader) + "sunVector = lighting[0] * vec3(1.0, -1.0, -1.0);\n" + + // Compute diffuse factor for sunlight + "sunFactor = max(dot(sunVector, fsViewNormal), 0.0);\n" + + // Total light intensity: sum of all components + "lightIntensity = vec3(sunFactor*lighting[1].x + lighting[1].y);\n" + "lightIntensity = clamp(lightIntensity,0.0,1.0);\n" + "}\n" + "else {\n" + "lightIntensity = vec3(1.0,1.0,1.0);\n" + "}\n" + + "finalData.rgb *= lightIntensity;\n" + + /* + "vec2 ellipse;\n" + "vec3 lightIntensity;\n" + "float insideSpot;\n" + + // Compute spotlight and apply lighting + "ellipse = (gl_FragCoord.xy - spotEllipse.xy) / spotEllipse.zw;\n" + "insideSpot = dot(ellipse, ellipse);\n" + + "if ((insideSpot <= 1.0) && (fsViewZ >= spotRange.x) && (fsViewZtextured != m_textured) { + glUniform1i(m_locTextureEnabled, m->textured); + m_textured = m->textured; + } + + if (m_dirty || m->alphaTest != m_alphaTest) { + glUniform1i(m_locAlphaTest, m->alphaTest); + m_alphaTest = m->alphaTest; + } + + if (m_dirty || m->textureAlpha != m_textureAlpha) { + glUniform1i(m_locTextureAlpha, m->textureAlpha); + m_textureAlpha = m->textureAlpha; + } + + if (m_dirty || m->fogIntensity != m_fogIntensity) { + glUniform1f(m_locFogIntensity, m->fogIntensity); + m_fogIntensity = m->fogIntensity; + } + + glUniform1i(m_locLightEnable, 
m->lighting); + glUniform1f(m_locShininess, 1); + + // technically not uniforms + if (m_dirty || m->doubleSided != m_doubleSided) { + m_doubleSided = m->doubleSided; + if (m_doubleSided) { + glDisable(GL_CULL_FACE); + } + else { + glEnable(GL_CULL_FACE); + } + } + + m_dirty = false; +} + +void R3DShader::SetViewportUniforms(const Viewport *vp) +{ + //didn't bother caching these, they don't get frequently called anyway + glUniform1f(m_locFogDensity, vp->fogParams[3]); + glUniform1f(m_locFogStart, vp->fogParams[4]); + glUniform3fv(m_locFogColour, 1, vp->fogParams); + + glUniform3fv(m_locLighting, 2, vp->lightingParams); + glUniform4fv(m_locSpotEllipse, 1, vp->spotEllipse); + glUniform2fv(m_locSpotRange, 1, vp->spotRange); + glUniform3fv(m_locSpotColor, 1, vp->spotColor); +} + +} // New3D diff --git a/Src/Graphics/New3D/R3DShader.h b/Src/Graphics/New3D/R3DShader.h new file mode 100644 index 0000000..6c5108f --- /dev/null +++ b/Src/Graphics/New3D/R3DShader.h @@ -0,0 +1,57 @@ +#ifndef _R3DSHADER_H_ +#define _R3DSHADER_H_ + +#include "Pkgs/glew.h" +#include "Model.h" + +namespace New3D { + +class R3DShader +{ +public: + R3DShader(); + + bool LoadShader(const char* vertexShader = nullptr, const char* fragmentShader = nullptr); + void SetMeshUniforms(const Mesh* m); + void SetViewportUniforms(const Viewport *vp); + void Start(); + void SetShader(bool enable = true); + +private: + + GLuint m_shaderProgram; + GLuint m_vertexShader; + GLuint m_fragmentShader; + + // mesh uniform data + GLint m_locTexture; + GLint m_locTextureEnabled; + GLint m_locTextureAlpha; + GLint m_locAlphaTest; + + bool m_textured; + bool m_textureAlpha; // use alpha in texture + bool m_alphaTest; // discard fragment based on alpha (ogl does this with fixed function) + float m_fogIntensity; + bool m_doubleSided; + + bool m_dirty; + + // viewport uniform data + GLint m_locFogIntensity; + GLint m_locFogDensity; + GLint m_locFogStart; + GLint m_locFogColour; + + // lighting + GLint m_locLighting; + GLint 
m_locLightEnable; + GLint m_locShininess; + GLint m_locSpotEllipse; + GLint m_locSpotRange; + GLint m_locSpotColor; +}; + +} // New3D + +#endif \ No newline at end of file diff --git a/Src/Graphics/New3D/Texture.cpp b/Src/Graphics/New3D/Texture.cpp new file mode 100644 index 0000000..6a9f2b5 --- /dev/null +++ b/Src/Graphics/New3D/Texture.cpp @@ -0,0 +1,264 @@ +#include "Texture.h" +#include +#include + +namespace New3D { + +Texture::Texture() +{ + Reset(); +} + +Texture::~Texture() +{ + DeleteTexture(); // make sure to have valid context before destroying +} + +void Texture::DeleteTexture() +{ + if (m_textureID) { + glDeleteTextures(1, &m_textureID); + printf("-----> deleting %i %i %i %i %i\n", m_format, m_x, m_y, m_width, m_height); + Reset(); + } +} + +void Texture::Reset() +{ + m_x = 0; + m_y = 0; + m_width = 0; + m_height = 0; + m_format = 0; + m_textureID = 0; + m_mirrorU = false; + m_mirrorV = false; +} + +void Texture::BindTexture() +{ + glBindTexture(GL_TEXTURE_2D, m_textureID); +} + +void Texture::GetCoordinates(UINT16 uIn, UINT16 vIn, float uvScale, float& uOut, float& vOut) +{ + uOut = ((uIn*uvScale)+0.0f) / m_width; + vOut = ((vIn*uvScale)+0.0f) / m_height; +} + +void Texture::SetWrapMode(bool mirrorU, bool mirrorV) +{ + if (mirrorU != m_mirrorU) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, mirrorU ? GL_MIRRORED_REPEAT : GL_REPEAT); + m_mirrorU = mirrorU; + } + + if (mirrorV != m_mirrorV) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, mirrorV ? 
GL_MIRRORED_REPEAT : GL_REPEAT); + m_mirrorV = mirrorV; + } +} + +UINT32 Texture::UploadTexture(const UINT16* src, UINT8* scratch, int format, bool mirrorU, bool mirrorV, int x, int y, int width, int height) +{ + int xi, yi, i; + GLubyte texel; + GLubyte c, a; + + if (!src || !scratch) { + return 0; // sanity checking + } + + DeleteTexture(); // free any existing texture + + i = 0; + + switch (format) + { + default: // Debug texture, use TEXTURE_DEBUG mask + for (yi = y; yi < (y + height); yi++) + { + for (xi = x; xi < (x + width); xi++) + { + scratch[i++] = 255; // R + scratch[i++] = 0; // G + scratch[i++] = 0; // B + scratch[i++] = 255; // A + } + } + break; + + case 0: // T1RGB5 <- correct + for (yi = y; yi < (y + height); yi++) + { + for (xi = x; xi < (x + width); xi++) + { + scratch[i++] = (GLubyte)(((src[yi * 2048 + xi] >> 10) & 0x1F) * 255.f / 0x1F); // R + scratch[i++] = (GLubyte)(((src[yi * 2048 + xi] >> 5) & 0x1F) * 255.f / 0x1F); // G + scratch[i++] = (GLubyte)(((src[yi * 2048 + xi] >> 0) & 0x1F) * 255.f / 0x1F); // B + scratch[i++] = ((src[yi * 2048 + xi] & 0x8000) ? 
0 : 255); // T + } + } + break; + + case 1: // Interleaved A4L4 (low byte) + for (yi = y; yi < (y + height); yi++) + { + for (xi = x; xi < (x + width); xi++) + { + // Interpret as A4L4 + texel = src[yi * 2048 + xi] & 0xFF; + c = (texel & 0xF) * 17; + a = (texel >> 4) * 17; + scratch[i++] = c; + scratch[i++] = c; + scratch[i++] = c; + scratch[i++] = a; + } + } + break; + + case 2: // luminance alpha texture <- this one is correct + for (yi = y; yi < (y + height); yi++) + { + for (xi = x; xi < (x + width); xi++) + { + texel = src[yi * 2048 + xi] & 0xFF; + c = ((texel >> 4) & 0xF) * 17; + a = (texel & 0xF) * 17; + scratch[i++] = c; + scratch[i++] = c; + scratch[i++] = c; + scratch[i++] = a; + } + } + break; + + case 3: // Interleaved A4L4 (high byte) + for (yi = y; yi < (y + height); yi++) + { + for (xi = x; xi < (x + width); xi++) + { + texel = src[yi * 2048 + xi] >> 8; + c = (texel & 0xF) * 17; + a = (texel >> 4) * 17; + scratch[i++] = c; + scratch[i++] = c; + scratch[i++] = c; + scratch[i++] = a; + } + } + break; + + case 4: // 8-bit, L4A4 + + for (yi = y; yi < (y + height); yi++) + { + for (xi = x; xi < (x + width); xi++) + { + texel = src[yi * 2048 + xi] & 0xFF; + c = ((texel >> 4) & 0xF) * 17; // seems to work better in Lost World (raptor shadows) + a = (texel & 0xF) * 17; + scratch[i++] = c; + scratch[i++] = c; + scratch[i++] = c; + scratch[i++] = a; + } + } + break; + + case 5: // 8-bit grayscale + for (yi = y; yi < (y + height); yi++) + { + for (xi = x; xi < (x + width); xi++) + { + // Interpret as 8-bit grayscale + texel = src[yi * 2048 + xi] & 0xFF; + + scratch[i++] = texel; + scratch[i++] = texel; + scratch[i++] = texel; + scratch[i++] = 255; + } + } + break; + + case 6: // 8-bit grayscale <-- this one is correct + for (yi = y; yi < (y + height); yi++) + { + for (xi = x; xi < (x + width); xi++) + { + texel = src[yi * 2048 + xi] >> 8; + + scratch[i++] = texel; + scratch[i++] = texel; + scratch[i++] = texel; + scratch[i++] = 255; + } + } + break; + + case 7: 
// RGBA4 + for (yi = y; yi < (y + height); yi++) + { + for (xi = x; xi < (x + width); xi++) + { + scratch[i++] = ((src[yi * 2048 + xi] >> 12) & 0xF) * 17;// R + scratch[i++] = ((src[yi * 2048 + xi] >> 8) & 0xF) * 17; // G + scratch[i++] = ((src[yi * 2048 + xi] >> 4) & 0xF) * 17; // B + scratch[i++] = ((src[yi * 2048 + xi] >> 0) & 0xF) * 17; // A + } + } + break; + } + + //remove debug mask + format &= 7; + + GLfloat maxAnistrophy; + + glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnistrophy); + + if (maxAnistrophy > 8) { + maxAnistrophy = 8.0f; //anymore than 8 can get expensive for little gain + } + + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // rgba is always 4 byte aligned + glActiveTexture(GL_TEXTURE0); // activate correct texture unit + + glGenTextures(1, &m_textureID); + glBindTexture(GL_TEXTURE_2D, m_textureID); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, mirrorU ? GL_MIRRORED_REPEAT : GL_REPEAT); //todo this in shaders? + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, mirrorV ? 
GL_MIRRORED_REPEAT : GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, maxAnistrophy); + glTexParameteri(GL_TEXTURE_2D, GL_GENERATE_MIPMAP, GL_TRUE); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, scratch); + + // assuming successful we can copy details + + m_x = x; + m_y = y; + m_width = width; + m_height = height; + m_format = format; + m_mirrorU = mirrorU; + m_mirrorV = mirrorV; + + printf("create format %i x: %i y: %i width: %i height: %i\n", format, x, y, width, height); + + return m_textureID; +} + +void Texture::GetDetails(int& x, int&y, int& width, int& height, int& format) +{ + x = m_x; + y = m_y; + width = m_width; + height = m_height; + format = m_format; +} + +} // New3D diff --git a/Src/Graphics/New3D/Texture.h b/Src/Graphics/New3D/Texture.h new file mode 100644 index 0000000..52bfc9a --- /dev/null +++ b/Src/Graphics/New3D/Texture.h @@ -0,0 +1,42 @@ +#ifndef _TEXTURE_H_ +#define _TEXTURE_H_ + +#include "Types.h" +#include "Pkgs/glew.h" //arg + +namespace New3D { + +#define TEXTURE_DEBUG 0x8 +#define TEXTURE_DEBUG_MASK 0x7 + +class Texture +{ +public: + + Texture(); + ~Texture(); + + UINT32 UploadTexture (const UINT16* src, UINT8* scratch, int format, bool mirrorU, bool mirrorV, int x, int y, int width, int height); + void DeleteTexture (); + void BindTexture (); + void GetCoordinates (UINT16 uIn, UINT16 vIn, float uvScale, float& uOut, float& vOut); + void GetDetails (int& x, int&y, int& width, int& height, int& format); + void SetWrapMode (bool mirrorU, bool mirrorV); + +private: + + void Reset(); + + int m_x; + int m_y; + int m_width; + int m_height; + int m_format; + bool m_mirrorU; + bool m_mirrorV; + GLuint m_textureID; +}; + +} // New3D + +#endif diff --git a/Src/Graphics/New3D/TextureSheet.cpp b/Src/Graphics/New3D/TextureSheet.cpp 
new file mode 100644 index 0000000..9612fdd --- /dev/null +++ b/Src/Graphics/New3D/TextureSheet.cpp @@ -0,0 +1,104 @@ +#include "TextureSheet.h" + +namespace New3D { + +TextureSheet::TextureSheet() +{ + m_temp.resize(512 * 512 * 4); // temporay buffer for textures +} + +int TextureSheet::ToIndex(int x, int y) +{ + return (y * 2048) + x; +} + +std::shared_ptr TextureSheet::BindTexture(const UINT16* src, int format, bool mirrorU, bool mirrorV, int x, int y, int width, int height) +{ + //======== + int index; + //======== + + x &= 2047; + y &= 2047; + + if ((x + width) > 2048 || (y + height) > 2048) { + return 0; + } + + if (width > 512 || height > 512) { + return 0; + } + + index = ToIndex(x, y); + + if (m_texMap[format&TEXTURE_DEBUG_MASK].count(index) == 0) { + + //no textures at this position or format so add it to the map + + std::shared_ptr t(new Texture()); + m_texMap[format&TEXTURE_DEBUG_MASK].insert(std::pair>(index, t)); + t->UploadTexture(src, m_temp.data(), format, mirrorU, mirrorV, x, y, width, height); + return t; + } + else { + //scan for duplicates + //only texture width/height and wrap modes can change here. 
Since key is based on x/y pos, and each map is a separate format + + auto range = m_texMap[format&TEXTURE_DEBUG_MASK].equal_range(index); + + for (auto it = range.first; it != range.second; ++it) { + + int x2, y2, width2, height2, format2; + + it->second->GetDetails(x2, y2, width2, height2, format2); + + if (width == width2 && height == height2) { + return it->second; + } + } + + std::shared_ptr t(new Texture()); + m_texMap[format&TEXTURE_DEBUG_MASK].insert(std::pair>(index, t)); + t->UploadTexture(src, m_temp.data(), format, mirrorU, mirrorV, x, y, width, height); + + return t; + } +} + +void TextureSheet::Release() +{ + for (int i = 0; i < 8; i++) { + m_texMap[i].clear(); + } +} + +void TextureSheet::Invalidate(int x, int y, int width, int height) +{ + //========== + int count; + int sWidth; // sample width + int sHeight; // sample height + //========== + + // since the smallest sized texture is 32x32 pixels? + // we can invalidate 32x32 tiles over the width/height of the area + + sWidth = width / 32; + sHeight = height / 32; + count = sWidth * sHeight; + + for (int i = 0; i < count; i++) { + + int index = ToIndex(x + ((i%sWidth) * 32), y + ((i / sWidth) * 32)); + + for (int j = 0; j<8; j++) { + + if (m_texMap[j].count(index) > 0) { + + m_texMap[j].erase(index); + } + } + } +} + +} // New3D diff --git a/Src/Graphics/New3D/TextureSheet.h b/Src/Graphics/New3D/TextureSheet.h new file mode 100644 index 0000000..9f7a1d3 --- /dev/null +++ b/Src/Graphics/New3D/TextureSheet.h @@ -0,0 +1,38 @@ +#ifndef _TEXTURE_SHEET_H_ +#define _TEXTURE_SHEET_H_ + +#include "Types.h" +#include +#include +#include +#include "Texture.h" + +namespace New3D { + +#define REAL_SHEET_WIDTH 2048 +#define REAL_SHEET_HEIGHT 2048 + +class TextureSheet +{ +public: + TextureSheet(); + + std::shared_ptr BindTexture (const UINT16* src, int format, bool mirrorU, bool mirrorV, int x, int y, int width, int height); + void Invalidate (int x, int y, int width, int height); // release parts of the memory + 
void Release (); // release all texture objects and memory + +private: + + int ToIndex(int x, int y); + + std::unordered_multimap> m_texMap[8]; + + // the key for the above maps is the x/y position in the 2048x2048 texture + // array of 8 planes for each texture type + + std::vector m_temp; +}; + +} // New3D + +#endif \ No newline at end of file diff --git a/Src/Graphics/New3D/VBO.cpp b/Src/Graphics/New3D/VBO.cpp new file mode 100644 index 0000000..f2dec29 --- /dev/null +++ b/Src/Graphics/New3D/VBO.cpp @@ -0,0 +1,46 @@ +#include "VBO.h" + +namespace New3D { + +VBO::VBO() +{ + m_id = 0; + m_target = 0; +} + +void VBO::Create(GLenum target, GLenum usage, GLsizeiptr size, const void* data) +{ + glGenBuffers(1, &m_id); // create a vbo + glBindBuffer(target, m_id); // activate vbo id to use + glBufferData(target, size, data, usage); // upload data to video card + + m_target = target; + + Bind(false); // unbind +} + +void VBO::BufferSubData(GLintptr offset, GLsizeiptr size, const GLvoid* data) +{ + glBufferSubData(m_target, offset, size, data); +} + +void VBO::Destroy() +{ + if (m_id) { + glDeleteBuffers(1, &m_id); + m_id = 0; + m_target = 0; + } +} + +void VBO::Bind(bool enable) +{ + if (enable) { + glBindBuffer(m_target, m_id); + } + else { + glBindBuffer(m_target, 0); + } +} + +} // New3D diff --git a/Src/Graphics/New3D/VBO.h b/Src/Graphics/New3D/VBO.h new file mode 100644 index 0000000..2422632 --- /dev/null +++ b/Src/Graphics/New3D/VBO.h @@ -0,0 +1,25 @@ +#ifndef _VBO_H_ +#define _VBO_H_ + +#include "Pkgs/glew.h" + +namespace New3D { + +class VBO +{ +public: + VBO(); + + void Create (GLenum target, GLenum usage, GLsizeiptr size, const void* data=nullptr); + void BufferSubData (GLintptr offset, GLsizeiptr size, const GLvoid* data); + void Destroy (); + void Bind (bool enable); + +private: + GLuint m_id; + GLenum m_target; +}; + +} // New3D + +#endif diff --git a/Src/Graphics/New3D/Vec.cpp b/Src/Graphics/New3D/Vec.cpp new file mode 100644 index 0000000..57f1b91 --- 
/dev/null +++ b/Src/Graphics/New3D/Vec.cpp @@ -0,0 +1,237 @@ +#include +#include "Vec.h" +#include + +namespace New3D { + +static float fastSqrt(float number) { + long i; + float x, y; + const float f = 1.5F; + + x = number * 0.5F; + y = number; + i = * ( long * ) &y; + i = 0x5f375a86 - ( i >> 1 ); + y = * ( float * ) &i; + y = y * ( f - ( x * y * y ) ); + y = y * ( f - ( x * y * y ) ); + return number * y; +} + +static float fastInvSqrt(float x) +{ + float xhalf = 0.5f*x; + int i = *(int*)&x; // get bits for floating value + i = 0x5f375a86- (i>>1); // gives initial guess y0 + x = *(float*)&i; // convert bits back to float + x = x*(1.5f-xhalf*x*x); // Newton step, repeating increases accuracy + return x; +} + +void V3::subtract(const Vec3 a, const Vec3 b, Vec3 out) { + + out[0] = a[0] - b[0]; + out[1] = a[1] - b[1]; + out[2] = a[2] - b[2]; +} + +void V3::subtract(Vec3 a, const Vec3 b) { + + a[0] -= b[0]; + a[1] -= b[1]; + a[2] -= b[2]; +} + +void V3::add(const Vec3 a, const Vec3 b, Vec3 out) { + + out[0] = a[0] + b[0]; + out[1] = a[1] + b[1]; + out[2] = a[2] + b[2]; +} + +void V3::add(Vec3 a, const Vec3 b) { + + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; +} + +void V3::divide(Vec3 a, float number) { + + multiply(a,1/number); +} + +void V3::multiply(Vec3 a, float number) { + + a[0] *= number; + a[1] *= number; + a[2] *= number; +} + +void V3::multiply(Vec3 a, const Vec3 b) { + + a[0] *= b[0]; + a[1] *= b[1]; + a[2] *= b[2]; +} + +void V3::multiply(const Vec3 a, const Vec3 b, Vec3 out) { + + out[0] = a[0] * b[0]; + out[1] = a[1] * b[1]; + out[2] = a[2] * b[2]; +} + +void V3::crossProduct(const Vec3 v1, const Vec3 v2, Vec3 cross) { + + cross[0] = v1[1]*v2[2] - v1[2]*v2[1]; + cross[1] = v1[2]*v2[0] - v1[0]*v2[2]; + cross[2] = v1[0]*v2[1] - v1[1]*v2[0]; +} + +float V3::dotProduct(const Vec3 v1, const Vec3 v2) { + + return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2]; +} + +void V3::copy(const Vec3 in, Vec3 out) { + + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; +} 
+ +void V3::inverse(Vec3 v) { + + v[0] = -v[0]; + v[1] = -v[1]; + v[2] = -v[2]; +} + +float V3::length(const Vec3 v) { + + //=========== + float length; + //=========== + + length = v[0]*v[0] + v[1]*v[1] + v[2]*v[2]; + length = sqrt(length); + + return length; +} + +void V3::normalise(Vec3 v) { + + //======== + float len; + //======== + + len = v[0]*v[0] + v[1]*v[1] + v[2]*v[2]; + len = fastInvSqrt(len); + + multiply(v,len); +} + +void V3::multiplyAdd(const Vec3 a, float scale, const Vec3 b, Vec3 out) { + + out[0] = a[0] + scale*b[0]; + out[1] = a[1] + scale*b[1]; + out[2] = a[2] + scale*b[2]; +} + +void V3::reset(Vec3 v) { + + v[0] = 0; + v[1] = 0; + v[2] = 0; +} + +void V3::set(Vec3 v, float value) { + + v[0] = value; + v[1] = value; + v[2] = value; +} + +void V3::set(Vec3 v, float x, float y, float z) { + + v[0] = x; + v[1] = y; + v[2] = z; +} + +void V3::reflect(const Vec3 a, const Vec3 b, Vec3 out) { + + //=========== + float temp; + Vec3 v; + //=========== + + //Vect2 = Vect1 - 2 * WallN * (WallN DOT Vect1) + + V3::copy(a,v); + + temp = V3::dotProduct(a,b) * 2; + + V3::multiply(v,temp); + V3::subtract(b,v,out); +} + +void V3::createNormal(const Vec3 a, const Vec3 b, const Vec3 c, Vec3 outNormal) { + + //====== + Vec3 v1; + Vec3 v2; + //====== + + V3::subtract (a,b,v1); + V3::subtract (c,b,v2); + V3::crossProduct(v1,v2,outNormal); +} + +void V3::_max(Vec3 a, const Vec3 compare) { + + if(a[0] < compare[0]) a[0] = compare[0]; + if(a[1] < compare[1]) a[1] = compare[1]; + if(a[2] < compare[2]) a[2] = compare[2]; +} + +void V3::_min(Vec3 a, const Vec3 compare) { + + if(a[0] > compare[0]) a[0] = compare[0]; + if(a[1] > compare[1]) a[1] = compare[1]; + if(a[2] > compare[2]) a[2] = compare[2]; +} + +bool V3::cmp(const Vec3 a, float b) { + + if(a[0]!=b) return false; + if(a[1]!=b) return false; + if(a[2]!=b) return false; + + return true; +} + +bool V3::cmp(const Vec3 a, const Vec3 b) { + + if(a[0]!=b[0]) return false; + if(a[1]!=b[1]) return false; + if(a[2]!=b[2]) 
return false; + + return true; +} + +void V3::clamp(Vec3 a, float _min, float _max) { + + if(a[0] < _min) a[0] = _min; + if(a[0] > _max) a[0] = _max; + + if(a[1] < _min) a[1] = _min; + if(a[1] > _max) a[1] = _max; + + if(a[2] < _min) a[2] = _min; + if(a[2] > _max) a[2] = _max; +} + +} // New3D diff --git a/Src/Graphics/New3D/Vec.h b/Src/Graphics/New3D/Vec.h new file mode 100644 index 0000000..cc38bd6 --- /dev/null +++ b/Src/Graphics/New3D/Vec.h @@ -0,0 +1,37 @@ +#ifndef VEC_H +#define VEC_H + +namespace New3D { +namespace V3 +{ + typedef float Vec3[3]; + + void subtract (const Vec3 a, const Vec3 b, Vec3 out); + void subtract (Vec3 a, const Vec3 b); + void add (const Vec3 a, const Vec3 b, Vec3 out); + void add (Vec3 a, const Vec3 b); + void divide (Vec3 a, float number); + void multiply (Vec3 a, float number); + void multiply (Vec3 a, const Vec3 b); + void multiply (const Vec3 a, const Vec3 b, Vec3 out); + void crossProduct (const Vec3 v1, const Vec3 v2, Vec3 cross); + float dotProduct (const Vec3 v1, const Vec3 v2); + void copy (const Vec3 in, Vec3 out); + void inverse (Vec3 v); + float length (const Vec3 v); + void normalise (Vec3 v); + void multiplyAdd (const Vec3 a, float scale, const Vec3 b, Vec3 out); + void reset (Vec3 v); + void set (Vec3 v, float value); + void set (Vec3 v, float x, float y, float z); + void reflect (const Vec3 a, const Vec3 b, Vec3 out); + void createNormal (const Vec3 a, const Vec3 b, const Vec3 c, Vec3 outNormal); // assume a,b,c are wound clockwise + void _max (Vec3 a, const Vec3 compare); + void _min (Vec3 a, const Vec3 compare); + bool cmp (const Vec3 a, float b); + bool cmp (const Vec3 a, const Vec3 b); + void clamp (Vec3 a, float _min, float _max); +} +} // New3D + +#endif \ No newline at end of file diff --git a/Src/Graphics/Render2D.cpp b/Src/Graphics/Render2D.cpp index 14757da..720e72c 100644 --- a/Src/Graphics/Render2D.cpp +++ b/Src/Graphics/Render2D.cpp @@ -710,7 +710,7 @@ void CRender2D::BeginFrame(void) // Top layers void 
CRender2D::EndFrame(void) -{ +{ // Display top surface Setup2D(false, false); glEnable(GL_BLEND); diff --git a/Src/Model3/Real3D.cpp b/Src/Model3/Real3D.cpp index e746927..8c92f80 100644 --- a/Src/Model3/Real3D.cpp +++ b/Src/Model3/Real3D.cpp @@ -107,6 +107,7 @@ void CReal3D::LoadState(CBlockFile *SaveState) } SaveState->Read(memoryPool, MEM_POOL_SIZE_RW); + // If multi-threaded, update read-only snapshots too if (g_Config.gpuMultiThreaded) UpdateSnapshots(true); @@ -706,7 +707,6 @@ void CReal3D::UploadTexture(UINT32 header, UINT16 *texData) unsigned x, y, page, width, height, bytesPerTexel, mipYPos, mipWidth, mipHeight, mipNum, mipX, mipY; - // Position: texture RAM is arranged as 2 2048x1024 texel sheets x = 32*(header&0x3F); y = 32*((header>>7)&0x1F); @@ -855,7 +855,7 @@ void CReal3D::WriteTexturePort(unsigned reg, UINT32 data) case 0x8: // VROM texture length (also used to trigger uploads) case 0x14: UploadTexture(vromTextureHeader,(UINT16 *)&vrom[vromTextureAddr&0xFFFFFF]); - //printf("texture upload: addr=%08X\n", vromTextureAddr); + //printf("texture upload: addr=%08X\n", vromTextureAddr); break; default: DebugLog("Real3D texture port write: %X=%08X\n", reg, data); @@ -969,7 +969,7 @@ void CReal3D::Reset(void) Configuration, Initialization, and Shutdown ******************************************************************************/ -void CReal3D::AttachRenderer(CRender3D *Render3DPtr) +void CReal3D::AttachRenderer(IRender3D *Render3DPtr) { Render3D = Render3DPtr; diff --git a/Src/Model3/Real3D.h b/Src/Model3/Real3D.h index f9ffc35..ee2bafd 100644 --- a/Src/Model3/Real3D.h +++ b/Src/Model3/Real3D.h @@ -319,7 +319,7 @@ public: void Reset(void); /* - * AttachRenderer(render2DPtr): + * AttachRenderer(render3DPtr): * * Attaches a 3D renderer for the Real3D to use. This function will * immediately pass along the information that a CRender3D object needs to @@ -328,7 +328,7 @@ public: * Parameters: * Render3DPtr Pointer to a 3D renderer object. 
*/ - void AttachRenderer(CRender3D *Render3DPtr); + void AttachRenderer(IRender3D *Render3DPtr); /* * SetStep(stepID): @@ -388,7 +388,7 @@ private: UINT32 UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty); // Renderer attached to the Real3D - CRender3D *Render3D; + IRender3D *Render3D; // Data passed from Model 3 object const UINT32 *vrom; // Video ROM diff --git a/Src/OSD/SDL/Main.cpp b/Src/OSD/SDL/Main.cpp index eb7f468..7d54cce 100644 --- a/Src/OSD/SDL/Main.cpp +++ b/Src/OSD/SDL/Main.cpp @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2016 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -28,6 +28,7 @@ * ------------------------- * - Add UI keys for balance setting? * - 5.1 audio support? + * - Stretch video option * * Compile-Time Options * -------------------- @@ -216,10 +217,10 @@ static bool CreateGLScreen(const char *caption, unsigned *xOffsetPtr, unsigned * return ErrorLog("Unable to initialize SDL video subsystem: %s\n", SDL_GetError()); // Important GL attributes - SDL_GL_SetAttribute(SDL_GL_RED_SIZE,5); // need at least RGB555 for Model 3 textures - SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE,5); - SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE,5); - SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE,16); + SDL_GL_SetAttribute(SDL_GL_RED_SIZE,8); + SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE,8); + SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE,8); + SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE,24); SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER,1); // Set vsync @@ -350,6 +351,87 @@ static void PrintGLInfo(bool createScreen, bool infoLog, bool printExtensions) else printf("\n"); } +static void PrintBAT(unsigned regu, unsigned regl) +{ +#ifdef DEBUG + UINT32 batu = ppc_read_spr(regu); + UINT32 batl = ppc_read_spr(regl); + UINT32 bepi = batu >> (31 - 14); + UINT32 bl = (batu >> (31 - 29)) & 0x7ff; + bool vs = batu & 2; + bool vp = batu & 1; + UINT32 brpn = 
batl >> (31 - 14); + UINT32 wimg = (batl >> (31 - 28)) & 0xf; + UINT32 pp = batl & 3; + UINT32 size = (bl + 1) * 128 * 1024; + UINT32 ea_base = bepi << (31 - 14); + UINT32 ea_limit = ea_base + size - 1; + UINT32 pa_base = brpn << (31 - 14); + UINT32 pa_limit = pa_base + size - 1; + printf("%08X-%08X -> %08X-%08X ", ea_base, ea_limit, pa_base, pa_limit); + printf("%c%c%c%c ", (wimg&8)?'W':'-', (wimg&4)?'I':'-', (wimg&2)?'M':'-', (wimg&1)?'G':'-'); + printf("PP="); + if (pp == 0) + printf("NA"); + else if (pp == 2) + printf("RW"); + else + printf("RO"); + printf(" Vs=%d Vp=%d", vs, vp); +#endif +} + +static void DumpPPCRegisters(CBus *bus) +{ +#ifdef DEBUG + for (int i = 0; i < 32; i += 4) + printf("R%d=%08X\tR%d=%08X\tR%d=%08X\tR%d=%08X\n", + i + 0, ppc_get_gpr(i + 0), + i + 1, ppc_get_gpr(i + 1), + i + 2, ppc_get_gpr(i + 2), + i + 3, ppc_get_gpr(i + 3)); + printf("PC =%08X\n", ppc_get_pc()); + printf("LR =%08X\n", ppc_get_lr()); + printf("DBAT0U=%08X\tIBAT0U=%08X\n", ppc_read_spr(SPR603E_DBAT0U), ppc_read_spr(SPR603E_IBAT0U)); + printf("DBAT0L=%08X\tIBAT0L=%08X\n", ppc_read_spr(SPR603E_DBAT0L), ppc_read_spr(SPR603E_IBAT0L)); + printf("DBAT1U=%08X\tIBAT1U=%08X\n", ppc_read_spr(SPR603E_DBAT1U), ppc_read_spr(SPR603E_IBAT1U)); + printf("DBAT1L=%08X\tIBAT1L=%08X\n", ppc_read_spr(SPR603E_DBAT1L), ppc_read_spr(SPR603E_IBAT1L)); + printf("DBAT2U=%08X\tIBAT2U=%08X\n", ppc_read_spr(SPR603E_DBAT2U), ppc_read_spr(SPR603E_IBAT2U)); + printf("DBAT2L=%08X\tIBAT2L=%08X\n", ppc_read_spr(SPR603E_DBAT2L), ppc_read_spr(SPR603E_IBAT2L)); + printf("DBAT3U=%08X\tIBAT3U=%08X\n", ppc_read_spr(SPR603E_DBAT3U), ppc_read_spr(SPR603E_IBAT3U)); + printf("DBAT3L=%08X\tIBAT3L=%08X\n", ppc_read_spr(SPR603E_DBAT3L), ppc_read_spr(SPR603E_IBAT3L)); + for (int i = 0; i < 10; i++) + printf("SR%d =%08X VSID=%06X\n", i, ppc_read_sr(i), ppc_read_sr(i) & 0x00ffffff); + for (int i = 10; i < 16; i++) + printf("SR%d=%08X VSID=%06X\n", i, ppc_read_sr(i), ppc_read_sr(i) & 0x00ffffff); + printf("SDR1=%08X\n", 
ppc_read_spr(SPR603E_SDR1)); + printf("\n"); + printf("DBAT0: "); PrintBAT(SPR603E_DBAT0U, SPR603E_DBAT0L); printf("\n"); + printf("DBAT1: "); PrintBAT(SPR603E_DBAT1U, SPR603E_DBAT1L); printf("\n"); + printf("DBAT2: "); PrintBAT(SPR603E_DBAT2U, SPR603E_DBAT2L); printf("\n"); + printf("DBAT3: "); PrintBAT(SPR603E_DBAT3U, SPR603E_DBAT3L); printf("\n"); + printf("IBAT0: "); PrintBAT(SPR603E_IBAT0U, SPR603E_IBAT0L); printf("\n"); + printf("IBAT1: "); PrintBAT(SPR603E_IBAT1U, SPR603E_IBAT1L); printf("\n"); + printf("IBAT2: "); PrintBAT(SPR603E_IBAT2U, SPR603E_IBAT2L); printf("\n"); + printf("IBAT3: "); PrintBAT(SPR603E_IBAT3U, SPR603E_IBAT3L); printf("\n"); + printf("\n"); + /* + printf("First PTEG:\n"); + UINT32 ptab = ppc_read_spr(SPR603E_SDR1) & 0xffff0000; + for (int i = 0; i < 65536/8; i++) + { + UINT64 pte = bus->Read64(ptab + i*8); + UINT32 vsid = (pte >> (32 + (31 - 24))) & 0x00ffffff; + UINT32 rpn = pte & 0xfffff000; + int wimg = (pte >> 3) & 0xf; + bool v = pte & 0x8000000000000000ULL; + printf(" %d: %016llX V=%d VSID=%06X RPN=%08X WIMG=%c%c%c%c\n", i, pte, v, vsid, rpn, (wimg&8)?'W':'-', (wimg&4)?'I':'-', (wimg&2)?'M':'-', (wimg&1)?'G':'-'); + } + */ + +#endif +} + /****************************************************************************** Configuration @@ -433,6 +515,7 @@ static void ApplySettings(CINIFile *INI, const char *section) #endif // SUPERMODEL_WIN32 // OSD + INI->Get(section, "New3DEngine", g_Config.new3DEngine); INI->Get(section, "XResolution", g_Config.xRes); INI->Get(section, "YResolution", g_Config.yRes); INI->Get(section, "FullScreen", g_Config.fullScreen); @@ -479,6 +562,7 @@ static void LogConfig(void) InfoLog("Program settings:"); // COSDConfig + InfoLog("\tNew3DEngine = %d", g_Config.new3DEngine); InfoLog("\tXResolution = %d", g_Config.xRes); InfoLog("\tYResolution = %d", g_Config.yRes); InfoLog("\tFullScreen = %d", g_Config.fullScreen); @@ -528,7 +612,7 @@ static void LogConfig(void) InfoLog("\tForceFeedback = %d", 
g_Config.forceFeedback); #endif - // CRender3DConfig + // CLegacy3DConfig InfoLog("\tVertexShader = %s", g_Config.vertexShaderFile.c_str()); InfoLog("\tFragmentShader = %s", g_Config.fragmentShaderFile.c_str()); @@ -800,8 +884,8 @@ int Supermodel(const char *zipFile, CInputs *Inputs, COutputs *Outputs, CINIFile CModel3 *Model3 = new CModel3(); #endif // SUPERMODEL_DEBUGGER char baseTitleStr[128], titleStr[128]; - CRender2D *Render2D = new CRender2D(); - CRender3D *Render3D = new CRender3D(); + CRender2D *Render2D; + IRender3D *Render3D; unsigned prevFPSTicks, currentFPSTicks, currentTicks, targetTicks, startTicks; unsigned fpsFramesElapsed, framesElapsed; bool gameHasLightguns = false; @@ -852,7 +936,9 @@ int Supermodel(const char *zipFile, CInputs *Inputs, COutputs *Outputs, CINIFile if (Outputs != NULL) Model3->AttachOutputs(Outputs); - // Initialize the renderer + // Initialize the renderers + Render2D = new CRender2D(); + Render3D = g_Config.new3DEngine ? ((IRender3D *) new New3D::CNew3D()) : ((IRender3D *) new Legacy3D::CLegacy3D()); if (OKAY != Render2D->Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes)) goto QuitError; if (OKAY != Render3D->Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes)) @@ -985,7 +1071,7 @@ int Supermodel(const char *zipFile, CInputs *Inputs, COutputs *Outputs, CINIFile // Recreate renderers and attach to the emulator Render2D = new CRender2D(); - Render3D = new CRender3D(); + Render3D = g_Config.new3DEngine ? 
((IRender3D *) new New3D::CNew3D()) : ((IRender3D *) new Legacy3D::CLegacy3D()); if (OKAY != Render2D->Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes)) goto QuitError; if (OKAY != Render3D->Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes)) @@ -1204,33 +1290,6 @@ int Supermodel(const char *zipFile, CInputs *Inputs, COutputs *Outputs, CINIFile delete Render2D; delete Render3D; - // Dump PowerPC registers -#ifdef DEBUG - for (int i = 0; i < 32; i += 4) - printf("R%d=%08X\tR%d=%08X\tR%d=%08X\tR%d=%08X\n", - i + 0, ppc_get_gpr(i + 0), - i + 1, ppc_get_gpr(i + 1), - i + 2, ppc_get_gpr(i + 2), - i + 3, ppc_get_gpr(i + 3)); - printf("PC =%08X\n", ppc_get_pc()); - printf("LR =%08X\n", ppc_get_lr()); - /* - printf("DBAT0U=%08X\tIBAT0U=%08X\n", ppc_read_spr(SPR603E_DBAT0U), ppc_read_spr(SPR603E_IBAT0U)); - printf("DBAT0L=%08X\tIBAT0L=%08X\n", ppc_read_spr(SPR603E_DBAT0L), ppc_read_spr(SPR603E_IBAT0L)); - printf("DBAT1U=%08X\tIBAT1U=%08X\n", ppc_read_spr(SPR603E_DBAT1U), ppc_read_spr(SPR603E_IBAT1U)); - printf("DBAT1L=%08X\tIBAT1L=%08X\n", ppc_read_spr(SPR603E_DBAT1L), ppc_read_spr(SPR603E_IBAT1L)); - printf("DBAT2U=%08X\tIBAT2U=%08X\n", ppc_read_spr(SPR603E_DBAT2U), ppc_read_spr(SPR603E_IBAT2U)); - printf("DBAT2L=%08X\tIBAT2L=%08X\n", ppc_read_spr(SPR603E_DBAT2L), ppc_read_spr(SPR603E_IBAT2L)); - printf("DBAT3U=%08X\tIBAT3U=%08X\n", ppc_read_spr(SPR603E_DBAT3U), ppc_read_spr(SPR603E_IBAT3U)); - printf("DBAT3L=%08X\tIBAT3L=%08X\n", ppc_read_spr(SPR603E_DBAT3L), ppc_read_spr(SPR603E_IBAT3L)); - for (int i = 0; i < 10; i++) - printf("SR%d =%08X\n", i, ppc_read_sr(i)); - for (int i = 10; i < 16; i++) - printf("SR%d=%08X\n", i, ppc_read_sr(i)); - printf("SDR1=%08X\n", ppc_read_spr(SPR603E_SDR1)); - */ -#endif - return 0; // Quit with an error @@ -1316,7 +1375,7 @@ static int DisassembleCROM(const char *zipFile, UINT32 addr, unsigned n) static void Title(void) { puts("Supermodel: A Sega Model 3 Arcade Emulator (Version "SUPERMODEL_VERSION")"); - puts("Copyright (C) 
2011-2012 by Bart Trzynadlowski and Nik Henson\n"); + puts("Copyright 2011-2016 by Bart Trzynadlowski and Nik Henson\n"); } // Print usage information @@ -1326,7 +1385,7 @@ static void Help(void) puts("ROM set must be a valid ZIP file containing a single game."); puts(""); puts("General Options:"); - puts(" -?, -h Print this help text"); + puts(" -?, -h, -help, --help Print this help text"); puts(" -print-games List supported games and quit"); puts(""); puts("Core Options:"); @@ -1346,8 +1405,10 @@ static void Help(void) puts(" -show-fps Display frame rate in window title bar"); puts(" -crosshairs= Crosshairs configuration for gun games:"); puts(" 0=none [Default], 1=P1 only, 2=P2 only, 3=P1 & P2"); - puts(" -vert-shader= Load 3D vertex shader from external file"); - puts(" -frag-shader= Load 3D fragment shader from external file"); + puts(" -legacy3d Legacy 3D engine [Default]"); + puts(" -new3d New 3D engine by Ian Curtis"); + puts(" -vert-shader= Load vertex shader from file (legacy 3D engine)"); + puts(" -frag-shader= Load fragment shader from file (legacy 3D engine)"); puts(" -print-gl-info Print OpenGL driver information and quit"); puts(""); puts("Audio Options:"); @@ -1371,7 +1432,7 @@ static void Help(void) puts(""); #ifdef SUPERMODEL_DEBUGGER puts("Debug Options:"); - puts(" -disable-debugger Completely disable debugger functionality"); + puts(" -disable-debugger Completely disable debugger functionality"); puts(" -enter-debugger Enter debugger at start of emulation"); puts(" -dis=[,n] Disassemble PowerPC code from CROM"); puts(""); @@ -1449,7 +1510,17 @@ int main(int argc, char **argv) CmdLine.SetDefaultSectionName("Global"); // command line settings are global-level for (i = 1; i < argc; i++) { - if (!strcmp(argv[i],"-h") || !strcmp(argv[i],"-?")) + if (!strcmp(argv[i],"-new3d")) + { + n = 1; + CmdLine.Set("Global", "New3DEngine", n); + } + else if (!strcmp(argv[i],"-legacy3d")) + { + n = 0; + CmdLine.Set("Global", "New3DEngine", n); + } + else if 
(!strcmp(argv[i],"-h") || !strcmp(argv[i],"-?") || !strcmp(argv[i],"-help") || !strcmp(argv[i],"--help")) { Help(); return 0; diff --git a/Src/OSD/SDL/OSDConfig.h b/Src/OSD/SDL/OSDConfig.h index 14befab..cec2b1b 100644 --- a/Src/OSD/SDL/OSDConfig.h +++ b/Src/OSD/SDL/OSDConfig.h @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2016 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -43,6 +43,7 @@ using namespace std; class COSDConfig { public: + bool new3DEngine; // New 3D engine unsigned xRes, yRes; // X and Y resolution, in pixels bool fullScreen; // Full screen mode (if true) bool wideScreen; // Wide screen hack @@ -135,6 +136,7 @@ public: // Defaults COSDConfig(void) { + new3DEngine = false; xRes = 496; yRes = 384; fullScreen = false;