From 453df4f5f551bb4e8ef95fa3198083c7180a23ed Mon Sep 17 00:00:00 2001 From: Bart Trzynadlowski Date: Mon, 20 Feb 2012 03:45:48 +0000 Subject: [PATCH] - Finished cleaning up and optimizing the 2D renderer. - Fixed up color offset register support for new 2D rendering system. Now maintains 2 computed palettes for layers A/A' and B/B'. - Fixed a minor bug in InitPalette(); VRAM was not being typecast properly. - Fixed specular lighting bug that occurred on some OpenGL drivers because integers were not being interpreted as floats in the vertex shader. - Began to update copyright date in some files. - Graphics modules now use the C++-style names for C standard library headers (e.g., stdio.h -> cstdio) consistent with the rest of Supermodel. --- Src/Graphics/Models.cpp | 4 +- Src/Graphics/Render2D.cpp | 342 ++++++++++-------- Src/Graphics/Render2D.h | 35 +- Src/Graphics/Render3D.cpp | 4 +- Src/Graphics/Shader.cpp | 4 +- Src/Graphics/Shaders/Fragment.glsl | 4 +- Src/Graphics/Shaders/Fragment2D.glsl | 4 +- Src/Graphics/Shaders/Fragment_MultiSheet.glsl | 2 +- Src/Graphics/Shaders/Vertex.glsl | 16 +- Src/Graphics/Shaders/Vertex2D.glsl | 2 +- Src/Graphics/Shaders2D.h | 140 ++++--- Src/Graphics/Shaders3D.h | 24 +- Src/Model3/TileGen.cpp | 185 ++++++++-- Src/Model3/TileGen.h | 22 +- Src/OSD/SDL/Main.cpp | 4 +- Src/Supermodel.h | 2 +- 16 files changed, 473 insertions(+), 321 deletions(-) diff --git a/Src/Graphics/Models.cpp b/Src/Graphics/Models.cpp index 4739a8d..d4b3bb5 100644 --- a/Src/Graphics/Models.cpp +++ b/Src/Graphics/Models.cpp @@ -34,8 +34,8 @@ * texture base coordinates are not re-decoded in two different places! */ -#include -#include +#include +#include #include "Supermodel.h" diff --git a/Src/Graphics/Render2D.cpp b/Src/Graphics/Render2D.cpp index e05a9e4..605e51d 100644 --- a/Src/Graphics/Render2D.cpp +++ b/Src/Graphics/Render2D.cpp @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. 
- ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -278,9 +278,12 @@ * Where 'r', 'g', and 'b' appear to be signed 8-bit color offsets. Because * they exceed the color resolution of the palette, they must be scaled * appropriately. + * + * Color offset registers are handled in TileGen.cpp. Two palettes are computed + * -- one for A/A' and another for B/B'. These are passed to the renderer. */ -#include +#include #include "Pkgs/glew.h" #include "Supermodel.h" #include "Graphics/Shaders2D.h" // fragment and vertex shaders @@ -300,7 +303,7 @@ ******************************************************************************/ // Draw 4-bit tile line, no clipping performed -void CRender2D::DrawTileLine4BitNoClip(UINT32 *buf, UINT16 tile, int tileLine) +void CRender2D::DrawTileLine4BitNoClip(UINT32 *buf, UINT16 tile, int tileLine, const UINT32 *pal) { unsigned tileOffset; // offset of tile pattern within VRAM unsigned palette; // color palette bits obtained from tile @@ -326,8 +329,8 @@ void CRender2D::DrawTileLine4BitNoClip(UINT32 *buf, UINT16 tile, int tileLine) *buf++ = pal[((pattern>>0)&0xF) | palette]; } -// Draw 8-bit tile line, clipped at left edge -void CRender2D::DrawTileLine8BitNoClip(UINT32 *buf, UINT16 tile, int tileLine) +// Draw 8-bit tile line, no clipping performed +void CRender2D::DrawTileLine8BitNoClip(UINT32 *buf, UINT16 tile, int tileLine, const UINT32 *pal) { unsigned tileOffset; // offset of tile pattern within VRAM unsigned palette; // color palette bits obtained from tile @@ -379,8 +382,9 @@ void CRender2D::DrawTileLine8BitNoClip(UINT32 *buf, UINT16 tile, int tileLine) * for this layer. * hScrollTable Pointer to the line-by-line horizontal scroll value * table for this layer. + * pal Palette to draw with. 
*/ -void CRender2D::DrawLine(UINT32 *dest, int layerNum, int y, const UINT16 *nameTableBase) +void CRender2D::DrawLine(UINT32 *dest, int layerNum, int y, const UINT16 *nameTableBase, const UINT32 *pal) { // Determine the layer color depth (4 or 8-bit pixels) @@ -397,10 +401,10 @@ void CRender2D::DrawLine(UINT32 *dest, int layerNum, int y, const UINT16 *nameTa for (int tx = 0; tx < 64; tx += 4) { // Little endian: offsets 0,1,2,3 become 1,0,3,2 - DrawTileLine4BitNoClip(dest, nameTable[1], vOffset); dest += 8; - DrawTileLine4BitNoClip(dest, nameTable[0], vOffset); dest += 8; - DrawTileLine4BitNoClip(dest, nameTable[3], vOffset); dest += 8; - DrawTileLine4BitNoClip(dest, nameTable[2], vOffset); dest += 8; + DrawTileLine4BitNoClip(dest, nameTable[1], vOffset, pal); dest += 8; + DrawTileLine4BitNoClip(dest, nameTable[0], vOffset, pal); dest += 8; + DrawTileLine4BitNoClip(dest, nameTable[3], vOffset, pal); dest += 8; + DrawTileLine4BitNoClip(dest, nameTable[2], vOffset, pal); dest += 8; nameTable += 4; // next set of 4 tiles } } @@ -408,41 +412,28 @@ void CRender2D::DrawLine(UINT32 *dest, int layerNum, int y, const UINT16 *nameTa { for (int tx = 0; tx < 64; tx += 4) { - DrawTileLine8BitNoClip(dest, nameTable[1], vOffset); dest += 8; - DrawTileLine8BitNoClip(dest, nameTable[0], vOffset); dest += 8; - DrawTileLine8BitNoClip(dest, nameTable[3], vOffset); dest += 8; - DrawTileLine8BitNoClip(dest, nameTable[2], vOffset); dest += 8; + DrawTileLine8BitNoClip(dest, nameTable[1], vOffset, pal); dest += 8; + DrawTileLine8BitNoClip(dest, nameTable[0], vOffset, pal); dest += 8; + DrawTileLine8BitNoClip(dest, nameTable[3], vOffset, pal); dest += 8; + DrawTileLine8BitNoClip(dest, nameTable[2], vOffset, pal); dest += 8; nameTable += 4; } } } -void CRender2D::MixLine(UINT32 *dest, const UINT32 *src, int layerNum, int y, bool isBottom) +// Mix in the appropriate layer (add on top of current contents) with horizontal scrolling under control of the stencil mask +static void MixLine(UINT32 
*dest, const UINT32 *src, int layerNum, int y, bool isBottom, const UINT16 *hScrollTable, const UINT16 *maskTableLine, int hFullScroll, bool lineScrollMode) { - /* - * Mix in the appropriate layer under control of the stencil mask, applying - * horizontal scrolling in theprocess - */ - - // Line scroll table - const UINT16 *hScrollTable = (UINT16 *) &vram[(0xF6000+layerNum*0x400)/4]; - - // Load horizontal full-screen scroll values and scroll mode - int hFullScroll = regs[0x60/4+layerNum]&0x3FF; - bool lineScrollMode = regs[0x60/4+layerNum]&0x8000; - - // Load horizontal scroll values + // Determine horizontal scroll values int hScroll; if (lineScrollMode) hScroll = hScrollTable[y]; else hScroll = hFullScroll; - // Get correct offset into mask table - const UINT16 *maskTable = (UINT16 *) &vram[0xF7000/4]; - maskTable += 2*y; + // Get correct mask table entry if (layerNum < 2) // little endian: layers A and A' use second word in each pair - ++maskTable; + ++maskTableLine; // Figure out what mask bit should be to mix in this layer UINT16 doCopy; @@ -452,13 +443,15 @@ void CRender2D::MixLine(UINT32 *dest, const UINT32 *src, int layerNum, int y, bo doCopy = 0x8000; // copy primary layer when mask is set // Mix first 60 tiles (4 at a time) - UINT16 mask = *maskTable; // mask for this line (each bit covers 4 tiles) - int i = hScroll&511; // line index (where to copy from) - for (int tx = 0; tx < 60; tx += 4) + UINT16 mask = *maskTableLine; // mask for this line (each bit covers 4 tiles) + int i = hScroll&511; // line index (where to copy from) + if (isBottom) { - // If bottom layer, we can copy without worrying about transparency, and must also write blank values when this layer is not showing - //TODO: move this test outside of loop - if (isBottom) + /* + * Bottom layers can be copied in without worrying about transparency + * but we must write blank values when layer is not showing. 
+ */ + for (int tx = 0; tx < 60; tx += 4) { // Only copy pixels if the mask bit is appropriate for this layer type if ((mask&0x8000) == doCopy) @@ -486,36 +479,11 @@ void CRender2D::MixLine(UINT32 *dest, const UINT32 *src, int layerNum, int y, bo i &= 511; // wrap line boundaries dest += 32; } - } - else - { - // Copy while testing for transparencies - if ((mask&0x8000) == doCopy) - { - UINT32 p; - for (int k = 0; k < 32; k++) - { - i &= 511; - p = src[i++]; - if ((p>>24) != 0) // opaque pixel, put it down - *dest = p; - dest++; - } - } - else - { - i += 32; - i &= 511; - dest += 32; - } + + mask <<= 1; } - mask <<= 1; - } - - // Mix last two tiles - if (isBottom) - { + // Mix last two tiles if ((mask&0x8000) == doCopy) { for (int k = 0; k < 16; k++) @@ -535,6 +503,34 @@ void CRender2D::MixLine(UINT32 *dest, const UINT32 *src, int layerNum, int y, bo } else { + /* + * Subsequent layers must test for transparency while mixing. + */ + for (int tx = 0; tx < 60; tx += 4) + { + if ((mask&0x8000) == doCopy) + { + UINT32 p; + for (int k = 0; k < 32; k++) + { + i &= 511; + p = src[i++]; + if ((p>>24) != 0) // opaque pixel, put it down + *dest = p; + dest++; + } + } + else + { + i += 32; + i &= 511; + dest += 32; + } + + mask <<= 1; + } + + if ((mask&0x8000) == doCopy) { UINT32 p; @@ -552,64 +548,127 @@ void CRender2D::MixLine(UINT32 *dest, const UINT32 *src, int layerNum, int y, bo void CRender2D::DrawTilemaps(UINT32 *destBottom, UINT32 *destTop) { - // Base address of all 4 name tables - const UINT16 *nameTableBase[4]; - nameTableBase[0] = (UINT16 *) &vram[(0xF8000+0*0x2000)/4]; // A - nameTableBase[1] = (UINT16 *) &vram[(0xF8000+1*0x2000)/4]; // A' - nameTableBase[2] = (UINT16 *) &vram[(0xF8000+2*0x2000)/4]; // B - nameTableBase[3] = (UINT16 *) &vram[(0xF8000+3*0x2000)/4]; // B' + /* + * Precompute data needed for each layer + */ + const UINT16 *nameTableBase[4]; + const UINT16 *hScrollTable[4]; + const UINT16 *maskTableLine = (UINT16 *) &vram[0xF7000/4]; // start at 
line 0 + int hFullScroll[4]; + bool lineScrollMode[4]; - // Render and mix each line - for (int y = 0; y < 384; y++) + for (int i = 0; i < 4; i++) // 0=A, 1=A', 2=B, 3=B' { - // Draw each layer - DrawLine(lineBuffer[0], 0, y, nameTableBase[0]); - DrawLine(lineBuffer[1], 1, y, nameTableBase[1]); - DrawLine(lineBuffer[2], 2, y, nameTableBase[2]); - DrawLine(lineBuffer[3], 3, y, nameTableBase[3]); - - //TODO: could probably further optimize: only have a single layer clear masked-out areas, then if alt. layer is being written to same place, don't bother worrying about transparencies if directly on top - // Combine according to priority settings - // NOTE: question mark indicates unobserved and therefore unknown - switch ((regs[0x20/4]>>8)&0xF) - { - case 0x5: // top: A, B, A'? bottom: B' - MixLine(destBottom, lineBuffer[3], 3, y, true); - MixLine(destTop, lineBuffer[2], 2, y, true); - MixLine(destTop, lineBuffer[0], 0, y, false); - MixLine(destTop, lineBuffer[1], 1, y, false); - break; - case 0x9: // ? 
all layers on top but relative order unknown (Spikeout Final Edition, after first boss) - memset(destBottom, 0, 496*sizeof(UINT32)); //TODO: use glClear(GL_COLOR_BUFFER_BIT) if there is no bottom layer - MixLine(destTop, lineBuffer[2], 2, y, true); - MixLine(destTop, lineBuffer[3], 3, y, false); - MixLine(destTop, lineBuffer[1], 1, y, false); - MixLine(destTop, lineBuffer[0], 0, y, false); - break; - case 0xF: // all on top - memset(destBottom, 0, 496*sizeof(UINT32)); //TODO: use glClear(GL_COLOR_BUFFER_BIT) if there is no bottom layer - MixLine(destTop, lineBuffer[2], 2, y, true); - MixLine(destTop, lineBuffer[3], 3, y, false); - MixLine(destTop, lineBuffer[0], 0, y, false); - MixLine(destTop, lineBuffer[1], 1, y, false); - break; - case 0x7: // top: A, B bottom: A'?, B' - MixLine(destBottom, lineBuffer[3], 3, y, true); - MixLine(destBottom, lineBuffer[1], 1, y, false); - MixLine(destTop, lineBuffer[2], 2, y, true); - MixLine(destTop, lineBuffer[0], 0, y, false); - break; - default: // unknown, use A and A' on top, B and B' on the bottom - MixLine(destBottom, lineBuffer[2], 2, y, true); - MixLine(destBottom, lineBuffer[3], 3, y, false); - MixLine(destTop, lineBuffer[0], 0, y, true); - MixLine(destTop, lineBuffer[1], 1, y, false); - break; - } + // Base of name table + nameTableBase[i] = (UINT16 *) &vram[(0xF8000+i*0x2000)/4]; - // Advance to next line in output surfaces - destBottom += 496; - destTop += 496; + // Horizontal line scroll tables + hScrollTable[i] = (UINT16 *) &vram[(0xF6000+i*0x400)/4]; + + // Load horizontal full-screen scroll values and scroll mode + hFullScroll[i] = regs[0x60/4+i]&0x3FF; + lineScrollMode[i] = regs[0x60/4+i]&0x8000; + } + + /* + * Precompute layer mixing order + */ + UINT32 *dest[4]; + const UINT32 *src[4]; + int sortedLayerNum[4]; + bool sortedIsBottom[4]; + const UINT16 *sortedHScrollTable[4]; + int sortedHFullScroll[4]; + bool sortedLineScrollMode[4]; + bool clearBottom; // when true, no layer assigned to bottom surface + + 
switch ((regs[0x20/4]>>8)&0xF) + { + case 0x5: // top: A, B, A'? bottom: B' + clearBottom = false; + dest[0]=destBottom; src[0]=lineBuffer[3]; sortedLayerNum[0]=3; sortedIsBottom[0]=true; sortedHScrollTable[0] = hScrollTable[3]; sortedHFullScroll[0]=hFullScroll[3]; sortedLineScrollMode[0]=lineScrollMode[3]; + dest[1]=destTop; src[1]=lineBuffer[2]; sortedLayerNum[1]=2; sortedIsBottom[1]=true; sortedHScrollTable[1] = hScrollTable[2]; sortedHFullScroll[1]=hFullScroll[2]; sortedLineScrollMode[1]=lineScrollMode[2]; + dest[2]=destTop; src[2]=lineBuffer[0]; sortedLayerNum[2]=0; sortedIsBottom[2]=false; sortedHScrollTable[2] = hScrollTable[0]; sortedHFullScroll[2]=hFullScroll[0]; sortedLineScrollMode[2]=lineScrollMode[0]; + dest[3]=destTop; src[3]=lineBuffer[1]; sortedLayerNum[3]=1; sortedIsBottom[3]=false; sortedHScrollTable[3] = hScrollTable[1]; sortedHFullScroll[3]=hFullScroll[1]; sortedLineScrollMode[3]=lineScrollMode[1]; + break; + case 0x9: // ? all layers on top but relative order unknown (Spikeout Final Edition, after first boss) + clearBottom = true; + dest[0]=destTop; src[0]=lineBuffer[2]; sortedLayerNum[0]=2; sortedIsBottom[0]=true; sortedHScrollTable[0] = hScrollTable[2]; sortedHFullScroll[0]=hFullScroll[2]; sortedLineScrollMode[0]=lineScrollMode[2]; + dest[1]=destTop; src[1]=lineBuffer[3]; sortedLayerNum[1]=3; sortedIsBottom[1]=false; sortedHScrollTable[1] = hScrollTable[3]; sortedHFullScroll[1]=hFullScroll[3]; sortedLineScrollMode[1]=lineScrollMode[3]; + dest[2]=destTop; src[2]=lineBuffer[1]; sortedLayerNum[2]=1; sortedIsBottom[2]=false; sortedHScrollTable[2] = hScrollTable[1]; sortedHFullScroll[2]=hFullScroll[1]; sortedLineScrollMode[2]=lineScrollMode[1]; + dest[3]=destTop; src[3]=lineBuffer[0]; sortedLayerNum[3]=0; sortedIsBottom[3]=false; sortedHScrollTable[3] = hScrollTable[0]; sortedHFullScroll[3]=hFullScroll[0]; sortedLineScrollMode[3]=lineScrollMode[0]; + break; + case 0xF: // all on top + clearBottom = true; + dest[0]=destTop; src[0]=lineBuffer[2];
sortedLayerNum[0]=2; sortedIsBottom[0]=true; sortedHScrollTable[0] = hScrollTable[2]; sortedHFullScroll[0]=hFullScroll[2]; sortedLineScrollMode[0]=lineScrollMode[2]; + dest[1]=destTop; src[1]=lineBuffer[3]; sortedLayerNum[1]=3; sortedIsBottom[1]=false; sortedHScrollTable[1] = hScrollTable[3]; sortedHFullScroll[1]=hFullScroll[3]; sortedLineScrollMode[1]=lineScrollMode[3]; + dest[2]=destTop; src[2]=lineBuffer[0]; sortedLayerNum[2]=0; sortedIsBottom[2]=false; sortedHScrollTable[2] = hScrollTable[0]; sortedHFullScroll[2]=hFullScroll[0]; sortedLineScrollMode[2]=lineScrollMode[0]; + dest[3]=destTop; src[3]=lineBuffer[1]; sortedLayerNum[3]=1; sortedIsBottom[3]=false; sortedHScrollTable[3] = hScrollTable[1]; sortedHFullScroll[3]=hFullScroll[1]; sortedLineScrollMode[3]=lineScrollMode[1]; + break; + case 0x7: // top: A, B bottom: A'?, B' + clearBottom = false; + dest[0]=destBottom; src[0]=lineBuffer[3]; sortedLayerNum[0]=3; sortedIsBottom[0]=true; sortedHScrollTable[0] = hScrollTable[3]; sortedHFullScroll[0]=hFullScroll[3]; sortedLineScrollMode[0]=lineScrollMode[3]; + dest[1]=destBottom; src[1]=lineBuffer[1]; sortedLayerNum[1]=1; sortedIsBottom[1]=false; sortedHScrollTable[1] = hScrollTable[1]; sortedHFullScroll[1]=hFullScroll[1]; sortedLineScrollMode[1]=lineScrollMode[1]; + dest[2]=destTop; src[2]=lineBuffer[2]; sortedLayerNum[2]=2; sortedIsBottom[2]=true; sortedHScrollTable[2] = hScrollTable[2]; sortedHFullScroll[2]=hFullScroll[2]; sortedLineScrollMode[2]=lineScrollMode[2]; + dest[3]=destTop; src[3]=lineBuffer[0]; sortedLayerNum[3]=0; sortedIsBottom[3]=false; sortedHScrollTable[3] = hScrollTable[0]; sortedHFullScroll[3]=hFullScroll[0]; sortedLineScrollMode[3]=lineScrollMode[0]; + break; + default: // unknown, use A and A' on top, B and B' on the bottom + clearBottom = false; + dest[0]=destBottom; src[0]=lineBuffer[2]; sortedLayerNum[0]=2; sortedIsBottom[0]=true; sortedHScrollTable[0] = hScrollTable[2]; sortedHFullScroll[0]=hFullScroll[2]; 
sortedLineScrollMode[0]=lineScrollMode[2]; + dest[1]=destBottom; src[1]=lineBuffer[3]; sortedLayerNum[1]=3; sortedIsBottom[1]=false; sortedHScrollTable[1] = hScrollTable[3]; sortedHFullScroll[1]=hFullScroll[3]; sortedLineScrollMode[1]=lineScrollMode[3]; + dest[2]=destTop; src[2]=lineBuffer[0]; sortedLayerNum[2]=0; sortedIsBottom[2]=true; sortedHScrollTable[2] = hScrollTable[0]; sortedHFullScroll[2]=hFullScroll[0]; sortedLineScrollMode[2]=lineScrollMode[0]; + dest[3]=destTop; src[3]=lineBuffer[1]; sortedLayerNum[3]=1; sortedIsBottom[3]=false; sortedHScrollTable[3] = hScrollTable[1]; sortedHFullScroll[3]=hFullScroll[1]; sortedLineScrollMode[3]=lineScrollMode[1]; + break; + } + + /* + * Render and mix each line + */ + if (clearBottom) + { + for (int y = 0; y < 384; y++) + { + // Draw one scanline from each layer + DrawLine(lineBuffer[0], 0, y, nameTableBase[0], pal[0]); + DrawLine(lineBuffer[1], 1, y, nameTableBase[1], pal[0]); + DrawLine(lineBuffer[2], 2, y, nameTableBase[2], pal[1]); + DrawLine(lineBuffer[3], 3, y, nameTableBase[3], pal[1]); + + // No bottom layer: clear line y of the bottom surface (destBottom itself is not advanced) + memset(destBottom + y*496, 0, 496*sizeof(UINT32)); + + // Mix the layers in the correct order + for (int i = 0; i < 4; i++) + { + MixLine(dest[i], src[i], sortedLayerNum[i], y, sortedIsBottom[i], sortedHScrollTable[i], maskTableLine, sortedHFullScroll[i], sortedLineScrollMode[i]); + dest[i] += 496; // next line + } + + // Next line in mask table + maskTableLine += 2; + } + } + else + { + for (int y = 0; y < 384; y++) + { + // Draw one scanline from each layer + DrawLine(lineBuffer[0], 0, y, nameTableBase[0], pal[0]); + DrawLine(lineBuffer[1], 1, y, nameTableBase[1], pal[0]); + DrawLine(lineBuffer[2], 2, y, nameTableBase[2], pal[1]); + DrawLine(lineBuffer[3], 3, y, nameTableBase[3], pal[1]); + + // Mix the layers in the correct order + for (int i = 0; i < 4; i++) + { + MixLine(dest[i], src[i], sortedLayerNum[i], y, sortedIsBottom[i], sortedHScrollTable[i], maskTableLine, sortedHFullScroll[i],
sortedLineScrollMode[i]); + dest[i] += 496; // next line + } + + // Next line in mask table + maskTableLine += 2; + } + } } @@ -624,11 +683,8 @@ void CRender2D::DisplaySurface(int surface, GLfloat z) // If bottom surface and wide screen, clear overscan areas if (surface && g_Config.wideScreen) { - UINT32 c = pal[0]; // just use palette color 0 for now (not the best solution, it's usually black) - GLfloat r = (GLfloat)(c&0xFF) / 255.0f; - GLfloat g = (GLfloat)((c>>8)&0xFF) / 255.0f; - GLfloat b = (GLfloat)((c>>16)&0xFF) / 255.0f; - glClearColor(r, g, b, 0.0); + // For now, clear w/ black (may want to use color 0 later) + glClearColor(0.0, 0.0, 0.0, 0.0); glViewport(0, 0, xOffs, totalYPixels); glClear(GL_COLOR_BUFFER_BIT); glViewport(xOffs+xPixels, 0, totalXPixels, totalYPixels); @@ -670,31 +726,9 @@ void CRender2D::Setup2D(void) glUseProgram(shaderProgram); } -// Convert color offset register data to RGB -void CRender2D::ColorOffset(GLfloat colorOffset[3], UINT32 reg) -{ - INT8 ir, ig, ib; - - ib = (reg>>16)&0xFF; - ig = (reg>>8)&0xFF; - ir = (reg>>0)&0xFF; - - /* - * Uncertain how these should be interpreted. It appears to be signed, - * which means the values range from -128 to +127. The division by 128 - * normalizes this to roughly -1,+1. 
- */ - colorOffset[0] = (GLfloat) ir * (1.0f/128.0f); - colorOffset[1] = (GLfloat) ig * (1.0f/128.0f); - colorOffset[2] = (GLfloat) ib * (1.0f/128.0f); - //printf("%08X -> %g,%g,%g\n", reg, colorOffset[2], colorOffset[1], colorOffset[0]); -} - // Bottom layers void CRender2D::BeginFrame(void) { - GLfloat colorOffset[3]; - // Update all layers DrawTilemaps(surfBottom, surfTop); glActiveTexture(GL_TEXTURE0); // texture unit 0 @@ -705,21 +739,15 @@ void CRender2D::BeginFrame(void) // Display bottom surface Setup2D(); - ColorOffset(colorOffset, regs[0x44/4]); - glUniform3fv(colorOffsetLoc, 1, colorOffset); DisplaySurface(1, 0.0); } // Top layers void CRender2D::EndFrame(void) { - GLfloat colorOffset[3]; - // Display top surface Setup2D(); glEnable(GL_BLEND); - ColorOffset(colorOffset, regs[0x40/4]); - glUniform3fv(colorOffsetLoc, 1, colorOffset); DisplaySurface(0, -0.5); } @@ -745,9 +773,10 @@ void CRender2D::AttachRegisters(const UINT32 *regPtr) DebugLog("Render2D attached registers\n"); } -void CRender2D::AttachPalette(const UINT32 *palPtr) +void CRender2D::AttachPalette(const UINT32 *palPtr[2]) { - pal = palPtr; + pal[0] = palPtr[0]; + pal[1] = palPtr[1]; DebugLog("Render2D attached palette\n"); } @@ -775,7 +804,6 @@ bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned glUseProgram(shaderProgram); // bind program textureMapLoc = glGetUniformLocation(shaderProgram, "textureMap"); glUniform1i(textureMapLoc,0); // attach it to texture unit 0 - colorOffsetLoc = glGetUniformLocation(shaderProgram, "colorOffset"); // Allocate memory for layer surfaces memoryPool = new(std::nothrow) UINT8[MEMORY_POOL_SIZE]; diff --git a/Src/Graphics/Render2D.h b/Src/Graphics/Render2D.h index 496cf81..f0e64bf 100644 --- a/Src/Graphics/Render2D.h +++ b/Src/Graphics/Render2D.h @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. 
- ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -82,12 +82,22 @@ public: * rendering otherwise the program may crash with an access violation. * * Parameters: - * regPtr Pointer to the base of the tile generator registers. - * There are assumed to be 64 in all. + * regPtr Pointer to the base of the tile generator registers. There + * are assumed to be 64 in all. */ void AttachRegisters(const UINT32 *regPtr); - void AttachPalette(const UINT32 *palPtr); + /* + * AttachPalette(palPtr): + * + * Attaches tile generator palettes. This must be done prior to any + * rendering. + * + * Parameters: + * palPtr Pointer to two palettes. The first is for layers A/A' and + * the second is for B/B'. + */ + void AttachPalette(const UINT32 *palPtr[2]); /* * AttachVRAM(vramPtr): @@ -134,22 +144,16 @@ public: private: // Private member functions - void DrawTileLine8BitNoClip(UINT32 *buf, UINT16 tile, int tileLine); - void DrawTileLine4BitNoClip(UINT32 *buf, UINT16 tile, int tileLine); - void DrawTileLine4Bit(UINT32 *buf, int offset, UINT16 tile, int tileLine); - void DrawTileLine4BitRightClip(UINT32 *buf, int offset, UINT16 tile, int tileLine, int numPixels); - void DrawTileLine8Bit(UINT32 *buf, int offset, UINT16 tile, int tileLine); - void DrawTileLine8BitRightClip(UINT32 *buf, int offset, UINT16 tile, int tileLine, int numPixels); - void DrawLine(UINT32 *dest, int layerNum, int y, const UINT16 *nameTableBase); - void MixLine(UINT32 *dest, const UINT32 *src, int layerNum, int y, bool isBottom); + void DrawTileLine8BitNoClip(UINT32 *buf, UINT16 tile, int tileLine, const UINT32 *pal); + void DrawTileLine4BitNoClip(UINT32 *buf, UINT16 tile, int tileLine, const UINT32 *pal); + void DrawLine(UINT32 *dest, int layerNum, int y, const UINT16 *nameTableBase, const UINT32 *pal); void DrawTilemaps(UINT32 *destBottom, UINT32 *destTop); void DisplaySurface(int surface, GLfloat z); void 
Setup2D(void); - void ColorOffset(GLfloat colorOffset[3], UINT32 reg); - + // Data received from tile generator device object const UINT32 *vram; - const UINT32 *pal; + const UINT32 *pal[2]; // palettes for A/A' and B/B' const UINT32 *regs; // OpenGL data @@ -163,7 +167,6 @@ private: GLuint vertexShader; // vertex shader handle GLuint fragmentShader; // fragment shader GLuint textureMapLoc; // location of "textureMap" uniform - GLuint colorOffsetLoc; // uniform // Buffers UINT8 *memoryPool; // all memory is allocated here diff --git a/Src/Graphics/Render3D.cpp b/Src/Graphics/Render3D.cpp index 729fc02..02fe420 100644 --- a/Src/Graphics/Render3D.cpp +++ b/Src/Graphics/Render3D.cpp @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -853,7 +853,7 @@ void CRender3D::RenderViewport(UINT32 addr, int pri) // Set up viewport and projection (TO-DO: near and far clipping) glMatrixMode(GL_PROJECTION); glLoadIdentity(); - if (g_Config.wideScreen && (vpX==0) && (vpY==0) && (vpWidth>=495) && (vpHeight >= 383)) + if (g_Config.wideScreen && (vpX==0) && (vpWidth>=495) && (vpY==0) && (vpHeight >= 383)) // only expand viewports that occupy whole screen { // Wide screen hack only modifies X axis and not the Y FOV viewportX = 0; diff --git a/Src/Graphics/Shader.cpp b/Src/Graphics/Shader.cpp index 1daf0d6..45d6722 100644 --- a/Src/Graphics/Shader.cpp +++ b/Src/Graphics/Shader.cpp @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. 
** @@ -43,7 +43,7 @@ */ #include -#include +#include #include "Pkgs/glew.h" #include "Supermodel.h" diff --git a/Src/Graphics/Shaders/Fragment.glsl b/Src/Graphics/Shaders/Fragment.glsl index 383af9d..9735872 100644 --- a/Src/Graphics/Shaders/Fragment.glsl +++ b/Src/Graphics/Shaders/Fragment.glsl @@ -1,7 +1,7 @@ -/** + /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** diff --git a/Src/Graphics/Shaders/Fragment2D.glsl b/Src/Graphics/Shaders/Fragment2D.glsl index b9be9e0..9a3cd3d 100644 --- a/Src/Graphics/Shaders/Fragment2D.glsl +++ b/Src/Graphics/Shaders/Fragment2D.glsl @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -29,7 +29,6 @@ // Global uniforms uniform sampler2D textureMap; // 512x512 layer surface -uniform vec3 colorOffset; // color offset for this layer /* * main(): @@ -40,5 +39,4 @@ uniform vec3 colorOffset; // color offset for this layer void main(void) { gl_FragColor = texture2D(textureMap, gl_TexCoord[0].st); - gl_FragColor.rgb = clamp(gl_FragColor.rgb+colorOffset,0.0,1.0); } diff --git a/Src/Graphics/Shaders/Fragment_MultiSheet.glsl b/Src/Graphics/Shaders/Fragment_MultiSheet.glsl index 5b454d8..20d3fd4 100644 --- a/Src/Graphics/Shaders/Fragment_MultiSheet.glsl +++ b/Src/Graphics/Shaders/Fragment_MultiSheet.glsl @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. 
** diff --git a/Src/Graphics/Shaders/Vertex.glsl b/Src/Graphics/Shaders/Vertex.glsl index 4ba8952..368001f 100644 --- a/Src/Graphics/Shaders/Vertex.glsl +++ b/Src/Graphics/Shaders/Vertex.glsl @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -145,19 +145,19 @@ void main(void) // Standard specular lighting equation vec3 V = normalize(-viewVertex); vec3 H = normalize(sunVector+V); // halfway vector - float s = max(10,64-shininess); // seems to look nice, but probably not correct - fsSpecularTerm = pow(max(dot(viewNormal,H),0),s); - if (sunFactor <= 0) fsSpecularTerm = 0; + float s = max(10.0,64.0-shininess); // seems to look nice, but probably not correct + fsSpecularTerm = pow(max(dot(viewNormal,H),0.0),s); + if (sunFactor <= 0.0) fsSpecularTerm = 0.0; // Faster approximation - //float temp = max(dot(viewNormal,H),0); - //float s = 64-shininess; + //float temp = max(dot(viewNormal,H),0.0); + //float s = 64.0-shininess; //fsSpecularTerm = temp/(s-temp*s+temp); // Phong formula - //vec3 R = normalize(2*dot(sunVector,viewNormal)*viewNormal - sunVector); + //vec3 R = normalize(2.0*dot(sunVector,viewNormal)*viewNormal - sunVector); //vec3 V = normalize(-viewVertex); - //float s = max(2,64-shininess); + //float s = max(2.0,64.0-shininess); //fsSpecularTerm = pow(max(dot(R,V),0),s); } } diff --git a/Src/Graphics/Shaders/Vertex2D.glsl b/Src/Graphics/Shaders/Vertex2D.glsl index c1567a9..3c286df 100644 --- a/Src/Graphics/Shaders/Vertex2D.glsl +++ b/Src/Graphics/Shaders/Vertex2D.glsl @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. 
** diff --git a/Src/Graphics/Shaders2D.h b/Src/Graphics/Shaders2D.h index c39432b..fbc40ac 100644 --- a/Src/Graphics/Shaders2D.h +++ b/Src/Graphics/Shaders2D.h @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -31,88 +31,86 @@ // Vertex shader static const char vertexShaderSource[] = { -"/** \n" -" ** Supermodel \n" -" ** A Sega Model 3 Arcade Emulator. \n" -" ** Copyright 2011 Bart Trzynadlowski \n" -" ** \n" -" ** This file is part of Supermodel. \n" -" ** \n" -" ** Supermodel is free software: you can redistribute it and/or modify it under \n" -" ** the terms of the GNU General Public License as published by the Free \n" -" ** Software Foundation, either version 3 of the License, or (at your option) \n" -" ** any later version. \n" -" ** \n" -" ** Supermodel is distributed in the hope that it will be useful, but WITHOUT \n" -" ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or \n" -" ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for \n" -" ** more details. \n" -" ** \n" -" ** You should have received a copy of the GNU General Public License along \n" -" ** with Supermodel. If not, see . 
\n" -" **/ \n" +"/**\n" +" ** Supermodel\n" +" ** A Sega Model 3 Arcade Emulator.\n" +" ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson \n" +" **\n" +" ** This file is part of Supermodel.\n" +" **\n" +" ** Supermodel is free software: you can redistribute it and/or modify it under\n" +" ** the terms of the GNU General Public License as published by the Free \n" +" ** Software Foundation, either version 3 of the License, or (at your option)\n" +" ** any later version.\n" +" **\n" +" ** Supermodel is distributed in the hope that it will be useful, but WITHOUT\n" +" ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n" +" ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for\n" +" ** more details.\n" +" **\n" +" ** You should have received a copy of the GNU General Public License along\n" +" ** with Supermodel. If not, see .\n" +" **/\n" "\n" -"/* \n" -" * Vertex2D.glsl \n" -" * \n" -" * Vertex shader for 2D tilemap rendering. \n" -" */ \n" -" \n" -"#version 120 \n" +"/*\n" +" * Vertex2D.glsl\n" +" *\n" +" * Vertex shader for 2D tilemap rendering.\n" +" */\n" +" \n" +"#version 120\n" "\n" -"void main(void) \n" -"{ \n" -" gl_TexCoord[0] = gl_MultiTexCoord0; \n" -" gl_Position = gl_ModelViewProjectionMatrix*gl_Vertex; \n" +"void main(void)\n" +"{\n" +"\tgl_TexCoord[0] = gl_MultiTexCoord0;\n" +"\tgl_Position = gl_ModelViewProjectionMatrix*gl_Vertex;\n" "}\n" }; // Fragment shader static const char fragmentShaderSource[] = { -"/** \n" -" ** Supermodel \n" -" ** A Sega Model 3 Arcade Emulator. \n" -" ** Copyright 2011 Bart Trzynadlowski \n" -" ** \n" -" ** This file is part of Supermodel. \n" -" ** \n" -" ** Supermodel is free software: you can redistribute it and/or modify it under \n" -" ** the terms of the GNU General Public License as published by the Free \n" -" ** Software Foundation, either version 3 of the License, or (at your option) \n" -" ** any later version. 
\n" -" ** \n" -" ** Supermodel is distributed in the hope that it will be useful, but WITHOUT \n" -" ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or \n" -" ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for \n" -" ** more details. \n" -" ** \n" -" ** You should have received a copy of the GNU General Public License along \n" -" ** with Supermodel. If not, see . \n" -" **/ \n" +"/**\n" +" ** Supermodel\n" +" ** A Sega Model 3 Arcade Emulator.\n" +" ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson \n" +" **\n" +" ** This file is part of Supermodel.\n" +" **\n" +" ** Supermodel is free software: you can redistribute it and/or modify it under\n" +" ** the terms of the GNU General Public License as published by the Free \n" +" ** Software Foundation, either version 3 of the License, or (at your option)\n" +" ** any later version.\n" +" **\n" +" ** Supermodel is distributed in the hope that it will be useful, but WITHOUT\n" +" ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n" +" ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for\n" +" ** more details.\n" +" **\n" +" ** You should have received a copy of the GNU General Public License along\n" +" ** with Supermodel. If not, see .\n" +" **/\n" +" \n" +"/*\n" +" * Fragment2D.glsl\n" +" *\n" +" * Fragment shader for 2D tilemap rendering.\n" +" */\n" "\n" -"/* \n" -" * Fragment2D.glsl \n" -" * \n" -" * Fragment shader for 2D tilemap rendering. \n" -" */ \n" +"#version 120\n" "\n" -"#version 120 \n" +"// Global uniforms\n" +"uniform sampler2D\ttextureMap;\t\t// 512x512 layer surface\n" "\n" -"// Global uniforms \n" -"uniform sampler2D textureMap; // 512x512 layer surface \n" -"uniform vec3 colorOffset; // color offset for this layer \n" +"/*\n" +" * main():\n" +" *\n" +" * Fragment shader entry point.\n" +" */\n" "\n" -"/* \n" -" * main(): \n" -" * \n" -" * Fragment shader entry point. 
\n" -" */ \n" -"\n" -"void main(void) \n" -"{ \n" -" gl_FragColor = texture2D(textureMap, gl_TexCoord[0].st); \n" -" gl_FragColor.rgb = clamp(gl_FragColor.rgb+colorOffset,0.0,1.0); \n" +"void main(void)\n" +"{\t\n" +"\tgl_FragColor = texture2D(textureMap, gl_TexCoord[0].st);\n" "}\n" }; diff --git a/Src/Graphics/Shaders3D.h b/Src/Graphics/Shaders3D.h index 5f8a343..78344be 100644 --- a/Src/Graphics/Shaders3D.h +++ b/Src/Graphics/Shaders3D.h @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -34,7 +34,7 @@ static const char vertexShaderSource[] = "/**\n" " ** Supermodel\n" " ** A Sega Model 3 Arcade Emulator.\n" -" ** Copyright 2011 Bart Trzynadlowski, Nik Henson \n" +" ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson \n" " **\n" " ** This file is part of Supermodel.\n" " **\n" @@ -178,20 +178,20 @@ static const char vertexShaderSource[] = " \t\t\t// Standard specular lighting equation\n" " \t\t\tvec3 V = normalize(-viewVertex);\n" " \t\t\tvec3 H = normalize(sunVector+V);\t// halfway vector\n" -" \t\t\tfloat s = max(10,64-shininess);\t\t// seems to look nice, but probably not correct\n" -" \t\t\tfsSpecularTerm = pow(max(dot(viewNormal,H),0),s);\n" -" \t\t\tif (sunFactor <= 0) fsSpecularTerm = 0;\n" +" \t\t\tfloat s = max(10.0,64.0-shininess);\t\t// seems to look nice, but probably not correct\n" +" \t\t\tfsSpecularTerm = pow(max(dot(viewNormal,H),0.0),s);\n" +" \t\t\tif (sunFactor <= 0.0) fsSpecularTerm = 0.0;\n" " \t\t\t\n" " \t\t\t// Faster approximation \t\t\t\n" -" \t\t\t//float temp = max(dot(viewNormal,H),0);\n" -" \t\t\t//float s = 64-shininess;\n" +" \t\t\t//float temp = max(dot(viewNormal,H),0.0);\n" +" \t\t\t//float s = 64.0-shininess;\n" " \t\t\t//fsSpecularTerm = temp/(s-temp*s+temp);\n" " \t\t\t\n" " \t\t\t// Phong formula\n" -" \t\t\t//vec3 R = 
normalize(2*dot(sunVector,viewNormal)*viewNormal - sunVector);\n" +" \t\t\t//vec3 R = normalize(2.0*dot(sunVector,viewNormal)*viewNormal - sunVector);\n" " \t\t\t//vec3 V = normalize(-viewVertex);\n" -" \t\t\t//float s = max(2,64-shininess);\n" -" \t\t\t//fsSpecularTerm = pow(max(dot(R,V),0),s);\n" +" \t\t\t//float s = max(2.0,64.0-shininess);\n" +" \t\t\t//fsSpecularTerm = pow(max(dot(R,V),0.0),s);\n" " \t\t}\n" "\t}\n" "\t\n" @@ -218,7 +218,7 @@ static const char fragmentShaderSingleSheetSource[] = "/**\n" " ** Supermodel\n" " ** A Sega Model 3 Arcade Emulator.\n" -" ** Copyright 2011 Bart Trzynadlowski, Nik Henson \n" +" ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson \n" " **\n" " ** This file is part of Supermodel.\n" " **\n" @@ -416,7 +416,7 @@ static const char fragmentShaderMultiSheetSource[] = "/**\n" " ** Supermodel\n" " ** A Sega Model 3 Arcade Emulator.\n" -" ** Copyright 2011 Bart Trzynadlowski, Nik Henson \n" +" ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson \n" " **\n" " ** This file is part of Supermodel.\n" " **\n" diff --git a/Src/Model3/TileGen.cpp b/Src/Model3/TileGen.cpp index 385b2a6..53a0262 100644 --- a/Src/Model3/TileGen.cpp +++ b/Src/Model3/TileGen.cpp @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -22,7 +22,26 @@ /* * TileGen.cpp * - * Implementation of the CTileGen class: 2D tile generator. + * Implementation of the CTileGen class: 2D tile generator. Palette decoding + * and synchronization with the renderer (which may run in a separate thread) + * are performed here as well. For a description of the tile generator + * hardware, please refer to the 2D rendering engine source code. + * + * Palettes + * -------- + * + * Multiple copies of the 32K-color palette data are maintained. The first is + * the raw data as written to the VRAM. 
Two copies are computed, one for layers + * A/A' and the other for layers B/B'. These pairs of layers have independent + * color offset registers associated with them. The renderer uses these + * "computed" palettes. + * + * The computed palettes are updated whenever the real palette is modified, a + * single color entry at a time. If the color register is modified, the entire + * palette has to be recomputed accordingly. + * + * The read-only copy of the palette, which is generated for the renderer, only + * stores the two computed palettes. * * TO-DO List: * ----------- @@ -41,17 +60,24 @@ #define MARK_DIRTY(dirtyArray, addr) dirtyArray[addr>>(PAGE_WIDTH+3)] |= 1<<((addr>>PAGE_WIDTH)&7) // Offsets of memory regions within TileGen memory pool -#define OFFSET_VRAM 0x000000 -#define OFFSET_PAL 0x120000 -#define MEM_POOL_SIZE_RW (0x120000+0x020000) -#define OFFSET_VRAM_RO 0x140000 // [read-only snapshot] -#define OFFSET_PAL_RO 0x260000 // [read-only snapshot] -#define MEM_POOL_SIZE_RO (0x120000+0x020000) -#define OFFSET_VRAM_DIRTY 0x280000 -#define OFFSET_PAL_DIRTY (OFFSET_VRAM_DIRTY+DIRTY_SIZE(0x120000)) -#define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(MEM_POOL_SIZE_RO)) +#define OFFSET_VRAM 0x000000 // VRAM and palette data +#define OFFSET_PAL_A 0x120000 // computed A/A' palette +#define OFFSET_PAL_B 0x140000 // computed B/B' palette +#define MEM_POOL_SIZE_RW (0x120000+0x040000) + +#define OFFSET_VRAM_RO 0x160000 // [read-only snapshot] +#define OFFSET_PAL_RO_A 0x280000 // [read-only snapshot] +#define OFFSET_PAL_RO_B 0x2A0000 +#define MEM_POOL_SIZE_RO (0x120000+0x040000) + +#define OFFSET_VRAM_DIRTY 0x2C0000 +#define OFFSET_PAL_A_DIRTY (OFFSET_VRAM_DIRTY+DIRTY_SIZE(0x120000)) +#define OFFSET_PAL_B_DIRTY (OFFSET_PAL_A_DIRTY+DIRTY_SIZE(0x20000)) +#define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(0x120000)+2*DIRTY_SIZE(0x20000)) // VRAM + 2 palette dirty buffers + #define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY) + 
/****************************************************************************** Save States ******************************************************************************/ @@ -80,6 +106,10 @@ void CTileGen::LoadState(CBlockFile *SaveState) WriteRAM(i, data); } SaveState->Read(regs, sizeof(regs)); + + // Because regs were read after palette, must recompute + RecomputePalettes(); + // If multi-threaded, update read-only snapshots too if (g_Config.gpuMultiThreaded) UpdateSnapshots(true); @@ -102,6 +132,7 @@ void CTileGen::BeginVBlank(void) printf("64: %08X\n", regs[0x64/4]); printf("68: %08X\n", regs[0x68/4]); printf("6C: %08X\n", regs[0x6C/4]); + printf("\n"); */ } @@ -110,8 +141,34 @@ void CTileGen::EndVBlank(void) // } +void CTileGen::RecomputePalettes(void) +{ + // Writing the colors forces palettes to be computed + if (g_Config.gpuMultiThreaded) + { + for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 ) + { + MARK_DIRTY(palDirty[0], colorAddr); + MARK_DIRTY(palDirty[1], colorAddr); + WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]); + } + } + else + { + for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 ) + WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]); + } +} + UINT32 CTileGen::SyncSnapshots(void) { + // Good time to recompute the palettes + if (recomputePalettes) + { + RecomputePalettes(); + recomputePalettes = false; + } + if (!g_Config.gpuMultiThreaded) return 0; @@ -168,11 +225,12 @@ UINT32 CTileGen::UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned UINT32 CTileGen::UpdateSnapshots(bool copyWhole) { // Update all memory region snapshots - UINT32 palCopied = UpdateSnapshot(copyWhole, (UINT8*)pal, (UINT8*)palRO, 0x020000, palDirty); + UINT32 palACopied = UpdateSnapshot(copyWhole, (UINT8*)pal[0], (UINT8*)palRO[0], 0x020000, palDirty[0]); + UINT32 palBCopied = UpdateSnapshot(copyWhole, (UINT8*)pal[1], (UINT8*)palRO[1], 0x020000, palDirty[1]); UINT32 vramCopied = 
UpdateSnapshot(copyWhole, (UINT8*)vram, (UINT8*)vramRO, 0x120000, vramDirty); memcpy(regsRO, regs, sizeof(regs)); // Always copy whole of regs buffer - //printf("TileGen copied - pal:%4uK, vram:%4uK, regs:%uK\n", palCopied / 1024, vramCopied / 1024, sizeof(regs) / 1024); - return palCopied + vramCopied + sizeof(regs); + //printf("TileGen copied - palA:%4uK, palB:%4uK, vram:%4uK, regs:%uK\n", palACopied / 1024, palBCopied / 1024, vramCopied / 1024, sizeof(regs) / 1024); + return palACopied + palBCopied + vramCopied + sizeof(regs); } void CTileGen::BeginFrame(void) @@ -210,8 +268,15 @@ void CTileGen::WriteRAM(unsigned addr, UINT32 data) { addr -= 0x100000; unsigned color = addr/4; // color index + + // Same address in both palettes must be marked dirty if (g_Config.gpuMultiThreaded) - MARK_DIRTY(palDirty, addr); + { + MARK_DIRTY(palDirty[0], addr); + MARK_DIRTY(palDirty[1], addr); + } + + // Both palettes will be modified simultaneously WritePalette(color, data); } } @@ -220,12 +285,51 @@ void CTileGen::InitPalette(void) { for (int i = 0; i < 0x20000/4; i++) { - WritePalette(i, vram[0x100000/4 + i]); + WritePalette(i, *(UINT32 *) &vram[0x100000 + i*4]); if (g_Config.gpuMultiThreaded) - palRO[i] = pal[i]; + { + palRO[0][i] = pal[0][i]; + palRO[1][i] = pal[1][i]; + } } } +static inline UINT32 AddColorOffset(UINT8 r, UINT8 g, UINT8 b, UINT8 a, UINT32 offsetReg) +{ + INT32 ir, ig, ib; + + /* + * Color offsets are signed but I'm not sure whether or not their range is + * merely [-128,+127], which would mean adding to a 0 component would not + * result in full intensity (only +127 at most). Alternatively, the signed + * value might have to be multiplied by 2. That is assumed here. In either + * case, the signed addition should be saturated.
+ */ + ib = (INT32) (INT8)((offsetReg>>16)&0xFF); + ig = (INT32) (INT8)((offsetReg>>8)&0xFF); + ir = (INT32) (INT8)((offsetReg>>0)&0xFF); + ib *= 2; + ig *= 2; + ir *= 2; + + // Add with saturation + ib += (INT32) (UINT32) b; + if (ib < 0) ib = 0; + else if (ib > 0xFF) ib = 0xFF; + ig += (INT32) (UINT32) g; + if (ig < 0) ig = 0; + else if (ig > 0xFF) ig = 0xFF; + ir += (INT32) (UINT32) r; + if (ir < 0) ir = 0; + else if (ir > 0xFF) ir = 0xFF; + + // Construct the final 32-bit ABGR-format color + r = (UINT8) ir; + g = (UINT8) ig; + b = (UINT8) ib; + return ((UINT32)a<<24)|((UINT32)b<<16)|((UINT32)g<<8)|(UINT32)r; +} + void CTileGen::WritePalette(unsigned color, UINT32 data) { UINT8 r, g, b, a; @@ -242,32 +346,43 @@ void CTileGen::WritePalette(unsigned color, UINT32 data) r = (data<<3)&0xF8; } - pal[color] = (a<<24)|(b<<16)|(g<<8)|r; + pal[0][color] = AddColorOffset(r, g, b, a, regs[0x40/4]); // A/A' + pal[1][color] = AddColorOffset(r, g, b, a, regs[0x44/4]); // B/B' } void CTileGen::WriteRegister(unsigned reg, UINT32 data) { reg &= 0xFF; - regs[reg/4] = data; - + switch (reg) { + case 0x08: + case 0x0C: + case 0x20: + case 0x60: + case 0x64: + case 0x68: + case 0x6C: + break; + case 0x40: // layer A/A' color offset + case 0x44: // layer B/B' color offset + // We only have a mechanism to recompute both palettes simultaneously. + // These regs are often written together in the same frame. To avoid + // needlessly recomputing both palettes twice, we defer the operation. 
+ if (regs[reg/4] != data) // only if changed + recomputePalettes = true; + break; case 0x10: // IRQ acknowledge IRQ->Deassert(data&0xFF); break; - case 0x60: - break; - case 0x64: - break; - case 0x68: - break; - case 0x6C: - break; default: DebugLog("Tile Generator reg %02X = %08X\n", reg, data); //printf("%02X = %08X\n", reg, data); break; } + + // Modify register + regs[reg/4] = data; } void CTileGen::Reset(void) @@ -278,6 +393,7 @@ void CTileGen::Reset(void) memset(regsRO, 0, sizeof(regsRO)); InitPalette(); + recomputePalettes = false; DebugLog("Tile Generator reset\n"); } @@ -295,13 +411,13 @@ void CTileGen::AttachRenderer(CRender2D *Render2DPtr) if (g_Config.gpuMultiThreaded) { Render2D->AttachVRAM(vramRO); - Render2D->AttachPalette(palRO); + Render2D->AttachPalette((const UINT32 **)palRO); Render2D->AttachRegisters(regsRO); } else { Render2D->AttachVRAM(vram); - Render2D->AttachPalette(pal); + Render2D->AttachPalette((const UINT32 **)pal); Render2D->AttachRegisters(regs); } @@ -321,15 +437,18 @@ bool CTileGen::Init(CIRQ *IRQObjectPtr) // Set up main pointers vram = (UINT8 *) &memoryPool[OFFSET_VRAM]; - pal = (UINT32 *) &memoryPool[OFFSET_PAL]; + pal[0] = (UINT32 *) &memoryPool[OFFSET_PAL_A]; + pal[1] = (UINT32 *) &memoryPool[OFFSET_PAL_B]; // If multi-threaded, set up pointers for read-only snapshots and dirty page arrays too if (g_Config.gpuMultiThreaded) { vramRO = (UINT8 *) &memoryPool[OFFSET_VRAM_RO]; - palRO = (UINT32 *) &memoryPool[OFFSET_PAL_RO]; + palRO[0] = (UINT32 *) &memoryPool[OFFSET_PAL_RO_A]; + palRO[1] = (UINT32 *) &memoryPool[OFFSET_PAL_RO_B]; vramDirty = (UINT8 *) &memoryPool[OFFSET_VRAM_DIRTY]; - palDirty = (UINT8 *) &memoryPool[OFFSET_PAL_DIRTY]; + palDirty[0] = (UINT8 *) &memoryPool[OFFSET_PAL_A_DIRTY]; + palDirty[1] = (UINT8 *) &memoryPool[OFFSET_PAL_B_DIRTY]; } // Hook up the IRQ controller diff --git a/Src/Model3/TileGen.h b/Src/Model3/TileGen.h index 4d1e0de..71eb7d0 100644 --- a/Src/Model3/TileGen.h +++ b/Src/Model3/TileGen.h @@ -1,7
+1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** @@ -191,6 +191,7 @@ public: private: // Private member functions + void RecomputePalettes(void); void InitPalette(void); void WritePalette(unsigned color, UINT32 data); UINT32 UpdateSnapshots(bool copyWhole); @@ -199,18 +200,23 @@ private: CIRQ *IRQ; // IRQ controller the tile generator is attached to CRender2D *Render2D; // 2D renderer the tile generator is attached to - // Tile generator VRAM - UINT8 *memoryPool; // all memory allocated here - UINT8 *vram; // 1.8MB of VRAM - UINT32 *pal; // 0x20000 byte (32K colors) palette + /* + * Tile generator VRAM. The upper 128KB of VRAM stores the palette data. + * Two palettes are computed from this based on the color offset registers: + * A/A' and B/B'. + */ + UINT8 *memoryPool; // all memory allocated here + UINT8 *vram; // 1.125MB of VRAM + UINT32 *pal[2]; // 2 x 0x20000 byte (32K colors) palette + bool recomputePalettes; // whether to recompute palettes A/A' and B/B' during sync // Read-only snapshots - UINT8 *vramRO; // 1.8MB of VRAM [read-only snapshot] - UINT32 *palRO; // 0x20000 byte (32K colors) palette [read-only snapshot] + UINT8 *vramRO; // 1.125MB of VRAM [read-only snapshot] + UINT32 *palRO[2]; // 2 x 0x20000 byte (32K colors) palette [read-only snapshot] // Arrays to keep track of dirty pages in memory regions UINT8 *vramDirty; - UINT8 *palDirty; + UINT8 *palDirty[2]; // one for each palette // Registers UINT32 regs[64]; diff --git a/Src/OSD/SDL/Main.cpp b/Src/OSD/SDL/Main.cpp index da4d727..ce271f4 100644 --- a/Src/OSD/SDL/Main.cpp +++ b/Src/OSD/SDL/Main.cpp @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. 
** @@ -1242,7 +1242,7 @@ static int DisassembleCROM(const char *zipFile, UINT32 addr, unsigned n) static void Title(void) { puts("Supermodel: A Sega Model 3 Arcade Emulator (Version "SUPERMODEL_VERSION")"); - puts("Copyright (C) 2011 by Bart Trzynadlowski and Nik Henson\n"); + puts("Copyright (C) 2011-2012 by Bart Trzynadlowski and Nik Henson\n"); } // Print usage information diff --git a/Src/Supermodel.h b/Src/Supermodel.h index add3e3a..d6a8bed 100644 --- a/Src/Supermodel.h +++ b/Src/Supermodel.h @@ -1,7 +1,7 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. **