Rewrote 2D renderer and fixed 2D layer priorities, stencil mask clipping (Scud Race 'ROLLING START'), and cleaned up code. Unfortunately, the new code is about 50% slower, mostly due to the need to perform stencil clipping after scrolling.

This commit is contained in:
Bart Trzynadlowski 2016-05-07 21:54:03 +00:00
parent 69a44a5d5f
commit 5d048958b9
3 changed files with 527 additions and 668 deletions

View file

@ -191,8 +191,9 @@
*
* Each mask entry is a little endian 32-bit word. The high 16 bits control
* A/A' and the low 16 bits control B/B'. Each word controls an entire line
* (32 pixels per bit, 512 pixels per 16-bit line mask). If a bit is set to 1,
* the pixel from the primary layer is used, otherwise the alternate layer is
* (32 pixels per bit, 512 pixels per 16-bit line mask, where the first 16
* pixels are allocated to the overscan region.) If a bit is set to 1, the
* pixel from the primary layer is used, otherwise the alternate layer is
* used when the mask is 0. It is important to remember that the layers may
* have been scrolled independently. The mask operates on the final resultant
* two pixels that are determined for each location.
@ -210,6 +211,10 @@
* The stencil mask does not affect layer priorities, which are managed
* separately regardless of mask settings.
*
* The formula for mapping a screen pixel (0-495) to stencil bit mask is:
*
* bit = 1 << (15 - ((x + 16) / 32))
*
* Scrolling
* ---------
*
@ -222,13 +227,13 @@
* The scroll registers are laid out as:
*
* 31 0
* v??? ???y yyyy yyyy h??? ??xx xxxx xxxx
* e??? ???y yyyy yyyy h??? ??xx xxxx xxxx
*
* The 'y' bits comprise a vertical scroll value in pixels. The 'x' bits form a
* horizontal scroll value. If 'h' is set, then the VRAM table (line-by-line
* scrolling) is used, otherwise the 'x' values are applied to every line. The
* meaning of 'v' is unknown. It is also possible that the scroll values use
* more or less bits, but probably no more than 1.
* The 'e' bit enables the layer when set. The 'y' bits comprise a vertical
* scroll value in pixels. The 'x' bits form a horizontal scroll value. If 'h'
* is set, then the VRAM table (line-by-line scrolling) is used, otherwise the
* 'x' values are applied to every line. It is also possible that the scroll
* values use more or fewer bits, but probably no more than 1.
*
* Each line must be wrapped back to the beginning of the same line. Likewise,
* vertical scrolling wraps around back to the top of the tilemap.
@ -283,352 +288,220 @@
#define FRAGMENT_2D_SHADER_FILE "Src/Graphics/Fragment2D.glsl"
/******************************************************************************
Tile Drawing Functions
******************************************************************************/
// Draw one 8-pixel line of a 4-bit tile into buf; no clipping is performed.
//
//  buf       Destination; 8 consecutive pixels are written.
//  tile      Name table entry (pattern number and upper color bits).
//  tileLine  Line within the tile pattern to draw.
//  pal       Palette used to convert color indices to output pixels.
void CRender2D::DrawTileLine4BitNoClip(UINT32 *buf, UINT16 tile, int tileLine, const UINT32 *pal)
{
  // Pattern number: bits 0-13 shifted up by one, with bit 15 as the new LSB
  const unsigned patternNum = ((tile & 0x3FFF) << 1) | ((tile >> 15) & 1);

  // Each 4-bit tile occupies 32 bytes; VRAM is addressed as 32-bit words
  const unsigned firstWord = (patternNum * 32) / 4;

  // Upper color bits; the lower 4 bits come from the tile pattern
  const unsigned colorHi = tile & 0x7FF0;

  // One word holds all 8 pixels of the line, leftmost pixel in the top nibble
  const UINT32 pixels = vram[firstWord + tileLine];
  for (int shift = 28; shift >= 0; shift -= 4)
    *buf++ = pal[((pixels >> shift) & 0xF) | colorHi];
}
// Draw one 8-pixel line of an 8-bit tile into buf; no clipping is performed.
//
//  buf       Destination; 8 consecutive pixels are written.
//  tile      Name table entry (pattern number and upper color bits).
//  tileLine  Line within the tile pattern to draw.
//  pal       Palette used to convert color indices to output pixels.
void CRender2D::DrawTileLine8BitNoClip(UINT32 *buf, UINT16 tile, int tileLine, const UINT32 *pal)
{
  // Each 8-bit tile occupies 64 bytes; VRAM is addressed as 32-bit words
  const unsigned firstWord = ((tile & 0x3FFF) * 64) / 4;

  // Upper color bits; the lower 8 bits come from the tile pattern
  const unsigned colorHi = tile & 0x7F00;

  // With 8-bit pixels every pattern line spans two words (4 pixels each)
  const int lineWord = tileLine * 2;
  for (int half = 0; half < 2; half++)
  {
    const UINT32 pixels = vram[firstWord + lineWord + half];

    // Leftmost pixel is stored in the most significant byte
    for (int shift = 24; shift >= 0; shift -= 8)
      *buf++ = pal[((pixels >> shift) & 0xFF) | colorHi];
  }
}
/******************************************************************************
Layer Rendering
This code is quite slow and badly needs to be optimized. Dirty rectangles
should be implemented first and tile pre-decoding second.
******************************************************************************/
/*
 * PutTilePixel():
 *
 * Writes a single tile pixel to an output line, applying optional bounds
 * clipping, the stencil mask, and optional transparency testing.
 *
 * Template parameters:
 *      alphaTest   If true, only pixels that are both stencil-visible and have
 *                  a non-zero alpha byte (bits 31-24) are written; all other
 *                  destination pixels are left untouched. If false, visible
 *                  pixels are written and stencil-hidden ones are set to 0.
 *      clip        If true, pixels outside of 0-495 are discarded. If false,
 *                  the caller guarantees that pixelOffset is within 0-495.
 *
 * Parameters:
 *      line        Output line (496 pixels).
 *      pixelOffset X position of the pixel within the line.
 *      palette     Palette to look the color up in.
 *      colorIndex  Complete palette index (pattern bits | upper color bits).
 *      mask        Stencil mask for this line; a set bit makes the
 *                  corresponding 32-pixel column visible.
 */
template <bool alphaTest, bool clip>
static inline void PutTilePixel(uint32_t *line, int pixelOffset, const uint32_t *palette, uint32_t colorIndex, uint16_t mask)
{
  if (clip && (pixelOffset < 0 || pixelOffset >= 496))
    return;

  // Each mask bit covers 32 pixels; first 16 pixels in stencil mask are overscan
  const uint16_t maskTest = static_cast<uint16_t>(1 << (15 - ((pixelOffset + 16) / 32)));
  const bool visible = (mask & maskTest) != 0;
  const uint32_t pixel = palette[colorIndex];

  if (alphaTest)
  {
    if (visible && (pixel >> 24) != 0)  // only draw opaque pixels
      line[pixelOffset] = pixel;
  }
  else
    line[pixelOffset] = visible ? pixel : 0;  // 0 = transparent
}

/*
 * DrawTileLine():
 *
 * Draws one 8-pixel line of a single tile into an output line.
 *
 * Template parameters:
 *      bits        Pixel depth of the tile pattern: 4 or 8.
 *      alphaTest   See PutTilePixel().
 *      clip        See PutTilePixel(). Must be true for any tile that may
 *                  straddle either edge of the line.
 *
 * Parameters:
 *      line        Output line (496 pixels).
 *      pixelOffset X position of the tile's leftmost pixel. May be negative or
 *                  extend past 495 only when clip is true.
 *      tile        Name table entry (pattern number and upper color bits).
 *      patternLine Line within the 8x8 tile pattern (0-7).
 *      vram        VRAM, accessed as an array of 32-bit words.
 *      palette     Palette to draw with.
 *      mask        Stencil mask for this line (set bits are visible).
 */
template <int bits, bool alphaTest, bool clip>
static inline void DrawTileLine(uint32_t *line, int pixelOffset, uint16_t tile, int patternLine, const uint32_t *vram, const uint32_t *palette, uint16_t mask)
{
  static_assert(bits == 4 || bits == 8, "Tiles are either 4- or 8-bit");

  const int pixelsPerWord = 32 / bits;
  const int wordsPerLine = bits / 4;  // 8 pixels: one word at 4 bits, two at 8
  const uint32_t pixelMask = (1u << bits) - 1;

  // Word offset of the pattern and the high color bits from the name table
  // entry. 4-bit tiles occupy 32 bytes and 8-bit tiles 64 bytes.
  int patternOffset;
  uint32_t colorHi;
  if (bits == 4)
  {
    patternOffset = (((tile & 0x3FFF) << 1) | ((tile >> 15) & 1)) * 32 / 4;
    colorHi = tile & 0x7FF0;
  }
  else
  {
    patternOffset = (tile & 0x3FFF) * 64 / 4;
    colorHi = tile & 0x7F00;
  }

  // Draw: leftmost pixel is held in the most significant bits of each word
  const uint32_t *patternWords = &vram[patternOffset + patternLine * wordsPerLine];
  for (int w = 0; w < wordsPerLine; w++)
  {
    const uint32_t pattern = patternWords[w];
    for (int p = pixelsPerWord - 1; p >= 0; p--)
    {
      PutTilePixel<alphaTest, clip>(line, pixelOffset, palette, ((pattern >> (p * bits)) & pixelMask) | colorHi, mask);
      ++pixelOffset;
    }
  }
}
// Mix in the appropriate layer (add on top of current contents) with horizontal scrolling under control of the stencil mask
static void MixLine(UINT32 *dest, const UINT32 *src, int layerNum, int y, bool isBottom, const UINT16 *hScrollTable, const UINT16 *maskTableLine, int hFullScroll, bool lineScrollMode)
static inline void ClearLayer(uint32_t *pixels)
{
// Determine horizontal scroll values
int hScroll;
if (lineScrollMode)
hScroll = hScrollTable[y];
else
hScroll = hFullScroll;
memset(pixels, 0, 496*384*sizeof(uint32_t));
}
// Get correct mask table entry
template <int bits, bool alphaTest>
static void DrawLayer(uint32_t *pixels, int layerNum, const uint32_t *vram, const uint32_t *regs, const uint32_t *palette)
{
const uint16_t *nameTableBase = (const uint16_t *) &vram[(0xF8000 + layerNum * 0x2000) / 4];
const uint16_t *hScrollTable = (const uint16_t *) &vram[(0xF6000 + layerNum * 0x400) / 4];
bool lineScrollMode = (regs[0x60/4 + layerNum] & 0x8000) != 0;
int hFullScroll = regs[0x60/4 + layerNum] & 0x3FF;
int vScroll = (regs[0x60/4 + layerNum] >> 16) & 0x1FF;
const uint16_t *maskTable = (const uint16_t *) &vram[0xF7000 / 4];
if (layerNum < 2) // little endian: layers A and A' use second word in each pair
++maskTableLine;
maskTable += 1;
// Figure out what mask bit should be to mix in this layer
UINT16 doCopy;
if ((layerNum & 1)) // layers 1 and 3 are A' and B': alternates
doCopy = 0x0000; // if mask is clear, copy alternate layer
else
doCopy = 0x8000; // copy primary layer when mask is set
// If mask bit is clear, alternate layer is shown. We want to test for non-
// zero, so we flip the mask when drawing alternate layers (layers 1 and 3).
const uint16_t maskPolarity = (layerNum & 1) ? 0xFFFF : 0x0000;
// Mix first 60 tiles (4 at a time)
UINT16 mask = *maskTableLine; // mask for this line (each bit covers 4 tiles)
int i = hScroll&511; // line index (where to copy from)
if (isBottom)
{
/*
* Bottom layers can be copied in without worrying about transparency
* but we must write blank values when layer is not showing.
*/
for (int tx = 0; tx < 60; tx += 4)
{
// Only copy pixels if the mask bit is appropriate for this layer type
if ((mask&0x8000) == doCopy)
{
if (i <= (512-32)) // safe to use memcpy for fast blit?
{
memcpy(dest, &src[i], 32*sizeof(UINT32));
i += 32;
dest += 32;
}
else // slow copy, wrap line boundary
{
for (int k = 0; k < 32; k++)
{
i &= 511;
*dest++ = src[i++];
}
}
}
else
{
// Write blank pixels
memset(dest, 0, 32*sizeof(UINT32));
i += 32;
i &= 511; // wrap line boundaries
dest += 32;
}
uint32_t *line = pixels;
mask <<= 1;
}
// Mix last two tiles
if ((mask&0x8000) == doCopy)
{
for (int k = 0; k < 16; k++)
{
i &= 511;
*dest++ = src[i++];
}
}
else // clear
{
for (int k = 0; k < 16; k++)
{
i &= 511;
*dest++ = 0;
}
}
}
else
{
/*
* Subsequent layers must test for transparency while mixing.
*/
for (int tx = 0; tx < 60; tx += 4)
{
if ((mask&0x8000) == doCopy)
{
UINT32 p;
for (int k = 0; k < 32; k++)
{
i &= 511;
p = src[i++];
if ((p>>24) != 0) // opaque pixel, put it down
*dest = p;
dest++;
}
}
else
{
i += 32;
i &= 511;
dest += 32;
}
mask <<= 1;
}
if ((mask&0x8000) == doCopy)
{
UINT32 p;
for (int k = 0; k < 16; k++)
{
i &= 511;
p = src[i++];
if ((p>>24) != 0)
*dest = p;
dest++;
}
}
}
}
// Returns true if there is no bottom layer (requiring the color buffer to be cleared)
bool CRender2D::DrawTilemaps(UINT32 *destBottom, UINT32 *destTop)
{
/*
* Precompute data needed for each layer
*/
const UINT16 *nameTableBase[4];
const UINT16 *hScrollTable[4];
const UINT16 *maskTableLine = (UINT16 *) &vram[0xF7000/4]; // start at line 0
int hFullScroll[4];
bool lineScrollMode[4];
for (int i = 0; i < 4; i++) // 0=A, 1=A', 2=B, 3=B'
{
// Base of name table
nameTableBase[i] = (UINT16 *) &vram[(0xF8000+i*0x2000)/4];
// Horizontal line scroll tables
hScrollTable[i] = (UINT16 *) &vram[(0xF6000+i*0x400)/4];
// Load horizontal full-screen scroll values and scroll mode
hFullScroll[i] = regs[0x60/4+i]&0x3FF;
lineScrollMode[i] = (regs[0x60 / 4 + i] & 0x8000)>0;
}
/*
* Precompute layer mixing order
*/
UINT32 *dest[4];
const UINT32 *src[4];
int sortedLayerNum[4];
bool sortedIsBottom[4];
const UINT16 *sortedHScrollTable[4];
int sortedHFullScroll[4];
bool sortedLineScrollMode[4];
bool noBottom; // when true, no layer assigned to bottom surface
switch ((regs[0x20/4]>>8)&0xF)
{
case 0x5: // top: A, B, A'? bottom: B'
noBottom = false;
dest[0]=destBottom; src[0]=lineBuffer[3]; sortedLayerNum[0]=3; sortedIsBottom[0]=true; sortedHScrollTable[0] = hScrollTable[3]; sortedHFullScroll[0]=hFullScroll[3]; sortedLineScrollMode[0]=lineScrollMode[3];
dest[1]=destTop; src[1]=lineBuffer[2]; sortedLayerNum[1]=2; sortedIsBottom[1]=true; sortedHScrollTable[1] = hScrollTable[2]; sortedHFullScroll[1]=hFullScroll[2]; sortedLineScrollMode[1]=lineScrollMode[2];
dest[2]=destTop; src[2]=lineBuffer[0]; sortedLayerNum[2]=0; sortedIsBottom[2]=false; sortedHScrollTable[2] = hScrollTable[0]; sortedHFullScroll[2]=hFullScroll[0]; sortedLineScrollMode[2]=lineScrollMode[0];
dest[3]=destTop; src[3]=lineBuffer[1]; sortedLayerNum[3]=1; sortedIsBottom[3]=false; sortedHScrollTable[3] = hScrollTable[1]; sortedHFullScroll[3]=hFullScroll[1]; sortedLineScrollMode[3]=lineScrollMode[1];
break;
case 0x9: // ? all layers on top but relative order unknown (Spikeout Final Edition, after first boss)
noBottom = true;
dest[0]=destTop; src[0]=lineBuffer[2]; sortedLayerNum[0]=2; sortedIsBottom[0]=true; sortedHScrollTable[0] = hScrollTable[2]; sortedHFullScroll[0]=hFullScroll[2]; sortedLineScrollMode[0]=lineScrollMode[3];
dest[1]=destTop; src[1]=lineBuffer[3]; sortedLayerNum[1]=3; sortedIsBottom[1]=false; sortedHScrollTable[1] = hScrollTable[3]; sortedHFullScroll[1]=hFullScroll[3]; sortedLineScrollMode[1]=lineScrollMode[2];
dest[2]=destTop; src[2]=lineBuffer[1]; sortedLayerNum[2]=1; sortedIsBottom[2]=false; sortedHScrollTable[2] = hScrollTable[1]; sortedHFullScroll[2]=hFullScroll[1]; sortedLineScrollMode[2]=lineScrollMode[1];
dest[3]=destTop; src[3]=lineBuffer[0]; sortedLayerNum[3]=0; sortedIsBottom[3]=false; sortedHScrollTable[3] = hScrollTable[0]; sortedHFullScroll[3]=hFullScroll[0]; sortedLineScrollMode[3]=lineScrollMode[0];
break;
case 0xF: // all on top
noBottom = true;
dest[0]=destTop; src[0]=lineBuffer[2]; sortedLayerNum[0]=2; sortedIsBottom[0]=true; sortedHScrollTable[0] = hScrollTable[2]; sortedHFullScroll[0]=hFullScroll[2]; sortedLineScrollMode[0]=lineScrollMode[2];
dest[1]=destTop; src[1]=lineBuffer[3]; sortedLayerNum[1]=3; sortedIsBottom[1]=false; sortedHScrollTable[1] = hScrollTable[3]; sortedHFullScroll[1]=hFullScroll[3]; sortedLineScrollMode[1]=lineScrollMode[3];
dest[2]=destTop; src[2]=lineBuffer[0]; sortedLayerNum[2]=0; sortedIsBottom[2]=false; sortedHScrollTable[2] = hScrollTable[0]; sortedHFullScroll[2]=hFullScroll[0]; sortedLineScrollMode[2]=lineScrollMode[0];
dest[3]=destTop; src[3]=lineBuffer[1]; sortedLayerNum[3]=1; sortedIsBottom[3]=false; sortedHScrollTable[3] = hScrollTable[1]; sortedHFullScroll[3]=hFullScroll[1]; sortedLineScrollMode[3]=lineScrollMode[1];
break;
case 0x7: // top: A, B bottom: A'?, B'
noBottom = false;
dest[0]=destBottom; src[0]=lineBuffer[3]; sortedLayerNum[0]=3; sortedIsBottom[0]=true; sortedHScrollTable[0] = hScrollTable[3]; sortedHFullScroll[0]=hFullScroll[3]; sortedLineScrollMode[0]=lineScrollMode[3];
dest[1]=destBottom; src[1]=lineBuffer[1]; sortedLayerNum[1]=1; sortedIsBottom[1]=false; sortedHScrollTable[1] = hScrollTable[1]; sortedHFullScroll[1]=hFullScroll[1]; sortedLineScrollMode[1]=lineScrollMode[1];
dest[2]=destTop; src[2]=lineBuffer[2]; sortedLayerNum[2]=2; sortedIsBottom[2]=true; sortedHScrollTable[2] = hScrollTable[2]; sortedHFullScroll[2]=hFullScroll[2]; sortedLineScrollMode[2]=lineScrollMode[2];
dest[3]=destTop; src[3]=lineBuffer[0]; sortedLayerNum[3]=0; sortedIsBottom[3]=false; sortedHScrollTable[3] = hScrollTable[0]; sortedHFullScroll[3]=hFullScroll[0]; sortedLineScrollMode[3]=lineScrollMode[0];
break;
default: // unknown, use A and A' on top, B and B' on the bottom
noBottom = false;
dest[0]=destBottom; src[0]=lineBuffer[2]; sortedLayerNum[0]=2; sortedIsBottom[0]=true; sortedHScrollTable[0] = hScrollTable[2]; sortedHFullScroll[0]=hFullScroll[2]; sortedLineScrollMode[0]=lineScrollMode[2];
dest[1]=destBottom; src[1]=lineBuffer[3]; sortedLayerNum[1]=3; sortedIsBottom[1]=false; sortedHScrollTable[1] = hScrollTable[3]; sortedHFullScroll[1]=hFullScroll[3]; sortedLineScrollMode[1]=lineScrollMode[3];
dest[2]=destTop; src[2]=lineBuffer[0]; sortedLayerNum[2]=0; sortedIsBottom[2]=true; sortedHScrollTable[2] = hScrollTable[0]; sortedHFullScroll[2]=hFullScroll[0]; sortedLineScrollMode[2]=lineScrollMode[0];
dest[3]=destTop; src[3]=lineBuffer[1]; sortedLayerNum[3]=1; sortedIsBottom[3]=false; sortedHScrollTable[3] = hScrollTable[1]; sortedHFullScroll[3]=hFullScroll[1]; sortedLineScrollMode[3]=lineScrollMode[1];
break;
}
/*
* Render and mix each line
*/
for (int y = 0; y < 384; y++)
{
// Draw one scanline from each layer
DrawLine(lineBuffer[0], 0, y, nameTableBase[0], pal[0]);
DrawLine(lineBuffer[1], 1, y, nameTableBase[1], pal[0]);
DrawLine(lineBuffer[2], 2, y, nameTableBase[2], pal[1]);
DrawLine(lineBuffer[3], 3, y, nameTableBase[3], pal[1]);
int hScroll = lineScrollMode ? hScrollTable[y] : hFullScroll;
int hTile = hScroll / 8;
int hFine = hScroll & 7; // horizontal pixel offset within tile line
int vFine = (y + vScroll) & 7; // vertical pixel offset within 8x8 tile
const uint16_t *nameTable = &nameTableBase[(64 * ((y + vScroll) / 8)) & 0xFFF]; // clamp to 64x64 = 0x1000
uint16_t mask = *maskTable ^ maskPolarity; // each bit covers 32 pixels
// Mix the layers in the correct order
for (int i = 0; i < 4; i++)
int pixelOffset = -hFine;
int extraTile = (hFine != 0) ? 1 : 0; // h-scrolling requires part of 63rd tile
int tx = 0;
DrawTileLine<bits, alphaTest, true>(line, pixelOffset, nameTable[(hTile ^ 1) & 63], vFine, vram, palette, mask);
++hTile;
pixelOffset += 8;
for (tx = 1; tx < (62 - 1 + extraTile); tx++)
{
MixLine(dest[i], src[i], sortedLayerNum[i], y, sortedIsBottom[i], sortedHScrollTable[i], maskTableLine, sortedHFullScroll[i], sortedLineScrollMode[i]);
dest[i] += 496; // next line
DrawTileLine<bits, alphaTest, false>(line, pixelOffset, nameTable[(hTile ^ 1) & 63], vFine, vram, palette, mask);
++hTile;
pixelOffset += 8;
}
DrawTileLine<bits, alphaTest, true>(line, pixelOffset, nameTable[(hTile ^ 1) & 63], vFine, vram, palette, mask);
++hTile;
pixelOffset += 8;
// Advance one line
maskTable += 2;
line += 496;
}
}
bool CRender2D::DrawTilemaps(uint32_t *pixelsBottom, uint32_t *pixelsTop)
{
unsigned priority = (m_regs[0x20/4] >> 8) & 0xF;
// Render bottom layers
bool nothingDrawn = true;
for (int layerNum = 3; layerNum >= 0; layerNum--)
{
bool is4Bit = (m_regs[0x20/4] & (1 << (12 + layerNum))) != 0;
bool enabled = (m_regs[0x60/4 + layerNum] & 0x80000000) != 0;
bool selected = (priority & (1 << layerNum)) == 0;
if (enabled && selected)
{
if (nothingDrawn)
{
if (is4Bit)
DrawLayer<4, false>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
else
DrawLayer<8, false>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
}
else
{
if (is4Bit)
DrawLayer<4, true>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
else
DrawLayer<8, true>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
}
nothingDrawn = false;
}
}
// Next line in mask table
maskTableLine += 2;
if (nothingDrawn)
ClearLayer(pixelsBottom);
// Render top layers
nothingDrawn = true;
for (int layerNum = 3; layerNum >= 0; layerNum--)
{
bool is4Bit = (m_regs[0x20/4] & (1 << (12 + layerNum))) != 0;
bool enabled = (m_regs[0x60/4 + layerNum] & 0x80000000) != 0;
bool selected = (priority & (1 << layerNum)) != 0;
if (enabled && selected)
{
if (nothingDrawn)
{
if (is4Bit)
DrawLayer<4, false>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
else
DrawLayer<8, false>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
}
else
{
if (is4Bit)
DrawLayer<4, true>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
else
DrawLayer<8, true>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
}
nothingDrawn = false;
}
}
// Indicate whether color buffer must be cleared because no bottom layer
return noBottom;
if (nothingDrawn)
ClearLayer(pixelsTop);
// Indicate whether color buffer must be cleared because of no bottom layer
return nothingDrawn;
}
@ -640,13 +513,15 @@ bool CRender2D::DrawTilemaps(UINT32 *destBottom, UINT32 *destTop)
/*
 * DisplaySurface():
 *
 * Draws one of the two layer textures as a quad covering the unit square.
 *
 * Parameters:
 *      surface     Index into m_texID of the texture to draw.
 *      z           Z coordinate of the quad.
 */
void CRender2D::DisplaySurface(int surface, GLfloat z)
{
  // Fraction of the texture to map onto the quad: all of it when m_npot is
  // set, otherwise only the 496x384 region of a 512x512 texture
  const float width = m_npot ? 1.0f : (496.0f / 512.0f);
  const float height = m_npot ? 1.0f : (384.0f / 512.0f);

  // Draw the surface
  glActiveTexture(GL_TEXTURE0); // texture unit 0
  glBindTexture(GL_TEXTURE_2D, m_texID[surface]);
  glBegin(GL_QUADS);
  glTexCoord2f(0.0f, 0.0f);     glVertex3f(0.0f, 0.0f, z);
  glTexCoord2f(width, 0.0f);    glVertex3f(1.0f, 0.0f, z);
  glTexCoord2f(width, height);  glVertex3f(1.0f, 1.0f, z);
  glTexCoord2f(0.0f, height);   glVertex3f(0.0f, 1.0f, z);
  glEnd();
}
@ -663,27 +538,27 @@ void CRender2D::Setup2D(bool isBottom, bool clearAll)
glDisable(GL_DEPTH_TEST);
// Shader program
glUseProgram(shaderProgram);
glUseProgram(m_shaderProgram);
// Clear everything if requested or just overscan areas for wide screen mode
if (clearAll)
{
glClearColor(0.0, 0.0, 0.0, 0.0);
glViewport(0, 0, totalXPixels, totalYPixels);
glViewport(0, 0, m_totalXPixels, m_totalYPixels);
glClear(GL_COLOR_BUFFER_BIT);
}
else if (isBottom && g_Config.wideScreen)
{
// For now, clear w/ black (may want to use color 0 later)
glClearColor(0.0, 0.0, 0.0, 0.0);
glViewport(0, 0, xOffs, totalYPixels);
glViewport(0, 0, m_xOffset, m_totalYPixels);
glClear(GL_COLOR_BUFFER_BIT);
glViewport(xOffs+xPixels, 0, totalXPixels, totalYPixels);
glViewport(m_xOffset + m_xPixels, 0, m_totalXPixels, m_totalYPixels);
glClear(GL_COLOR_BUFFER_BIT);
}
// Set up the viewport and orthogonal projection
glViewport(xOffs, yOffs, xPixels, yPixels);
glViewport(m_xOffset, m_yOffset, m_xPixels, m_yPixels);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluOrtho2D(0.0, 1.0, 1.0, 0.0);
@ -695,12 +570,12 @@ void CRender2D::Setup2D(bool isBottom, bool clearAll)
void CRender2D::BeginFrame(void)
{
// Update all layers
bool clear = DrawTilemaps(surfBottom, surfTop);
bool clear = DrawTilemaps(m_bottomSurface, m_topSurface);
glActiveTexture(GL_TEXTURE0); // texture unit 0
glBindTexture(GL_TEXTURE_2D, texID[0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 496, 384, GL_RGBA, GL_UNSIGNED_BYTE, surfTop);
glBindTexture(GL_TEXTURE_2D, texID[1]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 496, 384, GL_RGBA, GL_UNSIGNED_BYTE, surfBottom);
glBindTexture(GL_TEXTURE_2D, m_texID[0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 496, 384, GL_RGBA, GL_UNSIGNED_BYTE, m_topSurface);
glBindTexture(GL_TEXTURE_2D, m_texID[1]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 496, 384, GL_RGBA, GL_UNSIGNED_BYTE, m_bottomSurface);
// Display bottom surface
Setup2D(true, clear);
@ -723,7 +598,7 @@ void CRender2D::EndFrame(void)
******************************************************************************/
// Deprecated: intentionally does nothing. Both parameters are ignored.
void CRender2D::WriteVRAM(unsigned addr, uint32_t data)
{
}
@ -732,117 +607,101 @@ void CRender2D::WriteVRAM(unsigned addr, UINT32 data)
Configuration, Initialization, and Shutdown
******************************************************************************/
void CRender2D::AttachRegisters(const UINT32 *regPtr)
void CRender2D::AttachRegisters(const uint32_t *regPtr)
{
regs = regPtr;
m_regs = regPtr;
DebugLog("Render2D attached registers\n");
}
void CRender2D::AttachPalette(const UINT32 *palPtr[2])
void CRender2D::AttachPalette(const uint32_t *palPtr[2])
{
pal[0] = palPtr[0];
pal[1] = palPtr[1];
m_palette[0] = palPtr[0];
m_palette[1] = palPtr[1];
DebugLog("Render2D attached palette\n");
}
void CRender2D::AttachVRAM(const UINT8 *vramPtr)
void CRender2D::AttachVRAM(const uint8_t *vramPtr)
{
vram = (UINT32 *) vramPtr;
m_vram = (uint32_t *) vramPtr;
DebugLog("Render2D attached VRAM\n");
}
// Memory pool and offsets within it
#define MEMORY_POOL_SIZE (2*512*384*4 + 4*512*4)
#define MEMORY_POOL_SIZE (2*512*384*4)
#define OFFSET_TOP_SURFACE 0 // 512*384*4 bytes
#define OFFSET_BOTTOM_SURFACE (512*384*4) // 512*384*4
#define OFFSET_LINE_BUFFERS (2*512*384*4) // 4*512*4 (4 lines)
bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes)
{
float memSizeMB = (float)MEMORY_POOL_SIZE/(float)0x100000;
// Load shaders
if (OKAY != LoadShaderProgram(&shaderProgram,&vertexShader,&fragmentShader,NULL,NULL,vertexShaderSource,fragmentShaderSource))
if (OKAY != LoadShaderProgram(&m_shaderProgram, &m_vertexShader, &m_fragmentShader, 0, 0, s_vertexShaderSource, s_fragmentShaderSource))
return FAIL;
// Get locations of the uniforms
glUseProgram(shaderProgram); // bind program
textureMapLoc = glGetUniformLocation(shaderProgram, "textureMap");
glUniform1i(textureMapLoc,0); // attach it to texture unit 0
glUseProgram(m_shaderProgram); // bind program
m_textureMapLoc = glGetUniformLocation(m_shaderProgram, "textureMap");
glUniform1i(m_textureMapLoc, 0); // attach it to texture unit 0
// Allocate memory for layer surfaces
memoryPool = new(std::nothrow) UINT8[MEMORY_POOL_SIZE];
if (NULL == memoryPool)
return ErrorLog("Insufficient memory for tilemap surfaces (need %1.1f MB).", memSizeMB);
memset(memoryPool,0,MEMORY_POOL_SIZE); // clear textures
m_memoryPool = new(std::nothrow) uint8_t[MEMORY_POOL_SIZE];
if (NULL == m_memoryPool)
return ErrorLog("Insufficient memory for tilemap surfaces (need %1.1f MB).", float(MEMORY_POOL_SIZE) / 0x100000);
memset(m_memoryPool, 0, MEMORY_POOL_SIZE); // clear textures
// Set up pointers to memory regions
surfTop = (UINT32 *) &memoryPool[OFFSET_TOP_SURFACE];
surfBottom = (UINT32 *) &memoryPool[OFFSET_BOTTOM_SURFACE];
for (int i = 0; i < 4; i++)
lineBuffer[i] = (UINT32 *) &memoryPool[OFFSET_LINE_BUFFERS + i*512*4];
m_topSurface = (uint32_t *) &m_memoryPool[OFFSET_TOP_SURFACE];
m_bottomSurface = (uint32_t *) &m_memoryPool[OFFSET_BOTTOM_SURFACE];
// Resolution
xPixels = xRes;
yPixels = yRes;
xOffs = xOffset;
yOffs = yOffset;
totalXPixels = totalXRes;
totalYPixels = totalYRes;
m_xPixels = xRes;
m_yPixels = yRes;
m_xOffset = xOffset;
m_yOffset = yOffset;
m_totalXPixels = totalXRes;
m_totalYPixels = totalYRes;
// Create textures
m_npot = glewIsSupported("GL_ARB_texture_non_power_of_two") != 0;
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glGenTextures(2, texID);
glGenTextures(2, m_texID);
for (int i = 0; i < 2; i++)
{
glActiveTexture(GL_TEXTURE0); // texture unit 0
glBindTexture(GL_TEXTURE_2D, texID[i]);
glBindTexture(GL_TEXTURE_2D, m_texID[i]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 512, 512, 0, GL_RGBA, GL_UNSIGNED_BYTE, surfTop);
int width = m_npot ? 496 : 512;
int height = m_npot ? 384 : 512;
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, m_topSurface);
if (glGetError() != GL_NO_ERROR)
return ErrorLog("OpenGL was unable to provide 512x512-texel texture maps for tilemap layers.");
return ErrorLog("OpenGL was unable to provide %dx%d-texel texture maps for tilemap layers.", width, height);
}
DebugLog("Render2D initialized (allocated %1.1f MB)\n", memSizeMB);
DebugLog("Render2D initialized (allocated %1.1f MB)\n", float(MEMORY_POOL_SIZE) / 0x100000);
return OKAY;
}
// Constructs the renderer. Resolution, offsets, and buffer pointers take their
// values from the in-class member initializers; the pointers supplied later
// through the Attach*() functions start out null.
CRender2D::CRender2D(void)
{
  m_vram = nullptr;
  m_regs = nullptr;
  m_palette[0] = nullptr;
  m_palette[1] = nullptr;
  DebugLog("Built Render2D\n");
}
// Releases the shader program, the two layer textures, and the surface memory
// pool, then clears the pointers that referred to them.
CRender2D::~CRender2D(void)
{
  DestroyShaderProgram(m_shaderProgram, m_vertexShader, m_fragmentShader);
  glDeleteTextures(2, m_texID);

  if (m_memoryPool)
  {
    delete [] m_memoryPool;
    m_memoryPool = 0;
  }

  // The surfaces pointed into the pool just freed; VRAM is not owned here
  m_vram = 0;
  m_topSurface = 0;
  m_bottomSurface = 0;

  DebugLog("Destroyed Render2D\n");
}

View file

@ -73,7 +73,7 @@ public:
* function does not.
* data The data to write.
*/
void WriteVRAM(unsigned addr, UINT32 data);
void WriteVRAM(unsigned addr, uint32_t data);
/*
* AttachRegisters(regPtr):
@ -85,7 +85,7 @@ public:
* regPtr Pointer to the base of the tile generator registers. There
* are assumed to be 64 in all.
*/
void AttachRegisters(const UINT32 *regPtr);
void AttachRegisters(const uint32_t *regPtr);
/*
* AttachPalette(palPtr):
@ -97,7 +97,7 @@ public:
* palPtr Pointer to two palettes. The first is for layers A/A' and
* the second is for B/B'.
*/
void AttachPalette(const UINT32 *palPtr[2]);
void AttachPalette(const uint32_t *palPtr[2]);
/*
* AttachVRAM(vramPtr):
@ -109,7 +109,7 @@ public:
* vramPtr Pointer to the base of the tile generator RAM (0x120000
* bytes). VRAM is assumed to be in little endian format.
*/
void AttachVRAM(const UINT8 *vramPtr);
void AttachVRAM(const uint8_t *vramPtr);
/*
* Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes);
@ -144,35 +144,35 @@ public:
private:
// Private member functions
void DrawTileLine8BitNoClip(UINT32 *buf, UINT16 tile, int tileLine, const UINT32 *pal);
void DrawTileLine4BitNoClip(UINT32 *buf, UINT16 tile, int tileLine, const UINT32 *pal);
void DrawLine(UINT32 *dest, int layerNum, int y, const UINT16 *nameTableBase, const UINT32 *pal);
bool DrawTilemaps(UINT32 *destBottom, UINT32 *destTop);
bool DrawTilemaps(uint32_t *destBottom, uint32_t *destTop);
void DisplaySurface(int surface, GLfloat z);
void Setup2D(bool isBottom, bool clearAll);
// Data received from tile generator device object
const UINT32 *vram;
const UINT32 *pal[2]; // palettes for A/A' and B/B'
const UINT32 *regs;
const uint32_t *m_vram;
const uint32_t *m_palette[2]; // palettes for A/A' and B/B'
const uint32_t *m_regs;
// OpenGL data
GLuint texID[2]; // IDs for the 2 layer textures (top and bottom)
unsigned xPixels, yPixels; // display surface resolution
unsigned xOffs, yOffs; // offset
unsigned totalXPixels, totalYPixels; // total display surface resolution
bool m_npot = false; // NPOT texture support
GLuint m_texID[2]; // IDs for the 2 layer textures (top and bottom)
unsigned m_xPixels = 496; // display surface resolution
unsigned m_yPixels = 384; // ...
unsigned m_xOffset = 0; // offset
unsigned m_yOffset = 0;
unsigned m_totalXPixels; // total display surface resolution
unsigned m_totalYPixels;
// Shader programs and input data locations
GLuint shaderProgram; // shader program object
GLuint vertexShader; // vertex shader handle
GLuint fragmentShader; // fragment shader
GLuint textureMapLoc; // location of "textureMap" uniform
GLuint m_shaderProgram; // shader program object
GLuint m_vertexShader; // vertex shader handle
GLuint m_fragmentShader; // fragment shader
GLuint m_textureMapLoc; // location of "textureMap" uniform
// Buffers
UINT8 *memoryPool; // all memory is allocated here
UINT32 *surfTop; // 512x384x32bpp pixel surface for top layers
UINT32 *surfBottom; // bottom layers
UINT32 *lineBuffer[4]; // 512 32bpp pixel line buffers for layer composition
uint8_t *m_memoryPool = 0; // all memory is allocated here
uint32_t *m_topSurface = 0; // 512x384x32bpp pixel surface for top layers
uint32_t *m_bottomSurface = 0; // bottom layers
};

View file

@ -29,7 +29,7 @@
#define INCLUDED_SHADERS2D_H
// Vertex shader
static const char vertexShaderSource[] =
static const char s_vertexShaderSource[] =
{
"/**\n"
" ** Supermodel\n"
@ -68,7 +68,7 @@ static const char vertexShaderSource[] =
};
// Fragment shader
static const char fragmentShaderSource[] =
static const char s_fragmentShaderSource[] =
{
"/**\n"
" ** Supermodel\n"