Emulate the entire tilegen chip in a GLSL shader. (This is now possible with opengl 3+). The tilegen drawing was emulated on the CPU, but was one of the most expensive functions in the emulator according to a profiler. On a modern GPU it's pretty much free, because a GPU is a massive SIMD monster.

Tilegen registers are mapped to uniforms, and the VRAM and palette are mapped to two textures.

TODO rip out the redundant code in the tilegen class. We don't need to pre-calculate palettes anymore. etc

The tilegen shader takes a start/end line, so we can emulate as many lines as we want in a chunk. This will come in useful later, as some games update the tilegen immediately after the ping_pong bit has flipped, ~66% of the way through the frame.

The scud rolling start tilegen bug is probably actually a bug in the original h/w implementation, that ends up looking correct on original h/w but not for us. Need hardware testing to confirm what it's actually doing.
This commit is contained in:
Ian Curtis 2023-09-23 15:27:04 +01:00
parent 015e8e9212
commit c6ea81d996
7 changed files with 1008 additions and 765 deletions

73
Src/Graphics/FBO.cpp Normal file
View file

@ -0,0 +1,73 @@
#include "FBO.h"
FBO::FBO() :
m_frameBufferID(0),
m_textureID(0)
{
}
bool FBO::Create(int width, int height)
{
CreateTexture(width, height);
glGenFramebuffers(1, &m_frameBufferID);
glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_textureID, 0);
auto frameBufferStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER);
glBindFramebuffer(GL_FRAMEBUFFER, 0); //created FBO now disable it
return frameBufferStatus == GL_FRAMEBUFFER_COMPLETE;
}
// Releases the framebuffer and its colour texture (whichever exist) and resets
// both stored IDs to 0. Safe to call when nothing has been created.
void FBO::Destroy()
{
    if (m_frameBufferID != 0) {
        // switch back to the default framebuffer before deleting ours
        glBindFramebuffer(GL_FRAMEBUFFER, 0);
        glDeleteFramebuffers(1, &m_frameBufferID);
        m_frameBufferID = 0;
    }

    if (m_textureID != 0) {
        glDeleteTextures(1, &m_textureID);
        m_textureID = 0;
    }
}
// Binds the FBO's colour texture to GL_TEXTURE_2D so it can be sampled.
void FBO::BindTexture()
{
glBindTexture(GL_TEXTURE_2D, m_textureID);
}
// Makes this FBO the current GL_FRAMEBUFFER target, so subsequent draw calls
// render into its colour texture.
void FBO::Set()
{
glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID);
}
// Rebinds the default framebuffer (0), ending rendering into any FBO.
void FBO::Disable()
{
glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
// Returns the GL name of the framebuffer object (0 if none has been created).
GLuint FBO::GetFBOID()
{
return m_frameBufferID;
}
// Returns the GL name of the colour texture (0 if none has been created).
GLuint FBO::GetTextureID()
{
return m_textureID;
}
void FBO::CreateTexture(int width, int height)
{
glGenTextures (1, &m_textureID);
glBindTexture (GL_TEXTURE_2D, m_textureID);
glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
}

28
Src/Graphics/FBO.h Normal file
View file

@ -0,0 +1,28 @@
#ifndef INCLUDED_FBO_H
#define INCLUDED_FBO_H
#include <GL/glew.h>
/*
 * FBO:
 *
 * Thin wrapper around an OpenGL framebuffer object with a single RGBA8 colour
 * texture attachment. The object does not release its GL resources
 * automatically: call Destroy() explicitly while the GL context is current.
 */
class FBO
{
public:
    FBO();

    // Creates the colour texture and framebuffer; returns true if the
    // framebuffer is complete.
    bool Create(int width, int height);

    // Deletes the framebuffer and texture (if any) and resets the IDs to 0.
    void Destroy();

    // Binds the colour texture to GL_TEXTURE_2D for sampling.
    void BindTexture();

    // Binds this framebuffer as the current render target.
    void Set();

    // Rebinds the default framebuffer (0).
    void Disable();

    // Raw GL object names (0 when not created).
    GLuint GetFBOID();
    GLuint GetTextureID();

private:
    // Allocates the colour attachment texture.
    void CreateTexture(int width, int height);

    GLuint m_frameBufferID;     // GL framebuffer object name
    GLuint m_textureID;         // GL texture name of the colour attachment
};
#endif  // INCLUDED_FBO_H

View file

@ -54,36 +54,47 @@
* *
* Registers * Registers
* --------- * ---------
*
* Registers are listed by their byte offset in the PowerPC address space. Each 0xF1180020: -------- -------- -------- -------- ?
* is 32 bits wide and little endian. Only those registers relevant to -------- -------- x------- -------- Layer 3 bitdepth (0 = 8-bit, 1 = 4-bit)
* rendering are listed here (see CTileGen for others). -------- -------- -x------ -------- Layer 2 bitdepth (0 = 8-bit, 1 = 4-bit)
* -------- -------- --x----- -------- Layer 1 bitdepth (0 = 8-bit, 1 = 4-bit)
* Offset: Description: -------- -------- ---x---- -------- Layer 0 bitdepth (0 = 8-bit, 1 = 4-bit)
* -------- -------- ----x--- -------- Layer 3 priority (0 = below 3D, 1 = above 3D)
* 0x20 Layer configuration -------- -------- -----x-- -------- Layer 2 priority (0 = below 3D, 1 = above 3D)
* 0x40 Layer A/A' color offset -------- -------- ------x- -------- Layer 1 priority (0 = below 3D, 1 = above 3D)
* 0x44 Layer B/B' color offset -------- -------- -------x -------- Layer 0 priority (0 = below 3D, 1 = above 3D)
* 0x60 Layer A scroll
* 0x64 Layer A' scroll 0xF1180040: Foreground layer color modulation
* 0x68 Layer B scroll -------- xxxxxxxx -------- -------- Red component
* 0x6C Layer B' scroll -------- -------- xxxxxxxx -------- Green component
* -------- -------- -------- xxxxxxxx Blue component
* Layer configuration is formatted as:
* 0xF1180044: Background layer color modulation
* 31 0 -------- xxxxxxxx -------- -------- Red component
* ???? ???? ???? ???? pqrs tuvw ???? ???? -------- -------- xxxxxxxx -------- Green component
* -------- -------- -------- xxxxxxxx Blue component
* Bits 'pqrs' control the color depth of layers B', B, A', and A,
* respectively. If set, the layer's pattern data is encoded as 4 bits, 0xF1180060: x------- -------- -------- -------- Layer 0 enable
* otherwise the pixels are 8 bits. -------x xxxxxxxx -------- -------- Layer 0 Y scroll position
* -------- -------- x------- -------- Layer 0 X line scroll enable
* Bits 'tuvw' control priority for layers B', B, A', and A, respectively, -------- -------- -------x xxxxxxxx Layer 0 X scroll position
* which is also the relative ordering of the layers from bottom to top. For
* each layer, if its bit is clear, it will be drawn below the 3D layer, 0xF1180064: x------- -------- -------- -------- Layer 1 enable
* otherwise it is drawn on top. -------x xxxxxxxx -------- -------- Layer 1 Y scroll position
* -------- -------- x------- -------- Layer 1 X line scroll enable
* The remaining registers are described where appropriate further below. -------- -------- -------x xxxxxxxx Layer 1 X scroll position
0xF1180068: x------- -------- -------- -------- Layer 2 enable
-------x xxxxxxxx -------- -------- Layer 2 Y scroll position
-------- -------- x------- -------- Layer 2 X line scroll enable
-------- -------- -------x xxxxxxxx Layer 2 X scroll position
0xF118006C: x------- -------- -------- -------- Layer 3 enable
-------x xxxxxxxx -------- -------- Layer 3 Y scroll position
-------- -------- x------- -------- Layer 3 X line scroll enable
-------- -------- -------x xxxxxxxx Layer 3 X scroll position
* *
* VRAM Memory Map * VRAM Memory Map
* --------------- * ---------------
@ -284,238 +295,14 @@
#include <GL/glew.h> #include <GL/glew.h>
/******************************************************************************
Definitions and Constants
******************************************************************************/
// Shader program files (for use in development builds only)
#define VERTEX_2D_SHADER_FILE "Src/Graphics/Vertex2D.glsl"
#define FRAGMENT_2D_SHADER_FILE "Src/Graphics/Fragment2D.glsl"
/******************************************************************************
Layer Rendering
This code is quite slow and badly needs to be optimized. Dirty rectangles
should be implemented first and tile pre-decoding second.
******************************************************************************/
/*
 * DrawTileLine():
 *
 * Draws one 8-pixel line of a single tile into a 496-pixel wide output line.
 *
 * Template parameters:
 *   bits       Pattern depth, 4 or 8 bits per pixel.
 *   alphaTest  If true, only visible pixels whose colour has a non-zero top
 *              byte are written; otherwise every pixel is written (masked-out
 *              pixels as 0).
 *   clip       If true, pixels falling outside 0..495 are skipped.
 *
 * Parameters:
 *   line         Start of the output line.
 *   pixelOffset  X position of the tile's first pixel (may be negative when
 *                clipping is enabled).
 *   tile         Name table entry (pattern number and high colour bits).
 *   patternLine  Line within the tile, 0-7.
 *   vram         VRAM as 32-bit words.
 *   palette      Colour lookup table.
 *   mask         Per-line mask; each bit (MSB first) covers 32 pixels.
 */
template <int bits, bool alphaTest, bool clip>
static inline void DrawTileLine(uint32_t *line, int pixelOffset, uint16_t tile, int patternLine, const uint32_t *vram, const uint32_t *palette, uint16_t mask)
{
  static_assert(bits == 4 || bits == 8, "Tiles are either 4- or 8-bit");

  const int      wordsPerLine = bits / 4;             // 4-bit: 1 word per line, 8-bit: 2 words
  const uint32_t texelMask    = (1u << bits) - 1u;    // 0xF or 0xFF

  // Pattern number; 4-bit tiles take an extra low bit from bit 15 of the entry
  int patternIndex = tile & 0x3FFF;
  if (bits == 4)
    patternIndex = (patternIndex << 1) | ((tile >> 15) & 1);

  // A tile occupies 32 bytes (4-bit) or 64 bytes (8-bit), i.e. 8 or 16 words
  const uint32_t *patternRow = vram + patternIndex * (bits * 2) + patternLine * wordsPerLine;

  // Name table entry provides high color bits
  const uint32_t colorHi = tile & ((bits == 4) ? 0x7FF0 : 0x7F00);

  for (int word = 0; word < wordsPerLine; word++)
  {
    const uint32_t pattern = patternRow[word];

    // Left-most pixel lives in the most significant bits of the word
    for (int shift = 32 - bits; shift >= 0; shift -= bits, ++pixelOffset)
    {
      // The unsigned cast turns negative offsets into huge values, so one compare covers both ends
      if (clip && (unsigned int)pixelOffset >= 496u)
        continue;

      const uint16_t maskTest = 1 << (15 - (pixelOffset / 32));
      const bool     visible  = (mask & maskTest) != 0;
      const uint32_t pixel    = visible ? palette[((pattern >> shift) & texelMask) | colorHi] : 0;

      if (!alphaTest || (visible && (pixel >> 24) != 0))  // only draw opaque pixels
        line[pixelOffset] = pixel;
    }
  }
}
/*
 * DrawLayer():
 *
 * Renders all 384 lines of one tilemap layer into a 496x384 pixel surface.
 *
 * Template parameters are forwarded to DrawTileLine(): 'bits' is the pattern
 * depth (4 or 8) and 'alphaTest' selects whether only opaque pixels are
 * written.
 *
 * Parameters:
 *   pixels    Destination surface, 496 pixels per line.
 *   layerNum  Layer number, 0-3.
 *   vram      VRAM as 32-bit words.
 *   regs      Tile generator registers.
 *   palette   Colour lookup table for this layer.
 */
template <int bits, bool alphaTest>
static void DrawLayer(uint32_t *pixels, int layerNum, const uint32_t *vram, const uint32_t *regs, const uint32_t *palette)
{
  // Scroll register: line scroll enable, X scroll and Y scroll
  const uint32_t scrollReg      = regs[0x60/4 + layerNum];
  const bool     perLineScroll  = (scrollReg & 0x8000) != 0;
  const int      wholeHScroll   = scrollReg & 0x3FF;
  const int      vScroll        = (scrollReg >> 16) & 0x1FF;

  // Tables in VRAM, viewed as 16-bit entries
  const uint16_t *names       = (const uint16_t *) &vram[(0xF8000 + layerNum * 0x2000) / 4];
  const uint16_t *lineScrolls = (const uint16_t *) &vram[(0xF6000 + layerNum * 0x400) / 4];
  const uint16_t *masks       = (const uint16_t *) &vram[0xF7000 / 4];
  if (layerNum < 2) // little endian: layers A and A' use second word in each pair
    masks += 1;

  // A clear mask bit shows the alternate layer. The test below is for non-zero
  // bits, so the mask is inverted for the alternate layers (1 and 3).
  const uint16_t polarity = (layerNum & 1) ? 0xFFFF : 0x0000;

  for (int y = 0; y < 384; y++)
  {
    uint32_t *dest       = pixels + y * 496;
    const int scrolledY  = y + vScroll;
    const int hScroll    = (perLineScroll ? lineScrolls[y] : wholeHScroll) & 0x1FF;
    const int fineX      = hScroll & 7;     // horizontal pixel offset within tile line
    const int fineY      = scrolledY & 7;   // vertical pixel offset within 8x8 tile
    const uint16_t *row  = &names[(64 * (scrolledY / 8)) & 0xFFF];  // clamp to 64x64 = 0x1000
    const uint16_t lineMask = masks[2 * y] ^ polarity;              // each bit covers 32 pixels; two entries per line

    // With fine scrolling, part of an additional tile becomes visible
    const int middleEnd = 61 + ((fineX != 0) ? 1 : 0);

    int column = hScroll / 8;
    int x = -fineX;

    // First tile may be clipped
    DrawTileLine<bits, alphaTest, true>(dest, x, row[(column ^ 1) & 63], fineY, vram, palette, lineMask);

    // Middle tiles will not be clipped
    for (int t = 1; t < middleEnd; t++)
    {
      ++column;
      x += 8;
      DrawTileLine<bits, alphaTest, false>(dest, x, row[(column ^ 1) & 63], fineY, vram, palette, lineMask);
    }

    // Last tile may be clipped
    ++column;
    x += 8;
    DrawTileLine<bits, alphaTest, true>(dest, x, row[(column ^ 1) & 63], fineY, vram, palette, lineMask);
  }
}
/*
 * DrawTilemaps():
 *
 * Renders the enabled tilemap layers into the bottom (below 3D) and top
 * (above 3D) surfaces, according to each layer's priority bit. Layers are
 * drawn in the order 3, 2, 1, 0. The first layer drawn to a surface overwrites
 * it completely; later layers are drawn with the alpha test enabled.
 *
 * Returns a pair (top surface drawn, bottom surface drawn).
 */
std::pair<bool, bool> CRender2D::DrawTilemaps(uint32_t *pixelsBottom, uint32_t *pixelsTop)
{
  const uint32_t layerConfig = m_regs[0x20/4];
  const unsigned priority    = (layerConfig >> 8) & 0xF;

  // Draws every enabled layer whose priority bit matches 'above3D' into
  // 'dest'. Returns true if at least one layer was drawn.
  //
  // NOTE: layer ordering is different according to MAME (which has 3, 2, 0, 1
  // for top layer). Until there is evidence that this is correct and not a
  // typo, consistent layer ordering is assumed for both surfaces.
  auto drawSurfaceLayers = [&](uint32_t *dest, bool above3D) -> bool
  {
    bool drewSomething = false;

    for (int layerNum = 3; layerNum >= 0; layerNum--)
    {
      const bool enabled    = (m_regs[0x60/4 + layerNum] & 0x80000000) != 0;
      const bool layerAbove = (priority & (1 << layerNum)) != 0;
      if (!enabled || layerAbove != above3D)
        continue;

      const bool is4Bit = (layerConfig & (1 << (12 + layerNum))) != 0;

      if (drewSomething)
      {
        // Something is already on the surface: only draw opaque pixels
        if (is4Bit)
          DrawLayer<4, true>(dest, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
        else
          DrawLayer<8, true>(dest, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
      }
      else
      {
        // First layer on this surface: overwrite everything
        if (is4Bit)
          DrawLayer<4, false>(dest, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
        else
          DrawLayer<8, false>(dest, layerNum, m_vram, m_regs, m_palette[layerNum / 2]);
      }

      drewSomething = true;
    }

    return drewSomething;
  };

  // Bottom layers first, then top layers
  const bool haveBottom = drawSurfaceLayers(pixelsBottom, false);
  const bool haveTop    = drawSurfaceLayers(pixelsTop, true);

  // Indicate whether top and bottom surfaces have to be rendered
  return std::pair<bool, bool>(haveTop, haveBottom);
}
/****************************************************************************** /******************************************************************************
Frame Display Functions Frame Display Functions
******************************************************************************/ ******************************************************************************/
// Draws a surface to the screen (0 is top and 1 is bottom).
// 'surface' indexes m_texID; the texture is drawn as a 4-vertex triangle strip
// using the 2D shader and the (attribute-less) VAO.
void CRender2D::DisplaySurface(int surface)
{
// Shader program
m_shader.EnableShader();
glBindVertexArray(m_vao);
// Draw the surface
glActiveTexture(GL_TEXTURE0); // texture unit 0
glBindTexture(GL_TEXTURE_2D, m_texID[surface]);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore state: unbind the VAO and the shader
glBindVertexArray(0);
m_shader.DisableShader();
}
// Set up viewport and OpenGL state for 2D rendering (sets up blending function but disables blending) // Set up viewport and OpenGL state for 2D rendering (sets up blending function but disables blending)
void CRender2D::Setup2D(bool isBottom) void CRender2D::Setup2D(bool isBottom)
{ {
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // alpha of 1.0 is opaque, 0 is transparent glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // alpha of 1.0 is opaque, 0 is transparent
glDisable(GL_BLEND);
// Disable Z-buffering // Disable Z-buffering
glDisable(GL_DEPTH_TEST); glDisable(GL_DEPTH_TEST);
@ -538,44 +325,111 @@ void CRender2D::Setup2D(bool isBottom)
} }
} }
// Draws the given texture with the 2D shader as a 4-vertex triangle strip.
// Blending is enabled for the draw and disabled again afterwards; the VAO and
// shader are unbound before returning.
void CRender2D::DrawSurface(GLuint textureID)
{
m_shader.EnableShader();
glEnable (GL_BLEND);
glBindVertexArray (m_vao);
glActiveTexture (GL_TEXTURE0); // texture unit 0
glBindTexture (GL_TEXTURE_2D, textureID);
glDrawArrays (GL_TRIANGLE_STRIP, 0, 4);
glBindVertexArray (0);
glDisable (GL_BLEND);
m_shader.DisableShader();
}
// Converts a line number to the fraction of the 384-line frame at which that
// line begins (line 0 -> 0.0).
float CRender2D::LineToPercentStart(int lineNumber)
{
  const float totalLines = 384.0f;
  return static_cast<float>(lineNumber) / totalLines;
}
// Converts a line number to the fraction of the 384-line frame at which that
// line ends, i.e. where the following line begins (line 383 -> 1.0).
float CRender2D::LineToPercentEnd(int lineNumber)
{
  const float totalLines = 384.0f;
  return static_cast<float>(lineNumber + 1) / totalLines;
}
void CRender2D::BeginFrame(void) void CRender2D::BeginFrame(void)
{ {
} }
void CRender2D::PreRenderFrame(void) void CRender2D::PreRenderFrame(void)
{ {
// Update all layers glDisable(GL_SCISSOR_TEST);
m_surfaces_present = DrawTilemaps(m_bottomSurface, m_topSurface); glViewport(0, 0, 496, 384);
m_shaderTileGen.EnableShader();
glActiveTexture(GL_TEXTURE0); // texture unit 0 glActiveTexture(GL_TEXTURE0); // texture unit 0
if (m_surfaces_present.first) glBindTexture(GL_TEXTURE_2D, m_vramTexID);
{ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 512, 512, GL_RED_INTEGER, GL_UNSIGNED_INT, m_vram);
glBindTexture(GL_TEXTURE_2D, m_texID[0]); glActiveTexture(GL_TEXTURE1); // texture unit 1
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 496, 384, GL_RGBA, GL_UNSIGNED_BYTE, m_topSurface); glBindTexture(GL_TEXTURE_2D, m_paletteTexID);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 128, 256, GL_RED_INTEGER, GL_UNSIGNED_INT, m_vram + 0x40000);
glActiveTexture(GL_TEXTURE0); // texture unit 1
glUniform1uiv(m_shaderTileGen.uniformLocMap["regs"], 32, m_regs);
glBindVertexArray(m_vao);
m_fboBottom.Set();
glClearColor(0, 0, 0, 0);
glClear(GL_COLOR_BUFFER_BIT);
glEnable(GL_BLEND);
// render bottom layer
for (int i = 4; i-- > 0;) {
if (!IsEnabled(i)) {
continue;
} }
if (m_surfaces_present.second)
{ if (Above3D(i)) {
glBindTexture(GL_TEXTURE_2D, m_texID[1]); continue;
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 496, 384, GL_RGBA, GL_UNSIGNED_BYTE, m_bottomSurface);
} }
glUniform1i(m_shaderTileGen.uniformLocMap["layerNumber"], i);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
m_fboTop.Set();
glClear(GL_COLOR_BUFFER_BIT);
// render top layer
for (int i = 4; i-- > 0;) {
if (!IsEnabled(i)) {
continue;
}
if (!Above3D(i)) {
continue;
}
glUniform1i(m_shaderTileGen.uniformLocMap["layerNumber"], i);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
glBindVertexArray(0);
m_shaderTileGen.DisableShader();
m_fboBottom.Disable();
glDisable(GL_BLEND);
} }
void CRender2D::RenderFrameBottom(void) void CRender2D::RenderFrameBottom(void)
{ {
// Display bottom surface if anything was drawn there, else clear everything
Setup2D(true); Setup2D(true);
if (m_surfaces_present.second) DrawSurface(m_fboBottom.GetTextureID());
DisplaySurface(1);
} }
void CRender2D::RenderFrameTop(void) void CRender2D::RenderFrameTop(void)
{
// Display top surface only if it exists
if (m_surfaces_present.first)
{ {
Setup2D(false); Setup2D(false);
glEnable(GL_BLEND); DrawSurface(m_fboTop.GetTextureID());
DisplaySurface(0);
}
} }
void CRender2D::EndFrame(void) void CRender2D::EndFrame(void)
@ -616,23 +470,8 @@ void CRender2D::AttachVRAM(const uint8_t *vramPtr)
DebugLog("Render2D attached VRAM\n"); DebugLog("Render2D attached VRAM\n");
} }
// Memory pool and offsets within it
#define MEMORY_POOL_SIZE (2*512*384*4)
#define OFFSET_TOP_SURFACE 0 // 512*384*4 bytes
#define OFFSET_BOTTOM_SURFACE (512*384*4) // 512*384*4
bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes) bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes)
{ {
// Allocate memory for layer surfaces
m_memoryPool = new(std::nothrow) uint8_t[MEMORY_POOL_SIZE];
if (NULL == m_memoryPool)
return ErrorLog("Insufficient memory for tilemap surfaces (need %1.1f MB).", float(MEMORY_POOL_SIZE) / 0x100000);
memset(m_memoryPool, 0, MEMORY_POOL_SIZE); // clear textures
// Set up pointers to memory regions
m_topSurface = (uint32_t *) &m_memoryPool[OFFSET_TOP_SURFACE];
m_bottomSurface = (uint32_t *) &m_memoryPool[OFFSET_BOTTOM_SURFACE];
// Resolution // Resolution
m_xPixels = xRes; m_xPixels = xRes;
m_yPixels = yRes; m_yPixels = yRes;
@ -642,13 +481,15 @@ bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned
m_totalYPixels = totalYRes; m_totalYPixels = totalYRes;
m_correction = (UINT32)(((yRes / 384.f) * 2) + 0.5f); // for some reason the 2d layer is 2 pixels off the 3D m_correction = (UINT32)(((yRes / 384.f) * 2) + 0.5f); // for some reason the 2d layer is 2 pixels off the 3D
DebugLog("Render2D initialized (allocated %1.1f MB)\n", float(MEMORY_POOL_SIZE) / 0x100000);
return OKAY; return OKAY;
} }
CRender2D::CRender2D(const Util::Config::Node& config) CRender2D::CRender2D(const Util::Config::Node& config)
: m_config(config), : m_config(config),
m_vao(0) m_vao(0),
m_vram(nullptr),
m_palette{nullptr},
m_regs(nullptr)
{ {
DebugLog("Built Render2D\n"); DebugLog("Built Render2D\n");
@ -657,49 +498,88 @@ CRender2D::CRender2D(const Util::Config::Node& config)
m_shader.EnableShader(); m_shader.EnableShader();
// update uniform memory // update uniform memory
glUniform1i(m_shader.uniformLocMap["tex1"], 0); // bind to texture unit zero glUniform1i(m_shader.uniformLocMap["tex1"], 0); // texture unit zero
m_shader.DisableShader(); m_shader.DisableShader();
// Create textures m_shaderTileGen.LoadShaders(s_vertexShaderTileGen, s_fragmentShaderTileGen);
glActiveTexture(GL_TEXTURE0); // texture unit 0 m_shaderTileGen.GetUniformLocationMap("vram");
glGenTextures(2, m_texID); m_shaderTileGen.GetUniformLocationMap("palette");
m_shaderTileGen.GetUniformLocationMap("regs");
m_shaderTileGen.GetUniformLocationMap("layerNumber");
m_shaderTileGen.GetUniformLocationMap("lineStart");
m_shaderTileGen.GetUniformLocationMap("lineEnd");
for (int i = 0; i < 2; i++) m_shaderTileGen.EnableShader();
{
glBindTexture(GL_TEXTURE_2D, m_texID[i]); glUniform1i(m_shaderTileGen.uniformLocMap["vram"], 0); // texture unit 0
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glUniform1i(m_shaderTileGen.uniformLocMap["palette"], 1); // texture unit 1
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glUniform1f(m_shaderTileGen.uniformLocMap["lineStart"], LineToPercentStart(0));
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glUniform1f(m_shaderTileGen.uniformLocMap["lineEnd"], LineToPercentEnd(383));
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 496, 384, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); m_shaderTileGen.DisableShader();
}
glGenVertexArrays(1, &m_vao); glGenVertexArrays(1, &m_vao);
glBindVertexArray(m_vao); glBindVertexArray(m_vao);
// no states needed since we do it in the shader // no states needed since we do it in the shader
glBindVertexArray(0); glBindVertexArray(0);
glGenTextures(1, &m_vramTexID);
glBindTexture(GL_TEXTURE_2D, m_vramTexID);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, 512, 512, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr);
glGenTextures(1, &m_paletteTexID);
glBindTexture(GL_TEXTURE_2D, m_paletteTexID);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, 128, 256, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr);
glBindTexture(GL_TEXTURE_2D, 0);
m_fboBottom.Create(496, 384);
m_fboTop.Create(496, 384);
} }
CRender2D::~CRender2D(void) CRender2D::~CRender2D(void)
{ {
m_shader.UnloadShaders(); m_shader.UnloadShaders();
glDeleteTextures(2, m_texID); m_shaderTileGen.UnloadShaders();
if (m_vramTexID) {
glDeleteTextures(1, &m_vramTexID);
m_vramTexID = 0;
}
if (m_paletteTexID) {
glDeleteTextures(1, &m_paletteTexID);
m_paletteTexID = 0;
}
if (m_vao) { if (m_vao) {
glDeleteVertexArrays(1, &m_vao); glDeleteVertexArrays(1, &m_vao);
m_vao = 0; m_vao = 0;
} }
if (m_memoryPool) m_fboBottom.Destroy();
{ m_fboTop.Destroy();
delete [] m_memoryPool;
m_memoryPool = 0;
}
m_vram = 0; m_vram = nullptr;
m_topSurface = 0;
m_bottomSurface = 0;
DebugLog("Destroyed Render2D\n"); DebugLog("Destroyed Render2D\n");
} }
// Returns true if the layer's enable bit (bit 31 of its scroll register at
// 0x60 + 4*layerNumber) is set.
bool CRender2D::IsEnabled(int layerNumber)
{
  const uint32_t scrollReg = m_regs[0x60 / 4 + layerNumber];
  return (scrollReg & 0x80000000) != 0;
}
// Returns true if the layer's priority bit (bit 8 + layerNumber of register
// 0x20) is set, meaning the layer is drawn above the 3D layer.
bool CRender2D::Above3D(int layerNumber)
{
  const uint32_t layerConfig = m_regs[0x20 / 4];
  return ((layerConfig >> (8 + layerNumber)) & 0x1) != 0;
}

View file

@ -31,7 +31,7 @@
#include <GL/glew.h> #include <GL/glew.h>
#include "Util/NewConfig.h" #include "Util/NewConfig.h"
#include "New3D/GLSLShader.h" #include "New3D/GLSLShader.h"
#include "FBO.h"
/* /*
* CRender2D: * CRender2D:
@ -173,10 +173,14 @@ public:
~CRender2D(void); ~CRender2D(void);
private: private:
// Private member functions
std::pair<bool, bool> DrawTilemaps(uint32_t *destBottom, uint32_t *destTop); bool IsEnabled (int layerNumber);
void DisplaySurface(int surface); bool Above3D (int layerNumber);
void Setup2D (bool isBottom); void Setup2D (bool isBottom);
void DrawSurface (GLuint textureID);
float LineToPercentStart (int lineNumber); // vertical line numbers are from 0-383
float LineToPercentEnd (int lineNumber); // vertical line numbers are from 0-383
// Run-time configuration // Run-time configuration
const Util::Config::Node& m_config; const Util::Config::Node& m_config;
@ -187,25 +191,24 @@ private:
const uint32_t* m_regs; const uint32_t* m_regs;
// OpenGL data // OpenGL data
GLuint m_texID[2]; // IDs for the 2 layer textures (top and bottom)
unsigned m_xPixels = 496; // display surface resolution unsigned m_xPixels = 496; // display surface resolution
unsigned m_yPixels = 384; // ... unsigned m_yPixels = 384; // ...
unsigned m_xOffset = 0; // offset unsigned m_xOffset = 0; // offset
unsigned m_yOffset = 0; unsigned m_yOffset = 0;
unsigned m_totalXPixels; // total display surface resolution unsigned m_totalXPixels = 0; // total display surface resolution
unsigned m_totalYPixels; unsigned m_totalYPixels = 0;
unsigned m_correction = 0; unsigned m_correction = 0;
GLuint m_vao; GLuint m_vao;
GLSLShader m_shader; GLSLShader m_shader;
GLSLShader m_shaderTileGen;
// PreRenderFrame() tracks which surfaces exist in current frame GLuint m_vramTexID = 0;
std::pair<bool, bool> m_surfaces_present = std::pair<bool, bool>(false, false); GLuint m_paletteTexID = 0;
FBO m_fboBottom;
FBO m_fboTop;
// Buffers
uint8_t *m_memoryPool = 0; // all memory is allocated here
uint32_t *m_topSurface = 0; // 512x384x32bpp pixel surface for top layers
uint32_t *m_bottomSurface = 0; // bottom layers
}; };

View file

@ -71,4 +71,255 @@ static const char s_fragmentShaderSource[] = R"glsl(
)glsl"; )glsl";
// Vertex shader for the tile generator pass.
//
// Takes no vertex attributes: the four corners of a quad are generated from
// gl_VertexID. The quad spans the full width in clip space (x from -1 to 1);
// its vertical extent is set by the lineStart/lineEnd uniforms, which are
// mapped linearly from the range 0..1 to clip-space y -1..1. Vertices 0 and 2
// are placed at lineStart, vertices 1 and 3 at lineEnd.
static const char s_vertexShaderTileGen[] = R"glsl(
#version 410 core
uniform float lineStart; // defined as a % of the viewport height in the range 0-1. So 0 is top line, 0.5 is line 192 etc
uniform float lineEnd;
void main(void)
{
const float v1 = -1.0;
const float v2 = 1.0;
vec4 vertices[] = vec4[]( vec4(-1.0, v1, 0.0, 1.0),
vec4(-1.0, v2, 0.0, 1.0),
vec4( 1.0, v1, 0.0, 1.0),
vec4( 1.0, v2, 0.0, 1.0));
float top = ((v2 - v1) * lineStart) + v1;
float bottom = ((v2 - v1) * lineEnd ) + v1;
vertices[0].y = top;
vertices[2].y = top;
vertices[1].y = bottom;
vertices[3].y = bottom;
gl_Position = vertices[gl_VertexID % 4];
}
)glsl";
// Fragment shader for the tile generator pass.
//
// Computes the colour of one pixel of one tilemap layer (selected by the
// layerNumber uniform) directly from VRAM, the palette and the tile generator
// registers:
//   - vram    is VRAM as a 512x512 texture of 32-bit unsigned words
//   - palette is the palette as a 128x256 texture of 32-bit unsigned words
//   - regs    holds the 32 tile generator registers
//
// All bitwise operations and comparisons on unsigned values use unsigned
// literals, so the shader does not depend on implicit int -> uint conversion
// of operands, which some GLSL compilers warn about or reject.
static const char s_fragmentShaderTileGen[] = R"glsl(

#version 410 core

//layout(origin_upper_left) in vec4 gl_FragCoord;

// inputs
uniform usampler2D vram;        // texture 512x512
uniform usampler2D palette;     // texture 128x256 - actual dimensions dont matter too much but we have to stay in the limits of max tex width/height, so can't have 1 giant 1d array
uniform uint regs[32];
uniform int layerNumber;

// outputs
out vec4 fragColor;

// converts a word offset into VRAM to 2D texel coordinates
ivec2 GetVRamCoords(int offset)
{
    return ivec2(offset % 512, offset / 512);
}

// converts a palette entry index to 2D texel coordinates
ivec2 GetPaletteCoords(int offset)
{
    return ivec2(offset % 128, offset / 128);
}

// one mask word per line; each 16-bit half serves one pair of layers
uint GetLineMask(int layerNum, int yCoord)
{
    uint shift = (layerNum<2) ? 16u : 0u;   // need to check this, we could be endian swapped so could be wrong
    uint maskPolarity = ((layerNum & 1) > 0) ? 0xFFFFu : 0x0000u;   // alternate layers use the inverted mask
    int index = (0xF7000 / 4) + yCoord;

    ivec2 coords = GetVRamCoords(index);
    uint mask = ((texelFetch(vram,coords,0).r >> shift) & 0xFFFFu) ^ maskPolarity;

    return mask;
}

// each mask bit (most significant first) covers 32 pixels
bool GetPixelMask(int layerNum, int xCoord, int yCoord)
{
    uint lineMask = GetLineMask(layerNum, yCoord);
    uint maskTest = 1u << (15-(xCoord/32));

    return (lineMask & maskTest) != 0u;
}

// two 16-bit line scroll values are packed into each VRAM word
int GetLineScrollValue(int layerNum, int yCoord)
{
    int index = ((0xF6000 + (layerNum * 0x400)) / 4) + (yCoord / 2);
    int shift = (yCoord % 2) * 16;  // double check this

    ivec2 coords = GetVRamCoords(index);
    return int((texelFetch(vram,coords,0).r >> shift) & 0xFFFFu);
}

// index into the 64x64 name table for the tile under this pixel
int GetTileNumber(int xCoord, int yCoord, int xScroll, int yScroll)
{
    int xIndex = ((xCoord + xScroll) / 8) & 0x3F;
    int yIndex = ((yCoord + yScroll) / 8) & 0x3F;

    return (yIndex*64) + xIndex;
}

int GetTileData(int layerNum, int tileNumber)
{
    int addressBase = (0xF8000 + (layerNum * 0x2000)) / 4;
    int offset = tileNumber / 2;                // two tiles per 32bit word
    int shift = (1 - (tileNumber % 2)) * 16;    // triple check this

    ivec2 coords = GetVRamCoords(addressBase+offset);
    uint data = (texelFetch(vram,coords,0).r >> shift) & 0xFFFFu;

    return int(data);
}

// vertical pixel offset within the 8x8 tile
int GetVFine(int yCoord, int yScroll)
{
    return (yCoord + yScroll) & 7;
}

// horizontal pixel offset within the 8x8 tile
int GetHFine(int xCoord, int xScroll)
{
    return (xCoord + xScroll) & 7;
}

// register data
bool LineScrollMode     (int layerNum)  { return (regs[0x60/4 + layerNum] & 0x8000u) != 0u; }
int  GetHorizontalScroll(int layerNum)  { return int(regs[0x60/4 + layerNum] & 0x3FFu); }
int  GetVerticalScroll  (int layerNum)  { return int((regs[0x60/4 + layerNum] >> 16) & 0x1FFu); }
int  LayerPriority      ()              { return int((regs[0x20/4] >> 8) & 0xFu); }
bool LayerIs4Bit        (int layerNum)  { return (regs[0x20/4] & (1u << (12 + layerNum))) != 0u; }
bool LayerEnabled       (int layerNum)  { return (regs[0x60/4 + layerNum] & 0x80000000u) != 0u; }
bool LayerSelected      (int layerNum)  { return (LayerPriority() & (1 << layerNum)) == 0; }

// interprets the low 8 bits of c as a signed byte and scales it to roughly -1..1
float Int8ToFloat(uint c)
{
    if((c & 0x80u) > 0u) {      // this is a bit harder in GLSL. Top bit means negative number, we extend to make 32bit
        return float(int(c | 0xFFFFFF00u)) / 128.0;
    }
    else {
        return float(c) / 127.0;
    }
}

// adds the signed colour offset from register 0x40 (layers 0/1) or 0x44 (layers 2/3)
vec4 AddColourOffset(int layerNum, vec4 colour)
{
    uint offsetReg = regs[(0x40/4) + layerNum/2];

    vec4 c;
    c.b = Int8ToFloat((offsetReg >>16) & 0xFFu);
    c.g = Int8ToFloat((offsetReg >> 8) & 0xFFu);
    c.r = Int8ToFloat((offsetReg >> 0) & 0xFFu);
    c.a = 0.0;

    colour += c;

    return clamp(colour,0.0,1.0);   // clamp is probably not needed since will get clamped on render target
}

// unpacks a 16-bit colour: 5 bits each of red, green and blue plus a transparency bit
vec4 Int16ColourToVec4(uint colour)
{
    uint alpha = (colour>>15);      // top bit is alpha. 1 means clear, 0 opaque
    alpha = ~alpha;                 // invert
    alpha = alpha & 0x1u;           // mask bit

    vec4 c;
    c.r = float((colour >> 0 ) & 0x1Fu) / 31.0;
    c.g = float((colour >> 5 ) & 0x1Fu) / 31.0;
    c.b = float((colour >> 10) & 0x1Fu) / 31.0;
    c.a = float(alpha) / 1.0;

    c.rgb *= c.a;                   // multiply by alpha value, this will push transparent to black, no branch needed

    return c;
}

vec4 GetColour(int layerNum, int paletteOffset)
{
    ivec2 coords = GetPaletteCoords(paletteOffset);
    uint colour = texelFetch(palette,coords,0).r;

    vec4 col = Int16ColourToVec4(colour);       // each colour is only 16bits, but occupies 32bits

    return AddColourOffset(layerNum,col);       // apply colour offsets from registers
}

vec4 Draw4Bit(int layerNum, int tileData, int hFine, int vFine)
{
    // Tile pattern offset: each tile occupies 32 bytes when using 4-bit pixels (offset of tile pattern within VRAM)
    int patternOffset = ((tileData & 0x3FFF) << 1) | ((tileData >> 15) & 1);
    patternOffset *= 32;
    patternOffset /= 4;

    // Upper color bits; the lower 4 bits come from the tile pattern
    int paletteIndex = tileData & 0x7FF0;

    ivec2 coords = GetVRamCoords(patternOffset+vFine);
    uint pattern = texelFetch(vram,coords,0).r;
    pattern = (pattern >> ((7-hFine)*4)) & 0xFu;    // get the pattern for our horizontal value

    return GetColour(layerNum, paletteIndex | int(pattern));
}

vec4 Draw8Bit(int layerNum, int tileData, int hFine, int vFine)
{
    // Tile pattern offset: each tile occupies 64 bytes when using 8-bit pixels
    int patternOffset = tileData & 0x3FFF;
    patternOffset *= 64;
    patternOffset /= 4;

    // Upper color bits
    int paletteIndex = tileData & 0x7F00;

    // each read is 4 pixels
    int offset = hFine / 4;

    ivec2 coords = GetVRamCoords(patternOffset+(vFine*2)+offset);   // 8-bit pixels, each line is two words
    uint pattern = texelFetch(vram,coords,0).r;

    pattern = (pattern >> ((3-(hFine%4))*8)) & 0xFFu;   // shift out the bits we want for this pixel

    return GetColour(layerNum, paletteIndex | int(pattern));
}

void main()
{
    ivec2 pos = ivec2(gl_FragCoord.xy);

    int scrollX;
    if(LineScrollMode(layerNumber)) {
        scrollX = GetLineScrollValue(layerNumber, pos.y);
    }
    else {
        scrollX = GetHorizontalScroll(layerNumber);
    }

    int scrollY = GetVerticalScroll(layerNumber);
    int tileNumber = GetTileNumber(pos.x,pos.y,scrollX,scrollY);
    int hFine = GetHFine(pos.x,scrollX);
    int vFine = GetVFine(pos.y,scrollY);
    bool pixelMask = GetPixelMask(layerNumber,pos.x,pos.y);

    if(pixelMask==true) {
        int tileData = GetTileData(layerNumber,tileNumber);

        if(LayerIs4Bit(layerNumber)) {
            fragColor = Draw4Bit(layerNumber,tileData,hFine,vFine);
        }
        else {
            fragColor = Draw8Bit(layerNumber,tileData,hFine,vFine);
        }
    }
    else {
        fragColor = vec4(0.0);  // masked out: fully transparent
    }
}

)glsl";
#endif // INCLUDED_SHADERS2D_H #endif // INCLUDED_SHADERS2D_H

View file

@ -306,6 +306,7 @@ xcopy /D /Y "$(ProjectDir)..\Assets\*" "$(TargetDir)Assets"</Command>
<ClCompile Include="..\Src\Debugger\SupermodelDebugger.cpp" /> <ClCompile Include="..\Src\Debugger\SupermodelDebugger.cpp" />
<ClCompile Include="..\Src\Debugger\Watch.cpp" /> <ClCompile Include="..\Src\Debugger\Watch.cpp" />
<ClCompile Include="..\Src\GameLoader.cpp" /> <ClCompile Include="..\Src\GameLoader.cpp" />
<ClCompile Include="..\Src\Graphics\FBO.cpp" />
<ClCompile Include="..\Src\Graphics\Legacy3D\Error.cpp" /> <ClCompile Include="..\Src\Graphics\Legacy3D\Error.cpp" />
<ClCompile Include="..\Src\Graphics\Legacy3D\Legacy3D.cpp" /> <ClCompile Include="..\Src\Graphics\Legacy3D\Legacy3D.cpp" />
<ClCompile Include="..\Src\Graphics\Legacy3D\Models.cpp" /> <ClCompile Include="..\Src\Graphics\Legacy3D\Models.cpp" />
@ -478,6 +479,7 @@ xcopy /D /Y "$(ProjectDir)..\Assets\*" "$(TargetDir)Assets"</Command>
<ClInclude Include="..\Src\Debugger\SupermodelDebugger.h" /> <ClInclude Include="..\Src\Debugger\SupermodelDebugger.h" />
<ClInclude Include="..\Src\Debugger\Watch.h" /> <ClInclude Include="..\Src\Debugger\Watch.h" />
<ClInclude Include="..\Src\GameLoader.h" /> <ClInclude Include="..\Src\GameLoader.h" />
<ClInclude Include="..\Src\Graphics\FBO.h" />
<ClInclude Include="..\Src\Graphics\IRender3D.h" /> <ClInclude Include="..\Src\Graphics\IRender3D.h" />
<ClInclude Include="..\Src\Graphics\Legacy3D\Legacy3D.h" /> <ClInclude Include="..\Src\Graphics\Legacy3D\Legacy3D.h" />
<ClInclude Include="..\Src\Graphics\Legacy3D\Shaders3D.h" /> <ClInclude Include="..\Src\Graphics\Legacy3D\Shaders3D.h" />

View file

@ -467,6 +467,9 @@
<ClCompile Include="..\Src\OSD\SDL\Crosshair.cpp"> <ClCompile Include="..\Src\OSD\SDL\Crosshair.cpp">
<Filter>Source Files\OSD\SDL</Filter> <Filter>Source Files\OSD\SDL</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\Src\Graphics\FBO.cpp">
<Filter>Source Files\Graphics</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<MASM Include="..\Src\CPU\68K\Turbo68K\Turbo68K.asm"> <MASM Include="..\Src\CPU\68K\Turbo68K\Turbo68K.asm">
@ -847,6 +850,9 @@
<ClInclude Include="..\Src\OSD\SDL\Crosshair.h"> <ClInclude Include="..\Src\OSD\SDL\Crosshair.h">
<Filter>Header Files\OSD\SDL</Filter> <Filter>Header Files\OSD\SDL</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\Src\Graphics\FBO.h">
<Filter>Header Files\Graphics</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CustomBuild Include="..\Src\Debugger\ReadMe.txt"> <CustomBuild Include="..\Src\Debugger\ReadMe.txt">