From c6ea81d996cac351c98226c1881501554794f66a Mon Sep 17 00:00:00 2001 From: Ian Curtis Date: Sat, 23 Sep 2023 15:27:04 +0100 Subject: [PATCH] Emulate the entire tilegen chip in a GLSL shader. (This is now possible with OpenGL 3+). The tilegen drawing was emulated on the CPU, but was one of the most expensive functions in the emulator according to a profiler. On a modern GPU it's pretty much free, because a GPU is a massive SIMD monster. Tilegen registers are mapped to uniforms, and the vram and palette are mapped to two textures. TODO rip out the redundant code in the tilegen class. We don't need to pre-calculate palettes anymore, etc. The tilegen code supports a start/end line so we can emulate as many lines as we want in a chunk, which will come in later as some games update the tilegen immediately after the ping_pong bit has flipped at ~66% of the frame. The Scud Race 'ROLLING START' tilegen bug is probably actually a bug in the original h/w implementation, that ends up looking correct on original h/w but not for us. Need hardware testing to confirm what it's actually doing. 
--- Src/Graphics/FBO.cpp | 73 ++ Src/Graphics/FBO.h | 28 + Src/Graphics/Render2D.cpp | 1070 +++++++++++++---------------- Src/Graphics/Render2D.h | 343 ++++----- Src/Graphics/Shaders2D.h | 251 +++++++ VS2008/Supermodel.vcxproj | 2 + VS2008/Supermodel.vcxproj.filters | 6 + 7 files changed, 1008 insertions(+), 765 deletions(-) create mode 100644 Src/Graphics/FBO.cpp create mode 100644 Src/Graphics/FBO.h diff --git a/Src/Graphics/FBO.cpp b/Src/Graphics/FBO.cpp new file mode 100644 index 0000000..99f7a25 --- /dev/null +++ b/Src/Graphics/FBO.cpp @@ -0,0 +1,73 @@ +#include "FBO.h" + +FBO::FBO() : + m_frameBufferID(0), + m_textureID(0) +{ +} + +bool FBO::Create(int width, int height) +{ + CreateTexture(width, height); + + glGenFramebuffers(1, &m_frameBufferID); + glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_textureID, 0); + + auto frameBufferStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); + + glBindFramebuffer(GL_FRAMEBUFFER, 0); //created FBO now disable it + + return frameBufferStatus == GL_FRAMEBUFFER_COMPLETE; +} + +void FBO::Destroy() +{ + if (m_frameBufferID) { + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDeleteFramebuffers(1, &m_frameBufferID); + } + + if (m_textureID) { + glDeleteTextures(1, &m_textureID); + } + + m_frameBufferID = 0; + m_textureID = 0; +} + +void FBO::BindTexture() +{ + glBindTexture(GL_TEXTURE_2D, m_textureID); +} + +void FBO::Set() +{ + glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID); +} + +void FBO::Disable() +{ + glBindFramebuffer(GL_FRAMEBUFFER, 0); +} + +GLuint FBO::GetFBOID() +{ + return m_frameBufferID; +} + +GLuint FBO::GetTextureID() +{ + return m_textureID; +} + +void FBO::CreateTexture(int width, int height) +{ + glGenTextures (1, &m_textureID); + glBindTexture (GL_TEXTURE_2D, m_textureID); + glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri 
(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); +} diff --git a/Src/Graphics/FBO.h b/Src/Graphics/FBO.h new file mode 100644 index 0000000..51ac979 --- /dev/null +++ b/Src/Graphics/FBO.h @@ -0,0 +1,28 @@ +#ifndef _FBO_H_ +#define _FBO_H_ + +#include + +class FBO +{ +public: + + FBO(); + + bool Create(int width, int height); + void Destroy(); + void BindTexture(); + void Set(); + void Disable(); + GLuint GetFBOID(); + GLuint GetTextureID(); + +private: + + void CreateTexture(int width, int height); + + GLuint m_frameBufferID; + GLuint m_textureID; +}; + +#endif \ No newline at end of file diff --git a/Src/Graphics/Render2D.cpp b/Src/Graphics/Render2D.cpp index 3e47ae4..29a7bba 100644 --- a/Src/Graphics/Render2D.cpp +++ b/Src/Graphics/Render2D.cpp @@ -19,260 +19,271 @@ ** with Supermodel. If not, see . **/ -/* - * Render2D.cpp - * - * Implementation of the CRender2D class: OpenGL tile generator graphics. - * - * To-Do List - * ---------- - * - Is there a universal solution to the 'ROLLING START' scrolling bug (Scud - * Race) and the scrolling text during Magical Truck Adventure's attract - * mode? To fix Scud Race, either the stencil mask or the h-scroll value must - * be shifted by 16 pixels. Magical Truck Adventure is similar but opposite. - * Perhaps this is a function of timing registers accessed via JTAG? - * - Is there a better way to handle the overscan regions in wide screen mode? - * Is clearing two thin viewports better than one big clear? - * - Are v-scroll values 9 or 10 bits? (Does it matter?) Lost World seems to - * have some scrolling issues. - * - A proper shut-down function is needed! OpenGL might not be available when - * the destructor for this class is called. 
- * - * Tile Generator Hardware Overview - * -------------------------------- - * - * Model 3's medium resolution tile generator hardware appears to be derived - * from the Model 2 and System 24 chipset, but is much simpler. It consists of - * four 64x64 tile layers, comprised of 8x8 pixel tiles, with configurable - * priorities. There may be additional features but so far, no known Model 3 - * games use them. - * - * VRAM is comprised of 1 MB for tile data and an additional 128 KB for the - * palette (each color occupies 32 bits). The four tilemap layers are referred - * to as: A (0), A' (1), B (2), and B' (3). Palette RAM may be located on a - * separate RAM IC. - * - * Registers - * --------- - * - * Registers are listed by their byte offset in the PowerPC address space. Each - * is 32 bits wide and little endian. Only those registers relevant to - * rendering are listed here (see CTileGen for others). - * - * Offset: Description: - * - * 0x20 Layer configuration - * 0x40 Layer A/A' color offset - * 0x44 Layer B/B' color offset - * 0x60 Layer A scroll - * 0x64 Layer A' scroll - * 0x68 Layer B scroll - * 0x6C Layer B' scroll - * - * Layer configuration is formatted as: - * - * 31 0 - * ???? ???? ???? ???? pqrs tuvw ???? ???? - * - * Bits 'pqrs' control the color depth of layers B', B, A', and A, - * respectively. If set, the layer's pattern data is encoded as 4 bits, - * otherwise the pixels are 8 bits. - * - * Bits 'tuvw' control priority for layers B', B, A', and A, respectively, - * which is also the relative ordering of the layers from bottom to top. For - * each layer, if its bit is clear, it will be drawn below the 3D layer, - * otherwise it is drawn on top. - * - * The remaining registers are described where appropriate further below. 
- * - * VRAM Memory Map - * --------------- - * - * The lower 1 MB of VRAM is used for storing tiles, per-line horizontal scroll - * values, and the stencil mask, which determines which of each pair of layers - * is displayed on a given line and column. - * - * 00000-F5FFF Tile pattern data - * F6000-F63FF Layer A horizontal scroll table (512 lines) - * F6400-F67FF Layer A' horizontal scroll table - * F6800-F6BFF Layer B horizontal scroll table - * F6C00-F6FFF Layer B' horizontal scroll table - * F7000-F77FF Mask table (assuming 4 bytes per line, 512 lines) - * F7800-F7FFF ? - * F8000-F9FFF Layer A name table - * FA000-FBFFF Layer A' name table - * FC000-FDFFF Layer B name table - * FE000-FFFFF Layer B' name table - * - * Tiles may actually address the entire 1 MB space, although in practice, - * that would conflict with the other fixed memory regions. - * - * Palette - * ------- - * - * The palette stores 32768 colors. Each entry is a little endian 32-bit word. - * The upper 16 bits are unused and the lower 16 bits contain the color: - * - * 15 0 - * tbbb bbgg gggr rrrr - * - * The 't' bit is for transparency. When set, pixels of that color are - * transparent, unless they are the bottom-most layer. - * - * Tile Name Table and Pattern Layout - * ---------------------------------- - * - * The name table is a 64x64 array of 16-bit words serving as indices for tile - * pattern data and the palette. The first 64 words correspond to the first - * row of tiles, the next 64 to the second row, etc. Although 64x64 entries - * describes a 512x512 pixel screen, only the upper-left 62x48 tiles are - * visible when the vertical and horizontal scroll values are 0. Scrolling - * moves the 496x384 pixel 'window' around, with individual wrapping of the - * two axes. - * - * The data is actually arranged in 32-bit chunks in little endian format, so - * that tiles 0, 1, 2, and 3 will be stored as 1, 0, 3, 2. 
Fetching two name - * table entries as a single 32-bit word places the left tile in the high 16 - * bits and the right tile in the low 16 bits. - * - * The format of a name table entry in 4-bit color mode is: - * - * 15 0 - * jkpp pppp pppp iiii - * - * The pattern index is '0ppp pppp pppi iiij'. Multiplying by 32 yields the - * offset in VRAM at which the tile pattern data is stored. Note that the MSB - * of the name table entry becomes the LSB of the pattern index. This allows - * for 32768 4-bit tile patterns, each occupying 32 bytes, which means the - * whole 1 MB VRAM space can be addressed. - * - * The 4-bit pattern data is stored as 8 32-bit words. Each word stores a row - * of 8 pixels: - * - * 31 0 - * aaaa bbbb cccc dddd eeee ffff gggg hhhh - * - * 'a' is the left-most pixel data. These 4-bit values are combined with bits - * from the name table to form a palette index, which determines the final - * color. For example, for pixel 'a', the 15-bit color index is: - * - * 14 0 - * kpp pppp pppp aaaa - * - * Note that index bits are re-used to form the palette index, meaning that - * the pattern address partly determines the color. - * - * In 8-bit color mode, the name table entry looks like: - * - * 15 0 - * ?ppp pppp iiii iiii - * - * The low 15 'p' and 'i' bits together form the pattern index, which must be - * multiplied by 64 to get the offset. The pattern data now consists of 16 32- - * bit words, each containing four 8-bit pixels: - * - * 31 0 - * aaaa aaaa bbbb bbbb cccc cccc dddd dddd - * - * 'a' is the left-most pixel. Each line is therefore comprised of two 32-bit - * words. The palette index for pixel 'a' is now formed from: - * - * 14 0 - * ppp pppp aaaa aaaa - * - * Stencil Mask - * ------------ - * - * For any pixel position, there are in fact only two visible layers, despite - * there being four defined layers. The layers are grouped in pairs: A (the - * 'primary' layer) and A' (the 'alternate') form one pair, and B and B' form - * the other. 
Only one of the primary or alternate layers from each group may - * be visible at a given position. The 'stencil mask' controls this. - * - * The mask table is a bit field organized into 512 (or 384?) lines with each - * bit controlling four columns (32 pixels). The mask does not appear to be - * affected by scrolling -- that is, it does not scroll with the underlying - * tiles, which do so independently. The mask remains fixed. - * - * Each mask entry is a little endian 32-bit word. The high 16 bits control - * A/A' and the low 16 bits control B/B'. Each word controls an entire line - * (32 pixels per bit, 512 pixels per 16-bit line mask, where the first 16 - * pixels are allocated to the overscan region.) If a bit is set to 1, the - * pixel from the primary layer is used, otherwise the alternate layer is - * used when the mask is 0. It is important to remember that the layers may - * have been scrolled independently. The mask operates on the final resultant - * two pixels that are determined for each location. - * - * Example of a line mask: - * - * 31 15 0 - * 0111 0000 0000 1111 0000 0000 1111 1111 - * - * These settings would display layer A' for the first 32 pixels of the line, - * followed by layer A for the next 96 pixels, A' for the subsequent 256 - * pixels, and A for the final 128 pixels. The first 256 pixels of the line - * would display layer B' and the second 256 pixels would be from layer B. - * - * The stencil mask does not affect layer priorities, which are managed - * separately regardless of mask settings. - * - * Scrolling - * --------- - * - * Each of the four layers can be scrolled independently. Vertical scroll - * values are stored in the appropriate scroll register and horizontal scroll - * values can be sourced either from the register (in which case the entire - * layer will be scrolled uniformly) or from a table in VRAM (which contains - * independent values for each line). 
- * - * The scroll registers are laid out as: - * - * 31 0 - * e??? ???y yyyy yyyy h??? ??xx xxxx xxxx - * - * The 'e' bit enables the layer when set. The 'y' bits comprise a vertical - * scroll value in pixels. The 'x' bits form a horizontal scroll value. If 'h' - * is set, then the VRAM table (line-by-line scrolling) is used, otherwise the - * 'x' values are applied to every line. It is also possible that the scroll - * values use more or less bits, but probably no more than 1. - * - * Each line must be wrapped back to the beginning of the same line. Likewise, - * vertical scrolling wraps around back to the top of the tilemap. - * - * The horizontal scroll table is a series of 16-bit little endian words, one - * for each line beginning at 0. It appears all the values can be used for - * scrolling (no control bits have been observed). The number of bits actually - * used by the hardware is irrelevant -- wrapping has the effect of making - * higher order bits unimportant. - * - * Layer Priorities - * ---------------- - * - * The layer control register (0x20) contains 4 bits that appear to control - * layer priorities. It is assumed that the 3D graphics, output by the Real3D - * pixel processors independently of the tile generator, constitute their own - * 'layer' and that the 2D tilemaps appear in front or behind. There may be a - * specific function for each priority bit or the field may be interpreted as a - * single 4-bit value denoting preset layer orders. - * - * Color Offsets - * ------------- - * - * Color offsets can be applied to the final RGB color value of every pixel. - * This is used for effects such as fading to a certain color, lightning (Lost - * World), etc. The current best guess is that the two registers control each - * pair (A/A' and B/B') of layers. The format appears to be: - * - * 31 0 - * ???? ???? rrrr rrrr gggg gggg bbbb bbbb - * - * Where 'r', 'g', and 'b' appear to be signed 8-bit color offsets. 
Because - * they exceed the color resolution of the palette, they must be scaled - * appropriately. - * - * Color offset registers are handled in TileGen.cpp. Two palettes are computed - * -- one for A/A' and another for B/B'. These are passed to the renderer. - */ + /* + * Render2D.cpp + * + * Implementation of the CRender2D class: OpenGL tile generator graphics. + * + * To-Do List + * ---------- + * - Is there a universal solution to the 'ROLLING START' scrolling bug (Scud + * Race) and the scrolling text during Magical Truck Adventure's attract + * mode? To fix Scud Race, either the stencil mask or the h-scroll value must + * be shifted by 16 pixels. Magical Truck Adventure is similar but opposite. + * Perhaps this is a function of timing registers accessed via JTAG? + * - Is there a better way to handle the overscan regions in wide screen mode? + * Is clearing two thin viewports better than one big clear? + * - Are v-scroll values 9 or 10 bits? (Does it matter?) Lost World seems to + * have some scrolling issues. + * - A proper shut-down function is needed! OpenGL might not be available when + * the destructor for this class is called. + * + * Tile Generator Hardware Overview + * -------------------------------- + * + * Model 3's medium resolution tile generator hardware appears to be derived + * from the Model 2 and System 24 chipset, but is much simpler. It consists of + * four 64x64 tile layers, comprised of 8x8 pixel tiles, with configurable + * priorities. There may be additional features but so far, no known Model 3 + * games use them. + * + * VRAM is comprised of 1 MB for tile data and an additional 128 KB for the + * palette (each color occupies 32 bits). The four tilemap layers are referred + * to as: A (0), A' (1), B (2), and B' (3). Palette RAM may be located on a + * separate RAM IC. + * + * Registers + * --------- + + 0xF1180020: -------- -------- -------- -------- ? 
+ -------- -------- x------- -------- Layer 3 bitdepth (0 = 8-bit, 1 = 4-bit) + -------- -------- -x------ -------- Layer 2 bitdepth (0 = 8-bit, 1 = 4-bit) + -------- -------- --x----- -------- Layer 1 bitdepth (0 = 8-bit, 1 = 4-bit) + -------- -------- ---x---- -------- Layer 0 bitdepth (0 = 8-bit, 1 = 4-bit) + -------- -------- ----x--- -------- Layer 3 priority (0 = below 3D, 1 = above 3D) + -------- -------- -----x-- -------- Layer 2 priority (0 = below 3D, 1 = above 3D) + -------- -------- ------x- -------- Layer 1 priority (0 = below 3D, 1 = above 3D) + -------- -------- -------x -------- Layer 0 priority (0 = below 3D, 1 = above 3D) + + 0xF1180040: Foreground layer color modulation + -------- xxxxxxxx -------- -------- Red component + -------- -------- xxxxxxxx -------- Green component + -------- -------- -------- xxxxxxxx Blue component + + 0xF1180044: Background layer color modulation + -------- xxxxxxxx -------- -------- Red component + -------- -------- xxxxxxxx -------- Green component + -------- -------- -------- xxxxxxxx Blue component + + 0xF1180060: x------- -------- -------- -------- Layer 0 enable + -------x xxxxxxxx -------- -------- Layer 0 Y scroll position + -------- -------- x------- -------- Layer 0 X line scroll enable + -------- -------- -------x xxxxxxxx Layer 0 X scroll position + + 0xF1180064: x------- -------- -------- -------- Layer 1 enable + -------x xxxxxxxx -------- -------- Layer 1 Y scroll position + -------- -------- x------- -------- Layer 1 X line scroll enable + -------- -------- -------x xxxxxxxx Layer 1 X scroll position + + 0xF1180068: x------- -------- -------- -------- Layer 2 enable + -------x xxxxxxxx -------- -------- Layer 2 Y scroll position + -------- -------- x------- -------- Layer 2 X line scroll enable + -------- -------- -------x xxxxxxxx Layer 2 X scroll position + + 0xF118006C: x------- -------- -------- -------- Layer 3 enable + -------x xxxxxxxx -------- -------- Layer 3 Y scroll position + -------- 
-------- x------- -------- Layer 3 X line scroll enable + -------- -------- -------x xxxxxxxx Layer 3 X scroll position + + * + * VRAM Memory Map + * --------------- + * + * The lower 1 MB of VRAM is used for storing tiles, per-line horizontal scroll + * values, and the stencil mask, which determines which of each pair of layers + * is displayed on a given line and column. + * + * 00000-F5FFF Tile pattern data + * F6000-F63FF Layer A horizontal scroll table (512 lines) + * F6400-F67FF Layer A' horizontal scroll table + * F6800-F6BFF Layer B horizontal scroll table + * F6C00-F6FFF Layer B' horizontal scroll table + * F7000-F77FF Mask table (assuming 4 bytes per line, 512 lines) + * F7800-F7FFF ? + * F8000-F9FFF Layer A name table + * FA000-FBFFF Layer A' name table + * FC000-FDFFF Layer B name table + * FE000-FFFFF Layer B' name table + * + * Tiles may actually address the entire 1 MB space, although in practice, + * that would conflict with the other fixed memory regions. + * + * Palette + * ------- + * + * The palette stores 32768 colors. Each entry is a little endian 32-bit word. + * The upper 16 bits are unused and the lower 16 bits contain the color: + * + * 15 0 + * tbbb bbgg gggr rrrr + * + * The 't' bit is for transparency. When set, pixels of that color are + * transparent, unless they are the bottom-most layer. + * + * Tile Name Table and Pattern Layout + * ---------------------------------- + * + * The name table is a 64x64 array of 16-bit words serving as indices for tile + * pattern data and the palette. The first 64 words correspond to the first + * row of tiles, the next 64 to the second row, etc. Although 64x64 entries + * describes a 512x512 pixel screen, only the upper-left 62x48 tiles are + * visible when the vertical and horizontal scroll values are 0. Scrolling + * moves the 496x384 pixel 'window' around, with individual wrapping of the + * two axes. 
+ * + * The data is actually arranged in 32-bit chunks in little endian format, so + * that tiles 0, 1, 2, and 3 will be stored as 1, 0, 3, 2. Fetching two name + * table entries as a single 32-bit word places the left tile in the high 16 + * bits and the right tile in the low 16 bits. + * + * The format of a name table entry in 4-bit color mode is: + * + * 15 0 + * jkpp pppp pppp iiii + * + * The pattern index is '0ppp pppp pppi iiij'. Multiplying by 32 yields the + * offset in VRAM at which the tile pattern data is stored. Note that the MSB + * of the name table entry becomes the LSB of the pattern index. This allows + * for 32768 4-bit tile patterns, each occupying 32 bytes, which means the + * whole 1 MB VRAM space can be addressed. + * + * The 4-bit pattern data is stored as 8 32-bit words. Each word stores a row + * of 8 pixels: + * + * 31 0 + * aaaa bbbb cccc dddd eeee ffff gggg hhhh + * + * 'a' is the left-most pixel data. These 4-bit values are combined with bits + * from the name table to form a palette index, which determines the final + * color. For example, for pixel 'a', the 15-bit color index is: + * + * 14 0 + * kpp pppp pppp aaaa + * + * Note that index bits are re-used to form the palette index, meaning that + * the pattern address partly determines the color. + * + * In 8-bit color mode, the name table entry looks like: + * + * 15 0 + * ?ppp pppp iiii iiii + * + * The low 15 'p' and 'i' bits together form the pattern index, which must be + * multiplied by 64 to get the offset. The pattern data now consists of 16 32- + * bit words, each containing four 8-bit pixels: + * + * 31 0 + * aaaa aaaa bbbb bbbb cccc cccc dddd dddd + * + * 'a' is the left-most pixel. Each line is therefore comprised of two 32-bit + * words. 
The palette index for pixel 'a' is now formed from: + * + * 14 0 + * ppp pppp aaaa aaaa + * + * Stencil Mask + * ------------ + * + * For any pixel position, there are in fact only two visible layers, despite + * there being four defined layers. The layers are grouped in pairs: A (the + * 'primary' layer) and A' (the 'alternate') form one pair, and B and B' form + * the other. Only one of the primary or alternate layers from each group may + * be visible at a given position. The 'stencil mask' controls this. + * + * The mask table is a bit field organized into 512 (or 384?) lines with each + * bit controlling four columns (32 pixels). The mask does not appear to be + * affected by scrolling -- that is, it does not scroll with the underlying + * tiles, which do so independently. The mask remains fixed. + * + * Each mask entry is a little endian 32-bit word. The high 16 bits control + * A/A' and the low 16 bits control B/B'. Each word controls an entire line + * (32 pixels per bit, 512 pixels per 16-bit line mask, where the first 16 + * pixels are allocated to the overscan region.) If a bit is set to 1, the + * pixel from the primary layer is used, otherwise the alternate layer is + * used when the mask is 0. It is important to remember that the layers may + * have been scrolled independently. The mask operates on the final resultant + * two pixels that are determined for each location. + * + * Example of a line mask: + * + * 31 15 0 + * 0111 0000 0000 1111 0000 0000 1111 1111 + * + * These settings would display layer A' for the first 32 pixels of the line, + * followed by layer A for the next 96 pixels, A' for the subsequent 256 + * pixels, and A for the final 128 pixels. The first 256 pixels of the line + * would display layer B' and the second 256 pixels would be from layer B. + * + * The stencil mask does not affect layer priorities, which are managed + * separately regardless of mask settings. 
+ * + * Scrolling + * --------- + * + * Each of the four layers can be scrolled independently. Vertical scroll + * values are stored in the appropriate scroll register and horizontal scroll + * values can be sourced either from the register (in which case the entire + * layer will be scrolled uniformly) or from a table in VRAM (which contains + * independent values for each line). + * + * The scroll registers are laid out as: + * + * 31 0 + * e??? ???y yyyy yyyy h??? ??xx xxxx xxxx + * + * The 'e' bit enables the layer when set. The 'y' bits comprise a vertical + * scroll value in pixels. The 'x' bits form a horizontal scroll value. If 'h' + * is set, then the VRAM table (line-by-line scrolling) is used, otherwise the + * 'x' values are applied to every line. It is also possible that the scroll + * values use more or less bits, but probably no more than 1. + * + * Each line must be wrapped back to the beginning of the same line. Likewise, + * vertical scrolling wraps around back to the top of the tilemap. + * + * The horizontal scroll table is a series of 16-bit little endian words, one + * for each line beginning at 0. It appears all the values can be used for + * scrolling (no control bits have been observed). The number of bits actually + * used by the hardware is irrelevant -- wrapping has the effect of making + * higher order bits unimportant. + * + * Layer Priorities + * ---------------- + * + * The layer control register (0x20) contains 4 bits that appear to control + * layer priorities. It is assumed that the 3D graphics, output by the Real3D + * pixel processors independently of the tile generator, constitute their own + * 'layer' and that the 2D tilemaps appear in front or behind. There may be a + * specific function for each priority bit or the field may be interpreted as a + * single 4-bit value denoting preset layer orders. + * + * Color Offsets + * ------------- + * + * Color offsets can be applied to the final RGB color value of every pixel. 
+ * This is used for effects such as fading to a certain color, lightning (Lost + * World), etc. The current best guess is that the two registers control each + * pair (A/A' and B/B') of layers. The format appears to be: + * + * 31 0 + * ???? ???? rrrr rrrr gggg gggg bbbb bbbb + * + * Where 'r', 'g', and 'b' appear to be signed 8-bit color offsets. Because + * they exceed the color resolution of the palette, they must be scaled + * appropriately. + * + * Color offset registers are handled in TileGen.cpp. Two palettes are computed + * -- one for A/A' and another for B/B'. These are passed to the renderer. + */ #include "Render2D.h" @@ -284,258 +295,59 @@ #include -/****************************************************************************** - Definitions and Constants -******************************************************************************/ - -// Shader program files (for use in development builds only) -#define VERTEX_2D_SHADER_FILE "Src/Graphics/Vertex2D.glsl" -#define FRAGMENT_2D_SHADER_FILE "Src/Graphics/Fragment2D.glsl" - - -/****************************************************************************** - Layer Rendering - - This code is quite slow and badly needs to be optimized. Dirty rectangles - should be implemented first and tile pre-decoding second. 
-******************************************************************************/ - -template -static inline void DrawTileLine(uint32_t *line, int pixelOffset, uint16_t tile, int patternLine, const uint32_t *vram, const uint32_t *palette, uint16_t mask) -{ - static_assert(bits == 4 || bits == 8, "Tiles are either 4- or 8-bit"); - - // For 8-bit pixels, each line of tile pattern is two words - if (bits == 8) - patternLine *= 2; - - // Compute offset of pattern for this line - int patternOffset; - if (bits == 4) - { - patternOffset = ((tile & 0x3FFF) << 1) | ((tile >> 15) & 1); - patternOffset *= 32; - patternOffset /= 4; - } - else - { - patternOffset = tile & 0x3FFF; - patternOffset *= 64; - patternOffset /= 4; - } - - // Name table entry provides high color bits - uint32_t colorHi = tile & ((bits == 4) ? 0x7FF0 : 0x7F00); - - // Draw - if (bits == 4) - { - uint32_t pattern = vram[patternOffset + patternLine]; - for (int p = 7; p >= 0; p--) - { - if (!clip || (/*pixelOffset >= 0 &&*/ (unsigned int)pixelOffset < 496u)) // the >= 0 check is accounted for, as the cast to uint makes them appear as very large unsigned values - { - uint16_t maskTest = 1 << (15-((pixelOffset+0)/32)); - bool visible = (mask & maskTest) != 0; - uint32_t pixel = visible ? palette[((pattern >> (p*4)) & 0xF) | colorHi] : 0; - if (!alphaTest || (visible && (pixel >> 24) != 0)) // only draw opaque pixels - line[pixelOffset] = pixel; - } - ++pixelOffset; - } - } - else - { - for (int i = 0; i < 2; i++) // 4 pixels per word - { - uint32_t pattern = vram[patternOffset + patternLine + i]; - for (int p = 3; p >= 0; p--) - { - if (!clip || (/*pixelOffset >= 0 &&*/ (unsigned int)pixelOffset < 496u)) // the >= 0 check is accounted for, as the cast to uint makes them appear as very large unsigned values - { - uint16_t maskTest = 1 << (15-((pixelOffset+0)/32)); - bool visible = (mask & maskTest) != 0; - uint32_t pixel = visible ? 
palette[((pattern >> (p*8)) & 0xFF) | colorHi] : 0; - if (!alphaTest || (visible && (pixel >> 24) != 0)) - line[pixelOffset] = pixel; - } - ++pixelOffset; - } - } - } -} - -template -static void DrawLayer(uint32_t *pixels, int layerNum, const uint32_t *vram, const uint32_t *regs, const uint32_t *palette) -{ - const uint16_t *nameTableBase = (const uint16_t *) &vram[(0xF8000 + layerNum * 0x2000) / 4]; - const uint16_t *hScrollTable = (const uint16_t *) &vram[(0xF6000 + layerNum * 0x400) / 4]; - bool lineScrollMode = (regs[0x60/4 + layerNum] & 0x8000) != 0; - int hFullScroll = regs[0x60/4 + layerNum] & 0x3FF; - int vScroll = (regs[0x60/4 + layerNum] >> 16) & 0x1FF; - - const uint16_t *maskTable = (const uint16_t *) &vram[0xF7000 / 4]; - if (layerNum < 2) // little endian: layers A and A' use second word in each pair - maskTable += 1; - - // If mask bit is clear, alternate layer is shown. We want to test for non- - // zero, so we flip the mask when drawing alternate layers (layers 1 and 3). - const uint16_t maskPolarity = (layerNum & 1) ? 0xFFFF : 0x0000; - - uint32_t *line = pixels; - - for (int y = 0; y < 384; y++) - { - int hScroll = (lineScrollMode ? hScrollTable[y] : hFullScroll) & 0x1FF; - int hTile = hScroll / 8; - int hFine = hScroll & 7; // horizontal pixel offset within tile line - int vFine = (y + vScroll) & 7; // vertical pixel offset within 8x8 tile - const uint16_t *nameTable = &nameTableBase[(64 * ((y + vScroll) / 8)) & 0xFFF]; // clamp to 64x64 = 0x1000 - uint16_t mask = *maskTable ^ maskPolarity; // each bit covers 32 pixels - - int pixelOffset = -hFine; - int extraTile = (hFine != 0) ? 
1 : 0; // h-scrolling requires part of 63rd tile - - // First tile may be clipped - DrawTileLine(line, pixelOffset, nameTable[(hTile ^ 1) & 63], vFine, vram, palette, mask); - ++hTile; - pixelOffset += 8; - // Middle tiles will not be clipped - for (int tx = 1; tx < (62 - 1 + extraTile); tx++) - { - DrawTileLine(line, pixelOffset, nameTable[(hTile ^ 1) & 63], vFine, vram, palette, mask); - ++hTile; - pixelOffset += 8; - } - // Last tile may be clipped - DrawTileLine(line, pixelOffset, nameTable[(hTile ^ 1) & 63], vFine, vram, palette, mask); - ++hTile; - pixelOffset += 8; - - // Advance one line - maskTable += 2; - line += 496; - } -} - -std::pair CRender2D::DrawTilemaps(uint32_t *pixelsBottom, uint32_t *pixelsTop) -{ - unsigned priority = (m_regs[0x20/4] >> 8) & 0xF; - - // Render bottom layers - bool noBottomSurface = true; - static const int bottomOrder[4] = { 3, 2, 1, 0 }; - for (int i = 0; i < 4; i++) - { - int layerNum = bottomOrder[i]; - bool is4Bit = (m_regs[0x20/4] & (1 << (12 + layerNum))) != 0; - bool enabled = (m_regs[0x60/4 + layerNum] & 0x80000000) != 0; - bool selected = (priority & (1 << layerNum)) == 0; - if (enabled && selected) - { - if (noBottomSurface) - { - if (is4Bit) - DrawLayer<4, false>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - else - DrawLayer<8, false>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - } - else - { - if (is4Bit) - DrawLayer<4, true>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - else - DrawLayer<8, true>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - } - noBottomSurface = false; - } - } - - // Render top layers - // NOTE: layer ordering is different according to MAME (which has 3, 2, 0, 1 - // for top layer). Until I see evidence that this is correct and not a typo, - // I will assume consistent layer ordering. 
- bool noTopSurface = true; - static const int topOrder[4] = { 3, 2, 1, 0 }; - for (int i = 0; i < 4; i++) - { - int layerNum = topOrder[i]; - bool is4Bit = (m_regs[0x20/4] & (1 << (12 + layerNum))) != 0; - bool enabled = (m_regs[0x60/4 + layerNum] & 0x80000000) != 0; - bool selected = (priority & (1 << layerNum)) != 0; - if (enabled && selected) - { - if (noTopSurface) - { - if (is4Bit) - DrawLayer<4, false>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - else - DrawLayer<8, false>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - } - else - { - if (is4Bit) - DrawLayer<4, true>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - else - DrawLayer<8, true>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - } - noTopSurface = false; - } - } - - // Indicate whether top and bottom surfaces have to be rendered - return std::pair(!noTopSurface, !noBottomSurface); -} - - /****************************************************************************** Frame Display Functions ******************************************************************************/ -// Draws a surface to the screen (0 is top and 1 is bottom) -void CRender2D::DisplaySurface(int surface) -{ - // Shader program - m_shader.EnableShader(); - - glBindVertexArray(m_vao); - - // Draw the surface - glActiveTexture(GL_TEXTURE0); // texture unit 0 - glBindTexture(GL_TEXTURE_2D, m_texID[surface]); - - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - glBindVertexArray(0); - - m_shader.DisableShader(); -} - // Set up viewport and OpenGL state for 2D rendering (sets up blending function but disables blending) void CRender2D::Setup2D(bool isBottom) { - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // alpha of 1.0 is opaque, 0 is transparent - glDisable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // alpha of 1.0 is opaque, 0 is transparent - // Disable Z-buffering - glDisable(GL_DEPTH_TEST); + // Disable Z-buffering + 
glDisable(GL_DEPTH_TEST); - // Clear everything if requested or just overscan areas for wide screen mode - if (isBottom) - { - glClearColor(0.0, 0.0, 0.0, 0.0); - glViewport(0, 0, m_totalXPixels, m_totalYPixels); - glDisable(GL_SCISSOR_TEST); // scissor is enabled to fix the 2d/3d miss match problem - glClear(GL_COLOR_BUFFER_BIT); // we want to clear outside the scissored areas so must disable it - glEnable(GL_SCISSOR_TEST); - } + // Clear everything if requested or just overscan areas for wide screen mode + if (isBottom) + { + glClearColor(0.0, 0.0, 0.0, 0.0); + glViewport (0, 0, m_totalXPixels, m_totalYPixels); + glDisable (GL_SCISSOR_TEST); // scissor is enabled to fix the 2d/3d mismatch problem + glClear (GL_COLOR_BUFFER_BIT); // we want to clear outside the scissored areas so must disable it + glEnable (GL_SCISSOR_TEST); + } - // Set up the viewport and orthogonal projection - bool stretchBottom = m_config["WideBackground"].ValueAs() && isBottom; - if (!stretchBottom) - { - glViewport(m_xOffset - m_correction, m_yOffset + m_correction, m_xPixels, m_yPixels); //Preserve aspect ratio of tile layer by constraining and centering viewport - } + // Set up the viewport and orthogonal projection + bool stretchBottom = m_config["WideBackground"].ValueAs() && isBottom; + if (!stretchBottom) + { + glViewport(m_xOffset - m_correction, m_yOffset + m_correction, m_xPixels, m_yPixels); //Preserve aspect ratio of tile layer by constraining and centering viewport + } +} + +void CRender2D::DrawSurface(GLuint textureID) +{ + m_shader.EnableShader(); + + glEnable (GL_BLEND); + glBindVertexArray (m_vao); + glActiveTexture (GL_TEXTURE0); // texture unit 0 + glBindTexture (GL_TEXTURE_2D, textureID); + glDrawArrays (GL_TRIANGLE_STRIP, 0, 4); + glBindVertexArray (0); + glDisable (GL_BLEND); + + m_shader.DisableShader(); +} + +float CRender2D::LineToPercentStart(int lineNumber) +{ + return lineNumber / 384.0f; +} + +float CRender2D::LineToPercentEnd(int lineNumber) +{ + return 
(lineNumber + 1) / 384.0f; } void CRender2D::BeginFrame(void) @@ -544,38 +356,80 @@ void CRender2D::BeginFrame(void) void CRender2D::PreRenderFrame(void) { - // Update all layers - m_surfaces_present = DrawTilemaps(m_bottomSurface, m_topSurface); - glActiveTexture(GL_TEXTURE0); // texture unit 0 - if (m_surfaces_present.first) - { - glBindTexture(GL_TEXTURE_2D, m_texID[0]); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 496, 384, GL_RGBA, GL_UNSIGNED_BYTE, m_topSurface); - } - if (m_surfaces_present.second) - { - glBindTexture(GL_TEXTURE_2D, m_texID[1]); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 496, 384, GL_RGBA, GL_UNSIGNED_BYTE, m_bottomSurface); - } + glDisable(GL_SCISSOR_TEST); + glViewport(0, 0, 496, 384); + + m_shaderTileGen.EnableShader(); + + glActiveTexture(GL_TEXTURE0); // texture unit 0 + glBindTexture(GL_TEXTURE_2D, m_vramTexID); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 512, 512, GL_RED_INTEGER, GL_UNSIGNED_INT, m_vram); + glActiveTexture(GL_TEXTURE1); // texture unit 1 + glBindTexture(GL_TEXTURE_2D, m_paletteTexID); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 128, 256, GL_RED_INTEGER, GL_UNSIGNED_INT, m_vram + 0x40000); + glActiveTexture(GL_TEXTURE0); // back to texture unit 0 + + glUniform1uiv(m_shaderTileGen.uniformLocMap["regs"], 32, m_regs); + + glBindVertexArray(m_vao); + + m_fboBottom.Set(); + + glClearColor(0, 0, 0, 0); + glClear(GL_COLOR_BUFFER_BIT); + glEnable(GL_BLEND); + + // render bottom layer + for (int i = 4; i-- > 0;) { + + if (!IsEnabled(i)) { + continue; + } + + if (Above3D(i)) { + continue; + } + + glUniform1i(m_shaderTileGen.uniformLocMap["layerNumber"], i); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + } + + m_fboTop.Set(); + glClear(GL_COLOR_BUFFER_BIT); + + // render top layer + for (int i = 4; i-- > 0;) { + + if (!IsEnabled(i)) { + continue; + } + + if (!Above3D(i)) { + continue; + } + + glUniform1i(m_shaderTileGen.uniformLocMap["layerNumber"], i); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + } + + glBindVertexArray(0); + 
m_shaderTileGen.DisableShader(); + m_fboBottom.Disable(); + + glDisable(GL_BLEND); } void CRender2D::RenderFrameBottom(void) { - // Display bottom surface if anything was drawn there, else clear everything - Setup2D(true); - if (m_surfaces_present.second) - DisplaySurface(1); + Setup2D(true); + DrawSurface(m_fboBottom.GetTextureID()); } void CRender2D::RenderFrameTop(void) { - // Display top surface only if it exists - if (m_surfaces_present.first) - { - Setup2D(false); - glEnable(GL_BLEND); - DisplaySurface(0); - } + Setup2D(false); + DrawSurface(m_fboTop.GetTextureID()); } void CRender2D::EndFrame(void) @@ -597,109 +451,135 @@ void CRender2D::WriteVRAM(unsigned addr, uint32_t data) Configuration, Initialization, and Shutdown ******************************************************************************/ -void CRender2D::AttachRegisters(const uint32_t *regPtr) +void CRender2D::AttachRegisters(const uint32_t* regPtr) { - m_regs = regPtr; - DebugLog("Render2D attached registers\n"); + m_regs = regPtr; + DebugLog("Render2D attached registers\n"); } -void CRender2D::AttachPalette(const uint32_t *palPtr[2]) +void CRender2D::AttachPalette(const uint32_t* palPtr[2]) { - m_palette[0] = palPtr[0]; - m_palette[1] = palPtr[1]; - DebugLog("Render2D attached palette\n"); + m_palette[0] = palPtr[0]; + m_palette[1] = palPtr[1]; + DebugLog("Render2D attached palette\n"); } -void CRender2D::AttachVRAM(const uint8_t *vramPtr) +void CRender2D::AttachVRAM(const uint8_t* vramPtr) { - m_vram = (uint32_t *) vramPtr; - DebugLog("Render2D attached VRAM\n"); + m_vram = (uint32_t*)vramPtr; + DebugLog("Render2D attached VRAM\n"); } -// Memory pool and offsets within it -#define MEMORY_POOL_SIZE (2*512*384*4) -#define OFFSET_TOP_SURFACE 0 // 512*384*4 bytes -#define OFFSET_BOTTOM_SURFACE (512*384*4) // 512*384*4 - bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes) { - // Allocate memory for layer surfaces - 
m_memoryPool = new(std::nothrow) uint8_t[MEMORY_POOL_SIZE]; - if (NULL == m_memoryPool) - return ErrorLog("Insufficient memory for tilemap surfaces (need %1.1f MB).", float(MEMORY_POOL_SIZE) / 0x100000); - memset(m_memoryPool, 0, MEMORY_POOL_SIZE); // clear textures + // Resolution + m_xPixels = xRes; + m_yPixels = yRes; + m_xOffset = xOffset; + m_yOffset = yOffset; + m_totalXPixels = totalXRes; + m_totalYPixels = totalYRes; + m_correction = (UINT32)(((yRes / 384.f) * 2) + 0.5f); // for some reason the 2d layer is 2 pixels off the 3D - // Set up pointers to memory regions - m_topSurface = (uint32_t *) &m_memoryPool[OFFSET_TOP_SURFACE]; - m_bottomSurface = (uint32_t *) &m_memoryPool[OFFSET_BOTTOM_SURFACE]; - - // Resolution - m_xPixels = xRes; - m_yPixels = yRes; - m_xOffset = xOffset; - m_yOffset = yOffset; - m_totalXPixels = totalXRes; - m_totalYPixels = totalYRes; - m_correction = (UINT32)(((yRes / 384.f) * 2) + 0.5f); // for some reason the 2d layer is 2 pixels off the 3D - - DebugLog("Render2D initialized (allocated %1.1f MB)\n", float(MEMORY_POOL_SIZE) / 0x100000); - return OKAY; + return OKAY; } CRender2D::CRender2D(const Util::Config::Node& config) - : m_config(config), - m_vao(0) + : m_config(config), + m_vao(0), + m_vram(nullptr), + m_palette{nullptr}, + m_regs(nullptr) { - DebugLog("Built Render2D\n"); + DebugLog("Built Render2D\n"); - m_shader.LoadShaders(s_vertexShaderSource, s_fragmentShaderSource); - m_shader.GetUniformLocationMap("tex1"); - m_shader.EnableShader(); + m_shader.LoadShaders(s_vertexShaderSource, s_fragmentShaderSource); + m_shader.GetUniformLocationMap("tex1"); + m_shader.EnableShader(); - // update uniform memory - glUniform1i(m_shader.uniformLocMap["tex1"], 0); // bind to texture unit zero + // update uniform memory + glUniform1i(m_shader.uniformLocMap["tex1"], 0); // texture unit zero - m_shader.DisableShader(); + m_shader.DisableShader(); - // Create textures - glActiveTexture(GL_TEXTURE0); // texture unit 0 - glGenTextures(2, 
m_texID); + m_shaderTileGen.LoadShaders(s_vertexShaderTileGen, s_fragmentShaderTileGen); + m_shaderTileGen.GetUniformLocationMap("vram"); + m_shaderTileGen.GetUniformLocationMap("palette"); + m_shaderTileGen.GetUniformLocationMap("regs"); + m_shaderTileGen.GetUniformLocationMap("layerNumber"); + m_shaderTileGen.GetUniformLocationMap("lineStart"); + m_shaderTileGen.GetUniformLocationMap("lineEnd"); - for (int i = 0; i < 2; i++) - { - glBindTexture(GL_TEXTURE_2D, m_texID[i]); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 496, 384, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - } + m_shaderTileGen.EnableShader(); - glGenVertexArrays(1, &m_vao); - glBindVertexArray(m_vao); - // no states needed since we do it in the shader - glBindVertexArray(0); + glUniform1i(m_shaderTileGen.uniformLocMap["vram"], 0); // texture unit 0 + glUniform1i(m_shaderTileGen.uniformLocMap["palette"], 1); // texture unit 1 + glUniform1f(m_shaderTileGen.uniformLocMap["lineStart"], LineToPercentStart(0)); + glUniform1f(m_shaderTileGen.uniformLocMap["lineEnd"], LineToPercentEnd(383)); + + m_shaderTileGen.DisableShader(); + + glGenVertexArrays(1, &m_vao); + glBindVertexArray(m_vao); + // no states needed since we do it in the shader + glBindVertexArray(0); + + glGenTextures(1, &m_vramTexID); + glBindTexture(GL_TEXTURE_2D, m_vramTexID); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, 512, 512, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr); + + glGenTextures(1, &m_paletteTexID); + 
glBindTexture(GL_TEXTURE_2D, m_paletteTexID); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, 128, 256, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr); + + glBindTexture(GL_TEXTURE_2D, 0); + + m_fboBottom.Create(496, 384); + m_fboTop.Create(496, 384); } CRender2D::~CRender2D(void) { - m_shader.UnloadShaders(); - glDeleteTextures(2, m_texID); + m_shader.UnloadShaders(); + m_shaderTileGen.UnloadShaders(); - if (m_vao) { - glDeleteVertexArrays(1, &m_vao); - m_vao = 0; - } + if (m_vramTexID) { + glDeleteTextures(1, &m_vramTexID); + m_vramTexID = 0; + } - if (m_memoryPool) - { - delete [] m_memoryPool; - m_memoryPool = 0; - } + if (m_paletteTexID) { + glDeleteTextures(1, &m_paletteTexID); + m_paletteTexID = 0; + } - m_vram = 0; - m_topSurface = 0; - m_bottomSurface = 0; + if (m_vao) { + glDeleteVertexArrays(1, &m_vao); + m_vao = 0; + } - DebugLog("Destroyed Render2D\n"); + m_fboBottom.Destroy(); + m_fboTop.Destroy(); + + m_vram = nullptr; + + DebugLog("Destroyed Render2D\n"); +} + +bool CRender2D::IsEnabled(int layerNumber) +{ + return (m_regs[0x60 / 4 + layerNumber] & 0x80000000) > 0; +} + +bool CRender2D::Above3D(int layerNumber) +{ + return (m_regs[0x20 / 4] >> (8 + layerNumber)) & 0x1; } diff --git a/Src/Graphics/Render2D.h b/Src/Graphics/Render2D.h index ed04571..c13b644 100644 --- a/Src/Graphics/Render2D.h +++ b/Src/Graphics/Render2D.h @@ -1,12 +1,12 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. 
** ** Supermodel is free software: you can redistribute it and/or modify it under - ** the terms of the GNU General Public License as published by the Free + ** the terms of the GNU General Public License as published by the Free ** Software Foundation, either version 3 of the License, or (at your option) ** any later version. ** @@ -18,12 +18,12 @@ ** You should have received a copy of the GNU General Public License along ** with Supermodel. If not, see . **/ - -/* - * Render2D.h - * - * Header file defining the CRender2D class: OpenGL tile generator graphics. - */ + + /* + * Render2D.h + * + * Header file defining the CRender2D class: OpenGL tile generator graphics. + */ #ifndef INCLUDED_RENDER2D_H #define INCLUDED_RENDER2D_H @@ -31,181 +31,184 @@ #include #include "Util/NewConfig.h" #include "New3D/GLSLShader.h" +#include "FBO.h" - -/* - * CRender2D: - * - * Tile generator graphics engine. This must be constructed and initialized - * before being attached to any objects that want to make use of it. Apart from - * the constructor, all members assume that a global GL device - * context is available and that GL functions may be called. - */ + /* + * CRender2D: + * + * Tile generator graphics engine. This must be constructed and initialized + * before being attached to any objects that want to make use of it. Apart from + * the constructor, all members assume that a global GL device + * context is available and that GL functions may be called. + */ class CRender2D { public: - /* - * BeginFrame(void): - * - * Prepare to render a new frame. Must be called once per frame prior to - * drawing anything. - */ - void BeginFrame(void); + /* + * BeginFrame(void): + * + * Prepare to render a new frame. Must be called once per frame prior to + * drawing anything. + */ + void BeginFrame(void); - /* - * PreRenderFrame(void): - * - * Draws the all top layers (above 3D graphics) and bottom layers (below 3D - * graphics) but does not yet display them. May send data to the GPU. 
- */ - void PreRenderFrame(void); + /* + * PreRenderFrame(void): + * + * Draws all of the top layers (above 3D graphics) and bottom layers (below 3D + * graphics) but does not yet display them. May send data to the GPU. + */ + void PreRenderFrame(void); - /* - * RenderFrameBottom(void): - * - * Overwrites the color buffer with bottom surface that was pre-rendered by - * the last call to PreRenderFrame(). - */ - void RenderFrameBottom(void); + /* + * RenderFrameBottom(void): + * + * Overwrites the color buffer with the bottom surface that was pre-rendered by + * the last call to PreRenderFrame(). + */ + void RenderFrameBottom(void); - /* - * RenderFrameTop(void): - * - * Draws the top surface (if it exists) that was pre-rendered by the last - * call to PreRenderFrame(). Previously drawn graphics layers will be visible - * through transparent regions. - */ - void RenderFrameTop(void); - - /* - * EndFrame(void): - * - * Signals the end of rendering for this frame. Must be called last during - * the frame. - */ - void EndFrame(void); - - /* - * WriteVRAM(addr, data): - * - * Indicates what will be written next to the tile generator's RAM. The - * VRAM address must not have yet been updated, to allow the renderer to - * check for changes. Data is accepted in the same form as the tile - * generator: the MSB is what was written to addr+3. This function is - * intended to facilitate on-the-fly decoding of tiles and palette data. - * - * Parameters: - * addr Address in tile generator RAM. Caller must ensure it is - * clamped to the range 0x000000 to 0x11FFFF because this - * function does not. - * data The data to write. - */ - void WriteVRAM(unsigned addr, uint32_t data); - - /* - * AttachRegisters(regPtr): - * - * Attaches tile generator registers. This must be done prior to any - * rendering otherwise the program may crash with an access violation. - * - * Parameters: - * regPtr Pointer to the base of the tile generator registers. There - * are assumed to be 64 in all. 
- */ - void AttachRegisters(const uint32_t *regPtr); - - /* - * AttachPalette(palPtr): - * - * Attaches tile generator palettes. This must be done prior to any - * rendering. - * - * Parameters: - * palPtr Pointer to two palettes. The first is for layers A/A' and - * the second is for B/B'. - */ - void AttachPalette(const uint32_t *palPtr[2]); + /* + * RenderFrameTop(void): + * + * Draws the top surface (if it exists) that was pre-rendered by the last + * call to PreRenderFrame(). Previously drawn graphics layers will be visible + * through transparent regions. + */ + void RenderFrameTop(void); - /* - * AttachVRAM(vramPtr): - * - * Attaches tile generator RAM. This must be done prior to any rendering - * otherwise the program may crash with an access violation. - * - * Parameters: - * vramPtr Pointer to the base of the tile generator RAM (0x120000 - * bytes). VRAM is assumed to be in little endian format. - */ - void AttachVRAM(const uint8_t *vramPtr); + /* + * EndFrame(void): + * + * Signals the end of rendering for this frame. Must be called last during + * the frame. + */ + void EndFrame(void); + + /* + * WriteVRAM(addr, data): + * + * Indicates what will be written next to the tile generator's RAM. The + * VRAM address must not have yet been updated, to allow the renderer to + * check for changes. Data is accepted in the same form as the tile + * generator: the MSB is what was written to addr+3. This function is + * intended to facilitate on-the-fly decoding of tiles and palette data. + * + * Parameters: + * addr Address in tile generator RAM. Caller must ensure it is + * clamped to the range 0x000000 to 0x11FFFF because this + * function does not. + * data The data to write. + */ + void WriteVRAM(unsigned addr, uint32_t data); + + /* + * AttachRegisters(regPtr): + * + * Attaches tile generator registers. This must be done prior to any + * rendering otherwise the program may crash with an access violation. 
+ * + * Parameters: + * regPtr Pointer to the base of the tile generator registers. There + * are assumed to be 64 in all. + */ + void AttachRegisters(const uint32_t* regPtr); + + /* + * AttachPalette(palPtr): + * + * Attaches tile generator palettes. This must be done prior to any + * rendering. + * + * Parameters: + * palPtr Pointer to two palettes. The first is for layers A/A' and + * the second is for B/B'. + */ + void AttachPalette(const uint32_t* palPtr[2]); + + /* + * AttachVRAM(vramPtr): + * + * Attaches tile generator RAM. This must be done prior to any rendering + * otherwise the program may crash with an access violation. + * + * Parameters: + * vramPtr Pointer to the base of the tile generator RAM (0x120000 + * bytes). VRAM is assumed to be in little endian format. + */ + void AttachVRAM(const uint8_t* vramPtr); + + /* + * Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes): + * + * One-time initialization of the context. Must be called before any other + * members (meaning it should be called even before being attached to any + * other objects that want to use it). + * + * Parameters: + * xOffset X offset of the viewable area within OpenGL display + * surface, in pixels. + * yOffset Y offset. + * xRes Horizontal resolution of the viewable area. + * yRes Vertical resolution. + * totalXRes Horizontal resolution of the complete display area. + * totalYRes Vertical resolution. + * + * Returns: + * OKAY if successful, otherwise FAILED if a non-recoverable error + * occurred. Prints own error messages. + */ + bool Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes); + + /* + * CRender2D(config): + * ~CRender2D(void): + * + * Constructor and destructor. + * + * Parameters: + * config Run-time configuration. + */ + CRender2D(const Util::Config::Node& config); + ~CRender2D(void); - /* - * Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes); - * - * One-time initialization of the context. 
Must be called before any other - * members (meaning it should be called even before being attached to any - * other objects that want to use it). - * - * Parameters: - * xOffset X offset of the viewable area within OpenGL display - * surface, in pixels. - * yOffset Y offset. - * xRes Horizontal resolution of the viewable area. - * yRes Vertical resolution. - * totalXRes Horizontal resolution of the complete display area. - * totalYRes Vertical resolution. - * - * Returns: - * OKAY is successful, otherwise FAILED if a non-recoverable error - * occurred. Prints own error messages. - */ - bool Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes); - - /* - * CRender2D(config): - * ~CRender2D(void): - * - * Constructor and destructor. - * - * Parameters: - * config Run-time configuration. - */ - CRender2D(const Util::Config::Node &config); - ~CRender2D(void); - private: - // Private member functions - std::pair DrawTilemaps(uint32_t *destBottom, uint32_t *destTop); - void DisplaySurface(int surface); - void Setup2D(bool isBottom); - - // Run-time configuration - const Util::Config::Node &m_config; - // Data received from tile generator device object - const uint32_t *m_vram; - const uint32_t *m_palette[2]; // palettes for A/A' and B/B' - const uint32_t *m_regs; - - // OpenGL data - GLuint m_texID[2]; // IDs for the 2 layer textures (top and bottom) - unsigned m_xPixels = 496; // display surface resolution - unsigned m_yPixels = 384; // ... 
- unsigned m_xOffset = 0; // offset - unsigned m_yOffset = 0; - unsigned m_totalXPixels; // total display surface resolution - unsigned m_totalYPixels; - unsigned m_correction = 0; - - GLuint m_vao; - GLSLShader m_shader; + bool IsEnabled (int layerNumber); + bool Above3D (int layerNumber); + void Setup2D (bool isBottom); + void DrawSurface (GLuint textureID); - // PreRenderFrame() tracks which surfaces exist in current frame - std::pair m_surfaces_present = std::pair(false, false); + float LineToPercentStart (int lineNumber); // vertical line numbers are from 0-383 + float LineToPercentEnd (int lineNumber); // vertical line numbers are from 0-383 + + // Run-time configuration + const Util::Config::Node& m_config; + + // Data received from tile generator device object + const uint32_t* m_vram; + const uint32_t* m_palette[2]; // palettes for A/A' and B/B' + const uint32_t* m_regs; + + // OpenGL data + unsigned m_xPixels = 496; // display surface resolution + unsigned m_yPixels = 384; // ... + unsigned m_xOffset = 0; // offset + unsigned m_yOffset = 0; + unsigned m_totalXPixels = 0; // total display surface resolution + unsigned m_totalYPixels = 0; + unsigned m_correction = 0; + + GLuint m_vao; + GLSLShader m_shader; + GLSLShader m_shaderTileGen; + + GLuint m_vramTexID = 0; + GLuint m_paletteTexID = 0; + + FBO m_fboBottom; + FBO m_fboTop; - // Buffers - uint8_t *m_memoryPool = 0; // all memory is allocated here - uint32_t *m_topSurface = 0; // 512x384x32bpp pixel surface for top layers - uint32_t *m_bottomSurface = 0; // bottom layers }; diff --git a/Src/Graphics/Shaders2D.h b/Src/Graphics/Shaders2D.h index 2536b9a..0c0c63f 100644 --- a/Src/Graphics/Shaders2D.h +++ b/Src/Graphics/Shaders2D.h @@ -71,4 +71,255 @@ static const char s_fragmentShaderSource[] = R"glsl( )glsl"; +// Vertex shader +static const char s_vertexShaderTileGen[] = R"glsl( + + #version 410 core + + uniform float lineStart; // defined as a % of the viewport height in the range 0-1. 
So 0 is top line, 0.5 is line 192 etc + uniform float lineEnd; + + void main(void) + { + const float v1 = -1.0; + const float v2 = 1.0; + + vec4 vertices[] = vec4[]( vec4(-1.0, v1, 0.0, 1.0), + vec4(-1.0, v2, 0.0, 1.0), + vec4( 1.0, v1, 0.0, 1.0), + vec4( 1.0, v2, 0.0, 1.0)); + + float top = ((v2 - v1) * lineStart) + v1; + float bottom = ((v2 - v1) * lineEnd ) + v1; + + vertices[0].y = top; + vertices[2].y = top; + vertices[1].y = bottom; + vertices[3].y = bottom; + + gl_Position = vertices[gl_VertexID % 4]; + } + + )glsl"; + +// Fragment shader +static const char s_fragmentShaderTileGen[] = R"glsl( + + #version 410 core + + //layout(origin_upper_left) in vec4 gl_FragCoord; + + // inputs + uniform usampler2D vram; // texture 512x512 + uniform usampler2D palette; // texture 128x256 - actual dimensions dont matter too much but we have to stay in the limits of max tex width/height, so can't have 1 giant 1d array + uniform uint regs[32]; + uniform int layerNumber; + + // outputs + out vec4 fragColor; + + ivec2 GetVRamCoords(int offset) + { + return ivec2(offset % 512, offset / 512); + } + + ivec2 GetPaletteCoords(int offset) + { + return ivec2(offset % 128, offset / 128); + } + + uint GetLineMask(int layerNum, int yCoord) + { + uint shift = (layerNum<2) ? 16u : 0u; // need to check this, we could be endian swapped so could be wrong + uint maskPolarity = ((layerNum & 1) > 0) ? 
0xFFFFu : 0x0000u; + int index = (0xF7000 / 4) + yCoord; + + ivec2 coords = GetVRamCoords(index); + uint mask = ((texelFetch(vram,coords,0).r >> shift) & 0xFFFFu) ^ maskPolarity; + + return mask; + } + + bool GetPixelMask(int layerNum, int xCoord, int yCoord) + { + uint lineMask = GetLineMask(layerNum, yCoord); + uint maskTest = 1 << (15-(xCoord/32)); + + return (lineMask & maskTest) != 0; + } + + int GetLineScrollValue(int layerNum, int yCoord) + { + int index = ((0xF6000 + (layerNum * 0x400)) / 4) + (yCoord / 2); + int shift = (yCoord % 2) * 16; // double check this + + ivec2 coords = GetVRamCoords(index); + return int((texelFetch(vram,coords,0).r >> shift) & 0xFFFFu); + } + + int GetTileNumber(int xCoord, int yCoord, int xScroll, int yScroll) + { + int xIndex = ((xCoord + xScroll) / 8) & 0x3F; + int yIndex = ((yCoord + yScroll) / 8) & 0x3F; + + return (yIndex*64) + xIndex; + } + + int GetTileData(int layerNum, int tileNumber) + { + int addressBase = (0xF8000 + (layerNum * 0x2000)) / 4; + int offset = tileNumber / 2; // two tiles per 32bit word + int shift = (1 - (tileNumber % 2)) * 16; // triple check this + + ivec2 coords = GetVRamCoords(addressBase+offset); + uint data = (texelFetch(vram,coords,0).r >> shift) & 0xFFFFu; + + return int(data); + } + + int GetVFine(int yCoord, int yScroll) + { + return (yCoord + yScroll) & 7; + } + + int GetHFine(int xCoord, int xScroll) + { + return (xCoord + xScroll) & 7; + } + + // register data + bool LineScrollMode (int layerNum) { return (regs[0x60/4 + layerNum] & 0x8000) != 0; } + int GetHorizontalScroll(int layerNum) { return int(regs[0x60 / 4 + layerNum] &0x3FFu); } + int GetVerticalScroll (int layerNum) { return int((regs[0x60/4 + layerNum] >> 16) & 0x1FFu); } + int LayerPriority () { return int((regs[0x20/4] >> 8) & 0xFu); } + bool LayerIs4Bit (int layerNum) { return (regs[0x20/4] & (1 << (12 + layerNum))) != 0; } + bool LayerEnabled (int layerNum) { return (regs[0x60/4 + layerNum] & 0x80000000) != 0; } + bool 
LayerSelected (int layerNum) { return (LayerPriority() & (1 << layerNum)) == 0; } + + float Int8ToFloat(uint c) + { + if((c & 0x80u) > 0u) { // this is a bit harder in GLSL. Top bit means negative number, we extend to make 32bit + return float(int(c | 0xFFFFFF00u)) / 128.0; + } + else { + return float(c) / 127.0; + } + } + + vec4 AddColourOffset(int layerNum, vec4 colour) + { + uint offsetReg = regs[(0x40/4) + layerNum/2]; + + vec4 c; + c.b = Int8ToFloat((offsetReg >>16) & 0xFFu); + c.g = Int8ToFloat((offsetReg >> 8) & 0xFFu); + c.r = Int8ToFloat((offsetReg >> 0) & 0xFFu); + c.a = 0.0; + + colour += c; + return clamp(colour,0.0,1.0); // clamp is probably not needed since will get clamped on render target + } + + vec4 Int16ColourToVec4(uint colour) + { + uint alpha = (colour>>15); // top bit is alpha. 1 means clear, 0 opaque + alpha = ~alpha; // invert + alpha = alpha & 0x1u; // mask bit + + vec4 c; + c.r = float((colour >> 0 ) & 0x1F) / 31.0; + c.g = float((colour >> 5 ) & 0x1F) / 31.0; + c.b = float((colour >> 10) & 0x1F) / 31.0; + c.a = float(alpha) / 1.0; + + c.rgb *= c.a; // multiply by alpha value, this will push transparent to black, no branch needed + + return c; + } + + vec4 GetColour(int layerNum, int paletteOffset) + { + ivec2 coords = GetPaletteCoords(paletteOffset); + uint colour = texelFetch(palette,coords,0).r; + + vec4 col = Int16ColourToVec4(colour); // each colour is only 16bits, but occupies 32bits + + return AddColourOffset(layerNum,col); // apply colour offsets from registers + } + + vec4 Draw4Bit(int layerNum, int tileData, int hFine, int vFine) + { + // Tile pattern offset: each tile occupies 32 bytes when using 4-bit pixels (offset of tile pattern within VRAM) + int patternOffset = ((tileData & 0x3FFF) << 1) | ((tileData >> 15) & 1); + patternOffset *= 32; + patternOffset /= 4; + + // Upper color bits; the lower 4 bits come from the tile pattern + int paletteIndex = tileData & 0x7FF0; + + ivec2 coords = GetVRamCoords(patternOffset+vFine); + 
uint pattern = texelFetch(vram,coords,0).r; + pattern = (pattern >> ((7-hFine)*4)) & 0xFu; // get the pattern for our horizontal value + + return GetColour(layerNum, paletteIndex | int(pattern)); + } + + vec4 Draw8Bit(int layerNum, int tileData, int hFine, int vFine) + { + // Tile pattern offset: each tile occupies 64 bytes when using 8-bit pixels + int patternOffset = tileData & 0x3FFF; + patternOffset *= 64; + patternOffset /= 4; + + // Upper color bits + int paletteIndex = tileData & 0x7F00; + + // each read is 4 pixels + int offset = hFine / 4; + + ivec2 coords = GetVRamCoords(patternOffset+(vFine*2)+offset); // 8-bit pixels, each line is two words + uint pattern = texelFetch(vram,coords,0).r; + + pattern = (pattern >> ((3-(hFine%4))*8)) & 0xFFu; // shift out the bits we want for this pixel + + return GetColour(layerNum, paletteIndex | int(pattern)); + } + + void main() + { + ivec2 pos = ivec2(gl_FragCoord.xy); + + int scrollX; + if(LineScrollMode(layerNumber)) { + scrollX = GetLineScrollValue(layerNumber, pos.y); + } + else { + scrollX = GetHorizontalScroll(layerNumber); + } + + int scrollY = GetVerticalScroll(layerNumber); + int tileNumber = GetTileNumber(pos.x,pos.y,scrollX,scrollY); + int hFine = GetHFine(pos.x,scrollX); + int vFine = GetVFine(pos.y,scrollY); + bool pixelMask = GetPixelMask(layerNumber,pos.x,pos.y); + + if(pixelMask==true) { + + int tileData = GetTileData(layerNumber,tileNumber); + + if(LayerIs4Bit(layerNumber)) { + fragColor = Draw4Bit(layerNumber,tileData,hFine,vFine); + } + else { + fragColor = Draw8Bit(layerNumber,tileData,hFine,vFine); + } + } + else { + fragColor = vec4(0.0); + } + } + + )glsl"; + + #endif // INCLUDED_SHADERS2D_H diff --git a/VS2008/Supermodel.vcxproj b/VS2008/Supermodel.vcxproj index d755d81..23fa4bb 100644 --- a/VS2008/Supermodel.vcxproj +++ b/VS2008/Supermodel.vcxproj @@ -306,6 +306,7 @@ xcopy /D /Y "$(ProjectDir)..\Assets\*" "$(TargetDir)Assets" + @@ -478,6 +479,7 @@ xcopy /D /Y "$(ProjectDir)..\Assets\*" 
"$(TargetDir)Assets" + diff --git a/VS2008/Supermodel.vcxproj.filters b/VS2008/Supermodel.vcxproj.filters index e554f3d..a56ee68 100644 --- a/VS2008/Supermodel.vcxproj.filters +++ b/VS2008/Supermodel.vcxproj.filters @@ -467,6 +467,9 @@ Source Files\OSD\SDL + + Source Files\Graphics + @@ -847,6 +850,9 @@ Header Files\OSD\SDL + + Header Files\Graphics +