diff --git a/Src/Graphics/FBO.cpp b/Src/Graphics/FBO.cpp new file mode 100644 index 0000000..99f7a25 --- /dev/null +++ b/Src/Graphics/FBO.cpp @@ -0,0 +1,73 @@ +#include "FBO.h" + +FBO::FBO() : + m_frameBufferID(0), + m_textureID(0) +{ +} + +bool FBO::Create(int width, int height) +{ + CreateTexture(width, height); + + glGenFramebuffers(1, &m_frameBufferID); + glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_textureID, 0); + + auto frameBufferStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); + + glBindFramebuffer(GL_FRAMEBUFFER, 0); //created FBO now disable it + + return frameBufferStatus == GL_FRAMEBUFFER_COMPLETE; +} + +void FBO::Destroy() +{ + if (m_frameBufferID) { + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDeleteFramebuffers(1, &m_frameBufferID); + } + + if (m_textureID) { + glDeleteTextures(1, &m_textureID); + } + + m_frameBufferID = 0; + m_textureID = 0; +} + +void FBO::BindTexture() +{ + glBindTexture(GL_TEXTURE_2D, m_textureID); +} + +void FBO::Set() +{ + glBindFramebuffer(GL_FRAMEBUFFER, m_frameBufferID); +} + +void FBO::Disable() +{ + glBindFramebuffer(GL_FRAMEBUFFER, 0); +} + +GLuint FBO::GetFBOID() +{ + return m_frameBufferID; +} + +GLuint FBO::GetTextureID() +{ + return m_textureID; +} + +void FBO::CreateTexture(int width, int height) +{ + glGenTextures (1, &m_textureID); + glBindTexture (GL_TEXTURE_2D, m_textureID); + glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); +} diff --git a/Src/Graphics/FBO.h b/Src/Graphics/FBO.h new file mode 100644 index 0000000..51ac979 --- /dev/null +++ b/Src/Graphics/FBO.h @@ -0,0 +1,28 @@ +#ifndef _FBO_H_ +#define 
_FBO_H_
+// NOTE(review): the #include below has no header name (it looks lost in extraction); GLuint needs an OpenGL header - confirm
+#include 
+// Wraps one OpenGL framebuffer object and the 2D colour texture attached to it.
+class FBO
+{
+public:
+
+	FBO();	// both IDs start at 0; no GL objects are created here
+
+	bool Create(int width, int height);	// builds the texture and framebuffer; true when the framebuffer is complete
+	void Destroy();	// deletes the framebuffer and texture (if any) and resets both IDs to 0
+	void BindTexture();	// binds the colour texture to GL_TEXTURE_2D
+	void Set();	// binds this framebuffer as GL_FRAMEBUFFER
+	void Disable();	// binds framebuffer 0 as GL_FRAMEBUFFER
+	GLuint GetFBOID();	// framebuffer ID, 0 when not created
+	GLuint GetTextureID();	// texture ID, 0 when not created
+
+private:
+
+	void CreateTexture(int width, int height);	// allocates the RGBA8 texture used as colour attachment 0
+
+	GLuint m_frameBufferID;	// 0 = no framebuffer
+	GLuint m_textureID;	// 0 = no texture
+};
+
+#endif
\ No newline at end of file
diff --git a/Src/Graphics/Render2D.cpp b/Src/Graphics/Render2D.cpp index 3e47ae4..29a7bba 100644 --- a/Src/Graphics/Render2D.cpp +++ b/Src/Graphics/Render2D.cpp @@ -19,260 +19,271 @@ ** with Supermodel. If not, see . **/ -/* - * Render2D.cpp - * - * Implementation of the CRender2D class: OpenGL tile generator graphics. - * - * To-Do List - * ---------- - * - Is there a universal solution to the 'ROLLING START' scrolling bug (Scud - * Race) and the scrolling text during Magical Truck Adventure's attract - * mode? To fix Scud Race, either the stencil mask or the h-scroll value must - * be shifted by 16 pixels. Magical Truck Adventure is similar but opposite. - * Perhaps this is a function of timing registers accessed via JTAG? - * - Is there a better way to handle the overscan regions in wide screen mode? - * Is clearing two thin viewports better than one big clear? - * - Are v-scroll values 9 or 10 bits? (Does it matter?) Lost World seems to - * have some scrolling issues. - * - A proper shut-down function is needed! OpenGL might not be available when - * the destructor for this class is called. - * - * Tile Generator Hardware Overview - * -------------------------------- - * - * Model 3's medium resolution tile generator hardware appears to be derived - * from the Model 2 and System 24 chipset, but is much simpler. It consists of - * four 64x64 tile layers, comprised of 8x8 pixel tiles, with configurable - * priorities. There may be additional features but so far, no known Model 3 - * games use them.
- * - * VRAM is comprised of 1 MB for tile data and an additional 128 KB for the - * palette (each color occupies 32 bits). The four tilemap layers are referred - * to as: A (0), A' (1), B (2), and B' (3). Palette RAM may be located on a - * separate RAM IC. - * - * Registers - * --------- - * - * Registers are listed by their byte offset in the PowerPC address space. Each - * is 32 bits wide and little endian. Only those registers relevant to - * rendering are listed here (see CTileGen for others). - * - * Offset: Description: - * - * 0x20 Layer configuration - * 0x40 Layer A/A' color offset - * 0x44 Layer B/B' color offset - * 0x60 Layer A scroll - * 0x64 Layer A' scroll - * 0x68 Layer B scroll - * 0x6C Layer B' scroll - * - * Layer configuration is formatted as: - * - * 31 0 - * ???? ???? ???? ???? pqrs tuvw ???? ???? - * - * Bits 'pqrs' control the color depth of layers B', B, A', and A, - * respectively. If set, the layer's pattern data is encoded as 4 bits, - * otherwise the pixels are 8 bits. - * - * Bits 'tuvw' control priority for layers B', B, A', and A, respectively, - * which is also the relative ordering of the layers from bottom to top. For - * each layer, if its bit is clear, it will be drawn below the 3D layer, - * otherwise it is drawn on top. - * - * The remaining registers are described where appropriate further below. - * - * VRAM Memory Map - * --------------- - * - * The lower 1 MB of VRAM is used for storing tiles, per-line horizontal scroll - * values, and the stencil mask, which determines which of each pair of layers - * is displayed on a given line and column. - * - * 00000-F5FFF Tile pattern data - * F6000-F63FF Layer A horizontal scroll table (512 lines) - * F6400-F67FF Layer A' horizontal scroll table - * F6800-F6BFF Layer B horizontal scroll table - * F6C00-F6FFF Layer B' horizontal scroll table - * F7000-F77FF Mask table (assuming 4 bytes per line, 512 lines) - * F7800-F7FFF ? 
- * F8000-F9FFF Layer A name table - * FA000-FBFFF Layer A' name table - * FC000-FDFFF Layer B name table - * FE000-FFFFF Layer B' name table - * - * Tiles may actually address the entire 1 MB space, although in practice, - * that would conflict with the other fixed memory regions. - * - * Palette - * ------- - * - * The palette stores 32768 colors. Each entry is a little endian 32-bit word. - * The upper 16 bits are unused and the lower 16 bits contain the color: - * - * 15 0 - * tbbb bbgg gggr rrrr - * - * The 't' bit is for transparency. When set, pixels of that color are - * transparent, unless they are the bottom-most layer. - * - * Tile Name Table and Pattern Layout - * ---------------------------------- - * - * The name table is a 64x64 array of 16-bit words serving as indices for tile - * pattern data and the palette. The first 64 words correspond to the first - * row of tiles, the next 64 to the second row, etc. Although 64x64 entries - * describes a 512x512 pixel screen, only the upper-left 62x48 tiles are - * visible when the vertical and horizontal scroll values are 0. Scrolling - * moves the 496x384 pixel 'window' around, with individual wrapping of the - * two axes. - * - * The data is actually arranged in 32-bit chunks in little endian format, so - * that tiles 0, 1, 2, and 3 will be stored as 1, 0, 3, 2. Fetching two name - * table entries as a single 32-bit word places the left tile in the high 16 - * bits and the right tile in the low 16 bits. - * - * The format of a name table entry in 4-bit color mode is: - * - * 15 0 - * jkpp pppp pppp iiii - * - * The pattern index is '0ppp pppp pppi iiij'. Multiplying by 32 yields the - * offset in VRAM at which the tile pattern data is stored. Note that the MSB - * of the name table entry becomes the LSB of the pattern index. This allows - * for 32768 4-bit tile patterns, each occupying 32 bytes, which means the - * whole 1 MB VRAM space can be addressed. 
- * - * The 4-bit pattern data is stored as 8 32-bit words. Each word stores a row - * of 8 pixels: - * - * 31 0 - * aaaa bbbb cccc dddd eeee ffff gggg hhhh - * - * 'a' is the left-most pixel data. These 4-bit values are combined with bits - * from the name table to form a palette index, which determines the final - * color. For example, for pixel 'a', the 15-bit color index is: - * - * 14 0 - * kpp pppp pppp aaaa - * - * Note that index bits are re-used to form the palette index, meaning that - * the pattern address partly determines the color. - * - * In 8-bit color mode, the name table entry looks like: - * - * 15 0 - * ?ppp pppp iiii iiii - * - * The low 15 'p' and 'i' bits together form the pattern index, which must be - * multiplied by 64 to get the offset. The pattern data now consists of 16 32- - * bit words, each containing four 8-bit pixels: - * - * 31 0 - * aaaa aaaa bbbb bbbb cccc cccc dddd dddd - * - * 'a' is the left-most pixel. Each line is therefore comprised of two 32-bit - * words. The palette index for pixel 'a' is now formed from: - * - * 14 0 - * ppp pppp aaaa aaaa - * - * Stencil Mask - * ------------ - * - * For any pixel position, there are in fact only two visible layers, despite - * there being four defined layers. The layers are grouped in pairs: A (the - * 'primary' layer) and A' (the 'alternate') form one pair, and B and B' form - * the other. Only one of the primary or alternate layers from each group may - * be visible at a given position. The 'stencil mask' controls this. - * - * The mask table is a bit field organized into 512 (or 384?) lines with each - * bit controlling four columns (32 pixels). The mask does not appear to be - * affected by scrolling -- that is, it does not scroll with the underlying - * tiles, which do so independently. The mask remains fixed. - * - * Each mask entry is a little endian 32-bit word. The high 16 bits control - * A/A' and the low 16 bits control B/B'. 
Each word controls an entire line - * (32 pixels per bit, 512 pixels per 16-bit line mask, where the first 16 - * pixels are allocated to the overscan region.) If a bit is set to 1, the - * pixel from the primary layer is used, otherwise the alternate layer is - * used when the mask is 0. It is important to remember that the layers may - * have been scrolled independently. The mask operates on the final resultant - * two pixels that are determined for each location. - * - * Example of a line mask: - * - * 31 15 0 - * 0111 0000 0000 1111 0000 0000 1111 1111 - * - * These settings would display layer A' for the first 32 pixels of the line, - * followed by layer A for the next 96 pixels, A' for the subsequent 256 - * pixels, and A for the final 128 pixels. The first 256 pixels of the line - * would display layer B' and the second 256 pixels would be from layer B. - * - * The stencil mask does not affect layer priorities, which are managed - * separately regardless of mask settings. - * - * Scrolling - * --------- - * - * Each of the four layers can be scrolled independently. Vertical scroll - * values are stored in the appropriate scroll register and horizontal scroll - * values can be sourced either from the register (in which case the entire - * layer will be scrolled uniformly) or from a table in VRAM (which contains - * independent values for each line). - * - * The scroll registers are laid out as: - * - * 31 0 - * e??? ???y yyyy yyyy h??? ??xx xxxx xxxx - * - * The 'e' bit enables the layer when set. The 'y' bits comprise a vertical - * scroll value in pixels. The 'x' bits form a horizontal scroll value. If 'h' - * is set, then the VRAM table (line-by-line scrolling) is used, otherwise the - * 'x' values are applied to every line. It is also possible that the scroll - * values use more or less bits, but probably no more than 1. - * - * Each line must be wrapped back to the beginning of the same line. 
Likewise, - * vertical scrolling wraps around back to the top of the tilemap. - * - * The horizontal scroll table is a series of 16-bit little endian words, one - * for each line beginning at 0. It appears all the values can be used for - * scrolling (no control bits have been observed). The number of bits actually - * used by the hardware is irrelevant -- wrapping has the effect of making - * higher order bits unimportant. - * - * Layer Priorities - * ---------------- - * - * The layer control register (0x20) contains 4 bits that appear to control - * layer priorities. It is assumed that the 3D graphics, output by the Real3D - * pixel processors independently of the tile generator, constitute their own - * 'layer' and that the 2D tilemaps appear in front or behind. There may be a - * specific function for each priority bit or the field may be interpreted as a - * single 4-bit value denoting preset layer orders. - * - * Color Offsets - * ------------- - * - * Color offsets can be applied to the final RGB color value of every pixel. - * This is used for effects such as fading to a certain color, lightning (Lost - * World), etc. The current best guess is that the two registers control each - * pair (A/A' and B/B') of layers. The format appears to be: - * - * 31 0 - * ???? ???? rrrr rrrr gggg gggg bbbb bbbb - * - * Where 'r', 'g', and 'b' appear to be signed 8-bit color offsets. Because - * they exceed the color resolution of the palette, they must be scaled - * appropriately. - * - * Color offset registers are handled in TileGen.cpp. Two palettes are computed - * -- one for A/A' and another for B/B'. These are passed to the renderer. - */ + /* + * Render2D.cpp + * + * Implementation of the CRender2D class: OpenGL tile generator graphics. + * + * To-Do List + * ---------- + * - Is there a universal solution to the 'ROLLING START' scrolling bug (Scud + * Race) and the scrolling text during Magical Truck Adventure's attract + * mode? 
To fix Scud Race, either the stencil mask or the h-scroll value must + * be shifted by 16 pixels. Magical Truck Adventure is similar but opposite. + * Perhaps this is a function of timing registers accessed via JTAG? + * - Is there a better way to handle the overscan regions in wide screen mode? + * Is clearing two thin viewports better than one big clear? + * - Are v-scroll values 9 or 10 bits? (Does it matter?) Lost World seems to + * have some scrolling issues. + * - A proper shut-down function is needed! OpenGL might not be available when + * the destructor for this class is called. + * + * Tile Generator Hardware Overview + * -------------------------------- + * + * Model 3's medium resolution tile generator hardware appears to be derived + * from the Model 2 and System 24 chipset, but is much simpler. It consists of + * four 64x64 tile layers, comprised of 8x8 pixel tiles, with configurable + * priorities. There may be additional features but so far, no known Model 3 + * games use them. + * + * VRAM is comprised of 1 MB for tile data and an additional 128 KB for the + * palette (each color occupies 32 bits). The four tilemap layers are referred + * to as: A (0), A' (1), B (2), and B' (3). Palette RAM may be located on a + * separate RAM IC. + * + * Registers + * --------- + + 0xF1180020: -------- -------- -------- -------- ? 
+ -------- -------- x------- -------- Layer 3 bitdepth (0 = 8-bit, 1 = 4-bit) + -------- -------- -x------ -------- Layer 2 bitdepth (0 = 8-bit, 1 = 4-bit) + -------- -------- --x----- -------- Layer 1 bitdepth (0 = 8-bit, 1 = 4-bit) + -------- -------- ---x---- -------- Layer 0 bitdepth (0 = 8-bit, 1 = 4-bit) + -------- -------- ----x--- -------- Layer 3 priority (0 = below 3D, 1 = above 3D) + -------- -------- -----x-- -------- Layer 2 priority (0 = below 3D, 1 = above 3D) + -------- -------- ------x- -------- Layer 1 priority (0 = below 3D, 1 = above 3D) + -------- -------- -------x -------- Layer 0 priority (0 = below 3D, 1 = above 3D) + + 0xF1180040: Foreground layer color modulation + -------- xxxxxxxx -------- -------- Red component + -------- -------- xxxxxxxx -------- Green component + -------- -------- -------- xxxxxxxx Blue component + + 0xF1180044: Background layer color modulation + -------- xxxxxxxx -------- -------- Red component + -------- -------- xxxxxxxx -------- Green component + -------- -------- -------- xxxxxxxx Blue component + + 0xF1180060: x------- -------- -------- -------- Layer 0 enable + -------x xxxxxxxx -------- -------- Layer 0 Y scroll position + -------- -------- x------- -------- Layer 0 X line scroll enable + -------- -------- -------x xxxxxxxx Layer 0 X scroll position + + 0xF1180064: x------- -------- -------- -------- Layer 1 enable + -------x xxxxxxxx -------- -------- Layer 1 Y scroll position + -------- -------- x------- -------- Layer 1 X line scroll enable + -------- -------- -------x xxxxxxxx Layer 1 X scroll position + + 0xF1180068: x------- -------- -------- -------- Layer 2 enable + -------x xxxxxxxx -------- -------- Layer 2 Y scroll position + -------- -------- x------- -------- Layer 2 X line scroll enable + -------- -------- -------x xxxxxxxx Layer 2 X scroll position + + 0xF118006C: x------- -------- -------- -------- Layer 3 enable + -------x xxxxxxxx -------- -------- Layer 3 Y scroll position + -------- 
-------- x------- -------- Layer 3 X line scroll enable + -------- -------- -------x xxxxxxxx Layer 3 X scroll position + + * + * VRAM Memory Map + * --------------- + * + * The lower 1 MB of VRAM is used for storing tiles, per-line horizontal scroll + * values, and the stencil mask, which determines which of each pair of layers + * is displayed on a given line and column. + * + * 00000-F5FFF Tile pattern data + * F6000-F63FF Layer A horizontal scroll table (512 lines) + * F6400-F67FF Layer A' horizontal scroll table + * F6800-F6BFF Layer B horizontal scroll table + * F6C00-F6FFF Layer B' horizontal scroll table + * F7000-F77FF Mask table (assuming 4 bytes per line, 512 lines) + * F7800-F7FFF ? + * F8000-F9FFF Layer A name table + * FA000-FBFFF Layer A' name table + * FC000-FDFFF Layer B name table + * FE000-FFFFF Layer B' name table + * + * Tiles may actually address the entire 1 MB space, although in practice, + * that would conflict with the other fixed memory regions. + * + * Palette + * ------- + * + * The palette stores 32768 colors. Each entry is a little endian 32-bit word. + * The upper 16 bits are unused and the lower 16 bits contain the color: + * + * 15 0 + * tbbb bbgg gggr rrrr + * + * The 't' bit is for transparency. When set, pixels of that color are + * transparent, unless they are the bottom-most layer. + * + * Tile Name Table and Pattern Layout + * ---------------------------------- + * + * The name table is a 64x64 array of 16-bit words serving as indices for tile + * pattern data and the palette. The first 64 words correspond to the first + * row of tiles, the next 64 to the second row, etc. Although 64x64 entries + * describes a 512x512 pixel screen, only the upper-left 62x48 tiles are + * visible when the vertical and horizontal scroll values are 0. Scrolling + * moves the 496x384 pixel 'window' around, with individual wrapping of the + * two axes. 
+ * + * The data is actually arranged in 32-bit chunks in little endian format, so + * that tiles 0, 1, 2, and 3 will be stored as 1, 0, 3, 2. Fetching two name + * table entries as a single 32-bit word places the left tile in the high 16 + * bits and the right tile in the low 16 bits. + * + * The format of a name table entry in 4-bit color mode is: + * + * 15 0 + * jkpp pppp pppp iiii + * + * The pattern index is '0ppp pppp pppi iiij'. Multiplying by 32 yields the + * offset in VRAM at which the tile pattern data is stored. Note that the MSB + * of the name table entry becomes the LSB of the pattern index. This allows + * for 32768 4-bit tile patterns, each occupying 32 bytes, which means the + * whole 1 MB VRAM space can be addressed. + * + * The 4-bit pattern data is stored as 8 32-bit words. Each word stores a row + * of 8 pixels: + * + * 31 0 + * aaaa bbbb cccc dddd eeee ffff gggg hhhh + * + * 'a' is the left-most pixel data. These 4-bit values are combined with bits + * from the name table to form a palette index, which determines the final + * color. For example, for pixel 'a', the 15-bit color index is: + * + * 14 0 + * kpp pppp pppp aaaa + * + * Note that index bits are re-used to form the palette index, meaning that + * the pattern address partly determines the color. + * + * In 8-bit color mode, the name table entry looks like: + * + * 15 0 + * ?ppp pppp iiii iiii + * + * The low 15 'p' and 'i' bits together form the pattern index, which must be + * multiplied by 64 to get the offset. The pattern data now consists of 16 32- + * bit words, each containing four 8-bit pixels: + * + * 31 0 + * aaaa aaaa bbbb bbbb cccc cccc dddd dddd + * + * 'a' is the left-most pixel. Each line is therefore comprised of two 32-bit + * words. 
The palette index for pixel 'a' is now formed from: + * + * 14 0 + * ppp pppp aaaa aaaa + * + * Stencil Mask + * ------------ + * + * For any pixel position, there are in fact only two visible layers, despite + * there being four defined layers. The layers are grouped in pairs: A (the + * 'primary' layer) and A' (the 'alternate') form one pair, and B and B' form + * the other. Only one of the primary or alternate layers from each group may + * be visible at a given position. The 'stencil mask' controls this. + * + * The mask table is a bit field organized into 512 (or 384?) lines with each + * bit controlling four columns (32 pixels). The mask does not appear to be + * affected by scrolling -- that is, it does not scroll with the underlying + * tiles, which do so independently. The mask remains fixed. + * + * Each mask entry is a little endian 32-bit word. The high 16 bits control + * A/A' and the low 16 bits control B/B'. Each word controls an entire line + * (32 pixels per bit, 512 pixels per 16-bit line mask, where the first 16 + * pixels are allocated to the overscan region.) If a bit is set to 1, the + * pixel from the primary layer is used, otherwise the alternate layer is + * used when the mask is 0. It is important to remember that the layers may + * have been scrolled independently. The mask operates on the final resultant + * two pixels that are determined for each location. + * + * Example of a line mask: + * + * 31 15 0 + * 0111 0000 0000 1111 0000 0000 1111 1111 + * + * These settings would display layer A' for the first 32 pixels of the line, + * followed by layer A for the next 96 pixels, A' for the subsequent 256 + * pixels, and A for the final 128 pixels. The first 256 pixels of the line + * would display layer B' and the second 256 pixels would be from layer B. + * + * The stencil mask does not affect layer priorities, which are managed + * separately regardless of mask settings. 
+ * + * Scrolling + * --------- + * + * Each of the four layers can be scrolled independently. Vertical scroll + * values are stored in the appropriate scroll register and horizontal scroll + * values can be sourced either from the register (in which case the entire + * layer will be scrolled uniformly) or from a table in VRAM (which contains + * independent values for each line). + * + * The scroll registers are laid out as: + * + * 31 0 + * e??? ???y yyyy yyyy h??? ??xx xxxx xxxx + * + * The 'e' bit enables the layer when set. The 'y' bits comprise a vertical + * scroll value in pixels. The 'x' bits form a horizontal scroll value. If 'h' + * is set, then the VRAM table (line-by-line scrolling) is used, otherwise the + * 'x' values are applied to every line. It is also possible that the scroll + * values use more or less bits, but probably no more than 1. + * + * Each line must be wrapped back to the beginning of the same line. Likewise, + * vertical scrolling wraps around back to the top of the tilemap. + * + * The horizontal scroll table is a series of 16-bit little endian words, one + * for each line beginning at 0. It appears all the values can be used for + * scrolling (no control bits have been observed). The number of bits actually + * used by the hardware is irrelevant -- wrapping has the effect of making + * higher order bits unimportant. + * + * Layer Priorities + * ---------------- + * + * The layer control register (0x20) contains 4 bits that appear to control + * layer priorities. It is assumed that the 3D graphics, output by the Real3D + * pixel processors independently of the tile generator, constitute their own + * 'layer' and that the 2D tilemaps appear in front or behind. There may be a + * specific function for each priority bit or the field may be interpreted as a + * single 4-bit value denoting preset layer orders. + * + * Color Offsets + * ------------- + * + * Color offsets can be applied to the final RGB color value of every pixel. 
+ * This is used for effects such as fading to a certain color, lightning (Lost + * World), etc. The current best guess is that the two registers control each + * pair (A/A' and B/B') of layers. The format appears to be: + * + * 31 0 + * ???? ???? rrrr rrrr gggg gggg bbbb bbbb + * + * Where 'r', 'g', and 'b' appear to be signed 8-bit color offsets. Because + * they exceed the color resolution of the palette, they must be scaled + * appropriately. + * + * Color offset registers are handled in TileGen.cpp. Two palettes are computed + * -- one for A/A' and another for B/B'. These are passed to the renderer. + */ #include "Render2D.h" @@ -284,258 +295,59 @@ #include -/****************************************************************************** - Definitions and Constants -******************************************************************************/ - -// Shader program files (for use in development builds only) -#define VERTEX_2D_SHADER_FILE "Src/Graphics/Vertex2D.glsl" -#define FRAGMENT_2D_SHADER_FILE "Src/Graphics/Fragment2D.glsl" - - -/****************************************************************************** - Layer Rendering - - This code is quite slow and badly needs to be optimized. Dirty rectangles - should be implemented first and tile pre-decoding second. 
-******************************************************************************/ - -template -static inline void DrawTileLine(uint32_t *line, int pixelOffset, uint16_t tile, int patternLine, const uint32_t *vram, const uint32_t *palette, uint16_t mask) -{ - static_assert(bits == 4 || bits == 8, "Tiles are either 4- or 8-bit"); - - // For 8-bit pixels, each line of tile pattern is two words - if (bits == 8) - patternLine *= 2; - - // Compute offset of pattern for this line - int patternOffset; - if (bits == 4) - { - patternOffset = ((tile & 0x3FFF) << 1) | ((tile >> 15) & 1); - patternOffset *= 32; - patternOffset /= 4; - } - else - { - patternOffset = tile & 0x3FFF; - patternOffset *= 64; - patternOffset /= 4; - } - - // Name table entry provides high color bits - uint32_t colorHi = tile & ((bits == 4) ? 0x7FF0 : 0x7F00); - - // Draw - if (bits == 4) - { - uint32_t pattern = vram[patternOffset + patternLine]; - for (int p = 7; p >= 0; p--) - { - if (!clip || (/*pixelOffset >= 0 &&*/ (unsigned int)pixelOffset < 496u)) // the >= 0 check is accounted for, as the cast to uint makes them appear as very large unsigned values - { - uint16_t maskTest = 1 << (15-((pixelOffset+0)/32)); - bool visible = (mask & maskTest) != 0; - uint32_t pixel = visible ? palette[((pattern >> (p*4)) & 0xF) | colorHi] : 0; - if (!alphaTest || (visible && (pixel >> 24) != 0)) // only draw opaque pixels - line[pixelOffset] = pixel; - } - ++pixelOffset; - } - } - else - { - for (int i = 0; i < 2; i++) // 4 pixels per word - { - uint32_t pattern = vram[patternOffset + patternLine + i]; - for (int p = 3; p >= 0; p--) - { - if (!clip || (/*pixelOffset >= 0 &&*/ (unsigned int)pixelOffset < 496u)) // the >= 0 check is accounted for, as the cast to uint makes them appear as very large unsigned values - { - uint16_t maskTest = 1 << (15-((pixelOffset+0)/32)); - bool visible = (mask & maskTest) != 0; - uint32_t pixel = visible ? 
palette[((pattern >> (p*8)) & 0xFF) | colorHi] : 0; - if (!alphaTest || (visible && (pixel >> 24) != 0)) - line[pixelOffset] = pixel; - } - ++pixelOffset; - } - } - } -} - -template -static void DrawLayer(uint32_t *pixels, int layerNum, const uint32_t *vram, const uint32_t *regs, const uint32_t *palette) -{ - const uint16_t *nameTableBase = (const uint16_t *) &vram[(0xF8000 + layerNum * 0x2000) / 4]; - const uint16_t *hScrollTable = (const uint16_t *) &vram[(0xF6000 + layerNum * 0x400) / 4]; - bool lineScrollMode = (regs[0x60/4 + layerNum] & 0x8000) != 0; - int hFullScroll = regs[0x60/4 + layerNum] & 0x3FF; - int vScroll = (regs[0x60/4 + layerNum] >> 16) & 0x1FF; - - const uint16_t *maskTable = (const uint16_t *) &vram[0xF7000 / 4]; - if (layerNum < 2) // little endian: layers A and A' use second word in each pair - maskTable += 1; - - // If mask bit is clear, alternate layer is shown. We want to test for non- - // zero, so we flip the mask when drawing alternate layers (layers 1 and 3). - const uint16_t maskPolarity = (layerNum & 1) ? 0xFFFF : 0x0000; - - uint32_t *line = pixels; - - for (int y = 0; y < 384; y++) - { - int hScroll = (lineScrollMode ? hScrollTable[y] : hFullScroll) & 0x1FF; - int hTile = hScroll / 8; - int hFine = hScroll & 7; // horizontal pixel offset within tile line - int vFine = (y + vScroll) & 7; // vertical pixel offset within 8x8 tile - const uint16_t *nameTable = &nameTableBase[(64 * ((y + vScroll) / 8)) & 0xFFF]; // clamp to 64x64 = 0x1000 - uint16_t mask = *maskTable ^ maskPolarity; // each bit covers 32 pixels - - int pixelOffset = -hFine; - int extraTile = (hFine != 0) ? 
1 : 0; // h-scrolling requires part of 63rd tile - - // First tile may be clipped - DrawTileLine(line, pixelOffset, nameTable[(hTile ^ 1) & 63], vFine, vram, palette, mask); - ++hTile; - pixelOffset += 8; - // Middle tiles will not be clipped - for (int tx = 1; tx < (62 - 1 + extraTile); tx++) - { - DrawTileLine(line, pixelOffset, nameTable[(hTile ^ 1) & 63], vFine, vram, palette, mask); - ++hTile; - pixelOffset += 8; - } - // Last tile may be clipped - DrawTileLine(line, pixelOffset, nameTable[(hTile ^ 1) & 63], vFine, vram, palette, mask); - ++hTile; - pixelOffset += 8; - - // Advance one line - maskTable += 2; - line += 496; - } -} - -std::pair CRender2D::DrawTilemaps(uint32_t *pixelsBottom, uint32_t *pixelsTop) -{ - unsigned priority = (m_regs[0x20/4] >> 8) & 0xF; - - // Render bottom layers - bool noBottomSurface = true; - static const int bottomOrder[4] = { 3, 2, 1, 0 }; - for (int i = 0; i < 4; i++) - { - int layerNum = bottomOrder[i]; - bool is4Bit = (m_regs[0x20/4] & (1 << (12 + layerNum))) != 0; - bool enabled = (m_regs[0x60/4 + layerNum] & 0x80000000) != 0; - bool selected = (priority & (1 << layerNum)) == 0; - if (enabled && selected) - { - if (noBottomSurface) - { - if (is4Bit) - DrawLayer<4, false>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - else - DrawLayer<8, false>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - } - else - { - if (is4Bit) - DrawLayer<4, true>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - else - DrawLayer<8, true>(pixelsBottom, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - } - noBottomSurface = false; - } - } - - // Render top layers - // NOTE: layer ordering is different according to MAME (which has 3, 2, 0, 1 - // for top layer). Until I see evidence that this is correct and not a typo, - // I will assume consistent layer ordering. 
- bool noTopSurface = true; - static const int topOrder[4] = { 3, 2, 1, 0 }; - for (int i = 0; i < 4; i++) - { - int layerNum = topOrder[i]; - bool is4Bit = (m_regs[0x20/4] & (1 << (12 + layerNum))) != 0; - bool enabled = (m_regs[0x60/4 + layerNum] & 0x80000000) != 0; - bool selected = (priority & (1 << layerNum)) != 0; - if (enabled && selected) - { - if (noTopSurface) - { - if (is4Bit) - DrawLayer<4, false>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - else - DrawLayer<8, false>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - } - else - { - if (is4Bit) - DrawLayer<4, true>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - else - DrawLayer<8, true>(pixelsTop, layerNum, m_vram, m_regs, m_palette[layerNum / 2]); - } - noTopSurface = false; - } - } - - // Indicate whether top and bottom surfaces have to be rendered - return std::pair(!noTopSurface, !noBottomSurface); -} - - /****************************************************************************** Frame Display Functions ******************************************************************************/ -// Draws a surface to the screen (0 is top and 1 is bottom) -void CRender2D::DisplaySurface(int surface) -{ - // Shader program - m_shader.EnableShader(); - - glBindVertexArray(m_vao); - - // Draw the surface - glActiveTexture(GL_TEXTURE0); // texture unit 0 - glBindTexture(GL_TEXTURE_2D, m_texID[surface]); - - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - glBindVertexArray(0); - - m_shader.DisableShader(); -} - // Set up viewport and OpenGL state for 2D rendering (sets up blending function but disables blending) void CRender2D::Setup2D(bool isBottom) { - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // alpha of 1.0 is opaque, 0 is transparent - glDisable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // alpha of 1.0 is opaque, 0 is transparent - // Disable Z-buffering - glDisable(GL_DEPTH_TEST); + // Disable Z-buffering + 
glDisable(GL_DEPTH_TEST); - // Clear everything if requested or just overscan areas for wide screen mode - if (isBottom) - { - glClearColor(0.0, 0.0, 0.0, 0.0); - glViewport(0, 0, m_totalXPixels, m_totalYPixels); - glDisable(GL_SCISSOR_TEST); // scissor is enabled to fix the 2d/3d miss match problem - glClear(GL_COLOR_BUFFER_BIT); // we want to clear outside the scissored areas so must disable it - glEnable(GL_SCISSOR_TEST); - } + // Clear everything if requested or just overscan areas for wide screen mode + if (isBottom) + { + glClearColor(0.0, 0.0, 0.0, 0.0); + glViewport (0, 0, m_totalXPixels, m_totalYPixels); + glDisable (GL_SCISSOR_TEST); // scissor is enabled to fix the 2d/3d miss match problem + glClear (GL_COLOR_BUFFER_BIT); // we want to clear outside the scissored areas so must disable it + glEnable (GL_SCISSOR_TEST); + } - // Set up the viewport and orthogonal projection - bool stretchBottom = m_config["WideBackground"].ValueAs() && isBottom; - if (!stretchBottom) - { - glViewport(m_xOffset - m_correction, m_yOffset + m_correction, m_xPixels, m_yPixels); //Preserve aspect ratio of tile layer by constraining and centering viewport - } + // Set up the viewport and orthogonal projection + bool stretchBottom = m_config["WideBackground"].ValueAs() && isBottom; + if (!stretchBottom) + { + glViewport(m_xOffset - m_correction, m_yOffset + m_correction, m_xPixels, m_yPixels); //Preserve aspect ratio of tile layer by constraining and centering viewport + } +} + +void CRender2D::DrawSurface(GLuint textureID) +{ + m_shader.EnableShader(); + + glEnable (GL_BLEND); + glBindVertexArray (m_vao); + glActiveTexture (GL_TEXTURE0); // texture unit 0 + glBindTexture (GL_TEXTURE_2D, textureID); + glDrawArrays (GL_TRIANGLE_STRIP, 0, 4); + glBindVertexArray (0); + glDisable (GL_BLEND); + + m_shader.DisableShader(); +} + +float CRender2D::LineToPercentStart(int lineNumber) +{ + return lineNumber / 384.0f; +} + +float CRender2D::LineToPercentEnd(int lineNumber) +{ + return 
(lineNumber + 1) / 384.0f; } void CRender2D::BeginFrame(void) @@ -544,38 +356,80 @@ void CRender2D::BeginFrame(void) void CRender2D::PreRenderFrame(void) { - // Update all layers - m_surfaces_present = DrawTilemaps(m_bottomSurface, m_topSurface); - glActiveTexture(GL_TEXTURE0); // texture unit 0 - if (m_surfaces_present.first) - { - glBindTexture(GL_TEXTURE_2D, m_texID[0]); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 496, 384, GL_RGBA, GL_UNSIGNED_BYTE, m_topSurface); - } - if (m_surfaces_present.second) - { - glBindTexture(GL_TEXTURE_2D, m_texID[1]); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 496, 384, GL_RGBA, GL_UNSIGNED_BYTE, m_bottomSurface); - } + glDisable(GL_SCISSOR_TEST); + glViewport(0, 0, 496, 384); + + m_shaderTileGen.EnableShader(); + + glActiveTexture(GL_TEXTURE0); // texture unit 0 + glBindTexture(GL_TEXTURE_2D, m_vramTexID); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 512, 512, GL_RED_INTEGER, GL_UNSIGNED_INT, m_vram); + glActiveTexture(GL_TEXTURE1); // texture unit 1 + glBindTexture(GL_TEXTURE_2D, m_paletteTexID); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 128, 256, GL_RED_INTEGER, GL_UNSIGNED_INT, m_vram + 0x40000); + glActiveTexture(GL_TEXTURE0); // texture unit 1 + + glUniform1uiv(m_shaderTileGen.uniformLocMap["regs"], 32, m_regs); + + glBindVertexArray(m_vao); + + m_fboBottom.Set(); + + glClearColor(0, 0, 0, 0); + glClear(GL_COLOR_BUFFER_BIT); + glEnable(GL_BLEND); + + // render bottom layer + for (int i = 4; i-- > 0;) { + + if (!IsEnabled(i)) { + continue; + } + + if (Above3D(i)) { + continue; + } + + glUniform1i(m_shaderTileGen.uniformLocMap["layerNumber"], i); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + } + + m_fboTop.Set(); + glClear(GL_COLOR_BUFFER_BIT); + + // render top layer + for (int i = 4; i-- > 0;) { + + if (!IsEnabled(i)) { + continue; + } + + if (!Above3D(i)) { + continue; + } + + glUniform1i(m_shaderTileGen.uniformLocMap["layerNumber"], i); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + } + + glBindVertexArray(0); + + 
m_shaderTileGen.DisableShader(); + m_fboBottom.Disable(); + + glDisable(GL_BLEND); } void CRender2D::RenderFrameBottom(void) { - // Display bottom surface if anything was drawn there, else clear everything - Setup2D(true); - if (m_surfaces_present.second) - DisplaySurface(1); + Setup2D(true); + DrawSurface(m_fboBottom.GetTextureID()); } void CRender2D::RenderFrameTop(void) { - // Display top surface only if it exists - if (m_surfaces_present.first) - { - Setup2D(false); - glEnable(GL_BLEND); - DisplaySurface(0); - } + Setup2D(false); + DrawSurface(m_fboTop.GetTextureID()); } void CRender2D::EndFrame(void) @@ -597,109 +451,135 @@ void CRender2D::WriteVRAM(unsigned addr, uint32_t data) Configuration, Initialization, and Shutdown ******************************************************************************/ -void CRender2D::AttachRegisters(const uint32_t *regPtr) +void CRender2D::AttachRegisters(const uint32_t* regPtr) { - m_regs = regPtr; - DebugLog("Render2D attached registers\n"); + m_regs = regPtr; + DebugLog("Render2D attached registers\n"); } -void CRender2D::AttachPalette(const uint32_t *palPtr[2]) +void CRender2D::AttachPalette(const uint32_t* palPtr[2]) { - m_palette[0] = palPtr[0]; - m_palette[1] = palPtr[1]; - DebugLog("Render2D attached palette\n"); + m_palette[0] = palPtr[0]; + m_palette[1] = palPtr[1]; + DebugLog("Render2D attached palette\n"); } -void CRender2D::AttachVRAM(const uint8_t *vramPtr) +void CRender2D::AttachVRAM(const uint8_t* vramPtr) { - m_vram = (uint32_t *) vramPtr; - DebugLog("Render2D attached VRAM\n"); + m_vram = (uint32_t*)vramPtr; + DebugLog("Render2D attached VRAM\n"); } -// Memory pool and offsets within it -#define MEMORY_POOL_SIZE (2*512*384*4) -#define OFFSET_TOP_SURFACE 0 // 512*384*4 bytes -#define OFFSET_BOTTOM_SURFACE (512*384*4) // 512*384*4 - bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes) { - // Allocate memory for layer surfaces - 
m_memoryPool = new(std::nothrow) uint8_t[MEMORY_POOL_SIZE]; - if (NULL == m_memoryPool) - return ErrorLog("Insufficient memory for tilemap surfaces (need %1.1f MB).", float(MEMORY_POOL_SIZE) / 0x100000); - memset(m_memoryPool, 0, MEMORY_POOL_SIZE); // clear textures + // Resolution + m_xPixels = xRes; + m_yPixels = yRes; + m_xOffset = xOffset; + m_yOffset = yOffset; + m_totalXPixels = totalXRes; + m_totalYPixels = totalYRes; + m_correction = (UINT32)(((yRes / 384.f) * 2) + 0.5f); // for some reason the 2d layer is 2 pixels off the 3D - // Set up pointers to memory regions - m_topSurface = (uint32_t *) &m_memoryPool[OFFSET_TOP_SURFACE]; - m_bottomSurface = (uint32_t *) &m_memoryPool[OFFSET_BOTTOM_SURFACE]; - - // Resolution - m_xPixels = xRes; - m_yPixels = yRes; - m_xOffset = xOffset; - m_yOffset = yOffset; - m_totalXPixels = totalXRes; - m_totalYPixels = totalYRes; - m_correction = (UINT32)(((yRes / 384.f) * 2) + 0.5f); // for some reason the 2d layer is 2 pixels off the 3D - - DebugLog("Render2D initialized (allocated %1.1f MB)\n", float(MEMORY_POOL_SIZE) / 0x100000); - return OKAY; + return OKAY; } CRender2D::CRender2D(const Util::Config::Node& config) - : m_config(config), - m_vao(0) + : m_config(config), + m_vao(0), + m_vram(nullptr), + m_palette{nullptr}, + m_regs(nullptr) { - DebugLog("Built Render2D\n"); + DebugLog("Built Render2D\n"); - m_shader.LoadShaders(s_vertexShaderSource, s_fragmentShaderSource); - m_shader.GetUniformLocationMap("tex1"); - m_shader.EnableShader(); + m_shader.LoadShaders(s_vertexShaderSource, s_fragmentShaderSource); + m_shader.GetUniformLocationMap("tex1"); + m_shader.EnableShader(); - // update uniform memory - glUniform1i(m_shader.uniformLocMap["tex1"], 0); // bind to texture unit zero + // update uniform memory + glUniform1i(m_shader.uniformLocMap["tex1"], 0); // texture unit zero - m_shader.DisableShader(); + m_shader.DisableShader(); - // Create textures - glActiveTexture(GL_TEXTURE0); // texture unit 0 - glGenTextures(2, 
m_texID); + m_shaderTileGen.LoadShaders(s_vertexShaderTileGen, s_fragmentShaderTileGen); + m_shaderTileGen.GetUniformLocationMap("vram"); + m_shaderTileGen.GetUniformLocationMap("palette"); + m_shaderTileGen.GetUniformLocationMap("regs"); + m_shaderTileGen.GetUniformLocationMap("layerNumber"); + m_shaderTileGen.GetUniformLocationMap("lineStart"); + m_shaderTileGen.GetUniformLocationMap("lineEnd"); - for (int i = 0; i < 2; i++) - { - glBindTexture(GL_TEXTURE_2D, m_texID[i]); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 496, 384, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - } + m_shaderTileGen.EnableShader(); - glGenVertexArrays(1, &m_vao); - glBindVertexArray(m_vao); - // no states needed since we do it in the shader - glBindVertexArray(0); + glUniform1i(m_shaderTileGen.uniformLocMap["vram"], 0); // texture unit 0 + glUniform1i(m_shaderTileGen.uniformLocMap["palette"], 1); // texture unit 1 + glUniform1f(m_shaderTileGen.uniformLocMap["lineStart"], LineToPercentStart(0)); + glUniform1f(m_shaderTileGen.uniformLocMap["lineEnd"], LineToPercentEnd(383)); + + m_shaderTileGen.DisableShader(); + + glGenVertexArrays(1, &m_vao); + glBindVertexArray(m_vao); + // no states needed since we do it in the shader + glBindVertexArray(0); + + glGenTextures(1, &m_vramTexID); + glBindTexture(GL_TEXTURE_2D, m_vramTexID); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, 512, 512, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr); + + glGenTextures(1, &m_paletteTexID); + 
glBindTexture(GL_TEXTURE_2D, m_paletteTexID); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, 128, 256, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr); + + glBindTexture(GL_TEXTURE_2D, 0); + + m_fboBottom.Create(496, 384); + m_fboTop.Create(496, 384); } CRender2D::~CRender2D(void) { - m_shader.UnloadShaders(); - glDeleteTextures(2, m_texID); + m_shader.UnloadShaders(); + m_shaderTileGen.UnloadShaders(); - if (m_vao) { - glDeleteVertexArrays(1, &m_vao); - m_vao = 0; - } + if (m_vramTexID) { + glDeleteTextures(1, &m_vramTexID); + m_vramTexID = 0; + } - if (m_memoryPool) - { - delete [] m_memoryPool; - m_memoryPool = 0; - } + if (m_paletteTexID) { + glDeleteTextures(1, &m_paletteTexID); + m_paletteTexID = 0; + } - m_vram = 0; - m_topSurface = 0; - m_bottomSurface = 0; + if (m_vao) { + glDeleteVertexArrays(1, &m_vao); + m_vao = 0; + } - DebugLog("Destroyed Render2D\n"); + m_fboBottom.Destroy(); + m_fboTop.Destroy(); + + m_vram = nullptr; + + DebugLog("Destroyed Render2D\n"); +} + +bool CRender2D::IsEnabled(int layerNumber) +{ + return (m_regs[0x60 / 4 + layerNumber] & 0x80000000) > 0; +} + +bool CRender2D::Above3D(int layerNumber) +{ + return (m_regs[0x20 / 4] >> (8 + layerNumber)) & 0x1; } diff --git a/Src/Graphics/Render2D.h b/Src/Graphics/Render2D.h index ed04571..c13b644 100644 --- a/Src/Graphics/Render2D.h +++ b/Src/Graphics/Render2D.h @@ -1,12 +1,12 @@ /** ** Supermodel ** A Sega Model 3 Arcade Emulator. - ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson + ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. 
** ** Supermodel is free software: you can redistribute it and/or modify it under - ** the terms of the GNU General Public License as published by the Free + ** the terms of the GNU General Public License as published by the Free ** Software Foundation, either version 3 of the License, or (at your option) ** any later version. ** @@ -18,12 +18,12 @@ ** You should have received a copy of the GNU General Public License along ** with Supermodel. If not, see . **/ - -/* - * Render2D.h - * - * Header file defining the CRender2D class: OpenGL tile generator graphics. - */ + + /* + * Render2D.h + * + * Header file defining the CRender2D class: OpenGL tile generator graphics. + */ #ifndef INCLUDED_RENDER2D_H #define INCLUDED_RENDER2D_H @@ -31,181 +31,184 @@ #include #include "Util/NewConfig.h" #include "New3D/GLSLShader.h" +#include "FBO.h" - -/* - * CRender2D: - * - * Tile generator graphics engine. This must be constructed and initialized - * before being attached to any objects that want to make use of it. Apart from - * the constructor, all members assume that a global GL device - * context is available and that GL functions may be called. - */ + /* + * CRender2D: + * + * Tile generator graphics engine. This must be constructed and initialized + * before being attached to any objects that want to make use of it. Apart from + * the constructor, all members assume that a global GL device + * context is available and that GL functions may be called. + */ class CRender2D { public: - /* - * BeginFrame(void): - * - * Prepare to render a new frame. Must be called once per frame prior to - * drawing anything. - */ - void BeginFrame(void); + /* + * BeginFrame(void): + * + * Prepare to render a new frame. Must be called once per frame prior to + * drawing anything. + */ + void BeginFrame(void); - /* - * PreRenderFrame(void): - * - * Draws the all top layers (above 3D graphics) and bottom layers (below 3D - * graphics) but does not yet display them. May send data to the GPU. 
- */ - void PreRenderFrame(void); + /* + * PreRenderFrame(void): + * + * Draws the all top layers (above 3D graphics) and bottom layers (below 3D + * graphics) but does not yet display them. May send data to the GPU. + */ + void PreRenderFrame(void); - /* - * RenderFrameBottom(void): - * - * Overwrites the color buffer with bottom surface that was pre-rendered by - * the last call to PreRenderFrame(). - */ - void RenderFrameBottom(void); + /* + * RenderFrameBottom(void): + * + * Overwrites the color buffer with bottom surface that was pre-rendered by + * the last call to PreRenderFrame(). + */ + void RenderFrameBottom(void); - /* - * RenderFrameTop(void): - * - * Draws the top surface (if it exists) that was pre-rendered by the last - * call to PreRenderFrame(). Previously drawn graphics layers will be visible - * through transparent regions. - */ - void RenderFrameTop(void); - - /* - * EndFrame(void): - * - * Signals the end of rendering for this frame. Must be called last during - * the frame. - */ - void EndFrame(void); - - /* - * WriteVRAM(addr, data): - * - * Indicates what will be written next to the tile generator's RAM. The - * VRAM address must not have yet been updated, to allow the renderer to - * check for changes. Data is accepted in the same form as the tile - * generator: the MSB is what was written to addr+3. This function is - * intended to facilitate on-the-fly decoding of tiles and palette data. - * - * Parameters: - * addr Address in tile generator RAM. Caller must ensure it is - * clamped to the range 0x000000 to 0x11FFFF because this - * function does not. - * data The data to write. - */ - void WriteVRAM(unsigned addr, uint32_t data); - - /* - * AttachRegisters(regPtr): - * - * Attaches tile generator registers. This must be done prior to any - * rendering otherwise the program may crash with an access violation. - * - * Parameters: - * regPtr Pointer to the base of the tile generator registers. There - * are assumed to be 64 in all. 
- */ - void AttachRegisters(const uint32_t *regPtr); - - /* - * AttachPalette(palPtr): - * - * Attaches tile generator palettes. This must be done prior to any - * rendering. - * - * Parameters: - * palPtr Pointer to two palettes. The first is for layers A/A' and - * the second is for B/B'. - */ - void AttachPalette(const uint32_t *palPtr[2]); + /* + * RenderFrameTop(void): + * + * Draws the top surface (if it exists) that was pre-rendered by the last + * call to PreRenderFrame(). Previously drawn graphics layers will be visible + * through transparent regions. + */ + void RenderFrameTop(void); - /* - * AttachVRAM(vramPtr): - * - * Attaches tile generator RAM. This must be done prior to any rendering - * otherwise the program may crash with an access violation. - * - * Parameters: - * vramPtr Pointer to the base of the tile generator RAM (0x120000 - * bytes). VRAM is assumed to be in little endian format. - */ - void AttachVRAM(const uint8_t *vramPtr); + /* + * EndFrame(void): + * + * Signals the end of rendering for this frame. Must be called last during + * the frame. + */ + void EndFrame(void); + + /* + * WriteVRAM(addr, data): + * + * Indicates what will be written next to the tile generator's RAM. The + * VRAM address must not have yet been updated, to allow the renderer to + * check for changes. Data is accepted in the same form as the tile + * generator: the MSB is what was written to addr+3. This function is + * intended to facilitate on-the-fly decoding of tiles and palette data. + * + * Parameters: + * addr Address in tile generator RAM. Caller must ensure it is + * clamped to the range 0x000000 to 0x11FFFF because this + * function does not. + * data The data to write. + */ + void WriteVRAM(unsigned addr, uint32_t data); + + /* + * AttachRegisters(regPtr): + * + * Attaches tile generator registers. This must be done prior to any + * rendering otherwise the program may crash with an access violation. 
+ * + * Parameters: + * regPtr Pointer to the base of the tile generator registers. There + * are assumed to be 64 in all. + */ + void AttachRegisters(const uint32_t* regPtr); + + /* + * AttachPalette(palPtr): + * + * Attaches tile generator palettes. This must be done prior to any + * rendering. + * + * Parameters: + * palPtr Pointer to two palettes. The first is for layers A/A' and + * the second is for B/B'. + */ + void AttachPalette(const uint32_t* palPtr[2]); + + /* + * AttachVRAM(vramPtr): + * + * Attaches tile generator RAM. This must be done prior to any rendering + * otherwise the program may crash with an access violation. + * + * Parameters: + * vramPtr Pointer to the base of the tile generator RAM (0x120000 + * bytes). VRAM is assumed to be in little endian format. + */ + void AttachVRAM(const uint8_t* vramPtr); + + /* + * Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes); + * + * One-time initialization of the context. Must be called before any other + * members (meaning it should be called even before being attached to any + * other objects that want to use it). + * + * Parameters: + * xOffset X offset of the viewable area within OpenGL display + * surface, in pixels. + * yOffset Y offset. + * xRes Horizontal resolution of the viewable area. + * yRes Vertical resolution. + * totalXRes Horizontal resolution of the complete display area. + * totalYRes Vertical resolution. + * + * Returns: + * OKAY is successful, otherwise FAILED if a non-recoverable error + * occurred. Prints own error messages. + */ + bool Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes); + + /* + * CRender2D(config): + * ~CRender2D(void): + * + * Constructor and destructor. + * + * Parameters: + * config Run-time configuration. + */ + CRender2D(const Util::Config::Node& config); + ~CRender2D(void); - /* - * Init(xOffset, yOffset, xRes, yRes, totalXRes, totalYRes); - * - * One-time initialization of the context. 
Must be called before any other - * members (meaning it should be called even before being attached to any - * other objects that want to use it). - * - * Parameters: - * xOffset X offset of the viewable area within OpenGL display - * surface, in pixels. - * yOffset Y offset. - * xRes Horizontal resolution of the viewable area. - * yRes Vertical resolution. - * totalXRes Horizontal resolution of the complete display area. - * totalYRes Vertical resolution. - * - * Returns: - * OKAY is successful, otherwise FAILED if a non-recoverable error - * occurred. Prints own error messages. - */ - bool Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes, unsigned totalXRes, unsigned totalYRes); - - /* - * CRender2D(config): - * ~CRender2D(void): - * - * Constructor and destructor. - * - * Parameters: - * config Run-time configuration. - */ - CRender2D(const Util::Config::Node &config); - ~CRender2D(void); - private: - // Private member functions - std::pair DrawTilemaps(uint32_t *destBottom, uint32_t *destTop); - void DisplaySurface(int surface); - void Setup2D(bool isBottom); - - // Run-time configuration - const Util::Config::Node &m_config; - // Data received from tile generator device object - const uint32_t *m_vram; - const uint32_t *m_palette[2]; // palettes for A/A' and B/B' - const uint32_t *m_regs; - - // OpenGL data - GLuint m_texID[2]; // IDs for the 2 layer textures (top and bottom) - unsigned m_xPixels = 496; // display surface resolution - unsigned m_yPixels = 384; // ... 
- unsigned m_xOffset = 0; // offset - unsigned m_yOffset = 0; - unsigned m_totalXPixels; // total display surface resolution - unsigned m_totalYPixels; - unsigned m_correction = 0; - - GLuint m_vao; - GLSLShader m_shader; + bool IsEnabled (int layerNumber); + bool Above3D (int layerNumber); + void Setup2D (bool isBottom); + void DrawSurface (GLuint textureID); - // PreRenderFrame() tracks which surfaces exist in current frame - std::pair m_surfaces_present = std::pair(false, false); + float LineToPercentStart (int lineNumber); // vertical line numbers are from 0-383 + float LineToPercentEnd (int lineNumber); // vertical line numbers are from 0-383 + + // Run-time configuration + const Util::Config::Node& m_config; + + // Data received from tile generator device object + const uint32_t* m_vram; + const uint32_t* m_palette[2]; // palettes for A/A' and B/B' + const uint32_t* m_regs; + + // OpenGL data + unsigned m_xPixels = 496; // display surface resolution + unsigned m_yPixels = 384; // ... + unsigned m_xOffset = 0; // offset + unsigned m_yOffset = 0; + unsigned m_totalXPixels = 0; // total display surface resolution + unsigned m_totalYPixels = 0; + unsigned m_correction = 0; + + GLuint m_vao; + GLSLShader m_shader; + GLSLShader m_shaderTileGen; + + GLuint m_vramTexID = 0; + GLuint m_paletteTexID = 0; + + FBO m_fboBottom; + FBO m_fboTop; - // Buffers - uint8_t *m_memoryPool = 0; // all memory is allocated here - uint32_t *m_topSurface = 0; // 512x384x32bpp pixel surface for top layers - uint32_t *m_bottomSurface = 0; // bottom layers }; diff --git a/Src/Graphics/Shaders2D.h b/Src/Graphics/Shaders2D.h index 2536b9a..0c0c63f 100644 --- a/Src/Graphics/Shaders2D.h +++ b/Src/Graphics/Shaders2D.h @@ -71,4 +71,255 @@ static const char s_fragmentShaderSource[] = R"glsl( )glsl"; +// Vertex shader +static const char s_vertexShaderTileGen[] = R"glsl( + + #version 410 core + + uniform float lineStart; // defined as a % of the viewport height in the range 0-1. 
So 0 is top line, 0.5 is line 192 etc + uniform float lineEnd; + + void main(void) + { + const float v1 = -1.0; + const float v2 = 1.0; + + vec4 vertices[] = vec4[]( vec4(-1.0, v1, 0.0, 1.0), + vec4(-1.0, v2, 0.0, 1.0), + vec4( 1.0, v1, 0.0, 1.0), + vec4( 1.0, v2, 0.0, 1.0)); + + float top = ((v2 - v1) * lineStart) + v1; + float bottom = ((v2 - v1) * lineEnd ) + v1; + + vertices[0].y = top; + vertices[2].y = top; + vertices[1].y = bottom; + vertices[3].y = bottom; + + gl_Position = vertices[gl_VertexID % 4]; + } + + )glsl"; + +// Fragment shader +static const char s_fragmentShaderTileGen[] = R"glsl( + + #version 410 core + + //layout(origin_upper_left) in vec4 gl_FragCoord; + + // inputs + uniform usampler2D vram; // texture 512x512 + uniform usampler2D palette; // texture 128x256 - actual dimensions dont matter too much but we have to stay in the limits of max tex width/height, so can't have 1 giant 1d array + uniform uint regs[32]; + uniform int layerNumber; + + // outputs + out vec4 fragColor; + + ivec2 GetVRamCoords(int offset) + { + return ivec2(offset % 512, offset / 512); + } + + ivec2 GetPaletteCoords(int offset) + { + return ivec2(offset % 128, offset / 128); + } + + uint GetLineMask(int layerNum, int yCoord) + { + uint shift = (layerNum<2) ? 16u : 0u; // need to check this, we could be endian swapped so could be wrong + uint maskPolarity = ((layerNum & 1) > 0) ? 
0xFFFFu : 0x0000u; + int index = (0xF7000 / 4) + yCoord; + + ivec2 coords = GetVRamCoords(index); + uint mask = ((texelFetch(vram,coords,0).r >> shift) & 0xFFFFu) ^ maskPolarity; + + return mask; + } + + bool GetPixelMask(int layerNum, int xCoord, int yCoord) + { + uint lineMask = GetLineMask(layerNum, yCoord); + uint maskTest = 1 << (15-(xCoord/32)); + + return (lineMask & maskTest) != 0; + } + + int GetLineScrollValue(int layerNum, int yCoord) + { + int index = ((0xF6000 + (layerNum * 0x400)) / 4) + (yCoord / 2); + int shift = (yCoord % 2) * 16; // double check this + + ivec2 coords = GetVRamCoords(index); + return int((texelFetch(vram,coords,0).r >> shift) & 0xFFFFu); + } + + int GetTileNumber(int xCoord, int yCoord, int xScroll, int yScroll) + { + int xIndex = ((xCoord + xScroll) / 8) & 0x3F; + int yIndex = ((yCoord + yScroll) / 8) & 0x3F; + + return (yIndex*64) + xIndex; + } + + int GetTileData(int layerNum, int tileNumber) + { + int addressBase = (0xF8000 + (layerNum * 0x2000)) / 4; + int offset = tileNumber / 2; // two tiles per 32bit word + int shift = (1 - (tileNumber % 2)) * 16; // triple check this + + ivec2 coords = GetVRamCoords(addressBase+offset); + uint data = (texelFetch(vram,coords,0).r >> shift) & 0xFFFFu; + + return int(data); + } + + int GetVFine(int yCoord, int yScroll) + { + return (yCoord + yScroll) & 7; + } + + int GetHFine(int xCoord, int xScroll) + { + return (xCoord + xScroll) & 7; + } + + // register data + bool LineScrollMode (int layerNum) { return (regs[0x60/4 + layerNum] & 0x8000) != 0; } + int GetHorizontalScroll(int layerNum) { return int(regs[0x60 / 4 + layerNum] &0x3FFu); } + int GetVerticalScroll (int layerNum) { return int((regs[0x60/4 + layerNum] >> 16) & 0x1FFu); } + int LayerPriority () { return int((regs[0x20/4] >> 8) & 0xFu); } + bool LayerIs4Bit (int layerNum) { return (regs[0x20/4] & (1 << (12 + layerNum))) != 0; } + bool LayerEnabled (int layerNum) { return (regs[0x60/4 + layerNum] & 0x80000000) != 0; } + bool 
LayerSelected (int layerNum) { return (LayerPriority() & (1 << layerNum)) == 0; } + + float Int8ToFloat(uint c) + { + if((c & 0x80u) > 0u) { // this is a bit harder in GLSL. Top bit means negative number, we extend to make 32bit + return float(int(c | 0xFFFFFF00u)) / 128.0; + } + else { + return float(c) / 127.0; + } + } + + vec4 AddColourOffset(int layerNum, vec4 colour) + { + uint offsetReg = regs[(0x40/4) + layerNum/2]; + + vec4 c; + c.b = Int8ToFloat((offsetReg >>16) & 0xFFu); + c.g = Int8ToFloat((offsetReg >> 8) & 0xFFu); + c.r = Int8ToFloat((offsetReg >> 0) & 0xFFu); + c.a = 0.0; + + colour += c; + return clamp(colour,0.0,1.0); // clamp is probably not needed since will get clamped on render target + } + + vec4 Int16ColourToVec4(uint colour) + { + uint alpha = (colour>>15); // top bit is alpha. 1 means clear, 0 opaque + alpha = ~alpha; // invert + alpha = alpha & 0x1u; // mask bit + + vec4 c; + c.r = float((colour >> 0 ) & 0x1F) / 31.0; + c.g = float((colour >> 5 ) & 0x1F) / 31.0; + c.b = float((colour >> 10) & 0x1F) / 31.0; + c.a = float(alpha) / 1.0; + + c.rgb *= c.a; // multiply by alpha value, this will push transparent to black, no branch needed + + return c; + } + + vec4 GetColour(int layerNum, int paletteOffset) + { + ivec2 coords = GetPaletteCoords(paletteOffset); + uint colour = texelFetch(palette,coords,0).r; + + vec4 col = Int16ColourToVec4(colour); // each colour is only 16bits, but occupies 32bits + + return AddColourOffset(layerNum,col); // apply colour offsets from registers + } + + vec4 Draw4Bit(int layerNum, int tileData, int hFine, int vFine) + { + // Tile pattern offset: each tile occupies 32 bytes when using 4-bit pixels (offset of tile pattern within VRAM) + int patternOffset = ((tileData & 0x3FFF) << 1) | ((tileData >> 15) & 1); + patternOffset *= 32; + patternOffset /= 4; + + // Upper color bits; the lower 4 bits come from the tile pattern + int paletteIndex = tileData & 0x7FF0; + + ivec2 coords = GetVRamCoords(patternOffset+vFine); + 
uint pattern = texelFetch(vram,coords,0).r; + pattern = (pattern >> ((7-hFine)*4)) & 0xFu; // get the pattern for our horizontal value + + return GetColour(layerNum, paletteIndex | int(pattern)); + } + + vec4 Draw8Bit(int layerNum, int tileData, int hFine, int vFine) + { + // Tile pattern offset: each tile occupies 64 bytes when using 8-bit pixels + int patternOffset = tileData & 0x3FFF; + patternOffset *= 64; + patternOffset /= 4; + + // Upper color bits + int paletteIndex = tileData & 0x7F00; + + // each read is 4 pixels + int offset = hFine / 4; + + ivec2 coords = GetVRamCoords(patternOffset+(vFine*2)+offset); // 8-bit pixels, each line is two words + uint pattern = texelFetch(vram,coords,0).r; + + pattern = (pattern >> ((3-(hFine%4))*8)) & 0xFFu; // shift out the bits we want for this pixel + + return GetColour(layerNum, paletteIndex | int(pattern)); + } + + void main() + { + ivec2 pos = ivec2(gl_FragCoord.xy); + + int scrollX; + if(LineScrollMode(layerNumber)) { + scrollX = GetLineScrollValue(layerNumber, pos.y); + } + else { + scrollX = GetHorizontalScroll(layerNumber); + } + + int scrollY = GetVerticalScroll(layerNumber); + int tileNumber = GetTileNumber(pos.x,pos.y,scrollX,scrollY); + int hFine = GetHFine(pos.x,scrollX); + int vFine = GetVFine(pos.y,scrollY); + bool pixelMask = GetPixelMask(layerNumber,pos.x,pos.y); + + if(pixelMask==true) { + + int tileData = GetTileData(layerNumber,tileNumber); + + if(LayerIs4Bit(layerNumber)) { + fragColor = Draw4Bit(layerNumber,tileData,hFine,vFine); + } + else { + fragColor = Draw8Bit(layerNumber,tileData,hFine,vFine); + } + } + else { + fragColor = vec4(0.0); + } + } + + )glsl"; + + #endif // INCLUDED_SHADERS2D_H diff --git a/VS2008/Supermodel.vcxproj b/VS2008/Supermodel.vcxproj index d755d81..23fa4bb 100644 --- a/VS2008/Supermodel.vcxproj +++ b/VS2008/Supermodel.vcxproj @@ -306,6 +306,7 @@ xcopy /D /Y "$(ProjectDir)..\Assets\*" "$(TargetDir)Assets" + @@ -478,6 +479,7 @@ xcopy /D /Y "$(ProjectDir)..\Assets\*" 
"$(TargetDir)Assets" + diff --git a/VS2008/Supermodel.vcxproj.filters b/VS2008/Supermodel.vcxproj.filters index e554f3d..a56ee68 100644 --- a/VS2008/Supermodel.vcxproj.filters +++ b/VS2008/Supermodel.vcxproj.filters @@ -467,6 +467,9 @@ Source Files\OSD\SDL + + Source Files\Graphics + @@ -847,6 +850,9 @@ Header Files\OSD\SDL + + Header Files\Graphics +