/** ** Supermodel ** A Sega Model 3 Arcade Emulator. ** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** ** Supermodel is free software: you can redistribute it and/or modify it under ** the terms of the GNU General Public License as published by the Free ** Software Foundation, either version 3 of the License, or (at your option) ** any later version. ** ** Supermodel is distributed in the hope that it will be useful, but WITHOUT ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ** more details. ** ** You should have received a copy of the GNU General Public License along ** with Supermodel. If not, see . **/ /* * TileGen.cpp * * Implementation of the CTileGen class: 2D tile generator. Palette decoding * and synchronization with the renderer (which may run in a separate thread) * are performed here as well. For a description of the tile generator * hardware, please refer to the 2D rendering engine source code. * * Palettes * -------- * * Multiple copies of the 32K-color palette data are maintained. The first is * the raw data as written to the VRAM. Two copies are computed, one for layers * A/A' and the other for layers B/B'. These pairs of layers have independent * color offset registers associated with them. The renderer uses these * "computed" palettes. * * The computed palettes are updated whenever the real palette is modified, a * single color entry at a time. If the color register is modified, the entire * palette has to be recomputed accordingly. * * The read-only copy of the palette, which is generated for the renderer, only * stores the two computed palettes. * * TO-DO List: * ----------- * - For consistency, the registers should probably be byte reversed (this is a * little endian device), forcing the Model3 Read32/Write32 handlers to * manually reverse the data. This keeps with the convention for VRAM. */ #include #include "Supermodel.h" // Macros that divide memory regions into pages and mark them as dirty when they are written to #define PAGE_WIDTH 10 #define PAGE_SIZE (1<>(PAGE_WIDTH+3)] |= 1<<((addr>>PAGE_WIDTH)&7) // Offsets of memory regions within TileGen memory pool #define OFFSET_VRAM 0x000000 // VRAM and palette data #define OFFSET_PAL_A 0x120000 // computed A/A' palette #define OFFSET_PAL_B 0x140000 // computed B/B' palette #define MEM_POOL_SIZE_RW (0x120000+0x040000) #define OFFSET_VRAM_RO 0x160000 // [read-only snapshot] #define OFFSET_PAL_RO_A 0x280000 // [read-only snapshot] #define OFFSET_PAL_RO_B 0x2A0000 #define MEM_POOL_SIZE_RO (0x120000+0x040000) #define OFFSET_VRAM_DIRTY 0x2C0000 #define OFFSET_PAL_A_DIRTY (OFFSET_VRAM_DIRTY+DIRTY_SIZE(0x120000)) #define OFFSET_PAL_B_DIRTY (OFFSET_PAL_A_DIRTY+DIRTY_SIZE(0x20000)) #define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(0x120000)+2*DIRTY_SIZE(0x20000)) // VRAM + 2 palette dirty buffers #define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY) /****************************************************************************** Save States ******************************************************************************/ void CTileGen::SaveState(CBlockFile *SaveState) { SaveState->NewBlock("Tile Generator", __FILE__); SaveState->Write(vram, 0x120000); // Don't write out palette, read-only snapshots or dirty page arrays, just VRAM SaveState->Write(regs, sizeof(regs)); } void CTileGen::LoadState(CBlockFile *SaveState) { if (OKAY != SaveState->FindBlock("Tile Generator")) { ErrorLog("Unable to load tile generator state. Save state file is corrupt."); return; } // Load memory one word at a time for (int i = 0; i < 0x120000; i += 4) { UINT32 data; SaveState->Read(&data, sizeof(data)); WriteRAM(i, data); } SaveState->Read(regs, sizeof(regs)); // Because regs were read after palette, must recompute RecomputePalettes(); // If multi-threaded, update read-only snapshots too if (g_Config.gpuMultiThreaded) UpdateSnapshots(true); } /****************************************************************************** Rendering ******************************************************************************/ void CTileGen::BeginVBlank(void) { /* printf("08: %X\n", regs[0x08/4]); printf("0C: %X\n", regs[0x0C/4]); printf("20: %X\n", regs[0x20/4]); printf("40: %X\n", regs[0x40/4]); printf("44: %X\n", regs[0x44/4]); printf("60: %08X\n", regs[0x60/4]); printf("64: %08X\n", regs[0x64/4]); printf("68: %08X\n", regs[0x68/4]); printf("6C: %08X\n", regs[0x6C/4]); printf("\n"); */ } void CTileGen::EndVBlank(void) { // } void CTileGen::RecomputePalettes(void) { // Writing the colors forces palettes to be computed if (g_Config.gpuMultiThreaded) { for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 ) { MARK_DIRTY(palDirty[0], colorAddr); MARK_DIRTY(palDirty[1], colorAddr); WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]); } } else { for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 ) WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]); } } UINT32 CTileGen::SyncSnapshots(void) { // Good time to recompute the palettes if (recomputePalettes) { RecomputePalettes(); recomputePalettes = false; } if (!g_Config.gpuMultiThreaded) return 0; // Update read-only snapshots return UpdateSnapshots(false); } UINT32 CTileGen::UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty) { unsigned dirtySize = DIRTY_SIZE(size); if (copyWhole) { // If updating whole region, then just copy all data in one go memcpy(dst, src, size); memset(dirty, 0, dirtySize); return size; } else { // Otherwise, loop through dirty pages array to find out what needs to be updated and copy only those parts UINT32 copied = 0; UINT8 *pSrc = src; UINT8 *pDst = dst; for (unsigned i = 0; i < dirtySize; i++) { UINT8 d = dirty[i]; if (d) { for (unsigned j = 0; j < 8; j++) { if (d&1) { // If not at very end of region, then copy an extra 4 bytes to allow for a possible 32-bit overlap UINT32 toCopy = (i < dirtySize - 1 || j < 7 ? PAGE_SIZE + 4 : PAGE_SIZE); memcpy(pDst, pSrc, toCopy); copied += toCopy; } d >>= 1; pSrc += PAGE_SIZE; pDst += PAGE_SIZE; } dirty[i] = 0; } else { pSrc += 8 * PAGE_SIZE; pDst += 8 * PAGE_SIZE; } } return copied; } } UINT32 CTileGen::UpdateSnapshots(bool copyWhole) { // Update all memory region snapshots UINT32 palACopied = UpdateSnapshot(copyWhole, (UINT8*)pal[0], (UINT8*)palRO[0], 0x020000, palDirty[0]); UINT32 palBCopied = UpdateSnapshot(copyWhole, (UINT8*)pal[1], (UINT8*)palRO[1], 0x020000, palDirty[1]); UINT32 vramCopied = UpdateSnapshot(copyWhole, (UINT8*)vram, (UINT8*)vramRO, 0x120000, vramDirty); memcpy(regsRO, regs, sizeof(regs)); // Always copy whole of regs buffer //printf("TileGen copied - palA:%4uK, palB:%4uK, vram:%4uK, regs:%uK\n", palACopied / 1024, palBCopied / 1024, vramCopied / 1024, sizeof(regs) / 1024); return palACopied + palBCopied + vramCopied + sizeof(regs); } void CTileGen::BeginFrame(void) { // NOTE: Render2D->WriteVRAM(addr, data) is no longer being called for RAM addresses that are written // to and instead this class relies upon the fact that Render2D currently marks everything as dirty // with every frame. If this were to change in the future then code to handle marking the correct // parts of the renderer as dirty would need to be added here. Render2D->BeginFrame(); } void CTileGen::PreRenderFrame(void) { Render2D->PreRenderFrame(); } void CTileGen::RenderFrameBottom(void) { Render2D->RenderFrameBottom(); } void CTileGen::RenderFrameTop(void) { Render2D->RenderFrameTop(); } void CTileGen::EndFrame(void) { Render2D->EndFrame(); } /****************************************************************************** Emulation Functions ******************************************************************************/ UINT32 CTileGen::ReadRAM(unsigned addr) { return *(UINT32 *) &vram[addr]; } void CTileGen::WriteRAM(unsigned addr, UINT32 data) { if (g_Config.gpuMultiThreaded) MARK_DIRTY(vramDirty, addr); *(UINT32 *) &vram[addr] = data; // Update palette if required if (addr >= 0x100000) { addr -= 0x100000; unsigned color = addr/4; // color index // Same address in both palettes must be marked dirty if (g_Config.gpuMultiThreaded) { MARK_DIRTY(palDirty[0], addr); MARK_DIRTY(palDirty[1], addr); } // Both palettes will be modified simultaneously WritePalette(color, data); } } void CTileGen::InitPalette(void) { for (int i = 0; i < 0x20000/4; i++) { WritePalette(i, *(UINT32 *) &vram[0x100000 + i*4]); if (g_Config.gpuMultiThreaded) { palRO[0][i] = pal[0][i]; palRO[1][i] = pal[1][i]; } } } static inline UINT32 AddColorOffset(UINT8 r, UINT8 g, UINT8 b, UINT8 a, UINT32 offsetReg) { INT32 ir, ig, ib; /* * Color offsets are signed but I'm not sure whether or not their range is * merely [-128,+127], which would mean adding to a 0 component would not * result full intensity (only +127 at most). Alternatively, the signed * value might have to be multiplied by 2. That is assumed here. In either * case, the signed addition should be saturated. */ ib = (INT32) (INT8)((offsetReg>>16)&0xFF); ig = (INT32) (INT8)((offsetReg>>8)&0xFF); ir = (INT32) (INT8)((offsetReg>>0)&0xFF); ib *= 2; ig *= 2; ir *= 2; // Add with saturation ib += (INT32) (UINT32) b; if (ib < 0) ib = 0; else if (ib > 0xFF) ib = 0xFF; ig += (INT32) (UINT32) g; if (ig < 0) ig = 0; else if (ig > 0xFF) ig = 0xFF; ir += (INT32) (UINT32) r; if (ir < 0) ir = 0; else if (ir > 0xFF) ir = 0xFF; // Construct the final 32-bit ABGR-format color r = (UINT8) ir; g = (UINT8) ig; b = (UINT8) ib; return ((UINT32)a<<24)|((UINT32)b<<16)|((UINT32)g<<8)|(UINT32)r; } void CTileGen::WritePalette(unsigned color, UINT32 data) { UINT8 r, g, b, a; a = 0xFF * ((data>>15)&1); // decode the RGBA (make alpha 0xFF or 0x00) a = ~a; // invert it (set on Model 3 means clear pixel) if ((data&0x8000)) r = g = b = 0; else { b = (data>>7)&0xF8; g = (data>>2)&0xF8; r = (data<<3)&0xF8; } pal[0][color] = AddColorOffset(r, g, b, a, regs[0x40/4]); // A/A' pal[1][color] = AddColorOffset(r, g, b, a, regs[0x44/4]); // B/B' } void CTileGen::WriteRegister(unsigned reg, UINT32 data) { reg &= 0xFF; switch (reg) { case 0x08: case 0x0C: case 0x20: case 0x60: case 0x64: case 0x68: case 0x6C: break; case 0x40: // layer A/A' color offset case 0x44: // layer B/B' color offset // We only have a mechanism to recompute both palettes simultaneously. // These regs are often written together in the same frame. To avoid // needlessly recomputing both palettes twice, we defer the operation. if (regs[reg/4] != data) // only if changed recomputePalettes = true; break; case 0x10: // IRQ acknowledge IRQ->Deassert(data&0xFF); break; default: DebugLog("Tile Generator reg %02X = %08X\n", reg, data); //printf("%02X = %08X\n", reg, data); break; } // Modify register regs[reg/4] = data; } void CTileGen::Reset(void) { unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW); memset(memoryPool, 0, memSize); memset(regs, 0, sizeof(regs)); memset(regsRO, 0, sizeof(regsRO)); InitPalette(); recomputePalettes = false; DebugLog("Tile Generator reset\n"); } /****************************************************************************** Configuration, Initialization, and Shutdown ******************************************************************************/ void CTileGen::AttachRenderer(CRender2D *Render2DPtr) { Render2D = Render2DPtr; // If multi-threaded, attach read-only snapshots to renderer instead of real ones if (g_Config.gpuMultiThreaded) { Render2D->AttachVRAM(vramRO); Render2D->AttachPalette((const UINT32 **)palRO); Render2D->AttachRegisters(regsRO); } else { Render2D->AttachVRAM(vram); Render2D->AttachPalette((const UINT32 **)pal); Render2D->AttachRegisters(regs); } DebugLog("Tile Generator attached a Render2D object\n"); } bool CTileGen::Init(CIRQ *IRQObjectPtr) { unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW); float memSizeMB = (float)memSize/(float)0x100000; // Allocate all memory for all TileGen RAM regions memoryPool = new(std::nothrow) UINT8[memSize]; if (NULL == memoryPool) return ErrorLog("Insufficient memory for tile generator object (needs %1.1f MB).", memSizeMB); // Set up main pointers vram = (UINT8 *) &memoryPool[OFFSET_VRAM]; pal[0] = (UINT32 *) &memoryPool[OFFSET_PAL_A]; pal[1] = (UINT32 *) &memoryPool[OFFSET_PAL_B]; // If multi-threaded, set up pointers for read-only snapshots and dirty page arrays too if (g_Config.gpuMultiThreaded) { vramRO = (UINT8 *) &memoryPool[OFFSET_VRAM_RO]; palRO[0] = (UINT32 *) &memoryPool[OFFSET_PAL_RO_A]; palRO[1] = (UINT32 *) &memoryPool[OFFSET_PAL_RO_B]; vramDirty = (UINT8 *) &memoryPool[OFFSET_VRAM_DIRTY]; palDirty[0] = (UINT8 *) &memoryPool[OFFSET_PAL_A_DIRTY]; palDirty[1] = (UINT8 *) &memoryPool[OFFSET_PAL_B_DIRTY]; } // Hook up the IRQ controller IRQ = IRQObjectPtr; DebugLog("Initialized Tile Generator (allocated %1.1f MB and connected to IRQ controller)\n", memSizeMB); return OKAY; } CTileGen::CTileGen(void) { IRQ = NULL; memoryPool = NULL; DebugLog("Built Tile Generator\n"); } CTileGen::~CTileGen(void) { // Dump tile generator RAM #if 0 FILE *fp; fp = fopen("tileram", "wb"); if (NULL != fp) { fwrite(memoryPool, sizeof(UINT8), 0x120000, fp); fclose(fp); printf("dumped %s\n", "tileram"); } else printf("unable to dump %s\n", "tileram"); #endif IRQ = NULL; if (memoryPool != NULL) { delete [] memoryPool; memoryPool = NULL; } DebugLog("Destroyed Tile Generator\n"); }