mirror of
https://github.com/RetroDECK/Supermodel.git
synced 2024-11-23 06:15:37 +00:00
549 lines
16 KiB
C++
549 lines
16 KiB
C++
/**
|
|
** Supermodel
|
|
** A Sega Model 3 Arcade Emulator.
|
|
** Copyright 2011-2012 Bart Trzynadlowski, Nik Henson
|
|
**
|
|
** This file is part of Supermodel.
|
|
**
|
|
** Supermodel is free software: you can redistribute it and/or modify it under
|
|
** the terms of the GNU General Public License as published by the Free
|
|
** Software Foundation, either version 3 of the License, or (at your option)
|
|
** any later version.
|
|
**
|
|
** Supermodel is distributed in the hope that it will be useful, but WITHOUT
|
|
** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
** more details.
|
|
**
|
|
** You should have received a copy of the GNU General Public License along
|
|
** with Supermodel. If not, see <http://www.gnu.org/licenses/>.
|
|
**/
|
|
|
|
/*
 * TileGen.cpp
 *
 * Implementation of the CTileGen class: 2D tile generator. Palette decoding
 * and synchronization with the renderer (which may run in a separate thread)
 * are performed here as well. For a description of the tile generator
 * hardware, please refer to the 2D rendering engine source code.
 *
 * Palettes
 * --------
 *
 * Multiple copies of the 32K-color palette data are maintained. The first is
 * the raw data as written to the VRAM. Two copies are computed, one for layers
 * A/A' and the other for layers B/B'. These pairs of layers have independent
 * color offset registers associated with them. The renderer uses these
 * "computed" palettes.
 *
 * The computed palettes are updated whenever the real palette is modified, a
 * single color entry at a time. If the color register is modified, the entire
 * palette has to be recomputed accordingly.
 *
 * The read-only copy of the palette, which is generated for the renderer, only
 * stores the two computed palettes.
 *
 * TO-DO List:
 * -----------
 * - For consistency, the registers should probably be byte reversed (this is a
 *   little endian device), forcing the Model3 Read32/Write32 handlers to
 *   manually reverse the data. This keeps with the convention for VRAM.
 */
|
|
|
|
#include <cstring>
|
|
#include "Supermodel.h"
|
|
|
|
// Macros that divide memory regions into pages and mark them as dirty when
// they are written to. Each page is (1 << PAGE_WIDTH) bytes and is tracked by
// a single bit, so one byte of a dirty array covers 8 consecutive pages.
//
// All macro arguments are fully parenthesized so that compound expressions
// (e.g. "a & mask" or "cond ? x : y") expand with the intended precedence.
#define PAGE_WIDTH 10
#define PAGE_SIZE (1<<PAGE_WIDTH)

// Number of bytes needed for the dirty bit array of a region of arraySize bytes
#define DIRTY_SIZE(arraySize) (1+((arraySize)-1)/(8*PAGE_SIZE))

// Sets the dirty bit of the page that contains byte offset addr
#define MARK_DIRTY(dirtyArray, addr) ((dirtyArray)[(addr)>>(PAGE_WIDTH+3)] |= 1<<(((addr)>>PAGE_WIDTH)&7))

// Offsets of memory regions within TileGen memory pool
#define OFFSET_VRAM 0x000000  // VRAM and palette data
#define OFFSET_PAL_A 0x120000 // computed A/A' palette
#define OFFSET_PAL_B 0x140000 // computed B/B' palette
#define MEM_POOL_SIZE_RW (0x120000+0x040000)

#define OFFSET_VRAM_RO 0x160000  // [read-only snapshot]
#define OFFSET_PAL_RO_A 0x280000 // [read-only snapshot]
#define OFFSET_PAL_RO_B 0x2A0000 // [read-only snapshot]
#define MEM_POOL_SIZE_RO (0x120000+0x040000)

#define OFFSET_VRAM_DIRTY 0x2C0000
#define OFFSET_PAL_A_DIRTY (OFFSET_VRAM_DIRTY+DIRTY_SIZE(0x120000))
#define OFFSET_PAL_B_DIRTY (OFFSET_PAL_A_DIRTY+DIRTY_SIZE(0x20000))
#define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(0x120000)+2*DIRTY_SIZE(0x20000)) // VRAM + 2 palette dirty buffers

// Total pool size when read-only snapshots and dirty arrays are in use
#define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY)
|
|
|
|
|
|
/******************************************************************************
|
|
Save States
|
|
******************************************************************************/
|
|
|
|
// Writes the tile generator state into a new "Tile Generator" block of the
// given save state file: first the 0x120000 bytes of VRAM, then the register
// file. Computed palettes, read-only snapshots and dirty page arrays are
// deliberately not stored.
void CTileGen::SaveState(CBlockFile *SaveState)
{
  const unsigned vramBytes = 0x120000;

  SaveState->NewBlock("Tile Generator", __FILE__);
  SaveState->Write(vram, vramBytes);
  SaveState->Write(regs, sizeof(regs));
}
|
|
|
|
// Restores the tile generator state from the "Tile Generator" block of the
// given save state file. Logs an error and leaves the current state untouched
// if the block cannot be found.
void CTileGen::LoadState(CBlockFile *SaveState)
{
  if (SaveState->FindBlock("Tile Generator") != OKAY)
  {
    ErrorLog("Unable to load tile generator state. Save state file is corrupt.");
    return;
  }

  // VRAM is pushed through WriteRAM32() word by word so that dirty tracking
  // and palette decoding happen exactly as for a normal write
  UINT32 word;
  for (unsigned offset = 0; offset < 0x120000; offset += 4)
  {
    SaveState->Read(&word, sizeof(word));
    WriteRAM32(offset, word);
  }

  SaveState->Read(regs, sizeof(regs));

  // The palettes above were decoded before the registers (and thus the color
  // offsets) were loaded, so they have to be rebuilt now
  RecomputePalettes();

  // In multi-threaded mode the read-only snapshots are refreshed in full
  if (g_Config.gpuMultiThreaded)
  {
    UpdateSnapshots(true);
  }
}
|
|
|
|
|
|
/******************************************************************************
|
|
Rendering
|
|
******************************************************************************/
|
|
|
|
// Called at the start of VBlank. Currently does nothing; the disabled code
// below dumps a handful of registers for debugging.
void CTileGen::BeginVBlank(void)
{
#if 0
  printf("08: %X\n", regs[0x08/4]);
  printf("0C: %X\n", regs[0x0C/4]);
  printf("20: %X\n", regs[0x20/4]);
  printf("40: %X\n", regs[0x40/4]);
  printf("44: %X\n", regs[0x44/4]);
  printf("60: %08X\n", regs[0x60/4]);
  printf("64: %08X\n", regs[0x64/4]);
  printf("68: %08X\n", regs[0x68/4]);
  printf("6C: %08X\n", regs[0x6C/4]);
  printf("\n");
#endif
}
|
|
|
|
// Called at the end of VBlank. Intentionally empty.
void CTileGen::EndVBlank(void)
{
  // Nothing to do
}
|
|
|
|
void CTileGen::RecomputePalettes(void)
|
|
{
|
|
// Writing the colors forces palettes to be computed
|
|
if (g_Config.gpuMultiThreaded)
|
|
{
|
|
for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 )
|
|
{
|
|
MARK_DIRTY(palDirty[0], colorAddr);
|
|
MARK_DIRTY(palDirty[1], colorAddr);
|
|
WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (unsigned colorAddr = 0; colorAddr < 32768*4; colorAddr += 4 )
|
|
WritePalette(colorAddr/4, *(UINT32 *) &vram[0x100000+colorAddr]);
|
|
}
|
|
}
|
|
|
|
// Performs any deferred palette recomputation and, in multi-threaded mode,
// brings the read-only snapshots up to date using the dirty page arrays.
// Returns the value reported by UpdateSnapshots(), or 0 when not running
// multi-threaded.
UINT32 CTileGen::SyncSnapshots(void)
{
  // A color offset register changed since the last sync: rebuild palettes now
  if (recomputePalettes)
  {
    RecomputePalettes();
    recomputePalettes = false;
  }

  // Snapshots only exist in multi-threaded mode
  return g_Config.gpuMultiThreaded ? UpdateSnapshots(false) : 0;
}
|
|
|
|
// Copies a memory region from src to its snapshot at dst and clears the
// region's dirty bits.
//
//  copyWhole  If true, the entire region is copied in one go; otherwise only
//             pages whose dirty bit is set are copied.
//  src, dst   Start of the live region and of its snapshot.
//  size       Size of the region in bytes.
//  dirty      Dirty bit array for the region (one bit per page).
//
// Returns the number of bytes passed to memcpy().
UINT32 CTileGen::UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty)
{
  const unsigned dirtySize = DIRTY_SIZE(size);

  // Full refresh: one big copy, then wipe all dirty bits
  if (copyWhole)
  {
    memcpy(dst, src, size);
    memset(dirty, 0, dirtySize);
    return size;
  }

  // Partial refresh: walk the dirty array and copy only the flagged pages
  UINT32 copied = 0;
  for (unsigned byteIdx = 0; byteIdx < dirtySize; byteIdx++)
  {
    UINT8 flags = dirty[byteIdx];
    if (!flags)
      continue;

    for (unsigned bit = 0; bit < 8; bit++, flags >>= 1)
    {
      if (!(flags&1))
        continue;

      const unsigned pageOffset = (byteIdx * 8 + bit) * PAGE_SIZE;

      // Unless this is the very last page slot, copy 4 extra bytes to allow
      // for a possible 32-bit overlap into the following page
      const bool lastSlot = (byteIdx == dirtySize - 1) && (bit == 7);
      const UINT32 toCopy = lastSlot ? PAGE_SIZE : PAGE_SIZE + 4;

      memcpy(dst + pageOffset, src + pageOffset, toCopy);
      copied += toCopy;
    }

    dirty[byteIdx] = 0;
  }
  return copied;
}
|
|
|
|
// Refreshes every read-only snapshot: both computed palettes, VRAM and the
// register file. The registers are always copied in full; the other regions
// are copied entirely when copyWhole is true and page-by-page otherwise.
// Returns the total number of bytes copied, including the register file.
UINT32 CTileGen::UpdateSnapshots(bool copyWhole)
{
  UINT32 total = 0;

  total += UpdateSnapshot(copyWhole, (UINT8*)pal[0], (UINT8*)palRO[0], 0x020000, palDirty[0]);
  total += UpdateSnapshot(copyWhole, (UINT8*)pal[1], (UINT8*)palRO[1], 0x020000, palDirty[1]);
  total += UpdateSnapshot(copyWhole, (UINT8*)vram, (UINT8*)vramRO, 0x120000, vramDirty);

  // The register file is small, so no dirty tracking is done for it
  memcpy(regsRO, regs, sizeof(regs));

  return total + sizeof(regs);
}
|
|
|
|
void CTileGen::BeginFrame(void)
|
|
{
|
|
// NOTE: Render2D->WriteVRAM(addr, data) is no longer being called for RAM addresses that are written
|
|
// to and instead this class relies upon the fact that Render2D currently marks everything as dirty
|
|
// with every frame. If this were to change in the future then code to handle marking the correct
|
|
// parts of the renderer as dirty would need to be added here.
|
|
|
|
Render2D->BeginFrame();
|
|
}
|
|
|
|
void CTileGen::PreRenderFrame(void)
|
|
{
|
|
Render2D->PreRenderFrame();
|
|
}
|
|
|
|
void CTileGen::RenderFrameBottom(void)
|
|
{
|
|
Render2D->RenderFrameBottom();
|
|
}
|
|
|
|
void CTileGen::RenderFrameTop(void)
|
|
{
|
|
Render2D->RenderFrameTop();
|
|
}
|
|
|
|
void CTileGen::EndFrame(void)
|
|
{
|
|
Render2D->EndFrame();
|
|
}
|
|
|
|
/******************************************************************************
|
|
Emulation Functions
|
|
******************************************************************************/
|
|
|
|
// Reads the 32-bit word stored at byte offset addr of VRAM.
UINT32 CTileGen::ReadRAM32(unsigned addr)
{
  const UINT32 *word = (const UINT32 *) &vram[addr];
  return *word;
}
|
|
|
|
void CTileGen::WriteRAM32(unsigned addr, UINT32 data)
|
|
{
|
|
if (g_Config.gpuMultiThreaded)
|
|
MARK_DIRTY(vramDirty, addr);
|
|
*(UINT32 *) &vram[addr] = data;
|
|
|
|
// Update palette if required
|
|
if (addr >= 0x100000)
|
|
{
|
|
addr -= 0x100000;
|
|
unsigned color = addr/4; // color index
|
|
|
|
// Same address in both palettes must be marked dirty
|
|
if (g_Config.gpuMultiThreaded)
|
|
{
|
|
MARK_DIRTY(palDirty[0], addr);
|
|
MARK_DIRTY(palDirty[1], addr);
|
|
}
|
|
|
|
// Both palettes will be modified simultaneously
|
|
WritePalette(color, data);
|
|
}
|
|
}
|
|
|
|
//TODO: 8- and 16-bit handlers have not been thoroughly tested
|
|
uint8_t CTileGen::ReadRAM8(unsigned addr)
|
|
{
|
|
return vram[addr];
|
|
}
|
|
|
|
void CTileGen::WriteRAM8(unsigned addr, uint8_t data)
|
|
{
|
|
uint32_t tmp = ReadRAM32(addr & ~3);
|
|
uint32_t shift = (addr & 3) * 8;
|
|
uint32_t mask = 0xff << shift;
|
|
tmp &= ~mask;
|
|
tmp |= uint32_t(data) << shift;
|
|
WriteRAM32(addr & ~3, tmp);
|
|
}
|
|
|
|
// Star Wars Trilogy uses this
|
|
uint16_t CTileGen::ReadRAM16(unsigned addr)
|
|
{
|
|
return *((uint16_t *) &vram[addr]);
|
|
}
|
|
|
|
void CTileGen::WriteRAM16(unsigned addr, uint16_t data)
|
|
{
|
|
uint32_t tmp = ReadRAM32(addr & ~1);
|
|
uint32_t shift = (addr & 1) * 16;
|
|
uint32_t mask = 0xffff << shift;
|
|
tmp &= ~mask;
|
|
tmp |= uint32_t(data) << shift;
|
|
WriteRAM32(addr & ~1, tmp);
|
|
}
|
|
|
|
void CTileGen::InitPalette(void)
|
|
{
|
|
for (int i = 0; i < 0x20000/4; i++)
|
|
{
|
|
WritePalette(i, *(UINT32 *) &vram[0x100000 + i*4]);
|
|
if (g_Config.gpuMultiThreaded)
|
|
{
|
|
palRO[0][i] = pal[0][i];
|
|
palRO[1][i] = pal[1][i];
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline UINT32 AddColorOffset(UINT8 r, UINT8 g, UINT8 b, UINT8 a, UINT32 offsetReg)
|
|
{
|
|
INT32 ir, ig, ib;
|
|
|
|
/*
|
|
* Color offsets are signed but I'm not sure whether or not their range is
|
|
* merely [-128,+127], which would mean adding to a 0 component would not
|
|
* result full intensity (only +127 at most). Alternatively, the signed
|
|
* value might have to be multiplied by 2. That is assumed here. In either
|
|
* case, the signed addition should be saturated.
|
|
*/
|
|
|
|
ib = (INT32) (INT8)((offsetReg>>16)&0xFF);
|
|
ig = (INT32) (INT8)((offsetReg>>8)&0xFF);
|
|
ir = (INT32) (INT8)((offsetReg>>0)&0xFF);
|
|
ib *= 2;
|
|
ig *= 2;
|
|
ir *= 2;
|
|
|
|
// Add with saturation
|
|
ib += (INT32) (UINT32) b;
|
|
if (ib < 0) ib = 0;
|
|
else if (ib > 0xFF) ib = 0xFF;
|
|
ig += (INT32) (UINT32) g;
|
|
if (ig < 0) ig = 0;
|
|
else if (ig > 0xFF) ig = 0xFF;
|
|
ir += (INT32) (UINT32) r;
|
|
if (ir < 0) ir = 0;
|
|
else if (ir > 0xFF) ir = 0xFF;
|
|
|
|
// Construct the final 32-bit ABGR-format color
|
|
r = (UINT8) ir;
|
|
g = (UINT8) ig;
|
|
b = (UINT8) ib;
|
|
return ((UINT32)a<<24)|((UINT32)b<<16)|((UINT32)g<<8)|(UINT32)r;
|
|
}
|
|
|
|
void CTileGen::WritePalette(unsigned color, UINT32 data)
|
|
{
|
|
UINT8 r, g, b, a;
|
|
|
|
a = 0xFF * ((data>>15)&1); // decode the RGBA (make alpha 0xFF or 0x00)
|
|
a = ~a; // invert it (set on Model 3 means clear pixel)
|
|
|
|
if ((data&0x8000))
|
|
r = g = b = 0;
|
|
else
|
|
{
|
|
b = (((data >> 10) & 0x1F) * 255) / 31;
|
|
g = (((data >> 5) & 0x1F) * 255) / 31;
|
|
r = ((data & 0x1F) * 255) / 31;
|
|
}
|
|
|
|
pal[0][color] = AddColorOffset(r, g, b, a, regs[0x40/4]); // A/A'
|
|
pal[1][color] = AddColorOffset(r, g, b, a, regs[0x44/4]); // B/B'
|
|
}
|
|
|
|
// Returns the register at byte offset reg. Only the low 8 bits of reg are
// used.
UINT32 CTileGen::ReadRegister(unsigned reg)
{
  const unsigned index = (reg & 0xFF) / 4;
  return regs[index];
}
|
|
|
|
void CTileGen::WriteRegister(unsigned reg, UINT32 data)
|
|
{
|
|
reg &= 0xFF;
|
|
|
|
switch (reg)
|
|
{
|
|
case 0x00:
|
|
case 0x08:
|
|
case 0x0C:
|
|
case 0x20:
|
|
case 0x60:
|
|
case 0x64:
|
|
case 0x68:
|
|
case 0x6C:
|
|
break;
|
|
case 0x40: // layer A/A' color offset
|
|
case 0x44: // layer B/B' color offset
|
|
// We only have a mechanism to recompute both palettes simultaneously.
|
|
// These regs are often written together in the same frame. To avoid
|
|
// needlessly recomputing both palettes twice, we defer the operation.
|
|
if (regs[reg/4] != data) // only if changed
|
|
recomputePalettes = true;
|
|
break;
|
|
case 0x10: // IRQ acknowledge
|
|
IRQ->Deassert(data&0xFF);
|
|
// MAME believes only lower 4 bits should be cleared
|
|
//IRQ->Deassert(data & 0x0F);
|
|
break;
|
|
default:
|
|
DebugLog("Tile Generator reg %02X = %08X\n", reg, data);
|
|
//printf("%02X = %08X\n", reg, data);
|
|
break;
|
|
}
|
|
|
|
// Modify register
|
|
regs[reg/4] = data;
|
|
}
|
|
|
|
void CTileGen::Reset(void)
|
|
{
|
|
unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
|
|
memset(memoryPool, 0, memSize);
|
|
memset(regs, 0, sizeof(regs));
|
|
memset(regsRO, 0, sizeof(regsRO));
|
|
|
|
InitPalette();
|
|
recomputePalettes = false;
|
|
|
|
DebugLog("Tile Generator reset\n");
|
|
}
|
|
|
|
|
|
/******************************************************************************
|
|
Configuration, Initialization, and Shutdown
|
|
******************************************************************************/
|
|
|
|
// Attaches a 2D renderer and hands it the VRAM, palette and register buffers
// it should read from: the live buffers in single-threaded mode, the
// read-only snapshots in multi-threaded mode.
void CTileGen::AttachRenderer(CRender2D *Render2DPtr)
{
  Render2D = Render2DPtr;

  if (!g_Config.gpuMultiThreaded)
  {
    // Single-threaded: renderer works directly on the live data
    Render2D->AttachVRAM(vram);
    Render2D->AttachPalette((const UINT32 **)pal);
    Render2D->AttachRegisters(regs);
  }
  else
  {
    // Multi-threaded: renderer only ever sees the read-only snapshots
    Render2D->AttachVRAM(vramRO);
    Render2D->AttachPalette((const UINT32 **)palRO);
    Render2D->AttachRegisters(regsRO);
  }

  DebugLog("Tile Generator attached a Render2D object\n");
}
|
|
|
|
|
|
// Allocates the memory pool, sets up the pointers into it and connects the
// IRQ controller. The snapshot and dirty array regions are only allocated and
// mapped in multi-threaded mode.
//
// Returns OKAY on success, or the result of ErrorLog() if the allocation
// fails.
bool CTileGen::Init(CIRQ *IRQObjectPtr)
{
  const bool multiThreaded = g_Config.gpuMultiThreaded;
  const unsigned memSize = (multiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
  const float memSizeMB = (float)memSize/(float)0x100000;

  // A single allocation backs all TileGen RAM regions
  memoryPool = new(std::nothrow) UINT8[memSize];
  if (memoryPool == NULL)
    return ErrorLog("Insufficient memory for tile generator object (needs %1.1f MB).", memSizeMB);

  // Live regions
  vram = (UINT8 *) (memoryPool + OFFSET_VRAM);
  pal[0] = (UINT32 *) (memoryPool + OFFSET_PAL_A);
  pal[1] = (UINT32 *) (memoryPool + OFFSET_PAL_B);

  // Read-only snapshots and dirty page arrays (multi-threaded mode only)
  if (multiThreaded)
  {
    vramRO = (UINT8 *) (memoryPool + OFFSET_VRAM_RO);
    palRO[0] = (UINT32 *) (memoryPool + OFFSET_PAL_RO_A);
    palRO[1] = (UINT32 *) (memoryPool + OFFSET_PAL_RO_B);
    vramDirty = (UINT8 *) (memoryPool + OFFSET_VRAM_DIRTY);
    palDirty[0] = (UINT8 *) (memoryPool + OFFSET_PAL_A_DIRTY);
    palDirty[1] = (UINT8 *) (memoryPool + OFFSET_PAL_B_DIRTY);
  }

  // Hook up the IRQ controller
  IRQ = IRQObjectPtr;

  DebugLog("Initialized Tile Generator (allocated %1.1f MB and connected to IRQ controller)\n", memSizeMB);
  return OKAY;
}
|
|
|
|
// Constructs a tile generator with no memory pool and no IRQ controller
// attached; Init() sets both up.
CTileGen::CTileGen(void)
{
  memoryPool = NULL;
  IRQ = NULL;

  DebugLog("Built Tile Generator\n");
}
|
|
|
|
// Releases the memory pool and detaches the IRQ controller. The disabled
// code below dumps the tile generator RAM to a file named "tileram".
CTileGen::~CTileGen(void)
{
#if 0
  // Dump tile generator RAM
  FILE *fp;
  fp = fopen("tileram", "wb");
  if (NULL != fp)
  {
    fwrite(memoryPool, sizeof(UINT8), 0x120000, fp);
    fclose(fp);
    printf("dumped %s\n", "tileram");
  }
  else
    printf("unable to dump %s\n", "tileram");
#endif

  IRQ = NULL;

  // delete [] on a null pointer is a no-op, so no explicit check is needed
  delete [] memoryPool;
  memoryPool = NULL;

  DebugLog("Destroyed Tile Generator\n");
}
|