Supermodel/Src/Model3/Real3D.cpp

1128 lines
34 KiB
C++

/**
** Supermodel
** A Sega Model 3 Arcade Emulator.
** Copyright 2011 Bart Trzynadlowski, Nik Henson
**
** This file is part of Supermodel.
**
** Supermodel is free software: you can redistribute it and/or modify it under
** the terms of the GNU General Public License as published by the Free
** Software Foundation, either version 3 of the License, or (at your option)
** any later version.
**
** Supermodel is distributed in the hope that it will be useful, but WITHOUT
** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
** more details.
**
** You should have received a copy of the GNU General Public License along
** with Supermodel. If not, see <http://www.gnu.org/licenses/>.
**/
/*
* Real3D.cpp
*
* The Model 3's Real3D-based graphics hardware. Based on the Real3D Pro-1000
* family of image generators.
*
* PCI IDs
* -------
* It appears that Step 2.0 returns a different PCI ID depending on whether
* the PCI configuration space or DMA register are accessed. For example,
* Virtual On 2 expects 0x178611DB from the PCI configuration header but
* 0x16C311DB from the DMA device.
*
* To-Do List
* ----------
* - For consistency, the status registers should probably be byte reversed (this is a
* little endian device), forcing the Model3 Read32/Write32 handlers to
* manually reverse the data. This keeps with the convention for VRAM.
* - Keep an eye out for games writing non-mipmap textures to the mipmap area.
* The render currently cannot cope with this.
*/
#include "Supermodel.h"
#include <cstring>
// Macros that divide memory regions into pages and mark them as dirty when they are written to
#define PAGE_WIDTH 12
#define PAGE_SIZE (1<<PAGE_WIDTH)
#define DIRTY_SIZE(arraySize) (1+(arraySize-1)/(8*PAGE_SIZE))
#define MARK_DIRTY(dirtyArray, addr) dirtyArray[addr>>(PAGE_WIDTH+3)] |= 1<<((addr>>PAGE_WIDTH)&7)
// Offsets of memory regions within Real3D memory pool
#define OFFSET_8C 0x0000000 // 4 MB, culling RAM low (at 0x8C000000)
#define OFFSET_8E 0x0400000 // 1 MB, culling RAM high (at 0x8E000000)
#define OFFSET_98 0x0500000 // 4 MB, polygon RAM (at 0x98000000)
#define OFFSET_TEXRAM 0x0900000 // 8 MB, texture RAM
#define OFFSET_TEXFIFO 0x1100000 // 1 MB, texture FIFO
#define MEM_POOL_SIZE_RW (0x400000+0x100000+0x400000+0x800000+0x100000)
#define OFFSET_8C_RO 0x1200000 // 4 MB, culling RAM low (at 0x8C000000) [read-only snapshot]
#define OFFSET_8E_RO 0x1600000 // 1 MB, culling RAM high (at 0x8E000000) [read-only snapshot]
#define OFFSET_98_RO 0x1700000 // 4 MB, polygon RAM (at 0x98000000) [read-only snapshot]
#define OFFSET_TEXRAM_RO 0x1B00000 // 8 MB, texture RAM [read-only snapshot]
#define MEM_POOL_SIZE_RO (0x400000+0x100000+0x400000+0x800000)
#define OFFSET_8C_DIRTY 0x2300000
#define OFFSET_8E_DIRTY (OFFSET_8C_DIRTY+DIRTY_SIZE(0x400000))
#define OFFSET_98_DIRTY (OFFSET_8E_DIRTY+DIRTY_SIZE(0x100000))
#define OFFSET_TEXRAM_DIRTY (OFFSET_98_DIRTY+DIRTY_SIZE(0x400000))
#define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(MEM_POOL_SIZE_RO))
#define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY)
/******************************************************************************
Save States
******************************************************************************/
void CReal3D::SaveState(CBlockFile *SaveState)
{
SaveState->NewBlock("Real3D", __FILE__);
SaveState->Write(memoryPool, MEM_POOL_SIZE_RW); // Don't write out read-only snapshots or dirty page arrays
SaveState->Write(&fifoIdx, sizeof(fifoIdx));
SaveState->Write(m_vromTextureFIFO, sizeof(m_vromTextureFIFO));
SaveState->Write(&dmaSrc, sizeof(dmaSrc));
SaveState->Write(&dmaDest, sizeof(dmaDest));
SaveState->Write(&dmaLength, sizeof(dmaLength));
SaveState->Write(&dmaData, sizeof(dmaData));
SaveState->Write(&dmaUnknownReg, sizeof(dmaUnknownReg));
SaveState->Write(&dmaStatus, sizeof(dmaStatus));
SaveState->Write(&dmaConfig, sizeof(dmaConfig));
SaveState->Write(&tapCurrentInstruction, sizeof(tapCurrentInstruction));
SaveState->Write(&tapIR, sizeof(tapIR));
SaveState->Write(tapID, sizeof(tapID));
SaveState->Write(&tapIDSize, sizeof(tapIDSize));
SaveState->Write(&tapTDO, sizeof(tapTDO));
SaveState->Write(&tapState, sizeof(tapState));
SaveState->Write(&m_vromTextureFIFOIdx, sizeof(m_vromTextureFIFOIdx));
}
void CReal3D::LoadState(CBlockFile *SaveState)
{
if (OKAY != SaveState->FindBlock("Real3D"))
{
ErrorLog("Unable to load Real3D GPU state. Save state file is corrupt.");
return;
}
SaveState->Read(memoryPool, MEM_POOL_SIZE_RW);
// If multi-threaded, update read-only snapshots too
if (g_Config.gpuMultiThreaded)
UpdateSnapshots(true);
Render3D->UploadTextures(0, 0, 2048, 2048);
SaveState->Read(&fifoIdx, sizeof(fifoIdx));
SaveState->Read(&m_vromTextureFIFO, sizeof(m_vromTextureFIFO));
SaveState->Read(&dmaSrc, sizeof(dmaSrc));
SaveState->Read(&dmaDest, sizeof(dmaDest));
SaveState->Read(&dmaLength, sizeof(dmaLength));
SaveState->Read(&dmaData, sizeof(dmaData));
SaveState->Read(&dmaUnknownReg, sizeof(dmaUnknownReg));
SaveState->Read(&dmaStatus, sizeof(dmaStatus));
SaveState->Read(&dmaConfig, sizeof(dmaConfig));
SaveState->Read(&tapCurrentInstruction, sizeof(tapCurrentInstruction));
SaveState->Read(&tapIR, sizeof(tapIR));
SaveState->Read(tapID, sizeof(tapID));
SaveState->Read(&tapIDSize, sizeof(tapIDSize));
SaveState->Read(&tapTDO, sizeof(tapTDO));
SaveState->Read(&tapState, sizeof(tapState));
SaveState->Read(&m_vromTextureFIFOIdx, sizeof(m_vromTextureFIFOIdx));
}
/******************************************************************************
Rendering
******************************************************************************/
void CReal3D::BeginVBlank(int statusCycles)
{
// Calculate point at which status bit should change value. Currently the same timing is used for both the status bit in ReadRegister
// and in WriteDMARegister32/ReadDMARegister32, however it may be that they are completely unrelated. It appears that step 1.x games
// access just the former while step 2.x access the latter. It is not known yet what this bit/these bits actually represent.
statusChange = ppc_total_cycles() + statusCycles;
}
void CReal3D::EndVBlank(void)
{
error = false; // clear error (just needs to be done once per frame)
}
uint32_t CReal3D::SyncSnapshots(void)
{
// Update read-only copy of command port flag
commandPortWrittenRO = commandPortWritten;
commandPortWritten = false;
if (!g_Config.gpuMultiThreaded)
return 0;
// Update read-only queue
queuedUploadTexturesRO = queuedUploadTextures;
queuedUploadTextures.clear();
// Update read-only snapshots
return UpdateSnapshots(false);
}
uint32_t CReal3D::UpdateSnapshot(bool copyWhole, uint8_t *src, uint8_t *dst, unsigned size, uint8_t *dirty)
{
unsigned dirtySize = DIRTY_SIZE(size);
if (copyWhole)
{
// If updating whole region, then just copy all data in one go
memcpy(dst, src, size);
memset(dirty, 0, dirtySize);
return size;
}
else
{
// Otherwise, loop through dirty pages array to find out what needs to be updated and copy only those parts
uint32_t copied = 0;
uint8_t *pSrc = src;
uint8_t *pDst = dst;
for (unsigned i = 0; i < dirtySize; i++)
{
uint8_t d = dirty[i];
if (d)
{
for (unsigned j = 0; j < 8; j++)
{
if (d&1)
{
// If not at very end of region, then copy an extra 4 bytes to allow for a possible 32-bit overlap
uint32_t toCopy = (i < dirtySize - 1 || j < 7 ? PAGE_SIZE + 4 : PAGE_SIZE);
memcpy(pDst, pSrc, toCopy);
copied += toCopy;
}
d >>= 1;
pSrc += PAGE_SIZE;
pDst += PAGE_SIZE;
}
dirty[i] = 0;
}
else
{
pSrc += 8 * PAGE_SIZE;
pDst += 8 * PAGE_SIZE;
}
}
return copied;
}
}
uint32_t CReal3D::UpdateSnapshots(bool copyWhole)
{
// Update all memory region snapshots
uint32_t cullLoCopied = UpdateSnapshot(copyWhole, (uint8_t*)cullingRAMLo, (uint8_t*)cullingRAMLoRO, 0x400000, cullingRAMLoDirty);
uint32_t cullHiCopied = UpdateSnapshot(copyWhole, (uint8_t*)cullingRAMHi, (uint8_t*)cullingRAMHiRO, 0x100000, cullingRAMHiDirty);
uint32_t polyCopied = UpdateSnapshot(copyWhole, (uint8_t*)polyRAM, (uint8_t*)polyRAMRO, 0x400000, polyRAMDirty);
uint32_t textureCopied = UpdateSnapshot(copyWhole, (uint8_t*)textureRAM, (uint8_t*)textureRAMRO, 0x800000, textureRAMDirty);
//printf("Read3D copied - cullLo:%4uK, cullHi:%4uK, poly:%4uK, texture:%4uK\n", cullLoCopied / 1024, cullHiCopied / 1024, polyCopied / 1024, textureCopied / 1024);
return cullLoCopied + cullHiCopied + polyCopied + textureCopied;
}
void CReal3D::BeginFrame(void)
{
// If multi-threaded, perform now any queued texture uploads to renderer before rendering begins
if (g_Config.gpuMultiThreaded)
{
for (vector<QueuedUploadTextures>::iterator it = queuedUploadTexturesRO.begin(), end = queuedUploadTexturesRO.end(); it != end; it++)
Render3D->UploadTextures(it->x, it->y, it->width, it->height);
}
Render3D->BeginFrame();
}
void CReal3D::RenderFrame(void)
{
//if (commandPortWrittenRO)
Render3D->RenderFrame();
}
void CReal3D::EndFrame(void)
{
Render3D->EndFrame();
}
/******************************************************************************
DMA Device
Register 0xC:
-------------
+---+---+---+---+---+---+---+---+
|BUS|???|???|???|???|???|???|IRQ|
+---+---+---+---+---+---+---+---+
BUS: Busy (see von2 0x18A104) if 1.
IRQ: IRQ pending.
******************************************************************************/
void CReal3D::DMACopy(void)
{
DebugLog("Real3D DMA copy (PC=%08X, LR=%08X): %08X -> %08X, %X %s\n", ppc_get_pc(), ppc_get_lr(), dmaSrc, dmaDest, dmaLength*4, (dmaConfig&0x80)?"(byte reversed)":"");
//printf("Real3D DMA copy (PC=%08X, LR=%08X): %08X -> %08X, %X %s\n", ppc_get_pc(), ppc_get_lr(), dmaSrc, dmaDest, dmaLength*4, (dmaConfig&0x80)?"(byte reversed)":"");
if ((dmaConfig&0x80)) // reverse bytes
{
while (dmaLength != 0)
{
uint32_t data = Bus->Read32(dmaSrc);
Bus->Write32(dmaDest, FLIPENDIAN32(data));
dmaSrc += 4;
dmaDest += 4;
--dmaLength;
}
}
else
{
while (dmaLength != 0)
{
Bus->Write32(dmaDest, Bus->Read32(dmaSrc));
dmaSrc += 4;
dmaDest += 4;
--dmaLength;
}
}
}
uint8_t CReal3D::ReadDMARegister8(unsigned reg)
{
switch (reg)
{
case 0xC: // status
return dmaStatus;
case 0xE: // configuration
return dmaConfig;
default:
break;
}
DebugLog("Real3D: ReadDMARegister8: reg=%X\n", reg);
return 0;
}
void CReal3D::WriteDMARegister8(unsigned reg, uint8_t data)
{
switch (reg)
{
case 0xD: // IRQ acknowledge
if ((data&1))
{
dmaStatus &= ~1;
IRQ->Deassert(dmaIRQ);
}
break;
case 0xE: // configuration
dmaConfig = data;
break;
default:
DebugLog("Real3D: WriteDMARegister8: reg=%X, data=%02X\n", reg, data);
break;
}
//DebugLog("Real3D: WriteDMARegister8: reg=%X, data=%02X\n", reg, data);
}
uint32_t CReal3D::ReadDMARegister32(unsigned reg)
{
switch (reg)
{
case 0x14: // command result
return dmaData;
default:
break;
}
DebugLog("Real3D: ReadDMARegister32: reg=%X\n", reg);
return 0;
}
void CReal3D::WriteDMARegister32(unsigned reg, uint32_t data)
{
switch (reg)
{
case 0x00: // DMA source address
dmaSrc = data;
break;
case 0x04: // DMA destination address
dmaDest = data;
break;
case 0x08: // DMA length
dmaLength = data;
DMACopy();
dmaStatus |= 1;
IRQ->Assert(dmaIRQ);
break;
case 0x10: // command register
if ((data&0x20000000))
{
dmaData = 0x16C311DB; // Virtual On 2 expects this from DMA
DebugLog("Real3D: DMA ID command issued (ATTENTION: make sure we're returning the correct value), PC=%08X, LR=%08X\n", ppc_get_pc(), ppc_get_lr());
}
else if ((data&0x80000000))
{
//dmaUnknownReg ^= 0xFFFFFFFF;
//dmaData = dmaUnknownReg;
dmaData = (ppc_total_cycles() >= statusChange ? 0x0 : 0xFFFFFFFF); // Not sure yet if it is just bit 2 as per ReadRegister above
}
break;
case 0x14: // ?
dmaData = 0xFFFFFFFF;
break;
default:
DebugLog("Real3D: WriteDMARegister32: reg=%X, data=%08X\n", reg, data);
break;
}
//DebugLog("Real3D: WriteDMARegister32: reg=%X, data=%08X\n", reg, data);
}
/******************************************************************************
JTAG Test Access Port Simulation
What I term as "IDs" here are really boundary scan values.
******************************************************************************/
static const int tapFSM[][2] = // finite state machine, each state can lead to 2 next states
{
{ 1, 0 }, // 0 Test-Logic/Reset
{ 1, 2 }, // 1 Run-Test/Idle
{ 3, 9 }, // 2 Select-DR-Scan
{ 4, 5 }, // 3 Capture-DR
{ 4, 5 }, // 4 Shift-DR
{ 6, 8 }, // 5 Exit1-DR
{ 6, 7 }, // 6 Pause-DR
{ 4, 8 }, // 7 Exit2-DR
{ 1, 2 }, // 8 Update-DR
{ 10, 0 }, // 9 Select-IR-Scan
{ 11, 12 }, // 10 Capture-IR
{ 11, 12 }, // 11 Shift-IR
{ 13, 15 }, // 12 Exit1-IR
{ 13, 14 }, // 13 Pause-IR
{ 11, 15 }, // 14 Exit2-IR
{ 1, 2 } // 15 Update-IR
};
/*
* InsertBit():
*
* Inserts a bit into an arbitrarily long bit field. Bit 0 is assumed to be
* the MSB of the first byte in the buffer.
*/
void CReal3D::InsertBit(uint8_t *buf, unsigned bitNum, unsigned bit)
{
unsigned bitInByte = 7 - (bitNum & 7);
buf[bitNum / 8] &= ~(1 << bitInByte);
buf[bitNum / 8] |= (bit << bitInByte);
}
/*
* InsertID():
*
* Inserts a 32-bit ID code into the ID bit field.
*/
void CReal3D::InsertID(uint32_t id, unsigned startBit)
{
for (int i = 31; i >= 0; i--)
InsertBit(tapID, startBit++, (id >> i) & 1);
}
/*
* Shift():
*
* Shifts the data buffer right (towards LSB at byte 0) by 1 bit. The size of
* the number of bits must be specified. The bit shifted out of the LSB is
* returned.
*/
unsigned CReal3D::Shift(uint8_t *data, unsigned numBits)
{
// This loop takes care of all the fully-filled bytes
unsigned shiftIn = 0;
unsigned shiftOut = 0;
uint32_t i;
for (i = 0; i < numBits / 8; i++)
{
shiftOut = data[i] & 1;
data[i] >>= 1;
data[i] |= (shiftIn << 7);
shiftIn = shiftOut; // carry over to next element's MSB
}
// Take care of the last partial byte (if there is one)
if ((numBits & 7) != 0)
{
shiftOut = (data[i] >> (8 - (numBits & 7))) & 1;
data[i] >>= 1;
data[i] |= (shiftIn << 7);
}
return shiftOut;
}
unsigned CReal3D::ReadTAP(void)
{
return tapTDO;
}
void CReal3D::WriteTAP(unsigned tck, unsigned tms, unsigned tdi, unsigned trst)
{
if (!tck)
return;
// Go to next state
tapState = tapFSM[tapState][tms];
switch (tapState)
{
case 3: // Capture-DR
/*
* Read ASIC IDs.
*
* The ID Sequence is:
* - Jupiter
* - Mercury
* - Venus
* - Earth
* - Mars
* - Mars (again)
*
* Note that different Model 3 steps have different chip
* revisions, hence the different IDs returned below.
*
* On Step 1.5 and 1.0, instruction 0x0C631F8C7FFE is used to retrieve
* the ID codes but Step 2.0 is a little weirder. It seems to use this
* and either the state of the TAP after reset or other instructions
* to read the IDs as well. This can be emulated in one of 2 ways:
* Ignore the instruction and always load up the data or load the
* data on TAP reset and when the instruction is issued.
*/
if (step == 0x10)
{
InsertID(0x116C7057, 1 + 0 * 32);
InsertID(0x216C3057, 1 + 1 * 32);
InsertID(0x116C4057, 1 + 2 * 32);
InsertID(0x216C5057, 1 + 3 * 32);
InsertID(0x116C6057, 1 + 4 * 32 + 1);
InsertID(0x116C6057, 1 + 5 * 32 + 1);
}
else if (step == 0x15)
{
InsertID(0x316C7057, 1 + 0 * 32);
InsertID(0x316C3057, 1 + 1 * 32);
InsertID(0x216C4057, 1 + 2 * 32); // Lost World may to use 0x016C4057
InsertID(0x316C5057, 1 + 3 * 32);
InsertID(0x216C6057, 1 + 4 * 32 + 1);
InsertID(0x216C6057, 1 + 5 * 32 + 1);
}
else if (step >= 0x20)
{
InsertID(0x416C7057, 1 + 0 * 32);
InsertID(0x416C3057, 1 + 1 * 32);
InsertID(0x316C4057, 1 + 2 * 32); // skichamp at PC=A89F4, this value causes "NO DAUGHTER BOARD" message
InsertID(0x416C5057, 1 + 3 * 32);
InsertID(0x316C6057, 1 + 4 * 32 + 1);
InsertID(0x316C6057, 1 + 5 * 32 + 1);
}
break;
case 4: // Shift-DR
tapTDO = Shift(tapID, tapIDSize);
//printf("TAP: Shift-DR Bit %d\n", bit++);
break;
case 10: // Capture-IR
// Load lower 2 bits with 01 as per IEEE 1149.1-1990
tapIR = 1;
break;
case 11: // Shift-IR
// Shift IR towards output and load in new data from TDI
tapTDO = tapIR & 1; // shift LSB to output
tapIR >>= 1;
tapIR |= ((uint64_t) tdi << 45);
break;
case 15: // Update-IR
/*
* Latch IR (technically, this should occur on the falling edge of
* TCK)
*/
tapIR &= 0x3FFFFFFFFFFFULL;
tapCurrentInstruction = tapIR;
//printf("TAP: Update-IR %XLL\n", tapCurrentInstruction);
break;
default:
break;
}
}
/******************************************************************************
Texture Uploading and Decoding
******************************************************************************/
// Mipmap coordinates for each reduction level (within a single 2048x1024 page)
static const int mipXBase[11] =
{
1024, // 1024/2
1536, // 512/2
1792, // 256/2
1920, // ...
1984,
2016,
2032,
2040,
2044,
2046,
2047
};
static const int mipYBase[11] =
{
512,
768,
896,
960,
992,
1008,
1016,
1020,
1022,
1023,
0
};
// Mipmap reduction factors
static const int mipDivisor[9] = { 2, 4, 8, 16, 32, 64, 128, 256, 512 };
// Table of texel offsets corresponding to an 8x8 texel texture tile
static const unsigned decode[64] =
{
0, 1, 4, 5, 8, 9,12,13,
2, 3, 6, 7,10,11,14,15,
16,17,20,21,24,25,28,29,
18,19,22,23,26,27,30,31,
32,33,36,37,40,41,44,45,
34,35,38,39,42,43,46,47,
48,49,52,53,56,57,60,61,
50,51,54,55,58,59,62,63
};
static void StoreTexelByte(uint16_t *texel, uint32_t byteSelect, uint8_t byte)
{
if ((byteSelect & 1)) // write to LSB
*texel = (*texel & 0xFF00) | byte;
if ((byteSelect & 2)) // write to MSB
*texel = (*texel & 0x00FF) | (uint16_t(byte) << 8);
}
void CReal3D::StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigned height, const uint16_t *texData, uint32_t header)
{
if ((header & 0x00800000)) // 16-bit textures
{
// Outer 2 loops: 8x8 tiles
for (uint32_t y = yPos; y < (yPos+height); y += 8)
{
for (uint32_t x = xPos; x < (xPos+width); x += 8)
{
// Inner 2 loops: 8x8 texels for the current tile
uint32_t destOffset = y*2048+x;
for (uint32_t yy = 0; yy < 8; yy++)
{
for (uint32_t xx = 0; xx < 8; xx++)
{
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(textureRAMDirty, destOffset * 2);
textureRAM[destOffset++] = texData[decode[(yy*8+xx)^1]];
}
destOffset += 2048-8; // next line
}
texData += 8*8; // next tile
}
}
}
else // 8-bit textures
{
/*
* 8-bit textures appear to be unpacked into 16-bit words in the
* texture RAM. Oddly, the rows of the decoding table seem to be
* swapped.
*/
uint32_t byteSelect = (header>>21)&3; // which byte to unpack to
if (byteSelect == 3) // write to both?
DebugLog("Observed 8-bit texture with byte_select=3!");
// Outer 2 loops: 8x8 tiles
for (uint32_t y = yPos; y < (yPos+height); y += 8)
{
for (uint32_t x = xPos; x < (xPos+width); x += 8)
{
// Inner 2 loops: 8x8 texels for the current tile
uint32_t destOffset = y*2048+x;
for (uint32_t yy = 0; yy < 8; yy++)
{
for (uint32_t xx = 0; xx < 8; xx += 2)
{
uint8_t byte1 = texData[decode[(yy^1)*8+((xx+0)^1)]/2]>>8;
uint8_t byte2 = texData[decode[(yy^1)*8+((xx+1)^1)]/2]&0xFF;
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(textureRAMDirty, destOffset * 2);
StoreTexelByte(&textureRAM[destOffset], byteSelect, byte1);
++destOffset;
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(textureRAMDirty, destOffset * 2);
StoreTexelByte(&textureRAM[destOffset], byteSelect, byte2);
++destOffset;
}
destOffset += 2048-8;
}
texData += 8*8/2; // next tile
}
}
}
// Signal to renderer that textures have changed
// TO-DO: mipmaps? What if a game writes non-mipmap textures to mipmap area?
if (g_Config.gpuMultiThreaded)
{
// If multi-threaded, then queue calls to UploadTextures for render thread to perform at beginning of next frame
QueuedUploadTextures upl;
upl.x = xPos;
upl.y = yPos;
upl.width = width;
upl.height = height;
queuedUploadTextures.push_back(upl);
}
else
Render3D->UploadTextures(xPos, yPos, width, height);
}
// Texture data will be in little endian format
void CReal3D::UploadTexture(uint32_t header, const uint16_t *texData)
{
// Position: texture RAM is arranged as 2 2048x1024 texel sheets
uint32_t x = 32*(header&0x3F);
uint32_t y = 32*((header>>7)&0x1F);
uint32_t page = (header>>20)&1;
y += page*1024; // treat page as additional Y bit (one 2048x2048 sheet)
// Texture size and bit depth
uint32_t width = 32<<((header>>14)&7);
uint32_t height = 32<<((header>>17)&7);
uint32_t bytesPerTexel;
if ((header&0x00800000)) // 16 bits per texel
bytesPerTexel = 2;
else // 8 bits
{
bytesPerTexel = 1;
//printf("8-bit textures!\n");
}
// Mipmaps
uint32_t mipYPos = 32*((header>>7)&0x1F);
// Process texture data
DebugLog("Real3D: Texture upload: pos=(%d,%d) size=(%d,%d), %d-bit\n", x, y, width, height, bytesPerTexel*8);
//printf("Real3D: Texture upload: pos=(%d,%d) size=(%d,%d), %d-bit\n", x, y, width, height, bytesPerTexel*8);
switch ((header>>24)&0x0F)
{
case 0x00: // texture w/ mipmaps
{
StoreTexture(x, y, width, height, texData, header);
uint32_t mipWidth = width;
uint32_t mipHeight = height;
uint32_t mipNum = 0;
while((mipHeight>8) && (mipWidth>8))
{
if (bytesPerTexel == 1)
texData += (mipWidth*mipHeight)/2;
else
texData += (mipWidth*mipHeight);
mipWidth /= 2;
mipHeight /= 2;
uint32_t mipX = mipXBase[mipNum] + (x / mipDivisor[mipNum]);
uint32_t mipY = mipYBase[mipNum] + (mipYPos / mipDivisor[mipNum]);
if(page)
mipY += 1024;
mipNum++;
StoreTexture(mipX, mipY, mipWidth, mipHeight, (uint16_t *) texData, header);
}
break;
}
case 0x01: // texture w/out mipmaps
StoreTexture(x, y, width, height, texData, header);
break;
case 0x02: // mipmaps only
{
uint32_t mipWidth = width;
uint32_t mipHeight = height;
uint32_t mipNum = 0;
while((mipHeight>8) && (mipWidth>8))
{
mipWidth /= 2;
mipHeight /= 2;
uint32_t mipX = mipXBase[mipNum] + (x / mipDivisor[mipNum]);
uint32_t mipY = mipYBase[mipNum] + (mipYPos / mipDivisor[mipNum]);
if(page)
mipY += 1024;
mipNum++;
StoreTexture(mipX, mipY, mipWidth, mipHeight, texData, header);
if (bytesPerTexel == 1)
texData += (mipWidth*mipHeight)/2;
else
texData += (mipWidth*mipHeight);
}
break;
}
case 0x80: // MAME thinks these might be a gamma table
//break;
default: // unknown
DebugLog("Unknown texture format %02X\n", header>>24);
//printf("unknown texture format %02X\n", header>>24);
break;
}
}
/******************************************************************************
Basic Emulation Functions, Registers, Memory, and Texture FIFO
******************************************************************************/
void CReal3D::Flush(void)
{
commandPortWritten = true;
DebugLog("Real3D 88000000 written @ PC=%08X\n", ppc_get_pc());
// Upload textures (if any)
if (fifoIdx > 0)
{
for (uint32_t i = 0; i < fifoIdx; )
{
uint32_t size = 2+textureFIFO[i+0]/2;
size /= 4;
uint32_t header = textureFIFO[i+1]; // texture information header
// Spikeout seems to be uploading 0 length textures
if (0 == size)
{
DebugLog("Real3D: 0-length texture upload @ PC=%08X (%08X %08X %08X)\n", ppc_get_pc(), textureFIFO[i+0], textureFIFO[i+1], textureFIFO[i+2]);
break;
}
UploadTexture(header,(uint16_t *)&textureFIFO[i+2]);
DebugLog("Real3D: Texture upload completed: %X bytes (%X)\n", size*4, textureFIFO[i+0]);
i += size;
}
}
// Reset texture FIFO
fifoIdx = 0;
}
void CReal3D::WriteTextureFIFO(uint32_t data)
{
if (fifoIdx >= (0x100000/4))
{
if (!error)
ErrorLog("Overflow in Real3D texture FIFO!");
error = true;
}
else
textureFIFO[fifoIdx++] = data;
}
void CReal3D::WriteTexturePort(unsigned reg, uint32_t data)
{
if (step == 0x10)
{
uint32_t addr = data & 0xFFFFFF;
uint32_t num_words = (2+vrom[addr+0]/2) / 4;
if (!num_words)
{
DebugLog("Real3D: 0-length VROM texture upload @ PC=%08X (%08X)\n", ppc_get_pc(), data);
return;
}
for (uint32_t i = 0; i < num_words; i++)
WriteTextureFIFO(vrom[(addr + i) & 0xFFFFFF]);
}
else
{
if (m_vromTextureFIFOIdx == 2)
{
uint32_t addr = m_vromTextureFIFO[0];
uint32_t header = m_vromTextureFIFO[1];
UploadTexture(header, (const uint16_t *) &vrom[addr & 0xFFFFFF]);
m_vromTextureFIFOIdx = 0;
}
else
m_vromTextureFIFO[m_vromTextureFIFOIdx++] = data;
}
}
void CReal3D::WriteLowCullingRAM(uint32_t addr, uint32_t data)
{
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(cullingRAMLoDirty, addr);
cullingRAMLo[addr/4] = data;
}
void CReal3D::WriteHighCullingRAM(uint32_t addr, uint32_t data)
{
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(cullingRAMHiDirty, addr);
cullingRAMHi[addr/4] = data;
}
void CReal3D::WritePolygonRAM(uint32_t addr, uint32_t data)
{
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(polyRAMDirty, addr);
polyRAM[addr/4] = data;
}
// Registers seem to range from 0x00 to around 0x3C but they are not understood
uint32_t CReal3D::ReadRegister(unsigned reg)
{
DebugLog("Real3D: Read reg %X\n", reg);
if (reg == 0)
{
uint32_t status = (ppc_total_cycles() >= statusChange ? 0x0 : 0x2);
return 0xFFFFFFFD|status;
}
else
return 0xFFFFFFFF;
}
uint32_t CReal3D::ReadPCIConfigSpace(unsigned device, unsigned reg, unsigned bits, unsigned offset)
{
uint32_t d;
if ((bits==8))
{
DebugLog("Real3D: %d-bit PCI read request for reg=%02X\n", bits, reg);
return 0;
}
// This is a little endian device, must return little endian words
switch (reg)
{
case 0x00: // Device ID and Vendor ID
d = FLIPENDIAN32(pciID);
switch (bits)
{
case 8:
d >>= (3-offset)*8; // offset will be 0-3; select appropriate byte
d &= 0xFF;
break;
case 16:
d >>= (2-offset)*8; // offset will be 0 or 2 only; select either high or low word
d &= 0xFFFF;
break;
default:
break;
}
DebugLog("Real3D: PCI ID read. Returning %X (%d-bits). PC=%08X, LR=%08X\n", d, bits, ppc_get_pc(), ppc_get_lr());
return d;
default:
DebugLog("Real3D: PCI read request for reg=%02X (%d-bit)\n", reg, bits);
break;
}
return 0;
}
void CReal3D::WritePCIConfigSpace(unsigned device, unsigned reg, unsigned bits, unsigned offset, uint32_t data)
{
DebugLog("Real3D: PCI %d-bit write request for reg=%02X, data=%08X\n", bits, reg, data);
}
void CReal3D::Reset(void)
{
error = false;
commandPortWritten = false;
commandPortWrittenRO = false;
queuedUploadTextures.clear();
queuedUploadTexturesRO.clear();
fifoIdx = 0;
m_vromTextureFIFOIdx = 0;
tapState = 0;
tapIDSize = 197;
dmaStatus = 0;
dmaUnknownReg = 0;
unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
memset(memoryPool, 0, memSize);
memset(m_vromTextureFIFO, 0, sizeof(m_vromTextureFIFO));
DebugLog("Real3D reset\n");
}
/******************************************************************************
Configuration, Initialization, and Shutdown
******************************************************************************/
void CReal3D::AttachRenderer(IRender3D *Render3DPtr)
{
Render3D = Render3DPtr;
// If multi-threaded, attach read-only snapshots to renderer instead of real ones
if (g_Config.gpuMultiThreaded)
Render3D->AttachMemory(cullingRAMLoRO, cullingRAMHiRO, polyRAMRO, vrom, textureRAMRO);
else
Render3D->AttachMemory(cullingRAMLo, cullingRAMHi, polyRAM, vrom, textureRAM);
Render3D->SetStep(step);
DebugLog("Real3D attached a Render3D object\n");
}
void CReal3D::SetStep(int stepID)
{
step = stepID;
if ((step!=0x10) && (step!=0x15) && (step!=0x20) && (step!=0x21))
{
DebugLog("Real3D: Unrecognized stepping: %d.%d\n", (step>>4)&0xF, step&0xF);
step = 0x10;
}
// Set PCI ID
if (step < 0x20)
pciID = 0x16C311DB; // vendor 0x11DB = Sega
else
pciID = 0x178611DB;
// Pass to renderer
if (Render3D != NULL)
Render3D->SetStep(step);
DebugLog("Real3D set to Step %d.%d\n", (step>>4)&0xF, step&0xF);
}
bool CReal3D::Init(const uint8_t *vromPtr, IBus *BusObjectPtr, CIRQ *IRQObjectPtr, unsigned dmaIRQBit)
{
uint32_t memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
float memSizeMB = (float)memSize/(float)0x100000;
// IRQ and bus objects
Bus = BusObjectPtr;
IRQ = IRQObjectPtr;
dmaIRQ = dmaIRQBit;
// Allocate all Real3D RAM regions
memoryPool = new(std::nothrow) uint8_t[memSize];
if (NULL == memoryPool)
return ErrorLog("Insufficient memory for Real3D object (needs %1.1f MB).", memSizeMB);
// Set up main pointers
cullingRAMLo = (uint32_t *) &memoryPool[OFFSET_8C];
cullingRAMHi = (uint32_t *) &memoryPool[OFFSET_8E];
polyRAM = (uint32_t *) &memoryPool[OFFSET_98];
textureRAM = (uint16_t *) &memoryPool[OFFSET_TEXRAM];
textureFIFO = (uint32_t *) &memoryPool[OFFSET_TEXFIFO];
// If multi-threaded, set up pointers for read-only snapshots and dirty page arrays too
if (g_Config.gpuMultiThreaded)
{
cullingRAMLoRO = (uint32_t *) &memoryPool[OFFSET_8C_RO];
cullingRAMHiRO = (uint32_t *) &memoryPool[OFFSET_8E_RO];
polyRAMRO = (uint32_t *) &memoryPool[OFFSET_98_RO];
textureRAMRO = (uint16_t *) &memoryPool[OFFSET_TEXRAM_RO];
cullingRAMLoDirty = (uint8_t *) &memoryPool[OFFSET_8C_DIRTY];
cullingRAMHiDirty = (uint8_t *) &memoryPool[OFFSET_8E_DIRTY];
polyRAMDirty = (uint8_t *) &memoryPool[OFFSET_98_DIRTY];
textureRAMDirty = (uint8_t *) &memoryPool[OFFSET_TEXRAM_DIRTY];
}
// VROM pointer passed to us
vrom = (uint32_t *) vromPtr;
DebugLog("Initialized Real3D (allocated %1.1f MB)\n", memSizeMB);
return OKAY;
}
CReal3D::CReal3D(void)
{
Render3D = NULL;
memoryPool = NULL;
cullingRAMLo = NULL;
cullingRAMHi = NULL;
polyRAM = NULL;
textureRAM = NULL;
textureFIFO = NULL;
vrom = NULL;
error = false;
fifoIdx = 0;
tapState = 0;
tapIDSize = 197;
m_vromTextureFIFO[0] = 0;
m_vromTextureFIFO[1] = 0;
m_vromTextureFIFOIdx = 0;
DebugLog("Built Real3D\n");
}
/*
* CReal3D::~CReal3D(void):
*
* Destructor.
*/
CReal3D::~CReal3D(void)
{
// Dump memory
#if 0
FILE *fp;
fp = fopen("8c000000", "wb");
if (NULL != fp)
{
fwrite(cullingRAMLo, sizeof(uint8_t), 0x400000, fp);
fclose(fp);
printf("dumped %s\n", "8c000000");
}
else
printf("unable to dump %s\n", "8c000000");
fp = fopen("8e000000", "wb");
if (NULL != fp)
{
fwrite(cullingRAMHi, sizeof(uint8_t), 0x100000, fp);
fclose(fp);
printf("dumped %s\n", "8e000000");
}
else
printf("unable to dump %s\n", "8e000000");
fp = fopen("98000000", "wb");
if (NULL != fp)
{
fwrite(polyRAM, sizeof(uint8_t), 0x400000, fp);
fclose(fp);
printf("dumped %s\n", "98000000");
}
else
printf("unable to dump %s\n", "98000000");
fp = fopen("texram", "wb");
if (NULL != fp)
{
fwrite(textureRAM, sizeof(uint8_t), 0x800000, fp);
fclose(fp);
printf("dumped %s\n", "texram");
}
else
printf("unable to dump %s\n", "texram");
#endif
Render3D = NULL;
if (memoryPool != NULL)
{
delete [] memoryPool;
memoryPool = NULL;
}
cullingRAMLo = NULL;
cullingRAMHi = NULL;
polyRAM = NULL;
textureRAM = NULL;
textureFIFO = NULL;
vrom = NULL;
DebugLog("Destroyed Real3D\n");
}