/** ** Supermodel ** A Sega Model 3 Arcade Emulator. ** Copyright 2011 Bart Trzynadlowski, Nik Henson ** ** This file is part of Supermodel. ** ** Supermodel is free software: you can redistribute it and/or modify it under ** the terms of the GNU General Public License as published by the Free ** Software Foundation, either version 3 of the License, or (at your option) ** any later version. ** ** Supermodel is distributed in the hope that it will be useful, but WITHOUT ** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ** FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ** more details. ** ** You should have received a copy of the GNU General Public License along ** with Supermodel. If not, see . **/ /* * Real3D.cpp * * The Model 3's Real3D-based graphics hardware. Based on the Real3D Pro-1000 * family of image generators. * * PCI IDs * ------- * It appears that Step 2.0 returns a different PCI ID depending on whether * the PCI configuration space or DMA register are accessed. For example, * Virtual On 2 expects 0x178611DB from the PCI configuration header but * 0x16C311DB from the DMA device. * * To-Do List * ---------- * - For consistency, the status registers should probably be byte reversed (this is a * little endian device), forcing the Model3 Read32/Write32 handlers to * manually reverse the data. This keeps with the convention for VRAM. * - Keep an eye out for games writing non-mipmap textures to the mipmap area. * The render currently cannot cope with this. */ #include "Real3D.h" #include "Supermodel.h" #include "JTAG.h" #include "CPU/PowerPC/ppc.h" #include "Util/BMPFile.h" #include #include // Macros that divide memory regions into pages and mark them as dirty when they are written to #define PAGE_WIDTH 12 #define PAGE_SIZE (1<>(PAGE_WIDTH+3)] |= 1<<((addr>>PAGE_WIDTH)&7) // Offsets of memory regions within Real3D memory pool #define OFFSET_8C 0x0000000 // 4 MB, culling RAM low (at 0x8C000000) #define OFFSET_8E 0x0400000 // 1 MB, culling RAM high (at 0x8E000000) #define OFFSET_98 0x0500000 // 4 MB, polygon RAM (at 0x98000000) #define OFFSET_TEXRAM 0x0900000 // 8 MB, texture RAM #define OFFSET_TEXFIFO 0x1100000 // 1 MB, texture FIFO #define MEM_POOL_SIZE_RW (0x400000+0x100000+0x400000+0x800000+0x100000) #define OFFSET_8C_RO 0x1200000 // 4 MB, culling RAM low (at 0x8C000000) [read-only snapshot] #define OFFSET_8E_RO 0x1600000 // 1 MB, culling RAM high (at 0x8E000000) [read-only snapshot] #define OFFSET_98_RO 0x1700000 // 4 MB, polygon RAM (at 0x98000000) [read-only snapshot] #define OFFSET_TEXRAM_RO 0x1B00000 // 8 MB, texture RAM [read-only snapshot] #define MEM_POOL_SIZE_RO (0x400000+0x100000+0x400000+0x800000) #define OFFSET_8C_DIRTY 0x2300000 #define OFFSET_8E_DIRTY (OFFSET_8C_DIRTY+DIRTY_SIZE(0x400000)) #define OFFSET_98_DIRTY (OFFSET_8E_DIRTY+DIRTY_SIZE(0x100000)) #define OFFSET_TEXRAM_DIRTY (OFFSET_98_DIRTY+DIRTY_SIZE(0x400000)) #define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(MEM_POOL_SIZE_RO)) #define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY) static void UpdateRenderConfig(IRender3D *Render3D, uint64_t internalRenderConfig[]); /****************************************************************************** Save States ******************************************************************************/ void CReal3D::SaveState(CBlockFile *SaveState) { SaveState->NewBlock("Real3D", __FILE__); SaveState->Write(memoryPool, MEM_POOL_SIZE_RW); // Don't write out read-only snapshots or dirty page arrays SaveState->Write(&fifoIdx, sizeof(fifoIdx)); SaveState->Write(m_vromTextureFIFO, sizeof(m_vromTextureFIFO)); SaveState->Write(&dmaSrc, sizeof(dmaSrc)); SaveState->Write(&dmaDest, sizeof(dmaDest)); SaveState->Write(&dmaLength, sizeof(dmaLength)); SaveState->Write(&dmaData, sizeof(dmaData)); SaveState->Write(&dmaUnknownReg, sizeof(dmaUnknownReg)); SaveState->Write(&dmaStatus, sizeof(dmaStatus)); SaveState->Write(&dmaConfig, sizeof(dmaConfig)); // These used to be occupied by JTAG state SaveState->Write(m_internalRenderConfig, sizeof(m_internalRenderConfig)); SaveState->Write(commandPortWritten); SaveState->Write(&m_pingPong, sizeof(m_pingPong)); for (int i = 0; i < 39; i++) { uint8_t nul = 0; SaveState->Write(&nul, sizeof(uint8_t)); } SaveState->Write(&m_vromTextureFIFOIdx, sizeof(m_vromTextureFIFOIdx)); } void CReal3D::LoadState(CBlockFile *SaveState) { if (OKAY != SaveState->FindBlock("Real3D")) { ErrorLog("Unable to load Real3D GPU state. Save state file is corrupt."); return; } SaveState->Read(memoryPool, MEM_POOL_SIZE_RW); // If multi-threaded, update read-only snapshots too if (m_gpuMultiThreaded) UpdateSnapshots(true); Render3D->UploadTextures(0, 0, 0, 2048, 2048); SaveState->Read(&fifoIdx, sizeof(fifoIdx)); SaveState->Read(&m_vromTextureFIFO, sizeof(m_vromTextureFIFO)); SaveState->Read(&dmaSrc, sizeof(dmaSrc)); SaveState->Read(&dmaDest, sizeof(dmaDest)); SaveState->Read(&dmaLength, sizeof(dmaLength)); SaveState->Read(&dmaData, sizeof(dmaData)); SaveState->Read(&dmaUnknownReg, sizeof(dmaUnknownReg)); SaveState->Read(&dmaStatus, sizeof(dmaStatus)); SaveState->Read(&dmaConfig, sizeof(dmaConfig)); SaveState->Read(m_internalRenderConfig, sizeof(m_internalRenderConfig)); UpdateRenderConfig(Render3D, m_internalRenderConfig); SaveState->Read(&commandPortWritten); SaveState->Read(&m_pingPong, sizeof(m_pingPong)); for (int i = 0; i < 39; i++) { uint8_t nul; SaveState->Read(&nul, sizeof(uint8_t)); } SaveState->Read(&m_vromTextureFIFOIdx, sizeof(m_vromTextureFIFOIdx)); } /****************************************************************************** Rendering ******************************************************************************/ static void UpdateRenderConfig(IRender3D *Render3D, uint64_t internalRenderConfig[]) { bool noSunClamp = (internalRenderConfig[0] & 0x800000) != 0 && (internalRenderConfig[1] & 0x400000) != 0; bool shadeIsSigned = (internalRenderConfig[0] & 0x1) == 0; Render3D->SetSunClamp(!noSunClamp); Render3D->SetSignedShade(shadeIsSigned); } void CReal3D::BeginVBlank(int statusCycles) { // Calculate point at which status bit should change value. Currently the same timing is used for both the status bit in ReadRegister // and in WriteDMARegister32/ReadDMARegister32, however it may be that they are completely unrelated. It appears that step 1.x games // access just the former while step 2.x access the latter. It is not known yet what this bit/these bits actually represent. statusChange = ppc_total_cycles() + statusCycles; m_evenFrame = !m_evenFrame; } void CReal3D::EndVBlank(void) { error = false; // clear error (just needs to be done once per frame) } uint32_t CReal3D::SyncSnapshots(void) { // Update read-only copy of command port flag commandPortWrittenRO = commandPortWritten; commandPortWritten = false; if (!m_gpuMultiThreaded) return 0; // Update read-only queue queuedUploadTexturesRO = queuedUploadTextures; queuedUploadTextures.clear(); // Update read-only snapshots return UpdateSnapshots(false); } uint32_t CReal3D::UpdateSnapshot(bool copyWhole, uint8_t *src, uint8_t *dst, unsigned size, uint8_t *dirty) { unsigned dirtySize = DIRTY_SIZE(size); if (copyWhole) { // If updating whole region, then just copy all data in one go memcpy(dst, src, size); memset(dirty, 0, dirtySize); return size; } else { // Otherwise, loop through dirty pages array to find out what needs to be updated and copy only those parts uint32_t copied = 0; uint8_t *pSrc = src; uint8_t *pDst = dst; for (unsigned i = 0; i < dirtySize; i++) { uint8_t d = dirty[i]; if (d) { for (unsigned j = 0; j < 8; j++) { if (d&1) { // If not at very end of region, then copy an extra 4 bytes to allow for a possible 32-bit overlap uint32_t toCopy = (i < dirtySize - 1 || j < 7 ? PAGE_SIZE + 4 : PAGE_SIZE); memcpy(pDst, pSrc, toCopy); copied += toCopy; } d >>= 1; pSrc += PAGE_SIZE; pDst += PAGE_SIZE; } dirty[i] = 0; } else { pSrc += 8 * PAGE_SIZE; pDst += 8 * PAGE_SIZE; } } return copied; } } uint32_t CReal3D::UpdateSnapshots(bool copyWhole) { // Update all memory region snapshots uint32_t cullLoCopied = UpdateSnapshot(copyWhole, (uint8_t*)cullingRAMLo, (uint8_t*)cullingRAMLoRO, 0x400000, cullingRAMLoDirty); uint32_t cullHiCopied = UpdateSnapshot(copyWhole, (uint8_t*)cullingRAMHi, (uint8_t*)cullingRAMHiRO, 0x100000, cullingRAMHiDirty); uint32_t polyCopied = UpdateSnapshot(copyWhole, (uint8_t*)polyRAM, (uint8_t*)polyRAMRO, 0x400000, polyRAMDirty); uint32_t textureCopied = UpdateSnapshot(copyWhole, (uint8_t*)textureRAM, (uint8_t*)textureRAMRO, 0x800000, textureRAMDirty); //printf("Read3D copied - cullLo:%4uK, cullHi:%4uK, poly:%4uK, texture:%4uK\n", cullLoCopied / 1024, cullHiCopied / 1024, polyCopied / 1024, textureCopied / 1024); return cullLoCopied + cullHiCopied + polyCopied + textureCopied; } void CReal3D::BeginFrame(void) { // If multi-threaded, perform now any queued texture uploads to renderer before rendering begins if (m_gpuMultiThreaded) { for (const auto &it : queuedUploadTexturesRO) { Render3D->UploadTextures(it.level, it.x, it.y, it.width, it.height); } // done syncing data queuedUploadTexturesRO.clear(); } Render3D->BeginFrame(); } void CReal3D::RenderFrame(void) { //if (commandPortWrittenRO) Render3D->RenderFrame(); } void CReal3D::EndFrame(void) { Render3D->EndFrame(); } /****************************************************************************** Texture Uploading and Decoding ******************************************************************************/ // Mipmap coordinates for each reduction level (within a single 2048x1024 page) static const int mipXBase[] = { 0, 1024, 1536, 1792, 1920, 1984, 2016, 2032, 2040, 2044, 2046, 2047 }; static const int mipYBase[] = { 0, 512, 768, 896, 960, 992, 1008, 1016, 1020, 1022, 1023 }; static const int mipDivisor[] = { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024 }; // Tables of texel offsets corresponding to an NxN texel texture tile static const unsigned decode8x8[64] = { 1, 0, 5, 4, 9, 8,13,12, 3, 2, 7, 6,11,10,15,14, 17,16,21,20,25,24,29,28, 19,18,23,22,27,26,31,30, 33,32,37,36,41,40,45,44, 35,34,39,38,43,42,47,46, 49,48,53,52,57,56,61,60, 51,50,55,54,59,58,63,62 }; static const unsigned decode8x4[32] = { 1, 0, 5, 4, 3, 2, 7, 6, 9, 8,13,12, 11,10,15,14, 17,16,21,20, 19,18,23,22, 25,24,29,28, 27,26,31,30 }; static const unsigned decode8x2[16] = { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }; static const unsigned decode8x1[8] = { 1, 3, 0, 2, 5, 7, 4, 6 }; void CReal3D::StoreTexture(unsigned level, unsigned xPos, unsigned yPos, unsigned width, unsigned height, const uint16_t *texData, bool sixteenBit, bool writeLSB, bool writeMSB, uint32_t &texDataOffset) { uint32_t tileX = (std::min)(8u, width); uint32_t tileY = (std::min)(8u, height); texDataOffset = 0; if (sixteenBit) // 16-bit textures { // Outer 2 loops: NxN tiles for (uint32_t y = yPos; y < (yPos + height); y += tileY) { for (uint32_t x = xPos; x < (xPos + width); x += tileX) { // Inner 2 loops: NxN texels for the current tile uint32_t destOffset = y * 2048 + x; for (uint32_t yy = 0; yy < tileY; yy++) { for (uint32_t xx = 0; xx < tileX; xx++) { if (m_gpuMultiThreaded) MARK_DIRTY(textureRAMDirty, destOffset * 2); if (tileX == 1) texData -= tileY; if (tileY == 1) texData -= tileX; if (tileX == 8) textureRAM[destOffset++] = texData[decode8x8[yy * tileX + xx]]; else if (tileX == 4) textureRAM[destOffset++] = texData[decode8x4[yy * tileX + xx]]; else if (tileX == 2) textureRAM[destOffset++] = texData[decode8x2[yy * tileX + xx]]; else if (tileX == 1) textureRAM[destOffset++] = texData[decode8x1[yy * tileX + xx]]; texDataOffset++; } destOffset += 2048 - tileX; // next line } texData += tileY * tileX; // next tile } } } else // 8-bit textures { /* * 8-bit textures appear to be unpacked into 16-bit words in the * texture RAM. Oddly, the rows of the decoding table seem to be * swapped. */ if (writeLSB && writeMSB) // write to both? DebugLog("Observed 8-bit texture with byte_select=3!"); // Outer 2 loops: NxN tiles const uint8_t byteSelect = (uint8_t)writeLSB | ((uint8_t)writeMSB << 1); uint16_t tempData; const uint16_t byteMask[4] = {0xFFFF, 0xFF00, 0x00FF, 0x0000}; for (uint32_t y = yPos; y < (yPos + height); y += tileY) { for (uint32_t x = xPos; x < (xPos + width); x += tileX) { // Inner 2 loops: NxN texels for the current tile uint32_t destOffset = y * 2048 + x; for (uint32_t yy = 0; yy < tileY; yy++) { for (uint32_t xx = 0; xx < tileX; xx++) { if (writeLSB | writeMSB) { if (m_gpuMultiThreaded) MARK_DIRTY(textureRAMDirty, destOffset * 2); textureRAM[destOffset] &= byteMask[byteSelect]; const uint8_t shift = (8 * ((xx & 1) ^ 1)); const uint8_t index = (yy ^ 1) * tileX + (xx ^ 1) - (tileX & 1); if (tileX == 1) texData -= tileY; if (tileY == 1) texData -= tileX; if (tileX == 8) tempData = (texData[decode8x8[index] / 2] >> shift) & 0xFF; else if (tileX == 4) tempData = (texData[decode8x4[index] / 2] >> shift) & 0xFF; else if (tileX == 2) tempData = (texData[decode8x2[index] / 2] >> shift) & 0xFF; else if (tileX == 1) tempData = (texData[decode8x1[index] / 2] >> shift) & 0xFF; tempData |= tempData << 8; tempData &= byteMask[byteSelect] ^ 0xFFFF; textureRAM[destOffset] |= tempData; } destOffset++; } destOffset += 2048 - tileX; // next line } uint32_t offset = (std::max)(1u, (tileY * tileX) / 2); texData += offset; // next tile texDataOffset += offset; // next tile } } } // Signal to renderer that textures have changed // TO-DO: mipmaps? What if a game writes non-mipmap textures to mipmap area? if (m_gpuMultiThreaded) { // If multi-threaded, then queue calls to UploadTextures for render thread to perform at beginning of next frame QueuedUploadTextures upl; upl.level = level; upl.x = xPos; upl.y = yPos; upl.width = width; upl.height = height; queuedUploadTextures.push_back(upl); } else Render3D->UploadTextures(level, xPos, yPos, width, height); } /* Texture header: -------- -------- -------- --xxxxxx X-position -------- -------- ----xxxx x------- Y-position -------- -------x xx------ -------- Width -------- ----xxx- -------- -------- Height -------- ---x---- -------- -------- Texture page -------- --x----- -------- -------- Write 8-bit data to the lower byte of texel -------- -x------ -------- -------- Write 8-bit data to the upper byte of texel -------- x------- -------- -------- Bitdepth, 0 = 8-bit, 1 = 16-bit xxxxxxxx -------- -------- -------- Texture type: 0x00 = texture with mipmaps 0x01 = texture without mipmaps 0x02 = only mipmaps 0x80 = possibly gamma table */ // Texture data will be in little endian format void CReal3D::UploadTexture(uint32_t header, const uint16_t *texData) { // Position: texture RAM is arranged as 2 2048x1024 texel sheets uint32_t x = 32 * (header & 0x3F); uint32_t y = 32 * ((header >> 7) & 0x1F); uint32_t page = (header >> 20) & 1; uint32_t width = 32 << ((header >> 14) & 7); uint32_t height = 32 << ((header >> 17) & 7); uint32_t type = (header >> 24) & 0xFF; bool sixteenBit = (header >> 23) & 0x1; bool writeUpperByte = (header >> 22) & 0x1; bool writeLowerByte = (header >> 21) & 0x1; uint32_t offset = 0; switch (type) { case 0x00: // texture w/ mipmaps case 0x01: // texture w/out mipmaps StoreTexture(0, x, y + (page * 1024), width, height, texData, sixteenBit, writeLowerByte, writeUpperByte, offset); texData += offset; if (type == 0x01) { break; } case 0x02: // mipmaps only { for (int i = 1; width > 0 && height > 0; i++) { int xPos = mipXBase[i] + (x / mipDivisor[i]); int yPos = mipYBase[i] + (y / mipDivisor[i]); width /= 2; height /= 2; StoreTexture(i, xPos, yPos + (page * 1024), width, height, texData, sixteenBit, writeLowerByte, writeUpperByte, offset); texData += offset; } break; } case 0x80: // MAME thinks these might be a gamma table (vf3 uploads this as the first texture) break; default: // unknown DebugLog("Unknown texture format %02X\n", type); break; } } /****************************************************************************** DMA Device Register 0xC: ------------- +---+---+---+---+---+---+---+---+ |BUS|???|???|???|???|???|???|IRQ| +---+---+---+---+---+---+---+---+ BUS: Busy (see von2 0x18A104) if 1. IRQ: IRQ pending. ******************************************************************************/ void CReal3D::DMACopy(void) { DebugLog("Real3D DMA copy (PC=%08X, LR=%08X): %08X -> %08X, %X %s\n", ppc_get_pc(), ppc_get_lr(), dmaSrc, dmaDest, dmaLength*4, (dmaConfig&0x80)?"(byte reversed)":""); //printf("Real3D DMA copy (PC=%08X, LR=%08X): %08X -> %08X, %X %s\n", ppc_get_pc(), ppc_get_lr(), dmaSrc, dmaDest, dmaLength*4, (dmaConfig&0x80)?"(byte reversed)":""); if ((dmaConfig&0x80)) // reverse bytes { while (dmaLength != 0) { uint32_t data = Bus->Read32(dmaSrc); Bus->Write32(dmaDest, FLIPENDIAN32(data)); dmaSrc += 4; dmaDest += 4; --dmaLength; } } else { while (dmaLength != 0) { Bus->Write32(dmaDest, Bus->Read32(dmaSrc)); dmaSrc += 4; dmaDest += 4; --dmaLength; } } } uint8_t CReal3D::ReadDMARegister8(unsigned reg) { switch (reg) { case 0xC: // status return dmaStatus; case 0xE: // configuration return dmaConfig; default: break; } DebugLog("Real3D: ReadDMARegister8: reg=%X\n", reg); return 0; } void CReal3D::WriteDMARegister8(unsigned reg, uint8_t data) { switch (reg) { case 0xD: // IRQ acknowledge if ((data&1)) { dmaStatus &= ~1; IRQ->Deassert(dmaIRQ); } break; case 0xE: // configuration dmaConfig = data; break; default: DebugLog("Real3D: WriteDMARegister8: reg=%X, data=%02X\n", reg, data); break; } //DebugLog("Real3D: WriteDMARegister8: reg=%X, data=%02X\n", reg, data); } uint32_t CReal3D::ReadDMARegister32(unsigned reg) { switch (reg) { case 0x14: // command result return dmaData; default: break; } DebugLog("Real3D: ReadDMARegister32: reg=%X\n", reg); return 0; } void CReal3D::WriteDMARegister32(unsigned reg, uint32_t data) { switch (reg) { case 0x00: // DMA source address dmaSrc = data; break; case 0x04: // DMA destination address dmaDest = data; break; case 0x08: // DMA length dmaLength = data; DMACopy(); if (dmaConfig & 1) // only fire an IRQ if the low bit of dmaConfig is set { dmaStatus |= 1; IRQ->Assert(dmaIRQ); } break; case 0x10: // command register if ((data&0x20000000)) // DMA ID command { dmaData = pciID; DebugLog("Real3D: DMA ID command issued (ATTENTION: make sure we're returning the correct value), PC=%08X, LR=%08X\n", ppc_get_pc(), ppc_get_lr()); } else if ((data&0x80000000)) { dmaData = ReadRegister(data & 0x3F); } break; case 0x14: // ? dmaData = 0xFFFFFFFF; break; default: DebugLog("Real3D: WriteDMARegister32: reg=%X, data=%08X\n", reg, data); break; } //DebugLog("Real3D: WriteDMARegister32: reg=%X, data=%08X\n", reg, data); } /****************************************************************************** Basic Emulation Functions, Registers, Memory, and Texture FIFO ******************************************************************************/ void CReal3D::Flush(void) { commandPortWritten = true; DebugLog("Real3D 88000000 written @ PC=%08X\n", ppc_get_pc()); // Upload textures (if any) if (fifoIdx > 0) { for (uint32_t i = 0; i < fifoIdx; ) { uint32_t size = 2+textureFIFO[i+0]/2; size /= 4; uint32_t header = textureFIFO[i+1]; // texture information header // Spikeout seems to be uploading 0 length textures if (0 == size) { DebugLog("Real3D: 0-length texture upload @ PC=%08X (%08X %08X %08X)\n", ppc_get_pc(), textureFIFO[i+0], textureFIFO[i+1], textureFIFO[i+2]); break; } UploadTexture(header,(uint16_t *)&textureFIFO[i+2]); DebugLog("Real3D: Texture upload completed: %X bytes (%X)\n", size*4, textureFIFO[i+0]); i += size; } } // Reset texture FIFO fifoIdx = 0; } void CReal3D::WriteTextureFIFO(uint32_t data) { if (fifoIdx >= (0x100000/4)) { if (!error) ErrorLog("Overflow in Real3D texture FIFO!"); error = true; } else textureFIFO[fifoIdx++] = data; } void CReal3D::WriteTexturePort(unsigned reg, uint32_t data) { if (step == 0x10) { uint32_t addr = data & 0xFFFFFF; uint32_t num_words = (2+vrom[addr+0]/2) / 4; if (!num_words) { DebugLog("Real3D: 0-length VROM texture upload @ PC=%08X (%08X)\n", ppc_get_pc(), data); return; } for (uint32_t i = 0; i < num_words; i++) WriteTextureFIFO(vrom[(addr + i) & 0xFFFFFF]); } else { if (m_vromTextureFIFOIdx == 2) { uint32_t addr = m_vromTextureFIFO[0]; uint32_t header = m_vromTextureFIFO[1]; UploadTexture(header, (const uint16_t *) &vrom[addr & 0xFFFFFF]); m_vromTextureFIFOIdx = 0; } else m_vromTextureFIFO[m_vromTextureFIFOIdx++] = data; } } void CReal3D::WriteLowCullingRAM(uint32_t addr, uint32_t data) { if (m_gpuMultiThreaded) MARK_DIRTY(cullingRAMLoDirty, addr); cullingRAMLo[addr/4] = data; } void CReal3D::WriteHighCullingRAM(uint32_t addr, uint32_t data) { if (m_gpuMultiThreaded) MARK_DIRTY(cullingRAMHiDirty, addr); cullingRAMHi[addr/4] = data; } void CReal3D::WritePolygonRAM(uint32_t addr, uint32_t data) { if (m_gpuMultiThreaded) MARK_DIRTY(polyRAMDirty, addr); polyRAM[addr/4] = data; } // Internal registers accessible via JTAG port void CReal3D::WriteJTAGRegister(uint64_t instruction, uint64_t data) { if (instruction == CJTAG::Instruction::SetReal3DRenderConfig0) m_internalRenderConfig[0] = data; else if (instruction == CJTAG::Instruction::SetReal3DRenderConfig1) m_internalRenderConfig[1] = data; UpdateRenderConfig(Render3D, m_internalRenderConfig); } // Registers correspond to the Stat_Pckt in the Real3d sdk /* Stat Packet 0x00: xxxx---- -------- -------- -------- spare1 ----x--- -------- -------- -------- gp_done -----x-- -------- -------- -------- dp_done ------x- -------- -------- -------- ping_pong -------x -------- -------- -------- update_done -------- x------- -------- -------- rend_done -------- -xxxxxxx xxxxxxxx xxxxxxxx tot_clks 23bit val (0x7FFFFF). This is a 33.33mhz clock value. Think this is the time the GPU takes to process the frame, used by software to estimate the frame rate. 0x01: -------- -------- -------- -------- spare2 -------- -xxxxxxx xxxxxxxx xxxxxxxx vpt0_clks - not sure what this is used for (if anything). It's not used by the SDK 0x02: -------- -------- -------- -------- spare3 -------- -xxxxxxx xxxxxxxx xxxxxxxx vpt1_clks - not sure what this is used for (if anything). It's not used by the SDK 0x03: -------- -------- -------- -------- spare4 -------- -xxxxxxx xxxxxxxx xxxxxxxx vpt2_clks - not sure what this is used for (if anything). It's not used by the SDK 0x04: -------- -------- -------- -------- spare5 -------- -xxxxxxx xxxxxxxx xxxxxxxx vpt3_clks - not sure what this is used for (if anything). It's not used by the SDK 0x05: range0 (float) Line of sight value for priority level 0 0x06: range1 (float) Line of sight value for priority level 1 0x07: range2 (float) Line of sight value for priority level 2 0x08: range3 (float) Line of sight value for priority level 3 0x09: ls_cycle (uint32) Think this is the frame number, don't think it's used by model3, since games never read this far into memory */ uint32_t CReal3D::ReadRegister(unsigned reg) { DebugLog("Real3D: Read reg %X\n", reg); if (reg == 0) { uint32_t ping_pong; if (m_evenFrame) { ping_pong = (ppc_total_cycles() >= statusChange ? 0x0 : 0x02000000); } else { ping_pong = (ppc_total_cycles() >= statusChange ? 0x02000000 : 0x0); } return 0xfdffffff | ping_pong; } else if (reg >= 20 && reg<=32) { // line of sight registers int index = (reg - 20) / 4; float val = Render3D->GetLosValue(index); if (val) { //val = 1.0f / val; // test program indicate z values are 1 over return 0xffffffff; // infinity } return *(uint32_t*)(&val); } return 0xffffffff; } // TODO: This returns data in the way that the PowerPC bus expects. Other functions in CReal3D should // return data this way. uint32_t CReal3D::ReadPCIConfigSpace(unsigned device, unsigned reg, unsigned bits, unsigned offset) { uint32_t d; if ((bits==8)) { DebugLog("Real3D: %d-bit PCI read request for reg=%02X\n", bits, reg); return 0; } // This is a little endian device, must return little endian words switch (reg) { case 0x00: // Device ID and Vendor ID d = FLIPENDIAN32(pciID); switch (bits) { case 8: d >>= (3-offset)*8; // offset will be 0-3; select appropriate byte d &= 0xFF; break; case 16: d >>= (2-offset)*8; // offset will be 0 or 2 only; select either high or low word d &= 0xFFFF; break; default: break; } DebugLog("Real3D: PCI ID read. Returning %X (%d-bits). PC=%08X, LR=%08X\n", d, bits, ppc_get_pc(), ppc_get_lr()); return d; default: DebugLog("Real3D: PCI read request for reg=%02X (%d-bit)\n", reg, bits); break; } return 0; } void CReal3D::WritePCIConfigSpace(unsigned device, unsigned reg, unsigned bits, unsigned offset, uint32_t data) { DebugLog("Real3D: PCI %d-bit write request for reg=%02X, data=%08X\n", bits, reg, data); } void CReal3D::Reset(void) { error = false; m_pingPong = 0; commandPortWritten = false; commandPortWrittenRO = false; queuedUploadTextures.clear(); queuedUploadTexturesRO.clear(); fifoIdx = 0; m_vromTextureFIFOIdx = 0; dmaSrc = 0; dmaDest = 0; dmaLength = 0; dmaData = 0; dmaUnknownReg = 0; dmaStatus = 0; dmaConfig = 0; unsigned memSize = (m_gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW); memset(memoryPool, 0, memSize); memset(m_vromTextureFIFO, 0, sizeof(m_vromTextureFIFO)); memset(m_internalRenderConfig, 0, sizeof(m_internalRenderConfig)); DebugLog("Real3D reset\n"); } /****************************************************************************** Configuration, Initialization, and Shutdown ******************************************************************************/ void CReal3D::AttachRenderer(IRender3D *Render3DPtr) { Render3D = Render3DPtr; // If mult-threaded, attach read-only snapshots to renderer instead of real ones if (m_gpuMultiThreaded) Render3D->AttachMemory(cullingRAMLoRO, cullingRAMHiRO, polyRAMRO, vrom, textureRAMRO); else Render3D->AttachMemory(cullingRAMLo, cullingRAMHi, polyRAM, vrom, textureRAM); Render3D->SetStepping(step); DebugLog("Real3D attached a Render3D object\n"); } uint32_t CReal3D::GetASICIDCode(ASIC asic) const { auto it = m_asicID.find(asic); return it == m_asicID.end() ? 0 : it->second; } void CReal3D::SetStepping(int stepping, uint32_t pciIDValue) { step = stepping; if ((step!=0x10) && (step!=0x15) && (step!=0x20) && (step!=0x21)) { DebugLog("Real3D: Unrecognized stepping: %d.%d\n", (step>>4)&0xF, step&0xF); step = 0x10; } // Set PCI ID pciID = pciIDValue; // Pass to renderer if (Render3D != NULL) Render3D->SetStepping(step); // Set ASIC ID codes m_asicID.clear(); if (step == 0x10) { m_asicID = decltype(m_asicID) { { ASIC::Mercury, 0x216c3057 }, { ASIC::Venus, 0x116c4057 }, { ASIC::Earth, 0x216c5057 }, { ASIC::Mars, 0x116c6057 }, { ASIC::Jupiter, 0x116c7057 } }; } else if (step == 0x15) { m_asicID = decltype(m_asicID) { { ASIC::Mercury, 0x316c3057 }, { ASIC::Venus, 0x216c4057 }, { ASIC::Earth, 0x316c5057 }, { ASIC::Mars, 0x216c6057 }, { ASIC::Jupiter, 0x316c7057 } }; } else if (step >= 0x20) { m_asicID = decltype(m_asicID) { { ASIC::Mercury, 0x416c3057 }, { ASIC::Venus, 0x316c4057 }, // skichamp @ pc=0xa89f4, this value causes 'NO DAUGHTER BOARD' message { ASIC::Earth, 0x416c5057 }, { ASIC::Mars, 0x316c6057 }, { ASIC::Jupiter, 0x416c7057 } }; } DebugLog("Real3D set to Step %d.%d\n", (step>>4)&0xF, step&0xF); } bool CReal3D::Init(const uint8_t *vromPtr, IBus *BusObjectPtr, CIRQ *IRQObjectPtr, unsigned dmaIRQBit) { uint32_t memSize = (m_config["GPUMultiThreaded"].ValueAs() ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW); float memSizeMB = (float)memSize/(float)0x100000; // IRQ and bus objects Bus = BusObjectPtr; IRQ = IRQObjectPtr; dmaIRQ = dmaIRQBit; // Allocate all Real3D RAM regions memoryPool = new(std::nothrow) uint8_t[memSize]; if (NULL == memoryPool) return ErrorLog("Insufficient memory for Real3D object (needs %1.1f MB).", memSizeMB); // Set up main pointers cullingRAMLo = (uint32_t *) &memoryPool[OFFSET_8C]; cullingRAMHi = (uint32_t *) &memoryPool[OFFSET_8E]; polyRAM = (uint32_t *) &memoryPool[OFFSET_98]; textureRAM = (uint16_t *) &memoryPool[OFFSET_TEXRAM]; textureFIFO = (uint32_t *) &memoryPool[OFFSET_TEXFIFO]; // If multi-threaded, set up pointers for read-only snapshots and dirty page arrays too if (m_gpuMultiThreaded) { cullingRAMLoRO = (uint32_t *) &memoryPool[OFFSET_8C_RO]; cullingRAMHiRO = (uint32_t *) &memoryPool[OFFSET_8E_RO]; polyRAMRO = (uint32_t *) &memoryPool[OFFSET_98_RO]; textureRAMRO = (uint16_t *) &memoryPool[OFFSET_TEXRAM_RO]; cullingRAMLoDirty = (uint8_t *) &memoryPool[OFFSET_8C_DIRTY]; cullingRAMHiDirty = (uint8_t *) &memoryPool[OFFSET_8E_DIRTY]; polyRAMDirty = (uint8_t *) &memoryPool[OFFSET_98_DIRTY]; textureRAMDirty = (uint8_t *) &memoryPool[OFFSET_TEXRAM_DIRTY]; } // VROM pointer passed to us vrom = (uint32_t *) vromPtr; DebugLog("Initialized Real3D (allocated %1.1f MB)\n", memSizeMB); return OKAY; } CReal3D::CReal3D(const Util::Config::Node &config) : m_config(config), m_gpuMultiThreaded(config["GPUMultiThreaded"].ValueAs()) { Render3D = NULL; memoryPool = NULL; cullingRAMLo = NULL; cullingRAMHi = NULL; polyRAM = NULL; textureRAM = NULL; textureFIFO = NULL; vrom = NULL; error = false; fifoIdx = 0; m_vromTextureFIFO[0] = 0; m_vromTextureFIFO[1] = 0; m_vromTextureFIFOIdx = 0; m_internalRenderConfig[0] = 0; m_internalRenderConfig[1] = 0; DebugLog("Built Real3D\n"); } /* * CReal3D::~CReal3D(void): * * Destructor. */ CReal3D::~CReal3D(void) { // Dump memory #if 0 FILE *fp; fp = fopen("8c000000", "wb"); if (NULL != fp) { fwrite(cullingRAMLo, sizeof(uint8_t), 0x400000, fp); fclose(fp); printf("dumped %s\n", "8c000000"); } else printf("unable to dump %s\n", "8c000000"); fp = fopen("8e000000", "wb"); if (NULL != fp) { fwrite(cullingRAMHi, sizeof(uint8_t), 0x100000, fp); fclose(fp); printf("dumped %s\n", "8e000000"); } else printf("unable to dump %s\n", "8e000000"); fp = fopen("98000000", "wb"); if (NULL != fp) { fwrite(polyRAM, sizeof(uint8_t), 0x400000, fp); fclose(fp); printf("dumped %s\n", "98000000"); } else printf("unable to dump %s\n", "98000000"); fp = fopen("texram", "wb"); if (NULL != fp) { fwrite(textureRAM, sizeof(uint8_t), 0x800000, fp); fclose(fp); printf("dumped %s\n", "texram"); } else printf("unable to dump %s\n", "texram"); #endif // Dump textures if requested if (m_config["DumpTextures"].ValueAsDefault(false)) { Util::WriteSurfaceToBMP("textures_t1rgb5_contour.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as T1RGB5 (contour bit enabled) to 'textures_t1rgb5_contour.bmp'\n"); Util::WriteSurfaceToBMP("textures_t1rgb5_opaque.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as T1RGB5 (contour bit ignored) to 'textures_t1rgb5_opaque.bmp'\n"); Util::WriteSurfaceToBMP("textures_a4l4_lo.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as A4L4 (low) to 'textures_a4l4_lo.bmp'\n"); Util::WriteSurfaceToBMP("textures_l4a4_lo.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as L4A4 (low) to 'textures_l4a4_lo.bmp'\n"); Util::WriteSurfaceToBMP("textures_a4l4_hi.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as A4L4 (high) to 'textures_a4l4_hi.bmp'\n"); Util::WriteSurfaceToBMP("textures_l4a4_hi.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as L4A4 (high) to 'textures_l4a4_hi.bmp'\n"); Util::WriteSurfaceToBMP("textures_l8_lo.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as L8 (low) to 'textures_l8_lo.bmp'\n"); Util::WriteSurfaceToBMP("textures_l8_hi.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as L8 (high) to 'textures_l8_hi.bmp'\n"); Util::WriteSurfaceToBMP("textures_rgba4.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as RGBA4 to 'textures_rgba4.bmp'\n"); Util::WriteSurfaceToBMP("textures_l4_0.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as L4 (channel 0) to 'textures_l4_0.bmp'\n"); Util::WriteSurfaceToBMP("textures_l4_1.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as L4 (channel 1) to 'textures_l4_1.bmp'\n"); Util::WriteSurfaceToBMP("textures_l4_2.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as L4 (channel 2) to 'textures_l4_2.bmp'\n"); Util::WriteSurfaceToBMP("textures_l4_3.bmp", reinterpret_cast(textureRAM), 2048, 2048, false); printf("Wrote textures as L4 (channel 3) to 'textures_l4_3.bmp'\n"); } Render3D = NULL; if (memoryPool != NULL) { delete [] memoryPool; memoryPool = NULL; } cullingRAMLo = NULL; cullingRAMHi = NULL; polyRAM = NULL; textureRAM = NULL; textureFIFO = NULL; vrom = NULL; DebugLog("Destroyed Real3D\n"); }