New multi-threaded rendering changes that parallelise graphics rendering and PPC execution in order to increase performance on multi-core machines.

New gpuMultiThreaded config option to enable/disable multi-threaded rendering (enabled by default, disabling it reverts to previous behaviour).
Other rendering optimisations:
 - texture uploads now only affect the appropriate region of the texture sheet, rather than re-uploading the whole sheet each time
 - performance of clearing the model caches has been improved
New Alt+O key input added to toggle outputting of frame timings for debugging purposes.
This commit is contained in:
Nik Henson 2012-01-16 23:21:14 +00:00
parent 0835e38b94
commit d1d5175548
12 changed files with 881 additions and 244 deletions

View file

@ -834,13 +834,8 @@ void CRender3D::ClearModelCache(ModelCache *Cache)
Cache->vboCurOffset = 0; Cache->vboCurOffset = 0;
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
Cache->curVertIdx[i] = 0; Cache->curVertIdx[i] = 0;
if (!Cache->dynamic) for (int i = 0; i < Cache->numModels; i++)
memset(Cache->lut, 0xFF, sizeof(INT16)*Cache->lutSize); // set all to -1 Cache->lut[Cache->Models[i].lutIdx] = -1;
else
{
for (int i = 0; i < Cache->numModels; i++)
Cache->lut[Cache->Models[i].lutIdx] = -1;
}
Cache->numModels = 0; Cache->numModels = 0;
ClearDisplayList(Cache); ClearDisplayList(Cache);

View file

@ -737,24 +737,7 @@ void CRender2D::EndFrame(void)
Emulation Callbacks Emulation Callbacks
******************************************************************************/ ******************************************************************************/
void CRender2D::WritePalette(unsigned color, UINT32 data)
{
UINT8 r, g, b, a;
a = 0xFF * ((data>>15)&1); // decode the RGBA (make alpha 0xFF or 0x00)
a = ~a; // invert it (set on Model 3 means clear pixel)
if ((data&0x8000))
r = g = b = 0;
else
{
b = (data>>7)&0xF8;
g = (data>>2)&0xF8;
r = (data<<3)&0xF8;
}
pal[color] = (a<<24)|(b<<16)|(g<<8)|r;
}
void CRender2D::WriteVRAM(unsigned addr, UINT32 data) void CRender2D::WriteVRAM(unsigned addr, UINT32 data)
{ {
@ -763,31 +746,6 @@ void CRender2D::WriteVRAM(unsigned addr, UINT32 data)
// For now, mark everything as dirty // For now, mark everything as dirty
allDirty = true; allDirty = true;
// Palette
if (addr >= 0x100000)
{
unsigned color = (addr-0x100000)/4; // color index
WritePalette(color, data);
}
}
/*
* InitPalette():
*
* This must be called from AttachVRAM() to initialize the palette. The reason
* is that because WriteVRAM() always compares incoming data to what is already
* in the VRAM, there is no actual way to initialize the palette by calling
* WriteVRAM() and passing it the initial VRAM contents. It will always fail to
* update because nothing is being changed.
*
* This function fixes the transparent pixel bug that frequently occurred when
* loading save states in Supermodel 0.1a.
*/
void CRender2D::InitPalette(void)
{
for (int i = 0; i < 0x20000/4; i++)
WritePalette(i, vram[0x100000/4 + i]);
} }
@ -801,14 +759,19 @@ void CRender2D::AttachRegisters(const UINT32 *regPtr)
DebugLog("Render2D attached registers\n"); DebugLog("Render2D attached registers\n");
} }
void CRender2D::AttachPalette(const UINT32 *palPtr)
{
pal = palPtr;
DebugLog("Render2D attached palette\n");
}
void CRender2D::AttachVRAM(const UINT8 *vramPtr) void CRender2D::AttachVRAM(const UINT8 *vramPtr)
{ {
vram = (UINT32 *) vramPtr; vram = (UINT32 *) vramPtr;
InitPalette();
DebugLog("Render2D attached VRAM\n"); DebugLog("Render2D attached VRAM\n");
} }
#define MEMORY_POOL_SIZE (512*512*4+0x20000) #define MEMORY_POOL_SIZE (512*512*4)
bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes) bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes)
{ {
@ -832,7 +795,6 @@ bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned
// Set up pointers to memory regions // Set up pointers to memory regions
surf = (UINT32 *) memoryPool; surf = (UINT32 *) memoryPool;
pal = (UINT32 *) &memoryPool[512*512*4];
// Resolution // Resolution
xPixels = xRes; xPixels = xRes;

View file

@ -91,6 +91,8 @@ public:
*/ */
void AttachRegisters(const UINT32 *regPtr); void AttachRegisters(const UINT32 *regPtr);
void AttachPalette(const UINT32 *palPtr);
/* /*
* AttachVRAM(vramPtr): * AttachVRAM(vramPtr):
* *
@ -145,11 +147,10 @@ private:
void DisplayLayer(int layerNum, GLfloat z); void DisplayLayer(int layerNum, GLfloat z);
void Setup2D(void); void Setup2D(void);
void ColorOffset(GLfloat colorOffset[3], UINT32 reg); void ColorOffset(GLfloat colorOffset[3], UINT32 reg);
void WritePalette(unsigned color, UINT32 data);
void InitPalette(void);
// Data received from tile generator device object // Data received from tile generator device object
const UINT32 *vram; const UINT32 *vram;
const UINT32 *pal;
const UINT32 *regs; const UINT32 *regs;
// OpenGL data // OpenGL data
@ -172,7 +173,6 @@ private:
// Buffers // Buffers
UINT8 *memoryPool; // all memory is allocated here UINT8 *memoryPool; // all memory is allocated here
UINT32 *surf; // 512x512x32bpp pixel surface UINT32 *surf; // 512x512x32bpp pixel surface
UINT32 *pal; // 0x20000 byte (32K colors) palette
}; };

View file

@ -50,10 +50,11 @@ CInputs::CInputs(CInputSystem *system) : m_system(system)
uiMusicVolDown = AddSwitchInput("UIMusicVolDown", "Decrease Music Volume", GAME_INPUT_UI, "KEY_F9"); uiMusicVolDown = AddSwitchInput("UIMusicVolDown", "Decrease Music Volume", GAME_INPUT_UI, "KEY_F9");
uiSoundVolUp = AddSwitchInput("UISoundVolUp", "Increase Sound Volume", GAME_INPUT_UI, "KEY_F12"); uiSoundVolUp = AddSwitchInput("UISoundVolUp", "Increase Sound Volume", GAME_INPUT_UI, "KEY_F12");
uiSoundVolDown = AddSwitchInput("UISoundVolDown", "Decrease Sound Volume", GAME_INPUT_UI, "KEY_F11"); uiSoundVolDown = AddSwitchInput("UISoundVolDown", "Decrease Sound Volume", GAME_INPUT_UI, "KEY_F11");
uiDumpInpState = AddSwitchInput("UIDumpInputState", "Dump Input State", GAME_INPUT_UI, "NONE"); // disabled for release
uiClearNVRAM = AddSwitchInput("UIClearNVRAM", "Clear NVRAM", GAME_INPUT_UI, "KEY_ALT+KEY_N"); uiClearNVRAM = AddSwitchInput("UIClearNVRAM", "Clear NVRAM", GAME_INPUT_UI, "KEY_ALT+KEY_N");
uiSelectCrosshairs = AddSwitchInput("UISelectCrosshairs", "Select Crosshairs", GAME_INPUT_UI, "KEY_ALT+KEY_I"); uiSelectCrosshairs = AddSwitchInput("UISelectCrosshairs", "Select Crosshairs", GAME_INPUT_UI, "KEY_ALT+KEY_I");
uiToggleFrLimit = AddSwitchInput("UIToggleFrameLimit", "Toggle Frame Limiting", GAME_INPUT_UI, "KEY_ALT+KEY_T"); uiToggleFrLimit = AddSwitchInput("UIToggleFrameLimit", "Toggle Frame Limiting", GAME_INPUT_UI, "KEY_ALT+KEY_T");
uiDumpInpState = AddSwitchInput("UIDumpInputState", "Dump Input State", GAME_INPUT_UI, "KEY_ALT+KEY_U");
uiDumpTimings = AddSwitchInput("UIDumpTimings", "Dump Frame Timings", GAME_INPUT_UI, "KEY_ALT+KEY_O");
#ifdef SUPERMODEL_DEBUGGER #ifdef SUPERMODEL_DEBUGGER
uiEnterDebugger = AddSwitchInput("UIEnterDebugger", "Enter Debugger", GAME_INPUT_UI, "KEY_ALT+KEY_B"); uiEnterDebugger = AddSwitchInput("UIEnterDebugger", "Enter Debugger", GAME_INPUT_UI, "KEY_ALT+KEY_B");
#endif #endif

View file

@ -101,10 +101,11 @@ public:
CSwitchInput *uiMusicVolDown; CSwitchInput *uiMusicVolDown;
CSwitchInput *uiSoundVolUp; CSwitchInput *uiSoundVolUp;
CSwitchInput *uiSoundVolDown; CSwitchInput *uiSoundVolDown;
CSwitchInput *uiDumpInpState;
CSwitchInput *uiClearNVRAM; CSwitchInput *uiClearNVRAM;
CSwitchInput *uiSelectCrosshairs; CSwitchInput *uiSelectCrosshairs;
CSwitchInput *uiToggleFrLimit; CSwitchInput *uiToggleFrLimit;
CSwitchInput *uiDumpInpState;
CSwitchInput *uiDumpTimings;
#ifdef SUPERMODEL_DEBUGGER #ifdef SUPERMODEL_DEBUGGER
CSwitchInput *uiEnterDebugger; CSwitchInput *uiEnterDebugger;
#endif #endif

View file

@ -1910,6 +1910,8 @@ void CModel3::ClearNVRAM(void)
void CModel3::RunFrame(void) void CModel3::RunFrame(void)
{ {
UINT32 start = CThread::GetTicks();
// See if currently running multi-threaded // See if currently running multi-threaded
if (g_Config.multiThreaded) if (g_Config.multiThreaded)
{ {
@ -1917,39 +1919,60 @@ void CModel3::RunFrame(void)
if (!StartThreads()) if (!StartThreads())
goto ThreadError; goto ThreadError;
// Wake threads for sound board (if sync'd) and drive board (if attached) so they can process a frame // Wake threads for PPC main board (if multi-threading GPU), sound board (if sync'd) and drive board (if attached) so they can process a frame
if (syncSndBrdThread && !sndBrdThreadSync->Post() || DriveBoard.IsAttached() && !drvBrdThreadSync->Post()) if (g_Config.gpuMultiThreaded && !ppcBrdThreadSync->Post() ||
syncSndBrdThread && !sndBrdThreadSync->Post() ||
DriveBoard.IsAttached() && !drvBrdThreadSync->Post())
goto ThreadError; goto ThreadError;
// At the same time, process a single frame for main board (PPC) in this thread // If not multi-threading GPU, then run PPC main board for a frame and sync GPUs now in this thread
RunMainBoardFrame(); if (!g_Config.gpuMultiThreaded)
{
RunMainBoardFrame();
SyncGPUs();
}
// Render frame if ready to do so
if (gpusReady)
RenderFrame();
// Enter notify wait critical section // Enter notify wait critical section
if (!notifyLock->Lock()) if (!notifyLock->Lock())
goto ThreadError; goto ThreadError;
// Wait for sound board and drive board threads to finish their work (if they haven't done so already) // Wait for PPC main board, sound board and drive board threads to finish their work (if they are running and haven't finished already)
while (syncSndBrdThread && !sndBrdThreadDone || DriveBoard.IsAttached() && !drvBrdThreadDone) while (g_Config.gpuMultiThreaded && !ppcBrdThreadDone ||
syncSndBrdThread && !sndBrdThreadDone ||
DriveBoard.IsAttached() && !drvBrdThreadDone)
{ {
if (!notifySync->Wait(notifyLock)) if (!notifySync->Wait(notifyLock))
goto ThreadError; goto ThreadError;
} }
ppcBrdThreadDone = false;
sndBrdThreadDone = false; sndBrdThreadDone = false;
drvBrdThreadDone = false; drvBrdThreadDone = false;
// Leave notify wait critical section // Leave notify wait critical section
if (!notifyLock->Unlock()) if (!notifyLock->Unlock())
goto ThreadError; goto ThreadError;
// If multi-threading GPU, then sync GPUs last while PPC main board thread is waiting
if (g_Config.gpuMultiThreaded)
SyncGPUs();
} }
else else
{ {
// If not multi-threaded, then just process a single frame for main board, sound board and drive board in turn in this thread // If not multi-threaded, then just process and render a single frame for PPC main board, sound board and drive board in turn in this thread
RunMainBoardFrame(); RunMainBoardFrame();
SoundBoard.RunFrame(); SyncGPUs();
RenderFrame();
RunSoundBoardFrame();
if (DriveBoard.IsAttached()) if (DriveBoard.IsAttached())
DriveBoard.RunFrame(); RunDriveBoardFrame();
} }
frameTicks = CThread::GetTicks() - start;
return; return;
ThreadError: ThreadError:
@ -1957,13 +1980,129 @@ ThreadError:
g_Config.multiThreaded = false; g_Config.multiThreaded = false;
} }
void CModel3::RunMainBoardFrame(void)
{
UINT32 start = CThread::GetTicks();
// Compute display and VBlank timings
unsigned frameCycles = g_Config.GetPowerPCFrequency()*1000000/60;
unsigned vblCycles = (unsigned) ((float) frameCycles * 2.5f/100.0f); // 2.5% vblank (ridiculously short and wrong but bigger values cause flicker in Daytona)
unsigned dispCycles = frameCycles - vblCycles;
// VBlank
if (gpusReady)
{
TileGen.BeginVBlank();
GPU.BeginVBlank();
IRQ.Assert(0x02);
ppc_execute(vblCycles);
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
/*
* Sound:
*
* Bit 0x20 of the MIDI control port appears to enable periodic interrupts,
* which are used to send MIDI commands. Often games will write 0x27, send
* a series of commands, and write 0x06 to stop. Other games, like Star
* Wars Trilogy and Sega Rally 2, will enable interrupts at the beginning
* by writing 0x37 and will disable/enable interrupts to control command
* output.
*/
//printf("\t-- BEGIN (Ctrl=%02X, IRQEn=%02X, IRQPend=%02X) --\n", midiCtrlPort, IRQ.ReadIRQEnable()&0x40, IRQ.ReadIRQState());
int irqCount = 0;
while ((midiCtrlPort&0x20))
//while (midiCtrlPort == 0x27) // 27 triggers IRQ sequence, 06 stops it
{
// Don't waste time firing MIDI interrupts if game has disabled them
if ((IRQ.ReadIRQEnable()&0x40) == 0)
break;
// Process MIDI interrupt
IRQ.Assert(0x40);
ppc_execute(200); // give PowerPC time to acknowledge IRQ
IRQ.Deassert(0x40);
ppc_execute(200); // acknowledge that IRQ was deasserted (TODO: is this really needed?)
++irqCount;
if (irqCount > 128)
{
//printf("\tMIDI FIFO OVERFLOW! (IRQEn=%02X, IRQPend=%02X)\n", IRQ.ReadIRQEnable()&0x40, IRQ.ReadIRQState());
break;
}
}
//printf("\t-- END --\n");
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
// End VBlank
GPU.EndVBlank();
TileGen.EndVBlank();
IRQ.Assert(0x0D);
}
// Run the PowerPC for the active display part of the frame
ppc_execute(dispCycles);
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
ppcTicks = CThread::GetTicks() - start;
}
void CModel3::SyncGPUs(void)
{
UINT32 start = CThread::GetTicks();
syncSize = GPU.SyncSnapshots() + TileGen.SyncSnapshots();
gpusReady = true;
syncTicks = CThread::GetTicks() - start;
}
void CModel3::RenderFrame(void)
{
UINT32 start = CThread::GetTicks();
// Render frame
TileGen.BeginFrame();
GPU.BeginFrame();
GPU.RenderFrame();
GPU.EndFrame();
TileGen.EndFrame();
renderTicks = CThread::GetTicks() - start;
}
bool CModel3::RunSoundBoardFrame(void)
{
UINT32 start = CThread::GetTicks();
bool bufferFull = SoundBoard.RunFrame();
sndTicks = CThread::GetTicks() - start;
return bufferFull;
}
void CModel3::RunDriveBoardFrame(void)
{
UINT32 start = CThread::GetTicks();
DriveBoard.RunFrame();
drvTicks = CThread::GetTicks() - start;
}
bool CModel3::StartThreads(void) bool CModel3::StartThreads(void)
{ {
if (startedThreads) if (startedThreads)
return true; return true;
// Create synchronization objects // Create synchronization objects
sndBrdThreadSync = CThread::CreateSemaphore(1); if (g_Config.gpuMultiThreaded)
{
ppcBrdThreadSync = CThread::CreateSemaphore(0);
if (ppcBrdThreadSync == NULL)
goto ThreadError;
}
sndBrdThreadSync = CThread::CreateSemaphore(0);
if (sndBrdThreadSync == NULL) if (sndBrdThreadSync == NULL)
goto ThreadError; goto ThreadError;
sndBrdNotifyLock = CThread::CreateMutex(); sndBrdNotifyLock = CThread::CreateMutex();
@ -1974,7 +2113,7 @@ bool CModel3::StartThreads(void)
goto ThreadError; goto ThreadError;
if (DriveBoard.IsAttached()) if (DriveBoard.IsAttached())
{ {
drvBrdThreadSync = CThread::CreateSemaphore(1); drvBrdThreadSync = CThread::CreateSemaphore(0);
if (drvBrdThreadSync == NULL) if (drvBrdThreadSync == NULL)
goto ThreadError; goto ThreadError;
} }
@ -1985,6 +2124,14 @@ bool CModel3::StartThreads(void)
if (notifySync == NULL) if (notifySync == NULL)
goto ThreadError; goto ThreadError;
// Create PPC main board thread, if multi-threading GPU
if (g_Config.gpuMultiThreaded)
{
ppcBrdThread = CThread::CreateThread(StartMainBoardThread, this);
if (ppcBrdThread == NULL)
goto ThreadError;
}
// Create sound board thread (sync'd or unsync'd) // Create sound board thread (sync'd or unsync'd)
if (syncSndBrdThread) if (syncSndBrdThread)
sndBrdThread = CThread::CreateThread(StartSoundBoardThreadSyncd, this); sndBrdThread = CThread::CreateThread(StartSoundBoardThreadSyncd, this);
@ -1993,15 +2140,15 @@ bool CModel3::StartThreads(void)
if (sndBrdThread == NULL) if (sndBrdThread == NULL)
goto ThreadError; goto ThreadError;
// Create drive board thread (sync'd), if drive board is attached // Create drive board thread, if drive board is attached
if (DriveBoard.IsAttached()) if (DriveBoard.IsAttached())
{ {
drvBrdThread = CThread::CreateThread(StartDriveBoardThreadSyncd, this); drvBrdThread = CThread::CreateThread(StartDriveBoardThread, this);
if (drvBrdThread == NULL) if (drvBrdThread == NULL)
goto ThreadError; goto ThreadError;
} }
// Set audio callback if unsync'd // Set audio callback if sound board thread is unsync'd
if (!syncSndBrdThread) if (!syncSndBrdThread)
SetAudioCallback(AudioCallback, this); SetAudioCallback(AudioCallback, this);
@ -2026,7 +2173,7 @@ bool CModel3::PauseThreads(void)
// Wait for all threads to finish their processing // Wait for all threads to finish their processing
pausedThreads = true; pausedThreads = true;
while (sndBrdThreadRunning || drvBrdThreadRunning) while (ppcBrdThreadRunning || sndBrdThreadRunning || drvBrdThreadRunning)
{ {
if (!notifySync->Wait(notifyLock)) if (!notifySync->Wait(notifyLock))
goto ThreadError; goto ThreadError;
@ -2043,11 +2190,27 @@ ThreadError:
return false; return false;
} }
void CModel3::ResumeThreads(void) bool CModel3::ResumeThreads(void)
{ {
// No need to use any locking here if (!startedThreads)
return true;
// Enter notify critical section
if (!notifyLock->Lock())
goto ThreadError;
// Let all threads know that they can continue running
pausedThreads = false; pausedThreads = false;
return;
// Leave notify critical section
if (!notifyLock->Unlock())
goto ThreadError;
return true;
ThreadError:
ErrorLog("Threading error in CModel3::ResumeThreads: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
g_Config.multiThreaded = false;
return false;
} }
void CModel3::StopThreads(void) void CModel3::StopThreads(void)
@ -2055,7 +2218,7 @@ void CModel3::StopThreads(void)
if (!startedThreads) if (!startedThreads)
return; return;
// If sound board not sync'd then remove callback // If sound board thread is unsync'd then remove audio callback
if (!syncSndBrdThread) if (!syncSndBrdThread)
SetAudioCallback(NULL, NULL); SetAudioCallback(NULL, NULL);
@ -2068,8 +2231,13 @@ void CModel3::StopThreads(void)
void CModel3::DeleteThreadObjects(void) void CModel3::DeleteThreadObjects(void)
{ {
// Delete (which in turn kills) sound board and drive board threads // Delete (which in turn kills) PPC main board, sound board and drive board threads
// Note that can do so here safely because threads will always be waiting on their semaphores when this method is called // Note that can do so here safely because threads will always be waiting on their semaphores when this method is called
if (ppcBrdThread != NULL)
{
delete ppcBrdThread;
ppcBrdThread = NULL;
}
if (sndBrdThread != NULL) if (sndBrdThread != NULL)
{ {
delete sndBrdThread; delete sndBrdThread;
@ -2082,6 +2250,11 @@ void CModel3::DeleteThreadObjects(void)
} }
// Delete synchronization objects // Delete synchronization objects
if (ppcBrdThreadSync != NULL)
{
delete ppcBrdThreadSync;
ppcBrdThreadSync = NULL;
}
if (sndBrdThreadSync != NULL) if (sndBrdThreadSync != NULL)
{ {
delete sndBrdThreadSync; delete sndBrdThreadSync;
@ -2114,9 +2287,28 @@ void CModel3::DeleteThreadObjects(void)
} }
} }
void CModel3::DumpTimings(void)
{
printf("PPC:%3ums%c render:%3ums%c sync:%4uK%c%3ums%c snd:%3ums%c drv:%3ums%c frame:%3ums%c\n",
ppcTicks, (ppcTicks > renderTicks ? '!' : ','),
renderTicks, (renderTicks > ppcTicks ? '!' : ','),
syncSize / 1024, (syncSize / 1024 > 128 ? '!' : ','), syncTicks, (syncTicks > 1 ? '!' : ','),
sndTicks, (sndTicks > 10 ? '!' : ','),
drvTicks, (drvTicks > 10 ? '!' : ','),
frameTicks, (frameTicks > 16 ? '!' : ' '));
}
int CModel3::StartMainBoardThread(void *data)
{
// Call method on CModel3 to run PPC main board thread
CModel3 *model3 = (CModel3*)data;
model3->RunMainBoardThread();
return 0;
}
int CModel3::StartSoundBoardThread(void *data) int CModel3::StartSoundBoardThread(void *data)
{ {
// Call method on CModel3 to run unsync'd sound board thread // Call method on CModel3 to run sound board thread (unsync'd)
CModel3 *model3 = (CModel3*)data; CModel3 *model3 = (CModel3*)data;
model3->RunSoundBoardThread(); model3->RunSoundBoardThread();
return 0; return 0;
@ -2124,20 +2316,70 @@ int CModel3::StartSoundBoardThread(void *data)
int CModel3::StartSoundBoardThreadSyncd(void *data) int CModel3::StartSoundBoardThreadSyncd(void *data)
{ {
// Call method on CModel3 to run sync'd sound board thread // Call method on CModel3 to run sound board thread (sync'd)
CModel3 *model3 = (CModel3*)data; CModel3 *model3 = (CModel3*)data;
model3->RunSoundBoardThreadSyncd(); model3->RunSoundBoardThreadSyncd();
return 0; return 0;
} }
int CModel3::StartDriveBoardThreadSyncd(void *data) int CModel3::StartDriveBoardThread(void *data)
{ {
// Call method on CModel3 to run sync'd drive board thread // Call method on CModel3 to run drive board thread
CModel3 *model3 = (CModel3*)data; CModel3 *model3 = (CModel3*)data;
model3->RunDriveBoardThreadSyncd(); model3->RunDriveBoardThread();
return 0; return 0;
} }
/*
 * RunMainBoardThread():
 *
 * Body of the PPC main board thread. Loops forever, and on each pass:
 *
 *   1. Blocks on the ppcBrdThreadSync semaphore until woken.
 *   2. Under notifyLock, checks pausedThreads. If paused, goes back to step 1;
 *      otherwise marks itself running (ppcBrdThreadRunning).
 *   3. Runs one PPC main board frame via RunMainBoardFrame().
 *   4. Under notifyLock, clears ppcBrdThreadRunning, sets ppcBrdThreadDone and
 *      signals every waiter on notifySync.
 *
 * If any semaphore, mutex or condition variable call reports failure, the
 * error is logged, g_Config.multiThreaded is cleared and the function
 * returns.
 *
 * NOTE(review): the error exits taken after notifyLock->Lock() has succeeded
 * (a failed SignalAll() or a failed Unlock()) leave this function without
 * unlocking notifyLock. Confirm whether callers can block on it afterwards.
 */
void CModel3::RunMainBoardThread(void)
{
for (;;)
{
// Stay in this inner loop until woken while the threads are not paused
bool wait = true;
while (wait)
{
// Wait on PPC main board thread semaphore
if (!ppcBrdThreadSync->Wait())
goto ThreadError;
// Enter notify critical section
if (!notifyLock->Lock())
goto ThreadError;
// Check threads not paused
if (!pausedThreads)
{
wait = false;
// Set while still holding notifyLock, in the same critical section as the pausedThreads check
ppcBrdThreadRunning = true;
}
// Leave notify critical section
if (!notifyLock->Unlock())
goto ThreadError;
}
// Process a single frame for PPC main board
// (performed outside the notify critical section)
RunMainBoardFrame();
// Enter notify critical section
if (!notifyLock->Lock())
goto ThreadError;
// Let other threads know processing has finished
ppcBrdThreadRunning = false;
ppcBrdThreadDone = true;
// Both flags are updated before the signal, while notifyLock is held
if (!notifySync->SignalAll())
goto ThreadError;
// Leave notify critical section
if (!notifyLock->Unlock())
goto ThreadError;
}
// Only reached via goto: the for (;;) loop above has no other exit
ThreadError:
ErrorLog("Threading error in RunMainBoardThread: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
g_Config.multiThreaded = false;
}
void CModel3::AudioCallback(void *data) void CModel3::AudioCallback(void *data)
{ {
// Call method on CModel3 to wake sound board thread // Call method on CModel3 to wake sound board thread
@ -2151,7 +2393,7 @@ void CModel3::WakeSoundBoardThread(void)
if (!sndBrdNotifyLock->Lock()) if (!sndBrdNotifyLock->Lock())
goto ThreadError; goto ThreadError;
// Signal to sound board that it should start processing again // Signal to sound board thread that it should start processing again
if (!sndBrdNotifySync->Signal()) if (!sndBrdNotifySync->Signal())
goto ThreadError; goto ThreadError;
@ -2200,11 +2442,22 @@ void CModel3::RunSoundBoardThread(void)
goto ThreadError; goto ThreadError;
} }
// Keep processing frames until audio buffer is full // Keep processing frames until paused or audio buffer is full
bool repeat = true; while (true)
// NOTE - performs an unlocked read of pausedThreads here, but this is okay
while (!pausedThreads && !SoundBoard.RunFrame())
{ {
// Enter main notify critical section
bool paused;
if (!notifyLock->Lock())
goto ThreadError;
paused = pausedThreads;
// Leave main notify critical section
if (!notifyLock->Unlock())
goto ThreadError;
if (paused || RunSoundBoardFrame())
break;
//printf("Rerunning sound board\n"); //printf("Rerunning sound board\n");
} }
@ -2256,7 +2509,7 @@ void CModel3::RunSoundBoardThreadSyncd(void)
} }
// Process a single frame for sound board // Process a single frame for sound board
SoundBoard.RunFrame(); RunSoundBoardFrame();
// Enter notify critical section // Enter notify critical section
if (!notifyLock->Lock()) if (!notifyLock->Lock())
@ -2278,7 +2531,7 @@ ThreadError:
g_Config.multiThreaded = false; g_Config.multiThreaded = false;
} }
void CModel3::RunDriveBoardThreadSyncd(void) void CModel3::RunDriveBoardThread(void)
{ {
for (;;) for (;;)
{ {
@ -2306,7 +2559,7 @@ void CModel3::RunDriveBoardThreadSyncd(void)
} }
// Process a single frame for drive board // Process a single frame for drive board
DriveBoard.RunFrame(); RunDriveBoardFrame();
// Enter notify critical section // Enter notify critical section
if (!notifyLock->Lock()) if (!notifyLock->Lock())
@ -2324,70 +2577,10 @@ void CModel3::RunDriveBoardThreadSyncd(void)
} }
ThreadError: ThreadError:
ErrorLog("Threading error in RunDriveBoardThreadSyncd: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError()); ErrorLog("Threading error in RunDriveBoardThread: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
g_Config.multiThreaded = false; g_Config.multiThreaded = false;
} }
/*
 * RunMainBoardFrame():
 *
 * Executes one frame's worth of PowerPC cycles and renders the frame in the
 * same call. The active display portion runs first. The frame is then begun
 * and rendered, the VBlank portion is executed (including MIDI interrupt
 * servicing), and the frame is ended.
 */
void CModel3::RunMainBoardFrame(void)
{
    // Split the frame's cycle budget between VBlank and active display.
    // VBlank is 2.5% of the frame: ridiculously short and wrong, but bigger
    // values cause flicker in Daytona.
    const unsigned cyclesPerFrame  = g_Config.GetPowerPCFrequency()*1000000/60;
    const unsigned cyclesInVBlank  = static_cast<unsigned>(static_cast<float>(cyclesPerFrame) * 2.5f/100.0f);
    const unsigned cyclesInDisplay = cyclesPerFrame - cyclesInVBlank;

    // Active display part of the frame
    ppc_execute(cyclesInDisplay);

    // VBlank: begin and render the frame, then let the PowerPC run
    TileGen.BeginFrame();
    GPU.BeginFrame();
    GPU.RenderFrame();
    IRQ.Assert(0x02);
    ppc_execute(cyclesInVBlank);

    /*
     * Sound:
     *
     * Bit 0x20 of the MIDI control port appears to enable periodic
     * interrupts, which games use to send MIDI commands. Often a game writes
     * 0x27, sends a series of commands, then writes 0x06 to stop. Others
     * (Star Wars Trilogy, Sega Rally 2) write 0x37 up front and then
     * disable/enable interrupts to control command output.
     *
     * At most 129 interrupts are fired per frame, so that a game which never
     * clears the bit cannot stall the frame.
     */
    for (int irqCount = 0; irqCount <= 128; ++irqCount)
    {
        // Stop as soon as the game turns periodic MIDI interrupts off
        if ((midiCtrlPort & 0x20) == 0)
            break;

        // Don't waste time firing MIDI interrupts if game has disabled them
        if ((IRQ.ReadIRQEnable() & 0x40) == 0)
            break;

        IRQ.Assert(0x40);
        ppc_execute(200);   // give PowerPC time to acknowledge IRQ
        IRQ.Deassert(0x40);
        ppc_execute(200);   // acknowledge that IRQ was deasserted (TODO: is this really needed?)
    }

    // End frame
    GPU.EndFrame();
    TileGen.EndFrame();
    IRQ.Assert(0x0D);
}
void CModel3::Reset(void) void CModel3::Reset(void)
{ {
// Clear memory (but do not modify backup RAM!) // Clear memory (but do not modify backup RAM!)
@ -2422,6 +2615,15 @@ void CModel3::Reset(void)
if (DriveBoard.IsAttached()) if (DriveBoard.IsAttached())
DriveBoard.Reset(); DriveBoard.Reset();
gpusReady = false;
ppcTicks = 0;
syncSize = 0;
syncTicks = 0;
renderTicks = 0;
sndTicks = 0;
drvTicks = 0;
frameTicks = 0;
DebugLog("Model 3 reset\n"); DebugLog("Model 3 reset\n");
} }
@ -2964,13 +3166,17 @@ CModel3::CModel3(void)
startedThreads = false; startedThreads = false;
pausedThreads = false; pausedThreads = false;
ppcBrdThread = NULL;
sndBrdThread = NULL; sndBrdThread = NULL;
drvBrdThread = NULL; drvBrdThread = NULL;
ppcBrdThreadRunning = false;
ppcBrdThreadDone = false;
sndBrdThreadRunning = false; sndBrdThreadRunning = false;
sndBrdThreadDone = false; sndBrdThreadDone = false;
drvBrdThreadRunning = false; drvBrdThreadRunning = false;
drvBrdThreadDone = false; drvBrdThreadDone = false;
syncSndBrdThread = false; syncSndBrdThread = false;
ppcBrdThreadSync = NULL;
sndBrdThreadSync = NULL; sndBrdThreadSync = NULL;
drvBrdThreadSync = NULL; drvBrdThreadSync = NULL;
notifyLock = NULL; notifyLock = NULL;

View file

@ -28,6 +28,7 @@
#ifndef INCLUDED_MODEL3_H #ifndef INCLUDED_MODEL3_H
#define INCLUDED_MODEL3_H #define INCLUDED_MODEL3_H
/* /*
* CModel3Config: * CModel3Config:
* *
@ -36,7 +37,8 @@
class CModel3Config class CModel3Config
{ {
public: public:
bool multiThreaded; // Multi-threading (enabled if true) bool multiThreaded; // Multi-threaded (enabled if true)
bool gpuMultiThreaded; // Multi-threaded rendering (enabled if true)
// PowerPC clock frequency in MHz (minimum: 1 MHz) // PowerPC clock frequency in MHz (minimum: 1 MHz)
inline void SetPowerPCFrequency(unsigned f) inline void SetPowerPCFrequency(unsigned f)
@ -57,6 +59,7 @@ public:
CModel3Config(void) CModel3Config(void)
{ {
multiThreaded = true; // enable by default multiThreaded = true; // enable by default
gpuMultiThreaded = true; // enable by default
ppcFrequency = 50*1000000; // 50 MHz ppcFrequency = 50*1000000; // 50 MHz
} }
@ -313,7 +316,14 @@ public:
* *
* Flags that any paused threads should resume running. * Flags that any paused threads should resume running.
*/ */
void ResumeThreads(void); bool ResumeThreads(void);
/*
* DumpTimings(void):
*
* Prints all timings for the most recent frame to the console, for debugging purposes.
*/
void DumpTimings(void);
/* /*
* CModel3(void): * CModel3(void):
@ -342,21 +352,28 @@ private:
void WriteSystemRegister(unsigned reg, UINT8 data); void WriteSystemRegister(unsigned reg, UINT8 data);
void Patch(void); void Patch(void);
void RunMainBoardFrame(void); // Runs the main board (PPC) for a frame void RunMainBoardFrame(void); // Runs PPC main board for a frame
void SyncGPUs(void); // Sync's up GPUs in preparation for rendering - must be called when PPC is not running
void RenderFrame(void); // Renders current frame
bool RunSoundBoardFrame(void); // Runs sound board for a frame
void RunDriveBoardFrame(void); // Runs drive board for a frame
bool StartThreads(void); // Starts all threads bool StartThreads(void); // Starts all threads
void StopThreads(void); // Stops all threads void StopThreads(void); // Stops all threads
void DeleteThreadObjects(void); // Deletes all threads and synchronization objects void DeleteThreadObjects(void); // Deletes all threads and synchronization objects
static int StartSoundBoardThread(void *data); // Callback to start unsync'd sound board thread static int StartMainBoardThread(void *data); // Callback to start PPC main board thread
static int StartSoundBoardThreadSyncd(void *data); // Callback to start sync'd sound board thread static int StartSoundBoardThread(void *data); // Callback to start sound board thread (unsync'd)
static int StartDriveBoardThreadSyncd(void *data); // Callback to start sync'd drive board thread static int StartSoundBoardThreadSyncd(void *data); // Callback to start sound board thread (sync'd)
static int StartDriveBoardThread(void *data); // Callback to start drive board thread
static void AudioCallback(void *data); // Audio buffer callback static void AudioCallback(void *data); // Audio buffer callback
void WakeSoundBoardThread(void); // Used by audio callback to wake sound board thread when not sync'd with PPC thread void WakeSoundBoardThread(void); // Used by audio callback to wake sound board thread when not sync'd with PPC thread
void RunSoundBoardThread(void); // Runs sound board thread unsync'd with PPC thread, ie at full speed void RunMainBoardThread(void); // Runs PPC main board thread (sync'd in step with render thread)
void RunSoundBoardThreadSyncd(void); // Runs sound board thread sync'd in step with PPC thread void RunSoundBoardThread(void); // Runs sound board thread (unsync'd with render thread, ie at full speed)
void RunDriveBoardThreadSyncd(void); // Runs drive board thread sync'd in step with PPC thread void RunSoundBoardThreadSyncd(void); // Runs sound board thread (sync'd in step with render thread)
void RunDriveBoardThread(void); // Runs drive board thread (sync'd in step with render thread)
// Game and hardware information // Game and hardware information
const struct GameInfo *Game; const struct GameInfo *Game;
@ -397,17 +414,22 @@ private:
PPC_FETCH_REGION PPCFetchRegions[3]; PPC_FETCH_REGION PPCFetchRegions[3];
// Multiple threading // Multiple threading
bool gpusReady; // True if GPUs are ready to render
bool startedThreads; // True if threads have been created and started bool startedThreads; // True if threads have been created and started
bool pausedThreads; // True if threads are currently paused bool pausedThreads; // True if threads are currently paused
bool syncSndBrdThread; // True if sound board thread should be sync'd with PPC thread bool syncSndBrdThread; // True if sound board thread should be sync'd in step with render thread
CThread *ppcBrdThread; // PPC main board thread
CThread *sndBrdThread; // Sound board thread CThread *sndBrdThread; // Sound board thread
CThread *drvBrdThread; // Drive board thread CThread *drvBrdThread; // Drive board thread
bool ppcBrdThreadRunning; // Flag to indicate PPC main board thread is currently processing
bool ppcBrdThreadDone; // Flag to indicate PPC main board thread has finished processing
bool sndBrdThreadRunning; // Flag to indicate sound board thread is currently processing bool sndBrdThreadRunning; // Flag to indicate sound board thread is currently processing
bool sndBrdThreadDone; // Flag to indicate sound board thread has finished processing bool sndBrdThreadDone; // Flag to indicate sound board thread has finished processing
bool drvBrdThreadRunning; // Flag to indicate drive board thread is currently processing bool drvBrdThreadRunning; // Flag to indicate drive board thread is currently processing
bool drvBrdThreadDone; // Flag to indicate drive board thread has finished processing bool drvBrdThreadDone; // Flag to indicate drive board thread has finished processing
// Thread synchronization objects // Thread synchronization objects
CSemaphore *ppcBrdThreadSync;
CSemaphore *sndBrdThreadSync; CSemaphore *sndBrdThreadSync;
CMutex *sndBrdNotifyLock; CMutex *sndBrdNotifyLock;
CCondVar *sndBrdNotifySync; CCondVar *sndBrdNotifySync;
@ -427,6 +449,15 @@ private:
CSoundBoard SoundBoard; // Sound board CSoundBoard SoundBoard; // Sound board
CDSB *DSB; // Digital Sound Board (type determined dynamically at load time) CDSB *DSB; // Digital Sound Board (type determined dynamically at load time)
CDriveBoard DriveBoard; // Drive board CDriveBoard DriveBoard; // Drive board
// Frame timings
UINT32 ppcTicks;
UINT32 syncSize;
UINT32 syncTicks;
UINT32 renderTicks;
UINT32 sndTicks;
UINT32 drvTicks;
UINT32 frameTicks;
}; };

View file

@ -44,14 +44,30 @@
#include <cstring> #include <cstring>
#include "Supermodel.h" #include "Supermodel.h"
// Offsets of memory regions within Real3D memory pool // Macros that divide memory regions into pages and mark them as dirty when they are written to
#define OFFSET_8C 0 // 4 MB, culling RAM low (at 0x8C000000) #define PAGE_WIDTH 12
#define OFFSET_8E 0x400000 // 1 MB, culling RAM high (at 0x8E000000) #define PAGE_SIZE (1<<PAGE_WIDTH)
#define OFFSET_98 0x500000 // 4 MB, polygon RAM (at 0x98000000) #define DIRTY_SIZE(arraySize) (1+(arraySize-1)/(8*PAGE_SIZE))
#define OFFSET_TEXRAM 0x900000 // 8 MB, texture RAM #define MARK_DIRTY(dirtyArray, addr) dirtyArray[addr>>(PAGE_WIDTH+3)] |= 1<<((addr>>PAGE_WIDTH)&7)
#define OFFSET_TEXFIFO 0x1100000 // 1 MB, texture FIFO
#define MEMORY_POOL_SIZE (0x400000+0x100000+0x400000+0x800000+0x100000)
// Offsets of memory regions within Real3D memory pool
#define OFFSET_8C 0x0000000 // 4 MB, culling RAM low (at 0x8C000000)
#define OFFSET_8E 0x0400000 // 1 MB, culling RAM high (at 0x8E000000)
#define OFFSET_98 0x0500000 // 4 MB, polygon RAM (at 0x98000000)
#define OFFSET_TEXRAM 0x0900000 // 8 MB, texture RAM
#define OFFSET_TEXFIFO 0x1100000 // 1 MB, texture FIFO
#define MEM_POOL_SIZE_RW (0x400000+0x100000+0x400000+0x800000+0x100000)
#define OFFSET_8C_RO 0x1200000 // 4 MB, culling RAM low (at 0x8C000000) [read-only snapshot]
#define OFFSET_8E_RO 0x1600000 // 1 MB, culling RAM high (at 0x8E000000) [read-only snapshot]
#define OFFSET_98_RO 0x1700000 // 4 MB, polygon RAM (at 0x98000000) [read-only snapshot]
#define OFFSET_TEXRAM_RO 0x1B00000 // 8 MB, texture RAM [read-only snapshot]
#define MEM_POOL_SIZE_RO (0x400000+0x100000+0x400000+0x800000)
#define OFFSET_8C_DIRTY 0x2300000
#define OFFSET_8E_DIRTY (OFFSET_8C_DIRTY+DIRTY_SIZE(0x400000))
#define OFFSET_98_DIRTY (OFFSET_8E_DIRTY+DIRTY_SIZE(0x100000))
#define OFFSET_TEXRAM_DIRTY (OFFSET_98_DIRTY+DIRTY_SIZE(0x400000))
#define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(MEM_POOL_SIZE_RO))
#define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY)
/****************************************************************************** /******************************************************************************
Save States Save States
@ -61,7 +77,7 @@ void CReal3D::SaveState(CBlockFile *SaveState)
{ {
SaveState->NewBlock("Real3D", __FILE__); SaveState->NewBlock("Real3D", __FILE__);
SaveState->Write(memoryPool, MEMORY_POOL_SIZE); SaveState->Write(memoryPool, MEM_POOL_SIZE_RW); // Don't write out read-only snapshots or dirty page arrays
SaveState->Write(&fifoIdx, sizeof(fifoIdx)); SaveState->Write(&fifoIdx, sizeof(fifoIdx));
SaveState->Write(&vromTextureAddr, sizeof(vromTextureAddr)); SaveState->Write(&vromTextureAddr, sizeof(vromTextureAddr));
SaveState->Write(&vromTextureHeader, sizeof(vromTextureHeader)); SaveState->Write(&vromTextureHeader, sizeof(vromTextureHeader));
@ -90,8 +106,11 @@ void CReal3D::LoadState(CBlockFile *SaveState)
return; return;
} }
SaveState->Read(memoryPool, MEMORY_POOL_SIZE); SaveState->Read(memoryPool, MEM_POOL_SIZE_RW);
Render3D->UploadTextures(0,0,2048,2048); // If multi-threaded, update read-only snapshots too
if (g_Config.gpuMultiThreaded)
UpdateSnapshots(true);
Render3D->UploadTextures(0, 0, 2048, 2048);
SaveState->Read(&fifoIdx, sizeof(fifoIdx)); SaveState->Read(&fifoIdx, sizeof(fifoIdx));
SaveState->Read(&vromTextureAddr, sizeof(vromTextureAddr)); SaveState->Read(&vromTextureAddr, sizeof(vromTextureAddr));
SaveState->Read(&vromTextureHeader, sizeof(vromTextureHeader)); SaveState->Read(&vromTextureHeader, sizeof(vromTextureHeader));
@ -117,23 +136,111 @@ void CReal3D::LoadState(CBlockFile *SaveState)
Rendering Rendering
******************************************************************************/ ******************************************************************************/
void CReal3D::RenderFrame(void) void CReal3D::BeginVBlank(void)
{ {
//if (commandPortWritten) status |= 2; // VBlank bit
Render3D->RenderFrame(); }
void CReal3D::EndVBlank(void)
{
error = false; // clear error (just needs to be done once per frame)
status &= ~2;
}
UINT32 CReal3D::SyncSnapshots(void)
{
// Update read-only copy of command port flag
commandPortWrittenRO = commandPortWritten;
commandPortWritten = false; commandPortWritten = false;
if (!g_Config.gpuMultiThreaded)
return 0;
// Update read-only queue
queuedUploadTexturesRO = queuedUploadTextures;
queuedUploadTextures.clear();
// Update read-only snapshots
return UpdateSnapshots(false);
}
/*
 * UpdateSnapshot(copyWhole, src, dst, size, dirty):
 *
 * Brings one read-only snapshot region up to date with its live counterpart.
 *
 * Parameters:
 *		copyWhole	If true, the entire region is copied regardless of the
 *					dirty bitmap.
 *		src			Start of the live (read/write) region.
 *		dst			Start of the snapshot region (same size as src).
 *		size		Size of the region in bytes.
 *		dirty		Dirty page bitmap for the region, DIRTY_SIZE(size) bytes
 *					long. Bit j of byte i covers page i*8+j, each page being
 *					PAGE_SIZE bytes. All visited bits are cleared on return.
 *
 * Returns:
 *		Number of bytes copied.
 */
UINT32 CReal3D::UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty)
{
	unsigned dirtySize = DIRTY_SIZE(size);
	if (copyWhole)
	{
		// If updating whole region, then just copy all data in one go
		memcpy(dst, src, size);
		memset(dirty, 0, dirtySize);
		return size;
	}

	// Otherwise, walk the dirty bitmap and copy only the pages that were written to
	UINT32 copied = 0;
	for (unsigned i = 0; i < dirtySize; i++)
	{
		UINT8 d = dirty[i];
		if (!d)
			continue;	// none of these 8 pages were touched
		dirty[i] = 0;

		// Byte offset of the first page covered by this bitmap byte
		unsigned offset = i * 8 * PAGE_SIZE;
		for (; d != 0 && offset < size; d >>= 1, offset += PAGE_SIZE)
		{
			if (!(d&1))
				continue;

			// Copy the page plus an extra 4 bytes to allow for a 32-bit write that started in
			// this page and spilled into the next one, but never read or write past the end
			// of the region (this also covers regions that are not a whole number of pages).
			unsigned remaining = size - offset;
			UINT32 toCopy = (remaining > PAGE_SIZE + 4 ? PAGE_SIZE + 4 : remaining);
			memcpy(dst + offset, src + offset, toCopy);
			copied += toCopy;
		}
	}
	return copied;
}
UINT32 CReal3D::UpdateSnapshots(bool copyWhole)
{
// Update all memory region snapshots
UINT32 cullLoCopied = UpdateSnapshot(copyWhole, (UINT8*)cullingRAMLo, (UINT8*)cullingRAMLoRO, 0x400000, cullingRAMLoDirty);
UINT32 cullHiCopied = UpdateSnapshot(copyWhole, (UINT8*)cullingRAMHi, (UINT8*)cullingRAMHiRO, 0x100000, cullingRAMHiDirty);
UINT32 polyCopied = UpdateSnapshot(copyWhole, (UINT8*)polyRAM, (UINT8*)polyRAMRO, 0x400000, polyRAMDirty);
UINT32 textureCopied = UpdateSnapshot(copyWhole, (UINT8*)textureRAM, (UINT8*)textureRAMRO, 0x800000, textureRAMDirty);
//printf("Real3D copied - cullLo:%4uK, cullHi:%4uK, poly:%4uK, texture:%4uK\n", cullLoCopied / 1024, cullHiCopied / 1024, polyCopied / 1024, textureCopied / 1024);
return cullLoCopied + cullHiCopied + polyCopied + textureCopied;
} }
void CReal3D::BeginFrame(void) void CReal3D::BeginFrame(void)
{ {
status |= 2; // VBlank bit // If multi-threaded, perform now any queued texture uploads to renderer before rendering begins
if (g_Config.gpuMultiThreaded)
{
for (vector<QueuedUploadTextures>::iterator it = queuedUploadTexturesRO.begin(), end = queuedUploadTexturesRO.end(); it != end; it++)
Render3D->UploadTextures(it->x, it->y, it->width, it->height);
}
Render3D->BeginFrame(); Render3D->BeginFrame();
} }
// Renders a frame by forwarding to the attached Render3D object.
void CReal3D::RenderFrame(void)
{
// NOTE(review): the check below is disabled, so the renderer is invoked every time
// regardless of whether the command port was written.
//if (commandPortWrittenRO)
Render3D->RenderFrame();
}
void CReal3D::EndFrame(void) void CReal3D::EndFrame(void)
{ {
error = false; // clear error (just needs to be done once per frame)
status &= ~2;
Render3D->EndFrame(); Render3D->EndFrame();
} }
@ -528,7 +635,12 @@ void CReal3D::StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigne
for (yy = 0; yy < 8; yy++) for (yy = 0; yy < 8; yy++)
{ {
for (xx = 0; xx < 8; xx++) for (xx = 0; xx < 8; xx++)
{
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(textureRAMDirty, destOffset * 2);
textureRAM[destOffset++] = texData[decode[(yy*8+xx)^1]]; textureRAM[destOffset++] = texData[decode[(yy*8+xx)^1]];
}
destOffset += 2048-8; // next line destOffset += 2048-8; // next line
} }
texData += 8*8; // next tile texData += 8*8; // next tile
@ -554,7 +666,11 @@ void CReal3D::StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigne
{ {
for (xx = 0; xx < 8; xx += 2) for (xx = 0; xx < 8; xx += 2)
{ {
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(textureRAMDirty, destOffset * 2);
textureRAM[destOffset++] = texData[decode[(yy^1)*8+((xx+0)^1)]/2]>>8; textureRAM[destOffset++] = texData[decode[(yy^1)*8+((xx+0)^1)]/2]>>8;
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(textureRAMDirty, destOffset * 2);
textureRAM[destOffset++] = texData[decode[(yy^1)*8+((xx+1)^1)]/2]&0xFF; textureRAM[destOffset++] = texData[decode[(yy^1)*8+((xx+1)^1)]/2]&0xFF;
} }
@ -564,6 +680,21 @@ void CReal3D::StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigne
} }
} }
} }
// Signal to renderer that textures have changed
// TO-DO: mipmaps? What if a game writes non-mipmap textures to mipmap area?
if (g_Config.gpuMultiThreaded)
{
// If multi-threaded, then queue calls to UploadTextures for render thread to perform at beginning of next frame
QueuedUploadTextures upl;
upl.x = xPos;
upl.y = yPos;
upl.width = width;
upl.height = height;
queuedUploadTextures.push_back(upl);
}
else
Render3D->UploadTextures(xPos, yPos, width, height);
} }
// Texture data will be in little endian format // Texture data will be in little endian format
@ -651,11 +782,6 @@ void CReal3D::UploadTexture(UINT32 header, UINT16 *texData)
//printf("unknown texture format %02X\n", header>>24); //printf("unknown texture format %02X\n", header>>24);
break; break;
} }
// Signal to renderer that textures have changed
// TO-DO: mipmaps? What if a game writes non-mipmap textures to mipmap area?
//Render3D->UploadTextures(x,y,width,height);
Render3D->UploadTextures(0,0,2048,2048); // TO-DO: should not have to upload all 2048x2048 texels
} }
@ -736,16 +862,22 @@ void CReal3D::WriteTexturePort(unsigned reg, UINT32 data)
void CReal3D::WriteLowCullingRAM(UINT32 addr, UINT32 data) void CReal3D::WriteLowCullingRAM(UINT32 addr, UINT32 data)
{ {
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(cullingRAMLoDirty, addr);
cullingRAMLo[addr/4] = data; cullingRAMLo[addr/4] = data;
} }
void CReal3D::WriteHighCullingRAM(UINT32 addr, UINT32 data) void CReal3D::WriteHighCullingRAM(UINT32 addr, UINT32 data)
{ {
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(cullingRAMHiDirty, addr);
cullingRAMHi[addr/4] = data; cullingRAMHi[addr/4] = data;
} }
void CReal3D::WritePolygonRAM(UINT32 addr, UINT32 data) void CReal3D::WritePolygonRAM(UINT32 addr, UINT32 data)
{ {
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(polyRAMDirty, addr);
polyRAM[addr/4] = data; polyRAM[addr/4] = data;
} }
@ -807,7 +939,11 @@ void CReal3D::Reset(void)
error = false; error = false;
commandPortWritten = false; commandPortWritten = false;
commandPortWrittenRO = false;
queuedUploadTextures.clear();
queuedUploadTexturesRO.clear();
fifoIdx = 0; fifoIdx = 0;
status = 0; status = 0;
vromTextureAddr = 0; vromTextureAddr = 0;
@ -817,8 +953,9 @@ void CReal3D::Reset(void)
dmaStatus = 0; dmaStatus = 0;
dmaUnknownReg = 0; dmaUnknownReg = 0;
memset(memoryPool, 0, MEMORY_POOL_SIZE); unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
memset(memoryPool, 0, memSize);
DebugLog("Real3D reset\n"); DebugLog("Real3D reset\n");
} }
@ -830,8 +967,15 @@ void CReal3D::Reset(void)
void CReal3D::AttachRenderer(CRender3D *Render3DPtr) void CReal3D::AttachRenderer(CRender3D *Render3DPtr)
{ {
Render3D = Render3DPtr; Render3D = Render3DPtr;
Render3D->AttachMemory(cullingRAMLo,cullingRAMHi,polyRAM,vrom,textureRAM);
// If multi-threaded, attach read-only snapshots to renderer instead of real ones
if (g_Config.gpuMultiThreaded)
Render3D->AttachMemory(cullingRAMLoRO, cullingRAMHiRO, polyRAMRO, vrom, textureRAMRO);
else
Render3D->AttachMemory(cullingRAMLo, cullingRAMHi, polyRAM, vrom, textureRAM);
Render3D->SetStep(step); Render3D->SetStep(step);
DebugLog("Real3D attached a Render3D object\n"); DebugLog("Real3D attached a Render3D object\n");
} }
@ -859,7 +1003,8 @@ void CReal3D::SetStep(int stepID)
bool CReal3D::Init(const UINT8 *vromPtr, CBus *BusObjectPtr, CIRQ *IRQObjectPtr, unsigned dmaIRQBit) bool CReal3D::Init(const UINT8 *vromPtr, CBus *BusObjectPtr, CIRQ *IRQObjectPtr, unsigned dmaIRQBit)
{ {
float memSizeMB = (float)MEMORY_POOL_SIZE/(float)0x100000; unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
float memSizeMB = (float)memSize/(float)0x100000;
// IRQ and bus objects // IRQ and bus objects
Bus = BusObjectPtr; Bus = BusObjectPtr;
@ -867,20 +1012,34 @@ bool CReal3D::Init(const UINT8 *vromPtr, CBus *BusObjectPtr, CIRQ *IRQObjectPtr,
dmaIRQ = dmaIRQBit; dmaIRQ = dmaIRQBit;
// Allocate all Real3D RAM regions // Allocate all Real3D RAM regions
memoryPool = new(std::nothrow) UINT8[MEMORY_POOL_SIZE]; memoryPool = new(std::nothrow) UINT8[memSize];
if (NULL == memoryPool) if (NULL == memoryPool)
return ErrorLog("Insufficient memory for Real3D object (needs %1.1f MB).", memSizeMB); return ErrorLog("Insufficient memory for Real3D object (needs %1.1f MB).", memSizeMB);
// Set up pointers // Set up main pointers
cullingRAMLo = (UINT32 *) &memoryPool[OFFSET_8C]; cullingRAMLo = (UINT32 *) &memoryPool[OFFSET_8C];
cullingRAMHi = (UINT32 *) &memoryPool[OFFSET_8E]; cullingRAMHi = (UINT32 *) &memoryPool[OFFSET_8E];
polyRAM = (UINT32 *) &memoryPool[OFFSET_98]; polyRAM = (UINT32 *) &memoryPool[OFFSET_98];
textureRAM = (UINT16 *) &memoryPool[OFFSET_TEXRAM]; textureRAM = (UINT16 *) &memoryPool[OFFSET_TEXRAM];
textureFIFO = (UINT32 *) &memoryPool[OFFSET_TEXFIFO]; textureFIFO = (UINT32 *) &memoryPool[OFFSET_TEXFIFO];
// If multi-threaded, set up pointers for read-only snapshots and dirty page arrays too
if (g_Config.gpuMultiThreaded)
{
cullingRAMLoRO = (UINT32 *) &memoryPool[OFFSET_8C_RO];
cullingRAMHiRO = (UINT32 *) &memoryPool[OFFSET_8E_RO];
polyRAMRO = (UINT32 *) &memoryPool[OFFSET_98_RO];
textureRAMRO = (UINT16 *) &memoryPool[OFFSET_TEXRAM_RO];
cullingRAMLoDirty = (UINT8 *) &memoryPool[OFFSET_8C_DIRTY];
cullingRAMHiDirty = (UINT8 *) &memoryPool[OFFSET_8E_DIRTY];
polyRAMDirty = (UINT8 *) &memoryPool[OFFSET_98_DIRTY];
textureRAMDirty = (UINT8 *) &memoryPool[OFFSET_TEXRAM_DIRTY];
}
// VROM pointer passed to us // VROM pointer passed to us
vrom = (UINT32 *) vromPtr; vrom = (UINT32 *) vromPtr;
DebugLog("Initialized Real3D (allocated %1.1f MB)\n", memSizeMB);
return OKAY; return OKAY;
} }

View file

@ -29,6 +29,20 @@
#ifndef INCLUDED_REAL3D_H #ifndef INCLUDED_REAL3D_H
#define INCLUDED_REAL3D_H #define INCLUDED_REAL3D_H
/*
* QueuedUploadTextures:
*
* When rendering is multi-threaded, this struct is used to represent a postponed
* call to CRender3D::UploadTextures that will be performed by the render thread
* at the beginning of the next frame, rather than directly in the PPC thread.
*/
// The four fields are passed, in this order, as the arguments of the deferred
// CRender3D::UploadTextures(x, y, width, height) call.
struct QueuedUploadTextures
{
unsigned x;       // x position of the updated region (presumably in texels within the texture sheet - confirm)
unsigned y;       // y position of the updated region
unsigned width;   // width of the updated region
unsigned height;  // height of the updated region
};
/* /*
* CReal3D: * CReal3D:
@ -60,28 +74,56 @@ public:
* SaveState Block file to load state information from. * SaveState Block file to load state information from.
*/ */
void LoadState(CBlockFile *SaveState); void LoadState(CBlockFile *SaveState);
/*
* BeginVBlank(void):
*
* Must be called before the VBlank starts.
*/
void BeginVBlank(void);
/* /*
* RenderFrame(void): * EndVBlank(void)
* *
* Traverses the scene database and renders a frame. Must be called after * Must be called after the VBlank finishes.
* BeginFrame() but before EndFrame().
*/ */
void RenderFrame(void); void EndVBlank(void);
/*
* SyncSnapshots(void):
*
* Syncs the read-only memory snapshots with the real ones so that rendering
* of the current frame can begin in the render thread. Must be called at the
* end of each frame when both the render thread and the PPC thread have finished
* their work. If multi-threaded rendering is not enabled, then this method does
* nothing.
*/
UINT32 SyncSnapshots(void);
/* /*
* BeginFrame(void): * BeginFrame(void):
* *
* Prepare to render a new frame. Must be called once per frame prior to * Prepares to render a new frame. Must be called once per frame prior to
* drawing anything. * drawing anything and must only access read-only snapshots and variables
* since it may be running in a separate thread.
*/ */
void BeginFrame(void); void BeginFrame(void);
/*
* RenderFrame(void):
*
* Traverses the scene database and renders a frame. Must be called after
* BeginFrame() but before EndFrame() and must only access read-only snapshots
* and variables since it may be running in a separate thread.
*/
void RenderFrame(void);
/* /*
* EndFrame(void): * EndFrame(void):
* *
* Signals the end of rendering for this frame. Must be called last during * Signals the end of rendering for this frame. Must be called last during
* the frame. * the frame and must only access read-only snapshots and variables since it
* may be running in a separate thread.
*/ */
void EndFrame(void); void EndFrame(void);
@ -342,7 +384,9 @@ private:
unsigned Shift(UINT8 *data, unsigned numBits); unsigned Shift(UINT8 *data, unsigned numBits);
void StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigned height, UINT16 *texData, unsigned bytesPerTexel); void StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigned height, UINT16 *texData, unsigned bytesPerTexel);
void UploadTexture(UINT32 header, UINT16 *texData); void UploadTexture(UINT32 header, UINT16 *texData);
UINT32 UpdateSnapshots(bool copyWhole);
UINT32 UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty);
// Renderer attached to the Real3D // Renderer attached to the Real3D
CRender3D *Render3D; CRender3D *Render3D;
@ -353,18 +397,34 @@ private:
// Error flag (to limit errors to once per frame) // Error flag (to limit errors to once per frame)
bool error; // true if an error occurred this frame bool error; // true if an error occurred this frame
// Real3D memory // Real3D memory
UINT8 *memoryPool; // all memory allocated here UINT8 *memoryPool; // all memory allocated here
UINT32 *cullingRAMLo; // 4MB of culling RAM at 8C000000 UINT32 *cullingRAMLo; // 4MB of culling RAM at 8C000000
UINT32 *cullingRAMHi; // 1MB of culling RAM at 8E000000 UINT32 *cullingRAMHi; // 1MB of culling RAM at 8E000000
UINT32 *polyRAM; // 4MB of polygon RAM at 98000000 UINT32 *polyRAM; // 4MB of polygon RAM at 98000000
UINT16 *textureRAM; // 8MB of internal texture RAM UINT16 *textureRAM; // 8MB of internal texture RAM
UINT32 *textureFIFO; // 1MB texture FIFO at 0x94000000 UINT32 *textureFIFO; // 1MB texture FIFO at 0x94000000
unsigned fifoIdx; // index into texture FIFO unsigned fifoIdx; // index into texture FIFO
UINT32 vromTextureAddr; // VROM texture port address data UINT32 vromTextureAddr; // VROM texture port address data
UINT32 vromTextureHeader; // VROM texture port header data UINT32 vromTextureHeader; // VROM texture port header data
// Read-only snapshots
UINT32 *cullingRAMLoRO; // 4MB of culling RAM at 8C000000 [read-only snapshot]
UINT32 *cullingRAMHiRO; // 1MB of culling RAM at 8E000000 [read-only snapshot]
UINT32 *polyRAMRO; // 4MB of polygon RAM at 98000000 [read-only snapshot]
UINT16 *textureRAMRO; // 8MB of internal texture RAM [read-only snapshot]
// Arrays to keep track of dirty pages in memory regions
UINT8 *cullingRAMLoDirty;
UINT8 *cullingRAMHiDirty;
UINT8 *polyRAMDirty;
UINT8 *textureRAMDirty;
// Queued texture uploads
vector<QueuedUploadTextures> queuedUploadTextures;
vector<QueuedUploadTextures> queuedUploadTexturesRO; // Read-only copy of queue
// Big endian bus object for DMA memory access // Big endian bus object for DMA memory access
CBus *Bus; CBus *Bus;
@ -383,6 +443,7 @@ private:
// Command port // Command port
bool commandPortWritten; bool commandPortWritten;
bool commandPortWrittenRO; // Read-only copy of flag
// Status and command registers // Status and command registers
UINT32 status; UINT32 status;
@ -394,7 +455,6 @@ private:
unsigned tapIDSize; // size of ID data in bits unsigned tapIDSize; // size of ID data in bits
unsigned tapTDO; // bit shifted out to TDO unsigned tapTDO; // bit shifted out to TDO
int tapState; // current state int tapState; // current state
}; };

View file

@ -34,6 +34,23 @@
#include <cstring> #include <cstring>
#include "Supermodel.h" #include "Supermodel.h"
// Macros that divide memory regions into pages and mark them as dirty when they are written to.
// A dirty array holds one bit per page (eight pages per byte). Macro arguments are fully
// parenthesised so that any expression may be passed without precedence surprises.
#define PAGE_WIDTH 10
#define PAGE_SIZE (1<<PAGE_WIDTH)
// Number of bytes needed in a dirty array to cover arraySize bytes of memory
#define DIRTY_SIZE(arraySize) (1+((arraySize)-1)/(8*PAGE_SIZE))
// Sets the dirty bit of the page that contains byte offset addr
#define MARK_DIRTY(dirtyArray, addr) ((dirtyArray)[(addr)>>(PAGE_WIDTH+3)] |= 1<<(((addr)>>PAGE_WIDTH)&7))

// Offsets of memory regions within TileGen memory pool
#define OFFSET_VRAM 0x000000 // 0x120000 bytes of VRAM
#define OFFSET_PAL 0x120000 // 0x020000 bytes of palette
#define MEM_POOL_SIZE_RW (0x120000+0x020000)
#define OFFSET_VRAM_RO 0x140000 // [read-only snapshot]
#define OFFSET_PAL_RO 0x260000 // [read-only snapshot]
#define MEM_POOL_SIZE_RO (0x120000+0x020000)
#define OFFSET_VRAM_DIRTY 0x280000 // dirty page array for VRAM
#define OFFSET_PAL_DIRTY (OFFSET_VRAM_DIRTY+DIRTY_SIZE(0x120000)) // dirty page array for palette
#define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(MEM_POOL_SIZE_RO))
#define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY)
/****************************************************************************** /******************************************************************************
Save States Save States
@ -42,7 +59,7 @@
void CTileGen::SaveState(CBlockFile *SaveState) void CTileGen::SaveState(CBlockFile *SaveState)
{ {
SaveState->NewBlock("Tile Generator", __FILE__); SaveState->NewBlock("Tile Generator", __FILE__);
SaveState->Write(memoryPool, 0x100000+0x20000); SaveState->Write(vram, 0x120000); // Don't write out palette, read-only snapshots or dirty page arrays, just VRAM
SaveState->Write(regs, sizeof(regs)); SaveState->Write(regs, sizeof(regs));
} }
@ -55,16 +72,17 @@ void CTileGen::LoadState(CBlockFile *SaveState)
} }
// Load memory one word at a time // Load memory one word at a time
for (int i = 0; i < (0x100000+0x20000); i += 4) for (int i = 0; i < 0x120000; i += 4)
{ {
UINT32 data; UINT32 data;
SaveState->Read(&data, sizeof(data));
Render2D->WriteVRAM(i, data);
*(UINT32 *) &memoryPool[i] = data;
}
SaveState->Read(&data, sizeof(data));
WriteRAM(i, data);
}
SaveState->Read(regs, sizeof(regs)); SaveState->Read(regs, sizeof(regs));
// If multi-threaded, update read-only snapshots too
if (g_Config.gpuMultiThreaded)
UpdateSnapshots(true);
} }
@ -72,9 +90,8 @@ void CTileGen::LoadState(CBlockFile *SaveState)
Rendering Rendering
******************************************************************************/ ******************************************************************************/
void CTileGen::BeginFrame(void) void CTileGen::BeginVBlank(void)
{ {
Render2D->BeginFrame();
/* /*
printf("08: %X\n", regs[0x08/4]); printf("08: %X\n", regs[0x08/4]);
printf("0C: %X\n", regs[0x0C/4]); printf("0C: %X\n", regs[0x0C/4]);
@ -88,25 +105,144 @@ void CTileGen::BeginFrame(void)
*/ */
} }
// Called after the VBlank finishes. Currently has nothing to do.
void CTileGen::EndVBlank(void)
{
// Intentionally empty
}
/*
 * SyncSnapshots(void):
 *
 * Refreshes the read-only snapshots from the live memory using the dirty page
 * arrays. Returns the number of bytes copied, or 0 when GPU multi-threading is
 * disabled (in which case nothing is copied).
 */
UINT32 CTileGen::SyncSnapshots(void)
{
	// Only the pages flagged as dirty are copied (copyWhole = false)
	return (g_Config.gpuMultiThreaded ? UpdateSnapshots(false) : 0);
}
/*
 * UpdateSnapshot(copyWhole, src, dst, size, dirty):
 *
 * Brings one read-only snapshot region up to date with its live counterpart.
 *
 * Parameters:
 *		copyWhole	If true, the entire region is copied regardless of the
 *					dirty bitmap.
 *		src			Start of the live (read/write) region.
 *		dst			Start of the snapshot region (same size as src).
 *		size		Size of the region in bytes.
 *		dirty		Dirty page bitmap for the region, DIRTY_SIZE(size) bytes
 *					long. Bit j of byte i covers page i*8+j, each page being
 *					PAGE_SIZE bytes. All visited bits are cleared on return.
 *
 * Returns:
 *		Number of bytes copied.
 */
UINT32 CTileGen::UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty)
{
	unsigned dirtySize = DIRTY_SIZE(size);
	if (copyWhole)
	{
		// If updating whole region, then just copy all data in one go
		memcpy(dst, src, size);
		memset(dirty, 0, dirtySize);
		return size;
	}

	// Otherwise, walk the dirty bitmap and copy only the pages that were written to
	UINT32 copied = 0;
	for (unsigned i = 0; i < dirtySize; i++)
	{
		UINT8 d = dirty[i];
		if (!d)
			continue;	// none of these 8 pages were touched
		dirty[i] = 0;

		// Byte offset of the first page covered by this bitmap byte
		unsigned offset = i * 8 * PAGE_SIZE;
		for (; d != 0 && offset < size; d >>= 1, offset += PAGE_SIZE)
		{
			if (!(d&1))
				continue;

			// Copy the page plus an extra 4 bytes to allow for a 32-bit write that started in
			// this page and spilled into the next one, but never read or write past the end
			// of the region (this also covers regions that are not a whole number of pages).
			unsigned remaining = size - offset;
			UINT32 toCopy = (remaining > PAGE_SIZE + 4 ? PAGE_SIZE + 4 : remaining);
			memcpy(dst + offset, src + offset, toCopy);
			copied += toCopy;
		}
	}
	return copied;
}
/*
 * UpdateSnapshots(copyWhole):
 *
 * Refreshes the palette and VRAM snapshots (whole regions if copyWhole is
 * true, otherwise dirty pages only) and then the register snapshot, which is
 * always copied in full. Returns the total number of bytes copied.
 */
UINT32 CTileGen::UpdateSnapshots(bool copyWhole)
{
	// Palette first, then VRAM
	const UINT32 palBytes = UpdateSnapshot(copyWhole, (UINT8*)pal, (UINT8*)palRO, 0x020000, palDirty);
	const UINT32 vramBytes = UpdateSnapshot(copyWhole, (UINT8*)vram, (UINT8*)vramRO, 0x120000, vramDirty);

	// The register file has no dirty tracking, so it is duplicated wholesale every time
	const UINT32 regsBytes = sizeof(regs);
	memcpy(regsRO, regs, regsBytes);

	//printf("TileGen copied - pal:%4uK, vram:%4uK, regs:%uK\n", palBytes / 1024, vramBytes / 1024, regsBytes / 1024);
	return palBytes + vramBytes + regsBytes;
}
// Starts a new frame by forwarding to the attached Render2D object.
void CTileGen::BeginFrame(void)
{
// NOTE: Render2D->WriteVRAM(addr, data) is no longer being called for RAM addresses that are written
// to and instead this class relies upon the fact that Render2D currently marks everything as dirty
// with every frame. If this were to change in the future then code to handle marking the correct
// parts of the renderer as dirty would need to be added here.
Render2D->BeginFrame();
}
void CTileGen::EndFrame(void) void CTileGen::EndFrame(void)
{ {
Render2D->EndFrame(); Render2D->EndFrame();
} }
/****************************************************************************** /******************************************************************************
Emulation Functions Emulation Functions
******************************************************************************/ ******************************************************************************/
UINT32 CTileGen::ReadRAM(unsigned addr) UINT32 CTileGen::ReadRAM(unsigned addr)
{ {
return *(UINT32 *) &memoryPool[addr]; return *(UINT32 *) &vram[addr];
} }
void CTileGen::WriteRAM(unsigned addr, UINT32 data) void CTileGen::WriteRAM(unsigned addr, UINT32 data)
{ {
Render2D->WriteVRAM(addr,data); // inform renderer of update first if (g_Config.gpuMultiThreaded)
*(UINT32 *) &memoryPool[addr] = data; MARK_DIRTY(vramDirty, addr);
*(UINT32 *) &vram[addr] = data;
// Update palette if required
if (addr >= 0x100000)
{
addr -= 0x100000;
unsigned color = addr/4; // color index
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(palDirty, addr);
WritePalette(color, data);
}
}
void CTileGen::InitPalette(void)
{
for (int i = 0; i < 0x20000/4; i++)
{
WritePalette(i, vram[0x100000/4 + i]);
if (g_Config.gpuMultiThreaded)
palRO[i] = pal[i];
}
}
void CTileGen::WritePalette(unsigned color, UINT32 data)
{
UINT8 r, g, b, a;
a = 0xFF * ((data>>15)&1); // decode the RGBA (make alpha 0xFF or 0x00)
a = ~a; // invert it (set on Model 3 means clear pixel)
if ((data&0x8000))
r = g = b = 0;
else
{
b = (data>>7)&0xF8;
g = (data>>2)&0xF8;
r = (data<<3)&0xF8;
}
pal[color] = (a<<24)|(b<<16)|(g<<8)|r;
} }
void CTileGen::WriteRegister(unsigned reg, UINT32 data) void CTileGen::WriteRegister(unsigned reg, UINT32 data)
@ -136,8 +272,13 @@ void CTileGen::WriteRegister(unsigned reg, UINT32 data)
void CTileGen::Reset(void) void CTileGen::Reset(void)
{ {
unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
memset(memoryPool, 0, memSize);
memset(regs, 0, sizeof(regs)); memset(regs, 0, sizeof(regs));
memset(memoryPool, 0, 0x120000); memset(regsRO, 0, sizeof(regsRO));
InitPalette();
DebugLog("Tile Generator reset\n"); DebugLog("Tile Generator reset\n");
} }
@ -149,22 +290,48 @@ void CTileGen::Reset(void)
void CTileGen::AttachRenderer(CRender2D *Render2DPtr) void CTileGen::AttachRenderer(CRender2D *Render2DPtr)
{ {
Render2D = Render2DPtr; Render2D = Render2DPtr;
Render2D->AttachVRAM(memoryPool);
Render2D->AttachRegisters(regs); // If multi-threaded, attach read-only snapshots to renderer instead of real ones
if (g_Config.gpuMultiThreaded)
{
Render2D->AttachVRAM(vramRO);
Render2D->AttachPalette(palRO);
Render2D->AttachRegisters(regsRO);
}
else
{
Render2D->AttachVRAM(vram);
Render2D->AttachPalette(pal);
Render2D->AttachRegisters(regs);
}
DebugLog("Tile Generator attached a Render2D object\n"); DebugLog("Tile Generator attached a Render2D object\n");
} }
#define MEMORY_POOL_SIZE 0x120000
bool CTileGen::Init(CIRQ *IRQObjectPtr) bool CTileGen::Init(CIRQ *IRQObjectPtr)
{ {
float memSizeMB = (float)MEMORY_POOL_SIZE/(float)0x100000; unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
float memSizeMB = (float)memSize/(float)0x100000;
// Allocate all memory for ROMs and PPC RAM // Allocate all memory for all TileGen RAM regions
memoryPool = new(std::nothrow) UINT8[MEMORY_POOL_SIZE]; memoryPool = new(std::nothrow) UINT8[memSize];
if (NULL == memoryPool) if (NULL == memoryPool)
return ErrorLog("Insufficient memory for tile generator object (needs %1.1f MB).", memSizeMB); return ErrorLog("Insufficient memory for tile generator object (needs %1.1f MB).", memSizeMB);
// Set up main pointers
vram = (UINT8 *) &memoryPool[OFFSET_VRAM];
pal = (UINT32 *) &memoryPool[OFFSET_PAL];
// If multi-threaded, set up pointers for read-only snapshots and dirty page arrays too
if (g_Config.gpuMultiThreaded)
{
vramRO = (UINT8 *) &memoryPool[OFFSET_VRAM_RO];
palRO = (UINT32 *) &memoryPool[OFFSET_PAL_RO];
vramDirty = (UINT8 *) &memoryPool[OFFSET_VRAM_DIRTY];
palDirty = (UINT8 *) &memoryPool[OFFSET_PAL_DIRTY];
}
// Hook up the IRQ controller // Hook up the IRQ controller
IRQ = IRQObjectPtr; IRQ = IRQObjectPtr;

View file

@ -58,22 +58,49 @@ public:
*/ */
void LoadState(CBlockFile *SaveState); void LoadState(CBlockFile *SaveState);
/*
* BeginVBlank(void):
*
* Must be called before the VBlank starts.
*/
void BeginVBlank(void);
/*
* EndVBlank(void)
*
* Must be called after the VBlank finishes.
*/
void EndVBlank(void);
/*
* SyncSnapshots(void):
*
* Syncs the read-only memory snapshots with the real ones so that rendering
* of the current frame can begin in the render thread. Must be called at the
* end of each frame when both the render thread and the PPC thread have finished
* their work. If multi-threaded rendering is not enabled, then this method does
* nothing.
*/
UINT32 SyncSnapshots(void);
/* /*
* BeginFrame(void): * BeginFrame(void):
* *
* Prepare to render a new frame. Must be called once per frame prior to * Prepares to render a new frame. Must be called once per frame prior to
* drawing anything. * drawing anything and must only access read-only snapshots and variables
* since it may be running in a separate thread.
*/ */
void BeginFrame(void); void BeginFrame(void);
/* /*
* EndFrame(void): * EndFrame(void):
* *
* Signals the end of rendering for this frame. Must be called last during * Signals the end of rendering for this frame. Must be called last during
* the frame. * the frame and must only access read-only snapshots and variables since it
* may be running in a separate thread.
*/ */
void EndFrame(void); void EndFrame(void);
/* /*
* ReadRAM(addr): * ReadRAM(addr):
* *
@ -163,14 +190,32 @@ public:
~CTileGen(void); ~CTileGen(void);
private: private:
// Private member functions
void InitPalette(void);
void WritePalette(unsigned color, UINT32 data);
UINT32 UpdateSnapshots(bool copyWhole);
UINT32 UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty);
CIRQ *IRQ; // IRQ controller the tile generator is attached to CIRQ *IRQ; // IRQ controller the tile generator is attached to
CRender2D *Render2D; // 2D renderer the tile generator is attached to CRender2D *Render2D; // 2D renderer the tile generator is attached to
// Tile generator VRAM // Tile generator VRAM
UINT8 *memoryPool; // all memory allocated here UINT8 *memoryPool; // all memory allocated here
UINT8 *vram; // 1.8MB of VRAM
UINT32 *pal; // 0x20000 byte (32K colors) palette
// Read-only snapshots
UINT8 *vramRO; // 1.8MB of VRAM [read-only snapshot]
UINT32 *palRO; // 0x20000 byte (32K colors) palette [read-only snapshot]
// Arrays to keep track of dirty pages in memory regions
UINT8 *vramDirty;
UINT8 *palDirty;
// Registers // Registers
UINT32 regs[64]; UINT32 regs[64];
UINT32 regsRO[64]; // Read-only copy of registers
}; };

View file

@ -360,6 +360,8 @@ static void ApplySettings(CINIFile *INI, const char *section)
// Model 3 // Model 3
if (OKAY == INI->Get(section, "MultiThreaded", x)) if (OKAY == INI->Get(section, "MultiThreaded", x))
g_Config.multiThreaded = x ? true : false; g_Config.multiThreaded = x ? true : false;
if (OKAY == INI->Get(section, "GPUMultiThreaded", x))
g_Config.gpuMultiThreaded = x ? true : false;
if (OKAY == INI->Get(section, "PowerPCFrequency", x)) if (OKAY == INI->Get(section, "PowerPCFrequency", x))
g_Config.SetPowerPCFrequency(x); g_Config.SetPowerPCFrequency(x);
@ -381,7 +383,7 @@ static void ApplySettings(CINIFile *INI, const char *section)
#ifdef SUPERMODEL_WIN32 #ifdef SUPERMODEL_WIN32
if (OKAY == INI->Get(section, "ForceFeedback", x)) if (OKAY == INI->Get(section, "ForceFeedback", x))
g_Config.forceFeedback = x ? true : false; g_Config.forceFeedback = x ? true : false;
#endif #endif // SUPERMODEL_WIN32
// OSD // OSD
INI->Get(section, "XResolution", g_Config.xRes); INI->Get(section, "XResolution", g_Config.xRes);
@ -457,6 +459,7 @@ static void LogConfig(void)
// CModel3Config // CModel3Config
InfoLog("\tMultiThreaded = %d", g_Config.multiThreaded); InfoLog("\tMultiThreaded = %d", g_Config.multiThreaded);
InfoLog("\tGPUMultiThreaded = %d", g_Config.gpuMultiThreaded);
InfoLog("\tPowerPCFrequency = %d", g_Config.GetPowerPCFrequency()); InfoLog("\tPowerPCFrequency = %d", g_Config.GetPowerPCFrequency());
// CSoundBoardConfig // CSoundBoardConfig
@ -709,8 +712,9 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
unsigned fpsFramesElapsed, framesElapsed; unsigned fpsFramesElapsed, framesElapsed;
unsigned showCrosshairs = 0; // bit 1: player 1 crosshair, bit 0: player 2 unsigned showCrosshairs = 0; // bit 1: player 1 crosshair, bit 0: player 2
bool gameHasLightguns = false; bool gameHasLightguns = false;
bool quit = 0; bool quit = false;
bool paused = 0; bool paused = false;
bool dumpTimings = false;
// Initialize and load ROMs // Initialize and load ROMs
if (OKAY != Model3->Init()) if (OKAY != Model3->Init())
@ -791,7 +795,7 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
// Poll the inputs // Poll the inputs
if (!Inputs->Poll(Model3->GetGameInfo(), xOffset, yOffset, xRes, yRes)) if (!Inputs->Poll(Model3->GetGameInfo(), xOffset, yOffset, xRes, yRes))
quit = 1; quit = true;
#ifdef SUPERMODEL_DEBUGGER #ifdef SUPERMODEL_DEBUGGER
bool processUI = true; bool processUI = true;
@ -802,12 +806,12 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
// Check if debugger requests exit or pause // Check if debugger requests exit or pause
if (Debugger->CheckExit()) if (Debugger->CheckExit())
{ {
quit = 1; quit = true;
processUI = false; processUI = false;
} }
else if (Debugger->CheckPause()) else if (Debugger->CheckPause())
{ {
paused = 1; paused = true;
processUI = false; processUI = false;
} }
} }
@ -819,7 +823,7 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
if (Inputs->uiExit->Pressed()) if (Inputs->uiExit->Pressed())
{ {
// Quit emulator // Quit emulator
quit = 1; quit = true;
} }
else if (Inputs->uiReset->Pressed()) else if (Inputs->uiReset->Pressed())
{ {
@ -979,9 +983,12 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
// Dump input states // Dump input states
Inputs->DumpState(Model3->GetGameInfo()); Inputs->DumpState(Model3->GetGameInfo());
} }
else if (Inputs->uiDumpTimings->Pressed())
{
dumpTimings = !dumpTimings;
}
else if (Inputs->uiSelectCrosshairs->Pressed() && gameHasLightguns) else if (Inputs->uiSelectCrosshairs->Pressed() && gameHasLightguns)
{ {
showCrosshairs++; showCrosshairs++;
switch ((showCrosshairs&3)) switch ((showCrosshairs&3))
{ {
@ -1042,6 +1049,9 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
startTicks = currentTicks; startTicks = currentTicks;
} }
} }
if (dumpTimings && !paused)
Model3->DumpTimings();
} }
// Make sure all threads are paused before shutting down // Make sure all threads are paused before shutting down