New multi-threaded rendering changes that parallelise graphics rendering and PPC execution in order to increase performance on multi-core machines.

New gpuMultiThreaded config option to enable/disable multi-threaded rendering (enabled by default, disabling it reverts to previous behaviour).
Other rendering optimisations:
 - texture uploads now only affect the appropriate region of the texture sheet, rather than re-uploading the whole sheet each time
 - performance of clearing the model caches has been improved
New Alt+O key input added to toggle outputting of frame timings for debugging purposes.
This commit is contained in:
Nik Henson 2012-01-16 23:21:14 +00:00
parent 0835e38b94
commit d1d5175548
12 changed files with 881 additions and 244 deletions

View file

@ -834,13 +834,8 @@ void CRender3D::ClearModelCache(ModelCache *Cache)
Cache->vboCurOffset = 0;
for (int i = 0; i < 2; i++)
Cache->curVertIdx[i] = 0;
if (!Cache->dynamic)
memset(Cache->lut, 0xFF, sizeof(INT16)*Cache->lutSize); // set all to -1
else
{
for (int i = 0; i < Cache->numModels; i++)
Cache->lut[Cache->Models[i].lutIdx] = -1;
}
for (int i = 0; i < Cache->numModels; i++)
Cache->lut[Cache->Models[i].lutIdx] = -1;
Cache->numModels = 0;
ClearDisplayList(Cache);

View file

@ -737,24 +737,7 @@ void CRender2D::EndFrame(void)
Emulation Callbacks
******************************************************************************/
void CRender2D::WritePalette(unsigned color, UINT32 data)
{
UINT8 r, g, b, a;
a = 0xFF * ((data>>15)&1); // decode the RGBA (make alpha 0xFF or 0x00)
a = ~a; // invert it (set on Model 3 means clear pixel)
if ((data&0x8000))
r = g = b = 0;
else
{
b = (data>>7)&0xF8;
g = (data>>2)&0xF8;
r = (data<<3)&0xF8;
}
pal[color] = (a<<24)|(b<<16)|(g<<8)|r;
}
void CRender2D::WriteVRAM(unsigned addr, UINT32 data)
{
@ -763,31 +746,6 @@ void CRender2D::WriteVRAM(unsigned addr, UINT32 data)
// For now, mark everything as dirty
allDirty = true;
// Palette
if (addr >= 0x100000)
{
unsigned color = (addr-0x100000)/4; // color index
WritePalette(color, data);
}
}
/*
* InitPalette():
*
* This must be called from AttachVRAM() to initialize the palette. The reason
* is that because WriteVRAM() always compares incoming data to what is already
* in the VRAM, there is no actual way to initialize the palette by calling
* WriteVRAM() and passing it the initial VRAM contents. It will always fail to
* update because nothing is being changed.
*
* This function fixes the transparent pixel bug that frequently occurred when
* loading save states in Supermodel 0.1a.
*/
void CRender2D::InitPalette(void)
{
for (int i = 0; i < 0x20000/4; i++)
WritePalette(i, vram[0x100000/4 + i]);
}
@ -801,14 +759,19 @@ void CRender2D::AttachRegisters(const UINT32 *regPtr)
DebugLog("Render2D attached registers\n");
}
void CRender2D::AttachPalette(const UINT32 *palPtr)
{
pal = palPtr;
DebugLog("Render2D attached palette\n");
}
void CRender2D::AttachVRAM(const UINT8 *vramPtr)
{
vram = (UINT32 *) vramPtr;
InitPalette();
DebugLog("Render2D attached VRAM\n");
}
#define MEMORY_POOL_SIZE (512*512*4+0x20000)
#define MEMORY_POOL_SIZE (512*512*4)
bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes)
{
@ -832,7 +795,6 @@ bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned
// Set up pointers to memory regions
surf = (UINT32 *) memoryPool;
pal = (UINT32 *) &memoryPool[512*512*4];
// Resolution
xPixels = xRes;

View file

@ -91,6 +91,8 @@ public:
*/
void AttachRegisters(const UINT32 *regPtr);
void AttachPalette(const UINT32 *palPtr);
/*
* AttachVRAM(vramPtr):
*
@ -145,11 +147,10 @@ private:
void DisplayLayer(int layerNum, GLfloat z);
void Setup2D(void);
void ColorOffset(GLfloat colorOffset[3], UINT32 reg);
void WritePalette(unsigned color, UINT32 data);
void InitPalette(void);
// Data received from tile generator device object
const UINT32 *vram;
const UINT32 *pal;
const UINT32 *regs;
// OpenGL data
@ -172,7 +173,6 @@ private:
// Buffers
UINT8 *memoryPool; // all memory is allocated here
UINT32 *surf; // 512x512x32bpp pixel surface
UINT32 *pal; // 0x20000 byte (32K colors) palette
};

View file

@ -50,10 +50,11 @@ CInputs::CInputs(CInputSystem *system) : m_system(system)
uiMusicVolDown = AddSwitchInput("UIMusicVolDown", "Decrease Music Volume", GAME_INPUT_UI, "KEY_F9");
uiSoundVolUp = AddSwitchInput("UISoundVolUp", "Increase Sound Volume", GAME_INPUT_UI, "KEY_F12");
uiSoundVolDown = AddSwitchInput("UISoundVolDown", "Decrease Sound Volume", GAME_INPUT_UI, "KEY_F11");
uiDumpInpState = AddSwitchInput("UIDumpInputState", "Dump Input State", GAME_INPUT_UI, "NONE"); // disabled for release
uiClearNVRAM = AddSwitchInput("UIClearNVRAM", "Clear NVRAM", GAME_INPUT_UI, "KEY_ALT+KEY_N");
uiSelectCrosshairs = AddSwitchInput("UISelectCrosshairs", "Select Crosshairs", GAME_INPUT_UI, "KEY_ALT+KEY_I");
uiToggleFrLimit = AddSwitchInput("UIToggleFrameLimit", "Toggle Frame Limiting", GAME_INPUT_UI, "KEY_ALT+KEY_T");
uiDumpInpState = AddSwitchInput("UIDumpInputState", "Dump Input State", GAME_INPUT_UI, "KEY_ALT+KEY_U");
uiDumpTimings = AddSwitchInput("UIDumpTimings", "Dump Frame Timings", GAME_INPUT_UI, "KEY_ALT+KEY_O");
#ifdef SUPERMODEL_DEBUGGER
uiEnterDebugger = AddSwitchInput("UIEnterDebugger", "Enter Debugger", GAME_INPUT_UI, "KEY_ALT+KEY_B");
#endif

View file

@ -101,10 +101,11 @@ public:
CSwitchInput *uiMusicVolDown;
CSwitchInput *uiSoundVolUp;
CSwitchInput *uiSoundVolDown;
CSwitchInput *uiDumpInpState;
CSwitchInput *uiClearNVRAM;
CSwitchInput *uiSelectCrosshairs;
CSwitchInput *uiToggleFrLimit;
CSwitchInput *uiDumpInpState;
CSwitchInput *uiDumpTimings;
#ifdef SUPERMODEL_DEBUGGER
CSwitchInput *uiEnterDebugger;
#endif

View file

@ -1910,6 +1910,8 @@ void CModel3::ClearNVRAM(void)
void CModel3::RunFrame(void)
{
UINT32 start = CThread::GetTicks();
// See if currently running multi-threaded
if (g_Config.multiThreaded)
{
@ -1917,39 +1919,60 @@ void CModel3::RunFrame(void)
if (!StartThreads())
goto ThreadError;
// Wake threads for sound board (if sync'd) and drive board (if attached) so they can process a frame
if (syncSndBrdThread && !sndBrdThreadSync->Post() || DriveBoard.IsAttached() && !drvBrdThreadSync->Post())
// Wake threads for PPC main board (if multi-threading GPU), sound board (if sync'd) and drive board (if attached) so they can process a frame
if (g_Config.gpuMultiThreaded && !ppcBrdThreadSync->Post() ||
syncSndBrdThread && !sndBrdThreadSync->Post() ||
DriveBoard.IsAttached() && !drvBrdThreadSync->Post())
goto ThreadError;
// At the same time, process a single frame for main board (PPC) in this thread
RunMainBoardFrame();
// If not multi-threading GPU, then run PPC main board for a frame and sync GPUs now in this thread
if (!g_Config.gpuMultiThreaded)
{
RunMainBoardFrame();
SyncGPUs();
}
// Render frame if ready to do so
if (gpusReady)
RenderFrame();
// Enter notify wait critical section
if (!notifyLock->Lock())
goto ThreadError;
// Wait for sound board and drive board threads to finish their work (if they haven't done so already)
while (syncSndBrdThread && !sndBrdThreadDone || DriveBoard.IsAttached() && !drvBrdThreadDone)
// Wait for PPC main board, sound board and drive board threads to finish their work (if they are running and haven't finished already)
while (g_Config.gpuMultiThreaded && !ppcBrdThreadDone ||
syncSndBrdThread && !sndBrdThreadDone ||
DriveBoard.IsAttached() && !drvBrdThreadDone)
{
if (!notifySync->Wait(notifyLock))
goto ThreadError;
}
ppcBrdThreadDone = false;
sndBrdThreadDone = false;
drvBrdThreadDone = false;
// Leave notify wait critical section
if (!notifyLock->Unlock())
goto ThreadError;
// If multi-threading GPU, then sync GPUs last while PPC main board thread is waiting
if (g_Config.gpuMultiThreaded)
SyncGPUs();
}
else
{
// If not multi-threaded, then just process a single frame for main board, sound board and drive board in turn in this thread
// If not multi-threaded, then just process and render a single frame for PPC main board, sound board and drive board in turn in this thread
RunMainBoardFrame();
SoundBoard.RunFrame();
SyncGPUs();
RenderFrame();
RunSoundBoardFrame();
if (DriveBoard.IsAttached())
DriveBoard.RunFrame();
RunDriveBoardFrame();
}
frameTicks = CThread::GetTicks() - start;
return;
ThreadError:
@ -1957,13 +1980,129 @@ ThreadError:
g_Config.multiThreaded = false;
}
/*
 * RunMainBoardFrame():
 *
 * Runs the PowerPC main board for one frame's worth of cycles. The frame is
 * split into a short VBlank portion followed by the active display portion.
 * The VBlank portion (VBlank begin/end notifications to the tile generator and
 * GPU, the VBlank IRQ and the MIDI IRQ loop) is only executed once gpusReady
 * is set; the active display portion always runs. The wall-clock time taken is
 * stored in ppcTicks.
 */
void CModel3::RunMainBoardFrame(void)
{
// Timestamp used to compute ppcTicks at the end
UINT32 start = CThread::GetTicks();
// Compute display and VBlank timings
// (frequency is multiplied by 1,000,000 here, so it is presumably returned in MHz; 60 frames per second)
unsigned frameCycles = g_Config.GetPowerPCFrequency()*1000000/60;
unsigned vblCycles = (unsigned) ((float) frameCycles * 2.5f/100.0f); // 2.5% vblank (ridiculously short and wrong but bigger values cause flicker in Daytona)
unsigned dispCycles = frameCycles - vblCycles;
// VBlank
// Skipped entirely until gpusReady has been set
if (gpusReady)
{
TileGen.BeginVBlank();
GPU.BeginVBlank();
// Raise IRQ 0x02 and let the PowerPC run for the VBlank period
IRQ.Assert(0x02);
ppc_execute(vblCycles);
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
/*
* Sound:
*
* Bit 0x20 of the MIDI control port appears to enable periodic interrupts,
* which are used to send MIDI commands. Often games will write 0x27, send
* a series of commands, and write 0x06 to stop. Other games, like Star
* Wars Trilogy and Sega Rally 2, will enable interrupts at the beginning
* by writing 0x37 and will disable/enable interrupts to control command
* output.
*/
//printf("\t-- BEGIN (Ctrl=%02X, IRQEn=%02X, IRQPend=%02X) --\n", midiCtrlPort, IRQ.ReadIRQEnable()&0x40, IRQ.ReadIRQState());
// Counts MIDI interrupts fired this frame so the loop below cannot spin forever
int irqCount = 0;
// midiCtrlPort is re-read on every iteration
while ((midiCtrlPort&0x20))
//while (midiCtrlPort == 0x27) // 27 triggers IRQ sequence, 06 stops it
{
// Don't waste time firing MIDI interrupts if game has disabled them
if ((IRQ.ReadIRQEnable()&0x40) == 0)
break;
// Process MIDI interrupt
IRQ.Assert(0x40);
ppc_execute(200); // give PowerPC time to acknowledge IRQ
IRQ.Deassert(0x40);
ppc_execute(200); // acknowledge that IRQ was deasserted (TODO: is this really needed?)
++irqCount;
// Hard cap: give up once more than 128 interrupts have been fired in this frame
if (irqCount > 128)
{
//printf("\tMIDI FIFO OVERFLOW! (IRQEn=%02X, IRQPend=%02X)\n", IRQ.ReadIRQEnable()&0x40, IRQ.ReadIRQState());
break;
}
}
//printf("\t-- END --\n");
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
// End VBlank
GPU.EndVBlank();
TileGen.EndVBlank();
IRQ.Assert(0x0D);
}
// Run the PowerPC for the active display part of the frame
ppc_execute(dispCycles);
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
// Record how long this frame's PPC emulation took (same units as CThread::GetTicks())
ppcTicks = CThread::GetTicks() - start;
}
void CModel3::SyncGPUs(void)
{
UINT32 start = CThread::GetTicks();
syncSize = GPU.SyncSnapshots() + TileGen.SyncSnapshots();
gpusReady = true;
syncTicks = CThread::GetTicks() - start;
}
void CModel3::RenderFrame(void)
{
UINT32 start = CThread::GetTicks();
// Render frame
TileGen.BeginFrame();
GPU.BeginFrame();
GPU.RenderFrame();
GPU.EndFrame();
TileGen.EndFrame();
renderTicks = CThread::GetTicks() - start;
}
bool CModel3::RunSoundBoardFrame(void)
{
UINT32 start = CThread::GetTicks();
bool bufferFull = SoundBoard.RunFrame();
sndTicks = CThread::GetTicks() - start;
return bufferFull;
}
void CModel3::RunDriveBoardFrame(void)
{
UINT32 start = CThread::GetTicks();
DriveBoard.RunFrame();
drvTicks = CThread::GetTicks() - start;
}
bool CModel3::StartThreads(void)
{
if (startedThreads)
return true;
// Create synchronization objects
sndBrdThreadSync = CThread::CreateSemaphore(1);
if (g_Config.gpuMultiThreaded)
{
ppcBrdThreadSync = CThread::CreateSemaphore(0);
if (ppcBrdThreadSync == NULL)
goto ThreadError;
}
sndBrdThreadSync = CThread::CreateSemaphore(0);
if (sndBrdThreadSync == NULL)
goto ThreadError;
sndBrdNotifyLock = CThread::CreateMutex();
@ -1974,7 +2113,7 @@ bool CModel3::StartThreads(void)
goto ThreadError;
if (DriveBoard.IsAttached())
{
drvBrdThreadSync = CThread::CreateSemaphore(1);
drvBrdThreadSync = CThread::CreateSemaphore(0);
if (drvBrdThreadSync == NULL)
goto ThreadError;
}
@ -1985,6 +2124,14 @@ bool CModel3::StartThreads(void)
if (notifySync == NULL)
goto ThreadError;
// Create PPC main board thread, if multi-threading GPU
if (g_Config.gpuMultiThreaded)
{
ppcBrdThread = CThread::CreateThread(StartMainBoardThread, this);
if (ppcBrdThread == NULL)
goto ThreadError;
}
// Create sound board thread (sync'd or unsync'd)
if (syncSndBrdThread)
sndBrdThread = CThread::CreateThread(StartSoundBoardThreadSyncd, this);
@ -1993,15 +2140,15 @@ bool CModel3::StartThreads(void)
if (sndBrdThread == NULL)
goto ThreadError;
// Create drive board thread (sync'd), if drive board is attached
// Create drive board thread, if drive board is attached
if (DriveBoard.IsAttached())
{
drvBrdThread = CThread::CreateThread(StartDriveBoardThreadSyncd, this);
drvBrdThread = CThread::CreateThread(StartDriveBoardThread, this);
if (drvBrdThread == NULL)
goto ThreadError;
}
// Set audio callback if unsync'd
// Set audio callback if sound board thread is unsync'd
if (!syncSndBrdThread)
SetAudioCallback(AudioCallback, this);
@ -2026,7 +2173,7 @@ bool CModel3::PauseThreads(void)
// Wait for all threads to finish their processing
pausedThreads = true;
while (sndBrdThreadRunning || drvBrdThreadRunning)
while (ppcBrdThreadRunning || sndBrdThreadRunning || drvBrdThreadRunning)
{
if (!notifySync->Wait(notifyLock))
goto ThreadError;
@ -2043,11 +2190,27 @@ ThreadError:
return false;
}
void CModel3::ResumeThreads(void)
bool CModel3::ResumeThreads(void)
{
// No need to use any locking here
if (!startedThreads)
return true;
// Enter notify critical section
if (!notifyLock->Lock())
goto ThreadError;
// Let all threads know that they can continue running
pausedThreads = false;
return;
// Leave notify critical section
if (!notifyLock->Unlock())
goto ThreadError;
return true;
ThreadError:
ErrorLog("Threading error in CModel3::ResumeThreads: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
g_Config.multiThreaded = false;
return false;
}
void CModel3::StopThreads(void)
@ -2055,7 +2218,7 @@ void CModel3::StopThreads(void)
if (!startedThreads)
return;
// If sound board not sync'd then remove callback
// If sound board thread is unsync'd then remove audio callback
if (!syncSndBrdThread)
SetAudioCallback(NULL, NULL);
@ -2068,8 +2231,13 @@ void CModel3::StopThreads(void)
void CModel3::DeleteThreadObjects(void)
{
// Delete (which in turn kills) sound board and drive board threads
// Delete (which in turn kills) PPC main board, sound board and drive board threads
// Note that can do so here safely because threads will always be waiting on their semaphores when this method is called
if (ppcBrdThread != NULL)
{
delete ppcBrdThread;
ppcBrdThread = NULL;
}
if (sndBrdThread != NULL)
{
delete sndBrdThread;
@ -2082,6 +2250,11 @@ void CModel3::DeleteThreadObjects(void)
}
// Delete synchronization objects
if (ppcBrdThreadSync != NULL)
{
delete ppcBrdThreadSync;
ppcBrdThreadSync = NULL;
}
if (sndBrdThreadSync != NULL)
{
delete sndBrdThreadSync;
@ -2114,9 +2287,28 @@ void CModel3::DeleteThreadObjects(void)
}
}
void CModel3::DumpTimings(void)
{
printf("PPC:%3ums%c render:%3ums%c sync:%4uK%c%3ums%c snd:%3ums%c drv:%3ums%c frame:%3ums%c\n",
ppcTicks, (ppcTicks > renderTicks ? '!' : ','),
renderTicks, (renderTicks > ppcTicks ? '!' : ','),
syncSize / 1024, (syncSize / 1024 > 128 ? '!' : ','), syncTicks, (syncTicks > 1 ? '!' : ','),
sndTicks, (sndTicks > 10 ? '!' : ','),
drvTicks, (drvTicks > 10 ? '!' : ','),
frameTicks, (frameTicks > 16 ? '!' : ' '));
}
int CModel3::StartMainBoardThread(void *data)
{
// Call method on CModel3 to run PPC main board thread
CModel3 *model3 = (CModel3*)data;
model3->RunMainBoardThread();
return 0;
}
int CModel3::StartSoundBoardThread(void *data)
{
// Call method on CModel3 to run unsync'd sound board thread
// Call method on CModel3 to run sound board thread (unsync'd)
CModel3 *model3 = (CModel3*)data;
model3->RunSoundBoardThread();
return 0;
@ -2124,20 +2316,70 @@ int CModel3::StartSoundBoardThread(void *data)
int CModel3::StartSoundBoardThreadSyncd(void *data)
{
// Call method on CModel3 to run sync'd sound board thread
// Call method on CModel3 to run sound board thread (sync'd)
CModel3 *model3 = (CModel3*)data;
model3->RunSoundBoardThreadSyncd();
return 0;
}
int CModel3::StartDriveBoardThreadSyncd(void *data)
int CModel3::StartDriveBoardThread(void *data)
{
// Call method on CModel3 to run sync'd drive board thread
// Call method on CModel3 to run drive board thread
CModel3 *model3 = (CModel3*)data;
model3->RunDriveBoardThreadSyncd();
model3->RunDriveBoardThread();
return 0;
}
/*
 * RunMainBoardThread():
 *
 * Body of the PPC main board thread. Each iteration:
 *
 *   1. Waits on ppcBrdThreadSync until woken. If threads are paused when it
 *      wakes, it goes back to waiting on the semaphore.
 *   2. Runs one frame of the PPC main board.
 *   3. Under notifyLock, clears ppcBrdThreadRunning, sets ppcBrdThreadDone
 *      and signals all waiters on notifySync.
 *
 * The loop only exits when a threading primitive reports failure, in which
 * case the error is logged and multi-threading is switched off.
 */
void CModel3::RunMainBoardThread(void)
{
  for (;;)
  {
    bool wait = true;
    while (wait)
    {
      // Wait on PPC main board thread semaphore
      if (!ppcBrdThreadSync->Wait())
        goto ThreadError;

      // Enter notify critical section
      if (!notifyLock->Lock())
        goto ThreadError;

      // Check threads not paused; the running flag is raised under the lock
      // so that it is updated consistently with pausedThreads
      if (!pausedThreads)
      {
        wait = false;
        ppcBrdThreadRunning = true;
      }

      // Leave notify critical section
      if (!notifyLock->Unlock())
        goto ThreadError;
    }

    // Process a single frame for PPC main board
    RunMainBoardFrame();

    // Enter notify critical section
    if (!notifyLock->Lock())
      goto ThreadError;

    // Let other threads know processing has finished
    ppcBrdThreadRunning = false;
    ppcBrdThreadDone = true;
    if (!notifySync->SignalAll())
    {
      // Do not exit this thread while still holding notifyLock, otherwise any
      // other thread that subsequently locks it would block forever. The
      // unlock is best-effort: its result is deliberately ignored because we
      // are already on the error path.
      notifyLock->Unlock();
      goto ThreadError;
    }

    // Leave notify critical section
    if (!notifyLock->Unlock())
      goto ThreadError;
  }

ThreadError:
  ErrorLog("Threading error in RunMainBoardThread: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
  g_Config.multiThreaded = false;
}
void CModel3::AudioCallback(void *data)
{
// Call method on CModel3 to wake sound board thread
@ -2151,7 +2393,7 @@ void CModel3::WakeSoundBoardThread(void)
if (!sndBrdNotifyLock->Lock())
goto ThreadError;
// Signal to sound board that it should start processing again
// Signal to sound board thread that it should start processing again
if (!sndBrdNotifySync->Signal())
goto ThreadError;
@ -2200,11 +2442,22 @@ void CModel3::RunSoundBoardThread(void)
goto ThreadError;
}
// Keep processing frames until audio buffer is full
bool repeat = true;
// NOTE - performs an unlocked read of pausedThreads here, but this is okay
while (!pausedThreads && !SoundBoard.RunFrame())
// Keep processing frames until paused or audio buffer is full
while (true)
{
// Enter main notify critical section
bool paused;
if (!notifyLock->Lock())
goto ThreadError;
paused = pausedThreads;
// Leave main notify critical section
if (!notifyLock->Unlock())
goto ThreadError;
if (paused || RunSoundBoardFrame())
break;
//printf("Rerunning sound board\n");
}
@ -2256,7 +2509,7 @@ void CModel3::RunSoundBoardThreadSyncd(void)
}
// Process a single frame for sound board
SoundBoard.RunFrame();
RunSoundBoardFrame();
// Enter notify critical section
if (!notifyLock->Lock())
@ -2278,7 +2531,7 @@ ThreadError:
g_Config.multiThreaded = false;
}
void CModel3::RunDriveBoardThreadSyncd(void)
void CModel3::RunDriveBoardThread(void)
{
for (;;)
{
@ -2306,7 +2559,7 @@ void CModel3::RunDriveBoardThreadSyncd(void)
}
// Process a single frame for drive board
DriveBoard.RunFrame();
RunDriveBoardFrame();
// Enter notify critical section
if (!notifyLock->Lock())
@ -2324,70 +2577,10 @@ void CModel3::RunDriveBoardThreadSyncd(void)
}
ThreadError:
ErrorLog("Threading error in RunDriveBoardThreadSyncd: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
ErrorLog("Threading error in RunDriveBoardThread: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
g_Config.multiThreaded = false;
}
void CModel3::RunMainBoardFrame(void)
{
// Compute display and VBlank timings
unsigned frameCycles = g_Config.GetPowerPCFrequency()*1000000/60;
unsigned vblCycles = (unsigned) ((float) frameCycles * 2.5f/100.0f); // 2.5% vblank (ridiculously short and wrong but bigger values cause flicker in Daytona)
unsigned dispCycles = frameCycles - vblCycles;
// Run the PowerPC for the active display part of the frame
ppc_execute(dispCycles);
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
// VBlank
TileGen.BeginFrame();
GPU.BeginFrame();
GPU.RenderFrame();
IRQ.Assert(0x02);
ppc_execute(vblCycles);
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
/*
* Sound:
*
* Bit 0x20 of the MIDI control port appears to enable periodic interrupts,
* which are used to send MIDI commands. Often games will write 0x27, send
* a series of commands, and write 0x06 to stop. Other games, like Star
* Wars Trilogy and Sega Rally 2, will enable interrupts at the beginning
* by writing 0x37 and will disable/enable interrupts to control command
* output.
*/
//printf("\t-- BEGIN (Ctrl=%02X, IRQEn=%02X, IRQPend=%02X) --\n", midiCtrlPort, IRQ.ReadIRQEnable()&0x40, IRQ.ReadIRQState());
int irqCount = 0;
while ((midiCtrlPort&0x20))
//while (midiCtrlPort == 0x27) // 27 triggers IRQ sequence, 06 stops it
{
// Don't waste time firing MIDI interrupts if game has disabled them
if ((IRQ.ReadIRQEnable()&0x40) == 0)
break;
// Process MIDI interrupt
IRQ.Assert(0x40);
ppc_execute(200); // give PowerPC time to acknowledge IRQ
IRQ.Deassert(0x40);
ppc_execute(200); // acknowledge that IRQ was deasserted (TODO: is this really needed?)
++irqCount;
if (irqCount > 128)
{
//printf("\tMIDI FIFO OVERFLOW! (IRQEn=%02X, IRQPend=%02X)\n", IRQ.ReadIRQEnable()&0x40, IRQ.ReadIRQState());
break;
}
}
//printf("\t-- END --\n");
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
// End frame
GPU.EndFrame();
TileGen.EndFrame();
IRQ.Assert(0x0D);
}
void CModel3::Reset(void)
{
// Clear memory (but do not modify backup RAM!)
@ -2423,6 +2616,15 @@ void CModel3::Reset(void)
if (DriveBoard.IsAttached())
DriveBoard.Reset();
gpusReady = false;
ppcTicks = 0;
syncSize = 0;
syncTicks = 0;
renderTicks = 0;
sndTicks = 0;
drvTicks = 0;
frameTicks = 0;
DebugLog("Model 3 reset\n");
}
@ -2964,13 +3166,17 @@ CModel3::CModel3(void)
startedThreads = false;
pausedThreads = false;
ppcBrdThread = NULL;
sndBrdThread = NULL;
drvBrdThread = NULL;
ppcBrdThreadRunning = false;
ppcBrdThreadDone = false;
sndBrdThreadRunning = false;
sndBrdThreadDone = false;
drvBrdThreadRunning = false;
drvBrdThreadDone = false;
syncSndBrdThread = false;
ppcBrdThreadSync = NULL;
sndBrdThreadSync = NULL;
drvBrdThreadSync = NULL;
notifyLock = NULL;

View file

@ -28,6 +28,7 @@
#ifndef INCLUDED_MODEL3_H
#define INCLUDED_MODEL3_H
/*
* CModel3Config:
*
@ -36,7 +37,8 @@
class CModel3Config
{
public:
bool multiThreaded; // Multi-threading (enabled if true)
bool multiThreaded; // Multi-threaded (enabled if true)
bool gpuMultiThreaded; // Multi-threaded rendering (enabled if true)
// PowerPC clock frequency in MHz (minimum: 1 MHz)
inline void SetPowerPCFrequency(unsigned f)
@ -57,6 +59,7 @@ public:
CModel3Config(void)
{
multiThreaded = true; // enable by default
gpuMultiThreaded = true; // enable by default
ppcFrequency = 50*1000000; // 50 MHz
}
@ -313,7 +316,14 @@ public:
*
* Flags that any paused threads should resume running.
*/
void ResumeThreads(void);
bool ResumeThreads(void);
/*
* DumpTimings(void):
*
* Prints all timings for the most recent frame to the console, for debugging purposes.
*/
void DumpTimings(void);
/*
* CModel3(void):
@ -342,21 +352,28 @@ private:
void WriteSystemRegister(unsigned reg, UINT8 data);
void Patch(void);
void RunMainBoardFrame(void); // Runs the main board (PPC) for a frame
void RunMainBoardFrame(void); // Runs PPC main board for a frame
void SyncGPUs(void); // Sync's up GPUs in preparation for rendering - must be called when PPC is not running
void RenderFrame(void); // Renders current frame
bool RunSoundBoardFrame(void); // Runs sound board for a frame
void RunDriveBoardFrame(void); // Runs drive board for a frame
bool StartThreads(void); // Starts all threads
void StopThreads(void); // Stops all threads
void DeleteThreadObjects(void); // Deletes all threads and synchronization objects
static int StartSoundBoardThread(void *data); // Callback to start unsync'd sound board thread
static int StartSoundBoardThreadSyncd(void *data); // Callback to start sync'd sound board thread
static int StartDriveBoardThreadSyncd(void *data); // Callback to start sync'd drive board thread
static int StartMainBoardThread(void *data); // Callback to start PPC main board thread
static int StartSoundBoardThread(void *data); // Callback to start sound board thread (unsync'd)
static int StartSoundBoardThreadSyncd(void *data); // Callback to start sound board thread (sync'd)
static int StartDriveBoardThread(void *data); // Callback to start drive board thread
static void AudioCallback(void *data); // Audio buffer callback
void WakeSoundBoardThread(void); // Used by audio callback to wake sound board thread when not sync'd with PPC thread
void RunSoundBoardThread(void); // Runs sound board thread unsync'd with PPC thread, ie at full speed
void RunSoundBoardThreadSyncd(void); // Runs sound board thread sync'd in step with PPC thread
void RunDriveBoardThreadSyncd(void); // Runs drive board thread sync'd in step with PPC thread
void RunMainBoardThread(void); // Runs PPC main board thread (sync'd in step with render thread)
void RunSoundBoardThread(void); // Runs sound board thread (unsync'd with render thread, ie at full speed)
void RunSoundBoardThreadSyncd(void); // Runs sound board thread (sync'd in step with render thread)
void RunDriveBoardThread(void); // Runs drive board thread (sync'd in step with render thread)
// Game and hardware information
const struct GameInfo *Game;
@ -397,17 +414,22 @@ private:
PPC_FETCH_REGION PPCFetchRegions[3];
// Multiple threading
bool gpusReady; // True if GPUs are ready to render
bool startedThreads; // True if threads have been created and started
bool pausedThreads; // True if threads are currently paused
bool syncSndBrdThread; // True if sound board thread should be sync'd with PPC thread
bool syncSndBrdThread; // True if sound board thread should be sync'd in step with render thread
CThread *ppcBrdThread; // PPC main board thread
CThread *sndBrdThread; // Sound board thread
CThread *drvBrdThread; // Drive board thread
bool ppcBrdThreadRunning; // Flag to indicate PPC main board thread is currently processing
bool ppcBrdThreadDone; // Flag to indicate PPC main board thread has finished processing
bool sndBrdThreadRunning; // Flag to indicate sound board thread is currently processing
bool sndBrdThreadDone; // Flag to indicate sound board thread has finished processing
bool drvBrdThreadRunning; // Flag to indicate drive board thread is currently processing
bool drvBrdThreadDone; // Flag to indicate drive board thread has finished processing
// Thread synchronization objects
CSemaphore *ppcBrdThreadSync;
CSemaphore *sndBrdThreadSync;
CMutex *sndBrdNotifyLock;
CCondVar *sndBrdNotifySync;
@ -427,6 +449,15 @@ private:
CSoundBoard SoundBoard; // Sound board
CDSB *DSB; // Digital Sound Board (type determined dynamically at load time)
CDriveBoard DriveBoard; // Drive board
// Frame timings
UINT32 ppcTicks;
UINT32 syncSize;
UINT32 syncTicks;
UINT32 renderTicks;
UINT32 sndTicks;
UINT32 drvTicks;
UINT32 frameTicks;
};

View file

@ -44,14 +44,30 @@
#include <cstring>
#include "Supermodel.h"
// Offsets of memory regions within Real3D memory pool
#define OFFSET_8C 0 // 4 MB, culling RAM low (at 0x8C000000)
#define OFFSET_8E 0x400000 // 1 MB, culling RAM high (at 0x8E000000)
#define OFFSET_98 0x500000 // 4 MB, polygon RAM (at 0x98000000)
#define OFFSET_TEXRAM 0x900000 // 8 MB, texture RAM
#define OFFSET_TEXFIFO 0x1100000 // 1 MB, texture FIFO
#define MEMORY_POOL_SIZE (0x400000+0x100000+0x400000+0x800000+0x100000)
// Macros that divide memory regions into pages and mark them as dirty when they are written to
#define PAGE_WIDTH 12
#define PAGE_SIZE (1<<PAGE_WIDTH)
#define DIRTY_SIZE(arraySize) (1+(arraySize-1)/(8*PAGE_SIZE))
#define MARK_DIRTY(dirtyArray, addr) dirtyArray[addr>>(PAGE_WIDTH+3)] |= 1<<((addr>>PAGE_WIDTH)&7)
// Offsets of memory regions within Real3D memory pool
#define OFFSET_8C 0x0000000 // 4 MB, culling RAM low (at 0x8C000000)
#define OFFSET_8E 0x0400000 // 1 MB, culling RAM high (at 0x8E000000)
#define OFFSET_98 0x0500000 // 4 MB, polygon RAM (at 0x98000000)
#define OFFSET_TEXRAM 0x0900000 // 8 MB, texture RAM
#define OFFSET_TEXFIFO 0x1100000 // 1 MB, texture FIFO
#define MEM_POOL_SIZE_RW (0x400000+0x100000+0x400000+0x800000+0x100000)
#define OFFSET_8C_RO 0x1200000 // 4 MB, culling RAM low (at 0x8C000000) [read-only snapshot]
#define OFFSET_8E_RO 0x1600000 // 1 MB, culling RAM high (at 0x8E000000) [read-only snapshot]
#define OFFSET_98_RO 0x1700000 // 4 MB, polygon RAM (at 0x98000000) [read-only snapshot]
#define OFFSET_TEXRAM_RO 0x1B00000 // 8 MB, texture RAM [read-only snapshot]
#define MEM_POOL_SIZE_RO (0x400000+0x100000+0x400000+0x800000)
#define OFFSET_8C_DIRTY 0x2300000
#define OFFSET_8E_DIRTY (OFFSET_8C_DIRTY+DIRTY_SIZE(0x400000))
#define OFFSET_98_DIRTY (OFFSET_8E_DIRTY+DIRTY_SIZE(0x100000))
#define OFFSET_TEXRAM_DIRTY (OFFSET_98_DIRTY+DIRTY_SIZE(0x400000))
#define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(MEM_POOL_SIZE_RO))
#define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY)
/******************************************************************************
Save States
@ -61,7 +77,7 @@ void CReal3D::SaveState(CBlockFile *SaveState)
{
SaveState->NewBlock("Real3D", __FILE__);
SaveState->Write(memoryPool, MEMORY_POOL_SIZE);
SaveState->Write(memoryPool, MEM_POOL_SIZE_RW); // Don't write out read-only snapshots or dirty page arrays
SaveState->Write(&fifoIdx, sizeof(fifoIdx));
SaveState->Write(&vromTextureAddr, sizeof(vromTextureAddr));
SaveState->Write(&vromTextureHeader, sizeof(vromTextureHeader));
@ -90,8 +106,11 @@ void CReal3D::LoadState(CBlockFile *SaveState)
return;
}
SaveState->Read(memoryPool, MEMORY_POOL_SIZE);
Render3D->UploadTextures(0,0,2048,2048);
SaveState->Read(memoryPool, MEM_POOL_SIZE_RW);
// If multi-threaded, update read-only snapshots too
if (g_Config.gpuMultiThreaded)
UpdateSnapshots(true);
Render3D->UploadTextures(0, 0, 2048, 2048);
SaveState->Read(&fifoIdx, sizeof(fifoIdx));
SaveState->Read(&vromTextureAddr, sizeof(vromTextureAddr));
SaveState->Read(&vromTextureHeader, sizeof(vromTextureHeader));
@ -117,23 +136,111 @@ void CReal3D::LoadState(CBlockFile *SaveState)
Rendering
******************************************************************************/
void CReal3D::RenderFrame(void)
void CReal3D::BeginVBlank(void)
{
//if (commandPortWritten)
Render3D->RenderFrame();
status |= 2; // VBlank bit
}
void CReal3D::EndVBlank(void)
{
error = false; // clear error (just needs to be done once per frame)
status &= ~2;
}
/*
 * SyncSnapshots():
 *
 * Hands the state accumulated since the last call over to the read-only
 * copies:
 *
 *   - commandPortWritten is latched into commandPortWrittenRO and cleared
 *     (done whether or not GPU multi-threading is enabled);
 *   - when GPU multi-threading is enabled, the queue of pending texture
 *     uploads is moved into queuedUploadTexturesRO and the memory region
 *     snapshots are refreshed from their dirty pages.
 *
 * Returns:
 *   Number of bytes copied into the snapshots, or 0 when GPU multi-threading
 *   is disabled.
 */
UINT32 CReal3D::SyncSnapshots(void)
{
  // Update read-only copy of command port flag
  commandPortWrittenRO = commandPortWritten;
  commandPortWritten = false;

  if (!g_Config.gpuMultiThreaded)
    return 0;

  // Update read-only queue. Swapping and then clearing leaves both containers
  // in the same state as "copy, then clear" would, but avoids copying every
  // element each frame and lets both vectors keep their allocated capacity.
  queuedUploadTexturesRO.swap(queuedUploadTextures);
  queuedUploadTextures.clear();

  // Update read-only snapshots
  return UpdateSnapshots(false);
}
/*
 * UpdateSnapshot(copyWhole, src, dst, size, dirty):
 *
 * Brings the snapshot at dst up to date with the live memory region at src and
 * clears the region's dirty page flags.
 *
 * Parameters:
 *   copyWhole  If true, the entire region is copied regardless of the dirty
 *              flags. If false, only pages flagged as dirty are copied.
 *   src        Start of the live region (size bytes).
 *   dst        Start of the snapshot (size bytes).
 *   size       Size of the region in bytes.
 *   dirty      Dirty page bit array of DIRTY_SIZE(size) bytes; bit j of byte i
 *              corresponds to page 8*i+j (the layout written by MARK_DIRTY).
 *
 * Returns:
 *   Number of bytes copied.
 */
UINT32 CReal3D::UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty)
{
  // DIRTY_SIZE() computes size-1, which would wrap for an empty region
  if (size == 0)
    return 0;

  unsigned dirtySize = DIRTY_SIZE(size);

  if (copyWhole)
  {
    // If updating whole region, then just copy all data in one go
    memcpy(dst, src, size);
    memset(dirty, 0, dirtySize);
    return size;
  }

  // Otherwise, loop through dirty pages array to find out what needs to be updated and copy only those parts
  UINT32 copied = 0;
  for (unsigned i = 0; i < dirtySize; i++)
  {
    UINT8 d = dirty[i];
    if (!d)
      continue;
    dirty[i] = 0;

    // Walk the set bits of this byte, lowest page first
    for (unsigned j = 0; d != 0; j++, d >>= 1)
    {
      if (!(d&1))
        continue;

      unsigned offset = (8 * i + j) * PAGE_SIZE;
      if (offset >= size)
        break;  // flag refers to a page beyond the end of the region; nothing to copy

      // Copy an extra 4 bytes beyond the page to allow for a possible 32-bit
      // overlap into the next page, but never read or write past the end of
      // the region (this also covers a final partial page)
      unsigned remaining = size - offset;
      UINT32 toCopy = PAGE_SIZE + 4;
      if (toCopy > remaining)
        toCopy = remaining;

      memcpy(dst + offset, src + offset, toCopy);
      copied += toCopy;
    }
  }
  return copied;
}
/*
 * UpdateSnapshots(copyWhole):
 *
 * Refreshes the read-only snapshots of all four memory regions, in order:
 * culling RAM low (4 MB), culling RAM high (1 MB), polygon RAM (4 MB) and
 * texture RAM (8 MB). copyWhole is passed straight through to
 * UpdateSnapshot() for every region.
 *
 * Returns:
 *   Total number of bytes copied across all regions.
 */
UINT32 CReal3D::UpdateSnapshots(bool copyWhole)
{
  const UINT32 loBytes   = UpdateSnapshot(copyWhole, (UINT8*)cullingRAMLo, (UINT8*)cullingRAMLoRO, 0x400000, cullingRAMLoDirty);
  const UINT32 hiBytes   = UpdateSnapshot(copyWhole, (UINT8*)cullingRAMHi, (UINT8*)cullingRAMHiRO, 0x100000, cullingRAMHiDirty);
  const UINT32 polyBytes = UpdateSnapshot(copyWhole, (UINT8*)polyRAM, (UINT8*)polyRAMRO, 0x400000, polyRAMDirty);
  const UINT32 texBytes  = UpdateSnapshot(copyWhole, (UINT8*)textureRAM, (UINT8*)textureRAMRO, 0x800000, textureRAMDirty);

  //printf("Real3D copied - cullLo:%4uK, cullHi:%4uK, poly:%4uK, texture:%4uK\n", loBytes / 1024, hiBytes / 1024, polyBytes / 1024, texBytes / 1024);

  return loBytes + hiBytes + polyBytes + texBytes;
}
void CReal3D::BeginFrame(void)
{
status |= 2; // VBlank bit
// If multi-threaded, perform now any queued texture uploads to renderer before rendering begins
if (g_Config.gpuMultiThreaded)
{
for (vector<QueuedUploadTextures>::iterator it = queuedUploadTexturesRO.begin(), end = queuedUploadTexturesRO.end(); it != end; it++)
Render3D->UploadTextures(it->x, it->y, it->width, it->height);
}
Render3D->BeginFrame();
}
// Asks the attached renderer to draw the frame.
void CReal3D::RenderFrame(void)
{
	// Rendering is unconditional; the check below is kept disabled
	//if (commandPortWrittenRO)
	Render3D->RenderFrame();
}
// Signals the end of rendering for this frame to the attached renderer.
//
// The error flag and the VBlank status bit are not touched here: they are
// reset by EndVBlank(), and the class declaration requires this method to
// access read-only state only.
void CReal3D::EndFrame(void)
{
	Render3D->EndFrame();
}
@ -528,7 +635,12 @@ void CReal3D::StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigne
for (yy = 0; yy < 8; yy++)
{
for (xx = 0; xx < 8; xx++)
{
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(textureRAMDirty, destOffset * 2);
textureRAM[destOffset++] = texData[decode[(yy*8+xx)^1]];
}
destOffset += 2048-8; // next line
}
texData += 8*8; // next tile
@ -554,7 +666,11 @@ void CReal3D::StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigne
{
for (xx = 0; xx < 8; xx += 2)
{
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(textureRAMDirty, destOffset * 2);
textureRAM[destOffset++] = texData[decode[(yy^1)*8+((xx+0)^1)]/2]>>8;
if (g_Config.gpuMultiThreaded)
MARK_DIRTY(textureRAMDirty, destOffset * 2);
textureRAM[destOffset++] = texData[decode[(yy^1)*8+((xx+1)^1)]/2]&0xFF;
}
@ -564,6 +680,21 @@ void CReal3D::StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigne
}
}
}
// Signal to renderer that textures have changed
// TO-DO: mipmaps? What if a game writes non-mipmap textures to mipmap area?
if (g_Config.gpuMultiThreaded)
{
// If multi-threaded, then queue calls to UploadTextures for render thread to perform at beginning of next frame
QueuedUploadTextures upl;
upl.x = xPos;
upl.y = yPos;
upl.width = width;
upl.height = height;
queuedUploadTextures.push_back(upl);
}
else
Render3D->UploadTextures(xPos, yPos, width, height);
}
// Texture data will be in little endian format
@ -651,11 +782,6 @@ void CReal3D::UploadTexture(UINT32 header, UINT16 *texData)
//printf("unknown texture format %02X\n", header>>24);
break;
}
// Signal to renderer that textures have changed
// TO-DO: mipmaps? What if a game writes non-mipmap textures to mipmap area?
//Render3D->UploadTextures(x,y,width,height);
Render3D->UploadTextures(0,0,2048,2048); // TO-DO: should not have to upload all 2048x2048 texels
}
@ -736,16 +862,22 @@ void CReal3D::WriteTexturePort(unsigned reg, UINT32 data)
// Stores a 32-bit word into low culling RAM at byte address 'addr', flagging
// the containing page as dirty first when multi-threaded rendering is enabled.
void CReal3D::WriteLowCullingRAM(UINT32 addr, UINT32 data)
{
	if (g_Config.gpuMultiThreaded)
	{
		MARK_DIRTY(cullingRAMLoDirty, addr);
	}

	// RAM is held as 32-bit words, so convert the byte address to a word index
	const UINT32 wordIdx = addr/4;
	cullingRAMLo[wordIdx] = data;
}
// Stores a 32-bit word into high culling RAM at byte address 'addr', flagging
// the containing page as dirty first when multi-threaded rendering is enabled.
void CReal3D::WriteHighCullingRAM(UINT32 addr, UINT32 data)
{
	if (g_Config.gpuMultiThreaded)
	{
		MARK_DIRTY(cullingRAMHiDirty, addr);
	}

	// RAM is held as 32-bit words, so convert the byte address to a word index
	const UINT32 wordIdx = addr/4;
	cullingRAMHi[wordIdx] = data;
}
// Stores a 32-bit word into polygon RAM at byte address 'addr', flagging the
// containing page as dirty first when multi-threaded rendering is enabled.
void CReal3D::WritePolygonRAM(UINT32 addr, UINT32 data)
{
	if (g_Config.gpuMultiThreaded)
	{
		MARK_DIRTY(polyRAMDirty, addr);
	}

	// RAM is held as 32-bit words, so convert the byte address to a word index
	const UINT32 wordIdx = addr/4;
	polyRAM[wordIdx] = data;
}
@ -807,6 +939,10 @@ void CReal3D::Reset(void)
error = false;
commandPortWritten = false;
commandPortWrittenRO = false;
queuedUploadTextures.clear();
queuedUploadTexturesRO.clear();
fifoIdx = 0;
status = 0;
@ -817,7 +953,8 @@ void CReal3D::Reset(void)
dmaStatus = 0;
dmaUnknownReg = 0;
memset(memoryPool, 0, MEMORY_POOL_SIZE);
unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
memset(memoryPool, 0, memSize);
DebugLog("Real3D reset\n");
}
@ -830,8 +967,15 @@ void CReal3D::Reset(void)
// Attaches a 3D renderer to the Real3D and hands it the memory regions it
// should read from, followed by the current stepping.
//
//   Render3DPtr  renderer to attach
void CReal3D::AttachRenderer(CRender3D *Render3DPtr)
{
	Render3D = Render3DPtr;

	// Attach memory exactly once: if multi-threaded, the renderer gets the
	// read-only snapshots instead of the real regions (VROM is shared in both cases)
	if (g_Config.gpuMultiThreaded)
		Render3D->AttachMemory(cullingRAMLoRO, cullingRAMHiRO, polyRAMRO, vrom, textureRAMRO);
	else
		Render3D->AttachMemory(cullingRAMLo, cullingRAMHi, polyRAM, vrom, textureRAM);

	Render3D->SetStep(step);
	DebugLog("Real3D attached a Render3D object\n");
}
@ -859,7 +1003,8 @@ void CReal3D::SetStep(int stepID)
bool CReal3D::Init(const UINT8 *vromPtr, CBus *BusObjectPtr, CIRQ *IRQObjectPtr, unsigned dmaIRQBit)
{
float memSizeMB = (float)MEMORY_POOL_SIZE/(float)0x100000;
unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
float memSizeMB = (float)memSize/(float)0x100000;
// IRQ and bus objects
Bus = BusObjectPtr;
@ -867,20 +1012,34 @@ bool CReal3D::Init(const UINT8 *vromPtr, CBus *BusObjectPtr, CIRQ *IRQObjectPtr,
dmaIRQ = dmaIRQBit;
// Allocate all Real3D RAM regions
memoryPool = new(std::nothrow) UINT8[MEMORY_POOL_SIZE];
memoryPool = new(std::nothrow) UINT8[memSize];
if (NULL == memoryPool)
return ErrorLog("Insufficient memory for Real3D object (needs %1.1f MB).", memSizeMB);
// Set up pointers
// Set up main pointers
cullingRAMLo = (UINT32 *) &memoryPool[OFFSET_8C];
cullingRAMHi = (UINT32 *) &memoryPool[OFFSET_8E];
polyRAM = (UINT32 *) &memoryPool[OFFSET_98];
textureRAM = (UINT16 *) &memoryPool[OFFSET_TEXRAM];
textureFIFO = (UINT32 *) &memoryPool[OFFSET_TEXFIFO];
// If multi-threaded, set up pointers for read-only snapshots and dirty page arrays too
if (g_Config.gpuMultiThreaded)
{
cullingRAMLoRO = (UINT32 *) &memoryPool[OFFSET_8C_RO];
cullingRAMHiRO = (UINT32 *) &memoryPool[OFFSET_8E_RO];
polyRAMRO = (UINT32 *) &memoryPool[OFFSET_98_RO];
textureRAMRO = (UINT16 *) &memoryPool[OFFSET_TEXRAM_RO];
cullingRAMLoDirty = (UINT8 *) &memoryPool[OFFSET_8C_DIRTY];
cullingRAMHiDirty = (UINT8 *) &memoryPool[OFFSET_8E_DIRTY];
polyRAMDirty = (UINT8 *) &memoryPool[OFFSET_98_DIRTY];
textureRAMDirty = (UINT8 *) &memoryPool[OFFSET_TEXRAM_DIRTY];
}
// VROM pointer passed to us
vrom = (UINT32 *) vromPtr;
DebugLog("Initialized Real3D (allocated %1.1f MB)\n", memSizeMB);
return OKAY;
}

View file

@ -29,6 +29,20 @@
#ifndef INCLUDED_REAL3D_H
#define INCLUDED_REAL3D_H
/*
 * QueuedUploadTextures:
 *
 * Record of one deferred CRender3D::UploadTextures call. With multi-threaded
 * rendering, the PPC thread queues these instead of calling the renderer
 * directly, and the render thread replays them at the start of the next
 * frame. The four fields are the arguments that will be passed on, in order.
 */
struct QueuedUploadTextures
{
	unsigned x;			// first argument (x position of the updated region)
	unsigned y;			// second argument (y position of the updated region)
	unsigned width;		// third argument (width of the updated region)
	unsigned height;	// fourth argument (height of the updated region)
};
/*
* CReal3D:
@ -62,26 +76,54 @@ public:
void LoadState(CBlockFile *SaveState);
/*
* RenderFrame(void):
* BeginVBlank(void):
*
* Traverses the scene database and renders a frame. Must be called after
* BeginFrame() but before EndFrame().
* Must be called before the VBlank starts.
*/
void RenderFrame(void);
void BeginVBlank(void);
/*
* EndVBlank(void)
*
* Must be called after the VBlank finishes.
*/
void EndVBlank(void);
/*
* SyncSnapshots(void):
*
* Syncs the read-only memory snapshots with the real ones so that rendering
* of the current frame can begin in the render thread. Must be called at the
* end of each frame when both the render thread and the PPC thread have finished
* their work. If multi-threaded rendering is not enabled, then this method does
* nothing.
*/
UINT32 SyncSnapshots(void);
/*
* BeginFrame(void):
*
* Prepares to render a new frame. Must be called once per frame prior to
* drawing anything and must only access read-only snapshots and variables
* since it may be running in a separate thread.
*/
void BeginFrame(void);
/*
* RenderFrame(void):
*
* Traverses the scene database and renders a frame. Must be called after
* BeginFrame() but before EndFrame() and must only access read-only snapshots
* and variables since it may be running in a separate thread.
*/
void RenderFrame(void);
/*
* EndFrame(void):
*
* Signals the end of rendering for this frame. Must be called last during
* the frame and must only access read-only snapshots and variables since it
* may be running in a separate thread.
*/
void EndFrame(void);
@ -342,6 +384,8 @@ private:
unsigned Shift(UINT8 *data, unsigned numBits);
void StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigned height, UINT16 *texData, unsigned bytesPerTexel);
void UploadTexture(UINT32 header, UINT16 *texData);
UINT32 UpdateSnapshots(bool copyWhole);
UINT32 UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty);
// Renderer attached to the Real3D
CRender3D *Render3D;
@ -355,16 +399,32 @@ private:
bool error; // true if an error occurred this frame
// Real3D memory
UINT8 *memoryPool; // all memory allocated here
UINT32 *cullingRAMLo; // 4MB of culling RAM at 8C000000
UINT32 *cullingRAMHi; // 1MB of culling RAM at 8E000000
UINT32 *polyRAM; // 4MB of polygon RAM at 98000000
UINT16 *textureRAM; // 8MB of internal texture RAM
UINT32 *textureFIFO; // 1MB texture FIFO at 0x94000000
unsigned fifoIdx; // index into texture FIFO
UINT8 *memoryPool; // all memory allocated here
UINT32 *cullingRAMLo; // 4MB of culling RAM at 8C000000
UINT32 *cullingRAMHi; // 1MB of culling RAM at 8E000000
UINT32 *polyRAM; // 4MB of polygon RAM at 98000000
UINT16 *textureRAM; // 8MB of internal texture RAM
UINT32 *textureFIFO; // 1MB texture FIFO at 0x94000000
unsigned fifoIdx; // index into texture FIFO
UINT32 vromTextureAddr; // VROM texture port address data
UINT32 vromTextureHeader; // VROM texture port header data
// Read-only snapshots
UINT32 *cullingRAMLoRO; // 4MB of culling RAM at 8C000000 [read-only snapshot]
UINT32 *cullingRAMHiRO; // 1MB of culling RAM at 8E000000 [read-only snapshot]
UINT32 *polyRAMRO; // 4MB of polygon RAM at 98000000 [read-only snapshot]
UINT16 *textureRAMRO; // 8MB of internal texture RAM [read-only snapshot]
// Arrays to keep track of dirty pages in memory regions
UINT8 *cullingRAMLoDirty;
UINT8 *cullingRAMHiDirty;
UINT8 *polyRAMDirty;
UINT8 *textureRAMDirty;
// Queued texture uploads
vector<QueuedUploadTextures> queuedUploadTextures;
vector<QueuedUploadTextures> queuedUploadTexturesRO; // Read-only copy of queue
// Big endian bus object for DMA memory access
CBus *Bus;
@ -383,6 +443,7 @@ private:
// Command port
bool commandPortWritten;
bool commandPortWrittenRO; // Read-only copy of flag
// Status and command registers
UINT32 status;
@ -394,7 +455,6 @@ private:
unsigned tapIDSize; // size of ID data in bits
unsigned tapTDO; // bit shifted out to TDO
int tapState; // current state
};

View file

@ -34,6 +34,23 @@
#include <cstring>
#include "Supermodel.h"
// Macros that divide memory regions into pages and mark them as dirty when they are written to.
// Every macro argument is parenthesised so that compound expressions (e.g. base|offset or a+b)
// expand with the intended precedence.
#define PAGE_WIDTH 10														// log2 of the page size
#define PAGE_SIZE (1<<PAGE_WIDTH)											// page size in bytes
#define DIRTY_SIZE(arraySize) (1+((arraySize)-1)/(8*PAGE_SIZE))				// bytes of dirty flags needed for a region (one bit per page)
#define MARK_DIRTY(dirtyArray, addr) ((dirtyArray)[(addr)>>(PAGE_WIDTH+3)] |= 1<<(((addr)>>PAGE_WIDTH)&7))
// Offsets of memory regions within TileGen memory pool
#define OFFSET_VRAM 0x000000
#define OFFSET_PAL 0x120000
#define MEM_POOL_SIZE_RW (0x120000+0x020000)
#define OFFSET_VRAM_RO 0x140000 // [read-only snapshot]
#define OFFSET_PAL_RO 0x260000 // [read-only snapshot]
#define MEM_POOL_SIZE_RO (0x120000+0x020000)
#define OFFSET_VRAM_DIRTY 0x280000
#define OFFSET_PAL_DIRTY (OFFSET_VRAM_DIRTY+DIRTY_SIZE(0x120000))
#define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(MEM_POOL_SIZE_RO))
#define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY)
/******************************************************************************
Save States
@ -42,7 +59,7 @@
// Writes the tile generator state to a save state block: 0x120000 bytes of
// VRAM followed by the register array.
//
//   SaveState  block file to write to
void CTileGen::SaveState(CBlockFile *SaveState)
{
	SaveState->NewBlock("Tile Generator", __FILE__);

	// VRAM is written exactly once so that the block layout matches what
	// LoadState reads back (0x120000 bytes of VRAM, then regs).  The decoded
	// palette, read-only snapshots and dirty page arrays are not saved.
	SaveState->Write(vram, 0x120000);
	SaveState->Write(regs, sizeof(regs));
}
@ -55,16 +72,17 @@ void CTileGen::LoadState(CBlockFile *SaveState)
}
// Load memory one word at a time
for (int i = 0; i < (0x100000+0x20000); i += 4)
for (int i = 0; i < 0x120000; i += 4)
{
UINT32 data;
SaveState->Read(&data, sizeof(data));
Render2D->WriteVRAM(i, data);
*(UINT32 *) &memoryPool[i] = data;
WriteRAM(i, data);
}
SaveState->Read(regs, sizeof(regs));
// If multi-threaded, update read-only snapshots too
if (g_Config.gpuMultiThreaded)
UpdateSnapshots(true);
}
@ -72,9 +90,8 @@ void CTileGen::LoadState(CBlockFile *SaveState)
Rendering
******************************************************************************/
void CTileGen::BeginFrame(void)
void CTileGen::BeginVBlank(void)
{
Render2D->BeginFrame();
/*
printf("08: %X\n", regs[0x08/4]);
printf("0C: %X\n", regs[0x0C/4]);
@ -88,25 +105,144 @@ void CTileGen::BeginFrame(void)
*/
}
// Called after the VBlank finishes (see the class declaration).  The tile
// generator currently has nothing to do at this point.
void CTileGen::EndVBlank(void)
{
// Intentionally empty
}
// Refreshes the read-only snapshots from the dirty pages of the live regions.
// Returns 0 when multi-threaded rendering is disabled, otherwise the value
// returned by UpdateSnapshots(false).
UINT32 CTileGen::SyncSnapshots(void)
{
	// Snapshots only exist when rendering is multi-threaded
	return g_Config.gpuMultiThreaded ? UpdateSnapshots(false) : 0;
}
// Brings the snapshot 'dst' up to date with the live region 'src' and clears
// the region's dirty flags.
//
//   copyWhole  if true, all 'size' bytes are copied regardless of dirty flags
//   src        live memory region (source)
//   dst        snapshot of the region (destination)
//   size       size of the region in bytes
//   dirty      dirty page array of DIRTY_SIZE(size) bytes; bit j of byte i
//              flags page 8*i+j of the region
//
// Returns the number of bytes that were copied.
UINT32 CTileGen::UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty)
{
	// An empty region has nothing to copy (and DIRTY_SIZE(0) would wrap around)
	if (size == 0)
		return 0;

	const unsigned dirtySize = DIRTY_SIZE(size);
	if (copyWhole)
	{
		// If updating whole region, then just copy all data in one go
		memcpy(dst, src, size);
		memset(dirty, 0, dirtySize);
		return size;
	}

	// Otherwise, walk the dirty page array and copy only the flagged pages
	UINT32 copied = 0;
	unsigned groupBase = 0;	// byte offset of the first page covered by dirty[i]
	for (unsigned i = 0; i < dirtySize; i++, groupBase += 8 * PAGE_SIZE)
	{
		UINT8 flags = dirty[i];
		if (!flags)
			continue;
		dirty[i] = 0;

		// Stop as soon as no flagged pages remain or the end of the region is reached
		for (unsigned offset = groupBase; flags && offset < size; flags >>= 1, offset += PAGE_SIZE)
		{
			if (!(flags & 1))
				continue;

			// Copy the page plus an extra 4 bytes to allow for a possible 32-bit
			// overlap into the next page, but never past the end of the region
			// (this also covers a region whose size is not a whole number of pages)
			const unsigned remaining = size - offset;
			unsigned toCopy = PAGE_SIZE + 4;
			if (toCopy > remaining)
				toCopy = remaining;
			memcpy(dst + offset, src + offset, toCopy);
			copied += toCopy;
		}
	}
	return copied;
}
// Runs UpdateSnapshot() over the palette and then VRAM, copies the register
// array in full, and returns the total number of bytes accounted for.
UINT32 CTileGen::UpdateSnapshots(bool copyWhole)
{
	UINT32 total = 0;

	// Decoded palette (0x20000 bytes), then VRAM (0x120000 bytes)
	total += UpdateSnapshot(copyWhole, (UINT8*)pal, (UINT8*)palRO, 0x020000, palDirty);
	total += UpdateSnapshot(copyWhole, (UINT8*)vram, (UINT8*)vramRO, 0x120000, vramDirty);

	// The register array has no dirty tracking, so it is always copied whole
	memcpy(regsRO, regs, sizeof(regs));
	total += sizeof(regs);

	return total;
}
// Tells the attached 2D renderer that a new frame is starting.
void CTileGen::BeginFrame(void)
{
	// NOTE: RAM writes are no longer forwarded to Render2D->WriteVRAM(addr, data).
	// This relies on Render2D currently treating everything as dirty on every
	// frame; should that change, the affected parts of the renderer would have
	// to be marked dirty from here instead.
	Render2D->BeginFrame();
}
// Tells the attached 2D renderer that the frame is finished.
void CTileGen::EndFrame(void)
{
	// No tile generator state is touched here; just forward to the renderer
	Render2D->EndFrame();
}
/******************************************************************************
Emulation Functions
******************************************************************************/
// Reads the 32-bit word stored at byte offset 'addr' of VRAM.
//
//   addr  byte offset into VRAM
//
// Returns the word at that offset.
UINT32 CTileGen::ReadRAM(unsigned addr)
{
	// Single read through the VRAM pointer (the former second return statement was unreachable)
	return *(UINT32 *) &vram[addr];
}
// Writes a 32-bit word to VRAM at byte offset 'addr'.  Writes at or above
// 0x100000 are additionally decoded into the palette.  When multi-threaded
// rendering is enabled, the touched pages are flagged as dirty.
//
// The renderer is deliberately not notified here (no Render2D->WriteVRAM call):
// it works from the attached memory and currently treats everything as dirty
// on every frame, as noted in BeginFrame().
//
//   addr  byte offset into VRAM
//   data  word to store
void CTileGen::WriteRAM(unsigned addr, UINT32 data)
{
	if (g_Config.gpuMultiThreaded)
		MARK_DIRTY(vramDirty, addr);
	*(UINT32 *) &vram[addr] = data;

	// Update palette if required
	if (addr >= 0x100000)
	{
		const unsigned palAddr = addr - 0x100000;	// byte offset within the palette area
		if (g_Config.gpuMultiThreaded)
			MARK_DIRTY(palDirty, palAddr);
		WritePalette(palAddr/4, data);				// palAddr/4 is the color index
	}
}
void CTileGen::InitPalette(void)
{
for (int i = 0; i < 0x20000/4; i++)
{
WritePalette(i, vram[0x100000/4 + i]);
if (g_Config.gpuMultiThreaded)
palRO[i] = pal[i];
}
}
void CTileGen::WritePalette(unsigned color, UINT32 data)
{
UINT8 r, g, b, a;
a = 0xFF * ((data>>15)&1); // decode the RGBA (make alpha 0xFF or 0x00)
a = ~a; // invert it (set on Model 3 means clear pixel)
if ((data&0x8000))
r = g = b = 0;
else
{
b = (data>>7)&0xF8;
g = (data>>2)&0xF8;
r = (data<<3)&0xF8;
}
pal[color] = (a<<24)|(b<<16)|(g<<8)|r;
}
void CTileGen::WriteRegister(unsigned reg, UINT32 data)
@ -136,8 +272,13 @@ void CTileGen::WriteRegister(unsigned reg, UINT32 data)
// Resets the tile generator: clears the memory pool and both register arrays,
// then rebuilds the palette from the (now cleared) VRAM.
void CTileGen::Reset(void)
{
	// The pool only includes the snapshots and dirty arrays when multi-threaded.
	// One memset over the whole pool is enough; no separate clear of the first
	// 0x120000 bytes is needed.
	unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
	memset(memoryPool, 0, memSize);

	memset(regs, 0, sizeof(regs));
	memset(regsRO, 0, sizeof(regsRO));

	InitPalette();

	DebugLog("Tile Generator reset\n");
}
@ -149,22 +290,48 @@ void CTileGen::Reset(void)
// Attaches a 2D renderer to the tile generator and hands it the VRAM, palette
// and register arrays it should read from.
//
//   Render2DPtr  renderer to attach
void CTileGen::AttachRenderer(CRender2D *Render2DPtr)
{
	Render2D = Render2DPtr;

	// Attach each resource exactly once: if multi-threaded, the renderer gets
	// the read-only snapshots instead of the real ones
	if (g_Config.gpuMultiThreaded)
	{
		Render2D->AttachVRAM(vramRO);
		Render2D->AttachPalette(palRO);
		Render2D->AttachRegisters(regsRO);
	}
	else
	{
		Render2D->AttachVRAM(vram);
		Render2D->AttachPalette(pal);
		Render2D->AttachRegisters(regs);
	}

	DebugLog("Tile Generator attached a Render2D object\n");
}
#define MEMORY_POOL_SIZE 0x120000
bool CTileGen::Init(CIRQ *IRQObjectPtr)
{
float memSizeMB = (float)MEMORY_POOL_SIZE/(float)0x100000;
unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
float memSizeMB = (float)memSize/(float)0x100000;
// Allocate all memory for ROMs and PPC RAM
memoryPool = new(std::nothrow) UINT8[MEMORY_POOL_SIZE];
// Allocate all memory for all TileGen RAM regions
memoryPool = new(std::nothrow) UINT8[memSize];
if (NULL == memoryPool)
return ErrorLog("Insufficient memory for tile generator object (needs %1.1f MB).", memSizeMB);
// Set up main pointers
vram = (UINT8 *) &memoryPool[OFFSET_VRAM];
pal = (UINT32 *) &memoryPool[OFFSET_PAL];
// If multi-threaded, set up pointers for read-only snapshots and dirty page arrays too
if (g_Config.gpuMultiThreaded)
{
vramRO = (UINT8 *) &memoryPool[OFFSET_VRAM_RO];
palRO = (UINT32 *) &memoryPool[OFFSET_PAL_RO];
vramDirty = (UINT8 *) &memoryPool[OFFSET_VRAM_DIRTY];
palDirty = (UINT8 *) &memoryPool[OFFSET_PAL_DIRTY];
}
// Hook up the IRQ controller
IRQ = IRQObjectPtr;

View file

@ -58,19 +58,46 @@ public:
*/
void LoadState(CBlockFile *SaveState);
/*
* BeginVBlank(void):
*
* Must be called before the VBlank starts.
*/
void BeginVBlank(void);
/*
* EndVBlank(void)
*
* Must be called after the VBlank finishes.
*/
void EndVBlank(void);
/*
* SyncSnapshots(void):
*
* Syncs the read-only memory snapshots with the real ones so that rendering
* of the current frame can begin in the render thread. Must be called at the
* end of each frame when both the render thread and the PPC thread have finished
* their work. If multi-threaded rendering is not enabled, then this method does
* nothing.
*/
UINT32 SyncSnapshots(void);
/*
* BeginFrame(void):
*
* Prepares to render a new frame. Must be called once per frame prior to
* drawing anything and must only access read-only snapshots and variables
* since it may be running in a separate thread.
*/
void BeginFrame(void);
/*
* EndFrame(void):
*
* Signals the end of rendering for this frame. Must be called last during
* the frame and must only access read-only snapshots and variables since it
* may be running in a separate thread.
*/
void EndFrame(void);
@ -163,14 +190,32 @@ public:
~CTileGen(void);
private:
// Private member functions
void InitPalette(void);
void WritePalette(unsigned color, UINT32 data);
UINT32 UpdateSnapshots(bool copyWhole);
UINT32 UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty);
CIRQ *IRQ; // IRQ controller the tile generator is attached to
CRender2D *Render2D; // 2D renderer the tile generator is attached to
// Tile generator VRAM
UINT8 *memoryPool; // all memory allocated here
UINT8 *vram; // 1.8MB of VRAM
UINT32 *pal; // 0x20000 byte (32K colors) palette
// Read-only snapshots
UINT8 *vramRO; // 1.8MB of VRAM [read-only snapshot]
UINT32 *palRO; // 0x20000 byte (32K colors) palette [read-only snapshot]
// Arrays to keep track of dirty pages in memory regions
UINT8 *vramDirty;
UINT8 *palDirty;
// Registers
UINT32 regs[64];
UINT32 regsRO[64]; // Read-only copy of registers
};

View file

@ -360,6 +360,8 @@ static void ApplySettings(CINIFile *INI, const char *section)
// Model 3
if (OKAY == INI->Get(section, "MultiThreaded", x))
g_Config.multiThreaded = x ? true : false;
if (OKAY == INI->Get(section, "GPUMultiThreaded", x))
g_Config.gpuMultiThreaded = x ? true : false;
if (OKAY == INI->Get(section, "PowerPCFrequency", x))
g_Config.SetPowerPCFrequency(x);
@ -381,7 +383,7 @@ static void ApplySettings(CINIFile *INI, const char *section)
#ifdef SUPERMODEL_WIN32
if (OKAY == INI->Get(section, "ForceFeedback", x))
g_Config.forceFeedback = x ? true : false;
#endif
#endif // SUPERMODEL_WIN32
// OSD
INI->Get(section, "XResolution", g_Config.xRes);
@ -457,6 +459,7 @@ static void LogConfig(void)
// CModel3Config
InfoLog("\tMultiThreaded = %d", g_Config.multiThreaded);
InfoLog("\tGPUMultiThreaded = %d", g_Config.gpuMultiThreaded);
InfoLog("\tPowerPCFrequency = %d", g_Config.GetPowerPCFrequency());
// CSoundBoardConfig
@ -709,8 +712,9 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
unsigned fpsFramesElapsed, framesElapsed;
unsigned showCrosshairs = 0; // bit 1: player 1 crosshair, bit 0: player 2
bool gameHasLightguns = false;
bool quit = 0;
bool paused = 0;
bool quit = false;
bool paused = false;
bool dumpTimings = false;
// Initialize and load ROMs
if (OKAY != Model3->Init())
@ -791,7 +795,7 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
// Poll the inputs
if (!Inputs->Poll(Model3->GetGameInfo(), xOffset, yOffset, xRes, yRes))
quit = 1;
quit = true;
#ifdef SUPERMODEL_DEBUGGER
bool processUI = true;
@ -802,12 +806,12 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
// Check if debugger requests exit or pause
if (Debugger->CheckExit())
{
quit = 1;
quit = true;
processUI = false;
}
else if (Debugger->CheckPause())
{
paused = 1;
paused = true;
processUI = false;
}
}
@ -819,7 +823,7 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
if (Inputs->uiExit->Pressed())
{
// Quit emulator
quit = 1;
quit = true;
}
else if (Inputs->uiReset->Pressed())
{
@ -979,9 +983,12 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
// Dump input states
Inputs->DumpState(Model3->GetGameInfo());
}
else if (Inputs->uiDumpTimings->Pressed())
{
dumpTimings = !dumpTimings;
}
else if (Inputs->uiSelectCrosshairs->Pressed() && gameHasLightguns)
{
showCrosshairs++;
switch ((showCrosshairs&3))
{
@ -1042,6 +1049,9 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
startTicks = currentTicks;
}
}
if (dumpTimings && !paused)
Model3->DumpTimings();
}
// Make sure all threads are paused before shutting down