mirror of
https://github.com/RetroDECK/Supermodel.git
synced 2024-11-25 23:25:40 +00:00
New multi-threaded rendering changes that parallelise graphics rendering and PPC execution in order to increase performance on multi-core machines.
New gpuMultiThreaded config option to enable/disable multi-threaded rendering (enabled by default; disabling it reverts to the previous behaviour).
Other rendering optimisations:
- texture uploads now only affect the appropriate region in the texture sheet, rather than uploading the whole sheet each time
- performance of clearing the model caches has been improved
New Alt+O key input added to toggle outputting of frame timings for debugging purposes.
This commit is contained in:
parent
0835e38b94
commit
d1d5175548
|
@ -834,13 +834,8 @@ void CRender3D::ClearModelCache(ModelCache *Cache)
|
|||
Cache->vboCurOffset = 0;
|
||||
for (int i = 0; i < 2; i++)
|
||||
Cache->curVertIdx[i] = 0;
|
||||
if (!Cache->dynamic)
|
||||
memset(Cache->lut, 0xFF, sizeof(INT16)*Cache->lutSize); // set all to -1
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < Cache->numModels; i++)
|
||||
Cache->lut[Cache->Models[i].lutIdx] = -1;
|
||||
}
|
||||
for (int i = 0; i < Cache->numModels; i++)
|
||||
Cache->lut[Cache->Models[i].lutIdx] = -1;
|
||||
|
||||
Cache->numModels = 0;
|
||||
ClearDisplayList(Cache);
|
||||
|
|
|
@ -737,24 +737,7 @@ void CRender2D::EndFrame(void)
|
|||
Emulation Callbacks
|
||||
******************************************************************************/
|
||||
|
||||
void CRender2D::WritePalette(unsigned color, UINT32 data)
|
||||
{
|
||||
UINT8 r, g, b, a;
|
||||
|
||||
a = 0xFF * ((data>>15)&1); // decode the RGBA (make alpha 0xFF or 0x00)
|
||||
a = ~a; // invert it (set on Model 3 means clear pixel)
|
||||
|
||||
if ((data&0x8000))
|
||||
r = g = b = 0;
|
||||
else
|
||||
{
|
||||
b = (data>>7)&0xF8;
|
||||
g = (data>>2)&0xF8;
|
||||
r = (data<<3)&0xF8;
|
||||
}
|
||||
|
||||
pal[color] = (a<<24)|(b<<16)|(g<<8)|r;
|
||||
}
|
||||
|
||||
void CRender2D::WriteVRAM(unsigned addr, UINT32 data)
|
||||
{
|
||||
|
@ -763,31 +746,6 @@ void CRender2D::WriteVRAM(unsigned addr, UINT32 data)
|
|||
|
||||
// For now, mark everything as dirty
|
||||
allDirty = true;
|
||||
|
||||
// Palette
|
||||
if (addr >= 0x100000)
|
||||
{
|
||||
unsigned color = (addr-0x100000)/4; // color index
|
||||
WritePalette(color, data);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* InitPalette():
|
||||
*
|
||||
* This must be called from AttachVRAM() to initialize the palette. The reason
|
||||
* is that because WriteVRAM() always compares incoming data to what is already
|
||||
* in the VRAM, there is no actual way to initialize the palette by calling
|
||||
* WriteVRAM() and passing it the initial VRAM contents. It will always fail to
|
||||
* update because nothing is being changed.
|
||||
*
|
||||
* This function fixes the transparent pixel bug that frequently occurred when
|
||||
* loading save states in Supermodel 0.1a.
|
||||
*/
|
||||
void CRender2D::InitPalette(void)
|
||||
{
|
||||
for (int i = 0; i < 0x20000/4; i++)
|
||||
WritePalette(i, vram[0x100000/4 + i]);
|
||||
}
|
||||
|
||||
|
||||
|
@ -801,14 +759,19 @@ void CRender2D::AttachRegisters(const UINT32 *regPtr)
|
|||
DebugLog("Render2D attached registers\n");
|
||||
}
|
||||
|
||||
void CRender2D::AttachPalette(const UINT32 *palPtr)
|
||||
{
|
||||
pal = palPtr;
|
||||
DebugLog("Render2D attached palette\n");
|
||||
}
|
||||
|
||||
void CRender2D::AttachVRAM(const UINT8 *vramPtr)
|
||||
{
|
||||
vram = (UINT32 *) vramPtr;
|
||||
InitPalette();
|
||||
DebugLog("Render2D attached VRAM\n");
|
||||
}
|
||||
|
||||
#define MEMORY_POOL_SIZE (512*512*4+0x20000)
|
||||
#define MEMORY_POOL_SIZE (512*512*4)
|
||||
|
||||
bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned yRes)
|
||||
{
|
||||
|
@ -832,7 +795,6 @@ bool CRender2D::Init(unsigned xOffset, unsigned yOffset, unsigned xRes, unsigned
|
|||
|
||||
// Set up pointers to memory regions
|
||||
surf = (UINT32 *) memoryPool;
|
||||
pal = (UINT32 *) &memoryPool[512*512*4];
|
||||
|
||||
// Resolution
|
||||
xPixels = xRes;
|
||||
|
|
|
@ -91,6 +91,8 @@ public:
|
|||
*/
|
||||
void AttachRegisters(const UINT32 *regPtr);
|
||||
|
||||
void AttachPalette(const UINT32 *palPtr);
|
||||
|
||||
/*
|
||||
* AttachVRAM(vramPtr):
|
||||
*
|
||||
|
@ -145,11 +147,10 @@ private:
|
|||
void DisplayLayer(int layerNum, GLfloat z);
|
||||
void Setup2D(void);
|
||||
void ColorOffset(GLfloat colorOffset[3], UINT32 reg);
|
||||
void WritePalette(unsigned color, UINT32 data);
|
||||
void InitPalette(void);
|
||||
|
||||
// Data received from tile generator device object
|
||||
const UINT32 *vram;
|
||||
const UINT32 *pal;
|
||||
const UINT32 *regs;
|
||||
|
||||
// OpenGL data
|
||||
|
@ -172,7 +173,6 @@ private:
|
|||
// Buffers
|
||||
UINT8 *memoryPool; // all memory is allocated here
|
||||
UINT32 *surf; // 512x512x32bpp pixel surface
|
||||
UINT32 *pal; // 0x20000 byte (32K colors) palette
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -50,10 +50,11 @@ CInputs::CInputs(CInputSystem *system) : m_system(system)
|
|||
uiMusicVolDown = AddSwitchInput("UIMusicVolDown", "Decrease Music Volume", GAME_INPUT_UI, "KEY_F9");
|
||||
uiSoundVolUp = AddSwitchInput("UISoundVolUp", "Increase Sound Volume", GAME_INPUT_UI, "KEY_F12");
|
||||
uiSoundVolDown = AddSwitchInput("UISoundVolDown", "Decrease Sound Volume", GAME_INPUT_UI, "KEY_F11");
|
||||
uiDumpInpState = AddSwitchInput("UIDumpInputState", "Dump Input State", GAME_INPUT_UI, "NONE"); // disabled for release
|
||||
uiClearNVRAM = AddSwitchInput("UIClearNVRAM", "Clear NVRAM", GAME_INPUT_UI, "KEY_ALT+KEY_N");
|
||||
uiSelectCrosshairs = AddSwitchInput("UISelectCrosshairs", "Select Crosshairs", GAME_INPUT_UI, "KEY_ALT+KEY_I");
|
||||
uiToggleFrLimit = AddSwitchInput("UIToggleFrameLimit", "Toggle Frame Limiting", GAME_INPUT_UI, "KEY_ALT+KEY_T");
|
||||
uiDumpInpState = AddSwitchInput("UIDumpInputState", "Dump Input State", GAME_INPUT_UI, "KEY_ALT+KEY_U");
|
||||
uiDumpTimings = AddSwitchInput("UIDumpTimings", "Dump Frame Timings", GAME_INPUT_UI, "KEY_ALT+KEY_O");
|
||||
#ifdef SUPERMODEL_DEBUGGER
|
||||
uiEnterDebugger = AddSwitchInput("UIEnterDebugger", "Enter Debugger", GAME_INPUT_UI, "KEY_ALT+KEY_B");
|
||||
#endif
|
||||
|
|
|
@ -101,10 +101,11 @@ public:
|
|||
CSwitchInput *uiMusicVolDown;
|
||||
CSwitchInput *uiSoundVolUp;
|
||||
CSwitchInput *uiSoundVolDown;
|
||||
CSwitchInput *uiDumpInpState;
|
||||
CSwitchInput *uiClearNVRAM;
|
||||
CSwitchInput *uiSelectCrosshairs;
|
||||
CSwitchInput *uiToggleFrLimit;
|
||||
CSwitchInput *uiDumpInpState;
|
||||
CSwitchInput *uiDumpTimings;
|
||||
#ifdef SUPERMODEL_DEBUGGER
|
||||
CSwitchInput *uiEnterDebugger;
|
||||
#endif
|
||||
|
|
|
@ -1910,6 +1910,8 @@ void CModel3::ClearNVRAM(void)
|
|||
|
||||
void CModel3::RunFrame(void)
|
||||
{
|
||||
UINT32 start = CThread::GetTicks();
|
||||
|
||||
// See if currently running multi-threaded
|
||||
if (g_Config.multiThreaded)
|
||||
{
|
||||
|
@ -1917,39 +1919,60 @@ void CModel3::RunFrame(void)
|
|||
if (!StartThreads())
|
||||
goto ThreadError;
|
||||
|
||||
// Wake threads for sound board (if sync'd) and drive board (if attached) so they can process a frame
|
||||
if (syncSndBrdThread && !sndBrdThreadSync->Post() || DriveBoard.IsAttached() && !drvBrdThreadSync->Post())
|
||||
// Wake threads for PPC main board (if multi-threading GPU), sound board (if sync'd) and drive board (if attached) so they can process a frame
|
||||
if (g_Config.gpuMultiThreaded && !ppcBrdThreadSync->Post() ||
|
||||
syncSndBrdThread && !sndBrdThreadSync->Post() ||
|
||||
DriveBoard.IsAttached() && !drvBrdThreadSync->Post())
|
||||
goto ThreadError;
|
||||
|
||||
// At the same time, process a single frame for main board (PPC) in this thread
|
||||
RunMainBoardFrame();
|
||||
// If not multi-threading GPU, then run PPC main board for a frame and sync GPUs now in this thread
|
||||
if (!g_Config.gpuMultiThreaded)
|
||||
{
|
||||
RunMainBoardFrame();
|
||||
SyncGPUs();
|
||||
}
|
||||
|
||||
// Render frame if ready to do so
|
||||
if (gpusReady)
|
||||
RenderFrame();
|
||||
|
||||
// Enter notify wait critical section
|
||||
if (!notifyLock->Lock())
|
||||
goto ThreadError;
|
||||
|
||||
// Wait for sound board and drive board threads to finish their work (if they haven't done so already)
|
||||
while (syncSndBrdThread && !sndBrdThreadDone || DriveBoard.IsAttached() && !drvBrdThreadDone)
|
||||
// Wait for PPC main board, sound board and drive board threads to finish their work (if they are running and haven't finished already)
|
||||
while (g_Config.gpuMultiThreaded && !ppcBrdThreadDone ||
|
||||
syncSndBrdThread && !sndBrdThreadDone ||
|
||||
DriveBoard.IsAttached() && !drvBrdThreadDone)
|
||||
{
|
||||
if (!notifySync->Wait(notifyLock))
|
||||
goto ThreadError;
|
||||
}
|
||||
ppcBrdThreadDone = false;
|
||||
sndBrdThreadDone = false;
|
||||
drvBrdThreadDone = false;
|
||||
|
||||
// Leave notify wait critical section
|
||||
if (!notifyLock->Unlock())
|
||||
goto ThreadError;
|
||||
|
||||
// If multi-threading GPU, then sync GPUs last while PPC main board thread is waiting
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
SyncGPUs();
|
||||
}
|
||||
else
|
||||
{
|
||||
// If not multi-threaded, then just process a single frame for main board, sound board and drive board in turn in this thread
|
||||
// If not multi-threaded, then just process and render a single frame for PPC main board, sound board and drive board in turn in this thread
|
||||
RunMainBoardFrame();
|
||||
SoundBoard.RunFrame();
|
||||
SyncGPUs();
|
||||
RenderFrame();
|
||||
RunSoundBoardFrame();
|
||||
if (DriveBoard.IsAttached())
|
||||
DriveBoard.RunFrame();
|
||||
RunDriveBoardFrame();
|
||||
}
|
||||
|
||||
frameTicks = CThread::GetTicks() - start;
|
||||
|
||||
return;
|
||||
|
||||
ThreadError:
|
||||
|
@ -1957,13 +1980,129 @@ ThreadError:
|
|||
g_Config.multiThreaded = false;
|
||||
}
|
||||
|
||||
void CModel3::RunMainBoardFrame(void)
|
||||
{
|
||||
UINT32 start = CThread::GetTicks();
|
||||
|
||||
// Compute display and VBlank timings
|
||||
unsigned frameCycles = g_Config.GetPowerPCFrequency()*1000000/60;
|
||||
unsigned vblCycles = (unsigned) ((float) frameCycles * 2.5f/100.0f); // 2.5% vblank (ridiculously short and wrong but bigger values cause flicker in Daytona)
|
||||
unsigned dispCycles = frameCycles - vblCycles;
|
||||
|
||||
// VBlank
|
||||
if (gpusReady)
|
||||
{
|
||||
TileGen.BeginVBlank();
|
||||
GPU.BeginVBlank();
|
||||
IRQ.Assert(0x02);
|
||||
ppc_execute(vblCycles);
|
||||
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
|
||||
|
||||
/*
|
||||
* Sound:
|
||||
*
|
||||
* Bit 0x20 of the MIDI control port appears to enable periodic interrupts,
|
||||
* which are used to send MIDI commands. Often games will write 0x27, send
|
||||
* a series of commands, and write 0x06 to stop. Other games, like Star
|
||||
* Wars Trilogy and Sega Rally 2, will enable interrupts at the beginning
|
||||
* by writing 0x37 and will disable/enable interrupts to control command
|
||||
* output.
|
||||
*/
|
||||
//printf("\t-- BEGIN (Ctrl=%02X, IRQEn=%02X, IRQPend=%02X) --\n", midiCtrlPort, IRQ.ReadIRQEnable()&0x40, IRQ.ReadIRQState());
|
||||
int irqCount = 0;
|
||||
while ((midiCtrlPort&0x20))
|
||||
//while (midiCtrlPort == 0x27) // 27 triggers IRQ sequence, 06 stops it
|
||||
{
|
||||
// Don't waste time firing MIDI interrupts if game has disabled them
|
||||
if ((IRQ.ReadIRQEnable()&0x40) == 0)
|
||||
break;
|
||||
|
||||
// Process MIDI interrupt
|
||||
IRQ.Assert(0x40);
|
||||
ppc_execute(200); // give PowerPC time to acknowledge IRQ
|
||||
IRQ.Deassert(0x40);
|
||||
ppc_execute(200); // acknowledge that IRQ was deasserted (TODO: is this really needed?)
|
||||
|
||||
++irqCount;
|
||||
if (irqCount > 128)
|
||||
{
|
||||
//printf("\tMIDI FIFO OVERFLOW! (IRQEn=%02X, IRQPend=%02X)\n", IRQ.ReadIRQEnable()&0x40, IRQ.ReadIRQState());
|
||||
break;
|
||||
}
|
||||
}
|
||||
//printf("\t-- END --\n");
|
||||
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
|
||||
|
||||
// End VBlank
|
||||
GPU.EndVBlank();
|
||||
TileGen.EndVBlank();
|
||||
IRQ.Assert(0x0D);
|
||||
}
|
||||
|
||||
// Run the PowerPC for the active display part of the frame
|
||||
ppc_execute(dispCycles);
|
||||
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
|
||||
|
||||
ppcTicks = CThread::GetTicks() - start;
|
||||
}
|
||||
|
||||
void CModel3::SyncGPUs(void)
|
||||
{
|
||||
UINT32 start = CThread::GetTicks();
|
||||
|
||||
syncSize = GPU.SyncSnapshots() + TileGen.SyncSnapshots();
|
||||
gpusReady = true;
|
||||
|
||||
syncTicks = CThread::GetTicks() - start;
|
||||
}
|
||||
|
||||
void CModel3::RenderFrame(void)
|
||||
{
|
||||
UINT32 start = CThread::GetTicks();
|
||||
|
||||
// Render frame
|
||||
TileGen.BeginFrame();
|
||||
GPU.BeginFrame();
|
||||
GPU.RenderFrame();
|
||||
GPU.EndFrame();
|
||||
TileGen.EndFrame();
|
||||
|
||||
renderTicks = CThread::GetTicks() - start;
|
||||
}
|
||||
|
||||
bool CModel3::RunSoundBoardFrame(void)
|
||||
{
|
||||
UINT32 start = CThread::GetTicks();
|
||||
|
||||
bool bufferFull = SoundBoard.RunFrame();
|
||||
|
||||
sndTicks = CThread::GetTicks() - start;
|
||||
|
||||
return bufferFull;
|
||||
}
|
||||
|
||||
void CModel3::RunDriveBoardFrame(void)
|
||||
{
|
||||
UINT32 start = CThread::GetTicks();
|
||||
|
||||
DriveBoard.RunFrame();
|
||||
|
||||
drvTicks = CThread::GetTicks() - start;
|
||||
}
|
||||
|
||||
bool CModel3::StartThreads(void)
|
||||
{
|
||||
if (startedThreads)
|
||||
return true;
|
||||
|
||||
// Create synchronization objects
|
||||
sndBrdThreadSync = CThread::CreateSemaphore(1);
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
{
|
||||
ppcBrdThreadSync = CThread::CreateSemaphore(0);
|
||||
if (ppcBrdThreadSync == NULL)
|
||||
goto ThreadError;
|
||||
}
|
||||
sndBrdThreadSync = CThread::CreateSemaphore(0);
|
||||
if (sndBrdThreadSync == NULL)
|
||||
goto ThreadError;
|
||||
sndBrdNotifyLock = CThread::CreateMutex();
|
||||
|
@ -1974,7 +2113,7 @@ bool CModel3::StartThreads(void)
|
|||
goto ThreadError;
|
||||
if (DriveBoard.IsAttached())
|
||||
{
|
||||
drvBrdThreadSync = CThread::CreateSemaphore(1);
|
||||
drvBrdThreadSync = CThread::CreateSemaphore(0);
|
||||
if (drvBrdThreadSync == NULL)
|
||||
goto ThreadError;
|
||||
}
|
||||
|
@ -1985,6 +2124,14 @@ bool CModel3::StartThreads(void)
|
|||
if (notifySync == NULL)
|
||||
goto ThreadError;
|
||||
|
||||
// Create PPC main board thread, if multi-threading GPU
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
{
|
||||
ppcBrdThread = CThread::CreateThread(StartMainBoardThread, this);
|
||||
if (ppcBrdThread == NULL)
|
||||
goto ThreadError;
|
||||
}
|
||||
|
||||
// Create sound board thread (sync'd or unsync'd)
|
||||
if (syncSndBrdThread)
|
||||
sndBrdThread = CThread::CreateThread(StartSoundBoardThreadSyncd, this);
|
||||
|
@ -1993,15 +2140,15 @@ bool CModel3::StartThreads(void)
|
|||
if (sndBrdThread == NULL)
|
||||
goto ThreadError;
|
||||
|
||||
// Create drive board thread (sync'd), if drive board is attached
|
||||
// Create drive board thread, if drive board is attached
|
||||
if (DriveBoard.IsAttached())
|
||||
{
|
||||
drvBrdThread = CThread::CreateThread(StartDriveBoardThreadSyncd, this);
|
||||
drvBrdThread = CThread::CreateThread(StartDriveBoardThread, this);
|
||||
if (drvBrdThread == NULL)
|
||||
goto ThreadError;
|
||||
}
|
||||
|
||||
// Set audio callback if unsync'd
|
||||
// Set audio callback if sound board thread is unsync'd
|
||||
if (!syncSndBrdThread)
|
||||
SetAudioCallback(AudioCallback, this);
|
||||
|
||||
|
@ -2026,7 +2173,7 @@ bool CModel3::PauseThreads(void)
|
|||
|
||||
// Wait for all threads to finish their processing
|
||||
pausedThreads = true;
|
||||
while (sndBrdThreadRunning || drvBrdThreadRunning)
|
||||
while (ppcBrdThreadRunning || sndBrdThreadRunning || drvBrdThreadRunning)
|
||||
{
|
||||
if (!notifySync->Wait(notifyLock))
|
||||
goto ThreadError;
|
||||
|
@ -2043,11 +2190,27 @@ ThreadError:
|
|||
return false;
|
||||
}
|
||||
|
||||
void CModel3::ResumeThreads(void)
|
||||
bool CModel3::ResumeThreads(void)
|
||||
{
|
||||
// No need to use any locking here
|
||||
if (!startedThreads)
|
||||
return true;
|
||||
|
||||
// Enter notify critical section
|
||||
if (!notifyLock->Lock())
|
||||
goto ThreadError;
|
||||
|
||||
// Let all threads know that they can continue running
|
||||
pausedThreads = false;
|
||||
return;
|
||||
|
||||
// Leave notify critical section
|
||||
if (!notifyLock->Unlock())
|
||||
goto ThreadError;
|
||||
return true;
|
||||
|
||||
ThreadError:
|
||||
ErrorLog("Threading error in CModel3::ResumeThreads: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
|
||||
g_Config.multiThreaded = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
void CModel3::StopThreads(void)
|
||||
|
@ -2055,7 +2218,7 @@ void CModel3::StopThreads(void)
|
|||
if (!startedThreads)
|
||||
return;
|
||||
|
||||
// If sound board not sync'd then remove callback
|
||||
// If sound board thread is unsync'd then remove audio callback
|
||||
if (!syncSndBrdThread)
|
||||
SetAudioCallback(NULL, NULL);
|
||||
|
||||
|
@ -2068,8 +2231,13 @@ void CModel3::StopThreads(void)
|
|||
|
||||
void CModel3::DeleteThreadObjects(void)
|
||||
{
|
||||
// Delete (which in turn kills) sound board and drive board threads
|
||||
// Delete (which in turn kills) PPC main board, sound board and drive board threads
|
||||
// Note that can do so here safely because threads will always be waiting on their semaphores when this method is called
|
||||
if (ppcBrdThread != NULL)
|
||||
{
|
||||
delete ppcBrdThread;
|
||||
ppcBrdThread = NULL;
|
||||
}
|
||||
if (sndBrdThread != NULL)
|
||||
{
|
||||
delete sndBrdThread;
|
||||
|
@ -2082,6 +2250,11 @@ void CModel3::DeleteThreadObjects(void)
|
|||
}
|
||||
|
||||
// Delete synchronization objects
|
||||
if (ppcBrdThreadSync != NULL)
|
||||
{
|
||||
delete ppcBrdThreadSync;
|
||||
ppcBrdThreadSync = NULL;
|
||||
}
|
||||
if (sndBrdThreadSync != NULL)
|
||||
{
|
||||
delete sndBrdThreadSync;
|
||||
|
@ -2114,9 +2287,28 @@ void CModel3::DeleteThreadObjects(void)
|
|||
}
|
||||
}
|
||||
|
||||
void CModel3::DumpTimings(void)
|
||||
{
|
||||
printf("PPC:%3ums%c render:%3ums%c sync:%4uK%c%3ums%c snd:%3ums%c drv:%3ums%c frame:%3ums%c\n",
|
||||
ppcTicks, (ppcTicks > renderTicks ? '!' : ','),
|
||||
renderTicks, (renderTicks > ppcTicks ? '!' : ','),
|
||||
syncSize / 1024, (syncSize / 1024 > 128 ? '!' : ','), syncTicks, (syncTicks > 1 ? '!' : ','),
|
||||
sndTicks, (sndTicks > 10 ? '!' : ','),
|
||||
drvTicks, (drvTicks > 10 ? '!' : ','),
|
||||
frameTicks, (frameTicks > 16 ? '!' : ' '));
|
||||
}
|
||||
|
||||
int CModel3::StartMainBoardThread(void *data)
|
||||
{
|
||||
// Call method on CModel3 to run PPC main board thread
|
||||
CModel3 *model3 = (CModel3*)data;
|
||||
model3->RunMainBoardThread();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CModel3::StartSoundBoardThread(void *data)
|
||||
{
|
||||
// Call method on CModel3 to run unsync'd sound board thread
|
||||
// Call method on CModel3 to run sound board thread (unsync'd)
|
||||
CModel3 *model3 = (CModel3*)data;
|
||||
model3->RunSoundBoardThread();
|
||||
return 0;
|
||||
|
@ -2124,20 +2316,70 @@ int CModel3::StartSoundBoardThread(void *data)
|
|||
|
||||
int CModel3::StartSoundBoardThreadSyncd(void *data)
|
||||
{
|
||||
// Call method on CModel3 to run sync'd sound board thread
|
||||
// Call method on CModel3 to run sound board thread (sync'd)
|
||||
CModel3 *model3 = (CModel3*)data;
|
||||
model3->RunSoundBoardThreadSyncd();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CModel3::StartDriveBoardThreadSyncd(void *data)
|
||||
int CModel3::StartDriveBoardThread(void *data)
|
||||
{
|
||||
// Call method on CModel3 to run sync'd drive board thread
|
||||
// Call method on CModel3 to run drive board thread
|
||||
CModel3 *model3 = (CModel3*)data;
|
||||
model3->RunDriveBoardThreadSyncd();
|
||||
model3->RunDriveBoardThread();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void CModel3::RunMainBoardThread(void)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
bool wait = true;
|
||||
while (wait)
|
||||
{
|
||||
// Wait on PPC main board thread semaphore
|
||||
if (!ppcBrdThreadSync->Wait())
|
||||
goto ThreadError;
|
||||
|
||||
// Enter notify critical section
|
||||
if (!notifyLock->Lock())
|
||||
goto ThreadError;
|
||||
|
||||
// Check threads not paused
|
||||
if (!pausedThreads)
|
||||
{
|
||||
wait = false;
|
||||
ppcBrdThreadRunning = true;
|
||||
}
|
||||
|
||||
// Leave notify critical section
|
||||
if (!notifyLock->Unlock())
|
||||
goto ThreadError;
|
||||
}
|
||||
|
||||
// Process a single frame for PPC main board
|
||||
RunMainBoardFrame();
|
||||
|
||||
// Enter notify critical section
|
||||
if (!notifyLock->Lock())
|
||||
goto ThreadError;
|
||||
|
||||
// Let other threads know processing has finished
|
||||
ppcBrdThreadRunning = false;
|
||||
ppcBrdThreadDone = true;
|
||||
if (!notifySync->SignalAll())
|
||||
goto ThreadError;
|
||||
|
||||
// Leave notify critical section
|
||||
if (!notifyLock->Unlock())
|
||||
goto ThreadError;
|
||||
}
|
||||
|
||||
ThreadError:
|
||||
ErrorLog("Threading error in RunMainBoardThread: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
|
||||
g_Config.multiThreaded = false;
|
||||
}
|
||||
|
||||
void CModel3::AudioCallback(void *data)
|
||||
{
|
||||
// Call method on CModel3 to wake sound board thread
|
||||
|
@ -2151,7 +2393,7 @@ void CModel3::WakeSoundBoardThread(void)
|
|||
if (!sndBrdNotifyLock->Lock())
|
||||
goto ThreadError;
|
||||
|
||||
// Signal to sound board that it should start processing again
|
||||
// Signal to sound board thread that it should start processing again
|
||||
if (!sndBrdNotifySync->Signal())
|
||||
goto ThreadError;
|
||||
|
||||
|
@ -2200,11 +2442,22 @@ void CModel3::RunSoundBoardThread(void)
|
|||
goto ThreadError;
|
||||
}
|
||||
|
||||
// Keep processing frames until audio buffer is full
|
||||
bool repeat = true;
|
||||
// NOTE - performs an unlocked read of pausedThreads here, but this is okay
|
||||
while (!pausedThreads && !SoundBoard.RunFrame())
|
||||
// Keep processing frames until paused or audio buffer is full
|
||||
while (true)
|
||||
{
|
||||
// Enter main notify critical section
|
||||
bool paused;
|
||||
if (!notifyLock->Lock())
|
||||
goto ThreadError;
|
||||
|
||||
paused = pausedThreads;
|
||||
|
||||
// Leave main notify critical section
|
||||
if (!notifyLock->Unlock())
|
||||
goto ThreadError;
|
||||
|
||||
if (paused || RunSoundBoardFrame())
|
||||
break;
|
||||
//printf("Rerunning sound board\n");
|
||||
}
|
||||
|
||||
|
@ -2256,7 +2509,7 @@ void CModel3::RunSoundBoardThreadSyncd(void)
|
|||
}
|
||||
|
||||
// Process a single frame for sound board
|
||||
SoundBoard.RunFrame();
|
||||
RunSoundBoardFrame();
|
||||
|
||||
// Enter notify critical section
|
||||
if (!notifyLock->Lock())
|
||||
|
@ -2278,7 +2531,7 @@ ThreadError:
|
|||
g_Config.multiThreaded = false;
|
||||
}
|
||||
|
||||
void CModel3::RunDriveBoardThreadSyncd(void)
|
||||
void CModel3::RunDriveBoardThread(void)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
|
@ -2306,7 +2559,7 @@ void CModel3::RunDriveBoardThreadSyncd(void)
|
|||
}
|
||||
|
||||
// Process a single frame for drive board
|
||||
DriveBoard.RunFrame();
|
||||
RunDriveBoardFrame();
|
||||
|
||||
// Enter notify critical section
|
||||
if (!notifyLock->Lock())
|
||||
|
@ -2324,70 +2577,10 @@ void CModel3::RunDriveBoardThreadSyncd(void)
|
|||
}
|
||||
|
||||
ThreadError:
|
||||
ErrorLog("Threading error in RunDriveBoardThreadSyncd: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
|
||||
ErrorLog("Threading error in RunDriveBoardThread: %s\nSwitching back to single-threaded mode.\n", CThread::GetLastError());
|
||||
g_Config.multiThreaded = false;
|
||||
}
|
||||
|
||||
void CModel3::RunMainBoardFrame(void)
|
||||
{
|
||||
// Compute display and VBlank timings
|
||||
unsigned frameCycles = g_Config.GetPowerPCFrequency()*1000000/60;
|
||||
unsigned vblCycles = (unsigned) ((float) frameCycles * 2.5f/100.0f); // 2.5% vblank (ridiculously short and wrong but bigger values cause flicker in Daytona)
|
||||
unsigned dispCycles = frameCycles - vblCycles;
|
||||
|
||||
// Run the PowerPC for the active display part of the frame
|
||||
ppc_execute(dispCycles);
|
||||
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
|
||||
|
||||
// VBlank
|
||||
TileGen.BeginFrame();
|
||||
GPU.BeginFrame();
|
||||
GPU.RenderFrame();
|
||||
IRQ.Assert(0x02);
|
||||
ppc_execute(vblCycles);
|
||||
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
|
||||
|
||||
/*
|
||||
* Sound:
|
||||
*
|
||||
* Bit 0x20 of the MIDI control port appears to enable periodic interrupts,
|
||||
* which are used to send MIDI commands. Often games will write 0x27, send
|
||||
* a series of commands, and write 0x06 to stop. Other games, like Star
|
||||
* Wars Trilogy and Sega Rally 2, will enable interrupts at the beginning
|
||||
* by writing 0x37 and will disable/enable interrupts to control command
|
||||
* output.
|
||||
*/
|
||||
//printf("\t-- BEGIN (Ctrl=%02X, IRQEn=%02X, IRQPend=%02X) --\n", midiCtrlPort, IRQ.ReadIRQEnable()&0x40, IRQ.ReadIRQState());
|
||||
int irqCount = 0;
|
||||
while ((midiCtrlPort&0x20))
|
||||
//while (midiCtrlPort == 0x27) // 27 triggers IRQ sequence, 06 stops it
|
||||
{
|
||||
// Don't waste time firing MIDI interrupts if game has disabled them
|
||||
if ((IRQ.ReadIRQEnable()&0x40) == 0)
|
||||
break;
|
||||
|
||||
// Process MIDI interrupt
|
||||
IRQ.Assert(0x40);
|
||||
ppc_execute(200); // give PowerPC time to acknowledge IRQ
|
||||
IRQ.Deassert(0x40);
|
||||
ppc_execute(200); // acknowledge that IRQ was deasserted (TODO: is this really needed?)
|
||||
|
||||
++irqCount;
|
||||
if (irqCount > 128)
|
||||
{
|
||||
//printf("\tMIDI FIFO OVERFLOW! (IRQEn=%02X, IRQPend=%02X)\n", IRQ.ReadIRQEnable()&0x40, IRQ.ReadIRQState());
|
||||
break;
|
||||
}
|
||||
}
|
||||
//printf("\t-- END --\n");
|
||||
//printf("PC=%08X LR=%08X\n", ppc_get_pc(), ppc_get_lr());
|
||||
|
||||
// End frame
|
||||
GPU.EndFrame();
|
||||
TileGen.EndFrame();
|
||||
IRQ.Assert(0x0D);
|
||||
}
|
||||
|
||||
void CModel3::Reset(void)
|
||||
{
|
||||
// Clear memory (but do not modify backup RAM!)
|
||||
|
@ -2423,6 +2616,15 @@ void CModel3::Reset(void)
|
|||
if (DriveBoard.IsAttached())
|
||||
DriveBoard.Reset();
|
||||
|
||||
gpusReady = false;
|
||||
ppcTicks = 0;
|
||||
syncSize = 0;
|
||||
syncTicks = 0;
|
||||
renderTicks = 0;
|
||||
sndTicks = 0;
|
||||
drvTicks = 0;
|
||||
frameTicks = 0;
|
||||
|
||||
DebugLog("Model 3 reset\n");
|
||||
}
|
||||
|
||||
|
@ -2964,13 +3166,17 @@ CModel3::CModel3(void)
|
|||
|
||||
startedThreads = false;
|
||||
pausedThreads = false;
|
||||
ppcBrdThread = NULL;
|
||||
sndBrdThread = NULL;
|
||||
drvBrdThread = NULL;
|
||||
ppcBrdThreadRunning = false;
|
||||
ppcBrdThreadDone = false;
|
||||
sndBrdThreadRunning = false;
|
||||
sndBrdThreadDone = false;
|
||||
drvBrdThreadRunning = false;
|
||||
drvBrdThreadDone = false;
|
||||
syncSndBrdThread = false;
|
||||
ppcBrdThreadSync = NULL;
|
||||
sndBrdThreadSync = NULL;
|
||||
drvBrdThreadSync = NULL;
|
||||
notifyLock = NULL;
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#ifndef INCLUDED_MODEL3_H
|
||||
#define INCLUDED_MODEL3_H
|
||||
|
||||
|
||||
/*
|
||||
* CModel3Config:
|
||||
*
|
||||
|
@ -36,7 +37,8 @@
|
|||
class CModel3Config
|
||||
{
|
||||
public:
|
||||
bool multiThreaded; // Multi-threading (enabled if true)
|
||||
bool multiThreaded; // Multi-threaded (enabled if true)
|
||||
bool gpuMultiThreaded; // Multi-threaded rendering (enabled if true)
|
||||
|
||||
// PowerPC clock frequency in MHz (minimum: 1 MHz)
|
||||
inline void SetPowerPCFrequency(unsigned f)
|
||||
|
@ -57,6 +59,7 @@ public:
|
|||
CModel3Config(void)
|
||||
{
|
||||
multiThreaded = true; // enable by default
|
||||
gpuMultiThreaded = true; // enable by default
|
||||
ppcFrequency = 50*1000000; // 50 MHz
|
||||
}
|
||||
|
||||
|
@ -313,7 +316,14 @@ public:
|
|||
*
|
||||
* Flags that any paused threads should resume running.
|
||||
*/
|
||||
void ResumeThreads(void);
|
||||
bool ResumeThreads(void);
|
||||
|
||||
/*
|
||||
* DumpTimings(void):
|
||||
*
|
||||
* Prints all timings for the most recent frame to the console, for debugging purposes.
|
||||
*/
|
||||
void DumpTimings(void);
|
||||
|
||||
/*
|
||||
* CModel3(void):
|
||||
|
@ -342,21 +352,28 @@ private:
|
|||
void WriteSystemRegister(unsigned reg, UINT8 data);
|
||||
void Patch(void);
|
||||
|
||||
void RunMainBoardFrame(void); // Runs the main board (PPC) for a frame
|
||||
void RunMainBoardFrame(void); // Runs PPC main board for a frame
|
||||
void SyncGPUs(void); // Sync's up GPUs in preparation for rendering - must be called when PPC is not running
|
||||
void RenderFrame(void); // Renders current frame
|
||||
bool RunSoundBoardFrame(void); // Runs sound board for a frame
|
||||
void RunDriveBoardFrame(void); // Runs drive board for a frame
|
||||
|
||||
bool StartThreads(void); // Starts all threads
|
||||
void StopThreads(void); // Stops all threads
|
||||
void DeleteThreadObjects(void); // Deletes all threads and synchronization objects
|
||||
|
||||
static int StartSoundBoardThread(void *data); // Callback to start unsync'd sound board thread
|
||||
static int StartSoundBoardThreadSyncd(void *data); // Callback to start sync'd sound board thread
|
||||
static int StartDriveBoardThreadSyncd(void *data); // Callback to start sync'd drive board thread
|
||||
static int StartMainBoardThread(void *data); // Callback to start PPC main board thread
|
||||
static int StartSoundBoardThread(void *data); // Callback to start sound board thread (unsync'd)
|
||||
static int StartSoundBoardThreadSyncd(void *data); // Callback to start sound board thread (sync'd)
|
||||
static int StartDriveBoardThread(void *data); // Callback to start drive board thread
|
||||
|
||||
static void AudioCallback(void *data); // Audio buffer callback
|
||||
|
||||
void WakeSoundBoardThread(void); // Used by audio callback to wake sound board thread when not sync'd with PPC thread
|
||||
void RunSoundBoardThread(void); // Runs sound board thread unsync'd with PPC thread, ie at full speed
|
||||
void RunSoundBoardThreadSyncd(void); // Runs sound board thread sync'd in step with PPC thread
|
||||
void RunDriveBoardThreadSyncd(void); // Runs drive board thread sync'd in step with PPC thread
|
||||
void RunMainBoardThread(void); // Runs PPC main board thread (sync'd in step with render thread)
|
||||
void RunSoundBoardThread(void); // Runs sound board thread (unsync'd with render thread, ie at full speed)
|
||||
void RunSoundBoardThreadSyncd(void); // Runs sound board thread (sync'd in step with render thread)
|
||||
void RunDriveBoardThread(void); // Runs drive board thread (sync'd in step with render thread)
|
||||
|
||||
// Game and hardware information
|
||||
const struct GameInfo *Game;
|
||||
|
@ -397,17 +414,22 @@ private:
|
|||
PPC_FETCH_REGION PPCFetchRegions[3];
|
||||
|
||||
// Multiple threading
|
||||
bool gpusReady; // True if GPUs are ready to render
|
||||
bool startedThreads; // True if threads have been created and started
|
||||
bool pausedThreads; // True if threads are currently paused
|
||||
bool syncSndBrdThread; // True if sound board thread should be sync'd with PPC thread
|
||||
bool syncSndBrdThread; // True if sound board thread should be sync'd in step with render thread
|
||||
CThread *ppcBrdThread; // PPC main board thread
|
||||
CThread *sndBrdThread; // Sound board thread
|
||||
CThread *drvBrdThread; // Drive board thread
|
||||
bool ppcBrdThreadRunning; // Flag to indicate PPC main board thread is currently processing
|
||||
bool ppcBrdThreadDone; // Flag to indicate PPC main board thread has finished processing
|
||||
bool sndBrdThreadRunning; // Flag to indicate sound board thread is currently processing
|
||||
bool sndBrdThreadDone; // Flag to indicate sound board thread has finished processing
|
||||
bool drvBrdThreadRunning; // Flag to indicate drive board thread is currently processing
|
||||
bool drvBrdThreadDone; // Flag to indicate drive board thread has finished processing
|
||||
|
||||
// Thread synchronization objects
|
||||
CSemaphore *ppcBrdThreadSync;
|
||||
CSemaphore *sndBrdThreadSync;
|
||||
CMutex *sndBrdNotifyLock;
|
||||
CCondVar *sndBrdNotifySync;
|
||||
|
@ -427,6 +449,15 @@ private:
|
|||
CSoundBoard SoundBoard; // Sound board
|
||||
CDSB *DSB; // Digital Sound Board (type determined dynamically at load time)
|
||||
CDriveBoard DriveBoard; // Drive board
|
||||
|
||||
// Frame timings
|
||||
UINT32 ppcTicks;
|
||||
UINT32 syncSize;
|
||||
UINT32 syncTicks;
|
||||
UINT32 renderTicks;
|
||||
UINT32 sndTicks;
|
||||
UINT32 drvTicks;
|
||||
UINT32 frameTicks;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -44,14 +44,30 @@
|
|||
#include <cstring>
|
||||
#include "Supermodel.h"
|
||||
|
||||
// Offsets of memory regions within Real3D memory pool
|
||||
#define OFFSET_8C 0 // 4 MB, culling RAM low (at 0x8C000000)
|
||||
#define OFFSET_8E 0x400000 // 1 MB, culling RAM high (at 0x8E000000)
|
||||
#define OFFSET_98 0x500000 // 4 MB, polygon RAM (at 0x98000000)
|
||||
#define OFFSET_TEXRAM 0x900000 // 8 MB, texture RAM
|
||||
#define OFFSET_TEXFIFO 0x1100000 // 1 MB, texture FIFO
|
||||
#define MEMORY_POOL_SIZE (0x400000+0x100000+0x400000+0x800000+0x100000)
|
||||
// Macros that divide memory regions into pages and mark them as dirty when they are written to
|
||||
// Dirty-page tracking: each memory region is split into pages of PAGE_SIZE bytes and one bit
// per page is kept in a byte array (8 pages per byte).
#define PAGE_WIDTH 12                   // log2 of the page size
#define PAGE_SIZE (1<<PAGE_WIDTH)       // page size in bytes
// Number of bytes needed for the dirty bit array covering a region of arraySize bytes
#define DIRTY_SIZE(arraySize) (1+((arraySize)-1)/(8*PAGE_SIZE))
// Sets the dirty bit of the page containing byte offset addr. Arguments are fully parenthesised
// so that any expression (e.g. a|b) may be passed safely.
#define MARK_DIRTY(dirtyArray, addr) ((dirtyArray)[(addr)>>(PAGE_WIDTH+3)] |= 1<<(((addr)>>PAGE_WIDTH)&7))
|
||||
|
||||
// Offsets of memory regions within Real3D memory pool
|
||||
#define OFFSET_8C 0x0000000 // 4 MB, culling RAM low (at 0x8C000000)
|
||||
#define OFFSET_8E 0x0400000 // 1 MB, culling RAM high (at 0x8E000000)
|
||||
#define OFFSET_98 0x0500000 // 4 MB, polygon RAM (at 0x98000000)
|
||||
#define OFFSET_TEXRAM 0x0900000 // 8 MB, texture RAM
|
||||
#define OFFSET_TEXFIFO 0x1100000 // 1 MB, texture FIFO
|
||||
#define MEM_POOL_SIZE_RW (0x400000+0x100000+0x400000+0x800000+0x100000)
|
||||
#define OFFSET_8C_RO 0x1200000 // 4 MB, culling RAM low (at 0x8C000000) [read-only snapshot]
|
||||
#define OFFSET_8E_RO 0x1600000 // 1 MB, culling RAM high (at 0x8E000000) [read-only snapshot]
|
||||
#define OFFSET_98_RO 0x1700000 // 4 MB, polygon RAM (at 0x98000000) [read-only snapshot]
|
||||
#define OFFSET_TEXRAM_RO 0x1B00000 // 8 MB, texture RAM [read-only snapshot]
|
||||
#define MEM_POOL_SIZE_RO (0x400000+0x100000+0x400000+0x800000)
|
||||
#define OFFSET_8C_DIRTY 0x2300000
|
||||
#define OFFSET_8E_DIRTY (OFFSET_8C_DIRTY+DIRTY_SIZE(0x400000))
|
||||
#define OFFSET_98_DIRTY (OFFSET_8E_DIRTY+DIRTY_SIZE(0x100000))
|
||||
#define OFFSET_TEXRAM_DIRTY (OFFSET_98_DIRTY+DIRTY_SIZE(0x400000))
|
||||
#define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(MEM_POOL_SIZE_RO))
|
||||
#define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY)
|
||||
|
||||
/******************************************************************************
|
||||
Save States
|
||||
|
@ -61,7 +77,7 @@ void CReal3D::SaveState(CBlockFile *SaveState)
|
|||
{
|
||||
SaveState->NewBlock("Real3D", __FILE__);
|
||||
|
||||
SaveState->Write(memoryPool, MEMORY_POOL_SIZE);
|
||||
SaveState->Write(memoryPool, MEM_POOL_SIZE_RW); // Don't write out read-only snapshots or dirty page arrays
|
||||
SaveState->Write(&fifoIdx, sizeof(fifoIdx));
|
||||
SaveState->Write(&vromTextureAddr, sizeof(vromTextureAddr));
|
||||
SaveState->Write(&vromTextureHeader, sizeof(vromTextureHeader));
|
||||
|
@ -90,8 +106,11 @@ void CReal3D::LoadState(CBlockFile *SaveState)
|
|||
return;
|
||||
}
|
||||
|
||||
SaveState->Read(memoryPool, MEMORY_POOL_SIZE);
|
||||
Render3D->UploadTextures(0,0,2048,2048);
|
||||
SaveState->Read(memoryPool, MEM_POOL_SIZE_RW);
|
||||
// If multi-threaded, update read-only snapshots too
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
UpdateSnapshots(true);
|
||||
Render3D->UploadTextures(0, 0, 2048, 2048);
|
||||
SaveState->Read(&fifoIdx, sizeof(fifoIdx));
|
||||
SaveState->Read(&vromTextureAddr, sizeof(vromTextureAddr));
|
||||
SaveState->Read(&vromTextureHeader, sizeof(vromTextureHeader));
|
||||
|
@ -117,23 +136,111 @@ void CReal3D::LoadState(CBlockFile *SaveState)
|
|||
Rendering
|
||||
******************************************************************************/
|
||||
|
||||
void CReal3D::RenderFrame(void)
|
||||
void CReal3D::BeginVBlank(void)
|
||||
{
|
||||
//if (commandPortWritten)
|
||||
Render3D->RenderFrame();
|
||||
status |= 2; // VBlank bit
|
||||
}
|
||||
|
||||
void CReal3D::EndVBlank(void)
|
||||
{
|
||||
error = false; // clear error (just needs to be done once per frame)
|
||||
status &= ~2;
|
||||
}
|
||||
|
||||
// Publishes the current emulation-side state to the read-only copies used for rendering.
// Returns the number of bytes copied into the memory snapshots (0 when multi-threaded
// rendering is disabled).
UINT32 CReal3D::SyncSnapshots(void)
{
    // Latch the command port flag into its read-only copy and reset the live flag.
    // This is done in both single- and multi-threaded modes.
    commandPortWrittenRO = commandPortWritten;
    commandPortWritten = false;

    if (!g_Config.gpuMultiThreaded)
        return 0;

    // Hand the queued texture uploads over to the read-only queue. Swapping exchanges the
    // buffers instead of deep-copying every element; the live queue (now holding the previous
    // frame's entries) is then emptied, giving the same end state as copy + clear.
    queuedUploadTexturesRO.swap(queuedUploadTextures);
    queuedUploadTextures.clear();

    // Bring the read-only memory snapshots up to date (dirty pages only)
    return UpdateSnapshots(false);
}
|
||||
|
||||
// Copies one memory region into its read-only snapshot and clears the region's dirty bits.
//
//  copyWhole - if true the entire region is copied, otherwise only pages flagged as dirty
//  src       - start of the live region
//  dst       - start of the snapshot (same size as src)
//  size      - size of the region in bytes
//  dirty     - dirty bit array for the region (DIRTY_SIZE(size) bytes, one bit per page)
//
// Returns the number of bytes copied.
UINT32 CReal3D::UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty)
{
    // Nothing to do for an empty region (also avoids underflow inside DIRTY_SIZE)
    if (size == 0)
        return 0;

    const unsigned pageSize = PAGE_SIZE;
    const unsigned dirtySize = DIRTY_SIZE(size);

    if (copyWhole)
    {
        // Updating the whole region: copy everything in one go and reset all dirty bits
        memcpy(dst, src, size);
        memset(dirty, 0, dirtySize);
        return size;
    }

    // Otherwise walk the dirty bit array and copy only the pages that were written to
    UINT32 copied = 0;
    for (unsigned i = 0; i < dirtySize; i++)
    {
        unsigned offset = i * 8 * pageSize;     // byte offset of the first page covered by dirty[i]
        for (UINT8 bits = dirty[i]; bits != 0 && offset < size; bits >>= 1, offset += pageSize)
        {
            if (!(bits & 1))
                continue;

            // Copy an extra 4 bytes beyond the page to allow for a possible 32-bit overlap,
            // but never go past the end of the region
            const unsigned remaining = size - offset;
            const unsigned toCopy = (remaining > pageSize + 4 ? pageSize + 4 : remaining);
            memcpy(dst + offset, src + offset, toCopy);
            copied += toCopy;
        }
        dirty[i] = 0;
    }
    return copied;
}
|
||||
|
||||
// Refreshes the read-only snapshots of every Real3D memory region (culling RAM low/high,
// polygon RAM and texture RAM) and returns the total number of bytes copied.
UINT32 CReal3D::UpdateSnapshots(bool copyWhole)
{
    UINT32 total = 0;

    total += UpdateSnapshot(copyWhole, (UINT8*)cullingRAMLo, (UINT8*)cullingRAMLoRO, 0x400000, cullingRAMLoDirty);    // 4 MB culling RAM low
    total += UpdateSnapshot(copyWhole, (UINT8*)cullingRAMHi, (UINT8*)cullingRAMHiRO, 0x100000, cullingRAMHiDirty);    // 1 MB culling RAM high
    total += UpdateSnapshot(copyWhole, (UINT8*)polyRAM, (UINT8*)polyRAMRO, 0x400000, polyRAMDirty);                   // 4 MB polygon RAM
    total += UpdateSnapshot(copyWhole, (UINT8*)textureRAM, (UINT8*)textureRAMRO, 0x800000, textureRAMDirty);          // 8 MB texture RAM

    return total;
}
|
||||
|
||||
void CReal3D::BeginFrame(void)
|
||||
{
|
||||
status |= 2; // VBlank bit
|
||||
// If multi-threaded, perform now any queued texture uploads to renderer before rendering begins
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
{
|
||||
for (vector<QueuedUploadTextures>::iterator it = queuedUploadTexturesRO.begin(), end = queuedUploadTexturesRO.end(); it != end; it++)
|
||||
Render3D->UploadTextures(it->x, it->y, it->width, it->height);
|
||||
}
|
||||
|
||||
Render3D->BeginFrame();
|
||||
}
|
||||
|
||||
void CReal3D::RenderFrame(void)
|
||||
{
|
||||
//if (commandPortWrittenRO)
|
||||
Render3D->RenderFrame();
|
||||
}
|
||||
|
||||
void CReal3D::EndFrame(void)
|
||||
{
|
||||
error = false; // clear error (just needs to be done once per frame)
|
||||
status &= ~2;
|
||||
Render3D->EndFrame();
|
||||
}
|
||||
|
||||
|
@ -528,7 +635,12 @@ void CReal3D::StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigne
|
|||
for (yy = 0; yy < 8; yy++)
|
||||
{
|
||||
for (xx = 0; xx < 8; xx++)
|
||||
{
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
MARK_DIRTY(textureRAMDirty, destOffset * 2);
|
||||
textureRAM[destOffset++] = texData[decode[(yy*8+xx)^1]];
|
||||
}
|
||||
|
||||
destOffset += 2048-8; // next line
|
||||
}
|
||||
texData += 8*8; // next tile
|
||||
|
@ -554,7 +666,11 @@ void CReal3D::StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigne
|
|||
{
|
||||
for (xx = 0; xx < 8; xx += 2)
|
||||
{
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
MARK_DIRTY(textureRAMDirty, destOffset * 2);
|
||||
textureRAM[destOffset++] = texData[decode[(yy^1)*8+((xx+0)^1)]/2]>>8;
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
MARK_DIRTY(textureRAMDirty, destOffset * 2);
|
||||
textureRAM[destOffset++] = texData[decode[(yy^1)*8+((xx+1)^1)]/2]&0xFF;
|
||||
|
||||
}
|
||||
|
@ -564,6 +680,21 @@ void CReal3D::StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigne
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Signal to renderer that textures have changed
|
||||
// TO-DO: mipmaps? What if a game writes non-mipmap textures to mipmap area?
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
{
|
||||
// If multi-threaded, then queue calls to UploadTextures for render thread to perform at beginning of next frame
|
||||
QueuedUploadTextures upl;
|
||||
upl.x = xPos;
|
||||
upl.y = yPos;
|
||||
upl.width = width;
|
||||
upl.height = height;
|
||||
queuedUploadTextures.push_back(upl);
|
||||
}
|
||||
else
|
||||
Render3D->UploadTextures(xPos, yPos, width, height);
|
||||
}
|
||||
|
||||
// Texture data will be in little endian format
|
||||
|
@ -651,11 +782,6 @@ void CReal3D::UploadTexture(UINT32 header, UINT16 *texData)
|
|||
//printf("unknown texture format %02X\n", header>>24);
|
||||
break;
|
||||
}
|
||||
|
||||
// Signal to renderer that textures have changed
|
||||
// TO-DO: mipmaps? What if a game writes non-mipmap textures to mipmap area?
|
||||
//Render3D->UploadTextures(x,y,width,height);
|
||||
Render3D->UploadTextures(0,0,2048,2048); // TO-DO: should not have to upload all 2048x2048 texels
|
||||
}
|
||||
|
||||
|
||||
|
@ -736,16 +862,22 @@ void CReal3D::WriteTexturePort(unsigned reg, UINT32 data)
|
|||
|
||||
// Stores a 32-bit word into low culling RAM at byte offset addr. In multi-threaded mode the
// page containing addr is flagged dirty so it is included in the next snapshot update.
void CReal3D::WriteLowCullingRAM(UINT32 addr, UINT32 data)
{
    if (g_Config.gpuMultiThreaded)
    {
        MARK_DIRTY(cullingRAMLoDirty, addr);
    }

    const UINT32 wordIdx = addr / 4;    // byte offset -> 32-bit word index
    cullingRAMLo[wordIdx] = data;
}
|
||||
|
||||
// Stores a 32-bit word into high culling RAM at byte offset addr. In multi-threaded mode the
// page containing addr is flagged dirty so it is included in the next snapshot update.
void CReal3D::WriteHighCullingRAM(UINT32 addr, UINT32 data)
{
    if (g_Config.gpuMultiThreaded)
    {
        MARK_DIRTY(cullingRAMHiDirty, addr);
    }

    const UINT32 wordIdx = addr / 4;    // byte offset -> 32-bit word index
    cullingRAMHi[wordIdx] = data;
}
|
||||
|
||||
// Stores a 32-bit word into polygon RAM at byte offset addr. In multi-threaded mode the
// page containing addr is flagged dirty so it is included in the next snapshot update.
void CReal3D::WritePolygonRAM(UINT32 addr, UINT32 data)
{
    if (g_Config.gpuMultiThreaded)
    {
        MARK_DIRTY(polyRAMDirty, addr);
    }

    const UINT32 wordIdx = addr / 4;    // byte offset -> 32-bit word index
    polyRAM[wordIdx] = data;
}
|
||||
|
||||
|
@ -807,6 +939,10 @@ void CReal3D::Reset(void)
|
|||
error = false;
|
||||
|
||||
commandPortWritten = false;
|
||||
commandPortWrittenRO = false;
|
||||
|
||||
queuedUploadTextures.clear();
|
||||
queuedUploadTexturesRO.clear();
|
||||
|
||||
fifoIdx = 0;
|
||||
status = 0;
|
||||
|
@ -817,7 +953,8 @@ void CReal3D::Reset(void)
|
|||
dmaStatus = 0;
|
||||
dmaUnknownReg = 0;
|
||||
|
||||
memset(memoryPool, 0, MEMORY_POOL_SIZE);
|
||||
unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
|
||||
memset(memoryPool, 0, memSize);
|
||||
|
||||
DebugLog("Real3D reset\n");
|
||||
}
|
||||
|
@ -830,8 +967,15 @@ void CReal3D::Reset(void)
|
|||
void CReal3D::AttachRenderer(CRender3D *Render3DPtr)
|
||||
{
|
||||
Render3D = Render3DPtr;
|
||||
Render3D->AttachMemory(cullingRAMLo,cullingRAMHi,polyRAM,vrom,textureRAM);
|
||||
|
||||
// If multi-threaded, attach read-only snapshots to renderer instead of real ones
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
Render3D->AttachMemory(cullingRAMLoRO, cullingRAMHiRO, polyRAMRO, vrom, textureRAMRO);
|
||||
else
|
||||
Render3D->AttachMemory(cullingRAMLo, cullingRAMHi, polyRAM, vrom, textureRAM);
|
||||
|
||||
Render3D->SetStep(step);
|
||||
|
||||
DebugLog("Real3D attached a Render3D object\n");
|
||||
}
|
||||
|
||||
|
@ -859,7 +1003,8 @@ void CReal3D::SetStep(int stepID)
|
|||
|
||||
bool CReal3D::Init(const UINT8 *vromPtr, CBus *BusObjectPtr, CIRQ *IRQObjectPtr, unsigned dmaIRQBit)
|
||||
{
|
||||
float memSizeMB = (float)MEMORY_POOL_SIZE/(float)0x100000;
|
||||
unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
|
||||
float memSizeMB = (float)memSize/(float)0x100000;
|
||||
|
||||
// IRQ and bus objects
|
||||
Bus = BusObjectPtr;
|
||||
|
@ -867,20 +1012,34 @@ bool CReal3D::Init(const UINT8 *vromPtr, CBus *BusObjectPtr, CIRQ *IRQObjectPtr,
|
|||
dmaIRQ = dmaIRQBit;
|
||||
|
||||
// Allocate all Real3D RAM regions
|
||||
memoryPool = new(std::nothrow) UINT8[MEMORY_POOL_SIZE];
|
||||
memoryPool = new(std::nothrow) UINT8[memSize];
|
||||
if (NULL == memoryPool)
|
||||
return ErrorLog("Insufficient memory for Real3D object (needs %1.1f MB).", memSizeMB);
|
||||
|
||||
// Set up pointers
|
||||
// Set up main pointers
|
||||
cullingRAMLo = (UINT32 *) &memoryPool[OFFSET_8C];
|
||||
cullingRAMHi = (UINT32 *) &memoryPool[OFFSET_8E];
|
||||
polyRAM = (UINT32 *) &memoryPool[OFFSET_98];
|
||||
textureRAM = (UINT16 *) &memoryPool[OFFSET_TEXRAM];
|
||||
textureFIFO = (UINT32 *) &memoryPool[OFFSET_TEXFIFO];
|
||||
|
||||
// If multi-threaded, set up pointers for read-only snapshots and dirty page arrays too
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
{
|
||||
cullingRAMLoRO = (UINT32 *) &memoryPool[OFFSET_8C_RO];
|
||||
cullingRAMHiRO = (UINT32 *) &memoryPool[OFFSET_8E_RO];
|
||||
polyRAMRO = (UINT32 *) &memoryPool[OFFSET_98_RO];
|
||||
textureRAMRO = (UINT16 *) &memoryPool[OFFSET_TEXRAM_RO];
|
||||
cullingRAMLoDirty = (UINT8 *) &memoryPool[OFFSET_8C_DIRTY];
|
||||
cullingRAMHiDirty = (UINT8 *) &memoryPool[OFFSET_8E_DIRTY];
|
||||
polyRAMDirty = (UINT8 *) &memoryPool[OFFSET_98_DIRTY];
|
||||
textureRAMDirty = (UINT8 *) &memoryPool[OFFSET_TEXRAM_DIRTY];
|
||||
}
|
||||
|
||||
// VROM pointer passed to us
|
||||
vrom = (UINT32 *) vromPtr;
|
||||
|
||||
DebugLog("Initialized Real3D (allocated %1.1f MB)\n", memSizeMB);
|
||||
return OKAY;
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,20 @@
|
|||
#ifndef INCLUDED_REAL3D_H
|
||||
#define INCLUDED_REAL3D_H
|
||||
|
||||
/*
|
||||
* QueuedUploadTextures:
|
||||
*
|
||||
* When rendering is multi-threaded, this struct is used to represent a postponed
|
||||
* call to CRender3D::UploadTextures that will be performed by the render thread
|
||||
* at the beginning of the next frame, rather than directly in the PPC thread.
|
||||
*/
|
||||
struct QueuedUploadTextures
{
    // Arguments of the postponed UploadTextures(x, y, width, height) call
    unsigned x, y;          // position arguments
    unsigned width, height; // size arguments
};
|
||||
|
||||
/*
|
||||
* CReal3D:
|
||||
|
@ -62,26 +76,54 @@ public:
|
|||
void LoadState(CBlockFile *SaveState);
|
||||
|
||||
/*
|
||||
* RenderFrame(void):
|
||||
* BeginVBlank(void):
|
||||
*
|
||||
* Traverses the scene database and renders a frame. Must be called after
|
||||
* BeginFrame() but before EndFrame().
|
||||
* Must be called before the VBlank starts.
|
||||
*/
|
||||
void RenderFrame(void);
|
||||
void BeginVBlank(void);
|
||||
|
||||
/*
|
||||
* EndVBlank(void)
|
||||
*
|
||||
* Must be called after the VBlank finishes.
|
||||
*/
|
||||
void EndVBlank(void);
|
||||
|
||||
/*
|
||||
* SyncSnapshots(void):
|
||||
*
|
||||
* Syncs the read-only memory snapshots with the real ones so that rendering
|
||||
* of the current frame can begin in the render thread. Must be called at the
|
||||
* end of each frame when both the render thread and the PPC thread have finished
|
||||
* their work. If multi-threaded rendering is not enabled, then this method does
|
||||
* nothing.
|
||||
*/
|
||||
UINT32 SyncSnapshots(void);
|
||||
|
||||
/*
|
||||
* BeginFrame(void):
|
||||
*
|
||||
* Prepare to render a new frame. Must be called once per frame prior to
|
||||
* drawing anything.
|
||||
* Prepares to render a new frame. Must be called once per frame prior to
|
||||
* drawing anything and must only access read-only snapshots and variables
|
||||
* since it may be running in a separate thread.
|
||||
*/
|
||||
void BeginFrame(void);
|
||||
|
||||
/*
|
||||
* RenderFrame(void):
|
||||
*
|
||||
* Traverses the scene database and renders a frame. Must be called after
|
||||
* BeginFrame() but before EndFrame() and must only access read-only snapshots
|
||||
* and variables since it may be running in a separate thread.
|
||||
*/
|
||||
void RenderFrame(void);
|
||||
|
||||
/*
|
||||
* EndFrame(void):
|
||||
*
|
||||
* Signals the end of rendering for this frame. Must be called last during
|
||||
* the frame.
|
||||
* Signals the end of rendering for this frame. Must be called last during
|
||||
* the frame and must only access read-only snapshots and variables since it
|
||||
* may be running in a separate thread.
|
||||
*/
|
||||
void EndFrame(void);
|
||||
|
||||
|
@ -342,6 +384,8 @@ private:
|
|||
unsigned Shift(UINT8 *data, unsigned numBits);
|
||||
void StoreTexture(unsigned xPos, unsigned yPos, unsigned width, unsigned height, UINT16 *texData, unsigned bytesPerTexel);
|
||||
void UploadTexture(UINT32 header, UINT16 *texData);
|
||||
UINT32 UpdateSnapshots(bool copyWhole);
|
||||
UINT32 UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty);
|
||||
|
||||
// Renderer attached to the Real3D
|
||||
CRender3D *Render3D;
|
||||
|
@ -355,16 +399,32 @@ private:
|
|||
bool error; // true if an error occurred this frame
|
||||
|
||||
// Real3D memory
|
||||
UINT8 *memoryPool; // all memory allocated here
|
||||
UINT32 *cullingRAMLo; // 4MB of culling RAM at 8C000000
|
||||
UINT32 *cullingRAMHi; // 1MB of culling RAM at 8E000000
|
||||
UINT32 *polyRAM; // 4MB of polygon RAM at 98000000
|
||||
UINT16 *textureRAM; // 8MB of internal texture RAM
|
||||
UINT32 *textureFIFO; // 1MB texture FIFO at 0x94000000
|
||||
unsigned fifoIdx; // index into texture FIFO
|
||||
UINT8 *memoryPool; // all memory allocated here
|
||||
UINT32 *cullingRAMLo; // 4MB of culling RAM at 8C000000
|
||||
UINT32 *cullingRAMHi; // 1MB of culling RAM at 8E000000
|
||||
UINT32 *polyRAM; // 4MB of polygon RAM at 98000000
|
||||
UINT16 *textureRAM; // 8MB of internal texture RAM
|
||||
UINT32 *textureFIFO; // 1MB texture FIFO at 0x94000000
|
||||
unsigned fifoIdx; // index into texture FIFO
|
||||
UINT32 vromTextureAddr; // VROM texture port address data
|
||||
UINT32 vromTextureHeader; // VROM texture port header data
|
||||
|
||||
// Read-only snapshots
|
||||
UINT32 *cullingRAMLoRO; // 4MB of culling RAM at 8C000000 [read-only snapshot]
|
||||
UINT32 *cullingRAMHiRO; // 1MB of culling RAM at 8E000000 [read-only snapshot]
|
||||
UINT32 *polyRAMRO; // 4MB of polygon RAM at 98000000 [read-only snapshot]
|
||||
UINT16 *textureRAMRO; // 8MB of internal texture RAM [read-only snapshot]
|
||||
|
||||
// Arrays to keep track of dirty pages in memory regions
|
||||
UINT8 *cullingRAMLoDirty;
|
||||
UINT8 *cullingRAMHiDirty;
|
||||
UINT8 *polyRAMDirty;
|
||||
UINT8 *textureRAMDirty;
|
||||
|
||||
// Queued texture uploads
|
||||
vector<QueuedUploadTextures> queuedUploadTextures;
|
||||
vector<QueuedUploadTextures> queuedUploadTexturesRO; // Read-only copy of queue
|
||||
|
||||
// Big endian bus object for DMA memory access
|
||||
CBus *Bus;
|
||||
|
||||
|
@ -383,6 +443,7 @@ private:
|
|||
|
||||
// Command port
|
||||
bool commandPortWritten;
|
||||
bool commandPortWrittenRO; // Read-only copy of flag
|
||||
|
||||
// Status and command registers
|
||||
UINT32 status;
|
||||
|
@ -394,7 +455,6 @@ private:
|
|||
unsigned tapIDSize; // size of ID data in bits
|
||||
unsigned tapTDO; // bit shifted out to TDO
|
||||
int tapState; // current state
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -34,6 +34,23 @@
|
|||
#include <cstring>
|
||||
#include "Supermodel.h"
|
||||
|
||||
// Macros that divide memory regions into pages and mark them as dirty when they are written to
|
||||
// Dirty-page tracking: each memory region is split into pages of PAGE_SIZE bytes and one bit
// per page is kept in a byte array (8 pages per byte).
#define PAGE_WIDTH 10                   // log2 of the page size
#define PAGE_SIZE (1<<PAGE_WIDTH)       // page size in bytes
// Number of bytes needed for the dirty bit array covering a region of arraySize bytes
#define DIRTY_SIZE(arraySize) (1+((arraySize)-1)/(8*PAGE_SIZE))
// Sets the dirty bit of the page containing byte offset addr. Arguments are fully parenthesised
// so that any expression (e.g. a|b) may be passed safely.
#define MARK_DIRTY(dirtyArray, addr) ((dirtyArray)[(addr)>>(PAGE_WIDTH+3)] |= 1<<(((addr)>>PAGE_WIDTH)&7))
|
||||
|
||||
// Offsets of memory regions within TileGen memory pool
|
||||
#define OFFSET_VRAM 0x000000
|
||||
#define OFFSET_PAL 0x120000
|
||||
#define MEM_POOL_SIZE_RW (0x120000+0x020000)
|
||||
#define OFFSET_VRAM_RO 0x140000 // [read-only snapshot]
|
||||
#define OFFSET_PAL_RO 0x260000 // [read-only snapshot]
|
||||
#define MEM_POOL_SIZE_RO (0x120000+0x020000)
|
||||
#define OFFSET_VRAM_DIRTY 0x280000
|
||||
#define OFFSET_PAL_DIRTY (OFFSET_VRAM_DIRTY+DIRTY_SIZE(0x120000))
|
||||
#define MEM_POOL_SIZE_DIRTY (DIRTY_SIZE(MEM_POOL_SIZE_RO))
|
||||
#define MEMORY_POOL_SIZE (MEM_POOL_SIZE_RW+MEM_POOL_SIZE_RO+MEM_POOL_SIZE_DIRTY)
|
||||
|
||||
/******************************************************************************
|
||||
Save States
|
||||
|
@ -42,7 +59,7 @@
|
|||
void CTileGen::SaveState(CBlockFile *SaveState)
|
||||
{
|
||||
SaveState->NewBlock("Tile Generator", __FILE__);
|
||||
SaveState->Write(memoryPool, 0x100000+0x20000);
|
||||
SaveState->Write(vram, 0x120000); // Don't write out palette, read-only snapshots or dirty page arrays, just VRAM
|
||||
SaveState->Write(regs, sizeof(regs));
|
||||
}
|
||||
|
||||
|
@ -55,16 +72,17 @@ void CTileGen::LoadState(CBlockFile *SaveState)
|
|||
}
|
||||
|
||||
// Load memory one word at a time
|
||||
for (int i = 0; i < (0x100000+0x20000); i += 4)
|
||||
for (int i = 0; i < 0x120000; i += 4)
|
||||
{
|
||||
UINT32 data;
|
||||
|
||||
SaveState->Read(&data, sizeof(data));
|
||||
Render2D->WriteVRAM(i, data);
|
||||
*(UINT32 *) &memoryPool[i] = data;
|
||||
WriteRAM(i, data);
|
||||
}
|
||||
|
||||
SaveState->Read(regs, sizeof(regs));
|
||||
// If multi-threaded, update read-only snapshots too
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
UpdateSnapshots(true);
|
||||
}
|
||||
|
||||
|
||||
|
@ -72,9 +90,8 @@ void CTileGen::LoadState(CBlockFile *SaveState)
|
|||
Rendering
|
||||
******************************************************************************/
|
||||
|
||||
void CTileGen::BeginFrame(void)
|
||||
void CTileGen::BeginVBlank(void)
|
||||
{
|
||||
Render2D->BeginFrame();
|
||||
/*
|
||||
printf("08: %X\n", regs[0x08/4]);
|
||||
printf("0C: %X\n", regs[0x0C/4]);
|
||||
|
@ -88,25 +105,144 @@ void CTileGen::BeginFrame(void)
|
|||
*/
|
||||
}
|
||||
|
||||
// Called after the VBlank period finishes. The tile generator currently has no work to do here.
void CTileGen::EndVBlank(void)
{
    // Intentionally empty
}
|
||||
|
||||
// Brings the read-only snapshots up to date (dirty pages only) and returns the number of
// bytes copied. When multi-threaded rendering is disabled nothing is copied and 0 is returned.
UINT32 CTileGen::SyncSnapshots(void)
{
    if (g_Config.gpuMultiThreaded)
        return UpdateSnapshots(false);

    return 0;
}
|
||||
|
||||
// Copies one memory region into its read-only snapshot and clears the region's dirty bits.
//
//  copyWhole - if true the entire region is copied, otherwise only pages flagged as dirty
//  src       - start of the live region
//  dst       - start of the snapshot (same size as src)
//  size      - size of the region in bytes
//  dirty     - dirty bit array for the region (DIRTY_SIZE(size) bytes, one bit per page)
//
// Returns the number of bytes copied.
UINT32 CTileGen::UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty)
{
    // Nothing to do for an empty region (also avoids underflow inside DIRTY_SIZE)
    if (size == 0)
        return 0;

    const unsigned pageSize = PAGE_SIZE;
    const unsigned dirtySize = DIRTY_SIZE(size);

    if (copyWhole)
    {
        // Updating the whole region: copy everything in one go and reset all dirty bits
        memcpy(dst, src, size);
        memset(dirty, 0, dirtySize);
        return size;
    }

    // Otherwise walk the dirty bit array and copy only the pages that were written to
    UINT32 copied = 0;
    for (unsigned i = 0; i < dirtySize; i++)
    {
        unsigned offset = i * 8 * pageSize;     // byte offset of the first page covered by dirty[i]
        for (UINT8 bits = dirty[i]; bits != 0 && offset < size; bits >>= 1, offset += pageSize)
        {
            if (!(bits & 1))
                continue;

            // Copy an extra 4 bytes beyond the page to allow for a possible 32-bit overlap,
            // but never go past the end of the region
            const unsigned remaining = size - offset;
            const unsigned toCopy = (remaining > pageSize + 4 ? pageSize + 4 : remaining);
            memcpy(dst + offset, src + offset, toCopy);
            copied += toCopy;
        }
        dirty[i] = 0;
    }
    return copied;
}
|
||||
|
||||
// Refreshes the read-only snapshots of the palette, VRAM and registers and returns the total
// number of bytes copied.
UINT32 CTileGen::UpdateSnapshots(bool copyWhole)
{
    UINT32 total = 0;

    total += UpdateSnapshot(copyWhole, (UINT8*)pal, (UINT8*)palRO, 0x020000, palDirty);
    total += UpdateSnapshot(copyWhole, (UINT8*)vram, (UINT8*)vramRO, 0x120000, vramDirty);

    // The register block has no dirty tracking, so it is always copied in full
    memcpy(regsRO, regs, sizeof(regs));
    total += sizeof(regs);

    return total;
}
|
||||
|
||||
void CTileGen::BeginFrame(void)
|
||||
{
|
||||
// NOTE: Render2D->WriteVRAM(addr, data) is no longer being called for RAM addresses that are written
|
||||
// to and instead this class relies upon the fact that Render2D currently marks everything as dirty
|
||||
// with every frame. If this were to change in the future then code to handle marking the correct
|
||||
// parts of the renderer as dirty would need to be added here.
|
||||
|
||||
Render2D->BeginFrame();
|
||||
}
|
||||
|
||||
void CTileGen::EndFrame(void)
|
||||
{
|
||||
Render2D->EndFrame();
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************************
|
||||
Emulation Functions
|
||||
******************************************************************************/
|
||||
|
||||
UINT32 CTileGen::ReadRAM(unsigned addr)
|
||||
{
|
||||
return *(UINT32 *) &memoryPool[addr];
|
||||
return *(UINT32 *) &vram[addr];
|
||||
}
|
||||
|
||||
void CTileGen::WriteRAM(unsigned addr, UINT32 data)
|
||||
{
|
||||
Render2D->WriteVRAM(addr,data); // inform renderer of update first
|
||||
*(UINT32 *) &memoryPool[addr] = data;
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
MARK_DIRTY(vramDirty, addr);
|
||||
*(UINT32 *) &vram[addr] = data;
|
||||
|
||||
// Update palette if required
|
||||
if (addr >= 0x100000)
|
||||
{
|
||||
addr -= 0x100000;
|
||||
unsigned color = addr/4; // color index
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
MARK_DIRTY(palDirty, addr);
|
||||
WritePalette(color, data);
|
||||
}
|
||||
}
|
||||
|
||||
void CTileGen::InitPalette(void)
|
||||
{
|
||||
for (int i = 0; i < 0x20000/4; i++)
|
||||
{
|
||||
WritePalette(i, vram[0x100000/4 + i]);
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
palRO[i] = pal[i];
|
||||
}
|
||||
}
|
||||
|
||||
void CTileGen::WritePalette(unsigned color, UINT32 data)
|
||||
{
|
||||
UINT8 r, g, b, a;
|
||||
|
||||
a = 0xFF * ((data>>15)&1); // decode the RGBA (make alpha 0xFF or 0x00)
|
||||
a = ~a; // invert it (set on Model 3 means clear pixel)
|
||||
|
||||
if ((data&0x8000))
|
||||
r = g = b = 0;
|
||||
else
|
||||
{
|
||||
b = (data>>7)&0xF8;
|
||||
g = (data>>2)&0xF8;
|
||||
r = (data<<3)&0xF8;
|
||||
}
|
||||
|
||||
pal[color] = (a<<24)|(b<<16)|(g<<8)|r;
|
||||
}
|
||||
|
||||
void CTileGen::WriteRegister(unsigned reg, UINT32 data)
|
||||
|
@ -136,8 +272,13 @@ void CTileGen::WriteRegister(unsigned reg, UINT32 data)
|
|||
|
||||
void CTileGen::Reset(void)
|
||||
{
|
||||
unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
|
||||
memset(memoryPool, 0, memSize);
|
||||
memset(regs, 0, sizeof(regs));
|
||||
memset(memoryPool, 0, 0x120000);
|
||||
memset(regsRO, 0, sizeof(regsRO));
|
||||
|
||||
InitPalette();
|
||||
|
||||
DebugLog("Tile Generator reset\n");
|
||||
}
|
||||
|
||||
|
@ -149,22 +290,48 @@ void CTileGen::Reset(void)
|
|||
void CTileGen::AttachRenderer(CRender2D *Render2DPtr)
|
||||
{
|
||||
Render2D = Render2DPtr;
|
||||
Render2D->AttachVRAM(memoryPool);
|
||||
Render2D->AttachRegisters(regs);
|
||||
|
||||
// If multi-threaded, attach read-only snapshots to renderer instead of real ones
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
{
|
||||
Render2D->AttachVRAM(vramRO);
|
||||
Render2D->AttachPalette(palRO);
|
||||
Render2D->AttachRegisters(regsRO);
|
||||
}
|
||||
else
|
||||
{
|
||||
Render2D->AttachVRAM(vram);
|
||||
Render2D->AttachPalette(pal);
|
||||
Render2D->AttachRegisters(regs);
|
||||
}
|
||||
|
||||
DebugLog("Tile Generator attached a Render2D object\n");
|
||||
}
|
||||
|
||||
#define MEMORY_POOL_SIZE 0x120000
|
||||
|
||||
bool CTileGen::Init(CIRQ *IRQObjectPtr)
|
||||
{
|
||||
float memSizeMB = (float)MEMORY_POOL_SIZE/(float)0x100000;
|
||||
unsigned memSize = (g_Config.gpuMultiThreaded ? MEMORY_POOL_SIZE : MEM_POOL_SIZE_RW);
|
||||
float memSizeMB = (float)memSize/(float)0x100000;
|
||||
|
||||
// Allocate all memory for ROMs and PPC RAM
|
||||
memoryPool = new(std::nothrow) UINT8[MEMORY_POOL_SIZE];
|
||||
// Allocate all memory for all TileGen RAM regions
|
||||
memoryPool = new(std::nothrow) UINT8[memSize];
|
||||
if (NULL == memoryPool)
|
||||
return ErrorLog("Insufficient memory for tile generator object (needs %1.1f MB).", memSizeMB);
|
||||
|
||||
// Set up main pointers
|
||||
vram = (UINT8 *) &memoryPool[OFFSET_VRAM];
|
||||
pal = (UINT32 *) &memoryPool[OFFSET_PAL];
|
||||
|
||||
// If multi-threaded, set up pointers for read-only snapshots and dirty page arrays too
|
||||
if (g_Config.gpuMultiThreaded)
|
||||
{
|
||||
vramRO = (UINT8 *) &memoryPool[OFFSET_VRAM_RO];
|
||||
palRO = (UINT32 *) &memoryPool[OFFSET_PAL_RO];
|
||||
vramDirty = (UINT8 *) &memoryPool[OFFSET_VRAM_DIRTY];
|
||||
palDirty = (UINT8 *) &memoryPool[OFFSET_PAL_DIRTY];
|
||||
}
|
||||
|
||||
// Hook up the IRQ controller
|
||||
IRQ = IRQObjectPtr;
|
||||
|
||||
|
|
|
@ -58,19 +58,46 @@ public:
|
|||
*/
|
||||
void LoadState(CBlockFile *SaveState);
|
||||
|
||||
/*
|
||||
* BeginVBlank(void):
|
||||
*
|
||||
* Must be called before the VBlank starts.
|
||||
*/
|
||||
void BeginVBlank(void);
|
||||
|
||||
/*
|
||||
* EndVBlank(void)
|
||||
*
|
||||
* Must be called after the VBlank finishes.
|
||||
*/
|
||||
void EndVBlank(void);
|
||||
|
||||
/*
|
||||
* SyncSnapshots(void):
|
||||
*
|
||||
* Syncs the read-only memory snapshots with the real ones so that rendering
|
||||
* of the current frame can begin in the render thread. Must be called at the
|
||||
* end of each frame when both the render thread and the PPC thread have finished
|
||||
* their work. If multi-threaded rendering is not enabled, then this method does
|
||||
* nothing.
|
||||
*/
|
||||
UINT32 SyncSnapshots(void);
|
||||
|
||||
/*
|
||||
* BeginFrame(void):
|
||||
*
|
||||
* Prepare to render a new frame. Must be called once per frame prior to
|
||||
* drawing anything.
|
||||
* Prepares to render a new frame. Must be called once per frame prior to
|
||||
* drawing anything and must only access read-only snapshots and variables
|
||||
* since it may be running in a separate thread.
|
||||
*/
|
||||
void BeginFrame(void);
|
||||
|
||||
/*
|
||||
* EndFrame(void):
|
||||
*
|
||||
* Signals the end of rendering for this frame. Must be called last during
|
||||
* the frame.
|
||||
* Signals the end of rendering for this frame. Must be called last during
|
||||
* the frame and must only access read-only snapshots and variables since it
|
||||
* may be running in a separate thread.
|
||||
*/
|
||||
void EndFrame(void);
|
||||
|
||||
|
@ -163,14 +190,32 @@ public:
|
|||
~CTileGen(void);
|
||||
|
||||
private:
|
||||
// Private member functions
|
||||
void InitPalette(void);
|
||||
void WritePalette(unsigned color, UINT32 data);
|
||||
UINT32 UpdateSnapshots(bool copyWhole);
|
||||
UINT32 UpdateSnapshot(bool copyWhole, UINT8 *src, UINT8 *dst, unsigned size, UINT8 *dirty);
|
||||
|
||||
CIRQ *IRQ; // IRQ controller the tile generator is attached to
|
||||
CRender2D *Render2D; // 2D renderer the tile generator is attached to
|
||||
|
||||
// Tile generator VRAM
|
||||
UINT8 *memoryPool; // all memory allocated here
|
||||
UINT8 *vram; // 1.8MB of VRAM
|
||||
UINT32 *pal; // 0x20000 byte (32K colors) palette
|
||||
|
||||
// Read-only snapshots
|
||||
UINT8 *vramRO; // 1.8MB of VRAM [read-only snapshot]
|
||||
UINT32 *palRO; // 0x20000 byte (32K colors) palette [read-only snapshot]
|
||||
|
||||
// Arrays to keep track of dirty pages in memory regions
|
||||
UINT8 *vramDirty;
|
||||
UINT8 *palDirty;
|
||||
|
||||
// Registers
|
||||
UINT32 regs[64];
|
||||
UINT32 regsRO[64]; // Read-only copy of registers
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -360,6 +360,8 @@ static void ApplySettings(CINIFile *INI, const char *section)
|
|||
// Model 3
|
||||
if (OKAY == INI->Get(section, "MultiThreaded", x))
|
||||
g_Config.multiThreaded = x ? true : false;
|
||||
if (OKAY == INI->Get(section, "GPUMultiThreaded", x))
|
||||
g_Config.gpuMultiThreaded = x ? true : false;
|
||||
if (OKAY == INI->Get(section, "PowerPCFrequency", x))
|
||||
g_Config.SetPowerPCFrequency(x);
|
||||
|
||||
|
@ -381,7 +383,7 @@ static void ApplySettings(CINIFile *INI, const char *section)
|
|||
#ifdef SUPERMODEL_WIN32
|
||||
if (OKAY == INI->Get(section, "ForceFeedback", x))
|
||||
g_Config.forceFeedback = x ? true : false;
|
||||
#endif
|
||||
#endif // SUPERMODEL_WIN32
|
||||
|
||||
// OSD
|
||||
INI->Get(section, "XResolution", g_Config.xRes);
|
||||
|
@ -457,6 +459,7 @@ static void LogConfig(void)
|
|||
|
||||
// CModel3Config
|
||||
InfoLog("\tMultiThreaded = %d", g_Config.multiThreaded);
|
||||
InfoLog("\tGPUMultiThreaded = %d", g_Config.gpuMultiThreaded);
|
||||
InfoLog("\tPowerPCFrequency = %d", g_Config.GetPowerPCFrequency());
|
||||
|
||||
// CSoundBoardConfig
|
||||
|
@ -709,8 +712,9 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
|
|||
unsigned fpsFramesElapsed, framesElapsed;
|
||||
unsigned showCrosshairs = 0; // bit 1: player 1 crosshair, bit 0: player 2
|
||||
bool gameHasLightguns = false;
|
||||
bool quit = 0;
|
||||
bool paused = 0;
|
||||
bool quit = false;
|
||||
bool paused = false;
|
||||
bool dumpTimings = false;
|
||||
|
||||
// Initialize and load ROMs
|
||||
if (OKAY != Model3->Init())
|
||||
|
@ -791,7 +795,7 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
|
|||
|
||||
// Poll the inputs
|
||||
if (!Inputs->Poll(Model3->GetGameInfo(), xOffset, yOffset, xRes, yRes))
|
||||
quit = 1;
|
||||
quit = true;
|
||||
|
||||
#ifdef SUPERMODEL_DEBUGGER
|
||||
bool processUI = true;
|
||||
|
@ -802,12 +806,12 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
|
|||
// Check if debugger requests exit or pause
|
||||
if (Debugger->CheckExit())
|
||||
{
|
||||
quit = 1;
|
||||
quit = true;
|
||||
processUI = false;
|
||||
}
|
||||
else if (Debugger->CheckPause())
|
||||
{
|
||||
paused = 1;
|
||||
paused = true;
|
||||
processUI = false;
|
||||
}
|
||||
}
|
||||
|
@ -819,7 +823,7 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
|
|||
if (Inputs->uiExit->Pressed())
|
||||
{
|
||||
// Quit emulator
|
||||
quit = 1;
|
||||
quit = true;
|
||||
}
|
||||
else if (Inputs->uiReset->Pressed())
|
||||
{
|
||||
|
@ -979,9 +983,12 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
|
|||
// Dump input states
|
||||
Inputs->DumpState(Model3->GetGameInfo());
|
||||
}
|
||||
else if (Inputs->uiDumpTimings->Pressed())
|
||||
{
|
||||
dumpTimings = !dumpTimings;
|
||||
}
|
||||
else if (Inputs->uiSelectCrosshairs->Pressed() && gameHasLightguns)
|
||||
{
|
||||
|
||||
showCrosshairs++;
|
||||
switch ((showCrosshairs&3))
|
||||
{
|
||||
|
@ -1042,6 +1049,9 @@ int Supermodel(const char *zipFile, CInputs *Inputs, CINIFile *CmdLine)
|
|||
startTicks = currentTicks;
|
||||
}
|
||||
}
|
||||
|
||||
if (dumpTimings && !paused)
|
||||
Model3->DumpTimings();
|
||||
}
|
||||
|
||||
// Make sure all threads are paused before shutting down
|
||||
|
|
Loading…
Reference in a new issue