From 816d541b98c72c8021cd4202607e063b38ab074c Mon Sep 17 00:00:00 2001
From: Ian Curtis <i.curtis@gmail.com>
Date: Sat, 22 Aug 2020 20:41:47 +0000
Subject: [PATCH] Various SCSP improvements and code cleanup: - Ported MAME's
 implementation - Corrected FM sound for songs in VF3 that use it - Music
 tempo now closer to real hardware thanks to emulating two SCSP chips. - Add
 LegacySoundDSP config option for games with SCSP DSP glitches (ex. engine
 noises in Sega Rally 2, and loud garbage on Bahn's stage in Fighting Vipers
 2) - Renamed SysFPS to "SoundClock" (since raising this appears to adjust the
 sound frequency).

(Submitted by Paul Prosser)
---
 Src/OSD/SDL/Main.cpp  |    4 +
 Src/Sound/SCSP.cpp    | 1899 ++++++++++++++++++-----------------------
 Src/Sound/SCSP.h      |    2 +-
 Src/Sound/SCSPDSP.cpp | 1194 ++++++--------------------
 Src/Sound/SCSPDSP.h   |    1 +
 Src/Sound/SCSPLFO.cpp |  294 +++----
 6 files changed, 1267 insertions(+), 2127 deletions(-)

diff --git a/Src/OSD/SDL/Main.cpp b/Src/OSD/SDL/Main.cpp
index f04d3be..4b80c10 100644
--- a/Src/OSD/SDL/Main.cpp
+++ b/Src/OSD/SDL/Main.cpp
@@ -1371,6 +1371,8 @@ static Util::Config::Node DefaultConfig()
   config.Set("EmulateDSB", true);
   config.Set("SoundVolume", "100");
   config.Set("MusicVolume", "100");
+  // Other sound options
+  config.Set("LegacySoundDSP", false); // New config option for games that do not play correctly with MAME's SCSP sound core.
   // CDriveBoard
 #ifdef SUPERMODEL_WIN32
   config.Set("ForceFeedback", false);
@@ -1469,6 +1471,8 @@ static void Help(void)
   puts("  -flip-stereo            Swap left and right audio channels");
   puts("  -no-sound               Disable sound board emulation (sound effects)");
   puts("  -no-dsb                 Disable Digital Sound Board (MPEG music)");
+  puts("  -legacy-sound           Enable ElSemi's legacy SCSP DSP emulator from 0.2a. Recommended for Sega Rally 2 as engine sound does not work with MAME's implementation.");
+  puts("  -no-legacy-sound        Disable ElSemi's legacy SCSP DSP emulator and use MAME's implementation instead");
   puts("");
 #ifdef NET_BOARD
   puts("Net Options:");
diff --git a/Src/Sound/SCSP.cpp b/Src/Sound/SCSP.cpp
index ac53d66..d4cd0c2 100644
--- a/Src/Sound/SCSP.cpp
+++ b/Src/Sound/SCSP.cpp
@@ -1,3 +1,4 @@
+
 /**
  ** Supermodel
  ** A Sega Model 3 Arcade Emulator.
@@ -49,6 +50,19 @@
 	Driven by MC68000
 */
 
+/* MAME SCSP conversion by Paul Prosser (aka Conversus W. Vans). Code by R.Belmont and ElSemi.
+Changes made: fixed the buggy timing, better envelope processing, better FM support, a more reasonable DSP emulation and all kinds of other improvements.
+But the new core isn't without its to-do list:
+- Fix low sound volume when changing music in Harley-Davidson & LA Riders (Maybe the MVOL code from MAME will fix this?)
+- Prevent music in Fighting Vipers 2 Bahn's stage from hanging with MAME's SCSP DSP core.
+- Figure out why Sega Rally 2 navigator voice can cut off sometimes (Legacy DSP)
+
+Since I (Paul) started this code overhaul in November 2019, I have also removed some obsolete features, such as the REVERB software effect.
+That effect did not sound good at all.
+
+Anyway, credit to R. Belmont and ElSemi for the code, and for being awesome emulation gods.
+*/
+
 #include "Supermodel.h"
 #include "Sound/SCSP.h"
 #include <cstdio>
@@ -59,27 +73,17 @@
 
 static const Util::Config::Node *s_config = 0;
 static bool s_multiThreaded = false;
+bool legacySound; // Mirrors the LegacySoundDSP config option (read in SCSP_Init); selects the legacy SCSP DSP path.
 
-//#define NEWSCSP
+#define USEDSP
 //#define RB_VOLUME
 
-//#define REVERB
-#define USEDSP
-
 #define MAX_SCSP	2
-/*#define TIMER_LIMITSA  0x101
-#define TIMER_LIMITSB  0x100
-#define TIMER_LIMITSC  0xff
-*/
 
-#define TIMER_LIMITSA  0xff
-#define TIMER_LIMITSB  0xff
-#define TIMER_LIMITSC  0xff
 
 // These globals control the operation of the SCSP, they are no longer extern and are set through SCSP_SetBuffers(). --Bart
-float SysFPS;
-//extern "C" A68KContext M68000_regs;
-//extern void __cdecl ErrorLogMessage(char *,...);
+float SoundClock; // Originally titled SysFPS; seems to be for the sound CPU.
+const float Freq = 76;
 signed short *bufferl;		
 signed short *bufferr;
 int length;
@@ -90,15 +94,11 @@ signed int *buffertmpl,*buffertmpr;	// these are allocated inside this file
 unsigned int srate=44100;
 
 
-
-#define REVERB_LEN	0x10000
-#define REVERB_DIF	6000
-#define REVERB_DIV	4
+#define ICLIP16(x) (x<-32768)?-32768:((x>32767)?32767:x)
+#define ICLIP18(x) (x<-131072)?-131072:((x>131071)?131071:x)
+
 
 
-signed short bufferrevr[REVERB_LEN];
-signed short bufferrevl[REVERB_LEN];
-unsigned int RevR,RevW;
 
 //#define _DEBUG
 
@@ -122,14 +122,17 @@ static CMutex *MIDILock;	// for safe access to the MIDI FIFOs
 static int (*Run68kCB)(int cycles);
 static void (*Int68kCB)(int irq);
 static void (*RetIntCB)();
-static DWORD IrqTimA=1;
-static DWORD IrqTimBC=2;
-static DWORD IrqMidi=3;
+static DWORD IrqTimA;
+static DWORD IrqTimBC;
+static DWORD IrqMidi;
+
+unsigned short MCIEB;
+unsigned short MCIPD;
 
 #define MIDI_STACK_SIZE			128
 #define MIDI_STACK_SIZE_MASK	(MIDI_STACK_SIZE-1)
 
-static BYTE MidiOutStack[8];
+static BYTE MidiOutStack[16];
 static BYTE MidiOutW=0,MidiOutR=0;
 static BYTE MidiStack[MIDI_STACK_SIZE];
 static BYTE MidiOutFill;
@@ -138,28 +141,27 @@ static BYTE MidiW=0,MidiR=0;
 static BYTE HasSlaveSCSP=0;
 
 static DWORD FNS_Table[0x400];
-/*static int TLTABLE[256];
-static int LPANTABLE[16];
-static int RPANTABLE[16];
-*/
+static INT32 EG_TABLE[0x400];
 
 #ifdef RB_VOLUME
-static int volume[256*4];	// precalculated attenuation values with some marging for enveloppe and pan levels
+static int volume[256 * 4];	// precalculated attenuation values with some margin for envelope and pan levels
 static int pan_left[32], pan_right[32];	// pan volume offsets
 #else
-static float SDLT[8]={-1000000.0,-36.0,-30.0,-24.0,-18.0,-12.0,-6.0,0.0};
+static const float SDLT[8] = { -1000000.0f,-36.0f,-30.0f,-24.0f,-18.0f,-12.0f,-6.0f,0.0f };
 static int LPANTABLE[0x10000];
 static int RPANTABLE[0x10000];
 #endif
 
+
 static int TimPris[3];
 static int TimCnt[3];
 
 #define SHIFT	12
-#define FIX(v)	((DWORD) ((float) (1<<SHIFT)*(v)))
+#define FIX(v)	((UINT32) ((float) (1<<SHIFT)*(v)))
 
+#define EG_SHIFT	16
+#define FM_DELAY    0
 
-#define EG_SHIFT	8
 
 #include "SCSPLFO.cpp"
 
@@ -168,54 +170,55 @@ static int TimCnt[3];
 	that can generate FM and PCM (from ROM/RAM) sound
 */
 //SLOT PARAMETERS
-#define KEYONEX(slot)	((slot->data[0x0]>>0x0)&0x1000)
-#define KEYONB(slot)	((slot->data[0x0]>>0x0)&0x0800)
-#define SBCTL(slot)		((slot->data[0x0]>>0x9)&0x0003)
-#define SSCTL(slot)		((slot->data[0x0]>>0x7)&0x0003)
-#define LPCTL(slot)		((slot->data[0x0]>>0x5)&0x0003)
-#define PCM8B(slot)		((slot->data[0x0]>>0x0)&0x0010)
+#define KEYONEX(slot)   ((slot->data[0x0] >> 0x0) & 0x1000)
+#define KEYONB(slot)    ((slot->data[0x0] >> 0x0) & 0x0800)
+#define SBCTL(slot)     ((slot->data[0x0] >> 0x9) & 0x0003)
+#define SSCTL(slot)     ((slot->data[0x0] >> 0x7) & 0x0003)
+#define LPCTL(slot)     ((slot->data[0x0] >> 0x5) & 0x0003)
+#define PCM8B(slot)     ((slot->data[0x0] >> 0x0) & 0x0010)
 
-#define SA(slot)		(((slot->data[0x0]&0xF)<<16)|(slot->data[0x1]))
+#define SA(slot)        (((slot->data[0x0] & 0xF) << 16) | (slot->data[0x1]))
 
-#define LSA(slot)		(slot->data[0x2])
+#define LSA(slot)       (slot->data[0x2])
 
-#define LEA(slot)		(slot->data[0x3])
+#define LEA(slot)       (slot->data[0x3])
 
-#define D2R(slot)		((slot->data[0x4]>>0xB)&0x001F)
-#define D1R(slot)		((slot->data[0x4]>>0x6)&0x001F)
-#define EGHOLD(slot)	((slot->data[0x4]>>0x0)&0x0020)
-#define AR(slot)		((slot->data[0x4]>>0x0)&0x001F)
+#define D2R(slot)       ((slot->data[0x4] >> 0xB) & 0x001F)
+#define D1R(slot)       ((slot->data[0x4] >> 0x6) & 0x001F)
+#define EGHOLD(slot)    ((slot->data[0x4] >> 0x0) & 0x0020)
+#define AR(slot)        ((slot->data[0x4] >> 0x0) & 0x001F)
 
-#define LPSLNK(slot)	((slot->data[0x5]>>0x0)&0x4000)
-#define KRS(slot)		((slot->data[0x5]>>0xA)&0x000F)
-#define DL(slot)		((slot->data[0x5]>>0x5)&0x001F)
-#define RR(slot)		((slot->data[0x5]>>0x0)&0x001F)
+#define LPSLNK(slot)    ((slot->data[0x5] >> 0x0) & 0x4000)
+#define KRS(slot)       ((slot->data[0x5] >> 0xA) & 0x000F)
+#define DL(slot)        ((slot->data[0x5] >> 0x5) & 0x001F)
+#define RR(slot)        ((slot->data[0x5] >> 0x0) & 0x001F)
 
-#define STWINH(slot)	((slot->data[0x6]>>0x0)&0x0200)
-#define SDIR(slot)		((slot->data[0x6]>>0x0)&0x0100)
-#define TL(slot)		((slot->data[0x6]>>0x0)&0x00FF)
+#define STWINH(slot)    ((slot->data[0x6] >> 0x0) & 0x0200)
+#define SDIR(slot)      ((slot->data[0x6] >> 0x0) & 0x0100)
+#define TL(slot)        ((slot->data[0x6] >> 0x0) & 0x00FF)
 
-#define MDL(slot)		((slot->data[0x7]>>0xB)&0x0007)
-#define MDXSL(slot)		((slot->data[0x7]>>0x6)&0x003F)
-#define MDYSL(slot)		((slot->data[0x7]>>0x0)&0x003F)
+#define MDL(slot)       ((slot->data[0x7] >> 0xC) & 0x001E) // A value of 30 is somehow needed for correct FM mix in VF3.
+#define MDXSL(slot)     ((slot->data[0x7] >> 0x6) & 0x003F)
+#define MDYSL(slot)     ((slot->data[0x7] >> 0x0) & 0x003F)
 
-#define OCT(slot)		((slot->data[0x8]>>0xB)&0x000F)
-#define FNS(slot)		((slot->data[0x8]>>0x0)&0x03FF)
+#define OCT(slot)       ((slot->data[0x8] >> 0xB) & 0x000F)
+#define FNS(slot)       ((slot->data[0x8] >> 0x0) & 0x03FF)
 
-#define LFORE(slot)		((slot->data[0x9]>>0x0)&0x8000)
-#define LFOF(slot)		((slot->data[0x9]>>0xA)&0x001F)
-#define PLFOWS(slot)	((slot->data[0x9]>>0x8)&0x0003)
-#define PLFOS(slot)		((slot->data[0x9]>>0x5)&0x0007)
-#define ALFOWS(slot)	((slot->data[0x9]>>0x3)&0x0003)
-#define ALFOS(slot)		((slot->data[0x9]>>0x0)&0x0007)
+#define LFORE(slot)     ((slot->data[0x9] >> 0x0) & 0x8000)
+#define LFOF(slot)      ((slot->data[0x9] >> 0xA) & 0x001F)
+#define PLFOWS(slot)    ((slot->data[0x9] >> 0x8) & 0x0003)
+#define PLFOS(slot)     ((slot->data[0x9] >> 0x5) & 0x000E) // Setting this to 14 seems to make FM more precise
+#define ALFOWS(slot)    ((slot->data[0x9] >> 0x3) & 0x0003)
+#define ALFOS(slot)     ((slot->data[0x9] >> 0x0) & 0x0007) 
 
-#define ISEL(slot)		((slot->data[0xA]>>0x3)&0x000F)
-#define IMXL(slot)		((slot->data[0xA]>>0x0)&0x0007)
+#define ISEL(slot)      ((slot->data[0xA] >> 0x3) & 0x000F)
+#define IMXL(slot)      ((slot->data[0xA] >> 0x0) & 0x0007)
+
+#define DISDL(slot)     ((slot->data[0xB] >> 0xD) & 0x0007)
+#define DIPAN(slot)     ((slot->data[0xB] >> 0x8) & 0x001F)
+#define EFSDL(slot)     ((slot->data[0xB] >> 0x5) & 0x0007)
+#define EFPAN(slot)     ((slot->data[0xB] >> 0x0) & 0x001F)
 
-#define DISDL(slot)		((slot->data[0xB]>>0xD)&0x0007)
-#define DIPAN(slot)		((slot->data[0xB]>>0x8)&0x001F)
-#define EFSDL(slot)		((slot->data[0xB]>>0x5)&0x0007)
-#define EFPAN(slot)		((slot->data[0xB]>>0x0)&0x001F)
 
 //Envelope step (fixed point)
 int ARTABLE[64],DRTABLE[64];
@@ -257,7 +260,8 @@ struct _SLOT
 	};
 	BYTE active;	//this slot is currently playing
 	BYTE *base;		//samples base address
-	DWORD cur_addr;	//current play address (24.8)
+	DWORD cur_addr;	//current play address (24.8)
+	DWORD nxt_addr;	//next play address; set to 1<<SHIFT when a slot is started
 	DWORD step;		//pitch step (24.8)
 	BYTE Back;
 	_EG EG;			//Envelope
@@ -291,6 +295,8 @@ struct _SLOT
 #define SCITMA	6
 #define SCITMB	7
 
+bool HasMVOL;
+
 struct _SCSP
 {
 	union
@@ -301,15 +307,23 @@ struct _SCSP
 	_SLOT Slots[32];
 	signed short RINGBUF[64];
 	unsigned char BUFPTR;
-	unsigned char *SCSPRAM;	
+#if FM_DELAY
+	signed short DELAYBUF[FM_DELAY];
+	BYTE DELAYPTR;
+#endif
+	unsigned char *SCSPRAM;
+	UINT32 SCSPRAM_LENGTH;
 	char Master;
 #ifdef USEDSP
 	_SCSPDSP DSP;
 	signed short *MIXBuf;
 #endif
+
+	int ARTABLE[64], DRTABLE[64];
 } SCSPs[MAX_SCSP],*SCSP=SCSPs;
 
-signed short *RBUFDST;	//this points to where the sample will be stored in the RingBuf
+static signed short *RBUFDST;	//this points to where the sample will be stored in the RingBuf
+
 
 unsigned char DecodeSCI(unsigned char irq)
 {
@@ -324,17 +338,12 @@ unsigned char DecodeSCI(unsigned char irq)
 	return SCI;
 }
 
+
+
 void CheckPendingIRQ()
 {
-	DWORD pend=SCSP[0].data[0x20/2];
-	DWORD en=SCSP[0].data[0x1e/2];
-	/*if(pend&0x8)
-		if(en&0x8)
-		{
-			Int68kCB(IrqMidi);
-			return;
-		}
-	*/
+	DWORD pend=SCSPs->data[0x20/2];
+	DWORD en=SCSPs->data[0x1e/2];
 	
 	/*
 	 * MIDI FIFO critical section
@@ -351,10 +360,13 @@ void CheckPendingIRQ()
 		//	MIDILock->Unlock();
 		
 		//SCSP.data[0x20/2]|=0x8;	//Hold midi line while there are commands pending
-		Int68kCB(IrqMidi);
+
+		//Int68kCB(IrqMidi);
 		//printf("68K: MIDI IRQ\n");
 		//ErrorLogMessage("Midi");
-		return;
+		
+		SCSP->data[0x20 / 2] |= 8;
+		pend |= 8;
 	}
 	
 	//if (g_Config.multiThreaded)
@@ -383,9 +395,38 @@ void CheckPendingIRQ()
 			//ErrorLogMessage("TimC");
 			return;
 		}
+	if(pend&0x8)
+	if(en&0x8)
+	{
+		Int68kCB(IrqMidi);
+		SCSP->data[0x20 / 2] &= ~8;
+		return;
+	}
+
 	Int68kCB(0);
 }
 
+//void ResetInterrupts() // Can't get this to work correctly in Supermodel.
+//{
+//	unsigned int reset = SCSP->data[0x22 / 2];
+//
+//	if (reset & 0x40)
+//	{
+//		Int68kCB(IrqTimA);
+//	}
+//	if (reset & 0x180)
+//	{
+//		Int68kCB(IrqTimBC);
+//	}
+//	if (reset & 0x8)
+//	{
+//		Int68kCB(IrqMidi);
+//	}
+//
+//	CheckPendingIRQ();
+//}
+
+
 int Get_AR(int base,int R)
 {
 	int Rate=base+(R<<1);
@@ -404,107 +445,102 @@ int Get_DR(int base,int R)
 	return DRTABLE[Rate];
 }
 
-int Get_RR(int base,int R)
-{
-	int Rate=base+(R<<1);
-//	int Rate=(base+R)<<1;
-	if(Rate>63)	Rate=63;
-	if(Rate<0) Rate=0;
-	return DRTABLE[Rate];
-}
 
 void Compute_EG(_SLOT *slot)
 {
-	int octave=OCT(slot);
+	int octave = (OCT(slot) ^ 8) - 8;
 	int rate;
-	if(octave&8) octave=octave-16;
-	if(KRS(slot)!=0xf)
-		rate=2*(octave+KRS(slot))+((FNS(slot)>>9)&1);
+	if (KRS(slot) != 0xf)
+		rate = octave + 2 * KRS(slot) + ((FNS(slot) >> 9) & 1);
 	else
-		rate=((FNS(slot)>>9)&1);
-	slot->EG.volume=0;
-	slot->EG.AR=Get_AR(rate,AR(slot));
-	slot->EG.D1R=Get_DR(rate,D1R(slot));
-	slot->EG.D2R=Get_DR(rate,D2R(slot));
-	slot->EG.RR=Get_RR(rate,RR(slot));
-	slot->EG.DL=0x1f-DL(slot);
-	slot->EG.EGHOLD=EGHOLD(slot);
+		rate = 0; //rate = ((FNS(slot) >> 9) & 1);
+
+	slot->EG.volume = 0x17F << EG_SHIFT;
+	slot->EG.AR = Get_AR(rate, AR(slot));
+	slot->EG.D1R = Get_DR(rate, D1R(slot));
+	slot->EG.D2R = Get_DR(rate, D2R(slot));
+	slot->EG.RR = Get_DR(rate, RR(slot));
+	slot->EG.DL = 0x1f - DL(slot);
+	slot->EG.EGHOLD = EGHOLD(slot);
 }
 
 void SCSP_StopSlot(_SLOT *slot,int keyoff);
 
 int EG_Update(_SLOT *slot)
 {
-	
-	switch(slot->EG.state)
+	switch (slot->EG.state)
 	{
-		case ATTACK:
-			slot->EG.volume+=slot->EG.AR;
-			if(slot->EG.volume>=(0x3ff<<EG_SHIFT))
+	case ATTACK:
+		slot->EG.volume += slot->EG.AR;
+		if (slot->EG.volume >= (0x3ff << EG_SHIFT))
+		{
+			if (!LPSLNK(slot))
 			{
-				slot->EG.state=DECAY1;
-				if(slot->EG.D1R>=(1024<<EG_SHIFT))	//Skip DECAY1, go directly to DECAY2
-					slot->EG.state=DECAY2;
-				slot->EG.volume=0x3ff<<EG_SHIFT;
+				slot->EG.state = DECAY1;
+				if (slot->EG.D1R >= (1024 << EG_SHIFT)) //Skip DECAY1, go directly to DECAY2
+					slot->EG.state = DECAY2;
 			}
-			if(slot->EG.EGHOLD)
-				return 0x3ff<<(SHIFT-10);
-			break;
-		case DECAY1:
-			slot->EG.volume-=slot->EG.D1R;
-			if((slot->EG.volume>>(EG_SHIFT+5))<=slot->EG.DL)
-				slot->EG.state=DECAY2;
-			break;
-		case DECAY2:
-			if(slot->EG.volume<=0 || slot->EG.DL==0)
-			{
-				slot->EG.volume=0;
-				SCSP_StopSlot(slot,0);
-			}
-			if(D2R(slot)==0)
-				return (slot->EG.volume>>EG_SHIFT)<<(SHIFT-10);
-			slot->EG.volume-=slot->EG.D2R;
-			if(slot->EG.volume<=0)
-				slot->EG.volume=0;
+			slot->EG.volume = 0x3ff << EG_SHIFT;
+		}
+		if (slot->EG.EGHOLD)
+			return 0x3ff << (SHIFT - 10);
+		break;
+	case DECAY1:
+		slot->EG.volume -= slot->EG.D1R;
+		if (slot->EG.volume <= 0)
+			slot->EG.volume = 0;
+		if (slot->EG.volume >> (EG_SHIFT + 5) <= slot->EG.DL)
+			slot->EG.state = DECAY2;
+		break;
+	case DECAY2:
+		if (D2R(slot) == 0)
+			return (slot->EG.volume >> EG_SHIFT) << (SHIFT - 10);
+		slot->EG.volume -= slot->EG.D2R;
+		if (slot->EG.volume <= 0)
+			slot->EG.volume = 0;
 
-			break;
-		case RELEASE:
-			slot->EG.volume-=slot->EG.RR;
-			if(slot->EG.volume<=0)
-			{
-				SCSP_StopSlot(slot,0);
-				slot->EG.volume=0;
-				slot->EG.state=ATTACK;
-			}
-			//slot->EG.volume=0;
-			break;
-		default:
-			return 1<<SHIFT;
+		break;
+	case RELEASE:
+		slot->EG.volume -= slot->EG.RR;
+		if (slot->EG.volume <= 0)
+		{
+			slot->EG.volume = 0;
+			SCSP_StopSlot(slot, 0);
+			//slot->EG.volume = 0x17F << EG_SHIFT;
+			//slot->EG.state = SCSP_ATTACK;
+		}
+		break;
+	default:
+		return 1 << SHIFT;
 	}
-	return (slot->EG.volume>>EG_SHIFT)<<(SHIFT-10);
-	
+	return (slot->EG.volume >> EG_SHIFT) << (SHIFT - 10);
 }
 
+
 DWORD SCSP_Step(_SLOT *slot)
 {
-	int octave=OCT(slot);
-	int Fn;
-	/*
-	int Fo=44100;
-	if(octave&8)
-		Fo>>=(16-octave);
-	else
-		Fo<<=octave;
-	Fn=Fo*(((FNS(slot))<<(SHIFT-10))|(1<<SHIFT));
-	*/
-	Fn=(FNS_Table[FNS(slot)]);	//24.8
-	if(octave&8)
-		Fn>>=(16-octave);
-	else
-		Fn<<=octave;
+	//int octave=OCT(slot);
+	//UINT64 Fn;
+	//Fn=(FNS_Table[FNS(slot)]);	//24.8
+	//if(octave&8)
+	//	Fn>>=(16-octave);
+	//else
+	//	Fn<<=octave;
 
 
-	return Fn/srate;
+	//return Fn/srate;
+	int octave = (OCT(slot) ^ 8) - 8 + SHIFT - 10;
+	UINT32 Fn = FNS(slot) + (1 << 10);
+	if (octave >= 0)
+	{
+		Fn <<= octave;
+	}
+	else
+	{
+		Fn >>= -octave;
+	}
+
+	return Fn;
 }
 
 void Compute_LFO(_SLOT *slot)
@@ -515,17 +551,22 @@ void Compute_LFO(_SLOT *slot)
 		LFO_ComputeStep(&(slot->ALFO),LFOF(slot),ALFOWS(slot),ALFOS(slot),1);
 }
 
+
 void SCSP_StartSlot(_SLOT *slot)
 {
-	slot->active=1;
-	slot->Back=0;
-	slot->base=SCSP->SCSPRAM+SA(slot);
-	slot->cur_addr=0;
-	slot->step=SCSP_Step(slot);	
+	UINT32 start_offset;
+
+	slot->active = 1;
+	slot->Back = 0;
+	slot->nxt_addr = 1 << SHIFT;
+	slot->cur_addr = 0;
+	start_offset = PCM8B(slot) ? SA(slot) : SA(slot) & 0x7FFFE;
+	slot->step = SCSP_Step(slot);
+	slot->base = SCSP->SCSPRAM + start_offset;
 	Compute_EG(slot);
-	slot->EG.state=ATTACK;
-	slot->EG.volume=0;
-	slot->Prev=0;
+	slot->EG.state = ATTACK;
+	slot->EG.volume = 0x17F << EG_SHIFT;
+	slot->Prev = 0;
 	Compute_LFO(slot);
 	/*{
 		char aux[12];
@@ -542,7 +583,7 @@ void SCSP_StartSlot(_SLOT *slot)
 
 void SCSP_StopSlot(_SLOT *slot,int keyoff)
 {
-	if(keyoff && slot->EG.state!=RELEASE)
+	if(keyoff)
 	{
 		slot->EG.state=RELEASE;
 //		return;
@@ -559,6 +600,8 @@ bool SCSP_Init(const Util::Config::Node &config, int n)
 {
 	s_config = &config;
 	s_multiThreaded = config["MultiThreaded"].ValueAs<bool>();
+	legacySound = config["LegacySoundDSP"].ValueAs<bool>();
+	SoundClock = Freq;
 
 	if(n==2)
 	{
@@ -577,14 +620,15 @@ bool SCSP_Init(const Util::Config::Node &config, int n)
 	SCSPDSP_Init(&SCSP->DSP);
 #endif
 	SCSP->Master=1;
-	RevR=0;
-	RevW=REVERB_DIF;
-	memset(bufferrevl,0,sizeof(bufferrevl));
-	memset(bufferrevr,0,sizeof(bufferrevr));
+	SCSP->SCSPRAM_LENGTH = 512 * 1024;
+	SCSP->DSP.SCSPRAM = (UINT16 *)SCSP->SCSPRAM;
+	SCSP->DSP.SCSPRAM_LENGTH = (512 * 1024) / 2;
 	MidiR=MidiW=0;
 	MidiOutR=MidiOutW=0;
 	MidiOutFill=0;
 	MidiInFill=0;
+	
+
 	for(int i=0;i<0x400;++i)
 	{
 		float fcent=(double) 1200.0*log2((double)(((double) 1024.0+(double)i)/(double)1024.0));
@@ -594,43 +638,41 @@ bool SCSP_Init(const Util::Config::Node &config, int n)
 		//FNS_Table[i]=(i>>(10-SHIFT))|(1<<SHIFT);
 		
 	}
+	for (int i = 0; i < 0x400; ++i) {
+		float envDB = ((float)(3 * (i - 0x3ff))) / 32.0;
+		float scale = (float)(1 << SHIFT);
+		EG_TABLE[i] = (INT32)(pow(10.0, envDB / 20.0)*scale);
+	}
+
 #ifdef RB_VOLUME
 	// Volume table, 1 = -0.375dB, 8 = -3dB, 256 = -96dB
-	for(i = 0; i < 256; i++)
-		volume[i] = 65536.0*pow(2.0, (-0.375/6.0)*i);
-	for(i = 256; i < 256*4; i++)
+	for (i = 0; i < 256; i++)
+		volume[i] = 65536.0*pow(2.0, (-0.375 / 6.0)*i);
+	for (i = 256; i < 256 * 4; i++)
 		volume[i] = 0;
 
 	// Pan values, units are a linear -3dB ramp, i.e. 8 places in the volume[] table.
-	for(i = 0; i < 16; i++)
+	for (i = 0; i < 16; i++)
 	{
-		pan_left[i] = i*8;
-		pan_left[i+16] = 0;
+		pan_left[i] = i * 8;
+		pan_left[i + 16] = 0;
 		pan_right[i] = 0;
-		pan_right[i+16] = i*8;
+		pan_right[i + 16] = i * 8;
 	}
 	// patch in the infinity values
 	pan_left[15] = 256;
 	pan_right[31] = 256;
 
 #else
+
 	for(int i=0;i<0x10000;++i)
 	{
-		int iTL =(i>>0x8)&0xff;
-		int iPAN=(i>>0x0)&0x1f;
-		int iSDL=(i>>0x5)&0x07;
+		int iTL =(i>>0x0)&0xff;
+		int iPAN=(i>>0x8)&0x1f;
+		int iSDL=(i>>0xd)&0x07;
 
 		float TL=1.0;
 		float SegaDB=0;
-		//2^(-(TL-2^4))
-		/*if(iTL&0x01) TL*=0.95760;
-		if(iTL&0x02) TL*=0.91700;
-		if(iTL&0x04) TL*=0.84090;
-		if(iTL&0x08) TL*=0.70711;
-		if(iTL&0x10) TL*=0.50000;
-		if(iTL&0x20) TL*=0.25000;
-		if(iTL&0x40) TL*=0.06250;
-		if(iTL&0x80) TL*=0.00391;*/
 		if(iTL&0x01) SegaDB-=0.4;
 		if(iTL&0x02) SegaDB-=0.8;
 		if(iTL&0x04) SegaDB-=1.5;
@@ -643,12 +685,6 @@ bool SCSP_Init(const Util::Config::Node &config, int n)
 		TL=pow(10.0,SegaDB/20.0);
 
 		float PAN=1.0;
-		//2^(-2^(PAN-2))
-		/*if(iPAN&0x1) PAN*=0.70711;
-		if(iPAN&0x2) PAN*=0.50000;
-		if(iPAN&0x4) PAN*=0.25000;
-		if(iPAN&0x8) PAN*=0.06250;
-		if(iPAN==0xf) PAN=0.0;*/
 
 		SegaDB=0;
 		if(iPAN&0x1) SegaDB-=3;
@@ -686,25 +722,8 @@ bool SCSP_Init(const Util::Config::Node &config, int n)
 		
 		LPANTABLE[i]=FIX((4.0*LPAN*TL*SDL));
 		RPANTABLE[i]=FIX((4.0*RPAN*TL*SDL));
-
-
 	}
 #endif
-	/*for(i=0;i<4;++i)	
-		ARTABLE[i]=DRTABLE[i]=0;
-	for(i=4;i<62;++i)*/
-
-	/*for(i=2;i<62;++i)
-	{
-		//double t=BaseTimes[i];	//In ms
-		double t=BaseTimes2[i/2]/AR2DR;	//In ms
-		double step=(1023*1000.0)/((float) srate*t);
-		double scale=(double) (1<<EG_SHIFT);
-		ARTABLE[i]=(int) (step*scale);
-		step/=AR2DR;
-		DRTABLE[i]=(int) (step*scale);
-	}
-	*/
 	ARTABLE[0]=DRTABLE[0]=0;	//Infinite time
 	ARTABLE[1]=DRTABLE[1]=0;
 	for(int i=2;i<64;++i)
@@ -748,6 +767,10 @@ bool SCSP_Init(const Util::Config::Node &config, int n)
 	}
 	memset(buffertmpl,0,44100*sizeof(signed int));
 	memset(buffertmpr,0,44100*sizeof(signed int));
+	SCSPs->data[0x20 / 2] = 0;
+	TimCnt[0] = 0xffff;
+	TimCnt[1] = 0xffff;
+	TimCnt[2] = 0xffff;
 	
 	// MIDI FIFO mutex
 	MIDILock = CThread::CreateMutex();
@@ -771,54 +794,44 @@ void SCSP_SetRAM(int n,unsigned char *r)
 
 void SCSP_UpdateSlotReg(int s,int r)
 {
-	_SLOT *slot=SCSP->Slots+s;
-	switch(r&0x3f)
+	struct _SLOT *slot = SCSP->Slots + s;
+	int sl;
+	switch (r & 0x3f)
 	{
-		case 0:
-		case 1:
-			if(KEYONEX(slot))
+	case 0:
+	case 1:
+		if (KEYONEX(slot))
+		{
+			for (sl = 0; sl < 32; ++sl)
 			{
-				for(int sl=0;sl<32;++sl)
+				struct _SLOT *s2 = SCSP->Slots + sl;
 				{
-					_SLOT *s2=SCSP->Slots+sl;
-					if(!KEYONB(s2) && sl==cnts && s2->active)
-						int a=1;
-					//if(s2->EG.state!=RELEASE)
+					if (KEYONB(s2) && s2->EG.state == RELEASE/*&& !s2->active*/)
 					{
-						if(KEYONB(s2) && (!s2->active || (s2->active && s2->EG.state==RELEASE)))
-						{
-							//DebugLog("KEYON %d",sl);
-							//printf("68K: KEYON %d\n",sl);
-							SCSP_StartSlot(s2);
-						}
-						if(!KEYONB(s2) && s2->active)
-						{
-							//s2->active=0;
-							SCSP_StopSlot(s2,1);
-							//DebugLog("KEYOFF %d",sl);
-						}
+						SCSP_StartSlot(s2);
+					}
+					if (!KEYONB(s2) /*&& s2->active*/)
+					{
+						SCSP_StopSlot(s2, 1);
 					}
 				}
-				slot->data[0]&=~0x1000;
 			}
-			break;
-		case 0x10:
-		case 0x11:
-			slot->step=SCSP_Step(slot);	
-			break;
-		case 0xA:
-		case 0xB:
-			if(slot->active)
-				int a=1;
-//			if(RR(slot)==0x1f)
-//				SCSP_StopSlot(slot,0);
-			slot->EG.RR=Get_RR(0,RR(slot));
-			slot->EG.DL=0x1f-DL(slot);
-			break;
-		case 0x12:
-		case 0x13:
-			Compute_LFO(slot);
-			break;
+			slot->data[0] &= ~0x1000;
+		}
+		break;
+	case 0x10:
+	case 0x11:
+		slot->step = SCSP_Step(slot);
+		break;
+	case 0xA:
+	case 0xB:
+		slot->EG.RR = Get_DR(0, RR(slot));
+		slot->EG.DL = 0x1f - DL(slot);
+		break;
+	case 0x12:
+	case 0x13:
+		Compute_LFO(slot);
+		break;
 	}
 }
 
@@ -826,20 +839,18 @@ void SCSP_UpdateReg(int reg)
 {
 	switch(reg&0x3f)
 	{
+		case 0x0: // MVOL: still need to get this working in Supermodel as well; until then this case falls through to 0x2/0x3
+			//m_stream->set_output_gain(0, MVOL() / 15.0);
+			//m_stream->set_output_gain(1, MVOL() / 15.0);
+			//break;
 		case 0x2:
 		case 0x3:
 		{
 #ifdef USEDSP
-			unsigned int v=RBL(SCSP);
-			SCSP->DSP.RBP=RBP(SCSP);
-			if(v==0)
-				SCSP->DSP.RBL=8*1024;
-			else if(v==1)
-				SCSP->DSP.RBL=16*1024;
-			else if(v==2)
-				SCSP->DSP.RBL=32*1024;
-			else if(v==3)
-				SCSP->DSP.RBL=64*1024;
+			{
+				SCSP->DSP.RBL = (8 * 1024) << RBL(SCSP); // 8 / 16 / 32 / 64 kwords
+				SCSP->DSP.RBP = RBP(SCSP);
+			}
 #endif
 		}
 		break;
@@ -849,15 +860,10 @@ void SCSP_UpdateReg(int reg)
 			SCSP_MidiOutW(SCSP->data[0x6/2]&0xff);
 			break;
 
-/*		case 0x8:
-		case 0x9:
-			{
-				unsigned char slot=SCSP.data[0x8/2]>>11;	
-				int a=1;
-				
-			}
+		case 8:
+		case 9:
+			SCSP->data[0x8 / 2] &= 0xf800; 
 			break;
-*/
 		case 0x12:
 		case 0x13:
 		case 0x14:
@@ -872,32 +878,46 @@ void SCSP_UpdateReg(int reg)
 		case 0x19:
 			if(SCSP->Master)	
 			{
-				TimPris[0]=1<<((SCSP->data[0x18/2]>>8)&0x7);
-				TimCnt[0]=((SCSP->data[0x18/2]&0xff)<<8)|(TimCnt[0]&0xff);
+				TimPris[0]=1<<((SCSPs->data[0x18/2]>>8)&0x7);
+				TimCnt[0]=((SCSPs->data[0x18/2]&0xfe)<<8)/*|(TimCnt[0]&0xff)*/;
 			}
 			break;
 		case 0x1a:
 		case 0x1b:
 			if(SCSP->Master)	
 			{
-				TimPris[1]=1<<((SCSP->data[0x1A/2]>>8)&0x7);
-				TimCnt[1]=((SCSP->data[0x1A/2]&0xff)<<8)|(TimCnt[1]&0xff);
+				TimPris[1]=1<<((SCSPs->data[0x1A/2]>>8)&0x7);
+				TimCnt[1]=((SCSPs->data[0x1A/2]&0xfe)<<8)/*|(TimCnt[1]&0xff)*/;
 			}
 			break;
 		case 0x1C:
 		case 0x1D:
 			if(SCSP->Master)	
 			{
-				TimPris[2]=1<<((SCSP->data[0x1C/2]>>8)&0x7);
-				TimCnt[2]=((SCSP->data[0x1C/2]&0xff)<<8)|(TimCnt[2]&0xff);
+				TimPris[2]=1<<((SCSPs->data[0x1C/2]>>8)&0x7);
+				TimCnt[2]=((SCSPs->data[0x1C/2]&0xfe)<<8)/*|(TimCnt[2]&0xff)*/;
 			}
 			break;
 		case 0x22:	//SCIRE
 		case 0x23:
 			if(SCSP->Master)	
 			{
-				SCSP->data[0x20/2]&=~SCSP->data[0x22/2];
-				CheckPendingIRQ();
+				SCSP->data[0x20 / 2] &= ~SCSP->data[0x22 / 2];
+				//ResetInterrupts();
+
+
+				if (TimCnt[0] == 0xffff)
+				{
+					SCSP->data[0x20 / 2] |= 0x40;
+				}
+				if (TimCnt[1] == 0xffff)
+				{
+					SCSP->data[0x20 / 2] |= 0x80;
+				}
+				if (TimCnt[2] == 0xffff)
+				{
+					SCSP->data[0x20 / 2] |= 0x100;
+				}
 			}
 			break;
 		case 0x24:
@@ -913,6 +933,16 @@ void SCSP_UpdateReg(int reg)
 				IrqMidi=DecodeSCI(SCIMID);
 			}
 			break;
+		case 0x2b:
+			MCIEB = SCSP->data[0x2a / 2];
+			break;
+		case 0x2c:
+		case 0x2d:
+			break;
+		case 0x2e:
+		case 0x2f:
+			MCIPD &= ~SCSP->data[0x2e / 2];
+			break;
 	}
 }
 
@@ -923,45 +953,71 @@ void SCSP_UpdateSlotRegR(int slot,int reg)
 
 void SCSP_UpdateRegR(int reg)
 {
-	switch(reg&0x3f)
+	switch (reg & 0x3f)
 	{
-		case 4:
-		case 5:
-			{
-				unsigned short v=SCSP->data[0x5/2];
-				v&=0xff00;
-				
-				/*
-				 * MIDI FIFO critical section!
-				 */
-				if (s_multiThreaded)
-					MIDILock->Lock();
-					
-				v|=MidiStack[MidiR];
-				//printf("read MIDI\n");
-				if(MidiR!=MidiW)
-				{
-					++MidiR;
-					MidiR&=MIDI_STACK_SIZE_MASK;
-					//Int68kCB(IrqMidi);
-				}
-				
-				MidiInFill--;
-				SCSP->data[0x5/2]=v;
-				
-				if (s_multiThreaded)
-					MIDILock->Unlock();
-			}
-			break;
-		case 8:
-		case 9:
-			{
-				unsigned char slot=SCSP->data[0x8/2]>>11;	
-				unsigned int CA=SCSP->Slots[slot&0x1f].cur_addr>>(SHIFT+12);
-				SCSP->data[0x8/2]&=~(0x780);
-				SCSP->data[0x8/2]|=CA<<7;
-			}
-			break;
+	case 4:
+	case 5:
+	{
+		unsigned short v = SCSP->data[0x4 / 2];
+		v &= 0xff00;
+
+		/*
+		 * MIDI FIFO critical section!
+		 */
+		if (s_multiThreaded)
+			MIDILock->Lock();
+
+		v |= MidiStack[MidiR];
+		//printf("read MIDI\n");
+		if (MidiR != MidiW)
+		{
+			++MidiR;
+			MidiR &= MIDI_STACK_SIZE_MASK;
+			//Int68kCB(IrqMidi);
+		}
+
+		MidiInFill--;
+		SCSP->data[0x4 / 2] = v;
+
+		if (s_multiThreaded)
+			MIDILock->Unlock();
+	}
+	break;
+	case 8:
+	case 9:
+	{
+		// MSLC     |  CA   |SGC|EG
+		// f e d c b a 9 8 7 6 5 4 3 2 1 0
+		BYTE MSLC = (SCSP->data[0x8 / 2] >> 11) & 0x1f;
+		_SLOT *slot = SCSP->Slots + MSLC;
+		unsigned int SGC = (slot->EG.state) & 3;
+		unsigned int CA = (slot->cur_addr >> (SHIFT + 12)) & 0xf;
+		unsigned int EG = (0x1f - (slot->EG.volume >> (EG_SHIFT + 5))) & 0x1f;
+		/* note: according to the manual MSLC is write only, CA, SGC and EG read only.  */
+		SCSP->data[0x8 / 2] =  /*(MSLC << 11) |*/ (CA << 7) | (SGC << 5) | EG;
+	}
+	break;
+	case 0x18:
+	case 0x19:
+		break;
+
+	case 0x1a:
+	case 0x1b:
+		break;
+
+	case 0x1c:
+	case 0x1d:
+		break;
+
+	case 0x2a:
+	case 0x2b:
+		SCSP->data[0x2a / 2] = MCIEB;
+		break;
+
+	case 0x2c:
+	case 0x2d:
+		SCSP->data[0x2c / 2] = MCIPD;
+		break;
 	}
 }
 
@@ -987,22 +1043,44 @@ void SCSP_w8(unsigned int addr,unsigned char val)
 		SCSP->RINGBUF[(addr-0x600)/2]=val;
 	else
 	{
+		if (legacySound == true) {
 #ifdef USEDSP
-		//DSP
-		if(addr<0x780)	//COEF
-			((unsigned char *) SCSP->DSP.COEF)[(addr-0x700)^1]=val;
-		else if(addr<0x7C0)
-			((unsigned char *) SCSP->DSP.MADRS)[(addr-0x780)^1]=val;
-		else if(addr>=0x800 && addr<0xC00)
-			((unsigned char *) SCSP->DSP.MPRO)[(addr-0x800)^1]=val;
-		else
-			int a=1;
-		if(addr==0xBFE)
-		{
-			SCSPDSP_Start(&SCSP->DSP);
-		}
-		int a=1;
+			//DSP
+			if (addr < 0x780)	//COEF
+				((unsigned char *)SCSP->DSP.COEF)[(addr - 0x700) ^ 1] = val;
+			else if (addr < 0x7C0)
+				((unsigned char *)SCSP->DSP.MADRS)[(addr - 0x780) ^ 1] = val;
+			else if (addr >= 0x800 && addr < 0xC00)
+				((unsigned char *)SCSP->DSP.MPRO)[(addr - 0x800) ^ 1] = val;
+			else
+				int a = 1;
+			if (addr == 0xBF0)
+			{
+				SCSPDSP_Start(&SCSP->DSP);
+			}
+			int a = 1;
 #endif
+		}
+		else {
+#ifdef USEDSP
+			//DSP
+			if (addr < 0x780)	//COEF
+				((unsigned char *)SCSP->DSP.COEF)[(addr - 0x700) ^ 1] = val;
+			else if (addr < 0x7C0)
+				((unsigned char *)SCSP->DSP.MADRS)[(addr - 0x780) ^ 1] = val;
+			else if (addr < 0x800)
+				((unsigned char *)SCSP->DSP.MADRS)[(addr - 0x7c0) ^ 1] = val;
+			else if (addr < 0xC00)
+				((unsigned char *)SCSP->DSP.MPRO)[(addr - 0x800) ^ 1] = val;
+			else
+				int a = 1;
+			if (addr == 0xBF0)
+			{
+				SCSPDSP_Start(&SCSP->DSP);
+			}
+			int a = 1;
+#endif
+		}
 	}
 }
 
@@ -1020,28 +1098,54 @@ void SCSP_w16(unsigned int addr,unsigned short val)
 	}
 	else if(addr<0x600)
 	{
-		*(unsigned short *) &(SCSP->datab[addr&0xff]) = val;
-		SCSP_UpdateReg(addr&0xff);
+		/**(unsigned short *) &(SCSP->datab[addr&0xff]) = val;
+		SCSP_UpdateReg(addr&0xff);*/
+		if (addr < 0x430)
+		{
+			*((unsigned short *)(SCSP->datab + ((addr & 0x3f)))) = val;
+			SCSP_UpdateReg(addr & 0x3f);
+		}
 	}	
-	else if(addr<0x700)
-		SCSP->RINGBUF[(addr-0x600)/2]=val;
+	else if (addr < 0x700)
+		SCSP->RINGBUF[(addr - 0x600) / 2] = val;
 	else
 	{
+		if (legacySound == true) {
 #ifdef USEDSP
-		//DSP
-		if(addr<0x780)	//COEF
-			*(unsigned short *) &(SCSP->DSP.COEF[(addr-0x700)/2])=val;
-		else if(addr<0x800)
-			*(unsigned short *) &(SCSP->DSP.MADRS[(addr-0x780)/2])=val;
-		else if(addr<0xC00)
-			*(unsigned short *) &(SCSP->DSP.MPRO[(addr-0x800)/2])=val;
-		else
-			int a=1;
-		if(addr==0xBFE)
-			SCSPDSP_Start(&SCSP->DSP);
-		int a=1;
+			// ElSemi's legacy DSP. For now we will need this for Fighting Vipers 2.
+			if (addr < 0x780)	//COEF
+				*(unsigned short *) &(SCSP->DSP.COEF[(addr - 0x700) / 2]) = val;
+			else if (addr < 0x800)
+				*(unsigned short *) &(SCSP->DSP.MADRS[(addr - 0x780) / 2]) = val;
+			else if (addr < 0xC00)
+				*(unsigned short *) &(SCSP->DSP.MPRO[(addr - 0x800) / 2]) = val;
+			else
+				int a = 1;
+			if (addr == 0xBF0)
+				SCSPDSP_Start(&SCSP->DSP);
+			int a = 1;
 #endif
-
+		}
+		else {
+#ifdef USEDSP
+			// MAME DSP
+			if (addr < 0x780)  //COEF
+				*((UINT16 *)(SCSP->DSP.COEF + (addr - 0x700) / 2)) = val;
+			else if (addr < 0x7c0)
+				*((UINT16 *)(SCSP->DSP.MADRS + (addr - 0x780) / 2)) = val;
+			else if (addr < 0x800) // MADRS is mirrored twice
+				*((UINT16 *)(SCSP->DSP.MADRS + (addr - 0x7c0) / 2)) = val;
+			else if (addr < 0xC00)
+			{
+				*((UINT16 *)(SCSP->DSP.MPRO + (addr - 0x800) / 2)) = val;
+			}
+			else
+				int a = 1;
+			if (addr == 0xBF0)
+				SCSPDSP_Start(&SCSP->DSP);
+			int a = 1;
+#endif
+		}
 	}
 }
 
@@ -1077,13 +1181,15 @@ void SCSP_w32(unsigned int addr,unsigned int val)
 		rotl(val, 16);
 			if(addr<0x780)	//COEF
 				*(unsigned int *) &(SCSP->DSP.COEF[(addr-0x700)/2])=val;
-			else if(addr<0x800)
-				*(unsigned int *) &(SCSP->DSP.MADRS[(addr-0x780)/2])=val;
+			else if (addr < 0x7c0)
+				*(unsigned int *) &(SCSP->DSP.MADRS[(addr-0x780)/2]) = val;
+			else if (addr < 0x800) // MADRS is mirrored twice
+				*(unsigned int *) &(SCSP->DSP.MADRS[(addr-0x7c0)/2]) = val;
 			else if(addr<0xC00)
 				*(unsigned int *) &(SCSP->DSP.MPRO[(addr-0x800)/2])=val;
 			else
 				int a=1;
-			if(addr==0xBFC)
+			if(addr==0xBF0)
 				SCSPDSP_Start(&SCSP->DSP);
 			int a=1;
 #endif
@@ -1128,12 +1234,57 @@ unsigned short SCSP_r16(unsigned int addr)
 	}
 	else if(addr<0x600)
 	{
-		SCSP_UpdateRegR(addr&0xff);
-		v= *(unsigned short *) &(SCSP->datab[addr&0xff]);
-		//ErrorLogMessage("SCSP Reg %02X Read word %04X",addr&0xff,v);
+		//SCSP_UpdateRegR(addr&0xff);
+		//v= *(unsigned short *) &(SCSP->datab[addr&0xff]);
+		////ErrorLogMessage("SCSP Reg %02X Read word %04X",addr&0xff,v);
+		if (addr < 0x430)
+		{
+			SCSP_UpdateRegR(addr & 0x3f);
+			v = *((UINT16 *)(SCSP->datab + ((addr & 0x3f))));
+		}
 	}	
-	else if(addr<0x700)
-		v=SCSP->RINGBUF[(addr-0x600)/2];
+	else if (addr < 0x700)
+		v = SCSP->RINGBUF[(addr - 0x600) / 2];
+	else
+	{
+		// DSP stuff
+		if (addr < 0x780)	//COEF
+			v = *((UINT16 *)(SCSP->DSP.COEF + (addr - 0x700) / 2));
+		else if (addr < 0x7c0)
+			v = *((UINT16 *)(SCSP->DSP.MADRS + (addr - 0x780) / 2));
+		else if (addr < 0x800)
+			v = *((UINT16 *)(SCSP->DSP.MADRS + (addr - 0x7c0) / 2));
+		else if (addr < 0xC00)
+			v = *((UINT16 *)(SCSP->DSP.MPRO + (addr - 0x800) / 2));
+		else if (addr < 0xE00)
+		{
+			if (addr & 2)
+				v = SCSP->DSP.TEMP[(addr >> 2) & 0x7f] & 0xffff;
+			else
+				v = SCSP->DSP.TEMP[(addr >> 2) & 0x7f] >> 16;
+		}
+		else if (addr < 0xE80)
+		{
+			if (addr & 2)
+				v = SCSP->DSP.MEMS[(addr >> 2) & 0x1f] & 0xffff;
+			else
+				v = SCSP->DSP.MEMS[(addr >> 2) & 0x1f] >> 16;
+		}
+		else if (addr < 0xEC0)
+		{
+			if (addr & 2)
+				v = SCSP->DSP.MIXS[(addr >> 2) & 0xf] & 0xffff;
+			else
+				v = SCSP->DSP.MIXS[(addr >> 2) & 0xf] >> 16;
+		}
+		else if (addr < 0xEE0)
+			v = *((UINT16 *)(SCSP->DSP.EFREG + (addr - 0xec0) / 2));
+		else
+		{
+			if (addr < 0xEE4)
+				v = *((UINT16 *)(SCSP->DSP.EXTS + (addr - 0xee0) / 2));
+		}
+	}
 	return v;
 }
 
@@ -1144,622 +1295,226 @@ unsigned int SCSP_r32(unsigned int addr)
 
 #define REVSIGN(v) ((~v)+1)
 
-
-void SCSP_TimersAddTicks2(int ticks)
-{
-		//Update timers
-		WORD cnt;
-		WORD step;
-		//Timer A
-		
-		if(!TimPris[0])
-		{
-			//cnt=SCSPs[0].data[0x18/2]&0xff;
-			cnt=TimCnt[0];
-			if(cnt==0xffff)
-				goto noTA;
-			++cnt;
-			++TimCnt[0];
-			if(cnt>=TIMER_LIMITSA)
-			{
-				/*if((SCSPs[0].data[0x20/2]&SCSPs[0].data[0x1e/2])&0x40)	//timer pending ack
-					int a=1;*/
-				SCSPs[0].data[0x20/2]|=0x40;
-				/*if(SCSP.data[0x1e/2]&0x40)
-					Int68kCB(IrqTimA);*/
-				cnt=0xff;
-				TimCnt[0]=0xffff;
-			}
-			step=1<<((SCSPs[0].data[0x18/2]>>8)&0x7);
-			TimPris[0]=step;
-			SCSPs[0].data[0x18/2]&=0xff00;
-			SCSPs[0].data[0x18/2]|=cnt;
-		}
-//		else
-			TimPris[0]--;
-noTA:
-;
-		//Timer B
-		
-		if(!TimPris[1])
-		{
-			//cnt=SCSPs[0].data[0x1a/2]&0xff;
-			cnt=TimCnt[1];
-			if(cnt==0xffff)
-				goto noTB;
-			++cnt;
-			++TimCnt[1];
-			if(cnt>=TIMER_LIMITSB)
-			{
-				/*if((SCSP.data[0x20/2]&SCSP.data[0x1e/2])&0x80)	//timer pending ack
-					int a=1;*/
-				SCSPs[0].data[0x20/2]|=0x80;
-				/*if(SCSP.data[0x1e/2]&0x80)
-					Int68kCB(IrqTimBC);*/
-				cnt=0xff;
-				TimCnt[1]=0xffff;
-			}
-			step=1<<((SCSP[0].data[0x1a/2]>>8)&0x7);
-			TimPris[1]=step;
-			SCSPs[0].data[0x1a/2]&=0xff00;
-			SCSPs[0].data[0x1a/2]|=cnt;
-		}
-//		else
-			TimPris[1]--;
-noTB:
-;
-		//Timer C
-		
-		if(!TimPris[2])
-		{
-			//cnt=SCSPs[0].data[0x1c/2]&0xff;
-			cnt=TimCnt[2];
-			if(cnt==0xffff)
-				goto noTC;
-			++cnt;
-			++TimCnt[2];
-			if(cnt>=TIMER_LIMITSC)
-			{
-				/*if((SCSP.data[0x20/2]&SCSP.data[0x1e/2])&0x100)	//timer pending ack
-					int a=1;*/
-				SCSP[0].data[0x20/2]|=0x100;
-				/*if(SCSP.data[0x1e/2]&0x100)
-					Int68kCB(IrqTimBC);*/
-				cnt=0xff;
-				TimCnt[2]=0xffff;
-			}
-			step=1<<((SCSPs[0].data[0x1c/2]>>8)&0x7);
-			TimPris[2]=step;
-			SCSPs[0].data[0x1c/2]&=0xff00;
-			SCSPs[0].data[0x1c/2]|=cnt;
-		}
-//		else
-			TimPris[2]--;
-noTC:
-;
-}
-
 void SCSP_TimersAddTicks(int ticks)
 {
-	if(TimCnt[0]<=0xff00)
+	if (TimCnt[0] <= 0xff00)
 	{
-		TimCnt[0]+=ticks << (8-((SCSPs[0].data[0x18/2]>>8)&0x7));
-		if (TimCnt[0] > 0xFE00)
+		TimCnt[0] += ticks << (8 - ((SCSPs->data[0x18 / 2] >> 8) & 0x7));
+		if (TimCnt[0] > 0xFF00)
 		{
 			TimCnt[0] = 0xFFFF;
-			SCSPs[0].data[0x20/2]|=0x40;
+			SCSPs->data[0x20 / 2] |= 0x40;
 		}
-		SCSPs[0].data[0x18/2]&=0xff00;
-		SCSPs[0].data[0x18/2]|=TimCnt[0]>>8;
+		SCSPs->data[0x18 / 2] &= 0xff00;
+		SCSPs->data[0x18 / 2] |= TimCnt[0] >> 8;
 	}
-	if(TimCnt[1]<=0xff00)
+
+	if (TimCnt[1] <= 0xff00)
 	{
-		TimCnt[1]+=ticks << (8-((SCSPs[0].data[0x1a/2]>>8)&0x7));
-		if (TimCnt[1] > 0xFE00)
+		TimCnt[1] += ticks << (8 - ((SCSPs->data[0x1a / 2] >> 8) & 0x7));
+		if (TimCnt[1] > 0xFF00)
 		{
 			TimCnt[1] = 0xFFFF;
-			SCSPs[0].data[0x20/2]|=0x80;
+			SCSPs->data[0x20 / 2] |= 0x80;
 		}
-		SCSPs[0].data[0x1a/2]&=0xff00;
-		SCSPs[0].data[0x1a/2]|=TimCnt[1]>>8;
-
+		SCSPs->data[0x1a / 2] &= 0xff00;
+		SCSPs->data[0x1a / 2] |= TimCnt[1] >> 8;
 	}
-	if(TimCnt[2]<=0xff00)
+
+	if (TimCnt[2] <= 0xff00)
 	{
-		TimCnt[2]+=ticks << (8-((SCSPs[0].data[0x1c/2]>>8)&0x7));
-		if (TimCnt[2] > 0xFE00)
+		TimCnt[2] += ticks << (8 - ((SCSPs->data[0x1c / 2] >> 8) & 0x7));
+		if (TimCnt[2] > 0xFF00)
 		{
 			TimCnt[2] = 0xFFFF;
-			SCSPs[0].data[0x20/2]|=0x100;
+			SCSPs->data[0x20 / 2] |= 0x100;
 		}
-		SCSPs[0].data[0x1c/2]&=0xff00;
-		SCSPs[0].data[0x1c/2]|=TimCnt[2]>>8;
-	}
-
-}
-
-
-#ifdef NEWSCSP
-
-#ifdef USEDSP
-const bool hasDSP=true;
-
-#else
-const bool hasDSP=false;
-#endif
-signed short *bufmix;
-
-signed int *bufl1,*bufr1;
-#define SCSPNAME(_8bit,lfo,alfo,loop) \
-void SCSP_Update##_8bit##lfo##alfo##loop(_SLOT *slot,unsigned int Enc,unsigned int nsamples)
-
-//TRUST ON THE COMPILER OPTIMIZATIONS
-#define SCSPTMPL(_8bit,lfo,alfo,loop) \
-SCSPNAME(_8bit,lfo,alfo,loop)\
-{\
-	signed int sample;\
-	DWORD addr;\
-	for(unsigned int s=0;s<nsamples;++s)\
-	{\
-		int step=slot->step;\
-		if(!slot->active)\
-			return;\
-		if(lfo) \
-		{\
-			step=step*PLFO_Step(&(slot->PLFO));\
-			step>>=SHIFT; \
-		}\
-		if(_8bit)\
-		{\
-			unsigned int offs=(slot->cur_addr>>SHIFT);\
-			signed char *p=(signed char *) (slot->base);\
-			int s;\
-			signed int fpart=slot->cur_addr&((1<<SHIFT)-1);\
-			s=(int) p[offs^1]*((1<<SHIFT)-fpart)+(int) p[(offs+1)^1]*fpart;\
-			sample=(s>>SHIFT)<<8;\
-		}\
-		else\
-		{\
-			signed short *p=(signed short *) &(slot->base[(slot->cur_addr>>(SHIFT-1))&(~1)]);\
-			signed int fpart=slot->cur_addr&((1<<SHIFT)-1);\
-			sample=(p[0]);\
-		}\
-		if(loop==0)\
-		{\
-			slot->cur_addr+=step;\
-			addr=slot->cur_addr>>SHIFT;\
-			if(addr>LEA(slot))\
-			{\
-				SCSP_StopSlot(slot,0);\
-			}\
-		}\
-		if(loop==1)\
-		{\
-			slot->cur_addr+=step;\
-			addr=slot->cur_addr>>SHIFT;\
-			if(addr>LEA(slot))\
-				slot->cur_addr=(LSA(slot)+1)<<SHIFT;\
-		}\
-		if(loop==2)\
-		{\
-			if(slot->Back)\
-				slot->cur_addr+=REVSIGN(step);\
-			else\
-				slot->cur_addr+=step;\
-			addr=slot->cur_addr>>SHIFT;\
-			if(addr>=LEA(slot))\
-			{\
-				slot->cur_addr=LEA(slot)<<SHIFT;\
-				slot->Back=1;\
-			}\
-			if((addr<LSA(slot) || (addr&0x80000000)) && slot->Back)\
-				slot->cur_addr=LEA(slot)<<SHIFT;\
-		}\
-		if(loop==3)\
-		{\
-			if(slot->Back)\
-				slot->cur_addr+=REVSIGN(step);\
-			else\
-				slot->cur_addr+=step;\
-			addr=slot->cur_addr>>SHIFT;\
-			if(addr>=LEA(slot)) /*reached end, reverse till start*/ \
-			{\
-				slot->cur_addr=LEA(slot)<<SHIFT;\
-				slot->Back=1;\
-			}\
-			if((addr<=LSA(slot) || (addr&0x80000000)) && slot->Back) /*reached start or negative*/\
-			{\
-				slot->cur_addr=LSA(slot)<<SHIFT;\
-				slot->Back=0;\
-			}\
-		}\
-		if(alfo)\
-		{\
-			sample=sample*ALFO_Step(&(slot->ALFO));\
-			sample>>=SHIFT;\
-		}\
-		*RBUFDST=sample;\
-		\
-		sample=(sample*EG_Update(slot))>>SHIFT;\
-		if(hasDSP)\
-			*bufmix++=((sample*LPANTABLE[(Enc|0xE0)&0xFFE0])>>(SHIFT+3));\
-	\
-		*bufl1=*bufl1 + ((sample*LPANTABLE[Enc])>>SHIFT);\
-		*bufr1=*bufr1 + ((sample*RPANTABLE[Enc])>>SHIFT);\
-		++bufl1;\
-		++bufr1;\
-	}\
-}
-
-SCSPTMPL(0,0,0,0) SCSPTMPL(0,0,0,1) SCSPTMPL(0,0,0,2) SCSPTMPL(0,0,0,3)
-SCSPTMPL(0,0,1,0) SCSPTMPL(0,0,1,1) SCSPTMPL(0,0,1,2) SCSPTMPL(0,0,1,3)
-SCSPTMPL(0,1,0,0) SCSPTMPL(0,1,0,1) SCSPTMPL(0,1,0,2) SCSPTMPL(0,1,0,3)
-SCSPTMPL(0,1,1,0) SCSPTMPL(0,1,1,1) SCSPTMPL(0,1,1,2) SCSPTMPL(0,1,1,3)
-SCSPTMPL(1,0,0,0) SCSPTMPL(1,0,0,1) SCSPTMPL(1,0,0,2) SCSPTMPL(1,0,0,3)
-SCSPTMPL(1,0,1,0) SCSPTMPL(1,0,1,1) SCSPTMPL(1,0,1,2) SCSPTMPL(1,0,1,3)
-SCSPTMPL(1,1,0,0) SCSPTMPL(1,1,0,1) SCSPTMPL(1,1,0,2) SCSPTMPL(1,1,0,3)
-SCSPTMPL(1,1,1,0) SCSPTMPL(1,1,1,1) SCSPTMPL(1,1,1,2) SCSPTMPL(1,1,1,3)
-
-#undef SCSPTMPL
-#define SCSPTMPL(_8bit,lfo,alfo,loop) \
- SCSP_Update##_8bit##lfo##alfo##loop ,
-
-
-typedef void (*_SCSPUpdateModes)(_SLOT *,unsigned int,unsigned int);
-
-_SCSPUpdateModes SCSPUpdateModes[]=
-{
-	SCSPTMPL(0,0,0,0) SCSPTMPL(0,0,0,1) SCSPTMPL(0,0,0,2) SCSPTMPL(0,0,0,3)
-	SCSPTMPL(0,0,1,0) SCSPTMPL(0,0,1,1) SCSPTMPL(0,0,1,2) SCSPTMPL(0,0,1,3)
-	SCSPTMPL(0,1,0,0) SCSPTMPL(0,1,0,1) SCSPTMPL(0,1,0,2) SCSPTMPL(0,1,0,3)
-	SCSPTMPL(0,1,1,0) SCSPTMPL(0,1,1,1) SCSPTMPL(0,1,1,2) SCSPTMPL(0,1,1,3)
-	SCSPTMPL(1,0,0,0) SCSPTMPL(1,0,0,1) SCSPTMPL(1,0,0,2) SCSPTMPL(1,0,0,3)
-	SCSPTMPL(1,0,1,0) SCSPTMPL(1,0,1,1) SCSPTMPL(1,0,1,2) SCSPTMPL(1,0,1,3)
-	SCSPTMPL(1,1,0,0) SCSPTMPL(1,1,0,1) SCSPTMPL(1,1,0,2) SCSPTMPL(1,1,0,3)
-	SCSPTMPL(1,1,1,0) SCSPTMPL(1,1,1,1) SCSPTMPL(1,1,1,2) SCSPTMPL(1,1,1,3)
-
-};
-
-#define SCANLINES	210
-
-void SCSP_CpuRunScanline()
-{
-	int slice=12000000/(44100);
-	static unsigned int smp=0;
-	smp+=(unsigned int) ((256.0*44100.0)/((float) SCANLINES*SysFPS));
-	int lastdiff=0;
-	for(;smp&0xffffff00;)
-	{
-		lastdiff=Run68kCB(slice+lastdiff);
-		/*while(slice>0)
-		{
-			lastdiff=Run68kCB(1);
-			slice+=lastdiff;
-			if(M68000_regs.pc==0x0602620)
-				int a=1;
-		}*/
-		SCSP_TimersAddTicks(1);
-		CheckPendingIRQ();
-		smp-=0x100;
+		SCSPs->data[0x1c / 2] &= 0xff00;
+		SCSPs->data[0x1c / 2] |= TimCnt[2] >> 8;
 	}
 }
 
-void SCSP_DoMasterSamples(int nsamples)
-{
-
-	static int lastdiff=0;
-	signed short *bufr,*bufl;
-	
-	
-	for(int sl=0;sl<32;++sl)
-	{
-		bufr1=buffertmpr;
-		bufl1=buffertmpl;
-//		if(sl!=0x3)
-//			continue;
-
-		if(SCSPs[0].Slots[sl].active)
-		{
-			_SLOT *slot=SCSPs[0].Slots+sl;
-			unsigned int disdl=DISDL(slot);
-			unsigned int efsdl=EFSDL(slot);
-			unsigned int tl=TL(slot);
-			unsigned short Enc=((TL(slot))<<0x8)|((DIPAN(slot))<<0x0)|((DISDL(slot))<<0x5);
-			//unsigned short Enc=(0x00)|((DIPAN(slot))<<0x8)|((0x7)<<0xd);
-			unsigned int mode=LPCTL(slot);
-
-/*			if(SSCTL(slot)!=0)	//no FM or noise yet
-				int a=1;
-			int MDX=MDXSL(slot);
-			int MDY=MDYSL(slot);
-			int LEV=MDL(slot);
-			if(LEV!=0 || MDX!=0 || MDY!=0)
-				goto norender;
-			if(SBCTL(slot))
-				int a=1;
-
-*/
-			RBUFDST=SCSPs[0].RINGBUF+SCSPs[0].BUFPTR;
-			if(sl==0x15)
-				int a=1;
-			if(PLFOS(slot))
-				mode|=8;
-			if(ALFOS(slot))
-				mode|=4;
-			if(PCM8B(slot))
-				mode|=0x10;
-#ifdef USEDSP
-			bufmix=SCSPs[0].MIXBuf+0x300*slot->slot;
-#endif
-
-			SCSPUpdateModes[mode](slot,Enc,nsamples);
-//norender:
-//;
-			
-		}
-		++SCSPs[0].BUFPTR;
-		SCSPs[0].BUFPTR&=63;
-	}
-	if(HasSlaveSCSP)
-	{
-		for(int sl=0;sl<32;++sl)
-		{
-			bufr1=buffertmpr;
-			bufl1=buffertmpl;
-			if(SCSPs[1].Slots[sl].active)
-			{
-				_SLOT *slot=SCSPs[1].Slots+sl;
-				//unsigned short Enc=((TL(slot))<<0x0)|((DIPAN(slot))<<0x8)|((0x7)<<0xd);
-				unsigned short Enc=((TL(slot))<<0x8)|((DIPAN(slot))<<0x0)|((DISDL(slot))<<0x5);
-				unsigned int mode=LPCTL(slot);
-				
-				if(PLFOS(slot))
-					mode|=8;
-				if(ALFOS(slot))
-					mode|=4;
-				if(PCM8B(slot))
-					mode|=0x10;
-				RBUFDST=SCSPs[1].RINGBUF+SCSPs[1].BUFPTR;
-#ifdef USEDSP
-				bufmix=SCSPs[1].MIXBuf+0x300*slot->slot;
-#endif
-				SCSPUpdateModes[mode](slot,Enc,nsamples);
-				
-			}
-			++SCSPs[1].BUFPTR;
-			SCSPs[1].BUFPTR&=63;
-		}
-	}
-	bufr=bufferr;
-	bufl=bufferl;
-	bufr1=buffertmpr;
-	bufl1=buffertmpl;
-	for(int s=0;s<nsamples;++s)
-	{
-#define ICLIP16(x) (x<-32768)?-32768:((x>32767)?32767:x)
-		signed int smpl=*bufl1;
-		signed int smpr=*bufr1;
-#ifdef USEDSP
-		signed short *pt=SCSPs[0].MIXBuf+s;
-		for(int sl=0;sl<32;++sl)
-		{
-			_SLOT *slot=SCSPs[0].Slots+sl;
-			if(slot->active)
-			{
-				SCSPDSP_SetSample(&SCSP[0].DSP,pt[0],ISEL(slot),IMXL(slot));
-			}
-			pt+=0x300;
-		}
-		SCSPDSP_Step(&SCSP[0].DSP);
-		if(HasSlaveSCSP)
-		{
-			pt=SCSPs[1].MIXBuf+s;
-			for(int sl=0;sl<32;++sl)
-			{
-				_SLOT *slot=SCSPs[1].Slots+sl;
-				if(slot->active)
-				{
-					SCSPDSP_SetSample(&SCSP[1].DSP,pt[0],ISEL(slot),IMXL(slot));
-				}
-				pt+=0x300;
-			}
-			SCSPDSP_Step(&SCSP[1].DSP);
-		}
-
-		//		smpl=0;
-		//		smpr=0;
-		for(int i=0;i<16;++i)
-		{
-			_SLOT *slot=SCSPs[0].Slots+i;
-			int ef=EFSDL(slot);
-			if(ef)
-			{
-				unsigned short Enc=0|((EFPAN(slot))<<0x0)|((EFSDL(slot))<<0x5);
-				smpl+=(SCSPs[0].DSP.EFREG[i]*LPANTABLE[Enc])>>SHIFT;
-				smpr+=(SCSPs[0].DSP.EFREG[i]*RPANTABLE[Enc])>>SHIFT;
-			}
-			
-			if(HasSlaveSCSP)
-			{
-				_SLOT *slot=SCSPs[1].Slots+i;
-				ef=EFSDL(slot);
-				if(ef)
-				{
-					unsigned short Enc=0|((EFPAN(slot))<<0x0)|((EFSDL(slot))<<0x5);
-					smpl+=(SCSPs[1].DSP.EFREG[i]*LPANTABLE[Enc])>>SHIFT;
-					smpr+=(SCSPs[1].DSP.EFREG[i]*RPANTABLE[Enc])>>SHIFT;
-				}
-			}
-		}
-#endif
-#ifdef REVERB
-		smpl+=bufferrevl[RevR];
-		smpr+=bufferrevr[RevR];
-		bufferrevl[RevW]=((smpl<<0)/REVERB_DIV)>>0;
-		bufferrevr[RevW]=((smpr<<0)/REVERB_DIV)>>0;
-		++RevW;
-		if(RevW==REVERB_LEN)
-			RevW=0;
-		++RevR;
-		if(RevR==REVERB_LEN)
-			RevR=0;
-#endif
-		*bufl=ICLIP16(smpl);
-		*bufr=ICLIP16(smpr);
-		*bufl1=0;
-		*bufr1=0;
-		++bufl;
-		++bufr;
-		++bufl1;
-		++bufr1;
-
-	}
-}
-
-
-#else
 
 signed int inline SCSP_UpdateSlot(_SLOT *slot)
 {
 	signed int sample;
-	int step=slot->step;
-	DWORD addr;
+	int step = slot->step;
+	DWORD addr1, addr2, addr_select;
+	DWORD *addr[2] = { &addr1, &addr2 };
+	DWORD *slot_addr[2] = { &(slot->cur_addr), &(slot->nxt_addr) };
 
-	if(SSCTL(slot)!=0)	//no FM or noise yet
+
+	if (SSCTL(slot) != 0)
 		return 0;
-	
-	if(PLFOS(slot)!=0)
+
+	if (PLFOS(slot) != 0)
 	{
-		step=step*PLFO_Step(&(slot->PLFO));
-		step>>=SHIFT;
+		step = step * PLFO_Step(&(slot->PLFO));
+		step >>= (SHIFT);
 	}
 
-	if(PCM8B(slot))
-		addr=slot->cur_addr>>SHIFT;
+	if (PCM8B(slot)) {
+		addr1 = slot->cur_addr >> SHIFT;
+		addr2 = slot->nxt_addr >> SHIFT;
+	}
+	else {
+		//addr=(slot->cur_addr>>(SHIFT-1))&(~1);
+		addr1 = (slot->cur_addr >> (SHIFT - 1)) & 0x7fffe;
+		addr2 = (slot->nxt_addr >> (SHIFT - 1)) & 0x7fffe;
+	}
+
+	if (MDL(slot) != 0 || MDXSL(slot) != 0 || MDYSL(slot) != 0)
+	{
+		signed int smp = (SCSPs->RINGBUF[(SCSPs->BUFPTR + MDXSL(slot)) & 63] + SCSPs->RINGBUF[(SCSPs->BUFPTR + MDYSL(slot)) & 63]) / 2;
+		smp <<= 0xA; // associate cycle with 1024
+		// The base shift below is 0x18 (24); per the original author this value is needed for VF3 to sound correct.
+		smp >>= 0x18 - MDL(slot); // e.g. MDL=0xF shifts right by 9 (0x18-0xF). Original derivation: for MDL=0xF the sample range corresponds to +/- 64 pi (32=2^5 cycles), i.e. a shift of 11 (16-5 == 0x1A-0xF). NOTE(review): that derivation assumes a 0x1A base, not the 0x18 used here - confirm which is intended.
+		if (!PCM8B(slot)) smp <<= 1;
+		addr1 += smp; addr2 += smp;
+		if (!PCM8B(slot))
+		{
+			addr1 &= 0x7fffe; addr2 &= 0x7fffe;
+		}
+		else
+		{
+			addr1 &= 0x7ffff; addr2 &= 0x7ffff;
+		}
+	}
+	//if (SSCTL(slot) == 0) {
+		if (PCM8B(slot))	//8 bit signed
+		{
+			signed char *p1 = (signed char *) &(slot->base[addr1 ^ 1]);
+			signed char *p2 = (signed char *) &(slot->base[addr2 ^ 1]);
+			int s;
+			signed int fpart = slot->cur_addr&((1 << SHIFT) - 1);
+			//sample=(p[0])<<8;
+			s = (int)(p1[0] << 8)*((1 << SHIFT) - fpart) + (int)(p2[0] << 8)*fpart;
+			sample = (s >> SHIFT);
+		}
+		else	//16 bit signed (endianness?)
+		{
+			signed short *p1 = (signed short *) &(slot->base[addr1]);
+			signed short *p2 = (signed short *) &(slot->base[addr2]);
+			int s;
+			signed int fpart = slot->cur_addr&((1 << SHIFT) - 1);
+			//sample=(p[0]);
+			s = (int)(p1[0])*((1 << (SHIFT)) - fpart) + (int)(p2[0])*fpart;
+			
+			
+			sample = (s >> (SHIFT));
+
+			//sample=((p[0]>>8)&0xFF)|(p[0]<<8);
+			//s=(int) p[0]*((1<<SHIFT)-fpart)+(int) p[1]*fpart;
+			//sample=s>>SHIFT;
+
+			/*		if(SBCTL(slot)&1)	//reverse data
+			sample^=0x7fff;
+			if(SBCTL(slot)&2)	//reverse sign
+			sample^=0x8000;
+			*/
+		}
+	//}
+
+	if (SBCTL(slot) & 0x1)
+		sample ^= 0x7FFF;
+	if (SBCTL(slot) & 0x2)
+		sample = (INT16)(sample ^ 0x8000);
+
+	if (slot->Back)
+		slot->cur_addr -= step;
 	else
-		addr=(slot->cur_addr>>(SHIFT-1))&(~1);
+		slot->cur_addr += step;
+	slot->nxt_addr = slot->cur_addr + (1 << SHIFT);
 
-	if(MDL(slot)!=0 || MDXSL(slot)!=0 || MDYSL(slot)!=0)
+	addr1 = slot->cur_addr >> SHIFT;
+	addr2 = slot->nxt_addr >> SHIFT;
+
+	if (addr1 >= LSA(slot) && !(slot->Back))
 	{
-//TODO: is this correct? SCSP is not necessarily set to the slave SCSP. ElSemi may not have noticed this.
-		unsigned char v;
-		signed int smp=(SCSP->RINGBUF[(SCSP->BUFPTR+MDXSL(slot))&63]+SCSP->RINGBUF[(SCSP->BUFPTR+MDYSL(slot))&63])/2;
-		
-		smp>>=11;
-		// Check for underflow before adding to addr
-		if (smp >= 0 || (DWORD)(-smp) < addr)
-			addr+=smp;
-		else 
-			addr = 0;
-		if(!PCM8B(slot))
-			addr&=~1;
+		if (LPSLNK(slot) && slot->EG.state == ATTACK)
+			slot->EG.state = DECAY1;
 	}
 
-	if(PCM8B(slot))	//8 bit signed
-	{	
-		signed char *p=(signed char *) &(slot->base[addr^1]);
-		int s;
-		signed int fpart=slot->cur_addr&((1<<SHIFT)-1);
-		sample=(p[0])<<8;
-		
-		//if (p>=(signed char *) &SCSP->SCSPRAM[0x200000])
-		//	printf("%X %X %X %p %p %p\n", addr, SA(slot), LEA(slot), p, slot->base, SCSP->SCSPRAM);
-		
-		/*s=(int) p[0]*((1<<SHIFT)-fpart)+(int) p[1]*fpart;
-		sample=(s>>SHIFT)<<8;
-		*/
-/*		if(SBCTL(slot)&1)	//reverse data
-			sample^=0x7f;
-		if(SBCTL(slot)&2)	//reverse sign
-			sample^=0x80;
-*/		
-	}
-	else	//16 bit signed (endianness?)
-	{
-		signed short *p=(signed short *) &(slot->base[addr]);
-		int s;
-		signed int fpart=slot->cur_addr&((1<<SHIFT)-1);
-		sample=(p[0]);
-		//sample=((p[0]>>8)&0xFF)|(p[0]<<8);
-		//s=(int) p[0]*((1<<SHIFT)-fpart)+(int) p[1]*fpart;
-		//sample=s>>SHIFT;
 
-		/*		if(SBCTL(slot)&1)	//reverse data
-		sample^=0x7fff;
-		if(SBCTL(slot)&2)	//reverse sign
-		sample^=0x8000;
-		*/
-	}
-
-	
-	switch(LPCTL(slot))
+	for (addr_select = 0; addr_select < 2; addr_select++)
 	{
-	case 0:	//no loop
-		slot->cur_addr+=step;
-		addr=slot->cur_addr>>SHIFT;
-		if(addr>LEA(slot))
+		INT32 rem_addr;
+		switch (LPCTL(slot))
 		{
-			//slot->active=0;
-			SCSP_StopSlot(slot,0);
+		case 0: //no loop
+			if (*addr[addr_select] >= LSA(slot) && *addr[addr_select] >= LEA(slot))
+			{
+				//slot->active=0;
+				SCSP_StopSlot(slot, 0);
+			}
+			break;
+		case 1: //normal loop
+			if (*addr[addr_select] >= LEA(slot))
+			{
+				rem_addr = *slot_addr[addr_select] - (LEA(slot) << SHIFT);
+				*slot_addr[addr_select] = (LSA(slot) << SHIFT) + rem_addr;
+			}
+			break;
+		case 2: //reverse loop
+			if ((*addr[addr_select] >= LSA(slot)) && !(slot->Back))
+			{
+				rem_addr = *slot_addr[addr_select] - (LSA(slot) << SHIFT);
+				*slot_addr[addr_select] = (LEA(slot) << SHIFT) - rem_addr;
+				slot->Back = 1;
+			}
+			else if ((*addr[addr_select] < LSA(slot) || (*slot_addr[addr_select] & 0x80000000)) && slot->Back)
+			{
+				rem_addr = (LSA(slot) << SHIFT) - *slot_addr[addr_select];
+				*slot_addr[addr_select] = (LEA(slot) << SHIFT) - rem_addr;
+			}
+			break;
+		case 3: //ping-pong
+			if (*addr[addr_select] >= LEA(slot)) //reached end, reverse till start
+			{
+				rem_addr = *slot_addr[addr_select] - (LEA(slot) << SHIFT);
+				*slot_addr[addr_select] = (LEA(slot) << SHIFT) - rem_addr;
+				slot->Back = 1;
+			}
+			else if ((*addr[addr_select] < LSA(slot) || (*slot_addr[addr_select] & 0x80000000)) && slot->Back)//reached start or negative
+			{
+				rem_addr = (LSA(slot) << SHIFT) - *slot_addr[addr_select];
+				*slot_addr[addr_select] = (LSA(slot) << SHIFT) + rem_addr;
+				slot->Back = 0;
+			}
+			break;
 		}
-		break;
-	case 1: //normal loop
-		slot->cur_addr+=step;
-		addr=slot->cur_addr>>SHIFT;
-		if(addr>LEA(slot))
-			slot->cur_addr=LSA(slot)<<SHIFT;
-		break;
-	case 2:	//reverse loop
-		if(slot->Back)
-			slot->cur_addr+=REVSIGN(step);
+	}
+
+	if (!SDIR(slot))
+	{
+		if (ALFOS(slot) != 0) 
+		{
+			sample = sample * ALFO_Step(&(slot->ALFO));
+			sample >>= (SHIFT);
+		}
+
+
+
+		if (slot->EG.state == ATTACK)
+			sample = (sample * EG_Update(slot)) >> SHIFT;
 		else
-			slot->cur_addr+=step;
-		addr=slot->cur_addr>>SHIFT;
-		if(addr>LEA(slot))
-		{
-			slot->cur_addr=LEA(slot)<<SHIFT;
-			slot->Back=1;
-		}
-		if((addr<LSA(slot) || (addr&0x80000000)) && slot->Back)
-			slot->cur_addr=LEA(slot)<<SHIFT;
-		break;
-	case 3: //ping-pong
-		if(slot->Back)
-			slot->cur_addr+=REVSIGN(step);
-		else
-			slot->cur_addr+=step;
-		addr=slot->cur_addr>>SHIFT;
-		if(addr>LEA(slot)) //reached end, reverse till start
-		{
-			slot->cur_addr=LEA(slot)<<SHIFT;
-			//slot->step=REVSIGN(slot->step);
-			slot->Back=1;
-		}
-		if((addr<LSA(slot) || (addr&0x80000000)) && (slot->Back)) //reached start or negative
-		{
-			slot->cur_addr=LSA(slot)<<SHIFT;
-			//slot->step=REVSIGN(slot->step);
-			slot->Back=0;
-		}
-		break;
+			sample = (sample * EG_TABLE[EG_Update(slot) >> (SHIFT - 10)]) >> SHIFT;
 	}
 
-	if(ALFOS(slot)!=0)
+	if (!STWINH(slot))
 	{
-		sample=sample*ALFO_Step(&(slot->ALFO));
-		sample>>=SHIFT;
+		if (!SDIR(slot))
+		{
+			UINT16 Enc = ((TL(slot)) << 0x0) | (0x7 << 0xd);
+			*RBUFDST = (sample * LPANTABLE[Enc]) >> (SHIFT + 1);
+		}
 	}
 
-	if(!STWINH(slot))
-		*RBUFDST=sample;
-	else 
-		int a=1;
-
-	sample=(sample*EG_Update(slot))>>SHIFT;
 
 	return sample;
 }
 
+
 void SCSP_CpuRunScanline()
 {
 
@@ -1767,170 +1522,182 @@ void SCSP_CpuRunScanline()
 
 void SCSP_DoMasterSamples(int nsamples)
 {
-	int slice=12000000/(SysFPS*nsamples);	// 68K cycles/sample
-	static int lastdiff=0;
-	
+	int slice = 12000000 / (SoundClock*nsamples);	// 68K cycles/sample
+	static int lastdiff = 0;
+
 	/*
-	 * Compute relative master/slave SCSP balance (note: master is often used 
+	 * Compute relative master/slave SCSP balance (note: master is often used
 	 * for the front speakers). Equal balance is a 1.0 scale factor for both.
 	 * When one SCSP is fully attenuated, the other's samples will be multiplied
 	 * by 2.
 	 */
-	float balance = (float) s_config->Get("Balance").ValueAs<float>();
+	float balance = (float)s_config->Get("Balance").ValueAs<float>();
 	if (balance < -100.0f)
-	  balance = -100.0f;
-  else if (balance > 100.0f)
-    balance = 100.0f;
-  balance /= 100.0f;
-	float masterBalance = 1.0f+balance;
-	float slaveBalance = 1.0f-balance;
+		balance = -100.0f;
+	else if (balance > 100.0f)
+		balance = 100.0f;
+	balance /= 100.0f;
+	float masterBalance = 1.0f + balance;
+	float slaveBalance = 1.0f - balance;
+	signed short *bufl, *bufr;
+
+	INT32 sl, s, i;
+
+	bufl = bufferl;
+	bufr = bufferr;
 
 	/*
 	 * Generate samples
 	 */
-	for(int s=0;s<nsamples;++s)
+	for (s = 0; s < nsamples; ++s)
 	{
-		signed int smpl=0;
-		signed int smpr=0;
+		signed int smpl = 0, smpr = 0;
 
-		for(int sl=0;sl<32;++sl)
+		for (sl = 0; sl < 32; ++sl)
 		{
-			if(SCSPs[0].Slots[sl].active)
-			{
-				_SLOT *slot=SCSPs[0].Slots+sl;
-				unsigned short Enc=((TL(slot))<<0x8)|((DIPAN(slot))<<0x0)|((DISDL(slot))<<0x5);
-				RBUFDST=SCSPs[0].RINGBUF+SCSPs[0].BUFPTR;
-				signed int sample;
-				//signed int sample=(int) (masterBalance*(float)SCSP_UpdateSlot(slot));
-				//if(SA(slot)!=0x2ccf4)
-				/*if(SA(slot)!=0x1c77e)
-					sample=0;
-				else*/
-					sample= (int) (masterBalance*(float)SCSP_UpdateSlot(slot));
-
-				
-				/*unsigned char ef=EFSDL(slot);
-				ef+=DISDL(slot);
-				if(ef>0xf) ef=0xf;
-				unsigned short Enc=((TL(slot))<<0x0)|((DIPAN(slot))<<0x8)|((ef)<<0xd);
-				*/
-#ifdef USEDSP
-				// Spindizzi's fix for the VF3 cave stage
-				//SCSPDSP_SetSample(&SCSPs[0].DSP,/*sample>>5*/(sample*LPANTABLE[(Enc|0xE0)&0xFFE0])>>(SHIFT+3)/*>>SHIFT*/,ISEL(slot),IMXL(slot));
-				SCSPDSP_SetSample(&SCSPs[0].DSP,/*sample>>5*/(sample*LPANTABLE[(Enc|0xE0)/*&0xFFE0*/])>>(SHIFT+3)/*>>SHIFT*/,ISEL(slot),IMXL(slot));
+#if FM_DELAY
+			RBUFDST = SCSPs[0].DELAYBUF + SCSPs[0].DELAYPTR;
+#else
+			RBUFDST = SCSPs[0].RINGBUF + SCSPs[0].BUFPTR;
 #endif
+			if (SCSPs[0].Slots[sl].active)
+			{
+				_SLOT *slot = SCSPs[0].Slots + sl;
+				UINT16 Enc;
 
+				signed int sample = (int)(masterBalance*(float)SCSP_UpdateSlot(slot));
+
+
+
+				Enc = ((TL(slot)) << 0x0) | ((IMXL(slot)) << 0xd);
+				SCSPDSP_SetSample(&SCSPs[0].DSP, (sample*LPANTABLE[Enc]) >> (SHIFT - 2), ISEL(slot), IMXL(slot));
+				Enc = ((TL(slot)) << 0x0) | ((DIPAN(slot)) << 0x8) | ((DISDL(slot)) << 0xd);
 #ifdef RB_VOLUME
-				smpl += (sample * volume[TL(slot)+pan_left [DIPAN(slot)]])>>17;
-				smpr += (sample * volume[TL(slot)+pan_right[DIPAN(slot)]])>>17;
+				smpl += (sample * volume[TL(slot) + pan_left[DIPAN(slot)]]) >> 17;
+				smpr += (sample * volume[TL(slot) + pan_right[DIPAN(slot)]]) >> 17;
 #else				
-
-				//if(sl==cnts)
 				{
-					smpl+=(sample*LPANTABLE[Enc])>>SHIFT;
-					smpr+=(sample*RPANTABLE[Enc])>>SHIFT;
+					smpl += (sample*LPANTABLE[Enc]) >> SHIFT;
+					smpr += (sample*RPANTABLE[Enc]) >> SHIFT;
 				}
 #endif
 			}
+#if FM_DELAY
+			SCSPs[0].RINGBUF[(SCSPs[0].BUFPTR + 64 - (FM_DELAY - 1)) & 63] = SCSPs[0].DELAYBUF[(SCSPs[0].DELAYPTR + FM_DELAY - (FM_DELAY - 1)) % FM_DELAY];
+#endif
 			++SCSPs[0].BUFPTR;
-			SCSPs[0].BUFPTR&=63;
-			if(HasSlaveSCSP)
-			{
-				if(SCSPs[1].Slots[sl].active)
-				{
-					_SLOT *slot=SCSPs[1].Slots+sl;
-					unsigned short Enc=((TL(slot))<<0x8)|((DIPAN(slot))<<0x0)|((DISDL(slot))<<0x5);
-					RBUFDST=SCSPs[1].RINGBUF+SCSPs[1].BUFPTR;
-					signed int sample=(int) (slaveBalance*(float)SCSP_UpdateSlot(slot));
-#ifdef USEDSP
-					SCSPDSP_SetSample(&SCSPs[1].DSP,(sample*LPANTABLE[(Enc|0xE0)&0xFFE0])>>(SHIFT+3),ISEL(slot),IMXL(slot));
+			SCSPs[0].BUFPTR &= 63;
+#if FM_DELAY
+			++SCSPs[0].DELAYPTR;
+			if (SCSPs[0].DELAYPTR > FM_DELAY - 1) SCSPs[0].DELAYPTR = 0;
 #endif
+#if FM_DELAY
+			if (HasSlaveSCSP) RBUFDST = SCSPs[1].DELAYBUF + SCSPs[1].DELAYPTR;	// slave slot output goes to the FM delay line
+#else
+			if (HasSlaveSCSP) RBUFDST = SCSPs[1].RINGBUF + SCSPs[1].BUFPTR;	// slave slot output goes straight to the ring buffer
+#endif
+			if (HasSlaveSCSP)	// must directly precede the brace below so the whole slave block is skipped when no slave SCSP exists
+			{
+				if (SCSPs[1].Slots[sl].active)
+				{
+					_SLOT *slot = SCSPs[1].Slots + sl;
+					UINT16 Enc;
+
+					signed int sample = (int)(slaveBalance*(float)SCSP_UpdateSlot(slot));
+
+					Enc = ((TL(slot)) << 0x0) | ((IMXL(slot)) << 0xd);
+					SCSPDSP_SetSample(&SCSPs[1].DSP, (sample*LPANTABLE[Enc]) >> (SHIFT - 2), ISEL(slot), IMXL(slot));
+					Enc = ((TL(slot)) << 0x0) | ((DIPAN(slot)) << 0x8) | ((DISDL(slot)) << 0xd);
+					// Accumulate this slave slot's direct output into the stereo mix.
 #ifdef RB_VOLUME
-					smpl += (sample * volume[TL(slot)+pan_left [DIPAN(slot)]])>>17;
-					smpr += (sample * volume[TL(slot)+pan_right[DIPAN(slot)]])>>17;
+					smpl += (sample * volume[TL(slot) + pan_left[DIPAN(slot)]]) >> 17;
+					smpr += (sample * volume[TL(slot) + pan_right[DIPAN(slot)]]) >> 17;
 #else				
-					smpl+=(sample*LPANTABLE[Enc])>>SHIFT;
-					smpr+=(sample*RPANTABLE[Enc])>>SHIFT;
+					smpl += (sample*LPANTABLE[Enc]) >> SHIFT;
+					smpr += (sample*RPANTABLE[Enc]) >> SHIFT;
+					// No extra scope braces around this section: they would have to balance in both preprocessor branches.
 #endif
 #endif
 				}
+#if FM_DELAY
+				SCSPs[1].RINGBUF[(SCSPs[1].BUFPTR + 64 - (FM_DELAY - 1)) & 63] = SCSPs[1].DELAYBUF[(SCSPs[1].DELAYPTR + FM_DELAY - (FM_DELAY - 1)) % FM_DELAY];
+#endif
 				++SCSPs[1].BUFPTR;
-				SCSPs[1].BUFPTR&=63;
+				SCSPs[1].BUFPTR &= 63;
+#if FM_DELAY
+				++SCSPs[1].DELAYPTR;
+				if (SCSPs[1].DELAYPTR > FM_DELAY - 1) SCSPs[1].DELAYPTR = 0;
+#endif
 			}
-		}
-#define ICLIP16(x) (x<-32768)?-32768:((x>32767)?32767:x)
-#ifdef USEDSP
+
+	}
+
 		SCSPDSP_Step(&SCSPs[0].DSP);
-		if(HasSlaveSCSP)
+		if (HasSlaveSCSP)
 			SCSPDSP_Step(&SCSPs[1].DSP);
 
-//		smpl=0;
-//		smpr=0;
-		for(int i=0;i<16;++i)
+		//		smpl=0;
+		//		smpr=0;
+		for (i = 0; i < 16; ++i)
 		{
-			_SLOT *slot=SCSPs[0].Slots+i;
-			int ef=EFSDL(slot);
-			if(ef)
-			{
-				unsigned short Enc=0|((EFPAN(slot))<<0x0)|((EFSDL(slot))<<0x5);
-				signed int leftSample  = (int) (masterBalance*(float)((SCSPs[0].DSP.EFREG[i]*LPANTABLE[Enc])>>SHIFT));
-				signed int rightSample = (int) (masterBalance*(float)((SCSPs[0].DSP.EFREG[i]*RPANTABLE[Enc])>>SHIFT));
-				smpl+=leftSample;
-				smpr+=rightSample;
-			}
-			if(HasSlaveSCSP)
-			{
-				_SLOT *slot=SCSPs[1].Slots+i;
-				ef=EFSDL(slot);
-				if(ef)
+			_SLOT *slot = SCSPs[0].Slots + i;
+			if (legacySound == true) {
+				if (EFSDL(slot))
 				{
-					unsigned short Enc=0|((EFPAN(slot))<<0x0)|((EFSDL(slot))<<0x5);
-					signed int leftSample  = (int) (slaveBalance*(float)((SCSPs[1].DSP.EFREG[i]*LPANTABLE[Enc])>>SHIFT));
-					signed int rightSample = (int) (slaveBalance*(float)((SCSPs[1].DSP.EFREG[i]*RPANTABLE[Enc])>>SHIFT));
-					smpl+=leftSample;
-					smpr+=rightSample;
+					// Legacy DSP: EFSDL is shifted by 14 (0xe) here instead of the 13 (0xd) used on the non-legacy path; 14 is the most reasonable value found so far. - Paul
+					UINT16 Enc = ((EFPAN(slot)) << 0x8) | ((EFSDL(slot)) << 0xe);
+					smpl += (int)(masterBalance*(float)(((SCSPs[0].DSP.EFREG[i] * LPANTABLE[Enc]) >> SHIFT)));
+					smpr += (int)(masterBalance*(float)(((SCSPs[0].DSP.EFREG[i] * RPANTABLE[Enc]) >> SHIFT)));
+				}
+				if (HasSlaveSCSP)
+				{
+					_SLOT *slot = SCSPs[1].Slots + i;
+					if (EFSDL(slot))
+					{
+						UINT16 Enc = ((EFPAN(slot)) << 0x8) | ((EFSDL(slot)) << 0xe);
+						smpl += (int)(slaveBalance*(float)(((SCSPs[1].DSP.EFREG[i] * LPANTABLE[Enc]) >> SHIFT)));
+						smpr += (int)(slaveBalance*(float)(((SCSPs[1].DSP.EFREG[i] * RPANTABLE[Enc]) >> SHIFT)));
+					}
+				}
+			}
+			else {
+				if (EFSDL(slot))
+				{
+					UINT16 Enc = ((EFPAN(slot)) << 0x8) | ((EFSDL(slot)) << 0xd);
+					smpl += (int)(masterBalance*(float)(((SCSPs[0].DSP.EFREG[i] * LPANTABLE[Enc]) >> SHIFT)));
+					smpr += (int)(masterBalance*(float)(((SCSPs[0].DSP.EFREG[i] * RPANTABLE[Enc]) >> SHIFT)));
+				}
+				if (HasSlaveSCSP)
+				{
+					_SLOT *slot = SCSPs[1].Slots + i;
+					if (EFSDL(slot))
+					{
+						UINT16 Enc = ((EFPAN(slot)) << 0x8) | ((EFSDL(slot)) << 0xd);
+						smpl += (int)(slaveBalance*(float)(((SCSPs[1].DSP.EFREG[i] * LPANTABLE[Enc]) >> SHIFT)));
+						smpr += (int)(slaveBalance*(float)(((SCSPs[1].DSP.EFREG[i] * RPANTABLE[Enc]) >> SHIFT)));
+					}
 				}
 			}
 		}
-#endif
-#ifdef REVERB
-		smpl+=bufferrevl[RevR];
-		smpr+=bufferrevr[RevR];
-		bufferrevl[RevW]=((smpl<<0)/REVERB_DIV)>>0;
-		bufferrevr[RevW]=((smpr<<0)/REVERB_DIV)>>0;
-		++RevW;
-		if(RevW==REVERB_LEN)
-			RevW=0;
-		++RevR;
-		if(RevR==REVERB_LEN)
-			RevR=0;
-#endif
-		if(smpl<-32768)
-			smpl=-32768;
-		else if(smpl>32767)
-			smpl=32767;
-		bufferl[s]=smpl;
-		//bufferl[s]=ICLIP16(smpl);
-		bufferr[s]=ICLIP16(smpr);
+
+		if (DAC18B(SCSP))
+		{
+			smpl = ICLIP18(smpl);
+			smpr = ICLIP18(smpr);
+		}
+		else
+		{
+			smpl = ICLIP16(smpl >> 2);
+			smpr = ICLIP16(smpr >> 2);
+		}
+		*bufl++ = ICLIP16(smpl);
+		*bufr++ = ICLIP16(smpr);
+
 
 		SCSP_TimersAddTicks(1);
 		CheckPendingIRQ();
-
-		/*for(int nc=slice;nc;nc--)
-		{
-		Run68kCB(1);		
-		if(M68000_regs.pc==0x6035A6)
-		{
-
-		int a=1;
+		lastdiff = Run68kCB(slice - lastdiff);
 		}
-		}
-		*/
-
-
-		lastdiff=Run68kCB(slice-lastdiff);
 	}
-}
-#endif
 
 void SCSP_Update()
 {
@@ -1973,9 +1740,9 @@ void SCSP_MidiOutW(BYTE val)
 	//printf("68K: MIDI out\n");
 	//DebugLog("Midi Out Buffer push %02X",val);
 	MidiStack[MidiOutW++]=val;
-	MidiOutW&=7;
+	MidiOutW&=31;
 	++MidiOutFill;
-	
+
 	if (s_multiThreaded)
 		MIDILock->Unlock();
 }
@@ -1996,7 +1763,7 @@ unsigned char SCSP_MidiOutR()
 
 	val=MidiStack[MidiOutR++];
 	//DebugLog("Midi Out Buffer pop %02X",val);
-	MidiOutR&=7;
+	MidiOutR&=31;
 	--MidiOutFill;
 	
 	if (s_multiThreaded)
@@ -2062,11 +1829,7 @@ void SCSP_RTECheck()
 */
 }
 
-int SCSP_IRQCB(int)
-{
-	CheckPendingIRQ();
-	return -1;
-}
+
 
 void SCSP_Master_w8(unsigned int addr,unsigned char val)
 {
@@ -2158,7 +1921,6 @@ void SCSP_SaveState(CBlockFile *StateFile)
 	 *
 	 * Things not saved:
 	 *
-	 *	- Reverb buffers and pointers
 	 *	- FNS table (populated by SCSP_Init() and only read)
 	 *	- RB_VOLUME stuff
 	 * 	- ARTABLE, DRTABLE
@@ -2184,6 +1946,10 @@ void SCSP_SaveState(CBlockFile *StateFile)
 		StateFile->Write(SCSPs[i].datab, sizeof(SCSPs[i].datab));
 		StateFile->Write(&(SCSPs[i].BUFPTR), sizeof(SCSPs[i].BUFPTR));
 		StateFile->Write(&(SCSPs[i].Master), sizeof(SCSPs[i].Master));
+#if FM_DELAY
+		StateFile->Write(&(SCSPs[i].DELAYBUF), sizeof(SCSPs[i].DELAYBUF));
+		StateFile->Write(&(SCSPs[i].DELAYPTR), sizeof(SCSPs[i].DELAYPTR));
+#endif
 		
 		// Save each slot
 		for (int j = 0; j < 32; j++)
@@ -2196,6 +1962,7 @@ void SCSP_SaveState(CBlockFile *StateFile)
 			baseOffset = (UINT64) (SCSPs[i].Slots[j].base - SCSPs[i].SCSPRAM);
 			StateFile->Write(&baseOffset, sizeof(baseOffset));
 			StateFile->Write(&(SCSPs[i].Slots[j].cur_addr), sizeof(SCSPs[i].Slots[j].cur_addr));
+			StateFile->Write(&(SCSPs[i].Slots[j].nxt_addr), sizeof(SCSPs[i].Slots[j].nxt_addr));
 			StateFile->Write(&(SCSPs[i].Slots[j].step), sizeof(SCSPs[i].Slots[j].step));
 			StateFile->Write(&(SCSPs[i].Slots[j].Back), sizeof(SCSPs[i].Slots[j].Back));
 			StateFile->Write(&(SCSPs[i].Slots[j].slot), sizeof(SCSPs[i].Slots[j].slot));
@@ -2271,6 +2038,10 @@ void SCSP_LoadState(CBlockFile *StateFile)
 		StateFile->Read(SCSPs[i].datab, sizeof(SCSPs[i].datab));
 		StateFile->Read(&(SCSPs[i].BUFPTR), sizeof(SCSPs[i].BUFPTR));
 		StateFile->Read(&(SCSPs[i].Master), sizeof(SCSPs[i].Master));
+#if FM_DELAY
+		StateFile->Read(&(SCSPs[i].DELAYBUF), sizeof(SCSPs[i].DELAYBUF));
+		StateFile->Read(&(SCSPs[i].DELAYPTR), sizeof(SCSPs[i].DELAYPTR));
+#endif
 		
 		// Load each slot
 		for (int j = 0; j < 32; j++)
@@ -2283,6 +2054,7 @@ void SCSP_LoadState(CBlockFile *StateFile)
 			StateFile->Read(&baseOffset, sizeof(baseOffset));
 			SCSPs[i].Slots[j].base = &(SCSPs[i].SCSPRAM[baseOffset&0xFFFFF]);	// clamp to 1 MB
 			StateFile->Read(&(SCSPs[i].Slots[j].cur_addr), sizeof(SCSPs[i].Slots[j].cur_addr));
+			StateFile->Read(&(SCSPs[i].Slots[j].nxt_addr), sizeof(SCSPs[i].Slots[j].nxt_addr));
 			StateFile->Read(&(SCSPs[i].Slots[j].step), sizeof(SCSPs[i].Slots[j].step));
 			StateFile->Read(&(SCSPs[i].Slots[j].Back), sizeof(SCSPs[i].Slots[j].Back));
 			StateFile->Read(&(SCSPs[i].Slots[j].slot), sizeof(SCSPs[i].Slots[j].slot));
@@ -2332,11 +2104,10 @@ void SCSP_LoadState(CBlockFile *StateFile)
 
 void SCSP_SetBuffers(INT16 *leftBufferPtr, INT16 *rightBufferPtr, int bufferLength)
 {
-	SysFPS = 60.0;	// should this be updated to reflect actual FPS?
+	SoundClock = 76;	// replaces SysFPS = 60.0; NOTE(review): meaning/unit of 76 is not evident here -- confirm
 	bufferl = leftBufferPtr;
 	bufferr = rightBufferPtr;
 	length = bufferLength;
-	
 	cnts = 0;		// what is this for? seems unimportant but need to find out
 }
 
diff --git a/Src/Sound/SCSP.h b/Src/Sound/SCSP.h
index 4ce759a..610f021 100644
--- a/Src/Sound/SCSP.h
+++ b/Src/Sound/SCSP.h
@@ -24,7 +24,7 @@
  * 
  * Header file defining for SCSP emulation.
  */
-
 #ifndef INCLUDED_SCSP_H
 #define INCLUDED_SCSP_H
+#define MAX_SCSP	2	// number of emulated SCSP chips (master + slave)
 
diff --git a/Src/Sound/SCSPDSP.cpp b/Src/Sound/SCSPDSP.cpp
index f49167b..2c8277c 100644
--- a/Src/Sound/SCSPDSP.cpp
+++ b/Src/Sound/SCSPDSP.cpp
@@ -51,112 +51,112 @@ unsigned char UnpackFunc[]={0x8B,0xD8,0x8B,0xC8,0x81,0xE3,0x00,0x80,0x00,0x00,0x
 							0x00,0x00,0xC1,0xE9,0x0B,0xC1,0xE0,0x0B,0xC1,0xE3,0x08,0x83,0xE1,0x0F,0x0B,0xC3,
 							0xD1,0xEB,0x81,0xF3,0x00,0x00,0x40,0x00,0x0B,0xC3,0x83,0xC1,0x08,0xC1,0xE0,0x08,
 							0xD3,0xF8};
-#if 0
-unsigned short inline PACK(signed int val)
+//#if 0
+//unsigned short inline PACK(signed int val)
+//{
+///*	signed int v1=val;
+//	int n=0;
+//	while(((v1>>22)&1) == ((v1>>23)&1))
+//	{
+//		v1<<=1;
+//		++n;
+//	}
+//	v1<<=8;
+//	v1>>=11+8;
+//	v1=(v1&(~0x7800))|(n<<11);
+//	return v1;
+//*/
+//#ifdef USEFLOATPACK
+//	unsigned short f;
+//	__asm
+//	{
+//		mov eax,val
+//		mov ebx,eax
+//		test eax,0x00800000
+//		jne negval
+//		not ebx
+//negval:	not ebx
+//		bsr ecx,ebx
+//		neg ecx
+//		shl eax,8
+//		add ecx,22
+//		shl eax,cl
+//		sar eax,8+11
+//		shl ecx,11
+//		and eax,~0x7800
+//		or eax,ecx
+//		mov f,ax
+//	}
+//	return f;
+//#else
+//
+//	//cut to 16 bits
+//	unsigned int f=((unsigned int ) val)>>8;
+//	return f;
+//#endif
+//}
+//
+//signed int inline UNPACK(unsigned short val)
+//{
+///*	if(val)
+//		int a=1;
+//	unsigned int mant=val&0x7ff;
+//	unsigned int exp=(val>>11)&0xf;
+//	unsigned int sign=(val>>15)&1;
+//	signed int r=0;
+//	r|=mant<<11;
+//	r|=sign<<23;
+//	r|=(sign^1)<<22;
+//
+//	//signed int r=val<<8;
+//	//if(r&0x00800000)
+//	//	r|=0xFF000000;
+//	r<<=8;
+//	r>>=8+exp;
+//	return r;
+//*/
+//#ifdef USEFLOATPACK
+//	signed int r;
+//	__asm
+//	{
+//		xor eax,eax
+//		mov ax,val
+//		mov ebx,eax
+//		mov ecx,eax
+//		and ebx,0x8000
+//		and eax,0x07ff
+//		shr ecx,11
+//		shl eax,11
+//		shl ebx,8
+//		and ecx,0xF
+//		or eax,ebx
+//		shr ebx,1
+//		xor ebx,0x00400000
+//		or eax,ebx
+//		add ecx,8
+//		shl eax,8
+//		sar eax,cl
+//		mov r,eax
+//	}
+//#else
+//	//unpack 16->24
+//	signed int r=val<<8;
+//	r<<=8;
+//	r>>=8;
+//#endif
+//	return r;
+//}
+//#else
+
+static UINT16 PACK(INT32 val)
 {
-/*	signed int v1=val;
-	int n=0;
-	while(((v1>>22)&1) == ((v1>>23)&1))
-	{
-		v1<<=1;
-		++n;
-	}
-	v1<<=8;
-	v1>>=11+8;
-	v1=(v1&(~0x7800))|(n<<11);
-	return v1;
-*/
-#ifdef USEFLOATPACK
-	unsigned short f;
-	__asm
-	{
-		mov eax,val
-		mov ebx,eax
-		test eax,0x00800000
-		jne negval
-		not ebx
-negval:	not ebx
-		bsr ecx,ebx
-		neg ecx
-		shl eax,8
-		add ecx,22
-		shl eax,cl
-		sar eax,8+11
-		shl ecx,11
-		and eax,~0x7800
-		or eax,ecx
-		mov f,ax
-	}
-	return f;
-#else
-
-	//cut to 16 bits
-	unsigned int f=((unsigned int ) val)>>8;
-	return f;
-#endif
-}
-
-signed int inline UNPACK(unsigned short val)
-{
-/*	if(val)
-		int a=1;
-	unsigned int mant=val&0x7ff;
-	unsigned int exp=(val>>11)&0xf;
-	unsigned int sign=(val>>15)&1;
-	signed int r=0;
-	r|=mant<<11;
-	r|=sign<<23;
-	r|=(sign^1)<<22;
-
-	//signed int r=val<<8;
-	//if(r&0x00800000)
-	//	r|=0xFF000000;
-	r<<=8;
-	r>>=8+exp;
-	return r;
-*/
-#ifdef USEFLOATPACK
-	signed int r;
-	__asm
-	{
-		xor eax,eax
-		mov ax,val
-		mov ebx,eax
-		mov ecx,eax
-		and ebx,0x8000
-		and eax,0x07ff
-		shr ecx,11
-		shl eax,11
-		shl ebx,8
-		and ecx,0xF
-		or eax,ebx
-		shr ebx,1
-		xor ebx,0x00400000
-		or eax,ebx
-		add ecx,8
-		shl eax,8
-		sar eax,cl
-		mov r,eax
-	}
-#else
-	//unpack 16->24
-	signed int r=val<<8;
-	r<<=8;
-	r>>=8;
-#endif
-	return r;
-}
-#else
-
-static signed short PACK(signed int val)
-{
-	unsigned int temp;
-	int sign,exponent,k;
+	UINT32 temp;
+	int sign, exponent, k;
 
 	sign = (val >> 23) & 0x1;
 	temp = (val ^ (val << 1)) & 0xFFFFFF;
 	exponent = 0;
-	for (k=0; k<12; k++)
+	for (k = 0; k < 12; k++)
 	{
 		if (temp & 0x800000)
 			break;
@@ -168,25 +168,31 @@ static signed short PACK(signed int val)
 	else
 		val <<= 11;
 	val >>= 11;
+	val &= 0x7FF;
 	val |= sign << 15;
 	val |= exponent << 11;
 
-	return (unsigned short)val;
+	return (UINT16)val;
 }
 
-static signed int UNPACK(unsigned short val)
+static INT32 UNPACK(UINT16 val)
 {
-	int sign,exponent,mantissa;
-	signed int uval;
+	int sign, exponent, mantissa;
+	INT32 uval;
 
 	sign = (val >> 15) & 0x1;
 	exponent = (val >> 11) & 0xF;
 	mantissa = val & 0x7FF;
 	uval = mantissa << 11;
 	if (exponent > 11)
+	{
 		exponent = 11;
+		uval |= sign << 22;
+	}
 	else
+	{
 		uval |= (sign ^ 1) << 22;
+	}
 	uval |= sign << 23;
 	uval <<= 8;
 	uval >>= 8;
@@ -194,918 +200,276 @@ static signed int UNPACK(unsigned short val)
 
 	return uval;
 }
-#endif
+//#endif
 
 void SCSPDSP_Init(_SCSPDSP *DSP)
 {
-	memset(DSP,0,sizeof(_SCSPDSP));
-	DSP->RBL=0x8000;
-	DSP->Stopped=true;
+	memset(DSP, 0, sizeof(_SCSPDSP));
+	DSP->RBL = (8 * 1024); // ring buffer size in words; 8K words presumably corresponds to an initial RBL register value of 0
+	DSP->Stopped = 1;
 }
-#ifndef DYNDSP
+//#ifndef DYNDSP
 void SCSPDSP_Step(_SCSPDSP *DSP)
 {
-	if(DSP->Stopped)
+	INT32 ACC = 0;    //26 bit
+	INT32 SHIFTED = 0;    //24 bit
+	INT32 X = 0;  //24 bit
+	INT32 Y = 0;  //13 bit
+	INT32 B = 0;  //26 bit
+	INT32 INPUTS = 0; //24 bit
+	INT32 MEMVAL = 0;
+	INT32 FRC_REG = 0;    //13 bit
+	INT32 Y_REG = 0;      //24 bit
+	UINT32 ADDR = 0;
+	UINT32 ADRS_REG = 0;  //13 bit
+	int step;
+
+	if (DSP->Stopped)
 		return;
-	signed int ACC=0;	//26 bit
-	signed int SHIFTED=0;	//24 bit
-	signed int X=0;	//24 bit
-	signed int Y=0;	//13 bit
-	signed int B=0;	//26 bit
-	signed int INPUTS=0;	//24 bit
-	signed int MEMVAL=0;
-	signed int FRC_REG=0;	//13 bit
-	signed int Y_REG=0;		//24 bit
-	unsigned int ADDR=0;
-	unsigned int ADRS_REG=0;	//13 bit
 
-	memset(DSP->EFREG,0,2*16);
-	int dump=0;
-	FILE *f=NULL;
-	if(dump)
-		f=fopen("dsp.txt","wt");
-	for(int step=0;step</*128*/DSP->LastStep;++step)
+	memset(DSP->EFREG, 0, 2 * 16);
+	for (step = 0; step </*128*/DSP->LastStep; ++step)
 	{
-		unsigned short *IPtr=&(DSP->MPRO[step*4]);
+		UINT16 *IPtr = DSP->MPRO + step * 4;
 
-//		if(IPtr[0]==0 && IPtr[1]==0 && IPtr[2]==0 && IPtr[3]==0)
-//			break;
+		//		if(IPtr[0]==0 && IPtr[1]==0 && IPtr[2]==0 && IPtr[3]==0)
+		//			break;
 
-		unsigned int TRA=(IPtr[0]>>8)&0x7F;
-		unsigned int TWT=(IPtr[0]>>7)&0x01;
-		unsigned int TWA=(IPtr[0]>>0)&0x7F;
-		
-		unsigned int XSEL=(IPtr[1]>>15)&0x01;
-		unsigned int YSEL=(IPtr[1]>>13)&0x03;
-		unsigned int IRA=(IPtr[1]>>6)&0x3F;
-		unsigned int IWT=(IPtr[1]>>5)&0x01;
-		unsigned int IWA=(IPtr[1]>>0)&0x1F;
-	
-		unsigned int TABLE=(IPtr[2]>>15)&0x01;
-		unsigned int MWT=(IPtr[2]>>14)&0x01;
-		unsigned int MRD=(IPtr[2]>>13)&0x01;
-		unsigned int EWT=(IPtr[2]>>12)&0x01;
-		unsigned int EWA=(IPtr[2]>>8)&0x0F;
-		unsigned int ADRL=(IPtr[2]>>7)&0x01;
-		unsigned int FRCL=(IPtr[2]>>6)&0x01;
-		unsigned int SHIFT=(IPtr[2]>>4)&0x03;
-		unsigned int YRL=(IPtr[2]>>3)&0x01;
-		unsigned int NEGB=(IPtr[2]>>2)&0x01;
-		unsigned int ZERO=(IPtr[2]>>1)&0x01;
-		unsigned int BSEL=(IPtr[2]>>0)&0x01;
+		UINT32 TRA = (IPtr[0] >> 8) & 0x7F;
+		UINT32 TWT = (IPtr[0] >> 7) & 0x01;
+		UINT32 TWA = (IPtr[0] >> 0) & 0x7F;
 
-		unsigned int NOFL=(IPtr[3]>>15)&1;		//????
-		unsigned int COEF=(IPtr[3]>>9)&0x3f;
-		
-		unsigned int MASA=(IPtr[3]>>2)&0x1f;	//???
-		unsigned int ADREB=(IPtr[3]>>1)&0x1;
-		unsigned int NXADR=(IPtr[3]>>0)&0x1;
+		UINT32 XSEL = (IPtr[1] >> 15) & 0x01;
+		UINT32 YSEL = (IPtr[1] >> 13) & 0x03;
+		UINT32 IRA = (IPtr[1] >> 6) & 0x3F;
+		UINT32 IWT = (IPtr[1] >> 5) & 0x01;
+		UINT32 IWA = (IPtr[1] >> 0) & 0x1F;
 
+		UINT32 TABLE = (IPtr[2] >> 15) & 0x01;
+		UINT32 MWT = (IPtr[2] >> 14) & 0x01;
+		UINT32 MRD = (IPtr[2] >> 13) & 0x01;
+		UINT32 EWT = (IPtr[2] >> 12) & 0x01;
+		UINT32 EWA = (IPtr[2] >> 8) & 0x0F;
+		UINT32 ADRL = (IPtr[2] >> 7) & 0x01;
+		UINT32 FRCL = (IPtr[2] >> 6) & 0x01;
+		UINT32 SHIFT = (IPtr[2] >> 4) & 0x03;
+		UINT32 YRL = (IPtr[2] >> 3) & 0x01;
+		UINT32 NEGB = (IPtr[2] >> 2) & 0x01;
+		UINT32 ZERO = (IPtr[2] >> 1) & 0x01;
+		UINT32 BSEL = (IPtr[2] >> 0) & 0x01;
+
+		UINT32 NOFL = (IPtr[3] >> 15) & 0x01;	//????
+		UINT32 COEF = (IPtr[3] >> 9) & 0x3f;
+
+		UINT32 MASA = (IPtr[3] >> 2) & 0x1f;	//???
+		UINT32 ADREB = (IPtr[3] >> 1) & 0x01;
+		UINT32 NXADR = (IPtr[3] >> 0) & 0x01;
+
+		INT64 v;
 
 		//operations are done at 24 bit precision
-
-		if(MASA)
-			int a=1;
-		if(NOFL)
-			int a=1;
-
-		int dump=0;
-
-		if(f)
-		{
-#define DUMP(v)	fprintf(f," " #v ": %04X",v);
-			
-			fprintf(f,"%d: ",step);
-			DUMP(ACC);
-			DUMP(SHIFTED);
-			DUMP(X);
-			DUMP(Y);
-			DUMP(B);
-			DUMP(INPUTS);
-			DUMP(MEMVAL);
-			DUMP(FRC_REG);
-			DUMP(Y_REG);
-			DUMP(ADDR);
-			DUMP(ADRS_REG);
-			fprintf(f,"\n");
-		}
+#if 0
+		if (MASA)
+			int a = 1;
+		if (NOFL)
+			int a = 1;
+#endif
 		//INPUTS RW
-		assert(IRA<0x32);
-		if(IRA<=0x1f)
-			INPUTS=DSP->MEMS[IRA];	
-		else if(IRA<=0x2F)
-			INPUTS=DSP->MIXS[IRA-0x20];	//MIXS is 24 bit
-		else if(IRA<=0x31)
-			INPUTS=DSP->EXTS[IRA-0x30];
-		else INPUTS=0;
+// IRA > 0x31 matches no input bank (colmns97 hits this): the assert is disabled and
+// the final else below returns early, skipping the DEC decrement and the MIXS clear.
+		if (IRA <= 0x1f)
+			INPUTS = DSP->MEMS[IRA];
+		else if (IRA <= 0x2F)
+			INPUTS = DSP->MIXS[IRA - 0x20] << 4;  //MIXS is 20 bit
+		else if (IRA <= 0x31)
+			INPUTS = DSP->EXTS[IRA - 0x30] << 8;  //EXTS is 16 bit
+		else
+			return;
 
-		INPUTS<<=8;
-		INPUTS>>=8;
+		INPUTS <<= 8;
+		INPUTS >>= 8;
 		//if(INPUTS&0x00800000)
 		//	INPUTS|=0xFF000000;
 
-		if(IWT)
+		if (IWT)
 		{
-			DSP->MEMS[IWA]=MEMVAL;	//MEMVAL was selected in previous MRD
-			if(IRA==IWA)
-				INPUTS=MEMVAL;
+			DSP->MEMS[IWA] = MEMVAL;  //MEMVAL was selected in previous MRD
+			if (IRA == IWA)
+				INPUTS = MEMVAL;
 		}
 
 		//Operand sel
 		//B
-		if(!ZERO)
+		if (!ZERO)
 		{
-			if(BSEL)
-				B=ACC;
+			if (BSEL)
+				B = ACC;
 			else
 			{
-				B=DSP->TEMP[(TRA+DSP->DEC)&0x7F];
-				B<<=8;
-				B>>=8;
+				B = DSP->TEMP[(TRA + DSP->DEC) & 0x7F];
+				B <<= 8;
+				B >>= 8;
 				//if(B&0x00800000)
-				//	B|=0xFF000000;	//Sign extend
+				//	B|=0xFF000000;  //Sign extend
 			}
-			if(NEGB)
-				B=0-B;
+			if (NEGB)
+				B = 0 - B;
 		}
 		else
-			B=0;
+			B = 0;
 
 		//X
-		if(XSEL)
-			X=INPUTS;
+		if (XSEL)
+			X = INPUTS;
 		else
 		{
-			X=DSP->TEMP[(TRA+DSP->DEC)&0x7F];
-			X<<=8;
-			X>>=8;
+			X = DSP->TEMP[(TRA + DSP->DEC) & 0x7F];
+			X <<= 8;
+			X >>= 8;
 			//if(X&0x00800000)
 			//	X|=0xFF000000;
 		}
 
 		//Y
-		if(YSEL==0)
-			Y=FRC_REG;
-		else if(YSEL==1)
-			Y=DSP->COEF[COEF]>>3;	//COEF is 16 bits
-		else if(YSEL==2)
-			Y=(Y_REG>>11)&0x1FFF;
-		else if(YSEL==3)
-			Y=(Y_REG>>4)&0x0FFF;
+		if (YSEL == 0)
+			Y = FRC_REG;
+		else if (YSEL == 1)
+			Y = DSP->COEF[COEF] >> 3;   //COEF is 16 bits
+		else if (YSEL == 2)
+			Y = (Y_REG >> 11) & 0x1FFF;
+		else if (YSEL == 3)
+			Y = (Y_REG >> 4) & 0x0FFF;
 
-		if(YRL)
-			Y_REG=INPUTS;
+		if (YRL)
+			Y_REG = INPUTS;
 
 		//Shifter
-		if(SHIFT==0)
+		if (SHIFT == 0)
 		{
-			SHIFTED=ACC;
-			if(SHIFTED>0x007FFFFF)
-				SHIFTED=0x007FFFFF;
-			if(SHIFTED<(-0x00800000))
-				SHIFTED=-0x00800000;
+			SHIFTED = ACC;
+			if (SHIFTED > 0x007FFFFF)
+				SHIFTED = 0x007FFFFF;
+			if (SHIFTED < (-0x00800000))
+				SHIFTED = -0x00800000;
 		}
-		else if(SHIFT==1)
+		else if (SHIFT == 1)
 		{
-			SHIFTED=ACC*2;
-			if(SHIFTED>0x007FFFFF)
-				SHIFTED=0x007FFFFF;
-			if(SHIFTED<(-0x00800000))
-				SHIFTED=-0x00800000;
+			SHIFTED = ACC * 2;
+			if (SHIFTED > 0x007FFFFF)
+				SHIFTED = 0x007FFFFF;
+			if (SHIFTED < (-0x00800000))
+				SHIFTED = -0x00800000;
 		}
-		else if(SHIFT==2)
+		else if (SHIFT == 2)
 		{
-			SHIFTED=ACC*2;
-			SHIFTED<<=8;
-			SHIFTED>>=8;
+			SHIFTED = ACC * 2;
+			SHIFTED <<= 8;
+			SHIFTED >>= 8;
 			//SHIFTED&=0x00FFFFFF;
 			//if(SHIFTED&0x00800000)
 			//	SHIFTED|=0xFF000000;
 		}
-		else if(SHIFT==3)
+		else if (SHIFT == 3)
 		{
-			SHIFTED=ACC;
-			SHIFTED<<=8;
-			SHIFTED>>=8;
+			SHIFTED = ACC;
+			SHIFTED <<= 8;
+			SHIFTED >>= 8;
 			//SHIFTED&=0x00FFFFFF;
 			//if(SHIFTED&0x00800000)
 			//	SHIFTED|=0xFF000000;
 		}
 
 		//ACCUM
-		Y<<=19;
-		Y>>=19;
+		Y <<= 19;
+		Y >>= 19;
 		//if(Y&0x1000)
 		//	Y|=0xFFFFF000;
 
-		INT64 v=(((INT64) X*(INT64) Y)>>12);
-		ACC=(int) v+B;
+		v = (((INT64)X*(INT64)Y) >> 12);
+		ACC = (int)v + B;
 
-		if(TWT)
-			DSP->TEMP[(TWA+DSP->DEC)&0x7F]=SHIFTED;
+		if (TWT)
+			DSP->TEMP[(TWA + DSP->DEC) & 0x7F] = SHIFTED;
 
-		if(FRCL)
+		if (FRCL)
 		{
-			if(SHIFT==3)
-				FRC_REG=SHIFTED&0x0FFF;
+			if (SHIFT == 3)
+				FRC_REG = SHIFTED & 0x0FFF;
 			else
-				FRC_REG=(SHIFTED>>11)&0x1FFF;
+				FRC_REG = (SHIFTED >> 11) & 0x1FFF;
 		}
 
-		if(MRD || MWT)
-		//if(0)
+		if (MRD || MWT)
+			//if(0)
 		{
-			ADDR=DSP->MADRS[MASA];
-			if(!TABLE)
-				ADDR+=DSP->DEC;
-			if(ADREB)
-				ADDR+=ADRS_REG&0x0FFF;
-			if(NXADR)
+			ADDR = DSP->MADRS[MASA];
+			if (!TABLE)
+				ADDR += DSP->DEC;
+			if (ADREB)
+				ADDR += ADRS_REG & 0x0FFF;
+			if (NXADR)
 				ADDR++;
-			if(!TABLE)
-				ADDR&=DSP->RBL-1;
+			if (!TABLE)
+				ADDR &= DSP->RBL - 1;
 			else
-				ADDR&=0xFFFF;
+				ADDR &= 0xFFFF;
 			//ADDR<<=1;
 			//ADDR+=DSP->RBP<<13;
 			//MEMVAL=DSP->SCSPRAM[ADDR>>1];
-			ADDR+=DSP->RBP<<12;
-
-			if(MWT && (step&1))
+			ADDR += DSP->RBP << 12;
+			if (ADDR > 0x7ffff) ADDR = 0;
+			if (MRD && (step & 1)) //memory only allowed on odd? DoA inserts NOPs on even
 			{
-				if(NOFL)
-					DSP->SCSPRAM[ADDR]=SHIFTED>>8;
+				if (NOFL)
+					MEMVAL = DSP->SCSPRAM[ADDR] << 8;
 				else
-					DSP->SCSPRAM[ADDR]=PACK(SHIFTED);
+					MEMVAL = UNPACK(DSP->SCSPRAM[ADDR]);
 			}
-
-			if(MRD && (step&1))	//memory only allowed on odd? DoA inserts NOPs on even
+			if (MWT && (step & 1))
 			{
-				if(NOFL)
-					MEMVAL=DSP->SCSPRAM[ADDR]<<8;
+				if (NOFL)
+					DSP->SCSPRAM[ADDR] = SHIFTED >> 8;
 				else
-					MEMVAL=UNPACK(DSP->SCSPRAM[ADDR]);
-				if(MEMVAL)
-					int a=1;
-
+					DSP->SCSPRAM[ADDR] = PACK(SHIFTED);
 			}
-			
 		}
 
-		if(ADRL)
+		if (ADRL)
 		{
-			if(SHIFT==3)
-				ADRS_REG=(SHIFTED>>12)&0xFFF;
+			if (SHIFT == 3)
+				ADRS_REG = (SHIFTED >> 12) & 0xFFF;
 			else
-				ADRS_REG=(INPUTS>>16);
+				ADRS_REG = (INPUTS >> 16);
 		}
 
-		if(EWT)
-			DSP->EFREG[EWA]+=SHIFTED>>8;
+		if (EWT)
+			DSP->EFREG[EWA] += SHIFTED >> 8;
 
 	}
 	--DSP->DEC;
-	memset(DSP->MIXS,0,4*16);
-	if(f)
-		fclose(f);
-}
-#else
-
-FILE *f=NULL;
-
-void SCSPDSP_Step(_SCSPDSP *DSP)
-{
-	if(DSP->Stopped)
-		return;
-
-	memset(DSP->EFREG,0,2*16);
-	
-	assert(DSP->DoSteps!=NULL);
-
-	int dump=0;
-	if(dump)
-		f=fopen("dsp2.txt","wt");
-
-	DSP->DoSteps();
-
-	if(f)
-	{
-		fclose(f);
-		f=NULL;
-	}
-
-	--DSP->DEC;
-	memset(DSP->MIXS,0,4*16);
-
+	memset(DSP->MIXS, 0, 4 * 16);
 }
 
-void __fastcall dumpreg(_SCSPDSP *DSP)
+void SCSPDSP_SetSample(_SCSPDSP *DSP, INT32 sample, int SEL, int MXL)
 {
-	static int n=0;
-	//f=fopen("dsp2.txt","a+t");
-		if(f)
-		{
-#define DUMP(v)	fprintf(f," " #v ": %04X",DSP->v);
-			
-			fprintf(f,"%d: ",n++);
-			DUMP(ACC);
-			DUMP(SHIFTED);
-			DUMP(X);
-			DUMP(Y);
-			DUMP(B);
-			DUMP(INPUTS);
-			DUMP(MEMVAL);
-			DUMP(FRC_REG);
-			DUMP(Y_REG);
-			DUMP(ADDR);
-			DUMP(ADRS_REG);
-			fprintf(f,"\n");
-		}
-}
-
-#define EMIT8(x) *PtrInsts=x; ++PtrInsts;
-#define EMIT16(x) *((unsigned short *) PtrInsts)=x; PtrInsts+=2;
-#define EMIT32(x) *((unsigned int *) PtrInsts)=x; PtrInsts+=4;
-
-#define MOV_EAXTOMEM(addr)	EMIT8(0xA3); EMIT32((unsigned int) addr);
-#define MOV_MEMTOEAX(addr)	EMIT8(0xA1); EMIT32((unsigned int) addr);
-#define MOV_MEMTOAX(addr)	EMIT8(0x66); EMIT8(0xA1); EMIT32((unsigned int) addr);
-#define MOV_MEMTOEBX(addr)	EMIT8(0x8B); EMIT8(0x1D); EMIT32((unsigned int) addr);
-#define ADD_MEMTOEAX(addr)	EMIT8(0x03); EMIT8(0x05); EMIT32((unsigned int) addr);
-#define ADD_EAXTOMEM(addr)	EMIT8(0x01); EMIT8(0x05); EMIT32((unsigned int) addr);
-#define ADD_AXTOMEM(addr)	EMIT8(0x66); EMIT8(0x01); EMIT8(0x05); EMIT32((unsigned int) addr);
-#define MOV_IMMTOEAX(imm)	EMIT8(0xB8); EMIT32((unsigned int) imm);
-#define MOV_IMMTOECX(imm)	EMIT8(0xB9); EMIT32((unsigned int) imm);
-#define ADD_IMMTOEAX(imm)	EMIT8(0x05); EMIT32((unsigned int) imm);
-#define ADD_EBXTOEAX()		EMIT8(0x03); EMIT8(0xC3);
-#define CMP_IMMTOEAX(imm)	EMIT8(0x3D); EMIT32((unsigned int) imm);
-#define MOV_0TOEAX()		EMIT8(0x33); EMIT8(0xC0);
-#define MOV_EAXTOEBX()		EMIT8(0x8B); EMIT8(0xD8);
-#define MOV_EAXTOECX()		EMIT8(0x8B); EMIT8(0xC8);
-#define NEG_EAX()			EMIT8(0xF7); EMIT8(0xD8);
-#define INC_EAX()			EMIT8(0x40);
-#define MOV_MEMEAXTOEAX()	EMIT8(0x8b); EMIT8(0x00);
-#define MOV_MEMEAXTOAX()	EMIT8(0x66); EMIT8(0x8b); EMIT8(0x00);
-#define MOV_EBXTOMEMEAX()	EMIT8(0x89); EMIT8(0x18);
-#define MOV_EAXTOMEMEBX()	EMIT8(0x89); EMIT8(0x03);
-#define MOV_AXTOMEMEBX()	EMIT8(0x66); EMIT8(0x89); EMIT8(0x03);
-#define SHL_EAX(count)		EMIT8(0xC1); EMIT8(0xE0); EMIT8(count);
-#define SHL_EBX(count)		EMIT8(0xC1); EMIT8(0xE3); EMIT8(count);
-#define SHL_EDX(count)		EMIT8(0xC1); EMIT8(0xE2); EMIT8(count);
-#define SHRD_EAX_EDX(count)	EMIT8(0x0F); EMIT8(0xAC); EMIT8(0xD0); EMIT8(count);
-#define SAR_EAX(count)		EMIT8(0xC1); EMIT8(0xF8); EMIT8(count);
-#define SHR_EAX(count)		EMIT8(0xC1); EMIT8(0xE8); EMIT8(count);
-#define AND_EAX(mask)		EMIT8(0x25); EMIT32(mask);
-#define AND_EBX(mask)		EMIT8(0x81); EMIT8(0xE3); EMIT32(mask);
-#define AND_EAX_EBX()		EMIT8(0x23); EMIT8(0xC3);
-#define DEC_EBX()			EMIT8(0x4B);
-#define OR_EAX_EDX()		EMIT8(0x0B); EMIT8(0xC2);
-#define ADD_EAX_ECX()		EMIT8(0x03); EMIT8(0xC1);
-#define IMUL_EAX_EBX()		EMIT8(0xF7); EMIT8(0xEB);
-//#define IMUL_EAX_EBX()		EMIT8(0xF7); EMIT8(0xE3);
-#define RET()				EMIT8(0xC3);
-#define PUSHA()				EMIT8(0x51); EMIT8(0x52); EMIT8(0x53); EMIT8(0x56); //ecx edx ebx esi
-#define POPA()				EMIT8(0x5E); EMIT8(0x5B); EMIT8(0x5A); EMIT8(0x59);	//esi ebx edx ecx
-
-struct _INST
-{
-	unsigned int TRA;
-	unsigned int TWT;
-	unsigned int TWA;
-	
-	unsigned int XSEL;
-	unsigned int YSEL;
-	unsigned int IRA;
-	unsigned int IWT;
-	unsigned int IWA;
-
-	unsigned int TABLE;
-	unsigned int MWT;
-	unsigned int MRD;
-	unsigned int EWT;
-	unsigned int EWA;
-	unsigned int ADRL;
-	unsigned int FRCL;
-	unsigned int SHIFT;
-	unsigned int YRL;
-	unsigned int NEGB;
-	unsigned int ZERO;
-	unsigned int BSEL;
-
-	unsigned int NOFL;
-	unsigned int COEF;
-	
-	unsigned int MASA;
-	unsigned int ADREB;
-	unsigned int NXADR;
-};
-
-void DecodeInst(unsigned short *IPtr,_INST *i)
-{
-	i->TRA=(IPtr[0]>>8)&0x7F;
-	i->TWT=(IPtr[0]>>7)&0x01;
-	i->TWA=(IPtr[0]>>0)&0x7F;
-	
-	i->XSEL=(IPtr[1]>>15)&0x01;
-	i->YSEL=(IPtr[1]>>13)&0x03;
-	i->IRA=(IPtr[1]>>6)&0x3F;
-	i->IWT=(IPtr[1]>>5)&0x01;
-	i->IWA=(IPtr[1]>>0)&0x1F;
-
-	i->TABLE=(IPtr[2]>>15)&0x01;
-	i->MWT=(IPtr[2]>>14)&0x01;
-	i->MRD=(IPtr[2]>>13)&0x01;
-	i->EWT=(IPtr[2]>>12)&0x01;
-	i->EWA=(IPtr[2]>>8)&0x0F;
-	i->ADRL=(IPtr[2]>>7)&0x01;
-	i->FRCL=(IPtr[2]>>6)&0x01;
-	i->SHIFT=(IPtr[2]>>4)&0x03;
-	i->YRL=(IPtr[2]>>3)&0x01;
-	i->NEGB=(IPtr[2]>>2)&0x01;
-	i->ZERO=(IPtr[2]>>1)&0x01;
-	i->BSEL=(IPtr[2]>>0)&0x01;
-
-	i->NOFL=(IPtr[3]>>15)&1;		//????
-	i->COEF=(IPtr[3]>>9)&0x3f;
-	
-	i->MASA=(IPtr[3]>>2)&0x1f;	//???
-	i->ADREB=(IPtr[3]>>1)&0x1;
-	i->NXADR=(IPtr[3]>>0)&0x1;
-}
-
-#define USES_SHIFTED(inst)	(inst.TWT || inst.FRCL || inst.MWT || inst.ADRL || inst.EWT)
-
-void SCSPDSP_Recompile(_SCSPDSP *DSP)
-{
-	if(DSP->DoSteps)
-		free(DSP->DoSteps);
-	DSP->DoSteps=(void (*)()) malloc(DYNBUF);
-	unsigned char *PtrInsts=(unsigned char *)DSP->DoSteps;
-
-	PUSHA();
-
-	for(int step=0;step</*128*/DSP->LastStep;++step)
-	{
-		unsigned short *IPtr=&(DSP->MPRO[step*4]);
-		_INST ThisInst,NextInst;
-		DecodeInst(IPtr,&ThisInst);
-		DecodeInst(IPtr+4,&NextInst);
-		
-
-
-		EMIT8(0x90);
-
-		MOV_IMMTOECX(DSP);
-		MOV_IMMTOEAX(dumpreg);
-		EMIT8(0xFF); EMIT8(0xD0);
-
-		//INPUTS RW
-		assert(ThisInst.IRA<0x32);
-		if((ThisInst.XSEL || ThisInst.YRL || ThisInst.ADRL) || !DYNOPT)
-		{
-			if(ThisInst.IRA<=0x1f)
-			{
-				//INPUTS=DSP->MEMS[IRA];	
-				MOV_MEMTOEAX(&(DSP->MEMS[ThisInst.IRA]));
-				SHL_EAX(8);
-			}
-			else if(ThisInst.IRA<=0x2F)
-			{
-				//INPUTS=DSP->MIXS[IRA-0x20]<<8;	//MIXS is 16 bit
-				MOV_MEMTOEAX(&(DSP->MIXS[ThisInst.IRA-0x20]));
-				SHL_EAX(8);
-			}
-			else if(ThisInst.IRA<=0x31)
-			{
-				MOV_MEMTOEAX(&(DSP->EXTS[ThisInst.IRA-0x30]));
-				SHL_EAX(8);				
-			}
-			else
-			{
-				MOV_0TOEAX();
-			}
-			SAR_EAX(8);
-			
-			MOV_EAXTOMEM(&(DSP->INPUTS));
-		}
-
-		if(ThisInst.IWT)
-		{
-			MOV_MEMTOEAX(&DSP->MEMVAL);
-			MOV_EAXTOMEM(&DSP->MEMS[ThisInst.IWA]);
-			//DSP->MEMS[IWA]=MEMVAL;	//MEMVAL was selected in previous MRD
-			if(ThisInst.IRA==ThisInst.IWA)
-			{
-				//INPUTS=MEMVAL;
-				MOV_EAXTOMEM(&DSP->INPUTS);
-			}
-		}
-		
-		if((USES_SHIFTED(NextInst) || NextInst.BSEL) || !DYNOPT)
-		{
-			//Operand sel
-			//B
-			if(!ThisInst.ZERO)
-			{
-				if(ThisInst.BSEL)
-				{
-					//B=ACC;
-					MOV_MEMTOEAX(&DSP->ACC);
-	//				MOV_EAXTOMEM(&DSP->B);	//
-				}
-				else
-				{
-					MOV_IMMTOEAX(ThisInst.TRA);
-					ADD_MEMTOEAX(&(DSP->DEC));
-					AND_EAX(0x7F);
-					SHL_EAX(2);
-					ADD_IMMTOEAX(&(DSP->TEMP));
-					MOV_MEMEAXTOEAX();
-					SHL_EAX(8);
-					SAR_EAX(8);
-	//				MOV_EAXTOMEM(&DSP->B);	//
-
-
-					//B=DSP->TEMP[(TRA+DSP->DEC)&0x7F];
-					//B<<=8;
-					//B>>=8;
-					//if(B&0x00800000)
-					//	B|=0xFF000000;	//Sign extend
-				}
-				if(ThisInst.NEGB)
-				{
-					//B=0-B;
-					NEG_EAX();
-				}
-			}
-			else
-			{
-				MOV_0TOEAX();
-			}
-			//MOV_EAXTOMEM(&DSP->B);
-			MOV_EAXTOECX();
-
-			//X
-			if(ThisInst.XSEL)
-			{
-				//X=INPUTS;
-				MOV_MEMTOEAX(&DSP->INPUTS);
-	//			MOV_EAXTOMEM(&DSP->X);	//
-			}
-			else
-			{
-				//X=DSP->TEMP[(TRA+DSP->DEC)&0x7F];
-				//X<<=8;
-				//X>>=8;
-				MOV_IMMTOEAX(ThisInst.TRA);
-				ADD_MEMTOEAX(&(DSP->DEC));
-				AND_EAX(0x7F);
-				SHL_EAX(2);
-				ADD_IMMTOEAX(&(DSP->TEMP));
-				MOV_MEMEAXTOEAX();
-				SHL_EAX(8);
-				SAR_EAX(8);
-				//if(X&0x00800000)
-				//	X|=0xFF000000;
-	//			MOV_EAXTOMEM(&DSP->X);	//
-			}
-			MOV_EAXTOEBX();
-		}
-
-
-		//if(TWT || /*MRD ||*/ MWT || EWT || ADRL || FRCL)
-		if(USES_SHIFTED(ThisInst) || !DYNOPT)
-		{
-			if(ThisInst.SHIFT==0)
-			{
-				MOV_MEMTOEAX(&DSP->ACC);
-				CMP_IMMTOEAX(0x007FFFFF);
-				EMIT8(0x7E); EMIT8(0x05);	//JLE
-				MOV_IMMTOEAX(0x007FFFFF);
-				CMP_IMMTOEAX(0xFF800000);
-				EMIT8(0x7D); EMIT8(0x05);	//JGE
-				MOV_IMMTOEAX(0xFF800000);
-
-
-				//SHIFTED=ACC;
-				//if(SHIFTED>0x007FFFFF)
-				//	SHIFTED=0x007FFFFF;
-				//if(SHIFTED<(-0x00800000))
-				//	SHIFTED=-0x00800000;
-			}
-			else if(ThisInst.SHIFT==1)
-			{
-				//SHIFTED=ACC*2;
-				MOV_MEMTOEAX(&DSP->ACC);
-				SHL_EAX(1);
-				CMP_IMMTOEAX(0x007FFFFF);
-				EMIT8(0x7E); EMIT8(0x05);	//JLE
-				MOV_IMMTOEAX(0x007FFFFF);
-				CMP_IMMTOEAX(0xFF800000);
-				EMIT8(0x7D); EMIT8(0x05);	//JGE
-				MOV_IMMTOEAX(0xFF800000);
-				//if(SHIFTED>0x007FFFFF)
-				//	SHIFTED=0x007FFFFF;
-				//if(SHIFTED<(-0x00800000))
-				//	SHIFTED=-0x00800000;
-			}
-			else if(ThisInst.SHIFT==2)
-			{
-				//SHIFTED=ACC*2;
-				//SHIFTED<<=8;
-				//SHIFTED>>=8;
-				MOV_MEMTOEAX(&DSP->ACC);
-				SHL_EAX(9);
-				SAR_EAX(8);
-				//SHIFTED&=0x00FFFFFF;
-				//if(SHIFTED&0x00800000)
-				//	SHIFTED|=0xFF000000;
-			}
-			else if(ThisInst.SHIFT==3)
-			{
-				//SHIFTED=ACC;
-				//SHIFTED<<=8;
-				//SHIFTED>>=8;
-				MOV_MEMTOEAX(&DSP->ACC);
-				SHL_EAX(8);
-				SAR_EAX(8);
-				//SHIFTED&=0x00FFFFFF;
-				//if(SHIFTED&0x00800000)
-				//	SHIFTED|=0xFF000000;
-			}
-			MOV_EAXTOMEM(&DSP->SHIFTED);
-		}
-
-		if((USES_SHIFTED(NextInst) || NextInst.BSEL) || !DYNOPT)
-		{
-			//Y
-			if(ThisInst.YSEL==0)
-			{
-				//Y=FRC_REG;
-				MOV_MEMTOEAX(&DSP->FRC_REG);
-			}
-			else if(ThisInst.YSEL==1)
-			{
-				//MOV_0TOEAX();
-				MOV_MEMTOAX(&DSP->COEF[ThisInst.COEF]);
-				SAR_EAX(3);
-				//Y=DSP->COEF[COEF]>>3;	//COEF is 16 bits
-			}
-			else if(ThisInst.YSEL==2)
-			{
-				//Y=(Y_REG>>11)&0x1FFF;
-				MOV_MEMTOEAX(&DSP->Y_REG);
-				SAR_EAX(11);
-				AND_EAX(0x1FFF);
-			}
-			else if(ThisInst.YSEL==3)
-			{
-				//Y=(Y_REG>>4)&0x0FFF;
-				MOV_MEMTOEAX(&DSP->Y_REG);
-				SAR_EAX(4);
-				AND_EAX(0x0FFF);
-			}
-
-			SHL_EAX(19);
-			SAR_EAX(19);
-	//		MOV_EAXTOMEM(&DSP->Y);	//
-
-
-
-			//X:EBX
-			//B:ECX
-			//Y:EAX
-			IMUL_EAX_EBX();
-	//		SHR_EAX(12);
-	//		SHL_EDX((32-12));
-			SHRD_EAX_EDX(12);
-			ADD_EAX_ECX();
-
-			MOV_EAXTOMEM(&DSP->ACC);
-		}
-
-		if(ThisInst.YRL)
-		{
-			MOV_MEMTOEAX(&DSP->INPUTS);
-			MOV_EAXTOMEM(&DSP->Y_REG);
-			//Y_REG=INPUTS;
-		}
-
-		if(ThisInst.TWT)
-		{
-			MOV_MEMTOEAX(&DSP->SHIFTED);
-			MOV_EAXTOEBX();
-			//DSP->TEMP[(TWA+DSP->DEC)&0x7F]=SHIFTED;
-			MOV_IMMTOEAX(ThisInst.TWA);
-			ADD_MEMTOEAX(&(DSP->DEC));
-			AND_EAX(0x7F);
-			SHL_EAX(2);
-			ADD_IMMTOEAX(&(DSP->TEMP));
-			MOV_EBXTOMEMEAX();
-		}
-
-		if(ThisInst.FRCL)
-		{
-			if(ThisInst.SHIFT==3)
-			{
-				//FRC_REG=SHIFTED&0x0FFF;
-				MOV_MEMTOEAX(&DSP->SHIFTED);
-				AND_EAX(0x0FFF);
-				MOV_EAXTOMEM(&DSP->FRC_REG);
-			}
-			else
-			{
-				//FRC_REG=(SHIFTED>>11)&0x1FFF;
-				MOV_MEMTOEAX(&DSP->SHIFTED);
-				SHR_EAX(11);
-				AND_EAX(0x1FFF);
-				MOV_EAXTOMEM(&DSP->FRC_REG);
-			}
-		}
-
-		//MEM
-		if(ThisInst.MRD || ThisInst.MWT)
-		//if(0)
-		{
-			MOV_0TOEAX();
-			MOV_MEMTOAX(&DSP->MADRS[ThisInst.MASA]);
-			//ADDR=DSP->MADRS[MASA];
-			if(!ThisInst.TABLE)
-			{
-				//ADDR+=DSP->DEC;
-				//ADD_MEMTOEAX(&DSP->DEC);
-				MOV_MEMTOEBX(&DSP->DEC);
-				ADD_EBXTOEAX();
-			}
-			if(ThisInst.ADREB)
-			{
-				MOV_MEMTOEBX(&(DSP->ADRS_REG));
-				AND_EBX(0x0FFF);
-				ADD_EBXTOEAX();
-				//ADDR+=ADRS_REG&0x0FFF;
-			}
-			if(ThisInst.NXADR)
-			{
-				//ADDR++;
-				INC_EAX();
-			}
-			if(!ThisInst.TABLE)
-			{
-				MOV_MEMTOEBX(&(DSP->RBL));
-				DEC_EBX();
-				//ADDR&=DSP->RBL-1;
-				AND_EAX_EBX();
-
-				//AND_EAX((DSP->RBL-1));
-			}
-			else
-			{
-				//ADDR&=0xFFFF;
-				AND_EAX(0xFFFF);
-			}
-
-			//ADDR+=DSP->RBP<<12;
-			MOV_MEMTOEBX(&(DSP->RBP));
-			SHL_EBX(12);
-			ADD_EBXTOEAX();
-
-			assert(!(ThisInst.MRD && ThisInst.MWT));	//this shouldn't happen, read & write in the same cycle
-
-			if(ThisInst.MWT && (step&1))
-			{
-				if(ThisInst.NOFL)
-				{
-					SHL_EAX(1);
-					ADD_IMMTOEAX(DSP->SCSPRAM);
-					MOV_EAXTOEBX();
-					MOV_MEMTOEAX(&DSP->SHIFTED);
-					SHR_EAX(8);
-					MOV_AXTOMEMEBX();
-					//DSP->SCSPRAM[ADDR]=SHIFTED>>8;
-				}
-				else
-				{
-#ifdef USEFLOATPACK
-					SHL_EAX(1);
-					ADD_IMMTOEAX(DSP->SCSPRAM);
-					EMIT8(0x8B); EMIT8(0xF0);	//mov esi,eax
-					MOV_MEMTOEAX(&DSP->SHIFTED);
-					SHR_EAX(8);
-					memcpy(PtrInsts,PackFunc,sizeof(PackFunc));
-					PtrInsts+=sizeof(PackFunc);
-#else
-					SHL_EAX(1);
-					ADD_IMMTOEAX(DSP->SCSPRAM);
-					MOV_EAXTOEBX();
-					MOV_MEMTOEAX(&DSP->SHIFTED);
-					SHR_EAX(8);
-					MOV_AXTOMEMEBX();
-#endif
-					//DSP->SCSPRAM[ADDR]=PACK(SHIFTED);
-				}
-			}
-
-			if(ThisInst.MRD && (step&1))	//memory only allowed on odd? DoA inserts NOPs on even
-			{
-				if(ThisInst.NOFL)
-				{
-					//MEMVAL=DSP->SCSPRAM[ADDR]<<8;
-					SHL_EAX(1);
-					ADD_IMMTOEAX(DSP->SCSPRAM);
-					MOV_MEMEAXTOAX();
-					SHL_EAX(8);
-					//MOV_EAXTOMEM(&DSP->MEMVAL);
-				}
-				else
-				{
-					//MEMVAL=UNPACK(DSP->SCSPRAM[ADDR]);
-					SHL_EAX(1);
-					ADD_IMMTOEAX(DSP->SCSPRAM);
-					MOV_MEMEAXTOAX();
-#ifdef USEFLOATPACK
-					memcpy(PtrInsts,UnpackFunc,sizeof(UnpackFunc));
-					PtrInsts+=sizeof(UnpackFunc);
-#else
-					SHL_EAX(16);
-					SAR_EAX(8);
-#endif
-					MOV_EAXTOMEM(&DSP->MEMVAL);
-				}
-			}
-			
-			
-			
-		}
-
-		if(ThisInst.ADRL)
-		{
-			if(ThisInst.SHIFT==3)
-			{
-				MOV_MEMTOEAX(&DSP->SHIFTED);
-				SAR_EAX(12);
-				AND_EAX(0xFFF);
-				MOV_EAXTOMEM(&DSP->ADRS_REG);
-				//ADRS_REG=(SHIFTED>>12)&0xFFF;
-			}
-			else
-			{
-				MOV_MEMTOEAX(&DSP->INPUTS);
-				SAR_EAX(16);
-				MOV_EAXTOMEM(&DSP->ADRS_REG);
-				//ADRS_REG=(INPUTS>>16);
-			}
-		}
-
-		if(ThisInst.EWT)
-		{
-			MOV_MEMTOEAX(&DSP->SHIFTED);
-			SAR_EAX(8);
-			ADD_AXTOMEM(&DSP->EFREG[ThisInst.EWA]);
-			//DSP->EFREG[EWA]+=SHIFTED>>8;
-		}
-//		EMIT8(0x90);
-//		EMIT8(0xCC);
-		
-	}
-
-	POPA();
-
-	RET();
-
-	FILE *f=fopen("dsp.rec","wb");
-	fwrite(DSP->DoSteps,1,PtrInsts-(unsigned char *)DSP->DoSteps,f);
-	fclose(f);
-}
-
-#endif
-void SCSPDSP_SetSample(_SCSPDSP *DSP,signed int sample,int SEL,int MXL)
-{
-//	if(MXL!=6)
-//		return;
-	//16 to 24
-	DSP->MIXS[SEL]+=sample<<(MXL+1)/*7*/;
-//	DSP->MIXS[SEL]+=sample<<7;
-	if(MXL)
-	{
-		int a=1;
-		if(MXL!=6)
-			int a=1;
-	}
-
+	// Accumulate the sample into MIXS[SEL] exactly as passed in.
+	DSP->MIXS[SEL] += sample;
+	// MXL is no longer used here: the previous code added sample << (MXL + 1).
+	// NOTE(review): presumably the caller now applies the MXL scaling -- confirm.
 }
 
 void SCSPDSP_Start(_SCSPDSP *DSP)
 {
 	int i;
-	DSP->Stopped=false;
-	for(i=127;i>=0;--i)
+	DSP->Stopped = 0;
+	for (i = 127; i >= 0; --i)
 	{
-		unsigned short *IPtr=&(DSP->MPRO[i*4]);
+		UINT16 *IPtr = DSP->MPRO + i * 4;
 
-		if(IPtr[0]!=0 || IPtr[1]!=0 || IPtr[2]!=0 || IPtr[3]!=0)
+		if (IPtr[0] != 0 || IPtr[1] != 0 || IPtr[2] != 0 || IPtr[3] != 0)
 			break;
 	}
-	DSP->LastStep=i+1;
+	DSP->LastStep = i + 1;
 
 /*
 	int test=0;
diff --git a/Src/Sound/SCSPDSP.h b/Src/Sound/SCSPDSP.h
index 2206d4d..5c6b82a 100644
--- a/Src/Sound/SCSPDSP.h
+++ b/Src/Sound/SCSPDSP.h
@@ -38,6 +38,7 @@ struct _SCSPDSP
 {
 //Config
 	UINT16 *SCSPRAM;
+	UINT32 SCSPRAM_LENGTH;
 	unsigned int RBP;	//Ring buf pointer
 	unsigned int RBL;	//Delay ram (Ring buffer) size in words
 
diff --git a/Src/Sound/SCSPLFO.cpp b/Src/Sound/SCSPLFO.cpp
index d297f73..297f400 100644
--- a/Src/Sound/SCSPLFO.cpp
+++ b/Src/Sound/SCSPLFO.cpp
@@ -29,150 +29,150 @@
 #include <cmath>
 #include <cstdlib>
 
-#define LFO_SHIFT 	8
-
-struct _LFO
-{
-    unsigned short phase;
-    DWORD phase_step;
-    int *table;
-    int *scale;
-};
-
-#define LFIX(v)	((unsigned int) ((float) (1<<LFO_SHIFT)*(v)))
-
-//Convert DB to multiply amplitude
-#define DB(v) 	LFIX(pow(10.0,(float) (v)/20.0))
-
-//Convert cents to step increment
-#define CENTS(v) LFIX(pow(2.0,(float) (v)/1200.0))
-
-static int PLFO_TRI[256],PLFO_SQR[256],PLFO_SAW[256],PLFO_NOI[256];
-static int ALFO_TRI[256],ALFO_SQR[256],ALFO_SAW[256],ALFO_NOI[256];
-static float LFOFreq[32]={0.17f,0.19f,0.23f,0.27f,0.34f,0.39f,0.45f,0.55f,0.68f,0.78f,0.92f,1.10f,1.39f,1.60f,1.87f,2.27f,
-			  2.87f,3.31f,3.92f,4.79f,6.15f,7.18f,8.60f,10.8f,14.4f,17.2f,21.5f,28.7f,43.1f,57.4f,86.1f,172.3f};
-static float ASCALE[8]={0.0f,0.4f,0.8f,1.5f,3.0f,6.0f,12.0f,24.0f};
-static float PSCALE[8]={0.0f,7.0f,13.5f,27.0f,55.0f,112.0f,230.0f,494.0f};
-static int PSCALES[8][256];
-static int ASCALES[8][256];
-
-void LFO_Init()
-{
-    int i;
-    for(i=0;i<256;++i)
-    {
-		int a,p;
-		float TL;
-		//Saw
-		a=255-i;
-		if(i<128)
-			p=i;
-		else
-			p=255-i;    
-		ALFO_SAW[i]=a;
-		PLFO_SAW[i]=p;
-	
-		//Square
-		if(i<128)
-		{
-			a=255;
-			p=127;
-		}
-		else
-		{
-			a=0;
-			p=-128;
-		}
-		ALFO_SQR[i]=a;
-		PLFO_SQR[i]=p;
-	
-		//Tri
-		if(i<128)
-			a=255-(i*2);
-		else
-			a=(i*2)-256;
-		if(i<64)
-			p=i*2;
-		else if(i<128)
-			p=255-i*2;
-		else if(i<192)
-			p=256-i*2;
-		else
-			p=i*2-511;
-		ALFO_TRI[i]=a;
-		PLFO_TRI[i]=p;
-	
-		//noise
-		//a=lfo_noise[i];
-		a=rand()&0xff;
-		p=128-a;
-		ALFO_NOI[i]=a;
-		PLFO_NOI[i]=p;
-    }
-
-	for(int s=0;s<8;++s)
-	{
-		float limit=PSCALE[s];
-		for(i=-128;i<128;++i)
-		{
-			PSCALES[s][i+128]=CENTS(((limit*((float) i))/128.0));
-		}
-		limit=-ASCALE[s];
-		for(i=0;i<256;++i)
-		{
-			ASCALES[s][i]=DB(((limit*(float) i)/256.0));
-		}
-	}
-}
-
-signed int inline PLFO_Step(_LFO *LFO)
-{
-	int p;
-    LFO->phase+=LFO->phase_step;    
-#if LFO_SHIFT!=8    
-    LFO->phase&=(1<<(LFO_SHIFT+8))-1;
-#endif    
-    p=LFO->table[LFO->phase>>LFO_SHIFT];
-	p=LFO->scale[p+128];
-	return p<<(SHIFT-LFO_SHIFT);
-}
-
-signed int inline ALFO_Step(_LFO *LFO)
-{
-	int p;
-    LFO->phase+=LFO->phase_step;    
-#if LFO_SHIFT!=8    
-    LFO->phase&=(1<<(LFO_SHIFT+8))-1;
-#endif    
-    p=LFO->table[LFO->phase>>LFO_SHIFT];
-	p=LFO->scale[p];
-	return p<<(SHIFT-LFO_SHIFT);
-}
-
-void LFO_ComputeStep(_LFO *LFO,DWORD LFOF,DWORD LFOWS,DWORD LFOS,int ALFO)
-{
-    float step=(float) LFOFreq[LFOF]*256.0f/(float) srate;
-    LFO->phase_step=(unsigned int) ((float) (1<<LFO_SHIFT)*step);
-    if(ALFO)
-    {
-		switch(LFOWS)
-		{
-			case 0: LFO->table=ALFO_SAW; break;
-			case 1: LFO->table=ALFO_SQR; break;
-			case 2: LFO->table=ALFO_TRI; break;
-			case 3: LFO->table=ALFO_NOI; break;
-		}
-		LFO->scale=ASCALES[LFOS];
-	}
-	else
-	{
-		switch(LFOWS)
-		{
-		    case 0: LFO->table=PLFO_SAW; break;
-		    case 1: LFO->table=PLFO_SQR; break;
-			case 2: LFO->table=PLFO_TRI; break;
-		    case 3: LFO->table=PLFO_NOI; break;
-		}
-		LFO->scale=PSCALES[LFOS];
-	}
-}
+#define LFO_SHIFT 	8 // Fractional bits used by LFIX() values and by the phase accumulator below
+
+struct _LFO // State of one LFO; configured by LFO_ComputeStep, advanced by PLFO_Step/ALFO_Step
+{
+	unsigned short phase; // Phase accumulator; phase >> LFO_SHIFT indexes the waveform table
+	UINT32 phase_step; // Added to phase on every step call
+	int *table; // Selected waveform table (one of the PLFO_*/ALFO_* arrays)
+	int *scale; // Selected depth row (PSCALES[LFOS] or ASCALES[LFOS])
+};
+
+#define LFIX(v)	((unsigned int) ((float) (1<<LFO_SHIFT)*(v))) // Convert v to fixed point with LFO_SHIFT fractional bits
+
+//Convert DB to multiply amplitude (v is parenthesized so any expression can be passed safely)
+#define DB(v) 	LFIX(pow(10.0,(v)/20.0))
+
+//Convert cents to step increment (v is parenthesized so any expression can be passed safely)
+#define CENTS(v) LFIX(pow(2.0,(v)/1200.0))
+
+static int PLFO_TRI[256], PLFO_SQR[256], PLFO_SAW[256], PLFO_NOI[256]; // Pitch LFO waveforms (signed samples), filled by LFO_Init
+static int ALFO_TRI[256], ALFO_SQR[256], ALFO_SAW[256], ALFO_NOI[256]; // Amplitude LFO waveforms (0..255), filled by LFO_Init
+static float LFOFreq[32] = { 0.17,0.19,0.23,0.27,0.34,0.39,0.45,0.55,0.68,0.78,0.92,1.10,1.39,1.60,1.87,2.27, // Rate per LFOF value; presumably Hz -- confirm
+			  2.87,3.31,3.92,4.79,6.15,7.18,8.60,10.8,14.4,17.2,21.5,28.7,43.1,57.4,86.1,172.3 };
+static float ASCALE[8] = { 0.0,0.4,0.8,1.5,3.0,6.0,12.0,24.0 }; // Amplitude depth per LFOS; negated and passed to DB() in LFO_Init
+static float PSCALE[8] = { 0.0,7.0,13.5,27.0,55.0,112.0,230.0,494 }; // Pitch depth per LFOS; passed to CENTS() in LFO_Init
+static int PSCALES[8][256]; // Fixed-point pitch factors, indexed [LFOS][waveform sample + 128]
+static int ASCALES[8][256]; // Fixed-point amplitude factors, indexed [LFOS][waveform sample]
+
+void LFO_Init(void) // Fill the eight LFO waveform tables and the PSCALES/ASCALES depth lookup rows
+{
+	int i, s;
+	for (i = 0; i < 256; ++i)
+	{
+		int a, p;
+		// a = amplitude-LFO sample (0..255); p = pitch-LFO sample (-128..127, PLFO_Step adds 128 before indexing)
+		//Saw
+		a = 255 - i;
+		if (i < 128)
+			p = i;
+		else
+			p = i - 256;
+		ALFO_SAW[i] = a;
+		PLFO_SAW[i] = p;
+
+		//Square
+		if (i < 128)
+		{
+			a = 255;
+			p = 127;
+		}
+		else
+		{
+			a = 0;
+			p = -128;
+		}
+		ALFO_SQR[i] = a;
+		PLFO_SQR[i] = p;
+
+		//Tri
+		if (i < 128)
+			a = 255 - (i * 2);
+		else
+			a = (i * 2) - 256;
+		if (i < 64)
+			p = i * 2;
+		else if (i < 128)
+			p = 255 - i * 2;
+		else if (i < 192)
+			p = 256 - i * 2;
+		else
+			p = i * 2 - 511;
+		ALFO_TRI[i] = a;
+		PLFO_TRI[i] = p;
+
+		//noise
+		//128-a would be 128 when a==0, making PLFO_Step read scale[256] (one past the row); clamp that case to 127
+		a = rand() & 0xff;
+		p = a ? 128 - a : 127;
+		ALFO_NOI[i] = a;
+		PLFO_NOI[i] = p;
+	}
+
+	for (s = 0; s < 8; ++s)
+	{
+		float limit = PSCALE[s]; // pitch depth for this LFOS setting, passed to CENTS()
+		for (i = -128; i < 128; ++i)
+		{
+			PSCALES[s][i + 128] = CENTS(((limit*(float)i) / 128.0)); // sample i maps to i/128 of the depth
+		}
+		limit = -ASCALE[s]; // negated so DB() yields factors <= 1 (attenuation)
+		for (i = 0; i < 256; ++i)
+		{
+			ASCALES[s][i] = DB(((limit*(float)i) / 256.0)); // sample i maps to i/256 of the depth
+		}
+	}
+}
+
+signed int INLINE PLFO_Step(struct _LFO *LFO) // Advance the pitch LFO one step; returns the depth-scaled table value
+{
+	int p;
+	LFO->phase += LFO->phase_step; // result is truncated to unsigned short on assignment
+#if LFO_SHIFT!=8    
+	LFO->phase &= (1 << (LFO_SHIFT + 8)) - 1;
+#endif    
+	p = LFO->table[LFO->phase >> LFO_SHIFT]; // waveform sample for the current phase
+	p = LFO->scale[p + 128]; // +128 biases the signed sample into a row index; requires p in -128..127
+	return p << (SHIFT - LFO_SHIFT); // presumably rescales to SHIFT fractional bits; SHIFT is defined elsewhere
+}
+
+signed int INLINE ALFO_Step(struct _LFO *LFO) // Advance the amplitude LFO one step; returns the depth-scaled table value
+{
+	int p;
+	LFO->phase += LFO->phase_step; // result is truncated to unsigned short on assignment
+#if LFO_SHIFT!=8    
+	LFO->phase &= (1 << (LFO_SHIFT + 8)) - 1;
+#endif    
+	p = LFO->table[LFO->phase >> LFO_SHIFT]; // waveform sample for the current phase
+	p = LFO->scale[p]; // sample is used directly as the row index (LFO_Init stores 0..255 in the ALFO tables)
+	return p << (SHIFT - LFO_SHIFT); // presumably rescales to SHIFT fractional bits; SHIFT is defined elsewhere
+}
+
+void LFO_ComputeStep(struct _LFO *LFO, UINT32 LFOF, UINT32 LFOWS, UINT32 LFOS, int ALFO) // Set phase_step, table and scale of *LFO; ALFO != 0 selects the amplitude tables
+{
+	float step = (float)LFOFreq[LFOF] * 256.0 / (float)44100.0; // LFOF is an unchecked index (LFOFreq has 32 entries); 44100.0 is hard-coded
+	LFO->phase_step = (unsigned int)((float)(1 << LFO_SHIFT)*step); // convert to fixed point with LFO_SHIFT fractional bits
+	if (ALFO)
+	{
+		switch (LFOWS) // NOTE(review): no default case -- table is left unchanged when LFOWS > 3
+		{
+		case 0: LFO->table = ALFO_SAW; break;
+		case 1: LFO->table = ALFO_SQR; break;
+		case 2: LFO->table = ALFO_TRI; break;
+		case 3: LFO->table = ALFO_NOI; break;
+		}
+		LFO->scale = ASCALES[LFOS]; // LFOS is an unchecked row index (ASCALES has 8 rows)
+	}
+	else
+	{
+		switch (LFOWS) // NOTE(review): no default case -- table is left unchanged when LFOWS > 3
+		{
+		case 0: LFO->table = PLFO_SAW; break;
+		case 1: LFO->table = PLFO_SQR; break;
+		case 2: LFO->table = PLFO_TRI; break;
+		case 3: LFO->table = PLFO_NOI; break;
+		}
+		LFO->scale = PSCALES[LFOS]; // LFOS is an unchecked row index (PSCALES has 8 rows)
+	}
+}
\ No newline at end of file