diff --git a/Source/Core/Common/Src/PluginVideo.cpp b/Source/Core/Common/Src/PluginVideo.cpp
index 159f72f261..bc8a2e9164 100644
--- a/Source/Core/Common/Src/PluginVideo.cpp
+++ b/Source/Core/Common/Src/PluginVideo.cpp
@@ -23,7 +23,6 @@ namespace Common
 PluginVideo::PluginVideo(const char *_Filename) : CPlugin(_Filename), validVideo(false)
 {
 	Video_Prepare = 0;
-	Video_SendFifoData = 0;
 	Video_BeginField = 0;
 	Video_EndField = 0;
 	Video_EnterLoop = 0;
@@ -31,11 +30,17 @@ PluginVideo::PluginVideo(const char *_Filename) : CPlugin(_Filename), validVideo
 	Video_Screenshot = 0;
 	Video_AddMessage = 0;
 	Video_AccessEFB = 0;
+	Video_SetRendering = 0;
+	Video_CommandProcessorRead16 = 0;
+	Video_CommandProcessorWrite16 = 0;
+	Video_PixelEngineRead16 = 0;
+	Video_PixelEngineWrite16 = 0;
+	Video_PixelEngineWrite32 = 0;
+	Video_GatherPipeBursted = 0;
+	Video_WaitForFrameFinish = 0;
 
 	Video_Prepare  = reinterpret_cast<TVideo_Prepare>
 		(LoadSymbol("Video_Prepare"));
-	Video_SendFifoData = reinterpret_cast<TVideo_SendFifoData>
-		(LoadSymbol("Video_SendFifoData"));
 	Video_BeginField = reinterpret_cast<TVideo_BeginField>
 		(LoadSymbol("Video_BeginField"));
 	Video_EndField = reinterpret_cast<TVideo_EndField>
@@ -50,19 +55,40 @@ PluginVideo::PluginVideo(const char *_Filename) : CPlugin(_Filename), validVideo
 		(LoadSymbol("Video_AddMessage"));
 	Video_AccessEFB = reinterpret_cast<TVideo_AccessEFB>
 		(LoadSymbol("Video_AccessEFB"));
-		Video_SetRendering = reinterpret_cast<TVideo_SetRendering>
+	Video_SetRendering = reinterpret_cast<TVideo_SetRendering>
 		(LoadSymbol("Video_SetRendering"));
+	Video_CommandProcessorRead16 = reinterpret_cast<TVideo_Read16>
+		(LoadSymbol("Video_CommandProcessorRead16"));
+	Video_CommandProcessorWrite16 = reinterpret_cast<TVideo_Write16>
+		(LoadSymbol("Video_CommandProcessorWrite16"));
+	Video_PixelEngineRead16 = reinterpret_cast<TVideo_Read16>
+		(LoadSymbol("Video_PixelEngineRead16"));
+	Video_PixelEngineWrite16 = reinterpret_cast<TVideo_Write16>
+		(LoadSymbol("Video_PixelEngineWrite16"));
+	Video_PixelEngineWrite32 = reinterpret_cast<TVideo_Write32>
+		(LoadSymbol("Video_PixelEngineWrite32"));
+	Video_GatherPipeBursted = reinterpret_cast<TVideo_GatherPipeBursted>
+		(LoadSymbol("Video_GatherPipeBursted"));
+	Video_WaitForFrameFinish = reinterpret_cast<TVideo_WaitForFrameFinish>
+		(LoadSymbol("Video_WaitForFrameFinish"));
 
-	if ((Video_Prepare      != 0) &&
-		(Video_SendFifoData != 0) &&
-		(Video_BeginField   != 0) &&
-		(Video_EndField     != 0) &&
-		(Video_EnterLoop    != 0) &&
-		(Video_ExitLoop     != 0) &&
-		(Video_Screenshot   != 0) &&
-		(Video_AddMessage   != 0) &&
-		(Video_SetRendering != 0) &&
-		(Video_AccessEFB	!= 0))
+	// Mark the plugin valid only if every entry point loaded above is non-null.
+	if ((Video_Prepare                  != 0) &&
+		(Video_BeginField               != 0) &&
+		(Video_EndField                 != 0) &&
+		(Video_EnterLoop                != 0) &&
+		(Video_ExitLoop                 != 0) &&
+		(Video_Screenshot               != 0) &&
+		(Video_AddMessage               != 0) &&
+		(Video_SetRendering             != 0) &&
+		(Video_AccessEFB                != 0) &&
+		(Video_CommandProcessorRead16   != 0) &&
+		(Video_CommandProcessorWrite16  != 0) &&
+		(Video_PixelEngineRead16        != 0) &&
+		(Video_PixelEngineWrite16       != 0) &&
+		(Video_PixelEngineWrite32       != 0) &&
+		(Video_GatherPipeBursted        != 0) &&
+		(Video_WaitForFrameFinish       != 0))
 		validVideo = true;
 }
 
diff --git a/Source/Core/Common/Src/PluginVideo.h b/Source/Core/Common/Src/PluginVideo.h
index 85af93f907..dbeeafc1a3 100644
--- a/Source/Core/Common/Src/PluginVideo.h
+++ b/Source/Core/Common/Src/PluginVideo.h
@@ -33,6 +33,12 @@ typedef void (__cdecl* TVideo_ExitLoop)();
 typedef void (__cdecl* TVideo_SetRendering)(bool bEnabled);
 typedef void (__cdecl* TVideo_AddMessage)(const char* pstr, unsigned int milliseconds);
 typedef u32 (__cdecl* TVideo_AccessEFB)(EFBAccessType, u32, u32);
+typedef void (__cdecl* TVideo_Read16)(u16& _rReturnValue, const u32 _Address);
+typedef void (__cdecl* TVideo_Write16)(const u16 _Data, const u32 _Address);
+typedef void (__cdecl* TVideo_Read32)(u32& _rReturnValue, const u32 _Address);
+typedef void (__cdecl* TVideo_Write32)(const u32 _Data, const u32 _Address);
+typedef void (__cdecl* TVideo_GatherPipeBursted)();
+typedef void (__cdecl* TVideo_WaitForFrameFinish)();
 
 class PluginVideo : public CPlugin
 {
@@ -41,18 +47,25 @@ public:
 	virtual ~PluginVideo();
 	virtual bool IsValid() {return validVideo;};
 
-	TVideo_Prepare      Video_Prepare;
-	TVideo_SendFifoData Video_SendFifoData;
-	TVideo_EnterLoop    Video_EnterLoop;
-	TVideo_ExitLoop     Video_ExitLoop;
-	TVideo_BeginField   Video_BeginField;
-	TVideo_EndField     Video_EndField;
-	TVideo_AccessEFB	Video_AccessEFB;
+	TVideo_Prepare              Video_Prepare;
+	TVideo_EnterLoop            Video_EnterLoop;
+	TVideo_ExitLoop             Video_ExitLoop;
+	TVideo_BeginField           Video_BeginField;
+	TVideo_EndField             Video_EndField;
+	TVideo_AccessEFB	        Video_AccessEFB;
 
-	TVideo_AddMessage   Video_AddMessage;
-	TVideo_Screenshot   Video_Screenshot;
+	TVideo_AddMessage           Video_AddMessage;
+	TVideo_Screenshot           Video_Screenshot;
 
-	TVideo_SetRendering Video_SetRendering;
+	TVideo_SetRendering         Video_SetRendering;
+
+	TVideo_Read16               Video_CommandProcessorRead16;
+	TVideo_Write16              Video_CommandProcessorWrite16;
+	TVideo_Read16               Video_PixelEngineRead16;
+	TVideo_Write16              Video_PixelEngineWrite16;
+	TVideo_Write32              Video_PixelEngineWrite32;
+	TVideo_GatherPipeBursted    Video_GatherPipeBursted;
+	TVideo_WaitForFrameFinish   Video_WaitForFrameFinish;
 
 private:
 	bool validVideo;
diff --git a/Source/Core/Core/Core.vcproj b/Source/Core/Core/Core.vcproj
index b9fe781f68..447d09692d 100644
--- a/Source/Core/Core/Core.vcproj
+++ b/Source/Core/Core/Core.vcproj
@@ -811,18 +811,6 @@
 					>
 				</File>
 			</Filter>
-			<Filter
-				Name="PE - Pixel Engine"
-				>
-				<File
-					RelativePath=".\Src\Hw\PixelEngine.cpp"
-					>
-				</File>
-				<File
-					RelativePath=".\Src\Hw\PixelEngine.h"
-					>
-				</File>
-			</Filter>
 			<Filter
 				Name="AI - Audio Interface"
 				>
@@ -843,18 +831,6 @@
 					>
 				</File>
 			</Filter>
-			<Filter
-				Name="CP - Command Processor"
-				>
-				<File
-					RelativePath=".\Src\Hw\CommandProcessor.cpp"
-					>
-				</File>
-				<File
-					RelativePath=".\Src\Hw\CommandProcessor.h"
-					>
-				</File>
-			</Filter>
 			<Filter
 				Name="DSP"
 				>
diff --git a/Source/Core/Core/Src/Core.cpp b/Source/Core/Core/Src/Core.cpp
index 9cfd14d794..6f1b45d962 100644
--- a/Source/Core/Core/Src/Core.cpp
+++ b/Source/Core/Core/Src/Core.cpp
@@ -43,8 +43,6 @@
 #include "HW/GPFifo.h"
 #include "HW/AudioInterface.h"
 #include "HW/VideoInterface.h"
-#include "HW/CommandProcessor.h"
-#include "HW/PixelEngine.h"
 #include "HW/SystemTimers.h"
  
 #include "PowerPC/PowerPC.h"
@@ -360,26 +358,25 @@ THREAD_RETURN EmuThread(void *pArg)
  
 	// Load the VideoPlugin
  	SVideoInitialize VideoInitialize;
-	VideoInitialize.pGetMemoryPointer	= Memory::GetPointer;
-	VideoInitialize.pSetPEToken			= PixelEngine::SetToken;
-	VideoInitialize.pSetPEFinish		= PixelEngine::SetFinish;
-	// This is first the m_Panel handle, then it is updated to have the new window handle
-	VideoInitialize.pWindowHandle		= _CoreParameter.hMainWindow;
-	VideoInitialize.pLog				= Callback_VideoLog;
-	VideoInitialize.pSysMessage			= Host_SysMessage;
-	VideoInitialize.pRequestWindowSize	= NULL; //Callback_VideoRequestWindowSize;
-	VideoInitialize.pCopiedToXFB		= Callback_VideoCopiedToXFB;
-	VideoInitialize.pPeekMessages       = NULL;
-	VideoInitialize.pUpdateFPSDisplay   = NULL;
-	VideoInitialize.pCPFifo             = (SCPFifoStruct*)&CommandProcessor::fifo;
-	VideoInitialize.pUpdateInterrupts   = &(CommandProcessor::UpdateInterruptsFromVideoPlugin);
-	VideoInitialize.pMemoryBase         = Memory::base;
-	VideoInitialize.pKeyPress           = Callback_KeyPress;
-	VideoInitialize.pSetFifoIdle        = &(CommandProcessor::SetFifoIdleFromVideoPlugin);
-	VideoInitialize.bWii                = _CoreParameter.bWii;
-	VideoInitialize.bUseDualCore		= _CoreParameter.bUseDualCore;
-	VideoInitialize.pBBox               = &PixelEngine::bbox[0];
-	VideoInitialize.pBBoxActive         = &PixelEngine::bbox_active;
+	VideoInitialize.pGetMemoryPointer           = Memory::GetPointer;
+	VideoInitialize.pSetInterrupt               = ProcessorInterface::SetInterrupt;
+	VideoInitialize.pRegisterEvent              = CoreTiming::RegisterEvent;
+	VideoInitialize.pScheduleEvent_Threadsafe   = CoreTiming::ScheduleEvent_Threadsafe;
+	// This is first the m_Panel handle, then it is updated to have the new window handle
+	VideoInitialize.pWindowHandle               = _CoreParameter.hMainWindow;
+	VideoInitialize.pLog                        = Callback_VideoLog;
+	VideoInitialize.pSysMessage                 = Host_SysMessage;
+	VideoInitialize.pRequestWindowSize          = NULL; //Callback_VideoRequestWindowSize;
+	VideoInitialize.pCopiedToXFB                = Callback_VideoCopiedToXFB;
+	VideoInitialize.pPeekMessages               = NULL;
+	VideoInitialize.pUpdateFPSDisplay           = NULL;
+	VideoInitialize.pMemoryBase                 = Memory::base;
+	VideoInitialize.pKeyPress                   = Callback_KeyPress;
+	VideoInitialize.bWii                        = _CoreParameter.bWii;
+	VideoInitialize.bUseDualCore                = _CoreParameter.bUseDualCore;
+	VideoInitialize.Fifo_CPUBase                = &ProcessorInterface::Fifo_CPUBase;
+	VideoInitialize.Fifo_CPUEnd                 = &ProcessorInterface::Fifo_CPUEnd;
+	VideoInitialize.Fifo_CPUWritePointer        = &ProcessorInterface::Fifo_CPUWritePointer;
 
 	Plugins.GetVideo()->Initialize(&VideoInitialize); // Call the dll
  
diff --git a/Source/Core/Core/Src/HW/GPFifo.cpp b/Source/Core/Core/Src/HW/GPFifo.cpp
index 8ff474bafc..01d38ed0f5 100644
--- a/Source/Core/Core/Src/HW/GPFifo.cpp
+++ b/Source/Core/Core/Src/HW/GPFifo.cpp
@@ -18,7 +18,7 @@
 #include "Common.h"
 #include "ChunkFile.h"
 #include "ProcessorInterface.h"
-#include "CommandProcessor.h"
+#include "../PluginManager.h"
 #include "Memmap.h"
 #include "../PowerPC/PowerPC.h"
 
@@ -87,8 +87,10 @@ void STACKALIGN CheckGatherPipe()
 						ProcessorInterface::Fifo_CPUWritePointer, ProcessorInterface::Fifo_CPUEnd);
 
 		if (ProcessorInterface::Fifo_CPUWritePointer >= ProcessorInterface::Fifo_CPUEnd)
-			ProcessorInterface::Fifo_CPUWritePointer = ProcessorInterface::Fifo_CPUBase;		
-		CommandProcessor::GatherPipeBursted();
+			ProcessorInterface::Fifo_CPUWritePointer = ProcessorInterface::Fifo_CPUBase;
+
+		// TODO: cache the video plugin pointer instead of looking it up on every call
+		CPluginManager::GetInstance().GetVideo()->Video_GatherPipeBursted();
 	}
 }
 
diff --git a/Source/Core/Core/Src/HW/HW.cpp b/Source/Core/Core/Src/HW/HW.cpp
index b5bbcc92b8..8cfd88e20c 100644
--- a/Source/Core/Core/Src/HW/HW.cpp
+++ b/Source/Core/Core/Src/HW/HW.cpp
@@ -21,14 +21,12 @@
 #include "HW.h"
 #include "../PowerPC/PowerPC.h"
 #include "CPU.h"
-#include "CommandProcessor.h"
 #include "DSP.h"
 #include "DVDInterface.h"
 #include "EXI.h"
 #include "GPFifo.h"
 #include "Memmap.h"
 #include "ProcessorInterface.h"
-#include "PixelEngine.h"
 #include "SI.h"
 #include "AudioInterface.h"
 #include "VideoInterface.h"
@@ -52,8 +50,6 @@ namespace HW
 
 		// Init the whole Hardware
 		AudioInterface::Init();
-		PixelEngine::Init();
-		CommandProcessor::Init();
 		VideoInterface::Init();
 		SerialInterface::Init();
 		ProcessorInterface::Init();
@@ -96,8 +92,6 @@ namespace HW
 	void DoState(PointerWrap &p)
 	{
 		Memory::DoState(p);
-		PixelEngine::DoState(p);
-		CommandProcessor::DoState(p);
 		VideoInterface::DoState(p);
 		SerialInterface::DoState(p);
 		ProcessorInterface::DoState(p);
diff --git a/Source/Core/Core/Src/HW/Memmap.cpp b/Source/Core/Core/Src/HW/Memmap.cpp
index 6a1fef0202..91118f5379 100644
--- a/Source/Core/Core/Src/HW/Memmap.cpp
+++ b/Source/Core/Core/Src/HW/Memmap.cpp
@@ -39,14 +39,14 @@ may be redirected here (for example to Read_U32()).
 #include "VideoInterface.h"
 #include "SI.h"
 #include "EXI.h"
-#include "PixelEngine.h"
-#include "CommandProcessor.h"
+#include "PluginVideo.h"
 #include "AudioInterface.h"
 #include "MemoryInterface.h"
 #include "WII_IOB.h"
 #include "WII_IPC.h"
 #include "../ConfigManager.h"
 #include "../Debugger/Debugger_SymbolMap.h"
+#include "../PluginManager.h"
 
 
 
@@ -191,12 +191,12 @@ void InitHWMemFuncs()
 
 	for (int i = 0; i < BLOCKSIZE; i++)
 	{
-		hwRead16 [CP_START+i] = CommandProcessor::Read16;
-		hwWrite16[CP_START+i] = CommandProcessor::Write16;
+		hwRead16 [CP_START+i] = CPluginManager::GetInstance().GetVideo()->Video_CommandProcessorRead16;
+		hwWrite16[CP_START+i] = CPluginManager::GetInstance().GetVideo()->Video_CommandProcessorWrite16;
  
-		hwRead16 [PE_START+i] = PixelEngine::Read16;
-		hwWrite16[PE_START+i] = PixelEngine::Write16;
-		hwWrite32[PE_START+i] = PixelEngine::Write32;
+		hwRead16 [PE_START+i] = CPluginManager::GetInstance().GetVideo()->Video_PixelEngineRead16;
+		hwWrite16[PE_START+i] = CPluginManager::GetInstance().GetVideo()->Video_PixelEngineWrite16;
+		hwWrite32[PE_START+i] = CPluginManager::GetInstance().GetVideo()->Video_PixelEngineWrite32;
 
 		hwRead8  [VI_START+i] = VideoInterface::Read8;
 		hwRead16 [VI_START+i] = VideoInterface::Read16;
@@ -263,12 +263,12 @@ void InitHWMemFuncsWii()
 	// MI, PI, DSP are still mapped to 0xCCxxxxxx
 	for (int i = 0; i < BLOCKSIZE; i++)
 	{
-		hwRead16 [CP_START+i] = CommandProcessor::Read16;
-		hwWrite16[CP_START+i] = CommandProcessor::Write16;
-
-		hwRead16 [PE_START+i] = PixelEngine::Read16;
-		hwWrite16[PE_START+i] = PixelEngine::Write16;
-		hwWrite32[PE_START+i] = PixelEngine::Write32;
+		hwRead16 [CP_START+i] = CPluginManager::GetInstance().GetVideo()->Video_CommandProcessorRead16;
+		hwWrite16[CP_START+i] = CPluginManager::GetInstance().GetVideo()->Video_CommandProcessorWrite16;
+ 
+		hwRead16 [PE_START+i] = CPluginManager::GetInstance().GetVideo()->Video_PixelEngineRead16;
+		hwWrite16[PE_START+i] = CPluginManager::GetInstance().GetVideo()->Video_PixelEngineWrite16;
+		hwWrite32[PE_START+i] = CPluginManager::GetInstance().GetVideo()->Video_PixelEngineWrite32;
 
 		hwRead32 [PI_START+i] = ProcessorInterface::Read32;
 		hwWrite32[PI_START+i] = ProcessorInterface::Write32;
diff --git a/Source/Core/Core/Src/HW/ProcessorInterface.cpp b/Source/Core/Core/Src/HW/ProcessorInterface.cpp
index 446863048c..5b8f2cf486 100644
--- a/Source/Core/Core/Src/HW/ProcessorInterface.cpp
+++ b/Source/Core/Core/Src/HW/ProcessorInterface.cpp
@@ -198,7 +198,7 @@ void UpdateException()
 		PowerPC::ppcState.Exceptions &= ~EXCEPTION_EXTERNAL_INT;
 }
 
-static const char *Debug_GetInterruptName(InterruptCause _causemask)
+static const char *Debug_GetInterruptName(u32 _causemask)
 {
 	switch (_causemask)
 	{
@@ -222,24 +222,24 @@ static const char *Debug_GetInterruptName(InterruptCause _causemask)
 	}
 }
 
-void SetInterrupt(InterruptCause _causemask, bool _bSet)
+void SetInterrupt(u32 _causemask, bool _bSet)
 {
 	// TODO(ector): add sanity check that current thread id is cpu thread
 
-    if (_bSet && !(m_InterruptCause & (u32)_causemask))
+    if (_bSet && !(m_InterruptCause & _causemask))
     {
         DEBUG_LOG(PROCESSORINTERFACE, "Setting Interrupt %s (%s)",Debug_GetInterruptName(_causemask), "set");
     }
 
-    if (!_bSet && (m_InterruptCause & (u32)_causemask))
+    if (!_bSet && (m_InterruptCause & _causemask))
     {
         DEBUG_LOG(PROCESSORINTERFACE, "Setting Interrupt %s (%s)",Debug_GetInterruptName(_causemask), "clear");
     }
 	
 	if (_bSet)
-		m_InterruptCause |= (u32)_causemask;
+		m_InterruptCause |= _causemask;
 	else
-		m_InterruptCause &= ~(u32)_causemask;   // is there any reason to have this possibility?
+		m_InterruptCause &= ~_causemask;   // is there any reason to have this possibility?
 												// F|RES: i think the hw devices reset the interrupt in the PI to 0 
 												// if the interrupt cause is eliminated. that isnt done by software (afaik)
 	UpdateException();
diff --git a/Source/Core/Core/Src/HW/ProcessorInterface.h b/Source/Core/Core/Src/HW/ProcessorInterface.h
index 78cc0c2f49..adec8ea86e 100644
--- a/Source/Core/Core/Src/HW/ProcessorInterface.h
+++ b/Source/Core/Core/Src/HW/ProcessorInterface.h
@@ -63,7 +63,7 @@ void Write32(const u32 _iValue, const u32 _iAddress);
 inline u32 GetMask() { return m_InterruptMask; }
 inline u32 GetCause() { return m_InterruptCause; }
 
-void SetInterrupt(InterruptCause _causemask, bool _bSet=true);
+void SetInterrupt(u32 _causemask, bool _bSet=true);
 
 // Thread-safe func which sets and clears reset button state automagically
 void ResetButton_Tap();
diff --git a/Source/Core/Core/Src/HW/SystemTimers.cpp b/Source/Core/Core/Src/HW/SystemTimers.cpp
index 2e3232983d..a6fb5439b6 100644
--- a/Source/Core/Core/Src/HW/SystemTimers.cpp
+++ b/Source/Core/Core/Src/HW/SystemTimers.cpp
@@ -68,7 +68,6 @@
 #include "../HW/AudioInterface.h"
 #include "../HW/VideoInterface.h"
 #include "../HW/SI.h"
-#include "../HW/CommandProcessor.h" // for DC watchdog hack
 #include "../HW/EXI_DeviceIPL.h"
 #include "../PowerPC/PowerPC.h"
 #include "../CoreTiming.h"
@@ -226,7 +225,7 @@ void AdvanceCallback(int cyclesExecuted)
 // For DC watchdog hack
 void FakeGPWatchdogCallback(u64 userdata, int cyclesLate)
 {
-	CommandProcessor::WaitForFrameFinish(); // lock CPUThread until frame finish
+	CPluginManager::GetInstance().GetVideo()->Video_WaitForFrameFinish(); // lock CPUThread until frame finish
 	CoreTiming::ScheduleEvent(FAKE_GP_WATCHDOG_PERIOD-cyclesLate, et_FakeGPWD);
 }
 
diff --git a/Source/Core/Core/Src/HW/VideoInterface.cpp b/Source/Core/Core/Src/HW/VideoInterface.cpp
index ce19253524..b464182212 100644
--- a/Source/Core/Core/Src/HW/VideoInterface.cpp
+++ b/Source/Core/Core/Src/HW/VideoInterface.cpp
@@ -21,7 +21,6 @@
 #include "../PowerPC/PowerPC.h"
 
 #include "../Core.h"			// <- for Core::GetStartupParameter().bUseDualCore
-#include "CommandProcessor.h"	// <- for homebrew's XFB draw hack
 #include "ProcessorInterface.h"
 #include "VideoInterface.h"
 #include "Memmap.h"
diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
index 17b03721c4..b5d39dde94 100644
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStore.cpp
@@ -19,8 +19,6 @@
 #include "MathUtil.h"
 
 #include "../../HW/Memmap.h"
-#include "../../HW/CommandProcessor.h"
-#include "../../HW/PixelEngine.h"
 
 #include "Interpreter.h"
 #include "../../Core.h"
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
index 43a3bbe96a..a76dc74eb3 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -24,8 +24,6 @@
 #include "../PowerPC.h"
 #include "../../Core.h"
 #include "../../HW/GPFifo.h"
-#include "../../HW/CommandProcessor.h"
-#include "../../HW/PixelEngine.h"
 #include "../../HW/Memmap.h"
 #include "../PPCTables.h"
 #include "x64Emitter.h"
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
index 240cb15298..dc1a691c00 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -23,8 +23,6 @@
 #include "../PowerPC.h"
 #include "../../Core.h" // include "Common.h", "CoreParameter.h"
 #include "../../HW/GPFifo.h"
-#include "../../HW/CommandProcessor.h"
-#include "../../HW/PixelEngine.h"
 #include "../../HW/Memmap.h"
 #include "../PPCTables.h"
 #include "CPUDetect.h"
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index 97420fb7e0..098831d286 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -24,8 +24,6 @@
 #include "../PowerPC.h"
 #include "../../Core.h"
 #include "../../HW/GPFifo.h"
-#include "../../HW/CommandProcessor.h"
-#include "../../HW/PixelEngine.h"
 #include "../../HW/Memmap.h"
 #include "../PPCTables.h"
 #include "CPUDetect.h"
diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
index f05b404448..8f3e643468 100644
--- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp
@@ -21,8 +21,6 @@
 #include "../PowerPC.h"
 #include "../../Core.h"
 #include "../../HW/GPFifo.h"
-#include "../../HW/CommandProcessor.h"
-#include "../../HW/PixelEngine.h"
 #include "../../HW/Memmap.h"
 #include "../PPCTables.h"
 #include "x64Emitter.h"
diff --git a/Source/Core/Core/Src/SConscript b/Source/Core/Core/Src/SConscript
index f9413c1935..211d0ae058 100644
--- a/Source/Core/Core/Src/SConscript
+++ b/Source/Core/Core/Src/SConscript
@@ -29,7 +29,6 @@ files = ["ActionReplay.cpp",
          "Debugger/Dump.cpp",
          "Debugger/PPCDebugInterface.cpp",
          "HW/AudioInterface.cpp",
-         "HW/CommandProcessor.cpp",
          "HW/CPU.cpp",
          "HW/DSP.cpp",
          "HW/DVDInterface.cpp",
@@ -46,7 +45,6 @@ files = ["ActionReplay.cpp",
          "HW/Memmap.cpp",
          "HW/MemmapFunctions.cpp",
          "HW/MemoryInterface.cpp",
-         "HW/PixelEngine.cpp",
          "HW/ProcessorInterface.cpp",
          "HW/SI.cpp",
          "HW/SI_Device.cpp",
diff --git a/Source/Core/VideoCommon/Src/BPMemory.h b/Source/Core/VideoCommon/Src/BPMemory.h
index 1ca48d70df..89238d8ac7 100644
--- a/Source/Core/VideoCommon/Src/BPMemory.h
+++ b/Source/Core/VideoCommon/Src/BPMemory.h
@@ -875,7 +875,7 @@ struct BPMemory
     u32 clearcolorAR; //4f
     u32 clearcolorGB; //50
     u32 clearZValue; //51
-    u32 triggerEFBCopy; //52
+    UPE_Copy triggerEFBCopy; //52
     u32 copyfilter[2]; //53,54
     u32 boundbox0;//55
     u32 boundbox1;//56
diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp
index 50c2d110e3..3fabc58f53 100644
--- a/Source/Core/VideoCommon/Src/BPStructs.cpp
+++ b/Source/Core/VideoCommon/Src/BPStructs.cpp
@@ -22,6 +22,7 @@
 #include "Render.h"
 #include "VideoCommon.h"
 #include "PixelShaderManager.h"
+#include "PixelEngine.h"
 #include "BPFunctions.h"
 #include "BPStructs.h"
 #include "TextureDecoder.h"
@@ -162,7 +163,7 @@ void BPWritten(const BPCmd& bp)
 		switch (bp.newvalue & 0xFF)
         {
         case 0x02:
-            g_VideoInitialize.pSetPEFinish(); // may generate interrupt
+            PixelEngine::SetFinish(); // may generate interrupt
             DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF));
             break;
 
@@ -172,11 +173,11 @@ void BPWritten(const BPCmd& bp)
         }
         break;
 	case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID
-        g_VideoInitialize.pSetPEToken(static_cast<u16>(bp.newvalue & 0xFFFF), FALSE);
+        PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), FALSE);
         DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF));
         break;
     case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
-        g_VideoInitialize.pSetPEToken(static_cast<u16>(bp.newvalue & 0xFFFF), TRUE);
+        PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), TRUE);
         DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF));
         break;
 	// ------------------------
@@ -194,8 +195,7 @@ void BPWritten(const BPCmd& bp)
 			rc.right = (int)(bpmem.copyTexSrcXY.x + bpmem.copyTexSrcWH.x + 1);
 			rc.bottom = (int)(bpmem.copyTexSrcXY.y + bpmem.copyTexSrcWH.y + 1);
 
-			UPE_Copy PE_copy;
-			PE_copy.Hex = bpmem.triggerEFBCopy;
+			UPE_Copy PE_copy = bpmem.triggerEFBCopy;
 
 			// Check if we are to copy from the EFB or draw to the XFB
 			if (PE_copy.copy_to_xfb == 0)
diff --git a/Source/Core/Core/Src/HW/CommandProcessor.cpp b/Source/Core/VideoCommon/Src/CommandProcessor.cpp
similarity index 90%
rename from Source/Core/Core/Src/HW/CommandProcessor.cpp
rename to Source/Core/VideoCommon/Src/CommandProcessor.cpp
index b88944fb69..4a023ab0fc 100644
--- a/Source/Core/Core/Src/HW/CommandProcessor.cpp
+++ b/Source/Core/VideoCommon/Src/CommandProcessor.cpp
@@ -1,746 +1,748 @@
-// Copyright (C) 2003 Dolphin Project.
-
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official SVN repository and contact information can be found at
-// http://code.google.com/p/dolphin-emu/
-
-
-// NOTES (mb2):
-
-// * GP/CPU sync can be done by several way:
-// - MP1 use BP (breakpoint) in movie-menus and mostly PEtoken in 3D
-// - ZWW as Crazy Taxi: PEfinish (GXSetDrawDone)
-// - SMS: BP, PEToken, PEfinish
-// - ZTP: seems to use PEfinish only
-// - Animal Crossing: PEfinish at start but there's a bug...
-//		There's tons of HiWmk/LoWmk ping pong -> Another sync fashion?
-// - Super Monkey Ball Adventures: PEToken. Oddity: read&check-PEToken-value-loop stays
-//		in its JITed block (never fall in Advance() until the game-watchdog's stuff).
-//		That's why we can't let perform the AdvanceCallBack as usual.
-//		The PEToken is volatile now and in the fifo struct.
-// - Super Monkey Ball: PEFinish. This game has the lamest way to deal with fifo sync for our MT's stuff.
-//		A hack is mandatory. DONE and should be ok for other games.
-
-// *What I guess (thx to asynchronous DualCore mode):
-// PPC have a frame-finish watchdog. Handled by system timming stuff like the decrementer.
-// (DualCore mode): I have observed, after ZTP logos, a fifo-recovery start when DECREMENTER_EXCEPTION is throwned.
-// The frame setting (by GP) took too much time and didn't finish properly due to this watchdog.
-// Faster GX plugins required, indeed :p
-
-// * BPs are needed for some game GP/CPU sync.
-// But it could slowdown (MP1 at least) because our GP in DC is faster than "expected" in some area.
-// eg: in movie-menus in MP1, BP are reached quickly.
-// The bad thing is that involve too much PPC work (int ack, lock GP, reset BP, new BP addr, unlock BP...) hence the slowdown.
-// Anyway, emulation should more accurate like this and it emulate some sort of better load balancing.
-// Eather way in those area a more accurate GP timing could be done by slowing down the GP or something less stupid.
-// Not functional and not used atm (breaks MP2).
-
-// * funny, in revs before those with this note, BP irq wasn't cleared (a bug indeed) and MP1 menus was faster.
-// BP irq was raised and ack just once but never cleared. However it's sufficient for MP1 to work.
-// This hack is used atm. Known BPs handling doesn't work well (btw, BP irq clearing might be done by CPIntEnable raising edge).
-// The hack seems to be responsible of the movie stutering in MP1 menus.
-
-// TODO (mb2):
-// * raise watermark Ov/Un irq: POINTLESS since emulated GP timings can't be accuratly set.
-//   Only 3 choices IMHO for a correct emulated load balancing in DC mode:
-//		- make our own GP watchdog hack that can lock CPU if GP too slow. STARTED
-//		- hack directly something in PPC timings (dunno how)
-//		- boost GP so we can consider it as infinitely fast compared to CPU.
-// * raise ReadIdle/CmdIdle flags and observe behaviour of MP1 & ZTP (at least)
-// * Clean useless comments and debug stuff in Read16, Write16, GatherPipeBursted when sync will be fixed for DC
-// * (reminder) do the same in:
-//		PeripheralInterface.cpp, PixelEngine.cpp, OGL->BPStructs.cpp, fifo.cpp... ok just check change log >>
-
-// TODO
-// * Kick GPU from dispatcher, not from writes
-// * Thunking framework
-// * Cleanup of messy now unnecessary safety code in jit
-
-#include "Common.h"
-#include "../PowerPC/PowerPC.h"
-#include "../CoreTiming.h"
-#include "../PluginManager.h"
-#include "../ConfigManager.h"
-#include "MathUtil.h"
-#include "Thread.h"
-#include "Atomic.h"
-
-#include "Memmap.h"
-#include "ProcessorInterface.h"
-#include "GPFifo.h"
-#include "CPU.h"
-#include "../Core.h"
-#include "CommandProcessor.h"
-
-namespace CommandProcessor
-{
-// look for 1002 verts, breakpoint there, see why next draw is flushed
-// TODO(ector): Warn on bbox read/write
-
-// Fifo Status Register
-union UCPStatusReg
-{
-	struct
-	{
-		unsigned OverflowHiWatermark	:	1;
-		unsigned UnderflowLoWatermark	:	1;
-		unsigned ReadIdle				:	1;
-		unsigned CommandIdle			:	1;
-		unsigned Breakpoint				:	1;
-		unsigned						:	11;
-	};
-	u16 Hex;
-	UCPStatusReg() {Hex = 0; }
-	UCPStatusReg(u16 _hex) {Hex = _hex; }
-};
-
-// Fifo Control Register
-union UCPCtrlReg
-{
-	struct
-	{
-		unsigned GPReadEnable			:	1;
-		unsigned CPIntEnable			:	1;
-		unsigned FifoOverflowIntEnable	:	1;
-		unsigned FifoUnderflowIntEnable	:	1;
-		unsigned GPLinkEnable			:	1;
-		unsigned BPEnable				:	1;
-		unsigned						:	10;
-	};
-	u16 Hex;
-	UCPCtrlReg() {Hex = 0; }
-	UCPCtrlReg(u16 _hex) {Hex = _hex; }
-};
-
-// Fifo Control Register
-union UCPClearReg
-{
-	struct
-	{
-		unsigned ClearFifoOverflow		:	1;
-		unsigned ClearFifoUnderflow		:	1;
-		unsigned ClearMetrices			:	1;
-		unsigned						:	13;
-	};
-	u16 Hex;
-	UCPClearReg() {Hex = 0; }
-	UCPClearReg(u16 _hex) {Hex = _hex; }
-};
-
-// STATE_TO_SAVE
-// variables
-UCPStatusReg m_CPStatusReg;
-UCPCtrlReg	m_CPCtrlReg;
-UCPClearReg	m_CPClearReg;
-
-int m_bboxleft;
-int m_bboxtop;
-int m_bboxright;
-int m_bboxbottom;
-u16 m_tokenReg;
-
-SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread
-static u32 fake_GPWatchdogLastToken = 0;
-static Common::Event s_fifoIdleEvent;
-
-void DoState(PointerWrap &p)
-{
-	p.Do(m_CPStatusReg);
-	p.Do(m_CPCtrlReg);
-	p.Do(m_CPClearReg);
-	p.Do(m_bboxleft);
-	p.Do(m_bboxtop);
-	p.Do(m_bboxright);
-	p.Do(m_bboxbottom);
-	p.Do(m_tokenReg);
-	p.Do(fifo);
-}
-
-// function
-void UpdateFifoRegister();
-void UpdateInterrupts();
-
-//inline void WriteLow (u32& _reg, u16 lowbits)  {_reg = (_reg & 0xFFFF0000) | lowbits;}
-//inline void WriteHigh(u32& _reg, u16 highbits) {_reg = (_reg & 0x0000FFFF) | ((u32)highbits << 16);}
-inline void WriteLow (volatile u32& _reg, u16 lowbits)  {Common::AtomicStore(_reg,(_reg & 0xFFFF0000) | lowbits);}
-inline void WriteHigh(volatile u32& _reg, u16 highbits) {Common::AtomicStore(_reg,(_reg & 0x0000FFFF) | ((u32)highbits << 16));}
-
-inline u16 ReadLow  (u32 _reg)  {return (u16)(_reg & 0xFFFF);}
-inline u16 ReadHigh (u32 _reg)  {return (u16)(_reg >> 16);}
-
-int et_UpdateInterrupts;
-
-// for GP watchdog hack
-void IncrementGPWDToken()
-{
-    Common::AtomicIncrement(fifo.Fake_GPWDToken);
-}
-
-// Check every FAKE_GP_WATCHDOG_PERIOD if a PE-frame-finish occured
-// if not then lock CPUThread until GP finish a frame.
-void WaitForFrameFinish()
-{
-	while ((fake_GPWatchdogLastToken == fifo.Fake_GPWDToken) && fifo.bFF_GPReadEnable && (fifo.CPReadWriteDistance > 0) && !(fifo.bFF_BPEnable && fifo.bFF_Breakpoint))
-		s_fifoIdleEvent.MsgWait();
-	
-	fake_GPWatchdogLastToken = fifo.Fake_GPWDToken;
-}
-
-
-void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate)
-{
-	UpdateInterrupts();
-}
-
-void Init()
-{
-	m_CPStatusReg.Hex = 0;
-	m_CPStatusReg.CommandIdle = 1;
-	m_CPStatusReg.ReadIdle = 1;
-
-	m_CPCtrlReg.Hex = 0;
-
-	m_bboxleft = 0;
-	m_bboxtop  = 0;
-	m_bboxright = 640;
-	m_bboxbottom = 480;
-
-	m_tokenReg = 0;
-	
-	fake_GPWatchdogLastToken = 0;
-
-	memset(&fifo,0,sizeof(fifo));
-	fifo.CPCmdIdle  = 1 ;
-	fifo.CPReadIdle = 1;
-
-	s_fifoIdleEvent.Init();
-
-	et_UpdateInterrupts = CoreTiming::RegisterEvent("UpdateInterrupts", UpdateInterrupts_Wrapper);
-}
-
-void Shutdown()
-{
-	s_fifoIdleEvent.Shutdown();
-}
-
-void Read16(u16& _rReturnValue, const u32 _Address)
-{
-	DEBUG_LOG(COMMANDPROCESSOR, "(r): 0x%08x", _Address);
-	switch (_Address & 0xFFF)
-	{
-	case STATUS_REGISTER:
-		//TODO?: if really needed
-		//m_CPStatusReg.CommandIdle = fifo.CPCmdIdle;
-		// uncomment: change a bit the behaviour MP1. Not very useful though
-		m_CPStatusReg.ReadIdle = fifo.CPReadIdle;
-		//m_CPStatusReg.CommandIdle = fifo.CPReadIdle;
-
-		// hack: CPU will always believe fifo is empty and on idle
-		//m_CPStatusReg.ReadIdle = 1;
-		//m_CPStatusReg.CommandIdle = 1;
-		
-		_rReturnValue = m_CPStatusReg.Hex;
-		INFO_LOG(COMMANDPROCESSOR, "\t iBP %s | fREADIDLE %s | fCMDIDLE %s | iOvF %s | iUndF %s"
-			, m_CPStatusReg.Breakpoint ?			"ON" : "OFF"
-			, m_CPStatusReg.ReadIdle ?				"ON" : "OFF"
-			, m_CPStatusReg.CommandIdle ?			"ON" : "OFF"
-			, m_CPStatusReg.OverflowHiWatermark ?	"ON" : "OFF"
-			, m_CPStatusReg.UnderflowLoWatermark ?	"ON" : "OFF"
-				);
-		return;
-
-	case CTRL_REGISTER:		_rReturnValue = m_CPCtrlReg.Hex; return;
-	case CLEAR_REGISTER:	_rReturnValue = m_CPClearReg.Hex; return;
-
-	case FIFO_TOKEN_REGISTER:		_rReturnValue = m_tokenReg; return;
-	case FIFO_BOUNDING_BOX_LEFT:	_rReturnValue = m_bboxleft; return;
-	case FIFO_BOUNDING_BOX_RIGHT:	_rReturnValue = m_bboxright; return;
-	case FIFO_BOUNDING_BOX_TOP:		_rReturnValue = m_bboxtop; return;
-	case FIFO_BOUNDING_BOX_BOTTOM:	_rReturnValue = m_bboxbottom; return;
-
-	case FIFO_BASE_LO:			_rReturnValue = ReadLow (fifo.CPBase); return;
-	case FIFO_BASE_HI:			_rReturnValue = ReadHigh(fifo.CPBase); return;
-	case FIFO_END_LO:			_rReturnValue = ReadLow (fifo.CPEnd);  return;
-	case FIFO_END_HI:			_rReturnValue = ReadHigh(fifo.CPEnd);  return;
-	case FIFO_HI_WATERMARK_LO:	_rReturnValue = ReadLow (fifo.CPHiWatermark); return;
-	case FIFO_HI_WATERMARK_HI:	_rReturnValue = ReadHigh(fifo.CPHiWatermark); return;
-	case FIFO_LO_WATERMARK_LO:	_rReturnValue = ReadLow (fifo.CPLoWatermark); return;
-	case FIFO_LO_WATERMARK_HI:	_rReturnValue = ReadHigh(fifo.CPLoWatermark); return;
-
-	// TODO: cases cleanup
-	case FIFO_RW_DISTANCE_LO:
-		//_rReturnValue = ReadLow (fifo.CPReadWriteDistance);
-		// hack: CPU will always believe fifo is empty and on idle
-		_rReturnValue = 0;
-		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_RW_DISTANCE_LO : %04x", _rReturnValue);
-		return;
-	case FIFO_RW_DISTANCE_HI:
-		//_rReturnValue = ReadHigh(fifo.CPReadWriteDistance);
-		// hack: CPU will always believe fifo is empty and on idle
-		_rReturnValue = 0;
-		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_RW_DISTANCE_HI : %04x", _rReturnValue);
-		return;
-	case FIFO_WRITE_POINTER_LO:
-		_rReturnValue = ReadLow (fifo.CPWritePointer);
-		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_WRITE_POINTER_LO : %04x", _rReturnValue);
-		return;
-	case FIFO_WRITE_POINTER_HI:
-		_rReturnValue = ReadHigh(fifo.CPWritePointer);
-		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_WRITE_POINTER_HI : %04x", _rReturnValue);
-		return;
-	case FIFO_READ_POINTER_LO:
-		//_rReturnValue = ReadLow (fifo.CPReadPointer);
-		// hack: CPU will always believe fifo is empty and on idle
-		_rReturnValue = ReadLow (fifo.CPWritePointer);
-		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_READ_POINTER_LO : %04x", _rReturnValue);
-		return;
-	case FIFO_READ_POINTER_HI:
-		//_rReturnValue = ReadHigh(fifo.CPReadPointer);
-		// hack: CPU will always believe fifo is empty and on idle
-		_rReturnValue = ReadHigh(fifo.CPWritePointer);
-		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_READ_POINTER_HI : %04x", _rReturnValue);
-		return;
-
-	case FIFO_BP_LO: _rReturnValue = ReadLow (fifo.CPBreakpoint); return;
-	case FIFO_BP_HI: _rReturnValue = ReadHigh(fifo.CPBreakpoint); return;
-
-	case CP_PERF0_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF0_L: %04x", _rReturnValue); break;  // XF counters
-	case CP_PERF0_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF0_H: %04x", _rReturnValue); break;
-
-	case CP_PERF1_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF1_L: %04x", _rReturnValue); break;
-	case CP_PERF1_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF1_H: %04x", _rReturnValue); break;
-
-	case CP_PERF2_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF2_L: %04x", _rReturnValue); break;
-	case CP_PERF2_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF2_H: %04x", _rReturnValue); break;
-
-	case CP_PERF3_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF3_L: %04x", _rReturnValue); break;
-	case CP_PERF3_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF3_H: %04x", _rReturnValue); break;
-
-//	case 0x64:
-//		return 4; //Number of clocks per vertex.. todo: calculate properly
-
-		//add all the other regs here? are they ever read?
-	default:
-		WARN_LOG(COMMANDPROCESSOR, "(r16) unknown CP reg @ %08x", _Address);
-		_rReturnValue = 0;
-		return;
-	}
-}
-
-bool AllowIdleSkipping()
-{
-	return !SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore || (!m_CPCtrlReg.CPIntEnable && !m_CPCtrlReg.BPEnable);
-}
-
-void Write16(const u16 _Value, const u32 _Address)
-{
-	INFO_LOG(COMMANDPROCESSOR, "(write16): 0x%04x @ 0x%08x",_Value,_Address);
-
-	//Spin until queue is empty - it WILL become empty because this is the only thread
-	//that submits data
-
-	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore)
-	{
-		// Force complete fifo flush if we attempt to set/reset the fifo (API GXSetGPFifo or equivalent)
-		// It's kind of an API hack but it works for lots of games... and I hope it's the same way for every games.
-		// TODO: HLE for GX fifo's APIs?
-		// Here is the hack:
-		// - if (attempt to overwrite CTRL_REGISTER by 0x0000)
-		//		// then we assume CPReadWriteDistance will be overwrited very soon.
-		//		- if (fifo is not empty)
-		//			// (not 100% sure): shouln't happen unless PPC think having trouble with the sync
-		//			// and it attempt a fifo recovery (look for PI_FIFO_RESET in log).
-		//			// If we want to emulate self fifo recovery we need proper GX metrics emulation... yeah sure :p
-		//			- spin until fifo is empty
-		// - else
-		//		- normal write16
-
-		if (((_Address&0xFFF) == CTRL_REGISTER) && (_Value == 0)) // API hack
-		{
-			// weird MP1 redo that right after linking fifo with GP... hmmm
-			/*_dbg_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance == 0,
-				"WTF! Something went wrong with GP/PPC the sync! -> CPReadWriteDistance: 0x%08X\n"
-				" - The fifo is not empty but we are going to lock it anyway.\n"
-				" - \"Normaly\", this is due to fifo-hang-so-lets-attempt-recovery.\n"
-				" - The bad news is dolphin don't support special recovery features like GXfifo's metric yet.\n"
-				" - The good news is, the time you read that message, the fifo should be empty now :p\n"
-				" - Anyway, fifo flush will be forced if you press OK and dolphin might continue to work...\n"
-				" - We aren't betting on that :)", fifo.CPReadWriteDistance);
-                        */
-			DEBUG_LOG(COMMANDPROCESSOR, "*********************** GXSetGPFifo very soon? ***********************");
-			// (mb2) We don't sleep here since it could be a perf issue for super monkey ball (yup only this game IIRC)
-			// Touching that game is a no-go so I don't want to take the risk :p
-			while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance > 0 && !(fifo.bFF_BPEnable && fifo.bFF_Breakpoint) )
-				s_fifoIdleEvent.MsgWait();
-		}
-	}
-
-	switch (_Address & 0xFFF)
-	{
-	case STATUS_REGISTER:
-		{
-			UCPStatusReg tmpStatus(_Value);
-
-			// set the flags to "all is okay"
-			m_CPStatusReg.OverflowHiWatermark	= 0;
-			m_CPStatusReg.UnderflowLoWatermark	= 0;
-
-			// TOCHECK (mb2): could BP irq be cleared here too?
-			//if (tmpStatus.Breakpoint!=m_CPStatusReg.Breakpoint) _asm int 3
-			// breakpoint
-			/*if (tmpStatus.Breakpoint)
-			{
-				m_CPStatusReg.Breakpoint = 0;
-			}
-			//fifo.bFF_Breakpoint = m_CPStatusReg.Breakpoint;
-			fifo.bFF_Breakpoint = m_CPStatusReg.Breakpoint	? true : false;
-			//LOG(COMMANDPROCESSOR,"fifo.bFF_Breakpoint : %i",fifo.bFF_Breakpoint);
-			*/
-
-			// update interrupts
-			UpdateInterrupts();
-
-			INFO_LOG(COMMANDPROCESSOR,"\t write to STATUS_REGISTER : %04x", _Value);
-		}
-		break;
-
-	case CTRL_REGISTER:
-		{
-			UCPCtrlReg tmpCtrl(_Value);
-
-			Common::AtomicStore(fifo.bFF_GPReadEnable,	tmpCtrl.GPReadEnable);
-			Common::AtomicStore(fifo.bFF_GPLinkEnable,	tmpCtrl.GPLinkEnable);
-			Common::AtomicStore(fifo.bFF_BPEnable,		tmpCtrl.BPEnable);
-
-			// TOCHECK (mb2): could BP irq be cleared with w16 to STATUS_REGISTER?
-			// funny hack: eg in MP1 if we disable the clear breakpoint ability by commenting this block
-			// the game is of course faster but looks stable too.
-			// Well, the hack is more stable than the "proper" way actualy :p ... it breaks MP2 when ship lands
-			// So I let the hack for now.
-			// Checkmate re-enabled it, so please test
-			// TODO (mb2): fix this!
-			
-			// BP interrupt is cleared here
-
-			//if (m_CPCtrlReg.CPIntEnable && !tmpCtrl.Hex) // falling edge
-			// raising edge or falling egde
-			if ((!m_CPCtrlReg.CPIntEnable && tmpCtrl.CPIntEnable) || (m_CPCtrlReg.CPIntEnable && !tmpCtrl.Hex)) 
-			{
-				m_CPStatusReg.Breakpoint = 0;
-				Common::AtomicStore(fifo.bFF_Breakpoint, 0);
-			}
-
-			m_CPCtrlReg.Hex = tmpCtrl.Hex;
-			UpdateInterrupts();
-			DEBUG_LOG(COMMANDPROCESSOR,"\t write to CTRL_REGISTER : %04x", _Value);
-			DEBUG_LOG(COMMANDPROCESSOR, "\t GPREAD %s | CPULINK %s | BP %s || CPIntEnable %s | OvF %s | UndF %s"
-				, fifo.bFF_GPReadEnable ?				"ON" : "OFF"
-				, fifo.bFF_GPLinkEnable ?				"ON" : "OFF"
-				, fifo.bFF_BPEnable ?					"ON" : "OFF"
-				, m_CPCtrlReg.CPIntEnable ?				"ON" : "OFF"
-				, m_CPCtrlReg.FifoOverflowIntEnable ?	"ON" : "OFF"
-				, m_CPCtrlReg.FifoUnderflowIntEnable ?	"ON" : "OFF"
-				);
-
-		}
-		break;
-
-	case PERF_SELECT:
-		{
-			WARN_LOG(COMMANDPROCESSOR, "write to PERF_SELECT: %04x", _Value);
-			// Seems to select which set of perf counters should be exposed.
-		}
-		break;
-
-	case CLEAR_REGISTER:
-		{
-			// ????
-			UCPClearReg tmpClearReg(_Value);			
-			m_CPClearReg.Hex = 0;
-			INFO_LOG(COMMANDPROCESSOR,"\t write to CLEAR_REGISTER : %04x",_Value);
-		}		
-		break;
-
-	// Fifo Registers
-	case FIFO_TOKEN_REGISTER:
-		m_tokenReg = _Value;
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_TOKEN_REGISTER : %04x", _Value);
-		break;
-
-	case FIFO_BASE_LO:			
-		WriteLow ((u32 &)fifo.CPBase, _Value);
-		fifo.CPBase &= 0xFFFFFFE0;
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_BASE_LO : %04x", _Value);
-		break;
-	case FIFO_BASE_HI:			
-		WriteHigh((u32 &)fifo.CPBase, _Value);
-		fifo.CPBase &= 0xFFFFFFE0;
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_BASE_HI : %04x", _Value);
-		break;
-	case FIFO_END_LO:			
-		WriteLow ((u32 &)fifo.CPEnd,  _Value);
-		fifo.CPEnd &= 0xFFFFFFE0;
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_END_LO : %04x", _Value);
-		break;
-	case FIFO_END_HI:			
-		WriteHigh((u32 &)fifo.CPEnd,  _Value);
-		fifo.CPEnd &= 0xFFFFFFE0;
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_END_HI : %04x", _Value);
-		break;
-
-	// Hm. Should we really & these with FFFFFFE0?
-	// (mb2): never seen 32B not aligned values for those following regs.
-	// fifo.CPEnd is the only value that could be not 32B aligned so far.
-	case FIFO_WRITE_POINTER_LO:
-		WriteLow ((u32 &)fifo.CPWritePointer, _Value); fifo.CPWritePointer &= 0xFFFFFFE0;
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_WRITE_POINTER_LO : %04x", _Value);
-		break;
-	case FIFO_WRITE_POINTER_HI:
-		WriteHigh((u32 &)fifo.CPWritePointer, _Value); fifo.CPWritePointer &= 0xFFFFFFE0;
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_WRITE_POINTER_HI : %04x", _Value);
-		break;
-	case FIFO_READ_POINTER_LO:
-		WriteLow ((u32 &)fifo.CPReadPointer, _Value); fifo.CPReadPointer &= 0xFFFFFFE0;
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_READ_POINTER_LO : %04x", _Value);
-		break;
-	case FIFO_READ_POINTER_HI:
-		WriteHigh((u32 &)fifo.CPReadPointer, _Value); fifo.CPReadPointer &= 0xFFFFFFE0;
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_READ_POINTER_HI : %04x", _Value);
-		break;
-
-	case FIFO_HI_WATERMARK_LO:
-		WriteLow ((u32 &)fifo.CPHiWatermark, _Value);
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_HI_WATERMARK_LO : %04x", _Value);
-		break;
-	case FIFO_HI_WATERMARK_HI:
-		WriteHigh((u32 &)fifo.CPHiWatermark, _Value);
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_HI_WATERMARK_HI : %04x", _Value);
-		break;
-	case FIFO_LO_WATERMARK_LO:
-		WriteLow ((u32 &)fifo.CPLoWatermark, _Value);
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_LO_WATERMARK_LO : %04x", _Value);
-		break;
-	case FIFO_LO_WATERMARK_HI:
-		WriteHigh((u32 &)fifo.CPLoWatermark, _Value);
-		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_LO_WATERMARK_HI : %04x", _Value);
-		break;
-
-	case FIFO_BP_LO:			
-		WriteLow ((u32 &)fifo.CPBreakpoint,	_Value);
-		DEBUG_LOG(COMMANDPROCESSOR,"write to FIFO_BP_LO : %04x", _Value);
-		break;
-	case FIFO_BP_HI:			
-		WriteHigh((u32 &)fifo.CPBreakpoint,	_Value);
-		DEBUG_LOG(COMMANDPROCESSOR,"write to FIFO_BP_HI : %04x", _Value);
-		break;
-
-	// Super monkey try to overwrite CPReadWriteDistance by an old saved RWD value. Which is lame for us.
-	// hack: We have to force CPU to think fifo is alway empty and on idle.
-	// When we fall here CPReadWriteDistance should be always null and the game should always want to overwrite it by 0.
-	// So, we can skip it.
-	case FIFO_RW_DISTANCE_HI:
-		//WriteHigh((u32 &)fifo.CPReadWriteDistance, _Value);
-		DEBUG_LOG(COMMANDPROCESSOR,"try to write to FIFO_RW_DISTANCE_HI : %04x", _Value);
-		break;
-	case FIFO_RW_DISTANCE_LO:
-		//WriteLow((u32 &)fifo.CPReadWriteDistance, _Value);
-		DEBUG_LOG(COMMANDPROCESSOR,"try to write to FIFO_RW_DISTANCE_LO : %04x", _Value);
-		break;
-	default:
-		WARN_LOG(COMMANDPROCESSOR, "(w16) unknown CP reg write %04x @ %08x", _Value, _Address);
-	}
-
-	// TODO(mb2): better. Check if it help: avoid CPReadPointer overwrites when stupidly done like in Super Monkey Ball
-	if ((!fifo.bFF_GPReadEnable && fifo.CPReadIdle) || !SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore) // TOCHECK(mb2): check again if thread safe?
-		UpdateFifoRegister();
-}
-
-void Read32(u32& _rReturnValue, const u32 _Address)
-{
-	_rReturnValue = 0;
-	_dbg_assert_msg_(COMMANDPROCESSOR, 0, "Read32 from CommandProccessor at 0x%08x", _Address);
-}
-
-void Write32(const u32 _Data, const u32 _Address)
-{
-	_dbg_assert_msg_(COMMANDPROCESSOR, 0, "Write32 at CommandProccessor at 0x%08x", _Address);
-}
-
-void STACKALIGN GatherPipeBursted()
-{
-	// if we aren't linked, we don't care about gather pipe data
-	if (!fifo.bFF_GPLinkEnable)
-		return;
-
-	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore)
-	{
-		// update the fifo-pointer
-		fifo.CPWritePointer += GPFifo::GATHER_PIPE_SIZE;
-		if (fifo.CPWritePointer >= fifo.CPEnd)
-			fifo.CPWritePointer = fifo.CPBase;
-        Common::AtomicAdd(fifo.CPReadWriteDistance, GPFifo::GATHER_PIPE_SIZE);
-
-		// High watermark overflow handling (hacked way)
-		if (fifo.CPReadWriteDistance > fifo.CPHiWatermark)
-		{
-			// we should raise an Ov interrupt for an accurate fifo emulation and let PPC deal with it.
-			// But it slowdowns things because of if(interrupt blah blah){} blocks for each 32B fifo transactions.
-			// CPU would be a bit more loaded too by its interrupt handling...
-			// Eather way, CPU would have the ability to resume another thread.
-			// To be clear: this spin loop is like a critical section spin loop in the emulated GX thread hence "hacked way"
-
-			// Yes, in real life, the only purpose of the low watermark interrupt is just for cooling down OV contention.
-			// - @ game start -> watermark init: Overflow enabled, Underflow disabled
-			// - if (OV is raised)
-			//		- CPU stop to write to fifo
-			//		- enable Underflow interrupt (this only happens if OV is raised)
-			//		- do other things
-			// - if (Underflow is raised (implicite: AND if an OV has been raised))
-			//		- CPU can write to fifo
-			//		- disable Underflow interrupt
-
-			INFO_LOG(COMMANDPROCESSOR, "(GatherPipeBursted): CPHiWatermark reached");
-			// Wait for GPU to catch up
-			while (!(fifo.bFF_BPEnable && fifo.bFF_Breakpoint) && fifo.CPReadWriteDistance > fifo.CPLoWatermark)
-				s_fifoIdleEvent.MsgWait();
-		}
-		// check if we are in sync
-		_assert_msg_(COMMANDPROCESSOR, fifo.CPWritePointer	== ProcessorInterface::Fifo_CPUWritePointer, "FIFOs linked but out of sync");
-		_assert_msg_(COMMANDPROCESSOR, fifo.CPBase			== ProcessorInterface::Fifo_CPUBase, "FIFOs linked but out of sync");
-		_assert_msg_(COMMANDPROCESSOR, fifo.CPEnd			== ProcessorInterface::Fifo_CPUEnd, "FIFOs linked but out of sync");
-	}
-	else
-	{
-		fifo.CPWritePointer += GPFifo::GATHER_PIPE_SIZE;
-		if (fifo.CPWritePointer >= fifo.CPEnd)
-			fifo.CPWritePointer = fifo.CPBase;
-		// check if we are in sync
-		_assert_msg_(COMMANDPROCESSOR, fifo.CPWritePointer	== ProcessorInterface::Fifo_CPUWritePointer, "FIFOs linked but out of sync");
-		_assert_msg_(COMMANDPROCESSOR, fifo.CPBase			== ProcessorInterface::Fifo_CPUBase, "FIFOs linked but out of sync");
-		_assert_msg_(COMMANDPROCESSOR, fifo.CPEnd			== ProcessorInterface::Fifo_CPUEnd, "FIFOs linked but out of sync");
-
-		UpdateFifoRegister();
-	}
-}
-
-
-// This is mostly used in single core mode
-void CatchUpGPU()
-{
-	// check if we are able to run this buffer
-	if ((fifo.bFF_GPReadEnable) && !(fifo.bFF_BPEnable && fifo.bFF_Breakpoint))
-	{
-		// HyperIris: Memory::GetPointer is an expensive call, call it less, run faster
-		u8 *ptr = Memory::GetPointer(fifo.CPReadPointer);
-		// HyperIris: point out by magumagu, GetVideo() is a bottleneck, so move it out of loop;
-		Common::PluginVideo * pVideo = CPluginManager::GetInstance().GetVideo();
-
-		while (fifo.CPReadWriteDistance > 0)
-		{
-			// check if we are on a breakpoint
-			if (fifo.bFF_BPEnable)
-			{
-				//MessageBox(0,"Breakpoint enabled",0,0);
-				if ((fifo.CPReadPointer & ~0x1F) == (fifo.CPBreakpoint & ~0x1F))
-				{
-					//_assert_msg_(POWERPC,0,"BP: %08x",fifo.CPBreakpoint);
-					//LOG(COMMANDPROCESSOR,"!!! BP irq raised");
-					fifo.bFF_Breakpoint = 1;
-					m_CPStatusReg.Breakpoint = 1;
-					UpdateInterrupts();
-					break;
-				}
-			}
-
-			// read the data and send it to the VideoPlugin
-			fifo.CPReadPointer += 32;
-			// We are going to do FP math on the main thread so have to save the current state
-			SaveSSEState();
-			LoadDefaultSSEState();
-			pVideo->Video_SendFifoData(ptr,32);
-			LoadSSEState();
-			// adjust
-			ptr += 32;
-
-			fifo.CPReadWriteDistance -= 32;
-
-			// increase the ReadPtr
-			if (fifo.CPReadPointer >= fifo.CPEnd)
-			{
-				fifo.CPReadPointer = fifo.CPBase;
-				// adjust, take care
-				ptr = Memory::GetPointer(fifo.CPReadPointer);
-				INFO_LOG(COMMANDPROCESSOR, "BUFFER LOOP");
-			}
-		}
-	}
-}
-
-// __________________________________________________________________________________________________
-// !!! Temporary (I hope): re-used in DC mode
-// UpdateFifoRegister
-// It's no problem if the gfx falls behind a little bit. Better make sure to stop the cpu thread
-// when the distance is way huge, though.
-// So:
-// CPU thread
-///  0. Write data (done before entering this)
-//   1. Compute distance
-//   2. If distance > threshold, sleep and goto 1
-// GPU thread
-//   1. Compute distance
-//   2. If distance < threshold, sleep and goto 1 (or wait for trigger?)
-//   3. Read and use a bit of data, goto 1
-void UpdateFifoRegister()
-{
-	// update the distance
-	int wp = fifo.CPWritePointer;
-	int rp = fifo.CPReadPointer;
-	int dist;
-	if (wp >= rp)
-		dist = wp - rp;
-	else
-		dist = (wp - fifo.CPBase) + (fifo.CPEnd - rp);
-
-	Common::AtomicStore(fifo.CPReadWriteDistance, dist);
-
-	if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore)
-		CatchUpGPU();
-}
-
-void UpdateInterrupts()
-{
-	if (m_CPCtrlReg.CPIntEnable &&
-		(fifo.bFF_BPEnable && fifo.bFF_Breakpoint))
-	{
-		ProcessorInterface::SetInterrupt(ProcessorInterface::INT_CAUSE_CP, true);
-	}
-	else
-	{
-		ProcessorInterface::SetInterrupt(ProcessorInterface::INT_CAUSE_CP, false);
-	}
-}
-
-void UpdateInterruptsFromVideoPlugin()
-{
-	if (fifo.bFF_Breakpoint) // implicit since only BP trigger (see fifo.cpp) can call this
-		m_CPStatusReg.Breakpoint = 1;
-	CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts);
-}
-
-void SetFifoIdleFromVideoPlugin()
-{
-	s_fifoIdleEvent.Set();
-}
-
-} // end of namespace CommandProcessor
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+
+// NOTES (mb2):
+
+// * GP/CPU sync can be done in several ways:
+// - MP1 uses BP (breakpoint) in movie-menus and mostly PEtoken in 3D
+// - ZWW as Crazy Taxi: PEfinish (GXSetDrawDone)
+// - SMS: BP, PEToken, PEfinish
+// - ZTP: seems to use PEfinish only
+// - Animal Crossing: PEfinish at start but there's a bug...
+//		There's tons of HiWmk/LoWmk ping pong -> Another sync fashion?
+// - Super Monkey Ball Adventures: PEToken. Oddity: the read&check-PEToken-value loop stays
+//		in its JITed block (it never falls into Advance() until the game-watchdog's stuff kicks in).
+//		That's why we can't perform the AdvanceCallBack as usual.
+//		The PEToken is volatile now and lives in the fifo struct.
+// - Super Monkey Ball: PEFinish. This game has the lamest way to deal with fifo sync for our MT stuff.
+//		A hack is mandatory. DONE, and it should be ok for other games.
+
+// * What I guess (thx to asynchronous DualCore mode):
+// PPC has a frame-finish watchdog, handled by system timing stuff like the decrementer.
+// (DualCore mode): I have observed, after the ZTP logos, a fifo recovery starting when DECREMENTER_EXCEPTION is thrown.
+// The frame setting (by GP) took too much time and didn't finish properly due to this watchdog.
+// Faster GX plugins required, indeed :p
+
+// * BPs are needed for GP/CPU sync in some games.
+// But they can cause slowdowns (in MP1 at least) because our GP in DC is faster than "expected" in some areas.
+// eg: in movie-menus in MP1, BPs are reached quickly.
+// The bad thing is that this involves too much PPC work (int ack, lock GP, reset BP, new BP addr, unlock BP...), hence the slowdown.
+// Anyway, emulation should be more accurate like this, and it emulates some sort of better load balancing.
+// Either way, in those areas a more accurate GP timing could be achieved by slowing down the GP or something less stupid.
+// Not functional and not used atm (breaks MP2).
+
+// * Funny: in revs before those with this note, the BP irq wasn't cleared (a bug indeed) and MP1 menus were faster.
+// The BP irq was raised and acked just once but never cleared. However, that's sufficient for MP1 to work.
+// This hack is used atm. Known BPs handling doesn't work well (btw, BP irq clearing might be done by the CPIntEnable rising edge).
+// The hack seems to be responsible for the movie stuttering in MP1 menus.
+
+// TODO (mb2):
+// * raise watermark Ov/Un irq: POINTLESS since emulated GP timings can't be accurately set.
+//   Only 3 choices IMHO for a correct emulated load balancing in DC mode:
+//		- make our own GP watchdog hack that can lock CPU if GP is too slow. STARTED
+//		- hack directly something in PPC timings (dunno how)
+//		- boost GP so we can consider it as infinitely fast compared to CPU.
+// * raise ReadIdle/CmdIdle flags and observe behaviour of MP1 & ZTP (at least)
+// * Clean useless comments and debug stuff in Read16, Write16, GatherPipeBursted once sync is fixed for DC
+// * (reminder) do the same in:
+//		PeripheralInterface.cpp, PixelEngine.cpp, OGL->BPStructs.cpp, fifo.cpp... ok just check change log >>
+
+// TODO
+// * Kick GPU from dispatcher, not from writes
+// * Thunking framework
+// * Cleanup of messy now unnecessary safety code in jit
+
+#include "Common.h"
+#include "VideoCommon.h"
+#include "MathUtil.h"
+#include "Thread.h"
+#include "Atomic.h"
+
+#include "Fifo.h"
+#include "ChunkFile.h"
+#include "CommandProcessor.h"
+
+
+namespace CommandProcessor
+{
+
+// look for 1002 verts, breakpoint there, see why next draw is flushed
+// TODO(ector): Warn on bbox read/write
+
+// Fifo Status Register
+union UCPStatusReg
+{
+	struct
+	{
+		unsigned OverflowHiWatermark	:	1;
+		unsigned UnderflowLoWatermark	:	1;
+		unsigned ReadIdle				:	1;
+		unsigned CommandIdle			:	1;
+		unsigned Breakpoint				:	1;
+		unsigned						:	11;
+	};
+	u16 Hex;
+	UCPStatusReg() {Hex = 0; }
+	UCPStatusReg(u16 _hex) {Hex = _hex; }
+};
+
+// Fifo Control Register
+union UCPCtrlReg
+{
+	struct
+	{
+		unsigned GPReadEnable			:	1;
+		unsigned CPIntEnable			:	1;
+		unsigned FifoOverflowIntEnable	:	1;
+		unsigned FifoUnderflowIntEnable	:	1;
+		unsigned GPLinkEnable			:	1;
+		unsigned BPEnable				:	1;
+		unsigned						:	10;
+	};
+	u16 Hex;
+	UCPCtrlReg() {Hex = 0; }
+	UCPCtrlReg(u16 _hex) {Hex = _hex; }
+};
+
+// Fifo Clear Register
+union UCPClearReg
+{
+	struct
+	{
+		unsigned ClearFifoOverflow		:	1;
+		unsigned ClearFifoUnderflow		:	1;
+		unsigned ClearMetrices			:	1;
+		unsigned						:	13;
+	};
+	u16 Hex;
+	UCPClearReg() {Hex = 0; }
+	UCPClearReg(u16 _hex) {Hex = _hex; }
+};
+
+// STATE_TO_SAVE
+// variables
+UCPStatusReg m_CPStatusReg;
+UCPCtrlReg	m_CPCtrlReg;
+UCPClearReg	m_CPClearReg;
+
+int m_bboxleft;
+int m_bboxtop;
+int m_bboxright;
+int m_bboxbottom;
+u16 m_tokenReg;
+
+SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread
+static u32 fake_GPWatchdogLastToken = 0;
+static Common::Event s_fifoIdleEvent;
+
+enum
+{
+	GATHER_PIPE_SIZE = 32,
+    INT_CAUSE_CP =  0x800
+};
+
+void DoState(PointerWrap &p)
+{
+	p.Do(m_CPStatusReg);
+	p.Do(m_CPCtrlReg);
+	p.Do(m_CPClearReg);
+	p.Do(m_bboxleft);
+	p.Do(m_bboxtop);
+	p.Do(m_bboxright);
+	p.Do(m_bboxbottom);
+	p.Do(m_tokenReg);
+	p.Do(fifo);
+}
+
+// function
+void UpdateFifoRegister();
+void UpdateInterrupts();
+
+//inline void WriteLow (u32& _reg, u16 lowbits)  {_reg = (_reg & 0xFFFF0000) | lowbits;}
+//inline void WriteHigh(u32& _reg, u16 highbits) {_reg = (_reg & 0x0000FFFF) | ((u32)highbits << 16);}
+inline void WriteLow (volatile u32& _reg, u16 lowbits)  {Common::AtomicStore(_reg,(_reg & 0xFFFF0000) | lowbits);}
+inline void WriteHigh(volatile u32& _reg, u16 highbits) {Common::AtomicStore(_reg,(_reg & 0x0000FFFF) | ((u32)highbits << 16));}
+
+inline u16 ReadLow  (u32 _reg)  {return (u16)(_reg & 0xFFFF);}
+inline u16 ReadHigh (u32 _reg)  {return (u16)(_reg >> 16);}
+
+int et_UpdateInterrupts;
+
+// for GP watchdog hack
+void IncrementGPWDToken()
+{
+    Common::AtomicIncrement(fifo.Fake_GPWDToken);
+}
+
+// Check every FAKE_GP_WATCHDOG_PERIOD whether a PE-frame-finish occurred;
+// if not, lock CPUThread until the GP finishes a frame.
+void WaitForFrameFinish()
+{
+	while ((fake_GPWatchdogLastToken == fifo.Fake_GPWDToken) && fifo.bFF_GPReadEnable && (fifo.CPReadWriteDistance > 0) && !(fifo.bFF_BPEnable && fifo.bFF_Breakpoint))
+		s_fifoIdleEvent.MsgWait();
+	
+	fake_GPWatchdogLastToken = fifo.Fake_GPWDToken;
+}
+
+
+void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate)
+{
+	UpdateInterrupts();
+}
+
+void Init()
+{
+	m_CPStatusReg.Hex = 0;
+	m_CPStatusReg.CommandIdle = 1;
+	m_CPStatusReg.ReadIdle = 1;
+
+	m_CPCtrlReg.Hex = 0;
+
+	m_bboxleft = 0;
+	m_bboxtop  = 0;
+	m_bboxright = 640;
+	m_bboxbottom = 480;
+
+	m_tokenReg = 0;
+	
+	fake_GPWatchdogLastToken = 0;
+
+	memset(&fifo,0,sizeof(fifo));
+	fifo.CPCmdIdle  = 1 ;
+	fifo.CPReadIdle = 1;
+
+	s_fifoIdleEvent.Init();
+
+    et_UpdateInterrupts = g_VideoInitialize.pRegisterEvent("UpdateInterrupts", UpdateInterrupts_Wrapper);
+}
+
+
+
+void Shutdown()
+{
+	s_fifoIdleEvent.Shutdown();
+}
+
+void Read16(u16& _rReturnValue, const u32 _Address)
+{
+	DEBUG_LOG(COMMANDPROCESSOR, "(r): 0x%08x", _Address);
+	switch (_Address & 0xFFF)
+	{
+	case STATUS_REGISTER:
+		//TODO?: if really needed
+		//m_CPStatusReg.CommandIdle = fifo.CPCmdIdle;
+		// uncomment: change a bit the behaviour MP1. Not very useful though
+		m_CPStatusReg.ReadIdle = fifo.CPReadIdle;
+		//m_CPStatusReg.CommandIdle = fifo.CPReadIdle;
+
+		// hack: CPU will always believe fifo is empty and on idle
+		//m_CPStatusReg.ReadIdle = 1;
+		//m_CPStatusReg.CommandIdle = 1;
+		
+		_rReturnValue = m_CPStatusReg.Hex;
+		INFO_LOG(COMMANDPROCESSOR, "\t iBP %s | fREADIDLE %s | fCMDIDLE %s | iOvF %s | iUndF %s"
+			, m_CPStatusReg.Breakpoint ?			"ON" : "OFF"
+			, m_CPStatusReg.ReadIdle ?				"ON" : "OFF"
+			, m_CPStatusReg.CommandIdle ?			"ON" : "OFF"
+			, m_CPStatusReg.OverflowHiWatermark ?	"ON" : "OFF"
+			, m_CPStatusReg.UnderflowLoWatermark ?	"ON" : "OFF"
+				);
+		return;
+
+	case CTRL_REGISTER:		_rReturnValue = m_CPCtrlReg.Hex; return;
+	case CLEAR_REGISTER:	_rReturnValue = m_CPClearReg.Hex; return;
+
+	case FIFO_TOKEN_REGISTER:		_rReturnValue = m_tokenReg; return;
+	case FIFO_BOUNDING_BOX_LEFT:	_rReturnValue = m_bboxleft; return;
+	case FIFO_BOUNDING_BOX_RIGHT:	_rReturnValue = m_bboxright; return;
+	case FIFO_BOUNDING_BOX_TOP:		_rReturnValue = m_bboxtop; return;
+	case FIFO_BOUNDING_BOX_BOTTOM:	_rReturnValue = m_bboxbottom; return;
+
+	case FIFO_BASE_LO:			_rReturnValue = ReadLow (fifo.CPBase); return;
+	case FIFO_BASE_HI:			_rReturnValue = ReadHigh(fifo.CPBase); return;
+	case FIFO_END_LO:			_rReturnValue = ReadLow (fifo.CPEnd);  return;
+	case FIFO_END_HI:			_rReturnValue = ReadHigh(fifo.CPEnd);  return;
+	case FIFO_HI_WATERMARK_LO:	_rReturnValue = ReadLow (fifo.CPHiWatermark); return;
+	case FIFO_HI_WATERMARK_HI:	_rReturnValue = ReadHigh(fifo.CPHiWatermark); return;
+	case FIFO_LO_WATERMARK_LO:	_rReturnValue = ReadLow (fifo.CPLoWatermark); return;
+	case FIFO_LO_WATERMARK_HI:	_rReturnValue = ReadHigh(fifo.CPLoWatermark); return;
+
+	// TODO: cases cleanup
+	case FIFO_RW_DISTANCE_LO:
+		//_rReturnValue = ReadLow (fifo.CPReadWriteDistance);
+		// hack: CPU will always believe fifo is empty and on idle
+		_rReturnValue = 0;
+		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_RW_DISTANCE_LO : %04x", _rReturnValue);
+		return;
+	case FIFO_RW_DISTANCE_HI:
+		//_rReturnValue = ReadHigh(fifo.CPReadWriteDistance);
+		// hack: CPU will always believe fifo is empty and on idle
+		_rReturnValue = 0;
+		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_RW_DISTANCE_HI : %04x", _rReturnValue);
+		return;
+	case FIFO_WRITE_POINTER_LO:
+		_rReturnValue = ReadLow (fifo.CPWritePointer);
+		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_WRITE_POINTER_LO : %04x", _rReturnValue);
+		return;
+	case FIFO_WRITE_POINTER_HI:
+		_rReturnValue = ReadHigh(fifo.CPWritePointer);
+		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_WRITE_POINTER_HI : %04x", _rReturnValue);
+		return;
+	case FIFO_READ_POINTER_LO:
+		//_rReturnValue = ReadLow (fifo.CPReadPointer);
+		// hack: CPU will always believe fifo is empty and on idle
+		_rReturnValue = ReadLow (fifo.CPWritePointer);
+		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_READ_POINTER_LO : %04x", _rReturnValue);
+		return;
+	case FIFO_READ_POINTER_HI:
+		//_rReturnValue = ReadHigh(fifo.CPReadPointer);
+		// hack: CPU will always believe fifo is empty and on idle
+		_rReturnValue = ReadHigh(fifo.CPWritePointer);
+		DEBUG_LOG(COMMANDPROCESSOR, "read FIFO_READ_POINTER_HI : %04x", _rReturnValue);
+		return;
+
+	case FIFO_BP_LO: _rReturnValue = ReadLow (fifo.CPBreakpoint); return;
+	case FIFO_BP_HI: _rReturnValue = ReadHigh(fifo.CPBreakpoint); return;
+
+	case CP_PERF0_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF0_L: %04x", _rReturnValue); break;  // XF counters
+	case CP_PERF0_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF0_H: %04x", _rReturnValue); break;
+
+	case CP_PERF1_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF1_L: %04x", _rReturnValue); break;
+	case CP_PERF1_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF1_H: %04x", _rReturnValue); break;
+
+	case CP_PERF2_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF2_L: %04x", _rReturnValue); break;
+	case CP_PERF2_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF2_H: %04x", _rReturnValue); break;
+
+	case CP_PERF3_L: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF3_L: %04x", _rReturnValue); break;
+	case CP_PERF3_H: _rReturnValue = 0; WARN_LOG(COMMANDPROCESSOR, "Read from PERF3_H: %04x", _rReturnValue); break;
+
+//	case 0x64:
+//		return 4; //Number of clocks per vertex.. todo: calculate properly
+
+		//add all the other regs here? are they ever read?
+	default:
+		WARN_LOG(COMMANDPROCESSOR, "(r16) unknown CP reg @ %08x", _Address);
+		_rReturnValue = 0;
+		return;
+	}
+}
+
+bool AllowIdleSkipping()
+{
+	return !g_VideoInitialize.bUseDualCore || (!m_CPCtrlReg.CPIntEnable && !m_CPCtrlReg.BPEnable);
+}
+
+void Write16(const u16 _Value, const u32 _Address)
+{
+	INFO_LOG(COMMANDPROCESSOR, "(write16): 0x%04x @ 0x%08x",_Value,_Address);
+
+	//Spin until queue is empty - it WILL become empty because this is the only thread
+	//that submits data
+
+	if (g_VideoInitialize.bUseDualCore)
+	{
+		// Force a complete fifo flush if we attempt to set/reset the fifo (API GXSetGPFifo or equivalent).
+		// It's kind of an API hack but it works for lots of games... and I hope it works the same way for every game.
+		// TODO: HLE for GX fifo's APIs?
+		// Here is the hack:
+		// - if (attempt to overwrite CTRL_REGISTER with 0x0000)
+		//		// then we assume CPReadWriteDistance will be overwritten very soon.
+		//		- if (fifo is not empty)
+		//			// (not 100% sure): shouldn't happen unless the PPC thinks it is having trouble with the sync
+		//			// and it attempts a fifo recovery (look for PI_FIFO_RESET in log).
+		//			// If we want to emulate self fifo recovery we need proper GX metrics emulation... yeah sure :p
+		//			- spin until fifo is empty
+		// - else
+		//		- normal write16
+
+		if (((_Address&0xFFF) == CTRL_REGISTER) && (_Value == 0)) // API hack
+		{
+			// weird MP1 redo that right after linking fifo with GP... hmmm
+			/*_dbg_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance == 0,
+				"WTF! Something went wrong with GP/PPC the sync! -> CPReadWriteDistance: 0x%08X\n"
+				" - The fifo is not empty but we are going to lock it anyway.\n"
+				" - \"Normaly\", this is due to fifo-hang-so-lets-attempt-recovery.\n"
+				" - The bad news is dolphin don't support special recovery features like GXfifo's metric yet.\n"
+				" - The good news is, the time you read that message, the fifo should be empty now :p\n"
+				" - Anyway, fifo flush will be forced if you press OK and dolphin might continue to work...\n"
+				" - We aren't betting on that :)", fifo.CPReadWriteDistance);
+                        */
+			DEBUG_LOG(COMMANDPROCESSOR, "*********************** GXSetGPFifo very soon? ***********************");
+			// (mb2) We don't sleep here since it could be a perf issue for super monkey ball (yup only this game IIRC)
+			// Touching that game is a no-go so I don't want to take the risk :p
+			while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance > 0 && !(fifo.bFF_BPEnable && fifo.bFF_Breakpoint) )
+				s_fifoIdleEvent.MsgWait();
+		}
+	}
+
+	switch (_Address & 0xFFF)
+	{
+	case STATUS_REGISTER:
+		{
+			UCPStatusReg tmpStatus(_Value);
+
+			// set the flags to "all is okay"
+			m_CPStatusReg.OverflowHiWatermark	= 0;
+			m_CPStatusReg.UnderflowLoWatermark	= 0;
+
+			// TOCHECK (mb2): could BP irq be cleared here too?
+			//if (tmpStatus.Breakpoint!=m_CPStatusReg.Breakpoint) _asm int 3
+			// breakpoint
+			/*if (tmpStatus.Breakpoint)
+			{
+				m_CPStatusReg.Breakpoint = 0;
+			}
+			//fifo.bFF_Breakpoint = m_CPStatusReg.Breakpoint;
+			fifo.bFF_Breakpoint = m_CPStatusReg.Breakpoint	? true : false;
+			//LOG(COMMANDPROCESSOR,"fifo.bFF_Breakpoint : %i",fifo.bFF_Breakpoint);
+			*/
+
+			// update interrupts
+			UpdateInterrupts();
+
+			INFO_LOG(COMMANDPROCESSOR,"\t write to STATUS_REGISTER : %04x", _Value);
+		}
+		break;
+
+	case CTRL_REGISTER:
+		{
+			UCPCtrlReg tmpCtrl(_Value);
+
+			Common::AtomicStore(fifo.bFF_GPReadEnable,	tmpCtrl.GPReadEnable);
+			Common::AtomicStore(fifo.bFF_GPLinkEnable,	tmpCtrl.GPLinkEnable);
+			Common::AtomicStore(fifo.bFF_BPEnable,		tmpCtrl.BPEnable);
+
+			// TOCHECK (mb2): could BP irq be cleared with w16 to STATUS_REGISTER?
+			// funny hack: eg in MP1 if we disable the clear breakpoint ability by commenting out this block
+			// the game is of course faster but looks stable too.
+			// Well, the hack is more stable than the "proper" way actually :p ... it breaks MP2 when ship lands
+			// So I am leaving the hack in for now.
+			// Checkmate re-enabled it, so please test
+			// TODO (mb2): fix this!
+			
+			// BP interrupt is cleared here
+
+			//if (m_CPCtrlReg.CPIntEnable && !tmpCtrl.Hex) // falling edge
+			// rising edge or falling edge
+			if ((!m_CPCtrlReg.CPIntEnable && tmpCtrl.CPIntEnable) || (m_CPCtrlReg.CPIntEnable && !tmpCtrl.Hex)) 
+			{
+				m_CPStatusReg.Breakpoint = 0;
+				Common::AtomicStore(fifo.bFF_Breakpoint, 0);
+			}
+
+			m_CPCtrlReg.Hex = tmpCtrl.Hex;
+			UpdateInterrupts();
+			DEBUG_LOG(COMMANDPROCESSOR,"\t write to CTRL_REGISTER : %04x", _Value);
+			DEBUG_LOG(COMMANDPROCESSOR, "\t GPREAD %s | CPULINK %s | BP %s || CPIntEnable %s | OvF %s | UndF %s"
+				, fifo.bFF_GPReadEnable ?				"ON" : "OFF"
+				, fifo.bFF_GPLinkEnable ?				"ON" : "OFF"
+				, fifo.bFF_BPEnable ?					"ON" : "OFF"
+				, m_CPCtrlReg.CPIntEnable ?				"ON" : "OFF"
+				, m_CPCtrlReg.FifoOverflowIntEnable ?	"ON" : "OFF"
+				, m_CPCtrlReg.FifoUnderflowIntEnable ?	"ON" : "OFF"
+				);
+
+		}
+		break;
+
+	case PERF_SELECT:
+		{
+			WARN_LOG(COMMANDPROCESSOR, "write to PERF_SELECT: %04x", _Value);
+			// Seems to select which set of perf counters should be exposed.
+		}
+		break;
+
+	case CLEAR_REGISTER:
+		{
+			// ????
+			UCPClearReg tmpClearReg(_Value);			
+			m_CPClearReg.Hex = 0;
+			INFO_LOG(COMMANDPROCESSOR,"\t write to CLEAR_REGISTER : %04x",_Value);
+		}		
+		break;
+
+	// Fifo Registers
+	case FIFO_TOKEN_REGISTER:
+		m_tokenReg = _Value;
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_TOKEN_REGISTER : %04x", _Value);
+		break;
+
+	case FIFO_BASE_LO:			
+		WriteLow ((u32 &)fifo.CPBase, _Value);
+		fifo.CPBase &= 0xFFFFFFE0;
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_BASE_LO : %04x", _Value);
+		break;
+	case FIFO_BASE_HI:			
+		WriteHigh((u32 &)fifo.CPBase, _Value);
+		fifo.CPBase &= 0xFFFFFFE0;
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_BASE_HI : %04x", _Value);
+		break;
+	case FIFO_END_LO:			
+		WriteLow ((u32 &)fifo.CPEnd,  _Value);
+		fifo.CPEnd &= 0xFFFFFFE0;
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_END_LO : %04x", _Value);
+		break;
+	case FIFO_END_HI:			
+		WriteHigh((u32 &)fifo.CPEnd,  _Value);
+		fifo.CPEnd &= 0xFFFFFFE0;
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_END_HI : %04x", _Value);
+		break;
+
+	// Hm. Should we really & these with FFFFFFE0?
+	// (mb2): never seen 32B not aligned values for those following regs.
+	// fifo.CPEnd is the only value that could be not 32B aligned so far.
+	case FIFO_WRITE_POINTER_LO:
+		WriteLow ((u32 &)fifo.CPWritePointer, _Value); fifo.CPWritePointer &= 0xFFFFFFE0;
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_WRITE_POINTER_LO : %04x", _Value);
+		break;
+	case FIFO_WRITE_POINTER_HI:
+		WriteHigh((u32 &)fifo.CPWritePointer, _Value); fifo.CPWritePointer &= 0xFFFFFFE0;
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_WRITE_POINTER_HI : %04x", _Value);
+		break;
+	case FIFO_READ_POINTER_LO:
+		WriteLow ((u32 &)fifo.CPReadPointer, _Value); fifo.CPReadPointer &= 0xFFFFFFE0;
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_READ_POINTER_LO : %04x", _Value);
+		break;
+	case FIFO_READ_POINTER_HI:
+		WriteHigh((u32 &)fifo.CPReadPointer, _Value); fifo.CPReadPointer &= 0xFFFFFFE0;
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_READ_POINTER_HI : %04x", _Value);
+		break;
+
+	case FIFO_HI_WATERMARK_LO:
+		WriteLow ((u32 &)fifo.CPHiWatermark, _Value);
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_HI_WATERMARK_LO : %04x", _Value);
+		break;
+	case FIFO_HI_WATERMARK_HI:
+		WriteHigh((u32 &)fifo.CPHiWatermark, _Value);
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_HI_WATERMARK_HI : %04x", _Value);
+		break;
+	case FIFO_LO_WATERMARK_LO:
+		WriteLow ((u32 &)fifo.CPLoWatermark, _Value);
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_LO_WATERMARK_LO : %04x", _Value);
+		break;
+	case FIFO_LO_WATERMARK_HI:
+		WriteHigh((u32 &)fifo.CPLoWatermark, _Value);
+		DEBUG_LOG(COMMANDPROCESSOR,"\t write to FIFO_LO_WATERMARK_HI : %04x", _Value);
+		break;
+
+	case FIFO_BP_LO:			
+		WriteLow ((u32 &)fifo.CPBreakpoint,	_Value);
+		DEBUG_LOG(COMMANDPROCESSOR,"write to FIFO_BP_LO : %04x", _Value);
+		break;
+	case FIFO_BP_HI:			
+		WriteHigh((u32 &)fifo.CPBreakpoint,	_Value);
+		DEBUG_LOG(COMMANDPROCESSOR,"write to FIFO_BP_HI : %04x", _Value);
+		break;
+
+	// Super Monkey Ball tries to overwrite CPReadWriteDistance with an old saved RWD value, which is lame for us.
+	// hack: We have to force the CPU to think the fifo is always empty and idle.
+	// When we get here CPReadWriteDistance should always be zero and the game should always want to overwrite it with 0.
+	// So, we can skip the write.
+	case FIFO_RW_DISTANCE_HI:
+		//WriteHigh((u32 &)fifo.CPReadWriteDistance, _Value);
+		DEBUG_LOG(COMMANDPROCESSOR,"try to write to FIFO_RW_DISTANCE_HI : %04x", _Value);
+		break;
+	case FIFO_RW_DISTANCE_LO:
+		//WriteLow((u32 &)fifo.CPReadWriteDistance, _Value);
+		DEBUG_LOG(COMMANDPROCESSOR,"try to write to FIFO_RW_DISTANCE_LO : %04x", _Value);
+		break;
+	default:
+		WARN_LOG(COMMANDPROCESSOR, "(w16) unknown CP reg write %04x @ %08x", _Value, _Address);
+	}
+
+	// TODO(mb2): do this better. Check if it helps: avoid CPReadPointer overwrites when stupidly done like in Super Monkey Ball
+	if ((!fifo.bFF_GPReadEnable && fifo.CPReadIdle) || !g_VideoInitialize.bUseDualCore) // TOCHECK(mb2): check again if thread safe?
+		UpdateFifoRegister();
+}
+
+void Read32(u32& _rReturnValue, const u32 _Address)
+{
+	_rReturnValue = 0;
+	_dbg_assert_msg_(COMMANDPROCESSOR, 0, "Read32 from CommandProccessor at 0x%08x", _Address);
+}
+
+void Write32(const u32 _Data, const u32 _Address)
+{
+	_dbg_assert_msg_(COMMANDPROCESSOR, 0, "Write32 at CommandProccessor at 0x%08x", _Address);
+}
+
+void STACKALIGN GatherPipeBursted()
+{
+	// if we aren't linked, we don't care about gather pipe data
+	if (!fifo.bFF_GPLinkEnable)
+		return;
+
+	if (g_VideoInitialize.bUseDualCore)
+	{
+		// update the fifo-pointer
+		fifo.CPWritePointer += GATHER_PIPE_SIZE;
+		if (fifo.CPWritePointer >= fifo.CPEnd)
+			fifo.CPWritePointer = fifo.CPBase;
+        Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
+
+		// High watermark overflow handling (hacked way)
+		if (fifo.CPReadWriteDistance > fifo.CPHiWatermark)
+		{
+			// We should raise an Ov interrupt for an accurate fifo emulation and let the PPC deal with it.
+			// But it slows things down because of if(interrupt blah blah){} blocks for each 32B fifo transaction.
+			// The CPU would be a bit more loaded too by its interrupt handling...
+			// Either way, the CPU would have the ability to resume another thread.
+			// To be clear: this spin loop is like a critical section spin loop in the emulated GX thread, hence "hacked way"
+
+			// Yes, in real life, the only purpose of the low watermark interrupt is just for cooling down OV contention.
+			// - @ game start -> watermark init: Overflow enabled, Underflow disabled
+			// - if (OV is raised)
+			//		- CPU stops writing to fifo
+			//		- enable Underflow interrupt (this only happens if OV is raised)
+			//		- do other things
+			// - if (Underflow is raised (implicit: AND if an OV has been raised))
+			//		- CPU can write to fifo
+			//		- disable Underflow interrupt
+
+			INFO_LOG(COMMANDPROCESSOR, "(GatherPipeBursted): CPHiWatermark reached");
+			// Wait for GPU to catch up
+			while (!(fifo.bFF_BPEnable && fifo.bFF_Breakpoint) && fifo.CPReadWriteDistance > fifo.CPLoWatermark)
+				s_fifoIdleEvent.MsgWait();
+		}
+		// check if we are in sync
+		_assert_msg_(COMMANDPROCESSOR, fifo.CPWritePointer	== *(g_VideoInitialize.Fifo_CPUWritePointer), "FIFOs linked but out of sync");
+		_assert_msg_(COMMANDPROCESSOR, fifo.CPBase			== *(g_VideoInitialize.Fifo_CPUBase), "FIFOs linked but out of sync");
+		_assert_msg_(COMMANDPROCESSOR, fifo.CPEnd			== *(g_VideoInitialize.Fifo_CPUEnd), "FIFOs linked but out of sync");
+	}
+	else
+	{
+		fifo.CPWritePointer += GATHER_PIPE_SIZE;
+		if (fifo.CPWritePointer >= fifo.CPEnd)
+			fifo.CPWritePointer = fifo.CPBase;
+		// check if we are in sync
+		_assert_msg_(COMMANDPROCESSOR, fifo.CPWritePointer	== *(g_VideoInitialize.Fifo_CPUWritePointer), "FIFOs linked but out of sync");
+		_assert_msg_(COMMANDPROCESSOR, fifo.CPBase			== *(g_VideoInitialize.Fifo_CPUBase), "FIFOs linked but out of sync");
+		_assert_msg_(COMMANDPROCESSOR, fifo.CPEnd			== *(g_VideoInitialize.Fifo_CPUEnd), "FIFOs linked but out of sync");
+
+		UpdateFifoRegister();
+	}
+}
+
+
+// This is mostly used in single core mode
+void CatchUpGPU()
+{
+	// check if we are able to run this buffer
+	if ((fifo.bFF_GPReadEnable) && !(fifo.bFF_BPEnable && fifo.bFF_Breakpoint))
+	{
+		// HyperIris: Memory_GetPtr is an expensive call, call it less, run faster
+		u8 *ptr = Memory_GetPtr(fifo.CPReadPointer);
+
+        while (fifo.CPReadWriteDistance > 0)
+		{
+			// check if we are on a breakpoint
+			if (fifo.bFF_BPEnable)
+			{
+				//MessageBox(0,"Breakpoint enabled",0,0);
+				if ((fifo.CPReadPointer & ~0x1F) == (fifo.CPBreakpoint & ~0x1F))
+				{
+					//_assert_msg_(POWERPC,0,"BP: %08x",fifo.CPBreakpoint);
+					//LOG(COMMANDPROCESSOR,"!!! BP irq raised");
+					fifo.bFF_Breakpoint = 1;
+					m_CPStatusReg.Breakpoint = 1;
+					UpdateInterrupts();
+					break;
+				}
+			}
+
+			// read the data and send it to the VideoPlugin
+			fifo.CPReadPointer += 32;
+			// We are going to do FP math on the main thread so have to save the current state
+			SaveSSEState();
+			LoadDefaultSSEState();
+			Fifo_SendFifoData(ptr,32);
+			LoadSSEState();
+			// adjust
+			ptr += 32;
+
+			fifo.CPReadWriteDistance -= 32;
+
+			// increase the ReadPtr
+			if (fifo.CPReadPointer >= fifo.CPEnd)
+			{
+				fifo.CPReadPointer = fifo.CPBase;
+				// adjust, take care
+				ptr = Memory_GetPtr(fifo.CPReadPointer);
+				INFO_LOG(COMMANDPROCESSOR, "BUFFER LOOP");
+			}
+		}
+	}
+}
+
+// __________________________________________________________________________________________________
+// !!! Temporary (I hope): re-used in DC mode
+// UpdateFifoRegister
+// It's no problem if the gfx falls behind a little bit. Better make sure to stop the cpu thread
+// when the distance is way huge, though.
+// So:
+// CPU thread
+//   0. Write data (done before entering this)
+//   1. Compute distance
+//   2. If distance > threshold, sleep and goto 1
+// GPU thread
+//   1. Compute distance
+//   2. If distance < threshold, sleep and goto 1 (or wait for trigger?)
+//   3. Read and use a bit of data, goto 1
+void UpdateFifoRegister()
+{
+	// update the distance
+	int wp = fifo.CPWritePointer;
+	int rp = fifo.CPReadPointer;
+	int dist;
+	if (wp >= rp)
+		dist = wp - rp;
+	else
+		dist = (wp - fifo.CPBase) + (fifo.CPEnd - rp);
+
+	Common::AtomicStore(fifo.CPReadWriteDistance, dist);
+
+	if (!g_VideoInitialize.bUseDualCore)
+		CatchUpGPU();
+}
+
+void UpdateInterrupts()
+{
+	if (m_CPCtrlReg.CPIntEnable &&
+		(fifo.bFF_BPEnable && fifo.bFF_Breakpoint))
+	{
+        g_VideoInitialize.pSetInterrupt(INT_CAUSE_CP, true);
+	}
+	else
+	{
+		g_VideoInitialize.pSetInterrupt(INT_CAUSE_CP, false);
+	}
+}
+
+void UpdateInterruptsFromVideoPlugin()
+{
+	if (fifo.bFF_Breakpoint) // implicit since only BP trigger (see fifo.cpp) can call this
+		m_CPStatusReg.Breakpoint = 1;
+	g_VideoInitialize.pScheduleEvent_Threadsafe(0, et_UpdateInterrupts, 0);
+}
+
+void SetFifoIdleFromVideoPlugin()
+{
+	s_fifoIdleEvent.Set();
+}
+
+} // end of namespace CommandProcessor
diff --git a/Source/Core/Core/Src/HW/CommandProcessor.h b/Source/Core/VideoCommon/Src/CommandProcessor.h
similarity index 96%
rename from Source/Core/Core/Src/HW/CommandProcessor.h
rename to Source/Core/VideoCommon/Src/CommandProcessor.h
index 62b83f771f..aa1d81978d 100644
--- a/Source/Core/Core/Src/HW/CommandProcessor.h
+++ b/Source/Core/VideoCommon/Src/CommandProcessor.h
@@ -1,96 +1,96 @@
-// Copyright (C) 2003 Dolphin Project.
-
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official SVN repository and contact information can be found at
-// http://code.google.com/p/dolphin-emu/
-
-#ifndef _COMMANDPROCESSOR_H
-#define _COMMANDPROCESSOR_H
-
-#include "Common.h"
-#include "pluginspecs_video.h"
-class PointerWrap;
-
-extern bool MT;
-namespace CommandProcessor
-{
-// internal hardware addresses
-enum
-{
-	STATUS_REGISTER				= 0x00,
-	CTRL_REGISTER				= 0x02,
-	CLEAR_REGISTER				= 0x04,
-	PERF_SELECT                 = 0x06,
-	FIFO_TOKEN_REGISTER			= 0x0E,
-	FIFO_BOUNDING_BOX_LEFT		= 0x10,
-	FIFO_BOUNDING_BOX_RIGHT		= 0x12,
-	FIFO_BOUNDING_BOX_TOP		= 0x14,
-	FIFO_BOUNDING_BOX_BOTTOM	= 0x16,
-	FIFO_BASE_LO				= 0x20,
-	FIFO_BASE_HI				= 0x22,
-	FIFO_END_LO					= 0x24,
-	FIFO_END_HI					= 0x26,
-	FIFO_HI_WATERMARK_LO		= 0x28,
-	FIFO_HI_WATERMARK_HI		= 0x2a,
-	FIFO_LO_WATERMARK_LO		= 0x2c,
-	FIFO_LO_WATERMARK_HI		= 0x2e,
-	FIFO_RW_DISTANCE_LO			= 0x30,
-	FIFO_RW_DISTANCE_HI			= 0x32,
-	FIFO_WRITE_POINTER_LO		= 0x34,
-	FIFO_WRITE_POINTER_HI		= 0x36,
-	FIFO_READ_POINTER_LO		= 0x38,
-	FIFO_READ_POINTER_HI		= 0x3A,
-	FIFO_BP_LO					= 0x3C,
-	FIFO_BP_HI					= 0x3E,
-	CP_PERF0_L                  = 0x40,
-	CP_PERF0_H                  = 0x42,
-	CP_PERF1_L                  = 0x44,
-	CP_PERF1_H                  = 0x46,
-	CP_PERF2_L                  = 0x48,
-	CP_PERF2_H                  = 0x4a,
-	CP_PERF3_L                  = 0x4c,
-	CP_PERF3_H                  = 0x4e,
-};
-
-extern SCPFifoStruct fifo;
-
-// Init
-void Init();
-void Shutdown();
-void DoState(PointerWrap &p);
-
-// Read
-void Read16(u16& _rReturnValue, const u32 _Address);
-void Write16(const u16 _Data, const u32 _Address);
-void Read32(u32& _rReturnValue, const u32 _Address);
-void Write32(const u32 _Data, const u32 _Address);
-
-// for CGPFIFO
-void CatchUpGPU();
-void GatherPipeBursted();
-void UpdateInterrupts();
-void UpdateInterruptsFromVideoPlugin();
-void SetFifoIdleFromVideoPlugin();
-
-bool AllowIdleSkipping();
-
-// for DC GP watchdog hack
-void IncrementGPWDToken();
-void WaitForFrameFinish();
-
-} // namespace CommandProcessor
-
-#endif // _COMMANDPROCESSOR_H
-
-
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#ifndef _COMMANDPROCESSOR_H
+#define _COMMANDPROCESSOR_H
+
+#include "Common.h"
+#include "pluginspecs_video.h"
+class PointerWrap;
+
+extern bool MT;
+namespace CommandProcessor
+{
+// internal hardware addresses
+enum
+{
+	STATUS_REGISTER				= 0x00,
+	CTRL_REGISTER				= 0x02,
+	CLEAR_REGISTER				= 0x04,
+	PERF_SELECT                 = 0x06,
+	FIFO_TOKEN_REGISTER			= 0x0E,
+	FIFO_BOUNDING_BOX_LEFT		= 0x10,
+	FIFO_BOUNDING_BOX_RIGHT		= 0x12,
+	FIFO_BOUNDING_BOX_TOP		= 0x14,
+	FIFO_BOUNDING_BOX_BOTTOM	= 0x16,
+	FIFO_BASE_LO				= 0x20,
+	FIFO_BASE_HI				= 0x22,
+	FIFO_END_LO					= 0x24,
+	FIFO_END_HI					= 0x26,
+	FIFO_HI_WATERMARK_LO		= 0x28,
+	FIFO_HI_WATERMARK_HI		= 0x2a,
+	FIFO_LO_WATERMARK_LO		= 0x2c,
+	FIFO_LO_WATERMARK_HI		= 0x2e,
+	FIFO_RW_DISTANCE_LO			= 0x30,
+	FIFO_RW_DISTANCE_HI			= 0x32,
+	FIFO_WRITE_POINTER_LO		= 0x34,
+	FIFO_WRITE_POINTER_HI		= 0x36,
+	FIFO_READ_POINTER_LO		= 0x38,
+	FIFO_READ_POINTER_HI		= 0x3A,
+	FIFO_BP_LO					= 0x3C,
+	FIFO_BP_HI					= 0x3E,
+	CP_PERF0_L                  = 0x40,
+	CP_PERF0_H                  = 0x42,
+	CP_PERF1_L                  = 0x44,
+	CP_PERF1_H                  = 0x46,
+	CP_PERF2_L                  = 0x48,
+	CP_PERF2_H                  = 0x4a,
+	CP_PERF3_L                  = 0x4c,
+	CP_PERF3_H                  = 0x4e,
+};
+
+extern SCPFifoStruct fifo;
+
+// Init
+void Init();
+void Shutdown();
+void DoState(PointerWrap &p);
+
+// Read
+void Read16(u16& _rReturnValue, const u32 _Address);
+void Write16(const u16 _Data, const u32 _Address);
+void Read32(u32& _rReturnValue, const u32 _Address);
+void Write32(const u32 _Data, const u32 _Address);
+
+// for CGPFIFO
+void CatchUpGPU();
+void GatherPipeBursted();
+void UpdateInterrupts();
+void UpdateInterruptsFromVideoPlugin();
+void SetFifoIdleFromVideoPlugin();
+
+bool AllowIdleSkipping();
+
+// for DC GP watchdog hack
+void IncrementGPWDToken();
+void WaitForFrameFinish();
+
+} // namespace CommandProcessor
+
+#endif // _COMMANDPROCESSOR_H
+
+
diff --git a/Source/Core/VideoCommon/Src/Fifo.cpp b/Source/Core/VideoCommon/Src/Fifo.cpp
index ee05646f72..b258eb97f2 100644
--- a/Source/Core/VideoCommon/Src/Fifo.cpp
+++ b/Source/Core/VideoCommon/Src/Fifo.cpp
@@ -23,6 +23,7 @@
 #include "Thread.h"
 #include "Atomic.h"
 #include "OpcodeDecoding.h"
+#include "CommandProcessor.h"
 
 #include "Fifo.h"
 
@@ -127,7 +128,7 @@ void Fifo_SendFifoData(u8* _uData, u32 len)
 void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
 {
     fifoStateRun = true;
-    SCPFifoStruct &_fifo = *video_initialize.pCPFifo;
+    SCPFifoStruct &_fifo = CommandProcessor::fifo;
 	s32 distToSend;
 
     while (fifoStateRun)
@@ -178,7 +179,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
 				if (_fifo.bFF_BPEnable && (readPtr == _fifo.CPBreakpoint))
                 {
 						Common::AtomicStore(_fifo.bFF_Breakpoint, 1);
-                        video_initialize.pUpdateInterrupts();
+                        CommandProcessor::UpdateInterruptsFromVideoPlugin();
                 }
 
 				// Update CPReadPointer and RWDistance
@@ -189,7 +190,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
 			} while (_fifo.bFF_GPReadEnable && _fifo.CPReadWriteDistance  && !(_fifo.bFF_BPEnable && _fifo.bFF_Breakpoint));
 
 			Common::AtomicStore(_fifo.CPReadIdle, 1);
-			video_initialize.pSetFifoIdle();
+            CommandProcessor::SetFifoIdleFromVideoPlugin();
         }
 		else
 		{
diff --git a/Source/Core/VideoCommon/Src/OpcodeDecoding.cpp b/Source/Core/VideoCommon/Src/OpcodeDecoding.cpp
index f5d0b41211..3e04c71266 100644
--- a/Source/Core/VideoCommon/Src/OpcodeDecoding.cpp
+++ b/Source/Core/VideoCommon/Src/OpcodeDecoding.cpp
@@ -29,6 +29,7 @@
 #include "VideoCommon.h"
 #include "Profiler.h"
 #include "OpcodeDecoding.h"
+#include "CommandProcessor.h"
 
 #include "VertexLoaderManager.h"
 
@@ -170,7 +171,7 @@ bool FifoCommandRunnable()
             g_VideoInitialize.pSysMessage(szTemp);
             g_VideoInitialize.pLog(szTemp, TRUE);
 			{
-				SCPFifoStruct &fifo = *g_VideoInitialize.pCPFifo;
+                SCPFifoStruct &fifo = CommandProcessor::fifo;
 
 				char szTmp[256];
 				// sprintf(szTmp, "Illegal command %02x (at %08x)",cmd_byte,g_pDataReader->GetPtr());
diff --git a/Source/Core/Core/Src/HW/PixelEngine.cpp b/Source/Core/VideoCommon/Src/PixelEngine.cpp
similarity index 88%
rename from Source/Core/Core/Src/HW/PixelEngine.cpp
rename to Source/Core/VideoCommon/Src/PixelEngine.cpp
index 1268e5294e..c7d75c3b5b 100644
--- a/Source/Core/Core/Src/HW/PixelEngine.cpp
+++ b/Source/Core/VideoCommon/Src/PixelEngine.cpp
@@ -1,380 +1,380 @@
-// Copyright (C) 2003 Dolphin Project.
-
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official SVN repository and contact information can be found at
-// http://code.google.com/p/dolphin-emu/
-
-
-// http://developer.nvidia.com/object/General_FAQ.html#t6 !!!!!
-
-
-
-#include "Common.h"
-#include "ChunkFile.h"
-#include "Atomic.h"
-
-#include "PixelEngine.h"
-
-#include "../CoreTiming.h"
-#include "../PowerPC/PowerPC.h"
-#include "ProcessorInterface.h"
-#include "CommandProcessor.h"
-#include "CPU.h"
-#include "../Core.h"
-#include "../ConfigManager.h"
-
-namespace PixelEngine
-{
-
-union UPEZConfReg
-{
-	u16 Hex;
-	struct 
-	{
-		unsigned ZCompEnable	: 1; // Z Comparator Enable
-		unsigned Function		: 3;
-		unsigned ZUpdEnable		: 1;
-		unsigned				: 11;
-	};
-};
-
-union UPEAlphaConfReg
-{
-	u16 Hex;
-	struct 
-	{
-		unsigned BMMath			: 1; // GX_BM_BLEND || GX_BM_SUBSTRACT
-		unsigned BMLogic		: 1; // GX_BM_LOGIC
-		unsigned Dither			: 1;
-		unsigned ColorUpdEnable	: 1;
-		unsigned AlphaUpdEnable	: 1;
-		unsigned DstFactor		: 3;
-		unsigned SrcFactor		: 3;
-		unsigned Substract		: 1; // Additive mode by default
-		unsigned BlendOperator	: 4;
-	};
-};
-
-union UPEDstAlphaConfReg
-{
-	u16 Hex;
-	struct 
-	{
-		unsigned DstAlpha		: 8;
-		unsigned Enable			: 1;
-		unsigned				: 7;
-	};
-};
-
-union UPEAlphaModeConfReg
-{
-	u16 Hex;
-	struct 
-	{
-		unsigned Threshold		: 8;
-		unsigned CompareMode	: 8;
-	};
-};
-
-// Not sure about this reg...
-union UPEAlphaReadReg
-{
-	u16 Hex;
-	struct 
-	{
-		unsigned ReadMode		: 3;
-		unsigned				: 13;
-	};
-};
-
-// fifo Control Register
-union UPECtrlReg
-{
-	struct 
-	{
-		unsigned PETokenEnable	:	1;
-		unsigned PEFinishEnable	:	1;
-		unsigned PEToken		:	1; // write only
-		unsigned PEFinish		:	1; // write only
-		unsigned				:	12;
-	};
-	u16 Hex;
-	UPECtrlReg() {Hex = 0; }
-	UPECtrlReg(u16 _hex) {Hex = _hex; }
-};
-
-// STATE_TO_SAVE
-static UPEZConfReg			m_ZConf;
-static UPEAlphaConfReg		m_AlphaConf;
-static UPEDstAlphaConfReg	m_DstAlphaConf;
-static UPEAlphaModeConfReg	m_AlphaModeConf;
-static UPEAlphaReadReg		m_AlphaRead;
-static UPECtrlReg			m_Control;
-//static u16					m_Token; // token value most recently encountered
-
-static bool g_bSignalTokenInterrupt;
-static bool g_bSignalFinishInterrupt;
-
-static int et_SetTokenOnMainThread;
-static int et_SetFinishOnMainThread;
-
-u16 bbox[4];
-bool bbox_active;
-
-void DoState(PointerWrap &p)
-{
-	p.Do(m_ZConf);
-	p.Do(m_AlphaConf);
-	p.Do(m_DstAlphaConf);
-	p.Do(m_AlphaModeConf);
-	p.Do(m_AlphaRead);
-	p.Do(m_Control);
-	p.Do(CommandProcessor::fifo.PEToken);
-
-	p.Do(g_bSignalTokenInterrupt);
-	p.Do(g_bSignalFinishInterrupt);
-	
-	p.Do(bbox);
-	p.Do(bbox_active);
-}
-
-void UpdateInterrupts();
-
-void SetToken_OnMainThread(u64 userdata, int cyclesLate);
-void SetFinish_OnMainThread(u64 userdata, int cyclesLate);
-
-void Init()
-{
-	m_Control.Hex = 0;
-
-	et_SetTokenOnMainThread = CoreTiming::RegisterEvent("SetToken", SetToken_OnMainThread);
-	et_SetFinishOnMainThread = CoreTiming::RegisterEvent("SetFinish", SetFinish_OnMainThread);
-
-	bbox[0] = 0x80;
-	bbox[1] = 0xA0;
-	bbox[2] = 0x80;
-	bbox[3] = 0xA0;
-
-	bbox_active = false;
-}
-
-void Read16(u16& _uReturnValue, const u32 _iAddress)
-{
-	DEBUG_LOG(PIXELENGINE, "(r16) 0x%08x", _iAddress);
-
-	switch (_iAddress & 0xFFF)
-	{
-		// CPU Direct Access EFB Raster State Config
-	case PE_ZCONF:
-		_uReturnValue = m_ZConf.Hex;
-		INFO_LOG(PIXELENGINE, "(r16) ZCONF");
-		break;
-	case PE_ALPHACONF:
-		// Most games read this early. no idea why.
-		_uReturnValue = m_AlphaConf.Hex;
-		INFO_LOG(PIXELENGINE, "(r16) ALPHACONF");
-		break;
-	case PE_DSTALPHACONF:
-		_uReturnValue = m_DstAlphaConf.Hex;
-		INFO_LOG(PIXELENGINE, "(r16) DSTALPHACONF");
-		break;
-	case PE_ALPHAMODE:
-		_uReturnValue = m_AlphaModeConf.Hex;
-		INFO_LOG(PIXELENGINE, "(r16) ALPHAMODE");
-		break;	
-	case PE_ALPHAREAD:
-		_uReturnValue = m_AlphaRead.Hex;
-		WARN_LOG(PIXELENGINE, "(r16) ALPHAREAD");
-		break;
-
-	case PE_CTRL_REGISTER:
-		_uReturnValue = m_Control.Hex;
-		INFO_LOG(PIXELENGINE, "(r16) CTRL_REGISTER : %04x", _uReturnValue);
-		break;
-
-	case PE_TOKEN_REG:
-		_uReturnValue = CommandProcessor::fifo.PEToken;
-		INFO_LOG(PIXELENGINE, "(r16) TOKEN_REG : %04x", _uReturnValue);
-		break;
-
-		// The return values for these BBOX registers need to be gotten from the bounding box of the object. 
-		// See http://code.google.com/p/dolphin-emu/issues/detail?id=360#c74 for more details.
-
-	// 0x80, 0xa0, 0x80, 0xa0 makes Paper Mario happy.
-	case PE_BBOX_LEFT:   _uReturnValue = bbox[0]; INFO_LOG(PIXELENGINE, "R: BBOX_LEFT   = %i", bbox[0]); bbox_active = false; break;
-	case PE_BBOX_RIGHT:  _uReturnValue = bbox[1]; INFO_LOG(PIXELENGINE, "R: BBOX_RIGHT  = %i", bbox[1]); bbox_active = false; break;
-	case PE_BBOX_TOP:    _uReturnValue = bbox[2]; INFO_LOG(PIXELENGINE, "R: BBOX_TOP    = %i", bbox[2]); bbox_active = false; break;
-	case PE_BBOX_BOTTOM: _uReturnValue = bbox[3]; INFO_LOG(PIXELENGINE, "R: BBOX_BOTTOM = %i", bbox[3]); bbox_active = false; break;
-
-	case PE_PERF_0L:
-	case PE_PERF_0H:
-	case PE_PERF_1L:
-	case PE_PERF_1H:
-	case PE_PERF_2L:
-	case PE_PERF_2H:
-	case PE_PERF_3L:
-	case PE_PERF_3H:
-	case PE_PERF_4L:
-	case PE_PERF_4H:
-	case PE_PERF_5L:
-	case PE_PERF_5H:
-		INFO_LOG(PIXELENGINE, "(r16) perf counter @ %08x", _iAddress);
-		break;
-
-	default:
-		INFO_LOG(PIXELENGINE, "(r16) unknown @ %08x", _iAddress);
-		_uReturnValue = 1;
-		break;
-	}
-}
-
-void Write16(const u16 _iValue, const u32 _iAddress)
-{
-	switch (_iAddress & 0xFFF)
-	{
-		// CPU Direct Access EFB Raster State Config
-	case PE_ZCONF:
-		m_ZConf.Hex = _iValue;
-		INFO_LOG(PIXELENGINE, "(w16) ZCONF: %02x", _iValue);
-		break;
-	case PE_ALPHACONF:
-		m_AlphaConf.Hex = _iValue;
-		INFO_LOG(PIXELENGINE, "(w16) ALPHACONF: %02x", _iValue);
-		break;
-	case PE_DSTALPHACONF:
-		m_DstAlphaConf.Hex = _iValue;
-		INFO_LOG(PIXELENGINE, "(w16) DSTALPHACONF: %02x", _iValue);
-		break;
-	case PE_ALPHAMODE:
-		m_AlphaModeConf.Hex = _iValue;
-		INFO_LOG(PIXELENGINE, "(w16) ALPHAMODE: %02x", _iValue);
-		break;
-	case PE_ALPHAREAD:
-		m_AlphaRead.Hex = _iValue;
-		INFO_LOG(PIXELENGINE, "(w16) ALPHAREAD: %02x", _iValue);
-		break;
-
-	case PE_CTRL_REGISTER:	
-		{
-			UPECtrlReg tmpCtrl(_iValue);
-
-			if (tmpCtrl.PEToken)	g_bSignalTokenInterrupt = false;
-			if (tmpCtrl.PEFinish)	g_bSignalFinishInterrupt = false;
-
-			m_Control.PETokenEnable  = tmpCtrl.PETokenEnable;
-			m_Control.PEFinishEnable = tmpCtrl.PEFinishEnable;
-			m_Control.PEToken = 0;		// this flag is write only
-			m_Control.PEFinish = 0;		// this flag is write only
-
-			DEBUG_LOG(PIXELENGINE, "(w16) CTRL_REGISTER: 0x%04x", _iValue);
-			UpdateInterrupts();
-		}
-		break;
-
-	case PE_TOKEN_REG:
-		//LOG(PIXELENGINE,"WEIRD: program wrote token: %i",_iValue);
-		PanicAlert("(w16) WTF? PowerPC program wrote token: %i", _iValue);
-		//only the gx pipeline is supposed to be able to write here
-		//g_token = _iValue;
-		break;
-
-	default:
-		WARN_LOG(PIXELENGINE, "(w16) unknown %04x @ %08x", _iValue, _iAddress);
-		break;
-	}
-}
-
-void Write32(const u32 _iValue, const u32 _iAddress)
-{
-	WARN_LOG(PIXELENGINE, "(w32) 0x%08x @ 0x%08x IGNORING...",_iValue,_iAddress);
-}
-
-bool AllowIdleSkipping()
-{
-	return !SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore || (!m_Control.PETokenEnable && !m_Control.PEFinishEnable);
-}
-
-void UpdateInterrupts()
-{
-	// check if there is a token-interrupt
-	if (g_bSignalTokenInterrupt	& m_Control.PETokenEnable)
-		ProcessorInterface::SetInterrupt(ProcessorInterface::INT_CAUSE_PE_TOKEN, true);
-	else
-		ProcessorInterface::SetInterrupt(ProcessorInterface::INT_CAUSE_PE_TOKEN, false);
-
-	// check if there is a finish-interrupt
-	if (g_bSignalFinishInterrupt & m_Control.PEFinishEnable)
-		ProcessorInterface::SetInterrupt(ProcessorInterface::INT_CAUSE_PE_FINISH, true);
-	else
-		ProcessorInterface::SetInterrupt(ProcessorInterface::INT_CAUSE_PE_FINISH, false);
-}
-
-// TODO(mb2): Refactor SetTokenINT_OnMainThread(u64 userdata, int cyclesLate).
-//			  Think about the right order between tokenVal and tokenINT... one day maybe.
-//			  Cleanup++
-
-// Called only if BPMEM_PE_TOKEN_INT_ID is ack by GP
-void SetToken_OnMainThread(u64 userdata, int cyclesLate)
-{
-	//if (userdata >> 16)
-	//{
-		g_bSignalTokenInterrupt = true;	
-		//_dbg_assert_msg_(PIXELENGINE, (CommandProcessor::fifo.PEToken == (userdata&0xFFFF)), "WTF? BPMEM_PE_TOKEN_INT_ID's token != BPMEM_PE_TOKEN_ID's token" );
-		INFO_LOG(PIXELENGINE, "VIDEO Plugin raises INT_CAUSE_PE_TOKEN (btw, token: %04x)", CommandProcessor::fifo.PEToken);
-		UpdateInterrupts();
-	//}
-	//else
-	//	LOGV(PIXELENGINE, 1, "VIDEO Plugin wrote token: %i", CommandProcessor::fifo.PEToken);
-}
-
-void SetFinish_OnMainThread(u64 userdata, int cyclesLate)
-{
-	g_bSignalFinishInterrupt = 1;	
-	UpdateInterrupts();
-}
-
-// SetToken
-// THIS IS EXECUTED FROM VIDEO THREAD
-void SetToken(const u16 _token, const int _bSetTokenAcknowledge)
-{
-	// TODO?: set-token-value and set-token-INT could be merged since set-token-INT own the token value.
-	if (_bSetTokenAcknowledge) // set token INT
-	{
-		// This seems smelly...
-		CommandProcessor::IncrementGPWDToken(); // for DC watchdog hack since PEToken seems to be a frame-finish too
-		CoreTiming::ScheduleEvent_Threadsafe(
-			0, et_SetTokenOnMainThread, _token | (_bSetTokenAcknowledge << 16));
-	}
-	else // set token value
-	{
-		// we do it directly from videoThread because of
-		// Super Monkey Ball
-		// XXX: No 16-bit atomic store available, so cheat and use 32-bit.
-		// That's what we've always done. We're counting on fifo.PEToken to be
-		// 4-byte padded.
-        Common::AtomicStore(*(volatile u32*)&CommandProcessor::fifo.PEToken, _token);
-	}
-}
-
-// SetFinish
-// THIS IS EXECUTED FROM VIDEO THREAD (BPStructs.cpp) when a new frame has been drawn
-void SetFinish()
-{
-	CommandProcessor::IncrementGPWDToken(); // for DC watchdog hack
-	CoreTiming::ScheduleEvent_Threadsafe(
-		0, et_SetFinishOnMainThread);
-	INFO_LOG(PIXELENGINE, "VIDEO Set Finish");
-}
-
-} // end of namespace PixelEngine
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+
+// http://developer.nvidia.com/object/General_FAQ.html#t6 !!!!!
+
+
+
+#include "Common.h"
+#include "VideoCommon.h"
+#include "ChunkFile.h"
+#include "Atomic.h"
+
+#include "PixelEngine.h"
+#include "CommandProcessor.h"
+
+namespace PixelEngine
+{
+
+union UPEZConfReg // Layout of the value stored for PE_ZCONF (see Read16/Write16)
+{
+	u16 Hex;	// raw 16-bit register value
+	struct 
+	{
+		unsigned ZCompEnable	: 1; // Z Comparator Enable
+		unsigned Function		: 3;
+		unsigned ZUpdEnable		: 1;
+		unsigned				: 11; // unnamed padding; the fields total 16 bits
+	};
+};
+
+union UPEAlphaConfReg // Layout of the value stored for PE_ALPHACONF (see Read16/Write16)
+{
+	u16 Hex;	// raw 16-bit register value
+	struct 
+	{
+		unsigned BMMath			: 1; // GX_BM_BLEND || GX_BM_SUBTRACT
+		unsigned BMLogic		: 1; // GX_BM_LOGIC
+		unsigned Dither			: 1;
+		unsigned ColorUpdEnable	: 1;
+		unsigned AlphaUpdEnable	: 1;
+		unsigned DstFactor		: 3;
+		unsigned SrcFactor		: 3;
+		unsigned Substract		: 1; // Additive mode by default
+		unsigned BlendOperator	: 4; // last field; the fields total 16 bits
+	};
+};
+
+union UPEDstAlphaConfReg // Layout of the value stored for PE_DSTALPHACONF (see Read16/Write16)
+{
+	u16 Hex;	// raw 16-bit register value
+	struct 
+	{
+		unsigned DstAlpha		: 8;
+		unsigned Enable			: 1;
+		unsigned				: 7; // unnamed padding; the fields total 16 bits
+	};
+};
+
+union UPEAlphaModeConfReg // Layout of the value stored for PE_ALPHAMODE (see Read16/Write16)
+{
+	u16 Hex;	// raw 16-bit register value
+	struct 
+	{
+		unsigned Threshold		: 8;
+		unsigned CompareMode	: 8; // the two fields total 16 bits
+	};
+};
+
+// Not sure about this reg...
+union UPEAlphaReadReg // Layout of the value stored for PE_ALPHAREAD (see Read16/Write16)
+{
+	u16 Hex;	// raw 16-bit register value
+	struct 
+	{
+		unsigned ReadMode		: 3;
+		unsigned				: 13; // unnamed padding; the fields total 16 bits
+	};
+};
+
+// Pixel engine control register
+union UPECtrlReg // Layout of the value stored for PE_CTRL_REGISTER (see Read16/Write16)
+{
+	struct 
+	{
+		unsigned PETokenEnable	:	1; // gates INT_CAUSE_PE_TOKEN in UpdateInterrupts()
+		unsigned PEFinishEnable	:	1; // gates INT_CAUSE_PE_FINISH in UpdateInterrupts()
+		unsigned PEToken		:	1; // write only
+		unsigned PEFinish		:	1; // write only
+		unsigned				:	12; // unnamed padding; the fields total 16 bits
+	};
+	u16 Hex;	// raw 16-bit register value
+	UPECtrlReg() {Hex = 0; }	// default: all bits clear
+	UPECtrlReg(u16 _hex) {Hex = _hex; }	// wrap a raw register value for field access
+};
+
+// STATE_TO_SAVE
+static UPEZConfReg			m_ZConf;
+static UPEAlphaConfReg		m_AlphaConf;
+static UPEDstAlphaConfReg	m_DstAlphaConf;
+static UPEAlphaModeConfReg	m_AlphaModeConf;
+static UPEAlphaReadReg		m_AlphaRead;
+static UPECtrlReg			m_Control;
+//static u16					m_Token; // token value most recently encountered
+
+static bool g_bSignalTokenInterrupt;	// set by SetToken_OnMainThread, cleared by a PEToken write to PE_CTRL_REGISTER
+static bool g_bSignalFinishInterrupt;	// set by SetFinish_OnMainThread, cleared by a PEFinish write to PE_CTRL_REGISTER
+
+static int et_SetTokenOnMainThread;		// value returned by pRegisterEvent("SetToken", ...) in Init()
+static int et_SetFinishOnMainThread;	// value returned by pRegisterEvent("SetFinish", ...) in Init()
+
+u16 bbox[4];		// indices 0..3 are returned for PE_BBOX_LEFT, RIGHT, TOP, BOTTOM reads
+bool bbox_active;	// cleared by every PE_BBOX_* read in Read16()
+
+enum // interrupt causes handed to g_VideoInitialize.pSetInterrupt by UpdateInterrupts()
+{	// NOTE(review): presumably these must equal the core's own interrupt cause values — confirm
+    INT_CAUSE_PE_TOKEN    =  0x200, // GP Token
+    INT_CAUSE_PE_FINISH   =  0x400, // GP Finished
+};
+
+void DoState(PointerWrap &p)
+{	// NOTE(review): presumably PointerWrap processes values in call order, making this order the savestate layout — confirm
+	p.Do(m_ZConf);
+	p.Do(m_AlphaConf);
+	p.Do(m_DstAlphaConf);
+	p.Do(m_AlphaModeConf);
+	p.Do(m_AlphaRead);
+	p.Do(m_Control);
+	p.Do(CommandProcessor::fifo.PEToken);	// the token value is kept in the command processor's fifo struct
+
+	p.Do(g_bSignalTokenInterrupt);
+	p.Do(g_bSignalFinishInterrupt);
+	
+	p.Do(bbox);
+	p.Do(bbox_active);
+}
+
+void UpdateInterrupts(); // forward declaration; defined later in this file
+
+void SetToken_OnMainThread(u64 userdata, int cyclesLate); // registered as the "SetToken" event in Init()
+void SetFinish_OnMainThread(u64 userdata, int cyclesLate); // registered as the "SetFinish" event in Init()
+
+void Init()
+{
+	// Clear the whole control register (both enable bits end up off).
+	m_Control.Hex = 0;
+
+	// Register the callbacks that SetToken()/SetFinish() schedule.
+	et_SetTokenOnMainThread = g_VideoInitialize.pRegisterEvent("SetToken", SetToken_OnMainThread);
+	et_SetFinishOnMainThread = g_VideoInitialize.pRegisterEvent("SetFinish", SetFinish_OnMainThread);
+
+	// Initial bounding box values, in bbox[] index order.
+	static const u16 initialBBox[4] = { 0x80, 0xA0, 0x80, 0xA0 };
+	for (int idx = 0; idx < 4; idx++) bbox[idx] = initialBBox[idx];
+	bbox_active = false;
+}
+
+void Read16(u16& _uReturnValue, const u32 _iAddress)
+{
+	// 16-bit read of a pixel engine register; only the low 12 address bits select the register.
+	DEBUG_LOG(PIXELENGINE, "(r16) 0x%08x", _iAddress);
+	switch (_iAddress & 0xFFF)
+	{
+		// CPU Direct Access EFB Raster State Config
+	case PE_ZCONF:
+		_uReturnValue = m_ZConf.Hex;
+		INFO_LOG(PIXELENGINE, "(r16) ZCONF");
+		break;
+	case PE_ALPHACONF:
+		// Most games read this early. no idea why.
+		_uReturnValue = m_AlphaConf.Hex;
+		INFO_LOG(PIXELENGINE, "(r16) ALPHACONF");
+		break;
+	case PE_DSTALPHACONF:
+		_uReturnValue = m_DstAlphaConf.Hex;
+		INFO_LOG(PIXELENGINE, "(r16) DSTALPHACONF");
+		break;
+	case PE_ALPHAMODE:
+		_uReturnValue = m_AlphaModeConf.Hex;
+		INFO_LOG(PIXELENGINE, "(r16) ALPHAMODE");
+		break;	
+	case PE_ALPHAREAD:
+		_uReturnValue = m_AlphaRead.Hex;
+		WARN_LOG(PIXELENGINE, "(r16) ALPHAREAD");
+		break;
+
+	case PE_CTRL_REGISTER:
+		_uReturnValue = m_Control.Hex;
+		INFO_LOG(PIXELENGINE, "(r16) CTRL_REGISTER : %04x", _uReturnValue);
+		break;
+
+	case PE_TOKEN_REG:
+		_uReturnValue = CommandProcessor::fifo.PEToken;
+		INFO_LOG(PIXELENGINE, "(r16) TOKEN_REG : %04x", _uReturnValue);
+		break;
+
+		// The return values for these BBOX registers need to be gotten from the bounding box of the object. 
+		// See http://code.google.com/p/dolphin-emu/issues/detail?id=360#c74 for more details.
+	// 0x80, 0xa0, 0x80, 0xa0 makes Paper Mario happy.
+	case PE_BBOX_LEFT:   _uReturnValue = bbox[0]; INFO_LOG(PIXELENGINE, "R: BBOX_LEFT   = %i", bbox[0]); bbox_active = false; break;
+	case PE_BBOX_RIGHT:  _uReturnValue = bbox[1]; INFO_LOG(PIXELENGINE, "R: BBOX_RIGHT  = %i", bbox[1]); bbox_active = false; break;
+	case PE_BBOX_TOP:    _uReturnValue = bbox[2]; INFO_LOG(PIXELENGINE, "R: BBOX_TOP    = %i", bbox[2]); bbox_active = false; break;
+	case PE_BBOX_BOTTOM: _uReturnValue = bbox[3]; INFO_LOG(PIXELENGINE, "R: BBOX_BOTTOM = %i", bbox[3]); bbox_active = false; break;
+
+	case PE_PERF_0L:
+	case PE_PERF_0H:
+	case PE_PERF_1L:
+	case PE_PERF_1H:
+	case PE_PERF_2L:
+	case PE_PERF_2H:
+	case PE_PERF_3L:
+	case PE_PERF_3H:
+	case PE_PERF_4L:
+	case PE_PERF_4H:
+	case PE_PERF_5L:
+	case PE_PERF_5H:
+		_uReturnValue = 0; // counters are not emulated: return a defined value instead of leaving the output untouched
+		INFO_LOG(PIXELENGINE, "(r16) perf counter @ %08x", _iAddress);
+		break;
+
+	default:
+		INFO_LOG(PIXELENGINE, "(r16) unknown @ %08x", _iAddress);
+		_uReturnValue = 1;
+		break;
+	}
+}
+
+void Write16(const u16 _iValue, const u32 _iAddress)
+{	// 16-bit write to a pixel engine register; only the low 12 address bits select the register.
+	switch (_iAddress & 0xFFF)
+	{
+		// CPU Direct Access EFB Raster State Config
+	case PE_ZCONF:
+		m_ZConf.Hex = _iValue;
+		INFO_LOG(PIXELENGINE, "(w16) ZCONF: %02x", _iValue);
+		break;
+	case PE_ALPHACONF:
+		m_AlphaConf.Hex = _iValue;
+		INFO_LOG(PIXELENGINE, "(w16) ALPHACONF: %02x", _iValue);
+		break;
+	case PE_DSTALPHACONF:
+		m_DstAlphaConf.Hex = _iValue;
+		INFO_LOG(PIXELENGINE, "(w16) DSTALPHACONF: %02x", _iValue);
+		break;
+	case PE_ALPHAMODE:
+		m_AlphaModeConf.Hex = _iValue;
+		INFO_LOG(PIXELENGINE, "(w16) ALPHAMODE: %02x", _iValue);
+		break;
+	case PE_ALPHAREAD:
+		m_AlphaRead.Hex = _iValue;
+		INFO_LOG(PIXELENGINE, "(w16) ALPHAREAD: %02x", _iValue);
+		break;
+
+	case PE_CTRL_REGISTER:	
+		{
+			UPECtrlReg tmpCtrl(_iValue);	// decode the written value into its bit fields
+
+			if (tmpCtrl.PEToken)	g_bSignalTokenInterrupt = false;	// writing 1 clears the pending token signal
+			if (tmpCtrl.PEFinish)	g_bSignalFinishInterrupt = false;	// writing 1 clears the pending finish signal
+
+			m_Control.PETokenEnable  = tmpCtrl.PETokenEnable;	// only the two enable bits are stored
+			m_Control.PEFinishEnable = tmpCtrl.PEFinishEnable;
+			m_Control.PEToken = 0;		// this flag is write only
+			m_Control.PEFinish = 0;		// this flag is write only
+
+			DEBUG_LOG(PIXELENGINE, "(w16) CTRL_REGISTER: 0x%04x", _iValue);
+			UpdateInterrupts();	// re-evaluate both interrupt lines with the new flags and enables
+		}
+		break;
+
+	case PE_TOKEN_REG:
+		//LOG(PIXELENGINE,"WEIRD: program wrote token: %i",_iValue);
+		PanicAlert("(w16) WTF? PowerPC program wrote token: %i", _iValue);
+		//only the gx pipeline is supposed to be able to write here
+		//g_token = _iValue;
+		break;
+
+	default:
+		WARN_LOG(PIXELENGINE, "(w16) unknown %04x @ %08x", _iValue, _iAddress);	// value is dropped
+		break;
+	}
+}
+
+void Write32(const u32 _iValue, const u32 _iAddress)
+{	// 32-bit writes are not handled: the value is logged and then dropped.
+	WARN_LOG(PIXELENGINE, "(w32) 0x%08x @ 0x%08x IGNORING...",_iValue,_iAddress);
+}
+
+bool AllowIdleSkipping() // false only in dual-core mode while a PE interrupt is enabled
+{
+	return !(g_VideoInitialize.bUseDualCore && (m_Control.PETokenEnable || m_Control.PEFinishEnable));
+}
+
+void UpdateInterrupts()
+{
+	// An interrupt is set only while its signal flag is pending and the
+	// matching enable bit of the control register is set; otherwise it is
+	// cleared, so both causes are rewritten on every call.
+
+	// token interrupt
+	const bool tokenPending = g_bSignalTokenInterrupt && m_Control.PETokenEnable;
+	g_VideoInitialize.pSetInterrupt(INT_CAUSE_PE_TOKEN, tokenPending);
+
+	// finish interrupt
+	const bool finishPending = g_bSignalFinishInterrupt && m_Control.PEFinishEnable;
+	g_VideoInitialize.pSetInterrupt(INT_CAUSE_PE_FINISH, finishPending);
+}
+
+// TODO(mb2): Refactor SetTokenINT_OnMainThread(u64 userdata, int cyclesLate).
+//			  Think about the right order between tokenVal and tokenINT... one day maybe.
+//			  Cleanup++
+
+// Called only if BPMEM_PE_TOKEN_INT_ID is ack by GP
+void SetToken_OnMainThread(u64 userdata, int cyclesLate)
+{	// Event callback registered in Init(); neither parameter is used by the active code.
+	//if (userdata >> 16)
+	//{
+		g_bSignalTokenInterrupt = true;	// latch the token signal
+		//_dbg_assert_msg_(PIXELENGINE, (CommandProcessor::fifo.PEToken == (userdata&0xFFFF)), "WTF? BPMEM_PE_TOKEN_INT_ID's token != BPMEM_PE_TOKEN_ID's token" );
+		INFO_LOG(PIXELENGINE, "VIDEO Plugin raises INT_CAUSE_PE_TOKEN (btw, token: %04x)", CommandProcessor::fifo.PEToken);
+		UpdateInterrupts();	// sets the interrupt only if PETokenEnable is set
+	//}
+	//else
+	//	LOGV(PIXELENGINE, 1, "VIDEO Plugin wrote token: %i", CommandProcessor::fifo.PEToken);
+}
+
+void SetFinish_OnMainThread(u64 userdata, int cyclesLate)
+{
+	g_bSignalFinishInterrupt = true; // latch the finish signal; the enable bit is checked below
+	UpdateInterrupts();
+}
+
+// SetToken
+// THIS IS EXECUTED FROM VIDEO THREAD
+void SetToken(const u16 _token, const int _bSetTokenAcknowledge)
+{
+	// TODO?: set-token-value and set-token-INT could be merged since set-token-INT own the token value.
+	if (_bSetTokenAcknowledge) // set token INT
+	{
+		// This seems smelly...
+		CommandProcessor::IncrementGPWDToken(); // for DC watchdog hack since PEToken seems to be a frame-finish too
+		g_VideoInitialize.pScheduleEvent_Threadsafe(
+			0, et_SetTokenOnMainThread, _token | (_bSetTokenAcknowledge << 16)); // userdata: token in bits 0-15, acknowledge flag shifted up by 16
+	}
+	else // set token value
+	{
+		// we do it directly from videoThread because of
+		// Super Monkey Ball
+		// XXX: No 16-bit atomic store available, so cheat and use 32-bit.
+		// That's what we've always done. We're counting on fifo.PEToken to be
+		// 4-byte padded.
+        Common::AtomicStore(*(volatile u32*)&CommandProcessor::fifo.PEToken, _token); // writes 4 bytes starting at PEToken
+	}
+}
+
+// SetFinish
+// THIS IS EXECUTED FROM VIDEO THREAD (BPStructs.cpp) when a new frame has been drawn
+void SetFinish()
+{
+	CommandProcessor::IncrementGPWDToken(); // for DC watchdog hack
+	g_VideoInitialize.pScheduleEvent_Threadsafe(
+		0, et_SetFinishOnMainThread, 0); // schedules SetFinish_OnMainThread with userdata 0
+	INFO_LOG(PIXELENGINE, "VIDEO Set Finish");
+}
+
+} // end of namespace PixelEngine
diff --git a/Source/Core/Core/Src/HW/PixelEngine.h b/Source/Core/VideoCommon/Src/PixelEngine.h
similarity index 96%
rename from Source/Core/Core/Src/HW/PixelEngine.h
rename to Source/Core/VideoCommon/Src/PixelEngine.h
index 0043f16b8e..163b163e03 100644
--- a/Source/Core/Core/Src/HW/PixelEngine.h
+++ b/Source/Core/VideoCommon/Src/PixelEngine.h
@@ -1,78 +1,78 @@
-// Copyright (C) 2003 Dolphin Project.
-
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official SVN repository and contact information can be found at
-// http://code.google.com/p/dolphin-emu/
-
-#ifndef _PIXELENGINE_H
-#define _PIXELENGINE_H
-
-#include "CommonTypes.h"
-class PointerWrap;
-
-// internal hardware addresses
-enum
-{
-	PE_ZCONF         = 0x00, // Z Config
-	PE_ALPHACONF     = 0x02, // Alpha Config
-	PE_DSTALPHACONF  = 0x04, // Destination Alpha Config
-	PE_ALPHAMODE     = 0x06, // Alpha Mode Config
-	PE_ALPHAREAD     = 0x08, // Alpha Read
-	PE_CTRL_REGISTER = 0x0a, // Control
-	PE_TOKEN_REG     = 0x0e, // Token
-	PE_BBOX_LEFT	 = 0x10, // Flip Left
-	PE_BBOX_RIGHT	 = 0x12, // Flip Right
-	PE_BBOX_TOP		 = 0x14, // Flip Top
-	PE_BBOX_BOTTOM	 = 0x16, // Flip Bottom
-
-	// These have not yet been RE:d. They are the perf counters.
-	PE_PERF_0L       = 0x18, 
-	PE_PERF_0H       = 0x1a, 
-	PE_PERF_1L       = 0x1c, 
-	PE_PERF_1H       = 0x1e, 
-	PE_PERF_2L       = 0x20, 
-	PE_PERF_2H       = 0x22, 
-	PE_PERF_3L       = 0x24, 
-	PE_PERF_3H       = 0x26, 
-	PE_PERF_4L       = 0x28, 
-	PE_PERF_4H       = 0x2a, 
-	PE_PERF_5L       = 0x2c, 
-	PE_PERF_5H       = 0x2e, 
-};
-
-namespace PixelEngine
-{
-
-void Init();
-void DoState(PointerWrap &p);
-
-// Read
-void Read16(u16& _uReturnValue, const u32 _iAddress);
-
-// Write
-void Write16(const u16 _iValue, const u32 _iAddress);
-void Write32(const u32 _iValue, const u32 _iAddress);
-
-// gfx plugin support
-void SetToken(const u16 _token, const int _bSetTokenAcknowledge);
-void SetFinish(void);
-bool AllowIdleSkipping();
-
-// Bounding box functionality. Paper Mario (both) are a couple of the few games that use it.
-extern u16 bbox[4];
-extern bool bbox_active;
-
-} // end of namespace PixelEngine
-
-#endif
+// Copyright (C) 2003 Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official SVN repository and contact information can be found at
+// http://code.google.com/p/dolphin-emu/
+
+#ifndef _PIXELENGINE_H
+#define _PIXELENGINE_H
+
+#include "CommonTypes.h"
+class PointerWrap;
+
+// internal hardware addresses
+enum // register offsets; Read16/Write16 compare them against (address & 0xFFF)
+{
+	PE_ZCONF         = 0x00, // Z Config
+	PE_ALPHACONF     = 0x02, // Alpha Config
+	PE_DSTALPHACONF  = 0x04, // Destination Alpha Config
+	PE_ALPHAMODE     = 0x06, // Alpha Mode Config
+	PE_ALPHAREAD     = 0x08, // Alpha Read
+	PE_CTRL_REGISTER = 0x0a, // Control
+	PE_TOKEN_REG     = 0x0e, // Token
+	PE_BBOX_LEFT	 = 0x10, // Bounding box left
+	PE_BBOX_RIGHT	 = 0x12, // Bounding box right
+	PE_BBOX_TOP		 = 0x14, // Bounding box top
+	PE_BBOX_BOTTOM	 = 0x16, // Bounding box bottom
+
+	// These have not yet been reverse-engineered. They are the perf counters.
+	PE_PERF_0L       = 0x18, 
+	PE_PERF_0H       = 0x1a, 
+	PE_PERF_1L       = 0x1c, 
+	PE_PERF_1H       = 0x1e, 
+	PE_PERF_2L       = 0x20, 
+	PE_PERF_2H       = 0x22, 
+	PE_PERF_3L       = 0x24, 
+	PE_PERF_3H       = 0x26, 
+	PE_PERF_4L       = 0x28, 
+	PE_PERF_4H       = 0x2a, 
+	PE_PERF_5L       = 0x2c, 
+	PE_PERF_5H       = 0x2e, 
+};
+
+namespace PixelEngine
+{
+
+void Init();	// resets the control register and bounding box, registers the token/finish events
+void DoState(PointerWrap &p);	// passes the register state, interrupt flags and bounding box to p.Do()
+
+// Read
+void Read16(u16& _uReturnValue, const u32 _iAddress);	// result is returned through _uReturnValue
+
+// Write
+void Write16(const u16 _iValue, const u32 _iAddress);
+void Write32(const u32 _iValue, const u32 _iAddress);	// only logs; the value is ignored
+
+// gfx plugin support
+void SetToken(const u16 _token, const int _bSetTokenAcknowledge);	// non-zero acknowledge schedules the token event; zero stores the token value
+void SetFinish(void);	// schedules the finish event
+bool AllowIdleSkipping();
+
+// Bounding box functionality. Paper Mario (both) are a couple of the few games that use it.
+extern u16 bbox[4];	// left, right, top, bottom
+extern bool bbox_active;
+
+} // end of namespace PixelEngine
+
+#endif
diff --git a/Source/Core/VideoCommon/Src/SConscript b/Source/Core/VideoCommon/Src/SConscript
index c8615c5ed5..1e1b896627 100644
--- a/Source/Core/VideoCommon/Src/SConscript
+++ b/Source/Core/VideoCommon/Src/SConscript
@@ -10,6 +10,8 @@ files = [
 	'XFMemory.cpp',
 	'XFStructs.cpp',
 	'BPStructs.cpp',
+	'CommandProcessor.cpp',
+	'PixelEngine.cpp',
 	'memcpy_amd.cpp',
 	'OpcodeDecoding.cpp',
 	'TextureDecoder.cpp',
diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp
index 302f1c6704..9c09ad37cb 100644
--- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp
+++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp
@@ -325,6 +325,11 @@ inline u32 makecol(int r, int g, int b, int a)
     return (a << 24)|(r << 16)|(g << 8)|b;
 }
 
+inline u32 makeRGBA(int r, int g, int b, int a) // packs r into bits 0-7, g 8-15, b 16-23, a 24-31
+{
+    return r | (g << 8) | (b << 16) | (a << 24);
+}
+
 void decodeDXTBlock(u32 *dst, const DXTBlock *src, int pitch)
 {
 	// S3TC Decoder (Note: GCN decodes differently from PC so we can't use native support)
@@ -664,6 +669,338 @@ PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, in
 	return retval;
 }
 
+
+inline u32 decode565RGBA(u16 val)
+{
+    // Widen each 5/6/5-bit channel to 8 bits; alpha is fixed at 0xFF.
+    const int red   = Convert5To8((val >> 11) & 0x1f);
+    const int green = Convert6To8((val >> 5) & 0x3f);
+    const int blue  = Convert5To8(val & 0x1f);
+    const int alpha = 0xFF;
+    return (alpha << 24) | (blue << 16) | (green << 8) | red;
+}
+
+inline u32 decodeIA8Swapped(u16 val) // low byte of val -> alpha, high byte -> R, G and B
+{
+    const int intensity = val >> 8;
+    const int alpha = val & 0xFF;
+    return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
+}
+
+inline u32 decode5A3RGBA(u16 val)
+{
+    int red, green, blue, alpha;
+    if (val & 0x8000) // bit 15 set: 5 bits per colour channel, alpha fixed at 0xFF
+    {
+        red   = Convert5To8((val >> 10) & 0x1f);
+        green = Convert5To8((val >> 5) & 0x1f);
+        blue  = Convert5To8(val & 0x1f);
+        alpha = 0xFF;
+    }
+    else // bit 15 clear: 3-bit alpha followed by 4 bits per colour channel
+    {
+        alpha = Convert3To8((val >> 12) & 0x7);
+        red   = Convert4To8((val >> 8) & 0xf);
+        green = Convert4To8((val >> 4) & 0xf);
+        blue  = Convert4To8(val & 0xf);
+    }
+    return (alpha << 24) | (blue << 16) | (green << 8) | red;
+}
+
+
+void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt)
+{	// Decodes the single texel at (s, t) of the texture at src and writes 4 bytes to dst.
+    /* General formula for computing texture offset
+    // 
+    u16 sBlk = s / blockWidth;
+    u16 tBlk = t / blockHeight;
+    u16 widthBlks = (width / blockWidth) + 1;
+    u32 base = (tBlk * widthBlks + sBlk) * blockWidth * blockHeight;
+    u16 blkS = s & (blockWidth - 1);
+    u16 blkT =  t & (blockHeight - 1);
+    u32 blkOff = blkT * blockWidth + blkS;    
+    */
+	// NOTE(review): the "+ 1" in widthBlks suggests imageWidth is passed as width - 1 — confirm against the caller
+    switch (texformat) // unhandled formats fall through the switch and leave dst unwritten
+    {
+    case GX_TF_C4: // 8x8 texel blocks, 4 bits per texel, value is an index into the TLUT
+        {
+		    u16 sBlk = s >> 3;
+            u16 tBlk = t >> 3;
+            u16 widthBlks = (imageWidth >> 3) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 5;
+            u16 blkS = s & 7;
+            u16 blkT =  t & 7;
+            u32 blkOff = (blkT << 3) + blkS;
+            
+            int rs = (blkOff & 1)?0:4; // even texel = high nibble, odd texel = low nibble
+            u32 offset = base + (blkOff >> 1);            
+
+            u8 val = (*(src + offset) >> rs) & 0xF;
+            u16 *tlut = (u16*)(texMem + tlutaddr);
+
+            switch (tlutfmt) // other tlutfmt values leave dst unwritten
+            {
+            case 0:
+                *((u32*)dst) = decodeIA8Swapped(tlut[val]);
+                break;
+            case 1:                
+                *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val]));
+                break;
+            case 2:
+                *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val]));
+                break;
+            }
+        }
+        break;
+    case GX_TF_I4: // 8x8 texel blocks, 4 bits per texel, value replicated into all four output bytes
+		{
+			u16 sBlk = s >> 3;
+            u16 tBlk = t >> 3;
+            u16 widthBlks = (imageWidth >> 3) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 5;
+            u16 blkS = s & 7;
+            u16 blkT =  t & 7;
+            u32 blkOff = (blkT << 3) + blkS;
+            
+            int rs = (blkOff & 1)?0:4; // even texel = high nibble, odd texel = low nibble
+            u32 offset = base + (blkOff >> 1);            
+
+            u8 val = (*(src + offset) >> rs) & 0xF;
+            val = Convert4To8(val);
+			dst[0] = val;
+            dst[1] = val;
+            dst[2] = val;
+            dst[3] = val;
+        }
+       break;
+	case GX_TF_I8: // 8-wide by 4-tall texel blocks, 1 byte per texel, value replicated into all four output bytes
+		{
+            u16 sBlk = s >> 3;
+            u16 tBlk = t >> 2;
+            u16 widthBlks = (imageWidth >> 3) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 5;
+            u16 blkS = s & 7;
+            u16 blkT =  t & 3;
+            u32 blkOff = (blkT << 3) + blkS;
+            
+            u8 val = *(src + base + blkOff);
+            dst[0] = val;
+            dst[1] = val;
+            dst[2] = val;
+            dst[3] = val;
+		}
+		break;
+    case GX_TF_C8: // 8-wide by 4-tall texel blocks, 1 byte per texel, value is an index into the TLUT
+        {
+		    u16 sBlk = s >> 3;
+            u16 tBlk = t >> 2;
+            u16 widthBlks = (imageWidth >> 3) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 5;
+            u16 blkS = s & 7;
+            u16 blkT =  t & 3;
+            u32 blkOff = (blkT << 3) + blkS;
+            
+            u8 val = *(src + base + blkOff);
+            u16 *tlut = (u16*)(texMem + tlutaddr);
+
+            switch (tlutfmt) // other tlutfmt values leave dst unwritten
+            {
+            case 0:
+                *((u32*)dst) = decodeIA8Swapped(tlut[val]);
+                break;
+            case 1:                
+                *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val]));
+                break;
+            case 2:
+                *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val]));
+                break;
+            }
+        }
+        break;
+    case GX_TF_IA4: // 8-wide by 4-tall texel blocks, 1 byte per texel: high nibble alpha, low nibble intensity
+        {
+            u16 sBlk = s >> 3;
+            u16 tBlk = t >> 2;
+            u16 widthBlks = (imageWidth >> 3) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 5;
+            u16 blkS = s & 7;
+            u16 blkT =  t & 3;
+            u32 blkOff = (blkT << 3) + blkS;
+            
+            u8 val = *(src + base + blkOff);
+            const u8 a = Convert4To8(val>>4);
+            const u8 l = Convert4To8(val&0xF);
+            dst[0] = l;
+            dst[1] = l;
+            dst[2] = l;
+            dst[3] = a;
+        }
+		break;
+    case GX_TF_IA8: // 4x4 texel blocks, 2 bytes per texel, decoded without a byte swap
+        {
+			u16 sBlk = s >> 2;
+            u16 tBlk = t >> 2;
+            u16 widthBlks = (imageWidth >> 2) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 4;
+            u16 blkS = s & 3;
+            u16 blkT =  t & 3;
+            u32 blkOff = (blkT << 2) + blkS;
+            
+            u32 offset = (base + blkOff) << 1; // texel index -> byte offset (2 bytes per texel)
+            const u16* valAddr = (u16*)(src + offset);
+
+            *((u32*)dst) = decodeIA8Swapped(*valAddr);
+        }
+		break;
+    case GX_TF_C14X2: // 4x4 texel blocks, 2 bytes per texel, low 14 bits index the TLUT
+        {
+            u16 sBlk = s >> 2;
+            u16 tBlk = t >> 2;
+            u16 widthBlks = (imageWidth >> 2) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 4;
+            u16 blkS = s & 3;
+            u16 blkT =  t & 3;
+            u32 blkOff = (blkT << 2) + blkS;
+            
+            u32 offset = (base + blkOff) << 1; // texel index -> byte offset (2 bytes per texel)
+            const u16* valAddr = (u16*)(src + offset);
+
+            u16 val = Common::swap16(*valAddr) & 0x3FFF;
+            u16 *tlut = (u16*)(texMem + tlutaddr);
+
+            switch (tlutfmt) // other tlutfmt values leave dst unwritten
+            {
+            case 0:
+                *((u32*)dst) = decodeIA8Swapped(tlut[val]);
+                break;
+            case 1:                
+                *((u32*)dst) = decode565RGBA(Common::swap16(tlut[val]));
+                break;
+            case 2:
+                *((u32*)dst) = decode5A3RGBA(Common::swap16(tlut[val]));
+                break;
+            }
+        }
+		break;
+    case GX_TF_RGB565: // 4x4 texel blocks, 2 bytes per texel, byte-swapped before decoding
+		{
+			u16 sBlk = s >> 2;
+            u16 tBlk = t >> 2;
+            u16 widthBlks = (imageWidth >> 2) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 4;
+            u16 blkS = s & 3;
+            u16 blkT =  t & 3;
+            u32 blkOff = (blkT << 2) + blkS;
+            
+            u32 offset = (base + blkOff) << 1; // texel index -> byte offset (2 bytes per texel)
+            const u16* valAddr = (u16*)(src + offset);
+
+            *((u32*)dst) = decode565RGBA(Common::swap16(*valAddr));
+		}
+		break;
+    case GX_TF_RGB5A3: // 4x4 texel blocks, 2 bytes per texel, byte-swapped before decoding
+        {
+            u16 sBlk = s >> 2;
+            u16 tBlk = t >> 2;
+            u16 widthBlks = (imageWidth >> 2) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 4;
+            u16 blkS = s & 3;
+            u16 blkT =  t & 3;
+            u32 blkOff = (blkT << 2) + blkS;
+
+            u32 offset = (base + blkOff) << 1; // texel index -> byte offset (2 bytes per texel)
+            const u16* valAddr = (u16*)(src + offset);
+
+            *((u32*)dst) = decode5A3RGBA(Common::swap16(*valAddr));
+        }
+        break;
+    case GX_TF_RGBA8: // 4x4 texel blocks of 64 bytes: two bytes per texel in each 32-byte half
+        {
+			u16 sBlk = s >> 2;
+            u16 tBlk = t >> 2;
+            u16 widthBlks = (imageWidth >> 2) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 5; // shift by 5 is correct
+            u16 blkS = s & 3;
+            u16 blkT =  t & 3;
+            u32 blkOff = (blkT << 2) + blkS;
+
+            u32 offset = (base + blkOff) << 1 ;
+            const u8* valAddr = src + offset;
+
+            dst[3] = valAddr[0]; // first half of the block supplies dst[3] and dst[0]
+            dst[0] = valAddr[1];
+            dst[1] = valAddr[32]; // second half (32 bytes further on) supplies dst[1] and dst[2]
+            dst[2] = valAddr[33];
+        }
+        break;
+    case GX_TF_CMPR: // 8x8 texel blocks made of 2x2 DXT sub-blocks, each 4x4 texels and 8 bytes
+		{
+            u16 sDxt = s >> 2;
+            u16 tDxt = t >> 2;
+
+            u16 sBlk = sDxt >> 1;
+            u16 tBlk = tDxt >> 1;
+            u16 widthBlks = (imageWidth >> 3) + 1;
+            u32 base = (tBlk * widthBlks + sBlk) << 2;
+            u16 blkS = sDxt & 1;
+            u16 blkT =  tDxt & 1;
+            u32 blkOff = (blkT << 1) + blkS;
+
+            u32 offset = (base + blkOff) << 3; // sub-block index -> byte offset (8 bytes per sub-block)
+
+            const DXTBlock* dxtBlock = (const DXTBlock*)(src + offset);
+	            
+            u16 c1 = Common::swap16(dxtBlock->color1);
+            u16 c2 = Common::swap16(dxtBlock->color2);
+            int blue1 = Convert5To8(c1 & 0x1F);
+            int blue2 = Convert5To8(c2 & 0x1F);
+            int green1 = Convert6To8((c1 >> 5) & 0x3F);
+            int green2 = Convert6To8((c2 >> 5) & 0x3F);
+            int red1 = Convert5To8((c1 >> 11) & 0x1F);
+            int red2 = Convert5To8((c2 >> 11) & 0x1F);
+
+            u16 ss = s & 3;
+            u16 tt = t & 3;
+
+            int colorSel = dxtBlock->lines[tt];
+            int rs = 6 - (ss << 1); // 2 selector bits per texel, leftmost texel in the top bits
+            colorSel = (colorSel >> rs) & 3;
+            colorSel |= c1 > c2?0:4; // bit 2 marks the c1 <= c2 mode (cases 4-7 below)
+
+            u32 color = 0;
+            
+            switch (colorSel)
+            {
+                case 0:
+                case 4:
+                    color = makeRGBA(red1, green1, blue1, 255);
+                    break;
+                case 1:
+                case 5:
+                    color = makeRGBA(red2, green2, blue2, 255);
+                    break;
+                case 2:
+                    color = makeRGBA(red1+(red2-red1)/3, green1+(green2-green1)/3, blue1+(blue2-blue1)/3, 255);
+                    break;
+                case 3:
+                    color = makeRGBA(red2+(red1-red2)/3, green2+(green1-green2)/3, blue2+(blue1-blue2)/3, 255);
+                    break;
+                case 6: // average of the two colours, rounded up
+                    color = makeRGBA((int)ceil((float)(red1+red2)/2), (int)ceil((float)(green1+green2)/2), (int)ceil((float)(blue1+blue2)/2), 255);
+                    break;
+                case 7: // the only selector that produces alpha 0
+                    color = makeRGBA(red2, green2, blue2, 0);
+                    break;
+            }
+
+            *((u32*)dst) = color;
+		}
+        break;
+    }
+}
+
+
 const char* texfmt[] = {
 	// pixel
 	"I4",		"I8",		"IA4",		"IA8",
diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.h b/Source/Core/VideoCommon/Src/TextureDecoder.h
index 68f4c56fa6..33ba57ee2f 100644
--- a/Source/Core/VideoCommon/Src/TextureDecoder.h
+++ b/Source/Core/VideoCommon/Src/TextureDecoder.h
@@ -85,6 +85,8 @@ enum PC_TexFormat
 
 PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt);
 
+void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt); // decodes the single texel at (s, t) into 4 bytes at dst
+
 u32 TexDecoder_GetSafeTextureHash(const u8 *src, int width, int height, int texformat, u32 seed=0);
 u32 TexDecoder_GetTlutHash(const u8* src, int len);
 
diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp
index 45b0e8eb70..70879ee586 100644
--- a/Source/Core/VideoCommon/Src/VertexLoader.cpp
+++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp
@@ -25,6 +25,7 @@
 #include "StringUtil.h"
 #include "x64Emitter.h"
 #include "ABI.h"
+#include "PixelEngine.h"
 
 #include "LookUpTables.h"
 #include "Statistics.h"
@@ -89,7 +90,7 @@ void LOADERDECL PosMtx_Write()
 
 void LOADERDECL UpdateBoundingBox() 
 {
-	if (!*g_VideoInitialize.pBBoxActive)
+    if (!PixelEngine::bbox_active)
 		return;
 
 	// Truly evil hack, reading backwards from the write pointer. If we were writing to write-only
@@ -125,10 +126,10 @@ void LOADERDECL UpdateBoundingBox()
 	o[0] = (o[0] + 1.0f) * 320.0f;
 	o[1] = (o[1] + 1.0f) * 240.0f;
 	
-	if (o[0] < g_VideoInitialize.pBBox[0]) g_VideoInitialize.pBBox[0] = (u16)std::max(0.0f, o[0]);
-	if (o[0] > g_VideoInitialize.pBBox[1]) g_VideoInitialize.pBBox[1] = (u16)std::min(640.0f, o[0]);
-	if (o[1] < g_VideoInitialize.pBBox[2]) g_VideoInitialize.pBBox[2] = (u16)std::max(0.0f, o[1]);
-	if (o[1] > g_VideoInitialize.pBBox[3]) g_VideoInitialize.pBBox[3] = (u16)std::min(480.0f, o[1]);
+    if (o[0] < PixelEngine::bbox[0]) PixelEngine::bbox[0] = (u16)std::max(0.0f, o[0]);
+	if (o[0] > PixelEngine::bbox[1]) PixelEngine::bbox[1] = (u16)std::min(640.0f, o[0]);
+	if (o[1] < PixelEngine::bbox[2]) PixelEngine::bbox[2] = (u16)std::max(0.0f, o[1]);
+	if (o[1] > PixelEngine::bbox[3]) PixelEngine::bbox[3] = (u16)std::min(480.0f, o[1]);
 	/*
 	if (GetAsyncKeyState(VK_LSHIFT)) {
 		ERROR_LOG(VIDEO, "XForm: %f %f %f to %f %f", p[0], p[1], p[2], o[0], o[1]);
diff --git a/Source/Core/VideoCommon/Src/VideoState.cpp b/Source/Core/VideoCommon/Src/VideoState.cpp
index 6fdf27afc0..b7ca4c00c1 100644
--- a/Source/Core/VideoCommon/Src/VideoState.cpp
+++ b/Source/Core/VideoCommon/Src/VideoState.cpp
@@ -22,6 +22,8 @@
 #include "XFMemory.h"
 #include "TextureDecoder.h"
 #include "Fifo.h"
+#include "CommandProcessor.h"
+#include "PixelEngine.h"
 
 static void DoState(PointerWrap &p)
 {
@@ -43,6 +45,9 @@ static void DoState(PointerWrap &p)
  
     // FIFO
     Fifo_DoState(p);
+
+    CommandProcessor::DoState(p);
+    PixelEngine::DoState(p);
 }
 
 void VideoCommon_DoState(PointerWrap &p)
diff --git a/Source/Core/VideoCommon/VideoCommon.vcproj b/Source/Core/VideoCommon/VideoCommon.vcproj
index 0ca8d95d0a..a74ddf1217 100644
--- a/Source/Core/VideoCommon/VideoCommon.vcproj
+++ b/Source/Core/VideoCommon/VideoCommon.vcproj
@@ -701,6 +701,14 @@
 				</File>
 			</Filter>
 		</Filter>
+		<File
+			RelativePath=".\Src\CommandProcessor.cpp"
+			>
+		</File>
+		<File
+			RelativePath=".\Src\CommandProcessor.h"
+			>
+		</File>
 		<File
 			RelativePath=".\Src\GlobalControl.cpp"
 			>
@@ -717,6 +725,14 @@
 			RelativePath=".\Src\NativeVertexFormat.h"
 			>
 		</File>
+		<File
+			RelativePath=".\Src\PixelEngine.cpp"
+			>
+		</File>
+		<File
+			RelativePath=".\Src\PixelEngine.h"
+			>
+		</File>
 		<File
 			RelativePath=".\Src\Render.h"
 			>
diff --git a/Source/PluginSpecs/pluginspecs_video.h b/Source/PluginSpecs/pluginspecs_video.h
index 4315384034..a1d0490c41 100644
--- a/Source/PluginSpecs/pluginspecs_video.h
+++ b/Source/PluginSpecs/pluginspecs_video.h
@@ -9,19 +9,19 @@
 
 #include "ExportProlog.h"
 
+typedef void (*TimedCallback)(u64 userdata, int cyclesLate); // callback signature accepted by TRegisterEvent below
 
-typedef void			(*TSetPEToken)(const unsigned short _token, const int _bSetTokenAcknowledge);
-typedef void			(*TSetPEFinish)(void);
+typedef void            (*TSetInterrupt)(u32 _causemask, bool _bSet); // NOTE(review): presumably sets or clears the interrupt causes in _causemask according to _bSet -- confirm with the core
+typedef int             (*TRegisterEvent)(const char *name, TimedCallback callback); // registers a named TimedCallback; NOTE(review): the int result is presumably the event_type expected by TScheduleEvent_Threadsafe -- confirm
+typedef void            (*TScheduleEvent_Threadsafe)(int cyclesIntoFuture, int event_type, u64 userdata); // NOTE(review): presumably queues event_type to fire cyclesIntoFuture cycles ahead with userdata handed to its callback -- confirm
 typedef unsigned char*	(*TGetMemoryPointer)(const unsigned int  _iAddress);
 typedef void			(*TVideoLog)(const char* _pMessage, int _bBreak);
 typedef void			(*TSysMessage)(const char *fmt, ...);
 typedef void			(*TRequestWindowSize)(int _iWidth, int _iHeight, bool _bFullscreen);
 typedef void			(*TCopiedToXFB)(bool video_update);
 typedef unsigned int	(*TPeekMessages)(void);
-typedef void			(*TUpdateInterrupts)(void);
 typedef void			(*TUpdateFPSDisplay)(const char* text); // sets the window title
 typedef void			(*TKeyPressed)(int keycode, bool shift, bool control); // sets the window title
-typedef void            (*TSetFifoIdle)();
 
 enum FieldType
 {
@@ -71,26 +71,23 @@ typedef struct
 {
 	void *pWindowHandle;
 
-	TSetPEToken						pSetPEToken;
-	TSetPEFinish					pSetPEFinish;
+    TSetInterrupt                   pSetInterrupt;
+    TRegisterEvent                  pRegisterEvent;
+    TScheduleEvent_Threadsafe       pScheduleEvent_Threadsafe;
 	TGetMemoryPointer				pGetMemoryPointer;
 	TVideoLog						pLog;
 	TSysMessage						pSysMessage;
 	TRequestWindowSize              pRequestWindowSize;
 	TCopiedToXFB					pCopiedToXFB;
 	TPeekMessages					pPeekMessages;
-	TUpdateInterrupts               pUpdateInterrupts;
     TUpdateFPSDisplay               pUpdateFPSDisplay;
 	TKeyPressed                     pKeyPress;
-	TSetFifoIdle                    pSetFifoIdle;
-	SCPFifoStruct                   *pCPFifo;
 	void *pMemoryBase;
 	bool bWii;
 	bool bUseDualCore;
-
-	unsigned short                  *pBBox;  // points to four shorts: left, top, right, bottom
-	// TODO:
-	bool                            *pBBoxActive;  // we guess that after a bbox reset, we only need to track bbox size until the corresponding read.
+    u32 *Fifo_CPUBase;
+    u32 *Fifo_CPUEnd;
+    u32 *Fifo_CPUWritePointer;
 
 } SVideoInitialize;
 
@@ -108,14 +105,6 @@ typedef struct
 //
 EXPORT void CALL Video_Prepare(void);
 
-// __________________________________________________________________________________________________
-// Function: Video_SendFifoData
-// Purpose:  This function is called to submit fifo data directly - only single core mode calls this.
-// input:    u8 *_uData, u32 len - a block of fifo data.
-// output:   none
-//
-EXPORT void CALL Video_SendFifoData(u8* _uData, u32 len);
-
 // __________________________________________________________________________________________________
 // Function: Video_BeginField
 // Purpose:  When a field begins in the VI emulator, this function tells the video plugin what the
@@ -181,5 +170,13 @@ EXPORT void CALL Video_SetRendering(bool bEnabled);
 //
 EXPORT void CALL Video_AddMessage(const char* pstr, unsigned int milliseconds);
 
+EXPORT void CALL Video_CommandProcessorRead16(u16& _rReturnValue, const u32 _Address); // 16-bit read at _Address, result delivered through _rReturnValue; the plugins forward this to CommandProcessor::Read16
+EXPORT void CALL Video_CommandProcessorWrite16(const u16 _Data, const u32 _Address); // 16-bit write of _Data at _Address; the plugins forward this to CommandProcessor::Write16
+EXPORT void CALL Video_PixelEngineRead16(u16& _rReturnValue, const u32 _Address); // 16-bit read at _Address, result delivered through _rReturnValue; the plugins forward this to PixelEngine::Read16
+EXPORT void CALL Video_PixelEngineWrite16(const u16 _Data, const u32 _Address); // 16-bit write of _Data at _Address; the plugins forward this to PixelEngine::Write16
+EXPORT void CALL Video_PixelEngineWrite32(const u32 _Data, const u32 _Address); // 32-bit write of _Data at _Address; the plugins forward this to PixelEngine::Write32
+EXPORT void CALL Video_GatherPipeBursted(void); // the plugins forward this to CommandProcessor::GatherPipeBursted
+EXPORT void CALL Video_WaitForFrameFinish(void); // the plugins forward this to CommandProcessor::WaitForFrameFinish
+
 #include "ExportEpilog.h"
 #endif
diff --git a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp
index 7c508e6e0f..fc745ac536 100644
--- a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp
@@ -45,6 +45,8 @@ GFXDebuggerDX9 *m_DebuggerFrame = NULL;
 #include "PixelShaderManager.h"
 #include "VertexShaderCache.h"
 #include "PixelShaderCache.h"
+#include "CommandProcessor.h"
+#include "PixelEngine.h"
 #include "OnScreenDisplay.h"
 #include "DlgSettings.h"
 #include "D3DTexture.h"
@@ -271,6 +273,8 @@ void Video_Prepare()
 	VertexShaderManager::Init();
 	PixelShaderCache::Init();
 	PixelShaderManager::Init();
+    CommandProcessor::Init();
+    PixelEngine::Init();
 }
 
 void Shutdown()
@@ -314,11 +318,6 @@ void Video_SetRendering(bool bEnabled) {
 	Fifo_SetRendering(bEnabled);
 }
 
-void Video_SendFifoData(u8* _uData, u32 len)
-{
-	Fifo_SendFifoData(_uData, len);
-}
-
 // Run from the graphics thread
 void VideoFifo_CheckSwapRequest()
 {
@@ -461,3 +460,39 @@ u32 Video_AccessEFB(EFBAccessType type, u32 x, u32 y)
 
 	return s_AccessEFBResult;
 }
+
+
+void Video_CommandProcessorRead16(u16& _rReturnValue, const u32 _Address) // plugin export: 16-bit read, result delivered through _rReturnValue
+{
+    CommandProcessor::Read16(_rReturnValue, _Address); // pure forward, no extra logic in the plugin
+}
+
+void Video_CommandProcessorWrite16(const u16 _Data, const u32 _Address) // plugin export: 16-bit write of _Data at _Address
+{
+    CommandProcessor::Write16(_Data, _Address); // pure forward, no extra logic in the plugin
+}
+
+void Video_PixelEngineRead16(u16& _rReturnValue, const u32 _Address) // plugin export: 16-bit read, result delivered through _rReturnValue
+{
+    PixelEngine::Read16(_rReturnValue, _Address); // pure forward, no extra logic in the plugin
+}
+
+void Video_PixelEngineWrite16(const u16 _Data, const u32 _Address) // plugin export: 16-bit write of _Data at _Address
+{
+    PixelEngine::Write16(_Data, _Address); // pure forward, no extra logic in the plugin
+}
+
+void Video_PixelEngineWrite32(const u32 _Data, const u32 _Address) // plugin export: 32-bit write of _Data at _Address
+{
+    PixelEngine::Write32(_Data, _Address); // pure forward, no extra logic in the plugin
+}
+
+void Video_GatherPipeBursted(void) // plugin export; deliberately NOT inline so the symbol is always emitted and can be resolved by name from the plugin library
+{
+    CommandProcessor::GatherPipeBursted(); // pure forward, no extra logic in the plugin
+}
+
+void Video_WaitForFrameFinish(void) // plugin export: hands the call to the command processor
+{
+    CommandProcessor::WaitForFrameFinish(); // pure forward, no extra logic in the plugin
+}
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp
index f45d5110a4..1597179b30 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp
@@ -85,6 +85,8 @@ GFXDebuggerOGL *m_DebuggerFrame = NULL;
 #include "VertexShaderManager.h"
 #include "XFB.h"
 #include "XFBConvert.h"
+#include "CommandProcessor.h"
+#include "PixelEngine.h"
 #include "TextureConverter.h"
 #include "PostProcessing.h"
 #include "OnScreenDisplay.h"
@@ -377,6 +379,9 @@ void Video_Prepare(void)
         exit(1);
     }
 
+    CommandProcessor::Init();
+    PixelEngine::Init();
+
     TextureMngr::Init();
 
     BPInit();
@@ -425,11 +430,6 @@ void Shutdown(void)
 	OpenGL_Shutdown();
 }
 
-void Video_SendFifoData(u8* _uData, u32 len)
-{
-	Fifo_SendFifoData(_uData, len);
-}
-
 // Enter and exit the video loop
 void Video_EnterLoop()
 {
@@ -579,3 +579,39 @@ u32 Video_AccessEFB(EFBAccessType type, u32 x, u32 y)
 	return 0;
 }
 
+void Video_CommandProcessorRead16(u16& _rReturnValue, const u32 _Address) // plugin export: 16-bit read, result delivered through _rReturnValue
+{
+    CommandProcessor::Read16(_rReturnValue, _Address); // pure forward, no extra logic in the plugin
+}
+
+void Video_CommandProcessorWrite16(const u16 _Data, const u32 _Address) // plugin export: 16-bit write of _Data at _Address
+{
+    CommandProcessor::Write16(_Data, _Address); // pure forward, no extra logic in the plugin
+}
+
+void Video_PixelEngineRead16(u16& _rReturnValue, const u32 _Address) // plugin export: 16-bit read, result delivered through _rReturnValue
+{
+    PixelEngine::Read16(_rReturnValue, _Address); // pure forward, no extra logic in the plugin
+}
+
+void Video_PixelEngineWrite16(const u16 _Data, const u32 _Address) // plugin export: 16-bit write of _Data at _Address
+{
+    PixelEngine::Write16(_Data, _Address); // pure forward, no extra logic in the plugin
+}
+
+void Video_PixelEngineWrite32(const u32 _Data, const u32 _Address) // plugin export: 32-bit write of _Data at _Address
+{
+    PixelEngine::Write32(_Data, _Address); // pure forward, no extra logic in the plugin
+}
+
+void Video_GatherPipeBursted(void) // plugin export; deliberately NOT inline so the symbol is always emitted and can be resolved by name from the plugin library
+{
+    CommandProcessor::GatherPipeBursted(); // pure forward, no extra logic in the plugin
+}
+
+void Video_WaitForFrameFinish(void) // plugin export: hands the call to the command processor
+{
+    CommandProcessor::WaitForFrameFinish(); // pure forward, no extra logic in the plugin
+}
+
+