From 5c58227702d1783fe3b67de4c8a69932cea1d82d Mon Sep 17 00:00:00 2001
From: hrydgard <hrydgard@gmail.com>
Date: Sun, 23 Nov 2008 17:46:14 +0000
Subject: [PATCH] Optimize vertex loader with a mini JIT (only first step, more
 optimizations may follow). Also fixes various error messages and warnings.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1276 8ced0084-cf51-0410-be5f-012b33b47a6e
---
 Source/Core/Common/Src/ABI.cpp                |  38 ++++-
 Source/Core/Common/Src/ABI.h                  |   5 +
 Source/Core/Common/Src/x64Emitter.cpp         |  37 -----
 Source/Core/Common/Src/x64Emitter.h           |  10 +-
 .../Core/Core/Src/HW/PeripheralInterface.cpp  |   9 +-
 .../Core/VideoCommon/Src/NativeVertexFormat.h |   2 +-
 Source/Core/VideoCommon/Src/Statistics.h      |  10 +-
 Source/Plugins/Plugin_VideoDX9/Src/Render.cpp |   2 +-
 .../Plugin_VideoDX9/Src/VertexManager.cpp     |   4 +-
 .../Plugins/Plugin_VideoOGL/Src/BPStructs.cpp |   4 +-
 .../Src/NativeVertexFormat.cpp                |   4 +-
 Source/Plugins/Plugin_VideoOGL/Src/Render.cpp |   3 +
 .../Plugin_VideoOGL/Src/VertexLoader.cpp      | 147 +++++++++++-------
 .../Plugin_VideoOGL/Src/VertexLoader.h        |   2 +
 .../Plugin_VideoOGL/Src/VertexLoader_Color.h  |  36 ++---
 .../Src/VertexLoader_Normal.cpp               |  48 +++---
 .../Plugin_VideoOGL/Src/VertexLoader_Normal.h |  48 +++---
 .../Src/VertexLoader_Position.h               |  45 ++----
 .../Src/VertexLoader_TextCoord.h              |  62 ++++----
 .../Plugin_VideoOGL/Src/VertexManager.cpp     |   9 +-
 20 files changed, 274 insertions(+), 251 deletions(-)

diff --git a/Source/Core/Common/Src/ABI.cpp b/Source/Core/Common/Src/ABI.cpp
index 097d6fc45b..2e29e2bc6c 100644
--- a/Source/Core/Common/Src/ABI.cpp
+++ b/Source/Core/Common/Src/ABI.cpp
@@ -4,6 +4,40 @@
 
 using namespace Gen;
 
+// Shared code between Win64 and Unix64
+// ====================================
+
+// Emits the entry sequence of a generated __cdecl function (only Win64 emits anything).
+void ABI_EmitPrologue(int maxCallParams)
+{
+#ifdef _M_IX86
+	// Nothing to emit on 32-bit x86.
+#elif defined(_M_X64)
+#ifdef _WIN32
+	// Reserve (maxCallParams rounded up to even)*8 + 8 bytes; ABI_EmitEpilogue adds the same amount back.
+	int stacksize = ((maxCallParams + 1) & ~1)*8 + 8;
+	SUB(64, R(RSP), Imm8(stacksize));
+#endif
+#else
+#error Arch not supported
+#endif
+}
+// Emits the matching exit sequence and RET; pass the same maxCallParams as to ABI_EmitPrologue.
+void ABI_EmitEpilogue(int maxCallParams)
+{
+#ifdef _M_IX86
+	RET();
+#elif defined(_M_X64)
+#ifdef _WIN32
+	int stacksize = ((maxCallParams+1)&~1)*8 + 8;
+	ADD(64, R(RSP), Imm8(stacksize));
+#endif
+	RET();
+#else
+#error Arch not supported
+#endif
+}
+
 #ifdef _M_IX86 // All32
 
 // Shared code between Win32 and Unix32
@@ -76,6 +110,7 @@ unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize) {
 	return alignedSize;
 }
 
+
 void ABI_AlignStack(unsigned int frameSize) {
 // Mac OS X requires the stack to be 16-byte aligned before every call.
 // Linux requires the stack to be 16-byte aligned before calls that put SSE
@@ -103,9 +138,6 @@ void ABI_RestoreStack(unsigned int frameSize) {
 
 #else
 
-// Shared code between Win64 and Unix64
-// ====================================
-
 void ABI_CallFunctionC(void *func, u32 param1) {
 	MOV(32, R(ABI_PARAM1), Imm32(param1));
 	CALL(func);
diff --git a/Source/Core/Common/Src/ABI.h b/Source/Core/Common/Src/ABI.h
index 2bbd169d00..2dfbf5aa19 100644
--- a/Source/Core/Common/Src/ABI.h
+++ b/Source/Core/Common/Src/ABI.h
@@ -107,6 +107,11 @@ unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize);
 void ABI_AlignStack(unsigned int frameSize);
 void ABI_RestoreStack(unsigned int frameSize);
 
+// Sets up a __cdecl function.
+// Only x64 really needs the parameter.
+void ABI_EmitPrologue(int maxCallParams);
+void ABI_EmitEpilogue(int maxCallParams);
+
 #ifdef _M_IX86
 inline int ABI_GetNumXMMRegs() { return 8; }
 #else
diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp
index fe839d8537..e3adb2e54b 100644
--- a/Source/Core/Common/Src/x64Emitter.cpp
+++ b/Source/Core/Common/Src/x64Emitter.cpp
@@ -1316,43 +1316,6 @@ namespace Gen
 	}
 
 	void RTDSC() { Write8(0x0F); Write8(0x31); }
-
-	namespace Util
-	{
-
-	// Sets up a __cdecl function.
-	void EmitPrologue(int maxCallParams)
-	{
-#ifdef _M_IX86
-		// Don't really need to do anything
-#elif defined(_M_X64)
-#if _WIN32
-		int stacksize = ((maxCallParams + 1) & ~1)*8 + 8;
-		// Set up a stack frame so that we can call functions
-		// TODO: use maxCallParams
-	    SUB(64, R(RSP), Imm8(stacksize));
-#endif
-#else
-#error Arch not supported
-#endif
-	}
-	void EmitEpilogue(int maxCallParams)
-	{
-#ifdef _M_IX86
-		RET();
-#elif defined(_M_X64)
-#ifdef _WIN32
-		int stacksize = ((maxCallParams+1)&~1)*8 + 8;
-		ADD(64, R(RSP), Imm8(stacksize));
-#endif
-		RET();
-#else
-#error Arch not supported
-#endif
-	}
-
-	}  // namespace
-
 	
 // helper routines for setting pointers
 void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2)
diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h
index 995143b91a..0238569a28 100644
--- a/Source/Core/Common/Src/x64Emitter.h
+++ b/Source/Core/Common/Src/x64Emitter.h
@@ -520,16 +520,8 @@ namespace Gen
 	void PMOVMSKB(X64Reg dest, OpArg arg);
 	void PSHUFB(X64Reg dest, OpArg arg);
 
-        void RTDSC();
+	void RTDSC();
 
-	namespace Util
-	{
-		// Sets up a __cdecl function.
-		// Only x64 really needs the parameter.
-		void EmitPrologue(int maxCallParams);
-		void EmitEpilogue(int maxCallParams);
-	}
-	
 void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2);
 void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3);
 void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4);
diff --git a/Source/Core/Core/Src/HW/PeripheralInterface.cpp b/Source/Core/Core/Src/HW/PeripheralInterface.cpp
index 2e30eaa1b8..2d4424d9c5 100644
--- a/Source/Core/Core/Src/HW/PeripheralInterface.cpp
+++ b/Source/Core/Core/Src/HW/PeripheralInterface.cpp
@@ -145,13 +145,12 @@ void CPeripheralInterface::Write32(const u32 _uValue, const u32 _iAddress)
             {
 				switch (_uValue) {
 				case 3:
-					PanicAlert("Game wants to go to memory card manager. Since BIOS is being HLE:d - can't do that.\n"
-						       "We might pop up a fake memcard manager here and then reset the game in the future :)\n");
+					PanicAlert("The game wants to go to memory card manager. BIOS is being HLE:d - so we can't do that.\n");
 					break;
 				default:
 					{
 					TCHAR szTemp[256];
-					sprintf(szTemp, "Game wants to reset the machine. PI_RESET_CODE: (%08x)", _uValue);
+					sprintf(szTemp, "The game wants to reset the machine. PI_RESET_CODE: (%08x)", _uValue);
 					PanicAlert(szTemp);
 					}
 					break;
@@ -161,8 +160,8 @@ void CPeripheralInterface::Write32(const u32 _uValue, const u32 _iAddress)
 		break;
 
 	default:
-		LOG(PERIPHERALINTERFACE,"!!!!Unknown write!!!! 0x%08x", _iAddress);
-		PanicAlert("Unknown write to PI");
+		LOG(PERIPHERALINTERFACE,"!!!!Unknown PI write!!!! 0x%08x", _iAddress);
+		PanicAlert("Unknown write to PI: %08x", _iAddress);
 		break;
 	}
 }
diff --git a/Source/Core/VideoCommon/Src/NativeVertexFormat.h b/Source/Core/VideoCommon/Src/NativeVertexFormat.h
index bb9be45d79..3aee5cd753 100644
--- a/Source/Core/VideoCommon/Src/NativeVertexFormat.h
+++ b/Source/Core/VideoCommon/Src/NativeVertexFormat.h
@@ -55,7 +55,7 @@ enum {
 };
 
 #define LOADERDECL __cdecl
-typedef void (LOADERDECL *TPipelineFunction)(const void *);
+typedef void (LOADERDECL *TPipelineFunction)();
 
 enum VarType {
 	VAR_BYTE,
diff --git a/Source/Core/VideoCommon/Src/Statistics.h b/Source/Core/VideoCommon/Src/Statistics.h
index e618d543ba..fe4a1e201c 100644
--- a/Source/Core/VideoCommon/Src/Statistics.h
+++ b/Source/Core/VideoCommon/Src/Statistics.h
@@ -20,8 +20,6 @@
 
 struct Statistics
 {
-    int numPrimitives;
-
     int numPixelShadersCreated;
     int numPixelShadersAlive;
     int numVertexShadersCreated;
@@ -37,8 +35,6 @@ struct Statistics
     int numDListsCreated;
     int numDListsAlive;
 
-    int numJoins;
-
 	int numVertexLoaders;
 
     struct ThisFrame
@@ -52,10 +48,14 @@ struct Statistics
         int numXFLoadsInDL;
         
         int numDLs;
-        int numDLPrims;
         int numPrims;
+        int numDLPrims;
         int numShaderChanges;
 
+	    int numPrimitiveJoins;
+	    int numDrawCalls;
+	    int numBufferSplits;
+
 		int numDListsCalled;
     };
     ThisFrame thisFrame;
diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp
index b367c32b5b..71ae5fb05b 100644
--- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp
@@ -207,8 +207,8 @@ void Renderer::SwapBuffers(void)
 		p+=sprintf(p,"Num dlists called: %i\n",stats.numDListsCalled);
 		p+=sprintf(p,"Num dlists created: %i\n",stats.numDListsCreated);
 		p+=sprintf(p,"Num dlists alive: %i\n",stats.numDListsAlive);
-		p+=sprintf(p,"Num strip joins: %i\n",stats.numJoins);
 		p+=sprintf(p,"Num primitives: %i\n",stats.thisFrame.numPrims);
+		p+=sprintf(p,"Num primitive joins: %i\n",stats.thisFrame.numPrimitiveJoins);
 		p+=sprintf(p,"Num primitives (DL): %i\n",stats.thisFrame.numDLPrims);
 		p+=sprintf(p,"Num XF loads: %i\n",stats.thisFrame.numXFLoads);
 		p+=sprintf(p,"Num XF loads (DL): %i\n",stats.thisFrame.numXFLoadsInDL);
diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp
index 6fc2a6d911..425e6fad19 100644
--- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp
@@ -173,9 +173,9 @@ void AddVertices(int _primitive, int _numVertices, const DecodedVArray *varray)
 	else //We are collecting the right type, keep going
 	{
 		_assert_msg_(vbufferwrite!=0, "collecting: vbufferwrite == 0!","WTF");
-		INCSTAT(stats.numJoins);
+		INCSTAT(stats.thisFrame.numPrimitiveJoins);
 		//Success, keep adding to unlocked buffer
-		int last=indexGen.GetNumVerts();
+		int last = indexGen.GetNumVerts();
 		AddIndices(_primitive, _numVertices);
 
 		if (_numVertices >= MAXVBUFFERSIZE)
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp
index a0e7c7c196..0eabc01cf9 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp
@@ -462,8 +462,8 @@ void BPWritten(int addr, int changes, int newval)
 				{
 					// the number of lines copied is determined by the y scale * source efb height
 					float yScale = bpmem.dispcopyyscale / 256.0f;
-					float xfbLines = bpmem.copyTexSrcWH.y + 1 * yScale;
-					XFB_Write(Memory_GetPtr(bpmem.copyTexDest<<5), multirc, (bpmem.copyMipMapStrideChannels << 4), xfbLines);
+					float xfbLines = (bpmem.copyTexSrcWH.y + 1.0f) * yScale;  // scale the whole (y + 1) term, not just the 1
+					XFB_Write(Memory_GetPtr(bpmem.copyTexDest<<5), multirc, (bpmem.copyMipMapStrideChannels << 4), (int)xfbLines);
 				}
 				else
 				{
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp b/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp
index 25a8095ea8..d13cb06daf 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp
@@ -85,7 +85,7 @@ void NativeVertexFormat::Initialize(const PortableVertexDeclaration &vtx_decl)
 	// Alright, we have our vertex declaration. Compile some crazy code to set it quickly using GL.
 	u8 *old_code_ptr = GetWritableCodePtr();
 	SetCodePtr(m_compiledCode);
-	Util::EmitPrologue(6);
+	ABI_EmitPrologue(6);
 	
 	CallCdeclFunction4_I(glVertexPointer, 3, GL_FLOAT, vtx_decl.stride, 0);
 
@@ -137,7 +137,7 @@ void NativeVertexFormat::Initialize(const PortableVertexDeclaration &vtx_decl)
 		CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_POSMTX_ATTRIB, 4, GL_UNSIGNED_BYTE, GL_FALSE, vtx_decl.stride, vtx_decl.posmtx_offset);
 	}
 
-	Util::EmitEpilogue(6);
+	ABI_EmitEpilogue(6);
 	if (Gen::GetCodePtr() - (u8*)m_compiledCode > COMPILED_CODE_SIZE)
 	{
 		Crash();
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp
index ea4a56544b..6927a26b2c 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp
@@ -775,6 +775,9 @@ void Renderer::SwapBuffers()
         //p+=sprintf(p,"Num dlists alive:    %i\n",stats.numDListsAlive);
         //p+=sprintf(p,"Num strip joins:     %i\n",stats.numJoins);
         p+=sprintf(p,"Num primitives:       %i\n",stats.thisFrame.numPrims);
+		p+=sprintf(p,"Num primitive joins:  %i\n",stats.thisFrame.numPrimitiveJoins);
+		p+=sprintf(p,"Num buffer splits:    %i\n",stats.thisFrame.numBufferSplits);
+		p+=sprintf(p,"Num draw calls:       %i\n",stats.thisFrame.numDrawCalls);
         p+=sprintf(p,"Num primitives (DL):  %i\n",stats.thisFrame.numDLPrims);
         p+=sprintf(p,"Num XF loads:      %i\n",stats.thisFrame.numXFLoads);
         p+=sprintf(p,"Num XF loads (DL): %i\n",stats.thisFrame.numXFLoadsInDL);
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp
index fba0314008..c426359b60 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp
@@ -22,13 +22,19 @@
 #include "Common.h"
 #include "Config.h"
 #include "Profiler.h"
+#include "MemoryUtil.h"
+#include "x64Emitter.h"
+#include "ABI.h"
 
+#include "Statistics.h"
 #include "VertexManager.h"
 #include "VertexLoaderManager.h"
 #include "VertexLoader.h"
 #include "BPStructs.h"
 #include "DataReader.h"
 
+#define USE_JIT
+
 NativeVertexFormat *g_nativeVertexFmt;
 
 //these don't need to be saved
@@ -49,14 +55,17 @@ static u8 s_curposmtx;
 static u8 s_curtexmtx[8];
 static int s_texmtxwrite = 0;
 static int s_texmtxread = 0;
+static TVtxAttr* pVtxAttr;
 
-void LOADERDECL PosMtx_ReadDirect_UByte(const void *_p)
+using namespace Gen;
+
+void LOADERDECL PosMtx_ReadDirect_UByte()
 {
 	s_curposmtx = DataReadU8() & 0x3f;
 	PRIM_LOG("posmtx: %d, ", s_curposmtx);
 }
 
-void LOADERDECL PosMtx_Write(const void *_p)
+void LOADERDECL PosMtx_Write()
 {
 	*VertexManager::s_pCurBufferPointer++ = s_curposmtx;
 	*VertexManager::s_pCurBufferPointer++ = 0;
@@ -64,27 +73,27 @@ void LOADERDECL PosMtx_Write(const void *_p)
 	*VertexManager::s_pCurBufferPointer++ = 0;
 }
 
-void LOADERDECL TexMtx_ReadDirect_UByte(const void *_p)
+void LOADERDECL TexMtx_ReadDirect_UByte()
 {
 	s_curtexmtx[s_texmtxread] = DataReadU8()&0x3f;
 	PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]);
 	s_texmtxread++;
 }
 
-void LOADERDECL TexMtx_Write_Float(const void *_p)
+void LOADERDECL TexMtx_Write_Float()
 {
 	*(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++];
 	VertexManager::s_pCurBufferPointer += 4;
 }
 
-void LOADERDECL TexMtx_Write_Float2(const void *_p)
+void LOADERDECL TexMtx_Write_Float2()
 {
 	((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
 	((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++];
 	VertexManager::s_pCurBufferPointer += 8;
 }
 
-void LOADERDECL TexMtx_Write_Short3(const void *_p)
+void LOADERDECL TexMtx_Write_Short3()
 {
 	((s16*)VertexManager::s_pCurBufferPointer)[0] = 0;
 	((s16*)VertexManager::s_pCurBufferPointer)[1] = 0;
@@ -97,6 +106,8 @@ void LOADERDECL TexMtx_Write_Short3(const void *_p)
 #include "VertexLoader_Color.h"
 #include "VertexLoader_TextCoord.h"
 
+#define COMPILED_CODE_SIZE 4096
+
 VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) 
 {
 	m_VertexSize = 0;
@@ -107,11 +118,16 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
 	m_VtxDesc = vtx_desc;
 	SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex);
 
+	m_compiledCode = (u8 *)AllocateExecutableMemory(COMPILED_CODE_SIZE, false);
+	if (m_compiledCode) {
+		memset(m_compiledCode, 0, COMPILED_CODE_SIZE);
+	}
 	CompileVertexTranslator();
 }
 
 VertexLoader::~VertexLoader() 
 {
+	FreeMemoryPages(m_compiledCode, COMPILED_CODE_SIZE);
 	delete m_NativeFmt;
 }
 
@@ -119,6 +135,9 @@ void VertexLoader::CompileVertexTranslator()
 {
 	m_VertexSize = 0;
 
+	u8 *old_code_ptr = GetWritableCodePtr();
+	SetCodePtr(m_compiledCode);
+	ABI_EmitPrologue(4);
 	// Colors
 	const int col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
 	// TextureCoord
@@ -144,7 +163,7 @@ void VertexLoader::CompileVertexTranslator()
 	
 	// Position Matrix Index
 	if (m_VtxDesc.PosMatIdx) {
-		m_PipelineStages[m_numPipelineStages++] = PosMtx_ReadDirect_UByte;
+		WriteCall(PosMtx_ReadDirect_UByte);
 		m_NativeFmt->m_components |= VB_HAS_POSMTXIDX;
 		m_VertexSize += 1;
 	}
@@ -430,7 +449,10 @@ void VertexLoader::CompileVertexTranslator()
 	vtx_decl.stride = native_stride;
 	if (vtx_decl.stride != offset)
 		PanicAlert("offset/stride mismatch, %i %i", vtx_decl.stride, offset);
-
+#ifdef USE_JIT
+	ABI_EmitEpilogue(4);
+#endif
+	SetCodePtr(old_code_ptr);
 	m_NativeFmt->Initialize(vtx_decl);
 }
 
@@ -532,7 +554,11 @@ void VertexLoader::SetupTexCoord(int num, int mode, int format, int elements, in
 
 void VertexLoader::WriteCall(TPipelineFunction func)
 {
+#ifdef USE_JIT
+	CALL((void*)func);
+#else
 	m_PipelineStages[m_numPipelineStages++] = func;
+#endif
 }
 
 void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
@@ -569,6 +595,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
 	m_VtxAttr.texCoord[6].Frac		= g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
 	m_VtxAttr.texCoord[7].Frac		= g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
 
+	pVtxAttr = &m_VtxAttr;
 	posScale = shiftLookup[m_VtxAttr.PosFrac];
 	if (m_NativeFmt->m_components & VB_HAS_UVALL) {
 		for (int i = 0; i < 8; i++) {
@@ -582,7 +609,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
 	// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
 	int granularity = 1;
 	switch (primitive) {
-		case 3: // strip
+		case 3: // strip .. hm, weird
 		case 4: // fan
 			if (VertexManager::GetRemainingSize() < 3 * native_stride)
 				VertexManager::Flush();
@@ -603,59 +630,67 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
 	}
 
 	int startv = 0, extraverts = 0;
-	for (int v = 0; v < count; v++)
+	int v = 0;
+
+	while (v < count)
 	{
-		if ((v % granularity) == 0)
-		{
-			if (VertexManager::GetRemainingSize() < granularity*native_stride) {
-				// This buffer full - break current primitive and flush, to switch to the next buffer.
-				u8* plastptr = VertexManager::s_pCurBufferPointer;
-				if (v - startv > 0)
-					VertexManager::AddVertices(primitive, v - startv + extraverts);
-				VertexManager::Flush();
-				// Why does this need to be so complicated?
-				switch (primitive) {
-					case 3: // triangle strip, copy last two vertices
-						// a little trick since we have to keep track of signs
-						if (v & 1) {
-							memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
-							memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
-							VertexManager::s_pCurBufferPointer += native_stride*3;
-							extraverts = 3;
-						}
-						else {
-							memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
-							VertexManager::s_pCurBufferPointer += native_stride*2;
-							extraverts = 2;
-						}
-						break;
-					case 4: // tri fan, copy first and last vert
-						memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
-						VertexManager::s_pCurBufferPointer += native_stride;
-						memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
-						VertexManager::s_pCurBufferPointer += native_stride;
+		if (VertexManager::GetRemainingSize() < granularity*native_stride) {
+			INCSTAT(stats.thisFrame.numBufferSplits);
+			// This buffer full - break current primitive and flush, to switch to the next buffer.
+			u8* plastptr = VertexManager::s_pCurBufferPointer;
+			if (v - startv > 0)
+				VertexManager::AddVertices(primitive, v - startv + extraverts);
+			VertexManager::Flush();
+			// Why does this need to be so complicated?
+			switch (primitive) {
+				case 3: // triangle strip, copy last two vertices
+					// a little trick since we have to keep track of signs
+					if (v & 1) {
+						memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
+						memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
+						VertexManager::s_pCurBufferPointer += native_stride*3;
+						extraverts = 3;
+					}
+					else {
+						memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
+						VertexManager::s_pCurBufferPointer += native_stride*2;
 						extraverts = 2;
-						break;
-					case 6: // line strip
-						memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
-						VertexManager::s_pCurBufferPointer += native_stride;
-						extraverts = 1;
-						break;
-					default:
-						extraverts = 0;
-						break;
-				}
-				startv = v;
+					}
+					break;
+				case 4: // tri fan, copy first and last vert
+					memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
+					VertexManager::s_pCurBufferPointer += native_stride;
+					memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
+					VertexManager::s_pCurBufferPointer += native_stride;
+					extraverts = 2;
+					break;
+				case 6: // line strip
+					memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
+					VertexManager::s_pCurBufferPointer += native_stride;
+					extraverts = 1;
+					break;
+				default:
+					extraverts = 0;
+					break;
 			}
+			startv = v;
 		}
-		tcIndex = 0;
-		colIndex = 0;
-		s_texmtxwrite = s_texmtxread = 0;
 
-		for (int i = 0; i < m_numPipelineStages; i++)
-			m_PipelineStages[i](&m_VtxAttr);
+		for (int s = 0; s < granularity && v < count; s++)  // one primitive's worth, but never past count
+		{
+			tcIndex = 0;
+			colIndex = 0;
+			s_texmtxwrite = s_texmtxread = 0;
+	#ifdef USE_JIT
+			((void (*)())(void*)m_compiledCode)();
+	#else
+			for (int i = 0; i < m_numPipelineStages; i++)
+				m_PipelineStages[i]();
+	#endif
 
-		PRIM_LOG("\n");
+			PRIM_LOG("\n");
+			v++;
+		}
 	}
 
 	if (startv < count)
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h
index 3519d920ff..74586ad3db 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h
@@ -81,6 +81,8 @@ private:
 	TPipelineFunction m_PipelineStages[32];  // TODO - figure out real max. it's lower.
 	int m_numPipelineStages;
 
+	u8 *m_compiledCode;
+
 	void SetupColor(int num, int _iMode, int _iFormat, int _iElements);
 	void SetupTexCoord(int num, int _iMode, int _iFormat, int _iElements, int _iFrac);
 
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h
index dc91765dbb..76c298fb0d 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h
@@ -81,7 +81,7 @@ inline u32 _Read32(u32 iAddress)
 //////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////
 
-void LOADERDECL Color_ReadDirect_24b_888(const void *_p)
+void LOADERDECL Color_ReadDirect_24b_888()
 {
     u32 col = DataReadU8()<<RSHIFT;
     col     |= DataReadU8()<<GSHIFT;
@@ -89,22 +89,22 @@ void LOADERDECL Color_ReadDirect_24b_888(const void *_p)
     _SetCol(col | (0xFF<<ASHIFT));
 }
 
-void LOADERDECL Color_ReadDirect_32b_888x(const void *_p){
+void LOADERDECL Color_ReadDirect_32b_888x(){
     u32 col = DataReadU8()<<RSHIFT;
     col     |= DataReadU8()<<GSHIFT;
     col     |= DataReadU8()<<BSHIFT;
     _SetCol(col | (0xFF<<ASHIFT));
     DataReadU8();
 }
-void LOADERDECL Color_ReadDirect_16b_565(const void *_p)
+void LOADERDECL Color_ReadDirect_16b_565()
 {
     _SetCol565(DataReadU16());
 }
-void LOADERDECL Color_ReadDirect_16b_4444(const void *_p)
+void LOADERDECL Color_ReadDirect_16b_4444()
 {
     _SetCol4444(DataReadU16());
 }
-void LOADERDECL Color_ReadDirect_24b_6666(const void *_p)
+void LOADERDECL Color_ReadDirect_24b_6666()
 {
     u32 val = DataReadU8()<<16;
     val|=DataReadU8()<<8;
@@ -119,7 +119,7 @@ void LOADERDECL Color_ReadDirect_24b_6666(const void *_p)
 //	else
 //		col |= 0xFF<<ASHIFT;
 //
-void LOADERDECL Color_ReadDirect_32b_8888(const void *_p)
+void LOADERDECL Color_ReadDirect_32b_8888()
 {
     // TODO (mb2): check this
     u32 col = DataReadU8()<<RSHIFT;
@@ -136,33 +136,33 @@ void LOADERDECL Color_ReadDirect_32b_8888(const void *_p)
 //////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////
-void LOADERDECL Color_ReadIndex8_16b_565(const void *_p)
+void LOADERDECL Color_ReadIndex8_16b_565()
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
     u16 val = Memory_Read_U16(iAddress);
     _SetCol565(val);
 }
-void LOADERDECL Color_ReadIndex8_24b_888(const void *_p)
+void LOADERDECL Color_ReadIndex8_24b_888()
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
     _SetCol(_Read24(iAddress));
 }
-void LOADERDECL Color_ReadIndex8_32b_888x(const void *_p)
+void LOADERDECL Color_ReadIndex8_32b_888x()
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR]+colIndex);
     _SetCol(_Read24(iAddress));
 }
-void LOADERDECL Color_ReadIndex8_16b_4444(const void *_p)
+void LOADERDECL Color_ReadIndex8_16b_4444()
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
     u16 val = Memory_Read_U16(iAddress);
     _SetCol4444(val);
 }
-void LOADERDECL Color_ReadIndex8_24b_6666(const void *_p)
+void LOADERDECL Color_ReadIndex8_24b_6666()
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
@@ -172,7 +172,7 @@ void LOADERDECL Color_ReadIndex8_24b_6666(const void *_p)
     
     _SetCol6666(val);
 }
-void LOADERDECL Color_ReadIndex8_32b_8888(const void *_p)
+void LOADERDECL Color_ReadIndex8_32b_8888()
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
@@ -181,33 +181,33 @@ void LOADERDECL Color_ReadIndex8_32b_8888(const void *_p)
 //////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////
-void LOADERDECL Color_ReadIndex16_16b_565(const void *_p)
+void LOADERDECL Color_ReadIndex16_16b_565()
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
     u16 val = Memory_Read_U16(iAddress);
     _SetCol565(val);
 }
-void LOADERDECL Color_ReadIndex16_24b_888(const void *_p)
+void LOADERDECL Color_ReadIndex16_24b_888()
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
     _SetCol(_Read24(iAddress));
 }
-void LOADERDECL Color_ReadIndex16_32b_888x(const void *_p)
+void LOADERDECL Color_ReadIndex16_32b_888x()
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
     _SetCol(_Read24(iAddress));
 }
-void LOADERDECL Color_ReadIndex16_16b_4444(const void *_p)
+void LOADERDECL Color_ReadIndex16_16b_4444()
 {
     u16 Index = DataReadU16();
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
     u16 val = Memory_Read_U16(iAddress);
     _SetCol4444(val);
 }
-void LOADERDECL Color_ReadIndex16_24b_6666(const void *_p)
+void LOADERDECL Color_ReadIndex16_24b_6666()
 {
     u16 Index = DataReadU16();
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
@@ -216,7 +216,7 @@ void LOADERDECL Color_ReadIndex16_24b_6666(const void *_p)
                (Memory_Read_U8(iAddress)<<16); 
     _SetCol6666(val);
 }
-void LOADERDECL Color_ReadIndex16_32b_8888(const void *_p)
+void LOADERDECL Color_ReadIndex16_32b_8888()
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.cpp
index 2b429cee2a..42e5969155 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.cpp
@@ -179,7 +179,7 @@ TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type, unsigned
 /////////////////////////////////////////////////////////////////////////////////////////////////////
 // --- Direct ---
 /////////////////////////////////////////////////////////////////////////////////////////////////////
-void LOADERDECL VertexLoader_Normal::Normal_DirectByte(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_DirectByte()
 {
     *VertexManager::s_pCurBufferPointer++ = DataReadU8();
     *VertexManager::s_pCurBufferPointer++ = DataReadU8();
@@ -189,7 +189,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectByte(const void *_p)
 //    ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(signed char)DataReadU8()+0.5f) / 127.5f;
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_DirectShort(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_DirectShort()
 {
     ((u16*)VertexManager::s_pCurBufferPointer)[0] = DataReadU16();
     ((u16*)VertexManager::s_pCurBufferPointer)[1] = DataReadU16();
@@ -201,7 +201,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectShort(const void *_p)
 //    ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(signed short)DataReadU16()+0.5f) / 32767.5f;
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_DirectFloat(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_DirectFloat()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32();
     ((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadF32();
@@ -210,7 +210,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectFloat(const void *_p)
     LOG_NORMF()
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_DirectByte3(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_DirectByte3()
 {
     for (int i = 0; i < 3; i++)
     {
@@ -222,7 +222,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectByte3(const void *_p)
     }
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_DirectShort3(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_DirectShort3()
 {
     for (int i = 0; i < 3; i++)
     {
@@ -234,7 +234,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectShort3(const void *_p)
     }
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3()
 {
     for (int i = 0; i < 3; i++)
     {
@@ -249,7 +249,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3(const void *_p)
 /////////////////////////////////////////////////////////////////////////////////////////////////////
 // --- Index8 ---
 /////////////////////////////////////////////////////////////////////////////////////////////////////
-void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte()
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
@@ -264,7 +264,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte(const void *_p)
     LOG_NORM8();
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index8_Short(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index8_Short()
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
@@ -275,7 +275,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Short(const void *_p)
     LOG_NORM16();
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index8_Float(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index8_Float()
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
@@ -286,7 +286,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Float(const void *_p)
     LOG_NORMF();
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1()
 {
     u8 Index = DataReadU8();
     for (int i = 0; i < 3; i++)
@@ -300,7 +300,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1(const void *_p
     }
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1()
 {
     u8 Index = DataReadU8();
     for (int i = 0; i < 3; i++)
@@ -314,7 +314,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1(const void *_
     }    
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1()
 {
     u8 Index = DataReadU8();
     for (int i = 0; i < 3; i++)
@@ -328,7 +328,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1(const void *_
     }    
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3()
 {
     for (int i = 0; i < 3; i++)
 	{
@@ -342,7 +342,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3(const void *_p
     }
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3()
 {
     for (int i = 0; i < 3; i++)
 	{
@@ -356,7 +356,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3(const void *_
     }
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3()
 {
     for (int i = 0; i < 3; i++)
 	{
@@ -374,7 +374,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3(const void *_
 // --- Index16 ---
 /////////////////////////////////////////////////////////////////////////////////////////////////////
 
-void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte()
 {
     u16 Index = DataReadU16();
     u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
@@ -385,7 +385,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte(const void *_p)
     LOG_NORM8();
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index16_Short(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index16_Short()
 {
     u16 Index = DataReadU16();
     u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
@@ -396,7 +396,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Short(const void *_p)
     LOG_NORM16();
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index16_Float(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index16_Float()
 {
     u16 Index = DataReadU16();
     u32 iAddress = arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
@@ -407,7 +407,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Float(const void *_p)
     LOG_NORMF();
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1()
 {
     u16 Index = DataReadU16();
     for (int i = 0; i < 3; i++)
@@ -421,7 +421,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1(const void *_
     }
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1()
 {
     u16 Index = DataReadU16();
     for (int i = 0; i < 3; i++)
@@ -435,7 +435,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1(const void *
     }
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1()
 {
     u16 Index = DataReadU16();
     for (int i = 0; i < 3; i++)
@@ -449,7 +449,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1(const void *
     }
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3()
 {
     for (int i = 0; i < 3; i++)
 	{
@@ -463,7 +463,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3(const void *_
     }    
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3()
 {
     for (int i = 0; i < 3; i++)
     {
@@ -478,7 +478,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3(const void *
    
 }
 
-void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3(const void *_p)
+void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3()
 {
     for (int i = 0; i < 3; i++)
     {
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.h
index e3fcdab9c7..31c169e7f9 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.h
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Normal.h
@@ -73,34 +73,34 @@ private:
     static TPipelineFunction m_funcTable[NUM_NRM_TYPE][NUM_NRM_FORMAT][NUM_NRM_ELEMENTS][NUM_NRM_INDICES];
 
     // direct
-    static void LOADERDECL Normal_DirectByte(const void *_p);
-    static void LOADERDECL Normal_DirectShort(const void *_p);
-    static void LOADERDECL Normal_DirectFloat(const void *_p);
-    static void LOADERDECL Normal_DirectByte3(const void *_p);
-    static void LOADERDECL Normal_DirectShort3(const void *_p);
-    static void LOADERDECL Normal_DirectFloat3(const void *_p);
+    static void LOADERDECL Normal_DirectByte();
+    static void LOADERDECL Normal_DirectShort();
+    static void LOADERDECL Normal_DirectFloat();
+    static void LOADERDECL Normal_DirectByte3();
+    static void LOADERDECL Normal_DirectShort3();
+    static void LOADERDECL Normal_DirectFloat3();
 
     // index8
-    static void LOADERDECL Normal_Index8_Byte(const void *_p);
-    static void LOADERDECL Normal_Index8_Short(const void *_p);
-    static void LOADERDECL Normal_Index8_Float(const void *_p);
-    static void LOADERDECL Normal_Index8_Byte3_Indices1(const void *_p);
-    static void LOADERDECL Normal_Index8_Short3_Indices1(const void *_p);
-    static void LOADERDECL Normal_Index8_Float3_Indices1(const void *_p);
-	static void LOADERDECL Normal_Index8_Byte3_Indices3(const void *_p);
-    static void LOADERDECL Normal_Index8_Short3_Indices3(const void *_p);
-    static void LOADERDECL Normal_Index8_Float3_Indices3(const void *_p);
+    static void LOADERDECL Normal_Index8_Byte();
+    static void LOADERDECL Normal_Index8_Short();
+    static void LOADERDECL Normal_Index8_Float();
+    static void LOADERDECL Normal_Index8_Byte3_Indices1();
+    static void LOADERDECL Normal_Index8_Short3_Indices1();
+    static void LOADERDECL Normal_Index8_Float3_Indices1();
+	static void LOADERDECL Normal_Index8_Byte3_Indices3();
+    static void LOADERDECL Normal_Index8_Short3_Indices3();
+    static void LOADERDECL Normal_Index8_Float3_Indices3();
 
     // index16
-    static void LOADERDECL Normal_Index16_Byte(const void *_p);
-    static void LOADERDECL Normal_Index16_Short(const void *_p);
-    static void LOADERDECL Normal_Index16_Float(const void *_p);
-    static void LOADERDECL Normal_Index16_Byte3_Indices1(const void *_p);
-    static void LOADERDECL Normal_Index16_Short3_Indices1(const void *_p);
-    static void LOADERDECL Normal_Index16_Float3_Indices1(const void *_p);
-	static void LOADERDECL Normal_Index16_Byte3_Indices3(const void *_p);
-    static void LOADERDECL Normal_Index16_Short3_Indices3(const void *_p);
-    static void LOADERDECL Normal_Index16_Float3_Indices3(const void *_p);
+    static void LOADERDECL Normal_Index16_Byte();
+    static void LOADERDECL Normal_Index16_Short();
+    static void LOADERDECL Normal_Index16_Float();
+    static void LOADERDECL Normal_Index16_Byte3_Indices1();
+    static void LOADERDECL Normal_Index16_Short3_Indices1();
+    static void LOADERDECL Normal_Index16_Float3_Indices1();
+	static void LOADERDECL Normal_Index16_Byte3_Indices3();
+    static void LOADERDECL Normal_Index16_Short3_Indices3();
+    static void LOADERDECL Normal_Index16_Float3_Indices3();
 };
 
 #endif
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Position.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Position.h
index 2fece648d1..4807f9edf6 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Position.h
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Position.h
@@ -23,9 +23,8 @@
 // ==============================================================================
 // Direct
 // ==============================================================================
-void LOADERDECL Pos_ReadDirect_UByte(const void *_p)
+void LOADERDECL Pos_ReadDirect_UByte()
 {
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * posScale;
     ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * posScale;
     if (pVtxAttr->PosElements)
@@ -36,9 +35,8 @@ void LOADERDECL Pos_ReadDirect_UByte(const void *_p)
     VertexManager::s_pCurBufferPointer += 12;
 }
 
-void LOADERDECL Pos_ReadDirect_Byte(const void *_p)
+void LOADERDECL Pos_ReadDirect_Byte()
 {	
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * posScale;
 	((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * posScale;
 	if (pVtxAttr->PosElements)
@@ -49,9 +47,8 @@ void LOADERDECL Pos_ReadDirect_Byte(const void *_p)
     VertexManager::s_pCurBufferPointer += 12;
 }
 
-void LOADERDECL Pos_ReadDirect_UShort(const void *_p)
+void LOADERDECL Pos_ReadDirect_UShort()
 {
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * posScale;
 	((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * posScale;
 	if (pVtxAttr->PosElements)
@@ -62,9 +59,8 @@ void LOADERDECL Pos_ReadDirect_UShort(const void *_p)
     VertexManager::s_pCurBufferPointer += 12;
 }
 
-void LOADERDECL Pos_ReadDirect_Short(const void *_p)
+void LOADERDECL Pos_ReadDirect_Short()
 {
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * posScale;
 	((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * posScale;
 	if (pVtxAttr->PosElements)
@@ -75,9 +71,8 @@ void LOADERDECL Pos_ReadDirect_Short(const void *_p)
     VertexManager::s_pCurBufferPointer += 12;
 }
 
-void LOADERDECL Pos_ReadDirect_Float(const void *_p)
+void LOADERDECL Pos_ReadDirect_Float()
 {
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32(); 
 	((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadF32();
 	if (pVtxAttr->PosElements)
@@ -127,37 +122,32 @@ void LOADERDECL Pos_ReadDirect_Float(const void *_p)
 // ==============================================================================
 // Index 8
 // ==============================================================================
-void LOADERDECL Pos_ReadIndex8_UByte(const void *_p) 
+void LOADERDECL Pos_ReadIndex8_UByte() 
 { 
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	u8 Index = DataReadU8();
     Pos_ReadIndex_Byte(u8);
 }
 
-void LOADERDECL Pos_ReadIndex8_Byte(const void *_p)
+void LOADERDECL Pos_ReadIndex8_Byte()
 {
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	u8 Index = DataReadU8();
 	Pos_ReadIndex_Byte(s8);
 }
 
-void LOADERDECL Pos_ReadIndex8_UShort(const void *_p)
+void LOADERDECL Pos_ReadIndex8_UShort()
 {
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	u8 Index = DataReadU8();
     Pos_ReadIndex_Short(u16);
 }
 
-void LOADERDECL Pos_ReadIndex8_Short(const void *_p)
+void LOADERDECL Pos_ReadIndex8_Short()
 {
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	u8 Index = DataReadU8();
 	Pos_ReadIndex_Short(s16);
 }
 
-void LOADERDECL Pos_ReadIndex8_Float(const void *_p)
+void LOADERDECL Pos_ReadIndex8_Float()
 {
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	u8 Index = DataReadU8();
     Pos_ReadIndex_Float();
 }
@@ -166,34 +156,29 @@ void LOADERDECL Pos_ReadIndex8_Float(const void *_p)
 // Index 16
 // ==============================================================================
 
-void LOADERDECL Pos_ReadIndex16_UByte(const void *_p){
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
+void LOADERDECL Pos_ReadIndex16_UByte(){
 	u16 Index = DataReadU16(); 
 	Pos_ReadIndex_Byte(u8);
 }
 
-void LOADERDECL Pos_ReadIndex16_Byte(const void *_p){
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
+void LOADERDECL Pos_ReadIndex16_Byte(){
 	u16 Index = DataReadU16(); 
 	Pos_ReadIndex_Byte(s8);
 }
 
-void LOADERDECL Pos_ReadIndex16_UShort(const void *_p){
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
+void LOADERDECL Pos_ReadIndex16_UShort(){
 	u16 Index = DataReadU16(); 
 	Pos_ReadIndex_Short(u16);
 }
 
-void LOADERDECL Pos_ReadIndex16_Short(const void *_p)
+void LOADERDECL Pos_ReadIndex16_Short()
 {
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	u16 Index = DataReadU16(); 
 	Pos_ReadIndex_Short(s16);
 }
 
-void LOADERDECL Pos_ReadIndex16_Float(const void *_p)
+void LOADERDECL Pos_ReadIndex16_Float()
 {
-	TVtxAttr* pVtxAttr = (TVtxAttr*)_p;
 	u16 Index = DataReadU16(); 
 	Pos_ReadIndex_Float();
 }
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h
index 4596fa5f45..ccc23d4903 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_TextCoord.h
@@ -23,19 +23,19 @@
 
 extern int tcIndex;
 
-void LOADERDECL TexCoord_Read_Dummy(const void *_p)
+void LOADERDECL TexCoord_Read_Dummy()
 {
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadDirect_UByte1(const void *_p)
+void LOADERDECL TexCoord_ReadDirect_UByte1()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScaleU[tcIndex];
     LOG_TEX1();
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadDirect_UByte2(const void *_p)
+void LOADERDECL TexCoord_ReadDirect_UByte2()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScaleU[tcIndex];
     ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * tcScaleV[tcIndex];
@@ -44,14 +44,14 @@ void LOADERDECL TexCoord_ReadDirect_UByte2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadDirect_Byte1(const void *_p)
+void LOADERDECL TexCoord_ReadDirect_Byte1()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScaleU[tcIndex];
     LOG_TEX1();
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadDirect_Byte2(const void *_p)
+void LOADERDECL TexCoord_ReadDirect_Byte2()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScaleU[tcIndex];
     ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * tcScaleV[tcIndex];
@@ -60,14 +60,14 @@ void LOADERDECL TexCoord_ReadDirect_Byte2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadDirect_UShort1(const void *_p)
+void LOADERDECL TexCoord_ReadDirect_UShort1()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScaleU[tcIndex];
     LOG_TEX1();
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadDirect_UShort2(const void *_p)
+void LOADERDECL TexCoord_ReadDirect_UShort2()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScaleU[tcIndex];
     ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * tcScaleV[tcIndex];
@@ -76,14 +76,14 @@ void LOADERDECL TexCoord_ReadDirect_UShort2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadDirect_Short1(const void *_p)
+void LOADERDECL TexCoord_ReadDirect_Short1()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScaleU[tcIndex];
     LOG_TEX1();
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadDirect_Short2(const void *_p)
+void LOADERDECL TexCoord_ReadDirect_Short2()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScaleU[tcIndex];
     ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * tcScaleV[tcIndex];
@@ -92,14 +92,14 @@ void LOADERDECL TexCoord_ReadDirect_Short2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadDirect_Float1(const void *_p)
+void LOADERDECL TexCoord_ReadDirect_Float1()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32() * tcScaleU[tcIndex];
     LOG_TEX1();
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadDirect_Float2(const void *_p)
+void LOADERDECL TexCoord_ReadDirect_Float2()
 {
     ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadF32() * tcScaleU[tcIndex];
     ((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadF32() * tcScaleV[tcIndex];
@@ -109,7 +109,7 @@ void LOADERDECL TexCoord_ReadDirect_Float2(const void *_p)
 }
 
 // ==================================================================================
-void LOADERDECL TexCoord_ReadIndex8_UByte1(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex8_UByte1()	
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -119,7 +119,7 @@ void LOADERDECL TexCoord_ReadIndex8_UByte1(const void *_p)
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadIndex8_UByte2(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex8_UByte2()	
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -131,7 +131,7 @@ void LOADERDECL TexCoord_ReadIndex8_UByte2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadIndex8_Byte1(const void *_p)		
+void LOADERDECL TexCoord_ReadIndex8_Byte1()		
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -141,7 +141,7 @@ void LOADERDECL TexCoord_ReadIndex8_Byte1(const void *_p)
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadIndex8_Byte2(const void *_p)		
+void LOADERDECL TexCoord_ReadIndex8_Byte2()		
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -153,7 +153,7 @@ void LOADERDECL TexCoord_ReadIndex8_Byte2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadIndex8_UShort1(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex8_UShort1()	
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -163,7 +163,7 @@ void LOADERDECL TexCoord_ReadIndex8_UShort1(const void *_p)
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadIndex8_UShort2(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex8_UShort2()	
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -175,7 +175,7 @@ void LOADERDECL TexCoord_ReadIndex8_UShort2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadIndex8_Short1(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex8_Short1()	
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -185,7 +185,7 @@ void LOADERDECL TexCoord_ReadIndex8_Short1(const void *_p)
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadIndex8_Short2(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex8_Short2()	
 {
     u8 Index = DataReadU8();
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -197,7 +197,7 @@ void LOADERDECL TexCoord_ReadIndex8_Short2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadIndex8_Float1(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex8_Float1()	
 {
     u16 Index = DataReadU8(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -208,7 +208,7 @@ void LOADERDECL TexCoord_ReadIndex8_Float1(const void *_p)
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadIndex8_Float2(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex8_Float2()	
 {
     u16 Index = DataReadU8(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -223,7 +223,7 @@ void LOADERDECL TexCoord_ReadIndex8_Float2(const void *_p)
 }
 
 // ==================================================================================
-void LOADERDECL TexCoord_ReadIndex16_UByte1(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex16_UByte1()	
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -233,7 +233,7 @@ void LOADERDECL TexCoord_ReadIndex16_UByte1(const void *_p)
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadIndex16_UByte2(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex16_UByte2()	
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -245,7 +245,7 @@ void LOADERDECL TexCoord_ReadIndex16_UByte2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadIndex16_Byte1(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex16_Byte1()	
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -255,7 +255,7 @@ void LOADERDECL TexCoord_ReadIndex16_Byte1(const void *_p)
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadIndex16_Byte2(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex16_Byte2()	
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -267,7 +267,7 @@ void LOADERDECL TexCoord_ReadIndex16_Byte2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadIndex16_UShort1(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex16_UShort1()	
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -277,7 +277,7 @@ void LOADERDECL TexCoord_ReadIndex16_UShort1(const void *_p)
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadIndex16_UShort2(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex16_UShort2()	
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -289,7 +289,7 @@ void LOADERDECL TexCoord_ReadIndex16_UShort2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadIndex16_Short1(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex16_Short1()	
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -299,7 +299,7 @@ void LOADERDECL TexCoord_ReadIndex16_Short1(const void *_p)
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadIndex16_Short2(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex16_Short2()	
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -311,7 +311,7 @@ void LOADERDECL TexCoord_ReadIndex16_Short2(const void *_p)
     tcIndex++;
 }
 
-void LOADERDECL TexCoord_ReadIndex16_Float1(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex16_Float1()	
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
@@ -322,7 +322,7 @@ void LOADERDECL TexCoord_ReadIndex16_Float1(const void *_p)
     VertexManager::s_pCurBufferPointer += 4;
     tcIndex++;
 }
-void LOADERDECL TexCoord_ReadIndex16_Float2(const void *_p)	
+void LOADERDECL TexCoord_ReadIndex16_Float2()	
 {
     u16 Index = DataReadU16(); 
     u32 iAddress = arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp
index c9f672ec93..a772813fa7 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp
@@ -108,6 +108,12 @@ void AddVertices(int primitive, int numvertices)
 	_assert_( numvertices > 0 );
 
 	ADDSTAT(stats.thisFrame.numPrims, numvertices);
+	/*
+	if (s_vStoredPrimitives.size() && s_vStoredPrimitives[s_vStoredPrimitives.size() - 1].first == primitive) {
+		// Actually, just count potential primitive joins.
+		// Doesn't seem worth it in Metroid Prime games.
+		INCSTAT(stats.thisFrame.numPrimitiveJoins);
+	}*/
 	s_vStoredPrimitives.push_back(std::pair<int, int>(c_primitiveType[primitive], numvertices));
 
 #if defined(_DEBUG) || defined(DEBUGFAST) 
@@ -150,7 +156,7 @@ void Flush()
 
 	DVSTARTPROFILE();
 
-	GL_REPORT_ERRORD();
+	GL_REPORT_ERRORD(); 
 
 	glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]);
 	glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW);
@@ -263,6 +269,7 @@ void Flush()
 	int offset = 0;
 	for (std::vector< std::pair<int, int> >::const_iterator it = s_vStoredPrimitives.begin(); it != s_vStoredPrimitives.end(); ++it)
 	{
+		INCSTAT(stats.thisFrame.numDrawCalls);
 		glDrawArrays(it->first, offset, it->second);
 		offset += it->second;
 	}