From f77624147d5f4c7271d8b032626b7f6a1d93b817 Mon Sep 17 00:00:00 2001
From: hrydgard <hrydgard@gmail.com>
Date: Sat, 25 Oct 2008 15:53:43 +0000
Subject: [PATCH] Cachable vertex loaders. Not a very big speedup in itself,
 but makes it easier to speed up vertex loading in the future.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@960 8ced0084-cf51-0410-be5f-012b33b47a6e
---
 Source/Core/VideoCommon/Src/Statistics.h      |    2 +
 Source/Core/VideoCommon/Src/VideoState.cpp    |    4 +-
 .../Src/PixelShaderManager.cpp                |   11 +-
 Source/Plugins/Plugin_VideoOGL/Src/Render.cpp |    1 +
 .../Plugin_VideoOGL/Src/TextureMngr.cpp       |   12 +-
 .../Plugin_VideoOGL/Src/VertexLoader.cpp      | 1022 ++++++++---------
 .../Plugin_VideoOGL/Src/VertexLoader.h        |   94 +-
 .../Src/VertexLoaderManager.cpp               |   70 +-
 .../Plugin_VideoOGL/Src/VertexLoaderManager.h |    2 +
 .../Src/VertexShaderManager.cpp               |    2 +-
 Source/Plugins/Plugin_VideoOGL/Src/main.cpp   |    7 +-
 11 files changed, 609 insertions(+), 618 deletions(-)

diff --git a/Source/Core/VideoCommon/Src/Statistics.h b/Source/Core/VideoCommon/Src/Statistics.h
index 72e1cb3e82..e618d543ba 100644
--- a/Source/Core/VideoCommon/Src/Statistics.h
+++ b/Source/Core/VideoCommon/Src/Statistics.h
@@ -39,6 +39,8 @@ struct Statistics
 
     int numJoins;
 
+	int numVertexLoaders;
+
     struct ThisFrame
     {
         int numBPLoads;
diff --git a/Source/Core/VideoCommon/Src/VideoState.cpp b/Source/Core/VideoCommon/Src/VideoState.cpp
index 55b7c6f9f7..f1149be7f3 100644
--- a/Source/Core/VideoCommon/Src/VideoState.cpp
+++ b/Source/Core/VideoCommon/Src/VideoState.cpp
@@ -23,8 +23,8 @@
 #include "TextureDecoder.h"
 #include "Fifo.h"
 
-static void DoState(PointerWrap &p) {
-
+static void DoState(PointerWrap &p)
+{
     // BP Memory
     p.Do(bpmem);
     // CP Memory
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp
index 25372b7f6b..0de73de9f7 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp
@@ -171,7 +171,7 @@ void PixelShaderMngr::Cleanup()
     PSCache::iterator iter = pshaders.begin();
     while (iter != pshaders.end()) {
         PSCacheEntry &entry = iter->second;
-        if (entry.frameCount < frameCount - 200) {
+        if (entry.frameCount < frameCount - 400) {
             entry.Destroy();
 #ifdef _WIN32
             iter = pshaders.erase(iter);
@@ -242,10 +242,9 @@ void PixelShaderMngr::SetConstants()
 {
     for (int i = 0; i < 2; ++i) {
         if (s_nColorsChanged[i]) {
-            
-            int baseind = i?C_KCOLORS:C_COLORS;
+            int baseind = i ? C_KCOLORS : C_COLORS;
             for (int j = 0; j < 4; ++j) {
-                if (s_nColorsChanged[i] & (1<<j)) {
+                if (s_nColorsChanged[i] & (1 << j)) {
                     SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]);
                 }
             }
@@ -258,7 +257,7 @@ void PixelShaderMngr::SetConstants()
         if (bpmem.tevorders[i/2].getEnable(i&1)) {
             int texmap = bpmem.tevorders[i/2].getTexMap(i&1);
             maptocoord[texmap] = bpmem.tevorders[i/2].getTexCoord(i&1);
-            newmask |= 1<<texmap;
+            newmask |= 1 << texmap;
             SetTexDimsChanged(texmap);
         }
     }
@@ -266,7 +265,7 @@ void PixelShaderMngr::SetConstants()
     if (maptocoord_mask != newmask) {
         //u32 changes = maptocoord_mask ^ newmask;
         for (int i = 0; i < 8; ++i) {
-            if (newmask&(1<<i)) {
+            if (newmask & (1 << i)) {
                 SetTexDimsChanged(i);
             }
 			else {
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp
index 20862dbc26..5095f3dba7 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp
@@ -773,6 +773,7 @@ void Renderer::SwapBuffers()
         p+=sprintf(p,"Num CP loads (DL): %i\n",stats.thisFrame.numCPLoadsInDL);
         p+=sprintf(p,"Num BP loads:      %i\n",stats.thisFrame.numBPLoads);
         p+=sprintf(p,"Num BP loads (DL): %i\n",stats.thisFrame.numBPLoadsInDL);
+        p+=sprintf(p,"Num vertex loaders:       %i\n",stats.numVertexLoaders);
 
 		Renderer::RenderText(st, 20, 20, 0xFF00FFFF);
     }
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp
index f181376392..ef48edc4ef 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp
@@ -50,7 +50,9 @@ int TextureMngr::nTex2DEnabled, TextureMngr::nTexRECTEnabled;
 
 extern int frameCount;
 static u32 s_TempFramebuffer = 0;
+
 #define TEMP_SIZE (1024*1024*4)
+#define TEXTURE_KILL_THRESHOLD 200
 
 const GLint c_MinLinearFilter[8] = {
 	GL_NEAREST,
@@ -119,7 +121,7 @@ void TextureMngr::Init()
 void TextureMngr::Invalidate()
 {
     TexCache::iterator iter = textures.begin();
-    for (;iter!=textures.end();iter++)
+    for (; iter!=textures.end(); iter++)
         iter->second.Destroy();
     textures.clear();
 	TexDecoder_SetTexFmtOverlayOptions(g_Config.bTexFmtOverlayEnable, g_Config.bTexFmtOverlayCenter);
@@ -134,7 +136,7 @@ void TextureMngr::Shutdown()
 	}
     mapDepthTargets.clear();
 
-    if( s_TempFramebuffer ) {
+    if (s_TempFramebuffer) {
         glDeleteFramebuffersEXT(1, (GLuint *)&s_TempFramebuffer);
         s_TempFramebuffer = 0;
     }
@@ -147,7 +149,7 @@ void TextureMngr::Cleanup()
 {
     TexCache::iterator iter = textures.begin();
     while (iter != textures.end()) {
-        if (frameCount > 20 + iter->second.frameCount) {
+        if (frameCount > TEXTURE_KILL_THRESHOLD + iter->second.frameCount) {
             if (!iter->second.isRenderTarget) {
                 iter->second.Destroy();
 #ifdef _WIN32
@@ -215,10 +217,10 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width
     if (iter != textures.end()) {
         TCacheEntry &entry = iter->second;
 
-        if(entry.isRenderTarget || (((u32 *)ptr)[entry.hashoffset] == entry.hash && palhash == entry.paletteHash)) { //stupid, improve
+        if (entry.isRenderTarget || (((u32 *)ptr)[entry.hashoffset] == entry.hash && palhash == entry.paletteHash)) { //stupid, improve
             entry.frameCount = frameCount;
             //glEnable(entry.isNonPow2?GL_TEXTURE_RECTANGLE_ARB:GL_TEXTURE_2D);
-            glBindTexture(entry.isNonPow2?GL_TEXTURE_RECTANGLE_ARB:GL_TEXTURE_2D, entry.texture);
+            glBindTexture(entry.isNonPow2 ? GL_TEXTURE_RECTANGLE_ARB:GL_TEXTURE_2D, entry.texture);
             if (entry.mode.hex != tm0.hex)
                 entry.SetTextureParameters(tm0);
             return &entry;
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp
index 824502c90a..614b824d3b 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp
@@ -17,29 +17,18 @@
 
 #include "Globals.h"
 
-#include <fstream>
 #include <assert.h>
 
 #include "Common.h"
 #include "Config.h"
-#include "ImageWrite.h"
 #include "Profiler.h"
-#include "StringUtil.h"
 
-#include "Render.h"
-#include "VertexShader.h"
 #include "VertexManager.h"
 #include "VertexLoaderManager.h"
 #include "VertexLoader.h"
 #include "BPStructs.h"
 #include "DataReader.h"
 
-#include "VertexShaderManager.h"
-#include "PixelShaderManager.h"
-#include "TextureMngr.h"
-
-#include <fstream>
-
 NativeVertexFormat *g_nativeVertexFmt;
 
 //these don't need to be saved
@@ -50,12 +39,10 @@ static float tcScaleV[8];
 static int tcIndex;
 static int colIndex;
 #ifndef _WIN32
-    #undef inline
-    #define inline
+	#undef inline
+	#define inline
 #endif
 
-
-// ==============================================================================
 // Direct
 // ==============================================================================
 static u8 s_curposmtx;
@@ -65,44 +52,44 @@ static int s_texmtxread = 0;
 
 void LOADERDECL PosMtx_ReadDirect_UByte(const void *_p)
 {
-    s_curposmtx = DataReadU8()&0x3f;
-    PRIM_LOG("posmtx: %d, ", s_curposmtx);
+	s_curposmtx = DataReadU8() & 0x3f;
+	PRIM_LOG("posmtx: %d, ", s_curposmtx);
 }
 
 void LOADERDECL PosMtx_Write(const void *_p)
 {
-    *VertexManager::s_pCurBufferPointer++ = s_curposmtx;
-    //*VertexManager::s_pCurBufferPointer++ = 0;
-    //*VertexManager::s_pCurBufferPointer++ = 0;
-    //*VertexManager::s_pCurBufferPointer++ = 0;
+	*VertexManager::s_pCurBufferPointer++ = s_curposmtx;
+	//*VertexManager::s_pCurBufferPointer++ = 0;
+	//*VertexManager::s_pCurBufferPointer++ = 0;
+	//*VertexManager::s_pCurBufferPointer++ = 0;
 }
 
 void LOADERDECL TexMtx_ReadDirect_UByte(const void *_p)
 {
-    s_curtexmtx[s_texmtxread] = DataReadU8()&0x3f;
-    PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]);
-    s_texmtxread++;
+	s_curtexmtx[s_texmtxread] = DataReadU8() & 0x3f;  // keep only the low 6 bits of the byte read from the stream
+	PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]);
+	s_texmtxread++;  // NOTE(review): index is not bounds-checked here; presumably reset per vertex elsewhere - confirm
 }
 
 void LOADERDECL TexMtx_Write_Float(const void *_p)
 {
-    *(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++];
-    VertexManager::s_pCurBufferPointer += 4;
+	*(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++];
+	VertexManager::s_pCurBufferPointer += 4;
 }
 
 void LOADERDECL TexMtx_Write_Float2(const void *_p)
 {
-    ((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
-    ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++];
-    VertexManager::s_pCurBufferPointer += 8;
+	((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
+	((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++];
+	VertexManager::s_pCurBufferPointer += 8;
 }
 
 void LOADERDECL TexMtx_Write_Short3(const void *_p)
 {
-    ((s16*)VertexManager::s_pCurBufferPointer)[0] = 0;
-    ((s16*)VertexManager::s_pCurBufferPointer)[1] = 0;
-    ((s16*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++];
-    VertexManager::s_pCurBufferPointer += 6;
+	((s16*)VertexManager::s_pCurBufferPointer)[0] = 0;
+	((s16*)VertexManager::s_pCurBufferPointer)[1] = 0;
+	((s16*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++];
+	VertexManager::s_pCurBufferPointer += 6;
 }
 
 #include "VertexLoader_Position.h"
@@ -110,12 +97,17 @@ void LOADERDECL TexMtx_Write_Short3(const void *_p)
 #include "VertexLoader_Color.h"
 #include "VertexLoader_TextCoord.h"
 
-VertexLoader::VertexLoader() 
+VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) 
 {
-    m_VertexSize = 0;
-    m_AttrDirty = AD_DIRTY;
+	m_VertexSize = 0;
 	m_numPipelineStages = 0;
-    VertexLoader_Normal::Init();
+	VertexLoader_Normal::Init();  // NOTE(review): now runs for every constructed loader - confirm Init() is cheap and idempotent
+	// Capture the vertex descriptor and attribute table this loader is being built for.
+	m_VtxDesc = vtx_desc;
+	SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex);  // presumably populates m_VtxAttr from the three VAT group words - confirm in SetVAT
+	// Both calls below read m_VtxDesc and m_VtxAttr, so they must come after the assignments above.
+	ComputeVertexSize();
+	CompileVertexTranslator();
 }
 
 VertexLoader::~VertexLoader() 
@@ -124,160 +116,130 @@ VertexLoader::~VertexLoader()
 
 int VertexLoader::ComputeVertexSize()
 {
-    if (m_AttrDirty == AD_CLEAN) {
-		// Compare the 33 desc bits. 
-        if (m_VtxDesc.Hex0 == g_VtxDesc.Hex0 &&
-		    (m_VtxDesc.Hex1 & 1) == (g_VtxDesc.Hex1 & 1))
-            return m_VertexSize;
+	m_VertexSize = 0;
+	// Position Matrix Index
+	if (m_VtxDesc.PosMatIdx)
+		m_VertexSize += 1;
 
-        m_VtxDesc.Hex = g_VtxDesc.Hex;
-    }
-    else {
-        // Attributes are dirty so we have to recompute everything anyway.
-        m_VtxDesc.Hex = g_VtxDesc.Hex;
-    }
-
-    m_AttrDirty = AD_DIRTY;
-    m_VertexSize = 0;
-    // Position Matrix Index
-    if (m_VtxDesc.PosMatIdx)
-        m_VertexSize += 1;
-
-    // Texture matrix indices
-    if (m_VtxDesc.Tex0MatIdx) m_VertexSize += 1;
-    if (m_VtxDesc.Tex1MatIdx) m_VertexSize += 1;
-    if (m_VtxDesc.Tex2MatIdx) m_VertexSize += 1;
-    if (m_VtxDesc.Tex3MatIdx) m_VertexSize += 1;
-    if (m_VtxDesc.Tex4MatIdx) m_VertexSize += 1;
-    if (m_VtxDesc.Tex5MatIdx) m_VertexSize += 1;
-    if (m_VtxDesc.Tex6MatIdx) m_VertexSize += 1;
-    if (m_VtxDesc.Tex7MatIdx) m_VertexSize += 1;
-    
-    switch (m_VtxDesc.Position) {
-    case NOT_PRESENT:	{_assert_("Vertex descriptor without position!");} break;
-    case DIRECT:
-        {
-            switch (m_VtxAttr.PosFormat) {
-            case FORMAT_UBYTE:
-            case FORMAT_BYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; break;
-            case FORMAT_USHORT:
-            case FORMAT_SHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; break;
-            case FORMAT_FLOAT: m_VertexSize += m_VtxAttr.PosElements?12:8; break;
-            default: _assert_(0); break;
-            }
-        }
-        break;
-    case INDEX8:		
-        m_VertexSize += 1;
-        break;
-    case INDEX16:
-        m_VertexSize += 2;
-        break;
-    }
+	// Texture matrix indices
+	if (m_VtxDesc.Tex0MatIdx) m_VertexSize += 1;
+	if (m_VtxDesc.Tex1MatIdx) m_VertexSize += 1;
+	if (m_VtxDesc.Tex2MatIdx) m_VertexSize += 1;
+	if (m_VtxDesc.Tex3MatIdx) m_VertexSize += 1;
+	if (m_VtxDesc.Tex4MatIdx) m_VertexSize += 1;
+	if (m_VtxDesc.Tex5MatIdx) m_VertexSize += 1;
+	if (m_VtxDesc.Tex6MatIdx) m_VertexSize += 1;
+	if (m_VtxDesc.Tex7MatIdx) m_VertexSize += 1;
+	
+	switch (m_VtxDesc.Position) {  // add the number of source bytes the position attribute occupies per vertex
+	case NOT_PRESENT:	{_assert_msg_(0, "Vertex descriptor without position!", "WTF?");} break;  // was _assert_("..."): a string literal is a non-null pointer, so that check could never fire
+	case DIRECT:  // position data is inline: size = bytes per component * (PosElements ? 3 : 2) components
+		{
+			switch (m_VtxAttr.PosFormat) {
+			case FORMAT_UBYTE:
+			case FORMAT_BYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; break;
+			case FORMAT_USHORT:
+			case FORMAT_SHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; break;
+			case FORMAT_FLOAT: m_VertexSize += m_VtxAttr.PosElements?12:8; break;
+			default: _assert_(0); break;
+			}
+		}
+		break;
+	case INDEX8:  // indexed position: one byte per vertex
+		m_VertexSize += 1;
+		break;
+	case INDEX16:  // indexed position: two bytes per vertex
+		m_VertexSize += 2;
+		break;
+	}
 
 	VertexLoader_Normal::index3 = m_VtxAttr.NormalIndex3 ? true : false;
-    if (m_VtxDesc.Normal != NOT_PRESENT)
-        m_VertexSize += VertexLoader_Normal::GetSize(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements);
-    
-    // Colors
-    int col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
-    for (int i = 0; i < 2; i++) {
-        switch (col[i])
-        {
-        case NOT_PRESENT: 
-            break;
-        case DIRECT:
-            switch (m_VtxAttr.color[i].Comp)
-            {
-            case FORMAT_16B_565:	m_VertexSize += 2; break;
-            case FORMAT_24B_888:	m_VertexSize += 3; break;
-            case FORMAT_32B_888x:	m_VertexSize += 4; break;
-            case FORMAT_16B_4444:	m_VertexSize += 2; break;
-            case FORMAT_24B_6666:	m_VertexSize += 3; break;
-            case FORMAT_32B_8888:	m_VertexSize += 4; break;
-            default: _assert_(0); break;
-            }									    
-            break;
-        case INDEX8:	
-            m_VertexSize += 1;
-            break;
-        case INDEX16:
-            m_VertexSize += 2;
-            break;
-        }   
-    }
+	if (m_VtxDesc.Normal != NOT_PRESENT)
+		m_VertexSize += VertexLoader_Normal::GetSize(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements);
+	
+	// Colors
+	int col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
+	for (int i = 0; i < 2; i++) {
+		switch (col[i])
+		{
+		case NOT_PRESENT: 
+			break;
+		case DIRECT:
+			switch (m_VtxAttr.color[i].Comp)
+			{
+			case FORMAT_16B_565:	m_VertexSize += 2; break;
+			case FORMAT_24B_888:	m_VertexSize += 3; break;
+			case FORMAT_32B_888x:	m_VertexSize += 4; break;
+			case FORMAT_16B_4444:	m_VertexSize += 2; break;
+			case FORMAT_24B_6666:	m_VertexSize += 3; break;
+			case FORMAT_32B_8888:	m_VertexSize += 4; break;
+			default: _assert_(0); break;
+			}									    
+			break;
+		case INDEX8:	
+			m_VertexSize += 1;
+			break;
+		case INDEX16:
+			m_VertexSize += 2;
+			break;
+		}   
+	}
 
-    // TextureCoord
-    int tc[8] = {
-        m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
-        m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord,
-    };
-    
-    for (int i = 0; i < 8; i++) {
-        switch (tc[i]) {
-        case NOT_PRESENT: 
-            break;
-        case DIRECT: 
-            {
-                switch (m_VtxAttr.texCoord[i].Format)
-                {
-                case FORMAT_UBYTE:
-                case FORMAT_BYTE: m_VertexSize += m_VtxAttr.texCoord[i].Elements?2:1; break;
-                case FORMAT_USHORT:
-                case FORMAT_SHORT: m_VertexSize += m_VtxAttr.texCoord[i].Elements?4:2; break;
-                case FORMAT_FLOAT: m_VertexSize += m_VtxAttr.texCoord[i].Elements?8:4; break;
-                default: _assert_(0); break;
-                }
-            }
-            break;
-        case INDEX8:	
-            m_VertexSize += 1;
-            break;
-        case INDEX16:
-            m_VertexSize += 2;
-            break;
-        }
-    }
+	// TextureCoord
+	int tc[8] = {
+		m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
+		m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord,
+	};
+	
+	for (int i = 0; i < 8; i++) {
+		switch (tc[i]) {
+		case NOT_PRESENT: 
+			break;
+		case DIRECT: 
+			{
+				switch (m_VtxAttr.texCoord[i].Format)
+				{
+				case FORMAT_UBYTE:
+				case FORMAT_BYTE: m_VertexSize += m_VtxAttr.texCoord[i].Elements?2:1; break;
+				case FORMAT_USHORT:
+				case FORMAT_SHORT: m_VertexSize += m_VtxAttr.texCoord[i].Elements?4:2; break;
+				case FORMAT_FLOAT: m_VertexSize += m_VtxAttr.texCoord[i].Elements?8:4; break;
+				default: _assert_(0); break;
+				}
+			}
+			break;
+		case INDEX8:	
+			m_VertexSize += 1;
+			break;
+		case INDEX16:
+			m_VertexSize += 2;
+			break;
+		}
+	}
 
-    return m_VertexSize;
+	return m_VertexSize;
 }
 
 
 void VertexLoader::CompileVertexTranslator()
 {
-    if (m_AttrDirty == AD_CLEAN)
-    {
-		// Check if local cached desc (in this VL) matches global desc
-        if (m_VtxDesc.Hex0 == g_VtxDesc.Hex0 &&
-		    (m_VtxDesc.Hex1 & 1) == (g_VtxDesc.Hex1 & 1))
-		{
-            return;  // same
-		}
-    }
-    else
-	{
-        m_AttrDirty = AD_CLEAN;
-	}
-     
-    m_VtxDesc.Hex = g_VtxDesc.Hex;
-
-    // Reset pipeline
-    m_numPipelineStages = 0;
+	// Reset pipeline
+	m_numPipelineStages = 0;
 
 	// It's a bit ugly that we poke inside m_NativeFmt in this function. Planning to fix this.
-    m_NativeFmt.m_VBStridePad = 0;
-    m_NativeFmt.m_VBVertexStride = 0;
-    m_NativeFmt.m_components = 0;
+	m_NativeFmt.m_VBStridePad = 0;
+	m_NativeFmt.m_VBVertexStride = 0;
+	m_NativeFmt.m_components = 0;
 
-    // m_VBVertexStride for texmtx and posmtx is computed later when writing.
-    
-    // Position Matrix Index
-    if (m_VtxDesc.PosMatIdx) {
-        m_PipelineStages[m_numPipelineStages++] = PosMtx_ReadDirect_UByte;
-        m_NativeFmt.m_components |= VB_HAS_POSMTXIDX;
-    }
+	// m_VBVertexStride for texmtx and posmtx is computed later when writing.
+	
+	// Position Matrix Index
+	if (m_VtxDesc.PosMatIdx) {
+		m_PipelineStages[m_numPipelineStages++] = PosMtx_ReadDirect_UByte;
+		m_NativeFmt.m_components |= VB_HAS_POSMTXIDX;
+	}
 
-    if (m_VtxDesc.Tex0MatIdx) {m_NativeFmt.m_components |= VB_HAS_TEXMTXIDX0; WriteCall(TexMtx_ReadDirect_UByte); }
+	if (m_VtxDesc.Tex0MatIdx) {m_NativeFmt.m_components |= VB_HAS_TEXMTXIDX0; WriteCall(TexMtx_ReadDirect_UByte); }
 	if (m_VtxDesc.Tex1MatIdx) {m_NativeFmt.m_components |= VB_HAS_TEXMTXIDX1; WriteCall(TexMtx_ReadDirect_UByte); }
 	if (m_VtxDesc.Tex2MatIdx) {m_NativeFmt.m_components |= VB_HAS_TEXMTXIDX2; WriteCall(TexMtx_ReadDirect_UByte); }
 	if (m_VtxDesc.Tex3MatIdx) {m_NativeFmt.m_components |= VB_HAS_TEXMTXIDX3; WriteCall(TexMtx_ReadDirect_UByte); }
@@ -286,227 +248,227 @@ void VertexLoader::CompileVertexTranslator()
 	if (m_VtxDesc.Tex6MatIdx) {m_NativeFmt.m_components |= VB_HAS_TEXMTXIDX6; WriteCall(TexMtx_ReadDirect_UByte); }
 	if (m_VtxDesc.Tex7MatIdx) {m_NativeFmt.m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); }
 
-    // Position
-    if (m_VtxDesc.Position != NOT_PRESENT)
-        m_NativeFmt.m_VBVertexStride += 12;
+	// Position
+	if (m_VtxDesc.Position != NOT_PRESENT)
+		m_NativeFmt.m_VBVertexStride += 12;
 
-    switch (m_VtxDesc.Position) {
-    case NOT_PRESENT:	{_assert_msg_(0, "Vertex descriptor without position!", "WTF?");} break;
-    case DIRECT:
-        {
-            switch (m_VtxAttr.PosFormat) {
-            case FORMAT_UBYTE:	WriteCall(Pos_ReadDirect_UByte);  break;
-            case FORMAT_BYTE:	WriteCall(Pos_ReadDirect_Byte);   break;
-            case FORMAT_USHORT:	WriteCall(Pos_ReadDirect_UShort); break;
-            case FORMAT_SHORT:	WriteCall(Pos_ReadDirect_Short);  break;
-            case FORMAT_FLOAT:	WriteCall(Pos_ReadDirect_Float);  break;
-            default: _assert_(0); break;
-            }
-        }
-        break;
-    case INDEX8:		
-        switch (m_VtxAttr.PosFormat) {
-        case FORMAT_UBYTE:	WriteCall(Pos_ReadIndex8_UByte);  break; //WTF?
-        case FORMAT_BYTE:	WriteCall(Pos_ReadIndex8_Byte);   break;
-        case FORMAT_USHORT:	WriteCall(Pos_ReadIndex8_UShort); break;
-        case FORMAT_SHORT:	WriteCall(Pos_ReadIndex8_Short);  break;
-        case FORMAT_FLOAT:	WriteCall(Pos_ReadIndex8_Float);  break;
-        default: _assert_(0); break;
-        }
-        break;
-    case INDEX16:
-        switch (m_VtxAttr.PosFormat) {
-        case FORMAT_UBYTE:	WriteCall(Pos_ReadIndex16_UByte);  break;
-        case FORMAT_BYTE:	WriteCall(Pos_ReadIndex16_Byte);   break;
-        case FORMAT_USHORT:	WriteCall(Pos_ReadIndex16_UShort); break;
-        case FORMAT_SHORT:	WriteCall(Pos_ReadIndex16_Short);  break;
-        case FORMAT_FLOAT:	WriteCall(Pos_ReadIndex16_Float);  break;
-        default: _assert_(0); break;
-        }
-        break;
-    }
+	switch (m_VtxDesc.Position) {
+	case NOT_PRESENT:	{_assert_msg_(0, "Vertex descriptor without position!", "WTF?");} break;
+	case DIRECT:
+		{
+			switch (m_VtxAttr.PosFormat) {
+			case FORMAT_UBYTE:	WriteCall(Pos_ReadDirect_UByte);  break;
+			case FORMAT_BYTE:	WriteCall(Pos_ReadDirect_Byte);   break;
+			case FORMAT_USHORT:	WriteCall(Pos_ReadDirect_UShort); break;
+			case FORMAT_SHORT:	WriteCall(Pos_ReadDirect_Short);  break;
+			case FORMAT_FLOAT:	WriteCall(Pos_ReadDirect_Float);  break;
+			default: _assert_(0); break;
+			}
+		}
+		break;
+	case INDEX8:		
+		switch (m_VtxAttr.PosFormat) {
+		case FORMAT_UBYTE:	WriteCall(Pos_ReadIndex8_UByte);  break; //WTF?
+		case FORMAT_BYTE:	WriteCall(Pos_ReadIndex8_Byte);   break;
+		case FORMAT_USHORT:	WriteCall(Pos_ReadIndex8_UShort); break;
+		case FORMAT_SHORT:	WriteCall(Pos_ReadIndex8_Short);  break;
+		case FORMAT_FLOAT:	WriteCall(Pos_ReadIndex8_Float);  break;
+		default: _assert_(0); break;
+		}
+		break;
+	case INDEX16:
+		switch (m_VtxAttr.PosFormat) {
+		case FORMAT_UBYTE:	WriteCall(Pos_ReadIndex16_UByte);  break;
+		case FORMAT_BYTE:	WriteCall(Pos_ReadIndex16_Byte);   break;
+		case FORMAT_USHORT:	WriteCall(Pos_ReadIndex16_UShort); break;
+		case FORMAT_SHORT:	WriteCall(Pos_ReadIndex16_Short);  break;
+		case FORMAT_FLOAT:	WriteCall(Pos_ReadIndex16_Float);  break;
+		default: _assert_(0); break;
+		}
+		break;
+	}
 
-    // Normals
-    if (m_VtxDesc.Normal != NOT_PRESENT) {
-        VertexLoader_Normal::index3 = m_VtxAttr.NormalIndex3 ? true : false;
-        TPipelineFunction pFunc = VertexLoader_Normal::GetFunction(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements);
-        if (pFunc == 0)
-        {
-            char temp[256];
-            sprintf(temp,"%i %i %i", m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements);
-            g_VideoInitialize.pSysMessage("VertexLoader_Normal::GetFunction returned zero!");
-        }
-        WriteCall(pFunc);
+	// Normals
+	if (m_VtxDesc.Normal != NOT_PRESENT) {
+		VertexLoader_Normal::index3 = m_VtxAttr.NormalIndex3 ? true : false;
+		TPipelineFunction pFunc = VertexLoader_Normal::GetFunction(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements);
+		if (pFunc == 0)  // no loader exists for this normal descriptor/format/element combination
+		{
+			char temp[256];  // ample for the fixed text plus three formatted integers
+			sprintf(temp, "VertexLoader_Normal::GetFunction returned zero! (%i %i %i)", m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements);
+			g_VideoInitialize.pSysMessage(temp);  // report the formatted details (previously built but never used); temp contains no '%' after formatting
+		}  // NOTE(review): the following WriteCall still receives the null pFunc - consider skipping it
+		WriteCall(pFunc);
 
-        int sizePro = 0;
-        switch (m_VtxAttr.NormalFormat)
-        {
-        case FORMAT_UBYTE:	sizePro=1; break;
-        case FORMAT_BYTE:	sizePro=1; break;
-        case FORMAT_USHORT:	sizePro=2; break;
-        case FORMAT_SHORT:	sizePro=2; break;
-        case FORMAT_FLOAT:	sizePro=4; break;
-        default: _assert_(0); break;
-        }
-        m_NativeFmt.m_VBVertexStride += sizePro * 3 * (m_VtxAttr.NormalElements?3:1);
+		int sizePro = 0;
+		switch (m_VtxAttr.NormalFormat)
+		{
+		case FORMAT_UBYTE:	sizePro=1; break;
+		case FORMAT_BYTE:	sizePro=1; break;
+		case FORMAT_USHORT:	sizePro=2; break;
+		case FORMAT_SHORT:	sizePro=2; break;
+		case FORMAT_FLOAT:	sizePro=4; break;
+		default: _assert_(0); break;
+		}
+		m_NativeFmt.m_VBVertexStride += sizePro * 3 * (m_VtxAttr.NormalElements?3:1);
 
-        int numNormals = (m_VtxAttr.NormalElements == 1) ? NRM_THREE : NRM_ONE;
-        m_NativeFmt.m_components |= VB_HAS_NRM0;
-        if (numNormals == NRM_THREE)
-            m_NativeFmt.m_components |= VB_HAS_NRM1 | VB_HAS_NRM2;
-    }
-    
-    // Colors
-    int col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
-    for (int i = 0; i < 2; i++) {
-        SetupColor(i, col[i], m_VtxAttr.color[i].Comp, m_VtxAttr.color[i].Elements);
+		int numNormals = (m_VtxAttr.NormalElements == 1) ? NRM_THREE : NRM_ONE;
+		m_NativeFmt.m_components |= VB_HAS_NRM0;
+		if (numNormals == NRM_THREE)
+			m_NativeFmt.m_components |= VB_HAS_NRM1 | VB_HAS_NRM2;
+	}
+	
+	// Colors
+	int col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
+	for (int i = 0; i < 2; i++) {
+		SetupColor(i, col[i], m_VtxAttr.color[i].Comp, m_VtxAttr.color[i].Elements);
 
-        if (col[i] != NOT_PRESENT)
-            m_NativeFmt.m_VBVertexStride += 4;
-    }
+		if (col[i] != NOT_PRESENT)
+			m_NativeFmt.m_VBVertexStride += 4;
+	}
 
-    // TextureCoord
-    int tc[8] = {
-        m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
-        m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord,
-    };
-    
-    // Texture matrix indices (remove if corresponding texture coordinate isn't enabled)
-    for (int i = 0; i < 8; i++) {
-        SetupTexCoord(i, tc[i], m_VtxAttr.texCoord[i].Format, m_VtxAttr.texCoord[i].Elements, m_VtxAttr.texCoord[i].Frac);
-        if (m_NativeFmt.m_components & (VB_HAS_TEXMTXIDX0 << i)) {
-            if (tc[i] != NOT_PRESENT) {
-                // if texmtx is included, texcoord will always be 3 floats, z will be the texmtx index
-                WriteCall(m_VtxAttr.texCoord[i].Elements ? TexMtx_Write_Float : TexMtx_Write_Float2);
-                m_NativeFmt.m_VBVertexStride += 12;
-            }
-            else {
-                WriteCall(TexMtx_Write_Short3);
-                m_NativeFmt.m_VBVertexStride += 6; // still include the texture coordinate, but this time as 6 bytes
-                m_NativeFmt.m_components |= VB_HAS_UV0 << i; // have to include since using now
-            }
-        }
-        else {
-            if (tc[i] != NOT_PRESENT)
-                m_NativeFmt.m_VBVertexStride += 4 * (m_VtxAttr.texCoord[i].Elements ? 2 : 1);
-        }
+	// TextureCoord
+	int tc[8] = {
+		m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
+		m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord,
+	};
+	
+	// Texture matrix indices (remove if corresponding texture coordinate isn't enabled)
+	for (int i = 0; i < 8; i++) {
+		SetupTexCoord(i, tc[i], m_VtxAttr.texCoord[i].Format, m_VtxAttr.texCoord[i].Elements, m_VtxAttr.texCoord[i].Frac);
+		if (m_NativeFmt.m_components & (VB_HAS_TEXMTXIDX0 << i)) {
+			if (tc[i] != NOT_PRESENT) {
+				// if texmtx is included, texcoord will always be 3 floats, z will be the texmtx index
+				WriteCall(m_VtxAttr.texCoord[i].Elements ? TexMtx_Write_Float : TexMtx_Write_Float2);
+				m_NativeFmt.m_VBVertexStride += 12;
+			}
+			else {
+				WriteCall(TexMtx_Write_Short3);
+				m_NativeFmt.m_VBVertexStride += 6; // still include the texture coordinate, but this time as 6 bytes
+				m_NativeFmt.m_components |= VB_HAS_UV0 << i; // have to include since using now
+			}
+		}
+		else {
+			if (tc[i] != NOT_PRESENT)
+				m_NativeFmt.m_VBVertexStride += 4 * (m_VtxAttr.texCoord[i].Elements ? 2 : 1);
+		}
 
-        if (tc[i] == NOT_PRESENT) {
-            // if there's more tex coords later, have to write a dummy call 
-            int j = i + 1;
-            for (; j < 8; ++j) {
-                if (tc[j] != NOT_PRESENT) {
-                    WriteCall(TexCoord_Read_Dummy); // important to get indices right!
-                    break;
-                }
-            }
-            if (j == 8 && !((m_NativeFmt.m_components&VB_HAS_TEXMTXIDXALL) & (VB_HAS_TEXMTXIDXALL<<(i+1)))) // no more tex coords and tex matrices, so exit loop
-                break;
-        }
-    }
+		if (tc[i] == NOT_PRESENT) {
+			// if there's more tex coords later, have to write a dummy call 
+			int j = i + 1;
+			for (; j < 8; ++j) {
+				if (tc[j] != NOT_PRESENT) {
+					WriteCall(TexCoord_Read_Dummy); // important to get indices right!
+					break;
+				}
+			}
+			if (j == 8 && !((m_NativeFmt.m_components&VB_HAS_TEXMTXIDXALL) & (VB_HAS_TEXMTXIDXALL<<(i+1)))) // no more tex coords and tex matrices, so exit loop
+				break;
+		}
+	}
 
-    if (m_VtxDesc.PosMatIdx) {
-        WriteCall(PosMtx_Write);
-        m_NativeFmt.m_VBVertexStride += 1;
-    }
+	if (m_VtxDesc.PosMatIdx) {
+		WriteCall(PosMtx_Write);
+		m_NativeFmt.m_VBVertexStride += 1;
+	}
 
 	m_NativeFmt.Initialize(m_VtxDesc, m_VtxAttr);
 }
 
 void VertexLoader::SetupColor(int num, int mode, int format, int elements)
 {
-    // if COL0 not present, then embed COL1 into COL0
-    if (num == 1 && !(m_NativeFmt.m_components & VB_HAS_COL0))
+	// if COL0 not present, then embed COL1 into COL0
+	if (num == 1 && !(m_NativeFmt.m_components & VB_HAS_COL0))
 		num = 0;
 
-    m_NativeFmt.m_components |= VB_HAS_COL0 << num;
-    switch (mode)
-    {
-    case NOT_PRESENT: 
-        m_NativeFmt.m_components &= ~(VB_HAS_COL0 << num);
-        break;
-    case DIRECT:
-        switch (format)
-        {
-        case FORMAT_16B_565:	WriteCall(Color_ReadDirect_16b_565); break;
-        case FORMAT_24B_888:	WriteCall(Color_ReadDirect_24b_888); break;
-        case FORMAT_32B_888x:	WriteCall(Color_ReadDirect_32b_888x); break;
-        case FORMAT_16B_4444:	WriteCall(Color_ReadDirect_16b_4444); break;
-        case FORMAT_24B_6666:	WriteCall(Color_ReadDirect_24b_6666); break;
-        case FORMAT_32B_8888:	WriteCall(Color_ReadDirect_32b_8888); break;
-        default: _assert_(0); break;
-        }
-        break;
-    case INDEX8:	
-        switch (format)
-        {
-        case FORMAT_16B_565:	WriteCall(Color_ReadIndex8_16b_565); break;
-        case FORMAT_24B_888:	WriteCall(Color_ReadIndex8_24b_888); break;
-        case FORMAT_32B_888x:	WriteCall(Color_ReadIndex8_32b_888x); break;
-        case FORMAT_16B_4444:	WriteCall(Color_ReadIndex8_16b_4444); break;
-        case FORMAT_24B_6666:	WriteCall(Color_ReadIndex8_24b_6666); break;
-        case FORMAT_32B_8888:	WriteCall(Color_ReadIndex8_32b_8888); break;
-        default: _assert_(0); break;
-        }
-        break;
-    case INDEX16:
-        switch (format)
-        {
-        case FORMAT_16B_565:	WriteCall(Color_ReadIndex16_16b_565); break;
-        case FORMAT_24B_888:	WriteCall(Color_ReadIndex16_24b_888); break;
-        case FORMAT_32B_888x:	WriteCall(Color_ReadIndex16_32b_888x); break;
-        case FORMAT_16B_4444:	WriteCall(Color_ReadIndex16_16b_4444); break;
-        case FORMAT_24B_6666:	WriteCall(Color_ReadIndex16_24b_6666); break;
-        case FORMAT_32B_8888:	WriteCall(Color_ReadIndex16_32b_8888); break;
-        default: _assert_(0); break;
-        }
-        break;
-    }
+	m_NativeFmt.m_components |= VB_HAS_COL0 << num;
+	switch (mode)
+	{
+	case NOT_PRESENT: 
+		m_NativeFmt.m_components &= ~(VB_HAS_COL0 << num);
+		break;
+	case DIRECT:
+		switch (format)
+		{
+		case FORMAT_16B_565:	WriteCall(Color_ReadDirect_16b_565); break;
+		case FORMAT_24B_888:	WriteCall(Color_ReadDirect_24b_888); break;
+		case FORMAT_32B_888x:	WriteCall(Color_ReadDirect_32b_888x); break;
+		case FORMAT_16B_4444:	WriteCall(Color_ReadDirect_16b_4444); break;
+		case FORMAT_24B_6666:	WriteCall(Color_ReadDirect_24b_6666); break;
+		case FORMAT_32B_8888:	WriteCall(Color_ReadDirect_32b_8888); break;
+		default: _assert_(0); break;
+		}
+		break;
+	case INDEX8:	
+		switch (format)
+		{
+		case FORMAT_16B_565:	WriteCall(Color_ReadIndex8_16b_565); break;
+		case FORMAT_24B_888:	WriteCall(Color_ReadIndex8_24b_888); break;
+		case FORMAT_32B_888x:	WriteCall(Color_ReadIndex8_32b_888x); break;
+		case FORMAT_16B_4444:	WriteCall(Color_ReadIndex8_16b_4444); break;
+		case FORMAT_24B_6666:	WriteCall(Color_ReadIndex8_24b_6666); break;
+		case FORMAT_32B_8888:	WriteCall(Color_ReadIndex8_32b_8888); break;
+		default: _assert_(0); break;
+		}
+		break;
+	case INDEX16:
+		switch (format)
+		{
+		case FORMAT_16B_565:	WriteCall(Color_ReadIndex16_16b_565); break;
+		case FORMAT_24B_888:	WriteCall(Color_ReadIndex16_24b_888); break;
+		case FORMAT_32B_888x:	WriteCall(Color_ReadIndex16_32b_888x); break;
+		case FORMAT_16B_4444:	WriteCall(Color_ReadIndex16_16b_4444); break;
+		case FORMAT_24B_6666:	WriteCall(Color_ReadIndex16_24b_6666); break;
+		case FORMAT_32B_8888:	WriteCall(Color_ReadIndex16_32b_8888); break;
+		default: _assert_(0); break;
+		}
+		break;
+	}
 }
 
 void VertexLoader::SetupTexCoord(int num, int mode, int format, int elements, int _iFrac)
 {
-    m_NativeFmt.m_components |= VB_HAS_UV0 << num;
-    
-    switch (mode)
-    {
-    case NOT_PRESENT: 
-        m_NativeFmt.m_components &= ~(VB_HAS_UV0 << num);
-        break;
-    case DIRECT:
-        switch (format)
-        {
-        case FORMAT_UBYTE:	WriteCall(elements?TexCoord_ReadDirect_UByte2:TexCoord_ReadDirect_UByte1);  break;
-        case FORMAT_BYTE:	WriteCall(elements?TexCoord_ReadDirect_Byte2:TexCoord_ReadDirect_Byte1);   break;
-        case FORMAT_USHORT:	WriteCall(elements?TexCoord_ReadDirect_UShort2:TexCoord_ReadDirect_UShort1); break;
-        case FORMAT_SHORT:	WriteCall(elements?TexCoord_ReadDirect_Short2:TexCoord_ReadDirect_Short1);  break;
-        case FORMAT_FLOAT:	WriteCall(elements?TexCoord_ReadDirect_Float2:TexCoord_ReadDirect_Float1);  break;
-        default: _assert_(0); break;
-        }
-        break;
-    case INDEX8:	
-        switch (format)
-        {
-        case FORMAT_UBYTE:	WriteCall(elements?TexCoord_ReadIndex8_UByte2:TexCoord_ReadIndex8_UByte1);  break;
-        case FORMAT_BYTE:	WriteCall(elements?TexCoord_ReadIndex8_Byte2:TexCoord_ReadIndex8_Byte1);   break;
-        case FORMAT_USHORT:	WriteCall(elements?TexCoord_ReadIndex8_UShort2:TexCoord_ReadIndex8_UShort1); break;
-        case FORMAT_SHORT:	WriteCall(elements?TexCoord_ReadIndex8_Short2:TexCoord_ReadIndex8_Short1);  break;
-        case FORMAT_FLOAT:	WriteCall(elements?TexCoord_ReadIndex8_Float2:TexCoord_ReadIndex8_Float1);  break;
-        default: _assert_(0); break;
-        }
-        break;
-    case INDEX16:
-        switch (format)
-        {
-        case FORMAT_UBYTE:	WriteCall(elements?TexCoord_ReadIndex16_UByte2:TexCoord_ReadIndex16_UByte1);  break;
-        case FORMAT_BYTE:	WriteCall(elements?TexCoord_ReadIndex16_Byte2:TexCoord_ReadIndex16_Byte1);   break;
-        case FORMAT_USHORT:	WriteCall(elements?TexCoord_ReadIndex16_UShort2:TexCoord_ReadIndex16_UShort1); break;
-        case FORMAT_SHORT:	WriteCall(elements?TexCoord_ReadIndex16_Short2:TexCoord_ReadIndex16_Short1);  break;
-        case FORMAT_FLOAT:	WriteCall(elements?TexCoord_ReadIndex16_Float2:TexCoord_ReadIndex16_Float1);  break;
-        default: _assert_(0);
-        }
-        break;
-    }
+	m_NativeFmt.m_components |= VB_HAS_UV0 << num;
+	
+	switch (mode)
+	{
+	case NOT_PRESENT: 
+		m_NativeFmt.m_components &= ~(VB_HAS_UV0 << num);
+		break;
+	case DIRECT:
+		switch (format)
+		{
+		case FORMAT_UBYTE:	WriteCall(elements?TexCoord_ReadDirect_UByte2:TexCoord_ReadDirect_UByte1);  break;
+		case FORMAT_BYTE:	WriteCall(elements?TexCoord_ReadDirect_Byte2:TexCoord_ReadDirect_Byte1);   break;
+		case FORMAT_USHORT:	WriteCall(elements?TexCoord_ReadDirect_UShort2:TexCoord_ReadDirect_UShort1); break;
+		case FORMAT_SHORT:	WriteCall(elements?TexCoord_ReadDirect_Short2:TexCoord_ReadDirect_Short1);  break;
+		case FORMAT_FLOAT:	WriteCall(elements?TexCoord_ReadDirect_Float2:TexCoord_ReadDirect_Float1);  break;
+		default: _assert_(0); break;
+		}
+		break;
+	case INDEX8:	
+		switch (format)
+		{
+		case FORMAT_UBYTE:	WriteCall(elements?TexCoord_ReadIndex8_UByte2:TexCoord_ReadIndex8_UByte1);  break;
+		case FORMAT_BYTE:	WriteCall(elements?TexCoord_ReadIndex8_Byte2:TexCoord_ReadIndex8_Byte1);   break;
+		case FORMAT_USHORT:	WriteCall(elements?TexCoord_ReadIndex8_UShort2:TexCoord_ReadIndex8_UShort1); break;
+		case FORMAT_SHORT:	WriteCall(elements?TexCoord_ReadIndex8_Short2:TexCoord_ReadIndex8_Short1);  break;
+		case FORMAT_FLOAT:	WriteCall(elements?TexCoord_ReadIndex8_Float2:TexCoord_ReadIndex8_Float1);  break;
+		default: _assert_(0); break;
+		}
+		break;
+	case INDEX16:
+		switch (format)
+		{
+		case FORMAT_UBYTE:	WriteCall(elements?TexCoord_ReadIndex16_UByte2:TexCoord_ReadIndex16_UByte1);  break;
+		case FORMAT_BYTE:	WriteCall(elements?TexCoord_ReadIndex16_Byte2:TexCoord_ReadIndex16_Byte1);   break;
+		case FORMAT_USHORT:	WriteCall(elements?TexCoord_ReadIndex16_UShort2:TexCoord_ReadIndex16_UShort1); break;
+		case FORMAT_SHORT:	WriteCall(elements?TexCoord_ReadIndex16_Short2:TexCoord_ReadIndex16_Short1);  break;
+		case FORMAT_FLOAT:	WriteCall(elements?TexCoord_ReadIndex16_Float2:TexCoord_ReadIndex16_Float1);  break;
+		default: _assert_(0);
+		}
+		break;
+	}
 }
 
 void VertexLoader::WriteCall(TPipelineFunction func)
@@ -514,194 +476,176 @@ void VertexLoader::WriteCall(TPipelineFunction func)
 	m_PipelineStages[m_numPipelineStages++] = func;
 }
 
-void VertexLoader::RunVertices(int primitive, int count)
+void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
 {
-    DVSTARTPROFILE();
+	DVSTARTPROFILE();
 
 	// Flush if our vertex format is different from the currently set.
-	// TODO - this check should be moved.
-    if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != &m_NativeFmt)
+	if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != &m_NativeFmt)
 	{
-        VertexManager::Flush();
+		VertexManager::Flush();
 		// Also move the Set() here?
 	}
 	g_nativeVertexFmt = &m_NativeFmt;
 
-	// This has dirty handling - won't actually recompute unless necessary.
-	ComputeVertexSize();
-
-    if (bpmem.genMode.cullmode == 3 && primitive < 5)
+	if (bpmem.genMode.cullmode == 3 && primitive < 5)
 	{
-        // if cull mode is none, ignore triangles and quads
+		// cullmode 3 is presumably "cull all" rather than "none" (TODO confirm): triangle and quad primitives (primitive < 5) are skipped, never drawn
 		DataSkip(count * m_VertexSize);
-        return;
-    }
-
-	// This has dirty handling - won't actually recompute unless necessary.
-    CompileVertexTranslator();
+		return;
+	}
 
 	VertexManager::EnableComponents(m_NativeFmt.m_components);
 
-    // Load position and texcoord scale factors.
-	// Hm, this could be done when the VtxAttr is set, instead.
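+	// Load position and texcoord scale factors. The frac bits are masked out of VertexLoaderUID, so one cached
+	// loader may serve VAT settings that differ only in them; refresh them from g_VtxAttr on every call.
+	// TODO - figure out if we should leave these independent, or compile them into the vertexloaders.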
+	m_VtxAttr.PosFrac				= g_VtxAttr[vtx_attr_group].g0.PosFrac;
+	m_VtxAttr.texCoord[0].Frac		= g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
+	m_VtxAttr.texCoord[1].Frac		= g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
+	m_VtxAttr.texCoord[2].Frac		= g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
+	m_VtxAttr.texCoord[3].Frac      = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
+	m_VtxAttr.texCoord[4].Frac		= g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
+	m_VtxAttr.texCoord[5].Frac		= g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
+	m_VtxAttr.texCoord[6].Frac		= g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
+	m_VtxAttr.texCoord[7].Frac		= g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
+
 	posScale = shiftLookup[m_VtxAttr.PosFrac];
-    if (m_NativeFmt.m_components & VB_HAS_UVALL) {
-        for (int i = 0; i < 8; i++) {
-            tcScaleU[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
-            tcScaleV[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
-        }
-    }
-    for (int i = 0; i < 2; i++)
-        colElements[i] = m_VtxAttr.color[i].Elements;
+	if (m_NativeFmt.m_components & VB_HAS_UVALL) {
+		for (int i = 0; i < 8; i++) {
+			tcScaleU[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
+			tcScaleV[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
+		}
+	}
+	for (int i = 0; i < 2; i++)
+		colElements[i] = m_VtxAttr.color[i].Elements;
 
-    // if strips or fans, make sure all vertices can fit in buffer, otherwise flush
-    int granularity = 1;
-    switch (primitive) {
-        case 3: // strip
-        case 4: // fan
-            if (VertexManager::GetRemainingSize() < 3 * m_NativeFmt.m_VBVertexStride )
-                VertexManager::Flush();
-            break;
-        case 6: // line strip
-            if (VertexManager::GetRemainingSize() < 2 * m_NativeFmt.m_VBVertexStride )
-                VertexManager::Flush();
-            break;
-        case 0: // quads
-            granularity = 4;
-            break;
-        case 2: // tris
-            granularity = 3;
-            break;
-        case 5: // lines
-            granularity = 2;
-            break;
-    }
+	// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
+	int granularity = 1;
+	switch (primitive) {
+		case 3: // strip
+		case 4: // fan
+			if (VertexManager::GetRemainingSize() < 3 * m_NativeFmt.m_VBVertexStride)
+				VertexManager::Flush();
+			break;
+		case 6: // line strip
+			if (VertexManager::GetRemainingSize() < 2 * m_NativeFmt.m_VBVertexStride)
+				VertexManager::Flush();
+			break;
+		case 0: // quads
+			granularity = 4;
+			break;
+		case 2: // tris
+			granularity = 3;
+			break;
+		case 5: // lines
+			granularity = 2;
+			break;
+	}
 
-    int startv = 0, extraverts = 0;
-    for (int v = 0; v < count; v++)
+	int startv = 0, extraverts = 0;
+	for (int v = 0; v < count; v++)
 	{
-        if ((v % granularity) == 0)
+		if ((v % granularity) == 0)
 		{
-            if (VertexManager::GetRemainingSize() < granularity*m_NativeFmt.m_VBVertexStride) {
+			if (VertexManager::GetRemainingSize() < granularity*m_NativeFmt.m_VBVertexStride) {
 				// This buffer full - break current primitive and flush, to switch to the next buffer.
-                u8* plastptr = VertexManager::s_pCurBufferPointer;
-                if (v - startv > 0)
-                    VertexManager::AddVertices(primitive, v - startv + extraverts);
-                VertexManager::Flush();
+				u8* plastptr = VertexManager::s_pCurBufferPointer;
+				if (v - startv > 0)
+					VertexManager::AddVertices(primitive, v - startv + extraverts);
+				VertexManager::Flush();
 				// Why does this need to be so complicated?
-                switch (primitive) {
-                    case 3: // triangle strip, copy last two vertices
-                        // a little trick since we have to keep track of signs
-                        if (v & 1) {
-                            memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*m_NativeFmt.m_VBVertexStride, m_NativeFmt.m_VBVertexStride);
-                            memcpy_gc(VertexManager::s_pCurBufferPointer+m_NativeFmt.m_VBVertexStride, plastptr-m_NativeFmt.m_VBVertexStride*2, 2*m_NativeFmt.m_VBVertexStride);
-                            VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBVertexStride*3;
-                            extraverts = 3;
-                        }
-                        else {
-                            memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-m_NativeFmt.m_VBVertexStride*2, m_NativeFmt.m_VBVertexStride*2);
-                            VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBVertexStride*2;
-                            extraverts = 2;
-                        }
-                        break;
-                    case 4: // tri fan, copy first and last vert
-                        memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-m_NativeFmt.m_VBVertexStride*(v-startv+extraverts), m_NativeFmt.m_VBVertexStride);
-                        VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBVertexStride;
-                        memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-m_NativeFmt.m_VBVertexStride, m_NativeFmt.m_VBVertexStride);
-                        VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBVertexStride;
-                        extraverts = 2;
-                        break;
-                    case 6: // line strip
-                        memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-m_NativeFmt.m_VBVertexStride, m_NativeFmt.m_VBVertexStride);
-                        VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBVertexStride;
-                        extraverts = 1;
-                        break;
-                    default:
-                        extraverts = 0;
+				switch (primitive) {
+					case 3: // triangle strip, copy last two vertices
+						// on an odd restart index the second-to-last vertex is copied twice (3 verts instead of 2), presumably to preserve the strip's winding parity
+						if (v & 1) {
+							memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*m_NativeFmt.m_VBVertexStride, m_NativeFmt.m_VBVertexStride);
+							memcpy_gc(VertexManager::s_pCurBufferPointer+m_NativeFmt.m_VBVertexStride, plastptr-m_NativeFmt.m_VBVertexStride*2, 2*m_NativeFmt.m_VBVertexStride);
+							VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBVertexStride*3;
+							extraverts = 3;
+						}
+						else {
+							memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-m_NativeFmt.m_VBVertexStride*2, m_NativeFmt.m_VBVertexStride*2);
+							VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBVertexStride*2;
+							extraverts = 2;
+						}
 						break;
-                }
-                startv = v;
-            }
-        }
-        tcIndex = 0;
-        colIndex = 0;
-        s_texmtxwrite = s_texmtxread = 0;
+					case 4: // tri fan, copy first and last vert
+						memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-m_NativeFmt.m_VBVertexStride*(v-startv+extraverts), m_NativeFmt.m_VBVertexStride);
+						VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBVertexStride;
+						memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-m_NativeFmt.m_VBVertexStride, m_NativeFmt.m_VBVertexStride);
+						VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBVertexStride;
+						extraverts = 2;
+						break;
+					case 6: // line strip
+						memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-m_NativeFmt.m_VBVertexStride, m_NativeFmt.m_VBVertexStride);
+						VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBVertexStride;
+						extraverts = 1;
+						break;
+					default:
+						extraverts = 0;
+						break;
+				}
+				startv = v;
+			}
+		}
+		tcIndex = 0;
+		colIndex = 0;
+		s_texmtxwrite = s_texmtxread = 0;
 
-		RunPipelineOnce();
+		for (int i = 0; i < m_numPipelineStages; i++)
+			m_PipelineStages[i](&m_VtxAttr);
 
-        VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBStridePad;
-        PRIM_LOG("\n");
-    }
+		VertexManager::s_pCurBufferPointer += m_NativeFmt.m_VBStridePad;
+		PRIM_LOG("\n");
+	}
 
-    if (startv < count)
-        VertexManager::AddVertices(primitive, count - startv + extraverts);
+	if (startv < count)
+		VertexManager::AddVertices(primitive, count - startv + extraverts);
 }
 
-void VertexLoader::RunPipelineOnce() const
+void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2) 
 {
-	for (int i = 0; i < m_numPipelineStages; i++)
-		m_PipelineStages[i](&m_VtxAttr);
-}
+	VAT vat;
+	vat.g0.Hex = _group0;
+	vat.g1.Hex = _group1;
+	vat.g2.Hex = _group2;
 
-void VertexLoader::SetVAT_group0(u32 _group0) 
-{
-	// ignore frac bits - we don't need to recompute if all that's changed was the frac bits.
-    if ((m_group0.Hex & ~VAT_0_FRACBITS) != (_group0 & ~VAT_0_FRACBITS)) {
-        m_AttrDirty = AD_VAT_DIRTY;
-    }
-    m_group0.Hex = _group0;
+	m_VtxAttr.PosElements			= vat.g0.PosElements;
+	m_VtxAttr.PosFormat				= vat.g0.PosFormat;
+	m_VtxAttr.PosFrac				= vat.g0.PosFrac;
+	m_VtxAttr.NormalElements		= vat.g0.NormalElements;
+	m_VtxAttr.NormalFormat			= vat.g0.NormalFormat;
+	m_VtxAttr.color[0].Elements		= vat.g0.Color0Elements;
+	m_VtxAttr.color[0].Comp			= vat.g0.Color0Comp;
+	m_VtxAttr.color[1].Elements		= vat.g0.Color1Elements;
+	m_VtxAttr.color[1].Comp			= vat.g0.Color1Comp;
+	m_VtxAttr.texCoord[0].Elements	= vat.g0.Tex0CoordElements;
+	m_VtxAttr.texCoord[0].Format	= vat.g0.Tex0CoordFormat;
+	m_VtxAttr.texCoord[0].Frac		= vat.g0.Tex0Frac;
+	m_VtxAttr.ByteDequant			= vat.g0.ByteDequant;
+	m_VtxAttr.NormalIndex3			= vat.g0.NormalIndex3;
 
-    m_VtxAttr.PosElements			= m_group0.PosElements;
-    m_VtxAttr.PosFormat				= m_group0.PosFormat;
-    m_VtxAttr.PosFrac				= m_group0.PosFrac;
-    m_VtxAttr.NormalElements		= m_group0.NormalElements;
-    m_VtxAttr.NormalFormat			= m_group0.NormalFormat;
-    m_VtxAttr.color[0].Elements		= m_group0.Color0Elements;
-    m_VtxAttr.color[0].Comp			= m_group0.Color0Comp;
-    m_VtxAttr.color[1].Elements		= m_group0.Color1Elements;
-    m_VtxAttr.color[1].Comp			= m_group0.Color1Comp;
-    m_VtxAttr.texCoord[0].Elements	= m_group0.Tex0CoordElements;
-    m_VtxAttr.texCoord[0].Format	= m_group0.Tex0CoordFormat;
-    m_VtxAttr.texCoord[0].Frac		= m_group0.Tex0Frac;
-    m_VtxAttr.ByteDequant			= m_group0.ByteDequant;
-    m_VtxAttr.NormalIndex3			= m_group0.NormalIndex3;
-};
-
-void VertexLoader::SetVAT_group1(u32 _group1) 
-{
-    if ((m_group1.Hex & ~VAT_1_FRACBITS) != (_group1 & ~VAT_1_FRACBITS)) {
-        m_AttrDirty = AD_VAT_DIRTY;
-    }
-    m_group1.Hex = _group1;
-
-    m_VtxAttr.texCoord[1].Elements	= m_group1.Tex1CoordElements;
-    m_VtxAttr.texCoord[1].Format	= m_group1.Tex1CoordFormat;
-    m_VtxAttr.texCoord[1].Frac		= m_group1.Tex1Frac;
-    m_VtxAttr.texCoord[2].Elements	= m_group1.Tex2CoordElements;
-    m_VtxAttr.texCoord[2].Format	= m_group1.Tex2CoordFormat;
-    m_VtxAttr.texCoord[2].Frac		= m_group1.Tex2Frac;
-    m_VtxAttr.texCoord[3].Elements	= m_group1.Tex3CoordElements;
-    m_VtxAttr.texCoord[3].Format	= m_group1.Tex3CoordFormat;
-    m_VtxAttr.texCoord[3].Frac      = m_group1.Tex3Frac;
-    m_VtxAttr.texCoord[4].Elements	= m_group1.Tex4CoordElements;
-    m_VtxAttr.texCoord[4].Format	= m_group1.Tex4CoordFormat;
-};									  
-                                      
-void VertexLoader::SetVAT_group2(u32 _group2)		  
-{
-    if ((m_group2.Hex & ~VAT_2_FRACBITS) != (_group2 & ~VAT_2_FRACBITS)) {
-        m_AttrDirty = AD_VAT_DIRTY;
-    }
-    m_group2.Hex = _group2;
-
-    m_VtxAttr.texCoord[4].Frac		= m_group2.Tex4Frac;
-    m_VtxAttr.texCoord[5].Elements	= m_group2.Tex5CoordElements;
-    m_VtxAttr.texCoord[5].Format	= m_group2.Tex5CoordFormat;
-    m_VtxAttr.texCoord[5].Frac		= m_group2.Tex5Frac;
-    m_VtxAttr.texCoord[6].Elements	= m_group2.Tex6CoordElements;
-    m_VtxAttr.texCoord[6].Format	= m_group2.Tex6CoordFormat;
-    m_VtxAttr.texCoord[6].Frac		= m_group2.Tex6Frac;
-    m_VtxAttr.texCoord[7].Elements	= m_group2.Tex7CoordElements;
-    m_VtxAttr.texCoord[7].Format	= m_group2.Tex7CoordFormat;
-    m_VtxAttr.texCoord[7].Frac		= m_group2.Tex7Frac;
+	m_VtxAttr.texCoord[1].Elements	= vat.g1.Tex1CoordElements;
+	m_VtxAttr.texCoord[1].Format	= vat.g1.Tex1CoordFormat;
+	m_VtxAttr.texCoord[1].Frac		= vat.g1.Tex1Frac;
+	m_VtxAttr.texCoord[2].Elements	= vat.g1.Tex2CoordElements;
+	m_VtxAttr.texCoord[2].Format	= vat.g1.Tex2CoordFormat;
+	m_VtxAttr.texCoord[2].Frac		= vat.g1.Tex2Frac;
+	m_VtxAttr.texCoord[3].Elements	= vat.g1.Tex3CoordElements;
+	m_VtxAttr.texCoord[3].Format	= vat.g1.Tex3CoordFormat;
+	m_VtxAttr.texCoord[3].Frac      = vat.g1.Tex3Frac;
+	m_VtxAttr.texCoord[4].Elements	= vat.g1.Tex4CoordElements;
+	m_VtxAttr.texCoord[4].Format	= vat.g1.Tex4CoordFormat;
+
+	m_VtxAttr.texCoord[4].Frac		= vat.g2.Tex4Frac;
+	m_VtxAttr.texCoord[5].Elements	= vat.g2.Tex5CoordElements;
+	m_VtxAttr.texCoord[5].Format	= vat.g2.Tex5CoordFormat;
+	m_VtxAttr.texCoord[5].Frac		= vat.g2.Tex5Frac;
+	m_VtxAttr.texCoord[6].Elements	= vat.g2.Tex6CoordElements;
+	m_VtxAttr.texCoord[6].Format	= vat.g2.Tex6CoordFormat;
+	m_VtxAttr.texCoord[6].Frac		= vat.g2.Tex6Frac;
+	m_VtxAttr.texCoord[7].Elements	= vat.g2.Tex7CoordElements;
+	m_VtxAttr.texCoord[7].Format	= vat.g2.Tex7CoordFormat;
+	m_VtxAttr.texCoord[7].Frac		= vat.g2.Tex7Frac;
 };
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h
index 4d2dd9ed7f..7e31c4ab5c 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.h
@@ -23,66 +23,72 @@
 
 #include "NativeVertexFormat.h"
 
+class VertexLoaderUID  // Ordered key for the vertex loader cache: vertex descriptor + one VAT group, frac bits excluded.
+{
+	u32 id[5];  // [0],[1]: g_VtxDesc.Hex split into 32-bit words; [2]..[4]: VAT g0/g1/g2 with the *_FRACBITS masks cleared.
+public:
+	VertexLoaderUID() {}  // NOTE: id[] stays uninitialized until InitFromCurrentState() is called.
+	void InitFromCurrentState(int vtx_attr_group) {  // Snapshot the globals g_VtxDesc and g_VtxAttr[vtx_attr_group].
+		id[0] = g_VtxDesc.Hex & 0xFFFFFFFF;  // low 32 bits of the descriptor
+		id[1] = g_VtxDesc.Hex >> 32;  // remaining upper bits
+		id[2] = g_VtxAttr[vtx_attr_group].g0.Hex & ~VAT_0_FRACBITS;  // frac bits dropped: RunVertices reloads them from g_VtxAttr on each call
+		id[3] = g_VtxAttr[vtx_attr_group].g1.Hex & ~VAT_1_FRACBITS;
+		id[4] = g_VtxAttr[vtx_attr_group].g2.Hex & ~VAT_2_FRACBITS;
+	}
+	bool operator < (const VertexLoaderUID &other) const {  // Lexicographic comparison of id[0..4]; lets the UID be a std::map key.
+		if (id[0] < other.id[0])  // same test the loop below performs, written out for element 0
+			return true;
+		else if (id[0] > other.id[0])
+			return false;
+		for (int i = 1; i < 5; ++i) {
+			if (id[i] < other.id[i])
+				return true;
+			else if (id[i] > other.id[i])
+				return false;
+		}
+		return false;  // all five words are equal
+	}
+};
+
 class VertexLoader
 {
 public:
-    enum
-    {
-        NRM_ZERO = 0,
-        NRM_ONE = 1,
-        NRM_THREE = 3
-    };
+	VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
+	~VertexLoader();
+
+	int GetVertexSize() const {return m_VertexSize;}
+	void RunVertices(int vtx_attr_group, int primitive, int count);
 
 private:
-	// The 3 possible values (0, 1, 2) should be documented here.
-	enum {
-		AD_CLEAN = 0,
-		AD_DIRTY = 1,
-		AD_VAT_DIRTY = 2,
-	} m_AttrDirty;
+	enum
+	{
+		NRM_ZERO = 0,
+		NRM_ONE = 1,
+		NRM_THREE = 3,
+	};
 
-    int m_VertexSize;      // number of bytes of a raw GC vertex
+	int m_VertexSize;      // number of bytes of a raw GC vertex
 
-	// Flipper vertex format
+	// GC vertex format
+	TVtxAttr m_VtxAttr;  // VAT decoded into easy format
+	TVtxDesc m_VtxDesc;  // Not really used currently - or well it is, but could be easily avoided.
 
-	// Raw VAttr
-    UVAT_group0 m_group0;
-    UVAT_group1 m_group1;
-    UVAT_group2 m_group2;
-    TVtxAttr m_VtxAttr;  // Decoded into easy format
-
-    // Vtx desc
-    TVtxDesc m_VtxDesc;
-
-	// PC vertex format, + converter
+	// PC vertex format
 	NativeVertexFormat m_NativeFmt;
 
 	// Pipeline. To be JIT compiled in the future.
-	TPipelineFunction m_PipelineStages[32];
+	TPipelineFunction m_PipelineStages[32];  // TODO - figure out real max. it's lower.
 	int m_numPipelineStages;
 
-    void SetupColor(int num, int _iMode, int _iFormat, int _iElements);
-    void SetupTexCoord(int num, int _iMode, int _iFormat, int _iElements, int _iFrac);
-	void RunPipelineOnce() const;
+	void SetupColor(int num, int _iMode, int _iFormat, int _iElements);
+	void SetupTexCoord(int num, int _iMode, int _iFormat, int _iElements, int _iFrac);
 
-public:
-    // constructor
-    VertexLoader();
-    ~VertexLoader();
-    
-    // run the pipeline 
-    void CompileVertexTranslator();
-    void RunVertices(int primitive, int count);
-    void WriteCall(TPipelineFunction);
-    
-    int GetGCVertexSize()   const { _assert_( !m_AttrDirty ); return m_VertexSize; }
-    int GetVBVertexStride() const { _assert_( !m_AttrDirty);  return m_NativeFmt.m_VBVertexStride; }
+	void SetVAT(u32 _group0, u32 _group1, u32 _group2);
 
-    int ComputeVertexSize();
+	int ComputeVertexSize();
+	void CompileVertexTranslator();
 
-    void SetVAT_group0(u32 _group0);
-    void SetVAT_group1(u32 _group1);       
-    void SetVAT_group2(u32 _group2);
+	void WriteCall(TPipelineFunction);
 };									  
 
 #endif
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.cpp
index fac7b1d7cd..0b44f294ba 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.cpp
@@ -15,43 +15,78 @@
 // Official SVN repository and contact information can be found at
 // http://code.google.com/p/dolphin-emu/
 
+#include <map>
+
+#include "Statistics.h"
+
 #include "VertexShaderManager.h"
 #include "VertexLoader.h"
 #include "VertexLoaderManager.h"
 
-static bool s_desc_dirty;
-static bool s_attr_dirty[8];
+static int s_attr_dirty;  // bitfield
 
-// TODO - change into array of pointers. Keep a map of all seen so far.
-static VertexLoader g_VertexLoaders[8];
+static VertexLoader *g_VertexLoaders[8];
 
 namespace VertexLoaderManager
 {
 
+typedef std::map<VertexLoaderUID, VertexLoader *> VertexLoaderMap;
+static VertexLoaderMap g_VertexLoaderMap;
+// g_VertexLoaderMap owns every loader created so far; g_VertexLoaders[] only holds borrowed pointers into it.
+
 void Init()
 {
-	s_desc_dirty = false;
-	for (int i = 0; i < 8; i++)
-		s_attr_dirty[i] = false;
+	MarkAllDirty();
 }
 
 void Shutdown()
 {
+	// Delete every cached loader (the map owns them) and forget the per-group pointers, which would otherwise dangle.
+	for (VertexLoaderMap::iterator iter = g_VertexLoaderMap.begin(); iter != g_VertexLoaderMap.end(); ++iter)
+		delete iter->second;
+	g_VertexLoaderMap.clear();
+	for (int i = 0; i < 8; i++) g_VertexLoaders[i] = 0;
+}
 
+void MarkAllDirty()  // Force every VAT group to re-resolve its loader the next time it is used.
+{
+	s_attr_dirty = 0xff;  // bit i = group i; all 8 groups flagged
+}
+
+void RefreshLoader(int vtx_attr_group)  // Make g_VertexLoaders[vtx_attr_group] match the current g_VtxDesc / VAT state.
+{
+	if (((s_attr_dirty >> vtx_attr_group) & 1) || !g_VertexLoaders[vtx_attr_group])  // dirty, or never resolved
+	{
+		VertexLoaderUID uid;
+		uid.InitFromCurrentState(vtx_attr_group);
+		VertexLoaderMap::iterator iter = g_VertexLoaderMap.find(uid);
+		if (iter != g_VertexLoaderMap.end())
+		{
+			g_VertexLoaders[vtx_attr_group] = iter->second;  // cache hit: reuse the loader built earlier
+		}
+		else
+		{
+			VertexLoader *loader = new VertexLoader(g_VtxDesc, g_VtxAttr[vtx_attr_group]);  // cache miss: build one
+			g_VertexLoaderMap[uid] = loader;  // the map owns it; deleted in Shutdown()
+			g_VertexLoaders[vtx_attr_group] = loader;
+			INCSTAT(stats.numVertexLoaders);
+		}
+	}
+	s_attr_dirty &= ~(1 << vtx_attr_group);  // this group is now up to date
 }
 
 void RunVertices(int vtx_attr_group, int primitive, int count)
 {
 	if (!count)
 		return;
-	// TODO - grab & load the correct vertex loader if anything is dirty.
-	g_VertexLoaders[vtx_attr_group].RunVertices(primitive, count);
+	RefreshLoader(vtx_attr_group);
+	g_VertexLoaders[vtx_attr_group]->RunVertices(vtx_attr_group, primitive, count);
 }
 
 int GetVertexSize(int vtx_attr_group)
 {
-	// The vertex loaders will soon cache the vertex size.
-	return g_VertexLoaders[vtx_attr_group].ComputeVertexSize();
+	RefreshLoader(vtx_attr_group);
+	return g_VertexLoaders[vtx_attr_group]->GetVertexSize();
 }
 
 }  // namespace
@@ -71,34 +106,31 @@ void LoadCPReg(u32 sub_cmd, u32 value)
 	case 0x50:
 		g_VtxDesc.Hex &= ~0x1FFFF;  // keep the Upper bits
 		g_VtxDesc.Hex |= value;
-		s_desc_dirty = true;
+		s_attr_dirty = 0xFF;
 		break;
 
 	case 0x60:
 		g_VtxDesc.Hex &= 0x1FFFF;  // keep the lower 17Bits
 		g_VtxDesc.Hex |= (u64)value << 17;
-		s_desc_dirty = true;
+		s_attr_dirty = 0xFF;
 		break;
 
 	case 0x70:
 		_assert_((sub_cmd & 0x0F) < 8);
 		g_VtxAttr[sub_cmd & 7].g0.Hex = value;
-		g_VertexLoaders[sub_cmd & 7].SetVAT_group0(value);
-		s_attr_dirty[sub_cmd & 7] = true;
+		s_attr_dirty |= 1 << (sub_cmd & 7);
 		break;
 
 	case 0x80:
 		_assert_((sub_cmd & 0x0F) < 8);
 		g_VtxAttr[sub_cmd & 7].g1.Hex = value;
-		g_VertexLoaders[sub_cmd & 7].SetVAT_group1(value);
-		s_attr_dirty[sub_cmd & 7] = true;
+		s_attr_dirty |= 1 << (sub_cmd & 7);
 		break;
 
 	case 0x90:
 		_assert_((sub_cmd & 0x0F) < 8);
 		g_VtxAttr[sub_cmd & 7].g2.Hex = value;
-		g_VertexLoaders[sub_cmd & 7].SetVAT_group2(value);
-		s_attr_dirty[sub_cmd & 7] = true;
+		s_attr_dirty |= 1 << (sub_cmd & 7);
 		break;
 
 	// Pointers to vertex arrays in GC RAM
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.h
index 59028f5891..88611707ed 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.h
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoaderManager.h
@@ -25,6 +25,8 @@ namespace VertexLoaderManager
     void Init();
     void Shutdown();
 
+	void MarkAllDirty();
+
 	int GetVertexSize(int vtx_attr_group);
     void RunVertices(int vtx_attr_group, int primitive, int count);
 
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp
index d5eba1cef0..0331b6fb8a 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp
@@ -104,7 +104,7 @@ VERTEXSHADER* VertexShaderMngr::GetShader(u32 components)
     VSCache::iterator iter = vshaders.find(uid);
 
     if (iter != vshaders.end()) {
-        iter->second.frameCount=frameCount;
+        iter->second.frameCount = frameCount;
         VSCacheEntry &entry = iter->second;
         if (&entry.shader != pShaderLast) {
             pShaderLast = &entry.shader;
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp
index fe68f85876..1aab16a515 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp
@@ -116,7 +116,7 @@ void DllConfig(HWND _hParent)
 			resFound = (resos[b] == strBuffer);
 			b++;
 		}
-		if(!resFound)
+		if (!resFound)
 		//and add the res
 		{
 			resos[i] = strBuffer;
@@ -196,8 +196,11 @@ void Video_Initialize(SVideoInitialize* _pVideoInitialize)
 
 void Video_DoState(unsigned char **ptr, int mode) {
 
-	// Clear all caches
+	// Clear all caches that touch RAM
 	TextureMngr::Invalidate();
+	// DisplayListManager::Invalidate();
+
+	VertexLoaderManager::MarkAllDirty();
 
 	PointerWrap p(ptr, mode);
 	VideoCommon_DoState(p);