diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt
index 1bbc69a150..a4dad8293b 100644
--- a/Source/Core/VideoCommon/CMakeLists.txt
+++ b/Source/Core/VideoCommon/CMakeLists.txt
@@ -11,7 +11,6 @@ set(SRCS	Src/BPFunctions.cpp
 			Src/HiresTextures.cpp
 			Src/ImageWrite.cpp
 			Src/IndexGenerator.cpp
-			Src/LightingShaderGen.cpp
 			Src/MainBase.cpp
 			Src/OnScreenDisplay.cpp
 			Src/OpcodeDecoding.cpp
diff --git a/Source/Core/VideoCommon/Src/LightingShaderGen.cpp b/Source/Core/VideoCommon/Src/LightingShaderGen.cpp
deleted file mode 100644
index 58da3612f5..0000000000
--- a/Source/Core/VideoCommon/Src/LightingShaderGen.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2013 Dolphin Emulator Project
-// Licensed under GPLv2
-// Refer to the license.txt file included.
-
-#include "LightingShaderGen.h"
-#include "NativeVertexFormat.h"
-#include "XFMemory.h"
diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
index 1ec80ba56d..dd284ced7d 100644
--- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp
@@ -535,6 +535,11 @@ static void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE Api
 	for (unsigned int i = 0; i < numStages; i++)
 		WriteStage<T>(out, uid_data, i, ApiType, RegisterStates); // build the equation for this stage
 
+#define MY_STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str)))
+	// Count of u32 words of uid_data to compare: the whole struct with per-pixel lighting, else only the words preceding stagehash[numStages].
+	bool enable_pl = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
+	uid_data.num_values = (enable_pl) ? sizeof(uid_data)/sizeof(u32) : MY_STRUCT_OFFSET(uid_data,stagehash[numStages])/sizeof(u32);
+
 	if (numStages)
 	{
 		// The results of the last texenv stage are put onto the screen,
@@ -706,13 +711,11 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE
 
 	out.Write("// TEV stage %d\n", n);
 
-	uid_data.bHasIndStage |= bHasIndStage << n;
-	uid_data.tevorders_n_texcoord |= (u64)texcoord << (3 * n);
+	uid_data.stagehash[n].hasindstage = bHasIndStage;
+	uid_data.stagehash[n].tevorders_texcoord = texcoord;
 	if (bHasIndStage)
 	{
-		uid_data.tevind_n_bs |= bpmem.tevind[n].bs << (2*n);
-		uid_data.tevind_n_bt |= bpmem.tevind[n].bt << (2*n);
-		uid_data.tevind_n_fmt |= bpmem.tevind[n].fmt << (2*n);
+		uid_data.stagehash[n].tevind = bpmem.tevind[n].hex & 0x1FFFFF; // mask matches the 21-bit tevind field
 
 		out.Write("// indirect op\n");
 		// perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
@@ -727,12 +730,10 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE
 		out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]);
 
 		// bias
-		uid_data.Set_tevind_bias(n, bpmem.tevind[n].bias);
 		if (bpmem.tevind[n].bias != ITB_NONE )
 			out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]);
 
 		// multiply by offset matrix and scale
-		uid_data.Set_tevind_mid(n, bpmem.tevind[n].mid);
 		if (bpmem.tevind[n].mid != 0)
 		{
 			if (bpmem.tevind[n].mid <= 3)
@@ -769,9 +770,6 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE
 		// ---------
 		// Wrapping
 		// ---------
-		uid_data.Set_tevind_sw(n, bpmem.tevind[n].sw);
-		uid_data.Set_tevind_tw(n, bpmem.tevind[n].tw);
-		uid_data.tevind_n_fb_addprev |= bpmem.tevind[n].fb_addprev << n;
 
 		// wrap S
 		if (bpmem.tevind[n].sw == ITW_OFF)
@@ -798,26 +796,8 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE
 	TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
 	TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC;
 
-	uid_data.cc_n_d |= (u64)cc.d << (4*n);
-	uid_data.cc_n_c |= (u64)cc.c << (4*n);
-	uid_data.cc_n_b |= (u64)cc.b << (4*n);
-	uid_data.cc_n_a |= (u64)cc.a << (4*n);
-	uid_data.cc_n_bias |= cc.bias << (2*n);
-	uid_data.cc_n_op |= cc.op << n;
-	uid_data.cc_n_clamp |= cc.clamp << n;
-	uid_data.cc_n_shift |= cc.shift << (2*n);
-	uid_data.cc_n_dest |= cc.dest << (2*n);
-	uid_data.ac_n_rswap |= ac.rswap << (2*n);
-	uid_data.ac_n_tswap |= ac.tswap << (2*n);
-	uid_data.ac_n_d |= (u64)ac.d << (3*n);
-	uid_data.ac_n_c |= (u64)ac.c << (3*n);
-	uid_data.ac_n_b |= (u64)ac.b << (3*n);
-	uid_data.ac_n_a |= (u64)ac.a << (3*n);
-	uid_data.ac_n_bias |= ac.bias << (2*n);
-	uid_data.ac_n_op |= ac.op << n;
-	uid_data.ac_n_clamp |= ac.clamp << n;
-	uid_data.ac_n_shift |= ac.shift << (2*n);
-	uid_data.ac_n_dest |= ac.dest << (2*n);
+	uid_data.stagehash[n].cc = cc.hex & 0xFFFFFF;
+	uid_data.stagehash[n].ac = ac.hex & 0xFFFFF0; // Storing rswap and tswap later
 
 	if(cc.a == TEVCOLORARG_RASA || cc.a == TEVCOLORARG_RASC
 		|| cc.b == TEVCOLORARG_RASA || cc.b == TEVCOLORARG_RASC
@@ -827,17 +807,19 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE
 		|| ac.c == TEVALPHAARG_RASA || ac.d == TEVALPHAARG_RASA)
 	{
 		const int i = bpmem.combiners[n].alphaC.rswap;
-		uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2  ].swap1 << (2 * (i*2  ));
-		uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1));
-		uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2  ].swap2 << (2 * (i*2  ));
-		uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1));
+		uid_data.stagehash[n].ac |= bpmem.combiners[n].alphaC.rswap;
+		uid_data.stagehash[n].tevksel_swap1a = bpmem.tevksel[i*2].swap1;
+		uid_data.stagehash[n].tevksel_swap2a = bpmem.tevksel[i*2].swap2;
+		uid_data.stagehash[n].tevksel_swap1b = bpmem.tevksel[i*2+1].swap1;
+		uid_data.stagehash[n].tevksel_swap2b = bpmem.tevksel[i*2+1].swap2;
+		uid_data.stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1);
 
 		char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
 		out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap);
 		out.Write("crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n");
 	}
 
-
+	uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1);
 	if (bpmem.tevorders[n/2].getEnable(n&1))
 	{
 		if (!bHasIndStage)
@@ -850,10 +832,13 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE
 		}
 
 		const int i = bpmem.combiners[n].alphaC.tswap;
-		uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2  ].swap1 << (2 * (i*2  ));
-		uid_data.tevksel_n_swap1 |= bpmem.tevksel[i*2+1].swap1 << (2 * (i*2+1));
-		uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2  ].swap2 << (2 * (i*2  ));
-		uid_data.tevksel_n_swap2 |= bpmem.tevksel[i*2+1].swap2 << (2 * (i*2+1));
+		uid_data.stagehash[n].ac |= bpmem.combiners[n].alphaC.tswap << 2;
+		uid_data.stagehash[n].tevksel_swap1c = bpmem.tevksel[i*2].swap1;
+		uid_data.stagehash[n].tevksel_swap2c = bpmem.tevksel[i*2].swap2;
+		uid_data.stagehash[n].tevksel_swap1d = bpmem.tevksel[i*2+1].swap1;
+		uid_data.stagehash[n].tevksel_swap2d = bpmem.tevksel[i*2+1].swap2;
+
+		uid_data.stagehash[n].tevorders_texmap= bpmem.tevorders[n/2].getTexMap(n&1);
 
 		char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
 		int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
@@ -871,8 +856,8 @@ static void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE
 	{
 		int kc = bpmem.tevksel[n / 2].getKC(n & 1);
 		int ka = bpmem.tevksel[n / 2].getKA(n & 1);
-		uid_data.set_tevksel_kcsel(n/2, n & 1, kc);
-		uid_data.set_tevksel_kasel(n/2, n & 1, ka);
+		uid_data.stagehash[n].tevksel_kc = kc;
+		uid_data.stagehash[n].tevksel_ka = ka;
 		out.Write("konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);
 		if(kc > 7 || ka > 7)
 		{
diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h
index 826ec561d0..150a69902e 100644
--- a/Source/Core/VideoCommon/Src/PixelShaderGen.h
+++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h
@@ -55,12 +55,14 @@ const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 },
 						{I_PMATERIALS, C_PMATERIALS, 4 },
 						};
 
-// TODO: Should compact packing be enabled?
-//#pragma pack(4)
+#pragma pack(1)
 struct pixel_shader_uid_data
 {
 	// TODO: Optimize field order for easy access!
 
+	u32 num_values; // TODO: Shouldn't be a u32
+	u32 NumValues() const { return num_values; }
+
 	u32 components;
 	u32 dstAlphaMode : 2;
 	u32 Pretest : 2;
@@ -96,69 +98,10 @@ struct pixel_shader_uid_data
 		else if (index == 3) { tevindref_bi4 = texmap; }
 	}
 
-	u64 tevorders_n_texcoord : 48; // 16 x 3 bits
-
-	u64 tevind_n_sw : 48;         // 16 x 3 bits
-	u64 tevind_n_tw : 48;         // 16 x 3 bits
-	u32 tevind_n_fb_addprev : 16; // 16 x 1 bit
-	u32 tevind_n_bs : 32;         // 16 x 2 bits
-	u32 tevind_n_fmt : 32;        // 16 x 2 bits
-	u32 tevind_n_bt : 32;         // 16 x 2 bits
-	u64 tevind_n_bias : 48;       // 16 x 3 bits
-	u64 tevind_n_mid : 64;        // 16 x 4 bits
-
-	// NOTE: These assume that the affected bits are zero before calling
-	void Set_tevind_sw(int index, u64 val)
-	{
-		tevind_n_sw |= val << (3*index);
-	}
-	void Set_tevind_tw(int index, u64 val)
-	{
-		tevind_n_tw |= val << (3*index);
-	}
-	void Set_tevind_bias(int index, u64 val)
-	{
-		tevind_n_bias |= val << (3*index);
-	}
-	void Set_tevind_mid(int index, u64 val)
-	{
-		tevind_n_mid |= val << (4*index);
-	}
-
-	u32 tevksel_n_swap1 : 16; // 8x2 bits
-	u32 tevksel_n_swap2 : 16; // 8x2 bits
-	u64 tevksel_n_kcsel0 : 40; // 8x5 bits
-	u64 tevksel_n_kasel0 : 40; // 8x5 bits
-	u64 tevksel_n_kcsel1 : 40; // 8x5 bits
-	u64 tevksel_n_kasel1 : 40; // 8x5 bits
-	void set_tevksel_kcsel(int index, int i, u64 value) { if (i) tevksel_n_kcsel1 |= value << (5*index); else tevksel_n_kcsel0 |= value << (5*index); }
-	void set_tevksel_kasel(int index, int i, u64 value) { if( i) tevksel_n_kasel1 |= value << (5*index); else tevksel_n_kasel0 |= value << (5*index); }
-
-	u64 cc_n_d : 64; // 16x4 bits
-	u64 cc_n_c : 64; // 16x4 bits
-	u64 cc_n_b : 64; // 16x4 bits
-	u64 cc_n_a : 64; // 16x4 bits
-	u32 cc_n_bias : 32; // 16x2 bits
-	u32 cc_n_op : 16; // 16x1 bit
-	u32 cc_n_clamp : 16; // 16x1 bit
-	u32 cc_n_shift : 32; // 16x2 bits
-	u32 cc_n_dest : 32; // 16x2 bits
-
-	u32 ac_n_rswap : 32; // 16x2 bits
-	u32 ac_n_tswap : 32; // 16x2 bits
-	u64 ac_n_d : 48; // 16x3 bits
-	u64 ac_n_c : 48; // 16x3 bits
-	u64 ac_n_b : 48; // 16x3 bits
-	u64 ac_n_a : 48; // 16x3 bits
-	u32 ac_n_bias : 32; // 16x2 bits
-	u32 ac_n_op : 16; // 16x1 bit
-	u32 ac_n_clamp : 16; // 16x1 bit
-	u32 ac_n_shift : 32; // 16x2 bits
-	u32 ac_n_dest : 32; // 16x2 bits
-
 	u32 alpha_test_comp0 : 3;
 	u32 alpha_test_comp1 : 3;
 	u32 alpha_test_logic : 2;
+
 	u32 alpha_test_use_zcomploc_hack : 1;
 
 	u32 fog_proj : 1;
@@ -169,14 +112,42 @@ struct pixel_shader_uid_data
 
 	u32 fast_depth_calc : 1;
 	u32 per_pixel_depth : 1;
-	u32 bHasIndStage : 16;
 
 	u32 xfregs_numTexGen_numTexGens : 4;
 
+	struct {
+		// TODO: Can save a lot of space by removing the padding bits
+		u32 cc : 24;
+		u32 ac : 24;
+
+		u32 tevorders_texmap : 3;
+		u32 tevorders_texcoord : 3;
+		u32 tevorders_enable : 1;
+		u32 tevorders_colorchan : 3;
+		u32 pad1 : 6;
+
+		// TODO: Clean up the swapXY mess
+		u32 hasindstage : 1;
+		u32 tevind : 21;
+		u32 tevksel_swap1a : 2;
+		u32 tevksel_swap2a : 2;
+		u32 tevksel_swap1b : 2;
+		u32 tevksel_swap2b : 2;
+		u32 pad2 : 2;
+
+		u32 tevksel_swap1c : 2;
+		u32 tevksel_swap2c : 2;
+		u32 tevksel_swap1d : 2;
+		u32 tevksel_swap2d : 2;
+		u32 tevksel_kc : 5;
+		u32 tevksel_ka : 5;
+		u32 pad3 : 14;
+	} stagehash[16];
+
 	// TODO: I think we're fine without an enablePixelLighting field, should probably double check, though..
 	LightingUidData lighting;
 };
-//#pragma pack()
+#pragma pack()
 
 typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid;
 typedef ShaderCode PixelShaderCode; // TODO: Obsolete
diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp
index 9fbe096607..aef4baa14c 100644
--- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp
+++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp
@@ -29,45 +29,19 @@ static u32 lastTexDims[8]; // width | height << 16 | wrap_s << 28 | wrap_t << 30
 static u32 lastZBias;
 static int nMaterialsChanged;
 
-static float s_constant_cache[C_PENVCONST_END*4];
-
 inline void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
 {
-//	if (s_constant_cache[const_number*4] == f1 && s_constant_cache[const_number*4+1] == f2 &&
-//	    s_constant_cache[const_number*4+2] == f3 && s_constant_cache[const_number*4+3] == f4)
-//		return;
-
 	g_renderer->SetPSConstant4f(const_number, f1, f2, f3, f4);
-	s_constant_cache[const_number*4] = f1;
-	s_constant_cache[const_number*4+1] = f2;
-	s_constant_cache[const_number*4+2] = f3;
-	s_constant_cache[const_number*4+3] = f4;
 }
 
 inline void SetPSConstant4fv(unsigned int const_number, const float *f)
 {
-//	if (s_constant_cache[const_number*4] == f[0] && s_constant_cache[const_number*4+1] == f[1] &&
-//	    s_constant_cache[const_number*4+2] == f[2] && s_constant_cache[const_number*4+3] == f[3])
-//		return;
-
 	g_renderer->SetPSConstant4fv(const_number, f);
-	s_constant_cache[const_number*4] = f[0];
-	s_constant_cache[const_number*4+1] = f[1];
-	s_constant_cache[const_number*4+2] = f[2];
-	s_constant_cache[const_number*4+3] = f[3];
 }
 
 inline void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f)
 {
-//	for (unsigned int i = 0; i < 4*count; ++i)
-//		if (s_constant_cache[const_number*4+i] != f[i])
-//			break;
-//		else if (i == 4*count-1)
-//			return;
-
 	g_renderer->SetMultiPSConstant4fv(const_number, count, f);
-	for (unsigned int i = 0; i < 4*count; ++i)
-		s_constant_cache[const_number*4+i] = f[i];
 }
 
 void PixelShaderManager::Init()
@@ -76,7 +50,6 @@ void PixelShaderManager::Init()
 	memset(lastTexDims, 0, sizeof(lastTexDims));
 	lastZBias = 0;
 	memset(lastRGBAfull, 0, sizeof(lastRGBAfull));
-	memset(s_constant_cache, 0, sizeof(s_constant_cache)); // TODO: Should reflect that on the GPU side....
 	Dirty();
 }
 
@@ -102,19 +75,6 @@ void PixelShaderManager::SetConstants(u32 components)
 	if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO)
 		Dirty();
 
-	// TODO: Probably broken in the non-UBO path
-	PixelShaderConstantProfile constant_profile(C_PENVCONST_END);
-	/// TODO: dst alpha/api/components type parameter...
-	GetPixelShaderConstantProfile(constant_profile, DSTALPHA_DUAL_SOURCE_BLEND, API_OPENGL, components);
-
-	static int saved_updates = 0;
-	static int necessary_updates = 0;
-
-// TODO: Remove this!
-#define IncStuff() { \
-	saved_updates++; \
-	/*printf("Saved a constant update at line %d! Saved %d against %d now!\n", __LINE__, saved_updates, necessary_updates);*/ }
-
 	for (int i = 0; i < 2; ++i)
 	{
 		if (s_nColorsChanged[i])
@@ -122,12 +82,11 @@ void PixelShaderManager::SetConstants(u32 components)
 			int baseind = i ? C_KCOLORS : C_COLORS;
 			for (int j = 0; j < 4; ++j)
 			{
-				if ((s_nColorsChanged[i] & (1 << j)) && constant_profile.ConstantIsUsed(baseind+j))
+				if ((s_nColorsChanged[i] & (1 << j)))
 				{
 					SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]);
 					s_nColorsChanged[i] &= ~(1<<j);
-					++necessary_updates;
-				} else if ((s_nColorsChanged[i] & (1 << j))) IncStuff();
+				}
 			}
 		}
 	}
@@ -136,23 +95,21 @@ void PixelShaderManager::SetConstants(u32 components)
 	{
 		for (int i = 0; i < 8; ++i)
 		{
-            if ((s_nTexDimsChanged & (1<<i)) && constant_profile.ConstantIsUsed(C_TEXDIMS+i))
+            if ((s_nTexDimsChanged & (1<<i)))
 			{
-				++necessary_updates;
 				SetPSTextureDims(i);
 				s_nTexDimsChanged &= ~(1<<i);
-			}else if (s_nTexDimsChanged & (1<<i)) IncStuff();
+			}
         }
     }
 
-    if (s_bAlphaChanged && constant_profile.ConstantIsUsed(C_ALPHA))
+    if (s_bAlphaChanged)
 	{
-		++necessary_updates;
 		SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f);
 		s_bAlphaChanged = false;
-    } else if (s_bAlphaChanged) IncStuff();
+    }
 
-	if (s_bZTextureTypeChanged && constant_profile.ConstantIsUsed(C_ZBIAS))
+	if (s_bZTextureTypeChanged)
 	{
 		float ftemp[4];
 		switch (bpmem.ztex2.type)
@@ -170,12 +127,11 @@ void PixelShaderManager::SetConstants(u32 components)
 				ftemp[0] = 16711680.0f/16777215.0f; ftemp[1] = 65280.0f/16777215.0f; ftemp[2] = 255.0f/16777215.0f; ftemp[3] = 0;
                 break;
         }
-		++necessary_updates;
 		SetPSConstant4fv(C_ZBIAS, ftemp);
 		s_bZTextureTypeChanged = false;
-	} else if (s_bZTextureTypeChanged) IncStuff();
+	}
 
-	if ((s_bZBiasChanged || s_bDepthRangeChanged) && constant_profile.ConstantIsUsed(C_ZBIAS+1))
+	if (s_bZBiasChanged || s_bDepthRangeChanged)
 	{
 		// reversed gxsetviewport(xorig, yorig, width, height, nearz, farz)
 		// [0] = width/2
@@ -186,10 +142,9 @@ void PixelShaderManager::SetConstants(u32 components)
 		// [5] = 16777215 * farz
 
 		//ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias);
-					++necessary_updates;
 		SetPSConstant4f(C_ZBIAS+1, xfregs.viewport.farZ / 16777216.0f, xfregs.viewport.zRange / 16777216.0f, 0, (float)(lastZBias)/16777215.0f);
 		s_bZBiasChanged = s_bDepthRangeChanged = false;
-    }else if ((s_bZBiasChanged || s_bDepthRangeChanged)) IncStuff();
+	}
 
 	// indirect incoming texture scales
 	if (s_nIndTexScaleChanged)
@@ -197,7 +152,7 @@ void PixelShaderManager::SetConstants(u32 components)
 		// set as two sets of vec4s, each containing S and T of two ind stages.
 		float f[8];
 
-        if ((s_nIndTexScaleChanged & 0x03) && constant_profile.ConstantIsUsed(C_INDTEXSCALE))
+        if (s_nIndTexScaleChanged & 0x03)
 		{
 			for (u32 i = 0; i < 2; ++i)
 			{
@@ -205,13 +160,10 @@ void PixelShaderManager::SetConstants(u32 components)
                 f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1);
                 PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
             }
-			++necessary_updates;
 			SetPSConstant4fv(C_INDTEXSCALE, f);
-			s_nIndTexScaleChanged &= ~0x03;
         }
-        else if ((s_nIndTexScaleChanged & 0x03)) IncStuff();
 
-        if ((s_nIndTexScaleChanged & 0x0c) && constant_profile.ConstantIsUsed(C_INDTEXSCALE+1))
+		if (s_nIndTexScaleChanged & 0x0c)
 		{
             for (u32 i = 2; i < 4; ++i)
 			{
@@ -219,18 +171,16 @@ void PixelShaderManager::SetConstants(u32 components)
                 f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1);
                 PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
             }
-			++necessary_updates;
 			SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]);
-			s_nIndTexScaleChanged &= ~0x0c;
         }
-        else if ((s_nIndTexScaleChanged & 0x0c)) IncStuff();
+		s_nIndTexScaleChanged = 0;
     }
 
 	if (s_nIndTexMtxChanged)
 	{
 		for (int i = 0; i < 3; ++i)
 		{
-            if ((s_nIndTexMtxChanged & (1 << i)) && (constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i) || constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i+1)))
+            if (s_nIndTexMtxChanged & (1 << i))
 			{
                 int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) |
 					        ((u32)bpmem.indmtx[i].col1.s1 << 2) |
@@ -240,8 +190,6 @@ void PixelShaderManager::SetConstants(u32 components)
                 // xyz - static matrix
                 // TODO w - dynamic matrix scale / 256...... somehow / 4 works better
                 // rev 2972 - now using / 256.... verify that this works
-					++necessary_updates;
-					++necessary_updates;
 				SetPSConstant4f(C_INDTEXMTX + 2 * i,
 					bpmem.indmtx[i].col0.ma * fscale,
 					bpmem.indmtx[i].col1.mc * fscale,
@@ -259,20 +207,18 @@ void PixelShaderManager::SetConstants(u32 components)
                 	bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale);
 
 				s_nIndTexMtxChanged &= ~(1 << i);
-            }else if ((s_nIndTexMtxChanged & (1 << i))) {IncStuff();IncStuff();}
+			}
         }
     }
 
-    if (s_bFogColorChanged && constant_profile.ConstantIsUsed(C_FOG))
+    if (s_bFogColorChanged)
 	{
-					++necessary_updates;
 		SetPSConstant4f(C_FOG, bpmem.fog.color.r / 255.0f, bpmem.fog.color.g / 255.0f, bpmem.fog.color.b / 255.0f, 0);
 		s_bFogColorChanged = false;
-    }else if (s_bFogColorChanged) IncStuff();
+    }
 
-    if (s_bFogParamChanged && constant_profile.ConstantIsUsed(C_FOG+1))
+    if (s_bFogParamChanged)
 	{
-					++necessary_updates;
 		if(!g_ActiveConfig.bDisableFog)
 		{
 			//downscale magnitude to 0.24 bits
@@ -285,11 +231,10 @@ void PixelShaderManager::SetConstants(u32 components)
 			SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0);
 
         s_bFogParamChanged = false;
-    }else if ( s_bFogParamChanged) IncStuff();
+    }
 
-	if (s_bFogRangeAdjustChanged && constant_profile.ConstantIsUsed(C_FOG+2))
+	if (s_bFogRangeAdjustChanged)
 	{
-					++necessary_updates;
 		if(!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1)
 		{
 			//bpmem.fogRange.Base.Center : center of the viewport in x axis. observation: bpmem.fogRange.Base.Center = realcenter + 342;
@@ -310,9 +255,8 @@ void PixelShaderManager::SetConstants(u32 components)
 		}
 
 		s_bFogRangeAdjustChanged = false;
-	}else if ( s_bFogRangeAdjustChanged) IncStuff();
+	}
 
-	// TODO: use constant profile here!
 	if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)  // config check added because the code in here was crashing for me inside SetPSConstant4f
 	{
 		if (nLightsChanged[0] >= 0)
@@ -372,7 +316,7 @@ void PixelShaderManager::SetConstants(u32 components)
 					SetPSConstant4fv(C_PMATERIALS + i, material);
 				}
 			}
-			
+
 			for (int i = 0; i < 2; ++i)
 			{
 				if (nMaterialsChanged & (1 << (i + 2)))
diff --git a/Source/Core/VideoCommon/Src/ShaderGenCommon.h b/Source/Core/VideoCommon/Src/ShaderGenCommon.h
index b8820e2e98..c8f8ff8345 100644
--- a/Source/Core/VideoCommon/Src/ShaderGenCommon.h
+++ b/Source/Core/VideoCommon/Src/ShaderGenCommon.h
@@ -100,7 +100,7 @@ public:
 	bool operator < (const ShaderUid& obj) const
 	{
 		// TODO: Store last frame used and order by that? makes much more sense anyway...
-		for (unsigned int i = 0; i < sizeof(uid_data) / sizeof(u32); ++i)
+		for (unsigned int i = 0; i < data.NumValues(); ++i)
 		{
 			if (this->values[i] < obj.values[i])
 				return true;
@@ -199,10 +199,9 @@ static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const
 	object.Write(";\n");
 }
 
-#pragma pack(4)
+#pragma pack(1)
 /**
  * Common uid data used for shader generators that use lighting calculations.
- * Expected to be stored as a member called "lighting".
  */
 struct LightingUidData
 {
@@ -212,6 +211,8 @@ struct LightingUidData
 	u32 diffusefunc : 8; // 4x2 bits
 	u32 attnfunc : 8; // 4x2 bits
 	u32 light_mask : 32; // 4x8 bits
+
+	u32 NumValues() const { return sizeof(LightingUidData) / sizeof(u32); }
 };
 #pragma pack()
 
diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp
index 3edb51c505..81d45cd6ad 100644
--- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp
+++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp
@@ -74,6 +74,8 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type)
 	vertex_shader_uid_data& uid_data = (&out.template GetUidData<vertex_shader_uid_data>() != NULL)
 											? out.template GetUidData<vertex_shader_uid_data>() : dummy_data;
 
+	uid_data.num_values = sizeof(uid_data)/sizeof(u32);
+
 	out.SetBuffer(text);
 #ifndef ANDROID
 	locale_t locale;
@@ -374,7 +376,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type)
 				break;
 			case XF_TEXGEN_REGULAR:
 			default:
-				uid_data.texMtxInfo[i].projection = xfregs.texMtxInfo[i].projection;
+				uid_data.texMtxInfo_n_projection |= xfregs.texMtxInfo[i].projection << i;
 				if (components & (VB_HAS_TEXMTXIDX0<<i))
 				{
 					out.Write("int tmp = int(tex%d.z);\n", i);
@@ -393,7 +395,7 @@ static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type)
 				break;
 		}
 
-		uid_data.dualTexTrans.enabled = xfregs.dualTexTrans.enabled;
+		uid_data.dualTexTrans_enabled = xfregs.dualTexTrans.enabled;
 		// CHECKME: does this only work for regular tex gen types?
 		if (xfregs.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
 		{
diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.h b/Source/Core/VideoCommon/Src/VertexShaderGen.h
index eb7236678a..797fe7d108 100644
--- a/Source/Core/VideoCommon/Src/VertexShaderGen.h
+++ b/Source/Core/VideoCommon/Src/VertexShaderGen.h
@@ -63,29 +63,34 @@ const s_svar VSVar_Loc[] = {  {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 },
 						{I_DEPTHPARAMS, C_DEPTHPARAMS, 1 },
 						};
 
-#pragma pack(4)
+#pragma pack(1)
 
 struct vertex_shader_uid_data
 {
+
+	u32 NumValues() const { return num_values; }
+
 	u32 components;
+	u32 num_values : 16; // TODO: u8 might be enough, actually
 	u32 numColorChans : 2;
 	u32 numTexGens : 4;
 
+	u32 dualTexTrans_enabled : 1;
+
+	u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is 8 bits wide
 	struct {
-		u32 projection : 1; // XF_TEXPROJ_X
-		u32 inputform : 2; // XF_TEXINPUT_X
-		u32 texgentype : 3; // XF_TEXGEN_X
-		u32 sourcerow : 5; // XF_SRCGEOM_X
-		u32 embosssourceshift : 3; // what generated texcoord to use
-		u32 embosslightshift : 3; // light index that is used
+		u32 inputform : 2;
+		u32 texgentype : 3;
+		u32 sourcerow : 5;
+		u32 embosssourceshift : 3;
+		u32 embosslightshift : 3;
 	} texMtxInfo[8];
+
 	struct {
-		u32 index : 6; // base row of dual transform matrix
-		u32 normalize : 1; // normalize before send operation
+		u32 index : 6;
+		u32 normalize : 1;
+		u32 pad : 1;
 	} postMtxInfo[8];
-	struct {
-		u32 enabled : 1;
-	} dualTexTrans;
 
 	LightingUidData lighting;
 };
diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj
index 617e4ec567..1e7a56f578 100644
--- a/Source/Core/VideoCommon/VideoCommon.vcxproj
+++ b/Source/Core/VideoCommon/VideoCommon.vcxproj
@@ -190,7 +190,6 @@
     <ClCompile Include="Src\HiresTextures.cpp" />
     <ClCompile Include="Src\ImageWrite.cpp" />
     <ClCompile Include="Src\IndexGenerator.cpp" />
-    <ClCompile Include="Src\LightingShaderGen.cpp" />
     <ClCompile Include="Src\MainBase.cpp" />
     <ClCompile Include="Src\memcpy_amd.cpp" />
     <ClCompile Include="Src\OnScreenDisplay.cpp" />
diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters
index e988d34e12..785e55877f 100644
--- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters
+++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters
@@ -113,9 +113,6 @@
     <ClCompile Include="Src\VertexManagerBase.cpp">
       <Filter>Base</Filter>
     </ClCompile>
-    <ClCompile Include="Src\LightingShaderGen.cpp">
-      <Filter>Shader Generators</Filter>
-    </ClCompile>
     <ClCompile Include="Src\FPSCounter.cpp">
       <Filter>Util</Filter>
     </ClCompile>
@@ -294,4 +291,4 @@
       <UniqueIdentifier>{e2a527a2-ccc8-4ab8-a93e-dd2628c0f3b6}</UniqueIdentifier>
     </Filter>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>