From 032c9202479d939af3406583bc48d2a5928f2c67 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 14 Feb 2014 16:20:11 +0100 Subject: [PATCH 01/45] OGL: Remove some dead code. --- Source/Core/VideoBackends/OGL/ProgramShaderCache.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h index 9b9511fd11..969a976c27 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h @@ -38,9 +38,6 @@ public: }; -const int NUM_UNIFORMS = 19; -extern const char *UniformNames[NUM_UNIFORMS]; - struct SHADER { SHADER() : glprogid(0) { } @@ -52,8 +49,6 @@ struct SHADER GLuint glprogid; // opengl program id std::string strvprog, strpprog; - GLint UniformLocations[NUM_UNIFORMS]; - u32 UniformSize[NUM_UNIFORMS]; void SetProgramVariables(); void SetProgramBindings(); From 2af50f04c21a12e9516dba6c12f73af3f5b7834d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 7 Oct 2013 00:52:34 +0200 Subject: [PATCH 02/45] OGL: Force highp for integers. --- Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 8751ec35c4..085e79b8e5 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -457,6 +457,7 @@ void ProgramShaderCache::CreateHeader ( void ) // Precision defines for GLSLES3 "%s\n" + "%s\n" // Silly differences "#define float2 vec2\n" @@ -480,6 +481,7 @@ void ProgramShaderCache::CreateHeader ( void ) , g_ActiveConfig.backend_info.bSupportShadingLanguage420pack ? "#extension GL_ARB_shading_language_420pack : enable" : "" , v==GLSLES3 ? "precision highp float;" : "" + , v==GLSLES3 ? "precision highp int;" : "" , DriverDetails::HasBug(DriverDetails::BUG_BROKENTEXTURESIZE) ? "#define textureSize(x, y) ivec2(1, 1)" : "" , DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "#define centroid" : "" From bed442198f2ac9eda58568d4653e93b6adcee27e Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 13:21:43 +0200 Subject: [PATCH 03/45] PixelShaderGen: Make SampleTexture static inline. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 9f634d4def..58d29436a4 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -1040,7 +1040,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP } template -void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) +static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType) { out.SetConstantsUsed(C_TEXDIMS+texmap,C_TEXDIMS+texmap); From 22c989de9a067eb9b6f04d0c3fa8772e8412d4d8 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 8 Oct 2013 01:04:41 +0000 Subject: [PATCH 04/45] Provide our own dot functions in GLSL since GLSL doesn't provide integer versions of this function, even though AMD and Nvidia provide their own. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 58d29436a4..f9a3cb5d2f 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -257,6 +257,19 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T uid_data.genMode_numtevstages = bpmem.genMode.numtevstages; uid_data.genMode_numtexgens = bpmem.genMode.numtexgens; + // dot product for integer vectors + out.Write( "int idot(int3 x, int3 y)\n" + "{\n" + "\tint3 tmp = x * y;\n" + "\treturn tmp.x + tmp.y + tmp.z;\n" + "}\n"); + + out.Write( "int idot(int4 x, int4 y)\n" + "{\n" + "\tint4 tmp = x * y;\n" + "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n" + "}\n"); + if (ApiType == API_OPENGL) { // Fmod implementation gleaned from Nvidia From fa77e1d2b6a7e9a730b3e8b6fb8d95b68bf2a55c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 15:06:47 +0200 Subject: [PATCH 05/45] PixelShaderGen: Use integer math for alpha testing. --- .../VideoBackends/OGL/ProgramShaderCache.cpp | 3 ++ Source/Core/VideoCommon/PixelShaderGen.cpp | 32 +++++++++---------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 085e79b8e5..63fd6c69ea 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -463,6 +463,9 @@ void ProgramShaderCache::CreateHeader ( void ) "#define float2 vec2\n" "#define float3 vec3\n" "#define float4 vec4\n" + "#define uint2 uvec2\n" + "#define uint3 uvec3\n" + "#define uint4 uvec4\n" "#define int2 ivec2\n" "#define int3 ivec3\n" "#define int4 ivec4\n" diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index f9a3cb5d2f..5a051b894c 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -541,9 +541,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl; } } - // emulation of unsigned 8 overflow when casting if needed - if (RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl) - out.Write("\tprev = frac(prev * (255.0/256.0)) * (256.0/255.0);\n"); + out.Write("\tint4 iprev = int4(round(prev * 255.0)) & 0xFF;\n"); AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); uid_data.Pretest = Pretest; @@ -602,12 +600,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (dstAlphaMode == DSTALPHA_ALPHA_PASS) { out.SetConstantsUsed(C_ALPHA, C_ALPHA); - out.Write("\tocol0 = float4(prev.rgb, " I_ALPHA"[0].a);\n"); + out.Write("\tocol0 = float4(float3(iprev.rgb) / 255.0, " I_ALPHA"[0].a);\n"); } else { WriteFog(out, uid_data); - out.Write("\tocol0 = prev;\n"); + out.Write("\tocol0 = float4(iprev) / 255.0;\n"); } // Use dual-source color blending to perform dst alpha in a single pass @@ -617,7 +615,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // Colors will be blended against the alpha from ocol1 and // the alpha from ocol0 will be written to the framebuffer. - out.Write("\tocol1 = prev;\n"); + out.Write("\tocol1 = float4(iprev) / 255.0;\n"); out.Write("\tocol0.a = " I_ALPHA"[0].a;\n"); } @@ -1065,14 +1063,14 @@ static inline void SampleTexture(T& out, const char *texcoords, const char *texs static const char *tevAlphaFuncsTable[] = { - "(false)", // NEVER - "(prev.a <= %s - (0.25/255.0))", // LESS - "(abs( prev.a - %s ) < (0.5/255.0))", // EQUAL - "(prev.a < %s + (0.25/255.0))", // LEQUAL - "(prev.a >= %s + (0.25/255.0))", // GREATER - "(abs( prev.a - %s ) >= (0.5/255.0))", // NEQUAL - "(prev.a > %s - (0.25/255.0))", // GEQUAL - "(true)" // ALWAYS + "(false)", // NEVER + "(iprev.a < %s)", // LESS + "(iprev.a == %s)", // EQUAL + "(iprev.a <= %s)", // LEQUAL + "(iprev.a > %s)", // GREATER + "(iprev.a != %s)", // NEQUAL + "(iprev.a >= %s)", // GEQUAL + "(true)" // ALWAYS }; static const char *tevAlphaFunclogicTable[] = @@ -1088,8 +1086,8 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_T { static const char *alphaRef[2] = { - I_ALPHA"[0].r", - I_ALPHA"[0].g" + "int(round(" I_ALPHA"[0].r * 255.0))", + "int(round(" I_ALPHA"[0].g * 255.0))" }; out.SetConstantsUsed(C_ALPHA, C_ALPHA); @@ -1200,7 +1198,7 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); } - out.Write("\tprev.rgb = lerp(prev.rgb, " I_FOG"[0].rgb, fog);\n"); + out.Write("\tiprev.rgb = int3(round(lerp(float3(iprev.rgb), " I_FOG"[0].rgb*255.0, fog)));\n"); } void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) From 1b3b12caa01d9ee77a9e3740d52f66ece1456d4b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 8 Dec 2013 14:31:33 +0100 Subject: [PATCH 06/45] PixelShaderGen: Fix an issue where small negative z coordinates would underflow when they shouldn't. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 5a051b894c..1a15d3ad04 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -590,7 +590,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // U24 overflow emulation out.Write("zCoord = zCoord * (16777215.0/16777216.0);\n"); - out.Write("zCoord = frac(zCoord);\n"); + out.Write("zCoord = zCoord - 2.0*round(0.5*zCoord);\n"); out.Write("zCoord = zCoord * (16777216.0/16777215.0);\n"); } From 0e711bf520654bf6403116c905f74e4092c61926 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 15:40:24 +0200 Subject: [PATCH 07/45] PixelShaderGen: Use integer math for sampling textures. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 1a15d3ad04..4e5a8740e0 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -390,6 +390,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T } out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0, 0.0, 0.0, 0.0), textemp = float4(0.0, 0.0, 0.0, 0.0), rastemp = float4(0.0, 0.0, 0.0, 0.0), konsttemp = float4(0.0, 0.0, 0.0, 0.0);\n" + " int4 itextemp = int4(0, 0, 0, 0);\n" " float3 comp16 = float3(1.0, 255.0, 0.0), comp24 = float3(1.0, 255.0, 255.0*255.0);\n" " float alphabump=0.0;\n" " float3 tevcoord=float3(0.0, 0.0, 0.0);\n" @@ -488,8 +489,9 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T else out.Write("\ttempcoord = float2(0.0, 0.0);\n"); - out.Write("float3 indtex%d = ", i); + out.Write("\tint3 iindtex%d = ", i); SampleTexture(out, "tempcoord", "abg", texmap, ApiType); + out.Write("\tfloat3 indtex%d = float3(iindtex%d) / 255.0f;\n", i, i); } } @@ -583,9 +585,9 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture) { - // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... + // use the texture input of the last texture stage (itextemp), hopefully this has been read and is in correct format... out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1); - out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", + out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, float4(itextemp.xyzw)/255.0) + " I_ZBIAS"[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : ""); // U24 overflow emulation @@ -822,13 +824,14 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int texmap = bpmem.tevorders[n/2].getTexMap(n&1); uid_data.SetTevindrefTexmap(i, texmap); - out.Write("textemp = "); + out.Write("itextemp = "); SampleTexture(out, "tevcoord", texswap, texmap, ApiType); } else { - out.Write("textemp = float4(1.0, 1.0, 1.0, 1.0);\n"); + out.Write("itextemp = int4(255, 255, 255, 255);\n"); } + out.Write("textemp = float4(itextemp) / 255.0f;\n"); if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || @@ -1056,9 +1059,9 @@ static inline void SampleTexture(T& out, const char *texcoords, const char *texs out.SetConstantsUsed(C_TEXDIMS+texmap,C_TEXDIMS+texmap); if (ApiType == API_D3D) - out.Write("Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", texmap,texmap, texcoords, texmap, texswap); - else // OGL - out.Write("texture(samp%d,%s.xy * " I_TEXDIMS"[%d].xy).%s;\n", texmap, texcoords, texmap, texswap); + out.Write("int4(round(255.0 * Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy))).%s;\n", texmap,texmap, texcoords, texmap, texswap); + else + out.Write("int4(round(255.0 * texture(samp%d,%s.xy * " I_TEXDIMS"[%d].xy))).%s;\n", texmap, texcoords, texmap, texswap); } static const char *tevAlphaFuncsTable[] = From 3ea97f77301d57f7ffddefb45f7e34ca926d5098 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 16:08:50 +0200 Subject: [PATCH 08/45] PixelShaderGen: Use integer math for indirect texture coords. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 31 ++++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 4e5a8740e0..cca3bb2621 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -198,12 +198,12 @@ static const char *tevRasTable[] = static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; static const char *tevIndAlphaSel[] = {"", "x", "y", "z"}; -//static const char *tevIndAlphaScale[] = {"", "*32", "*16", "*8"}; -static const char *tevIndAlphaScale[] = {"*(248.0/255.0)", "*(224.0/255.0)", "*(240.0/255.0)", "*(248.0/255.0)"}; +static const char *tevIndAlphaMask[] = {"0xF8", "0xE0", "0xF0", "0xF8"}; static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias -static const char *tevIndBiasAdd[] = {"-128.0", "1.0", "1.0", "1.0" }; // indexed by fmt +static const char *tevIndBiasAdd[] = {"-128", "1", "1", "1" }; // indexed by fmt static const char *tevIndWrapStart[] = {"0.0", "256.0", "128.0", "64.0", "32.0", "16.0", "0.001" }; static const char *tevIndFmtScale[] = {"255.0", "31.0", "15.0", "7.0" }; +static const char *tevIndFmtMask[] = {"0xFF", "0x1F", "0x0F", "0x07" }; struct RegisterState { @@ -491,7 +491,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("\tint3 iindtex%d = ", i); SampleTexture(out, "tempcoord", "abg", texmap, ApiType); - out.Write("\tfloat3 indtex%d = float3(iindtex%d) / 255.0f;\n", i, i); } } @@ -698,20 +697,24 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP uid_data.stagehash[n].tevind = bpmem.tevind[n].hex & 0x7FFFFF; out.Write("// indirect op\n"); - // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords + // perform the indirect op on the incoming regular coordinates using iindtex%d as the offset coords if (bpmem.tevind[n].bs != ITBA_OFF) { - out.Write("alphabump = indtex%d.%s %s;\n", + out.Write("alphabump = float(iindtex%d.%s & %s) / 255.0;\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs], - tevIndAlphaScale[bpmem.tevind[n].fmt]); + tevIndAlphaMask[bpmem.tevind[n].fmt]); } // format - out.Write("float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]); + out.Write("int3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, tevIndFmtMask[bpmem.tevind[n].fmt]); - // bias - if (bpmem.tevind[n].bias != ITB_NONE ) - out.Write("indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); + // bias - TODO: Check if this needs to be this complicated.. + if (bpmem.tevind[n].bias == ITB_S || bpmem.tevind[n].bias == ITB_T || bpmem.tevind[n].bias == ITB_U) + out.Write("iindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); + else if (bpmem.tevind[n].bias == ITB_ST || bpmem.tevind[n].bias == ITB_SU || bpmem.tevind[n].bias == ITB_TU) + out.Write("iindtevcrd%d.%s += int2(%s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); + else if (bpmem.tevind[n].bias == ITB_STU) + out.Write("iindtevcrd%d.%s += int3(%s, %s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); // multiply by offset matrix and scale if (bpmem.tevind[n].mid != 0) @@ -720,7 +723,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP { int mtxidx = 2*(bpmem.tevind[n].mid-1); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"[%d].xyz, indtevcrd%d));\n", + out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d)), dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d)));\n", n, mtxidx, n, mtxidx+1, n); } else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) @@ -728,14 +731,14 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n); + out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.xx);\n", n, mtxidx, texcoord, n); } else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) { // t matrix _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n); + out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.yy);\n", n, mtxidx, texcoord, n); } else { From ac1c77c39285523f75c53a62fdc68cc0305dd570 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 16:16:06 +0200 Subject: [PATCH 09/45] PixelShaderGen: Use integer math for rasterizer color. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index cca3bb2621..c1278bf83b 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -132,8 +132,8 @@ static const char *tevCInputTable[] = // CC "(c2.aaa)", // A2, "(textemp.rgb)", // TEXC, "(textemp.aaa)", // TEXA, - "(rastemp.rgb)", // RASC, - "(rastemp.aaa)", // RASA, + "(float3(irastemp.rgb)/255.0)", // RASC, + "(float3(irastemp.aaa)/255.0)", // RASA, "float3(1.0, 1.0, 1.0)", // ONE "float3(0.5, 0.5, 0.5)", // HALF "(konsttemp.rgb)", //"konsttemp.rgb", // KONST @@ -149,8 +149,8 @@ static const char *tevCInputTable[] = // CC "(cc2.aaa)", // A2, "(textemp.rgb)", // TEXC, "(textemp.aaa)", // TEXA, - "(crastemp.rgb)", // RASC, - "(crastemp.aaa)", // RASA, + "(float3(icrastemp.rgb)/255.0)", // RASC, + "(float3(icrastemp.aaa)/255.0)", // RASA, "float3(1.0, 1.0, 1.0)", // ONE "float3(0.5, 0.5, 0.5)", // HALF "(ckonsttemp.rgb)", //"konsttemp.rgb", // KONST @@ -165,7 +165,7 @@ static const char *tevAInputTable[] = // CA "c1", // A1, "c2", // A2, "textemp", // TEXA, - "rastemp", // RASA, + "(float4(irastemp) / 255.0)", // RASA, "konsttemp", // KONST, (hw1 had quarter) "float4(0.0, 0.0, 0.0, 0.0)", // ZERO ///added extra values to map clamped values @@ -174,7 +174,7 @@ static const char *tevAInputTable[] = // CA "cc1", // A1, "cc2", // A2, "textemp", // TEXA, - "crastemp", // RASA, + "(float4(icrastemp) / 255.0)", // RASA, "ckonsttemp", // KONST, (hw1 had quarter) "float4(0.0, 0.0, 0.0, 0.0)", // ZERO "PADERROR5", "PADERROR6", "PADERROR7", "PADERROR8", @@ -183,14 +183,14 @@ static const char *tevAInputTable[] = // CA static const char *tevRasTable[] = { - "colors_0", - "colors_1", + "int4(round(colors_0 * 255.0))", + "int4(round(colors_1 * 255.0))", "ERROR13", //2 "ERROR14", //3 "ERROR15", //4 - "float4(alphabump,alphabump,alphabump,alphabump)", // use bump alpha - "(float4(alphabump,alphabump,alphabump,alphabump)*(255.0/248.0))", //normalized - "float4(0.0, 0.0, 0.0, 0.0)", // zero + "int4(1,1,1,1) * int(round(alphabump * 255.0))", // use bump alpha + "int4(1,1,1,1) * int(round(alphabump * 255.0)) * 255 / 248)", //normalized + "int4(0, 0, 0, 0)", // zero }; //static const char *tevTexFunc[] = { "tex2D", "texRECT" }; @@ -389,15 +389,15 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write(" ) {\n"); } - out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0, 0.0, 0.0, 0.0), textemp = float4(0.0, 0.0, 0.0, 0.0), rastemp = float4(0.0, 0.0, 0.0, 0.0), konsttemp = float4(0.0, 0.0, 0.0, 0.0);\n" - " int4 itextemp = int4(0, 0, 0, 0);\n" + out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0, 0.0, 0.0, 0.0), textemp = float4(0.0, 0.0, 0.0, 0.0), konsttemp = float4(0.0, 0.0, 0.0, 0.0);\n" + " int4 irastemp = int4(0, 0, 0, 0), icrastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0);\n" " float3 comp16 = float3(1.0, 255.0, 0.0), comp24 = float3(1.0, 255.0, 255.0*255.0);\n" " float alphabump=0.0;\n" " float3 tevcoord=float3(0.0, 0.0, 0.0);\n" " float2 wrappedcoord=float2(0.0,0.0), tempcoord=float2(0.0,0.0);\n" " float4 cc0=float4(0.0,0.0,0.0,0.0), cc1=float4(0.0,0.0,0.0,0.0);\n" " float4 cc2=float4(0.0,0.0,0.0,0.0), cprev=float4(0.0,0.0,0.0,0.0);\n" - " float4 crastemp=float4(0.0,0.0,0.0,0.0),ckonsttemp=float4(0.0,0.0,0.0,0.0);\n\n"); + " float4 ckonsttemp=float4(0.0,0.0,0.0,0.0);\n\n"); if (ApiType == API_OPENGL) { @@ -798,8 +798,8 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP uid_data.stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1); const char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; - out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); - out.Write("crastemp = frac(rastemp * (255.0/256.0)) * (256.0/255.0);\n"); + out.Write("irastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); + out.Write("icrastemp = irastemp & 0xFF;\n"); } uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); From e69ee6ae0a7d2647f4c4058d8b7006745422b5d3 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 16:17:57 +0200 Subject: [PATCH 10/45] PixelShaderGen: Remove remaining floating point bits for texture color. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index c1278bf83b..fede7b6cb5 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -130,8 +130,8 @@ static const char *tevCInputTable[] = // CC "(c1.aaa)", // A1, "(c2.rgb)", // C2, "(c2.aaa)", // A2, - "(textemp.rgb)", // TEXC, - "(textemp.aaa)", // TEXA, + "(float3(itextemp.rgb)/255.0)", // TEXC, + "(float3(itextemp.aaa)/255.0)", // TEXA, "(float3(irastemp.rgb)/255.0)", // RASC, "(float3(irastemp.aaa)/255.0)", // RASA, "float3(1.0, 1.0, 1.0)", // ONE @@ -147,8 +147,8 @@ static const char *tevCInputTable[] = // CC "(cc1.aaa)", // A1, "(cc2.rgb)", // C2, "(cc2.aaa)", // A2, - "(textemp.rgb)", // TEXC, - "(textemp.aaa)", // TEXA, + "(float3(itextemp.rgb)/255.0)", // TEXC, + "(float3(itextemp.aaa)/255.0)", // TEXA, "(float3(icrastemp.rgb)/255.0)", // RASC, "(float3(icrastemp.aaa)/255.0)", // RASA, "float3(1.0, 1.0, 1.0)", // ONE @@ -164,7 +164,7 @@ static const char *tevAInputTable[] = // CA "c0", // A0, "c1", // A1, "c2", // A2, - "textemp", // TEXA, + "(float4(itextemp) / 255.0)", // TEXA, "(float4(irastemp) / 255.0)", // RASA, "konsttemp", // KONST, (hw1 had quarter) "float4(0.0, 0.0, 0.0, 0.0)", // ZERO @@ -173,7 +173,7 @@ static const char *tevAInputTable[] = // CA "cc0", // A0, "cc1", // A1, "cc2", // A2, - "textemp", // TEXA, + "(float4(itextemp) / 255.0)", // TEXA, "(float4(icrastemp) / 255.0)", // RASA, "ckonsttemp", // KONST, (hw1 had quarter) "float4(0.0, 0.0, 0.0, 0.0)", // ZERO @@ -389,7 +389,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write(" ) {\n"); } - out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0, 0.0, 0.0, 0.0), textemp = float4(0.0, 0.0, 0.0, 0.0), konsttemp = float4(0.0, 0.0, 0.0, 0.0);\n" + out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0, 0.0, 0.0, 0.0), konsttemp = float4(0.0, 0.0, 0.0, 0.0);\n" " int4 irastemp = int4(0, 0, 0, 0), icrastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0);\n" " float3 comp16 = float3(1.0, 255.0, 0.0), comp24 = float3(1.0, 255.0, 255.0*255.0);\n" " float alphabump=0.0;\n" @@ -834,7 +834,6 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP { out.Write("itextemp = int4(255, 255, 255, 255);\n"); } - out.Write("textemp = float4(itextemp) / 255.0f;\n"); if (cc.a == TEVCOLORARG_KONST || cc.b == TEVCOLORARG_KONST || From 654442feb7970a5861998b8d6c3106259615e0b3 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 16:25:39 +0200 Subject: [PATCH 11/45] PixelShaderGen: Use integer math for tev konst value. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 130 ++++++++++----------- 1 file changed, 62 insertions(+), 68 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index fede7b6cb5..2ed4fe14c7 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -29,50 +29,50 @@ static const char *tevKSelTableC[] = // KCSEL { - "1.0,1.0,1.0", // 1 = 0x00 - "0.875,0.875,0.875", // 7_8 = 0x01 - "0.75,0.75,0.75", // 3_4 = 0x02 - "0.625,0.625,0.625", // 5_8 = 0x03 - "0.5,0.5,0.5", // 1_2 = 0x04 - "0.375,0.375,0.375", // 3_8 = 0x05 - "0.25,0.25,0.25", // 1_4 = 0x06 - "0.125,0.125,0.125", // 1_8 = 0x07 + "255,255,255", // 1 = 0x00 + "223,223,223", // 7_8 = 0x01 + "191,191,191", // 3_4 = 0x02 + "159,159,159", // 5_8 = 0x03 + "127,127,127", // 1_2 = 0x04 + "95,95,95", // 3_8 = 0x05 + "63,63,63", // 1_4 = 0x06 + "31,31,31", // 1_8 = 0x07 "ERROR1", // 0x08 "ERROR2", // 0x09 "ERROR3", // 0x0a "ERROR4", // 0x0b - I_KCOLORS"[0].rgb", // K0 = 0x0C - I_KCOLORS"[1].rgb", // K1 = 0x0D - I_KCOLORS"[2].rgb", // K2 = 0x0E - I_KCOLORS"[3].rgb", // K3 = 0x0F - I_KCOLORS"[0].rrr", // K0_R = 0x10 - I_KCOLORS"[1].rrr", // K1_R = 0x11 - I_KCOLORS"[2].rrr", // K2_R = 0x12 - I_KCOLORS"[3].rrr", // K3_R = 0x13 - I_KCOLORS"[0].ggg", // K0_G = 0x14 - I_KCOLORS"[1].ggg", // K1_G = 0x15 - I_KCOLORS"[2].ggg", // K2_G = 0x16 - I_KCOLORS"[3].ggg", // K3_G = 0x17 - I_KCOLORS"[0].bbb", // K0_B = 0x18 - I_KCOLORS"[1].bbb", // K1_B = 0x19 - I_KCOLORS"[2].bbb", // K2_B = 0x1A - I_KCOLORS"[3].bbb", // K3_B = 0x1B - I_KCOLORS"[0].aaa", // K0_A = 0x1C - I_KCOLORS"[1].aaa", // K1_A = 0x1D - I_KCOLORS"[2].aaa", // K2_A = 0x1E - I_KCOLORS"[3].aaa", // K3_A = 0x1F + "int3(round(" I_KCOLORS"[0].rgb * 255.0))", // K0 = 0x0C + "int3(round(" I_KCOLORS"[1].rgb * 255.0))", // K1 = 0x0D + "int3(round(" I_KCOLORS"[2].rgb * 255.0))", // K2 = 0x0E + "int3(round(" I_KCOLORS"[3].rgb * 255.0))", // K3 = 0x0F + "int3(round(" I_KCOLORS"[0].rrr * 255.0))", // K0_R = 0x10 + "int3(round(" I_KCOLORS"[1].rrr * 255.0))", // K1_R = 0x11 + "int3(round(" I_KCOLORS"[2].rrr * 255.0))", // K2_R = 0x12 + "int3(round(" I_KCOLORS"[3].rrr * 255.0))", // K3_R = 0x13 + "int3(round(" I_KCOLORS"[0].ggg * 255.0))", // K0_G = 0x14 + "int3(round(" I_KCOLORS"[1].ggg * 255.0))", // K1_G = 0x15 + "int3(round(" I_KCOLORS"[2].ggg * 255.0))", // K2_G = 0x16 + "int3(round(" I_KCOLORS"[3].ggg * 255.0))", // K3_G = 0x17 + "int3(round(" I_KCOLORS"[0].bbb * 255.0))", // K0_B = 0x18 + "int3(round(" I_KCOLORS"[1].bbb * 255.0))", // K1_B = 0x19 + "int3(round(" I_KCOLORS"[2].bbb * 255.0))", // K2_B = 0x1A + "int3(round(" I_KCOLORS"[3].bbb * 255.0))", // K3_B = 0x1B + "int3(round(" I_KCOLORS"[0].aaa * 255.0))", // K0_A = 0x1C + "int3(round(" I_KCOLORS"[1].aaa * 255.0))", // K1_A = 0x1D + "int3(round(" I_KCOLORS"[2].aaa * 255.0))", // K2_A = 0x1E + "int3(round(" I_KCOLORS"[3].aaa * 255.0))", // K3_A = 0x1F }; static const char *tevKSelTableA[] = // KASEL { - "1.0", // 1 = 0x00 - "0.875",// 7_8 = 0x01 - "0.75", // 3_4 = 0x02 - "0.625",// 5_8 = 0x03 - "0.5", // 1_2 = 0x04 - "0.375",// 3_8 = 0x05 - "0.25", // 1_4 = 0x06 - "0.125",// 1_8 = 0x07 + "255", // 1 = 0x00 + "223", // 7_8 = 0x01 + "191", // 3_4 = 0x02 + "159", // 5_8 = 0x03 + "127", // 1_2 = 0x04 + "95", // 3_8 = 0x05 + "63", // 1_4 = 0x06 + "31", // 1_8 = 0x07 "ERROR5", // 0x08 "ERROR6", // 0x09 "ERROR7", // 0x0a @@ -81,22 +81,22 @@ static const char *tevKSelTableA[] = // KASEL "ERROR10", // 0x0d "ERROR11", // 0x0e "ERROR12", // 0x0f - I_KCOLORS"[0].r", // K0_R = 0x10 - I_KCOLORS"[1].r", // K1_R = 0x11 - I_KCOLORS"[2].r", // K2_R = 0x12 - I_KCOLORS"[3].r", // K3_R = 0x13 - I_KCOLORS"[0].g", // K0_G = 0x14 - I_KCOLORS"[1].g", // K1_G = 0x15 - I_KCOLORS"[2].g", // K2_G = 0x16 - I_KCOLORS"[3].g", // K3_G = 0x17 - I_KCOLORS"[0].b", // K0_B = 0x18 - I_KCOLORS"[1].b", // K1_B = 0x19 - I_KCOLORS"[2].b", // K2_B = 0x1A - I_KCOLORS"[3].b", // K3_B = 0x1B - I_KCOLORS"[0].a", // K0_A = 0x1C - I_KCOLORS"[1].a", // K1_A = 0x1D - I_KCOLORS"[2].a", // K2_A = 0x1E - I_KCOLORS"[3].a", // K3_A = 0x1F + "int(round(" I_KCOLORS"[0].r * 255.0))", // K0_R = 0x10 + "int(round(" I_KCOLORS"[1].r * 255.0))", // K1_R = 0x11 + "int(round(" I_KCOLORS"[2].r * 255.0))", // K2_R = 0x12 + "int(round(" I_KCOLORS"[3].r * 255.0))", // K3_R = 0x13 + "int(round(" I_KCOLORS"[0].g * 255.0))", // K0_G = 0x14 + "int(round(" I_KCOLORS"[1].g * 255.0))", // K1_G = 0x15 + "int(round(" I_KCOLORS"[2].g * 255.0))", // K2_G = 0x16 + "int(round(" I_KCOLORS"[3].g * 255.0))", // K3_G = 0x17 + "int(round(" I_KCOLORS"[0].b * 255.0))", // K0_B = 0x18 + "int(round(" I_KCOLORS"[1].b * 255.0))", // K1_B = 0x19 + "int(round(" I_KCOLORS"[2].b * 255.0))", // K2_B = 0x1A + "int(round(" I_KCOLORS"[3].b * 255.0))", // K3_B = 0x1B + "int(round(" I_KCOLORS"[0].a * 255.0))", // K0_A = 0x1C + "int(round(" I_KCOLORS"[1].a * 255.0))", // K1_A = 0x1D + "int(round(" I_KCOLORS"[2].a * 255.0))", // K2_A = 0x1E + "int(round(" I_KCOLORS"[3].a * 255.0))", // K3_A = 0x1F }; static const char *tevScaleTable[] = // CS @@ -136,7 +136,7 @@ static const char *tevCInputTable[] = // CC "(float3(irastemp.aaa)/255.0)", // RASA, "float3(1.0, 1.0, 1.0)", // ONE "float3(0.5, 0.5, 0.5)", // HALF - "(konsttemp.rgb)", //"konsttemp.rgb", // KONST + "(float3(ikonsttemp.rgb)/255.0)", //"konsttemp.rgb", // KONST "float3(0.0, 0.0, 0.0)", // ZERO ///added extra values to map clamped values "(cprev.rgb)", // CPREV, @@ -153,7 +153,7 @@ static const char *tevCInputTable[] = // CC "(float3(icrastemp.aaa)/255.0)", // RASA, "float3(1.0, 1.0, 1.0)", // ONE "float3(0.5, 0.5, 0.5)", // HALF - "(ckonsttemp.rgb)", //"konsttemp.rgb", // KONST + "(float3(ickonsttemp.rgb)/255.0)", //"konsttemp.rgb", // KONST "float3(0.0, 0.0, 0.0)", // ZERO "PADERROR1", "PADERROR2", "PADERROR3", "PADERROR4" }; @@ -166,7 +166,7 @@ static const char *tevAInputTable[] = // CA "c2", // A2, "(float4(itextemp) / 255.0)", // TEXA, "(float4(irastemp) / 255.0)", // RASA, - "konsttemp", // KONST, (hw1 had quarter) + "(float4(ikonsttemp) / 255.0)", // KONST, (hw1 had quarter) "float4(0.0, 0.0, 0.0, 0.0)", // ZERO ///added extra values to map clamped values "cprev", // APREV, @@ -175,7 +175,7 @@ static const char *tevAInputTable[] = // CA "cc2", // A2, "(float4(itextemp) / 255.0)", // TEXA, "(float4(icrastemp) / 255.0)", // RASA, - "ckonsttemp", // KONST, (hw1 had quarter) + "(float4(ickonsttemp) / 255.0)", // KONST, (hw1 had quarter) "float4(0.0, 0.0, 0.0, 0.0)", // ZERO "PADERROR5", "PADERROR6", "PADERROR7", "PADERROR8", "PADERROR9", "PADERROR10", "PADERROR11", "PADERROR12", @@ -389,15 +389,15 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write(" ) {\n"); } - out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0, 0.0, 0.0, 0.0), konsttemp = float4(0.0, 0.0, 0.0, 0.0);\n" - " int4 irastemp = int4(0, 0, 0, 0), icrastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0);\n" + out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0, 0.0, 0.0, 0.0);\n" + " int4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n" " float3 comp16 = float3(1.0, 255.0, 0.0), comp24 = float3(1.0, 255.0, 255.0*255.0);\n" " float alphabump=0.0;\n" " float3 tevcoord=float3(0.0, 0.0, 0.0);\n" " float2 wrappedcoord=float2(0.0,0.0), tempcoord=float2(0.0,0.0);\n" " float4 cc0=float4(0.0,0.0,0.0,0.0), cc1=float4(0.0,0.0,0.0,0.0);\n" " float4 cc2=float4(0.0,0.0,0.0,0.0), cprev=float4(0.0,0.0,0.0,0.0);\n" - " float4 ckonsttemp=float4(0.0,0.0,0.0,0.0);\n\n"); + " int4 icrastemp = int4(0, 0, 0, 0), ickonsttemp = int4(0, 0, 0, 0);\n\n"); if (ApiType == API_OPENGL) { @@ -845,15 +845,9 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int ka = bpmem.tevksel[n / 2].getKA(n & 1); uid_data.stagehash[n].tevksel_kc = kc; uid_data.stagehash[n].tevksel_ka = ka; - out.Write("konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); - if (kc > 7 || ka > 7) - { - out.Write("ckonsttemp = frac(konsttemp * (255.0/256.0)) * (256.0/255.0);\n"); - } - else - { - out.Write("ckonsttemp = konsttemp;\n"); - } + out.Write("ikonsttemp = int4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); + out.Write("ickonsttemp = ikonsttemp & 0xFF;\n"); + if (kc > 7) out.SetConstantsUsed(C_KCOLORS+((kc-0xc)%4),C_KCOLORS+((kc-0xc)%4)); if (ka > 7) From aaa8e74a688997a69024a7c66cc9c0dd2f192354 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 16:51:00 +0200 Subject: [PATCH 12/45] PixelShaderGen: Use integer math for tev outputs. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 90 ++++++++++++---------- 1 file changed, 48 insertions(+), 42 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 2ed4fe14c7..3e49d567e7 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -122,14 +122,14 @@ static const char *tevOpTable[] = { // TEV static const char *tevCInputTable[] = // CC { - "(prev.rgb)", // CPREV, - "(prev.aaa)", // APREV, - "(c0.rgb)", // C0, - "(c0.aaa)", // A0, - "(c1.rgb)", // C1, - "(c1.aaa)", // A1, - "(c2.rgb)", // C2, - "(c2.aaa)", // A2, + "(float3(iprev.rgb)/255.0)", // CPREV, + "(float3(iprev.aaa)/255.0)", // APREV, + "(float3(ic0.rgb)/255.0)", // C0, + "(float3(ic0.aaa)/255.0)", // A0, + "(float3(ic1.rgb)/255.0)", // C1, + "(float3(ic1.aaa)/255.0)", // A1, + "(float3(ic2.rgb)/255.0)", // C2, + "(float3(ic2.aaa)/255.0)", // A2, "(float3(itextemp.rgb)/255.0)", // TEXC, "(float3(itextemp.aaa)/255.0)", // TEXA, "(float3(irastemp.rgb)/255.0)", // RASC, @@ -139,14 +139,14 @@ static const char *tevCInputTable[] = // CC "(float3(ikonsttemp.rgb)/255.0)", //"konsttemp.rgb", // KONST "float3(0.0, 0.0, 0.0)", // ZERO ///added extra values to map clamped values - "(cprev.rgb)", // CPREV, - "(cprev.aaa)", // APREV, - "(cc0.rgb)", // C0, - "(cc0.aaa)", // A0, - "(cc1.rgb)", // C1, - "(cc1.aaa)", // A1, - "(cc2.rgb)", // C2, - "(cc2.aaa)", // A2, + "(float3(icprev.rgb)/255.0)", // CPREV, + "(float3(icprev.aaa)/255.0)", // APREV, + "(float3(icc0.rgb)/255.0)", // C0, + "(float3(icc0.aaa)/255.0)", // A0, + "(float3(icc1.rgb)/255.0)", // C1, + "(float3(icc1.aaa)/255.0)", // A1, + "(float3(icc2.rgb)/255.0)", // C2, + "(float3(icc2.aaa)/255.0)", // A2, "(float3(itextemp.rgb)/255.0)", // TEXC, "(float3(itextemp.aaa)/255.0)", // TEXA, "(float3(icrastemp.rgb)/255.0)", // RASC, @@ -160,19 +160,19 @@ static const char *tevCInputTable[] = // CC static const char *tevAInputTable[] = // CA { - "prev", // APREV, - "c0", // A0, - "c1", // A1, - "c2", // A2, + "(float4(iprev)/255.0)", // APREV, + "(float4(ic0)/255.0)", // A0, + "(float4(ic1)/255.0)", // A1, + "(float4(ic2)/255.0)", // A2, "(float4(itextemp) / 255.0)", // TEXA, "(float4(irastemp) / 255.0)", // RASA, "(float4(ikonsttemp) / 255.0)", // KONST, (hw1 had quarter) "float4(0.0, 0.0, 0.0, 0.0)", // ZERO ///added extra values to map clamped values - "cprev", // APREV, - "cc0", // A0, - "cc1", // A1, - "cc2", // A2, + "(float4(icprev)/255.0)", // APREV, + "(float4(icc0)/255.0)", // A0, + "(float4(icc1)/255.0)", // A1, + "(float4(icc2)/255.0)", // A2, "(float4(itextemp) / 255.0)", // TEXA, "(float4(icrastemp) / 255.0)", // RASA, "(float4(ickonsttemp) / 255.0)", // KONST, (hw1 had quarter) @@ -195,8 +195,8 @@ static const char *tevRasTable[] = //static const char *tevTexFunc[] = { "tex2D", "texRECT" }; -static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; -static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; +static const char *tevCOutputTable[] = { "iprev.rgb", "ic0.rgb", "ic1.rgb", "ic2.rgb", "icprev.rgb", "icc0.rgb", "icc1.rgb", "icc2.rgb", }; +static const char *tevAOutputTable[] = { "iprev.a", "ic0.a", "ic1.a", "ic2.a", "icprev.a", "icc0.a", "icc1.a", "icc2.a" }; static const char *tevIndAlphaSel[] = {"", "x", "y", "z"}; static const char *tevIndAlphaMask[] = {"0xF8", "0xE0", "0xF0", "0xF8"}; static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias @@ -389,14 +389,14 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write(" ) {\n"); } - out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0, 0.0, 0.0, 0.0);\n" + out.Write(" int4 ic0 = int4(round(" I_COLORS"[1] * 255.0)), ic1 = int4(round(" I_COLORS"[2] * 255.0)), ic2 = int4(round(" I_COLORS"[3] * 255.0)), iprev = int4(0, 0, 0, 0);\n" " int4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n" " float3 comp16 = float3(1.0, 255.0, 0.0), comp24 = float3(1.0, 255.0, 255.0*255.0);\n" " float alphabump=0.0;\n" " float3 tevcoord=float3(0.0, 0.0, 0.0);\n" " float2 wrappedcoord=float2(0.0,0.0), tempcoord=float2(0.0,0.0);\n" - " float4 cc0=float4(0.0,0.0,0.0,0.0), cc1=float4(0.0,0.0,0.0,0.0);\n" - " float4 cc2=float4(0.0,0.0,0.0,0.0), cprev=float4(0.0,0.0,0.0,0.0);\n" + " int4 icc0=int4(0, 0, 0, 0), icc1=int4(0, 0, 0, 0);\n" + " int4 icc2=int4(0, 0, 0, 0), icprev=int4(0, 0, 0, 0);\n" " int4 icrastemp = int4(0, 0, 0, 0), ickonsttemp = int4(0, 0, 0, 0);\n\n"); if (ApiType == API_OPENGL) @@ -532,17 +532,17 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (bpmem.combiners[numStages - 1].colorC.dest != 0) { bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored; - out.Write("\tprev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); + out.Write("\tiprev.rgb = %s;\n", tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest + (retrieveFromAuxRegister)?4:0]); RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl; } if (bpmem.combiners[numStages - 1].alphaC.dest != 0) { bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored; - out.Write("\tprev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); + out.Write("\tiprev.a = %s;\n", tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest + (retrieveFromAuxRegister)?4:0]); RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl; } } - out.Write("\tint4 iprev = int4(round(prev * 255.0)) & 0xFF;\n"); + out.Write("\tiprev = iprev & 0xFF;\n"); AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); uid_data.Pretest = Pretest; @@ -862,13 +862,13 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP { if (RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl) { - out.Write("cprev = frac(prev * (255.0/256.0)) * (256.0/255.0);\n"); + out.Write("icprev = iprev & 0xFF;\n"); RegisterStates[0].AlphaNeedOverflowControl = false; RegisterStates[0].ColorNeedOverflowControl = false; } else { - out.Write("cprev = prev;\n"); + out.Write("icprev = iprev;\n"); } RegisterStates[0].AuxStored = true; } @@ -882,13 +882,13 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.SetConstantsUsed(C_COLORS+1,C_COLORS+1); if (RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl) { - out.Write("cc0 = frac(c0 * (255.0/256.0)) * (256.0/255.0);\n"); + out.Write("icc0 = ic0 & 0xFF;\n"); RegisterStates[1].AlphaNeedOverflowControl = false; RegisterStates[1].ColorNeedOverflowControl = false; } else { - out.Write("cc0 = c0;\n"); + out.Write("icc0 = ic0;\n"); } RegisterStates[1].AuxStored = true; } @@ -902,13 +902,13 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.SetConstantsUsed(C_COLORS+2,C_COLORS+2); if (RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl) { - out.Write("cc1 = frac(c1 * (255.0/256.0)) * (256.0/255.0);\n"); + out.Write("icc1 = ic1 & 0xFF;\n"); RegisterStates[2].AlphaNeedOverflowControl = false; RegisterStates[2].ColorNeedOverflowControl = false; } else { - out.Write("cc1 = c1;\n"); + out.Write("icc1 = ic1;\n"); } RegisterStates[2].AuxStored = true; } @@ -922,13 +922,13 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.SetConstantsUsed(C_COLORS+3,C_COLORS+3); if (RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl) { - out.Write("cc2 = frac(c2 * (255.0/256.0)) * (256.0/255.0);\n"); + out.Write("icc2 = ic2 & 0xFF;\n"); RegisterStates[3].AlphaNeedOverflowControl = false; RegisterStates[3].ColorNeedOverflowControl = false; } else { - out.Write("cc2 = c2;\n"); + out.Write("icc2 = ic2;\n"); } RegisterStates[3].AuxStored = true; } @@ -957,6 +957,8 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP else out.Write("%s = ", tevCOutputTable[cc.dest]); + out.Write("int3("); + // combine the color channel if (cc.bias != TevBias_COMPARE) // if not compare { @@ -994,8 +996,9 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); } + out.Write(" * 255.0f)"); if (cc.clamp) - out.Write(", 0.0, 1.0)"); + out.Write(", 0, 255)"); out.Write(";\n"); RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0); @@ -1007,6 +1010,8 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP else out.Write("%s = ", tevAOutputTable[ac.dest]); + out.Write("int("); + if (ac.bias != TevBias_COMPARE) // if not compare { //normal alpha combiner goes here @@ -1043,8 +1048,9 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); } + out.Write(" * 255.0f)"); if (ac.clamp) - out.Write(", 0.0, 1.0)"); + out.Write(", 0, 255)"); out.Write(";\n\n"); out.Write("// TEV done\n"); } From a11ae69cb0312dd2e6d7b32ffc9d8292b0d50611 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 17:26:15 +0200 Subject: [PATCH 13/45] PixelShaderGen: Use integer math for TEV combiners. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 198 +++++++++------------ 1 file changed, 86 insertions(+), 112 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 3e49d567e7..515c239a18 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -101,17 +101,17 @@ static const char *tevKSelTableA[] = // KASEL static const char *tevScaleTable[] = // CS { - "1.0", // SCALE_1 - "2.0", // SCALE_2 - "4.0", // SCALE_4 - "0.5", // DIVIDE_2 + "*1", // SCALE_1 + "*2", // SCALE_2 + "*4", // SCALE_4 + "/ 2", // DIVIDE_2 }; static const char *tevBiasTable[] = // TB { "", // ZERO, - "+0.5", // ADDHALF, - "-0.5", // SUBHALF, + "+ 128", // ADDHALF, + "- 128", // SUBHALF, "", }; @@ -122,61 +122,61 @@ static const char *tevOpTable[] = { // TEV static const char *tevCInputTable[] = // CC { - "(float3(iprev.rgb)/255.0)", // CPREV, - "(float3(iprev.aaa)/255.0)", // APREV, - "(float3(ic0.rgb)/255.0)", // C0, - "(float3(ic0.aaa)/255.0)", // A0, - "(float3(ic1.rgb)/255.0)", // C1, - "(float3(ic1.aaa)/255.0)", // A1, - "(float3(ic2.rgb)/255.0)", // C2, - "(float3(ic2.aaa)/255.0)", // A2, - "(float3(itextemp.rgb)/255.0)", // TEXC, - "(float3(itextemp.aaa)/255.0)", // TEXA, - "(float3(irastemp.rgb)/255.0)", // RASC, - "(float3(irastemp.aaa)/255.0)", // RASA, - "float3(1.0, 1.0, 1.0)", // ONE - "float3(0.5, 0.5, 0.5)", // HALF - "(float3(ikonsttemp.rgb)/255.0)", //"konsttemp.rgb", // KONST - "float3(0.0, 0.0, 0.0)", // ZERO + "iprev.rgb", // CPREV, + "iprev.aaa", // APREV, + "ic0.rgb", // C0, + "ic0.aaa", // A0, + "ic1.rgb", // C1, + "ic1.aaa", // A1, + "ic2.rgb", // C2, + "ic2.aaa", // A2, + "itextemp.rgb", // TEXC, + "itextemp.aaa", // TEXA, + "irastemp.rgb", // RASC, + "irastemp.aaa", // RASA, + "int3(255,255,255)", // ONE + "int3(127,127,127)", // HALF + "ikonsttemp.rgb", // KONST + "int3(0,0,0)", // ZERO ///added extra values to map clamped values - "(float3(icprev.rgb)/255.0)", // CPREV, - "(float3(icprev.aaa)/255.0)", // APREV, - "(float3(icc0.rgb)/255.0)", // C0, - "(float3(icc0.aaa)/255.0)", // A0, - "(float3(icc1.rgb)/255.0)", // C1, - "(float3(icc1.aaa)/255.0)", // A1, - "(float3(icc2.rgb)/255.0)", // C2, - "(float3(icc2.aaa)/255.0)", // A2, - "(float3(itextemp.rgb)/255.0)", // TEXC, - "(float3(itextemp.aaa)/255.0)", // TEXA, - "(float3(icrastemp.rgb)/255.0)", // RASC, - "(float3(icrastemp.aaa)/255.0)", // RASA, - "float3(1.0, 1.0, 1.0)", // ONE - "float3(0.5, 0.5, 0.5)", // HALF - "(float3(ickonsttemp.rgb)/255.0)", //"konsttemp.rgb", // KONST - "float3(0.0, 0.0, 0.0)", // ZERO + "icprev.rgb", // CPREV, + "icprev.aaa", // APREV, + "icc0.rgb", // C0, + "icc0.aaa", // A0, + "icc1.rgb", // C1, + "icc1.aaa", // A1, + "icc2.rgb", // C2, + "icc2.aaa", // A2, + "itextemp.rgb", // TEXC, + "itextemp.aaa", // TEXA, + "icrastemp.rgb", // RASC, + "icrastemp.aaa", // RASA, + "int3(255,255,255)", // ONE + "int3(127,127,127)", // HALF + "ickonsttemp.rgb", // KONST + "int3(0,0,0)", // ZERO "PADERROR1", "PADERROR2", "PADERROR3", "PADERROR4" }; static const char *tevAInputTable[] = // CA { - "(float4(iprev)/255.0)", // APREV, - "(float4(ic0)/255.0)", // A0, - "(float4(ic1)/255.0)", // A1, - "(float4(ic2)/255.0)", // A2, - "(float4(itextemp) / 255.0)", // TEXA, - "(float4(irastemp) / 255.0)", // RASA, - "(float4(ikonsttemp) / 255.0)", // KONST, (hw1 had quarter) - "float4(0.0, 0.0, 0.0, 0.0)", // ZERO + "iprev", // APREV, + "ic0", // A0, + "ic1", // A1, + "ic2", // A2, + "itextemp", // TEXA, + "irastemp", // RASA, + "ikonsttemp", // KONST, (hw1 had quarter) + "int4(0,0,0,0)", // ZERO ///added extra values to map clamped values - "(float4(icprev)/255.0)", // APREV, - "(float4(icc0)/255.0)", // A0, - "(float4(icc1)/255.0)", // A1, - "(float4(icc2)/255.0)", // A2, - "(float4(itextemp) / 255.0)", // TEXA, - "(float4(icrastemp) / 255.0)", // RASA, - "(float4(ickonsttemp) / 255.0)", // KONST, (hw1 had quarter) - "float4(0.0, 0.0, 0.0, 0.0)", // ZERO + "icprev", // APREV, + "icc0", // A0, + "icc1", // A1, + "icc2", // A2, + "itextemp", // TEXA, + "icrastemp", // RASA, + "ickonsttemp", // KONST, (hw1 had quarter) + "int4(0,0,0,0)", // ZERO "PADERROR5", "PADERROR6", "PADERROR7", "PADERROR8", "PADERROR9", "PADERROR10", "PADERROR11", "PADERROR12", }; @@ -391,7 +391,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write(" int4 ic0 = int4(round(" I_COLORS"[1] * 255.0)), ic1 = int4(round(" I_COLORS"[2] * 255.0)), ic2 = int4(round(" I_COLORS"[3] * 255.0)), iprev = int4(0, 0, 0, 0);\n" " int4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n" - " float3 comp16 = float3(1.0, 255.0, 0.0), comp24 = float3(1.0, 255.0, 255.0*255.0);\n" + " int3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" " float alphabump=0.0;\n" " float3 tevcoord=float3(0.0, 0.0, 0.0);\n" " float2 wrappedcoord=float2(0.0,0.0), tempcoord=float2(0.0,0.0);\n" @@ -647,14 +647,14 @@ static const char *TEVCMPColorOPTable[16] = "float3(0.0, 0.0, 0.0)",//5 "float3(0.0, 0.0, 0.0)",//6 "float3(0.0, 0.0, 0.0)",//7 - " %s + ((%s.r >= %s.r + (0.25/255.0)) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_R8_GT 8 - " %s + ((abs(%s.r - %s.r) < (0.5/255.0)) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_R8_EQ 9 - " %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25/255.0))) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_GR16_GT 10 - " %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5/255.0) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_GR16_EQ 11 - " %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25/255.0))) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_BGR24_GT 12 - " %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5/255.0) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_BGR24_EQ 13 - " %s + (max(sign(%s.rgb - %s.rgb - (0.25/255.0)), float3(0.0, 0.0, 0.0)) * %s)",//#define TEVCMP_RGB8_GT 14 - " %s + ((float3(1.0, 1.0, 1.0) - max(sign(abs(%s.rgb - %s.rgb) - (0.5/255.0)), float3(0.0, 0.0, 0.0))) * %s)"//#define TEVCMP_RGB8_EQ 15 + " %s + ((%s.r > %s.r) ? %s : int3(0,0,0))",//#define TEVCMP_R8_GT 8 + " %s + ((%s.r == %s.r) ? %s : int3(0,0,0))",//#define TEVCMP_R8_EQ 9 + " %s + ((idot(%s.rgb, comp16) > idot(%s.rgb, comp16)) ? %s : int3(0,0,0))",//#define TEVCMP_GR16_GT 10 + " %s + ((idot(%s.rgb, comp16) == idot(%s.rgb, comp16)) ? %s : int3(0,0,0))",//#define TEVCMP_GR16_EQ 11 + " %s + ((idot(%s.rgb, comp24) > idot(%s.rgb, comp24)) ? %s : int3(0,0,0))",//#define TEVCMP_BGR24_GT 12 + " %s + ((idot(%s.rgb, comp24) == idot(%s.rgb, comp24)) ? %s : int3(0,0,0))",//#define TEVCMP_BGR24_EQ 13 + " %s + int3(max(sign(int3(%s.rgb) - int3(%s.rgb)), int3(0,0,0)) * %s)",//#define TEVCMP_RGB8_GT 14 + " %s + int3((int3(255,255,255) - max(sign(abs(int3(%s.rgb) - int3(%s.rgb))), int3(0,0,0))) * %s)"//#define TEVCMP_RGB8_EQ 15 }; //table with the alpha compare operations @@ -668,14 +668,14 @@ static const char *TEVCMPAlphaOPTable[16] = "0.0",//5 "0.0",//6 "0.0",//7 - " %s.a + ((%s.r >= (%s.r + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_R8_GT 8 - " %s.a + (abs(%s.r - %s.r) < (0.5/255.0) ? %s.a : 0.0)",//#define TEVCMP_R8_EQ 9 - " %s.a + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_GR16_GT 10 - " %s.a + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5/255.0) ? %s.a : 0.0)",//#define TEVCMP_GR16_EQ 11 - " %s.a + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_BGR24_GT 12 - " %s.a + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5/255.0) ? %s.a : 0.0)",//#define TEVCMP_BGR24_EQ 13 - " %s.a + ((%s.a >= (%s.a + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_A8_GT 14 - " %s.a + (abs(%s.a - %s.a) < (0.5/255.0) ? %s.a : 0.0)"//#define TEVCMP_A8_EQ 15 + " %s.a + ((%s.r > %s.r) ? %s.a : 0)",//#define TEVCMP_R8_GT 8 + " %s.a + ((%s.r == %s.r) ? %s.a : 0)",//#define TEVCMP_R8_EQ 9 + " %s.a + ((idot(%s.rgb, comp16) > idot(%s.rgb, comp16)) ? %s.a : 0)",//#define TEVCMP_GR16_GT 10 + " %s.a + ((idot(%s.rgb, comp16) == idot(%s.rgb, comp16)) ? %s.a : 0)",//#define TEVCMP_GR16_EQ 11 + " %s.a + ((idot(%s.rgb, comp24) > idot(%s.rgb, comp24)) ? %s.a : 0)",//#define TEVCMP_BGR24_GT 12 + " %s.a + ((idot(%s.rgb, comp24) == idot(%s.rgb, comp24)) ? %s.a : 0)",//#define TEVCMP_BGR24_EQ 13 + " %s.a + ((%s.a > %s.a) ? %s.a : 0)",//#define TEVCMP_A8_GT 14 + " %s.a + ((%s.a == %s.a) ? %s.a : 0)" //#define TEVCMP_A8_EQ 15 }; template @@ -955,37 +955,24 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (cc.clamp) out.Write("%s = clamp(", tevCOutputTable[cc.dest]); else - out.Write("%s = ", tevCOutputTable[cc.dest]); - - out.Write("int3("); + out.Write("%s = (", tevCOutputTable[cc.dest]); // combine the color channel if (cc.bias != TevBias_COMPARE) // if not compare { //normal color combiner goes here if (cc.shift > TEVSCALE_1) - out.Write("%s*(", tevScaleTable[cc.shift]); + out.Write("("); if (!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) - out.Write("%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]); + out.Write("%s %s ", tevCInputTable[cc.d], tevOpTable[cc.op]); - if (cc.a == cc.b) - out.Write("%s", tevCInputTable[cc.a + 16]); - else if (cc.c == TEVCOLORARG_ZERO) - out.Write("%s", tevCInputTable[cc.a + 16]); - else if (cc.c == TEVCOLORARG_ONE) - out.Write("%s", tevCInputTable[cc.b + 16]); - else if (cc.a == TEVCOLORARG_ZERO) - out.Write("%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); - else if (cc.b == TEVCOLORARG_ZERO) - out.Write("%s*(float3(1.0, 1.0, 1.0)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]); - else - out.Write("lerp(%s, %s, %s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); + out.Write("(%s * %s + %s * (int3(255,255,255) - %s)) / 255", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); out.Write("%s", tevBiasTable[cc.bias]); if (cc.shift > TEVSCALE_1) - out.Write(")"); + out.Write(")%s", tevScaleTable[cc.shift]); } else { @@ -996,10 +983,9 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); } - out.Write(" * 255.0f)"); if (cc.clamp) - out.Write(", 0, 255)"); - out.Write(";\n"); + out.Write(", int3(0,0,0), int3(255,255,255)"); + out.Write(");\n"); RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0); RegisterStates[ac.dest].AuxStored = false; @@ -1008,34 +994,23 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (ac.clamp) out.Write("%s = clamp(", tevAOutputTable[ac.dest]); else - out.Write("%s = ", tevAOutputTable[ac.dest]); - - out.Write("int("); + out.Write("%s = (", tevAOutputTable[ac.dest]); if (ac.bias != TevBias_COMPARE) // if not compare { //normal alpha combiner goes here - if (ac.shift > TEVSCALE_1) - out.Write("%s*(", tevScaleTable[ac.shift]); + if (ac.shift > 0) + out.Write("("); if (!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) - out.Write("%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]); + out.Write("%s.a %s ", tevAInputTable[ac.d], tevOpTable[ac.op]); - if (ac.a == ac.b) - out.Write("%s.a", tevAInputTable[ac.a + 8]); - else if (ac.c == TEVALPHAARG_ZERO) - out.Write("%s.a", tevAInputTable[ac.a + 8]); - else if (ac.a == TEVALPHAARG_ZERO) - out.Write("%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); - else if (ac.b == TEVALPHAARG_ZERO) - out.Write("%s.a*(1.0-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]); - else - out.Write("lerp(%s.a, %s.a, %s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); + out.Write("(%s.a * %s.a + %s.a * (255 - %s.a)) / 255", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); out.Write("%s",tevBiasTable[ac.bias]); if (ac.shift>0) - out.Write(")"); + out.Write(")%s", tevScaleTable[ac.shift]); } else @@ -1048,10 +1023,9 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); } - out.Write(" * 255.0f)"); if (ac.clamp) - out.Write(", 0, 255)"); - out.Write(";\n\n"); + out.Write(", 0, 255"); + out.Write(");\n\n"); out.Write("// TEV done\n"); } From cb1514e0821e21ed3e05802e4161a31a1b7d82ba Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 18:24:47 +0200 Subject: [PATCH 14/45] PixelShaderGen: Remove superfluous registerstate stuff. Also, made alphabump an integer. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 187 +++++---------------- 1 file changed, 39 insertions(+), 148 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 515c239a18..12b42afebc 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -188,8 +188,8 @@ static const char *tevRasTable[] = "ERROR13", //2 "ERROR14", //3 "ERROR15", //4 - "int4(1,1,1,1) * int(round(alphabump * 255.0))", // use bump alpha - "int4(1,1,1,1) * int(round(alphabump * 255.0)) * 255 / 248)", //normalized + "(int4(1,1,1,1) * alphabump)", // use bump alpha + "(int4(1,1,1,1) * (alphabump | (alphabump >> 5)))", //normalized "int4(0, 0, 0, 0)", // zero }; @@ -205,16 +205,9 @@ static const char *tevIndWrapStart[] = {"0.0", "256.0", "128.0", "64.0", "32.0" static const char *tevIndFmtScale[] = {"255.0", "31.0", "15.0", "7.0" }; static const char *tevIndFmtMask[] = {"0xFF", "0x1F", "0x0F", "0x07" }; -struct RegisterState -{ - bool ColorNeedOverflowControl; - bool AlphaNeedOverflowControl; - bool AuxStored; -}; - static char text[16384]; -template static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4], const char swapModeTable[4][5]); +template static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, const char swapModeTable[4][5]); template static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType); template static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth); template static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data); @@ -392,7 +385,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write(" int4 ic0 = int4(round(" I_COLORS"[1] * 255.0)), ic1 = int4(round(" I_COLORS"[2] * 255.0)), ic2 = int4(round(" I_COLORS"[3] * 255.0)), iprev = int4(0, 0, 0, 0);\n" " int4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n" " int3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" - " float alphabump=0.0;\n" + " int alphabump=0;\n" " float3 tevcoord=float3(0.0, 0.0, 0.0);\n" " float2 wrappedcoord=float2(0.0,0.0), tempcoord=float2(0.0,0.0);\n" " int4 icc0=int4(0, 0, 0, 0), icc1=int4(0, 0, 0, 0);\n" @@ -494,17 +487,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T } } - RegisterState RegisterStates[4]; - RegisterStates[0].AlphaNeedOverflowControl = false; - RegisterStates[0].ColorNeedOverflowControl = false; - RegisterStates[0].AuxStored = false; - for (int i = 1; i < 4; i++) - { - RegisterStates[i].AlphaNeedOverflowControl = true; - RegisterStates[i].ColorNeedOverflowControl = true; - RegisterStates[i].AuxStored = false; - } - // Uid fields for BuildSwapModeTable are set in WriteStage char swapModeTable[4][5]; const char* swapColors = "rgba"; @@ -518,7 +500,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T } for (unsigned int i = 0; i < numStages; i++) - WriteStage(out, uid_data, i, ApiType, RegisterStates, swapModeTable); // build the equation for this stage + WriteStage(out, uid_data, i, ApiType, swapModeTable); // build the equation for this stage #define MY_STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str))) bool enable_pl = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; @@ -531,15 +513,11 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // regardless of the used destination register if (bpmem.combiners[numStages - 1].colorC.dest != 0) { - bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored; - out.Write("\tiprev.rgb = %s;\n", tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest + (retrieveFromAuxRegister)?4:0]); - RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl; + out.Write("\tiprev.rgb = %s;\n", tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); } if (bpmem.combiners[numStages - 1].alphaC.dest != 0) { - bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored; - out.Write("\tiprev.a = %s;\n", tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest + (retrieveFromAuxRegister)?4:0]); - RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl; + out.Write("\tiprev.a = %s;\n", tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); } } out.Write("\tiprev = iprev & 0xFF;\n"); @@ -647,14 +625,14 @@ static const char *TEVCMPColorOPTable[16] = "float3(0.0, 0.0, 0.0)",//5 "float3(0.0, 0.0, 0.0)",//6 "float3(0.0, 0.0, 0.0)",//7 - " %s + ((%s.r > %s.r) ? %s : int3(0,0,0))",//#define TEVCMP_R8_GT 8 - " %s + ((%s.r == %s.r) ? %s : int3(0,0,0))",//#define TEVCMP_R8_EQ 9 - " %s + ((idot(%s.rgb, comp16) > idot(%s.rgb, comp16)) ? %s : int3(0,0,0))",//#define TEVCMP_GR16_GT 10 - " %s + ((idot(%s.rgb, comp16) == idot(%s.rgb, comp16)) ? %s : int3(0,0,0))",//#define TEVCMP_GR16_EQ 11 - " %s + ((idot(%s.rgb, comp24) > idot(%s.rgb, comp24)) ? %s : int3(0,0,0))",//#define TEVCMP_BGR24_GT 12 - " %s + ((idot(%s.rgb, comp24) == idot(%s.rgb, comp24)) ? %s : int3(0,0,0))",//#define TEVCMP_BGR24_EQ 13 - " %s + int3(max(sign(int3(%s.rgb) - int3(%s.rgb)), int3(0,0,0)) * %s)",//#define TEVCMP_RGB8_GT 14 - " %s + int3((int3(255,255,255) - max(sign(abs(int3(%s.rgb) - int3(%s.rgb))), int3(0,0,0))) * %s)"//#define TEVCMP_RGB8_EQ 15 + " %s + (((%s.r&0xFF) > %s.r) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_R8_GT 8 + " %s + (((%s.r&0xFF) == %s.r) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_R8_EQ 9 + " %s + ((idot((%s.rgb&0xFF), comp16) > idot((%s.rgb&0xFF), comp16)) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_GR16_GT 10 + " %s + ((idot((%s.rgb&0xFF), comp16) == idot((%s.rgb&0xFF), comp16)) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_GR16_EQ 11 + " %s + ((idot((%s.rgb&0xFF), comp24) > idot((%s.rgb&0xFF), comp24)) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_BGR24_GT 12 + " %s + ((idot((%s.rgb&0xFF), comp24) == idot((%s.rgb&0xFF), comp24)) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_BGR24_EQ 13 + " %s + int3(max(sign(int3((%s.rgb&0xFF)) - int3((%s.rgb&0xFF))), int3(0,0,0)) * (%s&0xFF))",//#define TEVCMP_RGB8_GT 14 + " %s + int3((int3(255,255,255) - max(sign(abs(int3((%s.rgb&0xFF)) - int3((%s.rgb&0xFF)))), int3(0,0,0))) * (%s&0xFF))"//#define TEVCMP_RGB8_EQ 15 }; //table with the alpha compare operations @@ -668,18 +646,18 @@ static const char *TEVCMPAlphaOPTable[16] = "0.0",//5 "0.0",//6 "0.0",//7 - " %s.a + ((%s.r > %s.r) ? %s.a : 0)",//#define TEVCMP_R8_GT 8 - " %s.a + ((%s.r == %s.r) ? %s.a : 0)",//#define TEVCMP_R8_EQ 9 - " %s.a + ((idot(%s.rgb, comp16) > idot(%s.rgb, comp16)) ? %s.a : 0)",//#define TEVCMP_GR16_GT 10 - " %s.a + ((idot(%s.rgb, comp16) == idot(%s.rgb, comp16)) ? %s.a : 0)",//#define TEVCMP_GR16_EQ 11 - " %s.a + ((idot(%s.rgb, comp24) > idot(%s.rgb, comp24)) ? %s.a : 0)",//#define TEVCMP_BGR24_GT 12 - " %s.a + ((idot(%s.rgb, comp24) == idot(%s.rgb, comp24)) ? %s.a : 0)",//#define TEVCMP_BGR24_EQ 13 - " %s.a + ((%s.a > %s.a) ? %s.a : 0)",//#define TEVCMP_A8_GT 14 - " %s.a + ((%s.a == %s.a) ? %s.a : 0)" //#define TEVCMP_A8_EQ 15 + " %s.a + (((%s.r&0xFF) > (%s.r&0xFF)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_R8_GT 8 + " %s.a + (((%s.r&0xFF) == (%s.r&0xFF)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_R8_EQ 9 + " %s.a + ((idot((%s.rgb&0xFF), comp16) > idot((%s.rgb&0xFF), comp16)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_GR16_GT 10 + " %s.a + ((idot((%s.rgb&0xFF), comp16) == idot((%s.rgb&0xFF), comp16)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_GR16_EQ 11 + " %s.a + ((idot((%s.rgb&0xFF), comp24) > idot((%s.rgb&0xFF), comp24)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_BGR24_GT 12 + " %s.a + ((idot((%s.rgb&0xFF), comp24) == idot((%s.rgb&0xFF), comp24)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_BGR24_EQ 13 + " %s.a + (((%s.a&0xFF) > (%s.a&0xFF)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_A8_GT 14 + " %s.a + (((%s.a&0xFF) == (%s.a&0xFF)) ? (%s.a&0xFF) : 0)" //#define TEVCMP_A8_EQ 15 }; template -static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, RegisterState RegisterStates[4], const char swapModeTable[4][5]) +static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, const char swapModeTable[4][5]) { int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1); bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; @@ -700,7 +678,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP // perform the indirect op on the incoming regular coordinates using iindtex%d as the offset coords if (bpmem.tevind[n].bs != ITBA_OFF) { - out.Write("alphabump = float(iindtex%d.%s & %s) / 255.0;\n", + out.Write("alphabump = iindtex%d.%s & %s;\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaMask[bpmem.tevind[n].fmt]); @@ -799,7 +777,6 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP const char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; out.Write("irastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); - out.Write("icrastemp = irastemp & 0xFF;\n"); } uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); @@ -846,7 +823,6 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP uid_data.stagehash[n].tevksel_kc = kc; uid_data.stagehash[n].tevksel_ka = ka; out.Write("ikonsttemp = int4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); - out.Write("ickonsttemp = ikonsttemp & 0xFF;\n"); if (kc > 7) out.SetConstantsUsed(C_KCOLORS+((kc-0xc)%4),C_KCOLORS+((kc-0xc)%4)); @@ -854,88 +830,6 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.SetConstantsUsed(C_KCOLORS+((ka-0xc)%4),C_KCOLORS+((ka-0xc)%4)); } - if (cc.a == TEVCOLORARG_CPREV || cc.a == TEVCOLORARG_APREV || - cc.b == TEVCOLORARG_CPREV || cc.b == TEVCOLORARG_APREV || - cc.c == TEVCOLORARG_CPREV || cc.c == TEVCOLORARG_APREV || - ac.a == TEVALPHAARG_APREV || ac.b == TEVALPHAARG_APREV || - ac.c == TEVALPHAARG_APREV) - { - if (RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl) - { - out.Write("icprev = iprev & 0xFF;\n"); - RegisterStates[0].AlphaNeedOverflowControl = false; - RegisterStates[0].ColorNeedOverflowControl = false; - } - else - { - out.Write("icprev = iprev;\n"); - } - RegisterStates[0].AuxStored = true; - } - - if (cc.a == TEVCOLORARG_C0 || cc.a == TEVCOLORARG_A0 || - cc.b == TEVCOLORARG_C0 || cc.b == TEVCOLORARG_A0 || - cc.c == TEVCOLORARG_C0 || cc.c == TEVCOLORARG_A0 || - ac.a == TEVALPHAARG_A0 || ac.b == TEVALPHAARG_A0 || - ac.c == TEVALPHAARG_A0) - { - out.SetConstantsUsed(C_COLORS+1,C_COLORS+1); - if (RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl) - { - out.Write("icc0 = ic0 & 0xFF;\n"); - RegisterStates[1].AlphaNeedOverflowControl = false; - RegisterStates[1].ColorNeedOverflowControl = false; - } - else - { - out.Write("icc0 = ic0;\n"); - } - RegisterStates[1].AuxStored = true; - } - - if (cc.a == TEVCOLORARG_C1 || cc.a == TEVCOLORARG_A1 || - cc.b == TEVCOLORARG_C1 || cc.b == TEVCOLORARG_A1 || - cc.c == TEVCOLORARG_C1 || cc.c == TEVCOLORARG_A1 || - ac.a == TEVALPHAARG_A1 || ac.b == TEVALPHAARG_A1 || - ac.c == TEVALPHAARG_A1) - { - out.SetConstantsUsed(C_COLORS+2,C_COLORS+2); - if (RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl) - { - out.Write("icc1 = ic1 & 0xFF;\n"); - RegisterStates[2].AlphaNeedOverflowControl = false; - RegisterStates[2].ColorNeedOverflowControl = false; - } - else - { - out.Write("icc1 = ic1;\n"); - } - RegisterStates[2].AuxStored = true; - } - - if (cc.a == TEVCOLORARG_C2 || cc.a == TEVCOLORARG_A2 || - cc.b == TEVCOLORARG_C2 || cc.b == TEVCOLORARG_A2 || - cc.c == TEVCOLORARG_C2 || cc.c == TEVCOLORARG_A2 || - ac.a == TEVALPHAARG_A2 || ac.b == TEVALPHAARG_A2 || - ac.c == TEVALPHAARG_A2) - { - out.SetConstantsUsed(C_COLORS+3,C_COLORS+3); - if (RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl) - { - out.Write("icc2 = ic2 & 0xFF;\n"); - RegisterStates[3].AlphaNeedOverflowControl = false; - RegisterStates[3].ColorNeedOverflowControl = false; - } - else - { - out.Write("icc2 = ic2;\n"); - } - RegisterStates[3].AuxStored = true; - } - - RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0); - RegisterStates[cc.dest].AuxStored = false; - if (cc.d == TEVCOLORARG_C0 || cc.d == TEVCOLORARG_A0 || ac.d == TEVALPHAARG_A0) out.SetConstantsUsed(C_COLORS+1,C_COLORS+1); @@ -955,7 +849,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (cc.clamp) out.Write("%s = clamp(", tevCOutputTable[cc.dest]); else - out.Write("%s = (", tevCOutputTable[cc.dest]); + out.Write("%s = ", tevCOutputTable[cc.dest]); // combine the color channel if (cc.bias != TevBias_COMPARE) // if not compare @@ -967,7 +861,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) out.Write("%s %s ", tevCInputTable[cc.d], tevOpTable[cc.op]); - out.Write("(%s * %s + %s * (int3(255,255,255) - %s)) / 255", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]); + out.Write("((%s&0xFF) * (int3(255,255,255) - (%s&0xFF)) + (%s&0xFF) * (%s&0xFF)) / 255", tevCInputTable[cc.a], tevCInputTable[cc.c], tevCInputTable[cc.b], tevCInputTable[cc.c]); out.Write("%s", tevBiasTable[cc.bias]); @@ -979,22 +873,19 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here out.Write(TEVCMPColorOPTable[cmp],//lookup the function from the op table tevCInputTable[cc.d], - tevCInputTable[cc.a + 16], - tevCInputTable[cc.b + 16], - tevCInputTable[cc.c + 16]); + tevCInputTable[cc.a], + tevCInputTable[cc.b], + tevCInputTable[cc.c]); } if (cc.clamp) - out.Write(", int3(0,0,0), int3(255,255,255)"); - out.Write(");\n"); - - RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0); - RegisterStates[ac.dest].AuxStored = false; + out.Write(", int3(0,0,0), int3(255,255,255))"); + out.Write(";\n"); out.Write("// alpha combine\n"); if (ac.clamp) out.Write("%s = clamp(", tevAOutputTable[ac.dest]); else - out.Write("%s = (", tevAOutputTable[ac.dest]); + out.Write("%s = ", tevAOutputTable[ac.dest]); if (ac.bias != TevBias_COMPARE) // if not compare { @@ -1005,7 +896,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) out.Write("%s.a %s ", tevAInputTable[ac.d], tevOpTable[ac.op]); - out.Write("(%s.a * %s.a + %s.a * (255 - %s.a)) / 255", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]); + out.Write("((%s.a&0xFF) * (255 - (%s.a&0xFF)) + (%s.a&0xFF) * (%s.a&0xFF)) / 255", tevAInputTable[ac.a], tevAInputTable[ac.c], tevAInputTable[ac.b], tevAInputTable[ac.c]); out.Write("%s",tevBiasTable[ac.bias]); @@ -1019,13 +910,13 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here out.Write(TEVCMPAlphaOPTable[cmp], tevAInputTable[ac.d], - tevAInputTable[ac.a + 8], - tevAInputTable[ac.b + 8], - tevAInputTable[ac.c + 8]); + tevAInputTable[ac.a], + tevAInputTable[ac.b], + tevAInputTable[ac.c]); } if (ac.clamp) - out.Write(", 0, 255"); - out.Write(");\n\n"); + out.Write(", 0, 255)"); + out.Write(";\n\n"); out.Write("// TEV done\n"); } From e7a42d884f22c1242234a0fe5a6de7cca3acbc06 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 14 Aug 2013 18:54:43 +0200 Subject: [PATCH 15/45] PixelShaderGen: prev should be initialized to the proper value; tev output needs to be clamped between -1024 and 1023. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 12b42afebc..02a5e7831c 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -382,7 +382,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write(" ) {\n"); } - out.Write(" int4 ic0 = int4(round(" I_COLORS"[1] * 255.0)), ic1 = int4(round(" I_COLORS"[2] * 255.0)), ic2 = int4(round(" I_COLORS"[3] * 255.0)), iprev = int4(0, 0, 0, 0);\n" + out.Write(" int4 ic0 = int4(round(" I_COLORS"[1] * 255.0)), ic1 = int4(round(" I_COLORS"[2] * 255.0)), ic2 = int4(round(" I_COLORS"[3] * 255.0)), iprev = int4(round(" I_COLORS"[0] * 255.0));\n" " int4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n" " int3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" " int alphabump=0;\n" @@ -846,10 +846,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest); out.Write("// color combine\n"); - if (cc.clamp) - out.Write("%s = clamp(", tevCOutputTable[cc.dest]); - else - out.Write("%s = ", tevCOutputTable[cc.dest]); + out.Write("%s = clamp(", tevCOutputTable[cc.dest]); // combine the color channel if (cc.bias != TevBias_COMPARE) // if not compare @@ -879,13 +876,12 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP } if (cc.clamp) out.Write(", int3(0,0,0), int3(255,255,255))"); + else + out.Write(", int3(-1024,-1024,-1024), int3(1023,1023,1023))"); out.Write(";\n"); - out.Write("// alpha combine\n"); - if (ac.clamp) - out.Write("%s = clamp(", tevAOutputTable[ac.dest]); - else - out.Write("%s = ", tevAOutputTable[ac.dest]); + out.Write("\t// alpha combine\n"); + out.Write("\t%s = clamp(", tevAOutputTable[ac.dest]); if (ac.bias != TevBias_COMPARE) // if not compare { @@ -916,6 +912,9 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP } if (ac.clamp) out.Write(", 0, 255)"); + else + out.Write(", -1024, 1023)"); + out.Write(";\n\n"); out.Write("// TEV done\n"); } From cff952c397833873055e42a85b76ff8a621a0792 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 12 Sep 2013 13:55:38 +0200 Subject: [PATCH 16/45] PixelShaderGen: Use integer math for indirect tev stage texcoord calculation. --- Source/Core/VideoBackends/Software/Tev.cpp | 2 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 61 +++++++++++-------- .../Core/VideoCommon/PixelShaderManager.cpp | 13 ++-- 3 files changed, 41 insertions(+), 35 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index ad21496e88..0c80e04c93 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -478,7 +478,7 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t) case ITBA_OFF: AlphaBump = 0; break; - case ITBA_S: + case ITBA_S: AlphaBump = indmap[TextureSampler::ALP_SMP]; break; case ITBA_T: diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 02a5e7831c..53a466c1b9 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -193,17 +193,8 @@ static const char *tevRasTable[] = "int4(0, 0, 0, 0)", // zero }; -//static const char *tevTexFunc[] = { "tex2D", "texRECT" }; - static const char *tevCOutputTable[] = { "iprev.rgb", "ic0.rgb", "ic1.rgb", "ic2.rgb", "icprev.rgb", "icc0.rgb", "icc1.rgb", "icc2.rgb", }; static const char *tevAOutputTable[] = { "iprev.a", "ic0.a", "ic1.a", "ic2.a", "icprev.a", "icc0.a", "icc1.a", "icc2.a" }; -static const char *tevIndAlphaSel[] = {"", "x", "y", "z"}; -static const char *tevIndAlphaMask[] = {"0xF8", "0xE0", "0xF0", "0xF8"}; -static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias -static const char *tevIndBiasAdd[] = {"-128", "1", "1", "1" }; // indexed by fmt -static const char *tevIndWrapStart[] = {"0.0", "256.0", "128.0", "64.0", "32.0", "16.0", "0.001" }; -static const char *tevIndFmtScale[] = {"255.0", "31.0", "15.0", "7.0" }; -static const char *tevIndFmtMask[] = {"0xFF", "0x1F", "0x0F", "0x07" }; static char text[16384]; @@ -386,8 +377,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T " int4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n" " int3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" " int alphabump=0;\n" - " float3 tevcoord=float3(0.0, 0.0, 0.0);\n" - " float2 wrappedcoord=float2(0.0,0.0), tempcoord=float2(0.0,0.0);\n" + " int3 tevcoord=int3(0, 0, 0);\n" + " int2 wrappedcoord=int2(0,0); float2 tempcoord=float2(0.0,0.0);\n" " int4 icc0=int4(0, 0, 0, 0), icc1=int4(0, 0, 0, 0);\n" " int4 icc2=int4(0, 0, 0, 0), icprev=int4(0, 0, 0, 0);\n" " int4 icrastemp = int4(0, 0, 0, 0), ickonsttemp = int4(0, 0, 0, 0);\n\n"); @@ -678,15 +669,25 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP // perform the indirect op on the incoming regular coordinates using iindtex%d as the offset coords if (bpmem.tevind[n].bs != ITBA_OFF) { + const char *tevIndAlphaSel[] = {"", "x", "y", "z"}; + const char *tevIndAlphaMask[] = {"0xF8", "0xE0", "0xF0", "0xF8"}; out.Write("alphabump = iindtex%d.%s & %s;\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs], tevIndAlphaMask[bpmem.tevind[n].fmt]); } + else + { + // TODO: Should we reset alphabump to 0 here? + } + // format + const char *tevIndFmtMask[] = {"0xFF", "0x1F", "0x0F", "0x07" }; out.Write("int3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, tevIndFmtMask[bpmem.tevind[n].fmt]); // bias - TODO: Check if this needs to be this complicated.. + const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias + const char *tevIndBiasAdd[] = {"-128", "1", "1", "1" }; // indexed by fmt if (bpmem.tevind[n].bias == ITB_S || bpmem.tevind[n].bias == ITB_T || bpmem.tevind[n].bias == ITB_U) out.Write("iindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); else if (bpmem.tevind[n].bias == ITB_ST || bpmem.tevind[n].bias == ITB_SU || bpmem.tevind[n].bias == ITB_TU) @@ -694,14 +695,16 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP else if (bpmem.tevind[n].bias == ITB_STU) out.Write("iindtevcrd%d.%s += int3(%s, %s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); - // multiply by offset matrix and scale + // multiply by offset matrix and scale - calculations are likely to overflow badly, + // yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result if (bpmem.tevind[n].mid != 0) { if (bpmem.tevind[n].mid <= 3) { int mtxidx = 2*(bpmem.tevind[n].mid-1); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d)), dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d)));\n", + + out.Write("int2 indtevtrans%d = int2(round(dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d)), dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d))));\n", n, mtxidx, n, mtxidx+1, n); } else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) @@ -709,49 +712,53 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.xx);\n", n, mtxidx, texcoord, n); + out.Write("int2 indtevtrans%d = int2(round(" I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.xx)));\n", n, mtxidx, texcoord, n); } else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) { // t matrix _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.yy);\n", n, mtxidx, texcoord, n); + out.Write("int2 indtevtrans%d = int2(round(" I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.yy)));\n", n, mtxidx, texcoord, n); } else { - out.Write("float2 indtevtrans%d = float2(0.0, 0.0);\n", n); + out.Write("int2 indtevtrans%d = int2(0, 0);\n", n); } } else { - out.Write("float2 indtevtrans%d = float2(0.0, 0.0);\n", n); + out.Write("int2 indtevtrans%d = int2(0, 0);\n", n); } // --------- // Wrapping // --------- + const char *tevIndWrapStart[] = {"0", "(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)", "1" }; // wrap S if (bpmem.tevind[n].sw == ITW_OFF) - out.Write("wrappedcoord.x = uv%d.x;\n", texcoord); + out.Write("wrappedcoord.x = int(round(uv%d.x*256.0));\n", texcoord); else if (bpmem.tevind[n].sw == ITW_0) - out.Write("wrappedcoord.x = 0.0;\n"); + out.Write("wrappedcoord.x = 0;\n"); else - out.Write("wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); + out.Write("wrappedcoord.x = int(round(uv%d.x*256.0)) %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); // wrap T if (bpmem.tevind[n].tw == ITW_OFF) - out.Write("wrappedcoord.y = uv%d.y;\n", texcoord); + out.Write("wrappedcoord.y = int(round(uv%d.y*256.0));\n", texcoord); else if (bpmem.tevind[n].tw == ITW_0) - out.Write("wrappedcoord.y = 0.0;\n"); + out.Write("wrappedcoord.y = 0;\n"); else - out.Write("wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); + out.Write("wrappedcoord.y = int(round(uv%d.y*256.0)) %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); if (bpmem.tevind[n].fb_addprev) // add previous tevcoord out.Write("tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); else out.Write("tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); + + // Emulate s24 overflows + out.Write("tevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); } TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; @@ -782,13 +789,14 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); if (bpmem.tevorders[n/2].getEnable(n&1)) { + int texmap = bpmem.tevorders[n/2].getTexMap(n&1); if (!bHasIndStage) { // calc tevcord if (bHasTexCoord) - out.Write("tevcoord.xy = uv%d.xy;\n", texcoord); + out.Write("tevcoord.xy = int2(round(uv%d.xy*256.0));\n", texcoord); else - out.Write("tevcoord.xy = float2(0.0, 0.0);\n"); + out.Write("tevcoord.xy = int2(0, 0);\n"); } const int i = bpmem.combiners[n].alphaC.tswap; @@ -801,11 +809,10 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP uid_data.stagehash[n].tevorders_texmap= bpmem.tevorders[n/2].getTexMap(n&1); const char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; - int texmap = bpmem.tevorders[n/2].getTexMap(n&1); uid_data.SetTevindrefTexmap(i, texmap); out.Write("itextemp = "); - SampleTexture(out, "tevcoord", texswap, texmap, ApiType); + SampleTexture(out, "(float2(tevcoord.xy)/256.0)", texswap, texmap, ApiType); } else { diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 9ed7ff2a76..9173396fac 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -212,25 +212,24 @@ void PixelShaderManager::SetIndMatrixChanged(int matrixidx) int scale = ((u32)bpmem.indmtx[matrixidx].col0.s0 << 0) | ((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) | ((u32)bpmem.indmtx[matrixidx].col2.s2 << 4); - float fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f; + float fscale = powf(2.0f, (float)(scale - 17)) / 8.0f; // xyz - static matrix - // TODO w - dynamic matrix scale / 256...... somehow / 4 works better - // rev 2972 - now using / 256.... verify that this works + // w - dynamic matrix scale / 128 constants.indtexmtx[2*matrixidx][0] = bpmem.indmtx[matrixidx].col0.ma * fscale; constants.indtexmtx[2*matrixidx][1] = bpmem.indmtx[matrixidx].col1.mc * fscale; constants.indtexmtx[2*matrixidx][2] = bpmem.indmtx[matrixidx].col2.me * fscale; - constants.indtexmtx[2*matrixidx][3] = fscale * 4.0f; + constants.indtexmtx[2*matrixidx][3] = fscale / 128.0f; constants.indtexmtx[2*matrixidx+1][0] = bpmem.indmtx[matrixidx].col0.mb * fscale; constants.indtexmtx[2*matrixidx+1][1] = bpmem.indmtx[matrixidx].col1.md * fscale; constants.indtexmtx[2*matrixidx+1][2] = bpmem.indmtx[matrixidx].col2.mf * fscale; - constants.indtexmtx[2*matrixidx+1][3] = fscale * 4.0f; + constants.indtexmtx[2*matrixidx+1][3] = fscale / 128.0f; dirty = true; PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n", - matrixidx, 1024.0f*fscale, + matrixidx, fscale, bpmem.indmtx[matrixidx].col0.ma * fscale, bpmem.indmtx[matrixidx].col1.mc * fscale, bpmem.indmtx[matrixidx].col2.me * fscale, - bpmem.indmtx[matrixidx].col0.mb * fscale, bpmem.indmtx[matrixidx].col1.md * fscale, bpmem.indmtx[matrixidx].col2.mf * fscale); + bpmem.indmtx[matrixidx].col0.mb * fscale, bpmem.indmtx[matrixidx].col1.md * fscale, bpmem.indmtx[matrixidx].col2.mf * fscale); } From 21eb482a6eeed901378c7224e11dd5d73e07a59a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 8 Oct 2013 00:56:03 +0200 Subject: [PATCH 17/45] PixelShaderGen: Write constants in decimal instead of hexadecimal where appropriate. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 42 +++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 53a466c1b9..a1c06548ed 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -511,7 +511,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("\tiprev.a = %s;\n", tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); } } - out.Write("\tiprev = iprev & 0xFF;\n"); + out.Write("\tiprev = iprev & 255;\n"); AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); uid_data.Pretest = Pretest; @@ -616,14 +616,14 @@ static const char *TEVCMPColorOPTable[16] = "float3(0.0, 0.0, 0.0)",//5 "float3(0.0, 0.0, 0.0)",//6 "float3(0.0, 0.0, 0.0)",//7 - " %s + (((%s.r&0xFF) > %s.r) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_R8_GT 8 - " %s + (((%s.r&0xFF) == %s.r) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_R8_EQ 9 - " %s + ((idot((%s.rgb&0xFF), comp16) > idot((%s.rgb&0xFF), comp16)) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_GR16_GT 10 - " %s + ((idot((%s.rgb&0xFF), comp16) == idot((%s.rgb&0xFF), comp16)) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_GR16_EQ 11 - " %s + ((idot((%s.rgb&0xFF), comp24) > idot((%s.rgb&0xFF), comp24)) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_BGR24_GT 12 - " %s + ((idot((%s.rgb&0xFF), comp24) == idot((%s.rgb&0xFF), comp24)) ? (%s&0xFF): int3(0,0,0))",//#define TEVCMP_BGR24_EQ 13 - " %s + int3(max(sign(int3((%s.rgb&0xFF)) - int3((%s.rgb&0xFF))), int3(0,0,0)) * (%s&0xFF))",//#define TEVCMP_RGB8_GT 14 - " %s + int3((int3(255,255,255) - max(sign(abs(int3((%s.rgb&0xFF)) - int3((%s.rgb&0xFF)))), int3(0,0,0))) * (%s&0xFF))"//#define TEVCMP_RGB8_EQ 15 + " %s + (((%s.r&255) > %s.r) ? (%s&255): int3(0,0,0))",//#define TEVCMP_R8_GT 8 + " %s + (((%s.r&255) == %s.r) ? (%s&255): int3(0,0,0))",//#define TEVCMP_R8_EQ 9 + " %s + ((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))",//#define TEVCMP_GR16_GT 10 + " %s + ((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))",//#define TEVCMP_GR16_EQ 11 + " %s + ((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))",//#define TEVCMP_BGR24_GT 12 + " %s + ((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))",//#define TEVCMP_BGR24_EQ 13 + " %s + int3(max(sign(int3((%s.rgb&255)) - int3((%s.rgb&255))), int3(0,0,0)) * (%s&255))",//#define TEVCMP_RGB8_GT 14 + " %s + int3((int3(255,255,255) - max(sign(abs(int3((%s.rgb&255)) - int3((%s.rgb&255)))), int3(0,0,0))) * (%s&255))"//#define TEVCMP_RGB8_EQ 15 }; //table with the alpha compare operations @@ -637,14 +637,14 @@ static const char *TEVCMPAlphaOPTable[16] = "0.0",//5 "0.0",//6 "0.0",//7 - " %s.a + (((%s.r&0xFF) > (%s.r&0xFF)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_R8_GT 8 - " %s.a + (((%s.r&0xFF) == (%s.r&0xFF)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_R8_EQ 9 - " %s.a + ((idot((%s.rgb&0xFF), comp16) > idot((%s.rgb&0xFF), comp16)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_GR16_GT 10 - " %s.a + ((idot((%s.rgb&0xFF), comp16) == idot((%s.rgb&0xFF), comp16)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_GR16_EQ 11 - " %s.a + ((idot((%s.rgb&0xFF), comp24) > idot((%s.rgb&0xFF), comp24)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_BGR24_GT 12 - " %s.a + ((idot((%s.rgb&0xFF), comp24) == idot((%s.rgb&0xFF), comp24)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_BGR24_EQ 13 - " %s.a + (((%s.a&0xFF) > (%s.a&0xFF)) ? (%s.a&0xFF) : 0)",//#define TEVCMP_A8_GT 14 - " %s.a + (((%s.a&0xFF) == (%s.a&0xFF)) ? (%s.a&0xFF) : 0)" //#define TEVCMP_A8_EQ 15 + " %s.a + (((%s.r&255) > (%s.r&255)) ? (%s.a&255) : 0)",//#define TEVCMP_R8_GT 8 + " %s.a + (((%s.r&255) == (%s.r&255)) ? (%s.a&255) : 0)",//#define TEVCMP_R8_EQ 9 + " %s.a + ((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)",//#define TEVCMP_GR16_GT 10 + " %s.a + ((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)",//#define TEVCMP_GR16_EQ 11 + " %s.a + ((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)",//#define TEVCMP_BGR24_GT 12 + " %s.a + ((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)",//#define TEVCMP_BGR24_EQ 13 + " %s.a + (((%s.a&255) > (%s.a&255)) ? (%s.a&255) : 0)",//#define TEVCMP_A8_GT 14 + " %s.a + (((%s.a&255) == (%s.a&255)) ? (%s.a&255) : 0)" //#define TEVCMP_A8_EQ 15 }; template @@ -670,7 +670,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (bpmem.tevind[n].bs != ITBA_OFF) { const char *tevIndAlphaSel[] = {"", "x", "y", "z"}; - const char *tevIndAlphaMask[] = {"0xF8", "0xE0", "0xF0", "0xF8"}; + const char *tevIndAlphaMask[] = {"248", "224", "240", "248"}; out.Write("alphabump = iindtex%d.%s & %s;\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs], @@ -682,7 +682,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP } // format - const char *tevIndFmtMask[] = {"0xFF", "0x1F", "0x0F", "0x07" }; + const char *tevIndFmtMask[] = {"255", "31", "15", "7" }; out.Write("int3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, tevIndFmtMask[bpmem.tevind[n].fmt]); // bias - TODO: Check if this needs to be this complicated.. @@ -865,7 +865,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD)) out.Write("%s %s ", tevCInputTable[cc.d], tevOpTable[cc.op]); - out.Write("((%s&0xFF) * (int3(255,255,255) - (%s&0xFF)) + (%s&0xFF) * (%s&0xFF)) / 255", tevCInputTable[cc.a], tevCInputTable[cc.c], tevCInputTable[cc.b], tevCInputTable[cc.c]); + out.Write("((%s&255) * (int3(255,255,255) - (%s&255)) + (%s&255) * (%s&255)) / 255", tevCInputTable[cc.a], tevCInputTable[cc.c], tevCInputTable[cc.b], tevCInputTable[cc.c]); out.Write("%s", tevBiasTable[cc.bias]); @@ -899,7 +899,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD)) out.Write("%s.a %s ", tevAInputTable[ac.d], tevOpTable[ac.op]); - out.Write("((%s.a&0xFF) * (255 - (%s.a&0xFF)) + (%s.a&0xFF) * (%s.a&0xFF)) / 255", tevAInputTable[ac.a], tevAInputTable[ac.c], tevAInputTable[ac.b], tevAInputTable[ac.c]); + out.Write("((%s.a&255) * (255 - (%s.a&255)) + (%s.a&255) * (%s.a&255)) / 255", tevAInputTable[ac.a], tevAInputTable[ac.c], tevAInputTable[ac.b], tevAInputTable[ac.c]); out.Write("%s",tevBiasTable[ac.bias]); From df94e623500cf7285f5ab9ad973e40d462500b9b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 8 Oct 2013 02:14:52 +0200 Subject: [PATCH 18/45] PixelShaderGen: Process fog calculations with integer math. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index a1c06548ed..e44f658120 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -1074,7 +1074,8 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); } - out.Write("\tiprev.rgb = int3(round(lerp(float3(iprev.rgb), " I_FOG"[0].rgb*255.0, fog)));\n"); + out.Write("\tint ifog = int(round(fog * 256.0));\n"); + out.Write("\tiprev.rgb = (iprev.rgb * (256 - ifog) + int(" I_FOG"[0].rgb * 256.0 * ifog)) >> 8;\n"); } void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) From ec60acac3a30bdcc4444a61b106bc7e60a597473 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 10 Oct 2013 20:26:41 +0200 Subject: [PATCH 19/45] PixelShaderGen: Change the "colors" and "kcolors" uniforms to be integers. --- Source/Core/VideoCommon/ConstantManager.h | 4 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 78 +++++++++---------- .../Core/VideoCommon/PixelShaderManager.cpp | 12 +-- 3 files changed, 47 insertions(+), 47 deletions(-) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index bca1c5c93e..1b1aa79dc3 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -11,8 +11,8 @@ typedef s32 int4[4]; struct PixelShaderConstants { - float4 colors[4]; - float4 kcolors[4]; + int4 colors[4]; + int4 kcolors[4]; float4 alpha; float4 texdims[8]; float4 zbias[2]; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index e44f658120..e141168644 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -41,26 +41,26 @@ static const char *tevKSelTableC[] = // KCSEL "ERROR2", // 0x09 "ERROR3", // 0x0a "ERROR4", // 0x0b - "int3(round(" I_KCOLORS"[0].rgb * 255.0))", // K0 = 0x0C - "int3(round(" I_KCOLORS"[1].rgb * 255.0))", // K1 = 0x0D - "int3(round(" I_KCOLORS"[2].rgb * 255.0))", // K2 = 0x0E - "int3(round(" I_KCOLORS"[3].rgb * 255.0))", // K3 = 0x0F - "int3(round(" I_KCOLORS"[0].rrr * 255.0))", // K0_R = 0x10 - "int3(round(" I_KCOLORS"[1].rrr * 255.0))", // K1_R = 0x11 - "int3(round(" I_KCOLORS"[2].rrr * 255.0))", // K2_R = 0x12 - "int3(round(" I_KCOLORS"[3].rrr * 255.0))", // K3_R = 0x13 - "int3(round(" I_KCOLORS"[0].ggg * 255.0))", // K0_G = 0x14 - "int3(round(" I_KCOLORS"[1].ggg * 255.0))", // K1_G = 0x15 - "int3(round(" I_KCOLORS"[2].ggg * 255.0))", // K2_G = 0x16 - "int3(round(" I_KCOLORS"[3].ggg * 255.0))", // K3_G = 0x17 - "int3(round(" I_KCOLORS"[0].bbb * 255.0))", // K0_B = 0x18 - "int3(round(" I_KCOLORS"[1].bbb * 255.0))", // K1_B = 0x19 - "int3(round(" I_KCOLORS"[2].bbb * 255.0))", // K2_B = 0x1A - "int3(round(" I_KCOLORS"[3].bbb * 255.0))", // K3_B = 0x1B - "int3(round(" I_KCOLORS"[0].aaa * 255.0))", // K0_A = 0x1C - "int3(round(" I_KCOLORS"[1].aaa * 255.0))", // K1_A = 0x1D - "int3(round(" I_KCOLORS"[2].aaa * 255.0))", // K2_A = 0x1E - "int3(round(" I_KCOLORS"[3].aaa * 255.0))", // K3_A = 0x1F + I_KCOLORS"[0].rgb", // K0 = 0x0C + I_KCOLORS"[1].rgb", // K1 = 0x0D + I_KCOLORS"[2].rgb", // K2 = 0x0E + I_KCOLORS"[3].rgb", // K3 = 0x0F + I_KCOLORS"[0].rrr", // K0_R = 0x10 + I_KCOLORS"[1].rrr", // K1_R = 0x11 + I_KCOLORS"[2].rrr", // K2_R = 0x12 + I_KCOLORS"[3].rrr", // K3_R = 0x13 + I_KCOLORS"[0].ggg", // K0_G = 0x14 + I_KCOLORS"[1].ggg", // K1_G = 0x15 + I_KCOLORS"[2].ggg", // K2_G = 0x16 + I_KCOLORS"[3].ggg", // K3_G = 0x17 + I_KCOLORS"[0].bbb", // K0_B = 0x18 + I_KCOLORS"[1].bbb", // K1_B = 0x19 + I_KCOLORS"[2].bbb", // K2_B = 0x1A + I_KCOLORS"[3].bbb", // K3_B = 0x1B + I_KCOLORS"[0].aaa", // K0_A = 0x1C + I_KCOLORS"[1].aaa", // K1_A = 0x1D + I_KCOLORS"[2].aaa", // K2_A = 0x1E + I_KCOLORS"[3].aaa", // K3_A = 0x1F }; static const char *tevKSelTableA[] = // KASEL @@ -81,22 +81,22 @@ static const char *tevKSelTableA[] = // KASEL "ERROR10", // 0x0d "ERROR11", // 0x0e "ERROR12", // 0x0f - "int(round(" I_KCOLORS"[0].r * 255.0))", // K0_R = 0x10 - "int(round(" I_KCOLORS"[1].r * 255.0))", // K1_R = 0x11 - "int(round(" I_KCOLORS"[2].r * 255.0))", // K2_R = 0x12 - "int(round(" I_KCOLORS"[3].r * 255.0))", // K3_R = 0x13 - "int(round(" I_KCOLORS"[0].g * 255.0))", // K0_G = 0x14 - "int(round(" I_KCOLORS"[1].g * 255.0))", // K1_G = 0x15 - "int(round(" I_KCOLORS"[2].g * 255.0))", // K2_G = 0x16 - "int(round(" I_KCOLORS"[3].g * 255.0))", // K3_G = 0x17 - "int(round(" I_KCOLORS"[0].b * 255.0))", // K0_B = 0x18 - "int(round(" I_KCOLORS"[1].b * 255.0))", // K1_B = 0x19 - "int(round(" I_KCOLORS"[2].b * 255.0))", // K2_B = 0x1A - "int(round(" I_KCOLORS"[3].b * 255.0))", // K3_B = 0x1B - "int(round(" I_KCOLORS"[0].a * 255.0))", // K0_A = 0x1C - "int(round(" I_KCOLORS"[1].a * 255.0))", // K1_A = 0x1D - "int(round(" I_KCOLORS"[2].a * 255.0))", // K2_A = 0x1E - "int(round(" I_KCOLORS"[3].a * 255.0))", // K3_A = 0x1F + I_KCOLORS"[0].r", // K0_R = 0x10 + I_KCOLORS"[1].r", // K1_R = 0x11 + I_KCOLORS"[2].r", // K2_R = 0x12 + I_KCOLORS"[3].r", // K3_R = 0x13 + I_KCOLORS"[0].g", // K0_G = 0x14 + I_KCOLORS"[1].g", // K1_G = 0x15 + I_KCOLORS"[2].g", // K2_G = 0x16 + I_KCOLORS"[3].g", // K3_G = 0x17 + I_KCOLORS"[0].b", // K0_B = 0x18 + I_KCOLORS"[1].b", // K1_B = 0x19 + I_KCOLORS"[2].b", // K2_B = 0x1A + I_KCOLORS"[3].b", // K3_B = 0x1B + I_KCOLORS"[0].a", // K0_A = 0x1C + I_KCOLORS"[1].a", // K1_A = 0x1D + I_KCOLORS"[2].a", // K2_A = 0x1E + I_KCOLORS"[3].a", // K3_A = 0x1F }; static const char *tevScaleTable[] = // CS @@ -283,8 +283,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (ApiType == API_OPENGL) out.Write("layout(std140%s) uniform PSBlock {\n", g_ActiveConfig.backend_info.bSupportShadingLanguage420pack ? ", binding = 1" : ""); - DeclareUniform(out, ApiType, C_COLORS, "float4", I_COLORS"[4]"); - DeclareUniform(out, ApiType, C_KCOLORS, "float4", I_KCOLORS"[4]"); + DeclareUniform(out, ApiType, C_COLORS, "int4", I_COLORS"[4]"); + DeclareUniform(out, ApiType, C_KCOLORS, "int4", I_KCOLORS"[4]"); DeclareUniform(out, ApiType, C_ALPHA, "float4", I_ALPHA"[1]"); // TODO: Why is this an array...-.- DeclareUniform(out, ApiType, C_TEXDIMS, "float4", I_TEXDIMS"[8]"); DeclareUniform(out, ApiType, C_ZBIAS, "float4", I_ZBIAS"[2]"); @@ -373,7 +373,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write(" ) {\n"); } - out.Write(" int4 ic0 = int4(round(" I_COLORS"[1] * 255.0)), ic1 = int4(round(" I_COLORS"[2] * 255.0)), ic2 = int4(round(" I_COLORS"[3] * 255.0)), iprev = int4(round(" I_COLORS"[0] * 255.0));\n" + out.Write(" int4 ic0 = " I_COLORS"[1], ic1 = " I_COLORS"[2], ic2 = " I_COLORS"[3], iprev = " I_COLORS"[0];\n" " int4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n" " int3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" " int alphabump=0;\n" diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 9173396fac..5d5850da5b 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -152,14 +152,14 @@ void PixelShaderManager::SetConstants() // TODO: Conversion should be checked in the context of tev_fixes.. void PixelShaderManager::SetColorChanged(int type, int num) { - float4* c = type ? constants.kcolors : constants.colors; - c[num][0] = bpmem.tevregs[num].low.a / 255.0f; - c[num][3] = bpmem.tevregs[num].low.b / 255.0f; - c[num][2] = bpmem.tevregs[num].high.a / 255.0f; - c[num][1] = bpmem.tevregs[num].high.b / 255.0f; + int4* c = type ? constants.kcolors : constants.colors; + c[num][0] = bpmem.tevregs[num].low.a; + c[num][3] = bpmem.tevregs[num].low.b; + c[num][2] = bpmem.tevregs[num].high.a; + c[num][1] = bpmem.tevregs[num].high.b; dirty = true; - PRIM_LOG("pixel %scolor%d: %f %f %f %f\n", type?"k":"", num, c[num][0], c[num][1], c[num][2], c[num][3]); + PRIM_LOG("pixel %scolor%d: %d %d %d %d\n", type?"k":"", num, c[num][0], c[num][1], c[num][2], c[num][3]); } void PixelShaderManager::SetAlpha() From c13a5c38e9ab997f21b84c2ce64772b66cc3cfc9 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 10 Oct 2013 20:36:55 +0200 Subject: [PATCH 20/45] PixelShaderGen: Change the "alpha" uniform to use integers. --- Source/Core/VideoCommon/ConstantManager.h | 2 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 10 +++++----- Source/Core/VideoCommon/PixelShaderManager.cpp | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 1b1aa79dc3..d4c17860f5 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -13,7 +13,7 @@ struct PixelShaderConstants { int4 colors[4]; int4 kcolors[4]; - float4 alpha; + int4 alpha; float4 texdims[8]; float4 zbias[2]; float4 indtexscale[2]; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index e141168644..b59a1f99f9 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -285,7 +285,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T DeclareUniform(out, ApiType, C_COLORS, "int4", I_COLORS"[4]"); DeclareUniform(out, ApiType, C_KCOLORS, "int4", I_KCOLORS"[4]"); - DeclareUniform(out, ApiType, C_ALPHA, "float4", I_ALPHA"[1]"); // TODO: Why is this an array...-.- + DeclareUniform(out, ApiType, C_ALPHA, "int4", I_ALPHA"[1]"); // TODO: Why is this an array...-.- DeclareUniform(out, ApiType, C_TEXDIMS, "float4", I_TEXDIMS"[8]"); DeclareUniform(out, ApiType, C_ZBIAS, "float4", I_ZBIAS"[2]"); DeclareUniform(out, ApiType, C_INDTEXSCALE, "float4", I_INDTEXSCALE"[2]"); @@ -570,7 +570,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (dstAlphaMode == DSTALPHA_ALPHA_PASS) { out.SetConstantsUsed(C_ALPHA, C_ALPHA); - out.Write("\tocol0 = float4(float3(iprev.rgb) / 255.0, " I_ALPHA"[0].a);\n"); + out.Write("\tocol0 = float4(float3(iprev.rgb), float(" I_ALPHA".a)) / 255.0;\n"); } else { @@ -586,7 +586,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // Colors will be blended against the alpha from ocol1 and // the alpha from ocol0 will be written to the framebuffer. out.Write("\tocol1 = float4(iprev) / 255.0;\n"); - out.Write("\tocol0.a = " I_ALPHA"[0].a;\n"); + out.Write("\tocol0.a = float(" I_ALPHA".a) / 255.0;\n"); } out.Write("}\n"); @@ -962,8 +962,8 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_T { static const char *alphaRef[2] = { - "int(round(" I_ALPHA"[0].r * 255.0))", - "int(round(" I_ALPHA"[0].g * 255.0))" + I_ALPHA".r", + I_ALPHA".g" }; out.SetConstantsUsed(C_ALPHA, C_ALPHA); diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 5d5850da5b..29e13dd717 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -164,14 +164,14 @@ void PixelShaderManager::SetColorChanged(int type, int num) void PixelShaderManager::SetAlpha() { - constants.alpha[0] = bpmem.alpha_test.ref0 / 255.0f; - constants.alpha[1] = bpmem.alpha_test.ref1 / 255.0f; + constants.alpha[0] = bpmem.alpha_test.ref0; + constants.alpha[1] = bpmem.alpha_test.ref1; dirty = true; } void PixelShaderManager::SetDestAlpha() { - constants.alpha[3] = bpmem.dstalpha.alpha / 255.0f; + constants.alpha[3] = bpmem.dstalpha.alpha; dirty = true; } From 0238a568162099f0ee1db3d584e42db734f4e509 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 10 Oct 2013 21:09:00 +0200 Subject: [PATCH 21/45] PixelShaderGen: Change indirect texture matrix uniforms to use integers. --- Source/Core/VideoBackends/Software/Tev.cpp | 20 +++++++------ Source/Core/VideoCommon/ConstantManager.h | 2 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 18 ++++++++---- .../Core/VideoCommon/PixelShaderManager.cpp | 29 +++++++++---------- 4 files changed, 39 insertions(+), 30 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 0c80e04c93..49899f642c 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -544,19 +544,21 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t) switch (indirect.mid & 12) { case 0: - shift = 3 + (17 - scale); - indtevtrans[0] = indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2]; - indtevtrans[1] = indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] + indmtx.col2.mf * indcoord[2]; + // matrix values are S0.10, output format is S17.7, so divide by 8 + shift = (17 - scale); + indtevtrans[0] = (indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2]) >> 3; + indtevtrans[1] = (indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] + indmtx.col2.mf * indcoord[2]) >> 3; break; case 4: // s matrix - shift = 8 + (17 - scale); - indtevtrans[0] = s * indcoord[0]; - indtevtrans[1] = t * indcoord[0]; + // s is S17.7, matrix elements are divided by 256, output is S17.7, so divide by 256. - TODO: Maybe, since s is actually stored as S24, we should divide by 256*64? + shift = (17 - scale); + indtevtrans[0] = s * indcoord[0] / 256; + indtevtrans[1] = t * indcoord[0] / 256; break; case 8: // t matrix - shift = 8 + (17 - scale); - indtevtrans[0] = s * indcoord[1]; - indtevtrans[1] = t * indcoord[1]; + shift = (17 - scale); + indtevtrans[0] = s * indcoord[1] / 256; + indtevtrans[1] = t * indcoord[1] / 256; break; default: return; diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index d4c17860f5..0fb65bf917 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -17,7 +17,7 @@ struct PixelShaderConstants float4 texdims[8]; float4 zbias[2]; float4 indtexscale[2]; - float4 indtexmtx[6]; + int4 indtexmtx[6]; float4 fog[3]; // For pixel lighting diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index b59a1f99f9..fc899c5cf6 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -289,7 +289,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T DeclareUniform(out, ApiType, C_TEXDIMS, "float4", I_TEXDIMS"[8]"); DeclareUniform(out, ApiType, C_ZBIAS, "float4", I_ZBIAS"[2]"); DeclareUniform(out, ApiType, C_INDTEXSCALE, "float4", I_INDTEXSCALE"[2]"); - DeclareUniform(out, ApiType, C_INDTEXMTX, "float4", I_INDTEXMTX"[6]"); + DeclareUniform(out, ApiType, C_INDTEXMTX, "int4", I_INDTEXMTX"[6]"); DeclareUniform(out, ApiType, C_FOG, "float4", I_FOG"[3]"); // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! @@ -704,22 +704,30 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int mtxidx = 2*(bpmem.tevind[n].mid-1); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(round(dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d)), dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d))));\n", - n, mtxidx, n, mtxidx+1, n); + out.Write("int2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n); + + out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) { // s matrix _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(round(" I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.xx)));\n", n, mtxidx, texcoord, n); + out.Write("int2 indtevtrans%d = int2(int2(round(uv%d.xy*255.0)) * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); + + out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) { // t matrix _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(round(" I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.yy)));\n", n, mtxidx, texcoord, n); + out.Write("int2 indtevtrans%d = int2(int2(round(uv%d.xy*255.0)) * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); + + out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } else { diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 29e13dd717..30d6013efc 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -210,26 +210,25 @@ void PixelShaderManager::SetIndTexScaleChanged(bool high) void PixelShaderManager::SetIndMatrixChanged(int matrixidx) { int scale = ((u32)bpmem.indmtx[matrixidx].col0.s0 << 0) | - ((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) | - ((u32)bpmem.indmtx[matrixidx].col2.s2 << 4); - float fscale = powf(2.0f, (float)(scale - 17)) / 8.0f; + ((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) | + ((u32)bpmem.indmtx[matrixidx].col2.s2 << 4); // xyz - static matrix // w - dynamic matrix scale / 128 - constants.indtexmtx[2*matrixidx][0] = bpmem.indmtx[matrixidx].col0.ma * fscale; - constants.indtexmtx[2*matrixidx][1] = bpmem.indmtx[matrixidx].col1.mc * fscale; - constants.indtexmtx[2*matrixidx][2] = bpmem.indmtx[matrixidx].col2.me * fscale; - constants.indtexmtx[2*matrixidx][3] = fscale / 128.0f; - constants.indtexmtx[2*matrixidx+1][0] = bpmem.indmtx[matrixidx].col0.mb * fscale; - constants.indtexmtx[2*matrixidx+1][1] = bpmem.indmtx[matrixidx].col1.md * fscale; - constants.indtexmtx[2*matrixidx+1][2] = bpmem.indmtx[matrixidx].col2.mf * fscale; - constants.indtexmtx[2*matrixidx+1][3] = fscale / 128.0f; + constants.indtexmtx[2*matrixidx ][0] = bpmem.indmtx[matrixidx].col0.ma; + constants.indtexmtx[2*matrixidx ][1] = bpmem.indmtx[matrixidx].col1.mc; + constants.indtexmtx[2*matrixidx ][2] = bpmem.indmtx[matrixidx].col2.me; + constants.indtexmtx[2*matrixidx ][3] = 17 - scale; + constants.indtexmtx[2*matrixidx+1][0] = bpmem.indmtx[matrixidx].col0.mb; + constants.indtexmtx[2*matrixidx+1][1] = bpmem.indmtx[matrixidx].col1.md; + constants.indtexmtx[2*matrixidx+1][2] = bpmem.indmtx[matrixidx].col2.mf; + constants.indtexmtx[2*matrixidx+1][3] = 17 - scale; dirty = true; - PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n", - matrixidx, fscale, - bpmem.indmtx[matrixidx].col0.ma * fscale, bpmem.indmtx[matrixidx].col1.mc * fscale, bpmem.indmtx[matrixidx].col2.me * fscale, - bpmem.indmtx[matrixidx].col0.mb * fscale, bpmem.indmtx[matrixidx].col1.md * fscale, bpmem.indmtx[matrixidx].col2.mf * fscale); + PRIM_LOG("indmtx%d: scale=%d, mat=(%d %d %d; %d %d %d)\n", + matrixidx, scale, + bpmem.indmtx[matrixidx].col0.ma, bpmem.indmtx[matrixidx].col1.mc, bpmem.indmtx[matrixidx].col2.me, + bpmem.indmtx[matrixidx].col0.mb, bpmem.indmtx[matrixidx].col1.md, bpmem.indmtx[matrixidx].col2.mf); } From 68e91f0d557206e241f7bc4fe6b66f5650788260 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 27 Oct 2013 13:10:00 +0100 Subject: [PATCH 22/45] PixelShader: Store fog color as an integer. --- Source/Core/VideoCommon/ConstantManager.h | 3 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 20 ++++++----- Source/Core/VideoCommon/PixelShaderGen.h | 4 ++- .../Core/VideoCommon/PixelShaderManager.cpp | 35 ++++++++++--------- 4 files changed, 34 insertions(+), 28 deletions(-) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 0fb65bf917..3be4793d86 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -18,7 +18,8 @@ struct PixelShaderConstants float4 zbias[2]; float4 indtexscale[2]; int4 indtexmtx[6]; - float4 fog[3]; + int4 fogcolor; + float4 fog[2]; // For pixel lighting float4 plights[40]; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index fc899c5cf6..50f3a8e9c6 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -290,7 +290,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T DeclareUniform(out, ApiType, C_ZBIAS, "float4", I_ZBIAS"[2]"); DeclareUniform(out, ApiType, C_INDTEXSCALE, "float4", I_INDTEXSCALE"[2]"); DeclareUniform(out, ApiType, C_INDTEXMTX, "int4", I_INDTEXMTX"[6]"); - DeclareUniform(out, ApiType, C_FOG, "float4", I_FOG"[3]"); + DeclareUniform(out, ApiType, C_FOGCOLOR, "int4", I_FOGCOLOR); + DeclareUniform(out, ApiType, C_FOG, "float4", I_FOG"[2]"); // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! DeclareUniform(out, ApiType, C_PLIGHTS, "float4", I_PLIGHTS"[40]"); @@ -1044,18 +1045,19 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) uid_data.fog_proj = bpmem.fog.c_proj_fsel.proj; - out.SetConstantsUsed(C_FOG, C_FOG+1); + out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR); + out.SetConstantsUsed(C_FOG, C_FOG); if (bpmem.fog.c_proj_fsel.proj == 0) { // perspective // ze = A/(B - (Zs >> B_SHF) - out.Write("\tfloat ze = " I_FOG"[1].x / (" I_FOG"[1].y - (zCoord / " I_FOG"[1].w));\n"); + out.Write("\tfloat ze = " I_FOG"[0].x / (" I_FOG"[0].y - (zCoord / " I_FOG"[0].w));\n"); } else { // orthographic // ze = a*Zs (here, no B_SHF) - out.Write("\tfloat ze = " I_FOG"[1].x * zCoord;\n"); + out.Write("\tfloat ze = " I_FOG"[0].x * zCoord;\n"); } // x_adjust = sqrt((x-center)^2 + k^2)/k @@ -1064,13 +1066,13 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) uid_data.fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; if (bpmem.fogRange.Base.Enabled) { - out.SetConstantsUsed(C_FOG+2, C_FOG+2); - out.Write("\tfloat x_adjust = (2.0 * (clipPos.x / " I_FOG"[2].y)) - 1.0 - " I_FOG"[2].x;\n"); - out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n"); + out.SetConstantsUsed(C_FOG+1, C_FOG+1); + out.Write("\tfloat x_adjust = (2.0 * (clipPos.x / " I_FOG"[1].y)) - 1.0 - " I_FOG"[1].x;\n"); + out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[1].z * " I_FOG"[1].z) / " I_FOG"[1].z;\n"); out.Write("\tze *= x_adjust;\n"); } - out.Write("\tfloat fog = clamp(ze - " I_FOG"[1].z, 0.0, 1.0);\n"); + out.Write("\tfloat fog = clamp(ze - " I_FOG"[0].z, 0.0, 1.0);\n"); if (bpmem.fog.c_proj_fsel.fsel > 3) { @@ -1083,7 +1085,7 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) } out.Write("\tint ifog = int(round(fog * 256.0));\n"); - out.Write("\tiprev.rgb = (iprev.rgb * (256 - ifog) + int(" I_FOG"[0].rgb * 256.0 * ifog)) >> 8;\n"); + out.Write("\tiprev.rgb = (iprev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n"); } void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index 6ed6960420..ad9bbecddb 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -16,6 +16,7 @@ #define I_ZBIAS "czbias" #define I_INDTEXSCALE "cindscale" #define I_INDTEXMTX "cindmtx" +#define I_FOGCOLOR "cfogcolor" #define I_FOG "cfog" #define I_PLIGHTS "cPLights" #define I_PMATERIALS "cPmtrl" @@ -29,7 +30,8 @@ #define C_ZBIAS (C_TEXDIMS + 8) //17 #define C_INDTEXSCALE (C_ZBIAS + 2) //19 #define C_INDTEXMTX (C_INDTEXSCALE + 2) //21 -#define C_FOG (C_INDTEXMTX + 6) //27 +#define C_FOGCOLOR (C_INDTEXMTX + 6) //27 +#define C_FOG (C_FOGCOLOR + 1) //28 #define C_PLIGHTS (C_FOG + 3) #define C_PMATERIALS (C_PLIGHTS + 40) diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 30d6013efc..b0b1ce7037 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -84,15 +84,16 @@ void PixelShaderManager::SetConstants() // they always seems to be larger than 256 so my theory is : // they are the coefficients from the center to the border of the screen // so to simplify I use the hi coefficient as K in the shader taking 256 as the scale - constants.fog[2][0] = ScreenSpaceCenter; - constants.fog[2][1] = (float)Renderer::EFBToScaledX((int)(2.0f * xfregs.viewport.wd)); - constants.fog[2][2] = bpmem.fogRange.K[4].HI / 256.0f; + // TODO: Shouldn't this be EFBToScaledXf? + constants.fog[1][0] = ScreenSpaceCenter; + constants.fog[1][1] = (float)Renderer::EFBToScaledX((int)(2.0f * xfregs.viewport.wd)); + constants.fog[1][2] = bpmem.fogRange.K[4].HI / 256.0f; } else { - constants.fog[2][0] = 0; - constants.fog[2][1] = 1; - constants.fog[2][2] = 1; + constants.fog[1][0] = 0; + constants.fog[1][1] = 1; + constants.fog[1][2] = 1; } dirty = true; @@ -270,9 +271,9 @@ void PixelShaderManager::SetTexCoordChanged(u8 texmapid) void PixelShaderManager::SetFogColorChanged() { - constants.fog[0][0] = bpmem.fog.color.r / 255.0f; - constants.fog[0][1] = bpmem.fog.color.g / 255.0f; - constants.fog[0][2] = bpmem.fog.color.b / 255.0f; + constants.fogcolor[0] = bpmem.fog.color.r; + constants.fogcolor[1] = bpmem.fog.color.g; + constants.fogcolor[2] = bpmem.fog.color.b; dirty = true; } @@ -280,17 +281,17 @@ void PixelShaderManager::SetFogParamChanged() { if (!g_ActiveConfig.bDisableFog) { - constants.fog[1][0] = bpmem.fog.a.GetA(); - constants.fog[1][1] = (float)bpmem.fog.b_magnitude / 0xFFFFFF; - constants.fog[1][2] = bpmem.fog.c_proj_fsel.GetC(); - constants.fog[1][3] = (float)(1 << bpmem.fog.b_shift); + constants.fog[0][0] = bpmem.fog.a.GetA(); + constants.fog[0][1] = (float)bpmem.fog.b_magnitude / 0xFFFFFF; + constants.fog[0][2] = bpmem.fog.c_proj_fsel.GetC(); + constants.fog[0][3] = (float)(1 << bpmem.fog.b_shift); } else { - constants.fog[1][0] = 0; - constants.fog[1][1] = 1; - constants.fog[1][2] = 0; - constants.fog[1][3] = 1; + constants.fog[0][0] = 0; + constants.fog[0][1] = 1; + constants.fog[0][2] = 0; + constants.fog[0][3] = 1; } dirty = true; } From 4bf57565e8ce462a5272c45490abaf3ddac21a00 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 27 Oct 2013 13:57:40 +0100 Subject: [PATCH 23/45] ShaderGen: Store light color uniforms as integers. --- Source/Core/VideoCommon/ConstantManager.h | 6 ++-- Source/Core/VideoCommon/LightingShaderGen.h | 32 +++++++++---------- Source/Core/VideoCommon/PixelShaderGen.cpp | 8 +++-- Source/Core/VideoCommon/PixelShaderGen.h | 28 ++++++++-------- .../Core/VideoCommon/PixelShaderManager.cpp | 17 +++++----- Source/Core/VideoCommon/VertexShaderGen.cpp | 5 +-- Source/Core/VideoCommon/VertexShaderGen.h | 6 ++-- .../Core/VideoCommon/VertexShaderManager.cpp | 17 +++++----- 8 files changed, 65 insertions(+), 54 deletions(-) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 3be4793d86..de1f346ed3 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -22,7 +22,8 @@ struct PixelShaderConstants float4 fog[2]; // For pixel lighting - float4 plights[40]; + int4 plight_colors[8]; + float4 plights[32]; float4 pmaterials[4]; }; @@ -31,7 +32,8 @@ struct VertexShaderConstants float4 posnormalmatrix[6]; float4 projection[4]; float4 materials[4]; - float4 lights[40]; + int4 light_colors[8]; // 8 lights + float4 lights[32]; // 8 lights * 4 parameters float4 texmatrices[24]; float4 transformmatrices[64]; float4 normalmatrices[32]; diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h index 7afcff2ef7..485d020b2f 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.h +++ b/Source/Core/VideoCommon/LightingShaderGen.h @@ -9,19 +9,19 @@ #include "VideoCommon/XFMemory.h" -#define LIGHT_COL "%s[5*%d].%s" -#define LIGHT_COL_PARAMS(lightsName, index, swizzle) (lightsName), (index), (swizzle) +#define LIGHT_COL "(float4(%s[%d]).%s / 255.0)" +#define LIGHT_COL_PARAMS(lightsColName, index, swizzle) (lightsColName), (index), (swizzle) -#define LIGHT_COSATT "%s[5*%d+1]" +#define LIGHT_COSATT "%s[4*%d]" #define LIGHT_COSATT_PARAMS(lightsName, index) (lightsName), (index) -#define LIGHT_DISTATT "%s[5*%d+2]" +#define LIGHT_DISTATT "%s[4*%d+1]" #define LIGHT_DISTATT_PARAMS(lightsName, index) (lightsName), (index) -#define LIGHT_POS "%s[5*%d+3]" +#define LIGHT_POS "%s[4*%d+2]" #define LIGHT_POS_PARAMS(lightsName, index) (lightsName), (index) -#define LIGHT_DIR "%s[5*%d+4]" +#define LIGHT_DIR "%s[4*%d+3]" #define LIGHT_DIR_PARAMS(lightsName, index) (lightsName), (index) /** @@ -39,7 +39,7 @@ struct LightingUidData template -static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha) +static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsColName, const char* lightsName, int coloralpha) { const LitChannel& chan = (litchan_index > 1) ? xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; const char* swizzle = "xyzw"; @@ -56,13 +56,13 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, switch (chan.diffusefunc) { case LIGHTDIF_NONE: - object.Write("lacc.%s += " LIGHT_COL";\n", swizzle, LIGHT_COL_PARAMS(lightsName, index, swizzle)); + object.Write("lacc.%s += " LIGHT_COL";\n", swizzle, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: object.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(lightsName, index)); object.Write("lacc.%s += %sdot(ldir, _norm0)) * " LIGHT_COL";\n", - swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", LIGHT_COL_PARAMS(lightsName, index, swizzle)); + swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", LIGHT_COL_PARAMS(lightsColName, index, swizzle)); break; default: _assert_(0); } @@ -94,14 +94,14 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, switch (chan.diffusefunc) { case LIGHTDIF_NONE: - object.Write("lacc.%s += attn * " LIGHT_COL";\n", swizzle, LIGHT_COL_PARAMS(lightsName, index, swizzle)); + object.Write("lacc.%s += attn * " LIGHT_COL";\n", swizzle, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * " LIGHT_COL";\n", swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", - LIGHT_COL_PARAMS(lightsName, index, swizzle)); + LIGHT_COL_PARAMS(lightsColName, index, swizzle)); break; default: _assert_(0); } @@ -115,7 +115,7 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, // inColorName is color in vs and colors_ in ps // dest is o.colors_ in vs and colors_ in ps template -static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest) +static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, const char* materialsName, const char* lightsColName, const char* lightsName, const char* inColorName, const char* dest) { for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++) { @@ -226,7 +226,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com { if (mask & (1<(object, uid_data, i, j, lightsName, 3); + GenerateLightShader(object, uid_data, i, j, lightsColName, lightsName, 3); } } } @@ -236,9 +236,9 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com for (int i = 0; i < 8; ++i) { if (!(mask&(1<(object, uid_data, i, j, lightsName, 1); + GenerateLightShader(object, uid_data, i, j, lightsColName, lightsName, 1); if (!(mask&(1<(object, uid_data, i, j+2, lightsName, 2); + GenerateLightShader(object, uid_data, i, j+2, lightsColName, lightsName, 2); } } else if (color.enablelighting || alpha.enablelighting) @@ -252,7 +252,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com for (int i = 0; i < 8; ++i) { if (workingchannel.GetFullLightMask() & (1<(object, uid_data, i, lit_index, lightsName, coloralpha); + GenerateLightShader(object, uid_data, i, lit_index, lightsColName, lightsName, coloralpha); } } object.Write("%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j); diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 50f3a8e9c6..915cfc2176 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -294,7 +294,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T DeclareUniform(out, ApiType, C_FOG, "float4", I_FOG"[2]"); // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! - DeclareUniform(out, ApiType, C_PLIGHTS, "float4", I_PLIGHTS"[40]"); + DeclareUniform(out, ApiType, C_PLIGHT_COLORS, "int4", I_PLIGHT_COLORS"[8]"); + DeclareUniform(out, ApiType, C_PLIGHTS, "float4", I_PLIGHTS"[32]"); DeclareUniform(out, ApiType, C_PMATERIALS, "float4", I_PMATERIALS"[4]"); if (ApiType == API_OPENGL) @@ -416,10 +417,11 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T "\tfloat3 ldir, h;\n" "\tfloat dist, dist2, attn;\n"); - out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+39); // TODO: Can be optimized further + out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further + out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); uid_data.components = components; - GenerateLightingShader(out, uid_data.lighting, components, I_PMATERIALS, I_PLIGHTS, "colors_", "colors_"); + GenerateLightingShader(out, uid_data.lighting, components, I_PMATERIALS, I_PLIGHT_COLORS, I_PLIGHTS, "colors_", "colors_"); } out.Write("\tclipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n"); diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index ad9bbecddb..972101cd1c 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -9,17 +9,18 @@ #include "VideoCommon/ShaderGenCommon.h" #include "VideoCommon/VideoCommon.h" -#define I_COLORS "color" -#define I_KCOLORS "k" -#define I_ALPHA "alphaRef" -#define I_TEXDIMS "texdim" -#define I_ZBIAS "czbias" -#define I_INDTEXSCALE "cindscale" -#define I_INDTEXMTX "cindmtx" -#define I_FOGCOLOR "cfogcolor" -#define I_FOG "cfog" -#define I_PLIGHTS "cPLights" -#define I_PMATERIALS "cPmtrl" +#define I_COLORS "color" +#define I_KCOLORS "k" +#define I_ALPHA "alphaRef" +#define I_TEXDIMS "texdim" +#define I_ZBIAS "czbias" +#define I_INDTEXSCALE "cindscale" +#define I_INDTEXMTX "cindmtx" +#define I_FOGCOLOR "cfogcolor" +#define I_FOG "cfog" +#define I_PLIGHT_COLORS "cPLightColors" +#define I_PLIGHTS "cPLights" +#define I_PMATERIALS "cPmtrl" // TODO: get rid of them as they aren't used #define C_COLORMATRIX 0 // 0 @@ -33,8 +34,9 @@ #define C_FOGCOLOR (C_INDTEXMTX + 6) //27 #define C_FOG (C_FOGCOLOR + 1) //28 -#define C_PLIGHTS (C_FOG + 3) -#define C_PMATERIALS (C_PLIGHTS + 40) +#define C_PLIGHT_COLORS (C_FOG + 2) +#define C_PLIGHTS (C_PLIGHT_COLORS + 8) +#define C_PMATERIALS (C_PLIGHTS + 32) #define C_PENVCONST_END (C_PMATERIALS + 4) // Different ways to achieve rendering with destination alpha diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index b0b1ce7037..b9418b4afd 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -104,6 +104,7 @@ void PixelShaderManager::SetConstants() { if (nLightsChanged[0] >= 0) { + // TODO: Outdated comment // lights don't have a 1 to 1 mapping, the color component needs to be converted to 4 floats int istart = nLightsChanged[0] / 0x10; int iend = (nLightsChanged[1] + 15) / 0x10; @@ -112,10 +113,10 @@ void PixelShaderManager::SetConstants() for (int i = istart; i < iend; ++i) { u32 color = *(const u32*)(xfmemptr + 3); - constants.plights[5*i][0] = ((color >> 24) & 0xFF) / 255.0f; - constants.plights[5*i][1] = ((color >> 16) & 0xFF) / 255.0f; - constants.plights[5*i][2] = ((color >> 8) & 0xFF) / 255.0f; - constants.plights[5*i][3] = ((color) & 0xFF) / 255.0f; + constants.plight_colors[i][0] = (color >> 24) & 0xFF; + constants.plight_colors[i][1] = (color >> 16) & 0xFF; + constants.plight_colors[i][2] = (color >> 8) & 0xFF; + constants.plight_colors[i][3] = (color) & 0xFF; xfmemptr += 4; for (int j = 0; j < 4; ++j, xfmemptr += 3) @@ -125,11 +126,11 @@ void PixelShaderManager::SetConstants() fabs(xfmemptr[1]) < 0.00001f && fabs(xfmemptr[2]) < 0.00001f) // dist attenuation, make sure not equal to 0!!! - constants.plights[5*i+j+1][0] = 0.00001f; + constants.plights[4*i+j][0] = 0.00001f; else - constants.plights[5*i+j+1][0] = xfmemptr[0]; - constants.plights[5*i+j+1][1] = xfmemptr[1]; - constants.plights[5*i+j+1][2] = xfmemptr[2]; + constants.plights[4*i+j][0] = xfmemptr[0]; + constants.plights[4*i+j][1] = xfmemptr[1]; + constants.plights[4*i+j][2] = xfmemptr[2]; } } dirty = true; diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index a4893ce2dc..6310f6ceba 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -88,7 +88,8 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ DeclareUniform(out, api_type, C_POSNORMALMATRIX, "float4", I_POSNORMALMATRIX"[6]"); DeclareUniform(out, api_type, C_PROJECTION, "float4", I_PROJECTION"[4]"); DeclareUniform(out, api_type, C_MATERIALS, "float4", I_MATERIALS"[4]"); - DeclareUniform(out, api_type, C_LIGHTS, "float4", I_LIGHTS"[40]"); + DeclareUniform(out, api_type, C_LIGHT_COLORS, "int4", I_LIGHT_COLORS"[8]"); + DeclareUniform(out, api_type, C_LIGHTS, "float4", I_LIGHTS"[32]"); DeclareUniform(out, api_type, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]"); DeclareUniform(out, api_type, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]"); DeclareUniform(out, api_type, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]"); @@ -230,7 +231,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n"); } - GenerateLightingShader(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_"); + GenerateLightingShader(out, uid_data.lighting, components, I_MATERIALS, I_LIGHT_COLORS, I_LIGHTS, "color", "o.colors_"); if (xfregs.numChan.numColorChans < 2) { diff --git a/Source/Core/VideoCommon/VertexShaderGen.h b/Source/Core/VideoCommon/VertexShaderGen.h index e705845a6a..43d5273f4a 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.h +++ b/Source/Core/VideoCommon/VertexShaderGen.h @@ -33,6 +33,7 @@ #define I_POSNORMALMATRIX "cpnmtx" #define I_PROJECTION "cproj" #define I_MATERIALS "cmtrl" +#define I_LIGHT_COLORS "clight_colors" #define I_LIGHTS "clights" #define I_TEXMATRICES "ctexmtx" #define I_TRANSFORMMATRICES "ctrmtx" @@ -44,8 +45,9 @@ #define C_POSNORMALMATRIX 0 #define C_PROJECTION (C_POSNORMALMATRIX + 6) #define C_MATERIALS (C_PROJECTION + 4) -#define C_LIGHTS (C_MATERIALS + 4) -#define C_TEXMATRICES (C_LIGHTS + 40) +#define C_LIGHT_COLORS (C_MATERIALS + 4) +#define C_LIGHTS (C_LIGHT_COLORS + 8) +#define C_TEXMATRICES (C_LIGHTS + 32) #define C_TRANSFORMMATRICES (C_TEXMATRICES + 24) #define C_NORMALMATRICES (C_TRANSFORMMATRICES + 64) #define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES + 32) diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index e505fbba37..d1264a68ba 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -252,6 +252,7 @@ void VertexShaderManager::SetConstants() if (nLightsChanged[0] >= 0) { + // TODO: Outdated comment // lights don't have a 1 to 1 mapping, the color component needs to be converted to 4 floats int istart = nLightsChanged[0] / 0x10; int iend = (nLightsChanged[1] + 15) / 0x10; @@ -260,10 +261,10 @@ void VertexShaderManager::SetConstants() for (int i = istart; i < iend; ++i) { u32 color = *(const u32*)(xfmemptr + 3); - constants.lights[5*i][0] = ((color >> 24) & 0xFF) / 255.0f; - constants.lights[5*i][1] = ((color >> 16) & 0xFF) / 255.0f; - constants.lights[5*i][2] = ((color >> 8) & 0xFF) / 255.0f; - constants.lights[5*i][3] = ((color) & 0xFF) / 255.0f; + constants.light_colors[i][0] = (color >> 24) & 0xFF; + constants.light_colors[i][1] = (color >> 16) & 0xFF; + constants.light_colors[i][2] = (color >> 8) & 0xFF; + constants.light_colors[i][3] = (color) & 0xFF; xfmemptr += 4; for (int j = 0; j < 4; ++j, xfmemptr += 3) @@ -274,12 +275,12 @@ void VertexShaderManager::SetConstants() fabs(xfmemptr[2]) < 0.00001f) { // dist attenuation, make sure not equal to 0!!! - constants.lights[5*i+j+1][0] = 0.00001f; + constants.lights[4*i+j][0] = 0.00001f; } else - constants.lights[5*i+j+1][0] = xfmemptr[0]; - constants.lights[5*i+j+1][1] = xfmemptr[1]; - constants.lights[5*i+j+1][2] = xfmemptr[2]; + constants.lights[4*i+j][0] = xfmemptr[0]; + constants.lights[4*i+j][1] = xfmemptr[1]; + constants.lights[4*i+j][2] = xfmemptr[2]; } } dirty = true; From 78623871f96deff1da5b164772095124af1a2047 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 27 Oct 2013 14:07:13 +0100 Subject: [PATCH 24/45] ShaderGen: Store material uniforms as integers. --- Source/Core/VideoCommon/ConstantManager.h | 4 ++-- Source/Core/VideoCommon/LightingShaderGen.h | 8 ++++---- Source/Core/VideoCommon/PixelShaderGen.cpp | 2 +- Source/Core/VideoCommon/PixelShaderManager.cpp | 8 ++++---- Source/Core/VideoCommon/VertexShaderGen.cpp | 2 +- Source/Core/VideoCommon/VertexShaderManager.cpp | 16 ++++++++-------- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index de1f346ed3..149599b1d5 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -24,14 +24,14 @@ struct PixelShaderConstants // For pixel lighting int4 plight_colors[8]; float4 plights[32]; - float4 pmaterials[4]; + int4 pmaterials[4]; }; struct VertexShaderConstants { float4 posnormalmatrix[6]; float4 projection[4]; - float4 materials[4]; + int4 materials[4]; int4 light_colors[8]; // 8 lights float4 lights[32]; // 8 lights * 4 parameters float4 texmatrices[24]; diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h index 485d020b2f..b068f46a3e 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.h +++ b/Source/Core/VideoCommon/LightingShaderGen.h @@ -136,7 +136,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com } else // from color { - object.Write("mat = %s[%d];\n", materialsName, j+2); + object.Write("mat = float4(%s[%d])/255.0;\n", materialsName, j+2); } uid_data.enablelighting |= xfregs.color[j].enablelighting << j; @@ -157,7 +157,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com } else // from color { - object.Write("lacc = %s[%d];\n", materialsName, j); + object.Write("lacc = float4(%s[%d])/255.0;\n", materialsName, j); } } else @@ -179,7 +179,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com } else // from color { - object.Write("mat.w = %s[%d].w;\n", materialsName, j+2); + object.Write("mat.w = float(%s[%d].w) / 255.0;\n", materialsName, j+2); } } @@ -199,7 +199,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com } else // from color { - object.Write("lacc.w = %s[%d].w;\n", materialsName, j); + object.Write("lacc.w = float(%s[%d].w) / 255.0;\n", materialsName, j); } } else diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 915cfc2176..6c89e88272 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -296,7 +296,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! DeclareUniform(out, ApiType, C_PLIGHT_COLORS, "int4", I_PLIGHT_COLORS"[8]"); DeclareUniform(out, ApiType, C_PLIGHTS, "float4", I_PLIGHTS"[32]"); - DeclareUniform(out, ApiType, C_PMATERIALS, "float4", I_PMATERIALS"[4]"); + DeclareUniform(out, ApiType, C_PMATERIALS, "int4", I_PMATERIALS"[4]"); if (ApiType == API_OPENGL) out.Write("};\n"); diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index b9418b4afd..8999247f24 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -326,10 +326,10 @@ void PixelShaderManager::SetMaterialColorChanged(int index, u32 color) { if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { - constants.pmaterials[index][0] = ((color >> 24) & 0xFF) / 255.0f; - constants.pmaterials[index][1] = ((color >> 16) & 0xFF) / 255.0f; - constants.pmaterials[index][2] = ((color >> 8) & 0xFF) / 255.0f; - constants.pmaterials[index][3] = ( color & 0xFF) / 255.0f; + constants.pmaterials[index][0] = (color >> 24) & 0xFF; + constants.pmaterials[index][1] = (color >> 16) & 0xFF; + constants.pmaterials[index][2] = (color >> 8) & 0xFF; + constants.pmaterials[index][3] = (color) & 0xFF; dirty = true; } } diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 6310f6ceba..1707b15773 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -87,7 +87,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ DeclareUniform(out, api_type, C_POSNORMALMATRIX, "float4", I_POSNORMALMATRIX"[6]"); DeclareUniform(out, api_type, C_PROJECTION, "float4", I_PROJECTION"[4]"); - DeclareUniform(out, api_type, C_MATERIALS, "float4", I_MATERIALS"[4]"); + DeclareUniform(out, api_type, C_MATERIALS, "int4", I_MATERIALS"[4]"); DeclareUniform(out, api_type, C_LIGHT_COLORS, "int4", I_LIGHT_COLORS"[8]"); DeclareUniform(out, api_type, C_LIGHTS, "float4", I_LIGHTS"[32]"); DeclareUniform(out, api_type, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]"); diff --git a/Source/Core/VideoCommon/VertexShaderManager.cpp b/Source/Core/VideoCommon/VertexShaderManager.cpp index d1264a68ba..06fff792e2 100644 --- a/Source/Core/VideoCommon/VertexShaderManager.cpp +++ b/Source/Core/VideoCommon/VertexShaderManager.cpp @@ -295,10 +295,10 @@ void VertexShaderManager::SetConstants() if (nMaterialsChanged & (1 << i)) { u32 data = *(xfregs.ambColor + i); - constants.materials[i][0] = ((data >> 24) & 0xFF) / 255.0f; - constants.materials[i][1] = ((data >> 16) & 0xFF) / 255.0f; - constants.materials[i][2] = ((data >> 8) & 0xFF) / 255.0f; - constants.materials[i][3] = ( data & 0xFF) / 255.0f; + constants.materials[i][0] = (data >> 24) & 0xFF; + constants.materials[i][1] = (data >> 16) & 0xFF; + constants.materials[i][2] = (data >> 8) & 0xFF; + constants.materials[i][3] = data & 0xFF; } } @@ -307,10 +307,10 @@ void VertexShaderManager::SetConstants() if (nMaterialsChanged & (1 << (i + 2))) { u32 data = *(xfregs.matColor + i); - constants.materials[i+2][0] = ((data >> 24) & 0xFF) / 255.0f; - constants.materials[i+2][1] = ((data >> 16) & 0xFF) / 255.0f; - constants.materials[i+2][2] = ((data >> 8) & 0xFF) / 255.0f; - constants.materials[i+2][3] = ( data & 0xFF) / 255.0f; + constants.materials[i+2][0] = (data >> 24) & 0xFF; + constants.materials[i+2][1] = (data >> 16) & 0xFF; + constants.materials[i+2][2] = (data >> 8) & 0xFF; + constants.materials[i+2][3] = data & 0xFF; } } dirty = true; From 387b9bf3c234a7b87a98c2b2066581c4aac13d96 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 27 Oct 2013 14:34:59 +0100 Subject: [PATCH 25/45] LightingShaderGen: Perform some lighting calculations with integers. --- Source/Core/VideoCommon/LightingShaderGen.h | 46 ++++++++++----------- Source/Core/VideoCommon/PixelShaderGen.cpp | 3 +- Source/Core/VideoCommon/VertexShaderGen.cpp | 3 +- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h index b068f46a3e..578a56d42a 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.h +++ b/Source/Core/VideoCommon/LightingShaderGen.h @@ -9,7 +9,7 @@ #include "VideoCommon/XFMemory.h" -#define LIGHT_COL "(float4(%s[%d]).%s / 255.0)" +#define LIGHT_COL "%s[%d].%s" #define LIGHT_COL_PARAMS(lightsColName, index, swizzle) (lightsColName), (index), (swizzle) #define LIGHT_COSATT "%s[4*%d]" @@ -42,11 +42,8 @@ template static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsColName, const char* lightsName, int coloralpha) { const LitChannel& chan = (litchan_index > 1) ? xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index]; - const char* swizzle = "xyzw"; - if (coloralpha == 1) - swizzle = "xyz"; - else if (coloralpha == 2) - swizzle = "w"; + const char* swizzle = (coloralpha == 1) ? "xyz" : (coloralpha == 2) ? "w" : "xyzw"; + const char* swizzle_components = (coloralpha == 1) ? "3" : (coloralpha == 2) ? "" : "4"; uid_data.attnfunc |= chan.attnfunc << (2*litchan_index); uid_data.diffusefunc |= chan.diffusefunc << (2*litchan_index); @@ -61,8 +58,9 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: object.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(lightsName, index)); - object.Write("lacc.%s += %sdot(ldir, _norm0)) * " LIGHT_COL";\n", - swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", LIGHT_COL_PARAMS(lightsColName, index, swizzle)); + object.Write("lacc.%s += int%s(round(%sdot(ldir, _norm0)) * float%s(" LIGHT_COL")));\n", + swizzle, swizzle_components, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", + swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); break; default: _assert_(0); } @@ -94,14 +92,16 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, switch (chan.diffusefunc) { case LIGHTDIF_NONE: - object.Write("lacc.%s += attn * " LIGHT_COL";\n", swizzle, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); + object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL")));\n", + swizzle, swizzle_components, + swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: - object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * " LIGHT_COL";\n", - swizzle, + object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL")));\n", + swizzle, swizzle_components, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", - LIGHT_COL_PARAMS(lightsColName, index, swizzle)); + swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); break; default: _assert_(0); } @@ -146,23 +146,23 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com if (color.ambsource) // from vertex { if (components & (VB_HAS_COL0<(object, uid_data, i, lit_index, lightsColName, lightsName, coloralpha); } } - object.Write("%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j); + object.Write("%s%d = mat * float4(clamp(lacc, 0, 255)) / 255.0;\n", dest, j); object.Write("}\n"); } } diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 6c89e88272..c08386dd59 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -413,7 +413,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"); out.Write("\tfloat3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); - out.Write("\tfloat4 mat, lacc;\n" + out.Write("\tfloat4 mat;\n" + "\tint4 lacc;\n" "\tfloat3 ldir, h;\n" "\tfloat dist, dist2, attn;\n"); diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 1707b15773..860f5f36a6 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -218,7 +218,8 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n"); - out.Write("float4 mat, lacc;\n" + out.Write("float4 mat;\n" + "int4 lacc;\n" "float3 ldir, h;\n" "float dist, dist2, attn;\n"); From 3e6efdb53ec30fe65173c20ddbf97798f41ef7a7 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 27 Oct 2013 14:43:29 +0100 Subject: [PATCH 26/45] LightingShaderGen: Perform more lighting calculations with integers. --- Source/Core/VideoCommon/LightingShaderGen.h | 18 +++++++++--------- Source/Core/VideoCommon/PixelShaderGen.cpp | 3 +-- Source/Core/VideoCommon/VertexShaderGen.cpp | 3 +-- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h index 578a56d42a..87b4e3c14e 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.h +++ b/Source/Core/VideoCommon/LightingShaderGen.h @@ -128,15 +128,15 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com if (color.matsource) // from vertex { if (components & (VB_HAS_COL0 << j)) - object.Write("mat = %s%d;\n", inColorName, j); + object.Write("int4 mat = int4(round(%s%d * 255.0));\n", inColorName, j); else if (components & VB_HAS_COL0) - object.Write("mat = %s0;\n", inColorName); + object.Write("int4 mat = int4(round(%s0 * 255.0));\n", inColorName); else - object.Write("mat = float4(1.0, 1.0, 1.0, 1.0);\n"); + object.Write("int4 mat = int4(255, 255, 255, 255);\n"); } else // from color { - object.Write("mat = float4(%s[%d])/255.0;\n", materialsName, j+2); + object.Write("int4 mat = %s[%d];\n", materialsName, j+2); } uid_data.enablelighting |= xfregs.color[j].enablelighting << j; @@ -172,14 +172,14 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com if (alpha.matsource) // from vertex { if (components & (VB_HAS_COL0<(object, uid_data, i, lit_index, lightsColName, lightsName, coloralpha); } } - object.Write("%s%d = mat * float4(clamp(lacc, 0, 255)) / 255.0;\n", dest, j); + object.Write("%s%d = float4(mat * clamp(lacc, 0, 255) / 255) / 255.0;\n", dest, j); object.Write("}\n"); } } diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index c08386dd59..6baf6d0907 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -413,8 +413,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"); out.Write("\tfloat3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n"); - out.Write("\tfloat4 mat;\n" - "\tint4 lacc;\n" + out.Write("\tint4 lacc;\n" "\tfloat3 ldir, h;\n" "\tfloat dist, dist2, attn;\n"); diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 860f5f36a6..448b0deb04 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -218,8 +218,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n"); - out.Write("float4 mat;\n" - "int4 lacc;\n" + out.Write("int4 lacc;\n" "float3 ldir, h;\n" "float dist, dist2, attn;\n"); From 3a6389992e65df1609d3a200807b6a1f4c389808 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 2 Nov 2013 11:42:30 +0100 Subject: [PATCH 27/45] PixelShaderGen: Treat UV coordinates like S17.7 integers (they're still stored as float, though). --- Source/Core/VideoBackends/Software/Tev.cpp | 4 ++-- Source/Core/VideoCommon/PixelShaderGen.cpp | 25 +++++++++++++--------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 49899f642c..3b517175eb 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -528,9 +528,9 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t) return; } - s64 indtevtrans[2] = { 0,0 }; + s32 indtevtrans[2] = { 0,0 }; - // matrix multiply + // matrix multiply - results might overflow, but we don't care since we only use the lower 24 bits of the result. int indmtxid = indirect.mid & 3; if (indmtxid) { diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 6baf6d0907..7475ec6884 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -408,6 +408,10 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T } } + // UV coordinates are stored in 17.7 format.. TODO: Use an integer for this! + for (int i = 0; i < xfregs.numTexGen.numTexGens; ++i) + out.Write("uv%d.xy = uv%d.xy * 128.0;\n", i, i); + if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"); @@ -471,7 +475,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (texcoord < numTexgen) { out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2); - out.Write("\ttempcoord = uv%d.xy * " I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy"); + out.Write("\ttempcoord = uv%d.xy / 128.0 * " I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy"); } else out.Write("\ttempcoord = float2(0.0, 0.0);\n"); @@ -709,6 +713,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.Write("int2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n); + // TODO: should use a shader uid branch for this.. out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } @@ -717,7 +722,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(int2(round(uv%d.xy*255.0)) * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); + out.Write("int2 indtevtrans%d = int2(int2(round(uv%d.xy)) * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); @@ -727,7 +732,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(int2(round(uv%d.xy*255.0)) * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); + out.Write("int2 indtevtrans%d = int2(int2(round(uv%d.xy)) * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); @@ -745,23 +750,23 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP // --------- // Wrapping // --------- - const char *tevIndWrapStart[] = {"0", "(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)", "1" }; + const char *tevIndWrapStart[] = {"0", "(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)", "1" }; // TODO: Should the last one be 1 or (1<<7)? // wrap S if (bpmem.tevind[n].sw == ITW_OFF) - out.Write("wrappedcoord.x = int(round(uv%d.x*256.0));\n", texcoord); + out.Write("wrappedcoord.x = int(round(uv%d.x));\n", texcoord); else if (bpmem.tevind[n].sw == ITW_0) out.Write("wrappedcoord.x = 0;\n"); else - out.Write("wrappedcoord.x = int(round(uv%d.x*256.0)) %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); + out.Write("wrappedcoord.x = int(round(uv%d.x)) %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); // wrap T if (bpmem.tevind[n].tw == ITW_OFF) - out.Write("wrappedcoord.y = int(round(uv%d.y*256.0));\n", texcoord); + out.Write("wrappedcoord.y = int(round(uv%d.y));\n", texcoord); else if (bpmem.tevind[n].tw == ITW_0) out.Write("wrappedcoord.y = 0;\n"); else - out.Write("wrappedcoord.y = int(round(uv%d.y*256.0)) %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); + out.Write("wrappedcoord.y = int(round(uv%d.y)) %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); if (bpmem.tevind[n].fb_addprev) // add previous tevcoord out.Write("tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); @@ -805,7 +810,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP { // calc tevcord if (bHasTexCoord) - out.Write("tevcoord.xy = int2(round(uv%d.xy*256.0));\n", texcoord); + out.Write("tevcoord.xy = int2(round(uv%d.xy));\n", texcoord); else out.Write("tevcoord.xy = int2(0, 0);\n"); } @@ -823,7 +828,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP uid_data.SetTevindrefTexmap(i, texmap); out.Write("itextemp = "); - SampleTexture(out, "(float2(tevcoord.xy)/256.0)", texswap, texmap, ApiType); + SampleTexture(out, "(float2(tevcoord.xy)/128.0)", texswap, texmap, ApiType); } else { From 605b687af8a46cfb32e1a9599d3ddb47d2cdf02c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 23 Nov 2013 20:58:24 +0100 Subject: [PATCH 28/45] PixelShaderGen: Use integer math for z textures. --- Source/Core/VideoCommon/ConstantManager.h | 2 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 24 ++++++++----------- .../Core/VideoCommon/PixelShaderManager.cpp | 18 +++++++------- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 149599b1d5..1f42b20299 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -15,7 +15,7 @@ struct PixelShaderConstants int4 kcolors[4]; int4 alpha; float4 texdims[8]; - float4 zbias[2]; + int4 zbias[2]; float4 indtexscale[2]; int4 indtexmtx[6]; int4 fogcolor; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 7475ec6884..e8820e0b9d 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -285,9 +285,9 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T DeclareUniform(out, ApiType, C_COLORS, "int4", I_COLORS"[4]"); DeclareUniform(out, ApiType, C_KCOLORS, "int4", I_KCOLORS"[4]"); - DeclareUniform(out, ApiType, C_ALPHA, "int4", I_ALPHA"[1]"); // TODO: Why is this an array...-.- + DeclareUniform(out, ApiType, C_ALPHA, "int4", I_ALPHA); DeclareUniform(out, ApiType, C_TEXDIMS, "float4", I_TEXDIMS"[8]"); - DeclareUniform(out, ApiType, C_ZBIAS, "float4", I_ZBIAS"[2]"); + DeclareUniform(out, ApiType, C_ZBIAS, "int4", I_ZBIAS"[2]"); DeclareUniform(out, ApiType, C_INDTEXSCALE, "float4", I_INDTEXSCALE"[2]"); DeclareUniform(out, ApiType, C_INDTEXMTX, "int4", I_INDTEXMTX"[6]"); DeclareUniform(out, ApiType, C_FOGCOLOR, "int4", I_FOGCOLOR); @@ -534,12 +534,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // The performance impact of this additional calculation doesn't matter, but it prevents // the host GPU driver from performing any early depth test optimizations. if (g_ActiveConfig.bFastDepthCalc) - out.Write("float zCoord = rawpos.z;\n"); + out.Write("int zCoord = int(round(rawpos.z * 16777215.0));\n"); else { out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1); // the screen space depth value = far z + (clip z / clip w) * z range - out.Write("float zCoord = " I_ZBIAS"[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"[1].y;\n"); + out.Write("int zCoord = " I_ZBIAS"[1].x + int(round((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y)));\n"); } // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either @@ -554,7 +554,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // Note: z-textures are not written to depth buffer if early depth test is used if (per_pixel_depth && bpmem.UseEarlyDepthTest()) - out.Write("depth = zCoord;\n"); + out.Write("depth = float(zCoord) / 16777215.0;\n"); // Note: depth texture output is only written to depth buffer if late depth test is used // theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway @@ -562,17 +562,13 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T { // use the texture input of the last texture stage (itextemp), hopefully this has been read and is in correct format... out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1); - out.Write("zCoord = dot(" I_ZBIAS"[0].xyzw, float4(itextemp.xyzw)/255.0) + " I_ZBIAS"[1].w %s;\n", + out.Write("zCoord = idot(" I_ZBIAS"[0].xyzw, itextemp.xyzw) + " I_ZBIAS"[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : ""); - - // U24 overflow emulation - out.Write("zCoord = zCoord * (16777215.0/16777216.0);\n"); - out.Write("zCoord = zCoord - 2.0*round(0.5*zCoord);\n"); - out.Write("zCoord = zCoord * (16777216.0/16777215.0);\n"); + out.Write("zCoord = zCoord & 16777215;\n"); } if (per_pixel_depth && bpmem.UseLateDepthTest()) - out.Write("depth = zCoord;\n"); + out.Write("depth = float(zCoord) / 16777215.0;\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) { @@ -1058,13 +1054,13 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) { // perspective // ze = A/(B - (Zs >> B_SHF) - out.Write("\tfloat ze = " I_FOG"[0].x / (" I_FOG"[0].y - (zCoord / " I_FOG"[0].w));\n"); + out.Write("\tfloat ze = " I_FOG"[0].x / (" I_FOG"[0].y - (float(zCoord) / 16777215.0 / " I_FOG"[0].w));\n"); } else { // orthographic // ze = a*Zs (here, no B_SHF) - out.Write("\tfloat ze = " I_FOG"[0].x * zCoord;\n"); + out.Write("\tfloat ze = " I_FOG"[0].x * float(zCoord) / 16777215.0;\n"); } // x_adjust = sqrt((x-center)^2 + k^2)/k diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 8999247f24..79ff607b82 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -141,8 +141,8 @@ void PixelShaderManager::SetConstants() if (s_bViewPortChanged) { - constants.zbias[1][0] = xfregs.viewport.farZ / 16777216.0f; - constants.zbias[1][1] = xfregs.viewport.zRange / 16777216.0f; + constants.zbias[1][0] = xfregs.viewport.farZ; + constants.zbias[1][1] = xfregs.viewport.zRange; dirty = true; s_bViewPortChanged = false; } @@ -190,7 +190,7 @@ void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height, u32 wra void PixelShaderManager::SetZTextureBias() { - constants.zbias[1][3] = bpmem.ztex1.bias/16777215.0f; + constants.zbias[1][3] = bpmem.ztex1.bias; dirty = true; } @@ -242,18 +242,18 @@ void PixelShaderManager::SetZTextureTypeChanged() constants.zbias[0][0] = 0; constants.zbias[0][1] = 0; constants.zbias[0][2] = 0; - constants.zbias[0][3] = 255.0f/16777215.0f; + constants.zbias[0][3] = 1; break; case TEV_ZTEX_TYPE_U16: - constants.zbias[0][0] = 255.0f/16777215.0f; + constants.zbias[0][0] = 1; constants.zbias[0][1] = 0; constants.zbias[0][2] = 0; - constants.zbias[0][3] = 65280.0f/16777215.0f; + constants.zbias[0][3] = 256; break; case TEV_ZTEX_TYPE_U24: - constants.zbias[0][0] = 16711680.0f/16777215.0f; - constants.zbias[0][1] = 65280.0f/16777215.0f; - constants.zbias[0][2] = 255.0f/16777215.0f; + constants.zbias[0][0] = 65536; + constants.zbias[0][1] = 256; + constants.zbias[0][2] = 1; constants.zbias[0][3] = 0; break; default: From 065919f599b86da9464a9f6a792e56ca87661540 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 16 Dec 2013 13:08:09 +0100 Subject: [PATCH 29/45] PixelShaderGen: Perform some of the fog calculations with integers. --- Source/Core/VideoCommon/ConstantManager.h | 3 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 22 +++++++++------ Source/Core/VideoCommon/PixelShaderGen.h | 8 ++++-- .../Core/VideoCommon/PixelShaderManager.cpp | 28 +++++++++---------- 4 files changed, 34 insertions(+), 27 deletions(-) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 1f42b20299..da3d19962f 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -19,7 +19,8 @@ struct PixelShaderConstants float4 indtexscale[2]; int4 indtexmtx[6]; int4 fogcolor; - float4 fog[2]; + int4 fogi[1]; + float4 fogf[2]; // For pixel lighting int4 plight_colors[8]; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index e8820e0b9d..899f01143a 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -291,7 +291,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T DeclareUniform(out, ApiType, C_INDTEXSCALE, "float4", I_INDTEXSCALE"[2]"); DeclareUniform(out, ApiType, C_INDTEXMTX, "int4", I_INDTEXMTX"[6]"); DeclareUniform(out, ApiType, C_FOGCOLOR, "int4", I_FOGCOLOR); - DeclareUniform(out, ApiType, C_FOG, "float4", I_FOG"[2]"); + DeclareUniform(out, ApiType, C_FOGI, "int4", I_FOGI"[1]"); + DeclareUniform(out, ApiType, C_FOGF, "float4", I_FOGF"[2]"); // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! DeclareUniform(out, ApiType, C_PLIGHT_COLORS, "int4", I_PLIGHT_COLORS"[8]"); @@ -1049,33 +1050,36 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) uid_data.fog_proj = bpmem.fog.c_proj_fsel.proj; out.SetConstantsUsed(C_FOGCOLOR, C_FOGCOLOR); - out.SetConstantsUsed(C_FOG, C_FOG); + out.SetConstantsUsed(C_FOGI, C_FOGI); + out.SetConstantsUsed(C_FOGF, C_FOGF+1); if (bpmem.fog.c_proj_fsel.proj == 0) { // perspective // ze = A/(B - (Zs >> B_SHF) - out.Write("\tfloat ze = " I_FOG"[0].x / (" I_FOG"[0].y - (float(zCoord) / 16777215.0 / " I_FOG"[0].w));\n"); + // TODO: Verify that we want to drop lower bits here! (currently taken over from software renderer) + out.Write("\tfloat ze = (" I_FOGF"[1].x * 16777215.0) / float(" I_FOGI"[0].y - (zCoord >> " I_FOGI"[0].w));\n"); } else { // orthographic // ze = a*Zs (here, no B_SHF) - out.Write("\tfloat ze = " I_FOG"[0].x * float(zCoord) / 16777215.0;\n"); + out.Write("\tfloat ze = " I_FOGF"[1].x * float(zCoord) / 16777215.0;\n"); } // x_adjust = sqrt((x-center)^2 + k^2)/k // ze *= x_adjust - // this is completely theoretical as the real hardware seems to use a table intead of calculating the values. + // TODO Instead of this theoretical calculation, we should use the + // coefficient table given in the fog range BP registers! uid_data.fog_RangeBaseEnabled = bpmem.fogRange.Base.Enabled; if (bpmem.fogRange.Base.Enabled) { - out.SetConstantsUsed(C_FOG+1, C_FOG+1); - out.Write("\tfloat x_adjust = (2.0 * (clipPos.x / " I_FOG"[1].y)) - 1.0 - " I_FOG"[1].x;\n"); - out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[1].z * " I_FOG"[1].z) / " I_FOG"[1].z;\n"); + out.SetConstantsUsed(C_FOGF, C_FOGF); + out.Write("\tfloat x_adjust = (2.0 * (clipPos.x / " I_FOGF"[0].y)) - 1.0 - " I_FOGF"[0].x;\n"); + out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOGF"[0].z * " I_FOGF"[0].z) / " I_FOGF"[0].z;\n"); out.Write("\tze *= x_adjust;\n"); } - out.Write("\tfloat fog = clamp(ze - " I_FOG"[0].z, 0.0, 1.0);\n"); + out.Write("\tfloat fog = clamp(ze - " I_FOGF"[1].z, 0.0, 1.0);\n"); if (bpmem.fog.c_proj_fsel.fsel > 3) { diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index 972101cd1c..135e4928f1 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -17,7 +17,8 @@ #define I_INDTEXSCALE "cindscale" #define I_INDTEXMTX "cindmtx" #define I_FOGCOLOR "cfogcolor" -#define I_FOG "cfog" +#define I_FOGI "cfogi" +#define I_FOGF "cfogf" #define I_PLIGHT_COLORS "cPLightColors" #define I_PLIGHTS "cPLights" #define I_PMATERIALS "cPmtrl" @@ -32,9 +33,10 @@ #define C_INDTEXSCALE (C_ZBIAS + 2) //19 #define C_INDTEXMTX (C_INDTEXSCALE + 2) //21 #define C_FOGCOLOR (C_INDTEXMTX + 6) //27 -#define C_FOG (C_FOGCOLOR + 1) //28 +#define C_FOGI (C_FOGCOLOR + 1) //28 +#define C_FOGF (C_FOGI + 1) //29 -#define C_PLIGHT_COLORS (C_FOG + 2) +#define C_PLIGHT_COLORS (C_FOGF + 2) #define C_PLIGHTS (C_PLIGHT_COLORS + 8) #define C_PMATERIALS (C_PLIGHTS + 32) #define C_PENVCONST_END (C_PMATERIALS + 4) diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 79ff607b82..91d74edf4a 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -85,15 +85,15 @@ void PixelShaderManager::SetConstants() // they are the coefficients from the center to the border of the screen // so to simplify I use the hi coefficient as K in the shader taking 256 as the scale // TODO: Shouldn't this be EFBToScaledXf? - constants.fog[1][0] = ScreenSpaceCenter; - constants.fog[1][1] = (float)Renderer::EFBToScaledX((int)(2.0f * xfregs.viewport.wd)); - constants.fog[1][2] = bpmem.fogRange.K[4].HI / 256.0f; + constants.fogf[0][0] = ScreenSpaceCenter; + constants.fogf[0][1] = (float)Renderer::EFBToScaledX((int)(2.0f * xfregs.viewport.wd)); + constants.fogf[0][2] = bpmem.fogRange.K[4].HI / 256.0f; } else { - constants.fog[1][0] = 0; - constants.fog[1][1] = 1; - constants.fog[1][2] = 1; + constants.fogf[0][0] = 0; + constants.fogf[0][1] = 1; + constants.fogf[0][2] = 1; } dirty = true; @@ -282,17 +282,17 @@ void PixelShaderManager::SetFogParamChanged() { if (!g_ActiveConfig.bDisableFog) { - constants.fog[0][0] = bpmem.fog.a.GetA(); - constants.fog[0][1] = (float)bpmem.fog.b_magnitude / 0xFFFFFF; - constants.fog[0][2] = bpmem.fog.c_proj_fsel.GetC(); - constants.fog[0][3] = (float)(1 << bpmem.fog.b_shift); + constants.fogf[1][0] = bpmem.fog.a.GetA(); + constants.fogi[0][1] = bpmem.fog.b_magnitude; + constants.fogf[1][2] = bpmem.fog.c_proj_fsel.GetC(); + constants.fogi[0][3] = bpmem.fog.b_shift; } else { - constants.fog[0][0] = 0; - constants.fog[0][1] = 1; - constants.fog[0][2] = 0; - constants.fog[0][3] = 1; + constants.fogf[1][0] = 0.f; + constants.fogi[0][1] = 1; + constants.fogf[1][2] = 0.f; + constants.fogi[0][3] = 1; } dirty = true; } From 50526ae50aff4caa6453ba05f6ce1e2da967400d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 5 Feb 2014 13:26:01 +0100 Subject: [PATCH 30/45] PixelShaderGen: Remove some dead code. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 98 ++++++---------------- 1 file changed, 25 insertions(+), 73 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 899f01143a..55b688a998 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -138,24 +138,6 @@ static const char *tevCInputTable[] = // CC "int3(127,127,127)", // HALF "ikonsttemp.rgb", // KONST "int3(0,0,0)", // ZERO - ///added extra values to map clamped values - "icprev.rgb", // CPREV, - "icprev.aaa", // APREV, - "icc0.rgb", // C0, - "icc0.aaa", // A0, - "icc1.rgb", // C1, - "icc1.aaa", // A1, - "icc2.rgb", // C2, - "icc2.aaa", // A2, - "itextemp.rgb", // TEXC, - "itextemp.aaa", // TEXA, - "icrastemp.rgb", // RASC, - "icrastemp.aaa", // RASA, - "int3(255,255,255)", // ONE - "int3(127,127,127)", // HALF - "ickonsttemp.rgb", // KONST - "int3(0,0,0)", // ZERO - "PADERROR1", "PADERROR2", "PADERROR3", "PADERROR4" }; static const char *tevAInputTable[] = // CA @@ -168,17 +150,6 @@ static const char *tevAInputTable[] = // CA "irastemp", // RASA, "ikonsttemp", // KONST, (hw1 had quarter) "int4(0,0,0,0)", // ZERO - ///added extra values to map clamped values - "icprev", // APREV, - "icc0", // A0, - "icc1", // A1, - "icc2", // A2, - "itextemp", // TEXA, - "icrastemp", // RASA, - "ickonsttemp", // KONST, (hw1 had quarter) - "int4(0,0,0,0)", // ZERO - "PADERROR5", "PADERROR6", "PADERROR7", "PADERROR8", - "PADERROR9", "PADERROR10", "PADERROR11", "PADERROR12", }; static const char *tevRasTable[] = @@ -193,8 +164,8 @@ static const char *tevRasTable[] = "int4(0, 0, 0, 0)", // zero }; -static const char *tevCOutputTable[] = { "iprev.rgb", "ic0.rgb", "ic1.rgb", "ic2.rgb", "icprev.rgb", "icc0.rgb", "icc1.rgb", "icc2.rgb", }; -static const char *tevAOutputTable[] = { "iprev.a", "ic0.a", "ic1.a", "ic2.a", "icprev.a", "icc0.a", "icc1.a", "icc2.a" }; +static const char *tevCOutputTable[] = { "iprev.rgb", "ic0.rgb", "ic1.rgb", "ic2.rgb" }; +static const char *tevAOutputTable[] = { "iprev.a", "ic0.a", "ic1.a", "ic2.a" }; static char text[16384]; @@ -381,10 +352,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T " int3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" " int alphabump=0;\n" " int3 tevcoord=int3(0, 0, 0);\n" - " int2 wrappedcoord=int2(0,0); float2 tempcoord=float2(0.0,0.0);\n" - " int4 icc0=int4(0, 0, 0, 0), icc1=int4(0, 0, 0, 0);\n" - " int4 icc2=int4(0, 0, 0, 0), icprev=int4(0, 0, 0, 0);\n" - " int4 icrastemp = int4(0, 0, 0, 0), ickonsttemp = int4(0, 0, 0, 0);\n\n"); + " int2 wrappedcoord=int2(0,0); float2 tempcoord=float2(0.0,0.0);\n\n"); if (ApiType == API_OPENGL) { @@ -610,45 +578,29 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T //table with the color compare operations -static const char *TEVCMPColorOPTable[16] = +static const char *TEVCMPColorOPTable[] = { - "float3(0.0, 0.0, 0.0)",//0 - "float3(0.0, 0.0, 0.0)",//1 - "float3(0.0, 0.0, 0.0)",//2 - "float3(0.0, 0.0, 0.0)",//3 - "float3(0.0, 0.0, 0.0)",//4 - "float3(0.0, 0.0, 0.0)",//5 - "float3(0.0, 0.0, 0.0)",//6 - "float3(0.0, 0.0, 0.0)",//7 - " %s + (((%s.r&255) > %s.r) ? (%s&255): int3(0,0,0))",//#define TEVCMP_R8_GT 8 - " %s + (((%s.r&255) == %s.r) ? (%s&255): int3(0,0,0))",//#define TEVCMP_R8_EQ 9 - " %s + ((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))",//#define TEVCMP_GR16_GT 10 - " %s + ((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))",//#define TEVCMP_GR16_EQ 11 - " %s + ((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))",//#define TEVCMP_BGR24_GT 12 - " %s + ((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))",//#define TEVCMP_BGR24_EQ 13 - " %s + int3(max(sign(int3((%s.rgb&255)) - int3((%s.rgb&255))), int3(0,0,0)) * (%s&255))",//#define TEVCMP_RGB8_GT 14 - " %s + int3((int3(255,255,255) - max(sign(abs(int3((%s.rgb&255)) - int3((%s.rgb&255)))), int3(0,0,0))) * (%s&255))"//#define TEVCMP_RGB8_EQ 15 + " %s + (((%s.r&255) > %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_GT + " %s + (((%s.r&255) == %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_EQ + " %s + ((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_GT + " %s + ((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_EQ + " %s + ((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_GT + " %s + ((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_EQ + " %s + int3(max(sign(int3((%s.rgb&255)) - int3((%s.rgb&255))), int3(0,0,0)) * (%s&255))", // TEVCMP_RGB8_GT + " %s + int3((int3(255,255,255) - max(sign(abs(int3((%s.rgb&255)) - int3((%s.rgb&255)))), int3(0,0,0))) * (%s&255))" // TEVCMP_RGB8_EQ }; //table with the alpha compare operations -static const char *TEVCMPAlphaOPTable[16] = +static const char *TEVCMPAlphaOPTable[] = { - "0.0",//0 - "0.0",//1 - "0.0",//2 - "0.0",//3 - "0.0",//4 - "0.0",//5 - "0.0",//6 - "0.0",//7 - " %s.a + (((%s.r&255) > (%s.r&255)) ? (%s.a&255) : 0)",//#define TEVCMP_R8_GT 8 - " %s.a + (((%s.r&255) == (%s.r&255)) ? (%s.a&255) : 0)",//#define TEVCMP_R8_EQ 9 - " %s.a + ((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)",//#define TEVCMP_GR16_GT 10 - " %s.a + ((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)",//#define TEVCMP_GR16_EQ 11 - " %s.a + ((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)",//#define TEVCMP_BGR24_GT 12 - " %s.a + ((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)",//#define TEVCMP_BGR24_EQ 13 - " %s.a + (((%s.a&255) > (%s.a&255)) ? (%s.a&255) : 0)",//#define TEVCMP_A8_GT 14 - " %s.a + (((%s.a&255) == (%s.a&255)) ? (%s.a&255) : 0)" //#define TEVCMP_A8_EQ 15 + " %s.a + (((%s.r&255) > (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_GT + " %s.a + (((%s.r&255) == (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_EQ + " %s.a + ((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_GT + " %s.a + ((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_EQ + " %s.a + ((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_GT + " %s.a + ((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_EQ + " %s.a + (((%s.a&255) > (%s.a&255)) ? (%s.a&255) : 0)", // TEVCMP_A8_GT + " %s.a + (((%s.a&255) == (%s.a&255)) ? (%s.a&255) : 0)" // TEVCMP_A8_EQ }; template @@ -710,7 +662,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.Write("int2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n); - // TODO: should use a shader uid branch for this.. + // TODO: should use a shader uid branch for this for better performance out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } @@ -887,7 +839,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP } else { - int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here + int cmp = (cc.shift<<1)|cc.op; // comparemode stored here out.Write(TEVCMPColorOPTable[cmp],//lookup the function from the op table tevCInputTable[cc.d], tevCInputTable[cc.a], @@ -923,7 +875,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP else { //compare alpha combiner goes here - int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here + int cmp = (ac.shift<<1)|ac.op; // comparemode stored here out.Write(TEVCMPAlphaOPTable[cmp], tevAInputTable[ac.d], tevAInputTable[ac.a], @@ -991,7 +943,7 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_T int compindex = bpmem.alpha_test.comp0; out.Write(tevAlphaFuncsTable[compindex], alphaRef[0]); - out.Write("%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op + out.Write("%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]); // lookup the logic op // Lookup the second component from the alpha function table compindex = bpmem.alpha_test.comp1; From 16109fb453589a06b7aef0306c66e1d0ee02a874 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 29 Jan 2014 15:28:22 +0100 Subject: [PATCH 31/45] PixelShaderGen: Treat UV coordinates as actual integers. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 25 ++++++++++------------ 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 55b688a998..22ffa87eb5 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -377,10 +377,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T } } - // UV coordinates are stored in 17.7 format.. TODO: Use an integer for this! - for (int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - out.Write("uv%d.xy = uv%d.xy * 128.0;\n", i, i); - if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { out.Write("\tfloat3 _norm0 = normalize(Normal.xyz);\n\n"); @@ -402,7 +398,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // HACK to handle cases where the tex gen is not enabled if (numTexgen == 0) { - out.Write("\tfloat3 uv0 = float3(0.0, 0.0, 0.0);\n"); + out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n"); } else { @@ -417,7 +413,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("\t\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); } - out.Write("uv%d.xy = uv%d.xy * " I_TEXDIMS"[%d].zw;\n", i, i, i); + out.Write("int2 fixpoint_uv%d = int2(round(uv%d.xy * " I_TEXDIMS"[%d].zw * 128.0));\n", i, i, i); + // TODO: S24 overflows here? } } @@ -444,7 +441,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (texcoord < numTexgen) { out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2); - out.Write("\ttempcoord = uv%d.xy / 128.0 * " I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy"); + out.Write("\ttempcoord = round(float2(fixpoint_uv%d.xy) * " I_INDTEXSCALE"[%d].%s) / 128.0;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy"); // TODO: Make indtexscale an integer } else out.Write("\ttempcoord = float2(0.0, 0.0);\n"); @@ -671,7 +668,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(int2(round(uv%d.xy)) * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); + out.Write("int2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); @@ -681,7 +678,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(int2(round(uv%d.xy)) * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); + out.Write("int2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); @@ -703,19 +700,19 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP // wrap S if (bpmem.tevind[n].sw == ITW_OFF) - out.Write("wrappedcoord.x = int(round(uv%d.x));\n", texcoord); + out.Write("wrappedcoord.x = fixpoint_uv%d.x;\n", texcoord); else if (bpmem.tevind[n].sw == ITW_0) out.Write("wrappedcoord.x = 0;\n"); else - out.Write("wrappedcoord.x = int(round(uv%d.x)) %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); + out.Write("wrappedcoord.x = fixpoint_uv%d.x %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); // wrap T if (bpmem.tevind[n].tw == ITW_OFF) - out.Write("wrappedcoord.y = int(round(uv%d.y));\n", texcoord); + out.Write("wrappedcoord.y = fixpoint_uv%d.y;\n", texcoord); else if (bpmem.tevind[n].tw == ITW_0) out.Write("wrappedcoord.y = 0;\n"); else - out.Write("wrappedcoord.y = int(round(uv%d.y)) %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); + out.Write("wrappedcoord.y = fixpoint_uv%d.y %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); if (bpmem.tevind[n].fb_addprev) // add previous tevcoord out.Write("tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); @@ -759,7 +756,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP { // calc tevcord if (bHasTexCoord) - out.Write("tevcoord.xy = int2(round(uv%d.xy));\n", texcoord); + out.Write("tevcoord.xy = fixpoint_uv%d;\n", texcoord); else out.Write("tevcoord.xy = int2(0, 0);\n"); } From fa7173d0994e4195b18f312a3f11461d8197439b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 29 Jan 2014 15:52:24 +0100 Subject: [PATCH 32/45] PixelShaderGen: Store tex scale as an integer. --- Source/Core/VideoCommon/BPMemory.h | 3 --- Source/Core/VideoCommon/ConstantManager.h | 2 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 10 +++++----- Source/Core/VideoCommon/PixelShaderManager.cpp | 8 ++++---- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index 2c5a0811f0..2bb35d92c0 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -411,9 +411,6 @@ union TEXSCALE u32 rid : 8; }; u32 hex; - - float getScaleS(int i){return 1.0f/(float)(1<<(i?ss1:ss0));} - float getScaleT(int i){return 1.0f/(float)(1<<(i?ts1:ts0));} }; union RAS1_IREF diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index da3d19962f..85630966c5 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -16,7 +16,7 @@ struct PixelShaderConstants int4 alpha; float4 texdims[8]; int4 zbias[2]; - float4 indtexscale[2]; + int4 indtexscale[2]; int4 indtexmtx[6]; int4 fogcolor; int4 fogi[1]; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 22ffa87eb5..942c557aa2 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -259,7 +259,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T DeclareUniform(out, ApiType, C_ALPHA, "int4", I_ALPHA); DeclareUniform(out, ApiType, C_TEXDIMS, "float4", I_TEXDIMS"[8]"); DeclareUniform(out, ApiType, C_ZBIAS, "int4", I_ZBIAS"[2]"); - DeclareUniform(out, ApiType, C_INDTEXSCALE, "float4", I_INDTEXSCALE"[2]"); + DeclareUniform(out, ApiType, C_INDTEXSCALE, "int4", I_INDTEXSCALE"[2]"); DeclareUniform(out, ApiType, C_INDTEXMTX, "int4", I_INDTEXMTX"[6]"); DeclareUniform(out, ApiType, C_FOGCOLOR, "int4", I_FOGCOLOR); DeclareUniform(out, ApiType, C_FOGI, "int4", I_FOGI"[1]"); @@ -352,7 +352,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T " int3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" " int alphabump=0;\n" " int3 tevcoord=int3(0, 0, 0);\n" - " int2 wrappedcoord=int2(0,0); float2 tempcoord=float2(0.0,0.0);\n\n"); + " int2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n\n"); if (ApiType == API_OPENGL) { @@ -441,13 +441,13 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (texcoord < numTexgen) { out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2); - out.Write("\ttempcoord = round(float2(fixpoint_uv%d.xy) * " I_INDTEXSCALE"[%d].%s) / 128.0;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy"); // TODO: Make indtexscale an integer + out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy"); } else - out.Write("\ttempcoord = float2(0.0, 0.0);\n"); + out.Write("\ttempcoord = int2(0, 0);\n"); out.Write("\tint3 iindtex%d = ", i); - SampleTexture(out, "tempcoord", "abg", texmap, ApiType); + SampleTexture(out, "(float2(tempcoord)/128.0)", "abg", texmap, ApiType); } } diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 91d74edf4a..86111334ce 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -202,10 +202,10 @@ void PixelShaderManager::SetViewportChanged() void PixelShaderManager::SetIndTexScaleChanged(bool high) { - constants.indtexscale[high][0] = bpmem.texscale[high].getScaleS(0); - constants.indtexscale[high][1] = bpmem.texscale[high].getScaleT(0); - constants.indtexscale[high][2] = bpmem.texscale[high].getScaleS(1); - constants.indtexscale[high][3] = bpmem.texscale[high].getScaleT(1); + constants.indtexscale[high][0] = bpmem.texscale[high].ss0; + constants.indtexscale[high][1] = bpmem.texscale[high].ts0; + constants.indtexscale[high][2] = bpmem.texscale[high].ss1; + constants.indtexscale[high][3] = bpmem.texscale[high].ts1; dirty = true; } From 036a8c6951c3e6aba480f83744893773fb09441a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 28 Feb 2014 20:24:00 +0100 Subject: [PATCH 33/45] PixelShaderGen: Clean up tev compare functionality. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 73 ++++++++++------------ 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 942c557aa2..7e8e4859e7 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -573,33 +573,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T } - -//table with the color compare operations -static const char *TEVCMPColorOPTable[] = -{ - " %s + (((%s.r&255) > %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_GT - " %s + (((%s.r&255) == %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_EQ - " %s + ((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_GT - " %s + ((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_EQ - " %s + ((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_GT - " %s + ((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_EQ - " %s + int3(max(sign(int3((%s.rgb&255)) - int3((%s.rgb&255))), int3(0,0,0)) * (%s&255))", // TEVCMP_RGB8_GT - " %s + int3((int3(255,255,255) - max(sign(abs(int3((%s.rgb&255)) - int3((%s.rgb&255)))), int3(0,0,0))) * (%s&255))" // TEVCMP_RGB8_EQ -}; - -//table with the alpha compare operations -static const char *TEVCMPAlphaOPTable[] = -{ - " %s.a + (((%s.r&255) > (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_GT - " %s.a + (((%s.r&255) == (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_EQ - " %s.a + ((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_GT - " %s.a + ((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_EQ - " %s.a + ((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_GT - " %s.a + ((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_EQ - " %s.a + (((%s.a&255) > (%s.a&255)) ? (%s.a&255) : 0)", // TEVCMP_A8_GT - " %s.a + (((%s.a&255) == (%s.a&255)) ? (%s.a&255) : 0)" // TEVCMP_A8_EQ -}; - template static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, const char swapModeTable[4][5]) { @@ -836,12 +809,22 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP } else { - int cmp = (cc.shift<<1)|cc.op; // comparemode stored here - out.Write(TEVCMPColorOPTable[cmp],//lookup the function from the op table - tevCInputTable[cc.d], - tevCInputTable[cc.a], - tevCInputTable[cc.b], - tevCInputTable[cc.c]); + static const char *function_table[] = + { + "(((%s.r&255) > %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_GT + "(((%s.r&255) == %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_EQ + "((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_GT + "((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_EQ + "((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_GT + "((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_EQ + "int3(max(sign(int3((%s.rgb&255)) - int3((%s.rgb&255))), int3(0,0,0)) * (%s&255))", // TEVCMP_RGB8_GT + "int3((int3(255,255,255) - max(sign(abs(int3((%s.rgb&255)) - int3((%s.rgb&255)))), int3(0,0,0))) * (%s&255))" // TEVCMP_RGB8_EQ + }; + + int mode = (cc.shift<<1)|cc.op; + out.Write(" %s + ", tevCInputTable[cc.d]); + out.Write(function_table[mode], tevCInputTable[cc.a], + tevCInputTable[cc.b], tevCInputTable[cc.c]); } if (cc.clamp) out.Write(", int3(0,0,0), int3(255,255,255))"); @@ -867,17 +850,25 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (ac.shift>0) out.Write(")%s", tevScaleTable[ac.shift]); - } else { - //compare alpha combiner goes here - int cmp = (ac.shift<<1)|ac.op; // comparemode stored here - out.Write(TEVCMPAlphaOPTable[cmp], - tevAInputTable[ac.d], - tevAInputTable[ac.a], - tevAInputTable[ac.b], - tevAInputTable[ac.c]); + static const char *function_table[] = + { + "(((%s.r&255) > (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_GT + "(((%s.r&255) == (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_EQ + "((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_GT + "((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_EQ + "((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_GT + "((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_EQ + "(((%s.a&255) > (%s.a&255)) ? (%s.a&255) : 0)", // TEVCMP_A8_GT + "(((%s.a&255) == (%s.a&255)) ? (%s.a&255) : 0)" // TEVCMP_A8_EQ + }; + + int mode = (ac.shift<<1)|ac.op; + out.Write(" %s.a + ", tevAInputTable[ac.d]); + out.Write(function_table[mode], tevAInputTable[ac.a], + tevAInputTable[ac.b], tevAInputTable[ac.c]); } if (ac.clamp) out.Write(", 0, 255)"); From 8b8bb04fd38caf0128d9c1a3cc6c49623bfdd0b1 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 28 Feb 2014 20:32:09 +0100 Subject: [PATCH 34/45] PixelShaderGen: Use bit shifts instead of multiplications as a small optimization. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 7e8e4859e7..3bbe31927c 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -101,10 +101,10 @@ static const char *tevKSelTableA[] = // KASEL static const char *tevScaleTable[] = // CS { - "*1", // SCALE_1 - "*2", // SCALE_2 - "*4", // SCALE_4 - "/ 2", // DIVIDE_2 + "", // SCALE_1 + " << 1", // SCALE_2 + " << 2", // SCALE_4 + " >> 1", // DIVIDE_2 }; static const char *tevBiasTable[] = // TB @@ -802,10 +802,10 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.Write("((%s&255) * (int3(255,255,255) - (%s&255)) + (%s&255) * (%s&255)) / 255", tevCInputTable[cc.a], tevCInputTable[cc.c], tevCInputTable[cc.b], tevCInputTable[cc.c]); - out.Write("%s", tevBiasTable[cc.bias]); + out.Write(" %s", tevBiasTable[cc.bias]); if (cc.shift > TEVSCALE_1) - out.Write(")%s", tevScaleTable[cc.shift]); + out.Write(") %s", tevScaleTable[cc.shift]); } else { @@ -846,10 +846,10 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.Write("((%s.a&255) * (255 - (%s.a&255)) + (%s.a&255) * (%s.a&255)) / 255", tevAInputTable[ac.a], tevAInputTable[ac.c], tevAInputTable[ac.b], tevAInputTable[ac.c]); - out.Write("%s",tevBiasTable[ac.bias]); + out.Write(" %s",tevBiasTable[ac.bias]); if (ac.shift>0) - out.Write(")%s", tevScaleTable[ac.shift]); + out.Write(") %s", tevScaleTable[ac.shift]); } else { From 9a96a1d5258ed0962f8859cfe308cf7b706e1992 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 28 Feb 2014 20:34:09 +0100 Subject: [PATCH 35/45] PixelShaderGen: Remove old, mostly useless comments. A few vague lines of comments cannot replace an afternoon reading of how TEV works. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 23 +++++++--------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 3bbe31927c..814617a9ff 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -18,16 +18,7 @@ #include "VideoCommon/XFMemory.h" // for texture projection mode -// old tev->pixelshader notes -// -// color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0 -// konstant for this stage (alpha, color) is given by bpmem.tevksel -// inputs are given by bpmem.combiners[0].colorC.a/b/c/d << could be current channel color -// according to GXTevColorArg table above -// output is given by .outreg -// tevtemp is set according to swapmodetables and - -static const char *tevKSelTableC[] = // KCSEL +static const char *tevKSelTableC[] = { "255,255,255", // 1 = 0x00 "223,223,223", // 7_8 = 0x01 @@ -63,7 +54,7 @@ static const char *tevKSelTableC[] = // KCSEL I_KCOLORS"[3].aaa", // K3_A = 0x1F }; -static const char *tevKSelTableA[] = // KASEL +static const char *tevKSelTableA[] = { "255", // 1 = 0x00 "223", // 7_8 = 0x01 @@ -99,7 +90,7 @@ static const char *tevKSelTableA[] = // KASEL I_KCOLORS"[3].a", // K3_A = 0x1F }; -static const char *tevScaleTable[] = // CS +static const char *tevScaleTable[] = { "", // SCALE_1 " << 1", // SCALE_2 @@ -107,7 +98,7 @@ static const char *tevScaleTable[] = // CS " >> 1", // DIVIDE_2 }; -static const char *tevBiasTable[] = // TB +static const char *tevBiasTable[] = { "", // ZERO, "+ 128", // ADDHALF, @@ -115,12 +106,12 @@ static const char *tevBiasTable[] = // TB "", }; -static const char *tevOpTable[] = { // TEV +static const char *tevOpTable[] = { "+", // TEVOP_ADD = 0, "-", // TEVOP_SUB = 1, }; -static const char *tevCInputTable[] = // CC +static const char *tevCInputTable[] = { "iprev.rgb", // CPREV, "iprev.aaa", // APREV, @@ -140,7 +131,7 @@ static const char *tevCInputTable[] = // CC "int3(0,0,0)", // ZERO }; -static const char *tevAInputTable[] = // CA +static const char *tevAInputTable[] = { "iprev", // APREV, "ic0", // A0, From 6fcbda67523da8046e7b27f09c39dcbf74127b3d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 28 Feb 2014 20:43:43 +0100 Subject: [PATCH 36/45] PixelShaderGen: Cleanup and clarify bump alpha combiner inputs. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 814617a9ff..e818f4fef2 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -150,8 +150,8 @@ static const char *tevRasTable[] = "ERROR13", //2 "ERROR14", //3 "ERROR15", //4 - "(int4(1,1,1,1) * alphabump)", // use bump alpha - "(int4(1,1,1,1) * (alphabump | (alphabump >> 5)))", //normalized + "(int4(1, 1, 1, 1) * alphabump)", // bump alpha (0..248) + "(int4(1, 1, 1, 1) * (alphabump | (alphabump >> 5)))", // normalized bump alpha (0..255) "int4(0, 0, 0, 0)", // zero }; From e2e1c5c905afc9a815ba5e0e1fb550970afb11ff Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 28 Feb 2014 20:53:31 +0100 Subject: [PATCH 37/45] PixelShaderGen: Add a note about a random idea which should be checked with hardware tests. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index e818f4fef2..d6bca52c22 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -988,6 +988,8 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) // perspective // ze = A/(B - (Zs >> B_SHF) // TODO: Verify that we want to drop lower bits here! (currently taken over from software renderer) + // Maybe we want to use "ze = (A << B_SHF)/((B << B_SHF) - Zs)" instead? + // That's equivalent, but keeps the lower bits of Zs. out.Write("\tfloat ze = (" I_FOGF"[1].x * 16777215.0) / float(" I_FOGI"[0].y - (zCoord >> " I_FOGI"[0].w));\n"); } else From 8ebb65ebf29b95cde6b564fc174b1b83a717a3f8 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 1 Mar 2014 14:08:45 +0100 Subject: [PATCH 38/45] PixelShaderGen: Prettify generated shader source. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 105 ++++++++++----------- 1 file changed, 52 insertions(+), 53 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index d6bca52c22..30c0303b2f 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -214,7 +214,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T "{\n" "\tint4 tmp = x * y;\n" "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n" - "}\n"); + "}\n\n"); if (ApiType == API_OPENGL) { @@ -349,22 +349,22 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T { // On Mali, global variables must be initialized as constants. // This is why we initialize these variables locally instead. - out.Write("float4 rawpos = gl_FragCoord;\n"); - out.Write("float4 colors_0 = colors_02;\n"); - out.Write("float4 colors_1 = colors_12;\n"); + out.Write("\tfloat4 rawpos = gl_FragCoord;\n"); + out.Write("\tfloat4 colors_0 = colors_02;\n"); + out.Write("\tfloat4 colors_1 = colors_12;\n"); // compute window position if needed because binding semantic WPOS is not widely supported // Let's set up attributes if (numTexgen) { for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { - out.Write("float3 uv%d = uv%d_2;\n", i, i); + out.Write("\tfloat3 uv%d = uv%d_2;\n", i, i); } } - out.Write("float4 clipPos = clipPos_2;\n"); + out.Write("\tfloat4 clipPos = clipPos_2;\n"); if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) { - out.Write("float4 Normal = Normal_2;\n"); + out.Write("\tfloat4 Normal = Normal_2;\n"); } } @@ -389,7 +389,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // HACK to handle cases where the tex gen is not enabled if (numTexgen == 0) { - out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n"); + out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n"); } else { @@ -400,11 +400,11 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T uid_data.texMtxInfo_n_projection |= xfregs.texMtxInfo[i].projection << i; if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ) { - out.Write("\tif (uv%d.z != 0.0)", i); + out.Write("\tif (uv%d.z != 0.0)\n", i); out.Write("\t\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); } - out.Write("int2 fixpoint_uv%d = int2(round(uv%d.xy * " I_TEXDIMS"[%d].zw * 128.0));\n", i, i, i); + out.Write("\tint2 fixpoint_uv%d = int2(round(uv%d.xy * " I_TEXDIMS"[%d].zw * 128.0));\n\n", i, i, i); // TODO: S24 overflows here? } } @@ -491,12 +491,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // The performance impact of this additional calculation doesn't matter, but it prevents // the host GPU driver from performing any early depth test optimizations. if (g_ActiveConfig.bFastDepthCalc) - out.Write("int zCoord = int(round(rawpos.z * 16777215.0));\n"); + out.Write("\tint zCoord = int(round(rawpos.z * 16777215.0));\n"); else { out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1); // the screen space depth value = far z + (clip z / clip w) * z range - out.Write("int zCoord = " I_ZBIAS"[1].x + int(round((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y)));\n"); + out.Write("\tint zCoord = " I_ZBIAS"[1].x + int(round((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y)));\n"); } // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either @@ -511,7 +511,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // Note: z-textures are not written to depth buffer if early depth test is used if (per_pixel_depth && bpmem.UseEarlyDepthTest()) - out.Write("depth = float(zCoord) / 16777215.0;\n"); + out.Write("\tdepth = float(zCoord) / 16777215.0;\n"); // Note: depth texture output is only written to depth buffer if late depth test is used // theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway @@ -519,13 +519,13 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T { // use the texture input of the last texture stage (itextemp), hopefully this has been read and is in correct format... out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1); - out.Write("zCoord = idot(" I_ZBIAS"[0].xyzw, itextemp.xyzw) + " I_ZBIAS"[1].w %s;\n", + out.Write("\tzCoord = idot(" I_ZBIAS"[0].xyzw, itextemp.xyzw) + " I_ZBIAS"[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : ""); - out.Write("zCoord = zCoord & 16777215;\n"); + out.Write("\tzCoord = zCoord & 16777215;\n"); } if (per_pixel_depth && bpmem.UseLateDepthTest()) - out.Write("depth = float(zCoord) / 16777215.0;\n"); + out.Write("\tdepth = float(zCoord) / 16777215.0;\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) { @@ -574,7 +574,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (!bHasTexCoord) texcoord = 0; - out.Write("// TEV stage %d\n", n); + out.Write("\t// TEV stage %d\n", n); uid_data.stagehash[n].hasindstage = bHasIndStage; uid_data.stagehash[n].tevorders_texcoord = texcoord; @@ -582,7 +582,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP { uid_data.stagehash[n].tevind = bpmem.tevind[n].hex & 0x7FFFFF; - out.Write("// indirect op\n"); + out.Write("\t// indirect op\n"); // perform the indirect op on the incoming regular coordinates using iindtex%d as the offset coords if (bpmem.tevind[n].bs != ITBA_OFF) { @@ -600,17 +600,17 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP // format const char *tevIndFmtMask[] = {"255", "31", "15", "7" }; - out.Write("int3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, tevIndFmtMask[bpmem.tevind[n].fmt]); + out.Write("\tint3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, tevIndFmtMask[bpmem.tevind[n].fmt]); // bias - TODO: Check if this needs to be this complicated.. const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias const char *tevIndBiasAdd[] = {"-128", "1", "1", "1" }; // indexed by fmt if (bpmem.tevind[n].bias == ITB_S || bpmem.tevind[n].bias == ITB_T || bpmem.tevind[n].bias == ITB_U) - out.Write("iindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); + out.Write("\tiindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]); else if (bpmem.tevind[n].bias == ITB_ST || bpmem.tevind[n].bias == ITB_SU || bpmem.tevind[n].bias == ITB_TU) - out.Write("iindtevcrd%d.%s += int2(%s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); + out.Write("\tiindtevcrd%d.%s += int2(%s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); else if (bpmem.tevind[n].bias == ITB_STU) - out.Write("iindtevcrd%d.%s += int3(%s, %s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); + out.Write("\tiindtevcrd%d.%s += int3(%s, %s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]); // multiply by offset matrix and scale - calculations are likely to overflow badly, // yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result @@ -621,40 +621,40 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int mtxidx = 2*(bpmem.tevind[n].mid-1); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n); + out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n); // TODO: should use a shader uid branch for this for better performance - out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); - out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) { // s matrix _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); + out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); - out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); - out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) { // t matrix _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); + out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); - out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); - out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } else { - out.Write("int2 indtevtrans%d = int2(0, 0);\n", n); + out.Write("\tint2 indtevtrans%d = int2(0, 0);\n", n); } } else { - out.Write("int2 indtevtrans%d = int2(0, 0);\n", n); + out.Write("\tint2 indtevtrans%d = int2(0, 0);\n", n); } // --------- @@ -664,27 +664,27 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP // wrap S if (bpmem.tevind[n].sw == ITW_OFF) - out.Write("wrappedcoord.x = fixpoint_uv%d.x;\n", texcoord); + out.Write("\twrappedcoord.x = fixpoint_uv%d.x;\n", texcoord); else if (bpmem.tevind[n].sw == ITW_0) - out.Write("wrappedcoord.x = 0;\n"); + out.Write("\twrappedcoord.x = 0;\n"); else - out.Write("wrappedcoord.x = fixpoint_uv%d.x %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); + out.Write("\twrappedcoord.x = fixpoint_uv%d.x %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]); // wrap T if (bpmem.tevind[n].tw == ITW_OFF) - out.Write("wrappedcoord.y = fixpoint_uv%d.y;\n", texcoord); + out.Write("\twrappedcoord.y = fixpoint_uv%d.y;\n", texcoord); else if (bpmem.tevind[n].tw == ITW_0) - out.Write("wrappedcoord.y = 0;\n"); + out.Write("\twrappedcoord.y = 0;\n"); else - out.Write("wrappedcoord.y = fixpoint_uv%d.y %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); + out.Write("\twrappedcoord.y = fixpoint_uv%d.y %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]); if (bpmem.tevind[n].fb_addprev) // add previous tevcoord - out.Write("tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); + out.Write("\ttevcoord.xy += wrappedcoord + indtevtrans%d;\n", n); else - out.Write("tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); + out.Write("\ttevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); // Emulate s24 overflows - out.Write("tevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); + out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); } TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; @@ -709,7 +709,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP uid_data.stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1); const char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; - out.Write("irastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); + out.Write("\tirastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); } uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); @@ -720,9 +720,9 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP { // calc tevcord if (bHasTexCoord) - out.Write("tevcoord.xy = fixpoint_uv%d;\n", texcoord); + out.Write("\ttevcoord.xy = fixpoint_uv%d;\n", texcoord); else - out.Write("tevcoord.xy = int2(0, 0);\n"); + out.Write("\ttevcoord.xy = int2(0, 0);\n"); } const int i = bpmem.combiners[n].alphaC.tswap; @@ -737,12 +737,12 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP const char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; uid_data.SetTevindrefTexmap(i, texmap); - out.Write("itextemp = "); + out.Write("\titextemp = "); SampleTexture(out, "(float2(tevcoord.xy)/128.0)", texswap, texmap, ApiType); } else { - out.Write("itextemp = int4(255, 255, 255, 255);\n"); + out.Write("\titextemp = int4(255, 255, 255, 255);\n"); } @@ -755,7 +755,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int ka = bpmem.tevksel[n / 2].getKA(n & 1); uid_data.stagehash[n].tevksel_kc = kc; uid_data.stagehash[n].tevksel_ka = ka; - out.Write("ikonsttemp = int4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); + out.Write("\tikonsttemp = int4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); if (kc > 7) out.SetConstantsUsed(C_KCOLORS+((kc-0xc)%4),C_KCOLORS+((kc-0xc)%4)); @@ -778,8 +778,8 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (ac.dest >= GX_TEVREG0 && ac.dest <= GX_TEVREG2) out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest); - out.Write("// color combine\n"); - out.Write("%s = clamp(", tevCOutputTable[cc.dest]); + out.Write("\t// color combine\n"); + out.Write("\t%s = clamp(", tevCOutputTable[cc.dest]); // combine the color channel if (cc.bias != TevBias_COMPARE) // if not compare @@ -796,7 +796,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.Write(" %s", tevBiasTable[cc.bias]); if (cc.shift > TEVSCALE_1) - out.Write(") %s", tevScaleTable[cc.shift]); + out.Write(")%s", tevScaleTable[cc.shift]); } else { @@ -840,7 +840,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.Write(" %s",tevBiasTable[ac.bias]); if (ac.shift>0) - out.Write(") %s", tevScaleTable[ac.shift]); + out.Write(")%s", tevScaleTable[ac.shift]); } else { @@ -867,7 +867,6 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP out.Write(", -1024, 1023)"); out.Write(";\n\n"); - out.Write("// TEV done\n"); } template From 6c2971eaf6544295f6331130db53e220a36875a2 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 1 Mar 2014 18:25:21 +0100 Subject: [PATCH 39/45] PixelShaderGen: Write 16777215 in hex (0xFFFFFF) so that it's easier to understand. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 30c0303b2f..8d22bd5ef4 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -491,7 +491,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // The performance impact of this additional calculation doesn't matter, but it prevents // the host GPU driver from performing any early depth test optimizations. if (g_ActiveConfig.bFastDepthCalc) - out.Write("\tint zCoord = int(round(rawpos.z * 16777215.0));\n"); + out.Write("\tint zCoord = int(round(rawpos.z * float(0xFFFFFF)));\n"); else { out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1); @@ -511,7 +511,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // Note: z-textures are not written to depth buffer if early depth test is used if (per_pixel_depth && bpmem.UseEarlyDepthTest()) - out.Write("\tdepth = float(zCoord) / 16777215.0;\n"); + out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n"); // Note: depth texture output is only written to depth buffer if late depth test is used // theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway @@ -521,11 +521,11 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1); out.Write("\tzCoord = idot(" I_ZBIAS"[0].xyzw, itextemp.xyzw) + " I_ZBIAS"[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : ""); - out.Write("\tzCoord = zCoord & 16777215;\n"); + out.Write("\tzCoord = zCoord & 0xFFFFFF;\n"); } if (per_pixel_depth && bpmem.UseLateDepthTest()) - out.Write("\tdepth = float(zCoord) / 16777215.0;\n"); + out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n"); if (dstAlphaMode == DSTALPHA_ALPHA_PASS) { @@ -587,7 +587,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP if (bpmem.tevind[n].bs != ITBA_OFF) { const char *tevIndAlphaSel[] = {"", "x", "y", "z"}; - const char *tevIndAlphaMask[] = {"248", "224", "240", "248"}; + const char *tevIndAlphaMask[] = {"248", "224", "240", "248"}; // 0b11111000, 0b11100000, 0b11110000, 0b11111000 out.Write("alphabump = iindtex%d.%s & %s;\n", bpmem.tevind[n].bt, tevIndAlphaSel[bpmem.tevind[n].bs], From 6e65e02c9eaaa967da769e8a16bc5eee81bb002d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 3 Mar 2014 19:23:46 +0100 Subject: [PATCH 40/45] ConstantManager: Do not use single-element arrays. --- Source/Core/VideoCommon/ConstantManager.h | 2 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 4 ++-- Source/Core/VideoCommon/PixelShaderManager.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index 85630966c5..b03d2ced6d 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -19,7 +19,7 @@ struct PixelShaderConstants int4 indtexscale[2]; int4 indtexmtx[6]; int4 fogcolor; - int4 fogi[1]; + int4 fogi; float4 fogf[2]; // For pixel lighting diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 8d22bd5ef4..fe9eecfdea 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -253,7 +253,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T DeclareUniform(out, ApiType, C_INDTEXSCALE, "int4", I_INDTEXSCALE"[2]"); DeclareUniform(out, ApiType, C_INDTEXMTX, "int4", I_INDTEXMTX"[6]"); DeclareUniform(out, ApiType, C_FOGCOLOR, "int4", I_FOGCOLOR); - DeclareUniform(out, ApiType, C_FOGI, "int4", I_FOGI"[1]"); + DeclareUniform(out, ApiType, C_FOGI, "int4", I_FOGI); DeclareUniform(out, ApiType, C_FOGF, "float4", I_FOGF"[2]"); // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! @@ -989,7 +989,7 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) // TODO: Verify that we want to drop lower bits here! (currently taken over from software renderer) // Maybe we want to use "ze = (A << B_SHF)/((B << B_SHF) - Zs)" instead? // That's equivalent, but keeps the lower bits of Zs. - out.Write("\tfloat ze = (" I_FOGF"[1].x * 16777215.0) / float(" I_FOGI"[0].y - (zCoord >> " I_FOGI"[0].w));\n"); + out.Write("\tfloat ze = (" I_FOGF"[1].x * 16777215.0) / float(" I_FOGI".y - (zCoord >> " I_FOGI".w));\n"); } else { diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 86111334ce..7cbb155be2 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -283,16 +283,16 @@ void PixelShaderManager::SetFogParamChanged() if (!g_ActiveConfig.bDisableFog) { constants.fogf[1][0] = bpmem.fog.a.GetA(); - constants.fogi[0][1] = bpmem.fog.b_magnitude; + constants.fogi[1] = bpmem.fog.b_magnitude; constants.fogf[1][2] = bpmem.fog.c_proj_fsel.GetC(); - constants.fogi[0][3] = bpmem.fog.b_shift; + constants.fogi[3] = bpmem.fog.b_shift; } else { constants.fogf[1][0] = 0.f; - constants.fogi[0][1] = 1; + constants.fogi[1] = 1; constants.fogf[1][2] = 0.f; - constants.fogi[0][3] = 1; + constants.fogi[3] = 1; } dirty = true; } From bdd629c598b39a2e1dcfed4649cfc7fcfd2bc34b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 6 Mar 2014 19:21:03 +0100 Subject: [PATCH 41/45] PixelShaderGen: Use spaces for alignment where appropriate. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index fe9eecfdea..3f4a4d140a 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -204,17 +204,17 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T uid_data.genMode_numtexgens = bpmem.genMode.numtexgens; // dot product for integer vectors - out.Write( "int idot(int3 x, int3 y)\n" - "{\n" - "\tint3 tmp = x * y;\n" - "\treturn tmp.x + tmp.y + tmp.z;\n" - "}\n"); + out.Write("int idot(int3 x, int3 y)\n" + "{\n" + "\tint3 tmp = x * y;\n" + "\treturn tmp.x + tmp.y + tmp.z;\n" + "}\n"); - out.Write( "int idot(int4 x, int4 y)\n" - "{\n" - "\tint4 tmp = x * y;\n" - "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n" - "}\n\n"); + out.Write("int idot(int4 x, int4 y)\n" + "{\n" + "\tint4 tmp = x * y;\n" + "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n" + "}\n\n"); if (ApiType == API_OPENGL) { @@ -338,12 +338,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write(" ) {\n"); } - out.Write(" int4 ic0 = " I_COLORS"[1], ic1 = " I_COLORS"[2], ic2 = " I_COLORS"[3], iprev = " I_COLORS"[0];\n" - " int4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n" - " int3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" - " int alphabump=0;\n" - " int3 tevcoord=int3(0, 0, 0);\n" - " int2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n\n"); + out.Write("\tint4 ic0 = " I_COLORS"[1], ic1 = " I_COLORS"[2], ic2 = " I_COLORS"[3], iprev = " I_COLORS"[0];\n" + "\tint4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n" + "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" + "\tint alphabump=0;\n" + "\tint3 tevcoord=int3(0, 0, 0);\n" + "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n\n"); if (ApiType == API_OPENGL) { From 0ce92e0162928771136ee0b7e603da3db81c1584 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 6 Mar 2014 19:27:11 +0100 Subject: [PATCH 42/45] PixelShaderGen: Remove the "i" prefix for integer variables. The prefix was just required in the development stage to reduce the risk of regressions. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 86 +++++++++++----------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 3f4a4d140a..67cac69107 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -113,33 +113,33 @@ static const char *tevOpTable[] = { static const char *tevCInputTable[] = { - "iprev.rgb", // CPREV, - "iprev.aaa", // APREV, - "ic0.rgb", // C0, - "ic0.aaa", // A0, - "ic1.rgb", // C1, - "ic1.aaa", // A1, - "ic2.rgb", // C2, - "ic2.aaa", // A2, - "itextemp.rgb", // TEXC, - "itextemp.aaa", // TEXA, - "irastemp.rgb", // RASC, - "irastemp.aaa", // RASA, + "prev.rgb", // CPREV, + "prev.aaa", // APREV, + "c0.rgb", // C0, + "c0.aaa", // A0, + "c1.rgb", // C1, + "c1.aaa", // A1, + "c2.rgb", // C2, + "c2.aaa", // A2, + "textemp.rgb", // TEXC, + "textemp.aaa", // TEXA, + "rastemp.rgb", // RASC, + "rastemp.aaa", // RASA, "int3(255,255,255)", // ONE "int3(127,127,127)", // HALF - "ikonsttemp.rgb", // KONST + "konsttemp.rgb", // KONST "int3(0,0,0)", // ZERO }; static const char *tevAInputTable[] = { - "iprev", // APREV, - "ic0", // A0, - "ic1", // A1, - "ic2", // A2, - "itextemp", // TEXA, - "irastemp", // RASA, - "ikonsttemp", // KONST, (hw1 had quarter) + "prev", // APREV, + "c0", // A0, + "c1", // A1, + "c2", // A2, + "textemp", // TEXA, + "rastemp", // RASA, + "konsttemp", // KONST, (hw1 had quarter) "int4(0,0,0,0)", // ZERO }; @@ -155,8 +155,8 @@ static const char *tevRasTable[] = "int4(0, 0, 0, 0)", // zero }; -static const char *tevCOutputTable[] = { "iprev.rgb", "ic0.rgb", "ic1.rgb", "ic2.rgb" }; -static const char *tevAOutputTable[] = { "iprev.a", "ic0.a", "ic1.a", "ic2.a" }; +static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; +static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; static char text[16384]; @@ -338,8 +338,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write(" ) {\n"); } - out.Write("\tint4 ic0 = " I_COLORS"[1], ic1 = " I_COLORS"[2], ic2 = " I_COLORS"[3], iprev = " I_COLORS"[0];\n" - "\tint4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n" + out.Write("\tint4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = " I_COLORS"[0];\n" + "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, 0, 0);\n" "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" "\tint alphabump=0;\n" "\tint3 tevcoord=int3(0, 0, 0);\n" @@ -468,14 +468,14 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // regardless of the used destination register if (bpmem.combiners[numStages - 1].colorC.dest != 0) { - out.Write("\tiprev.rgb = %s;\n", tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); + out.Write("\tprev.rgb = %s;\n", tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]); } if (bpmem.combiners[numStages - 1].alphaC.dest != 0) { - out.Write("\tiprev.a = %s;\n", tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); + out.Write("\tprev.a = %s;\n", tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]); } } - out.Write("\tiprev = iprev & 255;\n"); + out.Write("\tprev = prev & 255;\n"); AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult(); uid_data.Pretest = Pretest; @@ -517,9 +517,9 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway if (bpmem.ztex2.op != ZTEXTURE_DISABLE && !skip_ztexture) { - // use the texture input of the last texture stage (itextemp), hopefully this has been read and is in correct format... + // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format... out.SetConstantsUsed(C_ZBIAS, C_ZBIAS+1); - out.Write("\tzCoord = idot(" I_ZBIAS"[0].xyzw, itextemp.xyzw) + " I_ZBIAS"[1].w %s;\n", + out.Write("\tzCoord = idot(" I_ZBIAS"[0].xyzw, textemp.xyzw) + " I_ZBIAS"[1].w %s;\n", (bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : ""); out.Write("\tzCoord = zCoord & 0xFFFFFF;\n"); } @@ -530,12 +530,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (dstAlphaMode == DSTALPHA_ALPHA_PASS) { out.SetConstantsUsed(C_ALPHA, C_ALPHA); - out.Write("\tocol0 = float4(float3(iprev.rgb), float(" I_ALPHA".a)) / 255.0;\n"); + out.Write("\tocol0 = float4(float3(prev.rgb), float(" I_ALPHA".a)) / 255.0;\n"); } else { WriteFog(out, uid_data); - out.Write("\tocol0 = float4(iprev) / 255.0;\n"); + out.Write("\tocol0 = float4(prev) / 255.0;\n"); } // Use dual-source color blending to perform dst alpha in a single pass @@ -545,7 +545,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // Colors will be blended against the alpha from ocol1 and // the alpha from ocol0 will be written to the framebuffer. - out.Write("\tocol1 = float4(iprev) / 255.0;\n"); + out.Write("\tocol1 = float4(prev) / 255.0;\n"); out.Write("\tocol0.a = float(" I_ALPHA".a) / 255.0;\n"); } @@ -709,7 +709,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP uid_data.stagehash[n].tevorders_colorchan = bpmem.tevorders[n / 2].getColorChan(n & 1); const char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; - out.Write("\tirastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); + out.Write("\trastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap); } uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1); @@ -737,12 +737,12 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP const char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; uid_data.SetTevindrefTexmap(i, texmap); - out.Write("\titextemp = "); + out.Write("\ttextemp = "); SampleTexture(out, "(float2(tevcoord.xy)/128.0)", texswap, texmap, ApiType); } else { - out.Write("\titextemp = int4(255, 255, 255, 255);\n"); + out.Write("\ttextemp = int4(255, 255, 255, 255);\n"); } @@ -755,7 +755,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int ka = bpmem.tevksel[n / 2].getKA(n & 1); uid_data.stagehash[n].tevksel_kc = kc; uid_data.stagehash[n].tevksel_ka = ka; - out.Write("\tikonsttemp = int4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); + out.Write("\tkonsttemp = int4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]); if (kc > 7) out.SetConstantsUsed(C_KCOLORS+((kc-0xc)%4),C_KCOLORS+((kc-0xc)%4)); @@ -883,12 +883,12 @@ static inline void SampleTexture(T& out, const char *texcoords, const char *texs static const char *tevAlphaFuncsTable[] = { "(false)", // NEVER - "(iprev.a < %s)", // LESS - "(iprev.a == %s)", // EQUAL - "(iprev.a <= %s)", // LEQUAL - "(iprev.a > %s)", // GREATER - "(iprev.a != %s)", // NEQUAL - "(iprev.a >= %s)", // GEQUAL + "(prev.a < %s)", // LESS + "(prev.a == %s)", // EQUAL + "(prev.a <= %s)", // LEQUAL + "(prev.a > %s)", // GREATER + "(prev.a != %s)", // NEQUAL + "(prev.a >= %s)", // GEQUAL "(true)" // ALWAYS }; @@ -1024,7 +1024,7 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) } out.Write("\tint ifog = int(round(fog * 256.0));\n"); - out.Write("\tiprev.rgb = (iprev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n"); + out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n"); } void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components) From 2067f88e0f4a943056f446a819475e5be4c03b10 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 6 Mar 2014 19:28:29 +0100 Subject: [PATCH 43/45] PixelShaderGen: Don't make local lookup tables "static". --- Source/Core/VideoCommon/PixelShaderGen.cpp | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 67cac69107..ee0ab3e6bb 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -113,21 +113,21 @@ static const char *tevOpTable[] = { static const char *tevCInputTable[] = { - "prev.rgb", // CPREV, - "prev.aaa", // APREV, - "c0.rgb", // C0, - "c0.aaa", // A0, - "c1.rgb", // C1, - "c1.aaa", // A1, - "c2.rgb", // C2, - "c2.aaa", // A2, - "textemp.rgb", // TEXC, - "textemp.aaa", // TEXA, - "rastemp.rgb", // RASC, - "rastemp.aaa", // RASA, + "prev.rgb", // CPREV, + "prev.aaa", // APREV, + "c0.rgb", // C0, + "c0.aaa", // A0, + "c1.rgb", // C1, + "c1.aaa", // A1, + "c2.rgb", // C2, + "c2.aaa", // A2, + "textemp.rgb", // TEXC, + "textemp.aaa", // TEXA, + "rastemp.rgb", // RASC, + "rastemp.aaa", // RASA, "int3(255,255,255)", // ONE "int3(127,127,127)", // HALF - "konsttemp.rgb", // KONST + "konsttemp.rgb", // KONST "int3(0,0,0)", // ZERO }; @@ -140,7 +140,7 @@ static const char *tevAInputTable[] = "textemp", // TEXA, "rastemp", // RASA, "konsttemp", // KONST, (hw1 had quarter) - "int4(0,0,0,0)", // ZERO + "int4(0,0,0,0)", // ZERO }; static const char *tevRasTable[] = @@ -800,7 +800,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP } else { - static const char *function_table[] = + const char *function_table[] = { "(((%s.r&255) > %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_GT "(((%s.r&255) == %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_EQ @@ -844,7 +844,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP } else { - static const char *function_table[] = + const char *function_table[] = { "(((%s.r&255) > (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_GT "(((%s.r&255) == (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_EQ From c1016205d3577d740ba8af84d049fad336887d53 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 6 Mar 2014 20:08:47 +0100 Subject: [PATCH 44/45] Pixel/LightingShaderGen: Fix code alignment issues. Most of these weren't even introduced by me, but hey - I'm nice and love wasting my time :p --- Source/Core/VideoCommon/LightingShaderGen.h | 28 ++++++++++----------- Source/Core/VideoCommon/PixelShaderGen.cpp | 28 ++++++++++----------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h index 87b4e3c14e..f07f67d941 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.h +++ b/Source/Core/VideoCommon/LightingShaderGen.h @@ -59,8 +59,8 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, case LIGHTDIF_CLAMP: object.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(lightsName, index)); object.Write("lacc.%s += int%s(round(%sdot(ldir, _norm0)) * float%s(" LIGHT_COL")));\n", - swizzle, swizzle_components, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", - swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); + swizzle, swizzle_components, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", + swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); break; default: _assert_(0); } @@ -71,13 +71,13 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, { // spot object.Write("ldir = " LIGHT_POS".xyz - pos.xyz;\n", LIGHT_POS_PARAMS(lightsName, index)); object.Write("dist2 = dot(ldir, ldir);\n" - "dist = sqrt(dist2);\n" - "ldir = ldir / dist;\n" - "attn = max(0.0, dot(ldir, " LIGHT_DIR".xyz));\n", - LIGHT_DIR_PARAMS(lightsName, index)); + "dist = sqrt(dist2);\n" + "ldir = ldir / dist;\n" + "attn = max(0.0, dot(ldir, " LIGHT_DIR".xyz));\n", + LIGHT_DIR_PARAMS(lightsName, index)); // attn*attn may overflow object.Write("attn = max(0.0, " LIGHT_COSATT".x + " LIGHT_COSATT".y*attn + " LIGHT_COSATT".z*attn*attn) / dot(" LIGHT_DISTATT".xyz, float3(1.0,dist,dist2));\n", - LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index)); + LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index)); } else if (chan.attnfunc == 1) { // specular @@ -85,23 +85,23 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, object.Write("attn = (dot(_norm0,ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR".xyz)) : 0.0;\n", LIGHT_DIR_PARAMS(lightsName, index)); // attn*attn may overflow object.Write("attn = max(0.0, " LIGHT_COSATT".x + " LIGHT_COSATT".y*attn + " LIGHT_COSATT".z*attn*attn) / (" LIGHT_DISTATT".x + " LIGHT_DISTATT".y*attn + " LIGHT_DISTATT".z*attn*attn);\n", - LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), - LIGHT_DISTATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index)); + LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), + LIGHT_DISTATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index)); } switch (chan.diffusefunc) { case LIGHTDIF_NONE: object.Write("lacc.%s += int%s(round(attn * float%s(" LIGHT_COL")));\n", - swizzle, swizzle_components, - swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); + swizzle, swizzle_components, + swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); break; case LIGHTDIF_SIGN: case LIGHTDIF_CLAMP: object.Write("lacc.%s += int%s(round(attn * %sdot(ldir, _norm0)) * float%s(" LIGHT_COL")));\n", - swizzle, swizzle_components, - chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", - swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); + swizzle, swizzle_components, + chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", + swizzle_components, LIGHT_COL_PARAMS(lightsColName, index, swizzle)); break; default: _assert_(0); } diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index ee0ab3e6bb..b3d5a07a6a 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -205,16 +205,16 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // dot product for integer vectors out.Write("int idot(int3 x, int3 y)\n" - "{\n" - "\tint3 tmp = x * y;\n" - "\treturn tmp.x + tmp.y + tmp.z;\n" - "}\n"); + "{\n" + "\tint3 tmp = x * y;\n" + "\treturn tmp.x + tmp.y + tmp.z;\n" + "}\n"); out.Write("int idot(int4 x, int4 y)\n" - "{\n" - "\tint4 tmp = x * y;\n" - "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n" - "}\n\n"); + "{\n" + "\tint4 tmp = x * y;\n" + "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n" + "}\n\n"); if (ApiType == API_OPENGL) { @@ -339,11 +339,11 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T } out.Write("\tint4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = " I_COLORS"[0];\n" - "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, 0, 0);\n" - "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" - "\tint alphabump=0;\n" - "\tint3 tevcoord=int3(0, 0, 0);\n" - "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n\n"); + "\tint4 rastemp = int4(0, 0, 0, 0), textemp = int4(0, 0, 0, 0), konsttemp = int4(0, 0, 0, 0);\n" + "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n" + "\tint alphabump=0;\n" + "\tint3 tevcoord=int3(0, 0, 0);\n" + "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n\n"); if (ApiType == API_OPENGL) { @@ -815,7 +815,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int mode = (cc.shift<<1)|cc.op; out.Write(" %s + ", tevCInputTable[cc.d]); out.Write(function_table[mode], tevCInputTable[cc.a], - tevCInputTable[cc.b], tevCInputTable[cc.c]); + tevCInputTable[cc.b], tevCInputTable[cc.c]); } if (cc.clamp) out.Write(", int3(0,0,0), int3(255,255,255))"); From 0f81cbd6be009ac14f036a05db8f4e516ea626d0 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 6 Mar 2014 20:11:52 +0100 Subject: [PATCH 45/45] PixelShaderGen: More code alignment fixes. --- Source/Core/VideoCommon/PixelShaderGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index b3d5a07a6a..29399a253f 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -859,7 +859,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int mode = (ac.shift<<1)|ac.op; out.Write(" %s.a + ", tevAInputTable[ac.d]); out.Write(function_table[mode], tevAInputTable[ac.a], - tevAInputTable[ac.b], tevAInputTable[ac.c]); + tevAInputTable[ac.b], tevAInputTable[ac.c]); } if (ac.clamp) out.Write(", 0, 255)");