From 4a6c72355fbf2d683302af11e87285283a16060a Mon Sep 17 00:00:00 2001 From: getdls Date: Sun, 19 Aug 2018 12:51:21 +0200 Subject: [PATCH] [SM3DW][CptToad] Backport new fixes AA, Bloom Backports new fixes found when updating 1.4 packs AA scaling for Mario, Toad Missing bloomshader --- .../37a4ec1a7dbc7391_00000000000003c9_ps.txt | 192 +++++++++++++++++ .../5c1761d13feccdff_0000000000000000_vs.txt | 87 ++++++++ .../be99d80628d31127_00000000000003c9_ps.txt | 201 ++++++++++++++++++ 3 files changed, 480 insertions(+) create mode 100644 Source/CaptainToad/37a4ec1a7dbc7391_00000000000003c9_ps.txt create mode 100644 Source/CaptainToad/5c1761d13feccdff_0000000000000000_vs.txt create mode 100644 Source/SuperMario3DWorld/be99d80628d31127_00000000000003c9_ps.txt diff --git a/Source/CaptainToad/37a4ec1a7dbc7391_00000000000003c9_ps.txt b/Source/CaptainToad/37a4ec1a7dbc7391_00000000000003c9_ps.txt new file mode 100644 index 00000000..463355e1 --- /dev/null +++ b/Source/CaptainToad/37a4ec1a7dbc7391_00000000000003c9_ps.txt @@ -0,0 +1,192 @@ + +#version 420 +#extension GL_ARB_texture_gather : enable +#extension GL_ARB_separate_shader_objects : enable +// shader 37a4ec1a7dbc7391 //AA fix +const float resXScale = ; +const float resYScale = ; +uniform ivec4 uf_remappedPS[4]; +layout(binding = 0) uniform sampler2D textureUnitPS0;// Tex0 addr 0xf5800800 res 1280x720x1 dim 1 tm: 4 format 0019 compSel: 0 1 2 3 mipView: 0x0 (num 0x1) sliceView: 0x0 (num 0x1) Sampler0 ClampX/Y/Z: 2 2 2 border: 1 +layout(binding = 1) uniform sampler2D textureUnitPS1;// Tex1 addr 0x159db800 res 1280x720x1 dim 1 tm: 4 format 0001 compSel: 0 4 4 5 mipView: 0x0 (num 0x1) sliceView: 0x0 (num 0x1) Sampler1 ClampX/Y/Z: 2 2 2 border: 1 +layout(location = 0) in vec4 passParameterSem2; +layout(location = 0) out vec4 passPixelColor0; +uniform vec2 uf_fragCoordScale; +int clampFI32(int v) +{ +if( v == 0x7FFFFFFF ) + return floatBitsToInt(1.0); +else if( v == 0xFFFFFFFF ) + return floatBitsToInt(0.0); +return floatBitsToInt(clamp(intBitsToFloat(v), 0.0, 1.0)); +} +float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; } +void main() +{ +vec4 R0f = vec4(0.0); +vec4 R1f = vec4(0.0); +vec4 R2f = vec4(0.0); +vec4 R3f = vec4(0.0); +vec4 R4f = vec4(0.0); +vec4 R123f = vec4(0.0); +vec4 R126f = vec4(0.0); +vec4 R127f = vec4(0.0); +float backupReg0f, backupReg1f, backupReg2f, backupReg3f, backupReg4f; +vec4 PV0f = vec4(0.0), PV1f = vec4(0.0); +float PS0f = 0.0, PS1f = 0.0; +vec4 tempf = vec4(0.0); +float tempResultf; +int tempResulti; +ivec4 ARi = ivec4(0); +bool predResult = true; +bool activeMaskStack[2]; +bool activeMaskStackC[3]; +activeMaskStack[0] = false; +activeMaskStackC[0] = false; +activeMaskStackC[1] = false; +activeMaskStack[0] = true; +activeMaskStackC[0] = true; +activeMaskStackC[1] = true; +vec3 cubeMapSTM; +int cubeMapFaceId; +R0f = passParameterSem2; +if( activeMaskStackC[1] == true ) { +R2f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw); +R4f.xyzw = (textureGather(textureUnitPS1, R0f.xy).wzxy); +} +if( activeMaskStackC[1] == true ) { +activeMaskStack[1] = activeMaskStack[0]; +activeMaskStackC[2] = activeMaskStackC[1]; +// 0 +PV0f.x = max(R4f.z, R4f.x); +PV0f.y = min(R4f.w, R4f.y); +PV0f.z = mul_nonIEEE(R2f.x, intBitsToFloat(uf_remappedPS[0].x)); +PV0f.w = min(R4f.z, R4f.x); +PS0f = max(R4f.w, R4f.y); +// 1 +R123f.x = (mul_nonIEEE(R2f.y,intBitsToFloat(uf_remappedPS[0].y)) + PV0f.z); +PV1f.x = R123f.x; +PV1f.y = max(PV0f.x, PS0f); +R127f.z = R4f.z + -(R4f.y); +PV1f.z = R127f.z; +PV1f.w = min(PV0f.w, PV0f.y); +R126f.z = R4f.w + -(R4f.x); +PS1f = R126f.z; +// 2 +PV0f.x = max(PV1f.x, PV1f.y); +PV0f.y = mul_nonIEEE(PV1f.y, intBitsToFloat(uf_remappedPS[1].x)); +PV0f.z = min(PV1f.x, PV1f.w); +R3f.x = PV1f.z + PS1f; +PS0f = R3f.x; +// 3 +R1f.x = max(PV0f.y, intBitsToFloat(uf_remappedPS[1].y)); +R3f.y = R127f.z + -(R126f.z); +R0f.w = -(PV0f.z) + PV0f.x; +// 4 +predResult = (R0f.w > R1f.x); +activeMaskStack[1] = predResult; +activeMaskStackC[2] = predResult == true && activeMaskStackC[1] == true; +} +else { +activeMaskStack[1] = false; +activeMaskStackC[2] = false; +} +if( activeMaskStackC[2] == true ) { +// 0 +R1f.x = R4f.y + R4f.x; +PV0f.x = R1f.x; +R1f.y = intBitsToFloat(uf_remappedPS[2].z) * 0.25; +R0f.w = max(R3f.x, -(R3f.x)); +PV0f.w = R0f.w; +R4f.x = max(R3f.y, -(R3f.y)); +PS0f = R4f.x; +// 1 +R1f.x = min(PV0f.w, PS0f); +R4f.y = -(intBitsToFloat(uf_remappedPS[3].y)); +R0f.z = intBitsToFloat(uf_remappedPS[3].x); +R0f.w = R4f.z + PV0f.x; +PV1f.w = R0f.w; +R4f.x = -(intBitsToFloat(uf_remappedPS[3].x)); +PS1f = R4f.x; +// 2 +R1f.z = R4f.w + PV1f.w; +PV0f.z = R1f.z; +R0f.w = intBitsToFloat(uf_remappedPS[3].y); +// 3 +backupReg0f = R1f.y; +R1f.y = (mul_nonIEEE(backupReg0f,PV0f.z) + intBitsToFloat(uf_remappedPS[2].w)); +PV1f.y = R1f.y; +// 4 +backupReg0f = R1f.x; +R1f.x = max(PV1f.y, backupReg0f); +PV0f.x = R1f.x; +// 5 +R1f.w = 1.0 / PV0f.x; +PS1f = R1f.w; +// 6 +R1f.x = R3f.x * PS1f; +PV0f.x = R1f.x; +R1f.y = R3f.y * PS1f; +PV0f.y = R1f.y; +// 7 +R1f.x = max(PV0f.x, -(intBitsToFloat(uf_remappedPS[2].y))); +PV1f.x = R1f.x; +R1f.y = max(PV0f.y, -(intBitsToFloat(uf_remappedPS[2].y))); +PV1f.y = R1f.y; +// 8 +R1f.x = min(PV1f.x, intBitsToFloat(uf_remappedPS[2].y)); +PV0f.x = R1f.x; +R1f.y = min(PV1f.y, intBitsToFloat(uf_remappedPS[2].y)); +PV0f.y = R1f.y; +// 9 +backupReg0f = R0f.x; +backupReg1f = R0f.y; +backupReg2f = R0f.z; +backupReg0f = R0f.x; +backupReg3f = R0f.w; +backupReg1f = R0f.y; +R0f.x = (mul_nonIEEE(PV0f.x,R4f.x) / resXScale + backupReg0f); +R0f.y = (mul_nonIEEE(PV0f.y,R4f.y) / resYScale+ backupReg1f); +R0f.z = (mul_nonIEEE(PV0f.x,backupReg2f) / resXScale + backupReg0f); +R0f.w = (mul_nonIEEE(PV0f.y,backupReg3f) / resYScale+ backupReg1f); +} +if( activeMaskStackC[2] == true ) { +R1f.xyzw = (texture(textureUnitPS0, R0f.zw).xyzw); +R0f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw); +} +if( activeMaskStackC[2] == true ) { +// 0 +backupReg0f = R0f.y; +backupReg1f = R0f.x; +PV0f.x = R0f.w + R1f.w; +PV0f.x /= 2.0; +PV0f.y = R0f.z + R1f.z; +PV0f.y /= 2.0; +PV0f.z = backupReg0f + R1f.y; +PV0f.z /= 2.0; +PV0f.w = backupReg1f + R1f.x; +PV0f.w /= 2.0; +// 1 +PV1f.x = -(R2f.w) + PV0f.x; +PV1f.y = -(R2f.z) + PV0f.y; +PV1f.z = -(R2f.y) + PV0f.z; +PV1f.w = -(R2f.x) + PV0f.w; +// 2 +backupReg0f = R2f.x; +backupReg1f = R2f.y; +backupReg2f = R2f.z; +backupReg3f = R2f.w; +R2f.x = (PV1f.w * intBitsToFloat(0x3f4ccccd) + backupReg0f); +R2f.y = (PV1f.z * intBitsToFloat(0x3f4ccccd) + backupReg1f); +R2f.z = (PV1f.y * intBitsToFloat(0x3f4ccccd) + backupReg2f); +R2f.w = (PV1f.x * intBitsToFloat(0x3f4ccccd) + backupReg3f); +} +activeMaskStackC[1] = activeMaskStack[0] == true && activeMaskStackC[0] == true; +// export +passPixelColor0 = vec4(R2f.x, R2f.y, R2f.z, R2f.w); +} diff --git a/Source/CaptainToad/5c1761d13feccdff_0000000000000000_vs.txt b/Source/CaptainToad/5c1761d13feccdff_0000000000000000_vs.txt new file mode 100644 index 00000000..65bdedf4 --- /dev/null +++ b/Source/CaptainToad/5c1761d13feccdff_0000000000000000_vs.txt @@ -0,0 +1,87 @@ + + +#version 420 +#extension GL_ARB_texture_gather : enable +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_packing : enable +// shader 5c1761d13feccdff +//Bloom fix heat haze +const float resXScale = ; +const float resYScale = ; + +uniform ivec4 uf_remappedVS[1]; +uniform vec2 uf_windowSpaceToClipSpaceTransform; +layout(location = 0) in uvec4 attrDataSem0; +layout(location = 1) in uvec4 attrDataSem1; +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; +}; +layout(location = 0) out vec4 passParameterSem3; +int clampFI32(int v) +{ +if( v == 0x7FFFFFFF ) + return floatBitsToInt(1.0); +else if( v == 0xFFFFFFFF ) + return floatBitsToInt(0.0); +return floatBitsToInt(clamp(intBitsToFloat(v), 0.0, 1.0)); +} +float mul_nonIEEE(float a, float b){ return min(a*b,min(abs(a)*3.40282347E+38F,abs(b)*3.40282347E+38F)); } +void main() +{ +vec4 R0f = vec4(0.0); +vec4 R1f = vec4(0.0); +vec4 R2f = vec4(0.0); +uvec4 attrDecoder; +float backupReg0f, backupReg1f, backupReg2f, backupReg3f, backupReg4f; +vec4 PV0f = vec4(0.0), PV1f = vec4(0.0); +float PS0f = 0.0, PS1f = 0.0; +vec4 tempf = vec4(0.0); +float tempResultf; +int tempResulti; +ivec4 ARi = ivec4(0); +bool predResult = true; +vec3 cubeMapSTM; +int cubeMapFaceId; +R0f = floatBitsToInt(ivec4(gl_VertexID, 0, 0, gl_InstanceID)); +attrDecoder.xyz = attrDataSem0.xyz; +attrDecoder.xyz = (attrDecoder.xyz>>24)|((attrDecoder.xyz>>8)&0xFF00)|((attrDecoder.xyz<<8)&0xFF0000)|((attrDecoder.xyz<<24)); +attrDecoder.w = 0; +R1f = vec4(intBitsToFloat(int(attrDecoder.x)), intBitsToFloat(int(attrDecoder.y)), intBitsToFloat(int(attrDecoder.z)), intBitsToFloat(floatBitsToInt(1.0))); +attrDecoder.xy = attrDataSem1.xy; +attrDecoder.xy = (attrDecoder.xy>>24)|((attrDecoder.xy>>8)&0xFF00)|((attrDecoder.xy<<8)&0xFF0000)|((attrDecoder.xy<<24)); +attrDecoder.z = 0; +attrDecoder.w = 0; +R2f = vec4(intBitsToFloat(int(attrDecoder.x)), intBitsToFloat(int(attrDecoder.y)), intBitsToFloat(floatBitsToInt(0.0)), intBitsToFloat(floatBitsToInt(1.0))); +// 0 +backupReg0f = R1f.x; +backupReg1f = R1f.y; +R1f.x = backupReg0f; +R1f.x *= 2.0; +R1f.y = backupReg1f; +R1f.y *= 2.0; +R1f.z = intBitsToFloat(0xbf800000); +R1f.w = 1.0; +PS0f = R2f.x + -(intBitsToFloat(uf_remappedVS[0].x)/resXScale); +// 1 +backupReg0f = R2f.y; +backupReg1f = R2f.x; +PV1f.x = R2f.y + -(intBitsToFloat(uf_remappedVS[0].y)/resXScale); +R2f.y = backupReg0f + intBitsToFloat(uf_remappedVS[0].y)/resYScale; +R2f.z = PS0f; +R2f.x = backupReg1f + intBitsToFloat(uf_remappedVS[0].x)/resXScale; +PS1f = R2f.x; +// 2 +R2f.w = PV1f.x; +// export +gl_Position = vec4(R1f.x, R1f.y, R1f.z, R1f.w); +// export +passParameterSem3 = vec4(R2f.x, R2f.y, R2f.z, R2f.w); +} diff --git a/Source/SuperMario3DWorld/be99d80628d31127_00000000000003c9_ps.txt b/Source/SuperMario3DWorld/be99d80628d31127_00000000000003c9_ps.txt new file mode 100644 index 00000000..ff95e0ea --- /dev/null +++ b/Source/SuperMario3DWorld/be99d80628d31127_00000000000003c9_ps.txt @@ -0,0 +1,201 @@ + + +#version 420 +#extension GL_ARB_texture_gather : enable +#extension GL_ARB_separate_shader_objects : enable +// shader be99d80628d31127 //AA PS +// Used for: Scaling AA to resolution + +const float resXScale = ; +const float resYScale = ; + +uniform ivec4 uf_remappedPS[4]; +layout(binding = 0) uniform sampler2D textureUnitPS0;// Tex0 addr 0xf5800800 res 1280x720x1 dim 1 tm: 4 format 0019 compSel: 0 1 2 3 mipView: 0x0 (num 0x1) sliceView: 0x0 (num 0x1) Sampler0 ClampX/Y/Z: 2 2 2 border: 1 +layout(binding = 1) uniform sampler2D textureUnitPS1;// Tex1 addr 0xf4341000 res 1280x720x1 dim 1 tm: 4 format 0001 compSel: 0 4 4 5 mipView: 0x0 (num 0x1) sliceView: 0x0 (num 0x1) Sampler1 ClampX/Y/Z: 2 2 2 border: 1 +layout(location = 0) in vec4 passParameterSem2; +layout(location = 0) out vec4 passPixelColor0; +uniform vec2 uf_fragCoordScale; +int clampFI32(int v) +{ +if( v == 0x7FFFFFFF ) + return floatBitsToInt(1.0); +else if( v == 0xFFFFFFFF ) + return floatBitsToInt(0.0); +return floatBitsToInt(clamp(intBitsToFloat(v), 0.0, 1.0)); +} +float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));} +void main() +{ +vec4 R0f = vec4(0.0); +vec4 R1f = vec4(0.0); +vec4 R2f = vec4(0.0); +vec4 R3f = vec4(0.0); +vec4 R4f = vec4(0.0); +vec4 R5f = vec4(0.0); +vec4 R123f = vec4(0.0); +vec4 R126f = vec4(0.0); +vec4 R127f = vec4(0.0); +float backupReg0f, backupReg1f, backupReg2f, backupReg3f, backupReg4f; +vec4 PV0f = vec4(0.0), PV1f = vec4(0.0); +float PS0f = 0.0, PS1f = 0.0; +vec4 tempf = vec4(0.0); +float tempResultf; +int tempResulti; +ivec4 ARi = ivec4(0); +bool predResult = true; +bool activeMaskStack[2]; +bool activeMaskStackC[3]; +activeMaskStack[0] = false; +activeMaskStackC[0] = false; +activeMaskStackC[1] = false; +activeMaskStack[0] = true; +activeMaskStackC[0] = true; +activeMaskStackC[1] = true; +vec3 cubeMapSTM; +int cubeMapFaceId; +R0f = passParameterSem2; +if( activeMaskStackC[1] == true ) { +R4f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw); +R2f.xyzw = (textureGather(textureUnitPS1, R0f.xy).wzxy); +} +if( activeMaskStackC[1] == true ) { +activeMaskStack[1] = activeMaskStack[0]; +activeMaskStackC[2] = activeMaskStackC[1]; +// 0 +PV0f.x = min(R2f.z, R2f.x); +PV0f.y = max(R2f.z, R2f.x); +PV0f.z = mul_nonIEEE(R4f.x, intBitsToFloat(uf_remappedPS[0].x)); +PV0f.w = min(R2f.w, R2f.y); +PS0f = max(R2f.w, R2f.y); +// 1 +PV1f.x = min(PV0f.x, PV0f.w); +R123f.y = (mul_nonIEEE(R4f.y,intBitsToFloat(uf_remappedPS[0].y)) + PV0f.z); +PV1f.y = R123f.y; +R127f.z = R2f.z + -(R2f.y); +PV1f.z = R127f.z; +PV1f.w = max(PV0f.y, PS0f); +R126f.z = R2f.w + -(R2f.x); +PS1f = R126f.z; +// 2 +PV0f.x = mul_nonIEEE(PV1f.w, intBitsToFloat(uf_remappedPS[1].x)); +PV0f.y = max(PV1f.y, PV1f.w); +PV0f.z = min(PV1f.y, PV1f.x); +R3f.x = PV1f.z + PS1f; +PS0f = R3f.x; +// 3 +R1f.x = max(PV0f.x, intBitsToFloat(uf_remappedPS[1].y)); +R3f.y = -(PV0f.z) + PV0f.y; +R1f.y = R127f.z + -(R126f.z); +PS1f = R1f.y; +// 4 +predResult = (R3f.y > R1f.x); +activeMaskStack[1] = predResult; +activeMaskStackC[2] = predResult == true && activeMaskStackC[1] == true; +} +else { +activeMaskStack[1] = false; +activeMaskStackC[2] = false; +} +if( activeMaskStackC[2] == true ) { +// 0 +backupReg0f = R2f.y; +R1f.x = max(R3f.x, -(R3f.x)); +PV0f.x = R1f.x; +R2f.y = backupReg0f + R2f.x; +PV0f.y = R2f.y; +R0f.z = intBitsToFloat(uf_remappedPS[2].z) * 0.25; +R0f.w = max(R1f.y, -(R1f.y)); +PV0f.w = R0f.w; +R2f.x = -(intBitsToFloat(uf_remappedPS[3].x)); +PS0f = R2f.x; +// 1 +R3f.y = R2f.z + PV0f.y; +PV1f.y = R3f.y; +R2f.y = min(PV0f.x, PV0f.w); +PS1f = R2f.y; +// 2 +R3f.y = R2f.w + PV1f.y; +PV0f.y = R3f.y; +R1f.z = intBitsToFloat(uf_remappedPS[3].x); +R0f.w = intBitsToFloat(uf_remappedPS[3].y); +R5f.y = -(intBitsToFloat(uf_remappedPS[3].y)); +PS0f = R5f.y; +// 3 +backupReg0f = R0f.z; +R0f.z = (mul_nonIEEE(backupReg0f,PV0f.y) + intBitsToFloat(uf_remappedPS[2].w)); +PV1f.z = R0f.z; +// 4 +backupReg0f = R2f.y; +R2f.y = max(PV1f.z, backupReg0f); +PV0f.y = R2f.y; +// 5 +R2f.y = 1.0 / PV0f.y; +PS1f = R2f.y; +// 6 +backupReg0f = R1f.y; +R1f.x = mul_nonIEEE(R3f.x, PS1f); +PV0f.x = R1f.x; +R1f.y = mul_nonIEEE(backupReg0f, PS1f); +PV0f.y = R1f.y; +// 7 +R1f.x = max(PV0f.x, -(intBitsToFloat(uf_remappedPS[2].y))); +PV1f.x = R1f.x; +R1f.y = max(PV0f.y, -(intBitsToFloat(uf_remappedPS[2].y))); +PV1f.y = R1f.y; +// 8 +R1f.x = min(PV1f.x, intBitsToFloat(uf_remappedPS[2].y)); +PV0f.x = R1f.x; +R1f.y = min(PV1f.y, intBitsToFloat(uf_remappedPS[2].y)); +PV0f.y = R1f.y; +// 9 +backupReg0f = R0f.x; +backupReg1f = R0f.y; +backupReg0f = R0f.x; +backupReg2f = R0f.w; +backupReg1f = R0f.y; +R0f.x = (mul_nonIEEE(PV0f.x,R2f.x) /resXScale + backupReg0f); +R0f.y = (mul_nonIEEE(PV0f.y,R5f.y) /resYScale+ backupReg1f); +R0f.z = (mul_nonIEEE(PV0f.x,R1f.z) /resXScale + backupReg0f); +R0f.w = (mul_nonIEEE(PV0f.y,backupReg2f)/ resXScale + backupReg1f); +} +if( activeMaskStackC[2] == true ) { +R1f.xyzw = (texture(textureUnitPS0, R0f.zw).xyzw); +R0f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw); +} +if( activeMaskStackC[2] == true ) { +// 0 +backupReg0f = R0f.y; +backupReg1f = R0f.x; +PV0f.x = R0f.w + R1f.w; +PV0f.x /= 2.0; +PV0f.y = R0f.z + R1f.z; +PV0f.y /= 2.0; +PV0f.z = backupReg0f + R1f.y; +PV0f.z /= 2.0; +PV0f.w = backupReg1f + R1f.x; +PV0f.w /= 2.0; +// 1 +PV1f.x = -(R4f.w) + PV0f.x; +PV1f.y = -(R4f.z) + PV0f.y; +PV1f.z = -(R4f.y) + PV0f.z; +PV1f.w = -(R4f.x) + PV0f.w; +// 2 +backupReg0f = R4f.x; +backupReg1f = R4f.y; +backupReg2f = R4f.z; +backupReg3f = R4f.w; +R4f.x = (PV1f.w * intBitsToFloat(0x3f4ccccd) + backupReg0f); +R4f.y = (PV1f.z * intBitsToFloat(0x3f4ccccd) + backupReg1f); +R4f.z = (PV1f.y * intBitsToFloat(0x3f4ccccd) + backupReg2f); +R4f.w = (PV1f.x * intBitsToFloat(0x3f4ccccd) + backupReg3f); +} +activeMaskStackC[1] = activeMaskStack[0] == true && activeMaskStackC[0] == true; +// export +passPixelColor0 = vec4(R4f.x, R4f.y, R4f.z, R4f.w); +}