Fix explosion smoke by using the min shaders (#98)

A quick found by Xalphenos and Rajkosto
Partially fix thin paper look explosion smoke on nvdia when using AccurateShaderMul = true, by using the ones from AccurateShaderMul = min
Shaders dumped from Cemu 1.10.0f and BotW 1.3.1
This commit is contained in:
NAVras-Z 2017-10-08 12:22:02 +08:00 committed by Michael
parent d6b79ff97a
commit 9c8569d5e2
3 changed files with 3138 additions and 0 deletions

View File

@ -0,0 +1,382 @@
#version 420
#extension GL_ARB_texture_gather : enable
// shader 71b06814302a7aad
uniform ivec4 uf_remappedPS[6];
uniform float uf_alphaTestRef;
layout(binding = 0) uniform sampler2D textureUnitPS0;// Tex0 addr 0x21048800 res 256x128x1 dim 1 tm: 4 format 0007 compSel: 0 1 1 1 mipView: 0x0 (num 0x9) sliceView: 0x0 (num 0x1) Sampler0 ClampX/Y/Z: 0 2 0 border: 0
layout(binding = 1) uniform sampler2D textureUnitPS1;// Tex1 addr 0x2105e800 res 256x256x1 dim 1 tm: 4 format 001a compSel: 0 1 2 3 mipView: 0x0 (num 0x9) sliceView: 0x0 (num 0x1) Sampler1 ClampX/Y/Z: 1 2 0 border: 0
layout(binding = 2) uniform sampler2D textureUnitPS2;// Tex2 addr 0x21156000 res 128x128x1 dim 1 tm: 4 format 0034 compSel: 0 0 0 0 mipView: 0x0 (num 0x8) sliceView: 0x0 (num 0x1) Sampler2 ClampX/Y/Z: 0 0 0 border: 0
layout(binding = 4) uniform sampler2D textureUnitPS4;// Tex4 addr 0xf4e91800 res 1280x720x1 dim 1 tm: 4 format 0806 compSel: 0 4 4 5 mipView: 0x0 (num 0x1) sliceView: 0x0 (num 0x1) Sampler4 ClampX/Y/Z: 2 2 0 border: 0
layout(binding = 10) uniform samplerCubeArray textureUnitPS10;// Tex10 addr 0x3d568800 res 16x16x1 dim 3 tm: 4 format 0816 compSel: 0 1 2 5 mipView: 0x0 (num 0x2) sliceView: 0x0 (num 0x6) Sampler10 ClampX/Y/Z: 2 2 2 border: 1
layout(location = 0) in vec4 passParameterSem0;
layout(location = 1) in vec4 passParameterSem1;
layout(location = 2) in vec4 passParameterSem3;
layout(location = 3) in vec4 passParameterSem4;
layout(location = 4) in vec4 passParameterSem8;
layout(location = 5) in vec4 passParameterSem9;
layout(location = 6) in vec4 passParameterSem11;
layout(location = 7) in vec4 passParameterSem14;
layout(location = 8) in vec4 passParameterSem15;
layout(location = 9) in vec4 passParameterSem16;
layout(location = 0) out vec4 passPixelColor0;
uniform vec2 uf_fragCoordScale;
void redcCUBE(vec4 src0, vec4 src1, out vec3 stm, out int faceId)
{
// stm -> x .. s, y .. t, z .. MajorAxis*2.0
vec3 inputCoord = normalize(vec3(src1.y, src1.x, src0.x));
float rx = inputCoord.x;
float ry = inputCoord.y;
float rz = inputCoord.z;
if( abs(rx) > abs(ry) && abs(rx) > abs(rz) )
{
stm.z = rx*2.0;
stm.xy = vec2(ry,rz);
if( rx >= 0.0 )
{
faceId = 0;
}
else
{
faceId = 1;
}
}
else if( abs(ry) > abs(rx) && abs(ry) > abs(rz) )
{
stm.z = ry*2.0;
stm.xy = vec2(rx,rz);
if( ry >= 0.0 )
{
faceId = 2;
}
else
{
faceId = 3;
}
}
else //if( abs(rz) > abs(ry) && abs(rz) > abs(rx) )
{
stm.z = rz*2.0;
stm.xy = vec2(rx,ry);
if( rz >= 0.0 )
{
faceId = 4;
}
else
{
faceId = 5;
}
}
}
vec3 redcCUBEReverse(vec2 st, int faceId)
{
st.yx = st.xy;
vec3 v;
float majorAxis = 1.0;
if( faceId == 0 )
{
v.yz = (st-vec2(1.5))*(majorAxis*2.0);
v.x = 1.0;
}
else if( faceId == 1 )
{
v.yz = (st-vec2(1.5))*(majorAxis*2.0);
v.x = -1.0;
}
else if( faceId == 2 )
{
v.xz = (st-vec2(1.5))*(majorAxis*2.0);
v.y = 1.0;
}
else if( faceId == 3 )
{
v.xz = (st-vec2(1.5))*(majorAxis*2.0);
v.y = -1.0;
}
else if( faceId == 4 )
{
v.xy = (st-vec2(1.5))*(majorAxis*2.0);
v.z = 1.0;
}
else
{
v.xy = (st-vec2(1.5))*(majorAxis*2.0);
v.z = -1.0;
}
return v;
}
int clampFI32(int v)
{
if( v == 0x7FFFFFFF )
return floatBitsToInt(1.0);
else if( v == 0xFFFFFFFF )
return floatBitsToInt(0.0);
return floatBitsToInt(clamp(intBitsToFloat(v), 0.0, 1.0));
}
float mul_nonIEEE(float a, float b){ return min(a*b,min(abs(a)*3.40282347E+38F,abs(b)*3.40282347E+38F)); }
void main()
{
vec4 R0f = vec4(0.0);
vec4 R1f = vec4(0.0);
vec4 R2f = vec4(0.0);
vec4 R3f = vec4(0.0);
vec4 R4f = vec4(0.0);
vec4 R5f = vec4(0.0);
vec4 R6f = vec4(0.0);
vec4 R7f = vec4(0.0);
vec4 R8f = vec4(0.0);
vec4 R9f = vec4(0.0);
vec4 R10f = vec4(0.0);
vec4 R11f = vec4(0.0);
vec4 R123f = vec4(0.0);
vec4 R125f = vec4(0.0);
vec4 R126f = vec4(0.0);
vec4 R127f = vec4(0.0);
float backupReg0f, backupReg1f, backupReg2f, backupReg3f, backupReg4f;
vec4 PV0f = vec4(0.0), PV1f = vec4(0.0);
float PS0f = 0.0, PS1f = 0.0;
vec4 tempf = vec4(0.0);
float tempResultf;
int tempResulti;
ivec4 ARi = ivec4(0);
bool predResult = true;
vec3 cubeMapSTM;
int cubeMapFaceId;
float cubeMapArrayIndex10 = 0.0;
R0f = passParameterSem0;
R1f = passParameterSem1;
R2f = passParameterSem3;
R3f = passParameterSem4;
R4f = passParameterSem8;
R5f = passParameterSem9;
R6f = passParameterSem11;
R7f = passParameterSem14;
R8f = passParameterSem15;
R9f = passParameterSem16;
R10f.xw = (texture(textureUnitPS0, R4f.xy).xw);
// 0
PV0f.x = R3f.y * 1.5;
R123f.y = (R10f.w * 2.0 + -(1.0));
PV0f.y = R123f.y;
R123f.z = (R10f.x * 2.0 + -(1.0));
PV0f.z = R123f.z;
PV0f.w = R3f.y;
PV0f.w /= 2.0;
R127f.w = 1.0 / R2f.w;
PS0f = R127f.w;
// 1
PV1f.x = mul_nonIEEE(PV0f.z, PV0f.w);
PV1f.y = mul_nonIEEE(PV0f.y, PV0f.w);
PV1f.z = mul_nonIEEE(PV0f.y, PV0f.x);
PV1f.w = mul_nonIEEE(PV0f.z, PV0f.x);
R10f.x = mul_nonIEEE(R2f.x, PS0f);
PS1f = R10f.x;
// 2
backupReg0f = R4f.z;
backupReg1f = R4f.w;
R4f.xyz = vec3(R5f.x,R5f.y,backupReg0f) + vec3(PV1f.w,PV1f.z,PV1f.x);
R4f.w = backupReg1f + PV1f.y;
R10f.y = mul_nonIEEE(R2f.y, R127f.w);
PS0f = R10f.y;
// 3
backupReg0f = R6f.z;
R5f.x = (R0f.w * 2.0 + -(1.0));
PV1f.y = R2f.z * R127f.w;
R6f.z = -(backupReg0f);
PV1f.z = R6f.z;
R2f.w = intBitsToFloat(uf_remappedPS[0].x);
R5f.z = R1f.x + R1f.x;
PS1f = R5f.z;
// 4
redcCUBE(vec4(PV1f.z,PV1f.z,R6f.x,R6f.y),vec4(R6f.y,R6f.x,PV1f.z,PV1f.z),cubeMapSTM,cubeMapFaceId);
R127f.x = cubeMapSTM.x;
R127f.y = cubeMapSTM.y;
R127f.z = cubeMapSTM.z;
R127f.w = intBitsToFloat(cubeMapFaceId);
PV0f.x = R127f.x;
PV0f.y = R127f.y;
PV0f.z = R127f.z;
PV0f.w = R127f.w;
R126f.z = (mul_nonIEEE(intBitsToFloat(uf_remappedPS[1].w),PV1f.y) + -(intBitsToFloat(uf_remappedPS[1].y)));
PS0f = R126f.z;
// 5
R6f.x = -(R1f.x) + R5f.z;
R6f.y = R1f.y + R1f.y;
PV1f.y = R6f.y;
R2f.z = PV0f.w;
R6f.w = R1f.z + R1f.z;
PV1f.w = R6f.w;
PS1f = 1.0 / abs(PV0f.z);
// 6
R123f.x = (mul_nonIEEE(R127f.y,PS1f) + 1.5);
PV0f.x = R123f.x;
R123f.y = (mul_nonIEEE(R127f.x,PS1f) + 1.5);
PV0f.y = R123f.y;
R3f.z = -(R1f.y) + PV1f.y;
R5f.w = -(R1f.z) + PV1f.w;
PS0f = 1.0 / R126f.z;
// 7
R2f.x = PV0f.x;
R2f.y = PV0f.y;
R6f.z = -(intBitsToFloat(uf_remappedPS[1].z)) * PS0f;
R10f.w = -(R0f.w) + 1.0;
R3f.w = -(R9f.w) + 1.0;
PS1f = R3f.w;
R4f.x = (texture(textureUnitPS2, R4f.xy).w);
R10f.x = (texture(textureUnitPS4, R10f.xy).x);
R11f.xyzw = (texture(textureUnitPS1, R4f.zw).xyzw);
R2f.xyz = (textureLod(textureUnitPS10, vec4(redcCUBEReverse(R2f.xy,floatBitsToInt(R2f.z)),cubeMapArrayIndex10),R2f.w).xyz);
// 0
PV0f.x = R11f.z + R4f.x;
PV0f.x /= 2.0;
PV0f.y = -(R11f.z) + 1.0;
R123f.z = (mul_nonIEEE(R10f.x,intBitsToFloat(uf_remappedPS[1].w)) + intBitsToFloat(uf_remappedPS[1].x));
PV0f.z = R123f.z;
R127f.w = R4f.x + R5f.x;
PV0f.w = R127f.w;
R127f.y = mul_nonIEEE(R10f.w, R10f.w);
R127f.y *= 4.0;
PS0f = R127f.y;
// 1
R123f.x = (-(PV0f.y) * intBitsToFloat(0x40c00000) + PV0f.z);
PV1f.x = R123f.x;
PV1f.y = mul_nonIEEE(R2f.x, R3f.w);
PV1f.z = max(PV0f.w, intBitsToFloat(0x3e4ccccd));
R125f.w = PV0f.x + intBitsToFloat(0xbecccccd);
R125f.w *= 4.0;
PS1f = mul_nonIEEE(R2f.y, R3f.w);
// 2
PV0f.x = mul_nonIEEE(R2f.z, R3f.w);
PV0f.y = min(PV1f.z, intBitsToFloat(0x3e99999a));
R2f.z = (mul_nonIEEE(PV1f.y,intBitsToFloat(uf_remappedPS[2].y)) + 0.0);
R126f.w = PV1f.x + -(R6f.z);
PV0f.w = R126f.w;
R6f.z = (mul_nonIEEE(PS1f,intBitsToFloat(uf_remappedPS[2].y)) + 0.0);
PS0f = R6f.z;
// 3
PV1f.x = PV0f.y + intBitsToFloat(0xbe4ccccd);
R2f.y = (mul_nonIEEE(PV0f.x,intBitsToFloat(uf_remappedPS[2].y)) + 0.0);
R123f.z = (PV0f.w * intBitsToFloat(0x3d08888a) + intBitsToFloat(0xbe2aaaad));
R123f.z = clamp(R123f.z, 0.0, 1.0);
PV1f.z = R123f.z;
R3f.w = R7f.x + 0.0;
R2f.x = R7f.y + 0.0;
PS1f = R2f.x;
// 4
R7f.x = R7f.z + 0.0;
PV0f.y = mul_nonIEEE(PV1f.z, PV1f.z);
PV0f.z = mul_nonIEEE(R11f.w, R127f.w);
PV0f.w = PV1f.x * intBitsToFloat(0x411fffff);
R127f.w = R126f.w;
R127f.w *= 2.0;
R127f.w = clamp(R127f.w, 0.0, 1.0);
PS0f = R127f.w;
// 5
R127f.x = (0.5 * PV0f.y + 0.5);
R126f.y = (mul_nonIEEE(PV0f.w,R125f.w) + -(R127f.y));
R126f.y = clamp(R126f.y, 0.0, 1.0);
PV1f.z = mul_nonIEEE(R11f.x, PV0f.w);
PV1f.w = mul_nonIEEE(R1f.w, PV0f.z);
PV1f.w = clamp(PV1f.w, 0.0, 1.0);
// 6
R123f.x = (mul_nonIEEE(R5f.w,PV1f.z) + R1f.z);
PV0f.x = R123f.x;
R123f.y = (mul_nonIEEE(R3f.z,PV1f.z) + R1f.y);
PV0f.y = R123f.y;
R123f.z = (mul_nonIEEE(R6f.x,PV1f.z) + R1f.x);
PV0f.z = R123f.z;
PV0f.w = mul_nonIEEE(PV1f.w, R127f.w);
PV0f.w = clamp(PV0f.w, 0.0, 1.0);
// 7
R123f.y = (mul_nonIEEE(R6f.w,R11f.y) + PV0f.x);
PV1f.y = R123f.y;
R123f.z = (mul_nonIEEE(R6f.y,R11f.y) + PV0f.y);
PV1f.z = R123f.z;
R123f.w = (mul_nonIEEE(R5f.z,R11f.y) + PV0f.z);
PV1f.w = R123f.w;
R5f.w = mul_nonIEEE(R3f.x, PV0f.w);
PS1f = R5f.w;
// 8
R127f.y = mul_nonIEEE(PV1f.y, R127f.x);
PV0f.y = R127f.y;
R127f.z = mul_nonIEEE(PV1f.z, R127f.x);
PV0f.z = R127f.z;
R127f.w = mul_nonIEEE(PV1f.w, R127f.x);
PV0f.w = R127f.w;
// 9
backupReg0f = R0f.y;
PV1f.y = R0f.z + -(PV0f.y);
PV1f.z = backupReg0f + -(PV0f.z);
PV1f.w = R0f.x + -(PV0f.w);
// 10
backupReg0f = R127f.z;
R127f.x = (mul_nonIEEE(PV1f.w,R126f.y) + R127f.w);
PV0f.x = R127f.x;
R127f.z = (mul_nonIEEE(PV1f.y,R126f.y) + R127f.y);
PV0f.z = R127f.z;
R127f.w = (mul_nonIEEE(PV1f.z,R126f.y) + backupReg0f);
PV0f.w = R127f.w;
// 11
PV1f.x = max(PV0f.x, 0.0);
PV1f.z = max(PV0f.z, 0.0);
PV1f.w = max(PV0f.w, 0.0);
// 12
R0f.x = min(PV1f.w, intBitsToFloat(uf_remappedPS[3].y));
PV0f.x = R0f.x;
R0f.y = min(PV1f.x, intBitsToFloat(uf_remappedPS[3].y));
PV0f.y = R0f.y;
R0f.w = min(PV1f.z, intBitsToFloat(uf_remappedPS[3].y));
PV0f.w = R0f.w;
// 13
R3f.x = R127f.z + -(PV0f.w);
R3f.y = R127f.w + -(PV0f.x);
R0f.z = R127f.x + -(PV0f.y);
// 0
R123f.y = (mul_nonIEEE(intBitsToFloat(uf_remappedPS[0].w),R2f.y) + R7f.x);
PV0f.y = R123f.y;
R123f.z = (mul_nonIEEE(intBitsToFloat(uf_remappedPS[0].w),R6f.z) + R2f.x);
PV0f.z = R123f.z;
R123f.w = (mul_nonIEEE(intBitsToFloat(uf_remappedPS[0].w),R2f.z) + R3f.w);
PV0f.w = R123f.w;
// 1
R127f.x = (mul_nonIEEE(R0f.y,PV0f.w) + R0f.z);
PV1f.x = R127f.x;
R127f.z = (mul_nonIEEE(R0f.w,PV0f.y) + R3f.x);
PV1f.z = R127f.z;
R127f.w = (mul_nonIEEE(R0f.x,PV0f.z) + R3f.y);
PV1f.w = R127f.w;
// 2
PV0f.x = -(PV1f.w) + intBitsToFloat(uf_remappedPS[4].y);
PV0f.y = -(PV1f.x) + intBitsToFloat(uf_remappedPS[4].x);
PV0f.w = -(PV1f.z) + intBitsToFloat(uf_remappedPS[4].z);
// 3
backupReg0f = R127f.x;
R127f.x = (mul_nonIEEE(PV0f.w,R9f.y) + R127f.z);
PV1f.x = R127f.x;
R127f.y = (mul_nonIEEE(PV0f.x,R9f.y) + R127f.w);
PV1f.y = R127f.y;
R127f.z = (mul_nonIEEE(PV0f.y,R9f.y) + backupReg0f);
PV1f.z = R127f.z;
// 4
PV0f.y = R8f.z + -(PV1f.x);
PV0f.z = R8f.y + -(PV1f.y);
PV0f.w = R8f.x + -(PV1f.z);
// 5
backupReg0f = R127f.x;
R127f.x = (mul_nonIEEE(PV0f.w,R8f.w) + R127f.z);
PV1f.x = R127f.x;
R127f.z = (mul_nonIEEE(PV0f.y,R8f.w) + backupReg0f);
PV1f.z = R127f.z;
R127f.w = (mul_nonIEEE(PV0f.z,R8f.w) + R127f.y);
PV1f.w = R127f.w;
// 6
PV0f.x = -(PV1f.w) + intBitsToFloat(uf_remappedPS[5].y);
PV0f.y = -(PV1f.x) + intBitsToFloat(uf_remappedPS[5].x);
PV0f.w = -(PV1f.z) + intBitsToFloat(uf_remappedPS[5].z);
// 7
R5f.x = (mul_nonIEEE(PV0f.y,R9f.x) + R127f.x);
R5f.y = (mul_nonIEEE(PV0f.x,R9f.x) + R127f.w);
R5f.z = (mul_nonIEEE(PV0f.w,R9f.x) + R127f.z);
// export
if( ((vec4(R5f.x, R5f.y, R5f.z, R5f.w)).a > uf_alphaTestRef) == false) discard;
passPixelColor0 = vec4(R5f.x, R5f.y, R5f.z, R5f.w);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,8 @@
[Definition]
titleIds = 00050000101C9300,00050000101C9400,00050000101C9500
name = "The Legend of Zelda: Breath of the Wild - Bomb Arrow Smoke Fix"
version = 2
# A quick found by Xalphenos and Rajkosto
# Partially fix thin paper look explosion smoke on nvdia when using AccurateShaderMul = true
# Shaders dumped from Cemu 1.10.0f and BotW 1.3.1