#version 420
#extension GL_ARB_texture_gather : enable
#extension GL_ARB_separate_shader_objects : enable

// Layout helper macros: one set for Vulkan (descriptor set + binding), one for
// OpenGL (binding / location only). Each directive must sit on its own line;
// the preprocessor is line-based.
#ifdef VULKAN
#define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location)
#define UNIFORM_BUFFER_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(set = __vkSet, binding = __vkLocation, std140)
#define TEXTURE_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(set = __vkSet, binding = __vkLocation)
// Writes gl_Position and then remaps z to (z + w) / 2.
#define SET_POSITION(_v) gl_Position = _v; gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0
#define GET_FRAGCOORD() vec4(gl_FragCoord.xy*uf_fragCoordScale.xy,gl_FragCoord.zw)
#define gl_VertexID gl_VertexIndex
#define gl_InstanceID gl_InstanceIndex
#else
#define ATTR_LAYOUT(__vkSet, __location) layout(location = __location)
#define UNIFORM_BUFFER_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(binding = __glLocation, std140)
#define TEXTURE_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(binding = __glLocation)
#define SET_POSITION(_v) gl_Position = _v
#define GET_FRAGCOORD() vec4(gl_FragCoord.xy*uf_fragCoordScale,gl_FragCoord.zw)
#endif
// This shader was auto-converted from OpenGL to Cemu so expect weird code and possible errors.

// shader a7f4801a8d29e333

// Uniforms: under Vulkan they live in a uniform block and uf_fragCoordScale is
// a vec4; under OpenGL they are loose uniforms and uf_fragCoordScale is a vec2.
// Code shared by both paths should therefore only read its .x / .y components.
#ifdef VULKAN
layout(set = 1, binding = 1) uniform ufBlock
{
    uniform vec4 uf_fragCoordScale;
    uniform ivec4 uf_remappedPS[3];
};
#else
uniform vec2 uf_fragCoordScale;
uniform ivec4 uf_remappedPS[3];
#endif

// Not referenced anywhere in the visible part of this shader.
const float hazeFactor = 0.1;

// Colour-grading controls. The `$name` placeholders are not GLSL; they are
// presumably substituted with preset values by the graphic-pack loader before
// compilation -- confirm against the pack's rules file.
const float gamma = $gamma; // 1.0 is neutral. BotW is already colour graded at this stage
const float exposure = $exposure; // 1.0 is neutral
const float vibrance = $vibrance; // 0.0 is neutral
const float crushContrast = $crushContrast; // 0.0 is neutral. Use small increments, loss of shadow detail
const float contrastCurve = $contrastCurve; // mix factor between the graded colour and its smoothstep() curve

// NOTE(review): `$blueSadows` looks like a misspelling of "blueShadows", but the
// placeholder name must match whatever the preset definition uses, so it is
// left untouched here -- rename both sides together or not at all.
vec3 RGB_Lift = vec3($redShadows, $greenShadows , $blueSadows); // [0.000 to 2.000] Adjust shadows for Red, Green and Blue.
vec3 RGB_Gamma = vec3($redMid ,$greenMid, $blueMid); // [0.000 to 2.000] Adjust midtones for Red, Green and Blue
vec3 RGB_Gain = vec3($redHilight, $greenHilight, $blueHilight); // [0.000 to 2.000] Adjust highlights for Red, Green and Blue

// lumasharpen settings
const float sharp_mix = $sharp_mix;   // mix factor between the graded colour and the sharpened colour
const float sharp_strength = 2.0;     // multiplier applied to the luma weights
const float sharp_clamp = 0.75;       // the sharpening term is limited to [-sharp_clamp, +sharp_clamp]
const float offset_bias = 1.0;        // scales the sampling offsets of the blur taps

// Logistic curve centred on x = 0.5 with slope factor 5.5.
// Not called anywhere in the visible part of this shader.
float Sigmoid (float x) {
    return 1.0 / (1.0 + (exp(-(x - 0.5) * 5.5)));
}

// Texel step used by the sharpen taps.
// NOTE(review): the step is hard-coded for a 1920x1080 target, while the
// sampler annotation further down reports a 1280x720 texture -- confirm this is
// intended before changing either.
#define px (1.0/1920.0*uf_fragCoordScale.x)
#define py (1.0/1080.0*uf_fragCoordScale.y)
#define CoefLuma vec3(0.2126, 0.7152, 0.0722)

// Computes a scalar luma-sharpening term for the texel at `pos`.
//   tex : texture to sample
//   pos : texture coordinate of the centre sample
// Returns a value in [-sharp_clamp, +sharp_clamp] that the caller adds to every
// colour channel.
float lumasharping(sampler2D tex, vec2 pos) {
    vec4 colorInput = texture(tex, pos);
    vec3 ori = colorInput.rgb;

    // -- Combining the strength and luma multipliers --
    vec3 sharp_strength_luma = (CoefLuma * sharp_strength);

    // -- Gaussian filter --
    //   [ .25, .50, .25]     [ 1 , 2 , 1 ]
    //   [ .50,   1, .50]  =  [ 2 , 4 , 2 ]
    //   [ .25, .50, .25]     [ 1 , 2 , 1 ]
    // Implemented as four diagonal taps, each half a (px, py) step from the centre.
    vec3 blur_ori = texture(tex, pos + vec2(px, -py) * 0.5 * offset_bias).rgb; // South East
    blur_ori += texture(tex, pos + vec2(-px, -py) * 0.5 * offset_bias).rgb;    // South West
    blur_ori += texture(tex, pos + vec2(px, py) * 0.5 * offset_bias).rgb;      // North East
    blur_ori += texture(tex, pos + vec2(-px, py) * 0.5 * offset_bias).rgb;     // North West
    blur_ori *= 0.25; // ( /= 4) Divide by the number of texture fetches

    // -- Calculate the sharpening --
    vec3 sharp = ori - blur_ori; // Subtracting the blurred image from the original image

    // -- Adjust strength of the sharpening and clamp it --
    vec4 sharp_strength_luma_clamp = vec4(sharp_strength_luma * (0.5 / sharp_clamp), 0.5); // Roll part of the clamp into the dot
    float sharp_luma = clamp((dot(vec4(sharp, 1.0), sharp_strength_luma_clamp)), 0.0, 1.0); // Calculate the luma, adjust the strength, scale up and clamp
    sharp_luma = (sharp_clamp * 2.0) * sharp_luma - sharp_clamp; // scale down
    return sharp_luma;
}

// Lift / gamma / gain colour adjustment driven by RGB_Lift, RGB_Gamma and RGB_Gain.
// reshade BSD https://reshade.me , Alexkiri port
// Returns the adjusted colour clamped to [0, 1].
// NOTE(review): a zero component in RGB_Gamma makes `1.0 / RGB_Gamma` divide by
// zero; the documented preset range is [0.000 to 2.000], so 0 is reachable.
vec3 LiftGammaGainPass(vec3 colorInput) {
    vec3 color = colorInput;
    color = color * (1.5 - 0.5 * RGB_Lift) + 0.5 * RGB_Lift - 0.5;
    color = clamp(color, 0.0, 1.0);
    color *= RGB_Gain;
    color = pow(color, 1.0 / RGB_Gamma);
    return clamp(color, 0.0, 1.0);
}

// Colour grade: exposure, gamma, lift/gamma/gain, vibrance, then black crush.
// Returns the graded colour; the lower bound is clamped to 0 by the final max().
vec3 contrasty(vec3 colour){
    vec3 fColour = (colour.xyz);

    //fColour = LiftGammaGainPass(fColour);

    fColour = clamp(exposure * fColour, 0.0, 1.0);
    fColour = pow(fColour, vec3(1.0 / gamma));

    // The vibrance weights below are measured before lift/gamma/gain is applied.
    float luminance = fColour.r*0.299 + fColour.g*0.587 + fColour.b*0.114;
    float mn = min(min(fColour.r, fColour.g), fColour.b);
    float mx = max(max(fColour.r, fColour.g), fColour.b);
    float sat = (1.0 - (mx - mn)) * (1.0 - mx) * luminance * 5.0;
    vec3 lightness = vec3((mn + mx) / 2.0);

    fColour = LiftGammaGainPass(fColour);

    // vibrance: a negative mix factor pushes the colour away from its lightness grey
    fColour = mix(fColour, mix(fColour, lightness, -vibrance), sat);
    fColour = max(vec3(0.0), fColour - vec3(crushContrast));
    return fColour;
}

// Divisor applied to the sampling offsets computed in main().
const float resScale = 3.0;

//AA in PS
TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0;// Tex0 addr 0xf470a000 res 1280x720x1 dim 1 tm: 4 format 0816 compSel: 0 1 2 5 mipView: 0x0 (num 0x1
layout(location = 0) in vec4 passParameterSem136;
layout(location = 1) in vec4 passParameterSem137;
layout(location = 0) out vec4 passPixelColor0;
//uniform vec2 uf_fragCoordScale;

// Clamps the float encoded in `v` to [0, 1] and returns its bit pattern.
// The bit patterns 0x7FFFFFFF and 0xFFFFFFFF are special-cased to 1.0 and 0.0.
// Not called anywhere in the visible part of this shader.
int clampFI32(int v)
{
    if( v == 0x7FFFFFFF )
        return floatBitsToInt(1.0);
    else if( v == 0xFFFFFFFF )
        return floatBitsToInt(0.0);
    return floatBitsToInt(clamp(intBitsToFloat(v), 0.0, 1.0));
}

// Multiply that returns exactly 0.0 whenever either operand is 0.0, so that a
// zero operand wins even when the other operand is Inf or NaN.
float mul_nonIEEE(float a, float b){
    if( a == 0.0 || b == 0.0 ) return 0.0;
    return a*b;
}

// Fragment entry point. The register-style body (R*i / PV*i / PS*i holding float
// bit patterns in ints, numbered "// N" instruction groups) is auto-converted
// code; statement order inside each group matters and is preserved as-is.
// Overall flow, as visible in the code:
//   1. sample the green channel at four coordinates from passParameterSem137
//      and the full texel at passParameterSem136.xy;
//   2. if the local green range is below a threshold, keep the centre texel;
//   3. otherwise take four more samples along a computed direction and output
//      either their 4-tap or 2-tap average;
//   4. colour-grade and sharpen the result and write passPixelColor0.
void main()
{
    ivec4 R0i = ivec4(0);
    ivec4 R1i = ivec4(0);
    ivec4 R2i = ivec4(0);
    ivec4 R3i = ivec4(0);
    ivec4 R4i = ivec4(0);
    ivec4 R123i = ivec4(0);
    ivec4 R126i = ivec4(0);
    ivec4 R127i = ivec4(0);
    int backupReg0i, backupReg1i, backupReg2i, backupReg3i;
    ivec4 PV0i = ivec4(0), PV1i = ivec4(0);
    int PS0i = 0, PS1i = 0;
    ivec4 tempi = ivec4(0);
    float tempResultf;
    bool predResult = true;
    bool activeMaskStack[2];
    bool activeMaskStackC[3];
    activeMaskStack[0] = true;
    activeMaskStackC[0] = true;
    activeMaskStackC[1] = true;
    R0i = floatBitsToInt(passParameterSem136);
    R1i = floatBitsToInt(passParameterSem137);

    // Green channel at the four coordinate pairs of passParameterSem137, then the
    // full centre texel (which overwrites R1i, so it must be fetched last).
    if( activeMaskStackC[1] == true ) {
        R2i.y = floatBitsToInt(textureLod(textureUnitPS0, intBitsToFloat(R1i.zy),0.0).y);
        R0i.w = floatBitsToInt(textureLod(textureUnitPS0, intBitsToFloat(R1i.xy),0.0).y);
        R0i.z = floatBitsToInt(textureLod(textureUnitPS0, intBitsToFloat(R1i.xw),0.0).y);
        R2i.x = floatBitsToInt(textureLod(textureUnitPS0, intBitsToFloat(R1i.zw),0.0).y);
        R1i.xyzw = floatBitsToInt(textureLod(textureUnitPS0, intBitsToFloat(R0i.xy),0.0).xyzw);
    }

    // Range test: R4i.z / R4i.w end up holding the min / max of the four green samples.
    if( activeMaskStackC[1] == true ) {
        activeMaskStack[1] = activeMaskStack[0];
        activeMaskStackC[2] = activeMaskStackC[1];
        // 0
        backupReg0i = R2i.y;
        R127i.x = floatBitsToInt(max(intBitsToFloat(R0i.w), intBitsToFloat(R0i.z)));
        // 0x3b2aaaa8 is the bit pattern of ~0.0026 (1/384), a small bias added to one sample.
        R2i.y = floatBitsToInt(intBitsToFloat(backupReg0i) + intBitsToFloat(0x3b2aaaa8));
        PV0i.y = R2i.y;
        R127i.z = floatBitsToInt(min(intBitsToFloat(R0i.w), intBitsToFloat(R0i.z)));
        // 1
        PV1i.x = floatBitsToInt(max(intBitsToFloat(R2i.x), intBitsToFloat(PV0i.y)));
        PV1i.w = floatBitsToInt(min(intBitsToFloat(R2i.x), intBitsToFloat(PV0i.y)));
        // 2
        R4i.z = floatBitsToInt(min(intBitsToFloat(R127i.z), intBitsToFloat(PV1i.w)));
        PV0i.z = R4i.z;
        R4i.w = floatBitsToInt(max(intBitsToFloat(R127i.x), intBitsToFloat(PV1i.x)));
        PV0i.w = R4i.w;
        // 3
        PV1i.x = floatBitsToInt(mul_nonIEEE(intBitsToFloat(PV0i.w), intBitsToFloat(uf_remappedPS[0].x)));
        PV1i.y = floatBitsToInt(max(intBitsToFloat(R1i.y), intBitsToFloat(PV0i.w)));
        PV1i.z = floatBitsToInt(min(intBitsToFloat(R1i.y), intBitsToFloat(PV0i.z)));
        // 4
        PV0i.z = floatBitsToInt(-(intBitsToFloat(PV1i.z)) + intBitsToFloat(PV1i.y));
        PV0i.w = floatBitsToInt(max(intBitsToFloat(uf_remappedPS[0].y), intBitsToFloat(PV1i.x)));
        // 5 -- threshold > range (centre included) => all-ones mask
        PV1i.y = ((intBitsToFloat(PV0i.w) > intBitsToFloat(PV0i.z))?int(0xFFFFFFFF):int(0x0));
        // 6 -- mask set: keep the centre texel and set R3i.x to 1.0 (0x3f800000);
        //      mask clear: zero R1i and R3i.x so the branch below runs.
        backupReg0i = R1i.y;
        backupReg1i = R1i.z;
        backupReg2i = R1i.w;
        backupReg3i = R1i.x;
        R3i.x = ((PV1i.y == 0)?(0):(0x3f800000));
        R1i.y = ((PV1i.y == 0)?(0):(backupReg0i));
        R1i.z = ((PV1i.y == 0)?(0):(backupReg1i));
        R1i.w = ((PV1i.y == 0)?(0):(backupReg2i));
        R1i.x = ((PV1i.y == 0)?(0):(backupReg3i));
        PS0i = R1i.x;
        // 7
        predResult = (R3i.x == 0);
        activeMaskStack[1] = predResult;
        activeMaskStackC[2] = predResult == true && activeMaskStackC[1] == true;
    }
    else {
        activeMaskStack[1] = false;
        activeMaskStackC[2] = false;
    }

    // Direction and sample coordinates for the four extra taps.
    if( activeMaskStackC[2] == true ) {
        // 0
        backupReg0i = R0i.w;
        PV0i.y = floatBitsToInt(intBitsToFloat(R0i.z) + -(intBitsToFloat(R2i.y)));
        R127i.z = 0;
        PV0i.w = floatBitsToInt(-(intBitsToFloat(backupReg0i)) + intBitsToFloat(R2i.x));
        // 1
        R127i.x = floatBitsToInt(intBitsToFloat(PV0i.y) + intBitsToFloat(PV0i.w));
        PV1i.x = R127i.x;
        R127i.y = floatBitsToInt(intBitsToFloat(PV0i.y) + -(intBitsToFloat(PV0i.w)));
        PV1i.y = R127i.y;
        // 2 -- squared length of the direction vector
        tempi.x = floatBitsToInt(dot(vec4(intBitsToFloat(PV1i.x),intBitsToFloat(PV1i.y),intBitsToFloat(R127i.z),-0.0),vec4(intBitsToFloat(PV1i.x),intBitsToFloat(PV1i.y),intBitsToFloat(R127i.z),0.0)));
        PV0i.x = tempi.x;
        PV0i.y = tempi.x;
        PV0i.z = tempi.x;
        PV0i.w = tempi.x;
        // 3 -- reciprocal square root
        tempResultf = 1.0 / sqrt(intBitsToFloat(PV0i.x));
        PS1i = floatBitsToInt(tempResultf);
        // 4 -- normalised direction
        backupReg0i = R127i.y;
        R127i.y = floatBitsToInt(mul_nonIEEE(intBitsToFloat(backupReg0i), intBitsToFloat(PS1i)));
        PV0i.y = R127i.y;
        R127i.z = floatBitsToInt(mul_nonIEEE(intBitsToFloat(R127i.x), intBitsToFloat(PS1i)));
        PV0i.z = R127i.z;
        // 5
        PV1i.x = floatBitsToInt(max(intBitsToFloat(PV0i.z), -(intBitsToFloat(PV0i.z))));
        PV1i.y = floatBitsToInt(mul_nonIEEE(intBitsToFloat(PV0i.z) / resScale, intBitsToFloat(uf_remappedPS[1].x))); // sharpen pass?
        PV1i.z = floatBitsToInt(mul_nonIEEE(intBitsToFloat(PV0i.y)/ resScale, intBitsToFloat(uf_remappedPS[1].y))); //sharpen pass?
        PV1i.w = floatBitsToInt(max(intBitsToFloat(PV0i.y), -(intBitsToFloat(PV0i.y))));
        // 6
        // NOTE(review): PV1i.y / PV1i.z were already divided by resScale in group 5.
        // The "minus" taps (R1i.xy) use that offset as-is, while the "plus" taps
        // (R2i.wy) divide by resScale a second time, so the two taps are not
        // symmetric around the centre. Left unchanged because it alters the
        // rendered result -- confirm whether this is intended.
        R1i.x = floatBitsToInt(intBitsToFloat(R0i.x) + -(intBitsToFloat(PV1i.y)));
        R1i.y = floatBitsToInt(intBitsToFloat(R0i.y) + -(intBitsToFloat(PV1i.z)));
        PV0i.z = floatBitsToInt(min(intBitsToFloat(PV1i.x), intBitsToFloat(PV1i.w)));
        R2i.w = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(PV1i.y) / resScale);
        R2i.y = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(PV1i.z) / resScale);
        PS0i = R2i.y;
        // 7
        PV1i.y = floatBitsToInt(mul_nonIEEE(intBitsToFloat(PV0i.z), intBitsToFloat(uf_remappedPS[2].w))); //?
        // 8
        PS0i = floatBitsToInt(1.0 / intBitsToFloat(PV1i.y));
        // 9
        PV1i.z = floatBitsToInt(mul_nonIEEE(intBitsToFloat(R127i.y), intBitsToFloat(PS0i)));
        PV1i.w = floatBitsToInt(mul_nonIEEE(intBitsToFloat(R127i.z), intBitsToFloat(PS0i)));
        // 10 -- 0xc0000000 is the bit pattern of -2.0; groups 10 and 11 clamp to [-2, 2]
        PV0i.x = floatBitsToInt(max(intBitsToFloat(PV1i.z), intBitsToFloat(0xc0000000)));
        PV0i.y = floatBitsToInt(max(intBitsToFloat(PV1i.w), intBitsToFloat(0xc0000000)));
        // 11
        PV1i.z = floatBitsToInt(min(intBitsToFloat(PV0i.x), 2.0));
        PV1i.w = floatBitsToInt(min(intBitsToFloat(PV0i.y), 2.0));
        // 12
        PV0i.z = floatBitsToInt(mul_nonIEEE(intBitsToFloat(uf_remappedPS[1].w) , intBitsToFloat(PV1i.z)));
        PV0i.w = floatBitsToInt(mul_nonIEEE(intBitsToFloat(uf_remappedPS[1].z) , intBitsToFloat(PV1i.w)));
        // 13 -- R0i.xy = centre - offset, R0i.zw = centre + offset
        backupReg0i = R0i.x;
        backupReg1i = R0i.y;
        R0i.xyz = floatBitsToInt(vec3(intBitsToFloat(backupReg0i),intBitsToFloat(backupReg1i),intBitsToFloat(backupReg0i)) + vec3(-(intBitsToFloat(PV0i.w) / resScale),-(intBitsToFloat(PV0i.z) / resScale),intBitsToFloat(PV0i.w) / resScale));
        R0i.w = floatBitsToInt(intBitsToFloat(backupReg1i) + intBitsToFloat(PV0i.z) / resScale);
    }

    // Four extra taps. R0i is both a coordinate source and a destination, so the
    // fetch that overwrites it must stay last.
    if( activeMaskStackC[2] == true ) {
        R1i.xyzw = floatBitsToInt(textureLod(textureUnitPS0, intBitsToFloat(R1i.xy),0.0).xyzw);
        R2i.xyzw = floatBitsToInt(textureLod(textureUnitPS0, intBitsToFloat(R2i.wy),0.0).xyzw);
        R3i.xyzw = floatBitsToInt(textureLod(textureUnitPS0, intBitsToFloat(R0i.xy),0.0).xyzw);
        R0i.xyzw = floatBitsToInt(textureLod(textureUnitPS0, intBitsToFloat(R0i.zw),0.0).xyzw);
    }

    // Blend: R126i holds the 2-tap average (R1i + R2i) / 2, R127i the 4-tap
    // average. Note the swizzled channel layout: R127i = (green, red, alpha, blue).
    if( activeMaskStackC[2] == true ) {
        // 0
        R127i.xyz = floatBitsToInt(vec3(intBitsToFloat(R1i.y),intBitsToFloat(R1i.x),intBitsToFloat(R1i.w)) + vec3(intBitsToFloat(R2i.y),intBitsToFloat(R2i.x),intBitsToFloat(R2i.w)));
        PV0i.y = R127i.y;
        R127i.w = floatBitsToInt(intBitsToFloat(R1i.z) + intBitsToFloat(R2i.z));
        // 1
        backupReg0i = R0i.x;
        backupReg1i = R0i.z;
        PV1i.x = floatBitsToInt(intBitsToFloat(R3i.y) + intBitsToFloat(R0i.y));
        PV1i.y = floatBitsToInt(intBitsToFloat(R3i.x) + intBitsToFloat(backupReg0i));
        PV1i.z = floatBitsToInt(intBitsToFloat(R3i.w) + intBitsToFloat(R0i.w));
        PV1i.w = floatBitsToInt(intBitsToFloat(R3i.z) + intBitsToFloat(backupReg1i));
        R126i.z = PV0i.y;
        R126i.z = floatBitsToInt(intBitsToFloat(R126i.z) / 2.0);
        PS1i = R126i.z;
        // 2
        PV0i.x = floatBitsToInt(intBitsToFloat(R127i.x) + intBitsToFloat(PV1i.x));
        PV0i.y = floatBitsToInt(intBitsToFloat(R127i.y) + intBitsToFloat(PV1i.y));
        PV0i.z = floatBitsToInt(intBitsToFloat(R127i.z) + intBitsToFloat(PV1i.z));
        PV0i.w = floatBitsToInt(intBitsToFloat(R127i.w) + intBitsToFloat(PV1i.w));
        R126i.y = R127i.x;
        R126i.y = floatBitsToInt(intBitsToFloat(R126i.y) / 2.0);
        PS0i = R126i.y;
        // 3
        backupReg0i = R127i.w;
        R127i.x = floatBitsToInt(intBitsToFloat(PV0i.x) * 0.25 );
        PV1i.x = R127i.x;
        R127i.y = floatBitsToInt(intBitsToFloat(PV0i.y) * 0.25);
        R127i.z = floatBitsToInt(intBitsToFloat(PV0i.z) * 0.25);
        R127i.w = floatBitsToInt(intBitsToFloat(PV0i.w) * 0.25);
        R126i.x = backupReg0i;
        R126i.x = floatBitsToInt(intBitsToFloat(R126i.x) / 2.0);
        PS1i = R126i.x;
        // 4 -- is the 4-tap green average outside the [R4i.z, R4i.w] range found earlier?
        PV0i.x = ((intBitsToFloat(PV1i.x) > intBitsToFloat(R4i.w))?int(0xFFFFFFFF):int(0x0));
        PV0i.y = ((intBitsToFloat(R4i.z) > intBitsToFloat(PV1i.x))?int(0xFFFFFFFF):int(0x0));
        // 5
        R123i.w = ((PV0i.y == 0)?(PV0i.x):(int(-1)));
        PV1i.w = R123i.w;
        // 6 -- inside the range: 4-tap average; outside: 2-tap average.
        //      Alpha takes the 4-tap value in both cases.
        R1i.x = ((PV1i.w == 0)?(R127i.y):(R126i.z));
        R1i.y = ((PV1i.w == 0)?(R127i.x):(R126i.y));
        R1i.z = ((PV1i.w == 0)?(R127i.w):(R126i.x));
        R1i.w = ((PV1i.w == 0)?(R127i.z):(R127i.z));
    }
    activeMaskStackC[1] = activeMaskStack[0] == true && activeMaskStackC[0] == true;

    // export: grade, apply the contrast curve, then sharpen
    vec3 R0f = vec3(intBitsToFloat(R1i.x), intBitsToFloat(R1i.y), intBitsToFloat(R1i.z));
    R0f.xyz = contrasty(R0f.xyz);
    R0f.xyz = mix(R0f.xyz, smoothstep(0.0, 1.0, R0f.xyz), contrastCurve);
    float smask = lumasharping(textureUnitPS0, passParameterSem136.xy);
    // The sharpening term is added equally to all three channels.
    R0f.xyz = mix(R0f.xyz, R0f.xyz + vec3(smask), sharp_mix);

    passPixelColor0 = vec4(R0f.xyz, intBitsToFloat(R1i.w));
    //passPixelColor0 = vec4(intBitsToFloat(R1i.x), intBitsToFloat(R1i.y), intBitsToFloat(R1i.z), intBitsToFloat(R1i.w));
}