cemu_graphic_packs/Resolutions/Bayonetta2_Resolution/8a0efcdc3f556942_0000000000000079_ps.txt
Crementif 3f87a42f4f
[Bayonetta 2] Fix aspect ratio patch to not break Vulkan
Fixes https://github.com/slashiee/cemu_graphic_packs/issues/437

The aspect ratio patch apparently relied on a shotgun approach, which caused major issues with Vulkan.
Using @getdls' addresses, I found that two of the addresses being patched to jump to the code cave were unrelated to the aspect ratio, so I removed those patches.

Those stray jumps probably didn't cause visible issues before: the code cave only read floating point registers that were already in use and wrote its result to the wrong register anyway. The chosen registers happened to work for the 3D rendering aspect ratio instruction, and the unrelated instructions didn't fail because they didn't depend on those specific registers.

The proper fix was to add a second code cave for the aspect ratio that outputs the culling ratio in the correct register, and to make sure it doesn't touch registers that are already in use.

I also reverted some of the shader code changes, since they were inconsistent with how we've done them in all the other packs, and made them more compatible with ultrawide resolutions.
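
For reference, a minimal sketch of the scaling convention used across the resolution packs, which the shader below follows (uf_offset is a hypothetical stand-in for whatever remapped uniform carries the game's native-resolution offsets):

    float resXScale = float($width)/float($gameWidth);
    float resYScale = float($height)/float($gameHeight);
    // rescale a UV-space offset that the game computed for its native render target
    vec2 scaledOffset = vec2(intBitsToFloat(uf_offset.x)/resXScale,
                             intBitsToFloat(uf_offset.y)/resYScale);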
2020-05-24 18:11:09 +02:00

#version 430
#extension GL_ARB_texture_gather : enable
#extension GL_ARB_separate_shader_objects : enable
// shader 8a0efcdc3f556942
// Used for: Pyramid Blur
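// resXScale/resYScale: ratio between the pack's target resolution and the game's
// native render size; Cemu substitutes the $-variables from the pack's rules.txt.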
float resXScale = float($width)/float($gameWidth);
float resYScale = float($height)/float($gameHeight);
// start of shader inputs/outputs, predetermined by Cemu. Do not touch
#ifdef VULKAN
#define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location)
#define UNIFORM_BUFFER_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(set = __vkSet, binding = __vkLocation, std140)
#define TEXTURE_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(set = __vkSet, binding = __vkLocation)
#define GET_FRAGCOORD() vec4(gl_FragCoord.xy*uf_fragCoordScale.xy,gl_FragCoord.zw)
#else
#define ATTR_LAYOUT(__vkSet, __location) layout(location = __location)
#define UNIFORM_BUFFER_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(binding = __glLocation, std140)
#define TEXTURE_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(binding = __glLocation)
#define GET_FRAGCOORD() vec4(gl_FragCoord.xy*uf_fragCoordScale,gl_FragCoord.zw)
#endif
#ifdef VULKAN
layout(set = 1, binding = 1) uniform ufBlock
{
uniform ivec4 uf_remappedPS[2];
uniform vec4 uf_fragCoordScale;
};
#else
uniform ivec4 uf_remappedPS[2];
uniform vec2 uf_fragCoordScale;
#endif
TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0;
layout(location = 0) in vec4 passParameterSem133;
layout(location = 0) out vec4 passPixelColor0;
// end of shader inputs/outputs
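// Cemu-emitted helper: clamps a float stored as int bits to [0.0, 1.0],
// special-casing 0x7FFFFFFF -> 1.0 and 0xFFFFFFFF -> 0.0.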
int clampFI32(int v)
{
if( v == 0x7FFFFFFF )
return floatBitsToInt(1.0);
else if( v == 0xFFFFFFFF )
return floatBitsToInt(0.0);
return floatBitsToInt(clamp(intBitsToFloat(v), 0.0, 1.0));
}
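// Non-IEEE multiply: 0 * anything yields 0 (never NaN/Inf), matching console behavior.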
float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }
void main()
{
vec4 R0f = vec4(0.0);
vec4 R1f = vec4(0.0);
vec4 R123f = vec4(0.0);
vec4 R124f = vec4(0.0);
vec4 R125f = vec4(0.0);
vec4 R126f = vec4(0.0);
vec4 R127f = vec4(0.0);
float backupReg0f, backupReg1f, backupReg2f, backupReg3f, backupReg4f;
vec4 PV0f = vec4(0.0), PV1f = vec4(0.0);
float PS0f = 0.0, PS1f = 0.0;
vec4 tempf = vec4(0.0);
float tempResultf;
int tempResulti;
ivec4 ARi = ivec4(0);
bool predResult = true;
vec3 cubeMapSTM;
int cubeMapFaceId;
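// passParameterSem133.xy carries the interpolated UV center for the blur taps.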
R0f = passParameterSem133;
// 0
backupReg0f = R0f.x;
backupReg1f = R0f.y;
backupReg0f = R0f.x;
backupReg1f = R0f.y;
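// uf_remappedPS[0].zw holds tap offsets sized for the native render target;
// dividing by resXScale/resYScale rescales them so the kernel samples the
// same texels at the replaced resolution.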
R0f.x = (intBitsToFloat(uf_remappedPS[0].z)/resXScale * 2.0 + backupReg0f);
R0f.y = (intBitsToFloat(uf_remappedPS[0].w)/resYScale * 2.0 + backupReg1f);
R0f.z = (-(intBitsToFloat(uf_remappedPS[0].z)/resXScale) * 2.0 + backupReg0f);
R0f.w = (-(intBitsToFloat(uf_remappedPS[0].w)/resYScale) * 2.0 + backupReg1f);
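// Fetch the two taps on opposite sides of the center UV.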
R1f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw);
R0f.xyzw = (texture(textureUnitPS0, R0f.zw).xyzw);
// 0
backupReg0f = R0f.x;
backupReg1f = R0f.w;
PV0f.x = mul_nonIEEE(R0f.y, R0f.w);
PV0f.y = mul_nonIEEE(R1f.y, R1f.w);
PV0f.z = mul_nonIEEE(R1f.x, R1f.w);
PV0f.w = mul_nonIEEE(backupReg0f, backupReg1f);
R127f.x = mul_nonIEEE(R1f.z, R1f.w);
PS0f = R127f.x;
// 1
PV1f.x = max(PV0f.z, PV0f.w);
PV1f.y = mul_nonIEEE(R0f.z, R0f.w);
R127f.z = max(R1f.w, R0f.w);
PV1f.z = R127f.z;
PV1f.w = max(PV0f.y, PV0f.x);
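// uf_remappedPS[1]: .z-.x and .w-.y form lerp spans, recombined with the
// base values .x/.y at group 4.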
R126f.x = -(intBitsToFloat(uf_remappedPS[1].x)) + intBitsToFloat(uf_remappedPS[1].z);
PS1f = R126f.x;
// 2
PV0f.x = -(R1f.x) + PV1f.x;
PV0f.y = -(R1f.w) + PV1f.z;
PV0f.z = max(R127f.x, PV1f.y);
PV0f.w = -(R1f.y) + PV1f.w;
R126f.z = -(intBitsToFloat(uf_remappedPS[1].y)) + intBitsToFloat(uf_remappedPS[1].w);
PS0f = R126f.z;
// 3
R127f.x = (mul_nonIEEE(PV0f.x,R127f.z) + R1f.x);
PV1f.x = R127f.x;
R127f.y = (mul_nonIEEE(PV0f.w,R127f.z) + R1f.y);
PV1f.y = R127f.y;
PV1f.z = -(R1f.z) + PV0f.z;
R127f.w = (mul_nonIEEE(PV0f.y,R127f.z) + R1f.w);
PV1f.w = R127f.w;
// 4
PV0f.x = max(PV1f.x, PV1f.y);
R126f.y = (mul_nonIEEE(R126f.z,PV1f.w) + intBitsToFloat(uf_remappedPS[1].y));
R123f.z = (mul_nonIEEE(PV1f.z,R127f.z) + R1f.z);
PV0f.z = R123f.z;
R126f.w = (mul_nonIEEE(R126f.x,PV1f.w) + intBitsToFloat(uf_remappedPS[1].x));
R125f.y = PV1f.x;
R125f.y *= 2.0;
PS0f = R125f.y;
// 5
backupReg0f = R127f.w;
R126f.x = R127f.y;
R126f.x *= 2.0;
R124f.y = PV0f.z;
R124f.y *= 2.0;
PV1f.z = PV0f.z * intBitsToFloat(0x3dea7371);
R127f.w = max(PV0f.z, PV0f.x);
R127f.w /= 2.0;
R1f.w = backupReg0f;
PS1f = R1f.w;
// 6
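// Dot weights approximate Rec.601 luma: 0x3e990afe ~ 0.299, 0x3f162c23 ~ 0.587;
// the ~0.114 blue weight (0x3dea7371) was already applied to PV1f.z in group 5.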
tempf.x = dot(vec4(R127f.x,R127f.y,PV1f.z,-0.0),vec4(intBitsToFloat(0x3e990afe),intBitsToFloat(0x3f162c23),1.0,0.0));
tempf.x /= 2.0;
PV0f.x = tempf.x;
PV0f.y = tempf.x;
PV0f.z = tempf.x;
PV0f.w = tempf.x;
// 7
PV1f.z = R127f.w + PV0f.x;
// 8
PV0f.y = -(R126f.w) + PV1f.z;
// 9
PV1f.x = mul_nonIEEE(R126f.y, PV0f.y);
PV1f.x = clamp(PV1f.x, 0.0, 1.0);
// 10
R1f.x = mul_nonIEEE(R125f.y, PV1f.x);
R1f.y = mul_nonIEEE(R126f.x, PV1f.x);
R1f.z = mul_nonIEEE(R124f.y, PV1f.x);
// export
passPixelColor0 = vec4(R1f.x, R1f.y, R1f.z, R1f.w);
}