mirror of
https://github.com/cemu-project/cemu_graphic_packs.git
synced 2024-12-23 16:31:50 +01:00
3f87a42f4f
Fixes https://github.com/slashiee/cemu_graphic_packs/issues/437. The aspect ratio mod apparently relied on a shotgun approach, which caused major issues with Vulkan. Using @getdls's addresses, I found out that two of the addresses that were being patched to jump to the code cave were unrelated to the aspect ratio, so I had those removed. Those random jumps probably didn't cause issues because they would jump to code that would just use floating-point registers that were already in use and then put the result in the wrong register too. While the picked registers would work with the 3D rendering aspect ratio instruction, it wouldn't fail for the other registers since they weren't specific to those. Anyway, the proper fix was to make a second code cave for the aspect ratio, which outputs the culling ratio in the proper register, and to make sure it doesn't use in-use registers. I also reverted some of the shader code changes, since they were inconsistent with how we've done them for all the other packs, and made them more compatible with ultrawide resolutions.
152 lines
5.2 KiB
Plaintext
152 lines
5.2 KiB
Plaintext
#version 430
|
|
#extension GL_ARB_texture_gather : enable
|
|
#extension GL_ARB_separate_shader_objects : enable
|
|
// shader 43a2239f07af804e
|
|
// Used for: Horizontal Blur
|
|
// Horizontal scale factor: $width divided by $gameWidth.
// NOTE(review): $width and $gameWidth are placeholders, presumably replaced
// with the pack's output width and the game's native width before this
// shader is compiled -- confirm against the pack's rules file.
// main() divides its horizontal sample offsets by this value.
float resXScale = float($width)/float($gameWidth);
|
|
|
|
|
|
// start of shader inputs/outputs, predetermined by Cemu. Do not touch
|
|
// Layout helper macros. With VULKAN defined they expand to qualifiers that
// carry a descriptor set plus location/binding; otherwise they expand to
// plain location/binding qualifiers.
#ifdef VULKAN
|
|
#define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location)
|
|
#define UNIFORM_BUFFER_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(set = __vkSet, binding = __vkLocation, std140)
|
|
#define TEXTURE_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(set = __vkSet, binding = __vkLocation)
|
|
// Scales gl_FragCoord.xy by uf_fragCoordScale (a vec4 in this branch, hence the .xy).
#define GET_FRAGCOORD() vec4(gl_FragCoord.xy*uf_fragCoordScale.xy,gl_FragCoord.zw)
|
|
#else
|
|
#define ATTR_LAYOUT(__vkSet, __location) layout(location = __location)
|
|
#define UNIFORM_BUFFER_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(binding = __glLocation, std140)
|
|
#define TEXTURE_LAYOUT(__glLocation, __vkSet, __vkLocation) layout(binding = __glLocation)
|
|
// Scales gl_FragCoord.xy by uf_fragCoordScale (a vec2 in this branch).
#define GET_FRAGCOORD() vec4(gl_FragCoord.xy*uf_fragCoordScale,gl_FragCoord.zw)
|
|
#endif
|
|
// Uniforms. With VULKAN defined they live in a uniform block at set 1,
// binding 1; otherwise they are loose uniforms.
#ifdef VULKAN
|
|
layout(set = 1, binding = 1) uniform ufBlock
|
|
{
|
|
// Three ivec4 values; main() reinterprets their bits as floats via intBitsToFloat.
uniform ivec4 uf_remappedPS[3];
|
|
uniform vec4 uf_fragCoordScale;
|
|
};
|
|
#else
|
|
// Three ivec4 values; main() reinterprets their bits as floats via intBitsToFloat.
uniform ivec4 uf_remappedPS[3];
|
|
uniform vec2 uf_fragCoordScale;
|
|
#endif
|
|
// Texture sampled by main().
TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0;
|
|
// Interpolated input; main() uses its .xy as the texture coordinate.
layout(location = 0) in vec4 passParameterSem133;
|
|
// Colour written by main().
layout(location = 0) out vec4 passPixelColor0;
|
|
// end of shader inputs/outputs
|
|
// Treats the bits of `v` as a float, clamps that float to [0.0, 1.0] and
// returns the result again as raw bits.
// Two bit patterns are special-cased before clamp() is called:
//   0x7FFFFFFF -> bits of 1.0
//   0xFFFFFFFF -> bits of 0.0
// (both are NaN encodings when read as IEEE-754 single precision).
int clampFI32(int v)
{
    float saturated;
    if (v == 0x7FFFFFFF)
    {
        saturated = 1.0;
    }
    else if (v == 0xFFFFFFFF)
    {
        saturated = 0.0;
    }
    else
    {
        saturated = clamp(intBitsToFloat(v), 0.0, 1.0);
    }
    return floatBitsToInt(saturated);
}
|
|
// Multiplies a by b, except that the result is forced to 0.0 whenever either
// operand compares equal to zero. This differs from a plain IEEE multiply in
// cases such as 0 * infinity, which would otherwise produce NaN.
float mul_nonIEEE(float a, float b)
{
    return (a == 0.0 || b == 0.0) ? 0.0 : a * b;
}
|
|
// Horizontal blur pass.
// Takes five samples of textureUnitPS0 along X (centre, two to the right, two
// to the left), combines them with fixed weights, then applies a colour ramp
// selected by the blurred alpha and a final per-channel multiply. The ramp
// end points and the final multiplier are read from uf_remappedPS.
void main()
{
    // Constants are kept as raw float bit patterns; the decimal values in the
    // comments are approximate.
    float farWeight  = intBitsToFloat(0x3ee66666); // ~0.45
    float nearWeight = intBitsToFloat(0x3f4ccccd); // ~0.8
    float sumScale   = intBitsToFloat(0x3e924925); // ~0.2857, about 1 / (1 + 2*0.45 + 2*0.8)

    // Horizontal texture-coordinate offsets of the taps, divided by resXScale.
    float farStep  = intBitsToFloat(0x3c333333) / resXScale; // ~0.0109 before scaling
    float nearStep = intBitsToFloat(0x3b99999a) / resXScale; // ~0.0047 before scaling

    vec2 uv = passParameterSem133.xy;

    vec4 centre    = texture(textureUnitPS0, uv);
    vec4 rightFar  = texture(textureUnitPS0, vec2(uv.x + farStep,  uv.y));
    vec4 rightNear = texture(textureUnitPS0, vec2(uv.x + nearStep, uv.y));
    vec4 leftFar   = texture(textureUnitPS0, vec2(uv.x - farStep,  uv.y));
    vec4 leftNear  = texture(textureUnitPS0, vec2(uv.x - nearStep, uv.y));

    // Weighted sum, accumulated in this fixed order:
    // centre, far right, near right, far left, near left.
    vec4 blurred = centre * 1.0 + rightFar * farWeight;
    blurred = rightNear * nearWeight + blurred;
    blurred = leftFar * farWeight + blurred;
    blurred = leftNear * nearWeight + blurred;
    blurred = blurred * sumScale;

    // Uniform data is stored as integer bits; reinterpret it as floats.
    vec3 rampEnd   = intBitsToFloat(uf_remappedPS[0].xyz);
    vec3 rampStart = intBitsToFloat(uf_remappedPS[1].xyz);
    vec4 finalMul  = intBitsToFloat(uf_remappedPS[2]);
    vec3 rampDelta = rampEnd - rampStart;

    // Per-channel ramp: rampStart + (rampEnd - rampStart) * blurred alpha.
    vec3 ramp = vec3(
        mul_nonIEEE(rampDelta.x, blurred.w) + rampStart.x,
        mul_nonIEEE(rampDelta.y, blurred.w) + rampStart.y,
        mul_nonIEEE(rampDelta.z, blurred.w) + rampStart.z);

    // export
    passPixelColor0 = vec4(
        mul_nonIEEE(mul_nonIEEE(blurred.x, ramp.x), finalMul.x),
        mul_nonIEEE(mul_nonIEEE(blurred.y, ramp.y), finalMul.y),
        mul_nonIEEE(mul_nonIEEE(blurred.z, ramp.z), finalMul.z),
        mul_nonIEEE(blurred.w, finalMul.w));
}
|