From 3f87a42f4f01664236b43ae97b5ce96fd52c5665 Mon Sep 17 00:00:00 2001 From: Crementif <26669564+Crementif@users.noreply.github.com> Date: Sun, 24 May 2020 18:09:28 +0200 Subject: [PATCH] [Bayonetta 2] Fix aspect ratio patch to not break Vulkan Fixes https://github.com/slashiee/cemu_graphic_packs/issues/437 The aspect ratio mod apparently relied more on a shotgun approach, which caused major issues with Vulkan. Using @getdls's addresses, I found out that 2 of the addresses that were being patched to jump to the code cave were unrelated to the aspect ratio, so I had those removed. Those random jumps probably didn't cause issues because they would jump to code that would just utilize floating-point registers that were in use and then put the result in the wrong register too. While the picked registers would work with the 3D rendering aspect ratio instruction, it wouldn't fail for the other registers since they weren't specific to those. Anyway, the proper fix was to just make a second code cave for the aspect ratio, which outputs the culling ratio in the proper register, and to make sure it doesn't use registers that are already in use. I also reverted some of the shader code changes, since they were inconsistent with how we've done them for all the other packs, and made them more compatible with ultrawide resolutions. 
--- .../43a2239f07af804e_0000000000000079_ps.txt | 13 +++--- .../75387173950c1793_0000000000000079_ps.txt | 15 ++++--- .../78a2659662685d55_0000000000000079_ps.txt | 43 +++++++++++-------- .../8a0efcdc3f556942_0000000000000079_ps.txt | 16 ++++--- .../Bayonetta2_Resolution/patch_aspect.asm | 23 ---------- .../patch_aspectRatio.asm | 28 ++++++++++++ 6 files changed, 79 insertions(+), 59 deletions(-) delete mode 100644 Resolutions/Bayonetta2_Resolution/patch_aspect.asm create mode 100644 Resolutions/Bayonetta2_Resolution/patch_aspectRatio.asm diff --git a/Resolutions/Bayonetta2_Resolution/43a2239f07af804e_0000000000000079_ps.txt b/Resolutions/Bayonetta2_Resolution/43a2239f07af804e_0000000000000079_ps.txt index 7cb90e30..0dcebd2f 100644 --- a/Resolutions/Bayonetta2_Resolution/43a2239f07af804e_0000000000000079_ps.txt +++ b/Resolutions/Bayonetta2_Resolution/43a2239f07af804e_0000000000000079_ps.txt @@ -2,6 +2,10 @@ #extension GL_ARB_texture_gather : enable #extension GL_ARB_separate_shader_objects : enable // shader 43a2239f07af804e +// Used for: Horizontal Blur +float resXScale = float($width)/float($gameWidth); + + // start of shader inputs/outputs, predetermined by Cemu. 
Do not touch #ifdef VULKAN #define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location) @@ -27,7 +31,6 @@ uniform vec2 uf_fragCoordScale; TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0; layout(location = 0) in vec4 passParameterSem133; layout(location = 0) out vec4 passPixelColor0; -const float resScale = ($gameWidth / $width); // end of shader inputs/outputs int clampFI32(int v) { @@ -64,17 +67,17 @@ R4f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw); R5f.x = intBitsToFloat(uf_remappedPS[0].x) + -(intBitsToFloat(uf_remappedPS[1].x)); R5f.w = intBitsToFloat(uf_remappedPS[0].y) + -(intBitsToFloat(uf_remappedPS[1].y)); // 1 -R1f.x = R0f.x + intBitsToFloat(0x3c333333)*resScale; +R1f.x = R0f.x + (intBitsToFloat(0x3c333333)/resXScale); R1f.y = R0f.y; R5f.z = intBitsToFloat(uf_remappedPS[0].z) + -(intBitsToFloat(uf_remappedPS[1].z)); R2f.w = R0f.y; -R2f.x = R0f.x + intBitsToFloat(0x3b99999a)*resScale; +R2f.x = R0f.x + (intBitsToFloat(0x3b99999a)/resXScale); PS1f = R2f.x; // 2 backupReg0f = R0f.x; -R3f.x = R0f.x + -(intBitsToFloat(0x3c333333)*resScale); +R3f.x = R0f.x + -(intBitsToFloat(0x3c333333)/resXScale); R3f.y = R0f.y; -R0f.x = backupReg0f + -(intBitsToFloat(0x3b99999a)*resScale); +R0f.x = backupReg0f + -(intBitsToFloat(0x3b99999a)/resXScale); PS0f = R0f.x; R1f.xyzw = (texture(textureUnitPS0, R1f.xy).xyzw); R2f.xyzw = (texture(textureUnitPS0, R2f.xw).xyzw); diff --git a/Resolutions/Bayonetta2_Resolution/75387173950c1793_0000000000000079_ps.txt b/Resolutions/Bayonetta2_Resolution/75387173950c1793_0000000000000079_ps.txt index f94d8634..142ace7d 100644 --- a/Resolutions/Bayonetta2_Resolution/75387173950c1793_0000000000000079_ps.txt +++ b/Resolutions/Bayonetta2_Resolution/75387173950c1793_0000000000000079_ps.txt @@ -1,7 +1,11 @@ #version 430 #extension GL_ARB_texture_gather : enable #extension GL_ARB_separate_shader_objects : enable -// shader 75387173950c1793//bloom vert +// shader 75387173950c1793 +// Used for: Vertical Blur +float 
resYScale = float($height)/float($gameHeight); + + // start of shader inputs/outputs, predetermined by Cemu. Do not touch #ifdef VULKAN #define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location) @@ -28,7 +32,6 @@ TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0; layout(location = 0) in vec4 passParameterSem133; layout(location = 0) out vec4 passPixelColor0; // end of shader inputs/outputs -const float resScale = float($gameWidth / $width); int clampFI32(int v) { if( v == 0x7FFFFFFF ) @@ -60,14 +63,14 @@ R0f = passParameterSem133; R4f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw); // 0 R1f.x = R0f.x; -R1f.y = R0f.y + intBitsToFloat(0x3ca2e8ba)*resScale; +R1f.y = R0f.y + (intBitsToFloat(0x3ca2e8ba)/resYScale); R2f.z = R0f.x; -R2f.y = R0f.y + intBitsToFloat(0x3c0ba2e9)*resScale; +R2f.y = R0f.y + (intBitsToFloat(0x3c0ba2e9)/resYScale); PS0f = R2f.y; // 1 R3f.x = R0f.x; -R3f.y = R0f.y + -(intBitsToFloat(0x3ca2e8ba))*resScale; -R0f.z = R0f.y + -(intBitsToFloat(0x3c0ba2e9))*resScale; +R3f.y = R0f.y + -(intBitsToFloat(0x3ca2e8ba)/resYScale); +R0f.z = R0f.y + -(intBitsToFloat(0x3c0ba2e9)/resYScale); R1f.xyzw = (texture(textureUnitPS0, R1f.xy).xyzw); R2f.xyzw = (texture(textureUnitPS0, R2f.zy).xyzw); R3f.xyzw = (texture(textureUnitPS0, R3f.xy).xyzw); diff --git a/Resolutions/Bayonetta2_Resolution/78a2659662685d55_0000000000000079_ps.txt b/Resolutions/Bayonetta2_Resolution/78a2659662685d55_0000000000000079_ps.txt index e33efd03..09cccd2a 100644 --- a/Resolutions/Bayonetta2_Resolution/78a2659662685d55_0000000000000079_ps.txt +++ b/Resolutions/Bayonetta2_Resolution/78a2659662685d55_0000000000000079_ps.txt @@ -1,7 +1,12 @@ #version 430 #extension GL_ARB_texture_gather : enable #extension GL_ARB_separate_shader_objects : enable -// shader 78a2659662685d55 //menu AA +// shader 78a2659662685d55 +// Used for: Menu Anti-Aliasing and Specular Highlight? 
+float resXScale = float($width)/float($gameWidth); +float resYScale = float($height)/float($gameHeight); + + // start of shader inputs/outputs, predetermined by Cemu. Do not touch #ifdef VULKAN #define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location) @@ -25,7 +30,7 @@ uniform vec2 uf_fragCoordScale; TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0; layout(location = 0) in vec4 passParameterSem133; layout(location = 0) out vec4 passPixelColor0; -const float resScale = ($gameWidth / $width); + // end of shader inputs/outputs int clampFI32(int v) { @@ -91,14 +96,14 @@ int cubeMapFaceId; R0i = floatBitsToInt(passParameterSem133); if( activeMaskStackC[1] == true ) { // 0 -R1i.xyz = floatBitsToInt(vec3(intBitsToFloat(R0i.x),intBitsToFloat(R0i.y),intBitsToFloat(R0i.x)) + vec3(0.0,0.0,intBitsToFloat(0xba99999a)*resScale)); +R1i.xyz = floatBitsToInt(vec3(intBitsToFloat(R0i.x),intBitsToFloat(R0i.y),intBitsToFloat(R0i.x)) + vec3(0.0,0.0,(intBitsToFloat(0xba99999a)/resXScale))); R1i.w = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0); -R2i.x = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(0x3a99999a)*resScale); +R2i.x = floatBitsToInt(intBitsToFloat(R0i.x) + (intBitsToFloat(0x3a99999a)/resXScale)); PS0i = R2i.x; // 1 R3i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0); R2i.y = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0); -R3i.zwy = floatBitsToInt(vec3(intBitsToFloat(R0i.y),intBitsToFloat(R0i.x),intBitsToFloat(R0i.y)) + vec3(intBitsToFloat(0xbb088889)*resScale,0.0,intBitsToFloat(0x3b088889)*resScale)); +R3i.zwy = floatBitsToInt(vec3(intBitsToFloat(R0i.y),intBitsToFloat(R0i.x),intBitsToFloat(R0i.y)) + vec3((intBitsToFloat(0xbb088889)/resYScale),0.0,(intBitsToFloat(0x3b088889)/resYScale))); PS1i = R3i.y; } if( activeMaskStackC[1] == true ) { @@ -118,7 +123,7 @@ R127i.z = floatBitsToInt(intBitsToFloat(R8i.y) + intBitsToFloat(R9i.y)); R127i.z = floatBitsToInt(intBitsToFloat(R127i.z) * 2.0); R127i.w = floatBitsToInt(intBitsToFloat(R8i.x) + 
intBitsToFloat(R9i.x)); R127i.w = floatBitsToInt(intBitsToFloat(R127i.w) * 2.0); -R3i.x = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(0xba4ccccd)*resScale); +R3i.x = floatBitsToInt(intBitsToFloat(R0i.x) + (intBitsToFloat(0xba4ccccd)/resXScale)); PS0i = R3i.x; // 1 R5i.x = floatBitsToInt(intBitsToFloat(R10i.z) + intBitsToFloat(R11i.z)); @@ -132,15 +137,15 @@ R4i.w = floatBitsToInt(intBitsToFloat(R4i.w) * 2.0); R3i.y = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0); PS1i = R3i.y; // 2 -R2i.x = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(0x3a4ccccd)*resScale); +R2i.x = floatBitsToInt(intBitsToFloat(R0i.x) + (intBitsToFloat(0x3a4ccccd)/resXScale)); R2i.y = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0); R3i.z = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0); -R3i.w = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0xbab60b61)*resScale); +R3i.w = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0xbab60b61)/resYScale)); R1i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0); PS0i = R1i.x; // 3 R4i.x = floatBitsToInt((intBitsToFloat(R7i.y) * 2.0 + intBitsToFloat(R127i.z))); -R1i.y = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0x3ab60b61)*resScale); +R1i.y = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0x3ab60b61)/resYScale)); R1i.z = floatBitsToInt((intBitsToFloat(R7i.x) * 2.0 + intBitsToFloat(R127i.w))); R1i.w = floatBitsToInt((intBitsToFloat(R7i.z) * 2.0 + intBitsToFloat(R127i.y))); R2i.w = floatBitsToInt((intBitsToFloat(R7i.w) * 2.0 + intBitsToFloat(R127i.x))); @@ -224,7 +229,7 @@ R124i.y = floatBitsToInt(intBitsToFloat(R125i.y) * intBitsToFloat(0x3e2aaaab)); R125i.z = floatBitsToInt(intBitsToFloat(R126i.x) * intBitsToFloat(0x3e2aaaab)); R123i.w = floatBitsToInt((intBitsToFloat(R7i.w) * 2.0 + intBitsToFloat(R4i.y))); PV0i.w = R123i.w; -R126i.w = floatBitsToInt((intBitsToFloat(backupReg1i) * intBitsToFloat(0x40400000) + -(intBitsToFloat(0x3dcccccd)*resScale))); +R126i.w = floatBitsToInt((intBitsToFloat(backupReg1i) * 
intBitsToFloat(0x40400000) + -(intBitsToFloat(0x3dcccccd)/resYScale))); PS0i = R126i.w; // 9 backupReg0i = R125i.x; @@ -232,7 +237,7 @@ backupReg1i = R127i.w; R125i.x = floatBitsToInt(-(intBitsToFloat(R7i.y)) + intBitsToFloat(R1i.y)); R125i.y = floatBitsToInt(-(intBitsToFloat(R7i.z)) + intBitsToFloat(R127i.z)); R124i.z = floatBitsToInt(intBitsToFloat(PV0i.w) * intBitsToFloat(0x3e2aaaab)); -R127i.w = floatBitsToInt((intBitsToFloat(backupReg0i) * intBitsToFloat(0x40400000) + -(intBitsToFloat(0x3dcccccd)*resScale))); +R127i.w = floatBitsToInt((intBitsToFloat(backupReg0i) * intBitsToFloat(0x40400000) + -(intBitsToFloat(0x3dcccccd)/resYScale))); R126i.z = floatBitsToInt(-(intBitsToFloat(R7i.w)) + intBitsToFloat(backupReg1i)); PS1i = R126i.z; // 10 @@ -255,8 +260,8 @@ PS1i = floatBitsToInt(1.0 / intBitsToFloat(PV0i.x)); R124i.x = floatBitsToInt(intBitsToFloat(R126i.w) * intBitsToFloat(PS1i)); R124i.x = clampFI32(R124i.x); R1i.y = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0); -R4i.z = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(0x3b333333)*resScale); -R1i.w = floatBitsToInt(intBitsToFloat(R0i.x) + intBitsToFloat(0x3b8ccccd)*resScale); +R4i.z = floatBitsToInt(intBitsToFloat(R0i.x) + (intBitsToFloat(0x3b333333)/resXScale)); +R1i.w = floatBitsToInt(intBitsToFloat(R0i.x) + (intBitsToFloat(0x3b8ccccd)/resXScale)); PS0i = floatBitsToInt(1.0 / intBitsToFloat(PV1i.x)); // 13 backupReg0i = R0i.x; @@ -312,15 +317,15 @@ if( activeMaskStackC[1] == true ) { // 0 R5i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0); R6i.y = floatBitsToInt(intBitsToFloat(R0i.y) + 0.0); -R5i.z = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0x3b9f49f5)*resScale); +R5i.z = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0x3b9f49f5)/resYScale)); PV0i.w = floatBitsToInt(intBitsToFloat(R9i.w) + intBitsToFloat(R17i.w)); R3i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0); PS0i = R3i.x; // 1 R1i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0); -R3i.y = floatBitsToInt(intBitsToFloat(R0i.y) 
+ intBitsToFloat(0x3bfa4fa5)*resScale); +R3i.y = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0x3bfa4fa5)/resYScale)); R2i.z = floatBitsToInt(intBitsToFloat(R18i.w) + intBitsToFloat(PV0i.w)); -R1i.w = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0x3c2aaaab)*resScale); +R1i.w = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0x3c2aaaab)/resYScale)); R4i.x = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0); PS1i = R4i.x; } @@ -337,14 +342,14 @@ backupReg1i = R0i.x; backupReg0i = R0i.y; PV0i.x = floatBitsToInt(intBitsToFloat(R11i.w) + intBitsToFloat(R6i.w)); PV0i.y = floatBitsToInt(intBitsToFloat(R19i.w) + intBitsToFloat(R2i.z)); -R4i.z = floatBitsToInt(intBitsToFloat(backupReg0i) + intBitsToFloat(0xbb9f49f5)*resScale); +R4i.z = floatBitsToInt(intBitsToFloat(backupReg0i) + (intBitsToFloat(0xbb9f49f5)/resYScale)); R1i.w = floatBitsToInt(intBitsToFloat(backupReg1i) + 0.0); -R1i.y = floatBitsToInt(intBitsToFloat(backupReg0i) + intBitsToFloat(0xbbfa4fa5)*resScale); +R1i.y = floatBitsToInt(intBitsToFloat(backupReg0i) + (intBitsToFloat(0xbbfa4fa5)/resYScale)); PS0i = R1i.y; // 1 backupReg0i = R3i.w; R1i.x = floatBitsToInt(intBitsToFloat(R8i.w) + intBitsToFloat(PV0i.y)); -R2i.y = floatBitsToInt(intBitsToFloat(R0i.y) + intBitsToFloat(0xbc2aaaab)*resScale); +R2i.y = floatBitsToInt(intBitsToFloat(R0i.y) + (intBitsToFloat(0xbc2aaaab)/resYScale)); R2i.z = floatBitsToInt(intBitsToFloat(R0i.x) + 0.0); R3i.w = floatBitsToInt(intBitsToFloat(backupReg0i) + intBitsToFloat(PV0i.x)); } diff --git a/Resolutions/Bayonetta2_Resolution/8a0efcdc3f556942_0000000000000079_ps.txt b/Resolutions/Bayonetta2_Resolution/8a0efcdc3f556942_0000000000000079_ps.txt index 912c5a81..33d7fc7e 100644 --- a/Resolutions/Bayonetta2_Resolution/8a0efcdc3f556942_0000000000000079_ps.txt +++ b/Resolutions/Bayonetta2_Resolution/8a0efcdc3f556942_0000000000000079_ps.txt @@ -1,7 +1,12 @@ #version 430 #extension GL_ARB_texture_gather : enable #extension GL_ARB_separate_shader_objects : enable -// 
shader 8a0efcdc3f556942 //frambuffer +// shader 8a0efcdc3f556942 +// Used for: Pyramid Blur +float resXScale = float($width)/float($gameWidth); +float resYScale = float($height)/float($gameHeight); + + // start of shader inputs/outputs, predetermined by Cemu. Do not touch #ifdef VULKAN #define ATTR_LAYOUT(__vkSet, __location) layout(set = __vkSet, location = __location) @@ -28,7 +33,6 @@ TEXTURE_LAYOUT(0, 1, 0) uniform sampler2D textureUnitPS0; layout(location = 0) in vec4 passParameterSem133; layout(location = 0) out vec4 passPixelColor0; // end of shader inputs/outputs -const float resScale = ($gameWidth / $width); int clampFI32(int v) { if( v == 0x7FFFFFFF ) @@ -63,10 +67,10 @@ backupReg0f = R0f.x; backupReg1f = R0f.y; backupReg0f = R0f.x; backupReg1f = R0f.y; -R0f.x = (intBitsToFloat(uf_remappedPS[0].z)*resScale * 2.0 + backupReg0f); -R0f.y = (intBitsToFloat(uf_remappedPS[0].w)*resScale * 2.0 + backupReg1f); -R0f.z = (-(intBitsToFloat(uf_remappedPS[0].z)*resScale) * 2.0 + backupReg0f); -R0f.w = (-(intBitsToFloat(uf_remappedPS[0].w)*resScale) * 2.0 + backupReg1f); +R0f.x = (intBitsToFloat(uf_remappedPS[0].z)/resXScale * 2.0 + backupReg0f); +R0f.y = (intBitsToFloat(uf_remappedPS[0].w)/resYScale * 2.0 + backupReg1f); +R0f.z = (-(intBitsToFloat(uf_remappedPS[0].z)/resXScale) * 2.0 + backupReg0f); +R0f.w = (-(intBitsToFloat(uf_remappedPS[0].w)/resYScale) * 2.0 + backupReg1f); R1f.xyzw = (texture(textureUnitPS0, R0f.xy).xyzw); R0f.xyzw = (texture(textureUnitPS0, R0f.zw).xyzw); // 0 diff --git a/Resolutions/Bayonetta2_Resolution/patch_aspect.asm b/Resolutions/Bayonetta2_Resolution/patch_aspect.asm deleted file mode 100644 index 315ad8ec..00000000 --- a/Resolutions/Bayonetta2_Resolution/patch_aspect.asm +++ /dev/null @@ -1,23 +0,0 @@ -[Bayo2USv0] #v5 asm -moduleMatches = 0xAF5D1A85 - -.origin = codecave all -_widthScaleRatio: -.float $width -_heightScaleRatio: -.float $height - -_Cave: -lis r7, _widthScaleRatio@ha -lfs f13, _widthScaleRatio@l(r7) -lis r7, 
_heightScaleRatio@ha -lfs f31, _heightScaleRatio@l(r7) - -fdivs f2, f13, f31 -blr - - -0x032F2E6C = bla _Cave -0x032F2044 = bla _Cave -0x032F207C = bla _Cave -0x032F2084 = bla _Cave diff --git a/Resolutions/Bayonetta2_Resolution/patch_aspectRatio.asm b/Resolutions/Bayonetta2_Resolution/patch_aspectRatio.asm new file mode 100644 index 00000000..9fe54d93 --- /dev/null +++ b/Resolutions/Bayonetta2_Resolution/patch_aspectRatio.asm @@ -0,0 +1,28 @@ +[Bayo2_AspectRatio_V0] +moduleMatches = 0xAF5D1A85 + +.origin = codecave +_widthScaleRatio: +.float $width +_heightScaleRatio: +.float $height + +_calculateARForRendering: +lis r7, _widthScaleRatio@ha +lfs f13, _widthScaleRatio@l(r7) +lis r7, _heightScaleRatio@ha +lfs f31, _heightScaleRatio@l(r7) +fdivs f2, f13, f31 +blr + +_calculateARForCulling: +lis r7, _widthScaleRatio@ha +lfs f13, _widthScaleRatio@l(r7) +lis r7, _heightScaleRatio@ha +lfs f1, _heightScaleRatio@l(r7) +fdivs f10, f13, f1 +blr + + +0x032F2E6C = bla _calculateARForRendering +0x032F2044 = bla _calculateARForCulling