[BotW] Switch to Xalphenos' FPS++ V2.1 (fix arrow time)

Switch over to Xalphenos' version, since it now has a dynamically sized "oldest frame" buffer (there's not really a good name for what it does), which was one of the main features of my version.

Once FPS++ progress has halted, I'll clean up the comments again into something similar to what I had in my version, which I feel is more understandable for people who want to see what it does and maybe improve it.

If my testing is correct, the previous version also suffered from stability drops compared with this version. If nothing else, this update brings the arrow time stamina drain fix.
This commit is contained in:
Crementif 2018-12-23 02:07:08 +01:00
parent 7befd59159
commit 8699939dfc
2 changed files with 194 additions and 435 deletions

View File

@ -1,119 +1,112 @@
[BotwFPSV208]
moduleMatches = 0x6267BFD0
# rodata constants
#rodata constants
_float.5 = 0x100005E0
_float1 = 0x10000670
_float30 = 0x101E7964
_convSub = 0x10000BB0
_float32 = 0x1021FCEC
_arrowsDiv = 0x1001CCAC
_arrowsDer = 0x1001CB18
# TestCave
codeCaveSize = 600 ; Theoretically, allow for 74 or so FPS to be averaged.
#TestCave
codeCaveSize = 0x12C
_fpsCalc = 0x00000018
0x00000000 = .float 2
_lowLimit = 0x00000000
_fpsCalc = 0x00000010
0x00000000 = .float 2 ; Don't edit me!
_lowlimit = 0x00000000
0x00000004 = .float 62156250
_busSpeed = 0x00000004
_averageFPS = 0x00000008
_speedDiv = 0x0000000C
0x00000010 = .float (30*30)
_arrowDivisor = 0x00000010
0x00000014 = .float (0.5/30)
_arrowMultiplier = 0x00000014
0x00000120 = .int (4*$amount_of_FPS_averaged+4)
_averageMaxByteLength = 0x00000120
0x00000124 = .float ($amount_of_FPS_averaged)
_averageFPSBufferDivider = 0x00000124
0x00000128 = .float (0)
_averageFPSBufferTotal = 0x00000128
0x0000012C = .int (4)
_averageFPSIndex = 0x0000012C
_averageFPSBuffer = 0x00000130
# - Calculate FPS -
0x00000018 = stw r0, 0x78(r30) # Original instruction that was overwriten by the instruction that jumps to this code cave. This specific place was chosen because r12 is storing the ticks between the previous and current frame, which this code needs to calculate the game's FPS.
0x0000001C = xoris r12, r12, 0x8000 # Flips the sign bit of the game ticks (which is stored as an int in r12), because the code wants to store this as the lower half of a double, and the lower half can't be prepended by a sign bit.
0x00000020 = stw r12, 0x12C(r30) # Stores the sign-bit-flipped ticks to a temporary part of the game's memory, making up the lower 4 bytes of the 8 bytes total from the double that's being created.
0x00000024 = lis r12, 0x4330 # Loads a value of 0x4330 to r12 (Why this number?)
0x00000028 = stw r12, 0x128(r30) # Stores the loaded value from r12 to the upper 4 bytes of the 8 bytes total. Now it makes a full floating point double.
0x0000002C = lfd f10, 0x128(r30) # Loads that floating point double into f10, so that the code can perform floating point operations.
0x00000030 = lis r12, _convSub@ha # Loads upper half of an address to r12 (Side note, PowerPC code always needs to load into or store from registers using two instructions if it needs to interact with an absolute address due to how PowerPC is designed).
0x00000034 = lfd f12, _convSub@l(r12) # Uses the upper half of the address from r12 to load the value that's stored at convSub to f12.
0x00000038 = fsub f10, f10, f12 # Subtracts the convSub (f12) from the game ticks double and store the result in f10.
0x0000003C = frsp f10, f10 # Rounds f10 to a floating point single.
0x00000040 = lis r12, _float1@ha # Loads upper half of an address to r12.
0x00000044 = lfs f12, _float1@l(r12) # Uses the upper half of the address from r12 to load a 1 to f12.
0x00000048 = fdivs f10, f12, f10 # Divides a 1 with the converted game ticks that were stored in f10.
0x0000004C = lis r12, _busSpeed@ha # Loads upper half of an address to r12.
0x00000050 = lfs f12, _busSpeed@l(r12) # Uses the upper half of the address from r12 to load the value that is the Wii U's bus speed to f12.
0x00000054 = fmuls f10, f12, f10 # Multiplies the game ticks (f10) by the bus speed (f12).
# - Arrow Fix -
0x00000058 = lis r12, _arrowDivisor@ha # Loads the upper half of an address to r12.
0x0000005C = lfs f12, _arrowDivisor@l(r12) # Uses the upper half of the address from r12 to load the arrow divisor (30*30) to f12.
0x00000060 = fdivs f12, f12, f10 # Divides the arrow divisor (f12) with the game's current FPS (f10) to f12.
0x00000064 = lis r12, _arrowsDiv@ha # Loads the upper half of an address to r12.
0x00000068 = stfs f12, _arrowsDiv@l(r12) # Uses the upper half of the address from r12 to store a "dynamic" constant to correct the arrow drop.
0x0000006C = lis r12, _arrowMultiplier@ha # Loads the upper half of an address to r12.
0x00000070 = lfs f12, _arrowMultiplier@l(r12) # Uses the upper half of the address from r12 to load the arrow multiplier (0.5/30) to f12
0x00000074 = fmuls f12, f12, f10 # Multiplies the arrow multiplier (f12) with the game current FPS (f10) to f12
0x00000078 = lis r12, _arrowsDer@ha # Loads the upper half of an address to r12.
0x0000007C = stfs f12, _arrowsDer@l(r12) # Uses the upper half of the address from r12 to store another dynamic constant to correct the arrow drop.
# - FPS Averaging -
0x00000080 = lis r3, _averageFPSBuffer@ha # Load the pointer to the buffer to r3
0x00000084 = addi r3, r3, _averageFPSBuffer@l #
0x00000088 = lis r11, _averageFPSIndex@ha # Load the index which has the offset to the oldest FPS in the buffer to r12
0x0000008C = lwz r12, _averageFPSIndex@l(r11) #
0x00000090 = lfsx f12, r3, r12 # Load the oldest FPS using the pointer (r3) plus the offset (r12) to f12
0x00000094 = stfsx f10, r3, r12 # Store the newest FPS to the buffer using the pointer (r3) plus the offset (r12) to f12
0x00000098 = lis r3, _averageMaxByteLength@ha # Load the max byte length to r11
0x0000009C = lwz r3, _averageMaxByteLength@l(r3) #
0x000000A0 = addi r12, r12, 0x04 # Add 4 to the index (r12) which sets the address offset to the next address
0x000000A4 = cmp 0, 0, r12, r3 # Compare the index (r12) to the max byte length (r3)
0x000000A8 = bne .+0x08 # If the compared numbers are not equal, skip over the instruction that resets the offset to 0 if the max has been reached
0x000000AC = li r12, 4 #
0x000000B0 = stw r12, _averageFPSIndex@l(r11) #
0x000000B4 = lis r12, _averageFPSBufferTotal@ha # Load the total of the buffer to r12
0x000000B8 = lfs f7, _averageFPSBufferTotal@l(r12) #
0x000000BC = fsubs f7, f7, f12 # Subtract the oldest FPS (f12) from the total of the buffer (f7), but add the newest FPS (f10)
0x000000C0 = fadds f7, f7, f10 #
0x000000C4 = stfs f7, _averageFPSBufferTotal@l(r11) # Store the updated total of the buffer back
0x000000C8 = lis r12, _averageFPSBufferDivider@ha # Load the divider (= the amount of frames stored in the buffer)
0x000000CC = lfs f12, _averageFPSBufferDivider@l(r12) #
0x000000D0 = fdivs f10, f7, f12 # Divide the updated buffer total with the divider to store the averaged fps to f10
0x000000D4 = lis r12, _averageFPS@ha # Store averaged FPS
0x000000D8 = stfs f10, _averageFPS@l(r12) #
# - Low FPS Limiter -
0x000000DC = lis r12, _float30@ha # Loads the upper half of an address to r12.
0x000000E0 = lfs f12, _float30@l(r12) # Uses the upper half of the address from r12 to load a 30 into the register.
0x000000E4 = fdivs f7, f12, f10 # Divides 30 by the game's averaged FPS.
0x000000E8 = lis r12, _lowLimit@ha # Loads the upper half of an address to r12.
0x000000EC = lfs f12, _lowLimit@l(r12) # Uses the upper half of the address from r12 to load the limit that the game speed can be set as. This might prevent bugs that only happen at a low FPS. By default, the lowest speed it'll try setting the game as is 15 FPS.
0x000000F0 = lis r12, _speedDiv@ha # Loads the upper half of an address to r12.
0x000000F4 = fcmpu cr0, f7, f12 # Compares the game's speed (f7) with the limit.
0x000000F8 = bge .+0x0C # Skips the next two instructions if in the previous comparison f7 was greater or equal to f12.
0x000000FC = stfs f7, _speedDiv@l(r12) # Uses the upper half from the address from r12 to store the game's speed.
0x00000100 = blr # Return to place that called this code cave.
0x00000104 = stfs f12, _speedDiv@l(r12) # Uses the upper half from the address from r12 to store the low limit.
0x00000108 = blr # Return to place that called this code cave.
_fenceNeg1 = 0x0000010C # - Best Fence Skip -
0x0000010C = add r6, r12, r0 # Original instruction that was overwriten by the instruction that jumps to this code cave.
0x00000110 = cmpwi r6, 500 # Checks if r6 less than 500 (full sync on those first frames to prevent milk water).
0x00000114 = blt .+0x08 # Branches to return subtract instruction if r6 is less than 500.
0x00000118 = subi r6, r6, 1 # Subtract 1 from r6.
0x0000011C = blr # Return to place that called this code cave.
# HOW TO CHANGE AMOUNT OF FRAMES TO AVERAGE
# "Line 74" in the notes below means the instruction at code cave offset 0x00000074 (cmpwi r12, 0x84).
0x00000128 = .float 32 #This float can be anything. To change the amount of frames averaged change this float...
_bufferSize = 0x00000128 #...and change the "0x84" on line 74 to this number times 4 plus 4 converted to hex.
#Example using 8 frames: 8 x 4 = 32, 32 + 4 = 36, which is 0x24 in hex.
#So the instruction at 0x00000074 would change from "cmpwi r12, 0x84" to "cmpwi r12, 0x24".
# codeChanges
# _fpsCalc (code cave offset 0x10): reached through the "bla _fpsCalc" patch. Converts the frame tick count in r12 into the
# current FPS, folds it into a running average, derives _speedDiv = 30 / averageFPS (capped at _lowlimit) and refreshes the
# two arrow constants. Clobbers r3, r12, f7, f10 and f12, plus scratch memory at 0xD0 and above relative to r30.
#
# --- Ticks (integer in r12) -> float in f10 ---
# An IEEE-754 double whose high word is 0x43300000 and whose low word is x has the value 2^52 + x, so building that double
# from the sign-flipped ticks and subtracting _convSub leaves the tick count as a floating point number.
# NOTE(review): _convSub lives in game rodata; its value (presumably 0x4330000080000000) is not visible here - confirm.
0x00000010 = stw r0, 0x78(r30) #orig instruction we are replacing ;ticks between frame are currently in r12.
0x00000014 = xoris r12, r12, 0x8000 #flip the sign bit of int ticks for floating point conversion
0x00000018 = stw r12, 0xD4(r30) #store sign flipped ticks in memory as lower half of double
0x0000001C = lis r12, 0x4330 #create upper half of ticks double
0x00000020 = stw r12, 0xD0(r30) #store it in upper half of memory
0x00000024 = lfd f10, 0xD0(r30) #Load full double ticks into f10
0x00000028 = lis r12, _convSub@ha #load number to subtract from tick double...
0x0000002C = lfd f12, _convSub@l(r12) #...to create tick float into f12
0x00000030 = fsub f10, f10, f12 #Do the subtraction
0x00000034 = frsp f10, f10 #round to single precision and tick float is in f10
# --- Current FPS = bus speed / ticks ---
0x00000038 = lis r12, _float1@ha #Load float of 1...
0x0000003C = lfs f12, _float1@l(r12) #...into f12
0x00000040 = fdivs f10, f12, f10 #divide 1 by ticks
0x00000044 = lis r12, _busSpeed@ha #load wii u bus speed...
0x00000048 = lfs f12, _busSpeed@l(r12) #...into f12
0x0000004C = fmuls f10, f12, f10 #multiply bus speed to have current fps in f10. (1/ticks)*bus speed
# --- Running average over the most recent samples ---
# Scratch layout relative to r30: 0xD8 = saved base pointer of the sample area, 0xDC = byte counter into the sample area,
# 0xE0 = running FPS sum, 0xE4 onwards = the stored FPS samples (each store lands at base + counter + 4).
0x00000050 = mr r3, r30 #Make a copy of r30 so we can screw around with it
0x00000054 = addi r3, r3, 0xE0 #Add offset to available memory space
0x00000058 = stw r3, 0xD8(r30) #Save our virgin memory address so we can retrieve it later
0x0000005C = lwz r12, 0xDC(r30) #load counter into r12
0x00000060 = add r3, r3, r12 #add our counter to the offset for our memory location
0x00000064 = stfsu f10, 0x04(r3) #store current fps to memory location +4 and update efective address into r3
0x00000068 = lfs f7, 0xE0(r30) #Load fpsSum into f7
0x0000006C = fadds f10, f10, f7 #Add currentFPS to fpsSum
0x00000070 = addi r12, r12, 0x04 #Incriment counter by 4
# The wrap value compared against is 0x84 (32 * 4 + 4), not the 0x80 mentioned in the trailing comment.
0x00000074 = cmpwi r12, 0x84 #Compare counter with 0x80(32 address from base memory location)
# When the counter has not reached the wrap value, bne skips both the counter reset and the base pointer reload.
0x00000078 = bne .+0x0C #If we write our last current fps then
0x0000007C = lis r12, 0 #Zero our counter
0x00000080 = lwz r3, 0xD8(r30) #and reload our virgin memory offset
0x00000084 = stw r12, 0xDC(r30) #Store counter r12 to memory location
# r3 + 4 is the slot after the one just written (or the first slot after a wrap), which holds the oldest sample.
0x00000088 = lfs f7, 0x04(r3) #load oldest fps into f7
0x0000008C = fsubs f10, f10, f7 #Subtract oldest fps from fpsSum
0x00000090 = stfs f10, 0xE0(r30) #store fpsSum back into it's memory address.
0x00000094 = lis r12, _bufferSize@ha #load 32 as a float ...
0x00000098 = lfs f7, _bufferSize@l(r12) #...into f7
0x0000009C = fdivs f10, f10, f7 #divide fps sum by 32, the number of saved fps values, to get average fps over 32 frames
0x000000A0 = lis r12, _averageFPS@ha #Store averaged fps...
0x000000A4 = stfs f10, _averageFPS@l(r12) #... into _averageFPS variable.
# --- Game speed (speedDiv) with an upper cap ---
# _lowlimit is the float at code cave offset 0 (2 in this section), so speedDiv is capped at 2, which corresponds to
# 30 / 2 = 15 FPS. The "10/30 = 3" and "10 fps" wording in the trailing comments below does not match that value.
0x000000A8 = lis r12, _float30@ha #Setting up speed diviser. 30/current fps = game speed. ex 30/60 = .5 game speed
0x000000AC = lfs f12, _float30@l(r12) #load float 30 into f12
0x000000B0 = fdivs f7, f12, f10 #do the division f10 is our current fps(now averaged)
0x000000B4 = lis r12, _lowlimit@ha #set up a low limit to not set game speed below. here: 10/30 = 3
0x000000B8 = lfs f12, _lowlimit@l(r12) #Set 3 as the low limit into f12
0x000000BC = lis r12, _speedDiv@ha #prepare to store game speed into _speedDiv but wait.
0x000000C0 = fcmpu cr0, f7, f12 #compare Compare lowlimit with current working game speed
# If f7 >= f12 the bge lands on the fmr that replaces f7 with the limit; otherwise the following b jumps over the fmr.
0x000000C4 = bge .+0x08 #If game speed is not being set below 10 fps...
0x000000C8 = b .+0x08 #then skip the next line
0x000000CC = fmr f7, f12 #else overrite working current speed with low limit as new working current speed
0x000000D0 = stfs f7, _speedDiv@l(r12) #store working current speed into _speedDiv
# --- Arrow constants, both derived from the capped speedDiv still held in f7 ---
0x000000D4 = lis r12, _float30@ha #An attempt to port arrow fix to fps++
0x000000D8 = lfs f12, _float30@l(r12) #load float30 into f12
0x000000DC = fmuls f10, f7, f12 #arrowsDiv = 30 * speedDiv
0x000000E0 = lis r12, _arrowsDiv@ha
0x000000E4 = stfs f10, _arrowsDiv@l(r12) #store the results
0x000000E8 = lis r12, _float.5@ha #load float of .5 into f12
0x000000EC = lfs f12, _float.5@l(r12)
0x000000F0 = fdivs f10, f12, f7 #arrowsDer = .5 / speedDiv
0x000000F4 = lis r12, _arrowsDer@ha
0x000000F8 = stfs f10, _arrowsDer@l(r12) #store the results
0x000000FC = blr #return
# _fenceNeg1: target of the "bla _fenceNeg1" fence patch. Re-runs the replaced add (r6 = r12 + r0), then lowers r6 by one
# unless r6 is below 500, in which case r6 is returned unchanged.
_fenceNeg1 = 0x00000100
0x00000100 = add r6, r12, r0 #orig instruction we are replacing
0x00000104 = cmpwi r6, 500 #check if less than 500 (full sync on those first frames to prevent milk water)
0x00000108 = blt .+0x08 #skip subtract if less than
0x0000010C = subi r6, r6, 1 #do the subtract
0x00000110 = blr #return
# _arrowTimeDrain: target of the "bla _arrowTimeDrain" patch (the arrow time stamina drain fix named in the commit message).
# Loads the float at 0xFC(r11) into f1 and multiplies it by _speedDiv, returning the scaled value in f1.
# Clobbers r12 and f0.
# NOTE(review): the lfs is presumably the original instruction replaced at the hook site - confirm against the game code.
_arrowTimeDrain = 0x00000114
0x00000114 = lfs f1, 0xFC(r11)
0x00000118 = lis r12, _speedDiv@ha
0x0000011C = lfs f0, _speedDiv@l(r12)
0x00000120 = fmuls f1, f1, f0
0x00000124 = blr
#codeChanges
0x031FA97C = bla _fpsCalc
0x03793328 = nop
0x03793334 = nop
@ -127,128 +120,127 @@ _fenceNeg1 = 0x0000010C # - Best Fence Skip -
0x02D90D30 = lfs f11, _averageFPS@l(r10)
0x02D90D88 = lis r10, _averageFPS@ha
0x02D90D8C = lfs f11, _averageFPS@l(r10)
0x02D5F760 = bla _arrowTimeDrain
#break all forms of frame limiting
0x031FAAFC = bla _fenceNeg1 ;Best fence (make sure GPU is never more than 1 frame ahead)
;0x031FAAE8 = li r0, 1 ;Better fence (make sure GPU is never more than 1 second? ahead)
;0x031FAB00 = nop ; skip fence (no waiting on GPU)
0x031FACD0 = nop ; disable vsync
0x031FACF4 = nop ; disable vsync loop
# Break all forms of frame limiting
0x031FAAFC = bla _fenceNeg1 # Best fence (make sure GPU is never more than 1 frame ahead), enabled by default
;0x031FAAE8 = li r0, 1 # Better fence (make sure GPU is never more than 1 second? ahead), disabled by default
;0x031FAB00 = nop # Skip fence (no waiting on GPU), disabled by default
0x031FACD0 = nop # Disable vsync, enabled by default
0x031FACF4 = nop # Disable vsync loop, enabled by default
[BotwFPSv176V192]
moduleMatches = 0xFD091F9F, 0xD472D8A5
# rodata constants
_float.5 = 0x100005E0
_float1 = 0x10000670
_float30 = 0x101E78F4
_float32 = 0x1025D1A8
_convSub = 0x10000BB0
_arrowsDiv = 0x1001CCAC
_arrowsDer = 0x1001CB18
# TestCave
codeCaveSize = 600 ; Theoretically, allow for 74 or so FPS to be averaged.
_fpsCalc = 0x00000018
0x00000000 = .float 2
_lowLimit = 0x00000000
#TestCave
codeCaveSize = 0x12C
_fpsCalc = 0x00000010
0x00000000 = .float 2 ; Don't edit me!
_lowlimit = 0x00000000
0x00000004 = .float 62156250
_busSpeed = 0x00000004
_averageFPS = 0x00000008
_speedDiv = 0x0000000C
0x00000010 = .float (30*30)
_arrowDivisor = 0x00000010
# HOW TO CHANGE AMOUNT OF FRAMES TO AVERAGE
# "Line 74" in the notes below means the instruction at code cave offset 0x00000074 (cmpwi r12, 0x84).
# NOTE(review): in this section the averaging divide (code cave offsets 0x94-0x9C) loads _float32 rather than _bufferSize,
# so editing the float below may not change the divisor here - confirm.
0x00000128 = .float 32 #This float can be anything. To change the amount of frames averaged change this float...
_bufferSize = 0x00000128 #...and change the "0x84" on line 74 to this number times 4 plus 4 converted to hex.
#Example using 8 frames: 8 x 4 = 32, 32 + 4 = 36, which is 0x24 in hex.
#So the instruction at 0x00000074 would change from "cmpwi r12, 0x84" to "cmpwi r12, 0x24".
0x00000014 = .float (0.5/30)
_arrowMultiplier = 0x00000014
0x00000120 = .int (4*$amount_of_FPS_averaged+4)
_averageMaxByteLength = 0x00000120
0x00000010 = stw r0, 0x78(r30) #orig instruction we are replacing ;ticks between frame are currently in r12.
0x00000014 = xoris r12, r12, 0x8000 #flip the sign bit of int ticks for floating point conversion
0x00000018 = stw r12, 0xD4(r30) #store sign flipped ticks in memory as lower half of double
0x0000001C = lis r12, 0x4330 #create upper half of ticks double
0x00000020 = stw r12, 0xD0(r30) #store it in upper half of memory
0x00000024 = lfd f10, 0xD0(r30) #Load full double ticks into f10
0x00000028 = lis r12, _convSub@ha #load number to subtract from tick double...
0x0000002C = lfd f12, _convSub@l(r12) #...to create tick float into f12
0x00000030 = fsub f10, f10, f12 #Do the subtraction
0x00000034 = frsp f10, f10 #round to single precision and tick float is in f10
0x00000038 = lis r12, _float1@ha #Load float of 1...
0x0000003C = lfs f12, _float1@l(r12) #...into f12
0x00000040 = fdivs f10, f12, f10 #divide 1 by ticks
0x00000044 = lis r12, _busSpeed@ha #load wii u bus speed...
0x00000048 = lfs f12, _busSpeed@l(r12) #...into f12
0x0000004C = fmuls f10, f12, f10 #multiply bus speed to have current fps in f10. (1/ticks)*bus speed
0x00000124 = .float ($amount_of_FPS_averaged)
_averageFPSBufferDivider = 0x00000124
0x00000050 = mr r3, r30 #Make a copy of r30 so we can screw around with it
0x00000054 = addi r3, r3, 0xE0 #Add offset to available memory space
0x00000058 = stw r3, 0xD8(r30) #Save our virgin memory address so we can retrieve it later
0x0000005C = lwz r12, 0xDC(r30) #load counter into r12
0x00000060 = add r3, r3, r12 #add our counter to the offset for our memory location
0x00000064 = stfsu f10, 0x04(r3) #store current fps to memory location +4 and update efective address into r3
0x00000068 = lfs f7, 0xE0(r30) #Load fpsSum into f7
0x0000006C = fadds f10, f10, f7 #Add currentFPS to fpsSum
0x00000070 = addi r12, r12, 0x04 #Incriment counter by 4
0x00000074 = cmpwi r12, 0x84 #Compare counter with 0x84(32 address from base memory location)
0x00000078 = bne .+0x0C #If we write our last current fps then
0x0000007C = lis r12, 0 #Zero our counter
0x00000080 = lwz r3, 0xD8(r30) #and reload our virgin memory offset
0x00000084 = stw r12, 0xDC(r30) #Store counter r12 to memory location
0x00000088 = lfs f7, 0x04(r3) #load oldest fps into f7
0x0000008C = fsubs f10, f10, f7 #Subtract oldest fps from fpsSum
0x00000090 = stfs f10, 0xE0(r30) #store fpsSum back into it's memory address.
0x00000094 = lis r12, _float32@ha #load 32 as a float ...
0x00000098 = lfs f7, _float32@l(r12) #...into f7
0x0000009C = fdivs f10, f10, f7 #divide fps sum by 32, the number of saved fps values, to get average fps over 32 frames
0x000000A0 = lis r12, _averageFPS@ha #Store averaged fps...
0x000000A4 = stfs f10, _averageFPS@l(r12) #... into _averageFPS variable.
0x00000128 = .float (0)
_averageFPSBufferTotal = 0x00000128
0x000000A8 = lis r12, _float30@ha #Setting up speed diviser. 30/current fps = game speed. ex 30/60 = .5 game speed
0x000000AC = lfs f12, _float30@l(r12) #load float 30 into f12
0x000000B0 = fdivs f7, f12, f10 #do the division f10 is our current fps(now averaged)
0x000000B4 = lis r12, _lowlimit@ha #set up a low limit to not set game speed below. here: 10/30 = 3
0x000000B8 = lfs f12, _lowlimit@l(r12) #Set 3 as the low limit into f12
0x000000BC = lis r12, _speedDiv@ha #prepare to store game speed into _speedDiv but wait.
0x000000C0 = fcmpu cr0, f7, f12 #compare Compare lowlimit with current working game speed
0x000000C4 = bge .+0x08 #If game speed is not being set below 10 fps...
0x000000C8 = b .+0x08 #then skip the next line
0x000000CC = fmr f7, f12 #else overrite working current speed with low limit as new working current speed
0x000000D0 = stfs f7, _speedDiv@l(r12) #store working current speed into _speedDiv
0x0000012C = .int (4)
_averageFPSIndex = 0x0000012C
0x000000D4 = lis r12, _float30@ha #An attempt to port arrow fix to fps++
0x000000D8 = lfs f12, _float30@l(r12) #load float30 into f12
0x000000DC = fmuls f10, f7, f12 #arrowsDiv = 30 * speedDiv
0x000000E0 = lis r12, _arrowsDiv@ha
0x000000E4 = stfs f10, _arrowsDiv@l(r12) #store the results
0x000000E8 = lis r12, _float.5@ha #load float of .5 into f12
0x000000EC = lfs f12, _float.5@l(r12)
0x000000F0 = fdivs f10, f12, f7 #arrowsDer = .5 / speedDiv
0x000000F4 = lis r12, _arrowsDer@ha
0x000000F8 = stfs f10, _arrowsDer@l(r12) #store the results
0x000000FC = blr #return
_averageFPSBuffer = 0x00000130
# _fenceNeg1: target of the "bla _fenceNeg1" fence patch. Re-runs the replaced add (r6 = r12 + r0), then lowers r6 by one
# unless r6 is below 500, in which case r6 is returned unchanged.
_fenceNeg1 = 0x00000100
0x00000100 = add r6, r12, r0 #orig instruction we are replacing
0x00000104 = cmpwi r6, 500 #check if less than 500 (full sync on those first frames to prevent milk water)
0x00000108 = blt .+0x08 #skip subtract if less than
0x0000010C = subi r6, r6, 1 #do the subtract
0x00000110 = blr #return
# - Calculate FPS -
0x00000018 = stw r0, 0x78(r30) # Original instruction that was overwriten by the instruction that jumps to this code cave. This specific place was chosen because r12 is storing the ticks between the previous and current frame, which this code needs to calculate the game's FPS.
0x0000001C = xoris r12, r12, 0x8000 # Flips the sign bit of the game ticks (which is stored as an int in r12), because the code wants to store this as the lower half of a double, and the lower half can't be prepended by a sign bit.
0x00000020 = stw r12, 0x12C(r30) # Stores the sign-bit-flipped ticks to a temporary part of the game's memory, making up the lower 4 bytes of the 8 bytes total from the double that's being created.
0x00000024 = lis r12, 0x4330 # Loads a value of 0x4330 to r12 (Why this number?)
0x00000028 = stw r12, 0x128(r30) # Stores the loaded value from r12 to the upper 4 bytes of the 8 bytes total. Now it makes a full floating point double.
0x0000002C = lfd f10, 0x128(r30) # Loads that floating point double into f10, so that the code can perform floating point operations.
0x00000030 = lis r12, _convSub@ha # Loads upper half of an address to r12 (Side note, PowerPC code always needs to load into or store from registers using two instructions if it needs to interact with an absolute address due to how PowerPC is designed).
0x00000034 = lfd f12, _convSub@l(r12) # Uses the upper half of the address from r12 to load the value that's stored at convSub to f12.
0x00000038 = fsub f10, f10, f12 # Subtracts the convSub (f12) from the game ticks double and store the result in f10.
0x0000003C = frsp f10, f10 # Rounds f10 to a floating point single.
0x00000040 = lis r12, _float1@ha # Loads upper half of an address to r12.
0x00000044 = lfs f12, _float1@l(r12) # Uses the upper half of the address from r12 to load a 1 to f12.
0x00000048 = fdivs f10, f12, f10 # Divides a 1 with the converted game ticks that were stored in f10.
0x0000004C = lis r12, _busSpeed@ha # Loads upper half of an address to r12.
0x00000050 = lfs f12, _busSpeed@l(r12) # Uses the upper half of the address from r12 to load the value that is the Wii U's bus speed to f12.
0x00000054 = fmuls f10, f12, f10 # Multiplies the game ticks (f10) by the bus speed (f12).
# - Arrow Fix -
0x00000058 = lis r12, _arrowDivisor@ha # Loads the upper half of an address to r12.
0x0000005C = lfs f12, _arrowDivisor@l(r12) # Uses the upper half of the address from r12 to load the arrow divisor (30*30) to f12.
0x00000060 = fdivs f12, f12, f10 # Divides the arrow divisor (f12) with the game's current FPS (f10) to f12.
0x00000064 = lis r12, _arrowsDiv@ha # Loads the upper half of an address to r12.
0x00000068 = stfs f12, _arrowsDiv@l(r12) # Uses the upper half of the address from r12 to store a "dynamic" constant to correct the arrow drop.
0x0000006C = lis r12, _arrowMultiplier@ha # Loads the upper half of an address to r12.
0x00000070 = lfs f12, _arrowMultiplier@l(r12) # Uses the upper half of the address from r12 to load the arrow multiplier (0.5/30) to f12
0x00000074 = fmuls f12, f12, f10 # Multiplies the arrow multiplier (f12) with the game current FPS (f10) to f12
0x00000078 = lis r12, _arrowsDer@ha # Loads the upper half of an address to r12.
0x0000007C = stfs f12, _arrowsDer@l(r12) # Uses the upper half of the address from r12 to store another dynamic constant to correct the arrow drop.
# - FPS Averaging -
0x00000080 = lis r3, _averageFPSBuffer@ha # Load the pointer to the buffer to r3
0x00000084 = addi r3, r3, _averageFPSBuffer@l #
0x00000088 = lis r11, _averageFPSIndex@ha # Load the index which has the offset to the oldest FPS in the buffer to r12
0x0000008C = lwz r12, _averageFPSIndex@l(r11) #
0x00000090 = lfsx f12, r3, r12 # Load the oldest FPS using the pointer (r3) plus the offset (r12) to f12
0x00000094 = stfsx f10, r3, r12 # Store the newest FPS to the buffer using the pointer (r3) plus the offset (r12) to f12
0x00000098 = lis r3, _averageMaxByteLength@ha # Load the max byte length to r11
0x0000009C = lwz r3, _averageMaxByteLength@l(r3) #
0x000000A0 = addi r12, r12, 0x04 # Add 4 to the index (r12) which sets the address offset to the next address
0x000000A4 = cmp 0, 0, r12, r3 # Compare the index (r12) to the max byte length (r3)
0x000000A8 = bne .+0x08 # If the compared numbers are not equal, skip over the instruction that resets the offset to 0 if the max has been reached
0x000000AC = li r12, 4 #
0x000000B0 = stw r12, _averageFPSIndex@l(r11) #
0x000000B4 = lis r12, _averageFPSBufferTotal@ha # Load the total of the buffer to r12
0x000000B8 = lfs f7, _averageFPSBufferTotal@l(r12) #
0x000000BC = fsubs f7, f7, f12 # Subtract the oldest FPS (f12) from the total of the buffer (f7), but add the newest FPS (f10)
0x000000C0 = fadds f7, f7, f10 #
0x000000C4 = stfs f7, _averageFPSBufferTotal@l(r11) # Store the updated total of the buffer back
0x000000C8 = lis r12, _averageFPSBufferDivider@ha # Load the divider (= the amount of frames stored in the buffer)
0x000000CC = lfs f12, _averageFPSBufferDivider@l(r12) #
0x000000D0 = fdivs f10, f7, f12 # Divide the updated buffer total with the divider to store the averaged fps to f10
0x000000D4 = lis r12, _averageFPS@ha # Store averaged FPS
0x000000D8 = stfs f10, _averageFPS@l(r12) #
# - Low FPS Limiter -
0x000000DC = lis r12, _float30@ha # Loads the upper half of an address to r12.
0x000000E0 = lfs f12, _float30@l(r12) # Uses the upper half of the address from r12 to load a 30 into the register.
0x000000E4 = fdivs f7, f12, f10 # Divides 30 by the game's averaged FPS.
0x000000E8 = lis r12, _lowLimit@ha # Loads the upper half of an address to r12.
0x000000EC = lfs f12, _lowLimit@l(r12) # Uses the upper half of the address from r12 to load the limit that the game speed can be set as. This might prevent bugs that only happen at a low FPS. By default, the lowest speed it'll try setting the game as is 15 FPS.
0x000000F0 = lis r12, _speedDiv@ha # Loads the upper half of an address to r12.
0x000000F4 = fcmpu cr0, f7, f12 # Compares the game's speed (f7) with the limit.
0x000000F8 = bge .+0x0C # Skips the next two instructions if in the previous comparison f7 was greater or equal to f12.
0x000000FC = stfs f7, _speedDiv@l(r12) # Uses the upper half from the address from r12 to store the game's speed.
0x00000100 = blr # Return to place that called this code cave.
0x00000104 = stfs f12, _speedDiv@l(r12) # Uses the upper half from the address from r12 to store the low limit.
0x00000108 = blr # Return to place that called this code cave.
_fenceNeg1 = 0x0000010C # - Best Fence Skip -
0x0000010C = add r6, r12, r0 # Original instruction that was overwriten by the instruction that jumps to this code cave.
0x00000110 = cmpwi r6, 500 # Checks if r6 less than 500 (full sync on those first frames to prevent milk water).
0x00000114 = blt .+0x08 # Branches to return subtract instruction if r6 is less than 500.
0x00000118 = subi r6, r6, 1 # Subtract 1 from r6.
0x0000011C = blr # Return to place that called this code cave.
# _arrowTimeDrain: target of the "bla _arrowTimeDrain" patch (the arrow time stamina drain fix named in the commit message).
# Loads the float at 0xFC(r11) into f1 and multiplies it by _speedDiv, returning the scaled value in f1.
# Clobbers r12 and f0.
# NOTE(review): the lfs is presumably the original instruction replaced at the hook site - confirm against the game code.
_arrowTimeDrain = 0x00000114
0x00000114 = lfs f1, 0xFC(r11)
0x00000118 = lis r12, _speedDiv@ha
0x0000011C = lfs f0, _speedDiv@l(r12)
0x00000120 = fmuls f1, f1, f0
0x00000124 = blr
#codeChanges
0x031F9E80 = bla _fpsCalc
@ -264,235 +256,11 @@ _fenceNeg1 = 0x0000010C # - Best Fence Skip -
0x02D90794 = lfs f11, _averageFPS@l(r10)
0x02D907EC = lis r10, _averageFPS@ha
0x02D907F0 = lfs f11, _averageFPS@l(r10)
0x02D5F200 = bla _arrowTimeDrain
# Break all forms of frame limiting
0x031FA000 = bla _fenceNeg1 # Best fence (make sure GPU is never more than 1 frame ahead), enabled by default
;0x031F9FEC = li r0, 1 # Better fence (make sure GPU is never more than 1 second? ahead), disabled by default
;0x031FA004 = nop # Skip fence (no waiting on GPU), disabled by default
0x031FA1D4 = nop # Disable vsync, enabled by default
0x031FA1F8 = nop # Disable vsync loop, enabled by default
[Botw60fpsv144]
moduleMatches = 0x9A2CA0C7
#rodata constants
_float1 = 0x1027AB80
_float3 = 0x100005DC
#TestCave
codeCaveSize = 0x9C
# _fpsCalc (code cave offset 0x08): reached through this section's "bla _fpsCalc" patch. Converts the tick count in r12 to a
# float, computes f10 = _busSpeed / ticks and f7 = _float3 / f10, then stores the smaller of f7 and _lowlimit to 0x130(r30).
# Clobbers r12, f7, f10 and f12, plus scratch memory at 0x128-0x13F relative to r30.
# NOTE(review): what the game does with the float at 0x130(r30) is not visible here (presumably its speed factor) - confirm.
_fpsCalc = 0x00000008
0x00000000 = .float 3 ;edit me
_lowlimit = 0x00000000
0x00000004 = .float 6294724
_busSpeed = 0x00000004
# Re-run the instruction replaced by the hook.
0x00000008 = stw r0, 0x78(r30)
# Integer -> double conversion: build the double 0x43300000:(ticks ^ 0x80000000) at 0x128(r30) ...
0x0000000C = xoris r12, r12, 0x8000
0x00000010 = stw r12, 0x12C(r30)
0x00000014 = lis r12, 0x4330
0x00000018 = stw r12, 0x128(r30)
0x0000001C = lfd f10, 0x128(r30)
# ... build the constant 0x4330000080000000 at 0x138(r30) (r12 still holds 0x43300000 for the high word) ...
0x00000020 = stw r12, 0x138(r30)
0x00000024 = lis r12, 0x8000
0x00000028 = stw r12, 0x13C(r30)
0x0000002C = lfd f12, 0x138(r30)
# ... and subtract it, leaving the tick count, which is then rounded to single precision.
0x00000030 = fsub f10, f10, f12
0x00000034 = frsp f10, f10
# f10 = 1 / ticks
0x00000038 = lis r12, _float1@ha
0x0000003C = lfs f12, _float1@l(r12)
0x00000040 = fdivs f10, f12, f10
# NOTE(review): no instruction is assigned to offset 0x00000044; execution passes through it between 0x40 and 0x48 -
# confirm what the patcher leaves there.
# f10 = _busSpeed * (1 / ticks)
0x00000048 = lis r12, _busSpeed@ha
0x0000004C = lfs f12, _busSpeed@l(r12)
0x00000050 = fmuls f10, f12, f10
# f7 = _float3 / f10
0x00000054 = lis r12, _float3@ha
0x00000058 = lfs f12, _float3@l(r12)
0x0000005C = fdivs f7, f12, f10
# Cap: when f7 >= _lowlimit the bge jumps to the store of the limit (offset 0x78); otherwise f7 itself is stored.
0x00000060 = lis r12, _lowlimit@ha
0x00000064 = lfs f12, _lowlimit@l(r12)
0x00000068 = fcmpu cr0, f7, f12
0x0000006C = bge .+0x0C
0x00000070 = stfs f7, 0x130(r30)
0x00000074 = blr
0x00000078 = stfs f12, 0x130(r30)
0x0000007C = blr
# _fullspeed: target of this section's two "bla _fullspeed" patches. Builds the absolute address 0x112AF634 in r3 and
# loads the float stored there into f0. Clobbers r3 and r12.
# NOTE(review): what the float at that address represents is not visible here - confirm.
_fullspeed = 0x00000080
# r3 = 0x112A0000 (upper half of the address)
0x00000080 = lis r3, 0x112A
# r12 = 0xF6340000 -> arithmetic shift right by 16 gives 0xFFFFF634 -> xoris clears the upper half, leaving 0x0000F634
0x00000084 = lis r12, 0xF634
0x00000088 = srawi r12, r12, 0x10
0x0000008c = xoris r12, r12, 0xFFFF
# r3 = 0x112AF634
0x00000090 = or r3, r3, r12
0x00000094 = lfs f0, 0x0(r3)
0x00000098 = blr
#Branch
0x03102688 = bla _fpsCalc
0x03686FA0 = nop
0x03686FAC = nop
0x03686FF0 = bla _fullspeed
0x03313C4C = bla _fullspeed
#break all forms of frame limiting
0x03102808 = nop ;disables gx2 fence
0x031029DC = nop ;disables wait for vsync
0x03102A00 = nop ;same
0x03102E10 = li r0, 1;the original 60FPS hack
0x03102DCC = li r0, 1; same
#experimental
;0x02FAA8E4 = li r3, 0 ;sets swap interval to 0
;0x02FAA98C = li r3, 0 ;same
;0x02FB2E50 = li r7, 1 ;sets GX2SetTVBuffer to 1
;0x02FB30A0 = li r7, 1 ;same
[Botw60fpsv112]
moduleMatches = 0x6FD41A61
#rodata constants
_float1 = 0x1026E518
_float3 = 0x100005D4
#TestCave
codeCaveSize = 0x9C
# _fpsCalc (code cave offset 0x08): reached through this section's "bla _fpsCalc" patch. Converts the tick count in r12 to a
# float, computes f10 = _busSpeed / ticks and f7 = _float3 / f10, then stores the smaller of f7 and _lowlimit to 0x130(r30).
# Clobbers r12, f7, f10 and f12, plus scratch memory at 0x128-0x13F relative to r30.
# NOTE(review): what the game does with the float at 0x130(r30) is not visible here (presumably its speed factor) - confirm.
_fpsCalc = 0x00000008
0x00000000 = .float 3 ;edit me
_lowlimit = 0x00000000
0x00000004 = .float 6294724
_busSpeed = 0x00000004
# Re-run the instruction replaced by the hook.
0x00000008 = stw r0, 0x78(r30)
# Integer -> double conversion: build the double 0x43300000:(ticks ^ 0x80000000) at 0x128(r30) ...
0x0000000C = xoris r12, r12, 0x8000
0x00000010 = stw r12, 0x12C(r30)
0x00000014 = lis r12, 0x4330
0x00000018 = stw r12, 0x128(r30)
0x0000001C = lfd f10, 0x128(r30)
# ... build the constant 0x4330000080000000 at 0x138(r30) (r12 still holds 0x43300000 for the high word) ...
0x00000020 = stw r12, 0x138(r30)
0x00000024 = lis r12, 0x8000
0x00000028 = stw r12, 0x13C(r30)
0x0000002C = lfd f12, 0x138(r30)
# ... and subtract it, leaving the tick count, which is then rounded to single precision.
0x00000030 = fsub f10, f10, f12
0x00000034 = frsp f10, f10
# f10 = 1 / ticks
0x00000038 = lis r12, _float1@ha
0x0000003C = lfs f12, _float1@l(r12)
0x00000040 = fdivs f10, f12, f10
# NOTE(review): no instruction is assigned to offset 0x00000044; execution passes through it between 0x40 and 0x48 -
# confirm what the patcher leaves there.
# f10 = _busSpeed * (1 / ticks)
0x00000048 = lis r12, _busSpeed@ha
0x0000004C = lfs f12, _busSpeed@l(r12)
0x00000050 = fmuls f10, f12, f10
# f7 = _float3 / f10
0x00000054 = lis r12, _float3@ha
0x00000058 = lfs f12, _float3@l(r12)
0x0000005C = fdivs f7, f12, f10
# Cap: when f7 >= _lowlimit the bge jumps to the store of the limit (offset 0x78); otherwise f7 itself is stored.
0x00000060 = lis r12, _lowlimit@ha
0x00000064 = lfs f12, _lowlimit@l(r12)
0x00000068 = fcmpu cr0, f7, f12
0x0000006C = bge .+0x0C
0x00000070 = stfs f7, 0x130(r30)
0x00000074 = blr
0x00000078 = stfs f12, 0x130(r30)
0x0000007C = blr
# _fullspeed: target of this section's two "bla _fullspeed" patches. Builds the absolute address 0x1126B42C in r3 and
# loads the float stored there into f0. Clobbers r3 and r12.
# NOTE(review): what the float at that address represents is not visible here - confirm.
_fullspeed = 0x00000080
# r3 = 0x11260000 (upper half of the address)
0x00000080 = lis r3, 0x1126
# r12 = 0xB42C0000 -> arithmetic shift right by 16 gives 0xFFFFB42C -> xoris clears the upper half, leaving 0x0000B42C
0x00000084 = lis r12, 0xB42C
0x00000088 = srawi r12, r12, 0x10
0x0000008c = xoris r12, r12, 0xFFFF
# r3 = 0x1126B42C
0x00000090 = or r3, r3, r12
0x00000094 = lfs f0, 0x0(r3)
0x00000098 = blr
#Branch
0x030D5080 = bla _fpsCalc
0x03653634 = nop
0x03653640 = nop
0x03653684 = bla _fullspeed
0x032E4130 = bla _fullspeed
#break all forms of frame limiting
0x030D5200 = nop ;comment out this line with ';' at the beginning to disable gpufenceskip
0x030D53D4 = nop
0x030D53f8 = nop
0x030D5808 = li r0, 1
0x030D57C4 = li r0, 1
#experimental
;0x02F7ECB8 = li r3, 0
;0x02F7ED60 = li r3, 0
;0x02F87224 = li r7, 1
;0x02F87474 = li r7, 1
[Botw60fpsv97]
moduleMatches = 0xD71D859D
#rodata constants
_float1 = 0x10000660
_float3 = 0x100031fC
#TestCave
codeCaveSize = 0x9C
# _fpsCalc (code cave offset 0x08): reached through this section's "bla _fpsCalc" patch. Converts the tick count in r12 to a
# float, computes f10 = _busSpeed / ticks and f7 = _float3 / f10, then stores the smaller of f7 and _lowlimit to 0x130(r30).
# Clobbers r12, f7, f10 and f12, plus scratch memory at 0x128-0x13F relative to r30.
# NOTE(review): what the game does with the float at 0x130(r30) is not visible here (presumably its speed factor) - confirm.
_fpsCalc = 0x00000008
0x00000000 = .float 3 ;edit me
_lowlimit = 0x00000000
0x00000004 = .float 6294724
_busSpeed = 0x00000004
# Re-run the instruction replaced by the hook.
0x00000008 = stw r0, 0x78(r30)
# Integer -> double conversion: build the double 0x43300000:(ticks ^ 0x80000000) at 0x128(r30) ...
0x0000000C = xoris r12, r12, 0x8000
0x00000010 = stw r12, 0x12C(r30)
0x00000014 = lis r12, 0x4330
0x00000018 = stw r12, 0x128(r30)
0x0000001C = lfd f10, 0x128(r30)
# ... build the constant 0x4330000080000000 at 0x138(r30) (r12 still holds 0x43300000 for the high word) ...
0x00000020 = stw r12, 0x138(r30)
0x00000024 = lis r12, 0x8000
0x00000028 = stw r12, 0x13C(r30)
0x0000002C = lfd f12, 0x138(r30)
# ... and subtract it, leaving the tick count, which is then rounded to single precision.
0x00000030 = fsub f10, f10, f12
0x00000034 = frsp f10, f10
# f10 = 1 / ticks
0x00000038 = lis r12, _float1@ha
0x0000003C = lfs f12, _float1@l(r12)
0x00000040 = fdivs f10, f12, f10
# NOTE(review): no instruction is assigned to offset 0x00000044; execution passes through it between 0x40 and 0x48 -
# confirm what the patcher leaves there.
# f10 = _busSpeed * (1 / ticks)
0x00000048 = lis r12, _busSpeed@ha
0x0000004C = lfs f12, _busSpeed@l(r12)
0x00000050 = fmuls f10, f12, f10
# f7 = _float3 / f10
0x00000054 = lis r12, _float3@ha
0x00000058 = lfs f12, _float3@l(r12)
0x0000005C = fdivs f7, f12, f10
# Cap: when f7 >= _lowlimit the bge jumps to the store of the limit (offset 0x78); otherwise f7 itself is stored.
0x00000060 = lis r12, _lowlimit@ha
0x00000064 = lfs f12, _lowlimit@l(r12)
0x00000068 = fcmpu cr0, f7, f12
0x0000006C = bge .+0x0C
0x00000070 = stfs f7, 0x130(r30)
0x00000074 = blr
0x00000078 = stfs f12, 0x130(r30)
0x0000007C = blr
# _fullspeed: target of this section's two "bla _fullspeed" patches. Builds the absolute address 0x1126A824 in r3 and
# loads the float stored there into f0. Clobbers r3 and r12.
# NOTE(review): what the float at that address represents is not visible here - confirm.
_fullspeed = 0x00000080
# r3 = 0x11260000 (upper half of the address)
0x00000080 = lis r3, 0x1126
# r12 = 0xA8240000 -> arithmetic shift right by 16 gives 0xFFFFA824 -> xoris clears the upper half, leaving 0x0000A824
0x00000084 = lis r12, 0xA824
0x00000088 = srawi r12, r12, 0x10
0x0000008c = xoris r12, r12, 0xFFFF
# r3 = 0x1126A824
0x00000090 = or r3, r3, r12
0x00000094 = lfs f0, 0x0(r3)
0x00000098 = blr
#Branch
0x030D4120 = bla _fpsCalc
0x03652620 = nop
0x0365262C = nop
0x03652670 = bla _fullspeed
0x032E31D0 = bla _fullspeed
#break all forms of frame limiting
0x030D42A0 = nop ;comment out this line with ';' at the beginning to disable gpufenceskip
0x030D4474 = nop
0x030D4498 = nop
0x030D48A8 = li r0, 1
0x030D4864 = li r0, 1
#experimental
;0x02F7DD58 = li r3, 0
;0x02F7DE00 = li r3, 0
;0x02F862C4 = li r7, 1
;0x02F86514 = li r7, 1
0x031FA000 = bla _fenceNeg1
;0x031F9FEC = li r0, 1
;0x031FA004 = nop
0x031FA1D4 = nop
0x031FA1F8 = nop

View File

@ -8,48 +8,39 @@ version = 3
[Preset]
name = 60FPS (ideal for 240/120/60Hz displays)
$targetFPS:int = 60
$amount_of_FPS_averaged = 32
[Preset]
name = 30FPS (ideal for 240/120/60Hz displays)
$targetFPS:int = 30
$amount_of_FPS_averaged = 32
[Preset]
name = 40FPS (ideal for 240/120/60Hz displays)
$targetFPS:int = 40
$amount_of_FPS_averaged = 32
[Preset]
name = 20FPS (ideal for 240/120/60Hz displays)
$targetFPS:int = 20
$amount_of_FPS_averaged = 32
[Preset]
name = 48FPS (ideal for 144Hz displays)
$targetFPS:int = 48
$amount_of_FPS_averaged = 32
[Preset]
name = 72FPS (ideal for 144Hz displays)
$targetFPS:int = 72
$amount_of_FPS_averaged = 32
[Preset]
name = 75FPS (ideal for 75Hz displays)
$targetFPS:int = 75
$amount_of_FPS_averaged = 32
[Preset]
name = 120FPS (ideal for 240/120/60Hz displays)
$targetFPS:int = 120
$amount_of_FPS_averaged = 32
[Preset]
name = 144FPS (ideal for 144Hz displays)
$targetFPS:int = 144
$amount_of_FPS_averaged = 32
[Control]
vsyncFrequency = $targetFPS