From c081e3f2b35fc5e80ef6d14aa28fcaeb26d9e9af Mon Sep 17 00:00:00 2001 From: Sintendo Date: Sat, 27 Feb 2021 11:30:59 +0100 Subject: [PATCH] Jit64: divwx - Optimize constant dividend When the dividend is known at compile time, we can eliminate some of the branching and precompute the result for the overflow case. Before: B8 54 D3 E6 02 mov eax,2E6D354h 85 FF test edi,edi 74 0C je overflow 3D 00 00 00 80 cmp eax,80000000h 75 0C jne normal_path 83 FF FF cmp edi,0FFFFFFFFh 75 07 jne normal_path overflow: C1 F8 1F sar eax,1Fh 8B F8 mov edi,eax EB 05 jmp done normal_path: 99 cdq F7 FF idiv eax,edi 8B F8 mov edi,eax done: After: 85 FF test edi,edi 75 04 jne normal_path 33 FF xor edi,edi EB 0A jmp done normal_path: B8 54 D3 E6 02 mov eax,2E6D354h 99 cdq F7 FF idiv eax,edi 8B F8 mov edi,eax done: Fairly common with constant dividend of zero. Non-zero values occur frequently in Ocarina of Time Master Quest. --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 5c6f2cc17d..fd6feace7d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1340,6 +1340,63 @@ void Jit64::divwx(UGeckoInstruction inst) GenerateConstantOverflow(false); } } + else if (gpr.IsImm(a)) + { + // Constant dividend + const u32 dividend = gpr.Imm32(a); + + RCX64Reg Rb = gpr.Bind(b, RCMode::Read); + RCX64Reg Rd = gpr.Bind(d, RCMode::Write); + // no register choice + RCX64Reg eax = gpr.Scratch(EAX); + RCX64Reg edx = gpr.Scratch(EDX); + RegCache::Realize(Rb, Rd, eax, edx); + + // Check for divisor == 0 + TEST(32, Rb, Rb); + + FixupBranch normal_path; + + if (dividend == 0x80000000) + { + // Divisor is 0, proceed to overflow case + const FixupBranch overflow = J_CC(CC_Z); + // Otherwise, check for divisor == -1 + CMP(32, Rb, Imm32(0xFFFFFFFF)); + normal_path = J_CC(CC_NE); + + SetJumpTarget(overflow); + } + else + { + // Divisor is not 0, take normal path + normal_path = J_CC(CC_NZ); + // Otherwise, proceed to overflow case + } + + // Set Rd to all ones or all zeroes + if (dividend & 0x80000000) + MOV(32, Rd, Imm32(0xFFFFFFFF)); + else + XOR(32, Rd, Rd); + + if (inst.OE) + GenerateConstantOverflow(true); + + const FixupBranch done = J(); + + SetJumpTarget(normal_path); + + MOV(32, eax, Imm32(dividend)); + CDQ(); + IDIV(32, Rb); + MOV(32, Rd, eax); + + if (inst.OE) + GenerateConstantOverflow(false); + + SetJumpTarget(done); + } else { RCOpArg Ra = gpr.Use(a, RCMode::Read);