From b35c34186c5a1ae04af7a5d8a4b1f213cc61ee0e Mon Sep 17 00:00:00 2001 From: Sintendo Date: Sat, 21 Feb 2015 01:25:57 +0100 Subject: [PATCH 1/2] More minor DSP JIT optimizations --- Source/Core/Core/DSP/DSPEmitter.cpp | 4 ++-- Source/Core/Core/DSP/Jit/DSPJitArithmetic.cpp | 14 +++++++------- Source/Core/Core/DSP/Jit/DSPJitBranch.cpp | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/DSP/DSPEmitter.cpp b/Source/Core/Core/DSP/DSPEmitter.cpp index fec47aea93..3398bc6823 100644 --- a/Source/Core/Core/DSP/DSPEmitter.cpp +++ b/Source/Core/Core/DSP/DSPEmitter.cpp @@ -246,11 +246,11 @@ void DSPEmitter::Compile(u16 start_addr) if (DSPAnalyzer::code_flags[compilePC-1] & DSPAnalyzer::CODE_LOOP_END) { MOVZX(32, 16, EAX, M(&(g_dsp.r.st[2]))); - CMP(32, R(EAX), Imm32(0)); + TEST(32, R(EAX), R(EAX)); FixupBranch rLoopAddressExit = J_CC(CC_LE, true); MOVZX(32, 16, EAX, M(&g_dsp.r.st[3])); - CMP(32, R(EAX), Imm32(0)); + TEST(32, R(EAX), R(EAX)); FixupBranch rLoopCounterExit = J_CC(CC_LE, true); if (!opcode->branch) diff --git a/Source/Core/Core/DSP/Jit/DSPJitArithmetic.cpp b/Source/Core/Core/DSP/Jit/DSPJitArithmetic.cpp index a56d75a5b1..ecab111ad0 100644 --- a/Source/Core/Core/DSP/Jit/DSPJitArithmetic.cpp +++ b/Source/Core/Core/DSP/Jit/DSPJitArithmetic.cpp @@ -1075,7 +1075,7 @@ void DSPEmitter::abs(const UDSPInstruction opc) // s64 acc = dsp_get_long_acc(dreg); get_long_acc(dreg); // if (acc < 0) acc = 0 - acc; - CMP(64, R(RAX), Imm8(0)); + TEST(64, R(RAX), R(RAX)); FixupBranch GreaterThanOrEqual = J_CC(CC_GE); NEG(64, R(RAX)); set_long_acc(dreg); @@ -1367,7 +1367,7 @@ void DSPEmitter::lsrn(const UDSPInstruction opc) // acc <<= -shift; // } - CMP(64, R(RDX), Imm8(0));//is this actually worth the branch cost? + TEST(64, R(RDX), R(RDX));//is this actually worth the branch cost? FixupBranch zero = J_CC(CC_E); TEST(16, R(RAX), Imm16(0x3f));//is this actually worth the branch cost? FixupBranch noShift = J_CC(CC_Z); @@ -1426,7 +1426,7 @@ void DSPEmitter::asrn(const UDSPInstruction opc) // acc <<= -shift; // } - CMP(64, R(RDX), Imm8(0)); + TEST(64, R(RDX), R(RDX)); FixupBranch zero = J_CC(CC_E); TEST(16, R(RAX), Imm16(0x3f)); FixupBranch noShift = J_CC(CC_Z); @@ -1489,7 +1489,7 @@ void DSPEmitter::lsrnrx(const UDSPInstruction opc) // acc >>= -shift; // } - CMP(64, R(RDX), Imm8(0)); + TEST(64, R(RDX), R(RDX)); FixupBranch zero = J_CC(CC_E); TEST(16, R(RAX), Imm16(0x3f)); FixupBranch noShift = J_CC(CC_Z); @@ -1546,7 +1546,7 @@ void DSPEmitter::asrnrx(const UDSPInstruction opc) // acc >>= -shift; // } - CMP(64, R(RDX), Imm8(0)); + TEST(64, R(RDX), R(RDX)); FixupBranch zero = J_CC(CC_E); TEST(16, R(RAX), Imm16(0x3f)); FixupBranch noShift = J_CC(CC_Z); @@ -1604,7 +1604,7 @@ void DSPEmitter::lsrnr(const UDSPInstruction opc) // else if (shift < 0) // acc >>= -shift; - CMP(64, R(RDX), Imm8(0)); + TEST(64, R(RDX), R(RDX)); FixupBranch zero = J_CC(CC_E); TEST(16, R(RAX), Imm16(0x3f)); FixupBranch noShift = J_CC(CC_Z); @@ -1659,7 +1659,7 @@ void DSPEmitter::asrnr(const UDSPInstruction opc) // else if (shift < 0) // acc >>= -shift; - CMP(64, R(RDX), Imm8(0)); + TEST(64, R(RDX), R(RDX)); FixupBranch zero = J_CC(CC_E); TEST(16, R(RAX), Imm16(0x3f)); FixupBranch noShift = J_CC(CC_Z); diff --git a/Source/Core/Core/DSP/Jit/DSPJitBranch.cpp b/Source/Core/Core/DSP/Jit/DSPJitBranch.cpp index abf9e7015b..862590c57f 100644 --- a/Source/Core/Core/DSP/Jit/DSPJitBranch.cpp +++ b/Source/Core/Core/DSP/Jit/DSPJitBranch.cpp @@ -282,7 +282,7 @@ void DSPEmitter::HandleLoop() MOVZX(32, 16, EAX, M(&g_dsp.r.st[2])); MOVZX(32, 16, ECX, M(&g_dsp.r.st[3])); - CMP(32, R(RCX), Imm32(0)); + TEST(32, R(RCX), R(RCX)); FixupBranch rLoopCntG = J_CC(CC_LE, true); CMP(16, R(RAX), Imm16(compilePC - 1)); FixupBranch rLoopAddrG = J_CC(CC_NE, true); @@ -323,7 +323,7 @@ void DSPEmitter::loop(const UDSPInstruction opc) dsp_op_read_reg_dont_saturate(reg, RDX, ZERO); u16 loop_pc = compilePC + 1; - CMP(16, R(EDX), Imm16(0)); + TEST(16, R(EDX), R(EDX)); DSPJitRegCache c(gpr); FixupBranch cnt = J_CC(CC_Z, true); dsp_reg_store_stack(3); @@ -393,7 +393,7 @@ void DSPEmitter::bloop(const UDSPInstruction opc) dsp_op_read_reg_dont_saturate(reg, RDX, ZERO); u16 loop_pc = dsp_imem_read(compilePC + 1); - CMP(16, R(EDX), Imm16(0)); + TEST(16, R(EDX), R(EDX)); DSPJitRegCache c(gpr); FixupBranch cnt = J_CC(CC_Z, true); dsp_reg_store_stack(3); From c19482c9a39f420b769ba9386bd4254fbcb309b9 Mon Sep 17 00:00:00 2001 From: Sintendo Date: Sat, 21 Feb 2015 11:12:03 +0100 Subject: [PATCH 2/2] Add function to emit CMP, or TEST when possible Also, a spelling mistake. --- Source/Core/Common/x64Emitter.cpp | 12 ++++++++++++ Source/Core/Common/x64Emitter.h | 2 ++ Source/Core/Core/DSP/Jit/DSPJitRegCache.cpp | 2 +- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 2 +- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index cc3c51da08..7b46863d16 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -1278,6 +1278,18 @@ void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(bits, nrmTEST, a1, a2);} void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(bits, nrmCMP, a1, a2);} void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmXCHG, a1, a2);} +void XEmitter::CMP_or_TEST(int bits, const OpArg &a1, const OpArg &a2) +{ + CheckFlags(); + if (a1.IsSimpleReg() && a2.IsImm() && a2.offset == 0) // turn 'CMP reg, 0' into shorter 'TEST reg, reg' + { + WriteNormalOp(bits, nrmTEST, a1, a1); + } + else + { + WriteNormalOp(bits, nrmCMP, a1, a2); + } +} void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) { diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 23927cb88a..80c161ead4 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -465,6 +465,8 @@ public: void MOV (int bits, const OpArg &a1, const OpArg &a2); void TEST(int bits, const OpArg &a1, const OpArg &a2); + void CMP_or_TEST(int bits, const OpArg &a1, const OpArg &a2); + // Are these useful at all? Consider removing. void XCHG(int bits, const OpArg &a1, const OpArg &a2); void XCHG_AHAL(); diff --git a/Source/Core/Core/DSP/Jit/DSPJitRegCache.cpp b/Source/Core/Core/DSP/Jit/DSPJitRegCache.cpp index 589ad41b15..357c8bf30f 100644 --- a/Source/Core/Core/DSP/Jit/DSPJitRegCache.cpp +++ b/Source/Core/Core/DSP/Jit/DSPJitRegCache.cpp @@ -756,7 +756,7 @@ void DSPJitRegCache::getReg(int reg, OpArg &oparg, bool load) { emitter.INT3(); } - // no nead to actually emit code for load or rotate if caller doesn't + // no need to actually emit code for load or rotate if caller doesn't // use the contents, but see above for a reason to force the load movToHostReg(real_reg, load); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 1cc0782980..948aa9a785 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1154,7 +1154,7 @@ void Jit64::divwux(UGeckoInstruction inst) MOV(32, R(EAX), gpr.R(a)); XOR(32, R(EDX), R(EDX)); gpr.KillImmediate(b, true, false); - CMP(32, gpr.R(b), Imm32(0)); + CMP_or_TEST(32, gpr.R(b), Imm32(0)); FixupBranch not_div_by_zero = J_CC(CC_NZ); MOV(32, gpr.R(d), R(EDX)); if (inst.OE)