From 997c5c2d0e52969edc34ea463985a9c12be7c96c Mon Sep 17 00:00:00 2001 From: Fiora Date: Sun, 14 Sep 2014 05:31:22 -0700 Subject: [PATCH 1/2] x64Emitter: add LZCNT/TZCNT support and detection Also add a unit test. --- Source/Core/Common/x64CPUDetect.cpp | 1 + Source/Core/Common/x64Emitter.cpp | 17 +++++- Source/Core/Common/x64Emitter.h | 7 ++- Source/UnitTests/Common/x64EmitterTest.cpp | 62 ++++++++++------------ 4 files changed, 51 insertions(+), 36 deletions(-) diff --git a/Source/Core/Common/x64CPUDetect.cpp b/Source/Core/Common/x64CPUDetect.cpp index 8c3738034a..0da02d0fb4 100644 --- a/Source/Core/Common/x64CPUDetect.cpp +++ b/Source/Core/Common/x64CPUDetect.cpp @@ -197,6 +197,7 @@ void CPUInfo::Detect() // Check for more features. __cpuid(cpu_id, 0x80000001); if (cpu_id[2] & 1) bLAHFSAHF64 = true; + if ((cpu_id[2] >> 5) & 1) bLZCNT = true; if ((cpu_id[3] >> 29) & 1) bLongMode = true; } diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index 77cb1f9f72..ec80600260 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -750,12 +750,14 @@ void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);} void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);} void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);} -void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2) +void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) { _assert_msg_(DYNA_REC, !src.IsImm(), "WriteBitSearchType - Imm argument"); src.operandReg = (u8)dest; if (bits == 16) Write8(0x66); + if (rep) + Write8(0xF3); src.WriteRex(this, bits, bits); Write8(0x0F); Write8(byte2); @@ -772,6 +774,19 @@ void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src) void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit void XEmitter::BSR(int bits, X64Reg dest, OpArg src) 
{WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit +void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src) +{ + if (!cpu_info.bBMI1) + PanicAlert("Trying to use BMI1 on a system that doesn't support it. Bad programmer."); + WriteBitSearchType(bits, dest, src, 0xBC, true); +} +void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src) +{ + if (!cpu_info.bLZCNT) + PanicAlert("Trying to use LZCNT on a system that doesn't support it. Bad programmer."); + WriteBitSearchType(bits, dest, src, 0xBD, true); +} + void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) { _assert_msg_(DYNA_REC, !src.IsImm(), "MOVSX - Imm argument"); diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 228626c407..8f41065668 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -266,7 +266,7 @@ private: void WriteSimple1Byte(int bits, u8 byte, X64Reg reg); void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); void WriteMulDivType(int bits, OpArg src, int ext); - void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2); + void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); void WriteMXCSR(OpArg arg, int ext); @@ -454,6 +454,11 @@ public: // Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE. void MOVBE(int dbits, const OpArg& dest, const OpArg& src); + // Available only on AMD >= Phenom or Intel >= Haswell + void LZCNT(int bits, X64Reg dest, OpArg src); + // Note: this one is actually part of BMI1 + void TZCNT(int bits, X64Reg dest, OpArg src); + // WARNING - These two take 11-13 cycles and are VectorPath! 
(AMD64) void STMXCSR(OpArg memloc); void LDMXCSR(OpArg memloc); diff --git a/Source/UnitTests/Common/x64EmitterTest.cpp b/Source/UnitTests/Common/x64EmitterTest.cpp index 8cbfad64fe..1f823ecd66 100644 --- a/Source/UnitTests/Common/x64EmitterTest.cpp +++ b/Source/UnitTests/Common/x64EmitterTest.cpp @@ -318,41 +318,35 @@ TEST_F(x64EmitterTest, CMOVcc_Register) } } -TEST_F(x64EmitterTest, BSF) -{ - emitter->BSF(64, R12, R(RAX)); - emitter->BSF(32, R12, R(RAX)); - emitter->BSF(16, R12, R(RAX)); +#define BITSEARCH_TEST(Name) \ + TEST_F(x64EmitterTest, Name) \ + { \ + struct { \ + int bits; \ + std::vector<NamedReg> regs; \ + std::string size; \ + std::string rax_name; \ + } regsets[] = { \ + { 16, reg16names, "word", "ax" }, \ + { 32, reg32names, "dword", "eax" }, \ + { 64, reg64names, "qword", "rax" }, \ + }; \ + for (const auto& regset : regsets) \ + for (const auto& r : regset.regs) \ + { \ + emitter->Name(regset.bits, r.reg, R(RAX)); \ + emitter->Name(regset.bits, RAX, R(r.reg)); \ + emitter->Name(regset.bits, r.reg, MatR(RAX)); \ + ExpectDisassembly(#Name " " + r.name + ", " + regset.rax_name + " " \ + #Name " " + regset.rax_name + ", " + r.name + " " \ + #Name " " + r.name + ", " + regset.size + " ptr ds:[rax] " ); \ + } \ + } - emitter->BSF(64, R12, MatR(RAX)); - emitter->BSF(32, R12, MatR(RAX)); - emitter->BSF(16, R12, MatR(RAX)); - - ExpectDisassembly("bsf r12, rax " - "bsf r12d, eax " - "bsf r12w, ax " - "bsf r12, qword ptr ds:[rax] " - "bsf r12d, dword ptr ds:[rax] " - "bsf r12w, word ptr ds:[rax]"); -} - -TEST_F(x64EmitterTest, BSR) -{ - emitter->BSR(64, R12, R(RAX)); - emitter->BSR(32, R12, R(RAX)); - emitter->BSR(16, R12, R(RAX)); - - emitter->BSR(64, R12, MatR(RAX)); - emitter->BSR(32, R12, MatR(RAX)); - emitter->BSR(16, R12, MatR(RAX)); - - ExpectDisassembly("bsr r12, rax " - "bsr r12d, eax " - "bsr r12w, ax " - "bsr r12, qword ptr ds:[rax] " - "bsr r12d, dword ptr ds:[rax] " - "bsr r12w, word ptr ds:[rax]"); -} +BITSEARCH_TEST(BSR); +BITSEARCH_TEST(BSF); 
+BITSEARCH_TEST(LZCNT); +BITSEARCH_TEST(TZCNT); TEST_F(x64EmitterTest, PREFETCH) { From 40b18f09b2ce03f908b5303643b352d084b0c4b4 Mon Sep 17 00:00:00 2001 From: Fiora Date: Sun, 14 Sep 2014 05:36:23 -0700 Subject: [PATCH 2/2] JIT: use LZCNT in cntlzw --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 10ea9f8448..271a0dc861 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1901,13 +1901,19 @@ void Jit64::cntlzwx(UGeckoInstruction inst) else { gpr.Lock(a, s); - gpr.KillImmediate(s, true, false); - gpr.BindToRegister(a, (a == s), true); - BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s)); - FixupBranch gotone = J_CC(CC_NZ); - MOV(32, gpr.R(a), Imm32(63)); - SetJumpTarget(gotone); - XOR(32, gpr.R(a), Imm8(0x1f)); // flip order + gpr.BindToRegister(a, a == s, true); + if (cpu_info.bLZCNT) + { + LZCNT(32, gpr.RX(a), gpr.R(s)); + } + else + { + BSR(32, gpr.RX(a), gpr.R(s)); + FixupBranch gotone = J_CC(CC_NZ); + MOV(32, gpr.R(a), Imm32(63)); + SetJumpTarget(gotone); + XOR(32, gpr.R(a), Imm8(0x1f)); // flip order + } gpr.UnlockAll(); }