diff --git a/Source/Core/Common/x64CPUDetect.cpp b/Source/Core/Common/x64CPUDetect.cpp
index 8c3738034a..0da02d0fb4 100644
--- a/Source/Core/Common/x64CPUDetect.cpp
+++ b/Source/Core/Common/x64CPUDetect.cpp
@@ -197,6 +197,7 @@ void CPUInfo::Detect()
 		// Check for more features.
 		__cpuid(cpu_id, 0x80000001);
 		if (cpu_id[2] & 1) bLAHFSAHF64 = true;
+		if ((cpu_id[2] >> 5) & 1) bLZCNT = true;
 		if ((cpu_id[3] >> 29) & 1) bLongMode = true;
 	}
 
diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index 77cb1f9f72..ec80600260 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -750,12 +750,14 @@ void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);}
 void XEmitter::NEG(int bits, OpArg src)  {WriteMulDivType(bits, src, 3);}
 void XEmitter::NOT(int bits, OpArg src)  {WriteMulDivType(bits, src, 2);}
 
-void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2)
+void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
 {
 	_assert_msg_(DYNA_REC, !src.IsImm(), "WriteBitSearchType - Imm argument");
 	src.operandReg = (u8)dest;
 	if (bits == 16)
 		Write8(0x66);
+	if (rep)
+		Write8(0xF3);
 	src.WriteRex(this, bits, bits);
 	Write8(0x0F);
 	Write8(byte2);
@@ -772,6 +774,19 @@ void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src)
 void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit
 void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit
 
+void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src)
+{
+	const bool have_bmi1 = cpu_info.bBMI1;  // TZCNT is gated on the BMI1 feature flag
+	if (!have_bmi1) PanicAlert("Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
+	WriteBitSearchType(bits, dest, src, 0xBC, true);  // same second opcode byte as BSF, with rep set
+}
+void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src)
+{
+	const bool have_lzcnt = cpu_info.bLZCNT;  // LZCNT has its own feature flag, separate from BMI1
+	if (!have_lzcnt) PanicAlert("Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
+	WriteBitSearchType(bits, dest, src, 0xBD, true);  // same second opcode byte as BSR, with rep set
+}
+
 void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src)
 {
 	_assert_msg_(DYNA_REC, !src.IsImm(), "MOVSX - Imm argument");
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index 228626c407..8f41065668 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -266,7 +266,7 @@ private:
 	void WriteSimple1Byte(int bits, u8 byte, X64Reg reg);
 	void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
 	void WriteMulDivType(int bits, OpArg src, int ext);
-	void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2);
+	void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
 	void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
 	void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
 	void WriteMXCSR(OpArg arg, int ext);
@@ -454,6 +454,11 @@ public:
 	// Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
 	void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
 
+	// Available only on AMD >= Phenom or Intel >= Haswell. Test with cpu_info.bLZCNT.
+	void LZCNT(int bits, X64Reg dest, OpArg src);
+	// Note: TZCNT is part of BMI1, not covered by the LZCNT flag. Test with cpu_info.bBMI1.
+	void TZCNT(int bits, X64Reg dest, OpArg src);
+
 	// WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
 	void STMXCSR(OpArg memloc);
 	void LDMXCSR(OpArg memloc);
diff --git a/Source/UnitTests/Common/x64EmitterTest.cpp b/Source/UnitTests/Common/x64EmitterTest.cpp
index 8cbfad64fe..1f823ecd66 100644
--- a/Source/UnitTests/Common/x64EmitterTest.cpp
+++ b/Source/UnitTests/Common/x64EmitterTest.cpp
@@ -318,41 +318,35 @@ TEST_F(x64EmitterTest, CMOVcc_Register)
 	}
 }
 
-TEST_F(x64EmitterTest, BSF)
-{
-	emitter->BSF(64, R12, R(RAX));
-	emitter->BSF(32, R12, R(RAX));
-	emitter->BSF(16, R12, R(RAX));
+#define BITSEARCH_TEST(Name) /* Name: XEmitter bit-search method to exercise at 16/32/64 bits */ \
+	TEST_F(x64EmitterTest, Name) \
+	{ \
+		struct { /* one operand width: register table, memory-size keyword, name of RAX */ \
+			int bits; \
+			std::vector<NamedReg> regs; \
+			std::string size; \
+			std::string rax_name; \
+		} width_cases[] = { \
+			{ 16, reg16names, "word", "ax" }, \
+			{ 32, reg32names, "dword", "eax" }, \
+			{ 64, reg64names, "qword", "rax" }, \
+		}; \
+		const std::string mnemonic = #Name; /* stringified once, reused in each expected line */ \
+		for (const auto& wc : width_cases) \
+			for (const auto& named : wc.regs) \
+			{ \
+				emitter->Name(wc.bits, named.reg, R(RAX));    /* reg <- RAX   */ \
+				emitter->Name(wc.bits, RAX, R(named.reg));    /* RAX <- reg   */ \
+				emitter->Name(wc.bits, named.reg, MatR(RAX)); /* reg <- [RAX] */ \
+				ExpectDisassembly(mnemonic + " " + named.name + ", " + wc.rax_name + " " + mnemonic + " " + wc.rax_name + ", " + named.name + " " + \
+				                  mnemonic + " " + named.name + ", " + wc.size + " ptr ds:[rax] "); \
+			} \
+	}
 
-	emitter->BSF(64, R12, MatR(RAX));
-	emitter->BSF(32, R12, MatR(RAX));
-	emitter->BSF(16, R12, MatR(RAX));
-
-	ExpectDisassembly("bsf r12, rax "
-	                  "bsf r12d, eax "
-	                  "bsf r12w, ax "
-	                  "bsf r12, qword ptr ds:[rax] "
-	                  "bsf r12d, dword ptr ds:[rax] "
-	                  "bsf r12w, word ptr ds:[rax]");
-}
-
-TEST_F(x64EmitterTest, BSR)
-{
-	emitter->BSR(64, R12, R(RAX));
-	emitter->BSR(32, R12, R(RAX));
-	emitter->BSR(16, R12, R(RAX));
-
-	emitter->BSR(64, R12, MatR(RAX));
-	emitter->BSR(32, R12, MatR(RAX));
-	emitter->BSR(16, R12, MatR(RAX));
-
-	ExpectDisassembly("bsr r12, rax "
-	                  "bsr r12d, eax "
-	                  "bsr r12w, ax "
-	                  "bsr r12, qword ptr ds:[rax] "
-	                  "bsr r12d, dword ptr ds:[rax] "
-	                  "bsr r12w, word ptr ds:[rax]");
-}
+BITSEARCH_TEST(BSR);   // scans from the top bit to the bottom bit
+BITSEARCH_TEST(BSF);   // scans from the bottom bit to the top bit
+BITSEARCH_TEST(LZCNT); // emitter raises PanicAlert unless cpu_info.bLZCNT is set
+BITSEARCH_TEST(TZCNT); // emitter raises PanicAlert unless cpu_info.bBMI1 is set
 
 TEST_F(x64EmitterTest, PREFETCH)
 {