// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include <cctype>
#include <cstring>
#include <disasm.h>  // From Bochs, fallback included in Externals.
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <gtest/gtest.h>

// gtest defines the TEST macro to generate test case functions. It conflicts
// with the TEST method in the x64Emitter.
//
// Since we use TEST_F in this file to attach the test cases to a fixture, we
// can get away with simply undef'ing TEST. Phew.
#undef TEST

#include "Common/CPUDetect.h"
#include "Common/x64Emitter.h"

namespace Gen
{

// Pairs a register value understood by the emitter with the name the
// disassembler is expected to print for it. The tables in this file brace-
// initialize it as { reg, name }, so the member order matters.
struct NamedReg
{
	X64Reg reg;        // Register value handed to the emitter.
	std::string name;  // Text compared against the disassembly.
};

// Byte-sized names ("al", "sil", "r8b", ...) for the sixteen listed registers.
const std::vector<NamedReg> reg8names {
	{ RAX, "al"   }, { RBX, "bl"   }, { RCX, "cl"   }, { RDX, "dl"   },
	{ RSI, "sil"  }, { RDI, "dil"  }, { RBP, "bpl"  }, { RSP, "spl"  },
	{ R8,  "r8b"  }, { R9,  "r9b"  }, { R10, "r10b" }, { R11, "r11b" },
	{ R12, "r12b" }, { R13, "r13b" }, { R14, "r14b" }, { R15, "r15b" },
};

// The four high-byte registers (ah, bh, ch, dh). Kept in their own table so
// the 8-bit tests can list them as a separate register set.
const std::vector<NamedReg> reg8hnames {
	{ AH, "ah" }, { BH, "bh" }, { CH, "ch" }, { DH, "dh" },
};

// Word-sized names ("ax", "si", "r8w", ...) for the sixteen listed registers.
const std::vector<NamedReg> reg16names {
	{ RAX, "ax"   }, { RBX, "bx"   }, { RCX, "cx"   }, { RDX, "dx"   },
	{ RSI, "si"   }, { RDI, "di"   }, { RBP, "bp"   }, { RSP, "sp"   },
	{ R8,  "r8w"  }, { R9,  "r9w"  }, { R10, "r10w" }, { R11, "r11w" },
	{ R12, "r12w" }, { R13, "r13w" }, { R14, "r14w" }, { R15, "r15w" },
};

// Dword-sized names ("eax", "esi", "r8d", ...) for the sixteen listed registers.
const std::vector<NamedReg> reg32names {
	{ RAX, "eax"  }, { RBX, "ebx"  }, { RCX, "ecx"  }, { RDX, "edx"  },
	{ RSI, "esi"  }, { RDI, "edi"  }, { RBP, "ebp"  }, { RSP, "esp"  },
	{ R8,  "r8d"  }, { R9,  "r9d"  }, { R10, "r10d" }, { R11, "r11d" },
	{ R12, "r12d" }, { R13, "r13d" }, { R14, "r14d" }, { R15, "r15d" },
};

// Qword-sized names ("rax", "rsi", "r8", ...) for the sixteen listed registers.
const std::vector<NamedReg> reg64names {
	{ RAX, "rax" }, { RBX, "rbx" }, { RCX, "rcx" }, { RDX, "rdx" },
	{ RSI, "rsi" }, { RDI, "rdi" }, { RBP, "rbp" }, { RSP, "rsp" },
	{ R8,  "r8"  }, { R9,  "r9"  }, { R10, "r10" }, { R11, "r11" },
	{ R12, "r12" }, { R13, "r13" }, { R14, "r14" }, { R15, "r15" },
};

// Names of XMM0 through XMM15, used by the SSE/AVX tests in this file.
const std::vector<NamedReg> xmmnames {
	{ XMM0,  "xmm0"  }, { XMM1,  "xmm1"  }, { XMM2,  "xmm2"  }, { XMM3,  "xmm3"  },
	{ XMM4,  "xmm4"  }, { XMM5,  "xmm5"  }, { XMM6,  "xmm6"  }, { XMM7,  "xmm7"  },
	{ XMM8,  "xmm8"  }, { XMM9,  "xmm9"  }, { XMM10, "xmm10" }, { XMM11, "xmm11" },
	{ XMM12, "xmm12" }, { XMM13, "xmm13" }, { XMM14, "xmm14" }, { XMM15, "xmm15" },
};

// Names of YMM0 through YMM15.
// NOTE(review): no test visible in this file references this table.
const std::vector<NamedReg> ymmnames {
	{ YMM0,  "ymm0"  }, { YMM1,  "ymm1"  }, { YMM2,  "ymm2"  }, { YMM3,  "ymm3"  },
	{ YMM4,  "ymm4"  }, { YMM5,  "ymm5"  }, { YMM6,  "ymm6"  }, { YMM7,  "ymm7"  },
	{ YMM8,  "ymm8"  }, { YMM9,  "ymm9"  }, { YMM10, "ymm10" }, { YMM11, "ymm11" },
	{ YMM12, "ymm12" }, { YMM13, "ymm13" }, { YMM14, "ymm14" }, { YMM15, "ymm15" },
};

// Condition codes paired with the mnemonic suffix expected in the disassembly
// (the tests build names such as "set" + name and "cmov" + name from it).
// The table is only ever read, so it is const like the register tables.
const struct { CCFlags cc; std::string name; } ccnames[] = {
	{ CC_O, "o" }, { CC_NO, "no" },
	{ CC_B, "b" }, { CC_NB, "nb" },
	{ CC_Z, "z" }, { CC_NZ, "nz" },
	{ CC_BE, "be" }, { CC_NBE, "nbe" },
	{ CC_S, "s" }, { CC_NS, "ns" },
	{ CC_P, "p" }, { CC_NP, "np" },
	{ CC_L, "l" }, { CC_NL, "nl" },
	{ CC_LE, "le" }, { CC_NLE, "nle" },
};

class x64EmitterTest : public testing::Test
{
protected:
	void SetUp() override
	{
		memset(&cpu_info, 0xFF, sizeof (cpu_info));

		emitter.reset(new X64CodeBlock());
		emitter->AllocCodeSpace(4096);
		code_buffer = emitter->GetWritableCodePtr();

		disasm.reset(new disassembler);
		disasm->set_syntax_intel();
	}

	void TearDown() override
	{
		cpu_info = CPUInfo();
	}

	void ExpectDisassembly(const std::string& expected)
	{
		std::string disasmed;
		const u8* generated_code_iterator = code_buffer;
		while (generated_code_iterator < emitter->GetCodePtr())
		{
			char instr_buffer[1024] = "";
			generated_code_iterator += disasm->disasm64(
					(u64)generated_code_iterator,
					(u64)generated_code_iterator,
					generated_code_iterator, instr_buffer);
			disasmed += instr_buffer;
			disasmed += "\n";
		}

		auto NormalizeAssembly = [](const std::string& str) -> std::string {
			// Normalize assembly code to make it suitable for equality checks.
			// In particular:
			//   * Replace all whitespace characters by a single space.
			//   * Remove leading and trailing whitespaces.
			//   * Lowercase everything.
			//   * Remove all (0x...) addresses.
			std::string out;
			bool previous_was_space = false;
			bool inside_parens = false;
			for (auto c : str)
			{
				c = tolower(c);
				if (c == '(')
				{
					inside_parens = true;
					continue;
				}
				else if (inside_parens)
				{
					if (c == ')')
						inside_parens = false;
					continue;
				}
				else if (isspace(c))
				{
					previous_was_space = true;
					continue;
				}
				else if (previous_was_space)
				{
					previous_was_space = false;
					if (!out.empty())
						out += ' ';
				}
				out += c;
			}
			return out;
		};
		std::string expected_norm = NormalizeAssembly(expected);
		std::string disasmed_norm = NormalizeAssembly(disasmed);

		EXPECT_EQ(expected_norm, disasmed_norm);

		// Reset code buffer afterwards.
		emitter->SetCodePtr(code_buffer);
	}

	std::unique_ptr<X64CodeBlock> emitter;
	std::unique_ptr<disassembler> disasm;
	u8* code_buffer;
};

// Declares a fixture test named after a zero-argument emitter method: the
// method is called once and the emitted bytes must disassemble to
// ExpectedDisasm.
#define TEST_INSTR_NO_OPERANDS(Name, ExpectedDisasm) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		const std::string expected_listing(ExpectedDisasm); \
		emitter->Name(); \
		ExpectDisassembly(expected_listing); \
	}

// One test per zero-argument emitter method. The first argument is both the
// test name and the emitter method that gets called; the second is the text
// the disassembler is expected to print for it.
TEST_INSTR_NO_OPERANDS(INT3, "int3")
TEST_INSTR_NO_OPERANDS(NOP, "nop")
TEST_INSTR_NO_OPERANDS(PAUSE, "pause")
TEST_INSTR_NO_OPERANDS(STC, "stc")
TEST_INSTR_NO_OPERANDS(CLC, "clc")
TEST_INSTR_NO_OPERANDS(CMC, "cmc")
TEST_INSTR_NO_OPERANDS(LAHF, "lahf")
TEST_INSTR_NO_OPERANDS(SAHF, "sahf")
TEST_INSTR_NO_OPERANDS(PUSHF, "pushf")
TEST_INSTR_NO_OPERANDS(POPF, "popf")
TEST_INSTR_NO_OPERANDS(RET, "ret")
TEST_INSTR_NO_OPERANDS(RET_FAST, "rep ret")
TEST_INSTR_NO_OPERANDS(UD2, "ud2a")
TEST_INSTR_NO_OPERANDS(JMPself, "jmp .-2")
TEST_INSTR_NO_OPERANDS(LFENCE, "lfence")
TEST_INSTR_NO_OPERANDS(MFENCE, "mfence")
TEST_INSTR_NO_OPERANDS(SFENCE, "sfence")
TEST_INSTR_NO_OPERANDS(CWD, "cwd")
TEST_INSTR_NO_OPERANDS(CDQ, "cdq")
TEST_INSTR_NO_OPERANDS(CQO, "cqo")
TEST_INSTR_NO_OPERANDS(CBW, "cbw")
TEST_INSTR_NO_OPERANDS(CWDE, "cwde")
TEST_INSTR_NO_OPERANDS(CDQE, "cdqe")
TEST_INSTR_NO_OPERANDS(XCHG_AHAL, "xchg al, ah")
TEST_INSTR_NO_OPERANDS(FWAIT, "fwait")
TEST_INSTR_NO_OPERANDS(FNSTSW_AX, "fnstsw ax")
TEST_INSTR_NO_OPERANDS(RDTSC, "rdtsc")

TEST_F(x64EmitterTest, NOP_MultiByte)
{
	// A 2-byte NOP comes out as "rep nop", which is still reported as a
	// plain nop.
	emitter->NOP(2);
	ExpectDisassembly("nop");

	// Sizes 3 through 11 are each expected to be a single multibyte NOP.
	int nop_size = 3;
	while (nop_size <= 11)
	{
		emitter->NOP(nop_size);
		ExpectDisassembly("multibyte nop");
		++nop_size;
	}

	// A larger request (20 bytes) is expected to be split into two NOPs.
	const std::string split_nops = "multibyte nop "
	                               "multibyte nop";
	emitter->NOP(20);
	ExpectDisassembly(split_nops);
}

TEST_F(x64EmitterTest, PUSH_Register)
{
	// Every register in the 64-bit table must disassemble as "push <name>".
	for (const NamedReg& gpr : reg64names)
	{
		const std::string expected = "push " + gpr.name;
		emitter->PUSH(gpr.reg);
		ExpectDisassembly(expected);
	}
}

TEST_F(x64EmitterTest, PUSH_Immediate)
{
	// Expected listings for the three immediate widths, in test order.
	const std::string expect_imm8  = "push 0xfffffffffffffff0";
	const std::string expect_imm16 = "push 0xe0f0";
	const std::string expect_imm32 = "push 0xffffffffc0d0e0f0";

	emitter->PUSH(64, Imm8(0xf0));
	ExpectDisassembly(expect_imm8);

	// X64 is weird like that... the 16-bit form pushes 2 bytes rather than
	// 8 sign-extended bytes, so no sign extension shows up in the listing.
	emitter->PUSH(64, Imm16(0xe0f0));
	ExpectDisassembly(expect_imm16);

	emitter->PUSH(64, Imm32(0xc0d0e0f0));
	ExpectDisassembly(expect_imm32);
}

TEST_F(x64EmitterTest, PUSH_MComplex)
{
	// Base + scaled index + displacement memory operand.
	const std::string expected = "push qword ptr ds:[rax+rbx*2+4]";
	emitter->PUSH(64, MComplex(RAX, RBX, SCALE_2, 4));
	ExpectDisassembly(expected);
}

TEST_F(x64EmitterTest, POP_Register)
{
	// Every register in the 64-bit table must disassemble as "pop <name>".
	for (const NamedReg& gpr : reg64names)
	{
		const std::string expected = "pop " + gpr.name;
		emitter->POP(gpr.reg);
		ExpectDisassembly(expected);
	}
}

TEST_F(x64EmitterTest, JMP)
{
	// Both cases jump back to the start of the buffer from behind a 6-byte
	// NOP; they differ only in the second JMP argument.
	// NOTE(review): the expected offsets (.-8 vs .-11) suggest a 2-byte and a
	// 5-byte jump encoding respectively -- confirm against XEmitter::JMP.
	const std::string expect_default = "multibyte nop "
	                                   "jmp .-8";
	const std::string expect_flagged = "multibyte nop "
	                                   "jmp .-11";

	emitter->NOP(6);
	emitter->JMP(code_buffer);
	ExpectDisassembly(expect_default);

	emitter->NOP(6);
	emitter->JMP(code_buffer, true);
	ExpectDisassembly(expect_flagged);
}

TEST_F(x64EmitterTest, JMPptr_Register)
{
	// An indirect jump through each 64-bit register prints as "jmp <name>".
	for (const NamedReg& target : reg64names)
	{
		const std::string expected = "jmp " + target.name;
		emitter->JMPptr(R(target.reg));
		ExpectDisassembly(expected);
	}
}

// TODO: J/SetJumpTarget

// TODO: CALL

// TODO: J_CC

TEST_F(x64EmitterTest, SETcc)
{
	// For every condition code: first all registers of reg8names, then the
	// high-byte registers, each checked individually.
	const std::vector<NamedReg>* const byte_tables[] = { &reg8names, &reg8hnames };
	for (const auto& cond : ccnames)
	{
		const std::string mnemonic = "set" + cond.name;
		for (const auto* table : byte_tables)
		{
			for (const NamedReg& dst : *table)
			{
				emitter->SETcc(cond.cc, R(dst.reg));
				ExpectDisassembly(mnemonic + " " + dst.name);
			}
		}
	}
}

TEST_F(x64EmitterTest, CMOVcc_Register)
{
	// For every condition code, emit the 64-, 32- and 16-bit register forms
	// back to back and check all three in one listing.
	const int widths[] = { 64, 32, 16 };
	for (const auto& cond : ccnames)
	{
		const std::string op = "cmov" + cond.name;
		for (const int bits : widths)
			emitter->CMOVcc(bits, RAX, R(R12), cond.cc);

		ExpectDisassembly(op + " rax, r12 " +
		                  op + " eax, r12d " +
		                  op + " ax, r12w");
	}
}

// Declares a test for a two-operand emitter method `Name` taking
// (bits, dest register, source operand). For each width (16/32/64) and each
// register of that width it emits three forms -- reg <- rax, rax <- reg and
// reg <- [rax] -- and checks the combined disassembly.
// The register tables are referenced by pointer so that building the local
// table does not deep-copy them on every test run.
#define BITSEARCH_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			const std::vector<NamedReg>* regs; \
			std::string size; \
			std::string rax_name; \
		} regsets[] = { \
			{ 16, &reg16names, "word", "ax" }, \
			{ 32, &reg32names, "dword", "eax" }, \
			{ 64, &reg64names, "qword", "rax" }, \
		}; \
		for (const auto& regset : regsets) \
		{ \
			for (const auto& r : *regset.regs) \
			{ \
				emitter->Name(regset.bits, r.reg, R(RAX)); \
				emitter->Name(regset.bits, RAX, R(r.reg)); \
				emitter->Name(regset.bits, r.reg, MatR(RAX)); \
				ExpectDisassembly(#Name " " + r.name + ", " + regset.rax_name + " " \
				                  #Name " " + regset.rax_name + ", " + r.name + " " \
				                  #Name " " + r.name + ", " + regset.size + " ptr ds:[rax] " ); \
			} \
		} \
	}

// Instantiations of BITSEARCH_TEST. No trailing semicolons: the macro expands
// to a complete test definition, matching the other invocation lists here.
BITSEARCH_TEST(BSR)
BITSEARCH_TEST(BSF)
BITSEARCH_TEST(LZCNT)
BITSEARCH_TEST(TZCNT)

TEST_F(x64EmitterTest, PREFETCH)
{
	// One expected line per prefetch hint, in emission order.
	const std::string expected =
		"prefetchnta byte ptr ds:[r12] "
		"prefetcht0 byte ptr ds:[r12] "
		"prefetcht1 byte ptr ds:[r12] "
		"prefetcht2 byte ptr ds:[r12]";

	emitter->PREFETCH(XEmitter::PF_NTA, MatR(R12));
	emitter->PREFETCH(XEmitter::PF_T0, MatR(R12));
	emitter->PREFETCH(XEmitter::PF_T1, MatR(R12));
	emitter->PREFETCH(XEmitter::PF_T2, MatR(R12));

	ExpectDisassembly(expected);
}

TEST_F(x64EmitterTest, MOVNTI)
{
	// 32- and 64-bit stores, each to [rax] and to the start of the code
	// buffer; the latter is expected to show up as a rip-relative operand.
	const std::string expected =
		"movnti dword ptr ds:[rax], r12d "
		"movnti dword ptr ds:[rip-12], r12d "
		"movnti qword ptr ds:[rax], r12 "
		"movnti qword ptr ds:[rip-24], r12";

	emitter->MOVNTI(32, MatR(RAX), R12);
	emitter->MOVNTI(32, M(code_buffer), R12);
	emitter->MOVNTI(64, MatR(RAX), R12);
	emitter->MOVNTI(64, M(code_buffer), R12);

	ExpectDisassembly(expected);
}

// MOVNTDQ, MOVNTPS and MOVNTPD share one test because the three instructions
// do exactly the same thing.
TEST_F(x64EmitterTest, MOVNT_DQ_PS_PD)
{
	for (const NamedReg& src : xmmnames)
	{
		// All three instructions take the same operands; only the mnemonic
		// differs.
		const std::string operands = " dqword ptr ds:[rax], " + src.name;

		emitter->MOVNTDQ(MatR(RAX), src.reg);
		emitter->MOVNTPS(MatR(RAX), src.reg);
		emitter->MOVNTPD(MatR(RAX), src.reg);

		ExpectDisassembly("movntdq" + operands + " "
		                  "movntps" + operands + " "
		                  "movntpd" + operands);
	}
}

// Declares a test for a one-operand emitter method `Name` taking
// (bits, operand). For every register of every listed width the register
// form is emitted and must disassemble as "<Name> <out_name>, <reg>", where
// out_name is al/ax/eax/rax according to the operand width.
// The register tables are referenced by pointer so that building the local
// table does not deep-copy them on every test run.
#define MUL_DIV_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			const std::vector<NamedReg>* regs; \
			std::string out_name; \
		} regsets[] = { \
			{ 8, &reg8names, "al" }, \
			{ 8, &reg8hnames, "al" }, \
			{ 16, &reg16names, "ax" }, \
			{ 32, &reg32names, "eax" }, \
			{ 64, &reg64names, "rax" }, \
		}; \
		for (const auto& regset : regsets) \
		{ \
			for (const auto& r : *regset.regs) \
			{ \
				emitter->Name(regset.bits, R(r.reg)); \
				ExpectDisassembly(#Name " " + regset.out_name + ", " + r.name); \
			} \
		} \
	}

// Instantiations of MUL_DIV_TEST: one test per one-operand multiply/divide
// emitter method.
MUL_DIV_TEST(MUL)
MUL_DIV_TEST(IMUL)
MUL_DIV_TEST(DIV)
MUL_DIV_TEST(IDIV)

// TODO: More complex IMUL variants.

// Declares a test for a shift/rotate emitter method `Name` taking
// (bits, dest operand, count operand). For every register of every listed
// width it emits the instruction with an immediate count of 1, an immediate
// count of 4 and a count in CL, then checks the combined disassembly.
// The register tables are referenced by pointer so that building the local
// table does not deep-copy them on every test run.
#define SHIFT_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			const std::vector<NamedReg>* regs; \
		} regsets[] = { \
			{ 8, &reg8names }, \
			{ 8, &reg8hnames }, \
			{ 16, &reg16names }, \
			{ 32, &reg32names }, \
			{ 64, &reg64names }, \
		}; \
		for (const auto& regset : regsets) \
		{ \
			for (const auto& r : *regset.regs) \
			{ \
				emitter->Name(regset.bits, R(r.reg), Imm8(1)); \
				emitter->Name(regset.bits, R(r.reg), Imm8(4)); \
				emitter->Name(regset.bits, R(r.reg), R(CL)); \
				ExpectDisassembly(#Name " " + r.name + ", 1 " \
				                  #Name " " + r.name + ", 0x04 " \
				                  #Name " " + r.name + ", cl"); \
			} \
		} \
	}

// Instantiations of SHIFT_TEST: one test per rotate/shift emitter method.
SHIFT_TEST(ROL)
SHIFT_TEST(ROR)
SHIFT_TEST(RCL)
SHIFT_TEST(RCR)
SHIFT_TEST(SHL)
SHIFT_TEST(SHR)
SHIFT_TEST(SAR)

// Declares a test for a bit-test emitter method `Name` taking
// (bits, dest operand, index operand). For each width (16/32/64) and each
// register of that width it emits four forms -- reg,rax / rax,reg /
// reg,imm8 (0x42) / [r12],reg -- and checks the combined disassembly.
// The register tables are referenced by pointer so that building the local
// table does not deep-copy them on every test run.
#define BT_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			const std::vector<NamedReg>* regs; \
			std::string out_name; \
			std::string size; \
		} regsets[] = { \
			{ 16, &reg16names, "ax", "word" }, \
			{ 32, &reg32names, "eax", "dword" }, \
			{ 64, &reg64names, "rax", "qword" }, \
		}; \
		for (const auto& regset : regsets) \
		{ \
			for (const auto& r : *regset.regs) \
			{ \
				emitter->Name(regset.bits, R(r.reg), R(RAX)); \
				emitter->Name(regset.bits, R(RAX), R(r.reg)); \
				emitter->Name(regset.bits, R(r.reg), Imm8(0x42)); \
				emitter->Name(regset.bits, MatR(R12), R(r.reg)); \
				ExpectDisassembly(#Name " " + r.name + ", " + regset.out_name + " " \
				                  #Name " " + regset.out_name + ", " + r.name + " " \
				                  #Name " " + r.name + ", 0x42 " \
				                  #Name " " + regset.size + " ptr ds:[r12], " + r.name); \
			} \
		} \
	}

// Instantiations of BT_TEST: one test per bit-test emitter method.
BT_TEST(BT)
BT_TEST(BTS)
BT_TEST(BTR)
BT_TEST(BTC)

// TODO: LEA tests

// Declares a test for a one-operand arithmetic emitter method `Name` taking
// (bits, operand). For every register of every listed width it emits the
// register form plus the [rax] and [r12] memory forms and checks the combined
// disassembly; `size` is the memory size keyword expected for that width.
// The register tables are referenced by pointer so that building the local
// table does not deep-copy them on every test run.
#define ONE_OP_ARITH_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			const std::vector<NamedReg>* regs; \
			std::string size; \
		} regsets[] = { \
			{ 8, &reg8names, "byte" }, \
			{ 8, &reg8hnames, "byte" }, \
			{ 16, &reg16names, "word" }, \
			{ 32, &reg32names, "dword" }, \
			{ 64, &reg64names, "qword" }, \
		}; \
		for (const auto& regset : regsets) \
		{ \
			for (const auto& r : *regset.regs) \
			{ \
				emitter->Name(regset.bits, R(r.reg)); \
				emitter->Name(regset.bits, MatR(RAX)); \
				emitter->Name(regset.bits, MatR(R12)); \
				ExpectDisassembly(#Name " " + r.name + " " \
				                  #Name " " + regset.size + " ptr ds:[rax] " \
				                  #Name " " + regset.size + " ptr ds:[r12]"); \
			} \
		} \
	}

// Instantiations of ONE_OP_ARITH_TEST.
ONE_OP_ARITH_TEST(NOT)
ONE_OP_ARITH_TEST(NEG)

// Declares a test for a two-operand arithmetic emitter method `Name` taking
// (bits, dest operand, source operand). For every register of every listed
// width it emits five forms -- reg,rax / rax,reg / reg,[rax] / [rax],reg /
// reg,imm -- and checks the combined disassembly. `imm`/`immname` give the
// immediate used for that width and how it is expected to be printed (the
// 64-bit row uses a 32-bit immediate that is printed sign-extended).
// The register tables are referenced by pointer so that building the local
// table does not deep-copy them on every test run.
#define TWO_OP_ARITH_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			const std::vector<NamedReg>* regs; \
			std::string size; \
			std::string rax_name; \
			Gen::OpArg imm; \
			std::string immname; \
		} regsets[] = { \
			{ 8, &reg8names, "byte", "al", Imm8(0xEF), "0xef" }, \
			{ 8, &reg8hnames, "byte", "al", Imm8(0xEF), "0xef" }, \
			{ 16, &reg16names, "word", "ax", Imm16(0xBEEF), "0xbeef" }, \
			{ 32, &reg32names, "dword", "eax", Imm32(0xDEADBEEF), "0xdeadbeef" }, \
			{ 64, &reg64names, "qword", "rax", Imm32(0xDEADBEEF), "0xffffffffdeadbeef" }, \
		}; \
		for (const auto& regset : regsets) \
		{ \
			for (const auto& r : *regset.regs) \
			{ \
				emitter->Name(regset.bits, R(r.reg), R(RAX)); \
				emitter->Name(regset.bits, R(RAX), R(r.reg)); \
				emitter->Name(regset.bits, R(r.reg), MatR(RAX)); \
				emitter->Name(regset.bits, MatR(RAX), R(r.reg)); \
				emitter->Name(regset.bits, R(r.reg), regset.imm); \
				ExpectDisassembly(#Name " " + r.name + ", " + regset.rax_name + " " \
				                  #Name " " + regset.rax_name + ", " + r.name + " " \
				                  #Name " " + r.name + ", " + regset.size + " ptr ds:[rax] " \
				                  #Name " " + regset.size + " ptr ds:[rax], " + r.name + " " \
				                  #Name " " + r.name + ", " + regset.immname ); \
			} \
		} \
	}

// Instantiations of TWO_OP_ARITH_TEST: one test per two-operand emitter
// method listed below.
TWO_OP_ARITH_TEST(ADD)
TWO_OP_ARITH_TEST(ADC)
TWO_OP_ARITH_TEST(SUB)
TWO_OP_ARITH_TEST(SBB)
TWO_OP_ARITH_TEST(AND)
TWO_OP_ARITH_TEST(CMP)
TWO_OP_ARITH_TEST(OR)
TWO_OP_ARITH_TEST(XOR)
TWO_OP_ARITH_TEST(MOV)

// TODO: Disassembler inverts operands here.
// TWO_OP_ARITH_TEST(XCHG)
// TWO_OP_ARITH_TEST(TEST)

// BSWAP on every 32-bit and 64-bit register must print as "bswap <name>".
TEST_F(x64EmitterTest, BSWAP)
{
	// The register tables are referenced by pointer so that building this
	// local table does not deep-copy them.
	struct {
		int bits;
		const std::vector<NamedReg>* regs;
	} regsets[] = {
		{ 32, &reg32names },
		{ 64, &reg64names },
	};
	for (const auto& regset : regsets)
	{
		for (const auto& r : *regset.regs)
		{
			emitter->BSWAP(regset.bits, r.reg);
			ExpectDisassembly("bswap " + r.name);
		}
	}
}

TEST_F(x64EmitterTest, MOVSX)
{
	// One expected line per emitted instruction, in emission order. The
	// 64 <- 32 form is expected to be printed as movsxd.
	const std::string expected =
		"movsx ax, ah "
		"movsx eax, r12b "
		"movsx r12d, bx "
		"movsx r12, bl "
		"movsx rax, r12w "
		"movsxd r12, esp";

	emitter->MOVSX(16, 8, RAX, R(AH));
	emitter->MOVSX(32, 8, RAX, R(R12));
	emitter->MOVSX(32, 16, R12, R(RBX));
	emitter->MOVSX(64, 8, R12, R(RBX));
	emitter->MOVSX(64, 16, RAX, R(R12));
	emitter->MOVSX(64, 32, R12, R(RSP));

	ExpectDisassembly(expected);
}

TEST_F(x64EmitterTest, MOVZX)
{
	// One expected line per emitted instruction, in emission order. The
	// 64-bit requests are expected to come out as 32-bit movzx (third and
	// fifth lines name r12d / ecx rather than r12 / rcx).
	const std::string expected =
		"movzx ax, ah "
		"movzx r12d, bpl "
		"movzx r12d, dil "
		"movzx eax, r12w "
		"movzx ecx, si";

	emitter->MOVZX(16, 8, RAX, R(AH));
	emitter->MOVZX(32, 8, R12, R(RBP));
	emitter->MOVZX(64, 8, R12, R(RDI));
	emitter->MOVZX(32, 16, RAX, R(R12));
	emitter->MOVZX(64, 16, RCX, R(RSI));

	ExpectDisassembly(expected);
}

TEST_F(x64EmitterTest, MOVBE)
{
	// For each width: a load into rax from [r12], then a store of r12 to
	// [rax]. All six instructions are checked in a single listing.
	const int widths[] = { 16, 32, 64 };
	for (const int bits : widths)
	{
		emitter->MOVBE(bits, RAX, MatR(R12));
		emitter->MOVBE(bits, MatR(RAX), R12);
	}

	const std::string expected =
		"movbe ax, word ptr ds:[r12] "
		"movbe word ptr ds:[rax], r12w "
		"movbe eax, dword ptr ds:[r12] "
		"movbe dword ptr ds:[rax], r12d "
		"movbe rax, qword ptr ds:[r12] "
		"movbe qword ptr ds:[rax], r12";
	ExpectDisassembly(expected);
}

TEST_F(x64EmitterTest, STMXCSR)
{
	// Store form with a plain [r12] memory operand.
	const std::string expected = "stmxcsr dword ptr ds:[r12]";
	emitter->STMXCSR(MatR(R12));
	ExpectDisassembly(expected);
}

TEST_F(x64EmitterTest, LDMXCSR)
{
	// Load form with a plain [r12] memory operand.
	const std::string expected = "ldmxcsr dword ptr ds:[r12]";
	emitter->LDMXCSR(MatR(R12));
	ExpectDisassembly(expected);
}

TEST_F(x64EmitterTest, FLD_FST_FSTP)
{
	const int widths_with_80[] = { 32, 64, 80 };
	// There is no 80 bit version of FST, so it only gets 32 and 64.
	const int widths_without_80[] = { 32, 64 };

	for (const int bits : widths_with_80)
		emitter->FLD(bits, MatR(RBP));

	for (const int bits : widths_without_80)
		emitter->FST(bits, MatR(RBP));

	for (const int bits : widths_with_80)
		emitter->FSTP(bits, MatR(RBP));

	// One expected line per emitted instruction, in emission order.
	const std::string expected =
		"fld dword ptr ss:[rbp] "
		"fld qword ptr ss:[rbp] "
		"fld tbyte ptr ss:[rbp] "
		"fst dword ptr ss:[rbp] "
		"fst qword ptr ss:[rbp] "
		"fstp dword ptr ss:[rbp] "
		"fstp qword ptr ss:[rbp] "
		"fstp tbyte ptr ss:[rbp]";
	ExpectDisassembly(expected);
}

// Declares a test for an SSE emitter method `Name` taking
// (dest xmm register, source operand). Every xmm/xmm register pair is checked
// individually, followed by the xmm,[r12] memory form; MemBits is the string
// literal naming the memory operand size expected in that form.
#define TWO_OP_SSE_TEST(Name, MemBits) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		for (const NamedReg& dst : xmmnames) \
		{ \
			const std::string prefix = #Name " " + dst.name + ", "; \
			for (const NamedReg& src : xmmnames) \
			{ \
				emitter->Name(dst.reg, R(src.reg)); \
				ExpectDisassembly(prefix + src.name); \
			} \
			emitter->Name(dst.reg, MatR(R12)); \
			ExpectDisassembly(prefix + MemBits " ptr ds:[r12]"); \
		} \
	}

// Instantiations of TWO_OP_SSE_TEST. The second argument is the memory
// operand size keyword expected in the disassembly of the memory-source form.
TWO_OP_SSE_TEST(ADDSS, "dword")
TWO_OP_SSE_TEST(SUBSS, "dword")
TWO_OP_SSE_TEST(MULSS, "dword")
TWO_OP_SSE_TEST(DIVSS, "dword")
TWO_OP_SSE_TEST(MINSS, "dword")
TWO_OP_SSE_TEST(MAXSS, "dword")
TWO_OP_SSE_TEST(SQRTSS, "dword")
TWO_OP_SSE_TEST(RSQRTSS, "dword")

TWO_OP_SSE_TEST(ADDSD, "qword")
TWO_OP_SSE_TEST(SUBSD, "qword")
TWO_OP_SSE_TEST(MULSD, "qword")
TWO_OP_SSE_TEST(DIVSD, "qword")
TWO_OP_SSE_TEST(MINSD, "qword")
TWO_OP_SSE_TEST(MAXSD, "qword")
TWO_OP_SSE_TEST(SQRTSD, "qword")

TWO_OP_SSE_TEST(ADDPS, "dqword")
TWO_OP_SSE_TEST(SUBPS, "dqword")
TWO_OP_SSE_TEST(MULPS, "dqword")
TWO_OP_SSE_TEST(DIVPS, "dqword")
TWO_OP_SSE_TEST(MINPS, "dqword")
TWO_OP_SSE_TEST(MAXPS, "dqword")
TWO_OP_SSE_TEST(SQRTPS, "dqword")
TWO_OP_SSE_TEST(RSQRTPS, "dqword")
TWO_OP_SSE_TEST(ANDPS, "dqword")
TWO_OP_SSE_TEST(ANDNPS, "dqword")
TWO_OP_SSE_TEST(ORPS, "dqword")
TWO_OP_SSE_TEST(XORPS, "dqword")

TWO_OP_SSE_TEST(ADDPD, "dqword")
TWO_OP_SSE_TEST(SUBPD, "dqword")
TWO_OP_SSE_TEST(MULPD, "dqword")
TWO_OP_SSE_TEST(DIVPD, "dqword")
TWO_OP_SSE_TEST(MINPD, "dqword")
TWO_OP_SSE_TEST(MAXPD, "dqword")
TWO_OP_SSE_TEST(SQRTPD, "dqword")
TWO_OP_SSE_TEST(ANDPD, "dqword")
TWO_OP_SSE_TEST(ANDNPD, "dqword")
TWO_OP_SSE_TEST(ORPD, "dqword")
TWO_OP_SSE_TEST(XORPD, "dqword")

TWO_OP_SSE_TEST(MOVSLDUP, "dqword")
TWO_OP_SSE_TEST(MOVSHDUP, "dqword")
TWO_OP_SSE_TEST(MOVDDUP, "qword")

TWO_OP_SSE_TEST(UNPCKLPS, "dqword")
TWO_OP_SSE_TEST(UNPCKHPS, "dqword")
TWO_OP_SSE_TEST(UNPCKLPD, "dqword")
TWO_OP_SSE_TEST(UNPCKHPD, "dqword")

TWO_OP_SSE_TEST(COMISS, "dword")
TWO_OP_SSE_TEST(UCOMISS, "dword")
TWO_OP_SSE_TEST(COMISD, "qword")
TWO_OP_SSE_TEST(UCOMISD, "qword")

// For register-only SSE instructions: declares a test for an emitter method
// `Name` taking (xmm register, xmm register) and checks every register pair
// individually. MemBits is accepted but not used by the macro body, since
// there is no memory form to check.
#define TWO_OP_SSE_REG_TEST(Name, MemBits) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		for (const NamedReg& dst : xmmnames) \
		{ \
			const std::string prefix = #Name " " + dst.name + ", "; \
			for (const NamedReg& src : xmmnames) \
			{ \
				emitter->Name(dst.reg, src.reg); \
				ExpectDisassembly(prefix + src.name); \
			} \
		} \
	}

// Instantiations of TWO_OP_SSE_REG_TEST. The second argument is not used by
// the macro body.
TWO_OP_SSE_REG_TEST(MOVHLPS, "qword")
TWO_OP_SSE_REG_TEST(MOVLHPS, "qword")

// For "register + memory"-only SSE instructions: declares a test for an
// emitter method `Name` that is called both as (xmm register, memory) and as
// (memory, xmm register). Each direction is checked on its own for every xmm
// register; MemBits is the string literal naming the expected memory operand
// size.
#define TWO_OP_SSE_MEM_TEST(Name, MemBits) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		const std::string mem_operand = MemBits " ptr ds:[r12]"; \
		for (const NamedReg& xmm : xmmnames) \
		{ \
			emitter->Name(xmm.reg, MatR(R12)); \
			ExpectDisassembly(#Name " " + xmm.name + ", " + mem_operand); \
			emitter->Name(MatR(R12), xmm.reg); \
			ExpectDisassembly(#Name " " + mem_operand + ", " + xmm.name); \
		} \
	}

// Instantiations of TWO_OP_SSE_MEM_TEST; all four expect a qword memory
// operand.
TWO_OP_SSE_MEM_TEST(MOVLPS, "qword")
TWO_OP_SSE_MEM_TEST(MOVHPS, "qword")
TWO_OP_SSE_MEM_TEST(MOVLPD, "qword")
TWO_OP_SSE_MEM_TEST(MOVHPD, "qword")

// TODO: CMPSS/SD
// TODO: SHUFPS/PD
// TODO: more SSE MOVs
// TODO: MOVMSK

TEST_F(x64EmitterTest, MASKMOVDQU)
{
	// Every xmm/xmm pair is checked individually. The disassembler is
	// expected to append the [rdi] memory operand to the listing itself; the
	// emitter call only receives the two registers.
	const std::string implicit_operand = ", dqword ptr ds:[rdi]";
	for (const NamedReg& first : xmmnames)
	{
		for (const NamedReg& second : xmmnames)
		{
			emitter->MASKMOVDQU(first.reg, second.reg);
			ExpectDisassembly("maskmovdqu " + first.name + ", " + second.name + implicit_operand);
		}
	}
}

TEST_F(x64EmitterTest, LDDQU)
{
	// Load from [r12] into each xmm register in turn.
	for (const NamedReg& dst : xmmnames)
	{
		const std::string expected = "lddqu " + dst.name + ", dqword ptr ds:[r12]";
		emitter->LDDQU(dst.reg, MatR(R12));
		ExpectDisassembly(expected);
	}
}

// Conversion instructions, tested through TWO_OP_SSE_TEST. The second
// argument is the memory operand size keyword expected in the disassembly.
TWO_OP_SSE_TEST(CVTPS2PD, "dqword")
TWO_OP_SSE_TEST(CVTPD2PS, "dqword")
TWO_OP_SSE_TEST(CVTSS2SD, "dword")
TWO_OP_SSE_TEST(CVTSD2SS, "qword")
TWO_OP_SSE_TEST(CVTDQ2PD, "qword")
TWO_OP_SSE_TEST(CVTPD2DQ, "dqword")
TWO_OP_SSE_TEST(CVTDQ2PS, "dqword")
TWO_OP_SSE_TEST(CVTPS2DQ, "dqword")
TWO_OP_SSE_TEST(CVTTPS2DQ, "dqword")
TWO_OP_SSE_TEST(CVTTPD2DQ, "dqword")

// TODO: CVT2SI

// Packed-integer instructions, tested through TWO_OP_SSE_TEST; every entry
// in this group expects a dqword memory operand.
TWO_OP_SSE_TEST(PACKSSDW, "dqword")
TWO_OP_SSE_TEST(PACKSSWB, "dqword")
TWO_OP_SSE_TEST(PACKUSDW, "dqword")
TWO_OP_SSE_TEST(PACKUSWB, "dqword")

TWO_OP_SSE_TEST(PUNPCKLBW, "dqword")
TWO_OP_SSE_TEST(PUNPCKLWD, "dqword")
TWO_OP_SSE_TEST(PUNPCKLDQ, "dqword")
TWO_OP_SSE_TEST(PUNPCKLQDQ, "dqword")

TWO_OP_SSE_TEST(PTEST, "dqword")
TWO_OP_SSE_TEST(PAND, "dqword")
TWO_OP_SSE_TEST(PANDN, "dqword")
TWO_OP_SSE_TEST(POR, "dqword")
TWO_OP_SSE_TEST(PXOR, "dqword")
TWO_OP_SSE_TEST(PADDB, "dqword")
TWO_OP_SSE_TEST(PADDW, "dqword")
TWO_OP_SSE_TEST(PADDD, "dqword")
TWO_OP_SSE_TEST(PADDQ, "dqword")
TWO_OP_SSE_TEST(PADDSB, "dqword")
TWO_OP_SSE_TEST(PADDSW, "dqword")
TWO_OP_SSE_TEST(PADDUSB, "dqword")
TWO_OP_SSE_TEST(PADDUSW, "dqword")
TWO_OP_SSE_TEST(PSUBB, "dqword")
TWO_OP_SSE_TEST(PSUBW, "dqword")
TWO_OP_SSE_TEST(PSUBD, "dqword")
TWO_OP_SSE_TEST(PSUBQ, "dqword")
TWO_OP_SSE_TEST(PSUBUSB, "dqword")
TWO_OP_SSE_TEST(PSUBUSW, "dqword")
TWO_OP_SSE_TEST(PAVGB, "dqword")
TWO_OP_SSE_TEST(PAVGW, "dqword")
TWO_OP_SSE_TEST(PCMPEQB, "dqword")
TWO_OP_SSE_TEST(PCMPEQW, "dqword")
TWO_OP_SSE_TEST(PCMPEQD, "dqword")
TWO_OP_SSE_TEST(PCMPGTB, "dqword")
TWO_OP_SSE_TEST(PCMPGTW, "dqword")
TWO_OP_SSE_TEST(PCMPGTD, "dqword")
TWO_OP_SSE_TEST(PMADDWD, "dqword")
TWO_OP_SSE_TEST(PSADBW, "dqword")
TWO_OP_SSE_TEST(PMAXSW, "dqword")
TWO_OP_SSE_TEST(PMAXUB, "dqword")
TWO_OP_SSE_TEST(PMINSW, "dqword")
TWO_OP_SSE_TEST(PMINUB, "dqword")
TWO_OP_SSE_TEST(PSHUFB, "dqword")

// TODO: PEXT/INS/SHUF/MOVMSK

// PMOVSX* / PMOVZX* instructions, tested through TWO_OP_SSE_TEST. The
// expected memory operand size differs per variant (qword, dword or word).
TWO_OP_SSE_TEST(PMOVSXBW, "qword")
TWO_OP_SSE_TEST(PMOVSXBD, "dword")
TWO_OP_SSE_TEST(PMOVSXBQ, "word")
TWO_OP_SSE_TEST(PMOVSXWD, "qword")
TWO_OP_SSE_TEST(PMOVSXWQ, "dword")
TWO_OP_SSE_TEST(PMOVSXDQ, "qword")

TWO_OP_SSE_TEST(PMOVZXBW, "qword")
TWO_OP_SSE_TEST(PMOVZXBD, "dword")
TWO_OP_SSE_TEST(PMOVZXBQ, "word")
TWO_OP_SSE_TEST(PMOVZXWD, "qword")
TWO_OP_SSE_TEST(PMOVZXWQ, "dword")
TWO_OP_SSE_TEST(PMOVZXDQ, "qword")

// TODO: BLEND

// TODO: AVX

// For VEX GPR instructions that take the form op reg, r/m, reg.
// Declares a test for an emitter method `Name` taking
// (bits, dest register, r/m operand, second register). For 32 and 64 bits and
// every register of that width, the register is placed in the dest slot, the
// r/m slot and (with a [r12] r/m operand) the trailing register slot, and the
// combined disassembly is checked.
// The register tables are referenced by pointer so that building the local
// table does not deep-copy them on every test run.
#define VEX_RMR_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			const std::vector<NamedReg>* regs; \
			std::string out_name; \
			std::string size; \
		} regsets[] = { \
			{ 32, &reg32names, "eax", "dword" }, \
			{ 64, &reg64names, "rax", "qword" }, \
		}; \
		for (const auto& regset : regsets) \
		{ \
			for (const auto& r : *regset.regs) \
			{ \
				emitter->Name(regset.bits, r.reg, R(RAX), RAX); \
				emitter->Name(regset.bits, RAX, R(r.reg), RAX); \
				emitter->Name(regset.bits, RAX, MatR(R12), r.reg); \
				ExpectDisassembly(#Name " " + r.name + ", " + regset.out_name + ", " + regset.out_name + " " \
				                  #Name " " + regset.out_name + ", " + r.name + ", " + regset.out_name + " " \
				                  #Name " " + regset.out_name + ", " + regset.size + " ptr ds:[r12], " + r.name + " "); \
			} \
		} \
	}

// Instantiations of VEX_RMR_TEST.
VEX_RMR_TEST(SHRX)
VEX_RMR_TEST(SARX)
VEX_RMR_TEST(SHLX)
VEX_RMR_TEST(BEXTR)
VEX_RMR_TEST(BZHI)

// For VEX GPR instructions that take the form op reg, reg, r/m.
// Declares a test for an emitter method `Name` taking
// (bits, dest register, second register, r/m operand). For 32 and 64 bits and
// every register of that width, the register is placed in the dest slot, the
// r/m slot and (with a [r12] r/m operand) the middle register slot, and the
// combined disassembly is checked.
// The register tables are referenced by pointer so that building the local
// table does not deep-copy them on every test run.
#define VEX_RRM_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			const std::vector<NamedReg>* regs; \
			std::string out_name; \
			std::string size; \
		} regsets[] = { \
			{ 32, &reg32names, "eax", "dword" }, \
			{ 64, &reg64names, "rax", "qword" }, \
		}; \
		for (const auto& regset : regsets) \
		{ \
			for (const auto& r : *regset.regs) \
			{ \
				emitter->Name(regset.bits, r.reg, RAX, R(RAX)); \
				emitter->Name(regset.bits, RAX, RAX, R(r.reg)); \
				emitter->Name(regset.bits, RAX, r.reg, MatR(R12)); \
				ExpectDisassembly(#Name " " + r.name+ ", " + regset.out_name + ", " + regset.out_name  + " " \
				                  #Name " " + regset.out_name + ", " + regset.out_name + ", " + r.name + " " \
				                  #Name " " + regset.out_name + ", " + r.name + ", " + regset.size + " ptr ds:[r12] "); \
			} \
		} \
	}

// Instantiations of VEX_RRM_TEST.
VEX_RRM_TEST(PEXT)
VEX_RRM_TEST(PDEP)
VEX_RRM_TEST(MULX)
VEX_RRM_TEST(ANDN)

// For VEX GPR instructions that take the form op reg, r/m.
// Declares a test for an emitter method `Name` taking
// (bits, dest register, r/m operand). For 32 and 64 bits and every register
// of that width it emits reg,rax / rax,reg / reg,[r12] and checks the
// combined disassembly.
// The register tables are referenced by pointer so that building the local
// table does not deep-copy them on every test run.
#define VEX_RM_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			const std::vector<NamedReg>* regs; \
			std::string out_name; \
			std::string size; \
		} regsets[] = { \
			{ 32, &reg32names, "eax", "dword" }, \
			{ 64, &reg64names, "rax", "qword" }, \
		}; \
		for (const auto& regset : regsets) \
		{ \
			for (const auto& r : *regset.regs) \
			{ \
				emitter->Name(regset.bits, r.reg, R(RAX)); \
				emitter->Name(regset.bits, RAX, R(r.reg)); \
				emitter->Name(regset.bits, r.reg, MatR(R12)); \
				ExpectDisassembly(#Name " " + r.name+ ", " + regset.out_name  + " " \
				                  #Name " " + regset.out_name + ", " + r.name + " " \
				                  #Name " " + r.name + ", " + regset.size + " ptr ds:[r12] "); \
			} \
		} \
	}

// Instantiations of VEX_RM_TEST.
VEX_RM_TEST(BLSR)
VEX_RM_TEST(BLSMSK)
VEX_RM_TEST(BLSI)

// For VEX GPR instructions that take the form op reg, r/m, imm.
// Declares a test for an emitter method `Name` taking
// (bits, dest register, r/m operand, immediate). For 32 and 64 bits and every
// register of that width it emits reg,rax,4 / rax,reg,4 / reg,[r12],4 and
// checks the combined disassembly (the immediate is expected as "0x04").
// The register tables are referenced by pointer so that building the local
// table does not deep-copy them on every test run.
#define VEX_RMI_TEST(Name) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		struct { \
			int bits; \
			const std::vector<NamedReg>* regs; \
			std::string out_name; \
			std::string size; \
		} regsets[] = { \
			{ 32, &reg32names, "eax", "dword" }, \
			{ 64, &reg64names, "rax", "qword" }, \
		}; \
		for (const auto& regset : regsets) \
		{ \
			for (const auto& r : *regset.regs) \
			{ \
				emitter->Name(regset.bits, r.reg, R(RAX), 4); \
				emitter->Name(regset.bits, RAX, R(r.reg), 4); \
				emitter->Name(regset.bits, r.reg, MatR(R12), 4); \
				ExpectDisassembly(#Name " " + r.name+ ", " + regset.out_name  + ", 0x04 " \
				                  #Name " " + regset.out_name + ", " + r.name + ", 0x04 " \
				                  #Name " " + r.name + ", " + regset.size + " ptr ds:[r12], 0x04 "); \
			} \
		} \
	}

// The only instantiation of VEX_RMI_TEST.
VEX_RMI_TEST(RORX)

// For AVX instructions that take the form op reg, reg, r/m.
// Declares a test for an emitter method `Name` taking
// (dest xmm, second xmm, r/m operand). Every xmm register is placed in the
// dest slot, the r/m slot and (with a [r12] r/m operand) the middle slot,
// with xmm0 filling the other positions; the combined disassembly is checked.
// `sizename` is the memory operand size keyword expected for the [r12] form.
// It may be any expression convertible to std::string (the FMA macros pass a
// conditional expression), so it is captured in a local before being
// concatenated.
#define AVX_RRM_TEST(Name, sizename) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		const std::string fixed_reg = "xmm0"; \
		const std::string mem_size = sizename; \
		for (const auto& r : xmmnames) \
		{ \
			emitter->Name(r.reg, XMM0, R(XMM0)); \
			emitter->Name(XMM0, XMM0, R(r.reg)); \
			emitter->Name(XMM0, r.reg, MatR(R12)); \
			ExpectDisassembly(#Name " " + r.name + ", " + fixed_reg + ", " + fixed_reg + " " \
			                  #Name " " + fixed_reg + ", " + fixed_reg + ", " + r.name + " " \
			                  #Name " " + fixed_reg + ", " + r.name + ", " + mem_size + " ptr ds:[r12] "); \
		} \
	}

// Instantiations of AVX_RRM_TEST for the bitwise logic instructions; all of
// them expect a dqword memory operand.
AVX_RRM_TEST(VANDPS,  "dqword")
AVX_RRM_TEST(VANDPD,  "dqword")
AVX_RRM_TEST(VANDNPS, "dqword")
AVX_RRM_TEST(VANDNPD, "dqword")
AVX_RRM_TEST(VORPS,   "dqword")
AVX_RRM_TEST(VORPD,   "dqword")
AVX_RRM_TEST(VXORPS,  "dqword")
AVX_RRM_TEST(VXORPD,  "dqword")
AVX_RRM_TEST(VPAND,   "dqword")
AVX_RRM_TEST(VPANDN,  "dqword")
AVX_RRM_TEST(VPOR,    "dqword")
AVX_RRM_TEST(VPXOR,   "dqword")

// Instantiates AVX_RRM_TEST six times for one FMA3 base mnemonic: the 132,
// 213 and 231 variants, each with an S and a D suffix. The test/method name
// is built by token pasting as <Name><132|213|231><P><S|D>, where P is the
// letter passed by the caller (P or S in the invocations in this file).
// `packed` selects the expected memory operand size: "dqword" when true,
// otherwise "dword" for the S-suffixed tests and "qword" for the D-suffixed
// ones.
#define FMA3_TEST(Name, P, packed) \
	AVX_RRM_TEST(Name ## 132 ## P ## S, packed ? "dqword" : "dword") \
	AVX_RRM_TEST(Name ## 213 ## P ## S, packed ? "dqword" : "dword") \
	AVX_RRM_TEST(Name ## 231 ## P ## S, packed ? "dqword" : "dword") \
	AVX_RRM_TEST(Name ## 132 ## P ## D, packed ? "dqword" : "qword") \
	AVX_RRM_TEST(Name ## 213 ## P ## D, packed ? "dqword" : "qword") \
	AVX_RRM_TEST(Name ## 231 ## P ## D, packed ? "dqword" : "qword")

// Instantiations of FMA3_TEST. The P/true rows generate the ...PS/...PD
// tests and the S/false rows the ...SS/...SD tests; VFMADDSUB and VFMSUBADD
// are only instantiated with P/true.
FMA3_TEST(VFMADD, P, true)
FMA3_TEST(VFMADD, S, false)
FMA3_TEST(VFMSUB, P, true)
FMA3_TEST(VFMSUB, S, false)
FMA3_TEST(VFNMADD, P, true)
FMA3_TEST(VFNMADD, S, false)
FMA3_TEST(VFNMSUB, P, true)
FMA3_TEST(VFNMSUB, S, false)
FMA3_TEST(VFMADDSUB, P, true)
FMA3_TEST(VFMSUBADD, P, true)

// For VEX instructions that take the form op reg, reg, r/m, reg OR
// op reg, reg, reg, r/m.
// Declares a test for an emitter method `Name` that is called with four
// operands in both of those shapes. For every xmm register three calls are
// emitted (all-register form, memory-last form and memory-third form, with
// xmm0 filling the remaining positions) and the combined disassembly is
// checked. `sizename` is the memory operand size keyword expected for the
// [r12] forms. It may be any expression convertible to std::string (the FMA
// macros pass a conditional expression), so it is captured in a local before
// being concatenated.
#define VEX_RRMR_RRRM_TEST(Name, sizename) \
	TEST_F(x64EmitterTest, Name) \
	{ \
		const std::string fixed_reg = "xmm0"; \
		const std::string mem_size = sizename; \
		for (const auto& r : xmmnames) \
		{ \
			emitter->Name(r.reg, XMM0, R(XMM0), r.reg); \
			emitter->Name(XMM0, XMM0, r.reg, MatR(R12)); \
			emitter->Name(XMM0, r.reg, MatR(R12), XMM0); \
			ExpectDisassembly(#Name " " + r.name + ", " + fixed_reg + ", " + fixed_reg + ", " + r.name + " " \
			                  #Name " " + fixed_reg + ", " + fixed_reg + ", " + r.name + ", " + mem_size + " ptr ds:[r12] " \
			                  #Name " " + fixed_reg + ", " + r.name + ", " + mem_size + " ptr ds:[r12], " + fixed_reg); \
		} \
	}

// Instantiates VEX_RRMR_RRRM_TEST twice for one FMA4 base mnemonic. The
// test/method name is built by token pasting as <Name><P><S|D>, where P is
// the letter passed by the caller. `packed` selects the expected memory
// operand size: "dqword" when true, otherwise "dword" for the S-suffixed test
// and "qword" for the D-suffixed one.
#define FMA4_TEST(Name, P, packed) \
	VEX_RRMR_RRRM_TEST(Name ## P ## S, packed ? "dqword" : "dword") \
	VEX_RRMR_RRRM_TEST(Name ## P ## D, packed ? "dqword" : "qword")

// Instantiations of FMA4_TEST. The P/true rows generate the ...PS/...PD
// tests and the S/false rows the ...SS/...SD tests; VFMADDSUB and VFMSUBADD
// are only instantiated with P/true.
FMA4_TEST(VFMADD, P, true)
FMA4_TEST(VFMADD, S, false)
FMA4_TEST(VFMSUB, P, true)
FMA4_TEST(VFMSUB, S, false)
FMA4_TEST(VFNMADD, P, true)
FMA4_TEST(VFNMADD, S, false)
FMA4_TEST(VFNMSUB, P, true)
FMA4_TEST(VFNMSUB, S, false)
FMA4_TEST(VFMADDSUB, P, true)
FMA4_TEST(VFMSUBADD, P, true)

}  // namespace Gen