Fix Vnmls_S fast path (F64: losing input d value). Fix Vnmla_S & Vnmls_S slow paths (using fused instructions). Fix Vfma_V slow path not using StandardFPSCRValue(). (#1775)

* Fix Vnmls_S fast path (F64: losing input d value). Fix Vnmla_S & Vnmls_S slow paths (using fused instructions).

Add Vfma_S & Vfms_S Fma fast paths.
Add Vfnma_S instruction with Fma/Sse fast paths and slow path.
Add Vfnms_S Sse fast path.

Add tests for the affected instructions.

Nits.

* InternalVersion = 1775

* Nits.

* Fix Vfma_V slow path not using StandardFPSCRValue().

* Nit: Fix Vfma_V order.

* Add Vfms_V Sse fast path and slow path.

* Add Vfma_V and Vfms_V Test.
This commit is contained in:
LDj3SNuD 2020-12-17 20:43:41 +01:00 committed by GitHub
parent b5c215111d
commit 8a33e884f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 292 additions and 221 deletions

View File

@ -274,17 +274,15 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfnmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38be, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfnmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38be, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); Add(X86Instruction.Vfnmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vfnmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfnmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));

View File

@ -440,9 +440,12 @@ namespace ARMeilleure.CodeGen.X86
else else
{ {
EnsureSameReg(dest, src1); EnsureSameReg(dest, src1);
Debug.Assert(src3.GetRegister().Index == 0); Debug.Assert(src3.GetRegister().Index == 0);
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2); context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
} }
break; break;
} }
@ -474,11 +477,16 @@ namespace ARMeilleure.CodeGen.X86
Operand src2 = operation.GetSource(1); Operand src2 = operation.GetSource(1);
Operand src3 = operation.GetSource(2); Operand src3 = operation.GetSource(2);
EnsureSameType(dest, src1, src2, src3);
EnsureSameReg(dest, src1);
Debug.Assert(!dest.Type.IsInteger());
Debug.Assert(HardwareCapabilities.SupportsVexEncoding); Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
Debug.Assert(dest.Kind == OperandKind.Register && src1.Kind == OperandKind.Register && src2.Kind == OperandKind.Register);
Debug.Assert(src3.Kind == OperandKind.Register || src3.Kind == OperandKind.Memory);
EnsureSameType(dest, src1, src2, src3);
Debug.Assert(dest.Type == OperandType.V128);
Debug.Assert(dest.Value == src1.Value);
context.Assembler.WriteInstruction(info.Inst, dest, src2, src3); context.Assembler.WriteInstruction(info.Inst, dest, src2, src3);
break; break;

View File

@ -166,16 +166,14 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary)); Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary)); Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary));
Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm)); Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm));
Add(Intrinsic.X86Vfmadd231pd, new IntrinsicInfo(X86Instruction.Vfmadd231pd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma)); Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma)); Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma)); Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmsub231pd, new IntrinsicInfo(X86Instruction.Vfmsub231pd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmsub231ps, new IntrinsicInfo(X86Instruction.Vfmsub231ps, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma)); Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma)); Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231pd, new IntrinsicInfo(X86Instruction.Vfnmsub231pd, IntrinsicType.Fma)); Add(Intrinsic.X86Vfnmadd231ps, new IntrinsicInfo(X86Instruction.Vfnmadd231ps, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231ps, new IntrinsicInfo(X86Instruction.Vfnmsub231ps, IntrinsicType.Fma)); Add(Intrinsic.X86Vfnmadd231sd, new IntrinsicInfo(X86Instruction.Vfnmadd231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma)); Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma)); Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary)); Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));

View File

@ -203,18 +203,16 @@ namespace ARMeilleure.CodeGen.X86
Vblendvps, Vblendvps,
Vcvtph2ps, Vcvtph2ps,
Vcvtps2ph, Vcvtps2ph,
Vfmadd231pd,
Vfmadd231ps, Vfmadd231ps,
Vfmadd231sd, Vfmadd231sd,
Vfmadd231ss, Vfmadd231ss,
Vfmsub231ps,
Vfmsub231pd,
Vfmsub231ss,
Vfmsub231sd, Vfmsub231sd,
Vfnmsub231ps, Vfmsub231ss,
Vfnmsub231pd, Vfnmadd231ps,
Vfnmsub231ss, Vfnmadd231sd,
Vfnmadd231ss,
Vfnmsub231sd, Vfnmsub231sd,
Vfnmsub231ss,
Vpblendvb, Vpblendvb,
Xor, Xor,
Xorpd, Xorpd,

View File

@ -822,6 +822,7 @@ namespace ARMeilleure.Decoders
SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create); SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create);
SetA32("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create); SetA32("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create);
SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create); SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create);
SetA32("111100100x10xxxxxxxx1100xxx1xxxx", InstName.Vfms, InstEmit32.Vfms_V, OpCode32SimdReg.Create);
SetA32("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create); SetA32("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create);
SetA32("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create); SetA32("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create);
SetA32("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create); SetA32("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create);

View File

@ -591,7 +591,7 @@ namespace ARMeilleure.Instructions
EmitAluStore(context, res); EmitAluStore(context, res);
} }
public static void EmitDiv(ArmEmitterContext context, bool unsigned) private static void EmitDiv(ArmEmitterContext context, bool unsigned)
{ {
Operand n = GetAluN(context); Operand n = GetAluN(context);
Operand m = GetAluM(context); Operand m = GetAluM(context);

View File

@ -329,7 +329,7 @@ namespace ARMeilleure.Instructions
EmitGenericAluStoreA32(context, op.RdLo, op.SetFlags, lo); EmitGenericAluStoreA32(context, op.RdLo, op.SetFlags, lo);
} }
public static void EmitMlal(ArmEmitterContext context, bool signed) private static void EmitMlal(ArmEmitterContext context, bool signed)
{ {
OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp; OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp;

View File

@ -252,28 +252,14 @@ namespace ARMeilleure.Instructions
} }
} }
public static void Vfma_V(ArmEmitterContext context) // Fused. public static void Vfma_S(ArmEmitterContext context) // Fused.
{ {
if (Optimizations.FastFP && Optimizations.UseFma) if (Optimizations.FastFP && Optimizations.UseFma)
{ {
// Vectors contain elements that are 32-bits in length always. The only thing that will change is the number of elements in a vector. EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd);
// The 64-bit variant will never be used.
EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps, Intrinsic.X86Vfmadd231pd);
} }
else else if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
{
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
});
}
}
public static void Vfma_S(ArmEmitterContext context) // Fused.
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
// TODO: Use FMA instruction set.
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd); EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
} }
else else
@ -285,11 +271,29 @@ namespace ARMeilleure.Instructions
} }
} }
// Emits the vector form of VFMA. Two code paths exist:
//  - FastFP with FMA support: a single X86Vfmadd231ps intrinsic;
//  - otherwise: the SoftFloat32.FPMulAddFpscr routine, invoked through
//    EmitSoftFloatCallDefaultFpscr.
public static void Vfma_V(ArmEmitterContext context) // Fused.
{
    bool useFmaFastPath = Optimizations.FastFP && Optimizations.UseFma;

    if (!useFmaFastPath)
    {
        // Slow path: delegate the fused multiply-add to the soft-float helper.
        EmitVectorTernaryOpF32(context, (x1, x2, x3) =>
            EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), x1, x2, x3));

        return;
    }

    // Fast path: only the 32-bit packed intrinsic is passed.
    EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps);
}
public static void Vfms_S(ArmEmitterContext context) // Fused. public static void Vfms_S(ArmEmitterContext context) // Fused.
{ {
if (Optimizations.FastFP && Optimizations.UseSse2) if (Optimizations.FastFP && Optimizations.UseFma)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
// TODO: Use FMA instruction set.
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd); EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
} }
else else
@ -301,17 +305,36 @@ namespace ARMeilleure.Instructions
} }
} }
// Emits the vector form of VFMS. Two code paths exist:
//  - FastFP with FMA support: a single X86Vfnmadd231ps intrinsic;
//  - otherwise: the SoftFloat32.FPMulSubFpscr routine, invoked through
//    EmitSoftFloatCallDefaultFpscr.
public static void Vfms_V(ArmEmitterContext context) // Fused.
{
    bool useFmaFastPath = Optimizations.FastFP && Optimizations.UseFma;

    if (!useFmaFastPath)
    {
        // Slow path: delegate the fused multiply-subtract to the soft-float helper.
        EmitVectorTernaryOpF32(context, (x1, x2, x3) =>
            EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), x1, x2, x3));

        return;
    }

    // Fast path: only the 32-bit packed intrinsic is passed.
    EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps);
}
public static void Vfnma_S(ArmEmitterContext context) // Fused. public static void Vfnma_S(ArmEmitterContext context) // Fused.
{ {
if (Optimizations.FastFP && Optimizations.UseFma) if (Optimizations.FastFP && Optimizations.UseFma)
{ {
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd); EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
} }
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
}
else else
{ {
EmitScalarTernaryOpF32(context, (op1, op2, op3) => EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{ {
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), context.Negate(op1), context.Negate(op2), op3); return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3);
}); });
} }
} }
@ -322,11 +345,15 @@ namespace ARMeilleure.Instructions
{ {
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd); EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
} }
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
}
else else
{ {
EmitScalarTernaryOpF32(context, (op1, op2, op3) => EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{ {
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), context.Negate(op1), op2, op3); return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3);
}); });
} }
} }
@ -422,36 +449,21 @@ namespace ARMeilleure.Instructions
if (Optimizations.FastFP && Optimizations.UseSse2) if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
EmitScalarTernaryOpSimd32(context, (d, n, m) => EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
{
if ((op.Size & 1) == 0)
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
res = context.AddIntrinsic(Intrinsic.X86Addss, d, res);
Operand mask = X86GetScalar(context, -0f);
return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
}
else
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
res = context.AddIntrinsic(Intrinsic.X86Addsd, d, res);
Operand mask = X86GetScalar(context, -0d);
return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
}
});
} }
else if (Optimizations.FastFP) else if (Optimizations.FastFP)
{ {
EmitScalarTernaryOpF32(context, (op1, op2, op3) => EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{ {
return context.Negate(context.Add(op1, context.Multiply(op2, op3))); return context.Subtract(context.Negate(op1), context.Multiply(op2, op3));
}); });
} }
else else
{ {
EmitScalarTernaryOpF32(context, (op1, op2, op3) => EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{ {
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3); Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), context.Negate(op1), res);
}); });
} }
} }
@ -462,24 +474,7 @@ namespace ARMeilleure.Instructions
if (Optimizations.FastFP && Optimizations.UseSse2) if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
EmitScalarTernaryOpSimd32(context, (d, n, m) => EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
{
if ((op.Size & 1) == 0)
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
Operand mask = X86GetScalar(context, -0f);
d = context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
return context.AddIntrinsic(Intrinsic.X86Addss, d, res);
}
else
{
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
Operand mask = X86GetScalar(context, -0d);
d = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
return context.AddIntrinsic(Intrinsic.X86Addsd, d, res);
}
});
} }
else if (Optimizations.FastFP) else if (Optimizations.FastFP)
{ {
@ -492,7 +487,8 @@ namespace ARMeilleure.Instructions
{ {
EmitScalarTernaryOpF32(context, (op1, op2, op3) => EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{ {
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3); Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), context.Negate(op1), res);
}); });
} }
} }

View File

@ -820,15 +820,15 @@ namespace ARMeilleure.Instructions
}); });
} }
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32)
{ {
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; Debug.Assert((op.Size & 1) == 0);
EmitVectorTernaryOpSimd32(context, (d, n, m) => EmitVectorTernaryOpSimd32(context, (d, n, m) =>
{ {
return context.AddIntrinsic(inst, d, n, m); return context.AddIntrinsic(inst32, d, n, m);
}); });
} }
@ -927,7 +927,13 @@ namespace ARMeilleure.Instructions
}); });
} }
public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2) public static void EmitScalarTernaryOpF32(
ArmEmitterContext context,
Intrinsic inst32pt1,
Intrinsic inst64pt1,
Intrinsic inst32pt2,
Intrinsic inst64pt2,
bool isNegD = false)
{ {
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
@ -939,6 +945,18 @@ namespace ARMeilleure.Instructions
EmitScalarTernaryOpSimd32(context, (d, n, m) => EmitScalarTernaryOpSimd32(context, (d, n, m) =>
{ {
Operand res = context.AddIntrinsic(inst1, n, m); Operand res = context.AddIntrinsic(inst1, n, m);
if (isNegD)
{
Operand mask = doubleSize
? X86GetScalar(context, -0d)
: X86GetScalar(context, -0f);
d = doubleSize
? context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d)
: context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
}
return context.AddIntrinsic(inst2, d, res); return context.AddIntrinsic(inst2, d, res);
}); });
} }

View File

@ -155,16 +155,14 @@ namespace ARMeilleure.IntermediateRepresentation
X86Unpcklps, X86Unpcklps,
X86Vcvtph2ps, X86Vcvtph2ps,
X86Vcvtps2ph, X86Vcvtps2ph,
X86Vfmadd231pd,
X86Vfmadd231ps, X86Vfmadd231ps,
X86Vfmadd231sd, X86Vfmadd231sd,
X86Vfmadd231ss, X86Vfmadd231ss,
X86Vfmsub231pd,
X86Vfmsub231ps,
X86Vfmsub231sd, X86Vfmsub231sd,
X86Vfmsub231ss, X86Vfmsub231ss,
X86Vfnmsub231pd, X86Vfnmadd231ps,
X86Vfnmsub231ps, X86Vfnmadd231sd,
X86Vfnmadd231ss,
X86Vfnmsub231sd, X86Vfnmsub231sd,
X86Vfnmsub231ss, X86Vfnmsub231ss,
X86Xorpd, X86Xorpd,

View File

@ -22,7 +22,7 @@ namespace ARMeilleure.Translation.PTC
{ {
private const string HeaderMagic = "PTChd"; private const string HeaderMagic = "PTChd";
private const uint InternalVersion = 1713; //! To be incremented manually for each change to the ARMeilleure project. private const int InternalVersion = 1775; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0"; private const string ActualDir = "0";
private const string BackupDir = "1"; private const string BackupDir = "1";

View File

@ -22,41 +22,45 @@ namespace Ryujinx.Tests.Cpu
0x80000000u, 0xFFFFFFFFu }; 0x80000000u, 0xFFFFFFFFu };
} }
private static IEnumerable<uint> _1S_F_() private static IEnumerable<ulong> _1S_F_()
{ {
yield return 0xFF7FFFFFu; // -Max Normal (float.MinValue) yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue)
yield return 0x80800000u; // -Min Normal yield return 0x0000000080800000ul; // -Min Normal
yield return 0x807FFFFFu; // -Max Subnormal yield return 0x00000000807FFFFFul; // -Max Subnormal
yield return 0x80000001u; // -Min Subnormal (-float.Epsilon) yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon)
yield return 0x7F7FFFFFu; // +Max Normal (float.MaxValue) yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue)
yield return 0x00800000u; // +Min Normal yield return 0x0000000000800000ul; // +Min Normal
yield return 0x007FFFFFu; // +Max Subnormal yield return 0x00000000007FFFFFul; // +Max Subnormal
yield return 0x00000001u; // +Min Subnormal (float.Epsilon) yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon)
if (!NoZeros) if (!NoZeros)
{ {
yield return 0x80000000u; // -Zero yield return 0x0000000080000000ul; // -Zero
yield return 0x00000000u; // +Zero yield return 0x0000000000000000ul; // +Zero
} }
if (!NoInfs) if (!NoInfs)
{ {
yield return 0xFF800000u; // -Infinity yield return 0x00000000FF800000ul; // -Infinity
yield return 0x7F800000u; // +Infinity yield return 0x000000007F800000ul; // +Infinity
} }
if (!NoNaNs) if (!NoNaNs)
{ {
yield return 0xFFC00000u; // -QNaN (all zeros payload) (float.NaN) yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN)
yield return 0xFFBFFFFFu; // -SNaN (all ones payload) yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload)
yield return 0x7FC00000u; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN)
yield return 0x7FBFFFFFu; // +SNaN (all ones payload) yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload)
} }
for (int cnt = 1; cnt <= RndCnt; cnt++) for (int cnt = 1; cnt <= RndCnt; cnt++)
{ {
yield return GenNormalS(); ulong grbg = TestContext.CurrentContext.Random.NextUInt();
yield return GenSubnormalS(); ulong rnd1 = GenNormalS();
ulong rnd2 = GenSubnormalS();
yield return (grbg << 32) | rnd1;
yield return (grbg << 32) | rnd2;
} }
} }
@ -93,8 +97,11 @@ namespace Ryujinx.Tests.Cpu
for (int cnt = 1; cnt <= RndCnt; cnt++) for (int cnt = 1; cnt <= RndCnt; cnt++)
{ {
yield return GenNormalD(); ulong rnd1 = GenNormalD();
yield return GenSubnormalD(); ulong rnd2 = GenSubnormalD();
yield return rnd1;
yield return rnd2;
} }
} }
#endregion #endregion
@ -109,10 +116,10 @@ namespace Ryujinx.Tests.Cpu
[Test, Pairwise, Description("VCVT.<dt>.F32 <Sd>, <Sm>")] [Test, Pairwise, Description("VCVT.<dt>.F32 <Sd>, <Sm>")]
public void Vcvt_F32_I32([Values(0u, 1u, 2u, 3u)] uint rd, public void Vcvt_F32_I32([Values(0u, 1u, 2u, 3u)] uint rd,
[Values(0u, 1u, 2u, 3u)] uint rm, [Values(0u, 1u, 2u, 3u)] uint rm,
[ValueSource(nameof(_1S_F_))] uint s0, [ValueSource(nameof(_1S_F_))] ulong s0,
[ValueSource(nameof(_1S_F_))] uint s1, [ValueSource(nameof(_1S_F_))] ulong s1,
[ValueSource(nameof(_1S_F_))] uint s2, [ValueSource(nameof(_1S_F_))] ulong s2,
[ValueSource(nameof(_1S_F_))] uint s3, [ValueSource(nameof(_1S_F_))] ulong s3,
[Values] bool unsigned) // <U32, S32> [Values] bool unsigned) // <U32, S32>
{ {
uint opcode = 0xeebc0ac0u; // VCVT.U32.F32 S0, S0 uint opcode = 0xeebc0ac0u; // VCVT.U32.F32 S0, S0
@ -125,7 +132,7 @@ namespace Ryujinx.Tests.Cpu
opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22); opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22);
opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5); opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5);
V128 v0 = MakeVectorE0E1E2E3(s0, s1, s2, s3); V128 v0 = MakeVectorE0E1E2E3((uint)s0, (uint)s1, (uint)s2, (uint)s3);
SingleOpcode(opcode, v0: v0); SingleOpcode(opcode, v0: v0);

View File

@ -22,6 +22,59 @@ namespace Ryujinx.Tests.Cpu
}; };
} }
// Base opcodes (register fields left at zero) for the scalar F32
// VFMA / VFMS / VFNMA / VFNMS tests; a fresh array is built on every call.
private static uint[] _Vfma_Vfms_Vfnma_Vfnms_S_F32_()
{
    uint[] opcodes =
    {
        0xEEA00A00u, // VFMA. F32 S0, S0, S0
        0xEEA00A40u, // VFMS. F32 S0, S0, S0
        0xEE900A40u, // VFNMA.F32 S0, S0, S0
        0xEE900A00u  // VFNMS.F32 S0, S0, S0
    };

    return opcodes;
}
// Base opcodes (register fields left at zero) for the scalar F64
// VFMA / VFMS / VFNMA / VFNMS tests; a fresh array is built on every call.
private static uint[] _Vfma_Vfms_Vfnma_Vfnms_S_F64_()
{
    uint[] opcodes =
    {
        0xEEA00B00u, // VFMA. F64 D0, D0, D0
        0xEEA00B40u, // VFMS. F64 D0, D0, D0
        0xEE900B40u, // VFNMA.F64 D0, D0, D0
        0xEE900B00u  // VFNMS.F64 D0, D0, D0
    };

    return opcodes;
}
// Base opcodes (register fields left at zero) for the vector F32
// VFMA / VFMS tests; a fresh array is built on every call.
private static uint[] _Vfma_Vfms_V_F32_()
{
    uint[] opcodes =
    {
        0xF2000C10u, // VFMA.F32 D0, D0, D0
        0xF2200C10u  // VFMS.F32 D0, D0, D0
    };

    return opcodes;
}
// Base opcodes (register fields left at zero) for the scalar F32
// VMLA / VMLS / VNMLA / VNMLS tests; a fresh array is built on every call.
private static uint[] _Vmla_Vmls_Vnmla_Vnmls_S_F32_()
{
    uint[] opcodes =
    {
        0xEE000A00u, // VMLA. F32 S0, S0, S0
        0xEE000A40u, // VMLS. F32 S0, S0, S0
        0xEE100A40u, // VNMLA.F32 S0, S0, S0
        0xEE100A00u  // VNMLS.F32 S0, S0, S0
    };

    return opcodes;
}
// Base opcodes (register fields left at zero) for the scalar F64
// VMLA / VMLS / VNMLA / VNMLS tests; a fresh array is built on every call.
private static uint[] _Vmla_Vmls_Vnmla_Vnmls_S_F64_()
{
    uint[] opcodes =
    {
        0xEE000B00u, // VMLA. F64 D0, D0, D0
        0xEE000B40u, // VMLS. F64 D0, D0, D0
        0xEE100B40u, // VNMLA.F64 D0, D0, D0
        0xEE100B00u  // VNMLS.F64 D0, D0, D0
    };

    return opcodes;
}
private static uint[] _Vp_Add_Max_Min_F_() private static uint[] _Vp_Add_Max_Min_F_()
{ {
return new uint[] return new uint[]
@ -184,8 +237,8 @@ namespace Ryujinx.Tests.Cpu
private const int RndCnt = 2; private const int RndCnt = 2;
private static readonly bool NoZeros = false; private static readonly bool NoZeros = false;
private static readonly bool NoInfs = true; private static readonly bool NoInfs = false;
private static readonly bool NoNaNs = true; private static readonly bool NoNaNs = false;
[Explicit] [Explicit]
[Test, Pairwise, Description("VADD.f32 V0, V0, V0")] [Test, Pairwise, Description("VADD.f32 V0, V0, V0")]
@ -293,119 +346,115 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(fpsrMask: Fpsr.Nzcv); CompareAgainstUnicorn(fpsrMask: Fpsr.Nzcv);
} }
[Test, Pairwise, Description("VFMA.F<size> <Vd>, <Vn>, <Vm>")] [Test, Pairwise] [Explicit] // Fused.
public void Vfma([Values(0u, 1u)] uint rd, public void Vfma_Vfms_Vfnma_Vfnms_S_F32([ValueSource(nameof(_Vfma_Vfms_Vfnma_Vfnms_S_F32_))] uint opcode,
[Values(0u, 1u)] uint rn, [Values(0u, 1u, 2u, 3u)] uint rd,
[Values(0u, 1u)] uint rm, [Values(0u, 1u, 2u, 3u)] uint rn,
[Values(0u, 1u)] uint Q, [Values(0u, 1u, 2u, 3u)] uint rm,
[ValueSource("_2S_F_")] ulong z, [ValueSource(nameof(_1S_F_))] ulong s0,
[ValueSource("_2S_F_")] ulong a, [ValueSource(nameof(_1S_F_))] ulong s1,
[ValueSource("_2S_F_")] ulong b ) [ValueSource(nameof(_1S_F_))] ulong s2,
[ValueSource(nameof(_1S_F_))] ulong s3)
{ {
uint opcode = 0xf2000c10;
V128 v0;
V128 v1;
V128 v2;
uint c = (uint) BitConverter.SingleToInt32Bits(z);
uint d = (uint) BitConverter.SingleToInt32Bits(a);
uint e = (uint) BitConverter.SingleToInt32Bits(b);
if (Q == 0)
{
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15); opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) << 15);
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
v0 = MakeVectorE0E1(c, c); V128 v0 = MakeVectorE0E1E2E3((uint)s0, (uint)s1, (uint)s2, (uint)s3);
v1 = MakeVectorE0E1(d, c);
v2 = MakeVectorE0E1(e, c);
}
else
{
rd = rn = rm = 0; // Needed, as these values cannot be odd values if Q == 1.
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
v0 = MakeVectorE0E1E2E3(c, c, d, e); SingleOpcode(opcode, v0: v0);
v1 = MakeVectorE0E1E2E3(d, c, e, c);
v2 = MakeVectorE0E1E2E3(e, c, d, c);
}
opcode |= ((Q & 1) << 6);
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
[Test, Pairwise, Description("VFNMA.F<size> <Vd>, <Vn>, <Vm>")] [Test, Pairwise] [Explicit] // Fused.
public void Vfnma([Values(0u, 1u)] uint rd, public void Vfma_Vfms_Vfnma_Vfnms_S_F64([ValueSource(nameof(_Vfma_Vfms_Vfnma_Vfnms_S_F64_))] uint opcode,
[Values(0u, 1u)] uint rd,
[Values(0u, 1u)] uint rn, [Values(0u, 1u)] uint rn,
[Values(0u, 1u)] uint rm, [Values(0u, 1u)] uint rm,
[Values(2u, 3u)] uint size, [ValueSource(nameof(_1D_F_))] ulong d0,
[ValueSource("_2S_F_")] ulong z, [ValueSource(nameof(_1D_F_))] ulong d1)
[ValueSource("_2S_F_")] ulong a,
[ValueSource("_2S_F_")] ulong b)
{ {
uint opcode = 0xe900840;
if (size == 2)
{
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15);
}
else
{
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
} opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
opcode |= ((size & 3) << 8); V128 v0 = MakeVectorE0E1(d0, d1);
V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcode, v0: v0);
V128 v1 = MakeVectorE0E1(a, z);
V128 v2 = MakeVectorE0E1(b, z);
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
[Test, Pairwise, Description("VFNMS.F<size> <Vd>, <Vn>, <Vm>")] [Test, Pairwise] [Explicit] // Fused.
public void Vfnms([Values(0u, 1u)] uint rd, public void Vfma_Vfms_V_F32([ValueSource(nameof(_Vfma_Vfms_V_F32_))] uint opcode,
[Values(0u, 1u, 2u, 3u)] uint rd,
[Values(0u, 1u, 2u, 3u)] uint rn,
[Values(0u, 1u, 2u, 3u)] uint rm,
[ValueSource(nameof(_2S_F_))] ulong d0,
[ValueSource(nameof(_2S_F_))] ulong d1,
[ValueSource(nameof(_2S_F_))] ulong d2,
[ValueSource(nameof(_2S_F_))] ulong d3,
[Values] bool q)
{
if (q)
{
opcode |= 1 << 6;
rd >>= 1; rd <<= 1;
rn >>= 1; rn <<= 1;
rm >>= 1; rm <<= 1;
}
opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
V128 v0 = MakeVectorE0E1(d0, d1);
V128 v1 = MakeVectorE0E1(d2, d3);
SingleOpcode(opcode, v0: v0, v1: v1);
CompareAgainstUnicorn();
}
// Pairwise test for the scalar F32 multiply-accumulate opcodes: patches the three
// register numbers into the base opcode, seeds V0 with four 32-bit lanes, runs the
// single instruction and compares the result against Unicorn.
[Test, Pairwise] [Explicit]
public void Vmla_Vmls_Vnmla_Vnmls_S_F32([ValueSource(nameof(_Vmla_Vmls_Vnmla_Vnmls_S_F32_))] uint opcode,
                                        [Values(0u, 1u, 2u, 3u)] uint rd,
                                        [Values(0u, 1u, 2u, 3u)] uint rn,
                                        [Values(0u, 1u, 2u, 3u)] uint rm,
                                        [ValueSource(nameof(_1S_F_))] ulong s0,
                                        [ValueSource(nameof(_1S_F_))] ulong s1,
                                        [ValueSource(nameof(_1S_F_))] ulong s2,
                                        [ValueSource(nameof(_1S_F_))] ulong s3)
{
    // Each register number is split: bit 0 goes to a lone opcode bit,
    // bits 4:1 go to a 4-bit opcode field.
    uint rdBits = ((rd & 0x1) << 22) | ((rd & 0x1e) << 11); // bit 22, bits 15:12
    uint rnBits = ((rn & 0x1) << 7) | ((rn & 0x1e) << 15);  // bit 7,  bits 19:16
    uint rmBits = ((rm & 0x1) << 5) | ((rm & 0x1e) >> 1);   // bit 5,  bits 3:0

    opcode |= rdBits;
    opcode |= rnBits;
    opcode |= rmBits;

    // Only the low 32 bits of each generated value are used as a lane.
    uint lane0 = (uint)s0;
    uint lane1 = (uint)s1;
    uint lane2 = (uint)s2;
    uint lane3 = (uint)s3;

    V128 v0 = MakeVectorE0E1E2E3(lane0, lane1, lane2, lane3);

    SingleOpcode(opcode, v0: v0);

    CompareAgainstUnicorn();
}
[Test, Pairwise] [Explicit]
public void Vmla_Vmls_Vnmla_Vnmls_S_F64([ValueSource(nameof(_Vmla_Vmls_Vnmla_Vnmls_S_F64_))] uint opcode,
[Values(0u, 1u)] uint rd,
[Values(0u, 1u)] uint rn, [Values(0u, 1u)] uint rn,
[Values(0u, 1u)] uint rm, [Values(0u, 1u)] uint rm,
[Values(2u, 3u)] uint size, [ValueSource(nameof(_1D_F_))] ulong d0,
[ValueSource("_2S_F_")] ulong z, [ValueSource(nameof(_1D_F_))] ulong d1)
[ValueSource("_2S_F_")] ulong a,
[ValueSource("_2S_F_")] ulong b)
{ {
uint opcode = 0xee900a00;
if (size == 2)
{
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15);
}
else
{
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
} opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
opcode |= ((size & 3) << 8); V128 v0 = MakeVectorE0E1(d0, d1);
V128 v0 = MakeVectorE0E1(z, z); SingleOpcode(opcode, v0: v0);
V128 v1 = MakeVectorE0E1(a, z);
V128 v2 = MakeVectorE0E1(b, z);
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }