From 464ec7ced8bd8dc9ea8e4021cf602e6caedfffcf Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Mon, 25 Mar 2019 00:23:27 +0100 Subject: [PATCH] Add Cmeq_V, Cmge_V, Cmgt_V, Cmle_V & Cmlt_V (Z & ~Z) Sse opt.. (#646) * Follow-up (Neg_V). * Follow-up (Not_V & Orn_V). * Add Cmeq/ge/gt/le/lt_V (Z & ~Z) Sse opt.. * Add EmitLd/Stvectmp2/3. * Remove Dup (EmitVectorPairwiseSseOrSse2OpF). * Remove Dup (EmitFcmpOrFcmpe). * Add S/Uabd/l_V Sse opt.. Remove Dup (Srhadd_V). * Nit. --- .../Instructions/InstEmitSimdArithmetic.cs | 279 ++++++++++++++++-- ChocolArm64/Instructions/InstEmitSimdCmp.cs | 165 +++++++++-- .../Instructions/InstEmitSimdHelper.cs | 2 +- .../Instructions/InstEmitSimdLogical.cs | 12 +- 4 files changed, 403 insertions(+), 55 deletions(-) diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs index 5ceea7749..d2d87beff 100644 --- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs @@ -1863,13 +1863,7 @@ namespace ChocolArm64.Instructions Type[] typesSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; - string[] namesSzv = new string[] { nameof(VectorHelper.VectorSByteZero), - nameof(VectorHelper.VectorInt16Zero), - nameof(VectorHelper.VectorInt32Zero), - nameof(VectorHelper.VectorInt64Zero) }; - - VectorHelper.EmitCall(context, namesSzv[op.Size]); - + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); @@ -1921,20 +1915,125 @@ namespace ChocolArm64.Instructions public static void Sabd_V(ILEmitterCtx context) { - EmitVectorBinaryOpSx(context, () => + if (Optimizations.UseSse2) { - context.Emit(OpCodes.Sub); - EmitAbs(context); - }); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesCmpSub = new Type[] { 
VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesAndOr = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) }; + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSub)); + + context.EmitStvectmp(); // Cmp mask + context.EmitLdvectmp(); // Cmp mask + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr)); + + context.EmitLdvectmp(); // Cmp mask + + context.EmitLdvec(op.Rm); + context.EmitLdvec(op.Rn); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpSx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + }); + } } public static void Sabdl_V(ILEmitterCtx context) { - EmitVectorWidenRnRmBinaryOpSx(context, () => + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) { - context.Emit(OpCodes.Sub); - EmitAbs(context); - }); + Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAndOr = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + + string nameCvt = op.Size == 0 + ? 
nameof(Sse41.ConvertToVector128Int16) + : nameof(Sse41.ConvertToVector128Int32); + + context.EmitLdvec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + context.EmitLdvec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + context.EmitStvectmp2(); // Long Rm + context.EmitStvectmp(); // Long Rn + + context.EmitLdvectmp(); // Long Rn + context.EmitLdvectmp2(); // Long Rm + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSub)); + + context.EmitStvectmp3(); // Cmp mask + context.EmitLdvectmp3(); // Cmp mask + + context.EmitLdvectmp(); // Long Rn + context.EmitLdvectmp2(); // Long Rm + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr)); + + context.EmitLdvectmp3(); // Cmp mask + + context.EmitLdvectmp2(); // Long Rm + context.EmitLdvectmp(); // Long Rn + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr)); + + context.EmitStvec(op.Rd); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + }); + } } public static void Sadalp_V(ILEmitterCtx context) @@ -2430,8 +2529,8 @@ namespace ChocolArm64.Instructions context.EmitLdc_I4(op.Size == 0 ? 
sbyte.MinValue : short.MinValue); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); - context.Emit(OpCodes.Dup); context.EmitStvectmp(); + context.EmitLdvectmp(); context.EmitLdvec(op.Rn); context.EmitLdvectmp(); @@ -2604,20 +2703,152 @@ namespace ChocolArm64.Instructions public static void Uabd_V(ILEmitterCtx context) { - EmitVectorBinaryOpZx(context, () => + if (Optimizations.UseSse41) { - context.Emit(OpCodes.Sub); - EmitAbs(context); - }); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] }; + Type[] typesAndOr = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) }; + Type[] typesSav = new Type[] { typeof(long) }; + + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + context.EmitLdvec(op.Rm); + context.EmitLdvec(op.Rn); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); + + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmpSub)); + + context.EmitLdc_I8(-1L); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr)); + + context.EmitStvectmp(); // Cmp mask + context.EmitLdvectmp(); // Cmp mask + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr)); + + context.EmitLdvectmp(); // Cmp mask + + context.EmitLdvec(op.Rm); + context.EmitLdvec(op.Rn); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), 
typesAndOr)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + EmitAbs(context); + }); + } } public static void Uabdl_V(ILEmitterCtx context) { - EmitVectorWidenRnRmBinaryOpZx(context, () => + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 2) { - context.Emit(OpCodes.Sub); - EmitAbs(context); - }); + Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + Type[] typesCmpSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], + VectorIntTypesPerSizeLog2 [op.Size + 1] }; + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesAndOr = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesSav = new Type[] { typeof(long) }; + + string nameCvt = op.Size == 0 + ? 
nameof(Sse41.ConvertToVector128Int16) + : nameof(Sse41.ConvertToVector128Int32); + + context.EmitLdvec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + context.EmitLdvec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } + + context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); + + context.EmitStvectmp2(); // Long Rm + context.EmitStvectmp(); // Long Rn + + context.EmitLdvectmp2(); // Long Rm + context.EmitLdvectmp(); // Long Rn + + context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.Max), typesMax)); + + context.EmitLdvectmp2(); // Long Rm + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmpSub)); + + context.EmitLdc_I8(-1L); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr)); + + context.EmitStvectmp3(); // Cmp mask + context.EmitLdvectmp3(); // Cmp mask + + context.EmitLdvectmp(); // Long Rn + context.EmitLdvectmp2(); // Long Rm + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndOr)); + + context.EmitLdvectmp3(); // Cmp mask + + context.EmitLdvectmp2(); // Long Rm + context.EmitLdvectmp(); // Long Rn + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndOr)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndOr)); + + context.EmitStvec(op.Rd); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, () => + { + context.Emit(OpCodes.Sub); + 
EmitAbs(context); + }); + } } public static void Uadalp_V(ILEmitterCtx context) diff --git a/ChocolArm64/Instructions/InstEmitSimdCmp.cs b/ChocolArm64/Instructions/InstEmitSimdCmp.cs index 62cf77209..e6b33f797 100644 --- a/ChocolArm64/Instructions/InstEmitSimdCmp.cs +++ b/ChocolArm64/Instructions/InstEmitSimdCmp.cs @@ -20,19 +20,32 @@ namespace ChocolArm64.Instructions public static void Cmeq_V(ILEmitterCtx context) { - if (context.CurrOp is OpCodeSimdReg64 op) + if (Optimizations.UseSse41) { - if (op.Size < 3 && Optimizations.UseSse2) + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + Type typeSse = op.Size != 3 ? typeof(Sse2) : typeof(Sse41); + + context.EmitLdvec(op.Rn); + + if (op is OpCodeSimdReg64 binOp) { - EmitSse2Op(context, nameof(Sse2.CompareEqual)); - } - else if (op.Size == 3 && Optimizations.UseSse41) - { - EmitSse41Op(context, nameof(Sse41.CompareEqual)); + context.EmitLdvec(binOp.Rm); } else { - EmitCmpOp(context, OpCodes.Beq_S, scalar: false); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + } + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.CompareEqual), typesCmp)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); } } else @@ -48,7 +61,45 @@ namespace ChocolArm64.Instructions public static void Cmge_V(ILEmitterCtx context) { - EmitCmpOp(context, OpCodes.Bge_S, scalar: false); + if (Optimizations.UseSse42) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesAnt = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) }; + Type[] typesSav = new Type[] { typeof(long) }; + + Type typeSse = op.Size != 3 ? 
typeof(Sse2) : typeof(Sse42); + + if (op is OpCodeSimdReg64 binOp) + { + context.EmitLdvec(binOp.Rm); + } + else + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + } + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.CompareGreaterThan), typesCmp)); + + context.EmitLdc_I8(-1L); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitCmpOp(context, OpCodes.Bge_S, scalar: false); + } } public static void Cmgt_S(ILEmitterCtx context) @@ -58,19 +109,32 @@ namespace ChocolArm64.Instructions public static void Cmgt_V(ILEmitterCtx context) { - if (context.CurrOp is OpCodeSimdReg64 op) + if (Optimizations.UseSse42) { - if (op.Size < 3 && Optimizations.UseSse2) + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + Type typeSse = op.Size != 3 ? 
typeof(Sse2) : typeof(Sse42); + + context.EmitLdvec(op.Rn); + + if (op is OpCodeSimdReg64 binOp) { - EmitSse2Op(context, nameof(Sse2.CompareGreaterThan)); - } - else if (op.Size == 3 && Optimizations.UseSse42) - { - EmitSse42Op(context, nameof(Sse42.CompareGreaterThan)); + context.EmitLdvec(binOp.Rm); } else { - EmitCmpOp(context, OpCodes.Bgt_S, scalar: false); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + } + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.CompareGreaterThan), typesCmp)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); } } else @@ -92,8 +156,8 @@ namespace ChocolArm64.Instructions { Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] }; - Type[] typesAnt = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) }; - Type[] typesSav = new Type[] { typeof(byte) }; + Type[] typesAnt = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) }; + Type[] typesSav = new Type[] { typeof(long) }; Type typeSse = op.Size == 0 ? 
typeof(Sse2) : typeof(Sse41); @@ -106,7 +170,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmp)); - context.EmitLdc_I4(byte.MaxValue); + context.EmitLdc_I8(-1L); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); @@ -169,7 +233,37 @@ namespace ChocolArm64.Instructions public static void Cmle_V(ILEmitterCtx context) { - EmitCmpOp(context, OpCodes.Ble_S, scalar: false); + if (Optimizations.UseSse42) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesAnt = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) }; + Type[] typesSav = new Type[] { typeof(long) }; + + Type typeSse = op.Size != 3 ? typeof(Sse2) : typeof(Sse42); + + context.EmitLdvec(op.Rn); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.CompareGreaterThan), typesCmp)); + + context.EmitLdc_I8(-1L); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitCmpOp(context, OpCodes.Ble_S, scalar: false); + } } public static void Cmlt_S(ILEmitterCtx context) @@ -179,7 +273,30 @@ namespace ChocolArm64.Instructions public static void Cmlt_V(ILEmitterCtx context) { - EmitCmpOp(context, OpCodes.Blt_S, scalar: false); + if (Optimizations.UseSse42) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + Type typeSse = op.Size != 3 ? 
typeof(Sse2) : typeof(Sse42); + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + context.EmitLdvec(op.Rn); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.CompareGreaterThan), typesCmp)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitCmpOp(context, OpCodes.Blt_S, scalar: false); + } } public static void Cmtst_S(ILEmitterCtx context) @@ -390,8 +507,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rm); } - context.Emit(OpCodes.Dup); context.EmitStvectmp(); + context.EmitLdvectmp(); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrderedScalar), typesCmp)); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); @@ -453,8 +570,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rm); } - context.Emit(OpCodes.Dup); context.EmitStvectmp(); + context.EmitLdvectmp(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrderedScalar), typesCmp)); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); diff --git a/ChocolArm64/Instructions/InstEmitSimdHelper.cs b/ChocolArm64/Instructions/InstEmitSimdHelper.cs index 10b86a3e1..56ef1fdca 100644 --- a/ChocolArm64/Instructions/InstEmitSimdHelper.cs +++ b/ChocolArm64/Instructions/InstEmitSimdHelper.cs @@ -872,8 +872,8 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.UnpackLow), types)); - context.Emit(OpCodes.Dup); context.EmitStvectmp(); + context.EmitLdvectmp(); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); diff --git a/ChocolArm64/Instructions/InstEmitSimdLogical.cs b/ChocolArm64/Instructions/InstEmitSimdLogical.cs index bf80bada3..a5a922741 100644 --- a/ChocolArm64/Instructions/InstEmitSimdLogical.cs +++ b/ChocolArm64/Instructions/InstEmitSimdLogical.cs @@ -193,12 +193,12 @@ namespace ChocolArm64.Instructions { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - Type[] 
typesSav = new Type[] { typeof(byte) }; - Type[] typesAnt = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) }; + Type[] typesSav = new Type[] { typeof(long) }; + Type[] typesAnt = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) }; context.EmitLdvec(op.Rn); - context.EmitLdc_I4(byte.MaxValue); + context.EmitLdc_I8(-1L); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); @@ -222,13 +222,13 @@ namespace ChocolArm64.Instructions { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - Type[] typesSav = new Type[] { typeof(byte) }; - Type[] typesAntOr = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) }; + Type[] typesSav = new Type[] { typeof(long) }; + Type[] typesAntOr = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) }; context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(byte.MaxValue); + context.EmitLdc_I8(-1L); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAntOr));