using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace ChocolArm64.Instructions
{
    /// <summary>
    /// Helpers for vector instructions: saturating float-to-integer conversions,
    /// rounding driven by the guest rounding mode, table lookup (TBL), and
    /// element extract/insert on <see cref="Vector128{T}"/> values.
    /// </summary>
    /// <remarks>
    /// NOTE(review): vectors are carried as <c>Vector128&lt;float&gt;</c> and reinterpreted with
    /// <c>Sse.StaticCast</c> where another element type is needed. The element-type arguments
    /// were reconstructed from how each value is used — confirm against the project history.
    /// </remarks>
    static class VectorHelper
    {
        /// <summary>
        /// Emits a call to a <see cref="VectorHelper"/> method chosen by the current op's register size.
        /// </summary>
        /// <param name="context">Emitter context that receives the call.</param>
        /// <param name="name64">Method name used when the current op's register size is <c>Simd64</c>.</param>
        /// <param name="name128">Method name used for any other register size.</param>
        public static void EmitCall(ILEmitterCtx context, string name64, string name128)
        {
            bool isSimd64 = context.CurrOp.RegisterSize == RegisterSize.Simd64;

            context.EmitCall(typeof(VectorHelper), isSimd64 ? name64 : name128);
        }

        /// <summary>
        /// Emits a call to the <see cref="VectorHelper"/> method with the given name.
        /// </summary>
        /// <param name="context">Emitter context that receives the call.</param>
        /// <param name="mthdName">Name of the method to call.</param>
        public static void EmitCall(ILEmitterCtx context, string mthdName)
        {
            context.EmitCall(typeof(VectorHelper), mthdName);
        }

        /// <summary>
        /// Converts a single to a signed 32-bit integer with saturation.
        /// NaN gives 0, out-of-range values clamp to the nearest bound, anything else is truncated by the cast.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int SatF32ToS32(float value)
        {
            if (float.IsNaN(value))
            {
                return 0;
            }

            return value >= int.MaxValue ? int.MaxValue :
                   value <= int.MinValue ? int.MinValue : (int)value;
        }

        /// <summary>
        /// Converts a single to a signed 64-bit integer with saturation.
        /// NaN gives 0, out-of-range values clamp to the nearest bound, anything else is truncated by the cast.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static long SatF32ToS64(float value)
        {
            if (float.IsNaN(value))
            {
                return 0;
            }

            return value >= long.MaxValue ? long.MaxValue :
                   value <= long.MinValue ? long.MinValue : (long)value;
        }

        /// <summary>
        /// Converts a single to an unsigned 32-bit integer with saturation.
        /// NaN gives 0, out-of-range values clamp to the nearest bound, anything else is truncated by the cast.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static uint SatF32ToU32(float value)
        {
            if (float.IsNaN(value))
            {
                return 0;
            }

            return value >= uint.MaxValue ? uint.MaxValue :
                   value <= uint.MinValue ? uint.MinValue : (uint)value;
        }

        /// <summary>
        /// Converts a single to an unsigned 64-bit integer with saturation.
        /// NaN gives 0, out-of-range values clamp to the nearest bound, anything else is truncated by the cast.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static ulong SatF32ToU64(float value)
        {
            if (float.IsNaN(value))
            {
                return 0;
            }

            return value >= ulong.MaxValue ? ulong.MaxValue :
                   value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
        }

        /// <summary>
        /// Converts a double to a signed 32-bit integer with saturation.
        /// NaN gives 0, out-of-range values clamp to the nearest bound, anything else is truncated by the cast.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int SatF64ToS32(double value)
        {
            if (double.IsNaN(value))
            {
                return 0;
            }

            return value >= int.MaxValue ? int.MaxValue :
                   value <= int.MinValue ? int.MinValue : (int)value;
        }

        /// <summary>
        /// Converts a double to a signed 64-bit integer with saturation.
        /// NaN gives 0, out-of-range values clamp to the nearest bound, anything else is truncated by the cast.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static long SatF64ToS64(double value)
        {
            if (double.IsNaN(value))
            {
                return 0;
            }

            return value >= long.MaxValue ? long.MaxValue :
                   value <= long.MinValue ? long.MinValue : (long)value;
        }

        /// <summary>
        /// Converts a double to an unsigned 32-bit integer with saturation.
        /// NaN gives 0, out-of-range values clamp to the nearest bound, anything else is truncated by the cast.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static uint SatF64ToU32(double value)
        {
            if (double.IsNaN(value))
            {
                return 0;
            }

            return value >= uint.MaxValue ? uint.MaxValue :
                   value <= uint.MinValue ? uint.MinValue : (uint)value;
        }

        /// <summary>
        /// Converts a double to an unsigned 64-bit integer with saturation.
        /// NaN gives 0, out-of-range values clamp to the nearest bound, anything else is truncated by the cast.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static ulong SatF64ToU64(double value)
        {
            if (double.IsNaN(value))
            {
                return 0;
            }

            return value >= ulong.MaxValue ? ulong.MaxValue :
                   value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
        }

        /// <summary>
        /// Rounds a double to an integral value using the rounding mode reported by
        /// <paramref name="state"/>. Any mode other than the three listed cases truncates.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static double Round(double value, CpuThreadState state)
        {
            switch (state.FPRoundingMode())
            {
                case RoundMode.ToNearest:            return Math.Round(value); // even
                case RoundMode.TowardsPlusInfinity:  return Math.Ceiling(value);
                case RoundMode.TowardsMinusInfinity: return Math.Floor(value);
                default: /* RoundMode.TowardsZero */ return Math.Truncate(value);
            }
        }

        /// <summary>
        /// Rounds a single to an integral value using the rounding mode reported by
        /// <paramref name="state"/>. Any mode other than the three listed cases truncates.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static float RoundF(float value, CpuThreadState state)
        {
            switch (state.FPRoundingMode())
            {
                case RoundMode.ToNearest:            return MathF.Round(value); // even
                case RoundMode.TowardsPlusInfinity:  return MathF.Ceiling(value);
                case RoundMode.TowardsMinusInfinity: return MathF.Floor(value);
                default: /* RoundMode.TowardsZero */ return MathF.Truncate(value);
            }
        }

        /// <summary>
        /// Scalar double rounding through the SSE4.1 scalar round intrinsics, selected by the
        /// rounding mode reported by <paramref name="state"/>.
        /// </summary>
        /// <exception cref="PlatformNotSupportedException">SSE4.1 is not supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<double> Sse41ScalarRound(Vector128<double> upper, Vector128<double> value, CpuThreadState state)
        {
            if (!Sse41.IsSupported)
            {
                throw new PlatformNotSupportedException();
            }

            switch (state.FPRoundingMode())
            {
                case RoundMode.ToNearest:            return Sse41.RoundToNearestIntegerScalar(upper, value); // even
                case RoundMode.TowardsPlusInfinity:  return Sse41.RoundToPositiveInfinityScalar(upper, value);
                case RoundMode.TowardsMinusInfinity: return Sse41.RoundToNegativeInfinityScalar(upper, value);
                default: /* RoundMode.TowardsZero */ return Sse41.RoundToZeroScalar(upper, value);
            }
        }

        /// <summary>
        /// Scalar single rounding through the SSE4.1 scalar round intrinsics, selected by the
        /// rounding mode reported by <paramref name="state"/>.
        /// </summary>
        /// <exception cref="PlatformNotSupportedException">SSE4.1 is not supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> Sse41ScalarRoundF(Vector128<float> upper, Vector128<float> value, CpuThreadState state)
        {
            if (!Sse41.IsSupported)
            {
                throw new PlatformNotSupportedException();
            }

            switch (state.FPRoundingMode())
            {
                case RoundMode.ToNearest:            return Sse41.RoundToNearestIntegerScalar(upper, value); // even
                case RoundMode.TowardsPlusInfinity:  return Sse41.RoundToPositiveInfinityScalar(upper, value);
                case RoundMode.TowardsMinusInfinity: return Sse41.RoundToNegativeInfinityScalar(upper, value);
                default: /* RoundMode.TowardsZero */ return Sse41.RoundToZeroScalar(upper, value);
            }
        }

        /// <summary>
        /// Packed double rounding through the SSE4.1 round intrinsics, selected by the
        /// rounding mode reported by <paramref name="state"/>.
        /// </summary>
        /// <exception cref="PlatformNotSupportedException">SSE4.1 is not supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<double> Sse41VectorRound(Vector128<double> value, CpuThreadState state)
        {
            if (!Sse41.IsSupported)
            {
                throw new PlatformNotSupportedException();
            }

            switch (state.FPRoundingMode())
            {
                case RoundMode.ToNearest:            return Sse41.RoundToNearestInteger(value); // even
                case RoundMode.TowardsPlusInfinity:  return Sse41.RoundToPositiveInfinity(value);
                case RoundMode.TowardsMinusInfinity: return Sse41.RoundToNegativeInfinity(value);
                default: /* RoundMode.TowardsZero */ return Sse41.RoundToZero(value);
            }
        }

        /// <summary>
        /// Packed single rounding through the SSE4.1 round intrinsics, selected by the
        /// rounding mode reported by <paramref name="state"/>.
        /// </summary>
        /// <exception cref="PlatformNotSupportedException">SSE4.1 is not supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> Sse41VectorRoundF(Vector128<float> value, CpuThreadState state)
        {
            if (!Sse41.IsSupported)
            {
                throw new PlatformNotSupportedException();
            }

            switch (state.FPRoundingMode())
            {
                case RoundMode.ToNearest:            return Sse41.RoundToNearestInteger(value); // even
                case RoundMode.TowardsPlusInfinity:  return Sse41.RoundToPositiveInfinity(value);
                case RoundMode.TowardsMinusInfinity: return Sse41.RoundToNegativeInfinity(value);
                default: /* RoundMode.TowardsZero */ return Sse41.RoundToZero(value);
            }
        }

        /// <summary>Table lookup over one table vector, processing 8 index bytes.</summary>
        public static Vector128<float> Tbl1_V64(
            Vector128<float> vector,
            Vector128<float> tb0)
        {
            return Tbl(vector, 8, tb0);
        }

        /// <summary>Table lookup over one table vector, processing 16 index bytes.</summary>
        public static Vector128<float> Tbl1_V128(
            Vector128<float> vector,
            Vector128<float> tb0)
        {
            return Tbl(vector, 16, tb0);
        }

        /// <summary>Table lookup over two table vectors, processing 8 index bytes.</summary>
        public static Vector128<float> Tbl2_V64(
            Vector128<float> vector,
            Vector128<float> tb0,
            Vector128<float> tb1)
        {
            return Tbl(vector, 8, tb0, tb1);
        }

        /// <summary>Table lookup over two table vectors, processing 16 index bytes.</summary>
        public static Vector128<float> Tbl2_V128(
            Vector128<float> vector,
            Vector128<float> tb0,
            Vector128<float> tb1)
        {
            return Tbl(vector, 16, tb0, tb1);
        }

        /// <summary>Table lookup over three table vectors, processing 8 index bytes.</summary>
        public static Vector128<float> Tbl3_V64(
            Vector128<float> vector,
            Vector128<float> tb0,
            Vector128<float> tb1,
            Vector128<float> tb2)
        {
            return Tbl(vector, 8, tb0, tb1, tb2);
        }

        /// <summary>Table lookup over three table vectors, processing 16 index bytes.</summary>
        public static Vector128<float> Tbl3_V128(
            Vector128<float> vector,
            Vector128<float> tb0,
            Vector128<float> tb1,
            Vector128<float> tb2)
        {
            return Tbl(vector, 16, tb0, tb1, tb2);
        }

        /// <summary>Table lookup over four table vectors, processing 8 index bytes.</summary>
        public static Vector128<float> Tbl4_V64(
            Vector128<float> vector,
            Vector128<float> tb0,
            Vector128<float> tb1,
            Vector128<float> tb2,
            Vector128<float> tb3)
        {
            return Tbl(vector, 8, tb0, tb1, tb2, tb3);
        }

        /// <summary>Table lookup over four table vectors, processing 16 index bytes.</summary>
        public static Vector128<float> Tbl4_V128(
            Vector128<float> vector,
            Vector128<float> tb0,
            Vector128<float> tb1,
            Vector128<float> tb2,
            Vector128<float> tb3)
        {
            return Tbl(vector, 16, tb0, tb1, tb2, tb3);
        }

        /// <summary>
        /// Shared table-lookup implementation. The table is the 16 bytes of each vector in
        /// <paramref name="tb"/>, concatenated in order. Each of the first <paramref name="bytes"/>
        /// bytes of <paramref name="vector"/> is used as an index into that table.
        /// </summary>
        /// <returns>
        /// A vector whose byte lanes hold the looked-up table bytes. Lanes whose index is outside
        /// the table, and lanes at or beyond <paramref name="bytes"/>, remain zero.
        /// </returns>
        private static Vector128<float> Tbl(Vector128<float> vector, int bytes, params Vector128<float>[] tb)
        {
            Vector128<float> res = new Vector128<float>();

            byte[] table = new byte[tb.Length * 16];

            // Flatten all table vectors into one contiguous byte table.
            for (byte index = 0; index < tb.Length; index++)
            {
                for (byte index2 = 0; index2 < 16; index2++)
                {
                    table[index * 16 + index2] = (byte)VectorExtractIntZx(tb[index], index2, 0);
                }
            }

            for (byte index = 0; index < bytes; index++)
            {
                byte tblIdx = (byte)VectorExtractIntZx(vector, index, 0);

                // Out-of-range indices leave the destination lane at its initial zero.
                if (tblIdx < table.Length)
                {
                    res = VectorInsertInt(table[tblIdx], res, index, 0);
                }
            }

            return res;
        }

        /// <summary>
        /// Reads the 64-bit element at <paramref name="index"/> and reinterprets its bits as a double.
        /// </summary>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static double VectorExtractDouble(Vector128<float> vector, byte index)
        {
            if (Sse41.IsSupported)
            {
                return BitConverter.Int64BitsToDouble(Sse41.Extract(Sse.StaticCast<float, long>(vector), index));
            }
            else if (Sse2.IsSupported)
            {
                return BitConverter.Int64BitsToDouble((long)VectorExtractIntZx(vector, index, 3));
            }

            throw new PlatformNotSupportedException();
        }

        /// <summary>
        /// Reads an integer element and sign-extends it to 64 bits.
        /// </summary>
        /// <param name="vector">Source vector.</param>
        /// <param name="index">Element index, counted in elements of the selected size.</param>
        /// <param name="size">Element size selector: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not 0..3.</exception>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static long VectorExtractIntSx(Vector128<float> vector, byte index, int size)
        {
            if (Sse41.IsSupported)
            {
                if (size == 0)
                {
                    return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
                }
                else if (size == 1)
                {
                    return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
                }
                else if (size == 2)
                {
                    return Sse41.Extract(Sse.StaticCast<float, int>(vector), index);
                }
                else if (size == 3)
                {
                    return Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }
            else if (Sse2.IsSupported)
            {
                // Fallback: take the zero-extended value and sign-extend through a narrowing cast.
                if (size == 0)
                {
                    return (sbyte)VectorExtractIntZx(vector, index, size);
                }
                else if (size == 1)
                {
                    return (short)VectorExtractIntZx(vector, index, size);
                }
                else if (size == 2)
                {
                    return (int)VectorExtractIntZx(vector, index, size);
                }
                else if (size == 3)
                {
                    return (long)VectorExtractIntZx(vector, index, size);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }

            throw new PlatformNotSupportedException();
        }

        /// <summary>
        /// Reads an integer element and zero-extends it to 64 bits.
        /// </summary>
        /// <param name="vector">Source vector.</param>
        /// <param name="index">Element index, counted in elements of the selected size.</param>
        /// <param name="size">Element size selector: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not 0..3.</exception>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static ulong VectorExtractIntZx(Vector128<float> vector, byte index, int size)
        {
            if (Sse41.IsSupported)
            {
                if (size == 0)
                {
                    return Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
                }
                else if (size == 1)
                {
                    return Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
                }
                else if (size == 2)
                {
                    return Sse41.Extract(Sse.StaticCast<float, uint>(vector), index);
                }
                else if (size == 3)
                {
                    return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), index);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }
            else if (Sse2.IsSupported)
            {
                // SSE2 only extracts 16-bit lanes, so map the element index to the index of
                // the first 16-bit lane that holds it.
                int shortIdx = size == 0
                    ? index >> 1
                    : index << (size - 1);

                ushort value = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)shortIdx);

                if (size == 0)
                {
                    // Odd byte indices live in the upper half of the 16-bit lane.
                    return (byte)(value >> (index & 1) * 8);
                }
                else if (size == 1)
                {
                    return value;
                }
                else if (size == 2 || size == 3)
                {
                    ushort value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(shortIdx + 1));

                    if (size == 2)
                    {
                        return (uint)(value | (value1 << 16));
                    }

                    ushort value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(shortIdx + 2));
                    ushort value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(shortIdx + 3));

                    return ((ulong)value  <<  0) |
                           ((ulong)value1 << 16) |
                           ((ulong)value2 << 32) |
                           ((ulong)value3 << 48);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }

            throw new PlatformNotSupportedException();
        }

        /// <summary>
        /// Reads the single-precision element at <paramref name="index"/>.
        /// </summary>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static float VectorExtractSingle(Vector128<float> vector, byte index)
        {
            if (Sse41.IsSupported)
            {
                return Sse41.Extract(vector, index);
            }
            else if (Sse2.IsSupported)
            {
                // Rebuild the 32-bit pattern from its two 16-bit halves.
                Vector128<ushort> shortVector = Sse.StaticCast<float, ushort>(vector);

                int low  = Sse2.Extract(shortVector, (byte)(index * 2 + 0));
                int high = Sse2.Extract(shortVector, (byte)(index * 2 + 1));

                return BitConverter.Int32BitsToSingle(low | (high << 16));
            }

            throw new PlatformNotSupportedException();
        }

        /// <summary>
        /// Writes the bit pattern of <paramref name="value"/> into the 64-bit element at
        /// <paramref name="index"/> and returns the updated vector.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInsertDouble(double value, Vector128<float> vector, byte index)
        {
            return VectorInsertInt((ulong)BitConverter.DoubleToInt64Bits(value), vector, index, 3);
        }

        /// <summary>
        /// Writes the low bits of <paramref name="value"/> into an integer element and returns
        /// the updated vector.
        /// </summary>
        /// <param name="value">Value to store, truncated to the element width.</param>
        /// <param name="vector">Vector to modify.</param>
        /// <param name="index">Element index, counted in elements of the selected size.</param>
        /// <param name="size">Element size selector: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not 0..3.</exception>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInsertInt(ulong value, Vector128<float> vector, byte index, int size)
        {
            if (Sse41.IsSupported)
            {
                if (size == 0)
                {
                    return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(vector), (byte)value, index));
                }
                else if (size == 1)
                {
                    return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(vector), (ushort)value, index));
                }
                else if (size == 2)
                {
                    return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(vector), (uint)value, index));
                }
                else if (size == 3)
                {
                    return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(vector), value, index));
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }
            else if (Sse2.IsSupported)
            {
                Vector128<ushort> shortVector = Sse.StaticCast<float, ushort>(vector);

                // SSE2 only inserts 16-bit lanes, so map the element index to the index of
                // the first 16-bit lane that holds it.
                int shortIdx = size == 0
                    ? index >> 1
                    : index << (size - 1);

                if (size == 0)
                {
                    // Read-modify-write the containing 16-bit lane: keep the other byte,
                    // replace the addressed one.
                    ushort shortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)shortIdx);

                    int shift = (index & 1) * 8;

                    shortVal &= (ushort)(0xff00 >> shift);

                    shortVal |= (ushort)((byte)value << shift);

                    return Sse.StaticCast<ushort, float>(Sse2.Insert(shortVector, shortVal, (byte)shortIdx));
                }
                else if (size == 1)
                {
                    return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(vector), (ushort)value, index));
                }
                else if (size == 2 || size == 3)
                {
                    shortVector = Sse2.Insert(shortVector, (ushort)(value >>  0), (byte)(shortIdx + 0));
                    shortVector = Sse2.Insert(shortVector, (ushort)(value >> 16), (byte)(shortIdx + 1));

                    if (size == 3)
                    {
                        shortVector = Sse2.Insert(shortVector, (ushort)(value >> 32), (byte)(shortIdx + 2));
                        shortVector = Sse2.Insert(shortVector, (ushort)(value >> 48), (byte)(shortIdx + 3));
                    }

                    return Sse.StaticCast<ushort, float>(shortVector);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(size));
                }
            }

            throw new PlatformNotSupportedException();
        }

        /// <summary>
        /// Writes <paramref name="value"/> into the single-precision element at
        /// <paramref name="index"/> and returns the updated vector.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is greater than 3.</exception>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInsertSingle(float value, Vector128<float> vector, byte index)
        {
            if (Sse41.IsSupported)
            {
                // Note: The if/else if is necessary to enable the JIT to
                // produce a single INSERTPS instruction instead of the
                // jump table fallback.
                if (index == 0)
                {
                    return Sse41.Insert(vector, value, 0x00);
                }
                else if (index == 1)
                {
                    return Sse41.Insert(vector, value, 0x10);
                }
                else if (index == 2)
                {
                    return Sse41.Insert(vector, value, 0x20);
                }
                else if (index == 3)
                {
                    return Sse41.Insert(vector, value, 0x30);
                }
                else
                {
                    throw new ArgumentOutOfRangeException(nameof(index));
                }
            }
            else if (Sse2.IsSupported)
            {
                // Reject the same indices the SSE4.1 path rejects, so the result does not
                // depend on which instruction set the host supports.
                if (index > 3)
                {
                    throw new ArgumentOutOfRangeException(nameof(index));
                }

                int intValue = BitConverter.SingleToInt32Bits(value);

                ushort low  = (ushort)(intValue >>  0);
                ushort high = (ushort)(intValue >> 16);

                Vector128<ushort> shortVector = Sse.StaticCast<float, ushort>(vector);

                shortVector = Sse2.Insert(shortVector, low,  (byte)(index * 2 + 0));
                shortVector = Sse2.Insert(shortVector, high, (byte)(index * 2 + 1));

                return Sse.StaticCast<ushort, float>(shortVector);
            }

            throw new PlatformNotSupportedException();
        }

        /// <summary>
        /// Inserts <paramref name="value"/> through <c>Sse41.Insert</c> with control byte 0b1110.
        /// </summary>
        /// <exception cref="PlatformNotSupportedException">SSE4.1 is not supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> Sse41VectorInsertScalarSingle(float value, Vector128<float> vector)
        {
            // Explicit guard, matching the other Sse41* helpers in this class.
            if (!Sse41.IsSupported)
            {
                throw new PlatformNotSupportedException();
            }

            // Note: 0b1110 is the mask to zero the upper bits.
            return Sse41.Insert(vector, value, 0b1110);
        }

        /// <summary>
        /// Returns the vector produced by <c>Sse.SetZeroVector128</c>.
        /// </summary>
        /// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorSingleZero()
        {
            if (Sse.IsSupported)
            {
                return Sse.SetZeroVector128();
            }

            throw new PlatformNotSupportedException();
        }
    }
}