diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index 0e979aa44..c69de38f5 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -387,6 +387,7 @@ namespace ChocolArm64 SetA64("0100111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); SetA64("0x00111100>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); SetA64("0100111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); + SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", AInstEmit.Ssubw_V, typeof(AOpCodeSimdReg)); SetA64("0x00110000000000xxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs)); SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs)); SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", AInstEmit.St__Vss, typeof(AOpCodeSimdMemSs)); @@ -430,6 +431,7 @@ namespace ChocolArm64 SetA64("0110111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); SetA64("0x10111100>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); SetA64("0110111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); + SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", AInstEmit.Usubw_V, typeof(AOpCodeSimdReg)); SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", AInstEmit.Uzp1_V, typeof(AOpCodeSimdReg)); SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", AInstEmit.Uzp2_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<100001001010xxxxxxxxxx", AInstEmit.Xtn_V, typeof(AOpCodeSimd)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 36bb1cbf1..8e7418611 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -1072,6 +1072,11 @@ namespace ChocolArm64.Instruction EmitVectorSaturatingNarrowOpSxZx(Context, () => { }); } + public static void Ssubw_V(AILEmitterCtx Context) + { + EmitVectorWidenRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Sub)); + } + public static void Sub_S(AILEmitterCtx Context) { EmitScalarBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); @@ -1225,5 +1230,10 @@ namespace ChocolArm64.Instruction { EmitVectorSaturatingNarrowOpZxZx(Context, () => { }); } + + public static void Usubw_V(AILEmitterCtx Context) + { + EmitVectorWidenRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); + } } } diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs index 773d98944..c2d47747e 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs @@ -364,7 +364,7 @@ namespace ChocolArm64.Instruction AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; int Bytes = Op.GetBitsCount() >> 3; - int Elems = (!Scalar ? Bytes >> Op.Size : 1); + int Elems = !Scalar ? Bytes >> Op.Size : 1; ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size)); @@ -408,7 +408,7 @@ namespace ChocolArm64.Instruction AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; int Bytes = Op.GetBitsCount() >> 3; - int Elems = (!Scalar ? Bytes >> Op.Size : 1); + int Elems = !Scalar ? Bytes >> Op.Size : 1; ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size)); @@ -522,4 +522,4 @@ namespace ChocolArm64.Instruction Context.MarkLabel(LblEnd); } } -} \ No newline at end of file +} diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 7716e2987..0bd1a6292 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -419,20 +419,25 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> SizeF + 2; - for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++) + bool Rd = (Opers & OperFlags.Rd) != 0; + bool Rn = (Opers & OperFlags.Rn) != 0; + bool Rm = (Opers & OperFlags.Rm) != 0; + + for (int Index = 0; Index < Elems; Index++) { - if (Opers.HasFlag(OperFlags.Rd)) + if (Rd) { EmitVectorExtractF(Context, Op.Rd, Index, SizeF); } - if (Opers.HasFlag(OperFlags.Rn)) + if (Rn) { EmitVectorExtractF(Context, Op.Rn, Index, SizeF); } - if (Opers.HasFlag(OperFlags.Rm)) + if (Rm) { EmitVectorExtractF(Context, ((AOpCodeSimdReg)Op).Rm, Index, SizeF); } @@ -469,8 +474,9 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> SizeF + 2; - for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Ternary) { @@ -531,19 +537,23 @@ namespace ChocolArm64.Instruction int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; + bool Rd = (Opers & OperFlags.Rd) != 0; + bool Rn = (Opers & OperFlags.Rn) != 0; + bool Rm = (Opers & OperFlags.Rm) != 0; + for (int Index = 0; Index < Elems; Index++) { - if (Opers.HasFlag(OperFlags.Rd)) + if (Rd) { EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed); } - if (Opers.HasFlag(OperFlags.Rn)) + if (Rn) { EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed); } - if (Opers.HasFlag(OperFlags.Rm)) + if (Rm) { EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, Index, Op.Size, Signed); } @@ -662,9 +672,6 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - Context.EmitLdvec(Op.Rd); - Context.EmitStvectmp(); - int Elems = 8 >> Op.Size; int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; @@ -707,9 +714,6 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - Context.EmitLdvec(Op.Rd); - Context.EmitStvectmp(); - int Elems = 8 >> Op.Size; int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; @@ -747,21 +751,25 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - int Elems = Bytes >> Op.Size; - int Half = Elems >> 1; - - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = (Index & (Half - 1)) << 1; + int Idx = Index << 1; - EmitVectorExtract(Context, Index < Half ? Op.Rn : Op.Rm, Elem + 0, Op.Size, Signed); - EmitVectorExtract(Context, Index < Half ? Op.Rn : Op.Rm, Elem + 1, Op.Size, Signed); + EmitVectorExtract(Context, Op.Rn, Idx, Op.Size, Signed); + EmitVectorExtract(Context, Op.Rn, Idx + 1, Op.Size, Signed); Emit(); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorExtract(Context, Op.Rm, Idx, Op.Size, Signed); + EmitVectorExtract(Context, Op.Rm, Idx + 1, Op.Size, Signed); + + Emit(); + + EmitVectorInsertTmp(Context, Pairs + Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } Context.EmitLdvectmp(); @@ -818,7 +826,7 @@ namespace ChocolArm64.Instruction int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0; - long TMaxValue = SignedDst ? (1 << (ESize - 1)) - 1 : (1L << ESize) - 1L; + long TMaxValue = SignedDst ? (1 << (ESize - 1)) - 1 : (long)(~0UL >> (64 - ESize)); long TMinValue = SignedDst ? -((1 << (ESize - 1))) : 0; Context.EmitLdc_I8(0L); @@ -871,7 +879,7 @@ namespace ChocolArm64.Instruction if (Scalar) { - EmitVectorZeroLower(Context, Op.Rd); + EmitVectorZeroLowerTmp(Context); } EmitVectorInsertTmp(Context, Part + Index, Op.Size); @@ -963,6 +971,11 @@ namespace ChocolArm64.Instruction EmitVectorInsert(Context, Rd, 0, 3, 0); } + public static void EmitVectorZeroLowerTmp(AILEmitterCtx Context) + { + EmitVectorInsertTmp(Context, 0, 3, 0); + } + public static void EmitVectorZeroUpper(AILEmitterCtx Context, int Rd) { EmitVectorInsert(Context, Rd, 1, 3, 0); @@ -1008,6 +1021,20 @@ namespace ChocolArm64.Instruction Context.EmitStvec(Reg); } + public static void EmitVectorInsertTmp(AILEmitterCtx Context, int Index, int Size, long Value) + { + ThrowIfInvalid(Index, Size); + + Context.EmitLdc_I8(Value); + Context.EmitLdvectmp(); + Context.EmitLdc_I4(Index); + Context.EmitLdc_I4(Size); + + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt)); + + Context.EmitStvectmp(); + } + public static void EmitVectorInsertF(AILEmitterCtx Context, int Reg, int Index, int Size) { ThrowIfInvalidF(Index, Size); diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index 592cab733..3bf1e4635 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -295,13 +295,22 @@ namespace ChocolArm64.Instruction int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; + if (Part != 0) + { + Context.EmitLdvec(Op.Rd); + Context.EmitStvectmp(); + } + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1); - EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); + EmitVectorInsertTmp(Context, Part + Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Part == 0) { EmitVectorZeroUpper(Context, Op.Rd); @@ -342,7 +351,7 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size); EmitVectorInsertTmp(Context, Idx + 1, Op.Size); - EmitVectorInsertTmp(Context, Idx , Op.Size); + EmitVectorInsertTmp(Context, Idx, Op.Size); } Context.EmitLdvectmp(); @@ -398,7 +407,7 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size); EmitVectorInsertTmp(Context, Idx + 1, Op.Size); - EmitVectorInsertTmp(Context, Idx , Op.Size); + EmitVectorInsertTmp(Context, Idx, Op.Size); } Context.EmitLdvectmp(); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index b84d29575..82591edae 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -1377,6 +1377,60 @@ namespace Ryujinx.Tests.Cpu }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); } + + [Test, Description("XTN{2} ., .")] + public void Xtn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S> + { + uint Opcode = 0x0E212800; // XTN V0.8B, V0.8H + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + SimdFp.Xtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("XTN{2} ., .")] + public void Xtn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S> + { + uint Opcode = 0x4E212800; // XTN2 V0.16B, V0.8H + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + SimdFp.Xtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } #endif } } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index 51db857c3..c67348d1d 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -1659,6 +1659,130 @@ namespace Ryujinx.Tests.Cpu }); } + [Test, Pairwise, Description("SADDW{2} ., ., .")] + public void Saddw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> + { + uint Opcode = 0x0E201000; // SADDW V0.8H, V0.8H, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); + SimdFp.Saddw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SADDW{2} ., ., .")] + public void Saddw_V_16B8H8H_8H4S4S_4S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> + { + uint Opcode = 0x4E201000; // SADDW2 V0.8H, V0.8H, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE1(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Saddw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SSUBW{2} ., ., .")] + public void Ssubw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> + { + uint Opcode = 0x0E203000; // SSUBW V0.8H, V0.8H, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); + SimdFp.Ssubw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SSUBW{2} ., ., .")] + public void Ssubw_V_16B8H8H_8H4S4S_4S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> + { + uint Opcode = 0x4E203000; // SSUBW2 V0.8H, V0.8H, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE1(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Ssubw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Pairwise, Description("SUB , , ")] public void Sub_S_D([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -2184,6 +2308,130 @@ namespace Ryujinx.Tests.Cpu }); } + [Test, Pairwise, Description("UADDW{2} ., ., .")] + public void Uaddw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> + { + uint Opcode = 0x2E201000; // UADDW V0.8H, V0.8H, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); + SimdFp.Uaddw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("UADDW{2} ., ., .")] + public void Uaddw_V_16B8H8H_8H4S4S_4S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> + { + uint Opcode = 0x6E201000; // UADDW2 V0.8H, V0.8H, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE1(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Uaddw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("USUBW{2} ., ., .")] + public void Usubw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> + { + uint Opcode = 0x2E203000; // USUBW V0.8H, V0.8H, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); + SimdFp.Usubw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("USUBW{2} ., ., .")] + public void Usubw_V_16B8H8H_8H4S4S_4S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> + { + uint Opcode = 0x6E203000; // USUBW2 V0.8H, V0.8H, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE1(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Usubw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Pairwise, Description("UZP1 ., ., .")] public void Uzp1_V_8B_4H_2S([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, diff --git a/Ryujinx.Tests/Cpu/Tester/Instructions.cs b/Ryujinx.Tests/Cpu/Tester/Instructions.cs index 68f83423b..2f52dcbf5 100644 --- a/Ryujinx.Tests/Cpu/Tester/Instructions.cs +++ b/Ryujinx.Tests/Cpu/Tester/Instructions.cs @@ -3315,6 +3315,37 @@ namespace Ryujinx.Tests.Cpu.Tester Vpart(d, part, result); } + + // xtn_advsimd.html + public static void Xtn_V(bool Q, Bits size, Bits Rn, Bits Rd) + { + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = 64; + int part = (int)UInt(Q); + int elements = datasize / esize; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand = V(2 * datasize, n); + Bits element; + + for (int e = 0; e <= elements - 1; e++) + { + element = Elem(operand, e, 2 * esize); + + Elem(result, e, esize, element[esize - 1, 0]); + } + + Vpart(d, part, result); + } #endregion #region "SimdReg" @@ -4395,8 +4426,8 @@ namespace Ryujinx.Tests.Cpu.Tester int part = (int)UInt(Q); int elements = datasize / esize; - bool unsigned = (U == true); bool accumulate = (op == false); + bool unsigned = (U == true); /* Operation */ /* CheckFPAdvSIMDEnabled64(); */ @@ -4484,8 +4515,8 @@ namespace Ryujinx.Tests.Cpu.Tester int part = (int)UInt(Q); int elements = datasize / esize; - bool unsigned = (U == true); bool accumulate = (op == false); + bool unsigned = (U == true); /* Operation */ /* CheckFPAdvSIMDEnabled64(); */ @@ -4511,6 +4542,108 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // saddw_advsimd.html + public static void Saddw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = false; + const bool o1 = false; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = 64; + int part = (int)UInt(Q); + int elements = datasize / esize; + + bool sub_op = (o1 == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(2 * datasize); + Bits operand1 = V(2 * datasize, n); + Bits operand2 = Vpart(datasize, m, part); + BigInteger element1; + BigInteger element2; + BigInteger sum; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, 2 * esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + if (sub_op) + { + sum = element1 - element2; + } + else + { + sum = element1 + element2; + } + + Elem(result, e, 2 * esize, sum.SubBigInteger(2 * esize - 1, 0)); + } + + V(d, result); + } + + // ssubw_advsimd.html + public static void Ssubw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = false; + const bool o1 = true; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = 64; + int part = (int)UInt(Q); + int elements = datasize / esize; + + bool sub_op = (o1 == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(2 * datasize); + Bits operand1 = V(2 * datasize, n); + Bits operand2 = Vpart(datasize, m, part); + BigInteger element1; + BigInteger element2; + BigInteger sum; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, 2 * esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + if (sub_op) + { + sum = element1 - element2; + } + else + { + sum = element1 + element2; + } + + Elem(result, e, 2 * esize, sum.SubBigInteger(2 * esize - 1, 0)); + } + + V(d, result); + } + // sub_advsimd.html#SUB_asisdsame_only public static void Sub_S(Bits size, Bits Rm, Bits Rn, Bits Rd) { @@ -4785,8 +4918,8 @@ namespace Ryujinx.Tests.Cpu.Tester int part = (int)UInt(Q); int elements = datasize / esize; - bool unsigned = (U == true); bool accumulate = (op == false); + bool unsigned = (U == true); /* Operation */ /* CheckFPAdvSIMDEnabled64(); */ @@ -4874,8 +5007,8 @@ namespace Ryujinx.Tests.Cpu.Tester int part = (int)UInt(Q); int elements = datasize / esize; - bool unsigned = (U == true); bool accumulate = (op == false); + bool unsigned = (U == true); /* Operation */ /* CheckFPAdvSIMDEnabled64(); */ @@ -4901,6 +5034,108 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // uaddw_advsimd.html + public static void Uaddw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = true; + const bool o1 = false; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = 64; + int part = (int)UInt(Q); + int elements = datasize / esize; + + bool sub_op = (o1 == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(2 * datasize); + Bits operand1 = V(2 * datasize, n); + Bits operand2 = Vpart(datasize, m, part); + BigInteger element1; + BigInteger element2; + BigInteger sum; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, 2 * esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + if (sub_op) + { + sum = element1 - element2; + } + else + { + sum = element1 + element2; + } + + Elem(result, e, 2 * esize, sum.SubBigInteger(2 * esize - 1, 0)); + } + + V(d, result); + } + + // usubw_advsimd.html + public static void Usubw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = true; + const bool o1 = true; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = 64; + int part = (int)UInt(Q); + int elements = datasize / esize; + + bool sub_op = (o1 == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(2 * datasize); + Bits operand1 = V(2 * datasize, n); + Bits operand2 = Vpart(datasize, m, part); + BigInteger element1; + BigInteger element2; + BigInteger sum; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, 2 * esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + if (sub_op) + { + sum = element1 - element2; + } + else + { + sum = element1 + element2; + } + + Elem(result, e, 2 * esize, sum.SubBigInteger(2 * esize - 1, 0)); + } + + V(d, result); + } + // uzp1_advsimd.html public static void Uzp1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) {