Add Tbl_V SSE optimization with tests. (#651)

* Add v4, v5, v30, v31 required for Tbl_V Tests.

* Add Tests for Tbl_V.

* Add Tbl_V SSE optimization.

* Nit.

* Small optimization on the comparison constant vector.

* Nit.

* Add EmitLdvectmp2/3 and EmitStvectmp2/3.

* Nit.
This commit is contained in:
LDj3SNuD 2019-03-23 19:50:19 +01:00 committed by gdkchan
parent 8c08547a9f
commit d9561f41eb
2 changed files with 89 additions and 22 deletions

View File

@ -355,35 +355,94 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdTbl64 op = (OpCodeSimdTbl64)context.CurrOp;
context.EmitLdvec(op.Rm);
for (int index = 0; index < op.Size; index++)
if (Optimizations.UseSsse3)
{
context.EmitLdvec((op.Rn + index) & 0x1f);
}
Type[] typesCmpSflSub = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
Type[] typesOr = new Type[] { typeof(Vector128<long> ), typeof(Vector128<long> ) };
Type[] typesSav = new Type[] { typeof(long) };
switch (op.Size)
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I8(0x0F0F0F0F0F0F0F0FL);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitStvectmp2();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSflSub));
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesCmpSflSub));
for (int index = 1; index < op.Size; index++)
{
context.EmitLdvec((op.Rn + index) & 0x1F);
context.EmitLdvec(op.Rm);
context.EmitLdc_I8(0x1010101010101010L * index);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSflSub));
context.EmitStvectmp();
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSflSub));
context.EmitLdvectmp();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesCmpSflSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
}
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
case 1: VectorHelper.EmitCall(context,
nameof(VectorHelper.Tbl1_V64),
nameof(VectorHelper.Tbl1_V128)); break;
context.EmitLdvec(op.Rm);
case 2: VectorHelper.EmitCall(context,
nameof(VectorHelper.Tbl2_V64),
nameof(VectorHelper.Tbl2_V128)); break;
for (int index = 0; index < op.Size; index++)
{
context.EmitLdvec((op.Rn + index) & 0x1F);
}
case 3: VectorHelper.EmitCall(context,
nameof(VectorHelper.Tbl3_V64),
nameof(VectorHelper.Tbl3_V128)); break;
switch (op.Size)
{
case 1: VectorHelper.EmitCall(context,
nameof(VectorHelper.Tbl1_V64),
nameof(VectorHelper.Tbl1_V128)); break;
case 4: VectorHelper.EmitCall(context,
nameof(VectorHelper.Tbl4_V64),
nameof(VectorHelper.Tbl4_V128)); break;
case 2: VectorHelper.EmitCall(context,
nameof(VectorHelper.Tbl2_V64),
nameof(VectorHelper.Tbl2_V128)); break;
default: throw new InvalidOperationException();
case 3: VectorHelper.EmitCall(context,
nameof(VectorHelper.Tbl3_V64),
nameof(VectorHelper.Tbl3_V128)); break;
case 4: VectorHelper.EmitCall(context,
nameof(VectorHelper.Tbl4_V64),
nameof(VectorHelper.Tbl4_V128)); break;
default: throw new InvalidOperationException();
}
context.EmitStvec(op.Rd);
}
context.EmitStvec(op.Rd);
}
public static void Trn1_V(ILEmitterCtx context)

View File

@ -61,7 +61,9 @@ namespace ChocolArm64.Translation
//Vectors are part of another "set" of locals.
private const int VecGpTmp1Index = ReservedLocalsCount + 0;
private const int UserVecTempStart = ReservedLocalsCount + 1;
private const int VecGpTmp2Index = ReservedLocalsCount + 1;
private const int VecGpTmp3Index = ReservedLocalsCount + 2;
private const int UserVecTempStart = ReservedLocalsCount + 3;
private static int _userIntTempCount;
private static int _userVecTempCount;
@ -629,6 +631,12 @@ namespace ChocolArm64.Translation
// Convenience wrappers for the fixed vector temporary slots: each one forwards
// the matching VecGpTmpNIndex constant to EmitLdvec / EmitStvec.

// Vector temporary 1 (VecGpTmp1Index).
public void EmitLdvectmp() => EmitLdvec(VecGpTmp1Index);
public void EmitStvectmp() => EmitStvec(VecGpTmp1Index);
// Vector temporary 2 (VecGpTmp2Index).
public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index);
public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index);
// Vector temporary 3 (VecGpTmp3Index).
public void EmitLdvectmp3() => EmitLdvec(VecGpTmp3Index);
public void EmitStvectmp3() => EmitStvec(VecGpTmp3Index);
// Integer-local counterparts: forward the given index to Ldloc / Stloc
// with VarType.Int.
public void EmitLdint(int index) => Ldloc(index, VarType.Int);
public void EmitStint(int index) => Stloc(index, VarType.Int);