From 88d3ffb97c9d6e29a7eb44135ccea88753ca5f61 Mon Sep 17 00:00:00 2001 From: merry Date: Sun, 6 Feb 2022 15:25:45 +0000 Subject: [PATCH] ARMeilleure: A32: Implement SHADD8 (#3086) --- ARMeilleure/Decoders/OpCodeTable.cs | 1 + ARMeilleure/Instructions/InstEmitAlu32.cs | 52 ++++++++++++++++------- ARMeilleure/Instructions/InstName.cs | 1 + Ryujinx.Tests/Cpu/CpuTestAlu32.cs | 21 ++++++++- 4 files changed, 59 insertions(+), 16 deletions(-) diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 78537787d..d54bb5a51 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -733,6 +733,7 @@ namespace ARMeilleure.Decoders SetA32("<<<<0000110xxxxxxxxxxxxx0xx1xxxx", InstName.Sbc, InstEmit32.Sbc, OpCode32AluRsReg.Create); SetA32("<<<<0111101xxxxxxxxxxxxxx101xxxx", InstName.Sbfx, InstEmit32.Sbfx, OpCode32AluBf.Create); SetA32("<<<<01110001xxxx1111xxxx0001xxxx", InstName.Sdiv, InstEmit32.Sdiv, OpCode32AluMla.Create); + SetA32("<<<<01100011xxxxxxxx11111001xxxx", InstName.Shadd8, InstEmit32.Shadd8, OpCode32AluReg.Create); SetA32("<<<<00010000xxxxxxxxxxxx1xx0xxxx", InstName.Smla__, InstEmit32.Smla__, OpCode32AluMla.Create); SetA32("<<<<0000111xxxxxxxxxxxxx1001xxxx", InstName.Smlal, InstEmit32.Smlal, OpCode32AluUmull.Create); SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlal__, InstEmit32.Smlal__, OpCode32AluUmull.Create); diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs index 5f55fcd19..9aff0261c 100644 --- a/ARMeilleure/Instructions/InstEmitAlu32.cs +++ b/ARMeilleure/Instructions/InstEmitAlu32.cs @@ -387,6 +387,11 @@ namespace ARMeilleure.Instructions EmitDiv(context, false); } + public static void Shadd8(ArmEmitterContext context) + { + EmitHadd8(context, false); + } + public static void Ssat(ArmEmitterContext context) { OpCode32Sat op = (OpCode32Sat)context.CurrOp; @@ -474,20 +479,7 @@ namespace ARMeilleure.Instructions public static void Uhadd8(ArmEmitterContext context) { - OpCode32AluReg op = (OpCode32AluReg)context.CurrOp; - - Operand m = GetIntA32(context, op.Rm); - Operand n = GetIntA32(context, op.Rn); - - Operand xor, res; - - res = context.BitwiseAnd(m, n); - xor = context.BitwiseExclusiveOr(m, n); - xor = context.ShiftRightUI(xor, Const(1)); - xor = context.BitwiseAnd(xor, Const(0x7F7F7F7Fu)); - res = context.Add(res, xor); - - SetIntA32(context, op.Rd, res); + EmitHadd8(context, true); } public static void Usat(ArmEmitterContext context) @@ -659,6 +651,36 @@ namespace ARMeilleure.Instructions context.MarkLabel(lblEnd); } + private static void EmitHadd8(ArmEmitterContext context, bool unsigned) + { + OpCode32AluReg op = (OpCode32AluReg)context.CurrOp; + + Operand m = GetIntA32(context, op.Rm); + Operand n = GetIntA32(context, op.Rn); + + Operand xor, res, carry; + + // This relies on the equality x+y == ((x&y) << 1) + (x^y). + // Note that x^y always contains the LSB of the result. + // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1). + // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below. + + res = context.BitwiseAnd(m, n); + carry = context.BitwiseExclusiveOr(m, n); + xor = context.ShiftRightUI(carry, Const(1)); + xor = context.BitwiseAnd(xor, Const(0x7F7F7F7Fu)); + res = context.Add(res, xor); + + if (!unsigned) + { + // Propagates the sign bit from (x^y)>>1 upwards by one. + carry = context.BitwiseAnd(carry, Const(0x80808080u)); + res = context.BitwiseExclusiveOr(res, carry); + } + + SetIntA32(context, op.Rd, res); + } + private static void EmitSat(ArmEmitterContext context, int intMin, int intMax) { OpCode32Sat op = (OpCode32Sat)context.CurrOp; @@ -772,4 +794,4 @@ namespace ARMeilleure.Instructions EmitGenericAluStoreA32(context, op.Rd, op.SetFlags, value); } } -} \ No newline at end of file +} diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index a520c86a3..698979b9a 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -516,6 +516,7 @@ namespace ARMeilleure.Instructions Rsb, Rsc, Sbfx, + Shadd8, Smla__, Smlal, Smlal__, diff --git a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs index 1867e27fe..7a30f138e 100644 --- a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs @@ -77,6 +77,25 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] + public void Shadd8([Values(0u, 0xdu)] uint rd, + [Values(1u)] uint rm, + [Values(2u)] uint rn, + [Random(RndCnt)] uint w0, + [Random(RndCnt)] uint w1, + [Random(RndCnt)] uint w2) + { + uint opcode = 0xE6300F90u; // SHADD8 R0, R0, R0 + + opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: w0, r1: w1, r2: w2, sp: sp); + + CompareAgainstUnicorn(); + } + [Test, Pairwise] public void Ssat_Usat([ValueSource("_Ssat_Usat_")] uint opcode, [Values(0u, 0xdu)] uint rd, @@ -120,7 +139,7 @@ namespace Ryujinx.Tests.Cpu [Random(RndCnt)] uint w1, [Random(RndCnt)] uint w2) { - uint opcode = 0xE6700F90u; //UHADD8 R0, R0, R0 + uint opcode = 0xE6700F90u; // UHADD8 R0, R0, R0 opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rn & 15) << 16);