From e21ebbf666f10d39d44a0856e5a44143d3d69d0d Mon Sep 17 00:00:00 2001 From: gdkchan Date: Wed, 27 Feb 2019 23:03:31 -0300 Subject: [PATCH] Misc. CPU optimizations (#575) * Add optimizations related to caller/callee saved registers, thread synchronization and disable tier 0 * Refactoring * Add a config entry to enable or disable the reg load/store opt. * Remove unnecessary register state stores for calls when the callee is know * Rename IoType to VarType * Enable tier 0 while fixing some perf issues related to tier 0 * Small tweak -- Compile before adding to the cache, to avoid lags * Add required config entry --- ChocolArm64/Instructions/InstEmitFlow.cs | 3 +- ChocolArm64/Instructions/InstEmitFlow32.cs | 1 - .../Instructions/InstEmitFlowHelper.cs | 26 +-- ChocolArm64/Optimizations.cs | 32 +-- ChocolArm64/Translation/CallType.cs | 9 + ChocolArm64/Translation/ILBlock.cs | 32 +-- ChocolArm64/Translation/ILEmitterCtx.cs | 89 +++++--- ChocolArm64/Translation/ILLabel.cs | 6 +- ChocolArm64/Translation/ILMethodBuilder.cs | 45 ++-- ChocolArm64/Translation/ILOpCode.cs | 6 +- ChocolArm64/Translation/ILOpCodeBranch.cs | 10 +- ChocolArm64/Translation/ILOpCodeCall.cs | 4 +- ChocolArm64/Translation/ILOpCodeConst.cs | 2 + ChocolArm64/Translation/ILOpCodeLoad.cs | 20 +- ChocolArm64/Translation/ILOpCodeLoadField.cs | 2 +- ChocolArm64/Translation/ILOpCodeLoadState.cs | 17 +- ChocolArm64/Translation/ILOpCodeLog.cs | 6 +- ChocolArm64/Translation/ILOpCodeStore.cs | 20 +- ChocolArm64/Translation/ILOpCodeStoreState.cs | 26 ++- .../{LocalAlloc.cs => RegisterUsage.cs} | 199 ++++++++++-------- ChocolArm64/Translation/TranslatedSub.cs | 55 ++++- ChocolArm64/Translation/Translator.cs | 56 +++-- ChocolArm64/Translation/TranslatorQueue.cs | 14 +- .../Translation/TranslatorQueueItem.cs | 15 +- .../Translation/{IoType.cs => VarType.cs} | 2 +- Ryujinx/Config.jsonc | 17 +- Ryujinx/Configuration.cs | 10 + Ryujinx/_schema.json | 12 ++ 28 files changed, 456 insertions(+), 280 deletions(-) create mode 100644 ChocolArm64/Translation/CallType.cs rename ChocolArm64/Translation/{LocalAlloc.cs => RegisterUsage.cs} (56%) rename ChocolArm64/Translation/{IoType.cs => VarType.cs} (85%) diff --git a/ChocolArm64/Instructions/InstEmitFlow.cs b/ChocolArm64/Instructions/InstEmitFlow.cs index a842dca9d..5eae89cc0 100644 --- a/ChocolArm64/Instructions/InstEmitFlow.cs +++ b/ChocolArm64/Instructions/InstEmitFlow.cs @@ -39,7 +39,6 @@ namespace ChocolArm64.Instructions context.EmitLdc_I(op.Position + 4); context.EmitStint(RegisterAlias.Lr); - context.EmitStoreState(); EmitCall(context, op.Imm); } @@ -60,6 +59,8 @@ namespace ChocolArm64.Instructions { OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp; + context.HasIndirectJump = true; + context.EmitStoreState(); context.EmitLdintzr(op.Rn); diff --git a/ChocolArm64/Instructions/InstEmitFlow32.cs b/ChocolArm64/Instructions/InstEmitFlow32.cs index 61f1d34c5..dea490c77 100644 --- a/ChocolArm64/Instructions/InstEmitFlow32.cs +++ b/ChocolArm64/Instructions/InstEmitFlow32.cs @@ -65,7 +65,6 @@ namespace ChocolArm64.Instructions } context.EmitStint(GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr)); - context.EmitStoreState(); //If x is true, then this is a branch with link and exchange. //In this case we need to swap the mode between Arm <-> Thumb. diff --git a/ChocolArm64/Instructions/InstEmitFlowHelper.cs b/ChocolArm64/Instructions/InstEmitFlowHelper.cs index e93ef4267..a6091a571 100644 --- a/ChocolArm64/Instructions/InstEmitFlowHelper.cs +++ b/ChocolArm64/Instructions/InstEmitFlowHelper.cs @@ -11,6 +11,8 @@ namespace ChocolArm64.Instructions { if (context.Tier == TranslationTier.Tier0) { + context.EmitStoreState(); + context.TranslateAhead(imm); context.EmitLdc_I8(imm); @@ -22,6 +24,10 @@ namespace ChocolArm64.Instructions if (!context.TryOptEmitSubroutineCall()) { + context.HasSlowCall = true; + + context.EmitStoreState(); + context.TranslateAhead(imm); context.EmitLdarg(TranslatedSub.StateArgIdx); @@ -32,6 +38,7 @@ namespace ChocolArm64.Instructions context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdc_I8(imm); + context.EmitLdc_I4((int)CallType.Call); context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine)); @@ -58,20 +65,6 @@ namespace ChocolArm64.Instructions { if (context.Tier == TranslationTier.Tier0) { - context.Emit(OpCodes.Dup); - - context.EmitSttmp(); - context.EmitLdarg(TranslatedSub.StateArgIdx); - - context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator), - BindingFlags.Instance | - BindingFlags.NonPublic)); - - context.EmitLdarg(TranslatedSub.StateArgIdx); - context.EmitLdtmp(); - - context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine)); - context.Emit(OpCodes.Ret); } else @@ -85,8 +78,11 @@ namespace ChocolArm64.Instructions context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdtmp(); + context.EmitLdc_I4(isJump + ? (int)CallType.VirtualJump + : (int)CallType.VirtualCall); - context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine)); + context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine)); context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdarg(TranslatedSub.MemoryArgIdx); diff --git a/ChocolArm64/Optimizations.cs b/ChocolArm64/Optimizations.cs index 8fa6f4626..cbb8131f5 100644 --- a/ChocolArm64/Optimizations.cs +++ b/ChocolArm64/Optimizations.cs @@ -2,21 +2,23 @@ using System.Runtime.Intrinsics.X86; public static class Optimizations { - internal static bool FastFP = true; + public static bool AssumeStrictAbiCompliance { get; set; } - private static bool _useAllSseIfAvailable = true; + public static bool FastFP { get; set; } = true; - private static bool _useSseIfAvailable = true; - private static bool _useSse2IfAvailable = true; - private static bool _useSse3IfAvailable = true; - private static bool _useSsse3IfAvailable = true; - private static bool _useSse41IfAvailable = true; - private static bool _useSse42IfAvailable = true; + private const bool UseAllSseIfAvailable = true; - internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported; - internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported; - internal static bool UseSse3 = (_useAllSseIfAvailable && _useSse3IfAvailable) && Sse3.IsSupported; - internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported; - internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported; - internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported; -} + public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse3IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable; + public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable; + + internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported; + internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported; + internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported; + internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported; + internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported; + internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported; +} \ No newline at end of file diff --git a/ChocolArm64/Translation/CallType.cs b/ChocolArm64/Translation/CallType.cs new file mode 100644 index 000000000..937ede768 --- /dev/null +++ b/ChocolArm64/Translation/CallType.cs @@ -0,0 +1,9 @@ +namespace ChocolArm64.Translation +{ + enum CallType + { + Call, + VirtualCall, + VirtualJump + } +} \ No newline at end of file diff --git a/ChocolArm64/Translation/ILBlock.cs b/ChocolArm64/Translation/ILBlock.cs index 136579012..12773705a 100644 --- a/ChocolArm64/Translation/ILBlock.cs +++ b/ChocolArm64/Translation/ILBlock.cs @@ -4,13 +4,13 @@ namespace ChocolArm64.Translation { class ILBlock : IILEmit { - public long IntInputs { get; private set; } - public long IntOutputs { get; private set; } - public long IntAwOutputs { get; private set; } + public long IntInputs { get; private set; } + public long IntOutputs { get; private set; } + private long _intAwOutputs; - public long VecInputs { get; private set; } - public long VecOutputs { get; private set; } - public long VecAwOutputs { get; private set; } + public long VecInputs { get; private set; } + public long VecOutputs { get; private set; } + private long _vecAwOutputs; public bool HasStateStore { get; private set; } @@ -34,25 +34,25 @@ namespace ChocolArm64.Translation //opcodes emitted by each ARM instruction. //We can only consider the new outputs for doing input elimination //after all the CIL opcodes used by the instruction being emitted. - IntAwOutputs = IntOutputs; - VecAwOutputs = VecOutputs; + _intAwOutputs = IntOutputs; + _vecAwOutputs = VecOutputs; } else if (emitter is ILOpCodeLoad ld && ILMethodBuilder.IsRegIndex(ld.Index)) { - switch (ld.IoType) + switch (ld.VarType) { - case IoType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~IntAwOutputs; break; - case IoType.Int: IntInputs |= (1L << ld.Index) & ~IntAwOutputs; break; - case IoType.Vector: VecInputs |= (1L << ld.Index) & ~VecAwOutputs; break; + case VarType.Flag: IntInputs |= ((1L << ld.Index) << 32) & ~_intAwOutputs; break; + case VarType.Int: IntInputs |= (1L << ld.Index) & ~_intAwOutputs; break; + case VarType.Vector: VecInputs |= (1L << ld.Index) & ~_vecAwOutputs; break; } } else if (emitter is ILOpCodeStore st && ILMethodBuilder.IsRegIndex(st.Index)) { - switch (st.IoType) + switch (st.VarType) { - case IoType.Flag: IntOutputs |= (1L << st.Index) << 32; break; - case IoType.Int: IntOutputs |= 1L << st.Index; break; - case IoType.Vector: VecOutputs |= 1L << st.Index; break; + case VarType.Flag: IntOutputs |= (1L << st.Index) << 32; break; + case VarType.Int: IntOutputs |= 1L << st.Index; break; + case VarType.Vector: VecOutputs |= 1L << st.Index; break; } } else if (emitter is ILOpCodeStoreState) diff --git a/ChocolArm64/Translation/ILEmitterCtx.cs b/ChocolArm64/Translation/ILEmitterCtx.cs index f7e61bc99..91b72b13a 100644 --- a/ChocolArm64/Translation/ILEmitterCtx.cs +++ b/ChocolArm64/Translation/ILEmitterCtx.cs @@ -31,6 +31,10 @@ namespace ChocolArm64.Translation public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO + public bool HasIndirectJump { get; set; } + + public bool HasSlowCall { get; set; } + private Dictionary _visitedBlocks; private Queue _branchTargets; @@ -91,7 +95,12 @@ namespace ChocolArm64.Translation ResetBlockState(); - AdvanceOpCode(); + if (AdvanceOpCode()) + { + EmitSynchronization(); + + _ilBlock.Add(new ILOpCodeLoadState(_ilBlock, isSubEntry: true)); + } } public static int GetIntTempIndex() @@ -127,10 +136,18 @@ namespace ChocolArm64.Translation return; } - if (_opcIndex == 0) + int opcIndex = _opcIndex; + + if (opcIndex == 0) { MarkLabel(GetLabel(_currBlock.Position)); + } + bool isLastOp = opcIndex == CurrBlock.OpCodes.Count - 1; + + if (isLastOp && CurrBlock.Branch != null && + (ulong)CurrBlock.Branch.Position <= (ulong)CurrBlock.Position) + { EmitSynchronization(); } @@ -161,7 +178,7 @@ namespace ChocolArm64.Translation //of the next instruction to be executed (in the case that the condition //is false, and the branch was not taken, as all basic blocks should end with //some kind of branch). - if (CurrOp == CurrBlock.GetLastOp() && CurrBlock.Next == null) + if (isLastOp && CurrBlock.Next == null) { EmitStoreState(); EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes); @@ -285,32 +302,43 @@ namespace ChocolArm64.Translation return; } - _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1)); + _queue.Enqueue(position, mode, TranslationTier.Tier1, isComplete: true); } public bool TryOptEmitSubroutineCall() { + //Calls should always have a next block, unless + //we're translating a single basic block. if (_currBlock.Next == null) { return false; } - if (CurrOp.Emitter != InstEmit.Bl) + if (!(CurrOp is IOpCodeBImm op)) { return false; } - if (!_cache.TryGetSubroutine(((OpCodeBImmAl64)CurrOp).Imm, out TranslatedSub subroutine)) + if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub)) { return false; } + //It's not worth to call a Tier0 method, because + //it contains slow code, rather than the entire function. + if (sub.Tier == TranslationTier.Tier0) + { + return false; + } + + EmitStoreState(sub); + for (int index = 0; index < TranslatedSub.FixedArgTypes.Length; index++) { EmitLdarg(index); } - EmitCall(subroutine.Method); + EmitCall(sub.Method); return true; } @@ -321,8 +349,8 @@ namespace ChocolArm64.Translation InstEmitAluHelper.EmitAluLoadOpers(this); - Stloc(CmpOptTmp2Index, IoType.Int); - Stloc(CmpOptTmp1Index, IoType.Int); + Stloc(CmpOptTmp2Index, VarType.Int); + Stloc(CmpOptTmp1Index, VarType.Int); } private Dictionary _branchOps = new Dictionary() @@ -346,8 +374,8 @@ namespace ChocolArm64.Translation { if (_optOpLastCompare.Emitter == InstEmit.Subs) { - Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize); - Ldloc(CmpOptTmp2Index, IoType.Int, _optOpLastCompare.RegisterSize); + Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize); + Ldloc(CmpOptTmp2Index, VarType.Int, _optOpLastCompare.RegisterSize); Emit(_branchOps[cond], target); @@ -369,7 +397,7 @@ namespace ChocolArm64.Translation //Such invalid values can't be encoded on the immediate encodings. if (_optOpLastCompare is IOpCodeAluImm64 op) { - Ldloc(CmpOptTmp1Index, IoType.Int, _optOpLastCompare.RegisterSize); + Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize); if (_optOpLastCompare.RegisterSize == RegisterSize.Int32) { @@ -491,14 +519,14 @@ namespace ChocolArm64.Translation { if (amount > 0) { - Stloc(RorTmpIndex, IoType.Int); - Ldloc(RorTmpIndex, IoType.Int); + Stloc(RorTmpIndex, VarType.Int); + Ldloc(RorTmpIndex, VarType.Int); EmitLdc_I4(amount); Emit(OpCodes.Shr_Un); - Ldloc(RorTmpIndex, IoType.Int); + Ldloc(RorTmpIndex, VarType.Int); EmitLdc_I4(CurrOp.GetBitsCount() - amount); @@ -546,7 +574,7 @@ namespace ChocolArm64.Translation public void EmitLdarg(int index) { - _ilBlock.Add(new ILOpCodeLoad(index, IoType.Arg)); + _ilBlock.Add(new ILOpCodeLoad(index, VarType.Arg)); } public void EmitLdintzr(int index) @@ -588,6 +616,11 @@ namespace ChocolArm64.Translation _ilBlock.Add(new ILOpCodeStoreState(_ilBlock)); } + private void EmitStoreState(TranslatedSub callSub) + { + _ilBlock.Add(new ILOpCodeStoreState(_ilBlock, callSub)); + } + public void EmitLdtmp() => EmitLdint(IntGpTmp1Index); public void EmitSttmp() => EmitStint(IntGpTmp1Index); @@ -600,13 +633,13 @@ namespace ChocolArm64.Translation public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index); public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index); - public void EmitLdint(int index) => Ldloc(index, IoType.Int); - public void EmitStint(int index) => Stloc(index, IoType.Int); + public void EmitLdint(int index) => Ldloc(index, VarType.Int); + public void EmitStint(int index) => Stloc(index, VarType.Int); - public void EmitLdvec(int index) => Ldloc(index, IoType.Vector); - public void EmitStvec(int index) => Stloc(index, IoType.Vector); + public void EmitLdvec(int index) => Ldloc(index, VarType.Vector); + public void EmitStvec(int index) => Stloc(index, VarType.Vector); - public void EmitLdflg(int index) => Ldloc(index, IoType.Flag); + public void EmitLdflg(int index) => Ldloc(index, VarType.Flag); public void EmitStflg(int index) { //Set this only if any of the NZCV flag bits were modified. @@ -619,22 +652,22 @@ namespace ChocolArm64.Translation _optOpLastFlagSet = CurrOp; } - Stloc(index, IoType.Flag); + Stloc(index, VarType.Flag); } - private void Ldloc(int index, IoType ioType) + private void Ldloc(int index, VarType varType) { - _ilBlock.Add(new ILOpCodeLoad(index, ioType, CurrOp.RegisterSize)); + _ilBlock.Add(new ILOpCodeLoad(index, varType, CurrOp.RegisterSize)); } - private void Ldloc(int index, IoType ioType, RegisterSize registerSize) + private void Ldloc(int index, VarType varType, RegisterSize registerSize) { - _ilBlock.Add(new ILOpCodeLoad(index, ioType, registerSize)); + _ilBlock.Add(new ILOpCodeLoad(index, varType, registerSize)); } - private void Stloc(int index, IoType ioType) + private void Stloc(int index, VarType varType) { - _ilBlock.Add(new ILOpCodeStore(index, ioType, CurrOp.RegisterSize)); + _ilBlock.Add(new ILOpCodeStore(index, varType, CurrOp.RegisterSize)); } public void EmitCallPropGet(Type objType, string propName) diff --git a/ChocolArm64/Translation/ILLabel.cs b/ChocolArm64/Translation/ILLabel.cs index f423a4256..17a31783d 100644 --- a/ChocolArm64/Translation/ILLabel.cs +++ b/ChocolArm64/Translation/ILLabel.cs @@ -6,7 +6,7 @@ namespace ChocolArm64.Translation { private bool _hasLabel; - private Label _lbl; + private Label _label; public void Emit(ILMethodBuilder context) { @@ -17,12 +17,12 @@ namespace ChocolArm64.Translation { if (!_hasLabel) { - _lbl = context.Generator.DefineLabel(); + _label = context.Generator.DefineLabel(); _hasLabel = true; } - return _lbl; + return _label; } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILMethodBuilder.cs b/ChocolArm64/Translation/ILMethodBuilder.cs index 892f831be..98b505204 100644 --- a/ChocolArm64/Translation/ILMethodBuilder.cs +++ b/ChocolArm64/Translation/ILMethodBuilder.cs @@ -8,7 +8,10 @@ namespace ChocolArm64.Translation { class ILMethodBuilder { - public LocalAlloc LocalAlloc { get; private set; } + private const int RegsCount = 32; + private const int RegsMask = RegsCount - 1; + + public RegisterUsage RegUsage { get; private set; } public ILGenerator Generator { get; private set; } @@ -18,29 +21,47 @@ namespace ChocolArm64.Translation private string _subName; + public bool IsAarch64 { get; } + + public bool IsSubComplete { get; } + private int _localsCount; - public ILMethodBuilder(ILBlock[] ilBlocks, string subName) + public ILMethodBuilder( + ILBlock[] ilBlocks, + string subName, + bool isAarch64, + bool isSubComplete = false) { - _ilBlocks = ilBlocks; - _subName = subName; + _ilBlocks = ilBlocks; + _subName = subName; + IsAarch64 = isAarch64; + IsSubComplete = isSubComplete; } - public TranslatedSub GetSubroutine(TranslationTier tier) + public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing) { - LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]); + RegUsage = new RegisterUsage(); + + RegUsage.BuildUses(_ilBlocks[0]); DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes); - Generator = method.GetILGenerator(); + long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]); + long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]); - TranslatedSub subroutine = new TranslatedSub(method, tier); + TranslatedSub subroutine = new TranslatedSub( + method, + intNiRegsMask, + vecNiRegsMask, + tier, + isWorthOptimizing); _locals = new Dictionary(); _localsCount = 0; - new ILOpCodeLoadState(_ilBlocks[0]).Emit(this); + Generator = method.GetILGenerator(); foreach (ILBlock ilBlock in _ilBlocks) { @@ -80,13 +101,13 @@ namespace ChocolArm64.Translation public static Register GetRegFromBit(int bit, RegisterType baseType) { - if (bit < 32) + if (bit < RegsCount) { return new Register(bit, baseType); } else if (baseType == RegisterType.Int) { - return new Register(bit & 0x1f, RegisterType.Flag); + return new Register(bit & RegsMask, RegisterType.Flag); } else { @@ -96,7 +117,7 @@ namespace ChocolArm64.Translation public static bool IsRegIndex(int index) { - return (uint)index < 32; + return (uint)index < RegsCount; } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILOpCode.cs b/ChocolArm64/Translation/ILOpCode.cs index 4021603c0..486452820 100644 --- a/ChocolArm64/Translation/ILOpCode.cs +++ b/ChocolArm64/Translation/ILOpCode.cs @@ -4,16 +4,16 @@ namespace ChocolArm64.Translation { struct ILOpCode : IILEmit { - private OpCode _ilOp; + public OpCode ILOp { get; } public ILOpCode(OpCode ilOp) { - _ilOp = ilOp; + ILOp = ilOp; } public void Emit(ILMethodBuilder context) { - context.Generator.Emit(_ilOp); + context.Generator.Emit(ILOp); } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILOpCodeBranch.cs b/ChocolArm64/Translation/ILOpCodeBranch.cs index 22b80b5d5..9d4e40fa9 100644 --- a/ChocolArm64/Translation/ILOpCodeBranch.cs +++ b/ChocolArm64/Translation/ILOpCodeBranch.cs @@ -4,18 +4,18 @@ namespace ChocolArm64.Translation { struct ILOpCodeBranch : IILEmit { - private OpCode _ilOp; - private ILLabel _label; + public OpCode ILOp { get; } + public ILLabel Label { get; } public ILOpCodeBranch(OpCode ilOp, ILLabel label) { - _ilOp = ilOp; - _label = label; + ILOp = ilOp; + Label = label; } public void Emit(ILMethodBuilder context) { - context.Generator.Emit(_ilOp, _label.GetLabel(context)); + context.Generator.Emit(ILOp, Label.GetLabel(context)); } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILOpCodeCall.cs b/ChocolArm64/Translation/ILOpCodeCall.cs index c046aeeb7..dc20417a9 100644 --- a/ChocolArm64/Translation/ILOpCodeCall.cs +++ b/ChocolArm64/Translation/ILOpCodeCall.cs @@ -5,9 +5,9 @@ namespace ChocolArm64.Translation { struct ILOpCodeCall : IILEmit { - public MethodInfo Info { get; private set; } + public MethodInfo Info { get; } - public bool IsVirtual { get; private set; } + public bool IsVirtual { get; } public ILOpCodeCall(MethodInfo info, bool isVirtual) { diff --git a/ChocolArm64/Translation/ILOpCodeConst.cs b/ChocolArm64/Translation/ILOpCodeConst.cs index 2aaf8676e..cd3b58ff0 100644 --- a/ChocolArm64/Translation/ILOpCodeConst.cs +++ b/ChocolArm64/Translation/ILOpCodeConst.cs @@ -16,6 +16,8 @@ namespace ChocolArm64.Translation private ImmVal _value; + public long Value => _value.I8; + private enum ConstType { Int32, diff --git a/ChocolArm64/Translation/ILOpCodeLoad.cs b/ChocolArm64/Translation/ILOpCodeLoad.cs index c31e06bbd..0d11eeaa4 100644 --- a/ChocolArm64/Translation/ILOpCodeLoad.cs +++ b/ChocolArm64/Translation/ILOpCodeLoad.cs @@ -5,28 +5,28 @@ namespace ChocolArm64.Translation { struct ILOpCodeLoad : IILEmit { - public int Index { get; private set; } + public int Index { get; } - public IoType IoType { get; private set; } + public VarType VarType { get; } - public RegisterSize RegisterSize { get; private set; } + public RegisterSize RegisterSize { get; } - public ILOpCodeLoad(int index, IoType ioType, RegisterSize registerSize = 0) + public ILOpCodeLoad(int index, VarType varType, RegisterSize registerSize = 0) { Index = index; - IoType = ioType; + VarType = varType; RegisterSize = registerSize; } public void Emit(ILMethodBuilder context) { - switch (IoType) + switch (VarType) { - case IoType.Arg: context.Generator.EmitLdarg(Index); break; + case VarType.Arg: context.Generator.EmitLdarg(Index); break; - case IoType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break; - case IoType.Int: EmitLdloc(context, Index, RegisterType.Int); break; - case IoType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break; + case VarType.Flag: EmitLdloc(context, Index, RegisterType.Flag); break; + case VarType.Int: EmitLdloc(context, Index, RegisterType.Int); break; + case VarType.Vector: EmitLdloc(context, Index, RegisterType.Vector); break; } } diff --git a/ChocolArm64/Translation/ILOpCodeLoadField.cs b/ChocolArm64/Translation/ILOpCodeLoadField.cs index abcd37c34..f0507ac22 100644 --- a/ChocolArm64/Translation/ILOpCodeLoadField.cs +++ b/ChocolArm64/Translation/ILOpCodeLoadField.cs @@ -5,7 +5,7 @@ namespace ChocolArm64.Translation { struct ILOpCodeLoadField : IILEmit { - public FieldInfo Info { get; private set; } + public FieldInfo Info { get; } public ILOpCodeLoadField(FieldInfo info) { diff --git a/ChocolArm64/Translation/ILOpCodeLoadState.cs b/ChocolArm64/Translation/ILOpCodeLoadState.cs index ddab61101..c23dc9432 100644 --- a/ChocolArm64/Translation/ILOpCodeLoadState.cs +++ b/ChocolArm64/Translation/ILOpCodeLoadState.cs @@ -7,15 +7,24 @@ namespace ChocolArm64.Translation { private ILBlock _block; - public ILOpCodeLoadState(ILBlock block) + private bool _isSubEntry; + + public ILOpCodeLoadState(ILBlock block, bool isSubEntry = false) { - _block = block; + _block = block; + _isSubEntry = isSubEntry; } public void Emit(ILMethodBuilder context) { - long intInputs = context.LocalAlloc.GetIntInputs(_block); - long vecInputs = context.LocalAlloc.GetVecInputs(_block); + long intInputs = context.RegUsage.GetIntInputs(_block); + long vecInputs = context.RegUsage.GetVecInputs(_block); + + if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete) + { + intInputs = RegisterUsage.ClearCallerSavedIntRegs(intInputs, context.IsAarch64); + vecInputs = RegisterUsage.ClearCallerSavedVecRegs(vecInputs, context.IsAarch64); + } LoadLocals(context, intInputs, RegisterType.Int); LoadLocals(context, vecInputs, RegisterType.Vector); diff --git a/ChocolArm64/Translation/ILOpCodeLog.cs b/ChocolArm64/Translation/ILOpCodeLog.cs index ebb042b59..53846f927 100644 --- a/ChocolArm64/Translation/ILOpCodeLog.cs +++ b/ChocolArm64/Translation/ILOpCodeLog.cs @@ -2,16 +2,16 @@ namespace ChocolArm64.Translation { struct ILOpCodeLog : IILEmit { - private string _text; + public string Text { get; } public ILOpCodeLog(string text) { - _text = text; + Text = text; } public void Emit(ILMethodBuilder context) { - context.Generator.EmitWriteLine(_text); + context.Generator.EmitWriteLine(Text); } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILOpCodeStore.cs b/ChocolArm64/Translation/ILOpCodeStore.cs index 17a6259c6..7ac78e9ae 100644 --- a/ChocolArm64/Translation/ILOpCodeStore.cs +++ b/ChocolArm64/Translation/ILOpCodeStore.cs @@ -5,28 +5,28 @@ namespace ChocolArm64.Translation { struct ILOpCodeStore : IILEmit { - public int Index { get; private set; } + public int Index { get; } - public IoType IoType { get; private set; } + public VarType VarType { get; } - public RegisterSize RegisterSize { get; private set; } + public RegisterSize RegisterSize { get; } - public ILOpCodeStore(int index, IoType ioType, RegisterSize registerSize = 0) + public ILOpCodeStore(int index, VarType varType, RegisterSize registerSize = 0) { Index = index; - IoType = ioType; + VarType = varType; RegisterSize = registerSize; } public void Emit(ILMethodBuilder context) { - switch (IoType) + switch (VarType) { - case IoType.Arg: context.Generator.EmitStarg(Index); break; + case VarType.Arg: context.Generator.EmitStarg(Index); break; - case IoType.Flag: EmitStloc(context, Index, RegisterType.Flag); break; - case IoType.Int: EmitStloc(context, Index, RegisterType.Int); break; - case IoType.Vector: EmitStloc(context, Index, RegisterType.Vector); break; + case VarType.Flag: EmitStloc(context, Index, RegisterType.Flag); break; + case VarType.Int: EmitStloc(context, Index, RegisterType.Int); break; + case VarType.Vector: EmitStloc(context, Index, RegisterType.Vector); break; } } diff --git a/ChocolArm64/Translation/ILOpCodeStoreState.cs b/ChocolArm64/Translation/ILOpCodeStoreState.cs index 458e9eda4..a587dbfe8 100644 --- a/ChocolArm64/Translation/ILOpCodeStoreState.cs +++ b/ChocolArm64/Translation/ILOpCodeStoreState.cs @@ -7,15 +7,33 @@ namespace ChocolArm64.Translation { private ILBlock _block; - public ILOpCodeStoreState(ILBlock block) + private TranslatedSub _callSub; + + public ILOpCodeStoreState(ILBlock block, TranslatedSub callSub = null) { - _block = block; + _block = block; + _callSub = callSub; } public void Emit(ILMethodBuilder context) { - long intOutputs = context.LocalAlloc.GetIntOutputs(_block); - long vecOutputs = context.LocalAlloc.GetVecOutputs(_block); + long intOutputs = context.RegUsage.GetIntOutputs(_block); + long vecOutputs = context.RegUsage.GetVecOutputs(_block); + + if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete) + { + intOutputs = RegisterUsage.ClearCallerSavedIntRegs(intOutputs, context.IsAarch64); + vecOutputs = RegisterUsage.ClearCallerSavedVecRegs(vecOutputs, context.IsAarch64); + } + + if (_callSub != null) + { + //Those register are assigned on the callee function, without + //reading it's value first. We don't need to write them because + //they are not going to be read on the callee. + intOutputs &= ~_callSub.IntNiRegsMask; + vecOutputs &= ~_callSub.VecNiRegsMask; + } StoreLocals(context, intOutputs, RegisterType.Int); StoreLocals(context, vecOutputs, RegisterType.Vector); diff --git a/ChocolArm64/Translation/LocalAlloc.cs b/ChocolArm64/Translation/RegisterUsage.cs similarity index 56% rename from ChocolArm64/Translation/LocalAlloc.cs rename to ChocolArm64/Translation/RegisterUsage.cs index 763be6190..2e6829d51 100644 --- a/ChocolArm64/Translation/LocalAlloc.cs +++ b/ChocolArm64/Translation/RegisterUsage.cs @@ -3,8 +3,13 @@ using System.Collections.Generic; namespace ChocolArm64.Translation { - class LocalAlloc + class RegisterUsage { + public const long CallerSavedIntRegistersMask = 0x7fL << 9; + public const long PStateNzcvFlagsMask = 0xfL << 60; + + public const long CallerSavedVecRegistersMask = 0xffffL << 16; + private class PathIo { private Dictionary _allInputs; @@ -18,31 +23,30 @@ namespace ChocolArm64.Translation _cmnOutputs = new Dictionary(); } - public PathIo(ILBlock root, long inputs, long outputs) : this() + public void Set(ILBlock entry, long inputs, long outputs) { - Set(root, inputs, outputs); - } - - public void Set(ILBlock root, long inputs, long outputs) - { - if (!_allInputs.TryAdd(root, inputs)) + if (!_allInputs.TryAdd(entry, inputs)) { - _allInputs[root] |= inputs; + _allInputs[entry] |= inputs; } - if (!_cmnOutputs.TryAdd(root, outputs)) + if (!_cmnOutputs.TryAdd(entry, outputs)) { - _cmnOutputs[root] &= outputs; + _cmnOutputs[entry] &= outputs; } _allOutputs |= outputs; } - public long GetInputs(ILBlock root) + public long GetInputs(ILBlock entry) { - if (_allInputs.TryGetValue(root, out long inputs)) + if (_allInputs.TryGetValue(entry, out long inputs)) { - return inputs | (_allOutputs & ~_cmnOutputs[root]); + //We also need to read the registers that may not be written + //by all paths that can reach a exit point, to ensure that + //the local variable will not remain uninitialized depending + //on the flow path taken. + return inputs | (_allOutputs & ~_cmnOutputs[entry]); } return 0; @@ -57,15 +61,38 @@ namespace ChocolArm64.Translation private Dictionary _intPaths; private Dictionary _vecPaths; - private struct BlockIo + private struct BlockIo : IEquatable { - public ILBlock Block; - public ILBlock Entry; + public ILBlock Block { get; } + public ILBlock Entry { get; } - public long IntInputs; - public long VecInputs; - public long IntOutputs; - public long VecOutputs; + public long IntInputs { get; set; } + public long VecInputs { get; set; } + public long IntOutputs { get; set; } + public long VecOutputs { get; set; } + + public BlockIo(ILBlock block, ILBlock entry) + { + Block = block; + Entry = entry; + + IntInputs = IntOutputs = 0; + VecInputs = VecOutputs = 0; + } + + public BlockIo( + ILBlock block, + ILBlock entry, + long intInputs, + long vecInputs, + long intOutputs, + long vecOutputs) : this(block, entry) + { + IntInputs = intInputs; + VecInputs = vecInputs; + IntOutputs = intOutputs; + VecOutputs = vecOutputs; + } public override bool Equals(object obj) { @@ -74,6 +101,11 @@ namespace ChocolArm64.Translation return false; } + return Equals(other); + } + + public bool Equals(BlockIo other) + { return other.Block == Block && other.Entry == Entry && other.IntInputs == IntInputs && @@ -98,25 +130,13 @@ namespace ChocolArm64.Translation } } - private const int MaxOptGraphLength = 40; - - public LocalAlloc(ILBlock[] graph, ILBlock entry) + public RegisterUsage() { _intPaths = new Dictionary(); _vecPaths = new Dictionary(); - - if (graph.Length > 1 && - graph.Length < MaxOptGraphLength) - { - InitializeOptimal(graph, entry); - } - else - { - InitializeFast(graph); - } } - private void InitializeOptimal(ILBlock[] graph, ILBlock entry) + public void BuildUses(ILBlock entry) { //This will go through all possible paths on the graph, //and store all inputs/outputs for each block. A register @@ -124,7 +144,7 @@ namespace ChocolArm64.Translation //When a block can be reached by more than one path, then the //output from all paths needs to be set for this block, and //only outputs present in all of the parent blocks can be considered - //when doing input elimination. Each block chain have a entry, that's where + //when doing input elimination. Each block chain has a entry, that's where //the code starts executing. They are present on the subroutine start point, //and on call return points too (address written to X30 by BL). HashSet visited = new HashSet(); @@ -133,19 +153,13 @@ namespace ChocolArm64.Translation void Enqueue(BlockIo block) { - if (!visited.Contains(block)) + if (visited.Add(block)) { unvisited.Enqueue(block); - - visited.Add(block); } } - Enqueue(new BlockIo() - { - Block = entry, - Entry = entry - }); + Enqueue(new BlockIo(entry, entry)); while (unvisited.Count > 0) { @@ -177,19 +191,21 @@ namespace ChocolArm64.Translation void EnqueueFromCurrent(ILBlock block, bool retTarget) { - BlockIo blockIo = new BlockIo() { Block = block }; + BlockIo blockIo; if (retTarget) { - blockIo.Entry = block; + blockIo = new BlockIo(block, block); } else { - blockIo.Entry = current.Entry; - blockIo.IntInputs = current.IntInputs; - blockIo.VecInputs = current.VecInputs; - blockIo.IntOutputs = current.IntOutputs; - blockIo.VecOutputs = current.VecOutputs; + blockIo = new BlockIo( + block, + current.Entry, + current.IntInputs, + current.VecInputs, + current.IntOutputs, + current.VecOutputs); } Enqueue(blockIo); @@ -207,54 +223,63 @@ namespace ChocolArm64.Translation } } - private void InitializeFast(ILBlock[] graph) - { - //This is WAY faster than InitializeOptimal, but results in - //unneeded loads and stores, so the resulting code will be slower. - long intInputs = 0, intOutputs = 0; - long vecInputs = 0, vecOutputs = 0; + public long GetIntInputs(ILBlock entry) => GetInputsImpl(entry, _intPaths.Values); + public long GetVecInputs(ILBlock entry) => GetInputsImpl(entry, _vecPaths.Values); - foreach (ILBlock block in graph) - { - intInputs |= block.IntInputs; - intOutputs |= block.IntOutputs; - vecInputs |= block.VecInputs; - vecOutputs |= block.VecOutputs; - } - - //It's possible that not all code paths writes to those output registers, - //in those cases if we attempt to write an output registers that was - //not written, we will be just writing zero and messing up the old register value. - //So we just need to ensure that all outputs are loaded. - if (graph.Length > 1) - { - intInputs |= intOutputs; - vecInputs |= vecOutputs; - } - - foreach (ILBlock block in graph) - { - _intPaths.Add(block, new PathIo(block, intInputs, intOutputs)); - _vecPaths.Add(block, new PathIo(block, vecInputs, vecOutputs)); - } - } - - public long GetIntInputs(ILBlock root) => GetInputsImpl(root, _intPaths.Values); - public long GetVecInputs(ILBlock root) => GetInputsImpl(root, _vecPaths.Values); - - private long GetInputsImpl(ILBlock root, IEnumerable values) + private long GetInputsImpl(ILBlock entry, IEnumerable values) { long inputs = 0; foreach (PathIo path in values) { - inputs |= path.GetInputs(root); + inputs |= path.GetInputs(entry); } return inputs; } + public long GetIntNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _intPaths.Values); + public long GetVecNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _vecPaths.Values); + + private long GetNotInputsImpl(ILBlock entry, IEnumerable values) + { + //Returns a mask with registers that are written to + //before being read. Only those registers that are + //written in all paths, and is not read before being + //written to on those paths, should be set on the mask. + long mask = -1L; + + foreach (PathIo path in values) + { + mask &= path.GetOutputs() & ~path.GetInputs(entry); + } + + return mask; + } + public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs(); public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs(); + + public static long ClearCallerSavedIntRegs(long mask, bool isAarch64) + { + //TODO: ARM32 support. + if (isAarch64) + { + mask &= ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask); + } + + return mask; + } + + public static long ClearCallerSavedVecRegs(long mask, bool isAarch64) + { + //TODO: ARM32 support. + if (isAarch64) + { + mask &= ~CallerSavedVecRegistersMask; + } + + return mask; + } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/TranslatedSub.cs b/ChocolArm64/Translation/TranslatedSub.cs index 65d703510..8b599b7a9 100644 --- a/ChocolArm64/Translation/TranslatedSub.cs +++ b/ChocolArm64/Translation/TranslatedSub.cs @@ -10,21 +10,41 @@ namespace ChocolArm64.Translation class TranslatedSub { + //This is the minimum amount of calls needed for the method + //to be retranslated with higher quality code. It's only worth + //doing that for hot code. + private const int MinCallCountForOpt = 30; + public ArmSubroutine Delegate { get; private set; } - public static int StateArgIdx { get; private set; } - public static int MemoryArgIdx { get; private set; } + public static int StateArgIdx { get; } + public static int MemoryArgIdx { get; } - public static Type[] FixedArgTypes { get; private set; } + public static Type[] FixedArgTypes { get; } - public DynamicMethod Method { get; private set; } + public DynamicMethod Method { get; } - public TranslationTier Tier { get; private set; } + public TranslationTier Tier { get; } - public TranslatedSub(DynamicMethod method, TranslationTier tier) + public long IntNiRegsMask { get; } + public long VecNiRegsMask { get; } + + private bool _isWorthOptimizing; + + private int _callCount; + + public TranslatedSub( + DynamicMethod method, + long intNiRegsMask, + long vecNiRegsMask, + TranslationTier tier, + bool isWorthOptimizing) { - Method = method ?? throw new ArgumentNullException(nameof(method));; - Tier = tier; + Method = method ?? throw new ArgumentNullException(nameof(method));; + IntNiRegsMask = intNiRegsMask; + VecNiRegsMask = vecNiRegsMask; + _isWorthOptimizing = isWorthOptimizing; + Tier = tier; } static TranslatedSub() @@ -61,5 +81,24 @@ namespace ChocolArm64.Translation { return Delegate(threadState, memory); } + + public bool IsWorthOptimizing() + { + if (!_isWorthOptimizing) + { + return false; + } + + if (_callCount++ < MinCallCountForOpt) + { + return false; + } + + //Only return true once, so that it is + //added to the queue only once. + _isWorthOptimizing = false; + + return true; + } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/Translator.cs b/ChocolArm64/Translation/Translator.cs index dd1215f50..bda0bca09 100644 --- a/ChocolArm64/Translation/Translator.cs +++ b/ChocolArm64/Translation/Translator.cs @@ -63,48 +63,36 @@ namespace ChocolArm64.Translation CpuTrace?.Invoke(this, new CpuTraceEventArgs(position)); } - TranslatedSub subroutine = GetOrTranslateSubroutine(state, position); + if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) + { + sub = TranslateLowCq(position, state.GetExecutionMode()); + } - position = subroutine.Execute(state, _memory); + position = sub.Execute(state, _memory); } while (position != 0 && state.Running); state.CurrentTranslator = null; } - internal void TranslateVirtualSubroutine(CpuThreadState state, long position) - { - if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0) - { - _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1)); - } - } - - internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position) + internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs) { if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) { sub = TranslateLowCq(position, state.GetExecutionMode()); } - if (sub.Tier == TranslationTier.Tier0) + if (sub.IsWorthOptimizing()) { - _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1)); + bool isComplete = cs == CallType.Call || + cs == CallType.VirtualCall; + + _queue.Enqueue(position, state.GetExecutionMode(), TranslationTier.Tier1, isComplete); } return sub.Delegate; } - internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position) - { - if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine)) - { - subroutine = TranslateLowCq(position, state.GetExecutionMode()); - } - - return subroutine; - } - private void TranslateQueuedSubs() { while (_threadCount != 0) @@ -124,7 +112,7 @@ namespace ChocolArm64.Translation } else { - TranslateHighCq(item.Position, item.Mode); + TranslateHighCq(item.Position, item.Mode, item.IsComplete); } } else @@ -142,14 +130,16 @@ namespace ChocolArm64.Translation string subName = GetSubroutineName(position); - ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName); + bool isAarch64 = mode == ExecutionMode.Aarch64; - TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0); + ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64); + + TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true); return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count); } - private void TranslateHighCq(long position, ExecutionMode mode) + private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete) { Block graph = Decoder.DecodeSubroutine(_memory, position, mode); @@ -159,9 +149,13 @@ namespace ChocolArm64.Translation string subName = GetSubroutineName(position); - ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName); + bool isAarch64 = mode == ExecutionMode.Aarch64; - TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1); + isComplete &= !context.HasIndirectJump; + + ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete); + + TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall); int ilOpCount = 0; @@ -170,9 +164,11 @@ namespace ChocolArm64.Translation ilOpCount += ilBlock.Count; } + ForceAheadOfTimeCompilation(subroutine); + _cache.AddOrUpdate(position, subroutine, ilOpCount); - ForceAheadOfTimeCompilation(subroutine); + return subroutine; } private string GetSubroutineName(long position) diff --git a/ChocolArm64/Translation/TranslatorQueue.cs b/ChocolArm64/Translation/TranslatorQueue.cs index 89d665bfb..0f1d84747 100644 --- a/ChocolArm64/Translation/TranslatorQueue.cs +++ b/ChocolArm64/Translation/TranslatorQueue.cs @@ -1,3 +1,4 @@ +using ChocolArm64.State; using System.Collections.Concurrent; using System.Threading; @@ -5,10 +6,6 @@ namespace ChocolArm64.Translation { class TranslatorQueue { - //This is the maximum number of functions to be translated that the queue can hold. - //The value may need some tuning to find the sweet spot. - private const int MaxQueueSize = 1024; - private ConcurrentStack[] _translationQueue; private ManualResetEvent _queueDataReceivedEvent; @@ -27,14 +24,11 @@ namespace ChocolArm64.Translation _queueDataReceivedEvent = new ManualResetEvent(false); } - public void Enqueue(TranslatorQueueItem item) + public void Enqueue(long position, ExecutionMode mode, TranslationTier tier, bool isComplete) { - ConcurrentStack queue = _translationQueue[(int)item.Tier]; + TranslatorQueueItem item = new TranslatorQueueItem(position, mode, tier, isComplete); - if (queue.Count >= MaxQueueSize) - { - queue.TryPop(out _); - } + ConcurrentStack queue = _translationQueue[(int)tier]; queue.Push(item); diff --git a/ChocolArm64/Translation/TranslatorQueueItem.cs b/ChocolArm64/Translation/TranslatorQueueItem.cs index 0988414a5..dde2706d9 100644 --- a/ChocolArm64/Translation/TranslatorQueueItem.cs +++ b/ChocolArm64/Translation/TranslatorQueueItem.cs @@ -10,11 +10,18 @@ namespace ChocolArm64.Translation public TranslationTier Tier { get; } - public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier) + public bool IsComplete { get; } + + public TranslatorQueueItem( + long position, + ExecutionMode mode, + TranslationTier tier, + bool isComplete = false) { - Position = position; - Mode = mode; - Tier = tier; + Position = position; + Mode = mode; + Tier = tier; + IsComplete = isComplete; } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/IoType.cs b/ChocolArm64/Translation/VarType.cs similarity index 85% rename from ChocolArm64/Translation/IoType.cs rename to ChocolArm64/Translation/VarType.cs index c7710e0c6..d671575e9 100644 --- a/ChocolArm64/Translation/IoType.cs +++ b/ChocolArm64/Translation/VarType.cs @@ -1,6 +1,6 @@ namespace ChocolArm64.Translation { - enum IoType + enum VarType { Arg, Flag, diff --git a/Ryujinx/Config.jsonc b/Ryujinx/Config.jsonc index 8b5ebe032..6e808b56f 100644 --- a/Ryujinx/Config.jsonc +++ b/Ryujinx/Config.jsonc @@ -29,18 +29,21 @@ // System Language list: https://gist.github.com/HorrorTroll/b6e4a88d774c3c9b3bdf54d79a7ca43b "system_language": "AmericanEnglish", - // Enable or Disable Docked Mode + // Enable or disable Docked Mode "docked_mode": false, - - // Enable or Disable Game Vsync + + // Enable or disable Game Vsync "enable_vsync": true, - - // Enable or Disable Multi-core scheduling of threads + + // Enable or disable Multi-core scheduling of threads "enable_multicore_scheduling": true, - + // Enable integrity checks on Switch content files "enable_fs_integrity_checks": true, - + + // Enable or disable aggressive CPU optimizations + "enable_aggressive_cpu_opts": true, + // The primary controller's type // Supported Values: Handheld, ProController, NpadPair, NpadLeft, NpadRight "controller_type": "Handheld", diff --git a/Ryujinx/Configuration.cs b/Ryujinx/Configuration.cs index dbbec1cbc..c4a1b4369 100644 --- a/Ryujinx/Configuration.cs +++ b/Ryujinx/Configuration.cs @@ -86,6 +86,11 @@ namespace Ryujinx /// public bool EnableFsIntegrityChecks { get; private set; } + /// + /// Enable or Disable aggressive CPU optimizations + /// + public bool EnableAggressiveCpuOpts { get; private set; } + /// /// The primary controller's type /// @@ -197,6 +202,11 @@ namespace Ryujinx ? IntegrityCheckLevel.ErrorOnInvalid : IntegrityCheckLevel.None; + if (Instance.EnableAggressiveCpuOpts) + { + Optimizations.AssumeStrictAbiCompliance = true; + } + if(Instance.GamepadControls.Enabled) { if (GamePad.GetName(Instance.GamepadControls.Index) == "Unmapped Controller") diff --git a/Ryujinx/_schema.json b/Ryujinx/_schema.json index 0e586671d..7e7e46659 100644 --- a/Ryujinx/_schema.json +++ b/Ryujinx/_schema.json @@ -17,6 +17,7 @@ "enable_vsync", "enable_multicore_scheduling", "enable_fs_integrity_checks", + "enable_aggressive_cpu_opts", "controller_type", "keyboard_controls", "gamepad_controls" @@ -399,6 +400,17 @@ false ] }, + "enable_aggressive_cpu_opts": { + "$id": "#/properties/enable_aggressive_cpu_opts", + "type": "boolean", + "title": "Enable Aggressive CPU Optimizations", + "description": "Enable or disable aggressive CPU optimizations", + "default": true, + "examples": [ + true, + false + ] + }, "controller_type": { "$id": "#/properties/controller_type", "type": "string",