From 8be183f41d1416e88e22ce7c195984067494b4b8 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 4 Feb 2019 18:26:05 -0300 Subject: [PATCH] Implement speculative translation on the CPU (#515) * Implement speculative translation on the CPU, and change how branches to unknown or untranslated addresses work * Port t0opt changes and other cleanups * Change namespace of translation-related classes to ChocolArm64.Translation, other minor tweaks * Fix typo * Translate higher quality code for indirect jumps as well, and in some cases that were missed when lower quality (tier 0) code was available * Remove debug print * Remove direct argument passing optimization, and enable tail calls for BR instructions * Call delegates directly with Callvirt rather than calling Execute, do not emit calls for tier 0 code * Remove unused property * Rename argument on ArmSubroutine delegate --- CpuThread.cs | 1 + Decoders/Decoder.cs | 73 +++++-- Instructions/InstEmitFlow.cs | 30 ++- Instructions/InstEmitFlowHelper.cs | 122 +++++++++++- State/CpuThreadState.cs | 3 + TranslatedSub.cs | 140 ------------- TranslatedSubType.cs | 8 - Translation/ILEmitterCtx.cs | 46 +++-- Translation/ILMethodBuilder.cs | 54 +---- Translation/ILOpCodeCall.cs | 11 +- Translation/ILOpCodeLoadField.cs | 20 ++ Translation/TranslatedSub.cs | 65 ++++++ Translation/TranslationTier.cs | 11 + Translation/Translator.cs | 188 ++++++++++++++++++ .../TranslatorCache.cs | 27 ++- Translation/TranslatorQueue.cs | 83 ++++++++ Translation/TranslatorQueueItem.cs | 20 ++ Translator.cs | 120 ----------- 18 files changed, 649 insertions(+), 373 deletions(-) delete mode 100644 TranslatedSub.cs delete mode 100644 TranslatedSubType.cs create mode 100644 Translation/ILOpCodeLoadField.cs create mode 100644 Translation/TranslatedSub.cs create mode 100644 Translation/TranslationTier.cs create mode 100644 Translation/Translator.cs rename TranslatorCache.cs => Translation/TranslatorCache.cs (87%) create mode 100644 
Translation/TranslatorQueue.cs create mode 100644 Translation/TranslatorQueueItem.cs delete mode 100644 Translator.cs diff --git a/CpuThread.cs b/CpuThread.cs index 87b2139..6cd34f8 100644 --- a/CpuThread.cs +++ b/CpuThread.cs @@ -1,5 +1,6 @@ using ChocolArm64.Memory; using ChocolArm64.State; +using ChocolArm64.Translation; using System; using System.Threading; diff --git a/Decoders/Decoder.cs b/Decoders/Decoder.cs index 2b19541..6b5d79f 100644 --- a/Decoders/Decoder.cs +++ b/Decoders/Decoder.cs @@ -25,14 +25,53 @@ namespace ChocolArm64.Decoders FillBlock(memory, mode, block); + OpCode64 lastOp = block.GetLastOp(); + + if (IsBranch(lastOp) && !IsCall(lastOp) && lastOp is IOpCodeBImm op) + { + //It's possible that the branch on this block lands on the middle of the block. + //This is more common on tight loops. In this case, we can improve the codegen + //a bit by changing the CFG and either making the branch point to the same block + //(which indicates that the block is a loop that jumps back to the start), and the + //other possible case is a jump somewhere on the middle of the block, which is + //also a loop, but in this case we need to split the block in half. 
+ if (op.Imm == start) + { + block.Branch = block; + } + else if ((ulong)op.Imm > (ulong)start && + (ulong)op.Imm < (ulong)block.EndPosition) + { + Block botBlock = new Block(op.Imm); + + int botBlockIndex = 0; + + long currPosition = start; + + while ((ulong)currPosition < (ulong)op.Imm) + { + currPosition += block.OpCodes[botBlockIndex++].OpCodeSizeInBytes; + } + + botBlock.OpCodes.AddRange(block.OpCodes); + + botBlock.OpCodes.RemoveRange(0, botBlockIndex); + + block.OpCodes.RemoveRange(botBlockIndex, block.OpCodes.Count - botBlockIndex); + + botBlock.EndPosition = block.EndPosition; + + block.EndPosition = op.Imm; + + botBlock.Branch = botBlock; + block.Next = botBlock; + } + } + return block; } - public static Block DecodeSubroutine( - TranslatorCache cache, - MemoryManager memory, - long start, - ExecutionMode mode) + public static Block DecodeSubroutine(MemoryManager memory, long start, ExecutionMode mode) { Dictionary visited = new Dictionary(); Dictionary visitedEnd = new Dictionary(); @@ -67,23 +106,16 @@ namespace ChocolArm64.Decoders //(except BL/BLR that are sub calls) or end of executable, Next is null. if (current.OpCodes.Count > 0) { - bool hasCachedSub = false; - OpCode64 lastOp = current.GetLastOp(); - if (lastOp is IOpCodeBImm op) + bool isCall = IsCall(lastOp); + + if (lastOp is IOpCodeBImm op && !isCall) { - if (op.Emitter == InstEmit.Bl) - { - hasCachedSub = cache.HasSubroutine(op.Imm); - } - else - { - current.Branch = Enqueue(op.Imm); - } + current.Branch = Enqueue(op.Imm); } - if (!IsUnconditionalBranch(lastOp) || hasCachedSub) + if (!IsUnconditionalBranch(lastOp) || isCall) { current.Next = Enqueue(current.EndPosition); } @@ -223,6 +255,13 @@ namespace ChocolArm64.Decoders opCode is IOpCode32BReg; } + private static bool IsCall(OpCode64 opCode) + { + //TODO (CQ): ARM32 support. 
+ return opCode.Emitter == InstEmit.Bl || + opCode.Emitter == InstEmit.Blr; + } + private static bool IsException(OpCode64 opCode) { return opCode.Emitter == InstEmit.Brk || diff --git a/Instructions/InstEmitFlow.cs b/Instructions/InstEmitFlow.cs index 181c6a0..a842dca 100644 --- a/Instructions/InstEmitFlow.cs +++ b/Instructions/InstEmitFlow.cs @@ -3,6 +3,8 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System.Reflection.Emit; +using static ChocolArm64.Instructions.InstEmitFlowHelper; + namespace ChocolArm64.Instructions { static partial class InstEmit @@ -39,7 +41,7 @@ namespace ChocolArm64.Instructions context.EmitStint(RegisterAlias.Lr); context.EmitStoreState(); - InstEmitFlowHelper.EmitCall(context, op.Imm); + EmitCall(context, op.Imm); } public static void Blr(ILEmitterCtx context) @@ -51,7 +53,7 @@ namespace ChocolArm64.Instructions context.EmitStint(RegisterAlias.Lr); context.EmitStoreState(); - context.Emit(OpCodes.Ret); + EmitVirtualCall(context); } public static void Br(ILEmitterCtx context) @@ -61,7 +63,7 @@ namespace ChocolArm64.Instructions context.EmitStoreState(); context.EmitLdintzr(op.Rn); - context.Emit(OpCodes.Ret); + EmitVirtualJump(context); } public static void Cbnz(ILEmitterCtx context) => EmitCb(context, OpCodes.Bne_Un); @@ -106,10 +108,17 @@ namespace ChocolArm64.Instructions { OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp; - if (context.CurrBlock.Next != null && - context.CurrBlock.Branch != null) + if (context.CurrBlock.Branch != null) { context.EmitCondBranch(context.GetLabel(op.Imm), cond); + + if (context.CurrBlock.Next == null) + { + context.EmitStoreState(); + context.EmitLdc_I8(op.Position + 4); + + context.Emit(OpCodes.Ret); + } } else { @@ -135,10 +144,17 @@ namespace ChocolArm64.Instructions { OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp; - if (context.CurrBlock.Next != null && - context.CurrBlock.Branch != null) + if (context.CurrBlock.Branch != null) { context.Emit(ilOp, context.GetLabel(op.Imm)); + + if 
(context.CurrBlock.Next == null) + { + context.EmitStoreState(); + context.EmitLdc_I8(op.Position + 4); + + context.Emit(OpCodes.Ret); + } } else { diff --git a/Instructions/InstEmitFlowHelper.cs b/Instructions/InstEmitFlowHelper.cs index cf093bb..e93ef42 100644 --- a/Instructions/InstEmitFlowHelper.cs +++ b/Instructions/InstEmitFlowHelper.cs @@ -1,4 +1,6 @@ +using ChocolArm64.State; using ChocolArm64.Translation; +using System.Reflection; using System.Reflection.Emit; namespace ChocolArm64.Instructions @@ -7,12 +9,120 @@ namespace ChocolArm64.Instructions { public static void EmitCall(ILEmitterCtx context, long imm) { - if (context.TryOptEmitSubroutineCall()) + if (context.Tier == TranslationTier.Tier0) + { + context.TranslateAhead(imm); + + context.EmitLdc_I8(imm); + + context.Emit(OpCodes.Ret); + + return; + } + + if (!context.TryOptEmitSubroutineCall()) + { + context.TranslateAhead(imm); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator), + BindingFlags.Instance | + BindingFlags.NonPublic)); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdc_I8(imm); + + context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine)); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + + context.EmitCall(typeof(TranslatedSub), nameof(TranslatedSub.Execute)); + } + + EmitContinueOrReturnCheck(context); + } + + public static void EmitVirtualCall(ILEmitterCtx context) + { + EmitVirtualCallOrJump(context, isJump: false); + } + + public static void EmitVirtualJump(ILEmitterCtx context) + { + EmitVirtualCallOrJump(context, isJump: true); + } + + private static void EmitVirtualCallOrJump(ILEmitterCtx context, bool isJump) + { + if (context.Tier == TranslationTier.Tier0) + { + context.Emit(OpCodes.Dup); + + context.EmitSttmp(); + context.EmitLdarg(TranslatedSub.StateArgIdx); + + 
context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator), + BindingFlags.Instance | + BindingFlags.NonPublic)); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdtmp(); + + context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine)); + + context.Emit(OpCodes.Ret); + } + else + { + context.EmitSttmp(); + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator), + BindingFlags.Instance | + BindingFlags.NonPublic)); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdtmp(); + + context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine)); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + + if (isJump) + { + //The tail prefix allows the JIT to jump to the next function, + //while releasing the stack space used by the current one. + //This is ideal for BR ARM instructions, which are + //basically indirect tail calls. + context.Emit(OpCodes.Tailcall); + } + + MethodInfo mthdInfo = typeof(ArmSubroutine).GetMethod("Invoke"); + + context.EmitCall(mthdInfo, isVirtual: true); + + if (!isJump) + { + EmitContinueOrReturnCheck(context); + } + else + { + context.Emit(OpCodes.Ret); + } + } + } + + private static void EmitContinueOrReturnCheck(ILEmitterCtx context) + { + //Note: The return value of the called method will be placed + //at the Stack, the return value is always a Int64 with the + //return address of the function. We check if the address is + //correct, if it isn't we keep returning until we reach the dispatcher. + if (context.CurrBlock.Next != null) { - //Note: the return value of the called method will be placed - //at the Stack, the return value is always a Int64 with the - //return address of the function. 
We check if the address is - //correct, if it isn't we keep returning until we reach the dispatcher. context.Emit(OpCodes.Dup); context.EmitLdc_I8(context.CurrOp.Position + 4); @@ -30,8 +140,6 @@ namespace ChocolArm64.Instructions } else { - context.EmitLdc_I8(imm); - context.Emit(OpCodes.Ret); } } diff --git a/State/CpuThreadState.cs b/State/CpuThreadState.cs index 12edc42..abec60b 100644 --- a/State/CpuThreadState.cs +++ b/State/CpuThreadState.cs @@ -1,4 +1,5 @@ using ChocolArm64.Events; +using ChocolArm64.Translation; using System; using System.Diagnostics; using System.Runtime.CompilerServices; @@ -82,6 +83,8 @@ namespace ChocolArm64.State private static double _hostTickFreq; + internal Translator CurrentTranslator; + static CpuThreadState() { _hostTickFreq = 1.0 / Stopwatch.Frequency; diff --git a/TranslatedSub.cs b/TranslatedSub.cs deleted file mode 100644 index 653abcc..0000000 --- a/TranslatedSub.cs +++ /dev/null @@ -1,140 +0,0 @@ -using ChocolArm64.Memory; -using ChocolArm64.State; -using System; -using System.Collections.Generic; -using System.Collections.ObjectModel; -using System.Linq; -using System.Reflection; -using System.Reflection.Emit; - -namespace ChocolArm64 -{ - class TranslatedSub - { - private delegate long Aa64Subroutine(CpuThreadState register, MemoryManager memory); - - private const int MinCallCountForReJit = 250; - - private Aa64Subroutine _execDelegate; - - public static int StateArgIdx { get; private set; } - public static int MemoryArgIdx { get; private set; } - - public static Type[] FixedArgTypes { get; private set; } - - public DynamicMethod Method { get; private set; } - - public ReadOnlyCollection SubArgs { get; private set; } - - private HashSet _callers; - - private TranslatedSubType _type; - - private int _callCount; - - private bool _needsReJit; - - public TranslatedSub(DynamicMethod method, List subArgs) - { - Method = method ?? throw new ArgumentNullException(nameof(method));; - SubArgs = subArgs?.AsReadOnly() ?? 
throw new ArgumentNullException(nameof(subArgs)); - - _callers = new HashSet(); - - PrepareDelegate(); - } - - static TranslatedSub() - { - MethodInfo mthdInfo = typeof(Aa64Subroutine).GetMethod("Invoke"); - - ParameterInfo[] Params = mthdInfo.GetParameters(); - - FixedArgTypes = new Type[Params.Length]; - - for (int index = 0; index < Params.Length; index++) - { - Type paramType = Params[index].ParameterType; - - FixedArgTypes[index] = paramType; - - if (paramType == typeof(CpuThreadState)) - { - StateArgIdx = index; - } - else if (paramType == typeof(MemoryManager)) - { - MemoryArgIdx = index; - } - } - } - - private void PrepareDelegate() - { - string name = $"{Method.Name}_Dispatch"; - - DynamicMethod mthd = new DynamicMethod(name, typeof(long), FixedArgTypes); - - ILGenerator generator = mthd.GetILGenerator(); - - generator.EmitLdargSeq(FixedArgTypes.Length); - - foreach (Register reg in SubArgs) - { - generator.EmitLdarg(StateArgIdx); - - generator.Emit(OpCodes.Ldfld, reg.GetField()); - } - - generator.Emit(OpCodes.Call, Method); - generator.Emit(OpCodes.Ret); - - _execDelegate = (Aa64Subroutine)mthd.CreateDelegate(typeof(Aa64Subroutine)); - } - - public bool ShouldReJit() - { - if (_needsReJit && _callCount < MinCallCountForReJit) - { - _callCount++; - - return false; - } - - return _needsReJit; - } - - public long Execute(CpuThreadState threadState, MemoryManager memory) - { - return _execDelegate(threadState, memory); - } - - public void AddCaller(long position) - { - lock (_callers) - { - _callers.Add(position); - } - } - - public long[] GetCallerPositions() - { - lock (_callers) - { - return _callers.ToArray(); - } - } - - public void SetType(TranslatedSubType type) - { - _type = type; - - if (type == TranslatedSubType.SubTier0) - { - _needsReJit = true; - } - } - - public void MarkForReJit() => _needsReJit = true; - } -} \ No newline at end of file diff --git a/TranslatedSubType.cs b/TranslatedSubType.cs deleted file mode 100644 index f57aea9..0000000 
--- a/TranslatedSubType.cs +++ /dev/null @@ -1,8 +0,0 @@ -namespace ChocolArm64 -{ - enum TranslatedSubType - { - SubTier0, - SubTier1 - } -} \ No newline at end of file diff --git a/Translation/ILEmitterCtx.cs b/Translation/ILEmitterCtx.cs index b5ebff7..ef63e60 100644 --- a/Translation/ILEmitterCtx.cs +++ b/Translation/ILEmitterCtx.cs @@ -11,6 +11,7 @@ namespace ChocolArm64.Translation class ILEmitterCtx { private TranslatorCache _cache; + private TranslatorQueue _queue; private Dictionary _labels; @@ -23,6 +24,8 @@ namespace ChocolArm64.Translation public Block CurrBlock => _currBlock; public OpCode64 CurrOp => _currBlock?.OpCodes[_opcIndex]; + public TranslationTier Tier { get; } + public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO private Dictionary _visitedBlocks; @@ -47,11 +50,14 @@ namespace ChocolArm64.Translation private const int VecTmp1Index = -5; private const int VecTmp2Index = -6; - public ILEmitterCtx(TranslatorCache cache, Block graph) + public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, TranslationTier tier, Block graph) { _cache = cache ?? throw new ArgumentNullException(nameof(cache)); + _queue = queue ?? throw new ArgumentNullException(nameof(queue)); _currBlock = graph ?? 
throw new ArgumentNullException(nameof(graph)); + Tier = tier; + _labels = new Dictionary(); _visitedBlocks = new Dictionary(); @@ -243,6 +249,16 @@ namespace ChocolArm64.Translation return new ILBlock(); } + public void TranslateAhead(long position, ExecutionMode mode = ExecutionMode.Aarch64) + { + if (_cache.TryGetSubroutine(position, out TranslatedSub sub) && sub.Tier != TranslationTier.Tier0) + { + return; + } + + _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1)); + } + public bool TryOptEmitSubroutineCall() { if (_currBlock.Next == null) @@ -265,20 +281,8 @@ namespace ChocolArm64.Translation EmitLdarg(index); } - foreach (Register reg in subroutine.SubArgs) - { - switch (reg.Type) - { - case RegisterType.Flag: Ldloc(reg.Index, IoType.Flag); break; - case RegisterType.Int: Ldloc(reg.Index, IoType.Int); break; - case RegisterType.Vector: Ldloc(reg.Index, IoType.Vector); break; - } - } - EmitCall(subroutine.Method); - subroutine.AddCaller(_subPosition); - return true; } @@ -463,7 +467,12 @@ namespace ChocolArm64.Translation _ilBlock.Add(new ILOpCodeBranch(ilOp, label)); } - public void Emit(string text) + public void EmitFieldLoad(FieldInfo info) + { + _ilBlock.Add(new ILOpCodeLoadField(info)); + } + + public void EmitPrint(string text) { _ilBlock.Add(new ILOpCodeLog(text)); } @@ -618,14 +627,9 @@ namespace ChocolArm64.Translation EmitCall(objType.GetMethod(mthdName, BindingFlags.Instance | BindingFlags.NonPublic)); } - public void EmitCall(MethodInfo mthdInfo) + public void EmitCall(MethodInfo mthdInfo, bool isVirtual = false) { - if (mthdInfo == null) - { - throw new ArgumentNullException(nameof(mthdInfo)); - } - - _ilBlock.Add(new ILOpCodeCall(mthdInfo)); + _ilBlock.Add(new ILOpCodeCall(mthdInfo ?? 
throw new ArgumentNullException(nameof(mthdInfo)), isVirtual)); } public void EmitLdc_I(long value) diff --git a/Translation/ILMethodBuilder.cs b/Translation/ILMethodBuilder.cs index 70d9a2d..892f831 100644 --- a/Translation/ILMethodBuilder.cs +++ b/Translation/ILMethodBuilder.cs @@ -26,74 +26,32 @@ namespace ChocolArm64.Translation _subName = subName; } - public TranslatedSub GetSubroutine() + public TranslatedSub GetSubroutine(TranslationTier tier) { LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]); - List subArgs = new List(); - - void SetArgs(long inputs, RegisterType baseType) - { - for (int bit = 0; bit < 64; bit++) - { - long mask = 1L << bit; - - if ((inputs & mask) != 0) - { - subArgs.Add(GetRegFromBit(bit, baseType)); - } - } - } - - SetArgs(LocalAlloc.GetIntInputs(_ilBlocks[0]), RegisterType.Int); - SetArgs(LocalAlloc.GetVecInputs(_ilBlocks[0]), RegisterType.Vector); - - DynamicMethod method = new DynamicMethod(_subName, typeof(long), GetArgumentTypes(subArgs)); + DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes); Generator = method.GetILGenerator(); - TranslatedSub subroutine = new TranslatedSub(method, subArgs); - - int argsStart = TranslatedSub.FixedArgTypes.Length; + TranslatedSub subroutine = new TranslatedSub(method, tier); _locals = new Dictionary(); _localsCount = 0; - for (int index = 0; index < subroutine.SubArgs.Count; index++) - { - Register reg = subroutine.SubArgs[index]; - - Generator.EmitLdarg(index + argsStart); - Generator.EmitStloc(GetLocalIndex(reg)); - } + new ILOpCodeLoadState(_ilBlocks[0]).Emit(this); foreach (ILBlock ilBlock in _ilBlocks) { ilBlock.Emit(this); } + subroutine.PrepareMethod(); + return subroutine; } - private Type[] GetArgumentTypes(IList Params) - { - Type[] fixedArgs = TranslatedSub.FixedArgTypes; - - Type[] output = new Type[Params.Count + fixedArgs.Length]; - - fixedArgs.CopyTo(output, 0); - - int typeIdx = fixedArgs.Length; - - for (int index = 0; index < 
Params.Count; index++) - { - output[typeIdx++] = GetFieldType(Params[index].Type); - } - - return output; - } - public int GetLocalIndex(Register reg) { if (!_locals.TryGetValue(reg, out int index)) diff --git a/Translation/ILOpCodeCall.cs b/Translation/ILOpCodeCall.cs index 8486a79..c046aee 100644 --- a/Translation/ILOpCodeCall.cs +++ b/Translation/ILOpCodeCall.cs @@ -5,16 +5,19 @@ namespace ChocolArm64.Translation { struct ILOpCodeCall : IILEmit { - private MethodInfo _mthdInfo; + public MethodInfo Info { get; private set; } - public ILOpCodeCall(MethodInfo mthdInfo) + public bool IsVirtual { get; private set; } + + public ILOpCodeCall(MethodInfo info, bool isVirtual) { - _mthdInfo = mthdInfo; + Info = info; + IsVirtual = isVirtual; } public void Emit(ILMethodBuilder context) { - context.Generator.Emit(OpCodes.Call, _mthdInfo); + context.Generator.Emit(IsVirtual ? OpCodes.Callvirt : OpCodes.Call, Info); } } } \ No newline at end of file diff --git a/Translation/ILOpCodeLoadField.cs b/Translation/ILOpCodeLoadField.cs new file mode 100644 index 0000000..abcd37c --- /dev/null +++ b/Translation/ILOpCodeLoadField.cs @@ -0,0 +1,20 @@ +using System.Reflection; +using System.Reflection.Emit; + +namespace ChocolArm64.Translation +{ + struct ILOpCodeLoadField : IILEmit + { + public FieldInfo Info { get; private set; } + + public ILOpCodeLoadField(FieldInfo info) + { + Info = info; + } + + public void Emit(ILMethodBuilder context) + { + context.Generator.Emit(OpCodes.Ldfld, Info); + } + } +} \ No newline at end of file diff --git a/Translation/TranslatedSub.cs b/Translation/TranslatedSub.cs new file mode 100644 index 0000000..65d7035 --- /dev/null +++ b/Translation/TranslatedSub.cs @@ -0,0 +1,65 @@ +using ChocolArm64.Memory; +using ChocolArm64.State; +using System; +using System.Reflection; +using System.Reflection.Emit; + +namespace ChocolArm64.Translation +{ + delegate long ArmSubroutine(CpuThreadState state, MemoryManager memory); + + class TranslatedSub + { + public 
ArmSubroutine Delegate { get; private set; } + + public static int StateArgIdx { get; private set; } + public static int MemoryArgIdx { get; private set; } + + public static Type[] FixedArgTypes { get; private set; } + + public DynamicMethod Method { get; private set; } + + public TranslationTier Tier { get; private set; } + + public TranslatedSub(DynamicMethod method, TranslationTier tier) + { + Method = method ?? throw new ArgumentNullException(nameof(method));; + Tier = tier; + } + + static TranslatedSub() + { + MethodInfo mthdInfo = typeof(ArmSubroutine).GetMethod("Invoke"); + + ParameterInfo[] Params = mthdInfo.GetParameters(); + + FixedArgTypes = new Type[Params.Length]; + + for (int index = 0; index < Params.Length; index++) + { + Type argType = Params[index].ParameterType; + + FixedArgTypes[index] = argType; + + if (argType == typeof(CpuThreadState)) + { + StateArgIdx = index; + } + else if (argType == typeof(MemoryManager)) + { + MemoryArgIdx = index; + } + } + } + + public void PrepareMethod() + { + Delegate = (ArmSubroutine)Method.CreateDelegate(typeof(ArmSubroutine)); + } + + public long Execute(CpuThreadState threadState, MemoryManager memory) + { + return Delegate(threadState, memory); + } + } +} \ No newline at end of file diff --git a/Translation/TranslationTier.cs b/Translation/TranslationTier.cs new file mode 100644 index 0000000..13afd9c --- /dev/null +++ b/Translation/TranslationTier.cs @@ -0,0 +1,11 @@ +namespace ChocolArm64.Translation +{ + enum TranslationTier + { + Tier0, + Tier1, + Tier2, + + Count + } +} \ No newline at end of file diff --git a/Translation/Translator.cs b/Translation/Translator.cs new file mode 100644 index 0000000..7f7df6e --- /dev/null +++ b/Translation/Translator.cs @@ -0,0 +1,188 @@ +using ChocolArm64.Decoders; +using ChocolArm64.Events; +using ChocolArm64.Memory; +using ChocolArm64.State; +using System; +using System.Threading; + +namespace ChocolArm64.Translation +{ + public class Translator + { + private 
MemoryManager _memory; + + private CpuThreadState _dummyThreadState; + + private TranslatorCache _cache; + private TranslatorQueue _queue; + + private Thread _backgroundTranslator; + + public event EventHandler CpuTrace; + + public bool EnableCpuTrace { get; set; } + + private volatile int _threadCount; + + public Translator(MemoryManager memory) + { + _memory = memory; + + _dummyThreadState = new CpuThreadState(); + + _dummyThreadState.Running = false; + + _cache = new TranslatorCache(); + _queue = new TranslatorQueue(); + } + + internal void ExecuteSubroutine(CpuThread thread, long position) + { + if (Interlocked.Increment(ref _threadCount) == 1) + { + _backgroundTranslator = new Thread(TranslateQueuedSubs); + _backgroundTranslator.Start(); + } + + ExecuteSubroutine(thread.ThreadState, position); + + if (Interlocked.Decrement(ref _threadCount) == 0) + { + _queue.ForceSignal(); + } + } + + private void ExecuteSubroutine(CpuThreadState state, long position) + { + state.CurrentTranslator = this; + + do + { + if (EnableCpuTrace) + { + CpuTrace?.Invoke(this, new CpuTraceEventArgs(position)); + } + + TranslatedSub subroutine = GetOrTranslateSubroutine(state, position); + + position = subroutine.Execute(state, _memory); + } + while (position != 0 && state.Running); + + state.CurrentTranslator = null; + } + + internal void TranslateVirtualSubroutine(CpuThreadState state, long position) + { + if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0) + { + _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1)); + } + } + + internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position) + { + if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) + { + sub = TranslateLowCq(position, state.GetExecutionMode()); + } + + if (sub.Tier == TranslationTier.Tier0) + { + _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), 
TranslationTier.Tier1)); + } + + return sub.Delegate; + } + + internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position) + { + if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine)) + { + subroutine = TranslateLowCq(position, state.GetExecutionMode()); + } + + return subroutine; + } + + private void TranslateQueuedSubs() + { + while (_threadCount != 0) + { + if (_queue.TryDequeue(out TranslatorQueueItem item)) + { + bool isCached = _cache.TryGetSubroutine(item.Position, out TranslatedSub sub); + + if (isCached && item.Tier <= sub.Tier) + { + continue; + } + + if (item.Tier == TranslationTier.Tier0) + { + TranslateLowCq(item.Position, item.Mode); + } + else + { + TranslateHighCq(item.Position, item.Mode); + } + } + else + { + _queue.WaitForItems(); + } + } + } + + private TranslatedSub TranslateLowCq(long position, ExecutionMode mode) + { + Block block = Decoder.DecodeBasicBlock(_memory, position, mode); + + ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier0, block); + + string subName = GetSubroutineName(position); + + ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName); + + TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0); + + return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count); + } + + private void TranslateHighCq(long position, ExecutionMode mode) + { + Block graph = Decoder.DecodeSubroutine(_memory, position, mode); + + ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier1, graph); + + ILBlock[] ilBlocks = context.GetILBlocks(); + + string subName = GetSubroutineName(position); + + ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName); + + TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1); + + int ilOpCount = 0; + + foreach (ILBlock ilBlock in ilBlocks) + { + ilOpCount += ilBlock.Count; + } + + _cache.AddOrUpdate(position, subroutine, ilOpCount); + + 
ForceAheadOfTimeCompilation(subroutine); + } + + private string GetSubroutineName(long position) + { + return $"Sub{position:x16}"; + } + + private void ForceAheadOfTimeCompilation(TranslatedSub subroutine) + { + subroutine.Execute(_dummyThreadState, null); + } + } +} \ No newline at end of file diff --git a/TranslatorCache.cs b/Translation/TranslatorCache.cs similarity index 87% rename from TranslatorCache.cs rename to Translation/TranslatorCache.cs index 9903cca..d10d675 100644 --- a/TranslatorCache.cs +++ b/Translation/TranslatorCache.cs @@ -4,7 +4,7 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Threading; -namespace ChocolArm64 +namespace ChocolArm64.Translation { class TranslatorCache { @@ -58,6 +58,31 @@ namespace ChocolArm64 _sortedCache = new LinkedList(); } + public TranslatedSub GetOrAdd(long position, TranslatedSub subroutine, int size) + { + ClearCacheIfNeeded(); + + lock (_sortedCache) + { + LinkedListNode node = _sortedCache.AddLast(position); + + CacheBucket bucket = new CacheBucket(subroutine, node, size); + + bucket = _cache.GetOrAdd(position, bucket); + + if (bucket.Node == node) + { + _totalSize += size; + } + else + { + _sortedCache.Remove(node); + } + + return bucket.Subroutine; + } + } + public void AddOrUpdate(long position, TranslatedSub subroutine, int size) { ClearCacheIfNeeded(); diff --git a/Translation/TranslatorQueue.cs b/Translation/TranslatorQueue.cs new file mode 100644 index 0000000..89d665b --- /dev/null +++ b/Translation/TranslatorQueue.cs @@ -0,0 +1,83 @@ +using System.Collections.Concurrent; +using System.Threading; + +namespace ChocolArm64.Translation +{ + class TranslatorQueue + { + //This is the maximum number of functions to be translated that the queue can hold. + //The value may need some tuning to find the sweet spot. 
+ private const int MaxQueueSize = 1024; + + private ConcurrentStack[] _translationQueue; + + private ManualResetEvent _queueDataReceivedEvent; + + private bool _signaled; + + public TranslatorQueue() + { + _translationQueue = new ConcurrentStack[(int)TranslationTier.Count]; + + for (int prio = 0; prio < _translationQueue.Length; prio++) + { + _translationQueue[prio] = new ConcurrentStack(); + } + + _queueDataReceivedEvent = new ManualResetEvent(false); + } + + public void Enqueue(TranslatorQueueItem item) + { + ConcurrentStack queue = _translationQueue[(int)item.Tier]; + + if (queue.Count >= MaxQueueSize) + { + queue.TryPop(out _); + } + + queue.Push(item); + + _queueDataReceivedEvent.Set(); + } + + public bool TryDequeue(out TranslatorQueueItem item) + { + for (int prio = 0; prio < _translationQueue.Length; prio++) + { + if (_translationQueue[prio].TryPop(out item)) + { + return true; + } + } + + item = default(TranslatorQueueItem); + + return false; + } + + public void WaitForItems() + { + _queueDataReceivedEvent.WaitOne(); + + lock (_queueDataReceivedEvent) + { + if (!_signaled) + { + _queueDataReceivedEvent.Reset(); + } + } + } + + public void ForceSignal() + { + lock (_queueDataReceivedEvent) + { + _signaled = true; + + _queueDataReceivedEvent.Set(); + _queueDataReceivedEvent.Close(); + } + } + } +} \ No newline at end of file diff --git a/Translation/TranslatorQueueItem.cs b/Translation/TranslatorQueueItem.cs new file mode 100644 index 0000000..0988414 --- /dev/null +++ b/Translation/TranslatorQueueItem.cs @@ -0,0 +1,20 @@ +using ChocolArm64.State; + +namespace ChocolArm64.Translation +{ + struct TranslatorQueueItem + { + public long Position { get; } + + public ExecutionMode Mode { get; } + + public TranslationTier Tier { get; } + + public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier) + { + Position = position; + Mode = mode; + Tier = tier; + } + } +} \ No newline at end of file diff --git a/Translator.cs b/Translator.cs 
deleted file mode 100644 index af2586f..0000000 --- a/Translator.cs +++ /dev/null @@ -1,120 +0,0 @@ -using ChocolArm64.Decoders; -using ChocolArm64.Events; -using ChocolArm64.Memory; -using ChocolArm64.State; -using ChocolArm64.Translation; -using System; - -namespace ChocolArm64 -{ - public class Translator - { - private TranslatorCache _cache; - - public event EventHandler CpuTrace; - - public bool EnableCpuTrace { get; set; } - - public Translator() - { - _cache = new TranslatorCache(); - } - - internal void ExecuteSubroutine(CpuThread thread, long position) - { - ExecuteSubroutine(thread.ThreadState, thread.Memory, position); - } - - private void ExecuteSubroutine(CpuThreadState state, MemoryManager memory, long position) - { - do - { - if (EnableCpuTrace) - { - CpuTrace?.Invoke(this, new CpuTraceEventArgs(position)); - } - - if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) - { - sub = TranslateTier0(memory, position, state.GetExecutionMode()); - } - - if (sub.ShouldReJit()) - { - TranslateTier1(memory, position, state.GetExecutionMode()); - } - - position = sub.Execute(state, memory); - } - while (position != 0 && state.Running); - } - - internal bool HasCachedSub(long position) - { - return _cache.HasSubroutine(position); - } - - private TranslatedSub TranslateTier0(MemoryManager memory, long position, ExecutionMode mode) - { - Block block = Decoder.DecodeBasicBlock(memory, position, mode); - - ILEmitterCtx context = new ILEmitterCtx(_cache, block); - - string subName = GetSubroutineName(position); - - ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName); - - TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(); - - subroutine.SetType(TranslatedSubType.SubTier0); - - _cache.AddOrUpdate(position, subroutine, block.OpCodes.Count); - - return subroutine; - } - - private void TranslateTier1(MemoryManager memory, long position, ExecutionMode mode) - { - Block graph = Decoder.DecodeSubroutine(_cache, memory, 
position, mode); - - ILEmitterCtx context = new ILEmitterCtx(_cache, graph); - - ILBlock[] ilBlocks = context.GetILBlocks(); - - string subName = GetSubroutineName(position); - - ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName); - - TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(); - - subroutine.SetType(TranslatedSubType.SubTier1); - - int ilOpCount = 0; - - foreach (ILBlock ilBlock in ilBlocks) - { - ilOpCount += ilBlock.Count; - } - - _cache.AddOrUpdate(position, subroutine, ilOpCount); - - //Mark all methods that calls this method for ReJiting, - //since we can now call it directly which is faster. - if (_cache.TryGetSubroutine(position, out TranslatedSub oldSub)) - { - foreach (long callerPos in oldSub.GetCallerPositions()) - { - if (_cache.TryGetSubroutine(position, out TranslatedSub callerSub)) - { - callerSub.MarkForReJit(); - } - } - } - } - - private string GetSubroutineName(long position) - { - return $"Sub{position:x16}"; - } - } -} \ No newline at end of file