diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs index d3ffd185e..8b0c75fd6 100644 --- a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs +++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs @@ -136,7 +136,9 @@ namespace ARMeilleure.CodeGen.Optimizations private static bool HasSideEffects(Node node) { - return (node is Operation operation) && operation.Instruction == Instruction.Call; + return (node is Operation operation) && (operation.Instruction == Instruction.Call + || operation.Instruction == Instruction.Tailcall + || operation.Instruction == Instruction.CompareAndSwap); } private static bool IsPropagableCopy(Operation operation) diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index 70130d90e..5088e6f0c 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -90,6 +90,7 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex)); Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2)); Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cmpxchg, new InstructionInfo(0x00000fb1, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW)); Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex)); @@ -117,6 +118,7 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None)); Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None)); Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Jmp, new InstructionInfo(0x040000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None)); Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex)); @@ -328,6 +330,13 @@ namespace ARMeilleure.CodeGen.X86 WriteByte(0x99); } + public void Cmpxchg(MemoryOperand memOp, Operand src) + { + WriteByte(LockPrefix); + + WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg); + } + public void Cmpxchg16b(MemoryOperand memOp) { WriteByte(LockPrefix); @@ -480,6 +489,11 @@ namespace ARMeilleure.CodeGen.X86 } } + public void Jmp(Operand dest) + { + WriteInstruction(dest, null, OperandType.None, X86Instruction.Jmp); + } + public void Lea(Operand dest, Operand source, OperandType type) { WriteInstruction(dest, source, type, X86Instruction.Lea); diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 32ca6a781..1d0a4c12f 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ 
-34,7 +34,7 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.ByteSwap, GenerateByteSwap); Add(Instruction.Call, GenerateCall); Add(Instruction.Clobber, GenerateClobber); - Add(Instruction.CompareAndSwap128, GenerateCompareAndSwap128); + Add(Instruction.CompareAndSwap, GenerateCompareAndSwap); Add(Instruction.CompareEqual, GenerateCompareEqual); Add(Instruction.CompareGreater, GenerateCompareGreater); Add(Instruction.CompareGreaterOrEqual, GenerateCompareGreaterOrEqual); @@ -76,6 +76,7 @@ namespace ARMeilleure.CodeGen.X86 Add(Instruction.Store16, GenerateStore16); Add(Instruction.Store8, GenerateStore8); Add(Instruction.Subtract, GenerateSubtract); + Add(Instruction.Tailcall, GenerateTailcall); Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar); Add(Instruction.VectorExtract, GenerateVectorExtract); Add(Instruction.VectorExtract16, GenerateVectorExtract16); @@ -543,13 +544,27 @@ namespace ARMeilleure.CodeGen.X86 // register allocator, we don't need to produce any code. } - private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation) + private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation) { - Operand source = operation.GetSource(0); + Operand src1 = operation.GetSource(0); - MemoryOperand memOp = new MemoryOperand(OperandType.I64, source); + if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3. + { + MemoryOperand memOp = new MemoryOperand(OperandType.I64, src1); - context.Assembler.Cmpxchg16b(memOp); + context.Assembler.Cmpxchg16b(memOp); + } + else + { + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(src2, src3); + + MemoryOperand memOp = new MemoryOperand(src3.Type, src1); + + context.Assembler.Cmpxchg(memOp, src3); + } } private static void GenerateCompareEqual(CodeGenContext context, Operation operation) @@ -1083,6 +1098,13 @@ namespace ARMeilleure.CodeGen.X86 } } + private static void GenerateTailcall(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Jmp(operation.GetSource(0)); + } + private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation) { Operand dest = operation.Destination; diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs index 75844b099..e20fca9d6 100644 --- a/ARMeilleure/CodeGen/X86/PreAllocator.cs +++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -1,6 +1,7 @@ using ARMeilleure.CodeGen.RegisterAllocators; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Translation; +using System; using System.Collections.Generic; using System.Diagnostics; @@ -101,6 +102,17 @@ namespace ARMeilleure.CodeGen.X86 } break; + case Instruction.Tailcall: + if (callConv == CallConvName.Windows) + { + HandleTailcallWindowsAbi(block.Operations, stackAlloc, node, operation); + } + else + { + HandleTailcallSystemVAbi(block.Operations, stackAlloc, node, operation); + } + break; + case Instruction.VectorInsert8: if (!HardwareCapabilities.SupportsSse41) { @@ -199,32 +211,55 @@ namespace ARMeilleure.CodeGen.X86 switch (operation.Instruction) { - case Instruction.CompareAndSwap128: + case Instruction.CompareAndSwap: { - // Handle the many restrictions of the compare and exchange (16 bytes) instruction: - // - The expected value should be in RDX:RAX. - // - The new value to be written should be in RCX:RBX. - // - The value at the memory location is loaded to RDX:RAX. 
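// An illustrative model (not part of the JIT) of what the unified CompareAndSwap IR op
// computes in its new 32/64-bit form: the previous memory value is always returned, and the
// store only happens on a match, mirroring x86 `lock cmpxchg`, where (E/R)AX holds the
// expected value on entry and the observed value on exit. Atomicity is ignored here; the
// real operation is a single locked instruction.
static ulong CompareAndSwapModel(ref ulong memory, ulong expected, ulong desired)
{
    ulong observed = memory;   // what (E/R)AX contains after the instruction

    if (observed == expected)  // ZF = 1: the store is performed
    {
        memory = desired;
    }

    return observed;
}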
- void SplitOperand(Operand source, Operand lr, Operand hr) + OperandType type = operation.GetSource(1).Type; + + if (type == OperandType.V128) { - nodes.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0))); - nodes.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1))); + // Handle the many restrictions of the compare and exchange (16 bytes) instruction: + // - The expected value should be in RDX:RAX. + // - The new value to be written should be in RCX:RBX. + // - The value at the memory location is loaded to RDX:RAX. + void SplitOperand(Operand source, Operand lr, Operand hr) + { + nodes.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0))); + nodes.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1))); + } + + Operand rax = Gpr(X86Register.Rax, OperandType.I64); + Operand rbx = Gpr(X86Register.Rbx, OperandType.I64); + Operand rcx = Gpr(X86Register.Rcx, OperandType.I64); + Operand rdx = Gpr(X86Register.Rdx, OperandType.I64); + + SplitOperand(operation.GetSource(1), rax, rdx); + SplitOperand(operation.GetSource(2), rbx, rcx); + + node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax)); + node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1))); + + operation.SetDestinations(new Operand[] { rdx, rax }); + + operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx }); } + else + { + // Handle the many restrictions of the compare and exchange (32/64) instruction: + // - The expected value should be in (E/R)AX. + // - The value at the memory location is loaded to (E/R)AX. - Operand rax = Gpr(X86Register.Rax, OperandType.I64); - Operand rbx = Gpr(X86Register.Rbx, OperandType.I64); - Operand rcx = Gpr(X86Register.Rcx, OperandType.I64); - Operand rdx = Gpr(X86Register.Rdx, OperandType.I64); + Operand expected = operation.GetSource(1); - SplitOperand(operation.GetSource(1), rax, rdx); - SplitOperand(operation.GetSource(2), rbx, rcx); + Operand rax = Gpr(X86Register.Rax, expected.Type); - node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax)); - node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1))); + nodes.AddBefore(node, new Operation(Instruction.Copy, rax, expected)); - operation.SetDestinations(new Operand[] { rdx, rax }); + operation.SetSources(new Operand[] { operation.GetSource(0), rax, operation.GetSource(2) }); - operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx }); + node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rax)); + + operation.Destination = rax; + } break; } @@ -829,6 +864,123 @@ namespace ARMeilleure.CodeGen.X86 return node; } + private static void HandleTailcallSystemVAbi(IntrusiveList nodes, StackAllocator stackAlloc, Node node, Operation operation) + { + List sources = new List(); + + sources.Add(operation.GetSource(0)); + + int argsCount = operation.SourcesCount - 1; + + int intMax = CallingConvention.GetIntArgumentsOnRegsCount(); + int vecMax = CallingConvention.GetVecArgumentsOnRegsCount(); + + int intCount = 0; + int vecCount = 0; + + // Handle arguments passed on registers. 
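// (Arguments that would have to be passed on the stack are rejected: GenerateTailcall runs
// the epilogue and then jumps, so this function's frame is gone at the branch point and no
// outgoing stack area is set up for them; such calls throw NotImplementedException below.)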
+ for (int index = 0; index < argsCount; index++) + { + Operand source = operation.GetSource(1 + index); + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount + 1 < intMax; + } + else + { + passOnReg = vecCount < vecMax; + } + + if (source.Type == OperandType.V128 && passOnReg) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + + nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg, source, Const(0))); + nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg2, source, Const(1))); + + continue; + } + + if (passOnReg) + { + Operand argReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type); + + Operation copyOp = new Operation(Instruction.Copy, argReg, source); + + HandleConstantCopy(nodes, nodes.AddBefore(node, copyOp), copyOp); + + sources.Add(argReg); + } + else + { + throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)"); + } + } + + // The target address must be on the return registers, since we + // don't return anything and it is guaranteed to not be a + // callee saved register (which would be trashed on the epilogue). + Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + + Operation addrCopyOp = new Operation(Instruction.Copy, retReg, operation.GetSource(0)); + + nodes.AddBefore(node, addrCopyOp); + + sources[0] = retReg; + + operation.SetSources(sources.ToArray()); + } + + private static void HandleTailcallWindowsAbi(IntrusiveList nodes, StackAllocator stackAlloc, Node node, Operation operation) + { + int argsCount = operation.SourcesCount - 1; + + int maxArgs = CallingConvention.GetArgumentsOnRegsCount(); + + if (argsCount > maxArgs) + { + throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)"); + } + + Operand[] sources = new Operand[1 + argsCount]; + + // Handle arguments passed on registers. + for (int index = 0; index < argsCount; index++) + { + Operand source = operation.GetSource(1 + index); + + Operand argReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(index), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(index), source.Type); + + Operation copyOp = new Operation(Instruction.Copy, argReg, source); + + HandleConstantCopy(nodes, nodes.AddBefore(node, copyOp), copyOp); + + sources[1 + index] = argReg; + } + + // The target address must be on the return registers, since we + // don't return anything and it is guaranteed to not be a + // callee saved register (which would be trashed on the epilogue). 
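// (The sequence this lowers to, illustrated: the copy below places the target in RAX, then
// GenerateTailcall emits the epilogue followed by a jump through the new Assembler.Jmp, so
// control transfers without growing the host call stack.)
//
//     mov rax, <target host address>
//     ...restore callee-saved registers, release the frame...   (WriteEpilogue)
//     jmp rax                                                    (X86Instruction.Jmp, FF /4)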
+ Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + + Operation addrCopyOp = new Operation(Instruction.Copy, retReg, operation.GetSource(0)); + + nodes.AddBefore(node, addrCopyOp); + + sources[0] = retReg; + + operation.SetSources(sources); + } + private static void HandleLoadArgumentWindowsAbi( CompilerContext cctx, IntrusiveList nodes, diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index 813730f2a..a6dbf1a5b 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -23,6 +23,7 @@ namespace ARMeilleure.CodeGen.X86 Cmpps, Cmpsd, Cmpss, + Cmpxchg, Cmpxchg16b, Comisd, Comiss, @@ -50,6 +51,7 @@ namespace ARMeilleure.CodeGen.X86 Imul, Imul128, Insertps, + Jmp, Lea, Maxpd, Maxps, diff --git a/ARMeilleure/Decoders/Block.cs b/ARMeilleure/Decoders/Block.cs index 3d13c2d5e..d38b5a8ec 100644 --- a/ARMeilleure/Decoders/Block.cs +++ b/ARMeilleure/Decoders/Block.cs @@ -11,6 +11,8 @@ namespace ARMeilleure.Decoders public Block Next { get; set; } public Block Branch { get; set; } + public bool TailCall { get; set; } + public List OpCodes { get; private set; } public Block() diff --git a/ARMeilleure/Decoders/Decoder.cs b/ARMeilleure/Decoders/Decoder.cs index 7cbb62e6c..9675dc8db 100644 --- a/ARMeilleure/Decoders/Decoder.cs +++ b/ARMeilleure/Decoders/Decoder.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Decoders.Optimizations; using ARMeilleure.Instructions; using ARMeilleure.Memory; using ARMeilleure.State; @@ -15,6 +16,9 @@ namespace ARMeilleure.Decoders // take too long to compile and use too much memory. private const int MaxInstsPerFunction = 5000; + // For lower code quality translation, we set a lower limit since we're blocking execution. + private const int MaxInstsPerFunctionLowCq = 500; + private delegate object MakeOp(InstDescriptor inst, ulong address, int opCode); private static ConcurrentDictionary _opActivators; @@ -33,7 +37,7 @@ namespace ARMeilleure.Decoders return new Block[] { block }; } - public static Block[] DecodeFunction(MemoryManager memory, ulong address, ExecutionMode mode) + public static Block[] DecodeFunction(MemoryManager memory, ulong address, ExecutionMode mode, bool highCq) { List blocks = new List(); @@ -43,11 +47,13 @@ namespace ARMeilleure.Decoders int opsCount = 0; + int instructionLimit = highCq ? 
MaxInstsPerFunction : MaxInstsPerFunctionLowCq; + Block GetBlock(ulong blkAddress) { if (!visited.TryGetValue(blkAddress, out Block block)) { - if (opsCount > MaxInstsPerFunction || !memory.IsMapped((long)blkAddress)) + if (opsCount > instructionLimit || !memory.IsMapped((long)blkAddress)) { return null; } @@ -121,7 +127,7 @@ namespace ARMeilleure.Decoders currBlock.Branch = GetBlock((ulong)op.Immediate); } - if (!IsUnconditionalBranch(lastOp) /*|| isCall*/) + if (!IsUnconditionalBranch(lastOp) || isCall) { currBlock.Next = GetBlock(currBlock.EndAddress); } @@ -140,10 +146,12 @@ namespace ARMeilleure.Decoders } } + TailCallRemover.RunPass(address, blocks); + return blocks.ToArray(); } - private static bool BinarySearch(List blocks, ulong address, out int index) + public static bool BinarySearch(List blocks, ulong address, out int index) { index = 0; diff --git a/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs b/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs new file mode 100644 index 000000000..2d6439bac --- /dev/null +++ b/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs @@ -0,0 +1,75 @@ +using ARMeilleure.Decoders; +using System; +using System.Collections.Generic; + +namespace ARMeilleure.Decoders.Optimizations +{ + static class TailCallRemover + { + public static void RunPass(ulong entryAddress, List blocks) + { + // Detect tail calls: + // - Assume this function spans the space covered by contiguous code blocks surrounding the entry address. + // - Unconditional jump to an area outside this contiguous region will be treated as a tail call. + // - Include a small allowance for jumps outside the contiguous range. + + if (!Decoder.BinarySearch(blocks, entryAddress, out int entryBlockId)) + { + throw new InvalidOperationException("Function entry point is not contained in a block."); + } + + const ulong allowance = 4; + Block entryBlock = blocks[entryBlockId]; + int startBlockIndex = entryBlockId; + Block startBlock = entryBlock; + int endBlockIndex = entryBlockId; + Block endBlock = entryBlock; + + for (int i = entryBlockId + 1; i < blocks.Count; i++) // Search forwards. + { + Block block = blocks[i]; + if (endBlock.EndAddress < block.Address - allowance) + { + break; // End of contiguous function. + } + + endBlock = block; + endBlockIndex = i; + } + + for (int i = entryBlockId - 1; i >= 0; i--) // Search backwards. + { + Block block = blocks[i]; + if (startBlock.Address > block.EndAddress + allowance) + { + break; // End of contiguous function. + } + + startBlock = block; + startBlockIndex = i; + } + + if (startBlockIndex == 0 && endBlockIndex == blocks.Count - 1) + { + return; // Nothing to do here. + } + + // Replace all branches to blocks outside the range with null, and force a tail call. + + for (int i = startBlockIndex; i <= endBlockIndex; i++) + { + Block block = blocks[i]; + if (block.Branch != null && (block.Branch.Address > endBlock.EndAddress || block.Branch.EndAddress < startBlock.Address)) + { + block.Branch = null; + block.TailCall = true; + } + } + + // Finally, delete all blocks outside the contiguous range. 
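// (The tail range is removed before the head range so startBlockIndex stays valid for the
// second RemoveRange call. Hypothetical example: blocks covering 0x1000-0x1FFF around an
// entry point at 0x1400 form the contiguous function; a branch to 0x3000 was nulled and
// marked TailCall above, and any blocks decoded outside 0x1000-0x1FFF are dropped here.)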
+ + blocks.RemoveRange(endBlockIndex + 1, (blocks.Count - endBlockIndex) - 1); + blocks.RemoveRange(0, startBlockIndex); + } + } +} diff --git a/ARMeilleure/Instructions/DelegateTypes.cs b/ARMeilleure/Instructions/DelegateTypes.cs index b65149cb8..41614f88e 100644 --- a/ARMeilleure/Instructions/DelegateTypes.cs +++ b/ARMeilleure/Instructions/DelegateTypes.cs @@ -3,6 +3,8 @@ using System; namespace ARMeilleure.Instructions { + delegate bool _Bool(); + delegate double _F64_F64(double a1); delegate double _F64_F64_Bool(double a1, bool a2); delegate double _F64_F64_F64(double a1, double a2); diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs index 916a1da5a..12fa1bf1b 100644 --- a/ARMeilleure/Instructions/InstEmitAluHelper.cs +++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs @@ -116,12 +116,14 @@ namespace ARMeilleure.Instructions { Debug.Assert(value.Type == OperandType.I32); - context.StoreToContext(); - if (IsThumb(context.CurrOp)) { - // Make this count as a call, the translator will ignore the low bit for the address. - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(value, Const(1)))); + context.StoreToContext(); + bool isReturn = IsA32Return(context); + + Operand addr = context.BitwiseOr(value, Const(1)); + + InstEmitFlowHelper.EmitVirtualJump(context, addr, isReturn); } else { @@ -138,18 +140,8 @@ namespace ARMeilleure.Instructions if (setFlags) { // TODO: Load SPSR etc. - Operand isThumb = GetFlag(PState.TFlag); - Operand lblThumb = Label(); - - context.BranchIfTrue(lblThumb, isThumb); - - // Make this count as a call, the translator will ignore the low bit for the address. - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(context.BitwiseAnd(value, Const(~3)), Const(1)))); - - context.MarkLabel(lblThumb); - - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(value, Const(1)))); + EmitBxWritePc(context, value); } else { diff --git a/ARMeilleure/Instructions/InstEmitException.cs b/ARMeilleure/Instructions/InstEmitException.cs index 6f7b6fd51..f0bde242a 100644 --- a/ARMeilleure/Instructions/InstEmitException.cs +++ b/ARMeilleure/Instructions/InstEmitException.cs @@ -2,6 +2,7 @@ using ARMeilleure.Decoders; using ARMeilleure.Translation; using System; +using static ARMeilleure.Instructions.InstEmitFlowHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; namespace ARMeilleure.Instructions @@ -30,7 +31,7 @@ namespace ARMeilleure.Instructions if (context.CurrBlock.Next == null) { - context.Return(Const(op.Address + 4)); + EmitTailContinue(context, Const(op.Address + 4)); } } @@ -48,7 +49,7 @@ namespace ARMeilleure.Instructions if (context.CurrBlock.Next == null) { - context.Return(Const(op.Address + 4)); + EmitTailContinue(context, Const(op.Address + 4)); } } } diff --git a/ARMeilleure/Instructions/InstEmitException32.cs b/ARMeilleure/Instructions/InstEmitException32.cs index a73f0dec7..8ffad1d1f 100644 --- a/ARMeilleure/Instructions/InstEmitException32.cs +++ b/ARMeilleure/Instructions/InstEmitException32.cs @@ -1,6 +1,7 @@ using ARMeilleure.Decoders; using ARMeilleure.Translation; +using static ARMeilleure.Instructions.InstEmitFlowHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; namespace ARMeilleure.Instructions @@ -29,7 +30,7 @@ namespace ARMeilleure.Instructions if (context.CurrBlock.Next == null) { - context.Return(Const(op.Address + 4)); + EmitTailContinue(context, Const(op.Address + 4)); } } } diff --git 
a/ARMeilleure/Instructions/InstEmitFlow.cs b/ARMeilleure/Instructions/InstEmitFlow.cs index 93d36e1b9..bac9ec588 100644 --- a/ARMeilleure/Instructions/InstEmitFlow.cs +++ b/ARMeilleure/Instructions/InstEmitFlow.cs @@ -21,7 +21,7 @@ namespace ARMeilleure.Instructions } else { - context.Return(Const(op.Immediate)); + EmitTailContinue(context, Const(op.Immediate), context.CurrBlock.TailCall); } } @@ -56,7 +56,7 @@ namespace ARMeilleure.Instructions { OpCodeBReg op = (OpCodeBReg)context.CurrOp; - EmitVirtualJump(context, GetIntOrZR(context, op.Rn)); + EmitVirtualJump(context, GetIntOrZR(context, op.Rn), op.Rn == RegisterAlias.Lr); } public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true); @@ -71,7 +71,7 @@ namespace ARMeilleure.Instructions public static void Ret(ArmEmitterContext context) { - context.Return(context.BitwiseOr(GetIntOrZR(context, RegisterAlias.Lr), Const(CallFlag))); + context.Return(GetIntOrZR(context, RegisterAlias.Lr)); } public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true); @@ -96,7 +96,7 @@ namespace ARMeilleure.Instructions if (context.CurrBlock.Next == null) { - context.Return(Const(op.Address + 4)); + EmitTailContinue(context, Const(op.Address + 4)); } } else @@ -105,11 +105,11 @@ namespace ARMeilleure.Instructions EmitCondBranch(context, lblTaken, cond); - context.Return(Const(op.Address + 4)); + EmitTailContinue(context, Const(op.Address + 4)); context.MarkLabel(lblTaken); - context.Return(Const(op.Immediate)); + EmitTailContinue(context, Const(op.Immediate)); } } @@ -132,7 +132,7 @@ namespace ARMeilleure.Instructions if (context.CurrBlock.Next == null) { - context.Return(Const(op.Address + 4)); + EmitTailContinue(context, Const(op.Address + 4)); } } else @@ -148,11 +148,11 @@ namespace ARMeilleure.Instructions context.BranchIfFalse(lblTaken, value); } - context.Return(Const(op.Address + 4)); + EmitTailContinue(context, Const(op.Address + 4)); context.MarkLabel(lblTaken); - context.Return(Const(op.Immediate)); + EmitTailContinue(context, Const(op.Immediate)); } } } diff --git a/ARMeilleure/Instructions/InstEmitFlow32.cs b/ARMeilleure/Instructions/InstEmitFlow32.cs index cbb9ad5b2..47233eb99 100644 --- a/ARMeilleure/Instructions/InstEmitFlow32.cs +++ b/ARMeilleure/Instructions/InstEmitFlow32.cs @@ -21,8 +21,7 @@ namespace ARMeilleure.Instructions } else { - context.StoreToContext(); - context.Return(Const(op.Immediate)); + EmitTailContinue(context, Const(op.Immediate)); } } @@ -57,7 +56,7 @@ namespace ARMeilleure.Instructions SetFlag(context, PState.TFlag, Const(isThumb ? 0 : 1)); } - InstEmitFlowHelper.EmitCall(context, (ulong)op.Immediate); + EmitCall(context, (ulong)op.Immediate); } public static void Blxr(ArmEmitterContext context) @@ -66,9 +65,8 @@ namespace ARMeilleure.Instructions uint pc = op.GetPc(); - Operand addr = GetIntA32(context, op.Rm); + Operand addr = context.Copy(GetIntA32(context, op.Rm)); Operand bitOne = context.BitwiseAnd(addr, Const(1)); - addr = context.BitwiseOr(addr, Const((int)CallFlag)); // Set call flag. bool isThumb = IsThumb(context.CurrOp); @@ -80,16 +78,14 @@ namespace ARMeilleure.Instructions SetFlag(context, PState.TFlag, bitOne); - context.Return(addr); // Call. 
+ EmitVirtualCall(context, addr); } public static void Bx(ArmEmitterContext context) { IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; - context.StoreToContext(); - - EmitBxWritePc(context, GetIntA32(context, op.Rm)); + EmitBxWritePc(context, GetIntA32(context, op.Rm), op.Rm); } } } \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/ARMeilleure/Instructions/InstEmitFlowHelper.cs index a8eb21d33..f0a81e855 100644 --- a/ARMeilleure/Instructions/InstEmitFlowHelper.cs +++ b/ARMeilleure/Instructions/InstEmitFlowHelper.cs @@ -2,6 +2,7 @@ using ARMeilleure.Decoders; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; +using System; using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -142,7 +143,29 @@ namespace ARMeilleure.Instructions public static void EmitCall(ArmEmitterContext context, ulong immediate) { - context.Return(Const(immediate | CallFlag)); + EmitJumpTableBranch(context, Const(immediate)); + } + + private static void EmitNativeCall(ArmEmitterContext context, Operand nativeContextPtr, Operand funcAddr, bool isJump = false) + { + context.StoreToContext(); + Operand returnAddress; + if (isJump) + { + context.Tailcall(funcAddr, nativeContextPtr); + } + else + { + returnAddress = context.Call(funcAddr, OperandType.I64, nativeContextPtr); + context.LoadFromContext(); + + EmitContinueOrReturnCheck(context, returnAddress); + } + } + + private static void EmitNativeCall(ArmEmitterContext context, Operand funcAddr, bool isJump = false) + { + EmitNativeCall(context, context.LoadArgument(OperandType.I64, 0), funcAddr, isJump); } public static void EmitVirtualCall(ArmEmitterContext context, Operand target) @@ -150,37 +173,45 @@ namespace ARMeilleure.Instructions EmitVirtualCallOrJump(context, target, isJump: false); } - public static void EmitVirtualJump(ArmEmitterContext context, Operand target) + public static void EmitVirtualJump(ArmEmitterContext context, Operand target, bool isReturn) { - EmitVirtualCallOrJump(context, target, isJump: true); + EmitVirtualCallOrJump(context, target, isJump: true, isReturn: isReturn); } - private static void EmitVirtualCallOrJump(ArmEmitterContext context, Operand target, bool isJump) + private static void EmitVirtualCallOrJump(ArmEmitterContext context, Operand target, bool isJump, bool isReturn = false) { - context.Return(context.BitwiseOr(target, Const(target.Type, (long)CallFlag))); - } - - private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand retVal) - { - // Note: The return value of the called method will be placed - // at the Stack, the return value is always a Int64 with the - // return address of the function. We check if the address is - // correct, if it isn't we keep returning until we reach the dispatcher. - ulong nextAddr = GetNextOpAddress(context.CurrOp); - - if (context.CurrBlock.Next != null) + if (isReturn) { - Operand lblContinue = Label(); - - context.BranchIfTrue(lblContinue, context.ICompareEqual(retVal, Const(nextAddr))); - - context.Return(Const(nextAddr)); - - context.MarkLabel(lblContinue); + context.Return(target); } else { - context.Return(Const(nextAddr)); + EmitJumpTableBranch(context, target, isJump); + } + } + + private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand returnAddress) + { + // Note: The return value of a translated function is always an Int64 with the + // address execution has returned to. 
We expect this address to be immediately after the + // current instruction, if it isn't we keep returning until we reach the dispatcher. + Operand nextAddr = Const(GetNextOpAddress(context.CurrOp)); + + // Try to continue within this block. + // If the return address isn't to our next instruction, we need to return so the JIT can figure out what to do. + Operand lblContinue = Label(); + + // We need to clear out the call flag for the return address before comparing it. + context.BranchIfTrue(lblContinue, context.ICompareEqual(context.BitwiseAnd(returnAddress, Const(~CallFlag)), nextAddr)); + + context.Return(returnAddress); + + context.MarkLabel(lblContinue); + + if (context.CurrBlock.Next == null) + { + // No code following this instruction, try and find the next block and jump to it. + EmitTailContinue(context, nextAddr); } } @@ -188,5 +219,134 @@ namespace ARMeilleure.Instructions { return op.Address + (ulong)op.OpCodeSizeInBytes; } + + public static void EmitTailContinue(ArmEmitterContext context, Operand address, bool allowRejit = false) + { + bool useTailContinue = true; // Left option here as it may be useful if we need to return to managed rather than tail call in future. (eg. for debug) + if (useTailContinue) + { + if (allowRejit) + { + address = context.BitwiseOr(address, Const(1L)); + } + + Operand fallbackAddr = context.Call(new _U64_U64(NativeInterface.GetFunctionAddress), address); + + EmitNativeCall(context, fallbackAddr, true); + } + else + { + context.Return(address); + } + } + + private static void EmitNativeCallWithGuestAddress(ArmEmitterContext context, Operand funcAddr, Operand guestAddress, bool isJump) + { + Operand nativeContextPtr = context.LoadArgument(OperandType.I64, 0); + context.Store(context.Add(nativeContextPtr, Const(NativeContext.GetCallAddressOffset())), guestAddress); + + EmitNativeCall(context, nativeContextPtr, funcAddr, isJump); + } + + private static void EmitBranchFallback(ArmEmitterContext context, Operand address, bool isJump) + { + address = context.BitwiseOr(address, Const(address.Type, (long)CallFlag)); // Set call flag. + Operand fallbackAddr = context.Call(new _U64_U64(NativeInterface.GetFunctionAddress), address); + EmitNativeCall(context, fallbackAddr, isJump); + } + + public static void EmitDynamicTableCall(ArmEmitterContext context, Operand tableAddress, Operand address, bool isJump) + { + // Loop over elements of the dynamic table. Unrolled loop. + + Operand endLabel = Label(); + Operand fallbackLabel = Label(); + + Action emitTableEntry = (Operand entrySkipLabel) => + { + // Try to take this entry in the table if its guest address equals 0. + Operand gotResult = context.CompareAndSwap(tableAddress, Const(0L), address); + + // Is the address ours? (either taken via CompareAndSwap (0), or what was already here) + context.BranchIfFalse(entrySkipLabel, context.BitwiseOr(context.ICompareEqual(gotResult, address), context.ICompareEqual(gotResult, Const(0L)))); + + // It's ours, so what function is it pointing to? + Operand targetFunctionPtr = context.Add(tableAddress, Const(8L)); + Operand targetFunction = context.Load(OperandType.I64, targetFunctionPtr); + + // Call the function. + // We pass in the entry address as the guest address, as the entry may need to be updated by the indirect call stub. + EmitNativeCallWithGuestAddress(context, targetFunction, tableAddress, isJump); + context.Branch(endLabel); + }; + + // Currently this uses a size of 1, as higher values inflate code size for no real benefit. 
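// A plain C# model (illustrative only, names hypothetical) of what each probe emitted by
// emitTableEntry does at runtime against one 16-byte dynamic entry: claim the guest-address
// slot if it is still 0, accept it if it already holds our target, and only then read the
// host pointer stored in the second 8 bytes.
static unsafe bool TryClaimOrMatch(long* entry, long guestAddress, out long hostFunction)
{
    long seen = System.Threading.Interlocked.CompareExchange(ref *entry, guestAddress, 0);

    if (seen == 0 || seen == guestAddress) // claimed just now, or already ours
    {
        hostFunction = *(entry + 1);       // host function pointer (or the indirect call stub)

        return true;
    }

    hostFunction = 0;                      // entry belongs to another target; try the next one

    return false;
}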
+ for (int i = 0; i < JumpTable.DynamicTableElems; i++) + { + if (i == JumpTable.DynamicTableElems - 1) + { + emitTableEntry(fallbackLabel); // If this is the last entry, avoid emitting the additional label and add. + } + else + { + Operand nextLabel = Label(); + + emitTableEntry(nextLabel); + + context.MarkLabel(nextLabel); + tableAddress = context.Add(tableAddress, Const((long)JumpTable.JumpTableStride)); // Move to the next table entry. + } + } + + context.MarkLabel(fallbackLabel); + + EmitBranchFallback(context, address, isJump); + + context.MarkLabel(endLabel); + } + + public static void EmitJumpTableBranch(ArmEmitterContext context, Operand address, bool isJump = false) + { + if (address.Type == OperandType.I32) + { + address = context.ZeroExtend32(OperandType.I64, address); + } + + // TODO: Constant folding. Indirect calls are slower in the best case and emit more code so we want to avoid them when possible. + bool isConst = address.Kind == OperandKind.Constant; + long constAddr = (long)address.Value; + + if (!context.HighCq) + { + // Don't emit indirect calls or jumps if we're compiling in lowCq mode. + // This avoids wasting space on the jump and indirect tables. + // Just ask the translator for the function address. + + EmitBranchFallback(context, address, isJump); + } + else if (!isConst) + { + // Virtual branch/call - store first used addresses on a small table for fast lookup. + int entry = context.JumpTable.ReserveDynamicEntry(isJump); + + int jumpOffset = entry * JumpTable.JumpTableStride * JumpTable.DynamicTableElems; + Operand dynTablePtr = Const(context.JumpTable.DynamicPointer.ToInt64() + jumpOffset); + + EmitDynamicTableCall(context, dynTablePtr, address, isJump); + } + else + { + int entry = context.JumpTable.ReserveTableEntry(context.BaseAddress & (~3L), constAddr, isJump); + + int jumpOffset = entry * JumpTable.JumpTableStride + 8; // Offset directly to the host address. + + // TODO: Relocatable jump table ptr for AOT. Would prefer a solution to patch this constant into functions as they are loaded rather than calculate at runtime. + Operand tableEntryPtr = Const(context.JumpTable.JumpPointer.ToInt64() + jumpOffset); + + Operand funcAddr = context.Load(OperandType.I64, tableEntryPtr); + + EmitNativeCallWithGuestAddress(context, funcAddr, address, isJump); // Call the function directly. If it's not present yet, this will call the direct call stub. + } + } } } diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs index f5495c660..a4227543f 100644 --- a/ARMeilleure/Instructions/InstEmitHelper.cs +++ b/ARMeilleure/Instructions/InstEmitHelper.cs @@ -144,22 +144,34 @@ namespace ARMeilleure.Instructions } } - public static void EmitBxWritePc(ArmEmitterContext context, Operand pc) + public static bool IsA32Return(ArmEmitterContext context) { + switch (context.CurrOp) + { + case IOpCode32MemMult op: + return true; // Setting PC using LDM is nearly always a return. + case OpCode32AluRsImm op: + return op.Rm == RegisterAlias.Aarch32Lr; + case OpCode32AluRsReg op: + return op.Rm == RegisterAlias.Aarch32Lr; + case OpCode32AluReg op: + return op.Rm == RegisterAlias.Aarch32Lr; + case OpCode32Mem op: + return op.Rn == RegisterAlias.Aarch32Sp && op.WBack && !op.Index; // Setting PC to an address stored on the stack is nearly always a return. 
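// (Illustrative matches: "LDM sp!, { ..., pc }" hits the IOpCode32MemMult case, "LDR pc,
// [sp], #4" the OpCode32Mem case above, and "BX lr" is treated as a return through the
// sourceRegister check in EmitBxWritePc below.)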
+ } + return false; + } + + public static void EmitBxWritePc(ArmEmitterContext context, Operand pc, int sourceRegister = 0) + { + bool isReturn = sourceRegister == RegisterAlias.Aarch32Lr || IsA32Return(context); Operand mode = context.BitwiseAnd(pc, Const(1)); SetFlag(context, PState.TFlag, mode); - Operand lblArmMode = Label(); + Operand addr = context.ConditionalSelect(mode, context.BitwiseOr(pc, Const((int)InstEmitFlowHelper.CallFlag)), context.BitwiseAnd(pc, Const(~3))); - context.BranchIfTrue(lblArmMode, mode); - - // Make this count as a call, the translator will ignore the low bit for the address. - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(pc, Const((int)InstEmitFlowHelper.CallFlag)))); - - context.MarkLabel(lblArmMode); - - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(context.BitwiseAnd(pc, Const(~3)), Const((int)InstEmitFlowHelper.CallFlag)))); + InstEmitFlowHelper.EmitVirtualJump(context, addr, isReturn); } public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex) diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs index 70861d163..e1dec3313 100644 --- a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs +++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs @@ -51,7 +51,7 @@ namespace ARMeilleure.Instructions EmitReadInt(context, address, rt, size); } - if (!isSimd) + if (!isSimd && !(context.CurrOp is OpCode32 && rt == State.RegisterAlias.Aarch32Pc)) { Operand value = GetInt(context, rt); diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs index 988e86bd7..4514c0da4 100644 --- a/ARMeilleure/Instructions/NativeInterface.cs +++ b/ARMeilleure/Instructions/NativeInterface.cs @@ -1,6 +1,8 @@ using ARMeilleure.Memory; using ARMeilleure.State; +using ARMeilleure.Translation; using System; +using System.Runtime.InteropServices; namespace ARMeilleure.Instructions { @@ -10,17 +12,19 @@ namespace ARMeilleure.Instructions private class ThreadContext { - public ExecutionContext Context { get; } - public MemoryManager Memory { get; } + public ExecutionContext Context { get; } + public MemoryManager Memory { get; } + public Translator Translator { get; } public ulong ExclusiveAddress { get; set; } public ulong ExclusiveValueLow { get; set; } public ulong ExclusiveValueHigh { get; set; } - public ThreadContext(ExecutionContext context, MemoryManager memory) + public ThreadContext(ExecutionContext context, MemoryManager memory, Translator translator) { - Context = context; - Memory = memory; + Context = context; + Memory = memory; + Translator = translator; ExclusiveAddress = ulong.MaxValue; } @@ -29,9 +33,9 @@ namespace ARMeilleure.Instructions [ThreadStatic] private static ThreadContext _context; - public static void RegisterThread(ExecutionContext context, MemoryManager memory) + public static void RegisterThread(ExecutionContext context, MemoryManager memory, Translator translator) { - _context = new ThreadContext(context, memory); + _context = new ThreadContext(context, memory, translator); } public static void UnregisterThread() @@ -381,18 +385,39 @@ namespace ARMeilleure.Instructions return address & ~((4UL << ErgSizeLog2) - 1); } + public static ulong GetFunctionAddress(ulong address) + { + TranslatedFunction function = _context.Translator.GetOrTranslate(address, GetContext().ExecutionMode); + return (ulong)function.GetPointer().ToInt64(); + } + + public static ulong GetIndirectFunctionAddress(ulong 
address, ulong entryAddress) + { + TranslatedFunction function = _context.Translator.GetOrTranslate(address, GetContext().ExecutionMode); + ulong ptr = (ulong)function.GetPointer().ToInt64(); + if (function.HighCq) + { + // Rewrite the host function address in the table to point to the highCq function. + Marshal.WriteInt64((IntPtr)entryAddress, 8, (long)ptr); + } + return ptr; + } + public static void ClearExclusive() { _context.ExclusiveAddress = ulong.MaxValue; } - public static void CheckSynchronization() + public static bool CheckSynchronization() { Statistics.PauseTimer(); - GetContext().CheckInterrupt(); + ExecutionContext context = GetContext(); + context.CheckInterrupt(); Statistics.ResumeTimer(); + + return context.Running; } public static ExecutionContext GetContext() diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs index 4c4ecb8f2..d1ce1aa37 100644 --- a/ARMeilleure/IntermediateRepresentation/Instruction.cs +++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs @@ -12,7 +12,7 @@ namespace ARMeilleure.IntermediateRepresentation BranchIfTrue, ByteSwap, Call, - CompareAndSwap128, + CompareAndSwap, CompareEqual, CompareGreater, CompareGreaterOrEqual, @@ -52,6 +52,7 @@ namespace ARMeilleure.IntermediateRepresentation Store16, Store8, Subtract, + Tailcall, VectorCreateScalar, VectorExtract, VectorExtract16, diff --git a/ARMeilleure/Memory/MemoryManagement.cs b/ARMeilleure/Memory/MemoryManagement.cs index e299ae49d..ba62f8e73 100644 --- a/ARMeilleure/Memory/MemoryManagement.cs +++ b/ARMeilleure/Memory/MemoryManagement.cs @@ -44,6 +44,25 @@ namespace ARMeilleure.Memory } } + public static bool Commit(IntPtr address, ulong size) + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + IntPtr sizeNint = new IntPtr((long)size); + + return MemoryManagementWindows.Commit(address, sizeNint); + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || + RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + return MemoryManagementUnix.Commit(address, size); + } + else + { + throw new PlatformNotSupportedException(); + } + } + public static void Reprotect(IntPtr address, ulong size, MemoryProtection permission) { bool result; @@ -70,6 +89,25 @@ namespace ARMeilleure.Memory } } + public static IntPtr Reserve(ulong size) + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + IntPtr sizeNint = new IntPtr((long)size); + + return MemoryManagementWindows.Reserve(sizeNint); + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || + RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + return MemoryManagementUnix.Reserve(size); + } + else + { + throw new PlatformNotSupportedException(); + } + } + public static bool Free(IntPtr address) { if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) diff --git a/ARMeilleure/Memory/MemoryManagementUnix.cs b/ARMeilleure/Memory/MemoryManagementUnix.cs index 3331fb428..e9b296081 100644 --- a/ARMeilleure/Memory/MemoryManagementUnix.cs +++ b/ARMeilleure/Memory/MemoryManagementUnix.cs @@ -30,6 +30,11 @@ namespace ARMeilleure.Memory return ptr; } + public static bool Commit(IntPtr address, ulong size) + { + return Syscall.mprotect(address, size, MmapProts.PROT_READ | MmapProts.PROT_WRITE) == 0; + } + public static bool Reprotect(IntPtr address, ulong size, Memory.MemoryProtection protection) { MmapProts prot = GetProtection(protection); @@ -37,6 +42,24 @@ namespace ARMeilleure.Memory return Syscall.mprotect(address, size, prot) == 0; } + 
public static IntPtr Reserve(ulong size) + { + ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE); + + const MmapProts prot = MmapProts.PROT_NONE; + + const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS; + + IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0); + + if (ptr == IntPtr.Zero) + { + throw new OutOfMemoryException(); + } + + return ptr; + } + private static MmapProts GetProtection(Memory.MemoryProtection protection) { switch (protection) diff --git a/ARMeilleure/Memory/MemoryManagementWindows.cs b/ARMeilleure/Memory/MemoryManagementWindows.cs index ae64b5c62..a94550631 100644 --- a/ARMeilleure/Memory/MemoryManagementWindows.cs +++ b/ARMeilleure/Memory/MemoryManagementWindows.cs @@ -89,6 +89,15 @@ namespace ARMeilleure.Memory return ptr; } + public static bool Commit(IntPtr location, IntPtr size) + { + const AllocationType flags = AllocationType.Commit; + + IntPtr ptr = VirtualAlloc(location, size, flags, MemoryProtection.ReadWrite); + + return ptr != IntPtr.Zero; + } + public static bool Reprotect(IntPtr address, IntPtr size, Memory.MemoryProtection protection) { MemoryProtection prot = GetProtection(protection); @@ -96,6 +105,20 @@ namespace ARMeilleure.Memory return VirtualProtect(address, size, prot, out _); } + public static IntPtr Reserve(IntPtr size) + { + const AllocationType flags = AllocationType.Reserve; + + IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite); + + if (ptr == IntPtr.Zero) + { + throw new OutOfMemoryException(); + } + + return ptr; + } + private static MemoryProtection GetProtection(Memory.MemoryProtection protection) { switch (protection) diff --git a/ARMeilleure/Memory/MemoryManagerPal.cs b/ARMeilleure/Memory/MemoryManagerPal.cs index 64191a0ac..66c436424 100644 --- a/ARMeilleure/Memory/MemoryManagerPal.cs +++ b/ARMeilleure/Memory/MemoryManagerPal.cs @@ -53,7 +53,7 @@ namespace ARMeilleure.Memory Operand expected = context.LoadArgument(OperandType.V128, 1); Operand desired = context.LoadArgument(OperandType.V128, 2); - Operand result = context.CompareAndSwap128(address, expected, desired); + Operand result = context.CompareAndSwap(address, expected, desired); context.Return(result); diff --git a/ARMeilleure/Memory/ReservedRegion.cs b/ARMeilleure/Memory/ReservedRegion.cs new file mode 100644 index 000000000..521019ade --- /dev/null +++ b/ARMeilleure/Memory/ReservedRegion.cs @@ -0,0 +1,53 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace ARMeilleure.Memory +{ + class ReservedRegion + { + private const int DefaultGranularity = 65536; // Mapping granularity in Windows. + + public IntPtr Pointer { get; } + + private ulong _maxSize; + private ulong _sizeGranularity; + private ulong _currentSize; + + public ReservedRegion(ulong maxSize, ulong granularity = 0) + { + if (granularity == 0) + { + granularity = DefaultGranularity; + } + + Pointer = MemoryManagement.Reserve(maxSize); + _maxSize = maxSize; + _sizeGranularity = granularity; + _currentSize = 0; + } + + public void ExpandIfNeeded(ulong desiredSize) + { + if (desiredSize > _maxSize) + { + throw new OutOfMemoryException(); + } + + if (desiredSize > _currentSize) + { + // Lock, and then check again. We only want to commit once. + lock (this) + { + if (desiredSize >= _currentSize) + { + ulong overflowBytes = desiredSize - _currentSize; + ulong moreToCommit = (((_sizeGranularity - 1) + overflowBytes) / _sizeGranularity) * _sizeGranularity; // Round up. 
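// (Worked example with the default 64 KiB granularity: growing from _currentSize = 0 to a
// desired size of 70 000 bytes gives moreToCommit = ((65535 + 70000) / 65536) * 65536
// = 131072, i.e. two granules are committed in one go.)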
+ MemoryManagement.Commit(new IntPtr((long)Pointer + (long)_currentSize), moreToCommit); + _currentSize += moreToCommit; + } + } + } + } + } +} diff --git a/ARMeilleure/State/ExecutionContext.cs b/ARMeilleure/State/ExecutionContext.cs index 482665dbf..57a05dbfd 100644 --- a/ARMeilleure/State/ExecutionContext.cs +++ b/ARMeilleure/State/ExecutionContext.cs @@ -5,7 +5,7 @@ namespace ARMeilleure.State { public class ExecutionContext { - private const int MinCountForCheck = 40000; + private const int MinCountForCheck = 4000; private NativeContext _nativeContext; @@ -57,7 +57,7 @@ namespace ARMeilleure.State } } - public bool Running { get; set; } + internal bool Running { get; private set; } public event EventHandler Interrupt; public event EventHandler Break; @@ -126,6 +126,12 @@ namespace ARMeilleure.State Undefined?.Invoke(this, new InstUndefinedEventArgs(address, opCode)); } + public void StopRunning() + { + Running = false; + _nativeContext.SetCounter(0); + } + public void Dispose() { _nativeContext.Dispose(); diff --git a/ARMeilleure/State/NativeContext.cs b/ARMeilleure/State/NativeContext.cs index eb54505c6..0ab9a3fd2 100644 --- a/ARMeilleure/State/NativeContext.cs +++ b/ARMeilleure/State/NativeContext.cs @@ -10,7 +10,7 @@ namespace ARMeilleure.State private const int IntSize = 8; private const int VecSize = 16; private const int FlagSize = 4; - private const int ExtraSize = 4; + private const int ExtraSize = 8; private const int TotalSize = RegisterConsts.IntRegsCount * IntSize + RegisterConsts.VecRegsCount * VecSize + @@ -183,6 +183,14 @@ namespace ARMeilleure.State RegisterConsts.FpFlagsCount * FlagSize; } + public static int GetCallAddressOffset() + { + return RegisterConsts.IntRegsCount * IntSize + + RegisterConsts.VecRegsCount * VecSize + + RegisterConsts.FlagsCount * FlagSize + + RegisterConsts.FpFlagsCount * FlagSize + 4; + } + public void Dispose() { MemoryManagement.Free(BasePtr); diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs index d35e985e6..d1a2c92db 100644 --- a/ARMeilleure/Translation/ArmEmitterContext.cs +++ b/ARMeilleure/Translation/ArmEmitterContext.cs @@ -41,10 +41,19 @@ namespace ARMeilleure.Translation public Aarch32Mode Mode { get; } - public ArmEmitterContext(MemoryManager memory, Aarch32Mode mode) + public JumpTable JumpTable { get; } + + public long BaseAddress { get; } + + public bool HighCq { get; } + + public ArmEmitterContext(MemoryManager memory, JumpTable jumpTable, long baseAddress, bool highCq, Aarch32Mode mode) { - Memory = memory; - Mode = mode; + Memory = memory; + JumpTable = jumpTable; + BaseAddress = baseAddress; + HighCq = highCq; + Mode = mode; _labels = new Dictionary(); } diff --git a/ARMeilleure/Translation/DirectCallStubs.cs b/ARMeilleure/Translation/DirectCallStubs.cs new file mode 100644 index 000000000..e6e87b2b6 --- /dev/null +++ b/ARMeilleure/Translation/DirectCallStubs.cs @@ -0,0 +1,131 @@ +using ARMeilleure.Instructions; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using System; +using System.Runtime.InteropServices; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Translation +{ + static class DirectCallStubs + { + private delegate long GuestFunction(IntPtr nativeContextPtr); + + private static GuestFunction _directCallStub; + private static GuestFunction _directTailCallStub; + private static GuestFunction _indirectCallStub; + private static GuestFunction _indirectTailCallStub; + + private static object 
_lock; + private static bool _initialized; + + static DirectCallStubs() + { + _lock = new object(); + } + + public static void InitializeStubs() + { + if (_initialized) return; + lock (_lock) + { + if (_initialized) return; + _directCallStub = GenerateDirectCallStub(false); + _directTailCallStub = GenerateDirectCallStub(true); + _indirectCallStub = GenerateIndirectCallStub(false); + _indirectTailCallStub = GenerateIndirectCallStub(true); + _initialized = true; + } + } + + public static IntPtr DirectCallStub(bool tailCall) + { + return Marshal.GetFunctionPointerForDelegate(tailCall ? _directTailCallStub : _directCallStub); + } + + public static IntPtr IndirectCallStub(bool tailCall) + { + return Marshal.GetFunctionPointerForDelegate(tailCall ? _indirectTailCallStub : _indirectCallStub); + } + + private static void EmitCall(EmitterContext context, Operand address, bool tailCall) + { + if (tailCall) + { + context.Tailcall(address, context.LoadArgument(OperandType.I64, 0)); + } + else + { + context.Return(context.Call(address, OperandType.I64, context.LoadArgument(OperandType.I64, 0))); + } + } + + /// + /// Generates a stub that is used to find function addresses. Used for direct calls when their jump table does not have the host address yet. + /// Takes a NativeContext like a translated guest function, and extracts the target address from the NativeContext. + /// When the target function is compiled in highCq, all table entries are updated to point to that function instead of this stub by the translator. + /// + private static GuestFunction GenerateDirectCallStub(bool tailCall) + { + EmitterContext context = new EmitterContext(); + + Operand nativeContextPtr = context.LoadArgument(OperandType.I64, 0); + + Operand address = context.Load(OperandType.I64, context.Add(nativeContextPtr, Const((long)NativeContext.GetCallAddressOffset()))); + + address = context.BitwiseOr(address, Const(address.Type, 1)); // Set call flag. + Operand functionAddr = context.Call(new _U64_U64(NativeInterface.GetFunctionAddress), address); + EmitCall(context, functionAddr, tailCall); + + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + OperandType[] argTypes = new OperandType[] + { + OperandType.I64 + }; + + return Compiler.Compile( + cfg, + argTypes, + OperandType.I64, + CompilerOptions.HighCq); + } + + /// + /// Generates a stub that is used to find function addresses and add them to an indirect table. + /// Used for indirect calls entries (already claimed) when their jump table does not have the host address yet. + /// Takes a NativeContext like a translated guest function, and extracts the target indirect table entry from the NativeContext. + /// If the function we find is highCq, the entry in the table is updated to point to that function rather than this stub. + /// + private static GuestFunction GenerateIndirectCallStub(bool tailCall) + { + EmitterContext context = new EmitterContext(); + + Operand nativeContextPtr = context.LoadArgument(OperandType.I64, 0); + + Operand entryAddress = context.Load(OperandType.I64, context.Add(nativeContextPtr, Const((long)NativeContext.GetCallAddressOffset()))); + Operand address = context.Load(OperandType.I64, entryAddress); + + // We need to find the missing function. If the function is HighCq, then it replaces this stub in the indirect table. + // Either way, we call it afterwards. + Operand functionAddr = context.Call(new _U64_U64_U64(NativeInterface.GetIndirectFunctionAddress), address, entryAddress); + + // Call and save the function. 
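// (The EmitCall below either returns the callee's result or, in the tail-call variant, jumps
// to it with the same NativeContext pointer, so the stub itself does not remain on the host
// call stack.)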
+ EmitCall(context, functionAddr, tailCall); + + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + OperandType[] argTypes = new OperandType[] + { + OperandType.I64 + }; + + return Compiler.Compile( + cfg, + argTypes, + OperandType.I64, + CompilerOptions.HighCq); + } + } +} diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs index a125a715d..a11d25a6d 100644 --- a/ARMeilleure/Translation/EmitterContext.cs +++ b/ARMeilleure/Translation/EmitterContext.cs @@ -143,9 +143,22 @@ namespace ARMeilleure.Translation } } - public Operand CompareAndSwap128(Operand address, Operand expected, Operand desired) + public void Tailcall(Operand address, params Operand[] callArgs) { - return Add(Instruction.CompareAndSwap128, Local(OperandType.V128), address, expected, desired); + Operand[] args = new Operand[callArgs.Length + 1]; + + args[0] = address; + + Array.Copy(callArgs, 0, args, 1, callArgs.Length); + + Add(Instruction.Tailcall, null, args); + + _needsNewBlock = true; + } + + public Operand CompareAndSwap(Operand address, Operand expected, Operand desired) + { + return Add(Instruction.CompareAndSwap, Local(desired.Type), address, expected, desired); } public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3) diff --git a/ARMeilleure/Translation/JitCache.cs b/ARMeilleure/Translation/JitCache.cs index 73f04a966..b004cc22a 100644 --- a/ARMeilleure/Translation/JitCache.cs +++ b/ARMeilleure/Translation/JitCache.cs @@ -13,9 +13,11 @@ namespace ARMeilleure.Translation private const int CodeAlignment = 4; // Bytes - private const int CacheSize = 512 * 1024 * 1024; + private const int CacheSize = 2047 * 1024 * 1024; - private static IntPtr _basePointer; + private static ReservedRegion _jitRegion; + + private static IntPtr _basePointer => _jitRegion.Pointer; private static int _offset; @@ -25,10 +27,11 @@ namespace ARMeilleure.Translation static JitCache() { - _basePointer = MemoryManagement.Allocate(CacheSize); + _jitRegion = new ReservedRegion(CacheSize); if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { + _jitRegion.ExpandIfNeeded(PageSize); JitUnwindWindows.InstallFunctionTableHandler(_basePointer, CacheSize); // The first page is used for the table based SEH structs. @@ -97,6 +100,8 @@ namespace ARMeilleure.Translation _offset += codeSize; + _jitRegion.ExpandIfNeeded((ulong)_offset); + if ((ulong)(uint)_offset > CacheSize) { throw new OutOfMemoryException(); diff --git a/ARMeilleure/Translation/JumpTable.cs b/ARMeilleure/Translation/JumpTable.cs new file mode 100644 index 000000000..5cad29448 --- /dev/null +++ b/ARMeilleure/Translation/JumpTable.cs @@ -0,0 +1,149 @@ +using ARMeilleure.Memory; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Threading; + +namespace ARMeilleure.Translation +{ + class JumpTable + { + public static JumpTable Instance { get; } + + static JumpTable() + { + Instance = new JumpTable(); + } + + // The jump table is a block of (guestAddress, hostAddress) function mappings. + // Each entry corresponds to one branch in a JIT compiled function. The entries are + // reserved specifically for each call. + // The _dependants dictionary can be used to update the hostAddress for any functions that change. 
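// Sketch of a single entry as laid out in the reserved regions below (16 bytes, matching
// JumpTableStride); the struct is illustrative only, the real code writes the two fields as
// raw offsets 0 and 8 with Marshal.WriteInt64.
[StructLayout(LayoutKind.Sequential, Pack = 8)]
struct JumpTableEntry
{
    public long GuestAddress; // bytes 0..7: guest target address (0 = unclaimed, dynamic table only)
    public long HostAddress;  // bytes 8..15: host function pointer, or a direct/indirect call stub
}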
+
+ public const int JumpTableStride = 16; // 8 byte guest address, 8 byte host address
+
+ private const int JumpTableSize = 1048576;
+
+ private const int JumpTableByteSize = JumpTableSize * JumpTableStride;
+
+ // The dynamic table is also a block of (guestAddress, hostAddress) function mappings.
+ // The main difference is that indirect calls and jumps reserve _multiple_ entries on the table.
+ // These start out as all 0. When an indirect call is made, it tries to find the guest address on the table.
+
+ // If we get to an empty address, the guestAddress is set to the call that we want.
+
+ // If we get to a guestAddress that matches our own (or we just claimed it), the hostAddress is read.
+ // If it is non-zero, we immediately branch or call the host function.
+ // If it is 0, NativeInterface is called to find the rejitted address of the call.
+ // If none is found, the hostAddress entry stays at 0. Otherwise, the new address is placed in the entry.
+
+ // If the table size is exhausted and we didn't find our desired address, we fall back to requesting
+ // the function from the JIT.
+
+ private const int DynamicTableSize = 1048576;
+
+ public const int DynamicTableElems = 1;
+
+ public const int DynamicTableStride = DynamicTableElems * JumpTableStride;
+
+ private const int DynamicTableByteSize = DynamicTableSize * JumpTableStride * DynamicTableElems;
+
+ private int _tableEnd = 0;
+ private int _dynTableEnd = 0;
+
+ private ConcurrentDictionary<ulong, TranslatedFunction> _targets;
+ private ConcurrentDictionary<ulong, LinkedList<int>> _dependants; // TODO: Attach to TranslatedFunction or a wrapper class.
+
+ private ReservedRegion _jumpRegion;
+ private ReservedRegion _dynamicRegion;
+ public IntPtr JumpPointer => _jumpRegion.Pointer;
+ public IntPtr DynamicPointer => _dynamicRegion.Pointer;
+
+ public JumpTable()
+ {
+ _jumpRegion = new ReservedRegion(JumpTableByteSize);
+ _dynamicRegion = new ReservedRegion(DynamicTableByteSize);
+
+ _targets = new ConcurrentDictionary<ulong, TranslatedFunction>();
+ _dependants = new ConcurrentDictionary<ulong, LinkedList<int>>();
+ }
+
+ public void RegisterFunction(ulong address, TranslatedFunction func) {
+ address &= ~3UL;
+ _targets.AddOrUpdate(address, func, (key, oldFunc) => func);
+ long funcPtr = func.GetPointer().ToInt64();
+
+ // Update all jump table entries that target this address.
+ LinkedList<int> myDependants;
+ if (_dependants.TryGetValue(address, out myDependants))
+ {
+ lock (myDependants)
+ {
+ foreach (var entry in myDependants)
+ {
+ IntPtr addr = _jumpRegion.Pointer + entry * JumpTableStride;
+ Marshal.WriteInt64(addr, 8, funcPtr);
+ }
+ }
+ }
+ }
+
+ public int ReserveDynamicEntry(bool isJump)
+ {
+ int entry = Interlocked.Increment(ref _dynTableEnd);
+ if (entry >= DynamicTableSize)
+ {
+ throw new OutOfMemoryException("JIT Dynamic Jump Table exhausted.");
+ }
+
+ _dynamicRegion.ExpandIfNeeded((ulong)((entry + 1) * DynamicTableStride));
+
+ // Initialize all host function pointers to the indirect call stub.
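// (Annotation, not part of the patch.) These stub pointers are what the emitted indirect-branch
// lookup falls back to. In managed pseudocode, the probe sequence described at the top of this
// file is roughly the following; the real version is emitted as IR by the branch emitters and
// claims empty slots atomically (presumably what the new CompareAndSwap IR instruction is for):
//
//     for (int i = 0; i < DynamicTableElems; i++)        // a single probe while DynamicTableElems == 1
//     {
//         IntPtr slot = entryBase + i * JumpTableStride;
//         long slotGuest = Marshal.ReadInt64(slot, 0);
//
//         if (slotGuest == 0) { Marshal.WriteInt64(slot, 0, guestAddress); slotGuest = guestAddress; }
//
//         if (slotGuest == guestAddress)
//         {
//             return Marshal.ReadInt64(slot, 8);         // host code, or the indirect call stub if not rejitted yet
//         }
//     }
//
//     return 0;                                          // exhausted: ask the JIT for the function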
+
+ IntPtr addr = _dynamicRegion.Pointer + entry * DynamicTableStride;
+ long stubPtr = (long)DirectCallStubs.IndirectCallStub(isJump);
+
+ for (int i = 0; i < DynamicTableElems; i++)
+ {
+ Marshal.WriteInt64(addr, i * JumpTableStride + 8, stubPtr);
+ }
+
+ return entry;
+ }
+
+ public int ReserveTableEntry(long ownerAddress, long address, bool isJump)
+ {
+ int entry = Interlocked.Increment(ref _tableEnd);
+ if (entry >= JumpTableSize)
+ {
+ throw new OutOfMemoryException("JIT Direct Jump Table exhausted.");
+ }
+
+ _jumpRegion.ExpandIfNeeded((ulong)((entry + 1) * JumpTableStride));
+
+ // Is the address we have already registered? If so, put the function address in the jump table.
+ // If not, it will point to the direct call stub.
+ long value = (long)DirectCallStubs.DirectCallStub(isJump);
+ TranslatedFunction func;
+ if (_targets.TryGetValue((ulong)address, out func))
+ {
+ value = func.GetPointer().ToInt64();
+ }
+
+ // Make sure changes to the function at the target address update this jump table entry.
+ LinkedList<int> targetDependants = _dependants.GetOrAdd((ulong)address, (addr) => new LinkedList<int>());
+ lock (targetDependants)
+ {
+ targetDependants.AddLast(entry);
+ }
+
+ IntPtr addr = _jumpRegion.Pointer + entry * JumpTableStride;
+
+ Marshal.WriteInt64(addr, 0, address);
+ Marshal.WriteInt64(addr, 8, value);
+
+ return entry;
+ }
+ }
+}
diff --git a/ARMeilleure/Translation/TranslatedFunction.cs b/ARMeilleure/Translation/TranslatedFunction.cs
index 06069cf8f..af01aaab3 100644
--- a/ARMeilleure/Translation/TranslatedFunction.cs
+++ b/ARMeilleure/Translation/TranslatedFunction.cs
@@ -1,3 +1,5 @@
+using System;
+using System.Runtime.InteropServices;
using System.Threading;
namespace ARMeilleure.Translation
@@ -11,6 +13,8 @@ namespace ARMeilleure.Translation
private bool _rejit;
private int _callCount;
+ public bool HighCq => !_rejit;
+
public TranslatedFunction(GuestFunction func, bool rejit)
{
_func = func;
@@ -26,5 +30,10 @@ namespace ARMeilleure.Translation
{
return _rejit && Interlocked.Increment(ref _callCount) == MinCallsForRejit;
}
+
+ public IntPtr GetPointer()
+ {
+ return Marshal.GetFunctionPointerForDelegate(_func);
+ }
}
}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs
index 3008303e7..9d534d58d 100644
--- a/ARMeilleure/Translation/Translator.cs
+++ b/ARMeilleure/Translation/Translator.cs
@@ -16,10 +16,14 @@ namespace ARMeilleure.Translation
{
private const ulong CallFlag = InstEmitFlowHelper.CallFlag;
+ private const bool AlwaysTranslateFunctions = true; // If false, only translates a single block for lowCq.
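// Illustrative sketch, not part of the patch: how a direct branch emitted by the translator
// might consume an entry reserved with JumpTable.ReserveTableEntry above. The entry index is
// baked into the generated code; only the host pointer at offset 8 is read at run time, so
// RegisterFunction can retarget the branch later just by rewriting that slot.
static IntPtr ResolveDirectBranchExample(int entry)
{
    IntPtr slot = JumpTable.Instance.JumpPointer + entry * JumpTable.JumpTableStride;

    return (IntPtr)Marshal.ReadInt64(slot, 8); // Translated function, or the direct call stub.
}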
+
private MemoryManager _memory;
private ConcurrentDictionary<ulong, TranslatedFunction> _funcs;
+ private JumpTable _jumpTable;
+
private PriorityQueue<RejitRequest> _backgroundQueue;
private AutoResetEvent _backgroundTranslatorEvent;
@@ -32,9 +36,13 @@ namespace ARMeilleure.Translation
_funcs = new ConcurrentDictionary<ulong, TranslatedFunction>();
+ _jumpTable = JumpTable.Instance;
+
_backgroundQueue = new PriorityQueue<RejitRequest>(2);
_backgroundTranslatorEvent = new AutoResetEvent(false);
+
+ DirectCallStubs.InitializeStubs();
}
private void TranslateQueuedSubs()
@@ -46,30 +54,42 @@ namespace ARMeilleure.Translation
TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true);
_funcs.AddOrUpdate(request.Address, func, (key, oldFunc) => func);
+ _jumpTable.RegisterFunction(request.Address, func);
}
else
{
_backgroundTranslatorEvent.WaitOne();
}
}
+ _backgroundTranslatorEvent.Set(); // Wake up any other background translator threads, to encourage them to exit.
}
public void Execute(State.ExecutionContext context, ulong address)
{
if (Interlocked.Increment(ref _threadCount) == 1)
{
- Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs)
+ // Simple heuristic, should be user configurable in future. (1 for 4 core/ht or less, 2 for 6 core+ht etc).
+ // All threads are normal priority except for the last, which just fills as much of the last core as the os lets it with a low priority.
+ // If we only have one rejit thread, it should be normal priority as highCq code is performance critical.
+ // TODO: Use physical cores rather than logical. This only really makes sense for processors with hyperthreading. Requires OS specific code.
+ int unboundedThreadCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3);
+ int threadCount = Math.Min(3, unboundedThreadCount);
+ for (int i = 0; i < threadCount; i++)
{
- Name = "CPU.BackgroundTranslatorThread",
- Priority = ThreadPriority.Lowest
- };
+ bool last = i != 0 && i == unboundedThreadCount - 1;
+ Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs)
+ {
+ Name = "CPU.BackgroundTranslatorThread." + i,
+ Priority = last ? ThreadPriority.Lowest : ThreadPriority.Normal
+ };
- backgroundTranslatorThread.Start();
+ backgroundTranslatorThread.Start();
+ }
}
Statistics.InitializeTimer();
- NativeInterface.RegisterThread(context, _memory);
+ NativeInterface.RegisterThread(context, _memory, this);
do
{
@@ -98,7 +118,7 @@ namespace ARMeilleure.Translation
return nextAddr;
}
- private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
+ internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
{
// TODO: Investigate how we should handle code at unaligned addresses.
// Currently, those low bits are used to store special flags.
@@ -124,12 +144,12 @@ namespace ARMeilleure.Translation
private TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq)
{
- ArmEmitterContext context = new ArmEmitterContext(_memory, Aarch32Mode.User);
+ ArmEmitterContext context = new ArmEmitterContext(_memory, _jumpTable, (long)address, highCq, Aarch32Mode.User);
Logger.StartPass(PassName.Decoding);
- Block[] blocks = highCq
- ? Decoder.DecodeFunction (_memory, address, mode)
+ Block[] blocks = AlwaysTranslateFunctions
+ ? Decoder.DecodeFunction (_memory, address, mode, highCq)
: Decoder.DecodeBasicBlock(_memory, address, mode);
Logger.EndPass(PassName.Decoding);
@@ -216,7 +236,7 @@ namespace ARMeilleure.Translation
// with some kind of branch).
if (isLastOp && block.Next == null)
{
- context.Return(Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
+ InstEmitFlowHelper.EmitTailContinue(context, Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
}
}
}
@@ -238,7 +258,11 @@ namespace ARMeilleure.Translation
context.BranchIfTrue(lblNonZero, count);
- context.Call(new _Void(NativeInterface.CheckSynchronization));
+ Operand running = context.Call(new _Bool(NativeInterface.CheckSynchronization));
+
+ context.BranchIfTrue(lblExit, running);
+
+ context.Return(Const(0L));
context.Branch(lblExit);
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
index 1a213b924..c4161d542 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
@@ -137,7 +137,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
public void ExitThread(KThread thread)
{
- thread.Context.Running = false;
+ thread.Context.StopRunning();
CoreManager.Exit(thread.HostThread);
}
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
index 53eb5bdc9..cd60c9550 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
@@ -1141,9 +1141,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
{
Owner.Translator.Execute(Context, entrypoint);
- Context.Dispose();
-
ThreadExit();
+
+ Context.Dispose();
}
private void ThreadExit()
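The background-translator sizing added in Translator.Execute is easier to see as a small worked example. The sketch below is not part of the patch and its names are illustrative; it only distills the arithmetic and the priority rule for a few logical-core counts.

// Distilled form of the heuristic in Translator.Execute, for illustration only.
static (int threads, bool lastIsLowPriority) TranslatorThreads(int logicalCores)
{
    int unbounded = Math.Max(1, (logicalCores - 6) / 3);
    int count     = Math.Min(3, unbounded);

    // Only the final thread drops to lowest priority, and only when the cap of 3 was not hit.
    return (count, count > 1 && count == unbounded);
}

// logicalCores:  4 -> (1, false)   8 -> (1, false)   12 -> (2, true)   16 -> (3, true)   24 -> (3, false)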