diff --git a/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs b/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs new file mode 100644 index 000000000..a200f54e0 --- /dev/null +++ b/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs @@ -0,0 +1,66 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Diagnostics; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class BlockPlacement + { + public static void RunPass(ControlFlowGraph cfg) + { + bool update = false; + + BasicBlock block; + BasicBlock nextBlock; + + BasicBlock lastBlock = cfg.Blocks.Last; + + // Move cold blocks at the end of the list, so that they are emitted away from hot code. + for (block = cfg.Blocks.First; block != lastBlock; block = nextBlock) + { + nextBlock = block.ListNext; + + if (block.Frequency == BasicBlockFrequency.Cold) + { + cfg.Blocks.Remove(block); + cfg.Blocks.AddLast(block); + } + } + + for (block = cfg.Blocks.First; block != null; block = nextBlock) + { + nextBlock = block.ListNext; + + if (block.SuccessorCount == 2 && block.Operations.Last is Operation branchOp) + { + Debug.Assert(branchOp.Instruction == Instruction.BranchIf); + + BasicBlock falseSucc = block.GetSuccessor(0); + BasicBlock trueSucc = block.GetSuccessor(1); + + // If true successor is next block in list, invert the condition. We avoid extra branching by + // making the true side the fallthrough (i.e, convert it to the false side). + if (trueSucc == block.ListNext) + { + Comparison comp = (Comparison)branchOp.GetSource(2).AsInt32(); + Comparison compInv = comp.Invert(); + + branchOp.SetSource(2, Const((int)compInv)); + + block.SetSuccessor(0, trueSucc); + block.SetSuccessor(1, falseSucc); + + update = true; + } + } + } + + if (update) + { + cfg.Update(removeUnreachableBlocks: false); + } + } + } +} diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index a51f4a137..c9acd9450 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -106,6 +106,8 @@ namespace ARMeilleure.CodeGen.X86 X86Optimizer.RunPass(cfg); + BlockPlacement.RunPass(cfg); + Logger.EndPass(PassName.Optimization, cfg); Logger.StartPass(PassName.PreAllocation); @@ -186,9 +188,11 @@ namespace ARMeilleure.CodeGen.X86 } } + byte[] code = context.GetCode(); + Logger.EndPass(PassName.CodeGeneration); - return new CompiledFunction(context.GetCode(), unwindInfo); + return new CompiledFunction(code, unwindInfo); } } diff --git a/ARMeilleure/Diagnostics/IRDumper.cs b/ARMeilleure/Diagnostics/IRDumper.cs index 90eb2300a..1a2ffae9c 100644 --- a/ARMeilleure/Diagnostics/IRDumper.cs +++ b/ARMeilleure/Diagnostics/IRDumper.cs @@ -57,6 +57,11 @@ namespace ARMeilleure.Diagnostics { DumpBlockName(block); + if (block.Frequency == BasicBlockFrequency.Cold) + { + _builder.Append(" cold"); + } + if (block.SuccessorCount > 0) { _builder.Append(" ("); diff --git a/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/ARMeilleure/Instructions/InstEmitFlowHelper.cs index 87549aa3f..f5f228f53 100644 --- a/ARMeilleure/Instructions/InstEmitFlowHelper.cs +++ b/ARMeilleure/Instructions/InstEmitFlowHelper.cs @@ -174,7 +174,7 @@ namespace ARMeilleure.Instructions Operand lblContinue = context.GetLabel(nextAddr.Value); // We need to clear out the call flag for the return address before comparing it. - context.BranchIf(lblContinue, context.BitwiseAnd(returnAddress, Const(~CallFlag)), nextAddr, Comparison.Equal); + context.BranchIf(lblContinue, context.BitwiseAnd(returnAddress, Const(~CallFlag)), nextAddr, Comparison.Equal, BasicBlockFrequency.Cold); context.Return(returnAddress); } diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs index 91227bc59..390d167db 100644 --- a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs +++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs @@ -147,7 +147,7 @@ namespace ARMeilleure.Instructions context.Branch(lblEnd); - context.MarkLabel(lblSlowPath); + context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold); EmitReadIntFallback(context, address, rt, size); @@ -165,7 +165,7 @@ namespace ARMeilleure.Instructions Operand lblFastPath = Label(); - context.BranchIfFalse(lblFastPath, isUnalignedAddr); + context.BranchIfFalse(lblFastPath, isUnalignedAddr, BasicBlockFrequency.Cold); // The call is not expected to return (it should throw). context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address); @@ -216,7 +216,7 @@ namespace ARMeilleure.Instructions context.Branch(lblEnd); - context.MarkLabel(lblSlowPath); + context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold); EmitReadVectorFallback(context, address, vector, rt, elem, size); @@ -256,7 +256,7 @@ namespace ARMeilleure.Instructions context.Branch(lblEnd); - context.MarkLabel(lblSlowPath); + context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold); EmitWriteIntFallback(context, address, rt, size); @@ -274,7 +274,7 @@ namespace ARMeilleure.Instructions Operand lblFastPath = Label(); - context.BranchIfFalse(lblFastPath, isUnalignedAddr); + context.BranchIfFalse(lblFastPath, isUnalignedAddr, BasicBlockFrequency.Cold); // The call is not expected to return (it should throw). context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address); @@ -331,7 +331,7 @@ namespace ARMeilleure.Instructions context.Branch(lblEnd); - context.MarkLabel(lblSlowPath); + context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold); EmitWriteVectorFallback(context, address, rt, elem, size); @@ -402,7 +402,7 @@ namespace ARMeilleure.Instructions Operand lblNotWatched = Label(); // Is the page currently being monitored for modifications? If so we need to call MarkRegionAsModified. - context.BranchIf(lblNotWatched, pte, Const(0L), Comparison.GreaterOrEqual); + context.BranchIf(lblNotWatched, pte, Const(0L), Comparison.GreaterOrEqual, BasicBlockFrequency.Cold); // Mark the region as modified. Size here doesn't matter as address is assumed to be size aligned here. context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.MarkRegionAsModified)), address, Const(1UL)); @@ -412,7 +412,7 @@ namespace ARMeilleure.Instructions Operand lblNonNull = Label(); // Skip exception if the PTE address is non-null (not zero). - context.BranchIfTrue(lblNonNull, pte); + context.BranchIfTrue(lblNonNull, pte, BasicBlockFrequency.Cold); // The call is not expected to return (it should throw). context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address); diff --git a/ARMeilleure/IntermediateRepresentation/BasicBlock.cs b/ARMeilleure/IntermediateRepresentation/BasicBlock.cs index 640978fe7..056a9d469 100644 --- a/ARMeilleure/IntermediateRepresentation/BasicBlock.cs +++ b/ARMeilleure/IntermediateRepresentation/BasicBlock.cs @@ -5,10 +5,12 @@ namespace ARMeilleure.IntermediateRepresentation { class BasicBlock : IIntrusiveListNode { - private readonly List _successors = new List(); + private readonly List _successors; public int Index { get; set; } + public BasicBlockFrequency Frequency { get; set; } + public BasicBlock ListPrevious { get; set; } public BasicBlock ListNext { get; set; } @@ -25,6 +27,8 @@ namespace ARMeilleure.IntermediateRepresentation public BasicBlock(int index) { + _successors = new List(); + Operations = new IntrusiveList(); Predecessors = new List(); DominanceFrontiers = new HashSet(); diff --git a/ARMeilleure/IntermediateRepresentation/BasicBlockFrequency.cs b/ARMeilleure/IntermediateRepresentation/BasicBlockFrequency.cs new file mode 100644 index 000000000..96cfee35a --- /dev/null +++ b/ARMeilleure/IntermediateRepresentation/BasicBlockFrequency.cs @@ -0,0 +1,8 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + enum BasicBlockFrequency + { + Default, + Cold + } +} \ No newline at end of file diff --git a/ARMeilleure/Translation/ControlFlowGraph.cs b/ARMeilleure/Translation/ControlFlowGraph.cs index 349635344..ee1a245e0 100644 --- a/ARMeilleure/Translation/ControlFlowGraph.cs +++ b/ARMeilleure/Translation/ControlFlowGraph.cs @@ -7,26 +7,37 @@ namespace ARMeilleure.Translation { class ControlFlowGraph { + private BasicBlock[] _postOrderBlocks; + private int[] _postOrderMap; + public BasicBlock Entry { get; } public IntrusiveList Blocks { get; } - public BasicBlock[] PostOrderBlocks { get; } - public int[] PostOrderMap { get; } + public BasicBlock[] PostOrderBlocks => _postOrderBlocks; + public int[] PostOrderMap => _postOrderMap; public ControlFlowGraph(BasicBlock entry, IntrusiveList blocks) { Entry = entry; Blocks = blocks; - RemoveUnreachableBlocks(blocks); + Update(removeUnreachableBlocks: true); + } + + public void Update(bool removeUnreachableBlocks) + { + if (removeUnreachableBlocks) + { + RemoveUnreachableBlocks(Blocks); + } var visited = new HashSet(); var blockStack = new Stack(); - PostOrderBlocks = new BasicBlock[blocks.Count]; - PostOrderMap = new int[blocks.Count]; + Array.Resize(ref _postOrderBlocks, Blocks.Count); + Array.Resize(ref _postOrderMap, Blocks.Count); - visited.Add(entry); - blockStack.Push(entry); + visited.Add(Entry); + blockStack.Push(Entry); int index = 0; diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs index 2261fb877..d85a502b1 100644 --- a/ARMeilleure/Translation/EmitterContext.cs +++ b/ARMeilleure/Translation/EmitterContext.cs @@ -20,6 +20,7 @@ namespace ARMeilleure.Translation private BasicBlock _ifBlock; private bool _needsNewBlock; + private BasicBlockFrequency _nextBlockFreq; public EmitterContext() { @@ -27,6 +28,7 @@ namespace ARMeilleure.Translation _irBlocks = new IntrusiveList(); _needsNewBlock = true; + _nextBlockFreq = BasicBlockFrequency.Default; } public Operand Add(Operand op1, Operand op2) @@ -58,24 +60,24 @@ namespace ARMeilleure.Translation { NewNextBlockIfNeeded(); - BranchToLabel(label, uncond: true); + BranchToLabel(label, uncond: true, BasicBlockFrequency.Default); } - public void BranchIf(Operand label, Operand op1, Operand op2, Comparison comp) + public void BranchIf(Operand label, Operand op1, Operand op2, Comparison comp, BasicBlockFrequency falseFreq = default) { Add(Instruction.BranchIf, null, op1, op2, Const((int)comp)); - BranchToLabel(label, uncond: false); + BranchToLabel(label, uncond: false, falseFreq); } - public void BranchIfFalse(Operand label, Operand op1) + public void BranchIfFalse(Operand label, Operand op1, BasicBlockFrequency falseFreq = default) { - BranchIf(label, op1, Const(op1.Type, 0), Comparison.Equal); + BranchIf(label, op1, Const(op1.Type, 0), Comparison.Equal, falseFreq); } - public void BranchIfTrue(Operand label, Operand op1) + public void BranchIfTrue(Operand label, Operand op1, BasicBlockFrequency falseFreq = default) { - BranchIf(label, op1, Const(op1.Type, 0), Comparison.NotEqual); + BranchIf(label, op1, Const(op1.Type, 0), Comparison.NotEqual, falseFreq); } public Operand ByteSwap(Operand op1) @@ -582,7 +584,7 @@ namespace ARMeilleure.Translation return dest; } - private void BranchToLabel(Operand label, bool uncond) + private void BranchToLabel(Operand label, bool uncond, BasicBlockFrequency nextFreq) { if (!_irLabels.TryGetValue(label, out BasicBlock branchBlock)) { @@ -602,10 +604,13 @@ namespace ARMeilleure.Translation } _needsNewBlock = true; + _nextBlockFreq = nextFreq; } - public void MarkLabel(Operand label) + public void MarkLabel(Operand label, BasicBlockFrequency nextFreq = default) { + _nextBlockFreq = nextFreq; + if (_irLabels.TryGetValue(label, out BasicBlock nextBlock)) { nextBlock.Index = _irBlocks.Count; @@ -633,7 +638,7 @@ namespace ARMeilleure.Translation private void NextBlock(BasicBlock nextBlock) { - if (_irBlock != null && _irBlock.SuccessorCount == 0 && !EndsWithUnconditional(_irBlock)) + if (_irBlock?.SuccessorCount == 0 && !EndsWithUnconditional(_irBlock)) { _irBlock.AddSuccessor(nextBlock); @@ -646,8 +651,10 @@ namespace ARMeilleure.Translation } _irBlock = nextBlock; + _irBlock.Frequency = _nextBlockFreq; _needsNewBlock = false; + _nextBlockFreq = BasicBlockFrequency.Default; } private static bool EndsWithUnconditional(BasicBlock block) diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index ae884ab65..89aeed08a 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -21,7 +21,7 @@ namespace ARMeilleure.Translation.PTC { private const string HeaderMagic = "PTChd"; - private const int InternalVersion = 1535; //! To be incremented manually for each change to the ARMeilleure project. + private const int InternalVersion = 1549; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index 1a02bce0c..4448abd76 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -301,11 +301,11 @@ namespace ARMeilleure.Translation Operand lblNonZero = Label(); Operand lblExit = Label(); - context.BranchIfTrue(lblNonZero, count); + context.BranchIfTrue(lblNonZero, count, BasicBlockFrequency.Cold); Operand running = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization))); - context.BranchIfTrue(lblExit, running); + context.BranchIfTrue(lblExit, running, BasicBlockFrequency.Cold); context.Return(Const(0L));