From 2502f1f07f31abe30a641d651c9640f3d81c2c0f Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Thu, 24 Dec 2020 03:58:36 +0100
Subject: [PATCH] Free up memory allocated by Pools during any PPTC translations at boot time. (#1814)

* Added support for offline invalidation, via PPTC, of low cq translations replaced by high cq translations; both on a single run and between runs. Added invalidation of .cache files in the event of reuse on a different user operating system. Added .info and .cache files invalidation in case of a failed stream decompression. Nits.

* InternalVersion = 1712;

* Nits.

* Address comment.

* Get rid of BinaryFormatter. Nits.

* Move Ptc.LoadTranslations(). Nits.

* Nits.

* Fixed corner cases (in case backup copies have to be used). Added save logs.

* Non-core fixes.

* Complement to the previous commit. Added load logs. Removed BinaryFormatter leftovers.

* Add LoadTranslations log.

* Nits.

* Removed the search and management of LowCq overlapping functions.

* Final increment of .info and .cache flags.

* Nit.

* Free up memory allocated by Pools during any PPTC translations at boot time.

* Nit due to rebase.
--- ARMeilleure/Common/BitMapPool.cs | 5 +- ARMeilleure/Common/ThreadStaticPool.cs | 25 +++++++- .../OperandHelper.cs | 10 +++- .../OperationHelper.cs | 7 ++- ARMeilleure/Translation/DirectCallStubs.cs | 2 + ARMeilleure/Translation/PTC/Ptc.cs | 57 ++++++++++++------- ARMeilleure/Translation/PTC/PtcProfiler.cs | 12 ++-- ARMeilleure/Translation/Translator.cs | 12 +++- 8 files changed, 93 insertions(+), 37 deletions(-) diff --git a/ARMeilleure/Common/BitMapPool.cs b/ARMeilleure/Common/BitMapPool.cs index caba23171..aac32d55f 100644 --- a/ARMeilleure/Common/BitMapPool.cs +++ b/ARMeilleure/Common/BitMapPool.cs @@ -1,6 +1,4 @@ -using System; - -namespace ARMeilleure.Common +namespace ARMeilleure.Common { static class BitMapPool { @@ -8,6 +6,7 @@ namespace ARMeilleure.Common { BitMap result = ThreadStaticPool.Instance.Allocate(); result.Reset(initialCapacity); + return result; } diff --git a/ARMeilleure/Common/ThreadStaticPool.cs b/ARMeilleure/Common/ThreadStaticPool.cs index cf3a7bb4d..3fce28ec0 100644 --- a/ARMeilleure/Common/ThreadStaticPool.cs +++ b/ARMeilleure/Common/ThreadStaticPool.cs @@ -5,12 +5,13 @@ using System.Threading; namespace ARMeilleure.Common { - internal class ThreadStaticPool where T : class, new() + class ThreadStaticPool where T : class, new() { private const int PoolSizeIncrement = 200; [ThreadStatic] private static ThreadStaticPool _instance; + public static ThreadStaticPool Instance { get @@ -19,6 +20,7 @@ namespace ARMeilleure.Common { PreparePool(0); // So that we can still use a pool when blindly initializing one. } + return _instance; } } @@ -33,9 +35,10 @@ namespace ARMeilleure.Common public static void PreparePool(int groupId) { // Prepare the pool for this thread, ideally using an existing one from the specified group. + if (_instance == null) { - Stack> pools = GetPools(groupId); + var pools = GetPools(groupId); lock (pools) { _instance = (pools.Count != 0) ? 
pools.Pop() : new ThreadStaticPool(PoolSizeIncrement * 2); @@ -46,15 +49,29 @@ namespace ARMeilleure.Common public static void ReturnPool(int groupId) { // Reset and return the pool for this thread to the specified group. - Stack> pools = GetPools(groupId); + + var pools = GetPools(groupId); lock (pools) { _instance.Clear(); pools.Push(_instance); + _instance = null; } } + public static void ResetPools() + { + // Resets any static references to the pools used by threads for each group, allowing them to be garbage collected. + + foreach (var pools in _pools.Values) + { + pools.Clear(); + } + + _pools.Clear(); + } + private T[] _pool; private int _poolUsed = -1; private int _poolSize; @@ -74,10 +91,12 @@ namespace ARMeilleure.Common public T Allocate() { int index = Interlocked.Increment(ref _poolUsed); + if (index >= _poolSize) { IncreaseSize(); } + return _pool[index]; } diff --git a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs index c97023fce..f7381d869 100644 --- a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs +++ b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs @@ -90,10 +90,16 @@ namespace ARMeilleure.IntermediateRepresentation ThreadStaticPool.PreparePool(highCq ? 1 : 0); } - public static void ResetOperandPool(bool highCq) + public static void ReturnOperandPool(bool highCq) { ThreadStaticPool.ReturnPool(highCq ? 1 : 0); ThreadStaticPool.ReturnPool(highCq ? 
1 : 0); } + + public static void ResetOperandPools() + { + ThreadStaticPool.ResetPools(); + ThreadStaticPool.ResetPools(); + } } -} \ No newline at end of file +} diff --git a/ARMeilleure/IntermediateRepresentation/OperationHelper.cs b/ARMeilleure/IntermediateRepresentation/OperationHelper.cs index 20c7d4efb..538bdac48 100644 --- a/ARMeilleure/IntermediateRepresentation/OperationHelper.cs +++ b/ARMeilleure/IntermediateRepresentation/OperationHelper.cs @@ -51,9 +51,14 @@ namespace ARMeilleure.IntermediateRepresentation ThreadStaticPool.PreparePool(highCq ? 1 : 0); } - public static void ResetOperationPool(bool highCq) + public static void ReturnOperationPool(bool highCq) { ThreadStaticPool.ReturnPool(highCq ? 1 : 0); } + + public static void ResetOperationPools() + { + ThreadStaticPool.ResetPools(); + } } } diff --git a/ARMeilleure/Translation/DirectCallStubs.cs b/ARMeilleure/Translation/DirectCallStubs.cs index df7ca16e7..57397d14b 100644 --- a/ARMeilleure/Translation/DirectCallStubs.cs +++ b/ARMeilleure/Translation/DirectCallStubs.cs @@ -34,6 +34,8 @@ namespace ARMeilleure.Translation _indirectCallStubPtr = Marshal.GetFunctionPointerForDelegate(GenerateIndirectCallStub(false)); _indirectTailCallStubPtr = Marshal.GetFunctionPointerForDelegate(GenerateIndirectCallStub(true)); + Translator.ResetPools(); + _initialized = true; } } diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index b5a92b976..344925598 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -8,13 +8,13 @@ using Ryujinx.Common.Logging; using System; using System.Buffers.Binary; using System.Collections.Concurrent; +using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.IO.Compression; using System.Runtime.InteropServices; using System.Security.Cryptography; using System.Threading; -using System.Threading.Tasks; namespace ARMeilleure.Translation.PTC { @@ -664,35 +664,50 @@ namespace 
ARMeilleure.Translation.PTC ThreadPool.QueueUserWorkItem(TranslationLogger, profiledFuncsToTranslate.Count); + void TranslateFuncs() + { + while (profiledFuncsToTranslate.TryDequeue(out var item)) + { + ulong address = item.address; + + Debug.Assert(PtcProfiler.IsAddressInStaticCodeRange(address)); + + TranslatedFunction func = Translator.Translate(memory, jumpTable, address, item.mode, item.highCq); + + bool isAddressUnique = funcs.TryAdd(address, func); + + Debug.Assert(isAddressUnique, $"The address 0x{address:X16} is not unique."); + + Interlocked.Increment(ref _translateCount); + + if (State != PtcState.Enabled) + { + break; + } + } + } + int maxDegreeOfParallelism = (Environment.ProcessorCount * 3) / 4; - Parallel.ForEach(profiledFuncsToTranslate, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, (item, state) => + List threads = new List(); + + for (int i = 0; i < maxDegreeOfParallelism; i++) { - ulong address = item.Key; + Thread thread = new Thread(TranslateFuncs); + thread.IsBackground = true; - Debug.Assert(PtcProfiler.IsAddressInStaticCodeRange(address)); + threads.Add(thread); + } - TranslatedFunction func = Translator.Translate(memory, jumpTable, address, item.Value.mode, item.Value.highCq); + threads.ForEach((thread) => thread.Start()); + threads.ForEach((thread) => thread.Join()); - bool isAddressUnique = funcs.TryAdd(address, func); - - Debug.Assert(isAddressUnique, $"The address 0x{address:X16} is not unique."); - - if (func.HighCq) - { - jumpTable.RegisterFunction(address, func); - } - - Interlocked.Increment(ref _translateCount); - - if (State != PtcState.Enabled) - { - state.Stop(); - } - }); + threads.Clear(); _loggerEvent.Set(); + Translator.ResetPools(); + PtcJumpTable.Initialize(jumpTable); PtcJumpTable.ReadJumpTable(jumpTable); diff --git a/ARMeilleure/Translation/PTC/PtcProfiler.cs b/ARMeilleure/Translation/PTC/PtcProfiler.cs index bc9814ecc..0def32c32 100644 --- a/ARMeilleure/Translation/PTC/PtcProfiler.cs +++ 
b/ARMeilleure/Translation/PTC/PtcProfiler.cs @@ -85,15 +85,17 @@ namespace ARMeilleure.Translation.PTC return address >= StaticCodeStart && address < StaticCodeStart + StaticCodeSize; } - internal static Dictionary GetProfiledFuncsToTranslate(ConcurrentDictionary funcs) + internal static ConcurrentQueue<(ulong address, ExecutionMode mode, bool highCq)> GetProfiledFuncsToTranslate(ConcurrentDictionary funcs) { - var profiledFuncsToTranslate = new Dictionary(ProfiledFuncs); + var profiledFuncsToTranslate = new ConcurrentQueue<(ulong address, ExecutionMode mode, bool highCq)>(); - foreach (ulong address in profiledFuncsToTranslate.Keys) + foreach (var profiledFunc in ProfiledFuncs) { - if (funcs.ContainsKey(address)) + ulong address = profiledFunc.Key; + + if (!funcs.ContainsKey(address)) { - profiledFuncsToTranslate.Remove(address); + profiledFuncsToTranslate.Enqueue((address, profiledFunc.Value.mode, profiledFunc.Value.highCq)); } } diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index d78f5e212..612f66479 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -148,6 +148,8 @@ namespace ARMeilleure.Translation ClearJitCache(); + ResetPools(); + _jumpTable.Dispose(); _jumpTable = null; } @@ -249,12 +251,18 @@ namespace ARMeilleure.Translation } } - ResetOperandPool(highCq); - ResetOperationPool(highCq); + ReturnOperandPool(highCq); + ReturnOperationPool(highCq); return new TranslatedFunction(func, funcSize, highCq); } + internal static void ResetPools() + { + ResetOperandPools(); + ResetOperationPools(); + } + private struct Range { public ulong Start { get; }