Remove cold methods from the CPU cache (#224)

* Remove unused tracing functionality from the CPU

* GetNsoExecutable -> GetExecutable

* Unsigned comparison

* Re-add cpu tracing

* Config change

* Remove cold methods from the translation cache on the cpu

* Replace lock with try lock, pass new ATranslatorCache instead of ATranslator

* Rebase fixups
This commit is contained in:
gdkchan 2018-09-19 17:07:56 -03:00 committed by GitHub
parent 99b2692425
commit 6d65e53664
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 318 additions and 232 deletions

View File

@ -2,8 +2,6 @@ using System.Runtime.Intrinsics.X86;
public static class AOptimizations
{
public static bool GenerateCallStack = true;
private static bool UseAllSseIfAvailable = true;
private static bool UseSseIfAvailable = true;

View File

@ -13,6 +13,8 @@ namespace ChocolArm64
{
private delegate long AA64Subroutine(AThreadState Register, AMemory Memory);
private const int MinCallCountForReJit = 250;
private AA64Subroutine ExecDelegate;
public static int StateArgIdx { get; private set; }
@ -32,8 +34,6 @@ namespace ChocolArm64
private bool NeedsReJit;
private int MinCallCountForReJit = 250;
public ATranslatedSub(DynamicMethod Method, List<ARegister> Params)
{
if (Method == null)

View File

@ -1,38 +1,24 @@
using ChocolArm64.Decoder;
using ChocolArm64.Events;
using ChocolArm64.Instruction;
using ChocolArm64.Memory;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Reflection.Emit;
namespace ChocolArm64
{
public class ATranslator
{
private ConcurrentDictionary<long, ATranslatedSub> CachedSubs;
private ConcurrentDictionary<long, string> SymbolTable;
private ATranslatorCache Cache;
public event EventHandler<ACpuTraceEventArgs> CpuTrace;
public bool EnableCpuTrace { get; set; }
public ATranslator(IReadOnlyDictionary<long, string> SymbolTable = null)
public ATranslator()
{
CachedSubs = new ConcurrentDictionary<long, ATranslatedSub>();
if (SymbolTable != null)
{
this.SymbolTable = new ConcurrentDictionary<long, string>(SymbolTable);
}
else
{
this.SymbolTable = new ConcurrentDictionary<long, string>();
}
Cache = new ATranslatorCache();
}
internal void ExecuteSubroutine(AThread Thread, long Position)
@ -70,15 +56,10 @@ namespace ChocolArm64
{
if (EnableCpuTrace)
{
if (!SymbolTable.TryGetValue(Position, out string SubName))
{
SubName = string.Empty;
CpuTrace?.Invoke(this, new ACpuTraceEventArgs(Position));
}
CpuTrace?.Invoke(this, new ACpuTraceEventArgs(Position, SubName));
}
if (!CachedSubs.TryGetValue(Position, out ATranslatedSub Sub))
if (!Cache.TryGetSubroutine(Position, out ATranslatedSub Sub))
{
Sub = TranslateTier0(State, Memory, Position);
}
@ -93,37 +74,20 @@ namespace ChocolArm64
while (Position != 0 && State.Running);
}
internal bool TryGetCachedSub(AOpCode OpCode, out ATranslatedSub Sub)
{
if (OpCode.Emitter != AInstEmit.Bl)
{
Sub = null;
return false;
}
return TryGetCachedSub(((AOpCodeBImmAl)OpCode).Imm, out Sub);
}
internal bool TryGetCachedSub(long Position, out ATranslatedSub Sub)
{
return CachedSubs.TryGetValue(Position, out Sub);
}
internal bool HasCachedSub(long Position)
{
return CachedSubs.ContainsKey(Position);
return Cache.HasSubroutine(Position);
}
private ATranslatedSub TranslateTier0(AThreadState State, AMemory Memory, long Position)
{
ABlock Block = ADecoder.DecodeBasicBlock(State, this, Memory, Position);
ABlock Block = ADecoder.DecodeBasicBlock(State, Memory, Position);
ABlock[] Graph = new ABlock[] { Block };
string SubName = GetSubName(Position);
string SubName = GetSubroutineName(Position);
AILEmitterCtx Context = new AILEmitterCtx(this, Graph, Block, SubName);
AILEmitterCtx Context = new AILEmitterCtx(Cache, Graph, Block, SubName);
do
{
@ -135,7 +99,7 @@ namespace ChocolArm64
Subroutine.SetType(ATranslatedSubType.SubTier0);
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
Cache.AddOrUpdate(Position, Subroutine, Block.OpCodes.Count);
AOpCode LastOp = Block.GetLastOp();
@ -144,13 +108,11 @@ namespace ChocolArm64
private void TranslateTier1(AThreadState State, AMemory Memory, long Position)
{
(ABlock[] Graph, ABlock Root) Cfg = ADecoder.DecodeSubroutine(State, this, Memory, Position);
(ABlock[] Graph, ABlock Root) = ADecoder.DecodeSubroutine(Cache, State, Memory, Position);
string SubName = GetSubName(Position);
string SubName = GetSubroutineName(Position);
PropagateName(Cfg.Graph, SubName);
AILEmitterCtx Context = new AILEmitterCtx(this, Cfg.Graph, Cfg.Root, SubName);
AILEmitterCtx Context = new AILEmitterCtx(Cache, Graph, Root, SubName);
if (Context.CurrBlock.Position != Position)
{
@ -165,11 +127,11 @@ namespace ChocolArm64
//Mark all methods that call this method for ReJiting,
//since we can now call it directly which is faster.
if (CachedSubs.TryGetValue(Position, out ATranslatedSub OldSub))
if (Cache.TryGetSubroutine(Position, out ATranslatedSub OldSub))
{
foreach (long CallerPos in OldSub.GetCallerPositions())
{
if (CachedSubs.TryGetValue(Position, out ATranslatedSub CallerSub))
if (Cache.TryGetSubroutine(Position, out ATranslatedSub CallerSub))
{
CallerSub.MarkForReJit();
}
@ -180,27 +142,24 @@ namespace ChocolArm64
Subroutine.SetType(ATranslatedSubType.SubTier1);
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
Cache.AddOrUpdate(Position, Subroutine, GetGraphInstCount(Graph));
}
private string GetSubName(long Position)
private string GetSubroutineName(long Position)
{
return SymbolTable.GetOrAdd(Position, $"Sub{Position:x16}");
return $"Sub{Position:x16}";
}
private void PropagateName(ABlock[] Graph, string Name)
private int GetGraphInstCount(ABlock[] Graph)
{
int Size = 0;
foreach (ABlock Block in Graph)
{
AOpCode LastOp = Block.GetLastOp();
Size += Block.OpCodes.Count;
}
if (LastOp != null &&
(LastOp.Emitter == AInstEmit.Bl ||
LastOp.Emitter == AInstEmit.Blr))
{
SymbolTable.TryAdd(LastOp.Position + 4, Name);
}
}
return Size;
}
}
}

View File

@ -0,0 +1,164 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Threading;
namespace ChocolArm64
{
    //Cache of translated subroutines keyed by guest position.
    //Entries are also tracked in a linked list ordered from least to most
    //recently added/refreshed, so that the oldest ones can be dropped once
    //the accumulated size goes above MaxTotalSize.
    //All mutations of SortedCache, TotalSize and the set of keys in Cache
    //happen while holding the SortedCache lock; plain lookups are lock free.
    class ATranslatorCache
    {
        //Upper bound for the sum of the sizes of all cached subroutines.
        //The unit is whatever the caller passes as Size on AddOrUpdate.
        private const int MaxTotalSize = 2 * 1024 * 256;

        //Entries refreshed less than this many ticks (Environment.TickCount,
        //milliseconds) ago are never evicted.
        private const int MaxTimeDelta = 30000;

        //Number of lookups of an entry before it is moved to the end of the
        //list again (and gets a new timestamp).
        private const int MinCallCountForUpdate = 1000;

        //Cache entry: the subroutine, its node on the sorted list and the
        //bookkeeping used to decide when it can be evicted.
        private class CacheBucket
        {
            public ATranslatedSub Subroutine { get; private set; }

            public LinkedListNode<long> Node { get; private set; }

            public int CallCount { get; set; }

            public int Size { get; private set; }

            public int Timestamp { get; private set; }

            public CacheBucket(ATranslatedSub Subroutine, LinkedListNode<long> Node, int Size)
            {
                this.Subroutine = Subroutine;
                this.Size       = Size;

                UpdateNode(Node);
            }

            //Sets the list node that represents this bucket and stamps the
            //bucket with the current tick count.
            public void UpdateNode(LinkedListNode<long> Node)
            {
                this.Node = Node;

                Timestamp = Environment.TickCount;
            }
        }

        private readonly ConcurrentDictionary<long, CacheBucket> Cache;

        //Also used as the lock object for every structural change.
        private readonly LinkedList<long> SortedCache;

        private int TotalSize;

        public ATranslatorCache()
        {
            Cache = new ConcurrentDictionary<long, CacheBucket>();

            SortedCache = new LinkedList<long>();
        }

        //Adds a subroutine for Position, replacing (and un-accounting) any
        //subroutine previously cached for the same position.
        //Old entries are evicted first if the cache is above its size limit.
        public void AddOrUpdate(long Position, ATranslatedSub Subroutine, int Size)
        {
            ClearCacheIfNeeded();

            lock (SortedCache)
            {
                //TotalSize is only ever touched under the lock, otherwise
                //concurrent adds could lose updates.
                TotalSize += Size;

                if (Cache.TryGetValue(Position, out CacheBucket OldBucket))
                {
                    TotalSize -= OldBucket.Size;

                    SortedCache.Remove(OldBucket.Node);
                }

                LinkedListNode<long> Node = SortedCache.AddLast(Position);

                //Every writer holds the lock, so a plain store is enough and
                //avoids running side effects inside an update factory, which
                //ConcurrentDictionary is allowed to invoke more than once.
                Cache[Position] = new CacheBucket(Subroutine, Node, Size);
            }
        }

        //Returns true if a subroutine is cached for Position.
        public bool HasSubroutine(long Position)
        {
            return Cache.ContainsKey(Position);
        }

        //Looks up the subroutine cached for Position.
        //Returns false (and a null Subroutine) when there is none.
        //Frequently used entries are periodically moved to the end of the
        //sorted list so they are evicted last.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool TryGetSubroutine(long Position, out ATranslatedSub Subroutine)
        {
            if (Cache.TryGetValue(Position, out CacheBucket Bucket))
            {
                //The counter is intentionally not atomic, it is only a hint.
                if (Bucket.CallCount++ > MinCallCountForUpdate)
                {
                    //Never block the hot path, just skip the refresh when
                    //another thread is currently modifying the list.
                    if (Monitor.TryEnter(SortedCache))
                    {
                        try
                        {
                            //The bucket may have been evicted or replaced
                            //between the lookup and taking the lock. Its node
                            //is detached in that case (List is null), and
                            //removing it would throw, while re-adding the
                            //position would leave a stale node on the list.
                            if (Bucket.Node.List != null)
                            {
                                Bucket.CallCount = 0;

                                SortedCache.Remove(Bucket.Node);

                                Bucket.UpdateNode(SortedCache.AddLast(Position));
                            }
                        }
                        finally
                        {
                            Monitor.Exit(SortedCache);
                        }
                    }
                }

                Subroutine = Bucket.Subroutine;

                return true;
            }

            Subroutine = default(ATranslatedSub);

            return false;
        }

        //Evicts entries from the front of the sorted list (oldest first)
        //while the cache is above MaxTotalSize, stopping at the first entry
        //that was added/refreshed within the last MaxTimeDelta ticks.
        private void ClearCacheIfNeeded()
        {
            lock (SortedCache)
            {
                //Taken under the lock: no bucket can be stamped while it is
                //held, so no bucket is newer than this value.
                int Timestamp = Environment.TickCount;

                while (TotalSize > MaxTotalSize)
                {
                    LinkedListNode<long> Node = SortedCache.First;

                    if (Node == null)
                    {
                        break;
                    }

                    if (!Cache.TryGetValue(Node.Value, out CacheBucket Bucket))
                    {
                        //Defensive: a node without a bucket can't be evicted,
                        //drop it so the loop always makes progress.
                        SortedCache.Remove(Node);

                        continue;
                    }

                    int TimeDelta = RingDelta(Bucket.Timestamp, Timestamp);

                    if ((uint)TimeDelta <= (uint)MaxTimeDelta)
                    {
                        break;
                    }

                    Cache.TryRemove(Node.Value, out _);

                    TotalSize -= Bucket.Size;

                    if (Bucket.Node.List != null)
                    {
                        SortedCache.Remove(Bucket.Node);
                    }

                    if (Node.List != null)
                    {
                        SortedCache.Remove(Node);
                    }
                }
            }
        }

        //Distance from Old to New on the 32-bit tick counter ring.
        //Two's complement subtraction already yields the right result when
        //the counter wrapped around between the two samples.
        private static int RingDelta(int Old, int New)
        {
            return unchecked(New - Old);
        }
    }
}

View File

@ -19,11 +19,7 @@ namespace ChocolArm64.Decoder
OpActivators = new ConcurrentDictionary<Type, OpActivator>();
}
public static ABlock DecodeBasicBlock(
AThreadState State,
ATranslator Translator,
AMemory Memory,
long Start)
public static ABlock DecodeBasicBlock(AThreadState State, AMemory Memory, long Start)
{
ABlock Block = new ABlock(Start);
@ -33,8 +29,8 @@ namespace ChocolArm64.Decoder
}
public static (ABlock[] Graph, ABlock Root) DecodeSubroutine(
ATranslatorCache Cache,
AThreadState State,
ATranslator Translator,
AMemory Memory,
long Start)
{
@ -79,7 +75,7 @@ namespace ChocolArm64.Decoder
{
if (Op.Emitter == AInstEmit.Bl)
{
HasCachedSub = Translator.HasCachedSub(Op.Imm);
HasCachedSub = Cache.HasSubroutine(Op.Imm);
}
else
{

View File

@ -6,12 +6,9 @@ namespace ChocolArm64.Events
{
public long Position { get; private set; }
public string SubName { get; private set; }
public ACpuTraceEventArgs(long Position, string SubName)
public ACpuTraceEventArgs(long Position)
{
this.Position = Position;
this.SubName = SubName;
}
}
}

View File

@ -35,14 +35,6 @@ namespace ChocolArm64.Instruction
{
AOpCodeBImmAl Op = (AOpCodeBImmAl)Context.CurrOp;
if (AOptimizations.GenerateCallStack)
{
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
Context.EmitLdc_I8(Op.Imm);
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.EnterMethod));
}
Context.EmitLdc_I(Op.Position + 4);
Context.EmitStint(AThreadState.LRIndex);
Context.EmitStoreState();
@ -80,14 +72,6 @@ namespace ChocolArm64.Instruction
{
AOpCodeBReg Op = (AOpCodeBReg)Context.CurrOp;
if (AOptimizations.GenerateCallStack)
{
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
Context.EmitLdintzr(Op.Rn);
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.EnterMethod));
}
Context.EmitLdc_I(Op.Position + 4);
Context.EmitStint(AThreadState.LRIndex);
Context.EmitStoreState();
@ -100,14 +84,6 @@ namespace ChocolArm64.Instruction
{
AOpCodeBReg Op = (AOpCodeBReg)Context.CurrOp;
if (AOptimizations.GenerateCallStack)
{
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
Context.EmitLdintzr(Op.Rn);
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.JumpMethod));
}
Context.EmitStoreState();
Context.EmitLdintzr(Op.Rn);
@ -129,13 +105,6 @@ namespace ChocolArm64.Instruction
public static void Ret(AILEmitterCtx Context)
{
if (AOptimizations.GenerateCallStack)
{
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.ExitMethod));
}
Context.EmitStoreState();
Context.EmitLdint(AThreadState.LRIndex);

View File

@ -1,6 +1,5 @@
using ChocolArm64.Events;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
@ -86,17 +85,10 @@ namespace ChocolArm64.State
public event EventHandler<AInstExceptionEventArgs> SvcCall;
public event EventHandler<AInstUndefinedEventArgs> Undefined;
private Stack<long> CallStack;
private static Stopwatch TickCounter;
private static double HostTickFreq;
public AThreadState()
{
CallStack = new Stack<long>();
}
static AThreadState()
{
HostTickFreq = 1.0 / Stopwatch.Frequency;
@ -153,27 +145,5 @@ namespace ChocolArm64.State
{
Undefined?.Invoke(this, new AInstUndefinedEventArgs(Position, RawOpCode));
}
internal void EnterMethod(long Position)
{
CallStack.Push(Position);
}
internal void ExitMethod()
{
CallStack.TryPop(out _);
}
internal void JumpMethod(long Position)
{
CallStack.TryPop(out _);
CallStack.Push(Position);
}
public long[] GetCallStack()
{
return CallStack.ToArray();
}
}
}

View File

@ -10,7 +10,7 @@ namespace ChocolArm64.Translation
{
class AILEmitterCtx
{
private ATranslator Translator;
private ATranslatorCache Cache;
private Dictionary<long, AILLabel> Labels;
@ -40,29 +40,14 @@ namespace ChocolArm64.Translation
private const int Tmp5Index = -5;
public AILEmitterCtx(
ATranslator Translator,
ATranslatorCache Cache,
ABlock[] Graph,
ABlock Root,
string SubName)
{
if (Translator == null)
{
throw new ArgumentNullException(nameof(Translator));
}
if (Graph == null)
{
throw new ArgumentNullException(nameof(Graph));
}
if (Root == null)
{
throw new ArgumentNullException(nameof(Root));
}
this.Translator = Translator;
this.Graph = Graph;
this.Root = Root;
this.Cache = Cache ?? throw new ArgumentNullException(nameof(Cache));
this.Graph = Graph ?? throw new ArgumentNullException(nameof(Graph));
this.Root = Root ?? throw new ArgumentNullException(nameof(Root));
Labels = new Dictionary<long, AILLabel>();
@ -147,7 +132,12 @@ namespace ChocolArm64.Translation
return false;
}
if (!Translator.TryGetCachedSub(CurrOp, out ATranslatedSub Sub))
if (CurrOp.Emitter != AInstEmit.Bl)
{
return false;
}
if (!Cache.TryGetSubroutine(((AOpCodeBImmAl)CurrOp).Imm, out ATranslatedSub Subroutine))
{
return false;
}
@ -157,7 +147,7 @@ namespace ChocolArm64.Translation
EmitLdarg(Index);
}
foreach (ARegister Reg in Sub.Params)
foreach (ARegister Reg in Subroutine.Params)
{
switch (Reg.Type)
{
@ -167,9 +157,9 @@ namespace ChocolArm64.Translation
}
}
EmitCall(Sub.Method);
EmitCall(Subroutine.Method);
Sub.AddCaller(Root.Position);
Subroutine.AddCaller(Root.Position);
return true;
}

View File

@ -55,8 +55,6 @@ namespace Ryujinx.HLE.HOS
private List<Executable> Executables;
private Dictionary<long, string> SymbolTable;
private long ImageBase;
private bool Disposed;
@ -122,8 +120,6 @@ namespace Ryujinx.HLE.HOS
return false;
}
MakeSymbolTable();
long MainStackTop = MemoryManager.CodeRegionEnd - KMemoryManager.PageSize;
long MainStackSize = 1 * 1024 * 1024;
@ -256,31 +252,6 @@ namespace Ryujinx.HLE.HOS
throw new UndefinedInstructionException(e.Position, e.RawOpCode);
}
private void MakeSymbolTable()
{
SymbolTable = new Dictionary<long, string>();
foreach (Executable Exe in Executables)
{
foreach (KeyValuePair<long, string> KV in Exe.SymbolTable)
{
SymbolTable.TryAdd(Exe.ImageBase + KV.Key, KV.Value);
}
}
}
private ATranslator GetTranslator()
{
if (Translator == null)
{
Translator = new ATranslator(SymbolTable);
Translator.CpuTrace += CpuTraceHandler;
}
return Translator;
}
public void EnableCpuTracing()
{
Translator.EnableCpuTrace = true;
@ -293,32 +264,53 @@ namespace Ryujinx.HLE.HOS
private void CpuTraceHandler(object sender, ACpuTraceEventArgs e)
{
string NsoName = string.Empty;
Executable Exe = GetExecutable(e.Position);
for (int Index = Executables.Count - 1; Index >= 0; Index--)
if (Exe == null)
{
if (e.Position >= Executables[Index].ImageBase)
{
NsoName = $"{(e.Position - Executables[Index].ImageBase):x16}";
break;
}
return;
}
Device.Log.PrintDebug(LogClass.Cpu, $"Executing at 0x{e.Position:x16} {e.SubName} {NsoName}");
if (!TryGetSubName(Exe, e.Position, out string SubName))
{
SubName = string.Empty;
}
long Offset = e.Position - Exe.ImageBase;
string ExeNameWithAddr = $"{Exe.Name}:0x{Offset:x8}";
Device.Log.PrintDebug(LogClass.Cpu, ExeNameWithAddr + " " + SubName);
}
//Returns the translator used by this process, creating it (and hooking
//the cpu trace handler to it) the first time it is requested.
private ATranslator GetTranslator()
{
    if (Translator != null)
    {
        return Translator;
    }

    Translator = new ATranslator();

    Translator.CpuTrace += CpuTraceHandler;

    return Translator;
}
public void PrintStackTrace(AThreadState ThreadState)
{
long[] Positions = ThreadState.GetCallStack();
StringBuilder Trace = new StringBuilder();
Trace.AppendLine("Guest stack trace:");
foreach (long Position in Positions)
void AppendTrace(long Position)
{
if (!SymbolTable.TryGetValue(Position, out string SubName))
Executable Exe = GetExecutable(Position);
if (Exe == null)
{
return;
}
if (!TryGetSubName(Exe, Position, out string SubName))
{
SubName = $"Sub{Position:x16}";
}
@ -327,29 +319,77 @@ namespace Ryujinx.HLE.HOS
SubName = Demangler.Parse(SubName);
}
Trace.AppendLine(" " + SubName + " (" + GetNsoNameAndAddress(Position) + ")");
long Offset = Position - Exe.ImageBase;
string ExeNameWithAddr = $"{Exe.Name}:0x{Offset:x8}";
Trace.AppendLine(" " + ExeNameWithAddr + " " + SubName);
}
long FramePointer = (long)ThreadState.X29;
while (FramePointer != 0)
{
AppendTrace(Memory.ReadInt64(FramePointer + 8));
FramePointer = Memory.ReadInt64(FramePointer);
}
Device.Log.PrintInfo(LogClass.Cpu, Trace.ToString());
}
private string GetNsoNameAndAddress(long Position)
//Binary searches the symbol table of Exe for the symbol whose
//[Value, Value + Size) range contains Position (an absolute address,
//made relative to the image base before searching).
//The search relies on the table being ordered by symbol value.
//Returns false and a null Name when no symbol covers the position.
private bool TryGetSubName(Executable Exe, long Position, out string Name)
{
    //All comparisons are unsigned, as on the address space.
    ulong Offset = (ulong)(Position - Exe.ImageBase);

    int Low  = 0;
    int High = Exe.SymbolTable.Count - 1;

    while (Low <= High)
    {
        int Mid = Low + ((High - Low) >> 1);

        ElfSym Candidate = Exe.SymbolTable[Mid];

        if (Offset < (ulong)Candidate.Value)
        {
            //Target is below this symbol, keep the lower half.
            High = Mid - 1;

            continue;
        }

        long CandidateEnd = Candidate.Value + Candidate.Size;

        if (Offset < (ulong)CandidateEnd)
        {
            Name = Candidate.Name;

            return true;
        }

        //Target is at or past the end of this symbol, keep the upper half.
        Low = Mid + 1;
    }

    Name = null;

    return false;
}
private Executable GetExecutable(long Position)
{
string Name = string.Empty;
for (int Index = Executables.Count - 1; Index >= 0; Index--)
{
if (Position >= Executables[Index].ImageBase)
if ((ulong)Position >= (ulong)Executables[Index].ImageBase)
{
long Offset = Position - Executables[Index].ImageBase;
Name = $"{Executables[Index].Name}:{Offset:x8}";
break;
return Executables[Index];
}
}
return Name;
return null;
}
private void ThreadFinished(object sender, EventArgs e)

View File

@ -3,18 +3,21 @@ using Ryujinx.HLE.HOS;
using Ryujinx.HLE.HOS.Kernel;
using Ryujinx.HLE.Loaders.Executables;
using Ryujinx.HLE.Utilities;
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.IO;
using System.Linq;
namespace Ryujinx.HLE.Loaders
{
class Executable
{
private AMemory Memory;
private List<ElfDyn> Dynamic;
private Dictionary<long, string> m_SymbolTable;
public IReadOnlyDictionary<long, string> SymbolTable => m_SymbolTable;
public ReadOnlyCollection<ElfSym> SymbolTable;
public string Name { get; private set; }
@ -23,16 +26,12 @@ namespace Ryujinx.HLE.Loaders
public long ImageBase { get; private set; }
public long ImageEnd { get; private set; }
private AMemory Memory;
private KMemoryManager MemoryManager;
public Executable(IExecutable Exe, KMemoryManager MemoryManager, AMemory Memory, long ImageBase)
{
Dynamic = new List<ElfDyn>();
m_SymbolTable = new Dictionary<long, string>();
FilePath = Exe.FilePath;
if (FilePath != null)
@ -103,14 +102,18 @@ namespace Ryujinx.HLE.Loaders
long SymEntSize = GetFirstValue(ElfDynTag.DT_SYMENT);
List<ElfSym> Symbols = new List<ElfSym>();
while ((ulong)SymTblAddr < (ulong)StrTblAddr)
{
ElfSym Sym = GetSymbol(SymTblAddr, StrTblAddr);
m_SymbolTable.TryAdd(Sym.Value, Sym.Name);
Symbols.Add(Sym);
SymTblAddr += SymEntSize;
}
SymbolTable = Array.AsReadOnly(Symbols.OrderBy(x => x.Value).ToArray());
}
private ElfRel GetRelocation(long Position)