Ryujinx/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
gdkchan a731ab3a2a Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project

* Refactoring around the old IRAdapter, now renamed to PreAllocator

* Optimize the LowestBitSet method

* Add CLZ support and fix CLS implementation

* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks

* Implement the ByteSwap IR instruction, and some refactoring on the assembler

* Implement the DivideUI IR instruction and fix 64-bits IDIV

* Correct constant operand type on CSINC

* Move division instructions implementation to InstEmitDiv

* Fix destination type for the ConditionalSelect IR instruction

* Implement UMULH and SMULH, with new IR instructions

* Fix some issues with shift instructions

* Fix constant types for BFM instructions

* Fix up new tests using the new V128 struct

* Update tests

* Move DIV tests to a separate file

* Add support for calls, and some instructions that depends on them

* Start adding support for SIMD & FP types, along with some of the related ARM instructions

* Fix some typos and the divide instruction with FP operands

* Fix wrong method call on Clz_V

* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes

* Implement SIMD logical instructions and more misc. fixes

* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations

* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes

* Implement SIMD shift instruction and fix Dup_V

* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table

* Fix check with tolerance on tester

* Implement FP & SIMD comparison instructions, and some fixes

* Update FCVT (Scalar) encoding on the table to support the Half-float variants

* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes

* Use old memory access methods, made a start on SIMD memory insts support, some fixes

* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes

* Fix arguments count with struct return values, other fixes

* More instructions

* Misc. fixes and integrate LDj3SNuD fixes

* Update tests

* Add a faster linear scan allocator, unwinding support on windows, and other changes

* Update Ryujinx.HLE

* Update Ryujinx.Graphics

* Fix V128 return pointer passing, RCX is clobbered

* Update Ryujinx.Tests

* Update ITimeZoneService

* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks

* Use generic GetFunctionPointerForDelegate method and other tweaks

* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics

* Remove some unused code on the assembler

* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler

* Add hardware capability detection

* Fix regression on Sha1h and revert Fcm** changes

* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator

* Fix silly mistake introduced on last commit on CpuId

* Generate inline stack probes when the stack allocation is too large

* Initial support for the System-V ABI

* Support multiple destination operands

* Fix SSE2 VectorInsert8 path, and other fixes

* Change placement of XMM callee save and restore code to match other compilers

* Rename Dest to Destination and Inst to Instruction

* Fix a regression related to calls and the V128 type

* Add an extra space on comments to match code style

* Some refactoring

* Fix vector insert FP32 SSE2 path

* Port over the ARM32 instructions

* Avoid memory protection races on JIT Cache

* Another fix on VectorInsert FP32 (thanks to LDj3SNuD

* Float operands don't need to use the same register when VEX is supported

* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks

* Some nits, small improvements on the pre allocator

* CpuThreadState is gone

* Allow changing CPU emulators with a config entry

* Add runtime identifiers on the ARMeilleure project

* Allow switching between CPUs through a config entry (pt. 2)

* Change win10-x64 to win-x64 on projects

* Update the Ryujinx project to use ARMeilleure

* Ensure that the selected register is valid on the hybrid allocator

* Allow exiting on returns to 0 (should fix test regression)

* Remove register assignments for most used variables on the hybrid allocator

* Do not use fixed registers as spill temp

* Add missing namespace and remove unneeded using

* Address PR feedback

* Fix types, etc

* Enable AssumeStrictAbiCompliance by default

* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 21:56:22 +03:00

246 lines
8.5 KiB
C#

using ARMeilleure.IntermediateRepresentation;
using System;
using System.Collections.Generic;
namespace ARMeilleure.CodeGen.RegisterAllocators
{
class CopyResolver
{
private class ParallelCopy
{
private struct Copy
{
public Register Dest { get; }
public Register Source { get; }
public OperandType Type { get; }
public Copy(Register dest, Register source, OperandType type)
{
Dest = dest;
Source = source;
Type = type;
}
}
private List<Copy> _copies;
public int Count => _copies.Count;
public ParallelCopy()
{
_copies = new List<Copy>();
}
public void AddCopy(Register dest, Register source, OperandType type)
{
_copies.Add(new Copy(dest, source, type));
}
public void Sequence(List<Operation> sequence)
{
Dictionary<Register, Register> locations = new Dictionary<Register, Register>();
Dictionary<Register, Register> sources = new Dictionary<Register, Register>();
Dictionary<Register, OperandType> types = new Dictionary<Register, OperandType>();
Queue<Register> pendingQueue = new Queue<Register>();
Queue<Register> readyQueue = new Queue<Register>();
foreach (Copy copy in _copies)
{
locations[copy.Source] = copy.Source;
sources[copy.Dest] = copy.Source;
types[copy.Dest] = copy.Type;
pendingQueue.Enqueue(copy.Dest);
}
foreach (Copy copy in _copies)
{
// If the destination is not used anywhere, we can assign it immediately.
if (!locations.ContainsKey(copy.Dest))
{
readyQueue.Enqueue(copy.Dest);
}
}
while (pendingQueue.TryDequeue(out Register current))
{
Register copyDest;
Register origSource;
Register copySource;
while (readyQueue.TryDequeue(out copyDest))
{
origSource = sources[copyDest];
copySource = locations[origSource];
OperandType type = types[copyDest];
EmitCopy(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
locations[origSource] = copyDest;
if (origSource == copySource && sources.ContainsKey(origSource))
{
readyQueue.Enqueue(origSource);
}
}
copyDest = current;
origSource = sources[copyDest];
copySource = locations[origSource];
if (copyDest != copySource)
{
OperandType type = types[copyDest];
type = type.IsInteger() ? OperandType.I64 : OperandType.V128;
EmitXorSwap(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
locations[origSource] = copyDest;
Register swapOther = copySource;
if (copyDest != locations[sources[copySource]])
{
// Find the other swap destination register.
// To do that, we search all the pending registers, and pick
// the one where the copy source register is equal to the
// current destination register being processed (copyDest).
foreach (Register pending in pendingQueue)
{
// Is this a copy of pending <- copyDest?
if (copyDest == locations[sources[pending]])
{
swapOther = pending;
break;
}
}
}
// The value that was previously at "copyDest" now lives on
// "copySource" thanks to the swap, now we need to update the
// location for the next copy that is supposed to copy the value
// that used to live on "copyDest".
locations[sources[swapOther]] = copySource;
}
}
}
private static void EmitCopy(List<Operation> sequence, Operand x, Operand y)
{
sequence.Add(new Operation(Instruction.Copy, x, y));
}
private static void EmitXorSwap(List<Operation> sequence, Operand x, Operand y)
{
sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y));
sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, y, y, x));
sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y));
}
}
private Queue<Operation> _fillQueue = new Queue<Operation>();
private Queue<Operation> _spillQueue = new Queue<Operation>();
private ParallelCopy _parallelCopy;
public bool HasCopy { get; private set; }
public CopyResolver()
{
_fillQueue = new Queue<Operation>();
_spillQueue = new Queue<Operation>();
_parallelCopy = new ParallelCopy();
}
public void AddSplit(LiveInterval left, LiveInterval right)
{
if (left.Local != right.Local)
{
throw new ArgumentException("Intervals of different variables are not allowed.");
}
OperandType type = left.Local.Type;
if (left.IsSpilled && !right.IsSpilled)
{
// Move from the stack to a register.
AddSplitFill(left, right, type);
}
else if (!left.IsSpilled && right.IsSpilled)
{
// Move from a register to the stack.
AddSplitSpill(left, right, type);
}
else if (!left.IsSpilled && !right.IsSpilled && left.Register != right.Register)
{
// Move from one register to another.
AddSplitCopy(left, right, type);
}
else if (left.SpillOffset != right.SpillOffset)
{
// This would be the stack-to-stack move case, but this is not supported.
throw new ArgumentException("Both intervals were spilled.");
}
}
private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type)
{
Operand register = GetRegister(right.Register, type);
Operand offset = new Operand(left.SpillOffset);
_fillQueue.Enqueue(new Operation(Instruction.Fill, register, offset));
HasCopy = true;
}
private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type)
{
Operand offset = new Operand(right.SpillOffset);
Operand register = GetRegister(left.Register, type);
_spillQueue.Enqueue(new Operation(Instruction.Spill, null, offset, register));
HasCopy = true;
}
private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type)
{
_parallelCopy.AddCopy(right.Register, left.Register, type);
HasCopy = true;
}
public Operation[] Sequence()
{
List<Operation> sequence = new List<Operation>();
while (_spillQueue.TryDequeue(out Operation spillOp))
{
sequence.Add(spillOp);
}
_parallelCopy.Sequence(sequence);
while (_fillQueue.TryDequeue(out Operation fillOp))
{
sequence.Add(fillOp);
}
return sequence.ToArray();
}
private static Operand GetRegister(Register reg, OperandType type)
{
return new Operand(reg.Index, reg.Type, type);
}
}
}