From e10ff17e2d87b818d340947367d2d1a4276a0d06 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Thu, 31 Jan 2019 09:43:24 -0300 Subject: [PATCH] Initial support for shader half float instructions (#507) --- Ryujinx.Graphics/Gal/Shader/GlslDecl.cs | 31 ++++- Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs | 19 ++- .../Gal/Shader/ShaderDecodeAlu.cs | 59 +++++++++ .../Gal/Shader/ShaderDecodeMem.cs | 125 ++++++++++-------- .../Gal/Shader/ShaderDecodeOpCode.cs | 43 ++++++ .../Gal/Shader/ShaderIrOperGpr.cs | 17 ++- .../Gal/Shader/ShaderOpCodeTable.cs | 4 +- .../Gal/Shader/ShaderRegisterSize.cs | 9 ++ Ryujinx.Graphics/Gal/ShaderDumper.cs | 2 +- 9 files changed, 244 insertions(+), 65 deletions(-) create mode 100644 Ryujinx.Graphics/Gal/Shader/ShaderRegisterSize.cs diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs index b144cef30..43923da74 100644 --- a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs @@ -63,6 +63,7 @@ namespace Ryujinx.Graphics.Gal.Shader private Dictionary m_OutAttributes; private Dictionary m_Gprs; + private Dictionary m_GprsHalf; private Dictionary m_Preds; public IReadOnlyDictionary CbTextures => m_CbTextures; @@ -74,8 +75,9 @@ namespace Ryujinx.Graphics.Gal.Shader public IReadOnlyDictionary InAttributes => m_InAttributes; public IReadOnlyDictionary OutAttributes => m_OutAttributes; - public IReadOnlyDictionary Gprs => m_Gprs; - public IReadOnlyDictionary Preds => m_Preds; + public IReadOnlyDictionary Gprs => m_Gprs; + public IReadOnlyDictionary GprsHalf => m_GprsHalf; + public IReadOnlyDictionary Preds => m_Preds; public GalShaderType ShaderType { get; private set; } @@ -92,8 +94,9 @@ namespace Ryujinx.Graphics.Gal.Shader m_InAttributes = new Dictionary(); m_OutAttributes = new Dictionary(); - m_Gprs = new Dictionary(); - m_Preds = new Dictionary(); + m_Gprs = new Dictionary(); + m_GprsHalf = new Dictionary(); + m_Preds = new Dictionary(); } public GlslDecl(ShaderIrBlock[] 
Blocks, GalShaderType ShaderType, ShaderHeader Header) : this(ShaderType) @@ -146,8 +149,9 @@ namespace Ryujinx.Graphics.Gal.Shader Merge(Combined.m_Attributes, VpA.m_Attributes, VpB.m_Attributes); Merge(Combined.m_OutAttributes, VpA.m_OutAttributes, VpB.m_OutAttributes); - Merge(Combined.m_Gprs, VpA.m_Gprs, VpB.m_Gprs); - Merge(Combined.m_Preds, VpA.m_Preds, VpB.m_Preds); + Merge(Combined.m_Gprs, VpA.m_Gprs, VpB.m_Gprs); + Merge(Combined.m_GprsHalf, VpA.m_GprsHalf, VpB.m_GprsHalf); + Merge(Combined.m_Preds, VpA.m_Preds, VpB.m_Preds); //Merge input attributes. foreach (KeyValuePair KV in VpA.m_InAttributes) @@ -343,7 +347,20 @@ namespace Ryujinx.Graphics.Gal.Shader { string Name = GetGprName(Gpr.Index); - m_Gprs.TryAdd(Gpr.Index, new ShaderDeclInfo(Name, Gpr.Index)); + if (Gpr.RegisterSize == ShaderRegisterSize.Single) + { + m_Gprs.TryAdd(Gpr.Index, new ShaderDeclInfo(Name, Gpr.Index)); + } + else if (Gpr.RegisterSize == ShaderRegisterSize.Half) + { + Name += "_h" + Gpr.HalfPart; + + m_GprsHalf.TryAdd((Gpr.Index << 1) | Gpr.HalfPart, new ShaderDeclInfo(Name, Gpr.Index)); + } + else /* if (Gpr.RegisterSize == ShaderRegisterSize.Double) */ + { + throw new NotImplementedException("Double types are not supported."); + } } break; } diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs index e6fb7ea14..77a451e95 100644 --- a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs @@ -364,6 +364,7 @@ namespace Ryujinx.Graphics.Gal.Shader private void PrintDeclGprs() { PrintDecls(Decl.Gprs); + PrintDecls(Decl.GprsHalf); } private void PrintDeclPreds() @@ -897,7 +898,23 @@ namespace Ryujinx.Graphics.Gal.Shader private string GetName(ShaderIrOperGpr Gpr) { - return Gpr.IsConst ? 
"0" : GetNameWithSwizzle(Decl.Gprs, Gpr.Index); + if (Gpr.IsConst) + { + return "0"; + } + + if (Gpr.RegisterSize == ShaderRegisterSize.Single) + { + return GetNameWithSwizzle(Decl.Gprs, Gpr.Index); + } + else if (Gpr.RegisterSize == ShaderRegisterSize.Half) + { + return GetNameWithSwizzle(Decl.GprsHalf, (Gpr.Index << 1) | Gpr.HalfPart); + } + else /* if (Gpr.RegisterSize == ShaderRegisterSize.Double) */ + { + throw new NotImplementedException("Double types are not supported."); + } } private string GetValue(ShaderIrOperImm Imm) diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs index 6957e30b2..24c85c8f0 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs @@ -6,6 +6,14 @@ namespace Ryujinx.Graphics.Gal.Shader { static partial class ShaderDecode { + private enum HalfOutputType + { + PackedFp16, + Fp32, + MergeH0, + MergeH1 + } + public static void Bfe_C(ShaderIrBlock Block, long OpCode, int Position) { EmitBfe(Block, OpCode, ShaderOper.CR); @@ -144,6 +152,16 @@ namespace Ryujinx.Graphics.Gal.Shader EmitFsetp(Block, OpCode, ShaderOper.RR); } + public static void Hadd2_R(ShaderIrBlock Block, long OpCode, int Position) + { + EmitBinaryHalfOp(Block, OpCode, ShaderIrInst.Fadd); + } + + public static void Hmul2_R(ShaderIrBlock Block, long OpCode, int Position) + { + EmitBinaryHalfOp(Block, OpCode, ShaderIrInst.Fmul); + } + public static void Iadd_C(ShaderIrBlock Block, long OpCode, int Position) { EmitIadd(Block, OpCode, ShaderOper.CR); @@ -1041,6 +1059,47 @@ namespace Ryujinx.Graphics.Gal.Shader Block.AddNode(OpCode.PredNode(new ShaderIrAsg(P0Node, Op))); } + private static void EmitBinaryHalfOp(ShaderIrBlock Block, long OpCode, ShaderIrInst Inst) + { + bool AbsB = OpCode.Read(30); + bool NegB = OpCode.Read(31); + bool Sat = OpCode.Read(32); + bool AbsA = OpCode.Read(44); + + ShaderIrOperGpr[] VecA = OpCode.GprHalfVec8(); + ShaderIrOperGpr[] VecB = 
OpCode.GprHalfVec20(); + + HalfOutputType OutputType = (HalfOutputType)OpCode.Read(49, 3); + + int Elems = OutputType == HalfOutputType.PackedFp16 ? 2 : 1; + int First = OutputType == HalfOutputType.MergeH1 ? 1 : 0; + + for (int Index = First; Index < First + Elems; Index++) /* Bound is relative to First, so MergeH1 (First = 1) still emits its single op. */ + { + ShaderIrNode OperA = GetAluFabs (VecA[Index], AbsA); + ShaderIrNode OperB = GetAluFabsFneg(VecB[Index], AbsB, NegB); + + ShaderIrNode Op = new ShaderIrOp(Inst, OperA, OperB); + + ShaderIrOperGpr Dst = GetHalfDst(OpCode, OutputType, Index); + + Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, GetAluFsat(Op, Sat)))); + } + } + + private static ShaderIrOperGpr GetHalfDst(long OpCode, HalfOutputType OutputType, int Index) + { + switch (OutputType) + { + case HalfOutputType.PackedFp16: return OpCode.GprHalf0(Index); + case HalfOutputType.Fp32: return OpCode.Gpr0(); + case HalfOutputType.MergeH0: return OpCode.GprHalf0(0); + case HalfOutputType.MergeH1: return OpCode.GprHalf0(1); + } + + throw new ArgumentOutOfRangeException(nameof(OutputType)); + } + private static void EmitLop(ShaderIrBlock Block, long OpCode, ShaderOper Oper) { int SubOp = OpCode.Read(41, 3); diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs index cd6559950..adcc47b95 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs @@ -6,8 +6,6 @@ namespace Ryujinx.Graphics.Gal.Shader { static partial class ShaderDecode { - private const int TempRegStart = 0x100; - private const int ____ = 0x0; private const int R___ = 0x1; private const int _G__ = 0x2; @@ -149,14 +147,18 @@ namespace Ryujinx.Graphics.Gal.Shader for (int Index = 0; Index < Coords.Length; Index++) { - Coords[Index] = OpCode.Gpr8(); + ShaderIrOperGpr CoordReg = OpCode.Gpr8(); - Coords[Index].Index += Index; + CoordReg.Index += Index; - if (Coords[Index].Index > ShaderIrOperGpr.ZRIndex) + if (!CoordReg.IsValidRegister) { - Coords[Index].Index = ShaderIrOperGpr.ZRIndex; + 
CoordReg.Index = ShaderIrOperGpr.ZRIndex; } + + Coords[Index] = ShaderIrOperGpr.MakeTemporary(Index); + + Block.AddNode(new ShaderIrAsg(Coords[Index], CoordReg)); } int ChMask = OpCode.Read(31, 0xf); @@ -167,17 +169,6 @@ namespace Ryujinx.Graphics.Gal.Shader ShaderIrInst Inst = GprHandle ? ShaderIrInst.Texb : ShaderIrInst.Texs; - for (int Ch = 0; Ch < 4; Ch++) - { - ShaderIrOperGpr Dst = new ShaderIrOperGpr(TempRegStart + Ch); - - ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch); - - ShaderIrOp Op = new ShaderIrOp(Inst, Coords[0], Coords[1], OperC, Meta); - - Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op))); - } - int RegInc = 0; for (int Ch = 0; Ch < 4; Ch++) @@ -187,18 +178,20 @@ namespace Ryujinx.Graphics.Gal.Shader continue; } - ShaderIrOperGpr Src = new ShaderIrOperGpr(TempRegStart + Ch); - ShaderIrOperGpr Dst = OpCode.Gpr0(); Dst.Index += RegInc++; - if (Dst.Index >= ShaderIrOperGpr.ZRIndex) + if (!Dst.IsValidRegister || Dst.IsConst) { continue; } - Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Src))); + ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch); + + ShaderIrOp Op = new ShaderIrOp(Inst, Coords[0], Coords[1], OperC, Meta); + + Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op))); } } @@ -215,57 +208,81 @@ namespace Ryujinx.Graphics.Gal.Shader private static void EmitTexs(ShaderIrBlock Block, long OpCode, ShaderIrInst Inst) { //TODO: Support other formats. - ShaderIrNode OperA = OpCode.Gpr8(); - ShaderIrNode OperB = OpCode.Gpr20(); - ShaderIrNode OperC = OpCode.Imm13_36(); - int LutIndex; - LutIndex = OpCode.Gpr0 ().Index != ShaderIrOperGpr.ZRIndex ? 1 : 0; - LutIndex |= OpCode.Gpr28().Index != ShaderIrOperGpr.ZRIndex ? 2 : 0; + LutIndex = !OpCode.Gpr0().IsConst ? 1 : 0; + LutIndex |= !OpCode.Gpr28().IsConst ? 2 : 0; if (LutIndex == 0) { - //Both registers are RZ, color is not written anywhere. - //So, the intruction is basically a no-op. + //Both destination registers are RZ, do nothing. 
return; } - int ChMask = MaskLut[LutIndex, OpCode.Read(50, 7)]; + bool Fp16 = !OpCode.Read(59); - for (int Ch = 0; Ch < 4; Ch++) - { - ShaderIrOperGpr Dst = new ShaderIrOperGpr(TempRegStart + Ch); - - ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch); - - ShaderIrOp Op = new ShaderIrOp(Inst, OperA, OperB, OperC, Meta); - - Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op))); - } - - int RegInc = 0; + int DstIncrement = 0; ShaderIrOperGpr GetDst() { ShaderIrOperGpr Dst; - switch (LutIndex) + if (Fp16) { - case 1: Dst = OpCode.Gpr0(); break; - case 2: Dst = OpCode.Gpr28(); break; - case 3: Dst = (RegInc >> 1) != 0 - ? OpCode.Gpr28() - : OpCode.Gpr0 (); break; + //FP16 mode, two components are packed on the two + //halves of a 32-bit register, as two half-float values. + int HalfPart = DstIncrement & 1; - default: throw new InvalidOperationException(); + switch (LutIndex) + { + case 1: Dst = OpCode.GprHalf0(HalfPart); break; + case 2: Dst = OpCode.GprHalf28(HalfPart); break; + case 3: Dst = (DstIncrement >> 1) != 0 + ? OpCode.GprHalf28(HalfPart) + : OpCode.GprHalf0(HalfPart); break; + + default: throw new InvalidOperationException(); + } + } + else + { + //32-bit mode, each component uses one register. + //Two components use two consecutive registers. + switch (LutIndex) + { + case 1: Dst = OpCode.Gpr0(); break; + case 2: Dst = OpCode.Gpr28(); break; + case 3: Dst = (DstIncrement >> 1) != 0 + ? OpCode.Gpr28() + : OpCode.Gpr0(); break; + + default: throw new InvalidOperationException(); + } + + Dst.Index += DstIncrement & 1; } - Dst.Index += RegInc++ & 1; + DstIncrement++; return Dst; } + int ChMask = MaskLut[LutIndex, OpCode.Read(50, 7)]; + + if (ChMask == 0) + { + //All channels are disabled, do nothing. 
+ return; + } + + ShaderIrNode OperC = OpCode.Imm13_36(); + + ShaderIrOperGpr Coord0 = ShaderIrOperGpr.MakeTemporary(0); + ShaderIrOperGpr Coord1 = ShaderIrOperGpr.MakeTemporary(1); + + Block.AddNode(new ShaderIrAsg(Coord0, OpCode.Gpr8())); + Block.AddNode(new ShaderIrAsg(Coord1, OpCode.Gpr20())); + for (int Ch = 0; Ch < 4; Ch++) { if (!IsChannelUsed(ChMask, Ch)) @@ -273,13 +290,15 @@ namespace Ryujinx.Graphics.Gal.Shader continue; } - ShaderIrOperGpr Src = new ShaderIrOperGpr(TempRegStart + Ch); + ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch); + + ShaderIrOp Op = new ShaderIrOp(Inst, Coord0, Coord1, OperC, Meta); ShaderIrOperGpr Dst = GetDst(); - if (Dst.Index != ShaderIrOperGpr.ZRIndex) + if (Dst.IsValidRegister && !Dst.IsConst) { - Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Src))); + Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op))); } } } diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeOpCode.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeOpCode.cs index 3af17cae8..6531138e7 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeOpCode.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeOpCode.cs @@ -75,6 +75,49 @@ namespace Ryujinx.Graphics.Gal.Shader return new ShaderIrOperGpr(OpCode.Read(28, 0xff)); } + private static ShaderIrOperGpr[] GprHalfVec8(this long OpCode) + { + return GetGprHalfVec2(OpCode.Read(8, 0xff), OpCode.Read(47, 3)); + } + + private static ShaderIrOperGpr[] GprHalfVec20(this long OpCode) + { + return GetGprHalfVec2(OpCode.Read(20, 0xff), OpCode.Read(28, 3)); + } + + private static ShaderIrOperGpr[] GetGprHalfVec2(int Gpr, int Mask) + { + if (Mask == 1) + { + //This value is used for FP32, the whole 32-bit register + //is used as each element of the vector. + return new ShaderIrOperGpr[] + { + new ShaderIrOperGpr(Gpr), + new ShaderIrOperGpr(Gpr) + }; + } + + ShaderIrOperGpr Low = new ShaderIrOperGpr(Gpr, 0); + ShaderIrOperGpr High = new ShaderIrOperGpr(Gpr, 1); + + return new ShaderIrOperGpr[] + { + (Mask & 1) != 0 ? 
High : Low, + (Mask & 2) != 0 ? High : Low + }; + } + + private static ShaderIrOperGpr GprHalf0(this long OpCode, int HalfPart) + { + return new ShaderIrOperGpr(OpCode.Read(0, 0xff), HalfPart); + } + + private static ShaderIrOperGpr GprHalf28(this long OpCode, int HalfPart) + { + return new ShaderIrOperGpr(OpCode.Read(28, 0xff), HalfPart); + } + private static ShaderIrOperImm Imm5_39(this long OpCode) { return new ShaderIrOperImm(OpCode.Read(39, 0x1f)); diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderIrOperGpr.cs b/Ryujinx.Graphics/Gal/Shader/ShaderIrOperGpr.cs index 9dd196e69..b4a5cab4d 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderIrOperGpr.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderIrOperGpr.cs @@ -6,13 +6,26 @@ namespace Ryujinx.Graphics.Gal.Shader public bool IsConst => Index == ZRIndex; - public bool IsValidRegister => (Index <= ZRIndex); + public bool IsValidRegister => (uint)Index <= ZRIndex; - public int Index { get; set; } + public int Index { get; set; } + public int HalfPart { get; set; } + + public ShaderRegisterSize RegisterSize { get; private set; } public ShaderIrOperGpr(int Index) { this.Index = Index; + + RegisterSize = ShaderRegisterSize.Single; + } + + public ShaderIrOperGpr(int Index, int HalfPart) + { + this.Index = Index; + this.HalfPart = HalfPart; + + RegisterSize = ShaderRegisterSize.Half; } public static ShaderIrOperGpr MakeTemporary(int Index = 0) diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs b/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs index d44659c75..177e36c3e 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs @@ -58,6 +58,8 @@ namespace Ryujinx.Graphics.Gal.Shader Set("010010111011xx", ShaderDecode.Fsetp_C); Set("0011011x1011xx", ShaderDecode.Fsetp_I); Set("010110111011xx", ShaderDecode.Fsetp_R); + Set("0101110100010x", ShaderDecode.Hadd2_R); + Set("0101110100001x", ShaderDecode.Hmul2_R); Set("0100110010111x", ShaderDecode.I2f_C); 
Set("0011100x10111x", ShaderDecode.I2f_I); Set("0101110010111x", ShaderDecode.I2f_R); @@ -118,7 +120,7 @@ namespace Ryujinx.Graphics.Gal.Shader Set("110000xxxx111x", ShaderDecode.Tex); Set("1101111010111x", ShaderDecode.Tex_B); Set("1101111101001x", ShaderDecode.Texq); - Set("1101100xxxxxxx", ShaderDecode.Texs); + Set("1101x00xxxxxxx", ShaderDecode.Texs); Set("1101101xxxxxxx", ShaderDecode.Tlds); Set("01011111xxxxxx", ShaderDecode.Vmad); Set("0100111xxxxxxx", ShaderDecode.Xmad_CR); diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderRegisterSize.cs b/Ryujinx.Graphics/Gal/Shader/ShaderRegisterSize.cs new file mode 100644 index 000000000..eb37359bf --- /dev/null +++ b/Ryujinx.Graphics/Gal/Shader/ShaderRegisterSize.cs @@ -0,0 +1,9 @@ +namespace Ryujinx.Graphics.Gal.Shader +{ + enum ShaderRegisterSize + { + Half, + Single, + Double + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/ShaderDumper.cs b/Ryujinx.Graphics/Gal/ShaderDumper.cs index d3bcbf0d8..21e92491a 100644 --- a/Ryujinx.Graphics/Gal/ShaderDumper.cs +++ b/Ryujinx.Graphics/Gal/ShaderDumper.cs @@ -39,7 +39,7 @@ namespace Ryujinx.Graphics.Gal ulong Instruction = 0; //Dump until a NOP instruction is found - while ((Instruction >> 52 & 0xfff8) != 0x50b0) + while ((Instruction >> 48 & 0xfff8) != 0x50b0) { uint Word0 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 0); uint Word1 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 4);