From aa021085cfab10ab63a7e6c2f9c9e29b4111525c Mon Sep 17 00:00:00 2001 From: gdkchan Date: Fri, 5 May 2023 11:20:20 -0300 Subject: [PATCH] Allow any shader SSBO constant buffer slot and offset (#2237) * Allow any shader SSBO constant buffer slot and offset * Fix slot value passed to SetUsedStorageBuffer on fallback case * Shader cache version * Ensure that the storage buffer source constant buffer offset is word aligned * Fix FirstBinding on GetUniformBufferDescriptors --- .../Engine/Compute/ComputeClass.cs | 7 +- .../Engine/Threed/StateUpdater.cs | 7 +- .../Shader/DiskCache/DiskCacheHostStorage.cs | 2 +- .../BufferDescriptor.cs | 18 ++- .../BufferUsageFlags.cs | 2 +- .../Translation/GlobalMemory.cs | 2 + .../Optimizations/GlobalToStorage.cs | 104 +++++++++++++----- .../Translation/Optimizations/Optimizer.cs | 1 - .../Translation/Rewriter.cs | 11 +- .../Translation/ShaderConfig.cs | 92 +++++++++++++++- 10 files changed, 194 insertions(+), 52 deletions(-) diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs index 2ac738fdf..4ec23c791 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs @@ -157,11 +157,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute { BufferDescriptor sb = info.SBuffers[index]; - ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0); - - int sbDescOffset = 0x310 + sb.Slot * 0x10; - - sbDescAddress += (ulong)sbDescOffset; + ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(sb.SbCbSlot); + sbDescAddress += (ulong)sb.SbCbOffset * 4; SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read(sbDescAddress); diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs index 00e09a310..1c9bf1d2a 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs @@ -351,11 +351,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed { BufferDescriptor sb = info.SBuffers[index]; - ulong sbDescAddress = _channel.BufferManager.GetGraphicsUniformBufferAddress(stage, 0); - - int sbDescOffset = 0x110 + stage * 0x100 + sb.Slot * 0x10; - - sbDescAddress += (ulong)sbDescOffset; + ulong sbDescAddress = _channel.BufferManager.GetGraphicsUniformBufferAddress(stage, sb.SbCbSlot); + sbDescAddress += (ulong)sb.SbCbOffset * 4; SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read(sbDescAddress); diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index b182f2995..85233c0a3 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 4735; + private const uint CodeGenVersion = 2237; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; diff --git a/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs b/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs index 4ce8a896d..410c1991d 100644 --- a/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs +++ b/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs @@ -5,13 +5,27 @@ namespace Ryujinx.Graphics.Shader // New fields should be added to the end of the struct to keep disk shader cache compatibility. public readonly int Binding; - public readonly int Slot; + public readonly byte Slot; + public readonly byte SbCbSlot; + public readonly ushort SbCbOffset; public BufferUsageFlags Flags; public BufferDescriptor(int binding, int slot) { Binding = binding; - Slot = slot; + Slot = (byte)slot; + SbCbSlot = 0; + SbCbOffset = 0; + + Flags = BufferUsageFlags.None; + } + + public BufferDescriptor(int binding, int slot, int sbCbSlot, int sbCbOffset) + { + Binding = binding; + Slot = (byte)slot; + SbCbSlot = (byte)sbCbSlot; + SbCbOffset = (ushort)sbCbOffset; Flags = BufferUsageFlags.None; } diff --git a/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs b/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs index 657546cb7..ab81d5756 100644 --- a/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs +++ b/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs @@ -6,7 +6,7 @@ namespace Ryujinx.Graphics.Shader /// Flags that indicate how a buffer will be used in a shader. /// [Flags] - public enum BufferUsageFlags + public enum BufferUsageFlags : byte { None = 0, diff --git a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs index 774a128d8..a81d0fc4b 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs @@ -16,6 +16,8 @@ namespace Ryujinx.Graphics.Shader.Translation public const int UbeDescsSize = StorageDescSize * UbeMaxCount; public const int UbeFirstCbuf = 8; + public const int DriverReservedCb = 0; + public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind) { return (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory) || diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs index 2a4070e0a..a83682445 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs @@ -8,6 +8,20 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class GlobalToStorage { + private struct SearchResult + { + public static SearchResult NotFound => new SearchResult(-1, 0); + public bool Found => SbCbSlot != -1; + public int SbCbSlot { get; } + public int SbCbOffset { get; } + + public SearchResult(int sbCbSlot, int sbCbOffset) + { + SbCbSlot = sbCbSlot; + SbCbOffset = sbCbOffset; + } + } + public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask) { int sbStart = GetStorageBaseCbOffset(config.Stage); @@ -49,30 +63,33 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { Operand source = operation.GetSource(0); - int storageIndex = SearchForStorageBase(block, source, sbStart, sbEnd); - - if (storageIndex >= 0) + var result = SearchForStorageBase(config, block, source); + if (!result.Found) { - // Storage buffers are implemented using global memory access. - // If we know from where the base address of the access is loaded, - // we can guess which storage buffer it is accessing. - // We can then replace the global memory access with a storage - // buffer access. - node = ReplaceGlobalWithStorage(block, node, config, storageIndex); + continue; } - else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal) + + if (config.Stage == ShaderStage.Compute && + operation.Inst == Instruction.LoadGlobal && + result.SbCbSlot == DriverReservedCb && + result.SbCbOffset >= UbeBaseOffset && + result.SbCbOffset < UbeBaseOffset + UbeDescsSize) { // Here we effectively try to replace a LDG instruction with LDC. // The hardware only supports a limited amount of constant buffers // so NVN "emulates" more constant buffers using global memory access. // Here we try to replace the global access back to a constant buffer // load. - storageIndex = SearchForStorageBase(block, source, ubeStart, ubeStart + ubeEnd); - - if (storageIndex >= 0) - { - node = ReplaceLdgWithLdc(node, config, storageIndex); - } + node = ReplaceLdgWithLdc(node, config, (result.SbCbOffset - UbeBaseOffset) / StorageDescSize); + } + else + { + // Storage buffers are implemented using global memory access. + // If we know from where the base address of the access is loaded, + // we can guess which storage buffer it is accessing. + // We can then replace the global memory access with a storage + // buffer access. + node = ReplaceGlobalWithStorage(block, node, config, config.GetSbSlot((byte)result.SbCbSlot, (ushort)result.SbCbOffset)); } } } @@ -159,7 +176,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if (byteOffset == null) { - Operand baseAddrLow = Cbuf(0, baseAddressCbOffset); + (int sbCbSlot, int sbCbOffset) = config.GetSbCbInfo(storageIndex); + + Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset); Operand baseAddrTrunc = Local(); Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); @@ -360,20 +379,20 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations return node; } - private static int SearchForStorageBase(BasicBlock block, Operand globalAddress, int sbStart, int sbEnd) + private static SearchResult SearchForStorageBase(ShaderConfig config, BasicBlock block, Operand globalAddress) { globalAddress = Utils.FindLastOperation(globalAddress, block); if (globalAddress.Type == OperandType.ConstantBuffer) { - return GetStorageIndex(globalAddress, sbStart, sbEnd); + return GetStorageIndex(config, globalAddress); } Operation operation = globalAddress.AsgOp as Operation; if (operation == null || operation.Inst != Instruction.Add) { - return -1; + return SearchResult.NotFound; } Operand src1 = operation.GetSource(0); @@ -382,34 +401,65 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) || (src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant)) { + Operand baseAddr; + if (src1.Type == OperandType.LocalVariable) { - operation = Utils.FindLastOperation(src1, block).AsgOp as Operation; + baseAddr = Utils.FindLastOperation(src1, block); } else { - operation = Utils.FindLastOperation(src2, block).AsgOp as Operation; + baseAddr = Utils.FindLastOperation(src2, block); } + var result = GetStorageIndex(config, baseAddr); + if (result.Found) + { + return result; + } + + operation = baseAddr.AsgOp as Operation; + if (operation == null || operation.Inst != Instruction.Add) { - return -1; + return SearchResult.NotFound; } } + var selectedResult = SearchResult.NotFound; + for (int index = 0; index < operation.SourcesCount; index++) { Operand source = operation.GetSource(index); - int storageIndex = GetStorageIndex(source, sbStart, sbEnd); + var result = GetStorageIndex(config, source); - if (storageIndex != -1) + // If we already have a result, we give preference to the ones from + // the driver reserved constant buffer, as those are the ones that + // contains the base address. + if (result.Found && (!selectedResult.Found || result.SbCbSlot == GlobalMemory.DriverReservedCb)) { - return storageIndex; + selectedResult = result; } } - return -1; + return selectedResult; + } + + private static SearchResult GetStorageIndex(ShaderConfig config, Operand operand) + { + if (operand.Type == OperandType.ConstantBuffer) + { + int slot = operand.GetCbufSlot(); + int offset = operand.GetCbufOffset(); + + if ((offset & 3) == 0) + { + return new SearchResult(slot, offset); + } + } + + return SearchResult.NotFound; } private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd) diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs index bae774ee4..16848bdc8 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs @@ -68,7 +68,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } ConstantFolding.RunPass(operation); - Simplification.RunPass(operation); if (DestIsLocalVar(operation)) diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs index 91e7ace1e..8167efc1d 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs @@ -110,9 +110,9 @@ namespace Ryujinx.Graphics.Shader.Translation Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow) { - baseAddrLow = Cbuf(0, cbOffset); - Operand baseAddrHigh = Cbuf(0, cbOffset + 1); - Operand size = Cbuf(0, cbOffset + 2); + baseAddrLow = Cbuf(DriverReservedCb, cbOffset); + Operand baseAddrHigh = Cbuf(DriverReservedCb, cbOffset + 1); + Operand size = Cbuf(DriverReservedCb, cbOffset + 2); Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow); Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow); @@ -134,9 +134,10 @@ namespace Ryujinx.Graphics.Shader.Translation sbUseMask &= ~(1 << slot); - config.SetUsedStorageBuffer(slot, isWrite); - int cbOffset = GetStorageCbOffset(config.Stage, slot); + slot = config.GetSbSlot(DriverReservedCb, (ushort)cbOffset); + + config.SetUsedStorageBuffer(slot, isWrite); Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow); diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index 22f5a671d..ae60bcc6c 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -125,6 +125,9 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly Dictionary _usedTextures; private readonly Dictionary _usedImages; + private readonly Dictionary _sbSlots; + private readonly Dictionary _sbSlotsReverse; + private BufferDescriptor[] _cachedConstantBufferDescriptors; private BufferDescriptor[] _cachedStorageBufferDescriptors; private TextureDescriptor[] _cachedTextureDescriptors; @@ -152,6 +155,9 @@ namespace Ryujinx.Graphics.Shader.Translation _usedTextures = new Dictionary(); _usedImages = new Dictionary(); + + _sbSlots = new Dictionary(); + _sbSlotsReverse = new Dictionary(); } public ShaderConfig( @@ -770,9 +776,8 @@ namespace Ryujinx.Graphics.Shader.Translation usedMask |= (int)GpuAccessor.QueryConstantBufferUse(); } - return _cachedConstantBufferDescriptors = GetBufferDescriptors( + return _cachedConstantBufferDescriptors = GetUniformBufferDescriptors( usedMask, - 0, UsedFeatures.HasFlag(FeatureFlags.CbIndexing), out _firstConstantBufferBinding, GpuAccessor.QueryBindingConstantBuffer); @@ -785,7 +790,7 @@ namespace Ryujinx.Graphics.Shader.Translation return _cachedStorageBufferDescriptors; } - return _cachedStorageBufferDescriptors = GetBufferDescriptors( + return _cachedStorageBufferDescriptors = GetStorageBufferDescriptors( _usedStorageBuffers, _usedStorageBuffersWrite, true, @@ -793,7 +798,48 @@ namespace Ryujinx.Graphics.Shader.Translation GpuAccessor.QueryBindingStorageBuffer); } - private static BufferDescriptor[] GetBufferDescriptors( + private static BufferDescriptor[] GetUniformBufferDescriptors(int usedMask, bool isArray, out int firstBinding, Func getBindingCallback) + { + firstBinding = 0; + int lastSlot = -1; + bool hasFirstBinding = false; + var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)]; + + for (int i = 0; i < descriptors.Length; i++) + { + int slot = BitOperations.TrailingZeroCount(usedMask); + + if (isArray) + { + // The next array entries also consumes bindings, even if they are unused. + for (int j = lastSlot + 1; j < slot; j++) + { + int binding = getBindingCallback(j); + + if (!hasFirstBinding) + { + firstBinding = binding; + hasFirstBinding = true; + } + } + } + + lastSlot = slot; + descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot); + + if (!hasFirstBinding) + { + firstBinding = descriptors[i].Binding; + hasFirstBinding = true; + } + + usedMask &= ~(1 << slot); + } + + return descriptors; + } + + private BufferDescriptor[] GetStorageBufferDescriptors( int usedMask, int writtenMask, bool isArray, @@ -827,7 +873,9 @@ namespace Ryujinx.Graphics.Shader.Translation lastSlot = slot; - descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot); + (int sbCbSlot, int sbCbOffset) = GetSbCbInfo(slot); + + descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot, sbCbSlot, sbCbOffset); if (!hasFirstBinding) { @@ -924,6 +972,40 @@ namespace Ryujinx.Graphics.Shader.Translation return FindDescriptorIndex(GetImageDescriptors(), texOp); } + public int GetSbSlot(byte sbCbSlot, ushort sbCbOffset) + { + int key = PackSbCbInfo(sbCbSlot, sbCbOffset); + + if (!_sbSlots.TryGetValue(key, out int slot)) + { + slot = _sbSlots.Count; + _sbSlots.Add(key, slot); + _sbSlotsReverse.Add(slot, key); + } + + return slot; + } + + public (int, int) GetSbCbInfo(int slot) + { + if (_sbSlotsReverse.TryGetValue(slot, out int key)) + { + return UnpackSbCbInfo(key); + } + + throw new ArgumentException($"Invalid slot {slot}.", nameof(slot)); + } + + private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset) + { + return sbCbOffset | ((int)sbCbSlot << 16); + } + + private static (int, int) UnpackSbCbInfo(int key) + { + return ((byte)(key >> 16), (ushort)key); + } + public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None) { return new ShaderProgramInfo(