From 8a7e25de71b89c81db0857566a49fa949f5d4ca0 Mon Sep 17 00:00:00 2001
From: riperiperi
Date: Fri, 22 May 2020 00:17:25 +0100
Subject: [PATCH] Speed up buffer -> texture copies.

No longer copies byte by byte. Fast path when formats are identical.
---
 .../Engine/MethodCopyBuffer.cs                | 64 ++++++++++---
 Ryujinx.Graphics.Texture/BlockLinearLayout.cs | 25 +++++
 Ryujinx.Graphics.Texture/Bpp12Pixel.cs        | 14 +++
 Ryujinx.Graphics.Texture/LayoutConverter.cs   |  9 --
 Ryujinx.Graphics.Texture/OffsetCalculator.cs  | 96 ++++++++++++++++++-
 5 files changed, 187 insertions(+), 21 deletions(-)
 create mode 100644 Ryujinx.Graphics.Texture/Bpp12Pixel.cs

diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs b/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
index 7244db324..5660df68a 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
@@ -1,11 +1,13 @@
 using Ryujinx.Graphics.Gpu.State;
 using Ryujinx.Graphics.Texture;
 using System;
+using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Graphics.Gpu.Engine
 {
     partial class Methods
     {
+
         /// <summary>
         /// Performs a buffer to buffer, or buffer to texture copy.
         /// </summary>
@@ -56,19 +58,59 @@ namespace Ryujinx.Graphics.Gpu.Engine
                 ulong srcBaseAddress = _context.MemoryManager.Translate(cbp.SrcAddress.Pack());
                 ulong dstBaseAddress = _context.MemoryManager.Translate(cbp.DstAddress.Pack());
 
-                for (int y = 0; y < cbp.YCount; y++)
-                for (int x = 0; x < cbp.XCount; x++)
+                // Byte range, relative to each base address, that contains the copied rectangle.
+                (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, cbp.XCount, cbp.YCount);
+                (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, cbp.XCount, cbp.YCount);
+
+                ReadOnlySpan<byte> srcSpan = _context.PhysicalMemory.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize);
+                // Work on a copy of the destination range; it is written back in a single call below.
+                Span<byte> dstSpan = new Span<byte>(_context.PhysicalMemory.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray());
+
+                bool completeSource = src.RegionX == 0 && src.RegionY == 0 && src.Width == cbp.XCount && src.Height == cbp.YCount;
+                bool completeDest = dst.RegionX == 0 && dst.RegionY == 0 && dst.Width == cbp.XCount && dst.Height == cbp.YCount;
+
+                if (completeSource && completeDest && srcCalculator.LayoutMatches(dstCalculator))
                 {
-                    int srcOffset = srcCalculator.GetOffset(src.RegionX + x, src.RegionY + y);
-                    int dstOffset = dstCalculator.GetOffset(dst.RegionX + x, dst.RegionY + y);
-
-                    ulong srcAddress = srcBaseAddress + (ulong)srcOffset;
-                    ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
-
-                    ReadOnlySpan<byte> pixel = _context.PhysicalMemory.GetSpan(srcAddress, srcBpp);
-
-                    _context.PhysicalMemory.Write(dstAddress, pixel);
+                    srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
                 }
+                else
+                {
+                    // Copies one sizeof(T) byte pixel at a time. The int result only exists so the call fits the switch expression below.
+                    unsafe int Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
+                    {
+                        fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
+                        {
+                            for (int y = 0; y < cbp.YCount; y++)
+                            {
+                                srcCalculator.SetY(src.RegionY + y);
+                                dstCalculator.SetY(dst.RegionY + y);
+
+                                for (int x = 0; x < cbp.XCount; x++)
+                                {
+                                    // Calculator offsets are relative to the base address, but the spans start at the base offset.
+                                    int srcOffset = srcCalculator.GetOffset(src.RegionX + x) - srcBaseOffset;
+                                    int dstOffset = dstCalculator.GetOffset(dst.RegionX + x) - dstBaseOffset;
+
+                                    *(T*)(dstPtr + dstOffset) = *(T*)(srcPtr + srcOffset);
+                                }
+                            }
+                        }
+                        return 1;
+                    }
+
+                    int _ = srcBpp switch
+                    {
+                        1 => Convert<byte>(dstSpan, srcSpan),
+                        2 => Convert<ushort>(dstSpan, srcSpan),
+                        4 => Convert<uint>(dstSpan, srcSpan),
+                        8 => Convert<ulong>(dstSpan, srcSpan),
+                        12 => Convert<Bpp12Pixel>(dstSpan, srcSpan),
+                        16 => Convert<Vector128<byte>>(dstSpan, srcSpan),
+                        _ => throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.")
+                    };
+                }
+
+                _context.PhysicalMemory.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan); // dstSpan starts at dstBaseOffset.
             }
             else
             {
diff --git a/Ryujinx.Graphics.Texture/BlockLinearLayout.cs b/Ryujinx.Graphics.Texture/BlockLinearLayout.cs
index 7a5ac75f3..c393cc474 100644
--- a/Ryujinx.Graphics.Texture/BlockLinearLayout.cs
+++ b/Ryujinx.Graphics.Texture/BlockLinearLayout.cs
@@ -102,6 +102,31 @@ namespace Ryujinx.Graphics.Texture
             return offset;
         }
 
+        /// <summary>
+        /// Gets the byte range that covers every pixel of a rectangle at depth 0.
+        /// </summary>
+        /// <returns>Offset of the rectangle's first byte, and the number of bytes up to the end of its last pixel</returns>
+        public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
+        {
+            // Justification:
+            // The offset is a combination of separate x and y parts.
+            // Both components increase with input and never overlap bits.
+            // Therefore for each component, the minimum input value is the lowest that component can go. Opposite goes for maximum.
+
+            int start = GetOffset(x, y, 0);
+            int end = GetOffset(x + width - 1, y + height - 1, 0); // Last pixel inside the rectangle, not the one past it.
+            return (start, (end - start) + _texBpp);
+        }
+
+        /// <summary>
+        /// Checks whether another layout has the same _robSize, _sliceSize, _texBpp, _bhMask and _bdMask.
+        /// </summary>
+        /// <returns>True if all of those fields are equal</returns>
+        public bool LayoutMatches(BlockLinearLayout other)
+        {
+            return _robSize == other._robSize && _sliceSize == other._sliceSize && _texBpp == other._texBpp && _bhMask == other._bhMask && _bdMask == other._bdMask;
+        }
+
         // Functions for built in iteration.
         // Components of the offset can be updated separately, and combined to save some time.
 
diff --git a/Ryujinx.Graphics.Texture/Bpp12Pixel.cs b/Ryujinx.Graphics.Texture/Bpp12Pixel.cs
new file mode 100644
index 000000000..5a38259e2
--- /dev/null
+++ b/Ryujinx.Graphics.Texture/Bpp12Pixel.cs
@@ -0,0 +1,14 @@
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Texture
+{
+    /// <summary>
+    /// Opaque 12 byte value (an 8 byte and a 4 byte field, packed), used to move 12 byte pixels as a unit.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 12)]
+    public struct Bpp12Pixel
+    {
+        private ulong _elem1;
+        private uint _elem2;
+    }
+}
diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs
index f4c5cd441..516b325c2 100644
--- a/Ryujinx.Graphics.Texture/LayoutConverter.cs
+++ b/Ryujinx.Graphics.Texture/LayoutConverter.cs
@@ -1,6 +1,5 @@
 using Ryujinx.Common;
 using System;
-using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 
 using static Ryujinx.Graphics.Texture.BlockLinearConstants;
@@ -8,13 +7,6 @@ namespace Ryujinx.Graphics.Texture
 {
     public static class LayoutConverter
     {
-        [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 12)]
-        private struct Bpp12Pixel
-        {
-            private ulong _elem1;
-            private uint _elem2;
-        }
-
         private const int HostStrideAlignment = 4;
 
         public static Span<byte> ConvertBlockLinearToLinear(
@@ -288,7 +280,6 @@ namespace Ryujinx.Graphics.Texture
             {
                 fixed (byte* outputBPtr = output, dataBPtr = data)
                 {
-                    T* outputPtr = (T*)outputBPtr, dataPtr = (T*)dataBPtr;
                     for (int layer = 0; layer < layers; layer++)
                     {
                         int outBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level);
diff --git a/Ryujinx.Graphics.Texture/OffsetCalculator.cs b/Ryujinx.Graphics.Texture/OffsetCalculator.cs
index bb5d606ca..6cf646d9c 100644
--- a/Ryujinx.Graphics.Texture/OffsetCalculator.cs
+++ b/Ryujinx.Graphics.Texture/OffsetCalculator.cs
@@ -1,17 +1,22 @@
 using Ryujinx.Common;
-
+using System.Runtime.CompilerServices;
 using static Ryujinx.Graphics.Texture.BlockLinearConstants;
 
 namespace Ryujinx.Graphics.Texture
 {
     public class OffsetCalculator
     {
+        private int _width;
+        private int _height;
         private int _stride;
         private bool _isLinear;
         private int _bytesPerPixel;
 
         private BlockLinearLayout _layoutConverter;
 
+        // Variables for built in iteration.
+        private int _yPart;
+
         public OffsetCalculator(
             int width,
             int height,
@@ -20,6 +25,8 @@ namespace Ryujinx.Graphics.Texture
             int gobBlocksInY,
             int bytesPerPixel)
         {
+            _width = width;
+            _height = height;
             _stride = stride;
             _isLinear = isLinear;
             _bytesPerPixel = bytesPerPixel;
@@ -40,6 +47,22 @@ namespace Ryujinx.Graphics.Texture
             }
         }
 
+        /// <summary>
+        /// Sets the row used by later <see cref="GetOffset(int)"/> and <see cref="GetOffsetWithLineOffset(int)"/> calls.
+        /// </summary>
+        /// <param name="y">Y coordinate of the row</param>
+        public void SetY(int y)
+        {
+            if (_isLinear)
+            {
+                _yPart = y * _stride;
+            }
+            else
+            {
+                _layoutConverter.SetY(y);
+            }
+        }
+
         public int GetOffset(int x, int y)
         {
             if (_isLinear)
@@ -51,5 +74,76 @@ namespace Ryujinx.Graphics.Texture
                 return _layoutConverter.GetOffset(x, y, 0);
             }
         }
+
+        /// <summary>
+        /// Gets the byte offset of a pixel on the row last passed to <see cref="SetY"/>.
+        /// </summary>
+        /// <param name="x">X coordinate of the pixel</param>
+        /// <returns>Offset of the pixel in bytes</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public int GetOffset(int x)
+        {
+            if (_isLinear)
+            {
+                return x * _bytesPerPixel + _yPart;
+            }
+            else
+            {
+                return _layoutConverter.GetOffset(x);
+            }
+        }
+
+        /// <summary>
+        /// Gets an offset on the row last passed to <see cref="SetY"/>, without scaling x by the pixel size.
+        /// </summary>
+        /// <param name="x">For linear layouts, a byte offset within the row; otherwise passed to the block linear layout as is</param>
+        /// <returns>Offset in bytes</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public int GetOffsetWithLineOffset(int x)
+        {
+            if (_isLinear)
+            {
+                return x + _yPart;
+            }
+            else
+            {
+                return _layoutConverter.GetOffsetWithLineOffset(x);
+            }
+        }
+
+        /// <summary>
+        /// Gets the byte range that covers every pixel of a rectangle.
+        /// </summary>
+        /// <returns>Offset of the rectangle's first byte, and the number of bytes up to the end of its last pixel</returns>
+        public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
+        {
+            if (_isLinear)
+            {
+                // Same addressing as GetOffset(int): x is scaled by the pixel size, rows are _stride bytes apart.
+                int start = y * _stride + x * _bytesPerPixel;
+                int end = (y + height - 1) * _stride + (x + width) * _bytesPerPixel;
+                return (start, end - start);
+            }
+            else
+            {
+                return _layoutConverter.GetRectangleRange(x, y, width, height);
+            }
+        }
+
+        /// <summary>
+        /// Checks whether another calculator describes the same memory layout, so data can be copied without conversion.
+        /// </summary>
+        /// <returns>True if both are linear with equal size, stride and pixel size, or both are block linear with matching layouts</returns>
+        public bool LayoutMatches(OffsetCalculator other)
+        {
+            if (_isLinear)
+            {
+                return other._isLinear && _width == other._width && _height == other._height && _stride == other._stride && _bytesPerPixel == other._bytesPerPixel;
+            }
+            else
+            {
+                return !other._isLinear && _layoutConverter.LayoutMatches(other._layoutConverter);
+            }
+        }
     }
 }
\ No newline at end of file