Ryujinx/src/Ryujinx.Graphics.OpenGL/Image/FormatConverter.cs
jhorv 268c9aecf8
Texture loading: reduce memory allocations (#6623)
* rebase

* add methods Ryyjinx.Common EmbeddedResources and SteamUtils

* GAL changes - change SetData() methods and ThreadedTexture commands to use IMemoryOwner<byte> instead of SpanOrArray<byte>

* Ryujinx.Graphics.Texture: change texture conversion methods to return IMemoryOwner<byte> and allocate from ByteMemoryPool

* Ryujinx.Graphics.OpenGL: update ITexture and Texture-like types with SetData() methods to take IMemoryOwner<byte> instead of SpanOrArray<byte>

* Ryujinx.Graphics.Vulkan: update ITexture and Texture-like types with SetData() methods to take IMemoryOwner<byte> instead of SpanOrArray<byte>

* Ryujinx.Graphics.Gpu: update ITexture and Texture-like types with SetData() methods to take IMemoryOwner<byte> instead of SpanOrArray<byte>

* Remove now-unused SpanOrArray<T>

* post-rebase cleanup

* PixelConverter: remove unsafe modifier on safe methods, and remove one unnecessary cast

* use ByteMemoryPool.Rent() in GetWritableRegion() impls

* fix formatting, rename `ReadRentedMemory()` to `ReadFileToRentedMemory()``

* Texture.ConvertToHostCompatibleFormat(): dispose of `result` in Astc decode branch
2024-04-14 17:06:14 -03:00

154 lines
5.1 KiB
C#

using Ryujinx.Common.Memory;
using System;
using System.Buffers;
using System.Numerics;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Ryujinx.Graphics.OpenGL.Image
{
static class FormatConverter
{
public unsafe static IMemoryOwner<byte> ConvertS8D24ToD24S8(ReadOnlySpan<byte> data)
{
IMemoryOwner<byte> outputMemory = ByteMemoryPool.Rent(data.Length);
Span<byte> output = outputMemory.Memory.Span;
int start = 0;
if (Avx2.IsSupported)
{
var mask = Vector256.Create(
(byte)3, (byte)0, (byte)1, (byte)2,
(byte)7, (byte)4, (byte)5, (byte)6,
(byte)11, (byte)8, (byte)9, (byte)10,
(byte)15, (byte)12, (byte)13, (byte)14,
(byte)19, (byte)16, (byte)17, (byte)18,
(byte)23, (byte)20, (byte)21, (byte)22,
(byte)27, (byte)24, (byte)25, (byte)26,
(byte)31, (byte)28, (byte)29, (byte)30);
int sizeAligned = data.Length & ~31;
fixed (byte* pInput = data, pOutput = output)
{
for (uint i = 0; i < sizeAligned; i += 32)
{
var dataVec = Avx.LoadVector256(pInput + i);
dataVec = Avx2.Shuffle(dataVec, mask);
Avx.Store(pOutput + i, dataVec);
}
}
start = sizeAligned;
}
else if (Ssse3.IsSupported)
{
var mask = Vector128.Create(
(byte)3, (byte)0, (byte)1, (byte)2,
(byte)7, (byte)4, (byte)5, (byte)6,
(byte)11, (byte)8, (byte)9, (byte)10,
(byte)15, (byte)12, (byte)13, (byte)14);
int sizeAligned = data.Length & ~15;
fixed (byte* pInput = data, pOutput = output)
{
for (uint i = 0; i < sizeAligned; i += 16)
{
var dataVec = Sse2.LoadVector128(pInput + i);
dataVec = Ssse3.Shuffle(dataVec, mask);
Sse2.Store(pOutput + i, dataVec);
}
}
start = sizeAligned;
}
var outSpan = MemoryMarshal.Cast<byte, uint>(output);
var dataSpan = MemoryMarshal.Cast<byte, uint>(data);
for (int i = start / sizeof(uint); i < dataSpan.Length; i++)
{
outSpan[i] = BitOperations.RotateLeft(dataSpan[i], 8);
}
return outputMemory;
}
public unsafe static byte[] ConvertD24S8ToS8D24(ReadOnlySpan<byte> data)
{
byte[] output = new byte[data.Length];
int start = 0;
if (Avx2.IsSupported)
{
var mask = Vector256.Create(
(byte)1, (byte)2, (byte)3, (byte)0,
(byte)5, (byte)6, (byte)7, (byte)4,
(byte)9, (byte)10, (byte)11, (byte)8,
(byte)13, (byte)14, (byte)15, (byte)12,
(byte)17, (byte)18, (byte)19, (byte)16,
(byte)21, (byte)22, (byte)23, (byte)20,
(byte)25, (byte)26, (byte)27, (byte)24,
(byte)29, (byte)30, (byte)31, (byte)28);
int sizeAligned = data.Length & ~31;
fixed (byte* pInput = data, pOutput = output)
{
for (uint i = 0; i < sizeAligned; i += 32)
{
var dataVec = Avx.LoadVector256(pInput + i);
dataVec = Avx2.Shuffle(dataVec, mask);
Avx.Store(pOutput + i, dataVec);
}
}
start = sizeAligned;
}
else if (Ssse3.IsSupported)
{
var mask = Vector128.Create(
(byte)1, (byte)2, (byte)3, (byte)0,
(byte)5, (byte)6, (byte)7, (byte)4,
(byte)9, (byte)10, (byte)11, (byte)8,
(byte)13, (byte)14, (byte)15, (byte)12);
int sizeAligned = data.Length & ~15;
fixed (byte* pInput = data, pOutput = output)
{
for (uint i = 0; i < sizeAligned; i += 16)
{
var dataVec = Sse2.LoadVector128(pInput + i);
dataVec = Ssse3.Shuffle(dataVec, mask);
Sse2.Store(pOutput + i, dataVec);
}
}
start = sizeAligned;
}
var outSpan = MemoryMarshal.Cast<byte, uint>(output);
var dataSpan = MemoryMarshal.Cast<byte, uint>(data);
for (int i = start / sizeof(uint); i < dataSpan.Length; i++)
{
outSpan[i] = BitOperations.RotateRight(dataSpan[i], 8);
}
return output;
}
}
}