diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs index 246722f81..4a8b7c83b 100644 --- a/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/Ryujinx.Graphics.GAL/Capabilities.cs @@ -4,13 +4,16 @@ namespace Ryujinx.Graphics.GAL { public bool SupportsAstcCompression { get; } + public int MaximumViewportDimensions { get; } public int StorageBufferOffsetAlignment { get; } public Capabilities( bool supportsAstcCompression, + int maximumViewportDimensions, int storageBufferOffsetAlignment) { SupportsAstcCompression = supportsAstcCompression; + MaximumViewportDimensions = maximumViewportDimensions; StorageBufferOffsetAlignment = storageBufferOffsetAlignment; } } diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs b/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs index da6c94e57..43f8b25db 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs @@ -7,7 +7,7 @@ namespace Ryujinx.Graphics.Gpu.Engine { private void Clear(GpuState state, int argument) { - UpdateRenderTargetStateIfNeeded(state); + UpdateRenderTargetState(state, useControl: false); _textureManager.CommitGraphicsBindings(); @@ -49,6 +49,8 @@ namespace Ryujinx.Graphics.Gpu.Engine stencilValue, stencilMask); } + + UpdateRenderTargetState(state, useControl: true); } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs b/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs index d69b9ea03..b7e8a64b2 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs @@ -10,7 +10,7 @@ namespace Ryujinx.Graphics.Gpu.Engine private int _firstIndex; private int _indexCount; - private bool _instancedHasState; + private bool _instancedDrawPending; private bool _instancedIndexed; private int _instancedFirstIndex; @@ -32,9 +32,9 @@ namespace Ryujinx.Graphics.Gpu.Engine if (instanced) { - if (!_instancedHasState) + if (!_instancedDrawPending) { - _instancedHasState = true; + _instancedDrawPending = true; _instancedIndexed = _drawIndexed; @@ -82,20 +82,22 @@ namespace Ryujinx.Graphics.Gpu.Engine private void DrawBegin(GpuState state, int argument) { - PrimitiveType type = (PrimitiveType)(argument & 0xffff); - - _context.Renderer.Pipeline.SetPrimitiveTopology(type.Convert()); - - PrimitiveType = type; - if ((argument & (1 << 26)) != 0) { _instanceIndex++; } else if ((argument & (1 << 27)) == 0) { + PerformDeferredDraws(); + _instanceIndex = 0; } + + PrimitiveType type = (PrimitiveType)(argument & 0xffff); + + _context.Renderer.Pipeline.SetPrimitiveTopology(type.Convert()); + + PrimitiveType = type; } private void SetIndexBufferCount(GpuState state, int argument) @@ -106,9 +108,9 @@ namespace Ryujinx.Graphics.Gpu.Engine public void PerformDeferredDraws() { // Perform any pending instanced draw. - if (_instancedHasState) + if (_instancedDrawPending) { - _instancedHasState = false; + _instancedDrawPending = false; if (_instancedIndexed) { diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs b/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs index 12d44f511..43bab2433 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs @@ -1,4 +1,3 @@ -using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Gpu.State; namespace Ryujinx.Graphics.Gpu.Engine @@ -9,11 +8,6 @@ namespace Ryujinx.Graphics.Gpu.Engine { var uniformBuffer = state.Get(MethodOffset.UniformBufferState); - if (_context.MemoryManager.Translate(uniformBuffer.Address.Pack() + (uint)uniformBuffer.Offset) == MemoryManager.BadAddress) - { - return; - } - _context.MemoryAccessor.Write(uniformBuffer.Address.Pack() + (uint)uniformBuffer.Offset, argument); state.SetUniformBufferOffset(uniformBuffer.Offset + 4); diff --git a/Ryujinx.Graphics.Gpu/Engine/Methods.cs b/Ryujinx.Graphics.Gpu/Engine/Methods.cs index 18fd7e708..5388c86d5 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Methods.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Methods.cs @@ -86,7 +86,14 @@ namespace Ryujinx.Graphics.Gpu.Engine UpdateShaderState(state); } - UpdateRenderTargetStateIfNeeded(state); + if (state.QueryModified(MethodOffset.RtColorState, + MethodOffset.RtDepthStencilState, + MethodOffset.RtControl, + MethodOffset.RtDepthStencilSize, + MethodOffset.RtDepthStencilEnable)) + { + UpdateRenderTargetState(state, useControl: true); + } if (state.QueryModified(MethodOffset.DepthTestEnable, MethodOffset.DepthWriteEnable, @@ -155,7 +162,7 @@ namespace Ryujinx.Graphics.Gpu.Engine UpdateFaceState(state); } - if (state.QueryModified(MethodOffset.RtColorMask)) + if (state.QueryModified(MethodOffset.RtColorMaskShared, MethodOffset.RtColorMask)) { UpdateRtColorMask(state); } @@ -210,19 +217,12 @@ namespace Ryujinx.Graphics.Gpu.Engine } } - private void UpdateRenderTargetStateIfNeeded(GpuState state) + private void UpdateRenderTargetState(GpuState state, bool useControl) { - if (state.QueryModified(MethodOffset.RtColorState, - MethodOffset.RtDepthStencilState, - MethodOffset.RtDepthStencilSize, - MethodOffset.RtDepthStencilEnable)) - { - UpdateRenderTargetState(state); - } - } + var rtControl = state.Get(MethodOffset.RtControl); + + int count = useControl ? rtControl.UnpackCount() : Constants.TotalRenderTargets; - private void UpdateRenderTargetState(GpuState state) - { var msaaMode = state.Get(MethodOffset.RtMsaaMode); int samplesInX = msaaMode.SamplesInX(); @@ -230,9 +230,11 @@ namespace Ryujinx.Graphics.Gpu.Engine for (int index = 0; index < Constants.TotalRenderTargets; index++) { - var colorState = state.Get(MethodOffset.RtColorState, index); + int rtIndex = useControl ? rtControl.UnpackPermutationIndex(index) : index; - if (!IsRtEnabled(colorState)) + var colorState = state.Get(MethodOffset.RtColorState, rtIndex); + + if (index >= count || !IsRtEnabled(colorState)) { _textureManager.SetRenderTargetColor(index, null); @@ -292,6 +294,8 @@ namespace Ryujinx.Graphics.Gpu.Engine private void UpdateViewportTransform(GpuState state) { + bool transformEnable = GetViewportTransformEnable(state); + bool flipY = (state.Get(MethodOffset.YControl) & 1) != 0; float yFlip = flipY ? -1 : 1; @@ -303,13 +307,35 @@ namespace Ryujinx.Graphics.Gpu.Engine var transform = state.Get(MethodOffset.ViewportTransform, index); var extents = state.Get (MethodOffset.ViewportExtents, index); - float x = transform.TranslateX - MathF.Abs(transform.ScaleX); - float y = transform.TranslateY - MathF.Abs(transform.ScaleY); + RectangleF region; - float width = transform.ScaleX * 2; - float height = transform.ScaleY * 2 * yFlip; + if (transformEnable) + { + float x = transform.TranslateX - MathF.Abs(transform.ScaleX); + float y = transform.TranslateY - MathF.Abs(transform.ScaleY); - RectangleF region = new RectangleF(x, y, width, height); + float width = transform.ScaleX * 2; + float height = transform.ScaleY * 2 * yFlip; + + region = new RectangleF(x, y, width, height); + } + else + { + // It's not possible to fully disable viewport transform, at least with the most + // common graphics APIs, but we can effectively disable it with a dummy transform. + // The transform is defined as: xw = (width / 2) * xndc + x + (width / 2) + // By setting x to -(width / 2), we effectively remove the translation. + // By setting the width to 2, we remove the scale since 2 / 2 = 1. + // Now, the only problem is the viewport clipping, that we also can't disable. + // To prevent the values from being clipped, we multiply (-1, -1, 2, 2) by + // the maximum supported viewport dimensions. + // This must be compensated on the shader, by dividing the vertex position + // by the maximum viewport dimensions. + float maxSize = (float)_context.Capabilities.MaximumViewportDimensions; + float halfMaxSize = (float)_context.Capabilities.MaximumViewportDimensions * 0.5f; + + region = new RectangleF(-halfMaxSize, -halfMaxSize, maxSize, maxSize * yFlip); + } viewports[index] = new Viewport( region, @@ -537,11 +563,13 @@ namespace Ryujinx.Graphics.Gpu.Engine private void UpdateRtColorMask(GpuState state) { + bool rtColorMaskShared = state.Get(MethodOffset.RtColorMaskShared); + uint[] componentMasks = new uint[Constants.TotalRenderTargets]; for (int index = 0; index < Constants.TotalRenderTargets; index++) { - var colorMask = state.Get(MethodOffset.RtColorMask, index); + var colorMask = state.Get(MethodOffset.RtColorMask, rtColorMaskShared ? 0 : index); uint componentMask = 0; @@ -634,7 +662,9 @@ namespace Ryujinx.Graphics.Gpu.Engine addressesArray[index] = baseAddress + shader.Offset; } - GraphicsShader gs = _shaderCache.GetGraphicsShader(addresses); + bool viewportTransformEnable = GetViewportTransformEnable(state); + + GraphicsShader gs = _shaderCache.GetGraphicsShader(addresses, !viewportTransformEnable); _vsUsesInstanceId = gs.Shader[0].Program.Info.UsesInstanceId; @@ -695,6 +725,14 @@ namespace Ryujinx.Graphics.Gpu.Engine _context.Renderer.Pipeline.BindProgram(gs.HostProgram); } + private bool GetViewportTransformEnable(GpuState state) + { + // FIXME: We should read ViewportTransformEnable, but it seems that some games writes 0 there? + // return state.Get(MethodOffset.ViewportTransformEnable) != 0; + + return true; + } + private static Target GetTarget(SamplerType type) { type &= ~(SamplerType.Indexed | SamplerType.Shadow); diff --git a/Ryujinx.Graphics.Gpu/NvGpuFifo.cs b/Ryujinx.Graphics.Gpu/NvGpuFifo.cs index 64947bf63..6e02f3915 100644 --- a/Ryujinx.Graphics.Gpu/NvGpuFifo.cs +++ b/Ryujinx.Graphics.Gpu/NvGpuFifo.cs @@ -105,6 +105,8 @@ namespace Ryujinx.Graphics.Gpu { case NvGpuFifoMeth.WaitForIdle: { + _context.Methods.PerformDeferredDraws(); + _context.Renderer.FlushPipelines(); break; diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 8e39662d1..6e3a42a23 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -69,7 +69,7 @@ namespace Ryujinx.Graphics.Gpu.Shader return cpShader; } - public GraphicsShader GetGraphicsShader(ShaderAddresses addresses) + public GraphicsShader GetGraphicsShader(ShaderAddresses addresses, bool dividePosXY) { bool isCached = _gpPrograms.TryGetValue(addresses, out List list); @@ -86,19 +86,28 @@ namespace Ryujinx.Graphics.Gpu.Shader GraphicsShader gpShaders = new GraphicsShader(); + TranslationFlags flags = + TranslationFlags.DebugMode | + TranslationFlags.Unspecialized; + + if (dividePosXY) + { + flags |= TranslationFlags.DividePosXY; + } + if (addresses.VertexA != 0) { - gpShaders.Shader[0] = TranslateGraphicsShader(addresses.Vertex, addresses.VertexA); + gpShaders.Shader[0] = TranslateGraphicsShader(flags, addresses.Vertex, addresses.VertexA); } else { - gpShaders.Shader[0] = TranslateGraphicsShader(addresses.Vertex); + gpShaders.Shader[0] = TranslateGraphicsShader(flags, addresses.Vertex); } - gpShaders.Shader[1] = TranslateGraphicsShader(addresses.TessControl); - gpShaders.Shader[2] = TranslateGraphicsShader(addresses.TessEvaluation); - gpShaders.Shader[3] = TranslateGraphicsShader(addresses.Geometry); - gpShaders.Shader[4] = TranslateGraphicsShader(addresses.Fragment); + gpShaders.Shader[1] = TranslateGraphicsShader(flags, addresses.TessControl); + gpShaders.Shader[2] = TranslateGraphicsShader(flags, addresses.TessEvaluation); + gpShaders.Shader[3] = TranslateGraphicsShader(flags, addresses.Geometry); + gpShaders.Shader[4] = TranslateGraphicsShader(flags, addresses.Fragment); BackpropQualifiers(gpShaders); @@ -218,7 +227,7 @@ namespace Ryujinx.Graphics.Gpu.Shader return new CachedShader(program, codeCached); } - private CachedShader TranslateGraphicsShader(ulong gpuVa, ulong gpuVaA = 0) + private CachedShader TranslateGraphicsShader(TranslationFlags flags, ulong gpuVa, ulong gpuVaA = 0) { if (gpuVa == 0) { @@ -227,10 +236,6 @@ namespace Ryujinx.Graphics.Gpu.Shader ShaderProgram program; - const TranslationFlags flags = - TranslationFlags.DebugMode | - TranslationFlags.Unspecialized; - int[] codeCached = null; if (gpuVaA != 0) @@ -345,7 +350,9 @@ namespace Ryujinx.Graphics.Gpu.Shader private ShaderCapabilities GetShaderCapabilities() { - return new ShaderCapabilities(_context.Capabilities.StorageBufferOffsetAlignment); + return new ShaderCapabilities( + _context.Capabilities.MaximumViewportDimensions, + _context.Capabilities.StorageBufferOffsetAlignment); } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/State/GpuState.cs b/Ryujinx.Graphics.Gpu/State/GpuState.cs index 13f777c91..509f67152 100644 --- a/Ryujinx.Graphics.Gpu/State/GpuState.cs +++ b/Ryujinx.Graphics.Gpu/State/GpuState.cs @@ -94,11 +94,17 @@ namespace Ryujinx.Graphics.Gpu.State _backingMemory[(int)MethodOffset.ViewportExtents + index * 4 + 3] = 0x3F800000; } + // Viewport transform enable. + _backingMemory[(int)MethodOffset.ViewportTransformEnable] = 1; + // Default front stencil mask. _backingMemory[0x4e7] = 0xff; // Default color mask. - _backingMemory[(int)MethodOffset.RtColorMask] = 0x1111; + for (int index = 0; index < Constants.TotalRenderTargets; index++) + { + _backingMemory[(int)MethodOffset.RtColorMask + index] = 0x1111; + } } public void RegisterCallback(MethodOffset offset, int count, MethodCallback callback) diff --git a/Ryujinx.Graphics.Gpu/State/MethodOffset.cs b/Ryujinx.Graphics.Gpu/State/MethodOffset.cs index a560c257c..93cd6f063 100644 --- a/Ryujinx.Graphics.Gpu/State/MethodOffset.cs +++ b/Ryujinx.Graphics.Gpu/State/MethodOffset.cs @@ -29,8 +29,10 @@ namespace Ryujinx.Graphics.Gpu.State StencilBackMasks = 0x3d5, InvalidateTextures = 0x3dd, TextureBarrierTiled = 0x3df, + RtColorMaskShared = 0x3e4, RtDepthStencilState = 0x3f8, VertexAttribState = 0x458, + RtControl = 0x487, RtDepthStencilSize = 0x48a, SamplerIndex = 0x48d, DepthTestEnable = 0x4b3, @@ -62,6 +64,7 @@ namespace Ryujinx.Graphics.Gpu.State DepthBiasClamp = 0x61f, VertexBufferInstanced = 0x620, FaceState = 0x646, + ViewportTransformEnable = 0x64b, Clear = 0x674, RtColorMask = 0x680, ReportState = 0x6c0, diff --git a/Ryujinx.Graphics.Gpu/State/RtControl.cs b/Ryujinx.Graphics.Gpu/State/RtControl.cs new file mode 100644 index 000000000..4c6fbc343 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/State/RtControl.cs @@ -0,0 +1,17 @@ +namespace Ryujinx.Graphics.Gpu.State +{ + struct RtControl + { + public uint Packed; + + public int UnpackCount() + { + return (int)(Packed & 0xf); + } + + public int UnpackPermutationIndex(int index) + { + return (int)((Packed >> (4 + index * 3)) & 7); + } + } +} diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs index 70112a3a9..671bd5b20 100644 --- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs +++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs @@ -7,10 +7,12 @@ namespace Ryujinx.Graphics.OpenGL { private static Lazy _supportsAstcCompression = new Lazy(() => HasExtension("GL_KHR_texture_compression_astc_ldr")); + private static Lazy _maximumViewportDimensions = new Lazy(() => GetLimit(All.MaxViewportDims)); private static Lazy _storageBufferOffsetAlignment = new Lazy(() => GetLimit(All.ShaderStorageBufferOffsetAlignment)); public static bool SupportsAstcCompression => _supportsAstcCompression.Value; + public static int MaximumViewportDimensions => _maximumViewportDimensions.Value; public static int StorageBufferOffsetAlignment => _storageBufferOffsetAlignment.Value; private static bool HasExtension(string name) diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs index c320d1504..3007fe5cc 100644 --- a/Ryujinx.Graphics.OpenGL/Renderer.cs +++ b/Ryujinx.Graphics.OpenGL/Renderer.cs @@ -63,6 +63,7 @@ namespace Ryujinx.Graphics.OpenGL { return new Capabilities( HwCapabilities.SupportsAstcCompression, + HwCapabilities.MaximumViewportDimensions, HwCapabilities.StorageBufferOffsetAlignment); } diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index a5c8cc9a9..85a0001b0 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -142,6 +142,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl context.AppendLine(); } + if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighU32) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl"); + } + if ((info.HelperFunctionsMask & HelperFunctionsMask.Shuffle) != 0) { AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl"); @@ -170,6 +180,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl public static void DeclareLocals(CodeGenContext context, StructuredProgramInfo info) { + context.AppendLine(GetVarTypeName(VariableType.S32) + " " + DefaultNames.DummyIntName + ";"); + context.AppendLine(GetVarTypeName(VariableType.U32) + " " + DefaultNames.DummyUintName + ";"); + foreach (AstOperand decl in info.Locals) { string name = context.OperandManager.DeclareLocal(decl); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs index 4da38b2de..90853b9f6 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs @@ -22,6 +22,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl public const string LocalMemoryName = "local_mem"; public const string SharedMemoryName = "shared_mem"; + public const string DummyIntName = "dummyInt"; + public const string DummyUintName = "dummyUint"; + public const string UndefinedName = "undef"; } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs index f1540fbfb..21c435475 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs @@ -2,6 +2,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { static class HelperFunctionNames { + public static string MultiplyHighS32 = "Helper_MultiplyHighS32"; + public static string MultiplyHighU32 = "Helper_MultiplyHighU32"; + public static string Shuffle = "Helper_Shuffle"; public static string ShuffleDown = "Helper_ShuffleDown"; public static string ShuffleUp = "Helper_ShuffleUp"; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl new file mode 100644 index 000000000..caad6f569 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl @@ -0,0 +1,7 @@ +int Helper_MultiplyHighS32(int x, int y) +{ + int msb; + int lsb; + imulExtended(x, y, msb, lsb); + return msb; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl new file mode 100644 index 000000000..617a925f6 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl @@ -0,0 +1,7 @@ +uint Helper_MultiplyHighU32(uint x, uint y) +{ + uint msb; + uint lsb; + umulExtended(x, y, msb, lsb); + return msb; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs index f013ee10c..2b4ae7f19 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -93,6 +93,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.Minimum, InstType.CallBinary, "min"); Add(Instruction.MinimumU32, InstType.CallBinary, "min"); Add(Instruction.Multiply, InstType.OpBinaryCom, "*", 1); + Add(Instruction.MultiplyHighS32, InstType.CallBinary, HelperFunctionNames.MultiplyHighS32); + Add(Instruction.MultiplyHighU32, InstType.CallBinary, HelperFunctionNames.MultiplyHighU32); Add(Instruction.Negate, InstType.OpUnary, "-", 0); Add(Instruction.ReciprocalSquareRoot, InstType.CallUnary, "inversesqrt"); Add(Instruction.Return, InstType.OpNullary, "return"); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index 5c2ea85e6..ffed4c71c 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -1,6 +1,5 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.StructuredIr; -using Ryujinx.Graphics.Shader.Translation.Optimizations; using System; using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper; diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs index 599c674f3..0837e1858 100644 --- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs @@ -57,6 +57,7 @@ namespace Ryujinx.Graphics.Shader.Decoders Set("0101110001011x", InstEmit.Fadd, typeof(OpCodeFArithReg)); Set("010010011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithCbuf)); Set("0011001x1xxxxx", InstEmit.Ffma, typeof(OpCodeFArithImm)); + Set("000011xxxxxxxx", InstEmit.Ffma32i, typeof(OpCodeFArithImm32)); Set("010100011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithRegCbuf)); Set("010110011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithReg)); Set("0100110000110x", InstEmit.Flo, typeof(OpCodeAluCbuf)); @@ -102,12 +103,16 @@ namespace Ryujinx.Graphics.Shader.Decoders Set("0011100x11100x", InstEmit.I2I, typeof(OpCodeAluImm)); Set("0101110011100x", InstEmit.I2I, typeof(OpCodeAluReg)); Set("0100110000010x", InstEmit.Iadd, typeof(OpCodeAluCbuf)); - Set("0011100000010x", InstEmit.Iadd, typeof(OpCodeAluImm)); + Set("0011100x00010x", InstEmit.Iadd, typeof(OpCodeAluImm)); Set("0001110x0xxxxx", InstEmit.Iadd, typeof(OpCodeAluImm32)); Set("0101110000010x", InstEmit.Iadd, typeof(OpCodeAluReg)); Set("010011001100xx", InstEmit.Iadd3, typeof(OpCodeAluCbuf)); - Set("001110001100xx", InstEmit.Iadd3, typeof(OpCodeAluImm)); + Set("0011100x1100xx", InstEmit.Iadd3, typeof(OpCodeAluImm)); Set("010111001100xx", InstEmit.Iadd3, typeof(OpCodeAluReg)); + Set("010010100xxxxx", InstEmit.Imad, typeof(OpCodeAluCbuf)); + Set("0011010x0xxxxx", InstEmit.Imad, typeof(OpCodeAluImm)); + Set("010110100xxxxx", InstEmit.Imad, typeof(OpCodeAluReg)); + Set("010100100xxxxx", InstEmit.Imad, typeof(OpCodeAluRegCbuf)); Set("0100110000100x", InstEmit.Imnmx, typeof(OpCodeAluCbuf)); Set("0011100x00100x", InstEmit.Imnmx, typeof(OpCodeAluImm)); Set("0101110000100x", InstEmit.Imnmx, typeof(OpCodeAluReg)); diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs index 1d3a1101c..2a8f00ccb 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs @@ -200,6 +200,50 @@ namespace Ryujinx.Graphics.Shader.Instructions // TODO: CC, X, corner cases } + public static void Imad(EmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + bool signedA = context.CurrOp.RawOpCode.Extract(48); + bool signedB = context.CurrOp.RawOpCode.Extract(53); + bool high = context.CurrOp.RawOpCode.Extract(54); + + Operand srcA = GetSrcA(context); + Operand srcB = GetSrcB(context); + Operand srcC = GetSrcC(context); + + Operand res; + + if (high) + { + if (signedA && signedB) + { + res = context.MultiplyHighS32(srcA, srcB); + } + else + { + res = context.MultiplyHighU32(srcA, srcB); + + if (signedA) + { + res = context.IAdd(res, context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31)))); + } + else if (signedB) + { + res = context.IAdd(res, context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31)))); + } + } + } + else + { + res = context.IMultiply(srcA, srcB); + } + + res = context.IAdd(res, srcC); + + context.Copy(GetDest(context), res); + } + public static void Imnmx(EmitterContext context) { OpCodeAlu op = (OpCodeAlu)context.CurrOp; diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs index 3b8d7305b..63d1efcbc 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs @@ -59,6 +59,26 @@ namespace Ryujinx.Graphics.Shader.Instructions SetFPZnFlags(context, dest, op.SetCondCode); } + public static void Ffma32i(EmitterContext context) + { + IOpCodeFArith op = (IOpCodeFArith)context.CurrOp; + + bool saturate = op.RawOpCode.Extract(55); + bool negateA = op.RawOpCode.Extract(56); + bool negateC = op.RawOpCode.Extract(57); + + Operand srcA = context.FPNegate(GetSrcA(context), negateA); + Operand srcC = context.FPNegate(GetDest(context), negateC); + + Operand srcB = GetSrcB(context); + + Operand dest = GetDest(context); + + context.Copy(dest, context.FPSaturate(context.FPFusedMultiplyAdd(srcA, srcB, srcC), saturate)); + + SetFPZnFlags(context, dest, op.SetCondCode); + } + public static void Fmnmx(EmitterContext context) { IOpCodeFArith op = (IOpCodeFArith)context.CurrOp; diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs index 6acaa490c..5f0407c28 100644 --- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs +++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs @@ -84,6 +84,8 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation Minimum, MinimumU32, Multiply, + MultiplyHighS32, + MultiplyHighU32, Negate, PackDouble2x32, PackHalf2x16, diff --git a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj index e10d1edaf..8715dad31 100644 --- a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj +++ b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -1,6 +1,8 @@ + + diff --git a/Ryujinx.Graphics.Shader/ShaderCapabilities.cs b/Ryujinx.Graphics.Shader/ShaderCapabilities.cs index 939c7c1de..b5b459a8d 100644 --- a/Ryujinx.Graphics.Shader/ShaderCapabilities.cs +++ b/Ryujinx.Graphics.Shader/ShaderCapabilities.cs @@ -2,14 +2,18 @@ namespace Ryujinx.Graphics.Shader { public struct ShaderCapabilities { - private static readonly ShaderCapabilities _default = new ShaderCapabilities(16); + private static readonly ShaderCapabilities _default = new ShaderCapabilities(32768, 16); public static ShaderCapabilities Default => _default; + public int MaximumViewportDimensions { get; } public int StorageBufferOffsetAlignment { get; } - public ShaderCapabilities(int storageBufferOffsetAlignment) + public ShaderCapabilities( + int maximumViewportDimensions, + int storageBufferOffsetAlignment) { + MaximumViewportDimensions = maximumViewportDimensions; StorageBufferOffsetAlignment = storageBufferOffsetAlignment; } } diff --git a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index e2eee78d9..53367fce1 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -5,10 +5,12 @@ namespace Ryujinx.Graphics.Shader.StructuredIr [Flags] enum HelperFunctionsMask { - Shuffle = 1 << 0, - ShuffleDown = 1 << 1, - ShuffleUp = 1 << 2, - ShuffleXor = 1 << 3, - SwizzleAdd = 1 << 4 + MultiplyHighS32 = 1 << 0, + MultiplyHighU32 = 1 << 1, + Shuffle = 1 << 2, + ShuffleDown = 1 << 3, + ShuffleUp = 1 << 4, + ShuffleXor = 1 << 5, + SwizzleAdd = 1 << 6 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs index d1874f50f..9614b6598 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs @@ -102,6 +102,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.Minimum, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.MinimumU32, VariableType.U32, VariableType.U32, VariableType.U32); Add(Instruction.Multiply, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); + Add(Instruction.MultiplyHighS32, VariableType.S32, VariableType.S32, VariableType.S32); + Add(Instruction.MultiplyHighU32, VariableType.U32, VariableType.U32, VariableType.U32); Add(Instruction.Negate, VariableType.Scalar, VariableType.Scalar); Add(Instruction.PackHalf2x16, VariableType.U32, VariableType.F32, VariableType.F32); Add(Instruction.ReciprocalSquareRoot, VariableType.Scalar, VariableType.Scalar); diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index a85fbae3d..504dc3867 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -171,6 +171,12 @@ namespace Ryujinx.Graphics.Shader.StructuredIr // decide which helper functions are needed on the final generated code. switch (operation.Inst) { + case Instruction.MultiplyHighS32: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32; + break; + case Instruction.MultiplyHighU32: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighU32; + break; case Instruction.Shuffle: context.Info.HelperFunctionsMask |= HelperFunctionsMask.Shuffle; break; diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs index 3995d4306..7ba7b697b 100644 --- a/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs +++ b/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs @@ -11,18 +11,25 @@ namespace Ryujinx.Graphics.Shader.Translation public Block CurrBlock { get; set; } public OpCode CurrOp { get; set; } - private ShaderStage _stage; - - private ShaderHeader _header; + private ShaderStage _stage; + private ShaderHeader _header; + private ShaderCapabilities _capabilities; + private TranslationFlags _flags; private List _operations; private Dictionary _labels; - public EmitterContext(ShaderStage stage, ShaderHeader header) + public EmitterContext( + ShaderStage stage, + ShaderHeader header, + ShaderCapabilities capabilities, + TranslationFlags flags) { - _stage = stage; - _header = header; + _stage = stage; + _header = header; + _capabilities = capabilities; + _flags = flags; _operations = new List(); @@ -62,7 +69,18 @@ namespace Ryujinx.Graphics.Shader.Translation public void PrepareForReturn() { - if (_stage == ShaderStage.Fragment) + if (_stage == ShaderStage.Vertex) + { + if ((_flags & TranslationFlags.DividePosXY) != 0) + { + Operand posX = Attribute(AttributeConsts.PositionX); + Operand posY = Attribute(AttributeConsts.PositionY); + + this.Copy(posX, this.FPDivide(posX, ConstF(_capabilities.MaximumViewportDimensions / 2))); + this.Copy(posY, this.FPDivide(posY, ConstF(_capabilities.MaximumViewportDimensions / 2))); + } + } + else if (_stage == ShaderStage.Fragment) { if (_header.OmapDepth) { diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs index d884cfdb3..e39d8c645 100644 --- a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs +++ b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -476,6 +476,16 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.LoadShared, Local(), a); } + public static Operand MultiplyHighS32(this EmitterContext context, Operand a, Operand b) + { + return context.Add(Instruction.MultiplyHighS32, Local(), a, b); + } + + public static Operand MultiplyHighU32(this EmitterContext context, Operand a, Operand b) + { + return context.Add(Instruction.MultiplyHighU32, Local(), a, b); + } + public static Operand PackHalf2x16(this EmitterContext context, Operand a, Operand b) { return context.Add(Instruction.PackHalf2x16, Local(), a, b); diff --git a/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs b/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs index 99b6107a9..8faa43836 100644 --- a/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs +++ b/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs @@ -6,6 +6,7 @@ namespace Ryujinx.Graphics.Shader.Translation Compute = 1 << 0, DebugMode = 1 << 1, - Unspecialized = 1 << 2 + Unspecialized = 1 << 2, + DividePosXY = 1 << 3 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs index b129be939..69e63ae13 100644 --- a/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -49,15 +49,9 @@ namespace Ryujinx.Graphics.Shader.Translation public static ShaderProgram Translate(Span code, ShaderCapabilities capabilities, TranslationFlags flags) { - bool compute = (flags & TranslationFlags.Compute) != 0; - bool debugMode = (flags & TranslationFlags.DebugMode) != 0; + bool compute = (flags & TranslationFlags.Compute) != 0; - Operation[] ops = DecodeShader( - code, - compute, - debugMode, - out ShaderHeader header, - out int size); + Operation[] ops = DecodeShader(code, capabilities, flags, out ShaderHeader header, out int size); ShaderStage stage; @@ -94,8 +88,8 @@ namespace Ryujinx.Graphics.Shader.Translation { bool debugMode = (flags & TranslationFlags.DebugMode) != 0; - Operation[] vpAOps = DecodeShader(vpACode, compute: false, debugMode, out _, out _); - Operation[] vpBOps = DecodeShader(vpBCode, compute: false, debugMode, out ShaderHeader header, out int sizeB); + Operation[] vpAOps = DecodeShader(vpACode, capabilities, flags, out _, out _); + Operation[] vpBOps = DecodeShader(vpBCode, capabilities, flags, out ShaderHeader header, out int sizeB); ShaderConfig config = new ShaderConfig( header.Stage, @@ -142,23 +136,23 @@ namespace Ryujinx.Graphics.Shader.Translation } private static Operation[] DecodeShader( - Span code, - bool compute, - bool debugMode, - out ShaderHeader header, - out int size) + Span code, + ShaderCapabilities capabilities, + TranslationFlags flags, + out ShaderHeader header, + out int size) { Block[] cfg; EmitterContext context; - if (compute) + if ((flags & TranslationFlags.Compute) != 0) { header = null; cfg = Decoder.Decode(code, 0); - context = new EmitterContext(ShaderStage.Compute, header); + context = new EmitterContext(ShaderStage.Compute, header, capabilities, flags); } else { @@ -166,7 +160,7 @@ namespace Ryujinx.Graphics.Shader.Translation cfg = Decoder.Decode(code, HeaderSize); - context = new EmitterContext(header.Stage, header); + context = new EmitterContext(header.Stage, header, capabilities, flags); } if (cfg == null) @@ -197,7 +191,7 @@ namespace Ryujinx.Graphics.Shader.Translation { OpCode op = block.OpCodes[opIndex]; - if (debugMode) + if ((flags & TranslationFlags.DebugMode) != 0) { string instName; @@ -274,7 +268,7 @@ namespace Ryujinx.Graphics.Shader.Translation } } - size = (int)maxEndAddress + (compute ? 0 : HeaderSize); + size = (int)maxEndAddress + (((flags & TranslationFlags.Compute) != 0) ? 0 : HeaderSize); return context.GetOperations(); }