Metal renderer: shared utility shader library and hybrid (vertex-as-compute) pipelines

Patch summary
  * src/Cafe/CMakeLists.txt: build MetalHybridComputePipeline.{cpp,h} and
    UtilityShaderSource.h in place of ShaderSourcePresent.h.
  * LatteDecompilerEmitMSL.cpp / LatteDecompilerEmitMSLAttrDecoder.cpp: drop the
    generated unpackHalf2x16() helper and emit the half2 bit cast inline at its four
    call sites.
  * MetalHybridComputePipeline: new class. Only the render half (a vertex-only pipeline
    with rasterization disabled) is created; the compute half is still TODO.
  * MetalRenderer: compile one utility library in the constructor and create the present
    pipeline and the two depth/color copy pipelines from it.

Review changes folded into this revision
  * MetalHybridComputePipeline: the compute state is typed MTL::ComputePipelineState,
    both states default to nullptr, the destructor guards its release, copying is
    deleted, and the header has #pragma once.
  * MetalRenderer: m_utilityLibrary, m_presentPipeline and the two hybrid pipeline
    pointers default to nullptr so that the early return in the constructor and a
    renderer whose InitializeLayer() never ran are both safe to destroy; the destructor
    checks before releasing. The library error message now names the utility library.
  * UtilityShaderSource.h: #pragma once and a header comment.

Caveats
  * This text was recovered from a copy in which line breaks, indentation, blank lines
    and every <...> span had been lost. Whitespace, blank-line positions and template
    arguments (metal_stdlib, typename T, half2/uint2/uint4, texture element types and
    access qualifiers) are reconstructions; confirm them against the tree before
    applying, or apply with whitespace checks relaxed.
  * The post-image blob ids on the "index" lines were not recomputed.

diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt
index 3f224491..d0e7d921 100644
--- a/src/Cafe/CMakeLists.txt
+++ b/src/Cafe/CMakeLists.txt
@@ -560,7 +560,9 @@ if(ENABLE_METAL)
 		HW/Latte/Renderer/Metal/MetalPipelineCache.h
 		HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
 		HW/Latte/Renderer/Metal/MetalDepthStencilCache.h
-		HW/Latte/Renderer/Metal/ShaderSourcePresent.h
+		HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp
+		HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h
+		HW/Latte/Renderer/Metal/UtilityShaderSource.h
 	)
 
 	#target_link_libraries(CemuCafe PRIVATE
diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
index c3cad925..e769064f 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
@@ -3732,13 +3732,6 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon
 	"return round(x / 2.0) * 2.0;\r\n"
 	"}\r\n");
 
-	// unpackHalf2x16
-	fCStr_shaderSource->add(""
-	"template<typename T>\r\n"
-	"float2 unpackHalf2x16(T x) {\r\n"
-	"return float2(as_type<half2>(x));\r\n"
-	"}\r\n");
-
 	// Bit cast
 
 	// Scalar
diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp
index a9993964..ee438298 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp
@@ -256,7 +256,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
 			// seen in Giana Sisters: Twisted Dreams
 			_readLittleEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
 			// TODO: uint4?
-			src->add("attrDecoder.xyzw = as_type<uint4>(float4(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)),unpackHalf2x16(attrDecoder.z|(attrDecoder.w<<16))));" _CRLF);
+			src->add("attrDecoder.xyzw = as_type<uint4>(float4(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))),float2(as_type<half2>(attrDecoder.z|(attrDecoder.w<<16)))));" _CRLF);
 		}
 		else if (attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
 		{
@@ -271,7 +271,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
 		{
 			// seen in Giana Sisters: Twisted Dreams
 			_readLittleEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
-			src->add("attrDecoder.xy = as_type<uint2>(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)));" _CRLF);
+			src->add("attrDecoder.xy = as_type<uint2>(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))));" _CRLF);
 			src->add("attrDecoder.zw = uint2(0);" _CRLF);
 		}
 		else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
@@ -394,7 +394,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
 		{
 			_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
 			// TODO: uint4?
-			src->add("attrDecoder.xyzw = as_type<uint4>(float4(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)),unpackHalf2x16(attrDecoder.z|(attrDecoder.w<<16))));" _CRLF);
+			src->add("attrDecoder.xyzw = as_type<uint4>(float4(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))),float2(as_type<half2>(attrDecoder.z|(attrDecoder.w<<16)))));" _CRLF);
 		}
 		else if (attrib->format == FMT_16_16_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
 		{
@@ -446,7 +446,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
 		else if( attrib->format == FMT_16_16_FLOAT && attrib->nfa == 2 )
 		{
 			_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
-			src->add("attrDecoder.xy = as_type<uint2>(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)));" _CRLF);
+			src->add("attrDecoder.xy = as_type<uint2>(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))));" _CRLF);
 			src->add("attrDecoder.zw = uint2(0);" _CRLF);
 		}
 		else if( attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned == 0 )
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp
new file mode 100644
index 00000000..3802939b
--- /dev/null
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp
@@ -0,0 +1,38 @@
+#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
+
+// Creates the render half of the hybrid pipeline: a render pipeline with rasterization
+// disabled whose only stage is the function `vertexFunctionName` looked up in `library`.
+// `kernelFunctionName` is not used yet because the compute half is still TODO, so
+// GetComputePipelineState() returns null. Creation errors are printed, not propagated.
+MetalHybridComputePipeline::MetalHybridComputePipeline(class MetalRenderer* mtlRenderer, MTL::Library* library, const char* vertexFunctionName, const char* kernelFunctionName)
+{
+    // Render pipeline state
+    MTL::Function* vertexFunction = library->newFunction(NS::String::string(vertexFunctionName, NS::ASCIIStringEncoding));
+
+    MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
+    renderPipelineDescriptor->setVertexFunction(vertexFunction);
+    renderPipelineDescriptor->setRasterizationEnabled(false);
+
+    NS::Error* error = nullptr;
+    m_renderPipelineState = mtlRenderer->GetDevice()->newRenderPipelineState(renderPipelineDescriptor, &error);
+    renderPipelineDescriptor->release();
+    vertexFunction->release();
+    if (error)
+    {
+        printf("error creating hybrid render pipeline state: %s\n", error->localizedDescription()->utf8String());
+        error->release();
+    }
+
+    // Compute pipeline state
+    // TODO
+}
+
+// Releases the render pipeline state if one was created.
+MetalHybridComputePipeline::~MetalHybridComputePipeline()
+{
+    // May be null if newRenderPipelineState() failed in the constructor
+    if (m_renderPipelineState)
+        m_renderPipelineState->release();
+    // TODO: uncomment
+    //m_computePipelineState->release();
+}
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h
new file mode 100644
index 00000000..7d586e24
--- /dev/null
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
+#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
+#include "Metal/MTLComputePipeline.hpp"
+#include "Metal/MTLLibrary.hpp"
+#include "Metal/MTLRenderPipeline.hpp"
+
+// Pairs a vertex-only render pipeline with a (not yet implemented) compute pipeline.
+// Owns the states it creates; the destructor currently releases the render state only.
+class MetalHybridComputePipeline
+{
+public:
+    MetalHybridComputePipeline(class MetalRenderer* mtlRenderer, MTL::Library* library, const char* vertexFunctionName, const char* kernelFunctionName);
+    ~MetalHybridComputePipeline();
+
+    // Non-copyable: a copy would release the same pipeline state a second time
+    MetalHybridComputePipeline(const MetalHybridComputePipeline&) = delete;
+    MetalHybridComputePipeline& operator=(const MetalHybridComputePipeline&) = delete;
+
+    MTL::RenderPipelineState* GetRenderPipelineState() const { return m_renderPipelineState; }
+
+    // Null until the compute half is implemented
+    MTL::ComputePipelineState* GetComputePipelineState() const { return m_computePipelineState; }
+
+private:
+    MTL::RenderPipelineState* m_renderPipelineState = nullptr;
+    MTL::ComputePipelineState* m_computePipelineState = nullptr;
+};
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index 099f923b..2f03d0a2 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -7,13 +7,15 @@
 #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
 #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h"
+#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
 #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
 
-#include "Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h"
+#include "Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h"
 
 #include "Cafe/HW/Latte/Core/LatteShader.h"
 #include "Cafe/HW/Latte/Core/LatteIndices.h"
 #include "Cemu/Logging/CemuDebugLogging.h"
+#include "Foundation/NSError.hpp"
 #include "HW/Latte/Core/Latte.h"
 #include "HW/Latte/ISA/LatteReg.h"
 #include "Metal/MTLPixelFormat.hpp"
@@ -54,10 +56,36 @@ MetalRenderer::MetalRenderer()
             m_state.uniformBufferOffsets[i][j] = INVALID_OFFSET;
         }
     }
+
+    // Utility shader source
+    NS::Error* error = nullptr;
+    m_utilityLibrary = m_device->newLibrary(NS::String::string(utilityShaderSource, NS::ASCIIStringEncoding), nullptr, &error);
+    if (error)
+    {
+        debug_printf("failed to create utility library (error: %s)\n", error->localizedDescription()->utf8String());
+        error->release();
+        // The hybrid pipelines need the library; their pointers keep their null defaults
+        return;
+    }
+
+    // Hybrid pipelines
+    m_copyDepthToColorPipeline = new MetalHybridComputePipeline(this, m_utilityLibrary, "vertexCopyDepthToColor", "kernelCopyDepthToColor");
+    m_copyColorToDepthPipeline = new MetalHybridComputePipeline(this, m_utilityLibrary, "vertexCopyColorToDepth", "kernelCopyColorToDepth");
 }
 
 MetalRenderer::~MetalRenderer()
 {
+    // Still null if the utility library failed to build; deleting a null pointer is a no-op
+    delete m_copyDepthToColorPipeline;
+    delete m_copyColorToDepthPipeline;
+
+    // Assigned in InitializeLayer(), so it may never have been created
+    if (m_presentPipeline)
+        m_presentPipeline->release();
+
+    if (m_utilityLibrary)
+        m_utilityLibrary->release();
+
     delete m_depthStencilCache;
     delete m_pipelineCache;
     delete m_memoryManager;
@@ -82,22 +110,15 @@ void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow)
     m_metalLayer->setPixelFormat(MTL::PixelFormatRGBA8Unorm/*_sRGB*/);
 
     // Present pipeline
-    NS::Error* error = nullptr;
-    MTL::Library* presentLibrary = m_device->newLibrary(NS::String::string(presentLibrarySource, NS::ASCIIStringEncoding), nullptr, &error);
-    if (error)
-    {
-        debug_printf("failed to create present library (error: %s)\n", error->localizedDescription()->utf8String());
-        error->release();
-        return;
-    }
-    MTL::Function* presentVertexFunction = presentLibrary->newFunction(NS::String::string("presentVertex", NS::ASCIIStringEncoding));
-    MTL::Function* presentFragmentFunction = presentLibrary->newFunction(NS::String::string("presentFragment", NS::ASCIIStringEncoding));
-    presentLibrary->release();
+    MTL::Function* presentVertexFunction = m_utilityLibrary->newFunction(NS::String::string("vertexFullscreen", NS::ASCIIStringEncoding));
+    MTL::Function* presentFragmentFunction = m_utilityLibrary->newFunction(NS::String::string("fragmentPresent", NS::ASCIIStringEncoding));
 
     MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
     renderPipelineDescriptor->setVertexFunction(presentVertexFunction);
     renderPipelineDescriptor->setFragmentFunction(presentFragmentFunction);
     renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(m_metalLayer->pixelFormat());
+
+    NS::Error* error = nullptr;
     m_presentPipeline = m_device->newRenderPipelineState(renderPipelineDescriptor, &error);
     renderPipelineDescriptor->release();
     presentVertexFunction->release();
@@ -106,7 +127,6 @@ void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow)
     {
         debug_printf("failed to create present pipeline (error: %s)\n", error->localizedDescription()->utf8String());
         error->release();
-        return;
     }
 }
 
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
index 6bba8c74..efff0264 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
@@ -257,8 +257,13 @@ private:
     MTL::CommandQueue* m_commandQueue;
 
     // Pipelines
-    MTL::RenderPipelineState* m_presentPipeline;
+    MTL::Library* m_utilityLibrary = nullptr;
+    MTL::RenderPipelineState* m_presentPipeline = nullptr;
 
+    // Hybrid pipelines
+    class MetalHybridComputePipeline* m_copyDepthToColorPipeline = nullptr;
+    class MetalHybridComputePipeline* m_copyColorToDepthPipeline = nullptr;
+
     // Basic
     MTL::SamplerState* m_nearestSampler;
 
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h b/src/Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h
deleted file mode 100644
index a77ce2cd..00000000
--- a/src/Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h
+++ /dev/null
@@ -1,23 +0,0 @@
-inline const char* presentLibrarySource = \
-"#include <metal_stdlib>\n" \
-"using namespace metal;\n" \
-"\n" \
-"constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};\n"
-"\n" \
-"struct VertexOut {\n" \
-"    float4 position [[position]];\n" \
-"    float2 texCoord;\n" \
-"};\n" \
-"\n" \
-"vertex VertexOut presentVertex(ushort vid [[vertex_id]]) {\n" \
-"    VertexOut out;\n" \
-"    out.position = float4(positions[vid], 0.0, 1.0);\n" \
-"    out.texCoord = positions[vid] * 0.5 + 0.5;\n" \
-"    out.texCoord.y = 1.0 - out.texCoord.y;\n" \
-"\n" \
-"    return out;\n" \
-"}\n" \
-"\n" \
-"fragment float4 presentFragment(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {\n" \
-"    return tex.sample(samplr, in.texCoord);\n" \
-"}\n";
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
new file mode 100644
index 00000000..edfee9ba
--- /dev/null
+++ b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
@@ -0,0 +1,38 @@
+#pragma once
+
+// MSL source of the utility library that MetalRenderer compiles in its constructor. It holds
+// the single-triangle present shaders and the vertex functions used for depth/color copies.
+inline const char* utilityShaderSource = \
+"#include <metal_stdlib>\n" \
+"using namespace metal;\n" \
+"\n" \
+"constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};\n" \
+"\n" \
+"struct VertexOut {\n" \
+"    float4 position [[position]];\n" \
+"    float2 texCoord;\n" \
+"};\n" \
+"\n" \
+"vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {\n" \
+"    VertexOut out;\n" \
+"    out.position = float4(positions[vid], 0.0, 1.0);\n" \
+"    out.texCoord = positions[vid] * 0.5 + 0.5;\n" \
+"    out.texCoord.y = 1.0 - out.texCoord.y;\n" \
+"\n" \
+"    return out;\n" \
+"}\n" \
+"\n" \
+"fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {\n" \
+"    return tex.sample(samplr, in.texCoord);\n" \
+"}\n" \
+"\n" \
+"vertex void vertexCopyDepthToColor(uint vid [[vertex_id]], depth2d<float, access::read> src [[texture(0)]], texture2d<float, access::write> dst [[texture(1)]], constant uint& copyWidth) {\n" \
+"    uint2 coord = uint2(vid % copyWidth, vid / copyWidth);\n" \
+"    return dst.write(float4(src.read(coord), 0.0, 0.0, 0.0), coord);\n" \
+"}\n" \
+"\n" \
+"vertex void vertexCopyColorToDepth(uint vid [[vertex_id]], texture2d<float, access::read> src [[texture(0)]], texture2d<float, access::write> dst [[texture(1)]], constant uint& copyWidth) {\n" \
+"    uint2 coord = uint2(vid % copyWidth, vid / copyWidth);\n" \
+"    return dst.write(float4(src.read(coord).r), coord);\n" \
+"}\n" \
+"\n";