From 8b783e63dc8bbd23cc5eb5fd0033295e10175869 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sun, 13 Oct 2024 09:20:45 +0200 Subject: [PATCH] refactor pipeline cache --- src/Cafe/CMakeLists.txt | 2 + src/Cafe/HW/Latte/Core/LatteShaderCache.cpp | 21 - .../Renderer/Metal/MetalPipelineCache.cpp | 593 ++--------------- .../Latte/Renderer/Metal/MetalPipelineCache.h | 22 +- .../Renderer/Metal/MetalPipelineCompiler.cpp | 596 ++++++++++++++++++ .../Renderer/Metal/MetalPipelineCompiler.h | 38 ++ .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 7 +- 7 files changed, 683 insertions(+), 596 deletions(-) create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 9b5f8d3e..3d1a0230 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -560,6 +560,8 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/MetalMemoryManager.h HW/Latte/Renderer/Metal/MetalOutputShaderCache.cpp HW/Latte/Renderer/Metal/MetalOutputShaderCache.h + HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp + HW/Latte/Renderer/Metal/MetalPipelineCompiler.h HW/Latte/Renderer/Metal/MetalPipelineCache.cpp HW/Latte/Renderer/Metal/MetalPipelineCache.h HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp diff --git a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp index 4659ff10..cdb41184 100644 --- a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp @@ -160,18 +160,11 @@ bool LoadTGAFile(const std::vector& buffer, TGAFILE *tgaFile) void LatteShaderCache_finish() { if (g_renderer->GetType() == RendererAPI::Vulkan) - { RendererShaderVk::ShaderCacheLoading_end(); - } else if (g_renderer->GetType() == RendererAPI::OpenGL) - { RendererShaderGL::ShaderCacheLoading_end(); - } else if (g_renderer->GetType() == RendererAPI::Metal) - { RendererShaderMtl::ShaderCacheLoading_end(); - 
MetalPipelineCache::ShaderCacheLoading_end(); - } } uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId) @@ -251,18 +244,11 @@ void LatteShaderCache_Load() fs::create_directories(ActiveSettings::GetCachePath("shaderCache/precompiled"), ec); // initialize renderer specific caches if (g_renderer->GetType() == RendererAPI::Vulkan) - { RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId); - } else if (g_renderer->GetType() == RendererAPI::OpenGL) - { RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId); - } else if (g_renderer->GetType() == RendererAPI::Metal) - { RendererShaderMtl::ShaderCacheLoading_begin(cacheTitleId); - MetalPipelineCache::ShaderCacheLoading_begin(cacheTitleId); - } // get cache file name const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId); const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0 @@ -791,18 +777,11 @@ void LatteShaderCache_Close() s_shaderCacheGeneric = nullptr; } if (g_renderer->GetType() == RendererAPI::Vulkan) - { RendererShaderVk::ShaderCacheLoading_Close(); - } else if (g_renderer->GetType() == RendererAPI::OpenGL) - { RendererShaderGL::ShaderCacheLoading_Close(); - } else if (g_renderer->GetType() == RendererAPI::Metal) - { RendererShaderMtl::ShaderCacheLoading_Close(); - MetalPipelineCache::ShaderCacheLoading_Close(); - } // if Vulkan then also close pipeline cache if (g_renderer->GetType() == RendererAPI::Vulkan) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 29459539..a70f7541 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -1,517 +1,13 @@ -#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include 
"Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" -#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/ISA/RegDefines.h" -#include "Cemu/Logging/CemuLogging.h" -#include "HW/Latte/Core/LatteConst.h" -#include "config/ActiveSettings.h" +#include "Cafe/HW/Latte/Core/LatteConst.h" -static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) -{ - auto parameterMask = vertexShader->outputParameterMask; - for (uint32 i = 0; i < 32; i++) - { - if ((parameterMask & (1 << i)) == 0) - continue; - sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); - if (vsSemanticId < 0) - continue; - // make sure PS has matching input - if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) - continue; - gsSrc.append(fmt::format("out.passParameterSem{} = objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId)); - } - gsSrc.append(fmt::format("out.position = objectPayload.vertexOut[{}].position;\r\n", vIdx)); - gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", vIdx)); -} - -static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister) -{ - auto parameterMask = vertexShader->outputParameterMask; - for (uint32 i = 0; i < 32; i++) - { - if ((parameterMask & (1 << i)) == 0) - continue; - sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); - if (vsSemanticId < 0) - continue; - 
// make sure PS has matching input - if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) - continue; - gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId)); - } - gsSrc.append(fmt::format("out.position = gen4thVertex{}(objectPayload.vertexOut[0].position, objectPayload.vertexOut[1].position, objectPayload.vertexOut[2].position);\r\n", variant)); - gsSrc.append(fmt::format("mesh.set_vertex(3, out);\r\n")); -} - -static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister) -{ - sint32 pList[4] = { p0, p1, p2, p3 }; - for (sint32 i = 0; i < 4; i++) - { - if (pList[i] == 3) - rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister); - else - rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister); - } - gsSrc.append(fmt::format("mesh.set_index(0, {});\r\n", pList[0])); - gsSrc.append(fmt::format("mesh.set_index(1, {});\r\n", pList[1])); - gsSrc.append(fmt::format("mesh.set_index(2, {});\r\n", pList[2])); - gsSrc.append(fmt::format("mesh.set_index(3, {});\r\n", pList[1])); - gsSrc.append(fmt::format("mesh.set_index(4, {});\r\n", pList[2])); - gsSrc.append(fmt::format("mesh.set_index(5, {});\r\n", pList[3])); -} - -static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer, const LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister) -{ - std::string gsSrc; - gsSrc.append("#include \r\n"); - gsSrc.append("using namespace metal;\r\n"); - - LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); - - // inputs & 
outputs - std::string vertexOutDefinition = "struct VertexOut {\r\n"; - vertexOutDefinition += "float4 position;\r\n"; - std::string geometryOutDefinition = "struct GeometryOut {\r\n"; - geometryOutDefinition += "float4 position [[position]];\r\n"; - auto parameterMask = vertexShader->outputParameterMask; - for (sint32 f = 0; f < 2; f++) - { - for (uint32 i = 0; i < 32; i++) - { - if ((parameterMask & (1 << i)) == 0) - continue; - sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); - if (vsSemanticId < 0) - continue; - auto psImport = psInputTable->getPSImportBySemanticId(vsSemanticId); - if (psImport == nullptr) - continue; - - if (f == 0) - { - vertexOutDefinition += fmt::format("float4 passParameterSem{};\r\n", vsSemanticId); - } - else - { - geometryOutDefinition += fmt::format("float4 passParameterSem{}", vsSemanticId); - - geometryOutDefinition += fmt::format(" [[user(locn{})]]", psInputTable->getPSImportLocationBySemanticId(vsSemanticId)); - if (psImport->isFlat) - geometryOutDefinition += " [[flat]]"; - if (psImport->isNoPerspective) - geometryOutDefinition += " [[center_no_perspective]]"; - geometryOutDefinition += ";\r\n"; - } - } - } - vertexOutDefinition += "};\r\n"; - geometryOutDefinition += "};\r\n"; - - gsSrc.append(vertexOutDefinition); - gsSrc.append(geometryOutDefinition); - - gsSrc.append("struct ObjectPayload {\r\n"); - gsSrc.append("VertexOut vertexOut[3];\r\n"); - gsSrc.append("};\r\n"); - - // gen function - gsSrc.append("float4 gen4thVertexA(float4 a, float4 b, float4 c)\r\n"); - gsSrc.append("{\r\n"); - gsSrc.append("return b - (c - a);\r\n"); - gsSrc.append("}\r\n"); - - gsSrc.append("float4 gen4thVertexB(float4 a, float4 b, float4 c)\r\n"); - gsSrc.append("{\r\n"); - gsSrc.append("return c - (b - a);\r\n"); - gsSrc.append("}\r\n"); - - gsSrc.append("float4 gen4thVertexC(float4 a, float4 b, float4 c)\r\n"); - gsSrc.append("{\r\n"); - gsSrc.append("return c + (b - a);\r\n"); - 
gsSrc.append("}\r\n"); - - // main - gsSrc.append("using MeshType = mesh;\r\n"); - gsSrc.append("[[mesh, max_total_threads_per_threadgroup(1)]]\r\n"); - gsSrc.append("void main0(MeshType mesh, const object_data ObjectPayload& objectPayload [[payload]])\r\n"); - gsSrc.append("{\r\n"); - gsSrc.append("GeometryOut out;\r\n"); - - // there are two possible winding orders that need different triangle generation: - // 0 1 - // 2 3 - // and - // 0 1 - // 3 2 - // all others are just symmetries of these cases - - // we can determine the case by comparing the distance 0<->1 and 0<->2 - - gsSrc.append("float dist0_1 = length(objectPayload.vertexOut[1].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"); - gsSrc.append("float dist0_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"); - gsSrc.append("float dist1_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[1].position.xy);\r\n"); - - // emit vertices - gsSrc.append("if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n"); - gsSrc.append("{\r\n"); - // p0 to p1 is diagonal - rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister); - gsSrc.append("} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n"); - // p0 to p2 is diagonal - rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister); - gsSrc.append("} else {\r\n"); - // p1 to p2 is diagonal - rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister); - gsSrc.append("}\r\n"); - - gsSrc.append("mesh.set_primitive_count(2);\r\n"); - - gsSrc.append("}\r\n"); - - auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc); - mtlShader->PreponeCompilation(true); - - return mtlShader; -} - -#define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF - -uint64 s_cacheTitleId = INVALID_TITLE_ID; - -extern std::atomic_int 
g_compiled_shaders_total; -extern std::atomic_int g_compiled_shaders_async; - -template -void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteDecompilerShader* pixelShader, const LatteContextRegister& lcr) -{ - // Rasterization - bool rasterizationEnabled = !lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); - - // HACK - // TODO: include this in the hash? - if (!lcr.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) - rasterizationEnabled = true; - - // Culling both front and back faces effectively disables rasterization - const auto& polygonControlReg = lcr.PA_SU_SC_MODE_CNTL; - uint32 cullFront = polygonControlReg.get_CULL_FRONT(); - uint32 cullBack = polygonControlReg.get_CULL_BACK(); - if (cullFront && cullBack) - rasterizationEnabled = false; - - auto pixelShaderMtl = static_cast(pixelShader->shader); - - if (!rasterizationEnabled || !pixelShaderMtl) - { - desc->setRasterizationEnabled(false); - return; - } - - desc->setFragmentFunction(pixelShaderMtl->GetFunction()); - - // Color attachments - const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; - uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); - uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK(); - for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) - { - const auto& colorBuffer = lastUsedFBO->colorBuffer[i]; - auto texture = static_cast(colorBuffer.texture); - if (!texture) - { - continue; - } - auto colorAttachment = desc->colorAttachments()->object(i); - colorAttachment->setPixelFormat(texture->GetRGBAView()->pixelFormat()); - - // Disable writes if not in the active FBO - if (!activeFBO->colorBuffer[i].texture) - { - colorAttachment->setWriteMask(MTL::ColorWriteMaskNone); - continue; - } - - colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF)); - - // Blending - bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; - // Only float data type is blendable - if (blendEnabled && 
GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT) - { - colorAttachment->setBlendingEnabled(true); - - const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i]; - - auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN()); - auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND()); - auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND()); - - colorAttachment->setRgbBlendOperation(rgbBlendOp); - colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor); - colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor); - if (blendControlReg.get_SEPARATE_ALPHA_BLEND()) - { - colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN())); - colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND())); - colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND())); - } - else - { - colorAttachment->setAlphaBlendOperation(rgbBlendOp); - colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor); - colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor); - } - } - } - - // Depth stencil attachment - if (lastUsedFBO->depthBuffer.texture) - { - auto texture = static_cast(lastUsedFBO->depthBuffer.texture); - desc->setDepthAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); - if (lastUsedFBO->depthBuffer.hasStencil) - { - desc->setStencilAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); - } - } -} - -void MetalPipelineCache::ShaderCacheLoading_begin(uint64 cacheTitleId) -{ - s_cacheTitleId = cacheTitleId; -} - -void MetalPipelineCache::ShaderCacheLoading_end() -{ -} - -void MetalPipelineCache::ShaderCacheLoading_Close() -{ - g_compiled_shaders_total = 0; - g_compiled_shaders_async = 0; -} - -MetalPipelineCache::~MetalPipelineCache() -{ - for (auto& pair : m_pipelineCache) - { - pair.second->release(); - } - m_pipelineCache.clear(); - - 
NS::Error* error = nullptr; - m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error); - if (error) - { - cemuLog_log(LogType::Force, "error serializing binary archive: {}", error->localizedDescription()->utf8String()); - error->release(); - } - m_binaryArchive->release(); - - m_binaryArchiveURL->release(); -} - -MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) -{ - uint64 stateHash = CalculateRenderPipelineHash(fetchShader, vertexShader, pixelShader, lastUsedFBO, lcr); - auto& pipeline = m_pipelineCache[stateHash]; - if (pipeline) - return pipeline; - - auto vertexShaderMtl = static_cast(vertexShader->shader); - - // Render pipeline state - MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(vertexShaderMtl->GetFunction()); - - // Vertex descriptor - if (!fetchShader->mtlFetchVertexManually) - { - MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); - for (auto& bufferGroup : fetchShader->bufferGroups) - { - std::optional fetchType; - - uint32 minBufferStride = 0; - for (sint32 j = 0; j < bufferGroup.attribCount; ++j) - { - auto& attr = bufferGroup.attrib[j]; - - uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; - if (semanticId == (uint32)-1) - continue; // attribute not used? 
- - auto attribute = vertexDescriptor->attributes()->object(semanticId); - attribute->setOffset(attr.offset); - attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); - attribute->setFormat(GetMtlVertexFormat(attr.format)); - - minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format)); - - if (fetchType.has_value()) - cemu_assert_debug(fetchType == attr.fetchType); - else - fetchType = attr.fetchType; - - if (attr.fetchType == LatteConst::INSTANCE_DATA) - { - cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported - } - } - - uint32 bufferIndex = bufferGroup.attributeBufferIndex; - uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; - uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - - auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); - if (bufferStride == 0) - { - // Buffer stride cannot be zero, let's use the minimum stride - bufferStride = minBufferStride; - - // Additionally, constant vertex function must be used - layout->setStepFunction(MTL::VertexStepFunctionConstant); - layout->setStepRate(0); - } - else - { - if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) - layout->setStepFunction(MTL::VertexStepFunctionPerVertex); - else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) - layout->setStepFunction(MTL::VertexStepFunctionPerInstance); - else - { - debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value()); - cemu_assert(false); - } - } - bufferStride = Align(bufferStride, 4); - layout->setStride(bufferStride); - } - - // TODO: don't always set the vertex descriptor? 
- desc->setVertexDescriptor(vertexDescriptor); - vertexDescriptor->release(); - } - - SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); - - TryLoadBinaryArchive(); - - // Load binary - if (m_binaryArchive) - { - NS::Object* binArchives[] = {m_binaryArchive}; - auto binaryArchives = NS::Array::alloc()->init(binArchives, 1); - desc->setBinaryArchives(binaryArchives); - binaryArchives->release(); - } - - NS::Error* error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("Cached render pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error); - - // Pipeline wasn't found in the binary archive, we need to compile it - if (error) - { - desc->setBinaryArchives(nullptr); - - error->release(); - error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("New render pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); - error->release(); - } - else - { - // Save binary - if (m_binaryArchive) - { - NS::Error* error = nullptr; - m_binaryArchive->addRenderPipelineFunctions(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "error saving render pipeline functions: {}", error->localizedDescription()->utf8String()); - error->release(); - } - } - } - } - desc->release(); - - return pipeline; -} - -MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr, Renderer::INDEX_TYPE hostIndexType) -{ - uint64 stateHash = CalculateRenderPipelineHash(fetchShader, vertexShader, pixelShader, lastUsedFBO, 
lcr); - - stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE]; - stateHash = std::rotl(stateHash, 7); - - stateHash += (uint8)hostIndexType; - stateHash = std::rotl(stateHash, 7); - - auto& pipeline = m_pipelineCache[stateHash]; - if (pipeline) - return pipeline; - - auto objectShaderMtl = static_cast(vertexShader->shader); - RendererShaderMtl* meshShaderMtl; - if (geometryShader) - { - meshShaderMtl = static_cast(geometryShader->shader); - } - else - { - // If there is no geometry shader, it means that we are emulating rects - meshShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); - } - - // Render pipeline state - MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); - desc->setObjectFunction(objectShaderMtl->GetFunction()); - desc->setMeshFunction(meshShaderMtl->GetFunction()); - - SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); - - TryLoadBinaryArchive(); - - // Load binary - // TODO: no binary archives? :( - - NS::Error* error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("Mesh pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); - desc->release(); - if (error) - { - cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String()); - error->release(); - } - - return pipeline; -} - -uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, const LatteContextRegister& lcr) +uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) { 
// Hash uint64 stateHash = 0; @@ -523,6 +19,12 @@ uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* f stateHash += textureView->GetRGBAView()->pixelFormat() + i * 31; stateHash = std::rotl(stateHash, 7); + + if (activeFBO->colorBuffer[i].texture) + { + stateHash += 1; + stateHash = std::rotl(stateHash, 1); + } } if (lastUsedFBO->depthBuffer.texture) @@ -530,6 +32,12 @@ uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* f auto textureView = static_cast(lastUsedFBO->depthBuffer.texture); stateHash += textureView->GetRGBAView()->pixelFormat(); stateHash = std::rotl(stateHash, 7); + + if (activeFBO->depthBuffer.texture) + { + stateHash += 1; + stateHash = std::rotl(stateHash, 1); + } } for (auto& group : fetchShader->bufferGroups) @@ -586,55 +94,38 @@ uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* f } } + // Mesh pipeline + const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); + bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); + + bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); + + if (usesGeometryShader) + { + stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE]; + stateHash = std::rotl(stateHash, 7); + } + return stateHash; } -void MetalPipelineCache::TryLoadBinaryArchive() +MetalPipelineCache::~MetalPipelineCache() { - if (m_binaryArchive || s_cacheTitleId == INVALID_TITLE_ID) - return; - - // GPU name - const char* deviceName1 = m_mtlr->GetDevice()->name()->utf8String(); - std::string deviceName; - deviceName.assign(deviceName1); - - // Replace spaces with underscores - for (auto& c : deviceName) + for (auto& [key, value] : m_pipelineCache) { - if (c == ' ') - c = '_'; + value->release(); } - - // OS version - auto osVersion = NS::ProcessInfo::processInfo()->operatingSystemVersion(); - - // Precompiled binaries cannot be shared between different 
devices or OS versions - const std::string cacheFilename = fmt::format("{:016x}_mtl_pipelines.bin", s_cacheTitleId); - const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}/{}-{}-{}/{}", deviceName, osVersion.majorVersion, osVersion.minorVersion, osVersion.patchVersion, cacheFilename); - - // Create the directory if it doesn't exist - std::filesystem::create_directories(cachePath.parent_path()); - - m_binaryArchiveURL = NS::URL::fileURLWithPath(ToNSString((const char*)cachePath.generic_u8string().c_str())); - - MTL::BinaryArchiveDescriptor* desc = MTL::BinaryArchiveDescriptor::alloc()->init(); - desc->setUrl(m_binaryArchiveURL); - - NS::Error* error = nullptr; - m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); - if (error) - { - desc->setUrl(nullptr); - - error->release(); - error = nullptr; - m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "failed to create binary archive: {}", error->localizedDescription()->utf8String()); - error->release(); - } - } - desc->release(); +} + +MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +{ + auto& pipeline = m_pipelineCache[CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr)]; + if (pipeline) + return pipeline; + + MetalPipelineCompiler compiler(m_mtlr); + compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr); + pipeline = compiler.Compile(false, true); + + return pipeline; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index 916a9072..18b163f6 
100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -1,24 +1,17 @@ #pragma once -#include - -#include "HW/Latte/ISA/LatteReg.h" -#include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" -#include "Cafe/HW/Latte/Renderer/Renderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" +// TODO: binary archives class MetalPipelineCache { public: - static void ShaderCacheLoading_begin(uint64 cacheTitleId); - static void ShaderCacheLoading_end(); - static void ShaderCacheLoading_Close(); + static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} ~MetalPipelineCache(); - MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); - - MTL::RenderPipelineState* GetMeshPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr, Renderer::INDEX_TYPE hostIndexType); + MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); // Debug size_t GetPipelineCacheSize() const { return 
m_pipelineCache.size(); } @@ -27,11 +20,4 @@ private: class MetalRenderer* m_mtlr; std::map m_pipelineCache; - - NS::URL* m_binaryArchiveURL; - MTL::BinaryArchive* m_binaryArchive; - - uint64 CalculateRenderPipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, const LatteContextRegister& lcr); - - void TryLoadBinaryArchive(); }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp new file mode 100644 index 00000000..9eb29cb6 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -0,0 +1,596 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" + +#include "Cafe/HW/Latte/Core/FetchShader.h" +#include "Cafe/HW/Latte/ISA/RegDefines.h" +#include "Cafe/HW/Latte/Core/LatteConst.h" +#include "Cafe/HW/Latte/Core/LatteShader.h" + +static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) +{ + auto parameterMask = vertexShader->outputParameterMask; + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + // make sure PS has matching input + if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) + continue; + gsSrc.append(fmt::format("out.passParameterSem{} = 
objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId)); + } + gsSrc.append(fmt::format("out.position = objectPayload.vertexOut[{}].position;\r\n", vIdx)); + gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", vIdx)); +} + +static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister) +{ + auto parameterMask = vertexShader->outputParameterMask; + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + // make sure PS has matching input + if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) + continue; + gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId)); + } + gsSrc.append(fmt::format("out.position = gen4thVertex{}(objectPayload.vertexOut[0].position, objectPayload.vertexOut[1].position, objectPayload.vertexOut[2].position);\r\n", variant)); + gsSrc.append(fmt::format("mesh.set_vertex(3, out);\r\n")); +} + +static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister) +{ + sint32 pList[4] = { p0, p1, p2, p3 }; + for (sint32 i = 0; i < 4; i++) + { + if (pList[i] == 3) + rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister); + else + rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister); + } + 
gsSrc.append(fmt::format("mesh.set_index(0, {});\r\n", pList[0])); + gsSrc.append(fmt::format("mesh.set_index(1, {});\r\n", pList[1])); + gsSrc.append(fmt::format("mesh.set_index(2, {});\r\n", pList[2])); + gsSrc.append(fmt::format("mesh.set_index(3, {});\r\n", pList[1])); + gsSrc.append(fmt::format("mesh.set_index(4, {});\r\n", pList[2])); + gsSrc.append(fmt::format("mesh.set_index(5, {});\r\n", pList[3])); +} + +static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer, const LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister) +{ + std::string gsSrc; + gsSrc.append("#include \r\n"); + gsSrc.append("using namespace metal;\r\n"); + + LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); + + // inputs & outputs + std::string vertexOutDefinition = "struct VertexOut {\r\n"; + vertexOutDefinition += "float4 position;\r\n"; + std::string geometryOutDefinition = "struct GeometryOut {\r\n"; + geometryOutDefinition += "float4 position [[position]];\r\n"; + auto parameterMask = vertexShader->outputParameterMask; + for (sint32 f = 0; f < 2; f++) + { + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + auto psImport = psInputTable->getPSImportBySemanticId(vsSemanticId); + if (psImport == nullptr) + continue; + + if (f == 0) + { + vertexOutDefinition += fmt::format("float4 passParameterSem{};\r\n", vsSemanticId); + } + else + { + geometryOutDefinition += fmt::format("float4 passParameterSem{}", vsSemanticId); + + geometryOutDefinition += fmt::format(" [[user(locn{})]]", psInputTable->getPSImportLocationBySemanticId(vsSemanticId)); + if (psImport->isFlat) + geometryOutDefinition += " [[flat]]"; + if (psImport->isNoPerspective) + geometryOutDefinition += " [[center_no_perspective]]"; + geometryOutDefinition += ";\r\n"; + } + } + } + 
vertexOutDefinition += "};\r\n"; + geometryOutDefinition += "};\r\n"; + + gsSrc.append(vertexOutDefinition); + gsSrc.append(geometryOutDefinition); + + gsSrc.append("struct ObjectPayload {\r\n"); + gsSrc.append("VertexOut vertexOut[3];\r\n"); + gsSrc.append("};\r\n"); + + // gen function + gsSrc.append("float4 gen4thVertexA(float4 a, float4 b, float4 c)\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("return b - (c - a);\r\n"); + gsSrc.append("}\r\n"); + + gsSrc.append("float4 gen4thVertexB(float4 a, float4 b, float4 c)\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("return c - (b - a);\r\n"); + gsSrc.append("}\r\n"); + + gsSrc.append("float4 gen4thVertexC(float4 a, float4 b, float4 c)\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("return c + (b - a);\r\n"); + gsSrc.append("}\r\n"); + + // main + gsSrc.append("using MeshType = mesh;\r\n"); + gsSrc.append("[[mesh, max_total_threads_per_threadgroup(1)]]\r\n"); + gsSrc.append("void main0(MeshType mesh, const object_data ObjectPayload& objectPayload [[payload]])\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("GeometryOut out;\r\n"); + + // there are two possible winding orders that need different triangle generation: + // 0 1 + // 2 3 + // and + // 0 1 + // 3 2 + // all others are just symmetries of these cases + + // we can determine the case by comparing the distance 0<->1 and 0<->2 + + gsSrc.append("float dist0_1 = length(objectPayload.vertexOut[1].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"); + gsSrc.append("float dist0_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"); + gsSrc.append("float dist1_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[1].position.xy);\r\n"); + + // emit vertices + gsSrc.append("if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n"); + gsSrc.append("{\r\n"); + // p0 to p1 is diagonal + rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister); + 
gsSrc.append("} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n"); + // p0 to p2 is diagonal + rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister); + gsSrc.append("} else {\r\n"); + // p1 to p2 is diagonal + rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister); + gsSrc.append("}\r\n"); + + gsSrc.append("mesh.set_primitive_count(2);\r\n"); + + gsSrc.append("}\r\n"); + + auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc); + mtlShader->PreponeCompilation(true); + + return mtlShader; +} + +#define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF + +uint64 s_cacheTitleId = INVALID_TITLE_ID; + +extern std::atomic_int g_compiled_shaders_total; +extern std::atomic_int g_compiled_shaders_async; + +template +void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteDecompilerShader* pixelShader, const LatteContextRegister& lcr) +{ + // Rasterization + bool rasterizationEnabled = !lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); + + // HACK + // TODO: include this in the hash? 
+ if (!lcr.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + rasterizationEnabled = true; + + // Culling both front and back faces effectively disables rasterization + const auto& polygonControlReg = lcr.PA_SU_SC_MODE_CNTL; + uint32 cullFront = polygonControlReg.get_CULL_FRONT(); + uint32 cullBack = polygonControlReg.get_CULL_BACK(); + if (cullFront && cullBack) + rasterizationEnabled = false; + + auto pixelShaderMtl = static_cast(pixelShader->shader); + + if (!rasterizationEnabled || !pixelShaderMtl) + { + desc->setRasterizationEnabled(false); + return; + } + + desc->setFragmentFunction(pixelShaderMtl->GetFunction()); + + // Color attachments + const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; + uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); + uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK(); + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + { + const auto& colorBuffer = lastUsedFBO->colorBuffer[i]; + auto texture = static_cast(colorBuffer.texture); + if (!texture) + { + continue; + } + auto colorAttachment = desc->colorAttachments()->object(i); + colorAttachment->setPixelFormat(texture->GetRGBAView()->pixelFormat()); + + // Disable writes if not in the active FBO + if (!activeFBO->colorBuffer[i].texture) + { + colorAttachment->setWriteMask(MTL::ColorWriteMaskNone); + continue; + } + + colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF)); + + // Blending + bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; + // Only float data type is blendable + if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT) + { + colorAttachment->setBlendingEnabled(true); + + const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i]; + + auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN()); + auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND()); + auto dstRgbBlendFactor = 
GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND());
+
+			colorAttachment->setRgbBlendOperation(rgbBlendOp);
+			colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor);
+			colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor);
+			if (blendControlReg.get_SEPARATE_ALPHA_BLEND())
+			{
+				colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN()));
+				colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND()));
+				colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND()));
+			}
+			else
+			{
+				colorAttachment->setAlphaBlendOperation(rgbBlendOp);
+				colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor);
+				colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor);
+			}
+		}
+	}
+
+	// Depth stencil attachment
+	if (lastUsedFBO->depthBuffer.texture)
+	{
+		auto texture = static_cast(lastUsedFBO->depthBuffer.texture);
+		desc->setDepthAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat());
+		if (lastUsedFBO->depthBuffer.hasStencil)
+		{
+			desc->setStencilAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat());
+		}
+	}
+}
+// Releases the pipeline descriptor that InitFromStateRender()/InitFromStateMesh() stored in m_pipelineDescriptor.
+MetalPipelineCompiler::~MetalPipelineCompiler()
+{
+	/* Disabled teardown of the pipeline cache and binary archive, kept for reference; nothing in this block is executed:
+	for (auto& pair : m_pipelineCache)
+	{
+		pair.second->release();
+	}
+	m_pipelineCache.clear();
+
+	NS::Error* error = nullptr;
+	m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error);
+	if (error)
+	{
+		cemuLog_log(LogType::Force, "error serializing binary archive: {}", error->localizedDescription()->utf8String());
+		error->release();
+	}
+	m_binaryArchive->release();
+
+	m_binaryArchiveURL->release();
+	*/
+	m_pipelineDescriptor->release(); // NOTE(review): only assigned by InitFromState*(); presumably InitFromState() always runs before destruction - confirm
+}
+
+void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister&
lcr)
+{
+	const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
+	bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);
+
+	m_usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
+
+	if (m_usesGeometryShader)
+		InitFromStateMesh(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedFBO, activeFBO, lcr);
+	else
+		InitFromStateRender(fetchShader, vertexShader, pixelShader, lastUsedFBO, activeFBO, lcr);
+}
+// Creates the pipeline state from the descriptor stored by InitFromState(). Creation errors are logged and the result of newRenderPipelineState() is returned as-is. forceCompile and isRenderThread are not used yet.
+MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread)
+{
+	if (m_usesGeometryShader)
+	{
+		auto desc = static_cast<MTL::MeshRenderPipelineDescriptor*>(m_pipelineDescriptor);
+		// m_pipelineDescriptor was created by InitFromStateMesh(), so it is a mesh render pipeline descriptor
+		NS::Error* error = nullptr;
+#ifdef CEMU_DEBUG_ASSERT
+		desc->setLabel(GetLabel("Mesh render pipeline state", desc));
+#endif
+		MTL::RenderPipelineState* pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
+		// The descriptor must not be released here: it is owned by this object and the destructor releases it, a second release would over-release it
+		if (error)
+		{
+			cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String());
+			error->release();
+		}
+
+		return pipeline;
+	}
+	else
+	{
+		auto desc = static_cast<MTL::RenderPipelineDescriptor*>(m_pipelineDescriptor);
+		// m_pipelineDescriptor was created by InitFromStateRender(), so it is a regular render pipeline descriptor
+		NS::Error* error = nullptr;
+#ifdef CEMU_DEBUG_ASSERT
+		desc->setLabel(GetLabel("Render pipeline state", desc));
+#endif
+		MTL::RenderPipelineState* pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
+		if (error)
+		{
+			cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String());
+			error->release();
+		}
+
+		return pipeline;
+	}
+}
+
+void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr)
+{
+	// Shaders
+	auto vertexShaderMtl =
static_cast(vertexShader->shader); + + // Render pipeline state + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexShaderMtl->GetFunction()); + + // Vertex descriptor + if (!fetchShader->mtlFetchVertexManually) + { + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); + for (auto& bufferGroup : fetchShader->bufferGroups) + { + std::optional fetchType; + + uint32 minBufferStride = 0; + for (sint32 j = 0; j < bufferGroup.attribCount; ++j) + { + auto& attr = bufferGroup.attrib[j]; + + uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; + if (semanticId == (uint32)-1) + continue; // attribute not used? + + auto attribute = vertexDescriptor->attributes()->object(semanticId); + attribute->setOffset(attr.offset); + attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); + attribute->setFormat(GetMtlVertexFormat(attr.format)); + + minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format)); + + if (fetchType.has_value()) + cemu_assert_debug(fetchType == attr.fetchType); + else + fetchType = attr.fetchType; + + if (attr.fetchType == LatteConst::INSTANCE_DATA) + { + cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported + } + } + + uint32 bufferIndex = bufferGroup.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); + if (bufferStride == 0) + { + // Buffer stride cannot be zero, let's use the minimum stride + bufferStride = minBufferStride; + + // Additionally, constant vertex function must be used + layout->setStepFunction(MTL::VertexStepFunctionConstant); + layout->setStepRate(0); + } + else + { + if (!fetchType.has_value() || 
fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) + layout->setStepFunction(MTL::VertexStepFunctionPerVertex); + else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) + layout->setStepFunction(MTL::VertexStepFunctionPerInstance); + else + { + debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value()); + cemu_assert(false); + } + } + bufferStride = Align(bufferStride, 4); + layout->setStride(bufferStride); + } + + // TODO: don't always set the vertex descriptor? + desc->setVertexDescriptor(vertexDescriptor); + vertexDescriptor->release(); + } + + SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); + + m_pipelineDescriptor = desc; + + //TryLoadBinaryArchive(); + + // Load binary + /* + if (m_binaryArchive) + { + NS::Object* binArchives[] = {m_binaryArchive}; + auto binaryArchives = NS::Array::alloc()->init(binArchives, 1); + desc->setBinaryArchives(binaryArchives); + binaryArchives->release(); + } + */ + + /* + NS::Error* error = nullptr; +#ifdef CEMU_DEBUG_ASSERT + desc->setLabel(GetLabel("Cached render pipeline state", desc)); +#endif + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error); + + // Pipeline wasn't found in the binary archive, we need to compile it + if (error) + { + desc->setBinaryArchives(nullptr); + + error->release(); + error = nullptr; +#ifdef CEMU_DEBUG_ASSERT + desc->setLabel(GetLabel("New render pipeline state", desc)); +#endif + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error); + if (error) + { + cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); + error->release(); + } + else + { + // Save binary + if (m_binaryArchive) + { + NS::Error* error = nullptr; + m_binaryArchive->addRenderPipelineFunctions(desc, &error); + if (error) + { + cemuLog_log(LogType::Force, "error saving render pipeline functions: {}", 
error->localizedDescription()->utf8String()); + error->release(); + } + } + } + } + desc->release(); + + return pipeline; + */ +} + +void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +{ + auto objectShaderMtl = static_cast(vertexShader->shader); + RendererShaderMtl* meshShaderMtl; + if (geometryShader) + { + meshShaderMtl = static_cast(geometryShader->shader); + } + else + { + // If there is no geometry shader, it means that we are emulating rects + meshShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); + } + + // Render pipeline state + MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); + desc->setObjectFunction(objectShaderMtl->GetFunction()); + desc->setMeshFunction(meshShaderMtl->GetFunction()); + + SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); + + m_pipelineDescriptor = desc; + + //TryLoadBinaryArchive(); + + // Load binary + // TODO: no binary archives? 
:( + + /* + NS::Error* error = nullptr; +#ifdef CEMU_DEBUG_ASSERT + desc->setLabel(GetLabel("Mesh pipeline state", desc)); +#endif + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); + desc->release(); + if (error) + { + cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String()); + error->release(); + } + + return pipeline; + */ +} + +/* +void MetalPipelineCache::TryLoadBinaryArchive() +{ + if (m_binaryArchive || s_cacheTitleId == INVALID_TITLE_ID) + return; + + // GPU name + const char* deviceName1 = m_mtlr->GetDevice()->name()->utf8String(); + std::string deviceName; + deviceName.assign(deviceName1); + + // Replace spaces with underscores + for (auto& c : deviceName) + { + if (c == ' ') + c = '_'; + } + + // OS version + auto osVersion = NS::ProcessInfo::processInfo()->operatingSystemVersion(); + + // Precompiled binaries cannot be shared between different devices or OS versions + const std::string cacheFilename = fmt::format("{:016x}_mtl_pipelines.bin", s_cacheTitleId); + const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}/{}-{}-{}/{}", deviceName, osVersion.majorVersion, osVersion.minorVersion, osVersion.patchVersion, cacheFilename); + + // Create the directory if it doesn't exist + std::filesystem::create_directories(cachePath.parent_path()); + + m_binaryArchiveURL = NS::URL::fileURLWithPath(ToNSString((const char*)cachePath.generic_u8string().c_str())); + + MTL::BinaryArchiveDescriptor* desc = MTL::BinaryArchiveDescriptor::alloc()->init(); + desc->setUrl(m_binaryArchiveURL); + + NS::Error* error = nullptr; + m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); + if (error) + { + desc->setUrl(nullptr); + + error->release(); + error = nullptr; + m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); + if (error) + { + cemuLog_log(LogType::Force, "failed to create binary archive: 
{}", error->localizedDescription()->utf8String());
+			error->release();
+		}
+	}
+	desc->release();
+}
+*/
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h
new file mode 100644
index 00000000..282c174d
--- /dev/null
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include 
+
+#include "Foundation/NSObject.hpp"
+#include "HW/Latte/ISA/LatteReg.h"
+#include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
+#include "Cafe/HW/Latte/Renderer/Renderer.h"
+// Builds a Metal render pipeline state from Latte GPU state: InitFromState() prepares a pipeline descriptor, Compile() creates the pipeline state from it.
+class MetalPipelineCompiler
+{
+public:
+	MetalPipelineCompiler(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
+	~MetalPipelineCompiler();
+	// Selects the mesh or the regular render path (see m_usesGeometryShader) and stores the matching descriptor in m_pipelineDescriptor.
+	void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr);
+	// Creates the pipeline state from the descriptor prepared by InitFromState().
+	MTL::RenderPipelineState* Compile(bool forceCompile, bool isRenderThread);
+
+private:
+	class MetalRenderer* m_mtlr;
+	// Set by InitFromState(): true when a geometry shader is bound or the primitive type is RECTS (mesh pipeline path). Defaults to false so it is never read uninitialized.
+	bool m_usesGeometryShader = false;
+
+	/*
+	std::map m_pipelineCache;
+
+	NS::URL* m_binaryArchiveURL;
+	MTL::BinaryArchive* m_binaryArchive;
+	*/
+	NS::Object* m_pipelineDescriptor = nullptr; // created by InitFromStateRender()/InitFromStateMesh(), released by the destructor; starts as nullptr instead of an indeterminate pointer
+
+	void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr);
+
+	void InitFromStateMesh(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr);
+
+	//void TryLoadBinaryArchive();
+};
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 7cd85857..76ed4c55 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1222,12 +1222,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 //} // Render pipeline state - MTL::RenderPipelineState* renderPipelineState; - if (usesGeometryShader) - renderPipelineState = m_pipelineCache->GetMeshPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew, hostIndexType); - else - renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew); - + MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew); if (!renderPipelineState) return;