From 953975f5ecdef8218453152ddb397f523bee9aba Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 3 Sep 2024 13:59:52 +0200 Subject: [PATCH 1/8] don't jit compile vertex shaders --- src/Cafe/HW/Latte/Core/FetchShader.cpp | 15 +- .../LegacyShaderDecompiler/LatteDecompiler.h | 1 + .../LatteDecompilerAnalyzer.cpp | 1 + .../LatteDecompilerEmitMSL.cpp | 127 +++++++++++++++- .../LatteDecompilerEmitMSLHeader.hpp | 4 + .../Renderer/Metal/MetalPipelineCache.cpp | 8 - .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 4 +- .../Renderer/Metal/RendererShaderMtl.cpp | 142 +----------------- .../Latte/Renderer/Metal/RendererShaderMtl.h | 6 - 9 files changed, 146 insertions(+), 162 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/FetchShader.cpp b/src/Cafe/HW/Latte/Core/FetchShader.cpp index 6c9893f9..272b7c0b 100644 --- a/src/Cafe/HW/Latte/Core/FetchShader.cpp +++ b/src/Cafe/HW/Latte/Core/FetchShader.cpp @@ -8,6 +8,7 @@ #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/ISA/LatteInstructions.h" +#include "HW/Latte/Renderer/Renderer.h" #include "util/containers/LookupTableL3.h" #include "util/helpers/fspinlock.h" #include /* SHA1_DIGEST_LENGTH */ @@ -107,6 +108,14 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader) key += (uint64)(attrib->offset & 3); key = std::rotl(key, 2); } + + // TODO: also check if geometry shader is used + if (g_renderer->GetType() == RendererAPI::Metal) + { + key += (uint64)group.attributeBufferIndex; + key = std::rotl(key, 5); + // TODO: hash the stride as well + } } // todo - also hash invalid buffer groups? fetchShader->key = key; @@ -161,7 +170,7 @@ void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* pars auto nfa = instr->getField_NUM_FORMAT_ALL(); bool isSigned = instr->getField_FORMAT_COMP_ALL() == LatteClauseInstruction_VTX::FORMAT_COMP::COMP_SIGNED; auto endianSwap = instr->getField_ENDIAN_SWAP(); - + // get buffer cemu_assert_debug(bufferId >= 0xA0 && bufferId < 0xB0); uint32 bufferIndex = (bufferId - 0xA0); @@ -316,7 +325,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach // {0x00000002, 0x01800c00, 0x00000000, 0x8a000000, 0x2c00a001, 0x2c151000, 0x000a0000, ...} // size 0x50 // {0x00000002, 0x01801000, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x60 // {0x00000002, 0x01801c00, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x90 - + // our new implementation: // {0x00000002, 0x01800400, 0x00000000, 0x8a000000, 0x0000a001, 0x2c151000, 0x00020000, ...} @@ -411,7 +420,7 @@ LatteFetchShader::~LatteFetchShader() UnregisterInCache(); } -struct FetchShaderLookupInfo +struct FetchShaderLookupInfo { LatteFetchShader* fetchShader; uint32 programSize; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 5d8b2c6f..29e65c58 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -65,6 +65,7 @@ struct LatteDecompilerShaderResourceMapping sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS]; // Metal exclusive sint8 indexBufferBinding{-1}; + sint8 indexTypeBinding{-1}; sint32 getTextureCount() { diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index 9a3db895..ec3d8aa7 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -1020,4 +1020,5 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD LatteDecompiler::_initUniformBindingPoints(shaderContext); LatteDecompiler::_initAttributeBindingPoints(shaderContext); shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++; + shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 1c75b737..c40d97c6 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -11,6 +11,7 @@ #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "config/ActiveSettings.h" #include "util/helpers/StringBuf.h" @@ -3856,6 +3857,8 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader) { + LatteShaderSHRC_UpdateFetchShader(); + auto fetchShader = LatteSHRC_GetActiveFetchShader(); bool isRectVertexShader = (static_cast(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS); StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end) @@ -3881,9 +3884,125 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, case LatteConst::ShaderType::Vertex: if (shaderContext->options->usesGeometryShader || isRectVertexShader) { - // Defined just-in-time - // Will also modify vid in case of an indexed draw - src->add("VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF); + // TODO: clean this up + // Will modify vid in case of an indexed draw + + // Vertex buffers + std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; + std::string vertexBuffers = "#define VERTEX_BUFFERS "; + std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid, device uint* indexBuffer, uint indexType VERTEX_BUFFER_DEFINITIONS) {\n"; + + // Index buffer + inputFetchDefinition += "if (indexType == 1) // UShort\n"; + inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n"; + inputFetchDefinition += "else if (indexType == 2)\n"; + inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid]; // UInt\n"; + + inputFetchDefinition += "VertexIn in;\n"; + for (auto& bufferGroup : fetchShader->bufferGroups) + { + std::optional fetchType; + + uint32 bufferIndex = bufferGroup.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + uint32 bufferStride = (shaderContext->contextRegisters[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + for (sint32 j = 0; j < bufferGroup.attribCount; ++j) + { + auto& attr = bufferGroup.attrib[j]; + + uint32 semanticId = shaderContext->output->resourceMappingMTL.attributeMapping[attr.semanticId]; + if (semanticId == (uint32)-1) + continue; // attribute not used? + + std::string formatName; + uint8 componentCount = 0; + switch (GetMtlVertexFormat(attr.format)) + { + case MTL::VertexFormatUChar: + formatName = "uchar"; + componentCount = 1; + break; + case MTL::VertexFormatUChar2: + formatName = "uchar2"; + componentCount = 2; + break; + case MTL::VertexFormatUChar3: + formatName = "uchar3"; + componentCount = 3; + break; + case MTL::VertexFormatUChar4: + formatName = "uchar4"; + componentCount = 4; + break; + case MTL::VertexFormatUShort: + formatName = "ushort"; + componentCount = 1; + break; + case MTL::VertexFormatUShort2: + formatName = "ushort2"; + componentCount = 2; + break; + case MTL::VertexFormatUShort3: + formatName = "ushort3"; + componentCount = 3; + break; + case MTL::VertexFormatUShort4: + formatName = "ushort4"; + componentCount = 4; + break; + case MTL::VertexFormatUInt: + formatName = "uint"; + componentCount = 1; + break; + case MTL::VertexFormatUInt2: + formatName = "uint2"; + componentCount = 2; + break; + case MTL::VertexFormatUInt3: + formatName = "uint3"; + componentCount = 3; + break; + case MTL::VertexFormatUInt4: + formatName = "uint4"; + componentCount = 4; + break; + } + + // Fetch the attribute + inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId); + inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName); + inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex); + inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset); + for (uint8 i = 0; i < (4 - componentCount); i++) + inputFetchDefinition += ", 0"; + inputFetchDefinition += ");\n"; + + if (fetchType.has_value()) + cemu_assert_debug(fetchType == attr.fetchType); + else + fetchType = attr.fetchType; + + if (attr.fetchType == LatteConst::INSTANCE_DATA) + { + cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported + } + } + + // TODO: fetch type + + vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); + vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex); + } + + inputFetchDefinition += "return in;\n"; + inputFetchDefinition += "}\n"; + + src->add(vertexBufferDefinitions.c_str()); + src->add("\n"); + src->add(vertexBuffers.c_str()); + src->add("\n"); + src->add(inputFetchDefinition.c_str()); functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]"; outputTypeName = "void"; @@ -3916,7 +4035,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, // TODO: don't hardcode the instance index src->add("uint iid = 0;" _CRLF); // Fetch the input - src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF); + src->add("VertexIn in = fetchInput(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF); // Output is defined as object payload src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF); } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 38392bdb..aed7e9f1 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -495,6 +495,10 @@ namespace LatteDecompiler src->add(", mesh_grid_properties meshGridProperties"); src->add(", uint tig [[threadgroup_position_in_grid]]"); src->add(", uint tid [[thread_index_in_threadgroup]]"); + // TODO: inly include index buffer if needed + src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding); + // TODO: use uchar? + src->addFmt(", constant uint& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding); src->add(" VERTEX_BUFFER_DEFINITIONS"); } else diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 8f7740b9..a138ec8c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -366,13 +366,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte auto mtlVertexShader = static_cast(vertexShader->shader); auto mtlPixelShader = static_cast(pixelShader->shader); - mtlVertexShader->CompileVertexFunction(); - // HACK - if (!mtlVertexShader->GetFunction()) - { - debug_printf("no vertex function, skipping draw\n"); - return nullptr; - } mtlPixelShader->CompileFragmentFunction(lastUsedFBO); // Render pipeline state @@ -475,7 +468,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); } auto mtlPixelShader = static_cast(pixelShader->shader); - mtlObjectShader->CompileObjectFunction(lcr, fetchShader, vertexShader, hostIndexType); mtlPixelShader->CompileFragmentFunction(lastUsedFBO); // Render pipeline state diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 6ccbdb69..c40fbabb 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -916,7 +916,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); // TODO: is this even needed? Also, should go to draw_beginSequence - if (!vertexShader) + if (!vertexShader || !static_cast(vertexShader->shader)->GetFunction()) { printf("no vertex function, skipping draw\n"); return; @@ -1200,6 +1200,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 { if (indexBuffer) SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding); + renderCommandEncoder->setObjectBytes(&hostIndexType, sizeof(hostIndexType), vertexShader->resourceMapping.indexTypeBinding); + encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.indexTypeBinding] = {nullptr}; uint32 verticesPerPrimitive = 0; switch (primitiveMode) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 6ad72d87..8905ddee 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -16,7 +16,8 @@ extern std::atomic_int g_compiled_shaders_async; RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer} { - if (type == ShaderType::kGeometry) + // TODO: don't compile fragment function just-in-time + if (type != ShaderType::kFragment) { Compile(mslCode); } @@ -36,145 +37,6 @@ RendererShaderMtl::~RendererShaderMtl() m_function->release(); } -void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType) -{ - cemu_assert_debug(m_type == ShaderType::kVertex); - - std::string fullCode; - - // Vertex buffers - std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; - std::string vertexBuffers = "#define VERTEX_BUFFERS "; - std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n"; - - // Index buffer - if (hostIndexType != Renderer::INDEX_TYPE::NONE) - { - vertexBufferDefinitions += ", device "; - switch (hostIndexType) - { - case Renderer::INDEX_TYPE::U16: - vertexBufferDefinitions += "ushort"; - break; - case Renderer::INDEX_TYPE::U32: - vertexBufferDefinitions += "uint"; - break; - default: - cemu_assert_suspicious(); - break; - } - - vertexBufferDefinitions += fmt::format("* indexBuffer [[buffer({})]]", vertexShader->resourceMapping.indexBufferBinding); - vertexBuffers += ", indexBuffer"; - inputFetchDefinition += "vid = indexBuffer[vid];\n"; - } - - inputFetchDefinition += "VertexIn in;\n"; - for (auto& bufferGroup : fetchShader->bufferGroups) - { - std::optional fetchType; - - uint32 bufferIndex = bufferGroup.attributeBufferIndex; - uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; - uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - - for (sint32 j = 0; j < bufferGroup.attribCount; ++j) - { - auto& attr = bufferGroup.attrib[j]; - - uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; - if (semanticId == (uint32)-1) - continue; // attribute not used? - - std::string formatName; - uint8 componentCount = 0; - switch (GetMtlVertexFormat(attr.format)) - { - case MTL::VertexFormatUChar: - formatName = "uchar"; - componentCount = 1; - break; - case MTL::VertexFormatUChar2: - formatName = "uchar2"; - componentCount = 2; - break; - case MTL::VertexFormatUChar3: - formatName = "uchar3"; - componentCount = 3; - break; - case MTL::VertexFormatUChar4: - formatName = "uchar4"; - componentCount = 4; - break; - case MTL::VertexFormatUShort: - formatName = "ushort"; - componentCount = 1; - break; - case MTL::VertexFormatUShort2: - formatName = "ushort2"; - componentCount = 2; - break; - case MTL::VertexFormatUShort3: - formatName = "ushort3"; - componentCount = 3; - break; - case MTL::VertexFormatUShort4: - formatName = "ushort4"; - componentCount = 4; - break; - case MTL::VertexFormatUInt: - formatName = "uint"; - componentCount = 1; - break; - case MTL::VertexFormatUInt2: - formatName = "uint2"; - componentCount = 2; - break; - case MTL::VertexFormatUInt3: - formatName = "uint3"; - componentCount = 3; - break; - case MTL::VertexFormatUInt4: - formatName = "uint4"; - componentCount = 4; - break; - } - - // Fetch the attribute - inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId); - inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName); - inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex); - inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset); - for (uint8 i = 0; i < (4 - componentCount); i++) - inputFetchDefinition += ", 0"; - inputFetchDefinition += ");\n"; - - if (fetchType.has_value()) - cemu_assert_debug(fetchType == attr.fetchType); - else - fetchType = attr.fetchType; - - if (attr.fetchType == LatteConst::INSTANCE_DATA) - { - cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported - } - } - - vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); - vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex); - } - - inputFetchDefinition += "return in;\n"; - inputFetchDefinition += "}\n"; - - fullCode += vertexBufferDefinitions + "\n"; - fullCode += vertexBuffers + "\n"; - fullCode += m_mslCode; - fullCode += inputFetchDefinition; - - Compile(fullCode); -} - void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO) { cemu_assert_debug(m_type == ShaderType::kFragment); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index ca5a0ff9..6ae2b928 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -21,12 +21,6 @@ public: RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode); virtual ~RendererShaderMtl(); - void CompileVertexFunction() - { - Compile(m_mslCode); - } - - void CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType); void CompileFragmentFunction(CachedFBOMtl* activeFBO); MTL::Function* GetFunction() const From 2ee92e53e9f254e8461c688487074cc03e6e93d9 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 3 Sep 2024 14:26:04 +0200 Subject: [PATCH 2/8] use the correct fetch shader --- .../Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index c40d97c6..5dae2131 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -3857,8 +3857,6 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader) { - LatteShaderSHRC_UpdateFetchShader(); - auto fetchShader = LatteSHRC_GetActiveFetchShader(); bool isRectVertexShader = (static_cast(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS); StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end) @@ -3899,7 +3897,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid]; // UInt\n"; inputFetchDefinition += "VertexIn in;\n"; - for (auto& bufferGroup : fetchShader->bufferGroups) + for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups) { std::optional fetchType; From b13ba58aad6ebc6231f14a7479d8327ccbf37036 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 4 Sep 2024 07:36:40 +0200 Subject: [PATCH 3/8] correct the object shader hash --- src/Cafe/HW/Latte/Core/FetchShader.cpp | 28 +++++++++++++------ src/Cafe/HW/Latte/Core/FetchShader.h | 7 ++++- src/Cafe/HW/Latte/Core/LatteShader.cpp | 2 ++ .../Renderer/Metal/MetalPipelineCache.cpp | 2 -- 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/FetchShader.cpp b/src/Cafe/HW/Latte/Core/FetchShader.cpp index 272b7c0b..5933fe05 100644 --- a/src/Cafe/HW/Latte/Core/FetchShader.cpp +++ b/src/Cafe/HW/Latte/Core/FetchShader.cpp @@ -8,7 +8,6 @@ #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/ISA/LatteInstructions.h" -#include "HW/Latte/Renderer/Renderer.h" #include "util/containers/LookupTableL3.h" #include "util/helpers/fspinlock.h" #include /* SHA1_DIGEST_LENGTH */ @@ -108,14 +107,6 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader) key += (uint64)(attrib->offset & 3); key = std::rotl(key, 2); } - - // TODO: also check if geometry shader is used - if (g_renderer->GetType() == RendererAPI::Metal) - { - key += (uint64)group.attributeBufferIndex; - key = std::rotl(key, 5); - // TODO: hash the stride as well - } } // todo - also hash invalid buffer groups? fetchShader->key = key; @@ -155,6 +146,23 @@ void LatteFetchShader::CalculateFetchShaderVkHash() this->vkPipelineHashFragment = h; } +void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister) +{uint64 key = 0; + for (sint32 g = 0; g < bufferGroups.size(); g++) + { + LatteParsedFetchShaderBufferGroup_t& group = bufferGroups[g]; + uint32 bufferIndex = group.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + uint32 bufferStride = (contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + key += (uint64)bufferIndex; + key = std::rotl(key, 5); + key += (uint64)bufferStride; + key = std::rotl(key, 5); + } + mtlShaderHashObject = key; +} + void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr) { uint32 semanticId = instr->getFieldSEM_SEMANTIC_ID(); // location (attribute index inside shader) @@ -337,6 +345,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach // these only make sense when vertex shader does not call FS? LatteShader_calculateFSKey(newFetchShader); newFetchShader->CalculateFetchShaderVkHash(); + newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister); return newFetchShader; } @@ -396,6 +405,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach } LatteShader_calculateFSKey(newFetchShader); newFetchShader->CalculateFetchShaderVkHash(); + newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister); // register in cache // its possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously diff --git a/src/Cafe/HW/Latte/Core/FetchShader.h b/src/Cafe/HW/Latte/Core/FetchShader.h index ac57714d..9aeed6bd 100644 --- a/src/Cafe/HW/Latte/Core/FetchShader.h +++ b/src/Cafe/HW/Latte/Core/FetchShader.h @@ -46,6 +46,9 @@ struct LatteFetchShader // Vulkan uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline + // Metal + uint64 mtlShaderHashObject{}; + // cache info CacheHash m_cacheHash{}; bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded) @@ -53,6 +56,8 @@ struct LatteFetchShader void CalculateFetchShaderVkHash(); + void CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister); + uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; }; static bool isValidBufferIndex(const uint32 index) { return index < 0x10; }; @@ -69,4 +74,4 @@ struct LatteFetchShader static std::unordered_map s_fetchShaderByHash; }; -LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize); \ No newline at end of file +LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize); diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 3ac0e9d2..d20067a6 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -498,6 +498,8 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, uint64 vsHash2 = 0; _calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2); uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL); + if (g_renderer->GetType() == RendererAPI::Metal && usesGeometryShader) + vsHash += _activeFetchShader->mtlShaderHashObject; uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F; vsHash += tmp; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index a138ec8c..9a99f138 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -1,7 +1,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "Foundation/NSObject.hpp" #include "HW/Latte/Core/LatteShader.h" #include "HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "HW/Latte/Renderer/Metal/LatteToMtl.h" @@ -10,7 +9,6 @@ #include "HW/Latte/Core/FetchShader.h" #include "HW/Latte/ISA/RegDefines.h" -#include "Metal/MTLRenderPipeline.hpp" #include "config/ActiveSettings.h" static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) From 8a74445a9632935d218c653377d189d607f5b9cb Mon Sep 17 00:00:00 2001 From: Samo Z Date: Wed, 4 Sep 2024 19:05:07 +0200 Subject: [PATCH 4/8] don't compile fragment shaders just-in-time --- .../LatteDecompilerEmitMSL.cpp | 12 +-- .../LatteDecompilerEmitMSLHeader.hpp | 6 +- .../HW/Latte/Renderer/Metal/MetalCommon.h | 8 +- .../Renderer/Metal/MetalPipelineCache.cpp | 2 - .../Renderer/Metal/RendererShaderMtl.cpp | 79 ++----------------- .../Latte/Renderer/Metal/RendererShaderMtl.h | 8 -- 6 files changed, 21 insertions(+), 94 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 5dae2131..1d6ab1a4 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -3215,11 +3215,11 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe src->add(") == false) discard_fragment();" _CRLF); } // pixel color output - src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex)); - src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetColorAttachmentTypeStr(pixelColorOutputIndex)); + //src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex)); + src->addFmt("out.passPixelColor{} = as_type(", pixelColorOutputIndex/*, GetColorAttachmentTypeStr(pixelColorOutputIndex)*/); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); src->add(");" _CRLF); - src->add("#endif" _CRLF); + //src->add("#endif" _CRLF); if( cfInstruction->exportArrayBase+i >= 8 ) cemu_assert_unimplemented(); @@ -3883,12 +3883,12 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, if (shaderContext->options->usesGeometryShader || isRectVertexShader) { // TODO: clean this up - // Will modify vid in case of an indexed draw + // fetchVertex will modify vid in case of an indexed draw // Vertex buffers std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; std::string vertexBuffers = "#define VERTEX_BUFFERS "; - std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid, device uint* indexBuffer, uint indexType VERTEX_BUFFER_DEFINITIONS) {\n"; + std::string inputFetchDefinition = "VertexIn fetchVertex(thread uint& vid, device uint* indexBuffer, uint indexType VERTEX_BUFFER_DEFINITIONS) {\n"; // Index buffer inputFetchDefinition += "if (indexType == 1) // UShort\n"; @@ -4033,7 +4033,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, // TODO: don't hardcode the instance index src->add("uint iid = 0;" _CRLF); // Fetch the input - src->add("VertexIn in = fetchInput(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF); + src->add("VertexIn in = fetchVertex(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF); // Output is defined as object payload src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF); } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index aed7e9f1..5f88f246 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -281,9 +281,9 @@ namespace LatteDecompiler { if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0) { - src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i)); - src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetColorAttachmentTypeStr(i), i, i); - src->add("#endif" _CRLF); + //src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i)); + src->addFmt("float4 passPixelColor{} [[color({})]];" _CRLF/*, GetColorAttachmentTypeStr(i)*/, i, i); + //src->add("#endif" _CRLF); } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index a2ecc7e9..ede0bed6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -41,10 +41,10 @@ inline size_t Align(size_t size, size_t alignment) return (size + alignment - 1) & ~(alignment - 1); } -inline std::string GetColorAttachmentTypeStr(uint32 index) -{ - return "COLOR_ATTACHMENT" + std::to_string(index) + "_TYPE"; -} +//inline std::string GetColorAttachmentTypeStr(uint32 index) +//{ +// return "COLOR_ATTACHMENT" + std::to_string(index) + "_TYPE"; +//} // Cast from const char* to NS::String* inline NS::String* ToNSString(const char* str) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 9a99f138..1842142e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -364,7 +364,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte auto mtlVertexShader = static_cast(vertexShader->shader); auto mtlPixelShader = static_cast(pixelShader->shader); - mtlPixelShader->CompileFragmentFunction(lastUsedFBO); // Render pipeline state MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); @@ -466,7 +465,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); } auto mtlPixelShader = static_cast(pixelShader->shader); - mtlPixelShader->CompileFragmentFunction(lastUsedFBO); // Render pipeline state MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 8905ddee..d343ef45 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -16,16 +16,16 @@ extern std::atomic_int g_compiled_shaders_async; RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer} { - // TODO: don't compile fragment function just-in-time - if (type != ShaderType::kFragment) + NS::Error* error = nullptr; + MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error); + if (error) { - Compile(mslCode); - } - else - { - // TODO: don't compile just-in-time - m_mslCode = mslCode; + printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str()); + error->release(); + return; } + m_function = library->newFunction(ToNSString("main0")); + library->release(); // Count shader compilation g_compiled_shaders_total++; @@ -36,66 +36,3 @@ RendererShaderMtl::~RendererShaderMtl() if (m_function) m_function->release(); } - -void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO) -{ - cemu_assert_debug(m_type == ShaderType::kFragment); - - std::string fullCode; - - // Define color attachment data types - for (uint8 i = 0; i < 8; i++) - { - const auto& colorBuffer = activeFBO->colorBuffer[i]; - if (!colorBuffer.texture) - { - continue; - } - auto dataType = GetMtlPixelFormatInfo(colorBuffer.texture->format, false).dataType; - fullCode += "#define " + GetColorAttachmentTypeStr(i) + " "; - switch (dataType) - { - case MetalDataType::INT: - fullCode += "int4"; - break; - case MetalDataType::UINT: - fullCode += "uint4"; - break; - case MetalDataType::FLOAT: - fullCode += "float4"; - break; - default: - cemu_assert_suspicious(); - break; - } - fullCode += "\n"; - } - - fullCode += m_mslCode; - Compile(fullCode); -} - -void RendererShaderMtl::Compile(const std::string& mslCode) -{ - if (m_function) - m_function->release(); - - // HACK - if (m_hasError) - return; - - NS::Error* error = nullptr; - MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error); - if (error) - { - printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str()); - error->release(); - - // HACK - m_hasError = true; - - return; - } - m_function = library->newFunction(ToNSString("main0")); - library->release(); -} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index 6ae2b928..0758b0e6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -21,8 +21,6 @@ public: RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode); virtual ~RendererShaderMtl(); - void CompileFragmentFunction(CachedFBOMtl* activeFBO); - MTL::Function* GetFunction() const { return m_function; @@ -54,11 +52,5 @@ private: MTL::Function* m_function = nullptr; - std::vector m_binary; - std::string m_mslCode; - - // HACK - bool m_hasError = false; - void Compile(const std::string& mslCode); }; From cd72ad80d219a4bf25dfc3e72098239d900280b2 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 5 Sep 2024 14:34:39 +0200 Subject: [PATCH 5/8] include color format data types in ps hash --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 17 +++++++++++++++++ .../LatteDecompilerEmitMSL.cpp | 2 +- .../LatteDecompilerEmitMSLHeader.hpp | 2 +- .../HW/Latte/Renderer/Metal/LatteToMtl.cpp | 12 +++++------- src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h | 17 +++++++++++++++++ .../Renderer/Metal/MetalPipelineCache.cpp | 18 ++++++++---------- 6 files changed, 49 insertions(+), 19 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index d20067a6..2bc719cd 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -14,6 +14,9 @@ #include "config/ActiveSettings.h" #include "Cafe/GameProfile/GameProfile.h" #include "util/containers/flat_hash_map.hpp" +#if BOOST_OS_MACOS +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" +#endif #include // experimental new decompiler (WIP) @@ -544,6 +547,20 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b _calculateShaderProgramHash(psProgramCode, pixelShaderSize, &hashCachePS, &psHash1, &psHash2); // get vertex shader uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL); + +#if BOOST_OS_MACOS + if (g_renderer->GetType() == RendererAPI::Metal) + { + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + { + auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew); + uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType; + psHash += (uint64)dataType; + psHash = std::rotl(psHash, 7); + } + } +#endif + _shaderBaseHash_ps = psHash; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 1d6ab1a4..aea421fb 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -3216,7 +3216,7 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe } // pixel color output //src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex)); - src->addFmt("out.passPixelColor{} = as_type(", pixelColorOutputIndex/*, GetColorAttachmentTypeStr(pixelColorOutputIndex)*/); + src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetColorBufferDataTypeStr(pixelColorOutputIndex, *shaderContext->contextRegistersNew)/*, GetColorAttachmentTypeStr(pixelColorOutputIndex)*/); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); src->add(");" _CRLF); //src->add("#endif" _CRLF); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 5f88f246..066e3b5c 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -282,7 +282,7 @@ namespace LatteDecompiler if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0) { //src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i)); - src->addFmt("float4 passPixelColor{} [[color({})]];" _CRLF/*, GetColorAttachmentTypeStr(i)*/, i, i); + src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetColorBufferDataTypeStr(i, *decompilerContext->contextRegistersNew)/*, GetColorAttachmentTypeStr(i)*/, i, i); //src->add("#endif" _CRLF); } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp index 6f54272c..581c0e19 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp @@ -84,19 +84,17 @@ std::map MTL_DEPTH_FORMAT_TABLE = { const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth) { + if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT) + { + return {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}; + } + MetalPixelFormatInfo formatInfo; if (isDepth) formatInfo = MTL_DEPTH_FORMAT_TABLE[format]; else formatInfo = MTL_COLOR_FORMAT_TABLE[format]; - // Depth24Unorm_Stencil8 is not supported on Apple sillicon - // TODO: query if format is available instead - if (formatInfo.pixelFormat == MTL::PixelFormatDepth24Unorm_Stencil8) - { - formatInfo.pixelFormat = MTL::PixelFormatDepth32Float_Stencil8; - } - if (formatInfo.pixelFormat == MTL::PixelFormatInvalid) { printf("invalid pixel format: %u\n", (uint32)format); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h index 2c805527..218a74f5 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h @@ -32,6 +32,23 @@ const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, boo MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth, const MetalPixelFormatSupport& pixelFormatSupport); +inline const char* GetColorBufferDataTypeStr(const uint32 index, const LatteContextRegister& lcr) +{ + auto format = LatteMRT::GetColorBufferFormat(index, lcr); + auto dataType = GetMtlPixelFormatInfo(format, false).dataType; + switch (dataType) + { + case MetalDataType::INT: + return "int4"; + case MetalDataType::UINT: + return "uint4"; + case MetalDataType::FLOAT: + return "float4"; + default: + return "unknown"; + } +} + size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width); size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, bool isDepth, uint32 height, size_t bytesPerRow); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 1842142e..8273ec16 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -1,14 +1,14 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "HW/Latte/Core/LatteShader.h" -#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h" -#include "HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "HW/Latte/Renderer/Metal/RendererShaderMtl.h" -#include "HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" +#include "Cafe/HW/Latte/Core/LatteShader.h" +#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" -#include "HW/Latte/Core/FetchShader.h" -#include "HW/Latte/ISA/RegDefines.h" +#include "Cafe/HW/Latte/Core/FetchShader.h" +#include "Cafe/HW/Latte/ISA/RegDefines.h" #include "config/ActiveSettings.h" static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) @@ -409,7 +409,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte { debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); error->release(); - return nullptr; } else { @@ -484,13 +483,12 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe desc->setLabel(GetLabel("Mesh pipeline state", desc)); #endif pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); + desc->release(); if (error) { debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); error->release(); - return nullptr; } - desc->release(); return pipeline; } From 4251f3fe551ee7b1c2ea521e2ae30b7576405e28 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 6 Sep 2024 17:16:47 +0200 Subject: [PATCH 6/8] check for invalid color formats --- .../LatteDecompilerEmitMSL.cpp | 12 +++++++----- .../LatteDecompilerEmitMSLHeader.hpp | 8 +++++--- src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h | 12 +++++++++--- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index aea421fb..137f8e87 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -3215,11 +3215,13 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe src->add(") == false) discard_fragment();" _CRLF); } // pixel color output - //src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex)); - src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetColorBufferDataTypeStr(pixelColorOutputIndex, *shaderContext->contextRegistersNew)/*, GetColorAttachmentTypeStr(pixelColorOutputIndex)*/); - _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); - src->add(");" _CRLF); - //src->add("#endif" _CRLF); + auto dataType = GetColorBufferDataType(pixelColorOutputIndex, *shaderContext->contextRegistersNew); + if (dataType != MetalDataType::NONE) + { + src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetDataTypeStr(dataType)); + _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); + src->add(");" _CRLF); + } if( cfInstruction->exportArrayBase+i >= 8 ) cemu_assert_unimplemented(); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 066e3b5c..412c9992 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -281,9 +281,11 @@ namespace LatteDecompiler { if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0) { - //src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i)); - src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetColorBufferDataTypeStr(i, *decompilerContext->contextRegistersNew)/*, GetColorAttachmentTypeStr(i)*/, i, i); - //src->add("#endif" _CRLF); + auto dataType = GetColorBufferDataType(i, *decompilerContext->contextRegistersNew); + if (dataType != MetalDataType::NONE) + { + src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetDataTypeStr(dataType), i, i); + } } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h index 218a74f5..c1b1c75c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h @@ -6,6 +6,7 @@ #include "Cafe/HW/Latte/Core/LatteConst.h" //#include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" +#include "Common/precompiled.h" struct Uvec2 { uint32 x; @@ -32,10 +33,14 @@ const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, boo MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth, const MetalPixelFormatSupport& pixelFormatSupport); -inline const char* GetColorBufferDataTypeStr(const uint32 index, const LatteContextRegister& lcr) +inline MetalDataType GetColorBufferDataType(const uint32 index, const LatteContextRegister& lcr) { auto format = LatteMRT::GetColorBufferFormat(index, lcr); - auto dataType = GetMtlPixelFormatInfo(format, false).dataType; + return GetMtlPixelFormatInfo(format, false).dataType; +} + +inline const char* GetDataTypeStr(MetalDataType dataType) +{ switch (dataType) { case MetalDataType::INT: @@ -45,7 +50,8 @@ inline const char* GetColorBufferDataTypeStr(const uint32 index, const LatteCont case MetalDataType::FLOAT: return "float4"; default: - return "unknown"; + cemu_assert_suspicious(); + return ""; } } From 6a3bdd49e9e3e14290f2760526c27e4d8fb42af9 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 6 Sep 2024 17:38:19 +0200 Subject: [PATCH 7/8] refactor pixel format support --- .../HW/Latte/Renderer/Metal/LatteToMtl.cpp | 51 +++++++++---------- .../HW/Latte/Renderer/Metal/MetalCommon.h | 2 +- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp index 581c0e19..daa283e4 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp @@ -1,4 +1,5 @@ #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" +#include "Cemu/Logging/CemuLogging.h" #include "Common/precompiled.h" #include "Metal/MTLDepthStencil.hpp" #include "Metal/MTLPixelFormat.hpp" @@ -95,42 +96,40 @@ const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, boo else formatInfo = MTL_COLOR_FORMAT_TABLE[format]; - if (formatInfo.pixelFormat == MTL::PixelFormatInvalid) - { - printf("invalid pixel format: %u\n", (uint32)format); - } - return formatInfo; } MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth, const MetalPixelFormatSupport& pixelFormatSupport) { auto pixelFormat = GetMtlPixelFormatInfo(format, isDepth).pixelFormat; + if (pixelFormat == MTL::PixelFormatInvalid) + cemuLog_logDebug(LogType::Force, "invalid pixel format {}\n", pixelFormat); - if (!pixelFormatSupport.m_supportsR8Unorm_sRGB && pixelFormat == MTL::PixelFormatR8Unorm_sRGB) - pixelFormat = MTL::PixelFormatRGBA8Unorm_sRGB; - - if (!pixelFormatSupport.m_supportsRG8Unorm_sRGB && pixelFormat == MTL::PixelFormatRG8Unorm_sRGB) - pixelFormat = MTL::PixelFormatRGBA8Unorm_sRGB; - - if (!pixelFormatSupport.m_supportsPacked16BitFormats) + switch (pixelFormat) { - switch (pixelFormat) - { - case MTL::PixelFormatB5G6R5Unorm: - case MTL::PixelFormatA1BGR5Unorm: - case MTL::PixelFormatABGR4Unorm: - case MTL::PixelFormatBGR5A1Unorm: - pixelFormat = MTL::PixelFormatRGBA8Unorm; - break; - default: - break; - } + case MTL::PixelFormatR8Unorm_sRGB: + if (!pixelFormatSupport.m_supportsR8Unorm_sRGB) + return MTL::PixelFormatRGBA8Unorm_sRGB; + break; + case MTL::PixelFormatRG8Unorm_sRGB: + if (!pixelFormatSupport.m_supportsRG8Unorm_sRGB) + return MTL::PixelFormatRGBA8Unorm_sRGB; + break; + case MTL::PixelFormatB5G6R5Unorm: + case MTL::PixelFormatA1BGR5Unorm: + case MTL::PixelFormatABGR4Unorm: + case MTL::PixelFormatBGR5A1Unorm: + if (!pixelFormatSupport.m_supportsPacked16BitFormats) + return MTL::PixelFormatRGBA8Unorm; + break; + case MTL::PixelFormatDepth24Unorm_Stencil8: + if (!pixelFormatSupport.m_supportsDepth24Unorm_Stencil8) + return MTL::PixelFormatDepth32Float_Stencil8; + break; + default: + break; } - if (!pixelFormatSupport.m_supportsDepth24Unorm_Stencil8 && pixelFormat == MTL::PixelFormatDepth24Unorm_Stencil8) - pixelFormat = MTL::PixelFormatDepth32Float_Stencil8; - return pixelFormat; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index ede0bed6..8a6daa92 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -18,7 +18,7 @@ struct MetalPixelFormatSupport m_supportsR8Unorm_sRGB = device->supportsFamily(MTL::GPUFamilyApple1); m_supportsRG8Unorm_sRGB = device->supportsFamily(MTL::GPUFamilyApple1); m_supportsPacked16BitFormats = device->supportsFamily(MTL::GPUFamilyApple1); - m_supportsDepth24Unorm_Stencil8 = device->supportsFamily(MTL::GPUFamilyMac2); + m_supportsDepth24Unorm_Stencil8 = device->depth24Stencil8PixelFormatSupported(); } }; From 548ffb6b575caa6318b8f9296db3700360f5dd35 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 6 Sep 2024 19:15:56 +0200 Subject: [PATCH 8/8] add: todo notices --- src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp | 1 + src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp index eebacd45..86c44efe 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp @@ -78,6 +78,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM auto pixelFormat = GetMtlPixelFormat(format, isDepth, m_mtlr->GetPixelFormatSupport()); desc->setPixelFormat(pixelFormat); + // TODO: using MTL::TextureUsageShaderWrite as well fixes Mario Tennis: Ultra Smash, investigate why MTL::TextureUsage usage = MTL::TextureUsageShaderRead; if (!Latte::IsCompressedFormat(format)) { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index c40fbabb..545295dc 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -859,6 +859,7 @@ void MetalRenderer::draw_beginSequence() return; // no render target } + // TODO: not checking for !streamoutEnable fixes Super Smash Bros. for Wii U, investigate why if (!hasValidFramebufferAttached && !streamoutEnable) { debug_printf("Drawcall with no color buffer or depth buffer attached\n");