From 953975f5ecdef8218453152ddb397f523bee9aba Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 3 Sep 2024 13:59:52 +0200 Subject: [PATCH] don't jit compile vertex shaders --- src/Cafe/HW/Latte/Core/FetchShader.cpp | 15 +- .../LegacyShaderDecompiler/LatteDecompiler.h | 1 + .../LatteDecompilerAnalyzer.cpp | 1 + .../LatteDecompilerEmitMSL.cpp | 127 +++++++++++++++- .../LatteDecompilerEmitMSLHeader.hpp | 4 + .../Renderer/Metal/MetalPipelineCache.cpp | 8 - .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 4 +- .../Renderer/Metal/RendererShaderMtl.cpp | 142 +----------------- .../Latte/Renderer/Metal/RendererShaderMtl.h | 6 - 9 files changed, 146 insertions(+), 162 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/FetchShader.cpp b/src/Cafe/HW/Latte/Core/FetchShader.cpp index 6c9893f9..272b7c0b 100644 --- a/src/Cafe/HW/Latte/Core/FetchShader.cpp +++ b/src/Cafe/HW/Latte/Core/FetchShader.cpp @@ -8,6 +8,7 @@ #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/ISA/LatteInstructions.h" +#include "HW/Latte/Renderer/Renderer.h" #include "util/containers/LookupTableL3.h" #include "util/helpers/fspinlock.h" #include /* SHA1_DIGEST_LENGTH */ @@ -107,6 +108,14 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader) key += (uint64)(attrib->offset & 3); key = std::rotl(key, 2); } + + // TODO: also check if geometry shader is used + if (g_renderer->GetType() == RendererAPI::Metal) + { + key += (uint64)group.attributeBufferIndex; + key = std::rotl(key, 5); + // TODO: hash the stride as well + } } // todo - also hash invalid buffer groups? 
fetchShader->key = key; @@ -161,7 +170,7 @@ void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* pars auto nfa = instr->getField_NUM_FORMAT_ALL(); bool isSigned = instr->getField_FORMAT_COMP_ALL() == LatteClauseInstruction_VTX::FORMAT_COMP::COMP_SIGNED; auto endianSwap = instr->getField_ENDIAN_SWAP(); - + // get buffer cemu_assert_debug(bufferId >= 0xA0 && bufferId < 0xB0); uint32 bufferIndex = (bufferId - 0xA0); @@ -316,7 +325,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach // {0x00000002, 0x01800c00, 0x00000000, 0x8a000000, 0x2c00a001, 0x2c151000, 0x000a0000, ...} // size 0x50 // {0x00000002, 0x01801000, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x60 // {0x00000002, 0x01801c00, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x90 - + // our new implementation: // {0x00000002, 0x01800400, 0x00000000, 0x8a000000, 0x0000a001, 0x2c151000, 0x00020000, ...} @@ -411,7 +420,7 @@ LatteFetchShader::~LatteFetchShader() UnregisterInCache(); } -struct FetchShaderLookupInfo +struct FetchShaderLookupInfo { LatteFetchShader* fetchShader; uint32 programSize; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 5d8b2c6f..29e65c58 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -65,6 +65,7 @@ struct LatteDecompilerShaderResourceMapping sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS]; // Metal exclusive sint8 indexBufferBinding{-1}; + sint8 indexTypeBinding{-1}; sint32 getTextureCount() { diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index 9a3db895..ec3d8aa7 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ 
b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -1020,4 +1020,5 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD LatteDecompiler::_initUniformBindingPoints(shaderContext); LatteDecompiler::_initAttributeBindingPoints(shaderContext); shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++; + shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 1c75b737..c40d97c6 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -11,6 +11,7 @@ #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "config/ActiveSettings.h" #include "util/helpers/StringBuf.h" @@ -3856,6 +3857,8 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader) { + LatteShaderSHRC_UpdateFetchShader(); + auto fetchShader = LatteSHRC_GetActiveFetchShader(); bool isRectVertexShader = (static_cast(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS); StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end) @@ -3881,9 +3884,125 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, case LatteConst::ShaderType::Vertex: if (shaderContext->options->usesGeometryShader || isRectVertexShader) { - // Defined just-in-time - // Will also modify vid in case of an indexed draw - src->add("VertexIn 
fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF); + // TODO: clean this up + // Will modify vid in case of an indexed draw + + // Vertex buffers + std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; + std::string vertexBuffers = "#define VERTEX_BUFFERS "; + std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid, device uint* indexBuffer, uint indexType VERTEX_BUFFER_DEFINITIONS) {\n"; + + // Index buffer + inputFetchDefinition += "if (indexType == 1) // UShort\n"; + inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n"; + inputFetchDefinition += "else if (indexType == 2)\n"; + inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid]; // UInt\n"; + + inputFetchDefinition += "VertexIn in;\n"; + for (auto& bufferGroup : fetchShader->bufferGroups) + { + std::optional fetchType; + + uint32 bufferIndex = bufferGroup.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + uint32 bufferStride = (shaderContext->contextRegisters[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + for (sint32 j = 0; j < bufferGroup.attribCount; ++j) + { + auto& attr = bufferGroup.attrib[j]; + + uint32 semanticId = shaderContext->output->resourceMappingMTL.attributeMapping[attr.semanticId]; + if (semanticId == (uint32)-1) + continue; // attribute not used? 
+ + std::string formatName; + uint8 componentCount = 0; + switch (GetMtlVertexFormat(attr.format)) + { + case MTL::VertexFormatUChar: + formatName = "uchar"; + componentCount = 1; + break; + case MTL::VertexFormatUChar2: + formatName = "uchar2"; + componentCount = 2; + break; + case MTL::VertexFormatUChar3: + formatName = "uchar3"; + componentCount = 3; + break; + case MTL::VertexFormatUChar4: + formatName = "uchar4"; + componentCount = 4; + break; + case MTL::VertexFormatUShort: + formatName = "ushort"; + componentCount = 1; + break; + case MTL::VertexFormatUShort2: + formatName = "ushort2"; + componentCount = 2; + break; + case MTL::VertexFormatUShort3: + formatName = "ushort3"; + componentCount = 3; + break; + case MTL::VertexFormatUShort4: + formatName = "ushort4"; + componentCount = 4; + break; + case MTL::VertexFormatUInt: + formatName = "uint"; + componentCount = 1; + break; + case MTL::VertexFormatUInt2: + formatName = "uint2"; + componentCount = 2; + break; + case MTL::VertexFormatUInt3: + formatName = "uint3"; + componentCount = 3; + break; + case MTL::VertexFormatUInt4: + formatName = "uint4"; + componentCount = 4; + break; + } + + // Fetch the attribute + inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId); + inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName); + inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex); + inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset); + for (uint8 i = 0; i < (4 - componentCount); i++) + inputFetchDefinition += ", 0"; + inputFetchDefinition += ");\n"; + + if (fetchType.has_value()) + cemu_assert_debug(fetchType == attr.fetchType); + else + fetchType = attr.fetchType; + + if (attr.fetchType == LatteConst::INSTANCE_DATA) + { + cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported + } + } + + // TODO: fetch type + + vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} 
[[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); + vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex); + } + + inputFetchDefinition += "return in;\n"; + inputFetchDefinition += "}\n"; + + src->add(vertexBufferDefinitions.c_str()); + src->add("\n"); + src->add(vertexBuffers.c_str()); + src->add("\n"); + src->add(inputFetchDefinition.c_str()); functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]"; outputTypeName = "void"; @@ -3916,7 +4035,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, // TODO: don't hardcode the instance index src->add("uint iid = 0;" _CRLF); // Fetch the input - src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF); + src->add("VertexIn in = fetchInput(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF); // Output is defined as object payload src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF); } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 38392bdb..aed7e9f1 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -495,6 +495,10 @@ namespace LatteDecompiler src->add(", mesh_grid_properties meshGridProperties"); src->add(", uint tig [[threadgroup_position_in_grid]]"); src->add(", uint tid [[thread_index_in_threadgroup]]"); + // TODO: only include index buffer if needed + src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding); + // TODO: use uchar? 
+ src->addFmt(", constant uint& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding); src->add(" VERTEX_BUFFER_DEFINITIONS"); } else diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 8f7740b9..a138ec8c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -366,13 +366,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte auto mtlVertexShader = static_cast(vertexShader->shader); auto mtlPixelShader = static_cast(pixelShader->shader); - mtlVertexShader->CompileVertexFunction(); - // HACK - if (!mtlVertexShader->GetFunction()) - { - debug_printf("no vertex function, skipping draw\n"); - return nullptr; - } mtlPixelShader->CompileFragmentFunction(lastUsedFBO); // Render pipeline state @@ -475,7 +468,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); } auto mtlPixelShader = static_cast(pixelShader->shader); - mtlObjectShader->CompileObjectFunction(lcr, fetchShader, vertexShader, hostIndexType); mtlPixelShader->CompileFragmentFunction(lastUsedFBO); // Render pipeline state diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 6ccbdb69..c40fbabb 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -916,7 +916,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); // TODO: is this even needed? 
Also, should go to draw_beginSequence - if (!vertexShader) + if (!vertexShader || !static_cast(vertexShader->shader)->GetFunction()) { printf("no vertex function, skipping draw\n"); return; @@ -1200,6 +1200,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 { if (indexBuffer) SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding); + renderCommandEncoder->setObjectBytes(&hostIndexType, sizeof(hostIndexType), vertexShader->resourceMapping.indexTypeBinding); + encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.indexTypeBinding] = {nullptr}; uint32 verticesPerPrimitive = 0; switch (primitiveMode) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 6ad72d87..8905ddee 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -16,7 +16,8 @@ extern std::atomic_int g_compiled_shaders_async; RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer} { - if (type == ShaderType::kGeometry) + // TODO: don't compile fragment function just-in-time + if (type != ShaderType::kFragment) { Compile(mslCode); } @@ -36,145 +37,6 @@ RendererShaderMtl::~RendererShaderMtl() m_function->release(); } -void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType) -{ - cemu_assert_debug(m_type == ShaderType::kVertex); - - std::string fullCode; - - // Vertex buffers - std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; - 
std::string vertexBuffers = "#define VERTEX_BUFFERS "; - std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n"; - - // Index buffer - if (hostIndexType != Renderer::INDEX_TYPE::NONE) - { - vertexBufferDefinitions += ", device "; - switch (hostIndexType) - { - case Renderer::INDEX_TYPE::U16: - vertexBufferDefinitions += "ushort"; - break; - case Renderer::INDEX_TYPE::U32: - vertexBufferDefinitions += "uint"; - break; - default: - cemu_assert_suspicious(); - break; - } - - vertexBufferDefinitions += fmt::format("* indexBuffer [[buffer({})]]", vertexShader->resourceMapping.indexBufferBinding); - vertexBuffers += ", indexBuffer"; - inputFetchDefinition += "vid = indexBuffer[vid];\n"; - } - - inputFetchDefinition += "VertexIn in;\n"; - for (auto& bufferGroup : fetchShader->bufferGroups) - { - std::optional fetchType; - - uint32 bufferIndex = bufferGroup.attributeBufferIndex; - uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; - uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - - for (sint32 j = 0; j < bufferGroup.attribCount; ++j) - { - auto& attr = bufferGroup.attrib[j]; - - uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; - if (semanticId == (uint32)-1) - continue; // attribute not used? 
- - std::string formatName; - uint8 componentCount = 0; - switch (GetMtlVertexFormat(attr.format)) - { - case MTL::VertexFormatUChar: - formatName = "uchar"; - componentCount = 1; - break; - case MTL::VertexFormatUChar2: - formatName = "uchar2"; - componentCount = 2; - break; - case MTL::VertexFormatUChar3: - formatName = "uchar3"; - componentCount = 3; - break; - case MTL::VertexFormatUChar4: - formatName = "uchar4"; - componentCount = 4; - break; - case MTL::VertexFormatUShort: - formatName = "ushort"; - componentCount = 1; - break; - case MTL::VertexFormatUShort2: - formatName = "ushort2"; - componentCount = 2; - break; - case MTL::VertexFormatUShort3: - formatName = "ushort3"; - componentCount = 3; - break; - case MTL::VertexFormatUShort4: - formatName = "ushort4"; - componentCount = 4; - break; - case MTL::VertexFormatUInt: - formatName = "uint"; - componentCount = 1; - break; - case MTL::VertexFormatUInt2: - formatName = "uint2"; - componentCount = 2; - break; - case MTL::VertexFormatUInt3: - formatName = "uint3"; - componentCount = 3; - break; - case MTL::VertexFormatUInt4: - formatName = "uint4"; - componentCount = 4; - break; - } - - // Fetch the attribute - inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId); - inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName); - inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex); - inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset); - for (uint8 i = 0; i < (4 - componentCount); i++) - inputFetchDefinition += ", 0"; - inputFetchDefinition += ");\n"; - - if (fetchType.has_value()) - cemu_assert_debug(fetchType == attr.fetchType); - else - fetchType = attr.fetchType; - - if (attr.fetchType == LatteConst::INSTANCE_DATA) - { - cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported - } - } - - vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, 
GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); - vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex); - } - - inputFetchDefinition += "return in;\n"; - inputFetchDefinition += "}\n"; - - fullCode += vertexBufferDefinitions + "\n"; - fullCode += vertexBuffers + "\n"; - fullCode += m_mslCode; - fullCode += inputFetchDefinition; - - Compile(fullCode); -} - void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO) { cemu_assert_debug(m_type == ShaderType::kFragment); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index ca5a0ff9..6ae2b928 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -21,12 +21,6 @@ public: RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode); virtual ~RendererShaderMtl(); - void CompileVertexFunction() - { - Compile(m_mslCode); - } - - void CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType); void CompileFragmentFunction(CachedFBOMtl* activeFBO); MTL::Function* GetFunction() const