diff --git a/src/Cafe/HW/Latte/Core/FetchShader.cpp b/src/Cafe/HW/Latte/Core/FetchShader.cpp index 6c9893f9..5933fe05 100644 --- a/src/Cafe/HW/Latte/Core/FetchShader.cpp +++ b/src/Cafe/HW/Latte/Core/FetchShader.cpp @@ -146,6 +146,23 @@ void LatteFetchShader::CalculateFetchShaderVkHash() this->vkPipelineHashFragment = h; } +void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister) // folds index + stride of every vertex buffer group into mtlShaderHashObject +{ + uint64 key = 0; // the generated object shader embeds each buffer stride as a literal, so the stride must be part of the hash + for (const LatteParsedFetchShaderBufferGroup_t& group : bufferGroups) + { + uint32 bufferIndex = group.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; // 7 context registers per attribute buffer + uint32 bufferStride = (contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; // 16-bit stride field starting at bit 11 of register 2 + + key += (uint64)bufferIndex; + key = std::rotl(key, 5); + key += (uint64)bufferStride; + key = std::rotl(key, 5); + } + mtlShaderHashObject = key; +} + void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr) { uint32 semanticId = instr->getFieldSEM_SEMANTIC_ID(); // location (attribute index inside shader) @@ -161,7 +178,7 @@ void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* pars auto nfa = instr->getField_NUM_FORMAT_ALL(); bool isSigned = instr->getField_FORMAT_COMP_ALL() == LatteClauseInstruction_VTX::FORMAT_COMP::COMP_SIGNED; auto endianSwap = instr->getField_ENDIAN_SWAP(); - + // get buffer cemu_assert_debug(bufferId >= 0xA0 && bufferId < 0xB0); uint32 bufferIndex = (bufferId - 0xA0); @@ -316,7 +333,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach // {0x00000002, 0x01800c00, 0x00000000, 0x8a000000, 0x2c00a001, 0x2c151000, 0x000a0000, ...} // size 0x50 // {0x00000002, 0x01801000, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x60 // {0x00000002, 0x01801c00, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 
0x00090000, ...} // size 0x90 - + // our new implementation: // {0x00000002, 0x01800400, 0x00000000, 0x8a000000, 0x0000a001, 0x2c151000, 0x00020000, ...} @@ -328,6 +345,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach // these only make sense when vertex shader does not call FS? LatteShader_calculateFSKey(newFetchShader); newFetchShader->CalculateFetchShaderVkHash(); + newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister); return newFetchShader; } @@ -387,6 +405,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach } LatteShader_calculateFSKey(newFetchShader); newFetchShader->CalculateFetchShaderVkHash(); + newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister); // register in cache // its possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously @@ -411,7 +430,7 @@ LatteFetchShader::~LatteFetchShader() UnregisterInCache(); } -struct FetchShaderLookupInfo +struct FetchShaderLookupInfo { LatteFetchShader* fetchShader; uint32 programSize; diff --git a/src/Cafe/HW/Latte/Core/FetchShader.h b/src/Cafe/HW/Latte/Core/FetchShader.h index ac57714d..9aeed6bd 100644 --- a/src/Cafe/HW/Latte/Core/FetchShader.h +++ b/src/Cafe/HW/Latte/Core/FetchShader.h @@ -46,6 +46,9 @@ struct LatteFetchShader // Vulkan uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline + // Metal + uint64 mtlShaderHashObject{}; + // cache info CacheHash m_cacheHash{}; bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded) @@ -53,6 +56,8 @@ struct LatteFetchShader void CalculateFetchShaderVkHash(); + void CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister); + uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; }; static bool isValidBufferIndex(const uint32 index) { return index < 0x10; }; @@ -69,4 
+74,4 @@ struct LatteFetchShader static std::unordered_map s_fetchShaderByHash; }; -LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize); \ No newline at end of file +LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize); diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 3ac0e9d2..2bc719cd 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -14,6 +14,9 @@ #include "config/ActiveSettings.h" #include "Cafe/GameProfile/GameProfile.h" #include "util/containers/flat_hash_map.hpp" +#if BOOST_OS_MACOS +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" +#endif #include // experimental new decompiler (WIP) @@ -498,6 +501,8 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, uint64 vsHash2 = 0; _calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2); uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL); + if (g_renderer->GetType() == RendererAPI::Metal && usesGeometryShader) + vsHash += _activeFetchShader->mtlShaderHashObject; uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F; vsHash += tmp; @@ -542,6 +547,20 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b _calculateShaderProgramHash(psProgramCode, pixelShaderSize, &hashCachePS, &psHash1, &psHash2); // get vertex shader uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? 
hashCacheGS.prevHash1 : 0ULL); + +#if BOOST_OS_MACOS + if (g_renderer->GetType() == RendererAPI::Metal) + { + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + { + auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew); + uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType; + psHash += (uint64)dataType; + psHash = std::rotl(psHash, 7); + } + } +#endif + _shaderBaseHash_ps = psHash; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 5d8b2c6f..29e65c58 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -65,6 +65,7 @@ struct LatteDecompilerShaderResourceMapping sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS]; // Metal exclusive sint8 indexBufferBinding{-1}; + sint8 indexTypeBinding{-1}; sint32 getTextureCount() { diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index 9a3db895..ec3d8aa7 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -1020,4 +1020,5 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD LatteDecompiler::_initUniformBindingPoints(shaderContext); LatteDecompiler::_initAttributeBindingPoints(shaderContext); shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++; + shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 1c75b737..137f8e87 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ 
b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -11,6 +11,7 @@ #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "config/ActiveSettings.h" #include "util/helpers/StringBuf.h" @@ -3214,11 +3215,13 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe src->add(") == false) discard_fragment();" _CRLF); } // pixel color output - src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex)); - src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetColorAttachmentTypeStr(pixelColorOutputIndex)); - _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); - src->add(");" _CRLF); - src->add("#endif" _CRLF); + auto dataType = GetColorBufferDataType(pixelColorOutputIndex, *shaderContext->contextRegistersNew); + if (dataType != MetalDataType::NONE) + { + src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetDataTypeStr(dataType)); + _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); + src->add(");" _CRLF); + } if( cfInstruction->exportArrayBase+i >= 8 ) cemu_assert_unimplemented(); @@ -3881,9 +3884,125 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, case LatteConst::ShaderType::Vertex: if (shaderContext->options->usesGeometryShader || isRectVertexShader) { - // Defined just-in-time - // Will also modify vid in case of an indexed draw - src->add("VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF); + // TODO: clean this up + // fetchVertex will modify vid in case of an indexed draw + + // Vertex buffers + std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; + std::string vertexBuffers = "#define VERTEX_BUFFERS "; + std::string inputFetchDefinition = "VertexIn 
fetchVertex(thread uint& vid, device uint* indexBuffer, uint indexType VERTEX_BUFFER_DEFINITIONS) {\n"; + + // Index buffer + inputFetchDefinition += "if (indexType == 1) // UShort\n"; + inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n"; + inputFetchDefinition += "else if (indexType == 2)\n"; + inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid]; // UInt\n"; + + inputFetchDefinition += "VertexIn in;\n"; + for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups) + { + std::optional fetchType; + + uint32 bufferIndex = bufferGroup.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + uint32 bufferStride = (shaderContext->contextRegisters[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + for (sint32 j = 0; j < bufferGroup.attribCount; ++j) + { + auto& attr = bufferGroup.attrib[j]; + + uint32 semanticId = shaderContext->output->resourceMappingMTL.attributeMapping[attr.semanticId]; + if (semanticId == (uint32)-1) + continue; // attribute not used? 
+ + std::string formatName; + uint8 componentCount = 0; + switch (GetMtlVertexFormat(attr.format)) + { + case MTL::VertexFormatUChar: + formatName = "uchar"; + componentCount = 1; + break; + case MTL::VertexFormatUChar2: + formatName = "uchar2"; + componentCount = 2; + break; + case MTL::VertexFormatUChar3: + formatName = "uchar3"; + componentCount = 3; + break; + case MTL::VertexFormatUChar4: + formatName = "uchar4"; + componentCount = 4; + break; + case MTL::VertexFormatUShort: + formatName = "ushort"; + componentCount = 1; + break; + case MTL::VertexFormatUShort2: + formatName = "ushort2"; + componentCount = 2; + break; + case MTL::VertexFormatUShort3: + formatName = "ushort3"; + componentCount = 3; + break; + case MTL::VertexFormatUShort4: + formatName = "ushort4"; + componentCount = 4; + break; + case MTL::VertexFormatUInt: + formatName = "uint"; + componentCount = 1; + break; + case MTL::VertexFormatUInt2: + formatName = "uint2"; + componentCount = 2; + break; + case MTL::VertexFormatUInt3: + formatName = "uint3"; + componentCount = 3; + break; + case MTL::VertexFormatUInt4: + formatName = "uint4"; + componentCount = 4; + break; + } + + // Fetch the attribute + inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId); + inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName); + inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex); + inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset); + for (uint8 i = 0; i < (4 - componentCount); i++) + inputFetchDefinition += ", 0"; + inputFetchDefinition += ");\n"; + + if (fetchType.has_value()) + cemu_assert_debug(fetchType == attr.fetchType); + else + fetchType = attr.fetchType; + + if (attr.fetchType == LatteConst::INSTANCE_DATA) + { + cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported + } + } + + // TODO: fetch type + + vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} 
[[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); + vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex); + } + + inputFetchDefinition += "return in;\n"; + inputFetchDefinition += "}\n"; + + src->add(vertexBufferDefinitions.c_str()); + src->add("\n"); + src->add(vertexBuffers.c_str()); + src->add("\n"); + src->add(inputFetchDefinition.c_str()); functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]"; outputTypeName = "void"; @@ -3916,7 +4035,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, // TODO: don't hardcode the instance index src->add("uint iid = 0;" _CRLF); // Fetch the input - src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF); + src->add("VertexIn in = fetchVertex(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF); // Output is defined as object payload src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF); } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 38392bdb..412c9992 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -281,9 +281,11 @@ namespace LatteDecompiler { if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0) { - src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i)); - src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetColorAttachmentTypeStr(i), i, i); - src->add("#endif" _CRLF); + auto dataType = GetColorBufferDataType(i, *decompilerContext->contextRegistersNew); + if (dataType != MetalDataType::NONE) + { + src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetDataTypeStr(dataType), i, i); + } } } @@ -495,6 +497,10 @@ namespace LatteDecompiler src->add(", mesh_grid_properties 
meshGridProperties"); src->add(", uint tig [[threadgroup_position_in_grid]]"); src->add(", uint tid [[thread_index_in_threadgroup]]"); + // TODO: only include index buffer if needed + src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding); + // TODO: use uchar? + src->addFmt(", constant uint& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding); src->add(" VERTEX_BUFFER_DEFINITIONS"); } else diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp index eebacd45..86c44efe 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp @@ -78,6 +78,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM auto pixelFormat = GetMtlPixelFormat(format, isDepth, m_mtlr->GetPixelFormatSupport()); desc->setPixelFormat(pixelFormat); + // TODO: using MTL::TextureUsageShaderWrite as well fixes Mario Tennis: Ultra Smash, investigate why MTL::TextureUsage usage = MTL::TextureUsageShaderRead; if (!Latte::IsCompressedFormat(format)) { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp index 6f54272c..daa283e4 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp @@ -1,4 +1,5 @@ #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" +#include "Cemu/Logging/CemuLogging.h" #include "Common/precompiled.h" #include "Metal/MTLDepthStencil.hpp" #include "Metal/MTLPixelFormat.hpp" @@ -84,55 +85,51 @@ std::map MTL_DEPTH_FORMAT_TABLE = { const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth) { + if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT) + { + return {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}; + } + MetalPixelFormatInfo formatInfo; if (isDepth) formatInfo = 
MTL_DEPTH_FORMAT_TABLE[format]; else formatInfo = MTL_COLOR_FORMAT_TABLE[format]; - // Depth24Unorm_Stencil8 is not supported on Apple sillicon - // TODO: query if format is available instead - if (formatInfo.pixelFormat == MTL::PixelFormatDepth24Unorm_Stencil8) - { - formatInfo.pixelFormat = MTL::PixelFormatDepth32Float_Stencil8; - } - - if (formatInfo.pixelFormat == MTL::PixelFormatInvalid) - { - printf("invalid pixel format: %u\n", (uint32)format); - } - return formatInfo; } MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth, const MetalPixelFormatSupport& pixelFormatSupport) { auto pixelFormat = GetMtlPixelFormatInfo(format, isDepth).pixelFormat; + if (pixelFormat == MTL::PixelFormatInvalid) + cemuLog_logDebug(LogType::Force, "invalid pixel format {}\n", (uint32)format); // log the Latte format that has no Metal mapping; pixelFormat itself is always PixelFormatInvalid in this branch - if (!pixelFormatSupport.m_supportsR8Unorm_sRGB && pixelFormat == MTL::PixelFormatR8Unorm_sRGB) - pixelFormat = MTL::PixelFormatRGBA8Unorm_sRGB; - - if (!pixelFormatSupport.m_supportsRG8Unorm_sRGB && pixelFormat == MTL::PixelFormatRG8Unorm_sRGB) - pixelFormat = MTL::PixelFormatRGBA8Unorm_sRGB; - - if (!pixelFormatSupport.m_supportsPacked16BitFormats) + switch (pixelFormat) { - switch (pixelFormat) - { - case MTL::PixelFormatB5G6R5Unorm: - case MTL::PixelFormatA1BGR5Unorm: - case MTL::PixelFormatABGR4Unorm: - case MTL::PixelFormatBGR5A1Unorm: - pixelFormat = MTL::PixelFormatRGBA8Unorm; - break; - default: - break; - } + case MTL::PixelFormatR8Unorm_sRGB: + if (!pixelFormatSupport.m_supportsR8Unorm_sRGB) + return MTL::PixelFormatRGBA8Unorm_sRGB; + break; + case MTL::PixelFormatRG8Unorm_sRGB: + if (!pixelFormatSupport.m_supportsRG8Unorm_sRGB) + return MTL::PixelFormatRGBA8Unorm_sRGB; + break; + case MTL::PixelFormatB5G6R5Unorm: + case MTL::PixelFormatA1BGR5Unorm: + case MTL::PixelFormatABGR4Unorm: + case MTL::PixelFormatBGR5A1Unorm: + if (!pixelFormatSupport.m_supportsPacked16BitFormats) + return MTL::PixelFormatRGBA8Unorm; + break; + case 
MTL::PixelFormatDepth24Unorm_Stencil8: + if (!pixelFormatSupport.m_supportsDepth24Unorm_Stencil8) + return MTL::PixelFormatDepth32Float_Stencil8; + break; + default: + break; } - if (!pixelFormatSupport.m_supportsDepth24Unorm_Stencil8 && pixelFormat == MTL::PixelFormatDepth24Unorm_Stencil8) - pixelFormat = MTL::PixelFormatDepth32Float_Stencil8; - return pixelFormat; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h index 2c805527..c1b1c75c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h @@ -6,6 +6,7 @@ #include "Cafe/HW/Latte/Core/LatteConst.h" //#include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" +#include "Common/precompiled.h" struct Uvec2 { uint32 x; @@ -32,6 +33,28 @@ const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, boo MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth, const MetalPixelFormatSupport& pixelFormatSupport); +inline MetalDataType GetColorBufferDataType(const uint32 index, const LatteContextRegister& lcr) +{ + auto format = LatteMRT::GetColorBufferFormat(index, lcr); + return GetMtlPixelFormatInfo(format, false).dataType; +} + +inline const char* GetDataTypeStr(MetalDataType dataType) +{ + switch (dataType) + { + case MetalDataType::INT: + return "int4"; + case MetalDataType::UINT: + return "uint4"; + case MetalDataType::FLOAT: + return "float4"; + default: + cemu_assert_suspicious(); + return ""; + } +} + size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width); size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, bool isDepth, uint32 height, size_t bytesPerRow); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index a2ecc7e9..8a6daa92 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -18,7 +18,7 @@ struct MetalPixelFormatSupport m_supportsR8Unorm_sRGB = device->supportsFamily(MTL::GPUFamilyApple1); m_supportsRG8Unorm_sRGB = device->supportsFamily(MTL::GPUFamilyApple1); m_supportsPacked16BitFormats = device->supportsFamily(MTL::GPUFamilyApple1); - m_supportsDepth24Unorm_Stencil8 = device->supportsFamily(MTL::GPUFamilyMac2); + m_supportsDepth24Unorm_Stencil8 = device->depth24Stencil8PixelFormatSupported(); } }; @@ -41,10 +41,10 @@ inline size_t Align(size_t size, size_t alignment) return (size + alignment - 1) & ~(alignment - 1); } -inline std::string GetColorAttachmentTypeStr(uint32 index) -{ - return "COLOR_ATTACHMENT" + std::to_string(index) + "_TYPE"; -} +//inline std::string GetColorAttachmentTypeStr(uint32 index) +//{ +// return "COLOR_ATTACHMENT" + std::to_string(index) + "_TYPE"; +//} // Cast from const char* to NS::String* inline NS::String* ToNSString(const char* str) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 8f7740b9..8273ec16 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -1,16 +1,14 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "Foundation/NSObject.hpp" -#include "HW/Latte/Core/LatteShader.h" -#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h" -#include "HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "HW/Latte/Renderer/Metal/RendererShaderMtl.h" -#include "HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" +#include "Cafe/HW/Latte/Core/LatteShader.h" +#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" +#include 
"Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" -#include "HW/Latte/Core/FetchShader.h" -#include "HW/Latte/ISA/RegDefines.h" -#include "Metal/MTLRenderPipeline.hpp" +#include "Cafe/HW/Latte/Core/FetchShader.h" +#include "Cafe/HW/Latte/ISA/RegDefines.h" #include "config/ActiveSettings.h" static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) @@ -366,14 +364,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte auto mtlVertexShader = static_cast(vertexShader->shader); auto mtlPixelShader = static_cast(pixelShader->shader); - mtlVertexShader->CompileVertexFunction(); - // HACK - if (!mtlVertexShader->GetFunction()) - { - debug_printf("no vertex function, skipping draw\n"); - return nullptr; - } - mtlPixelShader->CompileFragmentFunction(lastUsedFBO); // Render pipeline state MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); @@ -419,7 +409,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte { debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); error->release(); - return nullptr; } else { @@ -475,8 +464,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); } auto mtlPixelShader = static_cast(pixelShader->shader); - mtlObjectShader->CompileObjectFunction(lcr, fetchShader, vertexShader, hostIndexType); - mtlPixelShader->CompileFragmentFunction(lastUsedFBO); // Render pipeline state MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); @@ -496,13 +483,12 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe desc->setLabel(GetLabel("Mesh pipeline state", desc)); #endif pipeline = 
m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); + desc->release(); if (error) { debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); error->release(); - return nullptr; } - desc->release(); return pipeline; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 6ccbdb69..545295dc 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -859,6 +859,7 @@ void MetalRenderer::draw_beginSequence() return; // no render target } + // TODO: not checking for !streamoutEnable fixes Super Smash Bros. for Wii U, investigate why if (!hasValidFramebufferAttached && !streamoutEnable) { debug_printf("Drawcall with no color buffer or depth buffer attached\n"); @@ -916,7 +917,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); // TODO: is this even needed? 
Also, should go to draw_beginSequence - if (!vertexShader) + if (!vertexShader || !static_cast(vertexShader->shader)->GetFunction()) { printf("no vertex function, skipping draw\n"); return; @@ -1200,6 +1201,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 { if (indexBuffer) SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding); + renderCommandEncoder->setObjectBytes(&hostIndexType, sizeof(hostIndexType), vertexShader->resourceMapping.indexTypeBinding); + encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.indexTypeBinding] = {nullptr}; uint32 verticesPerPrimitive = 0; switch (primitiveMode) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 6ad72d87..d343ef45 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -16,15 +16,16 @@ extern std::atomic_int g_compiled_shaders_async; RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer} { - if (type == ShaderType::kGeometry) + NS::Error* error = nullptr; + MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error); + if (error) { - Compile(mslCode); - } - else - { - // TODO: don't compile just-in-time - m_mslCode = mslCode; + printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str()); + error->release(); + return; } + m_function = library->newFunction(ToNSString("main0")); + library->release(); // Count shader compilation g_compiled_shaders_total++; @@ -35,205 +36,3 @@ 
RendererShaderMtl::~RendererShaderMtl() if (m_function) m_function->release(); } - -void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType) -{ - cemu_assert_debug(m_type == ShaderType::kVertex); - - std::string fullCode; - - // Vertex buffers - std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; - std::string vertexBuffers = "#define VERTEX_BUFFERS "; - std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n"; - - // Index buffer - if (hostIndexType != Renderer::INDEX_TYPE::NONE) - { - vertexBufferDefinitions += ", device "; - switch (hostIndexType) - { - case Renderer::INDEX_TYPE::U16: - vertexBufferDefinitions += "ushort"; - break; - case Renderer::INDEX_TYPE::U32: - vertexBufferDefinitions += "uint"; - break; - default: - cemu_assert_suspicious(); - break; - } - - vertexBufferDefinitions += fmt::format("* indexBuffer [[buffer({})]]", vertexShader->resourceMapping.indexBufferBinding); - vertexBuffers += ", indexBuffer"; - inputFetchDefinition += "vid = indexBuffer[vid];\n"; - } - - inputFetchDefinition += "VertexIn in;\n"; - for (auto& bufferGroup : fetchShader->bufferGroups) - { - std::optional fetchType; - - uint32 bufferIndex = bufferGroup.attributeBufferIndex; - uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; - uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - - for (sint32 j = 0; j < bufferGroup.attribCount; ++j) - { - auto& attr = bufferGroup.attrib[j]; - - uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; - if (semanticId == (uint32)-1) - continue; // attribute not used? 
- - std::string formatName; - uint8 componentCount = 0; - switch (GetMtlVertexFormat(attr.format)) - { - case MTL::VertexFormatUChar: - formatName = "uchar"; - componentCount = 1; - break; - case MTL::VertexFormatUChar2: - formatName = "uchar2"; - componentCount = 2; - break; - case MTL::VertexFormatUChar3: - formatName = "uchar3"; - componentCount = 3; - break; - case MTL::VertexFormatUChar4: - formatName = "uchar4"; - componentCount = 4; - break; - case MTL::VertexFormatUShort: - formatName = "ushort"; - componentCount = 1; - break; - case MTL::VertexFormatUShort2: - formatName = "ushort2"; - componentCount = 2; - break; - case MTL::VertexFormatUShort3: - formatName = "ushort3"; - componentCount = 3; - break; - case MTL::VertexFormatUShort4: - formatName = "ushort4"; - componentCount = 4; - break; - case MTL::VertexFormatUInt: - formatName = "uint"; - componentCount = 1; - break; - case MTL::VertexFormatUInt2: - formatName = "uint2"; - componentCount = 2; - break; - case MTL::VertexFormatUInt3: - formatName = "uint3"; - componentCount = 3; - break; - case MTL::VertexFormatUInt4: - formatName = "uint4"; - componentCount = 4; - break; - } - - // Fetch the attribute - inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId); - inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName); - inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex); - inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset); - for (uint8 i = 0; i < (4 - componentCount); i++) - inputFetchDefinition += ", 0"; - inputFetchDefinition += ");\n"; - - if (fetchType.has_value()) - cemu_assert_debug(fetchType == attr.fetchType); - else - fetchType = attr.fetchType; - - if (attr.fetchType == LatteConst::INSTANCE_DATA) - { - cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported - } - } - - vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, 
GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); - vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex); - } - - inputFetchDefinition += "return in;\n"; - inputFetchDefinition += "}\n"; - - fullCode += vertexBufferDefinitions + "\n"; - fullCode += vertexBuffers + "\n"; - fullCode += m_mslCode; - fullCode += inputFetchDefinition; - - Compile(fullCode); -} - -void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO) -{ - cemu_assert_debug(m_type == ShaderType::kFragment); - - std::string fullCode; - - // Define color attachment data types - for (uint8 i = 0; i < 8; i++) - { - const auto& colorBuffer = activeFBO->colorBuffer[i]; - if (!colorBuffer.texture) - { - continue; - } - auto dataType = GetMtlPixelFormatInfo(colorBuffer.texture->format, false).dataType; - fullCode += "#define " + GetColorAttachmentTypeStr(i) + " "; - switch (dataType) - { - case MetalDataType::INT: - fullCode += "int4"; - break; - case MetalDataType::UINT: - fullCode += "uint4"; - break; - case MetalDataType::FLOAT: - fullCode += "float4"; - break; - default: - cemu_assert_suspicious(); - break; - } - fullCode += "\n"; - } - - fullCode += m_mslCode; - Compile(fullCode); -} - -void RendererShaderMtl::Compile(const std::string& mslCode) -{ - if (m_function) - m_function->release(); - - // HACK - if (m_hasError) - return; - - NS::Error* error = nullptr; - MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error); - if (error) - { - printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str()); - error->release(); - - // HACK - m_hasError = true; - - return; - } - m_function = library->newFunction(ToNSString("main0")); - library->release(); -} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index ca5a0ff9..0758b0e6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ 
b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -21,14 +21,6 @@ public: RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode); virtual ~RendererShaderMtl(); - void CompileVertexFunction() - { - Compile(m_mslCode); - } - - void CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType); - void CompileFragmentFunction(CachedFBOMtl* activeFBO); - MTL::Function* GetFunction() const { return m_function; @@ -60,11 +52,5 @@ private: MTL::Function* m_function = nullptr; - std::vector m_binary; - std::string m_mslCode; - - // HACK - bool m_hasError = false; - void Compile(const std::string& mslCode); };