From 6b1360415bbc1115a180338c453a48d16aaf8d27 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 20 Aug 2024 08:22:24 +0200 Subject: [PATCH] emit object shaders --- .../LatteDecompilerEmitMSL.cpp | 50 ++++++++++++---- .../LatteDecompilerEmitMSLHeader.hpp | 60 +++++++++++++------ 2 files changed, 82 insertions(+), 28 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 95e91d40..5bf3c1e5 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2822,14 +2822,14 @@ static void _emitGSReadInputVFetchCode(LatteDecompilerShaderContext* shaderConte src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType); - src->add("(v2g["); + src->add("(objectPayload["); if (texInstruction->textureFetch.srcSel[0] >= 4) cemu_assert_unimplemented(); if (texInstruction->textureFetch.srcSel[1] >= 4) cemu_assert_unimplemented(); // todo: Index type src->add("0"); - src->addFmt("].passV2GParameter{}.", texInstruction->textureFetch.offset/16); + src->addFmt("].passParameterSem{}.", texInstruction->textureFetch.offset/16); for(sint32 f=0; f<4; f++) @@ -3316,7 +3316,7 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La cemu_assert_unimplemented(); for (sint32 burstIndex = 0; burstIndex < (sint32)(cfInstruction->exportBurstCount + 1); burstIndex++) { - src->addFmt("v2g.passV2GParameter{}.", (cfInstruction->exportArrayBase) / 4 + burstIndex); + src->addFmt("out.passParameterSem{}.", (cfInstruction->exportArrayBase) / 4 + burstIndex); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_SIGNED_INT, burstIndex); @@ -3842,8 +3842,20 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: - functionType = "vertex"; - outputTypeName = "VertexOut"; + if (shaderContext->options->usesGeometryShader) + { + // Defined just-in-time + // Will also modify vid in case of an indexed draw + src->add("ObjectIn fetchInput(VERTEX_BUFFER_DEFINITIONS, thread uint& vid);" _CRLF); + + functionType = "[[object, max_total_threads_per_threadgroup(MAX_THREADS_PER_THREADGROUP), max_total_threadgroups_per_mesh_grid(1)]]"; + outputTypeName = "void"; + } + else + { + functionType = "vertex"; + outputTypeName = "VertexOut"; + } break; case LatteConst::ShaderType::Pixel: functionType = "fragment"; @@ -3854,7 +3866,21 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, src->addFmt("{} {} main0(", functionType, outputTypeName); LatteDecompiler::emitInputs(shaderContext); src->add(") {" _CRLF); - src->addFmt("{} out;" _CRLF, outputTypeName); + if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader) + { + // Calculate the imaginary vertex id + src->add("uint vid = tig * PRIMITIVE_VERTEX_COUNT + tid;" _CRLF); + // TODO: don't hardcode the instance index + src->add("uint iid = 0;" _CRLF); + // Fetch the input + src->add("ObjectIn in = fetchInput(VERTEX_BUFFERS, vid);" _CRLF); + // Output is defined as object payload + src->add("object_payload ObjectPayload& out = objectPayload[tid];" _CRLF); + } + else + { + src->addFmt("{} out;" _CRLF, outputTypeName); + } // variable definition if (shaderContext->typeTracker.useArrayGPRs == false) { @@ -4094,13 +4120,17 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader == false) src->add("out.pointSize = supportBuffer.pointSize;" _CRLF); } - // HACK: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?) + // TODO: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?) if (shader->shaderType == LatteConst::ShaderType::Vertex) - { - // TODO: check this - // MoltenVK does this src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF); + + if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader) + { + src->add("if (tid == 0) {" _CRLF); + src->add("meshGridProperties.set_threadgroups_per_grid(uint3(1, 1, 1));" _CRLF); + src->add("}" _CRLF); } + // return src->add("return out;" _CRLF); // end of shader main diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 8527adde..4a36cf65 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -158,7 +158,10 @@ namespace LatteDecompiler if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex) { - src->add("struct VertexIn {" _CRLF); + if (decompilerContext->options->usesGeometryShader) + src->add("struct ObjectIn {" _CRLF); + else + src->add("struct VertexIn {" _CRLF); // attribute inputs for (uint32 i = 0; i < LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS; i++) { @@ -166,7 +169,10 @@ namespace LatteDecompiler { cemu_assert_debug(decompilerContext->output->resourceMappingVK.attributeMapping[i] >= 0); - src->addFmt("uint4 attrDataSem{} [[attribute({})]];" _CRLF, i, (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]); + src->addFmt("uint4 attrDataSem{}", i); + if (!decompilerContext->options->usesGeometryShader) + src->addFmt(" [[attribute({})]]", (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]); + src->add(";" _CRLF); } } src->add("};" _CRLF _CRLF); @@ -177,11 +183,14 @@ namespace LatteDecompiler { auto* src = shaderContext->shaderSource; - src->add("struct VertexOut {" _CRLF); + if (shaderContext->options->usesGeometryShader) + src->add("struct ObjectPayload {" _CRLF); + else + src->add("struct VertexOut {" _CRLF); src->add("float4 position [[position]];" _CRLF); if (shaderContext->analyzer.outputPointSize) - src->add("float pointSize[[point_size]];" _CRLF); + src->add("float pointSize [[point_size]];" _CRLF); LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); auto parameterMask = shaderContext->shader->outputParameterMask; @@ -206,11 +215,14 @@ namespace LatteDecompiler continue; // no ps input src->addFmt("float4 passParameterSem{}", psInputTable->import[psInputIndex].semanticId); - src->addFmt(" [[user(locn{})]]", psInputIndex); - if (psInputTable->import[psInputIndex].isFlat) - src->add(" [[flat]]"); - if (psInputTable->import[psInputIndex].isNoPerspective) - src->add(" [[center_no_perspective]]"); + if (!shaderContext->options->usesGeometryShader) + { + src->addFmt(" [[user(locn{})]]", psInputIndex); + if (psInputTable->import[psInputIndex].isFlat) + src->add(" [[flat]]"); + if (psInputTable->import[psInputIndex].isNoPerspective) + src->add(" [[center_no_perspective]]"); + } src->addFmt(";" _CRLF); } @@ -369,26 +381,38 @@ namespace LatteDecompiler switch (decompilerContext->shaderType) { case LatteConst::ShaderType::Vertex: - src->add("VertexIn"); + if (!decompilerContext->options->usesGeometryShader) + src->add("VertexIn in [[stage_in]], "); break; case LatteConst::ShaderType::Pixel: - src->add("FragmentIn"); + src->add("FragmentIn in [[stage_in]], "); + break; + default: break; } - src->add(" in [[stage_in]], constant SupportBuffer& supportBuffer [[buffer(30)]]"); + src->add("constant SupportBuffer& supportBuffer [[buffer(30)]]"); switch (decompilerContext->shaderType) { case LatteConst::ShaderType::Vertex: - src->add(", uint vid [[vertex_id]]"); - src->add(", uint iid [[instance_id]]"); - - // streamout buffer (transform feedback) - if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite) + if (decompilerContext->options->usesGeometryShader) { - src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingVK.tfStorageBindingPoint); + src->add(", object_data ObjectPayload* objectPayload [[payload]]"); + src->add(", mesh_grid_properties meshGridProperties"); + src->add(", uint tig [[threadgroup_position_in_grid]]"); + src->add(", uint tid [[thread_index_in_threadgroup]]"); } + else + { + src->add(", uint vid [[vertex_id]]"); + src->add(", uint iid [[instance_id]]"); + // streamout buffer (transform feedback) + if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite) + { + src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingVK.tfStorageBindingPoint); + } + } break; case LatteConst::ShaderType::Pixel: src->add(", bool frontFacing [[front_facing]]");