From b10bcd422e946fdb2cf708cc877e94984202bccc Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 20 Aug 2024 09:20:22 +0200 Subject: [PATCH] emit mesh shaders --- .../LatteDecompilerEmitMSL.cpp | 45 +++++--- .../LatteDecompilerEmitMSLHeader.hpp | 101 +++++++++++++----- 2 files changed, 104 insertions(+), 42 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 5bf3c1e5..2061790a 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -3355,7 +3355,7 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La } else if (parameterExportType == 2 && parameterExportBase < 16) { - src->addFmt("passG2PParameter{}.", parameterExportBase); + src->addFmt("out.passParameterSem{}.", parameterExportBase); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex); @@ -3587,9 +3587,10 @@ void LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderConte src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1)); // write point size if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) - src->add("gl_PointSize = supportBuffer.pointSize;" _CRLF); + src->add("out.pointSize = supportBuffer.pointSize;" _CRLF); // emit vertex - src->add("EmitVertex();" _CRLF); + src->add("mesh.set_vertex(out);" _CRLF); + src->add("mesh.set_index(tid, tid);" _CRLF); // increment transform feedback pointer for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) { @@ -3846,7 +3847,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, { // Defined just-in-time // Will also modify vid in case of an indexed draw - src->add("ObjectIn fetchInput(VERTEX_BUFFER_DEFINITIONS, thread uint& vid);" _CRLF); + src->add("VertexIn fetchInput(VERTEX_BUFFER_DEFINITIONS, thread uint& vid);" _CRLF); functionType = "[[object, max_total_threads_per_threadgroup(MAX_THREADS_PER_THREADGROUP), max_total_threadgroups_per_mesh_grid(1)]]"; outputTypeName = "void"; @@ -3857,6 +3858,10 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, outputTypeName = "VertexOut"; } break; + case LatteConst::ShaderType::Geometry: + functionType = "[[mesh, max_total_threads_per_threadgroup(MAX_THREADS_PER_THREADGROUP)]]"; + outputTypeName = "void"; + break; case LatteConst::ShaderType::Pixel: functionType = "fragment"; outputTypeName = "FragmentOut"; @@ -3866,16 +3871,23 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, src->addFmt("{} {} main0(", functionType, outputTypeName); LatteDecompiler::emitInputs(shaderContext); src->add(") {" _CRLF); - if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader) + if (shaderContext->options->usesGeometryShader) { - // Calculate the imaginary vertex id - src->add("uint vid = tig * PRIMITIVE_VERTEX_COUNT + tid;" _CRLF); - // TODO: don't hardcode the instance index - src->add("uint iid = 0;" _CRLF); - // Fetch the input - src->add("ObjectIn in = fetchInput(VERTEX_BUFFERS, vid);" _CRLF); - // Output is defined as object payload - src->add("object_payload ObjectPayload& out = objectPayload[tid];" _CRLF); + if (shader->shaderType == LatteConst::ShaderType::Vertex) + { + // Calculate the imaginary vertex id + src->add("uint vid = tig * PRIMITIVE_VERTEX_COUNT + tid;" _CRLF); + // TODO: don't hardcode the instance index + src->add("uint iid = 0;" _CRLF); + // Fetch the input + src->add("VertexIn in = fetchInput(VERTEX_BUFFERS, vid);" _CRLF); + // Output is defined as object payload + src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF); + } + else + { + src->add("GeometryOut out;" _CRLF); + } } else { @@ -4077,9 +4089,9 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, { // import from geometry shader if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->addFmt("{} = asy_type(passG2PParameter{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); + src->addFmt("{} = asy_type(passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) - src->addFmt("{} = passG2PParameter{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); + src->addFmt("{} = passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); else cemu_assert_unimplemented(); } @@ -4132,7 +4144,8 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, } // return - src->add("return out;" _CRLF); + if (!shaderContext->options->usesGeometryShader) + src->add("return out;" _CRLF); // end of shader main src->add("}" _CRLF); src->shrink_to_fit(); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 4a36cf65..71d83710 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -1,5 +1,6 @@ #pragma once +#include "HW/Latte/Core/LatteConst.h" namespace LatteDecompiler { static void _emitUniformVariables(LatteDecompilerShaderContext* decompilerContext) @@ -158,10 +159,7 @@ namespace LatteDecompiler if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex) { - if (decompilerContext->options->usesGeometryShader) - src->add("struct ObjectIn {" _CRLF); - else - src->add("struct VertexIn {" _CRLF); + src->add("struct VertexIn {" _CRLF); // attribute inputs for (uint32 i = 0; i < LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS; i++) { @@ -183,11 +181,7 @@ namespace LatteDecompiler { auto* src = shaderContext->shaderSource; - if (shaderContext->options->usesGeometryShader) - src->add("struct ObjectPayload {" _CRLF); - else - src->add("struct VertexOut {" _CRLF); - + src->add("struct VertexOut {" _CRLF); src->add("float4 position [[position]];" _CRLF); if (shaderContext->analyzer.outputPointSize) src->add("float pointSize [[point_size]];" _CRLF); @@ -215,14 +209,11 @@ namespace LatteDecompiler continue; // no ps input src->addFmt("float4 passParameterSem{}", psInputTable->import[psInputIndex].semanticId); - if (!shaderContext->options->usesGeometryShader) - { - src->addFmt(" [[user(locn{})]]", psInputIndex); - if (psInputTable->import[psInputIndex].isFlat) - src->add(" [[flat]]"); - if (psInputTable->import[psInputIndex].isNoPerspective) - src->add(" [[center_no_perspective]]"); - } + src->addFmt(" [[user(locn{})]]", psInputIndex); + if (psInputTable->import[psInputIndex].isFlat) + src->add(" [[flat]]"); + if (psInputTable->import[psInputIndex].isNoPerspective) + src->add(" [[center_no_perspective]]"); src->addFmt(";" _CRLF); } @@ -262,11 +253,10 @@ namespace LatteDecompiler if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) { _emitAttributes(decompilerContext); - _emitVSOutputs(decompilerContext); } else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel) { - _emitPSInputs(decompilerContext); + _emitPSInputs(decompilerContext); src->add("struct FragmentOut {" _CRLF); @@ -289,6 +279,57 @@ namespace LatteDecompiler src->add("};" _CRLF _CRLF); } + + if (!decompilerContext->options->usesGeometryShader) + { + if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) + _emitVSOutputs(decompilerContext); + } + else + { + if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) + { + src->add("struct VertexOut {" _CRLF); + src->add("float4 position [[position]];" _CRLF); + uint32 ringParameterCountVS2GS = 0; + if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) + { + ringParameterCountVS2GS = decompilerContext->shader->ringParameterCount; + } + else + { + ringParameterCountVS2GS = decompilerContext->shader->ringParameterCountFromPrevStage; + } + for (uint32 f = 0; f < ringParameterCountVS2GS; f++) + src->addFmt("int4 passParameterSem{};" _CRLF, f); + src->add("};" _CRLF _CRLF); + src->add("struct ObjectPayload {" _CRLF); + src->add("VertexOut vertexOut[PRIMITIVE_VERTEX_COUNT];" _CRLF); + src->add("};" _CRLF _CRLF); + } + if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry) + { + // parameters shared between geometry and pixel shader + uint32 ringItemSize = decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF; + if ((ringItemSize & 0xF) != 0) + debugBreakpoint(); + if (((decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF) & 0xF) != 0) + debugBreakpoint(); + + src->add("struct GeometryOut {" _CRLF); + src->add("float4 position [[position]];" _CRLF); + for (sint32 p = 0; p < decompilerContext->parsedGSCopyShader->numParam; p++) + { + if (decompilerContext->parsedGSCopyShader->paramMapping[p].exportType != 2) + continue; + src->addFmt("float4 passParameterSem{} [[user(locn)]];" _CRLF, (sint32)decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam, decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam & 0x7F); + } + src->add("};" _CRLF _CRLF); + + // Define the mesh shader output type + src->add("using MeshType = mesh;" _CRLF); + } + } } static void emitHeader(LatteDecompilerShaderContext* decompilerContext) @@ -397,27 +438,35 @@ namespace LatteDecompiler case LatteConst::ShaderType::Vertex: if (decompilerContext->options->usesGeometryShader) { - src->add(", object_data ObjectPayload* objectPayload [[payload]]"); + src->add(", object_data ObjectPayload& objectPayload [[payload]]"); src->add(", mesh_grid_properties meshGridProperties"); src->add(", uint tig [[threadgroup_position_in_grid]]"); src->add(", uint tid [[thread_index_in_threadgroup]]"); + src->add(", VERTEX_BUFFER_DEFINITIONS"); } else { src->add(", uint vid [[vertex_id]]"); src->add(", uint iid [[instance_id]]"); - - // streamout buffer (transform feedback) - if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite) - { - src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingVK.tfStorageBindingPoint); - } } break; + case LatteConst::ShaderType::Geometry: + src->add(", MeshType mesh"); + src->add(", const object_data ObjectPayload& objectPayload [[payload]]"); + src->add(", uint tid [[thread_index_in_threadgroup]]"); + break; case LatteConst::ShaderType::Pixel: src->add(", bool frontFacing [[front_facing]]"); break; } + + // streamout buffer (transform feedback) + if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) + { + if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite) + src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingVK.tfStorageBindingPoint); + } + // uniform buffers _emitUniformBufferDefinitions(decompilerContext); // textures