From 950f04d4446f6a5b58bb1c2170205b6f5e4c8283 Mon Sep 17 00:00:00 2001
From: Samuliak
Date: Wed, 11 Sep 2024 12:22:45 +0200
Subject: [PATCH] support instancing for mesh shaders

The object shader emitted for mesh-shader draws hardcoded the instance
index to 0. Pass the number of vertices per instance to the object shader
through a new buffer binding (verticesPerInstanceBinding), dispatch
count * instanceCount vertices worth of threadgroups, and derive the
instance and vertex index from the flat thread index:

    iid = vid / verticesPerInstance
    vid = vid % verticesPerInstance

Also move the "// UInt" comment in the generated fetchVertex code onto the
branch it describes.
---
Review notes:
 - verticesPerInstance is set to `count`, not `count / instanceCount`.
   The dispatch covers count * instanceCount vertices, so `count` is the
   per-instance vertex count; dividing it again yields wrong instance and
   vertex ids for instanceCount > 1 and divides by zero for
   instanceCount == 0.
 - Leading whitespace of the hunk lines is assumed to be tabs; if context
   does not match, apply with `git apply --ignore-whitespace`.

 .../HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h          | 1 +
 .../LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp         | 1 +
 .../LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp          | 8 ++++----
 .../LatteDecompilerEmitMSLHeader.hpp                           | 3 +++
 src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp             | 9 +++++++--
 5 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h
index 29e65c58..2812facc 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h
@@ -64,6 +64,7 @@ struct LatteDecompilerShaderResourceMapping
 	// attributes (vertex shader only)
 	sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS];
 	// Metal exclusive
+	sint8 verticesPerInstanceBinding{-1};
 	sint8 indexBufferBinding{-1};
 	sint8 indexTypeBinding{-1};
 
diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp
index ec3d8aa7..b5697d42 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp
@@ -1019,6 +1019,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
 		LatteDecompiler::_initTextureBindingPointsMTL(shaderContext);
 		LatteDecompiler::_initUniformBindingPoints(shaderContext);
 		LatteDecompiler::_initAttributeBindingPoints(shaderContext);
+		shaderContext->output->resourceMappingMTL.verticesPerInstanceBinding = shaderContext->currentBufferBindingPointMTL++;
 		shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++;
 		shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++;
 	}
diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
index 71e3f0df..3f022c61 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
@@ -3920,8 +3920,8 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
 			// Index buffer
 			inputFetchDefinition += "if (indexType == 1) // UShort\n";
 			inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
-			inputFetchDefinition += "else if (indexType == 2)\n";
-			inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid]; // UInt\n";
+			inputFetchDefinition += "else if (indexType == 2) // UInt\n";
+			inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid];\n";
 			inputFetchDefinition += "VertexIn in;\n";
 
 			for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups)
@@ -4060,8 +4060,8 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
 		{
 			// Calculate the imaginary vertex id
 			src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF);
-			// TODO: don't hardcode the instance index
-			src->add("uint iid = 0;" _CRLF);
+			src->add("uint iid = vid / verticesPerInstance;" _CRLF);
+			src->add("vid %= verticesPerInstance;" _CRLF);
 			// Fetch the input
 			src->add("VertexIn in = fetchVertex(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);
 			// Output is defined as object payload
diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
index 1342a277..a7121f52 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
@@ -497,8 +497,11 @@ namespace LatteDecompiler
 				src->add(", mesh_grid_properties meshGridProperties");
 				src->add(", uint tig [[threadgroup_position_in_grid]]");
 				src->add(", uint tid [[thread_index_in_threadgroup]]");
+				// TODO: put into the support buffer?
+				src->addFmt(", constant uint& verticesPerInstance [[buffer({})]]", decompilerContext->output->resourceMappingMTL.verticesPerInstanceBinding);
 				// TODO: inly include index buffer if needed
 				src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding);
+				// TODO: put into the support buffer?
 				// TODO: use uchar?
 				src->addFmt(", constant uint& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding);
 				src->add(" VERTEX_BUFFER_DEFINITIONS");
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index 3c3ed106..46bcf6dc 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -1183,10 +1183,15 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
 	}
 	if (usesGeometryShader)
 	{
+		// count is already per instance: the dispatch below covers count * instanceCount vertices
+		uint32 verticesPerInstance = count;
+		// TODO: make a helper function for this
+		renderCommandEncoder->setObjectBytes(&verticesPerInstance, sizeof(verticesPerInstance), vertexShader->resourceMapping.verticesPerInstanceBinding);
+		encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.verticesPerInstanceBinding] = {nullptr};
 		if (indexBuffer)
 			SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding);
 		renderCommandEncoder->setObjectBytes(&hostIndexType, sizeof(hostIndexType), vertexShader->resourceMapping.indexTypeBinding);
-		encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.indexTypeBinding] = {nullptr};
+		encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.indexTypeBinding] = {nullptr};
 
 		uint32 verticesPerPrimitive = 0;
 		switch (primitiveMode)
@@ -1206,7 +1211,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
 			break;
 		}
 
-		renderCommandEncoder->drawMeshThreadgroups(MTL::Size(count / verticesPerPrimitive, 1, 1), MTL::Size(verticesPerPrimitive, 1, 1), MTL::Size(1, 1, 1));
+		renderCommandEncoder->drawMeshThreadgroups(MTL::Size(count * instanceCount / verticesPerPrimitive, 1, 1), MTL::Size(verticesPerPrimitive, 1, 1), MTL::Size(1, 1, 1));
 	}
 	else
 	{