From 950f04d4446f6a5b58bb1c2170205b6f5e4c8283 Mon Sep 17 00:00:00 2001
From: Samuliak <samuliak77@gmail.com>
Date: Wed, 11 Sep 2024 12:22:45 +0200
Subject: [PATCH] support instancing for mesh shaders

---
 .../HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h     | 1 +
 .../LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp    | 1 +
 .../LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp     | 8 ++++----
 .../LatteDecompilerEmitMSLHeader.hpp                      | 3 +++
 src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp        | 8 ++++++--
 5 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h
index 29e65c58..2812facc 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h
@@ -64,6 +64,7 @@ struct LatteDecompilerShaderResourceMapping
 	// attributes (vertex shader only)
 	sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS];
 	// Metal exclusive
+	sint8 verticesPerInstanceBinding{-1};
 	sint8 indexBufferBinding{-1};
 	sint8 indexTypeBinding{-1};
 
diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp
index ec3d8aa7..b5697d42 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp
@@ -1019,6 +1019,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
 	LatteDecompiler::_initTextureBindingPointsMTL(shaderContext);
 	LatteDecompiler::_initUniformBindingPoints(shaderContext);
 	LatteDecompiler::_initAttributeBindingPoints(shaderContext);
+	shaderContext->output->resourceMappingMTL.verticesPerInstanceBinding = shaderContext->currentBufferBindingPointMTL++;
 	shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++;
 	shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++;
 }
diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
index 71e3f0df..3f022c61 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
@@ -3920,8 +3920,8 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
 			// Index buffer
             inputFetchDefinition += "if (indexType == 1) // UShort\n";
             inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
-            inputFetchDefinition += "else if (indexType == 2)\n";
-            inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid]; // UInt\n";
+            inputFetchDefinition += "else if (indexType == 2) // UInt\n";
+            inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid];\n";
 
             inputFetchDefinition += "VertexIn in;\n";
             for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups)
@@ -4060,8 +4060,8 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
 		{
     	    // Calculate the imaginary vertex id
     	    src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF);
-    		// TODO: don't hardcode the instance index
-    		src->add("uint iid = 0;" _CRLF);
+    		src->add("uint iid = vid / verticesPerInstance;" _CRLF);
+            src->add("vid %= verticesPerInstance;" _CRLF);
     		// Fetch the input
     		src->add("VertexIn in = fetchVertex(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);
     		// Output is defined as object payload
diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
index 1342a277..a7121f52 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
@@ -497,8 +497,11 @@ namespace LatteDecompiler
                 src->add(", mesh_grid_properties meshGridProperties");
                 src->add(", uint tig [[threadgroup_position_in_grid]]");
                 src->add(", uint tid [[thread_index_in_threadgroup]]");
+                // TODO: put into the support buffer?
+                src->addFmt(", constant uint& verticesPerInstance [[buffer({})]]", decompilerContext->output->resourceMappingMTL.verticesPerInstanceBinding);
                 // TODO: inly include index buffer if needed
                 src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding);
+                // TODO: put into the support buffer?
                 // TODO: use uchar?
                 src->addFmt(", constant uint& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding);
                 src->add(" VERTEX_BUFFER_DEFINITIONS");
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index 3c3ed106..46bcf6dc 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -1183,10 +1183,14 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
 	}
 	if (usesGeometryShader)
 	{
+		uint32 verticesPerInstance = count; // 'count' is already per-instance (the dispatch below covers count * instanceCount vertices); the shader derives iid = vid / verticesPerInstance
+        // TODO: make a helper function for this
+        renderCommandEncoder->setObjectBytes(&verticesPerInstance, sizeof(verticesPerInstance), vertexShader->resourceMapping.verticesPerInstanceBinding);
+        encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.verticesPerInstanceBinding] = {nullptr};
 	    if (indexBuffer)
 		    SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding);
 		renderCommandEncoder->setObjectBytes(&hostIndexType, sizeof(hostIndexType), vertexShader->resourceMapping.indexTypeBinding);
-		encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.indexTypeBinding] = {nullptr};
+        encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.indexTypeBinding] = {nullptr};
 
 		uint32 verticesPerPrimitive = 0;
 		switch (primitiveMode)
@@ -1206,7 +1210,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
             break;
         }
 
-		renderCommandEncoder->drawMeshThreadgroups(MTL::Size(count / verticesPerPrimitive, 1, 1), MTL::Size(verticesPerPrimitive, 1, 1), MTL::Size(1, 1, 1));
+		renderCommandEncoder->drawMeshThreadgroups(MTL::Size(count * instanceCount / verticesPerPrimitive, 1, 1), MTL::Size(verticesPerPrimitive, 1, 1), MTL::Size(1, 1, 1));
 	}
 	else
 	{