emit mesh shaders

This commit is contained in:
Samuliak 2024-08-20 09:20:22 +02:00
parent 6b1360415b
commit b10bcd422e
2 changed files with 104 additions and 42 deletions

View File

@ -3355,7 +3355,7 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La
} }
else if (parameterExportType == 2 && parameterExportBase < 16) else if (parameterExportType == 2 && parameterExportBase < 16)
{ {
src->addFmt("passG2PParameter{}.", parameterExportBase); src->addFmt("out.passParameterSem{}.", parameterExportBase);
_emitXYZWByMask(src, cfInstruction->memWriteCompMask); _emitXYZWByMask(src, cfInstruction->memWriteCompMask);
src->addFmt(" = "); src->addFmt(" = ");
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex);
@ -3587,9 +3587,10 @@ void LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderConte
src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1)); src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
// write point size // write point size
if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
src->add("gl_PointSize = supportBuffer.pointSize;" _CRLF); src->add("out.pointSize = supportBuffer.pointSize;" _CRLF);
// emit vertex // emit vertex
src->add("EmitVertex();" _CRLF); src->add("mesh.set_vertex(out);" _CRLF);
src->add("mesh.set_index(tid, tid);" _CRLF);
// increment transform feedback pointer // increment transform feedback pointer
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
{ {
@ -3846,7 +3847,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
{ {
// Defined just-in-time // Defined just-in-time
// Will also modify vid in case of an indexed draw // Will also modify vid in case of an indexed draw
src->add("ObjectIn fetchInput(VERTEX_BUFFER_DEFINITIONS, thread uint& vid);" _CRLF); src->add("VertexIn fetchInput(VERTEX_BUFFER_DEFINITIONS, thread uint& vid);" _CRLF);
functionType = "[[object, max_total_threads_per_threadgroup(MAX_THREADS_PER_THREADGROUP), max_total_threadgroups_per_mesh_grid(1)]]"; functionType = "[[object, max_total_threads_per_threadgroup(MAX_THREADS_PER_THREADGROUP), max_total_threadgroups_per_mesh_grid(1)]]";
outputTypeName = "void"; outputTypeName = "void";
@ -3857,6 +3858,10 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
outputTypeName = "VertexOut"; outputTypeName = "VertexOut";
} }
break; break;
case LatteConst::ShaderType::Geometry:
functionType = "[[mesh, max_total_threads_per_threadgroup(MAX_THREADS_PER_THREADGROUP)]]";
outputTypeName = "void";
break;
case LatteConst::ShaderType::Pixel: case LatteConst::ShaderType::Pixel:
functionType = "fragment"; functionType = "fragment";
outputTypeName = "FragmentOut"; outputTypeName = "FragmentOut";
@ -3866,16 +3871,23 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->addFmt("{} {} main0(", functionType, outputTypeName); src->addFmt("{} {} main0(", functionType, outputTypeName);
LatteDecompiler::emitInputs(shaderContext); LatteDecompiler::emitInputs(shaderContext);
src->add(") {" _CRLF); src->add(") {" _CRLF);
if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader) if (shaderContext->options->usesGeometryShader)
{ {
// Calculate the imaginary vertex id if (shader->shaderType == LatteConst::ShaderType::Vertex)
src->add("uint vid = tig * PRIMITIVE_VERTEX_COUNT + tid;" _CRLF); {
// TODO: don't hardcode the instance index // Calculate the imaginary vertex id
src->add("uint iid = 0;" _CRLF); src->add("uint vid = tig * PRIMITIVE_VERTEX_COUNT + tid;" _CRLF);
// Fetch the input // TODO: don't hardcode the instance index
src->add("ObjectIn in = fetchInput(VERTEX_BUFFERS, vid);" _CRLF); src->add("uint iid = 0;" _CRLF);
// Output is defined as object payload // Fetch the input
src->add("object_payload ObjectPayload& out = objectPayload[tid];" _CRLF); src->add("VertexIn in = fetchInput(VERTEX_BUFFERS, vid);" _CRLF);
// Output is defined as object payload
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
}
else
{
src->add("GeometryOut out;" _CRLF);
}
} }
else else
{ {
@ -4077,9 +4089,9 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
{ {
// import from geometry shader // import from geometry shader
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->addFmt("{} = asy_type<int4>(passG2PParameter{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); src->addFmt("{} = asy_type<int4>(passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("{} = passG2PParameter{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); src->addFmt("{} = passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
else else
cemu_assert_unimplemented(); cemu_assert_unimplemented();
} }
@ -4132,7 +4144,8 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
} }
// return // return
src->add("return out;" _CRLF); if (!shaderContext->options->usesGeometryShader)
src->add("return out;" _CRLF);
// end of shader main // end of shader main
src->add("}" _CRLF); src->add("}" _CRLF);
src->shrink_to_fit(); src->shrink_to_fit();

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include "HW/Latte/Core/LatteConst.h"
namespace LatteDecompiler namespace LatteDecompiler
{ {
static void _emitUniformVariables(LatteDecompilerShaderContext* decompilerContext) static void _emitUniformVariables(LatteDecompilerShaderContext* decompilerContext)
@ -158,10 +159,7 @@ namespace LatteDecompiler
if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex) if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex)
{ {
if (decompilerContext->options->usesGeometryShader) src->add("struct VertexIn {" _CRLF);
src->add("struct ObjectIn {" _CRLF);
else
src->add("struct VertexIn {" _CRLF);
// attribute inputs // attribute inputs
for (uint32 i = 0; i < LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS; i++) for (uint32 i = 0; i < LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS; i++)
{ {
@ -183,11 +181,7 @@ namespace LatteDecompiler
{ {
auto* src = shaderContext->shaderSource; auto* src = shaderContext->shaderSource;
if (shaderContext->options->usesGeometryShader) src->add("struct VertexOut {" _CRLF);
src->add("struct ObjectPayload {" _CRLF);
else
src->add("struct VertexOut {" _CRLF);
src->add("float4 position [[position]];" _CRLF); src->add("float4 position [[position]];" _CRLF);
if (shaderContext->analyzer.outputPointSize) if (shaderContext->analyzer.outputPointSize)
src->add("float pointSize [[point_size]];" _CRLF); src->add("float pointSize [[point_size]];" _CRLF);
@ -215,14 +209,11 @@ namespace LatteDecompiler
continue; // no ps input continue; // no ps input
src->addFmt("float4 passParameterSem{}", psInputTable->import[psInputIndex].semanticId); src->addFmt("float4 passParameterSem{}", psInputTable->import[psInputIndex].semanticId);
if (!shaderContext->options->usesGeometryShader) src->addFmt(" [[user(locn{})]]", psInputIndex);
{ if (psInputTable->import[psInputIndex].isFlat)
src->addFmt(" [[user(locn{})]]", psInputIndex); src->add(" [[flat]]");
if (psInputTable->import[psInputIndex].isFlat) if (psInputTable->import[psInputIndex].isNoPerspective)
src->add(" [[flat]]"); src->add(" [[center_no_perspective]]");
if (psInputTable->import[psInputIndex].isNoPerspective)
src->add(" [[center_no_perspective]]");
}
src->addFmt(";" _CRLF); src->addFmt(";" _CRLF);
} }
@ -262,11 +253,10 @@ namespace LatteDecompiler
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
{ {
_emitAttributes(decompilerContext); _emitAttributes(decompilerContext);
_emitVSOutputs(decompilerContext);
} }
else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel) else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
{ {
_emitPSInputs(decompilerContext); _emitPSInputs(decompilerContext);
src->add("struct FragmentOut {" _CRLF); src->add("struct FragmentOut {" _CRLF);
@ -289,6 +279,57 @@ namespace LatteDecompiler
src->add("};" _CRLF _CRLF); src->add("};" _CRLF _CRLF);
} }
if (!decompilerContext->options->usesGeometryShader)
{
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
_emitVSOutputs(decompilerContext);
}
else
{
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
{
src->add("struct VertexOut {" _CRLF);
src->add("float4 position [[position]];" _CRLF);
uint32 ringParameterCountVS2GS = 0;
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
{
ringParameterCountVS2GS = decompilerContext->shader->ringParameterCount;
}
else
{
ringParameterCountVS2GS = decompilerContext->shader->ringParameterCountFromPrevStage;
}
for (uint32 f = 0; f < ringParameterCountVS2GS; f++)
src->addFmt("int4 passParameterSem{};" _CRLF, f);
src->add("};" _CRLF _CRLF);
src->add("struct ObjectPayload {" _CRLF);
src->add("VertexOut vertexOut[PRIMITIVE_VERTEX_COUNT];" _CRLF);
src->add("};" _CRLF _CRLF);
}
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
{
// parameters shared between geometry and pixel shader
uint32 ringItemSize = decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF;
if ((ringItemSize & 0xF) != 0)
debugBreakpoint();
if (((decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF) & 0xF) != 0)
debugBreakpoint();
src->add("struct GeometryOut {" _CRLF);
src->add("float4 position [[position]];" _CRLF);
for (sint32 p = 0; p < decompilerContext->parsedGSCopyShader->numParam; p++)
{
if (decompilerContext->parsedGSCopyShader->paramMapping[p].exportType != 2)
continue;
src->addFmt("float4 passParameterSem{} [[user(locn)]];" _CRLF, (sint32)decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam, decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam & 0x7F);
}
src->add("};" _CRLF _CRLF);
// Define the mesh shader output type
src->add("using MeshType = mesh<GeometryOut, void, MAX_PRIMITIVE_COUNT * PRIMITIVE_VERTEX_COUNT, MAX_PRIMITIVE_COUNT, topology::PRIMITIVE_TYPE>;" _CRLF);
}
}
} }
static void emitHeader(LatteDecompilerShaderContext* decompilerContext) static void emitHeader(LatteDecompilerShaderContext* decompilerContext)
@ -397,27 +438,35 @@ namespace LatteDecompiler
case LatteConst::ShaderType::Vertex: case LatteConst::ShaderType::Vertex:
if (decompilerContext->options->usesGeometryShader) if (decompilerContext->options->usesGeometryShader)
{ {
src->add(", object_data ObjectPayload* objectPayload [[payload]]"); src->add(", object_data ObjectPayload& objectPayload [[payload]]");
src->add(", mesh_grid_properties meshGridProperties"); src->add(", mesh_grid_properties meshGridProperties");
src->add(", uint tig [[threadgroup_position_in_grid]]"); src->add(", uint tig [[threadgroup_position_in_grid]]");
src->add(", uint tid [[thread_index_in_threadgroup]]"); src->add(", uint tid [[thread_index_in_threadgroup]]");
src->add(", VERTEX_BUFFER_DEFINITIONS");
} }
else else
{ {
src->add(", uint vid [[vertex_id]]"); src->add(", uint vid [[vertex_id]]");
src->add(", uint iid [[instance_id]]"); src->add(", uint iid [[instance_id]]");
// streamout buffer (transform feedback)
if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite)
{
src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingVK.tfStorageBindingPoint);
}
} }
break; break;
case LatteConst::ShaderType::Geometry:
src->add(", MeshType mesh");
src->add(", const object_data ObjectPayload& objectPayload [[payload]]");
src->add(", uint tid [[thread_index_in_threadgroup]]");
break;
case LatteConst::ShaderType::Pixel: case LatteConst::ShaderType::Pixel:
src->add(", bool frontFacing [[front_facing]]"); src->add(", bool frontFacing [[front_facing]]");
break; break;
} }
// streamout buffer (transform feedback)
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
{
if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite)
src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingVK.tfStorageBindingPoint);
}
// uniform buffers // uniform buffers
_emitUniformBufferDefinitions(decompilerContext); _emitUniformBufferDefinitions(decompilerContext);
// textures // textures