diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 66539a76..d17fd57d 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -617,10 +617,12 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi shader->baseHash = baseHash; // copy resource mapping // HACK - if (g_renderer->GetType() != RendererAPI::OpenGL) + if (g_renderer->GetType() == RendererAPI::Vulkan) shader->resourceMapping = decompilerOutput.resourceMappingVK; - else + else if (g_renderer->GetType() == RendererAPI::OpenGL) shader->resourceMapping = decompilerOutput.resourceMappingGL; + else + shader->resourceMapping = decompilerOutput.resourceMappingMTL; // copy texture info shader->textureUnitMask2 = decompilerOutput.textureUnitMask; // copy streamout info diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 57df13b1..5d8b2c6f 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -57,12 +57,14 @@ struct LatteDecompilerShaderResourceMapping // texture sint8 textureUnitToBindingPoint[LATTE_NUM_MAX_TEX_UNITS]; // uniform buffer - sint8 uniformVarsBufferBindingPoint{}; // special block for uniform registers/remapped array/custom variables + sint8 uniformVarsBufferBindingPoint{-1}; // special block for uniform registers/remapped array/custom variables sint8 uniformBuffersBindingPoint[LATTE_NUM_MAX_UNIFORM_BUFFERS]; // shader storage buffer for transform feedback (if alternative mode is used) sint8 tfStorageBindingPoint{-1}; // attributes (vertex shader only) sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS]; + // Metal exclusive + sint8 indexBufferBinding{-1}; sint32 getTextureCount() { @@ -288,6 +290,7 @@ struct LatteDecompilerOutput_t // mapping and binding information LatteDecompilerShaderResourceMapping 
resourceMappingGL; LatteDecompilerShaderResourceMapping resourceMappingVK; + LatteDecompilerShaderResourceMapping resourceMappingMTL; }; struct LatteDecompilerSubroutineInfo; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index e84e4851..9a3db895 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -498,6 +498,18 @@ namespace LatteDecompiler } } + void _initTextureBindingPointsMTL(LatteDecompilerShaderContext* decompilerContext) + { + // for Metal we use consecutive indices + for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++) + { + if (!decompilerContext->output->textureUnitMask[i]) + continue; + decompilerContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] = decompilerContext->currentTextureBindingPointMTL; + decompilerContext->currentTextureBindingPointMTL++; + } + } + void _initHasUniformVarBlock(LatteDecompilerShaderContext* decompilerContext) { decompilerContext->hasUniformVarBlock = false; @@ -552,14 +564,13 @@ namespace LatteDecompiler } } // assign binding point to uniform var block - decompilerContext->output->resourceMappingGL.uniformVarsBufferBindingPoint = -1; // OpenGL currently doesnt use a uniform block if (decompilerContext->hasUniformVarBlock) { decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = decompilerContext->currentBindingPointVK; decompilerContext->currentBindingPointVK++; + decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint = decompilerContext->currentBufferBindingPointMTL; + decompilerContext->currentBufferBindingPointMTL++; } - else - decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = -1; // assign binding points to uniform buffers if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK) { @@ -578,6 +589,8 
@@ namespace LatteDecompiler decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] = decompilerContext->currentBindingPointVK; decompilerContext->currentBindingPointVK++; + decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] = decompilerContext->currentBufferBindingPointMTL; + decompilerContext->currentBufferBindingPointMTL++; } // for OpenGL we use the relative buffer index for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) @@ -599,6 +612,8 @@ namespace LatteDecompiler { decompilerContext->output->resourceMappingVK.tfStorageBindingPoint = decompilerContext->currentBindingPointVK; decompilerContext->currentBindingPointVK++; + decompilerContext->output->resourceMappingMTL.tfStorageBindingPoint = decompilerContext->currentBufferBindingPointMTL; + decompilerContext->currentBufferBindingPointMTL++; } } @@ -615,6 +630,7 @@ namespace LatteDecompiler { decompilerContext->output->resourceMappingGL.attributeMapping[i] = bindingIndex; decompilerContext->output->resourceMappingVK.attributeMapping[i] = bindingIndex; + decompilerContext->output->resourceMappingMTL.attributeMapping[i] = bindingIndex; bindingIndex++; } } @@ -1000,6 +1016,8 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD shaderContext->output->resourceMappingVK.setIndex = 2; LatteDecompiler::_initTextureBindingPointsGL(shaderContext); LatteDecompiler::_initTextureBindingPointsVK(shaderContext); + LatteDecompiler::_initTextureBindingPointsMTL(shaderContext); LatteDecompiler::_initUniformBindingPoints(shaderContext); LatteDecompiler::_initAttributeBindingPoints(shaderContext); + shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 95e91d40..dcca0560 100644 --- 
a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2822,14 +2822,13 @@ static void _emitGSReadInputVFetchCode(LatteDecompilerShaderContext* shaderConte src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType); - src->add("(v2g["); + src->add("(objectPayload.vertexOut["); if (texInstruction->textureFetch.srcSel[0] >= 4) cemu_assert_unimplemented(); if (texInstruction->textureFetch.srcSel[1] >= 4) cemu_assert_unimplemented(); - // todo: Index type - src->add("0"); - src->addFmt("].passV2GParameter{}.", texInstruction->textureFetch.offset/16); + src->add("vertexIndex"); + src->addFmt("].passParameterSem{}.", texInstruction->textureFetch.offset/16); for(sint32 f=0; f<4; f++) @@ -3316,7 +3315,7 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La cemu_assert_unimplemented(); for (sint32 burstIndex = 0; burstIndex < (sint32)(cfInstruction->exportBurstCount + 1); burstIndex++) { - src->addFmt("v2g.passV2GParameter{}.", (cfInstruction->exportArrayBase) / 4 + burstIndex); + src->addFmt("out.passParameterSem{}.", (cfInstruction->exportArrayBase) / 4 + burstIndex); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_SIGNED_INT, burstIndex); @@ -3355,7 +3354,7 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La } else if (parameterExportType == 2 && parameterExportBase < 16) { - src->addFmt("passG2PParameter{}.", parameterExportBase); + src->addFmt("out.passParameterSem{}.", parameterExportBase); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex); @@ -3587,9 +3586,10 @@ void 
LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderConte src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1)); // write point size if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) - src->add("gl_PointSize = supportBuffer.pointSize;" _CRLF); - // emit vertex - src->add("EmitVertex();" _CRLF); + src->add("out.pointSize = supportBuffer.pointSize;" _CRLF); + src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF); + src->add("mesh.set_vertex(vertexIndex, out);" _CRLF); + src->add("vertexIndex++;" _CRLF); // increment transform feedback pointer for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) { @@ -3821,20 +3821,22 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader) { + bool isRectVertexShader = (static_cast<LattePrimitiveMode>(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS); + StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end) shaderContext->shaderSource = src; // debug info src->addFmt("// shader {:016x}" _CRLF, shaderContext->shaderBaseHash); #ifdef CEMU_DEBUG_ASSERT - src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues?"true":"false"); + src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues ? 
"true" : "false"); src->addFmt(_CRLF); #endif // include metal standard library src->add("#include " _CRLF); src->add("using namespace metal;" _CRLF); // header part (definitions for inputs and outputs) - LatteDecompiler::emitHeader(shaderContext); + LatteDecompiler::emitHeader(shaderContext, isRectVertexShader); // helper functions LatteDecompiler_emitHelperFunctions(shaderContext, src); const char* functionType = ""; @@ -3842,9 +3844,25 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: - functionType = "vertex"; - outputTypeName = "VertexOut"; + if (shaderContext->options->usesGeometryShader || isRectVertexShader) + { + // Defined just-in-time + // Will also modify vid in case of an indexed draw + src->add("VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF); + + functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]"; + outputTypeName = "void"; + } + else + { + functionType = "vertex"; + outputTypeName = "VertexOut"; + } break; + case LatteConst::ShaderType::Geometry: + functionType = "[[mesh, max_total_threads_per_threadgroup(1)]]"; + outputTypeName = "void"; + break; case LatteConst::ShaderType::Pixel: functionType = "fragment"; outputTypeName = "FragmentOut"; @@ -3852,9 +3870,32 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, } // start of main src->addFmt("{} {} main0(", functionType, outputTypeName); - LatteDecompiler::emitInputs(shaderContext); + LatteDecompiler::emitInputs(shaderContext, isRectVertexShader); src->add(") {" _CRLF); - src->addFmt("{} out;" _CRLF, outputTypeName); + if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) + { + if (shader->shaderType == 
LatteConst::ShaderType::Vertex) + { + // Calculate the imaginary vertex id + src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF); + // TODO: don't hardcode the instance index + src->add("uint iid = 0;" _CRLF); + // Fetch the input + src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF); + // Output is defined as object payload + src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF); + } + else if (shader->shaderType == LatteConst::ShaderType::Geometry) + { + src->add("GeometryOut out;" _CRLF); + // The index of the current vertex that is being emitted + src->add("uint vertexIndex = 0;" _CRLF); + } + } + else + { + src->addFmt("{} out;" _CRLF, outputTypeName); + } // variable definition if (shaderContext->typeTracker.useArrayGPRs == false) { @@ -4047,13 +4088,14 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, continue; } - if (shaderContext->options->usesGeometryShader) + // TODO: is the if statement even needed? 
+ if (shaderContext->options->usesGeometryShader || isRectVertexShader) { // import from geometry shader if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->addFmt("{} = asy_type(passG2PParameter{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); + src->addFmt("{} = as_type(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) - src->addFmt("{} = passG2PParameter{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); + src->addFmt("{} = in.passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); else cemu_assert_unimplemented(); } @@ -4091,18 +4133,52 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, // vertex shader should write renderstate point size at the end if required but not modified by shader if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) { - if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader == false) + if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) src->add("out.pointSize = supportBuffer.pointSize;" _CRLF); } - // HACK: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?) 
- if (shader->shaderType == LatteConst::ShaderType::Vertex) + + if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) { - // TODO: check this - // MoltenVK does this - src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF); + if (shader->shaderType == LatteConst::ShaderType::Vertex) + { + src->add("if (tid == 0) {" _CRLF); + src->add("meshGridProperties.set_threadgroups_per_grid(uint3(1, 1, 1));" _CRLF); + src->add("}" _CRLF); + } + else if (shader->shaderType == LatteConst::ShaderType::Geometry) + { + src->add("mesh.set_primitive_count(GET_PRIMITIVE_COUNT(vertexIndex));" _CRLF); + + // Set indices + if (shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE] == 1) // Line strip + { + src->add("for (uint8_t i = 0; i < GET_PRIMITIVE_COUNT(vertexIndex) * 2; i++) {" _CRLF); + src->add("mesh.set_index(i, (i / 2) + i % 2);" _CRLF); + src->add("}" _CRLF); + } + else if (shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE] == 2) // Triangle strip + { + src->add("for (uint8_t i = 0; i < GET_PRIMITIVE_COUNT(vertexIndex) * 3; i++) {" _CRLF); + src->add("mesh.set_index(i, (i / 3) + i % 3);" _CRLF); + src->add("}" _CRLF); + } + else + { + src->add("for (uint8_t i = 0; i < vertexIndex; i++) {" _CRLF); + src->add("mesh.set_index(i, i);" _CRLF); + src->add("}" _CRLF); + } + } } - // return - src->add("return out;" _CRLF); + + // TODO: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?) 
+ if ((shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) || shader->shaderType == LatteConst::ShaderType::Geometry) + src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF); + + // Return + if (!(shaderContext->options->usesGeometryShader || isRectVertexShader) || shader->shaderType == LatteConst::ShaderType::Pixel) + src->add("return out;" _CRLF); + // end of shader main src->add("}" _CRLF); src->shrink_to_fit(); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 8527adde..53332f7c 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -1,5 +1,7 @@ #pragma once +#include "Common/precompiled.h" +#include "HW/Latte/Core/LatteConst.h" namespace LatteDecompiler { static void _emitUniformVariables(LatteDecompilerShaderContext* decompilerContext) @@ -94,7 +96,7 @@ namespace LatteDecompiler uniformCurrentOffset += 8; } // define verticesPerInstance + streamoutBufferBaseX - if ((shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) || + if ((shader->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || (shader->shaderType == LatteConst::ShaderType::Geometry)) { src->add("int verticesPerInstance;" _CRLF); @@ -127,7 +129,7 @@ namespace LatteDecompiler if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess()) continue; - cemu_assert_debug(decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] >= 0); + cemu_assert_debug(decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] >= 0); shaderSrc->addFmt("struct UBuff{} {{" _CRLF, i); shaderSrc->addFmt("float4 d[{}];" _CRLF, 
decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE)); @@ -155,6 +157,7 @@ namespace LatteDecompiler static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext) { auto src = decompilerContext->shaderSource; + std::string attributeNames; if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex) { @@ -164,24 +167,29 @@ namespace LatteDecompiler { if (decompilerContext->analyzer.inputAttributSemanticMask[i]) { - cemu_assert_debug(decompilerContext->output->resourceMappingVK.attributeMapping[i] >= 0); + cemu_assert_debug(decompilerContext->output->resourceMappingMTL.attributeMapping[i] >= 0); - src->addFmt("uint4 attrDataSem{} [[attribute({})]];" _CRLF, i, (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]); + src->addFmt("uint4 attrDataSem{}", i); + if (decompilerContext->options->usesGeometryShader) + attributeNames += "#define ATTRIBUTE_NAME" + std::to_string((sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]) + " attrDataSem" + std::to_string(i) + "\n"; + else + src->addFmt(" [[attribute({})]]", (sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]); + src->add(";" _CRLF); } } src->add("};" _CRLF _CRLF); } + src->addFmt("{}", attributeNames); } - static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext) + static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext, bool isRectVertexShader) { auto* src = shaderContext->shaderSource; src->add("struct VertexOut {" _CRLF); - src->add("float4 position [[position]];" _CRLF); if (shaderContext->analyzer.outputPointSize) - src->add("float pointSize[[point_size]];" _CRLF); + src->add("float pointSize [[point_size]];" _CRLF); LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); auto parameterMask = shaderContext->shader->outputParameterMask; @@ -206,15 +214,25 @@ namespace LatteDecompiler 
continue; // no ps input src->addFmt("float4 passParameterSem{}", psInputTable->import[psInputIndex].semanticId); - src->addFmt(" [[user(locn{})]]", psInputIndex); - if (psInputTable->import[psInputIndex].isFlat) - src->add(" [[flat]]"); - if (psInputTable->import[psInputIndex].isNoPerspective) - src->add(" [[center_no_perspective]]"); + if (!isRectVertexShader) + { + src->addFmt(" [[user(locn{})]]", psInputIndex); + if (psInputTable->import[psInputIndex].isFlat) + src->add(" [[flat]]"); + if (psInputTable->import[psInputIndex].isNoPerspective) + src->add(" [[center_no_perspective]]"); + } src->addFmt(";" _CRLF); } src->add("};" _CRLF _CRLF); + + if (isRectVertexShader) + { + src->add("struct ObjectPayload {" _CRLF); + src->add("VertexOut vertexOut[VERTICES_PER_VERTEX_PRIMITIVE];" _CRLF); + src->add("};" _CRLF _CRLF); + } } static void _emitPSInputs(LatteDecompilerShaderContext* shaderContext) @@ -243,18 +261,17 @@ namespace LatteDecompiler src->add("};" _CRLF _CRLF); } - static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext) + static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader) { auto src = decompilerContext->shaderSource; if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) { _emitAttributes(decompilerContext); - _emitVSOutputs(decompilerContext); } else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel) { - _emitPSInputs(decompilerContext); + _emitPSInputs(decompilerContext); src->add("struct FragmentOut {" _CRLF); @@ -277,10 +294,111 @@ namespace LatteDecompiler src->add("};" _CRLF _CRLF); } + + if (!decompilerContext->options->usesGeometryShader) + { + if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) + _emitVSOutputs(decompilerContext, isRectVertexShader); + } + else + { + if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) + { + 
src->add("struct VertexOut {" _CRLF); + uint32 ringParameterCountVS2GS = 0; + if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) + { + ringParameterCountVS2GS = decompilerContext->shader->ringParameterCount; + } + else + { + ringParameterCountVS2GS = decompilerContext->shader->ringParameterCountFromPrevStage; + } + for (uint32 f = 0; f < ringParameterCountVS2GS; f++) + src->addFmt("int4 passParameterSem{};" _CRLF, f); + src->add("};" _CRLF _CRLF); + src->add("struct ObjectPayload {" _CRLF); + src->add("VertexOut vertexOut[VERTICES_PER_VERTEX_PRIMITIVE];" _CRLF); + src->add("};" _CRLF _CRLF); + } + if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry) + { + // parameters shared between geometry and pixel shader + uint32 ringItemSize = decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF; + if ((ringItemSize & 0xF) != 0) + debugBreakpoint(); + if (((decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF) & 0xF) != 0) + debugBreakpoint(); + + src->add("struct GeometryOut {" _CRLF); + src->add("float4 position [[position]];" _CRLF); + for (sint32 p = 0; p < decompilerContext->parsedGSCopyShader->numParam; p++) + { + if (decompilerContext->parsedGSCopyShader->paramMapping[p].exportType != 2) + continue; + src->addFmt("float4 passParameterSem{} [[user(locn{})]];" _CRLF, (sint32)decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam, decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam & 0x7F); + } + src->add("};" _CRLF _CRLF); + + const uint32 MAX_VERTEX_COUNT = 32; + + // Define the mesh shader output type + src->addFmt("using MeshType = mesh;" _CRLF, MAX_VERTEX_COUNT, MAX_VERTEX_COUNT); + } + } } - static void emitHeader(LatteDecompilerShaderContext* decompilerContext) + static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader) { + auto src = decompilerContext->shaderSource; + + if ((decompilerContext->options->usesGeometryShader || 
isRectVertexShader) && (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)) + { + // TODO: make vsOutPrimType part of the shader hash + LattePrimitiveMode vsOutPrimType = static_cast<LattePrimitiveMode>(decompilerContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]); + uint32 gsOutPrimType = decompilerContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE]; + + switch (vsOutPrimType) + { + case LattePrimitiveMode::POINTS: + src->add("#define VERTICES_PER_VERTEX_PRIMITIVE 1" _CRLF); + break; + case LattePrimitiveMode::LINES: + src->add("#define VERTICES_PER_VERTEX_PRIMITIVE 2" _CRLF); + break; + case LattePrimitiveMode::TRIANGLES: + src->add("#define VERTICES_PER_VERTEX_PRIMITIVE 3" _CRLF); + break; + case LattePrimitiveMode::RECTS: + src->add("#define VERTICES_PER_VERTEX_PRIMITIVE 3" _CRLF); + break; + default: + cemu_assert_suspicious(); + break; + } + if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry) + { + switch (gsOutPrimType) + { + case 0: // Point + src->add("#define MTL_PRIMITIVE_TYPE point" _CRLF); + src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount / 1)" _CRLF); + break; + case 1: // Line strip + src->add("#define MTL_PRIMITIVE_TYPE line" _CRLF); + src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount - 1)" _CRLF); + break; + case 2: // Triangle strip + src->add("#define MTL_PRIMITIVE_TYPE triangle" _CRLF); + src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount - 2)" _CRLF); + break; + default: + cemu_assert_suspicious(); + break; + } + } + } + const bool dump_shaders_enabled = ActiveSettings::DumpShadersEnabled(); if(dump_shaders_enabled) decompilerContext->shaderSource->add("// start of shader inputs/outputs, predetermined by Cemu. 
Do not touch" _CRLF); @@ -289,7 +407,7 @@ namespace LatteDecompiler // uniform buffers _emitUniformBuffers(decompilerContext); // inputs and outputs - _emitInputsAndOutputs(decompilerContext); + _emitInputsAndOutputs(decompilerContext, isRectVertexShader); if (dump_shaders_enabled) decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF); @@ -306,9 +424,9 @@ namespace LatteDecompiler if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess()) continue; - cemu_assert_debug(decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] >= 0); + cemu_assert_debug(decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] >= 0); - src->addFmt(", constant UBuff{}& ubuff{} [[buffer({})]]", i, i, (sint32)decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i]); + src->addFmt(", constant UBuff{}& ubuff{} [[buffer({})]]", i, i, (sint32)decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i]); } } } @@ -354,46 +472,58 @@ namespace LatteDecompiler cemu_assert_unimplemented(); } - uint32 binding = shaderContext->output->resourceMappingVK.textureUnitToBindingPoint[i]; - //uint32 textureBinding = shaderContext->output->resourceMappingVK.textureUnitToBindingPoint[i] % 31; + uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i]; + //uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31; //uint32 samplerBinding = textureBinding % 16; src->addFmt(" tex{} [[texture({})]]", i, binding); src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding); } } - static void emitInputs(LatteDecompilerShaderContext* decompilerContext) + static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader) { auto src = decompilerContext->shaderSource; switch (decompilerContext->shaderType) { case LatteConst::ShaderType::Vertex: - src->add("VertexIn"); - break; - case 
LatteConst::ShaderType::Pixel: - src->add("FragmentIn"); - break; - } - - src->add(" in [[stage_in]], constant SupportBuffer& supportBuffer [[buffer(30)]]"); - switch (decompilerContext->shaderType) - { - case LatteConst::ShaderType::Vertex: - src->add(", uint vid [[vertex_id]]"); - src->add(", uint iid [[instance_id]]"); - - // streamout buffer (transform feedback) - if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite) + if (decompilerContext->options->usesGeometryShader || isRectVertexShader) { - src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingVK.tfStorageBindingPoint); + src->add("object_data ObjectPayload& objectPayload [[payload]]"); + src->add(", mesh_grid_properties meshGridProperties"); + src->add(", uint tig [[threadgroup_position_in_grid]]"); + src->add(", uint tid [[thread_index_in_threadgroup]]"); + src->add(" VERTEX_BUFFER_DEFINITIONS"); } - + else + { + src->add("VertexIn in [[stage_in]]"); + src->add(", uint vid [[vertex_id]]"); + src->add(", uint iid [[instance_id]]"); + } + break; + case LatteConst::ShaderType::Geometry: + src->add("MeshType mesh"); + src->add(", const object_data ObjectPayload& objectPayload [[payload]]"); break; case LatteConst::ShaderType::Pixel: + src->add("FragmentIn in [[stage_in]]"); src->add(", bool frontFacing [[front_facing]]"); + break; + default: break; } + + if (decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint >= 0) + src->addFmt(", constant SupportBuffer& supportBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint); + + // streamout buffer (transform feedback) + if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) + { + if (decompilerContext->analyzer.hasStreamoutEnable && 
decompilerContext->analyzer.hasStreamoutWrite) + src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingMTL.tfStorageBindingPoint); + } + // uniform buffers _emitUniformBufferDefinitions(decompilerContext); // textures diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h index 4b85d458..f4135640 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h @@ -260,6 +260,8 @@ struct LatteDecompilerShaderContext // emitter bool hasUniformVarBlock; sint32 currentBindingPointVK{}; + sint32 currentBufferBindingPointMTL{}; + sint32 currentTextureBindingPointMTL{}; struct ALUClauseTemporariesState* aluPVPSState{nullptr}; // misc std::vector list_subroutines; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp index 771aa059..51885e51 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp @@ -29,7 +29,7 @@ std::map MTL_COLOR_FORMAT_TABLE = { {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}}, // TODO: correct? {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}}, {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, MetalDataType::FLOAT, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGB10A2Unorm, MetalDataType::FLOAT, 4}}, // TODO: sRGB? {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, MetalDataType::FLOAT, 4}}, // TODO: correct? 
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}}, // TODO: correct? {Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, MetalDataType::FLOAT, 2}}, @@ -47,10 +47,10 @@ std::map MTL_COLOR_FORMAT_TABLE = { {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, MetalDataType::UINT, 8}}, {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}}, {Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, MetalDataType::FLOAT, 8}}, - {Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO + {Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}}, // TODO {Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO - {Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO - {Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO + {Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatRGBA8Uint, MetalDataType::UINT, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}}, // TODO: correct? 
{Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO {Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, MetalDataType::FLOAT, 4}}, {Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, MetalDataType::UINT, 4}}, diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index 926af5f4..44d4d873 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -21,15 +21,13 @@ struct MetalPixelFormatSupport }; #define MAX_MTL_BUFFERS 31 -// Buffer index 30 is reserved for the support buffer, buffer indices 27-29 are reserved for the helper shaders -#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 5) -// TODO: don't harcdode the support buffer binding -#define MTL_SUPPORT_BUFFER_BINDING 30 +// Buffer indices 28-30 are reserved for the helper shaders +#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 4) #define MAX_MTL_TEXTURES 31 #define MAX_MTL_SAMPLERS 16 -#define GET_HELPER_BUFFER_BINDING(index) (27 + index) +#define GET_HELPER_BUFFER_BINDING(index) (28 + index) #define GET_HELPER_TEXTURE_BINDING(index) (29 + index) #define GET_HELPER_SAMPLER_BINDING(index) (14 + index) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index 93c6ec86..f0c12217 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -2,6 +2,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h" #include "Common/precompiled.h" +#include "HW/Latte/Renderer/Metal/MetalRenderer.h" MetalVertexBufferCache::~MetalVertexBufferCache() { @@ -42,11 +43,8 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu 
renderCommandEncoder->setRenderPipelineState(m_restrideBufferPipeline->GetRenderPipelineState()); m_mtlr->GetEncoderState().m_renderPipelineState = m_restrideBufferPipeline->GetRenderPipelineState(); - MTL::Buffer* buffers[] = {bufferCache, buffer}; - size_t offsets[] = {vertexBufferRange.offset, restrideInfo.allocation.offset}; - renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(GET_HELPER_BUFFER_BINDING(0), 2)); - m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = INVALID_OFFSET; - m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(1)] = INVALID_OFFSET; + m_mtlr->SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, bufferCache, vertexBufferRange.offset, GET_HELPER_BUFFER_BINDING(0)); + m_mtlr->SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, buffer, restrideInfo.allocation.offset, GET_HELPER_BUFFER_BINDING(1)); struct { @@ -54,16 +52,7 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu uint32 newStride; } strideData = {static_cast(stride), static_cast(newStride)}; renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), GET_HELPER_BUFFER_BINDING(2)); - m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(2)] = INVALID_OFFSET; - - // TODO: remove - uint32 vertexCount = vertexBufferRange.size / stride; - if (vertexCount * strideData.oldStride > buffers[0]->length() - offsets[0]) { - throw std::runtime_error("Source buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.oldStride) + " > " + std::to_string(buffers[0]->length()) + " - " + std::to_string(offsets[0]) + ")"); - } - if (vertexCount * strideData.newStride > buffers[1]->length() - offsets[1]) { - throw std::runtime_error("Destination buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.newStride) + " > " + 
std::to_string(buffers[1]->length()) + " - " + std::to_string(offsets[1]) + ")"); - } + m_mtlr->GetEncoderState().m_buffers[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(2)] = {nullptr}; renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), vertexBufferRange.size / stride); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 94ab3721..ad63a041 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -2,6 +2,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Foundation/NSObject.hpp" +#include "HW/Latte/Core/LatteShader.h" #include "HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "HW/Latte/Renderer/Metal/LatteToMtl.h" #include "HW/Latte/Renderer/Metal/RendererShaderMtl.h" @@ -11,6 +12,175 @@ #include "HW/Latte/ISA/RegDefines.h" #include "config/ActiveSettings.h" +static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) // Appends MSL that copies payload vertex vIdx (position plus every VS output with a matching PS import) into 'out' and stores it as mesh vertex vIdx +{ + auto parameterMask = vertexShader->outputParameterMask; + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + // make sure PS has matching input + if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) + continue; + gsSrc.append(fmt::format("out.passParameterSem{} = objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId)); + } + gsSrc.append(fmt::format("out.position = objectPayload.vertexOut[{}].position;\r\n", vIdx)); + gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", vIdx)); +} + +static void
rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister) // Appends MSL that builds the missing rectangle corner from payload vertices 0-2 with the gen4thVertexA/B/C helper selected by variant and stores it as mesh vertex 3 +{ + auto parameterMask = vertexShader->outputParameterMask; + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + // make sure PS has matching input + if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) + continue; + gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId)); + } + gsSrc.append(fmt::format("out.position = gen4thVertex{}(objectPayload.vertexOut[0].position, objectPayload.vertexOut[1].position, objectPayload.vertexOut[2].position);\r\n", variant)); + gsSrc.append(fmt::format("mesh.set_vertex(3, out);\r\n")); +} + +static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister) // Emits the four vertices p0..p3 (the value 3 selects the generated corner) followed by the six indices of the two triangles +{ + sint32 pList[4] = { p0, p1, p2, p3 }; + for (sint32 i = 0; i < 4; i++) + { + if (pList[i] == 3) + rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister); + else + rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister); + } + gsSrc.append(fmt::format("mesh.set_index(0, {});\r\n", pList[0])); // first triangle: p0, p1, p2 + gsSrc.append(fmt::format("mesh.set_index(1, {});\r\n", pList[1])); + gsSrc.append(fmt::format("mesh.set_index(2, {});\r\n", pList[2])); + gsSrc.append(fmt::format("mesh.set_index(3, {});\r\n", pList[1])); // second triangle: p1, p2, p3 +
gsSrc.append(fmt::format("mesh.set_index(4, {});\r\n", pList[2])); + gsSrc.append(fmt::format("mesh.set_index(5, {});\r\n", pList[3])); +} + +static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer, const LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister) // Builds the MSL mesh-shader source that turns the 3 payload vertices of a rect into a 4-vertex, 2-triangle quad and wraps it in a new RendererShaderMtl +{ + std::string gsSrc; + gsSrc.append("#include \r\n"); // NOTE(review): the header name after #include looks lost in this copy of the patch (presumably <metal_stdlib>) - confirm against the original commit + gsSrc.append("using namespace metal;\r\n"); + + LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); + + // inputs & outputs + std::string vertexOutDefinition = "struct VertexOut {\r\n"; + vertexOutDefinition += "float4 position;\r\n"; + std::string geometryOutDefinition = "struct GeometryOut {\r\n"; + geometryOutDefinition += "float4 position [[position]];\r\n"; + auto parameterMask = vertexShader->outputParameterMask; + for (sint32 f = 0; f < 2; f++) // pass 0 fills the VertexOut members, pass 1 fills the GeometryOut members + { + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + auto psImport = psInputTable->getPSImportBySemanticId(vsSemanticId); + if (psImport == nullptr) + continue; + + if (f == 0) + { + vertexOutDefinition += fmt::format("float4 passParameterSem{};\r\n", vsSemanticId); + } + else + { + geometryOutDefinition += fmt::format("float4 passParameterSem{}", vsSemanticId); + + geometryOutDefinition += fmt::format(" [[user(locn{})]]", psInputTable->getPSImportLocationBySemanticId(vsSemanticId)); + if (psImport->isFlat) + geometryOutDefinition += " [[flat]]"; + if (psImport->isNoPerspective) + geometryOutDefinition += " [[center_no_perspective]]"; + geometryOutDefinition += ";\r\n"; + } + } + } + vertexOutDefinition += "};\r\n"; + geometryOutDefinition += "};\r\n"; + + gsSrc.append(vertexOutDefinition); + gsSrc.append(geometryOutDefinition); + + gsSrc.append("struct ObjectPayload {\r\n"); + gsSrc.append("VertexOut vertexOut[3];\r\n"); + gsSrc.append("};\r\n"); + + // gen
function + gsSrc.append("float4 gen4thVertexA(float4 a, float4 b, float4 c)\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("return b - (c - a);\r\n"); + gsSrc.append("}\r\n"); + + gsSrc.append("float4 gen4thVertexB(float4 a, float4 b, float4 c)\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("return c - (b - a);\r\n"); + gsSrc.append("}\r\n"); + + gsSrc.append("float4 gen4thVertexC(float4 a, float4 b, float4 c)\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("return c + (b - a);\r\n"); + gsSrc.append("}\r\n"); + + // main + gsSrc.append("using MeshType = mesh;\r\n"); // NOTE(review): the template arguments of metal::mesh look lost in this copy of the patch - confirm against the original commit + gsSrc.append("[[mesh, max_total_threads_per_threadgroup(1)]]\r\n"); + gsSrc.append("void main0(MeshType mesh, const object_data ObjectPayload& objectPayload [[payload]])\r\n"); + gsSrc.append("{\r\n"); + gsSrc.append("GeometryOut out;\r\n"); + + // there are two possible winding orders that need different triangle generation: + // 0 1 + // 2 3 + // and + // 0 1 + // 3 2 + // all others are just symmetries of these cases + + // we can determine the case by comparing the distance 0<->1 and 0<->2 + + gsSrc.append("float dist0_1 = length(objectPayload.vertexOut[1].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"); + gsSrc.append("float dist0_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[0].position.xy);\r\n"); + gsSrc.append("float dist1_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[1].position.xy);\r\n"); + + // emit vertices + gsSrc.append("if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n"); + gsSrc.append("{\r\n"); + // p0 to p1 is diagonal + rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister); + gsSrc.append("} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n"); + // p0 to p2 is diagonal + rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister); + gsSrc.append("} else {\r\n"); + // p1 to p2 is diagonal +
rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister); + gsSrc.append("}\r\n"); + + gsSrc.append("mesh.set_primitive_count(2);\r\n"); + + gsSrc.append("}\r\n"); + + auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc); // NOTE(review): raw new - who deletes this object is not visible here, confirm ownership at the call site + + return mtlShader; +} + #define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF uint64 s_cacheTitleId = INVALID_TITLE_ID; @@ -18,6 +188,68 @@ uint64 s_cacheTitleId = INVALID_TITLE_ID; extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; +template +void SetFragmentState(T* desc, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) // Fills in color attachment formats, write masks and blending plus depth/stencil formats on the pipeline descriptor desc from the FBO and the context registers +{ + // Color attachments + const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; + uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); + uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK(); + for (uint8 i = 0; i < 8; i++) + { + const auto& colorBuffer = activeFBO->colorBuffer[i]; + auto texture = static_cast(colorBuffer.texture); + if (!texture) + { + continue; + } + auto colorAttachment = desc->colorAttachments()->object(i); + colorAttachment->setPixelFormat(texture->GetRGBAView()->pixelFormat()); + colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF)); + + // Blending + bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; + // Only float data type is blendable + if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT) + { + colorAttachment->setBlendingEnabled(true); + + const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i]; + + auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN()); + auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND()); + auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND()); + + colorAttachment->setRgbBlendOperation(rgbBlendOp); +
colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor); + colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor); + if (blendControlReg.get_SEPARATE_ALPHA_BLEND()) + { + colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN())); + colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND())); + colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND())); + } + else + { + colorAttachment->setAlphaBlendOperation(rgbBlendOp); // no separate alpha blend: alpha reuses the RGB op and factors + colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor); + colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor); + } + } + } + + // Depth stencil attachment + if (activeFBO->depthBuffer.texture) + { + auto texture = static_cast(activeFBO->depthBuffer.texture); + desc->setDepthAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); + if (activeFBO->depthBuffer.hasStencil) + { + desc->setStencilAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); + } + } +} + void MetalPipelineCache::ShaderCacheLoading_begin(uint64 cacheTitleId) { s_cacheTitleId = cacheTitleId; @@ -53,9 +285,9 @@ MetalPipelineCache::~MetalPipelineCache() m_binaryArchiveURL->release(); } -MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) { - uint64 stateHash = CalculatePipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr); + uint64 stateHash = CalculateRenderPipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr); auto& pipeline =
m_pipelineCache[stateHash]; if (pipeline) return pipeline; @@ -92,7 +324,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS uint32 bufferIndex = bufferGroup.attributeBufferIndex; uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; - uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; bufferStride = Align(bufferStride, 4); // HACK @@ -117,6 +349,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS auto mtlVertexShader = static_cast(vertexShader->shader); auto mtlPixelShader = static_cast(pixelShader->shader); + mtlVertexShader->CompileVertexFunction(); mtlPixelShader->CompileFragmentFunction(activeFBO); // Render pipeline state @@ -126,65 +359,18 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS // TODO: don't always set the vertex descriptor? 
desc->setVertexDescriptor(vertexDescriptor); - // Color attachments - const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = LatteGPUState.contextNew.CB_COLOR_CONTROL; - uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); - uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK(); - for (uint8 i = 0; i < 8; i++) - { - const auto& colorBuffer = activeFBO->colorBuffer[i]; - auto texture = static_cast(colorBuffer.texture); - if (!texture) - { - continue; - } - auto colorAttachment = desc->colorAttachments()->object(i); - colorAttachment->setPixelFormat(texture->GetRGBAView()->pixelFormat()); - colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF)); + SetFragmentState(desc, activeFBO, lcr); - // Blending - bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; - // Only float data type is blendable - if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT) - { - colorAttachment->setBlendingEnabled(true); + TryLoadBinaryArchive(); - const auto& blendControlReg = LatteGPUState.contextNew.CB_BLENDN_CONTROL[i]; - - auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN()); - auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND()); - auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND()); - - colorAttachment->setRgbBlendOperation(rgbBlendOp); - colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor); - colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor); - if (blendControlReg.get_SEPARATE_ALPHA_BLEND()) - { - colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN())); - colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND())); - colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND())); - } - else - { - colorAttachment->setAlphaBlendOperation(rgbBlendOp); - 
colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor); - colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor); - } - } - } - - // Depth stencil attachment - if (activeFBO->depthBuffer.texture) - { - auto texture = static_cast(activeFBO->depthBuffer.texture); - desc->setDepthAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); - if (activeFBO->depthBuffer.hasStencil) - { - desc->setStencilAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); - } - } - - LoadBinary(desc); + // Load binary + if (m_binaryArchive) + { + NS::Object* binArchives[] = {m_binaryArchive}; + auto binaryArchives = NS::Array::alloc()->init(binArchives, 1); + desc->setBinaryArchives(binaryArchives); + binaryArchives->release(); + } NS::Error* error = nullptr; #ifdef CEMU_DEBUG_ASSERT @@ -210,10 +396,21 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS { debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); error->release(); + return nullptr; } else { - SaveBinary(desc); + // Save binary + if (m_binaryArchive) + { + NS::Error* error = nullptr; + m_binaryArchive->addRenderPipelineFunctions(desc, &error); + if (error) + { + debug_printf("error saving render pipeline functions: %s\n", error->localizedDescription()->utf8String()); + error->release(); + } + } } //newPipelineCount++; @@ -229,7 +426,74 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS return pipeline; } -uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +// Returns the cached mesh pipeline (object + mesh + fragment stages) for the given state, building it on a cache miss; returns nullptr when creation fails. +// A null geometryShader means rects are being emulated, in which case the mesh stage comes from rectsEmulationGS_generate. +MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl*
activeFBO, const LatteContextRegister& lcr, Renderer::INDEX_TYPE hostIndexType) +{ + uint64 stateHash = CalculateRenderPipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr); + + stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE]; + stateHash = std::rotl(stateHash, 7); + + stateHash += (uint8)hostIndexType; + stateHash = std::rotl(stateHash, 7); // TODO: 7? + + // The base hash does not cover the geometry shader, so mix it in to keep pipelines built from different geometry shaders apart + if (geometryShader) + { + stateHash += geometryShader->baseHash; + stateHash = std::rotl(stateHash, 7); + } + + auto& pipeline = m_pipelineCache[stateHash]; + if (pipeline) + return pipeline; + + auto mtlObjectShader = static_cast<RendererShaderMtl*>(vertexShader->shader); + RendererShaderMtl* mtlMeshShader; + if (geometryShader) + { + mtlMeshShader = static_cast<RendererShaderMtl*>(geometryShader->shader); + } + else + { + // If there is no geometry shader, it means that we are emulating rects + mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); // NOTE(review): allocated on every cache miss and never freed in this function - confirm who owns it + } + auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader); + mtlObjectShader->CompileObjectFunction(lcr, fetchShader, vertexShader, hostIndexType); + mtlPixelShader->CompileFragmentFunction(activeFBO); + + // Render pipeline state + MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); + desc->setObjectFunction(mtlObjectShader->GetFunction()); + desc->setMeshFunction(mtlMeshShader->GetFunction()); + desc->setFragmentFunction(mtlPixelShader->GetFunction()); + + SetFragmentState(desc, activeFBO, lcr); + + TryLoadBinaryArchive(); + + // Load binary + // TODO: no binary archives?
:( + + NS::Error* error = nullptr; +#ifdef CEMU_DEBUG_ASSERT + desc->setLabel(GetLabel("Mesh pipeline state", desc)); +#endif + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); + desc->release(); // released before the error check so the descriptor is not leaked on the failure path + if (error) + { + debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); + error->release(); + return nullptr; + } + + return pipeline; +} + +uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) { // Hash uint64 stateHash = 0; @@ -260,9 +515,6 @@ uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchSh stateHash += fetchShader->getVkPipelineHashFragment(); stateHash = std::rotl(stateHash, 7); - stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE]; - stateHash = std::rotl(stateHash, 7); - stateHash += lcr.GetRawView()[mmVGT_STRMOUT_EN]; stateHash = std::rotl(stateHash, 7); @@ -339,30 +591,3 @@ void MetalPipelineCache::TryLoadBinaryArchive() } desc->release(); } - -void MetalPipelineCache::LoadBinary(MTL::RenderPipelineDescriptor* desc) -{ - TryLoadBinaryArchive(); - - if (!m_binaryArchive) - return; - - NS::Object* binArchives[] = {m_binaryArchive}; - auto binaryArchives = NS::Array::alloc()->init(binArchives, 1); - desc->setBinaryArchives(binaryArchives); - binaryArchives->release(); -} - -void MetalPipelineCache::SaveBinary(MTL::RenderPipelineDescriptor* desc) -{ - if (!m_binaryArchive) - return; - - NS::Error* error = nullptr; - m_binaryArchive->addRenderPipelineFunctions(desc, &error); - if (error) - { - debug_printf("error saving render pipeline functions: %s\n", error->localizedDescription()->utf8String()); - error->release(); - } -} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h
index 1fa1f87c..30f40208 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -4,6 +4,7 @@ #include "HW/Latte/ISA/LatteReg.h" #include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" +#include "Cafe/HW/Latte/Renderer/Renderer.h" class MetalPipelineCache { @@ -15,7 +16,9 @@ public: MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} ~MetalPipelineCache(); - MTL::RenderPipelineState* GetPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + + MTL::RenderPipelineState* GetMeshPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr, Renderer::INDEX_TYPE hostIndexType); private: class MetalRenderer* m_mtlr; @@ -25,11 +28,7 @@ private: NS::URL* m_binaryArchiveURL; MTL::BinaryArchive* m_binaryArchive; - uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + uint64 CalculateRenderPipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); void TryLoadBinaryArchive(); - - void LoadBinary(MTL::RenderPipelineDescriptor* desc); - - void SaveBinary(MTL::RenderPipelineDescriptor* desc); }; diff --git 
a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index f5a11118..7604406f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -17,9 +17,8 @@ #include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Core/LatteIndices.h" #include "Cemu/Logging/CemuDebugLogging.h" -#include "Common/precompiled.h" +#include "HW/Latte/Core/LatteConst.h" #include "HW/Latte/Renderer/Metal/MetalCommon.h" -#include "Metal/MTLDevice.hpp" #include "gui/guiWrapper.h" #define COMMIT_TRESHOLD 256 @@ -96,15 +95,9 @@ MetalRenderer::MetalRenderer() // Utility shader library - // Process the source first - std::string processedUtilityShaderSource = utilityShaderSource; - processedUtilityShaderSource.pop_back(); - processedUtilityShaderSource.erase(processedUtilityShaderSource.begin()); - processedUtilityShaderSource = "#include \nusing namespace metal;\n#define GET_BUFFER_BINDING(index) (27 + index)\n#define GET_TEXTURE_BINDING(index) (29 + index)\n#define GET_SAMPLER_BINDING(index) (14 + index)\n" + processedUtilityShaderSource; - // Create the library NS::Error* error = nullptr; - MTL::Library* utilityLibrary = m_device->newLibrary(ToNSString(processedUtilityShaderSource.c_str()), nullptr, &error); + MTL::Library* utilityLibrary = m_device->newLibrary(ToNSString(utilityShaderSource), nullptr, &error); if (error) { debug_printf("failed to create utility library (error: %s)\n", error->localizedDescription()->utf8String()); @@ -587,8 +580,6 @@ void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* so return; } - MTL::Texture* textures[] = {srcTextureMtl->GetTexture(), dstTextureMtl->GetTexture()}; - struct CopyParams { uint32 width; @@ -605,11 +596,10 @@ void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* so renderCommandEncoder->setRenderPipelineState(m_copyTextureToTexturePipeline->GetRenderPipelineState()); 
m_state.m_encoderState.m_renderPipelineState = m_copyTextureToTexturePipeline->GetRenderPipelineState(); - renderCommandEncoder->setVertexTextures(textures, NS::Range(GET_HELPER_TEXTURE_BINDING(0), 2)); - m_state.m_encoderState.m_textures[METAL_SHADER_TYPE_VERTEX][GET_HELPER_TEXTURE_BINDING(0)] = {(LatteTextureViewMtl*)textures[0]}; - m_state.m_encoderState.m_textures[METAL_SHADER_TYPE_VERTEX][GET_HELPER_TEXTURE_BINDING(1)] = {(LatteTextureViewMtl*)textures[1]}; + SetTexture(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, srcTextureMtl->GetTexture(), GET_HELPER_TEXTURE_BINDING(0)); + SetTexture(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, dstTextureMtl->GetTexture(), GET_HELPER_TEXTURE_BINDING(1)); renderCommandEncoder->setVertexBytes(¶ms, sizeof(params), GET_HELPER_BUFFER_BINDING(0)); - m_state.m_encoderState.m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = INVALID_OFFSET; + m_state.m_encoderState.m_buffers[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = {nullptr}; renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); } @@ -654,7 +644,6 @@ void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, u m_memoryManager->UntrackVertexBuffer(bufferIndex); } - buffer.needsRebind = true; buffer.offset = offset; buffer.size = size; buffer.restrideInfo = {}; @@ -664,7 +653,7 @@ void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, u void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) { - m_state.m_uniformBufferOffsets[GetMtlShaderType(shaderType)][bufferIndex] = offset; + m_state.m_uniformBufferOffsets[GetMtlGeneralShaderType(shaderType)][bufferIndex] = offset; } RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) @@ -759,30 +748,40 @@ void 
MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 auto& encoderState = m_state.m_encoderState; + // Shaders + LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); + LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); + LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); + if (!vertexShader) + { + debug_printf("no vertex function, skipping draw\n"); + return; + } + const auto fetchShader = LatteSHRC_GetActiveFetchShader(); + + // Check if we need to end the render pass + // Fragment shader is most likely to require a render pass flush, so check for it first + bool endRenderPass = CheckIfRenderPassNeedsFlush(pixelShader); + if (!endRenderPass) + endRenderPass = CheckIfRenderPassNeedsFlush(vertexShader); + if (!endRenderPass && geometryShader) + endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader); + + if (endRenderPass) + EndEncoding(); + // Render pass auto renderCommandEncoder = GetRenderCommandEncoder(); - // Shaders - LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); - LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); - LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); - if (!vertexShader || !static_cast(vertexShader->shader)->GetFunction()) - { - debug_printf("no vertex function, skipping draw\n"); - return; - } - // TODO: remove this? 
- if (geometryShader) - { - debug_printf("geometry shader aren't supported on Metal yet, skipping draw\n"); - return; - } - const auto fetchShader = LatteSHRC_GetActiveFetchShader(); + // Primitive type + const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); + auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); + bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); + + bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); // Depth stencil state - // TODO: implement this somehow - //auto depthControl = LatteGPUState.contextNew.DB_DEPTH_CONTROL; - + // TODO // Disable depth write when there is no depth attachment //if (!m_state.m_lastUsedFBO->depthBuffer.texture) // depthControl.set_Z_WRITE_ENABLE(false); @@ -815,11 +814,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } } - // Primitive type - const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); - auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); - bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); - // Blend color float* blendColorConstant = (float*)LatteGPUState.contextRegister + Latte::REGADDR::CB_BLEND_RED; renderCommandEncoder->setBlendColor(blendColorConstant[0], blendColorConstant[1], blendColorConstant[2], blendColorConstant[3]); @@ -956,23 +950,38 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 auto& vertexBufferRange = m_state.m_vertexBuffers[i]; if (vertexBufferRange.offset != INVALID_OFFSET) { - // Restride - uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7; - uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + MTL::Buffer* buffer; + size_t offset; - auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, 
bufferStride); + // Restride + if (usesGeometryShader) + { + // Object shaders don't need restriding, since the attributes are fetched in the shader + buffer = m_memoryManager->GetBufferCache(); + offset = m_state.m_vertexBuffers[i].offset; + } + else + { + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7; + uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride); + + buffer = restridedBuffer.buffer; + offset = restridedBuffer.offset; + } // Bind - if (vertexBufferRange.needsRebind) - { - renderCommandEncoder->setVertexBuffer(restridedBuffer.buffer, restridedBuffer.offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); - vertexBufferRange.needsRebind = false; - } + SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, usesGeometryShader), buffer, offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); } } // Render pipeline state - MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, LatteGPUState.contextNew); + MTL::RenderPipelineState* renderPipelineState; + if (usesGeometryShader) + renderPipelineState = m_pipelineCache->GetMeshPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO, LatteGPUState.contextNew, hostIndexType); + else + renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, LatteGPUState.contextNew); if (renderPipelineState != encoderState.m_renderPipelineState) { renderCommandEncoder->setRenderPipelineState(renderPipelineState); @@ -984,19 +993,54 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 LatteStreamout_PrepareDrawcall(count, instanceCount); // Uniform buffers, textures and samplers - BindStageResources(renderCommandEncoder, vertexShader); - 
BindStageResources(renderCommandEncoder, pixelShader); + BindStageResources(renderCommandEncoder, vertexShader, usesGeometryShader); + if (geometryShader) + BindStageResources(renderCommandEncoder, geometryShader, usesGeometryShader); + BindStageResources(renderCommandEncoder, pixelShader, usesGeometryShader); // Draw + MTL::Buffer* indexBuffer = nullptr; if (hostIndexType != INDEX_TYPE::NONE) + indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex); + if (usesGeometryShader) { - auto mtlIndexType = GetMtlIndexType(hostIndexType); - MTL::Buffer* indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex); - renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance); - } else - { - renderCommandEncoder->drawPrimitives(mtlPrimitiveType, baseVertex, count, instanceCount, baseInstance); + if (indexBuffer) + SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding); + + uint32 verticesPerPrimitive = 0; + switch (primitiveMode) + { + case LattePrimitiveMode::POINTS: + verticesPerPrimitive = 1; + break; + case LattePrimitiveMode::LINES: + verticesPerPrimitive = 2; + break; + case LattePrimitiveMode::TRIANGLES: + case LattePrimitiveMode::RECTS: + verticesPerPrimitive = 3; + break; + default: + debug_printf("invalid primitive mode %u\n", (uint32)primitiveMode); + break; + } + + renderCommandEncoder->drawMeshThreadgroups(MTL::Size(count / verticesPerPrimitive, 1, 1), MTL::Size(verticesPerPrimitive, 1, 1), MTL::Size(1, 1, 1)); } + else + { + if (indexBuffer) + { + auto mtlIndexType = GetMtlIndexType(hostIndexType); + renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance); + } + else + { + 
renderCommandEncoder->drawPrimitives(mtlPrimitiveType, baseVertex, count, instanceCount, baseInstance); + } + } + + m_state.m_isFirstDrawInRenderPass = false; LatteStreamout_FinishDrawcall(false); @@ -1037,6 +1081,83 @@ void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offse buffer->didModifyRange(NS::Range(offset, size)); } +void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) +{ + auto& boundBuffer = m_state.m_encoderState.m_buffers[shaderType][index]; + if (buffer == boundBuffer.m_buffer && offset == boundBuffer.m_offset) + return; + + // TODO: only set the offset if only offset changed + + boundBuffer = {buffer, offset}; + + switch (shaderType) + { + case METAL_SHADER_TYPE_VERTEX: + renderCommandEncoder->setVertexBuffer(buffer, offset, index); + break; + case METAL_SHADER_TYPE_OBJECT: + renderCommandEncoder->setObjectBuffer(buffer, offset, index); + break; + case METAL_SHADER_TYPE_MESH: + renderCommandEncoder->setMeshBuffer(buffer, offset, index); + break; + case METAL_SHADER_TYPE_FRAGMENT: + renderCommandEncoder->setFragmentBuffer(buffer, offset, index); + break; + } +} + +void MetalRenderer::SetTexture(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Texture* texture, uint32 index) +{ + auto& boundTexture = m_state.m_encoderState.m_textures[shaderType][index]; + if (texture == boundTexture) + return; + + boundTexture = texture; + + switch (shaderType) + { + case METAL_SHADER_TYPE_VERTEX: + renderCommandEncoder->setVertexTexture(texture, index); + break; + case METAL_SHADER_TYPE_OBJECT: + renderCommandEncoder->setObjectTexture(texture, index); + break; + case METAL_SHADER_TYPE_MESH: + renderCommandEncoder->setMeshTexture(texture, index); + break; + case METAL_SHADER_TYPE_FRAGMENT: + renderCommandEncoder->setFragmentTexture(texture, index); + break; + } +} + +void 
MetalRenderer::SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index) +{ + auto& boundSamplerState = m_state.m_encoderState.m_samplers[shaderType][index]; + if (samplerState == boundSamplerState) + return; + + boundSamplerState = samplerState; + + switch (shaderType) + { + case METAL_SHADER_TYPE_VERTEX: + renderCommandEncoder->setVertexSamplerState(samplerState, index); + break; + case METAL_SHADER_TYPE_OBJECT: + renderCommandEncoder->setObjectSamplerState(samplerState, index); + break; + case METAL_SHADER_TYPE_MESH: + renderCommandEncoder->setMeshSamplerState(samplerState, index); + break; + case METAL_SHADER_TYPE_FRAGMENT: + renderCommandEncoder->setFragmentSamplerState(samplerState, index); + break; + } +} + MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() { bool needsNewCommandBuffer = (m_commandBuffers.empty() || m_commandBuffers.back().m_commited); @@ -1086,7 +1207,7 @@ MTL::RenderCommandEncoder* MetalRenderer::GetTemporaryRenderCommandEncoder(MTL:: } // Some render passes clear the attachments, forceRecreate is supposed to be used in those cases -MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecreate, bool rebindStateIfNewEncoder) +MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecreate) { // Check if we need to begin a new render pass if (m_commandEncoder) @@ -1130,6 +1251,7 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr // Update state m_state.m_lastUsedFBO = m_state.m_activeFBO; + m_state.m_isFirstDrawInRenderPass = true; auto renderCommandEncoder = commandBuffer->renderCommandEncoder(m_state.m_activeFBO->GetRenderPassDescriptor()); #ifdef CEMU_DEBUG_ASSERT @@ -1140,12 +1262,6 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr ResetEncoderState(); - if (rebindStateIfNewEncoder) - { - // Rebind all the render state - 
RebindRenderState(renderCommandEncoder); - } - return renderCommandEncoder; } @@ -1259,9 +1375,56 @@ bool MetalRenderer::AcquireNextDrawable(bool mainWindow) return true; } -void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader) +bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader) { - auto mtlShaderType = GetMtlShaderType(shader->shaderType); + sint32 textureCount = shader->resourceMapping.getTextureCount(); + for (int i = 0; i < textureCount; ++i) + { + const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i); + auto hostTextureUnit = relative_textureUnit; + auto textureDim = shader->textureUnitDim[relative_textureUnit]; + auto texUnitRegIndex = hostTextureUnit * 7; + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + hostTextureUnit += LATTE_CEMU_VS_TEX_UNIT_BASE; + texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_VS; + break; + case LatteConst::ShaderType::Pixel: + hostTextureUnit += LATTE_CEMU_PS_TEX_UNIT_BASE; + texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_PS; + break; + case LatteConst::ShaderType::Geometry: + hostTextureUnit += LATTE_CEMU_GS_TEX_UNIT_BASE; + texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_GS; + break; + default: + UNREACHABLE; + } + + auto textureView = m_state.m_textures[hostTextureUnit]; + if (!textureView) + continue; + + LatteTexture* baseTexture = textureView->baseTexture; + if (!m_state.m_isFirstDrawInRenderPass) + { + // If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + { + auto colorTarget = m_state.m_activeFBO->colorBuffer[i].texture; + if (colorTarget && colorTarget->baseTexture == baseTexture) + return true; + } + } + } + + return false; +} + +void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, 
LatteDecompilerShader* shader, bool usesGeometryShader) +{ + auto mtlShaderType = GetMtlShaderType(shader->shaderType, usesGeometryShader); sint32 textureCount = shader->resourceMapping.getTextureCount(); for (int i = 0; i < textureCount; ++i) @@ -1288,8 +1451,8 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE UNREACHABLE; } - // TODO: uncomment - uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit]; + // TODO: correct? + uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i; if (binding >= MAX_MTL_TEXTURES) { debug_printf("invalid texture binding %u\n", binding); @@ -1301,88 +1464,21 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE { // TODO: don't bind if already bound if (textureDim == Latte::E_DIM::DIM_1D) - { - switch (shader->shaderType) - { - case LatteConst::ShaderType::Vertex: - { - renderCommandEncoder->setVertexTexture(m_nullTexture1D, binding); - renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); - break; - } - case LatteConst::ShaderType::Pixel: - { - renderCommandEncoder->setFragmentTexture(m_nullTexture1D, binding); - renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); - break; - } - default: - UNREACHABLE; - } - } + SetTexture(renderCommandEncoder, mtlShaderType, m_nullTexture1D, binding); else - { - switch (shader->shaderType) - { - case LatteConst::ShaderType::Vertex: - { - renderCommandEncoder->setVertexTexture(m_nullTexture2D, binding); - renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); - break; - } - case LatteConst::ShaderType::Pixel: - { - renderCommandEncoder->setFragmentTexture(m_nullTexture2D, binding); - renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); - break; - } - default: - UNREACHABLE; - } - } + SetTexture(renderCommandEncoder, mtlShaderType, m_nullTexture2D, binding); + 
SetSamplerState(renderCommandEncoder, mtlShaderType, m_nearestSampler, binding); continue; } if (textureDim == Latte::E_DIM::DIM_1D && (textureView->dim != Latte::E_DIM::DIM_1D)) { - switch (shader->shaderType) - { - case LatteConst::ShaderType::Vertex: - { - renderCommandEncoder->setVertexTexture(m_nullTexture1D, binding); - renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); - break; - } - case LatteConst::ShaderType::Pixel: - { - renderCommandEncoder->setFragmentTexture(m_nullTexture1D, binding); - renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); - break; - } - default: - UNREACHABLE; - } + SetTexture(renderCommandEncoder, mtlShaderType, m_nullTexture1D, binding); continue; } else if (textureDim == Latte::E_DIM::DIM_2D && (textureView->dim != Latte::E_DIM::DIM_2D && textureView->dim != Latte::E_DIM::DIM_2D_MSAA)) { - switch (shader->shaderType) - { - case LatteConst::ShaderType::Vertex: - { - renderCommandEncoder->setVertexTexture(m_nullTexture2D, binding); - renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); - break; - } - case LatteConst::ShaderType::Pixel: - { - renderCommandEncoder->setFragmentTexture(m_nullTexture2D, binding); - renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); - break; - } - default: - UNREACHABLE; - } + SetTexture(renderCommandEncoder, mtlShaderType, m_nullTexture2D, binding); continue; } @@ -1399,53 +1495,13 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE { sampler = m_nearestSampler; } - - auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding]; - if (sampler != boundSampler) - { - boundSampler = sampler; - - switch (shader->shaderType) - { - case LatteConst::ShaderType::Vertex: - { - renderCommandEncoder->setVertexSamplerState(sampler, binding); - break; - } - case LatteConst::ShaderType::Pixel: - { - renderCommandEncoder->setFragmentSamplerState(sampler, binding); - break; - } - default: - 
UNREACHABLE; - } - } + SetSamplerState(renderCommandEncoder, mtlShaderType, sampler, binding); // get texture register word 0 uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4]; auto& boundTexture = m_state.m_encoderState.m_textures[mtlShaderType][binding]; - if (textureView == boundTexture.m_textureView && word4 == boundTexture.m_word4) - continue; - - boundTexture = {textureView, word4}; - MTL::Texture* mtlTexture = textureView->GetSwizzledView(word4); - switch (shader->shaderType) - { - case LatteConst::ShaderType::Vertex: - { - renderCommandEncoder->setVertexTexture(mtlTexture, binding); - break; - } - case LatteConst::ShaderType::Pixel: - { - renderCommandEncoder->setFragmentTexture(mtlTexture, binding); - break; - } - default: - UNREACHABLE; - } + SetTexture(renderCommandEncoder, mtlShaderType, mtlTexture, binding); } // Support buffer @@ -1537,23 +1593,7 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE if (!HasUnifiedMemory()) buffer->didModifyRange(NS::Range(supportBuffer.offset, size)); - switch (shader->shaderType) - { - case LatteConst::ShaderType::Vertex: - { - renderCommandEncoder->setVertexBuffer(buffer, supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING); - //renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); - break; - } - case LatteConst::ShaderType::Pixel: - { - renderCommandEncoder->setFragmentBuffer(buffer, supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING); - //renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); - break; - } - default: - UNREACHABLE; - } + SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint); } // Uniform buffers @@ -1568,65 +1608,18 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE continue; } - size_t offset = 
m_state.m_uniformBufferOffsets[mtlShaderType][i]; + size_t offset = m_state.m_uniformBufferOffsets[GetMtlGeneralShaderType(shader->shaderType)][i]; if (offset == INVALID_OFFSET) continue; - auto& boundOffset = m_state.m_encoderState.m_uniformBufferOffsets[mtlShaderType][binding]; - if (offset == boundOffset) - continue; - - boundOffset = offset; - - // TODO: only set the offset if already bound - switch (shader->shaderType) - { - case LatteConst::ShaderType::Vertex: - { - renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), offset, binding); - break; - } - case LatteConst::ShaderType::Pixel: - { - renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBufferCache(), offset, binding); - break; - } - default: - UNREACHABLE; - } + SetBuffer(renderCommandEncoder, mtlShaderType, m_memoryManager->GetBufferCache(), offset, binding); } } // Storage buffer if (shader->resourceMapping.tfStorageBindingPoint >= 0) { - switch (shader->shaderType) - { - case LatteConst::ShaderType::Vertex: - { - renderCommandEncoder->setVertexBuffer(m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint); - break; - } - case LatteConst::ShaderType::Pixel: - { - renderCommandEncoder->setFragmentBuffer(m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint); - break; - } - default: - UNREACHABLE; - } - m_state.m_encoderState.m_uniformBufferOffsets[mtlShaderType][shader->resourceMapping.tfStorageBindingPoint] = INVALID_OFFSET; - } -} - -void MetalRenderer::RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder) -{ - // Vertex buffers - for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) - { - auto& vertexBufferRange = m_state.m_vertexBuffers[i]; - if (vertexBufferRange.offset != INVALID_OFFSET) - vertexBufferRange.needsRebind = true; + SetBuffer(renderCommandEncoder, mtlShaderType, m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint); } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 4ea13fb2..f8e12bd6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -8,6 +8,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Metal/MTLResource.hpp" +#include "Metal/MTLSampler.hpp" struct MetalBufferAllocation { @@ -31,27 +32,57 @@ struct MetalRestrideInfo struct MetalBoundBuffer { - bool needsRebind = false; size_t offset = INVALID_OFFSET; size_t size = 0; // Memory manager will write restride info to this variable MetalRestrideInfo restrideInfo; }; +enum MetalGeneralShaderType +{ + METAL_GENERAL_SHADER_TYPE_VERTEX, + METAL_GENERAL_SHADER_TYPE_GEOMETRY, + METAL_GENERAL_SHADER_TYPE_FRAGMENT, + + METAL_GENERAL_SHADER_TYPE_TOTAL +}; + +inline MetalGeneralShaderType GetMtlGeneralShaderType(LatteConst::ShaderType shaderType) +{ + switch (shaderType) + { + case LatteConst::ShaderType::Vertex: + return METAL_GENERAL_SHADER_TYPE_VERTEX; + case LatteConst::ShaderType::Geometry: + return METAL_GENERAL_SHADER_TYPE_GEOMETRY; + case LatteConst::ShaderType::Pixel: + return METAL_GENERAL_SHADER_TYPE_FRAGMENT; + default: + return METAL_GENERAL_SHADER_TYPE_TOTAL; + } +} + enum MetalShaderType { METAL_SHADER_TYPE_VERTEX, + METAL_SHADER_TYPE_OBJECT, + METAL_SHADER_TYPE_MESH, METAL_SHADER_TYPE_FRAGMENT, METAL_SHADER_TYPE_TOTAL }; -inline MetalShaderType GetMtlShaderType(LatteConst::ShaderType shaderType) +inline MetalShaderType GetMtlShaderType(LatteConst::ShaderType shaderType, bool usesGeometryShader) { switch (shaderType) { case LatteConst::ShaderType::Vertex: - return METAL_SHADER_TYPE_VERTEX; + if (usesGeometryShader) + return METAL_SHADER_TYPE_OBJECT; + else + return METAL_SHADER_TYPE_VERTEX; + case LatteConst::ShaderType::Geometry: + return METAL_SHADER_TYPE_MESH; case LatteConst::ShaderType::Pixel: return METAL_SHADER_TYPE_FRAGMENT; default: @@ -73,11 +104,11 @@ struct MetalEncoderState uint32 m_depthSlope = 0; uint32 m_depthClamp 
= 0; struct { - class LatteTextureViewMtl* m_textureView = nullptr; - uint32 m_word4 = INVALID_UINT32; - } m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES]; + MTL::Buffer* m_buffer; + size_t m_offset; + } m_buffers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS]; + MTL::Texture* m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES]; MTL::SamplerState* m_samplers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_SAMPLERS]; - size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS]; }; struct MetalStreamoutState @@ -97,6 +128,7 @@ struct MetalState bool m_usesSRGB = false; bool m_skipDrawSequence = false; + bool m_isFirstDrawInRenderPass = true; class CachedFBOMtl* m_activeFBO = nullptr; // If the FBO changes, but it's the same FBO as the last one with some omitted attachments, this FBO doesn't change' @@ -105,7 +137,7 @@ struct MetalState MetalBoundBuffer m_vertexBuffers[MAX_MTL_BUFFERS] = {{}}; // TODO: find out what is the max number of bound textures on the Wii U class LatteTextureViewMtl* m_textures[64] = {nullptr}; - size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS]; + size_t m_uniformBufferOffsets[METAL_GENERAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS]; MTL::Viewport m_viewport; MTL::ScissorRect m_scissor; @@ -329,12 +361,12 @@ public: for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++) { + for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++) + m_state.m_encoderState.m_buffers[i][j] = {nullptr}; for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++) - m_state.m_encoderState.m_textures[i][j] = {nullptr}; + m_state.m_encoderState.m_textures[i][j] = nullptr; for (uint32 j = 0; j < MAX_MTL_SAMPLERS; j++) m_state.m_encoderState.m_samplers[i][j] = nullptr; - for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++) - m_state.m_encoderState.m_uniformBufferOffsets[i][j] = INVALID_OFFSET; } } @@ -343,11 +375,15 @@ public: return m_state.m_encoderState; } + void SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, 
uint32 index); + void SetTexture(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Texture* texture, uint32 index); + void SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index); + MTL::CommandBuffer* GetCommandBuffer(); bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer); void WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer); MTL::RenderCommandEncoder* GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor); - MTL::RenderCommandEncoder* GetRenderCommandEncoder(bool forceRecreate = false, bool rebindStateIfNewEncoder = true); + MTL::RenderCommandEncoder* GetRenderCommandEncoder(bool forceRecreate = false); MTL::ComputeCommandEncoder* GetComputeCommandEncoder(); MTL::BlitCommandEncoder* GetBlitCommandEncoder(); void EndEncoding(); @@ -355,8 +391,8 @@ public: bool AcquireNextDrawable(bool mainWindow); - void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader); - void RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder); + bool CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader); + void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader); void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 465e9316..4c968d1c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -7,6 +7,8 @@ #include "Cemu/Logging/CemuLogging.h" #include "Common/precompiled.h" +#include "HW/Latte/Core/FetchShader.h" +#include "HW/Latte/ISA/RegDefines.h" extern std::atomic_int g_compiled_shaders_total; extern 
std::atomic_int g_compiled_shaders_async; @@ -14,14 +16,14 @@ extern std::atomic_int g_compiled_shaders_async; RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer} { - if (m_type == ShaderType::kFragment) + if (type == ShaderType::kGeometry) { - // Fragment functions are compiled just-in-time - m_mslCode = mslCode; + Compile(mslCode); } else { - Compile(mslCode); + // TODO: don't compile just-in-time + m_mslCode = mslCode; } // Count shader compilation @@ -34,13 +36,149 @@ RendererShaderMtl::~RendererShaderMtl() m_function->release(); } +void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType) +{ + cemu_assert_debug(m_type == ShaderType::kVertex); + + std::string fullCode; + + // Vertex buffers + std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; + std::string vertexBuffers = "#define VERTEX_BUFFERS "; + std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n"; + + // Index buffer + if (hostIndexType != Renderer::INDEX_TYPE::NONE) + { + vertexBufferDefinitions += ", device "; + switch (hostIndexType) + { + case Renderer::INDEX_TYPE::U16: + vertexBufferDefinitions += "ushort"; + break; + case Renderer::INDEX_TYPE::U32: + vertexBufferDefinitions += "uint"; + break; + default: + cemu_assert_suspicious(); + break; + } + + vertexBufferDefinitions += fmt::format("* indexBuffer [[buffer({})]]", vertexShader->resourceMapping.indexBufferBinding); + vertexBuffers += ", indexBuffer"; + inputFetchDefinition += "vid = indexBuffer[vid];\n"; + } + + inputFetchDefinition += "VertexIn in;\n"; + for (auto& bufferGroup : fetchShader->bufferGroups) + { + 
std::optional fetchType; + + uint32 bufferIndex = bufferGroup.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + for (sint32 j = 0; j < bufferGroup.attribCount; ++j) + { + auto& attr = bufferGroup.attrib[j]; + + uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; + if (semanticId == (uint32)-1) + continue; // attribute not used? + + std::string formatName; + uint8 componentCount = 0; + switch (GetMtlVertexFormat(attr.format)) + { + case MTL::VertexFormatUChar: + formatName = "uchar"; + componentCount = 1; + break; + case MTL::VertexFormatUChar2: + formatName = "uchar2"; + componentCount = 2; + break; + case MTL::VertexFormatUChar3: + formatName = "uchar3"; + componentCount = 3; + break; + case MTL::VertexFormatUChar4: + formatName = "uchar4"; + componentCount = 4; + break; + case MTL::VertexFormatUShort: + formatName = "ushort"; + componentCount = 1; + break; + case MTL::VertexFormatUShort2: + formatName = "ushort2"; + componentCount = 2; + break; + case MTL::VertexFormatUShort3: + formatName = "ushort3"; + componentCount = 3; + break; + case MTL::VertexFormatUShort4: + formatName = "ushort4"; + componentCount = 4; + break; + case MTL::VertexFormatUInt: + formatName = "uint"; + componentCount = 1; + break; + case MTL::VertexFormatUInt2: + formatName = "uint2"; + componentCount = 2; + break; + case MTL::VertexFormatUInt3: + formatName = "uint3"; + componentCount = 3; + break; + case MTL::VertexFormatUInt4: + formatName = "uint4"; + componentCount = 4; + break; + } + + // Fetch the attribute + inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId); + inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName); + inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex); + inputFetchDefinition += fmt::format(" + vid * {} + {})", 
bufferStride, attr.offset); + for (uint8 i = 0; i < (4 - componentCount); i++) + inputFetchDefinition += ", 0"; + inputFetchDefinition += ");\n"; + + if (fetchType.has_value()) + cemu_assert_debug(fetchType == attr.fetchType); + else + fetchType = attr.fetchType; + + if (attr.fetchType == LatteConst::INSTANCE_DATA) + { + cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported + } + } + + vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); + vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex); + } + + inputFetchDefinition += "return in;\n"; + inputFetchDefinition += "}\n"; + + fullCode += vertexBufferDefinitions + "\n"; + fullCode += vertexBuffers + "\n"; + fullCode += m_mslCode; + fullCode += inputFetchDefinition; + + Compile(fullCode); +} + void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO) { cemu_assert_debug(m_type == ShaderType::kFragment); - if (m_function) - m_function->release(); - std::string fullCode; // Define color attachment data types @@ -77,6 +215,9 @@ void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO) void RendererShaderMtl::Compile(const std::string& mslCode) { + if (m_function) + m_function->release(); + NS::Error* error = nullptr; MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error); if (error) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index eea12ae7..e21db55e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -21,6 +21,12 @@ public: RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode); virtual ~RendererShaderMtl(); + void CompileVertexFunction() + { + Compile(m_mslCode); + } 
+ + void CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType); void CompileFragmentFunction(CachedFBOMtl* activeFBO); MTL::Function* GetFunction() const diff --git a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h index c298150e..7f8f3dc7 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h @@ -3,7 +3,14 @@ #define __STRINGIFY(x) #x #define _STRINGIFY(x) __STRINGIFY(x) -constexpr const char* utilityShaderSource = _STRINGIFY(( +constexpr const char* utilityShaderSource = R"V0G0N( +#include +using namespace metal; + +#define GET_BUFFER_BINDING(index) (28 + index) +#define GET_TEXTURE_BINDING(index) (29 + index) +#define GET_SAMPLER_BINDING(index) (14 + index)\n + constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)}; struct VertexOut { @@ -48,4 +55,4 @@ vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[b dst[vid * params.newStride + i] = src[vid * params.oldStride + i]; } } -)); +)V0G0N";