From a832bc225ed35913029f57b9bee1210b6ef799f0 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 21 Aug 2024 12:12:45 +0200 Subject: [PATCH] support rect primitive emulation --- .../LatteDecompilerEmitMSL.cpp | 35 ++-- .../LatteDecompilerEmitMSLHeader.hpp | 44 +++-- .../Renderer/Metal/MetalPipelineCache.cpp | 183 +++++++++++++++++- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 29 +-- 4 files changed, 241 insertions(+), 50 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 1b0f3f71..faf20065 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -3821,20 +3821,22 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader) { + bool isRectVertexShader = (static_cast(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS); + StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end) shaderContext->shaderSource = src; // debug info src->addFmt("// shader {:016x}" _CRLF, shaderContext->shaderBaseHash); #ifdef CEMU_DEBUG_ASSERT - src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues?"true":"false"); + src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues ? 
"true" : "false"); src->addFmt(_CRLF); #endif // include metal standard library src->add("#include " _CRLF); src->add("using namespace metal;" _CRLF); // header part (definitions for inputs and outputs) - LatteDecompiler::emitHeader(shaderContext); + LatteDecompiler::emitHeader(shaderContext, isRectVertexShader); // helper functions LatteDecompiler_emitHelperFunctions(shaderContext, src); const char* functionType = ""; @@ -3842,7 +3844,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: - if (shaderContext->options->usesGeometryShader) + if (shaderContext->options->usesGeometryShader || isRectVertexShader) { // Defined just-in-time // Will also modify vid in case of an indexed draw @@ -3868,9 +3870,9 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, } // start of main src->addFmt("{} {} main0(", functionType, outputTypeName); - LatteDecompiler::emitInputs(shaderContext); + LatteDecompiler::emitInputs(shaderContext, isRectVertexShader); src->add(") {" _CRLF); - if (shaderContext->options->usesGeometryShader && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) + if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) { if (shader->shaderType == LatteConst::ShaderType::Vertex) { @@ -4086,7 +4088,8 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, continue; } - if (shaderContext->options->usesGeometryShader) + // TODO: is the if statement even needed? 
+ if (shaderContext->options->usesGeometryShader || isRectVertexShader) { // import from geometry shader if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) @@ -4130,11 +4133,11 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, // vertex shader should write renderstate point size at the end if required but not modified by shader if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) { - if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader == false) + if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) src->add("out.pointSize = supportBuffer.pointSize;" _CRLF); } - if (shaderContext->options->usesGeometryShader && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) + if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) { if (shader->shaderType == LatteConst::ShaderType::Vertex) { @@ -4167,18 +4170,14 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, } } } - else - { - if (shader->shaderType == LatteConst::ShaderType::Vertex) - { - // TODO: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?) - if (shader->shaderType == LatteConst::ShaderType::Vertex) - src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF); - } - // Return + // TODO: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?) 
+ if ((shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) || shader->shaderType == LatteConst::ShaderType::Geometry) + src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF); + + // Return + if (!(shaderContext->options->usesGeometryShader || isRectVertexShader) || shader->shaderType == LatteConst::ShaderType::Pixel) src->add("return out;" _CRLF); - } // end of shader main src->add("}" _CRLF); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 20f75c95..5e1b4c11 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -96,7 +96,7 @@ namespace LatteDecompiler uniformCurrentOffset += 8; } // define verticesPerInstance + streamoutBufferBaseX - if ((shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) || + if ((shader->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || (shader->shaderType == LatteConst::ShaderType::Geometry)) { src->add("int verticesPerInstance;" _CRLF); @@ -182,7 +182,7 @@ namespace LatteDecompiler src->addFmt("{}", attributeNames); } - static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext) + static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext, bool isRectVertexShader) { auto* src = shaderContext->shaderSource; @@ -214,15 +214,25 @@ namespace LatteDecompiler continue; // no ps input src->addFmt("float4 passParameterSem{}", psInputTable->import[psInputIndex].semanticId); - src->addFmt(" [[user(locn{})]]", psInputIndex); - if (psInputTable->import[psInputIndex].isFlat) - src->add(" [[flat]]"); - if (psInputTable->import[psInputIndex].isNoPerspective) - src->add(" [[center_no_perspective]]"); + if 
(!isRectVertexShader) + { + src->addFmt(" [[user(locn{})]]", psInputIndex); + if (psInputTable->import[psInputIndex].isFlat) + src->add(" [[flat]]"); + if (psInputTable->import[psInputIndex].isNoPerspective) + src->add(" [[center_no_perspective]]"); + } src->addFmt(";" _CRLF); } src->add("};" _CRLF _CRLF); + + if (isRectVertexShader) + { + src->add("struct ObjectPayload {" _CRLF); + src->add("VertexOut vertexOut[VERTICES_PER_VERTEX_PRIMITIVE];" _CRLF); + src->add("};" _CRLF _CRLF); + } } static void _emitPSInputs(LatteDecompilerShaderContext* shaderContext) @@ -251,7 +261,7 @@ namespace LatteDecompiler src->add("};" _CRLF _CRLF); } - static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext) + static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader) { auto src = decompilerContext->shaderSource; @@ -288,7 +298,7 @@ namespace LatteDecompiler if (!decompilerContext->options->usesGeometryShader) { if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) - _emitVSOutputs(decompilerContext); + _emitVSOutputs(decompilerContext, isRectVertexShader); } else { @@ -338,11 +348,11 @@ namespace LatteDecompiler } } - static void emitHeader(LatteDecompilerShaderContext* decompilerContext) + static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader) { auto src = decompilerContext->shaderSource; - if (decompilerContext->options->usesGeometryShader && (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)) + if ((decompilerContext->options->usesGeometryShader || isRectVertexShader) && (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)) { // TODO: make vsOutPrimType parth of the shader hash LattePrimitiveMode vsOutPrimType = static_cast(decompilerContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]); @@ 
-359,6 +369,9 @@ namespace LatteDecompiler case LattePrimitiveMode::TRIANGLES: src->add("#define VERTICES_PER_VERTEX_PRIMITIVE 3" _CRLF); break; + case LattePrimitiveMode::RECTS: + src->add("#define VERTICES_PER_VERTEX_PRIMITIVE 3" _CRLF); + break; default: cemu_assert_suspicious(); break; @@ -394,7 +407,7 @@ namespace LatteDecompiler // uniform buffers _emitUniformBuffers(decompilerContext); // inputs and outputs - _emitInputsAndOutputs(decompilerContext); + _emitInputsAndOutputs(decompilerContext, isRectVertexShader); if (dump_shaders_enabled) decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF); @@ -467,14 +480,14 @@ namespace LatteDecompiler } } - static void emitInputs(LatteDecompilerShaderContext* decompilerContext) + static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader) { auto src = decompilerContext->shaderSource; switch (decompilerContext->shaderType) { case LatteConst::ShaderType::Vertex: - if (!decompilerContext->options->usesGeometryShader) + if (!(decompilerContext->options->usesGeometryShader || isRectVertexShader)) src->add("VertexIn in [[stage_in]], "); break; case LatteConst::ShaderType::Pixel: @@ -488,7 +501,7 @@ namespace LatteDecompiler switch (decompilerContext->shaderType) { case LatteConst::ShaderType::Vertex: - if (decompilerContext->options->usesGeometryShader) + if (decompilerContext->options->usesGeometryShader || isRectVertexShader) { src->add(", object_data ObjectPayload& objectPayload [[payload]]"); src->add(", mesh_grid_properties meshGridProperties"); @@ -505,7 +518,6 @@ namespace LatteDecompiler case LatteConst::ShaderType::Geometry: src->add(", MeshType mesh"); src->add(", const object_data ObjectPayload& objectPayload [[payload]]"); - src->add(", uint tid [[thread_index_in_threadgroup]]"); break; case LatteConst::ShaderType::Pixel: src->add(", bool frontFacing [[front_facing]]"); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
index 8e115e58..ba577012 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
@@ -2,6 +2,7 @@
 #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
 #include "Foundation/NSObject.hpp"
+#include "HW/Latte/Core/LatteShader.h"
 #include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
 #include "HW/Latte/Renderer/Metal/LatteToMtl.h"
 #include "HW/Latte/Renderer/Metal/RendererShaderMtl.h"
@@ -9,10 +10,191 @@
 #include "HW/Latte/Core/FetchShader.h"
 #include "HW/Latte/ISA/RegDefines.h"
 
-#include "Metal/MTLDevice.hpp"
-#include "Metal/MTLRenderPipeline.hpp"
 #include "config/ActiveSettings.h"
 
+// Appends MSL that copies input vertex `vIdx` of the object payload (position plus every
+// VS output parameter that has a matching PS import) into `out` and stores it in mesh
+// vertex slot `outIdx`.
+static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, sint32 outIdx, const LatteContextRegister& latteRegister)
+{
+	auto parameterMask = vertexShader->outputParameterMask;
+	for (uint32 i = 0; i < 32; i++)
+	{
+		if ((parameterMask & (1 << i)) == 0)
+			continue;
+		sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
+		if (vsSemanticId < 0)
+			continue;
+		// make sure PS has matching input
+		if (!psInputTable->hasPSImportForSemanticId(vsSemanticId))
+			continue;
+		gsSrc.append(fmt::format("out.passParameterSem{} = objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId));
+	}
+	gsSrc.append(fmt::format("out.position = objectPayload.vertexOut[{}].position;\r\n", vIdx));
+	gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", outIdx));
+}
+
+// Appends MSL that derives the rectangle's missing 4th vertex from the three payload
+// vertices via gen4thVertex<variant> (position and every PS-imported parameter) and
+// stores it in mesh vertex slot `outIdx`.
+static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, sint32 outIdx, const LatteContextRegister& latteRegister)
+{
+	auto parameterMask = vertexShader->outputParameterMask;
+	for (uint32 i = 0; i < 32; i++)
+	{
+		if ((parameterMask & (1 << i)) == 0)
+			continue;
+		sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
+		if (vsSemanticId < 0)
+			continue;
+		// make sure PS has matching input
+		if (!psInputTable->hasPSImportForSemanticId(vsSemanticId))
+			continue;
+		gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId));
+	}
+	gsSrc.append(fmt::format("out.position = gen4thVertex{}(objectPayload.vertexOut[0].position, objectPayload.vertexOut[1].position, objectPayload.vertexOut[2].position);\r\n", variant));
+	gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", outIdx));
+}
+
+// Appends MSL for all four rectangle corners. p0..p3 name the source vertex for mesh
+// slots 0..3 (the value 3 stands for the generated vertex). Slots are filled in list order
+// because the index list emitted by rectsEmulationGS_generate forms the triangles
+// (0,1,2) and (1,2,3), so slots 1 and 2 must hold the endpoints of the diagonal.
+static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister)
+{
+	sint32 pList[4] = { p0, p1, p2, p3 };
+	for (sint32 i = 0; i < 4; i++)
+	{
+		if (pList[i] == 3)
+			rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, i, latteRegister);
+		else
+			rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], i, latteRegister);
+	}
+}
+
+// Builds the MSL source of a mesh shader that expands each RECTS primitive (three
+// vertices passed through the object payload) into two triangles, and wraps it in a new
+// RendererShaderMtl.
+// NOTE(review): the shader is allocated with `new` and returned as a raw pointer; confirm that a caller takes ownership.
+static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer, const LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister)
+{
+	std::string gsSrc;
+	gsSrc.append("#include <metal_stdlib>\r\n");
+	gsSrc.append("using namespace metal;\r\n");
+
+	LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();
+
+	// inputs & outputs
+	std::string vertexOutDefinition = "struct VertexOut {\r\n";
+	vertexOutDefinition += "float4 position;\r\n";
+	std::string geometryOutDefinition = "struct GeometryOut {\r\n";
+	geometryOutDefinition += "float4 position [[position]];\r\n";
+	auto parameterMask = vertexShader->outputParameterMask;
+	for (sint32 f = 0; f < 2; f++)
+	{
+		for (uint32 i = 0; i < 32; i++)
+		{
+			if ((parameterMask & (1 << i)) == 0)
+				continue;
+			sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
+			if (vsSemanticId < 0)
+				continue;
+			auto psImport = psInputTable->getPSImportBySemanticId(vsSemanticId);
+			if (psImport == nullptr)
+				continue;
+
+			if (f == 0)
+			{
+				vertexOutDefinition += fmt::format("float4 passParameterSem{};\r\n", vsSemanticId);
+			}
+			else
+			{
+				geometryOutDefinition += fmt::format("float4 passParameterSem{}", vsSemanticId);
+
+				geometryOutDefinition += fmt::format(" [[user(locn{})]]", psInputTable->getPSImportLocationBySemanticId(vsSemanticId));
+				if (psImport->isFlat)
+					geometryOutDefinition += " [[flat]]";
+				if (psImport->isNoPerspective)
+					geometryOutDefinition += " [[center_no_perspective]]";
+				geometryOutDefinition += ";\r\n";
+			}
+		}
+	}
+	vertexOutDefinition += "};\r\n";
+	geometryOutDefinition += "};\r\n";
+
+	gsSrc.append(vertexOutDefinition);
+	gsSrc.append(geometryOutDefinition);
+
+	gsSrc.append("struct ObjectPayload {\r\n");
+	gsSrc.append("VertexOut vertexOut[3];\r\n");
+	gsSrc.append("};\r\n");
+
+	// gen function
+	gsSrc.append("float4 gen4thVertexA(float4 a, float4 b, float4 c)\r\n");
+	gsSrc.append("{\r\n");
+	gsSrc.append("return b - (c - a);\r\n");
+	gsSrc.append("}\r\n");
+
+	gsSrc.append("float4 gen4thVertexB(float4 a, float4 b, float4 c)\r\n");
+	gsSrc.append("{\r\n");
+	gsSrc.append("return c - (b - a);\r\n");
+	gsSrc.append("}\r\n");
+
+	gsSrc.append("float4 gen4thVertexC(float4 a, float4 b, float4 c)\r\n");
+	gsSrc.append("{\r\n");
+	gsSrc.append("return c + (b - a);\r\n");
+	gsSrc.append("}\r\n");
+
+	// main
+	gsSrc.append("using MeshType = mesh<GeometryOut, void, 4, 2, topology::triangle>;\r\n");
+	gsSrc.append("[[mesh, max_total_threads_per_threadgroup(1)]]\r\n");
+	gsSrc.append("void main0(MeshType mesh, const object_data ObjectPayload& objectPayload [[payload]])\r\n");
+	gsSrc.append("{\r\n");
+	gsSrc.append("GeometryOut out;\r\n");
+
+	// there are two possible winding orders that need different triangle generation:
+	// 0 1
+	// 2 3
+	// and
+	// 0 1
+	// 3 2
+	// all others are just symmetries of these cases
+
+	// we can determine the case by comparing the distance 0<->1 and 0<->2
+
+	gsSrc.append("float dist0_1 = length(objectPayload.vertexOut[1].position.xy - objectPayload.vertexOut[0].position.xy);\r\n");
+	gsSrc.append("float dist0_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[0].position.xy);\r\n");
+	gsSrc.append("float dist1_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[1].position.xy);\r\n");
+
+	// emit vertices
+	gsSrc.append("if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n");
+	gsSrc.append("{\r\n");
+	// p0 to p1 is diagonal
+	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister);
+	gsSrc.append("} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n");
+	// p0 to p2 is diagonal
+	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister);
+	gsSrc.append("} else {\r\n");
+	// p1 to p2 is diagonal
+	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister);
+	gsSrc.append("}\r\n");
+
+	gsSrc.append("mesh.set_primitive_count(2);\r\n");
+	gsSrc.append("mesh.set_index(0, 0);\r\n");
+	gsSrc.append("mesh.set_index(1, 1);\r\n");
+	gsSrc.append("mesh.set_index(2, 2);\r\n");
+	gsSrc.append("mesh.set_index(3, 1);\r\n");
+	gsSrc.append("mesh.set_index(4, 2);\r\n");
+	gsSrc.append("mesh.set_index(5, 3);\r\n");
+
+	gsSrc.append("}\r\n");
+
+	auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc);
+
+	return mtlShader;
+}
+
 #define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF
 
 uint64 s_cacheTitleId = 
INVALID_TITLE_ID; @@ -273,7 +441,16 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe return pipeline; auto mtlObjectShader = static_cast(vertexShader->shader); - auto mtlMeshShader = static_cast(geometryShader->shader); + RendererShaderMtl* mtlMeshShader; + if (geometryShader) + { + mtlMeshShader = static_cast(geometryShader->shader); + } + else + { + // If there is no geometry shader, it means that we are emulating rects + mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); + } auto mtlPixelShader = static_cast(pixelShader->shader); mtlObjectShader->CompileObjectFunction(lcr, fetchShader, vertexShader, hostIndexType); mtlPixelShader->CompileFragmentFunction(activeFBO); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 89c9c2a3..aef5445a 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -819,6 +819,11 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // Render pass auto renderCommandEncoder = GetRenderCommandEncoder(); + // Primitive type + const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); + auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); + bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); + // Shaders LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); @@ -830,6 +835,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } const auto fetchShader = LatteSHRC_GetActiveFetchShader(); + bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); + // Depth stencil state // TODO: implement this somehow //auto depthControl = LatteGPUState.contextNew.DB_DEPTH_CONTROL; @@ -866,11 +873,6 
@@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } } - // Primitive type - const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); - auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); - bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); - // Blend color float* blendColorConstant = (float*)LatteGPUState.contextRegister + Latte::REGADDR::CB_BLEND_RED; renderCommandEncoder->setBlendColor(blendColorConstant[0], blendColorConstant[1], blendColorConstant[2], blendColorConstant[3]); @@ -1011,7 +1013,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 size_t offset; // Restride - if (geometryShader) + if (usesGeometryShader) { // Object shaders don't need restriding, since the attributes are fetched in the shader buffer = m_memoryManager->GetBufferCache(); @@ -1031,14 +1033,14 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // Bind if (true) { - SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, (geometryShader != nullptr)), buffer, offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); + SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, usesGeometryShader), buffer, offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); } } } // Render pipeline state MTL::RenderPipelineState* renderPipelineState; - if (geometryShader) + if (usesGeometryShader) renderPipelineState = m_pipelineCache->GetMeshPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO, LatteGPUState.contextNew, hostIndexType); else renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, LatteGPUState.contextNew); @@ -1053,16 +1055,16 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 LatteStreamout_PrepareDrawcall(count, instanceCount); // Uniform 
buffers, textures and samplers - BindStageResources(renderCommandEncoder, vertexShader, (geometryShader != nullptr)); + BindStageResources(renderCommandEncoder, vertexShader, usesGeometryShader); if (geometryShader) - BindStageResources(renderCommandEncoder, geometryShader, (geometryShader != nullptr)); - BindStageResources(renderCommandEncoder, pixelShader, (geometryShader != nullptr)); + BindStageResources(renderCommandEncoder, geometryShader, usesGeometryShader); + BindStageResources(renderCommandEncoder, pixelShader, usesGeometryShader); // Draw MTL::Buffer* indexBuffer = nullptr; if (hostIndexType != INDEX_TYPE::NONE) indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex); - if (geometryShader) + if (usesGeometryShader) { // TODO: don't hardcode the index if (indexBuffer) @@ -1078,10 +1080,11 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 verticesPerPrimitive = 2; break; case LattePrimitiveMode::TRIANGLES: + case LattePrimitiveMode::RECTS: verticesPerPrimitive = 3; break; default: - throw std::runtime_error("Invalid primitive mode"); + debug_printf("invalid primitive mode %u\n", (uint32)primitiveMode); break; }