diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
index 734aa2e4..9945725a 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp
@@ -3847,7 +3847,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
 		{
 			// Defined just-in-time
 			// Will also modify vid in case of an indexed draw
-			src->add("VertexIn fetchInput(VERTEX_BUFFER_DEFINITIONS, thread uint& vid);" _CRLF);
+			src->add("VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF);
 
 			functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]";
 			outputTypeName = "void";
@@ -3880,7 +3880,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
 			// TODO: don't hardcode the instance index
 			src->add("uint iid = 0;" _CRLF);
 			// Fetch the input
-			src->add("VertexIn in = fetchInput(VERTEX_BUFFERS, vid);" _CRLF);
+			src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF);
 			// Output is defined as object payload
 			src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
 		}
diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
index 9f8b62ae..51385c2e 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp
@@ -156,6 +156,7 @@ namespace LatteDecompiler
 	static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext)
 	{
 		auto src = decompilerContext->shaderSource;
+		std::string attributeNames;
 
 		if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex)
 		{
@@ -168,13 +169,16 @@ namespace LatteDecompiler
 					cemu_assert_debug(decompilerContext->output->resourceMappingVK.attributeMapping[i] >= 0);
 
 					src->addFmt("uint4 attrDataSem{}", i);
-					if (!decompilerContext->options->usesGeometryShader)
+					if (decompilerContext->options->usesGeometryShader)
+						attributeNames += "#define ATTRIBUTE_NAME" + std::to_string((sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]) + " attrDataSem" + std::to_string(i) + "\n";
+					else
 						src->addFmt(" [[attribute({})]]", (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]);
 					src->add(";" _CRLF);
 				}
 			}
 			src->add("};" _CRLF _CRLF);
 		}
+		src->addFmt("{}", attributeNames);
 	}
 
 	static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext)
@@ -335,6 +339,21 @@ namespace LatteDecompiler
 
 	static void emitHeader(LatteDecompilerShaderContext* decompilerContext)
 	{
+		auto src = decompilerContext->shaderSource;
+
+		if (decompilerContext->options->usesGeometryShader && (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
+		{
+			// PRIMITIVE_TYPE is defined just-in-time as a bare identifier (point/line/triangle).
+			// In #if an unknown identifier evaluates to 0, so "PRIMITIVE_TYPE == point" would always be true.
+			// Map the identifier to a vertex count via token pasting; an unsupported type fails to compile.
+			src->add("#define VERTICES_PER_PRIMITIVE_point 1" _CRLF);
+			src->add("#define VERTICES_PER_PRIMITIVE_line 2" _CRLF);
+			src->add("#define VERTICES_PER_PRIMITIVE_triangle 3" _CRLF);
+			src->add("#define CEMU_CONCAT_TOKENS2(a, b) a##b" _CRLF);
+			src->add("#define CEMU_CONCAT_TOKENS(a, b) CEMU_CONCAT_TOKENS2(a, b)" _CRLF);
+			src->add("#define VERTICES_PER_PRIMITIVE CEMU_CONCAT_TOKENS(VERTICES_PER_PRIMITIVE_, PRIMITIVE_TYPE)" _CRLF);
+		}
+
 		const bool dump_shaders_enabled = ActiveSettings::DumpShadersEnabled();
 		if(dump_shaders_enabled)
 			decompilerContext->shaderSource->add("// start of shader inputs/outputs, predetermined by Cemu. Do not touch" _CRLF);
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
index 94ab3721..d68ddf5e 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
@@ -92,7 +92,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
 
 		uint32 bufferIndex = bufferGroup.attributeBufferIndex;
 		uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
-		uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
+		uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
 		bufferStride = Align(bufferStride, 4);
 
 		// HACK
@@ -117,6 +117,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
 
 	auto mtlVertexShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
 	auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
+	mtlVertexShader->CompileVertexFunction();
 	mtlPixelShader->CompileFragmentFunction(activeFBO);
 
 	// Render pipeline state
@@ -127,9 +128,9 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
 	desc->setVertexDescriptor(vertexDescriptor);
 
 	// Color attachments
-	const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = LatteGPUState.contextNew.CB_COLOR_CONTROL;
+	const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL;
 	uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
-	uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK();
+	uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK();
 	for (uint8 i = 0; i < 8; i++)
 	{
 		const auto& colorBuffer = activeFBO->colorBuffer[i];
@@ -149,7 +150,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
 		{
 			colorAttachment->setBlendingEnabled(true);
 
-			const auto& blendControlReg = LatteGPUState.contextNew.CB_BLENDN_CONTROL[i];
+			const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i];
 
 			auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN());
 			auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND());
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index f5a11118..0fa60eea 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -766,17 +766,11 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
 	LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
 	LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
 	LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
-	if (!vertexShader || !static_cast<RendererShaderMtl*>(vertexShader->shader)->GetFunction())
+	if (!vertexShader)
 	{
 		debug_printf("no vertex function, skipping draw\n");
 		return;
 	}
-	// TODO: remove this?
-	if (geometryShader)
-	{
-		debug_printf("geometry shader aren't supported on Metal yet, skipping draw\n");
-		return;
-	}
 	const auto fetchShader = LatteSHRC_GetActiveFetchShader();
 
 	// Depth stencil state
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
index 465e9316..17322b19 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
@@ -7,6 +7,8 @@
 
 #include "Cemu/Logging/CemuLogging.h"
 #include "Common/precompiled.h"
+#include "HW/Latte/Core/FetchShader.h"
+#include "HW/Latte/ISA/RegDefines.h"
 
 extern std::atomic_int g_compiled_shaders_total;
 extern std::atomic_int g_compiled_shaders_async;
@@ -14,15 +16,8 @@ extern std::atomic_int g_compiled_shaders_async;
 RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
 	: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
 {
-	if (m_type == ShaderType::kFragment)
-	{
-		// Fragment functions are compiled just-in-time
-		m_mslCode = mslCode;
-	}
-	else
-	{
-		Compile(mslCode);
-	}
+	// TODO: don't compile just-in-time
+	m_mslCode = mslCode;
 
 	// Count shader compilation
 	g_compiled_shaders_total++;
@@ -34,13 +29,184 @@ RendererShaderMtl::~RendererShaderMtl()
 		m_function->release();
 }
 
+// Builds the just-in-time prefix (primitive type, vertex buffer macros) and the fetchInput() definition
+// for a vertex shader that is used as an object function, then compiles the combined source.
+void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType)
+{
+	cemu_assert_debug(m_type == ShaderType::kVertex);
+
+	std::string fullCode;
+
+	// Primitive type
+	const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE());
+	fullCode += "#define PRIMITIVE_TYPE ";
+	switch (primitiveMode)
+	{
+	case LattePrimitiveMode::POINTS:
+		fullCode += "point";
+		break;
+	case LattePrimitiveMode::LINES:
+		fullCode += "line";
+		break;
+	case LattePrimitiveMode::TRIANGLES:
+		fullCode += "triangle";
+		break;
+	default:
+		break;
+	}
+	fullCode += "\n";
+
+	// Vertex buffers
+	std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
+	std::string vertexBuffers = "#define VERTEX_BUFFERS ";
+	std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n";
+	inputFetchDefinition += "VertexIn in;\n";
+	if (hostIndexType != Renderer::INDEX_TYPE::NONE)
+	{
+		vertexBufferDefinitions += ", device ";
+		switch (hostIndexType)
+		{
+		case Renderer::INDEX_TYPE::U16:
+			vertexBufferDefinitions += "ushort";
+			break;
+		case Renderer::INDEX_TYPE::U32:
+			vertexBufferDefinitions += "uint";
+			break;
+		default:
+			cemu_assert_suspicious();
+			break;
+		}
+		// TODO: don't hardcode the index
+		vertexBufferDefinitions += "* indexBuffer [[buffer(20)]]";
+		vertexBuffers += ", indexBuffer";
+		inputFetchDefinition += "vid = indexBuffer[vid];\n";
+	}
+	for (auto& bufferGroup : fetchShader->bufferGroups)
+	{
+		// The stride is read first because the per-attribute fetch addresses below depend on it
+		uint32 bufferIndex = bufferGroup.attributeBufferIndex;
+		uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
+		uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
+
+		std::optional<LatteConst::VertexFetchType2> fetchType;
+
+		for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
+		{
+			auto& attr = bufferGroup.attrib[j];
+
+			uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
+			if (semanticId == (uint32)-1)
+				continue; // attribute not used?
+
+			std::string formatName;
+			switch (GetMtlVertexFormat(attr.format))
+			{
+			case MTL::VertexFormatUChar:
+				formatName = "uchar";
+				break;
+			case MTL::VertexFormatUChar2:
+				formatName = "uchar2";
+				break;
+			case MTL::VertexFormatUChar3:
+				formatName = "uchar3";
+				break;
+			case MTL::VertexFormatUChar4:
+				formatName = "uchar4";
+				break;
+			case MTL::VertexFormatUShort:
+				formatName = "ushort";
+				break;
+			case MTL::VertexFormatUShort2:
+				formatName = "ushort2";
+				break;
+			case MTL::VertexFormatUShort3:
+				formatName = "ushort3";
+				break;
+			case MTL::VertexFormatUShort4:
+				formatName = "ushort4";
+				break;
+			case MTL::VertexFormatUInt:
+				formatName = "uint";
+				break;
+			case MTL::VertexFormatUInt2:
+				formatName = "uint2";
+				break;
+			case MTL::VertexFormatUInt3:
+				formatName = "uint3";
+				break;
+			case MTL::VertexFormatUInt4:
+				formatName = "uint4";
+				break;
+			default:
+				cemu_assert_suspicious(); // no MSL type for this format, the emitted fetch would not compile
+				continue;
+			}
+
+			// Fetch the attribute (byte address = vertex index * buffer stride + attribute offset)
+			inputFetchDefinition += "in.ATTRIBUTE_NAME" + std::to_string(semanticId) + " = ";
+			inputFetchDefinition += "*(device " + formatName + "*)";
+			inputFetchDefinition += "(vertexBuffer" + std::to_string(attr.attributeBufferIndex);
+			inputFetchDefinition += " + vid * " + std::to_string(bufferStride) + " + " + std::to_string(attr.offset) + ");\n";
+
+			if (fetchType.has_value())
+				cemu_assert_debug(fetchType == attr.fetchType);
+			else
+				fetchType = attr.fetchType;
+
+			if (attr.fetchType == LatteConst::INSTANCE_DATA)
+			{
+				cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
+			}
+		}
+
+		vertexBufferDefinitions += ", device uchar* vertexBuffer" + std::to_string(bufferIndex) + " [[buffer(" + std::to_string(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)) + ")]]";
+		vertexBuffers += ", vertexBuffer" + std::to_string(bufferIndex);
+	}
+	inputFetchDefinition += "return in;\n";
+	inputFetchDefinition += "}\n";
+
+	fullCode += vertexBufferDefinitions + "\n";
+	fullCode += vertexBuffers + "\n";
+	fullCode += m_mslCode;
+	fullCode += inputFetchDefinition;
+
+	Compile(fullCode);
+}
+
+// Prepends the primitive type define to the geometry shader source and compiles the result.
+void RendererShaderMtl::CompileMeshFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader)
+{
+	cemu_assert_debug(m_type == ShaderType::kGeometry);
+
+	std::string fullCode;
+
+	// Primitive type
+	const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE());
+	fullCode += "#define PRIMITIVE_TYPE ";
+	switch (primitiveMode)
+	{
+	case LattePrimitiveMode::POINTS:
+		fullCode += "point";
+		break;
+	case LattePrimitiveMode::LINES:
+		fullCode += "line";
+		break;
+	case LattePrimitiveMode::TRIANGLES:
+		fullCode += "triangle";
+		break;
+	default:
+		break;
+	}
+	fullCode += "\n";
+
+	fullCode += m_mslCode;
+	Compile(fullCode);
+}
+
 void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
 {
 	cemu_assert_debug(m_type == ShaderType::kFragment);
 
-	if (m_function)
-		m_function->release();
-
 	std::string fullCode;
 
 	// Define color attachment data types
@@ -77,6 +243,12 @@ void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
 
 void RendererShaderMtl::Compile(const std::string& mslCode)
 {
+	if (m_function)
+	{
+		m_function->release();
+		m_function = nullptr; // don't keep a dangling pointer if the compilation below fails
+	}
+
 	NS::Error* error = nullptr;
 	MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error);
 	if (error)
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
index eea12ae7..1a53313a 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
@@ -21,6 +21,13 @@ public:
 	RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
 	virtual ~RendererShaderMtl();
 
+	void CompileVertexFunction()
+	{
+		Compile(m_mslCode);
+	}
+
+	void CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType);
+	void CompileMeshFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader);
 	void CompileFragmentFunction(CachedFBOMtl* activeFBO);
 
 	MTL::Function* GetFunction() const