From 3dc233fb56061b0468a1fb25248f53432a0bc0a4 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 7 Sep 2024 11:00:10 +0200 Subject: [PATCH] support rasterization kill --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 23 +++++- .../LatteDecompilerEmitMSL.cpp | 53 +++++++++++--- .../LatteDecompilerEmitMSLHeader.hpp | 8 +-- .../Renderer/Metal/MetalPipelineCache.cpp | 70 +++++++++++-------- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 50 +++++-------- 5 files changed, 124 insertions(+), 80 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 2bc719cd..c530dc43 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -501,8 +501,27 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, uint64 vsHash2 = 0; _calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2); uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL); - if (g_renderer->GetType() == RendererAPI::Metal && usesGeometryShader) - vsHash += _activeFetchShader->mtlShaderHashObject; + if (g_renderer->GetType() == RendererAPI::Metal) + { + if (usesGeometryShader) + vsHash += _activeFetchShader->mtlShaderHashObject; + + // Rasterization + bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); + + // HACK + if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + rasterizationEnabled = true; + + const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; + uint32 cullFront = polygonControlReg.get_CULL_FRONT(); + uint32 cullBack = polygonControlReg.get_CULL_BACK(); + if (cullFront && cullBack) + rasterizationEnabled = false; + + if (rasterizationEnabled) + vsHash += 51ULL; + } uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F; vsHash += tmp; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 137f8e87..4c62c244 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -20,6 +20,8 @@ #define _CRLF "\r\n" +static bool rasterizationEnabled; + void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext, StringBuf* src, LatteParsedFetchShaderAttribute_t* attrib); /* @@ -3108,6 +3110,9 @@ static void _emitExportGPRReadCode(LatteDecompilerShaderContext* shaderContext, static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { + if (!rasterizationEnabled) + return; + StringBuf* src = shaderContext->shaderSource; src->add("// export" _CRLF); if(shaderContext->shaderType == LatteConst::ShaderType::Vertex ) @@ -3332,6 +3337,9 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La return; } + if (!rasterizationEnabled) + return; + if (shaderContext->shaderType == LatteConst::ShaderType::Vertex) { if (cfInstruction->memWriteElemSize != 3) @@ -3861,6 +3869,23 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, { bool isRectVertexShader = (static_cast(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS); + // Rasterization + rasterizationEnabled = true; + if (shader->shaderType == LatteConst::ShaderType::Vertex) + { + rasterizationEnabled = !shaderContext->contextRegistersNew->PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); + + // HACK + if (!shaderContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + rasterizationEnabled = true; + + const auto& polygonControlReg = shaderContext->contextRegistersNew->PA_SU_SC_MODE_CNTL; + uint32 cullFront = polygonControlReg.get_CULL_FRONT(); + uint32 cullBack = polygonControlReg.get_CULL_BACK(); + if (cullFront && cullBack) + rasterizationEnabled = false; + } + StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end) shaderContext->shaderSource = src; @@ -3874,7 +3899,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, src->add("#include " _CRLF); src->add("using namespace metal;" _CRLF); // header part (definitions for inputs and outputs) - LatteDecompiler::emitHeader(shaderContext, isRectVertexShader); + LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, rasterizationEnabled); // helper functions LatteDecompiler_emitHelperFunctions(shaderContext, src); const char* functionType = ""; @@ -4010,7 +4035,10 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, else { functionType = "vertex"; - outputTypeName = "VertexOut"; + if (rasterizationEnabled) + outputTypeName = "VertexOut"; + else + outputTypeName = "void"; } break; case LatteConst::ShaderType::Geometry: @@ -4048,7 +4076,8 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, } else { - src->addFmt("{} out;" _CRLF, outputTypeName); + if (rasterizationEnabled) + src->addFmt("{} out;" _CRLF, outputTypeName); } // variable definition if (shaderContext->typeTracker.useArrayGPRs == false) @@ -4285,9 +4314,9 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, //if(shader->shaderType == LatteConst::ShaderType::Geometry) // src->add("EndPrimitive();" _CRLF); // vertex shader should write renderstate point size at the end if required but not modified by shader - if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) + if (shaderContext->analyzer.outputPointSize && !shaderContext->analyzer.writesPointSize) { - if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) + if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader && rasterizationEnabled) src->add("out.pointSize = supportBuffer.pointSize;" _CRLF); } @@ -4325,13 +4354,15 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, } } - // TODO: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?) - if ((shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) || shader->shaderType == LatteConst::ShaderType::Geometry) - src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF); + if (rasterizationEnabled) + { + if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) + src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF); - // Return - if (!(shaderContext->options->usesGeometryShader || isRectVertexShader) || shader->shaderType == LatteConst::ShaderType::Pixel) - src->add("return out;" _CRLF); + // Return + if (!(shaderContext->options->usesGeometryShader || isRectVertexShader) || shader->shaderType == LatteConst::ShaderType::Pixel) + src->add("return out;" _CRLF); + } // end of shader main src->add("}" _CRLF); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 412c9992..1342a277 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -262,7 +262,7 @@ namespace LatteDecompiler src->add("};" _CRLF _CRLF); } - static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader) + static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled) { auto src = decompilerContext->shaderSource; @@ -300,7 +300,7 @@ namespace LatteDecompiler if (!decompilerContext->options->usesGeometryShader) { - if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) + if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex && rasterizationEnabled) _emitVSOutputs(decompilerContext, isRectVertexShader); } else @@ -351,7 +351,7 @@ namespace LatteDecompiler } } - static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader) + static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled) { auto src = decompilerContext->shaderSource; @@ -410,7 +410,7 @@ namespace LatteDecompiler // uniform buffers _emitUniformBuffers(decompilerContext); // inputs and outputs - _emitInputsAndOutputs(decompilerContext, isRectVertexShader); + _emitInputsAndOutputs(decompilerContext, isRectVertexShader, rasterizationEnabled); if (dump_shaders_enabled) decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 8273ec16..931b6149 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -9,6 +9,7 @@ #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/ISA/RegDefines.h" +#include "Cemu/Logging/CemuLogging.h" #include "config/ActiveSettings.h" static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) @@ -188,8 +189,31 @@ extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; template -void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +void SetFragmentState(T* desc, CachedFBOMtl* lastUsedFBO, CachedFBOMtl* activeFBO, const LatteDecompilerShader* pixelShader, const LatteContextRegister& lcr) { + // Rasterization + bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); + + // HACK + // TODO: include this in the hash? + if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + rasterizationEnabled = true; + + // Culling both front and back faces effectively disables rasterization + const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; + uint32 cullFront = polygonControlReg.get_CULL_FRONT(); + uint32 cullBack = polygonControlReg.get_CULL_BACK(); + if (cullFront && cullBack) + rasterizationEnabled = false; + + desc->setRasterizationEnabled(rasterizationEnabled); + + if (rasterizationEnabled) + { + auto pixelShaderMtl = static_cast(pixelShader->shader); + desc->setFragmentFunction(pixelShaderMtl->GetFunction()); + } + // Color attachments const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); @@ -284,7 +308,7 @@ MetalPipelineCache::~MetalPipelineCache() m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error); if (error) { - debug_printf("failed to serialize binary archive: %s\n", error->localizedDescription()->utf8String()); + cemuLog_log(LogType::Force, "failed to serialize binary archive: {}", error->localizedDescription()->utf8String()); error->release(); } m_binaryArchive->release(); @@ -362,17 +386,15 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte layout->setStride(bufferStride); } - auto mtlVertexShader = static_cast(vertexShader->shader); - auto mtlPixelShader = static_cast(pixelShader->shader); + auto vertexShaderMtl = static_cast(vertexShader->shader); // Render pipeline state MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(mtlVertexShader->GetFunction()); - desc->setFragmentFunction(mtlPixelShader->GetFunction()); + desc->setVertexFunction(vertexShaderMtl->GetFunction()); // TODO: don't always set the vertex descriptor? desc->setVertexDescriptor(vertexDescriptor); - SetFragmentState(desc, lastUsedFBO, activeFBO, lcr); + SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); TryLoadBinaryArchive(); @@ -391,9 +413,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte #endif pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error); - //static uint32 oldPipelineCount = 0; - //static uint32 newPipelineCount = 0; - // Pipeline wasn't found in the binary archive, we need to compile it if (error) { @@ -407,7 +426,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error); if (error) { - debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); + cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); error->release(); } else @@ -419,19 +438,12 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte m_binaryArchive->addRenderPipelineFunctions(desc, &error); if (error) { - debug_printf("error saving render pipeline functions: %s\n", error->localizedDescription()->utf8String()); + cemuLog_log(LogType::Force, "error saving render pipeline functions: {}", error->localizedDescription()->utf8String()); error->release(); } } } - - //newPipelineCount++; } - //else - //{ - // oldPipelineCount++; - //} - //debug_printf("%u pipelines were found in the binary archive, %u new were created\n", oldPipelineCount, newPipelineCount); desc->release(); vertexDescriptor->release(); @@ -452,26 +464,24 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe if (pipeline) return pipeline; - auto mtlObjectShader = static_cast(vertexShader->shader); - RendererShaderMtl* mtlMeshShader; + auto objectShaderMtl = static_cast(vertexShader->shader); + RendererShaderMtl* meshShaderMtl; if (geometryShader) { - mtlMeshShader = static_cast(geometryShader->shader); + meshShaderMtl = static_cast(geometryShader->shader); } else { // If there is no geometry shader, it means that we are emulating rects - mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); + meshShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr); } - auto mtlPixelShader = static_cast(pixelShader->shader); // Render pipeline state MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); - desc->setObjectFunction(mtlObjectShader->GetFunction()); - desc->setMeshFunction(mtlMeshShader->GetFunction()); - desc->setFragmentFunction(mtlPixelShader->GetFunction()); + desc->setObjectFunction(objectShaderMtl->GetFunction()); + desc->setMeshFunction(meshShaderMtl->GetFunction()); - SetFragmentState(desc, lastUsedFBO, activeFBO, lcr); + SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); TryLoadBinaryArchive(); @@ -486,7 +496,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe desc->release(); if (error) { - debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); + cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String()); error->release(); } @@ -594,7 +604,7 @@ void MetalPipelineCache::TryLoadBinaryArchive() m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); if (error) { - debug_printf("failed to create binary archive: %s\n", error->localizedDescription()->utf8String()); + cemuLog_log(LogType::Force, "failed to create binary archive: {}", error->localizedDescription()->utf8String()); error->release(); } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 545295dc..3c3ed106 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -879,7 +879,7 @@ void MetalRenderer::draw_beginSequence() LatteRenderTarget_updateScissorBox(); // check for conditions which would turn the drawcalls into no-ops - bool rasterizerEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL() == false; + bool rasterizerEnable = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); // GX2SetSpecialState(0, true) enables DX_RASTERIZATION_KILL, but still expects depth writes to happen? -> Research which stages are disabled by DX_RASTERIZATION_KILL exactly // for now we use a workaround: @@ -888,18 +888,6 @@ void MetalRenderer::draw_beginSequence() if (!rasterizerEnable && !streamoutEnable) m_state.m_skipDrawSequence = true; - - // Both faces are culled - // TODO: can we really skip the draw? - const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; - uint32 cullFront = polygonControlReg.get_CULL_FRONT(); - uint32 cullBack = polygonControlReg.get_CULL_BACK(); - if (cullFront && cullBack) - m_state.m_skipDrawSequence = true; - - // TODO: is this even needed? - if (!m_state.m_activeFBO) - m_state.m_skipDrawSequence = true; } void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) @@ -1065,23 +1053,23 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // Cull mode - // Handled in draw_beginSequence - if (cullFront && cullBack) - cemu_assert_suspicious(); + // Cull front and back is handled by disabling rasterization + if (!(cullFront && cullBack)) + { + MTL::CullMode cullMode; + if (cullFront) + cullMode = MTL::CullModeFront; + else if (cullBack) + cullMode = MTL::CullModeBack; + else + cullMode = MTL::CullModeNone; - MTL::CullMode cullMode; - if (cullFront) - cullMode = MTL::CullModeFront; - else if (cullBack) - cullMode = MTL::CullModeBack; - else - cullMode = MTL::CullModeNone; - - if (cullMode != encoderState.m_cullMode) - { - renderCommandEncoder->setCullMode(cullMode); - encoderState.m_cullMode = cullMode; - } + if (cullMode != encoderState.m_cullMode) + { + renderCommandEncoder->setCullMode(cullMode); + encoderState.m_cullMode = cullMode; + } + } // Front face MTL::Winding frontFaceWinding; @@ -1164,12 +1152,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 else renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew); - // HACK if (!renderPipelineState) - { - printf("invalid render pipeline state, skipping draw\n"); return; - } if (renderPipelineState != encoderState.m_renderPipelineState) {