From b8e9bb355c3c2a30b960deb084cd63e70b938e63 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 17 Aug 2024 19:05:00 +0200 Subject: [PATCH] fix: GPU hangs (hack) --- .../LatteDecompilerEmitMSL.cpp | 4 ++ .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 53 +++++++++++++++++-- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 95e91d40..a46da96c 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -3284,6 +3284,8 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La continue; uint32 u32Offset = streamWrite->exportArrayBase + i; + // HACK: disable streamout temporarily, since it causes GPU hangs + continue; src->addFmt("sb[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset); src->add(" = "); @@ -3397,6 +3399,8 @@ static void _emitStreamWriteCode(LatteDecompilerShaderContext* shaderContext, La continue; uint32 u32Offset = cfInstruction->exportArrayBase + i; + // HACK: disable streamout temporarily, since it causes GPU hangs + continue; src->addFmt("sb[sbBase{} + {}]", streamoutBufferIndex, u32Offset); src->add(" = "); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index a9245383..ca4c9938 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -591,8 +591,6 @@ void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* so // TODO: do the copy in a compute shader debug_printf("surfaceCopy_copySurfaceWithFormatConversion: no active render command encoder, skipping copy\n"); } - - // TODO: restore state } void MetalRenderer::bufferCache_init(const sint32 bufferSize) @@ -745,7 +743,13 @@ void MetalRenderer::draw_execute(uint32 
baseVertex, uint32 baseInstance, uint32 const auto fetchShader = LatteSHRC_GetActiveFetchShader(); // Depth stencil state - MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew); + auto depthStencilContext = LatteGPUState.contextNew; + + // Disable depth write when there is no depth attachment + if (!m_state.m_lastUsedFBO->depthBuffer.texture) + depthStencilContext.DB_DEPTH_CONTROL.set_Z_WRITE_ENABLE(false); + + MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(depthStencilContext); if (depthStencilState != encoderState.m_depthStencilState) { renderCommandEncoder->setDepthStencilState(depthStencilState); @@ -1286,6 +1290,49 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE continue; } + if (textureDim == Latte::E_DIM::DIM_1D && (textureView->dim != Latte::E_DIM::DIM_1D)) + { + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + { + renderCommandEncoder->setVertexTexture(m_nullTexture1D, binding); + renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); + break; + } + case LatteConst::ShaderType::Pixel: + { + renderCommandEncoder->setFragmentTexture(m_nullTexture1D, binding); + renderCommandEncoder->setFragmentSamplerState(m_nearestSampler, binding); + break; + } + default: + UNREACHABLE; + } + continue; + } + else if (textureDim == Latte::E_DIM::DIM_2D && (textureView->dim != Latte::E_DIM::DIM_2D && textureView->dim != Latte::E_DIM::DIM_2D_MSAA)) + { + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + { + renderCommandEncoder->setVertexTexture(m_nullTexture2D, binding); + renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); + break; + } + case LatteConst::ShaderType::Pixel: + { + renderCommandEncoder->setFragmentTexture(m_nullTexture2D, binding); + renderCommandEncoder->setFragmentSamplerState(m_nearestSampler, binding); + break; + } + default: + UNREACHABLE; + } + 
continue; + } + LatteTexture* baseTexture = textureView->baseTexture; uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit];