diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 8cbbbe37..17ff27ea 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2187,6 +2187,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex } auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; + bool isCompare = shaderContext->shader->textureUsesDepthCompare[texInstruction->textureFetch.textureIndex]; char tempBuffer0[32]; char tempBuffer1[32]; @@ -2212,6 +2213,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex } // texture sampler opcode uint32 texOpcode = texInstruction->opcode; + // TODO: is this needed? if (shaderContext->shaderType == LatteConst::ShaderType::Vertex) { // vertex shader forces LOD to zero, but certain sampler types don't support textureLod(...) API @@ -2275,7 +2277,10 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex } else { - src->addFmt("sample(samplr{}, ", texInstruction->textureFetch.textureIndex); + src->add("sample"); + if (isCompare) + src->add("_compare"); + src->addFmt("(samplr{}, ", texInstruction->textureFetch.textureIndex); } // for textureGather() add shift (todo: depends on rounding mode set in sampler registers?) @@ -2493,61 +2498,68 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex src->addFmt(",int3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); } } - // lod bias - if( texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ ) - { - src->add(")."); - if (numWrittenElements > 1) - { - // result is copied into multiple channels - for (sint32 f = 0; f < numWrittenElements; f++) - { - cemu_assert_debug(texInstruction->dstSel[f] == 0); // only x component is defined - src->add("x"); - } - } - else - { - src->add("x"); - } - } - else - { - src->add(")."); - for (sint32 f = 0; f < 4; f++) - { - if( texInstruction->dstSel[f] < 4 ) - { - uint8 elemIndex = texInstruction->dstSel[f]; - if (texOpcode == GPU7_TEX_INST_FETCH4) - { - // 's textureGather() and GPU7's FETCH4 instruction have a different order of elements - // xyzw: top-left, top-right, bottom-right, bottom-left - // textureGather xyzw - // fetch4 yzxw - // translate index from fetch4 to textureGather order - static uint8 fetchToGather[4] = - { - 2, // x -> z - 0, // y -> x - 1, // z -> y - 3, // w -> w - }; - elemIndex = fetchToGather[elemIndex]; - } - src->add(resultElemTable[elemIndex]); - numWrittenElements++; - } - else if( texInstruction->dstSel[f] == 7 ) - { - // masked and not written - } - else - { - cemu_assert_unimplemented(); - } - } + // lod bias (TODO: wht?) + + src->add(")"); + // sample_compare doesn't return a float + if (!isCompare) + { + if( texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ ) + { + src->add("."); + + if (numWrittenElements > 1) + { + // result is copied into multiple channels + for (sint32 f = 0; f < numWrittenElements; f++) + { + cemu_assert_debug(texInstruction->dstSel[f] == 0); // only x component is defined + src->add("x"); + } + } + else + { + src->add("x"); + } + } + else + { + src->add("."); + for (sint32 f = 0; f < 4; f++) + { + if( texInstruction->dstSel[f] < 4 ) + { + uint8 elemIndex = texInstruction->dstSel[f]; + if (texOpcode == GPU7_TEX_INST_FETCH4) + { + // 's textureGather() and GPU7's FETCH4 instruction have a different order of elements + // xyzw: top-left, top-right, bottom-right, bottom-left + // textureGather xyzw + // fetch4 yzxw + // translate index from fetch4 to textureGather order + static uint8 fetchToGather[4] = + { + 2, // x -> z + 0, // y -> x + 1, // z -> y + 3, // w -> w + }; + elemIndex = fetchToGather[elemIndex]; + } + src->add(resultElemTable[elemIndex]); + numWrittenElements++; + } + else if( texInstruction->dstSel[f] == 7 ) + { + // masked and not written + } + else + { + cemu_assert_unimplemented(); + } + } + } } src->add(");"); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 114cd6fa..6a696e11 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -224,6 +224,7 @@ namespace LatteDecompiler src->add("#define GET_FRAGCOORD() vec4(in.position.xy * supportBuffer.fragCoordScale.xy, in.position.z, 1.0 / in.position.w)" _CRLF); src->add("struct FragmentIn {" _CRLF); + src->add("float4 position [[position]];" _CRLF); LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); for (sint32 i = 0; i < psInputTable->count; i++) @@ -271,7 +272,7 @@ namespace LatteDecompiler // generate depth output for pixel shader if (decompilerContext->shader->depthWritten) { - src->add("float passDepth [[depth]];" _CRLF); + src->add("float passDepth [[depth(any)]];" _CRLF); } src->add("};" _CRLF _CRLF); @@ -323,26 +324,31 @@ namespace LatteDecompiler src->add(", "); + if (shaderContext->shader->textureUsesDepthCompare[i]) + src->add("depth"); + else + src->add("texture"); + if (shaderContext->shader->textureIsIntegerFormat[i]) { // integer samplers if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D) - src->add("texture1d"); + src->add("1d"); else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) - src->add("texture2d"); + src->add("2d"); else cemu_assert_unimplemented(); } else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) - src->add("texture2d"); + src->add("2d"); else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D) - src->add("texture1d"); + src->add("1d"); else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_ARRAY) - src->add("texture2d_array"); + src->add("2d_array"); else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_CUBEMAP) - src->add("texturecube_array"); + src->add("cube_array"); else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_3D) - src->add("texture3d"); + src->add("3d"); else { cemu_assert_unimplemented(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp index edf720fb..4a6ceeb4 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp @@ -39,11 +39,12 @@ MTL::Texture* LatteTextureViewMtl::GetSwizzledView(uint32 gpuSamplerSwizzle) sint32 freeIndex = -1; for (sint32 i = 0; i < std::size(m_viewCache); i++) { - if (m_viewCache[i].key == gpuSamplerSwizzle) + const auto& entry = m_viewCache[i]; + if (entry.key == gpuSamplerSwizzle) { - return m_viewCache[i].texture; + return entry.texture; } - else if (m_viewCache[i].key == INVALID_SWIZZLE && freeIndex == -1) + else if (entry.key == INVALID_SWIZZLE && freeIndex == -1) { freeIndex = i; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h index 7a5a9dfa..fc05de5f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h @@ -29,7 +29,7 @@ private: struct { uint32 key; MTL::Texture* texture; - } m_viewCache[4] = {{INVALID_SWIZZLE, nullptr}}; + } m_viewCache[4] = {{INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}, {INVALID_SWIZZLE, nullptr}}; std::unordered_map m_fallbackViewCache; MTL::Texture* CreateSwizzledView(uint32 gpuSamplerSwizzle); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp index 4734ae42..b8f3fc52 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp @@ -30,12 +30,10 @@ MTL::DepthStencilState* MetalDepthStencilCache::GetDepthStencilState(const Latte MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); desc->setDepthWriteEnabled(depthWriteEnable); - auto depthCompareFunc = GetMtlCompareFunc(depthFunc); - if (!depthEnable) + if (depthEnable) { - depthCompareFunc = MTL::CompareFunctionAlways; + desc->setDepthCompareFunction(GetMtlCompareFunc(depthFunc)); } - desc->setDepthCompareFunction(depthCompareFunc); // Stencil state bool stencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index c19d8596..40832aa3 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -105,6 +105,7 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu { memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride); } + debug_printf("Restrided vertex buffer (old stride: %zu, new stride: %zu, old size: %zu, new size: %zu)\n", stride, newStride, vertexBufferRange->size, newSize); restrideInfo.memoryInvalidated = false; restrideInfo.lastStride = newStride; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 2462f370..c1be4dbb 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -43,7 +43,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS auto attribute = vertexDescriptor->attributes()->object(semanticId); attribute->setOffset(attr.offset); - // Bind from the end to not conflict with uniform buffers attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); attribute->setFormat(GetMtlVertexFormat(attr.format)); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 51323eec..5d35baa3 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -19,6 +19,7 @@ #include "Metal/MTLResource.hpp" #include "Metal/MTLTypes.hpp" #include "gui/guiWrapper.h" +#include extern bool hasValidFramebufferAttached; @@ -596,8 +597,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 debug_printf("no vertex function, skipping draw\n"); return; } - - auto fetchShader = vertexShader->compatibleFetchShader; + const auto fetchShader = LatteSHRC_GetActiveFetchShader(); // Render pipeline state MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.activeFBO, LatteGPUState.contextNew);