From 67a64c9fe9d5466ab9b52c2bd49881e79925b78f Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 22 Aug 2024 13:58:50 +0200 Subject: [PATCH] rework the binding system --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 6 +- .../LegacyShaderDecompiler/LatteDecompiler.h | 5 +- .../LatteDecompilerAnalyzer.cpp | 24 ++- .../LatteDecompilerEmitMSLHeader.hpp | 51 +++-- .../LatteDecompilerInternal.h | 2 + .../HW/Latte/Renderer/Metal/MetalCommon.h | 2 - .../Renderer/Metal/MetalMemoryManager.cpp | 19 +- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 179 +++++++++--------- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 19 +- .../Renderer/Metal/RendererShaderMtl.cpp | 22 ++- 10 files changed, 168 insertions(+), 161 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 66539a76..d17fd57d 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -617,10 +617,12 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi shader->baseHash = baseHash; // copy resource mapping // HACK - if (g_renderer->GetType() != RendererAPI::OpenGL) + if (g_renderer->GetType() == RendererAPI::Vulkan) shader->resourceMapping = decompilerOutput.resourceMappingVK; - else + else if (g_renderer->GetType() == RendererAPI::OpenGL) shader->resourceMapping = decompilerOutput.resourceMappingGL; + else + shader->resourceMapping = decompilerOutput.resourceMappingMTL; // copy texture info shader->textureUnitMask2 = decompilerOutput.textureUnitMask; // copy streamout info diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 57df13b1..5d8b2c6f 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -57,12 +57,14 @@ struct LatteDecompilerShaderResourceMapping // texture sint8 textureUnitToBindingPoint[LATTE_NUM_MAX_TEX_UNITS]; // 
uniform buffer - sint8 uniformVarsBufferBindingPoint{}; // special block for uniform registers/remapped array/custom variables + sint8 uniformVarsBufferBindingPoint{-1}; // special block for uniform registers/remapped array/custom variables sint8 uniformBuffersBindingPoint[LATTE_NUM_MAX_UNIFORM_BUFFERS]; // shader storage buffer for transform feedback (if alternative mode is used) sint8 tfStorageBindingPoint{-1}; // attributes (vertex shader only) sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS]; + // Metal exclusive + sint8 indexBufferBinding{-1}; sint32 getTextureCount() { @@ -288,6 +290,7 @@ struct LatteDecompilerOutput_t // mapping and binding information LatteDecompilerShaderResourceMapping resourceMappingGL; LatteDecompilerShaderResourceMapping resourceMappingVK; + LatteDecompilerShaderResourceMapping resourceMappingMTL; }; struct LatteDecompilerSubroutineInfo; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index e84e4851..9a3db895 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -498,6 +498,18 @@ namespace LatteDecompiler } } + void _initTextureBindingPointsMTL(LatteDecompilerShaderContext* decompilerContext) + { + // for Metal we use consecutive indices + for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++) + { + if (!decompilerContext->output->textureUnitMask[i]) + continue; + decompilerContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] = decompilerContext->currentTextureBindingPointMTL; + decompilerContext->currentTextureBindingPointMTL++; + } + } + void _initHasUniformVarBlock(LatteDecompilerShaderContext* decompilerContext) { decompilerContext->hasUniformVarBlock = false; @@ -552,14 +564,13 @@ namespace LatteDecompiler } } // assign binding point to uniform var block - 
decompilerContext->output->resourceMappingGL.uniformVarsBufferBindingPoint = -1; // OpenGL currently doesnt use a uniform block if (decompilerContext->hasUniformVarBlock) { decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = decompilerContext->currentBindingPointVK; decompilerContext->currentBindingPointVK++; + decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint = decompilerContext->currentBufferBindingPointMTL; + decompilerContext->currentBufferBindingPointMTL++; } - else - decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = -1; // assign binding points to uniform buffers if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK) { @@ -578,6 +589,8 @@ namespace LatteDecompiler decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] = decompilerContext->currentBindingPointVK; decompilerContext->currentBindingPointVK++; + decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] = decompilerContext->currentBufferBindingPointMTL; + decompilerContext->currentBufferBindingPointMTL++; } // for OpenGL we use the relative buffer index for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) @@ -599,6 +612,8 @@ namespace LatteDecompiler { decompilerContext->output->resourceMappingVK.tfStorageBindingPoint = decompilerContext->currentBindingPointVK; decompilerContext->currentBindingPointVK++; + decompilerContext->output->resourceMappingMTL.tfStorageBindingPoint = decompilerContext->currentBufferBindingPointMTL; + decompilerContext->currentBufferBindingPointMTL++; } } @@ -615,6 +630,7 @@ namespace LatteDecompiler { decompilerContext->output->resourceMappingGL.attributeMapping[i] = bindingIndex; decompilerContext->output->resourceMappingVK.attributeMapping[i] = bindingIndex; + decompilerContext->output->resourceMappingMTL.attributeMapping[i] = bindingIndex; bindingIndex++; } } @@ -1000,6 +1016,8 @@ void 
LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD shaderContext->output->resourceMappingVK.setIndex = 2; LatteDecompiler::_initTextureBindingPointsGL(shaderContext); LatteDecompiler::_initTextureBindingPointsVK(shaderContext); + LatteDecompiler::_initTextureBindingPointsMTL(shaderContext); LatteDecompiler::_initUniformBindingPoints(shaderContext); LatteDecompiler::_initAttributeBindingPoints(shaderContext); + shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 5e1b4c11..53332f7c 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -129,7 +129,7 @@ namespace LatteDecompiler if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess()) continue; - cemu_assert_debug(decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] >= 0); + cemu_assert_debug(decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] >= 0); shaderSrc->addFmt("struct UBuff{} {{" _CRLF, i); shaderSrc->addFmt("float4 d[{}];" _CRLF, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE)); @@ -167,13 +167,13 @@ namespace LatteDecompiler { if (decompilerContext->analyzer.inputAttributSemanticMask[i]) { - cemu_assert_debug(decompilerContext->output->resourceMappingVK.attributeMapping[i] >= 0); + cemu_assert_debug(decompilerContext->output->resourceMappingMTL.attributeMapping[i] >= 0); src->addFmt("uint4 attrDataSem{}", i); if (decompilerContext->options->usesGeometryShader) - attributeNames += "#define ATTRIBUTE_NAME" + 
std::to_string((sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]) + " attrDataSem" + std::to_string(i) + "\n"; + attributeNames += "#define ATTRIBUTE_NAME" + std::to_string((sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]) + " attrDataSem" + std::to_string(i) + "\n"; else - src->addFmt(" [[attribute({})]]", (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]); + src->addFmt(" [[attribute({})]]", (sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]); src->add(";" _CRLF); } } @@ -424,9 +424,9 @@ namespace LatteDecompiler if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess()) continue; - cemu_assert_debug(decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] >= 0); + cemu_assert_debug(decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] >= 0); - src->addFmt(", constant UBuff{}& ubuff{} [[buffer({})]]", i, i, (sint32)decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i]); + src->addFmt(", constant UBuff{}& ubuff{} [[buffer({})]]", i, i, (sint32)decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i]); } } } @@ -472,8 +472,8 @@ namespace LatteDecompiler cemu_assert_unimplemented(); } - uint32 binding = shaderContext->output->resourceMappingVK.textureUnitToBindingPoint[i]; - //uint32 textureBinding = shaderContext->output->resourceMappingVK.textureUnitToBindingPoint[i] % 31; + uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i]; + //uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31; //uint32 samplerBinding = textureBinding % 16; src->addFmt(" tex{} [[texture({})]]", i, binding); src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding); @@ -484,51 +484,44 @@ namespace LatteDecompiler { auto src = decompilerContext->shaderSource; - switch (decompilerContext->shaderType) - { - case 
LatteConst::ShaderType::Vertex: - if (!(decompilerContext->options->usesGeometryShader || isRectVertexShader)) - src->add("VertexIn in [[stage_in]], "); - break; - case LatteConst::ShaderType::Pixel: - src->add("FragmentIn in [[stage_in]], "); - break; - default: - break; - } - - src->add("constant SupportBuffer& supportBuffer [[buffer(30)]]"); switch (decompilerContext->shaderType) { case LatteConst::ShaderType::Vertex: if (decompilerContext->options->usesGeometryShader || isRectVertexShader) { - src->add(", object_data ObjectPayload& objectPayload [[payload]]"); - src->add(", mesh_grid_properties meshGridProperties"); - src->add(", uint tig [[threadgroup_position_in_grid]]"); - src->add(", uint tid [[thread_index_in_threadgroup]]"); - src->add(" VERTEX_BUFFER_DEFINITIONS"); + src->add("object_data ObjectPayload& objectPayload [[payload]]"); + src->add(", mesh_grid_properties meshGridProperties"); + src->add(", uint tig [[threadgroup_position_in_grid]]"); + src->add(", uint tid [[thread_index_in_threadgroup]]"); + src->add(" VERTEX_BUFFER_DEFINITIONS"); } else { + src->add("VertexIn in [[stage_in]]"); src->add(", uint vid [[vertex_id]]"); src->add(", uint iid [[instance_id]]"); } break; case LatteConst::ShaderType::Geometry: - src->add(", MeshType mesh"); + src->add("MeshType mesh"); src->add(", const object_data ObjectPayload& objectPayload [[payload]]"); break; case LatteConst::ShaderType::Pixel: + src->add("FragmentIn in [[stage_in]]"); src->add(", bool frontFacing [[front_facing]]"); + break; + default: break; } + if (decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint >= 0) + src->addFmt(", constant SupportBuffer& supportBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint); + // streamout buffer (transform feedback) if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || decompilerContext->shaderType == 
LatteConst::ShaderType::Geometry) { if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite) - src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingVK.tfStorageBindingPoint); + src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingMTL.tfStorageBindingPoint); } // uniform buffers diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h index 4b85d458..f4135640 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h @@ -260,6 +260,8 @@ struct LatteDecompilerShaderContext // emitter bool hasUniformVarBlock; sint32 currentBindingPointVK{}; + sint32 currentBufferBindingPointMTL{}; + sint32 currentTextureBindingPointMTL{}; struct ALUClauseTemporariesState* aluPVPSState{nullptr}; // misc std::vector list_subroutines; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index 926af5f4..84956786 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -23,8 +23,6 @@ struct MetalPixelFormatSupport #define MAX_MTL_BUFFERS 31 // Buffer index 30 is reserved for the support buffer, buffer indices 27-29 are reserved for the helper shaders #define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 5) -// TODO: don't harcdode the support buffer binding -#define MTL_SUPPORT_BUFFER_BINDING 30 #define MAX_MTL_TEXTURES 31 #define MAX_MTL_SAMPLERS 16 diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index 93c6ec86..f0c12217 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -2,6 +2,7 @@ #include 
"Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h" #include "Common/precompiled.h" +#include "HW/Latte/Renderer/Metal/MetalRenderer.h" MetalVertexBufferCache::~MetalVertexBufferCache() { @@ -42,11 +43,8 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu renderCommandEncoder->setRenderPipelineState(m_restrideBufferPipeline->GetRenderPipelineState()); m_mtlr->GetEncoderState().m_renderPipelineState = m_restrideBufferPipeline->GetRenderPipelineState(); - MTL::Buffer* buffers[] = {bufferCache, buffer}; - size_t offsets[] = {vertexBufferRange.offset, restrideInfo.allocation.offset}; - renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(GET_HELPER_BUFFER_BINDING(0), 2)); - m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = INVALID_OFFSET; - m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(1)] = INVALID_OFFSET; + m_mtlr->SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, bufferCache, vertexBufferRange.offset, GET_HELPER_BUFFER_BINDING(0)); + m_mtlr->SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, buffer, restrideInfo.allocation.offset, GET_HELPER_BUFFER_BINDING(1)); struct { @@ -54,16 +52,7 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu uint32 newStride; } strideData = {static_cast(stride), static_cast(newStride)}; renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), GET_HELPER_BUFFER_BINDING(2)); - m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(2)] = INVALID_OFFSET; - - // TODO: remove - uint32 vertexCount = vertexBufferRange.size / stride; - if (vertexCount * strideData.oldStride > buffers[0]->length() - offsets[0]) { - throw std::runtime_error("Source buffer overflow (" + std::to_string(vertexCount) + " * " + 
std::to_string(strideData.oldStride) + " > " + std::to_string(buffers[0]->length()) + " - " + std::to_string(offsets[0]) + ")"); - } - if (vertexCount * strideData.newStride > buffers[1]->length() - offsets[1]) { - throw std::runtime_error("Destination buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.newStride) + " > " + std::to_string(buffers[1]->length()) + " - " + std::to_string(offsets[1]) + ")"); - } + m_mtlr->GetEncoderState().m_buffers[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(2)] = {nullptr}; renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), vertexBufferRange.size / stride); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 64141ed1..1996ff46 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -30,63 +30,6 @@ extern bool hasValidFramebufferAttached; float supportBufferData[512 * 4]; -void SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) -{ - switch (shaderType) - { - case METAL_SHADER_TYPE_VERTEX: - renderCommandEncoder->setVertexBuffer(buffer, offset, index); - break; - case METAL_SHADER_TYPE_OBJECT: - renderCommandEncoder->setObjectBuffer(buffer, offset, index); - break; - case METAL_SHADER_TYPE_MESH: - renderCommandEncoder->setMeshBuffer(buffer, offset, index); - break; - case METAL_SHADER_TYPE_FRAGMENT: - renderCommandEncoder->setFragmentBuffer(buffer, offset, index); - break; - } -} - -void SetTexture(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Texture* texture, uint32 index) -{ - switch (shaderType) - { - case METAL_SHADER_TYPE_VERTEX: - renderCommandEncoder->setVertexTexture(texture, index); - break; - case METAL_SHADER_TYPE_OBJECT: - renderCommandEncoder->setObjectTexture(texture, index); - break; - 
case METAL_SHADER_TYPE_MESH: - renderCommandEncoder->setMeshTexture(texture, index); - break; - case METAL_SHADER_TYPE_FRAGMENT: - renderCommandEncoder->setFragmentTexture(texture, index); - break; - } -} - -void SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index) -{ - switch (shaderType) - { - case METAL_SHADER_TYPE_VERTEX: - renderCommandEncoder->setVertexSamplerState(samplerState, index); - break; - case METAL_SHADER_TYPE_OBJECT: - renderCommandEncoder->setObjectSamplerState(samplerState, index); - break; - case METAL_SHADER_TYPE_MESH: - renderCommandEncoder->setMeshSamplerState(samplerState, index); - break; - case METAL_SHADER_TYPE_FRAGMENT: - renderCommandEncoder->setFragmentSamplerState(samplerState, index); - break; - } -} - MetalRenderer::MetalRenderer() { m_device = MTL::CreateSystemDefaultDevice(); @@ -646,8 +589,6 @@ void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* so return; } - MTL::Texture* textures[] = {srcTextureMtl->GetTexture(), dstTextureMtl->GetTexture()}; - struct CopyParams { uint32 width; @@ -664,11 +605,10 @@ void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* so renderCommandEncoder->setRenderPipelineState(m_copyTextureToTexturePipeline->GetRenderPipelineState()); m_state.m_encoderState.m_renderPipelineState = m_copyTextureToTexturePipeline->GetRenderPipelineState(); - renderCommandEncoder->setVertexTextures(textures, NS::Range(GET_HELPER_TEXTURE_BINDING(0), 2)); - m_state.m_encoderState.m_textures[METAL_SHADER_TYPE_VERTEX][GET_HELPER_TEXTURE_BINDING(0)] = {(LatteTextureViewMtl*)textures[0]}; - m_state.m_encoderState.m_textures[METAL_SHADER_TYPE_VERTEX][GET_HELPER_TEXTURE_BINDING(1)] = {(LatteTextureViewMtl*)textures[1]}; + SetTexture(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, srcTextureMtl->GetTexture(), GET_HELPER_TEXTURE_BINDING(0)); + SetTexture(renderCommandEncoder, 
METAL_SHADER_TYPE_VERTEX, dstTextureMtl->GetTexture(), GET_HELPER_TEXTURE_BINDING(1)); renderCommandEncoder->setVertexBytes(&params, sizeof(params), GET_HELPER_BUFFER_BINDING(0)); - m_state.m_encoderState.m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = INVALID_OFFSET; + m_state.m_encoderState.m_buffers[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = {nullptr}; renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); } @@ -1041,10 +981,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } // Bind - if (true) - { - SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, usesGeometryShader), buffer, offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); - } + SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, usesGeometryShader), buffer, offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); } } @@ -1076,9 +1013,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex); if (usesGeometryShader) { - // TODO: don't hardcode the index if (indexBuffer) - renderCommandEncoder->setObjectBuffer(indexBuffer, indexBufferOffset, 20); + SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding); uint32 verticesPerPrimitive = 0; switch (primitiveMode) @@ -1154,6 +1090,83 @@ void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offse buffer->didModifyRange(NS::Range(offset, size)); } +void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) +{ + auto& boundBuffer = m_state.m_encoderState.m_buffers[shaderType][index]; + if (buffer == boundBuffer.m_buffer && offset == boundBuffer.m_offset) + return; + + // TODO: only set the offset if only 
offset changed + + boundBuffer = {buffer, offset}; + + switch (shaderType) + { + case METAL_SHADER_TYPE_VERTEX: + renderCommandEncoder->setVertexBuffer(buffer, offset, index); + break; + case METAL_SHADER_TYPE_OBJECT: + renderCommandEncoder->setObjectBuffer(buffer, offset, index); + break; + case METAL_SHADER_TYPE_MESH: + renderCommandEncoder->setMeshBuffer(buffer, offset, index); + break; + case METAL_SHADER_TYPE_FRAGMENT: + renderCommandEncoder->setFragmentBuffer(buffer, offset, index); + break; + } +} + +void MetalRenderer::SetTexture(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Texture* texture, uint32 index) +{ + auto& boundTexture = m_state.m_encoderState.m_textures[shaderType][index]; + if (texture == boundTexture) + return; + + boundTexture = texture; + + switch (shaderType) + { + case METAL_SHADER_TYPE_VERTEX: + renderCommandEncoder->setVertexTexture(texture, index); + break; + case METAL_SHADER_TYPE_OBJECT: + renderCommandEncoder->setObjectTexture(texture, index); + break; + case METAL_SHADER_TYPE_MESH: + renderCommandEncoder->setMeshTexture(texture, index); + break; + case METAL_SHADER_TYPE_FRAGMENT: + renderCommandEncoder->setFragmentTexture(texture, index); + break; + } +} + +void MetalRenderer::SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index) +{ + auto& boundSamplerState = m_state.m_encoderState.m_samplers[shaderType][index]; + if (samplerState == boundSamplerState) + return; + + boundSamplerState = samplerState; + + switch (shaderType) + { + case METAL_SHADER_TYPE_VERTEX: + renderCommandEncoder->setVertexSamplerState(samplerState, index); + break; + case METAL_SHADER_TYPE_OBJECT: + renderCommandEncoder->setObjectSamplerState(samplerState, index); + break; + case METAL_SHADER_TYPE_MESH: + renderCommandEncoder->setMeshSamplerState(samplerState, index); + break; + case METAL_SHADER_TYPE_FRAGMENT: + 
renderCommandEncoder->setFragmentSamplerState(samplerState, index); + break; + } +} + MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() { bool needsNewCommandBuffer = (m_commandBuffers.empty() || m_commandBuffers.back().m_commited); @@ -1447,8 +1460,8 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE UNREACHABLE; } - // TODO: uncomment - uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit]; + // TODO: correct? + uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i; if (binding >= MAX_MTL_TEXTURES) { debug_printf("invalid texture binding %u\n", binding); @@ -1491,23 +1504,11 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE { sampler = m_nearestSampler; } - - auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding]; - if (sampler != boundSampler) - { - boundSampler = sampler; - - SetSamplerState(renderCommandEncoder, mtlShaderType, sampler, binding); - } + SetSamplerState(renderCommandEncoder, mtlShaderType, sampler, binding); // get texture register word 0 uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4]; auto& boundTexture = m_state.m_encoderState.m_textures[mtlShaderType][binding]; - if (textureView == boundTexture.m_textureView && word4 == boundTexture.m_word4) - continue; - - boundTexture = {textureView, word4}; - MTL::Texture* mtlTexture = textureView->GetSwizzledView(word4); SetTexture(renderCommandEncoder, mtlShaderType, mtlTexture, binding); } @@ -1601,7 +1602,7 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE if (!HasUnifiedMemory()) buffer->didModifyRange(NS::Range(supportBuffer.offset, size)); - SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING); + SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, 
shader->resourceMapping.uniformVarsBufferBindingPoint); } // Uniform buffers @@ -1620,13 +1621,6 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE if (offset == INVALID_OFFSET) continue; - auto& boundOffset = m_state.m_encoderState.m_uniformBufferOffsets[mtlShaderType][binding]; - if (offset == boundOffset) - continue; - - boundOffset = offset; - - // TODO: only set the offset if already bound SetBuffer(renderCommandEncoder, mtlShaderType, m_memoryManager->GetBufferCache(), offset, binding); } } @@ -1635,7 +1629,6 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE if (shader->resourceMapping.tfStorageBindingPoint >= 0) { SetBuffer(renderCommandEncoder, mtlShaderType, m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint); - m_state.m_encoderState.m_uniformBufferOffsets[mtlShaderType][shader->resourceMapping.tfStorageBindingPoint] = INVALID_OFFSET; } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 896ef43a..f8e12bd6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -8,6 +8,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Metal/MTLResource.hpp" +#include "Metal/MTLSampler.hpp" struct MetalBufferAllocation { @@ -103,11 +104,11 @@ struct MetalEncoderState uint32 m_depthSlope = 0; uint32 m_depthClamp = 0; struct { - class LatteTextureViewMtl* m_textureView = nullptr; - uint32 m_word4 = INVALID_UINT32; - } m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES]; + MTL::Buffer* m_buffer; + size_t m_offset; + } m_buffers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS]; + MTL::Texture* m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES]; MTL::SamplerState* m_samplers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_SAMPLERS]; - size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS]; }; struct MetalStreamoutState @@ -360,12 +361,12 @@ public: 
for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++) { + for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++) + m_state.m_encoderState.m_buffers[i][j] = {nullptr}; for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++) - m_state.m_encoderState.m_textures[i][j] = {nullptr}; + m_state.m_encoderState.m_textures[i][j] = nullptr; for (uint32 j = 0; j < MAX_MTL_SAMPLERS; j++) m_state.m_encoderState.m_samplers[i][j] = nullptr; - for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++) - m_state.m_encoderState.m_uniformBufferOffsets[i][j] = INVALID_OFFSET; } } @@ -374,6 +375,10 @@ public: return m_state.m_encoderState; } + void SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index); + void SetTexture(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Texture* texture, uint32 index); + void SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index); + MTL::CommandBuffer* GetCommandBuffer(); bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer); void WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index dc2846ef..4c968d1c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -46,7 +46,9 @@ void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, c std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; std::string vertexBuffers = "#define VERTEX_BUFFERS "; std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n"; - if (hostIndexType != Renderer::INDEX_TYPE::NONE) + + // Index buffer + if (hostIndexType != Renderer::INDEX_TYPE::NONE) { vertexBufferDefinitions += ", device "; switch (hostIndexType) 
@@ -61,11 +63,12 @@ void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, c cemu_assert_suspicious(); break; } - // TODO: don't hardcode the index - vertexBufferDefinitions += "* indexBuffer [[buffer(20)]]"; + + vertexBufferDefinitions += fmt::format("* indexBuffer [[buffer({})]]", vertexShader->resourceMapping.indexBufferBinding); vertexBuffers += ", indexBuffer"; inputFetchDefinition += "vid = indexBuffer[vid];\n"; } + inputFetchDefinition += "VertexIn in;\n"; for (auto& bufferGroup : fetchShader->bufferGroups) { @@ -138,10 +141,10 @@ void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, c } // Fetch the attribute - inputFetchDefinition += "in.ATTRIBUTE_NAME" + std::to_string(semanticId) + " = "; - inputFetchDefinition += "uint4(*(device " + formatName + "*)"; - inputFetchDefinition += "(vertexBuffer" + std::to_string(attr.attributeBufferIndex); - inputFetchDefinition += " + vid * " + std::to_string(bufferStride) + " + " + std::to_string(attr.offset) + ")"; + inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId); + inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName); + inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex); + inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset); for (uint8 i = 0; i < (4 - componentCount); i++) inputFetchDefinition += ", 0"; inputFetchDefinition += ");\n"; @@ -157,9 +160,10 @@ void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, c } } - vertexBufferDefinitions += ", device uchar* vertexBuffer" + std::to_string(bufferIndex) + " [[buffer(" + std::to_string(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)) + ")]]"; - vertexBuffers += ", vertexBuffer" + std::to_string(bufferIndex); + vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); + vertexBuffers += fmt::format(", 
vertexBuffer{}", bufferIndex); } + inputFetchDefinition += "return in;\n"; inputFetchDefinition += "}\n";