From cb525b22ff9c204cd3b6bf4ace689bd6c5ce8990 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 27 Jul 2024 21:26:26 +0200 Subject: [PATCH] implement vertex descriptors & draw --- src/Cafe/CMakeLists.txt | 1 + .../LatteDecompilerEmitMSL.cpp | 7 +- .../HW/Latte/Renderer/Metal/LatteToMtl.cpp | 88 ++++++++++++++++++ src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h | 9 ++ .../Renderer/Metal/MetalMemoryManager.cpp | 26 ++++++ .../Latte/Renderer/Metal/MetalMemoryManager.h | 27 ++++-- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 90 +++++++++++++++---- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 15 +++- .../Renderer/Metal/RendererShaderMtl.cpp | 14 ++- 9 files changed, 247 insertions(+), 30 deletions(-) create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index bfb0d1b5..6dbe8781 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -549,6 +549,7 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/RendererShaderMtl.h HW/Latte/Renderer/Metal/CachedFBOMtl.cpp HW/Latte/Renderer/Metal/CachedFBOMtl.h + HW/Latte/Renderer/Metal/MetalMemoryManager.cpp HW/Latte/Renderer/Metal/MetalMemoryManager.h ) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index ce48bd63..320d8e24 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -3818,18 +3818,21 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompiler::emitHeader(shaderContext); // helper functions LatteDecompiler_emitHelperFunctions(shaderContext, src); - const char* outputTypeName; + const char* functionType = ""; + const char* outputTypeName = ""; switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: + functionType = "vertex"; outputTypeName = "VertexOut"; break; case LatteConst::ShaderType::Pixel: + functionType = "fragment"; outputTypeName = "FragmentOut"; break; } // start of main - src->addFmt("{} main0(", outputTypeName); + src->addFmt("{} {} main0(", functionType, outputTypeName); LatteDecompiler::emitInputs(shaderContext); src->add(") {" _CRLF); src->addFmt("{} out;" _CRLF, outputTypeName); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp index 0d7d14c5..7c7b4187 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp @@ -1,6 +1,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Common/precompiled.h" #include "Metal/MTLPixelFormat.hpp" +#include "Metal/MTLVertexDescriptor.hpp" // TODO: separate color and depth formats std::map MTL_FORMAT_TABLE = { @@ -106,3 +107,90 @@ size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, uint32 height, siz return CeilDivide(height, formatInfo.blockTexelSize.y) * bytesPerRow; } + +MTL::PrimitiveType GetMtlPrimitiveType(LattePrimitiveMode mode) +{ + switch (mode) + { + case LattePrimitiveMode::POINTS: + return MTL::PrimitiveTypePoint; + case LattePrimitiveMode::LINES: + return MTL::PrimitiveTypeLine; + case LattePrimitiveMode::TRIANGLES: + return MTL::PrimitiveTypeTriangle; + case LattePrimitiveMode::TRIANGLE_STRIP: + return MTL::PrimitiveTypeTriangleStrip; + default: + printf("unimplemented primitive type %u\n", (uint32)mode); + cemu_assert_debug(false); + return MTL::PrimitiveTypeTriangle; + } +} + +MTL::VertexFormat GetMtlVertexFormat(uint8 format) +{ + switch (format) + { + case FMT_32_32_32_32_FLOAT: + return MTL::VertexFormatUInt4; + case FMT_32_32_32_FLOAT: + return MTL::VertexFormatUInt3; + case FMT_32_32_FLOAT: + return MTL::VertexFormatUInt2; + case FMT_32_FLOAT: + return MTL::VertexFormatUInt; + case FMT_8_8_8_8: + return MTL::VertexFormatUChar4; + case FMT_8_8_8: + return MTL::VertexFormatUChar3; + case FMT_8_8: + return MTL::VertexFormatUChar2; + case FMT_8: + return MTL::VertexFormatUChar; + case FMT_32_32_32_32: + return MTL::VertexFormatUInt4; + case FMT_32_32_32: + return MTL::VertexFormatUInt3; + case FMT_32_32: + return MTL::VertexFormatUInt2; + case FMT_32: + return MTL::VertexFormatUInt; + case FMT_16_16_16_16: + return MTL::VertexFormatUShort4; // verified to match OpenGL + case FMT_16_16_16: + return MTL::VertexFormatUShort3; + case FMT_16_16: + return MTL::VertexFormatUShort2; + case FMT_16: + return MTL::VertexFormatUShort; + case FMT_16_16_16_16_FLOAT: + return MTL::VertexFormatUShort4; // verified to match OpenGL + case FMT_16_16_16_FLOAT: + return MTL::VertexFormatUShort3; + case FMT_16_16_FLOAT: + return MTL::VertexFormatUShort2; + case FMT_16_FLOAT: + return MTL::VertexFormatUShort; + case FMT_2_10_10_10: + return MTL::VertexFormatUInt; // verified to match OpenGL + default: + printf("unsupported vertex format: %u\n", (uint32)format); + assert_dbg(); + return MTL::VertexFormatInvalid; + } +} + +MTL::IndexType GetMtlIndexType(Renderer::INDEX_TYPE indexType) +{ + switch (indexType) + { + case Renderer::INDEX_TYPE::U16: + return MTL::IndexTypeUInt16; + case Renderer::INDEX_TYPE::U32: + return MTL::IndexTypeUInt32; + default: + printf("unsupported index type: %u\n", (uint32)indexType); + assert_dbg(); + return MTL::IndexTypeUInt32; + } +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h index e4a82156..a0c1b939 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h @@ -3,6 +3,9 @@ #include #include "Cafe/HW/Latte/ISA/LatteReg.h" +#include "Cafe/HW/Latte/Core/LatteConst.h" +//#include "Cafe/HW/Latte/Core/FetchShader.h" +#include "Cafe/HW/Latte/Renderer/Renderer.h" struct Uvec2 { uint32 x; @@ -20,3 +23,9 @@ const MtlPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format); size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, uint32 width); size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, uint32 height, size_t bytesPerRow); + +MTL::PrimitiveType GetMtlPrimitiveType(LattePrimitiveMode mode); + +MTL::VertexFormat GetMtlVertexFormat(uint8 format); + +MTL::IndexType GetMtlIndexType(Renderer::INDEX_TYPE indexType); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp new file mode 100644 index 00000000..e85ede40 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -0,0 +1,26 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" + +void* MetalMemoryManager::GetTextureUploadBuffer(size_t size) +{ + if (m_textureUploadBuffer.size() < size) + { + m_textureUploadBuffer.resize(size); + } + + return m_textureUploadBuffer.data(); +} + +// TODO: optimize this +MetalBufferAllocation MetalMemoryManager::GetBufferAllocation(size_t size) +{ + MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModeShared); + + MetalBufferAllocation allocation; + allocation.bufferIndex = m_buffers.size(); + allocation.bufferOffset = 0; + allocation.data = buffer->contents(); + + m_buffers.push_back(buffer); + + return allocation; +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h index d767f232..89ccd244 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h @@ -1,22 +1,33 @@ #pragma once +#include + #include "Cafe/HW/Latte/ISA/LatteReg.h" +struct MetalBufferAllocation +{ + void* data; + uint32 bufferIndex; + size_t bufferOffset; +}; + class MetalMemoryManager { public: - MetalMemoryManager() = default; + MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} - void* GetTextureUploadBuffer(size_t size) + MTL::Buffer* GetBuffer(uint32 bufferIndex) { - if (m_textureUploadBuffer.size() < size) - { - m_textureUploadBuffer.resize(size); - } - - return m_textureUploadBuffer.data(); + return m_buffers[bufferIndex]; } + void* GetTextureUploadBuffer(size_t size); + + MetalBufferAllocation GetBufferAllocation(size_t size); + private: + class MetalRenderer* m_mtlr; + std::vector m_textureUploadBuffer; + std::vector m_buffers; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 40c331d0..21b8f9fa 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -5,8 +5,10 @@ #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "HW/Latte/Core/LatteShader.h" +#include "Cafe/HW/Latte/Core/FetchShader.h" +#include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Core/LatteIndices.h" +#include "Metal/MTLVertexDescriptor.hpp" #include "gui/guiWrapper.h" extern bool hasValidFramebufferAttached; @@ -15,10 +17,14 @@ MetalRenderer::MetalRenderer() { m_device = MTL::CreateSystemDefaultDevice(); m_commandQueue = m_device->newCommandQueue(); + + m_memoryManager = new MetalMemoryManager(this); } MetalRenderer::~MetalRenderer() { + delete m_memoryManager; + m_commandQueue->release(); m_device->release(); } @@ -155,7 +161,7 @@ void MetalRenderer::rendertarget_bindFramebufferObject(LatteCachedFBO* cfbo) void* MetalRenderer::texture_acquireTextureUploadBuffer(uint32 size) { - return m_memoryManager.GetTextureUploadBuffer(size); + return m_memoryManager->GetTextureUploadBuffer(size); } void MetalRenderer::texture_releaseTextureUploadBuffer(uint8* mem) @@ -431,8 +437,7 @@ void MetalRenderer::draw_beginSequence() } // apply render target - // HACK: not implemented yet - //LatteMRT::ApplyCurrentState(); + LatteMRT::ApplyCurrentState(); // viewport and scissor box LatteRenderTarget_updateViewport(); @@ -453,6 +458,8 @@ void MetalRenderer::draw_beginSequence() void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) { std::cout << "DRAW" << std::endl; + + ensureCommandBuffer(); // TODO: uncomment //if (m_state.skipDrawSequence) //{ @@ -461,8 +468,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 //} // Render pass - LatteMRT::ApplyCurrentState(); - if (!m_state.activeFBO) { printf("no active FBO, skipping draw\n"); @@ -470,17 +475,70 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } auto renderPassDescriptor = m_state.activeFBO->GetRenderPassDescriptor(); - m_renderCommandEncoder = m_commandBuffer->renderCommandEncoder(renderPassDescriptor); + beginRenderPassIfNeeded(renderPassDescriptor); // Shaders - /* LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); + auto fetchShader = vertexShader->compatibleFetchShader; + + // Vertex descriptor + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); + for (auto& bufferGroup : fetchShader->bufferGroups) + { + std::optional fetchType; + + for (sint32 j = 0; j < bufferGroup.attribCount; ++j) + { + auto& attr = bufferGroup.attrib[j]; + + uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; + if (semanticId == (uint32)-1) + continue; // attribute not used? + + auto attribute = vertexDescriptor->attributes()->object(semanticId); + attribute->setOffset(attr.offset); + // Bind from the end to not conflict with uniform buffers + attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); + attribute->setFormat(GetMtlVertexFormat(attr.format)); + + if (fetchType.has_value()) + cemu_assert_debug(fetchType == attr.fetchType); + else + fetchType = attr.fetchType; + + if (attr.fetchType == LatteConst::INSTANCE_DATA) + { + cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported + } + } + + uint32 bufferIndex = bufferGroup.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + // TODO: is LatteGPUState.contextNew correct? + uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + printf("buffer %u has stride %u bytes\n", bufferIndex, bufferStride); + + auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); + layout->setStride(bufferStride); + if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) + layout->setStepFunction(MTL::VertexStepFunctionPerVertex); + else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) + layout->setStepFunction(MTL::VertexStepFunctionPerInstance); + else + { + printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value()); + cemu_assert(false); + } + } + // Render pipeline state MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); renderPipelineDescriptor->setVertexFunction(static_cast(vertexShader->shader)->GetFunction()); renderPipelineDescriptor->setFragmentFunction(static_cast(pixelShader->shader)->GetFunction()); + // TODO: don't always set the vertex descriptor + renderPipelineDescriptor->setVertexDescriptor(vertexDescriptor); NS::Error* error = nullptr; MTL::RenderPipelineState* renderPipelineState = m_device->newRenderPipelineState(renderPipelineDescriptor, &error); @@ -489,12 +547,12 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); return; } + m_renderCommandEncoder->setRenderPipelineState(renderPipelineState); // TODO: bind resources const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); - // TODO: uncomment - //auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); + auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); Renderer::INDEX_TYPE hostIndexType; uint32 hostIndexCount; @@ -503,21 +561,17 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 uint32 indexBufferOffset = 0; uint32 indexBufferIndex = 0; LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex); - */ // Draw - // TODO: uncomment - /* if (hostIndexType != INDEX_TYPE::NONE) { auto mtlIndexType = GetMtlIndexType(hostIndexType); - // TODO: get index buffer + MTL::Buffer* indexBuffer = m_memoryManager->GetBuffer(indexBufferIndex); m_renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, 0, instanceCount, baseVertex, baseInstance); } else { m_renderCommandEncoder->drawPrimitives(mtlPrimitiveType, baseVertex, count, instanceCount, baseInstance); } - */ } void MetalRenderer::draw_endSequence() @@ -527,9 +581,11 @@ void MetalRenderer::draw_endSequence() void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) { - printf("MetalRenderer::indexData_reserveIndexMemory not implemented\n"); + auto allocation = m_memoryManager->GetBufferAllocation(size); + offset = allocation.bufferOffset; + bufferIndex = allocation.bufferIndex; - return nullptr; + return allocation.data; } void MetalRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 4adf0984..0b1063f9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -7,8 +7,10 @@ #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Metal/MTLRenderCommandEncoder.hpp" +#include "Metal/MTLRenderPass.hpp" #define MAX_MTL_BUFFERS 31 +#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 1) struct MetalState { @@ -168,7 +170,7 @@ public: private: CA::MetalLayer* m_metalLayer; - MetalMemoryManager m_memoryManager; + MetalMemoryManager* m_memoryManager; // Metal objects MTL::Device* m_device; @@ -186,7 +188,18 @@ private: { if (!m_commandBuffer) { + // Debug + m_commandQueue->insertDebugCaptureBoundary(); + m_commandBuffer = m_commandQueue->commandBuffer(); } } + + void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor) + { + if (!m_renderCommandEncoder) + { + m_renderCommandEncoder = m_commandBuffer->renderCommandEncoder(renderPassDescriptor); + } + } }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 28b38612..b870fc68 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -1,6 +1,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cemu/Logging/CemuLogging.h" +#include "Metal/MTLFunctionDescriptor.hpp" RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader) @@ -9,11 +10,20 @@ RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type MTL::Library* library = mtlRenderer->GetDevice()->newLibrary(NS::String::string(mslCode.c_str(), NS::ASCIIStringEncoding), nullptr, &error); if (error) { - printf("Failed to create library (error: %s) -> source:\n%s", error->localizedDescription()->utf8String(), mslCode.c_str()); + printf("Failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str()); + error->release(); + return; + } + MTL::FunctionDescriptor* desc = MTL::FunctionDescriptor::alloc()->init(); + desc->setName(NS::String::string("main0", NS::ASCIIStringEncoding)); + error = nullptr; + m_function = library->newFunction(desc, &error); + if (error) + { + printf("Failed to create function (error: %s)\n", error->localizedDescription()->utf8String()); error->release(); return; } - m_function = library->newFunction(NS::String::string("main0", NS::ASCIIStringEncoding)); library->release(); }