From 7ea18d8a5f62dc7133c41e92e49ca49f53a89277 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sun, 28 Jul 2024 07:36:32 +0200 Subject: [PATCH] bind resources to shaders --- .../LatteDecompilerEmitMSLHeader.hpp | 2 +- .../Renderer/Metal/MetalMemoryManager.cpp | 33 +++ .../Latte/Renderer/Metal/MetalMemoryManager.h | 11 + .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 221 +++++++++++++++++- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 18 +- 5 files changed, 271 insertions(+), 14 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 45c88631..3bc8796e 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -367,7 +367,7 @@ namespace LatteDecompiler break; } - src->add(" in [[stage_in]], constant SupportBuffer& supportBuffer [[buffer(29)]]"); + src->add(" in [[stage_in]], constant SupportBuffer& supportBuffer [[buffer(30)]]"); switch (decompilerContext->shaderType) { case LatteConst::ShaderType::Vertex: diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index e85ede40..9ec94c0e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -24,3 +24,36 @@ MetalBufferAllocation MetalMemoryManager::GetBufferAllocation(size_t size) return allocation; } + +void MetalMemoryManager::InitBufferCache(size_t size) +{ + if (m_bufferCache) + { + printf("MetalMemoryManager::InitBufferCache: buffer cache already initialized\n"); + return; + } + + m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModeShared); +} + +void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size) +{ + if (!m_bufferCache) + { + printf("MetalMemoryManager::UploadToBufferCache: buffer cache not initialized\n"); + return; + } + + memcpy((uint8*)m_bufferCache->contents() + offset, data, size); +} + +void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size) +{ + if (!m_bufferCache) + { + printf("MetalMemoryManager::CopyBufferCache: buffer cache not initialized\n"); + return; + } + + memcpy((uint8*)m_bufferCache->contents() + dstOffset, (uint8*)m_bufferCache->contents() + srcOffset, size); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h index 89ccd244..ee773047 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h @@ -21,13 +21,24 @@ public: return m_buffers[bufferIndex]; } + MTL::Buffer* GetBufferCache() + { + return m_bufferCache; + } + void* GetTextureUploadBuffer(size_t size); MetalBufferAllocation GetBufferAllocation(size_t size); + // Buffer cache + void InitBufferCache(size_t size); + void UploadToBufferCache(const void* data, size_t offset, size_t size); + void CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size); + private: class MetalRenderer* m_mtlr; std::vector m_textureUploadBuffer; std::vector m_buffers; + MTL::Buffer* m_bufferCache = nullptr; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 21b8f9fa..0bd17f09 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1,6 +1,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalLayer.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" @@ -13,6 +14,8 @@ extern bool hasValidFramebufferAttached; +float supportBufferData[512 * 4]; + MetalRenderer::MetalRenderer() { m_device = MTL::CreateSystemDefaultDevice(); @@ -329,7 +332,7 @@ LatteTexture* MetalRenderer::texture_createTextureEx(Latte::E_DIM dim, MPTR phys void MetalRenderer::texture_setLatteTexture(LatteTextureView* textureView, uint32 textureUnit) { - printf("MetalRenderer::texture_setLatteTexture not implemented\n"); + m_state.textures[textureUnit] = static_cast(textureView); } void MetalRenderer::texture_copyImageSubData(LatteTexture* src, sint32 srcMip, sint32 effectiveSrcX, sint32 effectiveSrcY, sint32 srcSlice, LatteTexture* dst, sint32 dstMip, sint32 effectiveDstX, sint32 effectiveDstY, sint32 dstSlice, sint32 effectiveCopyWidth, sint32 effectiveCopyHeight, sint32 srcDepth) @@ -351,17 +354,17 @@ void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* so void MetalRenderer::bufferCache_init(const sint32 bufferSize) { - printf("MetalRenderer::bufferCache_init not implemented\n"); + m_memoryManager->InitBufferCache(bufferSize); } void MetalRenderer::bufferCache_upload(uint8* buffer, sint32 size, uint32 bufferOffset) { - printf("MetalRenderer::bufferCache_upload not implemented\n"); + m_memoryManager->UploadToBufferCache(buffer, bufferOffset, size); } void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size) { - printf("MetalRenderer::bufferCache_copy not implemented\n"); + m_memoryManager->CopyBufferCache(srcOffset, dstOffset, size); } void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size) @@ -371,7 +374,11 @@ void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size) { - printf("MetalRenderer::buffer_bindVertexBuffer not implemented\n"); + if (m_state.vertexBuffers[bufferIndex].offset == offset) + return; + cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS); + m_state.vertexBuffers[bufferIndex].needsRebind = true; + m_state.vertexBuffers[bufferIndex].offset = offset; } void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) @@ -457,8 +464,6 @@ void MetalRenderer::draw_beginSequence() void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) { - std::cout << "DRAW" << std::endl; - ensureCommandBuffer(); // TODO: uncomment //if (m_state.skipDrawSequence) @@ -478,6 +483,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 beginRenderPassIfNeeded(renderPassDescriptor); // Shaders + LatteSHRC_UpdateActiveShaders(); LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); @@ -518,7 +524,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; // TODO: is LatteGPUState.contextNew correct? uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - printf("buffer %u has stride %u bytes\n", bufferIndex, bufferStride); auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); layout->setStride(bufferStride); @@ -549,11 +554,13 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } m_renderCommandEncoder->setRenderPipelineState(renderPipelineState); - // TODO: bind resources - + // Primitive type const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); + // Resources + + // Index buffer Renderer::INDEX_TYPE hostIndexType; uint32 hostIndexCount; uint32 indexMin = 0; @@ -562,6 +569,25 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 uint32 indexBufferIndex = 0; LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex); + // synchronize vertex and uniform cache and update buffer bindings + LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount); + + // Vertex buffers + for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) + { + auto& vertexBufferRange = m_state.vertexBuffers[i]; + if (vertexBufferRange.needsRebind) + { + m_renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), vertexBufferRange.offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); + // TODO: uncomment + //vertexBufferRange.needRebind = false; + } + } + + // Uniform buffers, textures and samplers + BindStageResources(vertexShader); + BindStageResources(pixelShader); + // Draw if (hostIndexType != INDEX_TYPE::NONE) { @@ -592,3 +618,178 @@ void MetalRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size) { printf("MetalRenderer::indexData_uploadIndexMemory not implemented\n"); } + +void MetalRenderer::BindStageResources(LatteDecompilerShader* shader) +{ + sint32 textureCount = shader->resourceMapping.getTextureCount(); + + for (int i = 0; i < textureCount; ++i) + { + const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i); + auto hostTextureUnit = relative_textureUnit; + auto textureDim = shader->textureUnitDim[relative_textureUnit]; + auto texUnitRegIndex = hostTextureUnit * 7; + + auto textureView = m_state.textures[hostTextureUnit]; + + //LatteTexture* baseTexture = textureView->baseTexture; + // get texture register word 0 + uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4]; + + // TODO: wht + //auto imageViewObj = textureView->GetSamplerView(word4); + //info.imageView = imageViewObj->m_textureImageView; + + uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit]; + // TODO: uncomment + MTL::SamplerState* sampler = nullptr;//basicSampler; + if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE) + { + // TODO: bind the actual sampler + } + + uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i; + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + { + m_renderCommandEncoder->setVertexTexture(textureView->GetTexture(), binding); + break; + } + case LatteConst::ShaderType::Pixel: + { + m_renderCommandEncoder->setFragmentTexture(textureView->GetTexture(), binding); + break; + } + default: + UNREACHABLE; + } + } + + // Support buffer + auto GET_UNIFORM_DATA_PTR = [&](size_t index) { return supportBufferData + (index / 4); }; + + sint32 shaderAluConst; + sint32 shaderUniformRegisterOffset; + + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + shaderAluConst = 0x400; + shaderUniformRegisterOffset = mmSQ_VTX_UNIFORM_BLOCK_START; + break; + case LatteConst::ShaderType::Pixel: + shaderAluConst = 0; + shaderUniformRegisterOffset = mmSQ_PS_UNIFORM_BLOCK_START; + break; + case LatteConst::ShaderType::Geometry: + shaderAluConst = 0; // geometry shader has no ALU const + shaderUniformRegisterOffset = mmSQ_GS_UNIFORM_BLOCK_START; + break; + default: + UNREACHABLE; + } + + //if (shader->resourceMapping.uniformVarsBufferBindingPoint >= 0) + //{ + if (shader->uniform.list_ufTexRescale.empty() == false) + { + for (auto& entry : shader->uniform.list_ufTexRescale) + { + float* xyScale = LatteTexture_getEffectiveTextureScale(shader->shaderType, entry.texUnit); + memcpy(entry.currentValue, xyScale, sizeof(float) * 2); + memcpy(GET_UNIFORM_DATA_PTR(entry.uniformLocation), xyScale, sizeof(float) * 2); + } + } + if (shader->uniform.loc_alphaTestRef >= 0) + { + *GET_UNIFORM_DATA_PTR(shader->uniform.loc_alphaTestRef) = LatteGPUState.contextNew.SX_ALPHA_REF.get_ALPHA_TEST_REF(); + } + if (shader->uniform.loc_pointSize >= 0) + { + const auto& pointSizeReg = LatteGPUState.contextNew.PA_SU_POINT_SIZE; + float pointWidth = (float)pointSizeReg.get_WIDTH() / 8.0f; + if (pointWidth == 0.0f) + pointWidth = 1.0f / 8.0f; // minimum size + *GET_UNIFORM_DATA_PTR(shader->uniform.loc_pointSize) = pointWidth; + } + if (shader->uniform.loc_remapped >= 0) + { + LatteBufferCache_LoadRemappedUniforms(shader, GET_UNIFORM_DATA_PTR(shader->uniform.loc_remapped)); + } + if (shader->uniform.loc_uniformRegister >= 0) + { + uint32* uniformRegData = (uint32*)(LatteGPUState.contextRegister + mmSQ_ALU_CONSTANT0_0 + shaderAluConst); + memcpy(GET_UNIFORM_DATA_PTR(shader->uniform.loc_uniformRegister), uniformRegData, shader->uniform.count_uniformRegister * 16); + } + if (shader->uniform.loc_windowSpaceToClipSpaceTransform >= 0) + { + sint32 viewportWidth; + sint32 viewportHeight; + LatteRenderTarget_GetCurrentVirtualViewportSize(&viewportWidth, &viewportHeight); // always call after _updateViewport() + float* v = GET_UNIFORM_DATA_PTR(shader->uniform.loc_windowSpaceToClipSpaceTransform); + v[0] = 2.0f / (float)viewportWidth; + v[1] = 2.0f / (float)viewportHeight; + } + if (shader->uniform.loc_fragCoordScale >= 0) + { + LatteMRT::GetCurrentFragCoordScale(GET_UNIFORM_DATA_PTR(shader->uniform.loc_fragCoordScale)); + } + // TODO: uncomment? + /* + if (shader->uniform.loc_verticesPerInstance >= 0) + { + *(int*)(supportBufferData + ((size_t)shader->uniform.loc_verticesPerInstance / 4)) = m_streamoutState.verticesPerInstance; + for (sint32 b = 0; b < LATTE_NUM_STREAMOUT_BUFFER; b++) + { + if (shader->uniform.loc_streamoutBufferBase[b] >= 0) + { + *(uint32*)GET_UNIFORM_DATA_PTR(shader->uniform.loc_streamoutBufferBase[b]) = m_streamoutState.buffer[b].ringBufferOffset; + } + } + } + */ + + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + { + m_renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); + break; + } + case LatteConst::ShaderType::Pixel: + { + m_renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); + break; + } + default: + UNREACHABLE; + } + //} + + // Uniform buffers + for (sint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) + { + if (shader->resourceMapping.uniformBuffersBindingPoint[i] >= 0) + { + uint32 binding = shader->resourceMapping.uniformBuffersBindingPoint[i]; + // TODO: don't hardcode + size_t offset = 0; + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + { + m_renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), offset, binding); + break; + } + case LatteConst::ShaderType::Pixel: + { + m_renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBufferCache(), offset, binding); + break; + } + default: + UNREACHABLE; + } + } + } +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 0b1063f9..fd446365 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -10,14 +10,24 @@ #include "Metal/MTLRenderPass.hpp" #define MAX_MTL_BUFFERS 31 -#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 1) +#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 2) +// TODO: don't harcdode the support buffer binding +#define MTL_SUPPORT_BUFFER_BINDING 30 + +#define MAX_MTL_TEXTURES 31 + +struct MetalBufferRange +{ + bool needsRebind = false; + sint32 offset = -1; +}; struct MetalState { bool skipDrawSequence = false; class CachedFBOMtl* activeFBO = nullptr; - //MTL::Buffer* vertexBuffers[MAX_MTL_BUFFERS] = {nullptr}; - //MTL::Buffer* indexBuffer = nullptr; + MetalBufferRange vertexBuffers[MAX_MTL_BUFFERS] = {{}}; + class LatteTextureViewMtl* textures[MAX_MTL_TEXTURES] = {nullptr}; }; class MetalRenderer : public Renderer @@ -202,4 +212,6 @@ private: m_renderCommandEncoder = m_commandBuffer->renderCommandEncoder(renderPassDescriptor); } } + + void BindStageResources(LatteDecompilerShader* shader); };