Metal renderer: reserve binding slots for helper shaders, cache encoder state

What this patch does (all visible in the hunks below):
  * Moves the binding-slot constants from MetalRenderer.h to MetalCommon.h and
    adds GET_HELPER_{BUFFER,TEXTURE,SAMPLER}_BINDING so the utility shaders use
    the top slots (buffers 27-29, textures 29-30, samplers 14-15).
  * Injects matching GET_{BUFFER,TEXTURE,SAMPLER}_BINDING macros into the
    utility shader source. The shader body sits inside _STRINGIFY((...)), so
    the C++ side prepends them; the bases 27/29/14 in that string must be kept
    in sync with MetalCommon.h by hand.
  * Adds MetalEncoderState, which remembers the texture view/word4 and uniform
    buffer offset last bound per stage and slot. BindStageResources skips
    binds that would repeat the remembered value, and ResetEncoderState() is
    called whenever a new render/compute/blit encoder is created.
  * Indexes per-stage state by MetalShaderType (vertex, fragment) instead of
    LatteConst::ShaderType.

Review fixes applied to the added lines (no line counts changed, hunk headers
are as submitted):
  * surfaceCopy_copySurfaceWithFormatConversion bound its two textures at
    GET_HELPER_BUFFER_BINDING(0) (slots 27-28) while vertexCopyTextureToTexture
    reads texture slots GET_TEXTURE_BINDING(0..1) (29-30). It now uses
    GET_HELPER_TEXTURE_BINDING(0).
  * The function-like macros in MetalCommon.h parenthesize their argument.
  * "invalid buffer binding%u" was missing the space before %u.
  * "harcdode" -> "hardcode" in the added TODO (the removed line keeps the
    original spelling so it still matches the pre-image).
  * MetalRenderer.cpp includes MetalCommon.h via the "Cafe/..." prefix used by
    the other project includes.
  * GetMtlShaderType: review note on the default branch, see the hunk.

Reconstruction notes: the patch text reviewed had lost its line breaks,
indentation and everything between angle brackets. Blank lines were placed to
satisfy the hunk counts; indentation is a best guess (4 spaces). Angle-bracket
contents were restored only where the surrounding code determines them. The
include in the first MetalCommon.h context line and the texture2d /
texture2d_array template arguments in UtilityShaderSource.h could not be
determined and are left as found - TODO restore from the original files.

diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h
index aa71731e..0e2c6ac9 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h
@@ -2,6 +2,20 @@
 
 #include
 
+#define MAX_MTL_BUFFERS 31
+// Buffer index 30 is reserved for the support buffer, buffer indices 27-29 are reserved for the helper shaders
+#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - (index) - 5)
+// TODO: don't hardcode the support buffer binding
+#define MTL_SUPPORT_BUFFER_BINDING 30
+
+#define MAX_MTL_TEXTURES 31
+#define MAX_MTL_SAMPLERS 16
+
+#define GET_HELPER_BUFFER_BINDING(index) (27 + (index))
+#define GET_HELPER_TEXTURE_BINDING(index) (29 + (index))
+#define GET_HELPER_SAMPLER_BINDING(index) (14 + (index))
+
+constexpr uint32 INVALID_UINT32 = std::numeric_limits<uint32>::max();
 constexpr size_t INVALID_OFFSET = std::numeric_limits<size_t>::max();
 
 inline size_t Align(size_t size, size_t alignment)
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
index d48a9123..022fda8d 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
@@ -106,14 +106,14 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
         renderCommandEncoder->setRenderPipelineState(m_restrideBufferPipeline->GetRenderPipelineState());
         MTL::Buffer* buffers[] = {bufferCache, buffer};
         size_t offsets[] = {vertexBufferRange.offset, restrideInfo.allocation.bufferOffset};
-        renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(0, 2));
+        renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(GET_HELPER_BUFFER_BINDING(0), 2));
 
         struct
         {
             uint32 oldStride;
             uint32 newStride;
         } strideData = {static_cast<uint32>(stride), static_cast<uint32>(newStride)};
-        renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), 2);
+        renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), GET_HELPER_BUFFER_BINDING(2));
 
         renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), vertexBufferRange.size / stride);
 
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index f9427b54..06d74d47 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -16,6 +16,7 @@
 #include "Cafe/HW/Latte/Core/LatteIndices.h"
 #include "Cemu/Logging/CemuDebugLogging.h"
 #include "Common/precompiled.h"
+#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
 #include "Metal/MTLRenderPass.hpp"
 #include "gui/guiWrapper.h"
 
@@ -49,12 +50,10 @@ MetalRenderer::MetalRenderer()
     m_xfbRingBuffer = m_device->newBuffer(LatteStreamout_GetRingBufferSize(), MTL::StorageModeShared);
 
     // Initialize state
-    for (uint32 i = 0; i < (uint32)LatteConst::ShaderType::TotalCount; i++)
+    for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
     {
         for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
-        {
             m_state.m_uniformBufferOffsets[i][j] = INVALID_OFFSET;
-        }
     }
 
     // Utility shader library
@@ -63,7 +62,7 @@ MetalRenderer::MetalRenderer()
     std::string processedUtilityShaderSource = utilityShaderSource;
     processedUtilityShaderSource.pop_back();
     processedUtilityShaderSource.erase(processedUtilityShaderSource.begin());
-    processedUtilityShaderSource = "#include <metal_stdlib>\n" + processedUtilityShaderSource;
+    processedUtilityShaderSource = "#include <metal_stdlib>\nusing namespace metal;\n#define GET_BUFFER_BINDING(index) (27 + index)\n#define GET_TEXTURE_BINDING(index) (29 + index)\n#define GET_SAMPLER_BINDING(index) (14 + index)\n" + processedUtilityShaderSource;
 
     // Create the library
     NS::Error* error = nullptr;
@@ -233,8 +232,8 @@ void MetalRenderer::DrawBackbufferQuad(LatteTextureView* texView, RendererOutput
 
     // Draw to Metal layer
     renderCommandEncoder->setRenderPipelineState(m_state.m_usesSRGB ? m_presentPipelineSRGB : m_presentPipelineLinear);
-    renderCommandEncoder->setFragmentTexture(presentTexture, 0);
-    renderCommandEncoder->setFragmentSamplerState((useLinearTexFilter ? m_linearSampler : m_nearestSampler), 0);
+    renderCommandEncoder->setFragmentTexture(presentTexture, GET_HELPER_TEXTURE_BINDING(0));
+    renderCommandEncoder->setFragmentSamplerState((useLinearTexFilter ? m_linearSampler : m_nearestSampler), GET_HELPER_SAMPLER_BINDING(0));
 
     renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3));
 
@@ -540,8 +539,8 @@ void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* so
     renderCommandEncoder->setViewport(MTL::Viewport{0.0, 0.0, (double)effectiveCopyWidth, (double)effectiveCopyHeight, 0.0, 1.0});
     renderCommandEncoder->setScissorRect(MTL::ScissorRect{0, 0, (uint32)effectiveCopyWidth, (uint32)effectiveCopyHeight});
 
-    renderCommandEncoder->setVertexTextures(textures, NS::Range(0, 2));
-    renderCommandEncoder->setVertexBytes(&params, sizeof(params), 0);
+    renderCommandEncoder->setVertexTextures(textures, NS::Range(GET_HELPER_TEXTURE_BINDING(0), 2));
+    renderCommandEncoder->setVertexBytes(&params, sizeof(params), GET_HELPER_BUFFER_BINDING(0));
 
     renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3));
 }
@@ -596,7 +595,7 @@ void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, u
 
 void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size)
 {
-    m_state.m_uniformBufferOffsets[(uint32)shaderType][bufferIndex] = offset;
+    m_state.m_uniformBufferOffsets[GetMtlShaderType(shaderType)][bufferIndex] = offset;
 }
 
 RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader)
@@ -962,6 +961,8 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr
     m_commandEncoder = renderCommandEncoder;
     m_encoderType = MetalEncoderType::Render;
 
+    ResetEncoderState();
+
     if (rebindStateIfNewEncoder)
     {
         // Rebind all the render state
@@ -989,6 +990,8 @@ MTL::ComputeCommandEncoder* MetalRenderer::GetComputeCommandEncoder()
     m_commandEncoder = computeCommandEncoder;
     m_encoderType = MetalEncoderType::Compute;
 
+    ResetEncoderState();
+
     return computeCommandEncoder;
 }
 
@@ -1010,6 +1013,8 @@ MTL::BlitCommandEncoder* MetalRenderer::GetBlitCommandEncoder()
     m_commandEncoder = blitCommandEncoder;
     m_encoderType = MetalEncoderType::Blit;
 
+    ResetEncoderState();
+
     return blitCommandEncoder;
 }
 
@@ -1075,8 +1080,9 @@ bool MetalRenderer::AcquireNextDrawable(bool mainWindow)
 
 void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader)
 {
-    sint32 textureCount = shader->resourceMapping.getTextureCount();
+    auto mtlShaderType = GetMtlShaderType(shader->shaderType);
 
+    sint32 textureCount = shader->resourceMapping.getTextureCount();
     for (int i = 0; i < textureCount; ++i)
     {
         const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i);
@@ -1108,24 +1114,16 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
             continue;
         }
 
-        LatteTexture* baseTexture = textureView->baseTexture;
-        // get texture register word 0
-        uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4];
-
-        // TODO: wht
-        //auto imageViewObj = textureView->GetSamplerView(word4);
-        //info.imageView = imageViewObj->m_textureImageView;
 
-        // TODO: uncomment
         uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit];
-        //uint32 textureBinding = binding % MAX_MTL_TEXTURES;
-        //uint32 samplerBinding = binding % MAX_MTL_SAMPLERS;
         if (binding >= MAX_MTL_TEXTURES)
         {
             debug_printf("invalid texture binding %u\n", binding);
             continue;
         }
 
+        LatteTexture* baseTexture = textureView->baseTexture;
+
         uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit];
         if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE)
         {
@@ -1249,6 +1247,14 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
             sampler->release();
         }
 
+        // get texture register word 0
+        uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4];
+        auto& boundTexture = m_state.m_encoderState.m_textures[mtlShaderType][binding];
+        if (textureView == boundTexture.m_textureView && word4 == boundTexture.m_word4)
+            continue;
+
+        boundTexture = {textureView, word4};
+
         MTL::Texture* mtlTexture = textureView->GetSwizzledView(word4);
         switch (shader->shaderType)
         {
@@ -1376,28 +1382,36 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
         uint32 binding = shader->resourceMapping.uniformBuffersBindingPoint[i];
         if (binding >= MAX_MTL_BUFFERS)
         {
-            debug_printf("too big buffer index (%u), skipping binding\n", binding);
+            debug_printf("invalid buffer binding %u\n", binding);
             continue;
         }
-        size_t offset = m_state.m_uniformBufferOffsets[(uint32)shader->shaderType][i];
-        if (offset != INVALID_OFFSET)
-        {
-            switch (shader->shaderType)
-            {
-            case LatteConst::ShaderType::Vertex:
-            {
-                renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), offset, binding);
-                break;
-            }
-            case LatteConst::ShaderType::Pixel:
-            {
-                renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBufferCache(), offset, binding);
-                break;
-            }
-            default:
-                UNREACHABLE;
-            }
-        }
+
+        size_t offset = m_state.m_uniformBufferOffsets[mtlShaderType][i];
+        if (offset == INVALID_OFFSET)
+            continue;
+
+        auto& boundOffset = m_state.m_encoderState.m_uniformBufferOffsets[mtlShaderType][binding];
+        if (offset == boundOffset)
+            continue;
+
+        boundOffset = offset;
+
+        // TODO: only set the offset if already bound
+        switch (shader->shaderType)
+        {
+        case LatteConst::ShaderType::Vertex:
+        {
+            renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), offset, binding);
+            break;
+        }
+        case LatteConst::ShaderType::Pixel:
+        {
+            renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBufferCache(), offset, binding);
+            break;
+        }
+        default:
+            UNREACHABLE;
+        }
     }
 }
 
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
index 5e86d7d9..d151af9a 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
@@ -7,18 +7,6 @@
 
 #include "Cafe/HW/Latte/Renderer/Renderer.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
-#include "Common/precompiled.h"
-#include "Metal/MTLCommandBuffer.hpp"
-#include "Metal/MTLCommandEncoder.hpp"
-#include "Metal/MTLRenderPass.hpp"
-
-#define MAX_MTL_BUFFERS 31
-#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 2)
-// TODO: don't harcdode the support buffer binding
-#define MTL_SUPPORT_BUFFER_BINDING 30
-
-#define MAX_MTL_TEXTURES 31
-#define MAX_MTL_SAMPLERS 16
 
 struct MetalBoundBuffer
 {
@@ -29,8 +17,40 @@ struct MetalBoundBuffer
     MetalRestrideInfo restrideInfo;
 };
 
+enum MetalShaderType
+{
+    METAL_SHADER_TYPE_VERTEX,
+    METAL_SHADER_TYPE_FRAGMENT,
+
+    METAL_SHADER_TYPE_TOTAL
+};
+
+inline MetalShaderType GetMtlShaderType(LatteConst::ShaderType shaderType)
+{
+    switch (shaderType)
+    {
+    case LatteConst::ShaderType::Vertex:
+        return METAL_SHADER_TYPE_VERTEX;
+    case LatteConst::ShaderType::Pixel:
+        return METAL_SHADER_TYPE_FRAGMENT;
+    default:
+        return METAL_SHADER_TYPE_TOTAL; // NOTE(review): one past the last row of the [METAL_SHADER_TYPE_TOTAL][...] arrays this value indexes - confirm no other stage reaches buffer_bindUniformBuffer/BindStageResources
+    }
+}
+
+struct MetalEncoderState
+{
+    struct {
+        class LatteTextureViewMtl* m_textureView = nullptr;
+        uint32 m_word4 = INVALID_UINT32;
+    } m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES];
+    size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
+};
+
 struct MetalState
 {
+    MetalEncoderState m_encoderState{};
+
     bool m_usesSRGB = false;
     bool m_skipDrawSequence = false;
 
@@ -42,7 +62,7 @@ struct MetalState
     MetalBoundBuffer m_vertexBuffers[MAX_MTL_BUFFERS] = {{}};
     // TODO: find out what is the max number of bound textures on the Wii U
     class LatteTextureViewMtl* m_textures[64] = {nullptr};
-    size_t m_uniformBufferOffsets[(uint32)LatteConst::ShaderType::TotalCount][MAX_MTL_BUFFERS];
+    size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
 
     MTL::Viewport m_viewport = {0, 0, 0, 0, 0, 0};
     MTL::ScissorRect m_scissor = {0, 0, 0, 0};
@@ -255,6 +275,19 @@ public:
         return m_encoderType;
     }
 
+    void ResetEncoderState()
+    {
+        m_state.m_encoderState = {};
+
+        for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
+        {
+            for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++)
+                m_state.m_encoderState.m_textures[i][j] = {nullptr};
+            for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
+                m_state.m_encoderState.m_uniformBufferOffsets[i][j] = INVALID_OFFSET;
+        }
+    }
+
     MTL::CommandBuffer* GetCommandBuffer();
     bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer);
     void WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer);
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
index a3e5bae1..d96d3294 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
@@ -4,8 +4,6 @@
 #define _STRINGIFY(x) __STRINGIFY(x)
 
 constexpr const char* utilityShaderSource = _STRINGIFY((
-using namespace metal;
-
 constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};
 
 struct VertexOut {
@@ -22,7 +20,7 @@ vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {
     return out;
 }
 
-fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
+fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d tex [[texture(GET_TEXTURE_BINDING(0))]], sampler samplr [[sampler(GET_SAMPLER_BINDING(0))]]) {
     return tex.sample(samplr, in.texCoord);
 }
 
@@ -34,7 +32,7 @@ struct CopyParams {
     uint dstSlice;
 };
 
-vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array src [[texture(0)]], texture2d_array dst [[texture(1)]], constant CopyParams& params [[buffer(0)]]) {
+vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array src [[texture(GET_TEXTURE_BINDING(0))]], texture2d_array dst [[texture(GET_TEXTURE_BINDING(1))]], constant CopyParams& params [[buffer(GET_BUFFER_BINDING(0))]]) {
     uint2 coord = uint2(vid % params.width, vid / params.width);
     return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip);
 }
@@ -45,7 +43,7 @@ struct RestrideParams {
 };
 
 /* TODO: use uint32? Since that would require less iterations */
-vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(0)]], device uint8_t* dst [[buffer(1)]], constant RestrideParams& params [[buffer(2)]]) {
+vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) {
     for (uint32_t i = 0; i < params.oldStride; i++) {
         dst[vid * params.newStride + i] = src[vid * params.oldStride + i];
     }