diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index 3ec0acbd..445fb823 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -1,6 +1,7 @@ #pragma once #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Common/precompiled.h" #include "Metal/MTLResource.hpp" struct MetalBufferRange @@ -39,7 +40,7 @@ public: MetalBufferAllocation GetBufferAllocation(size_t size) { // Align the size - size = Align(size, 16); + size = Align(size, 128); // First, try to find a free range for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) @@ -147,57 +148,136 @@ struct MetalSyncedBuffer { MTL::Buffer* m_buffer; std::vector m_commandBuffers; + uint32 m_lock = 0; + + bool IsLocked() const + { + return (m_lock != 0); + } }; +constexpr uint16 MAX_COMMAND_BUFFER_FRAMES = 8; + class MetalTemporaryBufferAllocator : public MetalBufferAllocator { public: - MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer, metalRenderer->GetOptimalBufferStorageMode()) {} + MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer, MTL::ResourceStorageModeShared) {} + + void LockBuffer(uint32 bufferIndex) + { + m_buffers[bufferIndex].m_lock++; + } + + void UnlockBuffer(uint32 bufferIndex) + { + auto& buffer = m_buffers[bufferIndex]; + + buffer.m_lock--; + + // TODO: is this really necessary? 
+ // Release the buffer if it wasn't released due to the lock + if (!buffer.IsLocked() && buffer.m_commandBuffers.empty()) + FreeBuffer(bufferIndex); + } + + void UnlockAllBuffers() + { + for (uint32_t i = 0; i < m_buffers.size(); i++) + { + auto& buffer = m_buffers[i]; + + if (buffer.m_lock != 0) + { + if (buffer.m_commandBuffers.empty()) + FreeBuffer(i); + + buffer.m_lock = 0; + } + } + + /* + auto it = m_commandBuffersFrames.begin(); + while (it != m_commandBuffersFrames.end()) + { + it->second++; + + if (it->second > MAX_COMMAND_BUFFER_FRAMES) + { + debug_printf("command buffer %p remained unfinished for more than %u frames\n", it->first, MAX_COMMAND_BUFFER_FRAMES); + + // Pretend like the command buffer has finished + CommandBufferFinished(it->first, false); + + it = m_commandBuffersFrames.erase(it); + } + else + { + it++; + } + } + */ + } void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer) { m_activeCommandBuffer = commandBuffer; + + //if (commandBuffer) + // m_commandBuffersFrames[commandBuffer] = 0; } - void CommandBufferFinished(MTL::CommandBuffer* commandBuffer) + void CheckForCompletedCommandBuffers(/*MTL::CommandBuffer* commandBuffer, bool erase = true*/) { for (uint32_t i = 0; i < m_buffers.size(); i++) { auto& buffer = m_buffers[i]; for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++) { - if (commandBuffer == buffer.m_commandBuffers[j]) + if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[j])) { if (buffer.m_commandBuffers.size() == 1) { - // All command buffers using it have finished execution, we can use it again - m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()}); + if (!buffer.IsLocked()) + { + // All command buffers using it have finished execution, we can use it again + FreeBuffer(i); + } buffer.m_commandBuffers.clear(); + break; } else { buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j); + j--; } - break; } } } + + //if (erase) + // m_commandBuffersFrames.erase(commandBuffer); } - // 
TODO: should this be here? It's just to ensure safety MTL::Buffer* GetBuffer(uint32 bufferIndex) { + cemu_assert_debug(m_activeCommandBuffer); + auto& buffer = m_buffers[bufferIndex]; - if (buffer.m_commandBuffers.back() != m_activeCommandBuffer) + if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer/*std::find(buffer.m_commandBuffers.begin(), buffer.m_commandBuffers.end(), m_activeCommandBuffer) == buffer.m_commandBuffers.end()*/) buffer.m_commandBuffers.push_back(m_activeCommandBuffer); return buffer.m_buffer; } + MTL::Buffer* GetBufferOutsideOfCommandBuffer(uint32 bufferIndex) + { + return m_buffers[bufferIndex].m_buffer; + } + + /* MetalBufferAllocation GetBufferAllocation(size_t size) { - // TODO: remove this if (!m_activeCommandBuffer) throw std::runtime_error("No active command buffer when allocating a buffer!"); @@ -209,7 +289,56 @@ public: return allocation; } + */ + + /* + void LogInfo() + { + debug_printf("BUFFERS:\n"); + for (auto& buffer : m_buffers) + { + debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_commandBuffers.size()); + uint32 same = 0; + uint32 completed = 0; + for (uint32 i = 0; i < buffer.m_commandBuffers.size(); i++) + { + if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[i])) + completed++; + for (uint32 j = 0; j < buffer.m_commandBuffers.size(); j++) + { + if (i != j && buffer.m_commandBuffers[i] == buffer.m_commandBuffers[j]) + same++; + } + } + debug_printf(" same: %u\n", same); + debug_printf(" completed: %u\n", completed); + } + + debug_printf("FREE RANGES:\n"); + for (auto& range : m_freeBufferRanges) + { + debug_printf(" %u -> offset: %zu, size: %zu\n", range.bufferIndex, range.offset, range.size); + } + } + */ private: MTL::CommandBuffer* m_activeCommandBuffer = nullptr; + + //std::map m_commandBuffersFrames; + + void FreeBuffer(uint32 bufferIndex) + { + // First remove any free ranges that use this buffer + for (uint32 
k = 0; k < m_freeBufferRanges.size(); k++) + { + if (m_freeBufferRanges[k].bufferIndex == bufferIndex) + { + m_freeBufferRanges.erase(m_freeBufferRanges.begin() + k); + k--; + } + } + + m_freeBufferRanges.push_back({bufferIndex, 0, m_buffers[bufferIndex].m_buffer->length()}); + } }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index eb85f1ea..7b6be6ce 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -126,7 +126,7 @@ MetalRenderer::MetalRenderer() presentFragmentFunction->release(); error = nullptr; - renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm); + renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); #ifdef CEMU_DEBUG_ASSERT renderPipelineDescriptor->setLabel(GetLabel("Present pipeline linear", renderPipelineDescriptor)); #endif @@ -138,7 +138,7 @@ MetalRenderer::MetalRenderer() } error = nullptr; - renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm_sRGB); + renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm_sRGB); #ifdef CEMU_DEBUG_ASSERT renderPipelineDescriptor->setLabel(GetLabel("Present pipeline sRGB", renderPipelineDescriptor)); #endif @@ -185,7 +185,9 @@ MetalRenderer::~MetalRenderer() void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow) { - GetLayer(mainWindow) = MetalLayerHandle(m_device, size); + auto& layer = GetLayer(mainWindow); + layer = MetalLayerHandle(m_device, size); + layer.GetLayer()->setPixelFormat(MTL::PixelFormatBGRA8Unorm); } void MetalRenderer::ResizeLayer(const Vector2i& size, bool mainWindow) @@ -240,17 +242,24 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) { if (swapTV) SwapBuffer(true); - //if (swapDRC) - // SwapBuffer(false); + if (swapDRC) + SwapBuffer(false); 
// Release all the command buffers CommitCommandBuffer(); - for (uint32 i = 0; i < m_commandBuffers.size(); i++) - m_commandBuffers[i].m_commandBuffer->release(); + // TODO: release the command buffers - the loop below is disabled, so clear() drops them without releasing them here + //for (uint32 i = 0; i < m_commandBuffers.size(); i++) + // m_commandBuffers[i].m_commandBuffer->release(); m_commandBuffers.clear(); // Release frame persistent buffers m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations(); + + // Unlock all temporary buffers + m_memoryManager->GetTemporaryBufferAllocator().UnlockAllBuffers(); + + // Check for completed command buffers + m_memoryManager->GetTemporaryBufferAllocator().CheckForCompletedCommandBuffers(); } // TODO: use `shader` for drawing @@ -381,7 +390,7 @@ ImTextureID MetalRenderer::GenerateTexture(const std::vector& data, const desc->setPixelFormat(MTL::PixelFormatRGBA8Unorm); desc->setWidth(size.x); desc->setHeight(size.y); - desc->setStorageMode(MTL::StorageModeShared); + desc->setStorageMode(m_isAppleGPU ? MTL::StorageModeShared : MTL::StorageModeManaged); desc->setUsage(MTL::TextureUsageShaderRead); MTL::Texture* texture = m_device->newTexture(desc); @@ -507,11 +516,13 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s auto blitCommandEncoder = GetBlitCommandEncoder(); // Allocate a temporary buffer - auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(compressedImageSize); - auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(allocation.bufferIndex); + auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); + auto allocation = bufferAllocator.GetBufferAllocation(compressedImageSize); + auto buffer = bufferAllocator.GetBuffer(allocation.bufferIndex); // Copy the data to the temporary buffer memcpy(allocation.data, pixelData, compressedImageSize); + //buffer->didModifyRange(NS::Range(allocation.offset, allocation.size)); // Copy the data from the temporary buffer to the texture
blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ)); @@ -936,10 +947,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 uint32 cullBack = polygonControlReg.get_CULL_BACK(); uint32 polyOffsetFrontEnable = polygonControlReg.get_OFFSET_FRONT_ENABLED(); - // TODO - //cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually - //bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false; - if (polyOffsetFrontEnable) { uint32 frontScaleU32 = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_SCALE.getRawValue(); @@ -973,6 +980,16 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } } + // Depth clip mode + cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually + bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false; + + if (zClipEnable != encoderState.m_depthClipEnable) + { + renderCommandEncoder->setDepthClipMode(zClipEnable ? MTL::DepthClipModeClip : MTL::DepthClipModeClamp); + encoderState.m_depthClipEnable = zClipEnable; + } + // todo - how does culling behave with rects? 
// right now we just assume that their winding is always CW if (isPrimitiveRect) @@ -1112,7 +1129,13 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // Draw MTL::Buffer* indexBuffer = nullptr; if (hostIndexType != INDEX_TYPE::NONE) - indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex); + { + auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); + indexBuffer = bufferAllocator.GetBuffer(indexBufferIndex); + + // We have already retrieved the buffer, no need for it to be locked anymore + bufferAllocator.UnlockBuffer(indexBufferIndex); + } if (usesGeometryShader) { if (indexBuffer) @@ -1178,18 +1201,27 @@ void MetalRenderer::draw_endSequence() void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) { - auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(size); + auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); + auto allocation = bufferAllocator.GetBufferAllocation(size); offset = allocation.offset; bufferIndex = allocation.bufferIndex; + // Lock the buffer so that it doesn't get released + bufferAllocator.LockBuffer(allocation.bufferIndex); + return allocation.data; } void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) { - auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(bufferIndex); + // Do nothing + /* if (!HasUnifiedMemory()) + { + auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBufferOutsideOfCommandBuffer(bufferIndex); buffer->didModifyRange(NS::Range(offset, size)); + } + */ } void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) @@ -1454,13 +1486,16 @@ void MetalRenderer::CommitCommandBuffer() auto& commandBuffer = m_commandBuffers.back(); if (!commandBuffer.m_commited) { - 
commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer* cmd) { - m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer); - }); + // Handled differently, since it seems like Metal doesn't always call the completion handler + //commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer*) { + // m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer); + //}); commandBuffer.m_commandBuffer->commit(); commandBuffer.m_commited = true; + m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(nullptr); + // Debug //m_commandQueue->insertDebugCaptureBoundary(); } @@ -1474,7 +1509,7 @@ bool MetalRenderer::AcquireDrawable(bool mainWindow) const bool latteBufferUsesSRGB = mainWindow ? LatteGPUState.tvBufferUsesSRGB : LatteGPUState.drcBufferUsesSRGB; if (latteBufferUsesSRGB != m_state.m_usesSRGB) { - layer.GetLayer()->setPixelFormat(latteBufferUsesSRGB ? MTL::PixelFormatRGBA8Unorm_sRGB : MTL::PixelFormatRGBA8Unorm); + layer.GetLayer()->setPixelFormat(latteBufferUsesSRGB ? 
MTL::PixelFormatBGRA8Unorm_sRGB : MTL::PixelFormatBGRA8Unorm); m_state.m_usesSRGB = latteBufferUsesSRGB; } @@ -1696,8 +1731,8 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE auto supportBuffer = bufferAllocator.GetBufferAllocation(size); memcpy(supportBuffer.data, supportBufferData, size); auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex); - if (!HasUnifiedMemory()) - buffer->didModifyRange(NS::Range(supportBuffer.offset, size)); + //if (!HasUnifiedMemory()) + // buffer->didModifyRange(NS::Range(supportBuffer.offset, size)); SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 7a9b41e4..8fe3a8d9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -98,6 +98,7 @@ struct MetalEncoderState uint32 m_depthBias = 0; uint32 m_depthSlope = 0; uint32 m_depthClamp = 0; + bool m_depthClipEnable = true; struct { MTL::Buffer* m_buffer; size_t m_offset;