Merge pull request #2 from SamoZ256/metal-intel-macs

Intel Macs support
This commit is contained in:
SamoZ256 2024-08-27 14:40:20 +02:00 committed by GitHub
commit a1b20fdbda
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 198 additions and 33 deletions

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Common/precompiled.h"
#include "Metal/MTLResource.hpp" #include "Metal/MTLResource.hpp"
struct MetalBufferRange struct MetalBufferRange
@ -39,7 +40,7 @@ public:
MetalBufferAllocation GetBufferAllocation(size_t size) MetalBufferAllocation GetBufferAllocation(size_t size)
{ {
// Align the size // Align the size
size = Align(size, 16); size = Align(size, 128);
// First, try to find a free range // First, try to find a free range
for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
@ -147,57 +148,136 @@ struct MetalSyncedBuffer
{ {
MTL::Buffer* m_buffer; MTL::Buffer* m_buffer;
std::vector<MTL::CommandBuffer*> m_commandBuffers; std::vector<MTL::CommandBuffer*> m_commandBuffers;
uint32 m_lock = 0;
// Reports whether at least one lock is currently held on this buffer,
// i.e. whether the lock counter m_lock is non-zero.
bool IsLocked() const
{
	return m_lock > 0;
}
}; };
constexpr uint16 MAX_COMMAND_BUFFER_FRAMES = 8;
class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer> class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer>
{ {
public: public:
MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator<MetalSyncedBuffer>(metalRenderer, metalRenderer->GetOptimalBufferStorageMode()) {} MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator<MetalSyncedBuffer>(metalRenderer, MTL::ResourceStorageModeShared) {}
// Adds one lock to the temporary buffer stored at bufferIndex.
void LockBuffer(uint32 bufferIndex)
{
	auto& entry = m_buffers[bufferIndex];
	++entry.m_lock;
}
// Removes one lock from the temporary buffer stored at bufferIndex. Once the buffer
// is neither locked nor referenced by any command buffer, it is handed to FreeBuffer.
void UnlockBuffer(uint32 bufferIndex)
{
	auto& buffer = m_buffers[bufferIndex];

	// m_lock is unsigned and UnlockAllBuffers() resets it to 0. An unlock that arrives
	// after such a reset must not decrement, otherwise the counter wraps around and
	// the buffer appears locked again.
	if (!buffer.IsLocked())
		return;

	buffer.m_lock--;

	// TODO: is this really necessary?
	// Release the buffer if it wasn't released due to the lock
	if (!buffer.IsLocked() && buffer.m_commandBuffers.empty())
		FreeBuffer(bufferIndex);
}
void UnlockAllBuffers()
{
for (uint32_t i = 0; i < m_buffers.size(); i++)
{
auto& buffer = m_buffers[i];
if (buffer.m_lock != 0)
{
if (buffer.m_commandBuffers.empty())
FreeBuffer(i);
buffer.m_lock = 0;
}
}
/*
auto it = m_commandBuffersFrames.begin();
while (it != m_commandBuffersFrames.end())
{
it->second++;
if (it->second > MAX_COMMAND_BUFFER_FRAMES)
{
debug_printf("command buffer %p remained unfinished for more than %u frames\n", it->first, MAX_COMMAND_BUFFER_FRAMES);
// Pretend like the command buffer has finished
CommandBufferFinished(it->first, false);
it = m_commandBuffersFrames.erase(it);
}
else
{
it++;
}
}
*/
}
void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer) void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer)
{ {
m_activeCommandBuffer = commandBuffer; m_activeCommandBuffer = commandBuffer;
//if (commandBuffer)
// m_commandBuffersFrames[commandBuffer] = 0;
} }
void CommandBufferFinished(MTL::CommandBuffer* commandBuffer) void CheckForCompletedCommandBuffers(/*MTL::CommandBuffer* commandBuffer, bool erase = true*/)
{ {
for (uint32_t i = 0; i < m_buffers.size(); i++) for (uint32_t i = 0; i < m_buffers.size(); i++)
{ {
auto& buffer = m_buffers[i]; auto& buffer = m_buffers[i];
for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++) for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++)
{ {
if (commandBuffer == buffer.m_commandBuffers[j]) if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[j]))
{ {
if (buffer.m_commandBuffers.size() == 1) if (buffer.m_commandBuffers.size() == 1)
{ {
// All command buffers using it have finished execution, we can use it again if (!buffer.IsLocked())
m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()}); {
// All command buffers using it have finished execution, we can use it again
FreeBuffer(i);
}
buffer.m_commandBuffers.clear(); buffer.m_commandBuffers.clear();
break;
} }
else else
{ {
buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j); buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j);
j--;
} }
break;
} }
} }
} }
//if (erase)
// m_commandBuffersFrames.erase(commandBuffer);
} }
// TODO: should this be here? It's just to ensure safety
MTL::Buffer* GetBuffer(uint32 bufferIndex) MTL::Buffer* GetBuffer(uint32 bufferIndex)
{ {
cemu_assert_debug(m_activeCommandBuffer);
auto& buffer = m_buffers[bufferIndex]; auto& buffer = m_buffers[bufferIndex];
if (buffer.m_commandBuffers.back() != m_activeCommandBuffer) if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer/*std::find(buffer.m_commandBuffers.begin(), buffer.m_commandBuffers.end(), m_activeCommandBuffer) == buffer.m_commandBuffers.end()*/)
buffer.m_commandBuffers.push_back(m_activeCommandBuffer); buffer.m_commandBuffers.push_back(m_activeCommandBuffer);
return buffer.m_buffer; return buffer.m_buffer;
} }
// Returns the MTL::Buffer stored at bufferIndex without touching the buffer's
// m_commandBuffers list.
MTL::Buffer* GetBufferOutsideOfCommandBuffer(uint32 bufferIndex)
{
	auto& entry = m_buffers[bufferIndex];
	return entry.m_buffer;
}
/*
MetalBufferAllocation GetBufferAllocation(size_t size) MetalBufferAllocation GetBufferAllocation(size_t size)
{ {
// TODO: remove this
if (!m_activeCommandBuffer) if (!m_activeCommandBuffer)
throw std::runtime_error("No active command buffer when allocating a buffer!"); throw std::runtime_error("No active command buffer when allocating a buffer!");
@ -209,7 +289,56 @@ public:
return allocation; return allocation;
} }
*/
/*
void LogInfo()
{
debug_printf("BUFFERS:\n");
for (auto& buffer : m_buffers)
{
debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_commandBuffers.size());
uint32 same = 0;
uint32 completed = 0;
for (uint32 i = 0; i < buffer.m_commandBuffers.size(); i++)
{
if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[i]))
completed++;
for (uint32 j = 0; j < buffer.m_commandBuffers.size(); j++)
{
if (i != j && buffer.m_commandBuffers[i] == buffer.m_commandBuffers[j])
same++;
}
}
debug_printf(" same: %u\n", same);
debug_printf(" completed: %u\n", completed);
}
debug_printf("FREE RANGES:\n");
for (auto& range : m_freeBufferRanges)
{
debug_printf(" %u -> offset: %zu, size: %zu\n", range.bufferIndex, range.offset, range.size);
}
}
*/
private: private:
MTL::CommandBuffer* m_activeCommandBuffer = nullptr; MTL::CommandBuffer* m_activeCommandBuffer = nullptr;
//std::map<MTL::CommandBuffer*, uint16> m_commandBuffersFrames;
// Marks the entire buffer at bufferIndex as free: all free ranges that refer to this
// buffer are removed, then one range covering offset 0 up to the buffer's length is appended.
void FreeBuffer(uint32 bufferIndex)
{
	// First remove any free ranges that use this buffer
	auto rangeIt = m_freeBufferRanges.begin();
	while (rangeIt != m_freeBufferRanges.end())
	{
		if (rangeIt->bufferIndex == bufferIndex)
			rangeIt = m_freeBufferRanges.erase(rangeIt);
		else
			++rangeIt;
	}

	// Then register the whole buffer as a single free range
	m_freeBufferRanges.push_back({bufferIndex, 0, m_buffers[bufferIndex].m_buffer->length()});
}
}; };

View File

@ -126,7 +126,7 @@ MetalRenderer::MetalRenderer()
presentFragmentFunction->release(); presentFragmentFunction->release();
error = nullptr; error = nullptr;
renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm); renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm);
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
renderPipelineDescriptor->setLabel(GetLabel("Present pipeline linear", renderPipelineDescriptor)); renderPipelineDescriptor->setLabel(GetLabel("Present pipeline linear", renderPipelineDescriptor));
#endif #endif
@ -138,7 +138,7 @@ MetalRenderer::MetalRenderer()
} }
error = nullptr; error = nullptr;
renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm_sRGB); renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm_sRGB);
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
renderPipelineDescriptor->setLabel(GetLabel("Present pipeline sRGB", renderPipelineDescriptor)); renderPipelineDescriptor->setLabel(GetLabel("Present pipeline sRGB", renderPipelineDescriptor));
#endif #endif
@ -185,7 +185,9 @@ MetalRenderer::~MetalRenderer()
void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow) void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow)
{ {
GetLayer(mainWindow) = MetalLayerHandle(m_device, size); auto& layer = GetLayer(mainWindow);
layer = MetalLayerHandle(m_device, size);
layer.GetLayer()->setPixelFormat(MTL::PixelFormatBGRA8Unorm);
} }
void MetalRenderer::ResizeLayer(const Vector2i& size, bool mainWindow) void MetalRenderer::ResizeLayer(const Vector2i& size, bool mainWindow)
@ -240,17 +242,24 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC)
{ {
if (swapTV) if (swapTV)
SwapBuffer(true); SwapBuffer(true);
//if (swapDRC) if (swapDRC)
// SwapBuffer(false); SwapBuffer(false);
// Release all the command buffers // Release all the command buffers
CommitCommandBuffer(); CommitCommandBuffer();
for (uint32 i = 0; i < m_commandBuffers.size(); i++) // TODO: release
m_commandBuffers[i].m_commandBuffer->release(); //for (uint32 i = 0; i < m_commandBuffers.size(); i++)
// m_commandBuffers[i].m_commandBuffer->release();
m_commandBuffers.clear(); m_commandBuffers.clear();
// Release frame persistent buffers // Release frame persistent buffers
m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations(); m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations();
// Unlock all temporary buffers
m_memoryManager->GetTemporaryBufferAllocator().UnlockAllBuffers();
// Check for completed command buffers
m_memoryManager->GetTemporaryBufferAllocator().CheckForCompletedCommandBuffers();
} }
// TODO: use `shader` for drawing // TODO: use `shader` for drawing
@ -381,7 +390,7 @@ ImTextureID MetalRenderer::GenerateTexture(const std::vector<uint8>& data, const
desc->setPixelFormat(MTL::PixelFormatRGBA8Unorm); desc->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
desc->setWidth(size.x); desc->setWidth(size.x);
desc->setHeight(size.y); desc->setHeight(size.y);
desc->setStorageMode(MTL::StorageModeShared); desc->setStorageMode(m_isAppleGPU ? MTL::StorageModeShared : MTL::StorageModeManaged);
desc->setUsage(MTL::TextureUsageShaderRead); desc->setUsage(MTL::TextureUsageShaderRead);
MTL::Texture* texture = m_device->newTexture(desc); MTL::Texture* texture = m_device->newTexture(desc);
@ -507,11 +516,13 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s
auto blitCommandEncoder = GetBlitCommandEncoder(); auto blitCommandEncoder = GetBlitCommandEncoder();
// Allocate a temporary buffer // Allocate a temporary buffer
auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(compressedImageSize); auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(allocation.bufferIndex); auto allocation = bufferAllocator.GetBufferAllocation(compressedImageSize);
auto buffer = bufferAllocator.GetBuffer(allocation.bufferIndex);
// Copy the data to the temporary buffer // Copy the data to the temporary buffer
memcpy(allocation.data, pixelData, compressedImageSize); memcpy(allocation.data, pixelData, compressedImageSize);
//buffer->didModifyRange(NS::Range(allocation.offset, allocation.size));
// Copy the data from the temporary buffer to the texture // Copy the data from the temporary buffer to the texture
blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ)); blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ));
@ -936,10 +947,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
uint32 cullBack = polygonControlReg.get_CULL_BACK(); uint32 cullBack = polygonControlReg.get_CULL_BACK();
uint32 polyOffsetFrontEnable = polygonControlReg.get_OFFSET_FRONT_ENABLED(); uint32 polyOffsetFrontEnable = polygonControlReg.get_OFFSET_FRONT_ENABLED();
// TODO
//cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually
//bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false;
if (polyOffsetFrontEnable) if (polyOffsetFrontEnable)
{ {
uint32 frontScaleU32 = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_SCALE.getRawValue(); uint32 frontScaleU32 = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_SCALE.getRawValue();
@ -973,6 +980,16 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
} }
} }
// Depth clip mode
cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually
bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false;
if (zClipEnable != encoderState.m_depthClipEnable)
{
renderCommandEncoder->setDepthClipMode(zClipEnable ? MTL::DepthClipModeClip : MTL::DepthClipModeClamp);
encoderState.m_depthClipEnable = zClipEnable;
}
// todo - how does culling behave with rects? // todo - how does culling behave with rects?
// right now we just assume that their winding is always CW // right now we just assume that their winding is always CW
if (isPrimitiveRect) if (isPrimitiveRect)
@ -1112,7 +1129,13 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
// Draw // Draw
MTL::Buffer* indexBuffer = nullptr; MTL::Buffer* indexBuffer = nullptr;
if (hostIndexType != INDEX_TYPE::NONE) if (hostIndexType != INDEX_TYPE::NONE)
indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex); {
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
indexBuffer = bufferAllocator.GetBuffer(indexBufferIndex);
// We have already retrieved the buffer, no need for it to be locked anymore
bufferAllocator.UnlockBuffer(indexBufferIndex);
}
if (usesGeometryShader) if (usesGeometryShader)
{ {
if (indexBuffer) if (indexBuffer)
@ -1178,18 +1201,27 @@ void MetalRenderer::draw_endSequence()
void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex)
{ {
auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(size); auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
auto allocation = bufferAllocator.GetBufferAllocation(size);
offset = allocation.offset; offset = allocation.offset;
bufferIndex = allocation.bufferIndex; bufferIndex = allocation.bufferIndex;
// Lock the buffer so that it doesn't get released
bufferAllocator.LockBuffer(allocation.bufferIndex);
return allocation.data; return allocation.data;
} }
void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size)
{ {
auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(bufferIndex); // Do nothing
/*
if (!HasUnifiedMemory()) if (!HasUnifiedMemory())
{
auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBufferOutsideOfCommandBuffer(bufferIndex);
buffer->didModifyRange(NS::Range(offset, size)); buffer->didModifyRange(NS::Range(offset, size));
}
*/
} }
void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index)
@ -1454,13 +1486,16 @@ void MetalRenderer::CommitCommandBuffer()
auto& commandBuffer = m_commandBuffers.back(); auto& commandBuffer = m_commandBuffers.back();
if (!commandBuffer.m_commited) if (!commandBuffer.m_commited)
{ {
commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer* cmd) { // Handled differently, since it seems like Metal doesn't always call the completion handler
m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer); //commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer*) {
}); // m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer);
//});
commandBuffer.m_commandBuffer->commit(); commandBuffer.m_commandBuffer->commit();
commandBuffer.m_commited = true; commandBuffer.m_commited = true;
m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(nullptr);
// Debug // Debug
//m_commandQueue->insertDebugCaptureBoundary(); //m_commandQueue->insertDebugCaptureBoundary();
} }
@ -1474,7 +1509,7 @@ bool MetalRenderer::AcquireDrawable(bool mainWindow)
const bool latteBufferUsesSRGB = mainWindow ? LatteGPUState.tvBufferUsesSRGB : LatteGPUState.drcBufferUsesSRGB; const bool latteBufferUsesSRGB = mainWindow ? LatteGPUState.tvBufferUsesSRGB : LatteGPUState.drcBufferUsesSRGB;
if (latteBufferUsesSRGB != m_state.m_usesSRGB) if (latteBufferUsesSRGB != m_state.m_usesSRGB)
{ {
layer.GetLayer()->setPixelFormat(latteBufferUsesSRGB ? MTL::PixelFormatRGBA8Unorm_sRGB : MTL::PixelFormatRGBA8Unorm); layer.GetLayer()->setPixelFormat(latteBufferUsesSRGB ? MTL::PixelFormatBGRA8Unorm_sRGB : MTL::PixelFormatBGRA8Unorm);
m_state.m_usesSRGB = latteBufferUsesSRGB; m_state.m_usesSRGB = latteBufferUsesSRGB;
} }
@ -1696,8 +1731,8 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
auto supportBuffer = bufferAllocator.GetBufferAllocation(size); auto supportBuffer = bufferAllocator.GetBufferAllocation(size);
memcpy(supportBuffer.data, supportBufferData, size); memcpy(supportBuffer.data, supportBufferData, size);
auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex); auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex);
if (!HasUnifiedMemory()) //if (!HasUnifiedMemory())
buffer->didModifyRange(NS::Range(supportBuffer.offset, size)); // buffer->didModifyRange(NS::Range(supportBuffer.offset, size));
SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint); SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint);
} }

View File

@ -98,6 +98,7 @@ struct MetalEncoderState
uint32 m_depthBias = 0; uint32 m_depthBias = 0;
uint32 m_depthSlope = 0; uint32 m_depthSlope = 0;
uint32 m_depthClamp = 0; uint32 m_depthClamp = 0;
bool m_depthClipEnable = true;
struct { struct {
MTL::Buffer* m_buffer; MTL::Buffer* m_buffer;
size_t m_offset; size_t m_offset;