Merge pull request #2 from SamoZ256/metal-intel-macs

Intel Macs support
This commit is contained in:
SamoZ256 2024-08-27 14:40:20 +02:00 committed by GitHub
commit a1b20fdbda
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 198 additions and 33 deletions

View File

@ -1,6 +1,7 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Common/precompiled.h"
#include "Metal/MTLResource.hpp"
struct MetalBufferRange
@ -39,7 +40,7 @@ public:
MetalBufferAllocation GetBufferAllocation(size_t size)
{
// Align the size
size = Align(size, 16);
size = Align(size, 128);
// First, try to find a free range
for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
@ -147,57 +148,136 @@ struct MetalSyncedBuffer
{
MTL::Buffer* m_buffer;
std::vector<MTL::CommandBuffer*> m_commandBuffers;
uint32 m_lock = 0;
// Reports whether at least one lock is currently held on this buffer,
// i.e. whether the lock counter is non-zero.
bool IsLocked() const
{
    const bool hasOutstandingLock = (m_lock > 0);
    return hasOutstandingLock;
}
};
constexpr uint16 MAX_COMMAND_BUFFER_FRAMES = 8;
class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer>
{
public:
MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator<MetalSyncedBuffer>(metalRenderer, metalRenderer->GetOptimalBufferStorageMode()) {}
MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator<MetalSyncedBuffer>(metalRenderer, MTL::ResourceStorageModeShared) {}
// Adds one lock to the buffer at `bufferIndex`. Locks are counted, so every
// call is expected to be paired with an UnlockBuffer() (or be cleared by
// UnlockAllBuffers()).
void LockBuffer(uint32 bufferIndex)
{
    auto& target = m_buffers[bufferIndex];
    target.m_lock += 1;
}
// Drops one lock from the buffer at `bufferIndex`. When the lock count reaches
// zero and no command buffer is recorded for the buffer, the whole buffer is
// handed back to the free ranges.
void UnlockBuffer(uint32 bufferIndex)
{
    auto& buffer = m_buffers[bufferIndex];

    // m_lock is unsigned: decrementing it at zero would wrap around to the
    // maximum value, and a later LockBuffer() would wrap it back to zero,
    // leaving a buffer that was just locked looking unlocked. An unbalanced
    // unlock (e.g. after UnlockAllBuffers() already reset the count) is
    // therefore ignored instead of corrupting the counter.
    if (!buffer.IsLocked())
        return;

    buffer.m_lock--;

    // TODO: is this really necessary?
    // Release the buffer if it wasn't released due to the lock
    if (!buffer.IsLocked() && buffer.m_commandBuffers.empty())
        FreeBuffer(bufferIndex);
}
void UnlockAllBuffers()
{
for (uint32_t i = 0; i < m_buffers.size(); i++)
{
auto& buffer = m_buffers[i];
if (buffer.m_lock != 0)
{
if (buffer.m_commandBuffers.empty())
FreeBuffer(i);
buffer.m_lock = 0;
}
}
/*
auto it = m_commandBuffersFrames.begin();
while (it != m_commandBuffersFrames.end())
{
it->second++;
if (it->second > MAX_COMMAND_BUFFER_FRAMES)
{
debug_printf("command buffer %p remained unfinished for more than %u frames\n", it->first, MAX_COMMAND_BUFFER_FRAMES);
// Pretend like the command buffer has finished
CommandBufferFinished(it->first, false);
it = m_commandBuffersFrames.erase(it);
}
else
{
it++;
}
}
*/
}
// Remembers `commandBuffer` as the command buffer currently being recorded.
// GetBuffer() associates the buffers it hands out with this command buffer.
// Passing nullptr clears the active command buffer.
void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer)
{
m_activeCommandBuffer = commandBuffer;
// Disabled: per-command-buffer frame counting (see m_commandBuffersFrames)
//if (commandBuffer)
// m_commandBuffersFrames[commandBuffer] = 0;
}
void CommandBufferFinished(MTL::CommandBuffer* commandBuffer)
void CheckForCompletedCommandBuffers(/*MTL::CommandBuffer* commandBuffer, bool erase = true*/)
{
for (uint32_t i = 0; i < m_buffers.size(); i++)
{
auto& buffer = m_buffers[i];
for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++)
{
if (commandBuffer == buffer.m_commandBuffers[j])
if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[j]))
{
if (buffer.m_commandBuffers.size() == 1)
{
// All command buffers using it have finished execution, we can use it again
m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()});
if (!buffer.IsLocked())
{
// All command buffers using it have finished execution, we can use it again
FreeBuffer(i);
}
buffer.m_commandBuffers.clear();
break;
}
else
{
buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j);
j--;
}
break;
}
}
}
//if (erase)
// m_commandBuffersFrames.erase(commandBuffer);
}
// TODO: should this be here? It's just to ensure safety
MTL::Buffer* GetBuffer(uint32 bufferIndex)
{
cemu_assert_debug(m_activeCommandBuffer);
auto& buffer = m_buffers[bufferIndex];
if (buffer.m_commandBuffers.back() != m_activeCommandBuffer)
if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer/*std::find(buffer.m_commandBuffers.begin(), buffer.m_commandBuffers.end(), m_activeCommandBuffer) == buffer.m_commandBuffers.end()*/)
buffer.m_commandBuffers.push_back(m_activeCommandBuffer);
return buffer.m_buffer;
}
// Returns the Metal buffer stored at `bufferIndex` without touching the
// buffer's list of command buffers and without requiring an active command
// buffer.
MTL::Buffer* GetBufferOutsideOfCommandBuffer(uint32 bufferIndex)
{
    auto& syncedBuffer = m_buffers[bufferIndex];
    return syncedBuffer.m_buffer;
}
/*
MetalBufferAllocation GetBufferAllocation(size_t size)
{
// TODO: remove this
if (!m_activeCommandBuffer)
throw std::runtime_error("No active command buffer when allocating a buffer!");
@ -209,7 +289,56 @@ public:
return allocation;
}
*/
/*
void LogInfo()
{
debug_printf("BUFFERS:\n");
for (auto& buffer : m_buffers)
{
debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_commandBuffers.size());
uint32 same = 0;
uint32 completed = 0;
for (uint32 i = 0; i < buffer.m_commandBuffers.size(); i++)
{
if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[i]))
completed++;
for (uint32 j = 0; j < buffer.m_commandBuffers.size(); j++)
{
if (i != j && buffer.m_commandBuffers[i] == buffer.m_commandBuffers[j])
same++;
}
}
debug_printf(" same: %u\n", same);
debug_printf(" completed: %u\n", completed);
}
debug_printf("FREE RANGES:\n");
for (auto& range : m_freeBufferRanges)
{
debug_printf(" %u -> offset: %zu, size: %zu\n", range.bufferIndex, range.offset, range.size);
}
}
*/
private:
MTL::CommandBuffer* m_activeCommandBuffer = nullptr;
//std::map<MTL::CommandBuffer*, uint16> m_commandBuffersFrames;
// Marks the entire buffer at `bufferIndex` as free: every existing free range
// that refers to this buffer is dropped, then a single range spanning the
// buffer from offset 0 to its full length is appended.
void FreeBuffer(uint32 bufferIndex)
{
    // Drop the partial free ranges of this buffer so they cannot overlap
    // with the full-size range added below
    auto rangeIt = m_freeBufferRanges.begin();
    while (rangeIt != m_freeBufferRanges.end())
    {
        if (rangeIt->bufferIndex == bufferIndex)
            rangeIt = m_freeBufferRanges.erase(rangeIt);
        else
            ++rangeIt;
    }

    auto& syncedBuffer = m_buffers[bufferIndex];
    m_freeBufferRanges.push_back({bufferIndex, 0, syncedBuffer.m_buffer->length()});
}
};

View File

@ -126,7 +126,7 @@ MetalRenderer::MetalRenderer()
presentFragmentFunction->release();
error = nullptr;
renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm);
#ifdef CEMU_DEBUG_ASSERT
renderPipelineDescriptor->setLabel(GetLabel("Present pipeline linear", renderPipelineDescriptor));
#endif
@ -138,7 +138,7 @@ MetalRenderer::MetalRenderer()
}
error = nullptr;
renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm_sRGB);
renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm_sRGB);
#ifdef CEMU_DEBUG_ASSERT
renderPipelineDescriptor->setLabel(GetLabel("Present pipeline sRGB", renderPipelineDescriptor));
#endif
@ -185,7 +185,9 @@ MetalRenderer::~MetalRenderer()
void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow)
{
GetLayer(mainWindow) = MetalLayerHandle(m_device, size);
auto& layer = GetLayer(mainWindow);
layer = MetalLayerHandle(m_device, size);
layer.GetLayer()->setPixelFormat(MTL::PixelFormatBGRA8Unorm);
}
void MetalRenderer::ResizeLayer(const Vector2i& size, bool mainWindow)
@ -240,17 +242,24 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC)
{
if (swapTV)
SwapBuffer(true);
//if (swapDRC)
// SwapBuffer(false);
if (swapDRC)
SwapBuffer(false);
// Release all the command buffers
CommitCommandBuffer();
for (uint32 i = 0; i < m_commandBuffers.size(); i++)
m_commandBuffers[i].m_commandBuffer->release();
// TODO: release
//for (uint32 i = 0; i < m_commandBuffers.size(); i++)
// m_commandBuffers[i].m_commandBuffer->release();
m_commandBuffers.clear();
// Release frame persistent buffers
m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations();
// Unlock all temporary buffers
m_memoryManager->GetTemporaryBufferAllocator().UnlockAllBuffers();
// Check for completed command buffers
m_memoryManager->GetTemporaryBufferAllocator().CheckForCompletedCommandBuffers();
}
// TODO: use `shader` for drawing
@ -381,7 +390,7 @@ ImTextureID MetalRenderer::GenerateTexture(const std::vector<uint8>& data, const
desc->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
desc->setWidth(size.x);
desc->setHeight(size.y);
desc->setStorageMode(MTL::StorageModeShared);
desc->setStorageMode(m_isAppleGPU ? MTL::StorageModeShared : MTL::StorageModeManaged);
desc->setUsage(MTL::TextureUsageShaderRead);
MTL::Texture* texture = m_device->newTexture(desc);
@ -507,11 +516,13 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s
auto blitCommandEncoder = GetBlitCommandEncoder();
// Allocate a temporary buffer
auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(compressedImageSize);
auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(allocation.bufferIndex);
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
auto allocation = bufferAllocator.GetBufferAllocation(compressedImageSize);
auto buffer = bufferAllocator.GetBuffer(allocation.bufferIndex);
// Copy the data to the temporary buffer
memcpy(allocation.data, pixelData, compressedImageSize);
//buffer->didModifyRange(NS::Range(allocation.offset, allocation.size));
// Copy the data from the temporary buffer to the texture
blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ));
@ -936,10 +947,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
uint32 cullBack = polygonControlReg.get_CULL_BACK();
uint32 polyOffsetFrontEnable = polygonControlReg.get_OFFSET_FRONT_ENABLED();
// TODO
//cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually
//bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false;
if (polyOffsetFrontEnable)
{
uint32 frontScaleU32 = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_SCALE.getRawValue();
@ -973,6 +980,16 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
}
}
// Depth clip mode
cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually
bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false;
if (zClipEnable != encoderState.m_depthClipEnable)
{
renderCommandEncoder->setDepthClipMode(zClipEnable ? MTL::DepthClipModeClip : MTL::DepthClipModeClamp);
encoderState.m_depthClipEnable = zClipEnable;
}
// todo - how does culling behave with rects?
// right now we just assume that their winding is always CW
if (isPrimitiveRect)
@ -1112,7 +1129,13 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
// Draw
MTL::Buffer* indexBuffer = nullptr;
if (hostIndexType != INDEX_TYPE::NONE)
indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex);
{
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
indexBuffer = bufferAllocator.GetBuffer(indexBufferIndex);
// We have already retrieved the buffer, no need for it to be locked anymore
bufferAllocator.UnlockBuffer(indexBufferIndex);
}
if (usesGeometryShader)
{
if (indexBuffer)
@ -1178,18 +1201,27 @@ void MetalRenderer::draw_endSequence()
void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex)
{
auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(size);
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
auto allocation = bufferAllocator.GetBufferAllocation(size);
offset = allocation.offset;
bufferIndex = allocation.bufferIndex;
// Lock the buffer so that it doesn't get released
bufferAllocator.LockBuffer(allocation.bufferIndex);
return allocation.data;
}
void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size)
{
auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(bufferIndex);
// Do nothing
/*
if (!HasUnifiedMemory())
{
auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBufferOutsideOfCommandBuffer(bufferIndex);
buffer->didModifyRange(NS::Range(offset, size));
}
*/
}
void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index)
@ -1454,13 +1486,16 @@ void MetalRenderer::CommitCommandBuffer()
auto& commandBuffer = m_commandBuffers.back();
if (!commandBuffer.m_commited)
{
commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer* cmd) {
m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer);
});
// Handled differently, since it seems like Metal doesn't always call the completion handler
//commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer*) {
// m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer);
//});
commandBuffer.m_commandBuffer->commit();
commandBuffer.m_commited = true;
m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(nullptr);
// Debug
//m_commandQueue->insertDebugCaptureBoundary();
}
@ -1474,7 +1509,7 @@ bool MetalRenderer::AcquireDrawable(bool mainWindow)
const bool latteBufferUsesSRGB = mainWindow ? LatteGPUState.tvBufferUsesSRGB : LatteGPUState.drcBufferUsesSRGB;
if (latteBufferUsesSRGB != m_state.m_usesSRGB)
{
layer.GetLayer()->setPixelFormat(latteBufferUsesSRGB ? MTL::PixelFormatRGBA8Unorm_sRGB : MTL::PixelFormatRGBA8Unorm);
layer.GetLayer()->setPixelFormat(latteBufferUsesSRGB ? MTL::PixelFormatBGRA8Unorm_sRGB : MTL::PixelFormatBGRA8Unorm);
m_state.m_usesSRGB = latteBufferUsesSRGB;
}
@ -1696,8 +1731,8 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
auto supportBuffer = bufferAllocator.GetBufferAllocation(size);
memcpy(supportBuffer.data, supportBufferData, size);
auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex);
if (!HasUnifiedMemory())
buffer->didModifyRange(NS::Range(supportBuffer.offset, size));
//if (!HasUnifiedMemory())
// buffer->didModifyRange(NS::Range(supportBuffer.offset, size));
SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint);
}

View File

@ -98,6 +98,7 @@ struct MetalEncoderState
uint32 m_depthBias = 0;
uint32 m_depthSlope = 0;
uint32 m_depthClamp = 0;
bool m_depthClipEnable = true;
struct {
MTL::Buffer* m_buffer;
size_t m_offset;