Merge pull request #4 from SamoZ256/metal-occlusion-queries

Occlusion queries
This commit is contained in:
SamoZ256 2024-09-14 07:29:57 +02:00 committed by GitHub
commit e5dcd93dc3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 191 additions and 76 deletions

View File

@ -567,6 +567,8 @@ if(ENABLE_METAL)
HW/Latte/Renderer/Metal/MetalSamplerCache.h
HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp
HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h
HW/Latte/Renderer/Metal/MetalQuery.cpp
HW/Latte/Renderer/Metal/MetalQuery.h
HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h
HW/Latte/Renderer/Metal/UtilityShaderSource.h
)

View File

@ -1,9 +1,9 @@
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Metal/MTLRenderPass.hpp"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
void CachedFBOMtl::CreateRenderPass()
CachedFBOMtl::CachedFBOMtl(class MetalRenderer* metalRenderer, uint64 key) : LatteCachedFBO(key)
{
m_renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
@ -39,6 +39,9 @@ void CachedFBOMtl::CreateRenderPass()
stencilAttachment->setStoreAction(MTL::StoreActionStore);
}
}
// Visibility buffer
m_renderPassDescriptor->setVisibilityResultBuffer(metalRenderer->GetOcclusionQueryResultBuffer());
}
CachedFBOMtl::~CachedFBOMtl()

View File

@ -8,10 +8,7 @@
class CachedFBOMtl : public LatteCachedFBO
{
public:
CachedFBOMtl(uint64 key) : LatteCachedFBO(key)
{
CreateRenderPass();
}
CachedFBOMtl(class MetalRenderer* metalRenderer, uint64 key);
~CachedFBOMtl();
@ -22,6 +19,4 @@ public:
private:
MTL::RenderPassDescriptor* m_renderPassDescriptor = nullptr;
void CreateRenderPass();
};

View File

@ -25,6 +25,7 @@ void LatteTextureReadbackInfoMtl::StartTransfer()
blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage);
m_commandBuffer = m_mtlr->GetCurrentCommandBuffer();
m_mtlr->RequestSoonCommit();
}
bool LatteTextureReadbackInfoMtl::IsFinished()
@ -38,12 +39,12 @@ bool LatteTextureReadbackInfoMtl::IsFinished()
if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer)
m_mtlr->CommitCommandBuffer();
return m_mtlr->CommandBufferCompleted(m_commandBuffer);
return CommandBufferCompleted(m_commandBuffer);
}
void LatteTextureReadbackInfoMtl::ForceFinish()
{
m_mtlr->WaitForCommandBufferCompletion(m_commandBuffer);
m_commandBuffer->waitUntilCompleted();
}
uint8* LatteTextureReadbackInfoMtl::GetData()

View File

@ -255,7 +255,7 @@ public:
auto& buffer = m_buffers[i];
for (uint32_t j = 0; j < buffer.m_data.m_commandBuffers.size(); j++)
{
if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[j]))
if (CommandBufferCompleted(buffer.m_data.m_commandBuffers[j]))
{
if (buffer.m_data.m_commandBuffers.size() == 1)
{

View File

@ -69,3 +69,9 @@ inline bool IsValidDepthTextureType(Latte::E_DIM dim)
{
return (dim == Latte::E_DIM::DIM_2D || dim == Latte::E_DIM::DIM_2D_MSAA || dim == Latte::E_DIM::DIM_2D_ARRAY || dim == Latte::E_DIM::DIM_2D_ARRAY_MSAA || dim == Latte::E_DIM::DIM_CUBEMAP);
}
// Returns true when the command buffer's status is Completed or Error,
// and false for every other status.
inline bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer)
{
    switch (commandBuffer->status())
    {
    case MTL::CommandBufferStatusCompleted:
    case MTL::CommandBufferStatusError:
        return true;
    default:
        return false;
    }
}

View File

@ -0,0 +1,41 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalQuery.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
// Fetches the sample count recorded for this query.
// Returns false while the command buffer stored by end() has not finished yet.
// Otherwise returns true with numSamplesPassed filled in; a query that has no
// command buffer stored reports 0 samples.
bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed)
{
    if (m_commandBuffer != nullptr)
    {
        // The slot is only read once the command buffer is finished
        if (!CommandBufferCompleted(m_commandBuffer))
            return false;

        const uint64* results = m_mtlr->GetOcclusionQueryResultsPtr();
        numSamplesPassed = results[m_queryIndex];
        return true;
    }

    // No command buffer was stored for this query
    numSamplesPassed = 0;
    return true;
}
// Hands the query's result slot back to the renderer if one was acquired in begin().
LatteQueryObjectMtl::~LatteQueryObjectMtl()
{
    const bool holdsSlot = (m_queryIndex != INVALID_UINT32);
    if (holdsSlot)
    {
        m_mtlr->ReleaseOcclusionQueryIndex(m_queryIndex);
    }
}
void LatteQueryObjectMtl::begin()
{
m_queryIndex = m_mtlr->GetAvailableOcclusionQueryIndex();
m_mtlr->SetActiveOcclusionQueryIndex(m_queryIndex);
}
// Ends the query: clears the renderer's active occlusion query index and, when a
// command buffer is active, stores it for getResult() and requests a commit soon.
void LatteQueryObjectMtl::end()
{
    m_mtlr->SetActiveOcclusionQueryIndex(INVALID_UINT32);

    // Without an active command buffer there is nothing to store
    if (!m_mtlr->IsCommandBufferActive())
        return;

    m_commandBuffer = m_mtlr->GetCurrentCommandBuffer();
    m_mtlr->RequestSoonCommit();
}

View File

@ -0,0 +1,23 @@
#pragma once
#include "Cafe/HW/Latte/Core/LatteQueryObject.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
// Occlusion query object for the Metal renderer.
// begin() acquires a result slot index from the renderer, end() stores the current
// command buffer, and getResult() reads the slot once that command buffer finished.
class LatteQueryObjectMtl : public LatteQueryObject
{
public:
    explicit LatteQueryObjectMtl(class MetalRenderer* mtlRenderer) : m_mtlr{mtlRenderer} {}
    ~LatteQueryObjectMtl();

    // The destructor releases m_queryIndex, so a copy would release the same index twice
    LatteQueryObjectMtl(const LatteQueryObjectMtl&) = delete;
    LatteQueryObjectMtl& operator=(const LatteQueryObjectMtl&) = delete;

    // Returns false while the result is not available yet
    bool getResult(uint64& numSamplesPassed) override;
    void begin() override;
    void end() override;

private:
    class MetalRenderer* m_mtlr;

    // Result slot index; INVALID_UINT32 until begin() acquires one
    uint32 m_queryIndex = INVALID_UINT32;
    // Command buffer stored by end(); nullptr until then
    // TODO: make this a list of command buffers
    MTL::CommandBuffer* m_commandBuffer = nullptr;
};

View File

@ -9,6 +9,7 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalQuery.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h"
@ -20,12 +21,14 @@
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#include "HW/Latte/Renderer/Metal/MetalLayerHandle.h"
#include "HW/Latte/Renderer/Renderer.h"
#include "Metal/MTLRenderCommandEncoder.hpp"
#define IMGUI_IMPL_METAL_CPP
#include "imgui/imgui_extension.h"
#include "imgui/imgui_impl_metal.h"
#define COMMIT_TRESHOLD 256
#define DEFAULT_COMMIT_TRESHOLD 256
#define OCCLUSION_QUERY_POOL_SIZE 1024
extern bool hasValidFramebufferAttached;
@ -93,6 +96,17 @@ MetalRenderer::MetalRenderer()
m_xfbRingBuffer->setLabel(GetLabel("Transform feedback buffer", m_xfbRingBuffer));
#endif
// Occlusion queries
m_occlusionQuery.m_resultBuffer = m_device->newBuffer(OCCLUSION_QUERY_POOL_SIZE * sizeof(uint64), MTL::ResourceStorageModeShared);
#ifdef CEMU_DEBUG_ASSERT
m_occlusionQuery.m_resultBuffer->setLabel(GetLabel("Occlusion query result buffer", m_occlusionQuery.m_resultBuffer));
#endif
m_occlusionQuery.m_resultsPtr = (uint64*)m_occlusionQuery.m_resultBuffer->contents();
m_occlusionQuery.m_availableIndices.reserve(OCCLUSION_QUERY_POOL_SIZE);
for (uint32 i = 0; i < OCCLUSION_QUERY_POOL_SIZE; i++)
m_occlusionQuery.m_availableIndices.push_back(i);
// Initialize state
for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
{
@ -320,7 +334,7 @@ void MetalRenderer::Flush(bool waitIdle)
{
cemu_assert_debug(commandBuffer.m_commited);
WaitForCommandBufferCompletion(commandBuffer.m_commandBuffer);
commandBuffer.m_commandBuffer->waitUntilCompleted();
}
}
}
@ -466,7 +480,7 @@ void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, si
LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key)
{
return new CachedFBOMtl(key);
return new CachedFBOMtl(this, key);
}
void MetalRenderer::rendertarget_deleteCachedFBO(LatteCachedFBO* cfbo)
@ -1041,6 +1055,14 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
encoderState.m_depthClipEnable = zClipEnable;
}
// Visibility result mode
if (m_occlusionQuery.m_activeIndex != encoderState.m_visibilityResultOffset)
{
auto mode = (m_occlusionQuery.m_activeIndex == INVALID_UINT32 ? MTL::VisibilityResultModeDisabled : MTL::VisibilityResultModeCounting);
renderCommandEncoder->setVisibilityResultMode(mode, m_occlusionQuery.m_activeIndex * sizeof(uint64));
encoderState.m_visibilityResultOffset = m_occlusionQuery.m_activeIndex;
}
// todo - how does culling behave with rects?
// right now we just assume that their winding is always CW
if (isPrimitiveRect)
@ -1248,7 +1270,8 @@ void MetalRenderer::draw_endSequence()
bool hasReadback = LatteTextureReadback_Update();
m_recordedDrawcalls++;
// The number of draw calls needs to be twice as big, since we are interrupting the render pass
if (m_recordedDrawcalls >= COMMIT_TRESHOLD * 2 || hasReadback)
// TODO: uncomment the hasReadback condition?
if (m_recordedDrawcalls >= m_commitTreshold * 2/* || hasReadback*/)
{
CommitCommandBuffer();
@ -1282,6 +1305,23 @@ void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offse
*/
}
// Allocates a new Metal query object bound to this renderer.
// The caller gives it back through occlusionQuery_destroy().
LatteQueryObject* MetalRenderer::occlusionQuery_create()
{
    LatteQueryObjectMtl* queryObjMtl = new LatteQueryObjectMtl(this);
    return queryObjMtl;
}
// Deletes a query object created by occlusionQuery_create().
// The pointer is cast to the Metal type first so the delete runs ~LatteQueryObjectMtl().
void MetalRenderer::occlusionQuery_destroy(LatteQueryObject* queryObj)
{
    delete static_cast<LatteQueryObjectMtl*>(queryObj);
}
// Currently a no-op on Metal.
void MetalRenderer::occlusionQuery_flush()
{
    // TODO: implement
}
// Currently a no-op on Metal.
void MetalRenderer::occlusionQuery_updateState()
{
    // TODO: implement
}
void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index)
{
auto& boundBuffer = m_state.m_encoderState.m_buffers[shaderType][index];
@ -1370,6 +1410,9 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer()
MTL::CommandBuffer* mtlCommandBuffer = m_commandQueue->commandBuffer();
m_commandBuffers.push_back({mtlCommandBuffer});
m_recordedDrawcalls = 0;
m_commitTreshold = DEFAULT_COMMIT_TRESHOLD;
// Notify memory manager about the new command buffer
m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer);
@ -1381,17 +1424,6 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer()
}
}
bool MetalRenderer::CommandBufferCompleted(MTL::CommandBuffer* commandBuffer)
{
auto status = commandBuffer->status();
return (status == MTL::CommandBufferStatusCompleted || status == MTL::CommandBufferStatusError);
}
void MetalRenderer::WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer)
{
commandBuffer->waitUntilCompleted();
}
MTL::RenderCommandEncoder* MetalRenderer::GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor)
{
EndEncoding();
@ -1529,15 +1561,13 @@ void MetalRenderer::EndEncoding()
m_encoderType = MetalEncoderType::None;
// Commit the command buffer if enough draw calls have been recorded
if (m_recordedDrawcalls >= COMMIT_TRESHOLD)
if (m_recordedDrawcalls >= m_commitTreshold)
CommitCommandBuffer();
}
}
void MetalRenderer::CommitCommandBuffer()
{
m_recordedDrawcalls = 0;
if (m_commandBuffers.size() != 0)
{
EndEncoding();

View File

@ -99,6 +99,7 @@ struct MetalEncoderState
uint32 m_depthSlope = 0;
uint32 m_depthClamp = 0;
bool m_depthClipEnable = true;
uint32 m_visibilityResultOffset = INVALID_UINT32;
struct {
MTL::Buffer* m_buffer;
size_t m_offset;
@ -155,32 +156,6 @@ enum class MetalEncoderType
Blit,
};
// HACK: Dummy occlusion query object for Metal
class LatteQueryObjectMtl : public LatteQueryObject
{
public:
LatteQueryObjectMtl(class MetalRenderer* mtlRenderer) : m_mtlr{mtlRenderer} {}
bool getResult(uint64& numSamplesPassed) override
{
cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::getResult: occlusion queries are not yet supported on Metal");
return true;
}
void begin() override
{
cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::begin: occlusion queries are not yet supported on Metal");
}
void end() override
{
cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::end: occlusion queries are not yet supported on Metal");
}
private:
class MetalRenderer* m_mtlr;
};
class MetalRenderer : public Renderer
{
public:
@ -296,27 +271,19 @@ public:
void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override;
// occlusion queries
LatteQueryObject* occlusionQuery_create() override {
cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_create: Occlusion queries are not yet supported on Metal");
return new LatteQueryObjectMtl(this);
}
void occlusionQuery_destroy(LatteQueryObject* queryObj) override {
cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_destroy: occlusion queries are not yet supported on Metal");
}
void occlusionQuery_flush() override {
cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_flush: occlusion queries are not yet supported on Metal");
}
void occlusionQuery_updateState() override {
cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_updateState: occlusion queries are not yet supported on Metal");
}
LatteQueryObject* occlusionQuery_create() override;
void occlusionQuery_destroy(LatteQueryObject* queryObj) override;
void occlusionQuery_flush() override;
void occlusionQuery_updateState() override;
// Helpers
MetalPerformanceMonitor& GetPerformanceMonitor() { return m_performanceMonitor; }
// True when at least one command buffer is present in m_commandBuffers.
bool IsCommandBufferActive() const
{
    return !m_commandBuffers.empty();
}
MTL::CommandBuffer* GetCurrentCommandBuffer()
{
cemu_assert_debug(m_commandBuffers.size() != 0);
@ -324,6 +291,11 @@ public:
return m_commandBuffers[m_commandBuffers.size() - 1].m_commandBuffer;
}
void RequestSoonCommit()
{
m_commitTreshold = m_recordedDrawcalls + 8;
}
MTL::CommandEncoder* GetCommandEncoder()
{
return m_commandEncoder;
@ -361,8 +333,6 @@ public:
void SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index);
MTL::CommandBuffer* GetCommandBuffer();
bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer);
void WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer);
MTL::RenderCommandEncoder* GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor);
MTL::RenderCommandEncoder* GetRenderCommandEncoder(bool forceRecreate = false);
MTL::ComputeCommandEncoder* GetComputeCommandEncoder();
@ -415,6 +385,40 @@ public:
return m_readbackBuffer;
}
// Metal buffer that holds the occlusion query results.
MTL::Buffer* GetOcclusionQueryResultBuffer() const
{
    MTL::Buffer* resultBuffer = m_occlusionQuery.m_resultBuffer;
    return resultBuffer;
}
// CPU-side pointer to the occlusion query results, indexed by query index.
uint64* GetOcclusionQueryResultsPtr()
{
    uint64* results = m_occlusionQuery.m_resultsPtr;
    return results;
}
// Takes the most recently added free index out of the pool and returns it.
// When the pool is empty, logs a message and returns 0.
// NOTE(review): in the empty case index 0 is returned without being removed from
// the pool, so it may alias a slot another query is still using - confirm intended.
uint32 GetAvailableOcclusionQueryIndex()
{
    auto& freeIndices = m_occlusionQuery.m_availableIndices;
    if (!freeIndices.empty())
    {
        const uint32 queryIndex = freeIndices.back();
        freeIndices.pop_back();
        return queryIndex;
    }

    cemuLog_log(LogType::Force, "No occlusion query index available");
    return 0;
}
// Puts a query index back into the pool of available indices.
void ReleaseOcclusionQueryIndex(uint32 queryIndex)
{
    auto& freeIndices = m_occlusionQuery.m_availableIndices;
    freeIndices.push_back(queryIndex);
}
// Records which query index is currently active; INVALID_UINT32 means none.
void SetActiveOcclusionQueryIndex(uint32 queryIndex)
{
    auto& occlusionQuery = m_occlusionQuery;
    occlusionQuery.m_activeIndex = queryIndex;
}
private:
MetalLayerHandle m_mainLayer;
MetalLayerHandle m_padLayer;
@ -462,12 +466,22 @@ private:
// Transform feedback
MTL::Buffer* m_xfbRingBuffer;
// Occlusion queries
struct
{
    // Buffer the visibility results are written to; created in the MetalRenderer constructor
    MTL::Buffer* m_resultBuffer = nullptr;
    // contents() pointer of m_resultBuffer, viewed as one uint64 per query index
    uint64* m_resultsPtr = nullptr;
    // Query indices that are currently free to hand out
    std::vector<uint32> m_availableIndices;
    // Index of the query being recorded, INVALID_UINT32 when no query is active
    uint32 m_activeIndex = INVALID_UINT32;
} m_occlusionQuery;
// Active objects
std::vector<MetalCommandBuffer> m_commandBuffers;
MetalEncoderType m_encoderType = MetalEncoderType::None;
MTL::CommandEncoder* m_commandEncoder = nullptr;
uint32 m_recordedDrawcalls = 0;
uint32 m_recordedDrawcalls;
uint32 m_commitTreshold;
// State
MetalState m_state;