Merge pull request #9 from SamoZ256/metal-async-pipelines

Async pipeline compilation
This commit is contained in:
SamoZ256 2024-10-29 06:57:39 +01:00 committed by GitHub
commit 228fd347e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 143 additions and 195 deletions

View File

@ -1,25 +1,95 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Core/LatteCachedFBO.h"
#include "Cafe/HW/Latte/Common/RegisterSerializer.h"
#include "Cafe/HW/Latte/Core/LatteShaderCache.h"
#include "Cemu/FileCache/FileCache.h"
#include "Common/precompiled.h"
#include "HW/Latte/Core/LatteShader.h"
#include "HW/Latte/ISA/LatteReg.h"
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
#include "Metal/MTLRenderPipeline.hpp"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "util/helpers/helpers.h"
#include "config/ActiveSettings.h"
#include <openssl/sha.h>
static bool g_compilePipelineThreadInit{false};
static std::mutex g_compilePipelineMutex;
static std::condition_variable g_compilePipelineCondVar;
static std::queue<MetalPipelineCompiler*> g_compilePipelineRequests;
// Worker loop for a background pipeline compile thread.
// Blocks until a request is available in g_compilePipelineRequests, takes it off the
// queue, compiles it outside of the queue lock and then frees the request object.
// The loop has no exit condition.
// threadIndex: index of this worker as assigned by the spawner; index 0 is meant to keep normal priority
static void compileThreadFunc(sint32 threadIndex)
{
	SetThreadName("compilePl");

	// one thread runs at normal priority while the others run at lower priority
	const bool wantsLowerPriority = (threadIndex != 0);
	if (wantsLowerPriority)
	{
		// TODO: set thread priority
	}

	for (;;)
	{
		MetalPipelineCompiler* job = nullptr;
		{
			// hold the queue mutex only while waiting for and dequeuing a request
			std::unique_lock queueLock(g_compilePipelineMutex);
			g_compilePipelineCondVar.wait(queueLock, [] { return !g_compilePipelineRequests.empty(); });
			job = g_compilePipelineRequests.front();
			g_compilePipelineRequests.pop();
		}

		// forceCompile = true, isRenderThread = false, showInOverlay = true
		job->Compile(true, false, true);
		// the worker owns the request once it has been dequeued
		delete job;
	}
}
static void initCompileThread()
{
uint32 numCompileThreads;
uint32 cpuCoreCount = GetPhysicalCoreCount();
if (cpuCoreCount <= 2)
numCompileThreads = 1;
else
numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
for (uint32 i = 0; i < numCompileThreads; i++)
{
std::thread compileThread(compileThreadFunc, i);
compileThread.detach();
}
}
// Appends a compile request to the shared queue and wakes one waiting worker.
// v: request to enqueue; the worker that dequeues it deletes it after compiling
static void queuePipeline(MetalPipelineCompiler* v)
{
	{
		// the mutex is released at the end of this scope, before notifying
		std::lock_guard<std::mutex> queueGuard(g_compilePipelineMutex);
		g_compilePipelineRequests.push(v);
	}
	g_compilePipelineCondVar.notify_one();
}
// Heuristic that guesses whether a pipeline is non-essential and may therefore be compiled asynchronously.
// Non-essential means that skipping the affected drawcalls shouldn't lead to permanently corrupted graphics.
// attachmentsInfo: attachments of the active framebuffer (only the depth format is inspected)
// extend: framebuffer size
// indexCount: index count of the draw
// Returns true if asynchronous compilation is considered safe
bool IsAsyncPipelineAllowed(const MetalAttachmentsInfo& attachmentsInfo, Vector2i extend, uint32 indexCount)
{
	// Splatoon ink mechanics use 1600x1600 R8 and R8G8 framebuffers, this resolution is rare enough that we can just blacklist it globally
	const bool isBlacklistedSize = (extend.x == 1600) && (extend.y == 1600);
	if (isBlacklistedSize)
		return false;

	// aggressive filter but seems to work well so far
	const bool hasDepthAttachment = (attachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT);
	if (hasDepthAttachment)
		return true;

	// small index count (3,4,5,6) is often associated with full-viewport quads
	// (which are considered essential due to often being used to generate persistent textures)
	return indexCount > 6;
}
MetalPipelineCache* g_mtlPipelineCache = nullptr;
MetalPipelineCache& MetalPipelineCache::GetInstance()
@ -34,34 +104,52 @@ MetalPipelineCache::MetalPipelineCache(class MetalRenderer* metalRenderer) : m_m
// Releases every cached pipeline state and frees its PipelineObject wrapper.
MetalPipelineCache::~MetalPipelineCache()
{
	for (auto& [key, pipelineObj] : m_pipelineCache)
	{
		// m_pipeline is null by default and callers already treat it as nullable,
		// so only release a pipeline state that actually exists
		if (pipelineObj->m_pipeline)
			pipelineObj->m_pipeline->release();
		delete pipelineObj;
	}
}
MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr)
{
uint64 hash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
auto it = m_pipelineCache.find(hash);
if (it != m_pipelineCache.end())
return it->second;
PipelineObject*& pipelineObj = m_pipelineCache[hash];
if (pipelineObj)
return pipelineObj;
MetalPipelineCompiler compiler(m_mtlr);
pipelineObj = new PipelineObject();
MetalPipelineCompiler* compiler = new MetalPipelineCompiler(m_mtlr, *pipelineObj);
bool fbosMatch;
compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch);
bool attemptedCompilation = false;
MTL::RenderPipelineState* pipeline = compiler.Compile(false, true, true, attemptedCompilation);
compiler->InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch);
bool allowAsyncCompile = false;
if (GetConfig().async_compile)
allowAsyncCompile = IsAsyncPipelineAllowed(activeAttachmentsInfo, extend, indexCount);
if (allowAsyncCompile)
{
if (!g_compilePipelineThreadInit)
{
initCompileThread();
g_compilePipelineThreadInit = true;
}
queuePipeline(compiler);
}
else
{
// Also force compile to ensure that the pipeline is ready
cemu_assert_debug(compiler->Compile(true, true, true));
delete compiler;
}
// If FBOs don't match, it wouldn't be possible to reconstruct the pipeline from the cache
if (pipeline && fbosMatch)
if (fbosMatch)
AddCurrentStateToCache(hash);
// Place the pipeline to the cache if the compilation was at least attempted
if (attemptedCompilation)
m_pipelineCache.insert({hash, pipeline});
return pipeline;
return pipelineObj;
}
uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
@ -360,32 +448,24 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span<uint8> fileData)
MetalAttachmentsInfo attachmentsInfo(*lcr, pixelShader);
MTL::RenderPipelineState* pipeline = nullptr;
PipelineObject* pipelineObject = new PipelineObject();
// compile
{
MetalPipelineCompiler pp(m_mtlr);
MetalPipelineCompiler pp(m_mtlr, *pipelineObject);
bool fbosMatch;
pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr, fbosMatch);
cemu_assert_debug(fbosMatch);
//{
// s_spinlockSharedInternal.lock();
// delete lcr;
// delete cachedPipeline;
// s_spinlockSharedInternal.unlock();
// return;
//}
bool attemptedCompilation = false;
pipeline = pp.Compile(true, true, false, attemptedCompilation);
cemu_assert_debug(attemptedCompilation);
pp.Compile(true, true, false);
// destroy pp early
}
// on success, calculate pipeline hash and flag as present in cache
if (pipeline)
// on success, cache the pipeline
if (pipelineObject->m_pipeline)
{
uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr);
m_pipelineCacheLock.lock();
m_pipelineCache[pipelineStateHash] = pipeline;
m_pipelineCache[pipelineStateHash] = pipelineObject;
m_pipelineCacheLock.unlock();
}

View File

@ -3,8 +3,8 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
#include "util/helpers/ConcurrentQueue.h"
#include "util/helpers/fspinlock.h"
#include "util/math/vector2.h"
// TODO: binary archives
class MetalPipelineCache
{
public:
@ -13,7 +13,7 @@ public:
MetalPipelineCache(class MetalRenderer* metalRenderer);
~MetalPipelineCache();
MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
PipelineObject* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr);
// Cache loading
uint32 BeginLoading(uint64 cacheTitleId); // returns count of pipelines stored in cache
@ -28,7 +28,7 @@ public:
private:
class MetalRenderer* m_mtlr;
std::map<uint64, MTL::RenderPipelineState*> m_pipelineCache;
std::map<uint64, PipelineObject*> m_pipelineCache;
FSpinlock m_pipelineCacheLock;
std::thread* m_pipelineCacheStoreThread;

View File

@ -327,7 +327,7 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c
InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch);
}
MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay, bool& attemptedCompilation)
bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay)
{
if (forceCompile)
{
@ -343,11 +343,11 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool
{
// fail early if some shader stages are not compiled
if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled())
return nullptr;
return false;
if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled())
return nullptr;
return false;
if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled())
return nullptr;
return false;
}
// Compile
@ -386,7 +386,7 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool
}
auto end = std::chrono::high_resolution_clock::now();
auto creationDuration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
auto creationDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
if (error)
{
@ -403,10 +403,9 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool
g_compiling_pipelines++;
}
// Inform the pipeline cache that compilation was at least attempted
attemptedCompilation = true;
m_pipelineObj.m_pipeline = pipeline;
return pipeline;
return true;
}
void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch)
@ -479,7 +478,6 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha
layout->setStride(bufferStride);
}
// TODO: don't always set the vertex descriptor?
desc->setVertexDescriptor(vertexDescriptor);
vertexDescriptor->release();
}
@ -487,62 +485,6 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha
SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch);
m_pipelineDescriptor = desc;
//TryLoadBinaryArchive();
// Load binary
/*
if (m_binaryArchive)
{
NS::Object* binArchives[] = {m_binaryArchive};
auto binaryArchives = NS::Array::alloc()->init(binArchives, 1);
desc->setBinaryArchives(binaryArchives);
binaryArchives->release();
}
*/
/*
NS::Error* error = nullptr;
#ifdef CEMU_DEBUG_ASSERT
desc->setLabel(GetLabel("Cached render pipeline state", desc));
#endif
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error);
// Pipeline wasn't found in the binary archive, we need to compile it
if (error)
{
desc->setBinaryArchives(nullptr);
error->release();
error = nullptr;
#ifdef CEMU_DEBUG_ASSERT
desc->setLabel(GetLabel("New render pipeline state", desc));
#endif
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error);
if (error)
{
cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String());
error->release();
}
else
{
// Save binary
if (m_binaryArchive)
{
NS::Error* error = nullptr;
m_binaryArchive->addRenderPipelineFunctions(desc, &error);
if (error)
{
cemuLog_log(LogType::Force, "error saving render pipeline functions: {}", error->localizedDescription()->utf8String());
error->release();
}
}
}
}
desc->release();
return pipeline;
*/
}
void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch)
@ -553,77 +495,4 @@ void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShade
SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch);
m_pipelineDescriptor = desc;
//TryLoadBinaryArchive();
// Load binary
// TODO: no binary archives? :(
/*
NS::Error* error = nullptr;
#ifdef CEMU_DEBUG_ASSERT
desc->setLabel(GetLabel("Mesh pipeline state", desc));
#endif
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
desc->release();
if (error)
{
cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String());
error->release();
}
return pipeline;
*/
}
/*
void MetalPipelineCache::TryLoadBinaryArchive()
{
if (m_binaryArchive || s_cacheTitleId == INVALID_TITLE_ID)
return;
// GPU name
const char* deviceName1 = m_mtlr->GetDevice()->name()->utf8String();
std::string deviceName;
deviceName.assign(deviceName1);
// Replace spaces with underscores
for (auto& c : deviceName)
{
if (c == ' ')
c = '_';
}
// OS version
auto osVersion = NS::ProcessInfo::processInfo()->operatingSystemVersion();
// Precompiled binaries cannot be shared between different devices or OS versions
const std::string cacheFilename = fmt::format("{:016x}_mtl_pipelines.bin", s_cacheTitleId);
const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}/{}-{}-{}/{}", deviceName, osVersion.majorVersion, osVersion.minorVersion, osVersion.patchVersion, cacheFilename);
// Create the directory if it doesn't exist
std::filesystem::create_directories(cachePath.parent_path());
m_binaryArchiveURL = NS::URL::fileURLWithPath(ToNSString((const char*)cachePath.generic_u8string().c_str()));
MTL::BinaryArchiveDescriptor* desc = MTL::BinaryArchiveDescriptor::alloc()->init();
desc->setUrl(m_binaryArchiveURL);
NS::Error* error = nullptr;
m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error);
if (error)
{
desc->setUrl(nullptr);
error->release();
error = nullptr;
m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error);
if (error)
{
cemuLog_log(LogType::Force, "failed to create binary archive: {}", error->localizedDescription()->utf8String());
error->release();
}
}
desc->release();
}
*/

View File

@ -5,18 +5,24 @@
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
// Holder for a render pipeline state.
// MetalPipelineCompiler::Compile stores its result in m_pipeline; the member stays null until then.
// NOTE(review): m_pipeline appears to be written by a compile thread and read by the draw code
// without visible synchronization - confirm whether this needs to be atomic.
struct PipelineObject
{
	MTL::RenderPipelineState* m_pipeline{nullptr};
};
class MetalPipelineCompiler
{
public:
MetalPipelineCompiler(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
MetalPipelineCompiler(class MetalRenderer* metalRenderer, PipelineObject& pipelineObj) : m_mtlr{metalRenderer}, m_pipelineObj{pipelineObj} {}
~MetalPipelineCompiler();
void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch);
MTL::RenderPipelineState* Compile(bool forceCompile, bool isRenderThread, bool showInOverlay, bool& attemptedCompilation);
bool Compile(bool forceCompile, bool isRenderThread, bool showInOverlay);
private:
class MetalRenderer* m_mtlr;
PipelineObject& m_pipelineObj;
class RendererShaderMtl* m_vertexShaderMtl;
class RendererShaderMtl* m_geometryShaderMtl;
@ -24,17 +30,9 @@ private:
bool m_usesGeometryShader;
bool m_rasterizationEnabled;
/*
std::map<uint64, MTL::RenderPipelineState*> m_pipelineCache;
NS::URL* m_binaryArchiveURL;
MTL::BinaryArchive* m_binaryArchive;
*/
NS::Object* m_pipelineDescriptor;
void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch);
void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch);
//void TryLoadBinaryArchive();
};

View File

@ -23,6 +23,7 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
#include "config/CemuConfig.h"
#define IMGUI_IMPL_METAL_CPP
@ -1000,14 +1001,14 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
auto renderCommandEncoder = GetRenderCommandEncoder();
// Render pipeline state
MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, LatteGPUState.contextNew);
if (!renderPipelineState)
PipelineObject* pipelineObj = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, m_state.m_activeFBO.m_fbo->m_size, count, LatteGPUState.contextNew);
if (!pipelineObj->m_pipeline)
return;
if (renderPipelineState != encoderState.m_renderPipelineState)
if (pipelineObj->m_pipeline != encoderState.m_renderPipelineState)
{
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
encoderState.m_renderPipelineState = renderPipelineState;
renderCommandEncoder->setRenderPipelineState(pipelineObj->m_pipeline);
encoderState.m_renderPipelineState = pipelineObj->m_pipeline;
}
// Depth stencil state