diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 9e49959c..101b6d68 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -1,25 +1,95 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" -#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/ISA/RegDefines.h" #include "Cafe/HW/Latte/Core/LatteConst.h" -#include "Cafe/HW/Latte/Core/LatteCachedFBO.h" #include "Cafe/HW/Latte/Common/RegisterSerializer.h" #include "Cafe/HW/Latte/Core/LatteShaderCache.h" #include "Cemu/FileCache/FileCache.h" #include "Common/precompiled.h" -#include "HW/Latte/Core/LatteShader.h" -#include "HW/Latte/ISA/LatteReg.h" -#include "HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" -#include "Metal/MTLRenderPipeline.hpp" +#include "Cafe/HW/Latte/Core/LatteShader.h" +#include "Cafe/HW/Latte/ISA/LatteReg.h" #include "util/helpers/helpers.h" #include "config/ActiveSettings.h" + #include +static bool g_compilePipelineThreadInit{false}; +static std::mutex g_compilePipelineMutex; +static std::condition_variable g_compilePipelineCondVar; +static std::queue g_compilePipelineRequests; + +static void compileThreadFunc(sint32 threadIndex) +{ + SetThreadName("compilePl"); + + // one thread runs at normal priority while the others run at lower priority + if(threadIndex != 0) + ; // TODO: set thread priority + + while (true) + { + std::unique_lock lock(g_compilePipelineMutex); + while (g_compilePipelineRequests.empty()) + g_compilePipelineCondVar.wait(lock); + + MetalPipelineCompiler* request = g_compilePipelineRequests.front(); + + 
g_compilePipelineRequests.pop(); + + lock.unlock(); + + request->Compile(true, false, true); + delete request; + } +} + +static void initCompileThread() +{ + uint32 numCompileThreads; + + uint32 cpuCoreCount = GetPhysicalCoreCount(); + if (cpuCoreCount <= 2) + numCompileThreads = 1; + else + numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3 + + numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8 + + for (uint32 i = 0; i < numCompileThreads; i++) + { + std::thread compileThread(compileThreadFunc, i); + compileThread.detach(); + } +} + +static void queuePipeline(MetalPipelineCompiler* v) +{ + std::unique_lock lock(g_compilePipelineMutex); + g_compilePipelineRequests.push(std::move(v)); + lock.unlock(); + g_compilePipelineCondVar.notify_one(); +} + +// make a guess if a pipeline is not essential +// non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics +bool IsAsyncPipelineAllowed(const MetalAttachmentsInfo& attachmentsInfo, Vector2i extend, uint32 indexCount) +{ + if (extend.x == 1600 && extend.y == 1600) + return false; // Splatoon ink mechanics use 1600x1600 R8 and R8G8 framebuffers, this resolution is rare enough that we can just blacklist it globally + + if (attachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT) + return true; // aggressive filter but seems to work well so far + + // small index count (3,4,5,6) is often associated with full-viewport quads (which are considered essential due to often being used to generate persistent textures) + if (indexCount <= 6) + return false; + + return true; +} + MetalPipelineCache* g_mtlPipelineCache = nullptr; MetalPipelineCache& MetalPipelineCache::GetInstance() @@ -34,34 +104,52 @@ MetalPipelineCache::MetalPipelineCache(class MetalRenderer* metalRenderer) : m_m MetalPipelineCache::~MetalPipelineCache() { - for (auto& [key, value] : m_pipelineCache) + for (auto& [key, pipelineObj] : m_pipelineCache) { 
- value->release(); + // m_pipeline is still null when compilation failed or a queued async compile never finished + if (pipelineObj->m_pipeline) + pipelineObj->m_pipeline->release(); + delete pipelineObj; } } -MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) +PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr) { uint64 hash = CalculatePipelineHash(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr); - auto it = m_pipelineCache.find(hash); - if (it != m_pipelineCache.end()) - return it->second; + PipelineObject*& pipelineObj = m_pipelineCache[hash]; + if (pipelineObj) + return pipelineObj; - MetalPipelineCompiler compiler(m_mtlr); + pipelineObj = new PipelineObject(); + + MetalPipelineCompiler* compiler = new MetalPipelineCompiler(m_mtlr, *pipelineObj); bool fbosMatch; - compiler.InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); - bool attemptedCompilation = false; - MTL::RenderPipelineState* pipeline = compiler.Compile(false, true, true, attemptedCompilation); + compiler->InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); + + bool allowAsyncCompile = false; + if (GetConfig().async_compile) + allowAsyncCompile = IsAsyncPipelineAllowed(activeAttachmentsInfo, extend, indexCount); + + if (allowAsyncCompile) + {
+ if (!g_compilePipelineThreadInit) + { + initCompileThread(); + g_compilePipelineThreadInit = true; + } + + queuePipeline(compiler); + } + else + { + // Force a synchronous compile so the pipeline is ready on return. + // The call must stay outside cemu_assert_debug: an assert that is compiled out would otherwise drop the compile itself. + [[maybe_unused]] const bool compiled = compiler->Compile(true, true, true); + cemu_assert_debug(compiled); + delete compiler; + } // If FBOs don't match, it wouldn't be possible to reconstruct the pipeline from the cache - if (pipeline && fbosMatch) + if (fbosMatch) AddCurrentStateToCache(hash); - // Place the pipeline to the cache if the compilation was at least attempted - if (attemptedCompilation) - m_pipelineCache.insert({hash, pipeline}); - - return pipeline; + return pipelineObj; } uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr) @@ -360,32 +448,24 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span fileData) MetalAttachmentsInfo attachmentsInfo(*lcr, pixelShader); - MTL::RenderPipelineState* pipeline = nullptr; + PipelineObject* pipelineObject = new PipelineObject(); + // compile { - MetalPipelineCompiler pp(m_mtlr); + MetalPipelineCompiler pp(m_mtlr, *pipelineObject); bool fbosMatch; pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr, fbosMatch); cemu_assert_debug(fbosMatch); - //{ - // s_spinlockSharedInternal.lock(); - // delete lcr; - // delete cachedPipeline; - // s_spinlockSharedInternal.unlock(); - // return; - //} - bool attemptedCompilation = false; - pipeline = pp.Compile(true, true, false, attemptedCompilation); - cemu_assert_debug(attemptedCompilation); + pp.Compile(true, true, false); // destroy pp early } - // on success, calculate pipeline hash and flag as present in cache - if (pipeline) + // on
success, cache the pipeline + if (pipelineObject->m_pipeline) { uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr); m_pipelineCacheLock.lock(); - m_pipelineCache[pipelineStateHash] = pipeline; + m_pipelineCache[pipelineStateHash] = pipelineObject; m_pipelineCacheLock.unlock(); } + else + { + // compilation failed: the object was never stored in the cache and the compiler that referenced it is already destroyed, so free it instead of leaking it + delete pipelineObject; + } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h index be26bdee..d49ec6a2 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -3,8 +3,8 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" #include "util/helpers/ConcurrentQueue.h" #include "util/helpers/fspinlock.h" +#include "util/math/vector2.h" -// TODO: binary archives class MetalPipelineCache { public: @@ -13,7 +13,7 @@ public: MetalPipelineCache(class MetalRenderer* metalRenderer); ~MetalPipelineCache(); - MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); + PipelineObject* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr); // Cache loading uint32 BeginLoading(uint64 cacheTitleId); // returns count of pipelines stored in cache @@ -28,7 +28,7 @@ public: private: class MetalRenderer* m_mtlr; - std::map m_pipelineCache; + std::map
m_pipelineCache; FSpinlock m_pipelineCacheLock; std::thread* m_pipelineCacheStoreThread; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp index a8bce291..611d190d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp @@ -327,7 +327,7 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); } -MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay, bool& attemptedCompilation) +bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay) { if (forceCompile) { @@ -343,11 +343,11 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool { // fail early if some shader stages are not compiled if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled()) - return nullptr; + return false; if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled()) - return nullptr; + return false; if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled()) - return nullptr; + return false; } // Compile @@ -386,7 +386,7 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool } auto end = std::chrono::high_resolution_clock::now(); - auto creationDuration = std::chrono::duration_cast(end - start).count(); + auto creationDuration = std::chrono::duration_cast(end - start).count(); if (error) { @@ -403,10 +403,9 @@ MTL::RenderPipelineState* MetalPipelineCompiler::Compile(bool forceCompile, bool g_compiling_pipelines++; } - // Inform the pipeline cache that compilation was at least attempted - attemptedCompilation = true; + m_pipelineObj.m_pipeline = pipeline; - return pipeline; + return true; } void MetalPipelineCompiler::InitFromStateRender(const 
LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) @@ -479,7 +478,6 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha layout->setStride(bufferStride); } - // TODO: don't always set the vertex descriptor? desc->setVertexDescriptor(vertexDescriptor); vertexDescriptor->release(); } @@ -487,62 +485,6 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch); m_pipelineDescriptor = desc; - - //TryLoadBinaryArchive(); - - // Load binary - /* - if (m_binaryArchive) - { - NS::Object* binArchives[] = {m_binaryArchive}; - auto binaryArchives = NS::Array::alloc()->init(binArchives, 1); - desc->setBinaryArchives(binaryArchives); - binaryArchives->release(); - } - */ - - /* - NS::Error* error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("Cached render pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error); - - // Pipeline wasn't found in the binary archive, we need to compile it - if (error) - { - desc->setBinaryArchives(nullptr); - - error->release(); - error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("New render pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String()); - error->release(); - } - else - { - // Save binary - if (m_binaryArchive) - { - NS::Error* error = nullptr; - m_binaryArchive->addRenderPipelineFunctions(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "error saving render pipeline 
functions: {}", error->localizedDescription()->utf8String()); - error->release(); - } - } - } - } - desc->release(); - - return pipeline; - */ } void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) @@ -553,77 +495,4 @@ void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShade SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch); m_pipelineDescriptor = desc; - - //TryLoadBinaryArchive(); - - // Load binary - // TODO: no binary archives? :( - - /* - NS::Error* error = nullptr; -#ifdef CEMU_DEBUG_ASSERT - desc->setLabel(GetLabel("Mesh pipeline state", desc)); -#endif - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); - desc->release(); - if (error) - { - cemuLog_log(LogType::Force, "error creating mesh render pipeline state: {}", error->localizedDescription()->utf8String()); - error->release(); - } - - return pipeline; - */ } - -/* -void MetalPipelineCache::TryLoadBinaryArchive() -{ - if (m_binaryArchive || s_cacheTitleId == INVALID_TITLE_ID) - return; - - // GPU name - const char* deviceName1 = m_mtlr->GetDevice()->name()->utf8String(); - std::string deviceName; - deviceName.assign(deviceName1); - - // Replace spaces with underscores - for (auto& c : deviceName) - { - if (c == ' ') - c = '_'; - } - - // OS version - auto osVersion = NS::ProcessInfo::processInfo()->operatingSystemVersion(); - - // Precompiled binaries cannot be shared between different devices or OS versions - const std::string cacheFilename = fmt::format("{:016x}_mtl_pipelines.bin", s_cacheTitleId); - const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}/{}-{}-{}/{}", deviceName, osVersion.majorVersion, osVersion.minorVersion, osVersion.patchVersion, 
cacheFilename); - - // Create the directory if it doesn't exist - std::filesystem::create_directories(cachePath.parent_path()); - - m_binaryArchiveURL = NS::URL::fileURLWithPath(ToNSString((const char*)cachePath.generic_u8string().c_str())); - - MTL::BinaryArchiveDescriptor* desc = MTL::BinaryArchiveDescriptor::alloc()->init(); - desc->setUrl(m_binaryArchiveURL); - - NS::Error* error = nullptr; - m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); - if (error) - { - desc->setUrl(nullptr); - - error->release(); - error = nullptr; - m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); - if (error) - { - cemuLog_log(LogType::Force, "failed to create binary archive: {}", error->localizedDescription()->utf8String()); - error->release(); - } - } - desc->release(); -} -*/ diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h index e4067555..d762d802 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h @@ -5,18 +5,24 @@ #include "Cafe/HW/Latte/ISA/LatteReg.h" #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" +struct PipelineObject +{ + MTL::RenderPipelineState* m_pipeline = nullptr; +}; + class MetalPipelineCompiler { public: - MetalPipelineCompiler(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} + MetalPipelineCompiler(class MetalRenderer* metalRenderer, PipelineObject& pipelineObj) : m_mtlr{metalRenderer}, m_pipelineObj{pipelineObj} {} ~MetalPipelineCompiler(); void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); - MTL::RenderPipelineState* Compile(bool 
forceCompile, bool isRenderThread, bool showInOverlay, bool& attemptedCompilation); + bool Compile(bool forceCompile, bool isRenderThread, bool showInOverlay); private: class MetalRenderer* m_mtlr; + PipelineObject& m_pipelineObj; class RendererShaderMtl* m_vertexShaderMtl; class RendererShaderMtl* m_geometryShaderMtl; @@ -24,17 +30,9 @@ private: bool m_usesGeometryShader; bool m_rasterizationEnabled; - /* - std::map m_pipelineCache; - - NS::URL* m_binaryArchiveURL; - MTL::BinaryArchive* m_binaryArchive; - */ NS::Object* m_pipelineDescriptor; void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); - - //void TryLoadBinaryArchive(); }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index e560c2c3..dc4244ec 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -23,6 +23,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h" #include "config/CemuConfig.h" #define IMGUI_IMPL_METAL_CPP @@ -1000,14 +1001,14 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 auto renderCommandEncoder = GetRenderCommandEncoder(); // Render pipeline state - MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader,
m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, LatteGPUState.contextNew); - if (!renderPipelineState) + PipelineObject* pipelineObj = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, m_state.m_activeFBO.m_fbo->m_size, count, LatteGPUState.contextNew); + if (!pipelineObj->m_pipeline) return; - if (renderPipelineState != encoderState.m_renderPipelineState) + if (pipelineObj->m_pipeline != encoderState.m_renderPipelineState) { - renderCommandEncoder->setRenderPipelineState(renderPipelineState); - encoderState.m_renderPipelineState = renderPipelineState; + renderCommandEncoder->setRenderPipelineState(pipelineObj->m_pipeline); + encoderState.m_renderPipelineState = pipelineObj->m_pipeline; } // Depth stencil state