From bdfac965e0c24e9bb3555f47d5e0478be74277b0 Mon Sep 17 00:00:00 2001
From: Samuliak
Date: Thu, 10 Oct 2024 18:26:14 +0200
Subject: [PATCH] implement async shader compilation

Compile Metal shader libraries on a pool of two worker threads instead
of inside the RendererShaderMtl constructor. The constructor now only
queues the shader; PreponeCompilation() either compiles a still-queued
shader on the calling thread or waits for the worker to finish.

draw_execute() skips the draw while the vertex, geometry or pixel shader
has not finished compiling.

---
 .../HW/Latte/Renderer/Metal/MetalRenderer.cpp |  12 +-
 .../Renderer/Metal/RendererShaderMtl.cpp      | 175 ++++++++++++++++--
 .../Latte/Renderer/Metal/RendererShaderMtl.h  |  34 ++--
 3 files changed, 189 insertions(+), 32 deletions(-)

diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index 0416f2d9..7cd85857 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -223,14 +223,16 @@ void MetalRenderer::ResizeLayer(const Vector2i& size, bool mainWindow)
 void MetalRenderer::Initialize()
 {
 	Renderer::Initialize();
+	RendererShaderMtl::Initialize();
 }
 
 void MetalRenderer::Shutdown()
 {
 	// TODO: should shutdown both layers
 	ImGui_ImplMetal_Shutdown();
-	Renderer::Shutdown();
 	CommitCommandBuffer();
+	Renderer::Shutdown();
+	RendererShaderMtl::Shutdown();
 }
 
 bool MetalRenderer::IsPadWindowActive()
@@ -935,13 +937,21 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
 		return;
 	}
 
+	// TODO: special state 8 and 5
+
 	auto& encoderState = m_state.m_encoderState;
 
 	// Shaders
 	LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
+	if (vertexShader && !vertexShader->shader->IsCompiled())
+		return;
 	LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
+	if (geometryShader && !geometryShader->shader->IsCompiled())
+		return;
 	LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
 	const auto fetchShader = LatteSHRC_GetActiveFetchShader();
+	if (pixelShader && !pixelShader->shader->IsCompiled())
+		return;
 
 	bool neverSkipAccurateBarrier = false;
 
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
index 4aff3e14..0f674080 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
@@ -1,6 +1,5 @@
 #include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
-#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
 //#include "Cemu/FileCache/FileCache.h"
 //#include "config/ActiveSettings.h"
@@ -8,31 +7,101 @@
 #include "Cemu/Logging/CemuLogging.h"
 #include "Common/precompiled.h"
 #include "config/CemuConfig.h"
+#include "util/helpers/helpers.h"
 
 extern std::atomic_int g_compiled_shaders_total;
 extern std::atomic_int g_compiled_shaders_async;
 
-RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
-	: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
+class ShaderMtlThreadPool
 {
-	MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init();
-	if (GetConfig().fast_math)
-		options->setFastMathEnabled(true);
+public:
+	void StartThreads()
+	{
+		if (m_threadsActive.exchange(true))
+			return;
+		// create thread pool
+		const uint32 threadCount = 2;
+		for (uint32 i = 0; i < threadCount; ++i)
+			s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this);
+	}
 
-	NS::Error* error = nullptr;
-	MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), options, &error);
-	options->release();
-	if (error)
-	{
-		cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), mslCode.c_str());
-		error->release();
-		return;
-	}
-	m_function = library->newFunction(ToNSString("main0"));
-	library->release();
+	void StopThreads()
+	{
+		if (!m_threadsActive.exchange(false))
+			return;
+		for (uint32 i = 0; i < s_threads.size(); ++i)
+			s_compilationQueueCount.increment();
+		for (auto& it : s_threads)
+			it.join();
+		s_threads.clear();
+	}
 
-	// Count shader compilation
-	g_compiled_shaders_total++;
+	~ShaderMtlThreadPool()
+	{
+		StopThreads();
+	}
+
+	void CompilerThreadFunc()
+	{
+		SetThreadName("mtlShaderComp");
+		while (m_threadsActive.load(std::memory_order::relaxed))
+		{
+			s_compilationQueueCount.decrementWithWait();
+			s_compilationQueueMutex.lock();
+			if (s_compilationQueue.empty())
+			{
+				// queue empty again, shaders compiled synchronously via PreponeCompilation()
+				s_compilationQueueMutex.unlock();
+				continue;
+			}
+			RendererShaderMtl* job = s_compilationQueue.front();
+			s_compilationQueue.pop_front();
+			// set compilation state
+			cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::QUEUED);
+			job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::COMPILING);
+			s_compilationQueueMutex.unlock();
+			// compile
+			job->CompileInternal();
+			++g_compiled_shaders_async;
+			// mark as compiled
+			cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::COMPILING);
+			job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::DONE);
+		}
+	}
+
+	bool HasThreadsRunning() const { return m_threadsActive; }
+
+public:
+	std::vector<std::thread> s_threads;
+
+	std::deque<RendererShaderMtl*> s_compilationQueue;
+	CounterSemaphore s_compilationQueueCount;
+	std::mutex s_compilationQueueMutex;
+
+private:
+	std::atomic<bool> m_threadsActive;
+} shaderMtlThreadPool;
+
+void RendererShaderMtl::Initialize()
+{
+	shaderMtlThreadPool.StartThreads();
+}
+
+void RendererShaderMtl::Shutdown()
+{
+	shaderMtlThreadPool.StopThreads();
+}
+
+RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
+	: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}, m_mslCode{mslCode}
+{
+	// start async compilation
+	shaderMtlThreadPool.s_compilationQueueMutex.lock();
+	m_compilationState.setValue(COMPILATION_STATE::QUEUED);
+	shaderMtlThreadPool.s_compilationQueue.push_back(this);
+	shaderMtlThreadPool.s_compilationQueueCount.increment();
+	shaderMtlThreadPool.s_compilationQueueMutex.unlock();
+	cemu_assert_debug(shaderMtlThreadPool.HasThreadsRunning()); // make sure .StartThreads() was called
 }
 
 RendererShaderMtl::~RendererShaderMtl()
@@ -40,3 +109,71 @@ RendererShaderMtl::~RendererShaderMtl()
 	if (m_function)
 		m_function->release();
 }
+
+void RendererShaderMtl::PreponeCompilation(bool isRenderThread)
+{
+	shaderMtlThreadPool.s_compilationQueueMutex.lock();
+	bool isStillQueued = m_compilationState.hasState(COMPILATION_STATE::QUEUED);
+	if (isStillQueued)
+	{
+		// remove from queue
+		shaderMtlThreadPool.s_compilationQueue.erase(std::remove(shaderMtlThreadPool.s_compilationQueue.begin(), shaderMtlThreadPool.s_compilationQueue.end(), this), shaderMtlThreadPool.s_compilationQueue.end());
+		m_compilationState.setValue(COMPILATION_STATE::COMPILING);
+	}
+	shaderMtlThreadPool.s_compilationQueueMutex.unlock();
+	if (!isStillQueued)
+	{
+		m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
+		--g_compiled_shaders_async; // compilation caused a stall so we don't consider this one async
+		return;
+	}
+	else
+	{
+		// compile synchronously
+		CompileInternal();
+		m_compilationState.setValue(COMPILATION_STATE::DONE);
+	}
+}
+
+bool RendererShaderMtl::IsCompiled()
+{
+	return m_compilationState.hasState(COMPILATION_STATE::DONE);
+}
+
+bool RendererShaderMtl::WaitForCompiled()
+{
+	m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
+	return true;
+}
+
+void RendererShaderMtl::CompileInternal()
+{
+	MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init();
+	// TODO: always disable fast math for problematic shaders
+	if (GetConfig().fast_math)
+		options->setFastMathEnabled(true);
+
+	NS::Error* error = nullptr;
+	MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(m_mslCode), options, &error);
+	options->release();
+	if (error)
+	{
+		cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str());
+		error->release();
+		FinishCompilation();
+		return;
+	}
+	m_function = library->newFunction(ToNSString("main0"));
+	library->release();
+
+	FinishCompilation();
+
+	// Count shader compilation
+	g_compiled_shaders_total++;
+}
+
+void RendererShaderMtl::FinishCompilation()
+{
+	m_mslCode.clear();
+	m_mslCode.shrink_to_fit();
+}
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
index 0758b0e6..ddf72d81 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
@@ -4,20 +4,26 @@
 #include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
 #include "HW/Latte/Renderer/Metal/MetalRenderer.h"
 #include "util/helpers/ConcurrentQueue.h"
+#include "util/helpers/Semaphore.h"
 
 #include <Metal/Metal.hpp>
 
 class RendererShaderMtl : public RendererShader
 {
-	//enum class COMPILATION_STATE : uint32
-	//{
-	//	NONE,
-	//	QUEUED,
-	//	COMPILING,
-	//	DONE
-	//};
+	friend class ShaderMtlThreadPool;
+
+	enum class COMPILATION_STATE : uint32
+	{
+		NONE,
+		QUEUED,
+		COMPILING,
+		DONE
+	};
 
 public:
+	static void Initialize();
+	static void Shutdown();
+
 	RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
 	virtual ~RendererShaderMtl();
 
@@ -42,15 +48,19 @@ public:
 		cemu_assert_suspicious();
 	}
 
-	// TODO: implement this
-	void PreponeCompilation(bool isRenderThread) override {}
-	bool IsCompiled() override { return true; }
-	bool WaitForCompiled() override { return true; }
+	void PreponeCompilation(bool isRenderThread) override;
+	bool IsCompiled() override;
+	bool WaitForCompiled() override;
 
 private:
 	class MetalRenderer* m_mtlr;
 
 	MTL::Function* m_function = nullptr;
 
-	void Compile(const std::string& mslCode);
+	StateSemaphore<COMPILATION_STATE> m_compilationState{ COMPILATION_STATE::NONE };
+
+	std::string m_mslCode;
+
+	void CompileInternal();
+	void FinishCompilation();
 };