From bdfac965e0c24e9bb3555f47d5e0478be74277b0 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 10 Oct 2024 18:26:14 +0200 Subject: [PATCH 1/3] implement async shader compilation --- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 12 +- .../Renderer/Metal/RendererShaderMtl.cpp | 175 ++++++++++++++++-- .../Latte/Renderer/Metal/RendererShaderMtl.h | 34 ++-- 3 files changed, 189 insertions(+), 32 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 0416f2d9..7cd85857 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -223,14 +223,16 @@ void MetalRenderer::ResizeLayer(const Vector2i& size, bool mainWindow) void MetalRenderer::Initialize() { Renderer::Initialize(); + RendererShaderMtl::Initialize(); } void MetalRenderer::Shutdown() { // TODO: should shutdown both layers ImGui_ImplMetal_Shutdown(); - Renderer::Shutdown(); CommitCommandBuffer(); + Renderer::Shutdown(); + RendererShaderMtl::Shutdown(); } bool MetalRenderer::IsPadWindowActive() @@ -935,13 +937,21 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 return; } + // TODO: special state 8 and 5 + auto& encoderState = m_state.m_encoderState; // Shaders LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); + if (vertexShader && !vertexShader->shader->IsCompiled()) + return; LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); + if (geometryShader && !geometryShader->shader->IsCompiled()) + return; LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); const auto fetchShader = LatteSHRC_GetActiveFetchShader(); + if (pixelShader && !pixelShader->shader->IsCompiled()) /* null-check the pointer that is actually dereferenced */ + return; bool neverSkipAccurateBarrier = false; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 4aff3e14..0f674080 100644 --- 
a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -1,6 +1,5 @@ #include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" //#include "Cemu/FileCache/FileCache.h" //#include "config/ActiveSettings.h" @@ -8,31 +7,101 @@ #include "Cemu/Logging/CemuLogging.h" #include "Common/precompiled.h" #include "config/CemuConfig.h" +#include "util/helpers/helpers.h" extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; -RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) - : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer} +class ShaderMtlThreadPool { - MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init(); - if (GetConfig().fast_math) - options->setFastMathEnabled(true); +public: + void StartThreads() + { + if (m_threadsActive.exchange(true)) + return; + // create thread pool + const uint32 threadCount = 2; + for (uint32 i = 0; i < threadCount; ++i) + s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this); + } - NS::Error* error = nullptr; - MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), options, &error); - options->release(); - if (error) - { - cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), mslCode.c_str()); - error->release(); - return; - } - m_function = library->newFunction(ToNSString("main0")); - library->release(); + void StopThreads() + { + if (!m_threadsActive.exchange(false)) + return; + for (uint32 i = 0; i < s_threads.size(); ++i) + s_compilationQueueCount.increment(); + for (auto& it : s_threads) + 
it.join(); + s_threads.clear(); + } - // Count shader compilation - g_compiled_shaders_total++; + ~ShaderMtlThreadPool() + { + StopThreads(); + } + + void CompilerThreadFunc() + { + SetThreadName("mtlShaderComp"); + while (m_threadsActive.load(std::memory_order::relaxed)) + { + s_compilationQueueCount.decrementWithWait(); + s_compilationQueueMutex.lock(); + if (s_compilationQueue.empty()) + { + // queue empty again, shaders compiled synchronously via PreponeCompilation() + s_compilationQueueMutex.unlock(); + continue; + } + RendererShaderMtl* job = s_compilationQueue.front(); + s_compilationQueue.pop_front(); + // set compilation state + cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::QUEUED); + job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::COMPILING); + s_compilationQueueMutex.unlock(); + // compile + job->CompileInternal(); + ++g_compiled_shaders_async; + // mark as compiled + cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::COMPILING); + job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::DONE); + } + } + + bool HasThreadsRunning() const { return m_threadsActive; } + +public: + std::vector s_threads; + + std::deque s_compilationQueue; + CounterSemaphore s_compilationQueueCount; + std::mutex s_compilationQueueMutex; + +private: + std::atomic m_threadsActive; +} shaderMtlThreadPool; + +void RendererShaderMtl::Initialize() +{ + shaderMtlThreadPool.StartThreads(); +} + +void RendererShaderMtl::Shutdown() +{ + shaderMtlThreadPool.StopThreads(); +} + +RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) + : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}, m_mslCode{mslCode} +{ + // start async compilation + shaderMtlThreadPool.s_compilationQueueMutex.lock(); + 
m_compilationState.setValue(COMPILATION_STATE::QUEUED); + shaderMtlThreadPool.s_compilationQueue.push_back(this); + shaderMtlThreadPool.s_compilationQueueCount.increment(); + shaderMtlThreadPool.s_compilationQueueMutex.unlock(); + cemu_assert_debug(shaderMtlThreadPool.HasThreadsRunning()); // make sure .StartThreads() was called } RendererShaderMtl::~RendererShaderMtl() @@ -40,3 +109,71 @@ RendererShaderMtl::~RendererShaderMtl() if (m_function) m_function->release(); } + +void RendererShaderMtl::PreponeCompilation(bool isRenderThread) +{ + shaderMtlThreadPool.s_compilationQueueMutex.lock(); + bool isStillQueued = m_compilationState.hasState(COMPILATION_STATE::QUEUED); + if (isStillQueued) + { + // remove from queue + shaderMtlThreadPool.s_compilationQueue.erase(std::remove(shaderMtlThreadPool.s_compilationQueue.begin(), shaderMtlThreadPool.s_compilationQueue.end(), this), shaderMtlThreadPool.s_compilationQueue.end()); + m_compilationState.setValue(COMPILATION_STATE::COMPILING); + } + shaderMtlThreadPool.s_compilationQueueMutex.unlock(); + if (!isStillQueued) + { + m_compilationState.waitUntilValue(COMPILATION_STATE::DONE); + --g_compiled_shaders_async; // compilation caused a stall so we don't consider this one async + return; + } + else + { + // compile synchronously + CompileInternal(); + m_compilationState.setValue(COMPILATION_STATE::DONE); + } +} + +bool RendererShaderMtl::IsCompiled() +{ + return m_compilationState.hasState(COMPILATION_STATE::DONE); +}; + +bool RendererShaderMtl::WaitForCompiled() +{ + m_compilationState.waitUntilValue(COMPILATION_STATE::DONE); + return true; +} + +void RendererShaderMtl::CompileInternal() +{ + MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init(); + // TODO: always disable fast math for problematic shaders + if (GetConfig().fast_math) + options->setFastMathEnabled(true); + + NS::Error* error = nullptr; + MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(m_mslCode), options, &error); + 
options->release(); + if (error) + { + cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str()); + error->release(); + FinishCompilation(); + return; + } + m_function = library->newFunction(ToNSString("main0")); + library->release(); + + FinishCompilation(); + + // Count shader compilation + g_compiled_shaders_total++; +} + +void RendererShaderMtl::FinishCompilation() +{ + m_mslCode.clear(); + m_mslCode.shrink_to_fit(); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index 0758b0e6..ddf72d81 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -4,20 +4,26 @@ #include "HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "HW/Latte/Renderer/Metal/MetalRenderer.h" #include "util/helpers/ConcurrentQueue.h" +#include "util/helpers/Semaphore.h" #include class RendererShaderMtl : public RendererShader { - //enum class COMPILATION_STATE : uint32 - //{ - // NONE, - // QUEUED, - // COMPILING, - // DONE - //}; + friend class ShaderMtlThreadPool; + + enum class COMPILATION_STATE : uint32 + { + NONE, + QUEUED, + COMPILING, + DONE + }; public: + static void Initialize(); + static void Shutdown(); + RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode); virtual ~RendererShaderMtl(); @@ -42,15 +48,19 @@ public: cemu_assert_suspicious(); } - // TODO: implement this - void PreponeCompilation(bool isRenderThread) override {} - bool IsCompiled() override { return true; } - bool WaitForCompiled() override { return true; } + void PreponeCompilation(bool isRenderThread) override; + bool IsCompiled() override; + bool WaitForCompiled() override; private: class MetalRenderer* m_mtlr; MTL::Function* m_function = nullptr; - void Compile(const std::string& 
mslCode); + StateSemaphore<COMPILATION_STATE> m_compilationState{ COMPILATION_STATE::NONE }; /* holds one of NONE, QUEUED, COMPILING, DONE */ + + std::string m_mslCode; + + void CompileInternal(); + void FinishCompilation(); }; From 03bc647e1cfd9597ef99a135ea6525db8c52e1c0 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 10 Oct 2024 18:53:22 +0200 Subject: [PATCH 2/3] prepone compilation for rect geometry shaders --- src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index b7f5c88c..29459539 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -178,6 +178,7 @@ static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer gsSrc.append("}\r\n"); auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc); + mtlShader->PreponeCompilation(true); return mtlShader; } From 641ef71cab311fec5afa6d70a405e058d38ad1d5 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 10 Oct 2024 19:38:14 +0200 Subject: [PATCH 3/3] count compiled shaders properly --- src/Cafe/HW/Latte/Core/LatteShaderCache.cpp | 34 +++++++++++++++---- .../Renderer/Metal/RendererShaderMtl.cpp | 31 +++++++++++++++-- .../Latte/Renderer/Metal/RendererShaderMtl.h | 7 ++++ 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp index 1ba50dec..4659ff10 100644 --- a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp @@ -11,6 +11,7 @@ #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h" #include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h" +#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" 
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.h" @@ -158,12 +159,19 @@ bool LoadTGAFile(const std::vector& buffer, TGAFILE *tgaFile) void LatteShaderCache_finish() { - if (g_renderer->GetType() == RendererAPI::Vulkan) + if (g_renderer->GetType() == RendererAPI::Vulkan) + { RendererShaderVk::ShaderCacheLoading_end(); + } else if (g_renderer->GetType() == RendererAPI::OpenGL) + { RendererShaderGL::ShaderCacheLoading_end(); + } else if (g_renderer->GetType() == RendererAPI::Metal) + { + RendererShaderMtl::ShaderCacheLoading_end(); MetalPipelineCache::ShaderCacheLoading_end(); + } } uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId) @@ -243,11 +251,18 @@ void LatteShaderCache_Load() fs::create_directories(ActiveSettings::GetCachePath("shaderCache/precompiled"), ec); // initialize renderer specific caches if (g_renderer->GetType() == RendererAPI::Vulkan) + { RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId); + } else if (g_renderer->GetType() == RendererAPI::OpenGL) + { RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId); + } else if (g_renderer->GetType() == RendererAPI::Metal) + { + RendererShaderMtl::ShaderCacheLoading_begin(cacheTitleId); MetalPipelineCache::ShaderCacheLoading_begin(cacheTitleId); + } // get cache file name const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId); const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0 @@ -776,11 +791,18 @@ void LatteShaderCache_Close() s_shaderCacheGeneric = nullptr; } if (g_renderer->GetType() == RendererAPI::Vulkan) - RendererShaderVk::ShaderCacheLoading_Close(); - else if (g_renderer->GetType() == RendererAPI::OpenGL) - RendererShaderGL::ShaderCacheLoading_Close(); - else if (g_renderer->GetType() == RendererAPI::Metal) - MetalPipelineCache::ShaderCacheLoading_Close(); + { + RendererShaderVk::ShaderCacheLoading_Close(); + } + 
else if (g_renderer->GetType() == RendererAPI::OpenGL) + { + RendererShaderGL::ShaderCacheLoading_Close(); + } + else if (g_renderer->GetType() == RendererAPI::Metal) + { + RendererShaderMtl::ShaderCacheLoading_Close(); + MetalPipelineCache::ShaderCacheLoading_Close(); + } // if Vulkan then also close pipeline cache if (g_renderer->GetType() == RendererAPI::Vulkan) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 0f674080..c8babb14 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -9,6 +9,8 @@ #include "config/CemuConfig.h" #include "util/helpers/helpers.h" +static std::atomic_bool s_isLoadingShadersMtl{false}; /* atomic: also read on the shader compiler threads via ShouldCountCompilation() */ + extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; @@ -62,7 +64,8 @@ public: s_compilationQueueMutex.unlock(); // compile job->CompileInternal(); - ++g_compiled_shaders_async; + if (job->ShouldCountCompilation()) + ++g_compiled_shaders_async; // mark as compiled cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::COMPILING); job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::DONE); @@ -82,6 +85,21 @@ private: std::atomic m_threadsActive; } shaderMtlThreadPool; +// TODO: find out if it would be possible to cache compiled Metal shaders +void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId) +{ + s_isLoadingShadersMtl = true; +} + +void RendererShaderMtl::ShaderCacheLoading_end() +{ + s_isLoadingShadersMtl = false; +} + +void RendererShaderMtl::ShaderCacheLoading_Close() +{ +} + void RendererShaderMtl::Initialize() { shaderMtlThreadPool.StartThreads(); @@ -124,7 +142,8 @@ void RendererShaderMtl::PreponeCompilation(bool isRenderThread) if (!isStillQueued) { m_compilationState.waitUntilValue(COMPILATION_STATE::DONE); - --g_compiled_shaders_async; // compilation caused a stall so we don't 
consider this one async + if (ShouldCountCompilation()) + --g_compiled_shaders_async; // compilation caused a stall so we don't consider this one async return; } else @@ -146,6 +165,11 @@ bool RendererShaderMtl::WaitForCompiled() return true; } +bool RendererShaderMtl::ShouldCountCompilation() const +{ + return !s_isLoadingShadersMtl && m_isGameShader; +} + void RendererShaderMtl::CompileInternal() { MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init(); @@ -169,7 +193,8 @@ void RendererShaderMtl::CompileInternal() FinishCompilation(); // Count shader compilation - g_compiled_shaders_total++; + if (ShouldCountCompilation()) + g_compiled_shaders_total++; } void RendererShaderMtl::FinishCompilation() diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index ddf72d81..40d04c87 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -21,6 +21,10 @@ class RendererShaderMtl : public RendererShader }; public: + static void ShaderCacheLoading_begin(uint64 cacheTitleId); + static void ShaderCacheLoading_end(); + static void ShaderCacheLoading_Close(); + static void Initialize(); static void Shutdown(); @@ -61,6 +65,9 @@ private: std::string m_mslCode; + bool ShouldCountCompilation() const; + void CompileInternal(); + void FinishCompilation(); };