Merge pull request #6 from SamoZ256/metal-async

Implement async shader compilation
This commit is contained in:
SamoZ256 2024-10-11 15:47:01 +02:00 committed by GitHub
commit fabaf1621d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 250 additions and 38 deletions

View File

@ -11,6 +11,7 @@
#include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h" #include "Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h" #include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.h"
@ -158,12 +159,19 @@ bool LoadTGAFile(const std::vector<uint8>& buffer, TGAFILE *tgaFile)
void LatteShaderCache_finish() void LatteShaderCache_finish()
{ {
if (g_renderer->GetType() == RendererAPI::Vulkan) if (g_renderer->GetType() == RendererAPI::Vulkan)
{
RendererShaderVk::ShaderCacheLoading_end(); RendererShaderVk::ShaderCacheLoading_end();
}
else if (g_renderer->GetType() == RendererAPI::OpenGL) else if (g_renderer->GetType() == RendererAPI::OpenGL)
{
RendererShaderGL::ShaderCacheLoading_end(); RendererShaderGL::ShaderCacheLoading_end();
}
else if (g_renderer->GetType() == RendererAPI::Metal) else if (g_renderer->GetType() == RendererAPI::Metal)
{
RendererShaderMtl::ShaderCacheLoading_end();
MetalPipelineCache::ShaderCacheLoading_end(); MetalPipelineCache::ShaderCacheLoading_end();
}
} }
uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId) uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId)
@ -243,11 +251,18 @@ void LatteShaderCache_Load()
fs::create_directories(ActiveSettings::GetCachePath("shaderCache/precompiled"), ec); fs::create_directories(ActiveSettings::GetCachePath("shaderCache/precompiled"), ec);
// initialize renderer specific caches // initialize renderer specific caches
if (g_renderer->GetType() == RendererAPI::Vulkan) if (g_renderer->GetType() == RendererAPI::Vulkan)
{
RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId); RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId);
}
else if (g_renderer->GetType() == RendererAPI::OpenGL) else if (g_renderer->GetType() == RendererAPI::OpenGL)
{
RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId); RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId);
}
else if (g_renderer->GetType() == RendererAPI::Metal) else if (g_renderer->GetType() == RendererAPI::Metal)
{
RendererShaderMtl::ShaderCacheLoading_begin(cacheTitleId);
MetalPipelineCache::ShaderCacheLoading_begin(cacheTitleId); MetalPipelineCache::ShaderCacheLoading_begin(cacheTitleId);
}
// get cache file name // get cache file name
const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId); const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId);
const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0 const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0
@ -776,11 +791,18 @@ void LatteShaderCache_Close()
s_shaderCacheGeneric = nullptr; s_shaderCacheGeneric = nullptr;
} }
if (g_renderer->GetType() == RendererAPI::Vulkan) if (g_renderer->GetType() == RendererAPI::Vulkan)
RendererShaderVk::ShaderCacheLoading_Close(); {
else if (g_renderer->GetType() == RendererAPI::OpenGL) RendererShaderVk::ShaderCacheLoading_Close();
RendererShaderGL::ShaderCacheLoading_Close(); }
else if (g_renderer->GetType() == RendererAPI::Metal) else if (g_renderer->GetType() == RendererAPI::OpenGL)
MetalPipelineCache::ShaderCacheLoading_Close(); {
RendererShaderGL::ShaderCacheLoading_Close();
}
else if (g_renderer->GetType() == RendererAPI::Metal)
{
RendererShaderMtl::ShaderCacheLoading_Close();
MetalPipelineCache::ShaderCacheLoading_Close();
}
// if Vulkan then also close pipeline cache // if Vulkan then also close pipeline cache
if (g_renderer->GetType() == RendererAPI::Vulkan) if (g_renderer->GetType() == RendererAPI::Vulkan)

View File

@ -178,6 +178,7 @@ static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer
gsSrc.append("}\r\n"); gsSrc.append("}\r\n");
auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc); auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc);
mtlShader->PreponeCompilation(true);
return mtlShader; return mtlShader;
} }

View File

@ -223,14 +223,16 @@ void MetalRenderer::ResizeLayer(const Vector2i& size, bool mainWindow)
void MetalRenderer::Initialize() void MetalRenderer::Initialize()
{ {
Renderer::Initialize(); Renderer::Initialize();
RendererShaderMtl::Initialize();
} }
void MetalRenderer::Shutdown() void MetalRenderer::Shutdown()
{ {
// TODO: should shutdown both layers // TODO: should shutdown both layers
ImGui_ImplMetal_Shutdown(); ImGui_ImplMetal_Shutdown();
Renderer::Shutdown();
CommitCommandBuffer(); CommitCommandBuffer();
Renderer::Shutdown();
RendererShaderMtl::Shutdown();
} }
bool MetalRenderer::IsPadWindowActive() bool MetalRenderer::IsPadWindowActive()
@ -935,13 +937,21 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
return; return;
} }
// TODO: special state 8 and 5
auto& encoderState = m_state.m_encoderState; auto& encoderState = m_state.m_encoderState;
// Shaders // Shaders
LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
if (vertexShader && !vertexShader->shader->IsCompiled())
return;
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader(); LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
if (geometryShader && !geometryShader->shader->IsCompiled())
return;
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
const auto fetchShader = LatteSHRC_GetActiveFetchShader(); const auto fetchShader = LatteSHRC_GetActiveFetchShader();
// Skip the draw while the pixel shader is still being compiled asynchronously.
// The null check must be on pixelShader itself (the pointer that is dereferenced),
// mirroring the vertex and geometry shader checks above.
if (pixelShader && !pixelShader->shader->IsCompiled())
	return;
bool neverSkipAccurateBarrier = false; bool neverSkipAccurateBarrier = false;

View File

@ -1,6 +1,5 @@
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
//#include "Cemu/FileCache/FileCache.h" //#include "Cemu/FileCache/FileCache.h"
//#include "config/ActiveSettings.h" //#include "config/ActiveSettings.h"
@ -8,31 +7,119 @@
#include "Cemu/Logging/CemuLogging.h" #include "Cemu/Logging/CemuLogging.h"
#include "Common/precompiled.h" #include "Common/precompiled.h"
#include "config/CemuConfig.h" #include "config/CemuConfig.h"
#include "util/helpers/helpers.h"
static bool s_isLoadingShadersMtl{false};
extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_total;
extern std::atomic_int g_compiled_shaders_async; extern std::atomic_int g_compiled_shaders_async;
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) class ShaderMtlThreadPool
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
{ {
MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init(); public:
if (GetConfig().fast_math) void StartThreads()
options->setFastMathEnabled(true); {
if (m_threadsActive.exchange(true))
return;
// create thread pool
const uint32 threadCount = 2;
for (uint32 i = 0; i < threadCount; ++i)
s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this);
}
NS::Error* error = nullptr; void StopThreads()
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), options, &error); {
options->release(); if (!m_threadsActive.exchange(false))
if (error) return;
{ for (uint32 i = 0; i < s_threads.size(); ++i)
cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), mslCode.c_str()); s_compilationQueueCount.increment();
error->release(); for (auto& it : s_threads)
return; it.join();
} s_threads.clear();
m_function = library->newFunction(ToNSString("main0")); }
library->release();
// Count shader compilation ~ShaderMtlThreadPool()
g_compiled_shaders_total++; {
StopThreads();
}
void CompilerThreadFunc()
{
SetThreadName("mtlShaderComp");
while (m_threadsActive.load(std::memory_order::relaxed))
{
s_compilationQueueCount.decrementWithWait();
s_compilationQueueMutex.lock();
if (s_compilationQueue.empty())
{
// queue empty again, shaders compiled synchronously via PreponeCompilation()
s_compilationQueueMutex.unlock();
continue;
}
RendererShaderMtl* job = s_compilationQueue.front();
s_compilationQueue.pop_front();
// set compilation state
cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::QUEUED);
job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::COMPILING);
s_compilationQueueMutex.unlock();
// compile
job->CompileInternal();
if (job->ShouldCountCompilation())
++g_compiled_shaders_async;
// mark as compiled
cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::COMPILING);
job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::DONE);
}
}
bool HasThreadsRunning() const { return m_threadsActive; }
public:
std::vector<std::thread> s_threads;
std::deque<RendererShaderMtl*> s_compilationQueue;
CounterSemaphore s_compilationQueueCount;
std::mutex s_compilationQueueMutex;
private:
std::atomic<bool> m_threadsActive;
} shaderMtlThreadPool;
// TODO: find out if it would be possible to cache compiled Metal shaders
// Called when shader cache loading starts. Raises the file-local loading flag,
// which ShouldCountCompilation() reads to exclude compilations from the counters.
// cacheTitleId is currently unused by the Metal backend.
void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId)
{
	s_isLoadingShadersMtl = true;
}
// Called when shader cache loading has finished. Clears the file-local loading flag
// so that ShouldCountCompilation() can report game shader compilations again.
void RendererShaderMtl::ShaderCacheLoading_end()
{
	s_isLoadingShadersMtl = false;
}
// Called when the shader cache is closed. Intentionally a no-op:
// this backend keeps no cache state that would need to be released here.
void RendererShaderMtl::ShaderCacheLoading_Close()
{
}
// Starts the worker threads of the shared shader compilation pool.
// The RendererShaderMtl constructor asserts (debug builds) that the pool is running.
void RendererShaderMtl::Initialize()
{
	shaderMtlThreadPool.StartThreads();
}
// Stops the worker threads of the shared shader compilation pool.
void RendererShaderMtl::Shutdown()
{
	shaderMtlThreadPool.StopThreads();
}
// Creates the shader object and schedules its MSL source for asynchronous compilation.
// The source is copied into m_mslCode because compilation happens later, either on a
// pool worker thread or synchronously through PreponeCompilation().
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
	: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}, m_mslCode{mslCode}
{
	{
		// state change, enqueue and semaphore signal all happen under the queue mutex
		std::lock_guard<std::mutex> queueLock(shaderMtlThreadPool.s_compilationQueueMutex);
		m_compilationState.setValue(COMPILATION_STATE::QUEUED);
		shaderMtlThreadPool.s_compilationQueue.push_back(this);
		shaderMtlThreadPool.s_compilationQueueCount.increment();
	}
	// make sure .StartThreads() was called
	cemu_assert_debug(shaderMtlThreadPool.HasThreadsRunning());
}
RendererShaderMtl::~RendererShaderMtl() RendererShaderMtl::~RendererShaderMtl()
@ -40,3 +127,78 @@ RendererShaderMtl::~RendererShaderMtl()
if (m_function) if (m_function)
m_function->release(); m_function->release();
} }
// Makes sure this shader is compiled by the time the call returns.
// - If the job is still waiting in the pool queue, it is removed from the queue and
//   compiled synchronously on the calling thread.
// - Otherwise the call blocks until the compilation state reaches DONE.
// isRenderThread is not used by this implementation.
void RendererShaderMtl::PreponeCompilation(bool isRenderThread)
{
	bool claimedFromQueue = false;
	{
		// the QUEUED check and the removal must happen under the queue mutex,
		// the same mutex the worker threads hold while taking jobs
		std::lock_guard<std::mutex> queueLock(shaderMtlThreadPool.s_compilationQueueMutex);
		if (m_compilationState.hasState(COMPILATION_STATE::QUEUED))
		{
			// remove from queue; the queue counter is left untouched, workers tolerate
			// waking up to an empty queue
			auto& pending = shaderMtlThreadPool.s_compilationQueue;
			pending.erase(std::remove(pending.begin(), pending.end(), this), pending.end());
			m_compilationState.setValue(COMPILATION_STATE::COMPILING);
			claimedFromQueue = true;
		}
	}

	if (claimedFromQueue)
	{
		// compile synchronously
		CompileInternal();
		m_compilationState.setValue(COMPILATION_STATE::DONE);
		return;
	}

	// not queued anymore -> wait for the compilation to reach DONE
	m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
	if (ShouldCountCompilation())
		--g_compiled_shaders_async; // compilation caused a stall so we don't consider this one async
}
// Non-blocking query: returns true once the compilation state is DONE.
// DONE is also set when compilation failed, so this does not imply a usable m_function.
bool RendererShaderMtl::IsCompiled()
{
	const bool compilationFinished = m_compilationState.hasState(COMPILATION_STATE::DONE);
	return compilationFinished;
}
bool RendererShaderMtl::WaitForCompiled()
{
m_compilationState.waitUntilValue(COMPILATION_STATE::DONE);
return true;
}
// Decides whether this shader's compilation is reflected in the global compile counters:
// only game shaders count, and nothing counts while the shader cache is being loaded.
bool RendererShaderMtl::ShouldCountCompilation() const
{
	if (s_isLoadingShadersMtl)
		return false;
	return m_isGameShader;
}
// Compiles m_mslCode into a Metal library and stores its "main0" entry point in m_function.
// Runs either on a pool worker thread or synchronously via PreponeCompilation().
// On failure the error is logged and m_function stays null. In both cases the MSL source
// is released through FinishCompilation().
void RendererShaderMtl::CompileInternal()
{
	MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init();
	// TODO: always disable fast math for problematic shaders
	if (GetConfig().fast_math)
		options->setFastMathEnabled(true);

	NS::Error* error = nullptr;
	MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(m_mslCode), options, &error);
	options->release();

	// Failure is signalled by a null library. The error out-parameter alone is not a
	// reliable indicator, since Metal can also use it to report compile warnings while
	// still returning a valid library.
	if (!library)
	{
		const char* description = error ? error->localizedDescription()->utf8String() : "unknown error";
		cemuLog_log(LogType::Force, "failed to create library: {} -> {}", description, m_mslCode.c_str());
		if (error)
			error->release();
		FinishCompilation();
		return;
	}
	if (error)
	{
		// compilation succeeded with warnings; dispose of the error object the same way
		// the failure path does and keep the library
		error->release();
	}

	m_function = library->newFunction(ToNSString("main0"));
	library->release();
	FinishCompilation();

	// Count shader compilation
	if (ShouldCountCompilation())
		g_compiled_shaders_total++;
}
// Drops the stored MSL source once it is no longer needed and gives its buffer back,
// so finished shaders do not keep their source text in memory.
void RendererShaderMtl::FinishCompilation()
{
	// swapping with an empty temporary empties the string and frees its storage
	std::string().swap(m_mslCode);
}

View File

@ -4,20 +4,30 @@
#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "HW/Latte/Renderer/Metal/MetalRenderer.h" #include "HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "util/helpers/ConcurrentQueue.h" #include "util/helpers/ConcurrentQueue.h"
#include "util/helpers/Semaphore.h"
#include <Metal/Metal.hpp> #include <Metal/Metal.hpp>
class RendererShaderMtl : public RendererShader class RendererShaderMtl : public RendererShader
{ {
//enum class COMPILATION_STATE : uint32 friend class ShaderMtlThreadPool;
//{
// NONE, enum class COMPILATION_STATE : uint32
// QUEUED, {
// COMPILING, NONE,
// DONE QUEUED,
//}; COMPILING,
DONE
};
public: public:
static void ShaderCacheLoading_begin(uint64 cacheTitleId);
static void ShaderCacheLoading_end();
static void ShaderCacheLoading_Close();
static void Initialize();
static void Shutdown();
RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode); RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
virtual ~RendererShaderMtl(); virtual ~RendererShaderMtl();
@ -42,15 +52,22 @@ public:
cemu_assert_suspicious(); cemu_assert_suspicious();
} }
// TODO: implement this void PreponeCompilation(bool isRenderThread) override;
void PreponeCompilation(bool isRenderThread) override {} bool IsCompiled() override;
bool IsCompiled() override { return true; } bool WaitForCompiled() override;
bool WaitForCompiled() override { return true; }
private: private:
class MetalRenderer* m_mtlr; class MetalRenderer* m_mtlr;
MTL::Function* m_function = nullptr; MTL::Function* m_function = nullptr;
void Compile(const std::string& mslCode); StateSemaphore<COMPILATION_STATE> m_compilationState{ COMPILATION_STATE::NONE };
std::string m_mslCode;
bool ShouldCountCompilation() const;
void CompileInternal();
void FinishCompilation();
}; };