cache all pipelines

This commit is contained in:
Samuliak 2024-10-29 07:44:47 +01:00
parent 228fd347e7
commit 85db0dc468
No known key found for this signature in database
4 changed files with 34 additions and 24 deletions

View File

@ -11,6 +11,7 @@
#include "Common/precompiled.h" #include "Common/precompiled.h"
#include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/ISA/LatteReg.h" #include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
#include "util/helpers/helpers.h" #include "util/helpers/helpers.h"
#include "config/ActiveSettings.h" #include "config/ActiveSettings.h"
@ -121,8 +122,7 @@ PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShade
pipelineObj = new PipelineObject(); pipelineObj = new PipelineObject();
MetalPipelineCompiler* compiler = new MetalPipelineCompiler(m_mtlr, *pipelineObj); MetalPipelineCompiler* compiler = new MetalPipelineCompiler(m_mtlr, *pipelineObj);
bool fbosMatch; compiler->InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
compiler->InitFromState(fetchShader, vertexShader, geometryShader, pixelShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch);
bool allowAsyncCompile = false; bool allowAsyncCompile = false;
if (GetConfig().async_compile) if (GetConfig().async_compile)
@ -145,9 +145,8 @@ PipelineObject* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShade
delete compiler; delete compiler;
} }
// If FBOs don't match, it wouldn't be possible to reconstruct the pipeline from the cache // Save to cache
if (fbosMatch) AddCurrentStateToCache(hash, lastUsedAttachmentsInfo);
AddCurrentStateToCache(hash);
return pipelineObj; return pipelineObj;
} }
@ -380,6 +379,8 @@ struct CachedPipeline
ShaderHash gsHash; ShaderHash gsHash;
ShaderHash psHash; ShaderHash psHash;
MetalAttachmentsInfo lastUsedAttachmentsInfo;
Latte::GPUCompactedRegisterState gpuState; Latte::GPUCompactedRegisterState gpuState;
}; };
@ -453,9 +454,7 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span<uint8> fileData)
// compile // compile
{ {
MetalPipelineCompiler pp(m_mtlr, *pipelineObject); MetalPipelineCompiler pp(m_mtlr, *pipelineObject);
bool fbosMatch; pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, cachedPipeline->lastUsedAttachmentsInfo, attachmentsInfo, *lcr);
pp.InitFromState(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr, fbosMatch);
cemu_assert_debug(fbosMatch);
pp.Compile(true, true, false); pp.Compile(true, true, false);
// destroy pp early // destroy pp early
} }
@ -463,7 +462,7 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span<uint8> fileData)
// on success, cache the pipeline // on success, cache the pipeline
if (pipelineObject->m_pipeline) if (pipelineObject->m_pipeline)
{ {
uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, attachmentsInfo, attachmentsInfo, *lcr); uint64 pipelineStateHash = CalculatePipelineHash(vertexShader->compatibleFetchShader, vertexShader, geometryShader, pixelShader, cachedPipeline->lastUsedAttachmentsInfo, attachmentsInfo, *lcr);
m_pipelineCacheLock.lock(); m_pipelineCacheLock.lock();
m_pipelineCache[pipelineStateHash] = pipelineObject; m_pipelineCache[pipelineStateHash] = pipelineObject;
m_pipelineCacheLock.unlock(); m_pipelineCacheLock.unlock();
@ -478,7 +477,7 @@ void MetalPipelineCache::LoadPipelineFromCache(std::span<uint8> fileData)
ConcurrentQueue<CachedPipeline*> g_mtlPipelineCachingQueue; ConcurrentQueue<CachedPipeline*> g_mtlPipelineCachingQueue;
void MetalPipelineCache::AddCurrentStateToCache(uint64 pipelineStateHash) void MetalPipelineCache::AddCurrentStateToCache(uint64 pipelineStateHash, const MetalAttachmentsInfo& lastUsedAttachmentsInfo)
{ {
if (!m_pipelineCacheStoreThread) if (!m_pipelineCacheStoreThread)
{ {
@ -499,6 +498,7 @@ void MetalPipelineCache::AddCurrentStateToCache(uint64 pipelineStateHash)
job->gsHash.set(gs->baseHash, gs->auxHash); job->gsHash.set(gs->baseHash, gs->auxHash);
if (ps) if (ps)
job->psHash.set(ps->baseHash, ps->auxHash); job->psHash.set(ps->baseHash, ps->auxHash);
job->lastUsedAttachmentsInfo = lastUsedAttachmentsInfo;
Latte::StoreGPURegisterState(LatteGPUState.contextNew, job->gpuState); Latte::StoreGPURegisterState(LatteGPUState.contextNew, job->gpuState);
// queue job // queue job
g_mtlPipelineCachingQueue.push(job); g_mtlPipelineCachingQueue.push(job);
@ -530,7 +530,13 @@ bool MetalPipelineCache::SerializePipeline(MemStreamWriter& memWriter, CachedPip
memWriter.writeBE<uint64>(cachedPipeline.psHash.baseHash); memWriter.writeBE<uint64>(cachedPipeline.psHash.baseHash);
memWriter.writeBE<uint64>(cachedPipeline.psHash.auxHash); memWriter.writeBE<uint64>(cachedPipeline.psHash.auxHash);
} }
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
memWriter.writeBE<uint16>((uint16)cachedPipeline.lastUsedAttachmentsInfo.colorFormats[i]);
memWriter.writeBE<uint16>((uint16)cachedPipeline.lastUsedAttachmentsInfo.depthFormat);
Latte::SerializeRegisterState(cachedPipeline.gpuState, memWriter); Latte::SerializeRegisterState(cachedPipeline.gpuState, memWriter);
return true; return true;
} }
@ -562,12 +568,18 @@ bool MetalPipelineCache::DeserializePipeline(MemStreamReader& memReader, CachedP
uint64 auxHash = memReader.readBE<uint64>(); uint64 auxHash = memReader.readBE<uint64>();
cachedPipeline.psHash.set(baseHash, auxHash); cachedPipeline.psHash.set(baseHash, auxHash);
} }
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
cachedPipeline.lastUsedAttachmentsInfo.colorFormats[i] = (Latte::E_GX2SURFFMT)memReader.readBE<uint16>();
cachedPipeline.lastUsedAttachmentsInfo.depthFormat = (Latte::E_GX2SURFFMT)memReader.readBE<uint16>();
// deserialize GPU state // deserialize GPU state
if (!Latte::DeserializeRegisterState(cachedPipeline.gpuState, memReader)) if (!Latte::DeserializeRegisterState(cachedPipeline.gpuState, memReader))
{ {
return false; return false;
} }
cemu_assert_debug(!memReader.hasError()); cemu_assert_debug(!memReader.hasError());
return true; return true;
} }

View File

@ -41,7 +41,7 @@ private:
static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr); static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
void AddCurrentStateToCache(uint64 pipelineStateHash); void AddCurrentStateToCache(uint64 pipelineStateHash, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo);
// pipeline serialization for file // pipeline serialization for file
bool SerializePipeline(class MemStreamWriter& memWriter, struct CachedPipeline& cachedPipeline); bool SerializePipeline(class MemStreamWriter& memWriter, struct CachedPipeline& cachedPipeline);

View File

@ -190,7 +190,7 @@ extern std::atomic_int g_compiled_shaders_total;
extern std::atomic_int g_compiled_shaders_async; extern std::atomic_int g_compiled_shaders_async;
template<typename T> template<typename T>
void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, bool rasterizationEnabled, const LatteContextRegister& lcr, bool& fbosMatch) void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, bool rasterizationEnabled, const LatteContextRegister& lcr)
{ {
// TODO: check if the pixel shader is valid as well? // TODO: check if the pixel shader is valid as well?
if (!rasterizationEnabled/* || !pixelShaderMtl*/) if (!rasterizationEnabled/* || !pixelShaderMtl*/)
@ -200,7 +200,6 @@ void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsIn
} }
// Color attachments // Color attachments
fbosMatch = true;
const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL;
uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK(); uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK();
@ -218,7 +217,6 @@ void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsIn
if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT) if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT)
{ {
colorAttachment->setWriteMask(MTL::ColorWriteMaskNone); colorAttachment->setWriteMask(MTL::ColorWriteMaskNone);
fbosMatch = false;
continue; continue;
} }
@ -288,7 +286,7 @@ MetalPipelineCompiler::~MetalPipelineCompiler()
m_pipelineDescriptor->release(); m_pipelineDescriptor->release();
} }
void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
{ {
// Check if the pipeline uses a geometry shader // Check if the pipeline uses a geometry shader
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE()); const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE());
@ -322,9 +320,9 @@ void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, c
m_pixelShaderMtl = static_cast<RendererShaderMtl*>(pixelShader->shader); m_pixelShaderMtl = static_cast<RendererShaderMtl*>(pixelShader->shader);
if (m_usesGeometryShader) if (m_usesGeometryShader)
InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
else else
InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr, fbosMatch); InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
} }
bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay) bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay)
@ -408,7 +406,7 @@ bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool
return true; return true;
} }
void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
{ {
// Render pipeline state // Render pipeline state
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
@ -482,17 +480,17 @@ void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchSha
vertexDescriptor->release(); vertexDescriptor->release();
} }
SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch); SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr);
m_pipelineDescriptor = desc; m_pipelineDescriptor = desc;
} }
void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch) void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
{ {
// Render pipeline state // Render pipeline state
MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init();
SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr, fbosMatch); SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr);
m_pipelineDescriptor = desc; m_pipelineDescriptor = desc;
} }

View File

@ -16,7 +16,7 @@ public:
MetalPipelineCompiler(class MetalRenderer* metalRenderer, PipelineObject& pipelineObj) : m_mtlr{metalRenderer}, m_pipelineObj{pipelineObj} {} MetalPipelineCompiler(class MetalRenderer* metalRenderer, PipelineObject& pipelineObj) : m_mtlr{metalRenderer}, m_pipelineObj{pipelineObj} {}
~MetalPipelineCompiler(); ~MetalPipelineCompiler();
void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
bool Compile(bool forceCompile, bool isRenderThread, bool showInOverlay); bool Compile(bool forceCompile, bool isRenderThread, bool showInOverlay);
@ -32,7 +32,7 @@ private:
NS::Object* m_pipelineDescriptor; NS::Object* m_pipelineDescriptor;
void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr, bool& fbosMatch); void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
}; };