mirror of
https://github.com/cemu-project/Cemu.git
synced 2024-11-29 12:34:17 +01:00
Merge pull request #7 from SamoZ256/metal-pipeline-cache
Pipeline cache
This commit is contained in:
commit
a61d0f0237
@ -534,6 +534,7 @@ if(APPLE)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ENABLE_METAL)
|
if(ENABLE_METAL)
|
||||||
|
# TODO: sort alphabetically
|
||||||
target_sources(CemuCafe PRIVATE
|
target_sources(CemuCafe PRIVATE
|
||||||
HW/Latte/Renderer/Metal/MetalRenderer.cpp
|
HW/Latte/Renderer/Metal/MetalRenderer.cpp
|
||||||
HW/Latte/Renderer/Metal/MetalRenderer.h
|
HW/Latte/Renderer/Metal/MetalRenderer.h
|
||||||
@ -555,11 +556,15 @@ if(ENABLE_METAL)
|
|||||||
HW/Latte/Renderer/Metal/RendererShaderMtl.h
|
HW/Latte/Renderer/Metal/RendererShaderMtl.h
|
||||||
HW/Latte/Renderer/Metal/CachedFBOMtl.cpp
|
HW/Latte/Renderer/Metal/CachedFBOMtl.cpp
|
||||||
HW/Latte/Renderer/Metal/CachedFBOMtl.h
|
HW/Latte/Renderer/Metal/CachedFBOMtl.h
|
||||||
|
HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
|
||||||
|
HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
|
||||||
HW/Latte/Renderer/Metal/MetalBufferAllocator.h
|
HW/Latte/Renderer/Metal/MetalBufferAllocator.h
|
||||||
HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
|
HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
|
||||||
HW/Latte/Renderer/Metal/MetalMemoryManager.h
|
HW/Latte/Renderer/Metal/MetalMemoryManager.h
|
||||||
HW/Latte/Renderer/Metal/MetalOutputShaderCache.cpp
|
HW/Latte/Renderer/Metal/MetalOutputShaderCache.cpp
|
||||||
HW/Latte/Renderer/Metal/MetalOutputShaderCache.h
|
HW/Latte/Renderer/Metal/MetalOutputShaderCache.h
|
||||||
|
HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp
|
||||||
|
HW/Latte/Renderer/Metal/MetalPipelineCompiler.h
|
||||||
HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
|
HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
|
||||||
HW/Latte/Renderer/Metal/MetalPipelineCache.h
|
HW/Latte/Renderer/Metal/MetalPipelineCache.h
|
||||||
HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
|
HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
|
||||||
|
@ -209,11 +209,9 @@ void LatteShader_free(LatteDecompilerShader* shader)
|
|||||||
delete shader;
|
delete shader;
|
||||||
}
|
}
|
||||||
|
|
||||||
// both vertex and geometry/pixel shader depend on PS inputs
|
void LatteShader_CreatePSInputTable(LatteShaderPSInputTable* psInputTable, uint32* contextRegisters)
|
||||||
// we prepare the PS import info in advance
|
|
||||||
void LatteShader_UpdatePSInputs(uint32* contextRegisters)
|
|
||||||
{
|
{
|
||||||
// PS control
|
// PS control
|
||||||
uint32 psControl0 = contextRegisters[mmSPI_PS_IN_CONTROL_0];
|
uint32 psControl0 = contextRegisters[mmSPI_PS_IN_CONTROL_0];
|
||||||
uint32 spi0_positionEnable = (psControl0 >> 8) & 1;
|
uint32 spi0_positionEnable = (psControl0 >> 8) & 1;
|
||||||
uint32 spi0_positionCentroid = (psControl0 >> 9) & 1;
|
uint32 spi0_positionCentroid = (psControl0 >> 9) & 1;
|
||||||
@ -242,12 +240,12 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
|
|||||||
{
|
{
|
||||||
key += std::rotr<uint64>(spi0_paramGen, 7);
|
key += std::rotr<uint64>(spi0_paramGen, 7);
|
||||||
key += std::rotr<uint64>(spi0_paramGenAddr, 3);
|
key += std::rotr<uint64>(spi0_paramGenAddr, 3);
|
||||||
_activePSImportTable.paramGen = spi0_paramGen;
|
psInputTable->paramGen = spi0_paramGen;
|
||||||
_activePSImportTable.paramGenGPR = spi0_paramGenAddr;
|
psInputTable->paramGenGPR = spi0_paramGenAddr;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_activePSImportTable.paramGen = 0;
|
psInputTable->paramGen = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// semantic imports from vertex shader
|
// semantic imports from vertex shader
|
||||||
@ -281,9 +279,9 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
|
|||||||
key = std::rotl<uint64>(key, 7);
|
key = std::rotl<uint64>(key, 7);
|
||||||
if (spi0_positionEnable && f == spi0_positionAddr)
|
if (spi0_positionEnable && f == spi0_positionAddr)
|
||||||
{
|
{
|
||||||
_activePSImportTable.import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION;
|
psInputTable->import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION;
|
||||||
_activePSImportTable.import[f].isFlat = false;
|
psInputTable->import[f].isFlat = false;
|
||||||
_activePSImportTable.import[f].isNoPerspective = false;
|
psInputTable->import[f].isNoPerspective = false;
|
||||||
key += (uint64)0x33;
|
key += (uint64)0x33;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -296,13 +294,20 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters)
|
|||||||
semanticMask[psSemanticId >> 3] |= (1 << (psSemanticId & 7));
|
semanticMask[psSemanticId >> 3] |= (1 << (psSemanticId & 7));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
_activePSImportTable.import[f].semanticId = psSemanticId;
|
psInputTable->import[f].semanticId = psSemanticId;
|
||||||
_activePSImportTable.import[f].isFlat = (psInputControl&(1 << 10)) != 0;
|
psInputTable->import[f].isFlat = (psInputControl&(1 << 10)) != 0;
|
||||||
_activePSImportTable.import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0;
|
psInputTable->import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_activePSImportTable.key = key;
|
psInputTable->key = key;
|
||||||
_activePSImportTable.count = numPSInputs;
|
psInputTable->count = numPSInputs;
|
||||||
|
}
|
||||||
|
|
||||||
|
// both vertex and geometry/pixel shader depend on PS inputs
|
||||||
|
// we prepare the PS import info in advance
|
||||||
|
void LatteShader_UpdatePSInputs(uint32* contextRegisters)
|
||||||
|
{
|
||||||
|
LatteShader_CreatePSInputTable(&_activePSImportTable, contextRegisters);
|
||||||
}
|
}
|
||||||
|
|
||||||
void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync)
|
void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync)
|
||||||
|
@ -84,6 +84,7 @@ struct LatteShaderPSInputTable
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void LatteShader_CreatePSInputTable(LatteShaderPSInputTable* psInputTable, uint32* contextRegisters);
|
||||||
void LatteShader_UpdatePSInputs(uint32* contextRegisters);
|
void LatteShader_UpdatePSInputs(uint32* contextRegisters);
|
||||||
LatteShaderPSInputTable* LatteSHRC_GetPSInputTable();
|
LatteShaderPSInputTable* LatteSHRC_GetPSInputTable();
|
||||||
|
|
||||||
@ -126,4 +127,4 @@ void LatteShaderCache_writeSeparableGeometryShader(uint64 shaderBaseHash, uint64
|
|||||||
void LatteShaderCache_writeSeparablePixelShader(uint64 shaderBaseHash, uint64 shaderAuxHash, uint8* pixelShader, uint32 pixelShaderSize, uint32* contextRegisters, bool usesGeometryShader);
|
void LatteShaderCache_writeSeparablePixelShader(uint64 shaderBaseHash, uint64 shaderAuxHash, uint8* pixelShader, uint32 pixelShaderSize, uint32* contextRegisters, bool usesGeometryShader);
|
||||||
|
|
||||||
// todo - refactor this
|
// todo - refactor this
|
||||||
sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType);
|
sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType);
|
||||||
|
@ -64,7 +64,7 @@ FileCache* s_shaderCacheGeneric = nullptr; // contains hardware and version inde
|
|||||||
#define SHADER_CACHE_TYPE_PIXEL (2)
|
#define SHADER_CACHE_TYPE_PIXEL (2)
|
||||||
|
|
||||||
bool LatteShaderCache_readSeparableShader(uint8* shaderInfoData, sint32 shaderInfoSize);
|
bool LatteShaderCache_readSeparableShader(uint8* shaderInfoData, sint32 shaderInfoSize);
|
||||||
void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId);
|
void LatteShaderCache_LoadPipelineCache(uint64 cacheTitleId);
|
||||||
bool LatteShaderCache_updatePipelineLoadingProgress();
|
bool LatteShaderCache_updatePipelineLoadingProgress();
|
||||||
void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateFunc, bool isPipelines);
|
void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateFunc, bool isPipelines);
|
||||||
|
|
||||||
@ -160,18 +160,11 @@ bool LoadTGAFile(const std::vector<uint8>& buffer, TGAFILE *tgaFile)
|
|||||||
void LatteShaderCache_finish()
|
void LatteShaderCache_finish()
|
||||||
{
|
{
|
||||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||||
{
|
|
||||||
RendererShaderVk::ShaderCacheLoading_end();
|
RendererShaderVk::ShaderCacheLoading_end();
|
||||||
}
|
|
||||||
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
||||||
{
|
|
||||||
RendererShaderGL::ShaderCacheLoading_end();
|
RendererShaderGL::ShaderCacheLoading_end();
|
||||||
}
|
|
||||||
else if (g_renderer->GetType() == RendererAPI::Metal)
|
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||||
{
|
|
||||||
RendererShaderMtl::ShaderCacheLoading_end();
|
RendererShaderMtl::ShaderCacheLoading_end();
|
||||||
MetalPipelineCache::ShaderCacheLoading_end();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId)
|
uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId)
|
||||||
@ -251,18 +244,11 @@ void LatteShaderCache_Load()
|
|||||||
fs::create_directories(ActiveSettings::GetCachePath("shaderCache/precompiled"), ec);
|
fs::create_directories(ActiveSettings::GetCachePath("shaderCache/precompiled"), ec);
|
||||||
// initialize renderer specific caches
|
// initialize renderer specific caches
|
||||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||||
{
|
|
||||||
RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId);
|
RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId);
|
||||||
}
|
|
||||||
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
||||||
{
|
|
||||||
RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId);
|
RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId);
|
||||||
}
|
|
||||||
else if (g_renderer->GetType() == RendererAPI::Metal)
|
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||||
{
|
|
||||||
RendererShaderMtl::ShaderCacheLoading_begin(cacheTitleId);
|
RendererShaderMtl::ShaderCacheLoading_begin(cacheTitleId);
|
||||||
MetalPipelineCache::ShaderCacheLoading_begin(cacheTitleId);
|
|
||||||
}
|
|
||||||
// get cache file name
|
// get cache file name
|
||||||
const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId);
|
const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId);
|
||||||
const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0
|
const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0
|
||||||
@ -361,9 +347,9 @@ void LatteShaderCache_Load()
|
|||||||
cemuLog_log(LogType::Force, "Shader cache loaded with {} shaders. Commited mem {}MB. Took {}ms", numLoadedShaders, (sint32)(memCommited/1024/1024), timeLoad);
|
cemuLog_log(LogType::Force, "Shader cache loaded with {} shaders. Commited mem {}MB. Took {}ms", numLoadedShaders, (sint32)(memCommited/1024/1024), timeLoad);
|
||||||
#endif
|
#endif
|
||||||
LatteShaderCache_finish();
|
LatteShaderCache_finish();
|
||||||
// if Vulkan then also load pipeline cache
|
// if Vulkan or Metal then also load pipeline cache
|
||||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
if (g_renderer->GetType() == RendererAPI::Vulkan || g_renderer->GetType() == RendererAPI::Metal)
|
||||||
LatteShaderCache_LoadVulkanPipelineCache(cacheTitleId);
|
LatteShaderCache_LoadPipelineCache(cacheTitleId);
|
||||||
|
|
||||||
|
|
||||||
g_renderer->BeginFrame(true);
|
g_renderer->BeginFrame(true);
|
||||||
@ -518,13 +504,18 @@ void LatteShaderCache_ShowProgress(const std::function <bool(void)>& loadUpdateF
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId)
|
void LatteShaderCache_LoadPipelineCache(uint64 cacheTitleId)
|
||||||
{
|
{
|
||||||
auto& pipelineCache = VulkanPipelineStableCache::GetInstance();
|
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||||
g_shaderCacheLoaderState.pipelineFileCount = pipelineCache.BeginLoading(cacheTitleId);
|
g_shaderCacheLoaderState.pipelineFileCount = VulkanPipelineStableCache::GetInstance().BeginLoading(cacheTitleId);
|
||||||
|
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||||
|
g_shaderCacheLoaderState.pipelineFileCount = MetalPipelineCache::GetInstance().BeginLoading(cacheTitleId);
|
||||||
g_shaderCacheLoaderState.loadedPipelines = 0;
|
g_shaderCacheLoaderState.loadedPipelines = 0;
|
||||||
LatteShaderCache_ShowProgress(LatteShaderCache_updatePipelineLoadingProgress, true);
|
LatteShaderCache_ShowProgress(LatteShaderCache_updatePipelineLoadingProgress, true);
|
||||||
pipelineCache.EndLoading();
|
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||||
|
VulkanPipelineStableCache::GetInstance().EndLoading();
|
||||||
|
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||||
|
MetalPipelineCache::GetInstance().EndLoading();
|
||||||
if(Latte_GetStopSignal())
|
if(Latte_GetStopSignal())
|
||||||
LatteThread_Exit();
|
LatteThread_Exit();
|
||||||
}
|
}
|
||||||
@ -532,7 +523,12 @@ void LatteShaderCache_LoadVulkanPipelineCache(uint64 cacheTitleId)
|
|||||||
bool LatteShaderCache_updatePipelineLoadingProgress()
|
bool LatteShaderCache_updatePipelineLoadingProgress()
|
||||||
{
|
{
|
||||||
uint32 pipelinesMissingShaders = 0;
|
uint32 pipelinesMissingShaders = 0;
|
||||||
return VulkanPipelineStableCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders);
|
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||||
|
return VulkanPipelineStableCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders);
|
||||||
|
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||||
|
return MetalPipelineCache::GetInstance().UpdateLoading(g_shaderCacheLoaderState.loadedPipelines, pipelinesMissingShaders);
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64 LatteShaderCache_getShaderNameInTransferableCache(uint64 baseHash, uint32 shaderType)
|
uint64 LatteShaderCache_getShaderNameInTransferableCache(uint64 baseHash, uint32 shaderType)
|
||||||
@ -791,22 +787,17 @@ void LatteShaderCache_Close()
|
|||||||
s_shaderCacheGeneric = nullptr;
|
s_shaderCacheGeneric = nullptr;
|
||||||
}
|
}
|
||||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||||
{
|
|
||||||
RendererShaderVk::ShaderCacheLoading_Close();
|
RendererShaderVk::ShaderCacheLoading_Close();
|
||||||
}
|
|
||||||
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
||||||
{
|
|
||||||
RendererShaderGL::ShaderCacheLoading_Close();
|
RendererShaderGL::ShaderCacheLoading_Close();
|
||||||
}
|
|
||||||
else if (g_renderer->GetType() == RendererAPI::Metal)
|
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||||
{
|
|
||||||
RendererShaderMtl::ShaderCacheLoading_Close();
|
RendererShaderMtl::ShaderCacheLoading_Close();
|
||||||
MetalPipelineCache::ShaderCacheLoading_Close();
|
|
||||||
}
|
|
||||||
|
|
||||||
// if Vulkan then also close pipeline cache
|
// if Vulkan or Metal then also close pipeline cache
|
||||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||||
VulkanPipelineStableCache::GetInstance().Close();
|
VulkanPipelineStableCache::GetInstance().Close();
|
||||||
|
else if (g_renderer->GetType() == RendererAPI::Metal)
|
||||||
|
MetalPipelineCache::GetInstance().Close();
|
||||||
}
|
}
|
||||||
|
|
||||||
#include <wx/msgdlg.h>
|
#include <wx/msgdlg.h>
|
||||||
|
@ -65,7 +65,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM
|
|||||||
}
|
}
|
||||||
else if (textureType == MTL::TextureTypeCube)
|
else if (textureType == MTL::TextureTypeCube)
|
||||||
{
|
{
|
||||||
// Do notjing
|
// Do nothing
|
||||||
}
|
}
|
||||||
else if (textureType == MTL::TextureTypeCubeArray)
|
else if (textureType == MTL::TextureTypeCubeArray)
|
||||||
{
|
{
|
||||||
@ -81,13 +81,10 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM
|
|||||||
|
|
||||||
MTL::TextureUsage usage = MTL::TextureUsageShaderRead | MTL::TextureUsagePixelFormatView;
|
MTL::TextureUsage usage = MTL::TextureUsageShaderRead | MTL::TextureUsagePixelFormatView;
|
||||||
if (!Latte::IsCompressedFormat(format))
|
if (!Latte::IsCompressedFormat(format))
|
||||||
{
|
|
||||||
usage |= MTL::TextureUsageRenderTarget;
|
usage |= MTL::TextureUsageRenderTarget;
|
||||||
}
|
|
||||||
desc->setUsage(usage);
|
desc->setUsage(usage);
|
||||||
|
|
||||||
m_texture = mtlRenderer->GetDevice()->newTexture(desc);
|
m_texture = mtlRenderer->GetDevice()->newTexture(desc);
|
||||||
|
|
||||||
desc->release();
|
desc->release();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
48
src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
Normal file
48
src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||||
|
|
||||||
|
// Builds the attachment description from an existing framebuffer object.
// Copies the format of every bound color target and, when a depth buffer is
// bound, its format and stencil flag. Unbound slots are not written here.
MetalAttachmentsInfo::MetalAttachmentsInfo(class CachedFBOMtl* fbo)
|
||||||
|
{
|
||||||
|
// Color attachments: one slot per possible color target
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
||||||
|
{
|
||||||
|
const auto& colorBuffer = fbo->colorBuffer[i];
|
||||||
|
auto texture = static_cast<LatteTextureViewMtl*>(colorBuffer.texture);
|
||||||
|
// no texture bound to this slot -> leave colorFormats[i] untouched
if (!texture)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
colorFormats[i] = texture->format;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Depth stencil attachment
|
||||||
|
if (fbo->depthBuffer.texture)
|
||||||
|
{
|
||||||
|
auto texture = static_cast<LatteTextureViewMtl*>(fbo->depthBuffer.texture);
|
||||||
|
depthFormat = texture->format;
|
||||||
|
// stencil presence is taken from the FBO's depth buffer entry, not derived from the format
hasStencil = fbo->depthBuffer.hasStencil;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the attachment description from register state instead of a live FBO.
// lcr         - context registers that the LatteMRT helpers are queried with
// pixelShader - passed to LatteMRT::GetActiveColorBufferMask together with lcr
MetalAttachmentsInfo::MetalAttachmentsInfo(const LatteContextRegister& lcr, const LatteDecompilerShader* pixelShader)
|
||||||
|
{
|
||||||
|
// one bit per color target; a set bit means the slot is recorded below
uint8 cbMask = LatteMRT::GetActiveColorBufferMask(pixelShader, lcr);
|
||||||
|
bool dbMask = LatteMRT::GetActiveDepthBufferMask(lcr);
|
||||||
|
|
||||||
|
// Color attachments
|
||||||
|
// NOTE(review): the bound is a literal 8 here, while the FBO-based constructor and the
// colorFormats array use LATTE_NUM_COLOR_TARGET - confirm the two values are identical
for (int i = 0; i < 8; ++i)
|
||||||
|
{
|
||||||
|
// skip color targets that are not flagged in the mask
if ((cbMask & (1 << i)) == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
colorFormats[i] = LatteMRT::GetColorBufferFormat(i, lcr);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Depth stencil attachment
|
||||||
|
if (dbMask)
|
||||||
|
{
|
||||||
|
Latte::E_GX2SURFFMT format = LatteMRT::GetDepthBufferFormat(lcr);
|
||||||
|
depthFormat = format;
|
||||||
|
// stencil presence is looked up from the pixel format info for this format
// (second argument is `true`; presumably selects the depth variant - TODO confirm)
hasStencil = GetMtlPixelFormatInfo(format, true).hasStencil;
|
||||||
|
}
|
||||||
|
}
|
15
src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
Normal file
15
src/Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||||
|
|
||||||
|
// Plain description of the render target formats in use: one format per color
// target slot, a depth format, and whether the depth attachment has stencil.
// Can be filled either from a CachedFBOMtl or from context registers.
class MetalAttachmentsInfo
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Leaves all members at their in-class defaults
MetalAttachmentsInfo() = default;
|
||||||
|
// Fills the formats from the textures bound to the given FBO
MetalAttachmentsInfo(class CachedFBOMtl* fbo);
|
||||||
|
// Fills the formats from register state (and the pixel shader, for the color mask)
MetalAttachmentsInfo(const LatteContextRegister& lcr, const class LatteDecompilerShader* pixelShader);
|
||||||
|
|
||||||
|
// NOTE(review): this initializer only sets element 0 to INVALID_FORMAT explicitly; the
// remaining elements are value-initialized (zero). That matches only if
// INVALID_FORMAT has the value 0 - confirm against the enum definition.
Latte::E_GX2SURFFMT colorFormats[LATTE_NUM_COLOR_TARGET] = {Latte::E_GX2SURFFMT::INVALID_FORMAT};
|
||||||
|
// INVALID_FORMAT until a constructor records a depth attachment
Latte::E_GX2SURFFMT depthFormat = Latte::E_GX2SURFFMT::INVALID_FORMAT;
|
||||||
|
// only set by the constructors when a depth attachment is recorded
bool hasStencil = false;
|
||||||
|
};
|
File diff suppressed because it is too large
Load Diff
@ -1,24 +1,26 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Metal/Metal.hpp>
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
|
||||||
|
#include "util/helpers/ConcurrentQueue.h"
|
||||||
#include "HW/Latte/ISA/LatteReg.h"
|
#include "util/helpers/fspinlock.h"
|
||||||
#include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
#include "util/math/vector2.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
|
||||||
|
|
||||||
class MetalPipelineCache
|
class MetalPipelineCache
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static void ShaderCacheLoading_begin(uint64 cacheTitleId);
|
static MetalPipelineCache& GetInstance();
|
||||||
static void ShaderCacheLoading_end();
|
|
||||||
static void ShaderCacheLoading_Close();
|
|
||||||
|
|
||||||
MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
|
MetalPipelineCache(class MetalRenderer* metalRenderer);
|
||||||
~MetalPipelineCache();
|
~MetalPipelineCache();
|
||||||
|
|
||||||
MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr);
|
PipelineObject* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, Vector2i extend, uint32 indexCount, const LatteContextRegister& lcr);
|
||||||
|
|
||||||
MTL::RenderPipelineState* GetMeshPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr, Renderer::INDEX_TYPE hostIndexType);
|
// Cache loading
|
||||||
|
uint32 BeginLoading(uint64 cacheTitleId); // returns count of pipelines stored in cache
|
||||||
|
bool UpdateLoading(uint32& pipelinesLoadedTotal, uint32& pipelinesMissingShaders);
|
||||||
|
void EndLoading();
|
||||||
|
void LoadPipelineFromCache(std::span<uint8> fileData);
|
||||||
|
void Close(); // called on title exit
|
||||||
|
|
||||||
// Debug
|
// Debug
|
||||||
size_t GetPipelineCacheSize() const { return m_pipelineCache.size(); }
|
size_t GetPipelineCacheSize() const { return m_pipelineCache.size(); }
|
||||||
@ -26,12 +28,25 @@ public:
|
|||||||
private:
|
private:
|
||||||
class MetalRenderer* m_mtlr;
|
class MetalRenderer* m_mtlr;
|
||||||
|
|
||||||
std::map<uint64, MTL::RenderPipelineState*> m_pipelineCache;
|
std::map<uint64, PipelineObject*> m_pipelineCache;
|
||||||
|
FSpinlock m_pipelineCacheLock;
|
||||||
|
|
||||||
NS::URL* m_binaryArchiveURL;
|
std::thread* m_pipelineCacheStoreThread;
|
||||||
MTL::BinaryArchive* m_binaryArchive;
|
|
||||||
|
|
||||||
uint64 CalculateRenderPipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* lastUsedFBO, const LatteContextRegister& lcr);
|
class FileCache* s_cache;
|
||||||
|
|
||||||
void TryLoadBinaryArchive();
|
std::atomic_uint32_t m_numCompilationThreads{ 0 };
|
||||||
|
ConcurrentQueue<std::vector<uint8>> m_compilationQueue;
|
||||||
|
std::atomic_uint32_t m_compilationCount;
|
||||||
|
|
||||||
|
static uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
|
||||||
|
|
||||||
|
void AddCurrentStateToCache(uint64 pipelineStateHash, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo);
|
||||||
|
|
||||||
|
// pipeline serialization for file
|
||||||
|
bool SerializePipeline(class MemStreamWriter& memWriter, struct CachedPipeline& cachedPipeline);
|
||||||
|
bool DeserializePipeline(class MemStreamReader& memReader, struct CachedPipeline& cachedPipeline);
|
||||||
|
|
||||||
|
int CompilerThread();
|
||||||
|
void WorkerThread();
|
||||||
};
|
};
|
||||||
|
496
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp
Normal file
496
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.cpp
Normal file
@ -0,0 +1,496 @@
|
|||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
|
||||||
|
|
||||||
|
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||||
|
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||||
|
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||||
|
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
extern std::atomic_int g_compiling_pipelines;
|
||||||
|
extern std::atomic_int g_compiling_pipelines_async;
|
||||||
|
extern std::atomic_uint64_t g_compiling_pipelines_syncTimeSum;
|
||||||
|
|
||||||
|
// Appends shader source to gsSrc that copies input vertex `vIdx` from the object
// payload into `out` and stores it as mesh vertex `vIdx`.
// gsSrc         - shader source being built; text is appended in place
// vertexShader  - supplies outputParameterMask (which VS output params exist)
// psInputTable  - used to map VS output params to semantic ids and to check PS imports
// vIdx          - index of the payload vertex to forward, also used as the mesh vertex index
// latteRegister - register state handed to the semantic id lookup
static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
|
||||||
|
{
|
||||||
|
auto parameterMask = vertexShader->outputParameterMask;
|
||||||
|
// walk all 32 possible output parameter bits
for (uint32 i = 0; i < 32; i++)
|
||||||
|
{
|
||||||
|
// parameter i is not written by the vertex shader
if ((parameterMask & (1 << i)) == 0)
|
||||||
|
continue;
|
||||||
|
sint32 vsSemanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
|
||||||
|
// negative result: no semantic id available for this parameter, nothing to emit
if (vsSemanticId < 0)
|
||||||
|
continue;
|
||||||
|
// make sure PS has matching input
|
||||||
|
if (!psInputTable.hasPSImportForSemanticId(vsSemanticId))
|
||||||
|
continue;
|
||||||
|
// emit a straight copy of this pass parameter from payload vertex vIdx
gsSrc.append(fmt::format("out.passParameterSem{} = objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId));
|
||||||
|
}
|
||||||
|
// position is always forwarded, independent of the parameter mask
gsSrc.append(fmt::format("out.position = objectPayload.vertexOut[{}].position;\r\n", vIdx));
|
||||||
|
gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", vIdx));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends shader source to gsSrc that computes a fourth vertex from payload
// vertices 0, 1 and 2 and stores it as mesh vertex 3.
// gsSrc         - shader source being built; text is appended in place
// vertexShader  - supplies outputParameterMask (which VS output params exist)
// psInputTable  - used to map VS output params to semantic ids and to check PS imports
// variant       - suffix appended to "gen4thVertex" to select which generator function the emitted code calls
// latteRegister - register state handed to the semantic id lookup
static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, const char* variant, const LatteContextRegister& latteRegister)
|
||||||
|
{
|
||||||
|
auto parameterMask = vertexShader->outputParameterMask;
|
||||||
|
// walk all 32 possible output parameter bits
for (uint32 i = 0; i < 32; i++)
|
||||||
|
{
|
||||||
|
// parameter i is not written by the vertex shader
if ((parameterMask & (1 << i)) == 0)
|
||||||
|
continue;
|
||||||
|
sint32 vsSemanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
|
||||||
|
// negative result: no semantic id available for this parameter, nothing to emit
if (vsSemanticId < 0)
|
||||||
|
continue;
|
||||||
|
// make sure PS has matching input
|
||||||
|
if (!psInputTable.hasPSImportForSemanticId(vsSemanticId))
|
||||||
|
continue;
|
||||||
|
// each pass parameter of the 4th vertex is generated from the same parameter of vertices 0..2
gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId));
|
||||||
|
}
|
||||||
|
// position is generated the same way, independent of the parameter mask
gsSrc.append(fmt::format("out.position = gen4thVertex{}(objectPayload.vertexOut[0].position, objectPayload.vertexOut[1].position, objectPayload.vertexOut[2].position);\r\n", variant));
|
||||||
|
// the generated vertex always occupies mesh vertex slot 3
gsSrc.append(fmt::format("mesh.set_vertex(3, out);\r\n"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends shader source to gsSrc that writes all four mesh vertices and the six
// indices of the two triangles covering the rectangle.
// p0..p3  - vertex ordering; a value of 3 selects the generated 4th vertex, any
//           other value forwards the payload vertex with that index
// variant - generator suffix passed through to rectsEmulationGS_outputGeneratedVertex
// Remaining parameters are forwarded unchanged to the per-vertex helpers.
static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable& psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister)
|
||||||
|
{
|
||||||
|
sint32 pList[4] = { p0, p1, p2, p3 };
|
||||||
|
// emit the vertex-writing code for each of the four entries
for (sint32 i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
if (pList[i] == 3)
|
||||||
|
rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister);
|
||||||
|
else
|
||||||
|
rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister);
|
||||||
|
}
|
||||||
|
// first triangle: pList[0], pList[1], pList[2]
gsSrc.append(fmt::format("mesh.set_index(0, {});\r\n", pList[0]));
|
||||||
|
gsSrc.append(fmt::format("mesh.set_index(1, {});\r\n", pList[1]));
|
||||||
|
gsSrc.append(fmt::format("mesh.set_index(2, {});\r\n", pList[2]));
|
||||||
|
// second triangle: pList[1], pList[2], pList[3] (shares the pList[1]-pList[2] edge with the first)
gsSrc.append(fmt::format("mesh.set_index(3, {});\r\n", pList[1]));
|
||||||
|
gsSrc.append(fmt::format("mesh.set_index(4, {});\r\n", pList[2]));
|
||||||
|
gsSrc.append(fmt::format("mesh.set_index(5, {});\r\n", pList[3]));
|
||||||
|
}
|
||||||
|
|
||||||
|
static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer, const LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister)
|
||||||
|
{
|
||||||
|
std::string gsSrc;
|
||||||
|
gsSrc.append("#include <metal_stdlib>\r\n");
|
||||||
|
gsSrc.append("using namespace metal;\r\n");
|
||||||
|
|
||||||
|
LatteShaderPSInputTable psInputTable;
|
||||||
|
LatteShader_CreatePSInputTable(&psInputTable, latteRegister.GetRawView());
|
||||||
|
|
||||||
|
// inputs & outputs
|
||||||
|
std::string vertexOutDefinition = "struct VertexOut {\r\n";
|
||||||
|
vertexOutDefinition += "float4 position;\r\n";
|
||||||
|
std::string geometryOutDefinition = "struct GeometryOut {\r\n";
|
||||||
|
geometryOutDefinition += "float4 position [[position]];\r\n";
|
||||||
|
auto parameterMask = vertexShader->outputParameterMask;
|
||||||
|
for (uint32 i = 0; i < 32; i++)
|
||||||
|
{
|
||||||
|
if ((parameterMask & (1 << i)) == 0)
|
||||||
|
continue;
|
||||||
|
sint32 vsSemanticId = psInputTable.getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
|
||||||
|
if (vsSemanticId < 0)
|
||||||
|
continue;
|
||||||
|
auto psImport = psInputTable.getPSImportBySemanticId(vsSemanticId);
|
||||||
|
if (psImport == nullptr)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// VertexOut
|
||||||
|
vertexOutDefinition += fmt::format("float4 passParameterSem{};\r\n", vsSemanticId);
|
||||||
|
|
||||||
|
// GeometryOut
|
||||||
|
geometryOutDefinition += fmt::format("float4 passParameterSem{}", vsSemanticId);
|
||||||
|
|
||||||
|
geometryOutDefinition += fmt::format(" [[user(locn{})]]", psInputTable.getPSImportLocationBySemanticId(vsSemanticId));
|
||||||
|
if (psImport->isFlat)
|
||||||
|
geometryOutDefinition += " [[flat]]";
|
||||||
|
if (psImport->isNoPerspective)
|
||||||
|
geometryOutDefinition += " [[center_no_perspective]]";
|
||||||
|
geometryOutDefinition += ";\r\n";
|
||||||
|
}
|
||||||
|
vertexOutDefinition += "};\r\n";
|
||||||
|
geometryOutDefinition += "};\r\n";
|
||||||
|
|
||||||
|
gsSrc.append(vertexOutDefinition);
|
||||||
|
gsSrc.append(geometryOutDefinition);
|
||||||
|
|
||||||
|
gsSrc.append("struct ObjectPayload {\r\n");
|
||||||
|
gsSrc.append("VertexOut vertexOut[3];\r\n");
|
||||||
|
gsSrc.append("};\r\n");
|
||||||
|
|
||||||
|
// gen function
|
||||||
|
gsSrc.append("float4 gen4thVertexA(float4 a, float4 b, float4 c)\r\n");
|
||||||
|
gsSrc.append("{\r\n");
|
||||||
|
gsSrc.append("return b - (c - a);\r\n");
|
||||||
|
gsSrc.append("}\r\n");
|
||||||
|
|
||||||
|
gsSrc.append("float4 gen4thVertexB(float4 a, float4 b, float4 c)\r\n");
|
||||||
|
gsSrc.append("{\r\n");
|
||||||
|
gsSrc.append("return c - (b - a);\r\n");
|
||||||
|
gsSrc.append("}\r\n");
|
||||||
|
|
||||||
|
gsSrc.append("float4 gen4thVertexC(float4 a, float4 b, float4 c)\r\n");
|
||||||
|
gsSrc.append("{\r\n");
|
||||||
|
gsSrc.append("return c + (b - a);\r\n");
|
||||||
|
gsSrc.append("}\r\n");
|
||||||
|
|
||||||
|
// main
|
||||||
|
gsSrc.append("using MeshType = mesh<GeometryOut, void, 4, 2, topology::triangle>;\r\n");
|
||||||
|
gsSrc.append("[[mesh, max_total_threads_per_threadgroup(1)]]\r\n");
|
||||||
|
gsSrc.append("void main0(MeshType mesh, const object_data ObjectPayload& objectPayload [[payload]])\r\n");
|
||||||
|
gsSrc.append("{\r\n");
|
||||||
|
gsSrc.append("GeometryOut out;\r\n");
|
||||||
|
|
||||||
|
// there are two possible winding orders that need different triangle generation:
|
||||||
|
// 0 1
|
||||||
|
// 2 3
|
||||||
|
// and
|
||||||
|
// 0 1
|
||||||
|
// 3 2
|
||||||
|
// all others are just symmetries of these cases
|
||||||
|
|
||||||
|
// we can determine the case by comparing the distance 0<->1 and 0<->2
|
||||||
|
|
||||||
|
gsSrc.append("float dist0_1 = length(objectPayload.vertexOut[1].position.xy - objectPayload.vertexOut[0].position.xy);\r\n");
|
||||||
|
gsSrc.append("float dist0_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[0].position.xy);\r\n");
|
||||||
|
gsSrc.append("float dist1_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[1].position.xy);\r\n");
|
||||||
|
|
||||||
|
// emit vertices
|
||||||
|
gsSrc.append("if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n");
|
||||||
|
gsSrc.append("{\r\n");
|
||||||
|
// p0 to p1 is diagonal
|
||||||
|
rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister);
|
||||||
|
gsSrc.append("} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n");
|
||||||
|
// p0 to p2 is diagonal
|
||||||
|
rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister);
|
||||||
|
gsSrc.append("} else {\r\n");
|
||||||
|
// p1 to p2 is diagonal
|
||||||
|
rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister);
|
||||||
|
gsSrc.append("}\r\n");
|
||||||
|
|
||||||
|
gsSrc.append("mesh.set_primitive_count(2);\r\n");
|
||||||
|
|
||||||
|
gsSrc.append("}\r\n");
|
||||||
|
|
||||||
|
auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc);
|
||||||
|
mtlShader->PreponeCompilation(true);
|
||||||
|
|
||||||
|
return mtlShader;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF
|
||||||
|
|
||||||
|
uint64 s_cacheTitleId = INVALID_TITLE_ID;
|
||||||
|
|
||||||
|
extern std::atomic_int g_compiled_shaders_total;
|
||||||
|
extern std::atomic_int g_compiled_shaders_async;
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
void SetFragmentState(T* desc, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, bool rasterizationEnabled, const LatteContextRegister& lcr)
|
||||||
|
{
|
||||||
|
// TODO: check if the pixel shader is valid as well?
|
||||||
|
if (!rasterizationEnabled/* || !pixelShaderMtl*/)
|
||||||
|
{
|
||||||
|
desc->setRasterizationEnabled(false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Color attachments
|
||||||
|
const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL;
|
||||||
|
uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
|
||||||
|
uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK();
|
||||||
|
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
||||||
|
{
|
||||||
|
Latte::E_GX2SURFFMT format = lastUsedAttachmentsInfo.colorFormats[i];
|
||||||
|
if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
MTL::PixelFormat pixelFormat = GetMtlPixelFormat(format, false);
|
||||||
|
auto colorAttachment = desc->colorAttachments()->object(i);
|
||||||
|
colorAttachment->setPixelFormat(pixelFormat);
|
||||||
|
|
||||||
|
// Disable writes if not in the active FBO
|
||||||
|
if (activeAttachmentsInfo.colorFormats[i] == Latte::E_GX2SURFFMT::INVALID_FORMAT)
|
||||||
|
{
|
||||||
|
colorAttachment->setWriteMask(MTL::ColorWriteMaskNone);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF));
|
||||||
|
|
||||||
|
// Blending
|
||||||
|
bool blendEnabled = ((blendEnableMask & (1 << i))) != 0;
|
||||||
|
// Only float data type is blendable
|
||||||
|
if (blendEnabled && GetMtlPixelFormatInfo(format, false).dataType == MetalDataType::FLOAT)
|
||||||
|
{
|
||||||
|
colorAttachment->setBlendingEnabled(true);
|
||||||
|
|
||||||
|
const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i];
|
||||||
|
|
||||||
|
auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN());
|
||||||
|
auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND());
|
||||||
|
auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND());
|
||||||
|
|
||||||
|
colorAttachment->setRgbBlendOperation(rgbBlendOp);
|
||||||
|
colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor);
|
||||||
|
colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor);
|
||||||
|
if (blendControlReg.get_SEPARATE_ALPHA_BLEND())
|
||||||
|
{
|
||||||
|
colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN()));
|
||||||
|
colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND()));
|
||||||
|
colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND()));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
colorAttachment->setAlphaBlendOperation(rgbBlendOp);
|
||||||
|
colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor);
|
||||||
|
colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Depth stencil attachment
|
||||||
|
if (lastUsedAttachmentsInfo.depthFormat != Latte::E_GX2SURFFMT::INVALID_FORMAT)
|
||||||
|
{
|
||||||
|
MTL::PixelFormat pixelFormat = GetMtlPixelFormat(lastUsedAttachmentsInfo.depthFormat, true);
|
||||||
|
desc->setDepthAttachmentPixelFormat(pixelFormat);
|
||||||
|
if (lastUsedAttachmentsInfo.hasStencil)
|
||||||
|
desc->setStencilAttachmentPixelFormat(pixelFormat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Releases the pipeline descriptor held by this compiler.
MetalPipelineCompiler::~MetalPipelineCompiler()
{
	// NOTE(review): the disabled block below refers to members (m_pipelineCache, m_binaryArchive,
	// m_binaryArchiveURL) that this class does not declare; it appears to be left over from
	// binary-archive serialization code. Kept for reference.
	/*
	for (auto& pair : m_pipelineCache)
	{
		pair.second->release();
	}
	m_pipelineCache.clear();

	NS::Error* error = nullptr;
	m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error);
	if (error)
	{
		cemuLog_log(LogType::Force, "error serializing binary archive: {}", error->localizedDescription()->utf8String());
		error->release();
	}
	m_binaryArchive->release();

	m_binaryArchiveURL->release();
	*/

	// The descriptor is only allocated by InitFromStateRender()/InitFromStateMesh(), so a compiler
	// that was never initialised holds none. Skip the release in that case; this relies on
	// m_pipelineDescriptor being null until a descriptor has been assigned.
	if (m_pipelineDescriptor != nullptr)
		m_pipelineDescriptor->release();
}
|
||||||
|
|
||||||
|
void MetalPipelineCompiler::InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
|
||||||
|
{
|
||||||
|
// Check if the pipeline uses a geometry shader
|
||||||
|
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE());
|
||||||
|
bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);
|
||||||
|
|
||||||
|
m_usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
|
||||||
|
|
||||||
|
// Rasterization
|
||||||
|
m_rasterizationEnabled = !lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
|
||||||
|
|
||||||
|
// HACK
|
||||||
|
// TODO: include this in the hash?
|
||||||
|
if (!lcr.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
|
||||||
|
m_rasterizationEnabled = true;
|
||||||
|
|
||||||
|
// Culling both front and back faces effectively disables rasterization
|
||||||
|
const auto& polygonControlReg = lcr.PA_SU_SC_MODE_CNTL;
|
||||||
|
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
|
||||||
|
uint32 cullBack = polygonControlReg.get_CULL_BACK();
|
||||||
|
if (cullFront && cullBack)
|
||||||
|
m_rasterizationEnabled = false;
|
||||||
|
|
||||||
|
// Shaders
|
||||||
|
m_vertexShaderMtl = static_cast<RendererShaderMtl*>(vertexShader->shader);
|
||||||
|
if (geometryShader)
|
||||||
|
m_geometryShaderMtl = static_cast<RendererShaderMtl*>(geometryShader->shader);
|
||||||
|
else if (isPrimitiveRect)
|
||||||
|
m_geometryShaderMtl = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
|
||||||
|
else
|
||||||
|
m_geometryShaderMtl = nullptr;
|
||||||
|
m_pixelShaderMtl = static_cast<RendererShaderMtl*>(pixelShader->shader);
|
||||||
|
|
||||||
|
if (m_usesGeometryShader)
|
||||||
|
InitFromStateMesh(fetchShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
|
||||||
|
else
|
||||||
|
InitFromStateRender(fetchShader, vertexShader, lastUsedAttachmentsInfo, activeAttachmentsInfo, lcr);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MetalPipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay)
|
||||||
|
{
|
||||||
|
if (forceCompile)
|
||||||
|
{
|
||||||
|
// if some shader stages are not compiled yet, compile them now
|
||||||
|
if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled())
|
||||||
|
m_vertexShaderMtl->PreponeCompilation(isRenderThread);
|
||||||
|
if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled())
|
||||||
|
m_geometryShaderMtl->PreponeCompilation(isRenderThread);
|
||||||
|
if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled())
|
||||||
|
m_pixelShaderMtl->PreponeCompilation(isRenderThread);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// fail early if some shader stages are not compiled
|
||||||
|
if (m_vertexShaderMtl && !m_vertexShaderMtl->IsCompiled())
|
||||||
|
return false;
|
||||||
|
if (m_geometryShaderMtl && !m_geometryShaderMtl->IsCompiled())
|
||||||
|
return false;
|
||||||
|
if (m_pixelShaderMtl && !m_pixelShaderMtl->IsCompiled())
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compile
|
||||||
|
MTL::RenderPipelineState* pipeline = nullptr;
|
||||||
|
NS::Error* error = nullptr;
|
||||||
|
|
||||||
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
|
if (m_usesGeometryShader)
|
||||||
|
{
|
||||||
|
auto desc = static_cast<MTL::MeshRenderPipelineDescriptor*>(m_pipelineDescriptor);
|
||||||
|
|
||||||
|
// Shaders
|
||||||
|
desc->setObjectFunction(m_vertexShaderMtl->GetFunction());
|
||||||
|
desc->setMeshFunction(m_geometryShaderMtl->GetFunction());
|
||||||
|
if (m_rasterizationEnabled)
|
||||||
|
desc->setFragmentFunction(m_pixelShaderMtl->GetFunction());
|
||||||
|
|
||||||
|
#ifdef CEMU_DEBUG_ASSERT
|
||||||
|
desc->setLabel(GetLabel("Mesh render pipeline state", desc));
|
||||||
|
#endif
|
||||||
|
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto desc = static_cast<MTL::RenderPipelineDescriptor*>(m_pipelineDescriptor);
|
||||||
|
|
||||||
|
// Shaders
|
||||||
|
desc->setVertexFunction(m_vertexShaderMtl->GetFunction());
|
||||||
|
if (m_rasterizationEnabled)
|
||||||
|
desc->setFragmentFunction(m_pixelShaderMtl->GetFunction());
|
||||||
|
|
||||||
|
#ifdef CEMU_DEBUG_ASSERT
|
||||||
|
desc->setLabel(GetLabel("Render pipeline state", desc));
|
||||||
|
#endif
|
||||||
|
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
|
||||||
|
}
|
||||||
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto creationDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
|
||||||
|
|
||||||
|
if (error)
|
||||||
|
{
|
||||||
|
cemuLog_log(LogType::Force, "error creating render pipeline state: {}", error->localizedDescription()->utf8String());
|
||||||
|
error->release();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (showInOverlay)
|
||||||
|
{
|
||||||
|
if (isRenderThread)
|
||||||
|
g_compiling_pipelines_syncTimeSum += creationDuration;
|
||||||
|
else
|
||||||
|
g_compiling_pipelines_async++;
|
||||||
|
g_compiling_pipelines++;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_pipelineObj.m_pipeline = pipeline;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MetalPipelineCompiler::InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
|
||||||
|
{
|
||||||
|
// Render pipeline state
|
||||||
|
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
|
||||||
|
|
||||||
|
// Vertex descriptor
|
||||||
|
if (!fetchShader->mtlFetchVertexManually)
|
||||||
|
{
|
||||||
|
MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
|
||||||
|
for (auto& bufferGroup : fetchShader->bufferGroups)
|
||||||
|
{
|
||||||
|
std::optional<LatteConst::VertexFetchType2> fetchType;
|
||||||
|
|
||||||
|
uint32 minBufferStride = 0;
|
||||||
|
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
|
||||||
|
{
|
||||||
|
auto& attr = bufferGroup.attrib[j];
|
||||||
|
|
||||||
|
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
|
||||||
|
if (semanticId == (uint32)-1)
|
||||||
|
continue; // attribute not used?
|
||||||
|
|
||||||
|
auto attribute = vertexDescriptor->attributes()->object(semanticId);
|
||||||
|
attribute->setOffset(attr.offset);
|
||||||
|
attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex));
|
||||||
|
attribute->setFormat(GetMtlVertexFormat(attr.format));
|
||||||
|
|
||||||
|
minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format));
|
||||||
|
|
||||||
|
if (fetchType.has_value())
|
||||||
|
cemu_assert_debug(fetchType == attr.fetchType);
|
||||||
|
else
|
||||||
|
fetchType = attr.fetchType;
|
||||||
|
|
||||||
|
if (attr.fetchType == LatteConst::INSTANCE_DATA)
|
||||||
|
{
|
||||||
|
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
|
||||||
|
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
|
||||||
|
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
||||||
|
|
||||||
|
auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
|
||||||
|
if (bufferStride == 0)
|
||||||
|
{
|
||||||
|
// Buffer stride cannot be zero, let's use the minimum stride
|
||||||
|
bufferStride = minBufferStride;
|
||||||
|
|
||||||
|
// Additionally, constant vertex function must be used
|
||||||
|
layout->setStepFunction(MTL::VertexStepFunctionConstant);
|
||||||
|
layout->setStepRate(0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
|
||||||
|
layout->setStepFunction(MTL::VertexStepFunctionPerVertex);
|
||||||
|
else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
|
||||||
|
layout->setStepFunction(MTL::VertexStepFunctionPerInstance);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value());
|
||||||
|
cemu_assert(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bufferStride = Align(bufferStride, 4);
|
||||||
|
layout->setStride(bufferStride);
|
||||||
|
}
|
||||||
|
|
||||||
|
desc->setVertexDescriptor(vertexDescriptor);
|
||||||
|
vertexDescriptor->release();
|
||||||
|
}
|
||||||
|
|
||||||
|
SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr);
|
||||||
|
|
||||||
|
m_pipelineDescriptor = desc;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MetalPipelineCompiler::InitFromStateMesh(const LatteFetchShader* fetchShader, const MetalAttachmentsInfo& lastUsedAttachmentsInfo, const MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr)
|
||||||
|
{
|
||||||
|
// Render pipeline state
|
||||||
|
MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init();
|
||||||
|
|
||||||
|
SetFragmentState(desc, lastUsedAttachmentsInfo, activeAttachmentsInfo, m_rasterizationEnabled, lcr);
|
||||||
|
|
||||||
|
m_pipelineDescriptor = desc;
|
||||||
|
}
|
38
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h
Normal file
38
src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCompiler.h
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
|
||||||
|
|
||||||
|
#include "Cafe/HW/Latte/ISA/LatteReg.h"
|
||||||
|
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
||||||
|
|
||||||
|
struct PipelineObject
|
||||||
|
{
|
||||||
|
MTL::RenderPipelineState* m_pipeline = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
class MetalPipelineCompiler
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
MetalPipelineCompiler(class MetalRenderer* metalRenderer, PipelineObject& pipelineObj) : m_mtlr{metalRenderer}, m_pipelineObj{pipelineObj} {}
|
||||||
|
~MetalPipelineCompiler();
|
||||||
|
|
||||||
|
void InitFromState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
|
||||||
|
|
||||||
|
bool Compile(bool forceCompile, bool isRenderThread, bool showInOverlay);
|
||||||
|
|
||||||
|
private:
|
||||||
|
class MetalRenderer* m_mtlr;
|
||||||
|
PipelineObject& m_pipelineObj;
|
||||||
|
|
||||||
|
class RendererShaderMtl* m_vertexShaderMtl;
|
||||||
|
class RendererShaderMtl* m_geometryShaderMtl;
|
||||||
|
class RendererShaderMtl* m_pixelShaderMtl;
|
||||||
|
bool m_usesGeometryShader;
|
||||||
|
bool m_rasterizationEnabled;
|
||||||
|
|
||||||
|
NS::Object* m_pipelineDescriptor;
|
||||||
|
|
||||||
|
void InitFromStateRender(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
|
||||||
|
|
||||||
|
void InitFromStateMesh(const LatteFetchShader* fetchShader, const class MetalAttachmentsInfo& lastUsedAttachmentsInfo, const class MetalAttachmentsInfo& activeAttachmentsInfo, const LatteContextRegister& lcr);
|
||||||
|
};
|
@ -23,6 +23,7 @@
|
|||||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||||
|
#include "HW/Latte/Renderer/Metal/MetalPipelineCompiler.h"
|
||||||
#include "config/CemuConfig.h"
|
#include "config/CemuConfig.h"
|
||||||
|
|
||||||
#define IMGUI_IMPL_METAL_CPP
|
#define IMGUI_IMPL_METAL_CPP
|
||||||
@ -69,6 +70,7 @@ MetalRenderer::MetalRenderer()
|
|||||||
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
|
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
|
||||||
textureDescriptor->setTextureType(MTL::TextureType1D);
|
textureDescriptor->setTextureType(MTL::TextureType1D);
|
||||||
textureDescriptor->setWidth(1);
|
textureDescriptor->setWidth(1);
|
||||||
|
textureDescriptor->setUsage(MTL::TextureUsageShaderRead);
|
||||||
m_nullTexture1D = m_device->newTexture(textureDescriptor);
|
m_nullTexture1D = m_device->newTexture(textureDescriptor);
|
||||||
#ifdef CEMU_DEBUG_ASSERT
|
#ifdef CEMU_DEBUG_ASSERT
|
||||||
m_nullTexture1D->setLabel(GetLabel("Null texture 1D", m_nullTexture1D));
|
m_nullTexture1D->setLabel(GetLabel("Null texture 1D", m_nullTexture1D));
|
||||||
@ -76,6 +78,7 @@ MetalRenderer::MetalRenderer()
|
|||||||
|
|
||||||
textureDescriptor->setTextureType(MTL::TextureType2D);
|
textureDescriptor->setTextureType(MTL::TextureType2D);
|
||||||
textureDescriptor->setHeight(1);
|
textureDescriptor->setHeight(1);
|
||||||
|
textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageRenderTarget);
|
||||||
m_nullTexture2D = m_device->newTexture(textureDescriptor);
|
m_nullTexture2D = m_device->newTexture(textureDescriptor);
|
||||||
#ifdef CEMU_DEBUG_ASSERT
|
#ifdef CEMU_DEBUG_ASSERT
|
||||||
m_nullTexture2D->setLabel(GetLabel("Null texture 2D", m_nullTexture2D));
|
m_nullTexture2D->setLabel(GetLabel("Null texture 2D", m_nullTexture2D));
|
||||||
@ -511,13 +514,13 @@ LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key)
|
|||||||
|
|
||||||
void MetalRenderer::rendertarget_deleteCachedFBO(LatteCachedFBO* cfbo)
|
void MetalRenderer::rendertarget_deleteCachedFBO(LatteCachedFBO* cfbo)
|
||||||
{
|
{
|
||||||
if (cfbo == (LatteCachedFBO*)m_state.m_activeFBO)
|
if (cfbo == (LatteCachedFBO*)m_state.m_activeFBO.m_fbo)
|
||||||
m_state.m_activeFBO = nullptr;
|
m_state.m_activeFBO = {nullptr};
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalRenderer::rendertarget_bindFramebufferObject(LatteCachedFBO* cfbo)
|
void MetalRenderer::rendertarget_bindFramebufferObject(LatteCachedFBO* cfbo)
|
||||||
{
|
{
|
||||||
m_state.m_activeFBO = (CachedFBOMtl*)cfbo;
|
m_state.m_activeFBO = {(CachedFBOMtl*)cfbo, MetalAttachmentsInfo((CachedFBOMtl*)cfbo)};
|
||||||
}
|
}
|
||||||
|
|
||||||
void* MetalRenderer::texture_acquireTextureUploadBuffer(uint32 size)
|
void* MetalRenderer::texture_acquireTextureUploadBuffer(uint32 size)
|
||||||
@ -943,15 +946,9 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
|||||||
|
|
||||||
// Shaders
|
// Shaders
|
||||||
LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
|
LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
|
||||||
if (vertexShader && !vertexShader->shader->IsCompiled())
|
|
||||||
return;
|
|
||||||
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
|
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
|
||||||
if (geometryShader && !geometryShader->shader->IsCompiled())
|
|
||||||
return;
|
|
||||||
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
|
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
|
||||||
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
||||||
if (vertexShader && !pixelShader->shader->IsCompiled())
|
|
||||||
return;
|
|
||||||
|
|
||||||
bool neverSkipAccurateBarrier = false;
|
bool neverSkipAccurateBarrier = false;
|
||||||
|
|
||||||
@ -1003,12 +1000,23 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
|||||||
// Render pass
|
// Render pass
|
||||||
auto renderCommandEncoder = GetRenderCommandEncoder();
|
auto renderCommandEncoder = GetRenderCommandEncoder();
|
||||||
|
|
||||||
|
// Render pipeline state
|
||||||
|
PipelineObject* pipelineObj = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO.m_attachmentsInfo, m_state.m_activeFBO.m_attachmentsInfo, m_state.m_activeFBO.m_fbo->m_size, count, LatteGPUState.contextNew);
|
||||||
|
if (!pipelineObj->m_pipeline)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (pipelineObj->m_pipeline != encoderState.m_renderPipelineState)
|
||||||
|
{
|
||||||
|
renderCommandEncoder->setRenderPipelineState(pipelineObj->m_pipeline);
|
||||||
|
encoderState.m_renderPipelineState = pipelineObj->m_pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
// Depth stencil state
|
// Depth stencil state
|
||||||
|
|
||||||
// Disable depth write when there is no depth attachment
|
// Disable depth write when there is no depth attachment
|
||||||
auto& depthControl = LatteGPUState.contextNew.DB_DEPTH_CONTROL;
|
auto& depthControl = LatteGPUState.contextNew.DB_DEPTH_CONTROL;
|
||||||
bool depthWriteEnable = depthControl.get_Z_WRITE_ENABLE();
|
bool depthWriteEnable = depthControl.get_Z_WRITE_ENABLE();
|
||||||
if (!m_state.m_activeFBO->depthBuffer.texture)
|
if (!m_state.m_activeFBO.m_fbo->depthBuffer.texture)
|
||||||
depthControl.set_Z_WRITE_ENABLE(false);
|
depthControl.set_Z_WRITE_ENABLE(false);
|
||||||
|
|
||||||
MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew);
|
MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew);
|
||||||
@ -1221,22 +1229,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
|||||||
// renderCommandEncoder->memoryBarrier(barrierBuffers.data(), barrierBuffers.size(), MTL::RenderStageVertex, MTL::RenderStageVertex);
|
// renderCommandEncoder->memoryBarrier(barrierBuffers.data(), barrierBuffers.size(), MTL::RenderStageVertex, MTL::RenderStageVertex);
|
||||||
//}
|
//}
|
||||||
|
|
||||||
// Render pipeline state
|
|
||||||
MTL::RenderPipelineState* renderPipelineState;
|
|
||||||
if (usesGeometryShader)
|
|
||||||
renderPipelineState = m_pipelineCache->GetMeshPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew, hostIndexType);
|
|
||||||
else
|
|
||||||
renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, m_state.m_activeFBO, LatteGPUState.contextNew);
|
|
||||||
|
|
||||||
if (!renderPipelineState)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (renderPipelineState != encoderState.m_renderPipelineState)
|
|
||||||
{
|
|
||||||
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
|
|
||||||
encoderState.m_renderPipelineState = renderPipelineState;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prepare streamout
|
// Prepare streamout
|
||||||
m_state.m_streamoutState.verticesPerInstance = count;
|
m_state.m_streamoutState.verticesPerInstance = count;
|
||||||
LatteStreamout_PrepareDrawcall(count, instanceCount);
|
LatteStreamout_PrepareDrawcall(count, instanceCount);
|
||||||
@ -1529,12 +1521,12 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr
|
|||||||
{
|
{
|
||||||
if (m_encoderType == MetalEncoderType::Render)
|
if (m_encoderType == MetalEncoderType::Render)
|
||||||
{
|
{
|
||||||
bool needsNewRenderPass = (m_state.m_lastUsedFBO == nullptr);
|
bool needsNewRenderPass = (m_state.m_lastUsedFBO.m_fbo == nullptr);
|
||||||
if (!needsNewRenderPass)
|
if (!needsNewRenderPass)
|
||||||
{
|
{
|
||||||
for (uint8 i = 0; i < 8; i++)
|
for (uint8 i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
if (m_state.m_activeFBO->colorBuffer[i].texture && m_state.m_activeFBO->colorBuffer[i].texture != m_state.m_lastUsedFBO->colorBuffer[i].texture)
|
if (m_state.m_activeFBO.m_fbo->colorBuffer[i].texture && m_state.m_activeFBO.m_fbo->colorBuffer[i].texture != m_state.m_lastUsedFBO.m_fbo->colorBuffer[i].texture)
|
||||||
{
|
{
|
||||||
needsNewRenderPass = true;
|
needsNewRenderPass = true;
|
||||||
break;
|
break;
|
||||||
@ -1544,7 +1536,7 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr
|
|||||||
|
|
||||||
if (!needsNewRenderPass)
|
if (!needsNewRenderPass)
|
||||||
{
|
{
|
||||||
if (m_state.m_activeFBO->depthBuffer.texture && (m_state.m_activeFBO->depthBuffer.texture != m_state.m_lastUsedFBO->depthBuffer.texture || ( m_state.m_activeFBO->depthBuffer.hasStencil && !m_state.m_lastUsedFBO->depthBuffer.hasStencil)))
|
if (m_state.m_activeFBO.m_fbo->depthBuffer.texture && (m_state.m_activeFBO.m_fbo->depthBuffer.texture != m_state.m_lastUsedFBO.m_fbo->depthBuffer.texture || ( m_state.m_activeFBO.m_fbo->depthBuffer.hasStencil && !m_state.m_lastUsedFBO.m_fbo->depthBuffer.hasStencil)))
|
||||||
{
|
{
|
||||||
needsNewRenderPass = true;
|
needsNewRenderPass = true;
|
||||||
}
|
}
|
||||||
@ -1562,7 +1554,7 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr
|
|||||||
|
|
||||||
auto commandBuffer = GetCommandBuffer();
|
auto commandBuffer = GetCommandBuffer();
|
||||||
|
|
||||||
auto renderCommandEncoder = commandBuffer->renderCommandEncoder(m_state.m_activeFBO->GetRenderPassDescriptor());
|
auto renderCommandEncoder = commandBuffer->renderCommandEncoder(m_state.m_activeFBO.m_fbo->GetRenderPassDescriptor());
|
||||||
#ifdef CEMU_DEBUG_ASSERT
|
#ifdef CEMU_DEBUG_ASSERT
|
||||||
renderCommandEncoder->setLabel(GetLabel("Render command encoder", renderCommandEncoder));
|
renderCommandEncoder->setLabel(GetLabel("Render command encoder", renderCommandEncoder));
|
||||||
#endif
|
#endif
|
||||||
@ -1721,7 +1713,7 @@ bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader)
|
|||||||
// If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture
|
// If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture
|
||||||
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
||||||
{
|
{
|
||||||
auto colorTarget = m_state.m_activeFBO->colorBuffer[i].texture;
|
auto colorTarget = m_state.m_activeFBO.m_fbo->colorBuffer[i].texture;
|
||||||
if (colorTarget && colorTarget->baseTexture == baseTexture)
|
if (colorTarget && colorTarget->baseTexture == baseTexture)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h"
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h"
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
|
||||||
|
|
||||||
struct MetalBufferAllocation
|
struct MetalBufferAllocation
|
||||||
{
|
{
|
||||||
@ -121,6 +122,12 @@ struct MetalStreamoutState
|
|||||||
sint32 verticesPerInstance;
|
sint32 verticesPerInstance;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct MetalActiveFBOState
|
||||||
|
{
|
||||||
|
class CachedFBOMtl* m_fbo = nullptr;
|
||||||
|
MetalAttachmentsInfo m_attachmentsInfo;
|
||||||
|
};
|
||||||
|
|
||||||
struct MetalState
|
struct MetalState
|
||||||
{
|
{
|
||||||
MetalEncoderState m_encoderState{};
|
MetalEncoderState m_encoderState{};
|
||||||
@ -130,9 +137,9 @@ struct MetalState
|
|||||||
bool m_skipDrawSequence = false;
|
bool m_skipDrawSequence = false;
|
||||||
bool m_isFirstDrawInRenderPass = true;
|
bool m_isFirstDrawInRenderPass = true;
|
||||||
|
|
||||||
class CachedFBOMtl* m_activeFBO = nullptr;
|
MetalActiveFBOState m_activeFBO;
|
||||||
// If the FBO changes, but it's the same FBO as the last one with some omitted attachments, this FBO doesn't change'
|
// If the FBO changes, but it's the same FBO as the last one with some omitted attachments, this FBO doesn't change
|
||||||
class CachedFBOMtl* m_lastUsedFBO = nullptr;
|
MetalActiveFBOState m_lastUsedFBO;
|
||||||
|
|
||||||
MetalBoundBuffer m_vertexBuffers[MAX_MTL_BUFFERS] = {{}};
|
MetalBoundBuffer m_vertexBuffers[MAX_MTL_BUFFERS] = {{}};
|
||||||
// TODO: find out what is the max number of bound textures on the Wii U
|
// TODO: find out what is the max number of bound textures on the Wii U
|
||||||
|
Loading…
Reference in New Issue
Block a user