implement sampler cache

This commit is contained in:
Samuliak 2024-08-15 11:44:06 +02:00
parent 9982ac7acb
commit d79d7fea63
7 changed files with 181 additions and 122 deletions

View File

@ -560,6 +560,8 @@ if(ENABLE_METAL)
HW/Latte/Renderer/Metal/MetalPipelineCache.h
HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
HW/Latte/Renderer/Metal/MetalDepthStencilCache.h
HW/Latte/Renderer/Metal/MetalSamplerCache.cpp
HW/Latte/Renderer/Metal/MetalSamplerCache.h
HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp
HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h
HW/Latte/Renderer/Metal/UtilityShaderSource.h

View File

@ -18,9 +18,7 @@ MTL::DepthStencilState* MetalDepthStencilCache::GetDepthStencilState(const Latte
uint64 stateHash = CalculateDepthStencilHash(lcr);
auto& depthStencilState = m_depthStencilCache[stateHash];
if (depthStencilState)
{
return depthStencilState;
}
// Depth stencil state
bool depthEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_ENABLE();

View File

@ -58,9 +58,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
uint64 stateHash = CalculatePipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr);
auto& pipeline = m_pipelineCache[stateHash];
if (pipeline)
{
return pipeline;
}
// Vertex descriptor
MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();

View File

@ -6,6 +6,7 @@
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
@ -16,6 +17,7 @@
#include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h"
#include "Common/precompiled.h"
#include "HW/Latte/Core/Latte.h"
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Metal/MTLRenderCommandEncoder.hpp"
#include "Metal/MTLRenderPass.hpp"
@ -43,6 +45,7 @@ MetalRenderer::MetalRenderer()
m_memoryManager = new MetalMemoryManager(this);
m_pipelineCache = new MetalPipelineCache(this);
m_depthStencilCache = new MetalDepthStencilCache(this);
m_samplerCache = new MetalSamplerCache(this);
// Texture readback
m_readbackBuffer = m_device->newBuffer(TEXTURE_READBACK_SIZE, MTL::StorageModeShared);
@ -121,8 +124,9 @@ MetalRenderer::~MetalRenderer()
m_presentPipelineLinear->release();
m_presentPipelineSRGB->release();
delete m_depthStencilCache;
delete m_pipelineCache;
delete m_depthStencilCache;
delete m_samplerCache;
delete m_memoryManager;
m_nearestSampler->release();
@ -1179,123 +1183,29 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE)
{
uint32 samplerIndex = stageSamplerIndex + LatteDecompiler_getTextureSamplerBaseIndex(shader->shaderType);
const _LatteRegisterSetSampler* samplerWords = LatteGPUState.contextNew.SQ_TEX_SAMPLER + samplerIndex;
auto sampler = m_samplerCache->GetSamplerState(LatteGPUState.contextNew, samplerIndex);
// TODO: cache this instead
MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding];
if (sampler != boundSampler)
{
boundSampler = sampler;
// lod
uint32 iMinLOD = samplerWords->WORD1.get_MIN_LOD();
uint32 iMaxLOD = samplerWords->WORD1.get_MAX_LOD();
sint32 iLodBias = samplerWords->WORD1.get_LOD_BIAS();
// apply relative lod bias from graphic pack
if (baseTexture->overwriteInfo.hasRelativeLodBias)
iLodBias += baseTexture->overwriteInfo.relativeLodBias;
// apply absolute lod bias from graphic pack
if (baseTexture->overwriteInfo.hasLodBias)
iLodBias = baseTexture->overwriteInfo.lodBias;
auto filterMip = samplerWords->WORD0.get_MIP_FILTER();
if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::NONE)
{
samplerDescriptor->setMipFilter(MTL::SamplerMipFilterNearest);
samplerDescriptor->setLodMinClamp(0.0f);
samplerDescriptor->setLodMaxClamp(0.25f);
}
else if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::POINT)
{
samplerDescriptor->setMipFilter(MTL::SamplerMipFilterNearest);
samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f);
samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f);
}
else if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::LINEAR)
{
samplerDescriptor->setMipFilter(MTL::SamplerMipFilterLinear);
samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f);
samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f);
}
else
{
// fallback for invalid constants
samplerDescriptor->setMipFilter(MTL::SamplerMipFilterLinear);
samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f);
samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f);
}
auto filterMin = samplerWords->WORD0.get_XY_MIN_FILTER();
cemu_assert_debug(filterMin != Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::BICUBIC); // todo
samplerDescriptor->setMinFilter((filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::POINT || filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear);
auto filterMag = samplerWords->WORD0.get_XY_MAG_FILTER();
samplerDescriptor->setMagFilter((filterMag == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::POINT || filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear);
auto filterZ = samplerWords->WORD0.get_Z_FILTER();
// todo: z-filter for texture array samplers is customizable for GPU7 but OpenGL/Vulkan doesn't expose this functionality?
auto clampX = samplerWords->WORD0.get_CLAMP_X();
auto clampY = samplerWords->WORD0.get_CLAMP_Y();
auto clampZ = samplerWords->WORD0.get_CLAMP_Z();
samplerDescriptor->setSAddressMode(GetMtlSamplerAddressMode(clampX));
samplerDescriptor->setTAddressMode(GetMtlSamplerAddressMode(clampY));
samplerDescriptor->setRAddressMode(GetMtlSamplerAddressMode(clampZ));
auto maxAniso = samplerWords->WORD0.get_MAX_ANISO_RATIO();
if (baseTexture->overwriteInfo.anisotropicLevel >= 0)
maxAniso = baseTexture->overwriteInfo.anisotropicLevel;
if (maxAniso > 0)
{
samplerDescriptor->setMaxAnisotropy(1 << maxAniso);
}
// TODO: set lod bias
//samplerInfo.mipLodBias = (float)iLodBias / 64.0f;
// depth compare
uint8 depthCompareMode = shader->textureUsesDepthCompare[relative_textureUnit] ? 1 : 0;
if (depthCompareMode == 1)
{
// TODO: is it okay to just cast?
samplerDescriptor->setCompareFunction(GetMtlCompareFunc((Latte::E_COMPAREFUNC)samplerWords->WORD0.get_DEPTH_COMPARE_FUNCTION()));
}
// border
auto borderType = samplerWords->WORD0.get_BORDER_COLOR_TYPE();
if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::TRANSPARENT_BLACK)
samplerDescriptor->setBorderColor(MTL::SamplerBorderColorTransparentBlack);
else if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::OPAQUE_BLACK)
samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueBlack);
else if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::OPAQUE_WHITE)
samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueWhite);
else
{
// Metal doesn't support custom border color
samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueBlack);
}
MTL::SamplerState* sampler = m_device->newSamplerState(samplerDescriptor);
samplerDescriptor->release();
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
{
renderCommandEncoder->setVertexSamplerState(sampler, binding);
break;
}
case LatteConst::ShaderType::Pixel:
{
renderCommandEncoder->setFragmentSamplerState(sampler, binding);
break;
}
default:
UNREACHABLE;
}
sampler->release();
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
{
renderCommandEncoder->setVertexSamplerState(sampler, binding);
break;
}
case LatteConst::ShaderType::Pixel:
{
renderCommandEncoder->setFragmentSamplerState(sampler, binding);
break;
}
default:
UNREACHABLE;
}
}
}
// get texture register word 0

View File

@ -7,8 +7,6 @@
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Metal/MTLDepthStencil.hpp"
#include "Metal/MTLRenderCommandEncoder.hpp"
struct MetalBoundBuffer
{
@ -55,6 +53,7 @@ struct MetalEncoderState
class LatteTextureViewMtl* m_textureView = nullptr;
uint32 m_word4 = INVALID_UINT32;
} m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES];
MTL::SamplerState* m_samplers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_SAMPLERS];
size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
};
@ -294,6 +293,8 @@ public:
{
for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++)
m_state.m_encoderState.m_textures[i][j] = {nullptr};
for (uint32 j = 0; j < MAX_MTL_SAMPLERS; j++)
m_state.m_encoderState.m_samplers[i][j] = nullptr;
for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
m_state.m_encoderState.m_uniformBufferOffsets[i][j] = INVALID_OFFSET;
}
@ -333,6 +334,7 @@ private:
class MetalMemoryManager* m_memoryManager;
class MetalPipelineCache* m_pipelineCache;
class MetalDepthStencilCache* m_depthStencilCache;
class MetalSamplerCache* m_samplerCache;
// Metal objects
MTL::Device* m_device;

View File

@ -0,0 +1,128 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h"

#include <cstring>

#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
// Drops the cache's reference to every sampler state it created, then
// empties the lookup table.
MetalSamplerCache::~MetalSamplerCache()
{
    for (auto it = m_samplerCache.begin(); it != m_samplerCache.end(); ++it)
    {
        MTL::SamplerState* cachedSampler = it->second;
        cachedSampler->release();
    }

    m_samplerCache.clear();
}
/// Returns the Metal sampler state matching the sampler register set at
/// `samplerIndex`, creating and caching it on first use.
///
/// @param lcr           Register context whose SQ_TEX_SAMPLER array is read.
/// @param samplerIndex  Index into `lcr.SQ_TEX_SAMPLER`.
/// @return The cached sampler state. The cache holds the reference (it is
///         released in the destructor), so callers must not release it.
MTL::SamplerState* MetalSamplerCache::GetSamplerState(const LatteContextRegister& lcr, uint32 samplerIndex)
{
    const uint64 stateHash = CalculateSamplerHash(lcr, samplerIndex);
    auto& samplerState = m_samplerCache[stateHash];
    if (samplerState)
        return samplerState;

    using XYFilter = Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER;
    using ZFilter = Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER;
    using BorderColorType = Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE;

    // Sampler state
    const _LatteRegisterSetSampler* samplerWords = lcr.SQ_TEX_SAMPLER + samplerIndex;

    MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();

    // lod
    // TODO: apply the LOD bias once it can be expressed here. This needs the
    // per-texture graphic pack overrides, which are not available in this cache:
    //sint32 iLodBias = samplerWords->WORD1.get_LOD_BIAS();
    // apply relative lod bias from graphic pack
    //if (baseTexture->overwriteInfo.hasRelativeLodBias)
    //    iLodBias += baseTexture->overwriteInfo.relativeLodBias;
    // apply absolute lod bias from graphic pack
    //if (baseTexture->overwriteInfo.hasLodBias)
    //    iLodBias = baseTexture->overwriteInfo.lodBias;
    //samplerInfo.mipLodBias = (float)iLodBias / 64.0f;

    const auto filterMip = samplerWords->WORD0.get_MIP_FILTER();
    if (filterMip == ZFilter::NONE)
    {
        // No mip filtering: restrict sampling to the base level.
        samplerDescriptor->setMipFilter(MTL::SamplerMipFilterNearest);
        samplerDescriptor->setLodMinClamp(0.0f);
        samplerDescriptor->setLodMaxClamp(0.25f);
    }
    else
    {
        // POINT selects nearest; LINEAR and any invalid constant fall back to linear.
        samplerDescriptor->setMipFilter(filterMip == ZFilter::POINT ? MTL::SamplerMipFilterNearest : MTL::SamplerMipFilterLinear);

        // The register LOD values are divided by 64 to obtain the clamp range.
        const uint32 iMinLOD = samplerWords->WORD1.get_MIN_LOD();
        const uint32 iMaxLOD = samplerWords->WORD1.get_MAX_LOD();
        samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f);
        samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f);
    }

    // min/mag filter: both point variants map to nearest, everything else to linear
    const auto toMinMagFilter = [](XYFilter filter) {
        const bool isPoint = (filter == XYFilter::POINT || filter == XYFilter::ANISO_POINT);
        return isPoint ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear;
    };

    const auto filterMin = samplerWords->WORD0.get_XY_MIN_FILTER();
    cemu_assert_debug(filterMin != XYFilter::BICUBIC); // todo
    samplerDescriptor->setMinFilter(toMinMagFilter(filterMin));

    // The mag filter is derived from the mag filter field only (the ANISO_POINT
    // test previously looked at the min filter by mistake).
    const auto filterMag = samplerWords->WORD0.get_XY_MAG_FILTER();
    samplerDescriptor->setMagFilter(toMinMagFilter(filterMag));

    // todo: z-filter for texture array samplers is customizable for GPU7 but OpenGL/Vulkan doesn't expose this functionality?

    // address modes
    samplerDescriptor->setSAddressMode(GetMtlSamplerAddressMode(samplerWords->WORD0.get_CLAMP_X()));
    samplerDescriptor->setTAddressMode(GetMtlSamplerAddressMode(samplerWords->WORD0.get_CLAMP_Y()));
    samplerDescriptor->setRAddressMode(GetMtlSamplerAddressMode(samplerWords->WORD0.get_CLAMP_Z()));

    // anisotropy
    const auto maxAniso = samplerWords->WORD0.get_MAX_ANISO_RATIO();
    // TODO: uncomment
    //if (baseTexture->overwriteInfo.anisotropicLevel >= 0)
    //    maxAniso = baseTexture->overwriteInfo.anisotropicLevel;
    if (maxAniso > 0)
    {
        // Metal only accepts maxAnisotropy values in the range 1..16, so clamp
        // in case the register field holds a larger exponent.
        const uint32 requestedAnisotropy = 1u << maxAniso;
        samplerDescriptor->setMaxAnisotropy(requestedAnisotropy > 16 ? 16 : requestedAnisotropy);
    }

    // depth compare
    // The compare function is always set because the shader's depth compare
    // usage is not known here:
    //uint8 depthCompareMode = shader->textureUsesDepthCompare[relative_textureUnit] ? 1 : 0;
    // TODO: is it okay to just cast?
    samplerDescriptor->setCompareFunction(GetMtlCompareFunc(static_cast<Latte::E_COMPAREFUNC>(samplerWords->WORD0.get_DEPTH_COMPARE_FUNCTION())));

    // border
    switch (samplerWords->WORD0.get_BORDER_COLOR_TYPE())
    {
    case BorderColorType::TRANSPARENT_BLACK:
        samplerDescriptor->setBorderColor(MTL::SamplerBorderColorTransparentBlack);
        break;
    case BorderColorType::OPAQUE_BLACK:
        samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueBlack);
        break;
    case BorderColorType::OPAQUE_WHITE:
        samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueWhite);
        break;
    default:
        // Metal doesn't support custom border color
        samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueBlack);
        break;
    }

    // Store the new state directly in the cache slot obtained above.
    samplerState = m_mtlr->GetDevice()->newSamplerState(samplerDescriptor);
    samplerDescriptor->release();

    return samplerState;
}
/// Builds the cache key for the sampler register set at `samplerIndex`.
///
/// The key is the first 8 bytes of the register set, copied bit-for-bit.
/// NOTE(review): presumably these bytes are WORD0 and WORD1, the only words
/// GetSamplerState reads - confirm against the _LatteRegisterSetSampler layout.
///
/// @param lcr           Register context whose SQ_TEX_SAMPLER array is read.
/// @param samplerIndex  Index into `lcr.SQ_TEX_SAMPLER`.
/// @return 64-bit key identifying the sampler configuration.
uint64 MetalSamplerCache::CalculateSamplerHash(const LatteContextRegister& lcr, uint32 samplerIndex)
{
    const _LatteRegisterSetSampler* samplerWords = lcr.SQ_TEX_SAMPLER + samplerIndex;

    // Copy the bytes instead of dereferencing a casted pointer: this keeps the
    // source const and avoids reading the struct through an unrelated type.
    uint64 stateHash = 0;
    std::memcpy(&stateHash, samplerWords, sizeof(stateHash));

    return stateHash;
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <Metal/Metal.hpp>
#include "HW/Latte/ISA/LatteReg.h"
/// Cache of Metal sampler states keyed by a 64-bit hash of the Latte sampler
/// registers. The cache releases every sampler state it stores when it is
/// destroyed.
class MetalSamplerCache
{
public:
    /// @param metalRenderer Renderer whose device is used to create sampler states.
    explicit MetalSamplerCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
    ~MetalSamplerCache();

    // Not copyable: the destructor releases the stored sampler states, so a
    // copy would release them a second time.
    MetalSamplerCache(const MetalSamplerCache&) = delete;
    MetalSamplerCache& operator=(const MetalSamplerCache&) = delete;

    /// Returns the sampler state for the sampler registers at `samplerIndex`
    /// in `lcr`, creating it on first use.
    MTL::SamplerState* GetSamplerState(const LatteContextRegister& lcr, uint32 samplerIndex);

private:
    class MetalRenderer* m_mtlr;

    // Sampler hash -> sampler state
    std::map<uint64, MTL::SamplerState*> m_samplerCache;

    /// Computes the cache key for the sampler registers at `samplerIndex`.
    uint64 CalculateSamplerHash(const LatteContextRegister& lcr, uint32 samplerIndex);
};