diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt
index d0e7d921..fb4672d2 100644
--- a/src/Cafe/CMakeLists.txt
+++ b/src/Cafe/CMakeLists.txt
@@ -560,6 +560,8 @@ if(ENABLE_METAL)
 		HW/Latte/Renderer/Metal/MetalPipelineCache.h
 		HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
 		HW/Latte/Renderer/Metal/MetalDepthStencilCache.h
+		HW/Latte/Renderer/Metal/MetalSamplerCache.cpp
+		HW/Latte/Renderer/Metal/MetalSamplerCache.h
 		HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp
 		HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h
 		HW/Latte/Renderer/Metal/UtilityShaderSource.h
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
index b8f3fc52..96375e0b 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
@@ -18,9 +18,7 @@ MTL::DepthStencilState* MetalDepthStencilCache::GetDepthStencilState(const Latte
 	uint64 stateHash = CalculateDepthStencilHash(lcr);
 	auto& depthStencilState = m_depthStencilCache[stateHash];
 	if (depthStencilState)
-	{
 		return depthStencilState;
-	}
 
 	// Depth stencil state
 	bool depthEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_ENABLE();
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
index 39a7ec8d..faf67c3c 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
@@ -58,9 +58,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
 	uint64 stateHash = CalculatePipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr);
 	auto& pipeline = m_pipelineCache[stateHash];
 	if (pipeline)
-	{
 		return pipeline;
-	}
 
 	// Vertex descriptor
 	MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index 0870d2ed..4489b020 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -6,6 +6,7 @@
 #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
+#include "Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h"
 #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
 #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
@@ -16,6 +17,7 @@
 #include "Cafe/HW/Latte/Core/LatteIndices.h"
 #include "Cemu/Logging/CemuDebugLogging.h"
 #include "Common/precompiled.h"
+#include "HW/Latte/Core/Latte.h"
 #include "HW/Latte/Renderer/Metal/MetalCommon.h"
 #include "Metal/MTLRenderCommandEncoder.hpp"
 #include "Metal/MTLRenderPass.hpp"
@@ -43,6 +45,7 @@ MetalRenderer::MetalRenderer()
 	m_memoryManager = new MetalMemoryManager(this);
 	m_pipelineCache = new MetalPipelineCache(this);
 	m_depthStencilCache = new MetalDepthStencilCache(this);
+	m_samplerCache = new MetalSamplerCache(this);
 
 	// Texture readback
 	m_readbackBuffer = m_device->newBuffer(TEXTURE_READBACK_SIZE, MTL::StorageModeShared);
@@ -121,8 +124,9 @@ MetalRenderer::~MetalRenderer()
 	m_presentPipelineLinear->release();
 	m_presentPipelineSRGB->release();
 
-	delete m_depthStencilCache;
 	delete m_pipelineCache;
+	delete m_depthStencilCache;
+	delete m_samplerCache;
 	delete m_memoryManager;
 
 	m_nearestSampler->release();
@@ -1179,123 +1183,29 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
 		if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE)
 		{
 			uint32 samplerIndex = stageSamplerIndex + LatteDecompiler_getTextureSamplerBaseIndex(shader->shaderType);
-			const _LatteRegisterSetSampler* samplerWords = LatteGPUState.contextNew.SQ_TEX_SAMPLER + samplerIndex;
+			auto sampler = m_samplerCache->GetSamplerState(LatteGPUState.contextNew, samplerIndex);
 
-			// TODO: cache this instead
-			MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
+			auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding];
+			if (sampler != boundSampler)
+			{
+				boundSampler = sampler;
 
-			// lod
-			uint32 iMinLOD = samplerWords->WORD1.get_MIN_LOD();
-			uint32 iMaxLOD = samplerWords->WORD1.get_MAX_LOD();
-			sint32 iLodBias = samplerWords->WORD1.get_LOD_BIAS();
-
-			// apply relative lod bias from graphic pack
-			if (baseTexture->overwriteInfo.hasRelativeLodBias)
-				iLodBias += baseTexture->overwriteInfo.relativeLodBias;
-			// apply absolute lod bias from graphic pack
-			if (baseTexture->overwriteInfo.hasLodBias)
-				iLodBias = baseTexture->overwriteInfo.lodBias;
-
-			auto filterMip = samplerWords->WORD0.get_MIP_FILTER();
-			if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::NONE)
-			{
-				samplerDescriptor->setMipFilter(MTL::SamplerMipFilterNearest);
-				samplerDescriptor->setLodMinClamp(0.0f);
-				samplerDescriptor->setLodMaxClamp(0.25f);
-			}
-			else if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::POINT)
-			{
-				samplerDescriptor->setMipFilter(MTL::SamplerMipFilterNearest);
-				samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f);
-				samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f);
-			}
-			else if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::LINEAR)
-			{
-				samplerDescriptor->setMipFilter(MTL::SamplerMipFilterLinear);
-				samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f);
-				samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f);
-			}
-			else
-			{
-				// fallback for invalid constants
-				samplerDescriptor->setMipFilter(MTL::SamplerMipFilterLinear);
-				samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f);
-				samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f);
-			}
-
-			auto filterMin = samplerWords->WORD0.get_XY_MIN_FILTER();
-			cemu_assert_debug(filterMin != Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::BICUBIC); // todo
-			samplerDescriptor->setMinFilter((filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::POINT || filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear);
-
-			auto filterMag = samplerWords->WORD0.get_XY_MAG_FILTER();
-			samplerDescriptor->setMagFilter((filterMag == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::POINT || filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear);
-
-			auto filterZ = samplerWords->WORD0.get_Z_FILTER();
-			// todo: z-filter for texture array samplers is customizable for GPU7 but OpenGL/Vulkan doesn't expose this functionality?
-
-			auto clampX = samplerWords->WORD0.get_CLAMP_X();
-			auto clampY = samplerWords->WORD0.get_CLAMP_Y();
-			auto clampZ = samplerWords->WORD0.get_CLAMP_Z();
-
-			samplerDescriptor->setSAddressMode(GetMtlSamplerAddressMode(clampX));
-			samplerDescriptor->setTAddressMode(GetMtlSamplerAddressMode(clampY));
-			samplerDescriptor->setRAddressMode(GetMtlSamplerAddressMode(clampZ));
-
-			auto maxAniso = samplerWords->WORD0.get_MAX_ANISO_RATIO();
-
-			if (baseTexture->overwriteInfo.anisotropicLevel >= 0)
-				maxAniso = baseTexture->overwriteInfo.anisotropicLevel;
-
-			if (maxAniso > 0)
-			{
-				samplerDescriptor->setMaxAnisotropy(1 << maxAniso);
-			}
-
-			// TODO: set lod bias
-			//samplerInfo.mipLodBias = (float)iLodBias / 64.0f;
-
-			// depth compare
-			uint8 depthCompareMode = shader->textureUsesDepthCompare[relative_textureUnit] ? 1 : 0;
-			if (depthCompareMode == 1)
-			{
-				// TODO: is it okay to just cast?
-				samplerDescriptor->setCompareFunction(GetMtlCompareFunc((Latte::E_COMPAREFUNC)samplerWords->WORD0.get_DEPTH_COMPARE_FUNCTION()));
-			}
-
-			// border
-			auto borderType = samplerWords->WORD0.get_BORDER_COLOR_TYPE();
-
-			if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::TRANSPARENT_BLACK)
-				samplerDescriptor->setBorderColor(MTL::SamplerBorderColorTransparentBlack);
-			else if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::OPAQUE_BLACK)
-				samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueBlack);
-			else if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::OPAQUE_WHITE)
-				samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueWhite);
-			else
-			{
-				// Metal doesn't support custom border color
-				samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueBlack);
-			}
-
-			MTL::SamplerState* sampler = m_device->newSamplerState(samplerDescriptor);
-			samplerDescriptor->release();
-
-			switch (shader->shaderType)
-			{
-			case LatteConst::ShaderType::Vertex:
-			{
-				renderCommandEncoder->setVertexSamplerState(sampler, binding);
-				break;
-			}
-			case LatteConst::ShaderType::Pixel:
-			{
-				renderCommandEncoder->setFragmentSamplerState(sampler, binding);
-				break;
-			}
-			default:
-				UNREACHABLE;
-			}
-			sampler->release();
+				switch (shader->shaderType)
+				{
+				case LatteConst::ShaderType::Vertex:
+				{
+					renderCommandEncoder->setVertexSamplerState(sampler, binding);
+					break;
+				}
+				case LatteConst::ShaderType::Pixel:
+				{
+					renderCommandEncoder->setFragmentSamplerState(sampler, binding);
+					break;
+				}
+				default:
+					UNREACHABLE;
+				}
+			}
 		}
 
 		// get texture register word 0
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
index bbd53194..451a29af 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
@@ -7,8 +7,6 @@
 #include "Cafe/HW/Latte/Renderer/Renderer.h"
 
 #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
-#include "Metal/MTLDepthStencil.hpp"
-#include "Metal/MTLRenderCommandEncoder.hpp"
 
 struct MetalBoundBuffer
 {
@@ -55,6 +53,7 @@ struct MetalEncoderState
 		class LatteTextureViewMtl* m_textureView = nullptr;
 		uint32 m_word4 = INVALID_UINT32;
 	} m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES];
+	MTL::SamplerState* m_samplers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_SAMPLERS];
 	size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
 };
 
@@ -294,6 +293,8 @@ public:
 		{
 			for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++)
 				m_state.m_encoderState.m_textures[i][j] = {nullptr};
+			for (uint32 j = 0; j < MAX_MTL_SAMPLERS; j++)
+				m_state.m_encoderState.m_samplers[i][j] = nullptr;
 			for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
 				m_state.m_encoderState.m_uniformBufferOffsets[i][j] = INVALID_OFFSET;
 		}
@@ -333,6 +334,7 @@ private:
 	class MetalMemoryManager* m_memoryManager;
 	class MetalPipelineCache* m_pipelineCache;
 	class MetalDepthStencilCache* m_depthStencilCache;
+	class MetalSamplerCache* m_samplerCache;
 
 	// Metal objects
 	MTL::Device* m_device;
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.cpp
new file mode 100644
index 00000000..4f987d83
--- /dev/null
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.cpp
@@ -0,0 +1,131 @@
+#include "Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h"
+#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
+#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
+
+#include <cstring>
+
+// Releases every sampler state the cache created.
+MetalSamplerCache::~MetalSamplerCache()
+{
+	for (auto& pair : m_samplerCache)
+	{
+		// GetSamplerState inserts the slot before creating the sampler, so an entry can be null
+		if (pair.second)
+			pair.second->release();
+	}
+	m_samplerCache.clear();
+}
+
+// Returns the sampler state for the sampler registers at samplerIndex in lcr.
+// The state is created on first use and stays owned by the cache; callers must not release it.
+MTL::SamplerState* MetalSamplerCache::GetSamplerState(const LatteContextRegister& lcr, uint32 samplerIndex)
+{
+	uint64 stateHash = CalculateSamplerHash(lcr, samplerIndex);
+	auto& samplerState = m_samplerCache[stateHash];
+	if (samplerState)
+		return samplerState;
+
+	// Sampler state
+	const _LatteRegisterSetSampler* samplerWords = lcr.SQ_TEX_SAMPLER + samplerIndex;
+
+	MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
+
+	// lod
+	uint32 iMinLOD = samplerWords->WORD1.get_MIN_LOD();
+	uint32 iMaxLOD = samplerWords->WORD1.get_MAX_LOD();
+
+	// TODO: apply the lod bias; the graphic pack overrides need the bound texture, which is not available here
+	//sint32 iLodBias = samplerWords->WORD1.get_LOD_BIAS();
+	// apply relative lod bias from graphic pack
+	//if (baseTexture->overwriteInfo.hasRelativeLodBias)
+	//	iLodBias += baseTexture->overwriteInfo.relativeLodBias;
+	// apply absolute lod bias from graphic pack
+	//if (baseTexture->overwriteInfo.hasLodBias)
+	//	iLodBias = baseTexture->overwriteInfo.lodBias;
+	//samplerInfo.mipLodBias = (float)iLodBias / 64.0f;
+
+	auto filterMip = samplerWords->WORD0.get_MIP_FILTER();
+	if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::NONE)
+	{
+		// the register LOD range is ignored here; the clamp is fixed to [0, 0.25]
+		samplerDescriptor->setMipFilter(MTL::SamplerMipFilterNearest);
+		samplerDescriptor->setLodMinClamp(0.0f);
+		samplerDescriptor->setLodMaxClamp(0.25f);
+	}
+	else
+	{
+		// POINT maps to nearest; LINEAR and invalid constants both map to linear
+		bool isPointMip = (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::POINT);
+		samplerDescriptor->setMipFilter(isPointMip ? MTL::SamplerMipFilterNearest : MTL::SamplerMipFilterLinear);
+		samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f);
+		samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f);
+	}
+
+	auto filterMin = samplerWords->WORD0.get_XY_MIN_FILTER();
+	cemu_assert_debug(filterMin != Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::BICUBIC); // todo
+	samplerDescriptor->setMinFilter((filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::POINT || filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear);
+
+	// the magnification filter is selected from filterMag alone
+	auto filterMag = samplerWords->WORD0.get_XY_MAG_FILTER();
+	samplerDescriptor->setMagFilter((filterMag == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::POINT || filterMag == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear);
+
+	// todo: z-filter (WORD0.get_Z_FILTER()) for texture array samplers is customizable for GPU7 but OpenGL/Vulkan doesn't expose this functionality?
+
+	auto clampX = samplerWords->WORD0.get_CLAMP_X();
+	auto clampY = samplerWords->WORD0.get_CLAMP_Y();
+	auto clampZ = samplerWords->WORD0.get_CLAMP_Z();
+
+	samplerDescriptor->setSAddressMode(GetMtlSamplerAddressMode(clampX));
+	samplerDescriptor->setTAddressMode(GetMtlSamplerAddressMode(clampY));
+	samplerDescriptor->setRAddressMode(GetMtlSamplerAddressMode(clampZ));
+
+	auto maxAniso = samplerWords->WORD0.get_MAX_ANISO_RATIO();
+
+	// TODO: uncomment
+	//if (baseTexture->overwriteInfo.anisotropicLevel >= 0)
+	//	maxAniso = baseTexture->overwriteInfo.anisotropicLevel;
+
+	// the register value is used as a power-of-two exponent for the anisotropy limit
+	if (maxAniso > 0)
+		samplerDescriptor->setMaxAnisotropy(1 << maxAniso);
+
+	// depth compare
+	// The compare function is always set, because the shader's textureUsesDepthCompare flag is not known here.
+	//uint8 depthCompareMode = shader->textureUsesDepthCompare[relative_textureUnit] ? 1 : 0;
+	// TODO: is it okay to just cast?
+	samplerDescriptor->setCompareFunction(GetMtlCompareFunc((Latte::E_COMPAREFUNC)samplerWords->WORD0.get_DEPTH_COMPARE_FUNCTION()));
+
+	// border
+	auto borderType = samplerWords->WORD0.get_BORDER_COLOR_TYPE();
+
+	if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::TRANSPARENT_BLACK)
+		samplerDescriptor->setBorderColor(MTL::SamplerBorderColorTransparentBlack);
+	else if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::OPAQUE_BLACK)
+		samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueBlack);
+	else if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::OPAQUE_WHITE)
+		samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueWhite);
+	else
+	{
+		// Metal doesn't support custom border color
+		samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueBlack);
+	}
+
+	samplerState = m_mtlr->GetDevice()->newSamplerState(samplerDescriptor);
+	samplerDescriptor->release();
+
+	return samplerState;
+}
+
+// Returns the cache key for the sampler at samplerIndex: the first 64 bits of its register set.
+uint64 MetalSamplerCache::CalculateSamplerHash(const LatteContextRegister& lcr, uint32 samplerIndex)
+{
+	const _LatteRegisterSetSampler* samplerWords = lcr.SQ_TEX_SAMPLER + samplerIndex;
+
+	// TODO: check this
+	// NOTE(review): presumably WORD0 and WORD1 (the only words GetSamplerState reads) are the first 8 bytes - confirm against LatteReg.h
+	static_assert(sizeof(_LatteRegisterSetSampler) >= sizeof(uint64), "sampler register set is smaller than the 64-bit cache key");
+	uint64 key;
+	// memcpy instead of dereferencing a casted pointer, so the read does not depend on uint64 alignment or aliasing
+	std::memcpy(&key, samplerWords, sizeof(key));
+	return key;
+}
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h
new file mode 100644
index 00000000..891d7e03
--- /dev/null
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <map>
+
+#include <Metal/Metal.hpp>
+
+#include "HW/Latte/ISA/LatteReg.h"
+
+// Cache of MTL::SamplerState objects keyed by the value CalculateSamplerHash derives from the sampler registers.
+// The cache owns every sampler state it returns and releases them in its destructor.
+class MetalSamplerCache
+{
+public:
+	explicit MetalSamplerCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
+	~MetalSamplerCache();
+
+	// the destructor releases the cached objects, so a copy would release them twice
+	MetalSamplerCache(const MetalSamplerCache&) = delete;
+	MetalSamplerCache& operator=(const MetalSamplerCache&) = delete;
+
+	// Returns the cached sampler state for the sampler registers at samplerIndex, creating it on first use.
+	MTL::SamplerState* GetSamplerState(const LatteContextRegister& lcr, uint32 samplerIndex);
+
+private:
+	class MetalRenderer* m_mtlr;
+
+	std::map<uint64, MTL::SamplerState*> m_samplerCache;
+
+	uint64 CalculateSamplerHash(const LatteContextRegister& lcr, uint32 samplerIndex);
+};