implement depth stencil cache & fix: textureSize

This commit is contained in:
Samuliak 2024-08-03 10:01:57 +02:00
parent d7411e27f7
commit 406a85672d
9 changed files with 183 additions and 98 deletions

View File

@ -555,6 +555,8 @@ if(ENABLE_METAL)
HW/Latte/Renderer/Metal/MetalMemoryManager.h
HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
HW/Latte/Renderer/Metal/MetalPipelineCache.h
HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
HW/Latte/Renderer/Metal/MetalDepthStencilCache.h
HW/Latte/Renderer/Metal/ShaderSourcePresent.h
)

View File

@ -615,6 +615,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
LatteDecompilerShader* shader = decompilerOutput.shader;
shader->baseHash = baseHash;
// copy resource mapping
// HACK
if (g_renderer->GetType() != RendererAPI::OpenGL)
shader->resourceMapping = decompilerOutput.resourceMappingVK;
else

View File

@ -2422,7 +2422,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
src->add(")");
// avoid truncate to effectively round downwards on texel edges
if (ActiveSettings::ForceSamplerRoundToPrecision())
src->addFmt("+ float2(1.0)/float2(textureSize(tex{}, 0))/512.0", texInstruction->textureFetch.textureIndex);
src->addFmt("+ float2(1.0)/float2(tex{}.get_width(), tex{}.get_height())/512.0", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
}
// lod or lod bias parameter
if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L)
@ -2599,17 +2599,17 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo
auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];
if (texDim == Latte::E_DIM::DIM_1D)
src->addFmt(" = int4(textureSize(tex{}, 0),1,1,1).", texInstruction->textureFetch.textureIndex);
src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_1D_ARRAY)
src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex);
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA)
src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex);
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
src->addFmt(" = int4(textureSize(tex{}, 0),1).", texInstruction->textureFetch.textureIndex);
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else
{
cemu_assert_debug(false);
src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex);
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
}
for(sint32 f=0; f<4; f++)

View File

@ -0,0 +1,138 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "HW/Latte/ISA/RegDefines.h"
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Metal/MTLDepthStencil.hpp"
// Releases every depth-stencil state object stored in the cache, then empties the map.
MetalDepthStencilCache::~MetalDepthStencilCache()
{
    for (auto it = m_depthStencilCache.begin(); it != m_depthStencilCache.end(); ++it)
    {
        MTL::DepthStencilState* cachedState = it->second;
        cachedState->release();
    }

    m_depthStencilCache.clear();
}
// Returns the depth-stencil state object matching the depth/stencil registers in `lcr`.
//
// The state is looked up in m_depthStencilCache by the hash produced by
// CalculateDepthStencilHash(lcr). On a miss a new state object is created from the
// device, stored in the cache and returned. The cache keeps the reference; callers
// must not release the returned pointer.
//
// Only depth state is configured so far; stencil state is still a TODO (see the
// reference block below).
MTL::DepthStencilState* MetalDepthStencilCache::GetDepthStencilState(const LatteContextRegister& lcr)
{
    const uint64 stateHash = CalculateDepthStencilHash(lcr);

    // operator[] value-initializes a missing entry to nullptr; it is filled in below
    auto& depthStencilState = m_depthStencilCache[stateHash];
    if (depthStencilState)
    {
        return depthStencilState;
    }

    // Depth state
    // Read from the same register set the hash was computed from, so that the cache
    // key and the cached object can never describe different states.
    const bool depthEnable = lcr.DB_DEPTH_CONTROL.get_Z_ENABLE();
    const auto depthFunc = lcr.DB_DEPTH_CONTROL.get_Z_FUNC();
    const bool depthWriteEnable = lcr.DB_DEPTH_CONTROL.get_Z_WRITE_ENABLE();

    MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init();
    if (depthEnable)
    {
        desc->setDepthCompareFunction(GetMtlCompareFunc(depthFunc));
        desc->setDepthWriteEnabled(depthWriteEnable);
    }
    else
    {
        // Metal has no separate "depth test enable" switch. Emulate a disabled depth
        // test with an always-passing compare and no depth writes; this matches the
        // Vulkan/OpenGL rule that depth writes are off whenever the depth test is off.
        desc->setDepthCompareFunction(MTL::CompareFunctionAlways);
        desc->setDepthWriteEnabled(false);
    }

    // TODO: stencil state
    // The block below is the Vulkan implementation, kept as a porting reference.
    // NOTE(review): when porting, read the registers from `lcr` instead of
    // LatteGPUState.contextNew. Also, front.writeMask is assigned stencilWriteMaskBack
    // here; presumably it should be stencilWriteMaskFront - verify before porting.
    /*
    // get stencil control parameters
    bool stencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE();
    bool backStencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE();
    auto frontStencilFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FUNC_F();
    auto frontStencilZPass = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_F();
    auto frontStencilZFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_F();
    auto frontStencilFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FAIL_F();
    auto backStencilFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FUNC_B();
    auto backStencilZPass = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_B();
    auto backStencilZFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_B();
    auto backStencilFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FAIL_B();
    // get stencil control parameters
    uint32 stencilCompareMaskFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILMASK_F();
    uint32 stencilWriteMaskFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILWRITEMASK_F();
    uint32 stencilRefFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILREF_F();
    uint32 stencilCompareMaskBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILMASK_B();
    uint32 stencilWriteMaskBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILWRITEMASK_B();
    uint32 stencilRefBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILREF_B();
    static const VkStencilOp stencilOpTable[8] = {
    VK_STENCIL_OP_KEEP,
    VK_STENCIL_OP_ZERO,
    VK_STENCIL_OP_REPLACE,
    VK_STENCIL_OP_INCREMENT_AND_CLAMP,
    VK_STENCIL_OP_DECREMENT_AND_CLAMP,
    VK_STENCIL_OP_INVERT,
    VK_STENCIL_OP_INCREMENT_AND_WRAP,
    VK_STENCIL_OP_DECREMENT_AND_WRAP
    };
    depthStencilState.stencilTestEnable = stencilEnable ? VK_TRUE : VK_FALSE;
    depthStencilState.front.reference = stencilRefFront;
    depthStencilState.front.compareMask = stencilCompareMaskFront;
    depthStencilState.front.writeMask = stencilWriteMaskBack;
    depthStencilState.front.compareOp = vkDepthCompareTable[(size_t)frontStencilFunc];
    depthStencilState.front.depthFailOp = stencilOpTable[(size_t)frontStencilZFail];
    depthStencilState.front.failOp = stencilOpTable[(size_t)frontStencilFail];
    depthStencilState.front.passOp = stencilOpTable[(size_t)frontStencilZPass];
    if (backStencilEnable)
    {
    depthStencilState.back.reference = stencilRefBack;
    depthStencilState.back.compareMask = stencilCompareMaskBack;
    depthStencilState.back.writeMask = stencilWriteMaskBack;
    depthStencilState.back.compareOp = vkDepthCompareTable[(size_t)backStencilFunc];
    depthStencilState.back.depthFailOp = stencilOpTable[(size_t)backStencilZFail];
    depthStencilState.back.failOp = stencilOpTable[(size_t)backStencilFail];
    depthStencilState.back.passOp = stencilOpTable[(size_t)backStencilZPass];
    }
    else
    {
    depthStencilState.back.reference = stencilRefFront;
    depthStencilState.back.compareMask = stencilCompareMaskFront;
    depthStencilState.back.writeMask = stencilWriteMaskFront;
    depthStencilState.back.compareOp = vkDepthCompareTable[(size_t)frontStencilFunc];
    depthStencilState.back.depthFailOp = stencilOpTable[(size_t)frontStencilZFail];
    depthStencilState.back.failOp = stencilOpTable[(size_t)frontStencilFail];
    depthStencilState.back.passOp = stencilOpTable[(size_t)frontStencilZPass];
    }
    */

    // The descriptor is only needed while the state object is created
    depthStencilState = m_mtlr->GetDevice()->newDepthStencilState(desc);
    desc->release();

    return depthStencilState;
}
// Builds the cache key for a depth-stencil state from the raw register view of `lcr`.
//
// DB_DEPTH_CONTROL always contributes to the key. The stencil reference/mask
// registers only contribute while the stencil test bit (bit 0) is set, and the
// back-face register only when bit 7 is set as well. With the stencil test off,
// DB_DEPTH_CONTROL is reduced to its low 8 bits before it is mixed in.
uint64 MetalDepthStencilCache::CalculateDepthStencilHash(const LatteContextRegister& lcr)
{
    const uint32* regs = lcr.GetRawView();
    uint32 dbDepthControl = regs[Latte::REGADDR::DB_DEPTH_CONTROL];

    uint64 hash = 0;

    const bool stencilEnabled = (dbDepthControl & 1) != 0;
    if (!stencilEnabled)
    {
        // keep bits 0-7 only, dropping the stencil related bits (8-31)
        dbDepthControl &= 0xFF;
    }
    else
    {
        hash += regs[mmDB_STENCILREFMASK];
        hash = std::rotl<uint64>(hash, 17);

        const bool backStencilEnabled = (dbDepthControl & (1 << 7)) != 0;
        if (backStencilEnabled)
        {
            hash += regs[mmDB_STENCILREFMASK_BF];
            hash = std::rotl<uint64>(hash, 13);
        }
    }

    hash = std::rotl<uint64>(hash, 17);
    hash += dbDepthControl;

    return hash;
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <Metal/Metal.hpp>
#include "HW/Latte/ISA/LatteReg.h"
// Cache of Metal depth-stencil state objects, keyed by a 64-bit hash of the
// depth/stencil related context registers.
//
// The cache stores raw MTL::DepthStencilState pointers and releases each of them in
// its destructor, so an instance must not be copied.
class MetalDepthStencilCache
{
public:
    // metalRenderer: renderer whose device is used to create new state objects.
    // The pointer is stored, not owned.
    explicit MetalDepthStencilCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
    ~MetalDepthStencilCache();

    // Non-copyable: a copy would release every cached state object a second time.
    MetalDepthStencilCache(const MetalDepthStencilCache&) = delete;
    MetalDepthStencilCache& operator=(const MetalDepthStencilCache&) = delete;

    // Returns the cached state object for the registers in `lcr`, creating it on first use.
    MTL::DepthStencilState* GetDepthStencilState(const LatteContextRegister& lcr);

private:
    class MetalRenderer* m_mtlr;

    // state hash -> state object (entries are released in the destructor)
    std::map<uint64, MTL::DepthStencilState*> m_depthStencilCache;

    // Computes the cache key for the depth/stencil registers in `lcr`.
    uint64 CalculateDepthStencilHash(const LatteContextRegister& lcr);
};

View File

@ -75,11 +75,11 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
}
// Render pipeline state
MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
renderPipelineDescriptor->setVertexFunction(static_cast<RendererShaderMtl*>(vertexShader->shader)->GetFunction());
renderPipelineDescriptor->setFragmentFunction(static_cast<RendererShaderMtl*>(pixelShader->shader)->GetFunction());
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(static_cast<RendererShaderMtl*>(vertexShader->shader)->GetFunction());
desc->setFragmentFunction(static_cast<RendererShaderMtl*>(pixelShader->shader)->GetFunction());
// TODO: don't always set the vertex descriptor
renderPipelineDescriptor->setVertexDescriptor(vertexDescriptor);
desc->setVertexDescriptor(vertexDescriptor);
for (uint8 i = 0; i < 8; i++)
{
const auto& colorBuffer = activeFBO->colorBuffer[i];
@ -88,7 +88,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
{
continue;
}
auto colorAttachment = renderPipelineDescriptor->colorAttachments()->object(i);
auto colorAttachment = desc->colorAttachments()->object(i);
colorAttachment->setPixelFormat(texture->GetTexture()->pixelFormat());
// Blending
@ -128,11 +128,12 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
if (activeFBO->depthBuffer.texture)
{
auto texture = static_cast<LatteTextureViewMtl*>(activeFBO->depthBuffer.texture);
renderPipelineDescriptor->setDepthAttachmentPixelFormat(texture->GetTexture()->pixelFormat());
desc->setDepthAttachmentPixelFormat(texture->GetTexture()->pixelFormat());
}
NS::Error* error = nullptr;
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(renderPipelineDescriptor, &error);
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error);
desc->release();
if (error)
{
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());

View File

@ -5,21 +5,16 @@
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h"
#include "Foundation/NSTypes.hpp"
#include "HW/Latte/Core/Latte.h"
#include "Metal/MTLDepthStencil.hpp"
#include "Metal/MTLRenderCommandEncoder.hpp"
#include "Metal/MTLRenderPass.hpp"
#include "Metal/MTLRenderPipeline.hpp"
#include "gui/guiWrapper.h"
extern bool hasValidFramebufferAttached;
@ -36,6 +31,7 @@ MetalRenderer::MetalRenderer()
m_memoryManager = new MetalMemoryManager(this);
m_pipelineCache = new MetalPipelineCache(this);
m_depthStencilCache = new MetalDepthStencilCache(this);
// Initialize state
for (uint32 i = 0; i < (uint32)LatteConst::ShaderType::TotalCount; i++)
@ -49,6 +45,8 @@ MetalRenderer::MetalRenderer()
MetalRenderer::~MetalRenderer()
{
delete m_depthStencilCache;
delete m_pipelineCache;
delete m_memoryManager;
m_commandQueue->release();
@ -622,84 +620,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
// Depth stencil state
bool depthEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_ENABLE();
auto depthFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_FUNC();
bool depthWriteEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_WRITE_ENABLE();
MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init();
depthStencilDescriptor->setDepthWriteEnabled(depthWriteEnable);
auto depthCompareFunc = GetMtlCompareFunc(depthFunc);
if (!depthEnable)
{
depthCompareFunc = MTL::CompareFunctionAlways;
}
depthStencilDescriptor->setDepthCompareFunction(depthCompareFunc);
// TODO: stencil state
/*
// get stencil control parameters
bool stencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE();
bool backStencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE();
auto frontStencilFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FUNC_F();
auto frontStencilZPass = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_F();
auto frontStencilZFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_F();
auto frontStencilFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FAIL_F();
auto backStencilFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FUNC_B();
auto backStencilZPass = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_B();
auto backStencilZFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_B();
auto backStencilFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FAIL_B();
// get stencil control parameters
uint32 stencilCompareMaskFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILMASK_F();
uint32 stencilWriteMaskFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILWRITEMASK_F();
uint32 stencilRefFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILREF_F();
uint32 stencilCompareMaskBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILMASK_B();
uint32 stencilWriteMaskBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILWRITEMASK_B();
uint32 stencilRefBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILREF_B();
static const VkStencilOp stencilOpTable[8] = {
VK_STENCIL_OP_KEEP,
VK_STENCIL_OP_ZERO,
VK_STENCIL_OP_REPLACE,
VK_STENCIL_OP_INCREMENT_AND_CLAMP,
VK_STENCIL_OP_DECREMENT_AND_CLAMP,
VK_STENCIL_OP_INVERT,
VK_STENCIL_OP_INCREMENT_AND_WRAP,
VK_STENCIL_OP_DECREMENT_AND_WRAP
};
depthStencilState.stencilTestEnable = stencilEnable ? VK_TRUE : VK_FALSE;
depthStencilState.front.reference = stencilRefFront;
depthStencilState.front.compareMask = stencilCompareMaskFront;
depthStencilState.front.writeMask = stencilWriteMaskBack;
depthStencilState.front.compareOp = vkDepthCompareTable[(size_t)frontStencilFunc];
depthStencilState.front.depthFailOp = stencilOpTable[(size_t)frontStencilZFail];
depthStencilState.front.failOp = stencilOpTable[(size_t)frontStencilFail];
depthStencilState.front.passOp = stencilOpTable[(size_t)frontStencilZPass];
if (backStencilEnable)
{
depthStencilState.back.reference = stencilRefBack;
depthStencilState.back.compareMask = stencilCompareMaskBack;
depthStencilState.back.writeMask = stencilWriteMaskBack;
depthStencilState.back.compareOp = vkDepthCompareTable[(size_t)backStencilFunc];
depthStencilState.back.depthFailOp = stencilOpTable[(size_t)backStencilZFail];
depthStencilState.back.failOp = stencilOpTable[(size_t)backStencilFail];
depthStencilState.back.passOp = stencilOpTable[(size_t)backStencilZPass];
}
else
{
depthStencilState.back.reference = stencilRefFront;
depthStencilState.back.compareMask = stencilCompareMaskFront;
depthStencilState.back.writeMask = stencilWriteMaskFront;
depthStencilState.back.compareOp = vkDepthCompareTable[(size_t)frontStencilFunc];
depthStencilState.back.depthFailOp = stencilOpTable[(size_t)frontStencilZFail];
depthStencilState.back.failOp = stencilOpTable[(size_t)frontStencilFail];
depthStencilState.back.passOp = stencilOpTable[(size_t)frontStencilZPass];
}
*/
MTL::DepthStencilState* depthStencilState = m_device->newDepthStencilState(depthStencilDescriptor);
MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew);
renderCommandEncoder->setDepthStencilState(depthStencilState);
// Primitive type

View File

@ -196,6 +196,7 @@ private:
class MetalMemoryManager* m_memoryManager;
class MetalPipelineCache* m_pipelineCache;
class MetalDepthStencilCache* m_depthStencilCache;
// Metal objects
MTL::Device* m_device;

View File

@ -18,13 +18,13 @@ RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type
desc->setName(NS::String::string("main0", NS::ASCIIStringEncoding));
error = nullptr;
m_function = library->newFunction(desc, &error);
library->release();
if (error)
{
printf("failed to create function (error: %s)\n", error->localizedDescription()->utf8String());
error->release();
return;
}
library->release();
}
RendererShaderMtl::~RendererShaderMtl()