Merge pull request #5 from SamoZ256/metal-no-vertex-restride

No vertex restride
This commit is contained in:
SamoZ256 2024-10-01 18:46:21 +02:00 committed by GitHub
commit 6dc8f9a019
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 269 additions and 169 deletions

View File

@ -8,8 +8,12 @@
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/ISA/LatteInstructions.h"
#include "HW/Latte/Renderer/Renderer.h"
#include "util/containers/LookupTableL3.h"
#include "util/helpers/fspinlock.h"
#if BOOST_OS_MACOS
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#endif
#include <openssl/sha.h> /* SHA1_DIGEST_LENGTH */
#include <openssl/evp.h> /* EVP_Digest */
@ -71,7 +75,7 @@ uint32 LatteShaderRecompiler_getAttributeAlignment(LatteParsedFetchShaderAttribu
return 4;
}
void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
void LatteShader_calculateFSKey(LatteFetchShader* fetchShader, uint32* contextRegister)
{
uint64 key = 0;
for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++)
@ -104,11 +108,25 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
key = std::rotl<uint64>(key, 8);
key += (uint64)attrib->semanticId;
key = std::rotl<uint64>(key, 8);
key += (uint64)(attrib->offset & 3);
key = std::rotl<uint64>(key, 2);
if (g_renderer->GetType() == RendererAPI::Metal)
key += (uint64)attrib->offset;
else
key += (uint64)(attrib->offset & 3);
key = std::rotl<uint64>(key, 7);
}
}
// todo - also hash invalid buffer groups?
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = fetchShader->bufferGroups[g];
key += (uint64)group.attributeBufferIndex;
key = std::rotl<uint64>(key, 5);
}
}
fetchShader->key = key;
}
@ -146,8 +164,8 @@ void LatteFetchShader::CalculateFetchShaderVkHash()
this->vkPipelineHashFragment = h;
}
void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister)
{uint64 key = 0;
void LatteFetchShader::CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister)
{
for (sint32 g = 0; g < bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = bufferGroups[g];
@ -155,12 +173,16 @@ void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRe
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
key += (uint64)bufferIndex;
key = std::rotl<uint64>(key, 5);
key += (uint64)bufferStride;
key = std::rotl<uint64>(key, 5);
if (bufferStride % 4 != 0)
mtlFetchVertexManually = true;
for (sint32 f = 0; f < group.attribCount; f++)
{
auto& attr = group.attrib[f];
if (attr.offset + GetMtlVertexFormatSize(attr.format) > bufferStride)
mtlFetchVertexManually = true;
}
}
mtlShaderHashObject = key;
}
void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr)
@ -343,9 +365,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
{
// empty fetch shader, seen in Minecraft
// these only make sense when vertex shader does not call FS?
LatteShader_calculateFSKey(newFetchShader);
LatteShader_calculateFSKey(newFetchShader, contextRegister);
newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister);
return newFetchShader;
}
@ -403,9 +425,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
}
bufferGroup.vboStride = vboOffset;
}
LatteShader_calculateFSKey(newFetchShader);
LatteShader_calculateFSKey(newFetchShader, contextRegister);
newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister);
// register in cache
// its possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously

View File

@ -47,16 +47,15 @@ struct LatteFetchShader
uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline
// Metal
uint64 mtlShaderHashObject{};
bool mtlFetchVertexManually{};
// cache info
CacheHash m_cacheHash{};
bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded)
void CalculateFetchShaderVkHash();
void CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister);
void CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister);
uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; };

View File

@ -503,11 +503,21 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
if (g_renderer->GetType() == RendererAPI::Metal)
{
if (usesGeometryShader)
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
{
vsHash += _activeFetchShader->mtlShaderHashObject;
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g];
uint32 bufferIndex = group.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
vsHash += (uint64)bufferStride;
vsHash = std::rotl<uint64>(vsHash, 7);
}
}
else
if (!usesGeometryShader)
{
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
@ -524,6 +534,10 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
if (rasterizationEnabled)
vsHash += 51ULL;
// Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL;
}
}
@ -531,6 +545,7 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
vsHash += tmp;
auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
// TODO: include always in the hash in case of geometry shader or rect shader
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
{
vsHash += 13ULL;

View File

@ -3854,10 +3854,12 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh
void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader)
{
bool isRectVertexShader = (static_cast<LattePrimitiveMode>(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS);
bool usesGeometryShader = (shaderContext->options->usesGeometryShader || isRectVertexShader);
bool fetchVertexManually = (usesGeometryShader || (shaderContext->fetchShader && shaderContext->fetchShader->mtlFetchVertexManually));
// Rasterization
rasterizationEnabled = true;
if (shader->shaderType == LatteConst::ShaderType::Vertex && !(shaderContext->options->usesGeometryShader || isRectVertexShader))
if (shader->shaderType == LatteConst::ShaderType::Vertex && !usesGeometryShader)
{
rasterizationEnabled = !shaderContext->contextRegistersNew->PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
@ -3885,7 +3887,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->add("#include <metal_stdlib>" _CRLF);
src->add("using namespace metal;" _CRLF);
// header part (definitions for inputs and outputs)
LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, rasterizationEnabled);
LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, fetchVertexManually, rasterizationEnabled);
// helper functions
LatteDecompiler_emitHelperFunctions(shaderContext, src);
const char* functionType = "";
@ -3893,21 +3895,32 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
if (shaderContext->options->usesGeometryShader || isRectVertexShader)
if (fetchVertexManually)
{
// TODO: clean this up
// fetchVertex will modify vid in case of an indexed draw
// fetchVertex will modify vid in case of an object shader and an indexed draw
// Vertex buffers
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
std::string inputFetchDefinition = "VertexIn fetchVertex(thread uint& vid, device uint* indexBuffer, uchar indexType VERTEX_BUFFER_DEFINITIONS) {\n";
std::string inputFetchDefinition = "VertexIn fetchVertex(";
if (usesGeometryShader)
inputFetchDefinition += "thread uint&";
else
inputFetchDefinition += "uint";
inputFetchDefinition += " vid, uint iid";
if (usesGeometryShader)
inputFetchDefinition += ", device uint* indexBuffer, uchar indexType";
inputFetchDefinition += " VERTEX_BUFFER_DEFINITIONS) {\n";
// Index buffer
inputFetchDefinition += "if (indexType == 1) // UShort\n";
inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
inputFetchDefinition += "else if (indexType == 2) // UInt\n";
inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid];\n";
if (usesGeometryShader)
{
inputFetchDefinition += "if (indexType == 1) // UShort\n";
inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
inputFetchDefinition += "else if (indexType == 2) // UInt\n";
inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid];\n";
}
inputFetchDefinition += "VertexIn in;\n";
for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups)
@ -3980,11 +3993,22 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
break;
}
// Get the fetch type
std::string fetchTypeStr;
if (attr.fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
fetchTypeStr = "vid";
else if (attr.fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
fetchTypeStr = "iid";
else if (attr.fetchType == LatteConst::VertexFetchType2::NO_INDEX_OFFSET_DATA)
fetchTypeStr = "0"; // TODO: correct?
// Fetch the attribute
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId);
inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName);
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = uint4(uint", semanticId);
if (componentCount != 1)
inputFetchDefinition += fmt::format("{}", componentCount);
inputFetchDefinition += fmt::format("(*(device {}*)", formatName);
inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset);
inputFetchDefinition += fmt::format(" + {} * {} + {}))", fetchTypeStr, bufferStride, attr.offset);
for (uint8 i = 0; i < (4 - componentCount); i++)
inputFetchDefinition += ", 0";
inputFetchDefinition += ");\n";
@ -4014,7 +4038,10 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->add(vertexBuffers.c_str());
src->add("\n");
src->add(inputFetchDefinition.c_str());
}
if (usesGeometryShader)
{
functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]";
outputTypeName = "void";
}
@ -4038,20 +4065,33 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
}
// start of main
src->addFmt("{} {} main0(", functionType, outputTypeName);
LatteDecompiler::emitInputs(shaderContext, isRectVertexShader);
LatteDecompiler::emitInputs(shaderContext, isRectVertexShader, fetchVertexManually);
src->add(") {" _CRLF);
if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
if (fetchVertexManually && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
{
if (shader->shaderType == LatteConst::ShaderType::Vertex)
{
// Calculate the imaginary vertex id
src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF);
src->add("uint iid = vid / verticesPerInstance;" _CRLF);
src->add("vid %= verticesPerInstance;" _CRLF);
// Fetch the input
src->add("VertexIn in = fetchVertex(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);
// Output is defined as object payload
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
if (usesGeometryShader)
{
// Calculate the imaginary vertex id
src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF);
src->add("uint iid = vid / verticesPerInstance;" _CRLF);
src->add("vid %= verticesPerInstance;" _CRLF);
// Fetch the input
src->add("VertexIn in = fetchVertex(vid, iid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);
// Output is defined as object payload
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
}
else
{
// Fetch the input
src->add("VertexIn in = fetchVertex(vid, iid VERTEX_BUFFERS);" _CRLF);
if (rasterizationEnabled)
src->add("VertexOut out;" _CRLF);
}
}
else if (shader->shaderType == LatteConst::ShaderType::Geometry)
{
@ -4258,11 +4298,11 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
}
// TODO: is the if statement even needed?
if (shaderContext->options->usesGeometryShader || isRectVertexShader)
if (usesGeometryShader)
{
// import from geometry shader
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->addFmt("{} = as_type<int4>(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
src->addFmt("{} = bitCast<int>(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("{} = in.passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
else
@ -4306,7 +4346,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->add("out.pointSize = supportBuffer.pointSize;" _CRLF);
}
if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
if (usesGeometryShader && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
{
if (shader->shaderType == LatteConst::ShaderType::Vertex)
{
@ -4346,7 +4386,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF);
// Return
if (!(shaderContext->options->usesGeometryShader || isRectVertexShader) || shader->shaderType == LatteConst::ShaderType::Pixel)
if (!usesGeometryShader || shader->shaderType == LatteConst::ShaderType::Pixel)
src->add("return out;" _CRLF);
}

View File

@ -143,7 +143,7 @@ namespace LatteDecompiler
}
}
static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext, bool fetchVertexManually)
{
auto src = decompilerContext->shaderSource;
std::string attributeNames;
@ -159,7 +159,7 @@ namespace LatteDecompiler
cemu_assert_debug(decompilerContext->output->resourceMappingMTL.attributeMapping[i] >= 0);
src->addFmt("uint4 attrDataSem{}", i);
if (decompilerContext->options->usesGeometryShader || isRectVertexShader)
if (fetchVertexManually)
attributeNames += "#define ATTRIBUTE_NAME" + std::to_string((sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]) + " attrDataSem" + std::to_string(i) + "\n";
else
src->addFmt(" [[attribute({})]]", (sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]);
@ -250,13 +250,13 @@ namespace LatteDecompiler
src->add("};" _CRLF _CRLF);
}
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled)
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually, bool rasterizationEnabled)
{
auto src = decompilerContext->shaderSource;
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
{
_emitAttributes(decompilerContext, isRectVertexShader);
_emitAttributes(decompilerContext, fetchVertexManually);
}
else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
{
@ -339,13 +339,12 @@ namespace LatteDecompiler
}
}
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled)
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually, bool rasterizationEnabled)
{
auto src = decompilerContext->shaderSource;
if ((decompilerContext->options->usesGeometryShader || isRectVertexShader) && (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
{
// TODO: make vsOutPrimType parth of the shader hash
LattePrimitiveMode vsOutPrimType = static_cast<LattePrimitiveMode>(decompilerContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]);
uint32 gsOutPrimType = decompilerContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE];
@ -398,7 +397,7 @@ namespace LatteDecompiler
// uniform buffers
_emitUniformBuffers(decompilerContext);
// inputs and outputs
_emitInputsAndOutputs(decompilerContext, isRectVertexShader, rasterizationEnabled);
_emitInputsAndOutputs(decompilerContext, isRectVertexShader, fetchVertexManually, rasterizationEnabled);
if (dump_shaders_enabled)
decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF);
@ -472,7 +471,7 @@ namespace LatteDecompiler
}
}
static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually)
{
auto src = decompilerContext->shaderSource;
@ -491,14 +490,18 @@ namespace LatteDecompiler
src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding);
// TODO: put into the support buffer?
src->addFmt(", constant uchar& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding);
src->add(" VERTEX_BUFFER_DEFINITIONS");
}
else
{
src->add("VertexIn in [[stage_in]]");
src->add(", uint vid [[vertex_id]]");
src->add("uint vid [[vertex_id]]");
src->add(", uint iid [[instance_id]]");
}
if (fetchVertexManually)
src->add(" VERTEX_BUFFER_DEFINITIONS");
else
src->add(", VertexIn in [[stage_in]]");
break;
case LatteConst::ShaderType::Geometry:
src->add("MeshType mesh");

View File

@ -2,9 +2,8 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalVoidVertexPipeline.h"
#include "Common/precompiled.h"
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Metal/MTLResource.hpp"
/*
MetalVertexBufferCache::~MetalVertexBufferCache()
{
}
@ -28,13 +27,11 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize);
buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex);
/*
uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.offset;
//uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
//uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.offset;
for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
*/
//for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
// memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
if (m_mtlr->GetEncoderType() == MetalEncoderType::Render)
{
@ -94,6 +91,7 @@ void MetalVertexBufferCache::MemoryRangeChanged(size_t offset, size_t size)
}
}
}
*/
MetalMemoryManager::~MetalMemoryManager()
{
@ -144,7 +142,7 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si
m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex);
// Notify vertex buffer cache about the change
m_vertexBufferCache.MemoryRangeChanged(offset, size);
//m_vertexBufferCache.MemoryRangeChanged(offset, size);
}
void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size)

View File

@ -1,8 +1,8 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h"
#include "Metal/MTLResource.hpp"
/*
struct MetalRestridedBufferRange
{
MTL::Buffer* buffer;
@ -54,18 +54,21 @@ private:
void MemoryRangeChanged(size_t offset, size_t size);
};
*/
class MetalMemoryManager
{
public:
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {}
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer)/*, m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator)*/ {}
~MetalMemoryManager();
// Pipelines
/*
void SetRestrideBufferPipeline(class MetalVoidVertexPipeline* restrideBufferPipeline)
{
m_vertexBufferCache.SetRestrideBufferPipeline(restrideBufferPipeline);
}
*/
MetalDefaultBufferAllocator& GetBufferAllocator()
{
@ -95,6 +98,7 @@ public:
void CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size);
// Vertex buffer cache
/*
void TrackVertexBuffer(uint32 bufferIndex, size_t offset, size_t size, MetalRestrideInfo* restrideInfo)
{
m_vertexBufferCache.TrackVertexBuffer(bufferIndex, offset, size, restrideInfo);
@ -109,6 +113,7 @@ public:
{
return m_vertexBufferCache.RestrideBufferIfNeeded(m_bufferCache, bufferIndex, stride, barrierBuffers);
}
*/
private:
class MetalRenderer* m_mtlr;
@ -118,7 +123,7 @@ private:
MetalDefaultBufferAllocator m_bufferAllocator;
MetalDefaultBufferAllocator m_framePersistentBufferAllocator;
MetalTemporaryBufferAllocator m_tempBufferAllocator;
MetalVertexBufferCache m_vertexBufferCache;
//MetalVertexBufferCache m_vertexBufferCache;
MTL::Buffer* m_bufferCache = nullptr;
};

View File

@ -8,7 +8,8 @@ public:
// Per frame data
uint32 m_renderPasses = 0;
uint32 m_clears = 0;
uint32 m_vertexBufferRestrides = 0;
uint32 m_manualVertexFetchDraws = 0;
uint32 m_meshDraws = 0;
uint32 m_triangleFans = 0;
MetalPerformanceMonitor() = default;
@ -18,7 +19,8 @@ public:
{
m_renderPasses = 0;
m_clears = 0;
m_vertexBufferRestrides = 0;
m_manualVertexFetchDraws = 0;
m_meshDraws = 0;
m_triangleFans = 0;
}
};

View File

@ -326,76 +326,81 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
if (pipeline)
return pipeline;
// Vertex descriptor
MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
for (auto& bufferGroup : fetchShader->bufferGroups)
{
std::optional<LatteConst::VertexFetchType2> fetchType;
uint32 minBufferStride = 0;
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?
auto attribute = vertexDescriptor->attributes()->object(semanticId);
attribute->setOffset(attr.offset);
attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex));
attribute->setFormat(GetMtlVertexFormat(attr.format));
minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format));
if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;
if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
if (bufferStride == 0)
{
// Buffer stride cannot be zero, let's use the minimum stride
bufferStride = minBufferStride;
// Additionally, constant vertex function must be used
layout->setStepFunction(MTL::VertexStepFunctionConstant);
layout->setStepRate(0);
}
else
{
if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
layout->setStepFunction(MTL::VertexStepFunctionPerVertex);
else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
layout->setStepFunction(MTL::VertexStepFunctionPerInstance);
else
{
debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value());
cemu_assert(false);
}
}
bufferStride = Align(bufferStride, 4);
layout->setStride(bufferStride);
}
auto vertexShaderMtl = static_cast<RendererShaderMtl*>(vertexShader->shader);
// Render pipeline state
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(vertexShaderMtl->GetFunction());
// TODO: don't always set the vertex descriptor?
desc->setVertexDescriptor(vertexDescriptor);
// Vertex descriptor
if (!fetchShader->mtlFetchVertexManually)
{
MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
for (auto& bufferGroup : fetchShader->bufferGroups)
{
std::optional<LatteConst::VertexFetchType2> fetchType;
uint32 minBufferStride = 0;
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?
auto attribute = vertexDescriptor->attributes()->object(semanticId);
attribute->setOffset(attr.offset);
attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex));
attribute->setFormat(GetMtlVertexFormat(attr.format));
minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format));
if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;
if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
if (bufferStride == 0)
{
// Buffer stride cannot be zero, let's use the minimum stride
bufferStride = minBufferStride;
// Additionally, constant vertex function must be used
layout->setStepFunction(MTL::VertexStepFunctionConstant);
layout->setStepRate(0);
}
else
{
if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
layout->setStepFunction(MTL::VertexStepFunctionPerVertex);
else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
layout->setStepFunction(MTL::VertexStepFunctionPerInstance);
else
{
debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value());
cemu_assert(false);
}
}
bufferStride = Align(bufferStride, 4);
layout->setStride(bufferStride);
}
// TODO: don't always set the vertex descriptor?
desc->setVertexDescriptor(vertexDescriptor);
vertexDescriptor->release();
}
SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr);
@ -448,7 +453,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
}
}
desc->release();
vertexDescriptor->release();
return pipeline;
}

View File

@ -18,11 +18,11 @@
#include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h"
#include "Cemu/Logging/CemuLogging.h"
#include "HW/Latte/Core/LatteConst.h"
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#include "HW/Latte/Renderer/Metal/MetalLayerHandle.h"
#include "HW/Latte/Renderer/Renderer.h"
#include "Metal/MTLRenderPipeline.hpp"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "config/CemuConfig.h"
#define IMGUI_IMPL_METAL_CPP
@ -163,10 +163,10 @@ MetalRenderer::MetalRenderer()
if (m_isAppleGPU)
m_copyBufferToBufferPipeline = new MetalVoidVertexPipeline(this, utilityLibrary, "vertexCopyBufferToBuffer");
//m_copyTextureToTexturePipeline = new MetalVoidVertexPipeline(this, utilityLibrary, "vertexCopyTextureToTexture");
m_restrideBufferPipeline = new MetalVoidVertexPipeline(this, utilityLibrary, "vertexRestrideBuffer");
//m_restrideBufferPipeline = new MetalVoidVertexPipeline(this, utilityLibrary, "vertexRestrideBuffer");
utilityLibrary->release();
m_memoryManager->SetRestrideBufferPipeline(m_restrideBufferPipeline);
//m_memoryManager->SetRestrideBufferPipeline(m_restrideBufferPipeline);
}
MetalRenderer::~MetalRenderer()
@ -174,7 +174,7 @@ MetalRenderer::~MetalRenderer()
if (m_isAppleGPU)
delete m_copyBufferToBufferPipeline;
//delete m_copyTextureToTexturePipeline;
delete m_restrideBufferPipeline;
//delete m_restrideBufferPipeline;
//m_presentPipelineLinear->release();
//m_presentPipelineSRGB->release();
@ -475,20 +475,20 @@ void MetalRenderer::DeleteFontTextures()
void MetalRenderer::AppendOverlayDebugInfo()
{
ImGui::Text("--- GPU info ---");
ImGui::Text("Is Apple GPU %s", (m_isAppleGPU ? "yes" : "no"));
ImGui::Text("Has unified memory %s", (m_hasUnifiedMemory ? "yes" : "no"));
ImGui::Text("Supports Metal3 %s", (m_supportsMetal3 ? "yes" : "no"));
ImGui::Text("Is Apple GPU %s", (m_isAppleGPU ? "yes" : "no"));
ImGui::Text("Has unified memory %s", (m_hasUnifiedMemory ? "yes" : "no"));
ImGui::Text("Supports Metal3 %s", (m_supportsMetal3 ? "yes" : "no"));
ImGui::Text("--- Metal info ---");
ImGui::Text("Render pipeline states %zu", m_pipelineCache->GetPipelineCacheSize());
ImGui::Text("Buffer allocator memory %zuMB", m_performanceMonitor.m_bufferAllocatorMemory / 1024 / 1024);
ImGui::Text("Render pipeline states %zu", m_pipelineCache->GetPipelineCacheSize());
ImGui::Text("Buffer allocator memory %zuMB", m_performanceMonitor.m_bufferAllocatorMemory / 1024 / 1024);
ImGui::Text("--- Metal info (per frame) ---");
ImGui::Text("Command buffers %zu", m_commandBuffers.size());
ImGui::Text("Render passes %u", m_performanceMonitor.m_renderPasses);
ImGui::Text("Clears %u", m_performanceMonitor.m_clears);
ImGui::Text("Vertex buffer restrides %u", m_performanceMonitor.m_vertexBufferRestrides);
ImGui::Text("Triangle fans %u", m_performanceMonitor.m_triangleFans);
ImGui::Text("Command buffers %zu", m_commandBuffers.size());
ImGui::Text("Render passes %u", m_performanceMonitor.m_renderPasses);
ImGui::Text("Clears %u", m_performanceMonitor.m_clears);
ImGui::Text("Manual vertex fetch draws %u (mesh draws: %u)", m_performanceMonitor.m_manualVertexFetchDraws, m_performanceMonitor.m_meshDraws);
ImGui::Text("Triangle fans %u", m_performanceMonitor.m_triangleFans);
}
// TODO: halfZ
@ -831,16 +831,16 @@ void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, u
if (buffer.offset == offset && buffer.size == size)
return;
if (buffer.offset != INVALID_OFFSET)
{
m_memoryManager->UntrackVertexBuffer(bufferIndex);
}
//if (buffer.offset != INVALID_OFFSET)
//{
// m_memoryManager->UntrackVertexBuffer(bufferIndex);
//}
buffer.offset = offset;
buffer.size = size;
buffer.restrideInfo = {};
//buffer.restrideInfo = {};
m_memoryManager->TrackVertexBuffer(bufferIndex, offset, size, &buffer.restrideInfo);
//m_memoryManager->TrackVertexBuffer(bufferIndex, offset, size, &buffer.restrideInfo);
}
void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size)
@ -975,6 +975,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);
bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
bool fetchVertexManually = (usesGeometryShader || fetchShader->mtlFetchVertexManually);
// Index buffer
Renderer::INDEX_TYPE hostIndexType;
@ -1168,12 +1169,13 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
// Resources
// Vertex buffers
std::vector<MTL::Resource*> barrierBuffers;
//std::vector<MTL::Resource*> barrierBuffers;
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
{
auto& vertexBufferRange = m_state.m_vertexBuffers[i];
if (vertexBufferRange.offset != INVALID_OFFSET)
{
/*
MTL::Buffer* buffer;
size_t offset;
@ -1194,16 +1196,20 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
buffer = restridedBuffer.buffer;
offset = restridedBuffer.offset;
}
*/
MTL::Buffer* buffer = m_memoryManager->GetBufferCache();
size_t offset = m_state.m_vertexBuffers[i].offset;
// Bind
SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, usesGeometryShader), buffer, offset, GET_MTL_VERTEX_BUFFER_INDEX(i));
}
}
if (!barrierBuffers.empty())
{
renderCommandEncoder->memoryBarrier(barrierBuffers.data(), barrierBuffers.size(), MTL::RenderStageVertex, MTL::RenderStageVertex);
}
//if (!barrierBuffers.empty())
//{
// renderCommandEncoder->memoryBarrier(barrierBuffers.data(), barrierBuffers.size(), MTL::RenderStageVertex, MTL::RenderStageVertex);
//}
// Render pipeline state
MTL::RenderPipelineState* renderPipelineState;
@ -1293,6 +1299,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
LatteStreamout_FinishDrawcall(false);
// Debug
if (fetchVertexManually)
m_performanceMonitor.m_manualVertexFetchDraws++;
if (usesGeometryShader)
m_performanceMonitor.m_meshDraws++;
if (primitiveMode == LattePrimitiveMode::TRIANGLE_FAN)
m_performanceMonitor.m_triangleFans++;

View File

@ -19,19 +19,21 @@ struct MetalBufferAllocation
}
};
/*
struct MetalRestrideInfo
{
bool memoryInvalidated = true;
size_t lastStride = 0;
MetalBufferAllocation allocation{};
};
*/
struct MetalBoundBuffer
{
size_t offset = INVALID_OFFSET;
size_t size = 0;
// Memory manager will write restride info to this variable
MetalRestrideInfo restrideInfo;
//MetalRestrideInfo restrideInfo;
};
enum MetalGeneralShaderType
@ -473,7 +475,7 @@ private:
// Hybrid pipelines
class MetalVoidVertexPipeline* m_copyBufferToBufferPipeline;
//class MetalVoidVertexPipeline* m_copyTextureToTexturePipeline;
class MetalVoidVertexPipeline* m_restrideBufferPipeline;
//class MetalVoidVertexPipeline* m_restrideBufferPipeline;
// Resources
MTL::SamplerState* m_nearestSampler;

View File

@ -20,7 +20,7 @@ RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error);
if (error)
{
cemuLog_log(LogType::Force, "failed to create library: {}", error->localizedDescription()->utf8String());
cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), mslCode.c_str());
error->release();
return;
}