fetch vertices manually if needed

This commit is contained in:
Samuliak 2024-10-01 17:38:14 +02:00
parent a3bfde80b0
commit 94e8ed5a46
8 changed files with 215 additions and 149 deletions

View File

@ -8,8 +8,12 @@
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/ISA/LatteInstructions.h"
#include "HW/Latte/Renderer/Renderer.h"
#include "util/containers/LookupTableL3.h"
#include "util/helpers/fspinlock.h"
#if BOOST_OS_MACOS
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#endif
#include <openssl/sha.h> /* SHA1_DIGEST_LENGTH */
#include <openssl/evp.h> /* EVP_Digest */
@ -71,7 +75,7 @@ uint32 LatteShaderRecompiler_getAttributeAlignment(LatteParsedFetchShaderAttribu
return 4;
}
void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
void LatteShader_calculateFSKey(LatteFetchShader* fetchShader, uint32* contextRegister)
{
uint64 key = 0;
for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++)
@ -104,11 +108,25 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
key = std::rotl<uint64>(key, 8);
key += (uint64)attrib->semanticId;
key = std::rotl<uint64>(key, 8);
key += (uint64)(attrib->offset & 3);
key = std::rotl<uint64>(key, 2);
if (g_renderer->GetType() == RendererAPI::Metal)
key += (uint64)attrib->offset;
else
key += (uint64)(attrib->offset & 3);
key = std::rotl<uint64>(key, 7);
}
}
// todo - also hash invalid buffer groups?
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = fetchShader->bufferGroups[g];
key += (uint64)group.attributeBufferIndex;
key = std::rotl<uint64>(key, 5);
}
}
fetchShader->key = key;
}
@ -146,8 +164,8 @@ void LatteFetchShader::CalculateFetchShaderVkHash()
this->vkPipelineHashFragment = h;
}
void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister)
{uint64 key = 0;
void LatteFetchShader::CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister)
{
for (sint32 g = 0; g < bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = bufferGroups[g];
@ -155,12 +173,16 @@ void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRe
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
key += (uint64)bufferIndex;
key = std::rotl<uint64>(key, 5);
key += (uint64)bufferStride;
key = std::rotl<uint64>(key, 5);
if (bufferStride % 4 != 0)
mtlFetchVertexManually = true;
for (sint32 f = 0; f < group.attribCount; f++)
{
auto& attr = group.attrib[f];
if (attr.offset + GetMtlVertexFormatSize(attr.format) > bufferStride)
mtlFetchVertexManually = true;
}
}
mtlShaderHashObject = key;
}
void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr)
@ -343,9 +365,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
{
// empty fetch shader, seen in Minecraft
// these only make sense when vertex shader does not call FS?
LatteShader_calculateFSKey(newFetchShader);
LatteShader_calculateFSKey(newFetchShader, contextRegister);
newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister);
return newFetchShader;
}
@ -403,9 +425,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
}
bufferGroup.vboStride = vboOffset;
}
LatteShader_calculateFSKey(newFetchShader);
LatteShader_calculateFSKey(newFetchShader, contextRegister);
newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister);
// register in cache
// it's possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously

View File

@ -47,16 +47,15 @@ struct LatteFetchShader
uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline
// Metal
uint64 mtlShaderHashObject{};
bool mtlFetchVertexManually{};
// cache info
CacheHash m_cacheHash{};
bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded)
void CalculateFetchShaderVkHash();
void CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister);
void CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister);
uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; };

View File

@ -503,11 +503,21 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
if (g_renderer->GetType() == RendererAPI::Metal)
{
if (usesGeometryShader)
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
{
vsHash += _activeFetchShader->mtlShaderHashObject;
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g];
uint32 bufferIndex = group.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
vsHash += (uint64)bufferStride;
vsHash = std::rotl<uint64>(vsHash, 7);
}
}
else
if (!usesGeometryShader)
{
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
@ -524,6 +534,10 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
if (rasterizationEnabled)
vsHash += 51ULL;
// Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL;
}
}
@ -531,6 +545,7 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
vsHash += tmp;
auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
// TODO: include always in the hash in case of geometry shader or rect shader
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
{
vsHash += 13ULL;

View File

@ -3854,10 +3854,12 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh
void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader)
{
bool isRectVertexShader = (static_cast<LattePrimitiveMode>(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS);
bool usesGeometryShader = (shaderContext->options->usesGeometryShader || isRectVertexShader);
bool fetchVertexManually = (usesGeometryShader || (shaderContext->fetchShader && shaderContext->fetchShader->mtlFetchVertexManually));
// Rasterization
rasterizationEnabled = true;
if (shader->shaderType == LatteConst::ShaderType::Vertex && !(shaderContext->options->usesGeometryShader || isRectVertexShader))
if (shader->shaderType == LatteConst::ShaderType::Vertex && !usesGeometryShader)
{
rasterizationEnabled = !shaderContext->contextRegistersNew->PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
@ -3885,7 +3887,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->add("#include <metal_stdlib>" _CRLF);
src->add("using namespace metal;" _CRLF);
// header part (definitions for inputs and outputs)
LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, rasterizationEnabled);
LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, fetchVertexManually, rasterizationEnabled);
// helper functions
LatteDecompiler_emitHelperFunctions(shaderContext, src);
const char* functionType = "";
@ -3893,21 +3895,32 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
if (shaderContext->options->usesGeometryShader || isRectVertexShader)
if (fetchVertexManually)
{
// TODO: clean this up
// fetchVertex will modify vid in case of an indexed draw
// fetchVertex will modify vid in case of an object shader and an indexed draw
// Vertex buffers
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
std::string inputFetchDefinition = "VertexIn fetchVertex(thread uint& vid, device uint* indexBuffer, uchar indexType VERTEX_BUFFER_DEFINITIONS) {\n";
std::string inputFetchDefinition = "VertexIn fetchVertex(";
if (usesGeometryShader)
inputFetchDefinition += "thread uint&";
else
inputFetchDefinition += "uint";
inputFetchDefinition += " vid, uint iid";
if (usesGeometryShader)
inputFetchDefinition += ", device uint* indexBuffer, uchar indexType";
inputFetchDefinition += " VERTEX_BUFFER_DEFINITIONS) {\n";
// Index buffer
inputFetchDefinition += "if (indexType == 1) // UShort\n";
inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
inputFetchDefinition += "else if (indexType == 2) // UInt\n";
inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid];\n";
if (usesGeometryShader)
{
inputFetchDefinition += "if (indexType == 1) // UShort\n";
inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
inputFetchDefinition += "else if (indexType == 2) // UInt\n";
inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid];\n";
}
inputFetchDefinition += "VertexIn in;\n";
for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups)
@ -3980,11 +3993,22 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
break;
}
// Get the fetch type
std::string fetchTypeStr;
if (attr.fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
fetchTypeStr = "vid";
else if (attr.fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
fetchTypeStr = "iid";
else if (attr.fetchType == LatteConst::VertexFetchType2::NO_INDEX_OFFSET_DATA)
fetchTypeStr = "0"; // TODO: correct?
// Fetch the attribute
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId);
inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName);
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = uint4(uint", semanticId);
if (componentCount != 1)
inputFetchDefinition += fmt::format("{}", componentCount);
inputFetchDefinition += fmt::format("(*(device {}*)", formatName);
inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset);
inputFetchDefinition += fmt::format(" + {} * {} + {}))", fetchTypeStr, bufferStride, attr.offset);
for (uint8 i = 0; i < (4 - componentCount); i++)
inputFetchDefinition += ", 0";
inputFetchDefinition += ");\n";
@ -4014,7 +4038,10 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->add(vertexBuffers.c_str());
src->add("\n");
src->add(inputFetchDefinition.c_str());
}
if (usesGeometryShader)
{
functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]";
outputTypeName = "void";
}
@ -4038,20 +4065,33 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
}
// start of main
src->addFmt("{} {} main0(", functionType, outputTypeName);
LatteDecompiler::emitInputs(shaderContext, isRectVertexShader);
LatteDecompiler::emitInputs(shaderContext, isRectVertexShader, fetchVertexManually);
src->add(") {" _CRLF);
if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
if (fetchVertexManually && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
{
if (shader->shaderType == LatteConst::ShaderType::Vertex)
{
// Calculate the imaginary vertex id
src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF);
src->add("uint iid = vid / verticesPerInstance;" _CRLF);
src->add("vid %= verticesPerInstance;" _CRLF);
// Fetch the input
src->add("VertexIn in = fetchVertex(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);
// Output is defined as object payload
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
if (usesGeometryShader)
{
// Calculate the imaginary vertex id
src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF);
src->add("uint iid = vid / verticesPerInstance;" _CRLF);
src->add("vid %= verticesPerInstance;" _CRLF);
// Fetch the input
src->add("VertexIn in = fetchVertex(vid, iid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);
// Output is defined as object payload
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
}
else
{
// Fetch the input
src->add("VertexIn in = fetchVertex(vid, iid VERTEX_BUFFERS);" _CRLF);
if (rasterizationEnabled)
src->add("VertexOut out;" _CRLF);
}
}
else if (shader->shaderType == LatteConst::ShaderType::Geometry)
{
@ -4258,11 +4298,11 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
}
// TODO: is the if statement even needed?
if (shaderContext->options->usesGeometryShader || isRectVertexShader)
if (usesGeometryShader)
{
// import from geometry shader
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->addFmt("{} = as_type<int4>(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
src->addFmt("{} = bitCast<int>(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("{} = in.passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
else
@ -4306,7 +4346,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->add("out.pointSize = supportBuffer.pointSize;" _CRLF);
}
if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
if (usesGeometryShader && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
{
if (shader->shaderType == LatteConst::ShaderType::Vertex)
{
@ -4346,7 +4386,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF);
// Return
if (!(shaderContext->options->usesGeometryShader || isRectVertexShader) || shader->shaderType == LatteConst::ShaderType::Pixel)
if (!usesGeometryShader || shader->shaderType == LatteConst::ShaderType::Pixel)
src->add("return out;" _CRLF);
}

View File

@ -143,7 +143,7 @@ namespace LatteDecompiler
}
}
static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext, bool fetchVertexManually)
{
auto src = decompilerContext->shaderSource;
std::string attributeNames;
@ -159,7 +159,7 @@ namespace LatteDecompiler
cemu_assert_debug(decompilerContext->output->resourceMappingMTL.attributeMapping[i] >= 0);
src->addFmt("uint4 attrDataSem{}", i);
if (decompilerContext->options->usesGeometryShader || isRectVertexShader)
if (fetchVertexManually)
attributeNames += "#define ATTRIBUTE_NAME" + std::to_string((sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]) + " attrDataSem" + std::to_string(i) + "\n";
else
src->addFmt(" [[attribute({})]]", (sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]);
@ -250,13 +250,13 @@ namespace LatteDecompiler
src->add("};" _CRLF _CRLF);
}
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled)
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually, bool rasterizationEnabled)
{
auto src = decompilerContext->shaderSource;
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
{
_emitAttributes(decompilerContext, isRectVertexShader);
_emitAttributes(decompilerContext, fetchVertexManually);
}
else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
{
@ -339,13 +339,12 @@ namespace LatteDecompiler
}
}
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled)
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually, bool rasterizationEnabled)
{
auto src = decompilerContext->shaderSource;
if ((decompilerContext->options->usesGeometryShader || isRectVertexShader) && (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
{
// TODO: make vsOutPrimType part of the shader hash
LattePrimitiveMode vsOutPrimType = static_cast<LattePrimitiveMode>(decompilerContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]);
uint32 gsOutPrimType = decompilerContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE];
@ -398,7 +397,7 @@ namespace LatteDecompiler
// uniform buffers
_emitUniformBuffers(decompilerContext);
// inputs and outputs
_emitInputsAndOutputs(decompilerContext, isRectVertexShader, rasterizationEnabled);
_emitInputsAndOutputs(decompilerContext, isRectVertexShader, fetchVertexManually, rasterizationEnabled);
if (dump_shaders_enabled)
decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF);
@ -472,7 +471,7 @@ namespace LatteDecompiler
}
}
static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually)
{
auto src = decompilerContext->shaderSource;
@ -491,14 +490,18 @@ namespace LatteDecompiler
src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding);
// TODO: put into the support buffer?
src->addFmt(", constant uchar& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding);
src->add(" VERTEX_BUFFER_DEFINITIONS");
}
else
{
src->add("VertexIn in [[stage_in]]");
src->add(", uint vid [[vertex_id]]");
src->add("uint vid [[vertex_id]]");
src->add(", uint iid [[instance_id]]");
}
if (fetchVertexManually)
src->add(" VERTEX_BUFFER_DEFINITIONS");
else
src->add(", VertexIn in [[stage_in]]");
break;
case LatteConst::ShaderType::Geometry:
src->add("MeshType mesh");

View File

@ -326,76 +326,81 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
if (pipeline)
return pipeline;
// Vertex descriptor
MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
for (auto& bufferGroup : fetchShader->bufferGroups)
{
std::optional<LatteConst::VertexFetchType2> fetchType;
uint32 minBufferStride = 0;
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?
auto attribute = vertexDescriptor->attributes()->object(semanticId);
attribute->setOffset(attr.offset);
attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex));
attribute->setFormat(GetMtlVertexFormat(attr.format));
minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format));
if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;
if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
if (bufferStride == 0)
{
// Buffer stride cannot be zero, let's use the minimum stride
bufferStride = minBufferStride;
// Additionally, constant vertex function must be used
layout->setStepFunction(MTL::VertexStepFunctionConstant);
layout->setStepRate(0);
}
else
{
if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
layout->setStepFunction(MTL::VertexStepFunctionPerVertex);
else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
layout->setStepFunction(MTL::VertexStepFunctionPerInstance);
else
{
debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value());
cemu_assert(false);
}
}
bufferStride = Align(bufferStride, 4);
layout->setStride(bufferStride);
}
auto vertexShaderMtl = static_cast<RendererShaderMtl*>(vertexShader->shader);
// Render pipeline state
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(vertexShaderMtl->GetFunction());
// TODO: don't always set the vertex descriptor?
desc->setVertexDescriptor(vertexDescriptor);
// Vertex descriptor
if (!fetchShader->mtlFetchVertexManually)
{
MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
for (auto& bufferGroup : fetchShader->bufferGroups)
{
std::optional<LatteConst::VertexFetchType2> fetchType;
uint32 minBufferStride = 0;
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?
auto attribute = vertexDescriptor->attributes()->object(semanticId);
attribute->setOffset(attr.offset);
attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex));
attribute->setFormat(GetMtlVertexFormat(attr.format));
minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format));
if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;
if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
if (bufferStride == 0)
{
// Buffer stride cannot be zero, let's use the minimum stride
bufferStride = minBufferStride;
// Additionally, constant vertex function must be used
layout->setStepFunction(MTL::VertexStepFunctionConstant);
layout->setStepRate(0);
}
else
{
if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
layout->setStepFunction(MTL::VertexStepFunctionPerVertex);
else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
layout->setStepFunction(MTL::VertexStepFunctionPerInstance);
else
{
debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value());
cemu_assert(false);
}
}
bufferStride = Align(bufferStride, 4);
layout->setStride(bufferStride);
}
// TODO: don't always set the vertex descriptor?
desc->setVertexDescriptor(vertexDescriptor);
vertexDescriptor->release();
}
SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr);
@ -448,7 +453,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
}
}
desc->release();
vertexDescriptor->release();
return pipeline;
}

View File

@ -18,11 +18,11 @@
#include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h"
#include "Cemu/Logging/CemuLogging.h"
#include "HW/Latte/Core/LatteConst.h"
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#include "HW/Latte/Renderer/Metal/MetalLayerHandle.h"
#include "HW/Latte/Renderer/Renderer.h"
#include "Metal/MTLRenderPipeline.hpp"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "config/CemuConfig.h"
#define IMGUI_IMPL_METAL_CPP
@ -975,6 +975,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);
bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
//bool fetchVertexManually = (usesGeometryShader || fetchShader->mtlFetchVertexManually);
// Index buffer
Renderer::INDEX_TYPE hostIndexType;
@ -1174,26 +1175,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
auto& vertexBufferRange = m_state.m_vertexBuffers[i];
if (vertexBufferRange.offset != INVALID_OFFSET)
{
MTL::Buffer* buffer;
size_t offset;
// Restride
if (usesGeometryShader)
{
// Object shaders don't need restriding, since the attributes are fetched in the shader
buffer = m_memoryManager->GetBufferCache();
offset = m_state.m_vertexBuffers[i].offset;
}
else
{
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7;
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride, barrierBuffers);
buffer = restridedBuffer.buffer;
offset = restridedBuffer.offset;
}
MTL::Buffer* buffer = m_memoryManager->GetBufferCache();
size_t offset = m_state.m_vertexBuffers[i].offset;
// Bind
SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, usesGeometryShader), buffer, offset, GET_MTL_VERTEX_BUFFER_INDEX(i));

View File

@ -20,7 +20,7 @@ RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error);
if (error)
{
cemuLog_log(LogType::Force, "failed to create library: {}", error->localizedDescription()->utf8String());
cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), mslCode.c_str());
error->release();
return;
}