don't jit compile vertex shaders

This commit is contained in:
Samuliak 2024-09-03 13:59:52 +02:00
parent c4eb195797
commit 953975f5ec
9 changed files with 146 additions and 162 deletions

View File

@ -8,6 +8,7 @@
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/ISA/LatteInstructions.h"
#include "HW/Latte/Renderer/Renderer.h"
#include "util/containers/LookupTableL3.h"
#include "util/helpers/fspinlock.h"
#include <openssl/sha.h> /* SHA1_DIGEST_LENGTH */
@ -107,6 +108,14 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
key += (uint64)(attrib->offset & 3);
key = std::rotl<uint64>(key, 2);
}
// TODO: also check if geometry shader is used
if (g_renderer->GetType() == RendererAPI::Metal)
{
key += (uint64)group.attributeBufferIndex;
key = std::rotl<uint64>(key, 5);
// TODO: hash the stride as well
}
}
// todo - also hash invalid buffer groups?
fetchShader->key = key;
@ -161,7 +170,7 @@ void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* pars
auto nfa = instr->getField_NUM_FORMAT_ALL();
bool isSigned = instr->getField_FORMAT_COMP_ALL() == LatteClauseInstruction_VTX::FORMAT_COMP::COMP_SIGNED;
auto endianSwap = instr->getField_ENDIAN_SWAP();
// get buffer
cemu_assert_debug(bufferId >= 0xA0 && bufferId < 0xB0);
uint32 bufferIndex = (bufferId - 0xA0);
@ -316,7 +325,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
// {0x00000002, 0x01800c00, 0x00000000, 0x8a000000, 0x2c00a001, 0x2c151000, 0x000a0000, ...} // size 0x50
// {0x00000002, 0x01801000, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x60
// {0x00000002, 0x01801c00, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x90
// our new implementation:
// {0x00000002, 0x01800400, 0x00000000, 0x8a000000, 0x0000a001, 0x2c151000, 0x00020000, ...}
@ -411,7 +420,7 @@ LatteFetchShader::~LatteFetchShader()
UnregisterInCache();
}
struct FetchShaderLookupInfo
struct FetchShaderLookupInfo
{
LatteFetchShader* fetchShader;
uint32 programSize;

View File

@ -65,6 +65,7 @@ struct LatteDecompilerShaderResourceMapping
sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS];
// Metal exclusive
sint8 indexBufferBinding{-1};
sint8 indexTypeBinding{-1};
sint32 getTextureCount()
{

View File

@ -1020,4 +1020,5 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
LatteDecompiler::_initUniformBindingPoints(shaderContext);
LatteDecompiler::_initAttributeBindingPoints(shaderContext);
shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++;
shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++;
}

View File

@ -11,6 +11,7 @@
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "config/ActiveSettings.h"
#include "util/helpers/StringBuf.h"
@ -3856,6 +3857,8 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh
void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader)
{
LatteShaderSHRC_UpdateFetchShader();
auto fetchShader = LatteSHRC_GetActiveFetchShader();
bool isRectVertexShader = (static_cast<LattePrimitiveMode>(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS);
StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end)
@ -3881,9 +3884,125 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
case LatteConst::ShaderType::Vertex:
if (shaderContext->options->usesGeometryShader || isRectVertexShader)
{
// Defined just-in-time
// Will also modify vid in case of an indexed draw
src->add("VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF);
// TODO: clean this up
// Will modify vid in case of an indexed draw
// Vertex buffers
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid, device uint* indexBuffer, uint indexType VERTEX_BUFFER_DEFINITIONS) {\n";
// Index buffer
inputFetchDefinition += "if (indexType == 1) // UShort\n";
inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
inputFetchDefinition += "else if (indexType == 2)\n";
inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid]; // UInt\n";
inputFetchDefinition += "VertexIn in;\n";
for (auto& bufferGroup : fetchShader->bufferGroups)
{
std::optional<LatteConst::VertexFetchType2> fetchType;
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (shaderContext->contextRegisters[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];
uint32 semanticId = shaderContext->output->resourceMappingMTL.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?
std::string formatName;
uint8 componentCount = 0;
switch (GetMtlVertexFormat(attr.format))
{
case MTL::VertexFormatUChar:
formatName = "uchar";
componentCount = 1;
break;
case MTL::VertexFormatUChar2:
formatName = "uchar2";
componentCount = 2;
break;
case MTL::VertexFormatUChar3:
formatName = "uchar3";
componentCount = 3;
break;
case MTL::VertexFormatUChar4:
formatName = "uchar4";
componentCount = 4;
break;
case MTL::VertexFormatUShort:
formatName = "ushort";
componentCount = 1;
break;
case MTL::VertexFormatUShort2:
formatName = "ushort2";
componentCount = 2;
break;
case MTL::VertexFormatUShort3:
formatName = "ushort3";
componentCount = 3;
break;
case MTL::VertexFormatUShort4:
formatName = "ushort4";
componentCount = 4;
break;
case MTL::VertexFormatUInt:
formatName = "uint";
componentCount = 1;
break;
case MTL::VertexFormatUInt2:
formatName = "uint2";
componentCount = 2;
break;
case MTL::VertexFormatUInt3:
formatName = "uint3";
componentCount = 3;
break;
case MTL::VertexFormatUInt4:
formatName = "uint4";
componentCount = 4;
break;
}
// Fetch the attribute
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId);
inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName);
inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset);
for (uint8 i = 0; i < (4 - componentCount); i++)
inputFetchDefinition += ", 0";
inputFetchDefinition += ");\n";
if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;
if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}
// TODO: fetch type
vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex);
}
inputFetchDefinition += "return in;\n";
inputFetchDefinition += "}\n";
src->add(vertexBufferDefinitions.c_str());
src->add("\n");
src->add(vertexBuffers.c_str());
src->add("\n");
src->add(inputFetchDefinition.c_str());
functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]";
outputTypeName = "void";
@ -3916,7 +4035,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
// TODO: don't hardcode the instance index
src->add("uint iid = 0;" _CRLF);
// Fetch the input
src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF);
src->add("VertexIn in = fetchInput(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);
// Output is defined as object payload
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
}

View File

@ -495,6 +495,10 @@ namespace LatteDecompiler
src->add(", mesh_grid_properties meshGridProperties");
src->add(", uint tig [[threadgroup_position_in_grid]]");
src->add(", uint tid [[thread_index_in_threadgroup]]");
// TODO: only include index buffer if needed
src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding);
// TODO: use uchar?
src->addFmt(", constant uint& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding);
src->add(" VERTEX_BUFFER_DEFINITIONS");
}
else

View File

@ -366,13 +366,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
auto mtlVertexShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
mtlVertexShader->CompileVertexFunction();
// HACK
if (!mtlVertexShader->GetFunction())
{
debug_printf("no vertex function, skipping draw\n");
return nullptr;
}
mtlPixelShader->CompileFragmentFunction(lastUsedFBO);
// Render pipeline state
@ -475,7 +468,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe
mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
}
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
mtlObjectShader->CompileObjectFunction(lcr, fetchShader, vertexShader, hostIndexType);
mtlPixelShader->CompileFragmentFunction(lastUsedFBO);
// Render pipeline state

View File

@ -916,7 +916,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
// TODO: is this even needed? Also, should go to draw_beginSequence
if (!vertexShader)
if (!vertexShader || !static_cast<RendererShaderMtl*>(vertexShader->shader)->GetFunction())
{
printf("no vertex function, skipping draw\n");
return;
@ -1200,6 +1200,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
{
if (indexBuffer)
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding);
renderCommandEncoder->setObjectBytes(&hostIndexType, sizeof(hostIndexType), vertexShader->resourceMapping.indexTypeBinding);
encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.indexTypeBinding] = {nullptr};
uint32 verticesPerPrimitive = 0;
switch (primitiveMode)

View File

@ -16,7 +16,8 @@ extern std::atomic_int g_compiled_shaders_async;
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
{
if (type == ShaderType::kGeometry)
// TODO: don't compile fragment function just-in-time
if (type != ShaderType::kFragment)
{
Compile(mslCode);
}
@ -36,145 +37,6 @@ RendererShaderMtl::~RendererShaderMtl()
m_function->release();
}
// Assembles the complete shader source for this vertex shader and hands it to Compile().
//
// The final source is, in this order:
//   1. a "#define VERTEX_BUFFER_DEFINITIONS ..." line (extra function parameters: optional index buffer + vertex buffers)
//   2. a "#define VERTEX_BUFFERS ..." line (the matching argument names for the call site)
//   3. the stored shader source (m_mslCode)
//   4. the definition of fetchInput(), which reads every used attribute out of the raw vertex buffers
//
// lcr            - context registers; only read to obtain the stride of each vertex buffer
// fetchShader    - supplies the buffer groups and their attributes (format, offset, semantic id, fetch type)
// vertexShader   - supplies resourceMapping (attributeMapping and indexBufferBinding)
// hostIndexType  - element type of the index buffer; INDEX_TYPE::NONE means no index buffer parameter is emitted
void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType)
{
// Only shaders created with the vertex type are expected here
cemu_assert_debug(m_type == ShaderType::kVertex);
std::string fullCode;
// Vertex buffers
// Both macros start empty and grow by one ", ..." entry per buffer, so they can be appended
// directly after an existing parameter/argument list.
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n";
// Index buffer
// For indexed draws the index buffer becomes the first extra parameter and fetchInput()
// replaces vid with the value read from it.
if (hostIndexType != Renderer::INDEX_TYPE::NONE)
{
vertexBufferDefinitions += ", device ";
// Pick the element type of the index buffer pointer
switch (hostIndexType)
{
case Renderer::INDEX_TYPE::U16:
vertexBufferDefinitions += "ushort";
break;
case Renderer::INDEX_TYPE::U32:
vertexBufferDefinitions += "uint";
break;
default:
// Any other index type is unexpected; no element type is appended in that case
cemu_assert_suspicious();
break;
}
vertexBufferDefinitions += fmt::format("* indexBuffer [[buffer({})]]", vertexShader->resourceMapping.indexBufferBinding);
vertexBuffers += ", indexBuffer";
inputFetchDefinition += "vid = indexBuffer[vid];\n";
}
inputFetchDefinition += "VertexIn in;\n";
// One vertex buffer parameter per buffer group, one fetch statement per used attribute
for (auto& bufferGroup : fetchShader->bufferGroups)
{
// Fetch type of the first used attribute in this group; later attributes are asserted to match it
std::optional<LatteConst::VertexFetchType2> fetchType;
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
// Each buffer occupies a block of 7 registers; the stride is taken from bits 11..26 of the register at offset +2
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];
// Translate the attribute's semantic id through the shader's attribute mapping; -1 marks an unmapped entry
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?
// Type name used in the emitted pointer cast, and how many components that type provides
std::string formatName;
uint8 componentCount = 0;
// NOTE(review): there is no default case. For a format that is not listed below, formatName stays
// empty and componentCount stays 0, so the emitted statement would contain "(device *)" followed by
// four ", 0" arguments. Confirm that GetMtlVertexFormat can only return the values handled here.
switch (GetMtlVertexFormat(attr.format))
{
case MTL::VertexFormatUChar:
formatName = "uchar";
componentCount = 1;
break;
case MTL::VertexFormatUChar2:
formatName = "uchar2";
componentCount = 2;
break;
case MTL::VertexFormatUChar3:
formatName = "uchar3";
componentCount = 3;
break;
case MTL::VertexFormatUChar4:
formatName = "uchar4";
componentCount = 4;
break;
case MTL::VertexFormatUShort:
formatName = "ushort";
componentCount = 1;
break;
case MTL::VertexFormatUShort2:
formatName = "ushort2";
componentCount = 2;
break;
case MTL::VertexFormatUShort3:
formatName = "ushort3";
componentCount = 3;
break;
case MTL::VertexFormatUShort4:
formatName = "ushort4";
componentCount = 4;
break;
case MTL::VertexFormatUInt:
formatName = "uint";
componentCount = 1;
break;
case MTL::VertexFormatUInt2:
formatName = "uint2";
componentCount = 2;
break;
case MTL::VertexFormatUInt3:
formatName = "uint3";
componentCount = 3;
break;
case MTL::VertexFormatUInt4:
formatName = "uint4";
componentCount = 4;
break;
}
// Fetch the attribute
// Emits: in.ATTRIBUTE_NAME<id> = uint4(*(device <type>*)(vertexBuffer<n> + vid * <stride> + <offset>)[, 0 ...]);
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId);
inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName);
// NOTE(review): the buffer name here uses attr.attributeBufferIndex while the parameter declared below
// uses bufferGroup.attributeBufferIndex - presumably always equal within a group; verify.
inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset);
// Pad the constructor with zeros so that it always receives four components
for (uint8 i = 0; i < (4 - componentCount); i++)
inputFetchDefinition += ", 0";
inputFetchDefinition += ");\n";
// All used attributes of one buffer group are expected to share the same fetch type (debug check only)
if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;
if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}
// Declare the raw byte buffer for this group and add it to the argument list used at the call site
vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex);
}
inputFetchDefinition += "return in;\n";
inputFetchDefinition += "}\n";
// The two #define lines must precede m_mslCode; the fetchInput() definition is appended after it
fullCode += vertexBufferDefinitions + "\n";
fullCode += vertexBuffers + "\n";
fullCode += m_mslCode;
fullCode += inputFetchDefinition;
Compile(fullCode);
}
void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
{
cemu_assert_debug(m_type == ShaderType::kFragment);

View File

@ -21,12 +21,6 @@ public:
RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
virtual ~RendererShaderMtl();
// Passes the stored shader source (m_mslCode) to Compile() unmodified.
void CompileVertexFunction()
{
Compile(m_mslCode);
}
void CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType);
void CompileFragmentFunction(CachedFBOMtl* activeFBO);
MTL::Function* GetFunction() const