patch object and mesh shaders

This commit is contained in:
Samuliak 2024-08-20 18:59:25 +02:00
parent 9679c6b7e8
commit 2f4ceb33e0
6 changed files with 208 additions and 26 deletions

View File

@ -3847,7 +3847,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
{
// Defined just-in-time
// Will also modify vid in case of an indexed draw
src->add("VertexIn fetchInput(VERTEX_BUFFER_DEFINITIONS, thread uint& vid);" _CRLF);
src->add("VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF);
functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]";
outputTypeName = "void";
@ -3880,7 +3880,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
// TODO: don't hardcode the instance index
src->add("uint iid = 0;" _CRLF);
// Fetch the input
src->add("VertexIn in = fetchInput(VERTEX_BUFFERS, vid);" _CRLF);
src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF);
// Output is defined as object payload
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
}

View File

@ -156,6 +156,7 @@ namespace LatteDecompiler
static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext)
{
auto src = decompilerContext->shaderSource;
std::string attributeNames;
if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex)
{
@ -168,13 +169,16 @@ namespace LatteDecompiler
cemu_assert_debug(decompilerContext->output->resourceMappingVK.attributeMapping[i] >= 0);
src->addFmt("uint4 attrDataSem{}", i);
if (!decompilerContext->options->usesGeometryShader)
if (decompilerContext->options->usesGeometryShader)
attributeNames += "#define ATTRIBUTE_NAME" + std::to_string((sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]) + " attrDataSem" + std::to_string(i) + "\n";
else
src->addFmt(" [[attribute({})]]", (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]);
src->add(";" _CRLF);
}
}
src->add("};" _CRLF _CRLF);
}
src->addFmt("{}", attributeNames);
}
static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext)
@ -335,6 +339,21 @@ namespace LatteDecompiler
static void emitHeader(LatteDecompilerShaderContext* decompilerContext)
{
auto src = decompilerContext->shaderSource;
// When geometry shaders are in use, the vertex and geometry stages need to know
// how many vertices make up one primitive. The emitted preamble derives
// VERTICES_PER_PRIMITIVE from PRIMITIVE_TYPE, which must already be #defined as
// one of the bare words point / line / triangle by the code prepended in front
// of this source.
if (decompilerContext->options->usesGeometryShader && (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
{
	// The preprocessor replaces unknown identifiers with 0 inside #if, so a
	// direct `PRIMITIVE_TYPE == point` test is always true. Map the bare word
	// to a numeric id through token pasting and compare the ids instead.
	src->add("#define PRIMITIVE_TYPE_ID_point 1" _CRLF);
	src->add("#define PRIMITIVE_TYPE_ID_line 2" _CRLF);
	src->add("#define PRIMITIVE_TYPE_ID_triangle 3" _CRLF);
	// Two-level macro so that PRIMITIVE_TYPE is expanded before being pasted
	src->add("#define PRIMITIVE_TYPE_ID_PASTE(name) PRIMITIVE_TYPE_ID_##name" _CRLF);
	src->add("#define PRIMITIVE_TYPE_ID(name) PRIMITIVE_TYPE_ID_PASTE(name)" _CRLF);

	// Single #if / #elif chain closed by exactly one #endif
	src->add("#if PRIMITIVE_TYPE_ID(PRIMITIVE_TYPE) == PRIMITIVE_TYPE_ID_point" _CRLF);
	src->add("#define VERTICES_PER_PRIMITIVE 1" _CRLF);
	src->add("#elif PRIMITIVE_TYPE_ID(PRIMITIVE_TYPE) == PRIMITIVE_TYPE_ID_line" _CRLF);
	src->add("#define VERTICES_PER_PRIMITIVE 2" _CRLF);
	src->add("#elif PRIMITIVE_TYPE_ID(PRIMITIVE_TYPE) == PRIMITIVE_TYPE_ID_triangle" _CRLF);
	src->add("#define VERTICES_PER_PRIMITIVE 3" _CRLF);
	src->add("#else" _CRLF);
	// Also reached when PRIMITIVE_TYPE is empty or an unknown word (id evaluates to 0)
	src->add("#error unsupported primitive type" _CRLF);
	src->add("#endif" _CRLF);
}
const bool dump_shaders_enabled = ActiveSettings::DumpShadersEnabled();
if(dump_shaders_enabled)
decompilerContext->shaderSource->add("// start of shader inputs/outputs, predetermined by Cemu. Do not touch" _CRLF);

View File

@ -92,7 +92,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
bufferStride = Align(bufferStride, 4);
// HACK
@ -117,6 +117,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
auto mtlVertexShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
mtlVertexShader->CompileVertexFunction();
mtlPixelShader->CompileFragmentFunction(activeFBO);
// Render pipeline state
@ -127,9 +128,9 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
desc->setVertexDescriptor(vertexDescriptor);
// Color attachments
const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = LatteGPUState.contextNew.CB_COLOR_CONTROL;
const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL;
uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK();
uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK();
for (uint8 i = 0; i < 8; i++)
{
const auto& colorBuffer = activeFBO->colorBuffer[i];
@ -149,7 +150,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
{
colorAttachment->setBlendingEnabled(true);
const auto& blendControlReg = LatteGPUState.contextNew.CB_BLENDN_CONTROL[i];
const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i];
auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN());
auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND());

View File

@ -766,17 +766,11 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
if (!vertexShader || !static_cast<RendererShaderMtl*>(vertexShader->shader)->GetFunction())
if (!vertexShader)
{
debug_printf("no vertex function, skipping draw\n");
return;
}
// TODO: remove this?
if (geometryShader)
{
debug_printf("geometry shader aren't supported on Metal yet, skipping draw\n");
return;
}
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
// Depth stencil state

View File

@ -7,6 +7,8 @@
#include "Cemu/Logging/CemuLogging.h"
#include "Common/precompiled.h"
#include "HW/Latte/Core/FetchShader.h"
#include "HW/Latte/ISA/RegDefines.h"
extern std::atomic_int g_compiled_shaders_total;
extern std::atomic_int g_compiled_shaders_async;
@ -14,15 +16,8 @@ extern std::atomic_int g_compiled_shaders_async;
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
{
if (m_type == ShaderType::kFragment)
{
// Fragment functions are compiled just-in-time
m_mslCode = mslCode;
}
else
{
Compile(mslCode);
}
// TODO: don't compile just-in-time
m_mslCode = mslCode;
// Count shader compilation
g_compiled_shaders_total++;
@ -34,13 +29,176 @@ RendererShaderMtl::~RendererShaderMtl()
m_function->release();
}
void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType)
{
cemu_assert_debug(m_type == ShaderType::kVertex);
std::string fullCode;
// Primitive type
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE());
fullCode += "#define PRIMITIVE_TYPE ";
switch (primitiveMode)
{
case LattePrimitiveMode::POINTS:
fullCode += "point";
break;
case LattePrimitiveMode::LINES:
fullCode += "line";
break;
case LattePrimitiveMode::TRIANGLES:
fullCode += "triangle";
break;
default:
break;
}
fullCode += "\n";
// Vertex buffers
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n";
inputFetchDefinition += "VertexIn in;\n";
if (hostIndexType != Renderer::INDEX_TYPE::NONE)
{
vertexBufferDefinitions += ", device ";
switch (hostIndexType)
{
case Renderer::INDEX_TYPE::U16:
vertexBufferDefinitions += "ushort";
break;
case Renderer::INDEX_TYPE::U32:
vertexBufferDefinitions += "uint";
break;
default:
cemu_assert_suspicious();
break;
}
// TODO: don't hardcode the index
vertexBufferDefinitions += "* indexBuffer [[buffer(20)]]";
vertexBuffers += ", indexBuffer";
inputFetchDefinition += "vid = indexBuffer[vid]\n";
}
for (auto& bufferGroup : fetchShader->bufferGroups)
{
std::optional<LatteConst::VertexFetchType2> fetchType;
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?
std::string formatName;
switch (GetMtlVertexFormat(attr.format))
{
case MTL::VertexFormatUChar:
formatName = "uchar";
break;
case MTL::VertexFormatUChar2:
formatName = "uchar2";
break;
case MTL::VertexFormatUChar3:
formatName = "uchar3";
break;
case MTL::VertexFormatUChar4:
formatName = "uchar4";
break;
case MTL::VertexFormatUShort:
formatName = "ushort";
break;
case MTL::VertexFormatUShort2:
formatName = "ushort2";
break;
case MTL::VertexFormatUShort3:
formatName = "ushort3";
break;
case MTL::VertexFormatUShort4:
formatName = "ushort4";
break;
case MTL::VertexFormatUInt:
formatName = "uint";
break;
case MTL::VertexFormatUInt2:
formatName = "uint2";
break;
case MTL::VertexFormatUInt3:
formatName = "uint3";
break;
case MTL::VertexFormatUInt4:
formatName = "uint4";
break;
}
// Fetch the attribute
inputFetchDefinition += "in.ATTRIBUTE_NAME" + std::to_string(semanticId) + " = ";
inputFetchDefinition += "*(device " + formatName + "*)";
inputFetchDefinition += "(vertexBuffer" + std::to_string(attr.attributeBufferIndex);
inputFetchDefinition += " + vid + " + std::to_string(attr.offset) + ");\n";
if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;
if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
fullCode += ", device uchar* vertexBuffer" + std::to_string(bufferIndex) + " [[buffer(" + std::to_string(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)) + ")]]";
}
inputFetchDefinition += "return in;\n";
inputFetchDefinition += "}\n";
fullCode += vertexBufferDefinitions + "\n";
fullCode += vertexBuffers + "\n";
fullCode += m_mslCode;
fullCode += inputFetchDefinition;
Compile(fullCode);
}
void RendererShaderMtl::CompileMeshFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader)
{
cemu_assert_debug(m_type == ShaderType::kGeometry);
std::string fullCode;
// Primitive type
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(lcr.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE());
fullCode += "#define PRIMITIVE_TYPE ";
switch (primitiveMode)
{
case LattePrimitiveMode::POINTS:
fullCode += "point";
break;
case LattePrimitiveMode::LINES:
fullCode += "line";
break;
case LattePrimitiveMode::TRIANGLES:
fullCode += "triangle";
break;
default:
break;
}
fullCode += "\n";
fullCode += m_mslCode;
Compile(fullCode);
}
void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
{
cemu_assert_debug(m_type == ShaderType::kFragment);
if (m_function)
m_function->release();
std::string fullCode;
// Define color attachment data types
@ -77,6 +235,9 @@ void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
void RendererShaderMtl::Compile(const std::string& mslCode)
{
if (m_function)
m_function->release();
NS::Error* error = nullptr;
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error);
if (error)

View File

@ -21,6 +21,13 @@ public:
RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
virtual ~RendererShaderMtl();
// Compiles the stored MSL source (m_mslCode) unmodified by passing it to Compile().
void CompileVertexFunction()
{
Compile(m_mslCode);
}
void CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType);
void CompileMeshFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader);
void CompileFragmentFunction(CachedFBOMtl* activeFBO);
MTL::Function* GetFunction() const