mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-01-07 15:48:15 +01:00
commit
b332d638c7
@ -617,10 +617,12 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
|
||||
shader->baseHash = baseHash;
|
||||
// copy resource mapping
|
||||
// HACK
|
||||
if (g_renderer->GetType() != RendererAPI::OpenGL)
|
||||
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
||||
shader->resourceMapping = decompilerOutput.resourceMappingVK;
|
||||
else
|
||||
else if (g_renderer->GetType() == RendererAPI::OpenGL)
|
||||
shader->resourceMapping = decompilerOutput.resourceMappingGL;
|
||||
else
|
||||
shader->resourceMapping = decompilerOutput.resourceMappingMTL;
|
||||
// copy texture info
|
||||
shader->textureUnitMask2 = decompilerOutput.textureUnitMask;
|
||||
// copy streamout info
|
||||
|
@ -57,12 +57,14 @@ struct LatteDecompilerShaderResourceMapping
|
||||
// texture
|
||||
sint8 textureUnitToBindingPoint[LATTE_NUM_MAX_TEX_UNITS];
|
||||
// uniform buffer
|
||||
sint8 uniformVarsBufferBindingPoint{}; // special block for uniform registers/remapped array/custom variables
|
||||
sint8 uniformVarsBufferBindingPoint{-1}; // special block for uniform registers/remapped array/custom variables
|
||||
sint8 uniformBuffersBindingPoint[LATTE_NUM_MAX_UNIFORM_BUFFERS];
|
||||
// shader storage buffer for transform feedback (if alternative mode is used)
|
||||
sint8 tfStorageBindingPoint{-1};
|
||||
// attributes (vertex shader only)
|
||||
sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS];
|
||||
// Metal exclusive
|
||||
sint8 indexBufferBinding{-1};
|
||||
|
||||
sint32 getTextureCount()
|
||||
{
|
||||
@ -288,6 +290,7 @@ struct LatteDecompilerOutput_t
|
||||
// mapping and binding information
|
||||
LatteDecompilerShaderResourceMapping resourceMappingGL;
|
||||
LatteDecompilerShaderResourceMapping resourceMappingVK;
|
||||
LatteDecompilerShaderResourceMapping resourceMappingMTL;
|
||||
};
|
||||
|
||||
struct LatteDecompilerSubroutineInfo;
|
||||
|
@ -498,6 +498,18 @@ namespace LatteDecompiler
|
||||
}
|
||||
}
|
||||
|
||||
void _initTextureBindingPointsMTL(LatteDecompilerShaderContext* decompilerContext)
|
||||
{
|
||||
// for Vulkan we use consecutive indices
|
||||
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
|
||||
{
|
||||
if (!decompilerContext->output->textureUnitMask[i])
|
||||
continue;
|
||||
decompilerContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] = decompilerContext->currentTextureBindingPointMTL;
|
||||
decompilerContext->currentTextureBindingPointMTL++;
|
||||
}
|
||||
}
|
||||
|
||||
void _initHasUniformVarBlock(LatteDecompilerShaderContext* decompilerContext)
|
||||
{
|
||||
decompilerContext->hasUniformVarBlock = false;
|
||||
@ -552,14 +564,13 @@ namespace LatteDecompiler
|
||||
}
|
||||
}
|
||||
// assign binding point to uniform var block
|
||||
decompilerContext->output->resourceMappingGL.uniformVarsBufferBindingPoint = -1; // OpenGL currently doesnt use a uniform block
|
||||
if (decompilerContext->hasUniformVarBlock)
|
||||
{
|
||||
decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = decompilerContext->currentBindingPointVK;
|
||||
decompilerContext->currentBindingPointVK++;
|
||||
decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint = decompilerContext->currentBufferBindingPointMTL;
|
||||
decompilerContext->currentBufferBindingPointMTL++;
|
||||
}
|
||||
else
|
||||
decompilerContext->output->resourceMappingVK.uniformVarsBufferBindingPoint = -1;
|
||||
// assign binding points to uniform buffers
|
||||
if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
|
||||
{
|
||||
@ -578,6 +589,8 @@ namespace LatteDecompiler
|
||||
|
||||
decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] = decompilerContext->currentBindingPointVK;
|
||||
decompilerContext->currentBindingPointVK++;
|
||||
decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] = decompilerContext->currentBufferBindingPointMTL;
|
||||
decompilerContext->currentBufferBindingPointMTL++;
|
||||
}
|
||||
// for OpenGL we use the relative buffer index
|
||||
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
|
||||
@ -599,6 +612,8 @@ namespace LatteDecompiler
|
||||
{
|
||||
decompilerContext->output->resourceMappingVK.tfStorageBindingPoint = decompilerContext->currentBindingPointVK;
|
||||
decompilerContext->currentBindingPointVK++;
|
||||
decompilerContext->output->resourceMappingMTL.tfStorageBindingPoint = decompilerContext->currentBufferBindingPointMTL;
|
||||
decompilerContext->currentBufferBindingPointMTL++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -615,6 +630,7 @@ namespace LatteDecompiler
|
||||
{
|
||||
decompilerContext->output->resourceMappingGL.attributeMapping[i] = bindingIndex;
|
||||
decompilerContext->output->resourceMappingVK.attributeMapping[i] = bindingIndex;
|
||||
decompilerContext->output->resourceMappingMTL.attributeMapping[i] = bindingIndex;
|
||||
bindingIndex++;
|
||||
}
|
||||
}
|
||||
@ -1000,6 +1016,8 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
||||
shaderContext->output->resourceMappingVK.setIndex = 2;
|
||||
LatteDecompiler::_initTextureBindingPointsGL(shaderContext);
|
||||
LatteDecompiler::_initTextureBindingPointsVK(shaderContext);
|
||||
LatteDecompiler::_initTextureBindingPointsMTL(shaderContext);
|
||||
LatteDecompiler::_initUniformBindingPoints(shaderContext);
|
||||
LatteDecompiler::_initAttributeBindingPoints(shaderContext);
|
||||
shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++;
|
||||
}
|
||||
|
@ -2822,14 +2822,13 @@ static void _emitGSReadInputVFetchCode(LatteDecompilerShaderContext* shaderConte
|
||||
|
||||
src->add(" = ");
|
||||
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType);
|
||||
src->add("(v2g[");
|
||||
src->add("(objectPayload.vertexOut[");
|
||||
if (texInstruction->textureFetch.srcSel[0] >= 4)
|
||||
cemu_assert_unimplemented();
|
||||
if (texInstruction->textureFetch.srcSel[1] >= 4)
|
||||
cemu_assert_unimplemented();
|
||||
// todo: Index type
|
||||
src->add("0");
|
||||
src->addFmt("].passV2GParameter{}.", texInstruction->textureFetch.offset/16);
|
||||
src->add("vertexIndex");
|
||||
src->addFmt("].passParameterSem{}.", texInstruction->textureFetch.offset/16);
|
||||
|
||||
|
||||
for(sint32 f=0; f<4; f++)
|
||||
@ -3316,7 +3315,7 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La
|
||||
cemu_assert_unimplemented();
|
||||
for (sint32 burstIndex = 0; burstIndex < (sint32)(cfInstruction->exportBurstCount + 1); burstIndex++)
|
||||
{
|
||||
src->addFmt("v2g.passV2GParameter{}.", (cfInstruction->exportArrayBase) / 4 + burstIndex);
|
||||
src->addFmt("out.passParameterSem{}.", (cfInstruction->exportArrayBase) / 4 + burstIndex);
|
||||
_emitXYZWByMask(src, cfInstruction->memWriteCompMask);
|
||||
src->addFmt(" = ");
|
||||
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_SIGNED_INT, burstIndex);
|
||||
@ -3355,7 +3354,7 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La
|
||||
}
|
||||
else if (parameterExportType == 2 && parameterExportBase < 16)
|
||||
{
|
||||
src->addFmt("passG2PParameter{}.", parameterExportBase);
|
||||
src->addFmt("out.passParameterSem{}.", parameterExportBase);
|
||||
_emitXYZWByMask(src, cfInstruction->memWriteCompMask);
|
||||
src->addFmt(" = ");
|
||||
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex);
|
||||
@ -3587,9 +3586,10 @@ void LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderConte
|
||||
src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
|
||||
// write point size
|
||||
if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
|
||||
src->add("gl_PointSize = supportBuffer.pointSize;" _CRLF);
|
||||
// emit vertex
|
||||
src->add("EmitVertex();" _CRLF);
|
||||
src->add("out.pointSize = supportBuffer.pointSize;" _CRLF);
|
||||
src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF);
|
||||
src->add("mesh.set_vertex(vertexIndex, out);" _CRLF);
|
||||
src->add("vertexIndex++;" _CRLF);
|
||||
// increment transform feedback pointer
|
||||
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
||||
{
|
||||
@ -3821,20 +3821,22 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh
|
||||
|
||||
void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader)
|
||||
{
|
||||
bool isRectVertexShader = (static_cast<LattePrimitiveMode>(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS);
|
||||
|
||||
StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end)
|
||||
shaderContext->shaderSource = src;
|
||||
|
||||
// debug info
|
||||
src->addFmt("// shader {:016x}" _CRLF, shaderContext->shaderBaseHash);
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues?"true":"false");
|
||||
src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues ? "true" : "false");
|
||||
src->addFmt(_CRLF);
|
||||
#endif
|
||||
// include metal standard library
|
||||
src->add("#include <metal_stdlib>" _CRLF);
|
||||
src->add("using namespace metal;" _CRLF);
|
||||
// header part (definitions for inputs and outputs)
|
||||
LatteDecompiler::emitHeader(shaderContext);
|
||||
LatteDecompiler::emitHeader(shaderContext, isRectVertexShader);
|
||||
// helper functions
|
||||
LatteDecompiler_emitHelperFunctions(shaderContext, src);
|
||||
const char* functionType = "";
|
||||
@ -3842,9 +3844,25 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
functionType = "vertex";
|
||||
outputTypeName = "VertexOut";
|
||||
if (shaderContext->options->usesGeometryShader || isRectVertexShader)
|
||||
{
|
||||
// Defined just-in-time
|
||||
// Will also modify vid in case of an indexed draw
|
||||
src->add("VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF);
|
||||
|
||||
functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]";
|
||||
outputTypeName = "void";
|
||||
}
|
||||
else
|
||||
{
|
||||
functionType = "vertex";
|
||||
outputTypeName = "VertexOut";
|
||||
}
|
||||
break;
|
||||
case LatteConst::ShaderType::Geometry:
|
||||
functionType = "[[mesh, max_total_threads_per_threadgroup(1)]]";
|
||||
outputTypeName = "void";
|
||||
break;
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
functionType = "fragment";
|
||||
outputTypeName = "FragmentOut";
|
||||
@ -3852,9 +3870,32 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
}
|
||||
// start of main
|
||||
src->addFmt("{} {} main0(", functionType, outputTypeName);
|
||||
LatteDecompiler::emitInputs(shaderContext);
|
||||
LatteDecompiler::emitInputs(shaderContext, isRectVertexShader);
|
||||
src->add(") {" _CRLF);
|
||||
src->addFmt("{} out;" _CRLF, outputTypeName);
|
||||
if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
|
||||
{
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex)
|
||||
{
|
||||
// Calculate the imaginary vertex id
|
||||
src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF);
|
||||
// TODO: don't hardcode the instance index
|
||||
src->add("uint iid = 0;" _CRLF);
|
||||
// Fetch the input
|
||||
src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF);
|
||||
// Output is defined as object payload
|
||||
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
|
||||
}
|
||||
else if (shader->shaderType == LatteConst::ShaderType::Geometry)
|
||||
{
|
||||
src->add("GeometryOut out;" _CRLF);
|
||||
// The index of the current vertex that is being emitted
|
||||
src->add("uint vertexIndex = 0;" _CRLF);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
src->addFmt("{} out;" _CRLF, outputTypeName);
|
||||
}
|
||||
// variable definition
|
||||
if (shaderContext->typeTracker.useArrayGPRs == false)
|
||||
{
|
||||
@ -4047,13 +4088,14 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (shaderContext->options->usesGeometryShader)
|
||||
// TODO: is the if statement even needed?
|
||||
if (shaderContext->options->usesGeometryShader || isRectVertexShader)
|
||||
{
|
||||
// import from geometry shader
|
||||
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
|
||||
src->addFmt("{} = asy_type<int4>(passG2PParameter{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
|
||||
src->addFmt("{} = as_type<int4>(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
|
||||
else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
|
||||
src->addFmt("{} = passG2PParameter{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
|
||||
src->addFmt("{} = in.passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F);
|
||||
else
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
@ -4091,18 +4133,52 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
// vertex shader should write renderstate point size at the end if required but not modified by shader
|
||||
if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
|
||||
{
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader == false)
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader)
|
||||
src->add("out.pointSize = supportBuffer.pointSize;" _CRLF);
|
||||
}
|
||||
// HACK: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?)
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex)
|
||||
|
||||
if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
|
||||
{
|
||||
// TODO: check this
|
||||
// MoltenVK does this
|
||||
src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF);
|
||||
if (shader->shaderType == LatteConst::ShaderType::Vertex)
|
||||
{
|
||||
src->add("if (tid == 0) {" _CRLF);
|
||||
src->add("meshGridProperties.set_threadgroups_per_grid(uint3(1, 1, 1));" _CRLF);
|
||||
src->add("}" _CRLF);
|
||||
}
|
||||
else if (shader->shaderType == LatteConst::ShaderType::Geometry)
|
||||
{
|
||||
src->add("mesh.set_primitive_count(GET_PRIMITIVE_COUNT(vertexIndex));" _CRLF);
|
||||
|
||||
// Set indices
|
||||
if (shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE] == 1) // Line strip
|
||||
{
|
||||
src->add("for (uint8_t i = 0; i < GET_PRIMITIVE_COUNT(vertexIndex) * 2; i++) {" _CRLF);
|
||||
src->add("mesh.set_index(i, (i 2 3) + i % 2);" _CRLF);
|
||||
src->add("}" _CRLF);
|
||||
}
|
||||
else if (shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE] == 2) // Triangle strip
|
||||
{
|
||||
src->add("for (uint8_t i = 0; i < GET_PRIMITIVE_COUNT(vertexIndex) * 3; i++) {" _CRLF);
|
||||
src->add("mesh.set_index(i, (i / 3) + i % 3);" _CRLF);
|
||||
src->add("}" _CRLF);
|
||||
}
|
||||
else
|
||||
{
|
||||
src->add("for (uint8_t i = 0; i < vertexIndex; i++) {" _CRLF);
|
||||
src->add("mesh.set_index(i, i);" _CRLF);
|
||||
src->add("}" _CRLF);
|
||||
}
|
||||
}
|
||||
}
|
||||
// return
|
||||
src->add("return out;" _CRLF);
|
||||
|
||||
// TODO: this should be handled outside of the shader, because clipping currently wouldn't work (or would it?)
|
||||
if ((shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader) || shader->shaderType == LatteConst::ShaderType::Geometry)
|
||||
src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF);
|
||||
|
||||
// Return
|
||||
if (!(shaderContext->options->usesGeometryShader || isRectVertexShader) || shader->shaderType == LatteConst::ShaderType::Pixel)
|
||||
src->add("return out;" _CRLF);
|
||||
|
||||
// end of shader main
|
||||
src->add("}" _CRLF);
|
||||
src->shrink_to_fit();
|
||||
|
@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "Common/precompiled.h"
|
||||
#include "HW/Latte/Core/LatteConst.h"
|
||||
namespace LatteDecompiler
|
||||
{
|
||||
static void _emitUniformVariables(LatteDecompilerShaderContext* decompilerContext)
|
||||
@ -94,7 +96,7 @@ namespace LatteDecompiler
|
||||
uniformCurrentOffset += 8;
|
||||
}
|
||||
// define verticesPerInstance + streamoutBufferBaseX
|
||||
if ((shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) ||
|
||||
if ((shader->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) ||
|
||||
(shader->shaderType == LatteConst::ShaderType::Geometry))
|
||||
{
|
||||
src->add("int verticesPerInstance;" _CRLF);
|
||||
@ -127,7 +129,7 @@ namespace LatteDecompiler
|
||||
if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess())
|
||||
continue;
|
||||
|
||||
cemu_assert_debug(decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] >= 0);
|
||||
cemu_assert_debug(decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] >= 0);
|
||||
|
||||
shaderSrc->addFmt("struct UBuff{} {{" _CRLF, i);
|
||||
shaderSrc->addFmt("float4 d[{}];" _CRLF, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
|
||||
@ -155,6 +157,7 @@ namespace LatteDecompiler
|
||||
static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext)
|
||||
{
|
||||
auto src = decompilerContext->shaderSource;
|
||||
std::string attributeNames;
|
||||
|
||||
if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex)
|
||||
{
|
||||
@ -164,24 +167,29 @@ namespace LatteDecompiler
|
||||
{
|
||||
if (decompilerContext->analyzer.inputAttributSemanticMask[i])
|
||||
{
|
||||
cemu_assert_debug(decompilerContext->output->resourceMappingVK.attributeMapping[i] >= 0);
|
||||
cemu_assert_debug(decompilerContext->output->resourceMappingMTL.attributeMapping[i] >= 0);
|
||||
|
||||
src->addFmt("uint4 attrDataSem{} [[attribute({})]];" _CRLF, i, (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]);
|
||||
src->addFmt("uint4 attrDataSem{}", i);
|
||||
if (decompilerContext->options->usesGeometryShader)
|
||||
attributeNames += "#define ATTRIBUTE_NAME" + std::to_string((sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]) + " attrDataSem" + std::to_string(i) + "\n";
|
||||
else
|
||||
src->addFmt(" [[attribute({})]]", (sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]);
|
||||
src->add(";" _CRLF);
|
||||
}
|
||||
}
|
||||
src->add("};" _CRLF _CRLF);
|
||||
}
|
||||
src->addFmt("{}", attributeNames);
|
||||
}
|
||||
|
||||
static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext)
|
||||
static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext, bool isRectVertexShader)
|
||||
{
|
||||
auto* src = shaderContext->shaderSource;
|
||||
|
||||
src->add("struct VertexOut {" _CRLF);
|
||||
|
||||
src->add("float4 position [[position]];" _CRLF);
|
||||
if (shaderContext->analyzer.outputPointSize)
|
||||
src->add("float pointSize[[point_size]];" _CRLF);
|
||||
src->add("float pointSize [[point_size]];" _CRLF);
|
||||
|
||||
LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();
|
||||
auto parameterMask = shaderContext->shader->outputParameterMask;
|
||||
@ -206,15 +214,25 @@ namespace LatteDecompiler
|
||||
continue; // no ps input
|
||||
|
||||
src->addFmt("float4 passParameterSem{}", psInputTable->import[psInputIndex].semanticId);
|
||||
src->addFmt(" [[user(locn{})]]", psInputIndex);
|
||||
if (psInputTable->import[psInputIndex].isFlat)
|
||||
src->add(" [[flat]]");
|
||||
if (psInputTable->import[psInputIndex].isNoPerspective)
|
||||
src->add(" [[center_no_perspective]]");
|
||||
if (!isRectVertexShader)
|
||||
{
|
||||
src->addFmt(" [[user(locn{})]]", psInputIndex);
|
||||
if (psInputTable->import[psInputIndex].isFlat)
|
||||
src->add(" [[flat]]");
|
||||
if (psInputTable->import[psInputIndex].isNoPerspective)
|
||||
src->add(" [[center_no_perspective]]");
|
||||
}
|
||||
src->addFmt(";" _CRLF);
|
||||
}
|
||||
|
||||
src->add("};" _CRLF _CRLF);
|
||||
|
||||
if (isRectVertexShader)
|
||||
{
|
||||
src->add("struct ObjectPayload {" _CRLF);
|
||||
src->add("VertexOut vertexOut[VERTICES_PER_VERTEX_PRIMITIVE];" _CRLF);
|
||||
src->add("};" _CRLF _CRLF);
|
||||
}
|
||||
}
|
||||
|
||||
static void _emitPSInputs(LatteDecompilerShaderContext* shaderContext)
|
||||
@ -243,18 +261,17 @@ namespace LatteDecompiler
|
||||
src->add("};" _CRLF _CRLF);
|
||||
}
|
||||
|
||||
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext)
|
||||
static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
|
||||
{
|
||||
auto src = decompilerContext->shaderSource;
|
||||
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
|
||||
{
|
||||
_emitAttributes(decompilerContext);
|
||||
_emitVSOutputs(decompilerContext);
|
||||
}
|
||||
else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
|
||||
{
|
||||
_emitPSInputs(decompilerContext);
|
||||
_emitPSInputs(decompilerContext);
|
||||
|
||||
src->add("struct FragmentOut {" _CRLF);
|
||||
|
||||
@ -277,10 +294,111 @@ namespace LatteDecompiler
|
||||
|
||||
src->add("};" _CRLF _CRLF);
|
||||
}
|
||||
|
||||
if (!decompilerContext->options->usesGeometryShader)
|
||||
{
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
|
||||
_emitVSOutputs(decompilerContext, isRectVertexShader);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||
{
|
||||
src->add("struct VertexOut {" _CRLF);
|
||||
uint32 ringParameterCountVS2GS = 0;
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex)
|
||||
{
|
||||
ringParameterCountVS2GS = decompilerContext->shader->ringParameterCount;
|
||||
}
|
||||
else
|
||||
{
|
||||
ringParameterCountVS2GS = decompilerContext->shader->ringParameterCountFromPrevStage;
|
||||
}
|
||||
for (uint32 f = 0; f < ringParameterCountVS2GS; f++)
|
||||
src->addFmt("int4 passParameterSem{};" _CRLF, f);
|
||||
src->add("};" _CRLF _CRLF);
|
||||
src->add("struct ObjectPayload {" _CRLF);
|
||||
src->add("VertexOut vertexOut[VERTICES_PER_VERTEX_PRIMITIVE];" _CRLF);
|
||||
src->add("};" _CRLF _CRLF);
|
||||
}
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||
{
|
||||
// parameters shared between geometry and pixel shader
|
||||
uint32 ringItemSize = decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF;
|
||||
if ((ringItemSize & 0xF) != 0)
|
||||
debugBreakpoint();
|
||||
if (((decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF) & 0xF) != 0)
|
||||
debugBreakpoint();
|
||||
|
||||
src->add("struct GeometryOut {" _CRLF);
|
||||
src->add("float4 position [[position]];" _CRLF);
|
||||
for (sint32 p = 0; p < decompilerContext->parsedGSCopyShader->numParam; p++)
|
||||
{
|
||||
if (decompilerContext->parsedGSCopyShader->paramMapping[p].exportType != 2)
|
||||
continue;
|
||||
src->addFmt("float4 passParameterSem{} [[user(locn{})]];" _CRLF, (sint32)decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam, decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam & 0x7F);
|
||||
}
|
||||
src->add("};" _CRLF _CRLF);
|
||||
|
||||
const uint32 MAX_VERTEX_COUNT = 32;
|
||||
|
||||
// Define the mesh shader output type
|
||||
src->addFmt("using MeshType = mesh<GeometryOut, void, {}, GET_PRIMITIVE_COUNT({}), topology::MTL_PRIMITIVE_TYPE>;" _CRLF, MAX_VERTEX_COUNT, MAX_VERTEX_COUNT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void emitHeader(LatteDecompilerShaderContext* decompilerContext)
|
||||
static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
|
||||
{
|
||||
auto src = decompilerContext->shaderSource;
|
||||
|
||||
if ((decompilerContext->options->usesGeometryShader || isRectVertexShader) && (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry))
|
||||
{
|
||||
// TODO: make vsOutPrimType parth of the shader hash
|
||||
LattePrimitiveMode vsOutPrimType = static_cast<LattePrimitiveMode>(decompilerContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]);
|
||||
uint32 gsOutPrimType = decompilerContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE];
|
||||
|
||||
switch (vsOutPrimType)
|
||||
{
|
||||
case LattePrimitiveMode::POINTS:
|
||||
src->add("#define VERTICES_PER_VERTEX_PRIMITIVE 1" _CRLF);
|
||||
break;
|
||||
case LattePrimitiveMode::LINES:
|
||||
src->add("#define VERTICES_PER_VERTEX_PRIMITIVE 2" _CRLF);
|
||||
break;
|
||||
case LattePrimitiveMode::TRIANGLES:
|
||||
src->add("#define VERTICES_PER_VERTEX_PRIMITIVE 3" _CRLF);
|
||||
break;
|
||||
case LattePrimitiveMode::RECTS:
|
||||
src->add("#define VERTICES_PER_VERTEX_PRIMITIVE 3" _CRLF);
|
||||
break;
|
||||
default:
|
||||
cemu_assert_suspicious();
|
||||
break;
|
||||
}
|
||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||
{
|
||||
switch (gsOutPrimType)
|
||||
{
|
||||
case 0: // Point
|
||||
src->add("#define MTL_PRIMITIVE_TYPE point" _CRLF);
|
||||
src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount / 1)" _CRLF);
|
||||
break;
|
||||
case 1: // Line strip
|
||||
src->add("#define MTL_PRIMITIVE_TYPE line" _CRLF);
|
||||
src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount - 1)" _CRLF);
|
||||
break;
|
||||
case 2: // Triangle strip
|
||||
src->add("#define MTL_PRIMITIVE_TYPE triangle" _CRLF);
|
||||
src->add("#define GET_PRIMITIVE_COUNT(vertexCount) (vertexCount - 2)" _CRLF);
|
||||
break;
|
||||
default:
|
||||
cemu_assert_suspicious();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const bool dump_shaders_enabled = ActiveSettings::DumpShadersEnabled();
|
||||
if(dump_shaders_enabled)
|
||||
decompilerContext->shaderSource->add("// start of shader inputs/outputs, predetermined by Cemu. Do not touch" _CRLF);
|
||||
@ -289,7 +407,7 @@ namespace LatteDecompiler
|
||||
// uniform buffers
|
||||
_emitUniformBuffers(decompilerContext);
|
||||
// inputs and outputs
|
||||
_emitInputsAndOutputs(decompilerContext);
|
||||
_emitInputsAndOutputs(decompilerContext, isRectVertexShader);
|
||||
|
||||
if (dump_shaders_enabled)
|
||||
decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF);
|
||||
@ -306,9 +424,9 @@ namespace LatteDecompiler
|
||||
if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess())
|
||||
continue;
|
||||
|
||||
cemu_assert_debug(decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] >= 0);
|
||||
cemu_assert_debug(decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i] >= 0);
|
||||
|
||||
src->addFmt(", constant UBuff{}& ubuff{} [[buffer({})]]", i, i, (sint32)decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i]);
|
||||
src->addFmt(", constant UBuff{}& ubuff{} [[buffer({})]]", i, i, (sint32)decompilerContext->output->resourceMappingMTL.uniformBuffersBindingPoint[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -354,46 +472,58 @@ namespace LatteDecompiler
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
|
||||
uint32 binding = shaderContext->output->resourceMappingVK.textureUnitToBindingPoint[i];
|
||||
//uint32 textureBinding = shaderContext->output->resourceMappingVK.textureUnitToBindingPoint[i] % 31;
|
||||
uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i];
|
||||
//uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31;
|
||||
//uint32 samplerBinding = textureBinding % 16;
|
||||
src->addFmt(" tex{} [[texture({})]]", i, binding);
|
||||
src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding);
|
||||
}
|
||||
}
|
||||
|
||||
static void emitInputs(LatteDecompilerShaderContext* decompilerContext)
|
||||
static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader)
|
||||
{
|
||||
auto src = decompilerContext->shaderSource;
|
||||
|
||||
switch (decompilerContext->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
src->add("VertexIn");
|
||||
break;
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
src->add("FragmentIn");
|
||||
break;
|
||||
}
|
||||
|
||||
src->add(" in [[stage_in]], constant SupportBuffer& supportBuffer [[buffer(30)]]");
|
||||
switch (decompilerContext->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
src->add(", uint vid [[vertex_id]]");
|
||||
src->add(", uint iid [[instance_id]]");
|
||||
|
||||
// streamout buffer (transform feedback)
|
||||
if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite)
|
||||
if (decompilerContext->options->usesGeometryShader || isRectVertexShader)
|
||||
{
|
||||
src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingVK.tfStorageBindingPoint);
|
||||
src->add("object_data ObjectPayload& objectPayload [[payload]]");
|
||||
src->add(", mesh_grid_properties meshGridProperties");
|
||||
src->add(", uint tig [[threadgroup_position_in_grid]]");
|
||||
src->add(", uint tid [[thread_index_in_threadgroup]]");
|
||||
src->add(" VERTEX_BUFFER_DEFINITIONS");
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
src->add("VertexIn in [[stage_in]]");
|
||||
src->add(", uint vid [[vertex_id]]");
|
||||
src->add(", uint iid [[instance_id]]");
|
||||
}
|
||||
break;
|
||||
case LatteConst::ShaderType::Geometry:
|
||||
src->add("MeshType mesh");
|
||||
src->add(", const object_data ObjectPayload& objectPayload [[payload]]");
|
||||
break;
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
src->add("FragmentIn in [[stage_in]]");
|
||||
src->add(", bool frontFacing [[front_facing]]");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint >= 0)
|
||||
src->addFmt(", constant SupportBuffer& supportBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.uniformVarsBufferBindingPoint);
|
||||
|
||||
// streamout buffer (transform feedback)
|
||||
if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||
{
|
||||
if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite)
|
||||
src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingMTL.tfStorageBindingPoint);
|
||||
}
|
||||
|
||||
// uniform buffers
|
||||
_emitUniformBufferDefinitions(decompilerContext);
|
||||
// textures
|
||||
|
@ -260,6 +260,8 @@ struct LatteDecompilerShaderContext
|
||||
// emitter
|
||||
bool hasUniformVarBlock;
|
||||
sint32 currentBindingPointVK{};
|
||||
sint32 currentBufferBindingPointMTL{};
|
||||
sint32 currentTextureBindingPointMTL{};
|
||||
struct ALUClauseTemporariesState* aluPVPSState{nullptr};
|
||||
// misc
|
||||
std::vector<LatteDecompilerSubroutineInfo> list_subroutines;
|
||||
|
@ -29,7 +29,7 @@ std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_COLOR_FORMAT_TABLE = {
|
||||
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, MetalDataType::FLOAT, 4}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGB10A2Unorm, MetalDataType::FLOAT, 4}}, // TODO: sRGB?
|
||||
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, MetalDataType::FLOAT, 4}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, MetalDataType::FLOAT, 2}},
|
||||
@ -47,10 +47,10 @@ std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_COLOR_FORMAT_TABLE = {
|
||||
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, MetalDataType::UINT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, MetalDataType::FLOAT, 8}},
|
||||
{Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
|
||||
{Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 0}}, // TODO
|
||||
{Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
|
||||
{Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
|
||||
{Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
|
||||
{Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatRGBA8Uint, MetalDataType::UINT, 4}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}}, // TODO: correct?
|
||||
{Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
|
||||
{Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, MetalDataType::FLOAT, 4}},
|
||||
{Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, MetalDataType::UINT, 4}},
|
||||
|
@ -21,15 +21,13 @@ struct MetalPixelFormatSupport
|
||||
};
|
||||
|
||||
#define MAX_MTL_BUFFERS 31
|
||||
// Buffer index 30 is reserved for the support buffer, buffer indices 27-29 are reserved for the helper shaders
|
||||
#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 5)
|
||||
// TODO: don't harcdode the support buffer binding
|
||||
#define MTL_SUPPORT_BUFFER_BINDING 30
|
||||
// Buffer indices 28-30 are reserved for the helper shaders
|
||||
#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 4)
|
||||
|
||||
#define MAX_MTL_TEXTURES 31
|
||||
#define MAX_MTL_SAMPLERS 16
|
||||
|
||||
#define GET_HELPER_BUFFER_BINDING(index) (27 + index)
|
||||
#define GET_HELPER_BUFFER_BINDING(index) (28 + index)
|
||||
#define GET_HELPER_TEXTURE_BINDING(index) (29 + index)
|
||||
#define GET_HELPER_SAMPLER_BINDING(index) (14 + index)
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
|
||||
#include "Common/precompiled.h"
|
||||
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
|
||||
MetalVertexBufferCache::~MetalVertexBufferCache()
|
||||
{
|
||||
@ -42,11 +43,8 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
|
||||
renderCommandEncoder->setRenderPipelineState(m_restrideBufferPipeline->GetRenderPipelineState());
|
||||
m_mtlr->GetEncoderState().m_renderPipelineState = m_restrideBufferPipeline->GetRenderPipelineState();
|
||||
|
||||
MTL::Buffer* buffers[] = {bufferCache, buffer};
|
||||
size_t offsets[] = {vertexBufferRange.offset, restrideInfo.allocation.offset};
|
||||
renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(GET_HELPER_BUFFER_BINDING(0), 2));
|
||||
m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = INVALID_OFFSET;
|
||||
m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(1)] = INVALID_OFFSET;
|
||||
m_mtlr->SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, bufferCache, vertexBufferRange.offset, GET_HELPER_BUFFER_BINDING(0));
|
||||
m_mtlr->SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, buffer, restrideInfo.allocation.offset, GET_HELPER_BUFFER_BINDING(1));
|
||||
|
||||
struct
|
||||
{
|
||||
@ -54,16 +52,7 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
|
||||
uint32 newStride;
|
||||
} strideData = {static_cast<uint32>(stride), static_cast<uint32>(newStride)};
|
||||
renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), GET_HELPER_BUFFER_BINDING(2));
|
||||
m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(2)] = INVALID_OFFSET;
|
||||
|
||||
// TODO: remove
|
||||
uint32 vertexCount = vertexBufferRange.size / stride;
|
||||
if (vertexCount * strideData.oldStride > buffers[0]->length() - offsets[0]) {
|
||||
throw std::runtime_error("Source buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.oldStride) + " > " + std::to_string(buffers[0]->length()) + " - " + std::to_string(offsets[0]) + ")");
|
||||
}
|
||||
if (vertexCount * strideData.newStride > buffers[1]->length() - offsets[1]) {
|
||||
throw std::runtime_error("Destination buffer overflow (" + std::to_string(vertexCount) + " * " + std::to_string(strideData.newStride) + " > " + std::to_string(buffers[1]->length()) + " - " + std::to_string(offsets[1]) + ")");
|
||||
}
|
||||
m_mtlr->GetEncoderState().m_buffers[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(2)] = {nullptr};
|
||||
|
||||
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), vertexBufferRange.size / stride);
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Foundation/NSObject.hpp"
|
||||
#include "HW/Latte/Core/LatteShader.h"
|
||||
#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
|
||||
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#include "HW/Latte/Renderer/Metal/RendererShaderMtl.h"
|
||||
@ -11,6 +12,175 @@
|
||||
#include "HW/Latte/ISA/RegDefines.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
|
||||
static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
|
||||
{
|
||||
auto parameterMask = vertexShader->outputParameterMask;
|
||||
for (uint32 i = 0; i < 32; i++)
|
||||
{
|
||||
if ((parameterMask & (1 << i)) == 0)
|
||||
continue;
|
||||
sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
|
||||
if (vsSemanticId < 0)
|
||||
continue;
|
||||
// make sure PS has matching input
|
||||
if (!psInputTable->hasPSImportForSemanticId(vsSemanticId))
|
||||
continue;
|
||||
gsSrc.append(fmt::format("out.passParameterSem{} = objectPayload.vertexOut[{}].passParameterSem{};\r\n", vsSemanticId, vIdx, vsSemanticId));
|
||||
}
|
||||
gsSrc.append(fmt::format("out.position = objectPayload.vertexOut[{}].position;\r\n", vIdx));
|
||||
gsSrc.append(fmt::format("mesh.set_vertex({}, out);\r\n", vIdx));
|
||||
}
|
||||
|
||||
static void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister)
|
||||
{
|
||||
auto parameterMask = vertexShader->outputParameterMask;
|
||||
for (uint32 i = 0; i < 32; i++)
|
||||
{
|
||||
if ((parameterMask & (1 << i)) == 0)
|
||||
continue;
|
||||
sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
|
||||
if (vsSemanticId < 0)
|
||||
continue;
|
||||
// make sure PS has matching input
|
||||
if (!psInputTable->hasPSImportForSemanticId(vsSemanticId))
|
||||
continue;
|
||||
gsSrc.append(fmt::format("out.passParameterSem{} = gen4thVertex{}(objectPayload.vertexOut[0].passParameterSem{}, objectPayload.vertexOut[1].passParameterSem{}, objectPayload.vertexOut[2].passParameterSem{});\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId));
|
||||
}
|
||||
gsSrc.append(fmt::format("out.position = gen4thVertex{}(objectPayload.vertexOut[0].position, objectPayload.vertexOut[1].position, objectPayload.vertexOut[2].position);\r\n", variant));
|
||||
gsSrc.append(fmt::format("mesh.set_vertex(3, out);\r\n"));
|
||||
}
|
||||
|
||||
static void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister)
|
||||
{
|
||||
sint32 pList[4] = { p0, p1, p2, p3 };
|
||||
for (sint32 i = 0; i < 4; i++)
|
||||
{
|
||||
if (pList[i] == 3)
|
||||
rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister);
|
||||
else
|
||||
rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister);
|
||||
}
|
||||
gsSrc.append(fmt::format("mesh.set_index(0, {});\r\n", pList[0]));
|
||||
gsSrc.append(fmt::format("mesh.set_index(1, {});\r\n", pList[1]));
|
||||
gsSrc.append(fmt::format("mesh.set_index(2, {});\r\n", pList[2]));
|
||||
gsSrc.append(fmt::format("mesh.set_index(3, {});\r\n", pList[1]));
|
||||
gsSrc.append(fmt::format("mesh.set_index(4, {});\r\n", pList[2]));
|
||||
gsSrc.append(fmt::format("mesh.set_index(5, {});\r\n", pList[3]));
|
||||
}
|
||||
|
||||
static RendererShaderMtl* rectsEmulationGS_generate(MetalRenderer* metalRenderer, const LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister)
|
||||
{
|
||||
std::string gsSrc;
|
||||
gsSrc.append("#include <metal_stdlib>\r\n");
|
||||
gsSrc.append("using namespace metal;\r\n");
|
||||
|
||||
LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();
|
||||
|
||||
// inputs & outputs
|
||||
std::string vertexOutDefinition = "struct VertexOut {\r\n";
|
||||
vertexOutDefinition += "float4 position;\r\n";
|
||||
std::string geometryOutDefinition = "struct GeometryOut {\r\n";
|
||||
geometryOutDefinition += "float4 position [[position]];\r\n";
|
||||
auto parameterMask = vertexShader->outputParameterMask;
|
||||
for (sint32 f = 0; f < 2; f++)
|
||||
{
|
||||
for (uint32 i = 0; i < 32; i++)
|
||||
{
|
||||
if ((parameterMask & (1 << i)) == 0)
|
||||
continue;
|
||||
sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
|
||||
if (vsSemanticId < 0)
|
||||
continue;
|
||||
auto psImport = psInputTable->getPSImportBySemanticId(vsSemanticId);
|
||||
if (psImport == nullptr)
|
||||
continue;
|
||||
|
||||
if (f == 0)
|
||||
{
|
||||
vertexOutDefinition += fmt::format("float4 passParameterSem{};\r\n", vsSemanticId);
|
||||
}
|
||||
else
|
||||
{
|
||||
geometryOutDefinition += fmt::format("float4 passParameterSem{}", vsSemanticId);
|
||||
|
||||
geometryOutDefinition += fmt::format(" [[user(locn{})]]", psInputTable->getPSImportLocationBySemanticId(vsSemanticId));
|
||||
if (psImport->isFlat)
|
||||
geometryOutDefinition += " [[flat]]";
|
||||
if (psImport->isNoPerspective)
|
||||
geometryOutDefinition += " [[center_no_perspective]]";
|
||||
geometryOutDefinition += ";\r\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
vertexOutDefinition += "};\r\n";
|
||||
geometryOutDefinition += "};\r\n";
|
||||
|
||||
gsSrc.append(vertexOutDefinition);
|
||||
gsSrc.append(geometryOutDefinition);
|
||||
|
||||
gsSrc.append("struct ObjectPayload {\r\n");
|
||||
gsSrc.append("VertexOut vertexOut[3];\r\n");
|
||||
gsSrc.append("};\r\n");
|
||||
|
||||
// gen function
|
||||
gsSrc.append("float4 gen4thVertexA(float4 a, float4 b, float4 c)\r\n");
|
||||
gsSrc.append("{\r\n");
|
||||
gsSrc.append("return b - (c - a);\r\n");
|
||||
gsSrc.append("}\r\n");
|
||||
|
||||
gsSrc.append("float4 gen4thVertexB(float4 a, float4 b, float4 c)\r\n");
|
||||
gsSrc.append("{\r\n");
|
||||
gsSrc.append("return c - (b - a);\r\n");
|
||||
gsSrc.append("}\r\n");
|
||||
|
||||
gsSrc.append("float4 gen4thVertexC(float4 a, float4 b, float4 c)\r\n");
|
||||
gsSrc.append("{\r\n");
|
||||
gsSrc.append("return c + (b - a);\r\n");
|
||||
gsSrc.append("}\r\n");
|
||||
|
||||
// main
|
||||
gsSrc.append("using MeshType = mesh<GeometryOut, void, 4, 2, topology::triangle>;\r\n");
|
||||
gsSrc.append("[[mesh, max_total_threads_per_threadgroup(1)]]\r\n");
|
||||
gsSrc.append("void main0(MeshType mesh, const object_data ObjectPayload& objectPayload [[payload]])\r\n");
|
||||
gsSrc.append("{\r\n");
|
||||
gsSrc.append("GeometryOut out;\r\n");
|
||||
|
||||
// there are two possible winding orders that need different triangle generation:
|
||||
// 0 1
|
||||
// 2 3
|
||||
// and
|
||||
// 0 1
|
||||
// 3 2
|
||||
// all others are just symmetries of these cases
|
||||
|
||||
// we can determine the case by comparing the distance 0<->1 and 0<->2
|
||||
|
||||
gsSrc.append("float dist0_1 = length(objectPayload.vertexOut[1].position.xy - objectPayload.vertexOut[0].position.xy);\r\n");
|
||||
gsSrc.append("float dist0_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[0].position.xy);\r\n");
|
||||
gsSrc.append("float dist1_2 = length(objectPayload.vertexOut[2].position.xy - objectPayload.vertexOut[1].position.xy);\r\n");
|
||||
|
||||
// emit vertices
|
||||
gsSrc.append("if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n");
|
||||
gsSrc.append("{\r\n");
|
||||
// p0 to p1 is diagonal
|
||||
rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister);
|
||||
gsSrc.append("} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n");
|
||||
// p0 to p2 is diagonal
|
||||
rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister);
|
||||
gsSrc.append("} else {\r\n");
|
||||
// p1 to p2 is diagonal
|
||||
rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister);
|
||||
gsSrc.append("}\r\n");
|
||||
|
||||
gsSrc.append("mesh.set_primitive_count(2);\r\n");
|
||||
|
||||
gsSrc.append("}\r\n");
|
||||
|
||||
auto mtlShader = new RendererShaderMtl(metalRenderer, RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc);
|
||||
|
||||
return mtlShader;
|
||||
}
|
||||
|
||||
#define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF
|
||||
|
||||
uint64 s_cacheTitleId = INVALID_TITLE_ID;
|
||||
@ -18,6 +188,68 @@ uint64 s_cacheTitleId = INVALID_TITLE_ID;
|
||||
extern std::atomic_int g_compiled_shaders_total;
|
||||
extern std::atomic_int g_compiled_shaders_async;
|
||||
|
||||
template<typename T>
|
||||
void SetFragmentState(T* desc, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr)
|
||||
{
|
||||
// Color attachments
|
||||
const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL;
|
||||
uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
|
||||
uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK();
|
||||
for (uint8 i = 0; i < 8; i++)
|
||||
{
|
||||
const auto& colorBuffer = activeFBO->colorBuffer[i];
|
||||
auto texture = static_cast<LatteTextureViewMtl*>(colorBuffer.texture);
|
||||
if (!texture)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
auto colorAttachment = desc->colorAttachments()->object(i);
|
||||
colorAttachment->setPixelFormat(texture->GetRGBAView()->pixelFormat());
|
||||
colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF));
|
||||
|
||||
// Blending
|
||||
bool blendEnabled = ((blendEnableMask & (1 << i))) != 0;
|
||||
// Only float data type is blendable
|
||||
if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT)
|
||||
{
|
||||
colorAttachment->setBlendingEnabled(true);
|
||||
|
||||
const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i];
|
||||
|
||||
auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN());
|
||||
auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND());
|
||||
auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND());
|
||||
|
||||
colorAttachment->setRgbBlendOperation(rgbBlendOp);
|
||||
colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor);
|
||||
colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor);
|
||||
if (blendControlReg.get_SEPARATE_ALPHA_BLEND())
|
||||
{
|
||||
colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN()));
|
||||
colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND()));
|
||||
colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND()));
|
||||
}
|
||||
else
|
||||
{
|
||||
colorAttachment->setAlphaBlendOperation(rgbBlendOp);
|
||||
colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor);
|
||||
colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Depth stencil attachment
|
||||
if (activeFBO->depthBuffer.texture)
|
||||
{
|
||||
auto texture = static_cast<LatteTextureViewMtl*>(activeFBO->depthBuffer.texture);
|
||||
desc->setDepthAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat());
|
||||
if (activeFBO->depthBuffer.hasStencil)
|
||||
{
|
||||
desc->setStencilAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MetalPipelineCache::ShaderCacheLoading_begin(uint64 cacheTitleId)
|
||||
{
|
||||
s_cacheTitleId = cacheTitleId;
|
||||
@ -53,9 +285,9 @@ MetalPipelineCache::~MetalPipelineCache()
|
||||
m_binaryArchiveURL->release();
|
||||
}
|
||||
|
||||
MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr)
|
||||
MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr)
|
||||
{
|
||||
uint64 stateHash = CalculatePipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr);
|
||||
uint64 stateHash = CalculateRenderPipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr);
|
||||
auto& pipeline = m_pipelineCache[stateHash];
|
||||
if (pipeline)
|
||||
return pipeline;
|
||||
@ -92,7 +324,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
|
||||
|
||||
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
|
||||
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
|
||||
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
||||
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
||||
bufferStride = Align(bufferStride, 4);
|
||||
|
||||
// HACK
|
||||
@ -117,6 +349,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
|
||||
|
||||
auto mtlVertexShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
|
||||
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
|
||||
mtlVertexShader->CompileVertexFunction();
|
||||
mtlPixelShader->CompileFragmentFunction(activeFBO);
|
||||
|
||||
// Render pipeline state
|
||||
@ -126,65 +359,18 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
|
||||
// TODO: don't always set the vertex descriptor?
|
||||
desc->setVertexDescriptor(vertexDescriptor);
|
||||
|
||||
// Color attachments
|
||||
const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = LatteGPUState.contextNew.CB_COLOR_CONTROL;
|
||||
uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
|
||||
uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK();
|
||||
for (uint8 i = 0; i < 8; i++)
|
||||
{
|
||||
const auto& colorBuffer = activeFBO->colorBuffer[i];
|
||||
auto texture = static_cast<LatteTextureViewMtl*>(colorBuffer.texture);
|
||||
if (!texture)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
auto colorAttachment = desc->colorAttachments()->object(i);
|
||||
colorAttachment->setPixelFormat(texture->GetRGBAView()->pixelFormat());
|
||||
colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF));
|
||||
SetFragmentState(desc, activeFBO, lcr);
|
||||
|
||||
// Blending
|
||||
bool blendEnabled = ((blendEnableMask & (1 << i))) != 0;
|
||||
// Only float data type is blendable
|
||||
if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT)
|
||||
{
|
||||
colorAttachment->setBlendingEnabled(true);
|
||||
TryLoadBinaryArchive();
|
||||
|
||||
const auto& blendControlReg = LatteGPUState.contextNew.CB_BLENDN_CONTROL[i];
|
||||
|
||||
auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN());
|
||||
auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND());
|
||||
auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND());
|
||||
|
||||
colorAttachment->setRgbBlendOperation(rgbBlendOp);
|
||||
colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor);
|
||||
colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor);
|
||||
if (blendControlReg.get_SEPARATE_ALPHA_BLEND())
|
||||
{
|
||||
colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN()));
|
||||
colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND()));
|
||||
colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND()));
|
||||
}
|
||||
else
|
||||
{
|
||||
colorAttachment->setAlphaBlendOperation(rgbBlendOp);
|
||||
colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor);
|
||||
colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Depth stencil attachment
|
||||
if (activeFBO->depthBuffer.texture)
|
||||
{
|
||||
auto texture = static_cast<LatteTextureViewMtl*>(activeFBO->depthBuffer.texture);
|
||||
desc->setDepthAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat());
|
||||
if (activeFBO->depthBuffer.hasStencil)
|
||||
{
|
||||
desc->setStencilAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat());
|
||||
}
|
||||
}
|
||||
|
||||
LoadBinary(desc);
|
||||
// Load binary
|
||||
if (m_binaryArchive)
|
||||
{
|
||||
NS::Object* binArchives[] = {m_binaryArchive};
|
||||
auto binaryArchives = NS::Array::alloc()->init(binArchives, 1);
|
||||
desc->setBinaryArchives(binaryArchives);
|
||||
binaryArchives->release();
|
||||
}
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
@ -210,10 +396,21 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
|
||||
{
|
||||
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
return nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
SaveBinary(desc);
|
||||
// Save binary
|
||||
if (m_binaryArchive)
|
||||
{
|
||||
NS::Error* error = nullptr;
|
||||
m_binaryArchive->addRenderPipelineFunctions(desc, &error);
|
||||
if (error)
|
||||
{
|
||||
debug_printf("error saving render pipeline functions: %s\n", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//newPipelineCount++;
|
||||
@ -229,7 +426,65 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr)
|
||||
MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr, Renderer::INDEX_TYPE hostIndexType)
|
||||
{
|
||||
uint64 stateHash = CalculateRenderPipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr);
|
||||
|
||||
stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE];
|
||||
stateHash = std::rotl<uint64>(stateHash, 7);
|
||||
|
||||
stateHash += (uint8)hostIndexType;
|
||||
stateHash = std::rotl<uint64>(stateHash, 7); // TODO: 7?s
|
||||
|
||||
auto& pipeline = m_pipelineCache[stateHash];
|
||||
if (pipeline)
|
||||
return pipeline;
|
||||
|
||||
auto mtlObjectShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
|
||||
RendererShaderMtl* mtlMeshShader;
|
||||
if (geometryShader)
|
||||
{
|
||||
mtlMeshShader = static_cast<RendererShaderMtl*>(geometryShader->shader);
|
||||
}
|
||||
else
|
||||
{
|
||||
// If there is no geometry shader, it means that we are emulating rects
|
||||
mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
|
||||
}
|
||||
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
|
||||
mtlObjectShader->CompileObjectFunction(lcr, fetchShader, vertexShader, hostIndexType);
|
||||
mtlPixelShader->CompileFragmentFunction(activeFBO);
|
||||
|
||||
// Render pipeline state
|
||||
MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init();
|
||||
desc->setObjectFunction(mtlObjectShader->GetFunction());
|
||||
desc->setMeshFunction(mtlMeshShader->GetFunction());
|
||||
desc->setFragmentFunction(mtlPixelShader->GetFunction());
|
||||
|
||||
SetFragmentState(desc, activeFBO, lcr);
|
||||
|
||||
TryLoadBinaryArchive();
|
||||
|
||||
// Load binary
|
||||
// TODO: no binary archives? :(
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
desc->setLabel(GetLabel("Mesh pipeline state", desc));
|
||||
#endif
|
||||
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
|
||||
if (error)
|
||||
{
|
||||
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
return nullptr;
|
||||
}
|
||||
desc->release();
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr)
|
||||
{
|
||||
// Hash
|
||||
uint64 stateHash = 0;
|
||||
@ -260,9 +515,6 @@ uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchSh
|
||||
stateHash += fetchShader->getVkPipelineHashFragment();
|
||||
stateHash = std::rotl<uint64>(stateHash, 7);
|
||||
|
||||
stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE];
|
||||
stateHash = std::rotl<uint64>(stateHash, 7);
|
||||
|
||||
stateHash += lcr.GetRawView()[mmVGT_STRMOUT_EN];
|
||||
stateHash = std::rotl<uint64>(stateHash, 7);
|
||||
|
||||
@ -339,30 +591,3 @@ void MetalPipelineCache::TryLoadBinaryArchive()
|
||||
}
|
||||
desc->release();
|
||||
}
|
||||
|
||||
void MetalPipelineCache::LoadBinary(MTL::RenderPipelineDescriptor* desc)
|
||||
{
|
||||
TryLoadBinaryArchive();
|
||||
|
||||
if (!m_binaryArchive)
|
||||
return;
|
||||
|
||||
NS::Object* binArchives[] = {m_binaryArchive};
|
||||
auto binaryArchives = NS::Array::alloc()->init(binArchives, 1);
|
||||
desc->setBinaryArchives(binaryArchives);
|
||||
binaryArchives->release();
|
||||
}
|
||||
|
||||
void MetalPipelineCache::SaveBinary(MTL::RenderPipelineDescriptor* desc)
|
||||
{
|
||||
if (!m_binaryArchive)
|
||||
return;
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
m_binaryArchive->addRenderPipelineFunctions(desc, &error);
|
||||
if (error)
|
||||
{
|
||||
debug_printf("error saving render pipeline functions: %s\n", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
}
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include "HW/Latte/ISA/LatteReg.h"
|
||||
#include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
|
||||
class MetalPipelineCache
|
||||
{
|
||||
@ -15,7 +16,9 @@ public:
|
||||
MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
|
||||
~MetalPipelineCache();
|
||||
|
||||
MTL::RenderPipelineState* GetPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr);
|
||||
MTL::RenderPipelineState* GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr);
|
||||
|
||||
MTL::RenderPipelineState* GetMeshPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr, Renderer::INDEX_TYPE hostIndexType);
|
||||
|
||||
private:
|
||||
class MetalRenderer* m_mtlr;
|
||||
@ -25,11 +28,7 @@ private:
|
||||
NS::URL* m_binaryArchiveURL;
|
||||
MTL::BinaryArchive* m_binaryArchive;
|
||||
|
||||
uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr);
|
||||
uint64 CalculateRenderPipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr);
|
||||
|
||||
void TryLoadBinaryArchive();
|
||||
|
||||
void LoadBinary(MTL::RenderPipelineDescriptor* desc);
|
||||
|
||||
void SaveBinary(MTL::RenderPipelineDescriptor* desc);
|
||||
};
|
||||
|
@ -17,9 +17,8 @@
|
||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteIndices.h"
|
||||
#include "Cemu/Logging/CemuDebugLogging.h"
|
||||
#include "Common/precompiled.h"
|
||||
#include "HW/Latte/Core/LatteConst.h"
|
||||
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
#include "Metal/MTLDevice.hpp"
|
||||
#include "gui/guiWrapper.h"
|
||||
|
||||
#define COMMIT_TRESHOLD 256
|
||||
@ -96,15 +95,9 @@ MetalRenderer::MetalRenderer()
|
||||
|
||||
// Utility shader library
|
||||
|
||||
// Process the source first
|
||||
std::string processedUtilityShaderSource = utilityShaderSource;
|
||||
processedUtilityShaderSource.pop_back();
|
||||
processedUtilityShaderSource.erase(processedUtilityShaderSource.begin());
|
||||
processedUtilityShaderSource = "#include <metal_stdlib>\nusing namespace metal;\n#define GET_BUFFER_BINDING(index) (27 + index)\n#define GET_TEXTURE_BINDING(index) (29 + index)\n#define GET_SAMPLER_BINDING(index) (14 + index)\n" + processedUtilityShaderSource;
|
||||
|
||||
// Create the library
|
||||
NS::Error* error = nullptr;
|
||||
MTL::Library* utilityLibrary = m_device->newLibrary(ToNSString(processedUtilityShaderSource.c_str()), nullptr, &error);
|
||||
MTL::Library* utilityLibrary = m_device->newLibrary(ToNSString(utilityShaderSource), nullptr, &error);
|
||||
if (error)
|
||||
{
|
||||
debug_printf("failed to create utility library (error: %s)\n", error->localizedDescription()->utf8String());
|
||||
@ -587,8 +580,6 @@ void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* so
|
||||
return;
|
||||
}
|
||||
|
||||
MTL::Texture* textures[] = {srcTextureMtl->GetTexture(), dstTextureMtl->GetTexture()};
|
||||
|
||||
struct CopyParams
|
||||
{
|
||||
uint32 width;
|
||||
@ -605,11 +596,10 @@ void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* so
|
||||
renderCommandEncoder->setRenderPipelineState(m_copyTextureToTexturePipeline->GetRenderPipelineState());
|
||||
m_state.m_encoderState.m_renderPipelineState = m_copyTextureToTexturePipeline->GetRenderPipelineState();
|
||||
|
||||
renderCommandEncoder->setVertexTextures(textures, NS::Range(GET_HELPER_TEXTURE_BINDING(0), 2));
|
||||
m_state.m_encoderState.m_textures[METAL_SHADER_TYPE_VERTEX][GET_HELPER_TEXTURE_BINDING(0)] = {(LatteTextureViewMtl*)textures[0]};
|
||||
m_state.m_encoderState.m_textures[METAL_SHADER_TYPE_VERTEX][GET_HELPER_TEXTURE_BINDING(1)] = {(LatteTextureViewMtl*)textures[1]};
|
||||
SetTexture(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, srcTextureMtl->GetTexture(), GET_HELPER_TEXTURE_BINDING(0));
|
||||
SetTexture(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, dstTextureMtl->GetTexture(), GET_HELPER_TEXTURE_BINDING(1));
|
||||
renderCommandEncoder->setVertexBytes(¶ms, sizeof(params), GET_HELPER_BUFFER_BINDING(0));
|
||||
m_state.m_encoderState.m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = INVALID_OFFSET;
|
||||
m_state.m_encoderState.m_buffers[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = {nullptr};
|
||||
|
||||
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3));
|
||||
}
|
||||
@ -654,7 +644,6 @@ void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, u
|
||||
m_memoryManager->UntrackVertexBuffer(bufferIndex);
|
||||
}
|
||||
|
||||
buffer.needsRebind = true;
|
||||
buffer.offset = offset;
|
||||
buffer.size = size;
|
||||
buffer.restrideInfo = {};
|
||||
@ -664,7 +653,7 @@ void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, u
|
||||
|
||||
void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size)
|
||||
{
|
||||
m_state.m_uniformBufferOffsets[GetMtlShaderType(shaderType)][bufferIndex] = offset;
|
||||
m_state.m_uniformBufferOffsets[GetMtlGeneralShaderType(shaderType)][bufferIndex] = offset;
|
||||
}
|
||||
|
||||
RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader)
|
||||
@ -759,30 +748,40 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
|
||||
auto& encoderState = m_state.m_encoderState;
|
||||
|
||||
// Shaders
|
||||
LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
|
||||
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
|
||||
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
|
||||
if (!vertexShader)
|
||||
{
|
||||
debug_printf("no vertex function, skipping draw\n");
|
||||
return;
|
||||
}
|
||||
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
||||
|
||||
// Check if we need to end the render pass
|
||||
// Fragment shader is most likely to require a render pass flush, so check for it first
|
||||
bool endRenderPass = CheckIfRenderPassNeedsFlush(pixelShader);
|
||||
if (!endRenderPass)
|
||||
endRenderPass = CheckIfRenderPassNeedsFlush(vertexShader);
|
||||
if (!endRenderPass && geometryShader)
|
||||
endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader);
|
||||
|
||||
if (endRenderPass)
|
||||
EndEncoding();
|
||||
|
||||
// Render pass
|
||||
auto renderCommandEncoder = GetRenderCommandEncoder();
|
||||
|
||||
// Shaders
|
||||
LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
|
||||
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
|
||||
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
|
||||
if (!vertexShader || !static_cast<RendererShaderMtl*>(vertexShader->shader)->GetFunction())
|
||||
{
|
||||
debug_printf("no vertex function, skipping draw\n");
|
||||
return;
|
||||
}
|
||||
// TODO: remove this?
|
||||
if (geometryShader)
|
||||
{
|
||||
debug_printf("geometry shader aren't supported on Metal yet, skipping draw\n");
|
||||
return;
|
||||
}
|
||||
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
||||
// Primitive type
|
||||
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
|
||||
auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode);
|
||||
bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);
|
||||
|
||||
bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
|
||||
|
||||
// Depth stencil state
|
||||
// TODO: implement this somehow
|
||||
//auto depthControl = LatteGPUState.contextNew.DB_DEPTH_CONTROL;
|
||||
|
||||
// TODO
|
||||
// Disable depth write when there is no depth attachment
|
||||
//if (!m_state.m_lastUsedFBO->depthBuffer.texture)
|
||||
// depthControl.set_Z_WRITE_ENABLE(false);
|
||||
@ -815,11 +814,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
}
|
||||
}
|
||||
|
||||
// Primitive type
|
||||
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
|
||||
auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode);
|
||||
bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);
|
||||
|
||||
// Blend color
|
||||
float* blendColorConstant = (float*)LatteGPUState.contextRegister + Latte::REGADDR::CB_BLEND_RED;
|
||||
renderCommandEncoder->setBlendColor(blendColorConstant[0], blendColorConstant[1], blendColorConstant[2], blendColorConstant[3]);
|
||||
@ -956,23 +950,38 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
auto& vertexBufferRange = m_state.m_vertexBuffers[i];
|
||||
if (vertexBufferRange.offset != INVALID_OFFSET)
|
||||
{
|
||||
// Restride
|
||||
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7;
|
||||
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
||||
MTL::Buffer* buffer;
|
||||
size_t offset;
|
||||
|
||||
auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride);
|
||||
// Restride
|
||||
if (usesGeometryShader)
|
||||
{
|
||||
// Object shaders don't need restriding, since the attributes are fetched in the shader
|
||||
buffer = m_memoryManager->GetBufferCache();
|
||||
offset = m_state.m_vertexBuffers[i].offset;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7;
|
||||
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
||||
|
||||
auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride);
|
||||
|
||||
buffer = restridedBuffer.buffer;
|
||||
offset = restridedBuffer.offset;
|
||||
}
|
||||
|
||||
// Bind
|
||||
if (vertexBufferRange.needsRebind)
|
||||
{
|
||||
renderCommandEncoder->setVertexBuffer(restridedBuffer.buffer, restridedBuffer.offset, GET_MTL_VERTEX_BUFFER_INDEX(i));
|
||||
vertexBufferRange.needsRebind = false;
|
||||
}
|
||||
SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, usesGeometryShader), buffer, offset, GET_MTL_VERTEX_BUFFER_INDEX(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Render pipeline state
|
||||
MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, LatteGPUState.contextNew);
|
||||
MTL::RenderPipelineState* renderPipelineState;
|
||||
if (usesGeometryShader)
|
||||
renderPipelineState = m_pipelineCache->GetMeshPipelineState(fetchShader, vertexShader, geometryShader, pixelShader, m_state.m_lastUsedFBO, LatteGPUState.contextNew, hostIndexType);
|
||||
else
|
||||
renderPipelineState = m_pipelineCache->GetRenderPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, LatteGPUState.contextNew);
|
||||
if (renderPipelineState != encoderState.m_renderPipelineState)
|
||||
{
|
||||
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
|
||||
@ -984,19 +993,54 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
LatteStreamout_PrepareDrawcall(count, instanceCount);
|
||||
|
||||
// Uniform buffers, textures and samplers
|
||||
BindStageResources(renderCommandEncoder, vertexShader);
|
||||
BindStageResources(renderCommandEncoder, pixelShader);
|
||||
BindStageResources(renderCommandEncoder, vertexShader, usesGeometryShader);
|
||||
if (geometryShader)
|
||||
BindStageResources(renderCommandEncoder, geometryShader, usesGeometryShader);
|
||||
BindStageResources(renderCommandEncoder, pixelShader, usesGeometryShader);
|
||||
|
||||
// Draw
|
||||
MTL::Buffer* indexBuffer = nullptr;
|
||||
if (hostIndexType != INDEX_TYPE::NONE)
|
||||
indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex);
|
||||
if (usesGeometryShader)
|
||||
{
|
||||
auto mtlIndexType = GetMtlIndexType(hostIndexType);
|
||||
MTL::Buffer* indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex);
|
||||
renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance);
|
||||
} else
|
||||
{
|
||||
renderCommandEncoder->drawPrimitives(mtlPrimitiveType, baseVertex, count, instanceCount, baseInstance);
|
||||
if (indexBuffer)
|
||||
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding);
|
||||
|
||||
uint32 verticesPerPrimitive = 0;
|
||||
switch (primitiveMode)
|
||||
{
|
||||
case LattePrimitiveMode::POINTS:
|
||||
verticesPerPrimitive = 1;
|
||||
break;
|
||||
case LattePrimitiveMode::LINES:
|
||||
verticesPerPrimitive = 2;
|
||||
break;
|
||||
case LattePrimitiveMode::TRIANGLES:
|
||||
case LattePrimitiveMode::RECTS:
|
||||
verticesPerPrimitive = 3;
|
||||
break;
|
||||
default:
|
||||
debug_printf("invalid primitive mode %u\n", (uint32)primitiveMode);
|
||||
break;
|
||||
}
|
||||
|
||||
renderCommandEncoder->drawMeshThreadgroups(MTL::Size(count / verticesPerPrimitive, 1, 1), MTL::Size(verticesPerPrimitive, 1, 1), MTL::Size(1, 1, 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (indexBuffer)
|
||||
{
|
||||
auto mtlIndexType = GetMtlIndexType(hostIndexType);
|
||||
renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance);
|
||||
}
|
||||
else
|
||||
{
|
||||
renderCommandEncoder->drawPrimitives(mtlPrimitiveType, baseVertex, count, instanceCount, baseInstance);
|
||||
}
|
||||
}
|
||||
|
||||
m_state.m_isFirstDrawInRenderPass = false;
|
||||
|
||||
LatteStreamout_FinishDrawcall(false);
|
||||
|
||||
@ -1037,6 +1081,83 @@ void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offse
|
||||
buffer->didModifyRange(NS::Range(offset, size));
|
||||
}
|
||||
|
||||
void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index)
|
||||
{
|
||||
auto& boundBuffer = m_state.m_encoderState.m_buffers[shaderType][index];
|
||||
if (buffer == boundBuffer.m_buffer && offset == boundBuffer.m_offset)
|
||||
return;
|
||||
|
||||
// TODO: only set the offset if only offset changed
|
||||
|
||||
boundBuffer = {buffer, offset};
|
||||
|
||||
switch (shaderType)
|
||||
{
|
||||
case METAL_SHADER_TYPE_VERTEX:
|
||||
renderCommandEncoder->setVertexBuffer(buffer, offset, index);
|
||||
break;
|
||||
case METAL_SHADER_TYPE_OBJECT:
|
||||
renderCommandEncoder->setObjectBuffer(buffer, offset, index);
|
||||
break;
|
||||
case METAL_SHADER_TYPE_MESH:
|
||||
renderCommandEncoder->setMeshBuffer(buffer, offset, index);
|
||||
break;
|
||||
case METAL_SHADER_TYPE_FRAGMENT:
|
||||
renderCommandEncoder->setFragmentBuffer(buffer, offset, index);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void MetalRenderer::SetTexture(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Texture* texture, uint32 index)
|
||||
{
|
||||
auto& boundTexture = m_state.m_encoderState.m_textures[shaderType][index];
|
||||
if (texture == boundTexture)
|
||||
return;
|
||||
|
||||
boundTexture = texture;
|
||||
|
||||
switch (shaderType)
|
||||
{
|
||||
case METAL_SHADER_TYPE_VERTEX:
|
||||
renderCommandEncoder->setVertexTexture(texture, index);
|
||||
break;
|
||||
case METAL_SHADER_TYPE_OBJECT:
|
||||
renderCommandEncoder->setObjectTexture(texture, index);
|
||||
break;
|
||||
case METAL_SHADER_TYPE_MESH:
|
||||
renderCommandEncoder->setMeshTexture(texture, index);
|
||||
break;
|
||||
case METAL_SHADER_TYPE_FRAGMENT:
|
||||
renderCommandEncoder->setFragmentTexture(texture, index);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void MetalRenderer::SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index)
|
||||
{
|
||||
auto& boundSamplerState = m_state.m_encoderState.m_samplers[shaderType][index];
|
||||
if (samplerState == boundSamplerState)
|
||||
return;
|
||||
|
||||
boundSamplerState = samplerState;
|
||||
|
||||
switch (shaderType)
|
||||
{
|
||||
case METAL_SHADER_TYPE_VERTEX:
|
||||
renderCommandEncoder->setVertexSamplerState(samplerState, index);
|
||||
break;
|
||||
case METAL_SHADER_TYPE_OBJECT:
|
||||
renderCommandEncoder->setObjectSamplerState(samplerState, index);
|
||||
break;
|
||||
case METAL_SHADER_TYPE_MESH:
|
||||
renderCommandEncoder->setMeshSamplerState(samplerState, index);
|
||||
break;
|
||||
case METAL_SHADER_TYPE_FRAGMENT:
|
||||
renderCommandEncoder->setFragmentSamplerState(samplerState, index);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
MTL::CommandBuffer* MetalRenderer::GetCommandBuffer()
|
||||
{
|
||||
bool needsNewCommandBuffer = (m_commandBuffers.empty() || m_commandBuffers.back().m_commited);
|
||||
@ -1086,7 +1207,7 @@ MTL::RenderCommandEncoder* MetalRenderer::GetTemporaryRenderCommandEncoder(MTL::
|
||||
}
|
||||
|
||||
// Some render passes clear the attachments, forceRecreate is supposed to be used in those cases
|
||||
MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecreate, bool rebindStateIfNewEncoder)
|
||||
MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecreate)
|
||||
{
|
||||
// Check if we need to begin a new render pass
|
||||
if (m_commandEncoder)
|
||||
@ -1130,6 +1251,7 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr
|
||||
|
||||
// Update state
|
||||
m_state.m_lastUsedFBO = m_state.m_activeFBO;
|
||||
m_state.m_isFirstDrawInRenderPass = true;
|
||||
|
||||
auto renderCommandEncoder = commandBuffer->renderCommandEncoder(m_state.m_activeFBO->GetRenderPassDescriptor());
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
@ -1140,12 +1262,6 @@ MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecr
|
||||
|
||||
ResetEncoderState();
|
||||
|
||||
if (rebindStateIfNewEncoder)
|
||||
{
|
||||
// Rebind all the render state
|
||||
RebindRenderState(renderCommandEncoder);
|
||||
}
|
||||
|
||||
return renderCommandEncoder;
|
||||
}
|
||||
|
||||
@ -1259,9 +1375,56 @@ bool MetalRenderer::AcquireNextDrawable(bool mainWindow)
|
||||
return true;
|
||||
}
|
||||
|
||||
void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader)
|
||||
bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader)
|
||||
{
|
||||
auto mtlShaderType = GetMtlShaderType(shader->shaderType);
|
||||
sint32 textureCount = shader->resourceMapping.getTextureCount();
|
||||
for (int i = 0; i < textureCount; ++i)
|
||||
{
|
||||
const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i);
|
||||
auto hostTextureUnit = relative_textureUnit;
|
||||
auto textureDim = shader->textureUnitDim[relative_textureUnit];
|
||||
auto texUnitRegIndex = hostTextureUnit * 7;
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
hostTextureUnit += LATTE_CEMU_VS_TEX_UNIT_BASE;
|
||||
texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_VS;
|
||||
break;
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
hostTextureUnit += LATTE_CEMU_PS_TEX_UNIT_BASE;
|
||||
texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_PS;
|
||||
break;
|
||||
case LatteConst::ShaderType::Geometry:
|
||||
hostTextureUnit += LATTE_CEMU_GS_TEX_UNIT_BASE;
|
||||
texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_GS;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
|
||||
auto textureView = m_state.m_textures[hostTextureUnit];
|
||||
if (!textureView)
|
||||
continue;
|
||||
|
||||
LatteTexture* baseTexture = textureView->baseTexture;
|
||||
if (!m_state.m_isFirstDrawInRenderPass)
|
||||
{
|
||||
// If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture
|
||||
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
||||
{
|
||||
auto colorTarget = m_state.m_activeFBO->colorBuffer[i].texture;
|
||||
if (colorTarget && colorTarget->baseTexture == baseTexture)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader)
|
||||
{
|
||||
auto mtlShaderType = GetMtlShaderType(shader->shaderType, usesGeometryShader);
|
||||
|
||||
sint32 textureCount = shader->resourceMapping.getTextureCount();
|
||||
for (int i = 0; i < textureCount; ++i)
|
||||
@ -1288,8 +1451,8 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
|
||||
UNREACHABLE;
|
||||
}
|
||||
|
||||
// TODO: uncomment
|
||||
uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit];
|
||||
// TODO: correct?
|
||||
uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;
|
||||
if (binding >= MAX_MTL_TEXTURES)
|
||||
{
|
||||
debug_printf("invalid texture binding %u\n", binding);
|
||||
@ -1301,88 +1464,21 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
|
||||
{
|
||||
// TODO: don't bind if already bound
|
||||
if (textureDim == Latte::E_DIM::DIM_1D)
|
||||
{
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
{
|
||||
renderCommandEncoder->setVertexTexture(m_nullTexture1D, binding);
|
||||
renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding);
|
||||
break;
|
||||
}
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
{
|
||||
renderCommandEncoder->setFragmentTexture(m_nullTexture1D, binding);
|
||||
renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
SetTexture(renderCommandEncoder, mtlShaderType, m_nullTexture1D, binding);
|
||||
else
|
||||
{
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
{
|
||||
renderCommandEncoder->setVertexTexture(m_nullTexture2D, binding);
|
||||
renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding);
|
||||
break;
|
||||
}
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
{
|
||||
renderCommandEncoder->setFragmentTexture(m_nullTexture2D, binding);
|
||||
renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
SetTexture(renderCommandEncoder, mtlShaderType, m_nullTexture2D, binding);
|
||||
SetSamplerState(renderCommandEncoder, mtlShaderType, m_nearestSampler, binding);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (textureDim == Latte::E_DIM::DIM_1D && (textureView->dim != Latte::E_DIM::DIM_1D))
|
||||
{
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
{
|
||||
renderCommandEncoder->setVertexTexture(m_nullTexture1D, binding);
|
||||
renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding);
|
||||
break;
|
||||
}
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
{
|
||||
renderCommandEncoder->setFragmentTexture(m_nullTexture1D, binding);
|
||||
renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
SetTexture(renderCommandEncoder, mtlShaderType, m_nullTexture1D, binding);
|
||||
continue;
|
||||
}
|
||||
else if (textureDim == Latte::E_DIM::DIM_2D && (textureView->dim != Latte::E_DIM::DIM_2D && textureView->dim != Latte::E_DIM::DIM_2D_MSAA))
|
||||
{
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
{
|
||||
renderCommandEncoder->setVertexTexture(m_nullTexture2D, binding);
|
||||
renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding);
|
||||
break;
|
||||
}
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
{
|
||||
renderCommandEncoder->setFragmentTexture(m_nullTexture2D, binding);
|
||||
renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
SetTexture(renderCommandEncoder, mtlShaderType, m_nullTexture2D, binding);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1399,53 +1495,13 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
|
||||
{
|
||||
sampler = m_nearestSampler;
|
||||
}
|
||||
|
||||
auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding];
|
||||
if (sampler != boundSampler)
|
||||
{
|
||||
boundSampler = sampler;
|
||||
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
{
|
||||
renderCommandEncoder->setVertexSamplerState(sampler, binding);
|
||||
break;
|
||||
}
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
{
|
||||
renderCommandEncoder->setFragmentSamplerState(sampler, binding);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
SetSamplerState(renderCommandEncoder, mtlShaderType, sampler, binding);
|
||||
|
||||
// get texture register word 0
|
||||
uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4];
|
||||
auto& boundTexture = m_state.m_encoderState.m_textures[mtlShaderType][binding];
|
||||
if (textureView == boundTexture.m_textureView && word4 == boundTexture.m_word4)
|
||||
continue;
|
||||
|
||||
boundTexture = {textureView, word4};
|
||||
|
||||
MTL::Texture* mtlTexture = textureView->GetSwizzledView(word4);
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
{
|
||||
renderCommandEncoder->setVertexTexture(mtlTexture, binding);
|
||||
break;
|
||||
}
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
{
|
||||
renderCommandEncoder->setFragmentTexture(mtlTexture, binding);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
SetTexture(renderCommandEncoder, mtlShaderType, mtlTexture, binding);
|
||||
}
|
||||
|
||||
// Support buffer
|
||||
@ -1537,23 +1593,7 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
|
||||
if (!HasUnifiedMemory())
|
||||
buffer->didModifyRange(NS::Range(supportBuffer.offset, size));
|
||||
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
{
|
||||
renderCommandEncoder->setVertexBuffer(buffer, supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING);
|
||||
//renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
|
||||
break;
|
||||
}
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
{
|
||||
renderCommandEncoder->setFragmentBuffer(buffer, supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING);
|
||||
//renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint);
|
||||
}
|
||||
|
||||
// Uniform buffers
|
||||
@ -1568,65 +1608,18 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t offset = m_state.m_uniformBufferOffsets[mtlShaderType][i];
|
||||
size_t offset = m_state.m_uniformBufferOffsets[GetMtlGeneralShaderType(shader->shaderType)][i];
|
||||
if (offset == INVALID_OFFSET)
|
||||
continue;
|
||||
|
||||
auto& boundOffset = m_state.m_encoderState.m_uniformBufferOffsets[mtlShaderType][binding];
|
||||
if (offset == boundOffset)
|
||||
continue;
|
||||
|
||||
boundOffset = offset;
|
||||
|
||||
// TODO: only set the offset if already bound
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
{
|
||||
renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), offset, binding);
|
||||
break;
|
||||
}
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
{
|
||||
renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBufferCache(), offset, binding);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
SetBuffer(renderCommandEncoder, mtlShaderType, m_memoryManager->GetBufferCache(), offset, binding);
|
||||
}
|
||||
}
|
||||
|
||||
// Storage buffer
|
||||
if (shader->resourceMapping.tfStorageBindingPoint >= 0)
|
||||
{
|
||||
switch (shader->shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
{
|
||||
renderCommandEncoder->setVertexBuffer(m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint);
|
||||
break;
|
||||
}
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
{
|
||||
renderCommandEncoder->setFragmentBuffer(m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
m_state.m_encoderState.m_uniformBufferOffsets[mtlShaderType][shader->resourceMapping.tfStorageBindingPoint] = INVALID_OFFSET;
|
||||
}
|
||||
}
|
||||
|
||||
void MetalRenderer::RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder)
|
||||
{
|
||||
// Vertex buffers
|
||||
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
|
||||
{
|
||||
auto& vertexBufferRange = m_state.m_vertexBuffers[i];
|
||||
if (vertexBufferRange.offset != INVALID_OFFSET)
|
||||
vertexBufferRange.needsRebind = true;
|
||||
SetBuffer(renderCommandEncoder, mtlShaderType, m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
#include "Metal/MTLResource.hpp"
|
||||
#include "Metal/MTLSampler.hpp"
|
||||
|
||||
struct MetalBufferAllocation
|
||||
{
|
||||
@ -31,27 +32,57 @@ struct MetalRestrideInfo
|
||||
|
||||
struct MetalBoundBuffer
|
||||
{
|
||||
bool needsRebind = false;
|
||||
size_t offset = INVALID_OFFSET;
|
||||
size_t size = 0;
|
||||
// Memory manager will write restride info to this variable
|
||||
MetalRestrideInfo restrideInfo;
|
||||
};
|
||||
|
||||
enum MetalGeneralShaderType
|
||||
{
|
||||
METAL_GENERAL_SHADER_TYPE_VERTEX,
|
||||
METAL_GENERAL_SHADER_TYPE_GEOMETRY,
|
||||
METAL_GENERAL_SHADER_TYPE_FRAGMENT,
|
||||
|
||||
METAL_GENERAL_SHADER_TYPE_TOTAL
|
||||
};
|
||||
|
||||
inline MetalGeneralShaderType GetMtlGeneralShaderType(LatteConst::ShaderType shaderType)
|
||||
{
|
||||
switch (shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
return METAL_GENERAL_SHADER_TYPE_VERTEX;
|
||||
case LatteConst::ShaderType::Geometry:
|
||||
return METAL_GENERAL_SHADER_TYPE_GEOMETRY;
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
return METAL_GENERAL_SHADER_TYPE_FRAGMENT;
|
||||
default:
|
||||
return METAL_GENERAL_SHADER_TYPE_TOTAL;
|
||||
}
|
||||
}
|
||||
|
||||
enum MetalShaderType
|
||||
{
|
||||
METAL_SHADER_TYPE_VERTEX,
|
||||
METAL_SHADER_TYPE_OBJECT,
|
||||
METAL_SHADER_TYPE_MESH,
|
||||
METAL_SHADER_TYPE_FRAGMENT,
|
||||
|
||||
METAL_SHADER_TYPE_TOTAL
|
||||
};
|
||||
|
||||
inline MetalShaderType GetMtlShaderType(LatteConst::ShaderType shaderType)
|
||||
inline MetalShaderType GetMtlShaderType(LatteConst::ShaderType shaderType, bool usesGeometryShader)
|
||||
{
|
||||
switch (shaderType)
|
||||
{
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
return METAL_SHADER_TYPE_VERTEX;
|
||||
if (usesGeometryShader)
|
||||
return METAL_SHADER_TYPE_OBJECT;
|
||||
else
|
||||
return METAL_SHADER_TYPE_VERTEX;
|
||||
case LatteConst::ShaderType::Geometry:
|
||||
return METAL_SHADER_TYPE_MESH;
|
||||
case LatteConst::ShaderType::Pixel:
|
||||
return METAL_SHADER_TYPE_FRAGMENT;
|
||||
default:
|
||||
@ -73,11 +104,11 @@ struct MetalEncoderState
|
||||
uint32 m_depthSlope = 0;
|
||||
uint32 m_depthClamp = 0;
|
||||
struct {
|
||||
class LatteTextureViewMtl* m_textureView = nullptr;
|
||||
uint32 m_word4 = INVALID_UINT32;
|
||||
} m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES];
|
||||
MTL::Buffer* m_buffer;
|
||||
size_t m_offset;
|
||||
} m_buffers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
|
||||
MTL::Texture* m_textures[METAL_SHADER_TYPE_TOTAL][MAX_MTL_TEXTURES];
|
||||
MTL::SamplerState* m_samplers[METAL_SHADER_TYPE_TOTAL][MAX_MTL_SAMPLERS];
|
||||
size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
|
||||
};
|
||||
|
||||
struct MetalStreamoutState
|
||||
@ -97,6 +128,7 @@ struct MetalState
|
||||
bool m_usesSRGB = false;
|
||||
|
||||
bool m_skipDrawSequence = false;
|
||||
bool m_isFirstDrawInRenderPass = true;
|
||||
|
||||
class CachedFBOMtl* m_activeFBO = nullptr;
|
||||
// If the FBO changes, but it's the same FBO as the last one with some omitted attachments, this FBO doesn't change'
|
||||
@ -105,7 +137,7 @@ struct MetalState
|
||||
MetalBoundBuffer m_vertexBuffers[MAX_MTL_BUFFERS] = {{}};
|
||||
// TODO: find out what is the max number of bound textures on the Wii U
|
||||
class LatteTextureViewMtl* m_textures[64] = {nullptr};
|
||||
size_t m_uniformBufferOffsets[METAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
|
||||
size_t m_uniformBufferOffsets[METAL_GENERAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
|
||||
|
||||
MTL::Viewport m_viewport;
|
||||
MTL::ScissorRect m_scissor;
|
||||
@ -329,12 +361,12 @@ public:
|
||||
|
||||
for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
|
||||
{
|
||||
for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
|
||||
m_state.m_encoderState.m_buffers[i][j] = {nullptr};
|
||||
for (uint32 j = 0; j < MAX_MTL_TEXTURES; j++)
|
||||
m_state.m_encoderState.m_textures[i][j] = {nullptr};
|
||||
m_state.m_encoderState.m_textures[i][j] = nullptr;
|
||||
for (uint32 j = 0; j < MAX_MTL_SAMPLERS; j++)
|
||||
m_state.m_encoderState.m_samplers[i][j] = nullptr;
|
||||
for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
|
||||
m_state.m_encoderState.m_uniformBufferOffsets[i][j] = INVALID_OFFSET;
|
||||
}
|
||||
}
|
||||
|
||||
@ -343,11 +375,15 @@ public:
|
||||
return m_state.m_encoderState;
|
||||
}
|
||||
|
||||
void SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index);
|
||||
void SetTexture(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Texture* texture, uint32 index);
|
||||
void SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index);
|
||||
|
||||
MTL::CommandBuffer* GetCommandBuffer();
|
||||
bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer);
|
||||
void WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer);
|
||||
MTL::RenderCommandEncoder* GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor);
|
||||
MTL::RenderCommandEncoder* GetRenderCommandEncoder(bool forceRecreate = false, bool rebindStateIfNewEncoder = true);
|
||||
MTL::RenderCommandEncoder* GetRenderCommandEncoder(bool forceRecreate = false);
|
||||
MTL::ComputeCommandEncoder* GetComputeCommandEncoder();
|
||||
MTL::BlitCommandEncoder* GetBlitCommandEncoder();
|
||||
void EndEncoding();
|
||||
@ -355,8 +391,8 @@ public:
|
||||
|
||||
bool AcquireNextDrawable(bool mainWindow);
|
||||
|
||||
void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader);
|
||||
void RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder);
|
||||
bool CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader);
|
||||
void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader);
|
||||
|
||||
void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a);
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
|
||||
#include "Cemu/Logging/CemuLogging.h"
|
||||
#include "Common/precompiled.h"
|
||||
#include "HW/Latte/Core/FetchShader.h"
|
||||
#include "HW/Latte/ISA/RegDefines.h"
|
||||
|
||||
extern std::atomic_int g_compiled_shaders_total;
|
||||
extern std::atomic_int g_compiled_shaders_async;
|
||||
@ -14,14 +16,14 @@ extern std::atomic_int g_compiled_shaders_async;
|
||||
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
|
||||
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
|
||||
{
|
||||
if (m_type == ShaderType::kFragment)
|
||||
if (type == ShaderType::kGeometry)
|
||||
{
|
||||
// Fragment functions are compiled just-in-time
|
||||
m_mslCode = mslCode;
|
||||
Compile(mslCode);
|
||||
}
|
||||
else
|
||||
{
|
||||
Compile(mslCode);
|
||||
// TODO: don't compile just-in-time
|
||||
m_mslCode = mslCode;
|
||||
}
|
||||
|
||||
// Count shader compilation
|
||||
@ -34,13 +36,149 @@ RendererShaderMtl::~RendererShaderMtl()
|
||||
m_function->release();
|
||||
}
|
||||
|
||||
void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType)
|
||||
{
|
||||
cemu_assert_debug(m_type == ShaderType::kVertex);
|
||||
|
||||
std::string fullCode;
|
||||
|
||||
// Vertex buffers
|
||||
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
|
||||
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
|
||||
std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n";
|
||||
|
||||
// Index buffer
|
||||
if (hostIndexType != Renderer::INDEX_TYPE::NONE)
|
||||
{
|
||||
vertexBufferDefinitions += ", device ";
|
||||
switch (hostIndexType)
|
||||
{
|
||||
case Renderer::INDEX_TYPE::U16:
|
||||
vertexBufferDefinitions += "ushort";
|
||||
break;
|
||||
case Renderer::INDEX_TYPE::U32:
|
||||
vertexBufferDefinitions += "uint";
|
||||
break;
|
||||
default:
|
||||
cemu_assert_suspicious();
|
||||
break;
|
||||
}
|
||||
|
||||
vertexBufferDefinitions += fmt::format("* indexBuffer [[buffer({})]]", vertexShader->resourceMapping.indexBufferBinding);
|
||||
vertexBuffers += ", indexBuffer";
|
||||
inputFetchDefinition += "vid = indexBuffer[vid];\n";
|
||||
}
|
||||
|
||||
inputFetchDefinition += "VertexIn in;\n";
|
||||
for (auto& bufferGroup : fetchShader->bufferGroups)
|
||||
{
|
||||
std::optional<LatteConst::VertexFetchType2> fetchType;
|
||||
|
||||
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
|
||||
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
|
||||
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
||||
|
||||
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
|
||||
{
|
||||
auto& attr = bufferGroup.attrib[j];
|
||||
|
||||
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
|
||||
if (semanticId == (uint32)-1)
|
||||
continue; // attribute not used?
|
||||
|
||||
std::string formatName;
|
||||
uint8 componentCount = 0;
|
||||
switch (GetMtlVertexFormat(attr.format))
|
||||
{
|
||||
case MTL::VertexFormatUChar:
|
||||
formatName = "uchar";
|
||||
componentCount = 1;
|
||||
break;
|
||||
case MTL::VertexFormatUChar2:
|
||||
formatName = "uchar2";
|
||||
componentCount = 2;
|
||||
break;
|
||||
case MTL::VertexFormatUChar3:
|
||||
formatName = "uchar3";
|
||||
componentCount = 3;
|
||||
break;
|
||||
case MTL::VertexFormatUChar4:
|
||||
formatName = "uchar4";
|
||||
componentCount = 4;
|
||||
break;
|
||||
case MTL::VertexFormatUShort:
|
||||
formatName = "ushort";
|
||||
componentCount = 1;
|
||||
break;
|
||||
case MTL::VertexFormatUShort2:
|
||||
formatName = "ushort2";
|
||||
componentCount = 2;
|
||||
break;
|
||||
case MTL::VertexFormatUShort3:
|
||||
formatName = "ushort3";
|
||||
componentCount = 3;
|
||||
break;
|
||||
case MTL::VertexFormatUShort4:
|
||||
formatName = "ushort4";
|
||||
componentCount = 4;
|
||||
break;
|
||||
case MTL::VertexFormatUInt:
|
||||
formatName = "uint";
|
||||
componentCount = 1;
|
||||
break;
|
||||
case MTL::VertexFormatUInt2:
|
||||
formatName = "uint2";
|
||||
componentCount = 2;
|
||||
break;
|
||||
case MTL::VertexFormatUInt3:
|
||||
formatName = "uint3";
|
||||
componentCount = 3;
|
||||
break;
|
||||
case MTL::VertexFormatUInt4:
|
||||
formatName = "uint4";
|
||||
componentCount = 4;
|
||||
break;
|
||||
}
|
||||
|
||||
// Fetch the attribute
|
||||
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId);
|
||||
inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName);
|
||||
inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
|
||||
inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset);
|
||||
for (uint8 i = 0; i < (4 - componentCount); i++)
|
||||
inputFetchDefinition += ", 0";
|
||||
inputFetchDefinition += ");\n";
|
||||
|
||||
if (fetchType.has_value())
|
||||
cemu_assert_debug(fetchType == attr.fetchType);
|
||||
else
|
||||
fetchType = attr.fetchType;
|
||||
|
||||
if (attr.fetchType == LatteConst::INSTANCE_DATA)
|
||||
{
|
||||
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
|
||||
}
|
||||
}
|
||||
|
||||
vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
|
||||
vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex);
|
||||
}
|
||||
|
||||
inputFetchDefinition += "return in;\n";
|
||||
inputFetchDefinition += "}\n";
|
||||
|
||||
fullCode += vertexBufferDefinitions + "\n";
|
||||
fullCode += vertexBuffers + "\n";
|
||||
fullCode += m_mslCode;
|
||||
fullCode += inputFetchDefinition;
|
||||
|
||||
Compile(fullCode);
|
||||
}
|
||||
|
||||
void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
|
||||
{
|
||||
cemu_assert_debug(m_type == ShaderType::kFragment);
|
||||
|
||||
if (m_function)
|
||||
m_function->release();
|
||||
|
||||
std::string fullCode;
|
||||
|
||||
// Define color attachment data types
|
||||
@ -77,6 +215,9 @@ void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
|
||||
|
||||
void RendererShaderMtl::Compile(const std::string& mslCode)
|
||||
{
|
||||
if (m_function)
|
||||
m_function->release();
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error);
|
||||
if (error)
|
||||
|
@ -21,6 +21,12 @@ public:
|
||||
RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
|
||||
virtual ~RendererShaderMtl();
|
||||
|
||||
void CompileVertexFunction()
|
||||
{
|
||||
Compile(m_mslCode);
|
||||
}
|
||||
|
||||
void CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType);
|
||||
void CompileFragmentFunction(CachedFBOMtl* activeFBO);
|
||||
|
||||
MTL::Function* GetFunction() const
|
||||
|
@ -3,7 +3,14 @@
|
||||
#define __STRINGIFY(x) #x
|
||||
#define _STRINGIFY(x) __STRINGIFY(x)
|
||||
|
||||
constexpr const char* utilityShaderSource = _STRINGIFY((
|
||||
constexpr const char* utilityShaderSource = R"V0G0N(
|
||||
#include <metal_stdlib>
|
||||
using namespace metal;
|
||||
|
||||
#define GET_BUFFER_BINDING(index) (28 + index)
|
||||
#define GET_TEXTURE_BINDING(index) (29 + index)
|
||||
#define GET_SAMPLER_BINDING(index) (14 + index)\n
|
||||
|
||||
constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};
|
||||
|
||||
struct VertexOut {
|
||||
@ -48,4 +55,4 @@ vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[b
|
||||
dst[vid * params.newStride + i] = src[vid * params.oldStride + i];
|
||||
}
|
||||
}
|
||||
));
|
||||
)V0G0N";
|
||||
|
Loading…
Reference in New Issue
Block a user