diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index fa3c6ff9..2a3dda47 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -545,6 +545,8 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/LatteTextureMtl.h HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp HW/Latte/Renderer/Metal/LatteTextureViewMtl.h + HW/Latte/Renderer/Metal/RendererShaderMtl.cpp + HW/Latte/Renderer/Metal/RendererShaderMtl.h ) #target_link_libraries(CemuCafe PRIVATE diff --git a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp index 88418650..98d970f6 100644 --- a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp @@ -11,6 +11,7 @@ #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h" #include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h" +#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.h" #include @@ -161,6 +162,8 @@ void LatteShaderCache_finish() RendererShaderVk::ShaderCacheLoading_end(); else if (g_renderer->GetType() == RendererAPI::OpenGL) RendererShaderGL::ShaderCacheLoading_end(); + else if (g_renderer->GetType() == RendererAPI::Metal) + RendererShaderMtl::ShaderCacheLoading_end(); } uint32 LatteShaderCache_getShaderCacheExtraVersion(uint64 titleId) @@ -243,6 +246,8 @@ void LatteShaderCache_Load() RendererShaderVk::ShaderCacheLoading_begin(cacheTitleId); else if (g_renderer->GetType() == RendererAPI::OpenGL) RendererShaderGL::ShaderCacheLoading_begin(cacheTitleId); + else if (g_renderer->GetType() == RendererAPI::Metal) + RendererShaderMtl::ShaderCacheLoading_begin(cacheTitleId); // get cache file name const auto pathGeneric = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}_shaders.bin", cacheTitleId); const auto pathGenericPre1_25_0 = ActiveSettings::GetCachePath("shaderCache/transferable/{:016x}.bin", cacheTitleId); // before 1.25.0 
@@ -774,6 +779,8 @@ void LatteShaderCache_Close() RendererShaderVk::ShaderCacheLoading_Close(); else if (g_renderer->GetType() == RendererAPI::OpenGL) RendererShaderGL::ShaderCacheLoading_Close(); + else if (g_renderer->GetType() == RendererAPI::Metal) + RendererShaderMtl::ShaderCacheLoading_Close(); // if Vulkan then also close pipeline cache if (g_renderer->GetType() == RendererAPI::Vulkan) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp index 5f0d7fb2..c2051090 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp @@ -12,8 +12,6 @@ #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h" #include "util/helpers/helpers.h" -// TODO: remove this include -#include "util/helpers/StringBuf.h" // parse instruction and if valid append it to instructionList bool LatteDecompiler_ParseCFInstruction(LatteDecompilerShaderContext* shaderContext, uint32 cfIndex, uint32 cfWord0, uint32 cfWord1, bool* endOfProgram, std::vector& instructionList) @@ -1070,14 +1068,9 @@ void _LatteDecompiler_Process(LatteDecompilerShaderContext* shaderContext, uint8 if (shaderContext->shader->hasError == false) { if (g_renderer->GetType() == RendererAPI::Metal) - { LatteDecompiler_emitMSLShader(shaderContext, shaderContext->shader); - // HACK - std::cout << shaderContext->shaderSource->c_str() << std::endl; - } else - { + else LatteDecompiler_emitGLSLShader(shaderContext, shaderContext->shader); - } } LatteDecompiler_cleanup(shaderContext); // fast access diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 943f1840..2ffcfa0f 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp 
@@ -3878,9 +3878,19 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompiler::emitHeader(shaderContext); // helper functions LatteDecompiler_emitHelperFunctions(shaderContext, src); + switch (shader->shaderType) + { + case LatteConst::ShaderType::Vertex: + src->add("VertexOut"); + break; + case LatteConst::ShaderType::Pixel: + src->add("FragmentOut"); + break; + } // start of main - src->add("void main()" _CRLF); - src->add("{" _CRLF); + src->add(" main0("); + LatteDecompiler::emitInputs(shaderContext); + src->add(") {" _CRLF); // variable definition if (shaderContext->typeTracker.useArrayGPRs == false) { @@ -3987,7 +3997,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, cemu_assert_debug((shaderContext->output->streamoutBufferStride[i]&3) == 0); if (shader->shaderType == LatteConst::ShaderType::Vertex) // vertex shader - src->addFmt("int sbBase{} = uf_streamoutBufferBase{}/4 + (gl_VertexID + uf_verticesPerInstance * gl_InstanceID)*{};" _CRLF, i, i, shaderContext->output->streamoutBufferStride[i] / 4); + src->addFmt("int sbBase{} = uf_streamoutBufferBase{}/4 + (vid + uf_verticesPerInstance * iid)*{};" _CRLF, i, i, shaderContext->output->streamoutBufferStride[i] / 4); else // geometry shader { uint32 gsOutPrimType = shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE]; @@ -4007,9 +4017,9 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, if( (shaderContext->analyzer.gprUseMask[0/8]&(1<<(0%8))) != 0 ) { if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->addFmt("{} = int4(gl_VertexID, 0, 0, gl_InstanceID);" _CRLF, _getRegisterVarName(shaderContext, 0)); + src->addFmt("{} = int4(vid, 0, 0, iid);" _CRLF, _getRegisterVarName(shaderContext, 0)); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) - src->addFmt("{} = as_type(float4(gl_VertexID, 0, 0, gl_InstanceID));" _CRLF, 
_getRegisterVarName(shaderContext, 0)); // TODO: is this correct? + src->addFmt("{} = as_type(float4(vid, 0, 0, iid));" _CRLF, _getRegisterVarName(shaderContext, 0)); // TODO: is this correct? else cemu_assert_unimplemented(); } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index fade4775..39798dc5 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -2,24 +2,28 @@ namespace LatteDecompiler { - static void _emitUniformVariables(LatteDecompilerShaderContext* decompilerContext, LatteDecompilerOutputUniformOffsets& uniformOffsets) + static void _emitUniformVariables(LatteDecompilerShaderContext* decompilerContext) { - LatteDecompilerShaderResourceMapping& resourceMapping = decompilerContext->output->resourceMappingVK; + auto src = decompilerContext->shaderSource; + + LatteDecompilerShaderResourceMapping& resourceMapping = decompilerContext->output->resourceMappingGL; + auto& uniformOffsets = decompilerContext->output->uniformOffsetsVK; + + src->add("struct DefaultVariables {" _CRLF); /* type name must match the DefaultVariables parameter that emitInputs() declares for buffer(29) */ sint32 uniformCurrentOffset = 0; auto shader = decompilerContext->shader; auto shaderType = decompilerContext->shader->shaderType; - auto shaderSrc = decompilerContext->shaderSource; if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED) { // uniform registers or buffers are accessed statically with predictable offsets // this allows us to remap the used entries into a more compact array if (shaderType == LatteConst::ShaderType::Vertex) - shaderSrc->addFmt("uniform ivec4 uf_remappedVS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); + src->addFmt("int4 uf_remappedVS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); /* MSL spells vector types int4/float2; the GLSL names ivec4/vec2 do not exist in MSL */ else if (shaderType == LatteConst::ShaderType::Pixel) - shaderSrc->addFmt("uniform 
ivec4 uf_remappedPS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); + src->addFmt("int4 uf_remappedPS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); else if (shaderType == LatteConst::ShaderType::Geometry) - shaderSrc->addFmt("uniform ivec4 uf_remappedGS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); + src->addFmt("int4 uf_remappedGS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); else debugBreakpoint(); uniformOffsets.offset_remapped = uniformCurrentOffset; @@ -30,11 +34,11 @@ namespace LatteDecompiler uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(decompilerContext->shaderBaseHash, 256); // full or partial uniform register file has to be present if (shaderType == LatteConst::ShaderType::Vertex) - shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize); + src->addFmt("int4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize); else if (shaderType == LatteConst::ShaderType::Pixel) - shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterPS[{}];" _CRLF, cfileSize); + src->addFmt("int4 uf_uniformRegisterPS[{}];" _CRLF, cfileSize); else if (shaderType == LatteConst::ShaderType::Geometry) - shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterGS[{}];" _CRLF, cfileSize); + src->addFmt("int4 uf_uniformRegisterGS[{}];" _CRLF, cfileSize); uniformOffsets.offset_uniformRegister = uniformCurrentOffset; uniformOffsets.count_uniformRegister = cfileSize; uniformCurrentOffset += 16 * cfileSize; @@ -49,7 +53,7 @@ namespace LatteDecompiler { // aka GX2 special state 0 uniformCurrentOffset = (uniformCurrentOffset + 7)&~7; - shaderSrc->add("uniform vec2 uf_windowSpaceToClipSpaceTransform;" _CRLF); + src->add("float2 uf_windowSpaceToClipSpaceTransform;" _CRLF); uniformOffsets.offset_windowSpaceToClipSpaceTransform = uniformCurrentOffset; uniformCurrentOffset += 8; } @@ -57,7 +61,7 @@ namespace LatteDecompiler if (decompilerContext->shaderType == 
LatteConst::ShaderType::Pixel && alphaTestEnable) { uniformCurrentOffset = (uniformCurrentOffset + 3)&~3; - shaderSrc->add("uniform float uf_alphaTestRef;" _CRLF); + src->add("float uf_alphaTestRef;" _CRLF); uniformOffsets.offset_alphaTestRef = uniformCurrentOffset; uniformCurrentOffset += 4; } @@ -67,7 +71,7 @@ namespace LatteDecompiler decompilerContext->shaderType == LatteConst::ShaderType::Geometry) { uniformCurrentOffset = (uniformCurrentOffset + 3)&~3; - shaderSrc->add("uniform float uf_pointSize;" _CRLF); + src->add("float uf_pointSize;" _CRLF); uniformOffsets.offset_pointSize = uniformCurrentOffset; uniformCurrentOffset += 4; } @@ -76,7 +80,7 @@ namespace LatteDecompiler if (shader->shaderType == LatteConst::ShaderType::Pixel) { uniformCurrentOffset = (uniformCurrentOffset + 7)&~7; - shaderSrc->add("uniform vec2 uf_fragCoordScale;" _CRLF); + src->add("float2 uf_fragCoordScale;" _CRLF); uniformOffsets.offset_fragCoordScale = uniformCurrentOffset; uniformCurrentOffset += 8; } @@ -86,7 +90,7 @@ namespace LatteDecompiler if (decompilerContext->analyzer.texUnitUsesTexelCoordinates.test(t) == false) continue; uniformCurrentOffset = (uniformCurrentOffset + 7) & ~7; - shaderSrc->addFmt("uniform vec2 uf_tex{}Scale;" _CRLF, t); + src->addFmt("float2 uf_tex{}Scale;" _CRLF, t); uniformOffsets.offset_texScale[t] = uniformCurrentOffset; uniformCurrentOffset += 8; } @@ -95,20 +99,22 @@ namespace LatteDecompiler (shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) || (shader->shaderType == LatteConst::ShaderType::Geometry) ) { - shaderSrc->add("uniform int uf_verticesPerInstance;" _CRLF); + src->add("int uf_verticesPerInstance;" _CRLF); uniformOffsets.offset_verticesPerInstance = uniformCurrentOffset; uniformCurrentOffset += 4; for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) { if (decompilerContext->output->streamoutBufferWriteMask[i]) { - shaderSrc->addFmt("uniform int uf_streamoutBufferBase{};" _CRLF, i); + 
src->addFmt("int uf_streamoutBufferBase{};" _CRLF, i); uniformOffsets.offset_streamoutBufferBase[i] = uniformCurrentOffset; uniformCurrentOffset += 4; } } } + src->add("};" _CRLF _CRLF); /* an MSL struct definition needs the terminating ';' */ + uniformOffsets.offset_endOfBlock = uniformCurrentOffset; } @@ -126,13 +132,11 @@ namespace LatteDecompiler cemu_assert_debug(decompilerContext->output->resourceMappingGL.uniformBuffersBindingPoint[i] >= 0); cemu_assert_debug(decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] >= 0); - shaderSrc->addFmt("UNIFORM_BUFFER_LAYOUT({}, {}, {}) ", (sint32)decompilerContext->output->resourceMappingGL.uniformBuffersBindingPoint[i], (sint32)decompilerContext->output->resourceMappingVK.setIndex, (sint32)decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i]); + //shaderSrc->addFmt("UNIFORM_BUFFER_LAYOUT({}, {}, {}) ", (sint32)decompilerContext->output->resourceMappingGL.uniformBuffersBindingPoint[i], (sint32)decompilerContext->output->resourceMappingVK.setIndex, (sint32)decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i]); - shaderSrc->addFmt("uniform ubuff{}" _CRLF, i); - shaderSrc->add("{" _CRLF); - shaderSrc->addFmt("float4 ubuff{}[{}];" _CRLF, i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE)); + shaderSrc->addFmt("struct UBuff{} {{" _CRLF, i); /* a literal brace must be doubled in a fmt format string; a lone one is a format error */ + shaderSrc->addFmt("float4 d{}[{}];" _CRLF, i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE)); shaderSrc->add("};" _CRLF _CRLF); - shaderSrc->add(_CRLF); } } else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED) @@ -153,6 +157,161 @@ namespace LatteDecompiler } } + static void 
_emitAttributes(LatteDecompilerShaderContext* decompilerContext) + { + auto src = decompilerContext->shaderSource; + + if (decompilerContext->shader->shaderType == 
LatteConst::ShaderType::Vertex) + { + src->add("struct VertexIn {" _CRLF); + // attribute inputs + for (uint32 i = 0; i < LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS; i++) + { + if (decompilerContext->analyzer.inputAttributSemanticMask[i]) + { + cemu_assert_debug(decompilerContext->output->resourceMappingGL.attributeMapping[i] >= 0); + cemu_assert_debug(decompilerContext->output->resourceMappingVK.attributeMapping[i] >= 0); + cemu_assert_debug(decompilerContext->output->resourceMappingGL.attributeMapping[i] == decompilerContext->output->resourceMappingVK.attributeMapping[i]); + + src->addFmt("ATTR_LAYOUT({}, {}) in uvec4 attrDataSem{};" _CRLF, (sint32)decompilerContext->output->resourceMappingVK.setIndex, (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i], i); + } + } + src->add("};" _CRLF _CRLF); + } + } + + static void _emitVSOutputs(LatteDecompilerShaderContext* shaderContext) + { + auto* src = shaderContext->shaderSource; + + src->add("struct VertexOut {" _CRLF); + + src->add("float4 position [[position]];" _CRLF); + if (shaderContext->analyzer.outputPointSize) + src->add("float pointSize[[point_size]];" _CRLF); + + LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); + auto parameterMask = shaderContext->shader->outputParameterMask; + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask&(1 << i)) == 0) + continue; + uint32 vsSemanticId = _getVertexShaderOutParamSemanticId(shaderContext->contextRegisters, i); + if (vsSemanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX) + continue; + // get import based on semanticId + sint32 psInputIndex = -1; + for (sint32 f = 0; f < psInputTable->count; f++) + { + if (psInputTable->import[f].semanticId == vsSemanticId) + { + psInputIndex = f; + break; + } + } + if (psInputIndex == -1) + continue; // no ps input + + src->addFmt("float4 passParameterSem{}", psInputTable->import[psInputIndex].semanticId); + src->addFmt(" [[user(locn{})]]", psInputIndex); + if 
(psInputTable->import[psInputIndex].isFlat) + src->add(" [[flat]]"); + if (psInputTable->import[psInputIndex].isNoPerspective) + src->add(" [[center_no_perspective]]"); + src->addFmt(";" _CRLF); + } + + src->add("};" _CRLF _CRLF); + } + + static void _emitPSInputs(LatteDecompilerShaderContext* shaderContext) + { + auto* src = shaderContext->shaderSource; + + src->add("struct FragmentIn {" _CRLF); + + LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); + for (sint32 i = 0; i < psInputTable->count; i++) + { + if (psInputTable->import[i].semanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX) + continue; + src->addFmt("float4 passParameterSem{}", psInputTable->import[i].semanticId); + src->addFmt(" [[user(locn{})]]", i); + if (psInputTable->import[i].isFlat) + src->add(" [[flat]]"); + if (psInputTable->import[i].isNoPerspective) + src->add(" [[center_no_perspective]]"); + src->add(";" _CRLF); + } + + src->add("};" _CRLF _CRLF); + } + + static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext) + { + auto src = decompilerContext->shaderSource; + + if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) + { + _emitAttributes(decompilerContext); + _emitVSOutputs(decompilerContext); + + // TODO: transform feedback + } + else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel) + { + _emitPSInputs(decompilerContext); + + src->add("struct FragmentOut {" _CRLF); + + // generate pixel outputs for pixel shader + for (uint32 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + { + if ((decompilerContext->shader->pixelColorOutputMask&(1 << i)) != 0) + { + src->addFmt("float4 passPixelColor{} [[color({})]];" _CRLF, i, i); + } + } + + src->add("};" _CRLF _CRLF); + } + } + + static void emitHeader(LatteDecompilerShaderContext* decompilerContext) + { + const bool dump_shaders_enabled = ActiveSettings::DumpShadersEnabled(); + if(dump_shaders_enabled) + decompilerContext->shaderSource->add("// start of shader inputs/outputs, 
predetermined by Cemu. Do not touch" _CRLF); + // uniform variables + _emitUniformVariables(decompilerContext); + // uniform buffers + _emitUniformBuffers(decompilerContext); + // inputs and outputs + _emitInputsAndOutputs(decompilerContext); + + if (dump_shaders_enabled) + decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF); + } + + static void _emitUniformBufferDefinitions(LatteDecompilerShaderContext* decompilerContext) + { + auto src = decompilerContext->shaderSource; + // uniform buffer definition + if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK) + { + for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) + { + if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess()) + continue; + + cemu_assert_debug(decompilerContext->output->resourceMappingGL.uniformBuffersBindingPoint[i] >= 0); + cemu_assert_debug(decompilerContext->output->resourceMappingVK.uniformBuffersBindingPoint[i] >= 0); + + src->addFmt(", constant UBuff{}& ubuff{} [[buffer({})]]" _CRLF, i, i, (sint32)decompilerContext->output->resourceMappingGL.uniformBuffersBindingPoint[i]); /* emitted inside main0's parameter list after the defaultVars parameter, so each entry needs its own leading separator (same scheme as the texture parameters) */ + } + } + } + static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext) { auto src = shaderContext->shaderSource; @@ -162,6 +321,8 @@ namespace LatteDecompiler if (!shaderContext->output->textureUnitMask[i]) continue; + src->add(", "); + if (shaderContext->shader->textureIsIntegerFormat[i]) { // integer samplers @@ -187,240 +348,29 @@ namespace LatteDecompiler cemu_assert_unimplemented(); } - src->addFmt(" tex{} [[texture({})]], ", i, shaderContext->output->resourceMappingGL.textureUnitToBindingPoint[i]); - src->addFmt("sampler samplr{} [[sampler({})]], ", i, shaderContext->output->resourceMappingGL.textureUnitToBindingPoint[i]); + src->addFmt(" tex{} [[texture({})]]", i, shaderContext->output->resourceMappingGL.textureUnitToBindingPoint[i]); + src->addFmt(", sampler samplr{} [[sampler({})]]", i, 
shaderContext->output->resourceMappingGL.textureUnitToBindingPoint[i]); } } - static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext) + static void emitInputs(LatteDecompilerShaderContext* decompilerContext) { - auto shaderSrc = decompilerContext->shaderSource; - if (decompilerContext->shader->shaderType == LatteConst::ShaderType::Vertex) + auto src = decompilerContext->shaderSource; + + switch (decompilerContext->shaderType) { - // attribute inputs - for (uint32 i = 0; i < LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS; i++) - { - if (decompilerContext->analyzer.inputAttributSemanticMask[i]) - { - cemu_assert_debug(decompilerContext->output->resourceMappingGL.attributeMapping[i] >= 0); - cemu_assert_debug(decompilerContext->output->resourceMappingVK.attributeMapping[i] >= 0); - cemu_assert_debug(decompilerContext->output->resourceMappingGL.attributeMapping[i] == decompilerContext->output->resourceMappingVK.attributeMapping[i]); - - shaderSrc->addFmt("ATTR_LAYOUT({}, {}) in uvec4 attrDataSem{};" _CRLF, (sint32)decompilerContext->output->resourceMappingVK.setIndex, (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i], i); - } - } + case LatteConst::ShaderType::Vertex: + src->add("VertexIn"); + break; + case LatteConst::ShaderType::Pixel: + src->add("FragmentIn"); + break; } - } - static void _emitVSExports(LatteDecompilerShaderContext* shaderContext) - { - auto* src = shaderContext->shaderSource; - LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); - auto parameterMask = shaderContext->shader->outputParameterMask; - for (uint32 i = 0; i < 32; i++) - { - if ((parameterMask&(1 << i)) == 0) - continue; - uint32 vsSemanticId = _getVertexShaderOutParamSemanticId(shaderContext->contextRegisters, i); - if (vsSemanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX) - continue; - // get import based on semanticId - sint32 psInputIndex = -1; - for (sint32 f = 0; f < psInputTable->count; f++) - { - if (psInputTable->import[f].semanticId 
== vsSemanticId) - { - psInputIndex = f; - break; - } - } - if (psInputIndex == -1) - continue; // no ps input - - src->addFmt("layout(location = {}) ", psInputIndex); - if (psInputTable->import[psInputIndex].isFlat) - src->add("flat "); - if (psInputTable->import[psInputIndex].isNoPerspective) - src->add("noperspective "); - src->add("out"); - src->addFmt(" vec4 passParameterSem{};" _CRLF, psInputTable->import[psInputIndex].semanticId); - } - } - - static void _emitPSImports(LatteDecompilerShaderContext* shaderContext) - { - auto* src = shaderContext->shaderSource; - LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); - for (sint32 i = 0; i < psInputTable->count; i++) - { - if (psInputTable->import[i].semanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX) - continue; - src->addFmt("layout(location = {}) ", i); - if (psInputTable->import[i].isFlat) - src->add("flat "); - if (psInputTable->import[i].isNoPerspective) - src->add("noperspective "); - src->add("in"); - src->addFmt(" vec4 passParameterSem{};" _CRLF, psInputTable->import[i].semanticId); - } - } - - static void _emitMisc(LatteDecompilerShaderContext* decompilerContext) - { - auto src = decompilerContext->shaderSource; - // per-vertex output (VS or GS) - if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex && !decompilerContext->options->usesGeometryShader) || - (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)) - { - src->add("out gl_PerVertex" _CRLF); - src->add("{" _CRLF); - src->add(" vec4 gl_Position;" _CRLF); - if (decompilerContext->analyzer.outputPointSize) - src->add(" float gl_PointSize;" _CRLF); - src->add("};" _CRLF); - } - // varyings (variables passed from vertex to pixel shader, only if geometry stage is disabled - if (decompilerContext->options->usesGeometryShader == false) - { - if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) - { - _emitVSExports(decompilerContext); - } - else if (decompilerContext->shaderType == 
LatteConst::ShaderType::Pixel) - { - _emitPSImports(decompilerContext); - } - } - else - { - if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) - { - // parameters shared between vertex shader and geometry shader - src->add("V2G_LAYOUT "); - - if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) - src->add("out Vertex" _CRLF); - else if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry) - src->add("in Vertex" _CRLF); - src->add("{" _CRLF); - uint32 ringParameterCountVS2GS = 0; - if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) - { - ringParameterCountVS2GS = decompilerContext->shader->ringParameterCount; - } - else - { - ringParameterCountVS2GS = decompilerContext->shader->ringParameterCountFromPrevStage; - } - for (uint32 f = 0; f < ringParameterCountVS2GS; f++) - src->addFmt(" ivec4 passV2GParameter{};" _CRLF, f); - if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) - src->add("}v2g;" _CRLF); - else if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry) - src->add("}v2g[];" _CRLF); - } - if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry) - { - // parameters shared between geometry and pixel shader - uint32 ringItemSize = decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF; - if ((ringItemSize & 0xF) != 0) - debugBreakpoint(); - if (((decompilerContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF) & 0xF) != 0) - debugBreakpoint(); - - for (sint32 p = 0; p < decompilerContext->parsedGSCopyShader->numParam; p++) - { - if (decompilerContext->parsedGSCopyShader->paramMapping[p].exportType != 2) - continue; - src->addFmt("layout(location = {}) out vec4 passG2PParameter{};" _CRLF, decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam & 0x7F, (sint32)decompilerContext->parsedGSCopyShader->paramMapping[p].exportParam); - } - } - else if 
(decompilerContext->shaderType == LatteConst::ShaderType::Pixel) - { - // pixel shader with geometry shader - LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); - for (sint32 i = 0; i < psInputTable->count; i++) - { - if (psInputTable->import[i].semanticId > LATTE_ANALYZER_IMPORT_INDEX_PARAM_MAX) - continue; - uint32 location = psInputTable->import[i].semanticId & 0x7F; // todo - the range above 128 has special meaning? - - src->addFmt("layout(location = {}) ", location); - if (psInputTable->import[i].isFlat) - src->add("flat "); - if (psInputTable->import[i].isNoPerspective) - src->add("noperspective "); - if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) - src->add("out"); - else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel) - src->add("in"); - else - debugBreakpoint(); - - src->addFmt(" vec4 passG2PParameter{};" _CRLF, (sint32)location); - } - } - } - // output defines - if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel) - { - // generate pixel outputs for pixel shader - for (uint32 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) - { - if ((decompilerContext->shader->pixelColorOutputMask&(1 << i)) != 0) - { - src->addFmt("layout(location = {}) out vec4 passPixelColor{};" _CRLF, i, i); - } - } - } - // streamout buffer (transform feedback) - if ((decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry) && decompilerContext->analyzer.hasStreamoutEnable) - { - if (decompilerContext->options->useTFViaSSBO) - { - if (decompilerContext->analyzer.useSSBOForStreamout && decompilerContext->analyzer.hasStreamoutWrite) - { - src->addFmt("layout(set = {}, binding = {}) buffer StreamoutBuffer" _CRLF, decompilerContext->output->resourceMappingVK.setIndex, decompilerContext->output->resourceMappingVK.getTFStorageBufferBindingPoint()); - src->add("{" _CRLF); - src->add("int sb_buffer[];" _CRLF); - src->add("};" _CRLF); - } - } - else - 
{ - sint32 locationOffset = 0; // glslang wants a location for xfb outputs - for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) - { - if (!decompilerContext->output->streamoutBufferWriteMask[i]) - continue; - uint32 bufferStride = decompilerContext->output->streamoutBufferStride[i]; - src->addFmt("XFB_BLOCK_LAYOUT({}, {}, {}) out XfbBlock{} " _CRLF, i, bufferStride, locationOffset, i); - src->add("{" _CRLF); - src->addFmt("layout(xfb_buffer = {}, xfb_offset = 0) int sb{}[{}];" _CRLF, i, i, decompilerContext->output->streamoutBufferStride[i] / 4); - src->add("};" _CRLF); - locationOffset += (decompilerContext->output->streamoutBufferStride[i] / 4); - } - } - } - } - - static void emitHeader(LatteDecompilerShaderContext* decompilerContext) - { - const bool dump_shaders_enabled = ActiveSettings::DumpShadersEnabled(); - if(dump_shaders_enabled) - decompilerContext->shaderSource->add("// start of shader inputs/outputs, predetermined by Cemu. Do not touch" _CRLF); - // uniform variables - _emitUniformVariables(decompilerContext, decompilerContext->output->uniformOffsetsVK); + src->add(" in [[stage_in]], constant DefaultVariables& defaultVars [[buffer(29)]]"); /* MSL requires a [[buffer(n)]] argument to be a pointer or reference in the constant or device address space; a by-value struct is rejected */ // uniform buffers - _emitUniformBuffers(decompilerContext); + _emitUniformBufferDefinitions(decompilerContext); // textures _emitTextureDefinitions(decompilerContext); - // attributes - _emitAttributes(decompilerContext); - // misc stuff - _emitMisc(decompilerContext); - - if (dump_shaders_enabled) - decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF); } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index b1710e8a..fa3b03f2 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1,6 +1,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalLayer.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h" 
+#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "HW/Latte/Core/LatteShader.h" @@ -235,10 +236,9 @@ void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, cemuLog_logDebug(LogType::Force, "not implemented"); } -RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool compileAsync, bool isGfxPackSource) +RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) { - cemuLog_logDebug(LogType::Force, "not implemented"); - + //return new RendererShaderMtl(this, type, baseHash, auxHash, isGameShader, isGfxPackShader, source); return nullptr; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp new file mode 100644 index 00000000..50033d5c --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -0,0 +1,41 @@ +#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Cemu/Logging/CemuLogging.h" + +RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) + : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader) +{ + NS::Error* error = nullptr; + MTL::Library* library = mtlRenderer->GetDevice()->newLibrary(NS::String::string(mslCode.c_str(), NS::ASCIIStringEncoding), nullptr, &error); + if (error) + { + cemuLog_log(LogType::MetalLogging, "Failed to create library (error: {}) -> source:\n{}", error->localizedDescription()->utf8String(), mslCode.c_str()); + error->release(); + if (library) /* NOTE(review): a library object may presumably still be returned alongside an error (e.g. warnings only) - drop it so it is not leaked */ + library->release(); + return; + } + m_function = library->newFunction(NS::String::string("main0", 
NS::ASCIIStringEncoding)); + library->release(); /* the library is only needed to look up main0; release our reference here instead of leaking it (m_function is released separately in the destructor) */ +} + +RendererShaderMtl::~RendererShaderMtl() +{ + if (m_function) + m_function->release(); +} + +void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId) +{ + cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_begin not implemented!"); +} + +void RendererShaderMtl::ShaderCacheLoading_end() +{ + cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_end not implemented!"); +} + +void RendererShaderMtl::ShaderCacheLoading_Close() +{ + cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_Close not implemented!"); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h new file mode 100644 index 00000000..030bbff0 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -0,0 +1,44 @@ +#pragma once + +#include "Cafe/HW/Latte/Renderer/RendererShader.h" +#include "HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "util/helpers/ConcurrentQueue.h" + +#include + +class RendererShaderMtl : public RendererShader +{ + //enum class COMPILATION_STATE : uint32 + //{ + // NONE, + // QUEUED, + // COMPILING, + // DONE + //}; + +public: + static void ShaderCacheLoading_begin(uint64 cacheTitleId); + static void ShaderCacheLoading_end(); + static void ShaderCacheLoading_Close(); + + RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode); + virtual ~RendererShaderMtl(); + + MTL::Function* GetFunction() const + { + return m_function; + } + + // TODO: implement these + sint32 GetUniformLocation(const char* name) override { return 0; } + void SetUniform2fv(sint32 location, void* data, sint32 count) override {} + void SetUniform4iv(sint32 location, void* data, sint32 count) override {} + + // TODO: implement this + void PreponeCompilation(bool isRenderThread) override {} + bool 
IsCompiled() override { return true; } + bool WaitForCompiled() override { return true; } + +private: + MTL::Function* m_function = nullptr; +};