From e2ec602c437b69277e923d4ed2890cbd969bd431 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 7 Aug 2024 13:25:38 +0200 Subject: [PATCH] specialize fragment shader output types & fix: shader errors --- src/Cafe/HW/Latte/Core/LatteBufferData.cpp | 15 +- .../LatteDecompilerEmitMSL.cpp | 13 +- .../LatteDecompilerEmitMSLHeader.hpp | 6 +- .../Renderer/Metal/LatteTextureViewMtl.cpp | 8 +- .../HW/Latte/Renderer/Metal/LatteToMtl.cpp | 149 +++++++++--------- src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h | 14 +- .../HW/Latte/Renderer/Metal/MetalCommon.h | 7 +- .../Renderer/Metal/MetalMemoryManager.cpp | 4 +- .../Renderer/Metal/MetalPipelineCache.cpp | 15 +- .../Renderer/Metal/RendererShaderMtl.cpp | 86 +++++++--- .../Latte/Renderer/Metal/RendererShaderMtl.h | 9 ++ 11 files changed, 205 insertions(+), 121 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteBufferData.cpp b/src/Cafe/HW/Latte/Core/LatteBufferData.cpp index 85d4cdf7..7620e6a7 100644 --- a/src/Cafe/HW/Latte/Core/LatteBufferData.cpp +++ b/src/Cafe/HW/Latte/Core/LatteBufferData.cpp @@ -62,7 +62,7 @@ void rectGenerate4thVertex(uint32be* output, uint32be* input0, uint32be* input1, // order of rectangle vertices is // v0 v1 - // v2 v3 + // v2 v3 for (sint32 f = 0; f < vectorLen*4; f++) output[f] = _swapEndianU32(output[f]); @@ -199,11 +199,14 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance #if BOOST_OS_MACOS if(bufferStride % 4 != 0) { - if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance()) + if (g_renderer->GetType() == RendererAPI::Vulkan) { - auto fixedBuffer = vkRenderer->buffer_genStrideWorkaroundVertexBuffer(bufferAddress, fixedBufferSize, bufferStride); - vkRenderer->buffer_bindVertexStrideWorkaroundBuffer(fixedBuffer.first, fixedBuffer.second, bufferIndex, fixedBufferSize); - continue; + if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance()) + { + auto fixedBuffer = vkRenderer->buffer_genStrideWorkaroundVertexBuffer(bufferAddress, fixedBufferSize, bufferStride); + vkRenderer->buffer_bindVertexStrideWorkaroundBuffer(fixedBuffer.first, fixedBuffer.second, bufferIndex, fixedBufferSize); + continue; + } } } #endif @@ -222,4 +225,4 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance if (pixelShader) LatteBufferCache_syncGPUUniformBuffers(pixelShader, mmSQ_PS_UNIFORM_BLOCK_START, LatteConst::ShaderType::Pixel); return true; -} \ No newline at end of file +} diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 0975a4d0..84cba909 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -10,6 +10,7 @@ #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "config/ActiveSettings.h" #include "util/helpers/StringBuf.h" @@ -2335,14 +2336,14 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex if (texDim == Latte::E_DIM::DIM_2D_ARRAY) { // 3 coords + compare value - src->add("float3("); + src->add("float2("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(", "); + src->add("), "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); - src->addFmt("), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0)); + src->addFmt(", {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0)); } else if (texDim == Latte::E_DIM::DIM_CUBEMAP) { @@ -3181,9 +3182,11 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe src->add(") == false) discard_fragment();" _CRLF); } // pixel color output - src->addFmt("out.passPixelColor{} = ", pixelColorOutputIndex); + src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex)); + src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetColorAttachmentTypeStr(pixelColorOutputIndex)); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); - src->add(";" _CRLF); + src->add(");" _CRLF); + src->add("#endif" _CRLF); if( cfInstruction->exportArrayBase+i >= 8 ) cemu_assert_unimplemented(); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 92a5fb13..5384f0d4 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -222,6 +222,8 @@ namespace LatteDecompiler { auto* src = shaderContext->shaderSource; + src->add("#define GET_FRAGCOORD() vec4(in.position.xy * supportBuffer.fragCoordScale.xy, in.position.z, 1.0 / in.position.w)" _CRLF); + src->add("struct FragmentIn {" _CRLF); LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); @@ -263,7 +265,9 @@ namespace LatteDecompiler { if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0) { - src->addFmt("float4 passPixelColor{} [[color({})]];" _CRLF, i, i); + src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i)); + src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetColorAttachmentTypeStr(i), i, i); + src->add("#endif" _CRLF); } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp index b9d00119..edf720fb 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp @@ -50,17 +50,17 @@ MTL::Texture* LatteTextureViewMtl::GetSwizzledView(uint32 gpuSamplerSwizzle) } // Fallback cache - auto it = m_fallbackViewCache.find(gpuSamplerSwizzle); - if (it != m_fallbackViewCache.end()) + auto& fallbackEntry = m_fallbackViewCache[gpuSamplerSwizzle]; + if (fallbackEntry) { - return it->second; + return fallbackEntry; } MTL::Texture* texture = CreateSwizzledView(gpuSamplerSwizzle); if (freeIndex != -1) m_viewCache[freeIndex] = {gpuSamplerSwizzle, texture}; else - it->second = texture; + fallbackEntry = texture; return texture; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp index 226a5c6d..44cb1f3b 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp @@ -4,85 +4,85 @@ #include "Metal/MTLRenderCommandEncoder.hpp" #include "Metal/MTLSampler.hpp" -std::map MTL_COLOR_FORMAT_TABLE = { - {Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatRG8Unorm, true, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, true, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, true, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, true, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, true, 2}}, - {Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, true, 1}}, - {Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, true, 1}}, - {Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, false, 1}}, - {Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, false, 1}}, - {Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, true, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, true, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, false, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, false, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, true, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, true, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, false, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, false, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, true, 4}}, - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, true, 4}}, - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, true, 8}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, false, 4}}, - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, false, 8}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, true, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, true, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, false, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, true, 2}}, - {Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, true, 2}}, - {Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, false, 2}}, - {Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, false, 2}}, - {Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, true, 2}}, - {Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, true, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, true, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, false, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, false, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, true, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, true, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, true, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, false, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, false, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, true, 8}}, - {Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, false, 0}}, // TODO - {Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, false, 0}}, // TODO - {Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, false, 0}}, // TODO - {Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, false, 0}}, // TODO - {Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, false, 0}}, // TODO - {Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, true, 4}}, - {Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, false, 4}}, - {Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, false, 4}}, - {Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, true, 4}}, - {Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, false, 8}}, - {Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, false, 8}}, - {Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, true, 8}}, - {Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, false, 16}}, - {Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, false, 16}}, - {Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, true, 16}}, - {Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, true, 8, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, true, 8, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, true, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, true, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, true, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, true, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, true, 8, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, true, 8, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, true, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, true, 16, {4, 4}}}, // TODO: correct? +std::map MTL_COLOR_FORMAT_TABLE = { + {Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatRG8Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, MetalDataType::FLOAT, 2}}, + {Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, MetalDataType::FLOAT, 1}}, + {Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, MetalDataType::FLOAT, 1}}, + {Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, MetalDataType::UINT, 1}}, + {Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, MetalDataType::INT, 1}}, + {Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, MetalDataType::FLOAT, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, MetalDataType::FLOAT, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, MetalDataType::UINT, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, MetalDataType::INT, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, MetalDataType::FLOAT, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, MetalDataType::FLOAT, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, MetalDataType::UINT, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, MetalDataType::INT, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, MetalDataType::FLOAT, 4}}, + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, MetalDataType::FLOAT, 4}}, + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}}, + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, MetalDataType::FLOAT, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, MetalDataType::FLOAT, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, MetalDataType::FLOAT, 2}}, + {Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, MetalDataType::FLOAT, 2}}, + {Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, MetalDataType::UINT, 2}}, + {Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, MetalDataType::INT, 2}}, + {Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, MetalDataType::FLOAT, 2}}, + {Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, MetalDataType::FLOAT, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, MetalDataType::FLOAT, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, MetalDataType::UINT, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, MetalDataType::INT, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, MetalDataType::FLOAT, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, MetalDataType::FLOAT, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, MetalDataType::UINT, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, MetalDataType::FLOAT, 8}}, + {Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO + {Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO + {Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO + {Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO + {Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO + {Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, MetalDataType::FLOAT, 4}}, + {Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, MetalDataType::UINT, 4}}, + {Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, MetalDataType::INT, 4}}, + {Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}}, + {Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, MetalDataType::UINT, 8}}, + {Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, MetalDataType::INT, 8}}, + {Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, MetalDataType::FLOAT, 8}}, + {Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, MetalDataType::UINT, 16}}, + {Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, MetalDataType::INT, 16}}, + {Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, MetalDataType::FLOAT, 16}}, + {Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct? }; -std::map MTL_DEPTH_FORMAT_TABLE = { - {Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, false, 4}}, // TODO: not supported on Apple sillicon, maybe find something else - {Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, false, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, false, 5}}, - {Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, false, 2}}, - {Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, false, 4}}, +std::map MTL_DEPTH_FORMAT_TABLE = { + {Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4}}, // TODO: not supported on Apple sillicon, maybe find something else + {Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5}}, + {Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2}}, + {Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4}}, }; -const MtlPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth) +const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth) { - MtlPixelFormatInfo formatInfo; + MetalPixelFormatInfo formatInfo; if (isDepth) formatInfo = MTL_DEPTH_FORMAT_TABLE[format]; else @@ -266,7 +266,6 @@ MTL::PrimitiveType GetMtlPrimitiveType(LattePrimitiveMode primitiveMode) case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLES: return MTL::PrimitiveTypeTriangle; case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLE_FAN: - debug_printf("Metal doesn't support triangle fan primitive, using triangle strip instead\n"); return MTL::PrimitiveTypeTriangleStrip; case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLE_STRIP: return MTL::PrimitiveTypeTriangleStrip; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h index 91ae1c2f..6b3ae2af 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h @@ -15,14 +15,22 @@ struct Uvec2 { uint32 y; }; -struct MtlPixelFormatInfo { +enum class MetalDataType +{ + NONE, + INT, + UINT, + FLOAT, +}; + +struct MetalPixelFormatInfo { MTL::PixelFormat pixelFormat; - bool blendable; + MetalDataType dataType; size_t bytesPerBlock; Uvec2 blockTexelSize = {1, 1}; }; -const MtlPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth); +const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth); size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index c7011ab8..2a2713e5 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -1,6 +1,11 @@ #pragma once -inline size_t align(size_t size, size_t alignment) +inline size_t Align(size_t size, size_t alignment) { return (size + alignment - 1) & ~(alignment - 1); } + +inline std::string GetColorAttachmentTypeStr(uint32 index) +{ + return "COLOR_ATTACHMENT" + std::to_string(index) + "_TYPE"; +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index bba887b9..c19d8596 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -16,7 +16,7 @@ MetalBufferAllocator::~MetalBufferAllocator() MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, size_t alignment) { // Align the size - size = align(size, alignment); + size = Align(size, alignment); // First, try to find a free range for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) @@ -94,7 +94,7 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu if (restrideInfo.memoryInvalidated || stride != restrideInfo.lastStride) { // TODO: use compute/void vertex function instead - size_t newStride = align(stride, 4); + size_t newStride = Align(stride, 4); size_t newSize = vertexBufferRange->size / stride * newStride; restrideInfo.buffer = m_mtlr->GetDevice()->newBuffer(newSize, MTL::StorageModeShared); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index ef59c111..b76cd8f8 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -61,7 +61,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS uint32 bufferIndex = bufferGroup.attributeBufferIndex; uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - bufferStride = align(bufferStride, 4); + bufferStride = Align(bufferStride, 4); // HACK if (bufferStride == 0) @@ -83,11 +83,15 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS } } + auto mtlVertexShader = static_cast(vertexShader->shader); + auto mtlPixelShader = static_cast(pixelShader->shader); + mtlPixelShader->CompileFragmentFunction(activeFBO); + // Render pipeline state MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(static_cast(vertexShader->shader)->GetFunction()); - desc->setFragmentFunction(static_cast(pixelShader->shader)->GetFunction()); - // TODO: don't always set the vertex descriptor + desc->setVertexFunction(mtlVertexShader->GetFunction()); + desc->setFragmentFunction(mtlPixelShader->GetFunction()); + // TODO: don't always set the vertex descriptor? desc->setVertexDescriptor(vertexDescriptor); for (uint8 i = 0; i < 8; i++) { @@ -106,7 +110,8 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK(); bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; - if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).blendable) + // Only float data type is blendable + if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT) { colorAttachment->setBlendingEnabled(true); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index dcceb18a..18334052 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -1,30 +1,23 @@ #include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" + #include "Cemu/Logging/CemuLogging.h" -#include "Metal/MTLFunctionDescriptor.hpp" +#include "Common/precompiled.h" RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) - : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader) + : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer} { - NS::Error* error = nullptr; - MTL::Library* library = mtlRenderer->GetDevice()->newLibrary(NS::String::string(mslCode.c_str(), NS::ASCIIStringEncoding), nullptr, &error); - if (error) + // Fragment functions are compiled just-in-time + if (m_type == ShaderType::kFragment) { - printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str()); - error->release(); - return; + m_mslCode = mslCode; + } + else + { + Compile(mslCode); } - //MTL::FunctionDescriptor* desc = MTL::FunctionDescriptor::alloc()->init(); - //desc->setName(NS::String::string("main0", NS::ASCIIStringEncoding)); - //error = nullptr; - m_function = library->newFunction(NS::String::string("main0", NS::ASCIIStringEncoding)); - library->release(); - //if (error) - //{ - // printf("failed to create function (error: %s)\n", error->localizedDescription()->utf8String()); - // error->release(); - // return; - //} } RendererShaderMtl::~RendererShaderMtl() @@ -33,6 +26,47 @@ RendererShaderMtl::~RendererShaderMtl() m_function->release(); } +void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO) +{ + cemu_assert_debug(m_type == ShaderType::kFragment); + + if (m_function) + m_function->release(); + + std::string fullCode; + + // Define color attachment data types + for (uint8 i = 0; i < 8; i++) + { + const auto& colorBuffer = activeFBO->colorBuffer[i]; + if (!colorBuffer.texture) + { + continue; + } + auto dataType = GetMtlPixelFormatInfo(colorBuffer.texture->format, false).dataType; + fullCode += "#define " + GetColorAttachmentTypeStr(i) + " "; + switch (dataType) + { + case MetalDataType::INT: + fullCode += "int4"; + break; + case MetalDataType::UINT: + fullCode += "uint4"; + break; + case MetalDataType::FLOAT: + fullCode += "float4"; + break; + default: + cemu_assert_suspicious(); + break; + } + fullCode += "\n"; + } + + fullCode += m_mslCode; + Compile(fullCode); +} + void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId) { cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_begin not implemented!"); @@ -47,3 +81,17 @@ void RendererShaderMtl::ShaderCacheLoading_Close() { cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_Close not implemented!"); } + +void RendererShaderMtl::Compile(const std::string& mslCode) +{ + NS::Error* error = nullptr; + MTL::Library* library = m_mtlr->GetDevice()->newLibrary(NS::String::string(mslCode.c_str(), NS::ASCIIStringEncoding), nullptr, &error); + if (error) + { + printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str()); + error->release(); + return; + } + m_function = library->newFunction(NS::String::string("main0", NS::ASCIIStringEncoding)); + library->release(); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index e440d4dc..f70db1bd 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -1,6 +1,7 @@ #pragma once #include "Cafe/HW/Latte/Renderer/RendererShader.h" +#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "HW/Latte/Renderer/Metal/MetalRenderer.h" #include "util/helpers/ConcurrentQueue.h" @@ -24,6 +25,8 @@ public: RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode); virtual ~RendererShaderMtl(); + void CompileFragmentFunction(CachedFBOMtl* activeFBO); + MTL::Function* GetFunction() const { return m_function; @@ -51,5 +54,11 @@ public: bool WaitForCompiled() override { return true; } private: + class MetalRenderer* m_mtlr; + MTL::Function* m_function = nullptr; + + std::string m_mslCode; + + void Compile(const std::string& mslCode); };