From d7e9aff230f470deb169b56d1de57356f7f8f3c4 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 7 Aug 2024 11:44:16 +0200 Subject: [PATCH] implement triangle fan reindexing --- src/Cafe/HW/Latte/Core/LatteIndices.cpp | 71 ++++++++- src/Cafe/HW/Latte/Core/LatteTexture.cpp | 2 - .../LatteDecompilerEmitMSL.cpp | 4 +- .../HW/Latte/Renderer/Metal/LatteToMtl.cpp | 140 +++++++++--------- src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h | 1 + .../Renderer/Metal/MetalPipelineCache.cpp | 2 +- 6 files changed, 141 insertions(+), 79 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.cpp b/src/Cafe/HW/Latte/Core/LatteIndices.cpp index 6e1d7455..891dc3e1 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.cpp +++ b/src/Cafe/HW/Latte/Core/LatteIndices.cpp @@ -7,7 +7,7 @@ #include #endif -struct +struct { const void* lastPtr; uint32 lastCount; @@ -284,6 +284,46 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun indexMax = std::max(count, 1u) - 1; } +template +void LatteIndices_unpackTriangleFanAndConvert(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) +{ + debug_printf("TRIANGLE FAN UNPACK\n"); + const betype* src = (betype*)indexDataInput; + T* dst = (T*)indexDataOutput; + // TODO: check this + for (sint32 i = 0; i < count; i++) + { + uint32 i0; + if (i % 2 == 0) + i0 = i / 2; + else + i0 = count - 1 - i / 2; + T idx = src[i0]; + indexMin = std::min(indexMin, (uint32)idx); + indexMax = std::max(indexMax, (uint32)idx); + dst[i] = idx; + } +} + +template +void LatteIndices_generateAutoTriangleFanIndices(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) +{ + debug_printf("TRIANGLE FAN AUTO\n"); + const betype* src = (betype*)indexDataInput; + T* dst = (T*)indexDataOutput; + for (sint32 i = 0; i < count; i++) + { + T idx = i; + if (idx % 2 == 0) + idx = idx / 2; + else + idx = count - 1 - idx / 2; + dst[i] = idx; + } + indexMin = 0; + indexMax = std::max(count, 1u) - 1; +} + #if defined(ARCH_X86_64) ATTRIBUTE_AVX2 void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) @@ -295,7 +335,7 @@ void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDat sint32 countRemaining = count & 15; if (count16) { - __m256i mMin = _mm256_set_epi16((sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, + __m256i mMin = _mm256_set_epi16((sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF, (sint16)0xFFFF); __m256i mMax = _mm256_set_epi16(0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000); __m256i mShuffle16Swap = _mm256_set_epi8(30, 31, 28, 29, 26, 27, 24, 25, 22, 23, 20, 21, 18, 19, 16, 17, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); @@ -659,6 +699,29 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 cemu_assert_debug(false); outputCount = count + 1; } + else if (primitiveMode == LattePrimitiveMode::TRIANGLE_FAN && g_renderer->GetType() == RendererAPI::Metal) + { + if (indexType == LatteIndexType::AUTO) + { + if (count <= 0xFFFF) + { + LatteIndices_generateAutoTriangleFanIndices(indexData, indexOutputPtr, count, indexMin, indexMax); + renderIndexType = Renderer::INDEX_TYPE::U16; + } + else + { + LatteIndices_generateAutoTriangleFanIndices(indexData, indexOutputPtr, count, indexMin, indexMax); + renderIndexType = Renderer::INDEX_TYPE::U32; + } + } + else if (indexType == LatteIndexType::U16_BE) + LatteIndices_unpackTriangleFanAndConvert(indexData, indexOutputPtr, count, indexMin, indexMax); + else if (indexType == LatteIndexType::U32_BE) + LatteIndices_unpackTriangleFanAndConvert(indexData, indexOutputPtr, count, indexMin, indexMax); + else + cemu_assert_debug(false); + outputCount = count; + } else { if (indexType == LatteIndexType::U16_BE) @@ -671,7 +734,7 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); #else - LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); + LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); #endif } else if (indexType == LatteIndexType::U32_BE) @@ -682,7 +745,7 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); #else - LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); + LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); #endif } else if (indexType == LatteIndexType::U16_LE) diff --git a/src/Cafe/HW/Latte/Core/LatteTexture.cpp b/src/Cafe/HW/Latte/Core/LatteTexture.cpp index 18e686ac..3c561000 100644 --- a/src/Cafe/HW/Latte/Core/LatteTexture.cpp +++ b/src/Cafe/HW/Latte/Core/LatteTexture.cpp @@ -352,7 +352,6 @@ void LatteTexture_CopySlice(LatteTexture* srcTexture, sint32 srcSlice, sint32 sr if (srcTexture->isDepth != dstTexture->isDepth) { g_renderer->surfaceCopy_copySurfaceWithFormatConversion(srcTexture, srcMip, srcSlice, dstTexture, dstMip, dstSlice, width, height); - throw std::runtime_error("1"); return; } // rescale copy size @@ -385,7 +384,6 @@ void LatteTexture_CopySlice(LatteTexture* srcTexture, sint32 srcSlice, sint32 sr cemuLog_log(LogType::Force, "Source: {:08x} origResolution {:4}/{:4} effectiveResolution {:4}/{:4} fmt {:04x} mipIndex {} ratioW/H: {:.4}/{:.4}", srcTexture->physAddress, srcTexture->width, srcTexture->height, effectiveWidth_src, effectiveHeight_src, (uint32)srcTexture->format, srcMip, ratioWidth_src, ratioHeight_src); cemuLog_log(LogType::Force, "Destination: {:08x} origResolution {:4}/{:4} effectiveResolution {:4}/{:4} fmt {:04x} mipIndex {} ratioW/H: {:.4}/{:.4}", dstTexture->physAddress, dstTexture->width, dstTexture->height, effectiveWidth_dst, effectiveHeight_dst, (uint32)dstTexture->format, dstMip, ratioWidth_dst, ratioHeight_dst); } - throw std::runtime_error("2"); //cemuLog_logDebug(LogType::Force, "If these textures are not meant to share data you can ignore this"); return; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index e743f31c..0975a4d0 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2389,7 +2389,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex else if(texDim == Latte::E_DIM::DIM_3D) { // 3 coords - src->add("float2("); + src->add("float3("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); @@ -2434,7 +2434,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex } else { - // TODO: is this correct + // TODO: is this correct? src->add("level("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 3, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp index 2da259f3..226a5c6d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp @@ -5,79 +5,79 @@ #include "Metal/MTLSampler.hpp" std::map MTL_COLOR_FORMAT_TABLE = { - {Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatRG8Unorm, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, 2}}, - {Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, 1}}, - {Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, 1}}, - {Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, 1}}, - {Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, 1}}, - {Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, 4}}, - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, 4}}, - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, 8}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, 4}}, - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, 8}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, 2}}, - {Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, 2}}, - {Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, 2}}, - {Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, 2}}, - {Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, 2}}, - {Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, 8}}, - {Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, 4}}, - {Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, 4}}, - {Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, 4}}, - {Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, 4}}, - {Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, 8}}, - {Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, 8}}, - {Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, 8}}, - {Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, 16}}, - {Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, 16}}, - {Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, 16}}, - {Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, 8, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, 8, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, 8, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, 8, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatRG8Unorm, true, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, true, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, true, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, true, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, true, 2}}, + {Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, true, 1}}, + {Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, true, 1}}, + {Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, false, 1}}, + {Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, false, 1}}, + {Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, true, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, true, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, false, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, false, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, true, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, true, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, false, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, false, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, true, 4}}, + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, true, 4}}, + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, true, 8}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, false, 4}}, + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, false, 8}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, true, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, true, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, false, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, true, 2}}, + {Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, true, 2}}, + {Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, false, 2}}, + {Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, false, 2}}, + {Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, true, 2}}, + {Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, true, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, true, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, false, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, false, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, true, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, true, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, true, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, false, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, false, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, true, 8}}, + {Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, false, 0}}, // TODO + {Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, false, 0}}, // TODO + {Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, false, 0}}, // TODO + {Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, false, 0}}, // TODO + {Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, false, 0}}, // TODO + {Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, true, 4}}, + {Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, false, 4}}, + {Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, false, 4}}, + {Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, true, 4}}, + {Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, false, 8}}, + {Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, false, 8}}, + {Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, true, 8}}, + {Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, false, 16}}, + {Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, false, 16}}, + {Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, true, 16}}, + {Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, true, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, true, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, true, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, true, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, true, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, true, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, true, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, true, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, true, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, true, 16, {4, 4}}}, // TODO: correct? }; std::map MTL_DEPTH_FORMAT_TABLE = { - {Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, 4}}, // TODO: not supported on Apple sillicon, maybe find something else - {Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, 5}}, - {Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, 2}}, - {Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, 4}}, + {Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, false, 4}}, // TODO: not supported on Apple sillicon, maybe find something else + {Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, false, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, false, 5}}, + {Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, false, 2}}, + {Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, false, 4}}, }; const MtlPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h index bd25ada5..91ae1c2f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h @@ -17,6 +17,7 @@ struct Uvec2 { struct MtlPixelFormatInfo { MTL::PixelFormat pixelFormat; + bool blendable; size_t bytesPerBlock; Uvec2 blockTexelSize = {1, 1}; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index d4d04312..ef59c111 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -106,7 +106,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK(); bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; - if (blendEnabled) + if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).blendable) { colorAttachment->setBlendingEnabled(true);