From 4976ff3084c91ade0b6d9a8a8c64d7bd6d51f8f8 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 27 Jul 2024 11:36:51 +0200 Subject: [PATCH] fix: sampling & capture boundaries --- src/Cafe/HW/Latte/Core/LatteTextureLoader.cpp | 4 +- .../LatteDecompilerEmitMSL.cpp | 43 +++-- .../Latte/Renderer/Metal/LatteTextureMtl.cpp | 37 ++-- .../Renderer/Metal/LatteTextureViewMtl.cpp | 12 +- .../HW/Latte/Renderer/Metal/LatteToMtl.cpp | 162 ++++++++++-------- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 82 +++++++-- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 12 ++ .../Latte/Renderer/Vulkan/VulkanRenderer.cpp | 2 +- 8 files changed, 233 insertions(+), 121 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteTextureLoader.cpp b/src/Cafe/HW/Latte/Core/LatteTextureLoader.cpp index c06a3bf1..b80bd869 100644 --- a/src/Cafe/HW/Latte/Core/LatteTextureLoader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteTextureLoader.cpp @@ -602,7 +602,7 @@ void LatteTextureLoader_loadTextureDataIntoSlice(LatteTexture* hostTexture, sint void LatteTextureLoader_UpdateTextureSliceData(LatteTexture* tex, uint32 sliceIndex, uint32 mipIndex, MPTR physImagePtr, MPTR physMipPtr, Latte::E_DIM dim, uint32 width, uint32 height, uint32 depth, uint32 mipLevels, uint32 pitch, Latte::E_HWTILEMODE tileMode, uint32 swizzle, bool dumpTex) { LatteTextureLoaderCtx textureLoader = { 0 }; - + Latte::E_GX2SURFFMT format = tex->format; LatteTextureLoader_begin(&textureLoader, sliceIndex, mipIndex, physImagePtr, physMipPtr, format, dim, width, height, depth, mipLevels, pitch, tileMode, swizzle); @@ -853,7 +853,7 @@ void LatteTextureLoader_writeReadbackTextureToMemory(LatteTextureDefinition* tex pixelInput += 4; } } - } + } else { cemuLog_logDebug(LogType::Force, "Texture readback unsupported format {:04x} for tileMode 0x{:02x}", (uint32)textureData->format, textureData->tileMode); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 9981d5ea..76cd6a18 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2335,14 +2335,14 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex if (texDim == Latte::E_DIM::DIM_2D_ARRAY) { // 3 coords + compare value (as float4) - src->add("float4("); + src->add("float3("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(","); + src->add(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(","); + src->add(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); - src->addFmt(",{})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0)); + src->addFmt("), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0)); } else if (texDim == Latte::E_DIM::DIM_CUBEMAP) { @@ -2364,7 +2364,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex { debugBreakpoint(); } - src->addFmt("float3({},0.0,{})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); + src->addFmt("{}, {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); } else { @@ -2373,17 +2373,27 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex { debugBreakpoint(); } - src->addFmt("float3({}, {})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); + src->addFmt("float2({}), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); } } - else if( texDim == Latte::E_DIM::DIM_3D || texDim == Latte::E_DIM::DIM_2D_ARRAY ) + else if(texDim == Latte::E_DIM::DIM_2D_ARRAY) { // 3 coords - src->add("float3("); + src->add("float2("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(","); + src->add(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(","); + src->add("), "); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); + } + else if(texDim == Latte::E_DIM::DIM_3D) + { + // 3 coords + src->add("float2("); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add(", "); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); } @@ -2392,11 +2402,9 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex // 2 coords + faceId cemu_assert_debug(texInstruction->textureFetch.srcSel[0] < 4); cemu_assert_debug(texInstruction->textureFetch.srcSel[1] < 4); - src->add("float4("); src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); - src->add(")"); - src->addFmt(",cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index + src->addFmt(", cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index } else if( texDim == Latte::E_DIM::DIM_1D ) { @@ -3161,7 +3169,7 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe src->add(") == false) discard;" _CRLF); } // pixel color output - src->addFmt("passPixelColor{} = ", pixelColorOutputIndex); + src->addFmt("out.passPixelColor{} = ", pixelColorOutputIndex); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); src->add(";" _CRLF); @@ -3706,6 +3714,13 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon " return as_type(0.0);\r\n" "return as_type(clamp(as_type(v), 0.0, 1.0));\r\n" "}\r\n"); + + // round even + fCStr_shaderSource->add("" + "float roundEven(float x) {\r\n" + "return round(x / 2.0) * 2.0;\r\n" + "}\r\n"); + // mul non-ieee way (0*NaN/INF => 0.0) if (shaderContext->options->strictMul) { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp index bf6cab24..bbd714d9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp @@ -46,22 +46,29 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM } desc->setUsage(usage); - if (dim == Latte::E_DIM::DIM_2D) - desc->setTextureType(MTL::TextureType2D); - else if (dim == Latte::E_DIM::DIM_1D) - desc->setTextureType(MTL::TextureType1D); - else if (dim == Latte::E_DIM::DIM_3D) - desc->setTextureType(MTL::TextureType3D); - else if (dim == Latte::E_DIM::DIM_2D_ARRAY) + switch (dim) + { + case Latte::E_DIM::DIM_1D: + desc->setTextureType(MTL::TextureType1D); + break; + case Latte::E_DIM::DIM_2D: + case Latte::E_DIM::DIM_2D_MSAA: + desc->setTextureType(MTL::TextureType2D); + break; + case Latte::E_DIM::DIM_2D_ARRAY: desc->setTextureType(MTL::TextureType2DArray); - else if (dim == Latte::E_DIM::DIM_CUBEMAP) - desc->setTextureType(MTL::TextureTypeCube); // TODO: is this correct? - else if (dim == Latte::E_DIM::DIM_2D_MSAA) - desc->setTextureType(MTL::TextureType2D); - else - { - cemu_assert_unimplemented(); - } + break; + case Latte::E_DIM::DIM_3D: + desc->setTextureType(MTL::TextureType3D); + break; + case Latte::E_DIM::DIM_CUBEMAP: + desc->setTextureType(MTL::TextureTypeCube); // TODO: check this + break; + default: + cemu_assert_unimplemented(); + desc->setTextureType(MTL::TextureType2D); + break; + } m_texture = mtlRenderer->GetDevice()->newTexture(desc); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp index c8df8cf6..37399fca 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp @@ -1,29 +1,34 @@ #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" LatteTextureViewMtl::LatteTextureViewMtl(MetalRenderer* mtlRenderer, LatteTextureMtl* texture, Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount) : LatteTextureView(texture, firstMip, mipCount, firstSlice, sliceCount, dim, format), m_mtlr(mtlRenderer), m_format(format) { - // TODO: don't hardcode the format - MTL::PixelFormat pixelFormat = MTL::PixelFormatRGBA8Unorm; MTL::TextureType textureType; switch (dim) { case Latte::E_DIM::DIM_1D: textureType = MTL::TextureType1D; + break; case Latte::E_DIM::DIM_2D: case Latte::E_DIM::DIM_2D_MSAA: textureType = MTL::TextureType2D; + break; case Latte::E_DIM::DIM_2D_ARRAY: textureType = MTL::TextureType2DArray; + break; case Latte::E_DIM::DIM_3D: textureType = MTL::TextureType3D; + break; case Latte::E_DIM::DIM_CUBEMAP: textureType = MTL::TextureTypeCube; // TODO: check this + break; default: cemu_assert_unimplemented(); textureType = MTL::TextureType2D; + break; } uint32 baseLevel = firstMip; @@ -47,7 +52,8 @@ LatteTextureViewMtl::LatteTextureViewMtl(MetalRenderer* mtlRenderer, LatteTextur // TODO: swizzle - m_texture = texture->GetTexture()->newTextureView(pixelFormat, textureType, NS::Range::Make(baseLevel, levelCount), NS::Range::Make(baseLayer, layerCount)); + auto formatInfo = GetMtlPixelFormatInfo(format); + m_texture = texture->GetTexture()->newTextureView(formatInfo.pixelFormat, textureType, NS::Range::Make(baseLevel, levelCount), NS::Range::Make(baseLayer, layerCount)); } LatteTextureViewMtl::~LatteTextureViewMtl() diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp index a1d398bd..cc24348a 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.cpp @@ -1,96 +1,106 @@ #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Common/precompiled.h" +#include "Metal/MTLPixelFormat.hpp" -std::map MTL_FORMAT_TABLE = {{ - {Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, 2}}, // TODO: correct? - {Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, 2}}, - {Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, 1}}, - {Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, 1}}, - {Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, 1}}, - {Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, 1}}, - {Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, 2}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, 4}}, - {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, 4}}, - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, 4}}, - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, 4}}, - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, 2}}, - {Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, 2}}, - {Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, 2}}, - {Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, 2}}, - {Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, 2}}, - {Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, 4}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, 8}}, - {Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, 8}}, - {Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, 4}}, - {Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, 4}}, - {Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, 4}}, - {Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, 4}}, - {Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, 8}}, - {Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, 8}}, - {Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, 8}}, - {Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, 16}}, - {Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, 16}}, - {Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, 16}}, - {Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, 4}}, // TODO: not supported on Apple sillicon, maybe find something else - {Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, 4}}, // TODO: correct? - {Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, 5}}, - {Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, 2}}, - {Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, 4}}, - {Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, 8, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, 8, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, 16, {4, 4}}}, // TODO: correct? - {Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatInvalid, 0}}, // TODO - {Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatInvalid, 0}}, // TODO -}}; +std::map MTL_FORMAT_TABLE = { + {Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatRG8Unorm, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, 2}}, // TODO: correct? + {Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, 2}}, + {Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, 1}}, + {Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, 1}}, + {Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, 1}}, + {Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, 1}}, + {Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, 2}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, 4}}, + {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, 4}}, + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, 4}}, + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, 8}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, 4}}, + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, 8}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, 2}}, + {Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, 2}}, + {Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, 2}}, + {Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, 2}}, + {Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, 2}}, + {Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, 4}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, 8}}, + {Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, 8}}, + {Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, 0}}, // TODO + {Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, 0}}, // TODO + {Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, 0}}, // TODO + {Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, 0}}, // TODO + {Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, 0}}, // TODO + {Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, 4}}, + {Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, 4}}, + {Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, 4}}, + {Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, 4}}, + {Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, 8}}, + {Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, 8}}, + {Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, 8}}, + {Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, 16}}, + {Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, 16}}, + {Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, 16}}, + {Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, 4}}, // TODO: not supported on Apple sillicon, maybe find something else + {Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, 4}}, // TODO: correct? + {Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, 5}}, + {Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, 2}}, + {Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, 4}}, + {Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, 8, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, 16, {4, 4}}}, // TODO: correct? + {Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, 16, {4, 4}}}, // TODO: correct? +}; -const MtlPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format) { +const MtlPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format) +{ cemu_assert_debug(static_cast(format) < MTL_FORMAT_TABLE.size()); - return MTL_FORMAT_TABLE[format]; + MtlPixelFormatInfo formatInfo = MTL_FORMAT_TABLE[format]; + if (formatInfo.pixelFormat == MTL::PixelFormatInvalid) + { + printf("invalid pixel format: %i\n", (int)format); + } + + return formatInfo; } inline uint32 CeilDivide(uint32 a, uint32 b) { return (a + b - 1) / b; } -size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, uint32 width) { +size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, uint32 width) +{ const auto& formatInfo = GetMtlPixelFormatInfo(format); return CeilDivide(width, formatInfo.blockTexelSize.x) * formatInfo.bytesPerBlock; } -size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, uint32 height, size_t bytesPerRow) { +size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, uint32 height, size_t bytesPerRow) +{ const auto& formatInfo = GetMtlPixelFormatInfo(format); return CeilDivide(height, formatInfo.blockTexelSize.y) * bytesPerRow; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index a5b7e3a6..0923b9ef 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -7,6 +7,8 @@ #include "HW/Latte/Core/LatteShader.h" #include "gui/guiWrapper.h" +extern bool hasValidFramebufferAttached; + MetalRenderer::MetalRenderer() { m_device = MTL::CreateSystemDefaultDevice(); @@ -66,17 +68,27 @@ void MetalRenderer::DrawEmptyFrame(bool mainWindow) void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) { + CA::MetalDrawable* drawable = m_metalLayer->nextDrawable(); - if (!drawable) + if (drawable) { - return; + ensureCommandBuffer(); + m_commandBuffer->presentDrawable(drawable); + } else + { + printf("skipped present!\n"); } - m_commandBuffer->presentDrawable(drawable); - m_commandBuffer->commit(); + if (m_commandBuffer) + { + m_commandBuffer->commit(); - m_commandBuffer->release(); - m_commandBuffer = nullptr; + m_commandBuffer->release(); + m_commandBuffer = nullptr; + + // Debug + m_commandQueue->insertDebugCaptureBoundary(); + } } void MetalRenderer::DrawBackbufferQuad(LatteTextureView* texView, RendererOutputShader* shader, bool useLinearTexFilter, @@ -88,8 +100,6 @@ void MetalRenderer::DrawBackbufferQuad(LatteTextureView* texView, RendererOutput bool MetalRenderer::BeginFrame(bool mainWindow) { - m_commandBuffer = m_commandQueue->commandBuffer(); - // TODO return false; } @@ -162,6 +172,7 @@ void MetalRenderer::texture_clearSlice(LatteTexture* hostTexture, sint32 sliceIn void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, sint32 height, sint32 depth, void* pixelData, sint32 sliceIndex, sint32 mipIndex, uint32 compressedImageSize) { + std::cout << "TEXTURE LOAD SLICE" << std::endl; auto mtlTexture = (LatteTextureMtl*)hostTexture; size_t bytesPerRow = GetMtlTextureBytesPerRow(mtlTexture->GetFormat(), width); @@ -191,6 +202,7 @@ void MetalRenderer::texture_setLatteTexture(LatteTextureView* textureView, uint3 void MetalRenderer::texture_copyImageSubData(LatteTexture* src, sint32 srcMip, sint32 effectiveSrcX, sint32 effectiveSrcY, sint32 srcSlice, LatteTexture* dst, sint32 dstMip, sint32 effectiveDstX, sint32 effectiveDstY, sint32 dstSlice, sint32 effectiveCopyWidth, sint32 effectiveCopyHeight, sint32 srcDepth) { + std::cout << "TEXTURE COPY IMAGE SUBDATA" << std::endl; cemuLog_log(LogType::MetalLogging, "not implemented"); } @@ -258,9 +270,59 @@ void MetalRenderer::streamout_rendererFinishDrawcall() void MetalRenderer::draw_beginSequence() { - cemuLog_log(LogType::MetalLogging, "not implemented"); + skipDraws = false; - LatteSHRC_UpdateActiveShaders(); + // update shader state + LatteSHRC_UpdateActiveShaders(); + if (LatteGPUState.activeShaderHasError) + { + cemuLog_logDebugOnce(LogType::Force, "Skipping drawcalls due to shader error"); + skipDraws = true; + cemu_assert_debug(false); + return; + } + + // update render target and texture state + LatteGPUState.requiresTextureBarrier = false; + while (true) + { + LatteGPUState.repeatTextureInitialization = false; + if (!LatteMRT::UpdateCurrentFBO()) + { + debug_printf("Rendertarget invalid\n"); + skipDraws = true; + return; // no render target + } + + if (!hasValidFramebufferAttached) + { + debug_printf("Drawcall with no color buffer or depth buffer attached\n"); + skipDraws = true; + return; // no render target + } + LatteTexture_updateTextures(); + if (!LatteGPUState.repeatTextureInitialization) + break; + } + + // apply render target + // HACK: not implemented yet + //LatteMRT::ApplyCurrentState(); + + // viewport and scissor box + LatteRenderTarget_updateViewport(); + LatteRenderTarget_updateScissorBox(); + + // check for conditions which would turn the drawcalls into no-ops + bool rasterizerEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL() == false; + + // GX2SetSpecialState(0, true) enables DX_RASTERIZATION_KILL, but still expects depth writes to happen? -> Research which stages are disabled by DX_RASTERIZATION_KILL exactly + // for now we use a workaround: + if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + rasterizerEnable = true; + + if (!rasterizerEnable == false) + skipDraws = true; } void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index d43cc2be..a9177ab5 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -161,4 +161,16 @@ private: MTL::CommandQueue* m_commandQueue; MTL::CommandBuffer* m_commandBuffer = nullptr; + + // State + bool skipDraws = false; + + // Helpers + void ensureCommandBuffer() + { + if (!m_commandBuffer) + { + m_commandBuffer = m_commandQueue->commandBuffer(); + } + } }; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp index 9209e3cd..ec27b412 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp @@ -61,7 +61,7 @@ VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(VkDebugUtilsMessageSeverityFla if (strstr(pCallbackData->pMessage, "consumes input location")) return VK_FALSE; // false means we dont care if (strstr(pCallbackData->pMessage, "blend")) - return VK_FALSE; // + return VK_FALSE; // // note: Check if previously used location in VK_EXT_debug_report callback is the same as messageIdNumber under the new extension // validation errors which are difficult to fix