specialize fragment shader output types & fix: shader errors

This commit is contained in:
Samuliak 2024-08-07 13:25:38 +02:00
parent d7e9aff230
commit e2ec602c43
11 changed files with 205 additions and 121 deletions

View File

@ -198,6 +198,8 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
#if BOOST_OS_MACOS #if BOOST_OS_MACOS
if(bufferStride % 4 != 0) if(bufferStride % 4 != 0)
{
if (g_renderer->GetType() == RendererAPI::Vulkan)
{ {
if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance()) if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance())
{ {
@ -206,6 +208,7 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
continue; continue;
} }
} }
}
#endif #endif
uint32 bindOffset = LatteBufferCache_retrieveDataInCache(bufferAddress, fixedBufferSize); uint32 bindOffset = LatteBufferCache_retrieveDataInCache(bufferAddress, fixedBufferSize);

View File

@ -10,6 +10,7 @@
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h" #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "config/ActiveSettings.h" #include "config/ActiveSettings.h"
#include "util/helpers/StringBuf.h" #include "util/helpers/StringBuf.h"
@ -2335,14 +2336,14 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
if (texDim == Latte::E_DIM::DIM_2D_ARRAY) if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
{ {
// 3 coords + compare value // 3 coords + compare value
src->add("float3("); src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", "); src->add(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", "); src->add("), ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
src->addFmt("), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0)); src->addFmt(", {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0));
} }
else if (texDim == Latte::E_DIM::DIM_CUBEMAP) else if (texDim == Latte::E_DIM::DIM_CUBEMAP)
{ {
@ -3181,9 +3182,11 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe
src->add(") == false) discard_fragment();" _CRLF); src->add(") == false) discard_fragment();" _CRLF);
} }
// pixel color output // pixel color output
src->addFmt("out.passPixelColor{} = ", pixelColorOutputIndex); src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex));
src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetColorAttachmentTypeStr(pixelColorOutputIndex));
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
src->add(";" _CRLF); src->add(");" _CRLF);
src->add("#endif" _CRLF);
if( cfInstruction->exportArrayBase+i >= 8 ) if( cfInstruction->exportArrayBase+i >= 8 )
cemu_assert_unimplemented(); cemu_assert_unimplemented();

View File

@ -222,6 +222,8 @@ namespace LatteDecompiler
{ {
auto* src = shaderContext->shaderSource; auto* src = shaderContext->shaderSource;
src->add("#define GET_FRAGCOORD() vec4(in.position.xy * supportBuffer.fragCoordScale.xy, in.position.z, 1.0 / in.position.w)" _CRLF);
src->add("struct FragmentIn {" _CRLF); src->add("struct FragmentIn {" _CRLF);
LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();
@ -263,7 +265,9 @@ namespace LatteDecompiler
{ {
if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0) if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0)
{ {
src->addFmt("float4 passPixelColor{} [[color({})]];" _CRLF, i, i); src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i));
src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetColorAttachmentTypeStr(i), i, i);
src->add("#endif" _CRLF);
} }
} }

View File

@ -50,17 +50,17 @@ MTL::Texture* LatteTextureViewMtl::GetSwizzledView(uint32 gpuSamplerSwizzle)
} }
// Fallback cache // Fallback cache
auto it = m_fallbackViewCache.find(gpuSamplerSwizzle); auto& fallbackEntry = m_fallbackViewCache[gpuSamplerSwizzle];
if (it != m_fallbackViewCache.end()) if (fallbackEntry)
{ {
return it->second; return fallbackEntry;
} }
MTL::Texture* texture = CreateSwizzledView(gpuSamplerSwizzle); MTL::Texture* texture = CreateSwizzledView(gpuSamplerSwizzle);
if (freeIndex != -1) if (freeIndex != -1)
m_viewCache[freeIndex] = {gpuSamplerSwizzle, texture}; m_viewCache[freeIndex] = {gpuSamplerSwizzle, texture};
else else
it->second = texture; fallbackEntry = texture;
return texture; return texture;
} }

View File

@ -4,85 +4,85 @@
#include "Metal/MTLRenderCommandEncoder.hpp" #include "Metal/MTLRenderCommandEncoder.hpp"
#include "Metal/MTLSampler.hpp" #include "Metal/MTLSampler.hpp"
std::map<Latte::E_GX2SURFFMT, MtlPixelFormatInfo> MTL_COLOR_FORMAT_TABLE = { std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_COLOR_FORMAT_TABLE = {
{Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatRG8Unorm, true, 2}}, // TODO: correct? {Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatRG8Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, true, 2}}, // TODO: correct? {Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, true, 2}}, // TODO: correct? {Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, true, 2}}, // TODO: correct? {Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, true, 2}}, {Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, true, 1}}, {Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, MetalDataType::FLOAT, 1}},
{Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, true, 1}}, {Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, MetalDataType::FLOAT, 1}},
{Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, false, 1}}, {Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, MetalDataType::UINT, 1}},
{Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, false, 1}}, {Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, MetalDataType::INT, 1}},
{Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, true, 2}}, {Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, true, 2}}, {Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, false, 2}}, {Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, MetalDataType::UINT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, false, 2}}, {Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, MetalDataType::INT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, true, 4}}, {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, true, 4}}, {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, false, 4}}, {Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, false, 4}}, {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, MetalDataType::INT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, true, 4}}, {Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, true, 4}}, {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, true, 8}}, // TODO: correct? {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, false, 4}}, {Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, false, 8}}, // TODO: correct? {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, true, 4}}, // TODO: correct? {Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, MetalDataType::FLOAT, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, true, 4}}, // TODO: correct? {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, MetalDataType::FLOAT, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, false, 4}}, // TODO: correct? {Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, true, 2}}, {Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, true, 2}}, {Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, false, 2}}, {Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, MetalDataType::UINT, 2}},
{Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, false, 2}}, {Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, MetalDataType::INT, 2}},
{Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, true, 2}}, {Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, true, 4}}, {Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, true, 4}}, {Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, false, 4}}, {Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, false, 4}}, {Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, MetalDataType::INT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, true, 4}}, {Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, true, 8}}, {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, true, 8}}, {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, false, 8}}, {Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, MetalDataType::UINT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, false, 8}}, {Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, true, 8}}, {Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, false, 0}}, // TODO {Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
{Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, false, 0}}, // TODO {Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
{Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, false, 0}}, // TODO {Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
{Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, false, 0}}, // TODO {Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
{Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, false, 0}}, // TODO {Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
{Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, true, 4}}, {Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, false, 4}}, {Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, false, 4}}, {Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, MetalDataType::INT, 4}},
{Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, true, 4}}, {Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, false, 8}}, {Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, MetalDataType::UINT, 8}},
{Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, false, 8}}, {Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, MetalDataType::INT, 8}},
{Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, true, 8}}, {Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, false, 16}}, {Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, MetalDataType::UINT, 16}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, false, 16}}, {Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, MetalDataType::INT, 16}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, true, 16}}, {Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, MetalDataType::FLOAT, 16}},
{Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, true, 8, {4, 4}}}, // TODO: correct? {Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, true, 8, {4, 4}}}, // TODO: correct? {Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, true, 16, {4, 4}}}, // TODO: correct? {Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, true, 16, {4, 4}}}, // TODO: correct? {Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, true, 16, {4, 4}}}, // TODO: correct? {Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, true, 16, {4, 4}}}, // TODO: correct? {Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, true, 8, {4, 4}}}, // TODO: correct? {Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, true, 8, {4, 4}}}, // TODO: correct? {Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, true, 16, {4, 4}}}, // TODO: correct? {Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, true, 16, {4, 4}}}, // TODO: correct? {Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
}; };
std::map<Latte::E_GX2SURFFMT, MtlPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = { std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = {
{Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, false, 4}}, // TODO: not supported on Apple sillicon, maybe find something else {Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4}}, // TODO: not supported on Apple sillicon, maybe find something else
{Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, false, 4}}, // TODO: correct? {Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, false, 5}}, {Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5}},
{Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, false, 2}}, {Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2}},
{Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, false, 4}}, {Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4}},
}; };
const MtlPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth) const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth)
{ {
MtlPixelFormatInfo formatInfo; MetalPixelFormatInfo formatInfo;
if (isDepth) if (isDepth)
formatInfo = MTL_DEPTH_FORMAT_TABLE[format]; formatInfo = MTL_DEPTH_FORMAT_TABLE[format];
else else
@ -266,7 +266,6 @@ MTL::PrimitiveType GetMtlPrimitiveType(LattePrimitiveMode primitiveMode)
case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLES: case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLES:
return MTL::PrimitiveTypeTriangle; return MTL::PrimitiveTypeTriangle;
case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLE_FAN: case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLE_FAN:
debug_printf("Metal doesn't support triangle fan primitive, using triangle strip instead\n");
return MTL::PrimitiveTypeTriangleStrip; return MTL::PrimitiveTypeTriangleStrip;
case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLE_STRIP: case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLE_STRIP:
return MTL::PrimitiveTypeTriangleStrip; return MTL::PrimitiveTypeTriangleStrip;

View File

@ -15,14 +15,22 @@ struct Uvec2 {
uint32 y; uint32 y;
}; };
struct MtlPixelFormatInfo { enum class MetalDataType
{
NONE,
INT,
UINT,
FLOAT,
};
struct MetalPixelFormatInfo {
MTL::PixelFormat pixelFormat; MTL::PixelFormat pixelFormat;
bool blendable; MetalDataType dataType;
size_t bytesPerBlock; size_t bytesPerBlock;
Uvec2 blockTexelSize = {1, 1}; Uvec2 blockTexelSize = {1, 1};
}; };
const MtlPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth); const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth);
size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width); size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width);

View File

@ -1,6 +1,11 @@
#pragma once #pragma once
inline size_t align(size_t size, size_t alignment) inline size_t Align(size_t size, size_t alignment)
{ {
return (size + alignment - 1) & ~(alignment - 1); return (size + alignment - 1) & ~(alignment - 1);
} }
inline std::string GetColorAttachmentTypeStr(uint32 index)
{
return "COLOR_ATTACHMENT" + std::to_string(index) + "_TYPE";
}

View File

@ -16,7 +16,7 @@ MetalBufferAllocator::~MetalBufferAllocator()
MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, size_t alignment) MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, size_t alignment)
{ {
// Align the size // Align the size
size = align(size, alignment); size = Align(size, alignment);
// First, try to find a free range // First, try to find a free range
for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
@ -94,7 +94,7 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
if (restrideInfo.memoryInvalidated || stride != restrideInfo.lastStride) if (restrideInfo.memoryInvalidated || stride != restrideInfo.lastStride)
{ {
// TODO: use compute/void vertex function instead // TODO: use compute/void vertex function instead
size_t newStride = align(stride, 4); size_t newStride = Align(stride, 4);
size_t newSize = vertexBufferRange->size / stride * newStride; size_t newSize = vertexBufferRange->size / stride * newStride;
restrideInfo.buffer = m_mtlr->GetDevice()->newBuffer(newSize, MTL::StorageModeShared); restrideInfo.buffer = m_mtlr->GetDevice()->newBuffer(newSize, MTL::StorageModeShared);

View File

@ -61,7 +61,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
uint32 bufferIndex = bufferGroup.attributeBufferIndex; uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
bufferStride = align(bufferStride, 4); bufferStride = Align(bufferStride, 4);
// HACK // HACK
if (bufferStride == 0) if (bufferStride == 0)
@ -83,11 +83,15 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
} }
} }
auto mtlVertexShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
mtlPixelShader->CompileFragmentFunction(activeFBO);
// Render pipeline state // Render pipeline state
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(static_cast<RendererShaderMtl*>(vertexShader->shader)->GetFunction()); desc->setVertexFunction(mtlVertexShader->GetFunction());
desc->setFragmentFunction(static_cast<RendererShaderMtl*>(pixelShader->shader)->GetFunction()); desc->setFragmentFunction(mtlPixelShader->GetFunction());
// TODO: don't always set the vertex descriptor // TODO: don't always set the vertex descriptor?
desc->setVertexDescriptor(vertexDescriptor); desc->setVertexDescriptor(vertexDescriptor);
for (uint8 i = 0; i < 8; i++) for (uint8 i = 0; i < 8; i++)
{ {
@ -106,7 +110,8 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK(); uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK();
bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; bool blendEnabled = ((blendEnableMask & (1 << i))) != 0;
if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).blendable) // Only float data type is blendable
if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT)
{ {
colorAttachment->setBlendingEnabled(true); colorAttachment->setBlendingEnabled(true);

View File

@ -1,30 +1,23 @@
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cemu/Logging/CemuLogging.h" #include "Cemu/Logging/CemuLogging.h"
#include "Metal/MTLFunctionDescriptor.hpp" #include "Common/precompiled.h"
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader) : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
{ {
NS::Error* error = nullptr; // Fragment functions are compiled just-in-time
MTL::Library* library = mtlRenderer->GetDevice()->newLibrary(NS::String::string(mslCode.c_str(), NS::ASCIIStringEncoding), nullptr, &error); if (m_type == ShaderType::kFragment)
if (error)
{ {
printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str()); m_mslCode = mslCode;
error->release(); }
return; else
{
Compile(mslCode);
} }
//MTL::FunctionDescriptor* desc = MTL::FunctionDescriptor::alloc()->init();
//desc->setName(NS::String::string("main0", NS::ASCIIStringEncoding));
//error = nullptr;
m_function = library->newFunction(NS::String::string("main0", NS::ASCIIStringEncoding));
library->release();
//if (error)
//{
// printf("failed to create function (error: %s)\n", error->localizedDescription()->utf8String());
// error->release();
// return;
//}
} }
RendererShaderMtl::~RendererShaderMtl() RendererShaderMtl::~RendererShaderMtl()
@ -33,6 +26,47 @@ RendererShaderMtl::~RendererShaderMtl()
m_function->release(); m_function->release();
} }
void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
{
cemu_assert_debug(m_type == ShaderType::kFragment);
if (m_function)
m_function->release();
std::string fullCode;
// Define color attachment data types
for (uint8 i = 0; i < 8; i++)
{
const auto& colorBuffer = activeFBO->colorBuffer[i];
if (!colorBuffer.texture)
{
continue;
}
auto dataType = GetMtlPixelFormatInfo(colorBuffer.texture->format, false).dataType;
fullCode += "#define " + GetColorAttachmentTypeStr(i) + " ";
switch (dataType)
{
case MetalDataType::INT:
fullCode += "int4";
break;
case MetalDataType::UINT:
fullCode += "uint4";
break;
case MetalDataType::FLOAT:
fullCode += "float4";
break;
default:
cemu_assert_suspicious();
break;
}
fullCode += "\n";
}
fullCode += m_mslCode;
Compile(fullCode);
}
void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId) void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId)
{ {
cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_begin not implemented!"); cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_begin not implemented!");
@ -47,3 +81,17 @@ void RendererShaderMtl::ShaderCacheLoading_Close()
{ {
cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_Close not implemented!"); cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_Close not implemented!");
} }
void RendererShaderMtl::Compile(const std::string& mslCode)
{
NS::Error* error = nullptr;
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(NS::String::string(mslCode.c_str(), NS::ASCIIStringEncoding), nullptr, &error);
if (error)
{
printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str());
error->release();
return;
}
m_function = library->newFunction(NS::String::string("main0", NS::ASCIIStringEncoding));
library->release();
}

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include "Cafe/HW/Latte/Renderer/RendererShader.h" #include "Cafe/HW/Latte/Renderer/RendererShader.h"
#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "HW/Latte/Renderer/Metal/MetalRenderer.h" #include "HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "util/helpers/ConcurrentQueue.h" #include "util/helpers/ConcurrentQueue.h"
@ -24,6 +25,8 @@ public:
RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode); RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
virtual ~RendererShaderMtl(); virtual ~RendererShaderMtl();
void CompileFragmentFunction(CachedFBOMtl* activeFBO);
MTL::Function* GetFunction() const MTL::Function* GetFunction() const
{ {
return m_function; return m_function;
@ -51,5 +54,11 @@ public:
bool WaitForCompiled() override { return true; } bool WaitForCompiled() override { return true; }
private: private:
class MetalRenderer* m_mtlr;
MTL::Function* m_function = nullptr; MTL::Function* m_function = nullptr;
std::string m_mslCode;
void Compile(const std::string& mslCode);
}; };