specialize fragment shader output types & fix: shader errors

This commit is contained in:
Samuliak 2024-08-07 13:25:38 +02:00
parent d7e9aff230
commit e2ec602c43
11 changed files with 205 additions and 121 deletions

View File

@ -198,6 +198,8 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
#if BOOST_OS_MACOS
if(bufferStride % 4 != 0)
{
if (g_renderer->GetType() == RendererAPI::Vulkan)
{
if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance())
{
@ -206,6 +208,7 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
continue;
}
}
}
#endif
uint32 bindOffset = LatteBufferCache_retrieveDataInCache(bufferAddress, fixedBufferSize);

View File

@ -10,6 +10,7 @@
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "config/ActiveSettings.h"
#include "util/helpers/StringBuf.h"
@ -2335,14 +2336,14 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
{
// 3 coords + compare value
src->add("float3(");
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", ");
src->add("), ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
src->addFmt("), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0));
src->addFmt(", {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0));
}
else if (texDim == Latte::E_DIM::DIM_CUBEMAP)
{
@ -3181,9 +3182,11 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe
src->add(") == false) discard_fragment();" _CRLF);
}
// pixel color output
src->addFmt("out.passPixelColor{} = ", pixelColorOutputIndex);
src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex));
src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetColorAttachmentTypeStr(pixelColorOutputIndex));
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
src->add(";" _CRLF);
src->add(");" _CRLF);
src->add("#endif" _CRLF);
if( cfInstruction->exportArrayBase+i >= 8 )
cemu_assert_unimplemented();

View File

@ -222,6 +222,8 @@ namespace LatteDecompiler
{
auto* src = shaderContext->shaderSource;
src->add("#define GET_FRAGCOORD() vec4(in.position.xy * supportBuffer.fragCoordScale.xy, in.position.z, 1.0 / in.position.w)" _CRLF);
src->add("struct FragmentIn {" _CRLF);
LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();
@ -263,7 +265,9 @@ namespace LatteDecompiler
{
if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0)
{
src->addFmt("float4 passPixelColor{} [[color({})]];" _CRLF, i, i);
src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i));
src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetColorAttachmentTypeStr(i), i, i);
src->add("#endif" _CRLF);
}
}

View File

@ -50,17 +50,17 @@ MTL::Texture* LatteTextureViewMtl::GetSwizzledView(uint32 gpuSamplerSwizzle)
}
// Fallback cache
auto it = m_fallbackViewCache.find(gpuSamplerSwizzle);
if (it != m_fallbackViewCache.end())
auto& fallbackEntry = m_fallbackViewCache[gpuSamplerSwizzle];
if (fallbackEntry)
{
return it->second;
return fallbackEntry;
}
MTL::Texture* texture = CreateSwizzledView(gpuSamplerSwizzle);
if (freeIndex != -1)
m_viewCache[freeIndex] = {gpuSamplerSwizzle, texture};
else
it->second = texture;
fallbackEntry = texture;
return texture;
}

View File

@ -4,85 +4,85 @@
#include "Metal/MTLRenderCommandEncoder.hpp"
#include "Metal/MTLSampler.hpp"
std::map<Latte::E_GX2SURFFMT, MtlPixelFormatInfo> MTL_COLOR_FORMAT_TABLE = {
{Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatRG8Unorm, true, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, true, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, true, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, true, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, true, 2}},
{Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, true, 1}},
{Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, true, 1}},
{Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, false, 1}},
{Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, false, 1}},
{Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, true, 2}},
{Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, true, 2}},
{Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, false, 2}},
{Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, false, 2}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, true, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, true, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, false, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, false, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, true, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, true, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, true, 8}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, false, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, false, 8}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, true, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, true, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, false, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, true, 2}},
{Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, true, 2}},
{Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, false, 2}},
{Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, false, 2}},
{Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, true, 2}},
{Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, true, 4}},
{Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, true, 4}},
{Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, false, 4}},
{Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, false, 4}},
{Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, true, 4}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, true, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, true, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, false, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, false, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, true, 8}},
{Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, false, 0}}, // TODO
{Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, false, 0}}, // TODO
{Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, false, 0}}, // TODO
{Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, false, 0}}, // TODO
{Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, false, 0}}, // TODO
{Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, true, 4}},
{Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, false, 4}},
{Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, false, 4}},
{Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, true, 4}},
{Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, false, 8}},
{Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, false, 8}},
{Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, true, 8}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, false, 16}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, false, 16}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, true, 16}},
{Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, true, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, true, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, true, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, true, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, true, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, true, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, true, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, true, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, true, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, true, 16, {4, 4}}}, // TODO: correct?
std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_COLOR_FORMAT_TABLE = {
{Latte::E_GX2SURFFMT::R4_G4_UNORM, {MTL::PixelFormatRG8Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R5_G6_B5_UNORM, {MTL::PixelFormatB5G6R5Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R5_G5_B5_A1_UNORM, {MTL::PixelFormatBGR5A1Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R4_G4_B4_A4_UNORM, {MTL::PixelFormatABGR4Unorm, MetalDataType::FLOAT, 2}}, // TODO: correct?
{Latte::E_GX2SURFFMT::A1_B5_G5_R5_UNORM, {MTL::PixelFormatA1BGR5Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R8_UNORM, {MTL::PixelFormatR8Unorm, MetalDataType::FLOAT, 1}},
{Latte::E_GX2SURFFMT::R8_SNORM, {MTL::PixelFormatR8Snorm, MetalDataType::FLOAT, 1}},
{Latte::E_GX2SURFFMT::R8_UINT, {MTL::PixelFormatR8Uint, MetalDataType::UINT, 1}},
{Latte::E_GX2SURFFMT::R8_SINT, {MTL::PixelFormatR8Sint, MetalDataType::INT, 1}},
{Latte::E_GX2SURFFMT::R8_G8_UNORM, {MTL::PixelFormatRG8Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_SNORM, {MTL::PixelFormatRG8Snorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_UINT, {MTL::PixelFormatRG8Uint, MetalDataType::UINT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_SINT, {MTL::PixelFormatRG8Sint, MetalDataType::INT, 2}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM, {MTL::PixelFormatRGBA8Unorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SNORM, {MTL::PixelFormatRGBA8Snorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_UINT, {MTL::PixelFormatRGBA8Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SINT, {MTL::PixelFormatRGBA8Sint, MetalDataType::INT, 4}},
{Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM, {MTL::PixelFormatRGB10A2Unorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R10_G10_B10_A2_SRGB, {MTL::PixelFormatRGBA8Unorm_sRGB, MetalDataType::FLOAT, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UNORM, {MTL::PixelFormatBGR10A2Unorm, MetalDataType::FLOAT, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::A2_B10_G10_R10_UINT, {MTL::PixelFormatRGB10A2Uint, MetalDataType::UINT, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::R16_UNORM, {MTL::PixelFormatR16Unorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R16_SNORM, {MTL::PixelFormatR16Snorm, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R16_UINT, {MTL::PixelFormatR16Uint, MetalDataType::UINT, 2}},
{Latte::E_GX2SURFFMT::R16_SINT, {MTL::PixelFormatR16Sint, MetalDataType::INT, 2}},
{Latte::E_GX2SURFFMT::R16_FLOAT, {MTL::PixelFormatR16Float, MetalDataType::FLOAT, 2}},
{Latte::E_GX2SURFFMT::R16_G16_UNORM, {MTL::PixelFormatRG16Unorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_SNORM, {MTL::PixelFormatRG16Snorm, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_UINT, {MTL::PixelFormatRG16Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_SINT, {MTL::PixelFormatRG16Sint, MetalDataType::INT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_FLOAT, {MTL::PixelFormatRG16Float, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM, {MTL::PixelFormatRGBA16Unorm, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SNORM, {MTL::PixelFormatRGBA16Snorm, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_UINT, {MTL::PixelFormatRGBA16Uint, MetalDataType::UINT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_SINT, {MTL::PixelFormatRGBA16Sint, MetalDataType::INT, 8}},
{Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT, {MTL::PixelFormatRGBA16Float, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R24_X8_UNORM, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
{Latte::E_GX2SURFFMT::R24_X8_FLOAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
{Latte::E_GX2SURFFMT::X24_G8_UINT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
{Latte::E_GX2SURFFMT::R32_X8_FLOAT, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
{Latte::E_GX2SURFFMT::X32_G8_UINT_X24, {MTL::PixelFormatInvalid, MetalDataType::NONE, 0}}, // TODO
{Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT, {MTL::PixelFormatRG11B10Float, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R32_UINT, {MTL::PixelFormatR32Uint, MetalDataType::UINT, 4}},
{Latte::E_GX2SURFFMT::R32_SINT, {MTL::PixelFormatR32Sint, MetalDataType::INT, 4}},
{Latte::E_GX2SURFFMT::R32_FLOAT, {MTL::PixelFormatR32Float, MetalDataType::FLOAT, 4}},
{Latte::E_GX2SURFFMT::R32_G32_UINT, {MTL::PixelFormatRG32Uint, MetalDataType::UINT, 8}},
{Latte::E_GX2SURFFMT::R32_G32_SINT, {MTL::PixelFormatRG32Sint, MetalDataType::INT, 8}},
{Latte::E_GX2SURFFMT::R32_G32_FLOAT, {MTL::PixelFormatRG32Float, MetalDataType::FLOAT, 8}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_UINT, {MTL::PixelFormatRGBA32Uint, MetalDataType::UINT, 16}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_SINT, {MTL::PixelFormatRGBA32Sint, MetalDataType::INT, 16}},
{Latte::E_GX2SURFFMT::R32_G32_B32_A32_FLOAT, {MTL::PixelFormatRGBA32Float, MetalDataType::FLOAT, 16}},
{Latte::E_GX2SURFFMT::BC1_UNORM, {MTL::PixelFormatBC1_RGBA, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC1_SRGB, {MTL::PixelFormatBC1_RGBA_sRGB, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC2_UNORM, {MTL::PixelFormatBC2_RGBA, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC2_SRGB, {MTL::PixelFormatBC2_RGBA_sRGB, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC3_UNORM, {MTL::PixelFormatBC3_RGBA, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC3_SRGB, {MTL::PixelFormatBC3_RGBA_sRGB, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC4_UNORM, {MTL::PixelFormatBC4_RUnorm, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC4_SNORM, {MTL::PixelFormatBC4_RSnorm, MetalDataType::FLOAT, 8, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC5_UNORM, {MTL::PixelFormatBC5_RGUnorm, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
{Latte::E_GX2SURFFMT::BC5_SNORM, {MTL::PixelFormatBC5_RGSnorm, MetalDataType::FLOAT, 16, {4, 4}}}, // TODO: correct?
};
std::map<Latte::E_GX2SURFFMT, MtlPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = {
{Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, false, 4}}, // TODO: not supported on Apple sillicon, maybe find something else
{Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, false, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, false, 5}},
{Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, false, 2}},
{Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, false, 4}},
std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = {
{Latte::E_GX2SURFFMT::D24_S8_UNORM, {MTL::PixelFormatDepth24Unorm_Stencil8, MetalDataType::NONE, 4}}, // TODO: not supported on Apple sillicon, maybe find something else
{Latte::E_GX2SURFFMT::D24_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 4}}, // TODO: correct?
{Latte::E_GX2SURFFMT::D32_S8_FLOAT, {MTL::PixelFormatDepth32Float_Stencil8, MetalDataType::NONE, 5}},
{Latte::E_GX2SURFFMT::D16_UNORM, {MTL::PixelFormatDepth16Unorm, MetalDataType::NONE, 2}},
{Latte::E_GX2SURFFMT::D32_FLOAT, {MTL::PixelFormatDepth32Float, MetalDataType::NONE, 4}},
};
const MtlPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth)
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth)
{
MtlPixelFormatInfo formatInfo;
MetalPixelFormatInfo formatInfo;
if (isDepth)
formatInfo = MTL_DEPTH_FORMAT_TABLE[format];
else
@ -266,7 +266,6 @@ MTL::PrimitiveType GetMtlPrimitiveType(LattePrimitiveMode primitiveMode)
case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLES:
return MTL::PrimitiveTypeTriangle;
case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLE_FAN:
debug_printf("Metal doesn't support triangle fan primitive, using triangle strip instead\n");
return MTL::PrimitiveTypeTriangleStrip;
case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLE_STRIP:
return MTL::PrimitiveTypeTriangleStrip;

View File

@ -15,14 +15,22 @@ struct Uvec2 {
uint32 y;
};
struct MtlPixelFormatInfo {
enum class MetalDataType
{
NONE,
INT,
UINT,
FLOAT,
};
struct MetalPixelFormatInfo {
MTL::PixelFormat pixelFormat;
bool blendable;
MetalDataType dataType;
size_t bytesPerBlock;
Uvec2 blockTexelSize = {1, 1};
};
const MtlPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth);
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth);
size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width);

View File

@ -1,6 +1,11 @@
#pragma once
inline size_t align(size_t size, size_t alignment)
inline size_t Align(size_t size, size_t alignment)
{
return (size + alignment - 1) & ~(alignment - 1);
}
inline std::string GetColorAttachmentTypeStr(uint32 index)
{
return "COLOR_ATTACHMENT" + std::to_string(index) + "_TYPE";
}

View File

@ -16,7 +16,7 @@ MetalBufferAllocator::~MetalBufferAllocator()
MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, size_t alignment)
{
// Align the size
size = align(size, alignment);
size = Align(size, alignment);
// First, try to find a free range
for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
@ -94,7 +94,7 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
if (restrideInfo.memoryInvalidated || stride != restrideInfo.lastStride)
{
// TODO: use compute/void vertex function instead
size_t newStride = align(stride, 4);
size_t newStride = Align(stride, 4);
size_t newSize = vertexBufferRange->size / stride * newStride;
restrideInfo.buffer = m_mtlr->GetDevice()->newBuffer(newSize, MTL::StorageModeShared);

View File

@ -61,7 +61,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
bufferStride = align(bufferStride, 4);
bufferStride = Align(bufferStride, 4);
// HACK
if (bufferStride == 0)
@ -83,11 +83,15 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
}
}
auto mtlVertexShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
mtlPixelShader->CompileFragmentFunction(activeFBO);
// Render pipeline state
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(static_cast<RendererShaderMtl*>(vertexShader->shader)->GetFunction());
desc->setFragmentFunction(static_cast<RendererShaderMtl*>(pixelShader->shader)->GetFunction());
// TODO: don't always set the vertex descriptor
desc->setVertexFunction(mtlVertexShader->GetFunction());
desc->setFragmentFunction(mtlPixelShader->GetFunction());
// TODO: don't always set the vertex descriptor?
desc->setVertexDescriptor(vertexDescriptor);
for (uint8 i = 0; i < 8; i++)
{
@ -106,7 +110,8 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK();
bool blendEnabled = ((blendEnableMask & (1 << i))) != 0;
if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).blendable)
// Only float data type is blendable
if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT)
{
colorAttachment->setBlendingEnabled(true);

View File

@ -1,30 +1,23 @@
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cemu/Logging/CemuLogging.h"
#include "Metal/MTLFunctionDescriptor.hpp"
#include "Common/precompiled.h"
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader)
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
{
NS::Error* error = nullptr;
MTL::Library* library = mtlRenderer->GetDevice()->newLibrary(NS::String::string(mslCode.c_str(), NS::ASCIIStringEncoding), nullptr, &error);
if (error)
// Fragment functions are compiled just-in-time
if (m_type == ShaderType::kFragment)
{
printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str());
error->release();
return;
m_mslCode = mslCode;
}
else
{
Compile(mslCode);
}
//MTL::FunctionDescriptor* desc = MTL::FunctionDescriptor::alloc()->init();
//desc->setName(NS::String::string("main0", NS::ASCIIStringEncoding));
//error = nullptr;
m_function = library->newFunction(NS::String::string("main0", NS::ASCIIStringEncoding));
library->release();
//if (error)
//{
// printf("failed to create function (error: %s)\n", error->localizedDescription()->utf8String());
// error->release();
// return;
//}
}
RendererShaderMtl::~RendererShaderMtl()
@ -33,6 +26,47 @@ RendererShaderMtl::~RendererShaderMtl()
m_function->release();
}
void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
{
cemu_assert_debug(m_type == ShaderType::kFragment);
if (m_function)
m_function->release();
std::string fullCode;
// Define color attachment data types
for (uint8 i = 0; i < 8; i++)
{
const auto& colorBuffer = activeFBO->colorBuffer[i];
if (!colorBuffer.texture)
{
continue;
}
auto dataType = GetMtlPixelFormatInfo(colorBuffer.texture->format, false).dataType;
fullCode += "#define " + GetColorAttachmentTypeStr(i) + " ";
switch (dataType)
{
case MetalDataType::INT:
fullCode += "int4";
break;
case MetalDataType::UINT:
fullCode += "uint4";
break;
case MetalDataType::FLOAT:
fullCode += "float4";
break;
default:
cemu_assert_suspicious();
break;
}
fullCode += "\n";
}
fullCode += m_mslCode;
Compile(fullCode);
}
void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId)
{
cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_begin not implemented!");
@ -47,3 +81,17 @@ void RendererShaderMtl::ShaderCacheLoading_Close()
{
cemuLog_log(LogType::MetalLogging, "RendererShaderMtl::ShaderCacheLoading_Close not implemented!");
}
void RendererShaderMtl::Compile(const std::string& mslCode)
{
NS::Error* error = nullptr;
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(NS::String::string(mslCode.c_str(), NS::ASCIIStringEncoding), nullptr, &error);
if (error)
{
printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str());
error->release();
return;
}
m_function = library->newFunction(NS::String::string("main0", NS::ASCIIStringEncoding));
library->release();
}

View File

@ -1,6 +1,7 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/RendererShader.h"
#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "util/helpers/ConcurrentQueue.h"
@ -24,6 +25,8 @@ public:
RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
virtual ~RendererShaderMtl();
void CompileFragmentFunction(CachedFBOMtl* activeFBO);
MTL::Function* GetFunction() const
{
return m_function;
@ -51,5 +54,11 @@ public:
bool WaitForCompiled() override { return true; }
private:
class MetalRenderer* m_mtlr;
MTL::Function* m_function = nullptr;
std::string m_mslCode;
void Compile(const std::string& mslCode);
};