Merge pull request #3 from SamoZ256/metal-shaders

Better shaders
This commit is contained in:
SamoZ256 2024-09-06 19:20:58 +02:00 committed by GitHub
commit 5e50592937
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 265 additions and 300 deletions

View File

@ -146,6 +146,23 @@ void LatteFetchShader::CalculateFetchShaderVkHash()
this->vkPipelineHashFragment = h;
}
// Derives mtlShaderHashObject from every buffer group's attribute buffer index
// and the stride currently programmed for that buffer in contextRegister.
// contextRegister: raw register words, indexed relative to mmSQ_VTX_ATTRIBUTE_BLOCK_START.
void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister)
{
	uint64 hash = 0;
	for (const auto& bufferGroup : bufferGroups)
	{
		const uint32 attribBufferIndex = bufferGroup.attributeBufferIndex;
		// the stride is the 16-bit field starting at bit 11 of the third word of this buffer's register block
		const uint32 registerBase = mmSQ_VTX_ATTRIBUTE_BLOCK_START + attribBufferIndex * 7;
		const uint32 stride = (contextRegister[registerBase + 2] >> 11) & 0xFFFF;
		// mix in index, then stride, rotating after each so that ordering matters
		hash = std::rotl<uint64>(hash + (uint64)attribBufferIndex, 5);
		hash = std::rotl<uint64>(hash + (uint64)stride, 5);
	}
	mtlShaderHashObject = hash;
}
void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr)
{
uint32 semanticId = instr->getFieldSEM_SEMANTIC_ID(); // location (attribute index inside shader)
@ -161,7 +178,7 @@ void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* pars
auto nfa = instr->getField_NUM_FORMAT_ALL();
bool isSigned = instr->getField_FORMAT_COMP_ALL() == LatteClauseInstruction_VTX::FORMAT_COMP::COMP_SIGNED;
auto endianSwap = instr->getField_ENDIAN_SWAP();
// get buffer
cemu_assert_debug(bufferId >= 0xA0 && bufferId < 0xB0);
uint32 bufferIndex = (bufferId - 0xA0);
@ -316,7 +333,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
// {0x00000002, 0x01800c00, 0x00000000, 0x8a000000, 0x2c00a001, 0x2c151000, 0x000a0000, ...} // size 0x50
// {0x00000002, 0x01801000, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x60
// {0x00000002, 0x01801c00, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x90
// our new implementation:
// {0x00000002, 0x01800400, 0x00000000, 0x8a000000, 0x0000a001, 0x2c151000, 0x00020000, ...}
@ -328,6 +345,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
// these only make sense when vertex shader does not call FS?
LatteShader_calculateFSKey(newFetchShader);
newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
return newFetchShader;
}
@ -387,6 +405,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
}
LatteShader_calculateFSKey(newFetchShader);
newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
// register in cache
// it's possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously
@ -411,7 +430,7 @@ LatteFetchShader::~LatteFetchShader()
UnregisterInCache();
}
struct FetchShaderLookupInfo
struct FetchShaderLookupInfo
{
LatteFetchShader* fetchShader;
uint32 programSize;

View File

@ -46,6 +46,9 @@ struct LatteFetchShader
// Vulkan
uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline
// Metal
uint64 mtlShaderHashObject{};
// cache info
CacheHash m_cacheHash{};
bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded)
@ -53,6 +56,8 @@ struct LatteFetchShader
void CalculateFetchShaderVkHash();
void CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister);
uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; };
static bool isValidBufferIndex(const uint32 index) { return index < 0x10; };
@ -69,4 +74,4 @@ struct LatteFetchShader
static std::unordered_map<CacheHash, LatteFetchShader*> s_fetchShaderByHash;
};
LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize);
LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize);

View File

@ -14,6 +14,9 @@
#include "config/ActiveSettings.h"
#include "Cafe/GameProfile/GameProfile.h"
#include "util/containers/flat_hash_map.hpp"
#if BOOST_OS_MACOS
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#endif
#include <cinttypes>
// experimental new decompiler (WIP)
@ -498,6 +501,8 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
uint64 vsHash2 = 0;
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
if (g_renderer->GetType() == RendererAPI::Metal && usesGeometryShader)
vsHash += _activeFetchShader->mtlShaderHashObject;
uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
vsHash += tmp;
@ -542,6 +547,20 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b
_calculateShaderProgramHash(psProgramCode, pixelShaderSize, &hashCachePS, &psHash1, &psHash2);
// get vertex shader
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);
#if BOOST_OS_MACOS
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
psHash += (uint64)dataType;
psHash = std::rotl<uint64>(psHash, 7);
}
}
#endif
_shaderBaseHash_ps = psHash;
}

View File

@ -65,6 +65,7 @@ struct LatteDecompilerShaderResourceMapping
sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS];
// Metal exclusive
sint8 indexBufferBinding{-1};
sint8 indexTypeBinding{-1};
sint32 getTextureCount()
{

View File

@ -1020,4 +1020,5 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
LatteDecompiler::_initUniformBindingPoints(shaderContext);
LatteDecompiler::_initAttributeBindingPoints(shaderContext);
shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++;
shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++;
}

View File

@ -11,6 +11,7 @@
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "config/ActiveSettings.h"
#include "util/helpers/StringBuf.h"
@ -3214,11 +3215,13 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe
src->add(") == false) discard_fragment();" _CRLF);
}
// pixel color output
src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex));
src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetColorAttachmentTypeStr(pixelColorOutputIndex));
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
src->add(");" _CRLF);
src->add("#endif" _CRLF);
auto dataType = GetColorBufferDataType(pixelColorOutputIndex, *shaderContext->contextRegistersNew);
if (dataType != MetalDataType::NONE)
{
src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetDataTypeStr(dataType));
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
src->add(");" _CRLF);
}
if( cfInstruction->exportArrayBase+i >= 8 )
cemu_assert_unimplemented();
@ -3881,9 +3884,125 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
case LatteConst::ShaderType::Vertex:
if (shaderContext->options->usesGeometryShader || isRectVertexShader)
{
// Defined just-in-time
// Will also modify vid in case of an indexed draw
src->add("VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF);
// TODO: clean this up
// fetchVertex will modify vid in case of an indexed draw
// Vertex buffers
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
std::string inputFetchDefinition = "VertexIn fetchVertex(thread uint& vid, device uint* indexBuffer, uint indexType VERTEX_BUFFER_DEFINITIONS) {\n";
// Index buffer
inputFetchDefinition += "if (indexType == 1) // UShort\n";
inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
inputFetchDefinition += "else if (indexType == 2)\n";
inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid]; // UInt\n";
inputFetchDefinition += "VertexIn in;\n";
for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups)
{
std::optional<LatteConst::VertexFetchType2> fetchType;
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (shaderContext->contextRegisters[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];
uint32 semanticId = shaderContext->output->resourceMappingMTL.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?
std::string formatName;
uint8 componentCount = 0;
switch (GetMtlVertexFormat(attr.format))
{
case MTL::VertexFormatUChar:
formatName = "uchar";
componentCount = 1;
break;
case MTL::VertexFormatUChar2:
formatName = "uchar2";
componentCount = 2;
break;
case MTL::VertexFormatUChar3:
formatName = "uchar3";
componentCount = 3;
break;
case MTL::VertexFormatUChar4:
formatName = "uchar4";
componentCount = 4;
break;
case MTL::VertexFormatUShort:
formatName = "ushort";
componentCount = 1;
break;
case MTL::VertexFormatUShort2:
formatName = "ushort2";
componentCount = 2;
break;
case MTL::VertexFormatUShort3:
formatName = "ushort3";
componentCount = 3;
break;
case MTL::VertexFormatUShort4:
formatName = "ushort4";
componentCount = 4;
break;
case MTL::VertexFormatUInt:
formatName = "uint";
componentCount = 1;
break;
case MTL::VertexFormatUInt2:
formatName = "uint2";
componentCount = 2;
break;
case MTL::VertexFormatUInt3:
formatName = "uint3";
componentCount = 3;
break;
case MTL::VertexFormatUInt4:
formatName = "uint4";
componentCount = 4;
break;
}
// Fetch the attribute
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId);
inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName);
inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset);
for (uint8 i = 0; i < (4 - componentCount); i++)
inputFetchDefinition += ", 0";
inputFetchDefinition += ");\n";
if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;
if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}
// TODO: fetch type
vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex);
}
inputFetchDefinition += "return in;\n";
inputFetchDefinition += "}\n";
src->add(vertexBufferDefinitions.c_str());
src->add("\n");
src->add(vertexBuffers.c_str());
src->add("\n");
src->add(inputFetchDefinition.c_str());
functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]";
outputTypeName = "void";
@ -3916,7 +4035,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
// TODO: don't hardcode the instance index
src->add("uint iid = 0;" _CRLF);
// Fetch the input
src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF);
src->add("VertexIn in = fetchVertex(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);
// Output is defined as object payload
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
}

View File

@ -281,9 +281,11 @@ namespace LatteDecompiler
{
if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0)
{
src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i));
src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetColorAttachmentTypeStr(i), i, i);
src->add("#endif" _CRLF);
auto dataType = GetColorBufferDataType(i, *decompilerContext->contextRegistersNew);
if (dataType != MetalDataType::NONE)
{
src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetDataTypeStr(dataType), i, i);
}
}
}
@ -495,6 +497,10 @@ namespace LatteDecompiler
src->add(", mesh_grid_properties meshGridProperties");
src->add(", uint tig [[threadgroup_position_in_grid]]");
src->add(", uint tid [[thread_index_in_threadgroup]]");
// TODO: only include index buffer if needed
src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding);
// TODO: use uchar?
src->addFmt(", constant uint& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding);
src->add(" VERTEX_BUFFER_DEFINITIONS");
}
else

View File

@ -78,6 +78,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM
auto pixelFormat = GetMtlPixelFormat(format, isDepth, m_mtlr->GetPixelFormatSupport());
desc->setPixelFormat(pixelFormat);
// TODO: using MTL::TextureUsageShaderWrite as well fixes Mario Tennis: Ultra Smash, investigate why
MTL::TextureUsage usage = MTL::TextureUsageShaderRead;
if (!Latte::IsCompressedFormat(format))
{

View File

@ -1,4 +1,5 @@
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cemu/Logging/CemuLogging.h"
#include "Common/precompiled.h"
#include "Metal/MTLDepthStencil.hpp"
#include "Metal/MTLPixelFormat.hpp"
@ -84,55 +85,51 @@ std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = {
// Returns the Metal format description for a Latte surface format.
// format:  Latte surface format to translate
// isDepth: true selects MTL_DEPTH_FORMAT_TABLE, false selects MTL_COLOR_FORMAT_TABLE
// INVALID_FORMAT yields {MTL::PixelFormatInvalid, MetalDataType::NONE, 0} without touching the tables.
// NOTE(review): MTL_DEPTH_FORMAT_TABLE is a std::map, so operator[] inserts a default-constructed
// entry when the format is missing; presumably the color table behaves the same - confirm.
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth)
{
// Early out: no table lookup for the invalid format
if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT)
{
return {MTL::PixelFormatInvalid, MetalDataType::NONE, 0};
}
MetalPixelFormatInfo formatInfo;
if (isDepth)
formatInfo = MTL_DEPTH_FORMAT_TABLE[format];
else
formatInfo = MTL_COLOR_FORMAT_TABLE[format];
// Depth24Unorm_Stencil8 is not supported on Apple silicon
// TODO: query if format is available instead
if (formatInfo.pixelFormat == MTL::PixelFormatDepth24Unorm_Stencil8)
{
formatInfo.pixelFormat = MTL::PixelFormatDepth32Float_Stencil8;
}
// Only reports the problem on stdout; the invalid entry is still returned to the caller
if (formatInfo.pixelFormat == MTL::PixelFormatInvalid)
{
printf("invalid pixel format: %u\n", (uint32)format);
}
return formatInfo;
}
MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth, const MetalPixelFormatSupport& pixelFormatSupport)
{
auto pixelFormat = GetMtlPixelFormatInfo(format, isDepth).pixelFormat;
if (pixelFormat == MTL::PixelFormatInvalid)
cemuLog_logDebug(LogType::Force, "invalid pixel format {}\n", pixelFormat);
if (!pixelFormatSupport.m_supportsR8Unorm_sRGB && pixelFormat == MTL::PixelFormatR8Unorm_sRGB)
pixelFormat = MTL::PixelFormatRGBA8Unorm_sRGB;
if (!pixelFormatSupport.m_supportsRG8Unorm_sRGB && pixelFormat == MTL::PixelFormatRG8Unorm_sRGB)
pixelFormat = MTL::PixelFormatRGBA8Unorm_sRGB;
if (!pixelFormatSupport.m_supportsPacked16BitFormats)
switch (pixelFormat)
{
switch (pixelFormat)
{
case MTL::PixelFormatB5G6R5Unorm:
case MTL::PixelFormatA1BGR5Unorm:
case MTL::PixelFormatABGR4Unorm:
case MTL::PixelFormatBGR5A1Unorm:
pixelFormat = MTL::PixelFormatRGBA8Unorm;
break;
default:
break;
}
case MTL::PixelFormatR8Unorm_sRGB:
if (!pixelFormatSupport.m_supportsR8Unorm_sRGB)
return MTL::PixelFormatRGBA8Unorm_sRGB;
break;
case MTL::PixelFormatRG8Unorm_sRGB:
if (!pixelFormatSupport.m_supportsRG8Unorm_sRGB)
return MTL::PixelFormatRGBA8Unorm_sRGB;
break;
case MTL::PixelFormatB5G6R5Unorm:
case MTL::PixelFormatA1BGR5Unorm:
case MTL::PixelFormatABGR4Unorm:
case MTL::PixelFormatBGR5A1Unorm:
if (!pixelFormatSupport.m_supportsPacked16BitFormats)
return MTL::PixelFormatRGBA8Unorm;
break;
case MTL::PixelFormatDepth24Unorm_Stencil8:
if (!pixelFormatSupport.m_supportsDepth24Unorm_Stencil8)
return MTL::PixelFormatDepth32Float_Stencil8;
break;
default:
break;
}
if (!pixelFormatSupport.m_supportsDepth24Unorm_Stencil8 && pixelFormat == MTL::PixelFormatDepth24Unorm_Stencil8)
pixelFormat = MTL::PixelFormatDepth32Float_Stencil8;
return pixelFormat;
}

View File

@ -6,6 +6,7 @@
#include "Cafe/HW/Latte/Core/LatteConst.h"
//#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Common/precompiled.h"
struct Uvec2 {
uint32 x;
@ -32,6 +33,28 @@ const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, boo
MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth, const MetalPixelFormatSupport& pixelFormatSupport);
// Returns the data type of the color buffer bound at `index`, based on the
// color buffer format that LatteMRT reports for the register state `lcr`.
inline MetalDataType GetColorBufferDataType(const uint32 index, const LatteContextRegister& lcr)
{
	// isDepth = false: the format is looked up as a color format
	const MetalPixelFormatInfo colorFormatInfo = GetMtlPixelFormatInfo(LatteMRT::GetColorBufferFormat(index, lcr), false);
	return colorFormatInfo.dataType;
}
// Maps a MetalDataType to the name of the matching 4-component MSL vector type.
// Any value other than INT, UINT or FLOAT raises cemu_assert_suspicious() and
// produces an empty string.
inline const char* GetDataTypeStr(MetalDataType dataType)
{
	if (dataType == MetalDataType::INT)
		return "int4";
	if (dataType == MetalDataType::UINT)
		return "uint4";
	if (dataType == MetalDataType::FLOAT)
		return "float4";

	// no vector type is associated with the remaining values (e.g. MetalDataType::NONE)
	cemu_assert_suspicious();
	return "";
}
size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width);
size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, bool isDepth, uint32 height, size_t bytesPerRow);

View File

@ -18,7 +18,7 @@ struct MetalPixelFormatSupport
m_supportsR8Unorm_sRGB = device->supportsFamily(MTL::GPUFamilyApple1);
m_supportsRG8Unorm_sRGB = device->supportsFamily(MTL::GPUFamilyApple1);
m_supportsPacked16BitFormats = device->supportsFamily(MTL::GPUFamilyApple1);
m_supportsDepth24Unorm_Stencil8 = device->supportsFamily(MTL::GPUFamilyMac2);
m_supportsDepth24Unorm_Stencil8 = device->depth24Stencil8PixelFormatSupported();
}
};
@ -41,10 +41,10 @@ inline size_t Align(size_t size, size_t alignment)
return (size + alignment - 1) & ~(alignment - 1);
}
// Builds the name of the preprocessor macro carrying the data type of color
// attachment `index`, i.e. "COLOR_ATTACHMENT<index>_TYPE".
inline std::string GetColorAttachmentTypeStr(uint32 index)
{
	std::string macroName = "COLOR_ATTACHMENT";
	macroName += std::to_string(index);
	macroName += "_TYPE";
	return macroName;
}
//inline std::string GetColorAttachmentTypeStr(uint32 index)
//{
// return "COLOR_ATTACHMENT" + std::to_string(index) + "_TYPE";
//}
// Cast from const char* to NS::String*
inline NS::String* ToNSString(const char* str)

View File

@ -1,16 +1,14 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Foundation/NSObject.hpp"
#include "HW/Latte/Core/LatteShader.h"
#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "HW/Latte/Core/FetchShader.h"
#include "HW/Latte/ISA/RegDefines.h"
#include "Metal/MTLRenderPipeline.hpp"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "config/ActiveSettings.h"
static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
@ -366,14 +364,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
auto mtlVertexShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
mtlVertexShader->CompileVertexFunction();
// HACK
if (!mtlVertexShader->GetFunction())
{
debug_printf("no vertex function, skipping draw\n");
return nullptr;
}
mtlPixelShader->CompileFragmentFunction(lastUsedFBO);
// Render pipeline state
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
@ -419,7 +409,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
{
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());
error->release();
return nullptr;
}
else
{
@ -475,8 +464,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe
mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
}
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
mtlObjectShader->CompileObjectFunction(lcr, fetchShader, vertexShader, hostIndexType);
mtlPixelShader->CompileFragmentFunction(lastUsedFBO);
// Render pipeline state
MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init();
@ -496,13 +483,12 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe
desc->setLabel(GetLabel("Mesh pipeline state", desc));
#endif
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
desc->release();
if (error)
{
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());
error->release();
return nullptr;
}
desc->release();
return pipeline;
}

View File

@ -859,6 +859,7 @@ void MetalRenderer::draw_beginSequence()
return; // no render target
}
// TODO: not checking for !streamoutEnable fixes Super Smash Bros. for Wii U, investigate why
if (!hasValidFramebufferAttached && !streamoutEnable)
{
debug_printf("Drawcall with no color buffer or depth buffer attached\n");
@ -916,7 +917,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
// TODO: is this even needed? Also, should go to draw_beginSequence
if (!vertexShader)
if (!vertexShader || !static_cast<RendererShaderMtl*>(vertexShader->shader)->GetFunction())
{
printf("no vertex function, skipping draw\n");
return;
@ -1200,6 +1201,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
{
if (indexBuffer)
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding);
renderCommandEncoder->setObjectBytes(&hostIndexType, sizeof(hostIndexType), vertexShader->resourceMapping.indexTypeBinding);
encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.indexTypeBinding] = {nullptr};
uint32 verticesPerPrimitive = 0;
switch (primitiveMode)

View File

@ -16,15 +16,16 @@ extern std::atomic_int g_compiled_shaders_async;
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
{
if (type == ShaderType::kGeometry)
NS::Error* error = nullptr;
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error);
if (error)
{
Compile(mslCode);
}
else
{
// TODO: don't compile just-in-time
m_mslCode = mslCode;
printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str());
error->release();
return;
}
m_function = library->newFunction(ToNSString("main0"));
library->release();
// Count shader compilation
g_compiled_shaders_total++;
@ -35,205 +36,3 @@ RendererShaderMtl::~RendererShaderMtl()
if (m_function)
m_function->release();
}
// Assembles and compiles the final MSL source of an object-stage function.
// The stored shader code (m_mslCode) is wrapped with text generated from the current draw state:
//   1. "#define VERTEX_BUFFER_DEFINITIONS ..." - extra function parameters (index buffer + vertex buffers)
//   2. "#define VERTEX_BUFFERS ..."            - the matching argument list
//   3. m_mslCode
//   4. the body of fetchInput(), which reads every used attribute from the raw vertex buffers
// lcr:           register state; only the per-buffer stride is read from it
// fetchShader:   supplies the buffer groups and their attributes
// vertexShader:  supplies the attribute mapping and the index buffer binding point
// hostIndexType: NONE for non-indexed draws, otherwise selects the element type of indexBuffer
// NOTE(review): fetchInput() is defined after m_mslCode, so m_mslCode presumably declares it beforehand - confirm.
void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType)
{
cemu_assert_debug(m_type == ShaderType::kVertex);
std::string fullCode;
// Vertex buffers
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n";
// Index buffer
// For indexed draws the index buffer becomes the first extra parameter and vid is replaced by the index it selects
if (hostIndexType != Renderer::INDEX_TYPE::NONE)
{
vertexBufferDefinitions += ", device ";
switch (hostIndexType)
{
case Renderer::INDEX_TYPE::U16:
vertexBufferDefinitions += "ushort";
break;
case Renderer::INDEX_TYPE::U32:
vertexBufferDefinitions += "uint";
break;
default:
// any other index type leaves the pointer without an element type in the generated code
cemu_assert_suspicious();
break;
}
vertexBufferDefinitions += fmt::format("* indexBuffer [[buffer({})]]", vertexShader->resourceMapping.indexBufferBinding);
vertexBuffers += ", indexBuffer";
inputFetchDefinition += "vid = indexBuffer[vid];\n";
}
inputFetchDefinition += "VertexIn in;\n";
for (auto& bufferGroup : fetchShader->bufferGroups)
{
// fetch type of the first used attribute in this group; later attributes are checked against it
std::optional<LatteConst::VertexFetchType2> fetchType;
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
// stride: 16-bit field starting at bit 11 of the third register word of this buffer
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
{
auto& attr = bufferGroup.attrib[j];
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
if (semanticId == (uint32)-1)
continue; // attribute not used?
// Translate the Metal vertex format into an MSL type name plus its component count.
// The switch has no default: a format that is not listed leaves formatName empty and componentCount at 0.
std::string formatName;
uint8 componentCount = 0;
switch (GetMtlVertexFormat(attr.format))
{
case MTL::VertexFormatUChar:
formatName = "uchar";
componentCount = 1;
break;
case MTL::VertexFormatUChar2:
formatName = "uchar2";
componentCount = 2;
break;
case MTL::VertexFormatUChar3:
formatName = "uchar3";
componentCount = 3;
break;
case MTL::VertexFormatUChar4:
formatName = "uchar4";
componentCount = 4;
break;
case MTL::VertexFormatUShort:
formatName = "ushort";
componentCount = 1;
break;
case MTL::VertexFormatUShort2:
formatName = "ushort2";
componentCount = 2;
break;
case MTL::VertexFormatUShort3:
formatName = "ushort3";
componentCount = 3;
break;
case MTL::VertexFormatUShort4:
formatName = "ushort4";
componentCount = 4;
break;
case MTL::VertexFormatUInt:
formatName = "uint";
componentCount = 1;
break;
case MTL::VertexFormatUInt2:
formatName = "uint2";
componentCount = 2;
break;
case MTL::VertexFormatUInt3:
formatName = "uint3";
componentCount = 3;
break;
case MTL::VertexFormatUInt4:
formatName = "uint4";
componentCount = 4;
break;
}
// Fetch the attribute
// Emits: in.ATTRIBUTE_NAME<id> = uint4(*(device <type>*)(vertexBuffer<n> + vid * <stride> + <offset>)[, 0 ...]);
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId);
inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName);
inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset);
// one ", 0" per component the format is short of four
for (uint8 i = 0; i < (4 - componentCount); i++)
inputFetchDefinition += ", 0";
inputFetchDefinition += ");\n";
// mixed fetch types within one buffer group are only caught by a debug assert
if (fetchType.has_value())
cemu_assert_debug(fetchType == attr.fetchType);
else
fetchType = attr.fetchType;
if (attr.fetchType == LatteConst::INSTANCE_DATA)
{
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
}
}
// every buffer group adds one raw byte pointer parameter and the matching call argument
vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex);
}
inputFetchDefinition += "return in;\n";
inputFetchDefinition += "}\n";
// final layout: both macros, the stored shader code, then the fetchInput() definition
fullCode += vertexBufferDefinitions + "\n";
fullCode += vertexBuffers + "\n";
fullCode += m_mslCode;
fullCode += inputFetchDefinition;
Compile(fullCode);
}
// Compiles the stored fragment shader code for the given framebuffer.
// For each of the 8 color buffer slots of activeFBO that has a texture, a
// "#define COLOR_ATTACHMENT<n>_TYPE <int4|uint4|float4>" line is placed in
// front of m_mslCode before the result is handed to Compile().
void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
{
	cemu_assert_debug(m_type == ShaderType::kFragment);

	std::string source;
	for (uint8 slot = 0; slot < 8; slot++)
	{
		const auto& attachment = activeFBO->colorBuffer[slot];
		if (attachment.texture)
		{
			// vector type matching the data type of the attachment's format
			const char* typeName = "";
			switch (GetMtlPixelFormatInfo(attachment.texture->format, false).dataType)
			{
			case MetalDataType::INT:
				typeName = "int4";
				break;
			case MetalDataType::UINT:
				typeName = "uint4";
				break;
			case MetalDataType::FLOAT:
				typeName = "float4";
				break;
			default:
				// unexpected data type: the define is still emitted, but without a value
				cemu_assert_suspicious();
				break;
			}

			source += "#define " + GetColorAttachmentTypeStr(slot) + " ";
			source += typeName;
			source += "\n";
		}
	}

	source += m_mslCode;
	Compile(source);
}
// (Re)builds m_function from the given MSL source.
// Any previously compiled function is dropped first. The entry point looked up
// in the new library is always "main0".
// Once a compilation has failed, m_hasError stays set and all later calls
// return without compiling; m_function is nullptr in that state.
void RendererShaderMtl::Compile(const std::string& mslCode)
{
	if (m_function)
	{
		m_function->release();
		// Reset the pointer: both early returns below would otherwise leave a dangling
		// pointer that GetFunction() hands out and the destructor releases a second time.
		m_function = nullptr;
	}

	// HACK
	if (m_hasError)
		return;

	NS::Error* error = nullptr;
	MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error);
	if (error)
	{
		printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str());
		error->release();

		// HACK
		m_hasError = true;

		return;
	}
	m_function = library->newFunction(ToNSString("main0"));
	library->release();
}

View File

@ -21,14 +21,6 @@ public:
RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
virtual ~RendererShaderMtl();
void CompileVertexFunction()
{
Compile(m_mslCode);
}
void CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType);
void CompileFragmentFunction(CachedFBOMtl* activeFBO);
MTL::Function* GetFunction() const
{
return m_function;
@ -60,11 +52,5 @@ private:
MTL::Function* m_function = nullptr;
std::vector<uint8> m_binary;
std::string m_mslCode;
// HACK
bool m_hasError = false;
void Compile(const std::string& mslCode);
};