mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-01-07 15:48:15 +01:00
commit
5e50592937
@ -146,6 +146,23 @@ void LatteFetchShader::CalculateFetchShaderVkHash()
|
||||
this->vkPipelineHashFragment = h;
|
||||
}
|
||||
|
||||
// Computes the hash used for Metal object shaders and stores it in mtlShaderHashObject.
// For every buffer group, the attribute buffer index and the buffer stride read from
// the context registers are folded into a 64-bit key (add, then rotate left by 5).
// contextRegister: raw context register array, read at
//                  mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7 + 2 for each group.
void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister)
{
	uint64 hash = 0;
	for (LatteParsedFetchShaderBufferGroup_t& bufferGroup : bufferGroups)
	{
		const uint32 attribBufferIdx = bufferGroup.attributeBufferIndex;
		// the stride is taken from bits 11..26 of the third register of this buffer's block
		const uint32 strideRegister = contextRegister[mmSQ_VTX_ATTRIBUTE_BLOCK_START + attribBufferIdx * 7 + 2];
		const uint32 stride = (strideRegister >> 11) & 0xFFFF;

		hash = std::rotl<uint64>(hash + (uint64)attribBufferIdx, 5);
		hash = std::rotl<uint64>(hash + (uint64)stride, 5);
	}
	mtlShaderHashObject = hash;
}
|
||||
|
||||
void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr)
|
||||
{
|
||||
uint32 semanticId = instr->getFieldSEM_SEMANTIC_ID(); // location (attribute index inside shader)
|
||||
@ -161,7 +178,7 @@ void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* pars
|
||||
auto nfa = instr->getField_NUM_FORMAT_ALL();
|
||||
bool isSigned = instr->getField_FORMAT_COMP_ALL() == LatteClauseInstruction_VTX::FORMAT_COMP::COMP_SIGNED;
|
||||
auto endianSwap = instr->getField_ENDIAN_SWAP();
|
||||
|
||||
|
||||
// get buffer
|
||||
cemu_assert_debug(bufferId >= 0xA0 && bufferId < 0xB0);
|
||||
uint32 bufferIndex = (bufferId - 0xA0);
|
||||
@ -316,7 +333,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
|
||||
// {0x00000002, 0x01800c00, 0x00000000, 0x8a000000, 0x2c00a001, 0x2c151000, 0x000a0000, ...} // size 0x50
|
||||
// {0x00000002, 0x01801000, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x60
|
||||
// {0x00000002, 0x01801c00, 0x00000000, 0x8a000000, 0x1c00a001, 0x280d1000, 0x00090000, ...} // size 0x90
|
||||
|
||||
|
||||
// our new implementation:
|
||||
// {0x00000002, 0x01800400, 0x00000000, 0x8a000000, 0x0000a001, 0x2c151000, 0x00020000, ...}
|
||||
|
||||
@ -328,6 +345,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
|
||||
// these only make sense when vertex shader does not call FS?
|
||||
LatteShader_calculateFSKey(newFetchShader);
|
||||
newFetchShader->CalculateFetchShaderVkHash();
|
||||
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
|
||||
return newFetchShader;
|
||||
}
|
||||
|
||||
@ -387,6 +405,7 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
|
||||
}
|
||||
LatteShader_calculateFSKey(newFetchShader);
|
||||
newFetchShader->CalculateFetchShaderVkHash();
|
||||
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
|
||||
|
||||
// register in cache
|
||||
// it's possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously
|
||||
@ -411,7 +430,7 @@ LatteFetchShader::~LatteFetchShader()
|
||||
UnregisterInCache();
|
||||
}
|
||||
|
||||
struct FetchShaderLookupInfo
|
||||
struct FetchShaderLookupInfo
|
||||
{
|
||||
LatteFetchShader* fetchShader;
|
||||
uint32 programSize;
|
||||
|
@ -46,6 +46,9 @@ struct LatteFetchShader
|
||||
// Vulkan
|
||||
uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline
|
||||
|
||||
// Metal
|
||||
uint64 mtlShaderHashObject{};
|
||||
|
||||
// cache info
|
||||
CacheHash m_cacheHash{};
|
||||
bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded)
|
||||
@ -53,6 +56,8 @@ struct LatteFetchShader
|
||||
|
||||
void CalculateFetchShaderVkHash();
|
||||
|
||||
void CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister);
|
||||
|
||||
uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; };
|
||||
|
||||
static bool isValidBufferIndex(const uint32 index) { return index < 0x10; };
|
||||
@ -69,4 +74,4 @@ struct LatteFetchShader
|
||||
static std::unordered_map<CacheHash, LatteFetchShader*> s_fetchShaderByHash;
|
||||
};
|
||||
|
||||
LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize);
|
||||
LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::CacheHash fsHash, uint32* contextRegister, uint32* fsProgramCode, uint32 fsProgramSize);
|
||||
|
@ -14,6 +14,9 @@
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "Cafe/GameProfile/GameProfile.h"
|
||||
#include "util/containers/flat_hash_map.hpp"
|
||||
#if BOOST_OS_MACOS
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#endif
|
||||
#include <cinttypes>
|
||||
|
||||
// experimental new decompiler (WIP)
|
||||
@ -498,6 +501,8 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
|
||||
uint64 vsHash2 = 0;
|
||||
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
|
||||
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
|
||||
if (g_renderer->GetType() == RendererAPI::Metal && usesGeometryShader)
|
||||
vsHash += _activeFetchShader->mtlShaderHashObject;
|
||||
|
||||
uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
|
||||
vsHash += tmp;
|
||||
@ -542,6 +547,20 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b
|
||||
_calculateShaderProgramHash(psProgramCode, pixelShaderSize, &hashCachePS, &psHash1, &psHash2);
|
||||
// get vertex shader
|
||||
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);
|
||||
|
||||
#if BOOST_OS_MACOS
|
||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||
{
|
||||
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
||||
{
|
||||
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
|
||||
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
|
||||
psHash += (uint64)dataType;
|
||||
psHash = std::rotl<uint64>(psHash, 7);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
_shaderBaseHash_ps = psHash;
|
||||
}
|
||||
|
||||
|
@ -65,6 +65,7 @@ struct LatteDecompilerShaderResourceMapping
|
||||
sint8 attributeMapping[LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS];
|
||||
// Metal exclusive
|
||||
sint8 indexBufferBinding{-1};
|
||||
sint8 indexTypeBinding{-1};
|
||||
|
||||
sint32 getTextureCount()
|
||||
{
|
||||
|
@ -1020,4 +1020,5 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
||||
LatteDecompiler::_initUniformBindingPoints(shaderContext);
|
||||
LatteDecompiler::_initAttributeBindingPoints(shaderContext);
|
||||
shaderContext->output->resourceMappingMTL.indexBufferBinding = shaderContext->currentBufferBindingPointMTL++;
|
||||
shaderContext->output->resourceMappingMTL.indexTypeBinding = shaderContext->currentBufferBindingPointMTL++;
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "util/helpers/StringBuf.h"
|
||||
|
||||
@ -3214,11 +3215,13 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe
|
||||
src->add(") == false) discard_fragment();" _CRLF);
|
||||
}
|
||||
// pixel color output
|
||||
src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(pixelColorOutputIndex));
|
||||
src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetColorAttachmentTypeStr(pixelColorOutputIndex));
|
||||
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
|
||||
src->add(");" _CRLF);
|
||||
src->add("#endif" _CRLF);
|
||||
auto dataType = GetColorBufferDataType(pixelColorOutputIndex, *shaderContext->contextRegistersNew);
|
||||
if (dataType != MetalDataType::NONE)
|
||||
{
|
||||
src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetDataTypeStr(dataType));
|
||||
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
|
||||
src->add(");" _CRLF);
|
||||
}
|
||||
|
||||
if( cfInstruction->exportArrayBase+i >= 8 )
|
||||
cemu_assert_unimplemented();
|
||||
@ -3881,9 +3884,125 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
case LatteConst::ShaderType::Vertex:
|
||||
if (shaderContext->options->usesGeometryShader || isRectVertexShader)
|
||||
{
|
||||
// Defined just-in-time
|
||||
// Will also modify vid in case of an indexed draw
|
||||
src->add("VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS);" _CRLF);
|
||||
// TODO: clean this up
|
||||
// fetchVertex will modify vid in case of an indexed draw
|
||||
|
||||
// Vertex buffers
|
||||
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
|
||||
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
|
||||
std::string inputFetchDefinition = "VertexIn fetchVertex(thread uint& vid, device uint* indexBuffer, uint indexType VERTEX_BUFFER_DEFINITIONS) {\n";
|
||||
|
||||
// Index buffer
|
||||
inputFetchDefinition += "if (indexType == 1) // UShort\n";
|
||||
inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
|
||||
inputFetchDefinition += "else if (indexType == 2)\n";
|
||||
inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid]; // UInt\n";
|
||||
|
||||
inputFetchDefinition += "VertexIn in;\n";
|
||||
for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups)
|
||||
{
|
||||
std::optional<LatteConst::VertexFetchType2> fetchType;
|
||||
|
||||
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
|
||||
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
|
||||
uint32 bufferStride = (shaderContext->contextRegisters[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
||||
|
||||
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
|
||||
{
|
||||
auto& attr = bufferGroup.attrib[j];
|
||||
|
||||
uint32 semanticId = shaderContext->output->resourceMappingMTL.attributeMapping[attr.semanticId];
|
||||
if (semanticId == (uint32)-1)
|
||||
continue; // attribute not used?
|
||||
|
||||
std::string formatName;
|
||||
uint8 componentCount = 0;
|
||||
switch (GetMtlVertexFormat(attr.format))
|
||||
{
|
||||
case MTL::VertexFormatUChar:
|
||||
formatName = "uchar";
|
||||
componentCount = 1;
|
||||
break;
|
||||
case MTL::VertexFormatUChar2:
|
||||
formatName = "uchar2";
|
||||
componentCount = 2;
|
||||
break;
|
||||
case MTL::VertexFormatUChar3:
|
||||
formatName = "uchar3";
|
||||
componentCount = 3;
|
||||
break;
|
||||
case MTL::VertexFormatUChar4:
|
||||
formatName = "uchar4";
|
||||
componentCount = 4;
|
||||
break;
|
||||
case MTL::VertexFormatUShort:
|
||||
formatName = "ushort";
|
||||
componentCount = 1;
|
||||
break;
|
||||
case MTL::VertexFormatUShort2:
|
||||
formatName = "ushort2";
|
||||
componentCount = 2;
|
||||
break;
|
||||
case MTL::VertexFormatUShort3:
|
||||
formatName = "ushort3";
|
||||
componentCount = 3;
|
||||
break;
|
||||
case MTL::VertexFormatUShort4:
|
||||
formatName = "ushort4";
|
||||
componentCount = 4;
|
||||
break;
|
||||
case MTL::VertexFormatUInt:
|
||||
formatName = "uint";
|
||||
componentCount = 1;
|
||||
break;
|
||||
case MTL::VertexFormatUInt2:
|
||||
formatName = "uint2";
|
||||
componentCount = 2;
|
||||
break;
|
||||
case MTL::VertexFormatUInt3:
|
||||
formatName = "uint3";
|
||||
componentCount = 3;
|
||||
break;
|
||||
case MTL::VertexFormatUInt4:
|
||||
formatName = "uint4";
|
||||
componentCount = 4;
|
||||
break;
|
||||
}
|
||||
|
||||
// Fetch the attribute
|
||||
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId);
|
||||
inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName);
|
||||
inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
|
||||
inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset);
|
||||
for (uint8 i = 0; i < (4 - componentCount); i++)
|
||||
inputFetchDefinition += ", 0";
|
||||
inputFetchDefinition += ");\n";
|
||||
|
||||
if (fetchType.has_value())
|
||||
cemu_assert_debug(fetchType == attr.fetchType);
|
||||
else
|
||||
fetchType = attr.fetchType;
|
||||
|
||||
if (attr.fetchType == LatteConst::INSTANCE_DATA)
|
||||
{
|
||||
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: fetch type
|
||||
|
||||
vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
|
||||
vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex);
|
||||
}
|
||||
|
||||
inputFetchDefinition += "return in;\n";
|
||||
inputFetchDefinition += "}\n";
|
||||
|
||||
src->add(vertexBufferDefinitions.c_str());
|
||||
src->add("\n");
|
||||
src->add(vertexBuffers.c_str());
|
||||
src->add("\n");
|
||||
src->add(inputFetchDefinition.c_str());
|
||||
|
||||
functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]";
|
||||
outputTypeName = "void";
|
||||
@ -3916,7 +4035,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
|
||||
// TODO: don't hardcode the instance index
|
||||
src->add("uint iid = 0;" _CRLF);
|
||||
// Fetch the input
|
||||
src->add("VertexIn in = fetchInput(vid VERTEX_BUFFERS);" _CRLF);
|
||||
src->add("VertexIn in = fetchVertex(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);
|
||||
// Output is defined as object payload
|
||||
src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
|
||||
}
|
||||
|
@ -281,9 +281,11 @@ namespace LatteDecompiler
|
||||
{
|
||||
if ((decompilerContext->shader->pixelColorOutputMask & (1 << i)) != 0)
|
||||
{
|
||||
src->addFmt("#ifdef {}" _CRLF, GetColorAttachmentTypeStr(i));
|
||||
src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetColorAttachmentTypeStr(i), i, i);
|
||||
src->add("#endif" _CRLF);
|
||||
auto dataType = GetColorBufferDataType(i, *decompilerContext->contextRegistersNew);
|
||||
if (dataType != MetalDataType::NONE)
|
||||
{
|
||||
src->addFmt("{} passPixelColor{} [[color({})]];" _CRLF, GetDataTypeStr(dataType), i, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -495,6 +497,10 @@ namespace LatteDecompiler
|
||||
src->add(", mesh_grid_properties meshGridProperties");
|
||||
src->add(", uint tig [[threadgroup_position_in_grid]]");
|
||||
src->add(", uint tid [[thread_index_in_threadgroup]]");
|
||||
// TODO: only include index buffer if needed
|
||||
src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding);
|
||||
// TODO: use uchar?
|
||||
src->addFmt(", constant uint& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding);
|
||||
src->add(" VERTEX_BUFFER_DEFINITIONS");
|
||||
}
|
||||
else
|
||||
|
@ -78,6 +78,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM
|
||||
auto pixelFormat = GetMtlPixelFormat(format, isDepth, m_mtlr->GetPixelFormatSupport());
|
||||
desc->setPixelFormat(pixelFormat);
|
||||
|
||||
// TODO: using MTL::TextureUsageShaderWrite as well fixes Mario Tennis: Ultra Smash, investigate why
|
||||
MTL::TextureUsage usage = MTL::TextureUsageShaderRead;
|
||||
if (!Latte::IsCompressedFormat(format))
|
||||
{
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#include "Cemu/Logging/CemuLogging.h"
|
||||
#include "Common/precompiled.h"
|
||||
#include "Metal/MTLDepthStencil.hpp"
|
||||
#include "Metal/MTLPixelFormat.hpp"
|
||||
@ -84,55 +85,51 @@ std::map<Latte::E_GX2SURFFMT, MetalPixelFormatInfo> MTL_DEPTH_FORMAT_TABLE = {
|
||||
|
||||
// Returns the Metal pixel format description for a Latte surface format.
// format:  Latte surface format to translate; INVALID_FORMAT yields
//          {MTL::PixelFormatInvalid, MetalDataType::NONE, 0} without touching the tables.
// isDepth: selects MTL_DEPTH_FORMAT_TABLE instead of MTL_COLOR_FORMAT_TABLE.
// A format missing from the selected table yields a value-initialized info.
const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, bool isDepth)
{
	if (format == Latte::E_GX2SURFFMT::INVALID_FORMAT)
	{
		return {MTL::PixelFormatInvalid, MetalDataType::NONE, 0};
	}

	// Look up with find() rather than operator[]: operator[] inserts a value-initialized
	// entry for every unknown key, which would grow (and write to) the global tables from
	// what is supposed to be a read-only query. The returned value for a miss is the same.
	const auto lookupFormat = [format](const auto& table) -> MetalPixelFormatInfo {
		const auto it = table.find(format);
		return it != table.end() ? it->second : MetalPixelFormatInfo{};
	};
	MetalPixelFormatInfo formatInfo = isDepth ? lookupFormat(MTL_DEPTH_FORMAT_TABLE) : lookupFormat(MTL_COLOR_FORMAT_TABLE);

	// Depth24Unorm_Stencil8 is not supported on Apple silicon
	// TODO: query if format is available instead
	if (formatInfo.pixelFormat == MTL::PixelFormatDepth24Unorm_Stencil8)
	{
		formatInfo.pixelFormat = MTL::PixelFormatDepth32Float_Stencil8;
	}

	if (formatInfo.pixelFormat == MTL::PixelFormatInvalid)
	{
		printf("invalid pixel format: %u\n", (uint32)format);
	}

	return formatInfo;
}
|
||||
|
||||
MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth, const MetalPixelFormatSupport& pixelFormatSupport)
|
||||
{
|
||||
auto pixelFormat = GetMtlPixelFormatInfo(format, isDepth).pixelFormat;
|
||||
if (pixelFormat == MTL::PixelFormatInvalid)
|
||||
cemuLog_logDebug(LogType::Force, "invalid pixel format {}\n", pixelFormat);
|
||||
|
||||
if (!pixelFormatSupport.m_supportsR8Unorm_sRGB && pixelFormat == MTL::PixelFormatR8Unorm_sRGB)
|
||||
pixelFormat = MTL::PixelFormatRGBA8Unorm_sRGB;
|
||||
|
||||
if (!pixelFormatSupport.m_supportsRG8Unorm_sRGB && pixelFormat == MTL::PixelFormatRG8Unorm_sRGB)
|
||||
pixelFormat = MTL::PixelFormatRGBA8Unorm_sRGB;
|
||||
|
||||
if (!pixelFormatSupport.m_supportsPacked16BitFormats)
|
||||
switch (pixelFormat)
|
||||
{
|
||||
switch (pixelFormat)
|
||||
{
|
||||
case MTL::PixelFormatB5G6R5Unorm:
|
||||
case MTL::PixelFormatA1BGR5Unorm:
|
||||
case MTL::PixelFormatABGR4Unorm:
|
||||
case MTL::PixelFormatBGR5A1Unorm:
|
||||
pixelFormat = MTL::PixelFormatRGBA8Unorm;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
case MTL::PixelFormatR8Unorm_sRGB:
|
||||
if (!pixelFormatSupport.m_supportsR8Unorm_sRGB)
|
||||
return MTL::PixelFormatRGBA8Unorm_sRGB;
|
||||
break;
|
||||
case MTL::PixelFormatRG8Unorm_sRGB:
|
||||
if (!pixelFormatSupport.m_supportsRG8Unorm_sRGB)
|
||||
return MTL::PixelFormatRGBA8Unorm_sRGB;
|
||||
break;
|
||||
case MTL::PixelFormatB5G6R5Unorm:
|
||||
case MTL::PixelFormatA1BGR5Unorm:
|
||||
case MTL::PixelFormatABGR4Unorm:
|
||||
case MTL::PixelFormatBGR5A1Unorm:
|
||||
if (!pixelFormatSupport.m_supportsPacked16BitFormats)
|
||||
return MTL::PixelFormatRGBA8Unorm;
|
||||
break;
|
||||
case MTL::PixelFormatDepth24Unorm_Stencil8:
|
||||
if (!pixelFormatSupport.m_supportsDepth24Unorm_Stencil8)
|
||||
return MTL::PixelFormatDepth32Float_Stencil8;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!pixelFormatSupport.m_supportsDepth24Unorm_Stencil8 && pixelFormat == MTL::PixelFormatDepth24Unorm_Stencil8)
|
||||
pixelFormat = MTL::PixelFormatDepth32Float_Stencil8;
|
||||
|
||||
return pixelFormat;
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||
//#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
#include "Common/precompiled.h"
|
||||
|
||||
struct Uvec2 {
|
||||
uint32 x;
|
||||
@ -32,6 +33,28 @@ const MetalPixelFormatInfo GetMtlPixelFormatInfo(Latte::E_GX2SURFFMT format, boo
|
||||
|
||||
MTL::PixelFormat GetMtlPixelFormat(Latte::E_GX2SURFFMT format, bool isDepth, const MetalPixelFormatSupport& pixelFormatSupport);
|
||||
|
||||
// Returns the Metal data type of the color buffer at the given index, derived from
// that buffer's format as reported by LatteMRT for the supplied context registers.
inline MetalDataType GetColorBufferDataType(const uint32 index, const LatteContextRegister& lcr)
{
	const MetalPixelFormatInfo colorFormatInfo = GetMtlPixelFormatInfo(LatteMRT::GetColorBufferFormat(index, lcr), false);
	return colorFormatInfo.dataType;
}
|
||||
|
||||
// Maps a MetalDataType to the name of the matching 4-component vector type
// ("int4", "uint4" or "float4"). Any other value triggers cemu_assert_suspicious()
// and yields an empty string.
inline const char* GetDataTypeStr(MetalDataType dataType)
{
	if (dataType == MetalDataType::INT)
		return "int4";
	if (dataType == MetalDataType::UINT)
		return "uint4";
	if (dataType == MetalDataType::FLOAT)
		return "float4";

	cemu_assert_suspicious();
	return "";
}
|
||||
|
||||
size_t GetMtlTextureBytesPerRow(Latte::E_GX2SURFFMT format, bool isDepth, uint32 width);
|
||||
|
||||
size_t GetMtlTextureBytesPerImage(Latte::E_GX2SURFFMT format, bool isDepth, uint32 height, size_t bytesPerRow);
|
||||
|
@ -18,7 +18,7 @@ struct MetalPixelFormatSupport
|
||||
m_supportsR8Unorm_sRGB = device->supportsFamily(MTL::GPUFamilyApple1);
|
||||
m_supportsRG8Unorm_sRGB = device->supportsFamily(MTL::GPUFamilyApple1);
|
||||
m_supportsPacked16BitFormats = device->supportsFamily(MTL::GPUFamilyApple1);
|
||||
m_supportsDepth24Unorm_Stencil8 = device->supportsFamily(MTL::GPUFamilyMac2);
|
||||
m_supportsDepth24Unorm_Stencil8 = device->depth24Stencil8PixelFormatSupported();
|
||||
}
|
||||
};
|
||||
|
||||
@ -41,10 +41,10 @@ inline size_t Align(size_t size, size_t alignment)
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
inline std::string GetColorAttachmentTypeStr(uint32 index)
|
||||
{
|
||||
return "COLOR_ATTACHMENT" + std::to_string(index) + "_TYPE";
|
||||
}
|
||||
//inline std::string GetColorAttachmentTypeStr(uint32 index)
|
||||
//{
|
||||
// return "COLOR_ATTACHMENT" + std::to_string(index) + "_TYPE";
|
||||
//}
|
||||
|
||||
// Cast from const char* to NS::String*
|
||||
inline NS::String* ToNSString(const char* str)
|
||||
|
@ -1,16 +1,14 @@
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Foundation/NSObject.hpp"
|
||||
#include "HW/Latte/Core/LatteShader.h"
|
||||
#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
|
||||
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#include "HW/Latte/Renderer/Metal/RendererShaderMtl.h"
|
||||
#include "HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
|
||||
|
||||
#include "HW/Latte/Core/FetchShader.h"
|
||||
#include "HW/Latte/ISA/RegDefines.h"
|
||||
#include "Metal/MTLRenderPipeline.hpp"
|
||||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
|
||||
static void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, const LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
|
||||
@ -366,14 +364,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
|
||||
|
||||
auto mtlVertexShader = static_cast<RendererShaderMtl*>(vertexShader->shader);
|
||||
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
|
||||
mtlVertexShader->CompileVertexFunction();
|
||||
// HACK
|
||||
if (!mtlVertexShader->GetFunction())
|
||||
{
|
||||
debug_printf("no vertex function, skipping draw\n");
|
||||
return nullptr;
|
||||
}
|
||||
mtlPixelShader->CompileFragmentFunction(lastUsedFBO);
|
||||
|
||||
// Render pipeline state
|
||||
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
|
||||
@ -419,7 +409,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte
|
||||
{
|
||||
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
return nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -475,8 +464,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe
|
||||
mtlMeshShader = rectsEmulationGS_generate(m_mtlr, vertexShader, lcr);
|
||||
}
|
||||
auto mtlPixelShader = static_cast<RendererShaderMtl*>(pixelShader->shader);
|
||||
mtlObjectShader->CompileObjectFunction(lcr, fetchShader, vertexShader, hostIndexType);
|
||||
mtlPixelShader->CompileFragmentFunction(lastUsedFBO);
|
||||
|
||||
// Render pipeline state
|
||||
MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init();
|
||||
@ -496,13 +483,12 @@ MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFe
|
||||
desc->setLabel(GetLabel("Mesh pipeline state", desc));
|
||||
#endif
|
||||
pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error);
|
||||
desc->release();
|
||||
if (error)
|
||||
{
|
||||
debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String());
|
||||
error->release();
|
||||
return nullptr;
|
||||
}
|
||||
desc->release();
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
@ -859,6 +859,7 @@ void MetalRenderer::draw_beginSequence()
|
||||
return; // no render target
|
||||
}
|
||||
|
||||
// TODO: not checking for !streamoutEnable fixes Super Smash Bros. for Wii U, investigate why
|
||||
if (!hasValidFramebufferAttached && !streamoutEnable)
|
||||
{
|
||||
debug_printf("Drawcall with no color buffer or depth buffer attached\n");
|
||||
@ -916,7 +917,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
|
||||
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
|
||||
// TODO: is this even needed? Also, should go to draw_beginSequence
|
||||
if (!vertexShader)
|
||||
if (!vertexShader || !static_cast<RendererShaderMtl*>(vertexShader->shader)->GetFunction())
|
||||
{
|
||||
printf("no vertex function, skipping draw\n");
|
||||
return;
|
||||
@ -1200,6 +1201,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
{
|
||||
if (indexBuffer)
|
||||
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding);
|
||||
renderCommandEncoder->setObjectBytes(&hostIndexType, sizeof(hostIndexType), vertexShader->resourceMapping.indexTypeBinding);
|
||||
encoderState.m_buffers[METAL_SHADER_TYPE_OBJECT][vertexShader->resourceMapping.indexTypeBinding] = {nullptr};
|
||||
|
||||
uint32 verticesPerPrimitive = 0;
|
||||
switch (primitiveMode)
|
||||
|
@ -16,15 +16,16 @@ extern std::atomic_int g_compiled_shaders_async;
|
||||
RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode)
|
||||
: RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}
|
||||
{
|
||||
if (type == ShaderType::kGeometry)
|
||||
NS::Error* error = nullptr;
|
||||
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error);
|
||||
if (error)
|
||||
{
|
||||
Compile(mslCode);
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: don't compile just-in-time
|
||||
m_mslCode = mslCode;
|
||||
printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str());
|
||||
error->release();
|
||||
return;
|
||||
}
|
||||
m_function = library->newFunction(ToNSString("main0"));
|
||||
library->release();
|
||||
|
||||
// Count shader compilation
|
||||
g_compiled_shaders_total++;
|
||||
@ -35,205 +36,3 @@ RendererShaderMtl::~RendererShaderMtl()
|
||||
if (m_function)
|
||||
m_function->release();
|
||||
}
|
||||
|
||||
void RendererShaderMtl::CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType)
|
||||
{
|
||||
cemu_assert_debug(m_type == ShaderType::kVertex);
|
||||
|
||||
std::string fullCode;
|
||||
|
||||
// Vertex buffers
|
||||
std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
|
||||
std::string vertexBuffers = "#define VERTEX_BUFFERS ";
|
||||
std::string inputFetchDefinition = "VertexIn fetchInput(thread uint& vid VERTEX_BUFFER_DEFINITIONS) {\n";
|
||||
|
||||
// Index buffer
|
||||
if (hostIndexType != Renderer::INDEX_TYPE::NONE)
|
||||
{
|
||||
vertexBufferDefinitions += ", device ";
|
||||
switch (hostIndexType)
|
||||
{
|
||||
case Renderer::INDEX_TYPE::U16:
|
||||
vertexBufferDefinitions += "ushort";
|
||||
break;
|
||||
case Renderer::INDEX_TYPE::U32:
|
||||
vertexBufferDefinitions += "uint";
|
||||
break;
|
||||
default:
|
||||
cemu_assert_suspicious();
|
||||
break;
|
||||
}
|
||||
|
||||
vertexBufferDefinitions += fmt::format("* indexBuffer [[buffer({})]]", vertexShader->resourceMapping.indexBufferBinding);
|
||||
vertexBuffers += ", indexBuffer";
|
||||
inputFetchDefinition += "vid = indexBuffer[vid];\n";
|
||||
}
|
||||
|
||||
inputFetchDefinition += "VertexIn in;\n";
|
||||
for (auto& bufferGroup : fetchShader->bufferGroups)
|
||||
{
|
||||
std::optional<LatteConst::VertexFetchType2> fetchType;
|
||||
|
||||
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
|
||||
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
|
||||
uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
||||
|
||||
for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
|
||||
{
|
||||
auto& attr = bufferGroup.attrib[j];
|
||||
|
||||
uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
|
||||
if (semanticId == (uint32)-1)
|
||||
continue; // attribute not used?
|
||||
|
||||
std::string formatName;
|
||||
uint8 componentCount = 0;
|
||||
switch (GetMtlVertexFormat(attr.format))
|
||||
{
|
||||
case MTL::VertexFormatUChar:
|
||||
formatName = "uchar";
|
||||
componentCount = 1;
|
||||
break;
|
||||
case MTL::VertexFormatUChar2:
|
||||
formatName = "uchar2";
|
||||
componentCount = 2;
|
||||
break;
|
||||
case MTL::VertexFormatUChar3:
|
||||
formatName = "uchar3";
|
||||
componentCount = 3;
|
||||
break;
|
||||
case MTL::VertexFormatUChar4:
|
||||
formatName = "uchar4";
|
||||
componentCount = 4;
|
||||
break;
|
||||
case MTL::VertexFormatUShort:
|
||||
formatName = "ushort";
|
||||
componentCount = 1;
|
||||
break;
|
||||
case MTL::VertexFormatUShort2:
|
||||
formatName = "ushort2";
|
||||
componentCount = 2;
|
||||
break;
|
||||
case MTL::VertexFormatUShort3:
|
||||
formatName = "ushort3";
|
||||
componentCount = 3;
|
||||
break;
|
||||
case MTL::VertexFormatUShort4:
|
||||
formatName = "ushort4";
|
||||
componentCount = 4;
|
||||
break;
|
||||
case MTL::VertexFormatUInt:
|
||||
formatName = "uint";
|
||||
componentCount = 1;
|
||||
break;
|
||||
case MTL::VertexFormatUInt2:
|
||||
formatName = "uint2";
|
||||
componentCount = 2;
|
||||
break;
|
||||
case MTL::VertexFormatUInt3:
|
||||
formatName = "uint3";
|
||||
componentCount = 3;
|
||||
break;
|
||||
case MTL::VertexFormatUInt4:
|
||||
formatName = "uint4";
|
||||
componentCount = 4;
|
||||
break;
|
||||
}
|
||||
|
||||
// Fetch the attribute
|
||||
inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId);
|
||||
inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName);
|
||||
inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
|
||||
inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset);
|
||||
for (uint8 i = 0; i < (4 - componentCount); i++)
|
||||
inputFetchDefinition += ", 0";
|
||||
inputFetchDefinition += ");\n";
|
||||
|
||||
if (fetchType.has_value())
|
||||
cemu_assert_debug(fetchType == attr.fetchType);
|
||||
else
|
||||
fetchType = attr.fetchType;
|
||||
|
||||
if (attr.fetchType == LatteConst::INSTANCE_DATA)
|
||||
{
|
||||
cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
|
||||
}
|
||||
}
|
||||
|
||||
vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
|
||||
vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex);
|
||||
}
|
||||
|
||||
inputFetchDefinition += "return in;\n";
|
||||
inputFetchDefinition += "}\n";
|
||||
|
||||
fullCode += vertexBufferDefinitions + "\n";
|
||||
fullCode += vertexBuffers + "\n";
|
||||
fullCode += m_mslCode;
|
||||
fullCode += inputFetchDefinition;
|
||||
|
||||
Compile(fullCode);
|
||||
}
|
||||
|
||||
// Compiles the stored fragment shader source for the given framebuffer.
// For each of the 8 color buffer slots that has a texture, a
// "#define COLOR_ATTACHMENT<i>_TYPE <type>" line is placed in front of the stored
// MSL code, where <type> follows the data type of the texture's format.
void RendererShaderMtl::CompileFragmentFunction(CachedFBOMtl* activeFBO)
{
	cemu_assert_debug(m_type == ShaderType::kFragment);

	std::string source;
	for (uint8 slot = 0; slot < 8; ++slot)
	{
		const auto& attachment = activeFBO->colorBuffer[slot];
		if (attachment.texture)
		{
			const auto attachmentDataType = GetMtlPixelFormatInfo(attachment.texture->format, false).dataType;

			// unknown data types leave the macro defined but empty
			const char* typeName = "";
			if (attachmentDataType == MetalDataType::INT)
				typeName = "int4";
			else if (attachmentDataType == MetalDataType::UINT)
				typeName = "uint4";
			else if (attachmentDataType == MetalDataType::FLOAT)
				typeName = "float4";
			else
				cemu_assert_suspicious();

			source += "#define " + GetColorAttachmentTypeStr(slot) + " ";
			source += typeName;
			source += "\n";
		}
	}

	source += m_mslCode;
	Compile(source);
}
|
||||
|
||||
void RendererShaderMtl::Compile(const std::string& mslCode)
|
||||
{
|
||||
if (m_function)
|
||||
m_function->release();
|
||||
|
||||
// HACK
|
||||
if (m_hasError)
|
||||
return;
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error);
|
||||
if (error)
|
||||
{
|
||||
printf("failed to create library (error: %s) -> source:\n%s\n", error->localizedDescription()->utf8String(), mslCode.c_str());
|
||||
error->release();
|
||||
|
||||
// HACK
|
||||
m_hasError = true;
|
||||
|
||||
return;
|
||||
}
|
||||
m_function = library->newFunction(ToNSString("main0"));
|
||||
library->release();
|
||||
}
|
||||
|
@ -21,14 +21,6 @@ public:
|
||||
RendererShaderMtl(class MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode);
|
||||
virtual ~RendererShaderMtl();
|
||||
|
||||
void CompileVertexFunction()
|
||||
{
|
||||
Compile(m_mslCode);
|
||||
}
|
||||
|
||||
void CompileObjectFunction(const LatteContextRegister& lcr, const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, Renderer::INDEX_TYPE hostIndexType);
|
||||
void CompileFragmentFunction(CachedFBOMtl* activeFBO);
|
||||
|
||||
MTL::Function* GetFunction() const
|
||||
{
|
||||
return m_function;
|
||||
@ -60,11 +52,5 @@ private:
|
||||
|
||||
MTL::Function* m_function = nullptr;
|
||||
|
||||
std::vector<uint8> m_binary;
|
||||
std::string m_mslCode;
|
||||
|
||||
// HACK
|
||||
bool m_hasError = false;
|
||||
|
||||
void Compile(const std::string& mslCode);
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user