Latte: Optimize uniform register array size for known shaders

This commit is contained in:
Exzap 2024-02-19 12:07:03 +01:00
parent 96bbd3bd25
commit 72ce4838ea
4 changed files with 15 additions and 11 deletions

View File

@ -652,7 +652,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
} }
else else
{ {
shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsVK.count_uniformRegister; shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsGL.count_uniformRegister;
} }
// calculate aux hash // calculate aux hash
if (calculateAuxHash) if (calculateAuxHash)

View File

@ -787,7 +787,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
continue; continue;
LatteDecompilerShader::QuickBufferEntry entry; LatteDecompilerShader::QuickBufferEntry entry;
entry.index = i; entry.index = i;
entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16; entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(shaderContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16;
shader->list_quickBufferList.push_back(entry); shader->list_quickBufferList.push_back(entry);
} }
// get dimension of each used texture // get dimension of each used texture

View File

@ -37,7 +37,7 @@ namespace LatteDecompiler
} }
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE) else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
{ {
uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(256); uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(decompilerContext->shaderBaseHash, 256);
// full or partial uniform register file has to be present // full or partial uniform register file has to be present
if (shaderType == LatteConst::ShaderType::Vertex) if (shaderType == LatteConst::ShaderType::Vertex)
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize); shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize);
@ -156,7 +156,7 @@ namespace LatteDecompiler
shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i); shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i);
shaderSrc->add("{" _CRLF); shaderSrc->add("{" _CRLF);
shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE)); shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(decompilerContext->shaderBaseHash, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
shaderSrc->add("};" _CRLF _CRLF); shaderSrc->add("};" _CRLF _CRLF);
shaderSrc->add(_CRLF); shaderSrc->add(_CRLF);
} }

View File

@ -157,19 +157,23 @@ struct LatteDecompilerBufferAccessTracker
} }
} }
sint32 DetermineSize(sint32 maximumSize) const sint32 DetermineSize(uint64 shaderBaseHash, sint32 maximumSize) const
{ {
// here we try to predict the accessed range so we don't have to upload the whole buffer // here we try to predict the accessed byte range so we don't have to upload the whole buffer
// potential risky optimization: assume that if there is a fixed-index access on an index higher than any other non-zero relative accesses, it bounds the prior relative access // if no bound can be determined then return maximumSize
// for some known shaders we use hand-tuned values instead of the maximumSize fallback value that those shaders would normally use
if(shaderBaseHash == 0x8ff56afdf1a2f837) // XCX text rendering
return 24;
if(shaderBaseHash == 0x37b9100c1310d3bb) // BotW UI backdrops 1
return 24;
if(shaderBaseHash == 0xf7ba548c1fefe24a) // BotW UI backdrops 2
return 30;
sint32 highestAccessIndex = -1; sint32 highestAccessIndex = -1;
if(hasStaticIndexAccess) if(hasStaticIndexAccess)
{
highestAccessIndex = highestAccessStaticIndex; highestAccessIndex = highestAccessStaticIndex;
}
if(hasDynamicIndexAccess) if(hasDynamicIndexAccess)
{
return maximumSize; // dynamic index exists and no bound can be determined return maximumSize; // dynamic index exists and no bound can be determined
}
if (highestAccessIndex < 0) if (highestAccessIndex < 0)
return 1; // no access at all? But avoid zero as a size return 1; // no access at all? But avoid zero as a size
return highestAccessIndex + 1; return highestAccessIndex + 1;