mirror of
https://github.com/cemu-project/Cemu.git
synced 2024-11-25 18:46:55 +01:00
Latte: Bound uniform buffers based on access patterns within the shader
This commit is contained in:
parent
4d6b72b353
commit
3e925b7707
@ -132,22 +132,18 @@ void LatteBufferCache_syncGPUUniformBuffers(LatteDecompilerShader* shader, const
|
|||||||
{
|
{
|
||||||
if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
|
if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
|
||||||
{
|
{
|
||||||
// use full uniform buffers
|
for(const auto& buf : shader->list_quickBufferList)
|
||||||
for (sint32 t = 0; t < shader->uniformBufferListCount; t++)
|
|
||||||
{
|
{
|
||||||
sint32 i = shader->uniformBufferList[t];
|
sint32 i = buf.index;
|
||||||
MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0];
|
MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0];
|
||||||
uint32 uniformSize = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 1] + 1;
|
uint32 uniformSize = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 1] + 1;
|
||||||
|
if (physicalAddr == MPTR_NULL) [[unlikely]]
|
||||||
if (physicalAddr == MPTR_NULL)
|
|
||||||
{
|
{
|
||||||
// no data
|
|
||||||
g_renderer->buffer_bindUniformBuffer(shaderType, i, 0, 0);
|
g_renderer->buffer_bindUniformBuffer(shaderType, i, 0, 0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
uniformSize = std::min<uint32>(uniformSize, buf.size);
|
||||||
uint32 bindOffset = LatteBufferCache_retrieveDataInCache(physicalAddr, uniformSize);
|
uint32 bindOffset = LatteBufferCache_retrieveDataInCache(physicalAddr, uniformSize);
|
||||||
|
|
||||||
g_renderer->buffer_bindUniformBuffer(shaderType, i, bindOffset, uniformSize);
|
g_renderer->buffer_bindUniformBuffer(shaderType, i, bindOffset, uniformSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/RendererShader.h"
|
#include "Cafe/HW/Latte/Renderer/RendererShader.h"
|
||||||
|
#include <boost/container/static_vector.hpp>
|
||||||
|
|
||||||
namespace LatteDecompiler
|
namespace LatteDecompiler
|
||||||
{
|
{
|
||||||
@ -158,11 +159,13 @@ struct LatteDecompilerShader
|
|||||||
struct LatteFetchShader* compatibleFetchShader{};
|
struct LatteFetchShader* compatibleFetchShader{};
|
||||||
// error tracking
|
// error tracking
|
||||||
bool hasError{false}; // if set, the shader cannot be used
|
bool hasError{false}; // if set, the shader cannot be used
|
||||||
// optimized access / iteration
|
// compact resource lists for optimized access
|
||||||
// list of uniform buffers used
|
struct QuickBufferEntry
|
||||||
uint8 uniformBufferList[LATTE_NUM_MAX_UNIFORM_BUFFERS];
|
{
|
||||||
uint8 uniformBufferListCount{ 0 };
|
uint8 index;
|
||||||
// list of used texture units (faster access than iterating textureUnitMask)
|
uint16 size;
|
||||||
|
};
|
||||||
|
boost::container::static_vector<QuickBufferEntry, LATTE_NUM_MAX_UNIFORM_BUFFERS> list_quickBufferList;
|
||||||
uint8 textureUnitList[LATTE_NUM_MAX_TEX_UNITS];
|
uint8 textureUnitList[LATTE_NUM_MAX_TEX_UNITS];
|
||||||
uint8 textureUnitListCount{ 0 };
|
uint8 textureUnitListCount{ 0 };
|
||||||
// input
|
// input
|
||||||
|
@ -230,47 +230,39 @@ void LatteDecompiler_analyzeALUClause(LatteDecompilerShaderContext* shaderContex
|
|||||||
// check input for uniform access
|
// check input for uniform access
|
||||||
if( aluInstruction.sourceOperand[f].sel == 0xFFFFFFFF )
|
if( aluInstruction.sourceOperand[f].sel == 0xFFFFFFFF )
|
||||||
continue; // source operand not set/used
|
continue; // source operand not set/used
|
||||||
|
// about uniform register and buffer access tracking:
|
||||||
|
// for absolute indices we can determine a maximum size that is accessed
|
||||||
|
// relative accesses are tricky because the upper bound of accessed indices is unknown
|
||||||
|
// worst case we have to load the full file (256 * 16 byte entries) or for buffers an arbitrary upper bound (64KB in our case)
|
||||||
if( GPU7_ALU_SRC_IS_CFILE(aluInstruction.sourceOperand[f].sel) )
|
if( GPU7_ALU_SRC_IS_CFILE(aluInstruction.sourceOperand[f].sel) )
|
||||||
{
|
{
|
||||||
// uniform register access
|
|
||||||
|
|
||||||
// relative register file accesses are tricky because the range of possible indices is unknown
|
|
||||||
// worst case we have to load the full file (256 * 16 byte entries)
|
|
||||||
// by tracking the accessed base indices the shader analyzer can determine bounds for the potentially accessed ranges
|
|
||||||
|
|
||||||
shaderContext->analyzer.uniformRegisterAccess = true;
|
|
||||||
if (aluInstruction.sourceOperand[f].rel)
|
if (aluInstruction.sourceOperand[f].rel)
|
||||||
{
|
{
|
||||||
shaderContext->analyzer.uniformRegisterDynamicAccess = true;
|
shaderContext->analyzer.uniformRegisterAccessTracker.TrackAccess(GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel), true);
|
||||||
shaderContext->analyzer.uniformRegisterAccessIndices.emplace_back(GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel), true);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_remapUniformAccess(shaderContext, true, 0, GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel));
|
_remapUniformAccess(shaderContext, true, 0, GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel));
|
||||||
shaderContext->analyzer.uniformRegisterAccessIndices.emplace_back(GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel), false);
|
shaderContext->analyzer.uniformRegisterAccessTracker.TrackAccess(GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel), false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction.sourceOperand[f].sel) )
|
else if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction.sourceOperand[f].sel) )
|
||||||
{
|
{
|
||||||
// uniform bank 0 (uniform buffer with index cfInstruction->cBank0Index)
|
// uniform bank 0 (uniform buffer with index cfInstruction->cBank0Index)
|
||||||
uint32 uniformBufferIndex = cfInstruction->cBank0Index;
|
uint32 uniformBufferIndex = cfInstruction->cBank0Index;
|
||||||
if( uniformBufferIndex >= LATTE_NUM_MAX_UNIFORM_BUFFERS)
|
cemu_assert(uniformBufferIndex < LATTE_NUM_MAX_UNIFORM_BUFFERS);
|
||||||
debugBreakpoint();
|
uint32 offset = GPU7_ALU_SRC_GET_CBANK0_INDEX(aluInstruction.sourceOperand[f].sel)+cfInstruction->cBank0AddrBase;
|
||||||
shaderContext->analyzer.uniformBufferAccessMask |= (1<<uniformBufferIndex);
|
_remapUniformAccess(shaderContext, false, uniformBufferIndex, offset);
|
||||||
if( aluInstruction.sourceOperand[f].rel )
|
shaderContext->analyzer.uniformBufferAccessTracker[uniformBufferIndex].TrackAccess(offset, aluInstruction.sourceOperand[f].rel);
|
||||||
shaderContext->analyzer.uniformBufferDynamicAccessMask |= (1<<uniformBufferIndex);
|
|
||||||
_remapUniformAccess(shaderContext, false, uniformBufferIndex, GPU7_ALU_SRC_GET_CBANK0_INDEX(aluInstruction.sourceOperand[f].sel)+cfInstruction->cBank0AddrBase);
|
|
||||||
}
|
}
|
||||||
else if( GPU7_ALU_SRC_IS_CBANK1(aluInstruction.sourceOperand[f].sel) )
|
else if( GPU7_ALU_SRC_IS_CBANK1(aluInstruction.sourceOperand[f].sel) )
|
||||||
{
|
{
|
||||||
// uniform bank 1 (uniform buffer with index cfInstruction->cBank1Index)
|
// uniform bank 1 (uniform buffer with index cfInstruction->cBank1Index)
|
||||||
uint32 uniformBufferIndex = cfInstruction->cBank1Index;
|
uint32 uniformBufferIndex = cfInstruction->cBank1Index;
|
||||||
if( uniformBufferIndex >= LATTE_NUM_MAX_UNIFORM_BUFFERS)
|
cemu_assert(uniformBufferIndex < LATTE_NUM_MAX_UNIFORM_BUFFERS);
|
||||||
debugBreakpoint();
|
uint32 offset = GPU7_ALU_SRC_GET_CBANK1_INDEX(aluInstruction.sourceOperand[f].sel)+cfInstruction->cBank1AddrBase;
|
||||||
shaderContext->analyzer.uniformBufferAccessMask |= (1<<uniformBufferIndex);
|
_remapUniformAccess(shaderContext, false, uniformBufferIndex, offset);
|
||||||
if( aluInstruction.sourceOperand[f].rel )
|
shaderContext->analyzer.uniformBufferAccessTracker[uniformBufferIndex].TrackAccess(offset, aluInstruction.sourceOperand[f].rel);
|
||||||
shaderContext->analyzer.uniformBufferDynamicAccessMask |= (1<<uniformBufferIndex);
|
|
||||||
_remapUniformAccess(shaderContext, false, uniformBufferIndex, GPU7_ALU_SRC_GET_CBANK1_INDEX(aluInstruction.sourceOperand[f].sel)+cfInstruction->cBank1AddrBase);
|
|
||||||
}
|
}
|
||||||
else if( GPU7_ALU_SRC_IS_GPR(aluInstruction.sourceOperand[f].sel) )
|
else if( GPU7_ALU_SRC_IS_GPR(aluInstruction.sourceOperand[f].sel) )
|
||||||
{
|
{
|
||||||
@ -360,8 +352,7 @@ void LatteDecompiler_analyzeTEXClause(LatteDecompilerShaderContext* shaderContex
|
|||||||
if( texInstruction.textureFetch.textureIndex >= 0x80 && texInstruction.textureFetch.textureIndex <= 0x8F )
|
if( texInstruction.textureFetch.textureIndex >= 0x80 && texInstruction.textureFetch.textureIndex <= 0x8F )
|
||||||
{
|
{
|
||||||
uint32 uniformBufferIndex = texInstruction.textureFetch.textureIndex - 0x80;
|
uint32 uniformBufferIndex = texInstruction.textureFetch.textureIndex - 0x80;
|
||||||
shaderContext->analyzer.uniformBufferAccessMask |= (1<<uniformBufferIndex);
|
shaderContext->analyzer.uniformBufferAccessTracker[uniformBufferIndex].TrackAccess(0, true);
|
||||||
shaderContext->analyzer.uniformBufferDynamicAccessMask |= (1<<uniformBufferIndex);
|
|
||||||
}
|
}
|
||||||
else if( texInstruction.textureFetch.textureIndex == 0x9F && shader->shaderType == LatteConst::ShaderType::Geometry )
|
else if( texInstruction.textureFetch.textureIndex == 0x9F && shader->shaderType == LatteConst::ShaderType::Geometry )
|
||||||
{
|
{
|
||||||
@ -576,7 +567,7 @@ namespace LatteDecompiler
|
|||||||
// for Vulkan we use consecutive indices
|
// for Vulkan we use consecutive indices
|
||||||
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
|
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
|
||||||
{
|
{
|
||||||
if ((decompilerContext->analyzer.uniformBufferAccessMask&(1 << i)) == 0)
|
if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess())
|
||||||
continue;
|
continue;
|
||||||
sint32 uniformBindingPoint = i;
|
sint32 uniformBindingPoint = i;
|
||||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||||
@ -592,7 +583,7 @@ namespace LatteDecompiler
|
|||||||
// for OpenGL we use the relative buffer index
|
// for OpenGL we use the relative buffer index
|
||||||
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
|
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
|
||||||
{
|
{
|
||||||
if ((decompilerContext->analyzer.uniformBufferAccessMask&(1 << i)) == 0)
|
if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess())
|
||||||
continue;
|
continue;
|
||||||
sint32 uniformBindingPoint = i;
|
sint32 uniformBindingPoint = i;
|
||||||
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
|
||||||
@ -765,17 +756,24 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
|||||||
LatteDecompiler_analyzeSubroutine(shaderContext, subroutineAddr);
|
LatteDecompiler_analyzeSubroutine(shaderContext, subroutineAddr);
|
||||||
}
|
}
|
||||||
// decide which uniform mode to use
|
// decide which uniform mode to use
|
||||||
if(shaderContext->analyzer.uniformBufferAccessMask != 0 && shaderContext->analyzer.uniformRegisterAccess )
|
bool hasAnyDynamicBufferAccess = false;
|
||||||
debugBreakpoint(); // not allowed
|
bool hasAnyBufferAccess = false;
|
||||||
if(shaderContext->analyzer.uniformBufferDynamicAccessMask != 0 )
|
for(auto& it : shaderContext->analyzer.uniformBufferAccessTracker)
|
||||||
|
{
|
||||||
|
if( it.HasRelativeAccess() )
|
||||||
|
hasAnyDynamicBufferAccess = true;
|
||||||
|
if( it.HasAccess() )
|
||||||
|
hasAnyBufferAccess = true;
|
||||||
|
}
|
||||||
|
if (hasAnyDynamicBufferAccess)
|
||||||
{
|
{
|
||||||
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
|
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
|
||||||
}
|
}
|
||||||
else if(shaderContext->analyzer.uniformRegisterDynamicAccess )
|
else if(shaderContext->analyzer.uniformRegisterAccessTracker.HasRelativeAccess() )
|
||||||
{
|
{
|
||||||
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE;
|
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE;
|
||||||
}
|
}
|
||||||
else if(shaderContext->analyzer.uniformBufferAccessMask != 0 || shaderContext->analyzer.uniformRegisterAccess != 0 )
|
else if(hasAnyBufferAccess || shaderContext->analyzer.uniformRegisterAccessTracker.HasAccess() )
|
||||||
{
|
{
|
||||||
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED;
|
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED;
|
||||||
}
|
}
|
||||||
@ -783,16 +781,18 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
|||||||
{
|
{
|
||||||
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_NONE;
|
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_NONE;
|
||||||
}
|
}
|
||||||
// generate list of uniform buffers based on uniformBufferAccessMask (for faster access)
|
// generate compact list of uniform buffers (for faster access)
|
||||||
shader->uniformBufferListCount = 0;
|
cemu_assert_debug(shader->list_quickBufferList.empty());
|
||||||
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
|
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
|
||||||
{
|
{
|
||||||
if( !HAS_FLAG(shaderContext->analyzer.uniformBufferAccessMask, (1<<i)) )
|
if( !shaderContext->analyzer.uniformBufferAccessTracker[i].HasAccess() )
|
||||||
continue;
|
continue;
|
||||||
shader->uniformBufferList[shader->uniformBufferListCount] = i;
|
LatteDecompilerShader::QuickBufferEntry entry;
|
||||||
shader->uniformBufferListCount++;
|
entry.index = i;
|
||||||
|
entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16;
|
||||||
|
shader->list_quickBufferList.push_back(entry);
|
||||||
}
|
}
|
||||||
// get dimension of each used textures
|
// get dimension of each used texture
|
||||||
_LatteRegisterSetTextureUnit* texRegs = nullptr;
|
_LatteRegisterSetTextureUnit* texRegs = nullptr;
|
||||||
if( shader->shaderType == LatteConst::ShaderType::Vertex )
|
if( shader->shaderType == LatteConst::ShaderType::Vertex )
|
||||||
texRegs = shaderContext->contextRegistersNew->SQ_TEX_START_VS;
|
texRegs = shaderContext->contextRegistersNew->SQ_TEX_START_VS;
|
||||||
|
@ -37,36 +37,14 @@ namespace LatteDecompiler
|
|||||||
}
|
}
|
||||||
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
|
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
|
||||||
{
|
{
|
||||||
// here we try to predict the accessed range so we dont have to upload the whole register file
|
uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(256);
|
||||||
// we assume that if there is a fixed-index access on an index higher than a relative access, it bounds the prior relative access
|
// full or partial uniform register file has to be present
|
||||||
sint16 highestAccessIndex = -1;
|
|
||||||
bool highestAccessIndexIsRel = false;
|
|
||||||
for(auto& accessItr : decompilerContext->analyzer.uniformRegisterAccessIndices)
|
|
||||||
{
|
|
||||||
if (accessItr.index > highestAccessIndex || (accessItr.index == highestAccessIndex && accessItr.isRelative && !highestAccessIndexIsRel))
|
|
||||||
{
|
|
||||||
highestAccessIndex = accessItr.index;
|
|
||||||
highestAccessIndexIsRel = accessItr.isRelative;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (highestAccessIndex < 0)
|
|
||||||
highestAccessIndex = 0;
|
|
||||||
|
|
||||||
uint32 cfileSize;
|
|
||||||
if (highestAccessIndexIsRel)
|
|
||||||
cfileSize = 256;
|
|
||||||
else
|
|
||||||
cfileSize = highestAccessIndex + 1;
|
|
||||||
|
|
||||||
// full uniform register file has to be present
|
|
||||||
if (shaderType == LatteConst::ShaderType::Vertex)
|
if (shaderType == LatteConst::ShaderType::Vertex)
|
||||||
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize);
|
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize);
|
||||||
else if (shaderType == LatteConst::ShaderType::Pixel)
|
else if (shaderType == LatteConst::ShaderType::Pixel)
|
||||||
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterPS[{}];" _CRLF, cfileSize);
|
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterPS[{}];" _CRLF, cfileSize);
|
||||||
else if (shaderType == LatteConst::ShaderType::Geometry)
|
else if (shaderType == LatteConst::ShaderType::Geometry)
|
||||||
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterGS[{}];" _CRLF, cfileSize);
|
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterGS[{}];" _CRLF, cfileSize);
|
||||||
else
|
|
||||||
debugBreakpoint();
|
|
||||||
uniformOffsets.offset_uniformRegister = uniformCurrentOffset;
|
uniformOffsets.offset_uniformRegister = uniformCurrentOffset;
|
||||||
uniformOffsets.count_uniformRegister = cfileSize;
|
uniformOffsets.count_uniformRegister = cfileSize;
|
||||||
uniformCurrentOffset += 16 * cfileSize;
|
uniformCurrentOffset += 16 * cfileSize;
|
||||||
@ -168,7 +146,7 @@ namespace LatteDecompiler
|
|||||||
{
|
{
|
||||||
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
|
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
|
||||||
{
|
{
|
||||||
if ((decompilerContext->analyzer.uniformBufferAccessMask&(1 << i)) == 0)
|
if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
cemu_assert_debug(decompilerContext->output->resourceMappingGL.uniformBuffersBindingPoint[i] >= 0);
|
cemu_assert_debug(decompilerContext->output->resourceMappingGL.uniformBuffersBindingPoint[i] >= 0);
|
||||||
@ -178,7 +156,7 @@ namespace LatteDecompiler
|
|||||||
|
|
||||||
shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i);
|
shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i);
|
||||||
shaderSrc->add("{" _CRLF);
|
shaderSrc->add("{" _CRLF);
|
||||||
shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE);
|
shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
|
||||||
shaderSrc->add("};" _CRLF _CRLF);
|
shaderSrc->add("};" _CRLF _CRLF);
|
||||||
shaderSrc->add(_CRLF);
|
shaderSrc->add(_CRLF);
|
||||||
}
|
}
|
||||||
|
@ -125,19 +125,66 @@ struct LatteDecompilerCFInstruction
|
|||||||
LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default;
|
LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LatteDecompilerCFileAccess
|
|
||||||
{
|
|
||||||
LatteDecompilerCFileAccess(uint8 index, bool isRelative) : index(index), isRelative(isRelative) {};
|
|
||||||
uint8 index;
|
|
||||||
bool isRelative;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct LatteDecompilerSubroutineInfo
|
struct LatteDecompilerSubroutineInfo
|
||||||
{
|
{
|
||||||
uint32 cfAddr;
|
uint32 cfAddr;
|
||||||
std::vector<LatteDecompilerCFInstruction> instructions;
|
std::vector<LatteDecompilerCFInstruction> instructions;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// helper struct to track the highest accessed offset within a buffer
|
||||||
|
struct LatteDecompilerBufferAccessTracker
|
||||||
|
{
|
||||||
|
bool hasStaticIndexAccess{false};
|
||||||
|
bool hasDynamicIndexAccess{false};
|
||||||
|
sint32 highestAccessDynamicIndex{0};
|
||||||
|
sint32 highestAccessStaticIndex{0};
|
||||||
|
|
||||||
|
// track access, index is the array index and not a byte offset
|
||||||
|
void TrackAccess(sint32 index, bool isDynamicIndex)
|
||||||
|
{
|
||||||
|
if (isDynamicIndex)
|
||||||
|
{
|
||||||
|
hasDynamicIndexAccess = true;
|
||||||
|
if (index > highestAccessDynamicIndex)
|
||||||
|
highestAccessDynamicIndex = index;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
hasStaticIndexAccess = true;
|
||||||
|
if (index > highestAccessStaticIndex)
|
||||||
|
highestAccessStaticIndex = index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sint32 DetermineSize(sint32 maximumSize) const
|
||||||
|
{
|
||||||
|
// here we try to predict the accessed range so we dont have to upload the whole buffer
|
||||||
|
// potential risky optimization: assume that if there is a fixed-index access on an index higher than any other non-zero relative accesses, it bounds the prior relative access
|
||||||
|
sint32 highestAccessIndex = -1;
|
||||||
|
if(hasStaticIndexAccess)
|
||||||
|
{
|
||||||
|
highestAccessIndex = highestAccessStaticIndex;
|
||||||
|
}
|
||||||
|
if(hasDynamicIndexAccess)
|
||||||
|
{
|
||||||
|
return maximumSize; // dynamic index exists and no bound can be determined
|
||||||
|
}
|
||||||
|
if (highestAccessIndex < 0)
|
||||||
|
return 1; // no access at all? But avoid zero as a size
|
||||||
|
return highestAccessIndex + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HasAccess() const
|
||||||
|
{
|
||||||
|
return hasStaticIndexAccess || hasDynamicIndexAccess;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HasRelativeAccess() const
|
||||||
|
{
|
||||||
|
return hasDynamicIndexAccess;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct LatteDecompilerShaderContext
|
struct LatteDecompilerShaderContext
|
||||||
{
|
{
|
||||||
LatteDecompilerOutput_t* output;
|
LatteDecompilerOutput_t* output;
|
||||||
@ -174,12 +221,9 @@ struct LatteDecompilerShaderContext
|
|||||||
bool isPointsPrimitive{}; // set if current render primitive is points
|
bool isPointsPrimitive{}; // set if current render primitive is points
|
||||||
bool outputPointSize{}; // set if the current shader should output the point size
|
bool outputPointSize{}; // set if the current shader should output the point size
|
||||||
std::bitset<256> inputAttributSemanticMask; // one set bit for every used semanticId - todo: there are only 128 bit available semantic locations? The MSB has special meaning?
|
std::bitset<256> inputAttributSemanticMask; // one set bit for every used semanticId - todo: there are only 128 bit available semantic locations? The MSB has special meaning?
|
||||||
// uniform
|
// uniforms
|
||||||
bool uniformRegisterAccess; // set to true if cfile (uniform register) is accessed
|
LatteDecompilerBufferAccessTracker uniformRegisterAccessTracker;
|
||||||
bool uniformRegisterDynamicAccess; // set to true if cfile (uniform register) is accessed with a dynamic index
|
LatteDecompilerBufferAccessTracker uniformBufferAccessTracker[LATTE_NUM_MAX_UNIFORM_BUFFERS];
|
||||||
uint32 uniformBufferAccessMask; // 1 bit per buffer, set if the uniform buffer is accessed
|
|
||||||
uint32 uniformBufferDynamicAccessMask; // 1 bit per buffer, set if the uniform buffer is accessed by dynamic index
|
|
||||||
std::vector<LatteDecompilerCFileAccess> uniformRegisterAccessIndices;
|
|
||||||
// ssbo
|
// ssbo
|
||||||
bool hasSSBORead; // shader has instructions that read from SSBO
|
bool hasSSBORead; // shader has instructions that read from SSBO
|
||||||
bool hasSSBOWrite; // shader has instructions that write to SSBO
|
bool hasSSBOWrite; // shader has instructions that write to SSBO
|
||||||
|
@ -1591,10 +1591,9 @@ void VulkanRenderer::draw_updateUniformBuffersDirectAccess(LatteDecompilerShader
|
|||||||
{
|
{
|
||||||
if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
|
if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
|
||||||
{
|
{
|
||||||
// use full uniform buffers
|
for(const auto& buf : shader->list_quickBufferList)
|
||||||
for (sint32 t = 0; t < shader->uniformBufferListCount; t++)
|
|
||||||
{
|
{
|
||||||
sint32 i = shader->uniformBufferList[t];
|
sint32 i = buf.index;
|
||||||
MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0];
|
MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0];
|
||||||
uint32 uniformSize = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 1] + 1;
|
uint32 uniformSize = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 1] + 1;
|
||||||
|
|
||||||
@ -1603,6 +1602,7 @@ void VulkanRenderer::draw_updateUniformBuffersDirectAccess(LatteDecompilerShader
|
|||||||
cemu_assert_unimplemented();
|
cemu_assert_unimplemented();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
uniformSize = std::min<uint32>(uniformSize, buf.size);
|
||||||
|
|
||||||
cemu_assert_debug(physicalAddr < 0x50000000);
|
cemu_assert_debug(physicalAddr < 0x50000000);
|
||||||
|
|
||||||
@ -1621,7 +1621,7 @@ void VulkanRenderer::draw_updateUniformBuffersDirectAccess(LatteDecompilerShader
|
|||||||
dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].unformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress;
|
dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].unformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
cemu_assert_debug(false);
|
UNREACHABLE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user