Latte: Bound uniform buffers based on access patterns within the shader

This commit is contained in:
Exzap 2023-09-23 22:53:57 +02:00
parent 4d6b72b353
commit 3e925b7707
6 changed files with 114 additions and 93 deletions

View File

@ -132,22 +132,18 @@ void LatteBufferCache_syncGPUUniformBuffers(LatteDecompilerShader* shader, const
{ {
if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK) if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
{ {
// use full uniform buffers for(const auto& buf : shader->list_quickBufferList)
for (sint32 t = 0; t < shader->uniformBufferListCount; t++)
{ {
sint32 i = shader->uniformBufferList[t]; sint32 i = buf.index;
MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0]; MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0];
uint32 uniformSize = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 1] + 1; uint32 uniformSize = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 1] + 1;
if (physicalAddr == MPTR_NULL) [[unlikely]]
if (physicalAddr == MPTR_NULL)
{ {
// no data
g_renderer->buffer_bindUniformBuffer(shaderType, i, 0, 0); g_renderer->buffer_bindUniformBuffer(shaderType, i, 0, 0);
continue; continue;
} }
uniformSize = std::min<uint32>(uniformSize, buf.size);
uint32 bindOffset = LatteBufferCache_retrieveDataInCache(physicalAddr, uniformSize); uint32 bindOffset = LatteBufferCache_retrieveDataInCache(physicalAddr, uniformSize);
g_renderer->buffer_bindUniformBuffer(shaderType, i, bindOffset, uniformSize); g_renderer->buffer_bindUniformBuffer(shaderType, i, bindOffset, uniformSize);
} }
} }

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include "Cafe/HW/Latte/Core/LatteConst.h" #include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Renderer/RendererShader.h" #include "Cafe/HW/Latte/Renderer/RendererShader.h"
#include <boost/container/static_vector.hpp>
namespace LatteDecompiler namespace LatteDecompiler
{ {
@ -158,11 +159,13 @@ struct LatteDecompilerShader
struct LatteFetchShader* compatibleFetchShader{}; struct LatteFetchShader* compatibleFetchShader{};
// error tracking // error tracking
bool hasError{false}; // if set, the shader cannot be used bool hasError{false}; // if set, the shader cannot be used
// optimized access / iteration // compact resource lists for optimized access
// list of uniform buffers used struct QuickBufferEntry
uint8 uniformBufferList[LATTE_NUM_MAX_UNIFORM_BUFFERS]; {
uint8 uniformBufferListCount{ 0 }; uint8 index;
// list of used texture units (faster access than iterating textureUnitMask) uint16 size;
};
boost::container::static_vector<QuickBufferEntry, LATTE_NUM_MAX_UNIFORM_BUFFERS> list_quickBufferList;
uint8 textureUnitList[LATTE_NUM_MAX_TEX_UNITS]; uint8 textureUnitList[LATTE_NUM_MAX_TEX_UNITS];
uint8 textureUnitListCount{ 0 }; uint8 textureUnitListCount{ 0 };
// input // input

View File

@ -230,47 +230,39 @@ void LatteDecompiler_analyzeALUClause(LatteDecompilerShaderContext* shaderContex
// check input for uniform access // check input for uniform access
if( aluInstruction.sourceOperand[f].sel == 0xFFFFFFFF ) if( aluInstruction.sourceOperand[f].sel == 0xFFFFFFFF )
continue; // source operand not set/used continue; // source operand not set/used
// about uniform register and buffer access tracking:
// for absolute indices we can determine a maximum size that is accessed
// relative accesses are tricky because the upper bound of accessed indices is unknown
// worst case we have to load the full file (256 * 16 byte entries) or for buffers an arbitrary upper bound (64KB in our case)
if( GPU7_ALU_SRC_IS_CFILE(aluInstruction.sourceOperand[f].sel) ) if( GPU7_ALU_SRC_IS_CFILE(aluInstruction.sourceOperand[f].sel) )
{ {
// uniform register access
// relative register file accesses are tricky because the range of possible indices is unknown
// worst case we have to load the full file (256 * 16 byte entries)
// by tracking the accessed base indices the shader analyzer can determine bounds for the potentially accessed ranges
shaderContext->analyzer.uniformRegisterAccess = true;
if (aluInstruction.sourceOperand[f].rel) if (aluInstruction.sourceOperand[f].rel)
{ {
shaderContext->analyzer.uniformRegisterDynamicAccess = true; shaderContext->analyzer.uniformRegisterAccessTracker.TrackAccess(GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel), true);
shaderContext->analyzer.uniformRegisterAccessIndices.emplace_back(GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel), true);
} }
else else
{ {
_remapUniformAccess(shaderContext, true, 0, GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel)); _remapUniformAccess(shaderContext, true, 0, GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel));
shaderContext->analyzer.uniformRegisterAccessIndices.emplace_back(GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel), false); shaderContext->analyzer.uniformRegisterAccessTracker.TrackAccess(GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction.sourceOperand[f].sel), false);
} }
} }
else if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction.sourceOperand[f].sel) ) else if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction.sourceOperand[f].sel) )
{ {
// uniform bank 0 (uniform buffer with index cfInstruction->cBank0Index) // uniform bank 0 (uniform buffer with index cfInstruction->cBank0Index)
uint32 uniformBufferIndex = cfInstruction->cBank0Index; uint32 uniformBufferIndex = cfInstruction->cBank0Index;
if( uniformBufferIndex >= LATTE_NUM_MAX_UNIFORM_BUFFERS) cemu_assert(uniformBufferIndex < LATTE_NUM_MAX_UNIFORM_BUFFERS);
debugBreakpoint(); uint32 offset = GPU7_ALU_SRC_GET_CBANK0_INDEX(aluInstruction.sourceOperand[f].sel)+cfInstruction->cBank0AddrBase;
shaderContext->analyzer.uniformBufferAccessMask |= (1<<uniformBufferIndex); _remapUniformAccess(shaderContext, false, uniformBufferIndex, offset);
if( aluInstruction.sourceOperand[f].rel ) shaderContext->analyzer.uniformBufferAccessTracker[uniformBufferIndex].TrackAccess(offset, aluInstruction.sourceOperand[f].rel);
shaderContext->analyzer.uniformBufferDynamicAccessMask |= (1<<uniformBufferIndex);
_remapUniformAccess(shaderContext, false, uniformBufferIndex, GPU7_ALU_SRC_GET_CBANK0_INDEX(aluInstruction.sourceOperand[f].sel)+cfInstruction->cBank0AddrBase);
} }
else if( GPU7_ALU_SRC_IS_CBANK1(aluInstruction.sourceOperand[f].sel) ) else if( GPU7_ALU_SRC_IS_CBANK1(aluInstruction.sourceOperand[f].sel) )
{ {
// uniform bank 1 (uniform buffer with index cfInstruction->cBank1Index) // uniform bank 1 (uniform buffer with index cfInstruction->cBank1Index)
uint32 uniformBufferIndex = cfInstruction->cBank1Index; uint32 uniformBufferIndex = cfInstruction->cBank1Index;
if( uniformBufferIndex >= LATTE_NUM_MAX_UNIFORM_BUFFERS) cemu_assert(uniformBufferIndex < LATTE_NUM_MAX_UNIFORM_BUFFERS);
debugBreakpoint(); uint32 offset = GPU7_ALU_SRC_GET_CBANK1_INDEX(aluInstruction.sourceOperand[f].sel)+cfInstruction->cBank1AddrBase;
shaderContext->analyzer.uniformBufferAccessMask |= (1<<uniformBufferIndex); _remapUniformAccess(shaderContext, false, uniformBufferIndex, offset);
if( aluInstruction.sourceOperand[f].rel ) shaderContext->analyzer.uniformBufferAccessTracker[uniformBufferIndex].TrackAccess(offset, aluInstruction.sourceOperand[f].rel);
shaderContext->analyzer.uniformBufferDynamicAccessMask |= (1<<uniformBufferIndex);
_remapUniformAccess(shaderContext, false, uniformBufferIndex, GPU7_ALU_SRC_GET_CBANK1_INDEX(aluInstruction.sourceOperand[f].sel)+cfInstruction->cBank1AddrBase);
} }
else if( GPU7_ALU_SRC_IS_GPR(aluInstruction.sourceOperand[f].sel) ) else if( GPU7_ALU_SRC_IS_GPR(aluInstruction.sourceOperand[f].sel) )
{ {
@ -360,8 +352,7 @@ void LatteDecompiler_analyzeTEXClause(LatteDecompilerShaderContext* shaderContex
if( texInstruction.textureFetch.textureIndex >= 0x80 && texInstruction.textureFetch.textureIndex <= 0x8F ) if( texInstruction.textureFetch.textureIndex >= 0x80 && texInstruction.textureFetch.textureIndex <= 0x8F )
{ {
uint32 uniformBufferIndex = texInstruction.textureFetch.textureIndex - 0x80; uint32 uniformBufferIndex = texInstruction.textureFetch.textureIndex - 0x80;
shaderContext->analyzer.uniformBufferAccessMask |= (1<<uniformBufferIndex); shaderContext->analyzer.uniformBufferAccessTracker[uniformBufferIndex].TrackAccess(0, true);
shaderContext->analyzer.uniformBufferDynamicAccessMask |= (1<<uniformBufferIndex);
} }
else if( texInstruction.textureFetch.textureIndex == 0x9F && shader->shaderType == LatteConst::ShaderType::Geometry ) else if( texInstruction.textureFetch.textureIndex == 0x9F && shader->shaderType == LatteConst::ShaderType::Geometry )
{ {
@ -576,7 +567,7 @@ namespace LatteDecompiler
// for Vulkan we use consecutive indices // for Vulkan we use consecutive indices
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
{ {
if ((decompilerContext->analyzer.uniformBufferAccessMask&(1 << i)) == 0) if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess())
continue; continue;
sint32 uniformBindingPoint = i; sint32 uniformBindingPoint = i;
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry) if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
@ -592,7 +583,7 @@ namespace LatteDecompiler
// for OpenGL we use the relative buffer index // for OpenGL we use the relative buffer index
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
{ {
if ((decompilerContext->analyzer.uniformBufferAccessMask&(1 << i)) == 0) if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess())
continue; continue;
sint32 uniformBindingPoint = i; sint32 uniformBindingPoint = i;
if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry) if (decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
@ -765,17 +756,24 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
LatteDecompiler_analyzeSubroutine(shaderContext, subroutineAddr); LatteDecompiler_analyzeSubroutine(shaderContext, subroutineAddr);
} }
// decide which uniform mode to use // decide which uniform mode to use
if(shaderContext->analyzer.uniformBufferAccessMask != 0 && shaderContext->analyzer.uniformRegisterAccess ) bool hasAnyDynamicBufferAccess = false;
debugBreakpoint(); // not allowed bool hasAnyBufferAccess = false;
if(shaderContext->analyzer.uniformBufferDynamicAccessMask != 0 ) for(auto& it : shaderContext->analyzer.uniformBufferAccessTracker)
{
if( it.HasRelativeAccess() )
hasAnyDynamicBufferAccess = true;
if( it.HasAccess() )
hasAnyBufferAccess = true;
}
if (hasAnyDynamicBufferAccess)
{ {
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK; shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
} }
else if(shaderContext->analyzer.uniformRegisterDynamicAccess ) else if(shaderContext->analyzer.uniformRegisterAccessTracker.HasRelativeAccess() )
{ {
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE; shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE;
} }
else if(shaderContext->analyzer.uniformBufferAccessMask != 0 || shaderContext->analyzer.uniformRegisterAccess != 0 ) else if(hasAnyBufferAccess || shaderContext->analyzer.uniformRegisterAccessTracker.HasAccess() )
{ {
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED; shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED;
} }
@ -783,16 +781,18 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
{ {
shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_NONE; shader->uniformMode = LATTE_DECOMPILER_UNIFORM_MODE_NONE;
} }
// generate list of uniform buffers based on uniformBufferAccessMask (for faster access) // generate compact list of uniform buffers (for faster access)
shader->uniformBufferListCount = 0; cemu_assert_debug(shader->list_quickBufferList.empty());
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
{ {
if( !HAS_FLAG(shaderContext->analyzer.uniformBufferAccessMask, (1<<i)) ) if( !shaderContext->analyzer.uniformBufferAccessTracker[i].HasAccess() )
continue; continue;
shader->uniformBufferList[shader->uniformBufferListCount] = i; LatteDecompilerShader::QuickBufferEntry entry;
shader->uniformBufferListCount++; entry.index = i;
entry.size = shaderContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE) * 16;
shader->list_quickBufferList.push_back(entry);
} }
// get dimension of each used textures // get dimension of each used texture
_LatteRegisterSetTextureUnit* texRegs = nullptr; _LatteRegisterSetTextureUnit* texRegs = nullptr;
if( shader->shaderType == LatteConst::ShaderType::Vertex ) if( shader->shaderType == LatteConst::ShaderType::Vertex )
texRegs = shaderContext->contextRegistersNew->SQ_TEX_START_VS; texRegs = shaderContext->contextRegistersNew->SQ_TEX_START_VS;

View File

@ -37,36 +37,14 @@ namespace LatteDecompiler
} }
else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE) else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
{ {
// here we try to predict the accessed range so we dont have to upload the whole register file uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(256);
// we assume that if there is a fixed-index access on an index higher than a relative access, it bounds the prior relative access // full or partial uniform register file has to be present
sint16 highestAccessIndex = -1;
bool highestAccessIndexIsRel = false;
for(auto& accessItr : decompilerContext->analyzer.uniformRegisterAccessIndices)
{
if (accessItr.index > highestAccessIndex || (accessItr.index == highestAccessIndex && accessItr.isRelative && !highestAccessIndexIsRel))
{
highestAccessIndex = accessItr.index;
highestAccessIndexIsRel = accessItr.isRelative;
}
}
if (highestAccessIndex < 0)
highestAccessIndex = 0;
uint32 cfileSize;
if (highestAccessIndexIsRel)
cfileSize = 256;
else
cfileSize = highestAccessIndex + 1;
// full uniform register file has to be present
if (shaderType == LatteConst::ShaderType::Vertex) if (shaderType == LatteConst::ShaderType::Vertex)
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize); shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize);
else if (shaderType == LatteConst::ShaderType::Pixel) else if (shaderType == LatteConst::ShaderType::Pixel)
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterPS[{}];" _CRLF, cfileSize); shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterPS[{}];" _CRLF, cfileSize);
else if (shaderType == LatteConst::ShaderType::Geometry) else if (shaderType == LatteConst::ShaderType::Geometry)
shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterGS[{}];" _CRLF, cfileSize); shaderSrc->addFmt("uniform ivec4 uf_uniformRegisterGS[{}];" _CRLF, cfileSize);
else
debugBreakpoint();
uniformOffsets.offset_uniformRegister = uniformCurrentOffset; uniformOffsets.offset_uniformRegister = uniformCurrentOffset;
uniformOffsets.count_uniformRegister = cfileSize; uniformOffsets.count_uniformRegister = cfileSize;
uniformCurrentOffset += 16 * cfileSize; uniformCurrentOffset += 16 * cfileSize;
@ -168,7 +146,7 @@ namespace LatteDecompiler
{ {
for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) for (uint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
{ {
if ((decompilerContext->analyzer.uniformBufferAccessMask&(1 << i)) == 0) if (!decompilerContext->analyzer.uniformBufferAccessTracker[i].HasAccess())
continue; continue;
cemu_assert_debug(decompilerContext->output->resourceMappingGL.uniformBuffersBindingPoint[i] >= 0); cemu_assert_debug(decompilerContext->output->resourceMappingGL.uniformBuffersBindingPoint[i] >= 0);
@ -178,7 +156,7 @@ namespace LatteDecompiler
shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i); shaderSrc->addFmt("uniform {}{}" _CRLF, _getShaderUniformBlockInterfaceName(decompilerContext->shaderType), i);
shaderSrc->add("{" _CRLF); shaderSrc->add("{" _CRLF);
shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE); shaderSrc->addFmt("vec4 {}{}[{}];" _CRLF, _getShaderUniformBlockVariableName(decompilerContext->shaderType), i, decompilerContext->analyzer.uniformBufferAccessTracker[i].DetermineSize(LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE));
shaderSrc->add("};" _CRLF _CRLF); shaderSrc->add("};" _CRLF _CRLF);
shaderSrc->add(_CRLF); shaderSrc->add(_CRLF);
} }

View File

@ -125,19 +125,66 @@ struct LatteDecompilerCFInstruction
LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default; LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default;
}; };
// describes a single access to the constant file (uniform registers) recorded during shader analysis
struct LatteDecompilerCFileAccess
{
	LatteDecompilerCFileAccess(uint8 index, bool isRelative) : index(index), isRelative(isRelative) {};
	uint8 index; // cfile entry index (array index, not a byte offset)
	bool isRelative; // true if the access used a dynamic (relative) index, so the real upper bound is unknown
};
struct LatteDecompilerSubroutineInfo struct LatteDecompilerSubroutineInfo
{ {
uint32 cfAddr; uint32 cfAddr;
std::vector<LatteDecompilerCFInstruction> instructions; std::vector<LatteDecompilerCFInstruction> instructions;
}; };
// helper struct to track the highest accessed offset within a buffer
struct LatteDecompilerBufferAccessTracker
{
	bool hasStaticIndexAccess{false};
	bool hasDynamicIndexAccess{false};
	sint32 highestAccessDynamicIndex{0};
	sint32 highestAccessStaticIndex{0};

	// record a single access; index is an array element index, not a byte offset
	void TrackAccess(sint32 index, bool isDynamicIndex)
	{
		if (!isDynamicIndex)
		{
			hasStaticIndexAccess = true;
			if (highestAccessStaticIndex < index)
				highestAccessStaticIndex = index;
			return;
		}
		hasDynamicIndexAccess = true;
		if (highestAccessDynamicIndex < index)
			highestAccessDynamicIndex = index;
	}

	// predict how many elements have to be uploaded so every tracked access stays in bounds,
	// avoiding an upload of the whole buffer where possible
	sint32 DetermineSize(sint32 maximumSize) const
	{
		// a dynamic index has no known upper bound, so it forces the maximum size
		// potential risky optimization: assume that a fixed-index access on an index higher than
		// any other non-zero relative access bounds the prior relative access
		if (hasDynamicIndexAccess)
			return maximumSize;
		if (hasStaticIndexAccess)
			return highestAccessStaticIndex + 1;
		return 1; // no access tracked at all, but never report zero as a size
	}

	// true if the buffer was accessed at all, with either index kind
	bool HasAccess() const
	{
		return hasStaticIndexAccess || hasDynamicIndexAccess;
	}

	// true if the buffer was accessed with a dynamically computed (relative) index
	bool HasRelativeAccess() const
	{
		return hasDynamicIndexAccess;
	}
};
struct LatteDecompilerShaderContext struct LatteDecompilerShaderContext
{ {
LatteDecompilerOutput_t* output; LatteDecompilerOutput_t* output;
@ -174,12 +221,9 @@ struct LatteDecompilerShaderContext
bool isPointsPrimitive{}; // set if current render primitive is points bool isPointsPrimitive{}; // set if current render primitive is points
bool outputPointSize{}; // set if the current shader should output the point size bool outputPointSize{}; // set if the current shader should output the point size
std::bitset<256> inputAttributSemanticMask; // one set bit for every used semanticId - todo: there are only 128 bit available semantic locations? The MSB has special meaning? std::bitset<256> inputAttributSemanticMask; // one set bit for every used semanticId - todo: there are only 128 bit available semantic locations? The MSB has special meaning?
// uniform // uniforms
bool uniformRegisterAccess; // set to true if cfile (uniform register) is accessed LatteDecompilerBufferAccessTracker uniformRegisterAccessTracker;
bool uniformRegisterDynamicAccess; // set to true if cfile (uniform register) is accessed with a dynamic index LatteDecompilerBufferAccessTracker uniformBufferAccessTracker[LATTE_NUM_MAX_UNIFORM_BUFFERS];
uint32 uniformBufferAccessMask; // 1 bit per buffer, set if the uniform buffer is accessed
uint32 uniformBufferDynamicAccessMask; // 1 bit per buffer, set if the uniform buffer is accessed by dynamic index
std::vector<LatteDecompilerCFileAccess> uniformRegisterAccessIndices;
// ssbo // ssbo
bool hasSSBORead; // shader has instructions that read from SSBO bool hasSSBORead; // shader has instructions that read from SSBO
bool hasSSBOWrite; // shader has instructions that write to SSBO bool hasSSBOWrite; // shader has instructions that write to SSBO

View File

@ -1591,10 +1591,9 @@ void VulkanRenderer::draw_updateUniformBuffersDirectAccess(LatteDecompilerShader
{ {
if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK) if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
{ {
// use full uniform buffers for(const auto& buf : shader->list_quickBufferList)
for (sint32 t = 0; t < shader->uniformBufferListCount; t++)
{ {
sint32 i = shader->uniformBufferList[t]; sint32 i = buf.index;
MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0]; MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0];
uint32 uniformSize = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 1] + 1; uint32 uniformSize = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 1] + 1;
@ -1603,6 +1602,7 @@ void VulkanRenderer::draw_updateUniformBuffersDirectAccess(LatteDecompilerShader
cemu_assert_unimplemented(); cemu_assert_unimplemented();
continue; continue;
} }
uniformSize = std::min<uint32>(uniformSize, buf.size);
cemu_assert_debug(physicalAddr < 0x50000000); cemu_assert_debug(physicalAddr < 0x50000000);
@ -1621,7 +1621,7 @@ void VulkanRenderer::draw_updateUniformBuffersDirectAccess(LatteDecompilerShader
dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].unformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress; dynamicOffsetInfo.shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT].unformBufferOffset[bufferIndex] = physicalAddr - m_importedMemBaseAddress;
break; break;
default: default:
cemu_assert_debug(false); UNREACHABLE;
} }
} }
} }