Latte: Very minor refactor + optimization

This commit is contained in:
Exzap 2023-09-20 04:54:36 +02:00
parent 65e5e20afc
commit 4d6b72b353
2 changed files with 18 additions and 18 deletions

View File

@@ -370,8 +370,12 @@ void VulkanRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size)
// does nothing since the index buffer memory is coherent // does nothing since the index buffer memory is coherent
} }
float s_vkUniformData[512 * 4];
void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, LatteDecompilerShader* shader) void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, LatteDecompilerShader* shader)
{ {
auto GET_UNIFORM_DATA_PTR = [&](size_t index) { return s_vkUniformData + (index / 4); };
sint32 shaderAluConst; sint32 shaderAluConst;
sint32 shaderUniformRegisterOffset; sint32 shaderUniformRegisterOffset;
@@ -390,27 +394,23 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt
shaderUniformRegisterOffset = mmSQ_GS_UNIFORM_BLOCK_START; shaderUniformRegisterOffset = mmSQ_GS_UNIFORM_BLOCK_START;
break; break;
default: default:
cemu_assert_debug(false); UNREACHABLE;
} }
if (shader->resourceMapping.uniformVarsBufferBindingPoint >= 0) if (shader->resourceMapping.uniformVarsBufferBindingPoint >= 0)
{ {
float uniformData[512 * 4];
if (shader->uniform.list_ufTexRescale.empty() == false) if (shader->uniform.list_ufTexRescale.empty() == false)
{ {
for (auto& entry : shader->uniform.list_ufTexRescale) for (auto& entry : shader->uniform.list_ufTexRescale)
{ {
float* xyScale = LatteTexture_getEffectiveTextureScale(shader->shaderType, entry.texUnit); float* xyScale = LatteTexture_getEffectiveTextureScale(shader->shaderType, entry.texUnit);
float* v = uniformData + (entry.uniformLocation / 4);
memcpy(entry.currentValue, xyScale, sizeof(float) * 2); memcpy(entry.currentValue, xyScale, sizeof(float) * 2);
memcpy(v, xyScale, sizeof(float) * 2); memcpy(GET_UNIFORM_DATA_PTR(entry.uniformLocation), xyScale, sizeof(float) * 2);
} }
} }
if (shader->uniform.loc_alphaTestRef >= 0) if (shader->uniform.loc_alphaTestRef >= 0)
{ {
float* v = uniformData + (shader->uniform.loc_alphaTestRef / 4); *GET_UNIFORM_DATA_PTR(shader->uniform.loc_alphaTestRef) = LatteGPUState.contextNew.SX_ALPHA_REF.get_ALPHA_TEST_REF();
v[0] = LatteGPUState.contextNew.SX_ALPHA_REF.get_ALPHA_TEST_REF();
} }
if (shader->uniform.loc_pointSize >= 0) if (shader->uniform.loc_pointSize >= 0)
{ {
@@ -418,41 +418,38 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt
float pointWidth = (float)pointSizeReg.get_WIDTH() / 8.0f; float pointWidth = (float)pointSizeReg.get_WIDTH() / 8.0f;
if (pointWidth == 0.0f) if (pointWidth == 0.0f)
pointWidth = 1.0f / 8.0f; // minimum size pointWidth = 1.0f / 8.0f; // minimum size
float* v = uniformData + (shader->uniform.loc_pointSize / 4); *GET_UNIFORM_DATA_PTR(shader->uniform.loc_pointSize) = pointWidth;
v[0] = pointWidth;
} }
if (shader->uniform.loc_remapped >= 0) if (shader->uniform.loc_remapped >= 0)
{ {
LatteBufferCache_LoadRemappedUniforms(shader, uniformData + (shader->uniform.loc_remapped / 4)); LatteBufferCache_LoadRemappedUniforms(shader, GET_UNIFORM_DATA_PTR(shader->uniform.loc_remapped));
} }
if (shader->uniform.loc_uniformRegister >= 0) if (shader->uniform.loc_uniformRegister >= 0)
{ {
uint32* uniformRegData = (uint32*)(LatteGPUState.contextRegister + mmSQ_ALU_CONSTANT0_0 + shaderAluConst); uint32* uniformRegData = (uint32*)(LatteGPUState.contextRegister + mmSQ_ALU_CONSTANT0_0 + shaderAluConst);
float* v = uniformData + (shader->uniform.loc_uniformRegister / 4); memcpy(GET_UNIFORM_DATA_PTR(shader->uniform.loc_uniformRegister), uniformRegData, shader->uniform.count_uniformRegister * 16);
memcpy(v, uniformRegData, shader->uniform.count_uniformRegister * 16);
} }
if (shader->uniform.loc_windowSpaceToClipSpaceTransform >= 0) if (shader->uniform.loc_windowSpaceToClipSpaceTransform >= 0)
{ {
sint32 viewportWidth; sint32 viewportWidth;
sint32 viewportHeight; sint32 viewportHeight;
LatteRenderTarget_GetCurrentVirtualViewportSize(&viewportWidth, &viewportHeight); // always call after _updateViewport() LatteRenderTarget_GetCurrentVirtualViewportSize(&viewportWidth, &viewportHeight); // always call after _updateViewport()
float* v = uniformData + (shader->uniform.loc_windowSpaceToClipSpaceTransform / 4); float* v = GET_UNIFORM_DATA_PTR(shader->uniform.loc_windowSpaceToClipSpaceTransform);
v[0] = 2.0f / (float)viewportWidth; v[0] = 2.0f / (float)viewportWidth;
v[1] = 2.0f / (float)viewportHeight; v[1] = 2.0f / (float)viewportHeight;
} }
if (shader->uniform.loc_fragCoordScale >= 0) if (shader->uniform.loc_fragCoordScale >= 0)
{ {
float* coordScale = uniformData + (shader->uniform.loc_fragCoordScale / 4); LatteMRT::GetCurrentFragCoordScale(GET_UNIFORM_DATA_PTR(shader->uniform.loc_fragCoordScale));
LatteMRT::GetCurrentFragCoordScale(coordScale);
} }
if (shader->uniform.loc_verticesPerInstance >= 0) if (shader->uniform.loc_verticesPerInstance >= 0)
{ {
*(int*)(uniformData + (shader->uniform.loc_verticesPerInstance / 4)) = m_streamoutState.verticesPerInstance; *(int*)(s_vkUniformData + ((size_t)shader->uniform.loc_verticesPerInstance / 4)) = m_streamoutState.verticesPerInstance;
for (sint32 b = 0; b < LATTE_NUM_STREAMOUT_BUFFER; b++) for (sint32 b = 0; b < LATTE_NUM_STREAMOUT_BUFFER; b++)
{ {
if (shader->uniform.loc_streamoutBufferBase[b] >= 0) if (shader->uniform.loc_streamoutBufferBase[b] >= 0)
{ {
*(int*)(uniformData + (shader->uniform.loc_streamoutBufferBase[b] / 4)) = m_streamoutState.buffer[b].ringBufferOffset; *(uint32*)GET_UNIFORM_DATA_PTR(shader->uniform.loc_streamoutBufferBase[b]) = m_streamoutState.buffer[b].ringBufferOffset;
} }
} }
} }
@@ -463,7 +460,7 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt
} }
uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1; uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1;
const uint32 uniformOffset = m_uniformVarBufferWriteIndex; const uint32 uniformOffset = m_uniformVarBufferWriteIndex;
memcpy(m_uniformVarBufferPtr + uniformOffset, uniformData, shader->uniform.uniformRangeSize); memcpy(m_uniformVarBufferPtr + uniformOffset, s_vkUniformData, shader->uniform.uniformRangeSize);
m_uniformVarBufferWriteIndex += shader->uniform.uniformRangeSize; m_uniformVarBufferWriteIndex += shader->uniform.uniformRangeSize;
m_uniformVarBufferWriteIndex = (m_uniformVarBufferWriteIndex + bufferAlignmentM1) & ~bufferAlignmentM1; m_uniformVarBufferWriteIndex = (m_uniformVarBufferWriteIndex + bufferAlignmentM1) & ~bufferAlignmentM1;
// update dynamic offset // update dynamic offset

View File

@@ -235,10 +235,13 @@ inline uint64 _udiv128(uint64 highDividend, uint64 lowDividend, uint64 divisor,
#if defined(_MSC_VER) #if defined(_MSC_VER)
#define UNREACHABLE __assume(false) #define UNREACHABLE __assume(false)
#define ASSUME(__cond) __assume(__cond)
#elif defined(__GNUC__) #elif defined(__GNUC__)
#define UNREACHABLE __builtin_unreachable() #define UNREACHABLE __builtin_unreachable()
#define ASSUME(__cond) __attribute__((assume(__cond)))
#else #else
#define UNREACHABLE #define UNREACHABLE
#define ASSUME(__cond)
#endif #endif
#if defined(_MSC_VER) #if defined(_MSC_VER)