Latte: Very minor refactor + optimization

This commit is contained in:
Exzap 2023-09-20 04:54:36 +02:00
parent 65e5e20afc
commit 4d6b72b353
2 changed files with 18 additions and 18 deletions

View File

@ -370,8 +370,12 @@ void VulkanRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size)
// does nothing since the index buffer memory is coherent
}
float s_vkUniformData[512 * 4];
void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, LatteDecompilerShader* shader)
{
auto GET_UNIFORM_DATA_PTR = [&](size_t index) { return s_vkUniformData + (index / 4); };
sint32 shaderAluConst;
sint32 shaderUniformRegisterOffset;
@ -390,27 +394,23 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt
shaderUniformRegisterOffset = mmSQ_GS_UNIFORM_BLOCK_START;
break;
default:
cemu_assert_debug(false);
UNREACHABLE;
}
if (shader->resourceMapping.uniformVarsBufferBindingPoint >= 0)
{
float uniformData[512 * 4];
if (shader->uniform.list_ufTexRescale.empty() == false)
{
for (auto& entry : shader->uniform.list_ufTexRescale)
{
float* xyScale = LatteTexture_getEffectiveTextureScale(shader->shaderType, entry.texUnit);
float* v = uniformData + (entry.uniformLocation / 4);
memcpy(entry.currentValue, xyScale, sizeof(float) * 2);
memcpy(v, xyScale, sizeof(float) * 2);
memcpy(GET_UNIFORM_DATA_PTR(entry.uniformLocation), xyScale, sizeof(float) * 2);
}
}
if (shader->uniform.loc_alphaTestRef >= 0)
{
float* v = uniformData + (shader->uniform.loc_alphaTestRef / 4);
v[0] = LatteGPUState.contextNew.SX_ALPHA_REF.get_ALPHA_TEST_REF();
*GET_UNIFORM_DATA_PTR(shader->uniform.loc_alphaTestRef) = LatteGPUState.contextNew.SX_ALPHA_REF.get_ALPHA_TEST_REF();
}
if (shader->uniform.loc_pointSize >= 0)
{
@ -418,41 +418,38 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt
float pointWidth = (float)pointSizeReg.get_WIDTH() / 8.0f;
if (pointWidth == 0.0f)
pointWidth = 1.0f / 8.0f; // minimum size
float* v = uniformData + (shader->uniform.loc_pointSize / 4);
v[0] = pointWidth;
*GET_UNIFORM_DATA_PTR(shader->uniform.loc_pointSize) = pointWidth;
}
if (shader->uniform.loc_remapped >= 0)
{
LatteBufferCache_LoadRemappedUniforms(shader, uniformData + (shader->uniform.loc_remapped / 4));
LatteBufferCache_LoadRemappedUniforms(shader, GET_UNIFORM_DATA_PTR(shader->uniform.loc_remapped));
}
if (shader->uniform.loc_uniformRegister >= 0)
{
uint32* uniformRegData = (uint32*)(LatteGPUState.contextRegister + mmSQ_ALU_CONSTANT0_0 + shaderAluConst);
float* v = uniformData + (shader->uniform.loc_uniformRegister / 4);
memcpy(v, uniformRegData, shader->uniform.count_uniformRegister * 16);
memcpy(GET_UNIFORM_DATA_PTR(shader->uniform.loc_uniformRegister), uniformRegData, shader->uniform.count_uniformRegister * 16);
}
if (shader->uniform.loc_windowSpaceToClipSpaceTransform >= 0)
{
sint32 viewportWidth;
sint32 viewportHeight;
LatteRenderTarget_GetCurrentVirtualViewportSize(&viewportWidth, &viewportHeight); // always call after _updateViewport()
float* v = uniformData + (shader->uniform.loc_windowSpaceToClipSpaceTransform / 4);
float* v = GET_UNIFORM_DATA_PTR(shader->uniform.loc_windowSpaceToClipSpaceTransform);
v[0] = 2.0f / (float)viewportWidth;
v[1] = 2.0f / (float)viewportHeight;
}
if (shader->uniform.loc_fragCoordScale >= 0)
{
float* coordScale = uniformData + (shader->uniform.loc_fragCoordScale / 4);
LatteMRT::GetCurrentFragCoordScale(coordScale);
LatteMRT::GetCurrentFragCoordScale(GET_UNIFORM_DATA_PTR(shader->uniform.loc_fragCoordScale));
}
if (shader->uniform.loc_verticesPerInstance >= 0)
{
*(int*)(uniformData + (shader->uniform.loc_verticesPerInstance / 4)) = m_streamoutState.verticesPerInstance;
*(int*)(s_vkUniformData + ((size_t)shader->uniform.loc_verticesPerInstance / 4)) = m_streamoutState.verticesPerInstance;
for (sint32 b = 0; b < LATTE_NUM_STREAMOUT_BUFFER; b++)
{
if (shader->uniform.loc_streamoutBufferBase[b] >= 0)
{
*(int*)(uniformData + (shader->uniform.loc_streamoutBufferBase[b] / 4)) = m_streamoutState.buffer[b].ringBufferOffset;
*(uint32*)GET_UNIFORM_DATA_PTR(shader->uniform.loc_streamoutBufferBase[b]) = m_streamoutState.buffer[b].ringBufferOffset;
}
}
}
@ -463,7 +460,7 @@ void VulkanRenderer::uniformData_updateUniformVars(uint32 shaderStageIndex, Latt
}
uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1;
const uint32 uniformOffset = m_uniformVarBufferWriteIndex;
memcpy(m_uniformVarBufferPtr + uniformOffset, uniformData, shader->uniform.uniformRangeSize);
memcpy(m_uniformVarBufferPtr + uniformOffset, s_vkUniformData, shader->uniform.uniformRangeSize);
m_uniformVarBufferWriteIndex += shader->uniform.uniformRangeSize;
m_uniformVarBufferWriteIndex = (m_uniformVarBufferWriteIndex + bufferAlignmentM1) & ~bufferAlignmentM1;
// update dynamic offset

View File

@ -235,10 +235,13 @@ inline uint64 _udiv128(uint64 highDividend, uint64 lowDividend, uint64 divisor,
#if defined(_MSC_VER)
#define UNREACHABLE __assume(false)
#define ASSUME(__cond) __assume(__cond)
#elif defined(__GNUC__)
#define UNREACHABLE __builtin_unreachable()
#define ASSUME(__cond) __attribute__((assume(__cond)))
#else
#define UNREACHABLE
#define ASSUME(__cond)
#endif
#if defined(_MSC_VER)