From 6393f157f0c2bb89b93cde43fc5d8b07fd0c7719 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 29 May 2021 16:50:33 +1000 Subject: [PATCH] OGL: Force memory barrier when reading back bounding box values We also need to ensure that the CPU does not receive stale values which have been updated by the GPU. Apparently the buffer here is not coherent on NVIDIA drivers. Not sure if this is a driver bug/spec violation or not, one would think that glGetBufferSubData() would invalidate any caches as needed, but this path is only used on NVIDIA anyway, so it's fine. A point to note is that according to ARB_debug_report, it's moved from video to host memory, which would explain why it needs the cache invalidate. --- Source/Core/VideoBackends/OGL/OGLBoundingBox.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/OGLBoundingBox.cpp b/Source/Core/VideoBackends/OGL/OGLBoundingBox.cpp index cee200d4ad..4d79785645 100644 --- a/Source/Core/VideoBackends/OGL/OGLBoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/OGLBoundingBox.cpp @@ -87,12 +87,21 @@ void BoundingBox::Flush() void BoundingBox::Readback() { glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + + // Using glMapBufferRange to read back the contents of the SSBO is extremely slow + // on nVidia drivers. This is more noticeable at higher internal resolutions. + // Using glGetBufferSubData instead does not seem to exhibit this slowdown. if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA) && !static_cast(g_renderer.get())->IsGLES()) { - // Using glMapBufferRange to read back the contents of the SSBO is extremely slow - // on nVidia drivers. This is more noticeable at higher internal resolutions. - // Using glGetBufferSubData instead does not seem to exhibit this slowdown. + // We also need to ensure that the CPU does not receive stale values which have been updated by + // the GPU. 
Apparently the buffer here is not coherent on NVIDIA drivers. Not sure if this is a + // driver bug/spec violation or not, one would think that glGetBufferSubData() would invalidate + // any caches as needed, but this path is only used on NVIDIA anyway, so it's fine. A point to + // note is that according to ARB_debug_report, it's moved from video to host memory, which would + // explain why it needs the cache invalidate. + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + std::array gpu_values; glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(s32) * NUM_BBOX_VALUES, gpu_values.data());