OGL: Work around slowdown of glMapBufferRange with SSBO on NVIDIA drivers

Using glMapBufferRange to read back the contents of the SSBO is extremely
slow on NVIDIA drivers. This is more noticeable at higher internal
resolutions. Using glGetBufferSubData instead does not seem to exhibit
this slowdown.
This commit is contained in:
Stenzek 2016-05-11 22:19:59 +10:00
parent 24ea2dc2da
commit 89e54fbd6c
3 changed files with 27 additions and 4 deletions

View File

@ -6,6 +6,7 @@
#include "VideoBackends/OGL/BoundingBox.h" #include "VideoBackends/OGL/BoundingBox.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
static GLuint s_bbox_buffer_id; static GLuint s_bbox_buffer_id;
@ -42,12 +43,25 @@ int BoundingBox::Get(int index)
{ {
int data = 0; int data = 0;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT);
if (ptr) if (!DriverDetails::HasBug(DriverDetails::BUG_SLOWGETBUFFERSUBDATA))
{ {
data = *(int*)ptr; // Using glMapBufferRange to read back the contents of the SSBO is extremely slow
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); // on nVidia drivers. This is more noticeable at higher internal resolutions.
// Using glGetBufferSubData instead does not seem to exhibit this slowdown.
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data);
} }
else
{
// Using glMapBufferRange is faster on AMD cards by a measurable margin.
void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT);
if (ptr)
{
memcpy(&data, ptr, sizeof(int));
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
return data; return data;
} }

View File

@ -60,6 +60,7 @@ namespace DriverDetails
{OS_WINDOWS,VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true}, {OS_WINDOWS,VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
{OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true}, {OS_LINUX, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKENUNSYNCMAPPING, -1.0, -1.0, true},
{OS_WINDOWS,VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE, 101810.3907, 101810.3960, true}, {OS_WINDOWS,VENDOR_INTEL, DRIVER_INTEL, Family::UNKNOWN, BUG_INTELBROKENBUFFERSTORAGE, 101810.3907, 101810.3960, true},
{OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_SLOWGETBUFFERSUBDATA, -1.0, -1.0, true},
}; };
static std::map<Bug, BugInfo> m_bugs; static std::map<Bug, BugInfo> m_bugs;

View File

@ -184,6 +184,14 @@ namespace DriverDetails
// Qualcomm seems to have lots of overhead on exlicit flushing, but the coherent mapping path is fine. // Qualcomm seems to have lots of overhead on exlicit flushing, but the coherent mapping path is fine.
// So let's use coherent mapping there. // So let's use coherent mapping there.
BUG_BROKENEXPLICITFLUSH, BUG_BROKENEXPLICITFLUSH,
// Bug: glGetBufferSubData for bounding box reads is slow on AMD drivers
// Started Version: -1
// Ended Version: -1
// Bounding box reads use glGetBufferSubData to read back the contents of the SSBO, but this is slow on AMD drivers, compared to
// using glMapBufferRange. glMapBufferRange is slower on Nvidia drivers, we suspect due to the first call moving the buffer from
// GPU memory to system memory. Use glMapBufferRange for BBox reads on AMD, and glGetBufferSubData everywhere else.
BUG_SLOWGETBUFFERSUBDATA,
}; };
// Initializes our internal vendor, device family, and driver version // Initializes our internal vendor, device family, and driver version