Merge pull request #461 from degasus/streambuffer_opts

OGL StreamBuffer optimizations
This commit is contained in:
Tony Wasserka 2014-06-05 13:39:42 +02:00
commit 40031c9a72
2 changed files with 55 additions and 58 deletions

View File

@ -22,13 +22,12 @@ static u32 genBuffer()
return id; return id;
} }
StreamBuffer::StreamBuffer(u32 type, size_t size) StreamBuffer::StreamBuffer(u32 type, u32 size)
: m_buffer(genBuffer()), m_buffertype(type), m_size(size) : m_buffer(genBuffer()), m_buffertype(type), m_size(ROUND_UP_POW2(size)), m_bit_per_slot(Log2(ROUND_UP_POW2(size) / SYNC_POINTS))
{ {
m_iterator = 0; m_iterator = 0;
m_used_iterator = 0; m_used_iterator = 0;
m_free_iterator = 0; m_free_iterator = 0;
fences = nullptr;
} }
@ -60,37 +59,35 @@ StreamBuffer::~StreamBuffer()
* As ring buffers have an ugly behavoir on rollover, have fun to read this code ;) * As ring buffers have an ugly behavoir on rollover, have fun to read this code ;)
*/ */
#define SLOT(x) ((x)*SYNC_POINTS/m_size)
static const u32 SYNC_POINTS = 16;
void StreamBuffer::CreateFences() void StreamBuffer::CreateFences()
{ {
fences = new GLsync[SYNC_POINTS]; for (int i=0; i<SYNC_POINTS; i++)
for (u32 i=0; i<SYNC_POINTS; i++) {
fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
} }
void StreamBuffer::DeleteFences() void StreamBuffer::DeleteFences()
{ {
for (size_t i = SLOT(m_free_iterator) + 1; i < SYNC_POINTS; i++) for (int i = SLOT(m_free_iterator) + 1; i < SYNC_POINTS; i++)
{ {
glDeleteSync(fences[i]); glDeleteSync(fences[i]);
} }
for (size_t i = 0; i < SLOT(m_iterator); i++) for (int i = 0; i < SLOT(m_iterator); i++)
{ {
glDeleteSync(fences[i]); glDeleteSync(fences[i]);
} }
delete [] fences;
} }
void StreamBuffer::AllocMemory(size_t size) void StreamBuffer::AllocMemory(u32 size)
{ {
// insert waiting slots for used memory // insert waiting slots for used memory
for (size_t i = SLOT(m_used_iterator); i < SLOT(m_iterator); i++) for (int i = SLOT(m_used_iterator); i < SLOT(m_iterator); i++)
{ {
fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
} }
m_used_iterator = m_iterator; m_used_iterator = m_iterator;
// wait for new slots to end of buffer // wait for new slots to end of buffer
for (size_t i = SLOT(m_free_iterator) + 1; i <= SLOT(m_iterator + size) && i < SYNC_POINTS; i++) for (int i = SLOT(m_free_iterator) + 1; i <= SLOT(m_iterator + size) && i < SYNC_POINTS; i++)
{ {
glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
glDeleteSync(fences[i]); glDeleteSync(fences[i]);
@ -101,7 +98,7 @@ void StreamBuffer::AllocMemory(size_t size)
if (m_iterator + size >= m_size) { if (m_iterator + size >= m_size) {
// insert waiting slots in unused space at the end of the buffer // insert waiting slots in unused space at the end of the buffer
for (size_t i = SLOT(m_used_iterator); i < SYNC_POINTS; i++) for (int i = SLOT(m_used_iterator); i < SYNC_POINTS; i++)
{ {
fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
} }
@ -110,7 +107,7 @@ void StreamBuffer::AllocMemory(size_t size)
m_used_iterator = m_iterator = 0; // offset 0 is always aligned m_used_iterator = m_iterator = 0; // offset 0 is always aligned
// wait for space at the start // wait for space at the start
for (u32 i = 0; i <= SLOT(m_iterator + size); i++) for (int i = 0; i <= SLOT(m_iterator + size); i++)
{ {
glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
glDeleteSync(fences[i]); glDeleteSync(fences[i]);
@ -118,15 +115,6 @@ void StreamBuffer::AllocMemory(size_t size)
m_free_iterator = m_iterator + size; m_free_iterator = m_iterator + size;
} }
} }
#undef SLOT
void StreamBuffer::Align(u32 stride)
{
if (m_iterator && stride) {
m_iterator--;
m_iterator = m_iterator - (m_iterator % stride) + stride;
}
}
/* The usual way to stream data to the gpu. /* The usual way to stream data to the gpu.
* Described here: https://www.opengl.org/wiki/Buffer_Object_Streaming#Unsynchronized_buffer_mapping * Described here: https://www.opengl.org/wiki/Buffer_Object_Streaming#Unsynchronized_buffer_mapping
@ -138,7 +126,7 @@ void StreamBuffer::Align(u32 stride)
class MapAndOrphan : public StreamBuffer class MapAndOrphan : public StreamBuffer
{ {
public: public:
MapAndOrphan(u32 type, size_t size) : StreamBuffer(type, size) { MapAndOrphan(u32 type, u32 size) : StreamBuffer(type, size) {
glBindBuffer(m_buffertype, m_buffer); glBindBuffer(m_buffertype, m_buffer);
glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW); glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW);
} }
@ -146,8 +134,7 @@ public:
~MapAndOrphan() { ~MapAndOrphan() {
} }
std::pair<u8*, size_t> Map(size_t size, u32 stride) override { std::pair<u8*, u32> Map(u32 size) override {
Align(stride);
if (m_iterator + size >= m_size) { if (m_iterator + size >= m_size) {
glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW); glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW);
m_iterator = 0; m_iterator = 0;
@ -157,7 +144,7 @@ public:
return std::make_pair(pointer, m_iterator); return std::make_pair(pointer, m_iterator);
} }
void Unmap(size_t used_size) override { void Unmap(u32 used_size) override {
glFlushMappedBufferRange(m_buffertype, 0, used_size); glFlushMappedBufferRange(m_buffertype, 0, used_size);
glUnmapBuffer(m_buffertype); glUnmapBuffer(m_buffertype);
m_iterator += used_size; m_iterator += used_size;
@ -174,7 +161,7 @@ public:
class MapAndSync : public StreamBuffer class MapAndSync : public StreamBuffer
{ {
public: public:
MapAndSync(u32 type, size_t size) : StreamBuffer(type, size) { MapAndSync(u32 type, u32 size) : StreamBuffer(type, size) {
CreateFences(); CreateFences();
glBindBuffer(m_buffertype, m_buffer); glBindBuffer(m_buffertype, m_buffer);
glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW); glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW);
@ -184,15 +171,14 @@ public:
DeleteFences(); DeleteFences();
} }
std::pair<u8*, size_t> Map(size_t size, u32 stride) override { std::pair<u8*, u32> Map(u32 size) override {
Align(stride);
AllocMemory(size); AllocMemory(size);
u8* pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size, u8* pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size,
GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT); GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
return std::make_pair(pointer, m_iterator); return std::make_pair(pointer, m_iterator);
} }
void Unmap(size_t used_size) override { void Unmap(u32 used_size) override {
glFlushMappedBufferRange(m_buffertype, 0, used_size); glFlushMappedBufferRange(m_buffertype, 0, used_size);
glUnmapBuffer(m_buffertype); glUnmapBuffer(m_buffertype);
m_iterator += used_size; m_iterator += used_size;
@ -215,7 +201,7 @@ public:
class BufferStorage : public StreamBuffer class BufferStorage : public StreamBuffer
{ {
public: public:
BufferStorage(u32 type, size_t size) : StreamBuffer(type, size) { BufferStorage(u32 type, u32 size) : StreamBuffer(type, size) {
CreateFences(); CreateFences();
glBindBuffer(m_buffertype, m_buffer); glBindBuffer(m_buffertype, m_buffer);
@ -234,13 +220,12 @@ public:
glBindBuffer(m_buffertype, 0); glBindBuffer(m_buffertype, 0);
} }
std::pair<u8*, size_t> Map(size_t size, u32 stride) override { std::pair<u8*, u32> Map(u32 size) override {
Align(stride);
AllocMemory(size); AllocMemory(size);
return std::make_pair(m_pointer + m_iterator, m_iterator); return std::make_pair(m_pointer + m_iterator, m_iterator);
} }
void Unmap(size_t used_size) override { void Unmap(u32 used_size) override {
glFlushMappedBufferRange(m_buffertype, m_iterator, used_size); glFlushMappedBufferRange(m_buffertype, m_iterator, used_size);
m_iterator += used_size; m_iterator += used_size;
} }
@ -258,7 +243,7 @@ public:
class PinnedMemory : public StreamBuffer class PinnedMemory : public StreamBuffer
{ {
public: public:
PinnedMemory(u32 type, size_t size) : StreamBuffer(type, size) { PinnedMemory(u32 type, u32 size) : StreamBuffer(type, size) {
CreateFences(); CreateFences();
m_pointer = (u8*)AllocateAlignedMemory(ROUND_UP(m_size,ALIGN_PINNED_MEMORY), ALIGN_PINNED_MEMORY ); m_pointer = (u8*)AllocateAlignedMemory(ROUND_UP(m_size,ALIGN_PINNED_MEMORY), ALIGN_PINNED_MEMORY );
glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, m_buffer); glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, m_buffer);
@ -275,13 +260,12 @@ public:
m_pointer = nullptr; m_pointer = nullptr;
} }
std::pair<u8*, size_t> Map(size_t size, u32 stride) override { std::pair<u8*, u32> Map(u32 size) override {
Align(stride);
AllocMemory(size); AllocMemory(size);
return std::make_pair(m_pointer + m_iterator, m_iterator); return std::make_pair(m_pointer + m_iterator, m_iterator);
} }
void Unmap(size_t used_size) override { void Unmap(u32 used_size) override {
m_iterator += used_size; m_iterator += used_size;
} }
@ -297,7 +281,7 @@ public:
class BufferSubData : public StreamBuffer class BufferSubData : public StreamBuffer
{ {
public: public:
BufferSubData(u32 type, size_t size) : StreamBuffer(type, size) { BufferSubData(u32 type, u32 size) : StreamBuffer(type, size) {
glBindBuffer(m_buffertype, m_buffer); glBindBuffer(m_buffertype, m_buffer);
glBufferData(m_buffertype, size, nullptr, GL_STATIC_DRAW); glBufferData(m_buffertype, size, nullptr, GL_STATIC_DRAW);
m_pointer = new u8[m_size]; m_pointer = new u8[m_size];
@ -307,11 +291,11 @@ public:
delete [] m_pointer; delete [] m_pointer;
} }
std::pair<u8*, size_t> Map(size_t size, u32 stride) override { std::pair<u8*, u32> Map(u32 size) override {
return std::make_pair(m_pointer, 0); return std::make_pair(m_pointer, 0);
} }
void Unmap(size_t used_size) override { void Unmap(u32 used_size) override {
glBufferSubData(m_buffertype, 0, used_size, m_pointer); glBufferSubData(m_buffertype, 0, used_size, m_pointer);
} }
@ -326,7 +310,7 @@ public:
class BufferData : public StreamBuffer class BufferData : public StreamBuffer
{ {
public: public:
BufferData(u32 type, size_t size) : StreamBuffer(type, size) { BufferData(u32 type, u32 size) : StreamBuffer(type, size) {
glBindBuffer(m_buffertype, m_buffer); glBindBuffer(m_buffertype, m_buffer);
m_pointer = new u8[m_size]; m_pointer = new u8[m_size];
} }
@ -335,11 +319,11 @@ public:
delete [] m_pointer; delete [] m_pointer;
} }
std::pair<u8*, size_t> Map(size_t size, u32 stride) override { std::pair<u8*, u32> Map(u32 size) override {
return std::make_pair(m_pointer, 0); return std::make_pair(m_pointer, 0);
} }
void Unmap(size_t used_size) override { void Unmap(u32 used_size) override {
glBufferData(m_buffertype, used_size, m_pointer, GL_STREAM_DRAW); glBufferData(m_buffertype, used_size, m_pointer, GL_STREAM_DRAW);
} }
@ -347,7 +331,7 @@ public:
}; };
// choose best streaming library based on the supported extensions and known issues // choose best streaming library based on the supported extensions and known issues
StreamBuffer* StreamBuffer::Create(u32 type, size_t size) StreamBuffer* StreamBuffer::Create(u32 type, u32 size)
{ {
// without basevertex support, only streaming methods whith uploads everything to zero works fine: // without basevertex support, only streaming methods whith uploads everything to zero works fine:
if (!g_ogl_config.bSupportsGLBaseVertex) if (!g_ogl_config.bSupportsGLBaseVertex)

View File

@ -15,7 +15,7 @@ namespace OGL
class StreamBuffer { class StreamBuffer {
public: public:
static StreamBuffer* Create(u32 type, size_t size); static StreamBuffer* Create(u32 type, u32 size);
virtual ~StreamBuffer(); virtual ~StreamBuffer();
/* This mapping function will return a pair of: /* This mapping function will return a pair of:
@ -26,27 +26,40 @@ public:
* Mapping invalidates the current buffer content, * Mapping invalidates the current buffer content,
* so it isn't allowed to access the old content any more. * so it isn't allowed to access the old content any more.
*/ */
virtual std::pair<u8*, size_t> Map(size_t size, u32 stride = 0) = 0; virtual std::pair<u8*, u32> Map(u32 size) = 0;
virtual void Unmap(size_t used_size) = 0; virtual void Unmap(u32 used_size) = 0;
inline std::pair<u8*, u32> Map(u32 size, u32 stride)
{
u32 padding = m_iterator % stride;
if (padding)
{
m_iterator += stride - padding;
}
return Map(size);
}
const u32 m_buffer; const u32 m_buffer;
protected: protected:
StreamBuffer(u32 type, size_t size); StreamBuffer(u32 type, u32 size);
void CreateFences(); void CreateFences();
void DeleteFences(); void DeleteFences();
void AllocMemory(size_t size); void AllocMemory(u32 size);
void Align(u32 stride);
const u32 m_buffertype; const u32 m_buffertype;
const size_t m_size; const u32 m_size;
size_t m_iterator; u32 m_iterator;
size_t m_used_iterator; u32 m_used_iterator;
size_t m_free_iterator; u32 m_free_iterator;
private: private:
GLsync *fences; static const int SYNC_POINTS = 16;
inline int SLOT(u32 x) const { return x >> m_bit_per_slot; }
const int m_bit_per_slot;
GLsync fences[SYNC_POINTS];
}; };
} }