// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.

#include "Globals.h"
#include "GLUtil.h"
#include "StreamBuffer.h"
#include "MemoryUtil.h"
#include "Render.h"
#include "DriverDetails.h"
#include "OnScreenDisplay.h"

namespace OGL
{

static const u32 SYNC_POINTS = 16;
static const u32 ALIGN_PINNED_MEMORY = 4096;

StreamBuffer::StreamBuffer(u32 type, size_t size, StreamType uploadType)
: m_uploadtype(uploadType), m_buffertype(type), m_size(size)
{
	glGenBuffers(1, &m_buffer);

	bool nvidia = !strcmp(g_ogl_config.gl_vendor, "NVIDIA Corporation");

	if(m_uploadtype & STREAM_DETECT)
	{
		// TODO: move this to InitBackendInfo
		if(g_ActiveConfig.bHackedBufferUpload && DriverDetails::HasBug(DriverDetails::BUG_BROKENHACKEDBUFFER))
		{
			OSD::AddMessage("Vertex Streaming Hack isn't supported by your GPU.", 10000);
			g_ActiveConfig.bHackedBufferUpload = false;
			g_Config.bHackedBufferUpload = false;
		}

		// Pick an upload path the driver supports, in order of preference.
		if (g_ogl_config.bSupportsGLBufferStorage &&
			!(DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTORAGE) && type == GL_ARRAY_BUFFER) &&
			(m_uploadtype & BUFFERSTORAGE))
			m_uploadtype = BUFFERSTORAGE;
		else if(!g_ogl_config.bSupportsGLBaseVertex && (m_uploadtype & BUFFERSUBDATA)
			&& !DriverDetails::HasBug(DriverDetails::BUG_BROKENBUFFERSTREAM))
			m_uploadtype = BUFFERSUBDATA;
		else if(!g_ogl_config.bSupportsGLBaseVertex && (m_uploadtype & BUFFERDATA))
			m_uploadtype = BUFFERDATA;
		else if(g_ogl_config.bSupportsGLSync && g_ActiveConfig.bHackedBufferUpload && (m_uploadtype & MAP_AND_RISK))
			m_uploadtype = MAP_AND_RISK;
		else if(g_ogl_config.bSupportsGLSync && g_ogl_config.bSupportsGLPinnedMemory && (m_uploadtype & PINNED_MEMORY))
			m_uploadtype = PINNED_MEMORY;
		else if(nvidia && (m_uploadtype & BUFFERSUBDATA))
			m_uploadtype = BUFFERSUBDATA;
		else if(g_ogl_config.bSupportsGLSync && (m_uploadtype & MAP_AND_SYNC))
			m_uploadtype = MAP_AND_SYNC;
		else
			m_uploadtype = MAP_AND_ORPHAN;
	}

	Init();
}

StreamBuffer::~StreamBuffer()
{
	Shutdown();
	glDeleteBuffers(1, &m_buffer);
}

// Maps a buffer offset to its fence slot index (0 .. SYNC_POINTS).
#define SLOT(x) ((x)*SYNC_POINTS/m_size)

void StreamBuffer::Alloc(size_t size, u32 stride)
{
	// Round the write offset up to the next multiple of the vertex stride.
	size_t m_iterator_aligned = m_iterator;
	if(m_iterator_aligned && stride)
	{
		m_iterator_aligned--;
		m_iterator_aligned = m_iterator_aligned - (m_iterator_aligned % stride) + stride;
	}
	size_t iter_end = m_iterator_aligned + size;

	switch(m_uploadtype) {
	case MAP_AND_ORPHAN:
		if(iter_end >= m_size)
		{
			glBufferData(m_buffertype, m_size, NULL, GL_STREAM_DRAW);
			m_iterator_aligned = 0;
		}
		break;
	case MAP_AND_SYNC:
	case PINNED_MEMORY:
	case BUFFERSTORAGE:
		// insert waiting slots for used memory
		for (size_t i = SLOT(m_used_iterator); i < SLOT(m_iterator); i++)
		{
			fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		}
		m_used_iterator = m_iterator;

		// wait for new slots to end of buffer
		for (size_t i = SLOT(m_free_iterator) + 1; i <= SLOT(iter_end) && i < SYNC_POINTS; i++)
		{
			glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
			glDeleteSync(fences[i]);
		}
		m_free_iterator = iter_end;

		// if buffer is full
		if (iter_end >= m_size)
		{
			// insert waiting slots in unused space at the end of the buffer
			for (size_t i = SLOT(m_used_iterator); i < SYNC_POINTS; i++)
			{
				fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
			}

			// move to the start
			m_used_iterator = m_iterator_aligned = m_iterator = 0; // offset 0 is always aligned
			iter_end = size;

			// wait for space at the start
			for (u32 i = 0; i <= SLOT(iter_end); i++)
			{
				glClientWaitSync(fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
				glDeleteSync(fences[i]);
			}
			m_free_iterator = iter_end;
		}
		break;
	case MAP_AND_RISK:
		if(iter_end >= m_size)
		{
			m_iterator_aligned = 0;
		}
		break;
	case BUFFERSUBDATA:
	case BUFFERDATA:
		m_iterator_aligned = 0;
		break;
	case STREAM_DETECT:
	case DETECT_MASK: // To shut up compiler warnings
		break;
	}
	m_iterator = m_iterator_aligned;
}

size_t StreamBuffer::Upload(u8* data, size_t size)
{
	switch(m_uploadtype) {
	case MAP_AND_SYNC:
	case MAP_AND_ORPHAN:
		pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
		if(pointer)
		{
			memcpy(pointer, data, size);
			glUnmapBuffer(m_buffertype);
		}
		else
		{
			ERROR_LOG(VIDEO, "Buffer mapping failed");
		}
		break;
	case PINNED_MEMORY:
	case MAP_AND_RISK:
	case BUFFERSTORAGE:
		if (pointer)
			memcpy(pointer + m_iterator, data, size);
		break;
	case BUFFERSUBDATA:
		glBufferSubData(m_buffertype, m_iterator, size, data);
		break;
	case BUFFERDATA:
		glBufferData(m_buffertype, size, data, GL_STREAM_DRAW);
		break;
	case STREAM_DETECT:
	case DETECT_MASK: // To shut up compiler warnings
		break;
	}
	size_t ret = m_iterator;
	m_iterator += size;
	return ret;
}

void StreamBuffer::Init()
{
	m_iterator = 0;
	m_used_iterator = 0;
	m_free_iterator = 0;

	switch(m_uploadtype) {
	case MAP_AND_SYNC:
		fences = new GLsync[SYNC_POINTS];
		for(u32 i=0; i<SYNC_POINTS; i++)
			fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		// fall through: the backing buffer is allocated the same way as below

	case MAP_AND_ORPHAN:
	case BUFFERSUBDATA:
		glBindBuffer(m_buffertype, m_buffer);
		glBufferData(m_buffertype, m_size, NULL, GL_STREAM_DRAW);
		break;
	case PINNED_MEMORY:
		glGetError(); // errors before this allocation should be ignored
		fences = new GLsync[SYNC_POINTS];
		for(u32 i=0; i<SYNC_POINTS; i++)
			fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);

		pointer = (u8*)AllocateAlignedMemory(ROUND_UP(m_size, ALIGN_PINNED_MEMORY), ALIGN_PINNED_MEMORY);
		glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, m_buffer);
		glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, ROUND_UP(m_size, ALIGN_PINNED_MEMORY), pointer, GL_STREAM_COPY);
		glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
		glBindBuffer(m_buffertype, m_buffer);

		// on error, switch to another backend. Some old Catalyst drivers seem to have broken pinned memory support.
		if(glGetError() != GL_NO_ERROR)
		{
			ERROR_LOG(VIDEO, "Pinned memory detected, but not working. "
				"Please report this: %s, %s, %s",
				g_ogl_config.gl_vendor, g_ogl_config.gl_renderer, g_ogl_config.gl_version);
			Shutdown();
			m_uploadtype = MAP_AND_SYNC;
			Init();
		}
		break;
	case BUFFERSTORAGE:
		glGetError(); // errors before this allocation should be ignored
		fences = new GLsync[SYNC_POINTS];
		for (u32 i = 0; i < SYNC_POINTS; i++)
			fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);

		glBindBuffer(m_buffertype, m_buffer);

		// PERSISTENT_BIT to make sure that the buffer can be used while mapped
		// COHERENT_BIT is set so we don't have to use a MemoryBarrier on write
		// CLIENT_STORAGE_BIT is set since we access the buffer more frequently on the client side than on the server side
		glBufferStorage(m_buffertype, m_size, NULL,
			GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_CLIENT_STORAGE_BIT);
		pointer = (u8*)glMapBufferRange(m_buffertype, 0, m_size,
			GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
		if(!pointer)
			ERROR_LOG(VIDEO, "Buffer allocation failed");
		break;
	case MAP_AND_RISK:
		glBindBuffer(m_buffertype, m_buffer);
		glBufferData(m_buffertype, m_size, NULL, GL_STREAM_DRAW);
		pointer = (u8*)glMapBufferRange(m_buffertype, 0, m_size, GL_MAP_WRITE_BIT);
		glUnmapBuffer(m_buffertype);
		if(!pointer)
			ERROR_LOG(VIDEO, "Buffer allocation failed");
		break;
	case BUFFERDATA:
		glBindBuffer(m_buffertype, m_buffer);
		break;
	case STREAM_DETECT:
	case DETECT_MASK: // To shut up compiler warnings
		break;
	}
}

void StreamBuffer::Shutdown()
{
	switch(m_uploadtype) {
	case MAP_AND_SYNC:
		DeleteFences();
		break;
	case MAP_AND_RISK:
	case MAP_AND_ORPHAN:
	case BUFFERSUBDATA:
	case BUFFERDATA:
		break;
	case PINNED_MEMORY:
		DeleteFences();
		glBindBuffer(m_buffertype, 0);
		glFinish(); // the GL pipeline must be flushed, otherwise this buffer may still be in use
		FreeAlignedMemory(pointer);
		break;
	case BUFFERSTORAGE:
		DeleteFences();
		glUnmapBuffer(m_buffertype);
		glBindBuffer(m_buffertype, 0);
		glFinish(); // the GL pipeline must be flushed, otherwise this buffer may still be in use
		break;
	case STREAM_DETECT:
	case DETECT_MASK: // To shut up compiler warnings
		break;
	}
}

void StreamBuffer::DeleteFences()
{
	for (size_t i = SLOT(m_free_iterator) + 1; i < SYNC_POINTS; i++)
	{
		glDeleteSync(fences[i]);
	}
	for (size_t i = 0; i < SLOT(m_iterator); i++)
	{
		glDeleteSync(fences[i]);
	}
	delete [] fences;
}

}
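
// Illustrative usage sketch (not part of the original file): how a caller might
// drive StreamBuffer for per-frame vertex uploads. The buffer size, the
// "s_vertex_stream" name, the example function, and passing DETECT_MASK so the
// constructor picks an upload path are assumptions for illustration only.
//
//   static OGL::StreamBuffer* s_vertex_stream = nullptr;
//
//   void ExampleVertexUpload(u8* vertices, size_t size, u32 stride)
//   {
//       if (!s_vertex_stream)
//           s_vertex_stream = new OGL::StreamBuffer(GL_ARRAY_BUFFER, 32 * 1024 * 1024, OGL::DETECT_MASK);
//
//       // Reserve space first; Alloc handles stride alignment, wrap-around and fencing.
//       s_vertex_stream->Alloc(size, stride);
//
//       // Then copy the data. The returned offset is the base of this batch for
//       // the following glDrawArrays / glDrawElements call.
//       size_t offset = s_vertex_stream->Upload(vertices, size);
//       (void)offset;
//   }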