From cd502990fa63e63c727d99ebf0defb3cf40583f9 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 20 Jul 2017 15:25:29 +1000 Subject: [PATCH] OGL: Uber shader support --- .../VideoBackends/OGL/NativeVertexFormat.cpp | 3 +- .../VideoBackends/OGL/ProgramShaderCache.cpp | 921 ++++++++++++++---- .../VideoBackends/OGL/ProgramShaderCache.h | 140 ++- Source/Core/VideoBackends/OGL/Render.cpp | 4 +- Source/Core/VideoBackends/OGL/Render.h | 1 + .../Core/VideoBackends/OGL/VertexManager.cpp | 31 +- Source/Core/VideoBackends/OGL/VertexManager.h | 1 - Source/Core/VideoCommon/DriverDetails.cpp | 5 +- Source/Core/VideoCommon/DriverDetails.h | 6 + 9 files changed, 857 insertions(+), 255 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp b/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp index 69a6229741..d3b98cd3a7 100644 --- a/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp @@ -57,6 +57,7 @@ GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& _vtx_decl) glGenVertexArrays(1, &VAO); glBindVertexArray(VAO); + ProgramShaderCache::BindVertexFormat(this); // the element buffer is bound directly to the vao, so we must it set for every vao glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vm->m_index_buffers); @@ -74,8 +75,6 @@ GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& _vtx_decl) SetPointer(SHADER_TEXTURE0_ATTRIB + i, vertex_stride, _vtx_decl.texcoords[i]); SetPointer(SHADER_POSMTX_ATTRIB, vertex_stride, _vtx_decl.posmtx); - - vm->m_last_vao = VAO; } GLVertexFormat::~GLVertexFormat() diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 67e335f515..e26fae7d4b 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -4,45 +4,62 @@ #include "VideoBackends/OGL/ProgramShaderCache.h" +#include #include #include #include "Common/Align.h" 
#include "Common/CommonTypes.h" #include "Common/FileUtil.h" +#include "Common/GL/GLInterfaceBase.h" #include "Common/Logging/Log.h" #include "Common/MsgHandler.h" #include "Common/StringUtil.h" +#include "Common/Timer.h" #include "Core/ConfigManager.h" +#include "Core/Host.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/StreamBuffer.h" +#include "VideoBackends/OGL/VertexManager.h" +#include "VideoCommon/AsyncShaderCompiler.h" #include "VideoCommon/Debugger.h" +#include "VideoCommon/DriverDetails.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/ImageWrite.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/Statistics.h" +#include "VideoCommon/UberShaderPixel.h" +#include "VideoCommon/UberShaderVertex.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoCommon.h" namespace OGL { -static const u32 UBO_LENGTH = 32 * 1024 * 1024; +static constexpr u32 UBO_LENGTH = 32 * 1024 * 1024; +static constexpr u32 INVALID_VAO = std::numeric_limits::max(); +std::unique_ptr + ProgramShaderCache::s_async_compiler; u32 ProgramShaderCache::s_ubo_buffer_size; s32 ProgramShaderCache::s_ubo_align; +u32 ProgramShaderCache::s_last_VAO = INVALID_VAO; static std::unique_ptr s_buffer; static int num_failures = 0; -static LinearDiskCache g_program_disk_cache; +static LinearDiskCache s_program_disk_cache; +static LinearDiskCache s_uber_program_disk_cache; static GLuint CurrentProgram = 0; ProgramShaderCache::PCache ProgramShaderCache::pshaders; +ProgramShaderCache::UberPCache ProgramShaderCache::ubershaders; ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry; +ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_uber_entry; SHADERUID ProgramShaderCache::last_uid; - +UBERSHADERUID ProgramShaderCache::last_uber_uid; static std::string s_glsl_header = ""; static std::string GetGLSLVersionString() @@ -85,6 +102,7 @@ void SHADER::SetProgramVariables() 
GLint PSBlock_id = glGetUniformBlockIndex(glprogid, "PSBlock"); GLint VSBlock_id = glGetUniformBlockIndex(glprogid, "VSBlock"); GLint GSBlock_id = glGetUniformBlockIndex(glprogid, "GSBlock"); + GLint UBERBlock_id = glGetUniformBlockIndex(glprogid, "UBERBlock"); if (PSBlock_id != -1) glUniformBlockBinding(glprogid, PSBlock_id, 1); @@ -92,6 +110,8 @@ void SHADER::SetProgramVariables() glUniformBlockBinding(glprogid, VSBlock_id, 2); if (GSBlock_id != -1) glUniformBlockBinding(glprogid, GSBlock_id, 3); + if (UBERBlock_id != -1) + glUniformBlockBinding(glprogid, UBERBlock_id, 4); // Bind Texture Samplers for (int a = 0; a <= 9; ++a) @@ -148,6 +168,25 @@ void SHADER::Bind() const } } +void SHADER::DestroyShaders() +{ + if (vsid) + { + glDeleteShader(vsid); + vsid = 0; + } + if (gsid) + { + glDeleteShader(gsid); + gsid = 0; + } + if (psid) + { + glDeleteShader(psid); + psid = 0; + } +} + void ProgramShaderCache::UploadConstants() { if (PixelShaderManager::dirty || VertexShaderManager::dirty || GeometryShaderManager::dirty) @@ -182,68 +221,136 @@ void ProgramShaderCache::UploadConstants() } } -SHADER* ProgramShaderCache::SetShader(u32 primitive_type) +SHADER* ProgramShaderCache::SetShader(u32 primitive_type, const GLVertexFormat* vertex_format) { + if (g_ActiveConfig.bDisableSpecializedShaders && g_ActiveConfig.CanUseUberShaders()) + return SetUberShader(primitive_type, vertex_format); + SHADERUID uid; - GetShaderId(&uid, primitive_type); + std::memset(&uid, 0, sizeof(uid)); + uid.puid = GetPixelShaderUid(); + uid.vuid = GetVertexShaderUid(); + uid.guid = GetGeometryShaderUid(primitive_type); // Check if the shader is already set - if (last_entry) + if (last_entry && uid == last_uid) { - if (uid == last_uid) - { - GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - last_entry->shader.Bind(); - return &last_entry->shader; - } + last_entry->shader.Bind(); + BindVertexFormat(vertex_format); + return &last_entry->shader; } - last_uid = uid; - // Check if shader is already 
in cache - PCache::iterator iter = pshaders.find(uid); + auto iter = pshaders.find(uid); if (iter != pshaders.end()) { PCacheEntry* entry = &iter->second; - last_entry = entry; + if (entry->pending) + return SetUberShader(primitive_type, vertex_format); - GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); + last_uid = uid; + last_entry = entry; + BindVertexFormat(vertex_format); last_entry->shader.Bind(); return &last_entry->shader; } - // Make an entry in the table + // Compile the new shader program. PCacheEntry& newentry = pshaders[uid]; - last_entry = &newentry; - newentry.in_cache = 0; + newentry.in_cache = false; + newentry.pending = false; + // Can we background compile this shader? Requires background shader compiling to be enabled, + // and all ubershaders to have been successfully compiled. + if (g_ActiveConfig.CanBackgroundCompileShaders() && !ubershaders.empty() && s_async_compiler) + { + newentry.pending = true; + s_async_compiler->QueueWorkItem(s_async_compiler->CreateWorkItem(uid)); + return SetUberShader(primitive_type, vertex_format); + } + + // Synchronous shader compiling. 
ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); - ShaderCode vcode = GenerateVertexShaderCode(APIType::OpenGL, host_config, uid.vuid.GetUidData()); - ShaderCode pcode = GeneratePixelShaderCode(APIType::OpenGL, host_config, uid.puid.GetUidData()); + ShaderCode vcode; + if (!g_ActiveConfig.bForceVertexUberShaders) + vcode = GenerateVertexShaderCode(APIType::OpenGL, host_config, uid.vuid.GetUidData()); + else + vcode = UberShader::GenVertexShader(APIType::OpenGL, host_config, + UberShader::GetVertexShaderUid().GetUidData()); + ShaderCode pcode; + if (!g_ActiveConfig.bForcePixelUberShaders) + pcode = GeneratePixelShaderCode(APIType::OpenGL, host_config, uid.puid.GetUidData()); + else + pcode = UberShader::GenPixelShader(APIType::OpenGL, host_config, + UberShader::GetPixelShaderUid().GetUidData()); + ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough()) gcode = GenerateGeometryShaderCode(APIType::OpenGL, host_config, uid.guid.GetUidData()); -#if defined(_DEBUG) || defined(DEBUGFAST) - if (g_ActiveConfig.iLog & CONF_SAVESHADERS) + if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer())) + return nullptr; + + INCSTAT(stats.numPixelShadersCreated); + SETSTAT(stats.numPixelShadersAlive, pshaders.size()); + + last_uid = uid; + last_entry = &newentry; + BindVertexFormat(vertex_format); + last_entry->shader.Bind(); + return &last_entry->shader; +} + +SHADER* ProgramShaderCache::SetUberShader(u32 primitive_type, const GLVertexFormat* vertex_format) +{ + UBERSHADERUID uid; + std::memset(&uid, 0, sizeof(uid)); + uid.puid = UberShader::GetPixelShaderUid(); + uid.vuid = UberShader::GetVertexShaderUid(); + uid.guid = GetGeometryShaderUid(primitive_type); + + // We need to use the ubershader vertex format with all attributes enabled. + // Otherwise, the NV driver can generate variants for the vertex shaders. 
+ const GLVertexFormat* uber_vertex_format = static_cast( + VertexLoaderManager::GetUberVertexFormat(vertex_format->GetVertexDeclaration())); + + // Check if the shader is already set + if (last_uber_entry && last_uber_uid == uid) { - static int counter = 0; - std::string filename = - StringFromFormat("%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - SaveData(filename, vcode.GetBuffer()); - - filename = StringFromFormat("%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - SaveData(filename, pcode.GetBuffer()); - - if (!gcode.GetBuffer().empty()) - { - filename = - StringFromFormat("%sgs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - SaveData(filename, gcode.GetBuffer()); - } + BindVertexFormat(uber_vertex_format); + last_uber_entry->shader.Bind(); + return &last_uber_entry->shader; + } + + // Check if shader is already in cache + auto iter = ubershaders.find(uid); + if (iter != ubershaders.end()) + { + PCacheEntry* entry = &iter->second; + last_uber_uid = uid; + last_uber_entry = entry; + BindVertexFormat(uber_vertex_format); + last_uber_entry->shader.Bind(); + return &last_uber_entry->shader; + } + + // Make an entry in the table + PCacheEntry& newentry = ubershaders[uid]; + newentry.in_cache = false; + newentry.pending = false; + + ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); + ShaderCode vcode = + UberShader::GenVertexShader(APIType::OpenGL, host_config, uid.vuid.GetUidData()); + ShaderCode pcode = + UberShader::GenPixelShader(APIType::OpenGL, host_config, uid.puid.GetUidData()); + ShaderCode gcode; + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && + !uid.guid.GetUidData()->IsPassthrough()) + { + gcode = GenerateGeometryShaderCode(APIType::OpenGL, host_config, uid.guid.GetUidData()); } -#endif if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer())) { @@ -251,93 +358,77 @@ SHADER* ProgramShaderCache::SetShader(u32 primitive_type) return 
nullptr; } - INCSTAT(stats.numPixelShadersCreated); - SETSTAT(stats.numPixelShadersAlive, pshaders.size()); - GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true); - - last_entry->shader.Bind(); - return &last_entry->shader; + last_uber_uid = uid; + last_uber_entry = &newentry; + BindVertexFormat(uber_vertex_format); + last_uber_entry->shader.Bind(); + return &last_uber_entry->shader; } bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode, const std::string& gcode) { - GLuint vsid = CompileSingleShader(GL_VERTEX_SHADER, vcode); - GLuint psid = CompileSingleShader(GL_FRAGMENT_SHADER, pcode); +#if defined(_DEBUG) || defined(DEBUGFAST) + if (g_ActiveConfig.iLog & CONF_SAVESHADERS) + { + static int counter = 0; + std::string filename = + StringFromFormat("%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); + SaveData(filename, vcode.c_str()); + + filename = StringFromFormat("%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); + SaveData(filename, pcode.c_str()); + + if (!gcode.empty()) + { + filename = + StringFromFormat("%sgs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); + SaveData(filename, gcode.c_str()); + } + } +#endif + + shader.vsid = CompileSingleShader(GL_VERTEX_SHADER, vcode); + shader.psid = CompileSingleShader(GL_FRAGMENT_SHADER, pcode); // Optional geometry shader - GLuint gsid = 0; + shader.gsid = 0; if (!gcode.empty()) - gsid = CompileSingleShader(GL_GEOMETRY_SHADER, gcode); + shader.gsid = CompileSingleShader(GL_GEOMETRY_SHADER, gcode); - if (!vsid || !psid || (!gcode.empty() && !gsid)) + if (!shader.vsid || !shader.psid || (!gcode.empty() && !shader.gsid)) { - glDeleteShader(vsid); - glDeleteShader(psid); - glDeleteShader(gsid); + shader.Destroy(); return false; } - GLuint pid = shader.glprogid = glCreateProgram(); + // Create and link the program. 
+ shader.glprogid = glCreateProgram(); - glAttachShader(pid, vsid); - glAttachShader(pid, psid); - if (gsid) - glAttachShader(pid, gsid); + glAttachShader(shader.glprogid, shader.vsid); + glAttachShader(shader.glprogid, shader.psid); + if (shader.gsid) + glAttachShader(shader.glprogid, shader.gsid); if (g_ogl_config.bSupportsGLSLCache) - glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); + glProgramParameteri(shader.glprogid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); shader.SetProgramBindings(false); - glLinkProgram(pid); + glLinkProgram(shader.glprogid); - // original shaders aren't needed any more - glDeleteShader(vsid); - glDeleteShader(psid); - glDeleteShader(gsid); - - GLint linkStatus; - glGetProgramiv(pid, GL_LINK_STATUS, &linkStatus); - GLsizei length = 0; - glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length); - if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL)) + if (!CheckProgramLinkResult(shader.glprogid, vcode, pcode, gcode)) { - std::string info_log; - info_log.resize(length); - glGetProgramInfoLog(pid, length, &length, &info_log[0]); - ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str()); - - std::string filename = - StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - File::OpenFStream(file, filename, std::ios_base::out); - file << s_glsl_header << vcode << s_glsl_header << pcode; - if (!gcode.empty()) - file << s_glsl_header << gcode; - file << info_log; - file.close(); - - if (linkStatus != GL_TRUE) - { - PanicAlert("Failed to link shaders: %s\n" - "Debug info (%s, %s, %s):\n%s", - filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer, - g_ogl_config.gl_version, info_log.c_str()); - } - } - if (linkStatus != GL_TRUE) - { - // Compile failed - ERROR_LOG(VIDEO, "Program linking failed; see info log"); - // Don't try to use this shader - glDeleteProgram(pid); + shader.Destroy(); return false; } + // For drivers that don't support binding 
layout, we need to bind it here. shader.SetProgramVariables(); + // Original shaders aren't needed any more. + shader.DestroyShaders(); return true; } @@ -352,63 +443,30 @@ bool ProgramShaderCache::CompileComputeShader(SHADER& shader, const std::string& header = "#extension GL_ARB_compute_shader : enable\n"; } - GLuint shader_id = CompileSingleShader(GL_COMPUTE_SHADER, header + code); + std::string full_code = header + code; + GLuint shader_id = CompileSingleShader(GL_COMPUTE_SHADER, full_code); if (!shader_id) return false; - GLuint pid = shader.glprogid = glCreateProgram(); - glAttachShader(pid, shader_id); - if (g_ogl_config.bSupportsGLSLCache) - glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); - + shader.glprogid = glCreateProgram(); + glAttachShader(shader.glprogid, shader_id); shader.SetProgramBindings(true); - - glLinkProgram(pid); + glLinkProgram(shader.glprogid); // original shaders aren't needed any more glDeleteShader(shader_id); - GLint linkStatus; - glGetProgramiv(pid, GL_LINK_STATUS, &linkStatus); - GLsizei length = 0; - glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length); - if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL)) + if (!CheckProgramLinkResult(shader.glprogid, full_code, "", "")) { - std::string info_log; - info_log.resize(length); - glGetProgramInfoLog(pid, length, &length, &info_log[0]); - ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str()); - - std::string filename = - StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - File::OpenFStream(file, filename, std::ios_base::out); - file << s_glsl_header << code; - file << info_log; - file.close(); - - if (linkStatus != GL_TRUE) - { - PanicAlert("Failed to link shaders: %s\n" - "Debug info (%s, %s, %s):\n%s", - filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer, - g_ogl_config.gl_version, info_log.c_str()); - } - } - if (linkStatus != GL_TRUE) - { - // Compile failed - 
ERROR_LOG(VIDEO, "Program linking failed; see info log"); - - // Don't try to use this shader - glDeleteProgram(pid); + shader.Destroy(); return false; } + shader.SetProgramVariables(); return true; } -GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& code) +GLuint ProgramShaderCache::CompileSingleShader(GLenum type, const std::string& code) { GLuint result = glCreateShader(type); @@ -416,16 +474,29 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c glShaderSource(result, 2, src, nullptr); glCompileShader(result); + + if (!CheckShaderCompileResult(result, type, code)) + { + // Don't try to use this shader + glDeleteShader(result); + return 0; + } + + return result; +} + +bool ProgramShaderCache::CheckShaderCompileResult(GLuint id, GLenum type, const std::string& code) +{ GLint compileStatus; - glGetShaderiv(result, GL_COMPILE_STATUS, &compileStatus); + glGetShaderiv(id, GL_COMPILE_STATUS, &compileStatus); GLsizei length = 0; - glGetShaderiv(result, GL_INFO_LOG_LENGTH, &length); + glGetShaderiv(id, GL_INFO_LOG_LENGTH, &length); if (compileStatus != GL_TRUE || (length > 1 && DEBUG_GLSL)) { std::string info_log; info_log.resize(length); - glGetShaderInfoLog(result, length, &length, &info_log[0]); + glGetShaderInfoLog(id, length, &length, &info_log[0]); const char* prefix = ""; switch (type) @@ -465,20 +536,48 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c { // Compile failed ERROR_LOG(VIDEO, "Shader compilation failed; see info log"); - - // Don't try to use this shader - glDeleteShader(result); - return 0; + return false; } - return result; + return true; } -void ProgramShaderCache::GetShaderId(SHADERUID* uid, u32 primitive_type) +bool ProgramShaderCache::CheckProgramLinkResult(GLuint id, const std::string& vcode, + const std::string& pcode, const std::string& gcode) { - uid->puid = GetPixelShaderUid(); - uid->vuid = GetVertexShaderUid(); - uid->guid = 
GetGeometryShaderUid(primitive_type); + GLint linkStatus; + glGetProgramiv(id, GL_LINK_STATUS, &linkStatus); + GLsizei length = 0; + glGetProgramiv(id, GL_INFO_LOG_LENGTH, &length); + if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL)) + { + std::string info_log; + info_log.resize(length); + glGetProgramInfoLog(id, length, &length, &info_log[0]); + ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str()); + + std::string filename = + StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); + std::ofstream file; + File::OpenFStream(file, filename, std::ios_base::out); + file << s_glsl_header << vcode << s_glsl_header << pcode; + if (!gcode.empty()) + file << s_glsl_header << gcode; + file << info_log; + file.close(); + + if (linkStatus != GL_TRUE) + { + PanicAlert("Failed to link shaders: %s\n" + "Debug info (%s, %s, %s):\n%s", + filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer, + g_ogl_config.gl_version, info_log.c_str()); + + return false; + } + } + + return true; } ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram() @@ -503,6 +602,23 @@ void ProgramShaderCache::Init() // Then once more to get bytes s_buffer = StreamBuffer::Create(GL_UNIFORM_BUFFER, UBO_LENGTH); + // The GPU shader code appears to be context-specific on Mesa/i965. + // This means that if we compiled the ubershaders asynchronously, they will be recompiled + // on the main thread the first time they are used, causing stutter. Nouveau has been + // reported to crash if draw calls are invoked on the shared context threads. For now, + // disable asynchronous compilation on Mesa. 
+ if (!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION) && + g_ActiveConfig.GetShaderCompilerThreads() > 0) + { + s_async_compiler = std::make_unique(); + s_async_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); + if (!s_async_compiler->HasWorkerThreads()) + { + // No point using the async compiler without workers. + s_async_compiler.reset(); + } + } + // Read our shader cache, only if supported and enabled if (g_ogl_config.bSupportsGLSLCache && g_ActiveConfig.bShaderCache) LoadProgramBinaries(); @@ -511,36 +627,126 @@ void ProgramShaderCache::Init() CurrentProgram = 0; last_entry = nullptr; + last_uber_entry = nullptr; + + if (g_ActiveConfig.CanPrecompileUberShaders()) + PrecompileUberShaders(); +} + +void ProgramShaderCache::RetrieveAsyncShaders() +{ + if (s_async_compiler) + s_async_compiler->RetrieveWorkItems(); } void ProgramShaderCache::Reload() { + if (s_async_compiler) + { + s_async_compiler->WaitUntilCompletion(); + s_async_compiler->RetrieveWorkItems(); + } + const bool use_cache = g_ogl_config.bSupportsGLSLCache && g_ActiveConfig.bShaderCache; if (use_cache) SaveProgramBinaries(); - g_program_disk_cache.Close(); + s_program_disk_cache.Close(); + s_uber_program_disk_cache.Close(); DestroyShaders(); if (use_cache) LoadProgramBinaries(); + if (g_ActiveConfig.CanPrecompileUberShaders()) + PrecompileUberShaders(); + + InvalidateVertexFormat(); CurrentProgram = 0; last_entry = nullptr; + last_uber_entry = nullptr; last_uid = {}; + last_uber_uid = {}; } void ProgramShaderCache::Shutdown() { + if (s_async_compiler) + { + s_async_compiler->WaitUntilCompletion(); + s_async_compiler->StopWorkerThreads(); + s_async_compiler->RetrieveWorkItems(); + s_async_compiler.reset(); + } + // store all shaders in cache on disk if (g_ogl_config.bSupportsGLSLCache && g_ActiveConfig.bShaderCache) SaveProgramBinaries(); - g_program_disk_cache.Close(); + s_program_disk_cache.Close(); + s_uber_program_disk_cache.Close(); + 
InvalidateVertexFormat(); DestroyShaders(); s_buffer.reset(); } +void ProgramShaderCache::BindVertexFormat(const GLVertexFormat* vertex_format) +{ + u32 new_VAO = vertex_format ? vertex_format->VAO : 0; + if (s_last_VAO == new_VAO) + return; + + glBindVertexArray(new_VAO); + s_last_VAO = new_VAO; +} + +void ProgramShaderCache::InvalidateVertexFormat() +{ + s_last_VAO = INVALID_VAO; +} + +void ProgramShaderCache::BindLastVertexFormat() +{ + if (s_last_VAO != INVALID_VAO) + glBindVertexArray(s_last_VAO); + else + glBindVertexArray(0); +} + +GLuint ProgramShaderCache::CreateProgramFromBinary(const u8* value, u32 value_size) +{ + const u8* binary = value + sizeof(GLenum); + GLint binary_size = value_size - sizeof(GLenum); + GLenum prog_format; + std::memcpy(&prog_format, value, sizeof(GLenum)); + + GLuint progid = glCreateProgram(); + glProgramBinary(progid, prog_format, binary, binary_size); + + GLint success; + glGetProgramiv(progid, GL_LINK_STATUS, &success); + if (!success) + { + glDeleteProgram(progid); + return 0; + } + + return progid; +} + +bool ProgramShaderCache::CreateCacheEntryFromBinary(PCacheEntry* entry, const u8* value, + u32 value_size) +{ + entry->in_cache = true; + entry->pending = false; + entry->shader.glprogid = CreateProgramFromBinary(value, value_size); + if (entry->shader.glprogid == 0) + return false; + + entry->shader.SetProgramVariables(); + return true; +} + void ProgramShaderCache::LoadProgramBinaries() { GLint Supported; @@ -553,49 +759,73 @@ void ProgramShaderCache::LoadProgramBinaries() } else { + // Load game-specific shaders. std::string cache_filename = GetDiskShaderCacheFileName(APIType::OpenGL, "ProgramBinaries", true, true); - ProgramShaderCacheInserter inserter; - g_program_disk_cache.OpenAndRead(cache_filename, inserter); + ProgramShaderCacheInserter inserter(pshaders); + s_program_disk_cache.OpenAndRead(cache_filename, inserter); + + // Load global ubershaders. 
+ cache_filename = + GetDiskShaderCacheFileName(APIType::OpenGL, "UberProgramBinaries", false, true); + ProgramShaderCacheInserter uber_inserter(ubershaders); + s_uber_program_disk_cache.OpenAndRead(cache_filename, uber_inserter); } SETSTAT(stats.numPixelShadersAlive, pshaders.size()); } -void ProgramShaderCache::SaveProgramBinaries() +static bool GetProgramBinary(const ProgramShaderCache::PCacheEntry& entry, std::vector& data) { - for (auto& entry : pshaders) + // Clear any prior error code + glGetError(); + + GLint link_status = GL_FALSE, delete_status = GL_TRUE, binary_size = 0; + glGetProgramiv(entry.shader.glprogid, GL_LINK_STATUS, &link_status); + glGetProgramiv(entry.shader.glprogid, GL_DELETE_STATUS, &delete_status); + glGetProgramiv(entry.shader.glprogid, GL_PROGRAM_BINARY_LENGTH, &binary_size); + if (glGetError() != GL_NO_ERROR || link_status == GL_FALSE || delete_status == GL_TRUE || + binary_size == 0) { - // Clear any prior error code - glGetError(); - - if (entry.second.in_cache) - { - continue; - } - - GLint link_status = GL_FALSE, delete_status = GL_TRUE, binary_size = 0; - glGetProgramiv(entry.second.shader.glprogid, GL_LINK_STATUS, &link_status); - glGetProgramiv(entry.second.shader.glprogid, GL_DELETE_STATUS, &delete_status); - glGetProgramiv(entry.second.shader.glprogid, GL_PROGRAM_BINARY_LENGTH, &binary_size); - if (glGetError() != GL_NO_ERROR || link_status == GL_FALSE || delete_status == GL_TRUE || - !binary_size) - { - continue; - } - - std::vector data(binary_size + sizeof(GLenum)); - u8* binary = &data[sizeof(GLenum)]; - GLenum* prog_format = (GLenum*)&data[0]; - glGetProgramBinary(entry.second.shader.glprogid, binary_size, nullptr, prog_format, binary); - if (glGetError() != GL_NO_ERROR) - { - continue; - } - - g_program_disk_cache.Append(entry.first, &data[0], binary_size + sizeof(GLenum)); + return false; } - g_program_disk_cache.Sync(); + data.resize(binary_size + sizeof(GLenum)); + + GLsizei length = binary_size; + GLenum prog_format; 
+ glGetProgramBinary(entry.shader.glprogid, binary_size, &length, &prog_format, + &data[sizeof(GLenum)]); + if (glGetError() != GL_NO_ERROR) + return false; + + std::memcpy(&data[0], &prog_format, sizeof(prog_format)); + return true; +} + +template +static void SaveProgramBinaryMap(CacheMapType& program_map, DiskCacheType& disk_cache) +{ + std::vector binary_data; + for (auto& entry : program_map) + { + if (entry.second.in_cache || entry.second.pending) + continue; + + // Entry is now in cache (even if it fails, we don't want to try to save it again). + entry.second.in_cache = true; + if (!GetProgramBinary(entry.second, binary_data)) + continue; + + disk_cache.Append(entry.first, &binary_data[0], static_cast(binary_data.size())); + } + + disk_cache.Sync(); +} + +void ProgramShaderCache::SaveProgramBinaries() +{ + SaveProgramBinaryMap(pshaders, s_program_disk_cache); + SaveProgramBinaryMap(ubershaders, s_uber_program_disk_cache); } void ProgramShaderCache::DestroyShaders() @@ -603,10 +833,12 @@ void ProgramShaderCache::DestroyShaders() glUseProgram(0); for (auto& entry : pshaders) - { entry.second.Destroy(); - } pshaders.clear(); + + for (auto& entry : ubershaders) + entry.second.Destroy(); + ubershaders.clear(); } void ProgramShaderCache::CreateHeader() @@ -757,30 +989,307 @@ void ProgramShaderCache::CreateHeader() v >= GLSLES_310 ? 
"precision highp image2DArray;" : ""); } -void ProgramShaderCache::ProgramShaderCacheInserter::Read(const SHADERUID& key, const u8* value, - u32 value_size) +void ProgramShaderCache::PrecompileUberShaders() { - const u8* binary = value + sizeof(GLenum); - GLenum* prog_format = (GLenum*)value; - GLint binary_size = value_size - sizeof(GLenum); + bool success = true; - PCacheEntry entry; - entry.in_cache = 1; - entry.shader.glprogid = glCreateProgram(); - glProgramBinary(entry.shader.glprogid, *prog_format, binary, binary_size); + UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& vuid) { + UberShader::EnumeratePixelShaderUids([&](const UberShader::PixelShaderUid& puid) { + // UIDs must have compatible texgens, a mismatching combination will never be queried. + if (vuid.GetUidData()->num_texgens != puid.GetUidData()->num_texgens) + return; - GLint success; - glGetProgramiv(entry.shader.glprogid, GL_LINK_STATUS, &success); + EnumerateGeometryShaderUids([&](const GeometryShaderUid& guid) { + if (guid.GetUidData()->numTexGens != vuid.GetUidData()->num_texgens) + return; - if (success) + UBERSHADERUID uid; + std::memcpy(&uid.vuid, &vuid, sizeof(uid.vuid)); + std::memcpy(&uid.puid, &puid, sizeof(uid.puid)); + std::memcpy(&uid.guid, &guid, sizeof(uid.guid)); + + // The ubershader may already exist if shader caching is enabled. + if (!success || ubershaders.find(uid) != ubershaders.end()) + return; + + PCacheEntry& entry = ubershaders[uid]; + entry.in_cache = false; + entry.pending = false; + + // Multi-context path? 
+ if (s_async_compiler) + { + entry.pending = true; + s_async_compiler->QueueWorkItem( + s_async_compiler->CreateWorkItem(uid)); + return; + } + + ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); + ShaderCode vcode = + UberShader::GenVertexShader(APIType::OpenGL, host_config, uid.vuid.GetUidData()); + ShaderCode pcode = + UberShader::GenPixelShader(APIType::OpenGL, host_config, uid.puid.GetUidData()); + ShaderCode gcode; + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && + !uid.guid.GetUidData()->IsPassthrough()) + { + gcode = GenerateGeometryShaderCode(APIType::OpenGL, host_config, uid.guid.GetUidData()); + } + + // Always background compile, even when it's not supported. + // This way hopefully the driver can still compile the shaders in parallel. + if (!CompileShader(entry.shader, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer())) + { + // Stop compiling shaders if any of them fail, no point continuing. + success = false; + return; + } + }); + }); + }); + + if (s_async_compiler) { - pshaders[key] = entry; - entry.shader.SetProgramVariables(); + s_async_compiler->WaitUntilCompletion([](size_t completed, size_t total) { + Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(), + static_cast(completed), static_cast(total)); + }); + s_async_compiler->RetrieveWorkItems(); + Host_UpdateProgressDialog("", -1, -1); } - else + + if (!success) { - glDeleteProgram(entry.shader.glprogid); + PanicAlert("One or more ubershaders failed to compile. 
Disabling ubershaders."); + for (auto& it : ubershaders) + it.second.Destroy(); + ubershaders.clear(); } } +bool ProgramShaderCache::SharedContextAsyncShaderCompiler::WorkerThreadInitMainThread(void** param) +{ + SharedContextData* ctx_data = new SharedContextData(); + ctx_data->context = GLInterface->CreateSharedContext(); + if (!ctx_data->context) + { + PanicAlert("Failed to create shared context for shader compiling."); + delete ctx_data; + return false; + } + + *param = ctx_data; + return true; +} + +bool ProgramShaderCache::SharedContextAsyncShaderCompiler::WorkerThreadInitWorkerThread(void* param) +{ + SharedContextData* ctx_data = reinterpret_cast(param); + if (!ctx_data->context->MakeCurrent()) + { + PanicAlert("Failed to make shared context current."); + ctx_data->context->Shutdown(); + delete ctx_data; + return false; + } + + CreatePrerenderArrays(ctx_data); + return true; +} + +void ProgramShaderCache::SharedContextAsyncShaderCompiler::WorkerThreadExit(void* param) +{ + SharedContextData* ctx_data = reinterpret_cast(param); + DestroyPrerenderArrays(ctx_data); + ctx_data->context->Shutdown(); + delete ctx_data; +} + +ProgramShaderCache::ShaderCompileWorkItem::ShaderCompileWorkItem(const SHADERUID& uid) +{ + std::memcpy(&m_uid, &uid, sizeof(m_uid)); +} + +bool ProgramShaderCache::ShaderCompileWorkItem::Compile() +{ + ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); + ShaderCode vcode = + GenerateVertexShaderCode(APIType::OpenGL, host_config, m_uid.vuid.GetUidData()); + ShaderCode pcode = GeneratePixelShaderCode(APIType::OpenGL, host_config, m_uid.puid.GetUidData()); + ShaderCode gcode; + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && + !m_uid.guid.GetUidData()->IsPassthrough()) + gcode = GenerateGeometryShaderCode(APIType::OpenGL, host_config, m_uid.guid.GetUidData()); + + CompileShader(m_program, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer()); + DrawPrerenderArray(m_program, m_uid.guid.GetUidData()->primitive_type); + 
return true; +} + +void ProgramShaderCache::ShaderCompileWorkItem::Retrieve() +{ + auto iter = pshaders.find(m_uid); + if (iter != pshaders.end() && !iter->second.pending) + { + // Main thread already compiled this shader. + m_program.Destroy(); + return; + } + + PCacheEntry& entry = pshaders[m_uid]; + entry.shader = m_program; + entry.in_cache = false; + entry.pending = false; +} + +ProgramShaderCache::UberShaderCompileWorkItem::UberShaderCompileWorkItem(const UBERSHADERUID& uid) +{ + std::memcpy(&m_uid, &uid, sizeof(m_uid)); +} + +bool ProgramShaderCache::UberShaderCompileWorkItem::Compile() +{ + ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); + ShaderCode vcode = + UberShader::GenVertexShader(APIType::OpenGL, host_config, m_uid.vuid.GetUidData()); + ShaderCode pcode = + UberShader::GenPixelShader(APIType::OpenGL, host_config, m_uid.puid.GetUidData()); + ShaderCode gcode; + if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && + !m_uid.guid.GetUidData()->IsPassthrough()) + gcode = GenerateGeometryShaderCode(APIType::OpenGL, host_config, m_uid.guid.GetUidData()); + + CompileShader(m_program, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer()); + DrawPrerenderArray(m_program, m_uid.guid.GetUidData()->primitive_type); + return true; +} + +void ProgramShaderCache::UberShaderCompileWorkItem::Retrieve() +{ + auto iter = ubershaders.find(m_uid); + if (iter != ubershaders.end() && !iter->second.pending) + { + // Main thread already compiled this shader. + m_program.Destroy(); + return; + } + + PCacheEntry& entry = ubershaders[m_uid]; + entry.shader = m_program; + entry.in_cache = false; + entry.pending = false; +} + +void ProgramShaderCache::CreatePrerenderArrays(SharedContextData* data) +{ + // Create VAO for the prerender vertices. + // We don't use the normal VAO map, since we need to change the VBO pointer. + glGenVertexArrays(1, &data->prerender_VAO); + glBindVertexArray(data->prerender_VAO); + + // Create and populate the prerender VBO. 
We need enough space for 3 vertices (a single triangle). + static constexpr float vbo_data[] = {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; + constexpr u32 vbo_stride = sizeof(float) * 3; + glGenBuffers(1, &data->prerender_VBO); + glBindBuffer(GL_ARRAY_BUFFER, data->prerender_VBO); + glBufferData(GL_ARRAY_BUFFER, sizeof(vbo_data), vbo_data, GL_STATIC_DRAW); + + // We only need a position in our prerender vertex. + glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); + glVertexAttribPointer(SHADER_POSITION_ATTRIB, 3, GL_FLOAT, GL_FALSE, vbo_stride, nullptr); + + // The other attributes have to be active to avoid variant generation. + glEnableVertexAttribArray(SHADER_POSMTX_ATTRIB); + glVertexAttribIPointer(SHADER_POSMTX_ATTRIB, 1, GL_UNSIGNED_BYTE, vbo_stride, nullptr); + for (u32 i = 0; i < 3; i++) + { + glEnableVertexAttribArray(SHADER_NORM0_ATTRIB + i); + glVertexAttribPointer(SHADER_NORM0_ATTRIB + i, 3, GL_FLOAT, GL_FALSE, vbo_stride, nullptr); + } + for (u32 i = 0; i < 2; i++) + { + glEnableVertexAttribArray(SHADER_COLOR0_ATTRIB + i); + glVertexAttribPointer(SHADER_COLOR0_ATTRIB + i, 4, GL_UNSIGNED_BYTE, GL_TRUE, vbo_stride, + nullptr); + } + for (u32 i = 0; i < 8; i++) + { + glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB + i); + glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB + i, 3, GL_FLOAT, GL_FALSE, vbo_stride, nullptr); + } + + // We need an index buffer to set up the same drawing state on Mesa. + static constexpr u16 ibo_data[] = {0, 1, 2}; + glGenBuffers(1, &data->prerender_IBO); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data->prerender_IBO); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(ibo_data), ibo_data, GL_STATIC_DRAW); + + // Mesa also seems to require that the primitive restart state matches.
+ if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart) + { + if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3) + { + glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX); + } + else + { + if (GLExtensions::Version() >= 310) + { + glEnable(GL_PRIMITIVE_RESTART); + glPrimitiveRestartIndex(65535); + } + else + { + glEnableClientState(GL_PRIMITIVE_RESTART_NV); + glPrimitiveRestartIndexNV(65535); + } + } + } +} + +void ProgramShaderCache::DestroyPrerenderArrays(SharedContextData* data) +{ + if (data->prerender_VAO) + { + glDeleteVertexArrays(1, &data->prerender_VAO); + data->prerender_VAO = 0; + } + if (data->prerender_VBO) + { + glDeleteBuffers(1, &data->prerender_VBO); + data->prerender_VBO = 0; + } + if (data->prerender_IBO) + { + glDeleteBuffers(1, &data->prerender_IBO); + data->prerender_IBO = 0; + } +} + +void ProgramShaderCache::DrawPrerenderArray(const SHADER& shader, u32 primitive_type) +{ + // This is called on a worker thread, so we don't want to use the normal binding process. + glUseProgram(shader.glprogid); + + // The number of primitives drawn depends on the type. + switch (primitive_type) + { + case PRIMITIVE_POINTS: + glDrawElements(GL_POINTS, 1, GL_UNSIGNED_SHORT, nullptr); + break; + case PRIMITIVE_LINES: + glDrawElements(GL_LINES, 2, GL_UNSIGNED_SHORT, nullptr); + break; + case PRIMITIVE_TRIANGLES: + glDrawElements(GL_TRIANGLES, 3, GL_UNSIGNED_SHORT, nullptr); + break; + } + + // Has to be finished by the time the main thread picks it up. 
+ GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); + glDeleteSync(sync); +} + } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h index 7b29c24314..61f2f68fdc 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h @@ -4,17 +4,25 @@ #pragma once +#include #include #include "Common/GL/GLUtil.h" #include "Common/LinearDiskCache.h" +#include "VideoCommon/AsyncShaderCompiler.h" #include "VideoCommon/GeometryShaderGen.h" #include "VideoCommon/PixelShaderGen.h" +#include "VideoCommon/UberShaderPixel.h" +#include "VideoCommon/UberShaderVertex.h" #include "VideoCommon/VertexShaderGen.h" +class cInterfaceBase; + namespace OGL { +class GLVertexFormat; + class SHADERUID { public: @@ -24,30 +32,53 @@ public: bool operator<(const SHADERUID& r) const { - return std::tie(puid, vuid, guid) < std::tie(r.puid, r.vuid, r.guid); + return std::tie(vuid, puid, guid) < std::tie(r.vuid, r.puid, r.guid); } bool operator==(const SHADERUID& r) const { - return std::tie(puid, vuid, guid) == std::tie(r.puid, r.vuid, r.guid); + return std::tie(vuid, puid, guid) == std::tie(r.vuid, r.puid, r.guid); + } +}; +class UBERSHADERUID +{ +public: + UberShader::VertexShaderUid vuid; + UberShader::PixelShaderUid puid; + GeometryShaderUid guid; + + bool operator<(const UBERSHADERUID& r) const + { + return std::tie(vuid, puid, guid) < std::tie(r.vuid, r.puid, r.guid); + } + + bool operator==(const UBERSHADERUID& r) const + { + return std::tie(vuid, puid, guid) == std::tie(r.vuid, r.puid, r.guid); } }; struct SHADER { - SHADER() : glprogid(0) {} void Destroy() { - glDeleteProgram(glprogid); - glprogid = 0; + DestroyShaders(); + if (glprogid) + { + glDeleteProgram(glprogid); + glprogid = 0; + } } - GLuint glprogid; // OpenGL program id - std::string strvprog, strpprog, strgprog; 
+ GLuint vsid = 0; + GLuint gsid = 0; + GLuint psid = 0; + GLuint glprogid = 0; void SetProgramVariables(); void SetProgramBindings(bool is_compute); void Bind() const; + void DestroyShaders(); }; class ProgramShaderCache @@ -57,43 +88,126 @@ public: { SHADER shader; bool in_cache; + bool pending; void Destroy() { shader.Destroy(); } }; static PCacheEntry GetShaderProgram(); - static SHADER* SetShader(u32 primitive_type); - static void GetShaderId(SHADERUID* uid, u32 primitive_type); + static SHADER* SetShader(u32 primitive_type, const GLVertexFormat* vertex_format); + static SHADER* SetUberShader(u32 primitive_type, const GLVertexFormat* vertex_format); + static void BindVertexFormat(const GLVertexFormat* vertex_format); + static void InvalidateVertexFormat(); + static void BindLastVertexFormat(); static bool CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode, const std::string& gcode = ""); static bool CompileComputeShader(SHADER& shader, const std::string& code); - static GLuint CompileSingleShader(GLuint type, const std::string& code); + static GLuint CompileSingleShader(GLenum type, const std::string& code); + static bool CheckShaderCompileResult(GLuint id, GLenum type, const std::string& code); + static bool CheckProgramLinkResult(GLuint id, const std::string& vcode, const std::string& pcode, + const std::string& gcode); static void UploadConstants(); static void Init(); static void Reload(); static void Shutdown(); static void CreateHeader(); + static void RetrieveAsyncShaders(); + static void PrecompileUberShaders(); private: - class ProgramShaderCacheInserter : public LinearDiskCacheReader + template + class ProgramShaderCacheInserter : public LinearDiskCacheReader { public: - void Read(const SHADERUID& key, const u8* value, u32 value_size) override; + ProgramShaderCacheInserter(std::map& shader_map) + : m_shader_map(shader_map) + { + } + + void Read(const UIDType& key, const u8* value, u32 value_size) override + { + if 
(m_shader_map.find(key) != m_shader_map.end()) + return; + + PCacheEntry& entry = m_shader_map[key]; + if (!CreateCacheEntryFromBinary(&entry, value, value_size)) + { + m_shader_map.erase(key); + return; + } + } + + private: + std::map& m_shader_map; }; + class SharedContextAsyncShaderCompiler : public VideoCommon::AsyncShaderCompiler + { + protected: + virtual bool WorkerThreadInitMainThread(void** param) override; + virtual bool WorkerThreadInitWorkerThread(void* param) override; + virtual void WorkerThreadExit(void* param) override; + }; + + struct SharedContextData + { + std::unique_ptr context; + GLuint prerender_VBO; + GLuint prerender_VAO; + GLuint prerender_IBO; + }; + + class ShaderCompileWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem + { + public: + ShaderCompileWorkItem(const SHADERUID& uid); + + bool Compile() override; + void Retrieve() override; + + private: + SHADERUID m_uid; + SHADER m_program; + }; + + class UberShaderCompileWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem + { + public: + UberShaderCompileWorkItem(const UBERSHADERUID& uid); + + bool Compile() override; + void Retrieve() override; + + private: + UBERSHADERUID m_uid; + SHADER m_program; + }; + + typedef std::map PCache; + typedef std::map UberPCache; + + static GLuint CreateProgramFromBinary(const u8* value, u32 value_size); + static bool CreateCacheEntryFromBinary(PCacheEntry* entry, const u8* value, u32 value_size); static void LoadProgramBinaries(); static void SaveProgramBinaries(); static void DestroyShaders(); + static void CreatePrerenderArrays(SharedContextData* data); + static void DestroyPrerenderArrays(SharedContextData* data); + static void DrawPrerenderArray(const SHADER& shader, u32 primitive_type); - typedef std::map PCache; static PCache pshaders; + static UberPCache ubershaders; static PCacheEntry* last_entry; + static PCacheEntry* last_uber_entry; static SHADERUID last_uid; + static UBERSHADERUID last_uber_uid; + static std::unique_ptr 
s_async_compiler; static u32 s_ubo_buffer_size; static s32 s_ubo_align; + static u32 s_last_VAO; }; } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 66edbf5d74..b8a14869d0 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -1472,6 +1472,7 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, // Clean out old stuff from caches. It's not worth it to clean out the shader caches. g_texture_cache->Cleanup(frameCount); + ProgramShaderCache::RetrieveAsyncShaders(); // Render to the framebuffer. FramebufferManager::SetFramebuffer(0); @@ -1768,10 +1769,9 @@ void Renderer::RestoreAPIState() SetBlendMode(true); SetViewport(); + ProgramShaderCache::BindLastVertexFormat(); const VertexManager* const vm = static_cast(g_vertex_manager.get()); glBindBuffer(GL_ARRAY_BUFFER, vm->m_vertex_buffers); - if (vm->m_last_vao) - glBindVertexArray(vm->m_last_vao); OGLTexture::SetStage(); } diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index 8ec6a21e0d..e8df3d4301 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -58,6 +58,7 @@ struct VideoConfig bool bSupportsConservativeDepth; bool bSupportsImageLoadStore; bool bSupportsAniso; + bool bSupportsBitfield; const char* gl_vendor; const char* gl_renderer; diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp index eb7ce04c92..9f8d87eceb 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp @@ -53,8 +53,6 @@ void VertexManager::CreateDeviceObjects() s_indexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE); m_index_buffers = s_indexBuffer->m_buffer; - - m_last_vao = 0; } void VertexManager::DestroyDeviceObjects() @@ -142,22 +140,13 @@ void VertexManager::vFlush() 
GLVertexFormat* nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat(); u32 stride = nativeVertexFmt->GetVertexStride(); - if (m_last_vao != nativeVertexFmt->VAO) - { - glBindVertexArray(nativeVertexFmt->VAO); - m_last_vao = nativeVertexFmt->VAO; - } + ProgramShaderCache::SetShader(m_current_primitive_type, nativeVertexFmt); PrepareDrawBuffers(stride); - ProgramShaderCache::SetShader(m_current_primitive_type); - // upload global constants ProgramShaderCache::UploadConstants(); - // setup the pointers - nativeVertexFmt->SetupVertexPointers(); - if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation()) { glEnable(GL_STENCIL_TEST); @@ -171,24 +160,6 @@ void VertexManager::vFlush() glDisable(GL_STENCIL_TEST); } -#if defined(_DEBUG) || defined(DEBUGFAST) - if (g_ActiveConfig.iLog & CONF_SAVESHADERS) - { - // save the shaders - ProgramShaderCache::PCacheEntry prog = ProgramShaderCache::GetShaderProgram(); - std::string filename = StringFromFormat( - "%sps%.3d.txt", File::GetUserPath(D_DUMPFRAMES_IDX).c_str(), g_ActiveConfig.iSaveTargetId); - std::ofstream fps; - File::OpenFStream(fps, filename, std::ios_base::out); - fps << prog.shader.strpprog; - - filename = StringFromFormat("%svs%.3d.txt", File::GetUserPath(D_DUMPFRAMES_IDX).c_str(), - g_ActiveConfig.iSaveTargetId); - std::ofstream fvs; - File::OpenFStream(fvs, filename, std::ios_base::out); - fvs << prog.shader.strvprog; - } -#endif g_Config.iSaveTargetId++; ClearEFBCache(); } diff --git a/Source/Core/VideoBackends/OGL/VertexManager.h b/Source/Core/VideoBackends/OGL/VertexManager.h index 7ba18b6ecc..25a1ba7c3b 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.h +++ b/Source/Core/VideoBackends/OGL/VertexManager.h @@ -42,7 +42,6 @@ public: // NativeVertexFormat use this GLuint m_vertex_buffers; GLuint m_index_buffers; - GLuint m_last_vao; protected: void ResetBuffer(u32 stride) override; diff --git a/Source/Core/VideoCommon/DriverDetails.cpp 
b/Source/Core/VideoCommon/DriverDetails.cpp index e81bdfb2af..fb12fc04b8 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -98,7 +98,10 @@ static BugInfo m_known_bugs[] = { BUG_BROKEN_BITWISE_OP_NEGATION, -1.0, -1.0, true}, {API_VULKAN, OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_PRIMITIVE_RESTART, -1.0, -1.0, true}, -}; + {API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, + BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true}, + {API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN, + BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true}}; static std::map m_bugs; diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index 009ef1fdd5..d256b51918 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -247,6 +247,12 @@ enum Bug // fail compilation with no useful diagnostic log. This can be worked around by storing // the negated value to a temporary variable then using that in the bitwise op. BUG_BROKEN_BITWISE_OP_NEGATION, + + // Bug: Shaders are recompiled on the main thread after being previously compiled on + // a worker thread on Mesa i965 and nouveau. + // Started version: -1 + // Ended Version: -1 + BUG_SHARED_CONTEXT_SHADER_COMPILATION, }; // Initializes our internal vendor, device family, and driver version