mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-18 03:59:14 +01:00
86f8768268
These are only ever used with ShaderCode instances and nothing else. Given that, we can convert these helper functions to expect that type of object as an argument and remove the need for templates, improving compiler throughput a marginal amount, as the template instantiation process doesn't need to be performed. We can also move the definitions of these functions into the cpp file, which allows us to remove a few inclusions from the ShaderGenCommon header. This uncovered a few instances of indirect inclusions being relied upon in other source files. One other benefit is this allows changes to be made to the definitions of the functions without needing to recompile all translation units that make use of these functions, making change testing a little quicker. Moving the definitions into the cpp file also allows us to completely hide DefineOutputMember() from external view, given it's only ever used inside of GenerateVSOutputMembers().
890 lines
29 KiB
C++
890 lines
29 KiB
C++
// Copyright 2011 Dolphin Emulator Project
|
|
// Licensed under GPLv2+
|
|
// Refer to the license.txt file included.
|
|
|
|
#include "VideoBackends/OGL/ProgramShaderCache.h"
|
|
|
|
#include <array>
|
|
#include <atomic>
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
#include "Common/Align.h"
|
|
#include "Common/Assert.h"
|
|
#include "Common/CommonTypes.h"
|
|
#include "Common/FileUtil.h"
|
|
#include "Common/GL/GLContext.h"
|
|
#include "Common/Logging/Log.h"
|
|
#include "Common/MsgHandler.h"
|
|
#include "Common/StringUtil.h"
|
|
#include "Common/Version.h"
|
|
|
|
#include "Core/ConfigManager.h"
|
|
|
|
#include "VideoBackends/OGL/OGLShader.h"
|
|
#include "VideoBackends/OGL/Render.h"
|
|
#include "VideoBackends/OGL/StreamBuffer.h"
|
|
#include "VideoBackends/OGL/VertexManager.h"
|
|
|
|
#include "VideoCommon/AsyncShaderCompiler.h"
|
|
#include "VideoCommon/GeometryShaderManager.h"
|
|
#include "VideoCommon/PixelShaderManager.h"
|
|
#include "VideoCommon/Statistics.h"
|
|
#include "VideoCommon/VertexLoaderManager.h"
|
|
#include "VideoCommon/VertexShaderManager.h"
|
|
#include "VideoCommon/VideoBackendBase.h"
|
|
#include "VideoCommon/VideoConfig.h"
|
|
|
|
namespace OGL
|
|
{
|
|
u32 ProgramShaderCache::s_ubo_buffer_size;
|
|
s32 ProgramShaderCache::s_ubo_align;
|
|
GLuint ProgramShaderCache::s_attributeless_VBO = 0;
|
|
GLuint ProgramShaderCache::s_attributeless_VAO = 0;
|
|
GLuint ProgramShaderCache::s_last_VAO = 0;
|
|
|
|
static std::unique_ptr<StreamBuffer> s_buffer;
|
|
static int num_failures = 0;
|
|
|
|
static GLuint CurrentProgram = 0;
|
|
ProgramShaderCache::PipelineProgramMap ProgramShaderCache::s_pipeline_programs;
|
|
std::mutex ProgramShaderCache::s_pipeline_program_lock;
|
|
static std::string s_glsl_header;
|
|
static std::atomic<u64> s_shader_counter{0};
|
|
static thread_local bool s_is_shared_context = false;
|
|
|
|
static std::string GetGLSLVersionString()
|
|
{
|
|
GlslVersion v = g_ogl_config.eSupportedGLSLVersion;
|
|
switch (v)
|
|
{
|
|
case GlslEs300:
|
|
return "#version 300 es";
|
|
case GlslEs310:
|
|
return "#version 310 es";
|
|
case GlslEs320:
|
|
return "#version 320 es";
|
|
case Glsl130:
|
|
return "#version 130";
|
|
case Glsl140:
|
|
return "#version 140";
|
|
case Glsl150:
|
|
return "#version 150";
|
|
case Glsl330:
|
|
return "#version 330";
|
|
case Glsl400:
|
|
return "#version 400";
|
|
case Glsl430:
|
|
return "#version 430";
|
|
default:
|
|
// Shouldn't ever hit this
|
|
return "#version ERROR";
|
|
}
|
|
}
|
|
|
|
void SHADER::SetProgramVariables()
|
|
{
|
|
if (g_ActiveConfig.backend_info.bSupportsBindingLayout)
|
|
return;
|
|
|
|
// To set uniform blocks/uniforms, the program must be active. We restore the
|
|
// current binding at the end of this method to maintain the invariant.
|
|
glUseProgram(glprogid);
|
|
|
|
// Bind UBO and texture samplers
|
|
GLint PSBlock_id = glGetUniformBlockIndex(glprogid, "PSBlock");
|
|
GLint VSBlock_id = glGetUniformBlockIndex(glprogid, "VSBlock");
|
|
GLint GSBlock_id = glGetUniformBlockIndex(glprogid, "GSBlock");
|
|
GLint UBERBlock_id = glGetUniformBlockIndex(glprogid, "UBERBlock");
|
|
if (PSBlock_id != -1)
|
|
glUniformBlockBinding(glprogid, PSBlock_id, 1);
|
|
if (VSBlock_id != -1)
|
|
glUniformBlockBinding(glprogid, VSBlock_id, 2);
|
|
if (GSBlock_id != -1)
|
|
glUniformBlockBinding(glprogid, GSBlock_id, 3);
|
|
if (UBERBlock_id != -1)
|
|
glUniformBlockBinding(glprogid, UBERBlock_id, 4);
|
|
|
|
// Bind Texture Samplers
|
|
for (int a = 0; a < 8; ++a)
|
|
{
|
|
// Still need to get sampler locations since we aren't binding them statically in the shaders
|
|
int loc = glGetUniformLocation(glprogid, StringFromFormat("samp[%d]", a).c_str());
|
|
if (loc < 0)
|
|
loc = glGetUniformLocation(glprogid, StringFromFormat("samp%d", a).c_str());
|
|
if (loc >= 0)
|
|
glUniform1i(loc, a);
|
|
}
|
|
|
|
// Restore previous program binding.
|
|
glUseProgram(CurrentProgram);
|
|
}
|
|
|
|
void SHADER::SetProgramBindings(bool is_compute)
|
|
{
|
|
if (!is_compute)
|
|
{
|
|
if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
|
|
{
|
|
// So we do support extended blending
|
|
// So we need to set a few more things here.
|
|
// Bind our out locations
|
|
glBindFragDataLocationIndexed(glprogid, 0, 0, "ocol0");
|
|
glBindFragDataLocationIndexed(glprogid, 0, 1, "ocol1");
|
|
}
|
|
// Need to set some attribute locations
|
|
glBindAttribLocation(glprogid, SHADER_POSITION_ATTRIB, "rawpos");
|
|
|
|
glBindAttribLocation(glprogid, SHADER_POSMTX_ATTRIB, "posmtx");
|
|
|
|
glBindAttribLocation(glprogid, SHADER_COLOR0_ATTRIB, "rawcolor0");
|
|
glBindAttribLocation(glprogid, SHADER_COLOR1_ATTRIB, "rawcolor1");
|
|
|
|
glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB, "rawnorm0");
|
|
glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB, "rawnorm1");
|
|
glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB, "rawnorm2");
|
|
}
|
|
|
|
for (int i = 0; i < 8; i++)
|
|
{
|
|
std::string attrib_name = StringFromFormat("rawtex%d", i);
|
|
glBindAttribLocation(glprogid, SHADER_TEXTURE0_ATTRIB + i, attrib_name.c_str());
|
|
}
|
|
}
|
|
|
|
void SHADER::Bind() const
|
|
{
|
|
if (CurrentProgram != glprogid)
|
|
{
|
|
INCSTAT(g_stats.this_frame.num_shader_changes);
|
|
glUseProgram(glprogid);
|
|
CurrentProgram = glprogid;
|
|
}
|
|
}
|
|
|
|
void SHADER::DestroyShaders()
|
|
{
|
|
if (vsid)
|
|
{
|
|
glDeleteShader(vsid);
|
|
vsid = 0;
|
|
}
|
|
if (gsid)
|
|
{
|
|
glDeleteShader(gsid);
|
|
gsid = 0;
|
|
}
|
|
if (psid)
|
|
{
|
|
glDeleteShader(psid);
|
|
psid = 0;
|
|
}
|
|
}
|
|
|
|
bool PipelineProgramKey::operator!=(const PipelineProgramKey& rhs) const
|
|
{
|
|
return !operator==(rhs);
|
|
}
|
|
|
|
bool PipelineProgramKey::operator==(const PipelineProgramKey& rhs) const
|
|
{
|
|
return std::tie(vertex_shader_id, geometry_shader_id, pixel_shader_id) ==
|
|
std::tie(rhs.vertex_shader_id, rhs.geometry_shader_id, rhs.pixel_shader_id);
|
|
}
|
|
|
|
bool PipelineProgramKey::operator<(const PipelineProgramKey& rhs) const
|
|
{
|
|
return std::tie(vertex_shader_id, geometry_shader_id, pixel_shader_id) <
|
|
std::tie(rhs.vertex_shader_id, rhs.geometry_shader_id, rhs.pixel_shader_id);
|
|
}
|
|
|
|
std::size_t PipelineProgramKeyHash::operator()(const PipelineProgramKey& key) const
|
|
{
|
|
// We would really want std::hash_combine for this..
|
|
std::hash<u64> hasher;
|
|
return hasher(key.vertex_shader_id) + hasher(key.geometry_shader_id) +
|
|
hasher(key.pixel_shader_id);
|
|
}
|
|
|
|
// Returns a non-owning pointer to the uniform stream buffer (null while none is allocated).
StreamBuffer* ProgramShaderCache::GetUniformBuffer()
{
  StreamBuffer* const uniform_buffer = s_buffer.get();
  return uniform_buffer;
}
|
|
|
|
// Returns the uniform buffer offset alignment obtained in Init().
u32 ProgramShaderCache::GetUniformBufferAlignment()
{
  // Stored as a signed value because glGetIntegerv writes a GLint.
  return static_cast<u32>(s_ubo_align);
}
|
|
|
|
void ProgramShaderCache::UploadConstants()
|
|
{
|
|
if (PixelShaderManager::dirty || VertexShaderManager::dirty || GeometryShaderManager::dirty)
|
|
{
|
|
auto buffer = s_buffer->Map(s_ubo_buffer_size, s_ubo_align);
|
|
|
|
memcpy(buffer.first, &PixelShaderManager::constants, sizeof(PixelShaderConstants));
|
|
|
|
memcpy(buffer.first + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align),
|
|
&VertexShaderManager::constants, sizeof(VertexShaderConstants));
|
|
|
|
memcpy(buffer.first + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) +
|
|
Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align),
|
|
&GeometryShaderManager::constants, sizeof(GeometryShaderConstants));
|
|
|
|
s_buffer->Unmap(s_ubo_buffer_size);
|
|
glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->m_buffer, buffer.second,
|
|
sizeof(PixelShaderConstants));
|
|
glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->m_buffer,
|
|
buffer.second + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align),
|
|
sizeof(VertexShaderConstants));
|
|
glBindBufferRange(GL_UNIFORM_BUFFER, 3, s_buffer->m_buffer,
|
|
buffer.second + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) +
|
|
Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align),
|
|
sizeof(GeometryShaderConstants));
|
|
|
|
PixelShaderManager::dirty = false;
|
|
VertexShaderManager::dirty = false;
|
|
GeometryShaderManager::dirty = false;
|
|
|
|
ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, s_ubo_buffer_size);
|
|
}
|
|
}
|
|
|
|
void ProgramShaderCache::UploadConstants(const void* data, u32 data_size)
|
|
{
|
|
// allocate and copy
|
|
const u32 alloc_size = Common::AlignUp(data_size, s_ubo_align);
|
|
auto buffer = s_buffer->Map(alloc_size, s_ubo_align);
|
|
std::memcpy(buffer.first, data, data_size);
|
|
s_buffer->Unmap(alloc_size);
|
|
|
|
// bind the same sub-buffer to all stages
|
|
for (u32 index = 1; index <= 3; index++)
|
|
glBindBufferRange(GL_UNIFORM_BUFFER, index, s_buffer->m_buffer, buffer.second, data_size);
|
|
|
|
ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, data_size);
|
|
}
|
|
|
|
// Compiles and links a compute program from `code` into `shader`.
// Returns true on success; on failure returns false, and if linking failed the program is
// destroyed.
bool ProgramShaderCache::CompileComputeShader(SHADER& shader, std::string_view code)
{
  // Drivers that expose compute shaders as an extension without GLSL 4.3 (Mesa is one example)
  // need the extension enabled explicitly.
  const bool needs_extension = g_ActiveConfig.backend_info.bSupportsComputeShaders &&
                               g_ogl_config.eSupportedGLSLVersion < Glsl430;
  std::string full_code = needs_extension ? "#extension GL_ARB_compute_shader : enable\n" : "";
  full_code += code;

  const GLuint compute_shader = CompileSingleShader(GL_COMPUTE_SHADER, full_code);
  if (compute_shader == 0)
    return false;

  shader.glprogid = glCreateProgram();
  glAttachShader(shader.glprogid, compute_shader);
  shader.SetProgramBindings(true);
  glLinkProgram(shader.glprogid);

  // The shader object is no longer needed once the link has been requested.
  glDeleteShader(compute_shader);

  const bool linked = CheckProgramLinkResult(shader.glprogid, full_code, {}, {});
  if (!linked)
  {
    shader.Destroy();
    return false;
  }

  shader.SetProgramVariables();
  return true;
}
|
|
|
|
// Compiles one shader stage, with the shared GLSL header prepended to `code`.
// Returns the GL shader object, or 0 if compilation failed.
GLuint ProgramShaderCache::CompileSingleShader(GLenum type, std::string_view code)
{
  const GLuint shader_id = glCreateShader(type);

  // Header and stage source are passed as two separate strings with explicit lengths.
  const char* const sources[] = {s_glsl_header.data(), code.data()};
  const GLint lengths[] = {static_cast<GLint>(s_glsl_header.size()),
                           static_cast<GLint>(code.size())};
  glShaderSource(shader_id, 2, sources, lengths);
  glCompileShader(shader_id);

  if (CheckShaderCompileResult(shader_id, type, code))
    return shader_id;

  // Compilation failed: release the object so it cannot be used by mistake.
  glDeleteShader(shader_id);
  return 0;
}
|
|
|
|
// Checks the compile status of shader `id` and reports any driver output.
//
// id:   GL shader object that has just been compiled.
// type: shader stage enum, used only to pick the "vs"/"gs"/"ps"/"cs" prefix for messages.
// code: stage source (without the shared header), written to the dump file on failure.
//
// Returns false if compilation failed; in that case the source and info log are written to a
// numbered dump file and a panic alert is raised. Returns true otherwise, logging a warning if
// the driver produced a non-trivial info log.
bool ProgramShaderCache::CheckShaderCompileResult(GLuint id, GLenum type, std::string_view code)
{
  // Start from "failed" so a query that writes nothing is not mistaken for success.
  GLint compile_status = GL_FALSE;
  glGetShaderiv(id, GL_COMPILE_STATUS, &compile_status);
  GLsizei length = 0;
  glGetShaderiv(id, GL_INFO_LOG_LENGTH, &length);

  const bool compiled = compile_status == GL_TRUE;
  if (compiled && length <= 1)
    return true;

  // GL_INFO_LOG_LENGTH counts the null terminator. Fetch into a buffer of that size, then trim
  // to the number of characters actually written so no NUL byte ends up in logs or dump files.
  std::string info_log;
  if (length > 0)
  {
    info_log.resize(static_cast<std::size_t>(length));
    GLsizei written = 0;
    glGetShaderInfoLog(id, length, &written, &info_log[0]);
    info_log.resize(written > 0 ? static_cast<std::size_t>(written) : 0);
  }

  const char* prefix = "";
  switch (type)
  {
  case GL_VERTEX_SHADER:
    prefix = "vs";
    break;
  case GL_GEOMETRY_SHADER:
    prefix = "gs";
    break;
  case GL_FRAGMENT_SHADER:
    prefix = "ps";
    break;
  case GL_COMPUTE_SHADER:
    prefix = "cs";
    break;
  }

  if (!compiled)
  {
    ERROR_LOG(VIDEO, "%s failed compilation:\n%s", prefix, info_log.c_str());

    // Dump the full source plus diagnostics so the failure can be reported.
    std::string filename = VideoBackendBase::BadShaderFilename(prefix, num_failures++);
    std::ofstream file;
    File::OpenFStream(file, filename, std::ios_base::out);
    file << s_glsl_header << code << info_log;
    file << "\n";
    file << "Dolphin Version: " << Common::scm_rev_str << "\n";
    file << "Video Backend: " << g_video_backend->GetDisplayName();
    file.close();

    PanicAlert("Failed to compile %s shader: %s\n"
               "Debug info (%s, %s, %s):\n%s",
               prefix, filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
               g_ogl_config.gl_version, info_log.c_str());

    return false;
  }

  WARN_LOG(VIDEO, "%s compiled with warnings:\n%s", prefix, info_log.c_str());
  return true;
}
|
|
|
|
// Checks the link status of program `id` and reports any driver output.
//
// id:    GL program object that has just been linked.
// vcode: vertex (or compute) stage source; skipped in the dump when empty.
// pcode: pixel stage source; skipped in the dump when empty.
// gcode: geometry stage source; skipped in the dump when empty.
// Note the parameter order is vertex, pixel, geometry, while the dump file is written in the
// order vertex, geometry, pixel.
//
// Returns false if linking failed; in that case the sources and info log are written to a
// numbered dump file and a panic alert is raised. Returns true otherwise, logging a warning if
// the driver produced a non-trivial info log.
bool ProgramShaderCache::CheckProgramLinkResult(GLuint id, std::string_view vcode,
                                                std::string_view pcode, std::string_view gcode)
{
  // Start from "failed" so a query that writes nothing is not mistaken for success.
  GLint link_status = GL_FALSE;
  glGetProgramiv(id, GL_LINK_STATUS, &link_status);
  GLsizei length = 0;
  glGetProgramiv(id, GL_INFO_LOG_LENGTH, &length);

  const bool linked = link_status == GL_TRUE;
  if (linked && length <= 1)
    return true;

  // GL_INFO_LOG_LENGTH counts the null terminator. Fetch into a buffer of that size, then trim
  // to the number of characters actually written so no NUL byte ends up in logs or dump files.
  std::string info_log;
  if (length > 0)
  {
    info_log.resize(static_cast<std::size_t>(length));
    GLsizei written = 0;
    glGetProgramInfoLog(id, length, &written, &info_log[0]);
    info_log.resize(written > 0 ? static_cast<std::size_t>(written) : 0);
  }

  if (!linked)
  {
    ERROR_LOG(VIDEO, "Program failed linking:\n%s", info_log.c_str());

    // Dump every stage that was supplied, each preceded by the shared header.
    std::string filename = VideoBackendBase::BadShaderFilename("p", num_failures++);
    std::ofstream file;
    File::OpenFStream(file, filename, std::ios_base::out);
    if (!vcode.empty())
      file << s_glsl_header << vcode << '\n';
    if (!gcode.empty())
      file << s_glsl_header << gcode << '\n';
    if (!pcode.empty())
      file << s_glsl_header << pcode << '\n';

    file << info_log;
    file << "\n";
    file << "Dolphin Version: " << Common::scm_rev_str << "\n";
    file << "Video Backend: " << g_video_backend->GetDisplayName();
    file.close();

    PanicAlert("Failed to link shaders: %s\n"
               "Debug info (%s, %s, %s):\n%s",
               filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
               g_ogl_config.gl_version, info_log.c_str());

    return false;
  }

  WARN_LOG(VIDEO, "Program linked with warnings:\n%s", info_log.c_str());
  return true;
}
|
|
|
|
void ProgramShaderCache::Init()
|
|
{
|
|
// We have to get the UBO alignment here because
|
|
// if we generate a buffer that isn't aligned
|
|
// then the UBO will fail.
|
|
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &s_ubo_align);
|
|
|
|
s_ubo_buffer_size =
|
|
static_cast<u32>(Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) +
|
|
Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align) +
|
|
Common::AlignUp(sizeof(GeometryShaderConstants), s_ubo_align));
|
|
|
|
// We multiply by *4*4 because we need to get down to basic machine units.
|
|
// So multiply by four to get how many floats we have from vec4s
|
|
// Then once more to get bytes
|
|
s_buffer = StreamBuffer::Create(GL_UNIFORM_BUFFER, VertexManagerBase::UNIFORM_STREAM_BUFFER_SIZE);
|
|
|
|
CreateHeader();
|
|
CreateAttributelessVAO();
|
|
|
|
CurrentProgram = 0;
|
|
}
|
|
|
|
void ProgramShaderCache::Shutdown()
|
|
{
|
|
s_buffer.reset();
|
|
|
|
glBindVertexArray(0);
|
|
glDeleteBuffers(1, &s_attributeless_VBO);
|
|
glDeleteVertexArrays(1, &s_attributeless_VAO);
|
|
s_attributeless_VBO = 0;
|
|
s_attributeless_VAO = 0;
|
|
s_last_VAO = 0;
|
|
|
|
// All pipeline programs should have been released.
|
|
DEBUG_ASSERT(s_pipeline_programs.empty());
|
|
s_pipeline_programs.clear();
|
|
}
|
|
|
|
void ProgramShaderCache::CreateAttributelessVAO()
|
|
{
|
|
glGenVertexArrays(1, &s_attributeless_VAO);
|
|
|
|
// In a compatibility context, we require a valid, bound array buffer.
|
|
glGenBuffers(1, &s_attributeless_VBO);
|
|
|
|
// Initialize the buffer with nothing. 16 floats is an arbitrary size that may work around driver
|
|
// issues.
|
|
glBindBuffer(GL_ARRAY_BUFFER, s_attributeless_VBO);
|
|
glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * 16, nullptr, GL_STATIC_DRAW);
|
|
|
|
// We must also define vertex attribute 0.
|
|
glBindVertexArray(s_attributeless_VAO);
|
|
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, nullptr);
|
|
glEnableVertexAttribArray(0);
|
|
}
|
|
|
|
void ProgramShaderCache::BindVertexFormat(const GLVertexFormat* vertex_format)
|
|
{
|
|
u32 new_VAO = vertex_format ? vertex_format->VAO : s_attributeless_VAO;
|
|
if (s_last_VAO == new_VAO)
|
|
return;
|
|
|
|
glBindVertexArray(new_VAO);
|
|
s_last_VAO = new_VAO;
|
|
}
|
|
|
|
bool ProgramShaderCache::IsValidVertexFormatBound()
|
|
{
|
|
return s_last_VAO != 0 && s_last_VAO != s_attributeless_VAO;
|
|
}
|
|
|
|
void ProgramShaderCache::InvalidateVertexFormat()
|
|
{
|
|
s_last_VAO = 0;
|
|
}
|
|
|
|
// Forgets the recorded VAO, but only if it is `vao`.
void ProgramShaderCache::InvalidateVertexFormatIfBound(GLuint vao)
{
  if (s_last_VAO != vao)
    return;

  s_last_VAO = 0;
}
|
|
|
|
void ProgramShaderCache::InvalidateLastProgram()
|
|
{
|
|
CurrentProgram = 0;
|
|
}
|
|
|
|
// Returns the linked program for a combination of shaders, creating it when it is not cached.
//
// vertex_format:   vertex format whose VAO is bound while linking (null selects the
//                  attribute-less VAO). Only used when linking from shader objects.
// vertex_shader, geometry_shader, pixel_shader: stages to link; their IDs form the cache key.
//                  Vertex and pixel shaders are required when linking from shader objects.
// cache_data, cache_data_size: optional program binary, stored as a u32 binary format
//                  followed by the binary itself. Used when at least sizeof(u32) bytes long.
//
// Returns the cached or newly created program, or nullptr if the binary was rejected or the
// link failed. When an existing entry is returned its reference_count is incremented;
// ReleasePipelineProgram() decrements it.
PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexFormat* vertex_format,
                                                        const OGLShader* vertex_shader,
                                                        const OGLShader* geometry_shader,
                                                        const OGLShader* pixel_shader,
                                                        const void* cache_data,
                                                        size_t cache_data_size)
{
  PipelineProgramKey key = {vertex_shader ? vertex_shader->GetID() : 0,
                            geometry_shader ? geometry_shader->GetID() : 0,
                            pixel_shader ? pixel_shader->GetID() : 0};
  {
    // Fast path: the program already exists. The lock is dropped before any compilation work.
    std::lock_guard guard{s_pipeline_program_lock};
    auto iter = s_pipeline_programs.find(key);
    if (iter != s_pipeline_programs.end())
    {
      iter->second->reference_count++;
      return iter->second.get();
    }
  }

  std::unique_ptr<PipelineProgram> prog = std::make_unique<PipelineProgram>();
  prog->key = key;
  prog->shader.glprogid = glCreateProgram();

  // Use the cache data, if present. If this fails, we want to return an error, so the shader cache
  // doesn't attempt to use the same binary data in the future.
  if (cache_data_size >= sizeof(u32))
  {
    // The blob starts with the binary format, followed by the program binary.
    u32 program_binary_type;
    std::memcpy(&program_binary_type, cache_data, sizeof(u32));
    glProgramBinary(prog->shader.glprogid, static_cast<GLenum>(program_binary_type),
                    static_cast<const u8*>(cache_data) + sizeof(u32),
                    static_cast<GLsizei>(cache_data_size - sizeof(u32)));

    // Check the link status. If this fails, it means the binary was invalid.
    GLint link_status;
    glGetProgramiv(prog->shader.glprogid, GL_LINK_STATUS, &link_status);
    if (link_status != GL_TRUE)
    {
      WARN_LOG(VIDEO, "Failed to create GL program from program binary.");
      prog->shader.Destroy();
      return nullptr;
    }

    // We don't want to retrieve this binary and duplicate entries in the cache again.
    // See the explanation in OGLPipeline.cpp.
    prog->binary_retrieved = true;
  }
  else
  {
    // We temporarily change the vertex array to the pipeline's vertex format.
    // This can prevent the NVIDIA OpenGL driver from recompiling on first use.
    GLuint vao = vertex_format ? vertex_format->VAO : s_attributeless_VAO;
    if (s_is_shared_context || vao != s_last_VAO)
      glBindVertexArray(vao);

    // Attach shaders.
    ASSERT(vertex_shader && vertex_shader->GetStage() == ShaderStage::Vertex);
    ASSERT(pixel_shader && pixel_shader->GetStage() == ShaderStage::Pixel);
    glAttachShader(prog->shader.glprogid, vertex_shader->GetGLShaderID());
    glAttachShader(prog->shader.glprogid, pixel_shader->GetGLShaderID());
    if (geometry_shader)
    {
      ASSERT(geometry_shader->GetStage() == ShaderStage::Geometry);
      glAttachShader(prog->shader.glprogid, geometry_shader->GetGLShaderID());
    }

    if (g_ActiveConfig.backend_info.bSupportsPipelineCacheData)
      glProgramParameteri(prog->shader.glprogid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);

    // Link program.
    prog->shader.SetProgramBindings(false);
    glLinkProgram(prog->shader.glprogid);

    // Restore VAO binding after linking. Shared contexts do not track s_last_VAO, so there is
    // nothing to restore there.
    if (!s_is_shared_context && vao != s_last_VAO)
      glBindVertexArray(s_last_VAO);

    // CheckProgramLinkResult takes its sources in the order (vertex, pixel, geometry).
    if (!CheckProgramLinkResult(prog->shader.glprogid,
                                vertex_shader ? vertex_shader->GetSource() : std::string_view{},
                                pixel_shader ? pixel_shader->GetSource() : std::string_view{},
                                geometry_shader ? geometry_shader->GetSource() : std::string_view{}))
    {
      prog->shader.Destroy();
      return nullptr;
    }
  }

  // Lock to insert. A duplicate program may have been created in the meantime.
  std::lock_guard guard{s_pipeline_program_lock};
  auto iter = s_pipeline_programs.find(key);
  if (iter != s_pipeline_programs.end())
  {
    // Destroy this program, and use the one which was created first.
    prog->shader.Destroy();
    iter->second->reference_count++;
    return iter->second.get();
  }

  // Set program variables on the shader which will be returned.
  // This is only needed for drivers which don't support binding layout.
  prog->shader.SetProgramVariables();

  // If this is a shared context, ensure we sync before we return the program to
  // the main thread. If we don't do this, some driver can lock up (e.g. AMD).
  if (s_is_shared_context)
    glFinish();

  auto ip = s_pipeline_programs.emplace(key, std::move(prog));
  return ip.first->second.get();
}
|
|
|
|
// Drops one reference to `prog`. When the count reaches zero the GL program is destroyed and
// the entry is removed from the program map.
void ProgramShaderCache::ReleasePipelineProgram(PipelineProgram* prog)
{
  const auto remaining_references = --prog->reference_count;
  if (remaining_references > 0)
    return;

  prog->shader.Destroy();

  // The map is only touched while holding the lock.
  std::lock_guard guard{s_pipeline_program_lock};
  const auto entry = s_pipeline_programs.find(prog->key);
  ASSERT(entry != s_pipeline_programs.end() && prog == entry->second.get());
  s_pipeline_programs.erase(entry);
}
|
|
|
|
void ProgramShaderCache::CreateHeader()
|
|
{
|
|
GlslVersion v = g_ogl_config.eSupportedGLSLVersion;
|
|
bool is_glsles = v >= GlslEs300;
|
|
std::string SupportedESPointSize;
|
|
std::string SupportedESTextureBuffer;
|
|
switch (g_ogl_config.SupportedESPointSize)
|
|
{
|
|
case 1:
|
|
SupportedESPointSize = "#extension GL_OES_geometry_point_size : enable";
|
|
break;
|
|
case 2:
|
|
SupportedESPointSize = "#extension GL_EXT_geometry_point_size : enable";
|
|
break;
|
|
default:
|
|
SupportedESPointSize = "";
|
|
break;
|
|
}
|
|
|
|
switch (g_ogl_config.SupportedESTextureBuffer)
|
|
{
|
|
case EsTexbufType::TexbufExt:
|
|
SupportedESTextureBuffer = "#extension GL_EXT_texture_buffer : enable";
|
|
break;
|
|
case EsTexbufType::TexbufOes:
|
|
SupportedESTextureBuffer = "#extension GL_OES_texture_buffer : enable";
|
|
break;
|
|
case EsTexbufType::TexbufCore:
|
|
case EsTexbufType::TexbufNone:
|
|
SupportedESTextureBuffer = "";
|
|
break;
|
|
}
|
|
|
|
std::string earlyz_string;
|
|
if (g_ActiveConfig.backend_info.bSupportsEarlyZ)
|
|
{
|
|
if (g_ogl_config.bSupportsImageLoadStore)
|
|
{
|
|
earlyz_string = "#define FORCE_EARLY_Z layout(early_fragment_tests) in\n";
|
|
}
|
|
else if (g_ogl_config.bSupportsConservativeDepth)
|
|
{
|
|
// See PixelShaderGen for details about this fallback.
|
|
earlyz_string = "#define FORCE_EARLY_Z layout(depth_unchanged) out float gl_FragDepth\n";
|
|
earlyz_string += "#extension GL_ARB_conservative_depth : enable\n";
|
|
}
|
|
}
|
|
|
|
std::string framebuffer_fetch_string;
|
|
switch (g_ogl_config.SupportedFramebufferFetch)
|
|
{
|
|
case EsFbFetchType::FbFetchExt:
|
|
framebuffer_fetch_string = "#extension GL_EXT_shader_framebuffer_fetch: enable\n"
|
|
"#define FB_FETCH_VALUE real_ocol0\n"
|
|
"#define FRAGMENT_INOUT inout";
|
|
break;
|
|
case EsFbFetchType::FbFetchArm:
|
|
framebuffer_fetch_string = "#extension GL_ARM_shader_framebuffer_fetch: enable\n"
|
|
"#define FB_FETCH_VALUE gl_LastFragColorARM\n"
|
|
"#define FRAGMENT_INOUT out";
|
|
break;
|
|
case EsFbFetchType::FbFetchNone:
|
|
framebuffer_fetch_string = "";
|
|
break;
|
|
}
|
|
|
|
std::string shader_shuffle_string;
|
|
if (g_ogl_config.bSupportsShaderThreadShuffleNV)
|
|
{
|
|
shader_shuffle_string = R"(
|
|
#extension GL_NV_shader_thread_group : enable
|
|
#extension GL_NV_shader_thread_shuffle : enable
|
|
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
|
|
|
// The xor shuffle below produces incorrect results if all threads in a warp are not active.
|
|
#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu)
|
|
|
|
#define IS_HELPER_INVOCATION gl_HelperThreadNV
|
|
#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV)))
|
|
#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \
|
|
value = func(value, shuffleXorNV(value, 8, 32)); \
|
|
value = func(value, shuffleXorNV(value, 4, 32)); \
|
|
value = func(value, shuffleXorNV(value, 2, 32)); \
|
|
value = func(value, shuffleXorNV(value, 1, 32));
|
|
#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value)
|
|
#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value)
|
|
)";
|
|
}
|
|
|
|
s_glsl_header = StringFromFormat(
|
|
"%s\n"
|
|
"%s\n" // ubo
|
|
"%s\n" // early-z
|
|
"%s\n" // 420pack
|
|
"%s\n" // msaa
|
|
"%s\n" // Input/output/sampler binding
|
|
"%s\n" // Varying location
|
|
"%s\n" // storage buffer
|
|
"%s\n" // shader5
|
|
"%s\n" // SSAA
|
|
"%s\n" // Geometry point size
|
|
"%s\n" // AEP
|
|
"%s\n" // texture buffer
|
|
"%s\n" // ES texture buffer
|
|
"%s\n" // ES dual source blend
|
|
"%s\n" // shader image load store
|
|
"%s\n" // shader framebuffer fetch
|
|
"%s\n" // shader thread shuffle
|
|
|
|
// Precision defines for GLSL ES
|
|
"%s\n"
|
|
"%s\n"
|
|
"%s\n"
|
|
"%s\n"
|
|
"%s\n"
|
|
"%s\n"
|
|
|
|
// Silly differences
|
|
"#define API_OPENGL 1\n"
|
|
"#define float2 vec2\n"
|
|
"#define float3 vec3\n"
|
|
"#define float4 vec4\n"
|
|
"#define uint2 uvec2\n"
|
|
"#define uint3 uvec3\n"
|
|
"#define uint4 uvec4\n"
|
|
"#define int2 ivec2\n"
|
|
"#define int3 ivec3\n"
|
|
"#define int4 ivec4\n"
|
|
"#define frac fract\n"
|
|
"#define lerp mix\n"
|
|
|
|
,
|
|
GetGLSLVersionString().c_str(),
|
|
v < Glsl140 ? "#extension GL_ARB_uniform_buffer_object : enable" : "", earlyz_string.c_str(),
|
|
(g_ActiveConfig.backend_info.bSupportsBindingLayout && v < GlslEs310) ?
|
|
"#extension GL_ARB_shading_language_420pack : enable" :
|
|
"",
|
|
(g_ogl_config.bSupportsMSAA && v < Glsl150) ?
|
|
"#extension GL_ARB_texture_multisample : enable" :
|
|
"",
|
|
// Attribute and fragment output bindings are still done via glBindAttribLocation and
|
|
// glBindFragDataLocation. In the future this could be moved to the layout qualifier
|
|
// in GLSL, but requires verification of GL_ARB_explicit_attrib_location.
|
|
g_ActiveConfig.backend_info.bSupportsBindingLayout ?
|
|
"#define ATTRIBUTE_LOCATION(x)\n"
|
|
"#define FRAGMENT_OUTPUT_LOCATION(x)\n"
|
|
"#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n"
|
|
"#define UBO_BINDING(packing, x) layout(packing, binding = x)\n"
|
|
"#define SAMPLER_BINDING(x) layout(binding = x)\n"
|
|
"#define TEXEL_BUFFER_BINDING(x) layout(binding = x)\n"
|
|
"#define SSBO_BINDING(x) layout(binding = x)\n"
|
|
"#define IMAGE_BINDING(format, x) layout(format, binding = x)\n" :
|
|
"#define ATTRIBUTE_LOCATION(x)\n"
|
|
"#define FRAGMENT_OUTPUT_LOCATION(x)\n"
|
|
"#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n"
|
|
"#define UBO_BINDING(packing, x) layout(packing)\n"
|
|
"#define SAMPLER_BINDING(x)\n"
|
|
"#define TEXEL_BUFFER_BINDING(x)\n"
|
|
"#define SSBO_BINDING(x)\n"
|
|
"#define IMAGE_BINDING(format, x) layout(format)\n",
|
|
// Input/output blocks are matched by name during program linking
|
|
"#define VARYING_LOCATION(x)\n",
|
|
!is_glsles && g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics ?
|
|
"#extension GL_ARB_shader_storage_buffer_object : enable" :
|
|
"",
|
|
v < Glsl400 && g_ActiveConfig.backend_info.bSupportsGSInstancing ?
|
|
"#extension GL_ARB_gpu_shader5 : enable" :
|
|
"",
|
|
v < Glsl400 && g_ActiveConfig.backend_info.bSupportsSSAA ?
|
|
"#extension GL_ARB_sample_shading : enable" :
|
|
"",
|
|
SupportedESPointSize.c_str(),
|
|
g_ogl_config.bSupportsAEP ? "#extension GL_ANDROID_extension_pack_es31a : enable" : "",
|
|
v < Glsl140 && g_ActiveConfig.backend_info.bSupportsPaletteConversion ?
|
|
"#extension GL_ARB_texture_buffer_object : enable" :
|
|
"",
|
|
SupportedESTextureBuffer.c_str(),
|
|
is_glsles && g_ActiveConfig.backend_info.bSupportsDualSourceBlend ?
|
|
"#extension GL_EXT_blend_func_extended : enable" :
|
|
""
|
|
|
|
,
|
|
g_ogl_config.bSupportsImageLoadStore &&
|
|
((!is_glsles && v < Glsl430) || (is_glsles && v < GlslEs310)) ?
|
|
"#extension GL_ARB_shader_image_load_store : enable" :
|
|
"",
|
|
framebuffer_fetch_string.c_str(), shader_shuffle_string.c_str(),
|
|
is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
|
|
is_glsles ? "precision highp sampler2DArray;" : "",
|
|
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
|
|
"precision highp usamplerBuffer;" :
|
|
"",
|
|
v > GlslEs300 ? "precision highp sampler2DMS;" : "",
|
|
v >= GlslEs310 ? "precision highp image2DArray;" : "");
|
|
}
|
|
|
|
// Hands out the current value of the atomic shader counter and advances it by one.
u64 ProgramShaderCache::GenerateShaderID()
{
  return s_shader_counter.fetch_add(1);
}
|
|
|
|
bool SharedContextAsyncShaderCompiler::WorkerThreadInitMainThread(void** param)
|
|
{
|
|
std::unique_ptr<GLContext> context =
|
|
static_cast<Renderer*>(g_renderer.get())->GetMainGLContext()->CreateSharedContext();
|
|
if (!context)
|
|
{
|
|
PanicAlert("Failed to create shared context for shader compiling.");
|
|
return false;
|
|
}
|
|
|
|
*param = context.release();
|
|
return true;
|
|
}
|
|
|
|
bool SharedContextAsyncShaderCompiler::WorkerThreadInitWorkerThread(void* param)
|
|
{
|
|
GLContext* context = static_cast<GLContext*>(param);
|
|
if (!context->MakeCurrent())
|
|
return false;
|
|
|
|
s_is_shared_context = true;
|
|
|
|
// Make the state match the main context to have a better chance of avoiding recompiles.
|
|
if (!context->IsGLES())
|
|
glEnable(GL_PROGRAM_POINT_SIZE);
|
|
if (g_ActiveConfig.backend_info.bSupportsClipControl)
|
|
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
|
|
if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
|
|
{
|
|
glEnable(GL_CLIP_DISTANCE0);
|
|
glEnable(GL_CLIP_DISTANCE1);
|
|
glEnable(GL_DEPTH_CLAMP);
|
|
}
|
|
if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart)
|
|
GLUtil::EnablePrimitiveRestart(context);
|
|
|
|
return true;
|
|
}
|
|
|
|
void SharedContextAsyncShaderCompiler::WorkerThreadExit(void* param)
|
|
{
|
|
GLContext* context = static_cast<GLContext*>(param);
|
|
context->ClearCurrent();
|
|
delete context;
|
|
}
|
|
} // namespace OGL
|