// Copyright 2011 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoBackends/OGL/ProgramShaderCache.h"

#include <array>
#include <atomic>
#include <memory>
#include <string>

#include "Common/Align.h"
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/GL/GLContext.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"
#include "Common/Version.h"

#include "Core/ConfigManager.h"

#include "VideoBackends/OGL/OGLRender.h"
#include "VideoBackends/OGL/OGLShader.h"
#include "VideoBackends/OGL/OGLStreamBuffer.h"
#include "VideoBackends/OGL/OGLVertexManager.h"

#include "VideoCommon/AsyncShaderCompiler.h"
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoConfig.h"

namespace OGL
{
u32 ProgramShaderCache::s_ubo_buffer_size;
s32 ProgramShaderCache::s_ubo_align = 1;
GLuint ProgramShaderCache::s_attributeless_VBO = 0;
GLuint ProgramShaderCache::s_attributeless_VAO = 0;
GLuint ProgramShaderCache::s_last_VAO = 0;

static std::unique_ptr<StreamBuffer> s_buffer;
static int num_failures = 0;

static GLuint CurrentProgram = 0;
ProgramShaderCache::PipelineProgramMap ProgramShaderCache::s_pipeline_programs;
std::mutex ProgramShaderCache::s_pipeline_program_lock;
static std::string s_glsl_header;
static std::atomic<u64> s_shader_counter{0};
static thread_local bool s_is_shared_context = false;

static std::string GetGLSLVersionString()
{
  GlslVersion v = g_ogl_config.eSupportedGLSLVersion;
  switch (v)
  {
  case GlslEs300:
    return "#version 300 es";
  case GlslEs310:
    return "#version 310 es";
  case GlslEs320:
    return "#version 320 es";
  case Glsl130:
    return "#version 130";
  case Glsl140:
    return "#version 140";
  case Glsl150:
    return "#version 150";
  case Glsl330:
    return "#version 330";
  case Glsl400:
    return "#version 400";
  case Glsl430:
    return "#version 430";
  default:
    // Shouldn't ever hit this
    return "#version ERROR";
  }
}

void SHADER::SetProgramVariables()
{
  if (g_ActiveConfig.backend_info.bSupportsBindingLayout)
    return;

  // To set uniform blocks/uniforms, the program must be active. We restore the
  // current binding at the end of this method to maintain the invariant.
  glUseProgram(glprogid);

  // Bind UBO and texture samplers
  GLint PSBlock_id = glGetUniformBlockIndex(glprogid, "PSBlock");
  GLint VSBlock_id = glGetUniformBlockIndex(glprogid, "VSBlock");
  GLint GSBlock_id = glGetUniformBlockIndex(glprogid, "GSBlock");
  GLint UBERBlock_id = glGetUniformBlockIndex(glprogid, "UBERBlock");
  if (PSBlock_id != -1)
    glUniformBlockBinding(glprogid, PSBlock_id, 1);
  if (VSBlock_id != -1)
    glUniformBlockBinding(glprogid, VSBlock_id, 2);
  if (GSBlock_id != -1)
    glUniformBlockBinding(glprogid, GSBlock_id, 3);
  if (UBERBlock_id != -1)
    glUniformBlockBinding(glprogid, UBERBlock_id, 4);

  // Bind Texture Samplers
  for (int a = 0; a < 8; ++a)
  {
    // Still need to get sampler locations since we aren't binding them statically in the shaders
    int loc = glGetUniformLocation(glprogid, StringFromFormat("samp[%d]", a).c_str());
    if (loc < 0)
      loc = glGetUniformLocation(glprogid, StringFromFormat("samp%d", a).c_str());
    if (loc >= 0)
      glUniform1i(loc, a);
  }

  // Restore previous program binding.
  glUseProgram(CurrentProgram);
}

void SHADER::SetProgramBindings(bool is_compute)
{
  if (!is_compute)
  {
    if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
    {
      // So we do support extended blending
      // So we need to set a few more things here.
      // Bind our out locations
      glBindFragDataLocationIndexed(glprogid, 0, 0, "ocol0");
      glBindFragDataLocationIndexed(glprogid, 0, 1, "ocol1");
    }
    // Need to set some attribute locations
    glBindAttribLocation(glprogid, SHADER_POSITION_ATTRIB, "rawpos");

    glBindAttribLocation(glprogid, SHADER_POSMTX_ATTRIB, "posmtx");

    glBindAttribLocation(glprogid, SHADER_COLOR0_ATTRIB, "rawcolor0");
    glBindAttribLocation(glprogid, SHADER_COLOR1_ATTRIB, "rawcolor1");

    glBindAttribLocation(glprogid, SHADER_NORMAL_ATTRIB, "rawnormal");
    glBindAttribLocation(glprogid, SHADER_TANGENT_ATTRIB, "rawtangent");
    glBindAttribLocation(glprogid, SHADER_BINORMAL_ATTRIB, "rawbinormal");
  }

  for (int i = 0; i < 8; i++)
  {
    std::string attrib_name = StringFromFormat("rawtex%d", i);
    glBindAttribLocation(glprogid, SHADER_TEXTURE0_ATTRIB + i, attrib_name.c_str());
  }
}

void SHADER::Bind() const
{
  if (CurrentProgram != glprogid)
  {
    INCSTAT(g_stats.this_frame.num_shader_changes);
    glUseProgram(glprogid);
    CurrentProgram = glprogid;
  }
}

void SHADER::DestroyShaders()
{
  if (vsid)
  {
    glDeleteShader(vsid);
    vsid = 0;
  }
  if (gsid)
  {
    glDeleteShader(gsid);
    gsid = 0;
  }
  if (psid)
  {
    glDeleteShader(psid);
    psid = 0;
  }
}

bool PipelineProgramKey::operator!=(const PipelineProgramKey& rhs) const
{
  return !operator==(rhs);
}

bool PipelineProgramKey::operator==(const PipelineProgramKey& rhs) const
{
  return std::tie(vertex_shader_id, geometry_shader_id, pixel_shader_id) ==
         std::tie(rhs.vertex_shader_id, rhs.geometry_shader_id, rhs.pixel_shader_id);
}

bool PipelineProgramKey::operator<(const PipelineProgramKey& rhs) const
{
  return std::tie(vertex_shader_id, geometry_shader_id, pixel_shader_id) <
         std::tie(rhs.vertex_shader_id, rhs.geometry_shader_id, rhs.pixel_shader_id);
}

std::size_t PipelineProgramKeyHash::operator()(const PipelineProgramKey& key) const
{
  // We would really want std::hash_combine for this..
  std::hash<u64> hasher;
  return hasher(key.vertex_shader_id) + hasher(key.geometry_shader_id) +
         hasher(key.pixel_shader_id);
}

StreamBuffer* ProgramShaderCache::GetUniformBuffer()
{
  return s_buffer.get();
}

u32 ProgramShaderCache::GetUniformBufferAlignment()
{
  return s_ubo_align;
}

void ProgramShaderCache::UploadConstants()
{
  if (PixelShaderManager::dirty || VertexShaderManager::dirty || GeometryShaderManager::dirty)
  {
    auto buffer = s_buffer->Map(s_ubo_buffer_size, s_ubo_align);

    memcpy(buffer.first, &PixelShaderManager::constants, sizeof(PixelShaderConstants));

    memcpy(buffer.first + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align),
           &VertexShaderManager::constants, sizeof(VertexShaderConstants));

    memcpy(buffer.first + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) +
               Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align),
           &GeometryShaderManager::constants, sizeof(GeometryShaderConstants));

    s_buffer->Unmap(s_ubo_buffer_size);
    glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->m_buffer, buffer.second,
                      sizeof(PixelShaderConstants));
    glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->m_buffer,
                      buffer.second + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align),
                      sizeof(VertexShaderConstants));
    glBindBufferRange(GL_UNIFORM_BUFFER, 3, s_buffer->m_buffer,
                      buffer.second + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) +
                          Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align),
                      sizeof(GeometryShaderConstants));

    PixelShaderManager::dirty = false;
    VertexShaderManager::dirty = false;
    GeometryShaderManager::dirty = false;

    ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, s_ubo_buffer_size);
  }
}

void ProgramShaderCache::UploadConstants(const void* data, u32 data_size)
{
  // allocate and copy
  const u32 alloc_size = Common::AlignUp(data_size, s_ubo_align);
  auto buffer = s_buffer->Map(alloc_size, s_ubo_align);
  std::memcpy(buffer.first, data, data_size);
  s_buffer->Unmap(alloc_size);

  // bind the same sub-buffer to all stages
  for (u32 index = 1; index <= 3; index++)
    glBindBufferRange(GL_UNIFORM_BUFFER, index, s_buffer->m_buffer, buffer.second, data_size);

  ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, data_size);
}

bool ProgramShaderCache::CompileComputeShader(SHADER& shader, std::string_view code)
{
  // We need to enable GL_ARB_compute_shader for drivers that support the extension,
  // but not GLSL 4.3. Mesa is one example.
  std::string full_code;
  if (g_ActiveConfig.backend_info.bSupportsComputeShaders &&
      g_ogl_config.eSupportedGLSLVersion < Glsl430)
  {
    full_code = "#extension GL_ARB_compute_shader : enable\n";
  }

  full_code += code;
  const GLuint shader_id = CompileSingleShader(GL_COMPUTE_SHADER, full_code);
  if (!shader_id)
    return false;

  shader.glprogid = glCreateProgram();
  glAttachShader(shader.glprogid, shader_id);
  shader.SetProgramBindings(true);
  glLinkProgram(shader.glprogid);

  // original shaders aren't needed any more
  glDeleteShader(shader_id);

  if (!CheckProgramLinkResult(shader.glprogid, full_code, {}, {}))
  {
    shader.Destroy();
    return false;
  }

  shader.SetProgramVariables();
  return true;
}

GLuint ProgramShaderCache::CompileSingleShader(GLenum type, std::string_view code)
{
  const GLuint result = glCreateShader(type);

  constexpr GLsizei num_strings = 2;
  const std::array<const char*, num_strings> src{
      s_glsl_header.data(),
      code.data(),
  };
  const std::array<GLint, num_strings> src_sizes{
      static_cast<GLint>(s_glsl_header.size()),
      static_cast<GLint>(code.size()),
  };

  glShaderSource(result, num_strings, src.data(), src_sizes.data());
  glCompileShader(result);

  if (!CheckShaderCompileResult(result, type, code))
  {
    // Don't try to use this shader
    glDeleteShader(result);
    return 0;
  }

  return result;
}

bool ProgramShaderCache::CheckShaderCompileResult(GLuint id, GLenum type, std::string_view code)
{
  GLint compileStatus;
  glGetShaderiv(id, GL_COMPILE_STATUS, &compileStatus);
  GLsizei length = 0;
  glGetShaderiv(id, GL_INFO_LOG_LENGTH, &length);
  if (compileStatus != GL_TRUE || length > 1)
  {
    std::string info_log;
    info_log.resize(length);
    glGetShaderInfoLog(id, length, &length, &info_log[0]);

    const char* prefix = "";
    switch (type)
    {
    case GL_VERTEX_SHADER:
      prefix = "vs";
      break;
    case GL_GEOMETRY_SHADER:
      prefix = "gs";
      break;
    case GL_FRAGMENT_SHADER:
      prefix = "ps";
      break;
    case GL_COMPUTE_SHADER:
      prefix = "cs";
      break;
    }

    if (compileStatus != GL_TRUE)
    {
      ERROR_LOG_FMT(VIDEO, "{} failed compilation:\n{}", prefix, info_log);

      std::string filename = VideoBackendBase::BadShaderFilename(prefix, num_failures++);
      std::ofstream file;
      File::OpenFStream(file, filename, std::ios_base::out);
      file << s_glsl_header << code << info_log;
      file << "\n";
      file << "Dolphin Version: " + Common::GetScmRevStr() + "\n";
      file << "Video Backend: " + g_video_backend->GetDisplayName();
      file.close();

      PanicAlertFmt("Failed to compile {} shader: {}\n"
                    "Debug info ({}, {}, {}):\n{}",
                    prefix, filename, g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
                    g_ogl_config.gl_version, info_log);

      return false;
    }

    WARN_LOG_FMT(VIDEO, "{} compiled with warnings:\n{}", prefix, info_log);
  }

  return true;
}

bool ProgramShaderCache::CheckProgramLinkResult(GLuint id, std::string_view vcode,
                                                std::string_view pcode, std::string_view gcode)
{
  GLint linkStatus;
  glGetProgramiv(id, GL_LINK_STATUS, &linkStatus);
  GLsizei length = 0;
  glGetProgramiv(id, GL_INFO_LOG_LENGTH, &length);
  if (linkStatus != GL_TRUE || length > 1)
  {
    std::string info_log;
    info_log.resize(length);
    glGetProgramInfoLog(id, length, &length, &info_log[0]);
    if (linkStatus != GL_TRUE)
    {
      ERROR_LOG_FMT(VIDEO, "Program failed linking:\n{}", info_log);
      std::string filename = VideoBackendBase::BadShaderFilename("p", num_failures++);
      std::ofstream file;
      File::OpenFStream(file, filename, std::ios_base::out);
      if (!vcode.empty())
        file << s_glsl_header << vcode << '\n';
      if (!gcode.empty())
        file << s_glsl_header << gcode << '\n';
      if (!pcode.empty())
        file << s_glsl_header << pcode << '\n';

      file << info_log;
      file << "\n";
      file << "Dolphin Version: " + Common::GetScmRevStr() + "\n";
      file << "Video Backend: " + g_video_backend->GetDisplayName();
      file.close();

      PanicAlertFmt("Failed to link shaders: {}\n"
                    "Debug info ({}, {}, {}):\n{}",
                    filename, g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
                    g_ogl_config.gl_version, info_log);

      return false;
    }

    WARN_LOG_FMT(VIDEO, "Program linked with warnings:\n{}", info_log);
  }

  return true;
}

void ProgramShaderCache::Init()
{
  // We have to get the UBO alignment here because
  // if we generate a buffer that isn't aligned
  // then the UBO will fail.
  glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &s_ubo_align);

  s_ubo_buffer_size =
      static_cast<u32>(Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) +
                       Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align) +
                       Common::AlignUp(sizeof(GeometryShaderConstants), s_ubo_align));

  // We multiply by *4*4 because we need to get down to basic machine units.
  // So multiply by four to get how many floats we have from vec4s
  // Then once more to get bytes
  s_buffer = StreamBuffer::Create(GL_UNIFORM_BUFFER, VertexManagerBase::UNIFORM_STREAM_BUFFER_SIZE);

  CreateHeader();
  CreateAttributelessVAO();

  CurrentProgram = 0;
}

void ProgramShaderCache::Shutdown()
{
  s_buffer.reset();

  glBindVertexArray(0);
  glDeleteBuffers(1, &s_attributeless_VBO);
  glDeleteVertexArrays(1, &s_attributeless_VAO);
  s_attributeless_VBO = 0;
  s_attributeless_VAO = 0;
  s_last_VAO = 0;

  // All pipeline programs should have been released.
  DEBUG_ASSERT(s_pipeline_programs.empty());
  s_pipeline_programs.clear();
}

void ProgramShaderCache::CreateAttributelessVAO()
{
  glGenVertexArrays(1, &s_attributeless_VAO);

  // In a compatibility context, we require a valid, bound array buffer.
  glGenBuffers(1, &s_attributeless_VBO);

  // Initialize the buffer with nothing. 16 floats is an arbitrary size that may work around driver
  // issues.
  glBindBuffer(GL_ARRAY_BUFFER, s_attributeless_VBO);
  glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * 16, nullptr, GL_STATIC_DRAW);

  // We must also define vertex attribute 0.
  glBindVertexArray(s_attributeless_VAO);
  glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, nullptr);
  glEnableVertexAttribArray(0);
}

void ProgramShaderCache::BindVertexFormat(const GLVertexFormat* vertex_format)
{
  u32 new_VAO = vertex_format ? vertex_format->VAO : s_attributeless_VAO;
  if (s_last_VAO == new_VAO)
    return;

  glBindVertexArray(new_VAO);
  s_last_VAO = new_VAO;
}

bool ProgramShaderCache::IsValidVertexFormatBound()
{
  return s_last_VAO != 0 && s_last_VAO != s_attributeless_VAO;
}

void ProgramShaderCache::InvalidateVertexFormat()
{
  s_last_VAO = 0;
}

void ProgramShaderCache::InvalidateVertexFormatIfBound(GLuint vao)
{
  if (s_last_VAO == vao)
    s_last_VAO = 0;
}

void ProgramShaderCache::InvalidateLastProgram()
{
  CurrentProgram = 0;
}

PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexFormat* vertex_format,
                                                        const OGLShader* vertex_shader,
                                                        const OGLShader* geometry_shader,
                                                        const OGLShader* pixel_shader,
                                                        const void* cache_data,
                                                        size_t cache_data_size)
{
  PipelineProgramKey key = {vertex_shader ? vertex_shader->GetID() : 0,
                            geometry_shader ? geometry_shader->GetID() : 0,
                            pixel_shader ? pixel_shader->GetID() : 0};
  {
    std::lock_guard guard{s_pipeline_program_lock};
    auto iter = s_pipeline_programs.find(key);
    if (iter != s_pipeline_programs.end())
    {
      iter->second->reference_count++;
      return iter->second.get();
    }
  }

  std::unique_ptr<PipelineProgram> prog = std::make_unique<PipelineProgram>();
  prog->key = key;
  prog->shader.glprogid = glCreateProgram();

  // Use the cache data, if present. If this fails, we want to return an error, so the shader cache
  // doesn't attempt to use the same binary data in the future.
  if (cache_data_size >= sizeof(u32))
  {
    u32 program_binary_type;
    std::memcpy(&program_binary_type, cache_data, sizeof(u32));
    glProgramBinary(prog->shader.glprogid, static_cast<GLenum>(program_binary_type),
                    static_cast<const u8*>(cache_data) + sizeof(u32),
                    static_cast<GLsizei>(cache_data_size - sizeof(u32)));

    // Check the link status. If this fails, it means the binary was invalid.
    GLint link_status;
    glGetProgramiv(prog->shader.glprogid, GL_LINK_STATUS, &link_status);
    if (link_status != GL_TRUE)
    {
      WARN_LOG_FMT(VIDEO, "Failed to create GL program from program binary.");
      prog->shader.Destroy();
      return nullptr;
    }

    // We don't want to retrieve this binary and duplicate entries in the cache again.
    // See the explanation in OGLPipeline.cpp.
    prog->binary_retrieved = true;
  }
  else
  {
    // We temporarily change the vertex array to the pipeline's vertex format.
    // This can prevent the NVIDIA OpenGL driver from recompiling on first use.
    GLuint vao = vertex_format ? vertex_format->VAO : s_attributeless_VAO;
    if (s_is_shared_context || vao != s_last_VAO)
      glBindVertexArray(vao);

    // Attach shaders.
    ASSERT(vertex_shader && vertex_shader->GetStage() == ShaderStage::Vertex);
    ASSERT(pixel_shader && pixel_shader->GetStage() == ShaderStage::Pixel);
    glAttachShader(prog->shader.glprogid, vertex_shader->GetGLShaderID());
    glAttachShader(prog->shader.glprogid, pixel_shader->GetGLShaderID());
    if (geometry_shader)
    {
      ASSERT(geometry_shader->GetStage() == ShaderStage::Geometry);
      glAttachShader(prog->shader.glprogid, geometry_shader->GetGLShaderID());
    }

    if (g_ActiveConfig.backend_info.bSupportsPipelineCacheData)
      glProgramParameteri(prog->shader.glprogid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);

    // Link program.
    prog->shader.SetProgramBindings(false);
    glLinkProgram(prog->shader.glprogid);

    // Restore VAO binding after linking.
    if (!s_is_shared_context && vao != s_last_VAO)
      glBindVertexArray(s_last_VAO);

    if (!CheckProgramLinkResult(prog->shader.glprogid,
                                vertex_shader ? vertex_shader->GetSource() : std::string_view{},
                                geometry_shader ? geometry_shader->GetSource() : std::string_view{},
                                pixel_shader ? pixel_shader->GetSource() : std::string_view{}))
    {
      prog->shader.Destroy();
      return nullptr;
    }
  }

  // Lock to insert. A duplicate program may have been created in the meantime.
  std::lock_guard guard{s_pipeline_program_lock};
  auto iter = s_pipeline_programs.find(key);
  if (iter != s_pipeline_programs.end())
  {
    // Destroy this program, and use the one which was created first.
    prog->shader.Destroy();
    iter->second->reference_count++;
    return iter->second.get();
  }

  // Set program variables on the shader which will be returned.
  // This is only needed for drivers which don't support binding layout.
  prog->shader.SetProgramVariables();

  // If this is a shared context, ensure we sync before we return the program to
  // the main thread. If we don't do this, some driver can lock up (e.g. AMD).
  if (s_is_shared_context)
    glFinish();

  auto ip = s_pipeline_programs.emplace(key, std::move(prog));
  return ip.first->second.get();
}

void ProgramShaderCache::ReleasePipelineProgram(PipelineProgram* prog)
{
  if (--prog->reference_count > 0)
    return;

  prog->shader.Destroy();

  std::lock_guard guard{s_pipeline_program_lock};
  const auto iter = s_pipeline_programs.find(prog->key);
  ASSERT(iter != s_pipeline_programs.end() && prog == iter->second.get());
  s_pipeline_programs.erase(iter);
}

void ProgramShaderCache::CreateHeader()
{
  GlslVersion v = g_ogl_config.eSupportedGLSLVersion;
  bool is_glsles = v >= GlslEs300;
  std::string SupportedESPointSize;
  std::string SupportedESTextureBuffer;
  switch (g_ogl_config.SupportedESPointSize)
  {
  case 1:
    SupportedESPointSize = "#extension GL_OES_geometry_point_size : enable";
    break;
  case 2:
    SupportedESPointSize = "#extension GL_EXT_geometry_point_size : enable";
    break;
  default:
    SupportedESPointSize = "";
    break;
  }

  switch (g_ogl_config.SupportedESTextureBuffer)
  {
  case EsTexbufType::TexbufExt:
    SupportedESTextureBuffer = "#extension GL_EXT_texture_buffer : enable";
    break;
  case EsTexbufType::TexbufOes:
    SupportedESTextureBuffer = "#extension GL_OES_texture_buffer : enable";
    break;
  case EsTexbufType::TexbufCore:
  case EsTexbufType::TexbufNone:
    SupportedESTextureBuffer = "";
    break;
  }

  std::string earlyz_string;
  if (g_ActiveConfig.backend_info.bSupportsEarlyZ)
  {
    if (g_ogl_config.bSupportsImageLoadStore)
    {
      earlyz_string = "#define FORCE_EARLY_Z layout(early_fragment_tests) in\n";
    }
    else if (g_ogl_config.bSupportsConservativeDepth)
    {
      // See PixelShaderGen for details about this fallback.
      earlyz_string = "#define FORCE_EARLY_Z layout(depth_unchanged) out float gl_FragDepth\n";
      earlyz_string += "#extension GL_ARB_conservative_depth : enable\n";
    }
  }

  std::string framebuffer_fetch_string;
  switch (g_ogl_config.SupportedFramebufferFetch)
  {
  case EsFbFetchType::FbFetchExt:
    framebuffer_fetch_string = "#extension GL_EXT_shader_framebuffer_fetch: enable\n"
                               "#define FRAGMENT_INOUT inout";
    break;
  case EsFbFetchType::FbFetchArm:
    framebuffer_fetch_string = "#extension GL_ARM_shader_framebuffer_fetch: enable\n"
                               "#define FB_FETCH_VALUE gl_LastFragColorARM\n"
                               "#define FRAGMENT_INOUT out";
    break;
  case EsFbFetchType::FbFetchNone:
    framebuffer_fetch_string = "";
    break;
  }

  std::string shader_shuffle_string;
  if (g_ogl_config.bSupportsShaderThreadShuffleNV)
  {
    shader_shuffle_string = R"(
#extension GL_NV_shader_thread_group : enable
#extension GL_NV_shader_thread_shuffle : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1

// The xor shuffle below produces incorrect results if all threads in a warp are not active.
#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu)

#define IS_HELPER_INVOCATION gl_HelperThreadNV
#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV)))
#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \
                                        value = func(value, shuffleXorNV(value, 8, 32)); \
                                        value = func(value, shuffleXorNV(value, 4, 32)); \
                                        value = func(value, shuffleXorNV(value, 2, 32)); \
                                        value = func(value, shuffleXorNV(value, 1, 32));
#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value)
#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value)
)";
  }

  s_glsl_header = StringFromFormat(
      "%s\n"
      "%s\n"  // ubo
      "%s\n"  // early-z
      "%s\n"  // 420pack
      "%s\n"  // msaa
      "%s\n"  // Input/output/sampler binding
      "%s\n"  // Varying location
      "%s\n"  // storage buffer
      "%s\n"  // shader5
      "%s\n"  // SSAA
      "%s\n"  // Geometry point size
      "%s\n"  // AEP
      "%s\n"  // texture buffer
      "%s\n"  // ES texture buffer
      "%s\n"  // ES dual source blend
      "%s\n"  // shader image load store
      "%s\n"  // shader framebuffer fetch
      "%s\n"  // shader thread shuffle
      "%s\n"  // derivative control
      "%s\n"  // query levels

      // Precision defines for GLSL ES
      "%s\n"
      "%s\n"
      "%s\n"
      "%s\n"
      "%s\n"
      "%s\n"

      // Silly differences
      "#define API_OPENGL 1\n"
      "#define float2 vec2\n"
      "#define float3 vec3\n"
      "#define float4 vec4\n"
      "#define uint2 uvec2\n"
      "#define uint3 uvec3\n"
      "#define uint4 uvec4\n"
      "#define int2 ivec2\n"
      "#define int3 ivec3\n"
      "#define int4 ivec4\n"
      "#define frac fract\n"
      "#define lerp mix\n"

      ,
      GetGLSLVersionString().c_str(),
      v < Glsl140 ? "#extension GL_ARB_uniform_buffer_object : enable" : "", earlyz_string.c_str(),
      (g_ActiveConfig.backend_info.bSupportsBindingLayout && v < GlslEs310) ?
          "#extension GL_ARB_shading_language_420pack : enable" :
          "",
      (g_ogl_config.bSupportsMSAA && v < Glsl150) ?
          "#extension GL_ARB_texture_multisample : enable" :
          "",
      // Attribute and fragment output bindings are still done via glBindAttribLocation and
      // glBindFragDataLocation. In the future this could be moved to the layout qualifier
      // in GLSL, but requires verification of GL_ARB_explicit_attrib_location.
      g_ActiveConfig.backend_info.bSupportsBindingLayout ?
          "#define ATTRIBUTE_LOCATION(x)\n"
          "#define FRAGMENT_OUTPUT_LOCATION(x)\n"
          "#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n"
          "#define UBO_BINDING(packing, x) layout(packing, binding = x)\n"
          "#define SAMPLER_BINDING(x) layout(binding = x)\n"
          "#define TEXEL_BUFFER_BINDING(x) layout(binding = x)\n"
          "#define SSBO_BINDING(x) layout(std430, binding = x)\n"
          "#define IMAGE_BINDING(format, x) layout(format, binding = x)\n" :
          "#define ATTRIBUTE_LOCATION(x)\n"
          "#define FRAGMENT_OUTPUT_LOCATION(x)\n"
          "#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n"
          "#define UBO_BINDING(packing, x) layout(packing)\n"
          "#define SAMPLER_BINDING(x)\n"
          "#define TEXEL_BUFFER_BINDING(x)\n"
          "#define SSBO_BINDING(x) layout(std430)\n"
          "#define IMAGE_BINDING(format, x) layout(format)\n",
      // Input/output blocks are matched by name during program linking
      "#define VARYING_LOCATION(x)\n",
      !is_glsles && g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics ?
          "#extension GL_ARB_shader_storage_buffer_object : enable" :
          "",
      v < Glsl400 && g_ActiveConfig.backend_info.bSupportsGSInstancing ?
          "#extension GL_ARB_gpu_shader5 : enable" :
          "",
      v < Glsl400 && g_ActiveConfig.backend_info.bSupportsSSAA ?
          "#extension GL_ARB_sample_shading : enable" :
          "",
      SupportedESPointSize.c_str(),
      g_ogl_config.bSupportsAEP ? "#extension GL_ANDROID_extension_pack_es31a : enable" : "",
      v < Glsl140 && g_ActiveConfig.backend_info.bSupportsPaletteConversion ?
          "#extension GL_ARB_texture_buffer_object : enable" :
          "",
      SupportedESTextureBuffer.c_str(),
      is_glsles && g_ActiveConfig.backend_info.bSupportsDualSourceBlend ?
          "#extension GL_EXT_blend_func_extended : enable" :
          ""

      ,
      g_ogl_config.bSupportsImageLoadStore &&
              ((!is_glsles && v < Glsl430) || (is_glsles && v < GlslEs310)) ?
          "#extension GL_ARB_shader_image_load_store : enable" :
          "",
      framebuffer_fetch_string.c_str(), shader_shuffle_string.c_str(),
      g_ActiveConfig.backend_info.bSupportsCoarseDerivatives ?
          "#extension GL_ARB_derivative_control : enable" :
          "",
      g_ActiveConfig.backend_info.bSupportsTextureQueryLevels ?
          "#extension GL_ARB_texture_query_levels : enable" :
          "",
      is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
      is_glsles ? "precision highp sampler2DArray;" : "",
      (is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
          "precision highp usamplerBuffer;" :
          "",
      v > GlslEs300 ? "precision highp sampler2DMSArray;" : "",
      v >= GlslEs310 ? "precision highp image2DArray;" : "");
}

u64 ProgramShaderCache::GenerateShaderID()
{
  return s_shader_counter++;
}

bool SharedContextAsyncShaderCompiler::WorkerThreadInitMainThread(void** param)
{
  std::unique_ptr<GLContext> context =
      static_cast<Renderer*>(g_renderer.get())->GetMainGLContext()->CreateSharedContext();
  if (!context)
  {
    PanicAlertFmt("Failed to create shared context for shader compiling.");
    return false;
  }

  *param = context.release();
  return true;
}

bool SharedContextAsyncShaderCompiler::WorkerThreadInitWorkerThread(void* param)
{
  GLContext* context = static_cast<GLContext*>(param);
  if (!context->MakeCurrent())
    return false;

  s_is_shared_context = true;

  // Make the state match the main context to have a better chance of avoiding recompiles.
  if (!context->IsGLES())
    glEnable(GL_PROGRAM_POINT_SIZE);
  if (g_ActiveConfig.backend_info.bSupportsClipControl)
    glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
  if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
  {
    glEnable(GL_CLIP_DISTANCE0);
    glEnable(GL_CLIP_DISTANCE1);
    glEnable(GL_DEPTH_CLAMP);
  }
  if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart)
    GLUtil::EnablePrimitiveRestart(context);

  return true;
}

void SharedContextAsyncShaderCompiler::WorkerThreadExit(void* param)
{
  GLContext* context = static_cast<GLContext*>(param);
  context->ClearCurrent();
  delete context;
}
}  // namespace OGL