Partially Revert "renderer_opengl: Remove amd hacks and legacy paths" (#6122)

This reverts commit 07a69b7c7b2f469cb946c4caec42981e7a4400ad.
2025-02-28 15:33:33 +01:00 · 2022-09-10 15:35:13 +05:30 · 2022-09-10 15:35:13 +05:30 · baecc18d8c
commit baecc18d8c
parent 30831e6367
6 changed files with 43 additions and 15 deletions
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@ -26,6 +26,10 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
 MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));

+static bool IsVendorAmd() { // true iff GL_VENDOR equals one of the two ATI/AMD strings below
+    const std::string_view gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
+    return gpu_vendor == "ATI Technologies Inc." || gpu_vendor == "Advanced Micro Devices, Inc.";
+} // NOTE(review): assumes glGetString(GL_VENDOR) is non-null (current GL context) - confirm
 #ifdef __APPLE__
 static bool IsVendorIntel() {
    std::string gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
@ -34,11 +38,11 @@ static bool IsVendorIntel() {
 #endif

 RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window)
-    : vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE),
-      uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE),
-      index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE),
-      texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE),
-      texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE) {
+    : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd),
+      uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false),
+      index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false),
+      texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false),
+      texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) {

    // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
    state.clip_distance[0] = true;
@ -144,12 +148,13 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window)

 #ifdef __APPLE__
    if (IsVendorIntel()) {
-        shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, false);
+        shader_program_manager = std::make_unique<ShaderProgramManager>(
+            emu_window, VideoCore::g_separable_shader_enabled, is_amd);
    } else {
-        shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, false);
+        shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, true, is_amd);
    }
 #else
-    shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, !GLES);
+    shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, !GLES, is_amd);
 #endif

    glEnable(GL_BLEND);
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@ -247,6 +247,8 @@ private:
    /// Setup geometry shader for AccelerateDrawBatch
    bool SetupGeometryShader();

+    bool is_amd;
+
    OpenGLState state;
    GLuint default_texture;

--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@ -327,8 +327,8 @@ using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FR

 class ShaderProgramManager::Impl {
 public:
-    explicit Impl(bool separable)
-        : separable(separable), programmable_vertex_shaders(separable),
+    explicit Impl(bool separable, bool is_amd)
+        : is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable),
          trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
          fragment_shaders(separable), disk_cache(separable) {
        if (separable)
@ -361,6 +361,7 @@ public:
        }
    };

+    bool is_amd;
    bool separable;

    ShaderTuple current;
@ -376,8 +377,9 @@ public:
    ShaderDiskCache disk_cache;
 };

-ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable)
-    : impl(std::make_unique<Impl>(separable)), emu_window{emu_window_} {}
+ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable,
+                                           bool is_amd)
+    : impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}

 ShaderProgramManager::~ShaderProgramManager() = default;

@ -439,6 +441,15 @@ void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {

 void ShaderProgramManager::ApplyTo(OpenGLState& state) {
    if (impl->separable) {
+        if (impl->is_amd) {
+            // Without this reset, AMD sometimes freezes when one stage is changed but the
+            // others are not. On the other hand, including this reset seems to introduce a
+            // memory leak on Intel Graphics.
+            glUseProgramStages(
+                impl->pipeline.handle,
+                GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);
+        }
+
        glUseProgramStages(impl->pipeline.handle, GL_VERTEX_SHADER_BIT, impl->current.vs);
        glUseProgramStages(impl->pipeline.handle, GL_GEOMETRY_SHADER_BIT, impl->current.gs);
        glUseProgramStages(impl->pipeline.handle, GL_FRAGMENT_SHADER_BIT, impl->current.fs);
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@ -108,7 +108,7 @@ class OpenGLState;
 /// A class that manage different shader stages and configures them with given config data.
 class ShaderProgramManager {
 public:
-    ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable);
+    ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, bool is_amd);
    ~ShaderProgramManager();

    void LoadDiskCache(const std::atomic_bool& stop_loading,
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@ -12,12 +12,21 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",

 namespace OpenGL {

-OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
+OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd,
+                                 bool prefer_coherent)
    : gl_target(target), buffer_size(size) {
    gl_buffer.Create();
    glBindBuffer(gl_target, gl_buffer.handle);

    GLsizeiptr allocate_size = size;
+    if (array_buffer_for_amd) {
+        // On AMD GPUs there is a strange crash in indexed drawing. The crash happens when the
+        // buffer read position is near the end, and it is an out-of-bounds access to the vertex
+        // buffer. This is probably a driver bug related to the usage of vec3<byte> attributes in the
+        // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
+        // crash.
+        allocate_size *= 2;
+    }

    if (GLAD_GL_ARB_buffer_storage) {
        persistent = true;
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@ -10,7 +10,8 @@ namespace OpenGL {

 class OGLStreamBuffer : private NonCopyable {
 public:
-    explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false);
+    explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd,
+                             bool prefer_coherent = false);
    ~OGLStreamBuffer();

    GLuint GetHandle() const;