diff --git a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsFragmentPresenter.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsFragmentPresenter.java
index 5285ab2a09..1ad529afd8 100644
--- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsFragmentPresenter.java
+++ b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsFragmentPresenter.java
@@ -286,6 +286,7 @@ public final class SettingsFragmentPresenter
BooleanSetting ignoreFormat = new BooleanSetting(SettingsFile.KEY_IGNORE_FORMAT, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, ignoreFormatValue);
Setting efbToTexture = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_HACKS).getSetting(SettingsFile.KEY_EFB_TEXTURE);
Setting texCacheAccuracy = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_TEXCACHE_ACCURACY);
+ Setting gpuTextureDecoding = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_GPU_TEXTURE_DECODING);
IntSetting xfb = new IntSetting(SettingsFile.KEY_XFB, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, xfbValue);
Setting fastDepth = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_HACKS).getSetting(SettingsFile.KEY_FAST_DEPTH);
Setting aspectRatio = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_ASPECT_RATIO);
@@ -297,6 +298,7 @@ public final class SettingsFragmentPresenter
sl.add(new HeaderSetting(null, null, R.string.texture_cache, 0));
sl.add(new SingleChoiceSetting(SettingsFile.KEY_TEXCACHE_ACCURACY, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.texture_cache_accuracy, R.string.texture_cache_accuracy_descrip, R.array.textureCacheAccuracyEntries, R.array.textureCacheAccuracyValues, 128, texCacheAccuracy));
+ sl.add(new CheckBoxSetting(SettingsFile.KEY_GPU_TEXTURE_DECODING, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.gpu_texture_decoding, R.string.gpu_texture_decoding_descrip, false, gpuTextureDecoding));
sl.add(new HeaderSetting(null, null, R.string.external_frame_buffer, 0));
sl.add(new SingleChoiceSetting(SettingsFile.KEY_XFB_METHOD, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, R.string.external_frame_buffer, R.string.external_frame_buffer_descrip, R.array.externalFrameBufferEntries, R.array.externalFrameBufferValues, 0, xfb));
diff --git a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/utils/SettingsFile.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/utils/SettingsFile.java
index 707e345d26..76043fbb3f 100644
--- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/utils/SettingsFile.java
+++ b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/utils/SettingsFile.java
@@ -73,6 +73,7 @@ public final class SettingsFile
public static final String KEY_IGNORE_FORMAT = "EFBEmulateFormatChanges";
public static final String KEY_EFB_TEXTURE = "EFBToTextureEnable";
public static final String KEY_TEXCACHE_ACCURACY = "SafeTextureCacheColorSamples";
+ public static final String KEY_GPU_TEXTURE_DECODING = "EnableGPUTextureDecoding";
public static final String KEY_XFB = "UseXFB";
public static final String KEY_XFB_REAL = "UseRealXFB";
public static final String KEY_FAST_DEPTH = "FastDepthCalc";
diff --git a/Source/Android/app/src/main/res/values/strings.xml b/Source/Android/app/src/main/res/values/strings.xml
index f41d8fe8a2..10be6058bb 100644
--- a/Source/Android/app/src/main/res/values/strings.xml
+++ b/Source/Android/app/src/main/res/values/strings.xml
@@ -168,6 +168,8 @@
Texture Cache
Texture Cache Accuracy
The safer the selection, the less likely the emulator will be missing any texture updates from RAM.
+ GPU Texture Decoding
+ Decodes textures on the GPU using compute shaders where supported. May improve performance in some scenarios.
External Frame Buffer
Determines how the XFB will be emulated.
Disable Destination Alpha
diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj
index 13256bdce7..a00a8d488e 100644
--- a/Source/Core/Common/Common.vcxproj
+++ b/Source/Core/Common/Common.vcxproj
@@ -72,6 +72,7 @@
+
@@ -83,9 +84,11 @@
+
+
diff --git a/Source/Core/Common/Common.vcxproj.filters b/Source/Core/Common/Common.vcxproj.filters
index cb6fba5ad5..ff6b9ad7fc 100644
--- a/Source/Core/Common/Common.vcxproj.filters
+++ b/Source/Core/Common/Common.vcxproj.filters
@@ -238,6 +238,16 @@
+
+
+ GL\GLExtensions
+
+
+ GL\GLExtensions
+
+
+ GL\GLExtensions
+
@@ -303,6 +313,7 @@
+
diff --git a/Source/Core/Common/GL/GLExtensions/ARB_compute_shader.h b/Source/Core/Common/GL/GLExtensions/ARB_compute_shader.h
new file mode 100644
index 0000000000..b27b8091e4
--- /dev/null
+++ b/Source/Core/Common/GL/GLExtensions/ARB_compute_shader.h
@@ -0,0 +1,53 @@
+/*
+** Copyright (c) 2013-2015 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a
+** copy of this software and/or associated documentation files (the
+** "Materials"), to deal in the Materials without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Materials, and to
+** permit persons to whom the Materials are furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be included
+** in all copies or substantial portions of the Materials.
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+*/
+
+#include "Common/GL/GLExtensions/gl_common.h"
+
+#define GL_COMPUTE_SHADER 0x91B9
+#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB
+#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
+#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD
+#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
+#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
+#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
+#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265
+#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
+#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
+#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
+#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
+#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
+#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE
+#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
+#define GL_COMPUTE_SHADER_BIT 0x00000020
+
+typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y,
+ GLuint num_groups_z);  // pointer type behind glDispatchCompute
+typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);  // glDispatchComputeIndirect
+
+extern PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;  // declaration only; defined in GLExtensions.cpp
+extern PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;  // likewise defined in GLExtensions.cpp
+
+#define glDispatchCompute dolDispatchCompute  // standard GL name resolves to the dol* pointer
+#define glDispatchComputeIndirect dolDispatchComputeIndirect  // same aliasing for the indirect variant
diff --git a/Source/Core/Common/GL/GLExtensions/ARB_shader_image_load_store.h b/Source/Core/Common/GL/GLExtensions/ARB_shader_image_load_store.h
new file mode 100644
index 0000000000..ca366f3c5b
--- /dev/null
+++ b/Source/Core/Common/GL/GLExtensions/ARB_shader_image_load_store.h
@@ -0,0 +1,100 @@
+/*
+** Copyright (c) 2013-2015 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a
+** copy of this software and/or associated documentation files (the
+** "Materials"), to deal in the Materials without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Materials, and to
+** permit persons to whom the Materials are furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be included
+** in all copies or substantial portions of the Materials.
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+*/
+
+#include "Common/GL/GLExtensions/gl_common.h"
+
+#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
+#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
+#define GL_UNIFORM_BARRIER_BIT 0x00000004
+#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008
+#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
+#define GL_COMMAND_BARRIER_BIT 0x00000040
+#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080
+#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
+#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200
+#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400
+#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
+#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000
+#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
+#define GL_MAX_IMAGE_UNITS 0x8F38
+#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
+#define GL_IMAGE_BINDING_NAME 0x8F3A
+#define GL_IMAGE_BINDING_LEVEL 0x8F3B
+#define GL_IMAGE_BINDING_LAYERED 0x8F3C
+#define GL_IMAGE_BINDING_LAYER 0x8F3D
+#define GL_IMAGE_BINDING_ACCESS 0x8F3E
+#define GL_IMAGE_1D 0x904C
+#define GL_IMAGE_2D 0x904D
+#define GL_IMAGE_3D 0x904E
+#define GL_IMAGE_2D_RECT 0x904F
+#define GL_IMAGE_CUBE 0x9050
+#define GL_IMAGE_BUFFER 0x9051
+#define GL_IMAGE_1D_ARRAY 0x9052
+#define GL_IMAGE_2D_ARRAY 0x9053
+#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054
+#define GL_IMAGE_2D_MULTISAMPLE 0x9055
+#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056
+#define GL_INT_IMAGE_1D 0x9057
+#define GL_INT_IMAGE_2D 0x9058
+#define GL_INT_IMAGE_3D 0x9059
+#define GL_INT_IMAGE_2D_RECT 0x905A
+#define GL_INT_IMAGE_CUBE 0x905B
+#define GL_INT_IMAGE_BUFFER 0x905C
+#define GL_INT_IMAGE_1D_ARRAY 0x905D
+#define GL_INT_IMAGE_2D_ARRAY 0x905E
+#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F
+#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060
+#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
+#define GL_UNSIGNED_INT_IMAGE_1D 0x9062
+#define GL_UNSIGNED_INT_IMAGE_2D 0x9063
+#define GL_UNSIGNED_INT_IMAGE_3D 0x9064
+#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065
+#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066
+#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
+#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
+#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
+#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
+#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
+#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
+#define GL_MAX_IMAGE_SAMPLES 0x906D
+#define GL_IMAGE_BINDING_FORMAT 0x906E
+#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
+#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
+#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
+#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA
+#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
+#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
+#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD
+#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE
+#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF
+
+typedef void(APIENTRYP PFNDOLBINDIMAGETEXTUREPROC)(GLuint unit, GLuint texture, GLint level,
+ GLboolean layered, GLint layer, GLenum access,
+ GLenum format);  // pointer type behind glBindImageTexture
+typedef void(APIENTRYP PFNDOLMEMORYBARRIERPROC)(GLbitfield barriers);  // pointer type behind glMemoryBarrier
+
+extern PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;  // declaration only; defined in GLExtensions.cpp
+extern PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;  // likewise defined in GLExtensions.cpp
+
+#define glBindImageTexture dolBindImageTexture  // standard GL name resolves to the dol* pointer
+#define glMemoryBarrier dolMemoryBarrier  // same aliasing for the barrier call
diff --git a/Source/Core/Common/GL/GLExtensions/ARB_texture_storage.h b/Source/Core/Common/GL/GLExtensions/ARB_texture_storage.h
new file mode 100644
index 0000000000..1686a11248
--- /dev/null
+++ b/Source/Core/Common/GL/GLExtensions/ARB_texture_storage.h
@@ -0,0 +1,41 @@
+/*
+** Copyright (c) 2013-2015 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a
+** copy of this software and/or associated documentation files (the
+** "Materials"), to deal in the Materials without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Materials, and to
+** permit persons to whom the Materials are furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be included
+** in all copies or substantial portions of the Materials.
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+*/
+
+#include "Common/GL/GLExtensions/gl_common.h"
+
+#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
+
+typedef void(APIENTRYP PFNDOLTEXSTORAGE1DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
+ GLsizei width);  // pointer type behind glTexStorage1D
+typedef void(APIENTRYP PFNDOLTEXSTORAGE2DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
+ GLsizei width, GLsizei height);  // pointer type behind glTexStorage2D
+typedef void(APIENTRYP PFNDOLTEXSTORAGE3DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
+ GLsizei width, GLsizei height, GLsizei depth);  // pointer type behind glTexStorage3D
+
+extern PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;  // declarations only; defined in GLExtensions.cpp
+extern PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
+extern PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
+
+#define glTexStorage1D dolTexStorage1D  // standard GL names resolve to the dol* pointers
+#define glTexStorage2D dolTexStorage2D
+#define glTexStorage3D dolTexStorage3D
diff --git a/Source/Core/Common/GL/GLExtensions/GLExtensions.cpp b/Source/Core/Common/GL/GLExtensions/GLExtensions.cpp
index 546bfa1b69..cba40b35dc 100644
--- a/Source/Core/Common/GL/GLExtensions/GLExtensions.cpp
+++ b/Source/Core/Common/GL/GLExtensions/GLExtensions.cpp
@@ -653,19 +653,12 @@ PFNDOLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC
dolDrawElementsInstancedBaseVertexBaseInstance;
PFNDOLGETINTERNALFORMATIVPROC dolGetInternalformativ;
PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC dolGetActiveAtomicCounterBufferiv;
-PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
-PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
-PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
-PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
-PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC dolDrawTransformFeedbackInstanced;
PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackStreamInstanced;
// gl_4_3
PFNDOLCLEARBUFFERDATAPROC dolClearBufferData;
PFNDOLCLEARBUFFERSUBDATAPROC dolClearBufferSubData;
-PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
-PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
PFNDOLFRAMEBUFFERPARAMETERIPROC dolFramebufferParameteri;
PFNDOLGETFRAMEBUFFERPARAMETERIVPROC dolGetFramebufferParameteriv;
PFNDOLGETINTERNALFORMATI64VPROC dolGetInternalformati64v;
@@ -905,6 +898,11 @@ PFNDOLTEXIMAGE3DMULTISAMPLEPROC dolTexImage3DMultisample;
PFNDOLGETMULTISAMPLEFVPROC dolGetMultisamplefv;
PFNDOLSAMPLEMASKIPROC dolSampleMaski;
+// ARB_texture_storage: definitions for the pointers declared extern in ARB_texture_storage.h
+PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
+PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
+PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
+
// ARB_texture_storage_multisample
PFNDOLTEXSTORAGE2DMULTISAMPLEPROC dolTexStorage2DMultisample;
PFNDOLTEXSTORAGE3DMULTISAMPLEPROC dolTexStorage3DMultisample;
@@ -989,6 +987,14 @@ PFNDOLDEPTHRANGEDNVPROC dolDepthRangedNV;
PFNDOLCLEARDEPTHDNVPROC dolClearDepthdNV;
PFNDOLDEPTHBOUNDSDNVPROC dolDepthBoundsdNV;
+// ARB_shader_image_load_store: definitions for the externs in ARB_shader_image_load_store.h
+PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
+PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
+
+// ARB_compute_shader: definitions for the externs in ARB_compute_shader.h
+PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
+PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
+
// Creates a GLFunc object that requires a feature
#define GLFUNC_REQUIRES(x, y) \
{ \
@@ -1681,6 +1687,11 @@ const GLFunc gl_function_array[] = {
GLFUNC_REQUIRES(glGetMultisamplefv, "GL_ARB_texture_multisample"),
GLFUNC_REQUIRES(glSampleMaski, "GL_ARB_texture_multisample"),
+ // ARB_texture_storage
+ GLFUNC_REQUIRES(glTexStorage1D, "GL_ARB_texture_storage !VERSION_4_2"),
+ GLFUNC_REQUIRES(glTexStorage2D, "GL_ARB_texture_storage !VERSION_4_2 |VERSION_GLES_3"),
+ GLFUNC_REQUIRES(glTexStorage3D, "GL_ARB_texture_storage !VERSION_4_2 |VERSION_GLES_3"),
+
// ARB_texture_storage_multisample
GLFUNC_REQUIRES(glTexStorage2DMultisample,
"GL_ARB_texture_storage_multisample !VERSION_4_3 |VERSION_GLES_3_1"),
@@ -1848,6 +1859,17 @@ const GLFunc gl_function_array[] = {
GLFUNC_REQUIRES(glDepthRangedNV, "GL_NV_depth_buffer_float"),
GLFUNC_REQUIRES(glClearDepthdNV, "GL_NV_depth_buffer_float"),
GLFUNC_REQUIRES(glDepthBoundsdNV, "GL_NV_depth_buffer_float"),
+
+ // ARB_shader_image_load_store
+ GLFUNC_REQUIRES(glBindImageTexture,
+ "GL_ARB_shader_image_load_store !VERSION_4_2 |VERSION_GLES_3_1"),
+ GLFUNC_REQUIRES(glMemoryBarrier,
+ "GL_ARB_shader_image_load_store !VERSION_4_2 |VERSION_GLES_3_1"),
+
+ // ARB_compute_shader
+ GLFUNC_REQUIRES(glDispatchCompute, "GL_ARB_compute_shader !VERSION_4_3 |VERSION_GLES_3_1"),
+ GLFUNC_REQUIRES(glDispatchComputeIndirect,
+ "GL_ARB_compute_shader !VERSION_4_3 |VERSION_GLES_3_1"),
};
namespace GLExtensions
diff --git a/Source/Core/Common/GL/GLExtensions/GLExtensions.h b/Source/Core/Common/GL/GLExtensions/GLExtensions.h
index 4c58167700..49e9fc282f 100644
--- a/Source/Core/Common/GL/GLExtensions/GLExtensions.h
+++ b/Source/Core/Common/GL/GLExtensions/GLExtensions.h
@@ -12,6 +12,7 @@
#include "Common/GL/GLExtensions/ARB_blend_func_extended.h"
#include "Common/GL/GLExtensions/ARB_buffer_storage.h"
#include "Common/GL/GLExtensions/ARB_clip_control.h"
+#include "Common/GL/GLExtensions/ARB_compute_shader.h"
#include "Common/GL/GLExtensions/ARB_copy_image.h"
#include "Common/GL/GLExtensions/ARB_debug_output.h"
#include "Common/GL/GLExtensions/ARB_draw_elements_base_vertex.h"
@@ -21,9 +22,11 @@
#include "Common/GL/GLExtensions/ARB_occlusion_query2.h"
#include "Common/GL/GLExtensions/ARB_sample_shading.h"
#include "Common/GL/GLExtensions/ARB_sampler_objects.h"
+#include "Common/GL/GLExtensions/ARB_shader_image_load_store.h"
#include "Common/GL/GLExtensions/ARB_shader_storage_buffer_object.h"
#include "Common/GL/GLExtensions/ARB_sync.h"
#include "Common/GL/GLExtensions/ARB_texture_multisample.h"
+#include "Common/GL/GLExtensions/ARB_texture_storage.h"
#include "Common/GL/GLExtensions/ARB_texture_storage_multisample.h"
#include "Common/GL/GLExtensions/ARB_uniform_buffer_object.h"
#include "Common/GL/GLExtensions/ARB_vertex_array_object.h"
diff --git a/Source/Core/Common/GL/GLExtensions/gl_4_2.h b/Source/Core/Common/GL/GLExtensions/gl_4_2.h
index bb9286bf32..e4eab3f475 100644
--- a/Source/Core/Common/GL/GLExtensions/gl_4_2.h
+++ b/Source/Core/Common/GL/GLExtensions/gl_4_2.h
@@ -66,75 +66,10 @@
#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9
#define GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX 0x92DA
#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB
-#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
-#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
-#define GL_UNIFORM_BARRIER_BIT 0x00000004
-#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008
-#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
-#define GL_COMMAND_BARRIER_BIT 0x00000040
-#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080
-#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
-#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200
-#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400
-#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
-#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000
-#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
-#define GL_MAX_IMAGE_UNITS 0x8F38
-#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
-#define GL_IMAGE_BINDING_NAME 0x8F3A
-#define GL_IMAGE_BINDING_LEVEL 0x8F3B
-#define GL_IMAGE_BINDING_LAYERED 0x8F3C
-#define GL_IMAGE_BINDING_LAYER 0x8F3D
-#define GL_IMAGE_BINDING_ACCESS 0x8F3E
-#define GL_IMAGE_1D 0x904C
-#define GL_IMAGE_2D 0x904D
-#define GL_IMAGE_3D 0x904E
-#define GL_IMAGE_2D_RECT 0x904F
-#define GL_IMAGE_CUBE 0x9050
-#define GL_IMAGE_BUFFER 0x9051
-#define GL_IMAGE_1D_ARRAY 0x9052
-#define GL_IMAGE_2D_ARRAY 0x9053
-#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054
-#define GL_IMAGE_2D_MULTISAMPLE 0x9055
-#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056
-#define GL_INT_IMAGE_1D 0x9057
-#define GL_INT_IMAGE_2D 0x9058
-#define GL_INT_IMAGE_3D 0x9059
-#define GL_INT_IMAGE_2D_RECT 0x905A
-#define GL_INT_IMAGE_CUBE 0x905B
-#define GL_INT_IMAGE_BUFFER 0x905C
-#define GL_INT_IMAGE_1D_ARRAY 0x905D
-#define GL_INT_IMAGE_2D_ARRAY 0x905E
-#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F
-#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060
-#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
-#define GL_UNSIGNED_INT_IMAGE_1D 0x9062
-#define GL_UNSIGNED_INT_IMAGE_2D 0x9063
-#define GL_UNSIGNED_INT_IMAGE_3D 0x9064
-#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065
-#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066
-#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
-#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
-#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
-#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
-#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
-#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
-#define GL_MAX_IMAGE_SAMPLES 0x906D
-#define GL_IMAGE_BINDING_FORMAT 0x906E
-#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
-#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
-#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
-#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA
-#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
-#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
-#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD
-#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE
-#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF
#define GL_COMPRESSED_RGBA_BPTC_UNORM 0x8E8C
#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM 0x8E8D
#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT 0x8E8E
#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT 0x8E8F
-#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
typedef void(APIENTRYP PFNDOLDRAWARRAYSINSTANCEDBASEINSTANCEPROC)(GLenum mode, GLint first,
GLsizei count,
@@ -152,16 +87,6 @@ typedef void(APIENTRYP PFNDOLGETINTERNALFORMATIVPROC)(GLenum target, GLenum inte
GLenum pname, GLsizei bufSize, GLint* params);
typedef void(APIENTRYP PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC)(GLuint program, GLuint bufferIndex,
GLenum pname, GLint* params);
-typedef void(APIENTRYP PFNDOLBINDIMAGETEXTUREPROC)(GLuint unit, GLuint texture, GLint level,
- GLboolean layered, GLint layer, GLenum access,
- GLenum format);
-typedef void(APIENTRYP PFNDOLMEMORYBARRIERPROC)(GLbitfield barriers);
-typedef void(APIENTRYP PFNDOLTEXSTORAGE1DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
- GLsizei width);
-typedef void(APIENTRYP PFNDOLTEXSTORAGE2DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
- GLsizei width, GLsizei height);
-typedef void(APIENTRYP PFNDOLTEXSTORAGE3DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
- GLsizei width, GLsizei height, GLsizei depth);
typedef void(APIENTRYP PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC)(GLenum mode, GLuint id,
GLsizei instancecount);
typedef void(APIENTRYP PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC)(GLenum mode, GLuint id,
@@ -174,11 +99,6 @@ extern PFNDOLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC
dolDrawElementsInstancedBaseVertexBaseInstance;
extern PFNDOLGETINTERNALFORMATIVPROC dolGetInternalformativ;
extern PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC dolGetActiveAtomicCounterBufferiv;
-extern PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
-extern PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
-extern PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
-extern PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
-extern PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
extern PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC dolDrawTransformFeedbackInstanced;
extern PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackStreamInstanced;
@@ -187,10 +107,5 @@ extern PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackSt
#define glDrawElementsInstancedBaseVertexBaseInstance dolDrawElementsInstancedBaseVertexBaseInstance
#define glGetInternalformativ dolGetInternalformativ
#define glGetActiveAtomicCounterBufferiv dolGetActiveAtomicCounterBufferiv
-#define glBindImageTexture dolBindImageTexture
-#define glMemoryBarrier dolMemoryBarrier
-#define glTexStorage1D dolTexStorage1D
-#define glTexStorage2D dolTexStorage2D
-#define glTexStorage3D dolTexStorage3D
#define glDrawTransformFeedbackInstanced dolDrawTransformFeedbackInstanced
#define glDrawTransformFeedbackStreamInstanced dolDrawTransformFeedbackStreamInstanced
diff --git a/Source/Core/Common/GL/GLExtensions/gl_4_3.h b/Source/Core/Common/GL/GLExtensions/gl_4_3.h
index e81b24e370..98b18ad246 100644
--- a/Source/Core/Common/GL/GLExtensions/gl_4_3.h
+++ b/Source/Core/Common/GL/GLExtensions/gl_4_3.h
@@ -38,24 +38,6 @@
#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69
#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A
#define GL_MAX_ELEMENT_INDEX 0x8D6B
-#define GL_COMPUTE_SHADER 0x91B9
-#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB
-#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
-#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD
-#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
-#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
-#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
-#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265
-#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
-#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
-#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
-#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
-#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267
-#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
-#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
-#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE
-#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
-#define GL_COMPUTE_SHADER_BIT 0x00000020
#define GL_DEBUG_OUTPUT_SYNCHRONOUS 0x8242
#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH 0x8243
#define GL_DEBUG_CALLBACK_FUNCTION 0x8244
@@ -287,9 +269,6 @@ typedef void(APIENTRYP PFNDOLCLEARBUFFERDATAPROC)(GLenum target, GLenum internal
typedef void(APIENTRYP PFNDOLCLEARBUFFERSUBDATAPROC)(GLenum target, GLenum internalformat,
GLintptr offset, GLsizeiptr size,
GLenum format, GLenum type, const void* data);
-typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y,
- GLuint num_groups_z);
-typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);
typedef void(APIENTRYP PFNDOLFRAMEBUFFERPARAMETERIPROC)(GLenum target, GLenum pname, GLint param);
typedef void(APIENTRYP PFNDOLGETFRAMEBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname,
GLint* params);
@@ -348,8 +327,6 @@ typedef void(APIENTRYP PFNDOLVERTEXBINDINGDIVISORPROC)(GLuint bindingindex, GLui
extern PFNDOLCLEARBUFFERDATAPROC dolClearBufferData;
extern PFNDOLCLEARBUFFERSUBDATAPROC dolClearBufferSubData;
-extern PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
-extern PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
extern PFNDOLFRAMEBUFFERPARAMETERIPROC dolFramebufferParameteri;
extern PFNDOLGETFRAMEBUFFERPARAMETERIVPROC dolGetFramebufferParameteriv;
extern PFNDOLGETINTERNALFORMATI64VPROC dolGetInternalformati64v;
@@ -378,8 +355,6 @@ extern PFNDOLVERTEXBINDINGDIVISORPROC dolVertexBindingDivisor;
#define glClearBufferData dolClearBufferData
#define glClearBufferSubData dolClearBufferSubData
-#define glDispatchCompute dolDispatchCompute
-#define glDispatchComputeIndirect dolDispatchComputeIndirect
#define glFramebufferParameteri dolFramebufferParameteri
#define glGetFramebufferParameteriv dolGetFramebufferParameteriv
#define glGetInternalformati64v dolGetInternalformati64v
diff --git a/Source/Core/DolphinWX/VideoConfigDiag.cpp b/Source/Core/DolphinWX/VideoConfigDiag.cpp
index fd6db1942c..49a004b73c 100644
--- a/Source/Core/DolphinWX/VideoConfigDiag.cpp
+++ b/Source/Core/DolphinWX/VideoConfigDiag.cpp
@@ -284,6 +284,10 @@ static wxString true_color_desc =
wxTRANSLATE("Forces the game to render the RGB color channels in 24-bit, thereby increasing "
"quality by reducing color banding.\nIt has no impact on performance and causes "
"few graphical issues.\n\n\nIf unsure, leave this checked.");
+static wxString gpu_texture_decoding_desc =  // description shown for the "GPU Texture Decoding" checkbox
+ wxTRANSLATE("Enables texture decoding using the GPU instead of the CPU. This may result in "
+ "performance gains in some scenarios, or on systems where the CPU is the "
+ "bottleneck.\n\nIf unsure, leave this unchecked.");
#if !defined(__APPLE__)
// Search for available resolutions - TODO: Move to Common?
@@ -755,6 +759,15 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title)
slide_szr->Add(new wxStaticText(page_hacks, wxID_ANY, _("Fast")), 0, wxALIGN_CENTER_VERTICAL);
szr_safetex->Add(slide_szr, 1, wxEXPAND | wxLEFT | wxRIGHT, space5);
+
+ if (vconfig.backend_info.bSupportsGPUTextureDecoding)
+ {
+ szr_safetex->Add(CreateCheckBox(page_hacks, _("GPU Texture Decoding"),
+ wxGetTranslation(gpu_texture_decoding_desc),
+ vconfig.bEnableGPUTextureDecoding),
+ 1, wxEXPAND | wxLEFT | wxRIGHT, space5);
+ }
+
if (slider_pos == -1)
{
stc_slider->Disable();
diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp
index e8a51d3441..bc0af68180 100644
--- a/Source/Core/VideoBackends/D3D/main.cpp
+++ b/Source/Core/VideoBackends/D3D/main.cpp
@@ -67,6 +67,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = false;
g_Config.backend_info.bSupportsGeometryShaders = true;
+ g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = true;
g_Config.backend_info.bSupportsPostProcessing = false;
g_Config.backend_info.bSupportsPaletteConversion = true;
@@ -75,6 +76,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsReversedDepthRange = false;
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
+ g_Config.backend_info.bSupportsGPUTextureDecoding = false;
IDXGIFactory* factory;
IDXGIAdapter* ad;
diff --git a/Source/Core/VideoBackends/D3D12/main.cpp b/Source/Core/VideoBackends/D3D12/main.cpp
index 7a48647c0e..2c95bff055 100644
--- a/Source/Core/VideoBackends/D3D12/main.cpp
+++ b/Source/Core/VideoBackends/D3D12/main.cpp
@@ -70,6 +70,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = false;
g_Config.backend_info.bSupportsGeometryShaders = true;
+ g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = true;
g_Config.backend_info.bSupportsPostProcessing = false;
g_Config.backend_info.bSupportsPaletteConversion = true;
@@ -78,6 +79,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsReversedDepthRange = false;
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
+ g_Config.backend_info.bSupportsGPUTextureDecoding = false;
IDXGIFactory* factory;
IDXGIAdapter* ad;
diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp
index 1fe0914a91..60a6f637ac 100644
--- a/Source/Core/VideoBackends/Null/NullBackend.cpp
+++ b/Source/Core/VideoBackends/Null/NullBackend.cpp
@@ -30,6 +30,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = true;
g_Config.backend_info.bSupportsGeometryShaders = true;
+ g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = false;
g_Config.backend_info.bSupportsEarlyZ = true;
g_Config.backend_info.bSupportsBindingLayout = true;
@@ -43,6 +44,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsReversedDepthRange = true;
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
+ g_Config.backend_info.bSupportsGPUTextureDecoding = false;
// aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear();
diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp
index 5fc1651285..59a8679fd0 100644
--- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp
+++ b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp
@@ -65,7 +65,7 @@ GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_fo
}
else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
{
- if (g_ogl_config.bSupports3DTextureStorage)
+ if (g_ogl_config.bSupports3DTextureStorageMultisample)
glTexStorage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth,
m_targetHeight, m_EFBLayers, false);
else
@@ -74,7 +74,7 @@ GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_fo
}
else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE)
{
- if (g_ogl_config.bSupports2DTextureStorage)
+ if (g_ogl_config.bSupports2DTextureStorageMultisample)
glTexStorage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth,
m_targetHeight, false);
else
diff --git a/Source/Core/VideoBackends/OGL/GPUTimer.h b/Source/Core/VideoBackends/OGL/GPUTimer.h
new file mode 100644
index 0000000000..50724ab06f
--- /dev/null
+++ b/Source/Core/VideoBackends/OGL/GPUTimer.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Common/GL/GLExtensions/GLExtensions.h"
+
+#ifndef GL_TIME_ELAPSED
+#define GL_TIME_ELAPSED 0x88BF
+#endif
+
+namespace OGL
+{
+/*
+ * This class can be used to measure the time it takes for the GPU to perform a draw call
+ * or compute dispatch. To use:
+ *
+ * - Create an instance of GPUTimer before issuing the draw call.
+ *   (this can be before or after any binding that needs to be done)
+ *
+ * - (optionally) call Begin(). This is not needed for a single draw call, because the
+ *   constructor already starts the query.
+ *
+ * - Issue the draw call or compute dispatch as normal.
+ *
+ * - (optionally) call End(). This is not necessary for a single draw call, because reading
+ *   the result ends a running query.
+ *
+ * - Call GetTime{Seconds,Milliseconds,Nanoseconds} to determine how long the operation
+ *   took to execute on the GPU.
+ *
+ * NOTE: When the timer is read back, this will force a GL flush, so the more often a timer is
+ * used, the larger the performance impact it will have. Only one timer can be active at any
+ * time, due to using GL_TIME_ELAPSED. This is not enforced by the class, however.
+ *
+ * The result is read back through glGetQueryObjectuiv into a GLuint, so the nanosecond count
+ * is limited to the range of that type.
+ */
+class GPUTimer final
+{
+public:
+  // Creates the query object and immediately starts timing.
+  GPUTimer()
+  {
+    glGenQueries(1, &m_query_id);
+    Begin();
+  }
+
+  // Ends a still-running query and releases the query object.
+  ~GPUTimer()
+  {
+    End();
+    glDeleteQueries(1, &m_query_id);
+  }
+
+  // The timer owns a GL query object; a copy would delete the same query twice.
+  GPUTimer(const GPUTimer&) = delete;
+  GPUTimer& operator=(const GPUTimer&) = delete;
+
+  // (Re)starts the measurement. A query that is still running is ended first.
+  void Begin()
+  {
+    if (m_started)
+      glEndQuery(GL_TIME_ELAPSED);
+
+    glBeginQuery(GL_TIME_ELAPSED, m_query_id);
+    m_started = true;
+
+    // A new measurement invalidates any previously fetched result, otherwise the
+    // GetTime*() functions would keep returning the old value.
+    m_has_result = false;
+  }
+
+  // Stops the measurement. Does nothing if no query is running.
+  void End()
+  {
+    if (!m_started)
+      return;
+
+    glEndQuery(GL_TIME_ELAPSED);
+    m_started = false;
+  }
+
+  // Elapsed GPU time in seconds.
+  double GetTimeSeconds()
+  {
+    GetResult();
+    return static_cast<double>(m_result) / 1000000000.0;
+  }
+
+  // Elapsed GPU time in milliseconds.
+  double GetTimeMilliseconds()
+  {
+    GetResult();
+    return static_cast<double>(m_result) / 1000000.0;
+  }
+
+  // Elapsed GPU time in nanoseconds, as reported by the query.
+  u32 GetTimeNanoseconds()
+  {
+    GetResult();
+    return m_result;
+  }
+
+private:
+  // Fetches the query result once and caches it in m_result. Ends the query first
+  // if it is still running.
+  void GetResult()
+  {
+    if (m_has_result)
+      return;
+
+    if (m_started)
+      End();
+
+    glGetQueryObjectuiv(m_query_id, GL_QUERY_RESULT, &m_result);
+    m_has_result = true;
+  }
+
+  GLuint m_query_id = 0;
+  GLuint m_result = 0;
+  bool m_started = false;
+  bool m_has_result = false;
+};
+} // namespace OGL
diff --git a/Source/Core/VideoBackends/OGL/OGL.vcxproj b/Source/Core/VideoBackends/OGL/OGL.vcxproj
index 0c234a8e71..3b945793a4 100644
--- a/Source/Core/VideoBackends/OGL/OGL.vcxproj
+++ b/Source/Core/VideoBackends/OGL/OGL.vcxproj
@@ -53,6 +53,7 @@
+
@@ -79,4 +80,4 @@
-
+
\ No newline at end of file
diff --git a/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters b/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters
index 282934d830..201a4045f8 100644
--- a/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters
+++ b/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters
@@ -90,8 +90,11 @@
+
+ GLUtil
+
-
+
\ No newline at end of file
diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
index 508d6e41ed..3d4aeea2fc 100644
--- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
+++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
@@ -65,6 +65,8 @@ static std::string GetGLSLVersionString()
return "#version 330";
case GLSL_400:
return "#version 400";
+ case GLSL_430:
+ return "#version 430";
default:
// Shouldn't ever hit this
return "#version ERROR";
@@ -103,27 +105,30 @@ void SHADER::SetProgramVariables()
}
}
-void SHADER::SetProgramBindings()
+void SHADER::SetProgramBindings(bool is_compute)
{
- if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
+ if (!is_compute)
{
- // So we do support extended blending
- // So we need to set a few more things here.
- // Bind our out locations
- glBindFragDataLocationIndexed(glprogid, 0, 0, "ocol0");
- glBindFragDataLocationIndexed(glprogid, 0, 1, "ocol1");
+ if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
+ {
+ // So we do support extended blending
+ // So we need to set a few more things here.
+ // Bind our out locations
+ glBindFragDataLocationIndexed(glprogid, 0, 0, "ocol0");
+ glBindFragDataLocationIndexed(glprogid, 0, 1, "ocol1");
+ }
+ // Need to set some attribute locations
+ glBindAttribLocation(glprogid, SHADER_POSITION_ATTRIB, "rawpos");
+
+ glBindAttribLocation(glprogid, SHADER_POSMTX_ATTRIB, "posmtx");
+
+ glBindAttribLocation(glprogid, SHADER_COLOR0_ATTRIB, "color0");
+ glBindAttribLocation(glprogid, SHADER_COLOR1_ATTRIB, "color1");
+
+ glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB, "rawnorm0");
+ glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB, "rawnorm1");
+ glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB, "rawnorm2");
}
- // Need to set some attribute locations
- glBindAttribLocation(glprogid, SHADER_POSITION_ATTRIB, "rawpos");
-
- glBindAttribLocation(glprogid, SHADER_POSMTX_ATTRIB, "posmtx");
-
- glBindAttribLocation(glprogid, SHADER_COLOR0_ATTRIB, "color0");
- glBindAttribLocation(glprogid, SHADER_COLOR1_ATTRIB, "color1");
-
- glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB, "rawnorm0");
- glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB, "rawnorm1");
- glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB, "rawnorm2");
for (int i = 0; i < 8; i++)
{
@@ -281,7 +286,7 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
if (g_ogl_config.bSupportsGLSLCache)
glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
- shader.SetProgramBindings();
+ shader.SetProgramBindings(false);
glLinkProgram(pid);
@@ -296,10 +301,10 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length);
if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
{
- GLsizei charsWritten;
- GLchar* infoLog = new GLchar[length];
- glGetProgramInfoLog(pid, length, &charsWritten, infoLog);
- ERROR_LOG(VIDEO, "Program info log:\n%s", infoLog);
+ std::string info_log;
+ info_log.resize(length);
+ glGetProgramInfoLog(pid, length, &length, &info_log[0]);
+ ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str());
std::string filename =
StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
@@ -308,7 +313,7 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
file << s_glsl_header << vcode << s_glsl_header << pcode;
if (!gcode.empty())
file << s_glsl_header << gcode;
- file << infoLog;
+ file << info_log;
file.close();
if (linkStatus != GL_TRUE)
@@ -316,10 +321,8 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
PanicAlert("Failed to link shaders: %s\n"
"Debug info (%s, %s, %s):\n%s",
filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
- g_ogl_config.gl_version, infoLog);
+ g_ogl_config.gl_version, info_log.c_str());
}
-
- delete[] infoLog;
}
if (linkStatus != GL_TRUE)
{
@@ -336,6 +339,73 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
return true;
}
+// Compiles `code` as a compute shader and links it into a new program object.
+//
+// Returns true on success, with the program name stored in shader.glprogid.
+// If the shader fails to compile, `shader` is left untouched and false is returned.
+// If linking fails, the info log is written to the error log and to a dump file, the program
+// is deleted, shader.glprogid is reset to 0 and false is returned.
+bool ProgramShaderCache::CompileComputeShader(SHADER& shader, const std::string& code)
+{
+  // We need to enable GL_ARB_compute_shader for drivers that support the extension,
+  // but not GLSL 4.3. Mesa is one example.
+  std::string header;
+  if (g_ActiveConfig.backend_info.bSupportsComputeShaders &&
+      g_ogl_config.eSupportedGLSLVersion < GLSL_430)
+  {
+    header = "#extension GL_ARB_compute_shader : enable\n";
+  }
+
+  const GLuint shader_id = CompileSingleShader(GL_COMPUTE_SHADER, header + code);
+  if (!shader_id)
+    return false;
+
+  const GLuint pid = shader.glprogid = glCreateProgram();
+  glAttachShader(pid, shader_id);
+  if (g_ogl_config.bSupportsGLSLCache)
+    glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
+
+  shader.SetProgramBindings(true);
+
+  glLinkProgram(pid);
+
+  // original shaders aren't needed any more
+  glDeleteShader(shader_id);
+
+  GLint linkStatus;
+  glGetProgramiv(pid, GL_LINK_STATUS, &linkStatus);
+  GLsizei length = 0;
+  glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length);
+  if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
+  {
+    // GL_INFO_LOG_LENGTH includes the NUL terminator. Fetch the log into a buffer of that
+    // size, then shrink the string to the number of characters actually written so that no
+    // NUL padding ends up in the dump file.
+    std::string info_log;
+    info_log.resize(length);
+    GLsizei chars_written = 0;
+    glGetProgramInfoLog(pid, length, &chars_written, &info_log[0]);
+    info_log.resize(chars_written);
+    ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str());
+
+    std::string filename =
+        StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
+    std::ofstream file;
+    OpenFStream(file, filename, std::ios_base::out);
+    file << s_glsl_header << code;
+    file << info_log;
+    file.close();
+
+    if (linkStatus != GL_TRUE)
+    {
+      PanicAlert("Failed to link shaders: %s\n"
+                 "Debug info (%s, %s, %s):\n%s",
+                 filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
+                 g_ogl_config.gl_version, info_log.c_str());
+    }
+  }
+  if (linkStatus != GL_TRUE)
+  {
+    // Compile failed
+    ERROR_LOG(VIDEO, "Program linking failed; see info log");
+
+    // Don't try to use this shader, and don't leave the caller holding the name of a
+    // program that no longer exists.
+    glDeleteProgram(pid);
+    shader.glprogid = 0;
+    return false;
+  }
+
+  return true;
+}
+
GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& code)
{
GLuint result = glCreateShader(type);
@@ -351,31 +421,43 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c
if (compileStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
{
- GLsizei charsWritten;
- GLchar* infoLog = new GLchar[length];
- glGetShaderInfoLog(result, length, &charsWritten, infoLog);
- ERROR_LOG(VIDEO, "%s Shader info log:\n%s",
- type == GL_VERTEX_SHADER ? "VS" : type == GL_FRAGMENT_SHADER ? "PS" : "GS", infoLog);
+ std::string info_log;
+ info_log.resize(length);
+ glGetShaderInfoLog(result, length, &length, &info_log[0]);
+
+ const char* prefix = "";
+ switch (type)
+ {
+ case GL_VERTEX_SHADER:
+ prefix = "vs";
+ break;
+ case GL_GEOMETRY_SHADER:
+ prefix = "gs";
+ break;
+ case GL_FRAGMENT_SHADER:
+ prefix = "ps";
+ break;
+ case GL_COMPUTE_SHADER:
+ prefix = "cs";
+ break;
+ }
+
+ ERROR_LOG(VIDEO, "%s Shader info log:\n%s", prefix, info_log.c_str());
std::string filename = StringFromFormat(
- "%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(),
- type == GL_VERTEX_SHADER ? "vs" : type == GL_FRAGMENT_SHADER ? "ps" : "gs", num_failures++);
+ "%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), prefix, num_failures++);
std::ofstream file;
OpenFStream(file, filename, std::ios_base::out);
- file << s_glsl_header << code << infoLog;
+ file << s_glsl_header << code << info_log;
file.close();
if (compileStatus != GL_TRUE)
{
PanicAlert("Failed to compile %s shader: %s\n"
"Debug info (%s, %s, %s):\n%s",
- type == GL_VERTEX_SHADER ? "vertex" : type == GL_FRAGMENT_SHADER ? "pixel" :
- "geometry",
- filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
- g_ogl_config.gl_version, infoLog);
+ prefix, filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
+ g_ogl_config.gl_version, info_log.c_str());
}
-
- delete[] infoLog;
}
if (compileStatus != GL_TRUE)
{
@@ -539,11 +621,9 @@ void ProgramShaderCache::CreateHeader()
std::string earlyz_string = "";
if (g_ActiveConfig.backend_info.bSupportsEarlyZ)
{
- if (g_ogl_config.bSupportsEarlyFragmentTests)
+ if (g_ogl_config.bSupportsImageLoadStore)
{
earlyz_string = "#define FORCE_EARLY_Z layout(early_fragment_tests) in\n";
- if (!is_glsles) // GLES supports this by default
- earlyz_string += "#extension GL_ARB_shader_image_load_store : enable\n";
}
else if (g_ogl_config.bSupportsConservativeDepth)
{
@@ -569,6 +649,7 @@ void ProgramShaderCache::CreateHeader()
"%s\n" // texture buffer
"%s\n" // ES texture buffer
"%s\n" // ES dual source blend
+ "%s\n" // shader image load store
// Precision defines for GLSL ES
"%s\n"
@@ -576,6 +657,7 @@ void ProgramShaderCache::CreateHeader()
"%s\n"
"%s\n"
"%s\n"
+ "%s\n"
// Silly differences
"#define float2 vec2\n"
@@ -638,12 +720,17 @@ void ProgramShaderCache::CreateHeader()
""
,
+ g_ogl_config.bSupportsImageLoadStore &&
+ ((!is_glsles && v < GLSL_430) || (is_glsles && v < GLSLES_310)) ?
+ "#extension GL_ARB_shader_image_load_store : enable" :
+ "",
is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
is_glsles ? "precision highp sampler2DArray;" : "",
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
"precision highp usamplerBuffer;" :
"",
- v > GLSLES_300 ? "precision highp sampler2DMS;" : "");
+ v > GLSLES_300 ? "precision highp sampler2DMS;" : "",
+ v >= GLSLES_310 ? "precision highp image2DArray;" : "");
}
void ProgramShaderCache::ProgramShaderCacheInserter::Read(const SHADERUID& key, const u8* value,
diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h
index c471db63ed..a8b2bfcbc1 100644
--- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h
+++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h
@@ -46,7 +46,7 @@ struct SHADER
std::string strvprog, strpprog, strgprog;
void SetProgramVariables();
- void SetProgramBindings();
+ void SetProgramBindings(bool is_compute);
void Bind();
};
@@ -67,6 +67,7 @@ public:
static bool CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode,
const std::string& gcode = "");
+ static bool CompileComputeShader(SHADER& shader, const std::string& code);
static GLuint CompileSingleShader(GLuint type, const std::string& code);
static void UploadConstants();
diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp
index 198dc7f05f..18626eaa0e 100644
--- a/Source/Core/VideoBackends/OGL/Render.cpp
+++ b/Source/Core/VideoBackends/OGL/Render.cpp
@@ -451,15 +451,16 @@ Renderer::Renderer()
g_ogl_config.bSupportViewportFloat = GLExtensions::Supports("GL_ARB_viewport_array");
g_ogl_config.bSupportsDebug =
GLExtensions::Supports("GL_KHR_debug") || GLExtensions::Supports("GL_ARB_debug_output");
- g_ogl_config.bSupports3DTextureStorage =
+ g_ogl_config.bSupportsTextureStorage = GLExtensions::Supports("GL_ARB_texture_storage");
+ g_ogl_config.bSupports3DTextureStorageMultisample =
GLExtensions::Supports("GL_ARB_texture_storage_multisample") ||
GLExtensions::Supports("GL_OES_texture_storage_multisample_2d_array");
- g_ogl_config.bSupports2DTextureStorage =
+ g_ogl_config.bSupports2DTextureStorageMultisample =
GLExtensions::Supports("GL_ARB_texture_storage_multisample");
- g_ogl_config.bSupportsEarlyFragmentTests =
- GLExtensions::Supports("GL_ARB_shader_image_load_store");
+ g_ogl_config.bSupportsImageLoadStore = GLExtensions::Supports("GL_ARB_shader_image_load_store");
g_ogl_config.bSupportsConservativeDepth = GLExtensions::Supports("GL_ARB_conservative_depth");
g_ogl_config.bSupportsAniso = GLExtensions::Supports("GL_EXT_texture_filter_anisotropic");
+ g_Config.backend_info.bSupportsComputeShaders = GLExtensions::Supports("GL_ARB_compute_shader");
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3)
{
@@ -486,6 +487,7 @@ Renderer::Renderer()
{
g_ogl_config.eSupportedGLSLVersion = GLSLES_300;
g_ogl_config.bSupportsAEP = false;
+ g_ogl_config.bSupportsTextureStorage = true;
g_Config.backend_info.bSupportsGeometryShaders = false;
}
else if (GLExtensions::Version() == 310)
@@ -493,16 +495,18 @@ Renderer::Renderer()
g_ogl_config.eSupportedGLSLVersion = GLSLES_310;
g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a");
g_Config.backend_info.bSupportsBindingLayout = true;
- g_ogl_config.bSupportsEarlyFragmentTests = true;
+ g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsGeometryShaders = g_ogl_config.bSupportsAEP;
+ g_Config.backend_info.bSupportsComputeShaders = true;
g_Config.backend_info.bSupportsGSInstancing =
g_Config.backend_info.bSupportsGeometryShaders && g_ogl_config.SupportedESPointSize > 0;
g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP;
g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true;
g_ogl_config.bSupportsMSAA = true;
- g_ogl_config.bSupports2DTextureStorage = true;
+ g_ogl_config.bSupportsTextureStorage = true;
+ g_ogl_config.bSupports2DTextureStorageMultisample = true;
if (g_ActiveConfig.iStereoMode > 0 && g_ActiveConfig.iMultisamples > 1 &&
- !g_ogl_config.bSupports3DTextureStorage)
+ !g_ogl_config.bSupports3DTextureStorageMultisample)
{
// GLES 3.1 can't support stereo rendering and MSAA
OSD::AddMessage("MSAA Stereo rendering isn't supported by your GPU.", 10000);
@@ -514,8 +518,9 @@ Renderer::Renderer()
g_ogl_config.eSupportedGLSLVersion = GLSLES_320;
g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a");
g_Config.backend_info.bSupportsBindingLayout = true;
- g_ogl_config.bSupportsEarlyFragmentTests = true;
+ g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsGeometryShaders = true;
+ g_Config.backend_info.bSupportsComputeShaders = true;
g_Config.backend_info.bSupportsGSInstancing = g_ogl_config.SupportedESPointSize > 0;
g_Config.backend_info.bSupportsPaletteConversion = true;
g_Config.backend_info.bSupportsSSAA = true;
@@ -524,8 +529,9 @@ Renderer::Renderer()
g_ogl_config.bSupportsGLBaseVertex = true;
g_ogl_config.bSupportsDebug = true;
g_ogl_config.bSupportsMSAA = true;
- g_ogl_config.bSupports2DTextureStorage = true;
- g_ogl_config.bSupports3DTextureStorage = true;
+ g_ogl_config.bSupportsTextureStorage = true;
+ g_ogl_config.bSupports2DTextureStorageMultisample = true;
+ g_ogl_config.bSupports3DTextureStorageMultisample = true;
}
}
else
@@ -541,8 +547,7 @@ Renderer::Renderer()
else if (GLExtensions::Version() == 300)
{
g_ogl_config.eSupportedGLSLVersion = GLSL_130;
- g_ogl_config.bSupportsEarlyFragmentTests =
- false; // layout keyword is only supported on glsl150+
+ g_ogl_config.bSupportsImageLoadStore = false; // layout keyword is only supported on glsl150+
g_ogl_config.bSupportsConservativeDepth =
false; // layout keyword is only supported on glsl150+
g_Config.backend_info.bSupportsGeometryShaders =
@@ -551,8 +556,7 @@ Renderer::Renderer()
else if (GLExtensions::Version() == 310)
{
g_ogl_config.eSupportedGLSLVersion = GLSL_140;
- g_ogl_config.bSupportsEarlyFragmentTests =
- false; // layout keyword is only supported on glsl150+
+ g_ogl_config.bSupportsImageLoadStore = false; // layout keyword is only supported on glsl150+
g_ogl_config.bSupportsConservativeDepth =
false; // layout keyword is only supported on glsl150+
g_Config.backend_info.bSupportsGeometryShaders =
@@ -566,10 +570,28 @@ Renderer::Renderer()
{
g_ogl_config.eSupportedGLSLVersion = GLSL_330;
}
+ else if (GLExtensions::Version() >= 430)
+ {
+ // TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
+ g_ogl_config.eSupportedGLSLVersion = GLSL_430;
+ g_ogl_config.bSupportsTextureStorage = true;
+ g_ogl_config.bSupportsImageLoadStore = true;
+ g_Config.backend_info.bSupportsSSAA = true;
+
+ // Compute shaders are core in GL4.3.
+ g_Config.backend_info.bSupportsComputeShaders = true;
+ }
else
{
g_ogl_config.eSupportedGLSLVersion = GLSL_400;
g_Config.backend_info.bSupportsSSAA = true;
+
+ if (GLExtensions::Version() == 420)
+ {
+ // Texture storage and shader image load/store are core in GL4.2.
+ g_ogl_config.bSupportsTextureStorage = true;
+ g_ogl_config.bSupportsImageLoadStore = true;
+ }
}
// Desktop OpenGL can't have the Android Extension Pack
@@ -578,12 +600,19 @@ Renderer::Renderer()
// Either method can do early-z tests. See PixelShaderGen for details.
g_Config.backend_info.bSupportsEarlyZ =
- g_ogl_config.bSupportsEarlyFragmentTests || g_ogl_config.bSupportsConservativeDepth;
+ g_ogl_config.bSupportsImageLoadStore || g_ogl_config.bSupportsConservativeDepth;
glGetIntegerv(GL_MAX_SAMPLES, &g_ogl_config.max_samples);
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
g_ogl_config.max_samples = 1;
+ // We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
+ // If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, the version checks
+ // above have already set the compute shader and image load/store flags.
+ g_Config.backend_info.bSupportsGPUTextureDecoding =
+ g_Config.backend_info.bSupportsPaletteConversion &&
+ g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
+
if (g_ogl_config.bSupportsDebug)
{
if (GLExtensions::Supports("GL_KHR_debug"))
diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h
index e3c2ba13c3..8ec6a21e0d 100644
--- a/Source/Core/VideoBackends/OGL/Render.h
+++ b/Source/Core/VideoBackends/OGL/Render.h
@@ -22,7 +22,8 @@ enum GLSL_VERSION
GLSL_140,
GLSL_150,
GLSL_330,
- GLSL_400, // and above
+ GLSL_400, // and above
+ GLSL_430,
GLSLES_300, // GLES 3.0
GLSLES_310, // GLES 3.1
GLSLES_320, // GLES 3.2
@@ -51,10 +52,11 @@ struct VideoConfig
bool bSupportsCopySubImage;
u8 SupportedESPointSize;
ES_TEXBUF_TYPE SupportedESTextureBuffer;
- bool bSupports2DTextureStorage;
- bool bSupports3DTextureStorage;
- bool bSupportsEarlyFragmentTests;
+ bool bSupportsTextureStorage;
+ bool bSupports2DTextureStorageMultisample;
+ bool bSupports3DTextureStorageMultisample;
bool bSupportsConservativeDepth;
+ bool bSupportsImageLoadStore;
bool bSupportsAniso;
const char* gl_vendor;
diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp
index 2cf0623939..bd6f99b2e8 100644
--- a/Source/Core/VideoBackends/OGL/TextureCache.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp
@@ -16,6 +16,7 @@
#include "Common/StringUtil.h"
#include "VideoBackends/OGL/FramebufferManager.h"
+#include "VideoBackends/OGL/GPUTimer.h"
#include "VideoBackends/OGL/ProgramShaderCache.h"
#include "VideoBackends/OGL/Render.h"
#include "VideoBackends/OGL/SamplerCache.h"
@@ -23,6 +24,7 @@
#include "VideoBackends/OGL/TextureConverter.h"
#include "VideoCommon/ImageWrite.h"
+#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VideoConfig.h"
@@ -49,6 +51,26 @@ static GLuint s_palette_buffer_offset_uniform[3];
static GLuint s_palette_multiplier_uniform[3];
static GLuint s_palette_copy_position_uniform[3];
+// Cached state for one GPU texture decoding program. Instances are stored in
+// s_texture_decoding_program_info, including entries whose program could not be created
+// (valid == false), so that a failed format is not retried.
+struct TextureDecodingProgramInfo
+{
+ // Result of TextureConversionShader::GetDecodingShaderInfo(); stays nullptr when the
+ // format has no decoding shader.
+ const TextureConversionShader::DecodingShaderInfo* base_info = nullptr;
+ // Compute program built by ProgramShaderCache::CompileComputeShader().
+ SHADER program;
+ // Locations of the u_dst_size, u_src_size, u_src_row_stride, u_src_offset and
+ // u_palette_offset uniforms as returned by glGetUniformLocation(). A negative value
+ // means the uniform is not set when decoding.
+ GLint uniform_dst_size = -1;
+ GLint uniform_src_size = -1;
+ GLint uniform_src_row_stride = -1;
+ GLint uniform_src_offset = -1;
+ GLint uniform_palette_offset = -1;
+ // True only once the program has compiled and the uniform locations were looked up.
+ bool valid = false;
+};
+
+//#define TIME_TEXTURE_DECODING 1
+
+// Decoding programs, keyed by (texture format, palette format) converted to u32.
+static std::map<std::pair<u32, u32>, TextureDecodingProgramInfo> s_texture_decoding_program_info;
+// One buffer texture per TextureConversionShader buffer format, indexed by that format.
+static std::array<GLuint, TextureConversionShader::BUFFER_FORMAT_COUNT>
+    s_texture_decoding_buffer_views;
+static void CreateTextureDecodingResources();
+static void DestroyTextureDecodingResources();
+
bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width,
int virtual_height, unsigned int level)
{
@@ -119,12 +141,22 @@ TextureCache::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConf
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAX_LEVEL, config.levels - 1);
+ if (g_ogl_config.bSupportsTextureStorage)
+ {
+ glTexStorage3D(GL_TEXTURE_2D_ARRAY, config.levels, GL_RGBA8, config.width, config.height,
+ config.layers);
+ }
+
if (config.rendertarget)
{
- for (u32 level = 0; level <= config.levels; level++)
+ if (!g_ogl_config.bSupportsTextureStorage)
{
- glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, config.width, config.height, config.layers,
- 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
+ for (u32 level = 0; level < config.levels; level++)
+ {
+ glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, std::max(config.width >> level, 1u),
+ std::max(config.height >> level, 1u), config.layers, 0, GL_RGBA,
+ GL_UNSIGNED_BYTE, nullptr);
+ }
}
glGenFramebuffers(1, &entry->framebuffer);
FramebufferManager::SetFramebuffer(entry->framebuffer);
@@ -187,8 +219,16 @@ void TextureCache::TCacheEntry::Load(const u8* buffer, u32 width, u32 height, u3
if (expanded_width != width)
glPixelStorei(GL_UNPACK_ROW_LENGTH, expanded_width);
- glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, width, height, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE,
- buffer);
+ if (g_ogl_config.bSupportsTextureStorage)
+ {
+ glTexSubImage3D(GL_TEXTURE_2D_ARRAY, level, 0, 0, 0, width, height, 1, GL_RGBA,
+ GL_UNSIGNED_BYTE, buffer);
+ }
+ else
+ {
+ glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, width, height, 1, 0, GL_RGBA,
+ GL_UNSIGNED_BYTE, buffer);
+ }
if (expanded_width != width)
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
@@ -267,26 +307,31 @@ TextureCache::TextureCache()
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{
- s32 buffer_size = 1024 * 1024;
+ s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1);
+ s32 buffer_size = buffer_size_mb * 1024 * 1024;
s32 max_buffer_size = 0;
- // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates
- // is 65KB, we are asking for a 1MB buffer here.
- // Make sure to check the maximum size and if it is below 1MB
- // then use the maximum the hardware supports instead.
+ // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB
+ // buffer here. This buffer is also used as storage for undecoded textures when compute shader
+ // texture decoding is enabled, in which case the requested size is 32MB.
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size);
+
+ // Clamp the buffer size to the maximum size that the driver supports.
buffer_size = std::min(buffer_size, max_buffer_size);
s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size);
glGenTextures(1, &s_palette_resolv_texture);
glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer);
+
+ CreateTextureDecodingResources();
}
}
TextureCache::~TextureCache()
{
DeleteShaders();
+ DestroyTextureDecodingResources();
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{
@@ -588,4 +633,159 @@ void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unc
FramebufferManager::SetFramebuffer(0);
g_renderer->RestoreAPIState();
}
+
+// GLSL vertex shader source that derives the vertex position from gl_VertexID alone.
+// NOTE(review): not referenced anywhere in the part of this change visible here - confirm
+// that it is still needed.
+static const std::string decoding_vertex_shader = R"(
+void main()
+{
+ vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);
+ gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);
+}
+)";
+
+// Creates one buffer texture for every TextureConversionShader buffer format. Each texture is
+// a differently typed view of the buffer owned by s_palette_stream_buffer.
+void CreateTextureDecodingResources()
+{
+  // Internal format of each view, indexed by buffer format.
+  static const GLenum view_internal_formats[TextureConversionShader::BUFFER_FORMAT_COUNT] = {
+      GL_R8UI,    // BUFFER_FORMAT_R8_UINT
+      GL_R16UI,   // BUFFER_FORMAT_R16_UINT
+      GL_RG32UI,  // BUFFER_FORMAT_R32G32_UINT
+  };
+
+  GLuint* const views = s_texture_decoding_buffer_views.data();
+  glGenTextures(TextureConversionShader::BUFFER_FORMAT_COUNT, views);
+
+  size_t view_index = 0;
+  while (view_index < TextureConversionShader::BUFFER_FORMAT_COUNT)
+  {
+    glBindTexture(GL_TEXTURE_BUFFER, views[view_index]);
+    glTexBuffer(GL_TEXTURE_BUFFER, view_internal_formats[view_index],
+                s_palette_stream_buffer->m_buffer);
+    ++view_index;
+  }
+}
+
+// Releases everything created for GPU texture decoding: the buffer texture views and the
+// compute programs cached in s_texture_decoding_program_info.
+void DestroyTextureDecodingResources()
+{
+  glDeleteTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
+                   s_texture_decoding_buffer_views.data());
+  s_texture_decoding_buffer_views.fill(0);
+
+  // Clearing the map alone would leak the GL program objects. Only entries marked valid hold
+  // a successfully linked program; failed entries had theirs deleted at compile time.
+  for (auto& entry : s_texture_decoding_program_info)
+  {
+    if (entry.second.valid)
+      glDeleteProgram(entry.second.program.glprogid);
+  }
+  s_texture_decoding_program_info.clear();
+}
+
+// Returns whether a compute program exists that can decode `format` with `palette_format`.
+//
+// The first query for a format pair generates and compiles the decoding shader. The outcome,
+// success or failure, is cached in s_texture_decoding_program_info, so later queries for the
+// same pair only perform a map lookup.
+bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
+{
+  const auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
+  const auto iter = s_texture_decoding_program_info.find(key);
+  if (iter != s_texture_decoding_program_info.end())
+    return iter->second.valid;
+
+  // info.valid stays false unless every step below succeeds.
+  TextureDecodingProgramInfo info;
+  info.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
+  if (info.base_info)
+  {
+    const std::string shader_source =
+        TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::OpenGL);
+    if (!shader_source.empty() &&
+        ProgramShaderCache::CompileComputeShader(info.program, shader_source))
+    {
+      info.uniform_dst_size = glGetUniformLocation(info.program.glprogid, "u_dst_size");
+      info.uniform_src_size = glGetUniformLocation(info.program.glprogid, "u_src_size");
+      info.uniform_src_offset = glGetUniformLocation(info.program.glprogid, "u_src_offset");
+      info.uniform_src_row_stride =
+          glGetUniformLocation(info.program.glprogid, "u_src_row_stride");
+      info.uniform_palette_offset =
+          glGetUniformLocation(info.program.glprogid, "u_palette_offset");
+      info.valid = true;
+    }
+  }
+
+  // Failures are cached as well so an unsupported format is not recompiled on every query.
+  s_texture_decoding_program_info.emplace(key, info);
+  return info.valid;
+}
+
+// Decodes raw texture data into level `dst_level` of `entry` with a compute shader.
+//
+// The texture data (and the palette, when the format uses one) is copied into the palette
+// stream buffer, exposed to the shader through buffer textures, and written to the
+// destination through image unit 0. Does nothing when no usable decoding program is cached
+// for (format, palette_format); SupportsGPUTextureDecode() is what populates that cache.
+void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
+                                      size_t data_size, TextureFormat format, u32 width, u32 height,
+                                      u32 aligned_width, u32 aligned_height, u32 row_stride,
+                                      const u8* palette, TlutFormat palette_format)
+{
+  const auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
+  const auto iter = s_texture_decoding_program_info.find(key);
+  // Entries for formats that failed to compile are cached with valid == false and may have a
+  // null base_info, so they must be rejected as well.
+  if (iter == s_texture_decoding_program_info.end() || !iter->second.valid)
+    return;
+
+#ifdef TIME_TEXTURE_DECODING
+  GPUTimer timer;
+#endif
+
+  // Use the cached entry in place; copying it would copy the whole SHADER on every decode.
+  auto& info = iter->second;
+
+  // Copy to GPU-visible buffer, aligned to the data type.
+  const u32 bytes_per_buffer_elem =
+      TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);
+
+  // Only copy palette if it is required.
+  const bool has_palette = info.base_info->palette_size > 0;
+  u32 total_upload_size = static_cast<u32>(data_size);
+  u32 palette_offset = total_upload_size;
+  if (has_palette)
+  {
+    // Align to u16.
+    if ((total_upload_size % sizeof(u16)) != 0)
+    {
+      total_upload_size++;
+      palette_offset++;
+    }
+
+    total_upload_size += info.base_info->palette_size;
+  }
+
+  // Allocate space in stream buffer, and copy texture + palette across.
+  auto buffer = s_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem);
+  memcpy(buffer.first, data, data_size);
+  if (has_palette)
+    memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size);
+  s_palette_stream_buffer->Unmap(total_upload_size);
+
+  info.program.Bind();
+
+  // Calculate stride in buffer elements
+  const u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem;
+  const u32 offset_in_elements = buffer.second / bytes_per_buffer_elem;
+  // NOTE(review): this division truncates unless buffer.second + palette_offset is a multiple
+  // of sizeof(u16) - confirm Map() guarantees that when bytes_per_buffer_elem is 1.
+  const u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16);
+  if (info.uniform_dst_size >= 0)
+    glUniform2ui(info.uniform_dst_size, width, height);
+  if (info.uniform_src_size >= 0)
+    glUniform2ui(info.uniform_src_size, aligned_width, aligned_height);
+  if (info.uniform_src_offset >= 0)
+    glUniform1ui(info.uniform_src_offset, offset_in_elements);
+  if (info.uniform_src_row_stride >= 0)
+    glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements);
+  if (info.uniform_palette_offset >= 0)
+    glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements);
+
+  // Source data is read through the buffer view matching the shader's buffer format.
+  glActiveTexture(GL_TEXTURE9);
+  glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[info.base_info->buffer_format]);
+
+  if (has_palette)
+  {
+    // Use an R16UI view for the palette.
+    glActiveTexture(GL_TEXTURE10);
+    glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
+  }
+
+  auto dispatch_groups = TextureConversionShader::GetDispatchCount(info.base_info, width, height);
+  glBindImageTexture(0, static_cast<TCacheEntry*>(entry)->texture, dst_level, GL_TRUE, 0,
+                     GL_WRITE_ONLY, GL_RGBA8);
+  glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1);
+  glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
+
+  TextureCache::SetStage();
+
+#ifdef TIME_TEXTURE_DECODING
+  WARN_LOG(VIDEO, "Decode texture format %u size %ux%u took %.4fms", static_cast<u32>(format),
+           width, height, timer.GetTimeMilliseconds());
+#endif
+}
}
diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h
index 66f58cae0b..cfd267caae 100644
--- a/Source/Core/VideoBackends/OGL/TextureCache.h
+++ b/Source/Core/VideoBackends/OGL/TextureCache.h
@@ -23,6 +23,12 @@ public:
static void DisableStage(unsigned int stage);
static void SetStage();
+ bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
+ void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
+ TextureFormat format, u32 width, u32 height, u32 aligned_width,
+ u32 aligned_height, u32 row_stride, const u8* palette,
+ TlutFormat palette_format) override;
+
private:
struct TCacheEntry : TCacheEntryBase
{
diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp
index 300d096978..7b5ccbe93e 100644
--- a/Source/Core/VideoBackends/OGL/main.cpp
+++ b/Source/Core/VideoBackends/OGL/main.cpp
@@ -101,6 +101,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsExclusiveFullscreen = false;
g_Config.backend_info.bSupportsOversizedViewports = true;
g_Config.backend_info.bSupportsGeometryShaders = true;
+ g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = false;
g_Config.backend_info.bSupportsPostProcessing = true;
g_Config.backend_info.bSupportsSSAA = true;
@@ -108,6 +109,11 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = true;
+ // TODO: If texel buffers are not supported, the graphics options will still show this
+ // option even though it cannot be used. The only way around this would be creating a
+ // context when calling this function to determine what is available.
+ g_Config.backend_info.bSupportsGPUTextureDecoding = true;
+
// Overwritten in Render.cpp later
g_Config.backend_info.bSupportsDualSourceBlend = true;
g_Config.backend_info.bSupportsPrimitiveRestart = true;
diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp
index 96ebdd8adb..eb70f4059f 100644
--- a/Source/Core/VideoBackends/Software/SWmain.cpp
+++ b/Source/Core/VideoBackends/Software/SWmain.cpp
@@ -131,7 +131,9 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsOversizedViewports = true;
g_Config.backend_info.bSupportsPrimitiveRestart = false;
g_Config.backend_info.bSupportsMultithreading = false;
+ g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
+ g_Config.backend_info.bSupportsGPUTextureDecoding = false;
// aamodes
g_Config.backend_info.AAModes = {1};
diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp
index 27a3976b83..460ec919aa 100644
--- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp
+++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp
@@ -91,7 +91,8 @@ bool CommandBufferManager::CreateCommandBuffers()
VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 500000},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 500000},
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16},
- {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024}};
+ {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024},
+ {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1024}};
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
nullptr,
diff --git a/Source/Core/VideoBackends/Vulkan/Constants.h b/Source/Core/VideoBackends/Vulkan/Constants.h
index f65aad6cfc..8507d23342 100644
--- a/Source/Core/VideoBackends/Vulkan/Constants.h
+++ b/Source/Core/VideoBackends/Vulkan/Constants.h
@@ -30,6 +30,7 @@ enum DESCRIPTOR_SET_LAYOUT
DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS,
DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS,
DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS,
+ DESCRIPTOR_SET_LAYOUT_COMPUTE,
NUM_DESCRIPTOR_SET_LAYOUTS
};
@@ -52,6 +53,12 @@ enum DESCRIPTOR_SET_BIND_POINT
// - Same as standard, plus 128 bytes of push constants, accessible from all stages.
// - Texture Decoding
// - Same as push constant, plus a single texel buffer accessible from PS.
+// - Compute
+// - 1 uniform buffer [set=0, binding=0]
+// - 4 combined image samplers [set=0, binding=1-4]
+// - 2 texel buffers [set=0, binding=5-6]
+// - 1 storage image [set=0, binding=7]
+// - 128 bytes of push constants
//
// All four pipeline layout share the first two descriptor sets (uniform buffers, PS samplers).
// The third descriptor set (see bind points above) is used for storage or texel buffers.
@@ -62,6 +69,7 @@ enum PIPELINE_LAYOUT
PIPELINE_LAYOUT_BBOX,
PIPELINE_LAYOUT_PUSH_CONSTANT,
PIPELINE_LAYOUT_TEXTURE_CONVERSION,
+ PIPELINE_LAYOUT_COMPUTE,
NUM_PIPELINE_LAYOUTS
};
diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp
index 9c903b0065..1fb083be0f 100644
--- a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp
+++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp
@@ -324,6 +324,41 @@ std::pair ObjectCache::GetPipelineWithCacheResult(const Pipeli
return {pipeline, false};
}
+// Builds a compute pipeline from the shader module and pipeline layout in info.
+// The resulting handle is not stored by this function. If vkCreateComputePipelines
+// fails, the error is logged and VK_NULL_HANDLE is returned.
+VkPipeline ObjectCache::CreateComputePipeline(const ComputePipelineInfo& info)
+{
+  // Single compute stage using the "main" entry point, without specialization constants.
+  VkPipelineShaderStageCreateInfo stage_info = {};
+  stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+  stage_info.pNext = nullptr;
+  stage_info.flags = 0;
+  stage_info.stage = VK_SHADER_STAGE_COMPUTE_BIT;
+  stage_info.module = info.cs;
+  stage_info.pName = "main";
+  stage_info.pSpecializationInfo = nullptr;
+
+  // No base pipeline is used, hence the null handle and the index of -1.
+  VkComputePipelineCreateInfo create_info = {};
+  create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
+  create_info.pNext = nullptr;
+  create_info.flags = 0;
+  create_info.stage = stage_info;
+  create_info.layout = info.pipeline_layout;
+  create_info.basePipelineHandle = VK_NULL_HANDLE;
+  create_info.basePipelineIndex = -1;
+
+  VkPipeline new_pipeline = VK_NULL_HANDLE;
+  const VkResult result = vkCreateComputePipelines(g_vulkan_context->GetDevice(), VK_NULL_HANDLE,
+                                                   1, &create_info, nullptr, &new_pipeline);
+  if (result == VK_SUCCESS)
+    return new_pipeline;
+
+  LOG_VULKAN_ERROR(result, "vkCreateComputePipelines failed: ");
+  return VK_NULL_HANDLE;
+}
+
+// Returns the pipeline cached for info, creating and caching it on a miss.
+// Whatever CreateComputePipeline returns is stored, including VK_NULL_HANDLE.
+VkPipeline ObjectCache::GetComputePipeline(const ComputePipelineInfo& info)
+{
+  const auto cached = m_compute_pipeline_objects.find(info);
+  if (cached == m_compute_pipeline_objects.end())
+  {
+    const VkPipeline created = CreateComputePipeline(info);
+    m_compute_pipeline_objects.emplace(info, created);
+    return created;
+  }
+
+  return cached->second;
+}
+
std::string ObjectCache::GetDiskCacheFileName(const char* type)
{
return StringFromFormat("%svulkan-%s-%s.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
@@ -477,6 +512,13 @@ void ObjectCache::DestroyPipelineCache()
}
m_pipeline_objects.clear();
+ for (const auto& it : m_compute_pipeline_objects)
+ {
+ if (it.second != VK_NULL_HANDLE)
+ vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr);
+ }
+ m_compute_pipeline_objects.clear();
+
vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr);
m_pipeline_cache = VK_NULL_HANDLE;
}
@@ -725,6 +767,17 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
};
+ static const VkDescriptorSetLayoutBinding compute_set_bindings[] = {
+ {0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+ {1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+ {2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+ {3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+ {4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+ {5, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+ {6, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+ {7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+ };
+
static const VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SET_LAYOUTS] = {
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast(ArraySize(ubo_set_bindings)), ubo_set_bindings},
@@ -733,7 +786,9 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast(ArraySize(ssbo_set_bindings)), ssbo_set_bindings},
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
- static_cast(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings}};
+ static_cast(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings},
+ {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
+ static_cast(ArraySize(compute_set_bindings)), compute_set_bindings}};
for (size_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; i++)
{
@@ -774,8 +829,11 @@ bool ObjectCache::CreatePipelineLayouts()
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UNIFORM_BUFFERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS]};
+ VkDescriptorSetLayout compute_sets[] = {m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_COMPUTE]};
VkPushConstantRange push_constant_range = {
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, PUSH_CONSTANT_BUFFER_SIZE};
+ VkPushConstantRange compute_push_constant_range = {VK_SHADER_STAGE_COMPUTE_BIT, 0,
+ PUSH_CONSTANT_BUFFER_SIZE};
// Info for each pipeline layout
VkPipelineLayoutCreateInfo pipeline_layout_info[NUM_PIPELINE_LAYOUTS] = {
@@ -794,7 +852,11 @@ bool ObjectCache::CreatePipelineLayouts()
// Texture Conversion
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast(ArraySize(texture_conversion_sets)), texture_conversion_sets, 1,
- &push_constant_range}};
+ &push_constant_range},
+
+ // Compute
+ {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
+ static_cast(ArraySize(compute_sets)), compute_sets, 1, &compute_push_constant_range}};
for (size_t i = 0; i < NUM_PIPELINE_LAYOUTS; i++)
{
@@ -1007,6 +1069,31 @@ bool operator<(const SamplerState& lhs, const SamplerState& rhs)
return lhs.bits < rhs.bits;
}
+// Hashes the raw bytes of the key with XXH64 (seed 0).
+std::size_t ComputePipelineInfoHash::operator()(const ComputePipelineInfo& key) const
+{
+  return static_cast<std::size_t>(XXH64(&key, sizeof(key), 0));
+}
+
+// ComputePipelineInfo keys are compared bytewise, so the ordering defined below is the
+// memcmp ordering of the raw structs.
+bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
+{
+  const int order = std::memcmp(&lhs, &rhs, sizeof(lhs));
+  return order == 0;
+}
+
+bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
+{
+  return !(lhs == rhs);
+}
+
+bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
+{
+  const int order = std::memcmp(&lhs, &rhs, sizeof(lhs));
+  return order < 0;
+}
+
+bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
+{
+  const int order = std::memcmp(&lhs, &rhs, sizeof(lhs));
+  return order > 0;
+}
+
bool ObjectCache::CompileSharedShaders()
{
static const char PASSTHROUGH_VERTEX_SHADER_SOURCE[] = R"(
diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.h b/Source/Core/VideoBackends/Vulkan/ObjectCache.h
index 546d1439a5..11d436fc35 100644
--- a/Source/Core/VideoBackends/Vulkan/ObjectCache.h
+++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.h
@@ -56,6 +56,22 @@ bool operator!=(const SamplerState& lhs, const SamplerState& rhs);
bool operator>(const SamplerState& lhs, const SamplerState& rhs);
bool operator<(const SamplerState& lhs, const SamplerState& rhs);
+// Describes a compute pipeline: the pipeline layout and the compute shader module it is
+// created from. Used as the key when looking up cached compute pipelines.
+// The hash and the comparison operators work on the raw bytes of this struct.
+struct ComputePipelineInfo
+{
+ VkPipelineLayout pipeline_layout;
+ VkShaderModule cs;
+};
+
+// Hash functor for ComputePipelineInfo keys.
+struct ComputePipelineInfoHash
+{
+ std::size_t operator()(const ComputePipelineInfo& key) const;
+};
+
+// Bytewise comparisons of two pipeline descriptions.
+bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
+bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
+bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
+bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
+
class ObjectCache
{
public:
@@ -114,6 +130,12 @@ public:
// otherwise for a cache hit it will be true.
std::pair GetPipelineWithCacheResult(const PipelineInfo& info);
+ // Creates a compute pipeline, and does not track the handle.
+ VkPipeline CreateComputePipeline(const ComputePipelineInfo& info);
+
+ // Find a pipeline by the specified description, if not found, attempts to create it
+ VkPipeline GetComputePipeline(const ComputePipelineInfo& info);
+
// Saves the pipeline cache to disk. Call when shutting down.
void SavePipelineCache();
@@ -166,6 +188,8 @@ private:
ShaderCache m_ps_cache;
std::unordered_map m_pipeline_objects;
+  // Compute pipelines handed out by GetComputePipeline, keyed by their description.
+  std::unordered_map<ComputePipelineInfo, VkPipeline, ComputePipelineInfoHash>
+      m_compute_pipeline_objects;
VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE;
std::string m_pipeline_cache_filename;
diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
index 2265a34364..d4d095bb78 100644
--- a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
+++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
@@ -35,7 +35,7 @@ static const TBuiltInResource* GetCompilerResourceLimits();
// Compile a shader to SPIR-V via glslang
static bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage,
const char* stage_filename, const char* source_code,
- size_t source_code_length, bool prepend_header);
+ size_t source_code_length, const char* header, size_t header_length);
// Regarding the UBO bind points, we subtract one from the binding index because
// the OpenGL backend requires UBO #0 for non-block uniforms (at least on NV).
@@ -73,9 +73,32 @@ static const char SHADER_HEADER[] = R"(
#define gl_VertexID gl_VertexIndex
#define gl_InstanceID gl_InstanceIndex
)";
+// GLSL header that CompileComputeShader passes to CompileShaderToSPV for compute shaders.
+// All resources live in descriptor set 0: uniform buffers start at binding 0, samplers at
+// binding 1, texel buffers at binding 5 and storage images at binding 7.
+static const char COMPUTE_SHADER_HEADER[] = R"(
+ // Target GLSL 4.5.
+ #version 450 core
+ // All resources are packed into one descriptor set for compute.
+ #define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (0 + x))
+ #define SAMPLER_BINDING(x) layout(set = 0, binding = (1 + x))
+ #define TEXEL_BUFFER_BINDING(x) layout(set = 0, binding = (5 + x))
+ #define IMAGE_BINDING(format, x) layout(format, set = 0, binding = (7 + x))
+
+ // hlsl to glsl function translation
+ #define float2 vec2
+ #define float3 vec3
+ #define float4 vec4
+ #define uint2 uvec2
+ #define uint3 uvec3
+ #define uint4 uvec4
+ #define int2 ivec2
+ #define int3 ivec3
+ #define int4 ivec4
+ #define frac fract
+ #define lerp mix
+)";
bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char* stage_filename,
- const char* source_code, size_t source_code_length, bool prepend_header)
+ const char* source_code, size_t source_code_length, const char* header,
+ size_t header_length)
{
if (!InitializeGlslang())
return false;
@@ -91,10 +114,10 @@ bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char
std::string full_source_code;
const char* pass_source_code = source_code;
int pass_source_code_length = static_cast(source_code_length);
- if (prepend_header)
+ if (header_length > 0)
{
- full_source_code.reserve(sizeof(SHADER_HEADER) + source_code_length);
- full_source_code.append(SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
+ full_source_code.reserve(header_length + source_code_length);
+ full_source_code.append(header, header_length);
full_source_code.append(source_code, source_code_length);
pass_source_code = full_source_code.c_str();
pass_source_code_length = static_cast(full_source_code.length());
@@ -318,21 +341,28 @@ bool CompileVertexShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header)
{
return CompileShaderToSPV(out_code, EShLangVertex, "vs", source_code, source_code_length,
- prepend_header);
+ SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
}
+// Compiles a geometry shader to SPIR-V. The common shader header is prepended only when
+// prepend_header is set; a zero header length makes CompileShaderToSPV skip prepending.
bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header)
{
return CompileShaderToSPV(out_code, EShLangGeometry, "gs", source_code, source_code_length,
- prepend_header);
+                            prepend_header ? SHADER_HEADER : nullptr,
+                            prepend_header ? sizeof(SHADER_HEADER) - 1 : 0);
}
+// Compiles a fragment shader to SPIR-V. The common shader header is prepended only when
+// prepend_header is set; a zero header length makes CompileShaderToSPV skip prepending.
bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header)
{
return CompileShaderToSPV(out_code, EShLangFragment, "ps", source_code, source_code_length,
- prepend_header);
+                            prepend_header ? SHADER_HEADER : nullptr,
+                            prepend_header ? sizeof(SHADER_HEADER) - 1 : 0);
+}
+
+// Compiles a compute shader to SPIR-V. The compute shader header is prepended only when
+// prepend_header is set.
+bool CompileComputeShader(SPIRVCodeVector* out_code, const char* source_code,
+                          size_t source_code_length, bool prepend_header)
+{
+  // A zero header length tells CompileShaderToSPV to compile the source unmodified.
+  const char* header = prepend_header ? COMPUTE_SHADER_HEADER : nullptr;
+  const size_t header_length = prepend_header ? sizeof(COMPUTE_SHADER_HEADER) - 1 : 0;
+  return CompileShaderToSPV(out_code, EShLangCompute, "cs", source_code, source_code_length,
+                            header, header_length);
}
} // namespace ShaderCompiler
diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h
index 96bd9081bf..197dc1787c 100644
--- a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h
+++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.h
@@ -29,5 +29,9 @@ bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code,
bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header = true);
+// Compile a compute shader to SPIR-V.
+bool CompileComputeShader(SPIRVCodeVector* out_code, const char* source_code,
+ size_t source_code_length, bool prepend_header = true);
+
} // namespace ShaderCompiler
} // namespace Vulkan
diff --git a/Source/Core/VideoBackends/Vulkan/Texture2D.cpp b/Source/Core/VideoBackends/Vulkan/Texture2D.cpp
index 9dda089b21..9b8111aa94 100644
--- a/Source/Core/VideoBackends/Vulkan/Texture2D.cpp
+++ b/Source/Core/VideoBackends/Vulkan/Texture2D.cpp
@@ -4,6 +4,7 @@
#include
+#include "Common/Assert.h"
#include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/Texture2D.h"
#include "VideoBackends/Vulkan/VulkanContext.h"
@@ -273,10 +274,132 @@ void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout
break;
}
+ // If we were using a compute layout, the stages need to reflect that
+ switch (m_compute_layout)
+ {
+ case ComputeImageLayout::Undefined:
+ break;
+ case ComputeImageLayout::ReadOnly:
+ barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
+ srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+ break;
+ case ComputeImageLayout::WriteOnly:
+ barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+ srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+ break;
+ case ComputeImageLayout::ReadWrite:
+ barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+ srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+ break;
+ }
+ m_compute_layout = ComputeImageLayout::Undefined;
+
vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1,
&barrier);
m_layout = new_layout;
}
+// Records a barrier that makes the image usable from a compute shader.
+// ReadOnly moves the image to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; WriteOnly and
+// ReadWrite move it to VK_IMAGE_LAYOUT_GENERAL. new_layout must not be
+// ComputeImageLayout::Undefined. Nothing is recorded when the image is already in the
+// requested compute layout.
+void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout)
+{
+  _assert_(new_layout != ComputeImageLayout::Undefined);
+  if (m_compute_layout == new_layout)
+    return;
+
+  VkImageMemoryBarrier barrier = {
+      VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,  // VkStructureType sType
+      nullptr,                                 // const void* pNext
+      0,                                       // VkAccessFlags srcAccessMask
+      0,                                       // VkAccessFlags dstAccessMask
+      m_layout,                                // VkImageLayout oldLayout
+      VK_IMAGE_LAYOUT_GENERAL,                 // VkImageLayout newLayout
+      VK_QUEUE_FAMILY_IGNORED,                 // uint32_t srcQueueFamilyIndex
+      VK_QUEUE_FAMILY_IGNORED,                 // uint32_t dstQueueFamilyIndex
+      m_image,                                 // VkImage image
+      {static_cast<VkImageAspectFlags>(Util::IsDepthFormat(m_format) ? VK_IMAGE_ASPECT_DEPTH_BIT :
+                                                                        VK_IMAGE_ASPECT_COLOR_BIT),
+       0, m_levels, 0, m_layers}  // VkImageSubresourceRange subresourceRange
+  };
+
+  // Source side: wait for whatever the image was last used for.
+  VkPipelineStageFlags srcStageMask, dstStageMask;
+  switch (m_layout)
+  {
+  case VK_IMAGE_LAYOUT_UNDEFINED:
+    // Layout undefined therefore contents undefined, and we don't care what happens to it.
+    barrier.srcAccessMask = 0;
+    srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_PREINITIALIZED:
+    // Image has been pre-initialized by the host, so ensure all writes have completed.
+    barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_HOST_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+    // Image was being used as a color attachment, so ensure all writes have completed.
+    barrier.srcAccessMask =
+        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+    // Image was being used as a depthstencil attachment, so ensure all writes have completed.
+    // Depth/stencil attachment accesses happen in the fragment test stages, so those are the
+    // stages that have to be named together with these access flags.
+    barrier.srcAccessMask =
+        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+    srcStageMask =
+        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+    // Image was being used as a shader resource, make sure all reads have finished.
+    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+    // Image was being used as a copy source, ensure all reads have finished.
+    barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
+    break;
+
+  case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+    // Image was being used as a copy destination, ensure all writes have finished.
+    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
+    break;
+
+  default:
+    srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+    break;
+  }
+
+  // If the image is currently in a compute layout, the previous access came from a compute
+  // shader. Without this, switching between compute layouts (e.g. WriteOnly -> ReadOnly)
+  // would not wait for the earlier shader writes.
+  switch (m_compute_layout)
+  {
+  case ComputeImageLayout::Undefined:
+    break;
+  case ComputeImageLayout::ReadOnly:
+    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  case ComputeImageLayout::WriteOnly:
+    barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  case ComputeImageLayout::ReadWrite:
+    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+    srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  }
+
+  // Destination side: the compute shader stage, with the access the caller asked for.
+  switch (new_layout)
+  {
+  case ComputeImageLayout::ReadOnly:
+    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+    barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  case ComputeImageLayout::WriteOnly:
+    barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+    barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  case ComputeImageLayout::ReadWrite:
+    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+    barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
+    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+    break;
+  default:
+    // Only reached if Undefined gets past the assert above.
+    dstStageMask = 0;
+    break;
+  }
+
+  m_layout = barrier.newLayout;
+  m_compute_layout = new_layout;
+
+  vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1,
+                       &barrier);
+}
+
} // namespace Vulkan
diff --git a/Source/Core/VideoBackends/Vulkan/Texture2D.h b/Source/Core/VideoBackends/Vulkan/Texture2D.h
index bf0a8a70ab..3fce48d758 100644
--- a/Source/Core/VideoBackends/Vulkan/Texture2D.h
+++ b/Source/Core/VideoBackends/Vulkan/Texture2D.h
@@ -17,6 +17,15 @@ class ObjectCache;
class Texture2D
{
public:
+ // Custom image layouts, mainly used for switching to/from compute
+ enum class ComputeImageLayout
+ {
+ Undefined, // Not in a compute layout; not accepted as a transition target.
+ ReadOnly, // Shader reads; image is kept in VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL.
+ WriteOnly, // Shader writes; image is kept in VK_IMAGE_LAYOUT_GENERAL.
+ ReadWrite // Shader reads and writes; image is kept in VK_IMAGE_LAYOUT_GENERAL.
+ };
+
Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format,
VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image,
VkDeviceMemory device_memory, VkImageView view);
@@ -50,6 +59,7 @@ public:
void OverrideImageLayout(VkImageLayout new_layout);
void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout);
+ void TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout);
private:
u32 m_width;
@@ -60,6 +70,7 @@ private:
VkSampleCountFlagBits m_samples;
VkImageViewType m_view_type;
VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ ComputeImageLayout m_compute_layout = ComputeImageLayout::Undefined;
VkImage m_image;
VkDeviceMemory m_device_memory;
diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
index 5a73106476..b951b78aa2 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
+++ b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
@@ -138,6 +138,21 @@ void TextureCache::CopyRectangleFromTexture(TCacheEntry* dst_texture,
ScaleTextureRectangle(dst_texture, dst_rect, src_texture, src_rect);
}
+// Reports whether the texture converter can decode this format/palette combination on the
+// GPU.
+bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
+{
+  const bool can_decode = m_texture_converter->SupportsTextureDecoding(format, palette_format);
+  return can_decode;
+}
+
+// Decodes an encoded texture into mip level dst_level of entry using the texture
+// converter's compute path. All arguments are forwarded unchanged; entry is downcast to
+// this backend's TCacheEntry.
+void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
+                                      size_t data_size, TextureFormat format, u32 width, u32 height,
+                                      u32 aligned_width, u32 aligned_height, u32 row_stride,
+                                      const u8* palette, TlutFormat palette_format)
+{
+  m_texture_converter->DecodeTexture(static_cast<TCacheEntry*>(entry), dst_level, data, data_size,
+                                     format, width, height, aligned_width, aligned_height,
+                                     row_stride, palette, palette_format);
+}
+
void TextureCache::CopyTextureRectangle(TCacheEntry* dst_texture,
const MathUtil::Rectangle& dst_rect,
Texture2D* src_texture,
diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.h b/Source/Core/VideoBackends/Vulkan/TextureCache.h
index f014492d7b..b433d7d9e0 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureCache.h
+++ b/Source/Core/VideoBackends/Vulkan/TextureCache.h
@@ -66,6 +66,13 @@ public:
void CopyRectangleFromTexture(TCacheEntry* dst_texture, const MathUtil::Rectangle& dst_rect,
Texture2D* src_texture, const MathUtil::Rectangle& src_rect);
+ bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
+
+ void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
+ TextureFormat format, u32 width, u32 height, u32 aligned_width,
+ u32 aligned_height, u32 row_stride, const u8* palette,
+ TlutFormat palette_format) override;
+
private:
bool CreateRenderPasses();
diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
index 8f69b386e0..cbe4322b1d 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
+++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp
@@ -42,8 +42,12 @@ TextureConverter::~TextureConverter()
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it, nullptr);
}
+ if (m_texel_buffer_view_r8_uint != VK_NULL_HANDLE)
+ vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r8_uint, nullptr);
if (m_texel_buffer_view_r16_uint != VK_NULL_HANDLE)
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r16_uint, nullptr);
+ if (m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE)
+ vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r32g32_uint, nullptr);
if (m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE)
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_rgba8_unorm, nullptr);
@@ -59,6 +63,12 @@ TextureConverter::~TextureConverter()
vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr);
}
+ for (const auto& it : m_decoding_pipelines)
+ {
+ if (it.second.compute_shader != VK_NULL_HANDLE)
+ vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second.compute_shader, nullptr);
+ }
+
if (m_rgb_to_yuyv_shader != VK_NULL_HANDLE)
vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_rgb_to_yuyv_shader, nullptr);
if (m_yuyv_to_rgb_shader != VK_NULL_HANDLE)
@@ -103,6 +113,12 @@ bool TextureConverter::Initialize()
return false;
}
+ if (!CreateDecodingTexture())
+ {
+ PanicAlert("Failed to create decoding texture");
+ return false;
+ }
+
if (!CompileYUYVConversionShaders())
{
PanicAlert("Failed to compile YUYV conversion shaders");
@@ -371,6 +387,152 @@ void TextureConverter::DecodeYUYVTextureFromMemory(TextureCache::TCacheEntry* ds
draw.EndRenderPass();
}
+// Returns whether a decoding compute shader is available for the given texture and palette
+// format. The shader is generated and compiled on the first query; the outcome (including
+// failure) is remembered in m_decoding_pipelines so later queries are a map lookup.
+bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format)
+{
+  const auto key = std::make_pair(format, palette_format);
+  const auto cached = m_decoding_pipelines.find(key);
+  if (cached != m_decoding_pipelines.end())
+    return cached->second.valid;
+
+  TextureDecodingPipeline pipeline;
+  pipeline.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
+  pipeline.compute_shader = VK_NULL_HANDLE;
+  pipeline.valid = false;
+
+  // Only try to build a shader when decoding info exists for this format.
+  if (pipeline.base_info)
+  {
+    const std::string shader_source =
+        TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::Vulkan);
+    pipeline.compute_shader = Util::CompileAndCreateComputeShader(shader_source, true);
+    pipeline.valid = pipeline.compute_shader != VK_NULL_HANDLE;
+  }
+
+  m_decoding_pipelines.emplace(key, pipeline);
+  return pipeline.valid;
+}
+
+// Decodes an encoded texture with a compute shader and copies the result into mip level
+// dst_level of entry.
+//
+// The encoded data (and the palette, if the format has one) is uploaded to the texel
+// buffer, the compute shader writes the decoded texels to m_decoding_texture, and that
+// image is then copied to the destination texture. All commands are recorded in the
+// current init command buffer.
+//
+// Does nothing if no valid decoding pipeline is cached for (format, palette_format), i.e.
+// SupportsTextureDecoding has not been called for this pair or returned false.
+void TextureConverter::DecodeTexture(TextureCache::TCacheEntry* entry, u32 dst_level,
+                                     const u8* data, size_t data_size, TextureFormat format,
+                                     u32 width, u32 height, u32 aligned_width, u32 aligned_height,
+                                     u32 row_stride, const u8* palette, TlutFormat palette_format)
+{
+  auto key = std::make_pair(format, palette_format);
+  auto iter = m_decoding_pipelines.find(key);
+  // Failed pipelines are cached too (with a null base_info/shader), so check the flag as well.
+  if (iter == m_decoding_pipelines.end() || !iter->second.valid)
+    return;
+
+  struct PushConstants
+  {
+    u32 dst_size[2];
+    u32 src_size[2];
+    u32 src_offset;
+    u32 src_row_stride;
+    u32 palette_offset;
+  };
+
+  // Copy to GPU-visible buffer, aligned to the data type
+  const auto& info = iter->second;
+  u32 bytes_per_buffer_elem =
+      TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);
+
+  // Calculate total data size, including palette.
+  // Only copy palette if it is required.
+  u32 total_upload_size = static_cast<u32>(data_size);
+  u32 palette_size = info.base_info->palette_size;
+  u32 palette_offset = total_upload_size;
+  bool has_palette = palette_size > 0;
+  if (has_palette)
+  {
+    // Align to u16.
+    if ((total_upload_size % sizeof(u16)) != 0)
+    {
+      total_upload_size++;
+      palette_offset++;
+    }
+
+    total_upload_size += palette_size;
+  }
+
+  // Allocate space for upload, if it fails, execute the buffer.
+  if (!m_texel_buffer->ReserveMemory(total_upload_size, bytes_per_buffer_elem))
+  {
+    Util::ExecuteCurrentCommandsAndRestoreState(true, false);
+    if (!m_texel_buffer->ReserveMemory(total_upload_size, bytes_per_buffer_elem))
+    {
+      // Nothing was reserved, so there is no space to copy the texture into.
+      PanicAlert("Failed to reserve memory for encoded texture upload");
+      return;
+    }
+  }
+
+  // Copy/commit upload buffer.
+  u32 texel_buffer_offset = static_cast<u32>(m_texel_buffer->GetCurrentOffset());
+  std::memcpy(m_texel_buffer->GetCurrentHostPointer(), data, data_size);
+  if (has_palette)
+    std::memcpy(m_texel_buffer->GetCurrentHostPointer() + palette_offset, palette, palette_size);
+  m_texel_buffer->CommitMemory(total_upload_size);
+
+  // Determine uniforms. Offsets and strides are expressed in buffer elements rather than
+  // bytes; the palette is always addressed as u16 elements.
+  PushConstants constants = {
+      {width, height},
+      {aligned_width, aligned_height},
+      texel_buffer_offset / bytes_per_buffer_elem,
+      row_stride / bytes_per_buffer_elem,
+      static_cast<u32>((texel_buffer_offset + palette_offset) / sizeof(u16))};
+
+  // Determine view to use for texel buffers.
+  VkBufferView data_view = VK_NULL_HANDLE;
+  switch (info.base_info->buffer_format)
+  {
+  case TextureConversionShader::BUFFER_FORMAT_R8_UINT:
+    data_view = m_texel_buffer_view_r8_uint;
+    break;
+  case TextureConversionShader::BUFFER_FORMAT_R16_UINT:
+    data_view = m_texel_buffer_view_r16_uint;
+    break;
+  case TextureConversionShader::BUFFER_FORMAT_R32G32_UINT:
+    data_view = m_texel_buffer_view_r32g32_uint;
+    break;
+  default:
+    break;
+  }
+
+  // Place compute shader dispatches together in the init command buffer.
+  // That way we don't have to pay a penalty for switching from graphics->compute,
+  // or end/restart our render pass.
+  VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentInitCommandBuffer();
+
+  // Dispatch compute to temporary texture.
+  ComputeShaderDispatcher dispatcher(command_buffer,
+                                     g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE),
+                                     info.compute_shader);
+  m_decoding_texture->TransitionToLayout(command_buffer, Texture2D::ComputeImageLayout::WriteOnly);
+  dispatcher.SetPushConstants(&constants, sizeof(constants));
+  dispatcher.SetStorageImage(m_decoding_texture->GetView(), m_decoding_texture->GetLayout());
+  dispatcher.SetTexelBuffer(0, data_view);
+  if (has_palette)
+    dispatcher.SetTexelBuffer(1, m_texel_buffer_view_r16_uint);
+  auto groups = TextureConversionShader::GetDispatchCount(info.base_info, width, height);
+  dispatcher.Dispatch(groups.first, groups.second, 1);
+
+  // Copy from temporary texture to final destination.
+  m_decoding_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+  entry->GetTexture()->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+  VkImageCopy image_copy = {{VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
+                            {0, 0, 0},
+                            {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, 0, 1},
+                            {0, 0, 0},
+                            {width, height, 1}};
+  vkCmdCopyImage(command_buffer, m_decoding_texture->GetImage(),
+                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, entry->GetTexture()->GetImage(),
+                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy);
+}
+
bool TextureConverter::CreateTexelBuffer()
{
// Prefer an 8MB buffer if possible, but use less if the device doesn't support this.
@@ -386,9 +548,13 @@ bool TextureConverter::CreateTexelBuffer()
return false;
// Create views of the formats that we will be using.
+ m_texel_buffer_view_r8_uint = CreateTexelBufferView(VK_FORMAT_R8_UINT);
m_texel_buffer_view_r16_uint = CreateTexelBufferView(VK_FORMAT_R16_UINT);
+ m_texel_buffer_view_r32g32_uint = CreateTexelBufferView(VK_FORMAT_R32G32_UINT);
m_texel_buffer_view_rgba8_unorm = CreateTexelBufferView(VK_FORMAT_R8G8B8A8_UNORM);
- return m_texel_buffer_view_r16_uint != VK_NULL_HANDLE &&
+ return m_texel_buffer_view_r8_uint != VK_NULL_HANDLE &&
+ m_texel_buffer_view_r16_uint != VK_NULL_HANDLE &&
+ m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE &&
m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE;
}
@@ -611,6 +777,15 @@ bool TextureConverter::CreateEncodingDownloadTexture()
return m_encoding_download_texture && m_encoding_download_texture->Map();
}
+// Creates the RGBA8 image that the decoding compute shaders write to. It is created with
+// storage and transfer-source usage. Returns false if the texture could not be created.
+bool TextureConverter::CreateDecodingTexture()
+{
+  m_decoding_texture = Texture2D::Create(
+      DECODING_TEXTURE_WIDTH, DECODING_TEXTURE_HEIGHT, 1, 1, VK_FORMAT_R8G8B8A8_UNORM,
+      VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL,
+      VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+  return static_cast<bool>(m_decoding_texture);
+}
+
bool TextureConverter::CompileYUYVConversionShaders()
{
static const char RGB_TO_YUYV_SHADER_SOURCE[] = R"(
diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.h b/Source/Core/VideoBackends/Vulkan/TextureConverter.h
index 651d511849..39543e0f17 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureConverter.h
+++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.h
@@ -5,11 +5,14 @@
#pragma once
#include
+#include