diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp
index df2d7cb790..18626eaa0e 100644
--- a/Source/Core/VideoBackends/OGL/Render.cpp
+++ b/Source/Core/VideoBackends/OGL/Render.cpp
@@ -606,6 +606,13 @@ Renderer::Renderer()
   if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
     g_ogl_config.max_samples = 1;
 
+  // We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
+  // If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
+  // enabled in the version check below.
+  g_Config.backend_info.bSupportsGPUTextureDecoding =
+      g_Config.backend_info.bSupportsPaletteConversion &&
+      g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
+
   if (g_ogl_config.bSupportsDebug)
   {
     if (GLExtensions::Supports("GL_KHR_debug"))
diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp
index 39a7ccfe2d..72b04fc5b1 100644
--- a/Source/Core/VideoBackends/OGL/TextureCache.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp
@@ -23,6 +23,7 @@
 #include "VideoBackends/OGL/TextureConverter.h"
 
 #include "VideoCommon/ImageWrite.h"
+#include "VideoCommon/TextureConversionShader.h"
 #include "VideoCommon/TextureDecoder.h"
 #include "VideoCommon/VideoConfig.h"
 
@@ -49,6 +50,28 @@ static GLuint s_palette_buffer_offset_uniform[3];
 static GLuint s_palette_multiplier_uniform[3];
 static GLuint s_palette_copy_position_uniform[3];
 
+// Cached state for one compute-shader texture decoder, looked up by (texture format,
+// palette format). Uniform locations stay at -1 when the program does not expose them.
+struct TextureDecodingProgramInfo
+{
+  const TextureConversionShader::DecodingShaderInfo* base_info = nullptr;
+  SHADER program;
+  GLint uniform_dst_size = -1;
+  GLint uniform_src_size = -1;
+  GLint uniform_src_row_stride = -1;
+  GLint uniform_src_offset = -1;
+  GLint uniform_palette_offset = -1;
+  bool valid = false;  // True only after the compute program compiled successfully.
+};
+
+// Keyed by (texture format, palette format), both stored as u32.
+static std::map<std::pair<u32, u32>, TextureDecodingProgramInfo> s_texture_decoding_program_info;
+// One texture-buffer view of the palette stream buffer per decoding buffer format.
+static std::array<GLuint, TextureConversionShader::BUFFER_FORMAT_COUNT>
+    s_texture_decoding_buffer_views;
+static void CreateTextureDecodingResources();
+static void DestroyTextureDecodingResources();
+
 bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width,
                  int virtual_height, unsigned int level)
 {
@@ -285,26 +308,31 @@ TextureCache::TextureCache()
 
   if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
   {
-    s32 buffer_size = 1024 * 1024;
+    s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1);
+    s32 buffer_size = buffer_size_mb * 1024 * 1024;
     s32 max_buffer_size = 0;
 
-    // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates
-    // is 65KB, we are asking for a 1MB buffer here.
-    // Make sure to check the maximum size and if it is below 1MB
-    // then use the maximum the hardware supports instead.
+    // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB
+    // buffer here. This buffer is also used as storage for undecoded textures when compute shader
+    // texture decoding is enabled, in which case the requested size is 32MB.
     glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size);
+
+    // Clamp the buffer size to the maximum size that the driver supports.
     buffer_size = std::min(buffer_size, max_buffer_size);
 
     s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size);
     glGenTextures(1, &s_palette_resolv_texture);
     glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
     glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer);
+
+    CreateTextureDecodingResources();
   }
 }
 
 TextureCache::~TextureCache()
 {
   DeleteShaders();
+  DestroyTextureDecodingResources();
 
   if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
   {
@@ -606,4 +634,154 @@ void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unc
   FramebufferManager::SetFramebuffer(0);
   g_renderer->RestoreAPIState();
 }
+
+// Creates one texture-buffer view of the palette stream buffer for every buffer format listed in
+// gl_view_types, so the same uploaded bytes can be bound with different element types.
+void CreateTextureDecodingResources()
+{
+  static const GLenum gl_view_types[TextureConversionShader::BUFFER_FORMAT_COUNT] = {
+      GL_R8UI,    // BUFFER_FORMAT_R8_UINT
+      GL_R16UI,   // BUFFER_FORMAT_R16_UINT
+      GL_RG32UI,  // BUFFER_FORMAT_R32G32_UINT
+  };
+
+  glGenTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
+                s_texture_decoding_buffer_views.data());
+  for (size_t i = 0; i < TextureConversionShader::BUFFER_FORMAT_COUNT; i++)
+  {
+    glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[i]);
+    glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], s_palette_stream_buffer->m_buffer);
+  }
+}
+
+// Deletes the buffer views and drops every cached decoding program entry.
+void DestroyTextureDecodingResources()
+{
+  glDeleteTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
+                   s_texture_decoding_buffer_views.data());
+  s_texture_decoding_buffer_views.fill(0);
+  s_texture_decoding_program_info.clear();
+}
+
+// Returns true when a decoding compute program exists for this format pair, compiling it on first
+// use. Both successes and failures are cached, so each pair is only attempted once.
+bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
+{
+  auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
+  auto iter = s_texture_decoding_program_info.find(key);
+  if (iter != s_texture_decoding_program_info.end())
+    return iter->second.valid;
+
+  TextureDecodingProgramInfo info;
+  info.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
+  if (!info.base_info)
+  {
+    s_texture_decoding_program_info.emplace(key, info);
+    return false;
+  }
+
+  std::string shader_source =
+      TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::OpenGL);
+  if (shader_source.empty())
+  {
+    s_texture_decoding_program_info.emplace(key, info);
+    return false;
+  }
+
+  if (!ProgramShaderCache::CompileComputeShader(info.program, shader_source))
+  {
+    s_texture_decoding_program_info.emplace(key, info);
+    return false;
+  }
+
+  info.uniform_dst_size = glGetUniformLocation(info.program.glprogid, "u_dst_size");
+  info.uniform_src_size = glGetUniformLocation(info.program.glprogid, "u_src_size");
+  info.uniform_src_offset = glGetUniformLocation(info.program.glprogid, "u_src_offset");
+  info.uniform_src_row_stride = glGetUniformLocation(info.program.glprogid, "u_src_row_stride");
+  info.uniform_palette_offset = glGetUniformLocation(info.program.glprogid, "u_palette_offset");
+  info.valid = true;
+  s_texture_decoding_program_info.emplace(key, info);
+  return true;
+}
+
+// Uploads the encoded texture (and its palette, if the format has one) to the stream buffer and
+// dispatches the cached compute program to decode it into mip level dst_level of entry. Does
+// nothing when no usable program is cached for the format pair.
+void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
+                                      size_t data_size, TextureFormat format, u32 width, u32 height,
+                                      u32 aligned_width, u32 aligned_height, u32 row_stride,
+                                      const u8* palette, TlutFormat palette_format)
+{
+  auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
+  auto iter = s_texture_decoding_program_info.find(key);
+
+  // Failed lookups are cached too (possibly with a null base_info), so check the valid flag.
+  if (iter == s_texture_decoding_program_info.end() || !iter->second.valid)
+    return;
+
+  // Bind by reference; copying the entry would also copy the SHADER object on every decode.
+  auto& info = iter->second;
+
+  // Copy to GPU-visible buffer, aligned to the data type.
+  u32 bytes_per_buffer_elem =
+      TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);
+
+  // Only copy palette if it is required.
+  bool has_palette = info.base_info->palette_size > 0;
+  u32 total_upload_size = static_cast<u32>(data_size);
+  u32 palette_offset = total_upload_size;
+  if (has_palette)
+  {
+    // Align to u16.
+    if ((total_upload_size % sizeof(u16)) != 0)
+    {
+      total_upload_size++;
+      palette_offset++;
+    }
+
+    total_upload_size += info.base_info->palette_size;
+  }
+
+  // Allocate space in stream buffer, and copy texture + palette across.
+  auto buffer = s_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem);
+  memcpy(buffer.first, data, data_size);
+  if (has_palette)
+    memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size);
+  s_palette_stream_buffer->Unmap(total_upload_size);
+
+  info.program.Bind();
+
+  // Calculate stride in buffer elements
+  u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem;
+  u32 offset_in_elements = buffer.second / bytes_per_buffer_elem;
+  u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16);
+  if (info.uniform_dst_size >= 0)
+    glUniform2ui(info.uniform_dst_size, width, height);
+  if (info.uniform_src_size >= 0)
+    glUniform2ui(info.uniform_src_size, aligned_width, aligned_height);
+  if (info.uniform_src_offset >= 0)
+    glUniform1ui(info.uniform_src_offset, offset_in_elements);
+  if (info.uniform_src_row_stride >= 0)
+    glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements);
+  if (info.uniform_palette_offset >= 0)
+    glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements);
+
+  glActiveTexture(GL_TEXTURE9);
+  glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[info.base_info->buffer_format]);
+
+  if (has_palette)
+  {
+    // Use an R16UI view for the palette.
+    glActiveTexture(GL_TEXTURE10);
+    glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
+  }
+
+  auto dispatch_groups = TextureConversionShader::GetDispatchCount(info.base_info, width, height);
+  glBindImageTexture(0, static_cast<TCacheEntry*>(entry)->texture, dst_level, GL_TRUE, 0,
+                     GL_WRITE_ONLY, GL_RGBA8);
+  glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1);
+  glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
+
+  TextureCache::SetStage();
+}
 }
diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h
index 66f58cae0b..cfd267caae 100644
--- a/Source/Core/VideoBackends/OGL/TextureCache.h
+++ b/Source/Core/VideoBackends/OGL/TextureCache.h
@@ -23,6 +23,12 @@ public:
   static void DisableStage(unsigned int stage);
   static void SetStage();
 
+  bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
+  void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
+                          TextureFormat format, u32 width, u32 height, u32 aligned_width,
+                          u32 aligned_height, u32 row_stride, const u8* palette,
+                          TlutFormat palette_format) override;
+
 private:
   struct TCacheEntry : TCacheEntryBase
   {
diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp
index 80cca3babd..7b5ccbe93e 100644
--- a/Source/Core/VideoBackends/OGL/main.cpp
+++ b/Source/Core/VideoBackends/OGL/main.cpp
@@ -108,7 +108,11 @@ void VideoBackend::InitBackendInfo()
   g_Config.backend_info.bSupportsReversedDepthRange = true;
   g_Config.backend_info.bSupportsMultithreading = false;
   g_Config.backend_info.bSupportsInternalResolutionFrameDumps = true;
-  g_Config.backend_info.bSupportsGPUTextureDecoding = false;
+
+  // TODO: There is a bug here, if texel buffers are not supported the graphics options
+  // will show the option when it is not supported. The only way around this would be
+  // creating a context when calling this function to determine what is available.
+  g_Config.backend_info.bSupportsGPUTextureDecoding = true;
 
   // Overwritten in Render.cpp later
   g_Config.backend_info.bSupportsDualSourceBlend = true;