// Copyright 2008 Dolphin Emulator Project // Licensed under GPLv2+ // Refer to the license.txt file included. #include "VideoBackends/OGL/TextureCache.h" #include #include #include #include #include #include #include "Common/Assert.h" #include "Common/GL/GLInterfaceBase.h" #include "Common/MsgHandler.h" #include "Common/StringUtil.h" #include "VideoBackends/OGL/FramebufferManager.h" #include "VideoBackends/OGL/GPUTimer.h" #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/SamplerCache.h" #include "VideoBackends/OGL/StreamBuffer.h" #include "VideoBackends/OGL/TextureConverter.h" #include "VideoCommon/ImageWrite.h" #include "VideoCommon/TextureConversionShader.h" #include "VideoCommon/TextureDecoder.h" #include "VideoCommon/VideoConfig.h" namespace OGL { static SHADER s_ColorCopyProgram; static SHADER s_ColorMatrixProgram; static SHADER s_DepthMatrixProgram; static GLuint s_ColorMatrixUniform; static GLuint s_DepthMatrixUniform; static GLuint s_ColorCopyPositionUniform; static GLuint s_ColorMatrixPositionUniform; static GLuint s_DepthCopyPositionUniform; static u32 s_ColorCbufid; static u32 s_DepthCbufid; static u32 s_Textures[8]; static u32 s_ActiveTexture; static SHADER s_palette_pixel_shader[3]; static std::unique_ptr s_palette_stream_buffer; static GLuint s_palette_resolv_texture; static GLuint s_palette_buffer_offset_uniform[3]; static GLuint s_palette_multiplier_uniform[3]; static GLuint s_palette_copy_position_uniform[3]; struct TextureDecodingProgramInfo { const TextureConversionShader::DecodingShaderInfo* base_info = nullptr; SHADER program; GLint uniform_dst_size = -1; GLint uniform_src_size = -1; GLint uniform_src_row_stride = -1; GLint uniform_src_offset = -1; GLint uniform_palette_offset = -1; bool valid = false; }; //#define TIME_TEXTURE_DECODING 1 static std::map, TextureDecodingProgramInfo> s_texture_decoding_program_info; static std::array s_texture_decoding_buffer_views; static void CreateTextureDecodingResources(); static void DestroyTextureDecodingResources(); bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width, int virtual_height, unsigned int level) { if (GLInterface->GetMode() != GLInterfaceMode::MODE_OPENGL) return false; int width = std::max(virtual_width >> level, 1); int height = std::max(virtual_height >> level, 1); std::vector data(width * height * 4); glActiveTexture(GL_TEXTURE9); glBindTexture(textarget, tex); glGetTexImage(textarget, level, GL_RGBA, GL_UNSIGNED_BYTE, data.data()); TextureCache::SetStage(); return TextureToPng(data.data(), width * 4, filename, width, height, true); } static GLenum GetGLInternalFormatForTextureFormat(HostTextureFormat format, bool storage) { switch (format) { case HostTextureFormat::DXT1: return GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; case HostTextureFormat::DXT3: return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; case HostTextureFormat::DXT5: return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; case HostTextureFormat::RGBA8: default: return storage ? GL_RGBA8 : GL_RGBA; } } static GLenum GetGLFormatForTextureFormat(HostTextureFormat format) { switch (format) { case HostTextureFormat::RGBA8: return GL_RGBA; // Compressed texture formats don't use this parameter. default: return GL_UNSIGNED_BYTE; } } static GLenum GetGLTypeForTextureFormat(HostTextureFormat format) { switch (format) { case HostTextureFormat::RGBA8: return GL_UNSIGNED_BYTE; // Compressed texture formats don't use this parameter. default: return GL_UNSIGNED_BYTE; } } TextureCache::TCacheEntry::~TCacheEntry() { if (texture) { for (auto& gtex : s_Textures) if (gtex == texture) gtex = 0; glDeleteTextures(1, &texture); texture = 0; } if (framebuffer) { glDeleteFramebuffers(1, &framebuffer); framebuffer = 0; } } TextureCache::TCacheEntry::TCacheEntry(const TCacheEntryConfig& _config) : TCacheEntryBase(_config) { glGenTextures(1, &texture); framebuffer = 0; } void TextureCache::TCacheEntry::Bind(unsigned int stage) { if (s_Textures[stage] != texture) { if (s_ActiveTexture != stage) { glActiveTexture(GL_TEXTURE0 + stage); s_ActiveTexture = stage; } glBindTexture(GL_TEXTURE_2D_ARRAY, texture); s_Textures[stage] = texture; } } bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int level) { // We can't dump compressed textures currently (it would mean drawing them to a RGBA8 // framebuffer, and saving that). TextureCache does not call Save for custom textures // anyway, so this is fine for now. _assert_(config.format == HostTextureFormat::RGBA8); return SaveTexture(filename, GL_TEXTURE_2D_ARRAY, texture, config.width, config.height, level); } TextureCache::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConfig& config) { TCacheEntry* entry = new TCacheEntry(config); glActiveTexture(GL_TEXTURE9); glBindTexture(GL_TEXTURE_2D_ARRAY, entry->texture); glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAX_LEVEL, config.levels - 1); if (g_ogl_config.bSupportsTextureStorage) { GLenum gl_internal_format = GetGLInternalFormatForTextureFormat(config.format, true); glTexStorage3D(GL_TEXTURE_2D_ARRAY, config.levels, gl_internal_format, config.width, config.height, config.layers); } if (config.rendertarget) { // We can't render to compressed formats. _assert_(!IsCompressedHostTextureFormat(config.format)); if (!g_ogl_config.bSupportsTextureStorage) { for (u32 level = 0; level < config.levels; level++) { glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, std::max(config.width >> level, 1u), std::max(config.height >> level, 1u), config.layers, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); } } glGenFramebuffers(1, &entry->framebuffer); FramebufferManager::SetFramebuffer(entry->framebuffer); FramebufferManager::FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_ARRAY, entry->texture, 0); } TextureCache::SetStage(); return entry; } void TextureCache::TCacheEntry::CopyRectangleFromTexture(const TCacheEntryBase* source, const MathUtil::Rectangle& srcrect, const MathUtil::Rectangle& dstrect) { TCacheEntry* srcentry = (TCacheEntry*)source; if (srcrect.GetWidth() == dstrect.GetWidth() && srcrect.GetHeight() == dstrect.GetHeight() && g_ogl_config.bSupportsCopySubImage) { glCopyImageSubData(srcentry->texture, GL_TEXTURE_2D_ARRAY, 0, srcrect.left, srcrect.top, 0, texture, GL_TEXTURE_2D_ARRAY, 0, dstrect.left, dstrect.top, 0, dstrect.GetWidth(), dstrect.GetHeight(), srcentry->config.layers); return; } else if (!framebuffer) { glGenFramebuffers(1, &framebuffer); FramebufferManager::SetFramebuffer(framebuffer); FramebufferManager::FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_ARRAY, texture, 0); } g_renderer->ResetAPIState(); FramebufferManager::SetFramebuffer(framebuffer); glActiveTexture(GL_TEXTURE9); glBindTexture(GL_TEXTURE_2D_ARRAY, srcentry->texture); g_sampler_cache->BindLinearSampler(9); glViewport(dstrect.left, dstrect.top, dstrect.GetWidth(), dstrect.GetHeight()); s_ColorCopyProgram.Bind(); glUniform4f(s_ColorCopyPositionUniform, float(srcrect.left), float(srcrect.top), float(srcrect.GetWidth()), float(srcrect.GetHeight())); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); FramebufferManager::SetFramebuffer(0); g_renderer->RestoreAPIState(); } void TextureCache::TCacheEntry::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) { if (level >= config.levels) PanicAlert("Texture only has %d levels, can't update level %d", config.levels, level); if (width != std::max(1u, config.width >> level) || height != std::max(1u, config.height >> level)) PanicAlert("size of level %d must be %dx%d, but %dx%d requested", level, std::max(1u, config.width >> level), std::max(1u, config.height >> level), width, height); glActiveTexture(GL_TEXTURE9); glBindTexture(GL_TEXTURE_2D_ARRAY, texture); if (row_length != width) glPixelStorei(GL_UNPACK_ROW_LENGTH, row_length); GLenum gl_internal_format = GetGLInternalFormatForTextureFormat(config.format, false); if (IsCompressedHostTextureFormat(config.format)) { if (g_ogl_config.bSupportsTextureStorage) { glCompressedTexSubImage3D(GL_TEXTURE_2D_ARRAY, level, 0, 0, 0, width, height, 1, gl_internal_format, static_cast(buffer_size), buffer); } else { glCompressedTexImage3D(GL_TEXTURE_2D_ARRAY, level, gl_internal_format, width, height, 1, 0, static_cast(buffer_size), buffer); } } else { GLenum gl_format = GetGLFormatForTextureFormat(config.format); GLenum gl_type = GetGLTypeForTextureFormat(config.format); if (g_ogl_config.bSupportsTextureStorage) { glTexSubImage3D(GL_TEXTURE_2D_ARRAY, level, 0, 0, 0, width, height, 1, gl_format, gl_type, buffer); } else { glTexImage3D(GL_TEXTURE_2D_ARRAY, level, gl_internal_format, width, height, 1, 0, gl_format, gl_type, buffer); } } if (row_length != width) glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); TextureCache::SetStage(); } void TextureCache::TCacheEntry::FromRenderTarget(bool is_depth_copy, const EFBRectangle& srcRect, bool scaleByHalf, unsigned int cbufid, const float* colmat) { g_renderer->ResetAPIState(); // reset any game specific settings // Make sure to resolve anything we need to read from. const GLuint read_texture = is_depth_copy ? FramebufferManager::ResolveAndGetDepthTarget(srcRect) : FramebufferManager::ResolveAndGetRenderTarget(srcRect); FramebufferManager::SetFramebuffer(framebuffer); OpenGL_BindAttributelessVAO(); glActiveTexture(GL_TEXTURE9); glBindTexture(GL_TEXTURE_2D_ARRAY, read_texture); if (scaleByHalf) g_sampler_cache->BindLinearSampler(9); else g_sampler_cache->BindNearestSampler(9); glViewport(0, 0, config.width, config.height); GLuint uniform_location; if (is_depth_copy) { s_DepthMatrixProgram.Bind(); if (s_DepthCbufid != cbufid) glUniform4fv(s_DepthMatrixUniform, 5, colmat); s_DepthCbufid = cbufid; uniform_location = s_DepthCopyPositionUniform; } else { s_ColorMatrixProgram.Bind(); if (s_ColorCbufid != cbufid) glUniform4fv(s_ColorMatrixUniform, 7, colmat); s_ColorCbufid = cbufid; uniform_location = s_ColorMatrixPositionUniform; } TargetRectangle R = g_renderer->ConvertEFBRectangle(srcRect); glUniform4f(uniform_location, static_cast(R.left), static_cast(R.top), static_cast(R.right), static_cast(R.bottom)); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); FramebufferManager::SetFramebuffer(0); g_renderer->RestoreAPIState(); } void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half) { TextureConverter::EncodeToRamFromTexture(dst, format, native_width, bytes_per_row, num_blocks_y, memory_stride, is_depth_copy, src_rect, scale_by_half); } TextureCache::TextureCache() { CompileShaders(); s_ActiveTexture = UINT32_MAX; for (auto& gtex : s_Textures) gtex = UINT32_MAX; if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) { s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1); s32 buffer_size = buffer_size_mb * 1024 * 1024; s32 max_buffer_size = 0; // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB // buffer here. This buffer is also used as storage for undecoded textures when compute shader // texture decoding is enabled, in which case the requested size is 32MB. glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size); // Clamp the buffer size to the maximum size that the driver supports. buffer_size = std::min(buffer_size, max_buffer_size); s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size); glGenTextures(1, &s_palette_resolv_texture); glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture); glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer); CreateTextureDecodingResources(); } } TextureCache::~TextureCache() { DeleteShaders(); DestroyTextureDecodingResources(); if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) { s_palette_stream_buffer.reset(); glDeleteTextures(1, &s_palette_resolv_texture); } } void TextureCache::DisableStage(unsigned int stage) { } void TextureCache::SetStage() { // -1 is the initial value as we don't know which texture should be bound if (s_ActiveTexture != (u32)-1) glActiveTexture(GL_TEXTURE0 + s_ActiveTexture); } bool TextureCache::CompileShaders() { constexpr const char* color_copy_program = "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" "in vec3 f_uv0;\n" "out vec4 ocol0;\n" "\n" "void main(){\n" " vec4 texcol = texture(samp9, f_uv0);\n" " ocol0 = texcol;\n" "}\n"; constexpr const char* color_matrix_program = "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" "uniform vec4 colmat[7];\n" "in vec3 f_uv0;\n" "out vec4 ocol0;\n" "\n" "void main(){\n" " vec4 texcol = texture(samp9, f_uv0);\n" " texcol = floor(texcol * colmat[5]) * colmat[6];\n" " ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n" "}\n"; constexpr const char* depth_matrix_program = "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" "uniform vec4 colmat[5];\n" "in vec3 f_uv0;\n" "out vec4 ocol0;\n" "\n" "void main(){\n" " vec4 texcol = texture(samp9, vec3(f_uv0.xy, %s));\n" " int depth = int(texcol.x * 16777216.0);\n" // Convert to Z24 format " ivec4 workspace;\n" " workspace.r = (depth >> 16) & 255;\n" " workspace.g = (depth >> 8) & 255;\n" " workspace.b = depth & 255;\n" // Convert to Z4 format " workspace.a = (depth >> 16) & 0xF0;\n" // Normalize components to [0.0..1.0] " texcol = vec4(workspace) / 255.0;\n" " ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n" "}\n"; constexpr const char* vertex_program = "out vec3 %s_uv0;\n" "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" "uniform vec4 copy_position;\n" // left, top, right, bottom "void main()\n" "{\n" " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" " %s_uv0 = vec3(mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, " "0).xy), 0.0);\n" " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" "}\n"; const std::string geo_program = g_ActiveConfig.iStereoMode > 0 ? "layout(triangles) in;\n" "layout(triangle_strip, max_vertices = 6) out;\n" "in vec3 v_uv0[3];\n" "out vec3 f_uv0;\n" "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" "void main()\n" "{\n" " int layers = textureSize(samp9, 0).z;\n" " for (int layer = 0; layer < layers; ++layer) {\n" " for (int i = 0; i < 3; ++i) {\n" " f_uv0 = vec3(v_uv0[i].xy, layer);\n" " gl_Position = gl_in[i].gl_Position;\n" " gl_Layer = layer;\n" " EmitVertex();\n" " }\n" " EndPrimitive();\n" " }\n" "}\n" : ""; const char* prefix = geo_program.empty() ? "f" : "v"; const char* depth_layer = g_ActiveConfig.bStereoEFBMonoDepth ? "0.0" : "f_uv0.z"; if (!ProgramShaderCache::CompileShader(s_ColorCopyProgram, StringFromFormat(vertex_program, prefix, prefix), color_copy_program, geo_program) || !ProgramShaderCache::CompileShader(s_ColorMatrixProgram, StringFromFormat(vertex_program, prefix, prefix), color_matrix_program, geo_program) || !ProgramShaderCache::CompileShader( s_DepthMatrixProgram, StringFromFormat(vertex_program, prefix, prefix), StringFromFormat(depth_matrix_program, depth_layer), geo_program)) { return false; } s_ColorMatrixUniform = glGetUniformLocation(s_ColorMatrixProgram.glprogid, "colmat"); s_DepthMatrixUniform = glGetUniformLocation(s_DepthMatrixProgram.glprogid, "colmat"); s_ColorCbufid = UINT32_MAX; s_DepthCbufid = UINT32_MAX; s_ColorCopyPositionUniform = glGetUniformLocation(s_ColorCopyProgram.glprogid, "copy_position"); s_ColorMatrixPositionUniform = glGetUniformLocation(s_ColorMatrixProgram.glprogid, "copy_position"); s_DepthCopyPositionUniform = glGetUniformLocation(s_DepthMatrixProgram.glprogid, "copy_position"); std::string palette_shader = R"GLSL( uniform int texture_buffer_offset; uniform float multiplier; SAMPLER_BINDING(9) uniform sampler2DArray samp9; SAMPLER_BINDING(10) uniform usamplerBuffer samp10; in vec3 f_uv0; out vec4 ocol0; int Convert3To8(int v) { // Swizzle bits: 00000123 -> 12312312 return (v << 5) | (v << 2) | (v >> 1); } int Convert4To8(int v) { // Swizzle bits: 00001234 -> 12341234 return (v << 4) | v; } int Convert5To8(int v) { // Swizzle bits: 00012345 -> 12345123 return (v << 3) | (v >> 2); } int Convert6To8(int v) { // Swizzle bits: 00123456 -> 12345612 return (v << 2) | (v >> 4); } float4 DecodePixel_RGB5A3(int val) { int r,g,b,a; if ((val&0x8000) > 0) { r=Convert5To8((val>>10) & 0x1f); g=Convert5To8((val>>5 ) & 0x1f); b=Convert5To8((val ) & 0x1f); a=0xFF; } else { a=Convert3To8((val>>12) & 0x7); r=Convert4To8((val>>8 ) & 0xf); g=Convert4To8((val>>4 ) & 0xf); b=Convert4To8((val ) & 0xf); } return float4(r, g, b, a) / 255.0; } float4 DecodePixel_RGB565(int val) { int r, g, b, a; r = Convert5To8((val >> 11) & 0x1f); g = Convert6To8((val >> 5) & 0x3f); b = Convert5To8((val) & 0x1f); a = 0xFF; return float4(r, g, b, a) / 255.0; } float4 DecodePixel_IA8(int val) { int i = val & 0xFF; int a = val >> 8; return float4(i, i, i, a) / 255.0; } void main() { int src = int(round(texture(samp9, f_uv0).r * multiplier)); src = int(texelFetch(samp10, src + texture_buffer_offset).r); src = ((src << 8) & 0xFF00) | (src >> 8); ocol0 = DECODE(src); } )GLSL"; if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) { if (!ProgramShaderCache::CompileShader( s_palette_pixel_shader[GX_TL_IA8], StringFromFormat(vertex_program, prefix, prefix), "#define DECODE DecodePixel_IA8" + palette_shader, geo_program)) { return false; } s_palette_buffer_offset_uniform[GX_TL_IA8] = glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "texture_buffer_offset"); s_palette_multiplier_uniform[GX_TL_IA8] = glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "multiplier"); s_palette_copy_position_uniform[GX_TL_IA8] = glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "copy_position"); if (!ProgramShaderCache::CompileShader( s_palette_pixel_shader[GX_TL_RGB565], StringFromFormat(vertex_program, prefix, prefix), "#define DECODE DecodePixel_RGB565" + palette_shader, geo_program)) { return false; } s_palette_buffer_offset_uniform[GX_TL_RGB565] = glGetUniformLocation( s_palette_pixel_shader[GX_TL_RGB565].glprogid, "texture_buffer_offset"); s_palette_multiplier_uniform[GX_TL_RGB565] = glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB565].glprogid, "multiplier"); s_palette_copy_position_uniform[GX_TL_RGB565] = glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB565].glprogid, "copy_position"); if (!ProgramShaderCache::CompileShader( s_palette_pixel_shader[GX_TL_RGB5A3], StringFromFormat(vertex_program, prefix, prefix), "#define DECODE DecodePixel_RGB5A3" + palette_shader, geo_program)) { return false; } s_palette_buffer_offset_uniform[GX_TL_RGB5A3] = glGetUniformLocation( s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "texture_buffer_offset"); s_palette_multiplier_uniform[GX_TL_RGB5A3] = glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "multiplier"); s_palette_copy_position_uniform[GX_TL_RGB5A3] = glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "copy_position"); } return true; } void TextureCache::DeleteShaders() { s_ColorMatrixProgram.Destroy(); s_DepthMatrixProgram.Destroy(); if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) for (auto& shader : s_palette_pixel_shader) shader.Destroy(); } void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unconverted, void* palette, TlutFormat format) { if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion) return; g_renderer->ResetAPIState(); TCacheEntry* entry = (TCacheEntry*)_entry; TCacheEntry* unconverted = (TCacheEntry*)_unconverted; glActiveTexture(GL_TEXTURE9); glBindTexture(GL_TEXTURE_2D_ARRAY, unconverted->texture); g_sampler_cache->BindNearestSampler(9); FramebufferManager::SetFramebuffer(entry->framebuffer); glViewport(0, 0, entry->config.width, entry->config.height); s_palette_pixel_shader[format].Bind(); // C14 textures are currently unsupported int size = (unconverted->format & 0xf) == GX_TF_I4 ? 32 : 512; auto buffer = s_palette_stream_buffer->Map(size); memcpy(buffer.first, palette, size); s_palette_stream_buffer->Unmap(size); glUniform1i(s_palette_buffer_offset_uniform[format], buffer.second / 2); glUniform1f(s_palette_multiplier_uniform[format], (unconverted->format & 0xf) == 0 ? 15.0f : 255.0f); glUniform4f(s_palette_copy_position_uniform[format], 0.0f, 0.0f, (float)unconverted->config.width, (float)unconverted->config.height); glActiveTexture(GL_TEXTURE10); glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture); g_sampler_cache->BindNearestSampler(10); OpenGL_BindAttributelessVAO(); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); FramebufferManager::SetFramebuffer(0); g_renderer->RestoreAPIState(); } static const std::string decoding_vertex_shader = R"( void main() { vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2); gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0); } )"; void CreateTextureDecodingResources() { static const GLenum gl_view_types[TextureConversionShader::BUFFER_FORMAT_COUNT] = { GL_R8UI, // BUFFER_FORMAT_R8_UINT GL_R16UI, // BUFFER_FORMAT_R16_UINT GL_RG32UI, // BUFFER_FORMAT_R32G32_UINT }; glGenTextures(TextureConversionShader::BUFFER_FORMAT_COUNT, s_texture_decoding_buffer_views.data()); for (size_t i = 0; i < TextureConversionShader::BUFFER_FORMAT_COUNT; i++) { glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[i]); glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], s_palette_stream_buffer->m_buffer); } } void DestroyTextureDecodingResources() { glDeleteTextures(TextureConversionShader::BUFFER_FORMAT_COUNT, s_texture_decoding_buffer_views.data()); s_texture_decoding_buffer_views.fill(0); s_texture_decoding_program_info.clear(); } bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) { auto key = std::make_pair(static_cast(format), static_cast(palette_format)); auto iter = s_texture_decoding_program_info.find(key); if (iter != s_texture_decoding_program_info.end()) return iter->second.valid; TextureDecodingProgramInfo info; info.base_info = TextureConversionShader::GetDecodingShaderInfo(format); if (!info.base_info) { s_texture_decoding_program_info.emplace(key, info); return false; } std::string shader_source = TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::OpenGL); if (shader_source.empty()) { s_texture_decoding_program_info.emplace(key, info); return false; } if (!ProgramShaderCache::CompileComputeShader(info.program, shader_source)) { s_texture_decoding_program_info.emplace(key, info); return false; } info.uniform_dst_size = glGetUniformLocation(info.program.glprogid, "u_dst_size"); info.uniform_src_size = glGetUniformLocation(info.program.glprogid, "u_src_size"); info.uniform_src_offset = glGetUniformLocation(info.program.glprogid, "u_src_offset"); info.uniform_src_row_stride = glGetUniformLocation(info.program.glprogid, "u_src_row_stride"); info.uniform_palette_offset = glGetUniformLocation(info.program.glprogid, "u_palette_offset"); info.valid = true; s_texture_decoding_program_info.emplace(key, info); return true; } void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size, TextureFormat format, u32 width, u32 height, u32 aligned_width, u32 aligned_height, u32 row_stride, const u8* palette, TlutFormat palette_format) { auto key = std::make_pair(static_cast(format), static_cast(palette_format)); auto iter = s_texture_decoding_program_info.find(key); if (iter == s_texture_decoding_program_info.end()) return; #ifdef TIME_TEXTURE_DECODING GPUTimer timer; #endif // Copy to GPU-visible buffer, aligned to the data type. auto info = iter->second; u32 bytes_per_buffer_elem = TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format); // Only copy palette if it is required. bool has_palette = info.base_info->palette_size > 0; u32 total_upload_size = static_cast(data_size); u32 palette_offset = total_upload_size; if (has_palette) { // Align to u16. if ((total_upload_size % sizeof(u16)) != 0) { total_upload_size++; palette_offset++; } total_upload_size += info.base_info->palette_size; } // Allocate space in stream buffer, and copy texture + palette across. auto buffer = s_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem); memcpy(buffer.first, data, data_size); if (has_palette) memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size); s_palette_stream_buffer->Unmap(total_upload_size); info.program.Bind(); // Calculate stride in buffer elements u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem; u32 offset_in_elements = buffer.second / bytes_per_buffer_elem; u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16); if (info.uniform_dst_size >= 0) glUniform2ui(info.uniform_dst_size, width, height); if (info.uniform_src_size >= 0) glUniform2ui(info.uniform_src_size, aligned_width, aligned_height); if (info.uniform_src_offset >= 0) glUniform1ui(info.uniform_src_offset, offset_in_elements); if (info.uniform_src_row_stride >= 0) glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements); if (info.uniform_palette_offset >= 0) glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements); glActiveTexture(GL_TEXTURE9); glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[info.base_info->buffer_format]); if (has_palette) { // Use an R16UI view for the palette. glActiveTexture(GL_TEXTURE10); glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture); } auto dispatch_groups = TextureConversionShader::GetDispatchCount(info.base_info, aligned_width, aligned_height); glBindImageTexture(0, static_cast(entry)->texture, dst_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8); glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1); glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT); TextureCache::SetStage(); #ifdef TIME_TEXTURE_DECODING WARN_LOG(VIDEO, "Decode texture format %u size %ux%u took %.4fms", static_cast(format), width, height, timer.GetTimeMilliseconds()); #endif } }