// Copyright 2008 Dolphin Emulator Project // Licensed under GPLv2+ // Refer to the license.txt file included. #include #include #include #include #include #include #include "Common/Assert.h" #include "Common/GL/GLInterfaceBase.h" #include "Common/MsgHandler.h" #include "Common/StringUtil.h" #include "VideoBackends/OGL/FramebufferManager.h" #include "VideoBackends/OGL/GPUTimer.h" #include "VideoBackends/OGL/OGLTexture.h" #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/SamplerCache.h" #include "VideoBackends/OGL/StreamBuffer.h" #include "VideoBackends/OGL/TextureCache.h" #include "VideoBackends/OGL/TextureConverter.h" #include "VideoCommon/ImageWrite.h" #include "VideoCommon/TextureConversionShader.h" #include "VideoCommon/TextureConverterShaderGen.h" #include "VideoCommon/TextureDecoder.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" namespace OGL { constexpr const char GLSL_PROGRAM_VS[] = R"GLSL( out vec3 %c_uv0; SAMPLER_BINDING(9) uniform sampler2DArray samp9; uniform vec4 copy_position; // left, top, right, bottom void main() { vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2); %c_uv0 = vec3(mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, 0).xy), 0.0); gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0); } )GLSL"; constexpr const char GLSL_PROGRAM_GS[] = R"GLSL( layout(triangles) in; layout(triangle_strip, max_vertices = 6) out; in vec3 v_uv0[3]; out vec3 f_uv0; SAMPLER_BINDING(9) uniform sampler2DArray samp9; void main() { int layers = textureSize(samp9, 0).z; for (int layer = 0; layer < layers; ++layer) { for (int i = 0; i < 3; ++i) { f_uv0 = vec3(v_uv0[i].xy, layer); gl_Position = gl_in[i].gl_Position; gl_Layer = layer; EmitVertex(); } EndPrimitive(); } )GLSL"; constexpr const char GLSL_COLOR_COPY_FS[] = R"GLSL( SAMPLER_BINDING(9) uniform sampler2DArray samp9; in vec3 f_uv0; out vec4 ocol0; void main() { vec4 texcol = texture(samp9, f_uv0); ocol0 = texcol; } )GLSL"; constexpr const char GLSL_PALETTE_FS[] = R"GLSL( uniform int texture_buffer_offset; uniform float multiplier; SAMPLER_BINDING(9) uniform sampler2DArray samp9; SAMPLER_BINDING(10) uniform usamplerBuffer samp10; in vec3 f_uv0; out vec4 ocol0; int Convert3To8(int v) { // Swizzle bits: 00000123 -> 12312312 return (v << 5) | (v << 2) | (v >> 1); } int Convert4To8(int v) { // Swizzle bits: 00001234 -> 12341234 return (v << 4) | v; } int Convert5To8(int v) { // Swizzle bits: 00012345 -> 12345123 return (v << 3) | (v >> 2); } int Convert6To8(int v) { // Swizzle bits: 00123456 -> 12345612 return (v << 2) | (v >> 4); } float4 DecodePixel_RGB5A3(int val) { int r,g,b,a; if ((val&0x8000) > 0) { r=Convert5To8((val>>10) & 0x1f); g=Convert5To8((val>>5 ) & 0x1f); b=Convert5To8((val ) & 0x1f); a=0xFF; } else { a=Convert3To8((val>>12) & 0x7); r=Convert4To8((val>>8 ) & 0xf); g=Convert4To8((val>>4 ) & 0xf); b=Convert4To8((val ) & 0xf); } return float4(r, g, b, a) / 255.0; } float4 DecodePixel_RGB565(int val) { int r, g, b, a; r = Convert5To8((val >> 11) & 0x1f); g = Convert6To8((val >> 5) & 0x3f); b = Convert5To8((val) & 0x1f); a = 0xFF; return float4(r, g, b, a) / 255.0; } float4 DecodePixel_IA8(int val) { int i = val & 0xFF; int a = val >> 8; return float4(i, i, i, a) / 255.0; } void main() { int src = int(round(texture(samp9, f_uv0).r * multiplier)); src = int(texelFetch(samp10, src + texture_buffer_offset).r); src = ((src << 8) & 0xFF00) | (src >> 8); ocol0 = DecodePixel_%s(src); } )GLSL"; //#define TIME_TEXTURE_DECODING 1 void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& filter_coefficients) { // Flip top/bottom due to lower-left coordinate system. float clamp_top_val = clamp_bottom ? (1.0f - src_rect.bottom / static_cast(EFB_HEIGHT)) : 0.0f; float clamp_bottom_val = clamp_top ? (1.0f - src_rect.top / static_cast(EFB_HEIGHT)) : 1.0f; TextureConverter::EncodeToRamFromTexture(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect, scale_by_half, y_scale, gamma, clamp_top_val, clamp_bottom_val, filter_coefficients); } TextureCache::TextureCache() { CompileShaders(); if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) { s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1); s32 buffer_size = buffer_size_mb * 1024 * 1024; s32 max_buffer_size = 0; // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB // buffer here. This buffer is also used as storage for undecoded textures when compute shader // texture decoding is enabled, in which case the requested size is 32MB. glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size); // Clamp the buffer size to the maximum size that the driver supports. buffer_size = std::min(buffer_size, max_buffer_size); m_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size); glGenTextures(1, &m_palette_resolv_texture); glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture); glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, m_palette_stream_buffer->m_buffer); if (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding) CreateTextureDecodingResources(); } } TextureCache::~TextureCache() { DeleteShaders(); if (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding) DestroyTextureDecodingResources(); if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) { glDeleteTextures(1, &m_palette_resolv_texture); } } TextureCache* TextureCache::GetInstance() { return static_cast(g_texture_cache.get()); } const SHADER& TextureCache::GetColorCopyProgram() const { return m_colorCopyProgram; } GLuint TextureCache::GetColorCopyPositionUniform() const { return m_colorCopyPositionUniform; } bool TextureCache::CompilePaletteShader(TLUTFormat tlutfmt, const std::string& vcode, const std::string& pcode, const std::string& gcode) { ASSERT(IsValidTLUTFormat(tlutfmt)); PaletteShader& shader = m_palette_shaders[static_cast(tlutfmt)]; if (!ProgramShaderCache::CompileShader(shader.shader, vcode, pcode, gcode)) return false; shader.buffer_offset_uniform = glGetUniformLocation(shader.shader.glprogid, "texture_buffer_offset"); shader.multiplier_uniform = glGetUniformLocation(shader.shader.glprogid, "multiplier"); shader.copy_position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position"); return true; } bool TextureCache::CompileShaders() { std::string geo_program = ""; char prefix = 'f'; if (g_ActiveConfig.stereo_mode != StereoMode::Off) { geo_program = GLSL_PROGRAM_GS; prefix = 'v'; } if (!ProgramShaderCache::CompileShader(m_colorCopyProgram, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), GLSL_COLOR_COPY_FS, geo_program)) { return false; } m_colorCopyPositionUniform = glGetUniformLocation(m_colorCopyProgram.glprogid, "copy_position"); if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) { if (!CompilePaletteShader(TLUTFormat::IA8, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), StringFromFormat(GLSL_PALETTE_FS, "IA8"), geo_program)) return false; if (!CompilePaletteShader(TLUTFormat::RGB565, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), StringFromFormat(GLSL_PALETTE_FS, "RGB565"), geo_program)) return false; if (!CompilePaletteShader(TLUTFormat::RGB5A3, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), StringFromFormat(GLSL_PALETTE_FS, "RGB5A3"), geo_program)) return false; } return true; } void TextureCache::DeleteShaders() { for (auto& it : m_efb_copy_programs) it.second.shader.Destroy(); m_efb_copy_programs.clear(); if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) for (auto& shader : m_palette_shaders) shader.shader.Destroy(); } void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette, TLUTFormat tlutfmt) { if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion) return; ASSERT(IsValidTLUTFormat(tlutfmt)); const PaletteShader& palette_shader = m_palette_shaders[static_cast(tlutfmt)]; g_renderer->ResetAPIState(); OGLTexture* source_texture = static_cast(source->texture.get()); OGLTexture* destination_texture = static_cast(destination->texture.get()); glActiveTexture(GL_TEXTURE9); glBindTexture(GL_TEXTURE_2D_ARRAY, source_texture->GetRawTexIdentifier()); g_sampler_cache->BindNearestSampler(9); FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer()); glViewport(0, 0, destination->GetWidth(), destination->GetHeight()); palette_shader.shader.Bind(); // C14 textures are currently unsupported int size = source->format == TextureFormat::I4 ? 32 : 512; auto buffer = m_palette_stream_buffer->Map(size); memcpy(buffer.first, palette, size); m_palette_stream_buffer->Unmap(size); glUniform1i(palette_shader.buffer_offset_uniform, buffer.second / 2); glUniform1f(palette_shader.multiplier_uniform, source->format == TextureFormat::I4 ? 15.0f : 255.0f); glUniform4f(palette_shader.copy_position_uniform, 0.0f, 0.0f, static_cast(source->GetWidth()), static_cast(source->GetHeight())); glActiveTexture(GL_TEXTURE10); glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture); g_sampler_cache->BindNearestSampler(10); ProgramShaderCache::BindVertexFormat(nullptr); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); g_renderer->RestoreAPIState(); } static const std::string decoding_vertex_shader = R"( void main() { vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2); gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0); } )"; void TextureCache::CreateTextureDecodingResources() { static const GLenum gl_view_types[TextureConversionShaderTiled::BUFFER_FORMAT_COUNT] = { GL_R8UI, // BUFFER_FORMAT_R8_UINT GL_R16UI, // BUFFER_FORMAT_R16_UINT GL_RG32UI, // BUFFER_FORMAT_R32G32_UINT GL_RGBA8UI, // BUFFER_FORMAT_RGBA8_UINT }; glGenTextures(TextureConversionShaderTiled::BUFFER_FORMAT_COUNT, m_texture_decoding_buffer_views.data()); for (size_t i = 0; i < TextureConversionShaderTiled::BUFFER_FORMAT_COUNT; i++) { glBindTexture(GL_TEXTURE_BUFFER, m_texture_decoding_buffer_views[i]); glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], m_palette_stream_buffer->m_buffer); } } void TextureCache::DestroyTextureDecodingResources() { glDeleteTextures(TextureConversionShaderTiled::BUFFER_FORMAT_COUNT, m_texture_decoding_buffer_views.data()); m_texture_decoding_buffer_views.fill(0); m_texture_decoding_program_info.clear(); } bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) { auto key = std::make_pair(static_cast(format), static_cast(palette_format)); auto iter = m_texture_decoding_program_info.find(key); if (iter != m_texture_decoding_program_info.end()) return iter->second.valid; TextureDecodingProgramInfo info; info.base_info = TextureConversionShaderTiled::GetDecodingShaderInfo(format); if (!info.base_info) { m_texture_decoding_program_info.emplace(key, info); return false; } std::string shader_source = TextureConversionShaderTiled::GenerateDecodingShader(format, palette_format, APIType::OpenGL); if (shader_source.empty()) { m_texture_decoding_program_info.emplace(key, info); return false; } if (!ProgramShaderCache::CompileComputeShader(info.program, shader_source)) { m_texture_decoding_program_info.emplace(key, info); return false; } info.uniform_dst_size = glGetUniformLocation(info.program.glprogid, "u_dst_size"); info.uniform_src_size = glGetUniformLocation(info.program.glprogid, "u_src_size"); info.uniform_src_offset = glGetUniformLocation(info.program.glprogid, "u_src_offset"); info.uniform_src_row_stride = glGetUniformLocation(info.program.glprogid, "u_src_row_stride"); info.uniform_palette_offset = glGetUniformLocation(info.program.glprogid, "u_palette_offset"); info.valid = true; m_texture_decoding_program_info.emplace(key, info); return true; } void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size, TextureFormat format, u32 width, u32 height, u32 aligned_width, u32 aligned_height, u32 row_stride, const u8* palette, TLUTFormat palette_format) { auto key = std::make_pair(static_cast(format), static_cast(palette_format)); auto iter = m_texture_decoding_program_info.find(key); if (iter == m_texture_decoding_program_info.end()) return; #ifdef TIME_TEXTURE_DECODING GPUTimer timer; #endif // Copy to GPU-visible buffer, aligned to the data type. auto info = iter->second; u32 bytes_per_buffer_elem = TextureConversionShaderTiled::GetBytesPerBufferElement(info.base_info->buffer_format); // Only copy palette if it is required. bool has_palette = info.base_info->palette_size > 0; u32 total_upload_size = static_cast(data_size); u32 palette_offset = total_upload_size; if (has_palette) { // Align to u16. if ((total_upload_size % sizeof(u16)) != 0) { total_upload_size++; palette_offset++; } total_upload_size += info.base_info->palette_size; } // Allocate space in stream buffer, and copy texture + palette across. auto buffer = m_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem); memcpy(buffer.first, data, data_size); if (has_palette) memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size); m_palette_stream_buffer->Unmap(total_upload_size); info.program.Bind(); // Calculate stride in buffer elements u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem; u32 offset_in_elements = buffer.second / bytes_per_buffer_elem; u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16); if (info.uniform_dst_size >= 0) glUniform2ui(info.uniform_dst_size, width, height); if (info.uniform_src_size >= 0) glUniform2ui(info.uniform_src_size, aligned_width, aligned_height); if (info.uniform_src_offset >= 0) glUniform1ui(info.uniform_src_offset, offset_in_elements); if (info.uniform_src_row_stride >= 0) glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements); if (info.uniform_palette_offset >= 0) glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements); glActiveTexture(GL_TEXTURE9); glBindTexture(GL_TEXTURE_BUFFER, m_texture_decoding_buffer_views[info.base_info->buffer_format]); if (has_palette) { // Use an R16UI view for the palette. glActiveTexture(GL_TEXTURE10); glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture); } auto dispatch_groups = TextureConversionShaderTiled::GetDispatchCount(info.base_info, aligned_width, aligned_height); glBindImageTexture(0, static_cast(entry->texture.get())->GetRawTexIdentifier(), dst_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8); glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1); glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT); #ifdef TIME_TEXTURE_DECODING WARN_LOG(VIDEO, "Decode texture format %u size %ux%u took %.4fms", static_cast(format), width, height, timer.GetTimeMilliseconds()); #endif } void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, const CopyFilterCoefficientArray& filter_coefficients) { auto* destination_texture = static_cast(entry->texture.get()); g_renderer->ResetAPIState(); // reset any game specific settings // Make sure to resolve anything we need to read from. const GLuint read_texture = is_depth_copy ? FramebufferManager::ResolveAndGetDepthTarget(src_rect) : FramebufferManager::ResolveAndGetRenderTarget(src_rect); FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer()); glActiveTexture(GL_TEXTURE9); glBindTexture(GL_TEXTURE_2D_ARRAY, read_texture); if (scale_by_half) g_sampler_cache->BindLinearSampler(9); else g_sampler_cache->BindNearestSampler(9); glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height); auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, scale_by_half, NeedsCopyFilterInShader(filter_coefficients)); auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader()); EFBCopyShader& shader = it.first->second; bool created = it.second; if (created) { ShaderCode code = TextureConversionShaderGen::GenerateShader(APIType::OpenGL, uid.GetUidData()); std::string geo_program = ""; char prefix = 'f'; if (g_ActiveConfig.stereo_mode != StereoMode::Off) { geo_program = GLSL_PROGRAM_GS; prefix = 'v'; } ProgramShaderCache::CompileShader(shader.shader, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), code.GetBuffer(), geo_program); shader.position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position"); shader.pixel_height_uniform = glGetUniformLocation(shader.shader.glprogid, "pixel_height"); shader.gamma_rcp_uniform = glGetUniformLocation(shader.shader.glprogid, "gamma_rcp"); shader.clamp_tb_uniform = glGetUniformLocation(shader.shader.glprogid, "clamp_tb"); shader.filter_coefficients_uniform = glGetUniformLocation(shader.shader.glprogid, "filter_coefficients"); } shader.shader.Bind(); TargetRectangle R = g_renderer->ConvertEFBRectangle(src_rect); glUniform4f(shader.position_uniform, static_cast(R.left), static_cast(R.top), static_cast(R.right), static_cast(R.bottom)); glUniform1f(shader.pixel_height_uniform, g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT); glUniform1f(shader.gamma_rcp_uniform, 1.0f / gamma); glUniform2f(shader.clamp_tb_uniform, clamp_bottom ? (1.0f - src_rect.bottom / static_cast(EFB_HEIGHT)) : 0.0f, clamp_top ? (1.0f - src_rect.top / static_cast(EFB_HEIGHT)) : 1.0f); glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0], filter_coefficients[1], filter_coefficients[2]); ProgramShaderCache::BindVertexFormat(nullptr); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); g_renderer->RestoreAPIState(); } } // namespace OGL