diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp
index dfb86d2be2..f944562d9c 100644
--- a/Source/Core/VideoCommon/FramebufferShaderGen.cpp
+++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp
@@ -1,6 +1,7 @@
 #include "VideoCommon/FramebufferShaderGen.h"
 #include <sstream>
 #include "VideoCommon/FramebufferManager.h"
+#include "VideoCommon/TextureDecoder.h"
 #include "VideoCommon/VertexShaderGen.h"
 
 namespace FramebufferShaderGen
@@ -68,6 +69,26 @@ static void EmitSampleTexture(std::stringstream& ss, u32 n, const char* coords)
   }
 }
 
+// Emits a texel fetch/load instruction. Assumes that "coords" is a 4-element vector, with z
+// containing the layer, and w containing the mipmap level.
+static void EmitTextureLoad(std::stringstream& ss, u32 n, const char* coords)
+{
+  switch (GetAPIType())
+  {
+  case APIType::D3D:
+    ss << "tex" << n << ".Load(" << coords << ")";
+    break;
+
+  case APIType::OpenGL:
+  case APIType::Vulkan:
+    ss << "texelFetch(samp" << n << ", (" << coords << ").xyz, (" << coords << ").w)";
+    break;
+
+  default:
+    break;
+  }
+}
+
 static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
                                       u32 num_color_inputs, bool position_input,
                                       u32 num_tex_outputs, u32 num_color_outputs,
@@ -133,7 +154,7 @@ static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
 
 static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
                                      u32 num_color_inputs, const char* output_type = "float4",
-                                     const char* extra_vars = "")
+                                     const char* extra_vars = "", bool emit_frag_coord = false)
 {
   switch (GetAPIType())
   {
@@ -144,6 +165,8 @@ static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
       ss << "in float3 v_tex" << i << " : TEXCOORD" << i << ", ";
     for (u32 i = 0; i < num_color_inputs; i++)
       ss << "in float4 v_col" << i << " : COLOR" << i << ", ";
+    if (emit_frag_coord)
+      ss << "in float4 frag_coord : SV_Position, ";
     ss << extra_vars << "out " << output_type << " ocol0 : SV_Target)\n";
   }
   break;
@@ -170,6 +193,8 @@ static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
 
     ss << "FRAGMENT_OUTPUT_LOCATION(0) out " << output_type << " ocol0;\n";
     ss << extra_vars << "\n";
+    if (emit_frag_coord)
+      ss << "#define frag_coord gl_FragCoord\n";
     ss << "void main()\n";
   }
   break;
@@ -496,4 +521,143 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp
   return ss.str();
 }
 
+// Generates a pixel shader which loads a texel stored as from_format, packs its channels into
+// the raw bit pattern of that format, and unpacks the pattern according to to_format.
+// Returns an empty string if either format has no conversion implemented.
+std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format)
+{
+  std::stringstream ss;
+  EmitSamplerDeclarations(ss, 0, 1, false);
+  EmitPixelMainDeclaration(ss, 1, 0, "float4", "", true);
+  ss << "{\n";
+  ss << "  int layer = int(v_tex0.z);\n";
+  ss << "  int4 coords = int4(int2(frag_coord.xy), layer, 0);\n";
+
+  // Convert to a 32-bit value encompassing all channels, filling the most significant bits with
+  // zeroes.
+  ss << "  uint raw_value;\n";
+  switch (from_format)
+  {
+  case TextureFormat::I8:
+  case TextureFormat::C8:
+  {
+    ss << "  float4 temp_value = ";
+    EmitTextureLoad(ss, 0, "coords");
+    ss << ";\n";
+    ss << "  raw_value = uint(temp_value.r * 255.0);\n";
+  }
+  break;
+
+  case TextureFormat::IA8:
+  {
+    ss << "  float4 temp_value = ";
+    EmitTextureLoad(ss, 0, "coords");
+    ss << ";\n";
+    ss << "  raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n";
+  }
+  break;
+
+  case TextureFormat::IA4:
+  {
+    ss << "  float4 temp_value = ";
+    EmitTextureLoad(ss, 0, "coords");
+    ss << ";\n";
+    ss << "  raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n";
+  }
+  break;
+
+  case TextureFormat::RGB565:
+  {
+    ss << "  float4 temp_value = ";
+    EmitTextureLoad(ss, 0, "coords");
+    ss << ";\n";
+    ss << "  raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n";
+    ss << "              (uint(temp_value.r * 31.0) << 11);\n";
+  }
+  break;
+
+  case TextureFormat::RGB5A3:
+  {
+    ss << "  float4 temp_value = ";
+    EmitTextureLoad(ss, 0, "coords");
+    ss << ";\n";
+
+    // 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
+    ss << "  if (temp_value.a > 0.878f) {\n";
+    ss << "    raw_value = (uint(temp_value.b * 31.0)) | (uint(temp_value.g * 31.0) << 5) |\n";
+    ss << "                (uint(temp_value.r * 31.0) << 10) | 0x8000u;\n";
+    ss << "  } else {\n";
+    ss << "    raw_value = (uint(temp_value.b * 15.0)) | (uint(temp_value.g * 15.0) << 4) |\n";
+    ss << "                (uint(temp_value.r * 15.0) << 8) | (uint(temp_value.a * 7.0) << 12);\n";
+    ss << "  }\n";
+  }
+  break;
+
+  default:
+    // Unsupported source format: return an empty string so the caller can detect the failure.
+    return std::string();
+  }
+
+  // Now convert it to its new representation.
+  switch (to_format)
+  {
+  case TextureFormat::I8:
+  case TextureFormat::C8:
+  {
+    // Build the vector from a temporary rather than swizzling a scalar expression, which not
+    // every shading language version accepts.
+    ss << "  float intensity = float(raw_value & 0xFFu) / 255.0;\n";
+    ss << "  ocol0 = float4(intensity, intensity, intensity, intensity);\n";
+  }
+  break;
+
+  case TextureFormat::IA8:
+  {
+    ss << "  float intensity = float(raw_value & 0xFFu) / 255.0;\n";
+    ss << "  float alpha = float((raw_value >> 8) & 0xFFu) / 255.0;\n";
+    ss << "  ocol0 = float4(intensity, intensity, intensity, alpha);\n";
+  }
+  break;
+
+  case TextureFormat::IA4:
+  {
+    ss << "  float intensity = float(raw_value & 0xFu) / 15.0;\n";
+    ss << "  float alpha = float((raw_value >> 4) & 0xFu) / 15.0;\n";
+    ss << "  ocol0 = float4(intensity, intensity, intensity, alpha);\n";
+  }
+  break;
+
+  case TextureFormat::RGB565:
+  {
+    // RGB565 is laid out as R[15:11] G[10:5] B[4:0], matching the packing above.
+    ss << "  ocol0 = float4(float((raw_value >> 11) & 0x1Fu) / 31.0,\n";
+    ss << "                 float((raw_value >> 5) & 0x3Fu) / 63.0,\n";
+    ss << "                 float(raw_value & 0x1Fu) / 31.0, 1.0);\n";
+  }
+  break;
+
+  case TextureFormat::RGB5A3:
+  {
+    ss << "  if ((raw_value & 0x8000u) != 0u) {\n";
+    ss << "    ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n";
+    ss << "                   float((raw_value >> 5) & 0x1Fu) / 31.0,\n";
+    ss << "                   float(raw_value & 0x1Fu) / 31.0, 1.0);\n";
+    ss << "  } else {\n";
+    ss << "    ocol0 = float4(float((raw_value >> 8) & 0x0Fu) / 15.0,\n";
+    ss << "                   float((raw_value >> 4) & 0x0Fu) / 15.0,\n";
+    ss << "                   float(raw_value & 0x0Fu) / 15.0,\n";
+    ss << "                   float((raw_value >> 12) & 0x07u) / 7.0);\n";
+    ss << "  }\n";
+  }
+  break;
+
+  default:
+    // Unsupported destination format: return an empty string, as for the source format.
+    return std::string();
+  }
+
+  ss << "}\n";
+  return ss.str();
+}
+
 } // namespace FramebufferShaderGen
diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.h b/Source/Core/VideoCommon/FramebufferShaderGen.h
index 0e065521cf..b0134b5897 100644
--- a/Source/Core/VideoCommon/FramebufferShaderGen.h
+++ b/Source/Core/VideoCommon/FramebufferShaderGen.h
@@ -3,6 +3,7 @@
 #include "VideoCommon/VideoCommon.h"
 
 enum class EFBReinterpretType;
+enum class TextureFormat;
 
 namespace FramebufferShaderGen
 {
@@ -28,5 +29,6 @@ std::string GenerateClearVertexShader();
 std::string GenerateEFBPokeVertexShader();
 std::string GenerateColorPixelShader();
 std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples);
+std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format);
 
 } // namespace FramebufferShaderGen
diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp
index 1f578f6bd8..3d4f286b68 100644
--- a/Source/Core/VideoCommon/ShaderCache.cpp
+++ b/Source/Core/VideoCommon/ShaderCache.cpp
@@ -1255,6 +1255,47 @@ const AbstractPipeline* ShaderCache::GetPaletteConversionPipeline(TLUTFormat for
   return m_palette_conversion_pipelines[static_cast<size_t>(format)].get();
 }
 
+// Returns the pipeline which reinterprets texels of from_format as to_format, creating and
+// caching it on first use. Failures are cached too, so a format pair whose shader or pipeline
+// cannot be built yields nullptr on every call without retrying the compile.
+const AbstractPipeline* ShaderCache::GetTextureReinterpretPipeline(TextureFormat from_format,
+                                                                   TextureFormat to_format)
+{
+  const auto key = std::make_pair(from_format, to_format);
+  auto iter = m_texture_reinterpret_pipelines.find(key);
+  if (iter != m_texture_reinterpret_pipelines.end())
+    return iter->second.get();
+
+  std::string shader_source =
+      FramebufferShaderGen::GenerateTextureReinterpretShader(from_format, to_format);
+  if (shader_source.empty())
+  {
+    m_texture_reinterpret_pipelines.emplace(key, nullptr);
+    return nullptr;
+  }
+
+  std::unique_ptr<AbstractShader> shader =
+      g_renderer->CreateShaderFromSource(ShaderStage::Pixel, shader_source);
+  if (!shader)
+  {
+    m_texture_reinterpret_pipelines.emplace(key, nullptr);
+    return nullptr;
+  }
+
+  AbstractPipelineConfig config;
+  config.vertex_format = nullptr;
+  config.vertex_shader = m_screen_quad_vertex_shader.get();
+  config.geometry_shader = nullptr;
+  config.pixel_shader = shader.get();
+  config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
+  config.depth_state = RenderState::GetNoDepthTestingDepthState();
+  config.blending_state = RenderState::GetNoBlendingBlendState();
+  config.framebuffer_state = RenderState::GetRGBA8FramebufferState();
+  config.usage = AbstractPipelineUsage::Utility;
+  auto iiter = m_texture_reinterpret_pipelines.emplace(key, g_renderer->CreatePipeline(config));
+  return iiter.first->second.get();
+}
+
 const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format,
                                                             TLUTFormat palette_format)
 {
@@ -1282,5 +1323,4 @@ const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format
   auto iiter = m_texture_decoding_shaders.emplace(key, std::move(shader));
   return iiter.first->second.get();
 }
-
 } // namespace VideoCommon
diff --git a/Source/Core/VideoCommon/ShaderCache.h b/Source/Core/VideoCommon/ShaderCache.h
index 16f6ca6f4a..ffea19ee60 100644
--- a/Source/Core/VideoCommon/ShaderCache.h
+++ b/Source/Core/VideoCommon/ShaderCache.h
@@ -34,6 +34,7 @@
 
 class NativeVertexFormat;
 enum class AbstractTextureFormat : u32;
+enum class TextureFormat;
 enum class TLUTFormat;
 
 namespace VideoCommon
@@ -104,6 +105,10 @@ public:
   // Palette texture conversion pipelines
   const AbstractPipeline* GetPaletteConversionPipeline(TLUTFormat format);
 
+  // Texture reinterpret pipelines
+  const AbstractPipeline* GetTextureReinterpretPipeline(TextureFormat from_format,
+                                                        TextureFormat to_format);
+
   // Texture decoding compute shaders
   const AbstractShader* GetTextureDecodingShader(TextureFormat format, TLUTFormat palette_format);
 
@@ -238,6 +243,10 @@ private:
   std::array<std::unique_ptr<AbstractPipeline>, NUM_PALETTE_CONVERSION_SHADERS>
       m_palette_conversion_pipelines;
 
+  // Texture reinterpreting pipeline
+  std::map<std::pair<TextureFormat, TextureFormat>, std::unique_ptr<AbstractPipeline>>
+      m_texture_reinterpret_pipelines;
+
   // Texture decoding shaders
   std::map<std::pair<u32, u32>, std::unique_ptr<AbstractShader>> m_texture_decoding_shaders;
 };
diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp
index f21e8e0e0e..5fcf667514 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@@ -311,6 +311,54 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma
   return decoded_entry;
 }
 
+// Creates a new single-level render-target cache entry containing the texels of existing_entry
+// reinterpreted as new_format, and registers it in textures_by_address. Returns nullptr if the
+// reinterpret pipeline or the cache entry cannot be created.
+TextureCacheBase::TCacheEntry* TextureCacheBase::ReinterpretEntry(const TCacheEntry* existing_entry,
+                                                                  TextureFormat new_format)
+{
+  // Resolve the pipeline before allocating anything, so that a failed shader or pipeline build
+  // neither leaves an unused cache entry behind nor issues a draw without a pipeline bound.
+  const AbstractPipeline* pipeline =
+      g_shader_cache->GetTextureReinterpretPipeline(existing_entry->format.texfmt, new_format);
+  if (!pipeline)
+    return nullptr;
+
+  TextureConfig new_config = existing_entry->texture->GetConfig();
+  new_config.levels = 1;
+  new_config.flags |= AbstractTextureFlag_RenderTarget;
+
+  TCacheEntry* reinterpreted_entry = AllocateCacheEntry(new_config);
+  if (!reinterpreted_entry)
+    return nullptr;
+
+  reinterpreted_entry->SetGeneralParameters(existing_entry->addr, existing_entry->size_in_bytes,
+                                            new_format, existing_entry->should_force_safe_hashing);
+  reinterpreted_entry->SetDimensions(existing_entry->native_width, existing_entry->native_height,
+                                     1);
+  reinterpreted_entry->SetHashes(existing_entry->base_hash, existing_entry->hash);
+  reinterpreted_entry->frameCount = existing_entry->frameCount;
+  reinterpreted_entry->SetNotCopy();
+  reinterpreted_entry->is_efb_copy = existing_entry->is_efb_copy;
+  reinterpreted_entry->may_have_overlapping_textures =
+      existing_entry->may_have_overlapping_textures;
+
+  g_renderer->BeginUtilityDrawing();
+  g_renderer->SetAndDiscardFramebuffer(reinterpreted_entry->framebuffer.get());
+  g_renderer->SetViewportAndScissor(reinterpreted_entry->texture->GetRect());
+  g_renderer->SetPipeline(pipeline);
+  g_renderer->SetTexture(0, existing_entry->texture.get());
+  // The sampler slot must match the texture slot bound above.
+  g_renderer->SetSamplerState(0, RenderState::GetPointSamplerState());
+  g_renderer->Draw(0, 3);
+  g_renderer->EndUtilityDrawing();
+  reinterpreted_entry->texture->FinishedRendering();
+
+  textures_by_address.emplace(reinterpreted_entry->addr, reinterpreted_entry);
+
+  return reinterpreted_entry;
+}
+
 void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* entry, u32 new_width,
                                                 u32 new_height)
 {
@@ -385,6 +433,24 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
     {
       if (entry->hash == entry->CalculateHash())
       {
+        // If the texture formats are not compatible or convertible, skip it.
+        if (!IsCompatibleTextureFormat(entry_to_update->format.texfmt, entry->format.texfmt))
+        {
+          if (!CanReinterpretTextureOnGPU(entry_to_update->format.texfmt, entry->format.texfmt))
+          {
+            ++iter.first;
+            continue;
+          }
+
+          entry = ReinterpretEntry(entry, entry_to_update->format.texfmt);
+          if (!entry)
+          {
+            // Reinterpretation failed (allocation or pipeline creation); skip this copy.
+            ++iter.first;
+            continue;
+          }
+        }
+
         if (isPaletteTexture)
         {
           TCacheEntry* decoded_entry = ApplyPaletteToEntry(entry, palette, tlutfmt);
@@ -930,6 +996,7 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
   TexAddrCache::iterator oldest_entry = iter;
   int temp_frameCount = 0x7fffffff;
   TexAddrCache::iterator unconverted_copy = textures_by_address.end();
+  TexAddrCache::iterator unreinterpreted_copy = textures_by_address.end();
 
   while (iter != iter_range.second)
   {
@@ -958,10 +1025,38 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
            (!isPaletteTexture || g_Config.backend_info.bSupportsPaletteConversion)) ||
           IsPlayingBackFifologWithBrokenEFBCopies)
       {
-        // TODO: We should check format/width/height/levels for EFB copies. Checking
-        // format is complicated because EFB copy formats don't exactly match
-        // texture formats. I'm not sure what effect checking width/height/levels
-        // would have.
+        // The texture format in VRAM must match the format that the copy was created with. Some
+        // formats are inherently compatible, as the channel and bit layout is identical (e.g.
+        // I8/C8). Others have the same number of bits per texel, and can be reinterpreted on the
+        // GPU (e.g. IA4 and I8, or RGB565 and RGB5A3). The only known game which reinterprets
+        // texels in this manner is Spider-Man: Shattered Dimensions, where it creates a copy in
+        // B8 format, and sets it up as an IA4 texture.
+        if (!IsCompatibleTextureFormat(entry->format.texfmt, texformat))
+        {
+          // Can we reinterpret this in VRAM?
+          if (CanReinterpretTextureOnGPU(entry->format.texfmt, texformat))
+          {
+            // Delay the conversion until afterwards, it's possible this texture has already been
+            // converted.
+            unreinterpreted_copy = iter++;
+            continue;
+          }
+          else
+          {
+            // If the EFB copies are in a different format and are not reinterpretable, use the RAM
+            // copy.
+            ++iter;
+            continue;
+          }
+        }
+        else
+        {
+          // Prefer the already-converted copy.
+          unconverted_copy = textures_by_address.end();
+        }
+
+        // TODO: We should check width/height/levels for EFB copies. I'm not sure what effect
+        // checking width/height/levels would have.
         if (!isPaletteTexture || !g_Config.backend_info.bSupportsPaletteConversion)
           return entry;
 
@@ -1010,6 +1105,18 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
     ++iter;
   }
 
+  if (unreinterpreted_copy != textures_by_address.end())
+  {
+    TCacheEntry* decoded_entry = ReinterpretEntry(unreinterpreted_copy->second, texformat);
+
+    // It's possible to combine reinterpreted textures + palettes.
+    if (unreinterpreted_copy == unconverted_copy && decoded_entry)
+      decoded_entry = ApplyPaletteToEntry(decoded_entry, &texMem[tlutaddr], tlutfmt);
+
+    if (decoded_entry)
+      return decoded_entry;
+  }
+
   if (unconverted_copy != textures_by_address.end())
   {
     TCacheEntry* decoded_entry =
diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h
index 51c4f20af4..2f42df9a05 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.h
+++ b/Source/Core/VideoCommon/TextureCacheBase.h
@@ -277,6 +277,8 @@ private:
 
   TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt);
 
+  TCacheEntry* ReinterpretEntry(const TCacheEntry* existing_entry, TextureFormat new_format);
+
   TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
                                        TLUTFormat tlutfmt);
   void StitchXFBCopy(TCacheEntry* entry_to_update);
diff --git a/Source/Core/VideoCommon/TextureDecoder.h b/Source/Core/VideoCommon/TextureDecoder.h
index a1313afab1..7a08a6514a 100644
--- a/Source/Core/VideoCommon/TextureDecoder.h
+++ b/Source/Core/VideoCommon/TextureDecoder.h
@@ -99,6 +99,51 @@ static inline bool IsValidTLUTFormat(TLUTFormat tlutfmt)
          tlutfmt == TLUTFormat::RGB5A3;
 }
 
+// Returns true if a texture stored as from_format can be used as to_format without any
+// conversion.
+static inline bool IsCompatibleTextureFormat(TextureFormat from_format, TextureFormat to_format)
+{
+  if (from_format == to_format)
+    return true;
+
+  // Indexed and paletted formats are "compatible", that is do not require conversion.
+  switch (from_format)
+  {
+  case TextureFormat::I4:
+  case TextureFormat::C4:
+    return to_format == TextureFormat::I4 || to_format == TextureFormat::C4;
+
+  case TextureFormat::I8:
+  case TextureFormat::C8:
+    return to_format == TextureFormat::I8 || to_format == TextureFormat::C8;
+
+  default:
+    return false;
+  }
+}
+
+// Returns true if a texture stored as from_format can be converted to to_format by
+// reinterpreting its texels on the GPU.
+static inline bool CanReinterpretTextureOnGPU(TextureFormat from_format, TextureFormat to_format)
+{
+  // Currently, we can only reinterpret textures of the same bit width (bits per texel).
+  switch (from_format)
+  {
+  case TextureFormat::I8:
+  case TextureFormat::IA4:
+    return to_format == TextureFormat::I8 || to_format == TextureFormat::IA4;
+
+  case TextureFormat::IA8:
+  case TextureFormat::RGB565:
+  case TextureFormat::RGB5A3:
+    return to_format == TextureFormat::IA8 || to_format == TextureFormat::RGB565 ||
+           to_format == TextureFormat::RGB5A3;
+
+  default:
+    return false;
+  }
+}
+
 int TexDecoder_GetTexelSizeInNibbles(TextureFormat format);
 int TexDecoder_GetTextureSizeInBytes(int width, int height, TextureFormat format);
 int TexDecoder_GetBlockWidthInTexels(TextureFormat format);