TextureCache: Support reinterpreting formats for VRAM textures

This commit is contained in:
Stenzek 2019-07-14 15:24:12 +10:00
parent 77f406c8a8
commit 946571b759
7 changed files with 335 additions and 6 deletions

View File

@ -1,6 +1,7 @@
#include "VideoCommon/FramebufferShaderGen.h"
#include <sstream>
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VertexShaderGen.h"
namespace FramebufferShaderGen
@ -68,6 +69,26 @@ static void EmitSampleTexture(std::stringstream& ss, u32 n, const char* coords)
}
}
// Writes an unfiltered texel load expression for texture/sampler "n" into the shader stream.
// "coords" must name a 4-component integer vector: xy is the texel position, z selects the
// layer, and w selects the mipmap level. Nothing is written for API types other than D3D,
// OpenGL and Vulkan.
static void EmitTextureLoad(std::stringstream& ss, u32 n, const char* coords)
{
  const auto api_type = GetAPIType();

  // HLSL: Load() consumes the whole 4-component coordinate directly.
  if (api_type == APIType::D3D)
  {
    ss << "tex" << n << ".Load(" << coords << ")";
    return;
  }

  // GLSL: texelFetch() takes the position/layer and the mip level as separate arguments.
  if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
  {
    ss << "texelFetch(samp" << n << ", (" << coords << ").xyz, (" << coords << ").w)";
  }
}
static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
u32 num_color_inputs, bool position_input,
u32 num_tex_outputs, u32 num_color_outputs,
@ -133,7 +154,7 @@ static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
u32 num_color_inputs, const char* output_type = "float4",
const char* extra_vars = "")
const char* extra_vars = "", bool emit_frag_coord = false)
{
switch (GetAPIType())
{
@ -144,6 +165,8 @@ static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
ss << "in float3 v_tex" << i << " : TEXCOORD" << i << ", ";
for (u32 i = 0; i < num_color_inputs; i++)
ss << "in float4 v_col" << i << " : COLOR" << i << ", ";
if (emit_frag_coord)
ss << "in float4 frag_coord : SV_Position, ";
ss << extra_vars << "out " << output_type << " ocol0 : SV_Target)\n";
}
break;
@ -170,6 +193,8 @@ static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
ss << "FRAGMENT_OUTPUT_LOCATION(0) out " << output_type << " ocol0;\n";
ss << extra_vars << "\n";
if (emit_frag_coord)
ss << "#define frag_coord gl_FragCoord\n";
ss << "void main()\n";
}
break;
@ -496,4 +521,126 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp
return ss.str();
}
// Generates the source of a pixel shader which reinterprets the bits of a texel stored in
// from_format as if they had been written in to_format.
//
// The shader loads the texel under the current fragment (layer taken from v_tex0.z, mip level 0),
// packs its channels back into the integer bit pattern of from_format, and then unpacks that bit
// pattern according to to_format, writing the result to ocol0.
//
// Supported formats (on either side): I8, C8, IA8, IA4, RGB565 and RGB5A3.
// Returns an empty string when either format is not supported, so that the caller can treat the
// conversion as unavailable instead of compiling a shader that reads an unset value or never
// writes its output.
std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format)
{
  std::stringstream ss;
  EmitSamplerDeclarations(ss, 0, 1, false);
  EmitPixelMainDeclaration(ss, 1, 0, "float4", "", true);
  ss << "{\n";
  ss << " int layer = int(v_tex0.z);\n";
  ss << " int4 coords = int4(int2(frag_coord.xy), layer, 0);\n";

  // Every supported source format starts by fetching the texel into temp_value.
  const auto emit_source_texel_load = [&ss]() {
    ss << " float4 temp_value = ";
    EmitTextureLoad(ss, 0, "coords");
    ss << ";\n";
  };

  // Convert to a 32-bit value encompassing all channels, filling the most significant bits with
  // zeroes.
  ss << " uint raw_value;\n";
  switch (from_format)
  {
  case TextureFormat::I8:
  case TextureFormat::C8:
  {
    emit_source_texel_load();
    ss << " raw_value = uint(temp_value.r * 255.0);\n";
  }
  break;

  case TextureFormat::IA8:
  {
    emit_source_texel_load();
    ss << " raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n";
  }
  break;

  case TextureFormat::IA4:
  {
    emit_source_texel_load();
    ss << " raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n";
  }
  break;

  case TextureFormat::RGB565:
  {
    // Blue in bits 0-4, green in bits 5-10, red in bits 11-15.
    emit_source_texel_load();
    ss << " raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n";
    ss << " (uint(temp_value.r * 31.0) << 11);\n";
  }
  break;

  case TextureFormat::RGB5A3:
  {
    emit_source_texel_load();
    // 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
    ss << " if (temp_value.a > 0.878f) {\n";
    ss << " raw_value = (uint(temp_value.b * 31.0)) | (uint(temp_value.g * 31.0) << 5) |\n";
    ss << " (uint(temp_value.r * 31.0) << 10) | 0x8000u;\n";
    ss << " } else {\n";
    ss << " raw_value = (uint(temp_value.b * 15.0)) | (uint(temp_value.g * 15.0) << 4) |\n";
    ss << " (uint(temp_value.r * 15.0) << 8) | (uint(temp_value.a * 7.0) << 12);\n";
    ss << " }\n";
  }
  break;

  default:
    // No packing rule for this source format; signal failure to the caller.
    return std::string();
  }

  // Now convert it to its new representation.
  switch (to_format)
  {
  case TextureFormat::I8:
  case TextureFormat::C8:
  {
    ss << " ocol0.rgba = (float(raw_value & 0xFFu) / 255.0).rrrr;\n";
  }
  break;

  case TextureFormat::IA8:
  {
    ss << " ocol0.rgb = (float(raw_value & 0xFFu) / 255.0).rrr;\n";
    ss << " ocol0.a = float((raw_value >> 8) & 0xFFu) / 255.0;\n";
  }
  break;

  case TextureFormat::IA4:
  {
    ss << " ocol0.rgb = (float(raw_value & 0xFu) / 15.0).rrr;\n";
    ss << " ocol0.a = float((raw_value >> 4) & 0xFu) / 15.0;\n";
  }
  break;

  case TextureFormat::RGB565:
  {
    // Must mirror the RGB565 packing above: 5 bits of red at bit 11, 6 bits of green at bit 5,
    // and 5 bits of blue at bit 0. The format carries no alpha, so it is forced to opaque.
    ss << " ocol0 = float4(float((raw_value >> 11) & 0x1Fu) / 31.0,\n";
    ss << " float((raw_value >> 5) & 0x3Fu) / 63.0,\n";
    ss << " float(raw_value & 0x1Fu) / 31.0, 1.0);\n";
  }
  break;

  case TextureFormat::RGB5A3:
  {
    // Bit 15 selects between the opaque RGB555 layout and the RGB444 + 3-bit alpha layout.
    ss << " if ((raw_value & 0x8000u) != 0u) {\n";
    ss << " ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n";
    ss << " float((raw_value >> 5) & 0x1Fu) / 31.0,\n";
    ss << " float(raw_value & 0x1Fu) / 31.0, 1.0);\n";
    ss << " } else {\n";
    ss << " ocol0 = float4(float((raw_value >> 8) & 0x0Fu) / 15.0,\n";
    ss << " float((raw_value >> 4) & 0x0Fu) / 15.0,\n";
    ss << " float(raw_value & 0x0Fu) / 15.0,\n";
    ss << " float((raw_value >> 12) & 0x07u) / 7.0);\n";
    ss << " }\n";
  }
  break;

  default:
    // No unpacking rule for this destination format; signal failure to the caller.
    return std::string();
  }

  ss << "}\n";
  return ss.str();
}
} // namespace FramebufferShaderGen

View File

@ -3,6 +3,7 @@
#include "VideoCommon/VideoCommon.h"
enum class EFBReinterpretType;
enum class TextureFormat;
namespace FramebufferShaderGen
{
@ -28,5 +29,6 @@ std::string GenerateClearVertexShader();
std::string GenerateEFBPokeVertexShader();
std::string GenerateColorPixelShader();
std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples);
std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format);
} // namespace FramebufferShaderGen

View File

@ -1255,6 +1255,44 @@ const AbstractPipeline* ShaderCache::GetPaletteConversionPipeline(TLUTFormat for
return m_palette_conversion_pipelines[static_cast<size_t>(format)].get();
}
// Returns the utility pipeline which reinterprets texels from from_format to to_format, building
// and caching it on first use. The outcome is cached per (from, to) pair, including failures, so
// a pair whose shader could not be generated or compiled yields nullptr on every later call
// without being retried.
const AbstractPipeline* ShaderCache::GetTextureReinterpretPipeline(TextureFormat from_format,
                                                                   TextureFormat to_format)
{
  const auto cache_key = std::make_pair(from_format, to_format);

  // Fast path: this format pair has been requested before.
  const auto existing = m_texture_reinterpret_pipelines.find(cache_key);
  if (existing != m_texture_reinterpret_pipelines.end())
    return existing->second.get();

  // Stores the result (possibly null) under the key and hands back the stored pointer.
  const auto remember = [this, &cache_key](std::unique_ptr<AbstractPipeline> pipeline) {
    return m_texture_reinterpret_pipelines.emplace(cache_key, std::move(pipeline))
        .first->second.get();
  };

  const std::string pixel_shader_source =
      FramebufferShaderGen::GenerateTextureReinterpretShader(from_format, to_format);
  if (pixel_shader_source.empty())
    return remember(nullptr);

  const std::unique_ptr<AbstractShader> pixel_shader =
      g_renderer->CreateShaderFromSource(ShaderStage::Pixel, pixel_shader_source);
  if (!pixel_shader)
    return remember(nullptr);

  // Screen-quad draw into an RGBA8 target with culling, depth testing and blending all disabled.
  AbstractPipelineConfig pipeline_config;
  pipeline_config.vertex_format = nullptr;
  pipeline_config.vertex_shader = m_screen_quad_vertex_shader.get();
  pipeline_config.geometry_shader = nullptr;
  pipeline_config.pixel_shader = pixel_shader.get();
  pipeline_config.rasterization_state =
      RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
  pipeline_config.depth_state = RenderState::GetNoDepthTestingDepthState();
  pipeline_config.blending_state = RenderState::GetNoBlendingBlendState();
  pipeline_config.framebuffer_state = RenderState::GetRGBA8FramebufferState();
  pipeline_config.usage = AbstractPipelineUsage::Utility;

  return remember(g_renderer->CreatePipeline(pipeline_config));
}
const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format,
TLUTFormat palette_format)
{
@ -1282,5 +1320,4 @@ const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format
auto iiter = m_texture_decoding_shaders.emplace(key, std::move(shader));
return iiter.first->second.get();
}
} // namespace VideoCommon

View File

@ -34,6 +34,7 @@
class NativeVertexFormat;
enum class AbstractTextureFormat : u32;
enum class TextureFormat;
enum class TLUTFormat;
namespace VideoCommon
@ -104,6 +105,10 @@ public:
// Palette texture conversion pipelines
const AbstractPipeline* GetPaletteConversionPipeline(TLUTFormat format);
// Texture reinterpret pipelines
const AbstractPipeline* GetTextureReinterpretPipeline(TextureFormat from_format,
TextureFormat to_format);
// Texture decoding compute shaders
const AbstractShader* GetTextureDecodingShader(TextureFormat format, TLUTFormat palette_format);
@ -238,6 +243,10 @@ private:
std::array<std::unique_ptr<AbstractPipeline>, NUM_PALETTE_CONVERSION_SHADERS>
m_palette_conversion_pipelines;
// Texture reinterpreting pipeline
std::map<std::pair<TextureFormat, TextureFormat>, std::unique_ptr<AbstractPipeline>>
m_texture_reinterpret_pipelines;
// Texture decoding shaders
std::map<std::pair<u32, u32>, std::unique_ptr<AbstractShader>> m_texture_decoding_shaders;
};

View File

@ -311,6 +311,44 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma
return decoded_entry;
}
// Creates a new cache entry whose texture holds the texels of existing_entry reinterpreted as
// new_format, and registers it in textures_by_address under the same address.
//
// The new entry copies the address, size, hashes, frame count and copy/overlap flags of
// existing_entry, but always has a single mip level and is created as a render target, because
// the conversion is performed by drawing into it on the GPU.
//
// Returns nullptr if no reinterpret pipeline is available for this format pair, or if the new
// cache entry could not be allocated. existing_entry itself is never modified.
TextureCacheBase::TCacheEntry* TextureCacheBase::ReinterpretEntry(const TCacheEntry* existing_entry,
                                                                  TextureFormat new_format)
{
  // Resolve the pipeline before allocating anything: the shader cache hands back nullptr when
  // the shader could not be generated or compiled, and drawing with a null pipeline must be
  // avoided. Bailing out here also means no half-initialized entry is left behind.
  const auto* pipeline =
      g_shader_cache->GetTextureReinterpretPipeline(existing_entry->format.texfmt, new_format);
  if (!pipeline)
    return nullptr;

  TextureConfig new_config = existing_entry->texture->GetConfig();
  new_config.levels = 1;
  new_config.flags |= AbstractTextureFlag_RenderTarget;

  TCacheEntry* reinterpreted_entry = AllocateCacheEntry(new_config);
  if (!reinterpreted_entry)
    return nullptr;

  // Mirror the bookkeeping of the source entry, substituting the new texture format.
  reinterpreted_entry->SetGeneralParameters(existing_entry->addr, existing_entry->size_in_bytes,
                                            new_format, existing_entry->should_force_safe_hashing);
  reinterpreted_entry->SetDimensions(existing_entry->native_width, existing_entry->native_height,
                                     1);
  reinterpreted_entry->SetHashes(existing_entry->base_hash, existing_entry->hash);
  reinterpreted_entry->frameCount = existing_entry->frameCount;
  reinterpreted_entry->SetNotCopy();
  // SetNotCopy() is followed by restoring the source's is_efb_copy flag, so the order of these
  // two statements matters.
  reinterpreted_entry->is_efb_copy = existing_entry->is_efb_copy;
  reinterpreted_entry->may_have_overlapping_textures =
      existing_entry->may_have_overlapping_textures;

  // Run the conversion shader over the whole destination texture.
  g_renderer->BeginUtilityDrawing();
  g_renderer->SetAndDiscardFramebuffer(reinterpreted_entry->framebuffer.get());
  g_renderer->SetViewportAndScissor(reinterpreted_entry->texture->GetRect());
  g_renderer->SetPipeline(pipeline);
  // The source texture and its sampler state share unit 0, the only sampler the reinterpret
  // shader declares.
  g_renderer->SetTexture(0, existing_entry->texture.get());
  g_renderer->SetSamplerState(0, RenderState::GetPointSamplerState());
  g_renderer->Draw(0, 3);
  g_renderer->EndUtilityDrawing();
  reinterpreted_entry->texture->FinishedRendering();

  textures_by_address.emplace(reinterpreted_entry->addr, reinterpreted_entry);
  return reinterpreted_entry;
}
void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* entry, u32 new_width,
u32 new_height)
{
@ -385,6 +423,18 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
{
if (entry->hash == entry->CalculateHash())
{
// If the texture formats are not compatible or convertible, skip it.
if (!IsCompatibleTextureFormat(entry_to_update->format.texfmt, entry->format.texfmt))
{
if (!CanReinterpretTextureOnGPU(entry_to_update->format.texfmt, entry->format.texfmt))
{
++iter.first;
continue;
}
entry = ReinterpretEntry(entry, entry_to_update->format.texfmt);
}
if (isPaletteTexture)
{
TCacheEntry* decoded_entry = ApplyPaletteToEntry(entry, palette, tlutfmt);
@ -930,6 +980,7 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
TexAddrCache::iterator oldest_entry = iter;
int temp_frameCount = 0x7fffffff;
TexAddrCache::iterator unconverted_copy = textures_by_address.end();
TexAddrCache::iterator unreinterpreted_copy = textures_by_address.end();
while (iter != iter_range.second)
{
@ -958,10 +1009,38 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
(!isPaletteTexture || g_Config.backend_info.bSupportsPaletteConversion)) ||
IsPlayingBackFifologWithBrokenEFBCopies)
{
// TODO: We should check format/width/height/levels for EFB copies. Checking
// format is complicated because EFB copy formats don't exactly match
// texture formats. I'm not sure what effect checking width/height/levels
// would have.
// The texture format in VRAM must match the format that the copy was created with. Some
// formats are inherently compatible, as the channel and bit layout is identical (e.g.
// I8/C8). Others have the same number of bits per texel, and can be reinterpreted on the
// GPU (e.g. IA4 and I8 or RGB565 and RGB5A3). The only known game which reinterprets texels
// in this manner is Spiderman Shattered Dimensions, where it creates a copy in B8 format,
// and sets it up as an IA4 texture.
if (!IsCompatibleTextureFormat(entry->format.texfmt, texformat))
{
// Can we reinterpret this in VRAM?
if (CanReinterpretTextureOnGPU(entry->format.texfmt, texformat))
{
// Delay the conversion until afterwards, it's possible this texture has already been
// converted.
unreinterpreted_copy = iter++;
continue;
}
else
{
// If the EFB copies are in a different format and are not reinterpretable, use the RAM
// copy.
++iter;
continue;
}
}
else
{
// Prefer the already-converted copy.
unconverted_copy = textures_by_address.end();
}
// TODO: We should check width/height/levels for EFB copies. I'm not sure what effect
// checking width/height/levels would have.
if (!isPaletteTexture || !g_Config.backend_info.bSupportsPaletteConversion)
return entry;
@ -1010,6 +1089,18 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
++iter;
}
if (unreinterpreted_copy != textures_by_address.end())
{
TCacheEntry* decoded_entry = ReinterpretEntry(unreinterpreted_copy->second, texformat);
// It's possible to combine reinterpreted textures + palettes.
if (unreinterpreted_copy == unconverted_copy && decoded_entry)
decoded_entry = ApplyPaletteToEntry(decoded_entry, &texMem[tlutaddr], tlutfmt);
if (decoded_entry)
return decoded_entry;
}
if (unconverted_copy != textures_by_address.end())
{
TCacheEntry* decoded_entry =

View File

@ -277,6 +277,8 @@ private:
TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt);
TCacheEntry* ReinterpretEntry(const TCacheEntry* existing_entry, TextureFormat new_format);
TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt);
void StitchXFBCopy(TCacheEntry* entry_to_update);

View File

@ -99,6 +99,47 @@ static inline bool IsValidTLUTFormat(TLUTFormat tlutfmt)
tlutfmt == TLUTFormat::RGB5A3;
}
// Returns true when a texture stored as from_format can be used as to_format without any
// conversion. This holds for identical formats, and for the intensity/palette-index pairs
// I4 <-> C4 and I8 <-> C8.
static inline bool IsCompatibleTextureFormat(TextureFormat from_format, TextureFormat to_format)
{
  // Identical formats never need conversion.
  if (from_format == to_format)
    return true;

  const auto is_i4_or_c4 = [](TextureFormat format) {
    return format == TextureFormat::I4 || format == TextureFormat::C4;
  };
  const auto is_i8_or_c8 = [](TextureFormat format) {
    return format == TextureFormat::I8 || format == TextureFormat::C8;
  };

  // Intensity and palette-index formats are interchangeable within each pair; both sides must
  // fall into the same pair.
  if (is_i4_or_c4(from_format))
    return is_i4_or_c4(to_format);
  if (is_i8_or_c8(from_format))
    return is_i8_or_c8(to_format);

  return false;
}
// Returns true when a texture stored as from_format can be reinterpreted as to_format on the
// GPU. Only formats of the same texel width can currently be reinterpreted, which gives two
// groups: {I8, IA4} and {IA8, RGB565, RGB5A3}. Both formats must belong to the same group.
static inline bool CanReinterpretTextureOnGPU(TextureFormat from_format, TextureFormat to_format)
{
  const auto in_i8_ia4_group = [](TextureFormat format) {
    return format == TextureFormat::I8 || format == TextureFormat::IA4;
  };
  const auto in_ia8_rgb_group = [](TextureFormat format) {
    return format == TextureFormat::IA8 || format == TextureFormat::RGB565 ||
           format == TextureFormat::RGB5A3;
  };

  if (in_i8_ia4_group(from_format))
    return in_i8_ia4_group(to_format);
  if (in_ia8_rgb_group(from_format))
    return in_ia8_rgb_group(to_format);

  // Any other source format cannot be reinterpreted.
  return false;
}
int TexDecoder_GetTexelSizeInNibbles(TextureFormat format);
int TexDecoder_GetTextureSizeInBytes(int width, int height, TextureFormat format);
int TexDecoder_GetBlockWidthInTexels(TextureFormat format);