mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 07:21:14 +01:00
TextureCache: Support reinterpreting formats for VRAM textures
This commit is contained in:
parent
77f406c8a8
commit
946571b759
@ -1,6 +1,7 @@
|
||||
#include "VideoCommon/FramebufferShaderGen.h"
|
||||
#include <sstream>
|
||||
#include "VideoCommon/FramebufferManager.h"
|
||||
#include "VideoCommon/TextureDecoder.h"
|
||||
#include "VideoCommon/VertexShaderGen.h"
|
||||
|
||||
namespace FramebufferShaderGen
|
||||
@ -68,6 +69,26 @@ static void EmitSampleTexture(std::stringstream& ss, u32 n, const char* coords)
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a texel fetch/load instruction. Assumes that "coords" is a 4-element vector, with z
|
||||
// containing the layer, and w containing the mipmap level.
|
||||
static void EmitTextureLoad(std::stringstream& ss, u32 n, const char* coords)
|
||||
{
|
||||
switch (GetAPIType())
|
||||
{
|
||||
case APIType::D3D:
|
||||
ss << "tex" << n << ".Load(" << coords << ")";
|
||||
break;
|
||||
|
||||
case APIType::OpenGL:
|
||||
case APIType::Vulkan:
|
||||
ss << "texelFetch(samp" << n << ", (" << coords << ").xyz, (" << coords << ").w)";
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
|
||||
u32 num_color_inputs, bool position_input,
|
||||
u32 num_tex_outputs, u32 num_color_outputs,
|
||||
@ -133,7 +154,7 @@ static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
|
||||
|
||||
static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
|
||||
u32 num_color_inputs, const char* output_type = "float4",
|
||||
const char* extra_vars = "")
|
||||
const char* extra_vars = "", bool emit_frag_coord = false)
|
||||
{
|
||||
switch (GetAPIType())
|
||||
{
|
||||
@ -144,6 +165,8 @@ static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
|
||||
ss << "in float3 v_tex" << i << " : TEXCOORD" << i << ", ";
|
||||
for (u32 i = 0; i < num_color_inputs; i++)
|
||||
ss << "in float4 v_col" << i << " : COLOR" << i << ", ";
|
||||
if (emit_frag_coord)
|
||||
ss << "in float4 frag_coord : SV_Position, ";
|
||||
ss << extra_vars << "out " << output_type << " ocol0 : SV_Target)\n";
|
||||
}
|
||||
break;
|
||||
@ -170,6 +193,8 @@ static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
|
||||
|
||||
ss << "FRAGMENT_OUTPUT_LOCATION(0) out " << output_type << " ocol0;\n";
|
||||
ss << extra_vars << "\n";
|
||||
if (emit_frag_coord)
|
||||
ss << "#define frag_coord gl_FragCoord\n";
|
||||
ss << "void main()\n";
|
||||
}
|
||||
break;
|
||||
@ -496,4 +521,126 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format)
|
||||
{
|
||||
std::stringstream ss;
|
||||
EmitSamplerDeclarations(ss, 0, 1, false);
|
||||
EmitPixelMainDeclaration(ss, 1, 0, "float4", "", true);
|
||||
ss << "{\n";
|
||||
ss << " int layer = int(v_tex0.z);\n";
|
||||
ss << " int4 coords = int4(int2(frag_coord.xy), layer, 0);\n";
|
||||
|
||||
// Convert to a 32-bit value encompassing all channels, filling the most significant bits with
|
||||
// zeroes.
|
||||
ss << " uint raw_value;\n";
|
||||
switch (from_format)
|
||||
{
|
||||
case TextureFormat::I8:
|
||||
case TextureFormat::C8:
|
||||
{
|
||||
ss << " float4 temp_value = ";
|
||||
EmitTextureLoad(ss, 0, "coords");
|
||||
ss << ";\n";
|
||||
ss << " raw_value = uint(temp_value.r * 255.0);\n";
|
||||
}
|
||||
break;
|
||||
|
||||
case TextureFormat::IA8:
|
||||
{
|
||||
ss << " float4 temp_value = ";
|
||||
EmitTextureLoad(ss, 0, "coords");
|
||||
ss << ";\n";
|
||||
ss << " raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n";
|
||||
}
|
||||
break;
|
||||
|
||||
case TextureFormat::IA4:
|
||||
{
|
||||
ss << " float4 temp_value = ";
|
||||
EmitTextureLoad(ss, 0, "coords");
|
||||
ss << ";\n";
|
||||
ss << " raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n";
|
||||
}
|
||||
break;
|
||||
|
||||
case TextureFormat::RGB565:
|
||||
{
|
||||
ss << " float4 temp_value = ";
|
||||
EmitTextureLoad(ss, 0, "coords");
|
||||
ss << ";\n";
|
||||
ss << " raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n";
|
||||
ss << " (uint(temp_value.r * 31.0) << 11);\n";
|
||||
}
|
||||
break;
|
||||
|
||||
case TextureFormat::RGB5A3:
|
||||
{
|
||||
ss << " float4 temp_value = ";
|
||||
EmitTextureLoad(ss, 0, "coords");
|
||||
ss << ";\n";
|
||||
|
||||
// 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
|
||||
ss << " if (temp_value.a > 0.878f) {\n";
|
||||
ss << " raw_value = (uint(temp_value.b * 31.0)) | (uint(temp_value.g * 31.0) << 5) |\n";
|
||||
ss << " (uint(temp_value.r * 31.0) << 10) | 0x8000u;\n";
|
||||
ss << " } else {\n";
|
||||
ss << " raw_value = (uint(temp_value.b * 15.0)) | (uint(temp_value.g * 15.0) << 4) |\n";
|
||||
ss << " (uint(temp_value.r * 15.0) << 8) | (uint(temp_value.a * 7.0) << 12);\n";
|
||||
ss << " }\n";
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Now convert it to its new representation.
|
||||
switch (to_format)
|
||||
{
|
||||
case TextureFormat::I8:
|
||||
case TextureFormat::C8:
|
||||
{
|
||||
ss << " ocol0.rgba = (float(raw_value & 0xFFu) / 255.0).rrrr;\n";
|
||||
}
|
||||
break;
|
||||
|
||||
case TextureFormat::IA8:
|
||||
{
|
||||
ss << " ocol0.rgb = (float(raw_value & 0xFFu) / 255.0).rrr;\n";
|
||||
ss << " ocol0.a = float((raw_value >> 8) & 0xFFu) / 255.0;\n";
|
||||
}
|
||||
break;
|
||||
|
||||
case TextureFormat::IA4:
|
||||
{
|
||||
ss << " ocol0.rgb = (float(raw_value & 0xFu) / 15.0).rrr;\n";
|
||||
ss << " ocol0.a = float((raw_value >> 4) & 0xFu) / 15.0;\n";
|
||||
}
|
||||
break;
|
||||
|
||||
case TextureFormat::RGB565:
|
||||
{
|
||||
ss << " ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0\n";
|
||||
ss << " float((raw_value >> 5) & 0x1Fu) / 31.0,\n";
|
||||
ss << " float(raw_value & 0x1Fu) / 31.0,, 1.0);\n";
|
||||
}
|
||||
break;
|
||||
|
||||
case TextureFormat::RGB5A3:
|
||||
{
|
||||
ss << " if ((raw_value & 0x8000u) != 0u) {\n";
|
||||
ss << " ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n";
|
||||
ss << " float((raw_value >> 5) & 0x1Fu) / 31.0,\n";
|
||||
ss << " float(raw_value & 0x1Fu) / 31.0, 1.0);\n";
|
||||
ss << " } else {\n";
|
||||
ss << " ocol0 = float4(float((raw_value >> 8) & 0x0Fu) / 15.0,\n";
|
||||
ss << " float((raw_value >> 4) & 0x0Fu) / 15.0,\n";
|
||||
ss << " float(raw_value & 0x0Fu) / 15.0,\n";
|
||||
ss << " float((raw_value >> 12) & 0x07u) / 7.0);\n";
|
||||
ss << " }\n";
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
ss << "}\n";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
} // namespace FramebufferShaderGen
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "VideoCommon/VideoCommon.h"
|
||||
|
||||
enum class EFBReinterpretType;
|
||||
enum class TextureFormat;
|
||||
|
||||
namespace FramebufferShaderGen
|
||||
{
|
||||
@ -28,5 +29,6 @@ std::string GenerateClearVertexShader();
|
||||
std::string GenerateEFBPokeVertexShader();
|
||||
std::string GenerateColorPixelShader();
|
||||
std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples);
|
||||
// Generates pixel shader source which loads the texel under the current fragment from texture 0,
// packs its channels into the bit layout of from_format, and unpacks those bits as to_format
// into ocol0.
std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format);
|
||||
|
||||
} // namespace FramebufferShaderGen
|
||||
|
@ -1255,6 +1255,44 @@ const AbstractPipeline* ShaderCache::GetPaletteConversionPipeline(TLUTFormat for
|
||||
return m_palette_conversion_pipelines[static_cast<size_t>(format)].get();
|
||||
}
|
||||
|
||||
// Returns the utility pipeline which reinterprets texels from from_format to to_format,
// building and caching it on first use. The result (including a failed, null result) is stored
// in m_texture_reinterpret_pipelines, so each format pair is only attempted once.
const AbstractPipeline* ShaderCache::GetTextureReinterpretPipeline(TextureFormat from_format,
                                                                   TextureFormat to_format)
{
  const auto cache_key = std::make_pair(from_format, to_format);
  const auto cached = m_texture_reinterpret_pipelines.find(cache_key);
  if (cached != m_texture_reinterpret_pipelines.end())
    return cached->second.get();

  // Records a null entry for this format pair so that the failure is remembered.
  const auto remember_failure = [this, &cache_key]() -> const AbstractPipeline* {
    m_texture_reinterpret_pipelines.emplace(cache_key, nullptr);
    return nullptr;
  };

  const std::string source =
      FramebufferShaderGen::GenerateTextureReinterpretShader(from_format, to_format);
  if (source.empty())
    return remember_failure();

  const std::unique_ptr<AbstractShader> pixel_shader =
      g_renderer->CreateShaderFromSource(ShaderStage::Pixel, source);
  if (!pixel_shader)
    return remember_failure();

  // Screen-quad vertex shader with no vertex format or geometry stage, no culling, depth
  // testing or blending, rendering to an RGBA8 target.
  AbstractPipelineConfig pipeline_config;
  pipeline_config.vertex_format = nullptr;
  pipeline_config.vertex_shader = m_screen_quad_vertex_shader.get();
  pipeline_config.geometry_shader = nullptr;
  pipeline_config.pixel_shader = pixel_shader.get();
  pipeline_config.rasterization_state =
      RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
  pipeline_config.depth_state = RenderState::GetNoDepthTestingDepthState();
  pipeline_config.blending_state = RenderState::GetNoBlendingBlendState();
  pipeline_config.framebuffer_state = RenderState::GetRGBA8FramebufferState();
  pipeline_config.usage = AbstractPipelineUsage::Utility;

  const auto inserted =
      m_texture_reinterpret_pipelines.emplace(cache_key, g_renderer->CreatePipeline(pipeline_config));
  return inserted.first->second.get();
}
|
||||
|
||||
const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format,
|
||||
TLUTFormat palette_format)
|
||||
{
|
||||
@ -1282,5 +1320,4 @@ const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format
|
||||
auto iiter = m_texture_decoding_shaders.emplace(key, std::move(shader));
|
||||
return iiter.first->second.get();
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
@ -34,6 +34,7 @@
|
||||
|
||||
class NativeVertexFormat;
|
||||
enum class AbstractTextureFormat : u32;
|
||||
enum class TextureFormat;
|
||||
enum class TLUTFormat;
|
||||
|
||||
namespace VideoCommon
|
||||
@ -104,6 +105,10 @@ public:
|
||||
// Palette texture conversion pipelines
|
||||
const AbstractPipeline* GetPaletteConversionPipeline(TLUTFormat format);
|
||||
|
||||
// Texture reinterpret pipelines
|
||||
const AbstractPipeline* GetTextureReinterpretPipeline(TextureFormat from_format,
|
||||
TextureFormat to_format);
|
||||
|
||||
// Texture decoding compute shaders
|
||||
const AbstractShader* GetTextureDecodingShader(TextureFormat format, TLUTFormat palette_format);
|
||||
|
||||
@ -238,6 +243,10 @@ private:
|
||||
std::array<std::unique_ptr<AbstractPipeline>, NUM_PALETTE_CONVERSION_SHADERS>
|
||||
m_palette_conversion_pipelines;
|
||||
|
||||
// Texture reinterpreting pipeline
|
||||
std::map<std::pair<TextureFormat, TextureFormat>, std::unique_ptr<AbstractPipeline>>
|
||||
m_texture_reinterpret_pipelines;
|
||||
|
||||
// Texture decoding shaders
|
||||
std::map<std::pair<u32, u32>, std::unique_ptr<AbstractShader>> m_texture_decoding_shaders;
|
||||
};
|
||||
|
@ -311,6 +311,44 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma
|
||||
return decoded_entry;
|
||||
}
|
||||
|
||||
// Creates a new cache entry whose texels are those of existing_entry reinterpreted as
// new_format, by drawing existing_entry through the texture reinterpret pipeline into a new
// single-level render target of the same configuration.
//
// The new entry inherits the address, size, native dimensions, hashes, frame count and
// EFB-copy/overlap flags of existing_entry, and is registered in textures_by_address.
//
// Returns nullptr if no reinterpret pipeline is available for this format pair, or if the new
// cache entry could not be allocated. existing_entry is left untouched in all cases.
TextureCacheBase::TCacheEntry* TextureCacheBase::ReinterpretEntry(const TCacheEntry* existing_entry,
                                                                  TextureFormat new_format)
{
  // Resolve the pipeline before allocating anything: it can be null (unsupported format pair,
  // or shader/pipeline creation failed), and drawing with a null pipeline is not valid.
  const auto* pipeline =
      g_shader_cache->GetTextureReinterpretPipeline(existing_entry->format.texfmt, new_format);
  if (!pipeline)
    return nullptr;

  // Same configuration as the source, but only the base level, and it must be renderable.
  TextureConfig new_config = existing_entry->texture->GetConfig();
  new_config.levels = 1;
  new_config.flags |= AbstractTextureFlag_RenderTarget;

  TCacheEntry* reinterpreted_entry = AllocateCacheEntry(new_config);
  if (!reinterpreted_entry)
    return nullptr;

  reinterpreted_entry->SetGeneralParameters(existing_entry->addr, existing_entry->size_in_bytes,
                                            new_format, existing_entry->should_force_safe_hashing);
  reinterpreted_entry->SetDimensions(existing_entry->native_width, existing_entry->native_height,
                                     1);
  reinterpreted_entry->SetHashes(existing_entry->base_hash, existing_entry->hash);
  reinterpreted_entry->frameCount = existing_entry->frameCount;
  reinterpreted_entry->SetNotCopy();
  // SetNotCopy() is called first, so the EFB-copy flag is re-applied from the source afterwards.
  reinterpreted_entry->is_efb_copy = existing_entry->is_efb_copy;
  reinterpreted_entry->may_have_overlapping_textures =
      existing_entry->may_have_overlapping_textures;

  g_renderer->BeginUtilityDrawing();
  g_renderer->SetAndDiscardFramebuffer(reinterpreted_entry->framebuffer.get());
  g_renderer->SetViewportAndScissor(reinterpreted_entry->texture->GetRect());
  g_renderer->SetPipeline(pipeline);
  // The source texture and its sampler state both go on slot 0.
  g_renderer->SetTexture(0, existing_entry->texture.get());
  g_renderer->SetSamplerState(0, RenderState::GetPointSamplerState());
  g_renderer->Draw(0, 3);
  g_renderer->EndUtilityDrawing();
  reinterpreted_entry->texture->FinishedRendering();

  textures_by_address.emplace(reinterpreted_entry->addr, reinterpreted_entry);

  return reinterpreted_entry;
}
|
||||
|
||||
void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* entry, u32 new_width,
|
||||
u32 new_height)
|
||||
{
|
||||
@ -385,6 +423,18 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
|
||||
{
|
||||
if (entry->hash == entry->CalculateHash())
|
||||
{
|
||||
// If the texture formats are not compatible or convertible, skip it.
|
||||
if (!IsCompatibleTextureFormat(entry_to_update->format.texfmt, entry->format.texfmt))
|
||||
{
|
||||
if (!CanReinterpretTextureOnGPU(entry_to_update->format.texfmt, entry->format.texfmt))
|
||||
{
|
||||
++iter.first;
|
||||
continue;
|
||||
}
|
||||
|
||||
entry = ReinterpretEntry(entry, entry_to_update->format.texfmt);
|
||||
}
|
||||
|
||||
if (isPaletteTexture)
|
||||
{
|
||||
TCacheEntry* decoded_entry = ApplyPaletteToEntry(entry, palette, tlutfmt);
|
||||
@ -930,6 +980,7 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
|
||||
TexAddrCache::iterator oldest_entry = iter;
|
||||
int temp_frameCount = 0x7fffffff;
|
||||
TexAddrCache::iterator unconverted_copy = textures_by_address.end();
|
||||
TexAddrCache::iterator unreinterpreted_copy = textures_by_address.end();
|
||||
|
||||
while (iter != iter_range.second)
|
||||
{
|
||||
@ -958,10 +1009,38 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
|
||||
(!isPaletteTexture || g_Config.backend_info.bSupportsPaletteConversion)) ||
|
||||
IsPlayingBackFifologWithBrokenEFBCopies)
|
||||
{
|
||||
// TODO: We should check format/width/height/levels for EFB copies. Checking
|
||||
// format is complicated because EFB copy formats don't exactly match
|
||||
// texture formats. I'm not sure what effect checking width/height/levels
|
||||
// would have.
|
||||
// The texture format in VRAM must match the format that the copy was created with. Some
|
||||
// formats are inherently compatible, as the channel and bit layout is identical (e.g.
|
||||
// I8/C8). Others have the same number of bits per texel, and can be reinterpreted on the
|
||||
// GPU (e.g. IA4 and I8 or RGB565 and RGB5A3). The only known game which reinterprets texels
|
||||
// in this manner is Spiderman Shattered Dimensions, where it creates a copy in B8 format,
|
||||
// and sets it up as an IA4 texture.
|
||||
if (!IsCompatibleTextureFormat(entry->format.texfmt, texformat))
|
||||
{
|
||||
// Can we reinterpret this in VRAM?
|
||||
if (CanReinterpretTextureOnGPU(entry->format.texfmt, texformat))
|
||||
{
|
||||
// Delay the conversion until afterwards, it's possible this texture has already been
|
||||
// converted.
|
||||
unreinterpreted_copy = iter++;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// If the EFB copies are in a different format and are not reinterpretable, use the RAM
|
||||
// copy.
|
||||
++iter;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Prefer the already-converted copy.
|
||||
unconverted_copy = textures_by_address.end();
|
||||
}
|
||||
|
||||
// TODO: We should check width/height/levels for EFB copies. I'm not sure what effect
|
||||
// checking width/height/levels would have.
|
||||
if (!isPaletteTexture || !g_Config.backend_info.bSupportsPaletteConversion)
|
||||
return entry;
|
||||
|
||||
@ -1010,6 +1089,18 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
|
||||
++iter;
|
||||
}
|
||||
|
||||
if (unreinterpreted_copy != textures_by_address.end())
|
||||
{
|
||||
TCacheEntry* decoded_entry = ReinterpretEntry(unreinterpreted_copy->second, texformat);
|
||||
|
||||
// It's possible to combine reinterpreted textures + palettes.
|
||||
if (unreinterpreted_copy == unconverted_copy && decoded_entry)
|
||||
decoded_entry = ApplyPaletteToEntry(decoded_entry, &texMem[tlutaddr], tlutfmt);
|
||||
|
||||
if (decoded_entry)
|
||||
return decoded_entry;
|
||||
}
|
||||
|
||||
if (unconverted_copy != textures_by_address.end())
|
||||
{
|
||||
TCacheEntry* decoded_entry =
|
||||
|
@ -277,6 +277,8 @@ private:
|
||||
|
||||
TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt);
|
||||
|
||||
// Draws existing_entry through the texture reinterpret pipeline into a newly allocated
// single-level render-target entry tagged with new_format, registers that entry in
// textures_by_address and returns it. May return nullptr (e.g. when the new cache entry cannot
// be allocated), so callers must check the result.
TCacheEntry* ReinterpretEntry(const TCacheEntry* existing_entry, TextureFormat new_format);
|
||||
|
||||
TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
|
||||
TLUTFormat tlutfmt);
|
||||
void StitchXFBCopy(TCacheEntry* entry_to_update);
|
||||
|
@ -99,6 +99,47 @@ static inline bool IsValidTLUTFormat(TLUTFormat tlutfmt)
|
||||
tlutfmt == TLUTFormat::RGB5A3;
|
||||
}
|
||||
|
||||
// Returns true when a texture stored as from_format can be used as to_format without any
// conversion: either the formats are identical, or they are the intensity/indexed pair of the
// same size (I4 with C4, I8 with C8).
static inline bool IsCompatibleTextureFormat(TextureFormat from_format, TextureFormat to_format)
{
  const auto is_i4_or_c4 = [](TextureFormat fmt) {
    return fmt == TextureFormat::I4 || fmt == TextureFormat::C4;
  };
  const auto is_i8_or_c8 = [](TextureFormat fmt) {
    return fmt == TextureFormat::I8 || fmt == TextureFormat::C8;
  };

  if (from_format == to_format)
    return true;

  if (is_i4_or_c4(from_format))
    return is_i4_or_c4(to_format);

  if (is_i8_or_c8(from_format))
    return is_i8_or_c8(to_format);

  return false;
}
|
||||
|
||||
// Returns true when a texture stored as from_format can be reinterpreted as to_format on the
// GPU. Only formats of the same texel width can be reinterpreted into each other, which gives
// two groups: {I8, IA4} and {IA8, RGB565, RGB5A3}. Both formats must belong to the same group.
static inline bool CanReinterpretTextureOnGPU(TextureFormat from_format, TextureFormat to_format)
{
  const auto in_i8_group = [](TextureFormat fmt) {
    return fmt == TextureFormat::I8 || fmt == TextureFormat::IA4;
  };
  const auto in_ia8_group = [](TextureFormat fmt) {
    return fmt == TextureFormat::IA8 || fmt == TextureFormat::RGB565 ||
           fmt == TextureFormat::RGB5A3;
  };

  if (in_i8_group(from_format))
    return in_i8_group(to_format);

  if (in_ia8_group(from_format))
    return in_ia8_group(to_format);

  return false;
}
|
||||
|
||||
int TexDecoder_GetTexelSizeInNibbles(TextureFormat format);
|
||||
int TexDecoder_GetTextureSizeInBytes(int width, int height, TextureFormat format);
|
||||
int TexDecoder_GetBlockWidthInTexels(TextureFormat format);
|
||||
|
Loading…
x
Reference in New Issue
Block a user