mirror of
synced 2025-03-21 05:35:05 +01:00
Fixed fog, lava, and water in Mario Galaxy (and possibly other games) at higher internal resolutions (IRs) by detecting and compensating for mipmaps that aren't just downscaled copies of the base texture, but are handmade to create a gradient.
This commit is contained in:
@ -299,7 +299,7 @@ ID3D11SamplerState* StateCache::Get(SamplerState state)
sampdc.AddressV = address_modes[static_cast<u32>(state.wrap_v.Value())];
sampdc.MaxLOD = state.max_lod / 16.f;
sampdc.MinLOD = state.min_lod / 16.f;
sampdc.MipLODBias = (s32)state.lod_bias / 32.0f;
sampdc.MipLODBias = (s32)state.lod_bias / 256.f;
if (state.anisotropic_filtering)
@ -40,7 +40,7 @@ private:
std::unordered_map<u32, ID3D11DepthStencilState*> m_depth;
std::unordered_map<u32, ID3D11RasterizerState*> m_raster;
std::unordered_map<u32, ID3D11BlendState*> m_blend;
std::unordered_map<u32, ID3D11SamplerState*> m_sampler;
std::unordered_map<SamplerState::StorageType, ID3D11SamplerState*> m_sampler;
namespace D3D
@ -100,7 +100,7 @@ void SamplerCache::SetParameters(GLuint sampler_id, const SamplerState& params)
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, params.max_lod / 16.f);
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGL)
glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.lod_bias / 32.f);
glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.lod_bias / 256.f);
if (params.anisotropic_filtering && g_ogl_config.bSupportsAniso)
@ -329,7 +329,7 @@ VkSampler ObjectCache::GetSampler(const SamplerState& info)
address_modes[static_cast<u32>(info.wrap_u.Value())], // VkSamplerAddressMode addressModeU
address_modes[static_cast<u32>(info.wrap_v.Value())], // VkSamplerAddressMode addressModeV
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW
info.lod_bias / 32.0f, // float mipLodBias
info.lod_bias / 256.0f, // float mipLodBias
VK_FALSE, // VkBool32 anisotropyEnable
0.0f, // float maxAnisotropy
VK_FALSE, // VkBool32 compareEnable
@ -180,8 +180,8 @@ void SamplerState::Generate(const BPMemory& bp, u32 index)
// If mipmaps are disabled, clamp min/max lod
max_lod = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm1.max_lod : 0;
min_lod = std::min(max_lod.Value(), tm1.min_lod);
lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm0.lod_bias : 0;
min_lod = std::min(max_lod.Value(), static_cast<u64>(tm1.min_lod));
lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm0.lod_bias * (256 / 32) : 0;
// Address modes
static constexpr std::array<AddressMode, 4> address_modes = {
@ -76,13 +76,15 @@ union BlendingState
union SamplerState
enum class Filter : u32
using StorageType = u64;
enum class Filter : StorageType
enum class AddressMode : u32
enum class AddressMode : StorageType
@ -101,12 +103,12 @@ union SamplerState
BitField<2, 1, Filter> mipmap_filter;
BitField<3, 2, AddressMode> wrap_u;
BitField<5, 2, AddressMode> wrap_v;
BitField<7, 8, u32> min_lod; // multiplied by 16
BitField<15, 8, u32> max_lod; // multiplied by 16
BitField<23, 8, s32> lod_bias; // multiplied by 32
BitField<31, 1, u32> anisotropic_filtering;
BitField<7, 16, s64> lod_bias; // multiplied by 256
BitField<23, 8, u64> min_lod; // multiplied by 16
BitField<31, 8, u64> max_lod; // multiplied by 16
BitField<39, 1, u64> anisotropic_filtering;
u32 hex;
StorageType hex;
namespace RenderState
@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <cmath>
#include <cstring>
#include <memory>
#include <string>
@ -431,7 +432,8 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
return entry_to_update;
void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level)
void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level,
bool is_arbitrary)
std::string szDir = File::GetUserPath(D_DUMPTEXTURES_IDX) + SConfig::GetInstance().GetGameID();
@ -441,8 +443,9 @@ void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, uns
if (level > 0)
basename += StringFromFormat("_mip%i", level);
basename += StringFromFormat(is_arbitrary ? "_arb_mip%i" : "_mip%i", level);
std::string filename = szDir + "/" + basename + ".png";
if (!File::Exists(filename))
@ -477,6 +480,124 @@ void TextureCacheBase::BindTextures()
class ArbitraryMipmapDetector
using PixelRGBAf = std::array<float, 4>;
explicit ArbitraryMipmapDetector() = default;
void AddLevel(u32 width, u32 height, u32 row_length, const u8* buffer)
levels.push_back({width, height, row_length, buffer});
bool HasArbitraryMipmaps(u8* downsample_buffer) const
if (levels.size() < 2)
return false;
// This is the average per-pixel, per-channel difference in percent between what we
// expect a normal blurred mipmap to look like and what we actually received
constexpr auto THRESHOLD_PERCENT = 35.f;
for (std::size_t i = 0; i < levels.size() - 1; ++i)
const auto& level = levels[i];
const auto& mip = levels[i + 1];
// Manually downsample the current layer with a simple box blur
// This is not necessarily close to whatever the original artists used, however
// It should still be closer than a thing that's not a downscale at all
level.Downsample(downsample_buffer, mip);
// Find the average difference between pixels in this level but downsampled
// and the next level
auto diff = mip.AverageDiff(downsample_buffer);
return true;
return false;
static float SRGBToLinear(u8 srgb_byte)
auto srgb_float = static_cast<float>(srgb_byte) / 256.f;
// approximations found on
// http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html
return srgb_float * (srgb_float * (srgb_float * 0.305306011f + 0.682171111f) + 0.012522878f);
static u8 LinearToSRGB(float linear)
return static_cast<u8>(std::max(1.055f * std::pow(linear, 0.416666667f) - 0.055f, 0.f) * 256.f);
struct Level
u32 width;
u32 height;
u32 row_length;
const u8* buffer;
PixelRGBAf Sample(u32 x, u32 y) const
const auto* p = buffer + (x + y * row_length) * 4;
return {SRGBToLinear(p[0]), SRGBToLinear(p[1]), SRGBToLinear(p[2]), SRGBToLinear(p[3])};
// Puts a downsampled image in dst. dst must be at least width*height*4
void Downsample(u8* dst, const Level& dst_shape) const
for (u32 i = 0; i < dst_shape.height; ++i)
for (u32 j = 0; j < dst_shape.width; ++j)
auto x = j * 2;
auto y = i * 2;
const std::array<PixelRGBAf, 4> samples = {Sample(x, y), Sample(x + 1, y),
Sample(x, y + 1), Sample(x + 1, y + 1)};
auto* dst_pixel = dst + (j + i * dst_shape.row_length) * 4;
dst_pixel[0] =
LinearToSRGB((samples[0][0] + samples[0][1] + samples[0][2] + samples[0][3]) * 0.25f);
dst_pixel[1] =
LinearToSRGB((samples[1][0] + samples[1][1] + samples[1][2] + samples[1][3]) * 0.25f);
dst_pixel[2] =
LinearToSRGB((samples[2][0] + samples[2][1] + samples[2][2] + samples[2][3]) * 0.25f);
dst_pixel[3] =
LinearToSRGB((samples[3][0] + samples[3][1] + samples[3][2] + samples[3][3]) * 0.25f);
float AverageDiff(const u8* other) const
float average_diff = 0.f;
const auto* ptr1 = buffer;
const auto* ptr2 = other;
for (u32 i = 0; i < height; ++i)
const auto* row1 = ptr1;
const auto* row2 = ptr2;
for (u32 j = 0; j < width; ++j, row1 += 4, row2 += 4)
average_diff += std::abs(row1[0] - row2[0]);
average_diff += std::abs(row1[1] - row2[1]);
average_diff += std::abs(row1[2] - row2[2]);
average_diff += std::abs(row1[3] - row2[3]);
ptr1 += row_length;
ptr2 += row_length;
return average_diff / (width * height * 4) / 2.56f;
std::vector<Level> levels;
TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// if this stage was not invalidated by changes to texture registers, keep the current texture
@ -774,6 +895,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
config.levels = texLevels;
config.format = hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8;
ArbitraryMipmapDetector arbitrary_mip_detector;
TCacheEntry* entry = AllocateCacheEntry(config);
@ -788,6 +911,9 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// Initialized to null because only software loading uses this buffer
u8* dst_buffer = nullptr;
if (!hires_tex && decode_on_gpu)
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
@ -797,19 +923,41 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
else if (!hires_tex)
size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight;
// Allocate memory for all levels at once
size_t total_texture_size = decoded_texture_size;
size_t mip_downsample_buffer_size = decoded_texture_size / 4;
size_t prev_level_size = decoded_texture_size;
for (u32 i = 1; i < tex_levels; ++i)
prev_level_size /= 4;
total_texture_size += prev_level_size;
// Add space for the downsampling at the end
total_texture_size += mip_downsample_buffer_size;
dst_buffer = temp;
if (!(texformat == TextureFormat::RGBA8 && from_tmem))
TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut, tlutfmt);
TexDecoder_Decode(dst_buffer, src_data, expandedWidth, expandedHeight, texformat, tlut,
u8* src_data_gb =
&texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight);
TexDecoder_DecodeRGBA8FromTmem(dst_buffer, src_data, src_data_gb, expandedWidth,
entry->texture->Load(0, width, height, expandedWidth, temp, decoded_texture_size);
entry->texture->Load(0, width, height, expandedWidth, dst_buffer, decoded_texture_size);
arbitrary_mip_detector.AddLevel(width, height, expandedWidth, dst_buffer);
dst_buffer += decoded_texture_size;
iter = textures_by_address.emplace(address, entry);
@ -832,7 +980,6 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
basename = HiresTexture::GenBaseName(src_data, texture_size, &texMem[tlutaddr], palette_size,
width, height, texformat, use_mipmaps, true);
DumpTexture(entry, basename, 0);
if (hires_tex)
@ -878,18 +1025,29 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// No need to call CheckTempSize here, as mips will always be smaller than the base level.
// No need to call CheckTempSize here, as the whole buffer is preallocated at the beginning
size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height;
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat,
tlut, tlutfmt);
entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, temp,
TexDecoder_Decode(dst_buffer, mip_src_data, expanded_mip_width, expanded_mip_height,
texformat, tlut, tlutfmt);
entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, dst_buffer,
arbitrary_mip_detector.AddLevel(mip_width, mip_height, expanded_mip_width, dst_buffer);
dst_buffer += decoded_mip_size;
mip_src_data += mip_size;
if (g_ActiveConfig.bDumpTextures)
DumpTexture(entry, basename, level);
entry->has_arbitrary_mips = arbitrary_mip_detector.HasArbitraryMipmaps(dst_buffer);
if (g_ActiveConfig.bDumpTextures)
for (u32 level = 0; level < texLevels; ++level)
DumpTexture(entry, basename, level, entry->has_arbitrary_mips);
@ -81,7 +81,9 @@ public:
bool is_efb_copy;
bool is_custom_tex;
bool may_have_overlapping_textures = true;
bool tmem_only = false; // indicates that this texture only exists in the tmem cache
bool tmem_only = false; // indicates that this texture only exists in the tmem cache
bool has_arbitrary_mips = false; // indicates that the mips in this texture are arbitrary
// content, aren't just downscaled
unsigned int native_width,
native_height; // Texture dimensions from the GameCube's point of view
@ -224,7 +226,7 @@ private:
TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt);
void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level);
void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level, bool is_arbitrary);
void CheckTempSize(size_t required_size);
TCacheEntry* AllocateCacheEntry(const TextureConfig& config);
@ -209,7 +209,7 @@ std::pair<size_t, size_t> VertexManagerBase::ResetFlushAspectRatioCount()
return val;
static void SetSamplerState(u32 index, bool custom_tex)
static void SetSamplerState(u32 index, bool custom_tex, bool has_arbitrary_mips)
const FourTexUnits& tex = bpmem.tex[index / 4];
const TexMode0& tm0 = tex.texMode0[index % 4];
@ -252,6 +252,18 @@ static void SetSamplerState(u32 index, bool custom_tex)
state.anisotropic_filtering = 0;
if (has_arbitrary_mips && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0))
// Apply a secondary bias calculated from the IR scale to pull inwards mipmaps
// that have arbitrary contents, eg. are used for fog effects where the
// distance they kick in at is important to preserve at any resolution.
state.lod_bias =
state.lod_bias + std::log2(static_cast<float>(g_ActiveConfig.iEFBScale)) * 256.f;
// Anisotropic also pushes mips farther away so it cannot be used either
state.anisotropic_filtering = 0;
g_renderer->SetSamplerState(index, state);
@ -323,7 +335,7 @@ void VertexManagerBase::Flush()
if (tentry)
SetSamplerState(i, tentry->is_custom_tex);
SetSamplerState(i, tentry->is_custom_tex, tentry->has_arbitrary_mips);
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height);
Reference in New Issue
Block a user