2021-10-12 15:51:24 +13:00

291 lines
9.2 KiB
C++

// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <array>
#include "Common/ChunkFile.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/TMEM.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// TMEM emulation tracks which textures should be cached in TMEM on a real console.
// There are two good reasons to do this:
//
// 1. Some games deliberately avoid invalidating a texture, overwrite it with an EFB copy,
// and then expect the original texture to still be found in TMEM for another draw call.
// Spyro: A Hero's Tail is known for using such overwritten textures.
// However, other games like:
// * Sonic Riders
// * Metal Arms: Glitch in the System
// * Godzilla: Destroy All Monsters Melee
// * NHL Slapshot
// * Tak and the Power of Juju
// * Night at the Museum: Battle of the Smithsonian
// * 428: Fūsa Sareta Shibuya de
// are known to (accidentally or deliberately) avoid invalidating and then expect the pattern
// of the draw and the fact that the whole texture doesn't fit in TMEM to self-invalidate the
// texture. These are usually full-screen efb copies.
// So we must track the size of the textures as a heuristic to see if they will self-invalidate
// or not.
//
// 2. It actually improves Dolphin's performance in safer texture hashing modes, by reducing the
// number of times a texture needs to be hashed when reused in subsequent draws.
//
// As a side-effect, TMEM emulation also tracks if the texture unit configuration has changed at
// all, which Dolphin's TextureCache takes advantage of.
//
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Checking if a texture fits in TMEM or not is complicated by the fact that Flipper's TMEM is quite
// configurable.
// Each of the eight texture units has two banks (even and odd) that can be pointed at any offset
// and set to any size. It is completely valid to have overlapping banks, and performance can be
// improved by overlapping the caches of texture units that are drawing the same textures.
//
// For trilinear textures, the even/odd banks contain the even/odd LODs of the texture. TMEM has two
// banks of 512KB each, covering the upper and lower halves of TMEM's address space. The two banks
// can be accessed simultaneously, allowing a trilinear texture sample to be completed at the same
// cost as a bilinear sample, assuming the even and odd banks are mapped onto different banks.
//
// 32bit textures are actually stored as two 16bit textures in separate banks, allowing a bilinear
// sample of a 32bit texture at the same cost as a 16bit bilinear/trilinear sample. A trilinear
// sample of a 32bit texture costs more.
//
// TODO: I'm not sure if it's valid for a texture unit's even and odd banks to overlap. There might
// actually be a hard requirement for even and odd banks to live in different banks of TMEM.
//
// Note: This is still very much a heuristic.
// Actually knowing if a texture is partially or fully cached within TMEM would require
// extensive software rasterization, or sampler feedback from a hardware backend.
//
////////////////////////////////////////////////////////////////////////////////////////////////////
namespace TMEM
{
// Tracked state of a single texture unit: the configuration of its even and odd TMEM banks, plus
// whether the unit's contents are currently considered invalid, valid or cached.
// NOTE(review): s_unit (an array of this struct) is handed to PointerWrap::DoArray in DoState, so
// changing the member layout presumably affects savestates - confirm before reordering members.
struct TextureUnitState
{
enum class State
{
// Cache is invalid. Configuration has changed
INVALID,
// Valid, but not cached due to either being too big, or overlapping with another texture unit
VALID,
// Texture unit has cached all of the previous draw
CACHED,
};
// Configuration of one bank (even or odd) of a texture unit.
struct BankConfig
{
// Cache width and height fields, copied as-is from the SETIMAGE1/SETIMAGE2 register value.
// CalculateUnitSize derives the bank size from them.
u32 width = 0;
u32 height = 0;
// TMEM offset field from the register, shifted left by 5 (see ConfigurationChanged).
u32 base = 0;
// Size of the bank as last computed by Bind (reset to 0 by ConfigurationChanged).
// A bank with a size of 0 never overlaps anything.
u32 size = 0;
// Returns true if this bank's [base, base + size) range intersects the other bank's range.
bool Overlaps(const BankConfig& other) const;
};
BankConfig even = {};
BankConfig odd = {};
State state = State::INVALID;
// Returns true if any bank of this unit overlaps any bank of the other unit.
// Always false when either unit is in the INVALID state.
bool Overlaps(const TextureUnitState& other) const;
};
// Forward declaration: Bind() uses this helper, which is defined further down in the file.
static u32 CalculateUnitSize(TextureUnitState::BankConfig bank_config);
// Tracked state for each of the eight texture units, indexed by texture unit ID.
static std::array<TextureUnitState, 8> s_unit;
// On TMEM configuration changed:
// 1. invalidate the affected texture unit.
// 2. if the register describes a bank, re-read that bank's geometry from the new value.
//
// bp_addr: address of the register that was written; selects the unit and the register kind.
// config:  new raw value of that register.
void ConfigurationChanged(TexUnitAddress bp_addr, u32 config)
{
  TextureUnitState& unit = s_unit[bp_addr.GetUnitID()];

  // Whatever changed, nothing we previously believed about this unit can be trusted anymore.
  unit.state = TextureUnitState::State::INVALID;

  // Note: NOP changes never reach this point, BPStructs has already filtered them out.
  switch (bp_addr.Reg)
  {
  case TexUnitAddress::Register::SETIMAGE1:
  {
    // Carries the image type plus the even bank's cache height, cache width and TMEM offset.
    // The size is left at 0 until the next Bind recomputes it.
    TexImage1 image1 = {.hex = config};
    unit.even = {image1.cache_width, image1.cache_height, image1.tmem_even << 5, 0};
    break;
  }
  case TexUnitAddress::Register::SETIMAGE2:
  {
    // Carries the odd bank's cache height, cache width and TMEM offset.
    // The size is left at 0 until the next Bind recomputes it.
    TexImage2 image2 = {.hex = config};
    unit.odd = {image2.cache_width, image2.cache_height, image2.tmem_odd << 5, 0};
    break;
  }
  default:
    // Some other register changed; there is no bank geometry to update.
    break;
  }
}
void InvalidateAll()
{
for (auto& unit : s_unit)
{
unit.state = TextureUnitState::State::INVALID;
}
}
// On invalidate cache:
// 1. invalidate all texture units.
//
// param is accepted but currently ignored (see below).
void Invalidate([[maybe_unused]] u32 param)
{
// The exact arguments of Invalidate commands are currently unknown.
// They appear to contain the TMEM address and a size.
// For simplicity, we will just invalidate everything
InvalidateAll();
}
// On bind:
// 1. work out which banks the texture uses from its mipmapping/32bit status
// 2. compute the size of those banks from the unit's current configuration
// 3. mark the unit as cached if the texture fits in every bank it uses,
//    otherwise mark it as merely valid
//
// unit:          index of the texture unit being bound
// width, height: texture dimensions; width * height * 32 is compared against the bank sizes
// is_mipmapped:  whether the texture has mipmaps
// is_32_bit:     whether the texture uses a 32 bit format
void Bind(u32 unit, int width, int height, bool is_mipmapped, bool is_32_bit)
{
  TextureUnitState& tex_unit = s_unit[unit];

  const u32 required_size = width * height * 32U;

  // Every texture uses the even bank: it holds mipmap level 0 (and the other even LODs when
  // mipmapping is enabled).
  // The odd bank is only active for mipmapped or 32 bit textures: it holds the Alpha and Red
  // channels of 32 bit textures, or the odd layers of a mipmapped texture.
  const bool uses_odd_bank = is_mipmapped || is_32_bit;

  const u32 even_size = CalculateUnitSize(tex_unit.even);
  const u32 odd_size = uses_odd_bank ? CalculateUnitSize(tex_unit.odd) : 0;

  // The fit check is done against the base (un-doubled) bank sizes.
  bool fits = required_size <= even_size;
  if (uses_odd_bank)
    fits = fits && required_size <= odd_size;

  // TODO: This is what games appear to expect from hardware. But seems odd, as it doesn't line up
  //       with how much extra memory is required for mipmapping, just 33% more.
  //       Hardware testing is required to see exactly what gets used.
  //
  // With mipmapping enabled the even bank is doubled in size; the extended region holds the
  // remaining even mipmap layers. The odd bank is only doubled when the mipmapped texture is
  // also 32 bit.
  tex_unit.even.size = is_mipmapped ? even_size * 2 : even_size;
  tex_unit.odd.size = (is_mipmapped && is_32_bit) ? odd_size * 2 : odd_size;

  if (fits)
    tex_unit.state = TextureUnitState::State::CACHED;
  else
    tex_unit.state = TextureUnitState::State::VALID;
}
// Converts a bank's cache width/height register fields into the size of that bank.
// Only the width and height members of bank_config are read.
static u32 CalculateUnitSize(TextureUnitState::BankConfig bank_config)
{
  const u32 w = bank_config.width;
  const u32 h = bank_config.height;

  // The sdk only ever uses these three (square) cache sizes
  if (w == h)
  {
    if (w == 3)
      return 32 * 1024;  // 32KB
    if (w == 4)
      return 128 * 1024;  // 128KB
    if (w == 5)
      return 512 * 1024;  // 512KB
  }

  // However, the registers allow a much larger amount of configurability.
  // Maybe other sizes are broken?
  // Until hardware tests are done, this is a guess at the size algorithm
  return 512 * (1 << w) * (1 << h);
}
// Tests whether this bank and the other bank cover intersecting ranges, where each bank covers
// [base, base + size). A bank with a size of zero never overlaps anything.
bool TextureUnitState::BankConfig::Overlaps(const BankConfig& other) const
{
  const bool either_empty = size == 0 || other.size == 0;
  if (either_empty)
    return false;

  // This bank starts first (or at the same address) and reaches past the other bank's start
  if (base <= other.base && other.base < (base + size))
    return true;

  // The other bank starts first (or at the same address) and reaches past this bank's start
  return other.base <= base && base < (other.base + other.size);
}
bool TextureUnitState::Overlaps(const TextureUnitState& other) const
{
if (state == TextureUnitState::State::INVALID || other.state == TextureUnitState::State::INVALID)
return false;
return even.Overlaps(other.even) || even.Overlaps(other.odd) || odd.Overlaps(other.even) ||
odd.Overlaps(other.odd);
}
// Scans through the active texture units and checks them for overlapping banks.
// Any unit involved in an overlap (with itself or with another unit) is set to VALID,
// so it is no longer reported as cached.
void FinalizeBinds(BitSet32 used_textures)
{
  for (u32 i : used_textures)
  {
    TextureUnitState& current = s_unit[i];

    // Self-overlap: the unit's own even and odd banks collide
    if (current.even.Overlaps(current.odd))
      current.state = TextureUnitState::State::VALID;

    for (TextureUnitState& other : s_unit)
    {
      // Skip the unit itself, and anything it doesn't collide with
      if (&other == &current || !current.Overlaps(other))
        continue;

      // There is an overlap, downgrade both from CACHED
      // (for there to be an overlap, both must have started as valid or cached)
      current.state = TextureUnitState::State::VALID;
      other.state = TextureUnitState::State::VALID;
    }
  }
}
// Reports whether the given texture unit is currently in the CACHED state.
bool IsCached(u32 unit)
{
  const TextureUnitState& unit_state = s_unit[unit];
  return unit_state.state == TextureUnitState::State::CACHED;
}
// Reports whether the given texture unit is in any state other than INVALID
// (i.e. either VALID or CACHED).
bool IsValid(u32 unit)
{
  const bool is_invalid = s_unit[unit].state == TextureUnitState::State::INVALID;
  return !is_invalid;
}
void Init()
{
s_unit.fill({});
}
// Hands the state of all texture units to the given PointerWrap via DoArray.
// NOTE(review): presumably this is the savestate hook, with `p` deciding whether the data is
// read or written - confirm against Common/ChunkFile.h.
void DoState(PointerWrap& p)
{
p.DoArray(s_unit);
}
} // namespace TMEM