// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "VideoCommon/TMEM.h"

#include <array>

#include "Common/ChunkFile.h"
#include "VideoCommon/BPMemory.h"

////////////////////////////////////////////////////////////////////////////////////////////////////
//
// TMEM emulation tracks which textures should be cached in TMEM on a real console.
// There are two good reasons to do this:
//
// 1. Some games deliberately avoid invalidating a texture, overwrite it with an EFB copy,
//    and then expect the original texture to still be found in TMEM for another draw call.
//    Spyro: A Hero's Tail is known for using such overwritten textures.
//    However, other games like:
//      * Sonic Riders
//      * Metal Arms: Glitch in the System
//      * Godzilla: Destroy All Monsters Melee
//      * NHL Slapshot
//      * Tak and the Power of Juju
//      * Night at the Museum: Battle of the Smithsonian
//      * 428: Fūsa Sareta Shibuya de
//    are known to (accidentally or deliberately) avoid invalidating and then expect the pattern
//    of the draw and the fact that the whole texture doesn't fit in TMEM to self-invalidate the
//    texture. These are usually full-screen EFB copies.
//    So we must track the size of the textures as a heuristic to see if they will self-invalidate
//    or not.
//
// 2. It actually improves Dolphin's performance in safer texture hashing modes, by reducing the
//    amount of times a texture needs to be hashed when reused in subsequent draws.
//
// As a side-effect, TMEM emulation also tracks if the texture unit configuration has changed at
// all, which Dolphin's TextureCache takes advantage of.
//
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Checking if a texture fits in TMEM or not is complicated by the fact that Flipper's TMEM is quite
// configurable.
// Each of the eight texture units has two banks (even and odd) that can be pointed at any offset
// and set to any size. It is completely valid to have overlapping banks, and performance can be
// improved by overlapping the caches of texture units that are drawing the same textures.
//
// For trilinear textures, the even/odd banks contain the even/odd LODs of the texture. TMEM has two
// banks of 512KB each, covering the upper and lower halves of TMEM's address space. The two banks
// be accessed simultaneously, allowing a trilinear texture sample to be completed at the same cost
// as a bilinear sample, assuming the even and odd banks are mapped onto different banks.
//
// 32bit textures are actually stored as two 16bit textures in separate banks, allowing a bilinear
// sample of a 32bit texture at the same cost as a 16bit bilinear/trilinear sample. A trilinear
// sample of a 32bit texture costs more.
//
// TODO: I'm not sure if it's valid for a texture unit's even and odd banks to overlap. There might
//       actually be a hard requirement for even and odd banks to live in different banks of TMEM.
//
// Note: This is still very much a heuristic.
//       Actually knowing if a texture is partially or fully cached within TMEM would require
//       extensive software rasterization, or sampler feedback from a hardware backend.
//
////////////////////////////////////////////////////////////////////////////////////////////////////

namespace TMEM
{
// Tracked TMEM state of a single texture unit: the configuration of its even and odd cache banks
// and whether the unit's cached contents can currently be trusted.
// NOTE(review): the array of these (s_unit) is handed as-is to PointerWrap::DoArray in DoState(),
// so the member layout is presumably part of the savestate data - confirm before reordering,
// adding or resizing fields.
struct TextureUnitState
{
  enum class State
  {
    // Cache is invalid. Configuration has changed
    INVALID,

    // Valid, but not cached due to either being too big, or overlapping with another texture unit
    VALID,

    // Texture unit has cached all of the previous draw
    CACHED,
  };

  // Configuration of one cache bank (even or odd) of a texture unit.
  struct BankConfig
  {
    // Cache width/height codes taken from the SETIMAGE1/SETIMAGE2 register value.
    // These are not pixel dimensions; CalculateUnitSize() converts them into a bank size.
    u32 width = 0;
    u32 height = 0;
    // Start of the bank: the register's TMEM offset field shifted left by 5.
    u32 base = 0;
    // Size of the bank as computed by Bind(). 0 means the bank is unused or not sized yet,
    // in which case Overlaps() never reports an overlap.
    u32 size = 0;
    // True if both banks have a non-zero size and their [base, base + size) ranges intersect.
    bool Overlaps(const BankConfig& other) const;
  };

  // Bank used by every texture.
  BankConfig even = {};
  // Bank only used for mipmapped and/or 32 bit textures; Bind() sets its size to 0 otherwise.
  BankConfig odd = {};
  // Units start out INVALID until Bind() has been called for them.
  State state = State::INVALID;

  // True if neither unit is INVALID and any bank of this unit overlaps any bank of the other.
  bool Overlaps(const TextureUnitState& other) const;
};

// Converts a bank's cache width/height codes into the size of that bank.
// Declared here because Bind() uses it before its definition further down.
static u32 CalculateUnitSize(TextureUnitState::BankConfig bank_config);

// Tracked state of the eight texture units, indexed by texture unit ID.
static std::array<TextureUnitState, 8> s_unit;

// On TMEM configuration changed:
// 1. invalidate the texture unit the register belongs to.
// 2. if the register describes a cache bank (SETIMAGE1 = even, SETIMAGE2 = odd), replace the
//    tracked configuration of that bank. Its size is reset to 0 until the next Bind().

void ConfigurationChanged(TexUnitAddress bp_addr, u32 config)
{
  using Register = TexUnitAddress::Register;

  TextureUnitState& target = s_unit[bp_addr.GetUnitID()];

  // Whatever it was that changed, the previously tracked cache state can no longer be trusted.
  target.state = TextureUnitState::State::INVALID;

  // Note: BPStructs has already filtered out NOP changes before calling us
  switch (bp_addr.Reg)
  {
  case Register::SETIMAGE1:
  {
    // Holds the image type plus the even bank's cache height, cache width and TMEM offset
    TexImage1 image1 = {.hex = config};
    target.even = {image1.cache_width, image1.cache_height, image1.tmem_even << 5, 0};
    break;
  }
  case Register::SETIMAGE2:
  {
    // Holds the odd bank's cache height, cache width and TMEM offset
    TexImage2 image2 = {.hex = config};
    target.odd = {image2.cache_width, image2.cache_height, image2.tmem_odd << 5, 0};
    break;
  }
  default:
    // Some other register of this unit changed; there is no bank configuration to update.
    break;
  }
}

void InvalidateAll()
{
  for (auto& unit : s_unit)
  {
    unit.state = TextureUnitState::State::INVALID;
  }
}

// On invalidate cache:
// 1. invalidate all texture units.
//
// param is the raw argument of the invalidate command. It is accepted but deliberately ignored.

void Invalidate([[maybe_unused]] u32 param)
{
  // The exact arguments of Invalidate commands is currently unknown.
  // It appears to contain the TMEM address and a size.

  // For simplicity, we will just invalidate everything
  // (this over-invalidates, which only costs the units their CACHED/VALID status).
  InvalidateAll();
}

// On bind:
// 1. work out which banks the texture uses and how large they are, from the mipmapping/32bit
//    status and the bank configuration captured by ConfigurationChanged()
// 2. if the texture is small enough to fit in every bank it uses, mark the unit as cached.
//    otherwise, mark it as valid
//
// width and height are multiplied together and by 32 to get the texture's footprint, so they are
// presumably counts of 32 byte blocks rather than pixels - confirm against the caller.

void Bind(u32 unit, int width, int height, bool is_mipmapped, bool is_32_bit)
{
  TextureUnitState& target = s_unit[unit];

  // Footprint of the texture as compared against the bank sizes.
  const u32 required_size = width * height * 32U;

  // All textures use the even bank.
  // It holds the level 0 mipmap (and other even mipmap LODs, if mipmapping is enabled)
  const u32 even_capacity = CalculateUnitSize(target.even);

  // The odd bank is only enabled when mipmapping is enabled or the texture is 32 bit.
  // It holds the Alpha and Red channels of 32 bit textures or the odd layers of a mipmapped
  // texture. A disabled odd bank is tracked with a size of 0.
  const bool uses_odd_bank = is_mipmapped || is_32_bit;
  const u32 odd_capacity = uses_odd_bank ? CalculateUnitSize(target.odd) : 0;

  // The fit check is done against the undoubled bank sizes.
  bool fits = required_size <= even_capacity;
  if (uses_odd_bank)
    fits = fits && required_size <= odd_capacity;

  // TODO: This is what games appear to expect from hardware. But seems odd, as it doesn't line up
  //       with how much extra memory is required for mipmapping, just 33% more.
  //       Hardware testing is required to see exactly what gets used.

  // When mipmapping is enabled, the even bank is doubled in size.
  // The extended region holds the remaining even mipmap layers.
  target.even.size = is_mipmapped ? even_capacity * 2 : even_capacity;

  // When a 32bit texture is mipmapped, the odd bank is also doubled in size.
  target.odd.size = (is_mipmapped && is_32_bit) ? odd_capacity * 2 : odd_capacity;

  if (fits)
    target.state = TextureUnitState::State::CACHED;
  else
    target.state = TextureUnitState::State::VALID;
}

// Converts a bank's cache width/height codes into the size of that bank in bytes.
static u32 CalculateUnitSize(TextureUnitState::BankConfig bank_config)
{
  u32 width_code = bank_config.width;
  u32 height_code = bank_config.height;

  // Square configurations with a code of 3, 4 or 5 are the only cache sizes supported by the sdk
  const bool is_square = width_code == height_code;
  if (is_square && width_code == 3)
    return 32 * 1024;  // 32KB
  if (is_square && width_code == 4)
    return 128 * 1024;  // 128KB
  if (is_square && width_code == 5)
    return 512 * 1024;  // 512KB

  // However, the registers allow a much larger amount of configurability.
  // Maybe other sizes are broken?
  // Until hardware tests are done, this is a guess at the size algorithm
  // (it yields the same values as above for the three sdk configurations).

  return 512 * (1 << width_code) * (1 << height_code);
}

// Returns true if this bank and `other` occupy intersecting ranges [base, base + size).
// A bank with a size of 0 is treated as unused and never overlaps anything.
bool TextureUnitState::BankConfig::Overlaps(const BankConfig& other) const
{
  const bool both_in_use = size != 0 && other.size != 0;
  if (!both_in_use)
    return false;

  // Either the other bank starts inside this one, or this bank starts inside the other one.
  const bool other_starts_in_this = base <= other.base && other.base < (base + size);
  const bool this_starts_in_other = other.base <= base && base < (other.base + other.size);
  return other_starts_in_this || this_starts_in_other;
}

bool TextureUnitState::Overlaps(const TextureUnitState& other) const
{
  if (state == TextureUnitState::State::INVALID || other.state == TextureUnitState::State::INVALID)
    return false;
  return even.Overlaps(other.even) || even.Overlaps(other.odd) || odd.Overlaps(other.even) ||
         odd.Overlaps(other.odd);
}

// Scans through the active texture units and checks for overlaps.
// Any used unit whose even and odd banks overlap each other, or whose banks overlap those of
// another non-INVALID unit, cannot be considered cached: it (and the unit it collides with) is
// downgraded to VALID. INVALID units are never touched.
//
// used_textures: set of texture unit indices that are used; only these are checked, but each one
// is compared against all eight units.
void FinalizeBinds(BitSet32 used_textures)
{
  for (u32 i : used_textures)
  {
    TextureUnitState& unit = s_unit[i];

    // Self-overlap: the unit's own even and odd banks share TMEM.
    // BankConfig::Overlaps() only looks at base/size, so the state must be checked here as well.
    // ConfigurationChanged() and InvalidateAll() can leave a unit INVALID while it still carries
    // the bank sizes of an earlier Bind(); without this guard such a unit would be promoted to
    // VALID without having been re-bound.
    if (unit.state != TextureUnitState::State::INVALID && unit.even.Overlaps(unit.odd))
    {
      unit.state = TextureUnitState::State::VALID;
    }

    for (size_t j = 0; j < s_unit.size(); j++)
    {
      if (j == i)
        continue;

      TextureUnitState& other = s_unit[j];
      if (unit.Overlaps(other))
      {
        // There is an overlap, downgrade both from CACHED
        // (for there to be an overlap, both must have started as valid or cached)
        unit.state = TextureUnitState::State::VALID;
        other.state = TextureUnitState::State::VALID;
      }
    }
  }
}

// Returns true if the given texture unit is currently in the CACHED state,
// i.e. the last Bind() found the texture to fit and no overlap has downgraded it since.
bool IsCached(u32 unit)
{
  const TextureUnitState::State current = s_unit[unit].state;
  return current == TextureUnitState::State::CACHED;
}

// Returns true if the given texture unit is in any state other than INVALID.
bool IsValid(u32 unit)
{
  const bool invalidated = s_unit[unit].state == TextureUnitState::State::INVALID;
  return !invalidated;
}

void Init()
{
  s_unit.fill({});
}

// Savestate hook: passes the whole texture unit array to the given PointerWrap.
// NOTE(review): presumably DoArray reads or writes the array's raw contents depending on the
// PointerWrap's mode, which would make TextureUnitState's layout part of the savestate format -
// confirm in Common/ChunkFile.h.
void DoState(PointerWrap& p)
{
  p.DoArray(s_unit);
}

}  // namespace TMEM