Utilise SegmentTable for rapid FlatMemoryManager lookups

In some games, the binary search performed in `TranslateRange()` ended up taking a fairly large proportion (~8%) of GPFIFO time. Using a segment table for O(1) lookups reduces this to <2% for non-split mappings, at the cost of slightly increased memory usage (2GiB in the absolute worst case, but more like 50MiB in real-world situations).
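For intuition, the sketch below shows the general idea of such a segment table as a standalone, simplified class; it is not Skyline's actual `SegmentTable` (whose interface is only inferred from the `Set()` and `operator[]` uses in the diff below), and the two-level lazy-allocation layout and all names here are illustrative:

#include <array>
#include <cstdint>
#include <memory>

// Simplified two-level segment table: the top level covers coarse (L2Bits-sized)
// segments, each of which lazily owns a flat array of per-page entries
template<typename Entry, size_t AddressSpaceBits, size_t PageBits, size_t L2Bits>
class MiniSegmentTable {
  private:
    static constexpr size_t L2Entries{1ULL << (AddressSpaceBits - L2Bits)};
    static constexpr size_t PagesPerL2{1ULL << (L2Bits - PageBits)};

    std::array<std::unique_ptr<std::array<Entry, PagesPerL2>>, L2Entries> segments;

  public:
    // Write `entry` into every page-granularity slot covering [begin, end)
    void Set(uint64_t begin, uint64_t end, Entry entry) {
        for (uint64_t va{begin}; va < end; va += (1ULL << PageBits)) {
            auto &segment{segments[va >> L2Bits]};
            if (!segment)
                segment = std::make_unique<std::array<Entry, PagesPerL2>>();
            (*segment)[(va >> PageBits) & (PagesPerL2 - 1)] = entry;
        }
    }

    // O(1) lookup: two array indexes instead of a binary search over a sorted block vector
    Entry operator[](uint64_t va) const {
        const auto &segment{segments[va >> L2Bits]};
        if (!segment)
            return Entry{}; // Unmapped addresses yield a default (null) entry
        return (*segment)[(va >> PageBits) & (PagesPerL2 - 1)];
    }
};

With this kind of layout, `Map()` only needs to write the covering block into every page slot of the mapped range (the diff below does this with `blockSegmentTable.Set(virt, virt + size, {virt, phys, size, extraInfo})`), after which any VA inside the mapping resolves to its containing block in constant time.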

In addition to adapting `TranslateRange()` to use the segment table, a new function `LookupBlock()` is introduced for cases where only a single mapping would ever be looked up, so the small_vector handling and fallback paths can be skipped and the entire lookup can be inlined.
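As a hedged usage sketch (the `ReadWord()` helper and its surroundings are illustrative, not part of this commit; `GMMU` is the alias defined in the gm20b header changed below), a caller whose access is known to sit entirely inside one mapping can call `LookupBlock()` and apply the returned offset itself:

#include <cstdint>
#include <cstring>

// GMMU comes from the soc::gm20b header touched in this commit; this helper is hypothetical
uint32_t ReadWord(skyline::soc::gm20b::GMMU &gmmu, uint64_t gpuVa) {
    // Single-block fast path: no TranslatedAddressRange (small_vector) is ever built
    auto [block, offset]{gmmu.LookupBlock(gpuVa)};
    uint32_t value{};
    std::memcpy(&value, block.data() + offset, sizeof(value));
    return value;
}

Accesses that may span several mappings still go through `TranslateRange()`, which, as shown in the diff, now starts with the same `LookupBlockLocked()` fast path and only falls back to the binary-search-based `TranslateRangeImpl()` when the requested range is not contained in a single block.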
Billy Laws 2022-08-31 12:50:59 +01:00
parent 4ea0b0e1e5
commit be825b7aad
6 changed files with 96 additions and 32 deletions

View File

@@ -6,6 +6,7 @@
 #include <boost/container/small_vector.hpp>
 #include <concepts>
 #include <common.h>
+#include "segment_table.h"

 namespace skyline {
     template<typename VaType, size_t AddressSpaceBits>
@@ -76,16 +77,6 @@ namespace skyline {
         FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {});

         FlatAddressSpaceMap() = default;
-
-        void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) {
-            std::scoped_lock lock(blockMutex);
-            MapLocked(virt, phys, size, extraInfo);
-        }
-
-        void Unmap(VaType virt, VaType size) {
-            std::scoped_lock lock(blockMutex);
-            UnmapLocked(virt, size);
-        }
     };

     /**
@@ -98,12 +89,37 @@
     /**
      * @brief FlatMemoryManager specialises FlatAddressSpaceMap to focus on pointers as PAs, adding read/write functions and sparse mapping support
      */
-    template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
+    template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits, size_t VaGranularityBits, size_t VaL2GranularityBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
     class FlatMemoryManager : public FlatAddressSpaceMap<VaType, UnmappedVa, u8 *, nullptr, true, AddressSpaceBits, MemoryManagerBlockInfo> {
       private:
         static constexpr u64 SparseMapSize{0x400000000}; //!< 16GiB pool size for sparse mappings returned by TranslateRange, this number is arbritary and should be large enough to fit the largest sparse mapping in the AS
         u8 *sparseMap; //!< Pointer to a zero filled memory region that is returned by TranslateRange for sparse mappings

+        /**
+         * @brief Version of `Block` that is trivial so it can be stored in a segment table for rapid lookups, also holds an additional extent member
+         */
+        struct SegmentTableEntry {
+            VaType virt;
+            u8 *phys;
+            VaType extent;
+            MemoryManagerBlockInfo extraInfo;
+        };
+
+        static constexpr size_t AddressSpaceSize{1ULL << AddressSpaceBits};
+        SegmentTable<SegmentTableEntry, AddressSpaceSize, VaGranularityBits, VaL2GranularityBits> blockSegmentTable; //!< A page table of all buffer mappings for O(1) lookups on full matches
+
+        TranslatedAddressRange TranslateRangeImpl(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
+
+        std::pair<span<u8>, size_t> LookupBlockLocked(VaType virt, std::function<void(span<u8>)> cpuAccessCallback = {}) {
+            const auto &blockEntry{this->blockSegmentTable[virt]};
+            VaType segmentOffset{virt - blockEntry.virt};
+            span<u8> blockSpan{blockEntry.phys, blockEntry.extent};
+            if (cpuAccessCallback)
+                cpuAccessCallback(blockSpan);
+            return {blockSpan, segmentOffset};
+        }
+
       public:
         FlatMemoryManager();
@@ -117,9 +133,31 @@
         }

         /**
-         * @return A vector of all physical ranges inside of the given virtual range
+         * @brief Looks up the mapped region that contains the given VA
+         * @return A span of the mapped region and the offset of the input VA in the region
          */
-        TranslatedAddressRange TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
+        __attribute__((always_inline)) std::pair<span<u8>, VaType> LookupBlock(VaType virt, std::function<void(span<u8>)> cpuAccessCallback = {}) {
+            std::scoped_lock lock{this->blockMutex};
+            return LookupBlockLocked(virt, cpuAccessCallback);
+        }
+
+        /**
+         * @brief Translates a region in the VA space to a corresponding set of regions in the PA space
+         */
+        TranslatedAddressRange TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {}) {
+            std::scoped_lock lock{this->blockMutex};
+
+            // Fast path for when the range is mapped in a single block
+            auto [blockSpan, rangeOffset]{LookupBlockLocked(virt, cpuAccessCallback)};
+            if (blockSpan.size() - rangeOffset >= size) {
+                TranslatedAddressRange ranges;
+                ranges.push_back(blockSpan.subspan(rangeOffset, size));
+                return ranges;
+            }
+
+            return TranslateRangeImpl(virt, size, cpuAccessCallback);
+        }

         void Read(u8 *destination, VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
@@ -203,6 +241,18 @@
         }

         void Copy(VaType dst, VaType src, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
+
+        void Map(VaType virt, u8 *phys, VaType size, MemoryManagerBlockInfo extraInfo = {}) {
+            std::scoped_lock lock(this->blockMutex);
+            blockSegmentTable.Set(virt, virt + size, {virt, phys, size, extraInfo});
+            this->MapLocked(virt, phys, size, extraInfo);
+        }
+
+        void Unmap(VaType virt, VaType size) {
+            std::scoped_lock lock(this->blockMutex);
+            blockSegmentTable.Set(virt, virt + size, {});
+            this->UnmapLocked(virt, size);
+        }
     };

     /**

View File

@@ -7,7 +7,7 @@
 #define MAP_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap<VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
-#define MM_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
+#define MM_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits, size_t VaGranularityBits, size_t VaL2GranularityBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits, VaGranularityBits, VaL2GranularityBits>
 #define ALLOC_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
@@ -223,20 +223,11 @@
             unmapCallback(virt, size);
    }

-    MM_MEMBER()::FlatMemoryManager() {
-        sparseMap = static_cast<u8 *>(mmap(0, SparseMapSize, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
-        if (!sparseMap)
-            throw exception("Failed to mmap sparse map!");
-    }
-
-    MM_MEMBER()::~FlatMemoryManager() {
-        munmap(sparseMap, SparseMapSize);
-    }
-
-    MM_MEMBER(TranslatedAddressRange)::TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
+    MM_MEMBER(TranslatedAddressRange)::TranslateRangeImpl(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
         TRACE_EVENT("containers", "FlatMemoryManager::TranslateRange");

-        std::scoped_lock lock(this->blockMutex);
+        TranslatedAddressRange ranges;

         auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) {
             return virt < block.virt;
@@ -247,7 +238,6 @@
         u8 *blockPhys{predecessor->phys + (virt - predecessor->virt)};
         VaType blockSize{std::min(successor->virt - virt, size)};

-        TranslatedAddressRange ranges;
         while (size) {
             // Return a zeroed out map to emulate sparse mappings
@@ -276,6 +266,16 @@
         return ranges;
     }

+    MM_MEMBER()::FlatMemoryManager() {
+        sparseMap = static_cast<u8 *>(mmap(0, SparseMapSize, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
+        if (!sparseMap)
+            throw exception("Failed to mmap sparse map!");
+    }
+
+    MM_MEMBER()::~FlatMemoryManager() {
+        munmap(sparseMap, SparseMapSize);
+    }
+
     MM_MEMBER(void)::Read(u8 *destination, VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
         TRACE_EVENT("containers", "FlatMemoryManager::Read");
@@ -491,10 +491,12 @@
     }

     ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
-        this->Map(virt, true, size);
+        std::scoped_lock lock(this->blockMutex);
+        this->MapLocked(virt, true, size, {});
    }

     ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
-        this->Unmap(virt, size);
+        std::scoped_lock lock(this->blockMutex);
+        this->UnmapLocked(virt, size);
    }
 }

View File

@@ -6,5 +6,5 @@
 namespace skyline {
     template class FlatAddressSpaceMap<u64, 0, u8 *, nullptr, true, soc::gm20b::GmmuAddressSpaceBits>;
-    template class FlatMemoryManager<u64, 0, soc::gm20b::GmmuAddressSpaceBits>;
+    template class FlatMemoryManager<u64, 0, soc::gm20b::GmmuAddressSpaceBits, soc::gm20b::GmmuSmallPageSizeBits, soc::gm20b::GmmuMinBigPageSizeBits>;
 }

View File

@@ -3,17 +3,23 @@
 #pragma once

+#include <bit>
 #include <common/address_space.h>

 namespace skyline::soc::gm20b {
     static constexpr u8 GmmuAddressSpaceBits{40}; //!< The size of the GMMU AS in bits

+    static constexpr size_t GmmuSmallPageSize{0x1000}; // 4KiB
+    static constexpr size_t GmmuSmallPageSizeBits{std::countr_zero(GmmuSmallPageSize)};
+
+    static constexpr size_t GmmuMinBigPageSize{0x20000}; // 128KiB
+    static constexpr size_t GmmuMinBigPageSizeBits{std::countr_zero(GmmuMinBigPageSize)};
+
     /**
      * @brief The GMMU (Graphics Memory Management Unit) class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
      * @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't need to emulate this abstraction
      * @note The GMMU is implemented entirely as a template specialization over FlatMemoryManager
      */
-    using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits>;
+    using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits, GmmuSmallPageSizeBits, GmmuMinBigPageSizeBits>;

     struct AddressSpaceContext {
         GMMU gmmu;

View File

@@ -6,5 +6,5 @@
 namespace skyline {
     template class FlatAddressSpaceMap<u32, 0, u8 *, nullptr, true, soc::SmmuAddressSpaceBits>;
-    template class FlatMemoryManager<u32, 0, soc::SmmuAddressSpaceBits>;
+    template class FlatMemoryManager<u32, 0, soc::SmmuAddressSpaceBits, soc::SmmuPageSizeBits, soc::SmmuL2PageSizeBits>;
 }

View File

@@ -7,10 +7,16 @@
 namespace skyline::soc {
     static constexpr u8 SmmuAddressSpaceBits{32}; //!< The size of the SMMU AS in bits

+    constexpr size_t SmmuPageSize{0x1000}; // 4KiB
+    constexpr size_t SmmuPageSizeBits{std::countr_zero(SmmuPageSize)};
+
+    constexpr size_t SmmuL2PageSize{0x20000}; // 128KiB - not actually a thing in HW but needed for segment table
+    constexpr size_t SmmuL2PageSizeBits{std::countr_zero(SmmuL2PageSize)};
+
     /**
      * @brief The SMMU (System Memory Management Unit) class handles mapping between the host1x peripheral virtual address space and an application's address space
      * @note The SMMU is implemented entirely as a template specialization over FlatMemoryManager
      */
-    using SMMU = FlatMemoryManager<u32, 0, SmmuAddressSpaceBits>;
+    using SMMU = FlatMemoryManager<u32, 0, SmmuAddressSpaceBits, SmmuPageSizeBits, SmmuL2PageSizeBits>;
 }