Utilise SegmentTable for rapid FlatMemoryManager lookups

In some games, performing the binary search in `TranslateRange()` ended up taking a fairly large (~8%) proportion of GPFIFO time. By using a segment table for O(1) lookups this is reduced to <2% for non-split mappings at the cost of slightly increased memory usage (2GiB in the absolute worst case, but more like 50MiB in real-world situations).

In addition to adapting `TranslateRange()` to use the segment table, a new function `LookupBlock()` is added for cases where only a single mapping would ever be looked up, so the small_vector handling and fallback paths can be skipped and the entire lookup can be inlined.
This commit is contained in:
Billy Laws 2022-08-31 12:50:59 +01:00
parent 4ea0b0e1e5
commit be825b7aad
6 changed files with 96 additions and 32 deletions

View File

@ -6,6 +6,7 @@
#include <boost/container/small_vector.hpp>
#include <concepts>
#include <common.h>
#include "segment_table.h"
namespace skyline {
template<typename VaType, size_t AddressSpaceBits>
@ -76,16 +77,6 @@ namespace skyline {
FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {});
FlatAddressSpaceMap() = default;
/**
 * @brief Takes the block mutex and forwards the mapping request to MapLocked
 */
void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) {
    std::scoped_lock guard{blockMutex};
    this->MapLocked(virt, phys, size, extraInfo);
}
/**
 * @brief Takes the block mutex and forwards the unmapping request to UnmapLocked
 */
void Unmap(VaType virt, VaType size) {
    std::scoped_lock guard{blockMutex};
    this->UnmapLocked(virt, size);
}
};
/**
@ -98,12 +89,37 @@ namespace skyline {
/**
* @brief FlatMemoryManager specialises FlatAddressSpaceMap to focus on pointers as PAs, adding read/write functions and sparse mapping support
*/
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits, size_t VaGranularityBits, size_t VaL2GranularityBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatMemoryManager : public FlatAddressSpaceMap<VaType, UnmappedVa, u8 *, nullptr, true, AddressSpaceBits, MemoryManagerBlockInfo> {
private:
static constexpr u64 SparseMapSize{0x400000000}; //!< 16GiB pool size for sparse mappings returned by TranslateRange, this number is arbitrary and should be large enough to fit the largest sparse mapping in the AS
u8 *sparseMap; //!< Pointer to a zero filled memory region that is returned by TranslateRange for sparse mappings
/**
* @brief Version of `Block` that is trivial so it can be stored in a segment table for rapid lookups, also holds an additional extent member
* @note Entries are aggregate-initialised positionally as {virt, phys, size, extraInfo} by Map, so the member order must not change
* @note Unmap stores a value-initialised entry ({}), i.e. one with a zero `virt`, null `phys` and zero `extent`
*/
struct SegmentTableEntry {
VaType virt; //!< Base VA of the mapping this entry belongs to
u8 *phys; //!< Pointer that the base VA of the mapping was mapped to
VaType extent; //!< Size that was passed to Map for the mapping, used as the length of the span returned by lookups
MemoryManagerBlockInfo extraInfo; //!< Extra block info that was passed to Map for the mapping
};
static constexpr size_t AddressSpaceSize{1ULL << AddressSpaceBits}; //!< 2^AddressSpaceBits, supplied to the segment table as the size template argument
SegmentTable<SegmentTableEntry, AddressSpaceSize, VaGranularityBits, VaL2GranularityBits> blockSegmentTable; //!< A page table of all buffer mappings for O(1) lookups on full matches
TranslatedAddressRange TranslateRangeImpl(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
/**
 * @brief Reads the segment table entry for the given VA without acquiring the block mutex
 * @param cpuAccessCallback If set, it is invoked with the span of the whole looked-up block before returning
 * @return The span built from the entry's pointer and extent, and the distance of `virt` from the entry's base VA
 * @note No check is performed on whether the entry describes a mapped region, callers need to validate the result themselves
 */
std::pair<span<u8>, size_t> LookupBlockLocked(VaType virt, std::function<void(span<u8>)> cpuAccessCallback = {}) {
    const auto &entry{this->blockSegmentTable[virt]};

    span<u8> region{entry.phys, entry.extent};
    VaType offsetInRegion{virt - entry.virt};

    if (cpuAccessCallback)
        cpuAccessCallback(region);

    return {region, offsetInRegion};
}
public:
FlatMemoryManager();
@ -117,9 +133,31 @@ namespace skyline {
}
/**
* @return A vector of all physical ranges inside of the given virtual range
* @brief Looks up the mapped region that contains the given VA
* @return A span of the mapped region and the offset of the input VA in the region
*/
TranslatedAddressRange TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
__attribute__((always_inline)) std::pair<span<u8>, VaType> LookupBlock(VaType virt, std::function<void(span<u8>)> cpuAccessCallback = {}) {
    // The block mutex is held for the whole lookup, which includes any invocation of the callback
    std::scoped_lock guard{this->blockMutex};
    return this->LookupBlockLocked(virt, cpuAccessCallback);
}
/**
 * @brief Translates a region in the VA space to a corresponding set of regions in the PA space
 * @param virt The VA that the region to translate starts at
 * @param size The size of the region to translate
 * @param cpuAccessCallback Passed on to the single-block lookup and, when the fast path can't be used, to TranslateRangeImpl
 * @return The translated ranges, a single range when the whole region lies inside one segment table block
 */
TranslatedAddressRange TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {}) {
    std::scoped_lock lock{this->blockMutex};

    // Fast path for when the range is mapped in a single block
    auto [blockSpan, rangeOffset]{LookupBlockLocked(virt, cpuAccessCallback)};

    // Unmap stores value-initialised entries (zero base VA and extent), for those `rangeOffset` equals `virt` and exceeds the span size
    // The offset has to be checked first as otherwise the unsigned subtraction wraps around and the fast path would hand out an out-of-bounds subspan
    if (rangeOffset <= blockSpan.size() && blockSpan.size() - rangeOffset >= size) {
        TranslatedAddressRange ranges;
        ranges.push_back(blockSpan.subspan(rangeOffset, size));
        return ranges;
    }

    // Slow path: the region is unmapped at `virt` or isn't covered by a single block
    return TranslateRangeImpl(virt, size, cpuAccessCallback);
}
void Read(u8 *destination, VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
@ -203,6 +241,18 @@ namespace skyline {
}
void Copy(VaType dst, VaType src, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
/**
 * @brief Maps the given VA region to the given pointer, recording it in both the segment table and the block list
 * @note The segment table is written before MapLocked is called, both happen under the block mutex
 */
void Map(VaType virt, u8 *phys, VaType size, MemoryManagerBlockInfo extraInfo = {}) {
    std::scoped_lock guard{this->blockMutex};

    // Every entry in the range holds the base VA, base pointer and full size of the mapping
    this->blockSegmentTable.Set(virt, virt + size, {virt, phys, size, extraInfo});
    this->MapLocked(virt, phys, size, extraInfo);
}
/**
 * @brief Unmaps the given VA region, resetting its segment table entries and removing it from the block list
 */
void Unmap(VaType virt, VaType size) {
    std::scoped_lock guard{this->blockMutex};

    // Value-initialised entries are written over the range before the block list is updated
    this->blockSegmentTable.Set(virt, virt + size, {});
    this->UnmapLocked(virt, size);
}
};
/**

View File

@ -7,7 +7,7 @@
#define MAP_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap<VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
#define MM_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
#define MM_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits, size_t VaGranularityBits, size_t VaL2GranularityBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits, VaGranularityBits, VaL2GranularityBits>
#define ALLOC_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
@ -223,20 +223,11 @@ namespace skyline {
unmapCallback(virt, size);
}
/**
 * @brief Creates the read-only anonymous private mapping of SparseMapSize bytes that backs `sparseMap`
 * @throws exception If the mapping could not be created
 */
MM_MEMBER()::FlatMemoryManager() {
    // mmap signals failure by returning MAP_FAILED rather than a null pointer, a null check would never detect it
    auto mapping{mmap(nullptr, SparseMapSize, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)};
    if (mapping == MAP_FAILED)
        throw exception("Failed to mmap sparse map!");

    sparseMap = static_cast<u8 *>(mapping);
}
MM_MEMBER()::~FlatMemoryManager() {
    // Hand back the SparseMapSize byte region that `sparseMap` points to
    munmap(this->sparseMap, SparseMapSize);
}
MM_MEMBER(TranslatedAddressRange)::TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
MM_MEMBER(TranslatedAddressRange)::TranslateRangeImpl(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
TRACE_EVENT("containers", "FlatMemoryManager::TranslateRange");
std::scoped_lock lock(this->blockMutex);
TranslatedAddressRange ranges;
auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) {
return virt < block.virt;
@ -247,7 +238,6 @@ namespace skyline {
u8 *blockPhys{predecessor->phys + (virt - predecessor->virt)};
VaType blockSize{std::min(successor->virt - virt, size)};
TranslatedAddressRange ranges;
while (size) {
// Return a zeroed out map to emulate sparse mappings
@ -276,6 +266,16 @@ namespace skyline {
return ranges;
}
/**
 * @brief Creates the read-only anonymous private mapping of SparseMapSize bytes that backs `sparseMap`
 * @throws exception If the mapping could not be created
 */
MM_MEMBER()::FlatMemoryManager() {
    // mmap signals failure by returning MAP_FAILED rather than a null pointer, a null check would never detect it
    auto mapping{mmap(nullptr, SparseMapSize, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)};
    if (mapping == MAP_FAILED)
        throw exception("Failed to mmap sparse map!");

    sparseMap = static_cast<u8 *>(mapping);
}
MM_MEMBER()::~FlatMemoryManager() {
    // Hand back the SparseMapSize byte region that `sparseMap` points to
    munmap(this->sparseMap, SparseMapSize);
}
MM_MEMBER(void)::Read(u8 *destination, VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
TRACE_EVENT("containers", "FlatMemoryManager::Read");
@ -491,10 +491,12 @@ namespace skyline {
}
// Maps the VA region of the given size starting at `virt` with `true` as its PA value, the locked variant is called with blockMutex held
ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
this->Map(virt, true, size);
std::scoped_lock lock(this->blockMutex);
this->MapLocked(virt, true, size, {});
}
// Unmaps the VA region of the given size starting at `virt`, the locked variant is called with blockMutex held
ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
this->Unmap(virt, size);
std::scoped_lock lock(this->blockMutex);
this->UnmapLocked(virt, size);
}
}

View File

@ -6,5 +6,5 @@
namespace skyline {
template class FlatAddressSpaceMap<u64, 0, u8 *, nullptr, true, soc::gm20b::GmmuAddressSpaceBits>;
template class FlatMemoryManager<u64, 0, soc::gm20b::GmmuAddressSpaceBits>;
template class FlatMemoryManager<u64, 0, soc::gm20b::GmmuAddressSpaceBits, soc::gm20b::GmmuSmallPageSizeBits, soc::gm20b::GmmuMinBigPageSizeBits>;
}

View File

@ -3,17 +3,23 @@
#pragma once
#include <bit>
#include <common/address_space.h>
namespace skyline::soc::gm20b {
static constexpr u8 GmmuAddressSpaceBits{40}; //!< The size of the GMMU AS in bits
static constexpr size_t GmmuSmallPageSize{0x1000}; //!< 4KiB
static constexpr size_t GmmuSmallPageSizeBits{std::countr_zero(GmmuSmallPageSize)}; //!< log2 of GmmuSmallPageSize, used as the VA granularity of the GMMU segment table
static constexpr size_t GmmuMinBigPageSize{0x20000}; //!< 128KiB
static constexpr size_t GmmuMinBigPageSizeBits{std::countr_zero(GmmuMinBigPageSize)}; //!< log2 of GmmuMinBigPageSize, used as the L2 VA granularity of the GMMU segment table
/**
* @brief The GMMU (Graphics Memory Management Unit) class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
* @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't need to emulate this abstraction
* @note The GMMU is implemented entirely as a template specialization over FlatMemoryManager
*/
using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits>;
using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits, GmmuSmallPageSizeBits, GmmuMinBigPageSizeBits>;
struct AddressSpaceContext {
GMMU gmmu;

View File

@ -6,5 +6,5 @@
namespace skyline {
template class FlatAddressSpaceMap<u32, 0, u8 *, nullptr, true, soc::SmmuAddressSpaceBits>;
template class FlatMemoryManager<u32, 0, soc::SmmuAddressSpaceBits>;
template class FlatMemoryManager<u32, 0, soc::SmmuAddressSpaceBits, soc::SmmuPageSizeBits, soc::SmmuL2PageSizeBits>;
}

View File

@ -7,10 +7,16 @@
namespace skyline::soc {
static constexpr u8 SmmuAddressSpaceBits{32}; //!< The size of the SMMU AS in bits
constexpr size_t SmmuPageSize{0x1000}; //!< 4KiB
constexpr size_t SmmuPageSizeBits{std::countr_zero(SmmuPageSize)}; //!< log2 of SmmuPageSize, used as the VA granularity of the SMMU segment table
constexpr size_t SmmuL2PageSize{0x20000}; //!< 128KiB - not actually a thing in HW but needed for segment table
constexpr size_t SmmuL2PageSizeBits{std::countr_zero(SmmuL2PageSize)}; //!< log2 of SmmuL2PageSize, used as the L2 VA granularity of the SMMU segment table
/**
* @brief The SMMU (System Memory Management Unit) class handles mapping between the host1x peripheral virtual address space and an application's address space
* @note The SMMU is implemented entirely as a template specialization over FlatMemoryManager
*/
using SMMU = FlatMemoryManager<u32, 0, SmmuAddressSpaceBits>;
using SMMU = FlatMemoryManager<u32, 0, SmmuAddressSpaceBits, SmmuPageSizeBits, SmmuL2PageSizeBits>;
}