mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-27 21:01:50 +01:00
Utilise SegmentTable for rapid FlatMemoryManager lookups
In some games performing the binary search in `TranslateRange()` ended up taking a fairly large (~8%) proportion of GPFIFO time. By using a segment table for O(1) lookups this is reduced to <2% for non-split mappings at the cost of slightly increased memory usage (2GiB in the absolute worst case but more like 50MiB in real world situations). In addition to adapting `TranslateRange()` to use the segment table, a new function `LookupBlock()` is added for cases where only a single mapping would ever be looked up, so the small_vector handling and fallback paths can be skipped and the entire lookup can be inlined.
This commit is contained in:
parent
4ea0b0e1e5
commit
be825b7aad
@ -6,6 +6,7 @@
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <concepts>
|
||||
#include <common.h>
|
||||
#include "segment_table.h"
|
||||
|
||||
namespace skyline {
|
||||
template<typename VaType, size_t AddressSpaceBits>
|
||||
@ -76,16 +77,6 @@ namespace skyline {
|
||||
FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {});
|
||||
|
||||
FlatAddressSpaceMap() = default;
|
||||
|
||||
/**
 * @brief Acquires blockMutex and performs the mapping through MapLocked
 * @param virt Start of the virtual range
 * @param phys Value the range is mapped to
 * @param size Length of the range
 * @param extraInfo Additional per-block data forwarded unchanged to MapLocked
 */
void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) {
    std::scoped_lock mapGuard{blockMutex};
    MapLocked(virt, phys, size, extraInfo);
}
|
||||
|
||||
/**
 * @brief Acquires blockMutex and removes the mapping through UnmapLocked
 * @param virt Start of the virtual range
 * @param size Length of the range
 */
void Unmap(VaType virt, VaType size) {
    std::scoped_lock unmapGuard{blockMutex};
    UnmapLocked(virt, size);
}
|
||||
};
|
||||
|
||||
/**
|
||||
@ -98,12 +89,37 @@ namespace skyline {
|
||||
/**
|
||||
* @brief FlatMemoryManager specialises FlatAddressSpaceMap to focus on pointers as PAs, adding read/write functions and sparse mapping support
|
||||
*/
|
||||
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
|
||||
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits, size_t VaGranularityBits, size_t VaL2GranularityBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
|
||||
class FlatMemoryManager : public FlatAddressSpaceMap<VaType, UnmappedVa, u8 *, nullptr, true, AddressSpaceBits, MemoryManagerBlockInfo> {
|
||||
private:
|
||||
static constexpr u64 SparseMapSize{0x400000000}; //!< 16GiB pool size for sparse mappings returned by TranslateRange, this number is arbitrary and should be large enough to fit the largest sparse mapping in the AS
|
||||
u8 *sparseMap; //!< Pointer to a zero filled memory region that is returned by TranslateRange for sparse mappings
|
||||
|
||||
/**
|
||||
* @brief Version of `Block` that is trivial so it can be stored in a segment table for rapid lookups, also holds an additional extent member
|
||||
*/
|
||||
// NOTE: member order matters, Map() aggregate-initialises entries positionally as {virt, phys, size, extraInfo}
struct SegmentTableEntry {
|
||||
VaType virt; //!< VA at which the mapping this entry belongs to starts, lookups subtract it from the queried VA to get the offset into the mapping
|
||||
u8 *phys; //!< Pointer that the start of the mapping refers to, used as the base of the span returned by lookups
|
||||
VaType extent; //!< Size of the whole mapping as passed to Map(), used as the length of the span returned by lookups
|
||||
MemoryManagerBlockInfo extraInfo; //!< Copy of the extra block info that was supplied to Map()
|
||||
};
|
||||
|
||||
static constexpr size_t AddressSpaceSize{1ULL << AddressSpaceBits};
|
||||
SegmentTable<SegmentTableEntry, AddressSpaceSize, VaGranularityBits, VaL2GranularityBits> blockSegmentTable; //!< A page table of all buffer mappings for O(1) lookups on full matches
|
||||
|
||||
TranslatedAddressRange TranslateRangeImpl(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
|
||||
|
||||
/**
 * @brief Fetches the segment table entry for the given VA and reports the region it describes
 * @param virt The VA to look up
 * @param cpuAccessCallback If set, invoked with the span of the entire region before returning
 * @return The span described by the entry (phys, extent) and the offset of `virt` from the entry's base VA
 * @note Performs no locking itself, the callers in this class take blockMutex before calling
 */
std::pair<span<u8>, size_t> LookupBlockLocked(VaType virt, std::function<void(span<u8>)> cpuAccessCallback = {}) {
    const auto &entry{this->blockSegmentTable[virt]};

    // Distance of the queried VA from the start of the mapping that owns this entry
    const VaType offsetInRegion{virt - entry.virt};
    span<u8> mappedRegion{entry.phys, entry.extent};

    if (cpuAccessCallback)
        cpuAccessCallback(mappedRegion);

    return {mappedRegion, offsetInRegion};
}
|
||||
|
||||
public:
|
||||
FlatMemoryManager();
|
||||
|
||||
@ -117,9 +133,31 @@ namespace skyline {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A vector of all physical ranges inside of the given virtual range
|
||||
* @brief Looks up the mapped region that contains the given VA
|
||||
* @return A span of the mapped region and the offset of the input VA in the region
|
||||
*/
|
||||
TranslatedAddressRange TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
|
||||
__attribute__((always_inline)) std::pair<span<u8>, VaType> LookupBlock(VaType virt, std::function<void(span<u8>)> cpuAccessCallback = {}) {
    // Hold blockMutex across the whole lookup, the segment table is read by the locked helper
    std::scoped_lock lookupGuard{this->blockMutex};

    auto regionAndOffset{LookupBlockLocked(virt, cpuAccessCallback)};
    return regionAndOffset;
}
|
||||
|
||||
/**
|
||||
* @brief Translates a region in the VA space to a corresponding set of regions in the PA space
|
||||
*/
|
||||
TranslatedAddressRange TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {}) {
|
||||
std::scoped_lock lock{this->blockMutex};
|
||||
|
||||
// Fast path for when the range is mapped in a single block
|
||||
auto [blockSpan, rangeOffset]{LookupBlockLocked(virt, cpuAccessCallback)};
|
||||
if (blockSpan.size() - rangeOffset >= size) {
|
||||
TranslatedAddressRange ranges;
|
||||
ranges.push_back(blockSpan.subspan(rangeOffset, size));
|
||||
return ranges;
|
||||
}
|
||||
|
||||
return TranslateRangeImpl(virt, size, cpuAccessCallback);
|
||||
}
|
||||
|
||||
|
||||
void Read(u8 *destination, VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
|
||||
|
||||
@ -203,6 +241,18 @@ namespace skyline {
|
||||
}
|
||||
|
||||
void Copy(VaType dst, VaType src, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
|
||||
|
||||
/**
 * @brief Maps a VA range to the given pointer, recording it in both the segment table and the underlying block map
 * @param virt Start of the virtual range
 * @param phys Pointer the start of the range maps to
 * @param size Length of the range
 * @param extraInfo Extra block info stored alongside the mapping
 * @note Both updates happen while blockMutex is held
 */
void Map(VaType virt, u8 *phys, VaType size, MemoryManagerBlockInfo extraInfo = {}) {
    std::scoped_lock mapGuard{this->blockMutex};

    // Every segment in [virt, virt + size) receives the same entry describing the whole mapping
    this->blockSegmentTable.Set(virt, virt + size, {virt, phys, size, extraInfo});

    this->MapLocked(virt, phys, size, extraInfo);
}
|
||||
|
||||
/**
 * @brief Removes the mapping of a VA range from both the segment table and the underlying block map
 * @param virt Start of the virtual range
 * @param size Length of the range
 * @note Both updates happen while blockMutex is held
 */
void Unmap(VaType virt, VaType size) {
    std::scoped_lock unmapGuard{this->blockMutex};

    // Overwrite every segment in [virt, virt + size) with a value-initialised (empty) entry
    this->blockSegmentTable.Set(virt, virt + size, {});

    this->UnmapLocked(virt, size);
}
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
#define MAP_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap<VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
|
||||
|
||||
#define MM_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
|
||||
#define MM_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits, size_t VaGranularityBits, size_t VaL2GranularityBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits, VaGranularityBits, VaL2GranularityBits>
|
||||
|
||||
#define ALLOC_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
|
||||
|
||||
@ -223,20 +223,11 @@ namespace skyline {
|
||||
unmapCallback(virt, size);
|
||||
}
|
||||
|
||||
/**
 * @brief Reserves the read-only anonymous region of SparseMapSize bytes that `sparseMap` points to
 * @throws exception If the region could not be mapped
 */
MM_MEMBER()::FlatMemoryManager() {
    // mmap signals failure by returning MAP_FAILED rather than a null pointer, so that is the value which has to be checked for
    void *region{mmap(nullptr, SparseMapSize, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)};
    if (region == MAP_FAILED)
        throw exception("Failed to mmap sparse map!");

    sparseMap = static_cast<u8 *>(region);
}
|
||||
|
||||
// Releases the SparseMapSize byte region that `sparseMap` points to
MM_MEMBER()::~FlatMemoryManager() {
|
||||
munmap(sparseMap, SparseMapSize); // The return value is not checked
|
||||
}
|
||||
|
||||
MM_MEMBER(TranslatedAddressRange)::TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
|
||||
MM_MEMBER(TranslatedAddressRange)::TranslateRangeImpl(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
|
||||
TRACE_EVENT("containers", "FlatMemoryManager::TranslateRange");
|
||||
|
||||
std::scoped_lock lock(this->blockMutex);
|
||||
TranslatedAddressRange ranges;
|
||||
|
||||
auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) {
|
||||
return virt < block.virt;
|
||||
@ -247,7 +238,6 @@ namespace skyline {
|
||||
u8 *blockPhys{predecessor->phys + (virt - predecessor->virt)};
|
||||
VaType blockSize{std::min(successor->virt - virt, size)};
|
||||
|
||||
TranslatedAddressRange ranges;
|
||||
|
||||
while (size) {
|
||||
// Return a zeroed out map to emulate sparse mappings
|
||||
@ -276,6 +266,16 @@ namespace skyline {
|
||||
return ranges;
|
||||
}
|
||||
|
||||
/**
 * @brief Reserves the read-only anonymous region of SparseMapSize bytes that `sparseMap` points to
 * @throws exception If the region could not be mapped
 */
MM_MEMBER()::FlatMemoryManager() {
    // mmap signals failure by returning MAP_FAILED rather than a null pointer, so that is the value which has to be checked for
    void *region{mmap(nullptr, SparseMapSize, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)};
    if (region == MAP_FAILED)
        throw exception("Failed to mmap sparse map!");

    sparseMap = static_cast<u8 *>(region);
}
|
||||
|
||||
// Releases the SparseMapSize byte region that `sparseMap` points to
MM_MEMBER()::~FlatMemoryManager() {
|
||||
munmap(sparseMap, SparseMapSize); // The return value is not checked
|
||||
}
|
||||
|
||||
MM_MEMBER(void)::Read(u8 *destination, VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
|
||||
TRACE_EVENT("containers", "FlatMemoryManager::Read");
|
||||
|
||||
@ -491,10 +491,12 @@ namespace skyline {
|
||||
}
|
||||
|
||||
/**
 * @brief Maps the range [virt, virt + size) in the allocator's address space with a PA value of `true`
 * @param virt Start of the virtual range
 * @param size Length of the range
 */
ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
    // The mapping is performed exactly once, directly through MapLocked while holding blockMutex, instead of additionally
    // going through the public Map wrapper which would repeat the same mapping under its own lock
    std::scoped_lock lock(this->blockMutex);
    this->MapLocked(virt, true, size, {});
}
|
||||
|
||||
/**
 * @brief Unmaps the range [virt, virt + size) from the allocator's address space
 * @param virt Start of the virtual range
 * @param size Length of the range
 */
ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
    // The range is unmapped exactly once, directly through UnmapLocked while holding blockMutex, instead of additionally
    // going through the public Unmap wrapper which would repeat the same unmap under its own lock
    std::scoped_lock lock(this->blockMutex);
    this->UnmapLocked(virt, size);
}
|
||||
}
|
||||
|
@ -6,5 +6,5 @@
|
||||
|
||||
namespace skyline {
|
||||
template class FlatAddressSpaceMap<u64, 0, u8 *, nullptr, true, soc::gm20b::GmmuAddressSpaceBits>;
|
||||
template class FlatMemoryManager<u64, 0, soc::gm20b::GmmuAddressSpaceBits>;
|
||||
template class FlatMemoryManager<u64, 0, soc::gm20b::GmmuAddressSpaceBits, soc::gm20b::GmmuSmallPageSizeBits, soc::gm20b::GmmuMinBigPageSizeBits>;
|
||||
}
|
||||
|
@ -3,17 +3,23 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bit>
|
||||
#include <common/address_space.h>
|
||||
|
||||
namespace skyline::soc::gm20b {
|
||||
static constexpr u8 GmmuAddressSpaceBits{40}; //!< The size of the GMMU AS in bits
|
||||
static constexpr size_t GmmuSmallPageSize{0x1000}; // 4KiB
|
||||
static constexpr size_t GmmuSmallPageSizeBits{std::countr_zero(GmmuSmallPageSize)};
|
||||
static constexpr size_t GmmuMinBigPageSize{0x20000}; // 128KiB
|
||||
static constexpr size_t GmmuMinBigPageSizeBits{std::countr_zero(GmmuMinBigPageSize)};
|
||||
|
||||
|
||||
/**
|
||||
* @brief The GMMU (Graphics Memory Management Unit) class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
|
||||
* @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't need to emulate this abstraction
|
||||
* @note The GMMU is implemented entirely as a template specialization over FlatMemoryManager
|
||||
*/
|
||||
using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits>;
|
||||
using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits, GmmuSmallPageSizeBits, GmmuMinBigPageSizeBits>;
|
||||
|
||||
struct AddressSpaceContext {
|
||||
GMMU gmmu;
|
||||
|
@ -6,5 +6,5 @@
|
||||
|
||||
namespace skyline {
|
||||
template class FlatAddressSpaceMap<u32, 0, u8 *, nullptr, true, soc::SmmuAddressSpaceBits>;
|
||||
template class FlatMemoryManager<u32, 0, soc::SmmuAddressSpaceBits>;
|
||||
template class FlatMemoryManager<u32, 0, soc::SmmuAddressSpaceBits, soc::SmmuPageSizeBits, soc::SmmuL2PageSizeBits>;
|
||||
}
|
||||
|
@ -7,10 +7,16 @@
|
||||
|
||||
namespace skyline::soc {
|
||||
static constexpr u8 SmmuAddressSpaceBits{32}; //!< The size of the SMMU AS in bits
|
||||
constexpr size_t SmmuPageSize{0x1000}; // 4KiB
|
||||
constexpr size_t SmmuPageSizeBits{std::countr_zero(SmmuPageSize)};
|
||||
constexpr size_t SmmuL2PageSize{0x20000}; // 128KiB - not actually a thing in HW but needed for segment table
|
||||
constexpr size_t SmmuL2PageSizeBits{std::countr_zero(SmmuL2PageSize)};
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief The SMMU (System Memory Management Unit) class handles mapping between the host1x peripheral virtual address space and an application's address space
|
||||
* @note The SMMU is implemented entirely as a template specialization over FlatMemoryManager
|
||||
*/
|
||||
using SMMU = FlatMemoryManager<u32, 0, SmmuAddressSpaceBits>;
|
||||
using SMMU = FlatMemoryManager<u32, 0, SmmuAddressSpaceBits, SmmuPageSizeBits, SmmuL2PageSizeBits>;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user