mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-02 14:14:15 +01:00
Utilise SegmentTable for rapid FlatMemoryManager lookups
In some games performing the binary search in `TranslateRange()` ended up taking a fairly large (~8%) proportion of GPFIFO time. By using a segment table for O(1) lookups this is reduced to <2% for non-split mappings at the cost of slightly increased memory usage (2GiB in the absolute worst case but more like 50MiB in real-world situations). In addition to adapting `TranslateRange()` to use the segment table, a new function `LookupBlock()` has been added for cases where only a single mapping would ever be looked up, so that the small_vector handling and fallback paths can be skipped and the entire lookup can be inlined.
This commit is contained in:
parent
4ea0b0e1e5
commit
be825b7aad
@ -6,6 +6,7 @@
|
|||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include <concepts>
|
#include <concepts>
|
||||||
#include <common.h>
|
#include <common.h>
|
||||||
|
#include "segment_table.h"
|
||||||
|
|
||||||
namespace skyline {
|
namespace skyline {
|
||||||
template<typename VaType, size_t AddressSpaceBits>
|
template<typename VaType, size_t AddressSpaceBits>
|
||||||
@ -76,16 +77,6 @@ namespace skyline {
|
|||||||
FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {});
|
FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {});
|
||||||
|
|
||||||
FlatAddressSpaceMap() = default;
|
FlatAddressSpaceMap() = default;
|
||||||
|
|
||||||
void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) {
|
|
||||||
std::scoped_lock lock(blockMutex);
|
|
||||||
MapLocked(virt, phys, size, extraInfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Unmap(VaType virt, VaType size) {
|
|
||||||
std::scoped_lock lock(blockMutex);
|
|
||||||
UnmapLocked(virt, size);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -98,12 +89,37 @@ namespace skyline {
|
|||||||
/**
|
/**
|
||||||
* @brief FlatMemoryManager specialises FlatAddressSpaceMap to focus on pointers as PAs, adding read/write functions and sparse mapping support
|
* @brief FlatMemoryManager specialises FlatAddressSpaceMap to focus on pointers as PAs, adding read/write functions and sparse mapping support
|
||||||
*/
|
*/
|
||||||
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
|
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits, size_t VaGranularityBits, size_t VaL2GranularityBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
|
||||||
class FlatMemoryManager : public FlatAddressSpaceMap<VaType, UnmappedVa, u8 *, nullptr, true, AddressSpaceBits, MemoryManagerBlockInfo> {
|
class FlatMemoryManager : public FlatAddressSpaceMap<VaType, UnmappedVa, u8 *, nullptr, true, AddressSpaceBits, MemoryManagerBlockInfo> {
|
||||||
private:
|
private:
|
||||||
static constexpr u64 SparseMapSize{0x400000000}; //!< 16GiB pool size for sparse mappings returned by TranslateRange, this number is arbitrary and should be large enough to fit the largest sparse mapping in the AS
|
static constexpr u64 SparseMapSize{0x400000000}; //!< 16GiB pool size for sparse mappings returned by TranslateRange, this number is arbitrary and should be large enough to fit the largest sparse mapping in the AS
|
||||||
u8 *sparseMap; //!< Pointer to a zero filled memory region that is returned by TranslateRange for sparse mappings
|
u8 *sparseMap; //!< Pointer to a zero filled memory region that is returned by TranslateRange for sparse mappings
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Version of `Block` that is trivial so it can be stored in a segment table for rapid lookups, also holds an additional extent member
|
||||||
|
*/
|
||||||
|
struct SegmentTableEntry {
|
||||||
|
VaType virt; //!< Base VA of the mapping this entry belongs to, subtracted from the looked up VA to get the offset into the block
|
||||||
|
u8 *phys; //!< Pointer that the base VA of the mapping was mapped to
|
||||||
|
VaType extent; //!< Size of the whole mapping as passed to `Map`, used as the size of the span returned by lookups
|
||||||
|
MemoryManagerBlockInfo extraInfo; //!< Extra block info that was supplied to `Map` for this mapping
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr size_t AddressSpaceSize{1ULL << AddressSpaceBits};
|
||||||
|
SegmentTable<SegmentTableEntry, AddressSpaceSize, VaGranularityBits, VaL2GranularityBits> blockSegmentTable; //!< A page table of all buffer mappings for O(1) lookups on full matches
|
||||||
|
|
||||||
|
TranslatedAddressRange TranslateRangeImpl(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
|
||||||
|
|
||||||
|
/**
 * @brief Looks up the segment table entry for the given VA, callers in this class take `blockMutex` before calling
 * @param virt The VA to look up
 * @param cpuAccessCallback Optional callback that is invoked with the span of the whole block when it is set
 * @return The span described by the entry (`phys`, `extent`) and the offset of `virt` from the entry's base VA
 * @note The offset is returned as `VaType` to match the public `LookupBlock()` signature
 * @note The callback is taken by const reference so that lookups don't copy the `std::function`
 */
std::pair<span<u8>, VaType> LookupBlockLocked(VaType virt, const std::function<void(span<u8>)> &cpuAccessCallback = {}) {
    const auto &entry{this->blockSegmentTable[virt]};

    // The entry describes the whole mapping, so the offset is relative to the mapping's base rather than the segment's
    VaType offsetInBlock{virt - entry.virt};
    span<u8> blockSpan{entry.phys, entry.extent};

    if (cpuAccessCallback)
        cpuAccessCallback(blockSpan);

    return {blockSpan, offsetInBlock};
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
FlatMemoryManager();
|
FlatMemoryManager();
|
||||||
|
|
||||||
@ -117,9 +133,31 @@ namespace skyline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return A vector of all physical ranges inside of the given virtual range
|
* @brief Looks up the mapped region that contains the given VA
|
||||||
|
* @return A span of the mapped region and the offset of the input VA in the region
|
||||||
*/
|
*/
|
||||||
TranslatedAddressRange TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
|
__attribute__((always_inline)) std::pair<span<u8>, VaType> LookupBlock(VaType virt, std::function<void(span<u8>)> cpuAccessCallback = {}) {
    // Hold the block mutex for the duration of the lookup, the unlocked helper does the actual work
    std::scoped_lock guard(this->blockMutex);
    return this->LookupBlockLocked(virt, cpuAccessCallback);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Translates a region in the VA space to a corresponding set of regions in the PA space
|
||||||
|
*/
|
||||||
|
TranslatedAddressRange TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {}) {
|
||||||
|
std::scoped_lock lock{this->blockMutex};
|
||||||
|
|
||||||
|
// Fast path for when the range is mapped in a single block
|
||||||
|
auto [blockSpan, rangeOffset]{LookupBlockLocked(virt, cpuAccessCallback)};
|
||||||
|
if (blockSpan.size() - rangeOffset >= size) {
|
||||||
|
TranslatedAddressRange ranges;
|
||||||
|
ranges.push_back(blockSpan.subspan(rangeOffset, size));
|
||||||
|
return ranges;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TranslateRangeImpl(virt, size, cpuAccessCallback);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void Read(u8 *destination, VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
|
void Read(u8 *destination, VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
|
||||||
|
|
||||||
@ -203,6 +241,18 @@ namespace skyline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Copy(VaType dst, VaType src, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
|
void Copy(VaType dst, VaType src, VaType size, std::function<void(span<u8>)> cpuAccessCallback = {});
|
||||||
|
|
||||||
|
/**
 * @brief Records a mapping in both the segment table and the underlying address space map while holding `blockMutex`
 */
void Map(VaType virt, u8 *phys, VaType size, MemoryManagerBlockInfo extraInfo = {}) {
    std::scoped_lock guard{this->blockMutex};

    // The segment table is given the range [virt, virt + size) together with an entry describing the whole mapping
    blockSegmentTable.Set(virt, virt + size, SegmentTableEntry{virt, phys, size, extraInfo});

    this->MapLocked(virt, phys, size, extraInfo);
}
|
||||||
|
|
||||||
|
/**
 * @brief Clears a range in both the segment table and the underlying address space map while holding `blockMutex`
 */
void Unmap(VaType virt, VaType size) {
    std::scoped_lock guard{this->blockMutex};

    // Overwrite the range [virt, virt + size) in the segment table with an empty entry
    blockSegmentTable.Set(virt, virt + size, SegmentTableEntry{});

    this->UnmapLocked(virt, size);
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#define MAP_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap<VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
|
#define MAP_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap<VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
|
||||||
|
|
||||||
#define MM_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
|
#define MM_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits, size_t VaGranularityBits, size_t VaL2GranularityBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits, VaGranularityBits, VaL2GranularityBits>
|
||||||
|
|
||||||
#define ALLOC_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
|
#define ALLOC_MEMBER(returnType) template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
|
||||||
|
|
||||||
@ -223,20 +223,11 @@ namespace skyline {
|
|||||||
unmapCallback(virt, size);
|
unmapCallback(virt, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
MM_MEMBER()::FlatMemoryManager() {
|
|
||||||
sparseMap = static_cast<u8 *>(mmap(0, SparseMapSize, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
|
|
||||||
if (!sparseMap)
|
|
||||||
throw exception("Failed to mmap sparse map!");
|
|
||||||
}
|
|
||||||
|
|
||||||
MM_MEMBER()::~FlatMemoryManager() {
|
MM_MEMBER(TranslatedAddressRange)::TranslateRangeImpl(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
|
||||||
munmap(sparseMap, SparseMapSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
MM_MEMBER(TranslatedAddressRange)::TranslateRange(VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
|
|
||||||
TRACE_EVENT("containers", "FlatMemoryManager::TranslateRange");
|
TRACE_EVENT("containers", "FlatMemoryManager::TranslateRange");
|
||||||
|
|
||||||
std::scoped_lock lock(this->blockMutex);
|
TranslatedAddressRange ranges;
|
||||||
|
|
||||||
auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) {
|
auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) {
|
||||||
return virt < block.virt;
|
return virt < block.virt;
|
||||||
@ -247,7 +238,6 @@ namespace skyline {
|
|||||||
u8 *blockPhys{predecessor->phys + (virt - predecessor->virt)};
|
u8 *blockPhys{predecessor->phys + (virt - predecessor->virt)};
|
||||||
VaType blockSize{std::min(successor->virt - virt, size)};
|
VaType blockSize{std::min(successor->virt - virt, size)};
|
||||||
|
|
||||||
TranslatedAddressRange ranges;
|
|
||||||
|
|
||||||
while (size) {
|
while (size) {
|
||||||
// Return a zeroed out map to emulate sparse mappings
|
// Return a zeroed out map to emulate sparse mappings
|
||||||
@ -276,6 +266,16 @@ namespace skyline {
|
|||||||
return ranges;
|
return ranges;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Reserves the read-only anonymous region that backs `sparseMap`
 * @throws exception If the region could not be mapped
 */
MM_MEMBER()::FlatMemoryManager() {
    void *mapping{mmap(nullptr, SparseMapSize, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)};

    // mmap reports failure by returning MAP_FAILED rather than a null pointer, so a null check would never trigger
    if (mapping == MAP_FAILED)
        throw exception("Failed to mmap sparse map!");

    sparseMap = static_cast<u8 *>(mapping);
}
|
||||||
|
|
||||||
|
// Releases the `SparseMapSize` byte region pointed to by `sparseMap`
MM_MEMBER()::~FlatMemoryManager() {
|
||||||
|
munmap(sparseMap, SparseMapSize);
|
||||||
|
}
|
||||||
|
|
||||||
MM_MEMBER(void)::Read(u8 *destination, VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
|
MM_MEMBER(void)::Read(u8 *destination, VaType virt, VaType size, std::function<void(span<u8>)> cpuAccessCallback) {
|
||||||
TRACE_EVENT("containers", "FlatMemoryManager::Read");
|
TRACE_EVENT("containers", "FlatMemoryManager::Read");
|
||||||
|
|
||||||
@ -491,10 +491,12 @@ namespace skyline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
|
ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
|
||||||
this->Map(virt, true, size);
|
std::scoped_lock lock(this->blockMutex);
|
||||||
|
this->MapLocked(virt, true, size, {});
|
||||||
}
|
}
|
||||||
|
|
||||||
ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
|
ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
|
||||||
this->Unmap(virt, size);
|
std::scoped_lock lock(this->blockMutex);
|
||||||
|
this->UnmapLocked(virt, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,5 +6,5 @@
|
|||||||
|
|
||||||
namespace skyline {
|
namespace skyline {
|
||||||
template class FlatAddressSpaceMap<u64, 0, u8 *, nullptr, true, soc::gm20b::GmmuAddressSpaceBits>;
|
template class FlatAddressSpaceMap<u64, 0, u8 *, nullptr, true, soc::gm20b::GmmuAddressSpaceBits>;
|
||||||
template class FlatMemoryManager<u64, 0, soc::gm20b::GmmuAddressSpaceBits>;
|
template class FlatMemoryManager<u64, 0, soc::gm20b::GmmuAddressSpaceBits, soc::gm20b::GmmuSmallPageSizeBits, soc::gm20b::GmmuMinBigPageSizeBits>;
|
||||||
}
|
}
|
||||||
|
@ -3,17 +3,23 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <bit>
|
||||||
#include <common/address_space.h>
|
#include <common/address_space.h>
|
||||||
|
|
||||||
namespace skyline::soc::gm20b {
|
namespace skyline::soc::gm20b {
|
||||||
static constexpr u8 GmmuAddressSpaceBits{40}; //!< The size of the GMMU AS in bits
|
static constexpr u8 GmmuAddressSpaceBits{40}; //!< The size of the GMMU AS in bits
|
||||||
|
static constexpr size_t GmmuSmallPageSize{0x1000}; // 4KiB
|
||||||
|
static constexpr size_t GmmuSmallPageSizeBits{std::countr_zero(GmmuSmallPageSize)};
|
||||||
|
static constexpr size_t GmmuMinBigPageSize{0x20000}; // 128KiB
|
||||||
|
static constexpr size_t GmmuMinBigPageSizeBits{std::countr_zero(GmmuMinBigPageSize)};
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief The GMMU (Graphics Memory Management Unit) class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
|
* @brief The GMMU (Graphics Memory Management Unit) class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
|
||||||
* @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't need to emulate this abstraction
|
* @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't need to emulate this abstraction
|
||||||
* @note The GMMU is implemented entirely as a template specialization over FlatMemoryManager
|
* @note The GMMU is implemented entirely as a template specialization over FlatMemoryManager
|
||||||
*/
|
*/
|
||||||
using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits>;
|
using GMMU = FlatMemoryManager<u64, 0, GmmuAddressSpaceBits, GmmuSmallPageSizeBits, GmmuMinBigPageSizeBits>;
|
||||||
|
|
||||||
struct AddressSpaceContext {
|
struct AddressSpaceContext {
|
||||||
GMMU gmmu;
|
GMMU gmmu;
|
||||||
|
@ -6,5 +6,5 @@
|
|||||||
|
|
||||||
namespace skyline {
|
namespace skyline {
|
||||||
template class FlatAddressSpaceMap<u32, 0, u8 *, nullptr, true, soc::SmmuAddressSpaceBits>;
|
template class FlatAddressSpaceMap<u32, 0, u8 *, nullptr, true, soc::SmmuAddressSpaceBits>;
|
||||||
template class FlatMemoryManager<u32, 0, soc::SmmuAddressSpaceBits>;
|
template class FlatMemoryManager<u32, 0, soc::SmmuAddressSpaceBits, soc::SmmuPageSizeBits, soc::SmmuL2PageSizeBits>;
|
||||||
}
|
}
|
||||||
|
@ -7,10 +7,16 @@
|
|||||||
|
|
||||||
namespace skyline::soc {
|
namespace skyline::soc {
|
||||||
static constexpr u8 SmmuAddressSpaceBits{32}; //!< The size of the SMMU AS in bits
|
static constexpr u8 SmmuAddressSpaceBits{32}; //!< The size of the SMMU AS in bits
|
||||||
|
constexpr size_t SmmuPageSize{0x1000}; // 4KiB
|
||||||
|
constexpr size_t SmmuPageSizeBits{std::countr_zero(SmmuPageSize)};
|
||||||
|
constexpr size_t SmmuL2PageSize{0x20000}; // 128KiB - not actually a thing in HW but needed for segment table
|
||||||
|
constexpr size_t SmmuL2PageSizeBits{std::countr_zero(SmmuL2PageSize)};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief The SMMU (System Memory Management Unit) class handles mapping between the host1x peripheral virtual address space and an application's address space
|
* @brief The SMMU (System Memory Management Unit) class handles mapping between the host1x peripheral virtual address space and an application's address space
|
||||||
* @note The SMMU is implemented entirely as a template specialization over FlatMemoryManager
|
* @note The SMMU is implemented entirely as a template specialization over FlatMemoryManager
|
||||||
*/
|
*/
|
||||||
using SMMU = FlatMemoryManager<u32, 0, SmmuAddressSpaceBits>;
|
using SMMU = FlatMemoryManager<u32, 0, SmmuAddressSpaceBits, SmmuPageSizeBits, SmmuL2PageSizeBits>;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user