From 5f8619f7915b9b43e0827d2b4c374e13bf927145 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Mon, 1 Aug 2022 21:50:09 +0530 Subject: [PATCH] Optimize Buffer Lookups using Range Tables Buffer lookups are a fairly expensive and very frequent operation: we currently spend `O(log n)` even on the simplest and most common case, a direct match, which may be insufficient given how often it occurs. This commit optimizes that case to `O(1)` by utilizing a `RangeTable`, at the cost of slightly higher insertion/deletion costs for setting ranges of values, but those operations are infrequent compared to lookups. --- .../main/cpp/skyline/gpu/buffer_manager.cpp | 28 +++++++++++++------ app/src/main/cpp/skyline/gpu/buffer_manager.h | 10 ++++++- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp index 8c6d5e54..fc0bfd74 100644 --- a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp @@ -2,7 +2,6 @@ // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) #include - #include "buffer_manager.h" namespace skyline::gpu { @@ -36,8 +35,17 @@ namespace skyline::gpu { BufferManager::LockedBuffers BufferManager::Lookup(span range, ContextTag tag) { LockedBuffers overlaps; - auto entryIt{std::lower_bound(buffers.begin(), buffers.end(), range.end().base(), BufferLessThan)}; - while (entryIt != buffers.begin() && (*--entryIt)->guest->begin() <= range.end()) + + // Try to do a fast lookup in the page table + auto lookupBuffer{bufferTable[range.begin().base()]}; + if (lookupBuffer != nullptr && lookupBuffer->guest->contains(range)) { + overlaps.emplace_back(lookupBuffer->shared_from_this(), tag); + return overlaps; + } + + // If we cannot find the buffer quickly, do a binary search to find all overlapping buffers + auto entryIt{std::lower_bound(bufferMappings.begin(), bufferMappings.end(), 
range.end().base(), BufferLessThan)}; + while (entryIt != bufferMappings.begin() && (*--entryIt)->guest->begin() <= range.end()) if ((*entryIt)->guest->end() > range.begin()) overlaps.emplace_back(*entryIt, tag); @@ -45,12 +53,14 @@ namespace skyline::gpu { } void BufferManager::InsertBuffer(std::shared_ptr buffer) { - auto bufferEnd{buffer->guest->end().base()}; - buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), bufferEnd, BufferLessThan), std::move(buffer)); + auto bufferStart{buffer->guest->begin().base()}, bufferEnd{buffer->guest->end().base()}; + bufferTable.Set(bufferStart, bufferEnd, buffer.get()); + bufferMappings.insert(std::lower_bound(bufferMappings.begin(), bufferMappings.end(), bufferEnd, BufferLessThan), std::move(buffer)); } void BufferManager::DeleteBuffer(const std::shared_ptr &buffer) { - buffers.erase(std::find(buffers.begin(), buffers.end(), buffer)); + bufferTable.Set(buffer->guest->begin().base(), buffer->guest->end().base(), nullptr); + bufferMappings.erase(std::find(bufferMappings.begin(), bufferMappings.end(), buffer)); } BufferManager::LockedBuffer BufferManager::CoalesceBuffers(span range, const LockedBuffers &srcBuffers, ContextTag tag) { @@ -144,8 +154,6 @@ namespace skyline::gpu { } newBuffer->delegates.splice(newBuffer->delegates.end(), srcBuffer->delegates); - - srcBuffer->Invalidate(); // Invalidate the overlapping buffer so it can't be synced in the future } return newBuffer; @@ -189,8 +197,10 @@ namespace skyline::gpu { } // Delete older overlapping buffers and insert the new buffer into the map - for (auto &overlap : overlaps) + for (auto &overlap : overlaps) { DeleteBuffer(*overlap); + overlap->Invalidate(); // Invalidate the overlapping buffer so it can't be synced in the future + } InsertBuffer(*buffer); return buffer->GetView(static_cast(guestMapping.begin() - buffer->guest->begin()) + offset, size); diff --git a/app/src/main/cpp/skyline/gpu/buffer_manager.h b/app/src/main/cpp/skyline/gpu/buffer_manager.h index 
dda43883..c659d072 100644 --- a/app/src/main/cpp/skyline/gpu/buffer_manager.h +++ b/app/src/main/cpp/skyline/gpu/buffer_manager.h @@ -3,6 +3,7 @@ #pragma once +#include #include "buffer.h" namespace skyline::gpu { @@ -15,7 +16,12 @@ namespace skyline::gpu { private: GPU &gpu; std::mutex mutex; //!< Synchronizes access to the buffer mappings - std::vector> buffers; //!< A sorted vector of all buffer mappings + std::vector> bufferMappings; //!< A sorted vector of all buffer mappings + + static constexpr size_t AddressSpaceSize{1ULL << 39}; //!< The size of the guest CPU AS in bytes + static constexpr size_t PageSizeBits{12}; //!< The size of a single page of the guest CPU AS as a power of two (4 KiB == 1 << 12) + static constexpr size_t L2EntryGranularity{19}; //!< The amount of AS (in bytes) a single L2 PTE covers (512 KiB == 1 << 19) + RangeTable bufferTable; //!< A page table of all buffer mappings for O(1) lookups on full matches std::mutex megaBufferMutex; //!< Synchronizes access to the allocated megabuffers @@ -57,11 +63,13 @@ namespace skyline::gpu { /** * @brief Inserts the supplied buffer into the map based on its guest address + * @note The supplied buffer **must** have a valid guest mapping */ void InsertBuffer(std::shared_ptr buffer); /** * @brief Deletes the supplied buffer from the map, the lifetime of the buffer will no longer be extended by the map + * @note The supplied buffer **must** have a valid guest mapping */ void DeleteBuffer(const std::shared_ptr &buffer);