Optimize Buffer Lookups using Range Tables

Buffer lookups are a fairly expensive operation: we currently spend `O(log n)` even on the simplest and most frequent case, which is a direct match. Since lookups are a very frequent operation, that may be insufficient. This commit optimizes that case to `O(1)` by utilizing a `RangeTable`, at the cost of slightly higher insertion/deletion costs for setting ranges of values, but insertions and deletions are infrequent compared to lookups.
This commit is contained in:
PixelyIon 2022-08-01 21:50:09 +05:30
parent 578ae86cca
commit 5f8619f791
No known key found for this signature in database
GPG Key ID: 11BC6C3201BC2C05
2 changed files with 28 additions and 10 deletions

View File

@ -2,7 +2,6 @@
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include "buffer_manager.h"
namespace skyline::gpu {
@ -36,8 +35,17 @@ namespace skyline::gpu {
BufferManager::LockedBuffers BufferManager::Lookup(span<u8> range, ContextTag tag) {
LockedBuffers overlaps;
auto entryIt{std::lower_bound(buffers.begin(), buffers.end(), range.end().base(), BufferLessThan)};
while (entryIt != buffers.begin() && (*--entryIt)->guest->begin() <= range.end())
// Try to do a fast lookup in the page table
auto lookupBuffer{bufferTable[range.begin().base()]};
if (lookupBuffer != nullptr && lookupBuffer->guest->contains(range)) {
overlaps.emplace_back(lookupBuffer->shared_from_this(), tag);
return overlaps;
}
// If we cannot find the buffer quickly, do a binary search to find all overlapping buffers
auto entryIt{std::lower_bound(bufferMappings.begin(), bufferMappings.end(), range.end().base(), BufferLessThan)};
while (entryIt != bufferMappings.begin() && (*--entryIt)->guest->begin() <= range.end())
if ((*entryIt)->guest->end() > range.begin())
overlaps.emplace_back(*entryIt, tag);
@ -45,12 +53,14 @@ namespace skyline::gpu {
}
void BufferManager::InsertBuffer(std::shared_ptr<Buffer> buffer) {
auto bufferEnd{buffer->guest->end().base()};
buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), bufferEnd, BufferLessThan), std::move(buffer));
auto bufferStart{buffer->guest->begin().base()}, bufferEnd{buffer->guest->end().base()};
bufferTable.Set(bufferStart, bufferEnd, buffer.get());
bufferMappings.insert(std::lower_bound(bufferMappings.begin(), bufferMappings.end(), bufferEnd, BufferLessThan), std::move(buffer));
}
void BufferManager::DeleteBuffer(const std::shared_ptr<Buffer> &buffer) {
buffers.erase(std::find(buffers.begin(), buffers.end(), buffer));
bufferTable.Set(buffer->guest->begin().base(), buffer->guest->end().base(), nullptr);
bufferMappings.erase(std::find(bufferMappings.begin(), bufferMappings.end(), buffer));
}
BufferManager::LockedBuffer BufferManager::CoalesceBuffers(span<u8> range, const LockedBuffers &srcBuffers, ContextTag tag) {
@ -144,8 +154,6 @@ namespace skyline::gpu {
}
newBuffer->delegates.splice(newBuffer->delegates.end(), srcBuffer->delegates);
srcBuffer->Invalidate(); // Invalidate the overlapping buffer so it can't be synced in the future
}
return newBuffer;
@ -189,8 +197,10 @@ namespace skyline::gpu {
}
// Delete older overlapping buffers and insert the new buffer into the map
for (auto &overlap : overlaps)
for (auto &overlap : overlaps) {
DeleteBuffer(*overlap);
overlap->Invalidate(); // Invalidate the overlapping buffer so it can't be synced in the future
}
InsertBuffer(*buffer);
return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest->begin()) + offset, size);

View File

@ -3,6 +3,7 @@
#pragma once
#include <common/range_table.h>
#include "buffer.h"
namespace skyline::gpu {
@ -15,7 +16,12 @@ namespace skyline::gpu {
private:
GPU &gpu;
std::mutex mutex; //!< Synchronizes access to the buffer mappings
std::vector<std::shared_ptr<Buffer>> buffers; //!< A sorted vector of all buffer mappings
std::vector<std::shared_ptr<Buffer>> bufferMappings; //!< A sorted vector of all buffer mappings
static constexpr size_t AddressSpaceSize{1ULL << 39}; //!< The size of the guest CPU AS in bytes (1 << 39 == 512 GiB)
static constexpr size_t PageSizeBits{12}; //!< The size of a single page of the guest CPU AS as a power of two (4 KiB == 1 << 12)
static constexpr size_t L2EntryGranularity{19}; //!< The amount of AS a single L2 PTE covers, expressed as a power of two like PageSizeBits rather than as a byte count (512 KiB == 1 << 19)
RangeTable<Buffer*, AddressSpaceSize, PageSizeBits, L2EntryGranularity> bufferTable; //!< A page table of all buffer mappings for O(1) lookups on full matches, entries are raw (non-owning) pointers and unmapped ranges are expected to read back as nullptr
std::mutex megaBufferMutex; //!< Synchronizes access to the allocated megabuffers
@ -57,11 +63,13 @@ namespace skyline::gpu {
/**
* @brief Inserts the supplied buffer into the map based on its guest address
* @note The supplied buffer **must** have a valid guest mapping
*/
void InsertBuffer(std::shared_ptr<Buffer> buffer);
/**
* @brief Deletes the supplied buffer from the map, the lifetime of the buffer will no longer be extended by the map
* @note The supplied buffer **must** have a valid guest mapping
*/
void DeleteBuffer(const std::shared_ptr<Buffer> &buffer);