mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-27 05:04:17 +01:00
Optimize Buffer Lookups using Range Tables
Buffer lookups are a fairly expensive operation: we currently spend `O(log n)` even on the simplest and most frequent case, a direct match, and given how frequently lookups are performed that may be insufficient. This commit optimizes that case to `O(1)` by utilizing a `RangeTable`, at the cost of slightly higher insertion/deletion costs for setting ranges of values, but insertions and deletions are minimal in frequency compared to lookups.
This commit is contained in:
parent
578ae86cca
commit
5f8619f791
@ -2,7 +2,6 @@
|
||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <gpu.h>
|
||||
|
||||
#include "buffer_manager.h"
|
||||
|
||||
namespace skyline::gpu {
|
||||
@ -36,8 +35,17 @@ namespace skyline::gpu {
|
||||
|
||||
BufferManager::LockedBuffers BufferManager::Lookup(span<u8> range, ContextTag tag) {
|
||||
LockedBuffers overlaps;
|
||||
auto entryIt{std::lower_bound(buffers.begin(), buffers.end(), range.end().base(), BufferLessThan)};
|
||||
while (entryIt != buffers.begin() && (*--entryIt)->guest->begin() <= range.end())
|
||||
|
||||
// Try to do a fast lookup in the page table
|
||||
auto lookupBuffer{bufferTable[range.begin().base()]};
|
||||
if (lookupBuffer != nullptr && lookupBuffer->guest->contains(range)) {
|
||||
overlaps.emplace_back(lookupBuffer->shared_from_this(), tag);
|
||||
return overlaps;
|
||||
}
|
||||
|
||||
// If we cannot find the buffer quickly, do a binary search to find all overlapping buffers
|
||||
auto entryIt{std::lower_bound(bufferMappings.begin(), bufferMappings.end(), range.end().base(), BufferLessThan)};
|
||||
while (entryIt != bufferMappings.begin() && (*--entryIt)->guest->begin() <= range.end())
|
||||
if ((*entryIt)->guest->end() > range.begin())
|
||||
overlaps.emplace_back(*entryIt, tag);
|
||||
|
||||
@ -45,12 +53,14 @@ namespace skyline::gpu {
|
||||
}
|
||||
|
||||
void BufferManager::InsertBuffer(std::shared_ptr<Buffer> buffer) {
|
||||
auto bufferEnd{buffer->guest->end().base()};
|
||||
buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), bufferEnd, BufferLessThan), std::move(buffer));
|
||||
auto bufferStart{buffer->guest->begin().base()}, bufferEnd{buffer->guest->end().base()};
|
||||
bufferTable.Set(bufferStart, bufferEnd, buffer.get());
|
||||
bufferMappings.insert(std::lower_bound(bufferMappings.begin(), bufferMappings.end(), bufferEnd, BufferLessThan), std::move(buffer));
|
||||
}
|
||||
|
||||
void BufferManager::DeleteBuffer(const std::shared_ptr<Buffer> &buffer) {
|
||||
buffers.erase(std::find(buffers.begin(), buffers.end(), buffer));
|
||||
bufferTable.Set(buffer->guest->begin().base(), buffer->guest->end().base(), nullptr);
|
||||
bufferMappings.erase(std::find(bufferMappings.begin(), bufferMappings.end(), buffer));
|
||||
}
|
||||
|
||||
BufferManager::LockedBuffer BufferManager::CoalesceBuffers(span<u8> range, const LockedBuffers &srcBuffers, ContextTag tag) {
|
||||
@ -144,8 +154,6 @@ namespace skyline::gpu {
|
||||
}
|
||||
|
||||
newBuffer->delegates.splice(newBuffer->delegates.end(), srcBuffer->delegates);
|
||||
|
||||
srcBuffer->Invalidate(); // Invalidate the overlapping buffer so it can't be synced in the future
|
||||
}
|
||||
|
||||
return newBuffer;
|
||||
@ -189,8 +197,10 @@ namespace skyline::gpu {
|
||||
}
|
||||
|
||||
// Delete older overlapping buffers and insert the new buffer into the map
|
||||
for (auto &overlap : overlaps)
|
||||
for (auto &overlap : overlaps) {
|
||||
DeleteBuffer(*overlap);
|
||||
overlap->Invalidate(); // Invalidate the overlapping buffer so it can't be synced in the future
|
||||
}
|
||||
InsertBuffer(*buffer);
|
||||
|
||||
return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest->begin()) + offset, size);
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common/range_table.h>
|
||||
#include "buffer.h"
|
||||
|
||||
namespace skyline::gpu {
|
||||
@ -15,7 +16,12 @@ namespace skyline::gpu {
|
||||
private:
|
||||
GPU &gpu;
|
||||
std::mutex mutex; //!< Synchronizes access to the buffer mappings
|
||||
std::vector<std::shared_ptr<Buffer>> buffers; //!< A sorted vector of all buffer mappings
|
||||
std::vector<std::shared_ptr<Buffer>> bufferMappings; //!< A sorted vector of all buffer mappings
|
||||
|
||||
static constexpr size_t AddressSpaceSize{1ULL << 39}; //!< The size of the guest CPU AS in bytes
|
||||
static constexpr size_t PageSizeBits{12}; //!< The size of a single page of the guest CPU AS as a power of two (4 KiB == 1 << 12)
|
||||
static constexpr size_t L2EntryGranularity{19}; //!< The amount of AS (in bytes) a single L2 PTE covers (512 KiB == 1 << 19)
|
||||
RangeTable<Buffer*, AddressSpaceSize, PageSizeBits, L2EntryGranularity> bufferTable; //!< A page table of all buffer mappings for O(1) lookups on full matches
|
||||
|
||||
std::mutex megaBufferMutex; //!< Synchronizes access to the allocated megabuffers
|
||||
|
||||
@ -57,11 +63,13 @@ namespace skyline::gpu {
|
||||
|
||||
/**
|
||||
* @brief Inserts the supplied buffer into the map based on its guest address
|
||||
* @note The supplied buffer **must** have a valid guest mapping
|
||||
*/
|
||||
void InsertBuffer(std::shared_ptr<Buffer> buffer);
|
||||
|
||||
/**
|
||||
* @brief Deletes the supplied buffer from the map, the lifetime of the buffer will no longer be extended by the map
|
||||
* @note The supplied buffer **must** have a valid guest mapping
|
||||
*/
|
||||
void DeleteBuffer(const std::shared_ptr<Buffer> &buffer);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user