mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-27 09:04:14 +01:00
Optimize Buffer Lookups using Range Tables
Buffer lookups are a fairly expensive operation: we currently spend `O(log n)` even on the simplest and most frequent case, a direct match, and because lookups are performed so frequently that cost may be too high. This commit optimizes that case to `O(1)` by utilizing a `RangeTable`, at the cost of slightly higher insertion/deletion costs for setting ranges of values, but insertions and deletions are infrequent compared to lookups.
This commit is contained in:
parent
578ae86cca
commit
5f8619f791
@ -2,7 +2,6 @@
|
|||||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
|
||||||
#include <gpu.h>
|
#include <gpu.h>
|
||||||
|
|
||||||
#include "buffer_manager.h"
|
#include "buffer_manager.h"
|
||||||
|
|
||||||
namespace skyline::gpu {
|
namespace skyline::gpu {
|
||||||
@ -36,8 +35,17 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
BufferManager::LockedBuffers BufferManager::Lookup(span<u8> range, ContextTag tag) {
|
BufferManager::LockedBuffers BufferManager::Lookup(span<u8> range, ContextTag tag) {
|
||||||
LockedBuffers overlaps;
|
LockedBuffers overlaps;
|
||||||
auto entryIt{std::lower_bound(buffers.begin(), buffers.end(), range.end().base(), BufferLessThan)};
|
|
||||||
while (entryIt != buffers.begin() && (*--entryIt)->guest->begin() <= range.end())
|
// Try to do a fast lookup in the page table
|
||||||
|
auto lookupBuffer{bufferTable[range.begin().base()]};
|
||||||
|
if (lookupBuffer != nullptr && lookupBuffer->guest->contains(range)) {
|
||||||
|
overlaps.emplace_back(lookupBuffer->shared_from_this(), tag);
|
||||||
|
return overlaps;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we cannot find the buffer quickly, do a binary search to find all overlapping buffers
|
||||||
|
auto entryIt{std::lower_bound(bufferMappings.begin(), bufferMappings.end(), range.end().base(), BufferLessThan)};
|
||||||
|
while (entryIt != bufferMappings.begin() && (*--entryIt)->guest->begin() <= range.end())
|
||||||
if ((*entryIt)->guest->end() > range.begin())
|
if ((*entryIt)->guest->end() > range.begin())
|
||||||
overlaps.emplace_back(*entryIt, tag);
|
overlaps.emplace_back(*entryIt, tag);
|
||||||
|
|
||||||
@ -45,12 +53,14 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void BufferManager::InsertBuffer(std::shared_ptr<Buffer> buffer) {
|
void BufferManager::InsertBuffer(std::shared_ptr<Buffer> buffer) {
|
||||||
auto bufferEnd{buffer->guest->end().base()};
|
auto bufferStart{buffer->guest->begin().base()}, bufferEnd{buffer->guest->end().base()};
|
||||||
buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), bufferEnd, BufferLessThan), std::move(buffer));
|
bufferTable.Set(bufferStart, bufferEnd, buffer.get());
|
||||||
|
bufferMappings.insert(std::lower_bound(bufferMappings.begin(), bufferMappings.end(), bufferEnd, BufferLessThan), std::move(buffer));
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferManager::DeleteBuffer(const std::shared_ptr<Buffer> &buffer) {
|
void BufferManager::DeleteBuffer(const std::shared_ptr<Buffer> &buffer) {
|
||||||
buffers.erase(std::find(buffers.begin(), buffers.end(), buffer));
|
bufferTable.Set(buffer->guest->begin().base(), buffer->guest->end().base(), nullptr);
|
||||||
|
bufferMappings.erase(std::find(bufferMappings.begin(), bufferMappings.end(), buffer));
|
||||||
}
|
}
|
||||||
|
|
||||||
BufferManager::LockedBuffer BufferManager::CoalesceBuffers(span<u8> range, const LockedBuffers &srcBuffers, ContextTag tag) {
|
BufferManager::LockedBuffer BufferManager::CoalesceBuffers(span<u8> range, const LockedBuffers &srcBuffers, ContextTag tag) {
|
||||||
@ -144,8 +154,6 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
newBuffer->delegates.splice(newBuffer->delegates.end(), srcBuffer->delegates);
|
newBuffer->delegates.splice(newBuffer->delegates.end(), srcBuffer->delegates);
|
||||||
|
|
||||||
srcBuffer->Invalidate(); // Invalidate the overlapping buffer so it can't be synced in the future
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return newBuffer;
|
return newBuffer;
|
||||||
@ -189,8 +197,10 @@ namespace skyline::gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Delete older overlapping buffers and insert the new buffer into the map
|
// Delete older overlapping buffers and insert the new buffer into the map
|
||||||
for (auto &overlap : overlaps)
|
for (auto &overlap : overlaps) {
|
||||||
DeleteBuffer(*overlap);
|
DeleteBuffer(*overlap);
|
||||||
|
overlap->Invalidate(); // Invalidate the overlapping buffer so it can't be synced in the future
|
||||||
|
}
|
||||||
InsertBuffer(*buffer);
|
InsertBuffer(*buffer);
|
||||||
|
|
||||||
return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest->begin()) + offset, size);
|
return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest->begin()) + offset, size);
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <common/range_table.h>
|
||||||
#include "buffer.h"
|
#include "buffer.h"
|
||||||
|
|
||||||
namespace skyline::gpu {
|
namespace skyline::gpu {
|
||||||
@ -15,7 +16,12 @@ namespace skyline::gpu {
|
|||||||
private:
|
private:
|
||||||
GPU &gpu;
|
GPU &gpu;
|
||||||
std::mutex mutex; //!< Synchronizes access to the buffer mappings
|
std::mutex mutex; //!< Synchronizes access to the buffer mappings
|
||||||
std::vector<std::shared_ptr<Buffer>> buffers; //!< A sorted vector of all buffer mappings
|
std::vector<std::shared_ptr<Buffer>> bufferMappings; //!< A sorted vector of all buffer mappings
|
||||||
|
|
||||||
|
static constexpr size_t AddressSpaceSize{1ULL << 39}; //!< The size of the guest CPU AS in bytes
|
||||||
|
static constexpr size_t PageSizeBits{12}; //!< The size of a single page of the guest CPU AS as a power of two (4 KiB == 1 << 12)
|
||||||
|
static constexpr size_t L2EntryGranularity{19}; //!< The amount of AS (in bytes) a single L2 PTE covers (512 KiB == 1 << 19)
|
||||||
|
RangeTable<Buffer*, AddressSpaceSize, PageSizeBits, L2EntryGranularity> bufferTable; //!< A page table of all buffer mappings for O(1) lookups on full matches
|
||||||
|
|
||||||
std::mutex megaBufferMutex; //!< Synchronizes access to the allocated megabuffers
|
std::mutex megaBufferMutex; //!< Synchronizes access to the allocated megabuffers
|
||||||
|
|
||||||
@ -57,11 +63,13 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Inserts the supplied buffer into the map based on its guest address
|
* @brief Inserts the supplied buffer into the map based on its guest address
|
||||||
|
* @note The supplied buffer **must** have a valid guest mapping
|
||||||
*/
|
*/
|
||||||
void InsertBuffer(std::shared_ptr<Buffer> buffer);
|
void InsertBuffer(std::shared_ptr<Buffer> buffer);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Deletes the supplied buffer from the map, the lifetime of the buffer will no longer be extended by the map
|
* @brief Deletes the supplied buffer from the map, the lifetime of the buffer will no longer be extended by the map
|
||||||
|
* @note The supplied buffer **must** have a valid guest mapping
|
||||||
*/
|
*/
|
||||||
void DeleteBuffer(const std::shared_ptr<Buffer> &buffer);
|
void DeleteBuffer(const std::shared_ptr<Buffer> &buffer);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user