Transition to std::unordered_set for buffer view tracking

std::unordered_set has the same guarantees of pointer stability while also being significantly faster in cases where a buffer has thousands of views. This is the case in RE4, and this change leads to an almost 1000% performance improvement in that game.
This commit is contained in:
Billy Laws 2022-06-09 23:18:33 +01:00
parent b75a06af1b
commit 22039df301
3 changed files with 38 additions and 17 deletions

View File

@ -259,12 +259,9 @@ namespace skyline::gpu {
}
BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) {
for (auto &view : views)
if (view.offset == offset && view.size == size && view.format == format)
return BufferView{shared_from_this(), &view};
views.emplace_back(offset, size, format);
return BufferView{shared_from_this(), &views.back()};
// Will return an iterator to the inserted view or the already-existing view if the same view is already in the set
auto it{views.emplace(offset, size, format).first};
return BufferView{shared_from_this(), &(*it)};
}
vk::DeviceSize Buffer::AcquireMegaBuffer(MegaBuffer& megaBuffer) {
@ -295,7 +292,7 @@ namespace skyline::gpu {
/**
 * @brief Constructs a view storage entry by copying the supplied offset, size and format into the members of the same names
 */
Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) : offset(offset), size(size), format(format) {}
Buffer::BufferDelegate::BufferDelegate(std::shared_ptr<Buffer> pBuffer, Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) {
Buffer::BufferDelegate::BufferDelegate(std::shared_ptr<Buffer> pBuffer, const Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) {
iterator = buffer->delegates.emplace(buffer->delegates.end(), this);
}
@ -339,7 +336,7 @@ namespace skyline::gpu {
}
}
BufferView::BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view) : bufferDelegate(std::make_shared<Buffer::BufferDelegate>(std::move(buffer), view)) {}
BufferView::BufferView(std::shared_ptr<Buffer> buffer, const Buffer::BufferViewStorage *view) : bufferDelegate(std::make_shared<Buffer::BufferDelegate>(std::move(buffer), view)) {}
void BufferView::AttachCycle(const std::shared_ptr<FenceCycle> &cycle) {
auto buffer{bufferDelegate->buffer.get()};

View File

@ -3,6 +3,8 @@
#pragma once
#include <unordered_set>
#include <boost/functional/hash.hpp>
#include <nce.h>
#include "memory_manager.h"
@ -53,10 +55,26 @@ namespace skyline::gpu {
vk::Format format;
BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format);
auto operator<=>(const BufferViewStorage &) const = default;
};
private:
std::list<BufferViewStorage> views; //!< BufferViewStorage(s) that are backed by this Buffer, used for storage and repointing to a new Buffer on deletion
/**
 * @brief Hasher that allows BufferViewStorage to be stored in the unordered views set
 * @note The offset, size and format fields are folded into the result in that order
 */
struct BufferViewStorageHash {
    size_t operator()(const BufferViewStorage &entry) const noexcept {
        size_t combined{};
        boost::hash_combine(combined, entry.offset);
        boost::hash_combine(combined, entry.size);
        boost::hash_combine(combined, entry.format);
        return combined;
    }
};
std::unordered_set<BufferViewStorage, BufferViewStorageHash> views; //!< BufferViewStorage(s) that are backed by this Buffer, used for storage and repointing to a new Buffer on deletion
public:
/**
@ -65,11 +83,11 @@ namespace skyline::gpu {
*/
struct BufferDelegate : public FenceCycleDependency {
std::shared_ptr<Buffer> buffer;
Buffer::BufferViewStorage *view;
const Buffer::BufferViewStorage *view;
std::function<void(const BufferViewStorage &, const std::shared_ptr<Buffer> &)> usageCallback;
std::list<BufferDelegate *>::iterator iterator;
BufferDelegate(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view);
BufferDelegate(std::shared_ptr<Buffer> buffer, const Buffer::BufferViewStorage *view);
~BufferDelegate();
@ -262,7 +280,7 @@ namespace skyline::gpu {
struct BufferView {
std::shared_ptr<Buffer::BufferDelegate> bufferDelegate;
BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view);
BufferView(std::shared_ptr<Buffer> buffer, const Buffer::BufferViewStorage *view);
constexpr BufferView(nullptr_t = nullptr) : bufferDelegate(nullptr) {}

View File

@ -55,13 +55,19 @@ namespace skyline::gpu {
buffers.erase(std::find(buffers.begin(), buffers.end(), overlap));
// Transfer all views from the overlapping buffer to the new buffer with the new buffer and updated offset
// Transfer all views from the overlapping buffer to the new buffer with the new buffer and updated offset, ensuring pointer stability
vk::DeviceSize overlapOffset{static_cast<vk::DeviceSize>(overlap->guest->begin() - newBuffer->guest->begin())};
if (overlapOffset != 0)
for (auto &view : overlap->views)
view.offset += overlapOffset;
if (overlapOffset != 0) {
// This is a slight hack as we really shouldn't be changing the underlying set elements without a rehash but without writing our own set impl this is the best we can do
for (auto it{overlap->views.begin()}; it != overlap->views.end(); it++)
const_cast<Buffer::BufferViewStorage *>(&*it)->offset += overlapOffset;
newBuffer->views.splice(newBuffer->views.end(), overlap->views);
// All current hashes are invalidated by above loop so rehash the container
overlap->views.rehash(0);
}
// Merge the view sets, this will keep pointer stability hence avoiding any reallocation
newBuffer->views.merge(overlap->views);
// Transfer all delegates references from the overlapping buffer to the new buffer
for (auto &delegate : overlap->delegates) {