Transition to std::unordered_set for buffer view tracking

Has the same pointer-stability guarantees while also being significantly faster in cases where a buffer has thousands of views. This is the case in RE4, where this change leads to an almost 1000% performance improvement.
This commit is contained in:
Billy Laws 2022-06-09 23:18:33 +01:00
parent b75a06af1b
commit 22039df301
3 changed files with 38 additions and 17 deletions

View File

@ -259,12 +259,9 @@ namespace skyline::gpu {
} }
BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) { BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) {
for (auto &view : views) // Will return an iterator to the inserted view or the already-existing view if the same view is already in the set
if (view.offset == offset && view.size == size && view.format == format) auto it{views.emplace(offset, size, format).first};
return BufferView{shared_from_this(), &view}; return BufferView{shared_from_this(), &(*it)};
views.emplace_back(offset, size, format);
return BufferView{shared_from_this(), &views.back()};
} }
vk::DeviceSize Buffer::AcquireMegaBuffer(MegaBuffer& megaBuffer) { vk::DeviceSize Buffer::AcquireMegaBuffer(MegaBuffer& megaBuffer) {
@ -295,7 +292,7 @@ namespace skyline::gpu {
Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) : offset(offset), size(size), format(format) {} Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) : offset(offset), size(size), format(format) {}
Buffer::BufferDelegate::BufferDelegate(std::shared_ptr<Buffer> pBuffer, Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) { Buffer::BufferDelegate::BufferDelegate(std::shared_ptr<Buffer> pBuffer, const Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) {
iterator = buffer->delegates.emplace(buffer->delegates.end(), this); iterator = buffer->delegates.emplace(buffer->delegates.end(), this);
} }
@ -339,7 +336,7 @@ namespace skyline::gpu {
} }
} }
BufferView::BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view) : bufferDelegate(std::make_shared<Buffer::BufferDelegate>(std::move(buffer), view)) {} BufferView::BufferView(std::shared_ptr<Buffer> buffer, const Buffer::BufferViewStorage *view) : bufferDelegate(std::make_shared<Buffer::BufferDelegate>(std::move(buffer), view)) {}
void BufferView::AttachCycle(const std::shared_ptr<FenceCycle> &cycle) { void BufferView::AttachCycle(const std::shared_ptr<FenceCycle> &cycle) {
auto buffer{bufferDelegate->buffer.get()}; auto buffer{bufferDelegate->buffer.get()};

View File

@ -3,6 +3,8 @@
#pragma once #pragma once
#include <unordered_set>
#include <boost/functional/hash.hpp>
#include <nce.h> #include <nce.h>
#include "memory_manager.h" #include "memory_manager.h"
@ -53,10 +55,26 @@ namespace skyline::gpu {
vk::Format format; vk::Format format;
BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format); BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format);
auto operator<=>(const BufferViewStorage &) const = default;
}; };
private: private:
std::list<BufferViewStorage> views; //!< BufferViewStorage(s) that are backed by this Buffer, used for storage and repointing to a new Buffer on deletion /**
* @brief Hash function for BufferViewStorage to be used in the views set
*/
struct BufferViewStorageHash {
size_t operator()(const BufferViewStorage &entry) const noexcept {
size_t seed{};
boost::hash_combine(seed, entry.offset);
boost::hash_combine(seed, entry.size);
boost::hash_combine(seed, entry.format);
return seed;
}
};
std::unordered_set<BufferViewStorage, BufferViewStorageHash> views; //!< BufferViewStorage(s) that are backed by this Buffer, used for storage and repointing to a new Buffer on deletion
public: public:
/** /**
@ -65,11 +83,11 @@ namespace skyline::gpu {
*/ */
struct BufferDelegate : public FenceCycleDependency { struct BufferDelegate : public FenceCycleDependency {
std::shared_ptr<Buffer> buffer; std::shared_ptr<Buffer> buffer;
Buffer::BufferViewStorage *view; const Buffer::BufferViewStorage *view;
std::function<void(const BufferViewStorage &, const std::shared_ptr<Buffer> &)> usageCallback; std::function<void(const BufferViewStorage &, const std::shared_ptr<Buffer> &)> usageCallback;
std::list<BufferDelegate *>::iterator iterator; std::list<BufferDelegate *>::iterator iterator;
BufferDelegate(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view); BufferDelegate(std::shared_ptr<Buffer> buffer, const Buffer::BufferViewStorage *view);
~BufferDelegate(); ~BufferDelegate();
@ -262,7 +280,7 @@ namespace skyline::gpu {
struct BufferView { struct BufferView {
std::shared_ptr<Buffer::BufferDelegate> bufferDelegate; std::shared_ptr<Buffer::BufferDelegate> bufferDelegate;
BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view); BufferView(std::shared_ptr<Buffer> buffer, const Buffer::BufferViewStorage *view);
constexpr BufferView(nullptr_t = nullptr) : bufferDelegate(nullptr) {} constexpr BufferView(nullptr_t = nullptr) : bufferDelegate(nullptr) {}

View File

@ -55,13 +55,19 @@ namespace skyline::gpu {
buffers.erase(std::find(buffers.begin(), buffers.end(), overlap)); buffers.erase(std::find(buffers.begin(), buffers.end(), overlap));
// Transfer all views from the overlapping buffer to the new buffer with the new buffer and updated offset // Transfer all views from the overlapping buffer to the new buffer with the new buffer and updated offset, ensuring pointer stability
vk::DeviceSize overlapOffset{static_cast<vk::DeviceSize>(overlap->guest->begin() - newBuffer->guest->begin())}; vk::DeviceSize overlapOffset{static_cast<vk::DeviceSize>(overlap->guest->begin() - newBuffer->guest->begin())};
if (overlapOffset != 0) if (overlapOffset != 0) {
for (auto &view : overlap->views) // This is a slight hack as we really shouldn't be changing the underlying set elements without a rehash but without writing our own set impl this is the best we can do
view.offset += overlapOffset; for (auto it{overlap->views.begin()}; it != overlap->views.end(); it++)
const_cast<Buffer::BufferViewStorage *>(&*it)->offset += overlapOffset;
newBuffer->views.splice(newBuffer->views.end(), overlap->views); // All current hashes are invalidated by above loop so rehash the container
overlap->views.rehash(0);
}
// Merge the view sets, this will keep pointer stability hence avoiding any reallocation
newBuffer->views.merge(overlap->views);
// Transfer all delegates references from the overlapping buffer to the new buffer // Transfer all delegates references from the overlapping buffer to the new buffer
for (auto &delegate : overlap->delegates) { for (auto &delegate : overlap->delegates) {