Rework GPU BufferView to have handle-like semantics

We wanted views to extend the lifetime of the underlying buffer while also preserving every view until the buffer is destroyed. Preserving views prevents them from being recreated, which might become costly in the future when we need `VkBufferView`s of the buffer, and it also provides the centralized list of all views that is required for recreation of the buffer. It also removes the inconsistency of `GetXView` in `GraphicsContext` returning a raw `BufferView*` while the rest of the API returned `std::shared_ptr<BufferView>`.
This commit is contained in:
PixelyIon 2022-03-17 19:02:06 +05:30
parent fae5332f20
commit 7a5c771f44
7 changed files with 76 additions and 60 deletions

View File

@ -171,23 +171,18 @@ namespace skyline::gpu {
std::memcpy(backing.data() + offset, data.data(), data.size());
}
std::shared_ptr<BufferView> Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) {
for (auto viewIt{views.begin()}; viewIt != views.end();) {
auto view{viewIt->lock()};
if (view && view->offset == offset && view->range == range && view->format == format)
return view;
else if (!view)
viewIt = views.erase(viewIt);
else
++viewIt;
}
Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) : offset(offset), range(range), format(format) {}
auto view{std::make_shared<BufferView>(shared_from_this(), offset, range, format)};
views.push_back(view);
return view;
BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) {
    // Look for a stored entry with identical attributes so that it can be reused
    BufferViewStorage *storage{};
    for (auto it{views.begin()}; it != views.end(); ++it) {
        if (it->offset != offset || it->range != range || it->format != format)
            continue;
        storage = &*it;
        break;
    }

    // No identical entry exists, append a new one and hand out a pointer to it
    if (!storage) {
        views.emplace_back(offset, range, format);
        storage = &views.back();
    }

    return BufferView{shared_from_this(), storage};
}
BufferView::BufferView(std::shared_ptr<Buffer> backing, vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) : buffer(std::move(backing)), offset(offset), range(range), format(format) {}
// The shared_ptr is a by-value sink parameter, moving it into the member avoids a redundant atomic reference count increment/decrement pair
BufferView::BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view) : buffer(std::move(buffer)), view(view) {}
void BufferView::lock() {
auto backing{std::atomic_load(&buffer)};

View File

@ -44,7 +44,18 @@ namespace skyline::gpu {
GpuDirty, //!< The GPU buffer has been modified but the CPU mappings have not been updated
} dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU buffer
std::vector<std::weak_ptr<BufferView>> views; //!< BufferView(s) that are backed by this Buffer, used for repointing to a new Buffer on deletion
/**
 * @brief Holds the metadata of a single view into the buffer, entries are retained so that an identical view is reused rather than created again and VkBufferView(s) are not duplicated
 */
struct BufferViewStorage {
    vk::DeviceSize offset; //!< The offset the view was requested with
    vk::DeviceSize range; //!< The range the view was requested with
    vk::Format format; //!< The format the view was requested with

    BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format);
};
std::list<BufferViewStorage> views; //!< BufferViewStorage(s) that are backed by this Buffer, used for storage and repointing to a new Buffer on deletion
friend BufferView;
friend BufferManager;
@ -138,8 +149,9 @@ namespace skyline::gpu {
/**
* @return A cached or newly created view into this buffer with the supplied attributes
* @note The buffer **must** be locked prior to calling this
*/
std::shared_ptr<BufferView> GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format = {});
BufferView GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format = {});
};
/**
@ -147,16 +159,25 @@ namespace skyline::gpu {
* @note The object **must** be locked prior to accessing any members as values will be mutated
* @note This class conforms to the Lockable and BasicLockable C++ named requirements
*/
struct BufferView : public FenceCycleDependency, public std::enable_shared_from_this<BufferView> {
struct BufferView {
std::shared_ptr<Buffer> buffer;
vk::DeviceSize offset;
vk::DeviceSize range;
vk::Format format;
Buffer::BufferViewStorage *view;
/**
* @note A view must **NOT** be constructed directly, it should always be retrieved using Buffer::GetView
*/
BufferView(std::shared_ptr<Buffer> backing, vk::DeviceSize offset, vk::DeviceSize range, vk::Format format);
BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view);
/**
 * @brief Constructs an empty handle which refers to neither a buffer nor a view
 */
constexpr BufferView(nullptr_t = nullptr) : buffer{}, view{} {}
/**
 * @return If this handle points to a view, a handle holding a null view pointer evaluates to false
 */
constexpr operator bool() const {
    return static_cast<bool>(view);
}
/**
 * @return A pointer to the storage entry this handle refers to, giving direct access to its offset/range/format
 * @note This is const-qualified as it only reads the handle (the pointee remains mutable), which allows member access through a const handle
 */
constexpr Buffer::BufferViewStorage *operator->() const {
    return view;
}
/**
 * @brief Implicitly converts the handle into a shared reference to its buffer as a FenceCycleDependency
 */
operator std::shared_ptr<FenceCycleDependency>() {
    std::shared_ptr<FenceCycleDependency> dependency{buffer};
    return dependency;
}
/**
* @brief Acquires an exclusive lock on the buffer for the calling thread

View File

@ -8,7 +8,7 @@
namespace skyline::gpu {
BufferManager::BufferManager(GPU &gpu) : gpu(gpu) {}
std::shared_ptr<BufferView> BufferManager::FindOrCreate(const GuestBuffer &guest) {
BufferView BufferManager::FindOrCreate(const GuestBuffer &guest) {
auto guestMapping{guest.mappings.front()};
/*

View File

@ -37,6 +37,6 @@ namespace skyline::gpu {
/**
* @return A pre-existing or newly created Buffer object which covers the supplied mappings
*/
std::shared_ptr<BufferView> FindOrCreate(const GuestBuffer &guest);
BufferView FindOrCreate(const GuestBuffer &guest);
};
}

View File

@ -35,12 +35,12 @@ namespace skyline::gpu::interconnect {
cycle->AttachObject(view->shared_from_this());
}
void CommandExecutor::AttachBuffer(BufferView *view) {
auto buffer{view->buffer.get()};
void CommandExecutor::AttachBuffer(BufferView view) {
auto buffer{view.buffer.get()};
if (!syncBuffers.contains(buffer)) {
buffer->WaitOnFence();
buffer->cycle = cycle;
cycle->AttachObject(view->shared_from_this());
cycle->AttachObject(view);
syncBuffers.emplace(buffer);
}
}

View File

@ -44,7 +44,7 @@ namespace skyline::gpu::interconnect {
* @note The supplied buffer **must** be locked by the calling thread
* @note This'll automatically handle syncing of the buffer in the most optimal way possible
*/
void AttachBuffer(BufferView *view);
void AttachBuffer(BufferView view);
/**
* @brief Attach the lifetime of the fence cycle dependency to the command buffer

View File

@ -572,7 +572,7 @@ namespace skyline::gpu::interconnect {
IOVA iova;
u32 size;
GuestBuffer guest;
std::shared_ptr<BufferView> view;
BufferView view;
/**
* @brief Reads an object from the supplied offset in the constant buffer
@ -603,8 +603,8 @@ namespace skyline::gpu::interconnect {
*/
template<typename T>
void Write(T &object, size_t offset) {
std::lock_guard lock{*view};
view->buffer->Write(span<T>(object).template cast<u8>(), view->offset + offset);
std::scoped_lock lock{view};
view.buffer->Write(span<T>(object).template cast<u8>(), view->offset + offset);
}
};
ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it
@ -612,17 +612,17 @@ namespace skyline::gpu::interconnect {
public:
void SetConstantBufferSelectorSize(u32 size) {
constantBufferSelector.size = size;
constantBufferSelector.view.reset();
constantBufferSelector.view = {};
}
void SetConstantBufferSelectorIovaHigh(u32 high) {
constantBufferSelector.iova.high = high;
constantBufferSelector.view.reset();
constantBufferSelector.view = {};
}
void SetConstantBufferSelectorIovaLow(u32 low) {
constantBufferSelector.iova.low = low;
constantBufferSelector.view.reset();
constantBufferSelector.view = {};
}
std::optional<ConstantBuffer> GetConstantBufferSelector() {
@ -915,13 +915,13 @@ namespace skyline::gpu::interconnect {
});
auto view{pipelineStage.constantBuffers[constantBuffer.index].view};
std::scoped_lock lock(*view);
std::scoped_lock lock(view);
bufferInfo.push_back(vk::DescriptorBufferInfo{
.buffer = view->buffer->GetBacking(),
.buffer = view.buffer->GetBacking(),
.offset = view->offset,
.range = view->range,
});
executor.AttachBuffer(view.get());
executor.AttachBuffer(view);
}
}
@ -1423,7 +1423,7 @@ namespace skyline::gpu::interconnect {
vk::VertexInputBindingDescription bindingDescription{};
vk::VertexInputBindingDivisorDescriptionEXT bindingDivisorDescription{};
IOVA start{}, end{}; //!< IOVAs covering a contiguous region in GPU AS with the vertex buffer
std::shared_ptr<BufferView> view;
BufferView view;
};
std::array<VertexBuffer, maxwell3d::VertexBufferCount> vertexBuffers{};
@ -1445,25 +1445,25 @@ namespace skyline::gpu::interconnect {
void SetVertexBufferStartIovaHigh(u32 index, u32 high) {
auto &vertexBuffer{vertexBuffers[index]};
vertexBuffer.start.high = high;
vertexBuffer.view.reset();
vertexBuffer.view = {};
}
void SetVertexBufferStartIovaLow(u32 index, u32 low) {
auto &vertexBuffer{vertexBuffers[index]};
vertexBuffer.start.low = low;
vertexBuffer.view.reset();
vertexBuffer.view = {};
}
void SetVertexBufferEndIovaHigh(u32 index, u32 high) {
auto &vertexBuffer{vertexBuffers[index]};
vertexBuffer.end.high = high;
vertexBuffer.view.reset();
vertexBuffer.view = {};
}
void SetVertexBufferEndIovaLow(u32 index, u32 low) {
auto &vertexBuffer{vertexBuffers[index]};
vertexBuffer.end.low = low;
vertexBuffer.view.reset();
vertexBuffer.view = {};
}
void SetVertexBufferDivisor(u32 index, u32 divisor) {
@ -1578,19 +1578,19 @@ namespace skyline::gpu::interconnect {
}
}
BufferView *GetVertexBuffer(size_t index) {
BufferView GetVertexBuffer(size_t index) {
auto &vertexBuffer{vertexBuffers.at(index)};
if (vertexBuffer.start > vertexBuffer.end || vertexBuffer.start == 0 || vertexBuffer.end == 0)
return nullptr;
else if (vertexBuffer.view)
return vertexBuffer.view.get();
return vertexBuffer.view;
GuestBuffer guest;
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(vertexBuffer.start, (vertexBuffer.end + 1) - vertexBuffer.start)};
guest.mappings.assign(mappings.begin(), mappings.end());
vertexBuffer.view = gpu.buffer.FindOrCreate(guest);
return vertexBuffer.view.get();
return vertexBuffer.view;
}
/* Input Assembly */
@ -1638,7 +1638,7 @@ namespace skyline::gpu::interconnect {
IOVA start{}, end{}; //!< IOVAs covering a contiguous region in GPU AS containing the index buffer (end does not represent the true extent of the index buffers, just a maximum possible extent and is set to extremely high values which cannot be used to create a buffer)
vk::IndexType type{};
vk::DeviceSize viewSize{}; //!< The size of the cached view
std::shared_ptr<BufferView> view{}; //!< A cached view tied to the IOVAs and size to allow for a faster lookup
BufferView view{}; //!< A cached view tied to the IOVAs and size to allow for a faster lookup
vk::DeviceSize GetIndexBufferSize(u32 elementCount) {
switch (type) {
@ -2120,22 +2120,22 @@ namespace skyline::gpu::interconnect {
public:
void SetIndexBufferStartIovaHigh(u32 high) {
indexBuffer.start.high = high;
indexBuffer.view.reset();
indexBuffer.view = {};
}
void SetIndexBufferStartIovaLow(u32 low) {
indexBuffer.start.low = low;
indexBuffer.view.reset();
indexBuffer.view = {};
}
void SetIndexBufferEndIovaHigh(u32 high) {
indexBuffer.end.high = high;
indexBuffer.view.reset();
indexBuffer.view = {};
}
void SetIndexBufferEndIovaLow(u32 low) {
indexBuffer.end.low = low;
indexBuffer.view.reset();
indexBuffer.view = {};
}
void SetIndexBufferFormat(maxwell3d::IndexBuffer::Format format) {
@ -2155,22 +2155,22 @@ namespace skyline::gpu::interconnect {
if (indexBuffer.type == vk::IndexType::eUint8EXT && !gpu.traits.supportsUint8Indices)
throw exception("Cannot use U8 index buffer without host GPU support");
indexBuffer.view.reset();
indexBuffer.view = {};
}
BufferView *GetIndexBuffer(u32 elementCount) {
BufferView GetIndexBuffer(u32 elementCount) {
auto size{indexBuffer.GetIndexBufferSize(elementCount)};
if (indexBuffer.start > indexBuffer.end || indexBuffer.start == 0 || indexBuffer.end == 0 || size == 0)
return nullptr;
else if (indexBuffer.view && size == indexBuffer.viewSize)
return indexBuffer.view.get();
return indexBuffer.view;
GuestBuffer guestBuffer;
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(indexBuffer.start, size)};
guestBuffer.mappings.assign(mappings.begin(), mappings.end());
indexBuffer.view = gpu.buffer.FindOrCreate(guestBuffer);
return indexBuffer.view.get();
return indexBuffer.view;
}
/* Depth */
@ -2391,10 +2391,10 @@ namespace skyline::gpu::interconnect {
vk::IndexType indexBufferType;
if constexpr (IsIndexed) {
auto indexBufferView{GetIndexBuffer(count)};
std::scoped_lock lock(*indexBufferView);
std::scoped_lock lock(indexBufferView);
executor.AttachBuffer(indexBufferView);
indexBufferHandle = indexBufferView->buffer->GetBacking();
indexBufferHandle = indexBufferView.buffer->GetBacking();
indexBufferOffset = indexBufferView->offset;
indexBufferType = indexBuffer.type;
}
@ -2414,8 +2414,8 @@ namespace skyline::gpu::interconnect {
if (vertexBuffer.bindingDescription.inputRate == vk::VertexInputRate::eInstance)
vertexBindingDivisorsDescriptions.push_back(vertexBuffer.bindingDivisorDescription);
std::scoped_lock vertexBufferLock(*vertexBufferView);
vertexBufferHandles[index] = vertexBufferView->buffer->GetBacking();
std::scoped_lock vertexBufferLock(vertexBufferView);
vertexBufferHandles[index] = vertexBufferView.buffer->GetBacking();
vertexBufferOffsets[index] = vertexBufferView->offset;
executor.AttachBuffer(vertexBufferView);
}