Rework BufferManager, Buffer and BufferView

This commit encapsulates a complex sequence of cascading changes in the process of supporting overlaps for buffers:
* We determined that it is impossible to resolve overlaps with multiple intervals per buffer within the constraint of each overlap being a contiguous view, so support for multiple intervals was dropped. The older buffer manager code was entirely reworked to be simpler, since it now only handles one interval per buffer; the new code is based on `IntervalMap` but tailored specifically for buffers.
* During overlap resolution, we ran into the problem of how existing views into a buffer being recreated would be updated: the buffer had to be replaced with a larger buffer that could contain all overlaps, and all existing views would need to be repointed to it. This was addressed by having each buffer own all views into itself, so that we can automatically recalculate the offset of every view and repoint it to the new buffer.
* We still needed to update usages of existing views. This was done by routing all access to buffer view properties (such as inside a recorded draw) through `BufferView::RegisterUsage`, which dispatches a callback with the view and the corresponding backing buffer. This callback can be stored and called again during overlap resolution with the new buffer.
* We had issues with the lifetime of the buffer due to the handle-like semantics of `BufferView` introduced in the last buffer-related commit: if we updated a view to be owned by a new buffer, we'd need to extend the lifetime of the new buffer rather than the older one. The only way to do this was a proxy owner object, `BufferDelegate`, which holds a shared pointer to the real `Buffer`, which in turn holds pointers to all of its `BufferDelegate` objects so that they can be updated on repointing. A `BufferView` is effectively just a wrapper around `std::shared_ptr<BufferDelegate>` with more favorable semantics, generally just forwarding calls.
It should additionally be noted that, to support `RegisterUsage`, the code around buffers in `GraphicsContext` was refactored to defer the actual binding until the recording phase.
This commit is contained in:
PixelyIon 2022-03-28 12:27:05 +05:30
parent a6781b38f4
commit cb1ec9a7f4
7 changed files with 363 additions and 304 deletions

View File

@ -8,45 +8,14 @@
#include "buffer.h" #include "buffer.h"
namespace skyline::gpu { namespace skyline::gpu {
vk::DeviceSize GuestBuffer::BufferSize() const {
vk::DeviceSize size{};
for (const auto &buffer : mappings)
size += buffer.size_bytes();
return size;
}
void Buffer::SetupGuestMappings() { void Buffer::SetupGuestMappings() {
auto &mappings{guest.mappings}; u8 *alignedData{util::AlignDown(guest.data(), PAGE_SIZE)};
if (mappings.size() == 1) { size_t alignedSize{static_cast<size_t>(util::AlignUp(guest.data() + guest.size(), PAGE_SIZE) - alignedData)};
auto mapping{mappings.front()};
u8 *alignedData{util::AlignDown(mapping.data(), PAGE_SIZE)};
size_t alignedSize{static_cast<size_t>(util::AlignUp(mapping.data() + mapping.size(), PAGE_SIZE) - alignedData)};
alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize); alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize);
mirror = alignedMirror.subspan(static_cast<size_t>(mapping.data() - alignedData), mapping.size()); mirror = alignedMirror.subspan(static_cast<size_t>(guest.data() - alignedData), guest.size());
} else {
std::vector<span<u8>> alignedMappings;
const auto &frontMapping{mappings.front()}; trapHandle = gpu.state.nce->TrapRegions(guest, true, [this] {
u8 *alignedData{util::AlignDown(frontMapping.data(), PAGE_SIZE)};
alignedMappings.emplace_back(alignedData, (frontMapping.data() + frontMapping.size()) - alignedData);
size_t totalSize{frontMapping.size()};
for (auto it{std::next(mappings.begin())}; it != std::prev(mappings.end()); ++it) {
auto mappingSize{it->size()};
alignedMappings.emplace_back(it->data(), mappingSize);
totalSize += mappingSize;
}
const auto &backMapping{mappings.back()};
totalSize += backMapping.size();
alignedMappings.emplace_back(backMapping.data(), util::AlignUp(backMapping.size(), PAGE_SIZE));
alignedMirror = gpu.state.process->memory.CreateMirrors(alignedMappings);
mirror = alignedMirror.subspan(static_cast<size_t>(frontMapping.data() - alignedData), totalSize);
}
trapHandle = gpu.state.nce->TrapRegions(mappings, true, [this] {
std::lock_guard lock(*this); std::lock_guard lock(*this);
SynchronizeGuest(true); // We can skip trapping since the caller will do it SynchronizeGuest(true); // We can skip trapping since the caller will do it
WaitOnFence(); WaitOnFence();
@ -58,7 +27,7 @@ namespace skyline::gpu {
}); });
} }
Buffer::Buffer(GPU &gpu, GuestBuffer guest) : gpu(gpu), size(guest.BufferSize()), backing(gpu.memory.AllocateBuffer(size)), guest(std::move(guest)) { Buffer::Buffer(GPU &gpu, GuestBuffer guest) : gpu(gpu), backing(gpu.memory.AllocateBuffer(guest.size())), guest(guest) {
SetupGuestMappings(); SetupGuestMappings();
} }
@ -111,7 +80,7 @@ namespace skyline::gpu {
if (dirtyState != DirtyState::CpuDirty) if (dirtyState != DirtyState::CpuDirty)
return; return;
if (pCycle != cycle.lock()) if (!cycle.owner_before(pCycle))
WaitOnFence(); WaitOnFence();
TRACE_EVENT("gpu", "Buffer::SynchronizeHostWithCycle"); TRACE_EVENT("gpu", "Buffer::SynchronizeHostWithCycle");
@ -127,11 +96,12 @@ namespace skyline::gpu {
} }
} }
void Buffer::SynchronizeGuest(bool skipTrap) { void Buffer::SynchronizeGuest(bool skipTrap, bool skipFence) {
if (dirtyState != DirtyState::GpuDirty) if (dirtyState != DirtyState::GpuDirty)
return; // If the buffer has not been used on the GPU, there is no need to synchronize it return; // If the buffer has not been used on the GPU, there is no need to synchronize it
WaitOnFence(); if (!skipFence)
WaitOnFence();
TRACE_EVENT("gpu", "Buffer::SynchronizeGuest"); TRACE_EVENT("gpu", "Buffer::SynchronizeGuest");
@ -157,13 +127,20 @@ namespace skyline::gpu {
}; };
void Buffer::SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &pCycle) { void Buffer::SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &pCycle) {
if (pCycle != cycle.lock()) if (!cycle.owner_before(pCycle))
WaitOnFence(); WaitOnFence();
pCycle->AttachObject(std::make_shared<BufferGuestSync>(shared_from_this())); pCycle->AttachObject(std::make_shared<BufferGuestSync>(shared_from_this()));
cycle = pCycle; cycle = pCycle;
} }
void Buffer::Read(span<u8> data, vk::DeviceSize offset) {
if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean)
std::memcpy(data.data(), mirror.data() + offset, data.size());
else if (dirtyState == DirtyState::GpuDirty)
std::memcpy(data.data(), backing.data() + offset, data.size());
}
void Buffer::Write(span<u8> data, vk::DeviceSize offset) { void Buffer::Write(span<u8> data, vk::DeviceSize offset) {
if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean) if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean)
std::memcpy(mirror.data() + offset, data.data(), data.size()); std::memcpy(mirror.data() + offset, data.data(), data.size());
@ -171,51 +148,89 @@ namespace skyline::gpu {
std::memcpy(backing.data() + offset, data.data(), data.size()); std::memcpy(backing.data() + offset, data.data(), data.size());
} }
Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) : offset(offset), range(range), format(format) {} Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) : offset(offset), size(size), format(format) {}
BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) { Buffer::BufferDelegate::BufferDelegate(std::shared_ptr<Buffer> pBuffer, Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) {
for (auto &view : views) iterator = buffer->delegates.emplace(buffer->delegates.end(), this);
if (view.offset == offset && view.range == range && view.format == format)
return BufferView{shared_from_this(), &view};
views.emplace_back(offset, range, format);
return BufferView{shared_from_this(), &views.back()};
} }
BufferView::BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view) : buffer(buffer), view(view) {} Buffer::BufferDelegate::~BufferDelegate() {
std::scoped_lock lock(*this);
buffer->delegates.erase(iterator);
}
void BufferView::lock() { void Buffer::BufferDelegate::lock() {
auto backing{std::atomic_load(&buffer)}; auto lBuffer{std::atomic_load(&buffer)};
while (true) { while (true) {
backing->lock(); lBuffer->lock();
auto latestBacking{std::atomic_load(&buffer)}; auto latestBacking{std::atomic_load(&buffer)};
if (backing == latestBacking) if (lBuffer == latestBacking)
return; return;
backing->unlock(); lBuffer->unlock();
backing = latestBacking; lBuffer = latestBacking;
} }
} }
void BufferView::unlock() { void Buffer::BufferDelegate::unlock() {
buffer->unlock(); buffer->unlock();
} }
bool BufferView::try_lock() { bool Buffer::BufferDelegate::try_lock() {
auto backing{std::atomic_load(&buffer)}; auto lBuffer{std::atomic_load(&buffer)};
while (true) { while (true) {
bool success{backing->try_lock()}; bool success{lBuffer->try_lock()};
auto latestBacking{std::atomic_load(&buffer)}; auto latestBuffer{std::atomic_load(&buffer)};
if (backing == latestBacking) if (lBuffer == latestBuffer)
// We want to ensure that the try_lock() was on the latest backing and not on an outdated one // We want to ensure that the try_lock() was on the latest backing and not on an outdated one
return success; return success;
if (success) if (success)
// We only unlock() if the try_lock() was successful and we acquired the mutex // We only unlock() if the try_lock() was successful and we acquired the mutex
backing->unlock(); lBuffer->unlock();
backing = latestBacking; lBuffer = latestBuffer;
} }
} }
BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) {
for (auto &view : views)
if (view.offset == offset && view.size == size && view.format == format)
return BufferView{shared_from_this(), &view};
views.emplace_back(offset, size, format);
return BufferView{shared_from_this(), &views.back()};
}
BufferView::BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view) : bufferDelegate(std::make_shared<Buffer::BufferDelegate>(std::move(buffer), view)) {}
void BufferView::AttachCycle(const std::shared_ptr<FenceCycle> &cycle) {
auto buffer{bufferDelegate->buffer.get()};
if (!buffer->cycle.owner_before(cycle)) {
buffer->WaitOnFence();
buffer->cycle = cycle;
cycle->AttachObject(bufferDelegate);
}
}
void BufferView::RegisterUsage(const std::function<void(const Buffer::BufferViewStorage &, const std::shared_ptr<Buffer> &)> &usageCallback) {
usageCallback(*bufferDelegate->view, bufferDelegate->buffer);
if (!bufferDelegate->usageCallback) {
bufferDelegate->usageCallback = usageCallback;
} else {
bufferDelegate->usageCallback = [usageCallback, oldCallback = std::move(bufferDelegate->usageCallback)](const Buffer::BufferViewStorage &pView, const std::shared_ptr<Buffer> &buffer) {
oldCallback(pView, buffer);
usageCallback(pView, buffer);
};
}
}
void BufferView::Read(span<u8> data, vk::DeviceSize offset) const {
bufferDelegate->buffer->Read(data, offset + bufferDelegate->view->offset);
}
void BufferView::Write(span<u8> data, vk::DeviceSize offset) const {
bufferDelegate->buffer->Write(data, offset + bufferDelegate->view->offset);
}
} }

View File

@ -7,18 +7,7 @@
#include "memory_manager.h" #include "memory_manager.h"
namespace skyline::gpu { namespace skyline::gpu {
/** using GuestBuffer = span<u8>; //!< The CPU mapping for the guest buffer, multiple mappings for buffers aren't supported since overlaps cannot be reconciled
* @brief A descriptor for a GPU buffer on the guest
*/
struct GuestBuffer {
using Mappings = boost::container::small_vector<span < u8>, 3>;
Mappings mappings; //!< Spans to CPU memory for the underlying data backing this buffer
/**
* @return The total size of the buffer by adding up the size of all mappings
*/
vk::DeviceSize BufferSize() const;
};
struct BufferView; struct BufferView;
class BufferManager; class BufferManager;
@ -31,7 +20,6 @@ namespace skyline::gpu {
private: private:
GPU &gpu; GPU &gpu;
std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing
vk::DeviceSize size;
memory::Buffer backing; memory::Buffer backing;
GuestBuffer guest; GuestBuffer guest;
@ -44,19 +32,46 @@ namespace skyline::gpu {
GpuDirty, //!< The GPU buffer has been modified but the CPU mappings have not been updated GpuDirty, //!< The GPU buffer has been modified but the CPU mappings have not been updated
} dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU buffer } dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU buffer
public:
/** /**
* @brief Storage for all metadata about a specific view into the buffer, used to prevent redundant view creation and duplication of VkBufferView(s) * @brief Storage for all metadata about a specific view into the buffer, used to prevent redundant view creation and duplication of VkBufferView(s)
*/ */
struct BufferViewStorage { struct BufferViewStorage {
public:
vk::DeviceSize offset; vk::DeviceSize offset;
vk::DeviceSize range; vk::DeviceSize size;
vk::Format format; vk::Format format;
BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format); BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format);
}; };
private:
std::list<BufferViewStorage> views; //!< BufferViewStorage(s) that are backed by this Buffer, used for storage and repointing to a new Buffer on deletion std::list<BufferViewStorage> views; //!< BufferViewStorage(s) that are backed by this Buffer, used for storage and repointing to a new Buffer on deletion
public:
/**
* @brief A delegate for a strong reference to a Buffer by a BufferView which can be changed to another Buffer transparently
* @note This class conforms to the Lockable and BasicLockable C++ named requirements
*/
struct BufferDelegate : public FenceCycleDependency {
std::shared_ptr<Buffer> buffer;
Buffer::BufferViewStorage *view;
std::function<void(const BufferViewStorage &, const std::shared_ptr<Buffer> &)> usageCallback;
std::list<BufferDelegate *>::iterator iterator;
BufferDelegate(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view);
~BufferDelegate();
void lock();
void unlock();
bool try_lock();
};
private:
std::list<BufferDelegate *> delegates; //!< The reference delegates for this buffer, used to prevent the buffer from being deleted while it is still in use
friend BufferView; friend BufferView;
friend BufferManager; friend BufferManager;
@ -131,9 +146,10 @@ namespace skyline::gpu {
/** /**
* @brief Synchronizes the guest buffer with the host buffer * @brief Synchronizes the guest buffer with the host buffer
* @param skipTrap If true, setting up a CPU trap will be skipped and the dirty state will be Clean/CpuDirty * @param skipTrap If true, setting up a CPU trap will be skipped and the dirty state will be Clean/CpuDirty
* @param skipFence If true, waiting on the currently attached fence will be skipped
* @note The buffer **must** be locked prior to calling this * @note The buffer **must** be locked prior to calling this
*/ */
void SynchronizeGuest(bool skipTrap = false); void SynchronizeGuest(bool skipTrap = false, bool skipFence = false);
/** /**
* @brief Synchronizes the guest buffer with the host buffer when the FenceCycle is signalled * @brief Synchronizes the guest buffer with the host buffer when the FenceCycle is signalled
@ -142,6 +158,11 @@ namespace skyline::gpu {
*/ */
void SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &cycle); void SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &cycle);
/**
* @brief Reads data at the specified offset in the buffer
*/
void Read(span<u8> data, vk::DeviceSize offset);
/** /**
* @brief Writes data at the specified offset in the buffer * @brief Writes data at the specified offset in the buffer
*/ */
@ -151,7 +172,7 @@ namespace skyline::gpu {
* @return A cached or newly created view into this buffer with the supplied attributes * @return A cached or newly created view into this buffer with the supplied attributes
* @note The buffer **must** be locked prior to calling this * @note The buffer **must** be locked prior to calling this
*/ */
BufferView GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format = {}); BufferView GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format = {});
}; };
/** /**
@ -160,41 +181,70 @@ namespace skyline::gpu {
* @note This class conforms to the Lockable and BasicLockable C++ named requirements * @note This class conforms to the Lockable and BasicLockable C++ named requirements
*/ */
struct BufferView { struct BufferView {
std::shared_ptr<Buffer> buffer; std::shared_ptr<Buffer::BufferDelegate> bufferDelegate;
Buffer::BufferViewStorage *view;
BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view); BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view);
constexpr BufferView(nullptr_t = nullptr) : buffer(nullptr), view(nullptr) {} constexpr BufferView(nullptr_t = nullptr) : bufferDelegate(nullptr) {}
constexpr operator bool() const {
return view != nullptr;
}
constexpr Buffer::BufferViewStorage *operator->() {
return view;
}
operator std::shared_ptr<FenceCycleDependency>() {
return buffer;
}
/** /**
* @brief Acquires an exclusive lock on the buffer for the calling thread * @brief Acquires an exclusive lock on the buffer for the calling thread
* @note Naming is in accordance to the BasicLockable named requirement * @note Naming is in accordance to the BasicLockable named requirement
*/ */
void lock(); void lock() const {
bufferDelegate->lock();
}
/** /**
* @brief Relinquishes an existing lock on the buffer by the calling thread * @brief Relinquishes an existing lock on the buffer by the calling thread
* @note Naming is in accordance to the BasicLockable named requirement * @note Naming is in accordance to the BasicLockable named requirement
*/ */
void unlock(); void unlock() const {
bufferDelegate->unlock();
}
/** /**
* @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread * @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread
* @note Naming is in accordance to the Lockable named requirement * @note Naming is in accordance to the Lockable named requirement
*/ */
bool try_lock(); bool try_lock() const {
return bufferDelegate->try_lock();
}
constexpr operator bool() const {
return bufferDelegate != nullptr;
}
/**
* @note The buffer **must** be locked prior to calling this
*/
Buffer::BufferDelegate *operator->() const {
return bufferDelegate.get();
}
/**
* @brief Attaches a fence cycle to the underlying buffer in a way that it will be synchronized with the latest backing buffer
* @note The view **must** be locked prior to calling this
*/
void AttachCycle(const std::shared_ptr<FenceCycle> &cycle);
/**
* @brief Registers a callback for a usage of this view, it may be called multiple times due to the view being recreated with different backings
* @note The callback will be automatically called the first time after registration
* @note The view **must** be locked prior to calling this
*/
void RegisterUsage(const std::function<void(const Buffer::BufferViewStorage &, const std::shared_ptr<Buffer> &)> &usageCallback);
/**
* @brief Reads data at the specified offset in the view
* @note The view **must** be locked prior to calling this
*/
void Read(span<u8> data, vk::DeviceSize offset) const;
/**
* @brief Writes data at the specified offset in the view
* @note The view **must** be locked prior to calling this
*/
void Write(span<u8> data, vk::DeviceSize offset) const;
}; };
} }

View File

@ -8,101 +8,68 @@
namespace skyline::gpu { namespace skyline::gpu {
BufferManager::BufferManager(GPU &gpu) : gpu(gpu) {} BufferManager::BufferManager(GPU &gpu) : gpu(gpu) {}
BufferView BufferManager::FindOrCreate(const GuestBuffer &guest) { bool BufferManager::BufferLessThan(const std::shared_ptr<Buffer> &it, u8 *pointer) {
auto guestMapping{guest.mappings.front()}; return it->guest.begin().base() < pointer;
}
/*
* Iterate over all buffers that overlap with the first mapping of the guest buffer and compare the mappings:
* 1) All mappings match up perfectly, we check that the rest of the supplied mappings correspond to mappings in the buffer
* 1.1) If they match as well, we return a view encompassing the entire buffer
* 2) Only a contiguous range of mappings match, we check for the overlap bounds, it can go two ways:
* 2.1) If the supplied buffer is smaller than the matching buffer, we return a view encompassing the mappings into the buffer
* 2.2) If the matching buffer is smaller than the supplied buffer, we make the matching buffer larger and return it
* 3) If there's another overlap we go back to (1) with it else we go to (4)
* 4) Create a new buffer and insert it in the map then return it
*/
BufferView BufferManager::FindOrCreate(GuestBuffer guestMapping, const std::shared_ptr<FenceCycle> &cycle) {
std::scoped_lock lock(mutex); std::scoped_lock lock(mutex);
std::shared_ptr<Buffer> match{};
auto mappingEnd{std::upper_bound(buffers.begin(), buffers.end(), guestMapping)}, hostMapping{mappingEnd};
if (hostMapping != buffers.begin() && (--hostMapping)->end() > guestMapping.begin()) {
auto &hostMappings{hostMapping->buffer->guest.mappings};
if (hostMapping->contains(guestMapping)) {
// We need to check that all corresponding mappings in the candidate buffer and the guest buffer match up
// Only the start of the first matched mapping and the end of the last mapping can not match up as this is the case for views
auto firstHostMapping{hostMapping->iterator};
auto lastGuestMapping{guest.mappings.back()};
auto endHostMapping{std::find_if(firstHostMapping, hostMappings.end(), [&lastGuestMapping](const span<u8> &it) {
return lastGuestMapping.begin() > it.begin() && lastGuestMapping.end() > it.end();
})}; //!< A past-the-end iterator for the last host mapping, the final valid mapping is prior to this iterator
bool mappingMatch{std::equal(firstHostMapping, endHostMapping, guest.mappings.begin(), guest.mappings.end(), [](const span<u8> &lhs, const span<u8> &rhs) {
return lhs.end() == rhs.end(); // We check end() here to implicitly ignore any offset from the first mapping
})};
auto &lastHostMapping{*std::prev(endHostMapping)}; // Lookup for any buffers overlapping with the supplied guest mapping
if (firstHostMapping == hostMappings.begin() && firstHostMapping->begin() == guestMapping.begin() && mappingMatch && endHostMapping == hostMappings.end() && lastGuestMapping.end() == lastHostMapping.end()) { boost::container::small_vector<std::shared_ptr<Buffer>, 4> overlaps;
// We've gotten a perfect 1:1 match for *all* mappings from the start to end for (auto entryIt{std::lower_bound(buffers.begin(), buffers.end(), guestMapping.end().base(), BufferLessThan)}; entryIt != buffers.begin() && (*--entryIt)->guest.begin() <= guestMapping.end();)
std::scoped_lock bufferLock(*hostMapping->buffer); if ((*entryIt)->guest.end() > guestMapping.begin())
return hostMapping->buffer->GetView(0, hostMapping->buffer->size); overlaps.push_back(*entryIt);
} else if (mappingMatch && firstHostMapping->begin() > guestMapping.begin() && lastHostMapping.end() > lastGuestMapping.end()) {
// We've gotten a guest buffer that is located entirely within a host buffer if (overlaps.size() == 1) [[likely]] {
std::scoped_lock bufferLock(*hostMapping->buffer); auto buffer{overlaps.front()};
return hostMapping->buffer->GetView(hostMapping->offset + static_cast<vk::DeviceSize>(hostMapping->begin() - guestMapping.begin()), guest.BufferSize()); if (buffer->guest.begin() <= guestMapping.begin() && buffer->guest.end() >= guestMapping.end()) {
} // If we find a buffer which can entirely fit the guest mapping, we can just return a view into it
std::scoped_lock bufferLock{*buffer};
return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest.begin()), guestMapping.size());
} }
} }
/* TODO: Handle overlapping buffers // Find the extents of the new buffer we want to create that can hold all overlapping buffers
// Create a list of all overlapping buffers and update the guest mappings to fit them all auto lowestAddress{guestMapping.begin().base()}, highestAddress{guestMapping.end().base()};
boost::container::small_vector<std::pair<std::shared_ptr<Buffer>, u32>, 4> overlappingBuffers; for (const auto &overlap : overlaps) {
GuestBuffer::Mappings newMappings; auto mapping{overlap->guest};
if (mapping.begin().base() < lowestAddress)
auto guestMappingIt{guest.mappings.begin()}; lowestAddress = mapping.begin().base();
while (true) { if (mapping.end().base() > highestAddress)
do { highestAddress = mapping.end().base();
hostMapping->begin();
overlappingBuffers.emplace_back(hostMapping->buffer, 4);
} while (hostMapping != buffers.begin() && (--hostMapping)->end() > guestMappingIt->begin());
// Iterate over all guest mappings to find overlapping buffers, not just the first
auto nextGuestMappingIt{std::next(guestMappingIt)};
if (nextGuestMappingIt != guest.mappings.end())
hostMapping = std::upper_bound(buffers.begin(), buffers.end(), *nextGuestMappingIt);
else
break;
guestMappingIt = nextGuestMappingIt;
} }
// Create a buffer that can contain all the overlapping buffers auto newBuffer{std::make_shared<Buffer>(gpu, span<u8>(lowestAddress, highestAddress))};
auto buffer{std::make_shared<Buffer>(gpu, guest)}; for (auto &overlap : overlaps) {
std::scoped_lock overlapLock{*overlap};
// Delete mappings from all overlapping buffers and repoint all buffer views if (!overlap->cycle.owner_before(cycle))
for (auto &overlappingBuffer : overlappingBuffers) { overlap->WaitOnFence(); // We want to only wait on the fence cycle if it's not the current fence cycle
std::scoped_lock overlappingBufferLock(*overlappingBuffer.first); overlap->SynchronizeGuest(true, true); // Sync back the buffer before we destroy it
auto &bufferMappings{hostMapping->buffer->guest.mappings};
// Delete all mappings of the overlapping buffers buffers.erase(std::find(buffers.begin(), buffers.end(), overlap));
while ((++it) != buffer->guest.mappings.end()) {
guestMapping = *it; // Transfer all views from the overlapping buffer to the new buffer with the new buffer and updated offset
auto mapping{std::upper_bound(buffers.begin(), buffers.end(), guestMapping)}; vk::DeviceSize overlapOffset{static_cast<vk::DeviceSize>(overlap->guest.begin() - newBuffer->guest.begin())};
buffers.emplace(mapping, BufferMapping{buffer, it, offset, guestMapping}); if (overlapOffset != 0)
offset += mapping->size_bytes(); for (auto &view : overlap->views)
view.offset += overlapOffset;
newBuffer->views.splice(newBuffer->views.end(), overlap->views);
// Transfer all delegates references from the overlapping buffer to the new buffer
for (auto &delegate : overlap->delegates) {
atomic_exchange(&delegate->buffer, newBuffer);
if (delegate->usageCallback)
delegate->usageCallback(*delegate->view, newBuffer);
} }
}
*/
auto buffer{std::make_shared<Buffer>(gpu, guest)}; newBuffer->delegates.splice(newBuffer->delegates.end(), overlap->delegates);
auto it{buffer->guest.mappings.begin()};
buffers.emplace(mappingEnd, BufferMapping{buffer, it, 0, guestMapping});
vk::DeviceSize offset{};
while ((++it) != buffer->guest.mappings.end()) {
guestMapping = *it;
auto mapping{std::upper_bound(buffers.begin(), buffers.end(), guestMapping)};
buffers.emplace(mapping, BufferMapping{buffer, it, offset, guestMapping});
offset += mapping->size_bytes();
} }
return buffer->GetView(0, buffer->size); buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), newBuffer->guest.end().base(), BufferLessThan), newBuffer);
return newBuffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - newBuffer->guest.begin()), guestMapping.size());
} }
} }

View File

@ -11,25 +11,14 @@ namespace skyline::gpu {
*/ */
class BufferManager { class BufferManager {
private: private:
/**
* @brief A single contiguous mapping of a buffer in the CPU address space
*/
struct BufferMapping : span<u8> {
std::shared_ptr<Buffer> buffer;
GuestBuffer::Mappings::iterator iterator; //!< An iterator to the mapping in the buffer's GuestBufferMappings corresponding to this mapping
vk::DeviceSize offset; //!< Offset of this mapping relative to the start of the buffer
template<typename... Args>
BufferMapping(std::shared_ptr<Buffer> buffer, GuestBuffer::Mappings::iterator iterator, vk::DeviceSize offset, Args &&... args)
: span<u8>(std::forward<Args>(args)...),
buffer(std::move(buffer)),
iterator(iterator),
offset(offset) {}
};
GPU &gpu; GPU &gpu;
std::mutex mutex; //!< Synchronizes access to the buffer mappings std::mutex mutex; //!< Synchronizes access to the buffer mappings
std::vector<BufferMapping> buffers; //!< A sorted vector of all buffer mappings std::vector<std::shared_ptr<Buffer>> buffers; //!< A sorted vector of all buffer mappings
/**
* @return If the start of the supplied buffer's guest mapping is less than the supplied pointer
*/
static bool BufferLessThan(const std::shared_ptr<Buffer> &it, u8 *pointer);
public: public:
BufferManager(GPU &gpu); BufferManager(GPU &gpu);
@ -37,6 +26,6 @@ namespace skyline::gpu {
/** /**
* @return A pre-existing or newly created Buffer object which covers the supplied mappings * @return A pre-existing or newly created Buffer object which covers the supplied mappings
*/ */
BufferView FindOrCreate(const GuestBuffer &guest); BufferView FindOrCreate(GuestBuffer guestMapping, const std::shared_ptr<FenceCycle> &cycle = nullptr);
}; };
} }

View File

@ -35,13 +35,10 @@ namespace skyline::gpu::interconnect {
cycle->AttachObject(view->shared_from_this()); cycle->AttachObject(view->shared_from_this());
} }
void CommandExecutor::AttachBuffer(BufferView view) { void CommandExecutor::AttachBuffer(BufferView &view) {
auto buffer{view.buffer.get()}; if (!syncBuffers.contains(view.bufferDelegate)) {
if (!syncBuffers.contains(buffer)) { view.AttachCycle(cycle);
buffer->WaitOnFence(); syncBuffers.emplace(view.bufferDelegate);
buffer->cycle = cycle;
cycle->AttachObject(view);
syncBuffers.emplace(buffer);
} }
} }
@ -133,8 +130,10 @@ namespace skyline::gpu::interconnect {
for (auto texture : syncTextures) for (auto texture : syncTextures)
texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true); texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true);
for (auto buffer : syncBuffers) for (const auto& delegate : syncBuffers) {
buffer->SynchronizeHostWithCycle(cycle, true); delegate->buffer->SynchronizeHostWithCycle(cycle, true);
delegate->usageCallback = nullptr;
}
vk::RenderPass lRenderPass; vk::RenderPass lRenderPass;
u32 subpassIndex; u32 subpassIndex;

View File

@ -16,11 +16,12 @@ namespace skyline::gpu::interconnect {
private: private:
GPU &gpu; GPU &gpu;
CommandScheduler::ActiveCommandBuffer activeCommandBuffer; CommandScheduler::ActiveCommandBuffer activeCommandBuffer;
std::shared_ptr<FenceCycle> cycle;
boost::container::stable_vector<node::NodeVariant> nodes; boost::container::stable_vector<node::NodeVariant> nodes;
node::RenderPassNode *renderPass{}; node::RenderPassNode *renderPass{};
std::unordered_set<Texture *> syncTextures; //!< All textures that need to be synced prior to and after execution std::unordered_set<Texture *> syncTextures; //!< All textures that need to be synced prior to and after execution
std::unordered_set<Buffer *> syncBuffers; //!< All buffers that need to be synced prior to and after execution
using SharedBufferDelegate = std::shared_ptr<Buffer::BufferDelegate>;
std::unordered_set<SharedBufferDelegate> syncBuffers; //!< All buffers that need to be synced prior to and after execution
/** /**
* @return If a new render pass was created by the function or the current one was reused as it was compatible * @return If a new render pass was created by the function or the current one was reused as it was compatible
@ -28,6 +29,8 @@ namespace skyline::gpu::interconnect {
bool CreateRenderPass(vk::Rect2D renderArea); bool CreateRenderPass(vk::Rect2D renderArea);
public: public:
std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands
CommandExecutor(const DeviceState &state); CommandExecutor(const DeviceState &state);
~CommandExecutor(); ~CommandExecutor();
@ -44,7 +47,7 @@ namespace skyline::gpu::interconnect {
* @note The supplied buffer **must** be locked by the calling thread * @note The supplied buffer **must** be locked by the calling thread
* @note This'll automatically handle syncing of the buffer in the most optimal way possible * @note This'll automatically handle syncing of the buffer in the most optimal way possible
*/ */
void AttachBuffer(BufferView view); void AttachBuffer(BufferView &view);
/** /**
* @brief Attach the lifetime of the fence cycle dependency to the command buffer * @brief Attach the lifetime of the fence cycle dependency to the command buffer

View File

@ -571,7 +571,6 @@ namespace skyline::gpu::interconnect {
struct ConstantBuffer { struct ConstantBuffer {
IOVA iova; IOVA iova;
u32 size; u32 size;
GuestBuffer guest;
BufferView view; BufferView view;
/** /**
@ -581,20 +580,9 @@ namespace skyline::gpu::interconnect {
template<typename T> template<typename T>
T Read(size_t offset) const { T Read(size_t offset) const {
T object; T object;
size_t objectOffset{}; std::scoped_lock lock{view};
for (auto &mapping : guest.mappings) { view.Read(span<T>(object).template cast<u8>(), offset);
if (offset < mapping.size_bytes()) { return object;
auto copySize{std::min(mapping.size_bytes() - offset, sizeof(T))};
std::memcpy(reinterpret_cast<u8 *>(&object) + objectOffset, mapping.data() + offset, copySize);
objectOffset += copySize;
if (objectOffset == sizeof(T))
return object;
offset = mapping.size_bytes();
} else {
offset -= mapping.size_bytes();
}
}
throw exception("Object extent ({} + {} = {}) is larger than constant buffer size: {}", size + offset, sizeof(T), size + offset + sizeof(T), size);
} }
/** /**
@ -604,7 +592,7 @@ namespace skyline::gpu::interconnect {
template<typename T> template<typename T>
void Write(T &object, size_t offset) { void Write(T &object, size_t offset) {
std::scoped_lock lock{view}; std::scoped_lock lock{view};
view.buffer->Write(span<T>(object).template cast<u8>(), view->offset + offset); view.Write(span<T>(object).template cast<u8>(), offset);
} }
}; };
ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it
@ -633,12 +621,7 @@ namespace skyline::gpu::interconnect {
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)}; auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)};
// Ignore unmapped areas from mappings due to buggy games setting the wrong cbuf size constantBufferSelector.view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
mappings.erase(ranges::find_if(mappings, [](const auto &mapping) { return !mapping.valid(); }), mappings.end());
constantBufferSelector.guest.mappings.assign(mappings.begin(), mappings.end());
constantBufferSelector.view = gpu.buffer.FindOrCreate(constantBufferSelector.guest);
return constantBufferSelector; return constantBufferSelector;
} }
@ -750,14 +733,11 @@ namespace skyline::gpu::interconnect {
.convert_depth_mode = true // This is required for the default GPU register state .convert_depth_mode = true // This is required for the default GPU register state
}; };
constexpr static size_t PipelineUniqueDescriptorTypeCount{2}; //!< The amount of unique descriptor types that may be bound to a pipeline constexpr static size_t PipelineUniqueDescriptorTypeCount{3}; //!< The amount of unique descriptor types that may be bound to a pipeline
constexpr static size_t MaxPipelineDescriptorWriteCount{maxwell3d::PipelineStageCount * PipelineUniqueDescriptorTypeCount}; //!< The maxium amount of descriptors writes that are used to bind a pipeline constexpr static size_t PipelineDescriptorWritesReservedCount{maxwell3d::PipelineStageCount * PipelineUniqueDescriptorTypeCount}; //!< The amount of descriptors writes reserved in advance to bind a pipeline, this is not a hard limit due to the Adreno descriptor quirk
constexpr static size_t MaxPipelineDescriptorCount{100}; //!< The maxium amount of descriptors we support being bound to a pipeline constexpr static size_t MaxPipelineDescriptorCount{100}; //!< The maxium amount of descriptors we support being bound to a pipeline
boost::container::static_vector<vk::WriteDescriptorSet, MaxPipelineDescriptorWriteCount> descriptorSetWrites;
boost::container::static_vector<vk::DescriptorSetLayoutBinding, MaxPipelineDescriptorCount> layoutBindings; boost::container::static_vector<vk::DescriptorSetLayoutBinding, MaxPipelineDescriptorCount> layoutBindings;
boost::container::static_vector<vk::DescriptorBufferInfo, MaxPipelineDescriptorCount> bufferInfo;
boost::container::static_vector<vk::DescriptorImageInfo, MaxPipelineDescriptorCount> imageInfo;
/** /**
* @brief All state concerning the shader programs and their bindings * @brief All state concerning the shader programs and their bindings
@ -767,7 +747,18 @@ namespace skyline::gpu::interconnect {
boost::container::static_vector<std::shared_ptr<vk::raii::ShaderModule>, maxwell3d::PipelineStageCount> shaderModules; //!< Shader modules for every pipeline stage boost::container::static_vector<std::shared_ptr<vk::raii::ShaderModule>, maxwell3d::PipelineStageCount> shaderModules; //!< Shader modules for every pipeline stage
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages; //!< Shader modules for every pipeline stage boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages; //!< Shader modules for every pipeline stage
vk::raii::DescriptorSetLayout descriptorSetLayout; //!< The descriptor set layout for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero) vk::raii::DescriptorSetLayout descriptorSetLayout; //!< The descriptor set layout for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero)
span<vk::WriteDescriptorSet> descriptorSetWrites; //!< The writes to the descriptor set that need to be done prior to executing a pipeline
struct DescriptorSetWrites {
std::vector<vk::WriteDescriptorSet> writes; //!< The descriptor set writes for the pipeline
std::vector<vk::DescriptorBufferInfo> bufferDescriptors; //!< The storage for buffer descriptors
std::vector<vk::DescriptorImageInfo> imageDescriptors; //!< The storage for image descriptors
std::vector<vk::WriteDescriptorSet> &operator*() {
return writes;
}
};
std::unique_ptr<DescriptorSetWrites> descriptorSetWrites; //!< The writes to the descriptor set that need to be done prior to executing a pipeline
}; };
/** /**
@ -798,11 +789,10 @@ namespace skyline::gpu::interconnect {
auto ssbo{cbuf.Read<SsboDescriptor>(descriptor.cbuf_offset)}; auto ssbo{cbuf.Read<SsboDescriptor>(descriptor.cbuf_offset)};
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(ssbo.iova, ssbo.size)}; auto mappings{channelCtx.asCtx->gmmu.TranslateRange(ssbo.iova, ssbo.size)};
if (mappings.size() != 1)
Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
GuestBuffer guestBuffer; return gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
guestBuffer.mappings.assign(mappings.begin(), mappings.end());
return gpu.buffer.FindOrCreate(guestBuffer);
} }
/** /**
@ -889,14 +879,29 @@ namespace skyline::gpu::interconnect {
} }
} }
descriptorSetWrites.clear(); auto descriptorSetWrites{std::make_unique<ShaderProgramState::DescriptorSetWrites>()};
auto &descriptorWrites{**descriptorSetWrites};
descriptorWrites.reserve(PipelineDescriptorWritesReservedCount);
auto &bufferDescriptors{descriptorSetWrites->bufferDescriptors};
auto &imageDescriptors{descriptorSetWrites->imageDescriptors};
size_t bufferCount{}, imageCount{};
for (auto &pipelineStage : pipelineStages) {
if (pipelineStage.enabled) {
auto &program{pipelineStage.program->program};
bufferCount += program.info.constant_buffer_descriptors.size() + program.info.storage_buffers_descriptors.size();
imageCount += program.info.texture_descriptors.size();
}
}
bufferDescriptors.resize(bufferCount);
imageDescriptors.resize(imageCount);
layoutBindings.clear(); layoutBindings.clear();
bufferInfo.clear();
imageInfo.clear();
runtimeInfo.previous_stage_stores.mask.set(); // First stage should always have all bits set runtimeInfo.previous_stage_stores.mask.set(); // First stage should always have all bits set
ShaderCompiler::Backend::Bindings bindings{}; ShaderCompiler::Backend::Bindings bindings{};
size_t bufferIndex{}, imageIndex{};
boost::container::static_vector<std::shared_ptr<vk::raii::ShaderModule>, maxwell3d::PipelineStageCount> shaderModules; boost::container::static_vector<std::shared_ptr<vk::raii::ShaderModule>, maxwell3d::PipelineStageCount> shaderModules;
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages; boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages;
for (auto &pipelineStage : pipelineStages) { for (auto &pipelineStage : pipelineStages) {
@ -920,11 +925,11 @@ namespace skyline::gpu::interconnect {
u32 bindingIndex{pipelineStage.bindingBase}; u32 bindingIndex{pipelineStage.bindingBase};
if (!program.info.constant_buffer_descriptors.empty()) { if (!program.info.constant_buffer_descriptors.empty()) {
descriptorSetWrites.push_back(vk::WriteDescriptorSet{ descriptorWrites.push_back(vk::WriteDescriptorSet{
.dstBinding = bindingIndex, .dstBinding = bindingIndex,
.descriptorCount = static_cast<u32>(program.info.constant_buffer_descriptors.size()), .descriptorCount = static_cast<u32>(program.info.constant_buffer_descriptors.size()),
.descriptorType = vk::DescriptorType::eUniformBuffer, .descriptorType = vk::DescriptorType::eUniformBuffer,
.pBufferInfo = bufferInfo.data() + bufferInfo.size(), .pBufferInfo = bufferDescriptors.data() + bufferIndex,
}); });
for (auto &constantBuffer : program.info.constant_buffer_descriptors) { for (auto &constantBuffer : program.info.constant_buffer_descriptors) {
@ -936,23 +941,24 @@ namespace skyline::gpu::interconnect {
}); });
auto view{pipelineStage.constantBuffers[constantBuffer.index].view}; auto view{pipelineStage.constantBuffers[constantBuffer.index].view};
std::scoped_lock lock{view}; std::scoped_lock lock(view);
bufferInfo.push_back(vk::DescriptorBufferInfo{ view.RegisterUsage([descriptor = bufferDescriptors.data() + bufferIndex++](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
.buffer = view.buffer->GetBacking(), *descriptor = vk::DescriptorBufferInfo{
.offset = view->offset, .buffer = buffer->GetBacking(),
.range = view->range, .offset = view.offset,
.range = view.size,
};
}); });
executor.AttachBuffer(view); executor.AttachBuffer(view);
} }
} }
if (!program.info.storage_buffers_descriptors.empty()) { if (!program.info.storage_buffers_descriptors.empty()) {
descriptorSetWrites.push_back({ descriptorWrites.push_back(vk::WriteDescriptorSet{
.dstBinding = bindingIndex, .dstBinding = bindingIndex,
.descriptorCount = static_cast<u32>(program.info.storage_buffers_descriptors.size()), .descriptorCount = static_cast<u32>(program.info.storage_buffers_descriptors.size()),
.descriptorType = vk::DescriptorType::eStorageBuffer, .descriptorType = vk::DescriptorType::eStorageBuffer,
.pBufferInfo = bufferInfo.data() + bufferInfo.size(), .pBufferInfo = bufferDescriptors.data() + bufferIndex,
}); });
for (auto &storageBuffer : program.info.storage_buffers_descriptors) { for (auto &storageBuffer : program.info.storage_buffers_descriptors) {
@ -965,10 +971,12 @@ namespace skyline::gpu::interconnect {
auto view{GetSsboViewFromDescriptor(storageBuffer, pipelineStage.constantBuffers)}; auto view{GetSsboViewFromDescriptor(storageBuffer, pipelineStage.constantBuffers)};
std::scoped_lock lock{view}; std::scoped_lock lock{view};
bufferInfo.push_back(vk::DescriptorBufferInfo{ view.RegisterUsage([descriptor = bufferDescriptors.data() + bufferIndex++](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
.buffer = view.buffer->GetBacking(), *descriptor = vk::DescriptorBufferInfo{
.offset = view->offset, .buffer = buffer->GetBacking(),
.range = view->range, .offset = view.offset,
.range = view.size,
};
}); });
executor.AttachBuffer(view); executor.AttachBuffer(view);
} }
@ -982,22 +990,22 @@ namespace skyline::gpu::interconnect {
if (!program.info.texture_descriptors.empty()) { if (!program.info.texture_descriptors.empty()) {
if (!gpu.traits.quirks.needsIndividualTextureBindingWrites) if (!gpu.traits.quirks.needsIndividualTextureBindingWrites)
descriptorSetWrites.push_back(vk::WriteDescriptorSet{ descriptorWrites.push_back(vk::WriteDescriptorSet{
.dstBinding = bindingIndex, .dstBinding = bindingIndex,
.descriptorCount = static_cast<u32>(program.info.texture_descriptors.size()), .descriptorCount = static_cast<u32>(program.info.texture_descriptors.size()),
.descriptorType = vk::DescriptorType::eCombinedImageSampler, .descriptorType = vk::DescriptorType::eCombinedImageSampler,
.pImageInfo = imageInfo.data() + imageInfo.size(), .pImageInfo = imageDescriptors.data() + imageIndex,
}); });
else else
descriptorSetWrites.reserve(descriptorSetWrites.size() + program.info.texture_descriptors.size()); descriptorWrites.reserve(descriptorWrites.size() + program.info.texture_descriptors.size());
for (auto &texture : program.info.texture_descriptors) { for (auto &texture : program.info.texture_descriptors) {
if (gpu.traits.quirks.needsIndividualTextureBindingWrites) if (gpu.traits.quirks.needsIndividualTextureBindingWrites)
descriptorSetWrites.push_back(vk::WriteDescriptorSet{ descriptorWrites.push_back(vk::WriteDescriptorSet{
.dstBinding = bindingIndex, .dstBinding = bindingIndex,
.descriptorCount = 1, .descriptorCount = 1,
.descriptorType = vk::DescriptorType::eCombinedImageSampler, .descriptorType = vk::DescriptorType::eCombinedImageSampler,
.pImageInfo = imageInfo.data() + imageInfo.size(), .pImageInfo = imageDescriptors.data() + imageIndex,
}); });
layoutBindings.push_back(vk::DescriptorSetLayoutBinding{ layoutBindings.push_back(vk::DescriptorSetLayoutBinding{
@ -1020,11 +1028,11 @@ namespace skyline::gpu::interconnect {
auto textureView{GetPoolTextureView(handle.textureIndex)}; auto textureView{GetPoolTextureView(handle.textureIndex)};
std::scoped_lock lock(*textureView); std::scoped_lock lock(*textureView);
imageInfo.push_back(vk::DescriptorImageInfo{ imageDescriptors[imageIndex++] = vk::DescriptorImageInfo{
.sampler = **sampler, .sampler = **sampler,
.imageView = textureView->GetView(), .imageView = textureView->GetView(),
.imageLayout = textureView->texture->layout, .imageLayout = textureView->texture->layout,
}); };
executor.AttachTexture(textureView.get()); executor.AttachTexture(textureView.get());
executor.AttachDependency(std::move(sampler)); executor.AttachDependency(std::move(sampler));
} }
@ -1048,7 +1056,7 @@ namespace skyline::gpu::interconnect {
.pBindings = layoutBindings.data(), .pBindings = layoutBindings.data(),
.bindingCount = static_cast<u32>(layoutBindings.size()), .bindingCount = static_cast<u32>(layoutBindings.size()),
}), }),
descriptorSetWrites, std::move(descriptorSetWrites),
}; };
} }
@ -1643,11 +1651,11 @@ namespace skyline::gpu::interconnect {
else if (vertexBuffer.view) else if (vertexBuffer.view)
return vertexBuffer.view; return vertexBuffer.view;
GuestBuffer guest;
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(vertexBuffer.start, (vertexBuffer.end + 1) - vertexBuffer.start)}; auto mappings{channelCtx.asCtx->gmmu.TranslateRange(vertexBuffer.start, (vertexBuffer.end + 1) - vertexBuffer.start)};
guest.mappings.assign(mappings.begin(), mappings.end()); if (mappings.size() != 1)
Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
vertexBuffer.view = gpu.buffer.FindOrCreate(guest); vertexBuffer.view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
return vertexBuffer.view; return vertexBuffer.view;
} }
@ -1842,7 +1850,7 @@ namespace skyline::gpu::interconnect {
* @tparam ConvGR Converts all green component * @tparam ConvGR Converts all green component
* @tparam SwapBR Swaps blue and red components * @tparam SwapBR Swaps blue and red components
*/ */
template <bool ConvGR, bool SwapBR> template<bool ConvGR, bool SwapBR>
vk::ComponentMapping ConvertTicSwizzleMapping(TextureImageControl::FormatWord format) { vk::ComponentMapping ConvertTicSwizzleMapping(TextureImageControl::FormatWord format) {
auto convertComponentSwizzle{[](TextureImageControl::ImageSwizzle swizzle) { auto convertComponentSwizzle{[](TextureImageControl::ImageSwizzle swizzle) {
switch (swizzle) { switch (swizzle) {
@ -2223,11 +2231,12 @@ namespace skyline::gpu::interconnect {
else if (indexBuffer.view && size == indexBuffer.viewSize) else if (indexBuffer.view && size == indexBuffer.viewSize)
return indexBuffer.view; return indexBuffer.view;
GuestBuffer guestBuffer;
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(indexBuffer.start, size)}; auto mappings{channelCtx.asCtx->gmmu.TranslateRange(indexBuffer.start, size)};
guestBuffer.mappings.assign(mappings.begin(), mappings.end()); if (mappings.size() != 1)
Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());
indexBuffer.view = gpu.buffer.FindOrCreate(guestBuffer); auto mapping{mappings.front()};
indexBuffer.view = gpu.buffer.FindOrCreate(span<u8>(mapping.data(), size), executor.cycle);
return indexBuffer.view; return indexBuffer.view;
} }
@ -2433,33 +2442,43 @@ namespace skyline::gpu::interconnect {
void Draw(u32 count, u32 first, i32 vertexOffset = 0) { void Draw(u32 count, u32 first, i32 vertexOffset = 0) {
// Shader + Binding Setup // Shader + Binding Setup
auto programState{CompileShaderProgramState()}; auto programState{CompileShaderProgramState()};
auto descriptorSet{gpu.descriptor.AllocateSet(*programState.descriptorSetLayout)}; auto descriptorSet{gpu.descriptor.AllocateSet(*programState.descriptorSetLayout)};
for (auto &descriptorSetWrite : programState.descriptorSetWrites) for (auto &descriptorSetWrite : **programState.descriptorSetWrites)
descriptorSetWrite.dstSet = descriptorSet; descriptorSetWrite.dstSet = descriptorSet;
gpu.vkDevice.updateDescriptorSets(programState.descriptorSetWrites, nullptr);
vk::raii::PipelineLayout pipelineLayout(gpu.vkDevice, vk::PipelineLayoutCreateInfo{ vk::raii::PipelineLayout pipelineLayout(gpu.vkDevice, vk::PipelineLayoutCreateInfo{
.pSetLayouts = &*programState.descriptorSetLayout, .pSetLayouts = &*programState.descriptorSetLayout,
.setLayoutCount = 1, .setLayoutCount = 1,
}); });
vk::Buffer indexBufferHandle; struct BoundIndexBuffer {
vk::DeviceSize indexBufferOffset; vk::Buffer handle{};
vk::IndexType indexBufferType; vk::DeviceSize offset{};
vk::IndexType type{};
};
auto boundIndexBuffer{std::make_shared<BoundIndexBuffer>()};
if constexpr (IsIndexed) { if constexpr (IsIndexed) {
auto indexBufferView{GetIndexBuffer(count)}; auto indexBufferView{GetIndexBuffer(count)};
std::scoped_lock lock(indexBufferView); {
executor.AttachBuffer(indexBufferView); std::scoped_lock lock(indexBufferView);
indexBufferHandle = indexBufferView.buffer->GetBacking(); boundIndexBuffer->type = indexBuffer.type;
indexBufferOffset = indexBufferView->offset; indexBufferView.RegisterUsage([=](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
indexBufferType = indexBuffer.type; boundIndexBuffer->handle = buffer->GetBacking();
boundIndexBuffer->offset = view.offset;
});
executor.AttachBuffer(indexBufferView);
}
} }
// Vertex Buffer Setup // Vertex Buffer Setup
std::array<vk::Buffer, maxwell3d::VertexBufferCount> vertexBufferHandles{}; struct BoundVertexBuffers {
std::array<vk::DeviceSize, maxwell3d::VertexBufferCount> vertexBufferOffsets{}; std::array<vk::Buffer, maxwell3d::VertexBufferCount> handles{};
std::array<vk::DeviceSize, maxwell3d::VertexBufferCount> offsets{};
};
auto boundVertexBuffers{std::make_shared<BoundVertexBuffers>()};
boost::container::static_vector<vk::VertexInputBindingDescription, maxwell3d::VertexBufferCount> vertexBindingDescriptions{}; boost::container::static_vector<vk::VertexInputBindingDescription, maxwell3d::VertexBufferCount> vertexBindingDescriptions{};
boost::container::static_vector<vk::VertexInputBindingDivisorDescriptionEXT, maxwell3d::VertexBufferCount> vertexBindingDivisorsDescriptions{}; boost::container::static_vector<vk::VertexInputBindingDivisorDescriptionEXT, maxwell3d::VertexBufferCount> vertexBindingDivisorsDescriptions{};
@ -2473,8 +2492,11 @@ namespace skyline::gpu::interconnect {
vertexBindingDivisorsDescriptions.push_back(vertexBuffer.bindingDivisorDescription); vertexBindingDivisorsDescriptions.push_back(vertexBuffer.bindingDivisorDescription);
std::scoped_lock vertexBufferLock(vertexBufferView); std::scoped_lock vertexBufferLock(vertexBufferView);
vertexBufferHandles[index] = vertexBufferView.buffer->GetBacking(); vertexBufferView.RegisterUsage([handle = boundVertexBuffers->handles.data() + index, offset = boundVertexBuffers->offsets.data() + index](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
vertexBufferOffsets[index] = vertexBufferView->offset; *handle = buffer->GetBacking();
*offset = view.offset;
});
executor.AttachBuffer(vertexBufferView); executor.AttachBuffer(vertexBufferView);
} }
} }
@ -2505,18 +2527,29 @@ namespace skyline::gpu::interconnect {
depthTargetLock.emplace(*depthRenderTargetView); depthTargetLock.emplace(*depthRenderTargetView);
// Draw Persistent Storage // Draw Persistent Storage
struct Storage : FenceCycleDependency { struct DrawStorage {
vk::raii::DescriptorSetLayout descriptorSetLayout;
std::unique_ptr<ShaderProgramState::DescriptorSetWrites> descriptorSetWrites;
vk::raii::PipelineLayout pipelineLayout; vk::raii::PipelineLayout pipelineLayout;
std::optional<vk::raii::Pipeline> pipeline;
DescriptorAllocator::ActiveDescriptorSet descriptorSet;
Storage(vk::raii::PipelineLayout &&pipelineLayout, DescriptorAllocator::ActiveDescriptorSet &&descriptorSet) : pipelineLayout(std::move(pipelineLayout)), descriptorSet(std::move(descriptorSet)) {} DrawStorage(vk::raii::DescriptorSetLayout &&descriptorSetLayout, std::unique_ptr<ShaderProgramState::DescriptorSetWrites> &&descriptorSetWrites, vk::raii::PipelineLayout &&pipelineLayout) : descriptorSetLayout(std::move(descriptorSetLayout)), descriptorSetWrites(std::move(descriptorSetWrites)), pipelineLayout(std::move(pipelineLayout)) {}
}; };
auto storage{std::make_shared<Storage>(std::move(pipelineLayout), std::move(descriptorSet))}; auto drawStorage{std::make_shared<DrawStorage>(std::move(programState.descriptorSetLayout), std::move(programState.descriptorSetWrites), std::move(pipelineLayout))};
// Command Buffer Persistent Storage
struct FenceStorage : FenceCycleDependency {
std::optional<vk::raii::Pipeline> pipeline;
DescriptorAllocator::ActiveDescriptorSet descriptorSet;
std::shared_ptr<DrawStorage> drawStorage{};
FenceStorage(DescriptorAllocator::ActiveDescriptorSet &&descriptorSet) : descriptorSet(std::move(descriptorSet)) {}
};
auto fenceStorage{std::make_shared<FenceStorage>(std::move(descriptorSet))};
// Submit Draw // Submit Draw
executor.AddSubpass([=, &vkDevice = gpu.vkDevice, shaderModules = programState.shaderModules, shaderStages = programState.shaderStages, inputAssemblyState = inputAssemblyState, multiViewport = gpu.traits.supportsMultipleViewports, viewports = viewports, scissors = scissors, rasterizerState = rasterizerState, multisampleState = multisampleState, depthState = depthState, blendState = blendState, storage = std::move(storage), supportsVertexAttributeDivisor = gpu.traits.supportsVertexAttributeDivisor, vertexBufferHandles = std::move(vertexBufferHandles), vertexBufferOffsets = std::move(vertexBufferOffsets), pipelineCache = *pipelineCache](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable { executor.AddSubpass([=, &vkDevice = gpu.vkDevice, shaderModules = programState.shaderModules, shaderStages = programState.shaderStages, inputAssemblyState = inputAssemblyState, multiViewport = gpu.traits.supportsMultipleViewports, viewports = viewports, scissors = scissors, rasterizerState = rasterizerState, multisampleState = multisampleState, depthState = depthState, blendState = blendState, drawStorage = std::move(drawStorage), fenceStorage = std::move(fenceStorage), supportsVertexAttributeDivisor = gpu.traits.supportsVertexAttributeDivisor, pipelineCache = *pipelineCache](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable {
vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState{ vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState{
vk::PipelineVertexInputStateCreateInfo{ vk::PipelineVertexInputStateCreateInfo{
.pVertexBindingDescriptions = vertexBindingDescriptions.data(), .pVertexBindingDescriptions = vertexBindingDescriptions.data(),
@ -2553,7 +2586,7 @@ namespace skyline::gpu::interconnect {
.pDepthStencilState = &depthState, .pDepthStencilState = &depthState,
.pColorBlendState = &blendState, .pColorBlendState = &blendState,
.pDynamicState = nullptr, .pDynamicState = nullptr,
.layout = *storage->pipelineLayout, .layout = *drawStorage->pipelineLayout,
.renderPass = renderPass, .renderPass = renderPass,
.subpass = subpassIndex, .subpass = subpassIndex,
}; };
@ -2564,6 +2597,7 @@ namespace skyline::gpu::interconnect {
commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline.value); commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline.value);
auto &vertexBufferHandles{boundVertexBuffers->handles};
for (u32 bindingIndex{}; bindingIndex != vertexBufferHandles.size(); bindingIndex++) { for (u32 bindingIndex{}; bindingIndex != vertexBufferHandles.size(); bindingIndex++) {
// We need to bind all non-null vertex buffers while skipping any null ones // We need to bind all non-null vertex buffers while skipping any null ones
if (vertexBufferHandles[bindingIndex]) { if (vertexBufferHandles[bindingIndex]) {
@ -2572,24 +2606,26 @@ namespace skyline::gpu::interconnect {
bindingEndIndex++; bindingEndIndex++;
u32 bindingCount{bindingEndIndex - bindingIndex}; u32 bindingCount{bindingEndIndex - bindingIndex};
commandBuffer.bindVertexBuffers(bindingIndex, span(vertexBufferHandles.data() + bindingIndex, bindingCount), span(vertexBufferOffsets.data() + bindingIndex, bindingCount)); commandBuffer.bindVertexBuffers(bindingIndex, span(vertexBufferHandles.data() + bindingIndex, bindingCount), span(boundVertexBuffers->offsets.data() + bindingIndex, bindingCount));
} }
} }
commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *storage->pipelineLayout, 0, storage->descriptorSet, nullptr); vkDevice.updateDescriptorSets(**drawStorage->descriptorSetWrites, nullptr);
commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *drawStorage->pipelineLayout, 0, fenceStorage->descriptorSet, nullptr);
if constexpr (IsIndexed) { if constexpr (IsIndexed) {
commandBuffer.bindIndexBuffer(indexBufferHandle, indexBufferOffset, indexBufferType); commandBuffer.bindIndexBuffer(boundIndexBuffer->handle, boundIndexBuffer->offset, boundIndexBuffer->type);
commandBuffer.drawIndexed(count, 1, first, vertexOffset, 0); commandBuffer.drawIndexed(count, 1, first, vertexOffset, 0);
} else { } else {
commandBuffer.draw(count, 1, first, 0); commandBuffer.draw(count, 1, first, 0);
} }
storage->pipeline = vk::raii::Pipeline(vkDevice, pipeline.value); fenceStorage->drawStorage = drawStorage;
fenceStorage->pipeline = vk::raii::Pipeline(vkDevice, pipeline.value);
cycle->AttachObject(storage); cycle->AttachObject(fenceStorage);
}, vk::Rect2D{ }, vk::Rect2D{
.extent = activeColorRenderTargets[0]->texture->dimensions, .extent = activeColorRenderTargets.front()->texture->dimensions,
}, {}, activeColorRenderTargets, depthRenderTargetView); }, {}, activeColorRenderTargets, depthRenderTargetView);
} }