Fix BufferManager::FindOrCreate Recreation Bugs

It was determined that `FindOrCreate` has several issues which this commit fixes:
* It wouldn't correctly handle locking of the newly created `Buffer` as the constructor would setup traps prior to being able to lock it which could lead to UB
* It wouldn't propagate the `usedByContext`/`everHadInlineUpdate` flags correctly
* It wouldn't correctly set the `dirtyState` of the buffer according to that of its source buffers
This commit is contained in:
PixelyIon 2022-07-17 18:18:33 +05:30
parent d1a682eace
commit 38d3ff4300
No known key found for this signature in database
GPG Key ID: 11BC6C3201BC2C05
3 changed files with 93 additions and 83 deletions

View File

@ -40,58 +40,6 @@ namespace skyline::gpu {
SetupGuestMappings();
}
Buffer::Buffer(GPU &gpu, GuestBuffer guest, ContextTag tag, span<std::shared_ptr<Buffer>> srcBuffers) : gpu{gpu}, backing{gpu.memory.AllocateBuffer(guest.size())}, guest{guest} {
SetupGuestMappings();
// Source buffers don't necessarily fully overlap with us so we have to perform a sync here to prevent any gaps
SynchronizeHost(false);
// Copies between two buffers based off of their mappings in guest memory
auto copyBuffer{[](auto dstGuest, auto srcGuest, auto dstPtr, auto srcPtr) {
if (dstGuest.begin().base() <= srcGuest.begin().base()) {
size_t dstOffset{static_cast<size_t>(srcGuest.begin().base() - dstGuest.begin().base())};
size_t copySize{std::min(dstGuest.size() - dstOffset, srcGuest.size())};
std::memcpy(dstPtr + dstOffset, srcPtr, copySize);
} else if (dstGuest.begin().base() > srcGuest.begin().base()) {
size_t srcOffset{static_cast<size_t>(dstGuest.begin().base() - srcGuest.begin().base())};
size_t copySize{std::min(dstGuest.size(), srcGuest.size() - srcOffset)};
std::memcpy(dstPtr, srcPtr + srcOffset, copySize);
}
}};
// Transfer data/state from source buffers
for (const auto &srcBuffer : srcBuffers) {
ContextLock lock{tag, *srcBuffer};
if (srcBuffer->guest) {
if (srcBuffer->cycle && cycle != srcBuffer->cycle)
if (cycle)
cycle->ChainCycle(srcBuffer->cycle);
else
cycle = srcBuffer->cycle;
if (srcBuffer->dirtyState == Buffer::DirtyState::GpuDirty) {
// If the source buffer is GPU dirty we cannot directly copy over its GPU backing contents
// Only sync back the buffer if it's not attached to the current context, otherwise propagate the GPU dirtiness
if (lock.IsFirstUsage()) {
// Perform a GPU -> CPU sync on the source then do a CPU -> GPU sync for the region occupied by the source
// This is required since if we were created from a two buffers: one GPU dirty in the current cycle, and one GPU dirty in the previous cycle, if we marked ourselves as CPU dirty here then the GPU dirtiness from the current cycle buffer would be ignored and cause writes to be missed
srcBuffer->SynchronizeGuest(true);
copyBuffer(guest, *srcBuffer->guest, backing.data(), srcBuffer->mirror.data());
} else {
MarkGpuDirty();
}
} else if (srcBuffer->dirtyState == Buffer::DirtyState::Clean) {
// For clean buffers we can just copy over the GPU backing data directly
// This is necessary since clean buffers may not have matching GPU/CPU data in the case of inline updates for host immutable buffers
copyBuffer(guest, *srcBuffer->guest, backing.data(), srcBuffer->backing.data());
}
// CPU dirty buffers are already synchronized in the initial SynchronizeHost call so don't need special handling
}
}
}
Buffer::Buffer(GPU &gpu, vk::DeviceSize size) : gpu(gpu), backing(gpu.memory.AllocateBuffer(size)) {
dirtyState = DirtyState::Clean; // Since this is a host-only buffer it's always going to be clean
}
@ -140,6 +88,16 @@ namespace skyline::gpu {
return false;
}
void Buffer::Invalidate() {
if (trapHandle) {
gpu.state.nce->DeleteTrap(*trapHandle);
trapHandle = {};
}
// Will prevent any sync operations so even if the trap handler is partway through running and hasn't yet acquired the lock it won't do anything
guest = {};
}
void Buffer::SynchronizeHost(bool rwTrap) {
if (!guest)
return;

View File

@ -140,13 +140,6 @@ namespace skyline::gpu {
Buffer(GPU &gpu, GuestBuffer guest);
/**
* @brief Creates a Buffer that is pre-synchronised with the contents of the input buffers
* @param tag The tag to associate locking of srcBuffers with
* @param srcBuffers Span of overlapping source buffers
*/
Buffer(GPU &gpu, GuestBuffer guest, ContextTag tag, span<std::shared_ptr<Buffer>> srcBuffers);
/**
* @brief Creates a host-only Buffer which isn't backed by any guest buffer
* @note The created buffer won't have a mirror so any operations cannot depend on a mirror existing
@ -222,6 +215,13 @@ namespace skyline::gpu {
*/
bool PollFence();
/**
* @brief Invalidates the Buffer on the guest and deletes the trap that backs this buffer as it is no longer necessary
* @note This will not clear any views or delegates on the buffer, it will only remove guest mappings and delete the trap
* @note The buffer **must** be locked prior to calling this
*/
void Invalidate();
/**
* @brief Synchronizes the host buffer with the guest
* @param rwTrap If true, the guest buffer will be read/write trapped rather than only being write trapped which is more efficient than calling MarkGpuDirty directly after

View File

@ -34,17 +34,31 @@ namespace skyline::gpu {
vk::DeviceSize offset{static_cast<size_t>(guestMapping.begin().base() - alignedStart)}, size{guestMapping.size()};
guestMapping = span<u8>{alignedStart, alignedEnd};
struct LockedBuffer {
std::shared_ptr<Buffer> buffer;
ContextLock<Buffer> lock;
LockedBuffer(std::shared_ptr<Buffer> pBuffer, ContextTag tag) : buffer{std::move(pBuffer)}, lock{tag, *buffer} {}
Buffer *operator->() const {
return buffer.get();
}
std::shared_ptr<Buffer> &operator*() {
return buffer;
}
};
// Lookup for any buffers overlapping with the supplied guest mapping
boost::container::small_vector<std::shared_ptr<Buffer>, 4> overlaps;
boost::container::small_vector<LockedBuffer, 4> overlaps;
for (auto entryIt{std::lower_bound(buffers.begin(), buffers.end(), guestMapping.end().base(), BufferLessThan)}; entryIt != buffers.begin() && (*--entryIt)->guest->begin() <= guestMapping.end();)
if ((*entryIt)->guest->end() > guestMapping.begin())
overlaps.push_back(*entryIt);
overlaps.emplace_back(*entryIt, tag);
if (overlaps.size() == 1) [[likely]] {
auto buffer{overlaps.front()};
auto &buffer{overlaps.front()};
if (buffer->guest->begin() <= guestMapping.begin() && buffer->guest->end() >= guestMapping.end()) {
// If we find a buffer which can entirely fit the guest mapping, we can just return a view into it
ContextLock bufferLock{tag, *buffer};
return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest->begin()) + offset, size);
}
}
@ -59,20 +73,56 @@ namespace skyline::gpu {
highestAddress = mapping.end().base();
}
auto newBuffer{std::make_shared<Buffer>(gpu, span<u8>{lowestAddress, highestAddress}, tag, overlaps)};
ContextLock newLock{tag, *newBuffer};
bool hasAlreadyAttached{}; //!< If any overlapping view was already attached to the current context
for (auto &overlap : overlaps) {
ContextLock overlapLock{tag, *overlap};
LockedBuffer newBuffer{std::make_shared<Buffer>(gpu, span<u8>{lowestAddress, highestAddress}), tag}; // If we don't lock the buffer prior to trapping it during synchronization, a race could occur with a guest trap acquiring the lock before we do and mutating the buffer prior to it being ready
if (!overlapLock.IsFirstUsage())
hasAlreadyAttached = true;
newBuffer->SynchronizeHost(false); // Overlaps don't necessarily fully cover the buffer so we have to perform a sync here to prevent any gaps
buffers.erase(std::find(buffers.begin(), buffers.end(), overlap));
auto copyBuffer{[](auto dstGuest, auto srcGuest, auto dstPtr, auto srcPtr) {
if (dstGuest.begin().base() <= srcGuest.begin().base()) {
size_t dstOffset{static_cast<size_t>(srcGuest.begin().base() - dstGuest.begin().base())};
size_t copySize{std::min(dstGuest.size() - dstOffset, srcGuest.size())};
std::memcpy(dstPtr + dstOffset, srcPtr, copySize);
} else if (dstGuest.begin().base() > srcGuest.begin().base()) {
size_t srcOffset{static_cast<size_t>(dstGuest.begin().base() - srcGuest.begin().base())};
size_t copySize{std::min(dstGuest.size(), srcGuest.size() - srcOffset)};
std::memcpy(dstPtr, srcPtr + srcOffset, copySize);
}
}}; //!< Copies between two buffers based off of their mappings in guest memory
bool shouldAttach{}; //!< If the new buffer should be attached to the current context
for (auto &srcBuffer : overlaps) {
if (!srcBuffer.lock.IsFirstUsage())
// If any overlapping buffer was already attached to the current context, we should also attach the current context
shouldAttach = true;
// All newly created buffers that have this set are guaranteed to be attached in buffer FindOrCreate, attach will then lock the buffer without resetting this flag, which will only finally be reset when the lock is released
newBuffer->usedByContext |= srcBuffer->usedByContext;
newBuffer->everHadInlineUpdate |= srcBuffer->everHadInlineUpdate;
if (srcBuffer->cycle && newBuffer->cycle != srcBuffer->cycle)
if (newBuffer->cycle)
newBuffer->cycle->ChainCycle(srcBuffer->cycle);
else
newBuffer->cycle = srcBuffer->cycle;
if (srcBuffer->dirtyState == Buffer::DirtyState::GpuDirty) {
srcBuffer->WaitOnFence();
if (srcBuffer.lock.IsFirstUsage() && newBuffer->dirtyState != Buffer::DirtyState::GpuDirty)
copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->mirror.data(), srcBuffer->backing.data());
else
newBuffer->MarkGpuDirty();
} else if (srcBuffer->usedByContext) {
if (srcBuffer->dirtyState == Buffer::DirtyState::CpuDirty)
Logger::Error("Buffer (0x{}-0x{}) is marked as CPU dirty while being utilized by the context, this is not valid", srcBuffer->guest->begin().base(), srcBuffer->guest->end().base());
// We need the backing to be stable so that any accesses within this context are sequenced correctly, we can't use the source mirror here either since buffer writes within this context will update the mirror on CPU and backing on GPU
srcBuffer->WaitOnFence();
copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->backing.data(), srcBuffer->backing.data());
}
// Transfer all views from the overlapping buffer to the new buffer with the new buffer and updated offset, ensuring pointer stability
vk::DeviceSize overlapOffset{static_cast<vk::DeviceSize>(overlap->guest->begin() - newBuffer->guest->begin())};
for (auto it{overlap->views.begin()}; it != overlap->views.end(); it++) {
vk::DeviceSize overlapOffset{static_cast<vk::DeviceSize>(srcBuffer->guest->begin() - newBuffer->guest->begin())};
for (auto it{srcBuffer->views.begin()}; it != srcBuffer->views.end(); it++) {
if (overlapOffset)
// This is a slight hack as we really shouldn't be changing the underlying non-mutable set elements without a rehash but without writing our own set impl this is the best we can do
const_cast<Buffer::BufferViewStorage *>(&*it)->offset += overlapOffset;
@ -84,26 +134,28 @@ namespace skyline::gpu {
if (overlapOffset)
// All current hashes are invalidated by above loop if overlapOffset is nonzero so rehash the container
overlap->views.rehash(0);
srcBuffer->views.rehash(0);
// Merge the view sets, this will keep pointer stability hence avoiding any reallocation
newBuffer->views.merge(overlap->views);
newBuffer->views.merge(srcBuffer->views);
// Transfer all delegates references from the overlapping buffer to the new buffer
for (auto &delegate : overlap->delegates) {
delegate->buffer = newBuffer;
for (auto &delegate : srcBuffer->delegates) {
delegate->buffer = *newBuffer;
if (delegate->usageCallback)
delegate->usageCallback(*delegate->view, newBuffer);
delegate->usageCallback(*delegate->view, *newBuffer);
}
newBuffer->delegates.splice(newBuffer->delegates.end(), overlap->delegates);
newBuffer->delegates.splice(newBuffer->delegates.end(), srcBuffer->delegates);
srcBuffer->Invalidate(); // Invalidate the overlapping buffer so it can't be synced in the future
buffers.erase(std::find(buffers.begin(), buffers.end(), srcBuffer.buffer));
}
if (hasAlreadyAttached)
// We need to reattach the buffer if any overlapping views were already attached to the current context
attachBuffer(newBuffer, std::move(newLock));
if (shouldAttach)
attachBuffer(*newBuffer, std::move(newBuffer.lock));
buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), newBuffer->guest->end().base(), BufferLessThan), newBuffer);
buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), newBuffer->guest->end().base(), BufferLessThan), *newBuffer);
return newBuffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - newBuffer->guest->begin()) + offset, size);
}