Replace host immutability FenceCycle with GPU usage tracking

We previously used a `FenceCycle` to track whether the buffer was host-mutable, alongside a second cycle tracking GPU-side requirements that had to be fulfilled before the buffer could be used on the host. Due to the recent change in behavior, this system ended up being suboptimal. 

This commit replaces the cycle with a boolean that tracks whether the resource has any usages on the GPU within the current context that could prevent it from being mutated on the CPU. Based on this flag, the context's fence is simply attached to the buffer; this is possible because the new behavior of buffer fences satisfies all the requirements for it.
This commit is contained in:
PixelyIon 2022-06-29 22:02:09 +05:30
parent 217d484cba
commit a60d6ec58f
No known key found for this signature in database
GPG Key ID: 11BC6C3201BC2C05
3 changed files with 36 additions and 39 deletions

View File

@ -8,13 +8,6 @@
#include "buffer.h"
namespace skyline::gpu {
/**
 * @return If the buffer should currently be treated as host immutable
 * @note Polls `hostImmutableCycle` and resets it once signalled, so immutability automatically expires with the cycle
 */
bool Buffer::CheckHostImmutable() {
if (hostImmutableCycle && hostImmutableCycle->Poll())
hostImmutableCycle.reset();
return hostImmutableCycle != nullptr;
}
void Buffer::SetupGuestMappings() {
u8 *alignedData{util::AlignDown(guest->data(), PAGE_SIZE)};
size_t alignedSize{static_cast<size_t>(util::AlignUp(guest->data() + guest->size(), PAGE_SIZE) - alignedData)};
@ -61,14 +54,11 @@ namespace skyline::gpu {
for (const auto &srcBuffer : srcBuffers) {
ContextLock lock{tag, *srcBuffer};
if (srcBuffer->guest) {
if (srcBuffer->hostImmutableCycle) {
// Propagate any host immutability
if (hostImmutableCycle) {
srcBuffer->hostImmutableCycle->Wait();
} else {
hostImmutableCycle = srcBuffer->hostImmutableCycle;
}
}
if (srcBuffer->cycle && cycle != srcBuffer->cycle)
if (cycle)
cycle->ChainCycle(srcBuffer->cycle);
else
cycle = srcBuffer->cycle;
if (srcBuffer->dirtyState == Buffer::DirtyState::GpuDirty) {
// If the source buffer is GPU dirty we cannot directly copy over its GPU backing contents
@ -201,7 +191,7 @@ namespace skyline::gpu {
std::memcpy(mirror.data() + offset, data.data(), data.size()); // Always copy to mirror since any CPU side reads will need the up-to-date contents
if (CheckHostImmutable())
if (PollFence())
// Perform a GPU-side inline update for the buffer contents if this buffer is host immutable since we can't directly modify the backing
gpuCopyCallback();
else
@ -238,10 +228,6 @@ namespace skyline::gpu {
return mirror;
}
/**
 * @brief Attaches the supplied cycle as the host-immutability cycle, preventing host-side writes to the backing until it is signalled
 */
void Buffer::MarkHostImmutable(const std::shared_ptr<FenceCycle> &pCycle) {
hostImmutableCycle = pCycle;
}
/**
 * @brief Acquires the buffer's mutex, blocking until it is available
 */
void Buffer::lock() {
mutex.lock();
}
@ -257,6 +243,7 @@ namespace skyline::gpu {
/**
 * @brief Releases the buffer's mutex, clearing all context-local state beforehand
 */
void Buffer::unlock() {
tag = ContextTag{}; // Clear the owning context tag since the buffer is no longer locked by any context
usedByContext = false; // GPU-usage tracking is scoped to a single context, so it is reset on unlock
mutex.unlock();
}
@ -298,7 +285,7 @@ namespace skyline::gpu {
void BufferView::RegisterUsage(const std::shared_ptr<FenceCycle> &cycle, const std::function<void(const Buffer::BufferViewStorage &, const std::shared_ptr<Buffer> &)> &usageCallback) {
// Users of RegisterUsage expect the buffer contents to be sequenced as the guest GPU would be, so force any further writes in the current cycle to occur on the GPU
bufferDelegate->buffer->MarkHostImmutable(cycle);
bufferDelegate->buffer->MarkGpuUsed();
usageCallback(*bufferDelegate->view, bufferDelegate->buffer);
if (!bufferDelegate->usageCallback) {
@ -320,7 +307,7 @@ namespace skyline::gpu {
bool gpuCopy{bufferDelegate->view->size > MegaBufferingDisableThreshold};
if (gpuCopy)
// This will force the host buffer contents to stay as is for the current cycle, requiring that write operations are instead sequenced on the GPU for the entire buffer
bufferDelegate->buffer->MarkHostImmutable(pCycle);
bufferDelegate->buffer->MarkGpuUsed();
bufferDelegate->buffer->Write(isFirstUsage, flushHostCallback, gpuCopyCallback, data, offset + bufferDelegate->view->offset);
}

View File

@ -40,12 +40,7 @@ namespace skyline::gpu {
bool everHadInlineUpdate{}; //!< Whether the buffer has ever had an inline update since it was created, if this is set then megabuffering will be attempted by views to avoid the cost of inline GPU updates
std::shared_ptr<FenceCycle> hostImmutableCycle; //!< The cycle for when the buffer was last immutable, if this is signalled the buffer is no longer immutable
/**
* @return If the buffer should be treated as host immutable
*/
bool CheckHostImmutable();
bool usedByContext{}; //!< If this buffer is used by the current context, this determines if a buffer needs to be bound to the cycle it is locked by or not
public:
/**
@ -191,6 +186,27 @@ namespace skyline::gpu {
*/
void MarkGpuDirty();
/**
 * @brief Marks the buffer as utilized by the current context, this will be reset on unlocking the buffer
 * @note The buffer **must** be locked prior to calling this
 * @note This is significantly different from MarkGpuDirty in that it doesn't imply that the buffer is written to on the GPU and only used on it, this eliminates the requirement to sync-back
 */
void MarkGpuUsed() {
usedByContext = true;
}
/**
 * @return If this buffer has been utilized within the current context
 * @note The buffer **must** be locked with a context prior to calling this
 */
bool UsedByContext() const {
return usedByContext;
}
/**
 * @return If the buffer has ever had an inline update since it was created
 */
bool EverHadInlineUpdate() const {
return everHadInlineUpdate;
}
/**
* @brief Waits on a fence cycle if it exists till it's signalled and resets it after
* @note The buffer **must** be locked prior to calling this
@ -272,14 +288,6 @@ namespace skyline::gpu {
* @note The buffer **must** be kept locked until the span is no longer in use
*/
span<u8> GetReadOnlyBackingSpan(bool isFirstUsage, const std::function<void()> &flushHostCallback);
/**
* @brief Prevents any further writes to the `backing` host side buffer for the duration of the current cycle, forcing slower inline GPU updates instead
* @note The buffer **must** be locked prior to calling this
*/
void MarkHostImmutable(const std::shared_ptr<FenceCycle> &cycle);
bool EverHadInlineUpdate() const { return everHadInlineUpdate; }
};
/**

View File

@ -280,9 +280,11 @@ namespace skyline::gpu::interconnect {
textureManagerLock.reset();
for (const auto &attachedBuffer : attachedBuffers) {
cycle->AttachObject(attachedBuffer.buffer);
cycle->ChainCycle(attachedBuffer->cycle);
attachedBuffer->cycle = cycle;
if (attachedBuffer->UsedByContext()) {
cycle->AttachObject(attachedBuffer.buffer);
cycle->ChainCycle(attachedBuffer->cycle);
attachedBuffer->cycle = cycle;
}
}
for (const auto &delegate : attachedBufferDelegates) {