Introduce usage tracker for dirty tracking within an execution

This is necessary as e.g. shaders can be updated through a mirror and never hit modification traps. By tracking which addresses have sequenced writes applied, the shader manager can then correctly detect if a given shader has been modified by the GPU.
This commit is contained in:
Billy Laws 2023-03-04 20:11:34 +00:00
parent f64860c93e
commit 090151f0c3
16 changed files with 103 additions and 44 deletions

View File

@ -194,13 +194,15 @@ namespace skyline::gpu {
return isDirect ? ValidateMegaBufferViewImplDirect(size) : ValidateMegaBufferViewImplStaged(size); return isDirect ? ValidateMegaBufferViewImplDirect(size) : ValidateMegaBufferViewImplStaged(size);
} }
void Buffer::CopyFromImplDirect(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback) { void Buffer::CopyFromImplDirect(vk::DeviceSize dstOffset,
Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
everHadInlineUpdate = true; everHadInlineUpdate = true;
bool needsGpuTracking{src->RefreshGpuWritesActiveDirect() || RefreshGpuWritesActiveDirect()}; bool needsGpuTracking{src->RefreshGpuWritesActiveDirect() || RefreshGpuWritesActiveDirect()};
bool needsCpuTracking{RefreshGpuReadsActiveDirect() && !needsGpuTracking}; bool needsCpuTracking{RefreshGpuReadsActiveDirect() && !needsGpuTracking};
if (needsGpuTracking || needsCpuTracking) { if (needsGpuTracking || needsCpuTracking) {
if (needsGpuTracking) // Force buffer to be dirty for this cycle if either of the sources are dirty, this is needed as otherwise it could have just been dirty from the previous cycle if (needsGpuTracking) // Force buffer to be dirty for this cycle if either of the sources are dirty, this is needed as otherwise it could have just been dirty from the previous cycle
MarkGpuDirty(); MarkGpuDirty(usageTracker);
gpuCopyCallback(); gpuCopyCallback();
if (needsCpuTracking) if (needsCpuTracking)
@ -210,7 +212,9 @@ namespace skyline::gpu {
} }
} }
void Buffer::CopyFromImplStaged(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback) { void Buffer::CopyFromImplStaged(vk::DeviceSize dstOffset,
Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
std::scoped_lock lock{stateMutex, src->stateMutex}; // Fine even if src and dst are same since recursive mutex std::scoped_lock lock{stateMutex, src->stateMutex}; // Fine even if src and dst are same since recursive mutex
if (dirtyState == DirtyState::CpuDirty && SequencedCpuBackingWritesBlocked()) if (dirtyState == DirtyState::CpuDirty && SequencedCpuBackingWritesBlocked())
@ -230,18 +234,19 @@ namespace skyline::gpu {
else else
gpuCopyCallback(); gpuCopyCallback();
} else { } else {
MarkGpuDirty(); MarkGpuDirty(usageTracker);
gpuCopyCallback(); gpuCopyCallback();
} }
} }
bool Buffer::WriteImplDirect(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback) { bool Buffer::WriteImplDirect(span<u8> data, vk::DeviceSize offset,
UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
// If the buffer is GPU dirty do the write on the GPU and we're done // If the buffer is GPU dirty do the write on the GPU and we're done
if (RefreshGpuWritesActiveDirect()) { if (RefreshGpuWritesActiveDirect()) {
if (gpuCopyCallback) { if (gpuCopyCallback) {
// Propagate dirtiness to the current cycle, since if this is only dirty in a previous cycle that could change at any time and we would need to have the write saved somewhere for CPU reads // Propagate dirtiness to the current cycle, since if this is only dirty in a previous cycle that could change at any time and we would need to have the write saved somewhere for CPU reads
// By propagating the dirtiness to the current cycle we can avoid this and force a wait on any reads // By propagating the dirtiness to the current cycle we can avoid this and force a wait on any reads
MarkGpuDirty(); MarkGpuDirty(usageTracker);
gpuCopyCallback(); gpuCopyCallback();
return false; return false;
} else { } else {
@ -349,6 +354,15 @@ namespace skyline::gpu {
AdvanceSequence(); // The GPU will modify buffer contents so advance to the next sequence AdvanceSequence(); // The GPU will modify buffer contents so advance to the next sequence
} }
void Buffer::MarkGpuDirtyImpl() {
    // Record that the GPU wrote to this buffer within the current execution,
    // then hand off to the backing-mode-specific dirtiness handling
    currentExecutionGpuDirty = true;
    isDirect ? MarkGpuDirtyImplDirect() : MarkGpuDirtyImplStaged();
}
Buffer::Buffer(LinearAllocatorState<> &delegateAllocator, GPU &gpu, GuestBuffer guest, size_t id, bool direct) Buffer::Buffer(LinearAllocatorState<> &delegateAllocator, GPU &gpu, GuestBuffer guest, size_t id, bool direct)
: gpu{gpu}, : gpu{gpu},
guest{guest}, guest{guest},
@ -382,16 +396,12 @@ namespace skyline::gpu {
WaitOnFence(); WaitOnFence();
} }
void Buffer::MarkGpuDirty() { void Buffer::MarkGpuDirty(UsageTracker &usageTracker) {
if (!guest) if (!guest)
return; return;
currentExecutionGpuDirty = true; usageTracker.dirtyIntervals.Insert(*guest);
MarkGpuDirtyImpl();
if (isDirect)
MarkGpuDirtyImplDirect();
else
MarkGpuDirtyImplStaged();
} }
void Buffer::WaitOnFence() { void Buffer::WaitOnFence() {
@ -493,24 +503,30 @@ namespace skyline::gpu {
ReadImplStaged(isFirstUsage, flushHostCallback, data, offset); ReadImplStaged(isFirstUsage, flushHostCallback, data, offset);
} }
bool Buffer::Write(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback) { bool Buffer::Write(span<u8> data, vk::DeviceSize offset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
AdvanceSequence(); // We are modifying GPU backing contents so advance to the next sequence AdvanceSequence(); // We are modifying GPU backing contents so advance to the next sequence
everHadInlineUpdate = true; everHadInlineUpdate = true;
usageTracker.sequencedIntervals.Insert(*guest);
if (isDirect) if (isDirect)
return WriteImplDirect(data, offset, gpuCopyCallback); return WriteImplDirect(data, offset, usageTracker, gpuCopyCallback);
else else
return WriteImplStaged(data, offset, gpuCopyCallback); return WriteImplStaged(data, offset, gpuCopyCallback);
} }
void Buffer::CopyFrom(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback) { void Buffer::CopyFrom(vk::DeviceSize dstOffset,
Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
AdvanceSequence(); // We are modifying GPU backing contents so advance to the next sequence AdvanceSequence(); // We are modifying GPU backing contents so advance to the next sequence
everHadInlineUpdate = true; everHadInlineUpdate = true;
usageTracker.sequencedIntervals.Insert(*guest);
if (isDirect) if (isDirect)
CopyFromImplDirect(dstOffset, src, srcOffset, size, gpuCopyCallback); CopyFromImplDirect(dstOffset, src, srcOffset, size, usageTracker, gpuCopyCallback);
else else
CopyFromImplStaged(dstOffset, src, srcOffset, size, gpuCopyCallback); CopyFromImplStaged(dstOffset, src, srcOffset, size, usageTracker, gpuCopyCallback);
} }
BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size) { BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size) {
@ -676,8 +692,8 @@ namespace skyline::gpu {
GetBuffer()->Read(isFirstUsage, flushHostCallback, data, readOffset + GetOffset()); GetBuffer()->Read(isFirstUsage, flushHostCallback, data, readOffset + GetOffset());
} }
bool BufferView::Write(span<u8> data, vk::DeviceSize writeOffset, const std::function<void()> &gpuCopyCallback) const { bool BufferView::Write(span<u8> data, vk::DeviceSize writeOffset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) const {
return GetBuffer()->Write(data, writeOffset + GetOffset(), gpuCopyCallback); return GetBuffer()->Write(data, writeOffset + GetOffset(), usageTracker, gpuCopyCallback);
} }
BufferBinding BufferView::TryMegaBuffer(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, ContextTag executionTag, size_t sizeOverride) const { BufferBinding BufferView::TryMegaBuffer(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, ContextTag executionTag, size_t sizeOverride) const {
@ -689,9 +705,9 @@ namespace skyline::gpu {
return backing.subspan(GetOffset(), size); return backing.subspan(GetOffset(), size);
} }
void BufferView::CopyFrom(BufferView src, const std::function<void()> &gpuCopyCallback) { void BufferView::CopyFrom(BufferView src, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
if (src.size != size) if (src.size != size)
throw exception("Copy size mismatch!"); throw exception("Copy size mismatch!");
return GetBuffer()->CopyFrom(GetOffset(), src.GetBuffer(), src.GetOffset(), size, gpuCopyCallback); return GetBuffer()->CopyFrom(GetOffset(), src.GetBuffer(), src.GetOffset(), size, usageTracker, gpuCopyCallback);
} }
} }

View File

@ -8,6 +8,7 @@
#include <common/spin_lock.h> #include <common/spin_lock.h>
#include <nce.h> #include <nce.h>
#include <gpu/tag_allocator.h> #include <gpu/tag_allocator.h>
#include "usage_tracker.h"
#include "megabuffer.h" #include "megabuffer.h"
#include "memory_manager.h" #include "memory_manager.h"
@ -146,11 +147,16 @@ namespace skyline::gpu {
*/ */
bool ValidateMegaBufferView(vk::DeviceSize size); bool ValidateMegaBufferView(vk::DeviceSize size);
void CopyFromImplDirect(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback); void CopyFromImplDirect(vk::DeviceSize dstOffset,
Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);
void CopyFromImplStaged(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback); void CopyFromImplStaged(vk::DeviceSize dstOffset,
Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);
bool WriteImplDirect(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {}); bool WriteImplDirect(span<u8> data, vk::DeviceSize offset,
UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback = {});
bool WriteImplStaged(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {}); bool WriteImplStaged(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {});
@ -162,6 +168,8 @@ namespace skyline::gpu {
void MarkGpuDirtyImplStaged(); void MarkGpuDirtyImplStaged();
void MarkGpuDirtyImpl();
public: public:
void UpdateCycle(const std::shared_ptr<FenceCycle> &newCycle) { void UpdateCycle(const std::shared_ptr<FenceCycle> &newCycle) {
newCycle->ChainCycle(cycle); newCycle->ChainCycle(cycle);
@ -227,7 +235,7 @@ namespace skyline::gpu {
* @note This **must** be called after syncing the buffer to the GPU not before * @note This **must** be called after syncing the buffer to the GPU not before
* @note The buffer **must** be locked prior to calling this * @note The buffer **must** be locked prior to calling this
*/ */
void MarkGpuDirty(); void MarkGpuDirty(UsageTracker &usageTracker);
/** /**
* @brief Prevents sequenced writes to this buffer's backing from occuring on the CPU, forcing sequencing on the GPU instead for the duration of the context. Unsequenced writes such as those from the guest can still occur however. * @brief Prevents sequenced writes to this buffer's backing from occuring on the CPU, forcing sequencing on the GPU instead for the duration of the context. Unsequenced writes such as those from the guest can still occur however.
@ -365,13 +373,15 @@ namespace skyline::gpu {
* @param gpuCopyCallback Optional callback to perform a GPU-side copy for this Write if necessary, if such a copy is needed and this is not supplied `true` will be returned to indicate that the write needs to be repeated with the callback present * @param gpuCopyCallback Optional callback to perform a GPU-side copy for this Write if necessary, if such a copy is needed and this is not supplied `true` will be returned to indicate that the write needs to be repeated with the callback present
* @return Whether the write needs to be repeated with `gpuCopyCallback` provided, always false if `gpuCopyCallback` is provided * @return Whether the write needs to be repeated with `gpuCopyCallback` provided, always false if `gpuCopyCallback` is provided
*/ */
bool Write(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {}); bool Write(span<u8> data, vk::DeviceSize offset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback = {});
/** /**
* @brief Copies a region of the src buffer into a region of this buffer * @brief Copies a region of the src buffer into a region of this buffer
* @note The src/dst buffers **must** be locked prior to calling this * @note The src/dst buffers **must** be locked prior to calling this
*/ */
void CopyFrom(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback); void CopyFrom(vk::DeviceSize dstOffset,
Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);
/** /**
* @return A view into this buffer with the supplied attributes * @return A view into this buffer with the supplied attributes
@ -528,7 +538,7 @@ namespace skyline::gpu {
* @note The view **must** be locked prior to calling this * @note The view **must** be locked prior to calling this
* @note See Buffer::Write * @note See Buffer::Write
*/ */
bool Write(span<u8> data, vk::DeviceSize writeOffset, const std::function<void()> &gpuCopyCallback = {}) const; bool Write(span<u8> data, vk::DeviceSize writeOffset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback = {}) const;
/* /*
* @brief If megabuffering is determined to be beneficial for the underlying buffer, allocates and copies this view into the megabuffer (in case of cache miss), returning a binding of the allocated megabuffer region * @brief If megabuffering is determined to be beneficial for the underlying buffer, allocates and copies this view into the megabuffer (in case of cache miss), returning a binding of the allocated megabuffer region
@ -550,7 +560,7 @@ namespace skyline::gpu {
* @brief Copies the contents of one view into this one * @brief Copies the contents of one view into this one
* @note The src/dst views **must** be locked prior to calling this * @note The src/dst views **must** be locked prior to calling this
*/ */
void CopyFrom(BufferView src, const std::function<void()> &gpuCopyCallback); void CopyFrom(BufferView src, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);
constexpr operator bool() { constexpr operator bool() {
return delegate != nullptr; return delegate != nullptr;

View File

@ -113,7 +113,7 @@ namespace skyline::gpu {
if (srcBuffer.lock.IsFirstUsage() && newBuffer->dirtyState != Buffer::DirtyState::GpuDirty) if (srcBuffer.lock.IsFirstUsage() && newBuffer->dirtyState != Buffer::DirtyState::GpuDirty)
copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->mirror.data(), srcBuffer->backing->data()); copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->mirror.data(), srcBuffer->backing->data());
else else
newBuffer->MarkGpuDirty(); newBuffer->MarkGpuDirtyImpl();
// Since we don't synchost source buffers and the source buffers here are GPU dirty their mirrors will be out of date, meaning the backing contents of this source buffer's region in the new buffer from the initial synchost call will be incorrect. By copying backings directly here we can ensure that no writes are lost and that if the newly created buffer needs to turn GPU dirty during recreation no copies need to be done since the backing is as up to date as the mirror at a minimum. // Since we don't synchost source buffers and the source buffers here are GPU dirty their mirrors will be out of date, meaning the backing contents of this source buffer's region in the new buffer from the initial synchost call will be incorrect. By copying backings directly here we can ensure that no writes are lost and that if the newly created buffer needs to turn GPU dirty during recreation no copies need to be done since the backing is as up to date as the mirror at a minimum.
copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->backing->data(), srcBuffer->backing->data()); copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->backing->data(), srcBuffer->backing->data());
@ -126,7 +126,7 @@ namespace skyline::gpu {
} }
} else { } else {
if (srcBuffer->directGpuWritesActive) { if (srcBuffer->directGpuWritesActive) {
newBuffer->MarkGpuDirty(); newBuffer->MarkGpuDirtyImpl();
} else if (srcBuffer->directTrackedShadowActive) { } else if (srcBuffer->directTrackedShadowActive) {
newBuffer->EnableTrackedShadowDirect(); newBuffer->EnableTrackedShadowDirect();
copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->directTrackedShadow.data(), srcBuffer->directTrackedShadow.data()); copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->directTrackedShadow.data(), srcBuffer->directTrackedShadow.data());

View File

@ -562,6 +562,7 @@ namespace skyline::gpu::interconnect {
attachedBuffers.clear(); attachedBuffers.clear();
allocator->Reset(); allocator->Reset();
renderPassIndex = 0; renderPassIndex = 0;
usageTracker.sequencedIntervals.Clear();
// Periodically clear preserve attachments just in case there are new waiters which would otherwise end up waiting forever // Periodically clear preserve attachments just in case there are new waiters which would otherwise end up waiting forever
if ((submissionNumber % (2U << *state.settings->executorSlotCountScale)) == 0) { if ((submissionNumber % (2U << *state.settings->executorSlotCountScale)) == 0) {
@ -586,7 +587,6 @@ namespace skyline::gpu::interconnect {
SubmitInternal(); SubmitInternal();
submissionNumber++; submissionNumber++;
} else { } else {
if (callback && *state.settings->useDirectMemoryImport) if (callback && *state.settings->useDirectMemoryImport)
waiterThread.Queue(nullptr, std::move(callback)); waiterThread.Queue(nullptr, std::move(callback));
@ -598,6 +598,8 @@ namespace skyline::gpu::interconnect {
ResetInternal(); ResetInternal();
if (wait) { if (wait) {
usageTracker.dirtyIntervals.Clear();
std::condition_variable cv; std::condition_variable cv;
std::mutex mutex; std::mutex mutex;
bool gpuDone{}; bool gpuDone{};

View File

@ -6,6 +6,7 @@
#include <boost/container/stable_vector.hpp> #include <boost/container/stable_vector.hpp>
#include <renderdoc_app.h> #include <renderdoc_app.h>
#include <common/linear_allocator.h> #include <common/linear_allocator.h>
#include <gpu/usage_tracker.h>
#include <gpu/megabuffer.h> #include <gpu/megabuffer.h>
#include "command_nodes.h" #include "command_nodes.h"
#include "common/spin_lock.h" #include "common/spin_lock.h"
@ -217,6 +218,7 @@ namespace skyline::gpu::interconnect {
size_t submissionNumber{}; size_t submissionNumber{};
ContextTag executionTag{}; ContextTag executionTag{};
bool captureNextExecution{}; bool captureNextExecution{};
UsageTracker usageTracker;
CommandExecutor(const DeviceState &state); CommandExecutor(const DeviceState &state);

View File

@ -62,7 +62,7 @@ namespace skyline::gpu::interconnect {
dstStageMask |= dstStage; dstStageMask |= dstStage;
} }
view.GetBuffer()->MarkGpuDirty(); view.GetBuffer()->MarkGpuDirty(ctx.executor.usageTracker);
} else { } else {
if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)}) if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)})
return megaBufferBinding; return megaBufferBinding;

View File

@ -53,13 +53,13 @@ namespace skyline::gpu::interconnect {
mirrorBlock = blockMapping; mirrorBlock = blockMapping;
} }
if (entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->channelSequenceNumber != ctx.channelCtx.channelSequenceNumber) { if (entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->executionTag != ctx.executor.executionTag) {
entry->channelSequenceNumber = ctx.channelCtx.channelSequenceNumber; entry->executionTag = ctx.executor.executionTag;
entry->dirty = true; entry->dirty = true;
} }
// If the mirror entry has been written to, clear its shader binary cache and retrap to catch any future writes // If the mirror entry has been written to, clear its shader binary cache and retrap to catch any future writes
if (entry->dirty) { if (entry->dirty || ctx.executor.usageTracker.sequencedIntervals.Intersect(blockMapping.subspan(blockOffset))) {
entry->cache.clear(); entry->cache.clear();
entry->dirty = false; entry->dirty = false;
@ -129,7 +129,7 @@ namespace skyline::gpu::interconnect {
if (programBase != lastProgramBase || programOffset != lastProgramOffset) if (programBase != lastProgramBase || programOffset != lastProgramOffset)
return true; return true;
if (entry && entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->channelSequenceNumber != ctx.channelCtx.channelSequenceNumber) if (entry && entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->executionTag != ctx.executor.executionTag)
return true; return true;
else if (entry && entry->dirty) else if (entry && entry->dirty)
return true; return true;

View File

@ -22,7 +22,7 @@ namespace skyline::gpu::interconnect {
static constexpr u32 SkipTrapThreshold{20}; //!< Threshold for the number of times a mirror trap needs to be hit before we fallback to always hashing static constexpr u32 SkipTrapThreshold{20}; //!< Threshold for the number of times a mirror trap needs to be hit before we fallback to always hashing
u32 trapCount{}; //!< The number of times the trap has been hit, used to avoid trapping in cases where the constant retraps would harm performance u32 trapCount{}; //!< The number of times the trap has been hit, used to avoid trapping in cases where the constant retraps would harm performance
size_t channelSequenceNumber{}; //!< For the case where `trapCount > SkipTrapThreshold`, the memory sequence number number used to clear the cache after every access ContextTag executionTag{}; //!< For the case where `trapCount > SkipTrapThreshold`, the memory sequence number number used to clear the cache after every access
bool dirty{}; //!< If the trap has been hit and the cache needs to be cleared bool dirty{}; //!< If the trap has been hit and the cache needs to be cleared
MirrorEntry(span<u8> alignedMirror) : mirror{alignedMirror} {} MirrorEntry(span<u8> alignedMirror) : mirror{alignedMirror} {}

View File

@ -123,6 +123,7 @@ namespace skyline::gpu::interconnect {
auto dstTextureView{gpu.texture.FindOrCreate(dstGuestTexture, executor.tag)}; auto dstTextureView{gpu.texture.FindOrCreate(dstGuestTexture, executor.tag)};
executor.AttachDependency(dstTextureView); executor.AttachDependency(dstTextureView);
executor.AttachTexture(dstTextureView.get()); executor.AttachTexture(dstTextureView.get());
dstTextureView->texture->MarkGpuDirty(executor.usageTracker);
// Blit shader always samples from centre so adjust if necessary // Blit shader always samples from centre so adjust if necessary
float centredSrcRectX{sampleOrigin == SampleModeOrigin::Corner ? srcRectX - 0.5f : srcRectX}; float centredSrcRectX{sampleOrigin == SampleModeOrigin::Corner ? srcRectX - 0.5f : srcRectX};

View File

@ -22,7 +22,7 @@ namespace skyline::gpu::interconnect {
ContextLock dstBufLock{executor.tag, dstBuf}; ContextLock dstBufLock{executor.tag, dstBuf};
dstBuf.Write(src, 0, [&]() { dstBuf.Write(src, 0, executor.usageTracker, [&]() {
executor.AttachLockedBufferView(dstBuf, std::move(dstBufLock)); executor.AttachLockedBufferView(dstBuf, std::move(dstBufLock));
// This will prevent any CPU accesses to backing for the duration of the usage // This will prevent any CPU accesses to backing for the duration of the usage
dstBuf.GetBuffer()->BlockAllCpuBackingWrites(); dstBuf.GetBuffer()->BlockAllCpuBackingWrites();

View File

@ -206,7 +206,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
dstStageMask |= vk::PipelineStageFlagBits::eTransformFeedbackEXT; dstStageMask |= vk::PipelineStageFlagBits::eTransformFeedbackEXT;
} }
view->GetBuffer()->MarkGpuDirty(); view->GetBuffer()->MarkGpuDirty(ctx.executor.usageTracker);
builder.SetTransformFeedbackBuffer(index, *view); builder.SetTransformFeedbackBuffer(index, *view);
return; return;
} else { } else {

View File

@ -46,7 +46,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
ContextLock lock{ctx.executor.tag, view}; ContextLock lock{ctx.executor.tag, view};
// First attempt the write without setting up the gpu copy callback as a fast path // First attempt the write without setting up the gpu copy callback as a fast path
if (view.Write(srcCpuBuf, offset)) [[unlikely]] { if (view.Write(srcCpuBuf, offset, ctx.executor.usageTracker)) [[unlikely]] {
// Store callback data in a stack allocated struct to avoid heap allocation for the gpu copy callback lambda // Store callback data in a stack allocated struct to avoid heap allocation for the gpu copy callback lambda
struct GpuCopyCallbackData { struct GpuCopyCallbackData {
InterconnectContext &ctx; InterconnectContext &ctx;
@ -56,7 +56,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
BufferView &view; BufferView &view;
} callbackData{ctx, srcCpuBuf, offset, lock, view}; } callbackData{ctx, srcCpuBuf, offset, lock, view};
view.Write(srcCpuBuf, offset, [&callbackData]() { view.Write(srcCpuBuf, offset, ctx.executor.usageTracker, [&callbackData]() {
callbackData.ctx.executor.AttachLockedBufferView(callbackData.view, std::move(callbackData.lock)); callbackData.ctx.executor.AttachLockedBufferView(callbackData.view, std::move(callbackData.lock));
// This will prevent any CPU accesses to backing for the duration of the usage // This will prevent any CPU accesses to backing for the duration of the usage
callbackData.view.GetBuffer()->BlockAllCpuBackingWrites(); callbackData.view.GetBuffer()->BlockAllCpuBackingWrites();

View File

@ -24,7 +24,7 @@ namespace skyline::gpu::interconnect {
})}; })};
ContextLock dstBufLock{executor.tag, dstBuf}; ContextLock dstBufLock{executor.tag, dstBuf};
dstBuf.CopyFrom(srcBuf, [&]() { dstBuf.CopyFrom(srcBuf, executor.usageTracker, [&]() {
executor.AttachLockedBufferView(srcBuf, std::move(srcBufLock)); executor.AttachLockedBufferView(srcBuf, std::move(srcBufLock));
executor.AttachLockedBufferView(dstBuf, std::move(dstBufLock)); executor.AttachLockedBufferView(dstBuf, std::move(dstBufLock));
// This will prevent any CPU accesses to backing for the duration of the usage // This will prevent any CPU accesses to backing for the duration of the usage

View File

@ -725,6 +725,12 @@ namespace skyline::gpu {
} }
} }
void Texture::MarkGpuDirty(UsageTracker &usageTracker) {
    // Register every valid guest mapping of this texture as a GPU-dirty interval in the
    // tracker, so later accesses within the execution can detect that a flush is required
    // NOTE(review): iterates mappings by value — presumably a cheap span-like type; confirm
    for (auto mapping : guest->mappings)
        if (mapping.valid())
            usageTracker.dirtyIntervals.Insert(mapping);
}
void Texture::SynchronizeHost(bool gpuDirty) { void Texture::SynchronizeHost(bool gpuDirty) {
if (!guest) if (!guest)
return; return;

View File

@ -10,6 +10,7 @@
#include <nce.h> #include <nce.h>
#include <gpu/tag_allocator.h> #include <gpu/tag_allocator.h>
#include <gpu/memory_manager.h> #include <gpu/memory_manager.h>
#include <gpu/usage_tracker.h>
namespace skyline::gpu { namespace skyline::gpu {
namespace texture { namespace texture {
@ -560,6 +561,11 @@ namespace skyline::gpu {
*/ */
void TransitionLayout(vk::ImageLayout layout); void TransitionLayout(vk::ImageLayout layout);
/**
* @brief Marks the texture as being GPU dirty
*/
void MarkGpuDirty(UsageTracker &usageTracker);
/** /**
* @brief Synchronizes the host texture with the guest after it has been modified * @brief Synchronizes the host texture with the guest after it has been modified
* @param gpuDirty If true, the texture will be transitioned to being GpuDirty by this call * @param gpuDirty If true, the texture will be transitioned to being GpuDirty by this call

View File

@ -0,0 +1,16 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)

#pragma once

#include <common/interval_list.h>

namespace skyline::gpu {
    /**
     * @brief Tracks the usage of GPU memory and buffers to allow for fine-grained flushing
     */
    struct UsageTracker {
        IntervalList<u8 *> dirtyIntervals; //!< Intervals of GPU-dirty contents that require a flush before accessing
        IntervalList<u8 *> sequencedIntervals; //!< Intervals of GPFIFO-sequenced writes that occur within an execution
    };
}