Mirror of https://github.com/skyline-emu/skyline.git
Introduce usage tracker for dirty tracking within an execution
This is necessary as e.g. shaders can be updated through a mirror and never hit modification traps. By tracking which addresses have had sequenced writes applied, the shader manager can correctly detect whether a given shader has been modified by the GPU.
Parent: f64860c93e
Commit: 090151f0c3
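
For orientation, the tracker itself (defined in the new gpu/usage_tracker.h at the bottom of this diff) is just two interval lists over guest address ranges, with different lifetimes taken from the CommandExecutor changes below:

    // Condensed from the new usage_tracker.h introduced by this commit
    struct UsageTracker {
        IntervalList<u8 *> dirtyIntervals;     //!< GPU-dirty ranges; cleared only once a submission wait guarantees the GPU writes have landed
        IntervalList<u8 *> sequencedIntervals; //!< Ranges touched by GPFIFO-sequenced writes; cleared at the end of every execution
    };

Buffer and texture writes insert intervals into these lists, and the shader manager intersects against them to detect GPU-side modification without relying on traps.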
@@ -194,13 +194,15 @@ namespace skyline::gpu {
         return isDirect ? ValidateMegaBufferViewImplDirect(size) : ValidateMegaBufferViewImplStaged(size);
     }
 
-    void Buffer::CopyFromImplDirect(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback) {
+    void Buffer::CopyFromImplDirect(vk::DeviceSize dstOffset,
+                                    Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                                    UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
         everHadInlineUpdate = true;
         bool needsGpuTracking{src->RefreshGpuWritesActiveDirect() || RefreshGpuWritesActiveDirect()};
         bool needsCpuTracking{RefreshGpuReadsActiveDirect() && !needsGpuTracking};
         if (needsGpuTracking || needsCpuTracking) {
             if (needsGpuTracking) // Force buffer to be dirty for this cycle if either of the sources are dirty, this is needed as otherwise it could have just been dirty from the previous cycle
-                MarkGpuDirty();
+                MarkGpuDirty(usageTracker);
             gpuCopyCallback();
 
             if (needsCpuTracking)
@@ -210,7 +212,9 @@ namespace skyline::gpu {
         }
     }
 
-    void Buffer::CopyFromImplStaged(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback) {
+    void Buffer::CopyFromImplStaged(vk::DeviceSize dstOffset,
+                                    Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                                    UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
         std::scoped_lock lock{stateMutex, src->stateMutex}; // Fine even if src and dst are same since recursive mutex
 
         if (dirtyState == DirtyState::CpuDirty && SequencedCpuBackingWritesBlocked())
@@ -230,18 +234,19 @@ namespace skyline::gpu {
             else
                 gpuCopyCallback();
         } else {
-            MarkGpuDirty();
+            MarkGpuDirty(usageTracker);
             gpuCopyCallback();
         }
     }
 
-    bool Buffer::WriteImplDirect(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback) {
+    bool Buffer::WriteImplDirect(span<u8> data, vk::DeviceSize offset,
+                                 UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
         // If the buffer is GPU dirty do the write on the GPU and we're done
         if (RefreshGpuWritesActiveDirect()) {
             if (gpuCopyCallback) {
                 // Propagate dirtiness to the current cycle, since if this is only dirty in a previous cycle that could change at any time and we would need to have the write saved somewhere for CPU reads
                 // By propagating the dirtiness to the current cycle we can avoid this and force a wait on any reads
-                MarkGpuDirty();
+                MarkGpuDirty(usageTracker);
                 gpuCopyCallback();
                 return false;
             } else {
@@ -349,6 +354,15 @@ namespace skyline::gpu {
         AdvanceSequence(); // The GPU will modify buffer contents so advance to the next sequence
     }
 
+    void Buffer::MarkGpuDirtyImpl() {
+        currentExecutionGpuDirty = true;
+
+        if (isDirect)
+            MarkGpuDirtyImplDirect();
+        else
+            MarkGpuDirtyImplStaged();
+    }
+
     Buffer::Buffer(LinearAllocatorState<> &delegateAllocator, GPU &gpu, GuestBuffer guest, size_t id, bool direct)
         : gpu{gpu},
           guest{guest},
@@ -382,16 +396,12 @@ namespace skyline::gpu {
         WaitOnFence();
     }
 
-    void Buffer::MarkGpuDirty() {
+    void Buffer::MarkGpuDirty(UsageTracker &usageTracker) {
         if (!guest)
             return;
 
-        currentExecutionGpuDirty = true;
-
-        if (isDirect)
-            MarkGpuDirtyImplDirect();
-        else
-            MarkGpuDirtyImplStaged();
+        usageTracker.dirtyIntervals.Insert(*guest);
+        MarkGpuDirtyImpl();
     }
 
     void Buffer::WaitOnFence() {
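
MarkGpuDirty is now the single point where GPU-side dirtiness becomes visible outside the buffer: the per-buffer state change moves into the new MarkGpuDirtyImpl, while the public entry point also records the buffer's whole guest mapping in the tracker. That lets any component holding only an address range ask about dirtiness without needing a reference to the buffer at all; a hypothetical caller-side sketch (the helper name is illustrative, Intersect(span) is the interface the shader cache uses later in this diff):

    // Sketch only: the kind of range query the central tracker enables
    bool MayBeGpuDirty(UsageTracker &tracker, span<u8> range) {
        return tracker.dirtyIntervals.Intersect(range);
    }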
@@ -493,24 +503,30 @@ namespace skyline::gpu {
         ReadImplStaged(isFirstUsage, flushHostCallback, data, offset);
     }
 
-    bool Buffer::Write(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback) {
+    bool Buffer::Write(span<u8> data, vk::DeviceSize offset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
         AdvanceSequence(); // We are modifying GPU backing contents so advance to the next sequence
         everHadInlineUpdate = true;
 
+        usageTracker.sequencedIntervals.Insert(*guest);
+
         if (isDirect)
-            return WriteImplDirect(data, offset, gpuCopyCallback);
+            return WriteImplDirect(data, offset, usageTracker, gpuCopyCallback);
         else
             return WriteImplStaged(data, offset, gpuCopyCallback);
     }
 
-    void Buffer::CopyFrom(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback) {
+    void Buffer::CopyFrom(vk::DeviceSize dstOffset,
+                          Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                          UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
         AdvanceSequence(); // We are modifying GPU backing contents so advance to the next sequence
         everHadInlineUpdate = true;
 
+        usageTracker.sequencedIntervals.Insert(*guest);
+
         if (isDirect)
-            CopyFromImplDirect(dstOffset, src, srcOffset, size, gpuCopyCallback);
+            CopyFromImplDirect(dstOffset, src, srcOffset, size, usageTracker, gpuCopyCallback);
         else
-            CopyFromImplStaged(dstOffset, src, srcOffset, size, gpuCopyCallback);
+            CopyFromImplStaged(dstOffset, src, srcOffset, size, usageTracker, gpuCopyCallback);
     }
 
     BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size) {
@@ -676,8 +692,8 @@ namespace skyline::gpu {
         GetBuffer()->Read(isFirstUsage, flushHostCallback, data, readOffset + GetOffset());
     }
 
-    bool BufferView::Write(span<u8> data, vk::DeviceSize writeOffset, const std::function<void()> &gpuCopyCallback) const {
-        return GetBuffer()->Write(data, writeOffset + GetOffset(), gpuCopyCallback);
+    bool BufferView::Write(span<u8> data, vk::DeviceSize writeOffset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) const {
+        return GetBuffer()->Write(data, writeOffset + GetOffset(), usageTracker, gpuCopyCallback);
     }
 
     BufferBinding BufferView::TryMegaBuffer(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, ContextTag executionTag, size_t sizeOverride) const {
@@ -689,9 +705,9 @@ namespace skyline::gpu {
         return backing.subspan(GetOffset(), size);
     }
 
-    void BufferView::CopyFrom(BufferView src, const std::function<void()> &gpuCopyCallback) {
+    void BufferView::CopyFrom(BufferView src, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
         if (src.size != size)
             throw exception("Copy size mismatch!");
-        return GetBuffer()->CopyFrom(GetOffset(), src.GetBuffer(), src.GetOffset(), size, gpuCopyCallback);
+        return GetBuffer()->CopyFrom(GetOffset(), src.GetBuffer(), src.GetOffset(), size, usageTracker, gpuCopyCallback);
     }
 }
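
With these hunks, every sequenced write funnels through the tracker twice: unconditionally as a sequenced interval, and additionally as a dirty interval when the write ends up being performed GPU-side. Traced out for the direct path:

    // Call-path trace for a direct-backing write with GPU writes active,
    // condensed from the hunks above:
    //
    // Buffer::Write(data, offset, usageTracker, gpuCopyCallback)
    //  -> usageTracker.sequencedIntervals.Insert(*guest)    // always recorded
    //  -> WriteImplDirect(data, offset, usageTracker, ...)
    //      -> MarkGpuDirty(usageTracker)                    // only if the GPU must perform the write
    //          -> usageTracker.dirtyIntervals.Insert(*guest)
    //      -> gpuCopyCallback()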
@@ -8,6 +8,7 @@
 #include <common/spin_lock.h>
 #include <nce.h>
 #include <gpu/tag_allocator.h>
+#include "usage_tracker.h"
 #include "megabuffer.h"
 #include "memory_manager.h"
 
@@ -146,11 +147,16 @@ namespace skyline::gpu {
          */
         bool ValidateMegaBufferView(vk::DeviceSize size);
 
-        void CopyFromImplDirect(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback);
+        void CopyFromImplDirect(vk::DeviceSize dstOffset,
+                                Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                                UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);
 
-        void CopyFromImplStaged(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback);
+        void CopyFromImplStaged(vk::DeviceSize dstOffset,
+                                Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                                UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);
 
-        bool WriteImplDirect(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {});
+        bool WriteImplDirect(span<u8> data, vk::DeviceSize offset,
+                             UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback = {});
 
         bool WriteImplStaged(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {});
 
@@ -162,6 +168,8 @@ namespace skyline::gpu {
 
         void MarkGpuDirtyImplStaged();
 
+        void MarkGpuDirtyImpl();
+
       public:
         void UpdateCycle(const std::shared_ptr<FenceCycle> &newCycle) {
             newCycle->ChainCycle(cycle);
@@ -227,7 +235,7 @@ namespace skyline::gpu {
          * @note This **must** be called after syncing the buffer to the GPU not before
          * @note The buffer **must** be locked prior to calling this
          */
-        void MarkGpuDirty();
+        void MarkGpuDirty(UsageTracker &usageTracker);
 
         /**
          * @brief Prevents sequenced writes to this buffer's backing from occurring on the CPU, forcing sequencing on the GPU instead for the duration of the context. Unsequenced writes such as those from the guest can still occur however.
@@ -365,13 +373,15 @@ namespace skyline::gpu {
          * @param gpuCopyCallback Optional callback to perform a GPU-side copy for this Write if necessary, if such a copy is needed and this is not supplied `true` will be returned to indicate that the write needs to be repeated with the callback present
          * @return Whether the write needs to be repeated with `gpuCopyCallback` provided, always false if `gpuCopyCallback` is provided
          */
-        bool Write(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {});
+        bool Write(span<u8> data, vk::DeviceSize offset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback = {});
 
         /**
          * @brief Copies a region of the src buffer into a region of this buffer
          * @note The src/dst buffers **must** be locked prior to calling this
         */
-        void CopyFrom(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback);
+        void CopyFrom(vk::DeviceSize dstOffset,
+                      Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                      UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);
 
         /**
          * @return A view into this buffer with the supplied attributes
@@ -528,7 +538,7 @@ namespace skyline::gpu {
          * @note The view **must** be locked prior to calling this
          * @note See Buffer::Write
          */
-        bool Write(span<u8> data, vk::DeviceSize writeOffset, const std::function<void()> &gpuCopyCallback = {}) const;
+        bool Write(span<u8> data, vk::DeviceSize writeOffset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback = {}) const;
 
         /*
          * @brief If megabuffering is determined to be beneficial for the underlying buffer, allocates and copies this view into the megabuffer (in case of cache miss), returning a binding of the allocated megabuffer region
@@ -550,7 +560,7 @@ namespace skyline::gpu {
          * @brief Copies the contents of one view into this one
          * @note The src/dst views **must** be locked prior to calling this
         */
-        void CopyFrom(BufferView src, const std::function<void()> &gpuCopyCallback);
+        void CopyFrom(BufferView src, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);
 
         constexpr operator bool() {
             return delegate != nullptr;
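
The header keeps Write's two-phase contract: a call without gpuCopyCallback returns true when the write turns out to need GPU-side sequencing, and the caller then repeats it with a callback. The tracker reference is simply threaded through both phases; a caller-side sketch mirroring the constant-buffer update path later in this diff (names illustrative):

    // Two-phase write following the contract documented above
    if (view.Write(srcCpuBuf, offset, usageTracker)) { // fast path failed: a GPU-side copy is required
        view.Write(srcCpuBuf, offset, usageTracker, [&] {
            // attach the locked view to the execution, block CPU backing writes,
            // and record the GPU-side copy of srcCpuBuf into the buffer
        });
    }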
@@ -113,7 +113,7 @@ namespace skyline::gpu {
                 if (srcBuffer.lock.IsFirstUsage() && newBuffer->dirtyState != Buffer::DirtyState::GpuDirty)
                     copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->mirror.data(), srcBuffer->backing->data());
                 else
-                    newBuffer->MarkGpuDirty();
+                    newBuffer->MarkGpuDirtyImpl();
 
                 // Since we don't synchost source buffers and the source buffers here are GPU dirty their mirrors will be out of date, meaning the backing contents of this source buffer's region in the new buffer from the initial synchost call will be incorrect. By copying backings directly here we can ensure that no writes are lost and that if the newly created buffer needs to turn GPU dirty during recreation no copies need to be done since the backing is as up to date as the mirror at a minimum.
                 copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->backing->data(), srcBuffer->backing->data());
@@ -126,7 +126,7 @@ namespace skyline::gpu {
                 }
             } else {
                 if (srcBuffer->directGpuWritesActive) {
-                    newBuffer->MarkGpuDirty();
+                    newBuffer->MarkGpuDirtyImpl();
                 } else if (srcBuffer->directTrackedShadowActive) {
                     newBuffer->EnableTrackedShadowDirect();
                     copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->directTrackedShadow.data(), srcBuffer->directTrackedShadow.data());
@@ -562,6 +562,7 @@ namespace skyline::gpu::interconnect {
         attachedBuffers.clear();
         allocator->Reset();
         renderPassIndex = 0;
+        usageTracker.sequencedIntervals.Clear();
 
         // Periodically clear preserve attachments just in case there are new waiters which would otherwise end up waiting forever
         if ((submissionNumber % (2U << *state.settings->executorSlotCountScale)) == 0) {
@@ -586,7 +587,6 @@ namespace skyline::gpu::interconnect {
 
             SubmitInternal();
             submissionNumber++;
-
         } else {
             if (callback && *state.settings->useDirectMemoryImport)
                 waiterThread.Queue(nullptr, std::move(callback));
@@ -598,6 +598,8 @@ namespace skyline::gpu::interconnect {
         ResetInternal();
 
         if (wait) {
+            usageTracker.dirtyIntervals.Clear();
+
             std::condition_variable cv;
             std::mutex mutex;
             bool gpuDone{};
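
These two Clear() calls define each list's lifetime: sequencedIntervals only ever describes the current execution, cleared alongside the other per-execution state above (apparently in ResetInternal), while dirtyIntervals persists across executions until a waited submission proves the dirtying GPU work has completed. In outline, with the enclosing function names inferred from the hunks:

    // Lifecycle sketch condensed from the CommandExecutor hunks above
    void ResetInternal() {
        // ...existing per-execution reset work...
        usageTracker.sequencedIntervals.Clear(); // sequenced writes are scoped to one execution
    }

    void Submit(/*...*/) {
        // ...submission...
        ResetInternal();

        if (wait) {
            usageTracker.dirtyIntervals.Clear(); // safe: the wait below guarantees GPU writes have landed
            // ...wait for the GPU to finish...
        }
    }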
@@ -6,6 +6,7 @@
 #include <boost/container/stable_vector.hpp>
 #include <renderdoc_app.h>
 #include <common/linear_allocator.h>
+#include <gpu/usage_tracker.h>
 #include <gpu/megabuffer.h>
 #include "command_nodes.h"
 #include "common/spin_lock.h"
@@ -217,6 +218,7 @@ namespace skyline::gpu::interconnect {
         size_t submissionNumber{};
         ContextTag executionTag{};
         bool captureNextExecution{};
+        UsageTracker usageTracker;
 
         CommandExecutor(const DeviceState &state);
 
@@ -62,7 +62,7 @@ namespace skyline::gpu::interconnect {
             dstStageMask |= dstStage;
         }
 
-        view.GetBuffer()->MarkGpuDirty();
+        view.GetBuffer()->MarkGpuDirty(ctx.executor.usageTracker);
     } else {
         if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)})
             return megaBufferBinding;
@@ -53,13 +53,13 @@ namespace skyline::gpu::interconnect {
            mirrorBlock = blockMapping;
        }
 
-       if (entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->channelSequenceNumber != ctx.channelCtx.channelSequenceNumber) {
-           entry->channelSequenceNumber = ctx.channelCtx.channelSequenceNumber;
+       if (entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->executionTag != ctx.executor.executionTag) {
+           entry->executionTag = ctx.executor.executionTag;
            entry->dirty = true;
        }
 
        // If the mirror entry has been written to, clear its shader binary cache and retrap to catch any future writes
-       if (entry->dirty) {
+       if (entry->dirty || ctx.executor.usageTracker.sequencedIntervals.Intersect(blockMapping.subspan(blockOffset))) {
            entry->cache.clear();
            entry->dirty = false;
 
@@ -129,7 +129,7 @@ namespace skyline::gpu::interconnect {
        if (programBase != lastProgramBase || programOffset != lastProgramOffset)
            return true;
 
-       if (entry && entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->channelSequenceNumber != ctx.channelCtx.channelSequenceNumber)
+       if (entry && entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->executionTag != ctx.executor.executionTag)
            return true;
        else if (entry && entry->dirty)
            return true;
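
This is the check the commit message describes: when a shader is modified through a GPU channel, the mirror's trap never fires, so entry->dirty alone would miss the write; intersecting the execution's sequenced write intervals with the shader's mapping catches it. Unpacked conceptually (entry, blockMapping and blockOffset as in the hunk above):

    // Conceptual form of the new invalidation condition
    bool cpuModified{entry->dirty};                       // a mirror trap fired since the last check
    bool gpuModified{ctx.executor.usageTracker.sequencedIntervals.Intersect(
        blockMapping.subspan(blockOffset))};              // a sequenced GPU write overlapped this shader
    if (cpuModified || gpuModified) {
        entry->cache.clear();                             // drop cached binaries; they'll be re-hashed
        entry->dirty = false;
    }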
@@ -22,7 +22,7 @@ namespace skyline::gpu::interconnect {
 
        static constexpr u32 SkipTrapThreshold{20}; //!< Threshold for the number of times a mirror trap needs to be hit before we fallback to always hashing
        u32 trapCount{}; //!< The number of times the trap has been hit, used to avoid trapping in cases where the constant retraps would harm performance
-       size_t channelSequenceNumber{}; //!< For the case where `trapCount > SkipTrapThreshold`, the memory sequence number used to clear the cache after every access
+       ContextTag executionTag{}; //!< For the case where `trapCount > SkipTrapThreshold`, the tag of the last execution, used to clear the cache at most once per execution
        bool dirty{}; //!< If the trap has been hit and the cache needs to be cleared
 
        MirrorEntry(span<u8> alignedMirror) : mirror{alignedMirror} {}
@@ -123,6 +123,7 @@ namespace skyline::gpu::interconnect {
        auto dstTextureView{gpu.texture.FindOrCreate(dstGuestTexture, executor.tag)};
        executor.AttachDependency(dstTextureView);
        executor.AttachTexture(dstTextureView.get());
+       dstTextureView->texture->MarkGpuDirty(executor.usageTracker);
 
        // Blit shader always samples from centre so adjust if necessary
        float centredSrcRectX{sampleOrigin == SampleModeOrigin::Corner ? srcRectX - 0.5f : srcRectX};
@@ -22,7 +22,7 @@ namespace skyline::gpu::interconnect {
        ContextLock dstBufLock{executor.tag, dstBuf};
 
 
-       dstBuf.Write(src, 0, [&]() {
+       dstBuf.Write(src, 0, executor.usageTracker, [&]() {
            executor.AttachLockedBufferView(dstBuf, std::move(dstBufLock));
            // This will prevent any CPU accesses to backing for the duration of the usage
            dstBuf.GetBuffer()->BlockAllCpuBackingWrites();
@@ -206,7 +206,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
                dstStageMask |= vk::PipelineStageFlagBits::eTransformFeedbackEXT;
            }
 
-           view->GetBuffer()->MarkGpuDirty();
+           view->GetBuffer()->MarkGpuDirty(ctx.executor.usageTracker);
            builder.SetTransformFeedbackBuffer(index, *view);
            return;
        } else {
@@ -46,7 +46,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
        ContextLock lock{ctx.executor.tag, view};
 
        // First attempt the write without setting up the gpu copy callback as a fast path
-       if (view.Write(srcCpuBuf, offset)) [[unlikely]] {
+       if (view.Write(srcCpuBuf, offset, ctx.executor.usageTracker)) [[unlikely]] {
            // Store callback data in a stack allocated struct to avoid heap allocation for the gpu copy callback lambda
            struct GpuCopyCallbackData {
                InterconnectContext &ctx;
@@ -56,7 +56,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
                BufferView &view;
            } callbackData{ctx, srcCpuBuf, offset, lock, view};
 
-           view.Write(srcCpuBuf, offset, [&callbackData]() {
+           view.Write(srcCpuBuf, offset, ctx.executor.usageTracker, [&callbackData]() {
                callbackData.ctx.executor.AttachLockedBufferView(callbackData.view, std::move(callbackData.lock));
                // This will prevent any CPU accesses to backing for the duration of the usage
                callbackData.view.GetBuffer()->BlockAllCpuBackingWrites();
@@ -24,7 +24,7 @@ namespace skyline::gpu::interconnect {
        })};
        ContextLock dstBufLock{executor.tag, dstBuf};
 
-       dstBuf.CopyFrom(srcBuf, [&]() {
+       dstBuf.CopyFrom(srcBuf, executor.usageTracker, [&]() {
            executor.AttachLockedBufferView(srcBuf, std::move(srcBufLock));
            executor.AttachLockedBufferView(dstBuf, std::move(dstBufLock));
            // This will prevent any CPU accesses to backing for the duration of the usage
@@ -725,6 +725,12 @@ namespace skyline::gpu {
        }
    }
 
+   void Texture::MarkGpuDirty(UsageTracker &usageTracker) {
+       for (auto mapping : guest->mappings)
+           if (mapping.valid())
+               usageTracker.dirtyIntervals.Insert(mapping);
+   }
+
    void Texture::SynchronizeHost(bool gpuDirty) {
        if (!guest)
            return;
@@ -10,6 +10,7 @@
 #include <nce.h>
 #include <gpu/tag_allocator.h>
 #include <gpu/memory_manager.h>
+#include <gpu/usage_tracker.h>
 
 namespace skyline::gpu {
     namespace texture {
@@ -560,6 +561,11 @@ namespace skyline::gpu {
         */
        void TransitionLayout(vk::ImageLayout layout);
 
+       /**
+        * @brief Marks the texture as being GPU dirty
+        */
+       void MarkGpuDirty(UsageTracker &usageTracker);
+
        /**
        * @brief Synchronizes the host texture with the guest after it has been modified
        * @param gpuDirty If true, the texture will be transitioned to being GpuDirty by this call
app/src/main/cpp/skyline/gpu/usage_tracker.h (new file, 16 lines)
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include <common/interval_list.h>
+
+namespace skyline::gpu {
+    /**
+     * @brief Tracks the usage of GPU memory and buffers to allow for fine-grained flushing
+     */
+    struct UsageTracker {
+        IntervalList<u8 *> dirtyIntervals; //!< Intervals of GPU-dirty contents that require a flush before accessing
+        IntervalList<u8 *> sequencedIntervals; //!< Intervals of GPFIFO-sequenced writes that occur within an execution
+    };
+}
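
common/interval_list.h itself is not part of this diff; the only interface UsageTracker relies on here is Insert(span), Intersect(span) and Clear(). A deliberately naive flat-vector sketch of that interface, for illustration only (the real IntervalList may well sort, merge or coalesce intervals):

    // Toy stand-in for common/interval_list.h; illustrative, not the actual implementation
    // Assumes skyline's u8 and span<u8> aliases are available
    #include <utility>
    #include <vector>

    template<typename AddressType> // instantiated as IntervalList<u8 *> above
    class IntervalList {
      private:
        std::vector<std::pair<AddressType, AddressType>> intervals; //!< [start, end) address pairs

      public:
        void Insert(span<u8> mapping) {
            intervals.emplace_back(mapping.data(), mapping.data() + mapping.size());
        }

        bool Intersect(span<u8> mapping) const {
            AddressType start{mapping.data()}, end{mapping.data() + mapping.size()};
            for (const auto &[intervalStart, intervalEnd] : intervals)
                if (start < intervalEnd && intervalStart < end) // half-open interval overlap test
                    return true;
            return false;
        }

        void Clear() {
            intervals.clear();
        }
    };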