Switch execution-numbers to be globally unique tags

This is required for making pipelines usable across channels without introducing caching bugs.
Billy Laws 2022-12-10 15:31:22 +00:00
parent 072b8193a1
commit 937eff392f
12 changed files with 32 additions and 33 deletions
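
The caching hazard the commit message refers to: an execution number was a plain u32 counter incremented on each submission, so it was only unique within one executor. Once pipelines are shared across channels, state cached under one channel's current number can look current to another channel that happens to be on the same number, whereas a globally unique tag allocated from shared state cannot collide. The sketch below is illustrative only: ContextTag and AllocateTag() mirror identifiers that appear in the diff, but these definitions are assumptions rather than Skyline's actual implementation.

#include <atomic>
#include <cstdint>

// Hypothetical stand-in for Skyline's ContextTag: an opaque value that only
// compares equal to another copy of the same allocated tag.
struct ContextTag {
    std::uint64_t value{};
    bool operator==(const ContextTag &) const = default;
};

// Hypothetical stand-in for AllocateTag(): a process-wide atomic counter
// guarantees no two executions, on any channel, ever receive the same tag;
// the default-constructed value 0 is left free to mean "no tag yet".
inline ContextTag AllocateTag() {
    static std::atomic<std::uint64_t> counter{0};
    return ContextTag{counter.fetch_add(1, std::memory_order_relaxed) + 1};
}

// The check this commit switches every cache over to: an entry is reused only
// if it was produced by the same execution. With per-executor u32 counters,
// two channels that had both reached execution #7 would wrongly pass this
// test against each other's cached state.
struct CachedEntry {
    ContextTag lastExecutionTag{};

    bool IsValidFor(ContextTag executionTag) const {
        return lastExecutionTag == executionTag;
    }
};

In the diff itself this shows up as CommandExecutor allocating a fresh tag per submission (executionTag = AllocateTag()) instead of incrementing executionNumber, and every consumer comparing against lastExecutionTag before trusting cached data.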

View File

@@ -343,7 +343,7 @@ namespace skyline::gpu {
return {};
}
-BufferBinding Buffer::TryMegaBufferView(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, u32 executionNumber,
+BufferBinding Buffer::TryMegaBufferView(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, ContextTag executionTag,
vk::DeviceSize offset, vk::DeviceSize size) {
if ((!everHadInlineUpdate && sequenceNumber < FrequentlySyncedThreshold) || size >= MegaBufferChunkSize)
// Don't megabuffer buffers that have never had inline updates and are not frequently synced since performance is only going to be harmed as a result of the constant copying and there wont be any benefit since there are no GPU inline updates that would be avoided
@@ -355,9 +355,9 @@ namespace skyline::gpu {
return {};
// If the active execution has changed all previous allocations are now invalid
-if (executionNumber != lastExecutionNumber) [[unlikely]] {
+if (executionTag != lastExecutionTag) [[unlikely]] {
ResetMegabufferState();
-lastExecutionNumber = executionNumber;
+lastExecutionTag = executionTag;
}
// If more than half the buffer has been megabuffered in chunks within the same execution assume this will generally be the case for this buffer and just megabuffer the whole thing without chunking
@@ -427,7 +427,6 @@ namespace skyline::gpu {
void Buffer::unlock() {
tag = ContextTag{};
AllowAllBackingWrites();
-lastExecutionNumber = 0;
mutex.unlock();
}
@@ -489,8 +488,8 @@ namespace skyline::gpu {
return GetBuffer()->Write(data, writeOffset + GetOffset(), gpuCopyCallback);
}
-BufferBinding BufferView::TryMegaBuffer(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, u32 executionNumber, size_t sizeOverride) const {
-return GetBuffer()->TryMegaBufferView(pCycle, allocator, executionNumber, GetOffset(), sizeOverride ? sizeOverride : size);
+BufferBinding BufferView::TryMegaBuffer(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, ContextTag executionTag, size_t sizeOverride) const {
+return GetBuffer()->TryMegaBufferView(pCycle, allocator, executionTag, GetOffset(), sizeOverride ? sizeOverride : size);
}
span<u8> BufferView::GetReadOnlyBackingSpan(bool isFirstUsage, const std::function<void()> &flushHostCallback) {

View File

@@ -82,7 +82,7 @@ namespace skyline::gpu {
bool unifiedMegaBufferEnabled{}; //!< If the unified megabuffer is enabled for this buffer and should be used instead of the table
bool everHadInlineUpdate{}; //!< Whether the buffer has ever had an inline update since it was created, if this is set then megabuffering will be attempted by views to avoid the cost of inline GPU updates
-u32 lastExecutionNumber{}; //!< The execution number of the last time megabuffer data was updated
+ContextTag lastExecutionTag{}; //!< The execution tag of the last time megabuffer data was updated
size_t megaBufferViewAccumulatedSize{};
MegaBufferAllocator::Allocation unifiedMegaBuffer{}; //!< An optional full-size mirror of the buffer in the megabuffer for use when the buffer is frequently updated and *all* of the buffer is frequently used. Replaces all uses of the table when active
@@ -317,7 +317,7 @@ namespace skyline::gpu {
* @return A binding to the megabuffer allocation for the view, may be invalid if megabuffering is not beneficial
* @note The buffer **must** be locked prior to calling this
*/
-BufferBinding TryMegaBufferView(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, u32 executionNumber,
+BufferBinding TryMegaBufferView(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, ContextTag executionTag,
vk::DeviceSize offset, vk::DeviceSize size);
/**
@@ -451,7 +451,7 @@ namespace skyline::gpu {
* @note The view **must** be locked prior to calling this
* @note See Buffer::TryMegaBufferView
*/
-BufferBinding TryMegaBuffer(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, u32 executionNumber, size_t sizeOverride = 0) const;
+BufferBinding TryMegaBuffer(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, ContextTag executionTag, size_t sizeOverride = 0) const;
/**
* @return A span of the backing buffer contents

View File

@@ -82,7 +82,7 @@ namespace skyline::gpu::interconnect {
}
void CommandRecordThread::ProcessSlot(Slot *slot) {
-TRACE_EVENT_FMT("gpu", "ProcessSlot: 0x{:X}, execution: {}", slot, slot->executionNumber);
+TRACE_EVENT_FMT("gpu", "ProcessSlot: 0x{:X}, execution: {}", slot, slot->executionTag);
auto &gpu{*state.gpu};
vk::RenderPass lRenderPass;
@@ -207,7 +207,7 @@ namespace skyline::gpu::interconnect {
captureNextExecution = false;
slot = recordThread.AcquireSlot();
cycle = slot->Reset(gpu);
-slot->executionNumber = executionNumber;
+slot->executionTag = executionTag;
allocator = &slot->allocator;
}
@@ -496,7 +496,7 @@ namespace skyline::gpu::interconnect {
for (const auto &callback : flushCallbacks)
callback();
-executionNumber++;
+executionTag = AllocateTag();
if (!slot->nodes.empty()) {
TRACE_EVENT("gpu", "CommandExecutor::Submit");

View File

@@ -39,7 +39,7 @@ namespace skyline::gpu::interconnect {
LinearAllocatorState<> allocator;
std::mutex beginLock;
std::condition_variable beginCondition;
-u32 executionNumber;
+ContextTag executionTag;
bool ready{}; //!< If this slot's command buffer has had 'beginCommandBuffer' called and is ready to have commands recorded into it
bool capture{}; //!< If this slot's Vulkan commands should be captured using the renderdoc API
bool didWait{}; //!< If a wait of time longer than GrowThresholdNs occured when this slot was acquired
@@ -186,7 +186,7 @@ namespace skyline::gpu::interconnect {
LinearAllocatorState<> *allocator;
ContextTag tag; //!< The tag associated with this command executor, any tagged resource locking must utilize this tag
size_t submissionNumber{};
-u32 executionNumber{};
+ContextTag executionTag{};
bool captureNextExecution{};
CommandExecutor(const DeviceState &state);

View File

@@ -24,7 +24,7 @@ namespace skyline::gpu::interconnect {
ctx.executor.AttachBuffer(view);
size_t sizeOverride{std::min<size_t>(info.constant_buffer_used_sizes[idx], view.size)};
-if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionNumber, sizeOverride)}) {
+if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag, sizeOverride)}) {
return megaBufferBinding;
} else {
view.GetBuffer()->BlockSequencedCpuBackingWrites();
@@ -52,7 +52,7 @@ namespace skyline::gpu::interconnect {
if (desc.is_written) {
view.GetBuffer()->MarkGpuDirty();
} else {
-if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionNumber)})
+if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)})
return megaBufferBinding;
}

View File

@@ -189,11 +189,11 @@ namespace skyline::gpu::interconnect {
textureHeaderCache.resize(textureHeaders.size());
std::fill(textureHeaderCache.begin(), textureHeaderCache.end(), CacheEntry{});
} else if (auto &cached{textureHeaderCache[index]}; cached.view) {
-if (cached.executionNumber == ctx.executor.executionNumber)
+if (cached.executionTag == ctx.executor.executionTag)
return cached.view;
if (cached.tic == textureHeaders[index] && !cached.view->texture->replaced) {
-cached.executionNumber = ctx.executor.executionNumber;
+cached.executionTag = ctx.executor.executionTag;
return cached.view;
}
}
@@ -321,7 +321,7 @@ namespace skyline::gpu::interconnect {
texture = ctx.gpu.texture.FindOrCreate(guest, ctx.executor.tag);
}
-textureHeaderCache[index] = {textureHeader, texture.get(), ctx.executor.executionNumber};
+textureHeaderCache[index] = {textureHeader, texture.get(), ctx.executor.executionTag};
return texture.get();
}

View File

@@ -41,7 +41,7 @@ namespace skyline::gpu::interconnect {
struct CacheEntry {
TextureImageControl tic;
TextureView *view;
-u32 executionNumber;
+ContextTag executionTag;
};
std::vector<CacheEntry> textureHeaderCache;

View File

@@ -107,17 +107,17 @@ namespace skyline::gpu::interconnect::kepler_compute {
storageBufferViews.resize(shaderStage.info.storage_buffers_descriptors.size());
}
-void Pipeline::SyncCachedStorageBufferViews(u32 executionNumber) {
-if (lastExecutionNumber != executionNumber) {
+void Pipeline::SyncCachedStorageBufferViews(ContextTag executionTag) {
+if (lastExecutionTag != executionTag) {
for (auto &view : storageBufferViews)
view.PurgeCaches();
-lastExecutionNumber = executionNumber;
+lastExecutionTag = executionTag;
}
}
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures) {
-SyncCachedStorageBufferViews(ctx.executor.executionNumber);
+SyncCachedStorageBufferViews(ctx.executor.executionTag);
u32 writeIdx{};
auto writes{ctx.executor.allocator->AllocateUntracked<vk::WriteDescriptorSet>(descriptorInfo.totalWriteDescCount)};

View File

@@ -42,9 +42,9 @@ namespace skyline::gpu::interconnect::kepler_compute {
ShaderStage shaderStage;
DescriptorInfo descriptorInfo;
std::vector<CachedMappedBufferView> storageBufferViews;
-u32 lastExecutionNumber{}; //!< The last execution number this pipeline was used at
+ContextTag lastExecutionTag{}; //!< The last execution tag this pipeline was used at
-void SyncCachedStorageBufferViews(u32 executionNumber);
+void SyncCachedStorageBufferViews(ContextTag executionTag);
public:
CompiledPipeline compiledPipeline;

View File

@@ -29,7 +29,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
if (*view) {
ctx.executor.AttachBuffer(*view);
-if (megaBufferBinding = view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionNumber);
+if (megaBufferBinding = view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag);
megaBufferBinding)
builder.SetVertexBuffer(index, megaBufferBinding, ctx.gpu.traits.supportsExtendedDynamicState, engine->vertexStream.format.stride);
else
@@ -50,7 +50,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
bool VertexBufferState::Refresh(InterconnectContext &ctx, StateUpdateBuilder &builder) {
if (megaBufferBinding) {
-if (auto newMegaBufferBinding{view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionNumber)};
+if (auto newMegaBufferBinding{view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)};
newMegaBufferBinding != megaBufferBinding) {
megaBufferBinding = newMegaBufferBinding;
@@ -136,7 +136,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
if (quadConversion)
megaBufferBinding = GenerateQuadConversionIndexBuffer(ctx, engine->indexBuffer.indexSize, *view, firstIndex, elementCount);
else
-megaBufferBinding = view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionNumber);
+megaBufferBinding = view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag);
if (megaBufferBinding)
builder.SetIndexBuffer(megaBufferBinding, indexType);
@@ -159,7 +159,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
megaBufferBinding = GenerateQuadConversionIndexBuffer(ctx, engine->indexBuffer.indexSize, *view, firstIndex, elementCount);
builder.SetIndexBuffer(megaBufferBinding, indexType);
} else if (megaBufferBinding) {
-if (auto newMegaBufferBinding{view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionNumber)};
+if (auto newMegaBufferBinding{view->TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)};
newMegaBufferBinding != megaBufferBinding) {
megaBufferBinding = newMegaBufferBinding;

View File

@@ -652,7 +652,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
}
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span<TextureView *> sampledImages) {
-SyncCachedStorageBufferViews(ctx.executor.executionNumber);
+SyncCachedStorageBufferViews(ctx.executor.executionTag);
u32 writeIdx{};
auto writes{ctx.executor.allocator->AllocateUntracked<vk::WriteDescriptorSet>(descriptorInfo.totalWriteDescCount)};
@@ -761,7 +761,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
}
DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span<TextureView *> sampledImages) {
-SyncCachedStorageBufferViews(ctx.executor.executionNumber);
+SyncCachedStorageBufferViews(ctx.executor.executionTag);
size_t stageIndex{static_cast<size_t>(quickBind.stage)};
const auto &stageDescInfo{descriptorInfo.stages[stageIndex]};

View File

@@ -86,7 +86,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
private:
std::vector<CachedMappedBufferView> storageBufferViews;
-u32 lastExecutionNumber{}; //!< The last execution number this pipeline was used at
+ContextTag lastExecutionTag{}; //!< The last execution tag this pipeline was used at
std::array<ShaderStage, engine::ShaderStageCount> shaderStages;
DescriptorInfo descriptorInfo;