From 460e6c9c8435e35aba977740513062e7828afb64 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 31 Jul 2022 13:57:11 +0100 Subject: [PATCH] Use raw pointers to hold constant buffer views The constant destruction and creation of `BufferView`s in cbuf-heavy games showed up as a large chunk of the profiler. Fix this by taking advantage of the fact that constant buffer `BufferView`s are never deleted and always kept around in the cache to just return a pointer to them in the cache. --- .../gpu/interconnect/graphics_context.h | 67 ++++++++++--------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index d35d3eed..e07d66c5 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -617,7 +617,7 @@ namespace skyline::gpu::interconnect { struct ConstantBuffer { IOVA iova; u32 size; - BufferView view; + BufferView *view; /** * @brief Reads an object from the supplied offset in the constant buffer @@ -626,8 +626,8 @@ namespace skyline::gpu::interconnect { template T Read(CommandExecutor &pExecutor, size_t dstOffset) const { T object; - ContextLock lock{pExecutor.tag, view}; - view.Read(lock.IsFirstUsage(), []() { + ContextLock lock{pExecutor.tag, *view}; + view->Read(lock.IsFirstUsage(), []() { // TODO: here we should trigger a SubmitWithFlush, however that doesn't currently work due to Read being called mid-draw and attached objects not handling this case Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented"); }, span(object).template cast(), dstOffset); @@ -642,28 +642,29 @@ namespace skyline::gpu::interconnect { void Write(CommandExecutor &pExecutor, MegaBufferAllocator &megaBufferAllocator, span buf, size_t dstOffset) { auto srcCpuBuf{buf.template cast()}; - ContextLock lock{pExecutor.tag, view}; - view.Write(lock.IsFirstUsage(), pExecutor.cycle, []() { + ContextLock lock{pExecutor.tag, *view}; + view->Write(lock.IsFirstUsage(), pExecutor.cycle, []() { // TODO: see Read() Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented"); }, [&megaBufferAllocator, &pExecutor, srcCpuBuf, dstOffset, &view = this->view, &lock]() { - pExecutor.AttachLockedBufferView(view, std::move(lock)); + pExecutor.AttachLockedBufferView(*view, std::move(lock)); // This will prevent any CPU accesses to backing for the duration of the usage // ONLY in this specific case is it fine to access the backing buffer directly since the flag will be propagated with recreations - view->buffer->BlockAllCpuBackingWrites(); + (*view)->buffer->BlockAllCpuBackingWrites(); auto srcGpuAllocation{megaBufferAllocator.Push(pExecutor.cycle, srcCpuBuf)}; pExecutor.AddOutsideRpCommand([=](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &) { vk::BufferCopy copyRegion{ .size = srcCpuBuf.size_bytes(), .srcOffset = srcGpuAllocation.offset, - .dstOffset = view->view->offset + dstOffset + .dstOffset = (*view)->view->offset + dstOffset }; - commandBuffer.copyBuffer(srcGpuAllocation.buffer, view->buffer->GetBacking(), copyRegion); + commandBuffer.copyBuffer(srcGpuAllocation.buffer, (*view)->buffer->GetBacking(), copyRegion); }); }, srcCpuBuf, dstOffset); } }; + ConstantBuffer constantBufferSelector{}; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it public: @@ -708,40 +709,44 @@ namespace skyline::gpu::interconnect { std::unordered_map cache; public: - std::optional Lookup(u32 size, u64 iova) { + BufferView *Lookup(u32 size, u64 iova) { if (auto it{cache.find({size, iova})}; it != cache.end()) - return it->second; + return &it->second; - return std::nullopt; + return nullptr; } - void Insert(u32 size, u64 iova, BufferView &view) { - cache[Key{size, iova}] = view; + BufferView *Insert(u32 size, u64 iova, BufferView &&view) { + return &cache.emplace(Key{size, iova}, view).first->second; } } constantBufferCache; - std::optional GetConstantBufferSelector() { + ConstantBuffer *GetConstantBufferSelector() { if (constantBufferSelector.size == 0) - return std::nullopt; + return nullptr; else if (constantBufferSelector.view) - return constantBufferSelector; + return &constantBufferSelector; auto view{constantBufferCache.Lookup(constantBufferSelector.size, constantBufferSelector.iova)}; if (!view) { auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)}; - view = executor.AcquireBufferManager().FindOrCreate(mappings.front(), executor.tag, [this](std::shared_ptr buffer, ContextLock &&lock) { - executor.AttachLockedBuffer(buffer, std::move(lock)); - }); - constantBufferCache.Insert(constantBufferSelector.size, constantBufferSelector.iova, *view); + view = constantBufferCache.Insert(constantBufferSelector.size, constantBufferSelector.iova, + executor.AcquireBufferManager().FindOrCreate(mappings.front(), executor.tag, [this](std::shared_ptr buffer, ContextLock &&lock) { + executor.AttachLockedBuffer(buffer, std::move(lock)); + }) + ); } - constantBufferSelector.view = *view; - return constantBufferSelector; + constantBufferSelector.view = view; + return &constantBufferSelector; } - void ConstantBufferUpdate(std::vector data, u32 offset) { - auto constantBuffer{GetConstantBufferSelector().value()}; - constantBuffer.Write(executor, executor.AcquireMegaBufferAllocator(), data, offset); + void ConstantBufferUpdate(span data, u32 offset) { + auto constantBuffer{GetConstantBufferSelector()}; + if (constantBuffer) + constantBuffer->Write(executor, executor.AcquireMegaBufferAllocator(), data, offset); + else + throw exception("Attempting to write to invalid constant buffer!"); } /* Shader Program */ @@ -1112,7 +1117,7 @@ namespace skyline::gpu::interconnect { .stageFlags = pipelineStage.vkStage, }); - auto view{pipelineStage.constantBuffers[constantBuffer.index].view}; + auto &view{*pipelineStage.constantBuffers[constantBuffer.index].view}; executor.AttachBuffer(view); if (auto megaBufferAllocation{view.AcquireMegaBuffer(executor.cycle, executor.AcquireMegaBufferAllocator())}) { // If the buffer is megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage @@ -1268,12 +1273,12 @@ namespace skyline::gpu::interconnect { } void BindPipelineConstantBuffer(maxwell3d::PipelineStage stage, bool enable, u32 index) { - auto &constantBuffer{pipelineStages[stage].constantBuffers[index]}; + auto &targetConstantBuffer{pipelineStages[stage].constantBuffers[index]}; - if (enable) - constantBuffer = GetConstantBufferSelector().value(); + if (auto selector{GetConstantBufferSelector()}; selector && enable) + targetConstantBuffer = *selector; else - constantBuffer = {}; + targetConstantBuffer = {}; } /* Rasterizer State */