mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-24 18:41:52 +01:00
Use raw pointers to hold constant buffer views
The constant destruction and creation of `BufferView`s in cbuf-heavy games showed up as a large chunk of time in the profiler. Fix this by taking advantage of the fact that constant buffer `BufferView`s are never deleted and are always kept in the cache: lookups now return a pointer to the cached view instead of a copy.
This commit is contained in:
parent
6b2e84712b
commit
460e6c9c84
@ -617,7 +617,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
struct ConstantBuffer {
|
struct ConstantBuffer {
|
||||||
IOVA iova;
|
IOVA iova;
|
||||||
u32 size;
|
u32 size;
|
||||||
BufferView view;
|
BufferView *view;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Reads an object from the supplied offset in the constant buffer
|
* @brief Reads an object from the supplied offset in the constant buffer
|
||||||
@ -626,8 +626,8 @@ namespace skyline::gpu::interconnect {
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
T Read(CommandExecutor &pExecutor, size_t dstOffset) const {
|
T Read(CommandExecutor &pExecutor, size_t dstOffset) const {
|
||||||
T object;
|
T object;
|
||||||
ContextLock lock{pExecutor.tag, view};
|
ContextLock lock{pExecutor.tag, *view};
|
||||||
view.Read(lock.IsFirstUsage(), []() {
|
view->Read(lock.IsFirstUsage(), []() {
|
||||||
// TODO: here we should trigger a SubmitWithFlush, however that doesn't currently work due to Read being called mid-draw and attached objects not handling this case
|
// TODO: here we should trigger a SubmitWithFlush, however that doesn't currently work due to Read being called mid-draw and attached objects not handling this case
|
||||||
Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented");
|
Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented");
|
||||||
}, span<T>(object).template cast<u8>(), dstOffset);
|
}, span<T>(object).template cast<u8>(), dstOffset);
|
||||||
@ -642,28 +642,29 @@ namespace skyline::gpu::interconnect {
|
|||||||
void Write(CommandExecutor &pExecutor, MegaBufferAllocator &megaBufferAllocator, span<T> buf, size_t dstOffset) {
|
void Write(CommandExecutor &pExecutor, MegaBufferAllocator &megaBufferAllocator, span<T> buf, size_t dstOffset) {
|
||||||
auto srcCpuBuf{buf.template cast<u8>()};
|
auto srcCpuBuf{buf.template cast<u8>()};
|
||||||
|
|
||||||
ContextLock lock{pExecutor.tag, view};
|
ContextLock lock{pExecutor.tag, *view};
|
||||||
view.Write(lock.IsFirstUsage(), pExecutor.cycle, []() {
|
view->Write(lock.IsFirstUsage(), pExecutor.cycle, []() {
|
||||||
// TODO: see Read()
|
// TODO: see Read()
|
||||||
Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented");
|
Logger::Warn("GPU dirty buffer reads for attached buffers are unimplemented");
|
||||||
}, [&megaBufferAllocator, &pExecutor, srcCpuBuf, dstOffset, &view = this->view, &lock]() {
|
}, [&megaBufferAllocator, &pExecutor, srcCpuBuf, dstOffset, &view = this->view, &lock]() {
|
||||||
pExecutor.AttachLockedBufferView(view, std::move(lock));
|
pExecutor.AttachLockedBufferView(*view, std::move(lock));
|
||||||
// This will prevent any CPU accesses to backing for the duration of the usage
|
// This will prevent any CPU accesses to backing for the duration of the usage
|
||||||
// ONLY in this specific case is it fine to access the backing buffer directly since the flag will be propagated with recreations
|
// ONLY in this specific case is it fine to access the backing buffer directly since the flag will be propagated with recreations
|
||||||
view->buffer->BlockAllCpuBackingWrites();
|
(*view)->buffer->BlockAllCpuBackingWrites();
|
||||||
|
|
||||||
auto srcGpuAllocation{megaBufferAllocator.Push(pExecutor.cycle, srcCpuBuf)};
|
auto srcGpuAllocation{megaBufferAllocator.Push(pExecutor.cycle, srcCpuBuf)};
|
||||||
pExecutor.AddOutsideRpCommand([=](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
|
pExecutor.AddOutsideRpCommand([=](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
|
||||||
vk::BufferCopy copyRegion{
|
vk::BufferCopy copyRegion{
|
||||||
.size = srcCpuBuf.size_bytes(),
|
.size = srcCpuBuf.size_bytes(),
|
||||||
.srcOffset = srcGpuAllocation.offset,
|
.srcOffset = srcGpuAllocation.offset,
|
||||||
.dstOffset = view->view->offset + dstOffset
|
.dstOffset = (*view)->view->offset + dstOffset
|
||||||
};
|
};
|
||||||
commandBuffer.copyBuffer(srcGpuAllocation.buffer, view->buffer->GetBacking(), copyRegion);
|
commandBuffer.copyBuffer(srcGpuAllocation.buffer, (*view)->buffer->GetBacking(), copyRegion);
|
||||||
});
|
});
|
||||||
}, srcCpuBuf, dstOffset);
|
}, srcCpuBuf, dstOffset);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
ConstantBuffer constantBufferSelector{}; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it
|
ConstantBuffer constantBufferSelector{}; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -708,40 +709,44 @@ namespace skyline::gpu::interconnect {
|
|||||||
std::unordered_map<Key, BufferView, KeyHash> cache;
|
std::unordered_map<Key, BufferView, KeyHash> cache;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
std::optional<BufferView> Lookup(u32 size, u64 iova) {
|
BufferView *Lookup(u32 size, u64 iova) {
|
||||||
if (auto it{cache.find({size, iova})}; it != cache.end())
|
if (auto it{cache.find({size, iova})}; it != cache.end())
|
||||||
return it->second;
|
return &it->second;
|
||||||
|
|
||||||
return std::nullopt;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Insert(u32 size, u64 iova, BufferView &view) {
|
BufferView *Insert(u32 size, u64 iova, BufferView &&view) {
|
||||||
cache[Key{size, iova}] = view;
|
return &cache.emplace(Key{size, iova}, view).first->second;
|
||||||
}
|
}
|
||||||
} constantBufferCache;
|
} constantBufferCache;
|
||||||
|
|
||||||
std::optional<ConstantBuffer> GetConstantBufferSelector() {
|
ConstantBuffer *GetConstantBufferSelector() {
|
||||||
if (constantBufferSelector.size == 0)
|
if (constantBufferSelector.size == 0)
|
||||||
return std::nullopt;
|
return nullptr;
|
||||||
else if (constantBufferSelector.view)
|
else if (constantBufferSelector.view)
|
||||||
return constantBufferSelector;
|
return &constantBufferSelector;
|
||||||
|
|
||||||
auto view{constantBufferCache.Lookup(constantBufferSelector.size, constantBufferSelector.iova)};
|
auto view{constantBufferCache.Lookup(constantBufferSelector.size, constantBufferSelector.iova)};
|
||||||
if (!view) {
|
if (!view) {
|
||||||
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)};
|
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)};
|
||||||
view = executor.AcquireBufferManager().FindOrCreate(mappings.front(), executor.tag, [this](std::shared_ptr<Buffer> buffer, ContextLock<Buffer> &&lock) {
|
view = constantBufferCache.Insert(constantBufferSelector.size, constantBufferSelector.iova,
|
||||||
executor.AttachLockedBuffer(buffer, std::move(lock));
|
executor.AcquireBufferManager().FindOrCreate(mappings.front(), executor.tag, [this](std::shared_ptr<Buffer> buffer, ContextLock<Buffer> &&lock) {
|
||||||
});
|
executor.AttachLockedBuffer(buffer, std::move(lock));
|
||||||
constantBufferCache.Insert(constantBufferSelector.size, constantBufferSelector.iova, *view);
|
})
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
constantBufferSelector.view = *view;
|
constantBufferSelector.view = view;
|
||||||
return constantBufferSelector;
|
return &constantBufferSelector;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConstantBufferUpdate(std::vector<u32> data, u32 offset) {
|
void ConstantBufferUpdate(span<u32> data, u32 offset) {
|
||||||
auto constantBuffer{GetConstantBufferSelector().value()};
|
auto constantBuffer{GetConstantBufferSelector()};
|
||||||
constantBuffer.Write<u32>(executor, executor.AcquireMegaBufferAllocator(), data, offset);
|
if (constantBuffer)
|
||||||
|
constantBuffer->Write<u32>(executor, executor.AcquireMegaBufferAllocator(), data, offset);
|
||||||
|
else
|
||||||
|
throw exception("Attempting to write to invalid constant buffer!");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Shader Program */
|
/* Shader Program */
|
||||||
@ -1112,7 +1117,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
.stageFlags = pipelineStage.vkStage,
|
.stageFlags = pipelineStage.vkStage,
|
||||||
});
|
});
|
||||||
|
|
||||||
auto view{pipelineStage.constantBuffers[constantBuffer.index].view};
|
auto &view{*pipelineStage.constantBuffers[constantBuffer.index].view};
|
||||||
executor.AttachBuffer(view);
|
executor.AttachBuffer(view);
|
||||||
if (auto megaBufferAllocation{view.AcquireMegaBuffer(executor.cycle, executor.AcquireMegaBufferAllocator())}) {
|
if (auto megaBufferAllocation{view.AcquireMegaBuffer(executor.cycle, executor.AcquireMegaBufferAllocator())}) {
|
||||||
// If the buffer is megabuffered then since we don't get our data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage
|
// If the buffer is megabuffered then since we don't get our data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage
|
||||||
@ -1268,12 +1273,12 @@ namespace skyline::gpu::interconnect {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void BindPipelineConstantBuffer(maxwell3d::PipelineStage stage, bool enable, u32 index) {
|
void BindPipelineConstantBuffer(maxwell3d::PipelineStage stage, bool enable, u32 index) {
|
||||||
auto &constantBuffer{pipelineStages[stage].constantBuffers[index]};
|
auto &targetConstantBuffer{pipelineStages[stage].constantBuffers[index]};
|
||||||
|
|
||||||
if (enable)
|
if (auto selector{GetConstantBufferSelector()}; selector && enable)
|
||||||
constantBuffer = GetConstantBufferSelector().value();
|
targetConstantBuffer = *selector;
|
||||||
else
|
else
|
||||||
constantBuffer = {};
|
targetConstantBuffer = {};
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Rasterizer State */
|
/* Rasterizer State */
|
||||||
|
Loading…
Reference in New Issue
Block a user