skyline/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp

228 lines
10 KiB
C++
Raw Normal View History

// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include "command_executor.h"
namespace skyline::gpu::interconnect {
/**
 * @brief Allocates a command buffer from the GPU scheduler and adopts its fence cycle as the executor's current cycle
 * @param state Device state whose GPU instance this executor records into and submits work to
 * @note Each initializer reads the member set by the previous one, so this relies on the members being declared in this order in the class - confirm against the header
 */
CommandExecutor::CommandExecutor(const DeviceState &state)
    : gpu(*state.gpu),
      activeCommandBuffer(gpu.scheduler.AllocateCommandBuffer()),
      cycle(activeCommandBuffer.GetFenceCycle()) {}
/**
 * @brief Cancels the fence cycle currently held by the executor
 * @note The held cycle always belongs to the active command buffer (obtained from it on construction and again after every Execute)
 */
CommandExecutor::~CommandExecutor() {
    // NOTE(review): presumably this abandons a cycle that was never submitted - confirm against FenceCycle::Cancel
    cycle->Cancel();
}
2021-10-22 11:59:38 +02:00
/**
 * @brief Ensures a render pass covering the supplied area is active and that its most recent subpass uses the supplied attachments
 * @param renderArea The render area the render pass must have, a mismatch with the active render pass forces a new one
 * @param inputAttachments Input attachments of the desired subpass
 * @param colorAttachments Color attachments of the desired subpass
 * @param depthStencilAttachment Depth-stencil attachment of the desired subpass, may be null
 * @return `true` only when a new subpass was appended to an already active render pass, `false` when a new render pass was begun or the last subpass was reused as-is
 */
bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment) {
    // Adds a subpass to the active render pass and remembers its attachments so the next call can detect an identical subpass
    auto appendSubpass{[&] {
        renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment);

        lastSubpassAttachments.clear();

        // Copies a list of attachments to the end of the persistent storage and reports the index at which it starts
        auto store{[this](auto &attachments) -> size_t {
            size_t startIndex{lastSubpassAttachments.size()};
            lastSubpassAttachments.insert(lastSubpassAttachments.end(), attachments.begin(), attachments.end());
            return startIndex;
        }};

        // Forms a view over a range of the persistent storage
        auto viewOf{[this](size_t startIndex, size_t count) -> span<TextureView *> {
            return {lastSubpassAttachments.data() + startIndex, count};
        }};

        // Both lists are stored before any span is formed as the spans point into the storage which is still growing during insertion
        size_t inputStart{store(inputAttachments)};
        size_t colorStart{store(colorAttachments)};

        lastSubpassInputAttachments = viewOf(inputStart, inputAttachments.size());
        lastSubpassColorAttachments = viewOf(colorStart, colorAttachments.size());
        lastSubpassDepthStencilAttachment = depthStencilAttachment;
    }};

    // A new render pass is required when none is active or when the active one is incompatible (different area or too many subpasses)
    // NOTE(review): subpassCount is 0 while the render pass holds a single subpass and this check only trips once it exceeds maxSubpassCount, so the counter can reach maxSubpassCount + 1 - confirm this matches the intended meaning of the quirk
    bool needsNewRenderPass{!renderPass || renderPass->renderArea != renderArea || subpassCount > gpu.traits.quirks.maxSubpassCount};
    if (needsNewRenderPass) {
        if (renderPass)
            nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());

        auto &beginNode{nodes.emplace_back(std::in_place_type_t<node::RenderPassNode>(), renderArea)};
        renderPass = &std::get<node::RenderPassNode>(beginNode);
        appendSubpass();
        subpassCount = 0;
        return false;
    }

    bool matchesLastSubpass{ranges::equal(lastSubpassInputAttachments, inputAttachments) &&
                            ranges::equal(lastSubpassColorAttachments, colorAttachments) &&
                            lastSubpassDepthStencilAttachment == depthStencilAttachment};
    if (matchesLastSubpass)
        return false; // The attachments are identical to the last subpass so it can simply be reused

    // The attachments differ from the last subpass so a new one has to be started within the same render pass
    appendSubpass();
    subpassCount++;
    return true;
}
/**
 * @brief Ends the active render pass (if any) and forgets all state tracked about its last subpass
 * @note This does nothing when no render pass is active
 */
void CommandExecutor::FinishRenderPass() {
    if (!renderPass)
        return;

    nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());

    // Reset all tracking so the next subpass request begins a fresh render pass
    renderPass = nullptr;
    subpassCount = 0;

    lastSubpassAttachments.clear();
    lastSubpassInputAttachments = nullptr;
    lastSubpassColorAttachments = nullptr;
    lastSubpassDepthStencilAttachment = nullptr;
}
/**
 * @brief Attaches the backing texture of the supplied view to the current execution and attaches the view itself to the current fence cycle
 * @param view The texture view to attach, it must be owned by a shared pointer as `shared_from_this` is called on it
 */
void CommandExecutor::AttachTexture(TextureView *view) {
    auto texture{view->texture.get()};

    // Only the first attachment of a texture within an execution waits on its prior fence and rebinds it to the current cycle
    if (!attachedTextures.contains(texture)) {
        texture->WaitOnFence();
        texture->cycle = cycle;
        attachedTextures.emplace(texture);
    }

    // The view is attached on every call, regardless of whether its texture was already attached
    // NOTE(review): presumably this keeps the view alive until the cycle is signalled - confirm against FenceCycle::AttachObject
    cycle->AttachObject(view->shared_from_this());
}
Rework `BufferManager`, `Buffer` and `BufferView` — This commit encapsulates a complex sequence of cascading changes in the process of supporting overlaps for buffers: * We determined that it is impossible to resolve overlaps with multiple intervals per buffer within the constraint of each overlap being a contiguous view; support for multiple intervals was therefore dropped. The older buffer manager code was entirely reworked to be simpler, as it only handles one interval per buffer, with the code now being based on `IntervalMap` but tailored specifically for buffers. * During overlap resolution, we faced the problem of how existing views into the buffer being recreated would be updated: the buffer had to be replaced with a larger buffer that could contain all overlaps, and all existing views would need to be repointed to it. This was addressed by having a buffer own all views into itself, so that we could automatically recalculate the offset of all views and update them with the new buffer. * We still needed to update usages of existing views, which was done by handling all access (such as inside a recorded draw) to buffer view properties via `BufferView::RegisterUsage`, which dispatches a callback with the view and the corresponding backing buffer. This callback can be stored and called during overlap resolution with the new buffer. * We had issues with the lifetime of the buffer under the handle-like semantics of `BufferView` introduced in the last buffer-related commit: if we updated the view to be owned by a new buffer, we'd need to extend the lifetime of the new buffer rather than the older one, and the only way to do this was a proxy owner object, `BufferDelegate`, which holds a shared pointer to the real `Buffer`, which in turn holds a pointer to all `BufferDelegate` objects to update on repointing. A `BufferView` is effectively just a wrapper around `std::shared_ptr<BufferDelegate>` with more favorable semantics, generally just forwarding calls. 
It should additionally be noted that, to support usage of `RegisterUsage`, the code around buffers in `GraphicsContext` was refactored to defer the actual binding until the recording phase.
2022-03-28 08:57:05 +02:00
void CommandExecutor::AttachBuffer(BufferView &view) {
Implement overhead-free sequenced buffer updates with megabuffers Previously constant buffer updates would be handled on the CPU and only the end result would be synced to the GPU before execute. This caused issues as if the constant buffer contents was changed between each draw in a renderpass (e.g. text rendering) the draws themselves would only see the final resulting constant buffer. We had earlier tried to fix this by using vkCmdUpdateBuffer however this caused significant performance loss due to an oversight in Adreno drivers. We could have worked around this simply by using vkCmdCopy buffer however there would still be a performance loss due to renderpasses being split up with copies inbetween. To avoid this we introduce 'megabuffers', a brand new technique not done before in any other switch emulators. Rather than replaying the copies in sequence on the GPU, we take advantage of the fact that buffers are generally small in order to replay buffers on the GPU instead. Each write and subsequent usage of a buffer will cause a copy of the buffer with that write, and all prior applied to be pushed into the megabuffer, this way at the start of execute the megabuffer will hold all used states of the buffer simultaneously. Draws then reference these individual states in sequence to allow everything to work without any copies. In order to support this buffers have been moved to an immediate sync model, with synchronisation being done at usage-time rather than execute (in order to keep contents properly sequenced) and GPU-side writes now need to be explictly marked (since they prevent megabuffering). It should also be noted that a fallback path using cmdCopyBuffer exists for the cases where buffers are too large or GPU dirty.
2022-04-23 19:10:39 +02:00
view->buffer->SynchronizeHost();
if (!attachedBuffers.contains(view.bufferDelegate)) {
Rework `BufferManager`, `Buffer` and `BufferView` This commit encapsulates a complex sequence of cascading changes in the process of supporting overlaps for buffers: * We determined that it is impossible to resolve overlaps with multiple intervals per buffer within the constraints of each overlap being a contiguous view, support for multiple intervals was therefore dropped. The older buffer manager code was entirely reworked to be simpler due to only handling one interval per buffer with code now being based off `IntervalMap` but tailored specifically for buffers. * During overlap resolution, the problem of how existing views into the buffer being recreated would be updated, it had to be replaced with a larger buffer that could contain all overlaps and all existing views would need to be repointed to it. This was addressed by a buffer owning all views to itself, we could automatically recalculate the offset of all views and update the buffers with it. * We still needed to update usage of existing views which was done by handling all access (such as inside a recorded draw) to buffer view properties via `BufferView::RegisterUsage` which dispatches a callback with the view and the corresponding backing buffer. This callback can be stored and called during overlap resolution with the new buffer. * We had issues with lifetime of the buffer with the handle-like semantics of `BufferView` introduced in the last buffer-related commit, if we updated the view to be owned by a new buffer we'd need to extend the lifetime of the new buffer not the older one and the only way to do this was a proxy owner object `BufferDelegate` which holds a shared pointer to the real `Buffer` which in-turn holds a pointer to all `BufferDelegate` objects to update on repointing. A `BufferView` is effectively just a wrapper around `std::shared_ptr<BufferDelegate>` with more favorable semantics but generally just forwarding calls. 
It should be additionally noted that to support usage of `RegisterUsage` the code around buffers in `GraphicsContext` was refactored to defer truly binding till the recording phase.
2022-03-28 08:57:05 +02:00
view.AttachCycle(cycle);
Implement overhead-free sequenced buffer updates with megabuffers Previously constant buffer updates would be handled on the CPU and only the end result would be synced to the GPU before execute. This caused issues as if the constant buffer contents was changed between each draw in a renderpass (e.g. text rendering) the draws themselves would only see the final resulting constant buffer. We had earlier tried to fix this by using vkCmdUpdateBuffer however this caused significant performance loss due to an oversight in Adreno drivers. We could have worked around this simply by using vkCmdCopy buffer however there would still be a performance loss due to renderpasses being split up with copies inbetween. To avoid this we introduce 'megabuffers', a brand new technique not done before in any other switch emulators. Rather than replaying the copies in sequence on the GPU, we take advantage of the fact that buffers are generally small in order to replay buffers on the GPU instead. Each write and subsequent usage of a buffer will cause a copy of the buffer with that write, and all prior applied to be pushed into the megabuffer, this way at the start of execute the megabuffer will hold all used states of the buffer simultaneously. Draws then reference these individual states in sequence to allow everything to work without any copies. In order to support this buffers have been moved to an immediate sync model, with synchronisation being done at usage-time rather than execute (in order to keep contents properly sequenced) and GPU-side writes now need to be explictly marked (since they prevent megabuffering). It should also be noted that a fallback path using cmdCopyBuffer exists for the cases where buffers are too large or GPU dirty.
2022-04-23 19:10:39 +02:00
attachedBuffers.emplace(view.bufferDelegate);
}
}
/**
 * @brief Attaches an arbitrary dependency to the current fence cycle
 * @param dependency The object to hand over to the cycle
 */
void CommandExecutor::AttachDependency(const std::shared_ptr<FenceCycleDependency> &dependency) {
    // NOTE(review): presumably the cycle retains the dependency until it is signalled - confirm against FenceCycle::AttachObject
    cycle->AttachObject(dependency);
}
/**
 * @brief Records a function to be run inside a subpass with the supplied attachments, beginning a render pass or subpass when necessary
 * @param function The function to record, it is moved into the recorded node
 * @param renderArea The render area of the render pass that the subpass must belong to
 * @param inputAttachments Input attachments of the subpass
 * @param colorAttachments Color attachments of the subpass
 * @param depthStencilAttachment Depth-stencil attachment of the subpass, may be null
 * @param exclusiveSubpass When set, any active render pass is finished first so that the function is recorded into a new render pass
 */
void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool exclusiveSubpass) {
    if (exclusiveSubpass)
        FinishRenderPass();

    // A `true` result means the function lands in a new subpass of the active render pass, so the recorded node also has to advance to the next subpass
    if (CreateRenderPassWithSubpass(renderArea, inputAttachments, colorAttachments, depthStencilAttachment))
        nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), std::move(function));
    else
        nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), std::move(function));
}
/**
 * @brief Records a function that has to run outside of any render pass
 * @param function The function to record, it is moved into the recorded node
 */
void CommandExecutor::AddOutsideRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function) {
    // Ends the active render pass first, FinishRenderPass does nothing when none is active
    FinishRenderPass();

    nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), std::move(function));
}
/**
 * @brief Records a clear of the supplied color attachment over its texture's full dimensions
 * @param attachment The color attachment to clear, it is the only color attachment of the subpass
 * @param value The color to clear the attachment to
 */
void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) {
    bool startedSubpass{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, attachment, nullptr)};

    // The render pass reports whether it could take care of the clear itself, in which case only the subpass transition (if any) has to be recorded
    if (renderPass->ClearColorAttachment(0, value)) {
        if (startedSubpass)
            nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
        return;
    }

    // Otherwise the clear is recorded as an explicit command inside the subpass
    auto clearCommand{[scissor = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
        commandBuffer.clearAttachments(vk::ClearAttachment{
            .aspectMask = vk::ImageAspectFlagBits::eColor,
            .colorAttachment = 0,
            .clearValue = value,
        }, vk::ClearRect{
            .rect = vk::Rect2D{.extent = scissor},
            .baseArrayLayer = 0,
            .layerCount = 1,
        });
    }};

    if (startedSubpass)
        nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), clearCommand);
    else
        nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), clearCommand);
}
/**
 * @brief Records a clear of the supplied depth-stencil attachment over its texture's full dimensions
 * @param attachment The depth-stencil attachment to clear, the subpass has no color or input attachments
 * @param value The depth and stencil values to clear the attachment to
 */
void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) {
    bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment)};

    if (renderPass->ClearDepthStencilAttachment(value)) {
        // The render pass took care of the clear itself, only the subpass transition (if any) has to be recorded
        if (gotoNext)
            nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
    } else {
        // Otherwise the clear is recorded as an explicit command inside the subpass
        auto function{[aspect = attachment->format->vkAspect, extent = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
            commandBuffer.clearAttachments(vk::ClearAttachment{
                .aspectMask = aspect,
                .clearValue = value,
            }, vk::ClearRect{
                // Nested designators (`.rect.extent`) are a C99 extension that is not part of C++20, so the rectangle is constructed explicitly
                .rect = vk::Rect2D{.extent = extent},
                .baseArrayLayer = 0,
                .layerCount = 1,
            });
        }};

        if (gotoNext)
            nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function);
        else
            nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
    }
}
void CommandExecutor::Execute() {
if (!nodes.empty()) {
TRACE_EVENT("gpu", "CommandExecutor::Execute");
if (renderPass)
FinishRenderPass();
{
auto &commandBuffer{*activeCommandBuffer};
commandBuffer.begin(vk::CommandBufferBeginInfo{
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
});
Implement overhead-free sequenced buffer updates with megabuffers Previously constant buffer updates would be handled on the CPU and only the end result would be synced to the GPU before execute. This caused issues as if the constant buffer contents was changed between each draw in a renderpass (e.g. text rendering) the draws themselves would only see the final resulting constant buffer. We had earlier tried to fix this by using vkCmdUpdateBuffer however this caused significant performance loss due to an oversight in Adreno drivers. We could have worked around this simply by using vkCmdCopy buffer however there would still be a performance loss due to renderpasses being split up with copies inbetween. To avoid this we introduce 'megabuffers', a brand new technique not done before in any other switch emulators. Rather than replaying the copies in sequence on the GPU, we take advantage of the fact that buffers are generally small in order to replay buffers on the GPU instead. Each write and subsequent usage of a buffer will cause a copy of the buffer with that write, and all prior applied to be pushed into the megabuffer, this way at the start of execute the megabuffer will hold all used states of the buffer simultaneously. Draws then reference these individual states in sequence to allow everything to work without any copies. In order to support this buffers have been moved to an immediate sync model, with synchronisation being done at usage-time rather than execute (in order to keep contents properly sequenced) and GPU-side writes now need to be explictly marked (since they prevent megabuffering). It should also be noted that a fallback path using cmdCopyBuffer exists for the cases where buffers are too large or GPU dirty.
2022-04-23 19:10:39 +02:00
for (auto texture : attachedTextures) {
texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true);
texture->MarkGpuDirty();
}
for (const auto &delegate : attachedBuffers)
Rework `BufferManager`, `Buffer` and `BufferView` This commit encapsulates a complex sequence of cascading changes in the process of supporting overlaps for buffers: * We determined that it is impossible to resolve overlaps with multiple intervals per buffer within the constraints of each overlap being a contiguous view, support for multiple intervals was therefore dropped. The older buffer manager code was entirely reworked to be simpler due to only handling one interval per buffer with code now being based off `IntervalMap` but tailored specifically for buffers. * During overlap resolution, the problem of how existing views into the buffer being recreated would be updated, it had to be replaced with a larger buffer that could contain all overlaps and all existing views would need to be repointed to it. This was addressed by a buffer owning all views to itself, we could automatically recalculate the offset of all views and update the buffers with it. * We still needed to update usage of existing views which was done by handling all access (such as inside a recorded draw) to buffer view properties via `BufferView::RegisterUsage` which dispatches a callback with the view and the corresponding backing buffer. This callback can be stored and called during overlap resolution with the new buffer. * We had issues with lifetime of the buffer with the handle-like semantics of `BufferView` introduced in the last buffer-related commit, if we updated the view to be owned by a new buffer we'd need to extend the lifetime of the new buffer not the older one and the only way to do this was a proxy owner object `BufferDelegate` which holds a shared pointer to the real `Buffer` which in-turn holds a pointer to all `BufferDelegate` objects to update on repointing. A `BufferView` is effectively just a wrapper around `std::shared_ptr<BufferDelegate>` with more favorable semantics but generally just forwarding calls. 
It should be additionally noted that to support usage of `RegisterUsage` the code around buffers in `GraphicsContext` was refactored to defer truly binding till the recording phase.
2022-03-28 08:57:05 +02:00
delegate->usageCallback = nullptr;
vk::RenderPass lRenderPass;
u32 subpassIndex;
using namespace node;
for (NodeVariant &node : nodes) {
#define NODE(name) [&](name& node) { node(commandBuffer, cycle, gpu); }
std::visit(VariantVisitor{
NODE(FunctionNode),
[&](RenderPassNode &node) {
lRenderPass = node(commandBuffer, cycle, gpu);
subpassIndex = 0;
},
[&](NextSubpassNode &node) {
node(commandBuffer, cycle, gpu);
++subpassIndex;
},
[&](SubpassFunctionNode &node) { node(commandBuffer, cycle, gpu, lRenderPass, subpassIndex); },
[&](NextSubpassFunctionNode &node) { node(commandBuffer, cycle, gpu, lRenderPass, ++subpassIndex); },
NODE(RenderPassEndNode),
}, node);
#undef NODE
}
commandBuffer.end();
gpu.scheduler.SubmitCommandBuffer(commandBuffer, activeCommandBuffer.GetFence());
for (const auto &delegate : attachedBuffers)
Implement overhead-free sequenced buffer updates with megabuffers Previously constant buffer updates would be handled on the CPU and only the end result would be synced to the GPU before execute. This caused issues as if the constant buffer contents was changed between each draw in a renderpass (e.g. text rendering) the draws themselves would only see the final resulting constant buffer. We had earlier tried to fix this by using vkCmdUpdateBuffer however this caused significant performance loss due to an oversight in Adreno drivers. We could have worked around this simply by using vkCmdCopy buffer however there would still be a performance loss due to renderpasses being split up with copies inbetween. To avoid this we introduce 'megabuffers', a brand new technique not done before in any other switch emulators. Rather than replaying the copies in sequence on the GPU, we take advantage of the fact that buffers are generally small in order to replay buffers on the GPU instead. Each write and subsequent usage of a buffer will cause a copy of the buffer with that write, and all prior applied to be pushed into the megabuffer, this way at the start of execute the megabuffer will hold all used states of the buffer simultaneously. Draws then reference these individual states in sequence to allow everything to work without any copies. In order to support this buffers have been moved to an immediate sync model, with synchronisation being done at usage-time rather than execute (in order to keep contents properly sequenced) and GPU-side writes now need to be explictly marked (since they prevent megabuffering). It should also be noted that a fallback path using cmdCopyBuffer exists for the cases where buffers are too large or GPU dirty.
2022-04-23 19:10:39 +02:00
delegate->buffer->InvalidateMegaBuffer();
nodes.clear();
Implement overhead-free sequenced buffer updates with megabuffers Previously constant buffer updates would be handled on the CPU and only the end result would be synced to the GPU before execute. This caused issues as if the constant buffer contents was changed between each draw in a renderpass (e.g. text rendering) the draws themselves would only see the final resulting constant buffer. We had earlier tried to fix this by using vkCmdUpdateBuffer however this caused significant performance loss due to an oversight in Adreno drivers. We could have worked around this simply by using vkCmdCopy buffer however there would still be a performance loss due to renderpasses being split up with copies inbetween. To avoid this we introduce 'megabuffers', a brand new technique not done before in any other switch emulators. Rather than replaying the copies in sequence on the GPU, we take advantage of the fact that buffers are generally small in order to replay buffers on the GPU instead. Each write and subsequent usage of a buffer will cause a copy of the buffer with that write, and all prior applied to be pushed into the megabuffer, this way at the start of execute the megabuffer will hold all used states of the buffer simultaneously. Draws then reference these individual states in sequence to allow everything to work without any copies. In order to support this buffers have been moved to an immediate sync model, with synchronisation being done at usage-time rather than execute (in order to keep contents properly sequenced) and GPU-side writes now need to be explictly marked (since they prevent megabuffering). It should also be noted that a fallback path using cmdCopyBuffer exists for the cases where buffers are too large or GPU dirty.
2022-04-23 19:10:39 +02:00
attachedTextures.clear();
attachedBuffers.clear();
cycle = activeCommandBuffer.Reset();
Implement overhead-free sequenced buffer updates with megabuffers Previously constant buffer updates would be handled on the CPU and only the end result would be synced to the GPU before execute. This caused issues as if the constant buffer contents was changed between each draw in a renderpass (e.g. text rendering) the draws themselves would only see the final resulting constant buffer. We had earlier tried to fix this by using vkCmdUpdateBuffer however this caused significant performance loss due to an oversight in Adreno drivers. We could have worked around this simply by using vkCmdCopy buffer however there would still be a performance loss due to renderpasses being split up with copies inbetween. To avoid this we introduce 'megabuffers', a brand new technique not done before in any other switch emulators. Rather than replaying the copies in sequence on the GPU, we take advantage of the fact that buffers are generally small in order to replay buffers on the GPU instead. Each write and subsequent usage of a buffer will cause a copy of the buffer with that write, and all prior applied to be pushed into the megabuffer, this way at the start of execute the megabuffer will hold all used states of the buffer simultaneously. Draws then reference these individual states in sequence to allow everything to work without any copies. In order to support this buffers have been moved to an immediate sync model, with synchronisation being done at usage-time rather than execute (in order to keep contents properly sequenced) and GPU-side writes now need to be explictly marked (since they prevent megabuffering). It should also be noted that a fallback path using cmdCopyBuffer exists for the cases where buffers are too large or GPU dirty.
2022-04-23 19:10:39 +02:00
gpu.buffer.megaBuffer.Reset();
}
}
}
}