Implement indirect draws in the Maxwell 3D interconnect

These will be used by the HLE indirect draw macro to perform indirect draws without waiting for GPU idle.
This commit is contained in:
Billy Laws 2023-02-04 22:32:36 +00:00
parent 49cd2a71cc
commit 7e1c58accc
2 changed files with 66 additions and 0 deletions

View File

@ -352,4 +352,67 @@ namespace skyline::gpu::interconnect::maxwell3d {
}, scissor, activeDescriptorSetSampledImages, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility, srcStageMask, dstStageMask); }, scissor, activeDescriptorSetSampledImages, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility, srcStageMask, dstStageMask);
ctx.executor.AddCheckpoint("After draw"); ctx.executor.AddCheckpoint("After draw");
} }
/**
 * @brief Records a subpass performing an indirect draw, with the draw arguments sourced from a buffer view over the supplied span
 * @param topology Primitive topology that is forwarded to PrepareDraw
 * @param transformFeedbackEnable If the draw should be wrapped in transform feedback begin/end commands, this is ignored when the GPU traits report no transform feedback support
 * @param indexed If an indexed indirect draw should be recorded rather than a non-indexed one
 * @param indirectBuffer Span containing the indirect draw arguments, a buffer view is looked up (or created) for it
 * @param count Draw count passed to the Vulkan indirect draw command, the function returns without recording anything when it is zero
 * @param stride Stride passed to the Vulkan indirect draw command
 * @throws exception When the current input assembly state requires quad conversion
 */
void Maxwell3D::DrawIndirect(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, span<u8> indirectBuffer, u32 count, u32 stride) {
    if (count == 0)
        return;

    TRACE_EVENT("gpu", "Indirect Draw", "buffer", reinterpret_cast<uintptr_t>(indirectBuffer.data()));

    StateUpdateBuilder stateBuilder{*ctx.executor.allocator};
    vk::PipelineStageFlags srcStages{};
    vk::PipelineStageFlags dstStages{};
    PrepareDraw(stateBuilder, topology, indexed, true, 0, 0, srcStages, dstStages);

    if (directState.inputAssembly.NeedsQuadConversion())
        throw exception("Quad conversion is not supported for indirect draws!");

    // Prefer obtaining the view from the buffer that backed the previously used indirect view
    if (indirectBufferView)
        indirectBufferView = indirectBufferView.GetBuffer()->TryGetView(indirectBuffer);

    // Otherwise fall back to a lookup through the buffer manager, attaching the locked buffer to the executor
    if (!indirectBufferView) {
        indirectBufferView = ctx.gpu.buffer.FindOrCreate(indirectBuffer, ctx.executor.tag, [this](std::shared_ptr<Buffer> attachedBuffer, ContextLock<Buffer> &&bufferLock) {
            ctx.executor.AttachLockedBuffer(attachedBuffer, std::move(bufferLock));
        });
    }

    indirectBufferView.GetBuffer()->BlockSequencedCpuBackingWrites();

    auto builtState{stateBuilder.Build()};

    /**
     * @brief Bundle of everything the recorded subpass needs, placed in the executor's allocator so the lambda only has to capture a single pointer
     */
    struct IndirectDrawState {
        StateUpdater stateUpdater;
        BufferView argumentView;
        u32 drawCount;
        u32 drawStride;
        bool isIndexed;
        bool useTransformFeedback;
    };

    // Transform feedback is only used when it is both requested and supported by the host GPU
    const bool wrapInTransformFeedback{ctx.gpu.traits.supportsTransformFeedback && transformFeedbackEnable};

    auto *state{ctx.executor.allocator->EmplaceUntracked<IndirectDrawState>(IndirectDrawState{
        builtState,
        indirectBufferView,
        count,
        stride,
        indexed,
        wrapInTransformFeedback
    })};

    auto drawScissor{GetDrawScissor()};

    constantBuffers.ResetQuickBind();

    ctx.executor.AddCheckpoint("Before indirect draw");
    ctx.executor.AddSubpass([state](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &gpu, vk::RenderPass, u32) {
        state->stateUpdater.RecordAll(gpu, commandBuffer);

        const bool transformFeedback{state->useTransformFeedback};
        if (transformFeedback)
            commandBuffer.beginTransformFeedbackEXT(0, {}, {});

        auto argumentBinding{state->argumentView.GetBinding(gpu)};
        if (!state->isIndexed)
            commandBuffer.drawIndirect(argumentBinding.buffer, argumentBinding.offset, state->drawCount, state->drawStride);
        else
            commandBuffer.drawIndexedIndirect(argumentBinding.buffer, argumentBinding.offset, state->drawCount, state->drawStride);

        if (transformFeedback)
            commandBuffer.endTransformFeedbackEXT(0, {}, {});
    }, drawScissor, activeDescriptorSetSampledImages, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility, srcStages, dstStages);
    ctx.executor.AddCheckpoint("After indirect draw");
}
} }

View File

@ -49,6 +49,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
Textures textures; Textures textures;
std::shared_ptr<memory::Buffer> quadConversionBuffer{}; std::shared_ptr<memory::Buffer> quadConversionBuffer{};
bool quadConversionBufferAttached{}; bool quadConversionBufferAttached{};
BufferView indirectBufferView; //!< View over the indirect argument buffer from the last DrawIndirect call, kept so the next call can first try to get a view from the same underlying buffer
static constexpr size_t DescriptorBatchSize{0x100}; static constexpr size_t DescriptorBatchSize{0x100};
std::shared_ptr<boost::container::static_vector<DescriptorAllocator::ActiveDescriptorSet, DescriptorBatchSize>> attachedDescriptorSets; std::shared_ptr<boost::container::static_vector<DescriptorAllocator::ActiveDescriptorSet, DescriptorBatchSize>> attachedDescriptorSets;
@ -102,5 +103,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
void Clear(engine::ClearSurface &clearSurface); void Clear(engine::ClearSurface &clearSurface);
void Draw(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, u32 count, u32 first, u32 instanceCount, u32 vertexOffset, u32 firstInstance); void Draw(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, u32 count, u32 first, u32 instanceCount, u32 vertexOffset, u32 firstInstance);
/**
 * @brief Records an indirect draw (indexed or non-indexed) whose arguments are read from a buffer view over `indirectBuffer`
 * @param transformFeedbackEnable Wraps the draw in transform feedback begin/end commands, only honoured when the GPU traits report transform feedback support
 * @param count Draw count passed to the Vulkan indirect draw command, nothing is recorded when this is zero
 * @param stride Stride passed to the Vulkan indirect draw command
 * @note Throws an exception if the current input assembly state requires quad conversion
 */
void DrawIndirect(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, span<u8> indirectBuffer, u32 count, u32 stride);
}; };
} }