Implement Maxwell 3D instanced draw support

In the Maxwell 3D engine, instanced draws are implemented by repeating the exact same draw in sequence with a special flag set in vertexBeginGl. This flag allows either incrementing the instance counter or resetting it. Since we need to supply an instance count to the host API, we defer all draws until state changes occur. If there are no state changes between draws, we can skip them and count the occurrences to get the number of instances to draw.
This commit is contained in:
Billy Laws 2022-05-07 13:55:17 +01:00
parent 03594a081c
commit 4149ab1067
4 changed files with 106 additions and 17 deletions

View File

@ -2787,7 +2787,7 @@ namespace skyline::gpu::interconnect {
/* Draws */ /* Draws */
public: public:
template<bool IsIndexed> template<bool IsIndexed>
void Draw(u32 count, u32 first, i32 vertexOffset = 0) { void Draw(u32 count, u32 first, u32 instanceCount = 1, i32 vertexOffset = 0) {
ValidatePrimitiveRestartState(); ValidatePrimitiveRestartState();
// Index Buffer Setup // Index Buffer Setup
@ -2982,21 +2982,24 @@ namespace skyline::gpu::interconnect {
if constexpr (IsIndexed) { if constexpr (IsIndexed) {
commandBuffer.bindIndexBuffer(boundIndexBuffer->handle, boundIndexBuffer->offset, boundIndexBuffer->type); commandBuffer.bindIndexBuffer(boundIndexBuffer->handle, boundIndexBuffer->offset, boundIndexBuffer->type);
commandBuffer.drawIndexed(count, 1, first, vertexOffset, 0); commandBuffer.drawIndexed(count, instanceCount, first, vertexOffset, 0);
} else { } else {
commandBuffer.draw(count, 1, first, 0); commandBuffer.draw(count, instanceCount, first, 0);
} }
}, vk::Rect2D{ }, vk::Rect2D{
.extent = activeColorRenderTargets.empty() ? depthRenderTarget.guest.dimensions : activeColorRenderTargets.front()->texture->dimensions, .extent = activeColorRenderTargets.empty() ? depthRenderTarget.guest.dimensions : activeColorRenderTargets.front()->texture->dimensions,
}, {}, activeColorRenderTargets, depthRenderTargetView, !gpu.traits.quirks.relaxedRenderPassCompatibility); }, {}, activeColorRenderTargets, depthRenderTargetView, !gpu.traits.quirks.relaxedRenderPassCompatibility);
} }
void DrawVertex(u32 vertexCount, u32 firstVertex) { void Draw(u32 vertexCount, u32 firstVertex, u32 instanceCount) {
Draw<false>(vertexCount, firstVertex); if (needsQuadConversion)
Draw<true>(vertexCount, firstVertex, instanceCount);
else
Draw<false>(vertexCount, firstVertex, instanceCount);
} }
void DrawIndexed(u32 indexCount, u32 firstIndex, i32 vertexOffset) { void DrawIndexed(u32 indexCount, u32 firstIndex, u32 instanceCount, i32 vertexOffset) {
Draw<true>(indexCount, firstIndex, vertexOffset); Draw<true>(indexCount, firstIndex, instanceCount, vertexOffset);
} }
}; };
} }

View File

@ -539,7 +539,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d::type {
u32 raw; u32 raw;
struct { struct {
PrimitiveTopology topology; PrimitiveTopology topology;
u16 pad : 12; u16 pad : 10;
bool instanceNext : 1; bool instanceNext : 1;
bool instanceContinue : 1; bool instanceContinue : 1;
}; };

View File

@ -17,6 +17,18 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
InitializeRegisters(); InitializeRegisters();
} }
/**
 * @brief Submits the currently deferred draw (if one is pending) through `context` using the accumulated instance count, then returns the deferral state to idle
 */
void Maxwell3D::FlushDeferredDraw() {
    // Nothing has been deferred, so there is nothing to submit
    if (!deferredDraw.pending)
        return;

    if (deferredDraw.indexed) {
        context.DrawIndexed(deferredDraw.drawCount, deferredDraw.drawFirst, deferredDraw.instanceCount, deferredDraw.drawBaseVertex);
    } else {
        context.Draw(deferredDraw.drawCount, deferredDraw.drawFirst, deferredDraw.instanceCount);
    }

    // Back to idle: a single instance is assumed until further instanced draw repeats are counted
    deferredDraw.instanceCount = 1;
    deferredDraw.pending = false;
}
void Maxwell3D::HandleMethod(u32 method, u32 argument) { void Maxwell3D::HandleMethod(u32 method, u32 argument) {
if (method != ENGINE_STRUCT_OFFSET(mme, shadowRamControl)) { if (method != ENGINE_STRUCT_OFFSET(mme, shadowRamControl)) {
if (shadowRegisters.mme->shadowRamControl == type::MmeShadowRamControl::MethodTrack || shadowRegisters.mme->shadowRamControl == type::MmeShadowRamControl::MethodTrackWithFilter) if (shadowRegisters.mme->shadowRamControl == type::MmeShadowRamControl::MethodTrack || shadowRegisters.mme->shadowRamControl == type::MmeShadowRamControl::MethodTrackWithFilter)
@ -49,6 +61,45 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
} }
} }
// See DeferredDrawState comment for full details
if (deferredDraw.pending) {
switch (method) {
ENGINE_CASE(vertexBeginGl, {
if (deferredDraw.drawTopology != vertexBeginGl.topology && !vertexBeginGl.instanceContinue)
Logger::Warn("Vertex topology changed partway through instanced draw!");
if (vertexBeginGl.instanceNext) {
deferredDraw.instanceCount++;
} else if (vertexBeginGl.instanceContinue) {
FlushDeferredDraw();
break; // This instanced draw is finished, continue on to handle the actual method
}
return;
})
// Can be ignored since we handle drawing in draw{Vertex,Index}Count
ENGINE_CASE(vertexEndGl, { return; })
// Draws here can be ignored since they're just repeats of the original instanced draw
ENGINE_CASE(drawVertexCount, {
if (!redundant)
Logger::Warn("Vertex count changed partway through instanced draw!");
return;
})
ENGINE_CASE(drawIndexCount, {
if (!redundant)
Logger::Warn("Index count changed partway through instanced draw!");
return;
})
// Once we stop calling draw methods flush the current draw since drawing is dependent on the register state not changing
default:
FlushDeferredDraw();
break;
}
}
if (!redundant) { if (!redundant) {
switch (method) { switch (method) {
ENGINE_STRUCT_CASE(mme, shadowRamControl, { ENGINE_STRUCT_CASE(mme, shadowRamControl, {
@ -514,10 +565,6 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
static_assert(type::ShaderStageCount == 6 && type::ShaderStageCount < BOOST_PP_LIMIT_REPEAT); static_assert(type::ShaderStageCount == 6 && type::ShaderStageCount < BOOST_PP_LIMIT_REPEAT);
#undef SET_SHADER_ENABLE_CALLBACK #undef SET_SHADER_ENABLE_CALLBACK
ENGINE_CASE(vertexBeginGl, {
context.SetPrimitiveTopology(vertexBeginGl.topology);
})
ENGINE_CASE(primitiveRestartEnable, { ENGINE_CASE(primitiveRestartEnable, {
context.SetPrimitiveRestartEnabled(primitiveRestartEnable); context.SetPrimitiveRestartEnabled(primitiveRestartEnable);
}) })
@ -620,15 +667,24 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
context.ClearBuffers(clearBuffers); context.ClearBuffers(clearBuffers);
}) })
ENGINE_CASE(vertexBeginGl, {
context.SetPrimitiveTopology(vertexBeginGl.topology);
// If we reach here then we aren't in a deferred draw so there's no need to flush anything
if (vertexBeginGl.instanceNext)
deferredDraw.instanceCount++;
else if (vertexBeginGl.instanceContinue)
deferredDraw.instanceCount = 1;
})
ENGINE_CASE(drawVertexCount, { ENGINE_CASE(drawVertexCount, {
if (context.needsQuadConversion) // Defer the draw until the first non-draw operation to allow for detecting instanced draws (see DeferredDrawState comment)
context.DrawIndexed(drawVertexCount, *registers.drawVertexFirst, 0); deferredDraw.Set(drawVertexCount, *registers.drawVertexFirst, 0, registers.vertexBeginGl->topology, false);
else
context.DrawVertex(drawVertexCount, *registers.drawVertexFirst);
}) })
ENGINE_CASE(drawIndexCount, { ENGINE_CASE(drawIndexCount, {
context.DrawIndexed(drawIndexCount, *registers.drawIndexFirst, *registers.drawBaseVertex); // Defer the draw until the first non-draw operation to allow for detecting instanced draws (see DeferredDrawState comment)
deferredDraw.Set(drawIndexCount, *registers.drawIndexFirst, *registers.drawBaseVertex, registers.vertexBeginGl->topology, true);
}) })
ENGINE_STRUCT_CASE(semaphore, info, { ENGINE_STRUCT_CASE(semaphore, info, {
@ -725,6 +781,8 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
context.ConstantBufferUpdate(std::move(batchConstantBufferUpdate.buffer), batchConstantBufferUpdate.startOffset); context.ConstantBufferUpdate(std::move(batchConstantBufferUpdate.buffer), batchConstantBufferUpdate.startOffset);
batchConstantBufferUpdate.Reset(); batchConstantBufferUpdate.Reset();
} }
FlushDeferredDraw();
} }
__attribute__((always_inline)) void Maxwell3D::CallMethod(u32 method, u32 argument) { __attribute__((always_inline)) void Maxwell3D::CallMethod(u32 method, u32 argument) {

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: MPL-2.0 // SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/Ryujinx/)
// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d) // Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d)
#pragma once #pragma once
@ -36,6 +37,33 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
} }
} batchConstantBufferUpdate; //!< Holds state for updating constant buffer data in a batch rather than word by word } batchConstantBufferUpdate; //!< Holds state for updating constant buffer data in a batch rather than word by word
/**
 * @brief In the Maxwell 3D engine, instanced draws are implemented by repeating the exact same draw in sequence with a special flag set in vertexBeginGl. This flag allows either incrementing the instance counter or resetting it. Since we need to supply an instance count to the host API, we defer all draws until state changes occur. If there are no state changes between draws we can skip them and count the occurrences to get the number of instances to draw.
 * @note Every member carries a default initializer so an instance never holds indeterminate values, regardless of how it is constructed
 */
struct DeferredDrawState {
    bool pending{}; //!< If a draw has been deferred and is still waiting to be flushed
    bool indexed{}; //!< If the deferred draw is indexed
    type::PrimitiveTopology drawTopology{}; //!< Topology of draw at draw time
    u32 instanceCount{1}; //!< Number of instances in the final draw
    u32 drawCount{}; //!< indexed ? drawIndexCount : drawVertexCount
    u32 drawFirst{}; //!< indexed ? drawIndexFirst : drawVertexFirst
    i32 drawBaseVertex{}; //!< Only applicable to indexed draws

    /**
     * @brief Sets up the state necessary to defer a new draw
     * @note This marks the draw as pending but leaves instanceCount untouched
     */
    void Set(u32 pDrawCount, u32 pDrawFirst, i32 pDrawBaseVertex, type::PrimitiveTopology pDrawTopology, bool pIndexed) {
        pending = true;
        indexed = pIndexed;
        drawTopology = pDrawTopology;
        drawCount = pDrawCount;
        drawFirst = pDrawFirst;
        drawBaseVertex = pDrawBaseVertex;
    }
} deferredDraw{};
/**
 * @brief Submits the pending deferred draw (if there is one) with its accumulated instance count and resets the deferred draw state, this does nothing when no draw is pending
 */
void FlushDeferredDraw();
/** /**
* @brief Calls the appropriate function corresponding to a certain method with the supplied argument * @brief Calls the appropriate function corresponding to a certain method with the supplied argument
*/ */