From 38aad21d29c9e2990c7a14b988bb142a4c2838e0 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Thu, 29 Sep 2022 20:29:29 +0100 Subject: [PATCH] Share single flag variable for Maxwell3D batch draw/constant buffer update Slightly cheaper --- .../skyline/soc/gm20b/engines/maxwell_3d.cpp | 130 +++++++++--------- .../skyline/soc/gm20b/engines/maxwell_3d.h | 21 ++- 2 files changed, 77 insertions(+), 74 deletions(-) diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index d38153d8..90fa51eb 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -70,8 +70,8 @@ namespace skyline::soc::gm20b::engine::maxwell3d { } __attribute__((always_inline)) void Maxwell3D::FlushDeferredDraw() { - if (deferredDraw.pending) { - deferredDraw.pending = false; + if (batchEnableState.drawActive) { + batchEnableState.drawActive = false; interconnect.Draw(deferredDraw.drawTopology, deferredDraw.indexed, deferredDraw.drawCount, deferredDraw.drawFirst, deferredDraw.instanceCount, deferredDraw.drawBaseVertex, deferredDraw.drawBaseInstance); deferredDraw.instanceCount = 1; } @@ -92,70 +92,70 @@ namespace skyline::soc::gm20b::engine::maxwell3d { bool redundant{registers.raw[method] == argument}; registers.raw[method] = argument; - // TODO COMBINE THESE - if (batchLoadConstantBuffer.Active()) { - switch (method) { - // Add to the batch constant buffer update buffer - // Return early here so that any code below can rely on the fact that any cbuf updates will always be the first of a batch - #define LOAD_CONSTANT_BUFFER_CALLBACKS(z, index, data_) \ - ENGINE_STRUCT_ARRAY_CASE(loadConstantBuffer, data, index, { \ - batchLoadConstantBuffer.buffer.push_back(argument); \ - registers.loadConstantBuffer->offset += 4; \ - return; \ - }) + if (batchEnableState.raw) { + if (batchEnableState.constantBufferActive) { + switch (method) { + // Add to the 
batch constant buffer update buffer + // Return early here so that any code below can rely on the fact that any cbuf updates will always be the first of a batch + #define LOAD_CONSTANT_BUFFER_CALLBACKS(z, index, data_) \ + ENGINE_STRUCT_ARRAY_CASE(loadConstantBuffer, data, index, { \ + batchLoadConstantBuffer.buffer.push_back(argument); \ + registers.loadConstantBuffer->offset += 4; \ + return; \ + }) - BOOST_PP_REPEAT(16, LOAD_CONSTANT_BUFFER_CALLBACKS, 0) - #undef LOAD_CONSTANT_BUFFER_CALLBACKS - default: - // When a method other than constant buffer update is called submit our submit the previously built-up update as a batch - interconnect.DisableQuickConstantBufferBind(); - interconnect.LoadConstantBuffer(batchLoadConstantBuffer.buffer, batchLoadConstantBuffer.Invalidate()); - batchLoadConstantBuffer.Reset(); - break; // Continue on here to handle the actual method - } - } else if (deferredDraw.pending) { // See DeferredDrawState comment for full details - switch (method) { - ENGINE_CASE(begin, { - if (begin.instanceId == Registers::Begin::InstanceId::Subsequent) { - if (deferredDraw.drawTopology != begin.op && - registers.primitiveTopologyControl->override == type::PrimitiveTopologyControl::Override::UseTopologyInBeginMethods) - Logger::Warn("Vertex topology changed partway through instanced draw!"); + BOOST_PP_REPEAT(16, LOAD_CONSTANT_BUFFER_CALLBACKS, 0) + #undef LOAD_CONSTANT_BUFFER_CALLBACKS + default: + // When a method other than constant buffer update is called, submit the previously built-up update as a batch + interconnect.DisableQuickConstantBufferBind(); + interconnect.LoadConstantBuffer(batchLoadConstantBuffer.buffer, batchLoadConstantBuffer.startOffset); + batchEnableState.constantBufferActive = false; + batchLoadConstantBuffer.Reset(); + break; // Continue on here to handle the actual method + } + } else if (batchEnableState.drawActive) { // See DeferredDrawState comment for full details + switch (method) { + ENGINE_CASE(begin, { + if 
(begin.instanceId == Registers::Begin::InstanceId::Subsequent) { + if (deferredDraw.drawTopology != begin.op && + registers.primitiveTopologyControl->override == type::PrimitiveTopologyControl::Override::UseTopologyInBeginMethods) + Logger::Warn("Vertex topology changed partway through instanced draw!"); - deferredDraw.instanceCount++; - } else { + deferredDraw.instanceCount++; + } else { + FlushDeferredDraw(); + break; // This instanced draw is finished, continue on to handle the next draw + } + + return; + }) + + // Can be ignored since we handle drawing in draw{Vertex,Index}Count + ENGINE_CASE(end, { return; }) + + // Draws here can be ignored since they're just repeats of the original instanced draw + ENGINE_CASE(drawVertexArray, { + if (!redundant) + Logger::Warn("Vertex count changed partway through instanced draw!"); + return; + }) + ENGINE_CASE(drawIndexBuffer, { + if (!redundant) + Logger::Warn("Index count changed partway through instanced draw!"); + return; + }) + + // Once we stop calling draw methods flush the current draw since drawing is dependent on the register state not changing + default: FlushDeferredDraw(); - break; // This instanced draw is finished, continue on to handle the next draw - } - - return; - }) - - // Can be ignored since we handle drawing in draw{Vertex,Index}Count - ENGINE_CASE(end, { return; }) - - // Draws here can be ignored since they're just repeats of the original instanced draw - ENGINE_CASE(drawVertexArray, { - if (!redundant) - Logger::Warn("Vertex count changed partway through instanced draw!"); - return; - }) - ENGINE_CASE(drawIndexBuffer, { - if (!redundant) - Logger::Warn("Index count changed partway through instanced draw!"); - return; - }) - - // Once we stop calling draw methods flush the current draw since drawing is dependent on the register state not changing - default: - FlushDeferredDraw(); - break; + break; + } } } - - if (!redundant) { + if (!redundant) dirtyManager.MarkDirty(method); - } switch (method) { 
ENGINE_STRUCT_CASE(mme, instructionRamLoad, { @@ -208,11 +208,13 @@ namespace skyline::soc::gm20b::engine::maxwell3d { ENGINE_STRUCT_CASE(drawVertexArray, count, { // Defer the draw until the first non-draw operation to allow for detecting instanced draws (see DeferredDrawState comment) deferredDraw.Set(count, *registers.vertexArrayStart, 0, *registers.globalBaseInstanceIndex, GetCurrentTopology(), false); + batchEnableState.drawActive = true; }) ENGINE_STRUCT_CASE(drawIndexBuffer, count, { // Defer the draw until the first non-draw operation to allow for detecting instanced draws (see DeferredDrawState comment) deferredDraw.Set(count, registers.indexBuffer->first, *registers.globalBaseVertexIndex, *registers.globalBaseInstanceIndex, GetCurrentTopology(), true); + batchEnableState.drawActive = true; }) ENGINE_STRUCT_CASE(semaphore, info, { @@ -251,8 +253,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d { // Begin a batch constant buffer update, this case will never be reached if a batch update is currently active #define LOAD_CONSTANT_BUFFER_CALLBACKS(z, index, data_) \ ENGINE_STRUCT_ARRAY_CASE(loadConstantBuffer, data, index, { \ - batchLoadConstantBuffer.startOffset = registers.loadConstantBuffer->offset; \ - batchLoadConstantBuffer.buffer.push_back(data); \ + batchLoadConstantBuffer.startOffset = registers.loadConstantBuffer->offset; \ + batchLoadConstantBuffer.buffer.push_back(data); \ + batchEnableState.constantBufferActive = true; \ registers.loadConstantBuffer->offset += 4; \ }) @@ -296,8 +299,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d { void Maxwell3D::FlushEngineState() { FlushDeferredDraw(); - if (batchLoadConstantBuffer.Active()) { - interconnect.LoadConstantBuffer(batchLoadConstantBuffer.buffer, batchLoadConstantBuffer.Invalidate()); + if (batchEnableState.constantBufferActive) { + interconnect.LoadConstantBuffer(batchLoadConstantBuffer.buffer, batchLoadConstantBuffer.startOffset); + batchEnableState.constantBufferActive = false; 
batchLoadConstantBuffer.Reset(); } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h index 7ac1e6d8..dfaedb5d 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h @@ -27,17 +27,18 @@ namespace skyline::soc::gm20b::engine::maxwell3d { gpu::interconnect::maxwell3d::DirtyManager dirtyManager; gpu::interconnect::maxwell3d::Maxwell3D interconnect; + union BatchEnableState { + u8 raw{}; + + struct { + bool constantBufferActive : 1; + bool drawActive : 1; + }; + } batchEnableState{}; + struct BatchLoadConstantBufferState { std::vector<u32> buffer; - u32 startOffset{std::numeric_limits<u32>::max()}; - - bool Active() { - return startOffset != std::numeric_limits<u32>::max(); - } - - u32 Invalidate() { - return std::exchange(startOffset, std::numeric_limits<u32>::max()); - } + u32 startOffset{}; void Reset() { buffer.clear(); @@ -48,7 +49,6 @@ namespace skyline::soc::gm20b::engine::maxwell3d { * @brief In the Maxwell 3D engine, instanced draws are implemented by repeating the exact same draw in sequence with special flag set in vertexBeginGl. This flag allows either incrementing the instance counter or resetting it, since we need to supply an instance count to the host API we defer all draws until state changes occur. If there are no state changes between draws we can skip them and count the occurences to get the number of instances to draw. 
*/ struct DeferredDrawState { - bool pending; bool indexed; //!< If the deferred draw is indexed type::DrawTopology drawTopology; //!< Topology of draw at draw time u32 instanceCount{1}; //!< Number of instances in the final draw @@ -61,7 +61,6 @@ namespace skyline::soc::gm20b::engine::maxwell3d { * @brief Sets up the state necessary to defer a new draw */ void Set(u32 pDrawCount, u32 pDrawFirst, u32 pDrawBaseVertex, u32 pDrawBaseInstance, type::DrawTopology pDrawTopology, bool pIndexed) { - pending = true; indexed = pIndexed; drawTopology = pDrawTopology; drawCount = pDrawCount;