Avoid submitting executions on semaphore increments

This avoids breaking render passes (RPs), which helps performance; and since we have our own sync logic, we don't need to match the guest here.
2025-01-03 14:21:53 +01:00 · 2023-03-25 22:48:43 +00:00 · 2023-03-25 22:48:43 +00:00 · 737fb2207d
commit 737fb2207d
parent 99a7b77948
4 changed files with 43 additions and 18 deletions
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
@ -672,23 +672,37 @@ namespace skyline::gpu::interconnect {

        executionTag = AllocateTag();

-        if (!slot->nodes.empty()) {
-            TRACE_EVENT("gpu", "CommandExecutor::Submit");
+        // Ensure all pushed callbacks wait for the submission to have finished GPU execution
+        if (!slot->nodes.empty())
+            waiterThread.Queue(cycle, {});

-            if (callback && *state.settings->useDirectMemoryImport)
-                waiterThread.Queue(cycle, std::move(callback));
-            else
-                waiterThread.Queue(cycle, {});
+        if (*state.settings->useDirectMemoryImport) {
+            // When DMI is in use, callbacks and deferred actions should be executed in sequence with the host GPU
+            for (auto &actionCb : pendingDeferredActions)
+                waiterThread.Queue(nullptr, std::move(actionCb));

-            SubmitInternal();
-            submissionNumber++;
-        } else {
-            if (callback && *state.settings->useDirectMemoryImport)
+            pendingDeferredActions.clear();
+
+            if (callback)
                waiterThread.Queue(nullptr, std::move(callback));
        }

-        if (callback && !*state.settings->useDirectMemoryImport)
-            callback();
+        if (!slot->nodes.empty()) {
+            TRACE_EVENT("gpu", "CommandExecutor::Submit");
+            SubmitInternal();
+            submissionNumber++;
+        }
+
+        if (!*state.settings->useDirectMemoryImport) {
+            // When DMI is not in use, execute callbacks immediately after submission
+            for (auto &actionCb : pendingDeferredActions)
+                actionCb();
+
+            pendingDeferredActions.clear();
+
+            if (callback)
+                callback();
+        }

        ResetInternal();

@ -710,6 +724,10 @@ namespace skyline::gpu::interconnect {
        }
    }

+    void CommandExecutor::AddDeferredAction(std::function<void()> &&callback) {
+        pendingDeferredActions.emplace_back(std::move(callback)); // Only stored here; nothing runs until the pending list is drained during submission (each action is either queued on the waiter thread or called directly, then the list is cleared)
+    }
+
    void CommandExecutor::LockPreserve() {
        if (!preserveLocked) {
            preserveLocked = true;
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
@ -202,6 +202,8 @@ namespace skyline::gpu::interconnect {
        std::vector<std::function<void()>> flushCallbacks; //!< Set of persistent callbacks that will be called at the start of Execute in order to flush data required for recording
        std::vector<std::function<void()>> pipelineChangeCallbacks; //!< Set of persistent callbacks that will be called after any non-Maxwell 3D engine changes the active pipeline

+        std::vector<std::function<void()>> pendingDeferredActions; //!< Callbacks added via AddDeferredAction, held in insertion order until submission either queues them on the waiter thread (DMI enabled) or calls them directly (DMI disabled) and then clears the list
+
        u32 nextCheckpointId{}; //!< The ID of the next debug checkpoint to be allocated

        void RotateRecordSlot();
@ -372,6 +374,11 @@ namespace skyline::gpu::interconnect {
         */
        void Submit(std::function<void()> &&callback = {}, bool wait = false);

+        /**
+         * @brief Adds an action to be executed upon current cycle completion (if DMI is on, otherwise after submission)
+         */
+        void AddDeferredAction(std::function<void()> &&callback);
+
        /**
         * @brief Locks all preserve attached buffers/textures
         * @note This **MUST** be called before attaching any buffers/textures to an execution
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp
@ -19,7 +19,7 @@ namespace skyline::soc::gm20b::engine {
            ENGINE_STRUCT_CASE(syncpoint, action, {
                if (action.operation == Registers::Syncpoint::Operation::Incr) {
                    Logger::Debug("Increment syncpoint: {}", +action.index);
-                    channelCtx.executor.Submit([=, syncpoints = &this->syncpoints, index = action.index]() {
+                    channelCtx.executor.AddDeferredAction([=, syncpoints = &this->syncpoints, index = action.index]() {
                        syncpoints->at(index).host.Increment();
                    });
                    syncpoints.at(action.index).guest.Increment();
@ -50,7 +50,7 @@ namespace skyline::soc::gm20b::engine {
                        channelCtx.Lock();
                        break;
                    case Registers::Semaphore::Operation::Release:
-                        channelCtx.executor.Submit([this, action, address, payload = registers.semaphore->payload] () {
+                        channelCtx.executor.AddDeferredAction([this, action, address, payload = registers.semaphore->payload] () {
                            // Write timestamp first to ensure ordering
                            if (action.releaseSize == Registers::Semaphore::ReleaseSize::SixteenBytes) {
                                channelCtx.asCtx->gmmu.Write<u32>(address + 4, 0);
@ -121,7 +121,7 @@ namespace skyline::soc::gm20b::engine {
                channelCtx.executor.AddFullBarrier();
            })
            ENGINE_CASE(setReference, {
-                channelCtx.executor.Submit();
+                channelCtx.executor.AddFullBarrier();
            })
        }
    };
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
@ -264,7 +264,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {

            ENGINE_CASE(syncpointAction, {
                Logger::Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id));
-                channelCtx.executor.Submit([=, syncpoints = &this->syncpoints, index = syncpointAction.id]() {
+                channelCtx.executor.AddDeferredAction([=, syncpoints = &this->syncpoints, index = syncpointAction.id]() {
                    syncpoints->at(index).host.Increment();
                });
                syncpoints.at(syncpointAction.id).guest.Increment();
@ -399,7 +399,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {

                switch (info.op) {
                    case type::SemaphoreInfo::Op::Release:
-                        channelCtx.executor.Submit([=, this, semaphore = *registers.semaphore]() {
+                        channelCtx.executor.AddDeferredAction([=, this, semaphore = *registers.semaphore]() {
                            WriteSemaphoreResult(semaphore, semaphore.payload);
                        });
                        break;
@ -407,7 +407,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
                    case type::SemaphoreInfo::Op::Counter: {
                        switch (info.counterType) {
                            case type::SemaphoreInfo::CounterType::Zero:
-                                channelCtx.executor.Submit([=, this, semaphore = *registers.semaphore]() {
+                                channelCtx.executor.AddDeferredAction([=, this, semaphore = *registers.semaphore]() {
                                    WriteSemaphoreResult(semaphore, semaphore.payload);
                                });
                                break;