// Patch summary: introduces CommandExecutor::AddDeferredAction and makes CommandExecutor::Submit drain the deferred actions.
//
// command_executor.cpp / command_executor.h:
//   - AddDeferredAction appends the supplied callback to the new pendingDeferredActions vector.
//   - Submit now queues an empty waiter entry for the cycle whenever the slot has recorded nodes.
//   - With useDirectMemoryImport set, Submit moves every pending deferred action and then the optional
//     callback onto waiterThread before SubmitInternal is called; otherwise it invokes the deferred
//     actions and then the callback directly after SubmitInternal. The vector is cleared on both paths.
//
// gpfifo.cpp / maxwell_3d.cpp:
//   - Syncpoint increments and semaphore release / zero-counter writes now call AddDeferredAction
//     instead of passing their lambda to Submit, so these hunks no longer call Submit themselves.
//   - setReference now calls AddFullBarrier instead of Submit.
//
// NOTE(review): this copy of the patch has its line breaks collapsed and template argument lists appear
// to be stripped (e.g. "std::function &&callback", "std::vector> pendingDeferredActions",
// "static_cast(syncpointAction.id)") - regenerate it from the original commit before applying.
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 6c4b5db5..81f295c3 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -672,23 +672,37 @@ namespace skyline::gpu::interconnect { executionTag = AllocateTag(); - if (!slot->nodes.empty()) { - TRACE_EVENT("gpu", "CommandExecutor::Submit"); + // Ensure all pushed callbacks wait for the submission to have finished GPU execution + if (!slot->nodes.empty()) + waiterThread.Queue(cycle, {}); - if (callback && *state.settings->useDirectMemoryImport) - waiterThread.Queue(cycle, std::move(callback)); - else - waiterThread.Queue(cycle, {}); + if (*state.settings->useDirectMemoryImport) { + // When DMI is in use, callbacks and deferred actions should be executed in sequence with the host GPU + for (auto &actionCb : pendingDeferredActions) + waiterThread.Queue(nullptr, std::move(actionCb)); - SubmitInternal(); - submissionNumber++; - } else { - if (callback && *state.settings->useDirectMemoryImport) + pendingDeferredActions.clear(); + + if (callback) waiterThread.Queue(nullptr, std::move(callback)); } - if (callback && !*state.settings->useDirectMemoryImport) - callback(); + if (!slot->nodes.empty()) { + TRACE_EVENT("gpu", "CommandExecutor::Submit"); + SubmitInternal(); + submissionNumber++; + } + + if (!*state.settings->useDirectMemoryImport) { + // When DMI is not in use, execute callbacks immediately after submission + for (auto &actionCb : pendingDeferredActions) + actionCb(); + + pendingDeferredActions.clear(); + + if (callback) + callback(); + } ResetInternal(); @@ -710,6 +724,10 @@ namespace skyline::gpu::interconnect { } } + void CommandExecutor::AddDeferredAction(std::function &&callback) { + pendingDeferredActions.emplace_back(std::move(callback)); + } + void CommandExecutor::LockPreserve() { if (!preserveLocked) { preserveLocked 
= true; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 1d8cde1f..ec89fa20 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -202,6 +202,8 @@ namespace skyline::gpu::interconnect { std::vector> flushCallbacks; //!< Set of persistent callbacks that will be called at the start of Execute in order to flush data required for recording std::vector> pipelineChangeCallbacks; //!< Set of persistent callbacks that will be called after any non-Maxwell 3D engine changes the active pipeline + std::vector> pendingDeferredActions; //!< Actions added via AddDeferredAction, held until Submit either runs them or queues them on the waiter thread and then clears the list + u32 nextCheckpointId{}; //!< The ID of the next debug checkpoint to be allocated void RotateRecordSlot(); @@ -372,6 +374,11 @@ namespace skyline::gpu::interconnect { */ void Submit(std::function &&callback = {}, bool wait = false); + /** + * @brief Adds an action to be executed upon current cycle completion (if DMI is on, otherwise after submission) + */ + void AddDeferredAction(std::function &&callback); + /** * @brief Locks all preserve attached buffers/textures * @note This **MUST** be called before attaching any buffers/textures to an execution diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp index ae59d9f4..67c47f5f 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/gpfifo.cpp @@ -19,7 +19,7 @@ namespace skyline::soc::gm20b::engine { ENGINE_STRUCT_CASE(syncpoint, action, { if (action.operation == Registers::Syncpoint::Operation::Incr) { Logger::Debug("Increment syncpoint: {}", +action.index); - channelCtx.executor.Submit([=, syncpoints = &this->syncpoints, index = action.index]() { + channelCtx.executor.AddDeferredAction([=, syncpoints = &this->syncpoints, index = action.index]() { syncpoints->at(index).host.Increment(); 
}); syncpoints.at(action.index).guest.Increment(); @@ -50,7 +50,7 @@ namespace skyline::soc::gm20b::engine { channelCtx.Lock(); break; case Registers::Semaphore::Operation::Release: - channelCtx.executor.Submit([this, action, address, payload = registers.semaphore->payload] () { + channelCtx.executor.AddDeferredAction([this, action, address, payload = registers.semaphore->payload] () { // Write timestamp first to ensure ordering if (action.releaseSize == Registers::Semaphore::ReleaseSize::SixteenBytes) { channelCtx.asCtx->gmmu.Write(address + 4, 0); @@ -121,7 +121,7 @@ namespace skyline::soc::gm20b::engine { channelCtx.executor.AddFullBarrier(); }) ENGINE_CASE(setReference, { - channelCtx.executor.Submit(); + channelCtx.executor.AddFullBarrier(); }) } }; diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index be2af8d6..5cc1cbaa 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -264,7 +264,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { ENGINE_CASE(syncpointAction, { Logger::Debug("Increment syncpoint: {}", static_cast(syncpointAction.id)); - channelCtx.executor.Submit([=, syncpoints = &this->syncpoints, index = syncpointAction.id]() { + channelCtx.executor.AddDeferredAction([=, syncpoints = &this->syncpoints, index = syncpointAction.id]() { syncpoints->at(index).host.Increment(); }); syncpoints.at(syncpointAction.id).guest.Increment(); @@ -399,7 +399,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { switch (info.op) { case type::SemaphoreInfo::Op::Release: - channelCtx.executor.Submit([=, this, semaphore = *registers.semaphore]() { + channelCtx.executor.AddDeferredAction([=, this, semaphore = *registers.semaphore]() { WriteSemaphoreResult(semaphore, semaphore.payload); }); break; @@ -407,7 +407,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { case 
type::SemaphoreInfo::Op::Counter: { switch (info.counterType) { case type::SemaphoreInfo::CounterType::Zero: - channelCtx.executor.Submit([=, this, semaphore = *registers.semaphore]() { + channelCtx.executor.AddDeferredAction([=, this, semaphore = *registers.semaphore]() { WriteSemaphoreResult(semaphore, semaphore.payload); }); break;