Avoid submitting executions on semaphore incrs

This avoids breaking render passes (RPs), which helps performance; since we have our own sync logic, we don't need to match the guest's behaviour here.
This commit is contained in:
Billy Laws 2023-03-25 22:48:43 +00:00
parent 99a7b77948
commit 737fb2207d
4 changed files with 43 additions and 18 deletions

View File

@ -672,23 +672,37 @@ namespace skyline::gpu::interconnect {
executionTag = AllocateTag();
if (!slot->nodes.empty()) {
TRACE_EVENT("gpu", "CommandExecutor::Submit");
// Ensure all pushed callbacks wait for the submission to have finished GPU execution
if (!slot->nodes.empty())
waiterThread.Queue(cycle, {});
if (callback && *state.settings->useDirectMemoryImport)
waiterThread.Queue(cycle, std::move(callback));
else
waiterThread.Queue(cycle, {});
if (*state.settings->useDirectMemoryImport) {
// When DMI is in use, callbacks and deferred actions should be executed in sequence with the host GPU
for (auto &actionCb : pendingDeferredActions)
waiterThread.Queue(nullptr, std::move(actionCb));
SubmitInternal();
submissionNumber++;
} else {
if (callback && *state.settings->useDirectMemoryImport)
pendingDeferredActions.clear();
if (callback)
waiterThread.Queue(nullptr, std::move(callback));
}
if (callback && !*state.settings->useDirectMemoryImport)
callback();
if (!slot->nodes.empty()) {
TRACE_EVENT("gpu", "CommandExecutor::Submit");
SubmitInternal();
submissionNumber++;
}
if (!*state.settings->useDirectMemoryImport) {
// When DMI is not in use, execute callbacks immediately after submission
for (auto &actionCb : pendingDeferredActions)
actionCb();
pendingDeferredActions.clear();
if (callback)
callback();
}
ResetInternal();
@ -710,6 +724,10 @@ namespace skyline::gpu::interconnect {
}
}
/**
 * @brief Stores an action in the pending deferred action list, which is consumed (and cleared) by Submit
 * @param callback The action to defer, ownership is moved into the pending list
 */
void CommandExecutor::AddDeferredAction(std::function<void()> &&callback) {
    pendingDeferredActions.push_back(std::move(callback));
}
void CommandExecutor::LockPreserve() {
if (!preserveLocked) {
preserveLocked = true;

View File

@ -202,6 +202,8 @@ namespace skyline::gpu::interconnect {
std::vector<std::function<void()>> flushCallbacks; //!< Set of persistent callbacks that will be called at the start of Execute in order to flush data required for recording
std::vector<std::function<void()>> pipelineChangeCallbacks; //!< Set of persistent callbacks that will be called after any non-Maxwell 3D engine changes the active pipeline
std::vector<std::function<void()>> pendingDeferredActions; //!< Set of actions added via AddDeferredAction that are pending; Submit either queues them on the waiter thread (DMI) or calls them directly after submission, then clears the list
u32 nextCheckpointId{}; //!< The ID of the next debug checkpoint to be allocated
void RotateRecordSlot();
@ -372,6 +374,11 @@ namespace skyline::gpu::interconnect {
*/
void Submit(std::function<void()> &&callback = {}, bool wait = false);
/**
* @brief Adds an action to be executed upon current cycle completion (if DMI is on, otherwise after submission)
*/
void AddDeferredAction(std::function<void()> &&callback);
/**
* @brief Locks all preserve attached buffers/textures
* @note This **MUST** be called before attaching any buffers/textures to an execution

View File

@ -19,7 +19,7 @@ namespace skyline::soc::gm20b::engine {
ENGINE_STRUCT_CASE(syncpoint, action, {
if (action.operation == Registers::Syncpoint::Operation::Incr) {
Logger::Debug("Increment syncpoint: {}", +action.index);
channelCtx.executor.Submit([=, syncpoints = &this->syncpoints, index = action.index]() {
channelCtx.executor.AddDeferredAction([=, syncpoints = &this->syncpoints, index = action.index]() {
syncpoints->at(index).host.Increment();
});
syncpoints.at(action.index).guest.Increment();
@ -50,7 +50,7 @@ namespace skyline::soc::gm20b::engine {
channelCtx.Lock();
break;
case Registers::Semaphore::Operation::Release:
channelCtx.executor.Submit([this, action, address, payload = registers.semaphore->payload] () {
channelCtx.executor.AddDeferredAction([this, action, address, payload = registers.semaphore->payload] () {
// Write timestamp first to ensure ordering
if (action.releaseSize == Registers::Semaphore::ReleaseSize::SixteenBytes) {
channelCtx.asCtx->gmmu.Write<u32>(address + 4, 0);
@ -121,7 +121,7 @@ namespace skyline::soc::gm20b::engine {
channelCtx.executor.AddFullBarrier();
})
ENGINE_CASE(setReference, {
channelCtx.executor.Submit();
channelCtx.executor.AddFullBarrier();
})
}
};

View File

@ -264,7 +264,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
ENGINE_CASE(syncpointAction, {
Logger::Debug("Increment syncpoint: {}", static_cast<u16>(syncpointAction.id));
channelCtx.executor.Submit([=, syncpoints = &this->syncpoints, index = syncpointAction.id]() {
channelCtx.executor.AddDeferredAction([=, syncpoints = &this->syncpoints, index = syncpointAction.id]() {
syncpoints->at(index).host.Increment();
});
syncpoints.at(syncpointAction.id).guest.Increment();
@ -399,7 +399,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
switch (info.op) {
case type::SemaphoreInfo::Op::Release:
channelCtx.executor.Submit([=, this, semaphore = *registers.semaphore]() {
channelCtx.executor.AddDeferredAction([=, this, semaphore = *registers.semaphore]() {
WriteSemaphoreResult(semaphore, semaphore.payload);
});
break;
@ -407,7 +407,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
case type::SemaphoreInfo::Op::Counter: {
switch (info.counterType) {
case type::SemaphoreInfo::CounterType::Zero:
channelCtx.executor.Submit([=, this, semaphore = *registers.semaphore]() {
channelCtx.executor.AddDeferredAction([=, this, semaphore = *registers.semaphore]() {
WriteSemaphoreResult(semaphore, semaphore.payload);
});
break;