From 561103d3da26a1dc7b728c57b329a8d086f4df3d Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sat, 16 Jul 2022 21:55:31 +0530 Subject: [PATCH] Submit GPFIFO work prior to `CircularQueue` waiting The position at which we call submit is a significant factor in performance and we did so at the end of PBs (PushBuffers), this isn't optimal as there could be multiple PBs queued up that would benefit from being in the same submission. We now delay the submission of the workload till we run out of PBs. --- app/src/main/cpp/skyline/common/circular_queue.h | 6 ++++-- app/src/main/cpp/skyline/gpu/presentation_engine.cpp | 6 +++--- app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp | 10 ++++++++-- app/src/main/cpp/skyline/soc/host1x/command_fifo.cpp | 2 +- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/app/src/main/cpp/skyline/common/circular_queue.h b/app/src/main/cpp/skyline/common/circular_queue.h index 1c030f05..cd209d50 100644 --- a/app/src/main/cpp/skyline/common/circular_queue.h +++ b/app/src/main/cpp/skyline/common/circular_queue.h @@ -49,9 +49,10 @@ namespace skyline { /** * @brief A blocking for-each that runs on every item and waits till new items to run on them as well * @param function A function that is called for each item (with the only parameter as a reference to that item) + * @param preWait An optional function that's called prior to waiting on more items to be queued */ - template - [[noreturn]] void Process(F function) { + template + [[noreturn]] void Process(F1 function, F2 preWait) { TRACE_EVENT_BEGIN("containers", "CircularQueue::Process"); while (true) { @@ -59,6 +60,7 @@ namespace skyline { std::unique_lock lock(productionMutex); TRACE_EVENT_END("containers"); + preWait(); produceCondition.wait(lock, [this]() { return start != end; }); TRACE_EVENT_BEGIN("containers", "CircularQueue::Process"); } diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp index ac9355b4..6276f242 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp @@ -224,10 +224,10 @@ namespace skyline::gpu { try { signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); - presentQueue.Process([this](const PresentableFrame& frame) { + presentQueue.Process([this](const PresentableFrame &frame) { PresentFrame(frame); frame.presentCallback(); // We're calling the callback here as it's outside of all the locks in PresentFrame - }); + }, [] {}); } catch (const signal::SignalException &e) { Logger::Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames)); if (state.process) @@ -374,7 +374,7 @@ namespace skyline::gpu { } } - u64 PresentationEngine::Present(const std::shared_ptr &texture, i64 timestamp, i64 swapInterval, AndroidRect crop, NativeWindowScalingMode scalingMode, NativeWindowTransform transform, skyline::service::hosbinder::AndroidFence fence, const std::function& presentCallback) { + u64 PresentationEngine::Present(const std::shared_ptr &texture, i64 timestamp, i64 swapInterval, AndroidRect crop, NativeWindowScalingMode scalingMode, NativeWindowTransform transform, skyline::service::hosbinder::AndroidFence fence, const std::function &presentCallback) { if (!vkSurface.has_value()) { // We want this function to generally (not necessarily always) block when a surface is not present to implicitly pause the game std::unique_lock lock(mutex); diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp index c901d159..acf64fbb 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp @@ -150,6 +150,10 @@ namespace skyline::soc::gm20b { } void ChannelGpfifo::Process(GpEntry gpEntry) { + // Submit if required by the GpEntry, this is needed as some games dynamically generate pushbuffer contents + if (gpEntry.sync == GpEntry::Sync::Wait) + channelCtx.executor.Submit(); + if (!gpEntry.size) { // This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers switch (gpEntry.opcode) { @@ -335,8 +339,6 @@ namespace skyline::soc::gm20b { if (hitEnd) break; } - - channelCtx.executor.Submit(); } void ChannelGpfifo::Run() { @@ -350,6 +352,10 @@ namespace skyline::soc::gm20b { gpEntries.Process([this](GpEntry gpEntry) { Logger::Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size); Process(gpEntry); + }, [this]() { + // If we run out of GpEntries to process ensure we submit any remaining GPU work before waiting for more to arrive + Logger::Debug("Finished processing pushbuffer batch"); + channelCtx.executor.Submit(); }); } catch (const signal::SignalException &e) { if (e.signal != SIGINT) { diff --git a/app/src/main/cpp/skyline/soc/host1x/command_fifo.cpp b/app/src/main/cpp/skyline/soc/host1x/command_fifo.cpp index ff67e7a6..e265750d 100644 --- a/app/src/main/cpp/skyline/soc/host1x/command_fifo.cpp +++ b/app/src/main/cpp/skyline/soc/host1x/command_fifo.cpp @@ -124,7 +124,7 @@ namespace skyline::soc::host1x { gatherQueue.Process([this](span gather) { Logger::Debug("Processing pushbuffer: 0x{:X}, size: 0x{:X}", gather.data(), gather.size()); Process(gather); - }); + }, [] {}); } catch (const signal::SignalException &e) { if (e.signal != SIGINT) { Logger::Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));