Submit GPFIFO work prior to CircularQueue waiting

The point at which we call submit is a significant factor in performance. Previously we submitted at the end of each PB (PushBuffer), which isn't optimal because there could be multiple PBs queued up that would benefit from being in the same submission. We now delay the submission of the workload until we run out of PBs.
This commit is contained in:
Billy Laws 2022-07-16 21:55:31 +05:30 committed by PixelyIon
parent 3ac5ed8c06
commit 561103d3da
No known key found for this signature in database
GPG Key ID: 11BC6C3201BC2C05
4 changed files with 16 additions and 8 deletions

View File

@ -49,9 +49,10 @@ namespace skyline {
/** /**
* @brief A blocking for-each that runs on every item and waits till new items to run on them as well * @brief A blocking for-each that runs on every item and waits till new items to run on them as well
* @param function A function that is called for each item (with the only parameter as a reference to that item) * @param function A function that is called for each item (with the only parameter as a reference to that item)
* @param preWait An optional function that's called prior to waiting on more items to be queued
*/ */
template<typename F> template<typename F1, typename F2>
[[noreturn]] void Process(F function) { [[noreturn]] void Process(F1 function, F2 preWait) {
TRACE_EVENT_BEGIN("containers", "CircularQueue::Process"); TRACE_EVENT_BEGIN("containers", "CircularQueue::Process");
while (true) { while (true) {
@ -59,6 +60,7 @@ namespace skyline {
std::unique_lock lock(productionMutex); std::unique_lock lock(productionMutex);
TRACE_EVENT_END("containers"); TRACE_EVENT_END("containers");
preWait();
produceCondition.wait(lock, [this]() { return start != end; }); produceCondition.wait(lock, [this]() { return start != end; });
TRACE_EVENT_BEGIN("containers", "CircularQueue::Process"); TRACE_EVENT_BEGIN("containers", "CircularQueue::Process");
} }

View File

@ -224,10 +224,10 @@ namespace skyline::gpu {
try { try {
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
presentQueue.Process([this](const PresentableFrame& frame) { presentQueue.Process([this](const PresentableFrame &frame) {
PresentFrame(frame); PresentFrame(frame);
frame.presentCallback(); // We're calling the callback here as it's outside of all the locks in PresentFrame frame.presentCallback(); // We're calling the callback here as it's outside of all the locks in PresentFrame
}); }, [] {});
} catch (const signal::SignalException &e) { } catch (const signal::SignalException &e) {
Logger::Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames)); Logger::Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));
if (state.process) if (state.process)
@ -374,7 +374,7 @@ namespace skyline::gpu {
} }
} }
u64 PresentationEngine::Present(const std::shared_ptr<TextureView> &texture, i64 timestamp, i64 swapInterval, AndroidRect crop, NativeWindowScalingMode scalingMode, NativeWindowTransform transform, skyline::service::hosbinder::AndroidFence fence, const std::function<void()>& presentCallback) { u64 PresentationEngine::Present(const std::shared_ptr<TextureView> &texture, i64 timestamp, i64 swapInterval, AndroidRect crop, NativeWindowScalingMode scalingMode, NativeWindowTransform transform, skyline::service::hosbinder::AndroidFence fence, const std::function<void()> &presentCallback) {
if (!vkSurface.has_value()) { if (!vkSurface.has_value()) {
// We want this function to generally (not necessarily always) block when a surface is not present to implicitly pause the game // We want this function to generally (not necessarily always) block when a surface is not present to implicitly pause the game
std::unique_lock lock(mutex); std::unique_lock lock(mutex);

View File

@ -150,6 +150,10 @@ namespace skyline::soc::gm20b {
} }
void ChannelGpfifo::Process(GpEntry gpEntry) { void ChannelGpfifo::Process(GpEntry gpEntry) {
// Submit if required by the GpEntry, this is needed as some games dynamically generate pushbuffer contents
if (gpEntry.sync == GpEntry::Sync::Wait)
channelCtx.executor.Submit();
if (!gpEntry.size) { if (!gpEntry.size) {
// This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers // This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers
switch (gpEntry.opcode) { switch (gpEntry.opcode) {
@ -335,8 +339,6 @@ namespace skyline::soc::gm20b {
if (hitEnd) if (hitEnd)
break; break;
} }
channelCtx.executor.Submit();
} }
void ChannelGpfifo::Run() { void ChannelGpfifo::Run() {
@ -350,6 +352,10 @@ namespace skyline::soc::gm20b {
gpEntries.Process([this](GpEntry gpEntry) { gpEntries.Process([this](GpEntry gpEntry) {
Logger::Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size); Logger::Debug("Processing pushbuffer: 0x{:X}, Size: 0x{:X}", gpEntry.Address(), +gpEntry.size);
Process(gpEntry); Process(gpEntry);
}, [this]() {
// If we run out of GpEntries to process ensure we submit any remaining GPU work before waiting for more to arrive
Logger::Debug("Finished processing pushbuffer batch");
channelCtx.executor.Submit();
}); });
} catch (const signal::SignalException &e) { } catch (const signal::SignalException &e) {
if (e.signal != SIGINT) { if (e.signal != SIGINT) {

View File

@ -124,7 +124,7 @@ namespace skyline::soc::host1x {
gatherQueue.Process([this](span<u32> gather) { gatherQueue.Process([this](span<u32> gather) {
Logger::Debug("Processing pushbuffer: 0x{:X}, size: 0x{:X}", gather.data(), gather.size()); Logger::Debug("Processing pushbuffer: 0x{:X}, size: 0x{:X}", gather.data(), gather.size());
Process(gather); Process(gather);
}); }, [] {});
} catch (const signal::SignalException &e) { } catch (const signal::SignalException &e) {
if (e.signal != SIGINT) { if (e.signal != SIGINT) {
Logger::Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames)); Logger::Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));