From 579a2d9337bd884501012b121d2072940eaf0c3f Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sat, 19 Nov 2022 18:11:24 +0000 Subject: [PATCH] Add dynamic executor slot growth --- .../main/cpp/skyline/common/circular_queue.h | 4 ++-- .../gpu/interconnect/command_executor.cpp | 22 ++++++++++++++----- .../gpu/interconnect/command_executor.h | 3 +++ .../nvdrv/devices/nvhost/host1x_channel.cpp | 6 ++++- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/app/src/main/cpp/skyline/common/circular_queue.h b/app/src/main/cpp/skyline/common/circular_queue.h index bdbd3836..1e536e38 100644 --- a/app/src/main/cpp/skyline/common/circular_queue.h +++ b/app/src/main/cpp/skyline/common/circular_queue.h @@ -124,9 +124,9 @@ namespace skyline { * @param tranformation A function that takes in an item of TransformedType as input and returns an item of Type */ template - void AppendTranform(span buffer, Transformation transformation) { + void AppendTranform(TransformedType &container, Transformation transformation) { std::unique_lock lock(productionMutex); - for (auto &item : buffer) { + for (auto &item : container) { auto next{end + 1}; next = (next == reinterpret_cast(vector.end().base())) ? 
reinterpret_cast(vector.begin().base()) : next; if (next == start) { diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index af239a77..916d9efd 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -55,8 +55,13 @@ namespace skyline::gpu::interconnect { ready{other.ready} {} std::shared_ptr CommandRecordThread::Slot::Reset(GPU &gpu) { + auto startTime{util::GetTimeNs()}; + cycle->Wait(); cycle = std::make_shared(*cycle); + if (util::GetTimeNs() - startTime > GrowThresholdNs) + didWait = true; + // Command buffer doesn't need to be reset since that's done implicitly by begin return cycle; } @@ -126,10 +131,7 @@ namespace skyline::gpu::interconnect { Logger::Warn("Failed to intialise RenderDoc API: {}", ret); } - std::vector slots{}; - std::generate_n(std::back_inserter(slots), (1U << *state.settings->executorSlotCountScale), [&] () -> Slot { return gpu; }); - - outgoing.AppendTranform(span(slots), [](auto &slot) { return &slot; }); + outgoing.Push(&slots.emplace_back(gpu)); if (int result{pthread_setname_np(pthread_self(), "Sky-CmdRecord")}) Logger::Warn("Failed to set the thread name: {}", strerror(result)); @@ -148,6 +150,11 @@ namespace skyline::gpu::interconnect { renderDocApi->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), nullptr); slot->capture = false; + if (slot->didWait && slots.size() < (1U << *state.settings->executorSlotCountScale)) { + outgoing.Push(&slots.emplace_back(gpu)); + slot->didWait = false; + } + outgoing.Push(slot); }, [] {}); } catch (const signal::SignalException &e) { @@ -166,7 +173,12 @@ namespace skyline::gpu::interconnect { } CommandRecordThread::Slot *CommandRecordThread::AcquireSlot() { - return outgoing.Pop(); + auto startTime{util::GetTimeNs()}; + auto slot{outgoing.Pop()}; + if (util::GetTimeNs() - startTime > GrowThresholdNs) + 
slot->didWait = true; + return slot; } void CommandRecordThread::ReleaseSlot(Slot *slot) { diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 521a53ef..5dd470c7 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -42,6 +42,7 @@ namespace skyline::gpu::interconnect { u32 executionNumber; bool ready{}; //!< If this slot's command buffer has had 'beginCommandBuffer' called and is ready to have commands recorded into it bool capture{}; //!< If this slot's Vulkan commands should be captured using the renderdoc API + bool didWait{}; //!< If a wait longer than GrowThresholdNs occurred while this slot was being acquired or reset Slot(GPU &gpu); @@ -62,9 +63,11 @@ namespace skyline::gpu::interconnect { }; private: + static constexpr size_t GrowThresholdNs{constant::NsInMillisecond / 4}; //!< The wait time threshold at which the slot count will be increased const DeviceState &state; CircularQueue incoming; //!< Slots pending recording CircularQueue outgoing; //!< Slots that have been submitted, may still be active on the GPU + std::list slots; std::thread thread; diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/host1x_channel.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/host1x_channel.cpp index 80c40d99..ebc3db4a 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/host1x_channel.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/host1x_channel.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "host1x_channel.h" namespace skyline::service::nvdrv::device::nvhost { @@ -38,6 +39,9 @@ namespace skyline::service::nvdrv::device::nvhost { for (size_t i{}; i < syncpointIncrs.size(); i++) { const auto &incr{syncpointIncrs[i]}; + for (size_t j{}; j < incr.numIncrs; j++) + state.soc->host1x.syncpoints[incr.syncpointId].Increment(); + u32 
max{core.syncpointManager.IncrementSyncpointMaxExt(incr.syncpointId, incr.numIncrs)}; if (i < fenceThresholds.size()) fenceThresholds[i] = max; @@ -52,7 +56,7 @@ namespace skyline::service::nvdrv::device::nvhost { Logger::Debug("Submit gather, CPU address: 0x{:X}, words: 0x{:X}", gatherAddress, cmdBuf.words); span gather(reinterpret_cast(gatherAddress), cmdBuf.words); - state.soc->host1x.channels[static_cast(channelType)].Push(gather); + // state.soc->host1x.channels[static_cast(channelType)].Push(gather); } return PosixResult::Success;