Mirror of https://github.com/skyline-emu/skyline.git, synced 2024-12-23 17:51:52 +01:00
Add dynamic executor slot growth
This commit is contained in:
parent 60169fce4c
commit 579a2d9337
@@ -124,9 +124,9 @@ namespace skyline {
      * @param tranformation A function that takes in an item of TransformedType as input and returns an item of Type
      */
     template<typename TransformedType, typename Transformation>
-    void AppendTranform(span <TransformedType> buffer, Transformation transformation) {
+    void AppendTranform(TransformedType &container, Transformation transformation) {
         std::unique_lock lock(productionMutex);
-        for (auto &item : buffer) {
+        for (auto &item : container) {
             auto next{end + 1};
             next = (next == reinterpret_cast<Type *>(vector.end().base())) ? reinterpret_cast<Type *>(vector.begin().base()) : next;
             if (next == start) {
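Note: the hunk above generalises AppendTranform from a span<TransformedType> parameter to any container passed by reference, presumably so callers are no longer forced to provide contiguous storage (later in this commit the slot pool becomes a std::list, which a span cannot view). A standalone sketch of the idea, with illustrative names (AppendTransformed, queue) that are not part of the commit:

// Standalone illustration, not Skyline code: a generic append-with-transform
// helper that works for any iterable container, contiguous or not.
#include <list>
#include <vector>
#include <cstdio>

template<typename Container, typename Transformation>
void AppendTransformed(std::vector<int *> &queue, Container &container, Transformation transformation) {
    for (auto &item : container)       // range-for only needs begin()/end(), not contiguous memory
        queue.push_back(transformation(item));
}

int main() {
    std::list<int> slots{1, 2, 3};     // a std::list cannot be wrapped in a span
    std::vector<int *> outgoing;
    AppendTransformed(outgoing, slots, [](auto &slot) { return &slot; });
    std::printf("queued %zu items\n", outgoing.size());
    return 0;
}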
@@ -55,8 +55,13 @@ namespace skyline::gpu::interconnect {
           ready{other.ready} {}

     std::shared_ptr<FenceCycle> CommandRecordThread::Slot::Reset(GPU &gpu) {
+        auto startTime{util::GetTimeNs()};
+
         cycle->Wait();
         cycle = std::make_shared<FenceCycle>(*cycle);
+        if (util::GetTimeNs() - startTime > GrowThresholdNs)
+            didWait = true;
+
         // Command buffer doesn't need to be reset since that's done implicitly by begin
         return cycle;
     }
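Note: Slot::Reset now times the fence wait and latches didWait when it exceeds GrowThresholdNs; the record thread later uses that flag as the signal to grow the slot pool. A minimal, self-contained sketch of the timing pattern (GetTimeNs here is a chrono-based stand-in for util::GetTimeNs, and the 250 µs value mirrors the GrowThresholdNs constant added further down in the header):

// Illustrative only: flag a wait that blocked for longer than a threshold.
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>

constexpr std::uint64_t GrowThresholdNs{250'000}; // 0.25 ms, i.e. NsInMillisecond / 4

std::uint64_t GetTimeNs() {
    return std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now().time_since_epoch()).count();
}

int main() {
    bool didWait{};
    auto startTime{GetTimeNs()};
    std::this_thread::sleep_for(std::chrono::milliseconds(1)); // stand-in for cycle->Wait()
    if (GetTimeNs() - startTime > GrowThresholdNs)
        didWait = true;                                        // growth is considered later, on release
    std::printf("didWait = %d\n", didWait);
    return 0;
}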
@@ -126,10 +131,7 @@ namespace skyline::gpu::interconnect {
                 Logger::Warn("Failed to intialise RenderDoc API: {}", ret);
             }

-            std::vector<Slot> slots{};
-            std::generate_n(std::back_inserter(slots), (1U << *state.settings->executorSlotCountScale), [&] () -> Slot { return gpu; });
-
-            outgoing.AppendTranform(span<Slot>(slots), [](auto &slot) { return &slot; });
+            outgoing.Push(&slots.emplace_back(gpu));

             if (int result{pthread_setname_np(pthread_self(), "Sky-CmdRecord")})
                 Logger::Warn("Failed to set the thread name: {}", strerror(result));
@@ -148,6 +150,11 @@ namespace skyline::gpu::interconnect {
                 renderDocApi->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), nullptr);
                 slot->capture = false;

+                if (slot->didWait && slots.size() < (1U << *state.settings->executorSlotCountScale)) {
+                    outgoing.Push(&slots.emplace_back(gpu));
+                    slot->didWait = false;
+                }
+
                 outgoing.Push(slot);
             }, [] {});
         } catch (const signal::SignalException &e) {
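Note: together with the previous hunks this forms the growth loop: the record thread starts with a single slot, and whenever the slot that just finished recording had stalled (didWait) while the pool is still below 1 << executorSlotCountScale, a fresh slot is appended before the old one is recycled. A condensed standalone model of that policy (the container choices and the scale value here are stand-ins, not the commit's code):

// Illustrative model of the growth policy: grow the pool by one slot when the
// last acquire or reset stalled, capped at 1 << slotCountScale slots.
#include <cstdio>
#include <deque>
#include <list>

struct Slot { bool didWait{}; };

int main() {
    constexpr unsigned slotCountScale{3};      // cap of 8 slots; the real value comes from settings
    std::list<Slot> slots;                     // std::list keeps Slot addresses stable as it grows
    std::deque<Slot *> outgoing;

    outgoing.push_back(&slots.emplace_back()); // start with a single slot

    Slot *slot{outgoing.front()};              // pretend this slot just finished recording
    outgoing.pop_front();
    slot->didWait = true;                      // pretend acquiring it was slow

    if (slot->didWait && slots.size() < (1U << slotCountScale)) {
        outgoing.push_back(&slots.emplace_back()); // add a fresh slot to the pool
        slot->didWait = false;
    }
    outgoing.push_back(slot);                  // recycle the finished slot

    std::printf("pool size = %zu\n", slots.size()); // 2
    return 0;
}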
@@ -166,7 +173,12 @@ namespace skyline::gpu::interconnect {
     }

     CommandRecordThread::Slot *CommandRecordThread::AcquireSlot() {
-        return outgoing.Pop();
+        auto startTime{util::GetTimeNs()};
+        auto slot{outgoing.Pop()};
+        if (util::GetTimeNs() - startTime > GrowThresholdNs)
+            slot->didWait = true;
+
+        return slot;
     }

     void CommandRecordThread::ReleaseSlot(Slot *slot) {
@@ -42,6 +42,7 @@ namespace skyline::gpu::interconnect {
             u32 executionNumber;
             bool ready{}; //!< If this slot's command buffer has had 'beginCommandBuffer' called and is ready to have commands recorded into it
             bool capture{}; //!< If this slot's Vulkan commands should be captured using the renderdoc API
+            bool didWait{}; //!< If a wait of time longer than GrowThresholdNs occured when this slot was acquired

             Slot(GPU &gpu);

@@ -62,9 +63,11 @@ namespace skyline::gpu::interconnect {
         };

       private:
+        static constexpr size_t GrowThresholdNs{constant::NsInMillisecond / 4}; //!< The wait time threshold at which the slot count will be increased
         const DeviceState &state;
         CircularQueue<Slot *> incoming; //!< Slots pending recording
         CircularQueue<Slot *> outgoing; //!< Slots that have been submitted, may still be active on the GPU
+        std::list<Slot> slots;

         std::thread thread;

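Note: the slot pool moves from a function-local std::vector into a std::list member. The incoming/outgoing queues hold raw Slot pointers, so the container must never relocate existing elements while the pool grows at runtime; std::list guarantees stable node addresses, whereas a growing std::vector may reallocate and invalidate every outstanding pointer. A small standalone demonstration (the int element is a stand-in for Slot):

// Illustrative only: std::list keeps element addresses stable while it grows,
// which is what lets the queues hold raw pointers into a growing pool.
#include <cstdio>
#include <list>

int main() {
    std::list<int> pool;
    int *first{&pool.emplace_back(1)};     // address handed out, like a Slot * pushed into a queue
    for (int i{2}; i <= 100; i++)
        pool.emplace_back(i);              // growing a std::list never relocates existing nodes
    std::printf("first element still at %p with value %d\n", static_cast<void *>(first), *first);
    return 0;
}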
@@ -3,6 +3,7 @@

 #include <soc.h>
 #include <services/nvdrv/devices/deserialisation/deserialisation.h>
+#include <gpu.h>
 #include "host1x_channel.h"

 namespace skyline::service::nvdrv::device::nvhost {
@@ -38,6 +39,9 @@ namespace skyline::service::nvdrv::device::nvhost {
         for (size_t i{}; i < syncpointIncrs.size(); i++) {
             const auto &incr{syncpointIncrs[i]};

+            for (size_t j{}; j < incr.numIncrs; j++)
+                state.soc->host1x.syncpoints[incr.syncpointId].Increment();
+
             u32 max{core.syncpointManager.IncrementSyncpointMaxExt(incr.syncpointId, incr.numIncrs)};
             if (i < fenceThresholds.size())
                 fenceThresholds[i] = max;
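Note: this hunk has the host1x submit path increment each requested syncpoint directly on the CPU, numIncrs times per entry, while the following hunk comments out pushing the gather to the channel, so the increments apparently no longer come from executing the gather itself. A host1x-style syncpoint is essentially a monotonic counter that fence thresholds are compared against; a minimal standalone model of that behaviour (not Skyline's Syncpoint class, and it ignores wrap-around handling):

// Minimal standalone model of a host1x-style syncpoint: a monotonic counter
// compared against a fence threshold.
#include <atomic>
#include <cstdint>
#include <cstdio>

struct Syncpoint {
    std::atomic<std::uint32_t> value{};

    std::uint32_t Increment() {
        return value.fetch_add(1, std::memory_order_acq_rel) + 1;
    }

    bool HasReached(std::uint32_t threshold) const {
        return value.load(std::memory_order_acquire) >= threshold;
    }
};

int main() {
    Syncpoint syncpoint;
    std::uint32_t numIncrs{4};
    std::uint32_t threshold{syncpoint.value + numIncrs};  // akin to the max returned by IncrementSyncpointMaxExt

    for (std::uint32_t j{}; j < numIncrs; j++)            // mirrors the loop added in the hunk above
        syncpoint.Increment();

    std::printf("reached threshold %u? %d\n", threshold, syncpoint.HasReached(threshold));
    return 0;
}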
@@ -52,7 +56,7 @@ namespace skyline::service::nvdrv::device::nvhost {
             Logger::Debug("Submit gather, CPU address: 0x{:X}, words: 0x{:X}", gatherAddress, cmdBuf.words);

             span gather(reinterpret_cast<u32 *>(gatherAddress), cmdBuf.words);
-            state.soc->host1x.channels[static_cast<size_t>(channelType)].Push(gather);
+            // state.soc->host1x.channels[static_cast<size_t>(channelType)].Push(gather);
         }

         return PosixResult::Success;