Support Core Migration for Running External Thread

Previously, we did not support migrating threads that were running in a non-cooperative manner. This was partially due to the scheduler's dependence on per-core condition variables rather than per-thread ones, which made such migration harder to do programmatically. That has been fixed by moving to per-thread condition variables, so the limitation can now be removed. This feature is used by Unity games.
This commit is contained in:
PixelyIon 2021-02-18 19:43:29 +05:30 committed by ◱ Mark
parent 198f32de51
commit 20bdda6a63
5 changed files with 91 additions and 47 deletions

View File

@ -171,7 +171,7 @@
</inspection_tool> </inspection_tool>
<inspection_tool class="CheckedExceptionClass" enabled="true" level="WARNING" enabled_by_default="true" /> <inspection_tool class="CheckedExceptionClass" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="ClangTidy" enabled="true" level="WARNING" enabled_by_default="true"> <inspection_tool class="ClangTidy" enabled="true" level="WARNING" enabled_by_default="true">
<option name="clangTidyChecks" value="-*,bugprone-argument-comment,bugprone-assert-side-effect,bugprone-bad-signal-to-kill-thread,bugprone-branch-clone,bugprone-copy-constructor-init,bugprone-dangling-handle,bugprone-dynamic-static-initializers,bugprone-fold-init-type,bugprone-forward-declaration-namespace,bugprone-forwarding-reference-overload,bugprone-inaccurate-erase,bugprone-incorrect-roundings,bugprone-integer-division,bugprone-lambda-function-name,bugprone-macro-parentheses,bugprone-macro-repeated-side-effects,bugprone-misplaced-operator-in-strlen-in-alloc,bugprone-misplaced-pointer-arithmetic-in-alloc,bugprone-misplaced-widening-cast,bugprone-move-forwarding-reference,bugprone-multiple-statement-macro,bugprone-no-escape,bugprone-not-null-terminated-result,bugprone-parent-virtual-call,bugprone-posix-return,bugprone-reserved-identifier,bugprone-sizeof-container,bugprone-sizeof-expression,bugprone-spuriously-wake-up-functions,bugprone-string-constructor,bugprone-string-integer-assignment,bugprone-string-literal-with-embedded-nul,bugprone-suspicious-enum-usage,bugprone-suspicious-include,bugprone-suspicious-memset-usage,bugprone-suspicious-missing-comma,bugprone-suspicious-semicolon,bugprone-suspicious-string-compare,bugprone-swapped-arguments,bugprone-terminating-continue,bugprone-throw-keyword-missing,bugprone-too-small-loop-variable,bugprone-undefined-memory-manipulation,bugprone-undelegated-constructor,bugprone-unhandled-self-assignment,bugprone-unused-raii,bugprone-unused-return-value,bugprone-use-after-move,bugprone-virtual-near-miss,cert-dcl21-cpp,cert-dcl58-cpp,cert-err34-c,cert-err52-cpp,cert-err58-cpp,cert-err60-cpp,cert-flp30-c,cert-msc50-cpp,cert-msc51-cpp,cert-str34-c,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-pro-type-static-cast-downcast,cppcoreguidelines-slicing,google-default-arguments,google-explicit-constructor,google-runtime-operator,hicpp-exception-baseclass,hicpp-multiway-paths-covered,
misc-misplaced-const,misc-new-delete-overloads,misc-no-recursion,misc-non-copyable-objects,misc-throw-by-value-catch-by-reference,misc-unconventional-assign-operator,misc-uniqueptr-reset-release,modernize-avoid-bind,modernize-concat-nested-namespaces,modernize-deprecated-headers,modernize-deprecated-ios-base-aliases,modernize-loop-convert,modernize-make-shared,modernize-make-unique,modernize-pass-by-value,modernize-raw-string-literal,modernize-redundant-void-arg,modernize-replace-auto-ptr,modernize-replace-disallow-copy-and-assign-macro,modernize-replace-random-shuffle,modernize-return-braced-init-list,modernize-shrink-to-fit,modernize-unary-static-assert,modernize-use-auto,modernize-use-bool-literals,modernize-use-emplace,modernize-use-equals-default,modernize-use-equals-delete,modernize-use-nodiscard,modernize-use-noexcept,modernize-use-nullptr,modernize-use-override,modernize-use-transparent-functors,modernize-use-uncaught-exceptions,mpi-buffer-deref,mpi-type-mismatch,openmp-use-default-none,performance-faster-string-find,performance-for-range-copy,performance-implicit-conversion-in-loop,performance-inefficient-algorithm,performance-inefficient-string-concatenation,performance-inefficient-vector-operation,performance-move-const-arg,performance-move-constructor-init,performance-no-automatic-move,performance-noexcept-move-constructor,performance-trivially-destructible,performance-type-promotion-in-math-fn,performance-unnecessary-copy-initialization,performance-unnecessary-value-param,portability-simd-intrinsics,readability-avoid-const-params-in-decls,readability-const-return-type,readability-container-size-empty,readability-convert-member-functions-to-static,readability-delete-null-pointer,readability-deleted-default,readability-inconsistent-declaration-parameter-name,readability-make-member-function-const,readability-misleading-indentation,readability-misplaced-array-index,readability-non-const-parameter,readability-redundant-control-flow,readability-redundant-dec
laration,readability-redundant-function-ptr-dereference,readability-redundant-smartptr-get,readability-redundant-string-cstr,readability-redundant-string-init,readability-simplify-subscript-expr,readability-static-accessed-through-instance,readability-static-definition-in-anonymous-namespace,readability-string-compare,readability-uniqueptr-delete-release,readability-use-anyofallof" /> <option name="clangTidyChecks" value="-*,bugprone-argument-comment,bugprone-assert-side-effect,bugprone-bad-signal-to-kill-thread,bugprone-branch-clone,bugprone-copy-constructor-init,bugprone-dangling-handle,bugprone-dynamic-static-initializers,bugprone-fold-init-type,bugprone-forward-declaration-namespace,bugprone-forwarding-reference-overload,bugprone-inaccurate-erase,bugprone-incorrect-roundings,bugprone-integer-division,bugprone-lambda-function-name,bugprone-macro-parentheses,bugprone-macro-repeated-side-effects,bugprone-misplaced-operator-in-strlen-in-alloc,bugprone-misplaced-pointer-arithmetic-in-alloc,bugprone-misplaced-widening-cast,bugprone-move-forwarding-reference,bugprone-multiple-statement-macro,bugprone-no-escape,bugprone-not-null-terminated-result,bugprone-parent-virtual-call,bugprone-posix-return,bugprone-reserved-identifier,bugprone-sizeof-container,bugprone-sizeof-expression,bugprone-spuriously-wake-up-functions,bugprone-string-constructor,bugprone-string-integer-assignment,bugprone-string-literal-with-embedded-nul,bugprone-suspicious-enum-usage,bugprone-suspicious-include,bugprone-suspicious-memset-usage,bugprone-suspicious-missing-comma,bugprone-suspicious-semicolon,bugprone-suspicious-string-compare,bugprone-swapped-arguments,bugprone-terminating-continue,bugprone-throw-keyword-missing,bugprone-too-small-loop-variable,bugprone-undefined-memory-manipulation,bugprone-undelegated-constructor,bugprone-unhandled-self-assignment,bugprone-unused-raii,bugprone-unused-return-value,bugprone-use-after-move,bugprone-virtual-near-miss,cert-dcl21-cpp,cert-dcl58-cpp,cert-err34-c,
cert-err52-cpp,cert-err58-cpp,cert-err60-cpp,cert-flp30-c,cert-msc50-cpp,cert-msc51-cpp,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-pro-type-static-cast-downcast,cppcoreguidelines-slicing,google-default-arguments,google-explicit-constructor,google-runtime-operator,hicpp-exception-baseclass,hicpp-multiway-paths-covered,misc-misplaced-const,misc-new-delete-overloads,misc-no-recursion,misc-non-copyable-objects,misc-throw-by-value-catch-by-reference,misc-unconventional-assign-operator,misc-uniqueptr-reset-release,modernize-avoid-bind,modernize-concat-nested-namespaces,modernize-deprecated-headers,modernize-deprecated-ios-base-aliases,modernize-loop-convert,modernize-make-shared,modernize-make-unique,modernize-pass-by-value,modernize-raw-string-literal,modernize-redundant-void-arg,modernize-replace-auto-ptr,modernize-replace-disallow-copy-and-assign-macro,modernize-replace-random-shuffle,modernize-return-braced-init-list,modernize-shrink-to-fit,modernize-unary-static-assert,modernize-use-auto,modernize-use-bool-literals,modernize-use-emplace,modernize-use-equals-default,modernize-use-equals-delete,modernize-use-nodiscard,modernize-use-noexcept,modernize-use-nullptr,modernize-use-override,modernize-use-transparent-functors,modernize-use-uncaught-exceptions,mpi-buffer-deref,mpi-type-mismatch,openmp-use-default-none,performance-faster-string-find,performance-for-range-copy,performance-implicit-conversion-in-loop,performance-inefficient-algorithm,performance-inefficient-string-concatenation,performance-inefficient-vector-operation,performance-move-const-arg,performance-move-constructor-init,performance-no-automatic-move,performance-noexcept-move-constructor,performance-trivially-destructible,performance-type-promotion-in-math-fn,performance-unnecessary-copy-initialization,performance-unnecessary-value-param,portability-simd-intrinsics,readability-avoid-const-params-in-decls,readability-const-return-type,readability-conta
iner-size-empty,readability-convert-member-functions-to-static,readability-delete-null-pointer,readability-deleted-default,readability-inconsistent-declaration-parameter-name,readability-make-member-function-const,readability-misleading-indentation,readability-misplaced-array-index,readability-non-const-parameter,readability-redundant-control-flow,readability-redundant-declaration,readability-redundant-function-ptr-dereference,readability-redundant-smartptr-get,readability-redundant-string-cstr,readability-redundant-string-init,readability-simplify-subscript-expr,readability-static-accessed-through-instance,readability-static-definition-in-anonymous-namespace,readability-string-compare,readability-uniqueptr-delete-release,readability-use-anyofallof" />
</inspection_tool> </inspection_tool>
<inspection_tool class="ClangTidyInspection" enabled="false" level="WARNING" enabled_by_default="false"> <inspection_tool class="ClangTidyInspection" enabled="false" level="WARNING" enabled_by_default="false">
<option name="useCustomListOfClangTidyChecks" value="false" /> <option name="useCustomListOfClangTidyChecks" value="false" />

View File

@ -61,8 +61,9 @@ namespace skyline::kernel {
if (optimalCore != currentCore) { if (optimalCore != currentCore) {
if (!alwaysInsert && thread == state.thread) if (!alwaysInsert && thread == state.thread)
RemoveThread(); RemoveThread();
else if (!alwaysInsert && thread != state.thread) [[unlikely]] else if (!alwaysInsert && thread != state.thread)
throw exception("Migrating an external thread (T{}) without 'alwaysInsert' isn't supported", thread->id); [[unlikely]]
throw exception("Migrating an external thread (T{}) without 'alwaysInsert' isn't supported", thread->id);
thread->coreId = optimalCore->id; thread->coreId = optimalCore->id;
InsertThread(thread); InsertThread(thread);
state.logger->Debug("Load Balancing T{}: C{} -> C{}", thread->id, currentCore->id, optimalCore->id); state.logger->Debug("Load Balancing T{}: C{} -> C{}", thread->id, currentCore->id, optimalCore->id);
@ -121,12 +122,26 @@ namespace skyline::kernel {
void Scheduler::WaitSchedule(bool loadBalance) { void Scheduler::WaitSchedule(bool loadBalance) {
auto &thread{state.thread}; auto &thread{state.thread};
auto *core{&cores.at(thread->coreId)}; CoreContext *core{&cores.at(thread->coreId)};
std::unique_lock lock(core->mutex); std::unique_lock lock(core->mutex);
auto wakeFunction{[&]() {
if (!thread->affinityMask.test(thread->coreId)) [[unlikely]] {
lock.unlock();
RemoveThread();
thread->coreId = thread->idealCore;
InsertThread(thread);
core = &cores.at(thread->coreId);
lock = std::unique_lock(core->mutex);
}
return !core->queue.empty() && core->queue.front() == thread;
}};
if (loadBalance && thread->affinityMask.count() > 1) { if (loadBalance && thread->affinityMask.count() > 1) {
std::chrono::milliseconds loadBalanceThreshold{PreemptiveTimeslice * 2}; //!< The amount of time that needs to pass unscheduled for a thread to attempt load balancing std::chrono::milliseconds loadBalanceThreshold{PreemptiveTimeslice * 2}; //!< The amount of time that needs to pass unscheduled for a thread to attempt load balancing
while (!thread->wakeCondition.wait_for(lock, loadBalanceThreshold, [&]() { return !core->queue.empty() && core->queue.front() == thread; })) { while (!thread->wakeCondition.wait_for(lock, loadBalanceThreshold, wakeFunction)) {
lock.unlock(); lock.unlock();
LoadBalance(state.thread); LoadBalance(state.thread);
if (thread->coreId == core->id) { if (thread->coreId == core->id) {
@ -139,7 +154,7 @@ namespace skyline::kernel {
loadBalanceThreshold *= 2; // We double the duration required for future load balancing for this invocation to minimize pointless load balancing loadBalanceThreshold *= 2; // We double the duration required for future load balancing for this invocation to minimize pointless load balancing
} }
} else { } else {
thread->wakeCondition.wait(lock, [&]() { return !core->queue.empty() && core->queue.front() == thread; }); thread->wakeCondition.wait(lock, wakeFunction);
} }
if (thread->priority == core->preemptionPriority) { if (thread->priority == core->preemptionPriority) {
@ -156,7 +171,19 @@ namespace skyline::kernel {
auto *core{&cores.at(thread->coreId)}; auto *core{&cores.at(thread->coreId)};
std::unique_lock lock(core->mutex); std::unique_lock lock(core->mutex);
if (thread->wakeCondition.wait_for(lock, timeout, [&]() { return !core->queue.empty() && core->queue.front() == thread; })) { if (thread->wakeCondition.wait_for(lock, timeout, [&]() {
if (!thread->affinityMask.test(thread->coreId)) [[unlikely]] {
lock.unlock();
RemoveThread();
thread->coreId = thread->idealCore;
InsertThread(thread);
core = &cores.at(thread->coreId);
lock = std::unique_lock(core->mutex);
}
return !core->queue.empty() && core->queue.front() == thread;
})) {
if (thread->priority == core->preemptionPriority) { if (thread->priority == core->preemptionPriority) {
struct itimerspec spec{.it_value = {.tv_nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(PreemptiveTimeslice).count()}}; struct itimerspec spec{.it_value = {.tv_nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(PreemptiveTimeslice).count()}};
timer_settime(thread->preemptionTimer, 0, &spec, nullptr); timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
@ -182,11 +209,12 @@ namespace skyline::kernel {
// Splice the linked element from the beginning of the queue to where it's priority is present // Splice the linked element from the beginning of the queue to where it's priority is present
core.queue.splice(std::upper_bound(core.queue.begin(), core.queue.end(), thread->priority.load(), type::KThread::IsHigherPriority), core.queue, core.queue.begin()); core.queue.splice(std::upper_bound(core.queue.begin(), core.queue.end(), thread->priority.load(), type::KThread::IsHigherPriority), core.queue, core.queue.begin());
auto& front{core.queue.front()}; auto &front{core.queue.front()};
if (front != thread) if (front != thread)
front->wakeCondition.notify_one(); // If we aren't at the front of the queue, only then should we wake the thread at the front up front->wakeCondition.notify_one(); // If we aren't at the front of the queue, only then should we wake the thread at the front up
} else if (!thread->forceYield) { [[unlikely]] } else if (!thread->forceYield) {
throw exception("T{} called Rotate while not being in C{}'s queue", thread->id, thread->coreId); [[unlikely]]
throw exception("T{} called Rotate while not being in C{}'s queue", thread->id, thread->coreId);
} }
thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4)); thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4));
@ -202,6 +230,34 @@ namespace skyline::kernel {
thread->forceYield = false; thread->forceYield = false;
} }
/**
 * Removes the calling thread (state.thread) from the queue of the core named by its coreId
 * If the thread was at the front of that queue, its averageTimeslice is updated and the new front thread (if any) is notified
 * Any pending preemption timer for the thread is disarmed afterwards
 */
void Scheduler::RemoveThread() {
    auto &thread{state.thread};
    auto &core{cores.at(thread->coreId)};
    {
        std::unique_lock lock(core.mutex);
        auto it{std::find(core.queue.begin(), core.queue.end(), thread)};
        if (it != core.queue.end()) {
            // erase() returns the element following the removed one, if that is begin() then we were at the front of the queue
            it = core.queue.erase(it);
            if (it == core.queue.begin()) {
                // We need to update the averageTimeslice accordingly, if we've been unscheduled by this
                // The average is weighted as 1/4 of the previous average and 3/4 of the timeslice that just elapsed
                // Note: The division must apply to the elapsed duration as a whole rather than only to timesliceStart
                if (thread->timesliceStart)
                    thread->averageTimeslice = (thread->averageTimeslice / 4) + ((3 * (util::GetTimeTicks() - thread->timesliceStart)) / 4);

                if (it != core.queue.end())
                    (*it)->wakeCondition.notify_one(); // We need to wake the thread at the front of the queue, if we were at the front previously
            }
        }
    }

    if (thread->isPreempted) {
        // A zeroed itimerspec disarms the preemption timer
        struct itimerspec spec{};
        timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
        thread->isPreempted = false;
    }

    YieldPending = false;
}
void Scheduler::UpdatePriority(const std::shared_ptr<type::KThread> &thread) { void Scheduler::UpdatePriority(const std::shared_ptr<type::KThread> &thread) {
std::lock_guard migrationLock(thread->coreMigrationMutex); std::lock_guard migrationLock(thread->coreMigrationMutex);
auto *core{&cores.at(thread->coreId)}; auto *core{&cores.at(thread->coreId)};
@ -254,6 +310,15 @@ namespace skyline::kernel {
} }
} }
/**
 * Prompts the supplied thread to react to a change in the core it should reside on
 * If the thread is at the front of its current core's queue it is sent YieldSignal, otherwise its wakeCondition is notified
 */
void Scheduler::UpdateCore(const std::shared_ptr<type::KThread> &thread) {
    auto *core{&cores.at(thread->coreId)};
    std::unique_lock coreLock(core->mutex);
    // Calling front() on an empty queue is undefined behaviour, so we check for emptiness first like the wake predicates do
    if (!core->queue.empty() && core->queue.front() == thread)
        thread->SendSignal(YieldSignal);
    else
        thread->wakeCondition.notify_one();
}
void Scheduler::ParkThread() { void Scheduler::ParkThread() {
auto &thread{state.thread}; auto &thread{state.thread};
std::lock_guard migrationLock(thread->coreMigrationMutex); std::lock_guard migrationLock(thread->coreMigrationMutex);
@ -293,32 +358,4 @@ namespace skyline::kernel {
} }
} }
} }
/**
 * Removes the calling thread (state.thread) from the queue of the core named by its coreId
 * If the thread was at the front of that queue, its averageTimeslice is updated and the new front thread (if any) is notified
 * Any pending preemption timer for the thread is disarmed afterwards
 */
void Scheduler::RemoveThread() {
    auto &thread{state.thread};
    auto &core{cores.at(thread->coreId)};
    {
        std::unique_lock lock(core.mutex);
        auto it{std::find(core.queue.begin(), core.queue.end(), thread)};
        if (it != core.queue.end()) {
            // erase() returns the element following the removed one, if that is begin() then we were at the front of the queue
            it = core.queue.erase(it);
            if (it == core.queue.begin()) {
                // We need to update the averageTimeslice accordingly, if we've been unscheduled by this
                // The average is weighted as 1/4 of the previous average and 3/4 of the timeslice that just elapsed
                // Note: The division must apply to the elapsed duration as a whole rather than only to timesliceStart
                if (thread->timesliceStart)
                    thread->averageTimeslice = (thread->averageTimeslice / 4) + ((3 * (util::GetTimeTicks() - thread->timesliceStart)) / 4);

                if (it != core.queue.end())
                    (*it)->wakeCondition.notify_one(); // We need to wake the thread at the front of the queue, if we were at the front previously
            }
        }
    }

    if (thread->isPreempted) {
        // A zeroed itimerspec disarms the preemption timer
        struct itimerspec spec{};
        timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
        thread->isPreempted = false;
    }

    YieldPending = false;
}
} }

View File

@ -103,11 +103,22 @@ namespace skyline {
*/ */
void Rotate(bool cooperative = true); void Rotate(bool cooperative = true);
/**
* @brief Removes the calling thread from its resident core's queue
*/
void RemoveThread();
/** /**
* @brief Updates the placement of the supplied thread in it's resident core's queue according to it's new priority * @brief Updates the placement of the supplied thread in it's resident core's queue according to it's new priority
*/ */
void UpdatePriority(const std::shared_ptr<type::KThread>& thread); void UpdatePriority(const std::shared_ptr<type::KThread>& thread);
/**
* @brief Updates the core that the supplied thread is resident to according to its new affinity mask and ideal core
* @note This supports changing the core of a thread which is currently running
*/
void UpdateCore(const std::shared_ptr<type::KThread>& thread);
/** /**
* @brief Parks the calling thread after removing it from it's resident core's queue and inserts it on the core it's been awoken on * @brief Parks the calling thread after removing it from it's resident core's queue and inserts it on the core it's been awoken on
* @note This will not handle waiting for the thread to be scheduled, this should be followed with a call to WaitSchedule/TimedWaitSchedule * @note This will not handle waiting for the thread to be scheduled, this should be followed with a call to WaitSchedule/TimedWaitSchedule
@ -119,11 +130,6 @@ namespace skyline {
* @note We will only wake a thread if it is determined to be a better pick than the thread which would be run on this core next * @note We will only wake a thread if it is determined to be a better pick than the thread which would be run on this core next
*/ */
void WakeParkedThread(); void WakeParkedThread();
/**
* @brief Removes the calling thread from it's resident core queue
*/
void RemoveThread();
}; };
/** /**

View File

@ -412,10 +412,11 @@ namespace skyline::kernel::svc {
state.logger->Debug("svcSetThreadCoreMask: Setting thread #{}'s Ideal Core ({}) + Affinity Mask ({})", thread->id, idealCore, affinityMask); state.logger->Debug("svcSetThreadCoreMask: Setting thread #{}'s Ideal Core ({}) + Affinity Mask ({})", thread->id, idealCore, affinityMask);
std::lock_guard guard(thread->coreMigrationMutex);
thread->idealCore = idealCore; thread->idealCore = idealCore;
thread->affinityMask = affinityMask; thread->affinityMask = affinityMask;
if (!affinityMask.test(thread->coreId)) { if (!affinityMask.test(thread->coreId) && thread->coreId != constant::ParkedCoreId) {
state.logger->Debug("svcSetThreadCoreMask: Migrating thread #{} to Ideal Core C{} -> C{}", thread->id, thread->coreId, idealCore); state.logger->Debug("svcSetThreadCoreMask: Migrating thread #{} to Ideal Core C{} -> C{}", thread->id, thread->coreId, idealCore);
if (thread == state.thread) { if (thread == state.thread) {
@ -425,7 +426,7 @@ namespace skyline::kernel::svc {
} else if (!thread->running) { } else if (!thread->running) {
thread->coreId = idealCore; thread->coreId = idealCore;
} else { } else {
throw exception("svcSetThreadCoreMask: Migrating a running thread due to a new core mask is not supported"); state.scheduler->UpdateCore(thread);
} }
} }

View File

@ -41,7 +41,7 @@ namespace skyline {
u64 entryArgument; u64 entryArgument;
void *stackTop; void *stackTop;
std::condition_variable_any wakeCondition; //!< A conditional variable which is signalled to wake the current thread while it's sleeping std::condition_variable wakeCondition; //!< A conditional variable which is signalled to wake the current thread while it's sleeping
std::atomic<u8> basePriority; //!< The priority of the thread for the scheduler without any priority-inheritance std::atomic<u8> basePriority; //!< The priority of the thread for the scheduler without any priority-inheritance
std::atomic<u8> priority; //!< The priority of the thread for the scheduler std::atomic<u8> priority; //!< The priority of the thread for the scheduler
i8 idealCore; //!< The ideal CPU core for this thread to run on i8 idealCore; //!< The ideal CPU core for this thread to run on