Fix Thread Insertion Optimization + Revert Per-Thread Scheduler Conditions

PixelyIon 2021-01-21 02:59:42 +05:30, committed by Mark
parent d5d133372f
commit 1f48fdd4a5
7 changed files with 64 additions and 76 deletions

View File

@@ -18,7 +18,6 @@ namespace skyline::signal {
     std::terminate_handler terminateHandler{};
-    [[clang::optnone]]
     inline StackFrame *SafeFrameRecurse(size_t depth, StackFrame *frame) {
         if (frame) {
             for (size_t it{}; it < depth; it++) {
@@ -33,7 +32,6 @@ namespace skyline::signal {
         return frame;
     }
-    [[clang::optnone]]
     void TerminateHandler() {
         auto exception{std::current_exception()};
         if (terminateHandler && exception && exception == SignalExceptionPtr) {
@@ -171,18 +169,18 @@ namespace skyline::signal {
         asm volatile("MSR TPIDR_EL0, %x0"::"r"(tls));
     }
-    void SetSignalHandler(std::initializer_list<int> signals, SignalHandler function) {
+    void SetSignalHandler(std::initializer_list<int> signals, SignalHandler function, bool syscallRestart) {
         static std::array<std::once_flag, NSIG> signalHandlerOnce{};
         stack_t stack;
         sigaltstack(nullptr, &stack);
         struct sigaction action{
             .sa_sigaction = reinterpret_cast<void (*)(int, siginfo *, void *)>(ThreadSignalHandler),
-            .sa_flags = SA_RESTART | SA_SIGINFO | (stack.ss_sp && stack.ss_size ? SA_ONSTACK : 0),
+            .sa_flags = (syscallRestart ? SA_RESTART : 0) | SA_SIGINFO | (stack.ss_sp && stack.ss_size ? SA_ONSTACK : 0),
         };
         for (int signal : signals) {
-            std::call_once(signalHandlerOnce[signal], [signal, action]() {
+            std::call_once(signalHandlerOnce[signal], [signal, &action]() {
                 struct sigaction oldAction;
                 Sigaction(signal, &action, &oldAction);
                 if (oldAction.sa_flags && oldAction.sa_flags != action.sa_flags)

View File

@@ -80,12 +80,13 @@ namespace skyline::signal {
     /**
      * @brief A wrapper around Sigaction to make it easy to set a sigaction signal handler for multiple signals and also allow for thread-local signal handlers
      * @param function A sa_action callback with a pointer to the old TLS (If present) as the 4th argument
+     * @param syscallRestart If a system call running during the signal will be seamlessly restarted or return an error (Corresponds to SA_RESTART)
      * @note If 'nullptr' is written into the 4th argument then the old TLS won't be restored or it'll be set to any non-null value written into it
      */
-    void SetSignalHandler(std::initializer_list<int> signals, SignalHandler function);
-    inline void SetSignalHandler(std::initializer_list<int> signals, void (*function)(int, struct siginfo *, ucontext *)) {
-        SetSignalHandler(signals, reinterpret_cast<SignalHandler>(function));
+    void SetSignalHandler(std::initializer_list<int> signals, SignalHandler function, bool syscallRestart = true);
+    inline void SetSignalHandler(std::initializer_list<int> signals, void (*function)(int, struct siginfo *, ucontext *), bool syscallRestart = true) {
+        SetSignalHandler(signals, reinterpret_cast<SignalHandler>(function), syscallRestart);
     }
     /**
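
As context for the new syscallRestart parameter: SA_RESTART makes the kernel transparently restart most blocking system calls after the signal handler returns, while omitting it makes them fail with EINTR so the caller can recheck why it was woken. A minimal standalone sketch of the difference, not part of the commit (the handler, pipe and alarm are purely illustrative):

    #include <cerrno>
    #include <csignal>
    #include <cstdio>
    #include <unistd.h>

    static void Handler(int) {} // A no-op handler; its only effect is interrupting blocking syscalls

    int main() {
        struct sigaction action{};
        action.sa_handler = Handler;
        action.sa_flags = 0; // Equivalent to syscallRestart == false; OR in SA_RESTART for the restarting behaviour
        sigaction(SIGALRM, &action, nullptr);

        int fds[2];
        pipe(fds);
        alarm(1); // Deliver SIGALRM while read() is blocked on the empty pipe

        char byte;
        if (read(fds[0], &byte, 1) == -1 && errno == EINTR)
            std::printf("read() failed with EINTR, the caller can now recheck its wake-up condition\n");
        return 0;
    }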

View File

@@ -2,6 +2,7 @@
 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
 #include <common/signal.h>
+#include <loader/loader.h>
 #include <kernel/types/KProcess.h>
 #include <gpu.h>
 #include <gpu/engines/maxwell_3d.h>
@@ -99,12 +100,12 @@ namespace skyline::gpu::gpfifo {
             });
         } catch (const signal::SignalException &e) {
             if (e.signal != SIGINT) {
-                state.logger->Write(Logger::LogLevel::Error, e.what());
+                state.logger->Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));
                 signal::BlockSignal({SIGINT});
                 state.process->Kill(false);
             }
         } catch (const std::exception &e) {
-            state.logger->Write(Logger::LogLevel::Error, e.what());
+            state.logger->Error(e.what());
             signal::BlockSignal({SIGINT});
             state.process->Kill(false);
         }

View File

@@ -61,7 +61,7 @@ namespace skyline::kernel {
         if (optimalCore != currentCore) {
             if (!alwaysInsert && thread == state.thread)
                 RemoveThread();
-            else if (!alwaysInsert && thread != state.thread)
+            else if (!alwaysInsert && thread != state.thread) [[unlikely]]
                 throw exception("Migrating an external thread (T{}) without 'alwaysInsert' isn't supported", thread->id);
             thread->coreId = optimalCore->id;
             InsertThread(thread);
@@ -89,29 +89,31 @@ namespace skyline::kernel {
         if (nextThread == core.queue.begin()) {
             if (nextThread != core.queue.end()) {
                 // If the inserted thread has a higher priority than the currently running thread (and the queue isn't empty)
-                if (state.thread == thread) {
-                    // If the current thread is inserting itself then we try to optimize by trying to by forcefully yielding it ourselves now
-                    // We can avoid waiting on it to yield itself on receiving the signal which serializes the entire pipeline
-                    // This isn't done in other cases as this optimization is unsafe when done where serialization is required (Eg: Mutexes)
-                    core.queue.front()->forceYield = true;
-                    core.queue.splice(std::upper_bound(core.queue.begin(), core.queue.end(), thread->priority.load(), type::KThread::IsHigherPriority), core.queue, core.queue.begin());
-                    core.queue.push_front(thread);
-                } else {
-                    // If we're inserting another thread then we just insert it after the thread in line
-                    // It'll automatically be ready to be scheduled when the thread at the front yields
-                    // This enforces strict synchronization for the thread to run and waits till the previous thread has yielded itself
-                    core.queue.insert(std::next(core.queue.begin()), thread);
-                }
-                if (state.thread != core.queue.front())
-                    core.queue.front()->SendSignal(YieldSignal);
-                else
+                // We can yield the thread which is currently scheduled on the core by sending it a signal
+                // This is optimized to avoid waiting for the thread to yield upon receiving the signal, which would serialize the entire pipeline
+                auto front{core.queue.front()};
+                front->forceYield = true;
+                core.queue.splice(std::upper_bound(core.queue.begin(), core.queue.end(), front->priority.load(), type::KThread::IsHigherPriority), core.queue, core.queue.begin());
+                core.queue.push_front(thread);
+                if (state.thread != front) {
+                    // If the inserting thread isn't at the front, we need to send the thread at the front an OS signal to yield
+                    if (!front->pendingYield) {
+                        // We only want to yield the thread if it hasn't already been sent a signal to yield in the past
+                        // Not doing this can lead to races and deadlocks, and it would also be slower due to redundant signals
+                        front->SendSignal(YieldSignal);
+                        front->pendingYield = true;
+                    }
+                } else {
+                    // If the thread at the front is being yielded, we can just set the YieldPending flag
+                    // This avoids an OS signal, which would otherwise cause a deadlock as the core lock would be relocked
                     YieldPending = true;
+                }
             } else {
                 core.queue.push_front(thread);
             }
             if (thread != state.thread)
-                thread->wakeCondition.notify_one(); // We only want to trigger the conditional variable if the current thread isn't inserting itself
+                core.frontCondition.notify_all(); // We only want to trigger the conditional variable if the current thread isn't inserting itself
         } else {
             core.queue.insert(nextThread, thread);
         }
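
The hunk above drops the old self-insertion special case and always force-yields the current front thread; the new pendingYield flag then coalesces the yield signals so at most one is outstanding per thread. A rough illustrative sketch of that coalescing idea, with simplified types rather than Skyline's KThread API:

    #include <cstdio>

    struct Thread {
        bool pendingYield{}; // Set when a yield signal has been sent but not yet acted upon
        void SendSignal() { std::printf("signal sent\n"); } // Stand-in for pthread_kill(handle, YieldSignal)
    };

    // Called when another thread is inserted in front of 'front'; at most one signal stays outstanding
    void RequestYield(Thread &front) {
        if (!front.pendingYield) {
            front.SendSignal();
            front.pendingYield = true;
        }
    }

    // Called by the yielding thread itself once it rotates out of the front slot
    void AcknowledgeYield(Thread &self) {
        self.pendingYield = false;
    }

    int main() {
        Thread front;
        RequestYield(front); // Sends a signal
        RequestYield(front); // Coalesced, no second signal is sent
        AcknowledgeYield(front);
        RequestYield(front); // A new signal may be sent again
        return 0;
    }

The flag is cleared by the yielding thread itself once it leaves the front slot, which is what the Rotate() changes further down do.
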
@@ -124,7 +126,7 @@ namespace skyline::kernel {
         std::unique_lock lock(core->mutex);
         if (loadBalance && thread->affinityMask.count() > 1) {
             std::chrono::milliseconds loadBalanceThreshold{PreemptiveTimeslice * 2}; //!< The amount of time that needs to pass unscheduled for a thread to attempt load balancing
-            while (!thread->wakeCondition.wait_for(lock, loadBalanceThreshold, [&]() { return !core->queue.empty() && core->queue.front() == thread; })) {
+            while (!core->frontCondition.wait_for(lock, loadBalanceThreshold, [&]() { return !core->queue.empty() && core->queue.front() == thread; })) {
                 lock.unlock();
                 LoadBalance(state.thread);
                 if (thread->coreId == core->id) {
@@ -137,7 +139,7 @@ namespace skyline::kernel {
                 loadBalanceThreshold *= 2; // We double the duration required for future load balancing for this invocation to minimize pointless load balancing
             }
         } else {
-            thread->wakeCondition.wait(lock, [&]() { return !core->queue.empty() && core->queue.front() == thread; });
+            core->frontCondition.wait(lock, [&]() { return !core->queue.empty() && core->queue.front() == thread; });
         }
         if (thread->priority == core->preemptionPriority) {
@@ -154,7 +156,7 @@ namespace skyline::kernel {
         auto *core{&cores.at(thread->coreId)};
         std::unique_lock lock(core->mutex);
-        if (thread->wakeCondition.wait_for(lock, timeout, [&]() { return !core->queue.empty() && core->queue.front() == thread; })) {
+        if (core->frontCondition.wait_for(lock, timeout, [&]() { return !core->queue.empty() && core->queue.front() == thread; })) {
             if (thread->priority == core->preemptionPriority) {
                 struct itimerspec spec{.it_value = {.tv_nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(PreemptiveTimeslice).count()}};
                 timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
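
These wait-site changes are the revert half of the commit: instead of each thread owning its own wakeCondition, every waiter on a core now sleeps on the core's shared frontCondition with an "am I at the front of the queue?" predicate, and any change to the front is broadcast with notify_all(). A self-contained sketch of that scheme, using plain thread IDs instead of KThread objects:

    #include <chrono>
    #include <condition_variable>
    #include <cstdio>
    #include <list>
    #include <mutex>
    #include <thread>

    // One condition variable per core rather than per thread; every waiter checks the same predicate
    struct Core {
        std::mutex mutex;                       // Synchronizes access to the queue
        std::condition_variable frontCondition; // Signalled whenever the front of the queue may have changed
        std::list<int> queue;                   // Thread IDs in scheduling order (heavily simplified)
    };

    // Block until this thread reaches the front of the core's queue
    void WaitSchedule(Core &core, int threadId) {
        std::unique_lock lock(core.mutex);
        core.frontCondition.wait(lock, [&] { return !core.queue.empty() && core.queue.front() == threadId; });
    }

    // Rotate the front thread to the back and broadcast, so the new front can wake up
    void RotateFront(Core &core) {
        std::lock_guard lock(core.mutex);
        core.queue.splice(core.queue.end(), core.queue, core.queue.begin());
        core.frontCondition.notify_all();
    }

    int main() {
        Core core;
        core.queue = {1, 2};
        std::thread waiter([&] {
            WaitSchedule(core, 2);
            std::printf("thread 2 reached the front\n");
        });
        std::this_thread::sleep_for(std::chrono::milliseconds(50));
        RotateFront(core);
        waiter.join();
        return 0;
    }
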
@@ -175,49 +177,27 @@ namespace skyline::kernel {
         std::unique_lock lock(core.mutex);
         if (core.queue.front() == thread) {
-            thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4)); // 0.25 * old timeslice duration + 0.75 * current timeslice duration
+            // If this thread is at the front of the thread queue then we need to rotate the thread
+            // In the case where this thread was forcefully yielded, we don't need to do this as it's done by the thread which yielded us
             // Splice the linked element from the beginning of the queue to where it's priority is present
             core.queue.splice(std::upper_bound(core.queue.begin(), core.queue.end(), thread->priority.load(), type::KThread::IsHigherPriority), core.queue, core.queue.begin());
-            auto front{core.queue.front()};
-            if (front != thread)
-                front->wakeCondition.notify_one(); // If we aren't at the front of the queue, only then should we wake the thread at the front up
-            if (cooperative && thread->isPreempted) {
-                // If a preemptive thread did a cooperative yield then we need to disarm the preemptive timer
-                struct itimerspec spec{};
-                timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
-            }
-            thread->isPreempted = false;
-        } else if (thread->forceYield) {
-            // We need to check if this thread was yielded by another thread on behalf of this thread
-            // If it is then we just need to disarm the preemption timer and update the average timeslice
-            // If it isn't then we need to throw an exception as it's indicative of an invalid state
-            struct ThreadComparision {
-                constexpr bool operator()(const i8 priority, const std::shared_ptr<type::KThread> &it) { return priority < it->priority; }
-                constexpr bool operator()(const std::shared_ptr<type::KThread> &it, const i8 priority) { return it->priority < priority; }
-            };
-            auto bounds{std::equal_range(core.queue.begin(), core.queue.end(), thread->priority.load(), ThreadComparision{})};
-            auto iterator{std::find(bounds.first, bounds.second, thread)};
-            if (iterator != bounds.second) {
-                thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4));
-                if (cooperative && thread->isPreempted) {
-                    struct itimerspec spec{};
-                    timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
-                }
-                thread->isPreempted = false;
-            } else {
-                throw exception("T{} called Rotate while not being in C{}'s queue after being forcefully yielded", thread->id, thread->coreId);
-            }
-        } else {
+            if (core.queue.front() != thread)
+                core.frontCondition.notify_all(); // If we aren't at the front of the queue, only then should we wake the thread at the front up
+        } else if (!thread->forceYield) { [[unlikely]]
             throw exception("T{} called Rotate while not being in C{}'s queue", thread->id, thread->coreId);
         }
+        thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4));
+        if (cooperative && thread->isPreempted) {
+            // If a preemptive thread did a cooperative yield then we need to disarm the preemptive timer
+            struct itimerspec spec{};
+            timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
+        }
+        thread->isPreempted = false;
+        thread->pendingYield = false;
         thread->forceYield = false;
     }
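
A note on the rotation primitive both the old and new code rely on: std::list::splice moves the front element to the position std::upper_bound finds for its own priority, so it lands behind every other entry of equal priority while the queue stays sorted and no other iterators are invalidated. A small self-contained example with a simplified Thread type (not Skyline's KThread):

    #include <algorithm>
    #include <cstdio>
    #include <list>

    struct Thread {
        int id;
        int priority; // Lower value = higher priority, as in the scheduler above
    };

    bool IsHigherPriority(int priority, const Thread &it) {
        return priority < it.priority;
    }

    int main() {
        std::list<Thread> queue{{1, 2}, {2, 2}, {3, 3}}; // Sorted by priority, T1 is at the front
        // Rotate: move the front thread behind every other thread whose priority is <= its own
        queue.splice(std::upper_bound(queue.begin(), queue.end(), queue.front().priority, IsHigherPriority), queue, queue.begin());
        for (const auto &thread : queue)
            std::printf("T%d (priority %d)\n", thread.id, thread.priority); // Prints T2, T1, T3
        return 0;
    }
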
@@ -234,7 +214,10 @@ namespace skyline::kernel {
             // Alternatively, if it's currently running then we'd just want to cause it to yield if it's priority is lower than the the thread behind it
             auto nextIt{std::next(currentIt)};
             if (nextIt != core->queue.end() && (*nextIt)->priority < thread->priority) {
-                thread->SendSignal(YieldSignal);
+                if (!thread->pendingYield) {
+                    thread->SendSignal(YieldSignal);
+                    thread->pendingYield = true;
+                }
             } else if (!thread->isPreempted && thread->priority == core->preemptionPriority) {
                 // If the thread needs to be preempted due to the new priority then arm it's preemption timer
                 struct itimerspec spec{.it_value = {.tv_nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(PreemptiveTimeslice).count()}};
@@ -260,7 +243,11 @@ namespace skyline::kernel {
             targetIt = std::upper_bound(core->queue.begin(), core->queue.end(), thread->priority.load(), type::KThread::IsHigherPriority); // Iterator invalidation
             if (targetIt == core->queue.begin() && targetIt != core->queue.end()) {
                 core->queue.insert(std::next(core->queue.begin()), thread);
-                core->queue.front()->SendSignal(YieldSignal);
+                auto front{core->queue.front()};
+                if (!front->pendingYield) {
+                    front->SendSignal(YieldSignal);
+                    front->pendingYield = true;
+                }
             } else {
                 core->queue.insert(targetIt, thread);
             }
@@ -280,7 +267,7 @@ namespace skyline::kernel {
         if (thread->coreId == constant::ParkedCoreId) {
             std::unique_lock lock(parkedMutex);
             parkedQueue.insert(std::upper_bound(parkedQueue.begin(), parkedQueue.end(), thread->priority.load(), type::KThread::IsHigherPriority), thread);
-            thread->wakeCondition.wait(lock, [&]() { return parkedQueue.front() == thread && thread->coreId != constant::ParkedCoreId; });
+            parkedFrontCondition.wait(lock, [&]() { return parkedQueue.front() == thread && thread->coreId != constant::ParkedCoreId; });
         }
         InsertThread(thread);
@@ -301,7 +288,7 @@ namespace skyline::kernel {
             if (parkedThread->priority < thread->priority || (parkedThread->priority == thread->priority && (!nextThread || parkedThread->timesliceStart < nextThread->timesliceStart))) {
                 parkedThread->coreId = thread->coreId;
                 parkedLock.unlock();
-                thread->wakeCondition.notify_one();
+                parkedFrontCondition.notify_all();
             }
         }
     }
@@ -320,7 +307,7 @@ namespace skyline::kernel {
             thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4));
             if (it != core.queue.end())
-                (*it)->wakeCondition.notify_one(); // We need to wake the thread at the front of the queue, if we were at the front previously
+                core.frontCondition.notify_all(); // We need to wake the thread at the front of the queue, if we were at the front previously
         }
     }
 }

View File

@@ -47,6 +47,7 @@ namespace skyline {
             u8 id;
             u8 preemptionPriority; //!< The priority at which this core becomes preemptive as opposed to cooperative
             std::mutex mutex; //!< Synchronizes all operations on the queue
+            std::condition_variable frontCondition; //!< A conditional variable which is signalled when the front of the parked queue has changed
             std::list<std::shared_ptr<type::KThread>> queue; //!< A queue of threads which are running or to be run on this core
             CoreContext(u8 id, u8 preemptionPriority);

View File

@@ -64,7 +64,7 @@ namespace skyline::kernel::type {
             throw exception("timer_create has failed with '{}'", strerror(errno));
         signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, nce::NCE::SignalHandler);
-        signal::SetSignalHandler({Scheduler::YieldSignal}, Scheduler::SignalHandler);
+        signal::SetSignalHandler({Scheduler::YieldSignal}, Scheduler::SignalHandler, false); // We want futexes to fail and their predicates rechecked
         {
             std::lock_guard lock(statusMutex);
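
Passing false here disables SA_RESTART for the yield handler, which is what the comment refers to: a blocking futex wait that is hit by YieldSignal should fail with EINTR rather than be restarted by the kernel, so the waiting code re-evaluates its predicate. A standalone sketch of that recheck pattern using the raw futex syscall (illustrative only, not Skyline's wrappers):

    #include <atomic>
    #include <cerrno>
    #include <chrono>
    #include <climits>
    #include <cstdint>
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <thread>
    #include <unistd.h>

    // Block until 'word' no longer holds 'expected', re-evaluating the predicate after every wakeup
    void FutexWaitRechecked(std::atomic<uint32_t> &word, uint32_t expected) {
        while (word.load() == expected) { // The predicate is rechecked here after an EINTR failure
            long result{syscall(SYS_futex, reinterpret_cast<uint32_t *>(&word), FUTEX_WAIT, expected, nullptr, nullptr, 0)};
            if (result == -1 && errno != EAGAIN && errno != EINTR)
                break; // An unexpected error, bail out rather than spinning
            // EINTR: a signal handler installed without SA_RESTART interrupted the wait, loop and recheck
        }
    }

    int main() {
        std::atomic<uint32_t> word{0};
        std::thread waker([&] {
            std::this_thread::sleep_for(std::chrono::milliseconds(50));
            word.store(1);
            syscall(SYS_futex, reinterpret_cast<uint32_t *>(&word), FUTEX_WAKE, INT_MAX, nullptr, nullptr, 0);
        });
        FutexWaitRechecked(word, 0);
        waker.join();
        return 0;
    }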

View File

@@ -41,7 +41,6 @@ namespace skyline {
             u64 entryArgument;
             void *stackTop;
-            std::condition_variable wakeCondition; //!< A conditional variable which is signalled to wake the current thread while it's sleeping
             std::atomic<u8> basePriority; //!< The priority of the thread for the scheduler without any priority-inheritance
             std::atomic<u8> priority; //!< The priority of the thread for the scheduler
             i8 idealCore; //!< The ideal CPU core for this thread to run on
@@ -52,6 +51,7 @@ namespace skyline {
             u64 averageTimeslice{}; //!< A weighted average of the timeslice duration for this thread
             timer_t preemptionTimer{}; //!< A kernel timer used for preemption interrupts
             bool isPreempted{}; //!< If the preemption timer has been armed and will fire
+            bool pendingYield{}; //!< If the current thread has been sent a yield signal and hasn't acted upon it yet
             bool forceYield{}; //!< If the thread has been forcefully yielded by another thread
             std::mutex waiterMutex; //!< Synchronizes operations on mutation of the waiter members
             u32* waitKey; //!< The key of the mutex which this thread is waiting on