Fix Thread Insertion Optimization + Revert Per-Thread Scheduler Conditions

2025-01-22 14:31:11 +01:00 · 2021-01-21 02:59:42 +05:30 · 2021-01-21 02:59:42 +05:30 · 1f48fdd4a5
commit 1f48fdd4a5
parent d5d133372f
7 changed files with 64 additions and 76 deletions
--- a/app/src/main/cpp/skyline/common/signal.cpp
+++ b/app/src/main/cpp/skyline/common/signal.cpp
@ -18,7 +18,6 @@ namespace skyline::signal {

    std::terminate_handler terminateHandler{};

-    [[clang::optnone]]
    inline StackFrame *SafeFrameRecurse(size_t depth, StackFrame *frame) {
        if (frame) {
            for (size_t it{}; it < depth; it++) {
@ -33,7 +32,6 @@ namespace skyline::signal {
        return frame;
    }

-    [[clang::optnone]]
    void TerminateHandler() {
        auto exception{std::current_exception()};
        if (terminateHandler && exception && exception == SignalExceptionPtr) {
@ -171,18 +169,18 @@ namespace skyline::signal {
            asm volatile("MSR TPIDR_EL0, %x0"::"r"(tls));
    }

-    void SetSignalHandler(std::initializer_list<int> signals, SignalHandler function) {
+    void SetSignalHandler(std::initializer_list<int> signals, SignalHandler function, bool syscallRestart) {
        static std::array<std::once_flag, NSIG> signalHandlerOnce{};

        stack_t stack;
        sigaltstack(nullptr, &stack);
        struct sigaction action{
            .sa_sigaction = reinterpret_cast<void (*)(int, siginfo *, void *)>(ThreadSignalHandler),
-            .sa_flags = SA_RESTART | SA_SIGINFO | (stack.ss_sp && stack.ss_size ? SA_ONSTACK : 0),
+            .sa_flags = (syscallRestart ? SA_RESTART : 0) | SA_SIGINFO | (stack.ss_sp && stack.ss_size ? SA_ONSTACK : 0),
        };

        for (int signal : signals) {
-            std::call_once(signalHandlerOnce[signal], [signal, action]() {
+            std::call_once(signalHandlerOnce[signal], [signal, &action]() {
                struct sigaction oldAction;
                Sigaction(signal, &action, &oldAction);
                if (oldAction.sa_flags && oldAction.sa_flags != action.sa_flags)
--- a/app/src/main/cpp/skyline/common/signal.h
+++ b/app/src/main/cpp/skyline/common/signal.h
@ -80,12 +80,13 @@ namespace skyline::signal {
    /**
     * @brief A wrapper around Sigaction to make it easy to set a sigaction signal handler for multiple signals and also allow for thread-local signal handlers
     * @param function A sa_action callback with a pointer to the old TLS (If present) as the 4th argument
+     * @param syscallRestart If a system call running during the signal will be seamlessly restarted or return an error (Corresponds to SA_RESTART)
     * @note If 'nullptr' is written into the 4th argument then the old TLS won't be restored or it'll be set to any non-null value written into it
     */
-    void SetSignalHandler(std::initializer_list<int> signals, SignalHandler function);
+    void SetSignalHandler(std::initializer_list<int> signals, SignalHandler function, bool syscallRestart = true);

-    inline void SetSignalHandler(std::initializer_list<int> signals, void (*function)(int, struct siginfo *, ucontext *)) {
-        SetSignalHandler(signals, reinterpret_cast<SignalHandler>(function));
+    inline void SetSignalHandler(std::initializer_list<int> signals, void (*function)(int, struct siginfo *, ucontext *), bool syscallRestart = true) {
+        SetSignalHandler(signals, reinterpret_cast<SignalHandler>(function), syscallRestart);
    }

    /**
--- a/app/src/main/cpp/skyline/gpu/gpfifo.cpp
+++ b/app/src/main/cpp/skyline/gpu/gpfifo.cpp
@ -2,6 +2,7 @@
 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)

 #include <common/signal.h>
+#include <loader/loader.h>
 #include <kernel/types/KProcess.h>
 #include <gpu.h>
 #include <gpu/engines/maxwell_3d.h>
@ -99,12 +100,12 @@ namespace skyline::gpu::gpfifo {
            });
        } catch (const signal::SignalException &e) {
            if (e.signal != SIGINT) {
-                state.logger->Write(Logger::LogLevel::Error, e.what());
+                state.logger->Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));
                signal::BlockSignal({SIGINT});
                state.process->Kill(false);
            }
        } catch (const std::exception &e) {
-            state.logger->Write(Logger::LogLevel::Error, e.what());
+            state.logger->Error(e.what());
            signal::BlockSignal({SIGINT});
            state.process->Kill(false);
        }
--- a/app/src/main/cpp/skyline/kernel/scheduler.cpp
+++ b/app/src/main/cpp/skyline/kernel/scheduler.cpp
@ -61,7 +61,7 @@ namespace skyline::kernel {
            if (optimalCore != currentCore) {
                if (!alwaysInsert && thread == state.thread)
                    RemoveThread();
-                else if (!alwaysInsert && thread != state.thread)
+                else if (!alwaysInsert && thread != state.thread) [[unlikely]]
                    throw exception("Migrating an external thread (T{}) without 'alwaysInsert' isn't supported", thread->id);
                thread->coreId = optimalCore->id;
                InsertThread(thread);
@ -89,29 +89,31 @@ namespace skyline::kernel {
        if (nextThread == core.queue.begin()) {
            if (nextThread != core.queue.end()) {
                // If the inserted thread has a higher priority than the currently running thread (and the queue isn't empty)
-                if (state.thread == thread) {
-                    // If the current thread is inserting itself then we try to optimize by trying to by forcefully yielding it ourselves now
-                    // We can avoid waiting on it to yield itself on receiving the signal which serializes the entire pipeline
-                    // This isn't done in other cases as this optimization is unsafe when done where serialization is required (Eg: Mutexes)
-                    core.queue.front()->forceYield = true;
-                    core.queue.splice(std::upper_bound(core.queue.begin(), core.queue.end(), thread->priority.load(), type::KThread::IsHigherPriority), core.queue, core.queue.begin());
-                    core.queue.push_front(thread);
-                } else {
-                    // If we're inserting another thread then we just insert it after the thread in line
-                    // It'll automatically be ready to be scheduled when the thread at the front yields
-                    // This enforces strict synchronization for the thread to run and waits till the previous thread has yielded itself
-                    core.queue.insert(std::next(core.queue.begin()), thread);
-                }
+                // We can yield the thread which is currently scheduled on the core by sending it a signal
+                // It is optimized to avoid waiting for the thread to yield on receiving the signal which serializes the entire pipeline
+                auto front{core.queue.front()};
+                front->forceYield = true;
+                core.queue.splice(std::upper_bound(core.queue.begin(), core.queue.end(), front->priority.load(), type::KThread::IsHigherPriority), core.queue, core.queue.begin());
+                core.queue.push_front(thread);

-                if (state.thread != core.queue.front())
-                    core.queue.front()->SendSignal(YieldSignal);
-                else
+                if (state.thread != front) {
+                    // If the inserting thread isn't at the front, we need to send it an OS signal to yield
+                    if (!front->pendingYield) {
+                        // We only want to yield the thread if it hasn't already been sent a signal to yield in the past
+                        // Not doing this can lead to races and deadlocks but is also slower as it prevents redundant signals
+                        front->SendSignal(YieldSignal);
+                        front->pendingYield = true;
+                    }
+                } else {
+                    // If the thread at the front is being yielded, we can just set the YieldPending flag
+                    // This avoids an OS signal and would cause a deadlock otherwise as the core lock would be relocked
                    YieldPending = true;
+                }
            } else {
                core.queue.push_front(thread);
            }
            if (thread != state.thread)
-                thread->wakeCondition.notify_one(); // We only want to trigger the conditional variable if the current thread isn't inserting itself
+                core.frontCondition.notify_all(); // We only want to trigger the conditional variable if the current thread isn't inserting itself
        } else {
            core.queue.insert(nextThread, thread);
        }
@ -124,7 +126,7 @@ namespace skyline::kernel {
        std::unique_lock lock(core->mutex);
        if (loadBalance && thread->affinityMask.count() > 1) {
            std::chrono::milliseconds loadBalanceThreshold{PreemptiveTimeslice * 2}; //!< The amount of time that needs to pass unscheduled for a thread to attempt load balancing
-            while (!thread->wakeCondition.wait_for(lock, loadBalanceThreshold, [&]() { return !core->queue.empty() && core->queue.front() == thread; })) {
+            while (!core->frontCondition.wait_for(lock, loadBalanceThreshold, [&]() { return !core->queue.empty() && core->queue.front() == thread; })) {
                lock.unlock();
                LoadBalance(state.thread);
                if (thread->coreId == core->id) {
@ -137,7 +139,7 @@ namespace skyline::kernel {
                loadBalanceThreshold *= 2; // We double the duration required for future load balancing for this invocation to minimize pointless load balancing
            }
        } else {
-            thread->wakeCondition.wait(lock, [&]() { return !core->queue.empty() && core->queue.front() == thread; });
+            core->frontCondition.wait(lock, [&]() { return !core->queue.empty() && core->queue.front() == thread; });
        }

        if (thread->priority == core->preemptionPriority) {
@ -154,7 +156,7 @@ namespace skyline::kernel {
        auto *core{&cores.at(thread->coreId)};

        std::unique_lock lock(core->mutex);
-        if (thread->wakeCondition.wait_for(lock, timeout, [&]() { return !core->queue.empty() && core->queue.front() == thread; })) {
+        if (core->frontCondition.wait_for(lock, timeout, [&]() { return !core->queue.empty() && core->queue.front() == thread; })) {
            if (thread->priority == core->preemptionPriority) {
                struct itimerspec spec{.it_value = {.tv_nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(PreemptiveTimeslice).count()}};
                timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
@ -175,49 +177,27 @@ namespace skyline::kernel {

        std::unique_lock lock(core.mutex);
        if (core.queue.front() == thread) {
-            thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4)); // 0.25 * old timeslice duration + 0.75 * current timeslice duration
-
+            // If this thread is at the front of the thread queue then we need to rotate the thread
+            // In the case where this thread was forcefully yielded, we don't need to do this as it's done by the thread which yielded us
            // Splice the linked element from the beginning of the queue to where it's priority is present
            core.queue.splice(std::upper_bound(core.queue.begin(), core.queue.end(), thread->priority.load(), type::KThread::IsHigherPriority), core.queue, core.queue.begin());

-            auto front{core.queue.front()};
-            if (front != thread)
-                front->wakeCondition.notify_one(); // If we aren't at the front of the queue, only then should we wake the thread at the front up
-
-            if (cooperative && thread->isPreempted) {
-                // If a preemptive thread did a cooperative yield then we need to disarm the preemptive timer
-                struct itimerspec spec{};
-                timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
-            }
-
-            thread->isPreempted = false;
-        } else if (thread->forceYield) {
-            // We need to check if this thread was yielded by another thread on behalf of this thread
-            // If it is then we just need to disarm the preemption timer and update the average timeslice
-            // If it isn't then we need to throw an exception as it's indicative of an invalid state
-            struct ThreadComparision {
-                constexpr bool operator()(const i8 priority, const std::shared_ptr<type::KThread> &it) { return priority < it->priority; }
-
-                constexpr bool operator()(const std::shared_ptr<type::KThread> &it, const i8 priority) { return it->priority < priority; }
-            };
-            auto bounds{std::equal_range(core.queue.begin(), core.queue.end(), thread->priority.load(), ThreadComparision{})};
-
-            auto iterator{std::find(bounds.first, bounds.second, thread)};
-            if (iterator != bounds.second) {
-                thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4));
-
-                if (cooperative && thread->isPreempted) {
-                    struct itimerspec spec{};
-                    timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
-                }
-
-                thread->isPreempted = false;
-            } else {
-                throw exception("T{} called Rotate while not being in C{}'s queue after being forcefully yielded", thread->id, thread->coreId);
-            }
-        } else {
+            if (core.queue.front() != thread)
+                core.frontCondition.notify_all(); // If we aren't at the front of the queue, only then should we wake the thread at the front up
+        } else if (!thread->forceYield) { [[unlikely]]
            throw exception("T{} called Rotate while not being in C{}'s queue", thread->id, thread->coreId);
        }
+
+        thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4));
+
+        if (cooperative && thread->isPreempted) {
+            // If a preemptive thread did a cooperative yield then we need to disarm the preemptive timer
+            struct itimerspec spec{};
+            timer_settime(thread->preemptionTimer, 0, &spec, nullptr);
+        }
+
+        thread->isPreempted = false;
+        thread->pendingYield = false;
        thread->forceYield = false;
    }

@ -234,7 +214,10 @@ namespace skyline::kernel {
            // Alternatively, if it's currently running then we'd just want to cause it to yield if it's priority is lower than the the thread behind it
            auto nextIt{std::next(currentIt)};
            if (nextIt != core->queue.end() && (*nextIt)->priority < thread->priority) {
-                thread->SendSignal(YieldSignal);
+                if (!thread->pendingYield) {
+                    thread->SendSignal(YieldSignal);
+                    thread->pendingYield = true;
+                }
            } else if (!thread->isPreempted && thread->priority == core->preemptionPriority) {
                // If the thread needs to be preempted due to the new priority then arm it's preemption timer
                struct itimerspec spec{.it_value = {.tv_nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(PreemptiveTimeslice).count()}};
@ -260,7 +243,11 @@ namespace skyline::kernel {
        targetIt = std::upper_bound(core->queue.begin(), core->queue.end(), thread->priority.load(), type::KThread::IsHigherPriority); // Iterator invalidation
        if (targetIt == core->queue.begin() && targetIt != core->queue.end()) {
            core->queue.insert(std::next(core->queue.begin()), thread);
-            core->queue.front()->SendSignal(YieldSignal);
+            auto front{core->queue.front()};
+            if (!front->pendingYield) {
+                front->SendSignal(YieldSignal);
+                front->pendingYield = true;
+            }
        } else {
            core->queue.insert(targetIt, thread);
        }
@ -280,7 +267,7 @@ namespace skyline::kernel {
        if (thread->coreId == constant::ParkedCoreId) {
            std::unique_lock lock(parkedMutex);
            parkedQueue.insert(std::upper_bound(parkedQueue.begin(), parkedQueue.end(), thread->priority.load(), type::KThread::IsHigherPriority), thread);
-            thread->wakeCondition.wait(lock, [&]() { return parkedQueue.front() == thread && thread->coreId != constant::ParkedCoreId; });
+            parkedFrontCondition.wait(lock, [&]() { return parkedQueue.front() == thread && thread->coreId != constant::ParkedCoreId; });
        }

        InsertThread(thread);
@ -301,7 +288,7 @@ namespace skyline::kernel {
            if (parkedThread->priority < thread->priority || (parkedThread->priority == thread->priority && (!nextThread || parkedThread->timesliceStart < nextThread->timesliceStart))) {
                parkedThread->coreId = thread->coreId;
                parkedLock.unlock();
-                thread->wakeCondition.notify_one();
+                parkedFrontCondition.notify_all();
            }
        }
    }
@ -320,7 +307,7 @@ namespace skyline::kernel {
                        thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4));

                    if (it != core.queue.end())
-                        (*it)->wakeCondition.notify_one(); // We need to wake the thread at the front of the queue, if we were at the front previously
+                        core.frontCondition.notify_all(); // We need to wake the thread at the front of the queue, if we were at the front previously
                }
            }
        }
--- a/app/src/main/cpp/skyline/kernel/scheduler.h
+++ b/app/src/main/cpp/skyline/kernel/scheduler.h
@ -47,6 +47,7 @@ namespace skyline {
                u8 id;
                u8 preemptionPriority; //!< The priority at which this core becomes preemptive as opposed to cooperative
                std::mutex mutex; //!< Synchronizes all operations on the queue
+                std::condition_variable frontCondition; //!< A conditional variable which is signalled when the front of the parked queue has changed
                std::list<std::shared_ptr<type::KThread>> queue; //!< A queue of threads which are running or to be run on this core

                CoreContext(u8 id, u8 preemptionPriority);
--- a/app/src/main/cpp/skyline/kernel/types/KThread.cpp
+++ b/app/src/main/cpp/skyline/kernel/types/KThread.cpp
@ -64,7 +64,7 @@ namespace skyline::kernel::type {
            throw exception("timer_create has failed with '{}'", strerror(errno));

        signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, nce::NCE::SignalHandler);
-        signal::SetSignalHandler({Scheduler::YieldSignal}, Scheduler::SignalHandler);
+        signal::SetSignalHandler({Scheduler::YieldSignal}, Scheduler::SignalHandler, false); // We want futexes to fail and their predicates rechecked

        {
            std::lock_guard lock(statusMutex);
--- a/app/src/main/cpp/skyline/kernel/types/KThread.h
+++ b/app/src/main/cpp/skyline/kernel/types/KThread.h
@ -41,7 +41,6 @@ namespace skyline {
            u64 entryArgument;
            void *stackTop;

-            std::condition_variable wakeCondition; //!< A conditional variable which is signalled to wake the current thread while it's sleeping
            std::atomic<u8> basePriority; //!< The priority of the thread for the scheduler without any priority-inheritance
            std::atomic<u8> priority; //!< The priority of the thread for the scheduler
            i8 idealCore; //!< The ideal CPU core for this thread to run on
@ -52,6 +51,7 @@ namespace skyline {
            u64 averageTimeslice{}; //!< A weighted average of the timeslice duration for this thread
            timer_t preemptionTimer{}; //!< A kernel timer used for preemption interrupts
            bool isPreempted{}; //!< If the preemption timer has been armed and will fire
+            bool pendingYield{}; //!< If the current thread has been yielded and hasn't been acted upon it yet
            bool forceYield{}; //!< If the thread has been forcefully yielded by another thread
            std::mutex waiterMutex; //!< Synchronizes operations on mutation of the waiter members
            u32* waitKey; //!< The key of the mutex which this thread is waiting on