Implement Cooperative Scheduling With Load Balancing

2024-12-23 17:51:52 +01:00 · 2020-12-05 23:11:52 +05:30 · 2020-12-05 23:11:52 +05:30 · cf000f5750
commit cf000f5750
parent 8564edcb16
10 changed files with 321 additions and 102 deletions
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@ -41,6 +41,7 @@ add_library(skyline SHARED
        ${source_DIR}/skyline/jvm.cpp
        ${source_DIR}/skyline/os.cpp
        ${source_DIR}/skyline/kernel/memory.cpp
        ${source_DIR}/skyline/kernel/scheduler.cpp
        ${source_DIR}/skyline/kernel/ipc.cpp
        ${source_DIR}/skyline/kernel/svc.cpp
        ${source_DIR}/skyline/kernel/types/KProcess.cpp
--- a/app/src/main/cpp/emu_jni.cpp
+++ b/app/src/main/cpp/emu_jni.cpp
@ -5,9 +5,10 @@
 #include <pthread.h>
 #include <unistd.h>
 #include <android/log.h>
 #include "skyline/loader/loader.h"
 #include "skyline/common.h"
 #include "skyline/common/signal.h"
 #include "skyline/common/settings.h"
 #include "skyline/loader/loader.h"
 #include "skyline/os.h"
 #include "skyline/jvm.h"
 #include "skyline/gpu.h"
@ -50,6 +51,8 @@ extern "C" JNIEXPORT void Java_emu_skyline_EmulationActivity_executeApplication(
        os->Execute(romFd, static_cast<skyline::loader::RomFormat>(romType));
    } catch (std::exception &e) {
        logger->Error(e.what());
    } catch (const skyline::signal::SignalException &e) {
        logger->Error(e.what());
    } catch (...) {
        logger->Error("An unknown exception has occurred");
    }
--- a/app/src/main/cpp/skyline/common.cpp
+++ b/app/src/main/cpp/skyline/common.cpp
@ -62,6 +62,7 @@ namespace skyline {
        gpu = std::make_shared<gpu::GPU>(*this);
        audio = std::make_shared<audio::Audio>(*this);
        nce = std::make_shared<nce::NCE>(*this);
        scheduler = std::make_shared<kernel::Scheduler>(*this);
        input = std::make_shared<input::Input>(*this);
    }
 }
--- a/app/src/main/cpp/skyline/common.h
+++ b/app/src/main/cpp/skyline/common.h
@ -480,6 +480,7 @@ namespace skyline {
            class KProcess;
            class KThread;
        }
        class Scheduler;
        class OS;
    }
    namespace audio {
@ -506,6 +507,7 @@ namespace skyline {
        std::shared_ptr<gpu::GPU> gpu;
        std::shared_ptr<audio::Audio> audio;
        std::shared_ptr<nce::NCE> nce;
        std::shared_ptr<kernel::Scheduler> scheduler;
        std::shared_ptr<kernel::type::KProcess> process;
        static thread_local inline std::shared_ptr<kernel::type::KThread> thread{}; //!< The KThread of the thread which accesses this object
        static thread_local inline nce::ThreadContext *ctx{}; //!< The context of the guest thread for the corresponding host thread
--- a/app/src/main/cpp/skyline/kernel/scheduler.cpp
+++ b/app/src/main/cpp/skyline/kernel/scheduler.cpp
@ -0,0 +1,134 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
 #include <common/signal.h>
 #include "types/KThread.h"
 #include "scheduler.h"
 namespace skyline::kernel {
    Scheduler::CoreContext::CoreContext(u8 id) : id(id) {}
    Scheduler::Scheduler(const DeviceState &state) : state(state) {}
    /**
     * @brief Selects the core on which the calling thread is estimated to be scheduled the earliest and migrates the thread to it
     * @return The CoreContext of the core the calling thread is resident on after load balancing
     * @note On migration the thread is erased from its previous core's queue and KThread::coreId is updated but the thread is not inserted into the new core's queue, the caller has to do that
     * @note The caller must not hold any core mutex as they are acquired (and released again) in here
     */
    Scheduler::CoreContext &Scheduler::LoadBalance() {
        auto &thread{state.thread};
        auto currentCore{&cores.at(thread->coreId)};

        // The queue may only be inspected while holding the core's mutex as other threads mutate it concurrently
        const auto isCurrentCoreOccupied{[&]() {
            std::shared_lock lock(currentCore->mutex);
            return !currentCore->queue.empty();
        }};

        if (thread->affinityMask.count() != 1 && isCurrentCoreOccupied()) {
            // Select core where the current thread will be scheduled the earliest based off average timeslice durations for resident threads
            // There's a preference for the current core as migration isn't free
            u64 minTimeslice{};
            CoreContext *optimalCore{};
            for (auto &candidateCore : cores) {
                if (!thread->affinityMask.test(candidateCore.id))
                    continue;

                u64 timeslice{}; //!< An estimate of the ticks that will pass prior to the calling thread being scheduled on this core
                {
                    std::shared_lock lock(candidateCore.mutex);
                    auto threadIterator{candidateCore.queue.cbegin()};
                    if (threadIterator != candidateCore.queue.cend()) {
                        // The thread at the front of the queue is the one currently running, only the remainder of its timeslice is relevant
                        const auto &runningThread{*threadIterator};
                        const auto elapsed{util::GetTimeTicks() - runningThread->timesliceStart};
                        if (runningThread->averageTimeslice)
                            // We clamp to a minimum of 1 tick rather than subtracting unconditionally as the unsigned subtraction would wrap around if the thread has overrun its average
                            timeslice += elapsed < runningThread->averageTimeslice ? runningThread->averageTimeslice - elapsed : 1UL;
                        else if (runningThread->timesliceStart)
                            // There's no average yet, so the duration it has run for so far is used as the estimate
                            timeslice += elapsed;
                        else
                            timeslice += 1UL;

                        // Only threads whose priority value is not greater than ours are counted, any other thread would be queued behind us
                        while (++threadIterator != candidateCore.queue.cend()) {
                            const auto &residentThread{*threadIterator};
                            if (residentThread->priority <= thread->priority)
                                timeslice += residentThread->averageTimeslice ? residentThread->averageTimeslice : 1UL;
                        }
                    }
                }

                if (!optimalCore || timeslice < minTimeslice || (timeslice == minTimeslice && &candidateCore == currentCore)) {
                    optimalCore = &candidateCore;
                    minTimeslice = timeslice;
                }
            }

            if (optimalCore != currentCore) {
                std::unique_lock lock(currentCore->mutex);
                currentCore->queue.erase(std::remove(currentCore->queue.begin(), currentCore->queue.end(), thread), currentCore->queue.end());
                currentCore->mutateCondition.notify_all();
                thread->coreId = optimalCore->id;
                state.logger->Debug("Load Balancing for #{}: C{} -> C{}", thread->id, currentCore->id, optimalCore->id);
            } else {
                state.logger->Debug("Load Balancing for #{}: C{} (Late)", thread->id, currentCore->id);
            }
            return *optimalCore;
        }

        state.logger->Debug("Load Balancing for #{}: C{} (Early)", thread->id, currentCore->id);
        return *currentCore;
    }
    void Scheduler::InsertThread(bool loadBalance) {
        auto &thread{state.thread};
        auto &core{loadBalance ? LoadBalance() : cores.at(thread->coreId)};
        std::unique_lock lock(core.mutex);
        auto nextThread{std::find_if(core.queue.begin(), core.queue.end(), [&](const std::shared_ptr<type::KThread> &it) { return it->priority > thread->priority; })};
        if (nextThread == core.queue.begin() && nextThread != core.queue.end()) {
            throw exception("Migration Interrupt Required");
        } else {
            core.queue.insert(nextThread, thread);
        }
        core.mutateCondition.notify_all();
    }
    void Scheduler::WaitSchedule() {
        auto &thread{state.thread};
        auto *core{&cores.at(thread->coreId)};
        std::shared_lock lock(core->mutex);
        if (thread->affinityMask.count() > 1) {
            std::chrono::milliseconds loadBalanceThreshold{1}; //!< The amount of time that needs to pass unscheduled for a thread to attempt load balancing
            while (!core->mutateCondition.wait_for(lock, loadBalanceThreshold, [&]() { return core->queue.front() == thread; })) {
                lock.unlock();
                LoadBalance();
                if (thread->coreId == core->id) {
                    lock.lock();
                } else {
                    InsertThread(false);
                    core = &cores.at(thread->coreId);
                    lock = std::shared_lock(core->mutex);
                }
                loadBalanceThreshold *= 2; // We double the duration required for future load balancing for this invocation to minimize pointless load balancing
            }
        } else {
            core->mutateCondition.wait(lock, [&]() { return core->queue.front() == thread; });
        }
        thread->timesliceStart = util::GetTimeTicks();
    }
    /**
     * @brief Moves the calling thread from the front to the back of its resident core's queue and updates its average timeslice
     * @note This does nothing if the calling thread isn't at the front of the queue (or if the queue is empty)
     */
    void Scheduler::Rotate() {
        auto &thread{state.thread};
        auto &core{cores.at(thread->coreId)};
        std::unique_lock lock(core.mutex);
        // front() on an empty deque is undefined behavior, so emptiness is checked first
        if (!core.queue.empty() && core.queue.front() == thread) {
            // The duration has to be computed prior to scaling, dividing only the start timestamp would produce a meaningless value
            const auto currentTimeslice{util::GetTimeTicks() - thread->timesliceStart};
            thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (currentTimeslice / 4)); // 0.25 * old timeslice duration + 0.75 * current timeslice duration
            core.queue.pop_front();
            core.queue.push_back(thread);
            core.mutateCondition.notify_all();
        }
    }
    /**
     * @brief Erases the calling thread from its resident core's queue and wakes any threads waiting on that queue
     */
    void Scheduler::RemoveThread() {
        auto &thread{state.thread};
        auto &core{cores.at(thread->coreId)};
        std::unique_lock lock(core.mutex);
        core.queue.erase(std::remove(core.queue.begin(), core.queue.end(), thread), core.queue.end());
        // Waiters need to re-evaluate the front of the queue as the removed thread might have been the one at the front
        core.mutateCondition.notify_all();
    }
 }
--- a/app/src/main/cpp/skyline/kernel/scheduler.h
+++ b/app/src/main/cpp/skyline/kernel/scheduler.h
@ -4,6 +4,7 @@
 #pragma once
 #include <common.h>
 #include <condition_variable>
 namespace skyline {
    namespace constant {
@ -32,5 +33,57 @@ namespace skyline {
                return (value >= min) && (value <= max);
            }
        };
        /**
         * @brief The Scheduler is responsible for determining which threads should run on which virtual cores and when they should be scheduled
         * @note We tend to stray a lot from HOS in our scheduler design as we've designed it around our 1 host thread per guest thread model, which leads to scheduling from the perspective of threads while the HOS scheduler deals with scheduling from the perspective of cores. Not doing this would lead to missing out on key optimizations and serialization of scheduling
         */
        class Scheduler {
          private:
            const DeviceState &state;

            /**
             * @brief The scheduling state of a single virtual core
             */
            struct CoreContext {
                u8 id; //!< The identifier of the core, it matches the index of this context in Scheduler::cores
                std::shared_mutex mutex; //!< Synchronizes all operations on the queue
                std::condition_variable_any mutateCondition; //!< A conditional variable which is signalled on every mutation of the queue
                std::deque<std::shared_ptr<type::KThread>> queue; //!< A queue of threads which are running or to be run on this core

                explicit CoreContext(u8 id);
            };

            std::array<CoreContext, constant::CoreCount> cores{CoreContext(0), CoreContext(1), CoreContext(2), CoreContext(3)};

          public:
            explicit Scheduler(const DeviceState &state);

            /**
             * @brief Checks all cores and migrates the calling thread to the core where the calling thread should be scheduled the earliest
             * @return A reference to the CoreContext of the core which the calling thread is running on after load balancing
             * @note This doesn't insert the thread into the queue of the core it has been migrated to after load-balancing
             */
            CoreContext& LoadBalance();

            /**
             * @brief Inserts the calling thread into the scheduler queue at the appropriate location based on its priority
             * @param loadBalance If to load balance or use the thread's current core (KThread::coreId)
             */
            void InsertThread(bool loadBalance = true);

            /**
             * @brief Wait for the current thread to be scheduled on its resident core
             * @note There is an assumption of the thread being on its resident core queue, if it's not this'll never return
             */
            void WaitSchedule();

            /**
             * @brief Rotates the calling thread's resident core queue, if it is at the front of it
             */
            void Rotate();

            /**
             * @brief Removes the calling thread from its resident core queue
             */
            void RemoveThread();
        };
    }
 }
--- a/app/src/main/cpp/skyline/kernel/svc.cpp
+++ b/app/src/main/cpp/skyline/kernel/svc.cpp
@ -281,20 +281,27 @@ namespace skyline::kernel::svc {
    }
    void SleepThread(const DeviceState &state) {
        constexpr i64 yieldWithoutCoreMigration{0};
        constexpr i64 yieldWithCoreMigration{-1};
        constexpr i64 yieldToAnyThread{-2};
        i64 in{static_cast<i64>(state.ctx->gpr.x0)};
-        switch (in) {
+        if (in > 0) {
            case 0:
            case -1:
            case -2:
                state.logger->Debug("svcSleepThread: Yielding thread: {}", in);
                break;
            default:
            state.logger->Debug("svcSleepThread: Thread sleeping for {} ns", in);
            struct timespec spec{
-                    .tv_sec = static_cast<time_t>(state.ctx->gpr.x0 / 1000000000),
+                .tv_sec = static_cast<time_t>(in / 1000000000),
-                    .tv_nsec = static_cast<long>(state.ctx->gpr.x0 % 1000000000)
+                .tv_nsec = static_cast<long>(in % 1000000000),
            };
            state.scheduler->Rotate();
            nanosleep(&spec, nullptr);
            state.scheduler->WaitSchedule();
        } else if (in == yieldWithoutCoreMigration || in == yieldWithCoreMigration || in == yieldToAnyThread) {
            // Core Migration doesn't affect us as threads schedule and load balance themselves
            state.logger->Debug("svcSleepThread: Yielding thread ({})", in);
            state.scheduler->Rotate();
            state.scheduler->WaitSchedule();
        }
    }
--- a/app/src/main/cpp/skyline/kernel/types/KThread.cpp
+++ b/app/src/main/cpp/skyline/kernel/types/KThread.cpp
@ -1,8 +1,8 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
 #include <cxxabi.h>
 #include <unistd.h>
 #include <android/log.h>
 #include <common/signal.h>
 #include <nce.h>
 #include <os.h>
@ -39,6 +39,8 @@ namespace skyline::kernel::type {
        state.thread = shared_from_this();
        if (setjmp(originalCtx)) { // Returns 1 if it's returning from guest, 0 otherwise
            state.scheduler->RemoveThread();
            running = false;
            Signal();
@ -52,6 +54,10 @@ namespace skyline::kernel::type {
        signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, nce::NCE::SignalHandler);
        try {
            state.scheduler->InsertThread();
            state.scheduler->WaitSchedule();
            asm volatile(
            "MRS X0, TPIDR_EL0\n\t"
            "MSR TPIDR_EL0, %x0\n\t" // Set TLS to ThreadContext
@ -131,6 +137,25 @@ namespace skyline::kernel::type {
            );
            __builtin_unreachable();
        } catch (const std::exception &e) {
            state.logger->Error(e.what());
            if (id) {
                signal::BlockSignal({SIGINT});
                state.process->Kill(false);
            }
            abi::__cxa_end_catch();
            std::longjmp(originalCtx, true);
        } catch (const signal::SignalException &e) {
            if (e.signal != SIGINT) {
                state.logger->Error(e.what());
                if (id) {
                    signal::BlockSignal({SIGINT});
                    state.process->Kill(false);
                }
            }
            abi::__cxa_end_catch();
            std::longjmp(originalCtx, true);
        }
    }
    void KThread::Start(bool self) {
--- a/app/src/main/cpp/skyline/kernel/types/KThread.h
+++ b/app/src/main/cpp/skyline/kernel/types/KThread.h
@ -42,6 +42,8 @@ namespace skyline {
            i8 idealCore; //!< The ideal CPU core for this thread to run on
            i8 coreId; //!< The CPU core on which this thread is running
            CoreMask affinityMask{}; //!< A mask of CPU cores this thread is allowed to run on
            u64 timesliceStart{}; //!< Start of the scheduler timeslice
            u64 averageTimeslice{}; //!< A weighted average of the timeslice duration for this thread
            KThread(const DeviceState &state, KHandle handle, KProcess *parent, size_t id, void *entry, u64 argument, void *stackTop, i8 priority, i8 idealCore);
--- a/app/src/main/cpp/skyline/os.h
+++ b/app/src/main/cpp/skyline/os.h
@ -29,14 +29,5 @@ namespace skyline::kernel {
         * @param romType The type of the ROM file
         */
        void Execute(int romFd, loader::RomFormat romType);
        /**
         * @brief Creates a new process
         * @param entry The entry point for the new process
         * @param argument The argument for the initial function
         * @param stackSize The size of the main stack
         * @return An instance of the KProcess of the created process
         */
        std::shared_ptr<type::KProcess> CreateProcess(u64 entry, u64 argument, size_t stackSize);
    };
 }