diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 25760fd4..6f834984 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -41,6 +41,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/jvm.cpp ${source_DIR}/skyline/os.cpp ${source_DIR}/skyline/kernel/memory.cpp + ${source_DIR}/skyline/kernel/scheduler.cpp ${source_DIR}/skyline/kernel/ipc.cpp ${source_DIR}/skyline/kernel/svc.cpp ${source_DIR}/skyline/kernel/types/KProcess.cpp diff --git a/app/src/main/cpp/emu_jni.cpp b/app/src/main/cpp/emu_jni.cpp index dfb42920..5e08f723 100644 --- a/app/src/main/cpp/emu_jni.cpp +++ b/app/src/main/cpp/emu_jni.cpp @@ -5,9 +5,10 @@ #include #include #include -#include "skyline/loader/loader.h" #include "skyline/common.h" +#include "skyline/common/signal.h" #include "skyline/common/settings.h" +#include "skyline/loader/loader.h" #include "skyline/os.h" #include "skyline/jvm.h" #include "skyline/gpu.h" @@ -50,6 +51,8 @@ extern "C" JNIEXPORT void Java_emu_skyline_EmulationActivity_executeApplication( os->Execute(romFd, static_cast(romType)); } catch (std::exception &e) { logger->Error(e.what()); + } catch (const skyline::signal::SignalException &e) { + logger->Error(e.what()); } catch (...) { logger->Error("An unknown exception has occurred"); } diff --git a/app/src/main/cpp/skyline/common.cpp b/app/src/main/cpp/skyline/common.cpp index 9227c1a2..800084f8 100644 --- a/app/src/main/cpp/skyline/common.cpp +++ b/app/src/main/cpp/skyline/common.cpp @@ -62,6 +62,7 @@ namespace skyline { gpu = std::make_shared(*this); audio = std::make_shared(*this); nce = std::make_shared(*this); + scheduler = std::make_shared(*this); input = std::make_shared(*this); } } diff --git a/app/src/main/cpp/skyline/common.h b/app/src/main/cpp/skyline/common.h index 10fb2733..18c47835 100644 --- a/app/src/main/cpp/skyline/common.h +++ b/app/src/main/cpp/skyline/common.h @@ -480,6 +480,7 @@ namespace skyline { class KProcess; class KThread; } + class Scheduler; class OS; } namespace audio { @@ -506,6 +507,7 @@ namespace skyline { std::shared_ptr gpu; std::shared_ptr audio; std::shared_ptr nce; + std::shared_ptr scheduler; std::shared_ptr process; static thread_local inline std::shared_ptr thread{}; //!< The KThread of the thread which accesses this object static thread_local inline nce::ThreadContext *ctx{}; //!< The context of the guest thread for the corresponding host thread diff --git a/app/src/main/cpp/skyline/kernel/scheduler.cpp b/app/src/main/cpp/skyline/kernel/scheduler.cpp new file mode 100644 index 00000000..e1514dd8 --- /dev/null +++ b/app/src/main/cpp/skyline/kernel/scheduler.cpp @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include "types/KThread.h" +#include "scheduler.h" + +namespace skyline::kernel { + Scheduler::CoreContext::CoreContext(u8 id) : id(id) {} + + Scheduler::Scheduler(const DeviceState &state) : state(state) {} + + Scheduler::CoreContext &Scheduler::LoadBalance() { + auto &thread{state.thread}; + auto currentCore{&cores.at(thread->coreId)}; + + if (!currentCore->queue.empty() && thread->affinityMask.count() != 1) { + // Select core where the current thread will be scheduled the earliest based off average timeslice durations for resident threads + // There's a preference for the current core as migration isn't free + + size_t minTimeslice{}; + CoreContext *optimalCore{}; + for (auto &candidateCore : cores) { + if (thread->affinityMask.test(candidateCore.id)) { + u64 timeslice{}; + + if (!candidateCore.queue.empty()) { + std::shared_lock lock(candidateCore.mutex); + + auto threadIterator{candidateCore.queue.cbegin()}; + if (threadIterator != candidateCore.queue.cend()) { + const auto &runningThread{*threadIterator}; + timeslice += runningThread->averageTimeslice ? std::min(runningThread->averageTimeslice - (util::GetTimeTicks() - runningThread->timesliceStart), 1UL) : runningThread->timesliceStart ? util::GetTimeTicks() - runningThread->timesliceStart : 1UL; + + while (++threadIterator != candidateCore.queue.cend()) { + const auto &residentThread{*threadIterator}; + if (residentThread->priority <= thread->priority) + timeslice += residentThread->averageTimeslice ? residentThread->averageTimeslice : 1UL; + } + } + } + + if (!optimalCore || timeslice < minTimeslice || (timeslice == minTimeslice && &candidateCore == currentCore)) { + optimalCore = &candidateCore; + minTimeslice = timeslice; + } + } + } + + if (optimalCore != currentCore) { + std::unique_lock lock(currentCore->mutex); + currentCore->queue.erase(std::remove(currentCore->queue.begin(), currentCore->queue.end(), thread), currentCore->queue.end()); + currentCore->mutateCondition.notify_all(); + + thread->coreId = optimalCore->id; + + state.logger->Debug("Load Balancing for #{}: C{} -> C{}", thread->id, currentCore->id, optimalCore->id); + } else { + state.logger->Debug("Load Balancing for #{}: C{} (Late)", thread->id, currentCore->id); + } + + return *optimalCore; + } + + state.logger->Debug("Load Balancing for #{}: C{} (Early)", thread->id, currentCore->id); + + return *currentCore; + } + + void Scheduler::InsertThread(bool loadBalance) { + auto &thread{state.thread}; + auto &core{loadBalance ? LoadBalance() : cores.at(thread->coreId)}; + + std::unique_lock lock(core.mutex); + auto nextThread{std::find_if(core.queue.begin(), core.queue.end(), [&](const std::shared_ptr &it) { return it->priority > thread->priority; })}; + if (nextThread == core.queue.begin() && nextThread != core.queue.end()) { + throw exception("Migration Interrupt Required"); + } else { + core.queue.insert(nextThread, thread); + } + + core.mutateCondition.notify_all(); + } + + void Scheduler::WaitSchedule() { + auto &thread{state.thread}; + auto *core{&cores.at(thread->coreId)}; + + std::shared_lock lock(core->mutex); + if (thread->affinityMask.count() > 1) { + std::chrono::milliseconds loadBalanceThreshold{1}; //!< The amount of time that needs to pass unscheduled for a thread to attempt load balancing + while (!core->mutateCondition.wait_for(lock, loadBalanceThreshold, [&]() { return core->queue.front() == thread; })) { + lock.unlock(); + LoadBalance(); + if (thread->coreId == core->id) { + lock.lock(); + } else { + InsertThread(false); + core = &cores.at(thread->coreId); + lock = std::shared_lock(core->mutex); + } + + loadBalanceThreshold *= 2; // We double the duration required for future load balancing for this invocation to minimize pointless load balancing + } + } else { + core->mutateCondition.wait(lock, [&]() { return core->queue.front() == thread; }); + } + + thread->timesliceStart = util::GetTimeTicks(); + } + + void Scheduler::Rotate() { + auto &thread{state.thread}; + auto &core{cores.at(thread->coreId)}; + + std::unique_lock lock(core.mutex); + if (core.queue.front() == thread) { + thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4)); // 0.25 * old timeslice duration + 0.75 * current timeslice duration + + core.queue.pop_front(); + core.queue.push_back(thread); + + core.mutateCondition.notify_all(); + } + } + + void Scheduler::RemoveThread() { + auto &thread{state.thread}; + auto &core{cores.at(thread->coreId)}; + + std::unique_lock lock(core.mutex); + core.queue.erase(std::remove(core.queue.begin(), core.queue.end(), thread), core.queue.end()); + } +} diff --git a/app/src/main/cpp/skyline/kernel/scheduler.h b/app/src/main/cpp/skyline/kernel/scheduler.h index 6ad63db6..5996a61d 100644 --- a/app/src/main/cpp/skyline/kernel/scheduler.h +++ b/app/src/main/cpp/skyline/kernel/scheduler.h @@ -4,6 +4,7 @@ #pragma once #include +#include namespace skyline { namespace constant { @@ -32,5 +33,57 @@ namespace skyline { return (value >= min) && (value <= max); } }; + + /* + * @brief The Scheduler is responsible for determining which threads should run on which virtual cores and when they should be scheduled + * @note We tend to stray a lot from HOS in our scheduler design as we've designed it around our 1 host thread per guest thread which leads to scheduling from the perspective of threads while the HOS scheduler deals with scheduling from the perspective of cores, not doing this would lead to missing out on key optimizations and serialization of scheduling + */ + class Scheduler { + private: + const DeviceState &state; + + struct CoreContext { + u8 id; + std::shared_mutex mutex; //!< Synchronizes all operations on the queue + std::condition_variable_any mutateCondition; //!< A conditional variable which is signalled on every mutation of the queue + std::deque> queue; //!< A queue of threads which are running or to be run on this core + + explicit CoreContext(u8 id); + }; + + std::array cores{CoreContext(0), CoreContext(1), CoreContext(2), CoreContext(3)}; + + public: + Scheduler(const DeviceState &state); + + /** + * @brief Checks all cores and migrates the calling thread to the core where the calling thread should be scheduled the earliest + * @return A reference to the CoreContext of the core which the calling thread is running on after load balancing + * @note This doesn't insert the thread into the migrated process's queue after load-balancing + */ + CoreContext& LoadBalance(); + + /** + * @brief Inserts the calling thread into the scheduler queue at the appropriate location based on it's priority + * @param loadBalance If to load balance or use the thread's current core (KThread::coreId) + */ + void InsertThread(bool loadBalance = true); + + /** + * @brief Wait for the current thread to be scheduled on it's resident core + * @note There is an assumption of the thread being on it's resident core queue, if it's not this'll never return + */ + void WaitSchedule(); + + /** + * @brief Rotates the calling thread's resident core queue, if it is at the front of it + */ + void Rotate(); + + /** + * @brief Removes the calling thread from it's resident core queue + */ + void RemoveThread(); + }; } } diff --git a/app/src/main/cpp/skyline/kernel/svc.cpp b/app/src/main/cpp/skyline/kernel/svc.cpp index 00784f93..b5619416 100644 --- a/app/src/main/cpp/skyline/kernel/svc.cpp +++ b/app/src/main/cpp/skyline/kernel/svc.cpp @@ -281,20 +281,27 @@ namespace skyline::kernel::svc { } void SleepThread(const DeviceState &state) { + constexpr i64 yieldWithoutCoreMigration{0}; + constexpr i64 yieldWithCoreMigration{-1}; + constexpr i64 yieldToAnyThread{-2}; + i64 in{static_cast(state.ctx->gpr.x0)}; - switch (in) { - case 0: - case -1: - case -2: - state.logger->Debug("svcSleepThread: Yielding thread: {}", in); - break; - default: - state.logger->Debug("svcSleepThread: Thread sleeping for {} ns", in); - struct timespec spec{ - .tv_sec = static_cast(state.ctx->gpr.x0 / 1000000000), - .tv_nsec = static_cast(state.ctx->gpr.x0 % 1000000000) - }; - nanosleep(&spec, nullptr); + if (in > 0) { + state.logger->Debug("svcSleepThread: Thread sleeping for {} ns", in); + + struct timespec spec{ + .tv_sec = static_cast(in / 1000000000), + .tv_nsec = static_cast(in % 1000000000), + }; + + state.scheduler->Rotate(); + nanosleep(&spec, nullptr); + state.scheduler->WaitSchedule(); + } else if (in == yieldWithoutCoreMigration || in == yieldWithCoreMigration || in == yieldToAnyThread) { + // Core Migration doesn't affect us as threads schedule and load balance themselves + state.logger->Debug("svcSleepThread: Yielding thread ({})", in); + state.scheduler->Rotate(); + state.scheduler->WaitSchedule(); } } diff --git a/app/src/main/cpp/skyline/kernel/types/KThread.cpp b/app/src/main/cpp/skyline/kernel/types/KThread.cpp index b2845214..5bda2adf 100644 --- a/app/src/main/cpp/skyline/kernel/types/KThread.cpp +++ b/app/src/main/cpp/skyline/kernel/types/KThread.cpp @@ -1,8 +1,8 @@ // SPDX-License-Identifier: MPL-2.0 // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) +#include #include -#include #include #include #include @@ -39,6 +39,8 @@ namespace skyline::kernel::type { state.thread = shared_from_this(); if (setjmp(originalCtx)) { // Returns 1 if it's returning from guest, 0 otherwise + state.scheduler->RemoveThread(); + running = false; Signal(); @@ -52,85 +54,108 @@ namespace skyline::kernel::type { signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, nce::NCE::SignalHandler); - asm volatile( - "MRS X0, TPIDR_EL0\n\t" - "MSR TPIDR_EL0, %x0\n\t" // Set TLS to ThreadContext - "STR X0, [%x0, #0x2A0]\n\t" // Write ThreadContext::hostTpidrEl0 - "MOV X0, SP\n\t" - "STR X0, [%x0, #0x2A8]\n\t" // Write ThreadContext::hostSp - "MOV SP, %x1\n\t" // Replace SP with guest stack - "MOV LR, %x2\n\t" // Store entry in Link Register so it is jumped to on return - "MOV X0, %x3\n\t" // Store the argument in X0 - "MOV X1, %x4\n\t" // Store the thread handle in X1, NCA applications require this - "MOV X2, XZR\n\t" // Zero out other GP and SIMD registers, not doing this will break applications - "MOV X3, XZR\n\t" - "MOV X4, XZR\n\t" - "MOV X5, XZR\n\t" - "MOV X6, XZR\n\t" - "MOV X7, XZR\n\t" - "MOV X8, XZR\n\t" - "MOV X9, XZR\n\t" - "MOV X10, XZR\n\t" - "MOV X11, XZR\n\t" - "MOV X12, XZR\n\t" - "MOV X13, XZR\n\t" - "MOV X14, XZR\n\t" - "MOV X15, XZR\n\t" - "MOV X16, XZR\n\t" - "MOV X17, XZR\n\t" - "MOV X18, XZR\n\t" - "MOV X19, XZR\n\t" - "MOV X20, XZR\n\t" - "MOV X21, XZR\n\t" - "MOV X22, XZR\n\t" - "MOV X23, XZR\n\t" - "MOV X24, XZR\n\t" - "MOV X25, XZR\n\t" - "MOV X26, XZR\n\t" - "MOV X27, XZR\n\t" - "MOV X28, XZR\n\t" - "MOV X29, XZR\n\t" - "MSR FPSR, XZR\n\t" - "MSR FPCR, XZR\n\t" - "DUP V0.16B, WZR\n\t" - "DUP V1.16B, WZR\n\t" - "DUP V2.16B, WZR\n\t" - "DUP V3.16B, WZR\n\t" - "DUP V4.16B, WZR\n\t" - "DUP V5.16B, WZR\n\t" - "DUP V6.16B, WZR\n\t" - "DUP V7.16B, WZR\n\t" - "DUP V8.16B, WZR\n\t" - "DUP V9.16B, WZR\n\t" - "DUP V10.16B, WZR\n\t" - "DUP V11.16B, WZR\n\t" - "DUP V12.16B, WZR\n\t" - "DUP V13.16B, WZR\n\t" - "DUP V14.16B, WZR\n\t" - "DUP V15.16B, WZR\n\t" - "DUP V16.16B, WZR\n\t" - "DUP V17.16B, WZR\n\t" - "DUP V18.16B, WZR\n\t" - "DUP V19.16B, WZR\n\t" - "DUP V20.16B, WZR\n\t" - "DUP V21.16B, WZR\n\t" - "DUP V22.16B, WZR\n\t" - "DUP V23.16B, WZR\n\t" - "DUP V24.16B, WZR\n\t" - "DUP V25.16B, WZR\n\t" - "DUP V26.16B, WZR\n\t" - "DUP V27.16B, WZR\n\t" - "DUP V28.16B, WZR\n\t" - "DUP V29.16B, WZR\n\t" - "DUP V30.16B, WZR\n\t" - "DUP V31.16B, WZR\n\t" - "RET" - : - : "r"(&ctx), "r"(stackTop), "r"(entry), "r"(entryArgument), "r"(handle) - : "x0", "x1", "lr" - ); + try { + state.scheduler->InsertThread(); + state.scheduler->WaitSchedule(); - __builtin_unreachable(); + asm volatile( + "MRS X0, TPIDR_EL0\n\t" + "MSR TPIDR_EL0, %x0\n\t" // Set TLS to ThreadContext + "STR X0, [%x0, #0x2A0]\n\t" // Write ThreadContext::hostTpidrEl0 + "MOV X0, SP\n\t" + "STR X0, [%x0, #0x2A8]\n\t" // Write ThreadContext::hostSp + "MOV SP, %x1\n\t" // Replace SP with guest stack + "MOV LR, %x2\n\t" // Store entry in Link Register so it is jumped to on return + "MOV X0, %x3\n\t" // Store the argument in X0 + "MOV X1, %x4\n\t" // Store the thread handle in X1, NCA applications require this + "MOV X2, XZR\n\t" // Zero out other GP and SIMD registers, not doing this will break applications + "MOV X3, XZR\n\t" + "MOV X4, XZR\n\t" + "MOV X5, XZR\n\t" + "MOV X6, XZR\n\t" + "MOV X7, XZR\n\t" + "MOV X8, XZR\n\t" + "MOV X9, XZR\n\t" + "MOV X10, XZR\n\t" + "MOV X11, XZR\n\t" + "MOV X12, XZR\n\t" + "MOV X13, XZR\n\t" + "MOV X14, XZR\n\t" + "MOV X15, XZR\n\t" + "MOV X16, XZR\n\t" + "MOV X17, XZR\n\t" + "MOV X18, XZR\n\t" + "MOV X19, XZR\n\t" + "MOV X20, XZR\n\t" + "MOV X21, XZR\n\t" + "MOV X22, XZR\n\t" + "MOV X23, XZR\n\t" + "MOV X24, XZR\n\t" + "MOV X25, XZR\n\t" + "MOV X26, XZR\n\t" + "MOV X27, XZR\n\t" + "MOV X28, XZR\n\t" + "MOV X29, XZR\n\t" + "MSR FPSR, XZR\n\t" + "MSR FPCR, XZR\n\t" + "DUP V0.16B, WZR\n\t" + "DUP V1.16B, WZR\n\t" + "DUP V2.16B, WZR\n\t" + "DUP V3.16B, WZR\n\t" + "DUP V4.16B, WZR\n\t" + "DUP V5.16B, WZR\n\t" + "DUP V6.16B, WZR\n\t" + "DUP V7.16B, WZR\n\t" + "DUP V8.16B, WZR\n\t" + "DUP V9.16B, WZR\n\t" + "DUP V10.16B, WZR\n\t" + "DUP V11.16B, WZR\n\t" + "DUP V12.16B, WZR\n\t" + "DUP V13.16B, WZR\n\t" + "DUP V14.16B, WZR\n\t" + "DUP V15.16B, WZR\n\t" + "DUP V16.16B, WZR\n\t" + "DUP V17.16B, WZR\n\t" + "DUP V18.16B, WZR\n\t" + "DUP V19.16B, WZR\n\t" + "DUP V20.16B, WZR\n\t" + "DUP V21.16B, WZR\n\t" + "DUP V22.16B, WZR\n\t" + "DUP V23.16B, WZR\n\t" + "DUP V24.16B, WZR\n\t" + "DUP V25.16B, WZR\n\t" + "DUP V26.16B, WZR\n\t" + "DUP V27.16B, WZR\n\t" + "DUP V28.16B, WZR\n\t" + "DUP V29.16B, WZR\n\t" + "DUP V30.16B, WZR\n\t" + "DUP V31.16B, WZR\n\t" + "RET" + : + : "r"(&ctx), "r"(stackTop), "r"(entry), "r"(entryArgument), "r"(handle) + : "x0", "x1", "lr" + ); + + __builtin_unreachable(); + } catch (const std::exception &e) { + state.logger->Error(e.what()); + if (id) { + signal::BlockSignal({SIGINT}); + state.process->Kill(false); + } + abi::__cxa_end_catch(); + std::longjmp(originalCtx, true); + } catch (const signal::SignalException &e) { + if (e.signal != SIGINT) { + state.logger->Error(e.what()); + if (id) { + signal::BlockSignal({SIGINT}); + state.process->Kill(false); + } + } + abi::__cxa_end_catch(); + std::longjmp(originalCtx, true); + } } void KThread::Start(bool self) { diff --git a/app/src/main/cpp/skyline/kernel/types/KThread.h b/app/src/main/cpp/skyline/kernel/types/KThread.h index f71fd147..2ce4f81e 100644 --- a/app/src/main/cpp/skyline/kernel/types/KThread.h +++ b/app/src/main/cpp/skyline/kernel/types/KThread.h @@ -42,6 +42,8 @@ namespace skyline { i8 idealCore; //!< The ideal CPU core for this thread to run on i8 coreId; //!< The CPU core on which this thread is running CoreMask affinityMask{}; //!< A mask of CPU cores this thread is allowed to run on + u64 timesliceStart{}; //!< Start of the scheduler timeslice + u64 averageTimeslice{}; //!< A weighted average of the timeslice duration for this thread KThread(const DeviceState &state, KHandle handle, KProcess *parent, size_t id, void *entry, u64 argument, void *stackTop, i8 priority, i8 idealCore); diff --git a/app/src/main/cpp/skyline/os.h b/app/src/main/cpp/skyline/os.h index c04ac281..ca6703d1 100644 --- a/app/src/main/cpp/skyline/os.h +++ b/app/src/main/cpp/skyline/os.h @@ -29,14 +29,5 @@ namespace skyline::kernel { * @param romType The type of the ROM file */ void Execute(int romFd, loader::RomFormat romType); - - /** - * @brief Creates a new process - * @param entry The entry point for the new process - * @param argument The argument for the initial function - * @param stackSize The size of the main stack - * @return An instance of the KProcess of the created process - */ - std::shared_ptr CreateProcess(u64 entry, u64 argument, size_t stackSize); }; }