Implement Cooperative Scheduling With Load Balancing

This commit is contained in:
◱ PixelyIon 2020-12-05 23:11:52 +05:30 committed by ◱ Mark
parent 8564edcb16
commit cf000f5750
10 changed files with 321 additions and 102 deletions

View File

@ -41,6 +41,7 @@ add_library(skyline SHARED
${source_DIR}/skyline/jvm.cpp ${source_DIR}/skyline/jvm.cpp
${source_DIR}/skyline/os.cpp ${source_DIR}/skyline/os.cpp
${source_DIR}/skyline/kernel/memory.cpp ${source_DIR}/skyline/kernel/memory.cpp
${source_DIR}/skyline/kernel/scheduler.cpp
${source_DIR}/skyline/kernel/ipc.cpp ${source_DIR}/skyline/kernel/ipc.cpp
${source_DIR}/skyline/kernel/svc.cpp ${source_DIR}/skyline/kernel/svc.cpp
${source_DIR}/skyline/kernel/types/KProcess.cpp ${source_DIR}/skyline/kernel/types/KProcess.cpp

View File

@ -5,9 +5,10 @@
#include <pthread.h> #include <pthread.h>
#include <unistd.h> #include <unistd.h>
#include <android/log.h> #include <android/log.h>
#include "skyline/loader/loader.h"
#include "skyline/common.h" #include "skyline/common.h"
#include "skyline/common/signal.h"
#include "skyline/common/settings.h" #include "skyline/common/settings.h"
#include "skyline/loader/loader.h"
#include "skyline/os.h" #include "skyline/os.h"
#include "skyline/jvm.h" #include "skyline/jvm.h"
#include "skyline/gpu.h" #include "skyline/gpu.h"
@ -50,6 +51,8 @@ extern "C" JNIEXPORT void Java_emu_skyline_EmulationActivity_executeApplication(
os->Execute(romFd, static_cast<skyline::loader::RomFormat>(romType)); os->Execute(romFd, static_cast<skyline::loader::RomFormat>(romType));
} catch (std::exception &e) { } catch (std::exception &e) {
logger->Error(e.what()); logger->Error(e.what());
} catch (const skyline::signal::SignalException &e) {
logger->Error(e.what());
} catch (...) { } catch (...) {
logger->Error("An unknown exception has occurred"); logger->Error("An unknown exception has occurred");
} }

View File

@ -62,6 +62,7 @@ namespace skyline {
gpu = std::make_shared<gpu::GPU>(*this); gpu = std::make_shared<gpu::GPU>(*this);
audio = std::make_shared<audio::Audio>(*this); audio = std::make_shared<audio::Audio>(*this);
nce = std::make_shared<nce::NCE>(*this); nce = std::make_shared<nce::NCE>(*this);
scheduler = std::make_shared<kernel::Scheduler>(*this);
input = std::make_shared<input::Input>(*this); input = std::make_shared<input::Input>(*this);
} }
} }

View File

@ -480,6 +480,7 @@ namespace skyline {
class KProcess; class KProcess;
class KThread; class KThread;
} }
class Scheduler;
class OS; class OS;
} }
namespace audio { namespace audio {
@ -506,6 +507,7 @@ namespace skyline {
std::shared_ptr<gpu::GPU> gpu; std::shared_ptr<gpu::GPU> gpu;
std::shared_ptr<audio::Audio> audio; std::shared_ptr<audio::Audio> audio;
std::shared_ptr<nce::NCE> nce; std::shared_ptr<nce::NCE> nce;
std::shared_ptr<kernel::Scheduler> scheduler;
std::shared_ptr<kernel::type::KProcess> process; std::shared_ptr<kernel::type::KProcess> process;
static thread_local inline std::shared_ptr<kernel::type::KThread> thread{}; //!< The KThread of the thread which accesses this object static thread_local inline std::shared_ptr<kernel::type::KThread> thread{}; //!< The KThread of the thread which accesses this object
static thread_local inline nce::ThreadContext *ctx{}; //!< The context of the guest thread for the corresponding host thread static thread_local inline nce::ThreadContext *ctx{}; //!< The context of the guest thread for the corresponding host thread

View File

@ -0,0 +1,134 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <common/signal.h>
#include "types/KThread.h"
#include "scheduler.h"
namespace skyline::kernel {
Scheduler::CoreContext::CoreContext(u8 id) : id(id) {}
Scheduler::Scheduler(const DeviceState &state) : state(state) {}
Scheduler::CoreContext &Scheduler::LoadBalance() {
auto &thread{state.thread};
auto currentCore{&cores.at(thread->coreId)};
if (!currentCore->queue.empty() && thread->affinityMask.count() != 1) {
// Select core where the current thread will be scheduled the earliest based off average timeslice durations for resident threads
// There's a preference for the current core as migration isn't free
size_t minTimeslice{};
CoreContext *optimalCore{};
for (auto &candidateCore : cores) {
if (thread->affinityMask.test(candidateCore.id)) {
u64 timeslice{};
if (!candidateCore.queue.empty()) {
std::shared_lock lock(candidateCore.mutex);
auto threadIterator{candidateCore.queue.cbegin()};
if (threadIterator != candidateCore.queue.cend()) {
const auto &runningThread{*threadIterator};
timeslice += runningThread->averageTimeslice ? std::min(runningThread->averageTimeslice - (util::GetTimeTicks() - runningThread->timesliceStart), 1UL) : runningThread->timesliceStart ? util::GetTimeTicks() - runningThread->timesliceStart : 1UL;
while (++threadIterator != candidateCore.queue.cend()) {
const auto &residentThread{*threadIterator};
if (residentThread->priority <= thread->priority)
timeslice += residentThread->averageTimeslice ? residentThread->averageTimeslice : 1UL;
}
}
}
if (!optimalCore || timeslice < minTimeslice || (timeslice == minTimeslice && &candidateCore == currentCore)) {
optimalCore = &candidateCore;
minTimeslice = timeslice;
}
}
}
if (optimalCore != currentCore) {
std::unique_lock lock(currentCore->mutex);
currentCore->queue.erase(std::remove(currentCore->queue.begin(), currentCore->queue.end(), thread), currentCore->queue.end());
currentCore->mutateCondition.notify_all();
thread->coreId = optimalCore->id;
state.logger->Debug("Load Balancing for #{}: C{} -> C{}", thread->id, currentCore->id, optimalCore->id);
} else {
state.logger->Debug("Load Balancing for #{}: C{} (Late)", thread->id, currentCore->id);
}
return *optimalCore;
}
state.logger->Debug("Load Balancing for #{}: C{} (Early)", thread->id, currentCore->id);
return *currentCore;
}
void Scheduler::InsertThread(bool loadBalance) {
auto &thread{state.thread};
auto &core{loadBalance ? LoadBalance() : cores.at(thread->coreId)};
std::unique_lock lock(core.mutex);
auto nextThread{std::find_if(core.queue.begin(), core.queue.end(), [&](const std::shared_ptr<type::KThread> &it) { return it->priority > thread->priority; })};
if (nextThread == core.queue.begin() && nextThread != core.queue.end()) {
throw exception("Migration Interrupt Required");
} else {
core.queue.insert(nextThread, thread);
}
core.mutateCondition.notify_all();
}
void Scheduler::WaitSchedule() {
auto &thread{state.thread};
auto *core{&cores.at(thread->coreId)};
std::shared_lock lock(core->mutex);
if (thread->affinityMask.count() > 1) {
std::chrono::milliseconds loadBalanceThreshold{1}; //!< The amount of time that needs to pass unscheduled for a thread to attempt load balancing
while (!core->mutateCondition.wait_for(lock, loadBalanceThreshold, [&]() { return core->queue.front() == thread; })) {
lock.unlock();
LoadBalance();
if (thread->coreId == core->id) {
lock.lock();
} else {
InsertThread(false);
core = &cores.at(thread->coreId);
lock = std::shared_lock(core->mutex);
}
loadBalanceThreshold *= 2; // We double the duration required for future load balancing for this invocation to minimize pointless load balancing
}
} else {
core->mutateCondition.wait(lock, [&]() { return core->queue.front() == thread; });
}
thread->timesliceStart = util::GetTimeTicks();
}
void Scheduler::Rotate() {
auto &thread{state.thread};
auto &core{cores.at(thread->coreId)};
std::unique_lock lock(core.mutex);
if (core.queue.front() == thread) {
thread->averageTimeslice = (thread->averageTimeslice / 4) + (3 * (util::GetTimeTicks() - thread->timesliceStart / 4)); // 0.25 * old timeslice duration + 0.75 * current timeslice duration
core.queue.pop_front();
core.queue.push_back(thread);
core.mutateCondition.notify_all();
}
}
void Scheduler::RemoveThread() {
auto &thread{state.thread};
auto &core{cores.at(thread->coreId)};
std::unique_lock lock(core.mutex);
core.queue.erase(std::remove(core.queue.begin(), core.queue.end(), thread), core.queue.end());
}
}

View File

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <common.h> #include <common.h>
#include <condition_variable>
namespace skyline { namespace skyline {
namespace constant { namespace constant {
@ -32,5 +33,57 @@ namespace skyline {
return (value >= min) && (value <= max); return (value >= min) && (value <= max);
} }
}; };
/*
* @brief The Scheduler is responsible for determining which threads should run on which virtual cores and when they should be scheduled
* @note We tend to stray a lot from HOS in our scheduler design as we've designed it around our 1 host thread per guest thread which leads to scheduling from the perspective of threads while the HOS scheduler deals with scheduling from the perspective of cores, not doing this would lead to missing out on key optimizations and serialization of scheduling
*/
class Scheduler {
private:
const DeviceState &state;
struct CoreContext {
u8 id;
std::shared_mutex mutex; //!< Synchronizes all operations on the queue
std::condition_variable_any mutateCondition; //!< A conditional variable which is signalled on every mutation of the queue
std::deque<std::shared_ptr<type::KThread>> queue; //!< A queue of threads which are running or to be run on this core
explicit CoreContext(u8 id);
};
std::array<CoreContext, constant::CoreCount> cores{CoreContext(0), CoreContext(1), CoreContext(2), CoreContext(3)};
public:
Scheduler(const DeviceState &state);
/**
* @brief Checks all cores and migrates the calling thread to the core where the calling thread should be scheduled the earliest
* @return A reference to the CoreContext of the core which the calling thread is running on after load balancing
* @note This doesn't insert the thread into the migrated process's queue after load-balancing
*/
CoreContext& LoadBalance();
/**
* @brief Inserts the calling thread into the scheduler queue at the appropriate location based on it's priority
* @param loadBalance If to load balance or use the thread's current core (KThread::coreId)
*/
void InsertThread(bool loadBalance = true);
/**
* @brief Wait for the current thread to be scheduled on it's resident core
* @note There is an assumption of the thread being on it's resident core queue, if it's not this'll never return
*/
void WaitSchedule();
/**
* @brief Rotates the calling thread's resident core queue, if it is at the front of it
*/
void Rotate();
/**
* @brief Removes the calling thread from it's resident core queue
*/
void RemoveThread();
};
} }
} }

View File

@ -281,20 +281,27 @@ namespace skyline::kernel::svc {
} }
void SleepThread(const DeviceState &state) { void SleepThread(const DeviceState &state) {
constexpr i64 yieldWithoutCoreMigration{0};
constexpr i64 yieldWithCoreMigration{-1};
constexpr i64 yieldToAnyThread{-2};
i64 in{static_cast<i64>(state.ctx->gpr.x0)}; i64 in{static_cast<i64>(state.ctx->gpr.x0)};
switch (in) { if (in > 0) {
case 0:
case -1:
case -2:
state.logger->Debug("svcSleepThread: Yielding thread: {}", in);
break;
default:
state.logger->Debug("svcSleepThread: Thread sleeping for {} ns", in); state.logger->Debug("svcSleepThread: Thread sleeping for {} ns", in);
struct timespec spec{ struct timespec spec{
.tv_sec = static_cast<time_t>(state.ctx->gpr.x0 / 1000000000), .tv_sec = static_cast<time_t>(in / 1000000000),
.tv_nsec = static_cast<long>(state.ctx->gpr.x0 % 1000000000) .tv_nsec = static_cast<long>(in % 1000000000),
}; };
state.scheduler->Rotate();
nanosleep(&spec, nullptr); nanosleep(&spec, nullptr);
state.scheduler->WaitSchedule();
} else if (in == yieldWithoutCoreMigration || in == yieldWithCoreMigration || in == yieldToAnyThread) {
// Core Migration doesn't affect us as threads schedule and load balance themselves
state.logger->Debug("svcSleepThread: Yielding thread ({})", in);
state.scheduler->Rotate();
state.scheduler->WaitSchedule();
} }
} }

View File

@ -1,8 +1,8 @@
// SPDX-License-Identifier: MPL-2.0 // SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <cxxabi.h>
#include <unistd.h> #include <unistd.h>
#include <android/log.h>
#include <common/signal.h> #include <common/signal.h>
#include <nce.h> #include <nce.h>
#include <os.h> #include <os.h>
@ -39,6 +39,8 @@ namespace skyline::kernel::type {
state.thread = shared_from_this(); state.thread = shared_from_this();
if (setjmp(originalCtx)) { // Returns 1 if it's returning from guest, 0 otherwise if (setjmp(originalCtx)) { // Returns 1 if it's returning from guest, 0 otherwise
state.scheduler->RemoveThread();
running = false; running = false;
Signal(); Signal();
@ -52,6 +54,10 @@ namespace skyline::kernel::type {
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, nce::NCE::SignalHandler); signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, nce::NCE::SignalHandler);
try {
state.scheduler->InsertThread();
state.scheduler->WaitSchedule();
asm volatile( asm volatile(
"MRS X0, TPIDR_EL0\n\t" "MRS X0, TPIDR_EL0\n\t"
"MSR TPIDR_EL0, %x0\n\t" // Set TLS to ThreadContext "MSR TPIDR_EL0, %x0\n\t" // Set TLS to ThreadContext
@ -131,6 +137,25 @@ namespace skyline::kernel::type {
); );
__builtin_unreachable(); __builtin_unreachable();
} catch (const std::exception &e) {
state.logger->Error(e.what());
if (id) {
signal::BlockSignal({SIGINT});
state.process->Kill(false);
}
abi::__cxa_end_catch();
std::longjmp(originalCtx, true);
} catch (const signal::SignalException &e) {
if (e.signal != SIGINT) {
state.logger->Error(e.what());
if (id) {
signal::BlockSignal({SIGINT});
state.process->Kill(false);
}
}
abi::__cxa_end_catch();
std::longjmp(originalCtx, true);
}
} }
void KThread::Start(bool self) { void KThread::Start(bool self) {

View File

@ -42,6 +42,8 @@ namespace skyline {
i8 idealCore; //!< The ideal CPU core for this thread to run on i8 idealCore; //!< The ideal CPU core for this thread to run on
i8 coreId; //!< The CPU core on which this thread is running i8 coreId; //!< The CPU core on which this thread is running
CoreMask affinityMask{}; //!< A mask of CPU cores this thread is allowed to run on CoreMask affinityMask{}; //!< A mask of CPU cores this thread is allowed to run on
u64 timesliceStart{}; //!< Start of the scheduler timeslice
u64 averageTimeslice{}; //!< A weighted average of the timeslice duration for this thread
KThread(const DeviceState &state, KHandle handle, KProcess *parent, size_t id, void *entry, u64 argument, void *stackTop, i8 priority, i8 idealCore); KThread(const DeviceState &state, KHandle handle, KProcess *parent, size_t id, void *entry, u64 argument, void *stackTop, i8 priority, i8 idealCore);

View File

@ -29,14 +29,5 @@ namespace skyline::kernel {
* @param romType The type of the ROM file * @param romType The type of the ROM file
*/ */
void Execute(int romFd, loader::RomFormat romType); void Execute(int romFd, loader::RomFormat romType);
/**
* @brief Creates a new process
* @param entry The entry point for the new process
* @param argument The argument for the initial function
* @param stackSize The size of the main stack
* @return An instance of the KProcess of the created process
*/
std::shared_ptr<type::KProcess> CreateProcess(u64 entry, u64 argument, size_t stackSize);
}; };
} }