From a16c37f0c5b2435a829fc5348c66297d9c762347 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 4 May 2024 07:05:59 +0200 Subject: [PATCH] coreinit: Rework thread creation New implementation is much closer to console behavior. For example we didn't align the stack which would cause crashes in the Miiverse applet --- src/Cafe/HW/Latte/Core/LatteThread.cpp | 2 +- src/Cafe/OS/libs/coreinit/coreinit.cpp | 10 +- src/Cafe/OS/libs/coreinit/coreinit_GHS.cpp | 4 +- src/Cafe/OS/libs/coreinit/coreinit_IPC.cpp | 2 +- src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp | 315 ++++++++++++++---- src/Cafe/OS/libs/coreinit/coreinit_Thread.h | 69 ++-- src/Cafe/OS/libs/nsysnet/nsysnet.cpp | 4 +- src/Cafe/OS/libs/snd_core/ax_ist.cpp | 2 +- .../ExceptionHandler/ExceptionHandler.cpp | 2 +- .../DebugPPCThreadsWindow.cpp | 4 +- 10 files changed, 297 insertions(+), 117 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteThread.cpp b/src/Cafe/HW/Latte/Core/LatteThread.cpp index a23bd5be..8874ecf4 100644 --- a/src/Cafe/HW/Latte/Core/LatteThread.cpp +++ b/src/Cafe/HW/Latte/Core/LatteThread.cpp @@ -187,7 +187,7 @@ int Latte_ThreadEntry() rule.overwrite_settings.width >= 0 || rule.overwrite_settings.height >= 0 || rule.overwrite_settings.depth >= 0) { LatteGPUState.allowFramebufferSizeOptimization = false; - cemuLog_log(LogType::Force, "Graphic pack {} prevents rendertarget size optimization.", pack->GetName()); + cemuLog_log(LogType::Force, "Graphic pack \"{}\" prevents rendertarget size optimization. 
This warning can be ignored and is intended for graphic pack developers", pack->GetName()); break; } } diff --git a/src/Cafe/OS/libs/coreinit/coreinit.cpp b/src/Cafe/OS/libs/coreinit/coreinit.cpp index e18d0e8d..49d232f8 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit.cpp @@ -35,12 +35,12 @@ #include "Cafe/OS/libs/coreinit/coreinit_MEM_BlockHeap.h" #include "Cafe/OS/libs/coreinit/coreinit_MEM_ExpHeap.h" -CoreinitSharedData* gCoreinitData = NULL; +CoreinitSharedData* gCoreinitData = nullptr; sint32 ScoreStackTrace(OSThread_t* thread, MPTR sp) { - uint32 stackMinAddr = _swapEndianU32(thread->stackEnd); - uint32 stackMaxAddr = _swapEndianU32(thread->stackBase); + uint32 stackMinAddr = thread->stackEnd.GetMPTR(); + uint32 stackMaxAddr = thread->stackBase.GetMPTR(); sint32 score = 0; uint32 currentStackPtr = sp; @@ -95,8 +95,8 @@ void DebugLogStackTrace(OSThread_t* thread, MPTR sp) // print stack trace uint32 currentStackPtr = highestScoreSP; - uint32 stackMinAddr = _swapEndianU32(thread->stackEnd); - uint32 stackMaxAddr = _swapEndianU32(thread->stackBase); + uint32 stackMinAddr = thread->stackEnd.GetMPTR(); + uint32 stackMaxAddr = thread->stackBase.GetMPTR(); for (sint32 i = 0; i < 20; i++) { uint32 nextStackPtr = memory_readU32(currentStackPtr); diff --git a/src/Cafe/OS/libs/coreinit/coreinit_GHS.cpp b/src/Cafe/OS/libs/coreinit/coreinit_GHS.cpp index 5699e3e7..e2864fb9 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_GHS.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit_GHS.cpp @@ -22,7 +22,7 @@ namespace coreinit MPTR _iob_lock[GHS_FOPEN_MAX]; uint16be __gh_FOPEN_MAX; MEMPTR ghs_environ; - uint32 ghs_Errno; // exposed by __gh_errno_ptr() or via 'errno' data export + uint32 ghs_Errno; // exposed as 'errno' data export }; SysAllocator g_ghs_data; @@ -159,7 +159,7 @@ namespace coreinit void* __gh_errno_ptr() { OSThread_t* currentThread = coreinit::OSGetCurrentThread(); - return &currentThread->context.error; + return 
&currentThread->context.ghs_errno; } void* __get_eh_store_globals() diff --git a/src/Cafe/OS/libs/coreinit/coreinit_IPC.cpp b/src/Cafe/OS/libs/coreinit/coreinit_IPC.cpp index be3cb300..12d83afc 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_IPC.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit_IPC.cpp @@ -204,7 +204,7 @@ namespace coreinit // and a message queue large enough to hold the maximum number of commands (IPC_NUM_RESOURCE_BUFFERS) OSInitMessageQueue(gIPCThreadMsgQueue.GetPtr() + coreIndex, _gIPCThreadSemaphoreStorage.GetPtr() + coreIndex * IPC_NUM_RESOURCE_BUFFERS, IPC_NUM_RESOURCE_BUFFERS); OSThread_t* ipcThread = gIPCThread.GetPtr() + coreIndex; - OSCreateThreadType(ipcThread, PPCInterpreter_makeCallableExportDepr(__IPCDriverThreadFunc), 0, nullptr, _gIPCThreadStack.GetPtr() + 0x4000 * coreIndex + 0x4000, 0x4000, 15, (1 << coreIndex), OSThread_t::THREAD_TYPE::TYPE_DRIVER); + __OSCreateThreadType(ipcThread, PPCInterpreter_makeCallableExportDepr(__IPCDriverThreadFunc), 0, nullptr, _gIPCThreadStack.GetPtr() + 0x4000 * coreIndex + 0x4000, 0x4000, 15, (1 << coreIndex), OSThread_t::THREAD_TYPE::TYPE_DRIVER); sprintf((char*)_gIPCThreadNameStorage.GetPtr()+coreIndex*0x18, "{SYS IPC Core %d}", coreIndex); OSSetThreadName(ipcThread, (char*)_gIPCThreadNameStorage.GetPtr() + coreIndex * 0x18); OSResumeThread(ipcThread); diff --git a/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp b/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp index 654e57a8..533360aa 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp @@ -215,14 +215,171 @@ namespace coreinit hCPU->spr.LR = lr; hCPU->gpr[3] = r3; hCPU->gpr[4] = r4; - hCPU->instructionPointer = _swapEndianU32(currentThread->entrypoint); + hCPU->instructionPointer = currentThread->entrypoint.GetMPTR(); } void coreinitExport_OSExitThreadDepr(PPCInterpreter_t* hCPU); - void OSCreateThreadInternal(OSThread_t* thread, uint32 entryPoint, MPTR stackLowerBaseAddr, uint32 stackSize, uint8 
affinityMask, OSThread_t::THREAD_TYPE threadType) + void __OSInitContext(OSContext_t* ctx, MEMPTR initialIP, MEMPTR initialStackPointer) + { + ctx->SetContextMagic(); + ctx->gpr[0] = 0; // r0 is left uninitialized on console? + for(auto& it : ctx->gpr) + it = 0; + ctx->gpr[1] = _swapEndianU32(initialStackPointer.GetMPTR()); + ctx->gpr[2] = _swapEndianU32(RPLLoader_GetSDA2Base()); + ctx->gpr[13] = _swapEndianU32(RPLLoader_GetSDA1Base()); + ctx->srr0 = initialIP.GetMPTR(); + ctx->cr = 0; + ctx->ukn0A8 = 0; + ctx->ukn0AC = 0; + ctx->gqr[0] = 0; + ctx->gqr[1] = 0; + ctx->gqr[2] = 0; + ctx->gqr[3] = 0; + ctx->gqr[4] = 0; + ctx->gqr[5] = 0; + ctx->gqr[6] = 0; + ctx->gqr[7] = 0; + ctx->dsi_dar = 0; + ctx->srr1 = 0x9032; + ctx->xer = 0; + ctx->dsi_dsisr = 0; + ctx->upir = 0; + ctx->boostCount = 0; + ctx->state = 0; + for(auto& it : ctx->coretime) + it = 0; + ctx->starttime = 0; + ctx->ghs_errno = 0; + ctx->upmc1 = 0; + ctx->upmc2 = 0; + ctx->upmc3 = 0; + ctx->upmc4 = 0; + ctx->ummcr0 = 0; + ctx->ummcr1 = 0; + } + + void __OSThreadInit(OSThread_t* thread, MEMPTR entrypoint, uint32 argInt, MEMPTR argPtr, MEMPTR stackTop, uint32 stackSize, sint32 priority, uint32 upirCoreIndex, OSThread_t::THREAD_TYPE threadType) + { + thread->effectivePriority = priority; + thread->type = threadType; + thread->basePriority = priority; + thread->SetThreadMagic(); + thread->id = 0x8000; + thread->waitAlarm = nullptr; + thread->entrypoint = entrypoint; + thread->quantumTicks = 0; + if(entrypoint) + { + thread->state = OSThread_t::THREAD_STATE::STATE_READY; + thread->suspendCounter = 1; + } + else + { + thread->state = OSThread_t::THREAD_STATE::STATE_NONE; + thread->suspendCounter = 0; + } + thread->exitValue = (uint32)-1; + thread->requestFlags = OSThread_t::REQUEST_FLAG_BIT::REQUEST_FLAG_NONE; + thread->pendingSuspend = 0; + thread->suspendResult = 0xFFFFFFFF; + thread->coretimeSumQuantumStart = 0; + thread->deallocatorFunc = nullptr; + thread->cleanupCallback = nullptr; + 
thread->waitingForFastMutex = nullptr; + thread->stateFlags = 0; + thread->waitingForMutex = nullptr; + memset(&thread->crt, 0, sizeof(thread->crt)); + static_assert(sizeof(thread->crt) == 0x1D8); + thread->tlsBlocksMPTR = 0; + thread->numAllocatedTLSBlocks = 0; + thread->tlsStatus = 0; + OSInitThreadQueueEx(&thread->joinQueue, thread); + OSInitThreadQueueEx(&thread->suspendQueue, thread); + thread->mutexQueue.ukn08 = thread; + thread->mutexQueue.ukn0C = 0; + thread->mutexQueue.tail = nullptr; + thread->mutexQueue.head = nullptr; + thread->ownedFastMutex.next = nullptr; + thread->ownedFastMutex.prev = nullptr; + thread->contendedFastMutex.next = nullptr; + thread->contendedFastMutex.prev = nullptr; + + MEMPTR alignedStackTop{MEMPTR(stackTop).GetMPTR() & 0xFFFFFFF8}; + MEMPTR alignedStackTop32{alignedStackTop}; + alignedStackTop32[-1] = 0; + alignedStackTop32[-2] = 0; + + __OSInitContext(&thread->context, MEMPTR(PPCInterpreter_makeCallableExportDepr(threadEntry)), (void*)(alignedStackTop32.GetPtr() - 2)); + thread->stackBase = stackTop; // without alignment + thread->stackEnd = ((uint8*)stackTop.GetPtr() - stackSize); + thread->context.upir = upirCoreIndex; + thread->context.lr = _swapEndianU32(PPCInterpreter_makeCallableExportDepr(coreinitExport_OSExitThreadDepr)); + thread->context.gpr[3] = _swapEndianU32(argInt); + thread->context.gpr[4] = _swapEndianU32(argPtr.GetMPTR()); + + *(uint32be*)((uint8*)stackTop.GetPtr() - stackSize) = 0xDEADBABE; + thread->alarmRelatedUkn = 0; + for(auto& it : thread->specificArray) + it = nullptr; + thread->context.fpscr.fpscr = 4; + for(sint32 i=0; i<32; i++) + { + thread->context.fp_ps0[i] = 0.0; + thread->context.fp_ps1[i] = 0.0; + } + thread->context.gqr[2] = 0x40004; + thread->context.gqr[3] = 0x50005; + thread->context.gqr[4] = 0x60006; + thread->context.gqr[5] = 0x70007; + + for(sint32 i=0; icontext.coretime[i] = 0; + + // currentRunQueue and waitQueueLink is not initialized by COS and instead overwritten without validation + 
// since we already have integrity checks in other functions, lets initialize it here + for(sint32 i=0; icurrentRunQueue[i] = nullptr; + thread->waitQueueLink.prev = nullptr; + thread->waitQueueLink.next = nullptr; + + thread->wakeTimeRelatedUkn2 = 0; + thread->wakeUpCount = 0; + thread->wakeUpTime = 0; + thread->wakeTimeRelatedUkn1 = 0x7FFFFFFFFFFFFFFF; + thread->quantumTicks = 0; + thread->coretimeSumQuantumStart = 0; + thread->totalCycles = 0; + + for(auto& it : thread->padding68C) + it = 0; + } + + void SetThreadAffinityToCore(OSThread_t* thread, uint32 coreIndex) + { + cemu_assert_debug(coreIndex < 3); + thread->attr &= ~(OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE0 | OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE1 | OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE2 | OSThread_t::ATTR_BIT::ATTR_UKN_010); + thread->context.affinity &= 0xFFFFFFF8; + if (coreIndex == 0) + { + thread->attr |= OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE0; + thread->context.affinity |= (1<<0); + } + else if (coreIndex == 1) + { + thread->attr |= OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE1; + thread->context.affinity |= (1<<1); + } + else // if (coreIndex == 2) + { + thread->attr |= OSThread_t::ATTR_BIT::ATTR_AFFINITY_CORE2; + thread->context.affinity |= (1<<2); + } + } + + void __OSCreateThreadOnActiveThreadWorkaround(OSThread_t* thread) { - cemu_assert_debug(thread != nullptr); // make thread struct mandatory. Caller can always use SysAllocator __OSLockScheduler(); bool isThreadStillActive = __OSIsThreadActive(thread); if (isThreadStillActive) @@ -248,84 +405,97 @@ namespace coreinit } cemu_assert_debug(__OSIsThreadActive(thread) == false); __OSUnlockScheduler(); - memset(thread, 0x00, sizeof(OSThread_t)); - // init signatures - thread->SetMagic(); - thread->type = threadType; - thread->state = (entryPoint != MPTR_NULL) ? 
OSThread_t::THREAD_STATE::STATE_READY : OSThread_t::THREAD_STATE::STATE_NONE; - thread->entrypoint = _swapEndianU32(entryPoint); - __OSSetThreadBasePriority(thread, 0); - __OSUpdateThreadEffectivePriority(thread); - // untested, but seems to work (Batman Arkham City uses these values to calculate the stack size for duplicated threads) - thread->stackBase = _swapEndianU32(stackLowerBaseAddr + stackSize); // these fields are quite important and lots of games rely on them being accurate (Examples: Darksiders 2, SMW3D, Batman Arkham City) - thread->stackEnd = _swapEndianU32(stackLowerBaseAddr); - // init stackpointer - thread->context.gpr[GPR_SP] = _swapEndianU32(stackLowerBaseAddr + stackSize - 0x20); // how many free bytes should there be at the beginning of the stack? - // init misc stuff - thread->attr = affinityMask; - thread->context.setAffinity(affinityMask); - thread->context.srr0 = PPCInterpreter_makeCallableExportDepr(threadEntry); - thread->context.lr = _swapEndianU32(PPCInterpreter_makeCallableExportDepr(coreinitExport_OSExitThreadDepr)); - thread->id = 0x8000; // Warriors Orochi 3 softlocks if this is zero due to confusing threads (_OSActivateThread should set this?) - // init ugqr - thread->context.gqr[0] = 0x00000000; - thread->context.gqr[1] = 0x00000000; - thread->context.gqr[2] = 0x00040004; - thread->context.gqr[3] = 0x00050005; - thread->context.gqr[4] = 0x00060006; - thread->context.gqr[5] = 0x00070007; - thread->context.gqr[6] = 0x00000000; - thread->context.gqr[7] = 0x00000000; - // init r2 (SDA2) and r3 (SDA) - thread->context.gpr[2] = _swapEndianU32(RPLLoader_GetSDA2Base()); - thread->context.gpr[13] = _swapEndianU32(RPLLoader_GetSDA1Base()); - // GHS related thread init? 
+ } - __OSLockScheduler(); - // if entrypoint is non-zero then put the thread on the active list and suspend it - if (entryPoint != MPTR_NULL) + bool __OSCreateThreadInternal2(OSThread_t* thread, MEMPTR entrypoint, uint32 argInt, MEMPTR argPtr, MEMPTR stackBase, uint32 stackSize, sint32 priority, uint32 attrBits, OSThread_t::THREAD_TYPE threadType) + { + __OSCreateThreadOnActiveThreadWorkaround(thread); + OSThread_t* currentThread = OSGetCurrentThread(); + if (priority < 0 || priority >= 32) { - thread->suspendCounter = 1; - __OSActivateThread(thread); - thread->state = OSThread_t::THREAD_STATE::STATE_READY; + cemuLog_log(LogType::APIErrors, "OSCreateThreadInternal: Thread priority must be in range 0-31"); + return false; + } + if (threadType == OSThread_t::THREAD_TYPE::TYPE_IO) + { + priority = priority + 0x20; + } + else if (threadType == OSThread_t::THREAD_TYPE::TYPE_APP) + { + priority = priority + 0x40; + } + if(attrBits >= 0x20 || stackBase == nullptr || stackSize == 0) + { + cemuLog_logDebug(LogType::APIErrors, "OSCreateThreadInternal: Invalid attributes, stack base or size"); + return false; + } + uint32 im = OSDisableInterrupts(); + __OSLockScheduler(thread); + + uint32 coreIndex = PPCInterpreter_getCurrentInstance() ? 
OSGetCoreId() : 1; + __OSThreadInit(thread, entrypoint, argInt, argPtr, stackBase, stackSize, priority, coreIndex, threadType); + thread->threadName = nullptr; + thread->context.affinity = attrBits & 7; + thread->attr = attrBits; + if ((attrBits & 7) == 0) // if no explicit affinity is given, use the current core + SetThreadAffinityToCore(thread, OSGetCoreId()); + if(currentThread) + { + for(sint32 i=0; idsiCallback[i] = currentThread->dsiCallback[i]; + thread->isiCallback[i] = currentThread->isiCallback[i]; + thread->programCallback[i] = currentThread->programCallback[i]; + thread->perfMonCallback[i] = currentThread->perfMonCallback[i]; + thread->alignmentExceptionCallback[i] = currentThread->alignmentExceptionCallback[i]; + } + thread->context.srr1 = thread->context.srr1 | (currentThread->context.srr1 & 0x900); + thread->context.fpscr.fpscr = thread->context.fpscr.fpscr | (currentThread->context.fpscr.fpscr & 0xF8); } else - thread->suspendCounter = 0; - __OSUnlockScheduler(); + { + for(sint32 i=0; idsiCallback[i] = 0; + thread->isiCallback[i] = 0; + thread->programCallback[i] = 0; + thread->perfMonCallback[i] = 0; + thread->alignmentExceptionCallback[i] = nullptr; + } + } + if (entrypoint) + { + thread->id = 0x8000; + __OSActivateThread(thread); // also handles adding the thread to g_activeThreadQueue + } + __OSUnlockScheduler(thread); + OSRestoreInterrupts(im); + // recompile entry point function + if (entrypoint) + PPCRecompiler_recompileIfUnvisited(entrypoint.GetMPTR()); + return true; } bool OSCreateThreadType(OSThread_t* thread, MPTR entryPoint, sint32 numParam, void* ptrParam, void* stackTop, sint32 stackSize, sint32 priority, uint32 attr, OSThread_t::THREAD_TYPE threadType) { - OSCreateThreadInternal(thread, entryPoint, memory_getVirtualOffsetFromPointer(stackTop) - stackSize, stackSize, attr, threadType); - thread->context.gpr[3] = _swapEndianU32(numParam); // num arguments - thread->context.gpr[4] = 
_swapEndianU32(memory_getVirtualOffsetFromPointer(ptrParam)); // arguments pointer - __OSSetThreadBasePriority(thread, priority); - __OSUpdateThreadEffectivePriority(thread); - // set affinity - uint8 affinityMask = 0; - affinityMask = attr & 0x7; - // if no core is selected -> set current one - if (affinityMask == 0) - affinityMask |= (1 << PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance())); - // set attr - // todo: Support for other attr bits - thread->attr = (affinityMask & 0xFF) | (attr & OSThread_t::ATTR_BIT::ATTR_DETACHED); - thread->context.setAffinity(affinityMask); - // recompile entry point function - if (entryPoint != MPTR_NULL) - PPCRecompiler_recompileIfUnvisited(entryPoint); - return true; + if(threadType != OSThread_t::THREAD_TYPE::TYPE_APP && threadType != OSThread_t::THREAD_TYPE::TYPE_IO) + { + cemuLog_logDebug(LogType::APIErrors, "OSCreateThreadType: Invalid thread type"); + cemu_assert_suspicious(); + return false; + } + return __OSCreateThreadInternal2(thread, MEMPTR(entryPoint), numParam, ptrParam, stackTop, stackSize, priority, attr, threadType); } bool OSCreateThread(OSThread_t* thread, MPTR entryPoint, sint32 numParam, void* ptrParam, void* stackTop, sint32 stackSize, sint32 priority, uint32 attr) { - return OSCreateThreadType(thread, entryPoint, numParam, ptrParam, stackTop, stackSize, priority, attr, OSThread_t::THREAD_TYPE::TYPE_APP); + return __OSCreateThreadInternal2(thread, MEMPTR(entryPoint), numParam, ptrParam, stackTop, stackSize, priority, attr, OSThread_t::THREAD_TYPE::TYPE_APP); } - // alias to OSCreateThreadType, similar to OSCreateThread, but with an additional parameter for the thread type + // similar to OSCreateThreadType, but can be used to create any type of thread bool __OSCreateThreadType(OSThread_t* thread, MPTR entryPoint, sint32 numParam, void* ptrParam, void* stackTop, sint32 stackSize, sint32 priority, uint32 attr, OSThread_t::THREAD_TYPE threadType) { - return OSCreateThreadType(thread, entryPoint, 
numParam, ptrParam, stackTop, stackSize, priority, attr, threadType); + return __OSCreateThreadInternal2(thread, MEMPTR(entryPoint), numParam, ptrParam, stackTop, stackSize, priority, attr, threadType); } bool OSRunThread(OSThread_t* thread, MPTR funcAddress, sint32 numParam, void* ptrParam) @@ -352,7 +522,7 @@ namespace coreinit // set thread state // todo - this should fully reinitialize the thread? - thread->entrypoint = _swapEndianU32(funcAddress); + thread->entrypoint = funcAddress; thread->context.srr0 = PPCInterpreter_makeCallableExportDepr(threadEntry); thread->context.lr = _swapEndianU32(PPCInterpreter_makeCallableExportDepr(coreinitExport_OSExitThreadDepr)); thread->context.gpr[3] = _swapEndianU32(numParam); @@ -378,10 +548,10 @@ namespace coreinit OSThread_t* currentThread = coreinit::OSGetCurrentThread(); // thread cleanup callback - if (!currentThread->cleanupCallback2.IsNull()) + if (currentThread->cleanupCallback) { currentThread->stateFlags = _swapEndianU32(_swapEndianU32(currentThread->stateFlags) | 0x00000001); - PPCCoreCallback(currentThread->cleanupCallback2.GetMPTR(), currentThread, _swapEndianU32(currentThread->stackEnd)); + PPCCoreCallback(currentThread->cleanupCallback.GetMPTR(), currentThread, currentThread->stackEnd); } // cpp exception cleanup if (gCoreinitData->__cpp_exception_cleanup_ptr != 0 && currentThread->crt.eh_globals != nullptr) @@ -602,7 +772,10 @@ namespace coreinit sint32 previousSuspendCount = thread->suspendCounter; cemu_assert_debug(previousSuspendCount >= 0); if (previousSuspendCount == 0) + { + cemuLog_log(LogType::APIErrors, "OSResumeThread: Resuming thread 0x{:08x} which isn't suspended", MEMPTR(thread).GetMPTR()); return 0; + } thread->suspendCounter = previousSuspendCount - resumeCount; if (thread->suspendCounter < 0) thread->suspendCounter = 0; @@ -732,8 +905,8 @@ namespace coreinit void* OSSetThreadCleanupCallback(OSThread_t* thread, void* cleanupCallback) { __OSLockScheduler(); - void* previousFunc = 
thread->cleanupCallback2.GetPtr(); - thread->cleanupCallback2 = cleanupCallback; + void* previousFunc = thread->cleanupCallback.GetPtr(); + thread->cleanupCallback = cleanupCallback; __OSUnlockScheduler(); return previousFunc; } @@ -1341,7 +1514,7 @@ namespace coreinit void __OSQueueThreadDeallocation(OSThread_t* thread) { uint32 coreIndex = OSGetCoreId(); - TerminatorThread::DeallocatorQueueEntry queueEntry(thread, memory_getPointerFromVirtualOffset(_swapEndianU32(thread->stackEnd)), thread->deallocatorFunc); + TerminatorThread::DeallocatorQueueEntry queueEntry(thread, thread->stackEnd, thread->deallocatorFunc); s_terminatorThreads[coreIndex].queueDeallocators.push(queueEntry); OSSignalSemaphoreInternal(s_terminatorThreads[coreIndex].semaphoreQueuedDeallocators.GetPtr(), false); // do not reschedule here! Current thread must not be interrupted otherwise deallocator will run too early } diff --git a/src/Cafe/OS/libs/coreinit/coreinit_Thread.h b/src/Cafe/OS/libs/coreinit/coreinit_Thread.h index b401d96d..fdbcfea7 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_Thread.h +++ b/src/Cafe/OS/libs/coreinit/coreinit_Thread.h @@ -2,9 +2,6 @@ #include "Cafe/HW/Espresso/Const.h" #include "Cafe/OS/libs/coreinit/coreinit_Scheduler.h" -#define OS_CONTEXT_MAGIC_0 'OSCo' -#define OS_CONTEXT_MAGIC_1 'ntxt' - struct OSThread_t; struct OSContextRegFPSCR_t @@ -16,6 +13,9 @@ struct OSContextRegFPSCR_t struct OSContext_t { + static constexpr uint32 OS_CONTEXT_MAGIC_0 = 0x4f53436f; // "OSCo" + static constexpr uint32 OS_CONTEXT_MAGIC_1 = 0x6e747874; // "ntxt" + /* +0x000 */ betype magic0; /* +0x004 */ betype magic1; /* +0x008 */ uint32 gpr[32]; @@ -36,24 +36,29 @@ struct OSContext_t /* +0x1BC */ uint32 gqr[8]; // GQR/UGQR /* +0x1DC */ uint32be upir; // set to current core index /* +0x1E0 */ uint64be fp_ps1[32]; - /* +0x2E0 */ uint64 uknTime2E0; - /* +0x2E8 */ uint64 uknTime2E8; - /* +0x2F0 */ uint64 uknTime2F0; - /* +0x2F8 */ uint64 uknTime2F8; - /* +0x300 */ uint32 error; // returned by 
__gh_errno_ptr() (used by socketlasterr) + /* +0x2E0 */ uint64be coretime[3]; + /* +0x2F8 */ uint64be starttime; + /* +0x300 */ uint32be ghs_errno; // returned by __gh_errno_ptr() (used by socketlasterr) /* +0x304 */ uint32be affinity; - /* +0x308 */ uint32 ukn0308; - /* +0x30C */ uint32 ukn030C; - /* +0x310 */ uint32 ukn0310; - /* +0x314 */ uint32 ukn0314; - /* +0x318 */ uint32 ukn0318; - /* +0x31C */ uint32 ukn031C; + /* +0x308 */ uint32be upmc1; + /* +0x30C */ uint32be upmc2; + /* +0x310 */ uint32be upmc3; + /* +0x314 */ uint32be upmc4; + /* +0x318 */ uint32be ummcr0; + /* +0x31C */ uint32be ummcr1; bool checkMagic() { return magic0 == (uint32)OS_CONTEXT_MAGIC_0 && magic1 == (uint32)OS_CONTEXT_MAGIC_1; } + void SetContextMagic() + { + magic0 = OS_CONTEXT_MAGIC_0; + magic1 = OS_CONTEXT_MAGIC_1; + } + + bool hasCoreAffinitySet(uint32 coreIndex) const { return (((uint32)affinity >> coreIndex) & 1) != 0; @@ -361,6 +366,8 @@ namespace coreinit struct OSThread_t { + static constexpr uint32 MAGIC_THREAD = 0x74487244; // "tHrD" + enum class THREAD_TYPE : uint32 { TYPE_DRIVER = 0, @@ -383,7 +390,7 @@ struct OSThread_t ATTR_AFFINITY_CORE1 = 0x2, ATTR_AFFINITY_CORE2 = 0x4, ATTR_DETACHED = 0x8, - // more flags? 
+ ATTR_UKN_010 = 0x10, }; enum REQUEST_FLAG_BIT : uint32 @@ -404,23 +411,21 @@ struct OSThread_t return 0; } - void SetMagic() + void SetThreadMagic() { - context.magic0 = OS_CONTEXT_MAGIC_0; - context.magic1 = OS_CONTEXT_MAGIC_1; - magic = 'tHrD'; + magic = MAGIC_THREAD; } bool IsValidMagic() const { - return magic == 'tHrD' && context.magic0 == OS_CONTEXT_MAGIC_0 && context.magic1 == OS_CONTEXT_MAGIC_1; + return magic == MAGIC_THREAD && context.magic0 == OSContext_t::OS_CONTEXT_MAGIC_0 && context.magic1 == OSContext_t::OS_CONTEXT_MAGIC_1; } /* +0x000 */ OSContext_t context; - /* +0x320 */ uint32be magic; // 'tHrD' + /* +0x320 */ uint32be magic; // "tHrD" (0x74487244) /* +0x324 */ betype state; /* +0x325 */ uint8 attr; - /* +0x326 */ uint16be id; // Warriors Orochi 3 uses this to identify threads. Seems like this is always set to 0x8000 ? + /* +0x326 */ uint16be id; // Warriors Orochi 3 uses this to identify threads /* +0x328 */ betype suspendCounter; /* +0x32C */ sint32be effectivePriority; // effective priority (lower is higher) /* +0x330 */ sint32be basePriority; // base priority (lower is higher) @@ -440,21 +445,21 @@ struct OSThread_t /* +0x38C */ coreinit::OSThreadLink activeThreadChain; // queue of active threads (g_activeThreadQueue) - /* +0x394 */ MPTR stackBase; // upper limit of stack - /* +0x398 */ MPTR stackEnd; // lower limit of stack + /* +0x394 */ MEMPTR stackBase; // upper limit of stack + /* +0x398 */ MEMPTR stackEnd; // lower limit of stack - /* +0x39C */ MPTR entrypoint; + /* +0x39C */ MEMPTR entrypoint; /* +0x3A0 */ crt_t crt; /* +0x578 */ sint32 alarmRelatedUkn; /* +0x57C */ std::array, 16> specificArray; /* +0x5BC */ betype type; /* +0x5C0 */ MEMPTR threadName; - /* +0x5C4 */ MPTR waitAlarm; // used only by OSWaitEventWithTimeout/OSSignalEvent ? + /* +0x5C4 */ MEMPTR waitAlarm; // used only by OSWaitEventWithTimeout/OSSignalEvent ? 
/* +0x5C8 */ uint32 userStackPointer; - /* +0x5CC */ MEMPTR cleanupCallback2; + /* +0x5CC */ MEMPTR cleanupCallback; /* +0x5D0 */ MEMPTR deallocatorFunc; /* +0x5D4 */ uint32 stateFlags; // 0x5D4 | various flags? Controls if canceling/suspension is allowed (at cancel points) or not? If 1 -> Cancel/Suspension not allowed, if 0 -> Cancel/Suspension allowed @@ -480,19 +485,21 @@ struct OSThread_t /* +0x660 */ uint32 ukn660; + // todo - some of the members towards the end of the struct were only added in later COS versions. Figure out the mapping between version and members + // TLS /* +0x664 */ uint16 numAllocatedTLSBlocks; /* +0x666 */ sint16 tlsStatus; /* +0x668 */ MPTR tlsBlocksMPTR; - + /* +0x66C */ MEMPTR waitingForFastMutex; /* +0x670 */ coreinit::OSFastMutexLink contendedFastMutex; /* +0x678 */ coreinit::OSFastMutexLink ownedFastMutex; + /* +0x680 */ MEMPTR alignmentExceptionCallback[Espresso::CORE_COUNT]; - /* +0x680 */ uint32 padding680[28 / 4]; + /* +0x68C */ uint32 padding68C[20 / 4]; }; - -static_assert(sizeof(OSThread_t) == 0x6A0-4); // todo - determine correct size +static_assert(sizeof(OSThread_t) == 0x6A0); namespace coreinit { diff --git a/src/Cafe/OS/libs/nsysnet/nsysnet.cpp b/src/Cafe/OS/libs/nsysnet/nsysnet.cpp index 88bca8af..dd7c9189 100644 --- a/src/Cafe/OS/libs/nsysnet/nsysnet.cpp +++ b/src/Cafe/OS/libs/nsysnet/nsysnet.cpp @@ -117,10 +117,10 @@ void nsysnetExport_socket_lib_finish(PPCInterpreter_t* hCPU) osLib_returnFromFunction(hCPU, 0); // 0 -> Success } -uint32* __gh_errno_ptr() +static uint32be* __gh_errno_ptr() { OSThread_t* osThread = coreinit::OSGetCurrentThread(); - return &osThread->context.error; + return &osThread->context.ghs_errno; } void _setSockError(sint32 errCode) diff --git a/src/Cafe/OS/libs/snd_core/ax_ist.cpp b/src/Cafe/OS/libs/snd_core/ax_ist.cpp index 30cbdbb1..17f247e0 100644 --- a/src/Cafe/OS/libs/snd_core/ax_ist.cpp +++ b/src/Cafe/OS/libs/snd_core/ax_ist.cpp @@ -963,7 +963,7 @@ namespace snd_core 
OSInitMessageQueue(__AXIstThreadMsgQueue.GetPtr(), __AXIstThreadMsgArray.GetPtr(), 0x10); // create thread uint8 istThreadAttr = 0; - coreinit::OSCreateThreadType(__AXIstThread.GetPtr(), PPCInterpreter_makeCallableExportDepr(AXIst_ThreadEntry), 0, &__AXIstThreadMsgQueue, __AXIstThreadStack.GetPtr() + 0x4000, 0x4000, 14, istThreadAttr, OSThread_t::THREAD_TYPE::TYPE_DRIVER); + coreinit::__OSCreateThreadType(__AXIstThread.GetPtr(), PPCInterpreter_makeCallableExportDepr(AXIst_ThreadEntry), 0, &__AXIstThreadMsgQueue, __AXIstThreadStack.GetPtr() + 0x4000, 0x4000, 14, istThreadAttr, OSThread_t::THREAD_TYPE::TYPE_DRIVER); coreinit::OSResumeThread(__AXIstThread.GetPtr()); } diff --git a/src/Common/ExceptionHandler/ExceptionHandler.cpp b/src/Common/ExceptionHandler/ExceptionHandler.cpp index 5fefc8ca..b6755fd8 100644 --- a/src/Common/ExceptionHandler/ExceptionHandler.cpp +++ b/src/Common/ExceptionHandler/ExceptionHandler.cpp @@ -155,7 +155,7 @@ void ExceptionHandler_LogGeneralInfo() const char* threadName = "NULL"; if (!threadItrBE->threadName.IsNull()) threadName = threadItrBE->threadName.GetPtr(); - sprintf(dumpLine, "%08x Ent %08x IP %08x LR %08x %-9s Aff %d%d%d Pri %2d Name %s", threadItrMPTR, _swapEndianU32(threadItrBE->entrypoint), threadItrBE->context.srr0, _swapEndianU32(threadItrBE->context.lr), threadStateStr, (affinity >> 0) & 1, (affinity >> 1) & 1, (affinity >> 2) & 1, effectivePriority, threadName); + sprintf(dumpLine, "%08x Ent %08x IP %08x LR %08x %-9s Aff %d%d%d Pri %2d Name %s", threadItrMPTR, threadItrBE->entrypoint.GetMPTR(), threadItrBE->context.srr0, _swapEndianU32(threadItrBE->context.lr), threadStateStr, (affinity >> 0) & 1, (affinity >> 1) & 1, (affinity >> 2) & 1, effectivePriority, threadName); // write line to log CrashLog_WriteLine(dumpLine); } diff --git a/src/gui/windows/PPCThreadsViewer/DebugPPCThreadsWindow.cpp b/src/gui/windows/PPCThreadsViewer/DebugPPCThreadsWindow.cpp index bd71942f..dfbaf76e 100644 --- 
a/src/gui/windows/PPCThreadsViewer/DebugPPCThreadsWindow.cpp +++ b/src/gui/windows/PPCThreadsViewer/DebugPPCThreadsWindow.cpp @@ -195,10 +195,10 @@ void DebugPPCThreadsWindow::RefreshThreadList() m_thread_list->InsertItem(item); m_thread_list->SetItemData(item, (long)threadItrMPTR); // entry point - sprintf(tempStr, "%08X", _swapEndianU32(cafeThread->entrypoint)); + sprintf(tempStr, "%08X", cafeThread->entrypoint.GetMPTR()); m_thread_list->SetItem(i, 1, tempStr); // stack base (low) - sprintf(tempStr, "%08X - %08X", _swapEndianU32(cafeThread->stackEnd), _swapEndianU32(cafeThread->stackBase)); + sprintf(tempStr, "%08X - %08X", cafeThread->stackEnd.GetMPTR(), cafeThread->stackBase.GetMPTR()); m_thread_list->SetItem(i, 2, tempStr); // pc RPLStoredSymbol* symbol = rplSymbolStorage_getByAddress(cafeThread->context.srr0);