From 1651298a2c0eab20e0eb7e1b61fc9443a015fbbf Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy Date: Sat, 30 Mar 2024 20:00:02 -0400 Subject: [PATCH] Fixed crash caused by input latency reduction patch --- patches/input_latency.c | 91 ++++++++++++++++++++++++++++++++++------- patches/syms.ld | 3 +- src/game/recomp_api.cpp | 4 ++ src/recomp/cont.cpp | 16 ++++---- ultramodern/events.cpp | 5 ++- 5 files changed, 94 insertions(+), 25 deletions(-) diff --git a/patches/input_latency.c b/patches/input_latency.c index fb9dbf6..e1fead0 100644 --- a/patches/input_latency.c +++ b/patches/input_latency.c @@ -2,6 +2,7 @@ #include "sys_cfb.h" #include "buffers.h" #include "fault.h" +#include "z64speed_meter.h" void recomp_set_current_frame_poll_id(); void PadMgr_HandleRetrace(void); @@ -31,6 +32,10 @@ void PadMgr_GetInput2(Input* inputs, s32 gameRequest) { } extern CfbInfo sGraphCfbInfos[3]; +u32 recomp_time_us(); +void recomp_measure_latency(); + +OSMesgQueue *rdp_queue_ptr = NULL; // @recomp Immediately sends the graphics task instead of queueing it in the scheduler. void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) { @@ -41,11 +46,28 @@ void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) { OSTimer timer; OSMesg msg; CfbInfo* cfb; + + // @recomp Additional static members for extra scheduling purposes. + static IrqMgrClient irq_client = {0}; + static OSMesgQueue vi_queue = {0}; + static OSMesg vi_buf[8] = {0}; + static OSMesgQueue rdp_queue = {0}; + static OSMesg rdp_mesg = NULL; + static bool created = false; + if (!created) { + created = true; + osCreateMesgQueue(&vi_queue, vi_buf, ARRAY_COUNT(vi_buf)); + osCreateMesgQueue(&rdp_queue, &rdp_mesg, 1); + extern IrqMgr gIrqMgr; + IrqMgr_AddClient(&gIrqMgr, &irq_client, &vi_queue); + } // @recomp Disable the wait here so that it can be moved after task submission for minimizing latency. // retry: // osSetTimer(&timer, OS_USEC_TO_CYCLES(3 * 1000 * 1000), 0, &gfxCtx->queue, (OSMesg)666); -// osRecvMesg(&gfxCtx->queue, &msg, OS_MESG_BLOCK); + u32 count_before = recomp_time_us(); + osRecvMesg(&gfxCtx->queue, &msg, OS_MESG_BLOCK); + u32 count_after = recomp_time_us(); // osStopTimer(&timer); // if (msg == (OSMesg)666) { @@ -97,7 +119,6 @@ void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) { { s32 pad; } - cfb = &sGraphCfbInfos[cfbIdx]; cfbIdx = (cfbIdx + 1) % ARRAY_COUNT(sGraphCfbInfos); @@ -122,26 +143,66 @@ void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) { osRecvMesg(&gfxCtx->queue, NULL, OS_MESG_NOBLOCK); } + // @recomp Set up the dedicated RDP complete message queue pointer for the scheduler. + rdp_queue_ptr = &rdp_queue; + gfxCtx->schedMsgQ = &gSchedContext.cmdQ; + recomp_measure_latency(); osSendMesg(&gSchedContext.cmdQ, scTask, OS_MESG_BLOCK); Sched_SendEntryMsg(&gSchedContext); + // @recomp Wait for the RDP complete message to mitigate waiting between creating the next task and submitting it. + osRecvMesg(&rdp_queue, NULL, OS_MESG_BLOCK); + rdp_queue_ptr = NULL; + // @recomp Manually wait the required number of VI periods after submitting the task // so that the next frame doesn't need to wait before submitting its task. - static IrqMgrClient irq_client = {0}; - static OSMesgQueue vi_queue = {0}; - static OSMesg vi_buf[8] = {0}; - static bool created = false; - - // Create the message queue and install the VI irq manager - if (!created) { - created = true; - osCreateMesgQueue(&vi_queue, vi_buf, ARRAY_COUNT(vi_buf)); - extern IrqMgr gIrqMgr; - IrqMgr_AddClient(&gIrqMgr, &irq_client, &vi_queue); - } - + u32 vi_count_before = recomp_time_us(); for (int i = 0; i < cfb->updateRate; i++) { osRecvMesg(&vi_queue, NULL, OS_MESG_BLOCK); } + + // @recomp Flush any excess VI messages that came in. + while (osRecvMesg(&vi_queue, NULL, OS_MESG_NOBLOCK) == 0) { + ; + } + u32 vi_count_after = recomp_time_us(); + + // recomp_printf("recv wait added %d us\nVI wait: %d us\n", count_after - count_before, vi_count_after - vi_count_before); +} + +extern OSTime sRDPStartTime; + +// @recomp Patched to send a message to the dedicated RDP complete message queue. +void Sched_HandleRDPDone(SchedContext* sched) { + OSScTask* curRDP; + OSScTask* nextRSP = NULL; + OSScTask* nextRDP = NULL; + s32 state; + + if (sched->curRDPTask == NULL) { + osSyncPrintf("__scHandleRDP:sc->curRDPTask == NULL\n"); + return; + } + + // Log run time + gRDPTimeAcc = osGetTime() - sRDPStartTime; + + // Mark task done + curRDP = sched->curRDPTask; + sched->curRDPTask = NULL; + curRDP->state &= ~OS_SC_DP; + + // @recomp Send a message to the dedicated RDP complete message queue if it's currently set up. + if (rdp_queue_ptr) { + osSendMesg(rdp_queue_ptr, NULL, OS_MESG_BLOCK); + } + + Sched_NotifyDone(sched, curRDP); + + // Schedule and run next task + state = ((sched->curRSPTask == NULL) << 1) | (sched->curRDPTask == NULL); + if (Sched_Schedule(sched, &nextRSP, &nextRDP, state) != state) { + Sched_RunTask(sched, nextRSP, nextRDP); + } } diff --git a/patches/syms.ld b/patches/syms.ld index 662bba8..4412acf 100644 --- a/patches/syms.ld +++ b/patches/syms.ld @@ -23,4 +23,5 @@ __sinf_recomp = 0x8F000038; __cosf_recomp = 0x8F00003C; osCreateMesgQueue_recomp = 0x8F000048; recomp_set_current_frame_poll_id = 0x8F00004C; - +recomp_time_us = 0x8F000050; +recomp_measure_latency = 0x8F000054; diff --git a/src/game/recomp_api.cpp b/src/game/recomp_api.cpp index c47b100..f5c7f7c 100644 --- a/src/game/recomp_api.cpp +++ b/src/game/recomp_api.cpp @@ -77,3 +77,7 @@ extern "C" void recomp_get_bgm_volume(uint8_t* rdram, recomp_context* ctx) { extern "C" void recomp_get_low_health_beeps_enabled(uint8_t* rdram, recomp_context* ctx) { _return(ctx, static_cast(recomp::get_low_health_beeps_enabled())); } + +extern "C" void recomp_time_us(uint8_t* rdram, recomp_context* ctx) { + _return(ctx, static_cast(std::chrono::duration_cast(ultramodern::time_since_start()).count())); +} diff --git a/src/recomp/cont.cpp b/src/recomp/cont.cpp index cfbac86..8e0c18f 100644 --- a/src/recomp/cont.cpp +++ b/src/recomp/cont.cpp @@ -3,22 +3,22 @@ static ultramodern::input_callbacks_t input_callbacks; -constexpr size_t num_poll_ids = 8; -std::chrono::system_clock::time_point input_poll_times[num_poll_ids]; -s32 cur_poll_id = 0; -s32 cur_frame_poll_id = 0; +std::chrono::system_clock::time_point input_poll_time; void update_poll_time() { - cur_poll_id = (cur_poll_id + 1) % num_poll_ids; - input_poll_times[cur_poll_id] = std::chrono::system_clock::now(); + input_poll_time = std::chrono::system_clock::now(); } extern "C" void recomp_set_current_frame_poll_id(uint8_t* rdram, recomp_context* ctx) { - cur_frame_poll_id = cur_poll_id; + // TODO reimplement the system for tagging polls with IDs to handle games with multithreaded input polling. +} + +extern "C" void recomp_measure_latency(uint8_t* rdram, recomp_context* ctx) { + ultramodern::measure_input_latency(); } void ultramodern::measure_input_latency() { - // printf("Delta: %ld micros\n", std::chrono::duration_cast(std::chrono::system_clock::now() - input_poll_times[cur_frame_poll_id])); + // printf("Delta: %ld micros\n", std::chrono::duration_cast(std::chrono::system_clock::now() - input_poll_time)); } void set_input_callbacks(const ultramodern::input_callbacks_t& callbacks) { diff --git a/ultramodern/events.cpp b/ultramodern/events.cpp index b6ecb7f..dce03cc 100644 --- a/ultramodern/events.cpp +++ b/ultramodern/events.cpp @@ -315,14 +315,17 @@ void gfx_thread_func(uint8_t* rdram, std::atomic_flag* thread_ready, ultramodern RT64EnableInstantPresent(application); enabled_instant_present = true; } - ultramodern::measure_input_latency(); // Tell the game that the RSP completed instantly. This will allow it to queue other task types, but it won't // start another graphics task until the RDP is also complete. Games usually preserve the RSP inputs until the RDP // is finished as well, so sending this early shouldn't be an issue in most cases. // If this causes issues then the logic can be replaced with responding to yield requests. sp_complete(); + + auto rt64_start = std::chrono::system_clock::now(); RT64SendDL(rdram, &task_action->task); + auto rt64_end = std::chrono::system_clock::now(); dp_complete(); + // printf("RT64 ProcessDList time: %d us\n", static_cast(std::chrono::duration_cast(rt64_end - rt64_start).count())); } else if (const auto* swap_action = std::get_if(&action)) { events_context.vi.current_buffer = events_context.vi.next_buffer;