Fixed crash caused by input latency reduction patch

2024-12-23 11:31:57 +01:00 · 2024-03-30 20:00:02 -04:00 · 2024-03-30 20:00:02 -04:00 · 1651298a2c
commit 1651298a2c
parent 1caf81e1cf
5 changed files with 94 additions and 25 deletions
--- a/patches/input_latency.c
+++ b/patches/input_latency.c
@ -2,6 +2,7 @@
 #include "sys_cfb.h"
 #include "buffers.h"
 #include "fault.h"
+#include "z64speed_meter.h"

 void recomp_set_current_frame_poll_id();
 void PadMgr_HandleRetrace(void);
@ -31,6 +32,10 @@ void PadMgr_GetInput2(Input* inputs, s32 gameRequest) {
 }

 extern CfbInfo sGraphCfbInfos[3];
+u32 recomp_time_us();
+void recomp_measure_latency();
+
+OSMesgQueue *rdp_queue_ptr = NULL;

 // @recomp Immediately sends the graphics task instead of queueing it in the scheduler.
 void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) {
@ -42,10 +47,27 @@ void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) {
    OSMesg msg;
    CfbInfo* cfb;
    
+    // @recomp Additional static members for extra scheduling purposes.
+    static IrqMgrClient irq_client = {0};
+    static OSMesgQueue vi_queue = {0};
+    static OSMesg vi_buf[8] = {0};
+    static OSMesgQueue rdp_queue = {0};
+    static OSMesg rdp_mesg = NULL;
+    static bool created = false;
+    if (!created) {
+        created = true;
+        osCreateMesgQueue(&vi_queue, vi_buf, ARRAY_COUNT(vi_buf));
+        osCreateMesgQueue(&rdp_queue, &rdp_mesg, 1);
+        extern IrqMgr gIrqMgr;
+        IrqMgr_AddClient(&gIrqMgr, &irq_client, &vi_queue);
+    }
+
    // @recomp Disable the wait here so that it can be moved after task submission for minimizing latency.
 // retry:
 //     osSetTimer(&timer, OS_USEC_TO_CYCLES(3 * 1000 * 1000), 0, &gfxCtx->queue, (OSMesg)666);
-//     osRecvMesg(&gfxCtx->queue, &msg, OS_MESG_BLOCK);
+    u32 count_before = recomp_time_us();
+    osRecvMesg(&gfxCtx->queue, &msg, OS_MESG_BLOCK);
+    u32 count_after = recomp_time_us();
 //     osStopTimer(&timer);

 //     if (msg == (OSMesg)666) {
@ -97,7 +119,6 @@ void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) {

    { s32 pad; }

-
    cfb = &sGraphCfbInfos[cfbIdx];
    cfbIdx = (cfbIdx + 1) % ARRAY_COUNT(sGraphCfbInfos);

@ -122,26 +143,66 @@ void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) {
        osRecvMesg(&gfxCtx->queue, NULL, OS_MESG_NOBLOCK);
    }

+    // @recomp Set up the dedicated RDP complete message queue pointer for the scheduler.
+    rdp_queue_ptr = &rdp_queue;
+
    gfxCtx->schedMsgQ = &gSchedContext.cmdQ;
+    recomp_measure_latency();
    osSendMesg(&gSchedContext.cmdQ, scTask, OS_MESG_BLOCK);
    Sched_SendEntryMsg(&gSchedContext);
    
+    // @recomp Wait for the RDP complete message to mitigate waiting between creating the next task and submitting it.
+    osRecvMesg(&rdp_queue, NULL, OS_MESG_BLOCK);
+    rdp_queue_ptr = NULL;
+
    // @recomp Manually wait the required number of VI periods after submitting the task
    // so that the next frame doesn't need to wait before submitting its task.
-    static IrqMgrClient irq_client = {0};
-    static OSMesgQueue vi_queue = {0};
-    static OSMesg vi_buf[8] = {0};
-    static bool created = false;
-
-    // Create the message queue and install the VI irq manager
-    if (!created) {
-        created = true;
-        osCreateMesgQueue(&vi_queue, vi_buf, ARRAY_COUNT(vi_buf));
-        extern IrqMgr gIrqMgr;
-        IrqMgr_AddClient(&gIrqMgr, &irq_client, &vi_queue);
-    }
-
+    u32 vi_count_before = recomp_time_us();
    for (int i = 0; i < cfb->updateRate; i++) {
        osRecvMesg(&vi_queue, NULL, OS_MESG_BLOCK);
    }
+
+    // @recomp Flush any excess VI messages that came in.
+    while (osRecvMesg(&vi_queue, NULL, OS_MESG_NOBLOCK) == 0) {
+        ;
+    }
+    u32 vi_count_after = recomp_time_us();
+
+    // recomp_printf("recv wait added %d us\nVI wait: %d us\n", count_after - count_before, vi_count_after - vi_count_before);
+}
+
+extern OSTime sRDPStartTime;
+
+// @recomp Patched to send a message to the dedicated RDP complete message queue.
+void Sched_HandleRDPDone(SchedContext* sched) {
+    OSScTask* curRDP;
+    OSScTask* nextRSP = NULL;
+    OSScTask* nextRDP = NULL;
+    s32 state;
+
+    if (sched->curRDPTask == NULL) {
+        osSyncPrintf("__scHandleRDP:sc->curRDPTask == NULL\n");
+        return;
+    }
+
+    // Log run time
+    gRDPTimeAcc = osGetTime() - sRDPStartTime;
+
+    // Mark task done
+    curRDP = sched->curRDPTask;
+    sched->curRDPTask = NULL;
+    curRDP->state &= ~OS_SC_DP;
+
+    // @recomp Send a message to the dedicated RDP complete message queue if it's currently set up.
+    if (rdp_queue_ptr) {
+        osSendMesg(rdp_queue_ptr, NULL, OS_MESG_BLOCK);
+    }
+
+    Sched_NotifyDone(sched, curRDP);
+
+    // Schedule and run next task
+    state = ((sched->curRSPTask == NULL) << 1) | (sched->curRDPTask == NULL);
+    if (Sched_Schedule(sched, &nextRSP, &nextRDP, state) != state) {
+        Sched_RunTask(sched, nextRSP, nextRDP);
+    }
 }
--- a/patches/syms.ld
+++ b/patches/syms.ld
@ -23,4 +23,5 @@ __sinf_recomp = 0x8F000038;
 __cosf_recomp = 0x8F00003C;
 osCreateMesgQueue_recomp = 0x8F000048;
 recomp_set_current_frame_poll_id = 0x8F00004C;
-
+recomp_time_us = 0x8F000050;
+recomp_measure_latency = 0x8F000054;
--- a/src/game/recomp_api.cpp
+++ b/src/game/recomp_api.cpp
@ -77,3 +77,7 @@ extern "C" void recomp_get_bgm_volume(uint8_t* rdram, recomp_context* ctx) {
 extern "C" void recomp_get_low_health_beeps_enabled(uint8_t* rdram, recomp_context* ctx) {
    _return(ctx, static_cast<u32>(recomp::get_low_health_beeps_enabled()));
 }
+
+extern "C" void recomp_time_us(uint8_t* rdram, recomp_context* ctx) {
+    _return(ctx, static_cast<u32>(std::chrono::duration_cast<std::chrono::microseconds>(ultramodern::time_since_start()).count()));
+}
--- a/src/recomp/cont.cpp
+++ b/src/recomp/cont.cpp
@ -3,22 +3,22 @@

 static ultramodern::input_callbacks_t input_callbacks;

-constexpr size_t num_poll_ids = 8;
-std::chrono::system_clock::time_point input_poll_times[num_poll_ids];
-s32 cur_poll_id = 0;
-s32 cur_frame_poll_id = 0;
+std::chrono::system_clock::time_point input_poll_time;

 void update_poll_time() {
-    cur_poll_id = (cur_poll_id + 1) % num_poll_ids;
-    input_poll_times[cur_poll_id] = std::chrono::system_clock::now();
+    input_poll_time = std::chrono::system_clock::now();
 }

 extern "C" void recomp_set_current_frame_poll_id(uint8_t* rdram, recomp_context* ctx) {
-    cur_frame_poll_id = cur_poll_id;
+    // TODO reimplement the system for tagging polls with IDs to handle games with multithreaded input polling.
+}
+
+extern "C" void recomp_measure_latency(uint8_t* rdram, recomp_context* ctx) {
+    ultramodern::measure_input_latency();
 }

 void ultramodern::measure_input_latency() {
-    // printf("Delta: %ld micros\n", std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now() - input_poll_times[cur_frame_poll_id]));    
+    // printf("Delta: %ld micros\n", std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now() - input_poll_time));    
 }

 void set_input_callbacks(const ultramodern::input_callbacks_t& callbacks) {
--- a/ultramodern/events.cpp
+++ b/ultramodern/events.cpp
@ -315,14 +315,17 @@ void gfx_thread_func(uint8_t* rdram, std::atomic_flag* thread_ready, ultramodern
                    RT64EnableInstantPresent(application);
                    enabled_instant_present = true;
                }
-                ultramodern::measure_input_latency();
                // Tell the game that the RSP completed instantly. This will allow it to queue other task types, but it won't
                // start another graphics task until the RDP is also complete. Games usually preserve the RSP inputs until the RDP
                // is finished as well, so sending this early shouldn't be an issue in most cases.
                // If this causes issues then the logic can be replaced with responding to yield requests.
                sp_complete();
+
+                auto rt64_start = std::chrono::system_clock::now();
                RT64SendDL(rdram, &task_action->task);
+                auto rt64_end = std::chrono::system_clock::now();
                dp_complete();
+                // printf("RT64 ProcessDList time: %d us\n", static_cast<u32>(std::chrono::duration_cast<std::chrono::microseconds>(rt64_end - rt64_start).count()));
            }
            else if (const auto* swap_action = std::get_if<SwapBuffersAction>(&action)) {
                events_context.vi.current_buffer = events_context.vi.next_buffer;