diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index c4e4d0db7b..37e42ba94f 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -70,7 +70,7 @@ static Common::Event g_compressAndDumpStateSyncEvent; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -static const u32 STATE_VERSION = 58; +static const u32 STATE_VERSION = 59; // Last changed in PR 3490 // Maps savestate versions to Dolphin versions. // Versions after 42 don't need to be added to this list, diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 32095d57b2..d038b382c6 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -268,8 +268,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base) void GatherPipeBursted() { - if (IsOnThread()) - SetCPStatusFromCPU(); + SetCPStatusFromCPU(); // if we aren't linked, we don't care about gather pipe data if (!m_CPCtrlReg.GPLinkEnable) diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 97cb8753e0..b021452594 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -17,6 +17,7 @@ #include "Core/ConfigManager.h" #include "Core/CoreTiming.h" #include "Core/HW/Memmap.h" +#include "Core/HW/SystemTimers.h" #include "Core/NetPlayProto.h" #include "VideoCommon/AsyncRequests.h" @@ -31,6 +32,7 @@ namespace Fifo { static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024; +static constexpr int GPU_TIME_SLOT_SIZE = 1000; static bool s_skip_current_frame = false; @@ -47,7 +49,6 @@ static u8* s_fifo_aux_read_ptr; // and can change at runtime. static bool s_use_deterministic_gpu_thread; -static u64 s_last_sync_gpu_tick; static CoreTiming::EventType* s_event_sync_gpu; // STATE_TO_SAVE @@ -69,6 +70,7 @@ static u8* s_video_buffer_pp_read_ptr; // - The pp_read_ptr is the CPU preprocessing version of the read_ptr. 
static std::atomic<int> s_sync_ticks; +static bool s_syncing_suspended; static Common::Event s_sync_wakeup_event; void DoState(PointerWrap& p) @@ -85,7 +87,7 @@ void DoState(PointerWrap& p) } p.Do(s_skip_current_frame); - p.Do(s_last_sync_gpu_tick); + p.Do(s_sync_ticks); } void PauseAndLock(bool doLock, bool unpauseOnUnlock) @@ -422,55 +424,78 @@ bool AtBreakpoint() void RunGpu() { - SCPFifoStruct& fifo = CommandProcessor::fifo; const SConfig& param = SConfig::GetInstance(); - // execute GPU - if (!param.bCPUThread || s_use_deterministic_gpu_thread) - { - bool reset_simd_state = false; - while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) - { - if (s_use_deterministic_gpu_thread) - { - ReadDataFromFifoOnCPU(fifo.CPReadPointer); - s_gpu_mainloop.Wakeup(); - } - else - { - if (!reset_simd_state) - { - FPURoundMode::SaveSIMDState(); - FPURoundMode::LoadDefaultSIMDState(); - reset_simd_state = true; - } - ReadDataFromFifo(fifo.CPReadPointer); - s_video_buffer_read_ptr = OpcodeDecoder::Run( - DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false); - } - - // DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); - - if (fifo.CPReadPointer == fifo.CPEnd) - fifo.CPReadPointer = fifo.CPBase; - else - fifo.CPReadPointer += 32; - - fifo.CPReadWriteDistance -= 32; - } - CommandProcessor::SetCPStatusFromGPU(); - - if (reset_simd_state) - { - FPURoundMode::LoadSIMDState(); - } - } - // wake up GPU thread - if (param.bCPUThread) + if (param.bCPUThread && !s_use_deterministic_gpu_thread) { s_gpu_mainloop.Wakeup(); } + + // if the sync GPU callback is suspended, wake it up. 
+ if (!SConfig::GetInstance().bCPUThread || s_use_deterministic_gpu_thread || + SConfig::GetInstance().bSyncGPU) + { + if (s_syncing_suspended) + { + s_syncing_suspended = false; + CoreTiming::ScheduleEvent(GPU_TIME_SLOT_SIZE, s_event_sync_gpu, GPU_TIME_SLOT_SIZE); + } + } +} + +static int RunGpuOnCpu(int ticks) +{ + SCPFifoStruct& fifo = CommandProcessor::fifo; + bool reset_simd_state = false; + int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load(); + while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() && + available_ticks >= 0) + { + if (s_use_deterministic_gpu_thread) + { + ReadDataFromFifoOnCPU(fifo.CPReadPointer); + s_gpu_mainloop.Wakeup(); + } + else + { + if (!reset_simd_state) + { + FPURoundMode::SaveSIMDState(); + FPURoundMode::LoadDefaultSIMDState(); + reset_simd_state = true; + } + ReadDataFromFifo(fifo.CPReadPointer); + u32 cycles = 0; + s_video_buffer_read_ptr = OpcodeDecoder::Run( + DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false); + available_ticks -= cycles; + } + + if (fifo.CPReadPointer == fifo.CPEnd) + fifo.CPReadPointer = fifo.CPBase; + else + fifo.CPReadPointer += 32; + + fifo.CPReadWriteDistance -= 32; + } + + CommandProcessor::SetCPStatusFromGPU(); + + if (reset_simd_state) + { + FPURoundMode::LoadSIMDState(); + } + + // Discard all available ticks as there is nothing to do any more. + s_sync_ticks.store(std::min(available_ticks, 0)); + + // If the GPU is idle, drop the handler. + if (available_ticks >= 0) + return -1; + + // Always wait at least for GPU_TIME_SLOT_SIZE cycles. + return -available_ticks + GPU_TIME_SLOT_SIZE; } void UpdateWantDeterminism(bool want) @@ -521,24 +546,27 @@ bool UseDeterministicGPUThread() } /* This function checks the emulated CPU - GPU distance and may wake up the GPU, - * or block the CPU if required. It should be called by the CPU thread regulary. + * or block the CPU if required. 
It should be called by the CPU thread regularly. * @ticks The gone emulated CPU time. - * @return A good time to call Update() next. + * @return A good time to call WaitForGpuThread() next. */ -static int Update(int ticks) +static int WaitForGpuThread(int ticks) { const SConfig& param = SConfig::GetInstance(); // GPU is sleeping, so no need for synchronization if (s_gpu_mainloop.IsDone() || s_use_deterministic_gpu_thread) { - if (s_sync_ticks.load() < 0) + if ((s_sync_ticks.load() + ticks) < 0) { - int old = s_sync_ticks.fetch_add(ticks); - if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance) - RunGpu(); + s_sync_ticks.store(s_sync_ticks.load() + ticks); + return 0 - s_sync_ticks.load(); + } + else + { + s_sync_ticks.store(0); + return -1; } - return param.iSyncGpuMaxDistance; } // Wakeup GPU @@ -558,24 +586,29 @@ static int Update(int ticks) return param.iSyncGpuMaxDistance - s_sync_ticks.load(); } -static void SyncGPUCallback(u64 userdata, s64 cyclesLate) +static void SyncGPUCallback(u64 ticks, s64 cyclesLate) { - u64 now = CoreTiming::GetTicks(); - int next = Fifo::Update((int)(now - s_last_sync_gpu_tick)); - s_last_sync_gpu_tick = now; + ticks += cyclesLate; + int next = -1; - if (next > 0) - CoreTiming::ScheduleEvent(next, s_event_sync_gpu); + if (!SConfig::GetInstance().bCPUThread || s_use_deterministic_gpu_thread) + { + next = RunGpuOnCpu((int)ticks); + } + else if (SConfig::GetInstance().bSyncGPU) + { + next = WaitForGpuThread((int)ticks); + } + + s_syncing_suspended = next < 0; + if (!s_syncing_suspended) + CoreTiming::ScheduleEvent(next, s_event_sync_gpu, next); } // Initialize GPU - CPU thread syncing, this gives us a deterministic way to start the GPU thread. 
void Prepare() { - if (SConfig::GetInstance().bCPUThread && SConfig::GetInstance().bSyncGPU) - { - s_event_sync_gpu = CoreTiming::RegisterEvent("SyncGPUCallback", SyncGPUCallback); - CoreTiming::ScheduleEvent(0, s_event_sync_gpu); - s_last_sync_gpu_tick = CoreTiming::GetTicks(); - } + s_event_sync_gpu = CoreTiming::RegisterEvent("SyncGPUCallback", SyncGPUCallback); + s_syncing_suspended = true; } }