diff --git a/Source/Core/Common/BlockingLoop.h b/Source/Core/Common/BlockingLoop.h
new file mode 100644
index 0000000000..8071de1a15
--- /dev/null
+++ b/Source/Core/Common/BlockingLoop.h
@@ -0,0 +1,224 @@
+// Copyright 2015 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <chrono>
+#include <cstdint>
+#include <mutex>
+
+#include "Common/Event.h"
+#include "Common/Flag.h"
+
+namespace Common
+{
+
+// This class provides a synchronized loop.
+// It's a thread-safe way to trigger a new iteration without busy loops.
+// It's optimized for high-usage iterations which usually are already running while it's triggered often.
+// Be careful when using Wait() and Wakeup() at the same time: Wait() may block forever while
+// Wakeup() is called regularly.
+class BlockingLoop
+{
+public:
+	// A new loop starts out stopped, so Wait() does not block until Prepare() or Run() is called.
+	// The running state is initialized explicitly because a default-initialized std::atomic
+	// holds an indeterminate value (before C++20), which Wakeup() would read if it is called before Prepare().
+	BlockingLoop() : m_running_state(STATE_DONE)
+	{
+		m_stopped.Set();
+	}
+
+	// Quits the mainloop in the blocking way, see Stop().
+	~BlockingLoop()
+	{
+		Stop();
+	}
+
+	// Triggers to rerun the payload of the Run() function at least once again.
+	// This function will never block and is designed to finish as fast as possible.
+	void Wakeup()
+	{
+		// Already running, so no need for a wakeup.
+		// This is the common case, so try to get this as fast as possible.
+		if (m_running_state.load() >= STATE_NEED_EXECUTION)
+			return;
+
+		// Mark that new data is available. If the old state will rerun the payload
+		// itself, we don't have to set the event to interrupt the worker.
+		if (m_running_state.exchange(STATE_NEED_EXECUTION) != STATE_SLEEPING)
+			return;
+
+		// Else as the worker thread may sleep now, we have to set the event.
+		m_new_work_event.Set();
+	}
+
+	// Wait for a complete payload run after the last Wakeup() call.
+	// If stopped, this returns immediately.
+	void Wait()
+	{
+		// already done
+		if (m_stopped.IsSet() || m_running_state.load() <= STATE_DONE)
+			return;
+
+		// Notifying this event will only wake up one thread, so use a mutex here to
+		// allow only one waiting thread. This way only the first waiting thread needs the
+		// event; the other ones re-check the state as soon as they get the mutex.
+		std::lock_guard<std::mutex> lk(m_wait_lock);
+
+		// Wait for the worker thread to finish.
+		while (!m_stopped.IsSet() && m_running_state.load() > STATE_DONE)
+		{
+			m_done_event.Wait();
+		}
+
+		// As we wanted to wait for the other thread, there is likely no work remaining.
+		// So there is no need for a busy loop any more.
+		m_may_sleep.Set();
+	}
+
+	// Half start the worker.
+	// So this object is in a running state and Wait() will block until the worker calls Run().
+	// This may be called from any thread and is supposed to be called at least once before Wait() is used.
+	void Prepare()
+	{
+		// There is a race condition if the other threads call this function while
+		// the loop thread is initializing. Using this lock will ensure a valid state.
+		std::lock_guard<std::mutex> lk(m_prepare_lock);
+
+		if (!m_stopped.TestAndClear())
+			return;
+		m_running_state.store(STATE_LAST_EXECUTION); // so the payload will only be executed once without any Wakeup call
+		m_shutdown.Clear();
+		m_may_sleep.Set();
+	}
+
+	// Mainloop of this object.
+	// The payload callback is called at least as often as it's needed to match the Wakeup() requirements.
+	// The optional timeout parameter is the time in milliseconds after which a sleeping loop
+	// calls the payload again, even without a Wakeup() call.
+	// Use timeout = 0 to run without a timeout at all.
+	template <class F> void Run(F payload, int64_t timeout = 0)
+	{
+		// Asserts that Prepare is called at least once before we enter the loop.
+		// But a good implementation should call this before already.
+		Prepare();
+
+		while (!m_shutdown.IsSet())
+		{
+			payload();
+
+			switch (m_running_state.load())
+			{
+			case STATE_NEED_EXECUTION:
+				// We won't get notified while we are in the STATE_NEED_EXECUTION state, so maybe Wakeup was called.
+				// So we have to assume on finishing the STATE_NEED_EXECUTION state, that there may be some remaining tasks.
+				// To process these tasks, we call the payload again within the STATE_LAST_EXECUTION state.
+				m_running_state--;
+				break;
+
+			case STATE_LAST_EXECUTION:
+				// If we're still in the STATE_LAST_EXECUTION state, then Wakeup wasn't called within the last
+				// execution of payload. This means we should be ready now.
+				// But bad luck, Wakeup might have been called right now. So break and rerun the payload
+				// if the state was touched right now.
+				if (m_running_state-- != STATE_LAST_EXECUTION)
+					break;
+
+				// Else we're likely in the STATE_DONE state now, so wakeup the waiting threads right now.
+				// However, if we're not in the STATE_DONE state any more, the event should also be
+				// triggered so that we'll skip the next waiting call quite fast.
+				m_done_event.Set();
+				// Intentional fall through.
+
+			case STATE_DONE:
+				// We're done now. So time to check if we want to sleep or if we want to stay in a busy loop.
+				if (m_may_sleep.TestAndClear())
+				{
+					// Try to set the sleeping state.
+					if (m_running_state-- != STATE_DONE)
+						break;
+				}
+				else
+				{
+					// Busy loop.
+					break;
+				}
+				// Intentional fall through.
+
+			case STATE_SLEEPING:
+				// Just relax
+				if (timeout > 0)
+				{
+					m_new_work_event.WaitFor(std::chrono::milliseconds(timeout));
+				}
+				else
+				{
+					m_new_work_event.Wait();
+				}
+				break;
+			}
+		}
+
+		// Shutting down, so get a safe state
+		m_running_state.store(STATE_DONE);
+		m_stopped.Set();
+
+		// Wake up the last Wait calls.
+		m_done_event.Set();
+	}
+
+	// Quits the mainloop.
+	// By default, it will wait until the Mainloop quits.
+	// Be careful to not use the blocking way within the payload of the Run() method.
+	void Stop(bool block = true)
+	{
+		if (m_stopped.IsSet())
+			return;
+
+		m_shutdown.Set();
+
+		// We have to interrupt the sleeping call to let the worker shut down soon.
+		Wakeup();
+
+		if (block)
+			Wait();
+	}
+
+	// Returns true while the loop is neither stopped nor asked to shut down.
+	bool IsRunning() const
+	{
+		return !m_stopped.IsSet() && !m_shutdown.IsSet();
+	}
+
+	// This function should be triggered regularly over time, so that we fall back from
+	// the busy loop to the sleeping way.
+	void AllowSleep()
+	{
+		m_may_sleep.Set();
+	}
+
+private:
+	std::mutex m_wait_lock;
+	std::mutex m_prepare_lock;
+
+	Flag m_stopped; // If this one is set, Wait() shall not block.
+	Flag m_shutdown; // If this one is set, the loop shall be quit.
+
+	Event m_new_work_event;
+	Event m_done_event;
+
+	enum RUNNING_TYPE {
+		STATE_SLEEPING = 0,
+		STATE_DONE = 1,
+		STATE_LAST_EXECUTION = 2,
+		STATE_NEED_EXECUTION = 3
+	};
+	std::atomic<int> m_running_state; // must be of type RUNNING_TYPE
+
+	Flag m_may_sleep; // If this one is set, we fall back from the busy loop to an event based synchronization.
+};
+
+}
diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj index 7b2c278cd1..fe3a5f22dc 100644 --- a/Source/Core/Common/Common.vcxproj +++ b/Source/Core/Common/Common.vcxproj @@ -40,6 +40,7 @@ + diff --git a/Source/Core/Common/Common.vcxproj.filters b/Source/Core/Common/Common.vcxproj.filters index ffaf7d6be8..712122b3d1 100644 --- a/Source/Core/Common/Common.vcxproj.filters +++ b/Source/Core/Common/Common.vcxproj.filters @@ -14,6 +14,7 @@ + @@ -126,4 +127,4 @@ - \ No newline at end of file + diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 727c4d3059..0fea458b61 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -481,7 +481,7 @@ void Idle() { //DEBUG_LOG(POWERPC, "Idle"); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack) + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack && !SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU) { //When the FIFO is processing data we must not advance because in this way //the VI will be desynchronized. 
So, We are waiting until the FIFO finish and diff --git a/Source/Core/Core/HW/SystemTimers.cpp b/Source/Core/Core/HW/SystemTimers.cpp index 5c84966489..a44614d9e0 100644 --- a/Source/Core/Core/HW/SystemTimers.cpp +++ b/Source/Core/Core/HW/SystemTimers.cpp @@ -62,6 +62,7 @@ IPC_HLE_PERIOD: For the Wiimote this is the call schedule: #include "Core/PowerPC/PowerPC.h" #include "VideoCommon/CommandProcessor.h" +#include "VideoCommon/Fifo.h" #include "VideoCommon/VideoBackendBase.h" @@ -189,7 +190,7 @@ static void PatchEngineCallback(u64 userdata, int cyclesLate) static void ThrottleCallback(u64 last_time, int cyclesLate) { // Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz. - CommandProcessor::s_gpuMaySleep.Set(); + GpuMaySleep(); u32 time = Common::Timer::GetTimeMs(); diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 449e99982f..88b16afe7e 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -49,8 +49,6 @@ static std::atomic s_interrupt_finish_waiting; static std::atomic s_vi_ticks(CommandProcessor::m_cpClockOrigin); -Common::Flag s_gpuMaySleep; - static bool IsOnThread() { return SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread; diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index 0b33150ce4..ee130b82b6 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -17,7 +17,6 @@ namespace CommandProcessor { extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread. 
-extern Common::Flag s_gpuMaySleep; // internal hardware addresses enum diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 289a62d8e7..048916f28c 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -5,6 +5,7 @@ #include #include "Common/Atomic.h" +#include "Common/BlockingLoop.h" #include "Common/ChunkFile.h" #include "Common/CPUDetect.h" #include "Common/Event.h" @@ -26,11 +27,13 @@ #include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/PixelEngine.h" #include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoConfig.h" bool g_bSkipCurrentFrame = false; -static std::atomic s_gpu_running_state; +static Common::BlockingLoop s_gpu_mainloop; + static std::atomic s_emu_running_state; // Most of this array is unlikely to be faulted in... @@ -41,8 +44,6 @@ static u8* s_fifo_aux_read_ptr; bool g_use_deterministic_gpu_thread; // STATE_TO_SAVE -static std::mutex s_video_buffer_lock; -static std::condition_variable s_video_buffer_cond; static u8* s_video_buffer; static u8* s_video_buffer_read_ptr; static std::atomic s_video_buffer_write_ptr; @@ -60,12 +61,6 @@ static u8* s_video_buffer_pp_read_ptr; // polls, it's just atomic. // - The pp_read_ptr is the CPU preprocessing version of the read_ptr. 
-static Common::Flag s_gpu_is_running; // If this one is set, the gpu loop will be called at least once again -static Common::Event s_gpu_new_work_event; - -static Common::Flag s_gpu_is_pending; // If this one is set, there might still be work to do -static Common::Event s_gpu_done_event; - void Fifo_DoState(PointerWrap &p) { p.DoArray(s_video_buffer, FIFO_SIZE); @@ -102,13 +97,14 @@ void Fifo_Init() // Padded so that SIMD overreads in the vertex loader are safe s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE + 4); ResetVideoBuffer(); - s_gpu_running_state.store(false); + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread) + s_gpu_mainloop.Prepare(); CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin); } void Fifo_Shutdown() { - if (s_gpu_running_state.load()) + if (s_gpu_mainloop.IsRunning()) PanicAlert("Fifo shutting down while active"); FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4); @@ -135,27 +131,22 @@ void ExitGpuLoop() FlushGpu(); // Terminate GPU thread loop - s_gpu_running_state.store(false); s_emu_running_state.store(true); - s_gpu_new_work_event.Set(); + s_gpu_mainloop.Stop(false); } void EmulatorState(bool running) { s_emu_running_state.store(running); - s_gpu_new_work_event.Set(); + s_gpu_mainloop.Wakeup(); } void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) { if (g_use_deterministic_gpu_thread) { - std::unique_lock lk(s_video_buffer_lock); - u8* write_ptr = s_video_buffer_write_ptr; - s_video_buffer_cond.wait(lk, [&]() { - return !s_gpu_running_state.load() || s_video_buffer_seen_ptr == write_ptr; - }); - if (!s_gpu_running_state.load()) + s_gpu_mainloop.Wait(); + if (!s_gpu_mainloop.IsRunning()) return; // Opportunistically reset FIFOs so we don't wrap around. 
@@ -168,6 +159,8 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) if (may_move_read_ptr) { + u8* write_ptr = s_video_buffer_write_ptr; + // what's left over in the buffer size_t size = write_ptr - s_video_buffer_pp_read_ptr; @@ -188,7 +181,7 @@ void PushFifoAuxBuffer(void* ptr, size_t size) if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr)) { SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false); - if (!s_gpu_running_state.load()) + if (!s_gpu_mainloop.IsRunning()) { // GPU is shutting down return; @@ -243,9 +236,9 @@ static void ReadDataFromFifoOnCPU(u32 readPtr) // We can't wrap around while the GPU is working on the data. // This should be very rare due to the reset in SyncGPU. SyncGPU(SYNC_GPU_WRAPAROUND); - if (!s_gpu_running_state.load()) + if (!s_gpu_mainloop.IsRunning()) { - // GPU is shutting down + // GPU is shutting down, so the next asserts may fail return; } @@ -283,18 +276,19 @@ void ResetVideoBuffer() // Purpose: Keep the Core HW updated about the CPU-GPU distance void RunGpuLoop() { - s_gpu_running_state.store(true); - SCPFifoStruct &fifo = CommandProcessor::fifo; - u32 cyclesExecuted = 0; AsyncRequests::GetInstance()->SetEnable(true); AsyncRequests::GetInstance()->SetPassthrough(false); - while (s_gpu_running_state.load()) - { + s_gpu_mainloop.Run( + [] { g_video_backend->PeekMessages(); - if (g_use_deterministic_gpu_thread && s_emu_running_state.load()) + // Do nothing while paused + if (!s_emu_running_state.load()) + return; + + if (g_use_deterministic_gpu_thread) { AsyncRequests::GetInstance()->PullEvents(); @@ -305,16 +299,13 @@ void RunGpuLoop() if (write_ptr > seen_ptr) { s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false); - - { - std::lock_guard vblk(s_video_buffer_lock); - s_video_buffer_seen_ptr = write_ptr; - s_video_buffer_cond.notify_all(); - } + s_video_buffer_seen_ptr = write_ptr; } } - else if (s_emu_running_state.load()) + else { + 
SCPFifoStruct &fifo = CommandProcessor::fifo; + AsyncRequests::GetInstance()->PullEvents(); CommandProcessor::SetCPStatusFromGPU(); @@ -333,6 +324,7 @@ void RunGpuLoop() if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || CommandProcessor::GetVITicks() > CommandProcessor::m_cpClockOrigin) { + u32 cyclesExecuted = 0; u32 readPtr = fifo.CPReadPointer; ReadDataFromFifo(readPtr); @@ -369,31 +361,15 @@ void RunGpuLoop() // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down. AsyncRequests::GetInstance()->PullEvents(); } + // The fifo is empty and it's unlikely we will get any more work in the near future. + // Make sure VertexManager finishes drawing any primitives it has stored in it's buffer. + VertexManager::Flush(); // don't release the GPU running state on sync GPU waits fifo.isGpuReadingData = !run_loop; } + }, 100); - s_gpu_is_pending.Clear(); - s_gpu_done_event.Set(); - - if (s_gpu_is_running.IsSet()) - { - if (CommandProcessor::s_gpuMaySleep.IsSet()) - { - // Reset the atomic flag. 
But as the CPU thread might have pushed some new data, we have to rerun the GPU loop - s_gpu_is_pending.Set(); - s_gpu_is_running.Clear(); - CommandProcessor::s_gpuMaySleep.Clear(); - } - } - else - { - s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100)); - } - } - // wake up SyncGPU if we were interrupted - s_video_buffer_cond.notify_all(); AsyncRequests::GetInstance()->SetEnable(false); AsyncRequests::GetInstance()->SetPassthrough(true); } @@ -403,11 +379,12 @@ void FlushGpu() if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread) return; - while (s_gpu_is_running.IsSet() || s_gpu_is_pending.IsSet()) - { - CommandProcessor::s_gpuMaySleep.Set(); - s_gpu_done_event.Wait(); - } + s_gpu_mainloop.Wait(); +} + +void GpuMaySleep() +{ + s_gpu_mainloop.AllowSleep(); } bool AtBreakpoint() @@ -429,6 +406,7 @@ void RunGpu() if (g_use_deterministic_gpu_thread) { ReadDataFromFifoOnCPU(fifo.CPReadPointer); + s_gpu_mainloop.Wakeup(); } else { @@ -460,11 +438,9 @@ void RunGpu() } // wake up GPU thread - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !s_gpu_is_running.IsSet()) + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread) { - s_gpu_is_pending.Set(); - s_gpu_is_running.Set(); - s_gpu_new_work_event.Set(); + s_gpu_mainloop.Wakeup(); } } diff --git a/Source/Core/VideoCommon/Fifo.h b/Source/Core/VideoCommon/Fifo.h index b59004aa03..8a8a954fe0 100644 --- a/Source/Core/VideoCommon/Fifo.h +++ b/Source/Core/VideoCommon/Fifo.h @@ -43,6 +43,7 @@ void* PopFifoAuxBuffer(size_t size); void FlushGpu(); void RunGpu(); +void GpuMaySleep(); void RunGpuLoop(); void ExitGpuLoop(); void EmulatorState(bool running); diff --git a/Source/UnitTests/Common/BlockingLoopTest.cpp b/Source/UnitTests/Common/BlockingLoopTest.cpp new file mode 100644 index 0000000000..805aca446c --- /dev/null +++ b/Source/UnitTests/Common/BlockingLoopTest.cpp @@ -0,0 +1,84 @@ +// Copyright 2014 Dolphin Emulator Project +// 
Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <atomic>
+#include <thread>
+
+#include <gtest/gtest.h>
+
+#include "Common/BlockingLoop.h"
+
+TEST(BlockingLoop, MultiThreaded)
+{
+	Common::BlockingLoop loop;
+	std::atomic<int> signaled_a(0); // incremented by run_a_thread before each Wakeup()
+	std::atomic<int> received_a(0); // copy of signaled_a, written by the payload
+	std::atomic<int> signaled_b(0); // incremented by run_b_thread before each Wakeup()
+	std::atomic<int> received_b(0); // copy of signaled_b, written by the payload
+	for (int i = 0; i < 100; i++)
+	{
+		// Invalidate the current state, so the checks below fail unless the payload has run.
+		received_a.store(signaled_a.load() + 1);
+		received_b.store(signaled_b.load() + 123);
+
+		// Must not block as the loop is stopped.
+		loop.Wait();
+
+		std::thread loop_thread(
+			[&]() {
+				loop.Run(
+					[&]() {
+						received_a.store(signaled_a.load());
+						received_b.store(signaled_b.load());
+					});
+			});
+
+		// From now on Wait must block until the payload has finished.
+		loop.Prepare();
+
+		// The payload must run at least once on startup.
+		loop.Wait();
+		EXPECT_EQ(signaled_a.load(), received_a.load());
+		EXPECT_EQ(signaled_b.load(), received_b.load());
+
+		std::thread run_a_thread(
+			[&]() {
+				for (int j = 0; j < 100; j++)
+				{
+					for (int k = 0; k < 100; k++)
+					{
+						signaled_a++;
+						loop.Wakeup();
+					}
+
+					loop.Wait(); // the payload must have seen the latest signaled_a afterwards
+					EXPECT_EQ(signaled_a.load(), received_a.load());
+				}
+			});
+		std::thread run_b_thread(
+			[&]() {
+				for (int j = 0; j < 100; j++)
+				{
+					for (int k = 0; k < 100; k++)
+					{
+						signaled_b++;
+						loop.Wakeup();
+					}
+
+					loop.Wait(); // the payload must have seen the latest signaled_b afterwards
+					EXPECT_EQ(signaled_b.load(), received_b.load());
+				}
+			});
+
+		run_a_thread.join();
+		run_b_thread.join();
+
+		loop.Stop();
+
+		// Must not block
+		loop.Wait();
+
+		loop_thread.join();
+	}
+}
diff --git a/Source/UnitTests/Common/BusyLoopTest.cpp b/Source/UnitTests/Common/BusyLoopTest.cpp
new file mode 100644
index 0000000000..50d11acb31
--- /dev/null
+++ b/Source/UnitTests/Common/BusyLoopTest.cpp
@@ -0,0 +1,51 @@
+// Copyright 2014 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <atomic>
+#include <thread>
+
+#include <gtest/gtest.h>
+
+#include "Common/BlockingLoop.h"
+#include "Common/Thread.h"
+
+TEST(BusyLoopTest, MultiThreaded)
+{
+	Common::BlockingLoop loop;
+	Common::Event e; // set by every payload run
+	for (int i = 0; i < 100; i++)
+	{
+		loop.Prepare();
+		std::thread loop_thread(
+			[&]() {
+				loop.Run(
+					[&]() {
+						e.Set();
+					});
+			});
+
+		// Ping - Pong
+		for (int j = 0; j < 10; j++)
+		{
+			loop.Wakeup();
+			e.Wait();
+
+			// Just waste some time, so the main loop more likely has fallen back to the sleep state.
+			Common::SleepCurrentThread(1);
+		}
+
+		for (int j = 0; j < 100; j++)
+		{
+			// We normally have to call Wakeup to ensure the Event is triggered.
+			// But this check is for an internal feature of the BlockingLoop.
+			// It's implemented to fall back to a busy loop regularly.
+			// If we're in the busy loop, the payload (and so the Event) is called all the time.
+			//loop.Wakeup();
+			e.Wait();
+		}
+
+		loop.Stop();
+		loop_thread.join();
+	}
+}
diff --git a/Source/UnitTests/Common/CMakeLists.txt b/Source/UnitTests/Common/CMakeLists.txt
index a35bd455fe..eba08f9453 100644
--- a/Source/UnitTests/Common/CMakeLists.txt
+++ b/Source/UnitTests/Common/CMakeLists.txt
@@ -1,5 +1,7 @@
 add_dolphin_test(BitFieldTest BitFieldTest.cpp)
 add_dolphin_test(BitSetTest BitSetTest.cpp)
+add_dolphin_test(BlockingLoopTest BlockingLoopTest.cpp)
+add_dolphin_test(BusyLoopTest BusyLoopTest.cpp)
 add_dolphin_test(CommonFuncsTest CommonFuncsTest.cpp)
 add_dolphin_test(EventTest EventTest.cpp)
 add_dolphin_test(FifoQueueTest FifoQueueTest.cpp)