Fifo: Replace busy loop with condition variable

This commit is contained in:
degasus 2015-03-05 17:12:24 +01:00 committed by degasus
parent ea50dc240d
commit 279c657cda
3 changed files with 54 additions and 38 deletions

View File

@ -1,4 +1,5 @@
#include "VideoCommon/AsyncRequests.h" #include "VideoCommon/AsyncRequests.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/RenderBase.h" #include "VideoCommon/RenderBase.h"
AsyncRequests AsyncRequests::s_singleton; AsyncRequests AsyncRequests::s_singleton;
@ -49,6 +50,7 @@ void AsyncRequests::PushEvent(const AsyncRequests::Event& event, bool blocking)
m_queue.push(event); m_queue.push(event);
RunGpu();
if (blocking) if (blocking)
{ {
m_cond.wait(lock, [this]{return m_queue.empty();}); m_cond.wait(lock, [this]{return m_queue.empty();});

View File

@ -322,10 +322,7 @@ void GatherPipeBursted()
ProcessFifoAllDistance(); ProcessFifoAllDistance();
} }
} }
else
{
RunGpu(); RunGpu();
}
return; return;
} }
@ -375,6 +372,7 @@ void UpdateInterrupts(u64 userdata)
} }
CoreTiming::ForceExceptionCheck(0); CoreTiming::ForceExceptionCheck(0);
interruptWaiting = false; interruptWaiting = false;
RunGpu();
} }
void UpdateInterruptsFromVideoBackend(u64 userdata) void UpdateInterruptsFromVideoBackend(u64 userdata)
@ -551,5 +549,7 @@ void Update()
if (fifo.isGpuReadingData) if (fifo.isGpuReadingData)
Common::AtomicAdd(VITicks, SystemTimers::GetTicksPerSecond() / 10000); Common::AtomicAdd(VITicks, SystemTimers::GetTicksPerSecond() / 10000);
RunGpu();
} }
} // end of namespace CommandProcessor } // end of namespace CommandProcessor

View File

@ -5,6 +5,7 @@
#include "Common/Atomic.h" #include "Common/Atomic.h"
#include "Common/ChunkFile.h" #include "Common/ChunkFile.h"
#include "Common/CPUDetect.h" #include "Common/CPUDetect.h"
#include "Common/Event.h"
#include "Common/FPURoundMode.h" #include "Common/FPURoundMode.h"
#include "Common/MemoryUtil.h" #include "Common/MemoryUtil.h"
#include "Common/Thread.h" #include "Common/Thread.h"
@ -58,6 +59,9 @@ static u8* s_video_buffer_pp_read_ptr;
// polls, it's just atomic. // polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr. // - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
// Wake-up handshake between RunGpu() and the GPU loop:
// - RunGpu() sets s_gpu_is_running and then signals s_gpu_new_work_event.
// - The GPU loop clears s_gpu_is_running when it finds it set; otherwise it waits on
//   s_gpu_new_work_event with a timeout.
static Common::Flag s_gpu_is_running; // While this flag is set, the GPU loop will run at least once more before waiting
static Common::Event s_gpu_new_work_event; // Also signalled by ExitGpuLoop() and EmulatorState()
void Fifo_DoState(PointerWrap &p) void Fifo_DoState(PointerWrap &p)
{ {
p.DoArray(s_video_buffer, FIFO_SIZE); p.DoArray(s_video_buffer, FIFO_SIZE);
@ -133,11 +137,13 @@ void ExitGpuLoop()
// Terminate GPU thread loop // Terminate GPU thread loop
GpuRunningState = false; GpuRunningState = false;
EmuRunningState = true; EmuRunningState = true;
s_gpu_new_work_event.Set();
} }
// Stores the new emulator running state in EmuRunningState and then signals
// s_gpu_new_work_event, the event the GPU loop waits on when s_gpu_is_running is not set.
void EmulatorState(bool running) void EmulatorState(bool running)
{ {
EmuRunningState = running; EmuRunningState = running;
s_gpu_new_work_event.Set();
} }
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
@ -271,10 +277,6 @@ void RunGpuLoop()
SCPFifoStruct &fifo = CommandProcessor::fifo; SCPFifoStruct &fifo = CommandProcessor::fifo;
u32 cyclesExecuted = 0; u32 cyclesExecuted = 0;
// If the host CPU has only two cores, idle loop instead of busy loop
// This allows a system that we are maxing out in dual core mode to do other things
bool yield_cpu = cpu_info.num_cores <= 2;
AsyncRequests::GetInstance()->SetEnable(true); AsyncRequests::GetInstance()->SetEnable(true);
AsyncRequests::GetInstance()->SetPassthrough(false); AsyncRequests::GetInstance()->SetPassthrough(false);
@ -353,11 +355,15 @@ void RunGpuLoop()
if (EmuRunningState) if (EmuRunningState)
{ {
// NOTE(jsd): Calling SwitchToThread() on Windows 7 x64 is a hot spot, according to profiler. if (s_gpu_is_running.IsSet())
// See https://docs.google.com/spreadsheet/ccc?key=0Ah4nh0yGtjrgdFpDeF9pS3V6RUotRVE3S3J4TGM1NlE#gid=0 {
// for benchmark details. // Reset the atomic flag. The CPU thread may have pushed new data in the meantime, so the GPU loop has to run once more.
if (yield_cpu) s_gpu_is_running.Clear();
Common::YieldCPU(); }
else
{
s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100));
}
} }
else else
{ {
@ -386,11 +392,11 @@ bool AtBreakpoint()
void RunGpu() void RunGpu()
{ {
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread &&
!g_use_deterministic_gpu_thread)
return;
SCPFifoStruct &fifo = CommandProcessor::fifo; SCPFifoStruct &fifo = CommandProcessor::fifo;
// execute GPU
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
{
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() ) while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
{ {
if (g_use_deterministic_gpu_thread) if (g_use_deterministic_gpu_thread)
@ -416,6 +422,14 @@ void RunGpu()
fifo.CPReadWriteDistance -= 32; fifo.CPReadWriteDistance -= 32;
} }
CommandProcessor::SetCPStatusFromGPU(); CommandProcessor::SetCPStatusFromGPU();
}
// wake up GPU thread
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !s_gpu_is_running.IsSet())
{
s_gpu_is_running.Set();
s_gpu_new_work_event.Set();
}
} }
void Fifo_UpdateWantDeterminism(bool want) void Fifo_UpdateWantDeterminism(bool want)