Fifo: Rewrite SyncGpu

The new implementation has 3 options:
 SyncGpuMaxDistance
 SyncGpuMinDistance
 SyncGpuOverclock

The MaxDistance controlls how many CPU cycles the CPU is allowed to be in front
of the GPU. Too low values will slow down extremly, too high values are as
unsynchronized and half of the games will crash.
The -MinDistance (negative) set how many cycles the GPU is allowed to be in
front of the CPU. As we are used to emulate an infinitiv fast GPU, this may be
set to any high (negative) number.

The last parameter is to hack a faster (>1.0) or slower(<1.0) GPU. As we don't
emulate GPU timing very well (eg skip the timings of the pixel stage completely),
an overclock factor of ~0.5 is often much more accurate than 1.0
This commit is contained in:
degasus
2015-06-03 23:21:46 +02:00
parent 5a0daef7f0
commit d31bed8b79
12 changed files with 126 additions and 110 deletions

View File

@ -61,6 +61,9 @@ static u8* s_video_buffer_pp_read_ptr;
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
static std::atomic<int> s_sync_ticks;
static Common::Event s_sync_wakeup_event;
void Fifo_DoState(PointerWrap &p)
{
p.DoArray(s_video_buffer, FIFO_SIZE);
@ -99,7 +102,7 @@ void Fifo_Init()
ResetVideoBuffer();
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
s_gpu_mainloop.Prepare();
CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin);
s_sync_ticks.store(0);
}
void Fifo_Shutdown()
@ -282,6 +285,8 @@ void RunGpuLoop()
s_gpu_mainloop.Run(
[] {
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
g_video_backend->PeekMessages();
// Do nothing while paused
@ -310,63 +315,59 @@ void RunGpuLoop()
CommandProcessor::SetCPStatusFromGPU();
if (!fifo.isGpuReadingData)
{
CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin);
}
bool run_loop = true;
// check if we are able to run this buffer
while (run_loop && !CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
{
fifo.isGpuReadingData = true;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
break;
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || CommandProcessor::GetVITicks() > CommandProcessor::m_cpClockOrigin)
{
u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer;
ReadDataFromFifo(readPtr);
u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer;
ReadDataFromFifo(readPtr);
if (readPtr == fifo.CPEnd)
readPtr = fifo.CPBase;
else
readPtr += 32;
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && CommandProcessor::GetVITicks() >= cyclesExecuted)
CommandProcessor::DecrementVITicks(cyclesExecuted);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((write_ptr - s_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
}
if (readPtr == fifo.CPEnd)
readPtr = fifo.CPBase;
else
{
run_loop = false;
}
readPtr += 32;
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((write_ptr - s_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
CommandProcessor::SetCPStatusFromGPU();
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
{
cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock);
int old = s_sync_ticks.fetch_sub(cyclesExecuted);
if (old > 0 && old - (int)cyclesExecuted <= 0)
s_sync_wakeup_event.Set();
}
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
AsyncRequests::GetInstance()->PullEvents();
}
// fast skip remaining GPU time if fifo is empty
if (s_sync_ticks.load() > 0)
{
int old = s_sync_ticks.exchange(0);
if (old > 0)
s_sync_wakeup_event.Set();
}
// The fifo is empty and it's unlikely we will get any more work in the near future.
// Make sure VertexManager finishes drawing any primitives it has stored in it's buffer.
VertexManager::Flush();
// don't release the GPU running state on sync GPU waits
fifo.isGpuReadingData = !run_loop;
}
}, 100);
@ -376,7 +377,9 @@ void RunGpuLoop()
void FlushGpu()
{
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
if (!param.bCPUThread || g_use_deterministic_gpu_thread)
return;
s_gpu_mainloop.Wait();
@ -396,9 +399,10 @@ bool AtBreakpoint()
void RunGpu()
{
SCPFifoStruct &fifo = CommandProcessor::fifo;
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
// execute GPU
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
if (!param.bCPUThread || g_use_deterministic_gpu_thread)
{
bool reset_simd_state = false;
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
@ -438,7 +442,7 @@ void RunGpu()
}
// wake up GPU thread
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
if (param.bCPUThread)
{
s_gpu_mainloop.Wakeup();
}
@ -471,7 +475,7 @@ void Fifo_UpdateWantDeterminism(bool want)
break;
}
gpu_thread = gpu_thread && SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;
gpu_thread = gpu_thread && param.bCPUThread;
if (g_use_deterministic_gpu_thread != gpu_thread)
{
@ -485,3 +489,40 @@ void Fifo_UpdateWantDeterminism(bool want)
}
}
}
int Fifo_Update(int ticks)
{
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
if (ticks == 0)
{
FlushGpu();
return param.iSyncGpuMaxDistance;
}
// GPU is sleeping, so no need for synchronization
if (s_gpu_mainloop.IsDone() || g_use_deterministic_gpu_thread)
{
if (s_sync_ticks.load() < 0)
{
int old = s_sync_ticks.fetch_add(ticks);
if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
RunGpu();
}
return param.iSyncGpuMaxDistance;
}
int old = s_sync_ticks.fetch_add(ticks);
if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
RunGpu();
if (s_sync_ticks.load() >= param.iSyncGpuMaxDistance)
{
while (s_sync_ticks.load() > 0)
{
s_sync_wakeup_event.Wait();
}
}
return param.iSyncGpuMaxDistance - s_sync_ticks.load();
}