From daefb3b550e27f291beb78aaea2bc9c9dc77ac78 Mon Sep 17 00:00:00 2001 From: nitsuja Date: Sat, 7 Jan 2012 20:22:48 -0800 Subject: [PATCH 1/3] a small thread synchronization speedup for dual core mode. it's most noticeable in games where the CPU is running behind compared to the GPU. --- Source/Core/Common/Src/StdThread.h | 2 +- Source/Core/VideoCommon/Src/Fifo.cpp | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Source/Core/Common/Src/StdThread.h b/Source/Core/Common/Src/StdThread.h index 6e9e903561..661d87cfd1 100644 --- a/Source/Core/Common/Src/StdThread.h +++ b/Source/Core/Common/Src/StdThread.h @@ -279,7 +279,7 @@ namespace this_thread inline void yield() { #ifdef _WIN32 - Sleep(0); + SwitchToThread(); #else sleep(0); #endif diff --git a/Source/Core/VideoCommon/Src/Fifo.cpp b/Source/Core/VideoCommon/Src/Fifo.cpp index 842ff49e78..7330169ddd 100644 --- a/Source/Core/VideoCommon/Src/Fifo.cpp +++ b/Source/Core/VideoCommon/Src/Fifo.cpp @@ -171,14 +171,22 @@ void RunGpuLoop() CommandProcessor::isPossibleWaitingSetDrawDone = false; } - fifo.isGpuReadingData = false; - - if (EmuRunningState) - Common::YieldCPU(); + { + if (fifo.isGpuReadingData) + { + fifo.isGpuReadingData = false; + Common::YieldCPU(); + } + else + { + SLEEP(1); + } + } else { // While the emu is paused, we still handle async request such as Savestates then sleep. + fifo.isGpuReadingData = false; while (!EmuRunningState) { g_video_backend->PeekMessages(); From 2368d88c654d691b8c60179b505113c87a4d72a6 Mon Sep 17 00:00:00 2001 From: nitsuja Date: Sat, 7 Jan 2012 20:24:11 -0800 Subject: [PATCH 2/3] slightly more precise speed percent display (this is really minor) --- Source/Core/Core/Src/Core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/Core/Src/Core.cpp b/Source/Core/Core/Src/Core.cpp index bd4267c074..e729eec9d6 100644 --- a/Source/Core/Core/Src/Core.cpp +++ b/Source/Core/Core/Src/Core.cpp @@ -576,7 +576,7 @@ void VideoThrottle() u32 FPS = Common::AtomicLoad(DrawnFrame) * 1000 / ElapseTime; u32 VPS = DrawnVideo * 1000 / ElapseTime; - u32 Speed = VPS * 100 / VideoInterface::TargetRefreshRate; + u32 Speed = DrawnVideo * (100 * 1000) / (VideoInterface::TargetRefreshRate * ElapseTime); // Settings are shown the same for both extended and summary info std::string SSettings = StringFromFormat("%s %s", cpu_core_base->GetName(), _CoreParameter.bCPUThread ? "DC" : "SC"); From 1603bbb5f44f3046667dabdbcb60cb09b970f979 Mon Sep 17 00:00:00 2001 From: nitsuja Date: Sat, 7 Jan 2012 22:19:45 -0800 Subject: [PATCH 3/3] fixed and reenabled and slightly optimized the JIT version of fcmpo/fcmpu. --- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 2 +- .../Src/PowerPC/Jit64/Jit_FloatingPoint.cpp | 63 ++++++++++++------- 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index 0d31893b53..2a2f3c36b0 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -193,7 +193,7 @@ void Jit64::Init() jo.enableFastMem = false; #endif jo.assumeFPLoadFromMem = Core::g_CoreStartupParameter.bUseFastMem; - jo.fpAccurateFcmp = true; // Fallback to Interpreter + jo.fpAccurateFcmp = Core::g_CoreStartupParameter.bEnableFPRF; jo.optimizeGatherPipe = true; jo.fastInterrupts = false; jo.accurateSinglePrecision = true; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp index a325e3a150..5fcd2f9e29 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -229,8 +229,6 @@ void Jit64::fmrx(UGeckoInstruction inst) void Jit64::fcmpx(UGeckoInstruction inst) { - // TODO : This still causes crashes in Nights, and broken graphics - // in Paper Mario, Super Paper Mario as well as SoulCalibur 2 prolly others too.. :( INSTRUCTION_START JITDISABLE(FloatingPoint) if (jo.fpAccurateFcmp) { @@ -243,36 +241,59 @@ void Jit64::fcmpx(UGeckoInstruction inst) int crf = inst.CRFD; fpr.Lock(a,b); - if (a != b) fpr.BindToRegister(a, true); + fpr.BindToRegister(b, true); // Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception? - UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b)); + UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a)); - FixupBranch pNaN = J_CC(CC_P); - FixupBranch pLesser = J_CC(CC_B); - FixupBranch pGreater = J_CC(CC_A); + FixupBranch pNaN, pLesser, pGreater; + FixupBranch continue1, continue2, continue3; + + if (a != b) + { + // if B > A, goto Lesser's jump target + pLesser = J_CC(CC_A); + } + + // if (B != B) or (A != A), goto NaN's jump target + pNaN = J_CC(CC_P); + + if (a != b) + { + // if B < A, goto Greater's jump target + // JB can't precede the NaN check because it doesn't test ZF + pGreater = J_CC(CC_B); + } // Equal MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); - FixupBranch continue1 = J(); - - // Greater Than - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); - FixupBranch continue2 = J(); - - // Less Than - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); - FixupBranch continue3 = J(); - + continue1 = J(); + // NAN SetJumpTarget(pNaN); MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x1)); + + if (a != b) + { + continue2 = J(); + + // Greater Than + SetJumpTarget(pGreater); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); + continue3 = J(); + + // Less Than + SetJumpTarget(pLesser); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); + } SetJumpTarget(continue1); - SetJumpTarget(continue2); - SetJumpTarget(continue3); + if (a != b) + { + SetJumpTarget(continue2); + SetJumpTarget(continue3); + } + fpr.UnlockAll(); }