Move CoreTiming::downcount to PowerPC::ppcState.

This isn't technically the correct place to have the downcount variable, but it is similar to what PPSSPP does to gain a bit of extra speed on ARM.
We access this variable quite a bit: it is decremented at every exit of a block.
On ARM, loading and storing the value previously required four instructions; it now requires only two.

This gives an average gain of about 1 FPS in most games.
Examples:
Crazy Taxi: 54FPS -> 55FPS
Luigi's Mansion: 20FPS -> 21FPS
Wind Waker (Save Screen): 27FPS -> 28FPS

This seems to average a 6 MHz to 16 MHz improvement in emulated CPU core speed in the few games I've tested.
This commit is contained in:
Ryan Houdek 2014-06-16 22:47:10 -05:00
parent 177658aed6
commit a40ae6883a
13 changed files with 48 additions and 53 deletions

View File

@ -46,7 +46,7 @@ Common::FifoQueue<BaseEvent, false> tsQueue;
// event pools
Event *eventPool = nullptr;
int downcount, slicelength;
int slicelength;
int maxSliceLength = MAX_SLICE_LENGTH;
s64 globalTimer;
@ -113,7 +113,7 @@ void UnregisterAllEvents()
void Init()
{
downcount = maxSliceLength;
PowerPC::ppcState.downcount = maxSliceLength;
slicelength = maxSliceLength;
globalTimer = 0;
idledCycles = 0;
@ -173,7 +173,6 @@ void EventDoState(PointerWrap &p, BaseEvent* ev)
void DoState(PointerWrap &p)
{
std::lock_guard<std::mutex> lk(tsWriteLock);
p.Do(downcount);
p.Do(slicelength);
p.Do(globalTimer);
p.Do(idledCycles);
@ -336,10 +335,10 @@ void SetMaximumSlice(int maximumSliceLength)
void ForceExceptionCheck(int cycles)
{
if (downcount > cycles)
if (PowerPC::ppcState.downcount > cycles)
{
slicelength -= (downcount - cycles); // Account for cycles already executed by adjusting the slicelength
downcount = cycles;
slicelength -= (PowerPC::ppcState.downcount - cycles); // Account for cycles already executed by adjusting the slicelength
PowerPC::ppcState.downcount = cycles;
}
}
@ -390,9 +389,9 @@ void Advance()
{
MoveEvents();
int cyclesExecuted = slicelength - downcount;
int cyclesExecuted = slicelength - PowerPC::ppcState.downcount;
globalTimer += cyclesExecuted;
downcount = slicelength;
PowerPC::ppcState.downcount = slicelength;
while (first)
{
@ -414,14 +413,14 @@ void Advance()
if (!first)
{
WARN_LOG(POWERPC, "WARNING - no events in queue. Setting downcount to 10000");
downcount += 10000;
PowerPC::ppcState.downcount += 10000;
}
else
{
slicelength = (int)(first->time - globalTimer);
if (slicelength > maxSliceLength)
slicelength = maxSliceLength;
downcount = slicelength;
PowerPC::ppcState.downcount = slicelength;
}
if (advanceCallback)
@ -451,8 +450,8 @@ void Idle()
Common::YieldCPU();
}
idledCycles += downcount;
downcount = 0;
idledCycles += PowerPC::ppcState.downcount;
PowerPC::ppcState.downcount = 0;
Advance();
}

View File

@ -78,7 +78,6 @@ void SetFakeTBStartTicks(u64 val);
void ForceExceptionCheck(int cycles);
extern int downcount;
extern int slicelength;
}; // end of namespace

View File

@ -74,7 +74,7 @@ bool FifoPlayer::Play()
{
m_CurrentFrame = m_FrameRangeStart;
CoreTiming::downcount = 0;
PowerPC::ppcState.downcount = 0;
CoreTiming::Advance();
}
else
@ -301,7 +301,7 @@ void FifoPlayer::WriteFifo(u8 *data, u32 start, u32 end)
u32 cyclesUsed = elapsedCycles - m_ElapsedCycles;
m_ElapsedCycles = elapsedCycles;
CoreTiming::downcount -= cyclesUsed;
PowerPC::ppcState.downcount -= cyclesUsed;
CoreTiming::Advance();
}
}

View File

@ -199,7 +199,7 @@ void Interpreter::SingleStep()
SingleStepInner();
CoreTiming::slicelength = 1;
CoreTiming::downcount = 0;
PowerPC::ppcState.downcount = 0;
CoreTiming::Advance();
if (PowerPC::ppcState.Exceptions)
@ -233,7 +233,7 @@ void Interpreter::Run()
// Debugging friendly version of inner loop. Tries to do the timing as similarly to the
// JIT as possible. Does not take into account that some instructions take multiple cycles.
while (CoreTiming::downcount > 0)
while (PowerPC::ppcState.downcount > 0)
{
m_EndBlock = false;
int i;
@ -276,13 +276,13 @@ void Interpreter::Run()
}
SingleStepInner();
}
CoreTiming::downcount -= i;
PowerPC::ppcState.downcount -= i;
}
}
else
{
// "fast" version of inner loop. well, it's not so fast.
while (CoreTiming::downcount > 0)
while (PowerPC::ppcState.downcount > 0)
{
m_EndBlock = false;
@ -291,7 +291,7 @@ void Interpreter::Run()
{
cycles += SingleStepInner();
}
CoreTiming::downcount -= cycles;
PowerPC::ppcState.downcount -= cycles;
}
}

View File

@ -287,7 +287,7 @@ void Jit64::WriteExit(u32 destination)
{
Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
//If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
@ -317,7 +317,7 @@ void Jit64::WriteExitDestInEAX()
{
MOV(32, M(&PC), R(EAX));
Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.dispatcher, true);
}
@ -327,7 +327,7 @@ void Jit64::WriteRfiExitDestInEAX()
MOV(32, M(&NPC), R(EAX));
Cleanup();
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.dispatcher, true);
}
@ -337,7 +337,7 @@ void Jit64::WriteExceptionExit()
MOV(32, R(EAX), M(&PC));
MOV(32, M(&NPC), R(EAX));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.dispatcher, true);
}
@ -347,7 +347,7 @@ void Jit64::WriteExternalExceptionExit()
MOV(32, R(EAX), M(&PC));
MOV(32, M(&NPC), R(EAX));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.dispatcher, true);
}

View File

@ -1726,7 +1726,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
// If a FPU exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens.
Jit->MOV(32, M(&PC), Imm32(InstLoc));
Jit->SUB(32, M(&CoreTiming::downcount), Jit->js.downcountAmount > 127 ? Imm32(Jit->js.downcountAmount) : Imm8(Jit->js.downcountAmount));
Jit->SUB(32, M(&PowerPC::ppcState.downcount), Jit->js.downcountAmount > 127 ? Imm32(Jit->js.downcountAmount) : Imm8(Jit->js.downcountAmount));
Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
Jit->WriteExceptionExit();
Jit->SetJumpTarget(b1);

View File

@ -388,7 +388,7 @@ void JitIL::WriteExit(u32 destination)
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) {
ABI_CallFunction((void *)JitILProfiler::End);
}
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
//If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
@ -420,7 +420,7 @@ void JitIL::WriteExitDestInOpArg(const Gen::OpArg& arg)
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) {
ABI_CallFunction((void *)JitILProfiler::End);
}
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.dispatcher, true);
}
@ -433,7 +433,7 @@ void JitIL::WriteRfiExitDestInOpArg(const Gen::OpArg& arg)
ABI_CallFunction((void *)JitILProfiler::End);
}
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.dispatcher, true);
}
@ -446,7 +446,7 @@ void JitIL::WriteExceptionExit()
MOV(32, R(EAX), M(&PC));
MOV(32, M(&NPC), R(EAX));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(asm_routines.dispatcher, true);
}

View File

@ -134,22 +134,19 @@ void JitArm::Cleanup()
void JitArm::DoDownCount()
{
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
MOVI2R(rA, (u32)&CoreTiming::downcount);
LDR(rB, rA);
LDR(rA, R9, PPCSTATE_OFF(downcount));
if (js.downcountAmount < 255) // We can enlarge this if we used rotations
{
SUBS(rB, rB, js.downcountAmount);
STR(rB, rA);
SUBS(rA, rA, js.downcountAmount);
}
else
{
ARMReg rC = gpr.GetReg(false);
MOVI2R(rC, js.downcountAmount);
SUBS(rB, rB, rC);
STR(rB, rA);
ARMReg rB = gpr.GetReg(false);
MOVI2R(rB, js.downcountAmount);
SUBS(rA, rA, rB);
}
gpr.Unlock(rA, rB);
STR(rA, R9, PPCSTATE_OFF(downcount));
gpr.Unlock(rA);
}
void JitArm::WriteExitDestInR(ARMReg Reg)
{

View File

@ -95,7 +95,6 @@ void JitArmAsmRoutineManager::Generate()
// consumed by CALL.
SUB(_SP, _SP, 4);
MOVI2R(R0, (u32)&CoreTiming::downcount);
MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]);
FixupBranch skipToRealDispatcher = B();

View File

@ -81,22 +81,19 @@ void JitArmIL::Break(UGeckoInstruction _inst)
void JitArmIL::DoDownCount()
{
ARMReg rA = R14;
ARMReg rB = R12;
MOVI2R(rA, (u32)&CoreTiming::downcount);
LDR(rB, rA);
ARMReg rA = R12;
LDR(rA, R9, PPCSTATE_OFF(downcount));
if (js.downcountAmount < 255) // We can enlarge this if we used rotations
{
SUBS(rB, rB, js.downcountAmount);
STR(rB, rA);
SUBS(rA, rA, js.downcountAmount);
}
else
{
ARMReg rC = R11;
MOVI2R(rC, js.downcountAmount);
SUBS(rB, rB, rC);
STR(rB, rA);
ARMReg rB = R11;
MOVI2R(rB, js.downcountAmount);
SUBS(rA, rA, rB);
}
STR(rA, R9, PPCSTATE_OFF(downcount));
}
void JitArmIL::WriteExitDestInReg(ARMReg Reg)

View File

@ -27,7 +27,6 @@ void JitArmILAsmRoutineManager::Generate()
// consumed by CALL.
SUB(_SP, _SP, 4);
MOVI2R(R0, (u32)&CoreTiming::downcount);
MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]);
FixupBranch skipToRealDispatcher = B();

View File

@ -46,6 +46,11 @@ struct GC_ALIGNED64(PowerPCState)
// Exception management.
volatile u32 Exceptions;
// Downcount for determining when we need to do timing
// This isn't quite the right location for it, but it is here to accelerate the ARM JIT
// This variable should be inside of the CoreTiming namespace if we wanted to be correct.
int downcount;
u32 sr[16]; // Segment registers.
u32 DebugCount;

View File

@ -63,7 +63,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread;
// Don't forget to increase this after doing changes on the savestate system
static const u32 STATE_VERSION = 26;
static const u32 STATE_VERSION = 27;
enum
{