mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-02-13 15:59:23 +01:00
Move CoreTiming::downcount to PowerPC::ppcState.
This isn't technically the correct place for the downcount variable, but it is similar to what PPSSPP does to gain a bit of extra speed on ARM. We access this variable quite a bit: it is decremented at every block exit. On ARM this previously required four instructions to load and store the value, while now it only requires two. This gives an average gain of 1 FPS in most games. Examples: Crazy Taxi: 54 FPS -> 55 FPS; Luigi's Mansion: 20 FPS -> 21 FPS; Wind Waker (save screen): 27 FPS -> 28 FPS. This seems to average a 6 MHz to 16 MHz improvement in emulated CPU core speed in the few games I've tested.
This commit is contained in:
parent
177658aed6
commit
a40ae6883a
@ -46,7 +46,7 @@ Common::FifoQueue<BaseEvent, false> tsQueue;
|
||||
// event pools
|
||||
Event *eventPool = nullptr;
|
||||
|
||||
int downcount, slicelength;
|
||||
int slicelength;
|
||||
int maxSliceLength = MAX_SLICE_LENGTH;
|
||||
|
||||
s64 globalTimer;
|
||||
@ -113,7 +113,7 @@ void UnregisterAllEvents()
|
||||
|
||||
void Init()
|
||||
{
|
||||
downcount = maxSliceLength;
|
||||
PowerPC::ppcState.downcount = maxSliceLength;
|
||||
slicelength = maxSliceLength;
|
||||
globalTimer = 0;
|
||||
idledCycles = 0;
|
||||
@ -173,7 +173,6 @@ void EventDoState(PointerWrap &p, BaseEvent* ev)
|
||||
void DoState(PointerWrap &p)
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(tsWriteLock);
|
||||
p.Do(downcount);
|
||||
p.Do(slicelength);
|
||||
p.Do(globalTimer);
|
||||
p.Do(idledCycles);
|
||||
@ -336,10 +335,10 @@ void SetMaximumSlice(int maximumSliceLength)
|
||||
|
||||
void ForceExceptionCheck(int cycles)
|
||||
{
|
||||
if (downcount > cycles)
|
||||
if (PowerPC::ppcState.downcount > cycles)
|
||||
{
|
||||
slicelength -= (downcount - cycles); // Account for cycles already executed by adjusting the slicelength
|
||||
downcount = cycles;
|
||||
slicelength -= (PowerPC::ppcState.downcount - cycles); // Account for cycles already executed by adjusting the slicelength
|
||||
PowerPC::ppcState.downcount = cycles;
|
||||
}
|
||||
}
|
||||
|
||||
@ -390,9 +389,9 @@ void Advance()
|
||||
{
|
||||
MoveEvents();
|
||||
|
||||
int cyclesExecuted = slicelength - downcount;
|
||||
int cyclesExecuted = slicelength - PowerPC::ppcState.downcount;
|
||||
globalTimer += cyclesExecuted;
|
||||
downcount = slicelength;
|
||||
PowerPC::ppcState.downcount = slicelength;
|
||||
|
||||
while (first)
|
||||
{
|
||||
@ -414,14 +413,14 @@ void Advance()
|
||||
if (!first)
|
||||
{
|
||||
WARN_LOG(POWERPC, "WARNING - no events in queue. Setting downcount to 10000");
|
||||
downcount += 10000;
|
||||
PowerPC::ppcState.downcount += 10000;
|
||||
}
|
||||
else
|
||||
{
|
||||
slicelength = (int)(first->time - globalTimer);
|
||||
if (slicelength > maxSliceLength)
|
||||
slicelength = maxSliceLength;
|
||||
downcount = slicelength;
|
||||
PowerPC::ppcState.downcount = slicelength;
|
||||
}
|
||||
|
||||
if (advanceCallback)
|
||||
@ -451,8 +450,8 @@ void Idle()
|
||||
Common::YieldCPU();
|
||||
}
|
||||
|
||||
idledCycles += downcount;
|
||||
downcount = 0;
|
||||
idledCycles += PowerPC::ppcState.downcount;
|
||||
PowerPC::ppcState.downcount = 0;
|
||||
|
||||
Advance();
|
||||
}
|
||||
|
@ -78,7 +78,6 @@ void SetFakeTBStartTicks(u64 val);
|
||||
|
||||
void ForceExceptionCheck(int cycles);
|
||||
|
||||
extern int downcount;
|
||||
extern int slicelength;
|
||||
|
||||
}; // end of namespace
|
||||
|
@ -74,7 +74,7 @@ bool FifoPlayer::Play()
|
||||
{
|
||||
m_CurrentFrame = m_FrameRangeStart;
|
||||
|
||||
CoreTiming::downcount = 0;
|
||||
PowerPC::ppcState.downcount = 0;
|
||||
CoreTiming::Advance();
|
||||
}
|
||||
else
|
||||
@ -301,7 +301,7 @@ void FifoPlayer::WriteFifo(u8 *data, u32 start, u32 end)
|
||||
u32 cyclesUsed = elapsedCycles - m_ElapsedCycles;
|
||||
m_ElapsedCycles = elapsedCycles;
|
||||
|
||||
CoreTiming::downcount -= cyclesUsed;
|
||||
PowerPC::ppcState.downcount -= cyclesUsed;
|
||||
CoreTiming::Advance();
|
||||
}
|
||||
}
|
||||
|
@ -199,7 +199,7 @@ void Interpreter::SingleStep()
|
||||
SingleStepInner();
|
||||
|
||||
CoreTiming::slicelength = 1;
|
||||
CoreTiming::downcount = 0;
|
||||
PowerPC::ppcState.downcount = 0;
|
||||
CoreTiming::Advance();
|
||||
|
||||
if (PowerPC::ppcState.Exceptions)
|
||||
@ -233,7 +233,7 @@ void Interpreter::Run()
|
||||
|
||||
// Debugging friendly version of inner loop. Tries to do the timing as similarly to the
|
||||
// JIT as possible. Does not take into account that some instructions take multiple cycles.
|
||||
while (CoreTiming::downcount > 0)
|
||||
while (PowerPC::ppcState.downcount > 0)
|
||||
{
|
||||
m_EndBlock = false;
|
||||
int i;
|
||||
@ -276,13 +276,13 @@ void Interpreter::Run()
|
||||
}
|
||||
SingleStepInner();
|
||||
}
|
||||
CoreTiming::downcount -= i;
|
||||
PowerPC::ppcState.downcount -= i;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// "fast" version of inner loop. well, it's not so fast.
|
||||
while (CoreTiming::downcount > 0)
|
||||
while (PowerPC::ppcState.downcount > 0)
|
||||
{
|
||||
m_EndBlock = false;
|
||||
|
||||
@ -291,7 +291,7 @@ void Interpreter::Run()
|
||||
{
|
||||
cycles += SingleStepInner();
|
||||
}
|
||||
CoreTiming::downcount -= cycles;
|
||||
PowerPC::ppcState.downcount -= cycles;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -287,7 +287,7 @@ void Jit64::WriteExit(u32 destination)
|
||||
{
|
||||
Cleanup();
|
||||
|
||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
|
||||
//If nobody has taken care of this yet (this can be removed when all branches are done)
|
||||
JitBlock *b = js.curBlock;
|
||||
@ -317,7 +317,7 @@ void Jit64::WriteExitDestInEAX()
|
||||
{
|
||||
MOV(32, M(&PC), R(EAX));
|
||||
Cleanup();
|
||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
||||
@ -327,7 +327,7 @@ void Jit64::WriteRfiExitDestInEAX()
|
||||
MOV(32, M(&NPC), R(EAX));
|
||||
Cleanup();
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
|
||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
||||
@ -337,7 +337,7 @@ void Jit64::WriteExceptionExit()
|
||||
MOV(32, R(EAX), M(&PC));
|
||||
MOV(32, M(&NPC), R(EAX));
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
|
||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
||||
@ -347,7 +347,7 @@ void Jit64::WriteExternalExceptionExit()
|
||||
MOV(32, R(EAX), M(&PC));
|
||||
MOV(32, M(&NPC), R(EAX));
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
|
||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
||||
|
@ -1726,7 +1726,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
||||
// If a FPU exception occurs, the exception handler will read
|
||||
// from PC. Update PC with the latest value in case that happens.
|
||||
Jit->MOV(32, M(&PC), Imm32(InstLoc));
|
||||
Jit->SUB(32, M(&CoreTiming::downcount), Jit->js.downcountAmount > 127 ? Imm32(Jit->js.downcountAmount) : Imm8(Jit->js.downcountAmount));
|
||||
Jit->SUB(32, M(&PowerPC::ppcState.downcount), Jit->js.downcountAmount > 127 ? Imm32(Jit->js.downcountAmount) : Imm8(Jit->js.downcountAmount));
|
||||
Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
|
||||
Jit->WriteExceptionExit();
|
||||
Jit->SetJumpTarget(b1);
|
||||
|
@ -388,7 +388,7 @@ void JitIL::WriteExit(u32 destination)
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) {
|
||||
ABI_CallFunction((void *)JitILProfiler::End);
|
||||
}
|
||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
|
||||
//If nobody has taken care of this yet (this can be removed when all branches are done)
|
||||
JitBlock *b = js.curBlock;
|
||||
@ -420,7 +420,7 @@ void JitIL::WriteExitDestInOpArg(const Gen::OpArg& arg)
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) {
|
||||
ABI_CallFunction((void *)JitILProfiler::End);
|
||||
}
|
||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
||||
@ -433,7 +433,7 @@ void JitIL::WriteRfiExitDestInOpArg(const Gen::OpArg& arg)
|
||||
ABI_CallFunction((void *)JitILProfiler::End);
|
||||
}
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
|
||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
||||
@ -446,7 +446,7 @@ void JitIL::WriteExceptionExit()
|
||||
MOV(32, R(EAX), M(&PC));
|
||||
MOV(32, M(&NPC), R(EAX));
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
|
||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
SUB(32, M(&PowerPC::ppcState.downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
||||
|
@ -134,22 +134,19 @@ void JitArm::Cleanup()
|
||||
void JitArm::DoDownCount()
|
||||
{
|
||||
ARMReg rA = gpr.GetReg();
|
||||
ARMReg rB = gpr.GetReg();
|
||||
MOVI2R(rA, (u32)&CoreTiming::downcount);
|
||||
LDR(rB, rA);
|
||||
LDR(rA, R9, PPCSTATE_OFF(downcount));
|
||||
if (js.downcountAmount < 255) // We can enlarge this if we used rotations
|
||||
{
|
||||
SUBS(rB, rB, js.downcountAmount);
|
||||
STR(rB, rA);
|
||||
SUBS(rA, rA, js.downcountAmount);
|
||||
}
|
||||
else
|
||||
{
|
||||
ARMReg rC = gpr.GetReg(false);
|
||||
MOVI2R(rC, js.downcountAmount);
|
||||
SUBS(rB, rB, rC);
|
||||
STR(rB, rA);
|
||||
ARMReg rB = gpr.GetReg(false);
|
||||
MOVI2R(rB, js.downcountAmount);
|
||||
SUBS(rA, rA, rB);
|
||||
}
|
||||
gpr.Unlock(rA, rB);
|
||||
STR(rA, R9, PPCSTATE_OFF(downcount));
|
||||
gpr.Unlock(rA);
|
||||
}
|
||||
void JitArm::WriteExitDestInR(ARMReg Reg)
|
||||
{
|
||||
|
@ -95,7 +95,6 @@ void JitArmAsmRoutineManager::Generate()
|
||||
// consumed by CALL.
|
||||
SUB(_SP, _SP, 4);
|
||||
|
||||
MOVI2R(R0, (u32)&CoreTiming::downcount);
|
||||
MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]);
|
||||
|
||||
FixupBranch skipToRealDispatcher = B();
|
||||
|
@ -81,22 +81,19 @@ void JitArmIL::Break(UGeckoInstruction _inst)
|
||||
|
||||
void JitArmIL::DoDownCount()
|
||||
{
|
||||
ARMReg rA = R14;
|
||||
ARMReg rB = R12;
|
||||
MOVI2R(rA, (u32)&CoreTiming::downcount);
|
||||
LDR(rB, rA);
|
||||
ARMReg rA = R12;
|
||||
LDR(rA, R9, PPCSTATE_OFF(downcount));
|
||||
if (js.downcountAmount < 255) // We can enlarge this if we used rotations
|
||||
{
|
||||
SUBS(rB, rB, js.downcountAmount);
|
||||
STR(rB, rA);
|
||||
SUBS(rA, rA, js.downcountAmount);
|
||||
}
|
||||
else
|
||||
{
|
||||
ARMReg rC = R11;
|
||||
MOVI2R(rC, js.downcountAmount);
|
||||
SUBS(rB, rB, rC);
|
||||
STR(rB, rA);
|
||||
ARMReg rB = R11;
|
||||
MOVI2R(rB, js.downcountAmount);
|
||||
SUBS(rA, rA, rB);
|
||||
}
|
||||
STR(rA, R9, PPCSTATE_OFF(downcount));
|
||||
}
|
||||
|
||||
void JitArmIL::WriteExitDestInReg(ARMReg Reg)
|
||||
|
@ -27,7 +27,6 @@ void JitArmILAsmRoutineManager::Generate()
|
||||
// consumed by CALL.
|
||||
SUB(_SP, _SP, 4);
|
||||
|
||||
MOVI2R(R0, (u32)&CoreTiming::downcount);
|
||||
MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]);
|
||||
|
||||
FixupBranch skipToRealDispatcher = B();
|
||||
|
@ -46,6 +46,11 @@ struct GC_ALIGNED64(PowerPCState)
|
||||
// Exception management.
|
||||
volatile u32 Exceptions;
|
||||
|
||||
// Downcount for determining when we need to do timing
|
||||
// This isn't quite the right location for it, but it is here to accelerate the ARM JIT
|
||||
// This variable should be inside of the CoreTiming namespace if we wanted to be correct.
|
||||
int downcount;
|
||||
|
||||
u32 sr[16]; // Segment registers.
|
||||
|
||||
u32 DebugCount;
|
||||
|
@ -63,7 +63,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
|
||||
static std::thread g_save_thread;
|
||||
|
||||
// Don't forget to increase this after doing changes on the savestate system
|
||||
static const u32 STATE_VERSION = 26;
|
||||
static const u32 STATE_VERSION = 27;
|
||||
|
||||
enum
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user