mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-10 08:09:26 +01:00
JitCommon: Restructure the profiler calls.
This commit is contained in:
parent
95ce860265
commit
958b75b707
@ -200,7 +200,6 @@ void CachedInterpreter::Jit(u32 address)
|
|||||||
|
|
||||||
b->checkedEntry = GetCodePtr();
|
b->checkedEntry = GetCodePtr();
|
||||||
b->normalEntry = GetCodePtr();
|
b->normalEntry = GetCodePtr();
|
||||||
b->runCount = 0;
|
|
||||||
|
|
||||||
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
|
@ -628,7 +628,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
|||||||
const u8* start =
|
const u8* start =
|
||||||
AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr
|
AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr
|
||||||
b->checkedEntry = start;
|
b->checkedEntry = start;
|
||||||
b->runCount = 0;
|
|
||||||
|
|
||||||
// Downcount flag check. The last block decremented downcounter, and the flag should still be
|
// Downcount flag check. The last block decremented downcounter, and the flag should still be
|
||||||
// available.
|
// available.
|
||||||
@ -649,16 +648,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Conditionally add profiling code.
|
// Conditionally add profiling code.
|
||||||
b->ticCounter = 0;
|
|
||||||
b->ticStart = 0;
|
|
||||||
b->ticStop = 0;
|
|
||||||
if (Profiler::g_ProfileBlocks)
|
if (Profiler::g_ProfileBlocks)
|
||||||
{
|
{
|
||||||
MOV(64, R(RSCRATCH), ImmPtr(&b->runCount));
|
|
||||||
ADD(32, MatR(RSCRATCH), Imm8(1));
|
|
||||||
|
|
||||||
// get start tic
|
// get start tic
|
||||||
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&b->ticStart)));
|
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&b->profile_data.ticStart)));
|
||||||
|
int offset = static_cast<int>(offsetof(JitBlock::ProfileData, runCount)) -
|
||||||
|
static_cast<int>(offsetof(JitBlock::ProfileData, ticStart));
|
||||||
|
ADD(64, MDisp(ABI_PARAM1, offset), Imm8(1));
|
||||||
ABI_CallFunction(QueryPerformanceCounter);
|
ABI_CallFunction(QueryPerformanceCounter);
|
||||||
}
|
}
|
||||||
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK)
|
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK)
|
||||||
@ -736,18 +732,20 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
|||||||
{
|
{
|
||||||
if (Profiler::g_ProfileBlocks)
|
if (Profiler::g_ProfileBlocks)
|
||||||
{
|
{
|
||||||
// WARNING - cmp->branch merging will screw this up.
|
// TODO: Move this to WriteExit() calls.
|
||||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||||
// get end tic
|
// get end tic
|
||||||
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&b->ticStop)));
|
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&b->profile_data.ticStop)));
|
||||||
ABI_CallFunction(QueryPerformanceCounter);
|
ABI_CallFunction(QueryPerformanceCounter);
|
||||||
// tic counter += (end tic - start tic)
|
// tic counter += (end tic - start tic)
|
||||||
MOV(64, R(RSCRATCH2), Imm64((u64)b));
|
MOV(64, R(RSCRATCH2), Imm64(reinterpret_cast<u64>(&b->profile_data)));
|
||||||
MOV(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticStop)));
|
MOV(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticStop)));
|
||||||
SUB(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticStart)));
|
SUB(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticStart)));
|
||||||
ADD(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticCounter)));
|
ADD(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticCounter)));
|
||||||
MOV(64, MDisp(RSCRATCH2, offsetof(struct JitBlock, ticCounter)), R(RSCRATCH));
|
ADD(64, MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, downcountCounter)),
|
||||||
|
Imm32(js.downcountAmount));
|
||||||
|
MOV(64, MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticCounter)), R(RSCRATCH));
|
||||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||||
}
|
}
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
|
@ -520,10 +520,10 @@ void JitArm64::EmitResetCycleCounters()
|
|||||||
const u32 PMCR_EL0_P = 2;
|
const u32 PMCR_EL0_P = 2;
|
||||||
const u32 PMCR_EL0_C = 4;
|
const u32 PMCR_EL0_C = 4;
|
||||||
const u32 PMCR_EL0_LC = 0x40;
|
const u32 PMCR_EL0_LC = 0x40;
|
||||||
_MSR(FIELD_PMCR_EL0, X0);
|
_MSR(FIELD_PMCR_EL0, X10);
|
||||||
MOVI2R(X1, PMCR_EL0_E | PMCR_EL0_P | PMCR_EL0_C | PMCR_EL0_LC);
|
MOVI2R(X11, PMCR_EL0_E | PMCR_EL0_P | PMCR_EL0_C | PMCR_EL0_LC);
|
||||||
ORR(X0, X0, X1);
|
ORR(X10, X10, X11);
|
||||||
MRS(X0, FIELD_PMCR_EL0);
|
MRS(X10, FIELD_PMCR_EL0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg)
|
void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg)
|
||||||
@ -533,47 +533,54 @@ void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg)
|
|||||||
|
|
||||||
void JitArm64::BeginTimeProfile(JitBlock* b)
|
void JitArm64::BeginTimeProfile(JitBlock* b)
|
||||||
{
|
{
|
||||||
b->ticCounter = 0;
|
MOVP2R(X0, &b->profile_data);
|
||||||
b->ticStart = 0;
|
LDR(INDEX_UNSIGNED, X1, X0, offsetof(JitBlock::ProfileData, runCount));
|
||||||
b->ticStop = 0;
|
ADD(X1, X1, 1);
|
||||||
|
|
||||||
if (m_supports_cycle_counter)
|
if (m_supports_cycle_counter)
|
||||||
{
|
{
|
||||||
EmitResetCycleCounters();
|
EmitResetCycleCounters();
|
||||||
EmitGetCycles(X1);
|
EmitGetCycles(X2);
|
||||||
MOVP2R(X0, &b->ticStart);
|
|
||||||
STR(INDEX_UNSIGNED, X1, X0, 0);
|
// stores runCount and ticStart
|
||||||
|
STP(INDEX_UNSIGNED, X1, X2, X0, offsetof(JitBlock::ProfileData, runCount));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
STR(INDEX_UNSIGNED, X1, X0, offsetof(JitBlock::ProfileData, runCount));
|
||||||
|
|
||||||
MOVP2R(X1, &QueryPerformanceCounter);
|
MOVP2R(X1, &QueryPerformanceCounter);
|
||||||
MOVP2R(X0, &b->ticStart);
|
ADD(X0, X0, offsetof(JitBlock::ProfileData, ticStart));
|
||||||
BLR(X1);
|
BLR(X1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::EndTimeProfile(JitBlock* b)
|
void JitArm64::EndTimeProfile(JitBlock* b)
|
||||||
{
|
{
|
||||||
|
MOVP2R(X20, &b->profile_data);
|
||||||
if (m_supports_cycle_counter)
|
if (m_supports_cycle_counter)
|
||||||
{
|
{
|
||||||
EmitGetCycles(X2);
|
EmitGetCycles(X2);
|
||||||
MOVP2R(X0, &b->ticStart);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOVP2R(X1, &QueryPerformanceCounter);
|
MOVP2R(X1, &QueryPerformanceCounter);
|
||||||
MOVP2R(X0, &b->ticStop);
|
ADD(X0, X20, offsetof(JitBlock::ProfileData, ticStop));
|
||||||
BLR(X1);
|
BLR(X1);
|
||||||
|
|
||||||
MOVP2R(X0, &b->ticStart);
|
LDR(INDEX_UNSIGNED, X2, X20, offsetof(JitBlock::ProfileData, ticStop));
|
||||||
LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
|
|
||||||
}
|
}
|
||||||
|
|
||||||
LDR(INDEX_UNSIGNED, X1, X0, 0); // Start
|
LDR(INDEX_UNSIGNED, X1, X20, offsetof(JitBlock::ProfileData, ticStart));
|
||||||
LDR(INDEX_UNSIGNED, X3, X0, 16); // Counter
|
|
||||||
|
// loads ticCounter and downcountCounter
|
||||||
|
LDP(INDEX_UNSIGNED, X3, X4, X20, offsetof(JitBlock::ProfileData, ticCounter));
|
||||||
SUB(X2, X2, X1);
|
SUB(X2, X2, X1);
|
||||||
ADD(X3, X3, X2);
|
ADD(X3, X3, X2);
|
||||||
STR(INDEX_UNSIGNED, X3, X0, 16);
|
ADDI2R(X4, X4, js.downcountAmount);
|
||||||
|
|
||||||
|
// stores ticCounter and downcountCounter
|
||||||
|
STP(INDEX_UNSIGNED, X3, X4, X20, offsetof(JitBlock::ProfileData, ticCounter));
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::Run()
|
void JitArm64::Run()
|
||||||
@ -657,7 +664,6 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock*
|
|||||||
|
|
||||||
const u8* start = GetCodePtr();
|
const u8* start = GetCodePtr();
|
||||||
b->checkedEntry = start;
|
b->checkedEntry = start;
|
||||||
b->runCount = 0;
|
|
||||||
|
|
||||||
// Downcount flag check, Only valid for linked blocks
|
// Downcount flag check, Only valid for linked blocks
|
||||||
{
|
{
|
||||||
@ -673,15 +679,6 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock*
|
|||||||
// Conditionally add profiling code.
|
// Conditionally add profiling code.
|
||||||
if (Profiler::g_ProfileBlocks)
|
if (Profiler::g_ProfileBlocks)
|
||||||
{
|
{
|
||||||
ARM64Reg WA = gpr.GetReg();
|
|
||||||
ARM64Reg WB = gpr.GetReg();
|
|
||||||
ARM64Reg XA = EncodeRegTo64(WA);
|
|
||||||
ARM64Reg XB = EncodeRegTo64(WB);
|
|
||||||
MOVP2R(XA, &b->runCount);
|
|
||||||
LDR(INDEX_UNSIGNED, XB, XA, 0);
|
|
||||||
ADD(XB, XB, 1);
|
|
||||||
STR(INDEX_UNSIGNED, XB, XA, 0);
|
|
||||||
gpr.Unlock(WA, WB);
|
|
||||||
// get start tic
|
// get start tic
|
||||||
BeginTimeProfile(b);
|
BeginTimeProfile(b);
|
||||||
}
|
}
|
||||||
|
@ -49,7 +49,6 @@ struct JitBlock
|
|||||||
// The number of PPC instructions represented by this block. Mostly
|
// The number of PPC instructions represented by this block. Mostly
|
||||||
// useful for logging.
|
// useful for logging.
|
||||||
u32 originalSize;
|
u32 originalSize;
|
||||||
int runCount; // for profiling.
|
|
||||||
|
|
||||||
// Information about exits to a known address from this block.
|
// Information about exits to a known address from this block.
|
||||||
// This is used to implement block linking.
|
// This is used to implement block linking.
|
||||||
@ -65,11 +64,15 @@ struct JitBlock
|
|||||||
// This set stores all physical addresses of all occupied instructions.
|
// This set stores all physical addresses of all occupied instructions.
|
||||||
std::set<u32> physical_addresses;
|
std::set<u32> physical_addresses;
|
||||||
|
|
||||||
// we don't really need to save start and stop
|
// Block profiling data, structure is inlined in Jit.cpp
|
||||||
// TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;)
|
struct ProfileData
|
||||||
u64 ticStart; // for profiling - time.
|
{
|
||||||
u64 ticStop; // for profiling - time.
|
u64 ticCounter;
|
||||||
u64 ticCounter; // for profiling - time.
|
u64 downcountCounter;
|
||||||
|
u64 runCount;
|
||||||
|
u64 ticStart;
|
||||||
|
u64 ticStop;
|
||||||
|
} profile_data = {};
|
||||||
|
|
||||||
// This tracks the position of this block within the fast block cache.
|
// This tracks the position of this block within the fast block cache.
|
||||||
// We allow each block to have only one map entry.
|
// We allow each block to have only one map entry.
|
||||||
|
@ -119,12 +119,12 @@ void GetProfileResults(ProfileStats* prof_stats)
|
|||||||
|
|
||||||
QueryPerformanceFrequency((LARGE_INTEGER*)&prof_stats->countsPerSec);
|
QueryPerformanceFrequency((LARGE_INTEGER*)&prof_stats->countsPerSec);
|
||||||
g_jit->GetBlockCache()->RunOnBlocks([&prof_stats](const JitBlock& block) {
|
g_jit->GetBlockCache()->RunOnBlocks([&prof_stats](const JitBlock& block) {
|
||||||
// Rough heuristic. Mem instructions should cost more.
|
const auto& data = block.profile_data;
|
||||||
u64 cost = block.originalSize * (block.runCount / 4);
|
u64 cost = data.downcountCounter;
|
||||||
u64 timecost = block.ticCounter;
|
u64 timecost = data.ticCounter;
|
||||||
// Todo: tweak.
|
// Todo: tweak.
|
||||||
if (block.runCount >= 1)
|
if (data.runCount >= 1)
|
||||||
prof_stats->block_stats.emplace_back(block.effectiveAddress, cost, timecost, block.runCount,
|
prof_stats->block_stats.emplace_back(block.effectiveAddress, cost, timecost, data.runCount,
|
||||||
block.codeSize);
|
block.codeSize);
|
||||||
prof_stats->cost_sum += cost;
|
prof_stats->cost_sum += cost;
|
||||||
prof_stats->timecost_sum += timecost;
|
prof_stats->timecost_sum += timecost;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user