JitCommon: Restructure the profiler calls.

This commit is contained in:
degasus 2017-08-12 22:18:22 +02:00
parent 95ce860265
commit 958b75b707
5 changed files with 52 additions and 55 deletions

View File

@ -200,7 +200,6 @@ void CachedInterpreter::Jit(u32 address)
b->checkedEntry = GetCodePtr(); b->checkedEntry = GetCodePtr();
b->normalEntry = GetCodePtr(); b->normalEntry = GetCodePtr();
b->runCount = 0;
for (u32 i = 0; i < code_block.m_num_instructions; i++) for (u32 i = 0; i < code_block.m_num_instructions; i++)
{ {

View File

@ -628,7 +628,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
const u8* start = const u8* start =
AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr
b->checkedEntry = start; b->checkedEntry = start;
b->runCount = 0;
// Downcount flag check. The last block decremented downcounter, and the flag should still be // Downcount flag check. The last block decremented downcounter, and the flag should still be
// available. // available.
@ -649,16 +648,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
} }
// Conditionally add profiling code. // Conditionally add profiling code.
b->ticCounter = 0;
b->ticStart = 0;
b->ticStop = 0;
if (Profiler::g_ProfileBlocks) if (Profiler::g_ProfileBlocks)
{ {
MOV(64, R(RSCRATCH), ImmPtr(&b->runCount));
ADD(32, MatR(RSCRATCH), Imm8(1));
// get start tic // get start tic
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&b->ticStart))); MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&b->profile_data.ticStart)));
int offset = static_cast<int>(offsetof(JitBlock::ProfileData, runCount)) -
static_cast<int>(offsetof(JitBlock::ProfileData, ticStart));
ADD(64, MDisp(ABI_PARAM1, offset), Imm8(1));
ABI_CallFunction(QueryPerformanceCounter); ABI_CallFunction(QueryPerformanceCounter);
} }
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK) #if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK)
@ -736,18 +732,20 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
{ {
if (Profiler::g_ProfileBlocks) if (Profiler::g_ProfileBlocks)
{ {
// WARNING - cmp->branch merging will screw this up. // TODO: Move this to WriteExit() calls.
BitSet32 registersInUse = CallerSavedRegistersInUse(); BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_PushRegistersAndAdjustStack(registersInUse, 0);
// get end tic // get end tic
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&b->ticStop))); MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&b->profile_data.ticStop)));
ABI_CallFunction(QueryPerformanceCounter); ABI_CallFunction(QueryPerformanceCounter);
// tic counter += (end tic - start tic) // tic counter += (end tic - start tic)
MOV(64, R(RSCRATCH2), Imm64((u64)b)); MOV(64, R(RSCRATCH2), Imm64(reinterpret_cast<u64>(&b->profile_data)));
MOV(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticStop))); MOV(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticStop)));
SUB(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticStart))); SUB(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticStart)));
ADD(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticCounter))); ADD(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticCounter)));
MOV(64, MDisp(RSCRATCH2, offsetof(struct JitBlock, ticCounter)), R(RSCRATCH)); ADD(64, MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, downcountCounter)),
Imm32(js.downcountAmount));
MOV(64, MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticCounter)), R(RSCRATCH));
ABI_PopRegistersAndAdjustStack(registersInUse, 0); ABI_PopRegistersAndAdjustStack(registersInUse, 0);
} }
js.isLastInstruction = true; js.isLastInstruction = true;

View File

@ -520,10 +520,10 @@ void JitArm64::EmitResetCycleCounters()
const u32 PMCR_EL0_P = 2; const u32 PMCR_EL0_P = 2;
const u32 PMCR_EL0_C = 4; const u32 PMCR_EL0_C = 4;
const u32 PMCR_EL0_LC = 0x40; const u32 PMCR_EL0_LC = 0x40;
_MSR(FIELD_PMCR_EL0, X0); _MSR(FIELD_PMCR_EL0, X10);
MOVI2R(X1, PMCR_EL0_E | PMCR_EL0_P | PMCR_EL0_C | PMCR_EL0_LC); MOVI2R(X11, PMCR_EL0_E | PMCR_EL0_P | PMCR_EL0_C | PMCR_EL0_LC);
ORR(X0, X0, X1); ORR(X10, X10, X11);
MRS(X0, FIELD_PMCR_EL0); MRS(X10, FIELD_PMCR_EL0);
} }
void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg) void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg)
@ -533,47 +533,54 @@ void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg)
void JitArm64::BeginTimeProfile(JitBlock* b) void JitArm64::BeginTimeProfile(JitBlock* b)
{ {
b->ticCounter = 0; MOVP2R(X0, &b->profile_data);
b->ticStart = 0; LDR(INDEX_UNSIGNED, X1, X0, offsetof(JitBlock::ProfileData, runCount));
b->ticStop = 0; ADD(X1, X1, 1);
if (m_supports_cycle_counter) if (m_supports_cycle_counter)
{ {
EmitResetCycleCounters(); EmitResetCycleCounters();
EmitGetCycles(X1); EmitGetCycles(X2);
MOVP2R(X0, &b->ticStart);
STR(INDEX_UNSIGNED, X1, X0, 0); // stores runCount and ticStart
STP(INDEX_UNSIGNED, X1, X2, X0, offsetof(JitBlock::ProfileData, runCount));
} }
else else
{ {
STR(INDEX_UNSIGNED, X1, X0, offsetof(JitBlock::ProfileData, runCount));
MOVP2R(X1, &QueryPerformanceCounter); MOVP2R(X1, &QueryPerformanceCounter);
MOVP2R(X0, &b->ticStart); ADD(X0, X0, offsetof(JitBlock::ProfileData, ticStart));
BLR(X1); BLR(X1);
} }
} }
void JitArm64::EndTimeProfile(JitBlock* b) void JitArm64::EndTimeProfile(JitBlock* b)
{ {
MOVP2R(X20, &b->profile_data);
if (m_supports_cycle_counter) if (m_supports_cycle_counter)
{ {
EmitGetCycles(X2); EmitGetCycles(X2);
MOVP2R(X0, &b->ticStart);
} }
else else
{ {
MOVP2R(X1, &QueryPerformanceCounter); MOVP2R(X1, &QueryPerformanceCounter);
MOVP2R(X0, &b->ticStop); ADD(X0, X20, offsetof(JitBlock::ProfileData, ticStop));
BLR(X1); BLR(X1);
MOVP2R(X0, &b->ticStart); LDR(INDEX_UNSIGNED, X2, X20, offsetof(JitBlock::ProfileData, ticStop));
LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
} }
LDR(INDEX_UNSIGNED, X1, X0, 0); // Start LDR(INDEX_UNSIGNED, X1, X20, offsetof(JitBlock::ProfileData, ticStart));
LDR(INDEX_UNSIGNED, X3, X0, 16); // Counter
// loads ticCounter and downcountCounter
LDP(INDEX_UNSIGNED, X3, X4, X20, offsetof(JitBlock::ProfileData, ticCounter));
SUB(X2, X2, X1); SUB(X2, X2, X1);
ADD(X3, X3, X2); ADD(X3, X3, X2);
STR(INDEX_UNSIGNED, X3, X0, 16); ADDI2R(X4, X4, js.downcountAmount);
// stores ticCounter and downcountCounter
STP(INDEX_UNSIGNED, X3, X4, X20, offsetof(JitBlock::ProfileData, ticCounter));
} }
void JitArm64::Run() void JitArm64::Run()
@ -657,7 +664,6 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock*
const u8* start = GetCodePtr(); const u8* start = GetCodePtr();
b->checkedEntry = start; b->checkedEntry = start;
b->runCount = 0;
// Downcount flag check, Only valid for linked blocks // Downcount flag check, Only valid for linked blocks
{ {
@ -673,15 +679,6 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock*
// Conditionally add profiling code. // Conditionally add profiling code.
if (Profiler::g_ProfileBlocks) if (Profiler::g_ProfileBlocks)
{ {
ARM64Reg WA = gpr.GetReg();
ARM64Reg WB = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
ARM64Reg XB = EncodeRegTo64(WB);
MOVP2R(XA, &b->runCount);
LDR(INDEX_UNSIGNED, XB, XA, 0);
ADD(XB, XB, 1);
STR(INDEX_UNSIGNED, XB, XA, 0);
gpr.Unlock(WA, WB);
// get start tic // get start tic
BeginTimeProfile(b); BeginTimeProfile(b);
} }

View File

@ -49,7 +49,6 @@ struct JitBlock
// The number of PPC instructions represented by this block. Mostly // The number of PPC instructions represented by this block. Mostly
// useful for logging. // useful for logging.
u32 originalSize; u32 originalSize;
int runCount; // for profiling.
// Information about exits to a known address from this block. // Information about exits to a known address from this block.
// This is used to implement block linking. // This is used to implement block linking.
@ -65,11 +64,15 @@ struct JitBlock
// This set stores all physical addresses of all occupied instructions. // This set stores all physical addresses of all occupied instructions.
std::set<u32> physical_addresses; std::set<u32> physical_addresses;
// we don't really need to save start and stop // Block profiling data, structure is inlined in Jit.cpp
// TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;) struct ProfileData
u64 ticStart; // for profiling - time. {
u64 ticStop; // for profiling - time. u64 ticCounter;
u64 ticCounter; // for profiling - time. u64 downcountCounter;
u64 runCount;
u64 ticStart;
u64 ticStop;
} profile_data = {};
// This tracks the position of this block within the fast block cache. // This tracks the position of this block within the fast block cache.
// We allow each block to have only one map entry. // We allow each block to have only one map entry.

View File

@ -119,12 +119,12 @@ void GetProfileResults(ProfileStats* prof_stats)
QueryPerformanceFrequency((LARGE_INTEGER*)&prof_stats->countsPerSec); QueryPerformanceFrequency((LARGE_INTEGER*)&prof_stats->countsPerSec);
g_jit->GetBlockCache()->RunOnBlocks([&prof_stats](const JitBlock& block) { g_jit->GetBlockCache()->RunOnBlocks([&prof_stats](const JitBlock& block) {
// Rough heuristic. Mem instructions should cost more. const auto& data = block.profile_data;
u64 cost = block.originalSize * (block.runCount / 4); u64 cost = data.downcountCounter;
u64 timecost = block.ticCounter; u64 timecost = data.ticCounter;
// Todo: tweak. // Todo: tweak.
if (block.runCount >= 1) if (data.runCount >= 1)
prof_stats->block_stats.emplace_back(block.effectiveAddress, cost, timecost, block.runCount, prof_stats->block_stats.emplace_back(block.effectiveAddress, cost, timecost, data.runCount,
block.codeSize); block.codeSize);
prof_stats->cost_sum += cost; prof_stats->cost_sum += cost;
prof_stats->timecost_sum += timecost; prof_stats->timecost_sum += timecost;