16 #ifndef CPUCOUNTERS_HEADER
17 #define CPUCOUNTERS_HEADER
25 #define INTEL_PCM_VERSION "V2.8 ($Format:%ci ID=%h$)"
42 #include <linux/perf_event.h>
43 #include <sys/syscall.h>
45 #define PCM_PERF_COUNT_HW_REF_CPU_CYCLES (9)
50 #include <semaphore.h>
51 #include <sys/types.h>
84 uint32 num_imc_channels;
88 std::vector<uint64> qpi_speed;
90 uint32 MCX_CHY_REGISTER_DEV_ADDR[2][4];
91 uint32 MCX_CHY_REGISTER_FUNC_ADDR[2][4];
92 uint32 QPI_PORTX_REGISTER_DEV_ADDR[3];
93 uint32 QPI_PORTX_REGISTER_FUNC_ADDR[3];
95 static std::vector<std::pair<uint32,uint32> > socket2bus;
96 void initSocket2Bus();
100 PciHandleM * createIntelPerfMonDevice(uint32 groupnr, uint32 bus, uint32 dev, uint32 func,
bool checkVendor =
false);
166 return qpi_speed.empty() ? 0 : qpi_speed[linkNr];
172 std::cerr.precision(1);
173 std::cerr << std::fixed;
174 for (uint32 i=0; i<qpi_speed.size(); ++i)
175 std::cerr <<
"Max QPI link " << i <<
" speed: " << qpi_speed[i] / (1e9) <<
" GBytes/second (" << qpi_speed[i] / (2e9) <<
" GT/second)" << std::endl;
197 #ifndef HACK_TO_REMOVE_DUPLICATE_ERROR
198 template class INTELPCM_API std::allocator<TopologyEntry>;
199 template class INTELPCM_API std::vector<TopologyEntry>;
200 template class INTELPCM_API std::allocator<CounterWidthExtender*>;
201 template class INTELPCM_API std::vector<CounterWidthExtender*>;
202 template class INTELPCM_API std::allocator<uint32>;
203 template class INTELPCM_API std::vector<uint32>;
204 template class INTELPCM_API std::allocator<char>;
219 int32 cpu_model, original_cpu_model;
220 int32 threads_per_core;
223 int32 num_phys_cores_per_socket;
224 int32 num_online_cores;
225 uint32 core_gen_counter_num_max;
226 uint32 core_gen_counter_num_used;
227 uint32 core_gen_counter_width;
228 uint32 core_fixed_counter_num_max;
229 uint32 core_fixed_counter_num_used;
230 uint32 core_fixed_counter_width;
231 uint32 uncore_gen_counter_num_max;
232 uint32 uncore_gen_counter_num_used;
233 uint32 uncore_gen_counter_width;
234 uint32 uncore_fixed_counter_num_max;
235 uint32 uncore_fixed_counter_num_used;
236 uint32 uncore_fixed_counter_width;
237 int32 perfmon_version;
238 int32 perfmon_config_anythread;
239 uint64 nominal_frequency;
240 uint64 max_qpi_speed;
241 uint32 L3ScalingFactor;
242 int32 pkgThermalSpecPower, pkgMinimumPower, pkgMaximumPower;
244 std::vector<TopologyEntry> topology;
245 std::string errorMessage;
247 static PCM * instance;
248 bool allow_multiple_instances;
252 uint32 PCU_MSR_PMON_BOX_CTL_ADDR, PCU_MSR_PMON_CTRX_ADDR[4];
253 double joulesPerEnergyUnit;
254 std::vector<CounterWidthExtender*> snb_energy_status;
255 std::vector<CounterWidthExtender*> jkt_dram_energy_status;
263 bool disable_JKT_workaround;
266 uint64 * coreCStateMsr;
267 uint64 * pkgCStateMsr;
270 enum { MAX_C_STATE = 10 };
275 if (state == 0 || state == 1)
278 return (coreCStateMsr != NULL && state <= MAX_C_STATE && coreCStateMsr[state] != 0);
284 return (pkgCStateMsr != NULL && state <= MAX_C_STATE && pkgCStateMsr[state] != 0);
288 void setOutput(
const std::string filename);
// Accessor: returns the current value of the `blocked` member.
305 bool isBlocked(
void) {
return blocked; }
// Mutator: stores `new_blocked` into the `blocked` member.
306 void setBlocked(
const bool new_blocked) { blocked = new_blocked; }
311 allow_multiple_instances =
true;
339 int32 event_number, umask_value;
356 uint64 OffcoreResponseMsrValue[2];
364 HANDLE numInstancesSemaphore;
367 sem_t * numInstancesSemaphore;
370 std::vector<int32> socketRefCore;
374 std::vector< std::vector<int> > perfEventHandle;
375 void readPerfData(uint32 core, std::vector<uint64> & data);
378 PERF_INST_RETIRED_ANY_POS = 0,
379 PERF_CPU_CLK_UNHALTED_THREAD_POS = 1,
380 PERF_CPU_CLK_UNHALTED_REF_POS = 2,
381 PERF_GEN_EVENT_0_POS = 3,
382 PERF_GEN_EVENT_1_POS = 4,
383 PERF_GEN_EVENT_2_POS = 5,
384 PERF_GEN_EVENT_3_POS = 6
388 PERF_GROUP_LEADER_COUNTER = PERF_INST_RETIRED_ANY_POS
391 std::ofstream *outfile;
392 std::streambuf *backup_ofile;
398 bool decrementInstanceSemaphore();
402 uint32 getNumInstances();
403 uint32 decrementNumInstances();
404 uint32 incrementNumInstances();
408 void computeQPISpeedBeckton(
int core_nr);
410 void computeNominalFrequency();
411 static bool isCPUModelSupported(
int model_);
412 std::string getSupportedUarchCodenames()
const;
413 std::string getUnsupportedMessage()
const;
417 void initCStateSupportTables();
418 bool discoverSystemTopology();
419 void printSystemTopology()
const;
421 bool detectNominalFrequency();
422 void initEnergyMonitoring();
423 void initUncoreObjects();
429 void initL3CacheOccupancyMonitoring();
430 void programBecktonUncore(
int core);
431 void programNehalemEPUncore(
int core);
432 void enableJKTWorkaround(
bool enable);
433 template <
class CounterStateType>
434 void readAndAggregateUncoreMCCounters(
const uint32 socket, CounterStateType & counterState);
435 template <
class CounterStateType>
436 void readAndAggregateEnergyCounters(
const uint32 socket, CounterStateType & counterState);
437 template <
class CounterStateType>
438 void readPackageThermalHeadroom(
const uint32 socket, CounterStateType & counterState);
439 template <
class CounterStateType>
440 void readAndAggregatePackageCStateResidencies(
SafeMsrHandle * msr, CounterStateType & result);
442 void reportQPISpeed()
const;
444 uint32 CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr)
const;
445 uint32 CX_MSR_PMON_BOX_FILTER(uint32 Cbo)
const;
446 uint32 CX_MSR_PMON_BOX_FILTER1(uint32 Cbo)
const;
447 uint32 CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl)
const;
448 uint32 CX_MSR_PMON_BOX_CTL(uint32 Cbo)
const;
449 uint32 getMaxNumOfCBoxes()
const;
450 void programCboOpcodeFilter(
const uint32 opc,
const uint32 cbo,
SafeMsrHandle * msr);
652 END_OF_MODEL_LIST = 0x0ffff
668 return topology[core_id].socket;
680 if (num_sockets == 2)
690 return (server_pcicfg_uncore && server_pcicfg_uncore[0])?(server_pcicfg_uncore[0]->
getNumQPIPorts()):0;
710 return (server_pcicfg_uncore && server_pcicfg_uncore[0])?(server_pcicfg_uncore[0]->
getNumMC()):0;
730 return (server_pcicfg_uncore && server_pcicfg_uncore[0])?(server_pcicfg_uncore[0]->
getNumMCChannels()):0;
769 return 1000000000ULL;
778 uint64
getTickCount(uint64 multiplier = 1000 , uint32 core = 0);
791 {
return hasPCICFGUncore() ? server_pcicfg_uncore[socketNr]->
getQPILinkSpeed(linkNr) : max_qpi_speed; }
// Sets the `disable_JKT_workaround` member to true; there is no way to reset it from here.
// NOTE(review): presumably this flag is consulted where the JKT workaround is applied
// (cf. enableJKTWorkaround) -- confirm in the implementation file.
809 inline void disableJKTWorkaround() { disable_JKT_workaround =
true; }
839 void programPCIeCounters(
const PCIeEventCode event_,
const uint32 tid_ = 0,
const uint32 miss_ = 0);
840 void programPCIeMissCounters(
const PCIeEventCode event_,
const uint32 tid_ = 0);
847 uint64 extractCoreGenCounterValue(uint64 val);
848 uint64 extractCoreFixedCounterValue(uint64 val);
849 uint64 extractUncoreGenCounterValue(uint64 val);
850 uint64 extractUncoreFixedCounterValue(uint64 val);
851 uint64 extractL3CacheOccupancy(uint64 val);
860 bool packageEnergyMetricsAvailable()
const
863 cpu_model == PCM::JAKETOWN
864 || cpu_model == PCM::IVYTOWN
865 || cpu_model == PCM::SANDY_BRIDGE
866 || cpu_model == PCM::IVY_BRIDGE
867 || cpu_model == PCM::HASWELL
868 || original_cpu_model == PCM::ATOM_AVOTON
869 || cpu_model == PCM::HASWELLX
870 || cpu_model == PCM::BROADWELL
874 bool dramEnergyMetricsAvailable()
const
877 cpu_model == PCM::JAKETOWN
878 || cpu_model == PCM::IVYTOWN
879 || cpu_model == PCM::HASWELLX
883 bool packageThermalMetricsAvailable()
const
885 return packageEnergyMetricsAvailable();
888 bool outgoingQPITrafficMetricsAvailable()
const
891 cpu_model == PCM::NEHALEM_EX
892 || cpu_model == PCM::WESTMERE_EX
893 || cpu_model == PCM::JAKETOWN
894 || cpu_model == PCM::IVYTOWN
895 || cpu_model == PCM::HASWELLX
899 bool qpiUtilizationMetricsAvailable()
const
901 return outgoingQPITrafficMetricsAvailable();
904 bool memoryTrafficMetricsAvailable()
const
907 cpu_model == PCM::ATOM
908 || cpu_model == PCM::CLARKDALE
912 bool memoryIOTrafficMetricAvailable()
const
915 cpu_model == PCM::SANDY_BRIDGE
916 || cpu_model == PCM::IVY_BRIDGE
917 || cpu_model == PCM::HASWELL
918 || cpu_model == PCM::BROADWELL
922 bool hasBecktonUncore()
const
925 cpu_model == PCM::NEHALEM_EX
926 || cpu_model == PCM::WESTMERE_EX
929 bool hasPCICFGUncore() const
932 cpu_model == PCM::JAKETOWN
933 || cpu_model == PCM::IVYTOWN
934 || cpu_model == PCM::HASWELLX
947 template <
class CounterStateType>
948 friend double getExecUsage(
const CounterStateType & before,
const CounterStateType & after);
949 template <
class CounterStateType>
950 friend double getIPC(
const CounterStateType & before,
const CounterStateType & after);
951 template <
class CounterStateType>
952 friend double getAverageFrequency(
const CounterStateType & before,
const CounterStateType & after);
953 template <
class CounterStateType>
955 template <
class CounterStateType>
957 template <
class CounterStateType>
959 template <
class CounterStateType>
960 friend double getRelativeFrequency(
const CounterStateType & before,
const CounterStateType & after);
961 template <
class CounterStateType>
963 template <
class CounterStateType>
964 friend double getL2CacheHitRatio(
const CounterStateType & before,
const CounterStateType & after);
965 template <
class CounterStateType>
966 friend double getL3CacheHitRatio(
const CounterStateType & before,
const CounterStateType & after);
967 template <
class CounterStateType>
968 friend uint64
getL3CacheMisses(
const CounterStateType & before,
const CounterStateType & after);
969 template <
class CounterStateType>
970 friend uint64
getL2CacheMisses(
const CounterStateType & before,
const CounterStateType & after);
971 template <
class CounterStateType>
972 friend uint64
getL2CacheHits(
const CounterStateType & before,
const CounterStateType & after);
973 template <
class CounterStateType>
975 template <
class CounterStateType>
976 friend uint64
getCycles(
const CounterStateType & before,
const CounterStateType & after);
977 template <
class CounterStateType>
979 template <
class CounterStateType>
980 friend uint64
getCycles(
const CounterStateType & now);
981 template <
class CounterStateType>
983 template <
class CounterStateType>
984 friend uint64
getL3CacheHitsNoSnoop(
const CounterStateType & before,
const CounterStateType & after);
985 template <
class CounterStateType>
986 friend uint64
getL3CacheHitsSnoop(
const CounterStateType & before,
const CounterStateType & after);
987 template <
class CounterStateType>
988 friend uint64
getL3CacheHits(
const CounterStateType & before,
const CounterStateType & after);
989 template <
class CounterStateType>
990 friend uint64
getNumberOfCustomEvents(int32 eventCounterNr,
const CounterStateType & before,
const CounterStateType & after);
991 template <
class CounterStateType>
992 friend uint64
getInvariantTSC(
const CounterStateType & before,
const CounterStateType & after);
993 template <
class CounterStateType>
994 friend uint64
getRefCycles(
const CounterStateType & before,
const CounterStateType & after);
995 template <
class CounterStateType>
996 friend double getCoreCStateResidency(
int state,
const CounterStateType & before,
const CounterStateType & after);
998 uint64 InstRetiredAny;
999 uint64 CpuClkUnhaltedThread;
1000 uint64 CpuClkUnhaltedRef;
1008 uint64 L3UnsharedHit;
1020 uint64 InvariantTSC;
1021 uint64 CStateResidency[PCM::MAX_C_STATE + 1];
1022 int32 ThermalHeadroom;
1028 , CpuClkUnhaltedThread(0)
1029 , CpuClkUnhaltedRef(0)
1035 , ThermalHeadroom(PCM_INVALID_THERMAL_HEADROOM)
1038 memset(&(CStateResidency[0]), 0,
sizeof(CStateResidency));
1044 InstRetiredAny += o.InstRetiredAny;
1045 CpuClkUnhaltedThread += o.CpuClkUnhaltedThread;
1046 CpuClkUnhaltedRef += o.CpuClkUnhaltedRef;
1051 InvariantTSC += o.InvariantTSC;
1052 for(
int i=0; i <= PCM::MAX_C_STATE ;++i)
1053 CStateResidency[i] += o.CStateResidency[i];
1055 L3Occupancy += o.L3Occupancy;
1068 template <
class CounterStateType>
1069 uint64
getQPIClocks(uint32 port,
const CounterStateType & before,
const CounterStateType & after)
1071 return after.QPIClocks[port] - before.QPIClocks[port];
1075 template <
class CounterStateType>
1076 int32 getThermalHeadroom(
const CounterStateType & ,
const CounterStateType & after)
1078 return after.getThermalHeadroom();
1086 template <
class CounterStateType>
1087 uint64
getQPIL0pTxCycles(uint32 port,
const CounterStateType & before,
const CounterStateType & after)
1089 return after.QPIL0pTxCycles[port] - before.QPIL0pTxCycles[port];
1097 template <
class CounterStateType>
1098 uint64
getQPIL1Cycles(uint32 port,
const CounterStateType & before,
const CounterStateType & after)
1100 return after.QPIL1Cycles[port] - before.QPIL1Cycles[port];
1109 template <
class CounterStateType>
1121 template <
class CounterStateType>
1132 template <
class CounterStateType>
1133 uint64
getDRAMClocks(uint32 channel,
const CounterStateType & before,
const CounterStateType & after)
1135 return after.DRAMClocks[channel] - before.DRAMClocks[channel];
1144 template <
class CounterStateType>
1145 uint64
getMCCounter(uint32 channel, uint32 counter,
const CounterStateType & before,
const CounterStateType & after)
1147 return after.MCCounter[channel][counter] - before.MCCounter[channel][counter];
1155 template <
class CounterStateType>
1156 uint64
getPCUCounter(uint32 counter,
const CounterStateType & before,
const CounterStateType & after)
1158 return after.PCUCounter[counter] - before.PCUCounter[counter];
1165 template <
class CounterStateType>
1166 uint64
getPCUClocks(
const CounterStateType & before,
const CounterStateType & after)
1175 template <
class CounterStateType>
1178 return after.PackageEnergyStatus - before.PackageEnergyStatus;
1185 template <
class CounterStateType>
1188 return after.DRAMEnergyStatus - before.DRAMEnergyStatus;
1195 template <
class CounterStateType>
1208 template <
class CounterStateType>
1213 double dram_joules_per_energy_unit;
1221 dram_joules_per_energy_unit=0.0000153;
1238 template <
class CounterStateType>
1239 friend uint64
getBytesReadFromMC(
const CounterStateType & before,
const CounterStateType & after);
1240 template <
class CounterStateType>
1241 friend uint64
getBytesWrittenToMC(
const CounterStateType & before,
const CounterStateType & after);
1242 template <
class CounterStateType>
1244 template <
class CounterStateType>
1245 friend uint64
getConsumedEnergy(
const CounterStateType & before,
const CounterStateType & after);
1246 template <
class CounterStateType>
1247 friend uint64
getDRAMConsumedEnergy(
const CounterStateType & before,
const CounterStateType & after);
1248 template <
class CounterStateType>
1251 uint64 UncMCFullWrites;
1252 uint64 UncMCNormalReads;
1253 uint64 UncMCIORequests;
1254 uint64 PackageEnergyStatus;
1255 uint64 DRAMEnergyStatus;
1256 uint64 CStateResidency[PCM::MAX_C_STATE + 1];
1261 , UncMCNormalReads(0)
1262 , UncMCIORequests(0)
1263 , PackageEnergyStatus(0)
1264 , DRAMEnergyStatus(0)
1266 memset(&(CStateResidency[0]), 0,
sizeof(CStateResidency));
1272 UncMCFullWrites += o.UncMCFullWrites;
1273 UncMCNormalReads += o.UncMCNormalReads;
1274 UncMCIORequests += o.UncMCIORequests;
1275 PackageEnergyStatus += o.PackageEnergyStatus;
1276 DRAMEnergyStatus += o.DRAMEnergyStatus;
1277 for(
int i=0; i <= PCM::MAX_C_STATE ;++i)
1278 CStateResidency[i] += o.CStateResidency[i];
1288 uint64 QPIClocks[3], QPIL0pTxCycles[3], QPIL1Cycles[3];
1289 uint64 DRAMClocks[8];
1290 uint64 MCCounter[8][4];
1291 uint64 PCUCounter[4];
1292 int32 PackageThermalHeadroom;
1293 uint64 InvariantTSC;
1295 template <
class CounterStateType>
1296 friend uint64
getQPIClocks(uint32 port,
const CounterStateType & before,
const CounterStateType & after);
1297 template <
class CounterStateType>
1298 friend uint64
getQPIL0pTxCycles(uint32 port,
const CounterStateType & before,
const CounterStateType & after);
1299 template <
class CounterStateType>
1300 friend uint64
getQPIL1Cycles(uint32 port,
const CounterStateType & before,
const CounterStateType & after);
1301 template <
class CounterStateType>
1302 friend uint64
getDRAMClocks(uint32 channel,
const CounterStateType & before,
const CounterStateType & after);
1303 template <
class CounterStateType>
1304 friend uint64
getMCCounter(uint32 channel, uint32 counter,
const CounterStateType & before,
const CounterStateType & after);
1305 template <
class CounterStateType>
1306 friend uint64
getPCUCounter(uint32 counter,
const CounterStateType & before,
const CounterStateType & after);
1307 template <
class CounterStateType>
1308 friend uint64
getConsumedEnergy(
const CounterStateType & before,
const CounterStateType & after);
1309 template <
class CounterStateType>
1310 friend uint64
getDRAMConsumedEnergy(
const CounterStateType & before,
const CounterStateType & after);
1311 template <
class CounterStateType>
1312 friend uint64
getInvariantTSC(
const CounterStateType & before,
const CounterStateType & after);
1317 PackageThermalHeadroom(0)
1320 memset(&(QPIClocks[0]), 0, 3*
sizeof(uint64));
1321 memset(&(QPIL0pTxCycles[0]), 0, 3*
sizeof(uint64));
1322 memset(&(QPIL1Cycles[0]), 0, 3*
sizeof(uint64));
1323 memset(&(DRAMClocks[0]), 0, 8*
sizeof(uint64));
1324 memset(&(PCUCounter[0]), 0, 4*
sizeof(uint64));
1325 for(
int i=0;i<8;++i)
1326 memset(&(MCCounter[i][0]), 0, 4*
sizeof(uint64));
1346 BasicCounterState::readAndAggregate(handle);
1347 UncoreCounterState::readAndAggregate(handle);
1353 BasicCounterState::operator += (o);
1361 std::vector<std::vector<uint64> > incomingQPIPackets;
1362 std::vector<std::vector<uint64> > outgoingQPIIdleFlits;
1363 std::vector<std::vector<uint64> > outgoingQPIDataNonDataFlits;
1369 BasicCounterState::readAndAggregate(handle);
1370 UncoreCounterState::readAndAggregate(handle);
1395 BasicCounterState::operator += (o);
1396 UncoreCounterState::operator += (o);
1437 template <
class CounterStateType>
1438 double getIPC(
const CounterStateType & before,
const CounterStateType & after)
1440 int64 clocks = after.CpuClkUnhaltedThread - before.CpuClkUnhaltedThread;
1442 return double(after.InstRetiredAny - before.InstRetiredAny) / double(clocks);
1453 template <
class CounterStateType>
1456 return after.InstRetiredAny - before.InstRetiredAny;
1465 template <
class CounterStateType>
1466 double getExecUsage(
const CounterStateType & before,
const CounterStateType & after)
1468 int64 timer_clocks = after.InvariantTSC - before.InvariantTSC;
1469 if (timer_clocks != 0)
1470 return double(after.InstRetiredAny - before.InstRetiredAny) / double(timer_clocks);
1479 template <
class CounterStateType>
1482 return now.InstRetiredAny;
1502 template <
class CounterStateType>
1503 uint64
getCycles(
const CounterStateType & before,
const CounterStateType & after)
1505 return after.CpuClkUnhaltedThread - before.CpuClkUnhaltedThread;
1518 template <
class CounterStateType>
1519 uint64
getRefCycles(
const CounterStateType & before,
const CounterStateType & after)
1521 return after.CpuClkUnhaltedRef - before.CpuClkUnhaltedRef;
1531 template <
class CounterStateType>
1534 return now.CpuClkUnhaltedThread;
1547 double ipc =
getIPC(before, after);
1580 template <
class CounterStateType>
1583 int64 clocks = after.CpuClkUnhaltedThread - before.CpuClkUnhaltedThread;
1584 int64 timer_clocks = after.InvariantTSC - before.InvariantTSC;
1586 if (timer_clocks != 0 && m)
1597 template <
class CounterStateType>
1600 int64 clocks = after.CpuClkUnhaltedThread - before.CpuClkUnhaltedThread;
1601 int64 ref_clocks = after.CpuClkUnhaltedRef - before.CpuClkUnhaltedRef;
1603 if (ref_clocks != 0 && m)
1614 template <
class CounterStateType>
1617 int64 clocks = after.CpuClkUnhaltedThread - before.CpuClkUnhaltedThread;
1618 int64 timer_clocks = after.InvariantTSC - before.InvariantTSC;
1619 if (timer_clocks != 0)
1620 return double(clocks) / double(timer_clocks);
1630 template <
class CounterStateType>
1633 int64 clocks = after.CpuClkUnhaltedThread - before.CpuClkUnhaltedThread;
1634 int64 ref_clocks = after.CpuClkUnhaltedRef - before.CpuClkUnhaltedRef;
1635 if (ref_clocks != 0)
1636 return double(clocks) / double(ref_clocks);
1647 template <
class CounterStateType>
1651 int64 clocks = after.CpuClkUnhaltedThread - before.CpuClkUnhaltedThread;
1654 return 180. * double(after.L3Miss - before.L3Miss) / double(clocks);
1667 template <
class CounterStateType>
1671 int64 clocks = after.CpuClkUnhaltedThread - before.CpuClkUnhaltedThread;
1674 double L3UnsharedHit = (double)(after.L3UnsharedHit - before.L3UnsharedHit);
1675 double L2HitM = (double)(after.L2HitM - before.L2HitM);
1676 return (35. * L3UnsharedHit + 74. * L2HitM) / double(clocks);
1688 template <
class CounterStateType>
1693 uint64 L2Miss = after.ArchLLCMiss - before.ArchLLCMiss;
1694 uint64 L2Ref = after.ArchLLCRef - before.ArchLLCRef;
1695 if (L2Ref)
return 1. - (double(L2Miss) / double(L2Ref));
1698 uint64 L3Miss = after.L3Miss - before.L3Miss;
1699 uint64 L3UnsharedHit = after.L3UnsharedHit - before.L3UnsharedHit;
1700 uint64 L2HitM = after.L2HitM - before.L2HitM;
1701 uint64 L2Hit = after.L2Hit - before.L2Hit;
1702 uint64 hits = L2Hit;
1703 uint64 all = L2Hit + L2HitM + L3UnsharedHit + L3Miss;
1704 if (all)
return double(hits) / double(all);
1716 template <
class CounterStateType>
1721 uint64 L3Miss = after.L3Miss - before.L3Miss;
1722 uint64 L3UnsharedHit = after.L3UnsharedHit - before.L3UnsharedHit;
1723 uint64 L2HitM = after.L2HitM - before.L2HitM;
1724 uint64 hits = L3UnsharedHit + L2HitM;
1725 uint64 all = L2HitM + L3UnsharedHit + L3Miss;
1726 if (all)
return double(hits) / double(all);
1738 template <
class CounterStateType>
1742 return after.L3Miss - before.L3Miss;
1752 template <
class CounterStateType>
1757 return after.ArchLLCMiss - before.ArchLLCMiss;
1759 uint64 L3Miss = after.L3Miss - before.L3Miss;
1760 uint64 L3UnsharedHit = after.L3UnsharedHit - before.L3UnsharedHit;
1761 uint64 L2HitM = after.L2HitM - before.L2HitM;
1762 return L2HitM + L3UnsharedHit + L3Miss;
1772 template <
class CounterStateType>
1773 uint64
getL2CacheHits(
const CounterStateType & before,
const CounterStateType & after)
1777 uint64 L2Miss = after.ArchLLCMiss - before.ArchLLCMiss;
1778 uint64 L2Ref = after.ArchLLCRef - before.ArchLLCRef;
1779 return L2Ref - L2Miss;
1781 return after.L2Hit - before.L2Hit;
1787 template <
class CounterStateType>
1791 return now.L3Occupancy ;
1802 template <
class CounterStateType>
1806 return after.L3UnsharedHit - before.L3UnsharedHit;
1816 template <
class CounterStateType>
1820 return after.L2HitM - before.L2HitM;
1831 template <
class CounterStateType>
1832 uint64
getL3CacheHits(
const CounterStateType & before,
const CounterStateType & after)
1846 template <
class CounterStateType>
1849 return after.InvariantTSC - before.InvariantTSC;
1859 template <
class CounterStateType>
1864 if(state == 0)
return double(
getRefCycles(before,after))/tsc;
1869 double result = 1.0 - double(
getRefCycles(before,after))/tsc;
1870 for(
int i = 2; i <= PCM::MAX_C_STATE; ++i)
1872 result -= (after.BasicCounterState::CStateResidency[i] - before.BasicCounterState::CStateResidency[i])/tsc;
1874 if(result < 0.) result = 0.;
1875 else if(result > 1.) result = 1.;
1879 return (after.BasicCounterState::CStateResidency[state] - before.BasicCounterState::CStateResidency[state])/tsc;
1889 template <
class CounterStateType>
1892 return double(after.UncoreCounterState::CStateResidency[state] - before.UncoreCounterState::CStateResidency[state])/double(
getInvariantTSC(before,after));
1902 template <
class CounterStateType>
1905 return (after.UncMCNormalReads - before.UncMCNormalReads) * 64;
1914 template <
class CounterStateType>
1917 return (after.UncMCFullWrites - before.UncMCFullWrites) * 64;
1926 template <
class CounterStateType>
1929 return (after.UncMCIORequests - before.UncMCIORequests) * 64;
1941 template <
class CounterStateType>
1944 return ((&after.Event0)[eventCounterNr] - (&before.Event0)[eventCounterNr]);
1959 uint64 b = before.incomingQPIPackets[socketNr][linkNr];
1960 uint64 a = after.incomingQPIPackets[socketNr][linkNr];
1962 return (a > b) ? (64 * (a - b)) : 0;
1978 if (!(m->qpiUtilizationMetricsAvailable()))
return 0.;
1983 return bytes / max_bytes;
2000 if(m->hasBecktonUncore())
2002 const uint64 b = before.outgoingQPIIdleFlits[socketNr][linkNr];
2003 const uint64 a = after.outgoingQPIIdleFlits[socketNr][linkNr];
2005 const double idle_flits = (double)((a > b) ? (a - b) : 0);
2006 const uint64 bTSC = before.uncoreTSC;
2007 const uint64 aTSC = after.uncoreTSC;
2008 const double tsc = (double)((aTSC > bTSC) ? (aTSC - bTSC) : 0);
2009 if(idle_flits > tsc)
return 0.;
2011 return (1. - (idle_flits / tsc));
2012 }
else if(m->hasPCICFGUncore())
2014 const uint64 b = before.outgoingQPIDataNonDataFlits[socketNr][linkNr];
2015 const uint64 a = after.outgoingQPIDataNonDataFlits[socketNr][linkNr];
2017 const double flits = (double)((a > b) ? (a - b) : 0);
2019 if(flits > max_flits)
return 1.;
2020 return (flits / max_flits);
2039 if (!(m->outgoingQPITrafficMetricsAvailable()))
return 0;
2044 return (uint64)(max_bytes * util);
2063 for (uint32 s = 0; s < ns; ++s)
2064 for (uint32 q = 0; q < qpiLinks; ++q)
2085 for (uint32 s = 0; s < ns; ++s)
2086 for (uint32 q = 0; q < qpiLinks; ++q)
2104 return 64 * now.incomingQPIPackets[socketNr][linkNr];
2121 for (uint32 q = 0; q < qpiLinks; ++q)
2140 for (uint32 s = 0; s < ns; ++s)
2159 return double(totalQPI) / double(memTraffic);
2167 return after.data - before.data;
double getNormalizedQPIL1Cycles(uint32 port, const CounterStateType &before, const CounterStateType &after)
Returns the ratio of QPI cycles in power saving shutdown mode.
Definition: cpucounters.h:1122
uint64 getSocketIncomingQPILinkBytes(uint32 socketNr, const SystemCounterState &now)
Get estimation of total QPI data traffic for this socket.
Definition: cpucounters.h:2115
friend uint64 getInvariantTSC(const CounterStateType &before, const CounterStateType &after)
Computes number of invariant time stamp counter ticks.
Definition: cpucounters.h:1847
uint64 getQPIClocks(uint32 port, const CounterStateType &before, const CounterStateType &after)
Returns QPI LL clock ticks.
Definition: cpucounters.h:1069
friend uint64 getBytesWrittenToMC(const CounterStateType &before, const CounterStateType &after)
Computes number of bytes written to DRAM memory controllers.
Definition: cpucounters.h:1915
friend uint64 getBytesReadFromMC(const CounterStateType &before, const CounterStateType &after)
Computes number of bytes read from DRAM memory controllers.
Definition: cpucounters.h:1903
uint64 getAllOutgoingQPILinkBytes(const SystemCounterState &before, const SystemCounterState &after)
Get estimation of total QPI data+nondata traffic.
Definition: cpucounters.h:2078
void cleanup()
Cleans up resources and stops performance counting.
Definition: cpucounters.cpp:2474
Internal type and constant definitions.
friend uint64 getPCUCounter(uint32 counter, const CounterStateType &before, const CounterStateType &after)
Direct read of power control unit PMU counter (counter meaning depends on the programming: power/perf...
Definition: cpucounters.h:1156
uint32 getNumQPIPorts() const
Returns the number of detected QPI ports.
Definition: cpucounters.h:162
uint64 getQPILinkSpeed(const uint32 linkNr) const
Returns the speed of the QPI link.
Definition: cpucounters.h:165
uint64 getNumberOfEvents(PCIeCounterState before, PCIeCounterState after)
Returns the raw count of PCIe events.
Definition: cpucounters.h:2165
bool good()
Checks the status of PCM object.
Definition: cpucounters.cpp:1491
void setRunState(int new_state)
Set Run State.
Definition: cpucounters.h:297
Socket-wide counter state.
Definition: cpucounters.h:1339
friend uint64 getL3CacheHitsNoSnoop(const CounterStateType &before, const CounterStateType &after)
Computes number of L3 cache hits where no snooping in sibling L2 caches had to be done...
Definition: cpucounters.h:1803
uint64 getL3CacheMisses(const CounterStateType &before, const CounterStateType &after)
Computes number of L3 cache misses.
Definition: cpucounters.h:1739
void freezeServerUncoreCounters()
Freezes uncore event counting (works only on microarchitecture codename SandyBridge-EP and IvyTown) ...
Definition: cpucounters.cpp:2974
Interface to access client bandwidth counters.
uint64 getIncomingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after)
Get estimation of QPI data traffic per incoming QPI link.
Definition: cpucounters.h:1957
friend uint64 getL3CacheHitsSnoop(const CounterStateType &before, const CounterStateType &after)
Computes number of L3 cache hits where snooping in sibling L2 caches had to be done.
Definition: cpucounters.h:1817
void resetPMU()
Forces PMU reset.
Definition: cpucounters.cpp:2395
INTELPCM_API SocketCounterState getSocketCounterState(uint32 socket)
Reads the counter state of a socket.
Definition: cpucounters.cpp:2665
SupportedCPUModels
Identifiers of supported CPU models.
Definition: cpucounters.h:630
double getCoreIPC(const SystemCounterState &before, const SystemCounterState &after)
Computes average number of retired instructions per core cycle for the entire system combining instru...
Definition: cpucounters.h:1545
uint64 getL3CacheHits(const CounterStateType &before, const CounterStateType &after)
Computes total number of L3 cache hits.
Definition: cpucounters.h:1832
Definition: cpucounters.h:319
uint64 getL3CacheHitsSnoop(const CounterStateType &before, const CounterStateType &after)
Computes number of L3 cache hits where snooping in sibling L2 caches had to be done.
Definition: cpucounters.h:1817
Object to access uncore counters in a socket/processor with microarchitecture codename SandyBridge-EP...
Definition: cpucounters.h:80
uint64 getDRAMConsumedEnergy(const CounterStateType &before, const CounterStateType &after)
Returns energy consumed by DRAM (measured in internal units)
Definition: cpucounters.h:1186
friend double getIPC(const CounterStateType &before, const CounterStateType &after)
Computes average number of retired instructions per core cycle (IPC)
Definition: cpucounters.h:1438
void reportQPISpeed() const
Print QPI Speeds.
Definition: cpucounters.h:170
Definition: cpucounters.h:70
unsigned getMaxRMID() const
Returns the maximum number of RMIDs supported by the socket.
Definition: cpucounters.cpp:423
uint64 getL3CacheOccupancy(const CounterStateType &now)
Computes L3 Cache Occupancy.
Definition: cpucounters.h:1788
Definition: cpucounters.h:318
friend double getActiveRelativeFrequency(const CounterStateType &before, const CounterStateType &after)
Computes average core frequency when not in powersaving C0-state (also taking Intel Turbo Boost techn...
Definition: cpucounters.h:1631
friend uint64 getConsumedEnergy(const CounterStateType &before, const CounterStateType &after)
Returns energy consumed by processor, excluding DRAM (measured in internal units) ...
Definition: cpucounters.h:1176
friend double getCyclesLostDueL2CacheMisses(const CounterStateType &before, const CounterStateType &after)
Estimates how many core cycles were potentially lost due to missing L2 cache but still hitting L3 cac...
Definition: cpucounters.h:1668
friend double getL2CacheHitRatio(const CounterStateType &before, const CounterStateType &after)
Computes L2 cache hit ratio.
Definition: cpucounters.h:1689
uint64 getQPILinkSpeed(uint32 socketNr, uint32 linkNr) const
Return QPI Link Speed in GBytes/second.
Definition: cpucounters.h:790
ProgramMode
Mode of programming (parameter in the program() method)
Definition: cpucounters.h:315
friend uint64 getInvariantTSC(const CounterStateType &before, const CounterStateType &after)
Computes number of invariant time stamp counter ticks.
Definition: cpucounters.h:1847
double getL2CacheHitRatio(const CounterStateType &before, const CounterStateType &after)
Computes L2 cache hit ratio.
Definition: cpucounters.h:1689
double getExecUsage(const CounterStateType &before, const CounterStateType &after)
Computes average number of retired instructions per time interval.
Definition: cpucounters.h:1466
uint64 getQPIL1Cycles(uint32 port, const CounterStateType &before, const CounterStateType &after)
Returns the number of QPI cycles in power saving shutdown mode.
Definition: cpucounters.h:1098
uint32 getMCChannelsPerSocket() const
Returns the total number of detected memory channels on all integrated memory controllers per socket...
Definition: cpucounters.h:716
int32 getThermalHeadroom() const
Returns current thermal headroom below TjMax.
Definition: cpucounters.h:1060
double getConsumedJoules(const CounterStateType &before, const CounterStateType &after)
Returns Joules consumed by processor (excluding DRAM)
Definition: cpucounters.h:1196
double getCyclesLostDueL3CacheMisses(const CounterStateType &before, const CounterStateType &after)
Estimates how many core cycles were potentially lost due to L3 cache misses.
Definition: cpucounters.h:1648
uint64 computeQPISpeed(const uint32 ref_core, const int cpumodel)
Measures/computes the maximum theoretical QPI link bandwidth speed in GBytes/second.
Definition: cpucounters.cpp:4183
uint64 getConsumedEnergy(const CounterStateType &before, const CounterStateType &after)
Returns energy consumed by processor, excluding DRAM (measured in internal units) ...
Definition: cpucounters.h:1176
double getActiveRelativeFrequency(const CounterStateType &before, const CounterStateType &after)
Computes average core frequency when not in powersaving C0-state (also taking Intel Turbo Boost techn...
Definition: cpucounters.h:1631
System-wide counter state.
Definition: cpucounters.h:1358
friend double getL3CacheHitRatio(const CounterStateType &before, const CounterStateType &after)
Computes L3 cache hit ratio.
Definition: cpucounters.h:1717
PCIeCounterState getPCIeCounterState(const uint32 socket_)
Get the state of PCIe counter(s)
Definition: cpucounters.cpp:4392
uint64 getTickCount(uint64 multiplier=1000, uint32 core=0)
Return TSC timer value in time units.
Definition: cpucounters.cpp:2606
uint32 getL3ScalingFactor()
Runs CPUID.0xF.0x01 to get the L3 upscaling factor used to calculate L3 occupancy. Scaling factor is retur...
Definition: cpucounters.cpp:3333
friend double getCoreCStateResidency(int state, const CounterStateType &before, const CounterStateType &after)
Computes residency in the core C-state.
Definition: cpucounters.h:1860
double getNormalizedQPIL0pTxCycles(uint32 port, const CounterStateType &before, const CounterStateType &after)
Returns the ratio of QPI cycles in power saving half-lane mode.
Definition: cpucounters.h:1110
bool L3CacheOccupancyMetricAvailable()
Checks if cache monitoring is present.
Definition: cpucounters.cpp:416
uint64 getQPIL0pTxCycles(uint32 port, const CounterStateType &before, const CounterStateType &after)
Returns the number of QPI cycles in power saving half-lane mode.
Definition: cpucounters.h:1087
friend uint64 getQPIL0pTxCycles(uint32 port, const CounterStateType &before, const CounterStateType &after)
Returns the number of QPI cycles in power saving half-lane mode.
Definition: cpucounters.h:1087
double getCoreCStateResidency(int state, const CounterStateType &before, const CounterStateType &after)
Computes residency in the core C-state.
Definition: cpucounters.h:1860
INTELPCM_API CoreCounterState getCoreCounterState(uint32 core)
Reads the counter state of a (logical) core.
Definition: cpucounters.cpp:2673
void unfreezeServerUncoreCounters()
Unfreezes uncore event counting (works only on microarchitecture codename SandyBridge-EP and IvyTown)...
Definition: cpucounters.cpp:2982
uint64 getDRAMClocks(uint32 channel, const CounterStateType &before, const CounterStateType &after)
Returns DRAM clock ticks.
Definition: cpucounters.h:1133
ErrorCode programServerUncorePowerMetrics(int mc_profile, int pcu_profile, int *freq_bands=NULL)
Programs uncore power/energy counters on microarchitectures codename SandyBridge-EP and IvyTown...
Definition: cpucounters.cpp:2806
friend double getOutgoingQPILinkUtilization(uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after)
Get utilization of outgoing QPI link (0..1)
Definition: cpucounters.h:1996
uint32 getCPUModel()
Reads CPU model id.
Definition: cpucounters.h:657
friend double getAverageFrequency(const CounterStateType &before, const CounterStateType &after)
Computes average core frequency also taking Intel Turbo Boost technology into account.
Definition: cpucounters.h:1581
Custom Core event description.
Definition: cpucounters.h:337
double getPackageCStateResidency(int state, const CounterStateType &before, const CounterStateType &after)
Computes residency in the package C-state.
Definition: cpucounters.h:1890
uint64 getAllIncomingQPILinkBytes(const SystemCounterState &before, const SystemCounterState &after)
Get estimation of total QPI data traffic.
Definition: cpucounters.h:2056
INTELPCM_API SystemCounterState getSystemCounterState()
Reads the counter state of the system.
Definition: cpucounters.cpp:2657
void program()
Program performance counters (disables programming power counters)
Definition: cpucounters.cpp:3760
uint64 getQPIL0pTxCycles(uint32 port)
Get the number of cycles on a QPI port when the link was in a power saving half-lane mode.
Definition: cpucounters.cpp:4062
Basic uncore counter state.
Definition: cpucounters.h:1235
void enableJKTWorkaround(bool enable)
Enable correct counting of various LLC events (with memory access perf penalty)
Definition: cpucounters.cpp:4148
uint64 getBytesWrittenToMC(const CounterStateType &before, const CounterStateType &after)
Computes number of bytes written to DRAM memory controllers.
Definition: cpucounters.h:1915
Extended custom core event description.
Definition: cpucounters.h:351
int32 getPackageThermalSpecPower() const
Returns thermal specification power of the package domain in Watt.
Definition: cpucounters.h:797
friend uint64 getNumberOfEvents(PCIeCounterState before, PCIeCounterState after)
Returns the raw count of PCIe events.
Definition: cpucounters.h:2165
double getActiveAverageFrequency(const CounterStateType &before, const CounterStateType &after)
Computes average core frequency when not in powersaving C0-state (also taking Intel Turbo Boost techn...
Definition: cpucounters.h:1598
double getRelativeFrequency(const CounterStateType &before, const CounterStateType &after)
Computes average core frequency also taking Intel Turbo Boost technology into account.
Definition: cpucounters.h:1615
uint64 getTickCountRDTSCP(uint64 multiplier=1000)
Return TSC timer value in time units using rdtscp instruction from current core.
Definition: cpucounters.cpp:2652
uint64 getMCCounter(uint32 channel, uint32 counter, const CounterStateType &before, const CounterStateType &after)
Direct read of memory controller PMU counter (counter meaning depends on the programming: power/perfo...
Definition: cpucounters.h:1145
uint64 getQPILLCounter(uint32 port, uint32 counter)
Direct read of QPI LL PMU counter (counter meaning depends on the programming: power/performance/etc)...
Definition: cpucounters.cpp:4122
uint64 getQPILinksPerSocket() const
Returns the number of Intel(r) Quick Path Interconnect(tm) links per socket.
Definition: cpucounters.h:673
friend double getExecUsage(const CounterStateType &before, const CounterStateType &after)
Computes average number of retired instructions per time interval.
Definition: cpucounters.h:1466
Definition: cpucounters.h:185
double getL3CacheHitRatio(const CounterStateType &before, const CounterStateType &after)
Computes L3 cache hit ratio.
Definition: cpucounters.h:1717
void programPCIeCounters(const PCIeEventCode event_, const uint32 tid_=0, const uint32 miss_=0)
Program uncore PCIe monitoring event(s)
Definition: cpucounters.cpp:4350
double getAverageFrequency(const CounterStateType &before, const CounterStateType &after)
Computes average core frequency also taking Intel Turbo Boost technology into account.
Definition: cpucounters.h:1581
uint64 getIORequestBytesFromMC(const CounterStateType &before, const CounterStateType &after)
Computes number of bytes of read/write requests from all IO sources.
Definition: cpucounters.h:1927
uint32 getNumSockets()
Reads number of sockets (CPUs) in the system.
Definition: cpucounters.cpp:3313
uint64 getL3CacheHitsNoSnoop(const CounterStateType &before, const CounterStateType &after)
Computes number of L3 cache hits where no snooping in sibling L2 caches had to be done...
Definition: cpucounters.h:1803
int getRunState(void)
Returns program's Run State.
Definition: cpucounters.h:303
uint32 getNumOnlineCores()
Reads number of online logical cores in the system.
Definition: cpucounters.cpp:3308
uint64 getOutgoingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after)
Get estimation of QPI (data+nondata) traffic per outgoing QPI link.
Definition: cpucounters.h:2036
bool getSMT()
Checks if SMT (HyperThreading) is enabled.
Definition: cpucounters.cpp:3323
void unfreezeCounters()
Unfreezes event counting.
Definition: cpucounters.cpp:4038
friend uint64 getRefCycles(const CounterStateType &before, const CounterStateType &after)
Computes the number of reference clock cycles while clock signal on the core is running.
Definition: cpucounters.h:1519
friend uint64 getMCCounter(uint32 channel, uint32 counter, const CounterStateType &before, const CounterStateType &after)
Direct read of memory controller PMU counter (counter meaning depends on the programming: power/perfo...
Definition: cpucounters.h:1145
uint64 getInvariantTSC(const CounterStateType &before, const CounterStateType &after)
Computes number of invariant time stamp counter ticks.
Definition: cpucounters.h:1847
Definition: cpucounters.h:316
Low level interface to access PCI configuration space.
friend uint64 getIncomingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after)
Get estimation of QPI data traffic per incoming QPI link.
Definition: cpucounters.h:1957
friend uint64 getL2CacheMisses(const CounterStateType &before, const CounterStateType &after)
Computes number of L2 cache misses.
Definition: cpucounters.h:1753
uint64 getRefCycles(const CounterStateType &before, const CounterStateType &after)
Computes the number of reference clock cycles while clock signal on the core is running.
Definition: cpucounters.h:1519
friend uint64 getL2CacheHits(const CounterStateType &before, const CounterStateType &after)
Computes number of L2 cache hits.
Definition: cpucounters.h:1773
uint64 getPCUClocks(const CounterStateType &before, const CounterStateType &after)
Returns clock ticks of power control unit.
Definition: cpucounters.h:1166
friend uint64 getDRAMConsumedEnergy(const CounterStateType &before, const CounterStateType &after)
Returns energy consumed by DRAM (measured in internal units)
Definition: cpucounters.h:1186
uint32 getOriginalCPUModel()
Reads original CPU model id.
Definition: cpucounters.h:661
Provides 64-bit "virtual" counters from underlying 32-bit HW counters.
void setOutput(const std::string filename)
Redirects output destination to provided file, instead of std::cout.
Definition: cpucounters.cpp:2456
void freezeCounters()
Freezes event counting.
Definition: cpucounters.cpp:4026
static bool initWinRing0Lib()
Loads and initializes Winring0 third party library for access to processor model specific and PCI con...
int32 getSocketId(uint32 core_id)
Determines socket of given core.
Definition: cpucounters.h:666
friend double getRelativeFrequency(const CounterStateType &before, const CounterStateType &after)
Computes average core frequency also taking Intel Turbo Boost technology into account.
Definition: cpucounters.h:1615
void allowMultipleInstances()
Call it before program() to allow multiple running instances of PCM on the same system.
Definition: cpucounters.h:309
friend uint64 getConsumedEnergy(const CounterStateType &before, const CounterStateType &after)
Returns energy consumed by processor, excluding DRAM (measured in internal units) ...
Definition: cpucounters.h:1176
int32 getPackageMaximumPower() const
Returns maximum power derived from electrical spec of the package domain in Watt. ...
Definition: cpucounters.h:803
friend uint64 getL3CacheOccupancy(const CounterStateType &now)
Computes L3 Cache Occupancy.
Definition: cpucounters.h:1788
friend uint64 getDRAMClocks(uint32 channel, const CounterStateType &before, const CounterStateType &after)
Returns DRAM clock ticks.
Definition: cpucounters.h:1133
void program_power_metrics(int mc_profile)
Program power counters (disables programming performance counters)
Definition: cpucounters.cpp:3909
uint64 getCycles(const CounterStateType &before, const CounterStateType &after)
Computes the number of core clock cycles when signal on a specific core is running (not halted) ...
Definition: cpucounters.h:1503
void getAllCounterStates(SystemCounterState &systemState, std::vector< SocketCounterState > &socketStates, std::vector< CoreCounterState > &coreStates)
Reads all counter states (including system, sockets and cores)
Definition: cpucounters.cpp:3254
friend double getPackageCStateResidency(int state, const CounterStateType &before, const CounterStateType &after)
Computes residency in the package C-state.
Definition: cpucounters.h:1890
uint64 getMCCounter(uint32 channel, uint32 counter)
Direct read of memory controller PMU counter (counter meaning depends on the programming: power/perfo...
Definition: cpucounters.cpp:4096
double getIncomingQPILinkUtilization(uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after)
Get data utilization of incoming QPI link (0..1)
Definition: cpucounters.h:1975
uint64 getQPIClocks(uint32 port)
Get number of QPI LL clocks on a QPI port.
Definition: cpucounters.cpp:4050
const char * getUArchCodename(int32 cpu_model_=-1) const
Get a string describing the codename of the processor microarchitecture.
Definition: cpucounters.cpp:2326
uint64 getNumberOfCustomEvents(int32 eventCounterNr, const CounterStateType &before, const CounterStateType &after)
Returns the number of occurred custom core events.
Definition: cpucounters.h:1942
static std::string getCPUBrandString()
Get Brand string of processor.
Definition: cpucounters.cpp:2176
Definition: width_extender.h:39
friend uint64 getQPIClocks(uint32 port, const CounterStateType &before, const CounterStateType &after)
Returns QPI LL clock ticks.
Definition: cpucounters.h:1069
double getDRAMConsumedJoules(const CounterStateType &before, const CounterStateType &after)
Returns Joules consumed by DRAM.
Definition: cpucounters.h:1209
uint64 getInstructionsRetired(const CounterStateType &before, const CounterStateType &after)
Computes the number of retired instructions.
Definition: cpucounters.h:1454
uint32 getNumCores()
Reads number of logical cores in the system.
Definition: cpucounters.cpp:3303
CPU Performance Monitor.
Definition: cpucounters.h:212
int32 getPackageMinimumPower() const
Returns minimum power derived from electrical spec of the package domain in Watt. ...
Definition: cpucounters.h:800
friend uint64 getIORequestBytesFromMC(const CounterStateType &before, const CounterStateType &after)
Computes number of bytes of read/write requests from all IO sources.
Definition: cpucounters.h:1927
friend uint64 getL3CacheMisses(const CounterStateType &before, const CounterStateType &after)
Computes number of L3 cache misses.
Definition: cpucounters.h:1739
friend double getCyclesLostDueL3CacheMisses(const CounterStateType &before, const CounterStateType &after)
Estimates how many core cycles were potentially lost due to L3 cache misses.
Definition: cpucounters.h:1648
uint64 getNominalFrequency()
Reads the nominal core frequency.
Definition: cpucounters.cpp:3328
double getOutgoingQPILinkUtilization(uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after)
Get utilization of outgoing QPI link (0..1)
Definition: cpucounters.h:1996
uint64 getImcWrites()
Get the number of integrated controller writes (in cache lines)
Definition: cpucounters.cpp:3877
uint64 getPCUCounter(uint32 counter, const CounterStateType &before, const CounterStateType &after)
Direct read of power control unit PMU counter (counter meaning depends on the programming: power/perf...
Definition: cpucounters.h:1156
friend uint64 getInstructionsRetired(const CounterStateType &before, const CounterStateType &after)
Computes the number of retired instructions.
Definition: cpucounters.h:1454
ErrorCode program(const ProgramMode mode_=DEFAULT_EVENTS, const void *parameter_=NULL)
Programs performance counters.
Definition: cpucounters.cpp:1556
uint64 getPCUFrequency() const
Returns the frequency of Power Control Unit.
Definition: cpucounters.h:761
uint32 getThreadsPerCore()
Reads how many hardware threads a physical core has. "Hardware thread" is a logical core in a differen...
Definition: cpucounters.cpp:3318
double getJoulesPerEnergyUnit() const
Returns how many joules are in an internal processor energy unit.
Definition: cpucounters.h:794
void restoreOutput()
Restores output, closes output file if opened.
Definition: cpucounters.cpp:2463
Definition: client_bw.h:40
uint64 getL2CacheMisses(const CounterStateType &before, const CounterStateType &after)
Computes number of L2 cache misses.
Definition: cpucounters.h:1753
const std::string & getErrorMessage() const
Returns the error message.
Definition: cpucounters.h:493
double getIPC(const CounterStateType &before, const CounterStateType &after)
Computes average number of retired instructions per core cycle (IPC)
Definition: cpucounters.h:1438
int32 getPackageThermalHeadroom() const
Returns current thermal headroom below TjMax.
Definition: cpucounters.h:1315
bool isCoreCStateResidencySupported(int state)
Returns true if the specified core C-state residency metric is supported.
Definition: cpucounters.h:273
Server uncore power counter state.
Definition: cpucounters.h:1286
uint32 getNumMCChannels() const
Returns the total number of detected memory channels on all integrated memory controllers.
Definition: cpucounters.h:182
friend uint64 getOutgoingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after)
Get estimation of QPI (data+nondata) traffic per outgoing QPI link.
Definition: cpucounters.h:2036
Definition: cpucounters.h:317
bool isPackageCStateResidencySupported(int state)
Returns true if the specified package C-state residency metric is supported.
Definition: cpucounters.h:282
friend uint64 getDRAMConsumedEnergy(const CounterStateType &before, const CounterStateType &after)
Returns energy consumed by DRAM (measured in internal units)
Definition: cpucounters.h:1186
(Logical) core-wide counter state
Definition: cpucounters.h:1331
uint64 getL2CacheHits(const CounterStateType &before, const CounterStateType &after)
Computes number of L2 cache hits.
Definition: cpucounters.h:1773
bool isCoreOnline(int32 os_core_id) const
Returns true if the core is online.
Definition: cpucounters.cpp:1404
double getCyclesLostDueL2CacheMisses(const CounterStateType &before, const CounterStateType &after)
Estimates how many core cycles were potentially lost due to missing L2 cache but still hitting L3 cac...
Definition: cpucounters.h:1668
uint32 getMCPerSocket() const
Returns the number of detected integrated memory controllers per socket.
Definition: cpucounters.h:696
friend uint64 getCycles(const CounterStateType &before, const CounterStateType &after)
Computes the number of core clock cycles when signal on a specific core is running (not halted) ...
Definition: cpucounters.h:1503
uint32 getNumMC() const
Returns the number of detected integrated memory controllers.
Definition: cpucounters.h:179
uint64 getBytesReadFromMC(const CounterStateType &before, const CounterStateType &after)
Computes number of bytes read from DRAM memory controllers.
Definition: cpucounters.h:1903
uint64 getOutgoingDataNonDataFlits(uint32 port)
Get the number of outgoing data and non-data flits from the socket through a port.
Definition: cpucounters.cpp:3904
friend double getActiveAverageFrequency(const CounterStateType &before, const CounterStateType &after)
Computes average core frequency when not in powersaving C0-state (also taking Intel Turbo Boost techn...
Definition: cpucounters.h:1598
uint64 getDRAMClocks(uint32 channel)
Get the number of DRAM channel cycles.
Definition: cpucounters.cpp:4086
uint64 getQPIL1Cycles(uint32 port)
Get the number of cycles on a QPI port when the link was in a power saving shutdown mode.
Definition: cpucounters.cpp:4074
double getTotalExecUsage(const SystemCounterState &before, const SystemCounterState &after)
Computes average number of retired instructions per time interval for the entire system combining in...
Definition: cpucounters.h:1565
uint32 getMaxIPC() const
Returns the max number of instructions per cycle.
Definition: cpucounters.h:737
friend uint64 getL3CacheHits(const CounterStateType &before, const CounterStateType &after)
Computes total number of L3 cache hits.
Definition: cpucounters.h:1832
friend uint64 getNumberOfCustomEvents(int32 eventCounterNr, const CounterStateType &before, const CounterStateType &after)
Returns the number of occurred custom core events.
Definition: cpucounters.h:1942
Low level interface to access hardware model specific registers.
ServerUncorePowerState getServerUncorePowerState(uint32 socket)
Reads the power/energy counter state of a socket (works only on microarchitecture codename SandyBridg...
Definition: cpucounters.cpp:3342
ErrorCode
Return codes (e.g. for program(..) method)
Definition: cpucounters.h:323
uint64 getImcReads()
Get the number of integrated controller reads (in cache lines)
Definition: cpucounters.cpp:3863
static PCM * getInstance()
Returns PCM object.
Definition: cpucounters.cpp:196
Basic core counter state.
Definition: cpucounters.h:944
uint64 getIncomingDataFlits(uint32 port)
Get the number of incoming data flits to the socket through a port.
Definition: cpucounters.cpp:3891
double getQPItoMCTrafficRatio(const SystemCounterState &before, const SystemCounterState &after)
Get QPI data to Memory Controller traffic ratio.
Definition: cpucounters.h:2155
friend uint64 getQPIL1Cycles(uint32 port, const CounterStateType &before, const CounterStateType &after)
Returns the number of QPI cycles in power saving shutdown mode.
Definition: cpucounters.h:1098