Intel(r) Performance Counter Monitor
|
Main CPU counters header. More...
#include "types.h"
#include "msr.h"
#include "pci.h"
#include "client_bw.h"
#include "width_extender.h"
#include <vector>
#include <limits>
#include <string>
#include <string.h>
#include <semaphore.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
Go to the source code of this file.
Classes | |
struct | TopologyEntry |
class | ServerPCICFGUncore |
Object to access uncore counters in a socket/processor with microarchitecture codename SandyBridge-EP (Jaketown) or Ivytown-EP or Ivytown-EX. More... | |
class | PCIeCounterState |
class | PCM |
CPU Performance Monitor. More... | |
struct | PCM::CustomCoreEventDescription |
Custom Core event description. More... | |
struct | PCM::ExtendedCustomCoreEventDescription |
Extended custom core event description. More... | |
class | BasicCounterState |
Basic core counter state. More... | |
class | UncoreCounterState |
Basic uncore counter state. More... | |
class | ServerUncorePowerState |
Server uncore power counter state. More... | |
class | CoreCounterState |
(Logical) core-wide counter state More... | |
class | SocketCounterState |
Socket-wide counter state. More... | |
class | SystemCounterState |
System-wide counter state. More... | |
Macros | |
#define | INTEL_PCM_VERSION "V2.8 ($Format:%ci ID=%h$)" |
#define | INTELPCM_API |
#define | NOMINMAX |
Functions | |
template<class CounterStateType > | |
uint64 | getQPIClocks (uint32 port, const CounterStateType &before, const CounterStateType &after) |
Returns QPI LL clock ticks. More... | |
template<class CounterStateType > | |
int32 | getThermalHeadroom (const CounterStateType &, const CounterStateType &after) |
template<class CounterStateType > | |
uint64 | getQPIL0pTxCycles (uint32 port, const CounterStateType &before, const CounterStateType &after) |
Returns the number of QPI cycles in power saving half-lane mode. More... | |
template<class CounterStateType > | |
uint64 | getQPIL1Cycles (uint32 port, const CounterStateType &before, const CounterStateType &after) |
Returns the number of QPI cycles in power saving shutdown mode. More... | |
template<class CounterStateType > | |
double | getNormalizedQPIL0pTxCycles (uint32 port, const CounterStateType &before, const CounterStateType &after) |
Returns the ratio of QPI cycles in power saving half-lane mode. More... | |
template<class CounterStateType > | |
double | getNormalizedQPIL1Cycles (uint32 port, const CounterStateType &before, const CounterStateType &after) |
Returns the ratio of QPI cycles in power saving shutdown mode. More... | |
template<class CounterStateType > | |
uint64 | getDRAMClocks (uint32 channel, const CounterStateType &before, const CounterStateType &after) |
Returns DRAM clock ticks. More... | |
template<class CounterStateType > | |
uint64 | getMCCounter (uint32 channel, uint32 counter, const CounterStateType &before, const CounterStateType &after) |
Direct read of memory controller PMU counter (counter meaning depends on the programming: power/performance/etc) More... | |
template<class CounterStateType > | |
uint64 | getPCUCounter (uint32 counter, const CounterStateType &before, const CounterStateType &after) |
Direct read of power control unit PMU counter (counter meaning depends on the programming: power/performance/etc) More... | |
template<class CounterStateType > | |
uint64 | getPCUClocks (const CounterStateType &before, const CounterStateType &after) |
Returns clock ticks of power control unit. More... | |
template<class CounterStateType > | |
uint64 | getConsumedEnergy (const CounterStateType &before, const CounterStateType &after) |
Returns energy consumed by processor, excluding DRAM (measured in internal units) More... | |
template<class CounterStateType > | |
uint64 | getDRAMConsumedEnergy (const CounterStateType &before, const CounterStateType &after) |
Returns energy consumed by DRAM (measured in internal units) More... | |
template<class CounterStateType > | |
double | getConsumedJoules (const CounterStateType &before, const CounterStateType &after) |
Returns Joules consumed by processor (excluding DRAM) More... | |
template<class CounterStateType > | |
double | getDRAMConsumedJoules (const CounterStateType &before, const CounterStateType &after) |
Returns Joules consumed by DRAM. More... | |
INTELPCM_API SystemCounterState | getSystemCounterState () |
Reads the counter state of the system. More... | |
INTELPCM_API SocketCounterState | getSocketCounterState (uint32 socket) |
Reads the counter state of a socket. More... | |
INTELPCM_API CoreCounterState | getCoreCounterState (uint32 core) |
Reads the counter state of a (logical) core. More... | |
template<class CounterStateType > | |
double | getIPC (const CounterStateType &before, const CounterStateType &after) |
Computes average number of retired instructions per core cycle (IPC) More... | |
template<class CounterStateType > | |
uint64 | getInstructionsRetired (const CounterStateType &before, const CounterStateType &after) |
Computes the number of retired instructions. More... | |
template<class CounterStateType > | |
double | getExecUsage (const CounterStateType &before, const CounterStateType &after) |
Computes average number of retired instructions per time interval. More... | |
template<class CounterStateType > | |
uint64 | getInstructionsRetired (const CounterStateType &now) |
Computes the number of retired instructions. More... | |
template<class CounterStateType > | |
uint64 | getCycles (const CounterStateType &before, const CounterStateType &after) |
Computes the number of core clock cycles when the clock signal on a specific core is running (not halted) More... | |
template<class CounterStateType > | |
uint64 | getRefCycles (const CounterStateType &before, const CounterStateType &after) |
Computes the number of reference clock cycles while clock signal on the core is running. More... | |
template<class CounterStateType > | |
uint64 | getCycles (const CounterStateType &now) |
Computes the number of executed core clock cycles. More... | |
double | getCoreIPC (const SystemCounterState &before, const SystemCounterState &after) |
Computes average number of retired instructions per core cycle for the entire system combining instruction counts from logical cores to corresponding physical cores. More... | |
double | getTotalExecUsage (const SystemCounterState &before, const SystemCounterState &after) |
Computes average number of retired instructions per time interval for the entire system combining instruction counts from logical cores to corresponding physical cores. More... | |
template<class CounterStateType > | |
double | getAverageFrequency (const CounterStateType &before, const CounterStateType &after) |
Computes average core frequency also taking Intel Turbo Boost technology into account. More... | |
template<class CounterStateType > | |
double | getActiveAverageFrequency (const CounterStateType &before, const CounterStateType &after) |
Computes average core frequency while the core is in the active C0 state, i.e. not in a power-saving C-state (also taking Intel Turbo Boost technology into account) More... | |
template<class CounterStateType > | |
double | getRelativeFrequency (const CounterStateType &before, const CounterStateType &after) |
Computes average core frequency also taking Intel Turbo Boost technology into account. More... | |
template<class CounterStateType > | |
double | getActiveRelativeFrequency (const CounterStateType &before, const CounterStateType &after) |
Computes average core frequency while the core is in the active C0 state, i.e. not in a power-saving C-state (also taking Intel Turbo Boost technology into account) More... | |
template<class CounterStateType > | |
double | getCyclesLostDueL3CacheMisses (const CounterStateType &before, const CounterStateType &after) |
Estimates how many core cycles were potentially lost due to L3 cache misses. More... | |
template<class CounterStateType > | |
double | getCyclesLostDueL2CacheMisses (const CounterStateType &before, const CounterStateType &after) |
Estimates how many core cycles were potentially lost due to missing L2 cache but still hitting L3 cache. More... | |
template<class CounterStateType > | |
double | getL2CacheHitRatio (const CounterStateType &before, const CounterStateType &after) |
Computes L2 cache hit ratio. More... | |
template<class CounterStateType > | |
double | getL3CacheHitRatio (const CounterStateType &before, const CounterStateType &after) |
Computes L3 cache hit ratio. More... | |
template<class CounterStateType > | |
uint64 | getL3CacheMisses (const CounterStateType &before, const CounterStateType &after) |
Computes number of L3 cache misses. More... | |
template<class CounterStateType > | |
uint64 | getL2CacheMisses (const CounterStateType &before, const CounterStateType &after) |
Computes number of L2 cache misses. More... | |
template<class CounterStateType > | |
uint64 | getL2CacheHits (const CounterStateType &before, const CounterStateType &after) |
Computes number of L2 cache hits. More... | |
template<class CounterStateType > | |
uint64 | getL3CacheOccupancy (const CounterStateType &now) |
Computes L3 Cache Occupancy. More... | |
template<class CounterStateType > | |
uint64 | getL3CacheHitsNoSnoop (const CounterStateType &before, const CounterStateType &after) |
Computes number of L3 cache hits where no snooping in sibling L2 caches had to be done. More... | |
template<class CounterStateType > | |
uint64 | getL3CacheHitsSnoop (const CounterStateType &before, const CounterStateType &after) |
Computes number of L3 cache hits where snooping in sibling L2 caches had to be done. More... | |
template<class CounterStateType > | |
uint64 | getL3CacheHits (const CounterStateType &before, const CounterStateType &after) |
Computes total number of L3 cache hits. More... | |
template<class CounterStateType > | |
uint64 | getInvariantTSC (const CounterStateType &before, const CounterStateType &after) |
Computes number of invariant time stamp counter ticks. More... | |
template<class CounterStateType > | |
double | getCoreCStateResidency (int state, const CounterStateType &before, const CounterStateType &after) |
Computes residency in the core C-state. More... | |
template<class CounterStateType > | |
double | getPackageCStateResidency (int state, const CounterStateType &before, const CounterStateType &after) |
Computes residency in the package C-state. More... | |
template<class CounterStateType > | |
uint64 | getBytesReadFromMC (const CounterStateType &before, const CounterStateType &after) |
Computes number of bytes read from DRAM memory controllers. More... | |
template<class CounterStateType > | |
uint64 | getBytesWrittenToMC (const CounterStateType &before, const CounterStateType &after) |
Computes number of bytes written to DRAM memory controllers. More... | |
template<class CounterStateType > | |
uint64 | getIORequestBytesFromMC (const CounterStateType &before, const CounterStateType &after) |
Computes number of bytes of read/write requests from all IO sources. More... | |
template<class CounterStateType > | |
uint64 | getNumberOfCustomEvents (int32 eventCounterNr, const CounterStateType &before, const CounterStateType &after) |
Returns the number of occurred custom core events. More... | |
uint64 | getIncomingQPILinkBytes (uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after) |
Get estimation of QPI data traffic per incoming QPI link. More... | |
double | getIncomingQPILinkUtilization (uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after) |
Get data utilization of incoming QPI link (0..1) More... | |
double | getOutgoingQPILinkUtilization (uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after) |
Get utilization of outgoing QPI link (0..1) More... | |
uint64 | getOutgoingQPILinkBytes (uint32 socketNr, uint32 linkNr, const SystemCounterState &before, const SystemCounterState &after) |
Get estimation of QPI (data+nondata) traffic per outgoing QPI link. More... | |
uint64 | getAllIncomingQPILinkBytes (const SystemCounterState &before, const SystemCounterState &after) |
Get estimation of total QPI data traffic. More... | |
uint64 | getAllOutgoingQPILinkBytes (const SystemCounterState &before, const SystemCounterState &after) |
Get estimation of total QPI data+nondata traffic. More... | |
uint64 | getIncomingQPILinkBytes (uint32 socketNr, uint32 linkNr, const SystemCounterState &now) |
Return current value of the counter of QPI data traffic per incoming QPI link. More... | |
uint64 | getSocketIncomingQPILinkBytes (uint32 socketNr, const SystemCounterState &now) |
Get estimation of total QPI data traffic for this socket. More... | |
uint64 | getAllIncomingQPILinkBytes (const SystemCounterState &now) |
Get estimation of Socket QPI data traffic. More... | |
double | getQPItoMCTrafficRatio (const SystemCounterState &before, const SystemCounterState &after) |
Get QPI data to Memory Controller traffic ratio. More... | |
uint64 | getNumberOfEvents (PCIeCounterState before, PCIeCounterState after) |
Returns the raw count of PCIe events. More... | |
Main CPU counters header.
Include this header file if you want to access CPU counters (core and uncore - including memory controller chips and QPI)
double getActiveAverageFrequency | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes average core frequency while the core is in the active C0 state, i.e. not in a power-saving C-state (also taking Intel Turbo Boost technology into account)
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getInstance(), and PCM::getNominalFrequency().
double getActiveRelativeFrequency | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes average core frequency while the core is in the active C0 state, i.e. not in a power-saving C-state (also taking Intel Turbo Boost technology into account)
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
|
inline |
Get estimation of total QPI data traffic.
Returns an estimation of number of data bytes transferred to all sockets over all Intel(r) Quick Path Interconnect links
before | System CPU counter state before the experiment |
after | System CPU counter state after the experiment |
References getIncomingQPILinkBytes(), PCM::getInstance(), PCM::getNumSockets(), and PCM::getQPILinksPerSocket().
Referenced by getQPItoMCTrafficRatio().
|
inline |
Get estimation of Socket QPI data traffic.
Returns an estimation of number of data bytes transferred to all sockets over all Intel(r) Quick Path Interconnect links
now | System CPU counter state |
References PCM::getInstance(), PCM::getNumSockets(), and getSocketIncomingQPILinkBytes().
|
inline |
Get estimation of total QPI data+nondata traffic.
Returns an estimation of number of data and non-data bytes transferred from all sockets over all Intel(r) Quick Path Interconnect links
before | System CPU counter state before the experiment |
after | System CPU counter state after the experiment |
References PCM::getInstance(), PCM::getNumSockets(), getOutgoingQPILinkBytes(), and PCM::getQPILinksPerSocket().
double getAverageFrequency | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes average core frequency also taking Intel Turbo Boost technology into account.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getInstance(), and PCM::getNominalFrequency().
uint64 getBytesReadFromMC | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes number of bytes read from DRAM memory controllers.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getQPItoMCTrafficRatio().
uint64 getBytesWrittenToMC | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes number of bytes written to DRAM memory controllers.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getQPItoMCTrafficRatio().
uint64 getConsumedEnergy | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Returns energy consumed by processor, excluding DRAM (measured in internal units)
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getConsumedJoules().
double getConsumedJoules | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Returns Joules consumed by processor (excluding DRAM)
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References getConsumedEnergy(), PCM::getInstance(), and PCM::getJoulesPerEnergyUnit().
INTELPCM_API CoreCounterState getCoreCounterState | ( | uint32 | core | ) |
Reads the counter state of a (logical) core.
Helper function. Uses PCM object to access counters.
core | core id |
References PCM::getCoreCounterState(), and PCM::getInstance().
|
inline |
Computes residency in the core C-state.
state | C-state |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getInstance(), getInvariantTSC(), getRefCycles(), and PCM::isCoreCStateResidencySupported().
|
inline |
Computes average number of retired instructions per core cycle for the entire system combining instruction counts from logical cores to corresponding physical cores.
Use this metric to evaluate IPC improvement between SMT(Hyperthreading) on and SMT off.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getInstance(), getIPC(), PCM::getNumCores(), PCM::getNumOnlineCores(), and PCM::getThreadsPerCore().
uint64 getCycles | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes the number of core clock cycles when the clock signal on a specific core is running (not halted)
Returns number of used cycles (halted cycles are not counted). The counter does not advance in the following conditions:
The performance counter for this event counts across performance state transitions using different core clock frequencies
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
uint64 getCycles | ( | const CounterStateType & | now | ) |
Computes the number of executed core clock cycles.
Returns number of used cycles (halted cycles are not counted).
now | Current CPU counter state |
double getCyclesLostDueL2CacheMisses | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Estimates how many core cycles were potentially lost due to missing L2 cache but still hitting L3 cache.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), and PCM::getInstance().
double getCyclesLostDueL3CacheMisses | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Estimates how many core cycles were potentially lost due to L3 cache misses.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), and PCM::getInstance().
uint64 getDRAMClocks | ( | uint32 | channel, |
const CounterStateType & | before, | ||
const CounterStateType & | after | ||
) |
Returns DRAM clock ticks.
channel | DRAM channel number |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
uint64 getDRAMConsumedEnergy | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Returns energy consumed by DRAM (measured in internal units)
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getDRAMConsumedJoules().
double getDRAMConsumedJoules | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Returns Joules consumed by DRAM.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), getDRAMConsumedEnergy(), PCM::getInstance(), and PCM::getJoulesPerEnergyUnit().
double getExecUsage | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes average number of retired instructions per time interval.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getTotalExecUsage().
|
inline |
Get estimation of QPI data traffic per incoming QPI link.
Returns an estimation of number of data bytes transferred to a socket over Intel(r) Quick Path Interconnect
socketNr | socket identifier |
linkNr | QPI link number |
before | System CPU counter state before the experiment |
after | System CPU counter state after the experiment |
Referenced by getAllIncomingQPILinkBytes(), getIncomingQPILinkUtilization(), and getSocketIncomingQPILinkBytes().
|
inline |
Return current value of the counter of QPI data traffic per incoming QPI link.
Returns the number of incoming data bytes to a socket over Intel(r) Quick Path Interconnect
socketNr | socket identifier |
linkNr | QPI link number |
now | Current System CPU counter state |
|
inline |
Get data utilization of incoming QPI link (0..1)
Returns an estimation of utilization of QPI link by data traffic transferred to a socket over Intel(r) Quick Path Interconnect
socketNr | socket identifier |
linkNr | QPI link number |
before | System CPU counter state before the experiment |
after | System CPU counter state after the experiment |
References getIncomingQPILinkBytes(), PCM::getInstance(), getInvariantTSC(), PCM::getNominalFrequency(), PCM::getNumCores(), and PCM::getQPILinkSpeed().
uint64 getInstructionsRetired | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes the number of retired instructions.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
uint64 getInstructionsRetired | ( | const CounterStateType & | now | ) |
Computes the number of retired instructions.
now | Current CPU counter state |
uint64 getInvariantTSC | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes number of invariant time stamp counter ticks.
This counter counts irrespective of C-, P- or T-states
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getCoreCStateResidency(), getIncomingQPILinkUtilization(), getOutgoingQPILinkBytes(), getOutgoingQPILinkUtilization(), getPackageCStateResidency(), and PCM::getTickCount().
uint64 getIORequestBytesFromMC | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes number of bytes of read/write requests from all IO sources.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
double getIPC | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes average number of retired instructions per core cycle (IPC)
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getCoreIPC().
double getL2CacheHitRatio | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes L2 cache hit ratio.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), and PCM::getInstance().
uint64 getL2CacheHits | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes number of L2 cache hits.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), and PCM::getInstance().
uint64 getL2CacheMisses | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes number of L2 cache misses.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), and PCM::getInstance().
double getL3CacheHitRatio | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes L3 cache hit ratio.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), and PCM::getInstance().
uint64 getL3CacheHits | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes total number of L3 cache hits.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), PCM::getInstance(), getL3CacheHitsNoSnoop(), and getL3CacheHitsSnoop().
uint64 getL3CacheHitsNoSnoop | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes number of L3 cache hits where no snooping in sibling L2 caches had to be done.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), and PCM::getInstance().
Referenced by getL3CacheHits().
uint64 getL3CacheHitsSnoop | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes number of L3 cache hits where snooping in sibling L2 caches had to be done.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), and PCM::getInstance().
Referenced by getL3CacheHits().
uint64 getL3CacheMisses | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes number of L3 cache misses.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References PCM::getCPUModel(), and PCM::getInstance().
uint64 getL3CacheOccupancy | ( | const CounterStateType & | now | ) |
Computes L3 Cache Occupancy.
uint64 getMCCounter | ( | uint32 | channel, |
uint32 | counter, | ||
const CounterStateType & | before, | ||
const CounterStateType & | after | ||
) |
Direct read of memory controller PMU counter (counter meaning depends on the programming: power/performance/etc)
counter | counter number |
channel | channel number |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
double getNormalizedQPIL0pTxCycles | ( | uint32 | port, |
const CounterStateType & | before, | ||
const CounterStateType & | after | ||
) |
Returns the ratio of QPI cycles in power saving half-lane mode.
port | QPI port number |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References getQPIClocks(), and getQPIL0pTxCycles().
double getNormalizedQPIL1Cycles | ( | uint32 | port, |
const CounterStateType & | before, | ||
const CounterStateType & | after | ||
) |
Returns the ratio of QPI cycles in power saving shutdown mode.
port | QPI port number |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References getQPIClocks(), and getQPIL1Cycles().
uint64 getNumberOfCustomEvents | ( | int32 | eventCounterNr, |
const CounterStateType & | before, | ||
const CounterStateType & | after | ||
) |
Returns the number of occurred custom core events.
Read number of events programmed with the CUSTOM_CORE_EVENTS
eventCounterNr | Event/counter number (value from 0 to 3) |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
|
inline |
Returns the raw count of PCIe events.
before | PCIe counter state before the experiment |
after | PCIe counter state after the experiment |
|
inline |
Get estimation of QPI (data+nondata) traffic per outgoing QPI link.
Returns an estimation of the number of data and non-data bytes transferred from a socket over Intel(r) Quick Path Interconnect
socketNr | socket identifier |
linkNr | QPI link number |
before | System CPU counter state before the experiment |
after | System CPU counter state after the experiment |
References PCM::getInstance(), getInvariantTSC(), PCM::getNominalFrequency(), PCM::getNumCores(), getOutgoingQPILinkUtilization(), and PCM::getQPILinkSpeed().
Referenced by getAllOutgoingQPILinkBytes().
|
inline |
Get utilization of outgoing QPI link (0..1)
Returns an estimation of utilization of QPI link by (data+nondata) traffic transferred from a socket over Intel(r) Quick Path Interconnect
socketNr | socket identifier |
linkNr | QPI link number |
before | System CPU counter state before the experiment |
after | System CPU counter state after the experiment |
References PCM::getInstance(), getInvariantTSC(), PCM::getNominalFrequency(), PCM::getNumCores(), and PCM::getQPILinkSpeed().
Referenced by getOutgoingQPILinkBytes().
|
inline |
Computes residency in the package C-state.
state | C-state |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References getInvariantTSC().
uint64 getPCUClocks | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Returns clock ticks of power control unit.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References getPCUCounter().
uint64 getPCUCounter | ( | uint32 | counter, |
const CounterStateType & | before, | ||
const CounterStateType & | after | ||
) |
Direct read of power control unit PMU counter (counter meaning depends on the programming: power/performance/etc)
counter | counter number |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getPCUClocks().
uint64 getQPIClocks | ( | uint32 | port, |
const CounterStateType & | before, | ||
const CounterStateType & | after | ||
) |
Returns QPI LL clock ticks.
port | QPI port number |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getNormalizedQPIL0pTxCycles(), and getNormalizedQPIL1Cycles().
uint64 getQPIL0pTxCycles | ( | uint32 | port, |
const CounterStateType & | before, | ||
const CounterStateType & | after | ||
) |
Returns the number of QPI cycles in power saving half-lane mode.
port | QPI port number |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getNormalizedQPIL0pTxCycles().
uint64 getQPIL1Cycles | ( | uint32 | port, |
const CounterStateType & | before, | ||
const CounterStateType & | after | ||
) |
Returns the number of QPI cycles in power saving shutdown mode.
port | QPI port number |
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getNormalizedQPIL1Cycles().
|
inline |
Get QPI data to Memory Controller traffic ratio.
Ideally, for NUMA-optimized programs the ratio should be close to 0.
before | System CPU counter state before the experiment |
after | System CPU counter state after the experiment |
References getAllIncomingQPILinkBytes(), getBytesReadFromMC(), and getBytesWrittenToMC().
uint64 getRefCycles | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes the number of reference clock cycles while clock signal on the core is running.
The reference clock operates at a fixed frequency, irrespective of core frequency changes due to performance state transitions. See Intel(r) Software Developer's Manual for more details
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
Referenced by getCoreCStateResidency().
double getRelativeFrequency | ( | const CounterStateType & | before, |
const CounterStateType & | after | ||
) |
Computes average core frequency also taking Intel Turbo Boost technology into account.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
INTELPCM_API SocketCounterState getSocketCounterState | ( | uint32 | socket | ) |
Reads the counter state of a socket.
Helper function. Uses PCM object to access counters.
socket | socket id |
References PCM::getInstance(), and PCM::getSocketCounterState().
|
inline |
Get estimation of total QPI data traffic for this socket.
Returns an estimation of number of bytes transferred to this socket over all Intel(r) Quick Path Interconnect links on this socket
socketNr | socket identifier |
now | Current System CPU counter state |
References getIncomingQPILinkBytes(), PCM::getInstance(), and PCM::getQPILinksPerSocket().
Referenced by getAllIncomingQPILinkBytes().
INTELPCM_API SystemCounterState getSystemCounterState | ( | ) |
Reads the counter state of the system.
Helper function. Uses PCM object to access counters.
A system consists of several sockets (CPUs). Each socket holds one CPU, and each socket (CPU) consists of several (logical) cores.
References PCM::getInstance(), and PCM::getSystemCounterState().
|
inline |
Computes average number of retired instructions per time interval for the entire system combining instruction counts from logical cores to corresponding physical cores.
Use this metric to evaluate cores utilization improvement between SMT(Hyperthreading) on and SMT off.
before | CPU counter state before the experiment |
after | CPU counter state after the experiment |
References getExecUsage(), PCM::getInstance(), PCM::getNumCores(), PCM::getNumOnlineCores(), and PCM::getThreadsPerCore().