From cb0e9f39782dee561bbbb582de58a4a1e403ecdc Mon Sep 17 00:00:00 2001 From: "Ten, Nadezhda" Date: Fri, 2 Jun 2023 16:43:47 -0500 Subject: [PATCH] Prepare for 1.2.11 release --- README.md | 4 +- _version.txt | 2 +- events/bdx.txt | 178 +++++----- events/clx_skx.txt | 248 ++++++++------ events/icx.txt | 4 +- events/metric_bdx.json | 663 +++++++++++++++++++------------------ events/metric_skx_clx.json | 57 ++-- events/spr.txt | 4 +- perf-collect.py | 108 +++--- perf-postprocess.py | 90 +++-- src/perf_helpers.py | 111 ++++--- src/prepare_perf_events.py | 7 +- 12 files changed, 769 insertions(+), 707 deletions(-) diff --git a/README.md b/README.md index ad10f85..06ac41b 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ perf-collect: Collects harware events - `sudo ./perf-collect --socket` - `sudo ./perf-collect --thread` - `sudo ./perf-collect --pid ` - - `sudo ./perf-collect --cid ;` + - `sudo ./perf-collect --cid` _by default, selects the 5 containers using the most CPU at start of perf-collect. To monitor specific containers provide up to 5 comma separated cids i.e. ,_ - Duration: - `sudo ./perf-collect` _default run until terminated_ - `sudo ./perf-collect --timeout 10` _run for 10 seconds_ @@ -37,7 +37,7 @@ perf-collect outputs: perf-postprocess outputs: 1. `metric_out.sys.average.csv`: average metrics -2. `metric_out.sys.csv`: metric values at every interval +2. `metric_out.sys.csv`: metric values at every 5 second interval 3. `metric_out.html`: html view of a few select metrics ![basic_stats](https://raw.githubusercontent.com/wiki/intel/PerfSpect/newhtml.gif) diff --git a/_version.txt b/_version.txt index 963ed7c..c114700 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.2.10 +1.2.11 diff --git a/events/bdx.txt b/events/bdx.txt index 756a58f..443ac1a 100644 --- a/events/bdx.txt +++ b/events/bdx.txt @@ -5,171 +5,163 @@ # Broadwell event list (default, with extensive TMA collection) -cpu/event=0x51,umask=0x01,period=2000003,name='L1D.REPLACEMENT'/, -cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, -cpu/event=0xf1,umask=0x07,period=100003,name='L2_LINES_IN.ALL'/, -cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0xc2,umask=0x02,period=2000003,name='UOPS_RETIRED.RETIRE_SLOTS'/, +cpu/event=0xc5,umask=0x00,name='BR_MISP_RETIRED.ALL_BRANCHES'/, +cpu/event=0xc3,umask=0x01,name='MACHINE_CLEARS.COUNT'/, +cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0xd1,umask=0x01,period=2000003,name='MEM_LOAD_RETIRED.L1_HIT'/, -cpu/event=0xd1,umask=0x02,period=100003,name='MEM_LOAD_UOPS_RETIRED.L2_HIT'/, -cpu/event=0xd1,umask=0x10,period=50021,name='MEM_LOAD_UOPS_RETIRED.L2_MISS'/, cpu/event=0x3c,umask=0x0,period=2000003,name='CPU_CLK_UNHALTED.THREAD_ANY'/, +cpu/event=0xe6,umask=0x1f,name='BACLEARS.ANY'/, +cpu/event=0x0d,umask=0x03,cmask=1,period=2000003,name='INT_MISC.RECOVERY_CYCLES_ANY'/, +cpu/event=0x0d,umask=0x03,cmask=1,period=2000003,name='INT_MISC.RECOVERY_CYCLES'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, -cpu/event=0x85,umask=0x04,period=100003,name='ITLB_MISSES.WALK_COMPLETED_2M_4M'/, -cpu/event=0x85,umask=0x10,cmask=1,period=100003,name='ITLB_MISSES.WALK_DURATION_c1'/, -cpu/event=0x85,umask=0x60,period=100003,name='ITLB_MISSES.STLB_HIT'/, -cpu-cycles, -ref-cycles, -instructions; - -cpu/event=0xb1,umask=0x01,cmask=3,name='UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC'/, -cpu/event=0xb1,umask=0x01,cmask=2,name='UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC'/, -cpu/event=0xb1,umask=0x01,cmask=1,name='UOPS_EXECUTED.CYCLES_GE_1_UOPS_EXEC'/, +cpu/event=0x9c,umask=0x01,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, +cpu/event=0xa3,umask=0x06,cmask=6,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, +cpu/event=0xa3,umask=0x04,cmask=4,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, cpu/event=0xa2,umask=0x08,name='RESOURCE_STALLS.SB'/, cpu-cycles, +ref-cycles, instructions; -cpu/event=0xa3,umask=0x04,cmask=4,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, -cpu/event=0xa3,umask=0x06,cmask=6,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, -#STALLS_L1D_MISS is supported on pmc2 only -cpu/event=0xa3,umask=0x0c,cmask=0x0c,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, -cpu/event=0xa3,umask=0x05,cmask=5,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xb1,umask=0x01,cmask=1,period=2000003,name='UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC'/, +cpu/event=0xb1,umask=0x01,cmask=2,period=2000003,name='UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC'/, +cpu/event=0xb1,umask=0x01,cmask=3,period=2000003,name='UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC'/, +cpu/event=0x5e,umask=0x01,name='RS_EVENTS.EMPTY_CYCLES'/, cpu-cycles, +ref-cycles, instructions; -cpu/event=0x08,umask=0x10,cmask=1,name='DTLB_LOAD_MISSES.WALK_DURATION_c1'/, -cpu/event=0x08,umask=0x60,name='DTLB_LOAD_MISSES.STLB_HIT'/, -cpu/event=0x49,umask=0x10,cmask=1,name='DTLB_STORE_MISSES.WALK_DURATION_c1'/, -cpu/event=0x03,umask=0x02,name='LD_BLOCKS.STORE_FORWARD'/, +cpu/event=0x9c,umask=0x01,cmask=0x4,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, +cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x08,umask=0x10,period=100003,name='DTLB_LOAD_MISSES.WALK_DURATION'/, +cpu/event=0x08,umask=0x04,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, cpu-cycles, +ref-cycles, instructions; cpu/event=0xd1,umask=0x04,name='MEM_LOAD_UOPS_RETIRED.L3_HIT'/, cpu/event=0xd1,umask=0x20,name='MEM_LOAD_UOPS_RETIRED.L3_MISS'/, -cpu/event=0xd1,umask=0x40,name='MEM_LOAD_UOPS_RETIRED.HIT_LFB'/, -cpu/event=0x80,umask=0x04,period=200003,name='ICACHE.IFDATA_STALL'/, +cpu/event=0xa3,umask=0x05,cmask=5,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xa3,umask=0x0c,cmask=0x0c,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, cpu-cycles, +ref-cycles, instructions; cpu/event=0xd2,umask=0x02,name='MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT'/, +cpu/event=0xd1,umask=0x40,name='MEM_LOAD_UOPS_RETIRED.HIT_LFB'/, +cpu/event=0xd1,umask=0x02,period=100003,name='MEM_LOAD_UOPS_RETIRED.L2_HIT'/, cpu/event=0xd2,umask=0x04,name='MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM'/, -cpu/event=0xd2,umask=0x01,name='MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS'/, -cpu/event=0x79,umask=0x30,name='IDQ.MS_UOPS'/, cpu-cycles, +ref-cycles, instructions; -cpu/event=0xd3,umask=0x20,name='MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD'/, cpu/event=0xd3,umask=0x01,name='MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM'/, -cpu/event=0xd3,umask=0x04,name='MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM'/, -cpu/event=0xd3,umask=0x10,name='MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM'/, +cpu/event=0xd1,umask=0x01,period=2000003,name='MEM_LOAD_UOPS_RETIRED.L1_HIT'/, +cpu/event=0xd1,umask=0x10,period=50021,name='MEM_LOAD_UOPS_RETIRED.L2_MISS'/, cpu-cycles, +ref-cycles, instructions; -cpu/event=0xb2,umask=0x01,name='OFFCORE_REQUESTS_BUFFER.SQ_FULL'/, -cpu/event=0x60,umask=0x08,cmask=4,name='OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD_c4'/, -cpu/event=0x60,umask=0x08,cmask=1,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD'/, +cpu/event=0x51,umask=0x01,period=2000003,name='L1D.REPLACEMENT'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0xf1,umask=0x07,period=100003,name='L2_LINES_IN.ALL'/, +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, cpu-cycles, +ref-cycles, instructions; -cpu/event=0xb1,umask=0x02,cmask=1,name='UOPS_EXECUTED.CORE_c1'/, -cpu/event=0xb1,umask=0x02,cmask=2,name='UOPS_EXECUTED.CORE_c2'/, -cpu/event=0xb1,umask=0x02,cmask=3,name='UOPS_EXECUTED.CORE_c3'/, -cpu/event=0xb1,umask=0x02,cmask=1,inv=1,name='UOPS_EXECUTED.CORE_i1_c1'/, +cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x85,umask=0x04,period=100003,name='ITLB_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x85,umask=0x10,cmask=0x01,period=100003,name='ITLB_MISSES.WALK_DURATION:c1'/, +cpu/event=0x85,umask=0x60,period=100003,name='ITLB_MISSES.STLB_HIT'/, cpu-cycles, +ref-cycles, instructions; -cpu/event=0x5e,umask=0x01,name='RS_EVENTS.EMPTY_CYCLES'/, -cpu/event=0x5e,umask=0x01,cmask=1,inv=1,name='RS_EVENTS.EMPTY_END'/, -cpu/event=0xab,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/, -cpu/event=0x79,umask=0x30,name='IDQ.MS_SWITCHES'/, +cpu/event=0x08,umask=0x10,cmask=1,name='DTLB_LOAD_MISSES.WALK_DURATION:c1'/, +cpu/event=0x08,umask=0x60,name='DTLB_LOAD_MISSES.STLB_HIT'/, +cpu/event=0x49,umask=0x10,cmask=1,name='DTLB_STORE_MISSES.WALK_DURATION:c1'/, +cpu/event=0x80,umask=0x04,period=200003,name='ICACHE.IFDATA_STALL'/, cpu-cycles, +ref-cycles, instructions; -cpu/event=0x14,umask=0x01,period=2000003,name='ARITH.FPU_DIV_ACTIVE'/, -cpu/event=0xc5,umask=0x00,name='BR_MISP_RETIRED.ALL_BRANCHES'/, -cpu/event=0xc3,umask=0x01,name='MACHINE_CLEARS.COUNT'/, -cpu/event=0xe6,umask=0x1f,name='BACLEARS.ANY'/, +cpu/event=0xd2,umask=0x01,name='MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS'/, +cpu/event=0x79,umask=0x30,name='IDQ.MS_UOPS'/, +cpu/event=0x60,umask=0x08,cmask=4,name='OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4'/, +cpu/event=0x60,umask=0x08,cmask=1,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD'/, cpu-cycles, +ref-cycles, instructions; -cpu/event=0x9c,umask=0x01,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, -cpu/event=0x9c,umask=0x01,cmask=0x4,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, +cpu/event=0x79,umask=0x24,cmask=0x01,period=2000003,name='IDQ.ALL_MITE_CYCLES_ANY_UOPS'/, +cpu/event=0x79,umask=0x24,cmask=0x04,period=2000003,name='IDQ.ALL_MITE_CYCLES_4_UOPS'/, +cpu/event=0x79,umask=0x18,cmask=0x01,period=2000003,name='IDQ.ALL_DSB_CYCLES_ANY_UOPS'/, +cpu/event=0x79,umask=0x18,cmask=0x04,period=2000003,name='IDQ.ALL_DSB_CYCLES_4_UOPS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xc0,umask=0x02,period=2000003,name='INST_RETIRED.X87'/, +cpu/event=0xc7,umask=0x03,period=2000003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/, +cpu/event=0xc7,umask=0x3c,period=2000003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c'/, +cpu/event=0xd3,umask=0x10,name='MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd3,umask=0x20,name='MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD'/, +cpu/event=0xd3,umask=0x04,name='MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM'/, cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, cpu/event=0x49,umask=0x10,period=100003,name='DTLB_STORE_MISSES.WALK_DURATION'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0x49,umask=0x60,period=100003,name='DTLB_STORE_MISSES.STLB_HIT'/, -cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, -cpu/event=0x08,umask=0x10,period=100003,name='DTLB_LOAD_MISSES.WALK_DURATION'/, -cpu/event=0x08,umask=0x04,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0xb1,umask=0x02,cmask=1,name='UOPS_EXECUTED.CORE_c1'/, +cpu/event=0xb1,umask=0x02,cmask=2,name='UOPS_EXECUTED.CORE_c2'/, +cpu/event=0xb1,umask=0x02,cmask=3,name='UOPS_EXECUTED.CORE_c3'/, +cpu/event=0xb1,umask=0x02,cmask=1,inv=1,name='UOPS_EXECUTED.CORE_i1_c1'/, +cpu-cycles:k, ref-cycles:k, -instructions:k, -cpu-cycles:k; +instructions:k; #C6 cstate_core/c6-residency/; cstate_pkg/c6-residency/; -#uops delivered from different units -cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/, -cpu/event=0xc2,umask=0x02,period=2000003,name='UOPS_RETIRED.RETIRE_SLOTS'/, -cpu/event=0x0d,umask=0x03,cmask=1,period=2000003,name='INT_MISC.RECOVERY_CYCLES_ANY'/, -cpu-cycles, -ref-cycles, -instructions; - -cpu/event=0x3c,umask=0x2,name='CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE'/, -cpu/event=0x3c,umask=0x1,name='CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY'/; - #offcore response cpu/event=0xb7,umask=0x01,offcore_rsp=0x103FC007F7,name='OCR.ALL_READS.L3_MISS.REMOTE_HITM'/, cpu/event=0xb7,umask=0x01,offcore_rsp=0x083FC007F7,name='OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD'/; #LLC read types -cbox/event=0x35,umask=0x3,filter_opc=0x181,name='UNC_C_TOR_INSERTS.MISS_OPCODE.0x181'/, -cbox/event=0x0,umask=0x0,name='UNC_C_CLOCKTICKS'/; - cbox/event=0x35,umask=0x3,filter_opc=0x180,name='UNC_C_TOR_INSERTS.MISS_OPCODE.0x180'/; +cbox/event=0x35,umask=0x3,filter_opc=0x181,name='UNC_C_TOR_INSERTS.MISS_OPCODE.0x181'/; +cbox/event=0x35,umask=0x3,filter_opc=0x182,name='UNC_C_TOR_INSERTS.MISS_OPCODE.0x182'/; cbox/event=0x35,umask=0x3,filter_opc=0x190,name='UNC_C_TOR_INSERTS.MISS_OPCODE.0x190'/; - -#LLC demand+prefech read/latency -cbox/event=0x35,umask=0x3,filter_opc=0x182,name='UNC_C_TOR_INSERTS.MISS_OPCODE.0x182'/, +cbox/event=0x35,umask=0x3,filter_opc=0x191,name='UNC_C_TOR_INSERTS.MISS_OPCODE.0x191'/, +cbox/event=0x35,umask=0x3,filter_opc=0x192,name='UNC_C_TOR_INSERTS.MISS_OPCODE.0x192'/; +cbox/event=0x35,umask=0x3,filter_opc=0x180,tid_en=1,filter_tid=0x3e,name='UNC_C_TOR_INSERTS.MISS_OPCODE.tid.0x180'/; cbox/event=0x36,umask=0x3,filter_opc=0x182,name='UNC_C_TOR_OCCUPANCY.MISS_OPCODE.0x182'/; - -cbox/event=0x35,umask=0x23,filter_opc=0x182,name='UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182'/, cbox/event=0x36,umask=0x23,filter_opc=0x182,name='UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE.0x182'/; - -cbox/event=0x35,umask=0x83,filter_opc=0x182,name='UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182'/, cbox/event=0x36,umask=0x83,filter_opc=0x182,name='UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE.0x182'/; - -cbox/event=0x35,umask=0x3,filter_opc=0x191,name='UNC_C_TOR_INSERTS.MISS_OPCODE.0x191'/; -cbox/event=0x35,umask=0x3,filter_opc=0x192,name='UNC_C_TOR_INSERTS.MISS_OPCODE.0x192'/; - -#IO reads/writes -cbox/event=0x35,umask=0x1,filter_opc=0x19e,name='UNC_C_TOR_INSERTS.OPCODE.0x19e'/; -cbox/event=0x35,umask=0x1,filter_opc=0x1c8,tid_en=1,filter_tid=0x3e,name='UNC_C_TOR_INSERTS.OPCODE.0x1c8'/; - -cbox/event=0x35,umask=0x1,filter_opc=0x180,tid_en=1,filter_tid=0x3e,name='UNC_C_TOR_INSERTS.OPCODE.0x180'/, -cbox/event=0x35,umask=0x3,filter_opc=0x180,tid_en=1,filter_tid=0x3e,name='UNC_C_TOR_INSERTS.MISS_OPCODE.tid.0x180'/; +cbox/event=0x35,umask=0x83,filter_opc=0x182,name='UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182'/; +cbox/event=0x35,umask=0x23,filter_opc=0x182,name='UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182'/; +cbox/event=0x35,umask=0x01,filter_opc=0x19e,name='UNC_C_TOR_INSERTS.OPCODE.0x19e'/; +cbox/event=0x35,umask=0x1,filter_opc=0x180,tid_en=1,filter_tid=0x3e,name='UNC_C_TOR_INSERTS.OPCODE.0x180.tid.0x3e'/; +cbox/event=0x35,umask=0x1,filter_opc=0x1c8,tid_en=1,filter_tid=0x3e,name='UNC_C_TOR_INSERTS.OPCODE.0x1c8.tid.0x3e'/; +cbox/event=0x0,umask=0x0,name='UNC_C_CLOCKTICKS'/; #memory read/writes imc/event=0x04,umask=0x03,name='UNC_M_CAS_COUNT.RD'/, imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/; -#QPI -qpi/event=0x14,umask=0x0,name='UNC_Q_CLOCKTICKS'/, -qpi/event=0x0,umask=0x2,name='UNC_Q_TxL_FLITS_G0.DATA'/, -qpi/event=0x0,umask=0x4,name='UNC_Q_TxL_FLITS_G0.NON_DATA'/; - #power related power/energy-pkg/, power/energy-ram/; diff --git a/events/clx_skx.txt b/events/clx_skx.txt index b4d8bee..96093dd 100644 --- a/events/clx_skx.txt +++ b/events/clx_skx.txt @@ -5,166 +5,221 @@ # Cascadelake event list (default, with extensive TMA collection) +#avx related power levels +cpu/event=0x28,umask=0x07,period=200003,name='CORE_POWER.LVL0_TURBO_LICENSE'/, +cpu/event=0x28,umask=0x18,period=200003,name='CORE_POWER.LVL1_TURBO_LICENSE'/, +cpu/event=0x28,umask=0x20,period=200003,name='CORE_POWER.LVL2_TURBO_LICENSE'/, cpu/event=0x51,umask=0x01,period=2000003,name='L1D.REPLACEMENT'/, -cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, -cpu/event=0xd1,umask=0x01,period=2000003,name='MEM_LOAD_RETIRED.L1_HIT'/, -cpu/event=0xd1,umask=0x02,period=100003,name='MEM_LOAD_RETIRED.L2_HIT'/, cpu-cycles, ref-cycles, instructions; - -cpu/event=0x85,umask=0x04,period=100003,name='ITLB_MISSES.WALK_COMPLETED_2M_4M'/, -cpu/event=0xf1,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, -cpu/event=0xd1,umask=0x10,period=50021,name='MEM_LOAD_RETIRED.L2_MISS'/, -cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0xc3,umask=0x01,edge,period=100003,name='MACHINE_CLEARS.COUNT'/, +cpu/event=0xc5,umask=0x00,period=400009,name='BR_MISP_RETIRED.ALL_BRANCHES'/, +cpu/event=0x0d,umask=0x80,period=2000003,name='INT_MISC.CLEAR_RESTEER_CYCLES'/, +cpu/event=0xd1,umask=0x01,period=2000003,name='MEM_LOAD_RETIRED.L1_HIT'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, -cpu/event=0x08,umask=0x04,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, -cpu/event=0x08,umask=0x02,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_4K'/, -cpu/event=0x08,umask=0x08,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_1G'/, -cpu-cycles:k, -ref-cycles:k, -instructions:k; +cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/, +cpu/event=0xc2,umask=0x02,period=2000003,name='UOPS_RETIRED.RETIRE_SLOTS'/, +cpu/event=0x0d,umask=0x01,period=2000003,name='INT_MISC.RECOVERY_CYCLES_ANY'/, +cpu/event=0x0d,umask=0x01,period=2000003,name='INT_MISC.RECOVERY_CYCLES'/; +cpu-cycles, +ref-cycles, +instructions; -#C6 -cstate_core/c6-residency/; -cstate_pkg/c6-residency/; +cpu/event=0x3c,umask=0x0,period=2000003,name='CPU_CLK_UNHALTED.THREAD_ANY'/, +cpu/event=0x9c,umask=0x01,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, +cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, +cpu/event=0xa3,umask=0x04,cmask=0x04,period=2000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0xb0,umask=0x10,period=100003,name='OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD'/, +cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, +cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, +cpu/event=0xa6,umask=0x40,period=2000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa6,umask=0x01,period=2000003,name='EXE_ACTIVITY.EXE_BOUND_0_PORTS'/, cpu-cycles, ref-cycles, instructions; +cpu/event=0xd1,umask=0x02,period=100003,name='MEM_LOAD_RETIRED.L2_HIT'/, cpu/event=0xd1,umask=0x40,period=100007,name='MEM_LOAD_RETIRED.FB_HIT'/, cpu/event=0xd1,umask=0x08,period=100003,name='MEM_LOAD_RETIRED.L1_MISS'/, cpu/event=0x48,umask=0x02,cmask=0x01,period=2000003,name='L1D_PEND_MISS.FB_FULL:c1'/, -cpu/event=0xa3,umask=0x04,cmask=0x04,period=2000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, cpu-cycles, ref-cycles, instructions; -cpu/event=0xa3,umask=0x10,cmask=0x16,period=2000003,name='CYCLE_ACTIVITY.CYCLES_MEM_ANY'/, -cpu/event=0xa3,umask=0x08,cmask=0x08,period=2000003,name='CYCLE_ACTIVITY.CYCLES_L1D_MISS'/, +cpu/event=0xa3,umask=0x0c,cmask=0x0c,period=2000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0xa3,umask=0x05,cmask=0x05,period=2000003,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xa3,umask=0x06,cmask=0x06,period=2000003,name='CYCLE_ACTIVITY.STALLS_L3_MISS'/, +cpu/event=0x60,umask=0x04,cmask=0x01,period=200003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO'/, cpu-cycles, ref-cycles, instructions; +cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x85,umask=0x10,period=100003,name='ITLB_MISSES.WALK_ACTIVE'/, +cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x08,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/, +cpu-cycles, +ref-cycles, +instructions; -#avx related power levels -cpu/event=0x28,umask=0x07,period=200003,name='CORE_POWER.LVL0_TURBO_LICENSE'/, -cpu/event=0x28,umask=0x18,period=200003,name='CORE_POWER.LVL1_TURBO_LICENSE'/, -cpu/event=0x28,umask=0x20,period=200003,name='CORE_POWER.LVL2_TURBO_LICENSE'/, -cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/; +cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, +cpu/event=0x49,umask=0x10,period=100003,name='DTLB_STORE_MISSES.WALK_ACTIVE'/, +cpu/event=0x9c,umask=0x01,cmask=0x4,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, +cpu/event=0xe6,umask=0x01,period=100003,name='BACLEARS.ANY'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0x3c,umask=0x0,period=2000003,name='CPU_CLK_UNHALTED.THREAD_ANY'/, -cpu/event=0x9c,umask=0x01,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, -cpu/event=0xc2,umask=0x02,period=2000003,name='UOPS_RETIRED.RETIRE_SLOTS'/; -#INT_MISC.RECOVERY_CYCLES_ANY -cpu/event=0x0d,umask=0x01,period=2000003,name='INT_MISC.RECOVERY_CYCLES_ANY'/, -cpu/event=0x0d,umask=0x01,period=2000003,name='INT_MISC.RECOVERY_CYCLES'/; +cpu/event=0xd0,umask=0x21,cmask=0x01,period=100007,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu/event=0x24,umask=0xe2,cmask=0x00,period=200003,name='L2_RQSTS.ALL_RFO'/, +cpu/event=0xd0,umask=0x82,cmask=0x00,period=200003,name='MEM_INST_RETIRED.ALL_STORES'/, +cpu/event=0x24,umask=0xc2,cmask=0x00,period=200003,name='L2_RQSTS.RFO_HIT'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0x79,umask=0x30,period=2000003,name='IDQ.MS_UOPS'/, -cpu/event=0x60,umask=0x10,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD'/, -cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_64B.IFTAG_STALL'/, -cpu/event=0x9c,umask=0x01,cmask=0x4,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x10003C0001,period=100003,name='OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x8003C0001,period=100003,name='OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD'/, +cpu/event=0x60,umask=0x10,cmask=0x06,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6'/, +cpu/event=0x60,umask=0x10,cmask=0x01,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD'/, cpu-cycles, -ref-cycles; +ref-cycles, +instructions; -cpu/event=0x0d,umask=0x80,period=2000003,name='INT_MISC.CLEAR_RESTEER_CYCLES'/, -cpu/event=0xe6,umask=0x01,period=100003,name='BACLEARS.ANY'/, -cpu/event=0xc3,umask=0x01,edge,period=100003,name='MACHINE_CLEARS.COUNT'/, -cpu/event=0xc5,umask=0x00,period=400009,name='BR_MISP_RETIRED.ALL_BRANCHES'/, -cpu-cycles; +cpu/event=0xb1,umask=0x02,cmask=0x1,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_1'/, +cpu/event=0xb1,umask=0x02,cmask=0x2,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_2'/, +cpu/event=0xb1,umask=0x02,cmask=0x3,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_3'/, +cpu/event=0xc2,umask=0x04,cmask=0x00,period=2000003,name='UOPS_RETIRED.MACRO_FUSED'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, -cpu/event=0xa3,umask=0x0c,cmask=0x0c,period=2000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, -cpu/event=0x08,umask=0x20,cmask=0x01,period=2000003,name='DTLB_LOAD_MISSES.STLB_HIT:c1'/, -cpu/event=0x08,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/, -cpu-cycles; +cpu/event=0xc7,umask=0x03,cmask=0x00,period=2000003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/, +cpu/event=0xc7,umask=0xfc,cmask=0x00,period=2000003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc'/, +cpu/event=0x80,umask=0x4,name='ICACHE_16B.IFDATA_STALL'/, +cpu/event=0x80,umask=0x4,cmask=0x1,edge=0x1,name='ICACHE_16B.IFDATA_STALL:c1:e1'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0xa6,umask=0x01,period=2000003,name='EXE_ACTIVITY.EXE_BOUND_0_PORTS'/, -cpu/event=0xa6,umask=0x40,period=2000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, -cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, -cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0x85,umask=0x04,period=100003,name='ITLB_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0xf1,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, +cpu/event=0xd1,umask=0x10,period=50021,name='MEM_LOAD_RETIRED.L2_MISS'/, cpu-cycles, +ref-cycles, instructions; -cpu/event=0x03,umask=0x02,period=100003,name='LD_BLOCKS.STORE_FORWARD'/, -cpu/event=0xb2,umask=0x01,period=2000003,name='OFFCORE_REQUESTS_BUFFER.SQ_FULL'/, -cpu/event=0xa3,umask=0x05,cmask=0x05,period=2000003,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, -cpu/event=0xa3,umask=0x06,cmask=0x06,period=2000003,name='CYCLE_ACTIVITY.STALLS_L3_MISS'/, -cpu-cycles; +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0x08,umask=0x04,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x08,umask=0x02,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_4K'/, +cpu/event=0x08,umask=0x08,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_1G'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0x60,umask=0x01,cmask=0x06,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6'/, -cpu/event=0x60,umask=0x10,cmask=0x06,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6'/, -cpu/event=0x60,umask=0x10,cmask=0x01,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD'/, -cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, -cpu-cycles; +cpu/event=0xb0,umask=0x10,period=100003,name='OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD'/, +cpu/event=0xd1,umask=0x40,period=100007,name='MEM_LOAD_RETIRED.FB_HIT'/, +cpu/event=0xa3,umask=0x10,cmask=0x16,period=2000003,name='CYCLE_ACTIVITY.CYCLES_MEM_ANY'/, +cpu/event=0xa3,umask=0x08,cmask=0x08,period=2000003,name='CYCLE_ACTIVITY.CYCLES_L1D_MISS'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0x60,umask=0x01,cmask=0x01,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD'/, -cpu-cycles; +cpu/event=0x79,umask=0x30,period=2000003,name='IDQ.MS_UOPS'/, +cpu/event=0x60,umask=0x10,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD'/, +cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_64B.IFTAG_STALL'/, +cpu/event=0x08,umask=0x20,cmask=0x01,period=2000003,name='DTLB_LOAD_MISSES.STLB_HIT:c1'/, +cpu-cycles, +ref-cycles, +instructions; cpu/event=0x80,umask=0x4,name='ICACHE_16B.IFDATA_STALL'/, cpu/event=0x80,umask=0x4,cmask=0x1,edge=0x1,name='ICACHE_16B.IFDATA_STALL:c1:e1'/, -cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, -cpu/event=0x85,umask=0x10,period=100003,name='ITLB_MISSES.WALK_ACTIVE'/, -instructions, -cpu-cycles; - -cpu/event=0x49,umask=0x20,cmask=0x01,period=100003,name='DTLB_STORE_MISSES.STLB_HIT:c1'/, -cpu/event=0x49,umask=0x10,period=100003,name='DTLB_STORE_MISSES.WALK_ACTIVE'/, cpu/event=0x14,umask=0x01,period=2000003,name='ARITH.DIVIDER_ACTIVE'/, cpu/event=0xb1,umask=0x02,inv=0x1,cmask=0x1,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_NONE'/, -cpu-cycles; +cpu-cycles, +ref-cycles, +instructions; +cpu/event=0x3c,umask=0x2,name='CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE'/, +cpu/event=0x3c,umask=0x1,name='CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; -cpu/event=0xb1,umask=0x02,cmask=0x1,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_1'/, -cpu/event=0xb1,umask=0x02,cmask=0x2,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_2'/, -cpu/event=0xb1,umask=0x02,cmask=0x3,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_3'/, -cpu-cycles; +cpu/event=0x79,umask=0x24,cmask=0x01,period=2000003,name='IDQ.ALL_MITE_CYCLES_ANY_UOPS'/, +cpu/event=0x79,umask=0x24,cmask=0x04,period=2000003,name='IDQ.ALL_MITE_CYCLES_4_UOPS'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x10003C0002,period=100003,name='OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x10003C0020,period=100003,name='OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE'/, +cpu-cycles, +ref-cycles, +instructions; -cpu/event=0x3c,umask=0x2,name='CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE'/, -cpu/event=0x3c,umask=0x1,name='CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY'/; +cpu/event=0x79,umask=0x18,cmask=0x01,period=2000003,name='IDQ.ALL_DSB_CYCLES_ANY_UOPS'/, +cpu/event=0x79,umask=0x18,cmask=0x04,period=2000003,name='IDQ.ALL_DSB_CYCLES_4_UOPS'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x103FC00002,period=100003,name='OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x103FC00020,period=100003,name='OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd2,umask=0x02,cmask=0x00,period=20011,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT'/, +cpu/event=0xd2,umask=0x04,cmask=0x00,period=20011,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x3F840007F7,name='OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x3FB80007F7,name='OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP_ocr_msr_3fB80007f7'/; +cpu-cycles, +ref-cycles, +instructions; -#offcore response +cpu/event=0xb1,umask=0x10,cmask=0x00,period=2000003,name='UOPS_EXECUTED.X87'/, +cpu/event=0xb1,umask=0x01,cmask=0x00,period=2000003,name='UOPS_EXECUTED.THREAD'/, cpu/event=0xb7,umask=0x01,offcore_rsp=0x103FC007F7,name='OCR.ALL_READS.L3_MISS.REMOTE_HITM'/, cpu/event=0xb7,umask=0x01,offcore_rsp=0x083FC007F7,name='OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD'/; +cpu-cycles, +ref-cycles, +instructions; -#OCR -cpu/event=0xb7,umask=0x01,offcore_rsp=0x3F840007F7,name='OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP'/, -cpu/event=0xb7,umask=0x01,offcore_rsp=0x3FB80007F7,name='OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP_ocr_msr_3fB80007f7'/; +#C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +#power related +power/energy-pkg/, +power/energy-ram/; #memory read/writes imc/event=0x04,umask=0x03,name='UNC_M_CAS_COUNT.RD'/, imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/; -#demand reads local and remote collected separately +cha/event=0x0,umask=0x0,name='UNC_CHA_CLOCKTICKS'/, +cha/event=0x36,umask=0x21,config1=0x4043300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40433'/, +cha/event=0x35,umask=0x21,config1=0x4043300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x40433'/; + cha/event=0x35,umask=0x21,config1=0x4043200000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x40432'/, cha/event=0x36,umask=0x21,config1=0x4043200000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40432'/; + cha/event=0x35,umask=0x21,config1=0x4043100000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x40431'/, cha/event=0x36,umask=0x21,config1=0x4043100000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40431'/; -#UNC_CHA_TOR_INSERTS.IA_MISS_CRD,UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD cha/event=0x35,umask=0x21,config1=0x12CC023300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233'/; -#UNC_CHA_TOR_INSERTS.IA_MISS_RFO,UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO -cha/event=0x35,umask=0x21,config1=0x12C4003300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033'/; - -#UNC_CHA_TOR_INSERTS.IA_MISS_DEMAND_RD,UNC_CHA_TOR_OCCUPANCY.IA_MISS_DEMAND_RD (demand data only - both local and remote) -cha/event=0x35,umask=0x21,config1=0x4043300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x40433'/, -cha/event=0x36,umask=0x21,config1=0x4043300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40433'/, -cha/event=0x0,umask=0x0,name='UNC_CHA_CLOCKTICKS'/; - -#UNC_CHA_TOR_INSERTS.IA_MISS_DRD,UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD cha/event=0x35,umask=0x21,config1=0x12D4043300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433'/; +cha/event=0x35,umask=0x21,config1=0x12C4003300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033'/; + #IO bandwidth iio/event=0x83,umask=0x04,ch_mask=0x00,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0'/, iio/event=0x83,umask=0x04,ch_mask=0x02,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1'/; + iio/event=0x83,umask=0x04,ch_mask=0x04,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2'/, iio/event=0x83,umask=0x04,ch_mask=0x08,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3'/; @@ -176,10 +231,5 @@ iio/event=0x83,umask=0x01,ch_mask=0x08,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CP #UPI related upi/event=0x2,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/, upi/event=0x2,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/, -upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/; - +upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/, upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/; - -#power related -power/energy-pkg/, -power/energy-ram/; diff --git a/events/icx.txt b/events/icx.txt index 87afb61..a9122ea 100644 --- a/events/icx.txt +++ b/events/icx.txt @@ -133,8 +133,8 @@ cpu/event=0xd3,umask=0x08,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRE cpu/event=0xd3,umask=0x04,cmask=0x00,period=100007,name='MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM'/, cpu/event=0xb1,umask=0x10,cmask=0x00,period=2000003,name='UOPS_EXECUTED.X87'/, cpu/event=0xb1,umask=0x01,cmask=0x00,period=2000003,name='UOPS_EXECUTED.THREAD'/, -cpu/event=0xc7,umask=0x02,umask=0x03,period=100003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/, -cpu/event=0xc7,umask=0x04,umask=0xfc,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc'/, +cpu/event=0xc7,umask=0x03,period=100003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/, +cpu/event=0xc7,umask=0xfc,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc'/, cpu-cycles, ref-cycles, instructions; diff --git a/events/metric_bdx.json b/events/metric_bdx.json index 0f6758b..22c9d85 100644 --- a/events/metric_bdx.json +++ b/events/metric_bdx.json @@ -1,331 +1,336 @@ [ - { - "name": "metric_CPU operating frequency (in GHz)", - "expression": "([cpu-cycles] / [ref-cycles]) * ([SYSTEM_TSC_FREQ] / 1000000000)" - }, - { - "name": "metric_CPU utilization %", - "expression": "100 * [ref-cycles] / [TSC]" - }, - { - "name": "metric_CPU utilization% in kernel mode", - "expression": "100 * [ref-cycles:k] / [TSC]" - }, - { - "name": "metric_CPI", - "expression": "[cpu-cycles] / [instructions]" - }, - { - "name": "metric_kernel_CPI", - "expression": "[cpu-cycles:k] / [instructions:k]" - }, - { - "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "tags": "transaction", - "expression": "[L1D.REPLACEMENT] / [instructions]" - }, - { - "name": "metric_L1D demand data read hits per instr", - "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]" - }, - { - "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]" - }, - { - "name": "metric_L2 demand data read hits per instr", - "expression": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [instructions]" - }, - { - "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "expression": "[L2_LINES_IN.ALL] / [instructions]" - }, - { - "name": "metric_L2 demand data read MPI", - "expression": "[MEM_LOAD_UOPS_RETIRED.L2_MISS] / [instructions]" - }, - { - "name": "metric_L2 demand code MPI", - "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]" - }, - { - "name": "metric_LLC MPI", - "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x180] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x181] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x190] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x191] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x192] - [UNC_C_TOR_INSERTS.MISS_OPCODE.tid.0x180]) / [instructions]" - }, - { - "name": "metric_LLC code read MPI (demand+prefetch)", - "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x181] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x191]) / [instructions]" - }, - { - "name": "metric_LLC data read MPI (demand+prefetch)", - "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x192]) / [instructions]" - }, - { - "name": "metric_LLC total HITM (per instr)", - "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [instructions]" - }, - { - "name": "metric_LLC total HIT clean line forwards (per instr)", - "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [instructions]" - }, - { - "name": "metric_Average LLC data read miss latency (in clks)", - "expression": "[UNC_C_TOR_OCCUPANCY.MISS_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_OPCODE.0x182]" - }, - { - "name": "metric_Average LLC data read miss latency (in ns)", - "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )" - }, - { - "name": "metric_Average LLC data read miss latency for LOCAL requests (in ns)", - "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))" - }, - { - "name": "metric_Average LLC data read miss latency for REMOTE requests (in ns)", - "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))" - }, - { - "name": "metric_ITLB MPI", - "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]" - }, - { - "name": "metric_ITLB large page MPI", - "expression": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" - }, - { - "name": "metric_DTLB load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]" - }, - { - "name": "metric_DTLB 2MB large page load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" - }, - { - "name": "metric_DTLB store MPI", - "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]" - }, - { - "name": "metric_DTLB load miss latency (in core clks)", - "expression": "[DTLB_LOAD_MISSES.WALK_DURATION] / [DTLB_LOAD_MISSES.WALK_COMPLETED]" - }, - { - "name": "metric_DTLB store miss latency (in core clks)", - "expression": "[DTLB_STORE_MISSES.WALK_DURATION] / [DTLB_STORE_MISSES.WALK_COMPLETED]" - }, - { - "name": "metric_NUMA %_Reads addressed to local DRAM", - "expression": "100 * [UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182] / ([UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182])" - }, - { - "name": "metric_NUMA %_Reads addressed to remote DRAM", - "expression": "100 * [UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182] / ([UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182])" - }, - { - "name": "metric_uncore frequency GHz", - "expression": "[UNC_C_CLOCKTICKS] / ([CORES_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000" - }, - { - "name": "metric_package power (watts)", - "expression": "[power/energy-pkg/]" - }, - { - "name": "metric_DRAM power (watts)", - "expression": "[power/energy-ram/]" - }, - { - "name": "metric_core c6 residency %", - "expression": "100 * [cstate_core/c6-residency/] / [TSC]" - }, - { - "name": "metric_package c6 residency %", - "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]" - }, - { - "name": "metric_memory bandwidth read (MB/sec)", - "expression": "[UNC_M_CAS_COUNT.RD] * 64 / 1000000" - }, - { - "name": "metric_memory bandwidth write (MB/sec)", - "expression": "[UNC_M_CAS_COUNT.WR] * 64 / 1000000" - }, - { - "name": "metric_memory bandwidth total (MB/sec)", - "expression": "([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000" - }, - { - "name": "metric_UPI Data transmit BW (MB/sec) (only data)", - "expression": "([UNC_Q_TxL_FLITS_G0.DATA]) * 8 / 1000000" - }, - { - "name": "metric_UPI Data transmit BW (MB/sec) (includes control)", - "expression": "([UNC_Q_TxL_FLITS_G0.DATA] + [UNC_Q_TxL_FLITS_G0.NON_DATA]) * 8 / 1000000" - }, - { - "name": "metric_UPI Transmit utilization_% (includes control)", - "expression": "([UNC_Q_TxL_FLITS_G0.DATA] + [UNC_Q_TxL_FLITS_G0.NON_DATA]) * 100 / [UNC_Q_CLOCKTICKS]" - }, - { - "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", - "expression": "[UNC_C_TOR_INSERTS.OPCODE.0x19e] * 64 / 1000000" - }, - { - "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", - "expression": "([UNC_C_TOR_INSERTS.OPCODE.0x1c8] + [UNC_C_TOR_INSERTS.OPCODE.0x180]) * 64 / 1000000" - }, - { - "name": "metric_TMA_Info_cycles_both_threads_active(%)", - "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [const_thread_count] > 1 else 0)" - }, - { - "name": "metric_TMA_Info_CoreIPC", - "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_Frontend_Bound(%)", - "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CORE] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" - }, - { - "name": "metric_TMA_..Frontend_Latency(%)", - "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_....ICache_Misses(%)", - "expression": "100 * [ICACHE.IFDATA_STALL] / [cpu-cycles]" - }, - { - "name": "metric_TMA_....ITLB_Misses(%)", - "expression": "100 * ((14 * [ITLB_MISSES.STLB_HIT]) + [ITLB_MISSES.WALK_DURATION_c1] + (7 * [ITLB_MISSES.WALK_COMPLETED] )) / [cpu-cycles]" - }, - { - "name": "metric_TMA_....Branch_Resteers(%)", - "expression": "100 * (([RS_EVENTS.EMPTY_CYCLES] - [ICACHE.IFDATA_STALL] - (14 * [ITLB_MISSES.STLB_HIT] + [ITLB_MISSES.WALK_DURATION_c1] + 7 * [ITLB_MISSES.WALK_COMPLETED])) / [RS_EVENTS.EMPTY_END]) * ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_....DSB_Switches(%)", - "expression": "100 * 2 * [DSB2MITE_SWITCHES.PENALTY_CYCLES] / [cpu-cycles]" - }, - { - "name": "metric_TMA_....MS_Switches(%)", - "expression": "100 * 2 * [IDQ.MS_SWITCHES] / [cpu-cycles]" - }, - { - "name": "metric_TMA_..Frontend_Bandwidth(%)", - "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - (4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE])) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_Bad_Speculation(%)", - "expression": "100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + ((4 * [INT_MISC.RECOVERY_CYCLES_ANY]) / [const_thread_count])) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) " - }, - { - "name": "metric_TMA_..Branch_Mispredicts(%)", - "expression": "([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * 100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + (4 * [INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count])) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_..Machine_Clears(%)", - "expression": "([MACHINE_CLEARS.COUNT] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * 100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + (4 * [INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count])) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_Backend_Bound(%)", - "expression": "100 - (100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + 4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count]) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS]) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) " - }, - { - "name": "metric_TMA_..Memory_Bound(%)", - "expression": "100 * (1 - (([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + 4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count]) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS]) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) * ([CYCLE_ACTIVITY.STALLS_MEM_ANY] + [RESOURCE_STALLS.SB]) / ([CYCLE_ACTIVITY.STALLS_TOTAL] + [UOPS_EXECUTED.CYCLES_GE_1_UOPS_EXEC] - ( [UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC] if ([instructions] / [cpu-cycles]) > 1.8 else [UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC]) - ( [RS_EVENTS.EMPTY_CYCLES] if ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / [CPU_CLK_UNHALTED.THREAD_ANY]) > 0.1 else 0) + [RESOURCE_STALLS.SB])" - }, - { - "name": "metric_TMA_....L1_Bound(%)", - "expression": "100 * ([CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......DTLB_Load(%)", - "expression": "100 * ([DTLB_LOAD_MISSES.STLB_HIT] * 8 + [DTLB_LOAD_MISSES.WALK_DURATION_c1] + 7 * [DTLB_LOAD_MISSES.WALK_COMPLETED]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......Store_Fwd_Blk(%)", - "expression": "100 * (13 * [LD_BLOCKS.STORE_FORWARD]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_....L2_Bound(%)", - "expression": "100 * ([CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_....L3_Bound(%)", - "expression": "100 * [MEM_LOAD_UOPS_RETIRED.L3_HIT] / ([MEM_LOAD_UOPS_RETIRED.L3_HIT] + 7 * [MEM_LOAD_UOPS_RETIRED.L3_MISS]) * ([CYCLE_ACTIVITY.STALLS_L2_MISS] / [cpu-cycles])" - }, - { - "name": "metric_TMA_......L3_Latency(%)", - "expression": "100 * 41 * [MEM_LOAD_UOPS_RETIRED.L3_HIT] * ( 1 + [MEM_LOAD_UOPS_RETIRED.HIT_LFB] / ( [MEM_LOAD_UOPS_RETIRED.L2_HIT] + [MEM_LOAD_UOPS_RETIRED.L3_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD] ) ) / [cpu-cycles] " - }, - { - "name": "metric_TMA_......Contested_Accesses(%)", - "expression": "100 * 60 * ([MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS]) * ( 1 + [MEM_LOAD_UOPS_RETIRED.HIT_LFB] / ( [MEM_LOAD_UOPS_RETIRED.L2_HIT] + [MEM_LOAD_UOPS_RETIRED.L3_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD] ) ) / [cpu-cycles] " - }, - { - "name": "metric_TMA_......Data_Sharing(%)", - "expression": "100 * 43 * [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT] * ( 1 + [MEM_LOAD_UOPS_RETIRED.HIT_LFB] / ( [MEM_LOAD_UOPS_RETIRED.L2_HIT] + [MEM_LOAD_UOPS_RETIRED.L3_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD] ) ) / [cpu-cycles] " - }, - { - "name": "metric_TMA_......SQ_Full(%)", - "expression": "100 * ([OFFCORE_REQUESTS_BUFFER.SQ_FULL] / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_....MEM_Bound(%)", - "expression": "100 * (1 - ( [MEM_LOAD_UOPS_RETIRED.L3_HIT] / ([MEM_LOAD_UOPS_RETIRED.L3_HIT] + 7 * [MEM_LOAD_UOPS_RETIRED.L3_MISS])) ) * ([CYCLE_ACTIVITY.STALLS_L2_MISS] / [cpu-cycles])" - }, - { - "name": "metric_TMA_......MEM_Bandwidth(%)", - "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD_c4], [cpu-cycles])) / [cpu-cycles]" - }, - { - "name": "metric_TMA_......MEM_Latency(%)", - "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD_c4], [cpu-cycles])) / [cpu-cycles]" - }, - { - "name": "metric_TMA_....Store_Bound(%)", - "expression": "100 * [RESOURCE_STALLS.SB] / [cpu-cycles]" - }, - { - "name": "metric_TMA_......DTLB_Store(%)", - "expression": "100 * (7 * [DTLB_STORE_MISSES.STLB_HIT] + [DTLB_STORE_MISSES.WALK_DURATION_c1]) / [cpu-cycles]" - }, - { - "name": "metric_TMA_..Core_Bound(%)", - "expression": "100 * ( 1 - (( [UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + 4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count]) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS] ) / ( 4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) * (1 - (([CYCLE_ACTIVITY.STALLS_MEM_ANY] + [RESOURCE_STALLS.SB]) / ([CYCLE_ACTIVITY.STALLS_TOTAL] + [UOPS_EXECUTED.CYCLES_GE_1_UOPS_EXEC] - ( [UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC] if ([instructions] / [cpu-cycles]) > 1.8 else [UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC]) - ([RS_EVENTS.EMPTY_CYCLES] if ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / [CPU_CLK_UNHALTED.THREAD_ANY]) > 0.1 else 0) + [RESOURCE_STALLS.SB])))" - }, - { - "name": "metric_TMA_....Divider(%)", - "expression": "100 * [ARITH.FPU_DIV_ACTIVE] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_....Ports_Utilization(%)", - "expression": "100 * (( [CYCLE_ACTIVITY.STALLS_TOTAL] + [UOPS_EXECUTED.CYCLES_GE_1_UOPS_EXEC] - ([UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC] if ([instructions] / [cpu-cycles]) > 1.8 else [UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC]) - ([RS_EVENTS.EMPTY_CYCLES] if ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / [CPU_CLK_UNHALTED.THREAD_ANY]) > 0.1 else 0) + [RESOURCE_STALLS.SB]) - [RESOURCE_STALLS.SB] - [CYCLE_ACTIVITY.STALLS_MEM_ANY] ) /[cpu-cycles]" - }, - { - "name": "metric_TMA_......0_Port_Utilized(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_i1_c1] / [const_thread_count]) if ([const_thread_count] > 1) else ([RS_EVENTS.EMPTY_CYCLES] if ([CYCLE_ACTIVITY.STALLS_TOTAL] - ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) ) > 0.1 else 0)) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]) " - }, - { - "name": "metric_TMA_......1_Port_Utilized(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_c1] - [UOPS_EXECUTED.CORE_c2]) / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_......2_Port_Utilized(%)", - "expression": "100 * (([UOPS_EXECUTED.CORE_c2] - [UOPS_EXECUTED.CORE_c3]) / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_......3m_Ports_Utilized(%)", - "expression": "100 * ([UOPS_EXECUTED.CORE_c3] / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" - }, - { - "name": "metric_TMA_Retiring(%)", - "expression": "100 * [UOPS_RETIRED.RETIRE_SLOTS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" - }, - { - "name": "metric_TMA_..Base(%)", - "expression": "100 *(([UOPS_RETIRED.RETIRE_SLOTS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) - (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))))" - }, - { - "name": "metric_TMA_..Microcode_Sequencer(%)", - "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] )/ (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" - } + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_CPI", + "expression": "[cpu-cycles] / [instructions]" + }, + { + "name": "metric_kernel_CPI", + "expression": "[cpu-cycles:k] / [instructions:k]", + "origin": "perfspect" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "expression": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [instructions]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "expression": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [instructions]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]" + }, + { + "name": "metric_L2 demand data read MPI", + "expression": "[MEM_LOAD_UOPS_RETIRED.L2_MISS] / [instructions]" + }, + { + "name": "metric_L2 demand code MPI", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]" + }, + { + "name": "metric_LLC MPI", + "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x180] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x181] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x190] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x191] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x192] - [UNC_C_TOR_INSERTS.MISS_OPCODE.tid.0x180]) / [instructions]", + "origin": "perfspect" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x181] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x191]) / [instructions]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x192]) / [instructions]" + }, + { + "name": "metric_LLC total HITM (per instr)", + "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [instructions]", + "origin": "perfspect" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr)", + "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [instructions]", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC data read miss latency (in clks)", + "expression": "[UNC_C_TOR_OCCUPANCY.MISS_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_OPCODE.0x182]", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC data read miss latency (in ns)", + "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC data read miss latency for LOCAL requests (in ns)", + "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC data read miss latency for REMOTE requests (in ns)", + "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))", + "origin": "perfspect" + }, + { + "name": "metric_ITLB MPI", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]" + }, + { + "name": "metric_ITLB large page MPI", + "expression": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" + }, + { + "name": "metric_DTLB load MPI", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]" + }, + { + "name": "metric_DTLB 2MB large page load MPI", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "origin": "perfspect" + }, + { + "name": "metric_DTLB store MPI", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]" + }, + { + "name": "metric_DTLB load miss latency (in core clks)", + "expression": "[DTLB_LOAD_MISSES.WALK_DURATION] / [DTLB_LOAD_MISSES.WALK_COMPLETED]", + "origin": "perfspect" + }, + { + "name": "metric_DTLB store miss latency (in core clks)", + "expression": "[DTLB_STORE_MISSES.WALK_DURATION] / [DTLB_STORE_MISSES.WALK_COMPLETED]", + "origin": "perfspect" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * [UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182] / ([UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * [UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182] / ([UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182])" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_C_CLOCKTICKS] / ([CORES_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": "([UNC_C_TOR_INSERTS.OPCODE.0x19e] * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": "(([UNC_C_TOR_INSERTS.OPCODE.0x1c8.tid.0x3e] + [UNC_C_TOR_INSERTS.OPCODE.0x180.tid.0x3e]) * 64 / 1000000) / 1" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100 * ( ( 4 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [ICACHE.IFDATA_STALL] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( ( 14 * [ITLB_MISSES.STLB_HIT] + [ITLB_MISSES.WALK_DURATION:c1] + 7 * [ITLB_MISSES.WALK_COMPLETED] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....Branch_Resteers(%)", + "expression": "100 * ( ( 12 ) * ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_......Mispredicts_Resteers(%)", + "expression": "100 * ( [BR_MISP_RETIRED.ALL_BRANCHES] * ( ( 12 ) * ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY] ) / ( [cpu-cycles] ) ) / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY] ) )" + }, + { + "name": "metric_TMA_......Clears_Resteers(%)", + "expression": "100 * ( [MACHINE_CLEARS.COUNT] * ( ( 12 ) * ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY] ) / ( [cpu-cycles] ) ) / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY] ) )" + }, + { + "name": "metric_TMA_......Unknown_Branches(%)", + "expression": "100 * ( ( ( 12 ) * ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY] ) / ( [cpu-cycles] ) ) - ( [BR_MISP_RETIRED.ALL_BRANCHES] * ( ( 12 ) * ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY] ) / ( [cpu-cycles] ) ) / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY] ) ) - ( [MACHINE_CLEARS.COUNT] * ( ( 12 ) * ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY] ) / ( [cpu-cycles] ) ) / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY] ) ) )" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( ( 4 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) )" + }, + { + "name": "metric_TMA_....MITE(%)", + "expression": "100 * ( ( [IDQ.ALL_MITE_CYCLES_ANY_UOPS] - [IDQ.ALL_MITE_CYCLES_4_UOPS] ) / ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.ALL_DSB_CYCLES_ANY_UOPS] - [IDQ.ALL_DSB_CYCLES_4_UOPS] ) / ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) / 2 )" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100 * ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) )" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100 * ( ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) ) )" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( 1 - ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) + ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": "100 * ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [RESOURCE_STALLS.SB] ) / ( ( [CYCLE_ACTIVITY.STALLS_TOTAL] + [UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC] - ( [UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC] if ( ( [instructions] / ( [cpu-cycles] ) ) > 1.8 ) else [UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC] ) - ( [RS_EVENTS.EMPTY_CYCLES] if ( ( ( 4 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) > 0.1 ) else 0 ) + [RESOURCE_STALLS.SB] ) ) ) * ( 1 - ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) + ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) ) ) )" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100 * ( max( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" + }, + { + "name": "metric_TMA_......DTLB_Load(%)", + "expression": "100 * ( ( ( 8 ) * [DTLB_LOAD_MISSES.STLB_HIT] + [DTLB_LOAD_MISSES.WALK_DURATION:c1] + 7 * [DTLB_LOAD_MISSES.WALK_COMPLETED] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100 * ( ( [MEM_LOAD_UOPS_RETIRED.L3_HIT] / ( [MEM_LOAD_UOPS_RETIRED.L3_HIT] + ( 7 ) * [MEM_LOAD_UOPS_RETIRED.L3_MISS] ) ) * [CYCLE_ACTIVITY.STALLS_L2_MISS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_......Data_Sharing(%)", + "expression": "100 * ( min( ( ( 43 ) * ( [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT] * ( 1 + [MEM_LOAD_UOPS_RETIRED.HIT_LFB] / ( ( [MEM_LOAD_UOPS_RETIRED.L2_HIT] + [MEM_LOAD_UOPS_RETIRED.L3_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS] ) + [MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD] ) ) ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" + }, + { + "name": "metric_TMA_....MEM_Bound(%)", + "expression": "100 * (1 - ( [MEM_LOAD_UOPS_RETIRED.L3_HIT] / ([MEM_LOAD_UOPS_RETIRED.L3_HIT] + 7 * [MEM_LOAD_UOPS_RETIRED.L3_MISS])) ) * ([CYCLE_ACTIVITY.STALLS_L2_MISS] / [cpu-cycles])", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......MEM_Bandwidth(%)", + "expression": "100 * ( ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4] ) ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_......MEM_Latency(%)", + "expression": "100 * ( ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD] ) ) / ( [cpu-cycles] ) - ( ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4] ) ) / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100 * ( [RESOURCE_STALLS.SB] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100 * ( ( 1 - ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) + ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) ) ) - ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [RESOURCE_STALLS.SB] ) / ( ( [CYCLE_ACTIVITY.STALLS_TOTAL] + [UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC] - ( [UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC] if ( ( [instructions] / ( [cpu-cycles] ) ) > 1.8 ) else [UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC] ) - ( [RS_EVENTS.EMPTY_CYCLES] if ( ( ( 4 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) > 0.1 ) else 0 ) + [RESOURCE_STALLS.SB] ) ) ) * ( 1 - ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) + ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) ) ) ) )" + }, + { + "name": "metric_TMA_....Ports_Utilization(%)", + "expression": "100 * ( ( ( ( [CYCLE_ACTIVITY.STALLS_TOTAL] + [UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC] - ( [UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC] if ( ( [instructions] / ( [cpu-cycles] ) ) > 1.8 ) else [UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC] ) - ( [RS_EVENTS.EMPTY_CYCLES] if ( ( ( 4 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) > 0.1 ) else 0 ) + [RESOURCE_STALLS.SB] ) ) - [RESOURCE_STALLS.SB] - [CYCLE_ACTIVITY.STALLS_MEM_ANY] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_......0_Port_Utilized(%)", + "expression": "100 * (([UOPS_EXECUTED.CORE_i1_c1] / [const_thread_count]) if ([const_thread_count] > 1) else ([RS_EVENTS.EMPTY_CYCLES] if ([CYCLE_ACTIVITY.STALLS_TOTAL] - ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) ) > 0.1 else 0)) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]) ", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......1_Port_Utilized(%)", + "expression": "100 * (([UOPS_EXECUTED.CORE_c1] - [UOPS_EXECUTED.CORE_c2]) / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......2_Port_Utilized(%)", + "expression": "100 * (([UOPS_EXECUTED.CORE_c2] - [UOPS_EXECUTED.CORE_c3]) / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "origin": "perfspect" + }, + { + "name": "metric_TMA_......3m_Ports_Utilized(%)", + "expression": "100 * ([UOPS_EXECUTED.CORE_c3] / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])", + "origin": "perfspect" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" + }, + { + "name": "metric_TMA_..Light_Operations(%)", + "expression": "100 * ( ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( ( ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) ) )" + }, + { + "name": "metric_TMA_....FP_Arith(%)", + "expression": "100 * ( ( [INST_RETIRED.X87] * ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / [instructions] ) / ( [UOPS_RETIRED.RETIRE_SLOTS] ) ) + ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] ) / ( [UOPS_RETIRED.RETIRE_SLOTS] ) ) + ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c] ) / ( [UOPS_RETIRED.RETIRE_SLOTS] ) ) , ( 1 ) ) ) )" + }, + { + "name": "metric_TMA_......FP_Scalar(%)", + "expression": "100 * ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] ) / ( [UOPS_RETIRED.RETIRE_SLOTS] ) )" + }, + { + "name": "metric_TMA_......FP_Vector(%)", + "expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c] ) / ( [UOPS_RETIRED.RETIRE_SLOTS] ) ) , ( 1 ) ) )" + }, + { + "name": "metric_TMA_..Heavy_Operations(%)", + "expression": "100 * ( ( ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) )" + }, + { + "name": "metric_TMA_..Microcode_Sequencer(%)", + "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] )/ (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", + "origin": "perfspect" + } ] \ No newline at end of file diff --git a/events/metric_skx_clx.json b/events/metric_skx_clx.json index 8ac91ba..b59b8cd 100644 --- a/events/metric_skx_clx.json +++ b/events/metric_skx_clx.json @@ -272,6 +272,14 @@ "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", "origin": "perfspect" }, + { + "name": "metric_TMA_....MITE(%)", + "expression": "100 * ( ( [IDQ.ALL_MITE_CYCLES_ANY_UOPS] - [IDQ.ALL_MITE_CYCLES_4_UOPS] ) / ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.ALL_DSB_CYCLES_ANY_UOPS] - [IDQ.ALL_DSB_CYCLES_4_UOPS] ) / ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) / 2 )" + }, { "name": "metric_TMA_Bad_Speculation(%)", "expression": "100 * ( ( [UOPS_ISSUED.ANY] - ( [UOPS_RETIRED.RETIRE_SLOTS] ) + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" @@ -301,8 +309,8 @@ "expression": "100 * ( min( ( 9 ) * [DTLB_LOAD_MISSES.STLB_HIT:c1] + [DTLB_LOAD_MISSES.WALK_ACTIVE] , max( [CYCLE_ACTIVITY.CYCLES_MEM_ANY] - [CYCLE_ACTIVITY.CYCLES_L1D_MISS] , 0 ) ) / ( [cpu-cycles] ) )" }, { - "name": "metric_TMA_......Store_Fwd_Blk(%)", - "expression": "100 * ( min( ( 13 * [LD_BLOCKS.STORE_FORWARD] / ( [cpu-cycles] ) ) , ( 1 ) ) )" + "name": "metric_TMA_......Lock_Latency(%)", + "expression": "100 * ( min( ( ( 12 * max( 0 , [MEM_INST_RETIRED.LOCK_LOADS] - [L2_RQSTS.ALL_RFO] ) + ( [MEM_INST_RETIRED.LOCK_LOADS] / [MEM_INST_RETIRED.ALL_STORES] ) * ( ( 11 ) * [L2_RQSTS.RFO_HIT] + ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO] ) ) ) ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_....L2_Bound(%)", @@ -313,18 +321,8 @@ "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" }, { - "name": "metric_TMA_......L3_Latency(%)", - "expression": "100 * (((min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD], [cpu-cycles])) / [cpu-cycles]) - ((min([OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6] , [cpu-cycles])) / [cpu-cycles]))", - "origin": "perfspect" - }, - { - "name": "metric_TMA_......L3_Bandwidth(%)", - "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6], [cpu-cycles])) / [cpu-cycles]", - "origin": "perfspect" - }, - { - "name": "metric_TMA_......SQ_Full(%)", - "expression": "100 * ( ( ( [OFFCORE_REQUESTS_BUFFER.SQ_FULL] / 2 ) if [HYPERTHREADING_ON] else [OFFCORE_REQUESTS_BUFFER.SQ_FULL] ) / ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) )" + "name": "metric_TMA_......Data_Sharing(%)", + "expression": "100 * ( min( ( ( ( 47.5 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) - ( 3.5 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) ) * ( [MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT] + [MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM] * ( 1 - ( [OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE] / ( [OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE] + [OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD] ) ) ) ) * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) / 2 ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_....MEM_Bound(%)", @@ -346,17 +344,13 @@ "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" }, { - "name": "metric_TMA_......DTLB_Store(%)", - "expression": "100 * ( min( ( ( ( 9 ) * [DTLB_STORE_MISSES.STLB_HIT:c1] + [DTLB_STORE_MISSES.WALK_ACTIVE] ) / ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) , ( 1 ) ) )" + "name": "metric_TMA_......False_Sharing(%)", + "expression": "100 * ( min( ( ( ( 110 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) * ( [OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM] + [OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM] ) + ( 47.5 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) * ( [OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE] + [OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE] ) ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_..Core_Bound(%)", "expression": "100 * ( ( 1 - ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( [UOPS_ISSUED.ANY] + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( 1 - ( [IDQ_UOPS_NOT_DELIVERED.CORE] / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( [UOPS_ISSUED.ANY] + ( 4 ) * ( ( [INT_MISC.RECOVERY_CYCLES_ANY] / 2 ) if [HYPERTHREADING_ON] else [INT_MISC.RECOVERY_CYCLES] ) ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) ) )" }, - { - "name": "metric_TMA_....Divider(%)", - "expression": "100 * ( [ARITH.DIVIDER_ACTIVE] / ( [cpu-cycles] ) )" - }, { "name": "metric_TMA_....Ports_Utilization(%)", "expression": "100 * ( ( [EXE_ACTIVITY.EXE_BOUND_0_PORTS] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) ) / ( [cpu-cycles] ) if ( [ARITH.DIVIDER_ACTIVE] < ( [CYCLE_ACTIVITY.STALLS_TOTAL] - [CYCLE_ACTIVITY.STALLS_MEM_ANY] ) ) else ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) / ( [cpu-cycles] ) )" @@ -386,13 +380,28 @@ "expression": "100 * ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" }, { - "name": "metric_TMA_..Base(%)", - "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) - (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))))", - "origin": "perfspect" + "name": "metric_TMA_..Light_Operations(%)", + "expression": "100 * ( ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) - ( ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) + [UOPS_RETIRED.MACRO_FUSED] - [instructions] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) )" + }, + { + "name": "metric_TMA_....FP_Arith(%)", + "expression": "100 * ( ( ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) ) * [UOPS_EXECUTED.X87] / [UOPS_EXECUTED.THREAD] ) + ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] ) / ( [UOPS_RETIRED.RETIRE_SLOTS] ) ) + ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc] ) / ( [UOPS_RETIRED.RETIRE_SLOTS] ) ) , ( 1 ) ) ) )" + }, + { + "name": "metric_TMA_......FP_Scalar(%)", + "expression": "100 * ( ( [FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03] ) / ( [UOPS_RETIRED.RETIRE_SLOTS] ) )" + }, + { + "name": "metric_TMA_......FP_Vector(%)", + "expression": "100 * ( min( ( ( [FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0xfc] ) / ( [UOPS_RETIRED.RETIRE_SLOTS] ) ) , ( 1 ) ) )" + }, + { + "name": "metric_TMA_..Heavy_Operations(%)", + "expression": "100 * ( ( ( [UOPS_RETIRED.RETIRE_SLOTS] ) + [UOPS_RETIRED.MACRO_FUSED] - [instructions] ) / ( ( 4 ) * ( ( [CPU_CLK_UNHALTED.THREAD_ANY] / 2 ) if [HYPERTHREADING_ON] else ( [cpu-cycles] ) ) ) )" }, { "name": "metric_TMA_..Microcode_Sequencer(%)", "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))", "origin": "perfspect" } -] +] \ No newline at end of file diff --git a/events/spr.txt b/events/spr.txt index eba75f1..299618c 100644 --- a/events/spr.txt +++ b/events/spr.txt @@ -87,8 +87,8 @@ cpu/event=0x24,umask=0xe2,cmask=0x00,period=2000003,name='L2_RQSTS.ALL_RFO'/, cpu/event=0x24,umask=0xc2,cmask=0x00,period=2000003,name='L2_RQSTS.RFO_HIT'/, cpu/event=0xcf,umask=0x03,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.SCALAR'/, cpu/event=0xcf,umask=0x1c,cmask=0x00,period=100003,name='FP_ARITH_INST_RETIRED2.VECTOR'/, -cpu/event=0xc7,umask=0x02,umask=0x03,period=100003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/, -cpu/event=0xc7,umask=0x04,umask=0x3c,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c'/, +cpu/event=0xc7,umask=0x03,period=100003,name='FP_ARITH_INST_RETIRED.SCALAR_SINGLE:u0x03'/, +cpu/event=0xc7,umask=0x3c,period=100003,name='FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE:u0x3c'/, cpu-cycles, ref-cycles, instructions; diff --git a/perf-collect.py b/perf-collect.py index 5616026..42cc642 100644 --- a/perf-collect.py +++ b/perf-collect.py @@ -11,6 +11,7 @@ import sys import subprocess # nosec import shlex # nosec +import time from argparse import ArgumentParser from src import perf_helpers from src import prepare_perf_events as prep_events @@ -75,9 +76,9 @@ def write_metadata( threadmode = "enabled" if thread else "disabled" socketmode = "enabled" if socket else "disabled" if args.cid is not None: - cgname = "enabled," + perf_helpers.get_comm_from_cid( - args.cid.split(","), cgroups - ) + cgname = "enabled," + for cgroup in cgroups: + cgname += cgroup + "=" + cgroup.replace("/", "-") + "," else: cgname = "disabled" modified.write("cgroups=" + str(cgname) + "\n") @@ -189,9 +190,10 @@ def validate_file(fname): runmode.add_argument( "-c", "--cid", + help="perf-collect on up to 5 cgroups. Provide comma separated cids like e19f4fb59,6edca29db (by default, selects the 5 containers using the most CPU)", type=str, - default=None, - help="perf-collect on selected container ids", + nargs="?", + const="", ) runmode.add_argument( "--thread", help="Collect for thread metrics", action="store_true" @@ -203,8 +205,8 @@ def validate_file(fname): "-m", "--muxinterval", type=int, - default=10, - help="event mux interval in milli seconds, default=10", + default=125, + help="event mux interval in milli seconds, default=125. Lower numbers can cause higher overhead", ) parser.add_argument( "-o", @@ -231,7 +233,18 @@ def validate_file(fname): # disable nmi watchdog before collecting perf nmi_watchdog = perf_helpers.disable_nmi_watchdog() initial_pmus = perf_helpers.pmu_contention_detect() - interval = 1000 + interval = 5000 + + if args.thread: + logging.info("Run mode: thread") + elif args.socket: + logging.info("Run mode: socket") + elif args.pid is not None: + logging.info("Run mode: pid") + elif args.cid is not None: + logging.info("Run mode: cid") + else: + logging.info("Run mode: system") if args.muxinterval > 1000: crash("Input argument muxinterval is too large, max is [1s or 1000ms]") @@ -283,8 +296,7 @@ def validate_file(fname): # parse cgroups cgroups = [] if args.cid is not None: - cgroups = perf_helpers.get_cgroups_from_cids(args.cid.split(",")) - num_cgroups = len(cgroups) + cgroups = perf_helpers.get_cgroups(args.cid) # get perf events to collect collection_events = [] @@ -318,66 +330,28 @@ def validate_file(fname): args.pid is not None or args.cid is not None, ) + if args.thread or args.socket or args.pid is not None or args.cid is not None: + logging.info("Not collecting uncore events in this run mode") + + # build perf stat command collection_type = "-a" if not args.thread and not args.socket else "-a -A" - # start perf stat - if args.pid and args.timeout: - logging.info("Only CPU/core events will be enabled with pid option") - cmd = "perf stat -I %d -x , --pid %s -e %s -o %s sleep %d" % ( - interval, - args.pid, - events, - args.outcsv, - args.timeout, - ) + cmd = f"perf stat -I {interval} -x , {collection_type} -o {args.outcsv}" + if args.pid: + cmd += f" --pid {args.pid}" - elif args.pid: - logging.info("Only CPU/core events will be enabled with pid option") - cmd = "perf stat -I %d -x , --pid %s -e %s -o %s" % ( - interval, - args.pid, - events, - args.outcsv, - ) - elif args.cid and args.timeout: - logging.info("Only CPU/core events will be enabled with cid option") - perf_format = prep_events.get_cgroup_events_format( - cgroups, events, len(collection_events) - ) - cmd = "perf stat -I %d -x , %s -a -o %s sleep %d" % ( - interval, - perf_format, - args.outcsv, - args.timeout, - ) - elif args.cid: - logging.info("Only CPU/core events will be enabled with cid option") + if args.cid is not None: perf_format = prep_events.get_cgroup_events_format( cgroups, events, len(collection_events) ) - cmd = "perf stat -I %d -x , %s -o %s" % (interval, perf_format, args.outcsv) - elif args.app: - cmd = "perf stat %s -I %d -x , -e %s -o %s %s" % ( - collection_type, - interval, - events, - args.outcsv, - args.app, - ) - elif args.timeout: - cmd = "perf stat %s -I %d -x , -e %s -o %s sleep %d" % ( - collection_type, - interval, - events, - args.outcsv, - args.timeout, - ) + cmd += f" {perf_format}" else: - cmd = "perf stat %s -I %d -x , -e %s -o %s" % ( - collection_type, - interval, - events, - args.outcsv, - ) + cmd += f" -e {events}" + + if args.timeout: + cmd += f" sleep {args.timeout}" + elif args.app: + cmd += f" {args.app}" + perfargs = shlex.split(cmd) validate_perfargs(perfargs) perf_helpers.pmu_contention_detect(msrs=initial_pmus, detect=True) @@ -385,7 +359,13 @@ def validate_file(fname): logging.info(cmd) try: logging.info("Collecting perf stat for events in : %s" % eventfilename) + start = time.time() subprocess.call(perfargs) # nosec + end = time.time() + if end - start < 5: + logging.warning( + "PerfSpect was run for less than 5 seconds, some events make be zero because they didn't get scheduled" + ) logging.info("Collection complete! Calculating TSC frequency now") except KeyboardInterrupt: logging.info("Collection stopped! Caculating TSC frequency now") diff --git a/perf-postprocess.py b/perf-postprocess.py index 82bd3b1..c53386a 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -447,14 +447,15 @@ def extract_dataframe(perf_data_lines, meta_data, perf_mode): axis=1, ) - # fix metric name X.1, X.2, etc -> just X - perf_data_df["metric"] = perf_data_df.apply( - lambda x: ".".join(x["metric"].split(".")[:-1]) - if len(re.findall(r"^[0-9]*$", x["metric"].split(".")[-1])) > 0 - else x["metric"], - axis=1, - ) - + if perf_mode != Mode.Core and perf_mode != Mode.Socket: + # fix metric name X.1, X.2, etc -> just X + # we don't need this in thread/socket modes + perf_data_df["metric"] = perf_data_df.apply( + lambda x: ".".join(x["metric"].split(".")[:-1]) + if len(re.findall(r"^[0-9]*$", x["metric"].split(".")[-1])) > 0 + else x["metric"], + axis=1, + ) # set data frame types perf_data_df["value"] = pd.to_numeric( perf_data_df["value"], errors="coerce" @@ -505,11 +506,13 @@ def get_event_expression_from_group( "[" + event + "]", str(event_df[0]) ) else: - for index, value in event_df.iterrows(): + for index in event_df.index: + value = event_df["value"][index] if index not in expressions_to_evaluate: expressions_to_evaluate[index] = exp_to_evaluate expressions_to_evaluate[index] = expressions_to_evaluate[index].replace( - "[" + event + "]", str(value[0]) + "[" + event + "]", + str(value), ) return @@ -542,9 +545,6 @@ def generate_metrics_averages( average_metric_file_name = get_extra_out_file(out_file_path, "ca") time_series_df.index.name = "metrics" - # throw out 1st and last datapoints since they tend to be significantly off norm - if len(time_series_df) > 2: - time_series_df = time_series_df.iloc[:, 1:-1] avgcol = time_series_df.mean(numeric_only=True, axis=1).to_frame().reset_index() p95col = time_series_df.quantile(q=0.95, axis=1).to_frame().reset_index() mincol = time_series_df.min(axis=1).to_frame().reset_index() @@ -654,36 +654,59 @@ def generate_metrics( "MULTIPLE GROUPS": set(), } prev_time_slice = 0 + group_to_start_end_indexes = {} for time_slice, item in time_slice_groups: + time_slice_float = float(time_slice) + if time_slice_float - prev_time_slice < 5: + logging.warning("throwing out last sample because it was too short") + continue time_slice_df = time_slice_groups.get_group(time_slice).copy() # normalize by difference between current time slice and previous time slice # this ensures that all our events are per-second, even if perf is collecting # over a longer time slice - time_slice_float = float(time_slice) time_slice_df["value"] = time_slice_df["value"] / ( time_slice_float - prev_time_slice ) - prev_time_slice = time_slice_float current_group_indx = 0 group_to_df = {} - start_index = 0 + start_of_group_index = 0 end_of_group_index = 0 - for index, row in time_slice_df.iterrows(): - if row["metric"] in event_groups["group_" + str(current_group_indx)]: - end_of_group_index += 1 - continue - else: # move to next group - group_to_df["group_" + str(current_group_indx)] = get_group_df( - time_slice_df, start_index, end_of_group_index, perf_mode + if prev_time_slice == 0: # first time slice + for index, row in time_slice_df.iterrows(): + if row["metric"] in event_groups["group_" + str(current_group_indx)]: + end_of_group_index += 1 + continue + else: # move to next group + group_to_df["group_" + str(current_group_indx)] = get_group_df( + time_slice_df, + start_of_group_index, + end_of_group_index, + perf_mode, + ) + group_to_start_end_indexes["group_" + str(current_group_indx)] = ( + start_of_group_index, + end_of_group_index, + ) + start_of_group_index = end_of_group_index + end_of_group_index += 1 + current_group_indx += 1 + # add last group + group_to_df["group_" + str(current_group_indx)] = get_group_df( + time_slice_df, start_of_group_index, time_slice_df.shape[0], perf_mode + ) + group_to_start_end_indexes["group_" + str(current_group_indx)] = ( + start_of_group_index, + time_slice_df.shape[0], + ) + else: # use same start & end indexes from first time slice + for group_id in group_to_start_end_indexes: + start_of_group_index = group_to_start_end_indexes[group_id][0] + end_of_group_index = group_to_start_end_indexes[group_id][1] + group_to_df[group_id] = get_group_df( + time_slice_df, start_of_group_index, end_of_group_index, perf_mode ) - start_index = end_of_group_index - end_of_group_index += 1 - current_group_indx += 1 - # add last group - group_to_df["group_" + str(current_group_indx)] = get_group_df( - time_slice_df, start_index, time_slice_df.shape[0], perf_mode - ) + prev_time_slice = time_slice_float metrics_results = {} for m in metrics: non_constant_events = [] @@ -776,9 +799,12 @@ def generate_metrics( + '"' ) errors["MISSING EVENTS"].add(m["name"]) - continue # skip metric + continue time_metrics_result[time_slice] = metrics_results - time_series_df = pd.DataFrame(time_metrics_result) + time_series_df = pd.DataFrame(time_metrics_result).reindex( + index=list(time_metrics_result[list(time_metrics_result.keys())[0]].keys()) + ) + if verbose: for error in errors: logging.warning( diff --git a/src/perf_helpers.py b/src/perf_helpers.py index 35f5332..78ac091 100644 --- a/src/perf_helpers.py +++ b/src/perf_helpers.py @@ -8,7 +8,6 @@ import collections import fnmatch import logging -import math import os import re import struct @@ -289,8 +288,7 @@ def get_cpuinfo(): def get_lscpu(): cpuinfo = {} try: - lscpu = subprocess.check_output(["lscpu"], universal_newlines=True) # nosec - # print(lscpu.split("\n")) + lscpu = subprocess.check_output(["lscpu"]).decode() # nosec lscpu = [i for i in lscpu.split("\n") if i] for prop in lscpu: key, value = prop.split(":") @@ -304,9 +302,7 @@ def get_lscpu(): # get supported perf events def get_perf_list(): try: - perf_list = subprocess.check_output( # nosec - ["perf", "list"], universal_newlines=True - ) + perf_list = subprocess.check_output(["perf", "list"]).decode() # nosec except FileNotFoundError: crash("Please install Linux perf and re-run") except subprocess.CalledProcessError as e: @@ -395,22 +391,6 @@ def check_file_writeable(outfile): return os.access(dirname, os.W_OK) -# Find the percentile of a list of values -# parameter percent - a float value from 0.0 to 1.0 -def percentile(N, percent): - if not N: - return None - N.sort() - k = (len(N) - 1) * percent - f = math.floor(k) - c = math.ceil(k) - if f == c: - return N[int(k)] - d0 = N[int(f)] * (c - k) - d1 = N[int(c)] * (k - f) - return d0 + d1 - - # convert time to epoch def get_epoch(start_time): words = "".join(start_time).split() @@ -431,44 +411,67 @@ def get_epoch(start_time): return epoch -# get cgroup names by container ids -def get_cgroups_from_cids(cids): - # cgroups is a set to exclude duplicate cids - cgroups = set() +# get cgroups +def get_cgroups(cid): + cids = cid.split(",") try: - p = subprocess.Popen( - ["ps", "-e", "-o", "cgroup"], stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) - p2 = subprocess.Popen( - ["grep", "docker-"], - stdin=p.stdout, + stat = subprocess.Popen( + ["stat", "-fc", "%T", "/sys/fs/cgroup/"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - p.stdout.close() + except subprocess.SubprocessError as e: + crash( + "Cannot determine cgroup version. failed to open stat subprocess: " + str(e) + ) + out, err = stat.communicate() + out = out.decode("utf-8").strip() + if out == "tmpfs": + logging.info("cgroup v1 detected") + elif out == "cgroup2fs": + logging.info("cgroup v2 detected") + else: + logging.info("unknown cgroup version " + out) + try: + p = subprocess.Popen( + ["ps", "-a", "-x", "-o", "cgroup", "--sort=-%cpu"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) except subprocess.SubprocessError as e: crash("failed to open ps subprocess: " + str(e)) - out, err = p2.communicate() + out, err = p.communicate() if err: crash(f"error reading cgroups: {err}") - lines = out.decode("utf-8").split("\n") - for cid in cids: - found = False - for line in lines: - if ("docker-" + cid) in line: - found = True - cgroups.add(line.split(":")[-1]) - if not found: - crash("invalid container ID: " + cid) - # change cgroups back to list brefore returning - return list(cgroups) - - -# Convert cids to comm/names -# Requires pstools python library -def get_comm_from_cid(cids, cgroups): - cnamelist = "" - for index, cid in enumerate(cids): - cnamelist += cgroups[index] + "=" + cid + "," - return cnamelist + + cgroups = [ + *dict.fromkeys( + filter( + lambda x: ( # must be container runtime + "docker" in x or "containerd" in x + ) + and x.endswith(".scope") # don't include services + and ( # select all or provided cids + len(cids) == 0 or any(map(lambda y: y in x, cids)) + ), + map( + lambda x: x.split(":")[-1], # get trailing cgroup name + filter( # remove extraneous lines + lambda x: x != "" and x != "CGROUP" and x != "-", + out.decode("utf-8").split("\n"), + ), + ), + ) + ) + ] + if len(cgroups) == 0: + crash("no matching cgroups found") + elif len(cgroups) > 5: + logging.warning( + "more than 5 matching cgroups found removing: " + str(cgroups[5:]) + ) + cgroups = cgroups[:5] + for c in cgroups: + logging.info("attaching to cgroup: " + c) + return cgroups diff --git a/src/prepare_perf_events.py b/src/prepare_perf_events.py index dd2230c..710cc2b 100644 --- a/src/prepare_perf_events.py +++ b/src/prepare_perf_events.py @@ -99,17 +99,14 @@ def enumerate_uncore(group, pattern, n, default_range=True): def get_cgroup_events_format(cgroups, events, num_events): eventlist = "" grouplist = "" - # Find total number of cgroups - num_cgroups = len(cgroups) - # cgroups = cgroups.split(",") # "-e" flags: Create event groups as many number of cgroups - for i in range(num_cgroups): + for _ in range(len(cgroups)): eventlist += " -e " + events # "-G" flags: Repeat cgroup name for as many events in each event group for cgroup in cgroups: grouplist = grouplist.rstrip(",") + " -G " - for i in range(num_events): + for _ in range(num_events): grouplist += cgroup + "," perf_format = eventlist + grouplist.rstrip(",")