diff --git a/_version.txt b/_version.txt index c04c650..db6fb4a 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.2.7 +1.2.8 diff --git a/events/bdx.txt b/events/bdx.txt index bd8f4ed..756a58f 100644 --- a/events/bdx.txt +++ b/events/bdx.txt @@ -22,7 +22,7 @@ ref-cycles, instructions; cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, -cpu/event=0x85,umask=0x04,period=100003,name='ITLB_MISSES.ITLB_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x85,umask=0x04,period=100003,name='ITLB_MISSES.WALK_COMPLETED_2M_4M'/, cpu/event=0x85,umask=0x10,cmask=1,period=100003,name='ITLB_MISSES.WALK_DURATION_c1'/, cpu/event=0x85,umask=0x60,period=100003,name='ITLB_MISSES.STLB_HIT'/, cpu-cycles, @@ -92,6 +92,7 @@ cpu/event=0x79,umask=0x30,name='IDQ.MS_SWITCHES'/, cpu-cycles, instructions; +cpu/event=0x14,umask=0x01,period=2000003,name='ARITH.FPU_DIV_ACTIVE'/, cpu/event=0xc5,umask=0x00,name='BR_MISP_RETIRED.ALL_BRANCHES'/, cpu/event=0xc3,umask=0x01,name='MACHINE_CLEARS.COUNT'/, cpu/event=0xe6,umask=0x1f,name='BACLEARS.ANY'/, @@ -106,6 +107,7 @@ cpu-cycles, ref-cycles, instructions; +cpu/event=0x49,umask=0x60,period=100003,name='DTLB_STORE_MISSES.STLB_HIT'/, cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, cpu/event=0x08,umask=0x10,period=100003,name='DTLB_LOAD_MISSES.WALK_DURATION'/, cpu/event=0x08,umask=0x04,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, diff --git a/events/clx.txt b/events/clx_skx.txt similarity index 98% rename from events/clx.txt rename to events/clx_skx.txt index 17ad9ca..59f51d6 100644 --- a/events/clx.txt +++ b/events/clx_skx.txt @@ -13,6 +13,8 @@ cpu-cycles, ref-cycles, instructions; + +cpu/event=0x85,umask=0x04,period=100003,name='ITLB_MISSES.WALK_COMPLETED_2M_4M'/, cpu/event=0xf1,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, cpu/event=0xd1,umask=0x10,period=50021,name='MEM_LOAD_RETIRED.L2_MISS'/, cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, @@ -32,6 +34,7 @@ instructions:k; cstate_core/c6-residency/; cstate_pkg/c6-residency/; +cpu/event=0xb0,umask=0x10,period=100003,name='OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD'/, cpu/event=0xa8,umask=0x01,period=2000003,name='LSD.UOPS'/, cpu-cycles, ref-cycles, diff --git a/events/icx.txt b/events/icx.txt index cbcb611..4a42b16 100644 --- a/events/icx.txt +++ b/events/icx.txt @@ -122,6 +122,9 @@ ref-cycles; cpu/event=0xb7,umask=0x01,offcore_rsp=0x104000477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/; +cpu/event=0xb7,umask=0x01,offcore_rsp=0x1030000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x830000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; + # OCR group 2 (ICX PMU supports a maximum of two OCR counters per group) cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, cpu/event=0xb7,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/; @@ -151,15 +154,20 @@ upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/, upi/event=0x2,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/, upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/; +cha/event=0x35,umask=0xC8177E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; + cha/event=0x35,umask=0xc88ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/, cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/; -cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, +cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x36,umask=0xC816FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/; + +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, -cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/; +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, +cha/event=0x36,umask=0xC817FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; -cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, -cha/event=0x35,umask=0xC8177E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; diff --git a/events/metric_bdx.json b/events/metric_bdx.json index c14b6b4..0f6758b 100644 --- a/events/metric_bdx.json +++ b/events/metric_bdx.json @@ -1,15 +1,15 @@ [ { "name": "metric_CPU operating frequency (in GHz)", - "expression": "([cpu-cycles] / [ref-cycles]) * ([const_tsc_freq] / 1000000000)" + "expression": "([cpu-cycles] / [ref-cycles]) * ([SYSTEM_TSC_FREQ] / 1000000000)" }, { "name": "metric_CPU utilization %", - "expression": "100 * [ref-cycles] / [const_TSC]" + "expression": "100 * [ref-cycles] / [TSC]" }, { "name": "metric_CPU utilization% in kernel mode", - "expression": "100 * [ref-cycles:k] / [const_TSC]" + "expression": "100 * [ref-cycles:k] / [TSC]" }, { "name": "metric_CPI", @@ -74,15 +74,15 @@ }, { "name": "metric_Average LLC data read miss latency (in ns)", - "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]) )" + "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )" }, { "name": "metric_Average LLC data read miss latency for LOCAL requests (in ns)", - "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]))" + "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))" }, { "name": "metric_Average LLC data read miss latency for REMOTE requests (in ns)", - "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]))" + "expression": "(1000000000 * [UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE.0x182] / [UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE.0x182]) / ([UNC_C_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))" }, { "name": "metric_ITLB MPI", @@ -122,7 +122,7 @@ }, { "name": "metric_uncore frequency GHz", - "expression": "[UNC_C_CLOCKTICKS] / ([const_core_count] * [const_socket_count]) / 1000000000" + "expression": "[UNC_C_CLOCKTICKS] / ([CORES_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000" }, { "name": "metric_package power (watts)", @@ -134,11 +134,11 @@ }, { "name": "metric_core c6 residency %", - "expression": "100 * [cstate_core/c6-residency/] / [const_TSC]" + "expression": "100 * [cstate_core/c6-residency/] / [TSC]" }, { "name": "metric_package c6 residency %", - "expression": "100 * [cstate_pkg/c6-residency/] * [const_core_count] / [const_TSC]" + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]" }, { "name": "metric_memory bandwidth read (MB/sec)", diff --git a/events/metric_icx.json b/events/metric_icx.json index 58a0465..6957f9a 100644 --- a/events/metric_icx.json +++ b/events/metric_icx.json @@ -1,15 +1,15 @@ [ { "name": "metric_CPU operating frequency (in GHz)", - "expression": "([cpu-cycles] / [ref-cycles]) * ([const_tsc_freq] / 1000000000)" + "expression": "([cpu-cycles] / [ref-cycles]) * ([SYSTEM_TSC_FREQ] / 1000000000)" }, { "name": "metric_CPU utilization %", - "expression": "100 * [ref-cycles] / [const_TSC]" + "expression": "100 * [ref-cycles] / [TSC]" }, { "name": "metric_CPU utilization% in kernel mode", - "expression": "100 * [ref-cycles:k] / [const_TSC]" + "expression": "100 * [ref-cycles:k] / [TSC]" }, { "name": "metric_CPI", @@ -74,11 +74,11 @@ }, { "name": "metric_core c6 residency %", - "expression": "100 * [cstate_core/c6-residency/] / [const_TSC]" + "expression": "100 * [cstate_core/c6-residency/] / [TSC]" }, { "name": "metric_package c6 residency %", - "expression": "100 * [cstate_pkg/c6-residency/] * [const_core_count] / [const_TSC]" + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]" }, { "name": "metric_core % cycles in non AVX license", @@ -112,22 +112,6 @@ "name": "metric_memory bandwidth total (MB/sec)", "expression": "([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000" }, - { - "name": "metric_DCPMEM_memory_mode near memory cache read miss rate%", - "expression": "100 * ([UNC_M_TAGCHK.MISS_CLEAN] + [UNC_M_TAGCHK.MISS_DIRTY]) / ([UNC_M_TAGCHK.HIT] + [UNC_M_TAGCHK.MISS_CLEAN] + [UNC_M_TAGCHK.MISS_DIRTY])" - }, - { - "name": "metric_3DXP_memory bandwidth read (MB/sec)", - "expression": "[UNC_M_PMM_RPQ_INSERTS] * 64 / 1000000" - }, - { - "name": "metric_3DXP_memory bandwidth write (MB/sec)", - "expression": "[UNC_M_PMM_WPQ_INSERTS] * 64 / 1000000" - }, - { - "name": "metric_3DXP_memory bandwidth total (MB/sec)", - "expression": "([UNC_M_PMM_RPQ_INSERTS] + [UNC_M_PMM_WPQ_INSERTS]) * 64 / 1000000" - }, { "name": "metric_LLC code read MPI (demand+prefetch)", "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]" @@ -146,15 +130,15 @@ }, { "name": "metric_Average LLC demand data read miss latency (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]))" + "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))" }, { "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]))" + "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))" }, { "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]))" + "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))" }, { "name": "metric_ITLB (2nd level) MPI", @@ -182,7 +166,7 @@ }, { "name": "metric_uncore frequency GHz", - "expression": "[UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]) / 1000000000" + "expression": "[UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000" }, { "name": "metric_TMA_Frontend_Bound(%)", @@ -262,15 +246,15 @@ }, { "name": "metric_TMA_......Contested_Accesses(%)", - "expression": "100 * ((((48 * (([cpu-cycles] / [ref-cycles]) * [const_tsc_freq] / 1000000000)) -(4 * (([cpu-cycles] / [ref-cycles]) * [const_tsc_freq] / 1000000000))) * ([MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD] * ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD]))) + ((47.5 * (([cpu-cycles] / [ref-cycles]) * [const_tsc_freq] / 1000000000)) - (4 * (([cpu-cycles] / [ref-cycles]) * [const_tsc_freq] / 1000000000))) * [MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS]) * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]) / 2) / [cpu-cycles])" + "expression": "100 * ((((48 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000)) -(4 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000))) * ([MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD] * ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD]))) + ((47.5 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000)) - (4 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000))) * [MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS]) * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]) / 2) / [cpu-cycles])" }, { "name": "metric_TMA_......Data_Sharing(%)", - "expression": "100 * (((47.5 * (([cpu-cycles] / [ref-cycles]) * [const_tsc_freq] / 1000000000)) - (4 * (([cpu-cycles] / [ref-cycles]) * [const_tsc_freq] / 1000000000))) * ([MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD] + [MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD] * (1 - ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD])))) * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]) / 2) / [cpu-cycles])" + "expression": "100 * (((47.5 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000)) - (4 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000))) * ([MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD] + [MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD] * (1 - ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD])))) * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]) / 2) / [cpu-cycles])" }, { "name": "metric_TMA_......L3_Hit_Latency(%)", - "expression": "100 * (((23 * (([cpu-cycles] / [ref-cycles]) * [const_tsc_freq] / 1000000000)) - (4 * (([cpu-cycles] / [ref-cycles]) * [const_tsc_freq] / 1000000000))) * [MEM_LOAD_RETIRED.L3_HIT] * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]) / 2) / [cpu-cycles])" + "expression": "100 * (((23 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000)) - (4 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000))) * [MEM_LOAD_RETIRED.L3_HIT] * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]) / 2) / [cpu-cycles])" }, { "name": "metric_TMA_......SQ_Full(%)", @@ -330,7 +314,7 @@ }, { "name": "metric_TMA_....Microcode_Sequencer(%)", - "expression": "100 * (((([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [topdown-be-found]))) * [TOPDOWN.SLOTS]) / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / [TOPDOWN.SLOTS])" + "expression": "100 * (((([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) * [TOPDOWN.SLOTS]) / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / [TOPDOWN.SLOTS])" }, { "name": "metric_TMA_Info_CoreIPC", @@ -338,6 +322,6 @@ }, { "name": "metric_TMA_Info_System_SMT_2T_Utilization", - "expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [const_socket_count] > 1 else 0" + "expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [SOCKET_COUNT] > 1 else 0" } ] \ No newline at end of file diff --git a/events/metric_skx_clx.json b/events/metric_skx_clx.json index a7e4260..2b207e8 100644 --- a/events/metric_skx_clx.json +++ b/events/metric_skx_clx.json @@ -1,15 +1,15 @@ [ { "name": "metric_CPU operating frequency (in GHz)", - "expression": "([cpu-cycles] / [ref-cycles]) * ([const_tsc_freq] / 1000000000)" + "expression": "([cpu-cycles] / [ref-cycles]) * ([SYSTEM_TSC_FREQ] / 1000000000)" }, { "name": "metric_CPU utilization %", - "expression": "100 * [ref-cycles] / [const_TSC]" + "expression": "100 * [ref-cycles] / [TSC]" }, { "name": "metric_CPU utilization% in kernel mode", - "expression": "100 * [ref-cycles:k] / [const_TSC]" + "expression": "100 * [ref-cycles:k] / [TSC]" }, { "name": "metric_CPI", @@ -74,15 +74,15 @@ }, { "name": "metric_Average LLC data read miss latency (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40433] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40433]) / ( [UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]) )" + "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40433] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40433]) / ( [UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )" }, { "name": "metric_Average LLC data read miss latency for LOCAL requests (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40432] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40432]) / ( [UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]) )" + "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40432] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40432]) / ( [UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )" }, { "name": "metric_Average LLC data read miss latency for REMOTE requests (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40431] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40431]) / ( [UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]) )" + "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40431] / [UNC_CHA_TOR_INSERTS.IA_MISS.0x40431]) / ( [UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) )" }, { "name": "metric_ITLB MPI", @@ -138,11 +138,11 @@ }, { "name": "metric_UPI Transmit utilization_% (includes control)", - "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([const_tsc_freq] / ([const_cha_count] * [const_thread_count])) / (([const_tsc_freq] / ([const_cha_count] * [const_thread_count])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))" + "expression": "100 * (([UNC_UPI_TxL_FLITS.ALL_DATA] + [UNC_UPI_TxL_FLITS.NON_DATA]) / 3) / ((((([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) / (([SYSTEM_TSC_FREQ] / ([CHAS_PER_SOCKET] * [const_thread_count])) - [cstate_pkg/c6-residency/])) * ([UNC_UPI_CLOCKTICKS] - [UNC_UPI_L1_POWER_CYCLES])) * 5 / 6))" }, { "name": "metric_uncore frequency GHz", - "expression": "[UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]) / 1000000000" + "expression": "[UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000" }, { "name": "metric_package power (watts)", @@ -154,11 +154,11 @@ }, { "name": "metric_core c6 residency %", - "expression": "100 * [cstate_core/c6-residency/] / [const_TSC]" + "expression": "100 * [cstate_core/c6-residency/] / [TSC]" }, { "name": "metric_package c6 residency %", - "expression": "100 * [cstate_pkg/c6-residency/] * [const_core_count] / [const_TSC]" + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]" }, { "name": "metric_core % cycles in non AVX license", @@ -192,30 +192,6 @@ "name": "metric_memory bandwidth total (MB/sec)", "expression": "([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000" }, - { - "name": "metric_DCPMEM_memory_mode near memory cache read miss rate%", - "expression": "100 * ([UNC_M_PMM_RPQ_INSERTS] / ([UNC_M2M_TAG_HIT.NM_RD_HIT_CLEAN] + [UNC_M2M_TAG_HIT.NM_RD_HIT_DIRTY] + [UNC_M_PMM_RPQ_INSERTS]))" - }, - { - "name": "metric_3DXP_memory bandwidth read (MB/sec)", - "expression": "[UNC_M_PMM_RPQ_INSERTS] * 64 / 1000000" - }, - { - "name": "metric_3DXP_memory bandwidth write (MB/sec)", - "expression": "[UNC_M_PMM_WPQ_INSERTS] * 64 / 1000000" - }, - { - "name": "metric_3DXP_memory bandwidth total (MB/sec)", - "expression": "([UNC_M_PMM_RPQ_INSERTS] + [UNC_M_PMM_WPQ_INSERTS]) * 64 / 1000000" - }, - { - "name": "metric_3DXP memory RPQ read latency (ns)", - "expression": "(([UNC_M_PMM_RPQ_OCCUPANCY.ALL] / [UNC_M_PMM_RPQ_INSERTS]) / ([UNC_M_CLOCKTICKS] / ([const_socket_count] * 6))) * 1000000000" - }, - { - "name": "metric_3DXP memory WPQ write latency (ns)", - "expression": "(([UNC_M_PMM_WPQ_OCCUPANCY.ALL] / [UNC_M_PMM_WPQ_INSERTS]) / ([UNC_M_CLOCKTICKS] / ([const_socket_count] * 6))) * 1000000000" - }, { "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", "expression": "([UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3]) * 4 / 1000000" diff --git a/events/metric_spr.json b/events/metric_spr.json index ad5b0af..08c89c4 100644 --- a/events/metric_spr.json +++ b/events/metric_spr.json @@ -1,15 +1,16 @@ [ { "name": "metric_CPU operating frequency (in GHz)", - "expression": "([cpu-cycles] / [ref-cycles]) * ([const_tsc_freq] / 1000000000)" + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" }, { "name": "metric_CPU utilization %", - "expression": "100 * [ref-cycles] / [const_TSC]" + "expression": "100 * [ref-cycles] / [TSC]" }, { "name": "metric_CPU utilization% in kernel mode", - "expression": "100 * [ref-cycles:k] / [const_TSC]" + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" }, { "name": "metric_CPI", @@ -17,19 +18,21 @@ }, { "name": "metric_kernel_CPI", - "expression": "[cpu-cycles:k] / [instructions:k]" + "expression": "[cpu-cycles:k] / [instructions:k]", + "origin": "perfspect" }, { "name": "metric_IPC", - "expression": "[instructions] / [cpu-cycles]" + "expression": "[instructions] / [cpu-cycles]", + "origin": "perfspect" }, { "name": "metric_giga_instructions_per_sec", - "expression": "[instructions] / 1000000000" + "expression": "[instructions] / 1000000000", + "origin": "perfspect" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "tags": "transaction", "expression": "[L1D.REPLACEMENT] / [instructions]" }, { @@ -58,47 +61,49 @@ }, { "name": "metric_UPI Data transmit BW (MB/sec) (only data)", - "expression": "[UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9) / 1000000" + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" }, { "name": "metric_package power (watts)", - "expression": "[power/energy-pkg/]" + "expression": "[power/energy-pkg/]", + "origin": "perfspect" }, { "name": "metric_DRAM power (watts)", - "expression": "[power/energy-ram/]" + "expression": "[power/energy-ram/]", + "origin": "perfspect" }, { "name": "metric_core c6 residency %", - "expression": "100 * [cstate_core/c6-residency/] / [const_TSC]" + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" }, { "name": "metric_package c6 residency %", - "expression": "100 * [cstate_pkg/c6-residency/] * [const_core_count] / [const_TSC]" + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" }, { "name": "metric_core initiated local dram read bandwidth (MB/sec)", - "expression": "([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000" + "expression": "([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000", + "origin": "perfspect" }, { "name": "metric_core initiated remote dram read bandwidth (MB/sec)", - "expression": "([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000" + "expression": "([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000", + "origin": "perfspect" }, { "name": "metric_memory bandwidth read (MB/sec)", - "expression": "[UNC_M_CAS_COUNT.RD] * 64 / 1000000" + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" }, { "name": "metric_memory bandwidth write (MB/sec)", - "expression": "[UNC_M_CAS_COUNT.WR] * 64 / 1000000" + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" }, { "name": "metric_memory bandwidth total (MB/sec)", - "expression": "([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000" - }, - { - "name": "metric_DCPMEM_memory_mode near memory cache read miss rate%", - "expression": "100 * ([UNC_M_TAGCHK.MISS_CLEAN] + [UNC_M_TAGCHK.MISS_DIRTY]) / ([UNC_M_TAGCHK.HIT] + [UNC_M_TAGCHK.MISS_CLEAN] + [UNC_M_TAGCHK.MISS_DIRTY])" + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" }, { "name": "metric_LLC code read MPI (demand+prefetch)", @@ -110,23 +115,25 @@ }, { "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]" + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "origin": "perfspect" }, { "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]" + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "origin": "perfspect" }, { "name": "metric_Average LLC demand data read miss latency (in ns)", - "expression": "1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]))" + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" }, { "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]))" + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" }, { "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]))" + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" }, { "name": "metric_ITLB (2nd level) MPI", @@ -154,111 +161,111 @@ }, { "name": "metric_uncore frequency GHz", - "expression": "[UNC_CHA_CLOCKTICKS] / ([const_cha_count] * [const_socket_count]) / 1000000000" + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" }, { "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", - "expression": "[UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000" + "expression": "([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1" }, { "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", - "expression": "([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000" + "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1" }, { "name": "metric_TMA_Frontend_Bound(%)", - "expression": "100 * ([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / ([TOPDOWN.SLOTS]))" + "expression": "100 * ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) )" }, { "name": "metric_TMA_..Fetch_Latency(%)", - "expression": "100 * (([PERF_METRICS.FETCH_LATENCY] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / ([TOPDOWN.SLOTS])))" + "expression": "100 * ( ( [PERF_METRICS.FETCH_LATENCY] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) )" }, { "name": "metric_TMA_....ICache_Misses(%)", - "expression": "100 * ([ICACHE_DATA.STALLS] / [cpu-cycles])" + "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_....ITLB_Misses(%)", - "expression": "100 * ([ICACHE_TAG.STALLS] / [cpu-cycles])" + "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_....Branch_Resteers(%)", - "expression": "100 * ([INT_MISC.CLEAR_RESTEER_CYCLES] / [cpu-cycles] + ([INT_MISC.UNKNOWN_BRANCH_CYCLES] / [cpu-cycles]))" + "expression": "100 * ( [INT_MISC.CLEAR_RESTEER_CYCLES] / ( [cpu-cycles] ) + ( [INT_MISC.UNKNOWN_BRANCH_CYCLES] / ( [cpu-cycles] ) ) )" }, { "name": "metric_TMA_......Mispredicts_Resteers(%)", - "expression": "100 * ((([PERF_METRICS.BRANCH_MISPREDICTS] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) / (max(0, (1 - (([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS]) + ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])))))))) * [INT_MISC.CLEAR_RESTEER_CYCLES] / [cpu-cycles])" + "expression": "100 * ( ( ( [PERF_METRICS.BRANCH_MISPREDICTS] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) / ( max( 1 - ( ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) , 0 ) ) ) * [INT_MISC.CLEAR_RESTEER_CYCLES] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Clears_Resteers(%)", - "expression": "100 * ((1 - (([PERF_METRICS.BRANCH_MISPREDICTS] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) / (max(0, (1 - (([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS]) + ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))))))))) * [INT_MISC.CLEAR_RESTEER_CYCLES] / [cpu-cycles])" + "expression": "100 * ( ( 1 - ( ( [PERF_METRICS.BRANCH_MISPREDICTS] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) / ( max( 1 - ( ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) , 0 ) ) ) ) * [INT_MISC.CLEAR_RESTEER_CYCLES] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Unknown_Branches(%)", - "expression": "100 * ([INT_MISC.UNKNOWN_BRANCH_CYCLES] / [cpu-cycles])" + "expression": "100 * ( [INT_MISC.UNKNOWN_BRANCH_CYCLES] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_..Fetch_Bandwidth(%)", - "expression": "100 * (max(0, (([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS]) - (([PERF_METRICS.FETCH_LATENCY] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS])))))" + "expression": "100 * ( max( 0 , ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) - ( ( [PERF_METRICS.FETCH_LATENCY] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) ) ) )" }, { "name": "metric_TMA_Bad_Speculation(%)", - "expression": "100 * (max((1 - (([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS]) + ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))))), 0))" + "expression": "100 * ( max( 1 - ( ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) , 0 ) )" }, { "name": "metric_TMA_..Branch_Mispredicts(%)", - "expression": "100 * ([PERF_METRICS.BRANCH_MISPREDICTS] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])))" + "expression": "100 * ( [PERF_METRICS.BRANCH_MISPREDICTS] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) )" }, { "name": "metric_TMA_..Machine_Clears(%)", - "expression": "100 * (max(0, ((max(0, (1 - (([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS]) + ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING]))) + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING]))))))) - ([PERF_METRICS.BRANCH_MISPREDICTS] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING]))))))" + "expression": "100 * ( max( 0 , ( max( 1 - ( ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) , 0 ) ) - ( [PERF_METRICS.BRANCH_MISPREDICTS] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) )" }, { "name": "metric_TMA_Backend_Bound(%)", - "expression": "100 * ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING])))" + "expression": "100 * ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) )" }, { "name": "metric_TMA_..Memory_Bound(%)", - "expression": "100 * ([PERF_METRICS.MEMORY_BOUND] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING])))" + "expression": "100 * ( [PERF_METRICS.MEMORY_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) )" }, { "name": "metric_TMA_....L1_Bound(%)", - "expression": "100 * (max(0, (([EXE_ACTIVITY.BOUND_ON_LOADS] - [MEMORY_ACTIVITY.STALLS_L1D_MISS]) / [cpu-cycles])))" + "expression": "100 * ( max( ( [EXE_ACTIVITY.BOUND_ON_LOADS] - [MEMORY_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" }, { "name": "metric_TMA_......DTLB_Load(%)", - "expression": "100 * (min((7) * [DTLB_LOAD_MISSES.STLB_HIT:c1] + [DTLB_LOAD_MISSES.WALK_ACTIVE], max([CYCLE_ACTIVITY.CYCLES_MEM_ANY] - [CYCLE_ACTIVITY.CYCLES_L1D_MISS], 0)) / ( [cpu-cycles]))" + "expression": "100 * ( min( ( 7 ) * [DTLB_LOAD_MISSES.STLB_HIT:c1] + [DTLB_LOAD_MISSES.WALK_ACTIVE] , max( [CYCLE_ACTIVITY.CYCLES_MEM_ANY] - [MEMORY_ACTIVITY.CYCLES_L1D_MISS] , 0 ) ) / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Split_Loads(%)", - "expression": "100 * (min(1, ((([L1D_PEND_MISS.PENDING] / ([MEM_LOAD_COMPLETED.L1_MISS_ANY])) * [LD_BLOCKS.NO_SR] / [cpu-cycles]))))" + "expression": "100 * ( min( ( ( [L1D_PEND_MISS.PENDING] / [MEM_LOAD_COMPLETED.L1_MISS_ANY] ) * [LD_BLOCKS.NO_SR] / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_....L2_Bound(%)", - "expression": "100 * (([MEMORY_ACTIVITY.STALLS_L1D_MISS] - [MEMORY_ACTIVITY.STALLS_L2_MISS]) / [cpu-cycles])" + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L1D_MISS] - [MEMORY_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_....L3_Bound(%)", - "expression": "100 * (([MEMORY_ACTIVITY.STALLS_L2_MISS] - [MEMORY_ACTIVITY.STALLS_L3_MISS]) / [cpu-cycles])" + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L2_MISS] - [MEMORY_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......MEM_Bandwidth(%)", - "expression": "100 * ((min(([cpu-cycles] - 0), ([OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4] - 0))) / [cpu-cycles])" + "expression": "100 * ( ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4] ) ) / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......MEM_Latency(%)", - "expression": "100 * ( ( min( [cpu-cycles], [OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD] ) ) / ( [cpu-cycles] ) - ( ( min([cpu-cycles], [OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4] ) ) / ( [cpu-cycles] ) ) )" + "expression": "100 * ( ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD] ) ) / ( [cpu-cycles] ) - ( ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4] ) ) / ( [cpu-cycles] ) ) )" }, { "name": "metric_TMA_....Store_Bound(%)", - "expression": "100 * ([EXE_ACTIVITY.BOUND_ON_STORES] / [cpu-cycles])" + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_..Core_Bound(%)", - "expression": "100 * (max(0, (([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) - ([PERF_METRICS.MEMORY_BOUND] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING]))))))" + "expression": "100 * ( max( 0 , ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( [PERF_METRICS.MEMORY_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) )" }, { "name": "metric_TMA_....Divider(%)", - "expression": "100 * ([ARITH.DIV_ACTIVE] / [cpu-cycles])" + "expression": "100 * ( [ARITH.DIV_ACTIVE] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_....Ports_Utilization(%)", @@ -270,42 +277,45 @@ }, { "name": "metric_TMA_......Ports_Utilized_1(%)", - "expression": "100 * ([EXE_ACTIVITY.1_PORTS_UTIL] / [cpu-cycles])" + "expression": "100 * ( [EXE_ACTIVITY.1_PORTS_UTIL] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Ports_Utilized_2(%)", - "expression": "100 * ([EXE_ACTIVITY.2_PORTS_UTIL] / [cpu-cycles])" + "expression": "100 * ( [EXE_ACTIVITY.2_PORTS_UTIL] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Ports_Utilized_3m(%)", - "expression": "100 * [UOPS_EXECUTED.CYCLES_GE_3] / [cpu-cycles]" + "expression": "100 * ( [UOPS_EXECUTED.CYCLES_GE_3] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_Retiring(%)", - "expression": "100 * ([PERF_METRICS.RETIRING] / ([PERF_METRICS.RETIRING] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.BACKEND_BOUND]))" + "expression": "100 * ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) )" }, { "name": "metric_TMA_..Light_Operations(%)", - "expression": "100 * (max(0, (([PERF_METRICS.RETIRING] / (([PERF_METRICS.RETIRING] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.BACKEND_BOUND]))) - ([PERF_METRICS.HEAVY_OPERATIONS] / (([PERF_METRICS.RETIRING] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.BACKEND_BOUND]))))))" + "expression": "100 * ( max( 0 , ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( [PERF_METRICS.HEAVY_OPERATIONS] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) )" }, { "name": "metric_TMA_..Heavy_Operations(%)", - "expression": "100 * ([PERF_METRICS.HEAVY_OPERATIONS] / (([PERF_METRICS.RETIRING] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.BACKEND_BOUND])))" + "expression": "100 * ( [PERF_METRICS.HEAVY_OPERATIONS] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) )" }, { "name": "metric_TMA_....Microcode_Sequencer(%)", - "expression": "100 * ([UOPS_RETIRED.MS] / [TOPDOWN.SLOTS])" + "expression": "100 * ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS] ) )" }, { "name": "metric_TMA_Info_Thread_IPC", - "expression": "[instructions] / [cpu-cycles]" + "expression": "[instructions] / [cpu-cycles]", + "origin": "perfspect" }, { "name": "metric_TMA_Info_Core_ILP", - "expression": "[instructions] / [CPU_CLK_UNHALTED.DISTRIBUTED]" + "expression": "[instructions] / [CPU_CLK_UNHALTED.DISTRIBUTED]", + "origin": "perfspect" }, { "name": "metric_TMA_Info_System_SMT_2T_Utilization", - "expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [const_socket_count] > 1 else 0" + "expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [SOCKET_COUNT] > 1 else 0", + "origin": "perfspect" } ] \ No newline at end of file diff --git a/events/skx.txt b/events/skx.txt deleted file mode 100644 index 4235a4e..0000000 --- a/events/skx.txt +++ /dev/null @@ -1,184 +0,0 @@ -########################################################################################################### -# Copyright (C) 2021-2023 Intel Corporation -# SPDX-License-Identifier: BSD-3-Clause -########################################################################################################### - -# Skylake event list (default, with extensive TMA collection) - -cpu/event=0x51,umask=0x01,period=2000003,name='L1D.REPLACEMENT'/, -cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, -cpu/event=0xd1,umask=0x01,period=2000003,name='MEM_LOAD_RETIRED.L1_HIT'/, -cpu/event=0xd1,umask=0x02,period=100003,name='MEM_LOAD_RETIRED.L2_HIT'/, -cpu-cycles, -ref-cycles, -instructions; - -cpu/event=0xf1,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, -cpu/event=0xd1,umask=0x10,period=50021,name='MEM_LOAD_RETIRED.L2_MISS'/, -cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, -cpu-cycles, -ref-cycles, -instructions; - -cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, -cpu/event=0x08,umask=0x04,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, -cpu/event=0x08,umask=0x02,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_4K'/, -cpu/event=0x08,umask=0x08,period=2000003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_1G'/, -cpu-cycles:k, -ref-cycles:k, -instructions:k; - -#C6 -cstate_core/c6-residency/; -cstate_pkg/c6-residency/; - -cpu/event=0xa8,umask=0x01,period=2000003,name='LSD.UOPS'/, -cpu-cycles, -ref-cycles, -instructions; - -#avx related power levels -cpu/event=0x28,umask=0x07,period=200003,name='CORE_POWER.LVL0_TURBO_LICENSE'/, -cpu/event=0x28,umask=0x18,period=200003,name='CORE_POWER.LVL1_TURBO_LICENSE'/, -cpu/event=0x28,umask=0x20,period=200003,name='CORE_POWER.LVL2_TURBO_LICENSE'/, -cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/; - -cpu/event=0x3c,umask=0x0,period=2000003,name='CPU_CLK_UNHALTED.THREAD_ANY'/, -cpu/event=0x9c,umask=0x01,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, -cpu/event=0xc2,umask=0x02,period=2000003,name='UOPS_RETIRED.RETIRE_SLOTS'/, -#INT_MISC.RECOVERY_CYCLES_ANY -cpu/event=0x0d,umask=0x01,period=2000003,name='INT_MISC.RECOVERY_CYCLES_ANY'/; - -cpu/event=0x79,umask=0x30,period=2000003,name='IDQ.MS_UOPS'/, -cpu/event=0x60,umask=0x10,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD'/, -cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_64B.IFTAG_STALL'/, -cpu/event=0x9c,umask=0x01,cmask=0x4,period=2000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, -cpu-cycles, -ref-cycles; - -cpu/event=0x0d,umask=0x80,period=2000003,name='INT_MISC.CLEAR_RESTEER_CYCLES'/, -cpu/event=0xe6,umask=0x01,period=100003,name='BACLEARS.ANY'/, -cpu/event=0xc3,umask=0x01,edge,period=100003,name='MACHINE_CLEARS.COUNT'/, -cpu/event=0xc5,umask=0x00,period=400009,name='BR_MISP_RETIRED.ALL_BRANCHES'/, -cpu-cycles; - -cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, -cpu/event=0xa3,umask=0x0c,cmask=0x0c,period=2000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, -cpu/event=0x08,umask=0x20,period=2000003,name='DTLB_LOAD_MISSES.STLB_HIT'/, -cpu/event=0x08,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/, -cpu-cycles; - -cpu/event=0xa6,umask=0x01,period=2000003,name='EXE_ACTIVITY.EXE_BOUND_0_PORTS'/, -cpu/event=0xa6,umask=0x40,period=2000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, -cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, -cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, -cpu-cycles, -instructions; - -cpu/event=0x03,umask=0x02,period=100003,name='LD_BLOCKS.STORE_FORWARD'/, -cpu/event=0xb2,umask=0x01,period=2000003,name='OFFCORE_REQUESTS_BUFFER.SQ_FULL'/, -cpu/event=0xa3,umask=0x05,cmask=0x05,period=2000003,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, -cpu/event=0xa3,umask=0x06,cmask=0x06,period=2000003,name='CYCLE_ACTIVITY.STALLS_L3_MISS'/, -cpu-cycles; - -cpu/event=0x60,umask=0x01,cmask=0x06,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6'/, -cpu/event=0x60,umask=0x10,cmask=0x06,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6'/, -cpu/event=0x60,umask=0x10,cmask=0x01,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD'/, -cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, -cpu-cycles; - -cpu/event=0x60,umask=0x01,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD'/, -cpu/event=0x60,umask=0x01,cmask=0x01,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD'/, -cpu-cycles; - -cpu/event=0x80,umask=0x4,name='ICACHE_16B.IFDATA_STALL'/, -cpu/event=0x80,umask=0x4,cmask=0x1,edge=0x1,name='ICACHE_16B_c1_e1_IFDATA_STALL'/, -cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, -cpu/event=0x85,umask=0x10,period=100003,name='ITLB_MISSES.WALK_ACTIVE'/, -instructions, -cpu-cycles; - -cpu/event=0x49,umask=0x20,period=100003,name='DTLB_STORE_MISSES.STLB_HIT'/, -cpu/event=0x49,umask=0x10,period=100003,name='DTLB_STORE_MISSES.WALK_ACTIVE'/, -cpu/event=0x14,umask=0x01,period=2000003,name='ARITH.DIVIDER_ACTIVE'/, -cpu/event=0xb1,umask=0x02,inv=0x1,cmask=0x1,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_NONE'/, -cpu-cycles; - - -cpu/event=0xb1,umask=0x02,cmask=0x1,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_1'/, -cpu/event=0xb1,umask=0x02,cmask=0x2,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_2'/, -cpu/event=0xb1,umask=0x02,cmask=0x3,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_3'/, -cpu/event=0xb1,umask=0x02,cmask=0x4,period=2000003,name='UOPS_EXECUTED.CORE_CYCLES_GE_4'/, -cpu-cycles; - -cpu/event=0x3c,umask=0x2,name='CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE'/, -cpu/event=0x3c,umask=0x1,name='CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY'/; - -#offcore response -cpu/event=0xb7,umask=0x01,offcore_rsp=0x103FC007F7,name='OCR.ALL_READS.L3_MISS.REMOTE_HITM'/, -cpu/event=0xb7,umask=0x01,offcore_rsp=0x083FC007F7,name='OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD'/; - -#OCR -cpu/event=0xb7,umask=0x01,offcore_rsp=0x3F840007F7,name='OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP'/, -cpu/event=0xb7,umask=0x01,offcore_rsp=0x3FB80007F7,name='OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP_ocr_msr_3fB80007f7'/; - -#memory read/writes -imc/event=0x04,umask=0x03,name='UNC_M_CAS_COUNT.RD'/, -imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/; - -#UNC_M_RPQ_INSERTS/OCCUPANCY -imc/event=0x10,umask=0x0,name='UNC_M_RPQ_INSERTS'/, -imc/event=0x80,umask=0x0,name='UNC_M_RPQ_OCCUPANCY'/, -imc/event=0,umask=0,name='UNC_M_CLOCKTICKS'/; - -#demand reads local and remote collected separately -cha/event=0x35,umask=0x21,config1=0x4043200000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x40432'/, -cha/event=0x36,umask=0x21,config1=0x4043200000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40432'/; -cha/event=0x35,umask=0x21,config1=0x4043100000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x40431'/, -cha/event=0x36,umask=0x21,config1=0x4043100000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40431'/; - -#UNC_CHA_TOR_INSERTS.IA_MISS_CRD,UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD -cha/event=0x35,umask=0x21,config1=0x12CC023300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233'/, -cha/event=0x36,umask=0x21,config1=0x12CC023300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x12CC0233'/; - -#UNC_CHA_TOR_INSERTS.IA_MISS_RFO,UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO -cha/event=0x35,umask=0x21,config1=0x12C4003300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033'/, -cha/event=0x36,umask=0x21,config1=0x12C4003300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x12C40033'/; - -#UNC_CHA_TOR_INSERTS.IA_HIT_DRD,UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD -cha/event=0x35,umask=0x11,config1=0x4043300000000,name='UNC_CHA_TOR_INSERTS.IA_HIT.0x40433'/, -cha/event=0x36,umask=0x11,config1=0x4043300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_HIT.0x40433'/; - -#UNC_CHA_TOR_INSERTS.IA_MISS_DEMAND_RD,UNC_CHA_TOR_OCCUPANCY.IA_MISS_DEMAND_RD (demand data only - both local and remote) -cha/event=0x35,umask=0x21,config1=0x4043300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x40433'/, -cha/event=0x36,umask=0x21,config1=0x4043300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x40433'/, -cha/event=0x0,umask=0x0,name='UNC_CHA_CLOCKTICKS'/; - -#UNC_CHA_TOR_INSERTS.IA_MISS_DRD,UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD -cha/event=0x35,umask=0x21,config1=0x12D4043300000000,name='UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433'/, -cha/event=0x36,umask=0x21,config1=0x12D4043300000000,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS.0x12D40433'/; -#cha/event=0xa5,umask=0x02,name='UNC_CHA_FAST_ASSERTED.HORZ'/; - -#IO bandwidth -iio/event=0x83,umask=0x04,ch_mask=0x00,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0'/, -iio/event=0x83,umask=0x04,ch_mask=0x02,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1'/; -iio/event=0x83,umask=0x04,ch_mask=0x04,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2'/, -iio/event=0x83,umask=0x04,ch_mask=0x08,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3'/; - -iio/event=0x83,umask=0x01,ch_mask=0x00,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0'/, -iio/event=0x83,umask=0x01,ch_mask=0x02,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1'/; -iio/event=0x83,umask=0x01,ch_mask=0x04,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2'/, -iio/event=0x83,umask=0x01,ch_mask=0x08,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3'/; - -#UPI related -upi/event=0x2,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/, -upi/event=0x2,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/, -upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/; - -upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/; - -cstate_pkg/c6-residency/; - -#power related -power/energy-pkg/, -power/energy-ram/; diff --git a/events/spr.txt b/events/spr.txt index 4c18970..b79903e 100644 --- a/events/spr.txt +++ b/events/spr.txt @@ -91,6 +91,7 @@ cpu-cycles; cpu/event=0xc2,umask=0x04,period=2000003,name='UOPS_RETIRED.MS'/, cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0x47,umask=0x02,cmask=0x02,period=1000003,name='MEMORY_ACTIVITY.CYCLES_L1D_MISS'/, cpu-cycles; cpu/event=0x3c,umask=0x02,period=25003,name='CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE'/, @@ -139,11 +140,6 @@ cha/event=0x35,umask=0xCC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, cha/event=0x35,umask=0xCD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/, cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; -#PMEM -imc/event=0xd3,umask=0x01,name='UNC_M_TAGCHK.HIT'/, -imc/event=0xd3,umask=0x02,name='UNC_M_TAGCHK.MISS_CLEAN'/, -imc/event=0xd3,umask=0x04,name='UNC_M_TAGCHK.MISS_DIRTY'/; - #memory read/writes imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/, imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/; \ No newline at end of file diff --git a/perf-collect.py b/perf-collect.py index 5ce4e36..36f0f2a 100644 --- a/perf-collect.py +++ b/perf-collect.py @@ -53,13 +53,14 @@ def write_metadata( data = original.read() with open(outcsv, "w") as modified: modified.write("### META DATA ###,\n") - modified.write("TSC Frequency(MHz)," + tsc_freq + ",\n") - modified.write("CPU count," + str(perf_helpers.get_cpu_count()) + ",\n") - modified.write("SOCKET count," + str(perf_helpers.get_socket_count()) + ",\n") - modified.write("HT count," + str(perf_helpers.get_ht_count()) + ",\n") - imc, cha, upi = perf_helpers.get_imc_cacheagent_count() + modified.write("SYSTEM_TSC_FREQ (MHz)," + tsc_freq + ",\n") + modified.write("CORES_PER_SOCKET," + str(perf_helpers.get_cpu_count()) + ",\n") + modified.write("SOCKET_COUNT," + str(perf_helpers.get_socket_count()) + ",\n") + modified.write("HYPERTHREADING_ON," + str(perf_helpers.get_ht_status()) + ",\n") + imc, upi = perf_helpers.get_imc_upi_count() + cha = perf_helpers.get_cha_count() modified.write("IMC count," + str(imc) + ",\n") - modified.write("CHA count," + str(cha) + ",\n") + modified.write("CHAS_PER_SOCKET," + str(cha) + ",\n") modified.write("UPI count," + str(upi) + ",\n") modified.write("Architecture," + str(arch) + ",\n") modified.write("Model," + str(cpuname) + ",\n") @@ -88,11 +89,9 @@ def write_metadata( "/sys/fs/cgroup/" + cgroup + "/cpuset.cpus", # cgroup v2 ] cg_path_found = False - for _ in cgroup_paths: + for path in cgroup_paths: try: - cpu_set_file = open( - "/sys/fs/cgroup/cpuset/" + cgroup + "/cpuset.cpus", "r" - ) + cpu_set_file = open(path, "r") cg_path_found = True # no need to check other paths break @@ -104,6 +103,7 @@ def write_metadata( cpu_set = cpu_set_file.read() cpu_set_file.close() cpu_set = cpu_set.strip() + cpu_set = cpu_set.replace(",", "+") if not cg_path_found or cpu_set == "": # A missing path or an empty cpu-set in v2 indicates that the container is running on all CPUs @@ -216,10 +216,7 @@ def validate_file(fname): help="perf stat output in csv format, default=perfstat.csv", ) parser.add_argument( - "-v", - "--verbose", - help="Display debugging information", - action="store_true", + "-v", "--verbose", help="Display debugging information", action="store_true" ) args = parser.parse_args() @@ -257,10 +254,8 @@ def validate_file(fname): eventfile = None if arch == "broadwell": eventfile = "bdx.txt" - elif arch == "skylake": - eventfile = "skx.txt" - elif arch == "cascadelake": - eventfile = "clx.txt" + elif arch == "skylake" or arch == "cascadelake": + eventfile = "clx_skx.txt" elif arch == "icelake": eventfile = "icx.txt" elif arch == "sapphirerapids": @@ -297,7 +292,8 @@ def validate_file(fname): # get perf events to collect collection_events = [] - imc, cha, upi = perf_helpers.get_imc_cacheagent_count() + imc, upi = perf_helpers.get_imc_upi_count() + cha = perf_helpers.get_cha_count() have_uncore = True if imc == 0 and cha == 0 and upi == 0: logging.info("disabling uncore (possibly in a vm?)") diff --git a/perf-collect.spec b/perf-collect.spec index 0326c16..4de6f7a 100644 --- a/perf-collect.spec +++ b/perf-collect.spec @@ -7,7 +7,7 @@ block_cipher = None a = Analysis( ['perf-collect.py'], pathex=[], - datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/skx.txt', '.'), ('./events/clx.txt', '.'), ('./events/icx.txt', '.'), ('./events/spr.txt', '.')], + datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/clx_skx.txt', '.'), ('./events/icx.txt', '.'), ('./events/spr.txt', '.')], hiddenimports=[], hookspath=[], hooksconfig={}, diff --git a/perf-postprocess.py b/perf-postprocess.py index c8c4f39..2fc805b 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -86,16 +86,10 @@ def get_args(script_path): action="store_true", ) parser.add_argument( - "--rawevents", - help="save raw events in .csv format", - action="store_true", + "--rawevents", help="save raw events in .csv format", action="store_true" ) parser.add_argument( - "-html", - "--html", - type=str, - default=None, - help="Static HTML report", + "-html", "--html", type=str, default=None, help="Static HTML report" ) args = parser.parse_args() @@ -128,6 +122,43 @@ def get_args(script_path): return args +# fix c6-residency data lines +# for system: multiply value by number of HyperThreads +# for socket or thread: add rows for each 2nd hyper thread with same values as 1st thread +def get_fixed_c6_residency_fields(perf_data_lines, perf_mode): + # handle special case events: c6-residency + new_perf_data_lines = [] + if meta_data["constants"]["CONST_THREAD_COUNT"] == 2: + for fields in perf_data_lines: + if perf_mode == Mode.System and fields[3] == "cstate_core/c6-residency/": + # since "cstate_core/c6-residency/" is collected for only one thread + # we double the value for the system wide collection (assign same value to the 2nd thread) + try: + fields[1] = int(fields[1]) * 2 # fields[1] -> event value + except ValueError: + # value can be or + logging.warning( + "Failed to convert cstate_core/c6-residency/ metric value: " + + str(fields[1]) + + " to integer. Skipping" + ) + pass + new_perf_data_lines.append(fields) + elif fields[4] == "cstate_core/c6-residency/": + new_fields = fields.copy() + cpuID = int(fields[1].replace("CPU", "")) + HT_cpuID = cpuID + int( + meta_data["constants"]["CONST_THREAD_COUNT"] + * meta_data["constants"]["CORES_PER_SOCKET"] + ) + new_fields[1] = "CPU" + str(HT_cpuID) + new_perf_data_lines.append(fields) + new_perf_data_lines.append(new_fields) + else: + new_perf_data_lines.append(fields) + return new_perf_data_lines + + # get metadata lines and perf events' lines in three separate lists def get_all_data_lines(input_file_path): with open(input_file_path, "r") as infile: @@ -192,25 +223,23 @@ def get_metadata_as_dict(meta_data_lines): meta_data = {} meta_data["constants"] = {} for line in meta_data_lines: - if line.startswith("TSC"): - meta_data["constants"]["CONST_TSC_FREQ"] = ( + if line.startswith("SYSTEM_TSC_FREQ"): + meta_data["constants"]["SYSTEM_TSC_FREQ"] = ( float(line.split(",")[1]) * 1000000 ) - elif line.startswith("CPU"): - meta_data["constants"]["CONST_CORE_COUNT"] = float(line.split(",")[1]) - elif line.startswith("HT"): - meta_data["constants"]["CONST_HT_COUNT"] = float(line.split(",")[1]) - meta_data["constants"]["CONST_THREAD_COUNT"] = float( - line.split(",")[1] - ) # we use both constants interchangeably - elif line.startswith("SOCKET"): - meta_data["constants"]["CONST_SOCKET_COUNT"] = float(line.split(",")[1]) - elif line.startswith("IMC"): - meta_data["constants"]["CONST_IMC_COUNT"] = float(line.split(",")[1]) - elif line.startswith("CHA") or line.startswith("CBOX"): - meta_data["constants"]["CONST_CHA_COUNT"] = float(line.split(",")[1]) - elif line.startswith("Sampling"): - meta_data["constants"]["CONST_INTERVAL"] = float(line.split(",")[1]) + elif line.startswith("CORES_PER_SOCKET"): + meta_data["constants"]["CORES_PER_SOCKET"] = int(line.split(",")[1]) + elif line.startswith("HYPERTHREADING_ON"): + meta_data["constants"]["HYPERTHREADING_ON"] = int( + line.split(",")[1] == "True" + ) + meta_data["constants"]["CONST_THREAD_COUNT"] = ( + int(line.split(",")[1] == "True") + 1 + ) + elif line.startswith("SOCKET_COUNT"): + meta_data["constants"]["SOCKET_COUNT"] = int(line.split(",")[1]) + elif line.startswith("CHAS_PER_SOCKET") or line.startswith("CBOX"): + meta_data["constants"]["CHAS_PER_SOCKET"] = int(line.split(",")[1]) elif line.startswith("Architecture"): meta_data["constants"]["CONST_ARCH"] = str(line.split(",")[1]) @@ -239,15 +268,19 @@ def get_metadata_as_dict(meta_data_lines): docker_SETS = [] docker_SETS = line.split(",") docker_SETS = docker_SETS[:-1] - # here lognth of docker_HASH should be exactly len(docker_SETS) + # here length of docker_HASH should be exactly len(docker_SETS) assert len(docker_HASH) == len(docker_SETS) meta_data["CPUSETS"] = {} - for i in range(1, len(docker_SETS)): - docker_SET = str(docker_SETS[i]) - docker_SET = ( - int(docker_SET.split("-")[1]) - int(docker_SET.split("-")[0]) + 1 - ) - meta_data["CPUSETS"][docker_HASH[i]] = docker_SET + for i, docker_SET in enumerate(docker_SETS): + if "-" in docker_SET: # range of cpus + num_of_cpus = ( + int(docker_SET.split("-")[1]) + - int(docker_SET.split("-")[0]) + + 1 + ) + else: # either one cpu, or a list of cpus separated by + sign + num_of_cpus = len(docker_SET.split("+")) + meta_data["CPUSETS"][docker_HASH[i]] = num_of_cpus elif line.startswith("Percore mode"): meta_data["PERCORE_MODE"] = ( @@ -273,24 +306,25 @@ def get_metadata_as_dict(meta_data_lines): def set_CONST_TSC(meta_data, perf_mode, num_cpus=0): if perf_mode == Mode.System: - meta_data["constants"]["CONST_TSC"] = ( - meta_data["constants"]["CONST_TSC_FREQ"] - * meta_data["constants"]["CONST_CORE_COUNT"] - * meta_data["constants"]["CONST_HT_COUNT"] - * meta_data["constants"]["CONST_SOCKET_COUNT"] - ) + if meta_data["CGROUPS"] == "enabled" and num_cpus > 0: + meta_data["constants"]["TSC"] = ( + meta_data["constants"]["SYSTEM_TSC_FREQ"] * num_cpus + ) + else: + meta_data["constants"]["TSC"] = ( + meta_data["constants"]["SYSTEM_TSC_FREQ"] + * meta_data["constants"]["CORES_PER_SOCKET"] + * meta_data["constants"]["CONST_THREAD_COUNT"] + * meta_data["constants"]["SOCKET_COUNT"] + ) elif perf_mode == Mode.Socket: - meta_data["constants"]["CONST_TSC"] = ( - meta_data["constants"]["CONST_TSC_FREQ"] - * meta_data["constants"]["CONST_CORE_COUNT"] - * meta_data["constants"]["CONST_HT_COUNT"] + meta_data["constants"]["TSC"] = ( + meta_data["constants"]["SYSTEM_TSC_FREQ"] + * meta_data["constants"]["CORES_PER_SOCKET"] + * meta_data["constants"]["CONST_THREAD_COUNT"] ) elif perf_mode == Mode.Core: # Core should be changed to thread - meta_data["constants"]["CONST_TSC"] = meta_data["constants"]["CONST_TSC_FREQ"] - elif meta_data["CGROUPS"] == "enabled": - meta_data["constants"]["CONST_TSC"] = ( - meta_data["constants"]["CONST_TSC_FREQ"] * num_cpus - ) + meta_data["constants"]["TSC"] = meta_data["constants"]["SYSTEM_TSC_FREQ"] return @@ -386,15 +420,7 @@ def extract_dataframe(perf_data_lines, meta_data, perf_mode): if "CGROUPS" in meta_data and meta_data["CGROUPS"] == "enabled": # 1.001044566,6261968509,,L1D.REPLACEMENT,/system.slice/docker-826c1c9de0bde13b0c3de7c4d96b38710cfb67c2911f30622508905ece7e0a16.scope,6789274819,5.39,, assert len(perf_data_df.columns) >= 7 - columns = [ - "ts", - "value", - "col0", - "metric", - "cgroup", - "col1", - "percentage", - ] + columns = ["ts", "value", "col0", "metric", "cgroup", "col1", "percentage"] # add dummy col names for remaining columns for col in range(7, len(perf_data_df.columns)): columns.append("col" + str(col)) @@ -402,29 +428,14 @@ def extract_dataframe(perf_data_lines, meta_data, perf_mode): elif perf_mode == Mode.System: # Ubuntu 16.04 returns 6 columns, later Ubuntu's and other OS's return 8 columns assert len(perf_data_df.columns) >= 6 - columns = [ - "ts", - "value", - "col0", - "metric", - "value2", - "percentage", - ] + columns = ["ts", "value", "col0", "metric", "value2", "percentage"] # add dummy col names for remaining columns for col in range(6, len(perf_data_df.columns)): columns.append("col" + str(col)) perf_data_df.columns = columns elif perf_mode == Mode.Core or perf_mode == Mode.Socket: assert len(perf_data_df.columns) >= 7 - columns = [ - "ts", - "cpu", - "value", - "col0", - "metric", - "value2", - "percentage", - ] + columns = ["ts", "cpu", "value", "col0", "metric", "value2", "percentage"] # add dummy col names for remaining columns for col in range(7, len(perf_data_df.columns)): columns.append("col" + str(col)) @@ -444,12 +455,9 @@ def extract_dataframe(perf_data_lines, meta_data, perf_mode): ) # set data frame types - # perf_data_df = perf_data_df.astype({'value': 'float'}) perf_data_df["value"] = pd.to_numeric( perf_data_df["value"], errors="coerce" ).fillna(0) - # perf_data_df = perf_data_df.astype({'value2': 'float'}) - # perf_data_df = perf_data_df.astype({"percentage":"float"}) return perf_data_df @@ -521,7 +529,9 @@ def generate_metrics_time_series(time_series_df, perf_mode, out_file_path): return -def generate_metrics_averages(time_series_df, perf_mode, out_file_path): +def generate_metrics_averages( + time_series_df: pd.DataFrame, perf_mode: Mode, out_file_path: str +) -> None: average_metric_file_name = "" if perf_mode == Mode.System: average_metric_file_name = get_extra_out_file(out_file_path, "a") @@ -531,6 +541,9 @@ def generate_metrics_averages(time_series_df, perf_mode, out_file_path): average_metric_file_name = get_extra_out_file(out_file_path, "ca") time_series_df.index.name = "metrics" + # throw out 1st and last datapoints since they tend to be significantly off norm + if len(time_series_df) > 2: + time_series_df = time_series_df.iloc[:, 1:-1] avgcol = time_series_df.mean(numeric_only=True, axis=1).to_frame().reset_index() p95col = time_series_df.quantile(q=0.95, axis=1).to_frame().reset_index() mincol = time_series_df.min(axis=1).to_frame().reset_index() @@ -577,12 +590,12 @@ def generate_metrics( time_slice_groups = perf_data_df.groupby("ts", sort=False) time_metrics_result = {} errors = { - "MISSING DATA": 0, - "ZERO DIVISION": 0, - "MISSING EVENTS": 0, - "MULTIPLE GROUPS": 0, + "MISSING DATA": set(), + "ZERO DIVISION": set(), + "MISSING EVENTS": set(), + "MULTIPLE GROUPS": set(), } - missing_events = set() + for time_slice, item in time_slice_groups: time_slice_df = time_slice_groups.get_group(time_slice) current_group_indx = 0 @@ -640,11 +653,9 @@ def generate_metrics( or "]" in expressions_to_evaluate[instance] ): if verbose: - errors["MISSING DATA"] += 1 + errors["MISSING DATA"].add(m["name"]) log_skip_metric( - m, - expressions_to_evaluate[instance], - "MISSING DATA", + m, expressions_to_evaluate[instance], "MISSING DATA" ) continue # cannot evaluate expression, skipping try: @@ -658,7 +669,7 @@ def generate_metrics( ) except ZeroDivisionError: if verbose: - errors["ZERO DIVISION"] += 1 + errors["ZERO DIVISION"].add(m["name"]) log_skip_metric( m, expressions_to_evaluate[instance], @@ -670,7 +681,7 @@ def generate_metrics( break # no need to check other groups if not single_group: if verbose: - errors["MULTIPLE GROUPS"] += 1 + errors["MULTIPLE GROUPS"].add(m["name"]) logging.warning('MULTIPLE GROUPS: metric "' + m["name"] + '"') # get events from multiple groups remaining_events_to_find = list(non_constant_mertics) @@ -700,11 +711,9 @@ def generate_metrics( or "]" in expressions_to_evaluate[instance] ): if verbose: - errors["MISSING DATA"] += 1 + errors["MISSING DATA"].add(m["name"]) log_skip_metric( - m, - expressions_to_evaluate[instance], - "MISSING DATA", + m, expressions_to_evaluate[instance], "MISSING DATA" ) continue try: @@ -718,7 +727,7 @@ def generate_metrics( ) except ZeroDivisionError: if verbose: - errors["ZERO DIVISION"] += 1 + errors["ZERO DIVISION"].add(m["name"]) log_skip_metric( m, expressions_to_evaluate[instance], @@ -729,7 +738,6 @@ def generate_metrics( metrics_results[m["name"] + sub_txt] = float(result) else: # some events are missing if verbose: - errors["MISSING EVENTS"] += 1 logging.warning( 'MISSING EVENTS: metric "' + m["name"] @@ -737,14 +745,15 @@ def generate_metrics( + str(remaining_events_to_find) + '"' ) - missing_events.update(remaining_events_to_find) + errors["MISSING EVENTS"].update(remaining_events_to_find) continue # skip metric time_metrics_result[time_slice] = metrics_results time_series_df = pd.DataFrame(time_metrics_result) if verbose: for error in errors: - logging.warning("Total " + error + ": " + str(errors[error])) - logging.warning("Missing events: " + str(missing_events)) + logging.warning( + str(len(errors[error])) + " " + error + ": " + str(errors[error]) + ) generate_metrics_time_series(time_series_df, perf_mode, out_file_path) generate_metrics_averages(time_series_df, perf_mode, out_file_path) return @@ -861,6 +870,9 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): elif "PERCORE_MODE" in meta_data and meta_data["PERCORE_MODE"]: perf_mode = Mode.Core + # fix c6 residency values + perf_data_lines = get_fixed_c6_residency_fields(perf_data_lines, perf_mode) + # set const TSC accoding to perf_mode set_CONST_TSC(meta_data, perf_mode) @@ -878,17 +890,19 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): # generate metrics for each cgroup if "CGROUPS" in meta_data and meta_data["CGROUPS"] == "enabled": - for cid in meta_data["CGROUP_HASH"]: - cid_perf_data_df = perf_data_df[perf_data_df["cgroup"] == cid] - cid_out_file_path = ( + for cgroup_id in meta_data["CGROUP_HASH"]: + container_id = meta_data["CGROUP_HASH"][cgroup_id] + set_CONST_TSC(meta_data, perf_mode, meta_data["CPUSETS"][container_id]) + cgroup_id_perf_data_df = perf_data_df[perf_data_df["cgroup"] == cgroup_id] + cgroup_id_out_file_path = ( out_file_path.rsplit(".csv", 1)[0] + "_" - + meta_data["CGROUP_HASH"][cid] + + meta_data["CGROUP_HASH"][cgroup_id] + ".csv" ) generate_metrics( - cid_perf_data_df, - cid_out_file_path, + cgroup_id_perf_data_df, + cgroup_id_out_file_path, event_groups, meta_data, metrics, @@ -900,11 +914,11 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): ) if args.html: report.write_html( - cid_out_file_path, + cgroup_id_out_file_path, perf_mode, meta_data["constants"]["CONST_ARCH"], args.html.replace( - ".html", "_" + meta_data["CGROUP_HASH"][cid] + ".html" + ".html", "_" + meta_data["CGROUP_HASH"][cgroup_id] + ".html" ), ) # generate metrics for system, persocket or percore diff --git a/src/basic_stats.py b/src/basic_stats.py index ba00250..3d39279 100644 --- a/src/basic_stats.py +++ b/src/basic_stats.py @@ -12,6 +12,7 @@ import tempfile from yattag import Doc from src.common import crash +from collections import OrderedDict os.environ["MPLCONFIGDIR"] = tempfile.mkdtemp() @@ -74,113 +75,110 @@ def row_of_1(html_list): return get_row_header() + get_col(html_list) + get_row_footer() -def get_stats_plot(input, arch): +def get_stats_plot(input_file, arch): try: - df = pd.read_csv(input, keep_default_na=False) + df = pd.read_csv(input_file, keep_default_na=False) + except FileNotFoundError: crash(f"{input} file not found") - fig_list = [] - if "metric_CPU operating frequency (in GHz)" in df.columns: - fig1 = get_fig( - df, - y=["metric_CPU operating frequency (in GHz)"], - title="CPU Operating Frequency", - title_text="Freq (GHz)", - name=["Frequency"], - ) - fig_list.append(fig1) - if "metric_CPU utilization %" in df.columns: - fig2 = get_fig( - df, - y=["metric_CPU utilization %", "metric_CPU utilization% in kernel mode"], - title="CPU Utilization", - title_text="Percentage", - name=["User", "Kernel"], - ) - fig_list.append(fig2) - if "metric_CPI" in df.columns: - fig3 = get_fig( - df, - y=["metric_CPI", "metric_kernel_CPI"], - title="CPI", - title_text="CPI", - name=["CPI", "Kernel CPI"], - ) - fig_list.append(fig3) - if "metric_package power (watts)" in df.columns: - fig4 = get_fig( - df, - y=["metric_package power (watts)", "metric_DRAM power (watts)"], - title="Power", - title_text="Watts", - name=["Package", "DRAM"], - ) - fig_list.append(fig4) - if "metric_memory bandwidth read (MB/sec)" in df.columns: - fig5 = get_fig( - df, - y=[ - "metric_memory bandwidth read (MB/sec)", - "metric_memory bandwidth write (MB/sec)", - "metric_memory bandwidth total (MB/sec)", - ], - title="Memory Bandwidth", - title_text="MB/sec", - name=["Read", "Write", "Total"], - ) - fig_list.append(fig5) - if "metric_core % cycles in non AVX license" in df.columns and arch != "broadwell": - fig6 = get_fig( - df, - y=[ - "metric_core % cycles in non AVX license", - "metric_core % cycles in AVX2 license", - "metric_core % cycles in AVX-512 license", - ], - title="AVX Percentage", - title_text="Percentage", - name=["AVX", "AVX2", "AVX512"], - ) - fig_list.append(fig6) - if "metric_NUMA %_Reads addressed to local DRAM" in df.columns: - fig7 = get_fig( - df, - y=[ - "metric_NUMA %_Reads addressed to local DRAM", - "metric_NUMA %_Reads addressed to remote DRAM", - ], - title="NUMA Locality DRAM Reads %", - title_text="Percentage", - name=["Local", "Remote"], - ) - fig_list.append(fig7) - if "metric_TMAM_Frontend_Bound(%)" in df.columns: - fig8 = get_fig( - df, - y=["metric_TMAM_Frontend_Bound(%)", "metric_TMAM_Backend_bound(%)"], - title="TMA", - title_text="Percentage", - name=["TMA_Frontend", "TMA_Backend"], - ) - fig_list.append(fig8) - - cache_mpi = [ - "metric_L1D MPI (includes data+rfo w/ prefetches)", - "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "metric_LLC data read MPI (demand+prefetch)", - ] - - if "metric_L1D MPI (includes data+rfo w/ prefetches)" in df.columns: - fig9 = get_fig( - df, - y=cache_mpi, - title="Cache MPI", - title_text="MPI", - name=["L1D MPI", "L2 MPI", "LLC MPI"], - ) - fig_list.append(fig9) + figure_to_column_dict = OrderedDict() + figure_to_column_dict["CPU Operating Frequency"] = { + "metrics_prefixes": ["metric_CPU operating frequency (in GHz)"], + "Y_axis_text": "Freq (GHz)", + "name_prefix": ["Frequency"], + } + figure_to_column_dict["CPU Utilization"] = { + "metrics_prefixes": [ + "metric_CPU utilization %", + "metric_CPU utilization% in kernel mode", + ], + "Y_axis_text": "Percentage", + "name_prefix": ["User", "Kernel"], + } + figure_to_column_dict["CPI"] = { + "metrics_prefixes": ["metric_CPI", "metric_kernel_CPI"], + "Y_axis_text": "CPI", + "name_prefix": ["CPI", "Kernel CPI"], + } + figure_to_column_dict["Power"] = { + "metrics_prefixes": [ + "metric_package power (watts)", + "metric_DRAM power (watts)", + ], + "Y_axis_text": "Watts", + "name_prefix": ["Package", "DRAM"], + } + figure_to_column_dict["Memory Bandwidth"] = { + "metrics_prefixes": [ + "metric_memory bandwidth read (MB/sec)", + "metric_memory bandwidth write (MB/sec)", + "metric_memory bandwidth total (MB/sec)", + ], + "Y_axis_text": "MB/sec", + "name_prefix": ["Read", "Write", "Total"], + } + figure_to_column_dict["AVX Percentage"] = { + "metrics_prefixes": [ + "metric_core % cycles in non AVX license", + "metric_core % cycles in AVX2 license", + "metric_core % cycles in AVX-512 license", + ], + "Y_axis_text": "Percentage", + "name_prefix": ["AVX", "AVX2", "AVX512"], + } + figure_to_column_dict["NUMA Locality DRAM Reads %"] = { + "metrics_prefixes": [ + "metric_NUMA %_Reads addressed to local DRAM", + "metric_NUMA %_Reads addressed to remote DRAM", + ], + "Y_axis_text": "Percentage", + "name_prefix": ["Local", "Remote"], + } + figure_to_column_dict["TMA"] = { + "metrics_prefixes": [ + "metric_TMA_Frontend_Bound(%)", + "metric_TMA_Backend_bound(%)", + ], + "Y_axis_text": "Percentage", + "name_prefix": ["TMA_Frontend", "TMA_Backend"], + } + figure_to_column_dict["Cache MPI"] = { + "metrics_prefixes": [ + "metric_L1D MPI (includes data+rfo w/ prefetches)", + "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "metric_LLC data read MPI (demand+prefetch)", + ], + "Y_axis_text": "MPI", + "name_prefix": ["L1D MPI", "L2 MPI", "LLC MPI"], + } + + figure_list = [] + for figure_title in figure_to_column_dict: + figure_data = figure_to_column_dict[figure_title] + for metric_index, metric_prefix in enumerate(figure_data["metrics_prefixes"]): + for column in df.columns: + if metric_prefix in column: + if "cols" not in figure_data: + figure_data["cols"] = [] + if "names" not in figure_data: + figure_data["names"] = [] + figure_data["cols"].append(column) + series_name = ( + figure_data["name_prefix"][metric_index] + + "_" + + column.replace(metric_prefix, "") + ) + figure_data["names"].append(series_name) + if "cols" in figure_data: + fig = get_fig( + df, + y=figure_data["cols"], + title=figure_title, + title_text=figure_data["Y_axis_text"], + name=figure_data["names"], + ) + figure_list.append(fig) - figure_list = fig_list for fig in figure_list: # update layout fig.update_layout( diff --git a/src/perf_helpers.py b/src/perf_helpers.py index 81849ab..4843295 100644 --- a/src/perf_helpers.py +++ b/src/perf_helpers.py @@ -41,6 +41,12 @@ def get_ht_count(): return int(any([core["siblings"] != core["cpu cores"] for core in cpuinfo])) + 1 +# get hyperthreading status +def get_ht_status(): + cpuinfo = get_cpuinfo() + return any([core["siblings"] != core["cpu cores"] for core in cpuinfo]) + + # get cpu count def get_cpu_count(): cpu_count = 0 @@ -52,7 +58,7 @@ def get_cpu_count(): for c in cpu_list: limit = c.split("-") cpu_count += int(limit[1]) - int(limit[0]) + 1 - return cpu_count / (get_socket_count() * get_ht_count()) + return int(cpu_count / (get_socket_count() * get_ht_count())) # compute tsc frequency @@ -90,28 +96,37 @@ def get_sys_devices(): # get imc and uncore counts # TODO:fix for memory config with some channels populated -def get_imc_cacheagent_count(): +def get_imc_upi_count(): sys_devs = get_sys_devices() - cha_count = 0 imc_count = 0 upi_count = 0 - if "uncore_cha" in sys_devs: - cha_count = int(sys_devs["uncore_cha"]) - if "uncore_cbox" in sys_devs: - cha_count = int(sys_devs["uncore_cbox"]) if "uncore_upi" in sys_devs: upi_count = int(sys_devs["uncore_upi"]) if "uncore_qpi" in sys_devs: upi_count = int(sys_devs["uncore_qpi"]) if "uncore_imc" in sys_devs: imc_count = int(sys_devs["uncore_imc"]) - return imc_count, cha_count, upi_count + return imc_count, upi_count + + +# get CHA count +def get_cha_count(): + cha_msrs = { + "0x396": "uncore client cha count", + "0x702": "uncore cha count", + "0x2FFE": "uncore cha count spr", + } + for msr in cha_msrs.keys(): + result = read_msr(int(msr, 16)) + if result is not None and result != 0: + return result + return 0 # get imc channel ids, channel ids are not consecutive in some cases (observed on bdw) def get_channel_ids(): sysdevices = os.listdir("/sys/bus/event_source/devices") - imc = "uncore_imc_*" + imc = "uncore_imc_[0-9]*" ids = [] for entry in sysdevices: if fnmatch.fnmatch(entry, imc): @@ -153,12 +168,15 @@ def set_perf_event_mux_interval(reset, interval_ms, mux_interval): # read the MSR register and return the value in dec format -def readmsr(msr, cpu=0): - f = os.open("/dev/cpu/%d/msr" % (cpu,), os.O_RDONLY) - os.lseek(f, msr, os.SEEK_SET) - val = struct.unpack("Q", os.read(f, 8))[0] - os.close(f) - return val +def read_msr(msr, cpu=0): + fName = f"/dev/cpu/{cpu}/msr" + try: + with open(fName, "rb") as f: + f.seek(msr) + result = struct.unpack("Q", f.read(8))[0] + except OSError: + result = None + return result # detect if PMU counters are in use @@ -182,7 +200,7 @@ def pmu_contention_detect( warn = False for r in msrs: try: - value = readmsr(int(r, 16)) + value = read_msr(int(r, 16)) if msrs[r]["value"] is not None and value != msrs[r]["value"]: logging.warning("PMU in use: " + msrs[r]["name"]) warn = True @@ -245,6 +263,19 @@ def get_lscpu(): return cpuinfo +# get supported perf events +def get_perf_list(): + try: + perf_list = subprocess.check_output( # nosec + ["perf", "list"], universal_newlines=True + ) + except FileNotFoundError: + crash("Please install Linux perf and re-run") + except subprocess.CalledProcessError as e: + crash(f"Error calling Linux perf, error code: {e.returncode}") + return perf_list + + def get_arch_and_name(procinfo): arch = modelname = "" try: @@ -366,9 +397,7 @@ def get_cgroups_from_cids(cids): cgroups = set() try: p = subprocess.Popen( - ["ps", "-e", "-o", "cgroup"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + ["ps", "-e", "-o", "cgroup"], stdout=subprocess.PIPE, stderr=subprocess.PIPE ) p2 = subprocess.Popen( ["grep", "docker-"], diff --git a/src/prepare_perf_events.py b/src/prepare_perf_events.py index 10dd7c1..a97d229 100644 --- a/src/prepare_perf_events.py +++ b/src/prepare_perf_events.py @@ -8,13 +8,12 @@ import logging import os import re -import subprocess # nosec import src.perf_helpers as helper from src.common import crash # test if the event can be collected, check supported events in perf list -def filter_func(event, perf_list): +def is_collectable_event(event, perf_list): tmp_list = event.split("/") name = helper.get_dev_name(tmp_list[0]) unc_name = "uncore_" + name @@ -57,7 +56,7 @@ def expand_unc(line): # check if CPU/core event -def check_cpu_event(line): +def is_cpu_event(line): line = line.strip() tmp_list = line.split("/") # assumes event name without a PMU qualifier is a core event @@ -73,8 +72,6 @@ def check_cpu_event(line): # save the last group names in a list when it is cha or imc # test for cha or imc event. append with count value # once reaches new group, start looping through all imc/cha counts to finish up - - def enumerate_uncore(group, pattern, n, default_range=True): uncore_group = "" ids = [] @@ -119,60 +116,57 @@ def get_cgroup_events_format(cgroups, events, num_events): return perf_format -def prepare_perf_events(event_file, cpu_only): +def filter_events(event_file, cpu_only): if not os.path.isfile(event_file): crash("event file not found") - - start_group = "'{" - end_group = "}'" - group = "" - prev_group = "" - new_group = True - collection_events = [] - + unsupported_events = [] + perf_list = helper.get_perf_list() with open(event_file, "r") as fin: - # get supported perf events - try: - perf_list = subprocess.check_output( # nosec - ["perf", "list"], universal_newlines=True - ) - except FileNotFoundError: - crash("Please install Linux perf and re-run") - - except subprocess.CalledProcessError as e: - crash(f"Error calling Linux perf, error code: {e.returncode}") - - unsupported_events = [] for line in fin: - if (line != "\n") and (not line.startswith("#")): - line = line.strip() - if cpu_only and (not check_cpu_event(line)): - continue - if not filter_func(line, perf_list): - unsupported_events.append(line) - if line.endswith(";") and (len(collection_events) > 1): - end_event = str(collection_events[-1]) - collection_events[-1] = end_event[:-1] + ";" - else: - collection_events.append(line) + line = line.strip() + if ( + line == "" + or line.startswith("#") + or (cpu_only and not is_cpu_event(line)) + ): + continue + if not is_collectable_event(line, perf_list): + # not a collectable event + unsupported_events.append(line) + # if this is the last event in the group, mark the previous event as the last (with a ';') + if line.endswith(";") and len(collection_events) > 1: + end_event = collection_events[-1] + collection_events[-1] = end_event[:-1] + ";" + else: + collection_events.append(line) if any("cpu-cycles" in event for event in unsupported_events): crash("PMU's not available. Run in a full socket VM or baremetal") if len(unsupported_events) > 0: logging.warning( - str("Perf unsupported events not counted: " + str(unsupported_events)) + f"Perf unsupported events not counted: {unsupported_events}" ) + return collection_events, unsupported_events + + +def prepare_perf_events(event_file, cpu_only): + start_group = "'{" + end_group = "}'" + group = "" + prev_group = "" + new_group = True + collection_events, unsupported_events = filter_events(event_file, cpu_only) core_event = [] uncore_event = [] event_names = [] for line in collection_events: if cpu_only: - if check_cpu_event(line): + if is_cpu_event(line): event = line + ":c" core_event.append(event) else: - if check_cpu_event(line): + if is_cpu_event(line): event = line + ":c" core_event.append(event) else: @@ -198,7 +192,6 @@ def prepare_perf_events(event_file, cpu_only): default_range = name != "uncore_imc" group += enumerate_uncore(prev_group, name + "_", unc_count, default_range) - fin.close() group = group[:-1] if len(event_names) == 0: crash("No supported events found on this platform.") diff --git a/src/report.py b/src/report.py index 838cd64..95c8dc0 100644 --- a/src/report.py +++ b/src/report.py @@ -11,15 +11,18 @@ from yattag import Doc, indent -def write_html(tma_inp, perf_mode, arch, html_report_out, data_type="both"): +def write_html(in_file, perf_mode, arch, html_report_out, data_type="both"): if data_type not in ("tma", "basic", "both"): data_type = "both" if str(perf_mode) == "Mode.System": - tma_inp = tma_inp.replace(".csv", ".sys.average.csv") + tma_inp = in_file.replace(".csv", ".sys.csv") + tma_inp_avg = in_file.replace(".csv", ".sys.average.csv") elif str(perf_mode) == "Mode.Socket": - tma_inp = tma_inp.replace(".csv", ".socket.average.csv") + tma_inp = in_file.replace(".csv", ".socket.csv") + tma_inp_avg = in_file.replace(".csv", ".socket.average.csv") elif str(perf_mode) == "Mode.Core": - tma_inp = tma_inp.replace(".csv", ".core.average.csv") + tma_inp = in_file.replace(".csv", ".core.csv") + tma_inp_avg = in_file.replace(".csv", ".core.average.csv") doc, tag, text = Doc().tagtext() with tag("html"): @@ -32,7 +35,7 @@ def write_html(tma_inp, perf_mode, arch, html_report_out, data_type="both"): text("Intel® PerfSpect Report") with tag("body"): if data_type in ("both", "tma"): - fig1 = icicle.get_icicle(tma_inp) + fig1 = icicle.get_icicle(tma_inp_avg) with tag("h2", align="center"): text("TopDown Microarchitecture Analysis (TMA)") with doc.tag("div"):