From 6b8d9b5ff33321e0265a8e1bb08aa75d72647de7 Mon Sep 17 00:00:00 2001 From: hilldani Date: Wed, 10 May 2023 13:10:01 -0700 Subject: [PATCH] improve interval skid accuracy and icx equations (#30) --- _version.txt | 2 +- events/icx.txt | 19 ++- events/metric_icx.json | 139 +++++++++++---------- events/spr.txt | 76 +++++++----- perf-collect.py | 26 ++-- perf-postprocess.py | 205 +++++++++++++------------------ similarity-analyzer/_version.txt | 1 + similarity-analyzer/dopca.py | 2 +- src/basic_stats.py | 2 +- src/perf_helpers.py | 38 ++++++ src/prepare_perf_events.py | 10 +- 11 files changed, 281 insertions(+), 239 deletions(-) create mode 100644 similarity-analyzer/_version.txt diff --git a/_version.txt b/_version.txt index db6fb4a..9d4f823 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.2.8 +1.2.9 diff --git a/events/icx.txt b/events/icx.txt index 4a42b16..822ff50 100644 --- a/events/icx.txt +++ b/events/icx.txt @@ -28,8 +28,8 @@ cpu-cycles; cpu/event=0xa3,umask=0x0C,cmask=0x0C,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, cpu/event=0xa3,umask=0x05,cmask=0x05,period=1000003,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, -cpu/event=0xa3,umask=0x04,cmask=0x04,period=1000003,name='CYCLE_ACTIVITY.STALLS_Total'/, -cpu/event=0xa3,umask=0x06,cmask=0x06,period=1000003,name='CYCLE_ACTIVITY.STALLS_L3_Miss'/, +cpu/event=0xa3,umask=0x04,cmask=0x04,period=1000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, +cpu/event=0xa3,umask=0x06,cmask=0x06,period=1000003,name='CYCLE_ACTIVITY.STALLS_L3_MISS'/, cpu-cycles; cpu/event=0xa3,umask=0x08,cmask=0x08,period=1000003,name='CYCLE_ACTIVITY.CYCLES_L1D_MISS'/, @@ -74,6 +74,8 @@ instructions; cpu/event=0x9c,umask=0x01,period=1000003,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, cpu/event=0x9c,umask=0x01,cmask=0x05,period=1000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, +cpu/event=0xd2,umask=0x02,period=1000003,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT'/, +cpu/event=0xd2,umask=0x04,period=1000003,name='MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM'/, cpu-cycles; cpu/event=0x60,umask=0x10,period=2000003,name='OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD'/, @@ -85,7 +87,7 @@ ref-cycles, instructions; cpu/event=0x60,umask=0x08,cmask=0x01,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD'/, -cpu/event=0x60,umask=0x08,cmask=0x04,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD_c4'/, +cpu/event=0x60,umask=0x08,cmask=0x04,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD:c4'/, cpu-cycles; #TMA related @@ -95,10 +97,12 @@ cpu/event=0x00,umask=0x83,period=10000003,name='PERF_METRICS.BACKEND_BOUND'/, cpu/event=0x00,umask=0x82,period=10000003,name='PERF_METRICS.FRONTEND_BOUND'/, cpu/event=0x00,umask=0x80,period=10000003,name='PERF_METRICS.RETIRING'/, cpu/event=0x0d,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, -cpu/event=0x0d,umask=0x01,cmask=0x01,edge=0x01,period=500009,name='INT_MISC.RECOVERY_CYCLES_c1_e1'/; +cpu/event=0x0d,umask=0x01,cmask=0x01,edge=0x01,period=500009,name='INT_MISC.RECOVERY_CYCLES:c1:e1'/; cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0xa6,umask=0x80,period=2000003,name='EXE_ACTIVITY.3_PORTS_UTIL:u0x80'/, +cpu/event=0xa2,umask=0x02,period=2000003,name='RESOURCE_STALLS.SCOREBOARD'/, instructions; cpu/event=0x0d,umask=0x80,period=500009,name='INT_MISC.CLEAR_RESTEER_CYCLES'/, @@ -136,16 +140,19 @@ power/energy-ram/; cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, +cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, cpu-cycles, instructions; cpu/event=0xb1,umask=0x01,cmask=0x03,period=2000003,name='UOPS_EXECUTED.CYCLES_GE_3'/, cpu/event=0x79,umask=0x30,period=100003,name='IDQ.MS_UOPS'/, +cpu/event=0x56,umask=0x01,period=100003,name='UOPS_DECODED.DEC0'/, +cpu/event=0x56,umask=0x01,cmask=0x01,period=100003,name='UOPS_DECODED.DEC0:c1'/, cpu-cycles; cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, cpu/event=0xa3,umask=0x0c,cmask=0x0c,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, -cpu/event=0x08,umask=0x20,period=100003,name='DTLB_LOAD_MISSES.STLB_HIT'/, +cpu/event=0x08,umask=0x20,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.STLB_HIT:c1'/, cpu/event=0x08,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/, cpu-cycles; @@ -163,7 +170,7 @@ cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/; cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, cha/event=0x36,umask=0xC816FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/; -cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD'/, cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, cha/event=0x36,umask=0xC817FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; diff --git a/events/metric_icx.json b/events/metric_icx.json index 6957f9a..a31e7b2 100644 --- a/events/metric_icx.json +++ b/events/metric_icx.json @@ -1,7 +1,7 @@ [ { "name": "metric_CPU operating frequency (in GHz)", - "expression": "([cpu-cycles] / [ref-cycles]) * ([SYSTEM_TSC_FREQ] / 1000000000)" + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" }, { "name": "metric_CPU utilization %", @@ -9,7 +9,8 @@ }, { "name": "metric_CPU utilization% in kernel mode", - "expression": "100 * [ref-cycles:k] / [TSC]" + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" }, { "name": "metric_CPI", @@ -17,19 +18,21 @@ }, { "name": "metric_kernel_CPI", - "expression": "[cpu-cycles:k] / [instructions:k]" + "expression": "[cpu-cycles:k] / [instructions:k]", + "origin": "perfspect" }, { "name": "metric_IPC", - "expression": "[instructions] / [cpu-cycles]" + "expression": "[instructions] / [cpu-cycles]", + "origin": "perfspect" }, { "name": "metric_giga_instructions_per_sec", - "expression": "[instructions] / 1000000000" + "expression": "[instructions] / 1000000000", + "origin": "perfspect" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "tags": "transaction", "expression": "[L1D.REPLACEMENT] / [instructions]" }, { @@ -58,59 +61,67 @@ }, { "name": "metric_Average LLC data read miss latency (in clks)", - "expression": "[OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD] / [OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD]" + "expression": "[OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD] / [OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD]", + "origin": "perfspect" }, { "name": "metric_UPI Data transmit BW (MB/sec) (only data)", - "expression": "[UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9) / 1000000" + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" }, { "name": "metric_package power (watts)", - "expression": "[power/energy-pkg/]" + "expression": "[power/energy-pkg/]", + "origin": "perfspect" }, { "name": "metric_DRAM power (watts)", - "expression": "[power/energy-ram/]" + "expression": "[power/energy-ram/]", + "origin": "perfspect" }, { "name": "metric_core c6 residency %", - "expression": "100 * [cstate_core/c6-residency/] / [TSC]" + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" }, { "name": "metric_package c6 residency %", - "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]" + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" }, { "name": "metric_core % cycles in non AVX license", - "expression": "(100 * [CORE_POWER.LVL0_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])" + "expression": "(100 * [CORE_POWER.LVL0_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])", + "origin": "perfspect" }, { "name": "metric_core % cycles in AVX2 license", - "expression": "(100 * [CORE_POWER.LVL1_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])" + "expression": "(100 * [CORE_POWER.LVL1_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])", + "origin": "perfspect" }, { "name": "metric_core % cycles in AVX-512 license", - "expression": "(100 * [CORE_POWER.LVL2_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])" + "expression": "(100 * [CORE_POWER.LVL2_TURBO_LICENSE]) / ([CORE_POWER.LVL0_TURBO_LICENSE] + [CORE_POWER.LVL1_TURBO_LICENSE] + [CORE_POWER.LVL2_TURBO_LICENSE])", + "origin": "perfspect" }, { "name": "metric_core initiated local dram read bandwidth (MB/sec)", - "expression": "([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000" + "expression": "(([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000) / 1" }, { "name": "metric_core initiated remote dram read bandwidth (MB/sec)", - "expression": "([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000" + "expression": "(([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000) / 1" }, { "name": "metric_memory bandwidth read (MB/sec)", - "expression": "[UNC_M_CAS_COUNT.RD] * 64 / 1000000" + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" }, { "name": "metric_memory bandwidth write (MB/sec)", - "expression": "[UNC_M_CAS_COUNT.WR] * 64 / 1000000" + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" }, { "name": "metric_memory bandwidth total (MB/sec)", - "expression": "([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000" + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" }, { "name": "metric_LLC code read MPI (demand+prefetch)", @@ -118,7 +129,7 @@ }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]" + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]" }, { "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", @@ -130,15 +141,15 @@ }, { "name": "metric_Average LLC demand data read miss latency (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))" + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" }, { "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))" + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" }, { "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", - "expression": "(1000000000 * [UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]))" + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" }, { "name": "metric_ITLB (2nd level) MPI", @@ -166,162 +177,162 @@ }, { "name": "metric_uncore frequency GHz", - "expression": "[UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000" + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" }, { "name": "metric_TMA_Frontend_Bound(%)", - "expression": "100 * ([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / ([TOPDOWN.SLOTS]))" + "expression": "100 * ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) )" }, { "name": "metric_TMA_..Fetch_Latency(%)", - "expression": "100 * (5 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / [TOPDOWN.SLOTS])" + "expression": "100 * ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS] ) )" }, { "name": "metric_TMA_....ICache_Misses(%)", - "expression": "100 * ([ICACHE_16B.IFDATA_STALL] / [cpu-cycles])" + "expression": "100 * ( [ICACHE_16B.IFDATA_STALL] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_....ITLB_Misses(%)", - "expression": "100 * ([ICACHE_64B.IFTAG_STALL] / [cpu-cycles])" + "expression": "100 * ( [ICACHE_64B.IFTAG_STALL] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_....Branch_Resteers(%)", - "expression": "100 * ([INT_MISC.CLEAR_RESTEER_CYCLES] / [cpu-cycles] + (10 * [BACLEARS.ANY] / [cpu-cycles]))" + "expression": "100 * ( [INT_MISC.CLEAR_RESTEER_CYCLES] / ( [cpu-cycles] ) + ( ( 10 ) * [BACLEARS.ANY] / ( [cpu-cycles] ) ) )" }, { "name": "metric_TMA_......Mispredicts_Resteers(%)", - "expression": "100 * (([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * [INT_MISC.CLEAR_RESTEER_CYCLES] / [cpu-cycles])" + "expression": "100 * ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * [INT_MISC.CLEAR_RESTEER_CYCLES] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Clears_Resteers(%)", - "expression": "100 * ((1 - ([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT]))) * [INT_MISC.CLEAR_RESTEER_CYCLES] / [cpu-cycles])" + "expression": "100 * ( ( 1 - ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) ) * [INT_MISC.CLEAR_RESTEER_CYCLES] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Unknown_Branches(%)", - "expression": "100 * (10 * [BACLEARS.ANY] / [cpu-cycles])" + "expression": "100 * ( ( 10 ) * [BACLEARS.ANY] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_..Fetch_Bandwidth(%)", - "expression": "100 * max(0, (([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS]) - (5 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / [TOPDOWN.SLOTS])))" + "expression": "100 * ( max( 0 , ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) - ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS] ) ) ) )" }, { "name": "metric_TMA_Bad_Speculation(%)", - "expression": "100 * (max((1 - (([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS]) + ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) + (5 * [INT_MISC.RECOVERY_CYCLES_c1_e1]) / [TOPDOWN.SLOTS]) + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))))), 0))" + "expression": "100 * ( max( 1 - ( ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) , 0 ) )" }, { "name": "metric_TMA_..Branch_Mispredicts(%)", - "expression": "100 * (([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * (max((1 - (([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS]) + ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) + (5 * [INT_MISC.RECOVERY_CYCLES_c1_e1]) / [TOPDOWN.SLOTS]) + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))))), 0)))" + "expression": "100 * ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( max( 1 - ( ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) , 0 ) ) )" }, { "name": "metric_TMA_..Machine_Clears(%)", - "expression": "100 * (max(0, ((max((1 - (([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS]) + ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING])) + (5 * [INT_MISC.RECOVERY_CYCLES_c1_e1]) / [TOPDOWN.SLOTS]) + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))))), 0)) - (([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * (max((1 - (([PERF_METRICS.FRONTEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) - [INT_MISC.UOP_DROPPING] / [TOPDOWN.SLOTS]) + ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) + (5 * [INT_MISC.RECOVERY_CYCLES_c1_e1]) / [TOPDOWN.SLOTS]) + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING]))))), 0))))))" + "expression": "100 * ( max( 0 , ( max( 1 - ( ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) , 0 ) ) - ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( max( 1 - ( ( [PERF_METRICS.FRONTEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) - [INT_MISC.UOP_DROPPING] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) ) , 0 ) ) ) ) )" }, { "name": "metric_TMA_Backend_Bound(%)", - "expression": "100 * ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.BACKEND_BOUND] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING])) + ( 5 * [INT_MISC.RECOVERY_CYCLES_c1_e1]) / [TOPDOWN.SLOTS])" + "expression": "100 * ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) )" }, { "name": "metric_TMA_..Memory_Bound(%)", - "expression": "100 * ((([CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES]) / ([CYCLE_ACTIVITY.STALLS_Total] + ([EXE_ACTIVITY.1_PORTS_UTIL] + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) * [EXE_ACTIVITY.2_PORTS_UTIL]) + [EXE_ACTIVITY.BOUND_ON_STORES])) * ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) + (5 * [INT_MISC.RECOVERY_CYCLES_c1_e1]) / [TOPDOWN.SLOTS]))" + "expression": "100 * ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) )" }, { "name": "metric_TMA_....L1_Bound(%)", - "expression": "100 * max((([CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS]) / [cpu-cycles]), 0)" + "expression": "100 * ( max( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" }, { "name": "metric_TMA_......DTLB_Load(%)", - "expression": "100 * ((min((7 * [DTLB_LOAD_MISSES.STLB_HIT] + [DTLB_LOAD_MISSES.WALK_ACTIVE]), (max(([CYCLE_ACTIVITY.CYCLES_MEM_ANY] - [CYCLE_ACTIVITY.CYCLES_L1D_MISS]), 0))) / [cpu-cycles]) - ([DTLB_LOAD_MISSES.WALK_ACTIVE] / [cpu-cycles]))" + "expression": "100 * ( min( ( 7 ) * [DTLB_LOAD_MISSES.STLB_HIT:c1] + [DTLB_LOAD_MISSES.WALK_ACTIVE] , max( [CYCLE_ACTIVITY.CYCLES_MEM_ANY] - [CYCLE_ACTIVITY.CYCLES_L1D_MISS] , 0 ) ) / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Store_Fwd_Blk(%)", - "expression": "100 * (13 * [LD_BLOCKS.STORE_FORWARD] / [cpu-cycles])" + "expression": "100 * ( min( ( 13 * [LD_BLOCKS.STORE_FORWARD] / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_....L2_Bound(%)", - "expression": "100 * ((([MEM_LOAD_RETIRED.L2_HIT] * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]))) / (([MEM_LOAD_RETIRED.L2_HIT] * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]))) + [L1D_PEND_MISS.FB_FULL_PERIODS])) * (([CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS]) / [cpu-cycles]))" + "expression": "100 * ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) )" }, { "name": "metric_TMA_....L3_Bound(%)", - "expression": "100 * (([CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_Miss]) / [cpu-cycles])" + "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Contested_Accesses(%)", - "expression": "100 * ((((48 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000)) -(4 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000))) * ([MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD] * ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD]))) + ((47.5 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000)) - (4 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000))) * [MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS]) * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]) / 2) / [cpu-cycles])" + "expression": "100 * ( min( ( ( ( ( 48 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) - ( 4 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) ) * ( [MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM] * ( [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ( [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD] ) ) ) + ( ( 47.5 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) - ( 4 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) ) * ( [MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS] ) ) * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) / 2 ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_......Data_Sharing(%)", - "expression": "100 * (((47.5 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000)) - (4 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000))) * ([MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD] + [MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD] * (1 - ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ([OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD])))) * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]) / 2) / [cpu-cycles])" + "expression": "100 * ( min( ( ( ( 47.5 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) - ( 4 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) ) * ( [MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT] + [MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM] * ( 1 - ( [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] / ( [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM] + [OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD] ) ) ) ) * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) / 2 ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_......L3_Hit_Latency(%)", - "expression": "100 * (((23 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000)) - (4 * (([cpu-cycles] / [ref-cycles]) * [SYSTEM_TSC_FREQ] / 1000000000))) * [MEM_LOAD_RETIRED.L3_HIT] * (1 + ([MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS]) / 2) / [cpu-cycles])" + "expression": "100 * ( min( ( ( ( 23 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) - ( 4 * ( ( ( [cpu-cycles] ) / [ref-cycles] ) * [SYSTEM_TSC_FREQ] / ( 1000000000 ) / ( 1000 / 1000 ) ) ) ) * ( [MEM_LOAD_RETIRED.L3_HIT] ) * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) / 2 ) / ( [cpu-cycles] ) ) , ( 1 ) ) )" }, { "name": "metric_TMA_......SQ_Full(%)", - "expression": "100 * ([L1D_PEND_MISS.L2_STALL] / [cpu-cycles])" + "expression": "100 * ( [L1D_PEND_MISS.L2_STALL] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......MEM_Bandwidth(%)", - "expression": "100 * ((min(([cpu-cycles] - 0), ([OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD_c4] - 0))) / [cpu-cycles])" + "expression": "100 * ( ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD:c4] ) ) / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......MEM_Latency(%)", - "expression": "100 * ((min(([cpu-cycles] - 0), ([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD] - 0))) / [cpu-cycles] - ((min(([cpu-cycles] - 0), ([OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD_c4] - 0))) / [cpu-cycles]))" + "expression": "100 * ( ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD] ) ) / ( [cpu-cycles] ) - ( ( min( [cpu-cycles] , [OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD:c4] ) ) / ( [cpu-cycles] ) ) )" }, { "name": "metric_TMA_....Store_Bound(%)", - "expression": "100 * ([EXE_ACTIVITY.BOUND_ON_STORES] / [cpu-cycles])" + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_..Core_Bound(%)", - "expression": "100 * (max(0, (([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) + (5 * [INT_MISC.RECOVERY_CYCLES_c1_e1]) / [TOPDOWN.SLOTS]) - ((([CYCLE_ACTIVITY.CYCLES_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES]) / ([CYCLE_ACTIVITY.STALLS_Total] + ([EXE_ACTIVITY.1_PORTS_UTIL] + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) * [EXE_ACTIVITY.2_PORTS_UTIL]) + [EXE_ACTIVITY.BOUND_ON_STORES])) * ([PERF_METRICS.BACKEND_BOUND] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND])) + (5 * [INT_MISC.RECOVERY_CYCLES_c1_e1]) / [TOPDOWN.SLOTS])))))" + "expression": "100 * ( max( 0 , ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) - ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( [PERF_METRICS.BACKEND_BOUND] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) + ( ( 5 ) * [INT_MISC.RECOVERY_CYCLES:c1:e1] ) / ( [TOPDOWN.SLOTS] ) ) ) ) )" }, { "name": "metric_TMA_....Divider(%)", - "expression": "100 * ([ARITH.DIVIDER_ACTIVE] / [cpu-cycles])" + "expression": "100 * ( [ARITH.DIVIDER_ACTIVE] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_....Ports_Utilization(%)", - "expression": "100 * ((([CYCLE_ACTIVITY.STALLS_Total] - [CYCLE_ACTIVITY.CYCLES_MEM_ANY] + ([EXE_ACTIVITY.1_PORTS_UTIL] + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) * [EXE_ACTIVITY.2_PORTS_UTIL])) / [cpu-cycles]) if ([ARITH.DIVIDER_ACTIVE] - 0) < ([CYCLE_ACTIVITY.STALLS_Total] - [CYCLE_ACTIVITY.CYCLES_MEM_ANY]) else (([EXE_ACTIVITY.1_PORTS_UTIL] + ([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) * [EXE_ACTIVITY.2_PORTS_UTIL]) / [cpu-cycles]))" + "expression": "100 * ( ( [EXE_ACTIVITY.3_PORTS_UTIL:u0x80] + ( [RESOURCE_STALLS.SCOREBOARD] / ( [cpu-cycles] ) ) * ( [CYCLE_ACTIVITY.STALLS_TOTAL] - [CYCLE_ACTIVITY.STALLS_MEM_ANY] ) + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) ) / ( [cpu-cycles] ) if ( [ARITH.DIVIDER_ACTIVE] < ( [CYCLE_ACTIVITY.STALLS_TOTAL] - [CYCLE_ACTIVITY.STALLS_MEM_ANY] ) ) else ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Ports_Utilized_0(%)", - "expression": "100 * max(0, (([CYCLE_ACTIVITY.STALLS_Total] - [CYCLE_ACTIVITY.CYCLES_MEM_ANY]) / [cpu-cycles]))" + "expression": "100 * ( [EXE_ACTIVITY.3_PORTS_UTIL:u0x80] / ( [cpu-cycles] ) + ( [RESOURCE_STALLS.SCOREBOARD] / ( [cpu-cycles] ) ) * ( [CYCLE_ACTIVITY.STALLS_TOTAL] - [CYCLE_ACTIVITY.STALLS_MEM_ANY] ) / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Ports_Utilized_1(%)", - "expression": "100 * ([EXE_ACTIVITY.1_PORTS_UTIL] / [cpu-cycles])" + "expression": "100 * ( [EXE_ACTIVITY.1_PORTS_UTIL] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Ports_Utilized_2(%)", - "expression": "100 * ([EXE_ACTIVITY.2_PORTS_UTIL] / [cpu-cycles])" + "expression": "100 * ( [EXE_ACTIVITY.2_PORTS_UTIL] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_......Ports_Utilized_3m(%)", - "expression": "100 * [UOPS_EXECUTED.CYCLES_GE_3] / [cpu-cycles]" + "expression": "100 * ( [UOPS_EXECUTED.CYCLES_GE_3] / ( [cpu-cycles] ) )" }, { "name": "metric_TMA_Retiring(%)", - "expression": "100 * ([PERF_METRICS.RETIRING] / ([PERF_METRICS.RETIRING] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.BACKEND_BOUND]))" + "expression": "100 * ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) )" }, { "name": "metric_TMA_..Light_Operations(%)", - "expression": "100 * (max(0, (([PERF_METRICS.RETIRING] / (([PERF_METRICS.RETIRING] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.BACKEND_BOUND]))) - ((((([PERF_METRICS.RETIRING] / (([PERF_METRICS.RETIRING] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.BACKEND_BOUND]))) * [TOPDOWN.SLOTS]) / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / [TOPDOWN.SLOTS])))))" + "expression": "100 * ( max( 0 , ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) - ( ( ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) )" }, { "name": "metric_TMA_..Heavy_Operations(%)", - "expression": "100 * ((((([PERF_METRICS.RETIRING] / (([PERF_METRICS.RETIRING] + [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.BACKEND_BOUND]))) * [TOPDOWN.SLOTS]) / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / [TOPDOWN.SLOTS]))" + "expression": "100 * ( ( ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS] ) ) + ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] )" }, { "name": "metric_TMA_....Microcode_Sequencer(%)", - "expression": "100 * (((([PERF_METRICS.RETIRING] / (([PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND]))) * [TOPDOWN.SLOTS]) / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / [TOPDOWN.SLOTS])" + "expression": "100 * ( ( ( ( [PERF_METRICS.RETIRING] / ( [PERF_METRICS.FRONTEND_BOUND] + [PERF_METRICS.BAD_SPECULATION] + [PERF_METRICS.RETIRING] + [PERF_METRICS.BACKEND_BOUND] ) ) * ( [TOPDOWN.SLOTS] ) ) / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS] ) )" }, { - "name": "metric_TMA_Info_CoreIPC", + "name": "metric_TMA_Info_Core_CoreIPC", "expression": "[instructions] / [CPU_CLK_UNHALTED.DISTRIBUTED]" }, { "name": "metric_TMA_Info_System_SMT_2T_Utilization", - "expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [SOCKET_COUNT] > 1 else 0" + "expression": "1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED] if [SOCKET_COUNT] > 1 else 0" } ] \ No newline at end of file diff --git a/events/spr.txt b/events/spr.txt index b79903e..16bc52a 100644 --- a/events/spr.txt +++ b/events/spr.txt @@ -17,8 +17,9 @@ cpu/event=0x80,umask=0x04,period=500009,name='ICACHE_DATA.STALLS'/, cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_TAG.STALLS'/, cpu/event=0xa3,umask=0x08,cmask=0x08,period=1000003,name='CYCLE_ACTIVITY.CYCLES_L1D_MISS'/, cpu/event=0xa3,umask=0x10,cmask=0x10,period=1000003,name='CYCLE_ACTIVITY.CYCLES_MEM_ANY'/, -cpu-cycles; - +cpu-cycles, +ref-cycles, +instructions; cpu/event=0x25,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, @@ -32,21 +33,17 @@ cpu/event=0x12,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, cpu/event=0x12,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, cpu/event=0x13,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, -cpu-cycles:k, -ref-cycles:k, -instructions:k; - -#C6 -cstate_core/c6-residency/; -cstate_pkg/c6-residency/; +cpu-cycles, +ref-cycles, +instructions; +# OFFCORE cpu/event=0x20,umask=0x08,cmask=0x01,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD'/, cpu/event=0x20,umask=0x08,cmask=0x04,period=1000003,name='OFFCORE_REQUESTS_OUTSTANDING.DATA_RD:c4'/, cpu-cycles, ref-cycles, instructions; -#TMA related cpu/event=0x00,umask=0x04,period=10000003,name='TOPDOWN.SLOTS'/, cpu/event=0x00,umask=0x81,period=10000003,name='PERF_METRICS.BAD_SPECULATION'/, cpu/event=0x00,umask=0x83,period=10000003,name='PERF_METRICS.BACKEND_BOUND'/, @@ -55,18 +52,20 @@ cpu/event=0x00,umask=0x80,period=10000003,name='PERF_METRICS.RETIRING'/, cpu/event=0x00,umask=0x86,period=10000003,name='PERF_METRICS.FETCH_LATENCY'/, cpu/event=0x00,umask=0x87,period=10000003,name='PERF_METRICS.MEMORY_BOUND'/, cpu/event=0x00,umask=0x85,period=10000003,name='PERF_METRICS.BRANCH_MISPREDICTS'/, -cpu/event=0x00,umask=0x84,period=10000003,name='PERF_METRICS.HEAVY_OPERATIONS'/; - +cpu/event=0x00,umask=0x84,period=10000003,name='PERF_METRICS.HEAVY_OPERATIONS'/, cpu/event=0xad,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, cpu/event=0xad,umask=0x40,period=1000003,name='INT_MISC.UNKNOWN_BRANCH_CYCLES'/, cpu/event=0xa6,umask=0x21,cmask=0x05,period=2000003,name='EXE_ACTIVITY.BOUND_ON_LOADS'/, -cpu-cycles; +cpu-cycles, +ref-cycles, +instructions; cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, cpu/event=0x12,umask=0x20,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.STLB_HIT:c1'/, cpu/event=0x12,umask=0x10,cmask=0x01,period=100003,name='DTLB_LOAD_MISSES.WALK_ACTIVE'/, cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/, cpu-cycles, +ref-cycles, instructions; cpu/event=0x47,umask=0x09,cmask=0x09,period=1000003,name='MEMORY_ACTIVITY.STALLS_L3_MISS'/, @@ -74,6 +73,7 @@ cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_ cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, cpu-cycles, +ref-cycles, instructions; cpu/event=0x43,umask=0xfd,period=2000003,name='MEM_LOAD_COMPLETED.L1_MISS_ANY'/, @@ -81,50 +81,71 @@ cpu/event=0xa2,umask=0x02,period=2000003,name='RESOURCE_STALLS.SCOREBOARD'/, cpu/event=0xa6,umask=0x80,period=2000003,name='EXE_ACTIVITY.3_PORTS_UTIL:u0x80'/, cpu/event=0xa6,umask=0xc,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL:u0xc'/, cpu-cycles, +ref-cycles, instructions; cpu/event=0xad,umask=0x80,period=500009,name='INT_MISC.CLEAR_RESTEER_CYCLES'/, cpu/event=0xb1,umask=0x01,cmask=0x03,period=2000003,name='UOPS_EXECUTED.CYCLES_GE_3'/, cpu/event=0x48,umask=0x01,period=1000003,name='L1D_PEND_MISS.PENDING'/, cpu/event=0x03,umask=0x88,period=100003,name='LD_BLOCKS.NO_SR'/, -cpu-cycles; +cpu-cycles, +ref-cycles, +instructions; +cpu/event=0x00,umask=0x04,period=10000003,name='TOPDOWN.SLOTS'/, cpu/event=0xc2,umask=0x04,period=2000003,name='UOPS_RETIRED.MS'/, cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, cpu/event=0x47,umask=0x02,cmask=0x02,period=1000003,name='MEMORY_ACTIVITY.CYCLES_L1D_MISS'/, -cpu-cycles; +cpu-cycles, +ref-cycles, +instructions; cpu/event=0x3c,umask=0x02,period=25003,name='CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE'/, cpu/event=0x3c,umask=0x08,period=2000003,name='CPU_CLK_UNHALTED.REF_DISTRIBUTED'/, -cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/; +cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/, +cpu-cycles, +ref-cycles, +instructions; #offcore response cpu/event=0x2a,umask=0x01,offcore_rsp=0x104004477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, -cpu/event=0x2a,umask=0x01,offcore_rsp=0x730004477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/; +cpu/event=0x2a,umask=0x01,offcore_rsp=0x730004477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; cpu/event=0x2a,umask=0x01,offcore_rsp=0x1030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, -cpu/event=0x2a,umask=0x01,offcore_rsp=0x830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; +cpu/event=0x2a,umask=0x01,offcore_rsp=0x830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/, +cpu-cycles, +ref-cycles, +instructions; cpu/event=0x2a,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, -cpu/event=0x2a,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/; +cpu/event=0x2a,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, +cpu-cycles, +ref-cycles, +instructions; -#power related +#C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +#power power/energy-pkg/, power/energy-ram/; -#UPI related +#UPI upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/, upi/event=0x02,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/, upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/; -cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/; - +#CHA (Cache) +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, cha/event=0x35,umask=0xc8177e01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, -cha/event=0x36,umask=0xc816fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/; - +cha/event=0x36,umask=0xc816fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/, cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; @@ -133,13 +154,12 @@ cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, cha/event=0x36,umask=0xC817fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; - -#IO Bandwidth +#CHA (IO Bandwidth) cha/event=0x35,umask=0xc8f3ff04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/, cha/event=0x35,umask=0xCC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, cha/event=0x35,umask=0xCD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/, cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; -#memory read/writes +#IMC (memory read/writes) imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/, imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/; \ No newline at end of file diff --git a/perf-collect.py b/perf-collect.py index 36f0f2a..2247b74 100644 --- a/perf-collect.py +++ b/perf-collect.py @@ -198,15 +198,12 @@ def validate_file(fname): runmode.add_argument( "--socket", help="Collect for socket metrics", action="store_true" ) - parser.add_argument( - "-V", "--version", help="display version info", action="store_true" - ) parser.add_argument( "-m", "--muxinterval", type=int, default=10, - help="event mux interval in milli seconds, default=0 i.e. will use the system default", + help="event mux interval in milli seconds, default=10", ) parser.add_argument( "-o", @@ -218,6 +215,9 @@ def validate_file(fname): parser.add_argument( "-v", "--verbose", help="Display debugging information", action="store_true" ) + parser.add_argument( + "-V", "--version", help="display version info", action="store_true" + ) args = parser.parse_args() if args.version: @@ -228,16 +228,7 @@ def validate_file(fname): crash("Must run PerfSpect as root, please re-run") # disable nmi watchdog before collecting perf - nmi_watchdog = 0 - try: - with open("/proc/sys/kernel/nmi_watchdog", "r+") as f_nmi: - nmi_watchdog = f_nmi.read() - if int(nmi_watchdog) != 0: - f_nmi.write("0") - logging.info("nmi_watchdog disabled") - except FileNotFoundError: - pass - + nmi_watchdog = perf_helpers.disable_nmi_watchdog() initial_pmus = perf_helpers.pmu_contention_detect() interval = 1000 @@ -315,6 +306,7 @@ def validate_file(fname): or args.socket or not have_uncore ), + args.pid is not None or args.cid is not None, ) collection_type = "-a" if not args.thread and not args.socket else "-a -A" @@ -407,10 +399,8 @@ def validate_file(fname): os.chmod(args.outcsv, 0o666) # nosec # reset nmi_watchdog to what it was before running perfspect - with open("/proc/sys/kernel/nmi_watchdog", "w") as f_nmi: - if int(nmi_watchdog) != 0: - f_nmi.write(nmi_watchdog) - logging.info("nmi_watchdog re-enabled") + if nmi_watchdog != 0: + perf_helpers.enable_nmi_watchdog() perf_helpers.set_perf_event_mux_interval(True, 1, mux_intervals) diff --git a/perf-postprocess.py b/perf-postprocess.py index 2fc805b..7628725 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -300,7 +300,6 @@ def get_metadata_as_dict(meta_data_lines): meta_data["SOCKET_CORES"] = [] cores = ((line.split("\n")[0]).split(",")[1]).split(";")[:-1] meta_data["SOCKET_CORES"].append(cores) - return meta_data @@ -595,9 +594,17 @@ def generate_metrics( "MISSING EVENTS": set(), "MULTIPLE GROUPS": set(), } - + prev_time_slice = 0 for time_slice, item in time_slice_groups: - time_slice_df = time_slice_groups.get_group(time_slice) + time_slice_df = time_slice_groups.get_group(time_slice).copy() + # normalize by difference between current time slice and previous time slice + # this ensures that all our events are per-second, even if perf is collecting + # over a longer time slice + time_slice_float = float(time_slice) + time_slice_df["value"] = time_slice_df["value"] / ( + time_slice_float - prev_time_slice + ) + prev_time_slice = time_slice_float current_group_indx = 0 group_to_df = {} start_index = 0 @@ -620,133 +627,97 @@ def generate_metrics( metrics_results = {} for m in metrics: - non_constant_mertics = [] + non_constant_events = [] exp_to_evaluate = m["expression"] # substitute constants for event in m["events"]: - if ( - event.upper() in metadata["constants"] - ): # all constants are save in metadata in Uppercase + # replace constants + if event.upper() in metadata["constants"]: exp_to_evaluate = exp_to_evaluate.replace( "[" + event + "]", str(metadata["constants"][event.upper()]) ) else: - non_constant_mertics.append(event) - - # find a single group with the events - single_group = False - for g in group_to_event: - if set(non_constant_mertics) <= set( - group_to_event[g] - ): # if all events in metric m exist in group g - single_group = True - g_df = group_to_df[g] - expressions_to_evaluate = {} - for event in non_constant_mertics: + non_constant_events.append(event) + # find non-constant events in groups + remaining_events_to_find = list(non_constant_events) + expressions_to_evaluate = {} + passes = 0 + while len(remaining_events_to_find) > 0: + if ( + passes == 1 + and verbose + and m["name"] not in errors["MULTIPLE GROUPS"] + ): + errors["MULTIPLE GROUPS"].add(m["name"]) + logging.warning( + f'MULTIPLE GROUPS: metric "{m["name"]}", events "{set(non_constant_events)}"' + ) + passes += 1 + # find best group for remaining events + diff_size = sys.maxsize # big number + best_group = None + for group, events in group_to_event.items(): + ds = len(set(remaining_events_to_find) - set(events)) + if ds < diff_size and ds < len(set(remaining_events_to_find)): + diff_size = ds + best_group = group + if diff_size == 0: + break + if best_group is None: + break + for event in remaining_events_to_find[:]: + if event in group_to_event[best_group]: + remaining_events_to_find.remove(event) + g_df = group_to_df[best_group] event_df = g_df.loc[event] get_event_expression_from_group( - expressions_to_evaluate, event_df, exp_to_evaluate, event + expressions_to_evaluate, + event_df, + exp_to_evaluate, + event, ) - for instance in expressions_to_evaluate: - if ( - "[" in expressions_to_evaluate[instance] - or "]" in expressions_to_evaluate[instance] - ): - if verbose: - errors["MISSING DATA"].add(m["name"]) - log_skip_metric( - m, expressions_to_evaluate[instance], "MISSING DATA" - ) - continue # cannot evaluate expression, skipping - try: - result = str( - "{:.8f}".format( - simple_eval( - expressions_to_evaluate[instance], - functions={"min": min, "max": max}, - ) - ) - ) - except ZeroDivisionError: - if verbose: - errors["ZERO DIVISION"].add(m["name"]) - log_skip_metric( - m, - expressions_to_evaluate[instance], - "ZERO DIVISION", - ) - result = 0 - sub_txt = "" if instance == "sys" else "." + instance - metrics_results[m["name"] + sub_txt] = float(result) - break # no need to check other groups - if not single_group: - if verbose: - errors["MULTIPLE GROUPS"].add(m["name"]) - logging.warning('MULTIPLE GROUPS: metric "' + m["name"] + '"') - # get events from multiple groups - remaining_events_to_find = list(non_constant_mertics) - expressions_to_evaluate = {} - for event in non_constant_mertics: - for g in group_to_event: - if event in group_to_event[g]: - remaining_events_to_find.remove(event) - g_df = group_to_df[g] - event_df = g_df.loc[event] - get_event_expression_from_group( - expressions_to_evaluate, - event_df, - exp_to_evaluate, - event, + if len(remaining_events_to_find) == 0: # all events are found + # instance is either system, specific core, or specific socket + for instance in expressions_to_evaluate: + if ( + "[" in expressions_to_evaluate[instance] + or "]" in expressions_to_evaluate[instance] + ): + if verbose and m["name"] not in errors["MISSING DATA"]: + errors["MISSING DATA"].add(m["name"]) + log_skip_metric( + m, expressions_to_evaluate[instance], "MISSING DATA" ) - break # no need to check in other groups - - if len(remaining_events_to_find) == 0: # all events are found - for ( - instance - ) in ( - expressions_to_evaluate - ): # instance is either system, specific core, or specific socket - if ( - "[" in expressions_to_evaluate[instance] - or "]" in expressions_to_evaluate[instance] - ): - if verbose: - errors["MISSING DATA"].add(m["name"]) - log_skip_metric( - m, expressions_to_evaluate[instance], "MISSING DATA" - ) - continue - try: - result = str( - "{:.8f}".format( - simple_eval( - expressions_to_evaluate[instance], - functions={"min": min, "max": max}, - ) - ) + continue + try: + result = "{:.8f}".format( + simple_eval( + expressions_to_evaluate[instance], + functions={"min": min, "max": max}, ) - except ZeroDivisionError: - if verbose: - errors["ZERO DIVISION"].add(m["name"]) - log_skip_metric( - m, - expressions_to_evaluate[instance], - "ZERO DIVISION", - ) - result = 0 - sub_txt = "" if instance == "sys" else "." + instance - metrics_results[m["name"] + sub_txt] = float(result) - else: # some events are missing - if verbose: - logging.warning( - 'MISSING EVENTS: metric "' - + m["name"] - + '" events "' - + str(remaining_events_to_find) - + '"' ) - errors["MISSING EVENTS"].update(remaining_events_to_find) - continue # skip metric + except ZeroDivisionError: + if verbose and m["name"] not in errors["ZERO DIVISION"]: + errors["ZERO DIVISION"].add(m["name"]) + log_skip_metric( + m, + expressions_to_evaluate[instance], + "ZERO DIVISION", + ) + result = 0 + sub_txt = "" if instance == "sys" else "." + instance + metrics_results[m["name"] + sub_txt] = float(result) + else: # some events are missing + if verbose and m["name"] not in errors["MISSING EVENTS"]: + logging.warning( + 'MISSING EVENTS: metric "' + + m["name"] + + '" events "' + + str(remaining_events_to_find) + + '"' + ) + errors["MISSING EVENTS"].add(m["name"]) + continue # skip metric time_metrics_result[time_slice] = metrics_results time_series_df = pd.DataFrame(time_metrics_result) if verbose: diff --git a/similarity-analyzer/_version.txt b/similarity-analyzer/_version.txt new file mode 100644 index 0000000..1cc5f65 --- /dev/null +++ b/similarity-analyzer/_version.txt @@ -0,0 +1 @@ +1.1.0 \ No newline at end of file diff --git a/similarity-analyzer/dopca.py b/similarity-analyzer/dopca.py index 8bbdf0c..95daba3 100644 --- a/similarity-analyzer/dopca.py +++ b/similarity-analyzer/dopca.py @@ -138,7 +138,7 @@ def dopca(dataset, colnames, n_components, cols): # PCA analysis, Create PCA model #pca = PCA(n_components=n_components) #Limitation: If the n_components(no of workloads) are greater than num_val(the number of features), it will throw error. - n_components = len(num_val[0]) #Solution: To scale it for any number of workloads, generate PCAs equivalent to number of features/performance matrics (instead of number of workloads) that we have for each workload. + n_components = min(len(num_val), len(colnames)) #Solution: To scale it for any number of workloads, generate PCAs equivalent to number of features/performance matrics (instead of number of workloads) that we have for each workload. pca = PCA(n_components=n_components) # transform diff --git a/src/basic_stats.py b/src/basic_stats.py index 3d39279..75288ff 100644 --- a/src/basic_stats.py +++ b/src/basic_stats.py @@ -137,7 +137,7 @@ def get_stats_plot(input_file, arch): figure_to_column_dict["TMA"] = { "metrics_prefixes": [ "metric_TMA_Frontend_Bound(%)", - "metric_TMA_Backend_bound(%)", + "metric_TMA_Backend_Bound(%)", ], "Y_axis_text": "Percentage", "name_prefix": ["TMA_Frontend", "TMA_Backend"], diff --git a/src/perf_helpers.py b/src/perf_helpers.py index 4843295..f3ed434 100644 --- a/src/perf_helpers.py +++ b/src/perf_helpers.py @@ -149,6 +149,44 @@ def get_perf_event_mux_interval(): return mux_interval +# disable nmi watchdog and return its initial status +# to restore it after collection +def disable_nmi_watchdog(): + try: + proc_output = subprocess.check_output(["cat", "/proc/sys/kernel/nmi_watchdog"]) + nmi_watchdog_status = int(proc_output.decode().strip()) + if nmi_watchdog_status == 1: + proc_output = subprocess.check_output(["sysctl", "kernel.nmi_watchdog=0"]) + new_watchdog_status = int( + proc_output.decode().strip().replace("kernel.nmi_watchdog = ", "") + ) + if new_watchdog_status != 0: + crash("Failed to disable nmi watchdog!") + logging.info( + "nmi_watchdog is temporary disabled. Will enable after collection." + ) + else: + logging.info("nmi_watchdog disabled!") + return nmi_watchdog_status + except (subprocess.CalledProcessError, ValueError) as e: + crash(e.output + "\nFailed to disable nmi_watchdog.") + + +# enable nmi watchdog +def enable_nmi_watchdog(): + try: + proc_output = subprocess.check_output(["sysctl", "kernel.nmi_watchdog=1"]) + new_watchdog_status = int( + proc_output.decode().strip().replace("kernel.nmi_watchdog = ", "") + ) + if new_watchdog_status != 1: + logging.warning("Failed to re-enable nmi_watchdog!") + else: + logging.info("nmi_watchdog enabled!") + except (subprocess.CalledProcessError, ValueError) as e: + logging.warning(e.output + "\nFailed to re-enable nmi_watchdog!") + + # set/reset perf event mux interval for pmu events def set_perf_event_mux_interval(reset, interval_ms, mux_interval): for f in os.listdir("/sys/devices"): diff --git a/src/prepare_perf_events.py b/src/prepare_perf_events.py index a97d229..dd2230c 100644 --- a/src/prepare_perf_events.py +++ b/src/prepare_perf_events.py @@ -116,7 +116,7 @@ def get_cgroup_events_format(cgroups, events, num_events): return perf_format -def filter_events(event_file, cpu_only): +def filter_events(event_file, cpu_only, PID_CID_mode): if not os.path.isfile(event_file): crash("event file not found") collection_events = [] @@ -131,6 +131,8 @@ def filter_events(event_file, cpu_only): or (cpu_only and not is_cpu_event(line)) ): continue + if PID_CID_mode and line.startswith("cstate_"): + continue if not is_collectable_event(line, perf_list): # not a collectable event unsupported_events.append(line) @@ -149,14 +151,16 @@ def filter_events(event_file, cpu_only): return collection_events, unsupported_events -def prepare_perf_events(event_file, cpu_only): +def prepare_perf_events(event_file, cpu_only, PID_CID_mode): start_group = "'{" end_group = "}'" group = "" prev_group = "" new_group = True - collection_events, unsupported_events = filter_events(event_file, cpu_only) + collection_events, unsupported_events = filter_events( + event_file, cpu_only, PID_CID_mode + ) core_event = [] uncore_event = [] event_names = []