diff --git a/Makefile b/Makefile index d402199..0a1b0e1 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,6 @@ dist/$(PACKAGE_EXTERNAL): build_dir build/libtsc build-public/collect build-publ cp build/$(BINARY_COLLECT) dist/$(BINARY_FINAL)/$(BINARY_COLLECT) cp build/$(BINARY_POSTPROCESS) dist/$(BINARY_FINAL)/$(BINARY_POSTPROCESS) cp LICENSE dist/$(BINARY_FINAL)/ - cp README.md dist/$(BINARY_FINAL)/README.md cd dist && tar -czf $(PACKAGE_EXTERNAL) $(BINARY_FINAL) cd dist && cp -r $(BINARY_FINAL) ../build/ rm -rf dist/$(BINARY_FINAL)/ diff --git a/README.md b/README.md index d0baa13..45ceb49 100644 --- a/README.md +++ b/README.md @@ -90,4 +90,4 @@ Requires recent python. On successful build, binaries will be created in `dist` ``` pip3 install -r requirements.txt make -``` +``` \ No newline at end of file diff --git a/_version.txt b/_version.txt index 31e5c84..d0149fe 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.3.3 +1.3.4 diff --git a/events/bdx.txt b/events/bdx.txt index 7cc2025..2e1ffd8 100644 --- a/events/bdx.txt +++ b/events/bdx.txt @@ -3,8 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -# Broadwell event list (default, with extensive TMA collection) - +# Broadwell event list cpu/event=0xc2,umask=0x02,period=2000003,name='UOPS_RETIRED.RETIRE_SLOTS'/, cpu/event=0xc5,umask=0x00,name='BR_MISP_RETIRED.ALL_BRANCHES'/, cpu/event=0xc3,umask=0x01,name='MACHINE_CLEARS.COUNT'/, @@ -173,4 +172,3 @@ imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/; #power related power/energy-pkg/, power/energy-ram/; - diff --git a/events/clx_skx.txt b/events/clx_skx.txt index ed2b2c5..9c2abbb 100644 --- a/events/clx_skx.txt +++ b/events/clx_skx.txt @@ -3,8 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -# Cascadelake event list (default, with extensive TMA 
collection) - +# Cascadelake event list #avx related power levels cpu/event=0x28,umask=0x07,period=200003,name='CORE_POWER.LVL0_TURBO_LICENSE'/, cpu/event=0x28,umask=0x18,period=200003,name='CORE_POWER.LVL1_TURBO_LICENSE'/, @@ -239,4 +238,4 @@ iio/event=0x83,umask=0x01,ch_mask=0x08,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CP upi/event=0x2,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/, upi/event=0x2,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/, upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/, -upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/; +upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/; \ No newline at end of file diff --git a/events/icx.txt b/events/icx.txt index 8a5cdef..590e01b 100644 --- a/events/icx.txt +++ b/events/icx.txt @@ -3,7 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -# Icelake event list (default) +# Icelake event list cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, diff --git a/events/metric_bdx.json b/events/metric_bdx.json index b39fa82..921d8d9 100644 --- a/events/metric_bdx.json +++ b/events/metric_bdx.json @@ -29,7 +29,7 @@ "name": "metric_locks retired per instr", "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", - "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { @@ -386,4 +386,4 @@ "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] )/ (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", "origin": "perfspect" } -] +] \ No newline at end of file diff --git a/events/metric_icx.json b/events/metric_icx.json index 1fd9f22..9448c37 100644 --- 
a/events/metric_icx.json +++ b/events/metric_icx.json @@ -14,22 +14,22 @@ }, { "name": "metric_CPI", - "name-txn": "metric_cycles per txn", + "name-txn": "metric_cycles per txn", "expression": "[cpu-cycles] / [instructions]", - "expression-txn": "[cpu-cycles] / [TXN]" + "expression-txn": "[cpu-cycles] / [TXN]" }, { "name": "metric_kernel_CPI", - "name-txn": "metric_kernel_cycles per txn", + "name-txn": "metric_kernel_cycles per txn", "expression": "[cpu-cycles:k] / [instructions:k]", - "expression-txn": "[cpu-cycles:k] / [TXN]", + "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, { "name": "metric_IPC", - "name-txn": "metric_txn per cycles", + "name-txn": "metric_txn per cycles", "expression": "[instructions] / [cpu-cycles]", - "expression-txn": "[instructions] / [TXN]", + "expression-txn": "[instructions] / [TXN]", "origin": "perfspect" }, { @@ -39,52 +39,52 @@ }, { "name": "metric_locks retired per instr", - "name-txn": "metric_locks retired per txn", + "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", - "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", "expression": "[L1D.REPLACEMENT] / [instructions]", - "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" }, { "name": "metric_L1D demand data read hits per instr", - "name-txn": "metric_L1D demand data read hits per txn", + "name-txn": "metric_L1D demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" }, { "name": "metric_L1-I code read 
misses (w/ prefetches) per instr", - "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", - "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" }, { "name": "metric_L2 demand data read hits per instr", - "name-txn": "metric_L2 demand data read hits per txn", + "name-txn": "metric_L2 demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" }, { "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", "expression": "[L2_LINES_IN.ALL] / [instructions]", - "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" }, { "name": "metric_L2 demand data read MPI", - "name-txn": "metric_L2 demand data read misses per txn", + "name-txn": "metric_L2 demand data read misses per txn", "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" }, { "name": "metric_L2 demand code MPI", - "name-txn": "metric_L2 demand code misses per txn", + "name-txn": "metric_L2 demand code misses per txn", "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", - "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, { "name": "metric_Average LLC data read miss latency (in clks)", @@ -160,27 +160,27 @@ }, { "name": "metric_LLC code read MPI (demand+prefetch)", - "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "name-txn": "metric_LLC code read 
(demand+prefetch) misses per txn", "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" }, { "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", - "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" }, { "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", - "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" }, { 
"name": "metric_Average LLC demand data read miss latency (in ns)", @@ -196,27 +196,27 @@ }, { "name": "metric_ITLB (2nd level) MPI", - "name-txn": "metric_ITLB (2nd level) misses per txn", + "name-txn": "metric_ITLB (2nd level) misses per txn", "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) load MPI", - "name-txn": "metric_DTLB (2nd level) load misses per txn", + "name-txn": "metric_DTLB (2nd level) load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) 2MB large page load MPI", - "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB (2nd level) store MPI", - "name-txn": "metric_DTLB (2nd level) store misses per txn", + "name-txn": "metric_DTLB (2nd level) store misses per txn", "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_NUMA %_Reads addressed to local DRAM", @@ -394,4 +394,4 @@ "name": "metric_TMA_Info_System_SMT_2T_Utilization", "expression": "1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED] if [SOCKET_COUNT] > 1 else 0" } -] +] \ No newline at end of file diff --git a/events/metric_skx_clx.json b/events/metric_skx_clx.json index 5887e67..4caf3fa 100644 --- 
a/events/metric_skx_clx.json +++ b/events/metric_skx_clx.json @@ -14,97 +14,97 @@ }, { "name": "metric_CPI", - "name-txn": "metric_cycles per txn", + "name-txn": "metric_cycles per txn", "expression": "[cpu-cycles] / [instructions]", - "expression-txn": "[cpu-cycles] / [TXN]" + "expression-txn": "[cpu-cycles] / [TXN]" }, { "name": "metric_kernel_CPI", - "name-txn": "metric_kernel_cycles per txn", + "name-txn": "metric_kernel_cycles per txn", "expression": "[cpu-cycles:k] / [instructions:k]", - "expression-txn": "[cpu-cycles:k] / [TXN]", + "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, { "name": "metric_locks retired per instr", - "name-txn": "metric_locks retired per txn", + "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", - "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", "expression": "[L1D.REPLACEMENT] / [instructions]", - "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" }, { "name": "metric_L1D demand data read hits per instr", - "name-txn": "metric_L1D demand data read hits per txn", + "name-txn": "metric_L1D demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" }, { "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", - "expression-txn": "[L2_RQSTS.ALL_CODE_RD] 
/ [TXN]" + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" }, { "name": "metric_L2 demand data read hits per instr", - "name-txn": "metric_L2 demand data read hits per txn", + "name-txn": "metric_L2 demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" }, { "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", "expression": "[L2_LINES_IN.ALL] / [instructions]", - "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" }, { "name": "metric_L2 demand data read MPI", - "name-txn": "metric_L2 demand data read misses per txn", + "name-txn": "metric_L2 demand data read misses per txn", "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", - "exression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" }, { "name": "metric_L2 demand code MPI", - "name-txn": "metric_L2 demand code misses per txn", + "name-txn": "metric_L2 demand code misses per txn", "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", - "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, { "name": "metric_LLC MPI (includes code+data+rfo w/ prefetches)", - "name-txn": "metric_LLC misses per txn (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_LLC misses per txn (includes code+data+rfo w/ prefetches)", "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033]) / [TXN]", + "expression-txn":
"([UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033]) / [TXN]", "origin": "perfspect" }, { "name": "metric_LLC code read MPI (demand+prefetch)", - "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", - "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [instructions]", - "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [TXN]" + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [TXN]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [instructions]", - "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [TXN]" + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [TXN]" }, { "name": "metric_LLC total HITM (per instr)", "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [instructions]", - "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [TXN]", + "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [TXN]", "origin": "perfspect" }, { "name": "metric_LLC total HIT clean line forwards (per instr)", - "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [instructions]", - "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [TXN]", + "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [TXN]", "origin": "perfspect" }, { @@ -129,47 +129,47 @@ }, { "name": "metric_ITLB MPI", - "name-txn": 
"metric_ITLB misses per txn", + "name-txn": "metric_ITLB misses per txn", "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_ITLB large page MPI", - "name-txn": "metric_ITLB large page misses per txn", + "name-txn": "metric_ITLB large page misses per txn", "expression": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", - "expression-txn": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB load MPI", - "name-txn": "metric_DTLB load misses per txn", + "name-txn": "metric_DTLB load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB 4KB page load MPI", - "name-txn": "metric_DTLB 4KB page load misses per txn", + "name-txn": "metric_DTLB 4KB page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [TXN]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [TXN]", "origin": "perfspect" }, { "name": "metric_DTLB 2MB large page load MPI", - "name-txn": "metric_DTLB 2MB large page load misses per txn", + "name-txn": "metric_DTLB 2MB large page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB 1GB large page load MPI", "name-txn": "metric_DTLB 1GB large page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [TXN]", + "expression-txn": 
"[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [TXN]", "origin": "perfspect" }, { "name": "metric_DTLB store MPI", - "name-txn": "metric_DTLB store misses per txn", + "name-txn": "metric_DTLB store misses per txn", "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB load miss latency (in core clks)", @@ -461,4 +461,4 @@ "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))", "origin": "perfspect" } -] +] \ No newline at end of file diff --git a/events/metric_spr.json b/events/metric_spr.json index ff40466..76e803e 100644 --- a/events/metric_spr.json +++ b/events/metric_spr.json @@ -14,22 +14,22 @@ }, { "name": "metric_CPI", - "name-txn": "metric_cycles per txn", + "name-txn": "metric_cycles per txn", "expression": "[cpu-cycles] / [instructions]", - "expression-txn": "[cpu-cycles] / [TXN]" + "expression-txn": "[cpu-cycles] / [TXN]" }, { "name": "metric_kernel_CPI", - "name-txn": "metric_kernel_cycles per txn", + "name-txn": "metric_kernel_cycles per txn", "expression": "[cpu-cycles:k] / [instructions:k]", - "expression-txn": "[cpu-cycles:k] / [TXN]", + "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, { "name": "metric_IPC", - "name-txn": "metric_txn per cycle", + "name-txn": "metric_txn per cycle", "expression": "[instructions] / [cpu-cycles]", - "expression-txn": "[TXN] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", "origin": "perfspect" }, { @@ -39,52 +39,52 @@ }, { "name": "metric_locks retired per instr", - "name-txn": "metric_locks retired per txn", + "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", - "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / 
[TXN]", "origin": "perfmon website" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", - "expression": "[L1D.REPLACEMENT] / [instructions]", - "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" }, { "name": "metric_L1D demand data read hits per instr", - "name-txn": "metric_L1D demand data read hits per txn", + "name-txn": "metric_L1D demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" }, { "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", - "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" }, { "name": "metric_L2 demand data read hits per instr", - "name-txn": "metric_L2 demand data read hits per txn", + "name-txn": "metric_L2 demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" }, { "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", "expression": "[L2_LINES_IN.ALL] / [instructions]", "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" }, { "name": "metric_L2 demand data read MPI", - "name-txn": "metric_L2 demand data read misses per txn", + "name-txn": "metric_L2 
demand data read misses per txn", "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" }, { "name": "metric_L2 demand code MPI", - "name-txn": "metric_L2 demand code misses per txn", + "name-txn": "metric_L2 demand code misses per txn", "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", - "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, { "name": "metric_UPI Data transmit BW (MB/sec) (only data)", @@ -142,28 +142,28 @@ }, { "name": "metric_LLC code read MPI (demand+prefetch)", - "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", - "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" }, { "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", + "expression": 
"[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", "origin": "perfspect" }, { "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", - "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", "origin": "perfspect" }, { @@ -180,27 +180,27 @@ }, { "name": "metric_ITLB (2nd level) MPI", - "name-txn": "metric_ITLB (2nd level) misses per txn", + "name-txn": "metric_ITLB (2nd level) misses per txn", "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) load MPI", - "name-txn": "metric_DTLB (2nd level) load misses per txn", + "name-txn": "metric_DTLB (2nd level) load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) 2MB large page load MPI", - "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB (2nd level) store MPI", "name-txn": "metric_DTLB (2nd level) store misses per txn", "expression": 
"[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_NUMA %_Reads addressed to local DRAM", @@ -384,4 +384,4 @@ "expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [SOCKET_COUNT] > 1 else 0", "origin": "perfspect" } -] +] \ No newline at end of file diff --git a/events/spr.txt b/events/spr.txt index 2a517aa..5702d1d 100644 --- a/events/spr.txt +++ b/events/spr.txt @@ -3,7 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -# SapphireRapids event list (default) +# SapphireRapids event list cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, diff --git a/src/prepare_perf_events.py b/src/prepare_perf_events.py index d51e028..5ac21f7 100644 --- a/src/prepare_perf_events.py +++ b/src/prepare_perf_events.py @@ -63,6 +63,8 @@ def is_cpu_event(line): if ( (len(tmp_list) == 1 or tmp_list[0] == "cpu" or tmp_list[0].startswith("cstate")) and "OCR." 
not in line + and "uops_retired.ms" not in line + and "int_misc.unknown_branch_cycles" not in line and "power/" not in line ): return True @@ -119,30 +121,43 @@ def filter_events(event_file, cpu_only, PID_CID_mode, TMA_supported): collection_events = [] unsupported_events = [] perf_list = helper.get_perf_list() + seperate_cycles = [] + if cpu_only: + # since most CSPs hide the cycles fixed PMU inside their VMs, we put these events in their own group + seperate_cycles = [ + "cpu-cycles,", + "cpu-cycles:k,", + "ref-cycles,", + "instructions;", + ] + + def process(line): + line = line.strip() + if line == "" or line.startswith("#") or (cpu_only and not is_cpu_event(line)): + return + if PID_CID_mode and line.startswith("cstate_"): + return + if not TMA_supported and ( + "name='TOPDOWN.SLOTS'" in line or "name='PERF_METRICS." in line + ): + return + if not is_collectable_event(line, perf_list): + # not a collectable event + unsupported_events.append(line) + # if this is the last event in the group, mark the previous event as the last (with a ';') + if line.endswith(";") and len(collection_events) > 1: + end_event = collection_events[-1] + collection_events[-1] = end_event[:-1] + ";" + else: + collection_events.append(line) + with open(event_file, "r") as fin: for line in fin: - line = line.strip() - if ( - line == "" - or line.startswith("#") - or (cpu_only and not is_cpu_event(line)) - ): - continue - if PID_CID_mode and line.startswith("cstate_"): + if cpu_only and "cpu-cycles" in line: continue - if not TMA_supported and ( - "name='TOPDOWN.SLOTS'" in line or "name='PERF_METRICS."
in line - ): - continue - if not is_collectable_event(line, perf_list): - # not a collectable event - unsupported_events.append(line) - # if this is the last event in the group, mark the previous event as the last (with a ';') - if line.endswith(";") and len(collection_events) > 1: - end_event = collection_events[-1] - collection_events[-1] = end_event[:-1] + ";" - else: - collection_events.append(line) + process(line) + for line in seperate_cycles: + process(line) if len(unsupported_events) > 0: logging.warning( f"Perf unsupported events not counted: {unsupported_events}"