diff --git a/README.md b/README.md index 333f16e..a6397fa 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# PerfSpect · [![Build](https://github.com/intel/PerfSpect/actions/workflows/build.yml/badge.svg)](https://github.com/intel/PerfSpect/actions/workflows/build.yml)[![License](https://img.shields.io/badge/License-BSD--3-blue)](https://github.com/intel/PerfSpect/blob/master/LICENSE) +# PerfSpect · [![Build](https://github.com/intel/PerfSpect/actions/workflows/build.yml/badge.svg)](https://github.com/intel/PerfSpect/actions/workflows/build.yml)[![CodeQL](https://github.com/intel/PerfSpect/actions/workflows/codeql.yml/badge.svg)](https://github.com/intel/PerfSpect/actions/workflows/codeql.yml)[![License](https://img.shields.io/badge/License-BSD--3-blue)](https://github.com/intel/PerfSpect/blob/master/LICENSE) [Quick Start](#quick-start-requires-perf-installed) | [Output](#output) | [Requirements](#requirements) | [Build from source](#build-from-source) @@ -79,4 +79,4 @@ Requires recent python. 
On successful build, binaries will be created in `dist` ``` pip3 install -r requirements.txt make -``` \ No newline at end of file +``` diff --git a/_version.txt b/_version.txt index 3a3cd8c..1892b92 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.3.1 +1.3.2 diff --git a/events/metric_bdx.json b/events/metric_bdx.json index df3176e..b39fa82 100644 --- a/events/metric_bdx.json +++ b/events/metric_bdx.json @@ -14,67 +14,97 @@ }, { "name": "metric_CPI", - "expression": "[cpu-cycles] / [instructions]" + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" }, { "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, { "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "expression": "[L1D.REPLACEMENT] / [instructions]" + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" }, { "name": "metric_L1D demand data read hits per instr", - "expression": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [instructions]" + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [TXN]" }, { "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]" + "name-txn": "metric_L1-I code read misses (w/ prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": 
"[L2_RQSTS.ALL_CODE_RD] / [TXN]" }, { "name": "metric_L2 demand data read hits per instr", - "expression": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [instructions]" + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [TXN]" }, { "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "expression": "[L2_LINES_IN.ALL] / [instructions]" + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" }, { "name": "metric_L2 demand data read MPI", - "expression": "[MEM_LOAD_UOPS_RETIRED.L2_MISS] / [instructions]" + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L2_MISS] / [TXN]" }, { "name": "metric_L2 demand code MPI", - "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]" + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, { "name": "metric_LLC MPI", + "name-txn": "metric_LLC misses per txn (includes code+data+rfo w/ prefetches)", "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x180] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x181] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x190] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x191] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x192] - [UNC_C_TOR_INSERTS.MISS_OPCODE.tid.0x180]) / [instructions]", + "expression-txn": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x180] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x181] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x190] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x191] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x192] - [UNC_C_TOR_INSERTS.MISS_OPCODE.tid.0x180]) / [TXN]", "origin": "perfspect" }, { "name": 
"metric_LLC code read MPI (demand+prefetch)", - "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x181] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x191]) / [instructions]" + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x181] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x191]) / [instructions]", + "expression-txn": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x181] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x191]) / [TXN]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x192]) / [instructions]" + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x192]) / [instructions]", + "expression-txn": "([UNC_C_TOR_INSERTS.MISS_OPCODE.0x182] + [UNC_C_TOR_INSERTS.MISS_OPCODE.0x192]) / [TXN]" }, { "name": "metric_LLC total HITM (per instr)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [instructions]", + "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [TXN]", "origin": "perfspect" }, { "name": "metric_LLC total HIT clean line forwards (per instr)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [instructions]", + "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [TXN]", "origin": "perfspect" }, { @@ -99,24 +129,34 @@ }, { "name": "metric_ITLB MPI", - "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_ITLB misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_ITLB large page MPI", - "expression": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" + "name-txn": "metric_ITLB large page misses per 
txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_DTLB load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB 2MB large page load MPI", + "name-txn": "metric_DTLB 2MB large page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]", "origin": "perfspect" }, { "name": "metric_DTLB store MPI", - "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_DTLB store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB load miss latency (in core clks)", @@ -346,4 +386,4 @@ "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] )/ (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", "origin": "perfspect" } -] \ No newline at end of file +] diff --git a/events/metric_icx.json b/events/metric_icx.json index 71a3920..1fd9f22 100644 --- a/events/metric_icx.json +++ b/events/metric_icx.json @@ -14,16 +14,22 @@ }, { "name": "metric_CPI", - "expression": "[cpu-cycles] / [instructions]" + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" }, { "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, { "name": "metric_IPC", + "name-txn": "metric_txn per cycles", "expression": "[instructions] / [cpu-cycles]", + 
"expression-txn": "[TXN] / [cpu-cycles]", "origin": "perfspect" }, { @@ -33,36 +39,52 @@ }, { "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "expression": "[L1D.REPLACEMENT] / [instructions]" + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" }, { "name": "metric_L1D demand data read hits per instr", - "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]" + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" }, { "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]" + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" }, { "name": "metric_L2 demand data read hits per instr", - "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]" + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" }, { "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "expression": "[L2_LINES_IN.ALL] / [instructions]" + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" }, { "name": "metric_L2 demand data read MPI", - "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]" + "name-txn": "metric_L2 demand data read 
misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" }, { "name": "metric_L2 demand code MPI", - "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]" + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, { "name": "metric_Average LLC data read miss latency (in clks)", @@ -138,19 +160,27 @@ }, { "name": "metric_LLC code read MPI (demand+prefetch)", - "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]" + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]" + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" }, { "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]" + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" }, { "name": "metric_LLC total HIT clean line forwards (per instr) 
(excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]" + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" }, { "name": "metric_Average LLC demand data read miss latency (in ns)", @@ -166,19 +196,27 @@ }, { "name": "metric_ITLB (2nd level) MPI", - "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) 2MB large page load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB (2nd level) store MPI", - "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_NUMA %_Reads addressed to local DRAM", @@ -356,4 +394,4 @@ "name": "metric_TMA_Info_System_SMT_2T_Utilization", "expression": "1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED] if [SOCKET_COUNT] > 1 
else 0" } -] \ No newline at end of file +] diff --git a/events/metric_skx_clx.json b/events/metric_skx_clx.json index e56f777..5887e67 100644 --- a/events/metric_skx_clx.json +++ b/events/metric_skx_clx.json @@ -14,67 +14,97 @@ }, { "name": "metric_CPI", - "expression": "[cpu-cycles] / [instructions]" + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" }, { "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, { "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "expression": "[L1D.REPLACEMENT] / [instructions]" + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" }, { "name": "metric_L1D demand data read hits per instr", - "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]" + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" }, { "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]" + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" }, { "name": "metric_L2 demand data read hits per instr", - "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]" + "name-txn": "metric_L2 demand data read hits per txn", + "expression": 
"[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" }, { "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "expression": "[L2_LINES_IN.ALL] / [instructions]" + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" }, { "name": "metric_L2 demand data read MPI", - "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]" + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" }, { "name": "metric_L2 demand code MPI", - "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]" + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, { "name": "metric_LLC MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_LLC misses per txn (includes code+data+rfo w/ prefetches)", "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033]) / [TXN]", "origin": "perfspect" }, { "name": "metric_LLC code read MPI (demand+prefetch)", - "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [instructions]" + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [TXN]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [instructions]" + "name-txn": "metric_LLC data read 
(demand+prefetch) misses per txn", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [TXN]" }, { "name": "metric_LLC total HITM (per instr)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [instructions]", + "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [TXN]", "origin": "perfspect" }, { "name": "metric_LLC total HIT clean line forwards (per instr)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [instructions]", + "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [TXN]", "origin": "perfspect" }, { @@ -99,33 +129,47 @@ }, { "name": "metric_ITLB MPI", - "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_ITLB misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_ITLB large page MPI", - "expression": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" + "name-txn": "metric_ITLB large page misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_DTLB load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB 4KB page load MPI", + "name-txn": "metric_DTLB 4KB page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [TXN]", "origin": "perfspect" }, { "name": "metric_DTLB 2MB large page load MPI", 
- "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" + "name-txn": "metric_DTLB 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB 1GB large page load MPI", + "name-txn": "metric_DTLB 1GB large page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [TXN]", "origin": "perfspect" }, { "name": "metric_DTLB store MPI", - "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_DTLB store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB load miss latency (in core clks)", @@ -417,4 +461,4 @@ "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))", "origin": "perfspect" } -] \ No newline at end of file +] diff --git a/events/metric_spr.json b/events/metric_spr.json index a1d1908..ff40466 100644 --- a/events/metric_spr.json +++ b/events/metric_spr.json @@ -14,16 +14,22 @@ }, { "name": "metric_CPI", - "expression": "[cpu-cycles] / [instructions]" + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" }, { "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, { "name": "metric_IPC", + "name-txn": "metric_txn per cycle", "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", "origin": "perfspect" }, { @@ -33,36 +39,52 @@ }, { "name": "metric_locks retired per instr", + "name-txn": 
"metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "expression": "[L1D.REPLACEMENT] / [instructions]" + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" }, { "name": "metric_L1D demand data read hits per instr", - "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]" + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" }, { "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]" + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" }, { "name": "metric_L2 demand data read hits per instr", - "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]" + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" }, { "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "expression": "[L2_LINES_IN.ALL] / [instructions]" + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" }, { "name": "metric_L2 demand data read MPI", - "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]" + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" }, { "name": 
"metric_L2 demand code MPI", - "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]" + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, { "name": "metric_UPI Data transmit BW (MB/sec) (only data)", @@ -120,20 +142,28 @@ }, { "name": "metric_LLC code read MPI (demand+prefetch)", - "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]" + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]" + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" }, { "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", "origin": "perfspect" }, { "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": 
"[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", "origin": "perfspect" }, { @@ -150,19 +180,27 @@ }, { "name": "metric_ITLB (2nd level) MPI", - "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) 2MB large page load MPI", - "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]" + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB (2nd level) store MPI", - "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]" + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_NUMA %_Reads addressed to local DRAM", @@ -346,4 +384,4 @@ "expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [SOCKET_COUNT] > 1 else 0", "origin": "perfspect" } -] \ No newline at end of file +] diff --git a/perf-postprocess.py b/perf-postprocess.py index 8a00e2b..904f8fa 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -93,6 +93,11 @@ def get_args(script_path): parser.add_argument( "--rawevents", help="save raw events in .csv format", action="store_true" ) + parser.add_argument( + "--pertxn", + type=int, + 
help="Generate per-transaction metrics using the provided transactions/sec.", + ) args = parser.parse_args() @@ -101,6 +106,13 @@ def get_args(script_path): print(perf_helpers.get_tool_version()) sys.exit() + # check number of transactions >= 1 + if args.pertxn is not None: + if args.pertxn < 1: + crash("Number of transactions cannot be < 1") + else: + args.outfile = args.outfile.replace(".csv", "_txn.csv") + # check rawfile argument is given if args.rawfile is None: crash("Missing raw file, please provide raw csv generated using perf-collect") @@ -228,10 +240,13 @@ def get_all_data_lines(input_file_path): # get_metadata -def get_metadata_as_dict(meta_data_lines): +def get_metadata_as_dict(meta_data_lines, txns=None): meta_data = {} meta_data["constants"] = {} meta_data["metadata"] = {} + if txns is not None: + meta_data["constants"]["TXN"] = txns + for line in meta_data_lines: if line.startswith("SYSTEM_TSC_FREQ"): meta_data["constants"]["SYSTEM_TSC_FREQ"] = ( @@ -420,7 +435,7 @@ def validate_file(fname): crash(str(fname) + " not accessible") -def get_metrics_formula(architecture): +def get_metrics_formula(architecture, txns=None): # get the metric file name based on architecture metric_file = get_metric_file_name(architecture) validate_file(metric_file) @@ -429,8 +444,12 @@ def get_metrics_formula(architecture): try: metrics = json.load(f_metric) for metric in metrics: + if txns is not None: + if "name-txn" in metric: + metric["name"] = metric["name-txn"] + if "expression-txn" in metric: + metric["expression"] = metric["expression-txn"] metric["events"] = re.findall(r"\[(.*?)\]", metric["expression"]) - return metrics except json.decoder.JSONDecodeError: crash("Invalid JSON, please provide a valid JSON as metrics file") @@ -598,7 +617,7 @@ def row(df, name): return "[]" -def write_html(time_series_df, perf_mode, out_file_path, meta_data): +def write_html(time_series_df, perf_mode, out_file_path, meta_data, pertxn=None): html_file = "base.html" if 
getattr(sys, "frozen", False): basepath = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))) @@ -614,6 +633,7 @@ def write_html(time_series_df, perf_mode, out_file_path, meta_data): # only show TMA if system-wide mode if perf_mode == Mode.System: + html = html.replace("TRANSACTIONS", str(pertxn is not None).lower()) time_series_df.index.name = "metrics" for metric in [ ["CPUUTIL", "metric_CPU utilization %"], @@ -630,7 +650,13 @@ def write_html(time_series_df, perf_mode, out_file_path, meta_data): ["TOTALDATA", "metric_memory bandwidth total (MB/sec)"], ["REMOTENUMA", "metric_NUMA %_Reads addressed to remote DRAM"], ]: - html = html.replace(metric[0], row(time_series_df, metric[1])) + new_metric = metric[1] + if pertxn is not None: + if "_CPI" in new_metric: + new_metric = new_metric.replace("_CPI", "_cycles per txn") + if " MPI" in new_metric: + new_metric = new_metric.replace(" MPI", " misses per txn") + html = html.replace(metric[0], row(time_series_df, new_metric)) avg = time_series_df.mean(numeric_only=True, axis=1).to_frame() html = html.replace( @@ -896,6 +922,7 @@ def generate_metrics( metadata, metrics, perf_mode, + pertxn=None, verbose=False, fail_postprocessing=False, ): @@ -970,7 +997,7 @@ def generate_metrics( generate_metrics_time_series(time_series_df, perf_mode, out_file_path) generate_metrics_averages(time_series_df, perf_mode, out_file_path) if perf_mode == Mode.System: - write_html(time_series_df, perf_mode, out_file_path, meta_data) + write_html(time_series_df, perf_mode, out_file_path, meta_data, pertxn) return @@ -1074,7 +1101,6 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): args = get_args(script_path) input_file_path = args.rawfile out_file_path = args.outfile - # read all metadata, perf evernts, and perf data lines # Note: this might not be feasible for very large files meta_data_lines, perf_event_lines, perf_data_lines = get_all_data_lines( @@ -1082,7 +1108,7 @@ def generate_raw_events(perf_data_df, 
out_file_path, perf_mode): ) # parse metadata and get mode (system, socket, or CPU) - meta_data = get_metadata_as_dict(meta_data_lines) + meta_data = get_metadata_as_dict(meta_data_lines, args.pertxn) perf_mode = Mode.System if "PERSOCKET_MODE" in meta_data and meta_data["PERSOCKET_MODE"]: perf_mode = Mode.Socket @@ -1101,7 +1127,7 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): perf_data_df = extract_dataframe(perf_data_lines, meta_data, perf_mode) # parse metrics expressions - metrics = get_metrics_formula(meta_data["constants"]["CONST_ARCH"]) + metrics = get_metrics_formula(meta_data["constants"]["CONST_ARCH"], args.pertxn) if args.rawevents: # generate raw events for system, socket and CPU generate_raw_events(perf_data_df, out_file_path, perf_mode) @@ -1125,6 +1151,7 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): meta_data, metrics, perf_mode, + args.pertxn, args.verbose, args.fail_postprocessing, ) @@ -1139,6 +1166,7 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): meta_data, metrics, perf_mode, + args.pertxn, args.verbose, args.fail_postprocessing, ) @@ -1151,6 +1179,7 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): meta_data, metrics, Mode.System, + args.pertxn, args.verbose, args.fail_postprocessing, ) diff --git a/src/base.html b/src/base.html index 58880b4..7c58ac1 100644 --- a/src/base.html +++ b/src/base.html @@ -44,6 +44,7 @@ Snackbar, Link, Tabs, + TextField, Icon, Table, TableBody, @@ -168,6 +169,7 @@ }; const all_metrics = ALLMETRICS + const [current_metrics, setCurrent_metrics] = React.useState(JSON.parse(JSON.stringify(all_metrics))); const meta_data = METADATA const description = { "metric_CPU operating frequency (in GHz)": "CPU operating frequency (in GHz)", @@ -182,6 +184,16 @@ "metric_L2 demand code MPI": "Ratio of number of code read request missing L2 cache to the total number of completed instructions", "metric_LLC code read MPI (demand+prefetch)": "Ratio of 
number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions", "metric_LLC data read MPI (demand+prefetch)": "Ratio of number of data read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions", + "metric_cycles per txn": "Cycles per transaction retired; indicating how much time each executed transaction took; in units of cycles.", + "metric_L1D misses per txn (includes data+rfo w/ prefetches)": "Ratio of number of requests missing L1 data cache (includes data+rfo w/ prefetches) to the total number of completed transactions", + "metric_L1D demand data read hits per txn": "Ratio of number of demand load requests hitting in L1 data cache to the total number of completed transactions", + "metric_L1-I code read misses (w/ prefetches) per txn": "Ratio of number of code read requests missing in L1 instruction cache (includes prefetches) to the total number of completed transactions", + "metric_L2 demand data read hits per txn": "Ratio of number of completed demand load requests hitting in L2 cache to the total number of completed transactions", + "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)": "Ratio of number of requests missing L2 cache (includes code+data+rfo w/ prefetches) to the total number of completed transactions", + "metric_L2 demand data read misses per txn": "Ratio of number of completed data read request missing L2 cache to the total number of completed transactions", + "metric_L2 demand code misses per txn": "Ratio of number of code read request missing L2 cache to the total number of completed transactions", + "metric_LLC code read misses per txn (demand+prefetch)": "Ratio of number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed transactions", + "metric_LLC data read misses per txn (demand+prefetch)": "Ratio of number of data read 
requests missing last level core cache (includes demand w/ prefetches) to the total number of completed transactions", "metric_NUMA %_Reads addressed to local DRAM": "Memory read that miss the last level cache (LLC) addressed to local DRAM as a percentage of total memory read accesses, does not include LLC prefetches.", "metric_NUMA %_Reads addressed to remote DRAM": "Memory reads that miss the last level cache (LLC) addressed to remote DRAM as a percentage of total memory read accesses, does not include LLC prefetches.", "metric_uncore frequency GHz": "Uncore operating frequency in GHz", @@ -251,6 +263,8 @@ "metric_TMA_Info_System_SMT_2T_Utilization": "Fraction of cycles where both hardware Logical Processors were active", } + const transactions = TRANSACTIONS; + const base_line = { xAxis: { name: "time (s)" @@ -364,6 +378,28 @@ ] } + const diffreport = (e) => { + console.log(e) + var reader = new FileReader(); + + reader.onload = (e) => { + let new_metrics = JSON.parse(e.target.result.split("\n").filter(e => e.includes("const all_metrics ="))[0].split("const all_metrics =")[1]) + let copy = JSON.parse(JSON.stringify(current_metrics)) + for (const metric of copy) { + for (const other_metric of new_metrics) { + if (metric.metrics === other_metric.metrics) { + console.log(metric) + console.log(current_metrics) + metric.other = other_metric["0"] + } + } + } + setCurrent_metrics(copy) + }; + + reader.readAsText(e.target.files[0]); + } + return (