From 1dfc3f93e179c037bdf6f6ceaee07c00b22fc2aa Mon Sep 17 00:00:00 2001 From: ashraf mahgoub Date: Fri, 7 Apr 2023 12:49:25 -0700 Subject: [PATCH] Perfspect updates April, 07, 2023 (#25) --- README.md | 113 +- _version.txt | 2 +- events/icx.txt | 7 + events/metric_bdx.json | 78 +- events/metric_icx.json | 2 +- events/metric_skx_clx.json | 78 +- perf-collect.py | 64 +- perf-postprocess.py | 2259 +++++++++++++----------------------- src/basic_stats.py | 11 +- src/common.py | 9 + src/icicle.py | 9 +- src/perf_helpers.py | 25 +- src/report.py | 41 +- 13 files changed, 976 insertions(+), 1722 deletions(-) create mode 100644 src/common.py diff --git a/README.md b/README.md index b431fc3..fe9aa4a 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,27 @@ # PerfSpect · [![Build](https://github.com/intel/PerfSpect/actions/workflows/build.yml/badge.svg)](https://github.com/intel/PerfSpect/actions/workflows/build.yml)[![License](https://img.shields.io/badge/License-BSD--3-blue)](https://github.com/intel/PerfSpect/blob/master/LICENSE) -[Quick Start](#quick-start-requires-perf-installed) | [Requirements](#requirements) | [Build from source](#build-from-source) | [Collection](#collection) | [Post-processing](#post-processing) | [Caveats](#caveats) | [How to contribute](#how-to-contribute) +[Quick Start](#quick-start-requires-perf-installed) | [Requirements](#requirements) | [Build from source](#build-from-source) | [Caveats](#caveats) | [How to contribute](#how-to-contribute) -PerfSpect is a system performance characterization tool based on linux perf targeting Intel microarchitectures. -The tool has two parts +PerfSpect is a system performance characterization tool built on top of linux perf. It contains two parts -1. perf collection to collect underlying PMU (Performance Monitoring Unit) counters -2. post processing that generates csv output of performance metrics. 
+perf-collect: Collects hardware events

-### Quick start (requires perf installed)
+- Collection mode:
+  - `sudo ./perf-collect` _default system wide_
+  - `sudo ./perf-collect --socket`
+  - `sudo ./perf-collect --thread`
+  - `sudo ./perf-collect --pid `
+  - `sudo ./perf-collect --cid ;`
+- Duration:
+  - `sudo ./perf-collect` _default run until terminated_
+  - `sudo ./perf-collect --timeout 10` _run for 10 seconds_
+  - `sudo ./perf-collect --app "myapp.sh myparameter"` _runs for duration of another process_
+
+perf-postprocess: Calculates high level metrics from hardware events
+
+- `perf-postprocess -r results/perfstat.csv`
+
+## Quick start (requires perf installed)
 
 ```
 wget -qO- https://github.com/intel/PerfSpect/releases/latest/download/perfspect.tgz | tar xvz
On successful build, binaries will be created in `dist` folder ``` pip3 install -r requirements.txt make ``` -On successful build, binaries will be created in `dist` folder - -## Collection: - -``` -(sudo) ./perf-collect (options) -- Some options can be used only with root privileges - -usage: perf-collect [-h] [-t TIMEOUT | -a APP] - [-p PID | -c CID | --thread | --socket] [-V] [-i INTERVAL] - [-m MUXINTERVAL] [-o OUTCSV] [-v] - -optional arguments: - -h, --help show this help message and exit - -t TIMEOUT, --timeout TIMEOUT - perf event collection time - -a APP, --app APP Application to run with perf-collect, perf collection - ends after workload completion - -p PID, --pid PID perf-collect on selected PID(s) - -c CID, --cid CID perf-collect on selected container ids - --thread Collect for thread metrics - --socket Collect for socket metrics - -V, --version display version info - -i INTERVAL, --interval INTERVAL - interval in seconds for time series dump, default=1 - -m MUXINTERVAL, --muxinterval MUXINTERVAL - event mux interval in milli seconds, default=0 i.e. - will use the system default - -o OUTCSV, --outcsv OUTCSV - perf stat output in csv format, - default=results/perfstat.csv - -v, --verbose Display debugging information -``` - -### Examples - -1. sudo ./perf-collect (collect PMU counters using predefined architecture specific event file until collection is terminated) -2. sudo ./perf-collect -a "myapp.sh myparameter" (collect perf for myapp.sh) -3. 
sudo ./perf-collect --cid "one or more container IDs from docker or kubernetes seperated by semicolon" - -## Post-processing: - -``` -./perf-postprocess (options) - -usage: perf-postprocess [-h] [--version] [-m METRICFILE] [-o OUTFILE] - [--persocket] [--percore] [-v] [--epoch] [-html HTML] - [-r RAWFILE] - -perf-postprocess: perf post process - -optional arguments: - -h, --help show this help message and exit - --version, -V display version information - -m METRICFILE, --metricfile METRICFILE - formula file, default metric file for the architecture - -o OUTFILE, --outfile OUTFILE - perf stat outputs in csv format, - default=results/metric_out.csv - --persocket generate per socket metrics - --percore generate per core metrics - -v, --verbose include debugging information, keeps all intermediate - csv files - --epoch time series in epoch format, default is sample count - -html HTML, --html HTML - Static HTML report - -required arguments: - -r RAWFILE, --rawfile RAWFILE - Raw CSV output from perf-collect -``` - -### Examples - -./perf-postprocess -r results/perfstat.csv (post processes perfstat.csv and creates metric_out.csv, metric_out.average.csv, metric_out.raw.csv) - -./perf-postprocess -r results/perfstat.csv --html perfstat.html (creates a report for TMA analysis and system level metric charts.) - -### Notes - -1. metric_out.csv : Time series dump of the metrics. The metrics are defined in events/metric.json -2. metric_out.averags.csv: Average of metrics over the collection period -3. metric_out.raw.csv: csv file with raw events normalized per second -4. Socket/core level metrics: Additonal csv files outputfile.socket.csv/outputfile.core.csv will be generated. - ## Caveats 1. The tool can collect only the counters supported by underlying linux perf version. 
diff --git a/_version.txt b/_version.txt index c813fe1..3c43790 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.2.5 +1.2.6 diff --git a/events/icx.txt b/events/icx.txt index d969752..cbcb611 100644 --- a/events/icx.txt +++ b/events/icx.txt @@ -151,6 +151,13 @@ upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/, upi/event=0x2,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/, upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/; +cha/event=0x35,umask=0xc88ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/, +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/; + +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, +cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/; + cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, cha/event=0x35,umask=0xC8177E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, diff --git a/events/metric_bdx.json b/events/metric_bdx.json index a606a23..c14b6b4 100644 --- a/events/metric_bdx.json +++ b/events/metric_bdx.json @@ -173,159 +173,159 @@ "expression": "([UNC_C_TOR_INSERTS.OPCODE.0x1c8] + [UNC_C_TOR_INSERTS.OPCODE.0x180]) * 64 / 1000000" }, { - "name": "metric_TMAM_Info_cycles_both_threads_active(%)", + "name": "metric_TMA_Info_cycles_both_threads_active(%)", "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [const_thread_count] > 1 else 0)" }, { - "name": "metric_TMAM_Info_CoreIPC", + "name": "metric_TMA_Info_CoreIPC", "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_Frontend_Bound(%)", + "name": "metric_TMA_Frontend_Bound(%)", "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CORE] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / 
[const_thread_count]))" }, { - "name": "metric_TMAM_..Frontend_Latency(%)", + "name": "metric_TMA_..Frontend_Latency(%)", "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_....ICache_Misses(%)", + "name": "metric_TMA_....ICache_Misses(%)", "expression": "100 * [ICACHE.IFDATA_STALL] / [cpu-cycles]" }, { - "name": "metric_TMAM_....ITLB_Misses(%)", + "name": "metric_TMA_....ITLB_Misses(%)", "expression": "100 * ((14 * [ITLB_MISSES.STLB_HIT]) + [ITLB_MISSES.WALK_DURATION_c1] + (7 * [ITLB_MISSES.WALK_COMPLETED] )) / [cpu-cycles]" }, { - "name": "metric_TMAM_....Branch_Resteers(%)", + "name": "metric_TMA_....Branch_Resteers(%)", "expression": "100 * (([RS_EVENTS.EMPTY_CYCLES] - [ICACHE.IFDATA_STALL] - (14 * [ITLB_MISSES.STLB_HIT] + [ITLB_MISSES.WALK_DURATION_c1] + 7 * [ITLB_MISSES.WALK_COMPLETED])) / [RS_EVENTS.EMPTY_END]) * ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] + [BACLEARS.ANY]) / [cpu-cycles]" }, { - "name": "metric_TMAM_....DSB_Switches(%)", + "name": "metric_TMA_....DSB_Switches(%)", "expression": "100 * 2 * [DSB2MITE_SWITCHES.PENALTY_CYCLES] / [cpu-cycles]" }, { - "name": "metric_TMAM_....MS_Switches(%)", + "name": "metric_TMA_....MS_Switches(%)", "expression": "100 * 2 * [IDQ.MS_SWITCHES] / [cpu-cycles]" }, { - "name": "metric_TMAM_..Frontend_Bandwidth(%)", + "name": "metric_TMA_..Frontend_Bandwidth(%)", "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - (4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE])) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_Bad_Speculation(%)", + "name": "metric_TMA_Bad_Speculation(%)", "expression": "100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + ((4 * [INT_MISC.RECOVERY_CYCLES_ANY]) / [const_thread_count])) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) " }, { - "name": "metric_TMAM_..Branch_Mispredicts(%)", + "name": 
"metric_TMA_..Branch_Mispredicts(%)", "expression": "([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * 100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + (4 * [INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count])) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_..Machine_Clears(%)", + "name": "metric_TMA_..Machine_Clears(%)", "expression": "([MACHINE_CLEARS.COUNT] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * 100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + (4 * [INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count])) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_Backend_bound(%)", + "name": "metric_TMA_Backend_Bound(%)", "expression": "100 - (100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + 4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count]) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS]) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) " }, { - "name": "metric_TMAM_..Memory_Bound(%)", + "name": "metric_TMA_..Memory_Bound(%)", "expression": "100 * (1 - (([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + 4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count]) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS]) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) * ([CYCLE_ACTIVITY.STALLS_MEM_ANY] + [RESOURCE_STALLS.SB]) / ([CYCLE_ACTIVITY.STALLS_TOTAL] + [UOPS_EXECUTED.CYCLES_GE_1_UOPS_EXEC] - ( [UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC] if ([instructions] / [cpu-cycles]) > 1.8 else [UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC]) - ( [RS_EVENTS.EMPTY_CYCLES] if ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / [CPU_CLK_UNHALTED.THREAD_ANY]) > 0.1 else 0) + [RESOURCE_STALLS.SB])" }, { - "name": "metric_TMAM_....L1_Bound(%)", + "name": "metric_TMA_....L1_Bound(%)", "expression": "100 * ([CYCLE_ACTIVITY.STALLS_MEM_ANY] - 
[CYCLE_ACTIVITY.STALLS_L1D_MISS]) / [cpu-cycles]" }, { - "name": "metric_TMAM_......DTLB_Load(%)", + "name": "metric_TMA_......DTLB_Load(%)", "expression": "100 * ([DTLB_LOAD_MISSES.STLB_HIT] * 8 + [DTLB_LOAD_MISSES.WALK_DURATION_c1] + 7 * [DTLB_LOAD_MISSES.WALK_COMPLETED]) / [cpu-cycles]" }, { - "name": "metric_TMAM_......Store_Fwd_Blk(%)", + "name": "metric_TMA_......Store_Fwd_Blk(%)", "expression": "100 * (13 * [LD_BLOCKS.STORE_FORWARD]) / [cpu-cycles]" }, { - "name": "metric_TMAM_....L2_Bound(%)", + "name": "metric_TMA_....L2_Bound(%)", "expression": "100 * ([CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS]) / [cpu-cycles]" }, { - "name": "metric_TMAM_....L3_Bound(%)", + "name": "metric_TMA_....L3_Bound(%)", "expression": "100 * [MEM_LOAD_UOPS_RETIRED.L3_HIT] / ([MEM_LOAD_UOPS_RETIRED.L3_HIT] + 7 * [MEM_LOAD_UOPS_RETIRED.L3_MISS]) * ([CYCLE_ACTIVITY.STALLS_L2_MISS] / [cpu-cycles])" }, { - "name": "metric_TMAM_......L3_Latency(%)", + "name": "metric_TMA_......L3_Latency(%)", "expression": "100 * 41 * [MEM_LOAD_UOPS_RETIRED.L3_HIT] * ( 1 + [MEM_LOAD_UOPS_RETIRED.HIT_LFB] / ( [MEM_LOAD_UOPS_RETIRED.L2_HIT] + [MEM_LOAD_UOPS_RETIRED.L3_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD] ) ) / [cpu-cycles] " }, { - "name": "metric_TMAM_......Contested_Accesses(%)", + "name": "metric_TMA_......Contested_Accesses(%)", "expression": "100 * 60 * ([MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS]) * ( 1 + [MEM_LOAD_UOPS_RETIRED.HIT_LFB] / ( [MEM_LOAD_UOPS_RETIRED.L2_HIT] + [MEM_LOAD_UOPS_RETIRED.L3_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS] + 
[MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD] ) ) / [cpu-cycles] " }, { - "name": "metric_TMAM_......Data_Sharing(%)", + "name": "metric_TMA_......Data_Sharing(%)", "expression": "100 * 43 * [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT] * ( 1 + [MEM_LOAD_UOPS_RETIRED.HIT_LFB] / ( [MEM_LOAD_UOPS_RETIRED.L2_HIT] + [MEM_LOAD_UOPS_RETIRED.L3_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM] + [MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM] + [MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD] ) ) / [cpu-cycles] " }, { - "name": "metric_TMAM_......SQ_Full(%)", + "name": "metric_TMA_......SQ_Full(%)", "expression": "100 * ([OFFCORE_REQUESTS_BUFFER.SQ_FULL] / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_....MEM_Bound(%)", + "name": "metric_TMA_....MEM_Bound(%)", "expression": "100 * (1 - ( [MEM_LOAD_UOPS_RETIRED.L3_HIT] / ([MEM_LOAD_UOPS_RETIRED.L3_HIT] + 7 * [MEM_LOAD_UOPS_RETIRED.L3_MISS])) ) * ([CYCLE_ACTIVITY.STALLS_L2_MISS] / [cpu-cycles])" }, { - "name": "metric_TMAM_......MEM_Bandwidth(%)", + "name": "metric_TMA_......MEM_Bandwidth(%)", "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD_c4], [cpu-cycles])) / [cpu-cycles]" }, { - "name": "metric_TMAM_......MEM_Latency(%)", + "name": "metric_TMA_......MEM_Latency(%)", "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD_c4], [cpu-cycles])) / [cpu-cycles]" }, { - "name": "metric_TMAM_....Stores_Bound(%)", + "name": "metric_TMA_....Store_Bound(%)", "expression": "100 * [RESOURCE_STALLS.SB] / [cpu-cycles]" }, { - "name": "metric_TMAM_......DTLB_Store(%)", + "name": 
"metric_TMA_......DTLB_Store(%)", "expression": "100 * (7 * [DTLB_STORE_MISSES.STLB_HIT] + [DTLB_STORE_MISSES.WALK_DURATION_c1]) / [cpu-cycles]" }, { - "name": "metric_TMAM_..Core_Bound(%)", + "name": "metric_TMA_..Core_Bound(%)", "expression": "100 * ( 1 - (( [UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + 4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count]) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS] ) / ( 4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) * (1 - (([CYCLE_ACTIVITY.STALLS_MEM_ANY] + [RESOURCE_STALLS.SB]) / ([CYCLE_ACTIVITY.STALLS_TOTAL] + [UOPS_EXECUTED.CYCLES_GE_1_UOPS_EXEC] - ( [UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC] if ([instructions] / [cpu-cycles]) > 1.8 else [UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC]) - ([RS_EVENTS.EMPTY_CYCLES] if ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / [CPU_CLK_UNHALTED.THREAD_ANY]) > 0.1 else 0) + [RESOURCE_STALLS.SB])))" }, { - "name": "metric_TMAM_....Divider(%)", + "name": "metric_TMA_....Divider(%)", "expression": "100 * [ARITH.FPU_DIV_ACTIVE] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_....Ports_Utilization(%)", + "name": "metric_TMA_....Ports_Utilization(%)", "expression": "100 * (( [CYCLE_ACTIVITY.STALLS_TOTAL] + [UOPS_EXECUTED.CYCLES_GE_1_UOPS_EXEC] - ([UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC] if ([instructions] / [cpu-cycles]) > 1.8 else [UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC]) - ([RS_EVENTS.EMPTY_CYCLES] if ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / [CPU_CLK_UNHALTED.THREAD_ANY]) > 0.1 else 0) + [RESOURCE_STALLS.SB]) - [RESOURCE_STALLS.SB] - [CYCLE_ACTIVITY.STALLS_MEM_ANY] ) /[cpu-cycles]" }, { - "name": "metric_TMAM_......0_Port_Utilized(%)", + "name": "metric_TMA_......0_Port_Utilized(%)", "expression": "100 * (([UOPS_EXECUTED.CORE_i1_c1] / [const_thread_count]) if ([const_thread_count] > 1) else ([RS_EVENTS.EMPTY_CYCLES] if ([CYCLE_ACTIVITY.STALLS_TOTAL] - ([IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / 
([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) ) > 0.1 else 0)) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]) " }, { - "name": "metric_TMAM_......1_Port_Utilized(%)", + "name": "metric_TMA_......1_Port_Utilized(%)", "expression": "100 * (([UOPS_EXECUTED.CORE_c1] - [UOPS_EXECUTED.CORE_c2]) / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_......2_Port_Utilized(%)", + "name": "metric_TMA_......2_Port_Utilized(%)", "expression": "100 * (([UOPS_EXECUTED.CORE_c2] - [UOPS_EXECUTED.CORE_c3]) / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_......3m_Ports_Utilized(%)", + "name": "metric_TMA_......3m_Ports_Utilized(%)", "expression": "100 * ([UOPS_EXECUTED.CORE_c3] / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_Retiring(%)", + "name": "metric_TMA_Retiring(%)", "expression": "100 * [UOPS_RETIRED.RETIRE_SLOTS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" }, { - "name": "metric_TMAM_..Base(%)", + "name": "metric_TMA_..Base(%)", "expression": "100 *(([UOPS_RETIRED.RETIRE_SLOTS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) - (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))))" }, { - "name": "metric_TMAM_..Microcode_Sequencer(%)", + "name": "metric_TMA_..Microcode_Sequencer(%)", "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] )/ (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" } ] \ No newline at end of file diff --git a/events/metric_icx.json b/events/metric_icx.json index ee14288..58a0465 100644 --- a/events/metric_icx.json +++ b/events/metric_icx.json @@ -130,7 +130,7 @@ }, { "name": "metric_LLC code read MPI (demand+prefetch)", - "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFCODE] + 
[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]" + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", diff --git a/events/metric_skx_clx.json b/events/metric_skx_clx.json index c8b812e..a7e4260 100644 --- a/events/metric_skx_clx.json +++ b/events/metric_skx_clx.json @@ -225,159 +225,159 @@ "expression": "([UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2] + [UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3]) * 4 / 1000000" }, { - "name": "metric_TMAM_Info_cycles_both_threads_active(%)", + "name": "metric_TMA_Info_cycles_both_threads_active(%)", "expression": "100 * ( (1 - ([CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE] / ([CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY] / 2)) ) if [const_thread_count] > 1 else 0)" }, { - "name": "metric_TMAM_Info_CoreIPC", + "name": "metric_TMA_Info_CoreIPC", "expression": "[instructions] / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_Frontend_Bound(%)", + "name": "metric_TMA_Frontend_Bound(%)", "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CORE] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" }, { - "name": "metric_TMAM_..Frontend_Latency(%)", + "name": "metric_TMA_..Frontend_Latency(%)", "expression": "100 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] / ([CPU_CLK_UNHALTED.THREAD_ANY] /[const_thread_count])" }, { - "name": "metric_TMAM_....ICache_Misses(%)", + "name": "metric_TMA_....ICache_Misses(%)", "expression": "100 * ([ICACHE_16B.IFDATA_STALL] + 2 * [ICACHE_16B_c1_e1_IFDATA_STALL]) / [cpu-cycles]" }, { - "name": "metric_TMAM_....ITLB_Misses(%)", + "name": "metric_TMA_....ITLB_Misses(%)", "expression": "100 * [ICACHE_64B.IFTAG_STALL] / [cpu-cycles]" }, { - "name": "metric_TMAM_....Branch_Resteers(%)", + "name": "metric_TMA_....Branch_Resteers(%)", 
"expression": "100 * ([INT_MISC.CLEAR_RESTEER_CYCLES] + 9 * [BACLEARS.ANY]) / [cpu-cycles]" }, { - "name": "metric_TMAM_......Mispredicts_Resteers(%)", + "name": "metric_TMA_......Mispredicts_Resteers(%)", "expression": "100 * [INT_MISC.CLEAR_RESTEER_CYCLES] * ([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) / [cpu-cycles]" }, { - "name": "metric_TMAM_......Clears_Resteers(%)", + "name": "metric_TMA_......Clears_Resteers(%)", "expression": "100 * [INT_MISC.CLEAR_RESTEER_CYCLES] * (1 - ([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT]))) / [cpu-cycles]" }, { - "name": "metric_TMAM_......Unknown_Branches_Resteers(%)", + "name": "metric_TMA_......Unknown_Branches_Resteers(%)", "expression": "100 * (9 * [BACLEARS.ANY]) / [cpu-cycles]" }, { - "name": "metric_TMAM_..Frontend_Bandwidth(%)", + "name": "metric_TMA_..Frontend_Bandwidth(%)", "expression": "100 * ([IDQ_UOPS_NOT_DELIVERED.CORE] - 4 * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" }, { - "name": "metric_TMAM_Bad_Speculation(%)", + "name": "metric_TMA_Bad_Speculation(%)", "expression": "100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + ((4 * [INT_MISC.RECOVERY_CYCLES_ANY]) / [const_thread_count])) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) " }, { - "name": "metric_TMAM_..Branch_Mispredicts(%)", + "name": "metric_TMA_..Branch_Mispredicts(%)", "expression": "100 * ([BR_MISP_RETIRED.ALL_BRANCHES] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + (4 * [INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count])) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]) " }, { - "name": "metric_TMAM_..Machine_Clears(%)", + "name": "metric_TMA_..Machine_Clears(%)", "expression": "100 * ([MACHINE_CLEARS.COUNT] / ([BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT])) * 
([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + (4 * [INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count])) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_Backend_bound(%)", + "name": "metric_TMA_Backend_Bound(%)", "expression": "100 - (100 * ([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + 4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count]) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS]) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])) " }, { - "name": "metric_TMAM_..Memory_Bound(%)", + "name": "metric_TMA_..Memory_Bound(%)", "expression": "100 * (1 - (([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + 4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count]) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS]) / (4 * [CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) * ([CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES]) / ([EXE_ACTIVITY.EXE_BOUND_0_PORTS] + [EXE_ACTIVITY.1_PORTS_UTIL] + ([EXE_ACTIVITY.2_PORTS_UTIL] if ([instructions] / [cpu-cycles]) > 1.8 else 0) + [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES])" }, { - "name": "metric_TMAM_....L1_Bound(%)", + "name": "metric_TMA_....L1_Bound(%)", "expression": "100 * ([CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS]) / [cpu-cycles]" }, { - "name": "metric_TMAM_......DTLB_Load(%)", + "name": "metric_TMA_......DTLB_Load(%)", "expression": "100 * (7 * [DTLB_LOAD_MISSES.STLB_HIT] + [DTLB_LOAD_MISSES.WALK_ACTIVE]) / [cpu-cycles]" }, { - "name": "metric_TMAM_......Store_Fwd_Blk(%)", + "name": "metric_TMA_......Store_Fwd_Blk(%)", "expression": "100 * (13 * [LD_BLOCKS.STORE_FORWARD]) / [cpu-cycles]" }, { - "name": "metric_TMAM_....L2_Bound(%)", + "name": "metric_TMA_....L2_Bound(%)", "expression": "100 * ([CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS]) / [cpu-cycles]" }, { - "name": "metric_TMAM_....L3_Bound(%)", + "name": 
"metric_TMA_....L3_Bound(%)", "expression": "100 * ([CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_MISS]) / [cpu-cycles]" }, { - "name": "metric_TMAM_......L3_Latency(%)", + "name": "metric_TMA_......L3_Latency(%)", "expression": "100 * (((min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD], [cpu-cycles])) / [cpu-cycles]) - ((min([OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6] , [cpu-cycles])) / [cpu-cycles]))" }, { - "name": "metric_TMAM_......L3_Bandwidth(%)", + "name": "metric_TMA_......L3_Bandwidth(%)", "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6], [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6], [cpu-cycles])) / [cpu-cycles]" }, { - "name": "metric_TMAM_......SQ_Full(%)", + "name": "metric_TMA_......SQ_Full(%)", "expression": "100 * ([OFFCORE_REQUESTS_BUFFER.SQ_FULL] / [const_thread_count]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_....MEM_Bound(%)", + "name": "metric_TMA_....MEM_Bound(%)", "expression": "100 * [CYCLE_ACTIVITY.STALLS_L3_MISS] / [cpu-cycles]" }, { - "name": "metric_TMAM_......MEM_Bandwidth(%)", + "name": "metric_TMA_......MEM_Bandwidth(%)", "expression": "100 * min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6] , [cpu-cycles]) / [cpu-cycles]" }, { - "name": "metric_TMAM_......MEM_Latency(%)", + "name": "metric_TMA_......MEM_Latency(%)", "expression": "100 * (min([OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD] , [cpu-cycles]) - min([OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6] , [cpu-cycles]))/ [cpu-cycles]" }, { - "name": "metric_TMAM_....Stores_Bound(%)", + "name": "metric_TMA_....Store_Bound(%)", "expression": "100 * [EXE_ACTIVITY.BOUND_ON_STORES] / [cpu-cycles]" }, { - "name": 
"metric_TMAM_......DTLB_Store(%)", + "name": "metric_TMA_......DTLB_Store(%)", "expression": "100 * (7 * [DTLB_STORE_MISSES.STLB_HIT] + [DTLB_STORE_MISSES.WALK_ACTIVE]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_..Core_Bound(%)", + "name": "metric_TMA_..Core_Bound(%)", "expression": "100 * (1 - (([UOPS_ISSUED.ANY] - [UOPS_RETIRED.RETIRE_SLOTS] + (4 * ([INT_MISC.RECOVERY_CYCLES_ANY] / [const_thread_count])) + [IDQ_UOPS_NOT_DELIVERED.CORE] + [UOPS_RETIRED.RETIRE_SLOTS]) / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))) * (1 - (([CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES]) / ([EXE_ACTIVITY.EXE_BOUND_0_PORTS] + [EXE_ACTIVITY.1_PORTS_UTIL] + ([EXE_ACTIVITY.2_PORTS_UTIL] if ([instructions] / [cpu-cycles]) > 1.8 else 0) + [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES])))" }, { - "name": "metric_TMAM_....Divider(%)", + "name": "metric_TMA_....Divider(%)", "expression": "100 * [ARITH.DIVIDER_ACTIVE] / [cpu-cycles]" }, { - "name": "metric_TMAM_....Ports_Utilization(%)", + "name": "metric_TMA_....Ports_Utilization(%)", "expression": "100 * (([EXE_ACTIVITY.EXE_BOUND_0_PORTS] + [EXE_ACTIVITY.1_PORTS_UTIL] + ([EXE_ACTIVITY.2_PORTS_UTIL] if ([instructions] / [cpu-cycles]) > 1.8 else 0) + [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES]) - [CYCLE_ACTIVITY.STALLS_MEM_ANY] - [EXE_ACTIVITY.BOUND_ON_STORES]) / [cpu-cycles]" }, { - "name": "metric_TMAM_......0_Port_Utilized(%)", + "name": "metric_TMA_......0_Port_Utilized(%)", "expression": "100 * (([UOPS_EXECUTED.CORE_CYCLES_NONE] / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.EXE_BOUND_0_PORTS]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_......1_Port_Utilized(%)", + "name": "metric_TMA_......1_Port_Utilized(%)", "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_1] - [UOPS_EXECUTED.CORE_CYCLES_GE_2]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.1_PORTS_UTIL]) / 
([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_......2_Port_Utilized(%)", + "name": "metric_TMA_......2_Port_Utilized(%)", "expression": "100 * ((([UOPS_EXECUTED.CORE_CYCLES_GE_2] - [UOPS_EXECUTED.CORE_CYCLES_GE_3]) / 2) if ([const_thread_count] > 1) else [EXE_ACTIVITY.2_PORTS_UTIL]) / ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])" }, { - "name": "metric_TMAM_......3m_Ports_Utilized(%)", + "name": "metric_TMA_......3m_Ports_Utilized(%)", "expression": "100 * [UOPS_EXECUTED.CORE_CYCLES_GE_3] / [CPU_CLK_UNHALTED.THREAD_ANY]" }, { - "name": "metric_TMAM_Retiring(%)", + "name": "metric_TMA_Retiring(%)", "expression": "100 * [UOPS_RETIRED.RETIRE_SLOTS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))" }, { - "name": "metric_TMAM_..Base(%)", + "name": "metric_TMA_..Base(%)", "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))) - (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))))" }, { - "name": "metric_TMAM_..Microcode_Sequencer(%)", + "name": "metric_TMA_..Microcode_Sequencer(%)", "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))" } ] \ No newline at end of file diff --git a/perf-collect.py b/perf-collect.py index b6574e7..69fe6e0 100644 --- a/perf-collect.py +++ b/perf-collect.py @@ -16,6 +16,7 @@ from argparse import ArgumentParser from src import perf_helpers from src import prepare_perf_events as prep_events +from src.common import crash logging.basicConfig( format="%(asctime)s %(levelname)s: %(message)s", @@ -41,7 +42,6 @@ def write_metadata( arch, cpuname, cpuid_info, - interval, muxinterval, thread, socket, @@ -67,13 +67,12 @@ def write_metadata( modified.write("IMC count," + str(imc) + ",\n") modified.write("CHA count," + str(cha) + ",\n") modified.write("UPI 
count," + str(upi) + ",\n") - modified.write("Sampling Interval," + str(interval) + ",\n") modified.write("Architecture," + str(arch) + ",\n") modified.write("Model," + str(cpuname) + ",\n") modified.write("kernel version," + perf_helpers.get_version() + "\n") - for socket, cpus in cpuid_info.items(): - modified.write("Socket:" + str(socket) + ",") - for c in cpus: + for _socket, _cpus in cpuid_info.items(): + modified.write("Socket:" + str(_socket) + ",") + for c in _cpus: modified.write(str(c) + ";") modified.write("\n") modified.write("Perf event mux Interval ms," + str(muxinterval) + ",\n") @@ -156,28 +155,24 @@ def resource_path(relative_path): def validate_perfargs(perf): """validate perf command before executing""" if perf[0] != "perf": - log.error("Not a perf command, exiting!") - sys.exit(1) + crash("Not a perf command, exiting!") def validate_file(fname): """validate if file is accessible""" if not os.access(fname, os.R_OK): - log.error(str(fname) + " not accessible") - sys.exit(1) + crash(str(fname) + " not accessible") def is_safe_file(fname, substr): """verify if file name/format is accurate""" if not fname.endswith(substr): - log.error(str(fname) + " isn't appropriate format") - sys.exit(1) + crash(str(fname) + " isn't appropriate format") if __name__ == "__main__": if platform.system() != "Linux": - log.error("PerfSpect currently supports Linux only") - sys.exit(1) + crash("PerfSpect currently supports Linux only") # fix the pyinstaller path script_path = os.path.dirname(os.path.realpath(__file__)) @@ -218,13 +213,6 @@ def is_safe_file(fname, substr): parser.add_argument( "-V", "--version", help="display version info", action="store_true" ) - parser.add_argument( - "-i", - "--interval", - type=float, - default=1, - help="interval in seconds for time series dump, default=1", - ) parser.add_argument( "-m", "--muxinterval", @@ -252,8 +240,7 @@ def is_safe_file(fname, substr): sys.exit(0) if os.geteuid() != 0: - log.error("Must run PerfSpect as root, 
please re-run") - sys.exit(1) + crash("Must run PerfSpect as root, please re-run") # disable nmi watchdog before collecting perf nmi_watchdog = 0 @@ -267,25 +254,18 @@ def is_safe_file(fname, substr): pass initial_pmus = perf_helpers.pmu_contention_detect() - interval = int(args.interval * 1000) + interval = 1000 if args.muxinterval > 1000: - log.error("Input argument muxinterval is too large, max is [1s or 1000ms]") - sys.exit(1) - if args.interval < 0.1 or args.interval > 300: - log.error( - "Input argument dump interval is too large or too small, range is [0.1 to 300s]" - ) - sys.exit(1) + crash("Input argument muxinterval is too large, max is [1s or 1000ms]") # select architecture default event file if not supplied procinfo = perf_helpers.get_cpuinfo() arch, cpuname = perf_helpers.get_arch_and_name(procinfo) if not arch: - log.error( + crash( f"Unrecognized CPU architecture. Supported architectures: {', '.join(SUPPORTED_ARCHITECTURES)}" ) - sys.exit(1) eventfile = None if arch == "broadwell": eventfile = "bdx.txt" @@ -308,8 +288,7 @@ def is_safe_file(fname, substr): eventfile = script_path + "/events/" + eventfile eventfilename = eventfile else: - log.error("Unknown application type") - sys.exit(1) + crash("Unknown application type") if args.outcsv == default_output_file: # create results dir @@ -318,10 +297,9 @@ def is_safe_file(fname, substr): perf_helpers.fix_path_ownership(result_dir) else: if not perf_helpers.validate_outfile(args.outcsv): - log.error( + crash( "Output filename not accepted. 
Filename should be a .csv without special characters" ) - sys.exit(1) mux_intervals = perf_helpers.get_perf_event_mux_interval() if args.muxinterval > 0: @@ -340,8 +318,7 @@ def is_safe_file(fname, substr): full_kernel_version = kernel except Exception as e: log.exception(e) - log.info("Unable to get kernel version") - sys.exit(1) + crash("Unable to get kernel version") # get perf events to collect collection_events = [] @@ -353,12 +330,15 @@ def is_safe_file(fname, substr): events, collection_events = prep_events.prepare_perf_events( eventfile, ( - (args.pid or args.cid or args.thread or args.socket) is not None + args.pid is not None + or args.cid is not None + or args.thread + or args.socket or not have_uncore ), ) - collection_type = "-a" if args.thread is False and args.socket is False else "-a -A" + collection_type = "-a" if not args.thread and not args.socket else "-a -A" # start perf stat if args.pid and args.timeout: log.info("Only CPU/core events will be enabled with pid option") @@ -430,8 +410,7 @@ def is_safe_file(fname, substr): except KeyboardInterrupt: log.info("Collection stopped! 
Caculating TSC frequency now") except Exception: - log.error("perf encountered errors") - sys.exit(1) + crash("perf encountered errors") cpuid_info = perf_helpers.get_cpuid_info(procinfo) write_metadata( @@ -440,7 +419,6 @@ def is_safe_file(fname, substr): arch, cpuname, cpuid_info, - args.interval, args.muxinterval, args.thread, args.socket, diff --git a/perf-postprocess.py b/perf-postprocess.py index e32d45f..63af1a0 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -5,18 +5,19 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -from __future__ import print_function -import csv -import collections import json import logging +import numpy as np import os import pandas as pd import re import sys -from src import perf_helpers -from simpleeval import simple_eval from argparse import ArgumentParser +from enum import Enum +from simpleeval import simple_eval +from src.common import crash +from src import perf_helpers +from src import report logging.basicConfig( format="%(asctime)s %(levelname)s: %(message)s", @@ -25,335 +26,23 @@ handlers=[logging.FileHandler("debug.log"), logging.StreamHandler(sys.stdout)], ) log = logging.getLogger(__name__) -script_path = os.path.dirname(os.path.realpath(__file__)) - -# fix the pyinstaller path -if "_MEI" in script_path: - script_path = script_path.rsplit("/", 1)[0] -# temporary output :time series dump of raw events -output_file = script_path + "/_tmp_perf_/tmp_perf_out.csv" -output_files = [] # For per cgroup tmp_perf_out files -# temporary output :time series dump of raw events at socket level -tmp_socket_file = script_path + "/_tmp_perf_/tmp_socket_out.csv" -# temporary output:trasposed view of perf-collect output -time_dump_file = script_path + "/_tmp_perf_/time_dump.csv" -time_dump_files = [] # For per cgroup time-dump file -# final output of post-process -out_metric_file = script_path + "/results/metric_out.csv" 
-out_metric_files = [] # For per cgroup metrics -metric_file = "" -html_input = "metric_out.average.csv" - - -# globals -# excel output related -class workbook: - def __init__(self): - self.book = None - self.sys_sheet = None - self.sys_avg_sheet = None - self.sys_raw_sheet = None - self.socket_sheet = None - self.socket_avg_sheet = None - self.socket_raw_sheet = None - self.core_sheet = None - self.core_avg_sheet = None - - def initialize(self, name, persocket, percore): - self.book = xlsxwriter.Workbook(name) - filename = os.path.basename(name) - filename = filename[:5] - self.sys_avg_sheet = self.book.add_worksheet( - get_extra_out_file(filename, "a", True) - ) - self.sys_sheet = self.book.add_worksheet( - get_extra_out_file(filename, "m", True) - ) - self.sys_raw_sheet = self.book.add_worksheet( - get_extra_out_file(filename, "r", True) - ) - if percore or persocket: - self.socket_avg_sheet = self.book.add_worksheet( - get_extra_out_file(filename, "sa", True) - ) - self.socket_sheet = self.book.add_worksheet( - get_extra_out_file(filename, "s", True) - ) - self.socket_raw_sheet = self.book.add_worksheet( - get_extra_out_file(name, "sr", True) - ) - if percore: - self.core_avg_sheet = self.book.add_worksheet( - get_extra_out_file(filename, "ca", True) - ) - self.core_sheet = self.book.add_worksheet( - get_extra_out_file(filename, "c", True) - ) - self.core_raw_sheet = self.book.add_worksheet( - get_extra_out_file(filename, "cr", True) - ) - - def writerow(self, row, vals, sheet): - for col, val in enumerate(vals): - if (row != 0) and (col != 0): - val = float(val) - if ( - (row != 0) - and (col == 0) - and (sheet == "m" or sheet == "s" or sheet == "c") - ): - val = float(val) - if sheet == "m": - self.sys_sheet.write(row, col, val) - elif sheet == "a": - self.sys_avg_sheet.write(row, col, val) - elif sheet == "r": - self.sys_raw_sheet.write(row, col, val) - elif sheet == "s": - self.socket_sheet.write(row, col, val) - elif sheet == "sa": - 
self.socket_avg_sheet.write(row, col, val) - elif sheet == "sr": - self.socket_raw_sheet.write(row, col, val) - elif sheet == "c": - self.core_sheet.write(row, col, val) - elif sheet == "ca": - self.core_avg_sheet.write(row, col, val) - elif sheet == "cr": - self.core_raw_sheet.write(row, col, val) - - def close(self): - self.book.close() - - -# global class object for excel writing -OUT_WORKBOOK = workbook() -EXCEL_OUT = False - -# assumes sampling interval or dump interval is 1s -CONST_INTERVAL = 1.0 -CONST_TSC_FREQ = 0.0 -CONST_CORE_COUNT = 0.0 -CONST_HT_COUNT = 0.0 -CONST_SOCKET_COUNT = 0.0 -CONST_IMC_COUNT = 0.0 -CONST_CHA_COUNT = 0.0 -CONST_ARCH = "" -EVENT_GROUPING = False -PERCORE_MODE = False -TIME_ZONE = "UTC" -PERF_EVENTS = [] -SOCKET_CORES = [] -CGROUPS = False -CGROUP_HASH = {} - - -# get the PMU names from metric expression -def get_metric_events(formula): - f_len = len(formula) - start = 0 - metric_events = [] - while start < f_len: - s_idx = formula.find("[", start) - e_idx = formula.find("]", start) - if s_idx != -1 and e_idx != -1: - metric_events.append(formula[s_idx + 1 : e_idx]) - else: - break - start = e_idx + 1 - return metric_events - - -# get event index based on the groupid -def get_event_index(group_id, event, event_dict): - offset = 0 - for i in range(group_id): - offset += len(event_dict[i]) - idx = offset + event_dict[group_id].index(event) - return idx - - -# evaluate metric expression -def evaluate_expression( - formula, const_dict, value_list, event_dict, level=0, lvl_idx=-1 -): - temp_formula = formula - metric_events = get_metric_events(formula) - formula = formula.replace("[", "") - formula = formula.replace("]", "") - - # use socket count as one when evaluating per socket - # TSC accumulation at socket level and core - if level == 1: - const_dict["const_socket_count"] = 1 - const_dict["const_TSC"] = CONST_TSC_FREQ * CONST_CORE_COUNT * CONST_HT_COUNT - elif level == 2: - const_dict["const_TSC"] = CONST_TSC_FREQ - - # assign 
consts in the expression and create a list for collected events - collected_events = [] - for event in metric_events: - if event in const_dict: - formula = formula.replace(event, str(const_dict[event])) - else: - collected_events.append(event) - - grouped = False - for group, events in event_dict.items(): - # check if all events needed for the metric are in the same group - if all(event in events for event in collected_events): - grouped = True - for event in collected_events: - if level == 0: - idx = ( - get_event_index(group, event, event_dict) + 1 - ) # add 1 to account for the time column - elif level == 1: - idx = ( - get_event_index(group, event, event_dict) - * int(CONST_SOCKET_COUNT) - + lvl_idx - + 1 - ) - elif level == 2: - idx = ( - get_event_index(group, event, event_dict) - * get_online_corecount() - + lvl_idx - + 1 - ) - try: - # TODO: clean it up. quick fix for strings with / - if event.startswith("power/") or event.startswith("cstate"): - formula = formula.replace(event, str(value_list[idx])) - else: - formula = re.sub( - r"\b" + event + r"\b", str(value_list[idx]), formula - ) - except IndexError: - log.error("Index Error while evaluating expression") - log.error(formula, event, idx, len(value_list)) - exit() - - break - - # pick first matching event from the event list if not grouped - if not grouped: - for event in collected_events: - for group, events in event_dict.items(): - if event in events: - if level == 0: - idx = ( - get_event_index(group, event, event_dict) + 1 - ) # add 1 to account for the time column - elif level == 1: - idx = ( - get_event_index(group, event, event_dict) - * int(CONST_SOCKET_COUNT) - + lvl_idx - + 1 - ) - elif level == 2: - idx = ( - get_event_index(group, event, event_dict) - * get_online_corecount() - + lvl_idx - + 1 - ) - # TODO: clean it up. 
quick fix for strings with / - if event.startswith("power/") or event.startswith("cstate"): - formula = formula.replace(event, str(value_list[idx])) - else: - formula = re.sub( - r"\b" + event + r"\b", str(value_list[idx]), formula - ) - break - result = "" - global zero_division_errcount - global total_samples - try: - result = str( - "{:.8f}".format(simple_eval(formula, functions={"min": min, "max": max})) - ) - except ZeroDivisionError: - # ignore the systems with no PMEM - if "UNC_M_PMM" not in temp_formula and "UNC_M_TAGC" not in temp_formula: - zero_division_errcount += 1 - result = "0" - pass - except SyntaxError: - log.error("Syntax error evaluating ", formula) - log.error(temp_formula) - sys.exit() - except Exception as e: - log.exception(e) - log.exception(temp_formula) - log.exception("Unknown error evaluating ", formula) - sys.exit() - total_samples += 1 - return result - - -# disable invalid events -def disable_event(index): - global PERF_EVENTS - try: - PERF_EVENTS[index] = "#" + PERF_EVENTS[index] - except IndexError: - exit("Index out of range for disabling perf event") - - -def validate_file(fname): - if not os.access(fname, os.R_OK): - raise SystemExit(str(fname) + " not accessible") -def is_safe_file(fname, substr): - if not fname.endswith(substr): - raise SystemExit(str(fname) + " not a valid file, expecting " + str(substr)) - return 1 - - -# get events from perf event file -def get_perf_events(level): - event_list = [] - event_dict = collections.OrderedDict() - group_id = 0 - for line in PERF_EVENTS: - if (line != "\n") and (line.startswith("#") is False): - if level == 2 and line.strip().endswith( - ":u" - ): # ignore uncore events for percore processing - continue - # remove the core/uncore identifier - line = line.strip()[:-2] - new_group = False - if line.strip().endswith(";"): - new_group = True - - line = line.strip()[:-1] - event = line - if "name=" in line: - event = (line.split("'"))[1] - event_list.append(event) - if 
event_dict.get(group_id) is None: - event_dict.setdefault(group_id, [event]) - else: - event_dict[group_id].append(event) - if new_group: - group_id += 1 - return event_list, event_dict +class Mode(Enum): + System = 1 + Socket = 2 + Core = 3 # get the filenames for miscellaneous outputs -def get_extra_out_file(out_file, t, excelsheet=False): +def get_extra_out_file(out_file, t): dirname = os.path.dirname(out_file) filename = os.path.basename(out_file) t_file = "" if t == "a": - text = "sys.average" if excelsheet else "average" + text = "sys.average" elif t == "r": - text = "sys.raw" if excelsheet else "raw" + text = "sys.raw" elif t == "s": text = "socket" elif t == "sa": @@ -368,1208 +57,860 @@ def get_extra_out_file(out_file, t, excelsheet=False): text = "core.raw" elif t == "m": text = "sys" - if excelsheet: - return text + parts = os.path.splitext(filename) if len(parts) == 1: t_file = text + "." + filename else: t_file = parts[-2] + "." + text + ".csv" - if is_safe_file(t_file, ".csv"): - pass return os.path.join(dirname, t_file) -# load metrics from json file and evaluate -# level: 0-> system, 1->socket, 2->thread -def load_metrics(infile, outfile, level=0): - global CGROUPS +def get_args(script_path): + parser = ArgumentParser(description="perf-postprocess: perf post process") + required_arg = parser.add_argument_group("required arguments") + required_arg.add_argument( + "-r", + "--rawfile", + type=str, + default=None, + help="Raw CSV output from perf-collect", + ) + parser.add_argument( + "--version", "-V", help="display version information", action="store_true" + ) + parser.add_argument( + "-o", + "--outfile", + type=str, + default=script_path + "/results/metric_out.csv", + help="perf stat outputs in csv format, default=results/metric_out.csv", + ) + parser.add_argument( + "-v", + "--verbose", + help="include debugging information, keeps all intermediate csv files", + action="store_true", + ) + parser.add_argument( + "--rawevents", + help="save raw events 
in .csv format", + action="store_true", + ) + parser.add_argument( + "-html", + "--html", + type=str, + default=None, + help="Static HTML report", + ) - event_list, event_dict = get_perf_events(level) - metrics = {} - validate_file(metric_file) - with open(metric_file, "r") as f_metric: - try: - metrics = json.load(f_metric) - except json.decoder.JSONDecodeError: - raise SystemExit( - "Invalid JSON, please provide a valid JSON as metrics file" - ) + args = parser.parse_args() - for i, metric in enumerate(metrics): - metric_events = get_metric_events(metric["expression"].strip()) - metrics[i]["add"] = True - # check if metric can be computed from the current events - for e in metric_events: - if e.startswith("const"): - continue - if e not in event_list: - metrics[i]["add"] = False - f_metric.close() - - metric_row = ["time"] - add_metrics = False - if is_safe_file(out_metric_file, ".csv"): - pass - for m in metrics: - if m["add"] is True: - add_metrics = True - if level == 0: - metric_row.append(m["name"]) - if CGROUPS == "enabled": - input_file = infile - f_out = open(outfile, "w") - else: - input_file = output_file - f_out = open(out_metric_file, "w") - sheet_type = "m" - elif level == 1: - for s in range(int(CONST_SOCKET_COUNT)): - metric_row.append(m["name"] + ".S" + str(s)) - socket_file = get_extra_out_file(out_metric_file, "s") - f_out = open(socket_file, "w") - input_file = tmp_socket_file - sheet_type = "s" - elif level == 2: - for c in range( - int(CONST_CORE_COUNT * CONST_HT_COUNT * CONST_SOCKET_COUNT) - ): - metric_row.append(m["name"] + ".C" + str(c)) - core_file = get_extra_out_file(out_metric_file, "c") - f_out = open(core_file, "w") - input_file = time_dump_file - sheet_type = "c" - - # nothing to do, return - if not add_metrics: - return 0 - - metriccsv = csv.writer(f_out, dialect="excel") - metriccsv.writerow(metric_row) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(0, metric_row, sheet_type) - f_pmu = open(input_file, "r") - pmucsv = csv.reader(f_pmu, 
delimiter=",") - - if CGROUPS == "enabled": - const_TSC = CONST_TSC_FREQ * CPUSETS[infile.rsplit("_", 1)[1].split(".")[0]] - else: - const_TSC = ( - CONST_TSC_FREQ * CONST_CORE_COUNT * CONST_HT_COUNT * CONST_SOCKET_COUNT - ) + # if args.version, print version then exit + if args.version: + print(perf_helpers.get_tool_version()) + sys.exit() - const_dict = { - "const_tsc_freq": CONST_TSC_FREQ, - "const_core_count": CONST_CORE_COUNT, - "const_socket_count": CONST_SOCKET_COUNT, - "const_thread_count": CONST_HT_COUNT, - "const_cha_count": CONST_CHA_COUNT, - "const_TSC": const_TSC, - } - pmu_row_count = 0 - metric_value = [""] * len(metric_row) - for row in pmucsv: - if not row: - continue - if pmu_row_count > 0: - metric_value[0] = row[0] - for metric in metrics: - if metric["add"]: - if level == 0: - idx = metric_row.index(metric["name"]) - result = evaluate_expression( - metric["expression"], const_dict, row, event_dict - ) - metric_value[idx] = result - elif level == 1: - for s in range(int(CONST_SOCKET_COUNT)): - metric_name = metric["name"] + ".S" + str(s) - idx = metric_row.index(metric_name) - result = evaluate_expression( - metric["expression"], - const_dict, - row, - event_dict, - level, - s, - ) - metric_value[idx] = result - elif level == 2: - for c in range( - int(CONST_CORE_COUNT * CONST_HT_COUNT * CONST_SOCKET_COUNT) - ): - metric_name = metric["name"] + ".C" + str(c) - idx = metric_row.index(metric_name) - result = ( - evaluate_expression( - metric["expression"], - const_dict, - row, - event_dict, - level, - c, - ) - if is_online_core(c) - else 0.0 - ) - metric_value[idx] = result - metriccsv.writerow(metric_value) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(pmu_row_count, metric_value, sheet_type) - pmu_row_count += 1 - - f_out.close() - f_pmu.close() - return 1 - - -# generate summary output with averages, min, max, p95 for cgroups -def write_cgroup_summary(): - avgdf = pd.DataFrame(columns=["metrics"]) - for file in out_metric_files: - df = 
pd.read_csv(file).iloc[:, 1:] - # extract avg, p95, min, and max columns - avgcol = df.mean(axis=0).to_frame().reset_index() - p95col = df.quantile(q=0.95, axis=0).to_frame().reset_index() - mincol = df.min(axis=0).to_frame().reset_index() - maxcol = df.max(axis=0).to_frame().reset_index() - # get container id - container = os.path.basename(file).split(".")[0].split("_")[-1] - # define columns headers - avgcol.columns = ["metrics", "avg"] - p95col.columns = ["metrics", "p95"] - mincol.columns = ["metrics", "min"] - maxcol.columns = ["metrics", "max"] - # merge columns - avgdf = avgdf.merge(avgcol, on="metrics", how="outer") - avgdf = avgdf.merge(p95col, on="metrics", how="outer") - avgdf = avgdf.merge(mincol, on="metrics", how="outer") - avgdf = avgdf.merge(maxcol, on="metrics", how="outer") - # generate output file, one for each container id - sum_file = get_extra_out_file( - out_metric_file.replace(".csv", "_" + container + ".csv"), "a" + # check rawfile argument is given + if args.rawfile is None: + crash("Missing raw file, please provide raw csv generated using perf-collect") + + # check rawfile argument exists + if args.rawfile and not os.path.isfile(args.rawfile): + crash("perf raw data file not found, please provide valid raw file") + + # check output file is valid + if not perf_helpers.validate_outfile(args.outfile, True): + crash( + "Output filename: " + + args.outfile + + " not accepted. 
Filename should be a .csv without special characters" ) - avgdf.to_csv(sum_file, index=False) - return + # check output file is writable + if not perf_helpers.check_file_writeable(args.outfile): + crash("Output file %s not writeable " % args.outfile) -# generate summary output with averages, min, max, p95 -def write_summary(level=0): - if level == 0: - metric_file = out_metric_file - elif level == 1: - metric_file = get_extra_out_file(out_metric_file, "s") - elif level == 2: - metric_file = get_extra_out_file(out_metric_file, "c") - validate_file(metric_file) - f_metrics = open(metric_file, "r") - columns = collections.defaultdict(list) - reader = csv.DictReader(f_metrics, delimiter=",") - - first_row = True - metrics = [] - for row in reader: - if first_row: - for h in reader.fieldnames: - metrics.append(h) - first_row = False - for k, v in row.items(): - columns[k].append(float(v)) - - sheet_type = "" - if level == 0: - sum_file = get_extra_out_file(out_metric_file, "a") - first_row = ["metrics", "avg", "p95", "min", "max"] - sheet_type = "a" - elif level == 1: - sum_file = get_extra_out_file(out_metric_file, "sa") - first_row = ["metrics"] - out_row = [""] * (int(CONST_SOCKET_COUNT) * 2 + 1) - for t in range(2): - for i in range(int(CONST_SOCKET_COUNT)): - first_row.append("S" + str(i) + (".avg" if t == 0 else ".p95")) - sheet_type = "sa" - elif level == 2: - sum_file = get_extra_out_file(out_metric_file, "ca") - first_row = ["metrics"] - corecount = get_online_corecount() - out_row = [""] * (corecount + 1) - for i in range(corecount): - first_row.append("C" + str(i) + ".avg") - sheet_type = "ca" - - f_sum = open(sum_file, "w") - sumcsv = csv.writer(f_sum, dialect="excel") - sumcsv.writerow(first_row) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(0, first_row, sheet_type) - out_idx = 1 - - for i, h in enumerate(metrics): - if i == 0: - continue - avg = sum(columns[h]) / len(columns[h]) - minval = min(columns[h]) - maxval = max(columns[h]) - p95 = 
perf_helpers.percentile(columns[h], 0.95) - if level == 0: - sumcsv.writerow([h, avg, p95, minval, maxval]) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(i, [h, avg, p95, minval, maxval], sheet_type) - elif level == 1: - # [metric, S0.avg, S1.avg, S0.p95, S1.p95] - socket_id = (i - 1) % int( - CONST_SOCKET_COUNT - ) # -1 for first column in metrics - time - out_row[socket_id + 1] = avg - out_row[socket_id + 1 + int(CONST_SOCKET_COUNT)] = p95 - if socket_id == (int(CONST_SOCKET_COUNT) - 1): - out_row[0] = h[:-3] # to remove .S0/.S1 etc - sumcsv.writerow(out_row) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(out_idx, out_row, sheet_type) - out_idx += 1 - elif level == 2: - # [metric, C0.avg, C1.avg, .. CN-1.avg] - core_id = (i - 1) % corecount - out_row[core_id + 1] = avg - if core_id == (corecount - 1): - name_len = len(h) - len(h.split(".")[-1]) - 1 - out_row[0] = h[:name_len] - sumcsv.writerow(out_row) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(out_idx, out_row, sheet_type) - out_idx += 1 - - -def get_online_corecount(): - return int(CONST_CORE_COUNT * CONST_HT_COUNT * CONST_SOCKET_COUNT) - - -def is_online_core(c): - return True - - -# get metadata from perf stat dump -def get_metadata(): - global CONST_TSC_FREQ - global CONST_CORE_COUNT - global CONST_HT_COUNT - global CONST_SOCKET_COUNT - global CONST_IMC_COUNT - global CONST_CHA_COUNT - global PERF_EVENTS - global CONST_INTERVAL - global CONST_ARCH - global EVENT_GROUPING - global PERCORE_MODE - global SOCKET_CORES - global TIME_ZONE - global CGROUPS - global CGROUP_HASH - global CPUSETS - - start_events = False - validate_file(dat_file) - # check if metadata exists in raw dat file - with open(dat_file) as f: - if "### META DATA ###" not in f.read(): - raise SystemExit( - "The perf raw file doesn't contain metadata, please re-collect perf raw data" - ) + return args + + +# get metadata lines and perf events' lines in three separate lists +def get_all_data_lines(input_file_path): + with open(input_file_path, "r") as infile: + 
lines = infile.readlines() + + # input file has three headers: + # 1- ### META DATA ###, + # 2- ### PERF EVENTS ###, + # 3- ### PERF DATA ###, + + meta_data_lines = [] + perf_events_lines = [] + perf_data_lines = [] - f_dat = open(dat_file, "r") - for line in f_dat: - if start_events: - if "PERF DATA" in line: - break - PERF_EVENTS.append(line) - continue + meta_data_started = False + perf_events_started = False + perf_data_started = False + for idx, line in enumerate(lines): + if line.strip() == "": # skip empty lines + continue + + # check first line is META Data header + elif (idx == 0) and ("### META DATA ###" not in line): + crash( + "The perf raw file doesn't contain metadata, please re-collect perf raw data" + ) + elif "### META DATA ###" in line: + meta_data_started = True + perf_events_started = False + perf_data_started = False + + elif "### PERF EVENTS ###" in line: + meta_data_started = False + perf_events_started = True + perf_data_started = False + + elif "### PERF DATA ###" in line: + meta_data_started = False + perf_events_started = False + perf_data_started = True + + elif meta_data_started: + meta_data_lines.append(line.strip()) + + elif perf_events_started: + perf_events_lines.append(line.strip()) + + elif perf_data_started: + if line.startswith("# started on"): + # this line is special, it is under "PERF DATA" (printed by perf), but it is treatesd as metadata + meta_data_lines.append(line.strip()) + else: + fields = line.split(",") + perf_data_lines.append(fields) - EVENT_GROUPING = True + infile.close() + return meta_data_lines, perf_events_lines, perf_data_lines + + +# get_metadata +def get_metadata_as_dict(meta_data_lines): + meta_data = {} + meta_data["constants"] = {} + for line in meta_data_lines: if line.startswith("TSC"): - CONST_TSC_FREQ = float(line.split(",")[1]) * 1000000 + meta_data["constants"]["CONST_TSC_FREQ"] = ( + float(line.split(",")[1]) * 1000000 + ) elif line.startswith("CPU"): - CONST_CORE_COUNT = float(line.split(",")[1]) 
+ meta_data["constants"]["CONST_CORE_COUNT"] = float(line.split(",")[1]) elif line.startswith("HT"): - CONST_HT_COUNT = float(line.split(",")[1]) + meta_data["constants"]["CONST_HT_COUNT"] = float(line.split(",")[1]) + meta_data["constants"]["CONST_THREAD_COUNT"] = float( + line.split(",")[1] + ) # we use both constants interchangeably elif line.startswith("SOCKET"): - CONST_SOCKET_COUNT = float(line.split(",")[1]) + meta_data["constants"]["CONST_SOCKET_COUNT"] = float(line.split(",")[1]) elif line.startswith("IMC"): - CONST_IMC_COUNT = float(line.split(",")[1]) + meta_data["constants"]["CONST_IMC_COUNT"] = float(line.split(",")[1]) elif line.startswith("CHA") or line.startswith("CBOX"): - CONST_CHA_COUNT = float(line.split(",")[1]) + meta_data["constants"]["CONST_CHA_COUNT"] = float(line.split(",")[1]) elif line.startswith("Sampling"): - CONST_INTERVAL = float(line.split(",")[1]) + meta_data["constants"]["CONST_INTERVAL"] = float(line.split(",")[1]) elif line.startswith("Architecture"): - CONST_ARCH = str(line.split(",")[1]) + meta_data["constants"]["CONST_ARCH"] = str(line.split(",")[1]) + + elif line.startswith("Event grouping"): + meta_data["EVENT_GROUPING"] = ( + True if (str(line.split(",")[1]) == "enabled") else False + ) elif line.startswith("cgroups"): - # Get cgroup status and cgroup_id to container_name conversions - CGROUP_HASH = dict( - item.split("=") for item in line.rstrip(",\n").split(",") + if line.startswith("cgroups=disabled"): + meta_data["CGROUPS"] = "disabled" + continue + # Get cgroup status and cgroup_id to container_name mapping + meta_data["CGROUPS"] = "enabled" + meta_data["CGROUP_HASH"] = dict( + item.split("=") + for item in line.split("cgroups=enabled,")[1].rstrip(",\n").split(",") ) docker_HASH = [] - docker_HASH = list(CGROUP_HASH.values()) - CGROUPS = CGROUP_HASH.get("cgroups") - del CGROUP_HASH["cgroups"] - # No percore/socket view with CGROUP mode - if CGROUPS == "enabled": - if args.percore or args.persocket: - raise SystemExit( 
- "Percore and Persocket views not supported when perf collection with cgroups" - ) - elif line.startswith("cpusets") and CGROUPS == "enabled": - CPUSETS = str(line.split(",")[1]) + docker_HASH = list(meta_data["CGROUP_HASH"].values()) + elif ( + line.startswith("cpusets") + and "CGROUPS" in meta_data + and meta_data["CGROUPS"] == "enabled" + ): + line = line.replace("cpusets,", "") docker_SETS = [] docker_SETS = line.split(",") docker_SETS = docker_SETS[:-1] - CPUSETS = {} + # here lognth of docker_HASH should be exactly len(docker_SETS) + assert len(docker_HASH) == len(docker_SETS) + meta_data["CPUSETS"] = {} for i in range(1, len(docker_SETS)): docker_SET = str(docker_SETS[i]) docker_SET = ( int(docker_SET.split("-")[1]) - int(docker_SET.split("-")[0]) + 1 ) - CPUSETS[docker_HASH[i]] = docker_SET + meta_data["CPUSETS"][docker_HASH[i]] = docker_SET elif line.startswith("Percore mode"): - PERCORE_MODE = True if (str(line.split(",")[1]) == "enabled") else False + meta_data["PERCORE_MODE"] = ( + True if (str(line.split(",")[1]) == "enabled") else False + ) + + elif line.startswith("Persocket mode"): + meta_data["PERSOCKET_MODE"] = ( + True if (str(line.split(",")[1]) == "enabled") else False + ) + elif line.startswith("# started on"): - TIME_ZONE = str(line.split(",")[2]) + meta_data["TIME_ZONE"] = str(line.split("# started on")[1]) + elif line.startswith("Socket"): + if "SOCKET_CORES" not in meta_data: + meta_data["SOCKET_CORES"] = [] cores = ((line.split("\n")[0]).split(",")[1]).split(";")[:-1] - SOCKET_CORES.append(cores) - elif "### PERF EVENTS" in line: - start_events = True - f_dat.close() - - -# write perf output from perf stat dump -def write_perf_tmp_output(use_epoch): - global CONST_TSC_FREQ - global CONST_CORE_COUNT - global CONST_HT_COUNT - global CONST_SOCKET_COUNT - global CONST_IMC_COUNT - global CONST_CHA_COUNT - global CONST_INTERVAL - global PERCORE_MODE - global TIME_ZONE - global CGROUPS - global CGROUP_HASH - global CPUSETS - - outcsv, f_out = 
{}, {} - fkey = "default" - # Ready the temp files to be written - if CGROUPS == "enabled": - i = 0 - for value in CGROUP_HASH.values(): - time_dump_files.append( - script_path + "/_tmp_perf_/time_dump_" + value + ".csv" - ) - f_out[value] = open(time_dump_files[i], "w") - outcsv[value] = csv.writer(f_out[value], dialect="excel") - i += 1 + meta_data["SOCKET_CORES"].append(cores) + + return meta_data + + +def set_CONST_TSC(meta_data, perf_mode, num_cpus=0): + if perf_mode == Mode.System: + meta_data["constants"]["CONST_TSC"] = ( + meta_data["constants"]["CONST_TSC_FREQ"] + * meta_data["constants"]["CONST_CORE_COUNT"] + * meta_data["constants"]["CONST_HT_COUNT"] + * meta_data["constants"]["CONST_SOCKET_COUNT"] + ) + elif perf_mode == Mode.Socket: + meta_data["constants"]["CONST_TSC"] = ( + meta_data["constants"]["CONST_TSC_FREQ"] + * meta_data["constants"]["CONST_CORE_COUNT"] + * meta_data["constants"]["CONST_HT_COUNT"] + ) + elif perf_mode == Mode.Core: # Core should be changed to thread + meta_data["constants"]["CONST_TSC"] = meta_data["constants"]["CONST_TSC_FREQ"] + elif meta_data["CGROUPS"] == "enabled": + meta_data["constants"]["CONST_TSC"] = ( + meta_data["constants"]["CONST_TSC_FREQ"] * num_cpus + ) + return + + +def get_event_name(event_line): + event_name = event_line + if "name=" in event_name: + matches = re.findall(r"\.*name=\'(.*?)\'.*", event_name) + assert len(matches) > 0 + event_name = matches[0] + if event_name.endswith(":c"): # core event + event_name = event_name.split(":c")[0] + if event_name.endswith(":u"): # uncore event + event_name = event_name.split(":u")[0] + # clean up , or ; + event_name = event_name.replace(",", "").replace(";", "") + + return event_name + + +def get_event_groups(event_lines): + groups = {} + group_indx = 0 + + current_group = [] + for event in event_lines: + if ";" in event: # end of group + current_group.append(get_event_name(event)) + groups["group_" + str(group_indx)] = current_group + group_indx += 1 + 
current_group = [] + else: + current_group.append(get_event_name(event)) + return groups + + +def get_metric_file_name(microarchitecture): + metric_file = "" + if microarchitecture == "broadwell": + metric_file = "metric_bdx.json" + elif microarchitecture == "skylake" or microarchitecture == "cascadelake": + metric_file = "metric_skx_clx.json" + elif microarchitecture == "icelake": + metric_file = "metric_icx.json" + elif microarchitecture == "sapphirerapids": + metric_file = "metric_spr.json" else: - f_out[fkey] = open(time_dump_file, "w") - outcsv[fkey] = csv.writer(f_out[fkey], dialect="excel") - # Skip header till pattern match - match = 0 - epoch = 0 - validate_file(dat_file) - for n, line in enumerate(open(dat_file)): - if "PERF DATA" in line: - match = n + 3 - # If using EPOCH - if use_epoch: - if "EPOCH" in line: - words = "".join(line).split() - try: - epoch = int(words[-1]) - except ValueError: - exit("Conversion error parsing timestamp") - break - # TO:DO remove "not_counted" and "not_supported" events from dat_file - - # Read in rest of file as Pandas Dataframe - df = pd.read_csv(dat_file, header=None, skipinitialspace=True, skiprows=match) - pd.set_option("display.max_rows", None, "display.max_columns", None) - # Get column indexes from dataframe - time, value, events, percent = 0, 1, 3, 5 - order = [time, events, value, percent] - header = ["time", "event", "value", "percent"] - if PERCORE_MODE: - cpuid, value, events, percent = 1, 2, 4, 6 - order = [time, cpuid, events, value, percent] - header.insert(1, "cpu") - elif CGROUPS == "enabled": - cgroups, percent = 4, 6 - order = [time, cgroups, events, value, percent] - header.insert(1, "cgroup") - - # Slice DF into time chunks and process - group_df = df[order].groupby(time, sort=False) - last_sample_time, samples = 0, 0 # Set time variables for iteration - for key, item in group_df: # Key is time - df = group_df.get_group(key) - df.columns = header # Assign header - if use_epoch: - ctime = int(key) + 
epoch + crash("Suitable metric file not found") + + # Convert path of json file to relative path if being packaged by pyInstaller into a binary + if getattr(sys, "frozen", False): + basepath = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))) + metric_file = os.path.join(basepath, metric_file) + elif __file__: + metric_file = script_path + "/events/" + metric_file + else: + crash("Unknown application type") + return metric_file + + +def validate_file(fname): + if not os.access(fname, os.R_OK): + crash(str(fname) + " not accessible") + + +def get_metrics_formula(architecture): + # get the metric file name based on architecture + metric_file = get_metric_file_name(architecture) + validate_file(metric_file) + + with open(metric_file, "r") as f_metric: + try: + metrics = json.load(f_metric) + for m in metrics: + m["events"] = re.findall(r"\[(.*?)\]", m["expression"]) + + return metrics + except json.decoder.JSONDecodeError: + crash("Invalid JSON, please provide a valid JSON as metrics file") + return + + +def get_socket_number(sockets_dict, core): + core_index = core.replace("CPU", "") + for s in range(len(sockets_dict)): + if core_index in sockets_dict[s]: + return s + return + + +def extract_dataframe(perf_data_lines, meta_data, perf_mode): + # parse event data into dataframe and set header names + perf_data_df = pd.DataFrame(perf_data_lines) + if "CGROUPS" in meta_data and meta_data["CGROUPS"] == "enabled": + # 1.001044566,6261968509,,L1D.REPLACEMENT,/system.slice/docker-826c1c9de0bde13b0c3de7c4d96b38710cfb67c2911f30622508905ece7e0a16.scope,6789274819,5.39,, + assert len(perf_data_df.columns) >= 7 + columns = [ + "ts", + "value", + "col0", + "metric", + "cgroup", + "col1", + "percentage", + ] + # add dummy col names for remaining columns + for col in range(7, len(perf_data_df.columns)): + columns.append("col" + str(col)) + perf_data_df.columns = columns + elif perf_mode == Mode.System: + # Ubuntu 16.04 returns 6 columns, later Ubuntu's and other 
OS's return 8 columns + assert len(perf_data_df.columns) >= 6 + columns = [ + "ts", + "value", + "col0", + "metric", + "value2", + "percentage", + ] + # add dummy col names for remaining columns + for col in range(6, len(perf_data_df.columns)): + columns.append("col" + str(col)) + perf_data_df.columns = columns + elif perf_mode == Mode.Core or perf_mode == Mode.Socket: + assert len(perf_data_df.columns) >= 7 + columns = [ + "ts", + "cpu", + "value", + "col0", + "metric", + "value2", + "percentage", + ] + # add dummy col names for remaining columns + for col in range(7, len(perf_data_df.columns)): + columns.append("col" + str(col)) + perf_data_df.columns = columns + # Add socket column + perf_data_df["socket"] = perf_data_df.apply( + lambda x: "S" + str(get_socket_number(meta_data["SOCKET_CORES"], x["cpu"])), + axis=1, + ) + + # fix metric name X.1, X.2, etc -> just X + perf_data_df["metric"] = perf_data_df.apply( + lambda x: ".".join(x["metric"].split(".")[:-1]) + if len(re.findall(r"^[0-9]*$", x["metric"].split(".")[-1])) > 0 + else x["metric"], + axis=1, + ) + + # set data frame types + # perf_data_df = perf_data_df.astype({'value': 'float'}) + perf_data_df["value"] = pd.to_numeric( + perf_data_df["value"], errors="coerce" + ).fillna(0) + # perf_data_df = perf_data_df.astype({'value2': 'float'}) + # perf_data_df = perf_data_df.astype({"percentage":"float"}) + + return perf_data_df + + +# get group data frame after grouping +def get_group_df(time_slice_df, start_index, end_of_group_index, perf_mode): + g_df = time_slice_df[start_index:end_of_group_index] + if perf_mode == Mode.System: + g_df = g_df[["metric", "value"]].groupby("metric")["value"].sum().to_frame() + elif perf_mode == Mode.Socket: + if "socket" in g_df: + g_df = ( + g_df[["metric", "socket", "value"]] + .groupby(["metric", "socket"])["value"] + .sum() + .to_frame() + ) else: - ctime = samples + 1 - precise_time = float(key) - last_sample_time - # Write back header to outcsv - if PERCORE_MODE: - if 
last_sample_time == 0: # Extracts header - eventnames = ["time"] + ( - df["event"] + "." + df["cpu"].str.replace("CPU", "") - ).tolist() - outcsv[fkey].writerow(eventnames) - eventvalues = ( - [ctime] - + ( - pd.to_numeric(df["value"], errors="coerce").fillna(0) / precise_time - ).to_list() - + pd.to_numeric(df["percent"], errors="coerce").fillna(0).to_list() - ) # Extracts values - outcsv[fkey].writerow(eventvalues) - elif CGROUPS == "enabled": - cgroup_df = df.groupby("cgroup", sort=False) - for ckey, citem in cgroup_df: - df = cgroup_df.get_group(ckey) - if last_sample_time == 0: # Extracts header - eventnames = ( - ["time"] - + df["event"].tolist() - + [x + " %sample" for x in df["event"].tolist()] - ) - outcsv[CGROUP_HASH[ckey]].writerow(eventnames) - eventvalues = ( - [ctime] - + ( - pd.to_numeric(df["value"], errors="coerce").fillna(0) - / precise_time - ).to_list() - + pd.to_numeric(df["percent"], errors="coerce").fillna(0).to_list() - ) # Format event values + percent - outcsv[CGROUP_HASH[ckey]].writerow(eventvalues) + crash("No socket information found, exiting...") + elif perf_mode == Mode.Core: # check dataframe has cpu column, otherwise raise error + if "cpu" in g_df: + g_df = ( + g_df[["metric", "cpu", "value"]] + .groupby(["metric", "cpu"])["value"] + .sum() + .to_frame() + ) else: - if last_sample_time == 0: # Extracts header - eventnames = ( - ["time"] - + df["event"].tolist() - + [x + " %sample" for x in df["event"].tolist()] - ) - outcsv[fkey].writerow(eventnames) - eventvalues = ( - [ctime] - + ( - pd.to_numeric(df["value"], errors="coerce").fillna(0) / precise_time - ).to_list() - + pd.to_numeric(df["percent"], errors="coerce").fillna(0).to_list() - ) # Extracts values - outcsv[fkey].writerow(eventvalues) - - last_sample_time = float(key) - samples += 1 - if CGROUPS == "enabled": - for val in CGROUP_HASH.values(): - f_out[val].close() + crash("No CPU information found, exiting...") + + return g_df + + +def get_event_expression_from_group( + 
expressions_to_evaluate, event_df, exp_to_evaluate, event +): + if event_df.shape == (1,): # system wide + if "sys" not in expressions_to_evaluate: + expressions_to_evaluate["sys"] = exp_to_evaluate.replace( + "[" + event + "]", str(event_df[0]) + ) + else: + expressions_to_evaluate["sys"] = expressions_to_evaluate["sys"].replace( + "[" + event + "]", str(event_df[0]) + ) else: - f_out[fkey].close() - return samples - - -# core level accumulation -def write_core_view(): - core_file = get_extra_out_file(out_metric_file, "cr") - f_out = open(core_file, "w") - outcsv = csv.writer(f_out, dialect="excel") - f_in = open(time_dump_file, "r") - incsv = csv.reader(f_in, delimiter=",") - rowcount = 0 - names = [] - idxs = [] - events, _ = get_perf_events(2) - sumrow = [] - for row in incsv: - if not row: - continue - if not rowcount: - for i, event in enumerate(row): - id_len = len(event.split(".")[-1]) - name = event[: len(event) - id_len - 1] - - if name in events: - names.append(event) - idxs.append(i) # store indexes of input file - rowcount = rowcount + 1 - sumrow = [0.0] * len(names) - continue - for i, idx in enumerate(idxs): - sumrow[i] += float(row[idx]) - rowcount += 1 - - # summary/raw file. format: - # metrics, c0, c1, c2 .. - # name_of_metric, val0, val1, val2 .. 
- first_row = ["metrics"] - core_count = get_online_corecount() - for i in range(core_count): - first_row.append("C" + str(i)) - outcsv.writerow(first_row) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(0, first_row, "cr") - out_idx = 1 - tempsum = [0.0] * (core_count) - for i in range(len(sumrow)): - core_id = i % core_count - tempsum[core_id] = int(sumrow[i] / rowcount) - if core_id == core_count - 1: - temprow = [] - name_len = len(names[i]) - len(names[i].split(".")[-1]) - 1 - temprow.append((names[i])[:name_len]) - for s in tempsum: - temprow.append(str(s)) - outcsv.writerow(temprow) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(out_idx, temprow, "cr") - out_idx += 1 - f_out.close() - f_in.close() - - -# for storing column indicies for socket view -class persocket_idx: - def __init__(self, name, idx): - self.name = name - self.idx = idx - - def display(self): - log.info(self.name) - log.info(self.idx) - - def getidx(self): - return self.idx - - def getname(self): - return self.name - - def append(self, level): - for i, val in enumerate(level): - if len(val): - self.idx[i].extend(val) - - -# create socketlevel accumulation -def write_socket_view(level, samples): - global SOCKET_CORES - global EVENT_GROUPING - global EXCEL_OUT - socket_count = len(SOCKET_CORES) - - f_out = open(tmp_socket_file, "w") - outcsv = csv.writer(f_out, dialect="excel") - f_in = open(time_dump_file, "r") - incsv = csv.reader(f_in, delimiter=",") - - row_count = 0 - prev_event_name = "" - outrow0 = [] - mappings = [] - sumrow = [] - writeoutput = True - - for inrow in incsv: - if not inrow: - continue - rowlen = len(inrow) - if row_count == 0: - core_to_idx = [] - for i, name in enumerate(inrow): - if i == 0: - # first column is time - outrow0.append(name) - continue - tmp = name.split(".") - coreid = (name.split("."))[-1] - namelen = len(name) - len(coreid) - 1 - name = name[:namelen] - if name.startswith("UNC") and EVENT_GROUPING: - namelen = len(name) - len(tmp[-2]) - 1 - name = name[:namelen] - - 
# flushout the indicies to mapping - # new event starting, push it to output list - if name != prev_event_name or i == (rowlen - 1): - if len(core_to_idx): - if i == (rowlen - 1): - for s, cores in enumerate(SOCKET_CORES): - if coreid in cores: - core_to_idx[s].append(i) - break - present = False - if name.startswith("UNC") and EVENT_GROUPING: - for m in mappings: - if m.getname() == prev_event_name: - m.append(core_to_idx) - present = True - break - # add to mapping if not present, or it is the last uncore event (assuming all core events come before the uncore events) - if (not present) or ( - name.startswith("UNC") - and not prev_event_name.startswith("UNC") - ): - mapping = persocket_idx(prev_event_name, core_to_idx) - mappings.append(mapping) - ename = mapping.getname() - for s in range(socket_count): - outrow0.append(ename + "." + str(s)) - core_to_idx = [] - for s in range(socket_count): - core_to_idx.append([]) - if i == (rowlen - 1): - outcsv.writerow(outrow0) - break - - prev_event_name = name - - for s, cores in enumerate(SOCKET_CORES): - if coreid in cores: - core_to_idx[s].append(i) - break - - row_count = row_count + 1 - if len(outrow0) != (len(mappings) * socket_count + 1): - log.error( - "something wrong in socket view processing %d %d" - % (len(outrow0), len(mappings)) - ) - sys.exit() - continue - - outrow = [0.0] * len(outrow0) - sumrow = [0.0] * len(outrow0) - prev_inrow = [0.0] * rowlen - for i, name in enumerate(outrow0): - if not i: - outrow[i] = inrow[i] + for index, value in event_df.iterrows(): + if index not in expressions_to_evaluate: + expressions_to_evaluate[index] = exp_to_evaluate + expressions_to_evaluate[index] = expressions_to_evaluate[index].replace( + "[" + event + "]", str(value[0]) + ) + return + + +def generate_metrics_time_series(time_series_df, perf_mode, out_file_path): + time_series_df_T = time_series_df.T + time_series_df_T.index.name = "time" + metric_file_name = "" + if perf_mode == Mode.System: + metric_file_name = 
get_extra_out_file(out_file_path, "m") + if perf_mode == Mode.Socket: + metric_file_name = get_extra_out_file(out_file_path, "s") + + if perf_mode == Mode.Core: + metric_file_name = get_extra_out_file(out_file_path, "c") + # generate metrics with time indexes + time_series_df_T.to_csv(metric_file_name) + return + + +def generate_metrics_averages(time_series_df, perf_mode, out_file_path): + average_metric_file_name = "" + if perf_mode == Mode.System: + average_metric_file_name = get_extra_out_file(out_file_path, "a") + if perf_mode == Mode.Socket: + average_metric_file_name = get_extra_out_file(out_file_path, "sa") + if perf_mode == Mode.Core: + average_metric_file_name = get_extra_out_file(out_file_path, "ca") + + time_series_df.index.name = "metrics" + avgcol = time_series_df.mean(numeric_only=True, axis=1).to_frame().reset_index() + p95col = time_series_df.quantile(q=0.95, axis=1).to_frame().reset_index() + mincol = time_series_df.min(axis=1).to_frame().reset_index() + maxcol = time_series_df.max(axis=1).to_frame().reset_index() + # define columns headers + avgcol.columns = ["metrics", "avg"] + p95col.columns = ["metrics", "p95"] + mincol.columns = ["metrics", "min"] + maxcol.columns = ["metrics", "max"] + # merge columns + time_series_df = time_series_df.merge(avgcol, on="metrics", how="outer") + time_series_df = time_series_df.merge(p95col, on="metrics", how="outer") + time_series_df = time_series_df.merge(mincol, on="metrics", how="outer") + time_series_df = time_series_df.merge(maxcol, on="metrics", how="outer") + + time_series_df[["metrics", "avg", "p95", "min", "max"]].to_csv( + average_metric_file_name, index=False + ) + return + + +def log_skip_metric(metric, instance, msg): + log.warning( + msg + + ': metric "' + + metric["name"] + + '" expression "' + + metric["expression"] + + '" values "' + + instance + + '"' + ) + + +def generate_metrics( + perf_data_df, + out_file_path, + group_to_event, + metadata, + metrics, + perf_mode, + verbose=False, +): + 
time_slice_groups = perf_data_df.groupby("ts", sort=False) + time_metrics_result = {} + for time_slice, item in time_slice_groups: + time_slice_df = time_slice_groups.get_group(time_slice) + current_group_indx = 0 + group_to_df = {} + start_index = 0 + end_of_group_index = 0 + for index, row in time_slice_df.iterrows(): + if row["metric"] in event_groups["group_" + str(current_group_indx)]: + end_of_group_index += 1 continue + else: # move to next group + group_to_df["group_" + str(current_group_indx)] = get_group_df( + time_slice_df, start_index, end_of_group_index, perf_mode + ) + start_index = end_of_group_index + end_of_group_index += 1 + current_group_indx += 1 + # add last group + group_to_df["group_" + str(current_group_indx)] = get_group_df( + time_slice_df, start_index, time_slice_df.shape[0], perf_mode + ) - socket_id = int((name.split("."))[-1]) - mapping_idx = int((i - 1) / socket_count) - mapping = mappings[mapping_idx] - - indices = mapping.getidx() - for idx in indices[socket_id]: - if float(inrow[idx]) >= 0.0: - outrow[i] = outrow[i] + float(inrow[idx]) - else: # invalid perf stat, drop the values if last sample, else use the previous values - if row_count == samples: - writeoutput = False - outrow[i] = outrow[i] + float(prev_inrow[idx]) - sumrow[i] += outrow[i] - - if writeoutput: - outcsv.writerow(outrow) - row_count = row_count + 1 - prev_inrow = inrow - - # summary/raw file - if not level: - return - sum_file = get_extra_out_file(out_metric_file, "sr") - f_sum = open(sum_file, "w") - sumcsv = csv.writer(f_sum, dialect="excel") - first_row = ["metrics"] - for s in range(int(CONST_SOCKET_COUNT)): - first_row.append("S" + str(s)) - sumcsv.writerow(first_row) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(0, first_row, "sr") - tempsum = [0.0] * (int(CONST_SOCKET_COUNT)) - out_idx = 1 - for i in range(len(sumrow)): - if not i: - continue - socket_id = (i - 1) % int(CONST_SOCKET_COUNT) - tempsum[socket_id] = int(sumrow[i] / row_count) - if socket_id == 
int(CONST_SOCKET_COUNT) - 1: - temprow = [] - temprow.append((outrow0[i])[:-2]) - for s in tempsum: - temprow.append(str(s)) - sumcsv.writerow(temprow) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(out_idx, temprow, "sr") - out_idx += 1 - - f_sum.close() - - -# write system view from socket level data -def write_socket2system(): - f_in = open(tmp_socket_file, "r") - incsv = csv.reader(f_in, delimiter=",") - f_out = open(output_file, "w") - outcsv = csv.writer(f_out, dialect="excel") - - firstrow = True - outrow0 = [] - outrow = [] - rowlen = 0 - sumrow = [] - entries = 0 - for row in incsv: - if not row: - continue - idx = 0 - if firstrow: - rowlen = int((len(row) - 1) / int(CONST_SOCKET_COUNT)) + 1 - outrow0 = [""] * rowlen - for i, name in enumerate(row): - if i == 0: - outrow0[idx] = name - idx += 1 - elif ((i - 1) % int(CONST_SOCKET_COUNT)) == ( - int(CONST_SOCKET_COUNT) - 1 - ): - outrow0[idx] = name[:-2] - idx += 1 - outcsv.writerow(outrow0) - sumrow = [0.0] * rowlen - firstrow = False - continue - - outrow = [0.0] * rowlen - for i, val in enumerate(row): - if i == 0: - outrow[idx] = val - totalval = 0.0 - idx += 1 - elif ((i - 1) % int(CONST_SOCKET_COUNT)) == (int(CONST_SOCKET_COUNT) - 1): - totalval += float(val) - outrow[idx] = str(totalval) - sumrow[idx] += totalval - totalval = 0.0 - idx += 1 - else: - totalval += float(val) - outcsv.writerow(outrow) - entries += 1 - - f_sum = open(get_extra_out_file(out_metric_file, "r"), "w") - sumcsv = csv.writer(f_sum, dialect="excel") - sumcsv.writerow(["metrics", "avg"]) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(0, ["metrics", "avg"], "r") - for i in range(rowlen - 1): - sumrow[i + 1] = sumrow[i + 1] / entries - sumcsv.writerow([outrow0[i + 1], str(sumrow[i + 1])]) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(i + 1, [outrow0[i + 1], str(sumrow[i + 1])], "r") - f_sum.close() - f_out.close() - f_in.close() - - -# combine per cha/imc counters from tmp output to systemview -def write_system_view(infile, outfile): - f_out = 
open(outfile, "w") - outcsv = csv.writer(f_out, dialect="excel") - f_tmp = open(infile, "r") - tmpcsv = csv.reader(f_tmp, delimiter=",") - row_count = 0 - out_row0 = [] - out_row = [] - sum_row = [] - final_out_row = [] - final_out_row0 = [] - prev_out_row = [] - disabled_events = [] - for in_row in tmpcsv: - if not in_row: - continue - if row_count == 0: - in_row0 = in_row[:] - - for i, event in enumerate(in_row0): - if event.endswith("%sample"): - break - # cumulative sum for uncore event counters - if event.startswith("UNC"): - id_idx_start = event.rfind(".") - # save row0 event name from the first uncore event - if row_count == 0: - if event[id_idx_start + 1 :].isdigit(): - if event.endswith(".0") and event[:-2] not in out_row0: - out_row0.append(event[:-2]) - else: # grouping disabled case: disaggregated uncore events will have the same name - if event not in out_row0: - out_row0.append(event) - else: - # FIX ME: assumes each uncore event occur only once in the event file - if event[id_idx_start + 1 :].isdigit(): - unc_event = event[:id_idx_start] - # core_id=int(event[id_idx_start+1:]) - # FIX ME: some CPUs will have more cha than core count (if high core count die converted to gold) - # if core_id >= CONST_CORE_COUNT: - # continue - idx = out_row0.index(unc_event) - out_row[idx] += float(in_row[in_row0.index(event)]) - else: # grouping disabled case - idx = out_row0.index(event) - out_row[idx] += float(in_row[i]) - else: - if row_count == 0: - out_row0.append(event) - else: - if out_row0.count(event) > 1: - for j, e in enumerate(out_row0): - if e == event and out_row[j] == 0: - out_row[j] = in_row[i] - break - else: - out_row[out_row0.index(event)] = in_row[i] - - # out_row[out_row0.index(event)]=in_row[in_row0.index(event)] - if row_count > 0: - for i, val in enumerate(out_row): - if float(val) >= 0.0: - final_out_row.append(val) - if row_count == 1: - final_out_row0.append(out_row0[i]) + metrics_results = {} + for m in metrics: + non_constant_mertics = [] 
+ exp_to_evaluate = m["expression"] + # substitute constants + for event in m["events"]: + if ( + event.upper() in metadata["constants"] + ): # all constants are save in metadata in Uppercase + exp_to_evaluate = exp_to_evaluate.replace( + "[" + event + "]", str(metadata["constants"][event.upper()]) + ) else: - if row_count == 1: - disable_event(i - 1) - disabled_events.append(out_row0[i]) - # too late to disable events - else: - if len(disabled_events) and (out_row0[i] in disabled_events): - val = 0 - else: - log.error( - "Warning: Invalid value found for %s counter at interval %d (defaults to previous count)" - % (out_row0[i], row_count + 1) + non_constant_mertics.append(event) + + # find a single group with the events + single_group = False + for g in group_to_event: + if set(non_constant_mertics) <= set( + group_to_event[g] + ): # if all events in metric m exist in group g + single_group = True + g_df = group_to_df[g] + expressions_to_evaluate = {} + for event in non_constant_mertics: + event_df = g_df.loc[event] + get_event_expression_from_group( + expressions_to_evaluate, event_df, exp_to_evaluate, event + ) + for instance in expressions_to_evaluate: + if ( + "[" in expressions_to_evaluate[instance] + or "]" in expressions_to_evaluate[instance] + ): + if verbose: + log_skip_metric( + m, + expressions_to_evaluate[instance], + "MISSING DATA", + ) + continue # cannot evaluate expression, skipping + try: + result = str( + "{:.8f}".format( + simple_eval( + expressions_to_evaluate[instance], + functions={"min": min, "max": max}, + ) + ) ) - val = prev_out_row[i] - final_out_row.append(val) - if row_count == 1: - outcsv.writerow(final_out_row0) - sum_row = [0.0] * len(final_out_row0) - outcsv.writerow(final_out_row) - for j in range(len(final_out_row0) - 1): - try: - sum_row[j + 1] += float(final_out_row[j + 1]) - except IndexError: - log.error( - "event=%s, j=%d, len=%d " - % (final_out_row0[j], j, len(final_out_row)) - ) - prev_out_row = final_out_row - 
final_out_row = [] - - # if row_count==0: - # outcsv.writerow(out_row0) - # sum_row=[0.0]*len(out_row0) - # else: - # outcsv.writerow(out_row) - # for j in range(len(out_row0)-1): - # sum_row[j+1]+=float(out_row[j+1]) - out_row = [0] * len(out_row0) - row_count += 1 - - f_out.close() - f_tmp.close() - - sum_file = get_extra_out_file(out_metric_file, "r") - f_sum = open(sum_file, "w") - sumcsv = csv.writer(f_sum, dialect="excel") - sumcsv.writerow(["metrics", "avg"]) - if EXCEL_OUT: - OUT_WORKBOOK.writerow(0, ["metrics", "avg"], "r") - - for i in range(len(sum_row) - 1): - sumcsv.writerow([final_out_row0[i + 1], int(sum_row[i + 1] / row_count)]) - if EXCEL_OUT: - OUT_WORKBOOK.writerow( - i + 1, [final_out_row0[i + 1], int(sum_row[i + 1] / row_count)], "r" - ) - f_sum.close() - - -# delete given file -def deletefile(tempfile): - if os.path.isfile(tempfile): - os.remove(tempfile) - - -# cleanup temp files -def cleanup(): - for file in time_dump_files: - deletefile(file) - deletefile(time_dump_file) - deletefile(output_file) - for file in output_files: - deletefile(file) - deletefile(tmp_socket_file) - if EXCEL_OUT: - tempfile = get_extra_out_file(out_metric_file, "r") - deletefile(tempfile) - tempfile = get_extra_out_file(out_metric_file, "a") - deletefile(tempfile) - tempfile = get_extra_out_file(out_metric_file, "s") - deletefile(tempfile) - tempfile = get_extra_out_file(out_metric_file, "sr") - deletefile(tempfile) - tempfile = get_extra_out_file(out_metric_file, "sa") - deletefile(tempfile) - tempfile = get_extra_out_file(out_metric_file, "c") - deletefile(tempfile) - tempfile = get_extra_out_file(out_metric_file, "cr") - deletefile(tempfile) - tempfile = get_extra_out_file(out_metric_file, "ca") - deletefile(tempfile) - tempfile = out_metric_file[:-4] + "csv" - deletefile(tempfile) - tmpdir = script_path + "/_tmp_perf_" - os.rmdir(tmpdir) - - -# restrict joining path to same directories -def is_safe_path(base_dir, path, follow_symlinks=True): - if 
follow_symlinks: - match = os.path.realpath(path).startswith(base_dir) - else: - match = os.path.abspath(path).startswith(base_dir) - return base_dir == os.path.commonpath((base_dir, match)) + except ZeroDivisionError: + if verbose: + log_skip_metric( + m, + expressions_to_evaluate[instance], + "ZERO DIVISION", + ) + result = 0 + sub_txt = "" if instance == "sys" else "." + instance + metrics_results[m["name"] + sub_txt] = float(result) + break # no need to check other groups + if not single_group: + if verbose: + log.warning('MULTIPLE GROUPS: metric "' + m["name"] + '"') + # get events from multiple groups + remaining_events_to_find = list(non_constant_mertics) + expressions_to_evaluate = {} + for event in non_constant_mertics: + for g in group_to_event: + if event in group_to_event[g]: + remaining_events_to_find.remove(event) + g_df = group_to_df[g] + event_df = g_df.loc[event] + get_event_expression_from_group( + expressions_to_evaluate, + event_df, + exp_to_evaluate, + event, + ) + break # no need to check in other groups + + if len(remaining_events_to_find) == 0: # all events are found + for ( + instance + ) in ( + expressions_to_evaluate + ): # instance is either system, specific core, or specific socket + if ( + "[" in expressions_to_evaluate[instance] + or "]" in expressions_to_evaluate[instance] + ): + if verbose: + log_skip_metric( + m, + expressions_to_evaluate[instance], + "MISSING DATA", + ) + continue + try: + result = str( + "{:.8f}".format( + simple_eval( + expressions_to_evaluate[instance], + functions={"min": min, "max": max}, + ) + ) + ) + except ZeroDivisionError: + if verbose: + log_skip_metric( + m, + expressions_to_evaluate[instance], + "ZERO DIVISION", + ) + result = 0 + sub_txt = "" if instance == "sys" else "." 
+ instance + metrics_results[m["name"] + sub_txt] = float(result) + else: # some events are missing + if verbose: + log.warning( + 'MISSING EVENTS: metric "' + + m["name"] + + '" events "' + + str(remaining_events_to_find) + + '"' + ) + continue # skip metric + time_metrics_result[time_slice] = metrics_results + time_series_df = pd.DataFrame(time_metrics_result) + generate_metrics_time_series(time_series_df, perf_mode, out_file_path) + generate_metrics_averages(time_series_df, perf_mode, out_file_path) + return -if __name__ == "__main__": - parser = ArgumentParser(description="perf-postprocess: perf post process") - parser.add_argument( - "--version", "-V", help="display version information", action="store_true" + +def generate_raw_events_system_wide(perf_data_df, out_file_path): + perf_data_df_system_raw = ( + perf_data_df[["metric", "value"]].groupby("metric")["value"].sum().to_frame() ) - parser.add_argument( - "-m", - "--metricfile", - type=str, - default=None, - help="formula file, default metric file for the architecture", + last_time_stamp = float(perf_data_df["ts"].tail(1).values[0]) + # average per second. 
Last time stamp = total collection duration in seconds + perf_data_df_system_raw["avg"] = np.where( + perf_data_df_system_raw["value"] > 0, + perf_data_df_system_raw["value"] / last_time_stamp, + 0, ) - parser.add_argument( - "-o", - "--outfile", - type=str, - default=out_metric_file, - help="perf stat outputs in csv format, default=results/metric_out.csv", - ) - parser.add_argument( - "--persocket", help="generate per socket metrics", action="store_true" + + sys_raw_file_name = get_extra_out_file(out_file_path, "r") + perf_data_df_system_raw["avg"].to_csv(sys_raw_file_name) + + return + + +def generate_raw_events_socket(perf_data_df, out_file_path): + # print raw values persocket + perf_data_df_scoket_raw = ( + perf_data_df[["metric", "socket", "value"]] + .groupby(["metric", "socket"])["value"] + .sum() + .to_frame() ) - parser.add_argument( - "--percore", help="generate per core metrics", action="store_true" + last_time_stamp = float(perf_data_df["ts"].tail(1).values[0]) + perf_data_df_scoket_raw["avg"] = np.where( + perf_data_df_scoket_raw["value"] > 0, + perf_data_df_scoket_raw["value"] / last_time_stamp, + 0, ) - parser.add_argument( - "-v", - "--verbose", - help="include debugging information, keeps all intermediate csv files", - action="store_true", + + metric_per_socket_frame = pd.pivot_table( + perf_data_df_scoket_raw, + index="metric", + columns="socket", + values="avg", + fill_value=0, ) - parser.add_argument( - "--epoch", - help="time series in epoch format, default is sample count", - action="store_true", + + socket_raw_file_name = get_extra_out_file(out_file_path, "sr") + metric_per_socket_frame.to_csv(socket_raw_file_name) + + return + + +def generate_raw_events_percore(perf_data_df, out_file_path): + # print raw values percore + perf_data_df_core_raw = ( + perf_data_df[["metric", "cpu", "value"]] + .groupby(["metric", "cpu"])["value"] + .sum() + .to_frame() ) - parser.add_argument( - "-html", - "--html", - type=str, - default=None, - help="Static 
HTML report", + last_time_stamp = float(perf_data_df["ts"].tail(1).values[0]) + perf_data_df_core_raw["avg"] = np.where( + perf_data_df_core_raw["value"] > 0, + perf_data_df_core_raw["value"] / last_time_stamp, + 0, ) - required_arg = parser.add_argument_group("required arguments") - required_arg.add_argument( - "-r", - "--rawfile", - type=str, - default=None, - help="Raw CSV output from perf-collect", + + metric_per_cpu_frame = pd.pivot_table( + perf_data_df_core_raw, index="metric", columns="cpu", values="avg", fill_value=0 ) + # drop uncore and power metrics + to_drop = [] + for metric in metric_per_cpu_frame.index: + if metric.startswith("UNC_") or metric.startswith("power/"): + to_drop.append(metric) + metric_per_cpu_frame.drop(to_drop, inplace=True) - args = parser.parse_args() + core_raw_file_name = get_extra_out_file(out_file_path, "cr", excelsheet=False) + metric_per_cpu_frame.to_csv(core_raw_file_name) - if args.version: - print(perf_helpers.get_tool_version()) - sys.exit(0) + return + + +def generate_raw_events(perf_data_df, out_file_path, perf_mode): + if perf_mode.System: + generate_raw_events_system_wide(perf_data_df, out_file_path) + elif perf_mode.Socket: + generate_raw_events_socket(perf_data_df, out_file_path) + elif perf_mode.Core: + generate_raw_events_percore(perf_data_df, out_file_path) - if not len(sys.argv) > 2: - parser.print_help() - sys.exit() +if __name__ == "__main__": script_path = os.path.dirname(os.path.realpath(__file__)) - # fix the pyinstaller path if "_MEI" in script_path: script_path = script_path.rsplit("/", 1)[0] + # Parse arguments and check validity + args = get_args(script_path) + input_file_path = args.rawfile + out_file_path = args.outfile + + # read all metadata, perf events, and perf data lines + # Note: this might not be feasible for very large files + meta_data_lines, perf_event_lines, perf_data_lines = get_all_data_lines( + input_file_path + ) - temp_dir = script_path + "/_tmp_perf_" - # create tmp dir - if not 
os.path.exists(temp_dir): - os.mkdir(temp_dir) - if args.rawfile is None: - parser.print_usage() - raise SystemExit( - "Missing raw file, please provide raw csv generated using perf-collect" - ) - dat_file = args.rawfile - # default output file - if args.outfile == out_metric_file: - res_dir = script_path + "/results" - if not os.path.exists(res_dir): - os.mkdir(res_dir) - perf_helpers.fix_path_ownership(res_dir) - if args.outfile: - out_metric_file = args.outfile - html_input = out_metric_file.split("/")[-1] - if "/" in out_metric_file: - res_dir = out_metric_file.rpartition("/")[0] - else: - res_dir = script_path - - if args.metricfile: - metric_file = args.metricfile - if dat_file and not os.path.isfile(dat_file): - parser.print_help() - raise SystemExit("perf raw data file not found, please provide valid raw file") - - if not perf_helpers.validate_outfile(args.outfile, True): - raise SystemExit( - "Output filename not accepted. Filename should be a .csv without special characters" - ) - if not perf_helpers.check_file_writeable(args.outfile): - raise SystemExit("Output file %s not writeable " % args.outfile) - if (args.outfile).endswith("xlsx"): - try: - import xlsxwriter - except ImportError: - raise SystemExit( - "xlsxwriter not found to generate excel output. 
Install xlsxwriter or use .csv" - ) - EXCEL_OUT = True - if args.html: - if not args.html.endswith(".html"): - raise SystemExit( - args.html + " isn't a valid html file name, .html files are accepted" - ) - - # parse header - get_metadata() - zero_division_errcount = 0 - total_samples = 0 - if not metric_file: - if CONST_ARCH == "broadwell": - metric_file = "metric_bdx.json" - elif CONST_ARCH == "skylake" or CONST_ARCH == "cascadelake": - metric_file = "metric_skx_clx.json" - elif CONST_ARCH == "icelake": - metric_file = "metric_icx.json" - elif CONST_ARCH == "sapphirerapids": - metric_file = "metric_spr.json" - else: - raise SystemExit("Suitable metric file not found") - - # Convert path of json file to relative path if being packaged by pyInstaller into a binary - if getattr(sys, "frozen", False): - basepath = getattr( - sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)) + # parse metadata and get mode (system, socket, or core) + meta_data = get_metadata_as_dict(meta_data_lines) + perf_mode = Mode.System + if "PERSOCKET_MODE" in meta_data and meta_data["PERSOCKET_MODE"]: + perf_mode = Mode.Socket + elif "PERCORE_MODE" in meta_data and meta_data["PERCORE_MODE"]: + perf_mode = Mode.Core + + # set const TSC accoding to perf_mode + set_CONST_TSC(meta_data, perf_mode) + + # parse event groups + event_groups = get_event_groups(perf_event_lines) + + # extract data frame + perf_data_df = extract_dataframe(perf_data_lines, meta_data, perf_mode) + + # parse metrics expressions + metrics = get_metrics_formula(meta_data["constants"]["CONST_ARCH"]) + + if args.rawevents: # generate raw events for system, persocket and percore + generate_raw_events(perf_data_df, out_file_path, perf_mode) + + # generate metrics for each cgroup + if "CGROUPS" in meta_data and meta_data["CGROUPS"] == "enabled": + for cid in meta_data["CGROUP_HASH"]: + cid_perf_data_df = perf_data_df[perf_data_df["cgroup"] == cid] + cid_out_file_path = ( + out_file_path.rsplit(".csv", 1)[0] + + "_" + + 
meta_data["CGROUP_HASH"][cid] + + ".csv" ) - if is_safe_file(metric_file, ".json"): - metric_file = os.path.join(basepath, metric_file) - elif __file__: - metric_file = script_path + "/events/" + metric_file - else: - raise SystemExit("Unknown application type") - - if not os.path.isfile(metric_file): - raise SystemExit("metric file not found %s" % metric_file) - - percore_output = False - persocket_output = False - # check if detailed socket and core level data can be generated - if args.percore or args.persocket: - if PERCORE_MODE: - persocket_output = True - if args.percore: - percore_output = True - else: - log.warning( - "Generating system level data only. Run perf-collect with --percore to generate socket/core level data." + generate_metrics( + cid_perf_data_df, + cid_out_file_path, + event_groups, + meta_data, + metrics, + perf_mode, + args.verbose, ) - - if EXCEL_OUT: - OUT_WORKBOOK.initialize(args.outfile, persocket_output, percore_output) - - samples = write_perf_tmp_output(args.epoch) - # levels: 0->system 1->socket 2->core - if percore_output or persocket_output: - write_socket_view(1, samples) - if load_metrics(None, None, level=1): - write_summary(1) - if percore_output: - write_core_view() - if load_metrics(None, None, level=2): - write_summary(2) - write_socket2system() - else: - if PERCORE_MODE: - write_socket_view(0, samples) - write_socket2system() - else: - if CGROUPS == "enabled": - for infile in time_dump_files: - outfile = ( - script_path - + "/_tmp_perf_/tmp_perf_out_" - + infile.split("_")[-1] - ) - output_files.append(outfile) - write_system_view(infile, outfile) - else: - infile = time_dump_file - outfile = output_file - write_system_view(infile, outfile) - - # Load metrics from raw data and summarize - if CGROUPS == "enabled": - for infile in output_files: - outfile = script_path + "/results/metric_out_" + infile.split("_")[-1] - out_metric_files.append(outfile) - load_metrics(infile, outfile, level=0) - write_cgroup_summary() - # if 
load_metrics(infile, outfile, level=0): - # write_summary(outfile,level=0) + log.info("Generated results file(s) in: " + out_file_path.rsplit("/", 1)[0]) + if args.html: + report.write_html( + cid_out_file_path, + perf_mode, + meta_data["constants"]["CONST_ARCH"], + args.html.replace( + ".html", "_" + meta_data["CGROUP_HASH"][cid] + ".html" + ), + ) + # generate metrics for system, persocket or percore else: - if load_metrics(None, None, level=0): - write_summary() - if not args.verbose: - cleanup() - if EXCEL_OUT: - OUT_WORKBOOK.close() - try: - res_dir - perf_helpers.fix_path_ownership(res_dir, True) - except NameError: - pass - if zero_division_errcount > 0: - log.warning( - str(zero_division_errcount) - + " samples discarded, and " - + str(total_samples) - + " samples were used" + generate_metrics( + perf_data_df, + out_file_path, + event_groups, + meta_data, + metrics, + perf_mode, + args.verbose, ) - log.info("Post processing done, result file:%s" % args.outfile) - - if args.html: - from src import report - - report.write_html(res_dir, html_input, CONST_ARCH, args.html) + log.info("Generated results file(s) in: " + out_file_path.rsplit("/", 1)[0]) + if args.html: + report.write_html( + out_file_path, + perf_mode, + meta_data["constants"]["CONST_ARCH"], + args.html, + ) + log.info("Done!") diff --git a/src/basic_stats.py b/src/basic_stats.py index 6bbe4ea..ba00250 100644 --- a/src/basic_stats.py +++ b/src/basic_stats.py @@ -5,12 +5,13 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -import plotly.graph_objects as go -import plotly -import pandas as pd import os -from yattag import Doc +import pandas as pd +import plotly +import plotly.graph_objects as go import tempfile +from yattag import Doc +from src.common import crash os.environ["MPLCONFIGDIR"] = tempfile.mkdtemp() @@ -77,7 +78,7 @@ def get_stats_plot(input, arch): try: df = pd.read_csv(input, 
keep_default_na=False) except FileNotFoundError: - raise SystemExit(f"{input} file not found") + crash(f"{input} file not found") fig_list = [] if "metric_CPU operating frequency (in GHz)" in df.columns: fig1 = get_fig( diff --git a/src/common.py b/src/common.py new file mode 100644 index 0000000..3b3baaa --- /dev/null +++ b/src/common.py @@ -0,0 +1,9 @@ +import logging +import sys + +log = logging.getLogger(__name__) + + +def crash(msg): + log.error(msg) + sys.exit(1) diff --git a/src/icicle.py b/src/icicle.py index 840f24b..d640855 100644 --- a/src/icicle.py +++ b/src/icicle.py @@ -5,10 +5,11 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -from yattag import Doc -import plotly.graph_objects as go -import pandas as pd import numpy as np +import pandas as pd +import plotly.graph_objects as go +from yattag import Doc +from src.common import crash doc, tag, text = Doc().tagtext() metric_parent = {} @@ -23,7 +24,7 @@ def get_icicle(input_csv): try: df = pd.read_csv(input_csv, keep_default_na=False) except FileNotFoundError: - raise SystemExit(f"{input_csv} File not found") + crash(f"{input_csv} File not found") unwanted = ["%", "metric_TMA_", ".", "(", ")", "metric_TMAM_"] df = df.replace("N/A", np.nan) diff --git a/src/perf_helpers.py b/src/perf_helpers.py index 5178b37..5619258 100644 --- a/src/perf_helpers.py +++ b/src/perf_helpers.py @@ -5,19 +5,20 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### +import collections +import fnmatch +import logging +import math import os import re -import fnmatch -import time import struct -import math -import collections import subprocess # nosec -import logging -from time import strptime +import time from ctypes import cdll, CDLL from datetime import datetime from dateutil import tz +from src.common import crash +from time 
import strptime version = "PerfSpect_DEV_VERSION" @@ -45,7 +46,7 @@ def get_ht_count(): def get_cpu_count(): cpu_count = 0 if not os.path.isfile("/sys/devices/system/cpu/online"): - raise SystemExit("/sys/devices/system/cpu/online not found to get core count") + crash("/sys/devices/system/cpu/online not found to get core count") with open("/sys/devices/system/cpu/online", "r") as f_online_cpu: content = f_online_cpu.read() cpu_list = content.split(",") @@ -63,7 +64,7 @@ def get_tsc_freq(): tsc = CDLL(tsclib) tsc_freq = str(tsc.Calibrate()) if tsc_freq == 0: - raise SystemExit("can't calculate TSC frequency") + crash("can't calculate TSC frequency") return tsc_freq @@ -250,7 +251,7 @@ def get_lscpu(): value = value.lstrip() cpuinfo[key] = value except subprocess.CalledProcessError as e: - raise SystemExit(e.output + "\nFailed to get CPUInfo") + crash(e.output + "\nFailed to get CPUInfo") return cpuinfo @@ -388,10 +389,10 @@ def get_cgroups_from_cids(cids): p.stdout.close() except subprocess.SubprocessError as e: - raise SystemExit("failed to open ps subprocess: " + e.output) + crash("failed to open ps subprocess: " + e.output) out, err = p2.communicate() if err: - raise SystemExit(f"error reading cgroups: {err}") + crash(f"error reading cgroups: {err}") lines = out.decode("utf-8").split("\n") for cid in cids: found = False @@ -400,7 +401,7 @@ def get_cgroups_from_cids(cids): found = True cgroups.add(line.split(":")[-1]) if not found: - raise SystemExit("invalid container ID: " + cid) + crash("invalid container ID: " + cid) # change cgroups back to list brefore returning return list(cgroups) diff --git a/src/report.py b/src/report.py index 180dddb..ad48daf 100644 --- a/src/report.py +++ b/src/report.py @@ -6,7 +6,6 @@ ########################################################################################################### import logging -import os from src import basic_stats from src import icicle from yattag import Doc, indent @@ -14,46 +13,35 @@ log = 
logging.getLogger(__name__) -def write_html(res_dir, base_input_file, arch, html_report_out, type="both"): - if type not in ("tma", "basic", "both"): - type = "both" - tma_inp = base_input_file.split(".")[0] + ".average.csv" - basic_inp = os.path.join(res_dir, base_input_file) - tma_inp = os.path.join(res_dir, tma_inp) +def write_html(tma_inp, perf_mode, arch, html_report_out, data_type="both"): + if data_type not in ("tma", "basic", "both"): + data_type = "both" + if str(perf_mode) == "Mode.System": + tma_inp = tma_inp.replace(".csv", ".sys.average.csv") + elif str(perf_mode) == "Mode.Socket": + tma_inp = tma_inp.replace(".csv", ".socket.average.csv") + elif str(perf_mode) == "Mode.Core": + tma_inp = tma_inp.replace(".csv", ".core.average.csv") doc, tag, text = Doc().tagtext() with tag("html"): - # ToDO: add navigation later - # with doc.tag('div'): - # doc.attr(klass='navbar') - # with tag('a', href="#tma", klass="active"): - # text("TMA") - # with tag('a', href="#basic_stats"): - # text("Basic Stats") - with tag("style"): text("h1{text-align: center;background-color: #00ccff;}") text("h2{text-align: center;background-color: #e6faff;}") - # text('.navbar {background-color: #333;overflow: hidden;position: fixed;bottom: 0;width: 100%;}') - # text('.navbar a {float: left;display: block;color: #f2f2f2;text-align: center;padding: 14px 16px;text-decoration: none;font-size: 17px;}') - # text('.navbar a:hover {background-color: #ddd;color: black;}') - # text('.navbar a.active {background-color: #04AA6D;color: white;}') - # text('input{position: fixed;}') - with tag("head"): doc.asis('') with tag("h1"): text("IntelĀ® PerfSpect Report") with tag("body"): - if type in ("both", "tma"): + if data_type in ("both", "tma"): fig1 = icicle.get_icicle(tma_inp) with tag("h2", align="center"): text("TopDown Microarchitecture Analysis (TMA)") with doc.tag("div"): doc.attr(id="tma") doc.asis(fig1.to_html(full_html=False, include_plotlyjs="cdn")) - if type in ("both", "basic"): - fig2 = 
basic_stats.get_stats_plot(basic_inp, arch) + if data_type in ("both", "basic"): + fig2 = basic_stats.get_stats_plot(tma_inp, arch) with tag("h2", align="center"): text("Basic Statistics") with doc.tag("div"): @@ -61,7 +49,6 @@ def write_html(res_dir, base_input_file, arch, html_report_out, type="both"): doc.stag("br") doc.asis(fig2) result = indent(doc.getvalue()) - out_html = os.path.join(res_dir, html_report_out) - with open(out_html, "w") as file: + with open(html_report_out, "w") as file: file.write(result) - log.info(f"static HTML file written at {out_html}") + log.info(f"static HTML file written at {html_report_out}")