Skip to content

Commit

Permalink
cid and percpu bug fixes and cleanup (#20)
Browse files Browse the repository at this point in the history
* Remove unneeded files. Fix cid and per-core processing errors on ICX. Improve accuracy of DRAM bandwidth.

* remove golang from build and update readme for no golang or lscgroup

* install gcc

* fix cgroup naming and change output to match other run modes
  • Loading branch information
hilldani authored Mar 27, 2023
1 parent d54d178 commit 9c61c46
Show file tree
Hide file tree
Showing 25 changed files with 142 additions and 822 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ jobs:
- name: install dependencies
run: |
yum update -y
yum install -y make python3 epel-release
yum install -y golang
yum install -y make python3 gcc
python3 -m pip install --upgrade pip
- name: build
run: |
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
build/*
dist/*
pmu-checker/pmu-checker
src/libtsc.so
__pycache__
7 changes: 1 addition & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,6 @@ clean_dir:
build_dir: clean_dir
mkdir -p build

build/pmu-checker:
cd pmu-checker && make
cp pmu-checker/pmu-checker build/
strip -s -p --strip-unneeded build/pmu-checker

build/libtsc:
gcc -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv -fPIC -shared -o src/libtsc.so src/calibrate.c

Expand Down Expand Up @@ -58,7 +53,7 @@ build-public/postprocess:
cp $(TMPDIR)/dist/perf-postprocess build/
rm -rf $(TMPDIR)

dist/$(PACKAGE_EXTERNAL): build_dir build/pmu-checker build/libtsc build-public/collect build-public/postprocess
dist/$(PACKAGE_EXTERNAL): build_dir build/libtsc build-public/collect build-public/postprocess
rm -rf dist/$(BINARY_FINAL)/
mkdir -p dist/$(BINARY_FINAL)
cp build/$(BINARY_COLLECT) dist/$(BINARY_FINAL)/$(BINARY_COLLECT)
Expand Down
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,12 @@ sudo ./perf-collect --timeout 10
sudo ./perf-postprocess -r results/perfstat.csv --html perfstat.html
```

![PerfSpect BS](images/basic_stats.JPG "perfspect-bs")
![perfspect-demo1](https://user-images.githubusercontent.com/5321018/205159259-3654fa12-74d6-4cb5-8194-ea1b66aadb25.gif)
![basic_stats](https://raw.githubusercontent.com/wiki/intel/PerfSpect/basic_stats.JPG)
![perfspect-demo1](https://raw.githubusercontent.com/wiki/intel/PerfSpect/demo.gif)

## Requirements
### Packages:
- **perf** - PerfSpect uses the Linux perf tool to collect PMU counters
- **lscgroup** - Perfspect needs lscgroup from the cgroup-tools (libcgroup on RHEL/CentOS) package when collecting data for containers

### Supported kernels

Expand All @@ -45,7 +44,7 @@ sudo ./perf-postprocess -r results/perfstat.csv --html perfstat.html

## Build from source

Requires recent python and golang.
Requires a recent version of Python.

```
pip3 install -r requirements.txt
Expand Down
4 changes: 2 additions & 2 deletions events/icx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,10 @@ ref-cycles;

# OCR group 1 (ICX PMU supports a maximum of two OCR counters per group)
cpu/event=0xb7,umask=0x01,offcore_rsp=0x104000477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/,
cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/;
cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/;

# OCR group 2 (ICX PMU supports a maximum of two OCR counters per group)
cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/,
cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/,
cpu/event=0xb7,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/;

#power related
Expand Down
Binary file removed images/basic_stats.JPG
Binary file not shown.
Binary file removed images/metrics.JPG
Binary file not shown.
Binary file removed images/sim.png
Binary file not shown.
59 changes: 35 additions & 24 deletions perf-collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
from src import prepare_perf_events as prep_events


from subprocess import PIPE, run # nosec

SUPPORTED_ARCHITECTURES = [
"Broadwell",
"Skylake",
Expand Down Expand Up @@ -86,13 +84,40 @@ def write_metadata(
cpusets = ""
if args.cid is not None:
for cgroup in cgroups:
set = open("/sys/fs/cgroup/cpuset/" + cgroup + "/cpuset.cpus", "r")
cpu_set = set.read()
set.close()
cpu_set = cpu_set.strip()
cpu_set = str("," + cpu_set)
cpusets += cpu_set
cpusets = str(cpusets)
cgroup_paths = [
"/sys/fs/cgroup/cpuset/" + cgroup + "/cpuset.cpus", # cgroup v1
"/sys/fs/cgroup/" + cgroup + "/cpuset.cpus", # cgroup v2
]
cg_path_found = False
for cg_path in cgroup_paths:
try:
cpu_set_file = open(
"/sys/fs/cgroup/cpuset/" + cgroup + "/cpuset.cpus", "r"
)
cg_path_found = True
# no need to check other paths
break
except FileNotFoundError:
# check next path
continue

if cg_path_found:
cpu_set = cpu_set_file.read()
cpu_set_file.close()
cpu_set = cpu_set.strip()

if not cg_path_found or cpu_set == "":
# A missing path or an empty cpu-set in v2 indicates that the container is running on all CPUs
cpu_set = "0-" + str(
int(
perf_helpers.get_cpu_count()
* perf_helpers.get_socket_count()
* perf_helpers.get_ht_count()
- 1
)
)

cpusets += "," + cpu_set
else:
cpusets = "disabled"

Expand Down Expand Up @@ -468,21 +493,6 @@ def is_safe_file(fname, substr):
args.timeout,
)
elif args.dryrun:
with open("results/pmu-checker.log", "w") as fw:
print("Checking if PMU counters are in-use already...")
pmuargs = resource_path("pmu-checker")
try:
run_result = run( # nosec
shlex.split(pmuargs),
stdout=PIPE,
stderr=PIPE,
universal_newlines=True,
)
fw.write(str(run_result.stdout))

except Exception as e:
print(e)

cmd = "perf stat %s -I %d -x , -e %s -o %s sleep 10" % (
collection_type,
interval,
Expand All @@ -498,6 +508,7 @@ def is_safe_file(fname, substr):
)
perfargs = shlex.split(cmd)
validate_perfargs(perfargs)
perf_helpers.pmu_contention_detect()
if args.verbose:
print(cmd)
try:
Expand Down
1 change: 0 additions & 1 deletion perf-collect.spec
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ block_cipher = None
a = Analysis(
['perf-collect.py'],
pathex=[],
binaries=[('../build/pmu-checker', '.')],
datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/skx.txt', '.'), ('./events/clx.txt', '.'), ('./events/icx.txt', '.'), ('./events/spr.txt', '.'), ('./events/icx_aws.txt', '.'), ('./events/spr_aws.txt', '.'), ('./events/clx_aws.txt', '.'), ('./events/skx_aws.txt', '.')],
hiddenimports=[],
hookspath=[],
Expand Down
29 changes: 25 additions & 4 deletions perf-postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,16 +514,33 @@ def load_metrics(infile, outfile, level=0):
return 1


# generate summary output with averages, min, max, p95 for cgroups
def write_cgroup_summary():
avgdf = pd.DataFrame(columns=["metrics"])
for file in out_metric_files:
df = pd.read_csv(file).iloc[:, 1:]
# extract avg, p95, min, and max columns
avgcol = df.mean(axis=0).to_frame().reset_index()
p95col = df.quantile(q=0.95, axis=0).to_frame().reset_index()
mincol = df.min(axis=0).to_frame().reset_index()
maxcol = df.max(axis=0).to_frame().reset_index()
# get container id
container = os.path.basename(file).split(".")[0].split("_")[-1]
avgcol.columns = ["metrics", container]
# define columns headers
avgcol.columns = ["metrics", "avg"]
p95col.columns = ["metrics", "p95"]
mincol.columns = ["metrics", "min"]
maxcol.columns = ["metrics", "max"]
# merge columns
avgdf = avgdf.merge(avgcol, on="metrics", how="outer")
sum_file = get_extra_out_file(out_metric_file, "a")
avgdf.to_csv(sum_file)
avgdf = avgdf.merge(p95col, on="metrics", how="outer")
avgdf = avgdf.merge(mincol, on="metrics", how="outer")
avgdf = avgdf.merge(maxcol, on="metrics", how="outer")
# generate output file, one for each container id
sum_file = get_extra_out_file(
out_metric_file.replace(".csv", "_" + container + ".csv"), "a"
)
avgdf.to_csv(sum_file, index=False)
return


Expand Down Expand Up @@ -990,7 +1007,11 @@ def write_socket_view(level, samples):
m.append(core_to_idx)
present = True
break
if not present:
# add to mapping if not present, or if the previous event is the last core event, i.e. the current event is the first uncore ("UNC") event (assuming all core events come before the uncore events)
if (not present) or (
name.startswith("UNC")
and not prev_event_name.startswith("UNC")
):
mapping = persocket_idx(prev_event_name, core_to_idx)
mappings.append(mapping)
ename = mapping.getname()
Expand Down
32 changes: 0 additions & 32 deletions pmu-checker/Makefile

This file was deleted.

30 changes: 0 additions & 30 deletions pmu-checker/README.md

This file was deleted.

Loading

0 comments on commit 9c61c46

Please sign in to comment.