Skip to content

Commit

Permalink
adding metadata to logs and bug fixes (#39)
Browse files Browse the repository at this point in the history
* adding metadata to logs and bug fixes

* update version
  • Loading branch information
hilldani authored Jun 6, 2023
1 parent f41e963 commit b0a5cf5
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 53 deletions.
30 changes: 30 additions & 0 deletions .github/ISSUE_TEMPLATE/1-support-bugs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# GitHub issue form: bug report / support request.
# NOTE: extraction flattened the YAML nesting; indentation below is restored
# to the canonical GitHub issue-forms structure implied by the keys
# (body is a list of typed elements, each with attributes/validations).
name: 🐛 Bug Report/Support
description: Ask a question or report an issue
labels: [bug]
body:
  - type: markdown
    attributes:
      value: |
        Thank you for submitting a bug report. It helps make PerfSpect better.
        Please try to include as much information as possible.
  # render: shell formats the pasted log as a code block automatically.
  - type: textarea
    attributes:
      label: Verbose output from perf-collect
      render: shell
      description: Copy the output of `./perf-collect` with `-v` flag (it will automatically format as a code block)
  - type: textarea
    attributes:
      label: Verbose output from perf-postprocess
      render: shell
      description: Copy the output of `./perf-postprocess` with `-v` flag (it will automatically format as a code block)
  # Reproduction steps are the only mandatory field.
  - type: textarea
    attributes:
      label: What steps can reproduce the bug?
      description: Explain the bug, system setup, and provide a code snippet that can reproduce it.
    validations:
      required: true
  - type: textarea
    attributes:
      label: Additional information
      description: Is there anything else you think we should know?
21 changes: 21 additions & 0 deletions .github/ISSUE_TEMPLATE/2-feature-request.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# GitHub issue form: feature request / enhancement.
# NOTE: extraction flattened the YAML nesting; indentation below is restored
# to the canonical GitHub issue-forms structure implied by the keys.
name: 🚀 Feature Request
description: Suggest an idea, feature, or enhancement
labels: [enhancement]
body:
  - type: markdown
    attributes:
      value: |
        Thank you for submitting an idea. It helps make PerfSpect better.
  # Problem statement and proposed solution are both mandatory;
  # alternatives are optional.
  - type: textarea
    attributes:
      label: What is the problem this feature would solve?
    validations:
      required: true
  - type: textarea
    attributes:
      label: What is the feature you are proposing to solve the problem?
    validations:
      required: true
  - type: textarea
    attributes:
      label: What alternatives have you considered?
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
blank_issues_enabled: true
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

PerfSpect is a system performance characterization tool built on top of linux perf. Most metrics and events come from [perfmon](https://github.com/intel/perfmon) and [TMA v4.5](https://www.intel.com/content/www/us/en/docs/vtune-profiler/cookbook/2023-1/top-down-microarchitecture-analysis-method.html). It contains two parts:

perf-collect: Collects hardware events
perf-collect: Collects hardware events at a 5 second output interval with practically zero overhead since PMUs run in counting mode.

- Collection mode:
- `sudo ./perf-collect` _default system wide_
Expand Down
2 changes: 1 addition & 1 deletion _version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.2.12
1.2.13
60 changes: 34 additions & 26 deletions perf-collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,20 +93,15 @@ def write_metadata(
cg_path_found = False
for path in cgroup_paths:
try:
cpu_set_file = open(path, "r")
cg_path_found = True
# no need to check other paths
break
with open(path, "r") as cpu_set_file:
cg_path_found = True
cpu_set = cpu_set_file.read()
cpu_set = cpu_set.strip()
cpu_set = cpu_set.replace(",", "+")
break
except FileNotFoundError:
# check next path
continue

if cg_path_found:
cpu_set = cpu_set_file.read()
cpu_set_file.close()
cpu_set = cpu_set.strip()
cpu_set = cpu_set.replace(",", "+")

if not cg_path_found or cpu_set == "":
# A missing path or an empty cpu-set in v2 indicates that the container is running on all CPUs
cpu_set = "0-" + str(
Expand Down Expand Up @@ -284,6 +279,18 @@ def validate_file(fname):
else:
crash("Unknown application type")

events, collection_events = prep_events.prepare_perf_events(
eventfile,
(
args.pid is not None
or args.cid is not None
or args.thread
or args.socket
or not have_uncore
),
args.pid is not None or args.cid is not None,
)

if not perf_helpers.validate_outfile(args.outcsv):
crash(
"Output filename not accepted. Filename should be a .csv without special characters"
Expand All @@ -299,7 +306,6 @@ def validate_file(fname):
cgroups = perf_helpers.get_cgroups(args.cid)

# get perf events to collect
collection_events = []
sys_devs = perf_helpers.get_sys_devices()
if (
"uncore_cha" not in sys_devs
Expand All @@ -318,21 +324,24 @@ def validate_file(fname):
logging.warning(
"Due to lack of vPMU support, TMA L1 & L2 events will not be collected"
)
events, collection_events = prep_events.prepare_perf_events(
eventfile,
(
args.pid is not None
or args.cid is not None
or args.thread
or args.socket
or not have_uncore
),
args.pid is not None or args.cid is not None,
)

if args.thread or args.socket or args.pid is not None or args.cid is not None:
logging.info("Not collecting uncore events in this run mode")

# log some metadata
logging.info("Architecture: " + arch)
logging.info("Model: " + cpuname)
logging.info("Kernel version: " + perf_helpers.get_version())
logging.info("Cores per socket: " + str(perf_helpers.get_cpu_count()))
logging.info("Socket: " + str(perf_helpers.get_socket_count()))
logging.info("Hyperthreading on: " + str(perf_helpers.get_ht_status()))
imc, upi = perf_helpers.get_imc_upi_count()
logging.info("IMC count: " + str(imc))
logging.info("CHA per socket: " + str(perf_helpers.get_cha_count()))
logging.info("UPI count: " + str(upi))
logging.info("PerfSpect version: " + perf_helpers.get_tool_version())
logging.info("/sys/devices/: " + str(sys_devs))

# build perf stat command
collection_type = "-a" if not args.thread and not args.socket else "-a -A"
cmd = f"perf stat -I {interval} -x , {collection_type} -o {args.outcsv}"
Expand All @@ -358,13 +367,12 @@ def validate_file(fname):
if args.verbose:
logging.info(cmd)
try:
logging.info("Collecting perf stat for events in : %s" % eventfilename)
start = time.time()
subprocess.call(perfargs) # nosec
end = time.time()
if end - start < 5.2:
if end - start < 7:
logging.warning(
"PerfSpect was run for less than 5 seconds, some events make be zero because they didn't get scheduled"
"PerfSpect was run for a short duration, some events might be zero or blank because they never got scheduled"
)
logging.info("Collection complete! Calculating TSC frequency now")
except KeyboardInterrupt:
Expand Down
15 changes: 13 additions & 2 deletions perf-postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ def get_args(script_path):
# for socket or thread: add rows for each 2nd hyper thread with same values as 1st thread
def get_fixed_c6_residency_fields(perf_data_lines, perf_mode):
# handle special case events: c6-residency
# if hyperthreading is disabled, no fixing is required
if meta_data["constants"]["HYPERTHREADING_ON"] == 0:
return perf_data_lines

new_perf_data_lines = []
if meta_data["constants"]["CONST_THREAD_COUNT"] == 2:
for fields in perf_data_lines:
Expand Down Expand Up @@ -216,8 +220,11 @@ def get_all_data_lines(input_file_path):
fields = line.split(",")
perf_data_lines.append(fields)

infile.close()
return meta_data_lines, perf_events_lines, perf_data_lines
if len(perf_data_lines) == 0:
crash(
"perfstat.csv contains no perf event data, try collecting for a longer time"
)
return meta_data_lines, perf_events_lines, perf_data_lines


# get_metadata
Expand Down Expand Up @@ -416,6 +423,7 @@ def get_socket_number(sockets_dict, core):


def extract_dataframe(perf_data_lines, meta_data, perf_mode):
logging.info("Formatting event data")
# parse event data into dataframe and set header names
perf_data_df = pd.DataFrame(perf_data_lines)
if "CGROUPS" in meta_data and meta_data["CGROUPS"] == "enabled":
Expand Down Expand Up @@ -657,10 +665,13 @@ def generate_metrics(
}
prev_time_slice = 0
group_to_start_end_indexes = {}
logging.info("processing " + str(time_slice_groups.ngroups) + " samples")
for time_slice, item in time_slice_groups:
time_slice_float = float(time_slice)
if time_slice_float - prev_time_slice < 4.5:
logging.warning("throwing out last sample because it was too short")
if time_slice_groups.ngroups == 1:
crash("no remaining samples")
continue
time_slice_df = time_slice_groups.get_group(time_slice).copy()
# normalize by difference between current time slice and previous time slice
Expand Down
24 changes: 12 additions & 12 deletions src/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@
series: [
{
type: 'line',
data: CPUUTIL.map((e, i) => [i, e]),
data: CPUUTIL.map((e, i) => [i * 5, e]),
}
]
}
Expand All @@ -177,7 +177,7 @@
series: [
{
type: 'line',
data: CPIDATA.map((e, i) => [i, e]),
data: CPIDATA.map((e, i) => [i * 5, e]),
}
]
}
Expand All @@ -187,7 +187,7 @@
series: [
{
type: 'line',
data: CPUFREQ.map((e, i) => [i, e]),
data: CPUFREQ.map((e, i) => [i * 5, e]),
}
]
}
Expand All @@ -197,7 +197,7 @@
series: [
{
type: 'line',
data: REMOTENUMA.map((e, i) => [i, e]),
data: REMOTENUMA.map((e, i) => [i * 5, e]),
}
]
}
Expand All @@ -208,17 +208,17 @@
{
name: "L1D",
type: 'line',
data: L1DATA.map((e, i) => [i, e]),
data: L1DATA.map((e, i) => [i * 5, e]),
},
{
name: "L2",
type: 'line',
data: L2DATA.map((e, i) => [i, e]),
data: L2DATA.map((e, i) => [i * 5, e]),
},
{
name: "LLC Data",
type: 'line',
data: LLCDATA.map((e, i) => [i, e]),
data: LLCDATA.map((e, i) => [i * 5, e]),
},
]
}
Expand All @@ -229,17 +229,17 @@
{
name: "Read",
type: 'line',
data: READDATA.map((e, i) => [i, e]),
data: READDATA.map((e, i) => [i * 5, e]),
},
{
name: "Write",
type: 'line',
data: WRITEDATA.map((e, i) => [i, e]),
data: WRITEDATA.map((e, i) => [i * 5, e]),
},
{
name: "Total",
type: 'line',
data: TOTALDATA.map((e, i) => [i, e]),
data: TOTALDATA.map((e, i) => [i * 5, e]),
},
]
}
Expand All @@ -249,7 +249,7 @@
series: [
{
type: 'line',
data: PKGPOWER.map((e, i) => [i, e]),
data: PKGPOWER.map((e, i) => [i * 5, e]),
}
]
}
Expand All @@ -259,7 +259,7 @@
series: [
{
type: 'line',
data: DRAMPOWER.map((e, i) => [i, e]),
data: DRAMPOWER.map((e, i) => [i * 5, e]),
}
]
}
Expand Down
20 changes: 9 additions & 11 deletions src/perf_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,19 +269,17 @@ def get_cpuinfo():
cpuinfo = []
temp_dict = {}
try:
fo = open("/proc/cpuinfo", "r")
with open("/proc/cpuinfo", "r") as fo:
for line in fo:
try:
key, value = list(map(str.strip, line.split(":", 1)))
except ValueError:
cpuinfo.append(temp_dict)
temp_dict = {}
else:
temp_dict[key] = value
except EnvironmentError as e:
logging.warning(str(e), UserWarning)
else:
for line in fo:
try:
key, value = list(map(str.strip, line.split(":", 1)))
except ValueError:
cpuinfo.append(temp_dict)
temp_dict = {}
else:
temp_dict[key] = value
fo.close()
return cpuinfo


Expand Down

0 comments on commit b0a5cf5

Please sign in to comment.