Skip to content

Commit

Permalink
bug fixes and handle ref-cycles missing (#58)
Browse files Browse the repository at this point in the history
* remove tsc rounding for more accuracy

* instruction mixes

* revert version

* check for perf

* bug fixes and lack of ref-cycle support
  • Loading branch information
hilldani authored Oct 3, 2023
1 parent 5111509 commit 8b186e8
Show file tree
Hide file tree
Showing 12 changed files with 95 additions and 73 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ build-public/postprocess:
--add-data "./src/base.html:." \
--runtime-tmpdir . \
--exclude-module readline
--bootloader-ignore-signals
cp $(TMPDIR)/dist/perf-postprocess build/
rm -rf $(TMPDIR)

Expand Down
2 changes: 1 addition & 1 deletion events/bdx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,4 @@ imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/;

#power related
power/energy-pkg/,
power/energy-ram/;
power/energy-ram/;
10 changes: 5 additions & 5 deletions events/clx_skx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,6 @@ instructions;
cstate_core/c6-residency/;
cstate_pkg/c6-residency/;

#power related
power/energy-pkg/,
power/energy-ram/;

#memory read/writes
imc/event=0x04,umask=0x03,name='UNC_M_CAS_COUNT.RD'/,
imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/;
Expand Down Expand Up @@ -236,4 +232,8 @@ iio/event=0x83,umask=0x01,ch_mask=0x08,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CP
upi/event=0x2,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/,
upi/event=0x2,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/,
upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/,
upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/;
upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/;

#power related
power/energy-pkg/,
power/energy-ram/;
10 changes: 5 additions & 5 deletions events/icx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,6 @@ instructions;
cstate_core/c6-residency/;
cstate_pkg/c6-residency/;

#power related
power/energy-pkg/,
power/energy-ram/;

# UPI related
upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/;

Expand All @@ -196,4 +192,8 @@ cha/event=0x36,umask=0xC817FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/;

#memory read/writes
imc/event=0x04,umask=0x0f,name='UNC_M_CAS_COUNT.RD'/,
imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/;
imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/;

#power related
power/energy-pkg/,
power/energy-ram/;
7 changes: 7 additions & 0 deletions events/metric_bdx.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
"expression-txn": "[cpu-cycles:k] / [TXN]",
"origin": "perfspect"
},
{
"name": "metric_IPC",
"name-txn": "metric_txn per cycle",
"expression": "[instructions] / [cpu-cycles]",
"expression-txn": "[TXN] / [cpu-cycles]",
"origin": "perfspect"
},
{
"name": "metric_locks retired per instr",
"name-txn": "metric_locks retired per txn",
Expand Down
7 changes: 7 additions & 0 deletions events/metric_skx_clx.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
"expression-txn": "[cpu-cycles:k] / [TXN]",
"origin": "perfspect"
},
{
"name": "metric_IPC",
"name-txn": "metric_txn per cycle",
"expression": "[instructions] / [cpu-cycles]",
"expression-txn": "[TXN] / [cpu-cycles]",
"origin": "perfspect"
},
{
"name": "metric_locks retired per instr",
"name-txn": "metric_locks retired per txn",
Expand Down
10 changes: 5 additions & 5 deletions events/spr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,6 @@ instructions:k;
cstate_core/c6-residency/;
cstate_pkg/c6-residency/;

#power
power/energy-pkg/,
power/energy-ram/;

#UPI
upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/;

Expand All @@ -183,4 +179,8 @@ cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/;

#IMC (memory read/writes)
imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/,
imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/;
imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/;

#power
power/energy-pkg/,
power/energy-ram/;
28 changes: 24 additions & 4 deletions perf-collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,26 @@ def tma_supported():
return True


def ref_cycles_supported():
    """Report whether perf can count the ref-cycles event on this system.

    Runs a short system-wide ``perf stat`` for ref-cycles and scans its
    combined stdout/stderr for perf's "<not supported>" marker.

    Returns:
        bool: True when perf ran and did not flag ref-cycles as unsupported;
        False when the event is unsupported or perf could not be launched.
    """
    try:
        perf = subprocess.run(
            shlex.split("perf stat -a -e ref-cycles sleep .1"),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # Launch failures (perf missing or not executable) surface as OSError,
        # not CalledProcessError; treat them as "unsupported" instead of
        # letting the collector crash.
        logging.warning(f"Unable to run perf to check ref-cycles support: {e}")
        return False

    # perf output is only scanned for a marker, so tolerate undecodable bytes.
    perf_out = perf.stdout.decode(errors="replace")
    if "<not supported>" in perf_out:
        logging.warning(
            "ref-cycles not enabled in VM driver. Contact system owner to enable. Collecting reduced metrics"
        )
        return False
    return True


def resource_path(relative_path):
"""Get absolute path to resource, works for dev and for PyInstaller"""
base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
Expand Down Expand Up @@ -372,12 +392,12 @@ def validate_file(fname):
(args.pid is not None or args.cid is not None or not have_uncore),
include_tma,
not have_uncore,
ref_cycles_supported(),
)

if not perf_helpers.validate_outfile(args.outcsv):
crash(
"Output filename not accepted. Filename should be a .csv without special characters"
)
# check output file is writable
if not perf_helpers.check_file_writeable(args.outcsv):
crash("Output file %s not writeable " % args.outcsv)

mux_intervals = perf_helpers.get_perf_event_mux_interval()
if args.muxinterval > 0:
Expand Down
2 changes: 1 addition & 1 deletion perf-collect.spec
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ exe = EXE(
[],
name='perf-collect',
debug=False,
bootloader_ignore_signals=False,
bootloader_ignore_signals=True,
strip=False,
upx=True,
upx_exclude=[],
Expand Down
20 changes: 10 additions & 10 deletions perf-postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,6 @@ def get_args(script_path):
if args.rawfile and not os.path.isfile(args.rawfile):
crash("perf raw data file not found, please provide valid raw file")

# check output file is valid
if not perf_helpers.validate_outfile(args.outfile, True):
crash(
"Output filename: "
+ args.outfile
+ " not accepted. Filename should be a .csv without special characters"
)

# check output file is writable
if not perf_helpers.check_file_writeable(args.outfile):
crash("Output file %s not writeable " % args.outfile)
Expand Down Expand Up @@ -725,8 +717,16 @@ def get_groups_to_dataframes(
group_start_end_index_dict[group_name] = (start_index, end_index)
start_index = end_index
current_group_indx += 1
group_name = "group_" + str(current_group_indx)
event_list = group_to_event[group_name]
try:
group_name = "group_" + str(current_group_indx)
event_list = group_to_event[group_name]
except KeyError:
crash(
"could not find "
+ str(row)
+ " in event grouping: "
+ str(group_to_event)
)
end_index += 1
group_to_df[group_name] = get_group_df_from_full_frame(
time_slice_df, start_index, time_slice_df.shape[0], perf_mode
Expand Down
42 changes: 9 additions & 33 deletions src/perf_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,18 +150,15 @@ def disable_nmi_watchdog():
proc_output.decode().strip().replace("kernel.nmi_watchdog = ", "")
)
if new_watchdog_status != 0:
crash("Failed to disable nmi watchdog!")
crash("Failed to disable nmi watchdog.")
logging.info(
"nmi_watchdog is temporary disabled. Will enable after collection."
"nmi_watchdog temporarily disabled. Will re-enable after collection."
)
else:
logging.info("nmi_watchdog disabled!")
logging.info("nmi_watchdog already disabled. No change needed.")
return nmi_watchdog_status
except subprocess.CalledProcessError as e:
logging.warning(e)
logging.warning("Failed to disable nmi_watchdog.")
except ValueError as e:
crash(f"Failed to disable watchdog: {e}")
except (ValueError, FileNotFoundError, subprocess.CalledProcessError) as e:
crash(f"Failed to disable nmi_watchdog: {e}")


# enable nmi watchdog
Expand All @@ -172,13 +169,10 @@ def enable_nmi_watchdog():
proc_output.decode().strip().replace("kernel.nmi_watchdog = ", "")
)
if new_watchdog_status != 1:
logging.warning("Failed to re-enable nmi_watchdog!")
logging.warning("Failed to re-enable nmi_watchdog.")
else:
logging.info("nmi_watchdog enabled!")
except subprocess.CalledProcessError as e:
logging.warning(e.output)
logging.warning("Failed to re-enable nmi_watchdog!")
except ValueError as e:
logging.info("nmi_watchdog re-enabled.")
except (ValueError, FileNotFoundError, subprocess.CalledProcessError) as e:
logging.warning(f"Failed to re-enable nmi_watchdog: {e}")


Expand Down Expand Up @@ -308,25 +302,7 @@ def get_cpuid_info(procinfo):
return socketinfo


# check for special characters in output filename
def validate_outfile(filename, xlsx=False):
    """Check that an output path is acceptable.

    The path is accepted when its directory part (if any) exists, its base
    name contains none of the rejected special characters, and it ends with
    ".csv" (or ".xlsx" when ``xlsx`` is true).

    Args:
        filename: output path to validate.
        xlsx: when true, ".xlsx" is accepted in addition to ".csv".

    Returns:
        bool: True if the path passes every check, otherwise False.
    """
    parent_dir, base_name = os.path.split(filename)

    # A directory component that does not exist can never be written to.
    if parent_dir and not os.path.exists(parent_dir):
        return False

    # Only the base name is screened, so path separators stay legal.
    if re.search(r"[@!#$%^&*()<>?\|}{~:]", base_name):
        return False

    accepted_suffixes = (".csv", ".xlsx") if xlsx else (".csv",)
    return filename.endswith(accepted_suffixes)


# check write permissions
# check write permissions on file, or directory if file doesn't exist
def check_file_writeable(outfile):
if os.path.exists(outfile):
if os.path.isfile(outfile):
Expand Down
29 changes: 20 additions & 9 deletions src/prepare_perf_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def get_cgroup_events_format(cgroups, events, num_events):
return perf_format


def filter_events(event_file, cpu_only, TMA_supported, in_vm):
def filter_events(event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles):
if not os.path.isfile(event_file):
crash("event file not found")
collection_events = []
Expand All @@ -124,12 +124,19 @@ def filter_events(event_file, cpu_only, TMA_supported, in_vm):
seperate_cycles = []
if in_vm:
# since most CSPs hide the cycles fixed PMU inside their VMs, we put it in its own group
seperate_cycles = [
"cpu-cycles,",
"cpu-cycles:k,",
"ref-cycles,",
"instructions;",
]
if supports_ref_cycles:
seperate_cycles = [
"cpu-cycles,",
"cpu-cycles:k,",
"ref-cycles,",
"instructions;",
]
else:
seperate_cycles = [
"cpu-cycles,",
"cpu-cycles:k,",
"instructions;",
]

def process(line):
line = line.strip()
Expand All @@ -153,6 +160,8 @@ def process(line):
for line in fin:
if in_vm and "cpu-cycles" in line:
continue
if not supports_ref_cycles and "ref-cycles" in line:
continue
process(line)
for line in seperate_cycles:
process(line)
Expand All @@ -163,15 +172,17 @@ def process(line):
return collection_events, unsupported_events


def prepare_perf_events(event_file, cpu_only, TMA_supported, in_vm):
def prepare_perf_events(
event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles
):
start_group = "'{"
end_group = "}'"
group = ""
prev_group = ""
new_group = True

collection_events, unsupported_events = filter_events(
event_file, cpu_only, TMA_supported, in_vm
event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles
)
core_event = []
uncore_event = []
Expand Down

0 comments on commit 8b186e8

Please sign in to comment.