Skip to content

Commit

Permalink
update profiling utils and naming
Browse files Browse the repository at this point in the history
  • Loading branch information
Aspen Cherie Smith committed Dec 12, 2024
1 parent 991075c commit 4662e10
Show file tree
Hide file tree
Showing 7 changed files with 308 additions and 35 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.vscode/
build/
*.ipynb
*.png
*.png
vtune/
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ SRC_DIR=common
ALGO_DIR=algorithms
BUILD_DIR=build

all: brute dp genetic greedy genetic_cuda dp_omp dp_cuda greedy_cuda dp_numa
all: brute dp genetic greedy genetic_cuda dp_omp dp_cuda greedy_cuda dp_omp_numa

brute: $(BUILD_DIR)/brute
dp: $(BUILD_DIR)/dp
Expand All @@ -23,7 +23,7 @@ genetic_cuda: $(BUILD_DIR)/genetic_cuda
dp_omp: $(BUILD_DIR)/dp_omp
greedy_cuda: $(BUILD_DIR)/greedy_cuda
dp_cuda: $(BUILD_DIR)/dp_cuda
dp_numa: $(BUILD_DIR)/dp_numa
dp_omp_numa: $(BUILD_DIR)/dp_omp_numa

$(BUILD_DIR)/brute: $(SRC_DIR)/main.cpp $(ALGO_DIR)/brute.cpp
$(CPP) $^ -o $@ $(CFLAGS) $(OPTFLAGS)
Expand All @@ -49,7 +49,7 @@ $(BUILD_DIR)/greedy_cuda: $(SRC_DIR)/main.cpp $(ALGO_DIR)/greedy_cuda.cu
$(BUILD_DIR)/dp_cuda: $(SRC_DIR)/main.cpp $(ALGO_DIR)/dp_cuda.cu
$(NVCC) $^ -o $@ $(NVCCFLAGS)

$(BUILD_DIR)/dp_numa: $(SRC_DIR)/main.cpp $(ALGO_DIR)/dp_numa.cpp
$(BUILD_DIR)/dp_omp_numa: $(SRC_DIR)/main.cpp $(ALGO_DIR)/dp_omp_numa.cpp
$(CPP) $^ -o $@ $(CFLAGS_NUMA) $(OPTFLAGS_DP)

.PHONY: clean
Expand Down
1 change: 0 additions & 1 deletion algorithms/dp_omp.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// 0m40.416s
#include <bits/stdc++.h>
#include <omp.h> // Include OpenMP header for parallelization
#include <vector>
Expand Down
File renamed without changes.
25 changes: 25 additions & 0 deletions data/small_25.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
4.179924564742493187e-01,3.441119252755281410e-01
5.893815561138715520e-02,6.795302883652539760e-01
4.784590047029411153e-01,5.381499589254157279e-02
5.211594296883900190e-01,8.066037608804467185e-01
5.806320199484605871e-01,6.029501879313505786e-01
3.091989979348186424e-01,6.552131835038437213e-01
9.198826252545819759e-01,7.476597889282088349e-01
6.553475030164537252e-01,5.356261761284698553e-01
3.492413849207449683e-01,1.091564818399735071e-01
5.410940445399985421e-01,4.961962866529807670e-01
4.490533956886093581e-01,1.723426276209378827e-01
2.823209606253457293e-01,6.839891964077760012e-01
2.958765252205050045e-01,5.298252112514997281e-01
5.634809826131318422e-01,6.252242659304493300e-01
7.151912790861654212e-01,3.151430227027559594e-01
5.176186770719936048e-01,4.915106394415269531e-01
3.520418285904083344e-01,8.201420385192299545e-01
6.327788586249427638e-01,1.326204556352319486e-01
8.731190201719776223e-01,1.213094822729351430e-01
1.678884446672987973e-01,4.462149309981606748e-01
9.874783222814236261e-01,4.093413656794342437e-02
3.494149296248078018e-01,3.005167089028528693e-01
8.262051275325252853e-01,4.093904125930584170e-01
6.546154192348774270e-02,3.725193910445782075e-01
5.353288386336307880e-03,3.399701713068946640e-01
30 changes: 0 additions & 30 deletions data/small_30.csv

This file was deleted.

278 changes: 278 additions & 0 deletions utils/vtune.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
import os
import subprocess
import time
import numpy as np
import matplotlib.pyplot as plt
import re
import pandas as pd

# Thread counts to sweep (cores / OpenMP threads per profiling job)
h_values = [8, 16, 32, 64, 128]

# Template for the sbatch script content.
# Bug fix: this commit renames the Make target dp_numa -> dp_omp_numa
# (see Makefile), so the build and run commands below must use the new
# name or the job fails with "No rule to make target 'dp_numa'".
sbatch_template = """#!/bin/bash
#SBATCH -J sph
#SBATCH -o vtune_%j.out
#SBATCH -e vtune_%j.err
#SBATCH -A m4776
#SBATCH -C cpu
#SBATCH -c {h_value}
#SBATCH --qos=debug
#SBATCH -t 00:15:00
#SBATCH -N 1
#SBATCH -n 1
module load intel-oneapi/2022.1.0
export SLURM_CPU_BIND="cores"
export OMP_NUM_THREADS={h_value}
export OMP_PROC_BIND=spread
export OMP_PLACES=threads
make clean
make dp_omp_numa
mkdir -p $HOME/cs5220-tsp-optimization/vtune/vtune_results_{h_value}
vtune -collect threading -result-dir $HOME/cs5220-tsp-optimization/vtune/vtune_results_{h_value} -- srun ./build/dp_omp_numa --csv data/small_25.csv
"""

# Maps submitted Slurm job id -> the h value (thread count) that job tests.
job_h_map = {}

# Function to check if any of the given Slurm jobs are still queued/running.
def are_jobs_running(job_ids):
    """Return True while any job id in `job_ids` still appears in `squeue`.

    job_ids: iterable of Slurm job-id strings captured from sbatch output.
    Returns False when squeue lists none of them (or produces no output).
    """
    result = subprocess.run("squeue -u $USER", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    running_jobs = set()
    for line in result.stdout.splitlines()[1:]:  # skip the squeue header line
        parts = line.split()
        if parts:  # bug fix: a blank line would make parts[0] raise IndexError
            running_jobs.add(parts[0])  # first squeue column is the job id
    # Return whether any of the provided job IDs are still running
    return any(job_id in running_jobs for job_id in job_ids)

# Submit the profiling jobs in batches of `batch_size` so we never occupy
# more than two debug-queue slots at once.
h_iter = iter(h_values)
batch_size = 2

while True:
    # Submit up to `batch_size` jobs, one per remaining h value.
    job_ids = []
    for _ in range(batch_size):
        try:
            h = next(h_iter)
        except StopIteration:
            break

        # Create a temporary sbatch file with the appropriate h value
        sbatch_file = f"submit_vtune_job_{h}.sub"
        with open(sbatch_file, 'w') as f:
            f.write(sbatch_template.format(h_value=h))

        # Submit the sbatch job
        print(f"submitting program with {h} num threads...")
        result = subprocess.run(f"sbatch {sbatch_file}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)

        # Capture the job ID from sbatch's "Submitted batch job <id>" line.
        # (Removed dead `submitted = True` assignment — it was never read.)
        for line in result.stdout.splitlines():
            if "Submitted batch job" in line:
                job_id = line.split()[-1]
                job_h_map[job_id] = h
                job_ids.append(job_id)
                print(f"job {job_id} submitted for {h} num threads")

    # Wait for this batch to complete before submitting more.
    if job_ids:
        print(f"waiting for jobs {job_ids} to complete...")
        while are_jobs_running(job_ids):
            time.sleep(10)  # Check every 10 seconds

        # Rename output files from vtune_<job_id>.out to vtune_<h>_thread.out
        for job_id in job_ids:
            h = job_h_map[job_id]
            old_output_file = f"vtune_{job_id}.out"
            new_output_file = f"vtune_{h}_thread.out"
            if os.path.exists(old_output_file):
                os.rename(old_output_file, new_output_file)
                print(f"renamed {old_output_file} to {new_output_file}")

    # An empty batch means h_iter is exhausted — stop submitting.
    # NOTE(review): a batch whose sbatch calls all fail also yields an empty
    # job_ids and ends the loop early, silently skipping remaining h values.
    if not job_ids:
        print('all values have been submitted')
        break

# Placeholder for results: (h, num_particles, execution_time)
# NOTE(review): this list is never populated anywhere below — confirm it
# is still needed.
results = []

def finalize_vtune_results(result_dir, h):
    """Finalize one VTune result directory and export its summary as CSV.

    result_dir: path to the vtune result directory for one run.
    h: thread count of that run (used in log messages and the CSV name).
    Returns None; progress and errors are reported via print().
    """
    if not os.path.exists(result_dir):
        print(f"result directory {result_dir} does not exist. Skipping finalization.")
        return  # bug fix: previously fell through and ran vtune anyway

    # Finalize the results
    finalize_command = f"vtune -finalize -r {result_dir}"
    result = subprocess.run(finalize_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    if result.returncode != 0:
        print(f"error finalizing results for {h} processors")
        print(result.stderr)
    else:
        print(f"finalized results for {h} processors")

    # Save the summary report to a CSV file
    report_file = f"/global/homes/a/acs378/cs5220-tsp-optimization/vtune/vtune_summaries/summary_{h}_thread.csv"
    report_command = f"vtune -report summary -r {result_dir} -format csv -report-output {report_file}"
    result = subprocess.run(report_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    if result.returncode != 0:
        print(f"error saving report summary for {h} processors")
        print(result.stderr)
    else:
        print(f"saved report summary to {report_file}")

# Finalize every per-thread-count VTune result directory and export summaries.
for h in h_values:
    finalize_vtune_results(f"/global/homes/a/acs378/cs5220-tsp-optimization/vtune/vtune_results_{h}", h)

# Pull the thread count out of each summary file name, and the
# poor-utilization wait time out of its contents.
h_value_pattern = re.compile(r'summary_(\d+)_thread\.csv')
wait_time_pattern = re.compile(r'Wait Time with poor CPU Utilization\s+(\d+\.\d+)s')
wait_times = []
h_values = []
summary_dir = '/global/homes/a/acs378/cs5220-tsp-optimization/vtune/vtune_summaries'

# Iterate over the files in the summaries directory.
for filename in os.listdir(summary_dir):
    name_match = h_value_pattern.match(filename)
    if name_match is None:
        continue  # not a summary file
    h_value = int(name_match.group(1))

    # Read the content of the file
    with open(os.path.join(summary_dir, filename), 'r') as file:
        content = file.read()

    # Extract the Wait Time with poor CPU Utilization; skip files without it
    # so h_values and wait_times stay in lockstep.
    time_match = wait_time_pattern.search(content)
    if time_match is None:
        continue
    wait_time = float(time_match.group(1))
    print(wait_time)
    h_values.append(h_value)
    wait_times.append(wait_time)

# Create a DataFrame for easier plotting
print(h_values)
print(wait_times)
data = pd.DataFrame({
    'h_value': h_values,
    'Wait Time with poor CPU Utilization': wait_times,
})

# Sort data by h_value
data = data.sort_values(by='h_value')

# Plot the results
plt.figure(figsize=(10, 6))
plt.plot(data['h_value'], data['Wait Time with poor CPU Utilization'], '-o', label='Wait Time vs CPU Threads')
plt.xlabel('Number of Threads')
plt.ylabel('Wait Time with Poor CPU Utilization (s)')
plt.title('Wait Time with Poor CPU Utilization vs Number of Threads')
plt.grid(True)
plt.legend()
plt.savefig('vtune/graph.png')

# Directory containing the summary files
directory = '/global/homes/a/acs378/cs5220-tsp-optimization/vtune/vtune_summaries'  # Replace with the actual path

# Lists to store extracted data
h_values = []
wait_times = []
total_times = []
serial_times = []
cpu_utilizations = []

# Regex patterns to extract metrics from the file content
h_value_pattern = re.compile(r'summary_(\d+)_thread\.csv')
wait_time_pattern = re.compile(r'Wait Time with poor CPU Utilization\s+(\d+\.\d+)s')
elapsed_time_pattern = re.compile(r'Elapsed Time\s+(\d+\.\d+)')
serial_time_pattern = re.compile(r'Serial Time \(outside parallel regions\)\s+(\d+\.\d+)s')
cpu_utilization_pattern = re.compile(r'Effective CPU Utilization\s+(\d+\.\d+)%')

# Iterate over the files in the directory
for filename in os.listdir(directory):
    h_value_match = h_value_pattern.match(filename)
    if h_value_match:
        # Extract h_value from the filename
        h_value = int(h_value_match.group(1))

        # Read the content of the file
        file_path = os.path.join(directory, filename)
        with open(file_path, 'r') as file:
            content = file.read()

        # Extract the metrics
        wait_time_match = wait_time_pattern.search(content)
        elapsed_time_match = elapsed_time_pattern.search(content)
        serial_time_match = serial_time_pattern.search(content)
        cpu_utilization_match = cpu_utilization_pattern.search(content)

        if wait_time_match and elapsed_time_match:
            # Store the extracted data
            h_values.append(h_value)
            wait_times.append(float(wait_time_match.group(1)))
            total_times.append(float(elapsed_time_match.group(1)))
            # Bug fix: always append (None when absent) so these lists keep
            # the same length as h_values — ragged lists previously made the
            # DataFrame column assignments below raise ValueError whenever
            # only some files reported these optional metrics.
            serial_times.append(float(serial_time_match.group(1)) if serial_time_match else None)
            cpu_utilizations.append(float(cpu_utilization_match.group(1)) if cpu_utilization_match else None)

# Create a DataFrame for easier plotting
data = pd.DataFrame({
    'h_value': h_values,
    'Wait Time with poor CPU Utilization': wait_times,
    'Elapsed Time': total_times
})

# Only attach the optional columns when at least one file reported them.
if any(t is not None for t in serial_times):
    data['Serial Time'] = serial_times

if any(u is not None for u in cpu_utilizations):
    data['CPU Utilization'] = cpu_utilizations

data = data.sort_values(by='h_value')

# Calculate normalized wait times
data['Normalized Wait Time'] = data['Wait Time with poor CPU Utilization'] / data['Elapsed Time']

def _line_plot(x, y, label, xlabel, ylabel, title, outfile):
    """Render one x/y line plot with markers and save it to `outfile`."""
    plt.figure(figsize=(10, 6))
    plt.plot(x, y, '-o', label=label)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.grid(True)
    plt.legend()
    plt.savefig(outfile)

# Plot 2: Total Elapsed Time vs Number of Threads
_line_plot(data['h_value'], data['Elapsed Time'],
           'Total Elapsed Time',
           'Number of CPU Cores/Threads (h_value)',
           'Elapsed Time (s)',
           'Total Elapsed Time vs CPU Cores/Threads',
           'vtune/total_elapsed_time_v_num_threads.png')

# Plot 3: Normalized Wait Time (as a fraction of total time) vs Number of Threads
_line_plot(data['h_value'], data['Normalized Wait Time'],
           'Normalized Wait Time (fraction of total time)',
           'Number of CPU Cores/Threads (h_value)',
           'Normalized Wait Time (fraction of total time)',
           'Normalized Wait Time vs CPU Cores/Threads',
           'vtune/normalized_wait_time_v_num_threads.png')

# Plot 4: Serial Time vs Number of Threads (if Serial Time is available)
if 'Serial Time' in data.columns:
    _line_plot(data['h_value'], data['Serial Time'],
               'Serial Time (outside parallel regions)',
               'Number of CPU Cores/Threads (h_value)',
               'Serial Time (s)',
               'Serial Time vs CPU Cores/Threads',
               'vtune/serial_time_v_num_threads.png')

0 comments on commit 4662e10

Please sign in to comment.