Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding stress test script #261

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions stressTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import argparse
import subprocess
import time
import os
import sys
from config import Config


def run_stress_test(
    num_submissions,
    submission_delay,
    autograder_image,
    output_file,
    tango_port,
    tango_path,
    job_name,
    job_path,
):
    """Submit the same job to Tango repeatedly through ``tango-cli.py``.

    Each submission runs ``python3 <tango_path>/clients/tango-cli.py`` with
    the given port, job name, job path and image. The CLI's stdout/stderr and
    a per-submission status line are appended to ``output_file``. The process
    exits with status 0 once every submission has been attempted.

    Args:
        num_submissions: Number of times to invoke the CLI.
        submission_delay: Seconds to sleep after each submission (skipped
            when not positive).
        autograder_image: Value passed to the CLI's ``--image`` option.
        output_file: Path of the file that results are appended to.
        tango_port: Integer port passed to the CLI's ``-P`` option; must be
            in the range 1024-65535.
        tango_path: Directory containing ``clients/tango-cli.py``.
        job_name: Value passed to the CLI's ``-l`` option.
        job_path: Existing path passed to the CLI's ``--runJob`` option.

    Raises:
        ValueError: If the CLI script or job path does not exist, or the
            port is not an int in the accepted range.
        SystemExit: Always raised (status 0) after the submissions finish.
    """
    # The inputs never change between submissions, so validate them and
    # build the command once, before anything is written to the output file.
    tango_cli = os.path.join(tango_path, "clients/tango-cli.py")
    if not os.path.isfile(tango_cli):
        raise ValueError(f"Tango CLI not found at: {tango_cli}")
    if not os.path.exists(job_path):
        raise ValueError(f"Job path does not exist: {job_path}")
    if not isinstance(tango_port, int) or not (1024 <= tango_port <= 65535):
        raise ValueError("Invalid port number")

    command = [
        "python3",
        tango_cli,
        "-P",
        str(tango_port),
        "-k",
        "test",
        "-l",
        job_name,
        "--runJob",
        job_path,
        "--image",
        autograder_image,
    ]

    with open(output_file, "a") as f:
        f.write(f"Stress testing with {num_submissions} submissions\n")
        for i in range(1, num_submissions + 1):
            # The child process writes straight to the underlying file
            # descriptor; flush our buffered text first so the log stays in
            # chronological order.
            f.flush()
            try:
                # check=True is what makes a non-zero CLI exit raise
                # CalledProcessError; without it failures were reported as
                # successful submissions.
                subprocess.run(command, stdout=f, stderr=f, check=True)
            except (subprocess.CalledProcessError, OSError) as e:
                # OSError covers the interpreter itself being unavailable.
                f.write(f"Error running submission {i}: {e}\n")
            else:
                f.write(f"Submission {i} submitted \n")
            # Pace every attempt, failed or not, so a failing server is not
            # hit in a tight loop.
            if submission_delay > 0:
                time.sleep(submission_delay)

    sys.exit(0)


def get_metrics(output_file):
    """Summarise completed-job durations found in the Tango log file.

    Scans ``Config.LOGFILE`` for lines containing ``"finished after "`` and
    reads the integer that follows as a duration in seconds. The job count
    and mean duration are appended to ``output_file``; if no durations are
    found a notice is printed instead. The process then exits.

    Args:
        output_file: Path of the file that the summary is appended to.

    Raises:
        SystemExit: Status 1 when ``Config.LOGFILE`` is None, otherwise
            status 0 after the summary has been produced.
    """
    if Config.LOGFILE is None:
        print("Make sure logs are recorded in a log file")
        # Metrics cannot be produced without a log file, so report failure
        # to the caller rather than a success status.
        sys.exit(1)

    marker = "finished after "
    job_times = []
    with open(Config.LOGFILE, "r") as f:
        for line in f:
            _, found, tail = line.partition(marker)
            if not found:
                continue
            try:
                job_times.append(int(tail.split()[0]))
            except (IndexError, ValueError):
                # A truncated or differently formatted line should not abort
                # the whole report; skip it.
                continue

    with open(output_file, "a") as f:
        if not job_times:
            print("No jobs have been completed")
        else:
            avg = sum(job_times) / len(job_times)
            f.write(f"Total jobs completed: {len(job_times)} \n")
            f.write(f"Average job time: {avg} seconds \n")

    sys.exit(0)


if __name__ == "__main__":
    # Command-line entry point: either submit a batch of jobs or, with
    # --get_metrics, only summarise job times from the existing log.

    def _parse_bool(value):
        """Convert a command-line string such as "true"/"false" to a bool."""
        # argparse's type=bool treats every non-empty string (including
        # "False") as True, so the text is interpreted explicitly instead.
        text = str(value).strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")

    parser = argparse.ArgumentParser(description="Stress test script for Tango")
    parser.add_argument(
        "--num_submissions", type=int, default=10, help="Number of submissions"
    )
    parser.add_argument(
        "--submission_delay", type=float, default=1.0, help="Delay between submissions"
    )
    parser.add_argument("--autograder_image", type=str, help="Autograder image")
    parser.add_argument(
        "--output_file", type=str, default="stress_test.out", help="Output file"
    )
    parser.add_argument(
        "--tango_port", type=int, default=4567, help="Tango server port"
    )
    parser.add_argument("--tango_path", type=str, help="Path to Tango")
    parser.add_argument("--job_name", type=str, help="Name of the job")
    parser.add_argument("--job_path", type=str, help="Path to the job")
    parser.add_argument(
        "--get_metrics",
        type=_parse_bool,
        nargs="?",
        const=True,  # a bare "--get_metrics" means true
        default=False,
        help="Set to true to get metrics, does not create new jobs",
    )

    args = parser.parse_args()

    if args.get_metrics:
        get_metrics(args.output_file)
    else:
        # These options only matter when jobs are actually submitted, so
        # they are enforced here rather than for metrics-only runs.
        required = ("autograder_image", "tango_path", "job_name", "job_path")
        missing = [f"--{name}" for name in required if getattr(args, name) is None]
        if missing:
            parser.error(
                "the following arguments are required: " + ", ".join(missing)
            )
        run_stress_test(
            args.num_submissions,
            args.submission_delay,
            args.autograder_image,
            args.output_file,
            args.tango_port,
            args.tango_path,
            args.job_name,
            args.job_path,
        )
7 changes: 6 additions & 1 deletion worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def run(self):
"""run - Step a job through its execution sequence"""
try:
# Hash of return codes for each step
start_time = time.time()
ret = {}
ret["waitvm"] = None
ret["copyin"] = None
Expand Down Expand Up @@ -319,7 +320,11 @@ def run(self):
# Job termination. Notice that Tango considers
# things like runjob timeouts and makefile errors to be
# normal termination and doesn't reschedule the job.
self.log.info("Success: job %s:%d finished" % (self.job.name, self.job.id))
elapsed_secs = time.time() - start_time
self.log.info(
"Success: job %s:%d finished after %d seconds"
% (self.job.name, self.job.id, elapsed_secs)
)

# Move the job from the live queue to the dead queue
# with an explanatory message
Expand Down