Commit

Improve printing in hw_decoder_bench.py
Signed-off-by: Joaquin Anton Guirao <[email protected]>
jantonguirao committed Nov 25, 2024
1 parent c4f280c commit 2de2edd
Showing 2 changed files with 50 additions and 5 deletions.
47 changes: 43 additions & 4 deletions internal_tools/hw_decoder_bench.py
@@ -15,6 +15,7 @@
import argparse
import nvidia.dali.fn as fn
import nvidia.dali.types as types
import statistics
import time
from nvidia.dali.pipeline import pipeline_def
import random
@@ -69,6 +70,15 @@
    default=0.75,
    type=float,
)

parser.add_argument(
    "--print_every_n_iterations",
    dest="print_every_n_iterations",
    help="If > 0, print statistics every N iterations.",
    default=-1,
    type=int,
)

args = parser.parse_args()

DALI_INPUT_NAME = "DALI_INPUT_0"
@@ -369,15 +379,44 @@ def vit_pipeline(is_training=False, image_shape=(384, 384, 3), num_classes=1000)
test_iterations = args.total_images // args.batch_size

print("Test iterations: ", test_iterations)
start_time = time.perf_counter()
execution_times = []
for iteration in range(test_iterations):
    iter_start_time = time.perf_counter()
    for p in pipes:
        feed_input(p, input_tensor)
        p.schedule_run()
    for p in pipes:
        _ = p.share_outputs()
    for p in pipes:
        p.release_outputs()
-end = time.time()
-total_time = end - start

-print(test_iterations * args.batch_size * args.gpu_num / total_time, "fps")
    iter_end_time = time.perf_counter()
    iter_duration = iter_end_time - iter_start_time
    execution_times.append(iter_duration)

    if args.print_every_n_iterations > 0 and iteration % args.print_every_n_iterations == 0:
        elapsed_time = time.perf_counter() - start_time
        throughput = (iteration + 1) * args.batch_size * args.gpu_num / elapsed_time
        mean_t = statistics.mean(execution_times)
        median_t = statistics.median(execution_times)
        min_t = min(execution_times)
        max_t = max(execution_times)
        print(
            f"Iteration {iteration + 1}/{test_iterations} - Throughput: {throughput:.2f} frames/sec "
            f"(mean={mean_t:.6f}sec, median={median_t:.6f}sec, min={min_t:.6f}sec, max={max_t:.6f}sec)"
        )

end_time = time.perf_counter()
total_time = end_time - start_time
total_throughput = test_iterations * args.batch_size * args.gpu_num / total_time
avg_t = statistics.mean(execution_times)
stdev_t = statistics.stdev(execution_times)
median_t = statistics.median(execution_times)
min_t = min(execution_times)
max_t = max(execution_times)

print("\nFinal Results:")
print(f"Total Execution Time: {total_time:.6f} seconds")
print(f"Total Throughput: {total_throughput:.2f} frames/sec")
print(f"Average time per iteration: {avg_t:.6f} frames/sec")
print(f"Median time per iteration: {median_t:.6f} frames/sec")
print(f"Stddev time per iteration: {stdev_t:.6f} frames/sec")
print(f"Min time per iteration: {min_t:.6f} frames/sec")
print(f"Max time per iteration: {max_t:.6f} frames/sec")
8 changes: 7 additions & 1 deletion qa/TL1_decoder_perf/test.sh
@@ -20,7 +20,13 @@ test_body() {
MIN_PERF=29000;
python hw_decoder_bench.py --width_hint 6000 --height_hint 6000 -b 408 -d 0 -g gpu -w 100 -t 100000 -i ${DALI_EXTRA_PATH}/db/single/jpeg -p rn50 -j 72 --hw_load 0.11 | tee ${LOG}
fi
PERF=$(grep "fps" ${LOG} | awk '{print $1}')

# Regex Explanation:
# "Total Throughput: ": Matches the literal string "Total Throughput: ".
# \K: Resets the start of the match, so anything before \K is not included in the output.
# [0-9]+(\.[0-9]+)?: Matches the number, with an optional decimal part.
# (?= frames/sec): Ensures that " frames/sec" follows the number, but is not included in the match.
PERF=$(grep -oP 'Total Throughput: \K[0-9]+(\.[0-9]+)?(?= frames/sec)' ${LOG})

PERF_RESULT=$(echo "$PERF $MIN_PERF" | awk '{if ($1>=$2) {print "OK"} else { print "FAIL" }}')
if [[ "$PERF_RESULT" == "OK" ]]; then
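To see what the new extraction produces, the same pattern can be exercised on a hand-written sample line; the throughput value below is made up for illustration, and the test itself greps ${LOG} rather than a pipe from echo.

echo "Total Throughput: 31245.67 frames/sec" | grep -oP 'Total Throughput: \K[0-9]+(\.[0-9]+)?(?= frames/sec)'
# prints: 31245.67

Only the bare number reaches the awk comparison against MIN_PERF, which is what the OK/FAIL check expects.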
