From d0e38cee1c44a47d344843f702d816d036e8c362 Mon Sep 17 00:00:00 2001
From: Eric Han
Date: Fri, 31 Dec 2021 17:46:02 +0000
Subject: [PATCH] convert rnnt to bmlogger and clean up logging in other
 places

---
 benchmarks/dlrm/ootb/dlrm_s_pytorch.py |  4 ++--
 benchmarks/rnnt/ootb/inference/run.py  | 19 ++++++++++---------
 benchmarks/rnnt/ootb/train/train.py    | 18 ++++++++++--------
 benchmarks/run_all.sh                  | 22 +++++++---------------
 benchmarks/xlmr/ootb/xlmr.py           |  4 ++--
 bmlogging/bmlogger.py                  |  2 +-
 6 files changed, 32 insertions(+), 37 deletions(-)

diff --git a/benchmarks/dlrm/ootb/dlrm_s_pytorch.py b/benchmarks/dlrm/ootb/dlrm_s_pytorch.py
index bf6f97a..2401244 100644
--- a/benchmarks/dlrm/ootb/dlrm_s_pytorch.py
+++ b/benchmarks/dlrm/ootb/dlrm_s_pytorch.py
@@ -1192,9 +1192,9 @@ def inference(
     scores = []
     targets = []
 
-    bmlogger = get_bmlogger() # default to Nop logger
+    bmlogger = get_bmlogger(log_file_path=None) # default to Nop logger
     if args.fb5logger is not None:
-        bmlogger = get_bmlogger(args.fb5logger)
+        bmlogger = get_bmlogger(log_file_path=args.fb5logger)
         bmlogger.header("DLRM", "OOTB", "eval", args.fb5config, score_metric=loggerconstants.EXPS)
 
     for i, testBatch in enumerate(test_ld):
diff --git a/benchmarks/rnnt/ootb/inference/run.py b/benchmarks/rnnt/ootb/inference/run.py
index 4f688fd..b57963a 100644
--- a/benchmarks/rnnt/ootb/inference/run.py
+++ b/benchmarks/rnnt/ootb/inference/run.py
@@ -25,9 +25,9 @@
 MLPERF_CONF = MLPERF_CONF.resolve()
 
 # FB5 Logger
-p = Path(__file__).parent.resolve() / "../../../../fb5logging"
+p = Path(__file__).parent.resolve() / "../../../../bmlogging"
 sys.path.append(os.fspath(p))
-from fb5logger import FB5Logger
+from bmlogger import get_bmlogger
 import loggerconstants
 
 
@@ -62,8 +62,10 @@ def main():
     args = get_args()
 
     if args.fb5logger is not None:
-        fb5logger = FB5Logger(args.fb5logger)
-        fb5logger.header("RNN-T", "OOTB", "infer", args.fb5config, score_metric=loggerconstants.EXPS)
+        bmlogger = get_bmlogger(log_file_path=args.fb5logger)
+    else:
+        bmlogger = get_bmlogger(log_file_path=None) # default to Nop logger
+    bmlogger.header("RNN-T", "OOTB", "infer", args.fb5config, score_metric=loggerconstants.EXPS)
 
     if args.backend == "pytorch":
         from pytorch_SUT import PytorchSUT
@@ -91,12 +93,11 @@ def main():
         log_settings.log_output = log_output_settings
 
     print("Running Loadgen test...")
-    if args.fb5logger is not None:
-        fb5logger.run_start()
+    bmlogger.run_start()
     lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)
-    if args.fb5logger is not None:
-        nbatches = sut.qsl.count
-        fb5logger.run_stop(nbatches, 1)
+
+    nbatches = sut.qsl.count
+    bmlogger.run_stop(nbatches, 1)
 
     if args.accuracy:
         cmd = f"python3 accuracy_eval.py --log_dir {log_path} --dataset_dir {args.dataset_dir} --manifest {args.manifest}"
diff --git a/benchmarks/rnnt/ootb/train/train.py b/benchmarks/rnnt/ootb/train/train.py
index b866b3b..0c11069 100644
--- a/benchmarks/rnnt/ootb/train/train.py
+++ b/benchmarks/rnnt/ootb/train/train.py
@@ -44,9 +44,9 @@
 
 # FB5 Logger
 import pathlib
-p = pathlib.Path(__file__).parent.resolve() / "../../../../fb5logging"
+p = pathlib.Path(__file__).parent.resolve() / "../../../../bmlogging"
 sys.path.append(os.fspath(p))
-from fb5logger import FB5Logger
+from bmlogger import get_bmlogger
 import loggerconstants
 
 
@@ -203,9 +203,12 @@ def main():
     logging.configure_logger('RNNT')
     logging.log_start(logging.constants.INIT_START)
 
+
     if args.fb5logger is not None:
-        fb5logger = FB5Logger(args.fb5logger)
-        fb5logger.header("RNN-T", "OOTB", "train", args.fb5config, score_metric=loggerconstants.EXPS)
+        bmlogger = get_bmlogger(log_file_path=args.fb5logger)
+    else:
+        bmlogger = get_bmlogger(log_file_path=None) # default to Nop logger
+    bmlogger.header("RNN-T", "OOTB", "train", args.fb5config, score_metric=loggerconstants.EXPS)
 
     assert(torch.cuda.is_available())
     assert args.prediction_frequency is None or args.prediction_frequency % args.log_frequency == 0
@@ -443,8 +446,7 @@ def forward(self, x):
     step = start_epoch * steps_per_epoch + 1
 
     # FB5 Log for a certain amount of time.
-    if args.fb5logger is not None:
-        fb5logger.run_start()
+    bmlogger.run_start()
     total_batches = 0
     start_time = time.time()
     MAX_TIME = 120.0
@@ -588,7 +590,7 @@ def forward(self, x):
             if args.mlperf:
                 logging.log_end(logging.constants.RUN_STOP, metadata={'status': 'success'})
             if args.fb5logger is not None:
-                fb5logger.run_stop(total_batches, args.batch_size)
+                bmlogger.run_stop(total_batches, args.batch_size)
             print_once(f'Finished after {args.epochs_this_job} epochs.')
             break
         if 0 < args.epochs_this_job <= epoch - start_epoch:
@@ -602,7 +604,7 @@ def forward(self, x):
     if args.mlperf:
         logging.log_end(logging.constants.RUN_STOP, metadata={'status': 'aborted'})
     if args.fb5logger is not None:
-        fb5logger.run_stop(total_batches, args.batch_size)
+        bmlogger.run_stop(total_batches, args.batch_size)
 
     if epoch == args.epochs:
         evaluate(epoch, step, val_loader, val_feat_proc, tokenizer.detokenize,
diff --git a/benchmarks/run_all.sh b/benchmarks/run_all.sh
index cfb716f..d477df5 100755
--- a/benchmarks/run_all.sh
+++ b/benchmarks/run_all.sh
@@ -1,19 +1,11 @@
 #!/bin/bash
 
-# Run all major benchmarks with tiny configs as an example
-# Also used as a test script to make sure benchmarks run correctly.
-
-# DLRM OOTB
-./run_dlrm_ootb_infer.sh -l results
-./run_dlrm_ootb_train.sh -l results # ootb configs use config files. See docs/DLRM.md
-
-# DLRM UBench
-./run_dlrm_ubench_train_linear.sh -c "[(2,2,2,2,2)]" -l results # Config not real
-./run_dlrm_ubench_train_embeddingbag.sh -l results -c "[(2,2,2,2),(2,2,2,2),(2,2,2,2),(2,2,2,2),(2,2,2,2)]" # Config not real
-
-# XLMR OOTB
-./run_xlmr_ootb.sh
-
+./run_xlmr_ootb.sh -c "--inference-only --famconfig=fb-1dev-A --num-batches=100 --batch-size=96 --sequence-length=64 --vocab-size=250000 --half-model --use-gpu --warmup-batches=20"
+./run_xlmr_ootb.sh -c "--inference-only --famconfig=fb-1dev-A-realdist --num-batches=100 --batch-size=64 --vocab-size=250000 --half-model --use-gpu --warmup-batches=20 --seqlen-dist=[[1,2462],[0.99,675],[0.95,250],[0.9,147],[0.75,56],[0.7,47],[0.5,23],[0.25,11],[0.05,3],[0,1]] --seqlen-dist-max=256"
+./run_xlmr_ootb.sh -c "--inference-only --famconfig=fb-1dev-B --num-batches=100 --batch-size=64 --sequence-length=256 --vocab-size=250000 --half-model --use-gpu --warmup-batches=20"
+./run_xlmr_ootb.sh -c "--famconfig=fb-1dev-A-fp32 --num-batches=50 --batch-size=32 --sequence-length=256 --vocab-size=250000 --use-gpu --warmup-batches=10"
+./run_xlmr_ootb.sh -c "--famconfig=fb-1dev-A-fp16 --num-batches=50 --batch-size=32 --sequence-length=256 --vocab-size=250000 --half-model --use-gpu --warmup-batches=10"
+./run_xlmr_ootb.sh -c "--famconfig=msft-1dev --num-batches=50 --batch-size=16 --sequence-length=512 --vocab-size=250000 --half-model --use-gpu --warmup-batches=10"
 # view options: [raw_view -> pure json, intermediate_view -> nice table]
 # intermediate view recommended for filling out table
-python ../fb5logging/result_summarizer.py -f results -v intermediate_view
\ No newline at end of file
+python ../bmlogging/result_summarizer.py -f results -v intermediate_view
\ No newline at end of file
diff --git a/benchmarks/xlmr/ootb/xlmr.py b/benchmarks/xlmr/ootb/xlmr.py
index 63e0501..36949bf 100644
--- a/benchmarks/xlmr/ootb/xlmr.py
+++ b/benchmarks/xlmr/ootb/xlmr.py
@@ -128,14 +128,14 @@ def run():
     device = torch.device("cuda", 0)
 
     # prep logger
-    bmlogger = get_bmlogger() # default to Nop logger
+    bmlogger = get_bmlogger(log_file_path=None) # default to Nop logger
     if args.logdir is not None:
         mode = "train"
         if(args.inference_only):
             mode = "eval"
 
         logpath = "{}/XLMR_OOTB_{}_{}.log".format(args.logdir, mode, args.famconfig)
-        bmlogger = get_bmlogger(logpath)
+        bmlogger = get_bmlogger(log_file_path=logpath)
         bmlogger.header("XLMR", "OOTB", mode, args.famconfig)
 
     # prep model and data
diff --git a/bmlogging/bmlogger.py b/bmlogging/bmlogger.py
index 2bf42b7..9229df5 100644
--- a/bmlogging/bmlogger.py
+++ b/bmlogging/bmlogger.py
@@ -7,7 +7,7 @@
 
 def get_bmlogger(log_file_path = None):
     """
-    Get benchmark logger. Call w/o args if want logger that does nothing.
+    Get benchmark logger. log_file_path=None returns a logger that does nothing.
     """
     t = Nop if log_file_path is None else BMLogger
     return t(log_file_path)
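
Usage sketch (not part of the diff): every call site touched above now follows the same bmlogger pattern. A minimal example, assuming the bmlogging package is importable and using only calls that appear in this patch; the log path, config name, and batch numbers are hypothetical placeholders:

    import sys
    sys.path.append("bmlogging")  # hypothetical; the benchmarks compute this path relative to __file__

    from bmlogger import get_bmlogger
    import loggerconstants

    # log_file_path=None returns the Nop logger; a real path returns a BMLogger
    bmlogger = get_bmlogger(log_file_path="results/example.log")
    bmlogger.header("RNN-T", "OOTB", "train", "example-config", score_metric=loggerconstants.EXPS)

    bmlogger.run_start()
    total_batches, batch_size = 100, 32  # placeholders for the real benchmark loop
    # ... run benchmark batches here ...
    bmlogger.run_stop(total_batches, batch_size)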