Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate rnnt to bmlogger and clean up logging in other places #73

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/dlrm/ootb/dlrm_s_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -1192,9 +1192,9 @@ def inference(
scores = []
targets = []

bmlogger = get_bmlogger() # default to Nop logger
bmlogger = get_bmlogger(log_file_path=None) # default to Nop logger
if args.fb5logger is not None:
bmlogger = get_bmlogger(args.fb5logger)
bmlogger = get_bmlogger(log_file_path=args.fb5logger)
bmlogger.header("DLRM", "OOTB", "eval", args.fb5config, score_metric=loggerconstants.EXPS)

for i, testBatch in enumerate(test_ld):
Expand Down
19 changes: 10 additions & 9 deletions benchmarks/rnnt/ootb/inference/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
MLPERF_CONF = MLPERF_CONF.resolve()

# FB5 Logger
p = Path(__file__).parent.resolve() / "../../../../fb5logging"
p = Path(__file__).parent.resolve() / "../../../../bmlogging"
sys.path.append(os.fspath(p))
from fb5logger import FB5Logger
from bmlogger import get_bmlogger
import loggerconstants


Expand Down Expand Up @@ -62,8 +62,10 @@ def main():
args = get_args()

if args.fb5logger is not None:
fb5logger = FB5Logger(args.fb5logger)
fb5logger.header("RNN-T", "OOTB", "infer", args.fb5config, score_metric=loggerconstants.EXPS)
bmlogger = get_bmlogger(log_file_path=args.fb5logger)
else:
bmlogger = get_bmlogger(log_file_path = None) # default to Nop logger
bmlogger.header("RNN-T", "OOTB", "infer", args.fb5config, score_metric=loggerconstants.EXPS)

if args.backend == "pytorch":
from pytorch_SUT import PytorchSUT
Expand Down Expand Up @@ -91,12 +93,11 @@ def main():
log_settings.log_output = log_output_settings

print("Running Loadgen test...")
if args.fb5logger is not None:
fb5logger.run_start()
bmlogger.run_start()
lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)
if args.fb5logger is not None:
nbatches = sut.qsl.count
fb5logger.run_stop(nbatches, 1)

nbatches = sut.qsl.count
bmlogger.run_stop(nbatches, 1)

if args.accuracy:
cmd = f"python3 accuracy_eval.py --log_dir {log_path} --dataset_dir {args.dataset_dir} --manifest {args.manifest}"
Expand Down
18 changes: 10 additions & 8 deletions benchmarks/rnnt/ootb/train/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@

# FB5 Logger
import pathlib
p = pathlib.Path(__file__).parent.resolve() / "../../../../fb5logging"
p = pathlib.Path(__file__).parent.resolve() / "../../../../bmlogging"
sys.path.append(os.fspath(p))
from fb5logger import FB5Logger
from bmlogger import get_bmlogger
import loggerconstants


Expand Down Expand Up @@ -203,9 +203,12 @@ def main():
logging.configure_logger('RNNT')
logging.log_start(logging.constants.INIT_START)


if args.fb5logger is not None:
fb5logger = FB5Logger(args.fb5logger)
fb5logger.header("RNN-T", "OOTB", "train", args.fb5config, score_metric=loggerconstants.EXPS)
bmlogger = get_bmlogger(log_file_path=args.fb5logger)
else:
bmlogger = get_bmlogger(log_file_path = None) # default to Nop logger
bmlogger.header("RNN-T", "OOTB", "train", args.fb5config, score_metric=loggerconstants.EXPS)

assert(torch.cuda.is_available())
assert args.prediction_frequency is None or args.prediction_frequency % args.log_frequency == 0
Expand Down Expand Up @@ -443,8 +446,7 @@ def forward(self, x):
step = start_epoch * steps_per_epoch + 1

# FB5 Log for a certain amount of time.
if args.fb5logger is not None:
fb5logger.run_start()
bmlogger.run_start()
total_batches = 0
start_time = time.time()
MAX_TIME = 120.0
Expand Down Expand Up @@ -588,7 +590,7 @@ def forward(self, x):
if args.mlperf:
logging.log_end(logging.constants.RUN_STOP, metadata={'status': 'success'})
if args.fb5logger is not None:
fb5logger.run_stop(total_batches, args.batch_size)
bmlogger.run_stop(total_batches, args.batch_size)
print_once(f'Finished after {args.epochs_this_job} epochs.')
break
if 0 < args.epochs_this_job <= epoch - start_epoch:
Expand All @@ -602,7 +604,7 @@ def forward(self, x):
if args.mlperf:
logging.log_end(logging.constants.RUN_STOP, metadata={'status': 'aborted'})
if args.fb5logger is not None:
fb5logger.run_stop(total_batches, args.batch_size)
bmlogger.run_stop(total_batches, args.batch_size)

if epoch == args.epochs:
evaluate(epoch, step, val_loader, val_feat_proc, tokenizer.detokenize,
Expand Down
22 changes: 7 additions & 15 deletions benchmarks/run_all.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
#!/bin/bash

# Run all major benchmarks with tiny configs as an example
# Also used as a test script to make sure benchmarks run correctly.

# DLRM OOTB
./run_dlrm_ootb_infer.sh -l results
./run_dlrm_ootb_train.sh -l results # ootb configs use config files. See docs/DLRM.md

# DLRM UBench
./run_dlrm_ubench_train_linear.sh -c "[(2,2,2,2,2)]" -l results # Config not real
./run_dlrm_ubench_train_embeddingbag.sh -l results -c "[(2,2,2,2),(2,2,2,2),(2,2,2,2),(2,2,2,2),(2,2,2,2)]" # Config not real

# XLMR OOTB
./run_xlmr_ootb.sh

./run_xlmr_ootb.sh -c "--inference-only --famconfig=fb-1dev-A --num-batches=100 --batch-size=96 --sequence-length=64 --vocab-size=250000 --half-model --use-gpu --warmup-batches=20"
./run_xlmr_ootb.sh -c "--inference-only --famconfig=fb-1dev-A-realdist --num-batches=100 --batch-size=64 --vocab-size=250000 --half-model --use-gpu --warmup-batches=20 --seqlen-dist=[[1,2462],[0.99,675],[0.95,250],[0.9,147],[0.75,56],[0.7,47],[0.5,23],[0.25,11],[0.05,3],[0,1]] --seqlen-dist-max=256"
./run_xlmr_ootb.sh -c "--inference-only --famconfig=fb-1dev-B --num-batches=100 --batch-size=64 --sequence-length=256 --vocab-size=250000 --half-model --use-gpu --warmup-batches=20"
./run_xlmr_ootb.sh -c "--famconfig=fb-1dev-A-fp32 --num-batches=50 --batch-size=32 --sequence-length=256 --vocab-size=250000 --use-gpu --warmup-batches=10"
./run_xlmr_ootb.sh -c "--famconfig=fb-1dev-A-fp16 --num-batches=50 --batch-size=32 --sequence-length=256 --vocab-size=250000 --half-model --use-gpu --warmup-batches=10"
./run_xlmr_ootb.sh -c "--famconfig=msft-1dev --num-batches=50 --batch-size=16 --sequence-length=512 --vocab-size=250000 --half-model --use-gpu --warmup-batches=10"
# view options: [raw_view -> pure json, intermediate_view -> nice table]
# intermediate view recommended for filling out table
python ../fb5logging/result_summarizer.py -f results -v intermediate_view
python ../bmlogging/result_summarizer.py -f results -v intermediate_view
4 changes: 2 additions & 2 deletions benchmarks/xlmr/ootb/xlmr.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,14 @@ def run():
device = torch.device("cuda", 0)

# prep logger
bmlogger = get_bmlogger() # default to Nop logger
bmlogger = get_bmlogger(log_file_path=None) # default to Nop logger
if args.logdir is not None:
mode = "train"
if(args.inference_only):
mode = "eval"

logpath = "{}/XLMR_OOTB_{}_{}.log".format(args.logdir, mode, args.famconfig)
bmlogger = get_bmlogger(logpath)
bmlogger = get_bmlogger(log_file_path=logpath)
bmlogger.header("XLMR", "OOTB", mode, args.famconfig)

# prep model and data
Expand Down
2 changes: 1 addition & 1 deletion bmlogging/bmlogger.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

def get_bmlogger(log_file_path = None):
    """
    Get benchmark logger.

    log_file_path = None (the default) returns a logger that does nothing;
    any other value is passed to BMLogger as its log file path.
    """
    # NOTE(review): the None branch produces a Nop *instance* while the other
    # branch produces the BMLogger *class*; both are then called with
    # log_file_path below. This only works if Nop instances are callable --
    # confirm against Nop's definition (not visible here).
    t = Nop() if log_file_path is None else BMLogger
    return t(log_file_path)
Expand Down