diff --git a/docs/user-guide/SFT.rst b/docs/user-guide/SFT.rst
index a1a5fbe95..0d5fb3d98 100644
--- a/docs/user-guide/SFT.rst
+++ b/docs/user-guide/SFT.rst
@@ -186,8 +186,8 @@ Now that we have the data we will use NeMo-Aligner to do the supervised fine tun
    trainer.devices=8 \
    trainer.sft.max_steps=-1 \
    trainer.sft.limit_val_batches=40 \
-   trainer.sft.val_check_interval=1000 \
-   trainer.sft.save_interval=50 \
+   trainer.sft.val_check_interval=100 \
+   trainer.sft.save_interval=100 \
    model.megatron_amp_O2=True \
    model.restore_from_path=${PRETRAINED_ACTOR_NEMO_FILE} \
    model.optim.lr=5e-6 \
diff --git a/docs/user-guide/SteerLM.rst b/docs/user-guide/SteerLM.rst
index 1f83607c7..635abe5a3 100644
--- a/docs/user-guide/SteerLM.rst
+++ b/docs/user-guide/SteerLM.rst
@@ -126,6 +126,7 @@ Note that you would need to set up multi-node training in your cluster env, depe
    pretrained_checkpoint.restore_from_path=/models/llama13b/llama13b.nemo \
    "model.data.data_prefix={train: ["data/merge_train_reg.jsonl"], validation: ["data/merge_val_reg.jsonl"], test: ["data/merge_val_reg.jsonl"]}" \
    exp_manager.explicit_log_dir=/results/reward_model_13b \
+   trainer.rm.save_interval=100 \
    trainer.rm.val_check_interval=10 \
    exp_manager.create_wandb_logger=True \
    exp_manager.wandb_logger_kwargs.project=steerlm \