NVIDIA · Kipok · Oct 23, 2024 · Oct 23, 2024 · Oct 23, 2024
diff --git a/docs/openmathinstruct2/training.md b/docs/openmathinstruct2/training.md
@@ -81,7 +81,7 @@ ns train \
     --nemo_model=/workspace/llama3.1-8b-nemo \
     --num_nodes=8 \
     --num_gpus=8 \
-    --average_steps 10000 20000 30000 40000 50000 60000 \
+    --average_steps=10000,20000,30000,40000,50000,60000 \
     --training_data=/workspace/openmathinstruct2-sft.jsonl \
     ++model.data.train_ds.micro_batch_size=4 \
     ++model.tensor_model_parallel_size=4 \
@@ -103,7 +103,7 @@ ns train \
     --nemo_model=/workspace/llama3.1-70b-nemo \
     --num_nodes=32 \
     --num_gpus=8 \
-    --average_steps 3330 6660 9990 13320 16650 20000 \
+    --average_steps=3330,6660,9990,13320,16650,20000 \
     --training_data=/workspace/openmathinstruct2-sft-5M.jsonl \
     ++model.data.train_ds.micro_batch_size=1 \
     ++model.tensor_model_parallel_size=8 \

diff --git a/nemo_skills/pipeline/eval.py b/nemo_skills/pipeline/eval.py
@@ -119,6 +119,9 @@ def eval(
     else:
         log_dir = f"{output_dir}/eval-logs"
 
+    if " " in str(benchmarks):
+        raise ValueError("benchmarks should be separated with commas")
+
     if server_address is None:  # we need to host the model
         assert server_gpus is not None, "Need to specify server_gpus if hosting the model"
         server_address = "localhost:5000"

diff --git a/nemo_skills/pipeline/summarize_results.py b/nemo_skills/pipeline/summarize_results.py
@@ -55,6 +55,9 @@ def summarize_results(
     """Summarize results of an evaluation job."""
     setup_logging(disable_hydra_logs=False, log_level=logging.INFO if not debug else logging.DEBUG)
 
+    if " " in str(benchmarks):
+        raise ValueError("benchmarks should be separated with commas")
+
     cluster = cluster or os.environ.get("NEMO_SKILLS_CONFIG")
 
     # copying results from the cluster if necessary

diff --git a/nemo_skills/pipeline/train.py b/nemo_skills/pipeline/train.py
@@ -160,7 +160,9 @@ def train(
     disable_wandb: bool = typer.Option(False, help="Disable wandb logging"),
     with_sandbox: bool = typer.Option(False, help="If sandbox is required for code generation"),
     partition: str = typer.Option(None, help="Specify partition for jobs"),
-    average_steps: list[int] = typer.Option(None, help="List of checkpoint steps to average"),
+    average_steps: str = typer.Option(
+        None, help="List of commas separated checkpoint steps to average. E.g 1000,5000"
+    ),
     run_after: str = typer.Option(None, help="Experiment to run after"),
     config_dir: str = typer.Option(None, help="Can customize where we search for cluster configs"),
     log_dir: str = typer.Option(None, help="Can specify a custom location for slurm logs. "),
@@ -200,6 +202,9 @@ def train(
     if validation_data:
         check_if_mounted(cluster_config, validation_data)
 
+    if " " in str(average_steps):
+        raise ValueError("average steps should be separated with commas")
+
     train_cmd = get_training_cmd(
         cluster_config=cluster_config,
         partition=partition,
@@ -239,7 +244,7 @@ def train(
             nemo_model=nemo_model,
             output_dir=output_dir,
             final_nemo_path=final_nemo_path,
-            average_steps=f"--steps {' '.join(map(str, average_steps))} " if average_steps else "",
+            average_steps=f"--steps {' '.join(average_steps.split(','))} " if average_steps else "",
         )
 
         add_task(