final fixes

stanford-crfm · Oct 23, 2024 · 4733f3b
1 parent c971ebf
commit 4733f3b
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 6 deletions.
diff --git a/src/levanter/data/text.py b/src/levanter/data/text.py
@@ -729,7 +729,7 @@ def train_set(
         monitors: Union[bool, List[MetricsMonitor]] = True,
         *,
         key: Optional[PRNGKeyArray] = None,
-        epochs: bool = False,
+        epochs: int = 0,
     ) -> AsyncDataset[np.ndarray]:
 
         ds = self.token_seq_dataset("train", seq_len, monitors)

diff --git a/src/levanter/main/train_lm.py b/src/levanter/main/train_lm.py
@@ -128,11 +128,12 @@ def main(config: TrainLmConfig):
         )
 
 
-        # add epoch logging
-        total_tokens_future = callbacks.get_total_dataset_tokens(train_dataset.dataset, config.model.seq_len)
-        trainer.add_hook(
-            callbacks.log_epoch_progress(total_tokens_future, Pos.size, trainer.config.train_batch_size), every=1
-        )
+        # add epoch logging if epochs specified
+        if config.epoch > 0:
+            total_tokens_future = callbacks.get_total_dataset_tokens(train_dataset.dataset, config.model.seq_len)
+            trainer.add_hook(
+                callbacks.log_epoch_progress(total_tokens_future, Pos.size, trainer.config.train_batch_size), every=1
+            )
 
         # to do partitioning, our dimensions have to be divisible by the size of the physical axes they're mapped to
         # For most things, we just insist you specify the config right, but tokenizers often have strange numbers of