diff --git a/src/levanter/main/sft.py b/src/levanter/main/sft.py index c3dc60eab..71e3c3ef7 100644 --- a/src/levanter/main/sft.py +++ b/src/levanter/main/sft.py @@ -80,6 +80,7 @@ def train(config: SFTConfig): raise ValueError("Must specify either --initialize_from_hf or --initialize_from") else: converter = None + model_config = config.model levanter.initialize(config)