
Commit

add 600M config
blahBlahhhJ committed May 11, 2024
1 parent 8cd2faa commit b429eac
Showing 1 changed file with 33 additions and 0 deletions.
config/llama2_600m.yaml (33 additions, 0 deletions)
@@ -0,0 +1,33 @@
data:
  cache_dir: "gs://levanter-data/tokenized/pile_llama/"
  tokenizer: "NousResearch/Llama-2-7b-hf"
model:
  # 600M class model
  type: llama
  seq_len: 4096
  hidden_dim: 1536
  intermediate_dim: 6144
  num_layers: 24
  num_heads: 32
  num_kv_heads: 32
  use_flash_attention: True
  flash_attention_block_size: 1024
trainer:
  tracker:
    type: wandb
    entity: "understanding-sam"
    project: "levanter"
    tags: ["pile", "llama"]

  mp: p=f32,c=bfloat16
  train_batch_size: 1024
  num_train_steps: 50000
  steps_per_eval: 1000
  per_device_eval_parallelism: 64
  tensor_parallel_axes: ["mlp", "heads"]
  fsdp_axis: "embed"
  batch_axis: "batch"
optimizer:
  learning_rate: 6E-4
  weight_decay: 0.1
  min_lr_ratio: 0.1
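
The mp line (p=f32,c=bfloat16) is a mixed-precision policy in the string syntax of the jmp library, which Levanter's trainer builds on: parameters are stored in float32 while forward/backward compute runs in bfloat16. A minimal sketch of what such a policy does, assuming the string is parsed with jmp.get_policy (illustrative only, not Levanter's internals):

import jmp
import jax.numpy as jnp

# Parse the policy string from the config above:
# params ("p") stay in float32, compute ("c") happens in bfloat16.
policy = jmp.get_policy("p=f32,c=bfloat16")

params = {"w": jnp.ones((4, 4), dtype=jnp.float32)}
x = jnp.ones((4,), dtype=jnp.float32)

# Cast activations and params down to the compute dtype for the forward pass...
x_bf16 = policy.cast_to_compute(x)
params_bf16 = policy.cast_to_compute(params)

# ...while the master copy of the params is kept in the param dtype
# (float32) for the optimizer update.
params = policy.cast_to_param(params)

With the file in place, a run would typically be launched through Levanter's usual entry point, e.g. python -m levanter.main.train_lm --config_path config/llama2_600m.yaml.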
