
Commit

add 600M config
blahBlahhhJ committed May 11, 2024
1 parent 8cd2faa commit b429eac
Showing 1 changed file with 33 additions and 0 deletions.
config/llama2_600m.yaml (33 additions, 0 deletions)
@@ -0,0 +1,33 @@
data:
  cache_dir: "gs://levanter-data/tokenized/pile_llama/"
  tokenizer: "NousResearch/Llama-2-7b-hf"
model:
  # 600M class model
  type: llama
  seq_len: 4096
  hidden_dim: 1536
  intermediate_dim: 6144
  num_layers: 24
  num_heads: 32
  num_kv_heads: 32
  use_flash_attention: True
  flash_attention_block_size: 1024
trainer:
  tracker:
    type: wandb
    entity: "understanding-sam"
    project: "levanter"
    tags: ["pile", "llama"]

  mp: p=f32,c=bfloat16
  train_batch_size: 1024
  num_train_steps: 50000
  steps_per_eval: 1000
  per_device_eval_parallelism: 64
  tensor_parallel_axes: ["mlp", "heads"]
  fsdp_axis: "embed"
  batch_axis: "batch"
optimizer:
  learning_rate: 6E-4
  weight_decay: 0.1
  min_lr_ratio: 0.1
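
The mp line (p=f32,c=bfloat16) is a mixed-precision policy in the string syntax of the jmp library, which Levanter's trainer builds on: parameters are stored in float32 while forward/backward compute runs in bfloat16. A minimal sketch of what such a policy does, assuming the string is parsed with jmp.get_policy (illustrative only, not Levanter's internals):

import jmp
import jax.numpy as jnp

# Parse the policy string from the config above:
# params ("p") stay in float32, compute ("c") happens in bfloat16.
policy = jmp.get_policy("p=f32,c=bfloat16")

params = {"w": jnp.ones((4, 4), dtype=jnp.float32)}
x = jnp.ones((4,), dtype=jnp.float32)

# Cast activations and params down to the compute dtype for the forward pass...
x_bf16 = policy.cast_to_compute(x)
params_bf16 = policy.cast_to_compute(params)

# ...while the master copy of the params is kept in the param dtype
# (float32) for the optimizer update.
params = policy.cast_to_param(params)

With the file in place, a run would typically be launched through Levanter's usual entry point, e.g. python -m levanter.main.train_lm --config_path config/llama2_600m.yaml.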
