Commit d09a888: update trainer
ahmeda14960 committed Sep 11, 2024
1 parent ca31208
Showing 2 changed files with 7 additions and 6 deletions.
config/olmo_7b_debug.yaml (2 changes: 1 addition & 1 deletion)
@@ -20,7 +20,7 @@ trainer:
     until: 40000
   tracker:
     type: wandb
-    project: "trace-train"
+    project: "marin"
     tags: ["pile", "olmo", "web_comparison"]
   mp: p=f32,c=bfloat16
   model_axis_size: 1
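A note on the change above: a tracker block of this shape typically just parameterizes the run setup of the standard wandb client, so renaming the project from "trace-train" to "marin" only changes which Weights & Biases project new runs land in. A minimal sketch of the equivalent call, assuming the wandb Python package (this is not the trainer's actual tracker code):

    import wandb

    # Values copied from the YAML above; "project" is the field this commit changes.
    run = wandb.init(
        project="marin",  # was "trace-train" before this commit
        tags=["pile", "olmo", "web_comparison"],
    )
    run.log({"train/loss": 0.0})  # the trainer then logs metrics against this run
    run.finish()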
config/olmo_sft.yaml (11 changes: 6 additions & 5 deletions)
@@ -7,16 +7,16 @@ data:
   cache_dir: "gs://marin-data/tokenized/tuluv2/"
   tokenizer: "allenai/OLMo-1B"
 model: # 7B class model
-  type: llama
+  type: olmo
   # seq_len: 2048
   # hidden_dim: 4096
   # intermediate_dim: 11008
   # num_layers: 32
   # num_heads: 32
   # num_kv_heads: 32
   # use_flash_attention: True
-  use_bias: false
-  use_layer_norm_weight: false
+  # use_bias: false
+  # use_layer_norm_weight: false
   initialize_from_hf: "allenai/OLMo-7B-0724-hf"
   use_hf_model_config: true
   #flash_attention_block_size: 1024
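A note on the model hunk above: with use_hf_model_config set to true, the architecture hyperparameters come from the Hugging Face checkpoint named in initialize_from_hf, which is why the explicit fields (seq_len, hidden_dim, and so on) and the two llama-specific overrides can stay commented out; changing type from llama to olmo makes the local model class match that checkpoint. A sketch of what pulling that config looks like, assuming the transformers library (not the trainer's actual loading code):

    from transformers import AutoConfig

    # Fetch the architecture config for the checkpoint named in the YAML.
    cfg = AutoConfig.from_pretrained("allenai/OLMo-7B-0724-hf")
    print(cfg.model_type)  # expected: "olmo", matching the corrected type field
    # These should line up with the commented-out hidden_dim/num_layers/num_heads above.
    print(cfg.hidden_size, cfg.num_hidden_layers, cfg.num_attention_heads)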
@@ -34,12 +34,13 @@ trainer:
   fsdp_axis: "embed"
   batch_axis: "batch"
 optimizer:
-  learning_rate: 4E-4
+  learning_rate: 2E-6
   weight_decay: 0.0
   min_lr_ratio: 0.1
-  warmup: 0.01
+  warmup: 0.03
+
 # OLMO SFT config below
 # effective bsz is 8 * 16 = 128
 # model_name_or_path: allenai/OLMo-7B-hf
 # model_revision: main
 # use_flash_attn: true
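A note on the optimizer hunk: dropping learning_rate from 4E-4 to 2E-6 and raising warmup from 1% to 3% of training moves from pretraining-scale settings to fine-tuning-scale ones, which fits an SFT config. A minimal sketch of how these three fields commonly combine in a linear-warmup, cosine-decay schedule (an assumption about the schedule semantics, not the trainer's actual code):

    import math

    def lr_at(step: int, total_steps: int,
              learning_rate: float = 2e-6,  # peak LR from the diff (was 4e-4)
              warmup: float = 0.03,         # warmup fraction of total steps (was 0.01)
              min_lr_ratio: float = 0.1) -> float:
        """Linear warmup to the peak LR, then cosine decay to min_lr_ratio * peak."""
        warmup_steps = int(warmup * total_steps)
        min_lr = min_lr_ratio * learning_rate
        if step < warmup_steps:
            return learning_rate * step / max(1, warmup_steps)
        progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
        return min_lr + 0.5 * (learning_rate - min_lr) * (1 + math.cos(math.pi * progress))

    # Example: with 10_000 total steps, the peak 2e-6 is reached at step 300,
    # then the LR decays toward 2e-7 by the end of training.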

0 comments on commit d09a888
