Skip to content

Commit

Permalink
new config
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmeda14960 committed Sep 11, 2024
1 parent 00ecb15 commit 3fe04f8
Showing 1 changed file with 64 additions and 0 deletions.
64 changes: 64 additions & 0 deletions config/olmo_sft.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Training configuration for an OLMo SFT run (file: config/olmo_sft.yaml).
# Sections: `data` (input shards, cache, tokenizer), `model`, `trainer`
# (tracking, precision, batch/step counts, sharding axes) and `optimizer`.
# NOTE(review): the nesting was restored from a flattened copy. With every key
# at column 0 the two `type:` keys collide and the section keys parse as null.
# The key names suggest a Levanter-style trainer; confirm that
# `initialize_from_hf` and `use_hf_model_config` belong at the top level.
data:
  # Training shards (gzip-compressed JSONL).
  train_urls:
    - "gs://marin-us-central2/documents/marin_instructv1/v1_olmo_mix/text/tulu-v2-sft-mixture-000.jsonl.gz"
    - "gs://marin-us-central2/documents/marin_instructv1/v1_olmo_mix/text/tulu-v2-sft-mixture-001.jsonl.gz"
  # Held-out shard used for validation.
  validation_urls:
    - "gs://marin-us-central2/documents/marin_instructv1/v1_olmo_mix/text/tulu-v2-sft-mixture-002.jsonl.gz"
  cache_dir: "gs://marin-data/tokenized/tuluv2/"
  # NOTE(review): tokenizer repo differs from the checkpoint named in
  # `initialize_from_hf` below; confirm both use the same vocabulary.
  tokenizer: "allenai/OLMo-1B"

model:  # 7B class model
  type: olmo
  # The architecture fields below are left commented out; with
  # `use_hf_model_config: true` they are presumably taken from the HF
  # checkpoint's own config instead (TODO confirm).
  # seq_len: 2048
  # hidden_dim: 4096
  # intermediate_dim: 11008
  # num_layers: 32
  # num_heads: 32
  # num_kv_heads: 32
  # use_flash_attention: True
  # use_bias: false
  # use_layer_norm_weight: false

# Hugging Face checkpoint to start from.
initialize_from_hf: "allenai/OLMo-7B-0724-hf"
use_hf_model_config: true
# flash_attention_block_size: 1024

trainer:
  tracker:
    type: wandb
    project: "marin"
    tags: ["dolma", "olmo", "llama"]

  # Mixed-precision policy string, passed through verbatim.
  mp: p=f32,c=bfloat16
  train_batch_size: 64
  # Original derivation: 3,000,000,000,000 / 4,000,000 = 750,000.
  # NOTE(review): that arithmetic is a 3T-token / 4M-tokens-per-step budget,
  # which does not match train_batch_size: 64 here; confirm the intended
  # step count for this SFT run.
  num_train_steps: 750000
  steps_per_eval: 1000
  tensor_parallel_axes: ["mlp", "heads"]
  fsdp_axis: "embed"
  batch_axis: "batch"

optimizer:
  # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve it
  # as a float; the bare form `4E-4` is resolved as a string by those loaders.
  # NOTE(review): the commented-out reference SFT settings in this file use
  # learning_rate 2.0e-06; confirm 4.0E-4 is intended for fine-tuning.
  learning_rate: 4.0E-4
  weight_decay: 0.0
  min_lr_ratio: 0.1
  warmup: 0.01

# Reference OLMo SFT settings below (commented out, so not applied by this file)
# model_name_or_path: allenai/OLMo-7B-hf
# model_revision: main
# use_flash_attn: true
# tokenizer_name: allenai/OLMo-7B-hf
# use_slow_tokenizer: false # olmo models only use fast tokenizers
# dataset_name: allenai/tulu-v2-sft-mixture-olmo-2048
# max_seq_length: 2048
# preprocessing_num_workers: 128
# per_device_train_batch_size: 1 # note, this is set up for 8 GPUs
# gradient_accumulation_steps: 16
# learning_rate: 2.0e-06
# lr_scheduler_type: linear
# warmup_ratio: 0.03
# weight_decay: 0.0
# num_train_epochs: 3
# output_dir: output/olmo_instruct/
# with_tracking: true
# report_to:
# - wandb
# logging_steps: 1
# checkpointing_steps: epoch
# add_bos: true

0 comments on commit 3fe04f8

Please sign in to comment.