update fp8 config for reproducing the bug
xrsrke committed Jun 28, 2024
1 parent 3ac7655 commit e180743
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions examples/config_fp8_llama.yaml
@@ -73,9 +73,9 @@ model:
  intermediate_size: 2048
  is_llama_config: true
  max_position_embeddings: 256
- num_attention_heads: 16
+ num_attention_heads: 4
  num_hidden_layers: 2
- num_key_value_heads: 16
+ num_key_value_heads: 4
  pad_token_id: null
  pretraining_tp: 1
  rms_norm_eps: 1.0e-05
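
This hunk cuts the attention heads and key/value heads from 16 to 4; since the two counts stay equal, the model remains plain multi-head attention rather than grouped-query attention. A minimal sanity-check sketch follows (hidden_size is not visible in this hunk, so the value below is only a placeholder):

```python
# Quick consistency check for the new head counts (a sketch; hidden_size is
# not shown in this hunk, so the value used here is only a placeholder).
hidden_size = 512             # placeholder, not taken from the diff
num_attention_heads = 4       # new value in this commit (was 16)
num_key_value_heads = 4       # equal to num_attention_heads -> plain MHA, no GQA

assert hidden_size % num_attention_heads == 0, "head dim must be an integer"
assert num_attention_heads % num_key_value_heads == 0, "KV heads must divide query heads"

head_dim = hidden_size // num_attention_heads
queries_per_kv_head = num_attention_heads // num_key_value_heads
print(f"head_dim={head_dim}, queries per KV head={queries_per_kv_head}")
```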
@@ -119,13 +119,13 @@ optimizer:
  # clip_grad: 1.0
  learning_rate_scheduler:
  # learning_rate: 0.0015 # note: 1/2 of pythia use this for a 400m model
- learning_rate: 6.0e-4
+ learning_rate: 0.0006
  lr_decay_starting_step: null
  lr_decay_steps: null
  lr_decay_style: cosine
  lr_warmup_steps: 1000 # 10% warm up of total training steps
  lr_warmup_style: linear
- min_decay_lr: 6.0e-5
+ min_decay_lr: 0.00006
  optimizer_factory:
  adam_beta1: 0.9
  adam_beta2: 0.95
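
The two learning-rate edits change only the notation, not the values (6.0e-4 = 0.0006 and 6.0e-5 = 0.00006). Below is a sketch of the schedule these fields describe: linear warmup for lr_warmup_steps, then cosine decay to min_decay_lr. Since lr_decay_steps is null in the config, the sketch assumes it defaults to the steps remaining after warmup, which is an assumption, not something the diff states.

```python
import math

# Sketch of the warmup + cosine-decay schedule described by the config above.
learning_rate = 0.0006        # peak LR reached at the end of warmup
min_decay_lr = 0.00006        # floor of the cosine decay
lr_warmup_steps = 1000        # linear warmup
train_steps = 24376
lr_decay_steps = train_steps - lr_warmup_steps  # assumed default, not stated in the diff

def lr_at(step: int) -> float:
    if step < lr_warmup_steps:
        return learning_rate * step / lr_warmup_steps
    progress = min(1.0, (step - lr_warmup_steps) / lr_decay_steps)
    return min_decay_lr + 0.5 * (learning_rate - min_decay_lr) * (1 + math.cos(math.pi * progress))

for step in (0, 500, 1000, 12688, 24376):
    print(step, f"{lr_at(step):.6f}")
```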
@@ -158,7 +158,7 @@ tokens:
  batch_accumulation_per_replica: 1
  limit_test_batches: 0
  limit_val_batches: 0
- micro_batch_size: 128 # 256
+ micro_batch_size: 256 # 256
  # micro_batch_size: 1
  sequence_length: 256
  train_steps: 24376
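
With the micro batch size doubled to 256, each replica now processes 256 × 256 = 65,536 tokens per step. A rough arithmetic sketch (the data-parallel size is not part of this diff, so dp_size below is a placeholder):

```python
# Rough tokens-per-optimizer-step arithmetic for the new micro batch size.
micro_batch_size = 256
sequence_length = 256
batch_accumulation_per_replica = 1
dp_size = 1  # placeholder assumption; not taken from the diff

tokens_per_step = micro_batch_size * sequence_length * batch_accumulation_per_replica * dp_size
print(tokens_per_step)  # 65536 with dp_size = 1
```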
