From caf0a38bd730ded21a8f8654cdf5153a34d82c21 Mon Sep 17 00:00:00 2001
From: Ahmed Ahmed
Date: Wed, 6 Nov 2024 16:27:01 -0800
Subject: [PATCH] merge main

---
Reviewer notes (placed after the three-dash separator, so they are not part
of the commit message):

 * This patch touches exactly one file: it deletes
   examples/sft/alpaca-llama-fix.yaml (55 lines, mode 100644, blob 1590b7184).
   Nothing is added or modified.
 * The subject "merge main" does not mention the deletion. Presumably the
   removal is a side effect of merging main; TODO confirm that no script or
   document still refers to this config path.
 * NOTE(review): the removed config sets `type: llama` with
   `reference_checkpoint: meta-llama/Llama-2-7b-hf`, but its tokenizer is
   "allenai/OLMo-1B" and its cache_dir ends in "alpaca-olmo". That looks like
   a model/tokenizer mismatch; confirm whether that is why the file was dropped.
 * The removed file had no trailing newline (see the marker at the end of the
   hunk).
 * The leading whitespace of the removed lines below was restored from a
   whitespace-collapsed copy of this patch; verify it against blob 1590b7184
   before applying.

 examples/sft/alpaca-llama-fix.yaml | 55 ------------------------------
 1 file changed, 55 deletions(-)
 delete mode 100644 examples/sft/alpaca-llama-fix.yaml

diff --git a/examples/sft/alpaca-llama-fix.yaml b/examples/sft/alpaca-llama-fix.yaml
deleted file mode 100644
index 1590b7184..000000000
--- a/examples/sft/alpaca-llama-fix.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-# Model configuration
-model:
-  activation_function: silu
-  gradient_checkpointing: true
-  hidden_dim: 4096
-  initializer_range: 0.02
-  intermediate_dim: 11008
-  layer_norm_epsilon: 1.0e-05
-  num_heads: 32
-  num_kv_heads: 32
-  num_layers: 32
-  reference_checkpoint: meta-llama/Llama-2-7b-hf
-  seq_len: 4096
-  type: llama
-  use_bias: false
-  use_layer_norm_weight: false
-
-# Training configuration
-trainer:
-  mp: p=f32,c=bfloat16
-  tracker:
-    type: wandb
-    project: "levanter-sft"
-    tags: ["llama", "sft"]
-  num_train_steps: 1218
-  train_batch_size: 64
-  tensor_parallel_axes: ["mlp", "heads"]
-  fsdp_axis: "embed"
-  batch_axis: "batch"
-  steps_per_eval: 1000
-
-# Optimizer settings
-optimizer:
-  learning_rate: 2e-5
-  weight_decay: 0.0
-  min_lr_ratio: 0.1
-  warmup: 100
-
-# Supervised data configuration
-supervised_data:
-  cache_dir: "gs://levanter-checkpoints/marin/sft_cache/alpaca-olmo"
-  input_field: "instruction"
-  output_field: "output"
-  hf_dataset_name: "tatsu-lab/alpaca" # Changed from id
-  hf_dataset_split: "train"
-  name: "alpaca" # Optional metadata
-  tags: ["instruction-tuning"] # Optional metadata
-  validation_urls: [] # Empty list for no validation files
-
-# Additional settings
-tokenizer: "allenai/OLMo-1B"
-max_tune_length: 2048
-epoch: 3
-
-initialize_from_hf: false
\ No newline at end of file