# Training Tutorial for bunny-stablelm-2-eva-lora
#!/bin/bash
# Pretraining launch script.
#
# Runs bunny/train/train.py through deepspeed (zero2 config) with
# --tune_mm_mlp_adapter True, writing checkpoints and a log file to
# ./checkpoints-pretrain/$OUTPUT_DIR.
#
# Placeholders to replace before running:
#   /path/to/stabilityai/stablelm-2-1_6b   - value for --model_name_or_path
#   /path/to/EVA02_CLIP_L_336_psz14_s6B    - value for --vision_tower

# Make the pipeline's exit status reflect a deepspeed failure instead of
# always reporting tee's status.
set -o pipefail

MODEL_TYPE=stablelm-2
# Braces keep "-pretrain" attached to the value; a stray space here would
# turn the suffix into a separate command word and leave OUTPUT_DIR unset.
OUTPUT_DIR="bunny-${MODEL_TYPE}-pretrain"

mkdir -p "./checkpoints-pretrain/$OUTPUT_DIR"

# 2>&1 is placed before the pipe so that the trainer's stderr is captured in
# log.txt together with its stdout.
deepspeed bunny/train/train.py \
    --deepspeed ./script/deepspeed/zero2.json \
    --model_name_or_path /path/to/stabilityai/stablelm-2-1_6b \
    --model_type "$MODEL_TYPE" \
    --version plain \
    --data_path ./data/pretrain/bunny_pretrain_laion_2m.json \
    --image_folder ./data/pretrain/images \
    --vision_tower /path/to/EVA02_CLIP_L_336_psz14_s6B \
    --mm_projector_type mlp2x_gelu \
    --tune_mm_mlp_adapter True \
    --image_aspect_ratio square \
    --bf16 True \
    --output_dir "./checkpoints-pretrain/$OUTPUT_DIR" \
    --num_train_epochs 1 \
    --per_device_train_batch_size 8 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 4 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 24000 \
    --save_total_limit 1 \
    --learning_rate 1e-3 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to none 2>&1 | tee "./checkpoints-pretrain/$OUTPUT_DIR/log.txt"
## Visual Instruction Tuning
#!/bin/bash
# Visual instruction tuning (LoRA) launch script.
#
# Runs bunny/train/train.py through deepspeed (zero3 config) with LoRA
# enabled, loading the projector weights from
# ./checkpoints-pretrain/$PRETRAIN_DIR/mm_projector.bin and writing
# checkpoints and a log file to ./checkpoints-$MODEL_TYPE/$OUTPUT_DIR.
#
# Placeholders to replace before running:
#   /path/to/stabilityai/stablelm-2-1_6b   - value for --model_name_or_path
#   /path/to/EVA02_CLIP_L_336_psz14_s6B    - value for --vision_tower

# Make the pipeline's exit status reflect a deepspeed failure instead of
# always reporting tee's status.
set -o pipefail

MODEL_TYPE=stablelm-2
# Braces keep the suffix attached to the value; a stray space here would turn
# "-pretrain" into a separate command word and leave PRETRAIN_DIR unset.
PRETRAIN_DIR="bunny-${MODEL_TYPE}-pretrain"
OUTPUT_DIR="bunny-lora-${MODEL_TYPE}"

# The path must stay a single word: splitting it before "/$OUTPUT_DIR" would
# make mkdir attempt to create a directory at the filesystem root.
mkdir -p "./checkpoints-$MODEL_TYPE/$OUTPUT_DIR"

# 2>&1 is placed before the pipe so that the trainer's stderr is captured in
# log.txt together with its stdout.
deepspeed bunny/train/train.py \
    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
    --deepspeed ./script/deepspeed/zero3.json \
    --model_name_or_path /path/to/stabilityai/stablelm-2-1_6b \
    --model_type "$MODEL_TYPE" \
    --version bunny \
    --data_path ./data/finetune/bunny_695k.json \
    --image_folder ./data/finetune/images \
    --vision_tower /path/to/EVA02_CLIP_L_336_psz14_s6B \
    --pretrain_mm_mlp_adapter "./checkpoints-pretrain/$PRETRAIN_DIR/mm_projector.bin" \
    --mm_projector_type mlp2x_gelu \
    --image_aspect_ratio pad \
    --group_by_modality_length False \
    --bf16 True \
    --output_dir "./checkpoints-$MODEL_TYPE/$OUTPUT_DIR" \
    --num_train_epochs 1 \
    --per_device_train_batch_size 8 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 2 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 500 \
    --save_total_limit 1 \
    --learning_rate 2e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to none 2>&1 | tee "./checkpoints-$MODEL_TYPE/$OUTPUT_DIR/log.txt"