
Commit 35d04e7: Add llama-3
Lorenzo Toniazzi committed Aug 10, 2024 (1 parent: 0f3b06b)
Showing 1 changed file with 28 additions and 26 deletions.

tests/test_lora_conversion_and_inference.sh
@@ -1,12 +1,13 @@
 #!/bin/bash
 set -e
 
-MODELS_REPO=https://huggingface.co/ltoniazzi/reduce-llms-for-testing
+MODELS_REPO=reduce-llms-for-testing
+MODELS_REPO_URL=https://huggingface.co/ltoniazzi/$MODELS_REPO
 
 # Clone the Hugging Face repository if the directory does not exist
-if [ ! -d "reduce-llms-for-testing" ]; then
+if [ ! -d "$MODELS_REPO" ]; then
     echo "Cloning the Hugging Face repository..."
-    git clone $MODELS_REPO
+    git clone $MODELS_REPO_URL
 else
     echo "Repository already exists. Skipping clone."
 fi
@@ -17,54 +18,55 @@ results=()
 run_conversion_and_inference_lora() {
     local model_name=$1
     local size_matrix=$2
-    local model_size_mb=$3
+    local bos_token=$3
 
     # Convert safetensors to gguf
     echo "Running convert_hf_to_gguf.py for $model_name with size $size_matrix..."
-    python convert_hf_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/base \
+    python convert_hf_to_gguf.py $MODELS_REPO/$model_name/size=$size_matrix/base \
+        --outfile $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
         --outtype f32
 
     echo "Running convert_lora_to_gguf.py for $model_name with size $size_matrix..."
-    python3 convert_lora_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/lora \
-        --base reduce-llms-for-testing/$model_name/size=$size_matrix/base \
+    python3 convert_lora_to_gguf.py $MODELS_REPO/$model_name/size=$size_matrix/lora \
+        --base $MODELS_REPO/$model_name/size=$size_matrix/base \
         --outtype f32
 
-    echo "Running llama-export-lora with lora for $model_name with size $size_matrix and model size $model_size_mb..."
+    echo "Running llama-export-lora with lora for $model_name with size $size_matrix..."
     llama-export-lora \
-        -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        -o reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
-        --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf
+        -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        -o $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32-lora-merged.gguf \
+        --lora $MODELS_REPO/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf
 
     # Run inference
-    echo "Running llama-cli without lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_BASE=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        -p "<bos>When forty winters shall besiege" -n 50 --seed 42)
+    echo "Running llama-cli without lora for $model_name with size $size_matrix..."
+    OUTPUT_BASE=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        -p "When forty winters shall besiege" -n 50 --seed 42)
 
-    echo "Running llama-cli with lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_LORA_HOT=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
-        -p "<bos>I see a little silhouetto" -n 50 --seed 42)
+    echo "Running llama-cli with lora for $model_name with size $size_matrix..."
+    OUTPUT_LORA_HOT=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32.gguf \
+        --lora $MODELS_REPO/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
+        -p "I see a little silhouetto" -n 50 --seed 42)
 
-    echo "Running llama-cli with exported lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    OUTPUT_LORA_MERGED=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
-        -p "<bos>I see a little silhouetto" -n 50 --seed 42)
+    echo "Running llama-cli with exported lora for $model_name with size $size_matrix..."
+    OUTPUT_LORA_MERGED=$(llama-cli -m $MODELS_REPO/$model_name/size=$size_matrix/base/Base-F32-lora-merged.gguf \
+        -p "I see a little silhouetto" -n 50 --seed 42)
 
     # Store the results in the regular array
     results+=("
-    \n\n\033[1mResults for $model_name with size $size_matrix and model size $model_size_mb:\033[0m
+    \n\033[1mResults for $model_name with size $size_matrix:\033[0m
     \n • \033[32mBase:\n$OUTPUT_BASE
     \n • \033[34mLora hot:\n$OUTPUT_LORA_HOT
     \n • \033[36mLora merged:\n$OUTPUT_LORA_MERGED
-    \n\n \033[0m
+    \n \033[0m
     ")
 
-    echo "All steps completed for $model_name with size $size_matrix and model size $model_size_mb!"
+    echo "All steps completed for $model_name with size $size_matrix!"
 }
 
 # Array of parameters to iterate over
 declare -a params=(
-    "Gemma2ForCausalLM 64 19M"
-    # "AnotherModel 128 25M"
+    "Gemma2ForCausalLM 64 <bos>"
+    "LlamaForCausalLM 64 <|begin_of_text|>"
 )
 
 # Loop through each set of parameters
[Remaining unchanged lines of the file are collapsed in the diff view.]
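The loop that consumes params is among those collapsed, unchanged lines. As a minimal sketch, assuming the loop simply word-splits each entry, the new three-field entries would feed the function like this (the loop body and field order are assumptions, not shown in this commit):

    # Hypothetical consumer of the params array; the actual loop lives in the
    # collapsed context below this hunk. Word-split each entry into the three
    # positional arguments run_conversion_and_inference_lora now expects.
    for param in "${params[@]}"; do
        read -r model_name size_matrix bos_token <<< "$param"
        run_conversion_and_inference_lora "$model_name" "$size_matrix" "$bos_token"
    done

Passing the BOS token as a per-model parameter lets the same test cover both Gemma-2, whose tokenizer uses <bos>, and Llama 3, whose tokenizer uses <|begin_of_text|>.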
