From 9a8f0508eb7b1a9bf36909313022d14854d140b3 Mon Sep 17 00:00:00 2001
From: ltoniazzi
Date: Fri, 9 Aug 2024 11:17:26 +0100
Subject: [PATCH 1/2] Add printing to check weights match torch version

---
 .gitignore                              |   3 +
 tests/test-lora-conversion-inference.sh | 138 ++++++++++++++++++++++++
 2 files changed, 141 insertions(+)
 create mode 100755 tests/test-lora-conversion-inference.sh

diff --git a/.gitignore b/.gitignore
index 5ae030200f897..9986ac6b19d4e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -129,3 +129,6 @@ poetry.toml
 
 # Scripts
 !/scripts/install-oneapi.bat
+
+# Test models for lora adapters
+/lora-tests
diff --git a/tests/test-lora-conversion-inference.sh b/tests/test-lora-conversion-inference.sh
new file mode 100755
index 0000000000000..34de2c7a5a01b
--- /dev/null
+++ b/tests/test-lora-conversion-inference.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+set -e
+
+# Array of models to iterate over
+declare -a params=(
+    "Gemma2ForCausalLM 64"
+    "LlamaForCausalLM 64"
+    "Phi3ForCausalLM 64"
+)
+
+verbose=false
+if [[ "$1" == "--verbose" ]]; then
+    verbose=true
+fi
+
+MODELS_REPO=lora-tests
+MODELS_REPO_URL=https://huggingface.co/ggml-org/$MODELS_REPO
+
+# Clone the Hugging Face repository if the directory does not exist
+if [ ! -d "$MODELS_REPO" ]; then
+    echo "Cloning the Hugging Face repository..."
+    git clone $MODELS_REPO_URL
+else
+    echo "Repository already exists. Skipping clone."
+fi
+
+# Array to store results to print
+results=()
+
+trim_leading_whitespace() {
+    local input_string="$1"
+    echo "${input_string#"${input_string%%[![:space:]]*}"}"
+}
+
+extract_starting_substring() {
+    local reference_string="$1"
+    local target_string="$2"
+
+    local target_length=${#target_string}
+    echo "${reference_string:0:$target_length}"
+}
+
+get_first_word() {
+    local input_string="$1"
+    read -r first_word _ <<< "$input_string"
+    echo "$first_word"
+}
+
+# Load the expected strings
+EXPECTED_BASE_FULL=$(cat $MODELS_REPO/data/pale_blue_dot.txt)
+EXPECTED_LORA_FULL=$(cat $MODELS_REPO/data/bohemian_rhapsody.txt)
+EXPECTED_BASE_FIRST_WORD=$(get_first_word "$EXPECTED_BASE_FULL")
+EXPECTED_LORA_FIRST_WORD=$(get_first_word "$EXPECTED_LORA_FULL")
+
+run_conversion_and_inference_lora() {
+    local model_name=$1
+    local hidden_size=$2
+
+    # Convert safetensors to gguf
+    echo "Running convert_hf_to_gguf.py for $model_name with hidden_size $hidden_size..."
+    python convert_hf_to_gguf.py $MODELS_REPO/$model_name/hidden_size=$hidden_size/base \
+    --outfile $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
+    --outtype f32
+
+    echo "Running convert_lora_to_gguf.py for $model_name with hidden_size $hidden_size..."
+    python3 convert_lora_to_gguf.py $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora \
+    --base $MODELS_REPO/$model_name/hidden_size=$hidden_size/base \
+    --outtype f32
+
+    echo "Running llama-export-lora with lora for $model_name with hidden_size $hidden_size..."
+    ./llama-export-lora \
+    -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
+    -o $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32-lora-merged.gguf \
+    --lora $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora/Lora-F32-LoRA.gguf
+
+    # Run inference
+    echo "Running llama-cli without lora for $model_name with hidden_size $hidden_size..."
+    OUTPUT_BASE=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
+    -p "$EXPECTED_BASE_FIRST_WORD" -n 50 --seed 42 --temp 0)
+
+    echo "Running llama-cli with hot lora for $model_name with hidden_size $hidden_size..."
+    OUTPUT_LORA_HOT=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
+    --lora $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora/Lora-F32-LoRA.gguf \
+    -p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0)
+
+    echo "Running llama-cli with merged lora for $model_name with hidden_size $hidden_size..."
+    OUTPUT_LORA_MERGED=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32-lora-merged.gguf \
+    -p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0)
+
+    # Remove any initial white space
+    OUTPUT_BASE=$(trim_leading_whitespace "$OUTPUT_BASE")
+    OUTPUT_LORA_HOT=$(trim_leading_whitespace "$OUTPUT_LORA_HOT")
+    OUTPUT_LORA_MERGED=$(trim_leading_whitespace "$OUTPUT_LORA_MERGED")
+    # Extract the corresponding substring from full string
+    EXPECTED_BASE=$(extract_starting_substring "$EXPECTED_BASE_FULL" "$OUTPUT_BASE")
+    EXPECTED_LORA=$(extract_starting_substring "$EXPECTED_LORA_FULL" "$OUTPUT_LORA_HOT")
+
+    # Assert output equals the expected output
+    if [[ "$OUTPUT_BASE" != "$EXPECTED_BASE" ]]; then
+        echo "Error: $model_name OUTPUT_BASE does not start with the expected string."
+        echo -e "Out=$OUTPUT_BASE\n\nExp=$EXPECTED_BASE"
+        exit 1
+    fi
+    if [[ "$OUTPUT_LORA_HOT" != "$EXPECTED_LORA" ]]; then
+        echo "Error: $model_name OUTPUT_LORA_HOT does not start with the expected string."
+        echo -e "Out=$OUTPUT_LORA_HOT\n\nExp=$EXPECTED_LORA"
+        exit 1
+    fi
+    if [[ "$OUTPUT_LORA_MERGED" != "$EXPECTED_LORA" ]]; then
+        echo "Error: $model_name OUTPUT_LORA_MERGED does not start with the expected string."
+        echo -e "Out=$OUTPUT_LORA_MERGED\n\nExp=$EXPECTED_LORA"
+        exit 1
+    fi
+
+    # Store the results
+    results+=("
+    \n\033[1mResults for $model_name with hidden_size $hidden_size:\033[0m
+    \n\033[32m • Base:\n$OUTPUT_BASE
+    \n\033[34m • Lora hot:\n$OUTPUT_LORA_HOT
+    \n\033[36m • Lora merged:\n$OUTPUT_LORA_MERGED
+    \n \033[0m
+    ")
+
+    echo "All tests passed for $model_name with hidden_size $hidden_size!"
+}
+
+# Run test for each model
+for param in "${params[@]}"; do
+    run_conversion_and_inference_lora $param
+done
+
+# Print results
+if [ "$verbose" = true ]; then
+    echo -e "\n\033[1mSummary of All Results:\033[0m"
+    for result in "${results[@]}"; do
+        echo -e "$result"
+    done
+fi

From d6f7b8f68762e5d211b814373dbb72dce0d6fa46 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Sun, 18 Aug 2024 11:43:48 +0200
Subject: [PATCH 2/2] minor code style changes

---
 tests/test-lora-conversion-inference.sh | 45 +++++++++++++------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/tests/test-lora-conversion-inference.sh b/tests/test-lora-conversion-inference.sh
index 34de2c7a5a01b..c05c8e1878065 100755
--- a/tests/test-lora-conversion-inference.sh
+++ b/tests/test-lora-conversion-inference.sh
@@ -8,11 +8,6 @@ declare -a params=(
     "Phi3ForCausalLM 64"
 )
 
-verbose=false
-if [[ "$1" == "--verbose" ]]; then
-    verbose=true
-fi
-
 MODELS_REPO=lora-tests
 MODELS_REPO_URL=https://huggingface.co/ggml-org/$MODELS_REPO
 
@@ -56,36 +51,43 @@ run_conversion_and_inference_lora() {
     local model_name=$1
     local hidden_size=$2
 
+    echo -e "\n\n-------- RUNNING TEST FOR MODEL $model_name --------\n\n"
+
     # Convert safetensors to gguf
     echo "Running convert_hf_to_gguf.py for $model_name with hidden_size $hidden_size..."
     python convert_hf_to_gguf.py $MODELS_REPO/$model_name/hidden_size=$hidden_size/base \
-    --outfile $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
-    --outtype f32
+        --outfile $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
+        --outtype f32
 
+    echo -e "\n\n---------------------------\n\n"
     echo "Running convert_lora_to_gguf.py for $model_name with hidden_size $hidden_size..."
     python3 convert_lora_to_gguf.py $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora \
-    --base $MODELS_REPO/$model_name/hidden_size=$hidden_size/base \
-    --outtype f32
+        --base $MODELS_REPO/$model_name/hidden_size=$hidden_size/base \
+        --outtype f32
 
+    echo -e "\n\n---------------------------\n\n"
     echo "Running llama-export-lora with lora for $model_name with hidden_size $hidden_size..."
     ./llama-export-lora \
-    -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
-    -o $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32-lora-merged.gguf \
-    --lora $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora/Lora-F32-LoRA.gguf
+        -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
+        -o $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32-lora-merged.gguf \
+        --lora $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora/Lora-F32-LoRA.gguf
 
     # Run inference
+    echo -e "\n\n---------------------------\n\n"
     echo "Running llama-cli without lora for $model_name with hidden_size $hidden_size..."
     OUTPUT_BASE=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
-    -p "$EXPECTED_BASE_FIRST_WORD" -n 50 --seed 42 --temp 0)
+        -p "$EXPECTED_BASE_FIRST_WORD" -n 50 --seed 42 --temp 0)
 
+    echo -e "\n\n---------------------------\n\n"
     echo "Running llama-cli with hot lora for $model_name with hidden_size $hidden_size..."
     OUTPUT_LORA_HOT=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32.gguf \
-    --lora $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora/Lora-F32-LoRA.gguf \
-    -p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0)
+        --lora $MODELS_REPO/$model_name/hidden_size=$hidden_size/lora/Lora-F32-LoRA.gguf \
+        -p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0)
 
+    echo -e "\n\n---------------------------\n\n"
     echo "Running llama-cli with merged lora for $model_name with hidden_size $hidden_size..."
     OUTPUT_LORA_MERGED=$(./llama-cli -m $MODELS_REPO/$model_name/hidden_size=$hidden_size/base/Base-F32-lora-merged.gguf \
-    -p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0)
+        -p "$EXPECTED_LORA_FIRST_WORD" -n 50 --seed 42 --temp 0)
 
     # Remove any initial white space
     OUTPUT_BASE=$(trim_leading_whitespace "$OUTPUT_BASE")
@@ -130,9 +132,8 @@ for param in "${params[@]}"; do
 done
 
 # Print results
-if [ "$verbose" = true ]; then
-    echo -e "\n\033[1mSummary of All Results:\033[0m"
-    for result in "${results[@]}"; do
-        echo -e "$result"
-    done
-fi
+echo -e "\n\n---------------------------\n\n"
+echo -e "\n\033[1mSummary of All Results:\033[0m"
+for result in "${results[@]}"; do
+    echo -e "$result"
+done
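
Usage note (a sketch, not part of either patch): the script assumes it is run from the llama.cpp repository root, with the llama-cli and llama-export-lora binaries already present in that directory and the Python dependencies of the convert_*.py scripts installed. The build command below is an assumption about the local setup, not something the patches prescribe; any build that leaves the two binaries in the repo root should work.

    # assumed build step -- produces ./llama-cli and ./llama-export-lora in the repo root
    make llama-cli llama-export-lora
    # clones ggml-org/lora-tests on first run, then converts, merges and runs inference for each model
    ./tests/test-lora-conversion-inference.sh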