diff --git a/tests/test_lora_conversion_and_inference.sh b/tests/test_lora_conversion_and_inference.sh
index cd28eb92d290d8..cf3c0fbdf68bd5 100755
--- a/tests/test_lora_conversion_and_inference.sh
+++ b/tests/test_lora_conversion_and_inference.sh
@@ -19,6 +19,7 @@ run_conversion_and_inference_lora() {
     local size_matrix=$2
     local model_size_mb=$3
 
+    # Convert safetensors to gguf
     echo "Running convert_hf_to_gguf.py for $model_name with size $size_matrix..."
     python convert_hf_to_gguf.py reduce-llms-for-testing/$model_name/size=$size_matrix/base \
         --outtype f32
@@ -28,6 +29,13 @@ run_conversion_and_inference_lora() {
         --base reduce-llms-for-testing/$model_name/size=$size_matrix/base \
         --outtype f32
 
+    echo "Running llama-export-lora with lora for $model_name with size $size_matrix and model size $model_size_mb..."
+    llama-export-lora \
+        -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
+        -o reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
+        --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf
+
+    # Run inference
     echo "Running llama-cli without lora for $model_name with size $size_matrix and model size $model_size_mb..."
     OUTPUT_BASE=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
         -p "When forty winters shall besiege" -n 50 --seed 42)
@@ -37,24 +45,17 @@ run_conversion_and_inference_lora() {
         --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
         -p "I see a little silhouetto" -n 50 --seed 42)
 
-    # TODO add merge lora with lora-export and check
-    echo "Running llama-export-lora with lora for $model_name with size $size_matrix and model size $model_size_mb..."
-    llama-export-lora \
-        -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32.gguf \
-        -o reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
-        --lora reduce-llms-for-testing/$model_name/size=$size_matrix/lora/Lora-F32-LoRA.gguf \
-
     echo "Running llama-cli with exported lora for $model_name with size $size_matrix and model size $model_size_mb..."
     OUTPUT_LORA_MERGED=$(llama-cli -m reduce-llms-for-testing/$model_name/size=$size_matrix/base/Base-$model_size_mb-F32-lora-merged.gguf \
         -p "I see a little silhouetto" -n 50 --seed 42)
 
     # Store the results in the regular array
     results+=("
-    \n\n\n\033[1mResults for $model_name with size $size_matrix and model size $model_size_mb:\033[0m
-    \n • \033[32mBase:\n $OUTPUT_BASE
-    \n • \033[34mLora hot:\n $OUTPUT_LORA_HOT
-    \n • \033[36mLora merged:\n $OUTPUT_LORA_MERGED
-    \n\n\n \033[0m
+    \n\n\033[1mResults for $model_name with size $size_matrix and model size $model_size_mb:\033[0m
+    \n • \033[32mBase:\n$OUTPUT_BASE
+    \n • \033[34mLora hot:\n$OUTPUT_LORA_HOT
+    \n • \033[36mLora merged:\n$OUTPUT_LORA_MERGED
+    \n\n \033[0m
     ")
 
     echo "All steps completed for $model_name with size $size_matrix and model size $model_size_mb!"