Skip to content

Commit

Permalink
ci : add model tests + script wrapper (ggerganov#4586)
Browse files Browse the repository at this point in the history
* scripts : add lib.sh and lib_test.sh

* scripts : stub out new ci-run.sh script

* scripts : switch to PascalCase for functions

This looks a little odd at first, but I find it very useful as a
convention to know if a command is part of our code vs a builtin.

* scripts : add some fancy conversion from snake_case to PascalCase

* Add venv to ci/run.sh

* Revert scripts work

* scripts : add wrapper script for local use of ci/run.sh

* Simplify .gitignore for tests, clang-tidy fixes

* Label all ctest tests

* ci : ctest uses -L main

* Attempt at writing ctest_with_model

* Update test-model-load-cancel

* ci : add ctest_with_model for debug and release

ggml-ci

* Fix gg_get_model function

ggml-ci

* got stuck on CMake

* Add get-model.cpp to tests/CMakeLists.txt

ggml-ci

* Fix README.md output for ctest_with_model

ggml-ci

* workflows : use `-L main` for all ctest

ggml-ci

* Fixes

* GG_RUN_CTEST_MODELFILE => LLAMACPP_TEST_MODELFILE
* Always show warning rather than failing if model file variable is not
  set

* scripts : update usage text for ci-run.sh
  • Loading branch information
crasm authored Jan 26, 2024
1 parent 6dd3c28 commit 413e7b0
Show file tree
Hide file tree
Showing 11 changed files with 199 additions and 48 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
id: cmake_test
run: |
cd build
ctest --verbose --timeout 900
ctest -L main --verbose --timeout 900
ubuntu-latest-cmake-sanitizer:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -107,7 +107,7 @@ jobs:
id: cmake_test
run: |
cd build
ctest --verbose --timeout 900
ctest -L main --verbose --timeout 900
ubuntu-latest-cmake-mpi:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -141,7 +141,7 @@ jobs:
id: cmake_test
run: |
cd build
ctest --verbose
ctest -L main --verbose
# TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
# how to debug it.
Expand Down Expand Up @@ -202,7 +202,7 @@ jobs:
id: cmake_test
run: |
cd build
ctest --verbose --timeout 900
ctest -L main --verbose --timeout 900
macOS-latest-cmake-ios:
runs-on: macos-latest
Expand Down Expand Up @@ -394,7 +394,7 @@ jobs:
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
run: |
cd build
ctest -C Release --verbose --timeout 900
ctest -L main -C Release --verbose --timeout 900
- name: Test (Intel SDE)
id: cmake_test_sde
Expand All @@ -406,7 +406,7 @@ jobs:
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
cd build
& $sde -future -- ctest -C Release --verbose --timeout 900
& $sde -future -- ctest -L main -C Release --verbose --timeout 900
- name: Determine tag name
id: tag
Expand Down
19 changes: 1 addition & 18 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
lcov-report/
gcovr-report/

build*/
build*
out/
tmp/

Expand Down Expand Up @@ -89,20 +89,3 @@ examples/jeopardy/results.txt

poetry.lock
poetry.toml

# Test binaries
/tests/test-grammar-parser
/tests/test-llama-grammar
/tests/test-double-float
/tests/test-grad0
/tests/test-opt
/tests/test-quantize-fns
/tests/test-quantize-perf
/tests/test-sampling
/tests/test-tokenizer-0-llama
/tests/test-tokenizer-0-falcon
/tests/test-tokenizer-1-llama
/tests/test-tokenizer-1-bpe
/tests/test-rope
/tests/test-backend-ops
/tests/test-autorelease
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ TEST_TARGETS = \
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
tests/test-backend-ops tests/test-autorelease
tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease

# Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
Expand Down Expand Up @@ -748,5 +748,8 @@ tests/test-c.o: tests/test-c.c llama.h
tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
81 changes: 70 additions & 11 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ mkdir -p "$2"
OUT=$(realpath "$1")
MNT=$(realpath "$2")

rm -v $OUT/*.log
rm -v $OUT/*.exit
rm -v $OUT/*.md
# Remove the logs, exit codes and markdown summaries left by a previous run.
# The glob has to stay outside the quotes: a quoted "*.log" is passed to rm
# literally and matches nothing. ${OUT:?} aborts instead of expanding to
# /*.log should OUT ever be empty.
rm -f "${OUT:?}"/*.log
rm -f "${OUT:?}"/*.exit
rm -f "${OUT:?}"/*.md

sd=`dirname $0`
cd $sd/../
Expand Down Expand Up @@ -94,7 +94,7 @@ function gg_run_ctest_debug {
(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

(time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log

set +e
}
Expand Down Expand Up @@ -123,9 +123,9 @@ function gg_run_ctest_release {
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

if [ -z ${GG_BUILD_LOW_PERF} ]; then
(time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
(time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
else
(time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
fi

set +e
Expand All @@ -141,6 +141,61 @@ function gg_sum_ctest_release {
gg_printf '```\n'
}

# Prints (without a trailing newline) the path of the first existing, non-empty
# f16 GGUF model under $MNT/models/open-llama, preferring 3B-v2 over 7B-v2.
# When neither file is usable, reports on stderr and exits with status 1.
function gg_get_model {
    local candidate
    for candidate in \
        "$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf" \
        "$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"; do
        if [[ -s $candidate ]]; then
            echo -n "$candidate"
            return
        fi
    done

    echo >&2 "No model found. Can't run gg_run_ctest_with_model."
    exit 1
}

# Runs the ctest tests labelled "model" inside build-ci-debug, handing them the
# model chosen by gg_get_model through LLAMACPP_TEST_MODELFILE. Output is
# appended to $OUT/${ci}-ctest.log.
# Returns 1 without running ctest when no model file is available.
function gg_run_ctest_with_model_debug {
    cd ${SRC}

    # gg_get_model runs in a command substitution, so its `exit 1` only ends
    # that subshell; the status has to be checked here or a missing model would
    # go unnoticed and the tests would run with an empty model path.
    local model
    model=$(gg_get_model) || return 1

    cd build-ci-debug
    set -e
    # `time` must be the first word to be parsed as the bash keyword; placed
    # after the assignment it would be looked up as an external `time` program.
    (time LLAMACPP_TEST_MODELFILE="$model" ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
    set +e
    cd ..
}

# Runs the ctest tests labelled "model" inside build-ci-release, handing them
# the model chosen by gg_get_model through LLAMACPP_TEST_MODELFILE. Output is
# appended to $OUT/${ci}-ctest.log.
# Returns 1 without running ctest when no model file is available.
function gg_run_ctest_with_model_release {
    cd ${SRC}

    # gg_get_model runs in a command substitution, so its `exit 1` only ends
    # that subshell; the status has to be checked here or a missing model would
    # go unnoticed and the tests would run with an empty model path.
    local model
    model=$(gg_get_model) || return 1

    cd build-ci-release
    set -e
    # `time` must be the first word to be parsed as the bash keyword; placed
    # after the assignment it would be looked up as an external `time` program.
    (time LLAMACPP_TEST_MODELFILE="$model" ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
    set +e
    cd ..
}

# Emits the summary section for the debug "ctest with model" step through
# gg_printf: a heading with the step name, the recorded exit status read from
# $OUT/${ci}.exit, and the ctest log wrapped in a fenced block.
function gg_sum_ctest_with_model_debug {
    local exit_status
    local ctest_output

    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs ctest with model files in debug mode\n'

    exit_status=$(cat $OUT/${ci}.exit)
    gg_printf '- status: %s\n' "${exit_status}"

    gg_printf '```\n'
    ctest_output=$(cat $OUT/${ci}-ctest.log)
    gg_printf '%s\n' "${ctest_output}"
    gg_printf '```\n'
}

# Emits the summary section for the release "ctest with model" step through
# gg_printf: a heading with the step name, the recorded exit status read from
# $OUT/${ci}.exit, and the ctest log wrapped in a fenced block.
function gg_sum_ctest_with_model_release {
    local exit_status
    local ctest_output

    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs ctest with model files in release mode\n'

    exit_status=$(cat $OUT/${ci}.exit)
    gg_printf '- status: %s\n' "${exit_status}"

    gg_printf '```\n'
    ctest_output=$(cat $OUT/${ci}-ctest.log)
    gg_printf '%s\n' "${ctest_output}"
    gg_printf '```\n'
}

# open_llama_3b_v2

function gg_run_open_llama_3b_v2 {
Expand Down Expand Up @@ -183,8 +238,6 @@ function gg_run_open_llama_3b_v2 {

wiki_test_60="${path_wiki}/wiki.test-60.raw"

./bin/test-autorelease ${model_f16}

./bin/quantize ${model_f16} ${model_q8_0} q8_0
./bin/quantize ${model_f16} ${model_q4_0} q4_0
./bin/quantize ${model_f16} ${model_q4_1} q4_1
Expand Down Expand Up @@ -507,14 +560,18 @@ function gg_sum_open_llama_7b_v2 {
## main

if [ -z ${GG_BUILD_LOW_PERF} ]; then
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models
rm -rf ${SRC}/models-mnt

mnt_models=${MNT}/models
mkdir -p ${mnt_models}
ln -sfn ${mnt_models} ${SRC}/models-mnt

python3 -m pip install -r ${SRC}/requirements.txt
python3 -m pip install --editable gguf-py
# Create a fresh python3 venv and enter it
python3 -m venv "$MNT/venv"
source "$MNT/venv/bin/activate"

pip install -r ${SRC}/requirements.txt --disable-pip-version-check
pip install --editable gguf-py --disable-pip-version-check
fi

ret=0
Expand All @@ -529,6 +586,8 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
else
test $ret -eq 0 && gg_run open_llama_7b_v2
fi
test $ret -eq 0 && gg_run ctest_with_model_debug
test $ret -eq 0 && gg_run ctest_with_model_release
fi
fi

Expand Down
50 changes: 50 additions & 0 deletions scripts/ci-run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash
# Local wrapper around ci/run.sh (see the usage text below for details).
set -euo pipefail
this=$(realpath "$0"); readonly this
cd "$(dirname "$this")"

# Lint this script when shellcheck is installed. Under `set -e` an
# unconditional call would abort the wrapper on machines without shellcheck;
# findings reported by an installed shellcheck still stop the script.
if command -v shellcheck >/dev/null 2>&1; then
    shellcheck "$this"
fi

# Exactly one or two positional arguments are accepted; anything else prints
# the usage text to stderr and exits with status 1.
if [[ $# -lt 1 || $# -gt 2 ]]; then
    cat >&2 <<'EOF'
usage:
ci-run.sh <tmp_dir> [<cache_dir>]
This script wraps ci/run.sh:
* If <tmp_dir> is a ramdisk, you can reduce writes to your SSD. If <tmp_dir> is not a ramdisk, keep in mind that total writes will increase by the size of <cache_dir>.
(openllama_3b_v2: quantized models are about 30GB)
* Persistent model and data files are synced to and from <cache_dir>,
excluding generated .gguf files.
(openllama_3b_v2: persistent files are about 6.6GB)
* <cache_dir> defaults to ~/.cache/llama.cpp
EOF
    exit 1
fi

# Step up from the script's directory to the llama.cpp repo root.
cd ..

# Working directory: create it if needed, then store its canonical path.
tmp_arg="$1"
mkdir -p "$tmp_arg"
tmp=$(realpath "$tmp_arg")
echo "Using tmp=$tmp" >&2

# Cache directory: second argument, or ~/.cache/llama.cpp when it is omitted.
cache_arg="${2-$HOME/.cache/llama.cpp}"
mkdir -p "$cache_arg"
cache=$(realpath "$cache_arg")
echo "Using cache=$cache" >&2

# _sync <from> <to> [rsync options...]
# Creates both directories if they are missing, then mirrors <from> into <to>
# with `rsync -a --delete-during`; any further arguments are passed to rsync.
_sync() {
    local src="$1" dst="$2"
    shift 2

    echo "Syncing from $src to $dst" >&2
    mkdir -p "$src" "$dst"
    rsync -a "$src" "$dst" --delete-during "$@"
}

workdir="$tmp/llama.cpp"
cached_models="$cache/ci-mnt/models/"

# Copy the repo into the working directory and restore the cached model files.
_sync "$(realpath .)/" "$workdir"
_sync "$cached_models" "$workdir/ci-mnt/models/"

# Run the CI script from inside the copy.
cd "$workdir"
bash ci/run.sh ci-out ci-mnt

# Write the model directory back to the cache, leaving out .gguf files.
_sync 'ci-mnt/models/' "$cached_models" --exclude='*.gguf' -P
2 changes: 2 additions & 0 deletions tests/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Ignore everything in this directory ...
*
# ... except entries whose name contains a dot (e.g. *.cpp, *.h, CMakeLists.txt),
# which leaves only extension-less files ignored.
!*.*
14 changes: 11 additions & 3 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
# Defines and installs an executable for a test source file; no CTest test is
# registered here.
#   source - source file; the target name is its file name without extension.
# The target links privately against `common`.
# NOTE(review): this diff view shows add_executable twice (before and after the
# change); presumably only the variant that also compiles get-model.cpp exists
# in the committed file - confirm.
function(llama_build_executable source)
get_filename_component(TEST_TARGET ${source} NAME_WE)
add_executable(${TEST_TARGET} ${source})
add_executable(${TEST_TARGET} ${source} get-model.cpp)
install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE common)
endfunction()

# Registers a CTest test, labelled "main", that runs the executable target
# derived from a source file name.
#   name   - name of the CTest test
#   source - source file; its file name without extension is the target to run
#   ARGN   - extra arguments appended to the test command line
function(llama_test_executable name source)
    get_filename_component(test_target ${source} NAME_WE)
    add_test(NAME ${name} COMMAND $<TARGET_FILE:${test_target}> ${ARGN})
    set_tests_properties(${name} PROPERTIES LABELS "main")
endfunction()

# Builds a test executable from `source` and registers it with CTest under the
# "main" label.
#   source - test source file
#   ARGN   - extra arguments for the test command line; forwarded so that
#            add_test() in the labelled helper still receives them
function(llama_build_and_test_executable source)
    llama_build_and_test_executable_with_label(${source} "main" ${ARGN})
endfunction()

# Defines, installs and registers a test executable, tagging the CTest test
# with the given label.
#   source - source file; target and test are both named after its file name
#            without extension
#   label  - value stored in the test's LABELS property
#   ARGN   - extra arguments appended to the test command line
# NOTE(review): this diff view shows add_executable twice (before and after the
# change); presumably only the variant that also compiles get-model.cpp exists
# in the committed file - confirm.
function(llama_build_and_test_executable_with_label source label)
get_filename_component(TEST_TARGET ${source} NAME_WE)
add_executable(${TEST_TARGET} ${source})
add_executable(${TEST_TARGET} ${source} get-model.cpp)
install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE common)
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label})
endfunction()

# llama_build_and_test_executable(test-double-float.cpp) # SLOW
Expand Down Expand Up @@ -49,10 +55,12 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
llama_build_and_test_executable(test-grad0.cpp)
# llama_build_and_test_executable(test-opt.cpp) # SLOW
llama_build_and_test_executable(test-backend-ops.cpp)
llama_build_and_test_executable(test-autorelease.cpp)

llama_build_and_test_executable(test-rope.cpp)

llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model")
llama_build_and_test_executable_with_label(test-autorelease.cpp "model")

# dummy executable - not installed
get_filename_component(TEST_TARGET test-c.c NAME_WE)
add_executable(${TEST_TARGET} test-c.c)
Expand Down
21 changes: 21 additions & 0 deletions tests/get-model.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "get-model.h"

// Returns the model path a test should load.
// The first command-line argument wins when present; otherwise the
// LLAMACPP_TEST_MODELFILE environment variable is used. If neither yields a
// non-empty path, a warning is written to stderr and the process exits with
// EXIT_SUCCESS, so the test is skipped rather than failed.
char * get_model_or_exit(int argc, char *argv[]) {
    if (argc > 1) {
        return argv[1];
    }

    char * env_path = getenv("LLAMACPP_TEST_MODELFILE");
    if (env_path != nullptr && env_path[0] != '\0') {
        return env_path;
    }

    fprintf(stderr, "\033[33mWARNING: No model file provided. Skipping this test. Set LLAMACPP_TEST_MODELFILE=<gguf_model_path> to silence this warning and run this test.\n\033[0m");
    exit(EXIT_SUCCESS);
}
2 changes: 2 additions & 0 deletions tests/get-model.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#pragma once
// Returns the model path for a test, taken from the given argc/argv;
// declared here, defined outside this header.
char * get_model_or_exit(int argc, char * argv[]);
12 changes: 4 additions & 8 deletions tests/test-autorelease.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,15 @@
#include <thread>

#include "llama.h"
#include "get-model.h"

// This creates a new context inside a pthread and then tries to exit cleanly.
int main(int argc, char ** argv) {
if (argc < 2) {
printf("Usage: %s model.gguf\n", argv[0]);
return 0; // intentionally return success
}
auto * model_path = get_model_or_exit(argc, argv);

const std::string fname = argv[1];

std::thread([&fname]() {
std::thread([&model_path]() {
llama_backend_init(false);
auto * model = llama_load_model_from_file(fname.c_str(), llama_model_default_params());
auto * model = llama_load_model_from_file(model_path, llama_model_default_params());
auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
llama_free(ctx);
llama_free_model(model);
Expand Down
Loading

0 comments on commit 413e7b0

Please sign in to comment.