Skip to content

Commit

Permalink
Add smoke-test and unit-test scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
dtrifiro committed Jun 19, 2024
1 parent 21c4422 commit 2e69ab7
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 0 deletions.
73 changes: 73 additions & 0 deletions extras/smoke-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/bash
# Smoke-test the vLLM OpenAI-compatible HTTP server and the TGIS gRPC adapter.
set -uxo pipefail

# Test models are downloaded from the HF hub, so offline mode must be off.
unset HF_HUB_OFFLINE

export HTTP_PORT=8080 GRPC_PORT=8033


#######################################
# Poll a command until it succeeds, or time out.
# Arguments: $1 - human-readable service name (for log messages)
#            $@ - command (plus arguments) to retry
# Outputs:   progress messages to stdout, timeout message to stderr
# Exits:     the whole script with status 1 after 10 failed attempts
#######################################
function wait_for(){
    # The probe command is *expected* to fail until the service is up, so
    # silence the ERR trap here. Traps are global (not function-scoped), so
    # save the caller's trap and restore it on the way out — the original
    # 'trap "" ERR' permanently disabled the script's kill-server-on-error
    # trap after the first wait_for call.
    local saved_trap
    saved_trap=$(trap -p ERR)
    trap "" ERR

    local name=$1
    shift
    # Keep the command as an argv array so quoted arguments survive intact
    # (the original 'command=$@' flattened it into a whitespace-split string).
    local -a command=("$@")

    local max_retries=10
    until "${command[@]}"; do
        echo "Waiting for $name to be up (retries_left=$max_retries)..."
        sleep 30
        max_retries=$((max_retries - 1))
        if (( max_retries <= 0 )); then
            echo "Timed out waiting for $name server" >&2
            exit 1
        fi
    done

    # Re-install the caller's ERR trap (if any) now that we are done.
    if [[ -n "$saved_trap" ]]; then
        eval "$saved_trap"
    else
        trap - ERR
    fi
}

# Stop the server and fail the script on any unhandled error.
# NOTE(review): -9 (SIGKILL) skips any cleanup in the server — presumably
# intentional for CI teardown; confirm SIGTERM isn't sufficient.
trap 'kill -9 $server_pid && exit 1' ERR

# spin up the OpenAPI server in the background
python -m vllm.entrypoints.openai.api_server --port "$HTTP_PORT" &
server_pid=$!
server_url="http://localhost:$HTTP_PORT"

wait_for "http server" curl --verbose --connect-timeout 1 --fail-with-body --no-progress-meter "${server_url}/health"

# List the available models. The original line ended with '|| \', which made
# the completions request below run only when this request FAILED (and
# swallowed that failure); both requests must always run, and either one
# failing now triggers the ERR trap above.
curl -v --no-progress-meter --fail-with-body \
  "${server_url}/v1/models" | python -m json.tool

# Request a completion from the test model.
curl -v --no-progress-meter --fail-with-body \
  --header "Content-Type: application/json" \
  --data '{
    "prompt": "A red fedora symbolizes ",
    "model": "facebook/opt-125m"
  }' \
  "${server_url}/v1/completions" | python -m json.tool

echo "OpenAI API success" && kill -9 $server_pid


# spin up the grpc server in the background
# (the ERR trap installed earlier still applies: any failure below kills it)
python -m vllm_tgis_adapter --grpc-port $GRPC_PORT &
server_pid=$!
server_url="localhost:$GRPC_PORT"

# get grpcurl
# NOTE(review): pinned to v1.9.1 linux x86_64 — this will not work on
# other architectures (e.g. aarch64 runners); confirm CI is x86_64-only.
curl --no-progress-meter --location --output /tmp/grpcurl.tar.gz \
https://github.com/fullstorydev/grpcurl/releases/download/v1.9.1/grpcurl_1.9.1_linux_x86_64.tar.gz
tar -xf /tmp/grpcurl.tar.gz --directory /tmp

wait_for "grpc_server" grpc_healthcheck # healthcheck is part of vllm_tgis_adapter

# Exercise the Generate RPC via server reflection (no .proto files needed)
# with the same prompt used in the HTTP smoke test above.
/tmp/grpcurl -v \
-plaintext \
-use-reflection \
-d '{ "requests": [{"text": "A red fedora symbolizes "}]}' \
"$server_url" \
fmaas.GenerationService/Generate

echo "GRPC API success" && kill -9 $server_pid
43 changes: 43 additions & 0 deletions extras/unit-tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
# Run a subset of the vLLM unit tests (distributed tests are disabled).
# Partially copied from .buildkite/test-pipeline.yml

cd tests || exit 1

# we will need to download test models off HF hub
unset HF_HUB_OFFLINE

# Run every suite even when an earlier one fails, but remember failures so
# the script's exit status reflects the overall result — previously only the
# status of the *last* pytest invocation was reported.
failures=0
run() {
    "$@" || failures=$((failures + 1))
}

# basic correctness
run pytest -v -s test_regression.py
run pytest -v -s async_engine
run env VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py
run env VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py
run env VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
run env VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
run env VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py

# core
run pytest -v -s core

# note: distributed tests are disabled

# engine tests
run pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py

# entrypoints
run pytest -v -s entrypoints -m openai

# inputs (note: multimodal tests are skipped)
run pytest -v -s test_inputs.py

# models (vision-language tests excluded). The original passed escaped
# quotes (\"not vlm\"), so pytest received the literal arguments '"not'
# and 'vlm"' instead of the marker expression "not vlm"; it also ran this
# exact invocation twice — the duplicate has been dropped.
run pytest -v -s models -m "not vlm"

# misc
run pytest -v -s prefix_caching
run pytest -v -s samplers
run pytest -v -s test_logits_processor.py
run pytest -v -s worker
run env VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s spec_decode
# pytest -v -s tensorizer_loader # disabled: requires libsodium
run pytest -v -s metrics
run pytest -v -s quantization

if (( failures > 0 )); then
    echo "$failures test suite(s) failed" >&2
    exit 1
fi

0 comments on commit 2e69ab7

Please sign in to comment.