diff --git a/extras/smoke-test.sh b/extras/smoke-test.sh
new file mode 100644
index 0000000000000..f03edea4f6192
--- /dev/null
+++ b/extras/smoke-test.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+set -uxo pipefail
+
+# we will need to download test models off HF hub
+unset HF_HUB_OFFLINE
+
+export HTTP_PORT=8080
+export GRPC_PORT=8033
+
+
+function wait_for(){
+  trap "" ERR  # we don't care about errors in this function
+
+  name=$1
+  shift
+  command=$@
+
+  max_retries=10
+  until $command ; do
+    echo "Waiting for $name to be up (retries_left=$max_retries)..."
+    sleep 30
+    max_retries=$((max_retries-1))
+    if [[ $max_retries -le 0 ]]; then
+      echo "Timed out waiting for $name server" >&2
+      exit 1
+    fi
+  done
+}
+
+# stop the server on any errors
+trap 'kill -9 $server_pid && exit 1' ERR
+
+# spin up the OpenAI API server in the background
+python -m vllm.entrypoints.openai.api_server --port $HTTP_PORT &
+server_pid=$!
+server_url="http://localhost:$HTTP_PORT"
+
+wait_for "http server" curl --verbose --connect-timeout 1 --fail-with-body --no-progress-meter "${server_url}/health"
+
+curl -v --no-progress-meter --fail-with-body \
+  "${server_url}/v1/models" | python -m json.tool
+
+curl -v --no-progress-meter --fail-with-body \
+  --header "Content-Type: application/json" \
+  --data '{
+    "prompt": "A red fedora symbolizes ",
+    "model": "facebook/opt-125m"
+}' \
+  "${server_url}/v1/completions" | python -m json.tool
+
+echo "OpenAI API success" && kill -9 $server_pid
+
+
+# spin up the grpc server in the background
+python -m vllm_tgis_adapter --grpc-port $GRPC_PORT &
+server_pid=$!
+server_url="localhost:$GRPC_PORT"
+
+# get grpcurl
+curl --no-progress-meter --location --output /tmp/grpcurl.tar.gz \
+  https://github.com/fullstorydev/grpcurl/releases/download/v1.9.1/grpcurl_1.9.1_linux_x86_64.tar.gz
+tar -xf /tmp/grpcurl.tar.gz --directory /tmp
+
+wait_for "grpc_server" grpc_healthcheck  # grpc_healthcheck is part of vllm_tgis_adapter
+
+/tmp/grpcurl -v \
+  -plaintext \
+  -use-reflection \
+  -d '{ "requests": [{"text": "A red fedora symbolizes "}]}' \
+  "$server_url" \
+  fmaas.GenerationService/Generate
+
+echo "GRPC API success" && kill -9 $server_pid
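For context on the completions check above: vLLM's server implements the OpenAI completions schema, so the pretty-printed output of the last `curl | python -m json.tool` should look roughly like this (abridged; ids, text, timestamps, and token counts are illustrative placeholders, not captured output):

    {
      "id": "cmpl-...",
      "object": "text_completion",
      "model": "facebook/opt-125m",
      "choices": [
        {"index": 0, "text": "...", "logprobs": null, "finish_reason": "length"}
      ],
      "usage": {"prompt_tokens": 8, "completion_tokens": 16, "total_tokens": 24}
    }
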
diff --git a/extras/unit-tests.sh b/extras/unit-tests.sh
new file mode 100644
index 0000000000000..4739fb6d65ea3
--- /dev/null
+++ b/extras/unit-tests.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# partially copied from .buildkite/test-pipeline.yml
+
+cd tests || exit 1
+
+# we will need to download test models off HF hub
+unset HF_HUB_OFFLINE
+
+# basic correctness
+pytest -v -s test_regression.py
+pytest -v -s async_engine
+VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py
+VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py
+VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
+VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
+VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
+
+# core
+pytest -v -s core
+
+# note: distributed tests are disabled
+
+# engine tests
+pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
+# entrypoints
+pytest -v -s entrypoints -m openai
+
+# inputs (note: multimodal tests are skipped)
+pytest -v -s test_inputs.py
+
+# models
+pytest -v -s models -m "not vlm"
+
+# misc
+pytest -v -s prefix_caching
+pytest -v -s samplers
+pytest -v -s test_logits_processor.py
+pytest -v -s worker
+VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s spec_decode
+# pytest -v -s tensorizer_loader  # disabled: requires libsodium
+pytest -v -s metrics
+pytest -v -s quantization
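Taken together: smoke-test.sh exercises both API frontends end to end, while unit-tests.sh replays a non-distributed subset of vLLM's Buildkite pipeline. A minimal manual invocation might look like this (a sketch; the CI wiring that calls these scripts is not part of this diff, and it assumes a checkout with vLLM and vllm_tgis_adapter installed):

    # from the repository root
    bash extras/smoke-test.sh    # end-to-end check of the HTTP and gRPC servers
    bash extras/unit-tests.sh    # non-distributed subset of the upstream test suite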