fix smoke tests
Signed-off-by: NickLucche <[email protected]>
NickLucche authored and dtrifiro committed Jan 23, 2025
1 parent 5ed86b7 commit 9e5a88d
Showing 2 changed files with 47 additions and 9 deletions.
30 changes: 30 additions & 0 deletions extras/print_gpu_memory_stats.py
@@ -0,0 +1,30 @@
+import torch
+
+
+def print_gpu_memory_stats():
+    if not torch.cuda.is_available():
+        print("No GPU available")
+        return
+
+    for i in range(torch.cuda.device_count()):
+        device_name = torch.cuda.get_device_name(i)
+        # Convert to GB
+        total_memory = torch.cuda.get_device_properties(i).total_memory / (1024
+                                                                           **3)
+        allocated = torch.cuda.memory_allocated(i) / (1024**3)
+        reserved = torch.cuda.memory_reserved(i) / (1024**3)
+        max_allocated = torch.cuda.max_memory_allocated(i) / (1024**3)
+        max_reserved = torch.cuda.max_memory_reserved(i) / (1024**3)
+        free_memory = reserved - allocated
+
+        print(f"Device {i}: {device_name}")
+        print(f" Total Memory: {total_memory:.2f} GB")
+        print(f" Allocated Memory: {allocated:.2f} GB")
+        print(f" Reserved Memory: {reserved:.2f} GB")
+        print(f" Free Memory: {free_memory:.2f} GB")
+        print(f" Max Allocated: {max_allocated:.2f} GB")
+        print(f" Max Reserved: {max_reserved:.2f} GB")
+        print("-" * 40)
+
+
+print_gpu_memory_stats()
26 changes: 17 additions & 9 deletions extras/smoke-test.sh
@@ -16,10 +16,10 @@ function wait_for(){
   # shellcheck disable=SC2124
   command=$@

-  max_retries=10
+  max_retries=15
   until $command ; do
     echo "Waiting for $name to be up (retries_left=$max_retries)..."
-    sleep 30
+    sleep 20
     max_retries=$((max_retries-1))
     if [[ max_retries -le 0 ]]; then
       echo "Timed out waiting for $name server" >&2
@@ -29,11 +29,17 @@ function wait_for(){
   done
 }

+function gpu_memory_stats(){
+  # In case `nvidia-smi` is missing
+  script_dir=$(dirname "$(realpath "$0")")
+  python "$script_dir"/print_gpu_memory_stats.py
+}
+
 # stop the server on any errors
-trap 'kill -9 $server_pid && exit 1' ERR
+trap 'kill $server_pid && exit 1' ERR

 # spin up the OpenAPI server in the background
-python -m vllm.entrypoints.openai.api_server --port $HTTP_PORT &
+python -m vllm.entrypoints.openai.api_server --port $HTTP_PORT --model facebook/opt-125m --enforce-eager &
 server_pid=$!
 server_url="http://localhost:$HTTP_PORT"

@@ -50,14 +56,14 @@ curl -v --no-progress-meter --fail-with-body \
 }' \
   "${server_url}/v1/completions" | python -m json.tool

-echo "OpenAI API success" && kill -9 $server_pid
+# Wait for graceful termination to clean up gpu memory
+echo "OpenAI API success" && kill $server_pid && wait $server_pid
+gpu_memory_stats

 # spin up the grpc server in the background
-python -m vllm_tgis_adapter --grpc-port $GRPC_PORT &
+python -m vllm_tgis_adapter --grpc-port $GRPC_PORT --model facebook/opt-125m --enforce-eager &
 server_pid=$!
 server_url="localhost:$GRPC_PORT"

 # get grpcurl
 curl --no-progress-meter --location --output /tmp/grpcurl.tar.gz \
   https://github.com/fullstorydev/grpcurl/releases/download/v1.9.1/grpcurl_1.9.1_linux_x86_64.tar.gz
@@ -72,4 +78,6 @@ wait_for "grpc_server" grpc_healthcheck # healthcheck is part of vllm_tgis_adapter
"$server_url" \
fmaas.GenerationService/Generate

echo "GRPC API success" && kill -9 $server_pid
# Wait for gracious termination to clean up gpu memory
echo "GRPC API success" && kill $server_pid && wait $server_pid
gpu_memory_stats
