Skip to content

Commit

Permalink
fix: eval, do not use external deps
Browse files Browse the repository at this point in the history
The pipeline expects to have all the functions at its disposal to run.
So helper packages cannot be used.
In this case, the helpers package only works because the current eval is using a
custom image.
Let's move all the logic inside the component.

Signed-off-by: Sébastien Han <[email protected]>
  • Loading branch information
leseb committed Oct 10, 2024
1 parent 2b9d01e commit 2eb8e1c
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 24 deletions.
90 changes: 85 additions & 5 deletions eval/mt_bench/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,93 @@ def run_mt_bench_op(
import os

import torch
from helpers import (
VLLM_SERVER,
launch_vllm,
stop_vllm,
)
from instructlab.eval.mt_bench import MTBenchEvaluator

VLLM_SERVER = "http://localhost:8000/v1"

def launch_vllm(
    model_path: str, gpu_count: int, retries: int = 120, delay: int = 5
):
    """Start a vLLM OpenAI-compatible API server and block until it answers.

    The server is spawned as a child process running
    ``vllm.entrypoints.openai.api_server`` with the current interpreter, then
    ``{VLLM_SERVER}/models`` is polled until it returns HTTP 200.
    ``VLLM_SERVER`` is read from the enclosing scope.

    Args:
        model_path: Value passed to the server's ``--model`` option.
        gpu_count: When greater than zero, passed as ``--tensor-parallel-size``;
            otherwise that option is omitted.
        retries: Maximum number of readiness probes before giving up.
        delay: Seconds to sleep between two probes.

    Raises:
        RuntimeError: If the server process exits before becoming ready, or if
            it is still not answering after ``retries`` probes.
    """
    import subprocess
    import sys
    import time

    import requests

    # Upper bound (seconds) for a single readiness probe, so a server that
    # accepts the connection but never answers cannot stall the loop forever.
    probe_timeout = 10

    command = [
        sys.executable,
        "-m",
        "vllm.entrypoints.openai.api_server",
        "--model",
        model_path,
    ]
    if gpu_count > 0:
        command += ["--tensor-parallel-size", str(gpu_count)]

    server = subprocess.Popen(args=command)

    print(f"Waiting for vLLM server to start at {VLLM_SERVER}...")

    for attempt in range(retries):
        # Fail fast: if the child already exited there is nothing to wait for.
        exit_code = server.poll()
        if exit_code is not None:
            raise RuntimeError(
                f"vLLM server process exited with code {exit_code} before becoming ready at {VLLM_SERVER}."
            )

        try:
            response = requests.get(f"{VLLM_SERVER}/models", timeout=probe_timeout)
            if response.status_code == 200:
                print(f"vLLM server is up and running at {VLLM_SERVER}.")
                return
        except (requests.ConnectionError, requests.Timeout):
            # Server not listening yet, or too slow to answer: retry below.
            pass

        print(
            f"Server not available yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})..."
        )
        time.sleep(delay)

    raise RuntimeError(
        f"Failed to start vLLM server at {VLLM_SERVER} after {retries} retries."
    )

# This seems like excessive effort to stop the vllm process, but merely saving & killing the pid doesn't work
# Also, the base image does not include 'pkill' cmd, so can't pkill -f vllm.entrypoints.openai.api_server either
def stop_vllm():
    """Terminate every process whose command line runs the vLLM API server.

    Each matching process is first asked to terminate; if it is still alive
    after 5 seconds it is force-killed. Failures are reported with ``print``
    and never raised, so one stubborn process does not prevent the others
    from being stopped.
    """
    import psutil

    for process in psutil.process_iter(attrs=["pid", "name", "cmdline"]):
        cmdline = process.info.get("cmdline")
        if not cmdline or "vllm.entrypoints.openai.api_server" not in cmdline:
            continue

        pid = process.info["pid"]
        print(f"Found vLLM server process with PID: {pid}, terminating...")
        try:
            process.terminate()  # Try graceful termination
            try:
                process.wait(timeout=5)  # Wait a bit for it to terminate
            except psutil.TimeoutExpired:
                # wait() raises instead of returning when the process is
                # still alive, so the force-kill has to live in this handler.
                print(f"Forcefully killing vLLM server process with PID: {pid}")
                process.kill()
                process.wait(timeout=5)
            print(f"Successfully stopped vLLM server with PID: {pid}")
        except psutil.NoSuchProcess:
            print(f"Process with PID {pid} no longer exists.")
        except psutil.AccessDenied:
            print(
                f"Access denied when trying to terminate process with PID {pid}."
            )
        except Exception as e:
            print(f"Failed to terminate process with PID {pid}. Error: {e}")

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

gpu_available = torch.cuda.is_available()
Expand Down
1 change: 1 addition & 0 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,7 @@ def change_dsl_function_to_normal_function(rendered_code: list):
"dsl.Input[dsl.Artifact]": "str",
"dsl.Output[dsl.Dataset]": "str",
"dsl.Output[dsl.Model]": "str",
"Output[Artifact]": "str",
"import kfp": "",
"from kfp import dsl": "",
"from kfp.dsl import *": "",
Expand Down
50 changes: 47 additions & 3 deletions pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1206,9 +1206,53 @@ deploymentSpec:
\ max_workers: str,\n models_list: List[str] = None,\n models_folder:\
\ Optional[str] = None,\n device: str = None,\n) -> NamedTuple(\"outputs\"\
, best_model=str, best_score=float):\n import json\n import os\n\n\
\ import torch\n from helpers import (\n VLLM_SERVER,\n \
\ launch_vllm,\n stop_vllm,\n )\n from instructlab.eval.mt_bench\
\ import MTBenchEvaluator\n\n os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"\
\ import torch\n from instructlab.eval.mt_bench import MTBenchEvaluator\n\
\n VLLM_SERVER = \"http://localhost:8000/v1\"\n\n def launch_vllm(\n\
\ model_path: str, gpu_count: int, retries: int = 120, delay: int\
\ = 5\n ):\n import subprocess\n import sys\n import\
\ time\n\n import requests\n\n if gpu_count > 0:\n \
\ command = [\n sys.executable,\n \"-m\"\
,\n \"vllm.entrypoints.openai.api_server\",\n \
\ \"--model\",\n model_path,\n \"--tensor-parallel-size\"\
,\n str(gpu_count),\n ]\n else:\n \
\ command = [\n sys.executable,\n \"\
-m\",\n \"vllm.entrypoints.openai.api_server\",\n \
\ \"--model\",\n model_path,\n ]\n\n \
\ subprocess.Popen(args=command)\n\n print(f\"Waiting for vLLM\
\ server to start at {VLLM_SERVER}...\")\n\n for attempt in range(retries):\n\
\ try:\n response = requests.get(f\"{VLLM_SERVER}/models\"\
)\n if response.status_code == 200:\n \
\ print(f\"vLLM server is up and running at {VLLM_SERVER}.\")\n \
\ return\n except requests.ConnectionError:\n \
\ pass\n\n print(\n f\"Server not available\
\ yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...\"\
\n )\n time.sleep(delay)\n\n raise RuntimeError(\n\
\ f\"Failed to start vLLM server at {VLLM_SERVER} after {retries}\
\ retries.\"\n )\n\n # This seems like excessive effort to stop\
\ the vllm process, but merely saving & killing the pid doesn't work\n \
\ # Also, the base image does not include 'pkill' cmd, so can't pkill\
\ -f vllm.entrypoints.openai.api_server either\n def stop_vllm():\n \
\ import psutil\n\n for process in psutil.process_iter(attrs=[\"\
pid\", \"name\", \"cmdline\"]):\n cmdline = process.info.get(\"\
cmdline\")\n if cmdline and \"vllm.entrypoints.openai.api_server\"\
\ in cmdline:\n print(\n f\"Found vLLM\
\ server process with PID: {process.info['pid']}, terminating...\"\n \
\ )\n try:\n process.terminate()\
\ # Try graceful termination\n process.wait(timeout=5)\
\ # Wait a bit for it to terminate\n if process.is_running():\n\
\ print(\n f\"Forcefully\
\ killing vLLM server process with PID: {process.info['pid']}\"\n \
\ )\n process.kill() # Force kill\
\ if it's still running\n print(\n \
\ f\"Successfully stopped vLLM server with PID: {process.info['pid']}\"\
\n )\n except psutil.NoSuchProcess:\n\
\ print(f\"Process with PID {process.info['pid']} no\
\ longer exists.\")\n except psutil.AccessDenied:\n \
\ print(\n f\"Access denied when trying\
\ to terminate process with PID {process.info['pid']}.\"\n \
\ )\n except Exception as e:\n print(\n\
\ f\"Failed to terminate process with PID {process.info['pid']}.\
\ Error: {e}\"\n )\n\n os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"\
] = \"expandable_segments:True\"\n\n gpu_available = torch.cuda.is_available()\n\
\ gpu_name = (\n torch.cuda.get_device_name(torch.cuda.current_device())\n\
\ if gpu_available\n else \"No GPU available\"\n )\n \
Expand Down
102 changes: 91 additions & 11 deletions standalone/standalone.py
Original file line number Diff line number Diff line change
Expand Up @@ -1366,7 +1366,7 @@ def create_eval_job(
def run_mt_bench_op(
models_path_prefix: str,
mt_bench_output: Output[Artifact],
mt_bench_output: str,
merge_system_user_message: bool,
# generate_answers,judgment uses a magic word for its mt_bench evaluator - 'auto'
# with 'auto', number of gpus allocated for serving is calculated based on environment
Expand All @@ -1380,13 +1380,93 @@ def run_mt_bench_op(
import os
import torch
from helpers import (
VLLM_SERVER,
launch_vllm,
stop_vllm,
)
from instructlab.eval.mt_bench import MTBenchEvaluator
VLLM_SERVER = "http://localhost:8000/v1"
def launch_vllm(
    model_path: str, gpu_count: int, retries: int = 120, delay: int = 5
):
    """Start a vLLM OpenAI-compatible API server and block until it answers.

    The server is spawned as a child process running
    ``vllm.entrypoints.openai.api_server`` with the current interpreter, then
    ``{VLLM_SERVER}/models`` is polled until it returns HTTP 200.
    ``VLLM_SERVER`` is read from the enclosing scope.

    Args:
        model_path: Value passed to the server's ``--model`` option.
        gpu_count: When greater than zero, passed as ``--tensor-parallel-size``;
            otherwise that option is omitted.
        retries: Maximum number of readiness probes before giving up.
        delay: Seconds to sleep between two probes.

    Raises:
        RuntimeError: If the server process exits before becoming ready, or if
            it is still not answering after ``retries`` probes.
    """
    import subprocess
    import sys
    import time

    import requests

    # Upper bound (seconds) for a single readiness probe, so a server that
    # accepts the connection but never answers cannot stall the loop forever.
    probe_timeout = 10

    command = [
        sys.executable,
        "-m",
        "vllm.entrypoints.openai.api_server",
        "--model",
        model_path,
    ]
    if gpu_count > 0:
        command += ["--tensor-parallel-size", str(gpu_count)]

    server = subprocess.Popen(args=command)

    print(f"Waiting for vLLM server to start at {VLLM_SERVER}...")

    for attempt in range(retries):
        # Fail fast: if the child already exited there is nothing to wait for.
        exit_code = server.poll()
        if exit_code is not None:
            raise RuntimeError(
                f"vLLM server process exited with code {exit_code} before becoming ready at {VLLM_SERVER}."
            )

        try:
            response = requests.get(f"{VLLM_SERVER}/models", timeout=probe_timeout)
            if response.status_code == 200:
                print(f"vLLM server is up and running at {VLLM_SERVER}.")
                return
        except (requests.ConnectionError, requests.Timeout):
            # Server not listening yet, or too slow to answer: retry below.
            pass

        print(
            f"Server not available yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})..."
        )
        time.sleep(delay)

    raise RuntimeError(
        f"Failed to start vLLM server at {VLLM_SERVER} after {retries} retries."
    )
# This seems like excessive effort to stop the vllm process, but merely saving & killing the pid doesn't work
# Also, the base image does not include 'pkill' cmd, so can't pkill -f vllm.entrypoints.openai.api_server either
def stop_vllm():
    """Terminate every process whose command line runs the vLLM API server.

    Each matching process is first asked to terminate; if it is still alive
    after 5 seconds it is force-killed. Failures are reported with ``print``
    and never raised, so one stubborn process does not prevent the others
    from being stopped.
    """
    import psutil

    for process in psutil.process_iter(attrs=["pid", "name", "cmdline"]):
        cmdline = process.info.get("cmdline")
        if not cmdline or "vllm.entrypoints.openai.api_server" not in cmdline:
            continue

        pid = process.info["pid"]
        print(f"Found vLLM server process with PID: {pid}, terminating...")
        try:
            process.terminate()  # Try graceful termination
            try:
                process.wait(timeout=5)  # Wait a bit for it to terminate
            except psutil.TimeoutExpired:
                # wait() raises instead of returning when the process is
                # still alive, so the force-kill has to live in this handler.
                print(f"Forcefully killing vLLM server process with PID: {pid}")
                process.kill()
                process.wait(timeout=5)
            print(f"Successfully stopped vLLM server with PID: {pid}")
        except psutil.NoSuchProcess:
            print(f"Process with PID {pid} no longer exists.")
        except psutil.AccessDenied:
            print(
                f"Access denied when trying to terminate process with PID {pid}."
            )
        except Exception as e:
            print(f"Failed to terminate process with PID {pid}. Error: {e}")
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
gpu_available = torch.cuda.is_available()
Expand Down Expand Up @@ -1477,7 +1557,7 @@ def run_mt_bench_op(
init_containers = [
kubernetes.client.V1Container(
name=f"run-eval-{eval_type}",
image="quay.io/sallyom/instructlab-ocp:eval-10-8",
image="registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.2",
command=["/bin/sh", "-ce"],
args=[
PYTHON_EXECUTOR.format(
Expand All @@ -1497,7 +1577,7 @@ def run_mt_bench_op(
]
container = kubernetes.client.V1Container(
name=f"output-eval-{eval_type}-scores",
image="quay.io/sallyom/instructlab-ocp:eval-10-8",
image="registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.2",
command=["/bin/sh", "-c"],
args=[f"cat {MT_BENCH_SCORES_PATH}"],
volume_mounts=get_vol_mount(),
Expand Down Expand Up @@ -1900,9 +1980,9 @@ def decode_base64(data):
name=judge_serving_details_secret, namespace=namespace
),
string_data={
"judge_name": judge_serving_model_name,
"judge_api_key": judge_serving_model_api_key,
"judge_endpoint": judge_serving_endpoint,
"JUDGE_NAME": judge_serving_model_name,
"JUDGE_API_KEY": judge_serving_model_api_key,
"JUDGE_ENDPOINT": judge_serving_endpoint,
},
)

Expand Down
10 changes: 5 additions & 5 deletions standalone/standalone.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -1184,7 +1184,7 @@ def create_eval_job(
init_containers = [
kubernetes.client.V1Container(
name=f"run-eval-{eval_type}",
image="{{exec_run_mt_bench_op_image}}",
image="registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.2",
command=["/bin/sh", "-ce"],
args=[
PYTHON_EXECUTOR.format(
Expand All @@ -1204,7 +1204,7 @@ def create_eval_job(
]
container = kubernetes.client.V1Container(
name=f"output-eval-{eval_type}-scores",
image="{{exec_run_mt_bench_op_image}}",
image="registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.2",
command=["/bin/sh", "-c"],
args=[f"cat {MT_BENCH_SCORES_PATH}"],
volume_mounts=get_vol_mount(),
Expand Down Expand Up @@ -1607,9 +1607,9 @@ def sdg_data_fetch(
name=judge_serving_details_secret, namespace=namespace
),
string_data={
"judge_name": judge_serving_model_name,
"judge_api_key": judge_serving_model_api_key,
"judge_endpoint": judge_serving_endpoint,
"JUDGE_NAME": judge_serving_model_name,
"JUDGE_API_KEY": judge_serving_model_api_key,
"JUDGE_ENDPOINT": judge_serving_endpoint,
},
)

Expand Down

0 comments on commit 2eb8e1c

Please sign in to comment.