Skip to content

Commit

Permalink
Merge pull request #135 from leseb/bp-128
Browse files Browse the repository at this point in the history
feat: allow using custom certificates for serving (backport #128)
  • Loading branch information
MichaelClifford authored Oct 30, 2024
2 parents abe3dc3 + 64402d5 commit d0ce418
Show file tree
Hide file tree
Showing 6 changed files with 443 additions and 179 deletions.
20 changes: 20 additions & 0 deletions eval/final/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,26 @@ def run_final_eval_op(
from instructlab.eval.mt_bench import MTBenchBranchEvaluator
from instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores, sort_score

if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
    # Imported lazily: these are only needed when a custom CA bundle is configured.
    import httpx
    import openai

    class CustomHttpClient(httpx.Client):
        """httpx client whose TLS verification defaults to the judge CA certificate."""

        def __init__(self, *args, **kwargs):
            # setdefault leaves an explicit ``verify`` from the caller untouched.
            kwargs.setdefault("verify", judge_ca_cert)
            super().__init__(*args, **kwargs)

    class CustomOpenAI(openai.OpenAI):
        """OpenAI client that uses CustomHttpClient unless the caller supplies a client."""

        def __init__(self, *args, **kwargs):
            # Only build the default client when none was passed. Forwarding
            # ``http_client=`` alongside ``**kwargs`` raised TypeError (multiple
            # values for keyword argument) whenever the caller provided one.
            if kwargs.get("http_client") is None:
                kwargs["http_client"] = CustomHttpClient()
            super().__init__(*args, **kwargs)

    # Monkey patch the OpenAI class in the openai module, so that the eval lib can use it.
    # NOTE(review): this only affects code that looks up ``openai.OpenAI`` at call
    # time; a module that already ran ``from openai import OpenAI`` keeps the
    # original class - confirm how the eval lib imports it.
    openai.OpenAI = CustomOpenAI

print("Starting Final Eval...")

def launch_vllm(
Expand Down
20 changes: 20 additions & 0 deletions eval/mt_bench/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,26 @@ def run_mt_bench_op(
import torch
from instructlab.eval.mt_bench import MTBenchEvaluator

if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
    # Imported lazily: these are only needed when a custom CA bundle is configured.
    import httpx
    import openai

    class CustomHttpClient(httpx.Client):
        """httpx client whose TLS verification defaults to the judge CA certificate."""

        def __init__(self, *args, **kwargs):
            # setdefault leaves an explicit ``verify`` from the caller untouched.
            kwargs.setdefault("verify", judge_ca_cert)
            super().__init__(*args, **kwargs)

    class CustomOpenAI(openai.OpenAI):
        """OpenAI client that uses CustomHttpClient unless the caller supplies a client."""

        def __init__(self, *args, **kwargs):
            # Only build the default client when none was passed. Forwarding
            # ``http_client=`` alongside ``**kwargs`` raised TypeError (multiple
            # values for keyword argument) whenever the caller provided one.
            if kwargs.get("http_client") is None:
                kwargs["http_client"] = CustomHttpClient()
            super().__init__(*args, **kwargs)

    # Monkey patch the OpenAI class in the openai module, so that the eval lib can use it.
    # NOTE(review): this only affects code that looks up ``openai.OpenAI`` at call
    # time; a module that already ran ``from openai import OpenAI`` keeps the
    # original class - confirm how the eval lib imports it.
    openai.OpenAI = CustomOpenAI

def launch_vllm(
model_path: str, gpu_count: int, retries: int = 120, delay: int = 10
) -> tuple:
Expand Down
81 changes: 52 additions & 29 deletions pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1191,15 +1191,26 @@ deploymentSpec:
\ os\n import subprocess\n\n import torch\n from instructlab.eval.mmlu\
\ import MMLU_TASKS, MMLUBranchEvaluator\n from instructlab.eval.mt_bench\
\ import MTBenchBranchEvaluator\n from instructlab.model.evaluate import\
\ qa_pairs_to_qna_to_avg_scores, sort_score\n\n print(\"Starting Final\
\ Eval...\")\n\n def launch_vllm(\n model_path: str, gpu_count:\
\ int, retries: int = 120, delay: int = 10\n ) -> tuple:\n import\
\ subprocess\n import sys\n import time\n\n import\
\ requests\n from instructlab.model.backends.common import free_tcp_ipv4_port\n\
\n free_port = free_tcp_ipv4_port(\"127.0.0.1\")\n port =\
\ str(free_port)\n vllm_server = f\"http://127.0.0.1:{port}/v1\"\n\
\n command = [\n sys.executable,\n \"-m\",\n\
\ \"vllm.entrypoints.openai.api_server\",\n \"--port\"\
\ qa_pairs_to_qna_to_avg_scores, sort_score\n\n if judge_ca_cert := os.getenv(\"\
JUDGE_CA_CERT_PATH\"):\n import httpx\n import openai\n\n\
\ # Create a custom HTTP client\n class CustomHttpClient(httpx.Client):\n\
\ def __init__(self, *args, **kwargs):\n # Use\
\ the custom CA certificate\n kwargs.setdefault(\"verify\"\
, judge_ca_cert)\n super().__init__(*args, **kwargs)\n\n\
\ # Create a new OpenAI class that uses the custom HTTP client\n\
\ class CustomOpenAI(openai.OpenAI):\n def __init__(self,\
\ *args, **kwargs):\n custom_client = CustomHttpClient()\n\
\ super().__init__(http_client=custom_client, *args, **kwargs)\n\
\n # Monkey patch the OpenAI class in the openai module, so that\
\ the eval lib can use it\n openai.OpenAI = CustomOpenAI\n\n print(\"\
Starting Final Eval...\")\n\n def launch_vllm(\n model_path: str,\
\ gpu_count: int, retries: int = 120, delay: int = 10\n ) -> tuple:\n\
\ import subprocess\n import sys\n import time\n\n\
\ import requests\n from instructlab.model.backends.common\
\ import free_tcp_ipv4_port\n\n free_port = free_tcp_ipv4_port(\"\
127.0.0.1\")\n port = str(free_port)\n vllm_server = f\"http://127.0.0.1:{port}/v1\"\
\n\n command = [\n sys.executable,\n \"-m\"\
,\n \"vllm.entrypoints.openai.api_server\",\n \"--port\"\
,\n port,\n \"--model\",\n model_path,\n\
\ ]\n if gpu_count > 0:\n command += [\n \
\ \"--tensor-parallel-size\",\n str(gpu_count),\n\
Expand Down Expand Up @@ -1446,26 +1457,38 @@ deploymentSpec:
\ Optional[str] = None,\n device: str = None,\n best_score_file: Optional[str]\
\ = None,\n) -> NamedTuple(\"outputs\", best_model=str, best_score=float):\n\
\ import json\n import os\n import subprocess\n\n import torch\n\
\ from instructlab.eval.mt_bench import MTBenchEvaluator\n\n def launch_vllm(\n\
\ model_path: str, gpu_count: int, retries: int = 120, delay: int\
\ = 10\n ) -> tuple:\n import subprocess\n import sys\n\
\ import time\n\n import requests\n from instructlab.model.backends.common\
\ import free_tcp_ipv4_port\n\n free_port = free_tcp_ipv4_port(\"\
127.0.0.1\")\n port = str(free_port)\n vllm_server = f\"http://127.0.0.1:{port}/v1\"\
\n\n command = [\n sys.executable,\n \"-m\"\
,\n \"vllm.entrypoints.openai.api_server\",\n \"--port\"\
,\n port,\n \"--model\",\n model_path,\n\
\ ]\n if gpu_count > 0:\n command += [\n \
\ \"--tensor-parallel-size\",\n str(gpu_count),\n\
\ ]\n\n process = subprocess.Popen(args=command)\n\n \
\ print(f\"Waiting for vLLM server to start at {vllm_server}...\"\
)\n\n for attempt in range(retries):\n try:\n \
\ response = requests.get(f\"{vllm_server}/models\")\n \
\ if response.status_code == 200:\n print(f\"vLLM\
\ server is up and running at {vllm_server}.\")\n return\
\ process, vllm_server\n except requests.ConnectionError:\n \
\ pass\n\n print(\n f\"Server not\
\ available yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...\"\
\ from instructlab.eval.mt_bench import MTBenchEvaluator\n\n if judge_ca_cert\
\ := os.getenv(\"JUDGE_CA_CERT_PATH\"):\n import httpx\n import\
\ openai\n\n # Create a custom HTTP client\n class CustomHttpClient(httpx.Client):\n\
\ def __init__(self, *args, **kwargs):\n # Use\
\ the custom CA certificate\n kwargs.setdefault(\"verify\"\
, judge_ca_cert)\n super().__init__(*args, **kwargs)\n\n\
\ # Create a new OpenAI class that uses the custom HTTP client\n\
\ class CustomOpenAI(openai.OpenAI):\n def __init__(self,\
\ *args, **kwargs):\n custom_client = CustomHttpClient()\n\
\ super().__init__(http_client=custom_client, *args, **kwargs)\n\
\n # Monkey patch the OpenAI class in the openai module, so that\
\ the eval lib can use it\n openai.OpenAI = CustomOpenAI\n\n def\
\ launch_vllm(\n model_path: str, gpu_count: int, retries: int =\
\ 120, delay: int = 10\n ) -> tuple:\n import subprocess\n \
\ import sys\n import time\n\n import requests\n \
\ from instructlab.model.backends.common import free_tcp_ipv4_port\n\n\
\ free_port = free_tcp_ipv4_port(\"127.0.0.1\")\n port = str(free_port)\n\
\ vllm_server = f\"http://127.0.0.1:{port}/v1\"\n\n command\
\ = [\n sys.executable,\n \"-m\",\n \"\
vllm.entrypoints.openai.api_server\",\n \"--port\",\n \
\ port,\n \"--model\",\n model_path,\n \
\ ]\n if gpu_count > 0:\n command += [\n \
\ \"--tensor-parallel-size\",\n str(gpu_count),\n \
\ ]\n\n process = subprocess.Popen(args=command)\n\n \
\ print(f\"Waiting for vLLM server to start at {vllm_server}...\")\n\n\
\ for attempt in range(retries):\n try:\n \
\ response = requests.get(f\"{vllm_server}/models\")\n \
\ if response.status_code == 200:\n print(f\"vLLM server\
\ is up and running at {vllm_server}.\")\n return process,\
\ vllm_server\n except requests.ConnectionError:\n \
\ pass\n\n print(\n f\"Server not available\
\ yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...\"\
\n )\n time.sleep(delay)\n\n raise RuntimeError(\n\
\ f\"Failed to start vLLM server at {vllm_server} after {retries}\
\ retries.\"\n )\n\n def shutdown_vllm(process: subprocess.Popen,\
Expand Down
9 changes: 9 additions & 0 deletions standalone/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ evaluation
* `--judge-serving-model-name`: The name of the model to use for evaluation. **Optional**
* `--judge-serving-model-api-key`: The API key for the model to evaluate. `JUDGE_SERVING_MODEL_API_KEY`
environment variable can be used as well. **Optional**
* `--judge-serving-model-ca-cert`: Name of the Kubernetes ConfigMap containing the serving model CA cert. **Optional**
* `--judge-serving-model-secret`: The name of the Kubernetes secret containing the judge serving model
API key. **Optional** - If not provided, the script will expect the provided CLI options to evaluate the model.
* `--force-pull`: Force pull the data (sdg data, model and taxonomy) from the object store even if it already
Expand Down Expand Up @@ -513,9 +514,17 @@ The list of all mandatory keys:
* `JUDGE_ENDPOINT`: Serving endpoint for evaluation - **Required**
* `JUDGE_NAME`: The name of the model to use for evaluation - **Required**

Optional keys:

* `JUDGE_CA_CERT`: The name of the ConfigMap containing the custom CA cert - **Optional**
* `JUDGE_CA_CERT_CM_KEY`: The key of the CA Cert in the ConfigMap - **Optional**

> [!WARNING]
> Mind the upper case of the keys, as the script expects them to be in upper case.

> [!WARNING]
> Make sure the endpoint URL ends with `/v1`.

#### Running the Script Without Kubernetes Secret

Alternatively, you can provide the necessary information directly via CLI options or environment,
Expand Down
Loading

0 comments on commit d0ce418

Please sign in to comment.