feat: allow using custom certificates for serving
The model serving endpoint for the judge model does not always have
verified certificates; sometimes they are self-signed. The communication
will be encrypted, but the certificate chain won't validate. A new
`--judge-serving-model-ca-cert` flag now allows us to use custom
certificates when interacting with the judge model serving endpoint.
The secret that holds the judge model serving details can be amended
with a new property: `JUDGE_CA_CERT: "cm-ca-cert"` to point to the
ConfigMap that contains the custom certificates bundle.
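
For illustration, the ConfigMap referenced by that value might look like
this minimal sketch (the `cm-ca-cert` name comes from the example above;
the `ca.crt` key and its contents are hypothetical):

    # Hypothetical ConfigMap carrying the custom CA bundle for the judge endpoint.
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: cm-ca-cert
    data:
      ca.crt: |
        -----BEGIN CERTIFICATE-----
        ...
        -----END CERTIFICATE-----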

Signed-off-by: Sébastien Han <[email protected]>
leseb committed Oct 29, 2024
1 parent abe3dc3 commit b5dfd81
Showing 6 changed files with 344 additions and 139 deletions.
20 changes: 20 additions & 0 deletions eval/final/components.py
@@ -39,6 +39,26 @@ def run_final_eval_op(
    from instructlab.eval.mt_bench import MTBenchBranchEvaluator
    from instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores, sort_score

    if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
        import httpx
        import openai

        # Create a custom HTTP client
        class CustomHttpClient(httpx.Client):
            def __init__(self, *args, **kwargs):
                # Use the custom CA certificate
                kwargs.setdefault("verify", judge_ca_cert)
                super().__init__(*args, **kwargs)

        # Create a new OpenAI class that uses the custom HTTP client
        class CustomOpenAI(openai.OpenAI):
            def __init__(self, *args, **kwargs):
                custom_client = CustomHttpClient()
                super().__init__(http_client=custom_client, *args, **kwargs)

        # Monkey patch the OpenAI class in the openai module, so that the eval lib can use it
        openai.OpenAI = CustomOpenAI

    print("Starting Final Eval...")

    def launch_vllm(
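The monkey patch is needed because the instructlab eval library constructs
its own `openai.OpenAI()` client internally, leaving no call site at which
to pass a custom `http_client`. When you do control the call site, the same
effect can be achieved directly; a minimal standalone sketch, assuming
illustrative endpoint and key values:

    import os

    import httpx
    import openai

    if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
        # Hand the judge endpoint a client that trusts the custom CA bundle.
        client = openai.OpenAI(
            base_url="https://judge.example.com/v1",  # hypothetical endpoint
            api_key=os.getenv("JUDGE_SERVING_MODEL_API_KEY", "sk-placeholder"),
            http_client=httpx.Client(verify=judge_ca_cert),
        )
        models = client.models.list()  # TLS is verified against the custom CA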
20 changes: 20 additions & 0 deletions eval/mt_bench/components.py
@@ -28,6 +28,26 @@ def run_mt_bench_op(
    import torch
    from instructlab.eval.mt_bench import MTBenchEvaluator

    if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
        import httpx
        import openai

        # Create a custom HTTP client
        class CustomHttpClient(httpx.Client):
            def __init__(self, *args, **kwargs):
                # Use the custom CA certificate
                kwargs.setdefault("verify", judge_ca_cert)
                super().__init__(*args, **kwargs)

        # Create a new OpenAI class that uses the custom HTTP client
        class CustomOpenAI(openai.OpenAI):
            def __init__(self, *args, **kwargs):
                custom_client = CustomHttpClient()
                super().__init__(http_client=custom_client, *args, **kwargs)

        # Monkey patch the OpenAI class in the openai module, so that the eval lib can use it
        openai.OpenAI = CustomOpenAI

    def launch_vllm(
        model_path: str, gpu_count: int, retries: int = 120, delay: int = 10
    ) -> tuple:
81 changes: 52 additions & 29 deletions pipeline.yaml
@@ -1191,15 +1191,26 @@ deploymentSpec:
\ os\n import subprocess\n\n import torch\n from instructlab.eval.mmlu\
\ import MMLU_TASKS, MMLUBranchEvaluator\n from instructlab.eval.mt_bench\
\ import MTBenchBranchEvaluator\n from instructlab.model.evaluate import\
\ qa_pairs_to_qna_to_avg_scores, sort_score\n\n print(\"Starting Final\
\ Eval...\")\n\n def launch_vllm(\n model_path: str, gpu_count:\
\ int, retries: int = 120, delay: int = 10\n ) -> tuple:\n import\
\ subprocess\n import sys\n import time\n\n import\
\ requests\n from instructlab.model.backends.common import free_tcp_ipv4_port\n\
\n free_port = free_tcp_ipv4_port(\"127.0.0.1\")\n port =\
\ str(free_port)\n vllm_server = f\"http://127.0.0.1:{port}/v1\"\n\
\n command = [\n sys.executable,\n \"-m\",\n\
\ \"vllm.entrypoints.openai.api_server\",\n \"--port\"\
\ qa_pairs_to_qna_to_avg_scores, sort_score\n\n if judge_ca_cert := os.getenv(\"\
JUDGE_CA_CERT_PATH\"):\n import httpx\n import openai\n\n\
\ # Create a custom HTTP client\n class CustomHttpClient(httpx.Client):\n\
\ def __init__(self, *args, **kwargs):\n # Use\
\ the custom CA certificate\n kwargs.setdefault(\"verify\"\
, judge_ca_cert)\n super().__init__(*args, **kwargs)\n\n\
\ # Create a new OpenAI class that uses the custom HTTP client\n\
\ class CustomOpenAI(openai.OpenAI):\n def __init__(self,\
\ *args, **kwargs):\n custom_client = CustomHttpClient()\n\
\ super().__init__(http_client=custom_client, *args, **kwargs)\n\
\n # Monkey patch the OpenAI class in the openai module, so that\
\ the eval lib can use it\n openai.OpenAI = CustomOpenAI\n\n print(\"\
Starting Final Eval...\")\n\n def launch_vllm(\n model_path: str,\
\ gpu_count: int, retries: int = 120, delay: int = 10\n ) -> tuple:\n\
\ import subprocess\n import sys\n import time\n\n\
\ import requests\n from instructlab.model.backends.common\
\ import free_tcp_ipv4_port\n\n free_port = free_tcp_ipv4_port(\"\
127.0.0.1\")\n port = str(free_port)\n vllm_server = f\"http://127.0.0.1:{port}/v1\"\
\n\n command = [\n sys.executable,\n \"-m\"\
,\n \"vllm.entrypoints.openai.api_server\",\n \"--port\"\
,\n port,\n \"--model\",\n model_path,\n\
\ ]\n if gpu_count > 0:\n command += [\n \
\ \"--tensor-parallel-size\",\n str(gpu_count),\n\
@@ -1446,26 +1457,38 @@ deploymentSpec:
\ Optional[str] = None,\n device: str = None,\n best_score_file: Optional[str]\
\ = None,\n) -> NamedTuple(\"outputs\", best_model=str, best_score=float):\n\
\ import json\n import os\n import subprocess\n\n import torch\n\
\ from instructlab.eval.mt_bench import MTBenchEvaluator\n\n def launch_vllm(\n\
\ model_path: str, gpu_count: int, retries: int = 120, delay: int\
\ = 10\n ) -> tuple:\n import subprocess\n import sys\n\
\ import time\n\n import requests\n from instructlab.model.backends.common\
\ import free_tcp_ipv4_port\n\n free_port = free_tcp_ipv4_port(\"\
127.0.0.1\")\n port = str(free_port)\n vllm_server = f\"http://127.0.0.1:{port}/v1\"\
\n\n command = [\n sys.executable,\n \"-m\"\
,\n \"vllm.entrypoints.openai.api_server\",\n \"--port\"\
,\n port,\n \"--model\",\n model_path,\n\
\ ]\n if gpu_count > 0:\n command += [\n \
\ \"--tensor-parallel-size\",\n str(gpu_count),\n\
\ ]\n\n process = subprocess.Popen(args=command)\n\n \
\ print(f\"Waiting for vLLM server to start at {vllm_server}...\"\
)\n\n for attempt in range(retries):\n try:\n \
\ response = requests.get(f\"{vllm_server}/models\")\n \
\ if response.status_code == 200:\n print(f\"vLLM\
\ server is up and running at {vllm_server}.\")\n return\
\ process, vllm_server\n except requests.ConnectionError:\n \
\ pass\n\n print(\n f\"Server not\
\ available yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...\"\
\ from instructlab.eval.mt_bench import MTBenchEvaluator\n\n if judge_ca_cert\
\ := os.getenv(\"JUDGE_CA_CERT_PATH\"):\n import httpx\n import\
\ openai\n\n # Create a custom HTTP client\n class CustomHttpClient(httpx.Client):\n\
\ def __init__(self, *args, **kwargs):\n # Use\
\ the custom CA certificate\n kwargs.setdefault(\"verify\"\
, judge_ca_cert)\n super().__init__(*args, **kwargs)\n\n\
\ # Create a new OpenAI class that uses the custom HTTP client\n\
\ class CustomOpenAI(openai.OpenAI):\n def __init__(self,\
\ *args, **kwargs):\n custom_client = CustomHttpClient()\n\
\ super().__init__(http_client=custom_client, *args, **kwargs)\n\
\n # Monkey patch the OpenAI class in the openai module, so that\
\ the eval lib can use it\n openai.OpenAI = CustomOpenAI\n\n def\
\ launch_vllm(\n model_path: str, gpu_count: int, retries: int =\
\ 120, delay: int = 10\n ) -> tuple:\n import subprocess\n \
\ import sys\n import time\n\n import requests\n \
\ from instructlab.model.backends.common import free_tcp_ipv4_port\n\n\
\ free_port = free_tcp_ipv4_port(\"127.0.0.1\")\n port = str(free_port)\n\
\ vllm_server = f\"http://127.0.0.1:{port}/v1\"\n\n command\
\ = [\n sys.executable,\n \"-m\",\n \"\
vllm.entrypoints.openai.api_server\",\n \"--port\",\n \
\ port,\n \"--model\",\n model_path,\n \
\ ]\n if gpu_count > 0:\n command += [\n \
\ \"--tensor-parallel-size\",\n str(gpu_count),\n \
\ ]\n\n process = subprocess.Popen(args=command)\n\n \
\ print(f\"Waiting for vLLM server to start at {vllm_server}...\")\n\n\
\ for attempt in range(retries):\n try:\n \
\ response = requests.get(f\"{vllm_server}/models\")\n \
\ if response.status_code == 200:\n print(f\"vLLM server\
\ is up and running at {vllm_server}.\")\n return process,\
\ vllm_server\n except requests.ConnectionError:\n \
\ pass\n\n print(\n f\"Server not available\
\ yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...\"\
\n )\n time.sleep(delay)\n\n raise RuntimeError(\n\
\ f\"Failed to start vLLM server at {vllm_server} after {retries}\
\ retries.\"\n )\n\n def shutdown_vllm(process: subprocess.Popen,\
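Both components read the bundle location from the `JUDGE_CA_CERT_PATH`
environment variable; the wiring that mounts the ConfigMap and sets that
variable is part of this commit but not visible in the hunks above. A
hypothetical pod-spec fragment showing the general pattern (mount path
and file name are assumptions):

    # Hypothetical fragment: mount the ConfigMap named by JUDGE_CA_CERT and
    # point JUDGE_CA_CERT_PATH at the bundle file inside it.
    volumes:
      - name: judge-ca-cert
        configMap:
          name: cm-ca-cert            # value of the JUDGE_CA_CERT secret key
    containers:
      - name: run-final-eval          # illustrative container name
        volumeMounts:
          - name: judge-ca-cert
            mountPath: /tmp/cert
        env:
          - name: JUDGE_CA_CERT_PATH
            value: /tmp/cert/ca.crt   # assumed bundle file name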
8 changes: 8 additions & 0 deletions standalone/README.md
@@ -399,6 +399,7 @@ evaluation
* `--judge-serving-model-name`: The name of the model to use for evaluation. **Optional**
* `--judge-serving-model-api-key`: The API key for the judge serving model. The `JUDGE_SERVING_MODEL_API_KEY`
environment variable can be used as well. **Optional**
* `--judge-serving-model-ca-cert`: Name of the Kubernetes ConfigMap containing the serving model CA cert. **Optional**
* `--judge-serving-model-secret`: The name of the Kubernetes secret containing the judge serving model
API key. **Optional** - If not provided, the script will expect the provided CLI options to evaluate the model.
* `--force-pull`: Force pull the data (sdg data, model and taxonomy) from the object store even if it already
@@ -513,9 +514,16 @@ The list of all mandatory keys:
* `JUDGE_ENDPOINT`: Serving endpoint for evaluation - **Required**
* `JUDGE_NAME`: The name of the model to use for evaluation - **Required**

Optional keys:

* `JUDGE_CA_CERT`: Name of the Kubernetes ConfigMap containing the custom CA certificate bundle for the evaluation endpoint - **Optional**

> [!WARNING]
> Mind the casing of the keys: the script expects them in upper case.

> [!WARNING]
> Make sure the endpoint URL ends with `/v1`.
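
For example, a judge serving secret carrying the optional CA key could look
like the following minimal sketch (the secret name and values are illustrative,
and any remaining mandatory keys not shown in this excerpt must be present too;
`cm-ca-cert` must match an existing ConfigMap holding the CA bundle):

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: judge-serving-details     # illustrative name
stringData:
  JUDGE_ENDPOINT: "https://judge.example.com/v1"
  JUDGE_NAME: "my-judge-model"
  JUDGE_CA_CERT: "cm-ca-cert"     # ConfigMap with the custom CA bundle
```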

#### Running the Script Without Kubernetes Secret

Alternatively, you can provide the necessary information directly via CLI options or environment,
Expand Down