feat: allow using custom certificates for serving
The model serving endpoint for the judge model does not always have
verified certificates; sometimes they are self-signed. The communication
will be encrypted, but the certificate chain won't validate. A new
`--judge-serving-model-ca-cert` flag now allows us to use custom
certificates when interacting with the judge model serving endpoint.
The secret that holds the judge model serving details can be amended
with a new property: `JUDGE_CA_CERT: "cm-ca-cert"` to point to the
ConfigMap that contains the custom certificates bundle.
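
For illustration, the ConfigMap referenced by that value might look like
this minimal sketch (the `cm-ca-cert` name comes from the example above;
the `ca.crt` key and its contents are hypothetical):

    # Hypothetical ConfigMap carrying the custom CA bundle for the judge endpoint.
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: cm-ca-cert
    data:
      ca.crt: |
        -----BEGIN CERTIFICATE-----
        ...
        -----END CERTIFICATE-----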

Signed-off-by: Sébastien Han <[email protected]>
leseb committed Oct 29, 2024
1 parent abe3dc3 commit b5dfd81
Showing 6 changed files with 344 additions and 139 deletions.
20 changes: 20 additions & 0 deletions eval/final/components.py
@@ -39,6 +39,26 @@ def run_final_eval_op(
    from instructlab.eval.mt_bench import MTBenchBranchEvaluator
    from instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores, sort_score

    if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
        import httpx
        import openai

        # Create a custom HTTP client
        class CustomHttpClient(httpx.Client):
            def __init__(self, *args, **kwargs):
                # Use the custom CA certificate
                kwargs.setdefault("verify", judge_ca_cert)
                super().__init__(*args, **kwargs)

        # Create a new OpenAI class that uses the custom HTTP client
        class CustomOpenAI(openai.OpenAI):
            def __init__(self, *args, **kwargs):
                custom_client = CustomHttpClient()
                super().__init__(http_client=custom_client, *args, **kwargs)

        # Monkey patch the OpenAI class in the openai module, so that the eval lib can use it
        openai.OpenAI = CustomOpenAI

    print("Starting Final Eval...")

    def launch_vllm(
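The monkey patch is needed because the instructlab eval library constructs
its own `openai.OpenAI()` client internally, leaving no call site at which
to pass a custom `http_client`. When you do control the call site, the same
effect can be achieved directly; a minimal standalone sketch, assuming
illustrative endpoint and key values:

    import os

    import httpx
    import openai

    if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
        # Hand the judge endpoint a client that trusts the custom CA bundle.
        client = openai.OpenAI(
            base_url="https://judge.example.com/v1",  # hypothetical endpoint
            api_key=os.getenv("JUDGE_SERVING_MODEL_API_KEY", "sk-placeholder"),
            http_client=httpx.Client(verify=judge_ca_cert),
        )
        models = client.models.list()  # TLS is verified against the custom CA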
20 changes: 20 additions & 0 deletions eval/mt_bench/components.py
@@ -28,6 +28,26 @@ def run_mt_bench_op(
    import torch
    from instructlab.eval.mt_bench import MTBenchEvaluator

    if judge_ca_cert := os.getenv("JUDGE_CA_CERT_PATH"):
        import httpx
        import openai

        # Create a custom HTTP client
        class CustomHttpClient(httpx.Client):
            def __init__(self, *args, **kwargs):
                # Use the custom CA certificate
                kwargs.setdefault("verify", judge_ca_cert)
                super().__init__(*args, **kwargs)

        # Create a new OpenAI class that uses the custom HTTP client
        class CustomOpenAI(openai.OpenAI):
            def __init__(self, *args, **kwargs):
                custom_client = CustomHttpClient()
                super().__init__(http_client=custom_client, *args, **kwargs)

        # Monkey patch the OpenAI class in the openai module, so that the eval lib can use it
        openai.OpenAI = CustomOpenAI

    def launch_vllm(
        model_path: str, gpu_count: int, retries: int = 120, delay: int = 10
    ) -> tuple:
81 changes: 52 additions & 29 deletions pipeline.yaml
@@ -1191,15 +1191,26 @@ deploymentSpec:
\ os\n import subprocess\n\n import torch\n from instructlab.eval.mmlu\
\ import MMLU_TASKS, MMLUBranchEvaluator\n from instructlab.eval.mt_bench\
\ import MTBenchBranchEvaluator\n from instructlab.model.evaluate import\
\ qa_pairs_to_qna_to_avg_scores, sort_score\n\n print(\"Starting Final\
\ Eval...\")\n\n def launch_vllm(\n model_path: str, gpu_count:\
\ int, retries: int = 120, delay: int = 10\n ) -> tuple:\n import\
\ subprocess\n import sys\n import time\n\n import\
\ requests\n from instructlab.model.backends.common import free_tcp_ipv4_port\n\
\n free_port = free_tcp_ipv4_port(\"127.0.0.1\")\n port =\
\ str(free_port)\n vllm_server = f\"http://127.0.0.1:{port}/v1\"\n\
\n command = [\n sys.executable,\n \"-m\",\n\
\ \"vllm.entrypoints.openai.api_server\",\n \"--port\"\
\ qa_pairs_to_qna_to_avg_scores, sort_score\n\n if judge_ca_cert := os.getenv(\"\
JUDGE_CA_CERT_PATH\"):\n import httpx\n import openai\n\n\
\ # Create a custom HTTP client\n class CustomHttpClient(httpx.Client):\n\
\ def __init__(self, *args, **kwargs):\n # Use\
\ the custom CA certificate\n kwargs.setdefault(\"verify\"\
, judge_ca_cert)\n super().__init__(*args, **kwargs)\n\n\
\ # Create a new OpenAI class that uses the custom HTTP client\n\
\ class CustomOpenAI(openai.OpenAI):\n def __init__(self,\
\ *args, **kwargs):\n custom_client = CustomHttpClient()\n\
\ super().__init__(http_client=custom_client, *args, **kwargs)\n\
\n # Monkey patch the OpenAI class in the openai module, so that\
\ the eval lib can use it\n openai.OpenAI = CustomOpenAI\n\n print(\"\
Starting Final Eval...\")\n\n def launch_vllm(\n model_path: str,\
\ gpu_count: int, retries: int = 120, delay: int = 10\n ) -> tuple:\n\
\ import subprocess\n import sys\n import time\n\n\
\ import requests\n from instructlab.model.backends.common\
\ import free_tcp_ipv4_port\n\n free_port = free_tcp_ipv4_port(\"\
127.0.0.1\")\n port = str(free_port)\n vllm_server = f\"http://127.0.0.1:{port}/v1\"\
\n\n command = [\n sys.executable,\n \"-m\"\
,\n \"vllm.entrypoints.openai.api_server\",\n \"--port\"\
,\n port,\n \"--model\",\n model_path,\n\
\ ]\n if gpu_count > 0:\n command += [\n \
\ \"--tensor-parallel-size\",\n str(gpu_count),\n\
@@ -1446,26 +1457,38 @@ deploymentSpec:
\ Optional[str] = None,\n device: str = None,\n best_score_file: Optional[str]\
\ = None,\n) -> NamedTuple(\"outputs\", best_model=str, best_score=float):\n\
\ import json\n import os\n import subprocess\n\n import torch\n\
\ from instructlab.eval.mt_bench import MTBenchEvaluator\n\n def launch_vllm(\n\
\ model_path: str, gpu_count: int, retries: int = 120, delay: int\
\ = 10\n ) -> tuple:\n import subprocess\n import sys\n\
\ import time\n\n import requests\n from instructlab.model.backends.common\
\ import free_tcp_ipv4_port\n\n free_port = free_tcp_ipv4_port(\"\
127.0.0.1\")\n port = str(free_port)\n vllm_server = f\"http://127.0.0.1:{port}/v1\"\
\n\n command = [\n sys.executable,\n \"-m\"\
,\n \"vllm.entrypoints.openai.api_server\",\n \"--port\"\
,\n port,\n \"--model\",\n model_path,\n\
\ ]\n if gpu_count > 0:\n command += [\n \
\ \"--tensor-parallel-size\",\n str(gpu_count),\n\
\ ]\n\n process = subprocess.Popen(args=command)\n\n \
\ print(f\"Waiting for vLLM server to start at {vllm_server}...\"\
)\n\n for attempt in range(retries):\n try:\n \
\ response = requests.get(f\"{vllm_server}/models\")\n \
\ if response.status_code == 200:\n print(f\"vLLM\
\ server is up and running at {vllm_server}.\")\n return\
\ process, vllm_server\n except requests.ConnectionError:\n \
\ pass\n\n print(\n f\"Server not\
\ available yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...\"\
\ from instructlab.eval.mt_bench import MTBenchEvaluator\n\n if judge_ca_cert\
\ := os.getenv(\"JUDGE_CA_CERT_PATH\"):\n import httpx\n import\
\ openai\n\n # Create a custom HTTP client\n class CustomHttpClient(httpx.Client):\n\
\ def __init__(self, *args, **kwargs):\n # Use\
\ the custom CA certificate\n kwargs.setdefault(\"verify\"\
, judge_ca_cert)\n super().__init__(*args, **kwargs)\n\n\
\ # Create a new OpenAI class that uses the custom HTTP client\n\
\ class CustomOpenAI(openai.OpenAI):\n def __init__(self,\
\ *args, **kwargs):\n custom_client = CustomHttpClient()\n\
\ super().__init__(http_client=custom_client, *args, **kwargs)\n\
\n # Monkey patch the OpenAI class in the openai module, so that\
\ the eval lib can use it\n openai.OpenAI = CustomOpenAI\n\n def\
\ launch_vllm(\n model_path: str, gpu_count: int, retries: int =\
\ 120, delay: int = 10\n ) -> tuple:\n import subprocess\n \
\ import sys\n import time\n\n import requests\n \
\ from instructlab.model.backends.common import free_tcp_ipv4_port\n\n\
\ free_port = free_tcp_ipv4_port(\"127.0.0.1\")\n port = str(free_port)\n\
\ vllm_server = f\"http://127.0.0.1:{port}/v1\"\n\n command\
\ = [\n sys.executable,\n \"-m\",\n \"\
vllm.entrypoints.openai.api_server\",\n \"--port\",\n \
\ port,\n \"--model\",\n model_path,\n \
\ ]\n if gpu_count > 0:\n command += [\n \
\ \"--tensor-parallel-size\",\n str(gpu_count),\n \
\ ]\n\n process = subprocess.Popen(args=command)\n\n \
\ print(f\"Waiting for vLLM server to start at {vllm_server}...\")\n\n\
\ for attempt in range(retries):\n try:\n \
\ response = requests.get(f\"{vllm_server}/models\")\n \
\ if response.status_code == 200:\n print(f\"vLLM server\
\ is up and running at {vllm_server}.\")\n return process,\
\ vllm_server\n except requests.ConnectionError:\n \
\ pass\n\n print(\n f\"Server not available\
\ yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...\"\
\n )\n time.sleep(delay)\n\n raise RuntimeError(\n\
\ f\"Failed to start vLLM server at {vllm_server} after {retries}\
\ retries.\"\n )\n\n def shutdown_vllm(process: subprocess.Popen,\
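Both components read the bundle location from the `JUDGE_CA_CERT_PATH`
environment variable; the wiring that mounts the ConfigMap and sets that
variable is part of this commit but not visible in the hunks above. A
hypothetical pod-spec fragment showing the general pattern (mount path
and file name are assumptions):

    # Hypothetical fragment: mount the ConfigMap named by JUDGE_CA_CERT and
    # point JUDGE_CA_CERT_PATH at the bundle file inside it.
    volumes:
      - name: judge-ca-cert
        configMap:
          name: cm-ca-cert            # value of the JUDGE_CA_CERT secret key
    containers:
      - name: run-final-eval          # illustrative container name
        volumeMounts:
          - name: judge-ca-cert
            mountPath: /tmp/cert
        env:
          - name: JUDGE_CA_CERT_PATH
            value: /tmp/cert/ca.crt   # assumed bundle file name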
8 changes: 8 additions & 0 deletions standalone/README.md
@@ -399,6 +399,7 @@ evaluation
* `--judge-serving-model-name`: The name of the model to use for evaluation. **Optional**
* `--judge-serving-model-api-key`: The API key for the judge serving model. The `JUDGE_SERVING_MODEL_API_KEY`
environment variable can be used as well. **Optional**
* `--judge-serving-model-ca-cert`: Name of the Kubernetes ConfigMap containing the serving model CA cert. **Optional**
* `--judge-serving-model-secret`: The name of the Kubernetes secret containing the judge serving model
API key. **Optional** - If not provided, the script will expect the provided CLI options to evaluate the model.
* `--force-pull`: Force pull the data (sdg data, model and taxonomy) from the object store even if it already
@@ -513,9 +514,16 @@ The list of all mandatory keys:
* `JUDGE_ENDPOINT`: Serving endpoint for evaluation - **Required**
* `JUDGE_NAME`: The name of the model to use for evaluation - **Required**

Optional keys:

* `JUDGE_CA_CERT`: Name of the Kubernetes ConfigMap containing the custom CA certificate bundle for the evaluation endpoint - **Optional**

> [!WARNING]
> Mind the casing of the keys: the script expects them in upper case.

> [!WARNING]
> Make sure the endpoint URL ends with `/v1`.
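
For example, a judge serving secret carrying the optional CA key could look
like the following minimal sketch (the secret name and values are illustrative,
and any remaining mandatory keys not shown in this excerpt must be present too;
`cm-ca-cert` must match an existing ConfigMap holding the CA bundle):

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: judge-serving-details     # illustrative name
stringData:
  JUDGE_ENDPOINT: "https://judge.example.com/v1"
  JUDGE_NAME: "my-judge-model"
  JUDGE_CA_CERT: "cm-ca-cert"     # ConfigMap with the custom CA bundle
```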

#### Running the Script Without Kubernetes Secret

Alternatively, you can provide the necessary information directly via CLI options or environment,
Expand Down