Docker: Cuda11.8, make dependencies optional (#33)

* dependencies optional and docker
* update pydantic dependency
* update poetry
* update tests
* update double start
* update unit test

---------

Co-authored-by: Michael Feil <[email protected]>
michaelfeil · Nov 11, 2023 · bf9e10d
1 parent 2b930f1
commit bf9e10d
Show file tree

Hide file tree

Showing 9 changed files with 289 additions and 240 deletions.
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -39,6 +39,12 @@ jobs:
           working-directory: ${{ inputs.working-directory }}
           cache-key: core
 
+      - name: Install minimal dependencies and import
+        shell: bash
+        run: |
+          poetry install --without test
+          poetry run python -c "import infinity_emb"
+
       - name: Install dependencies
         shell: bash
         run: poetry install --extras all --with test

diff --git a/libs/infinity_emb/Dockerfile b/libs/infinity_emb/Dockerfile
@@ -1,5 +1,5 @@
 # Use the Python base image
-FROM nvidia/cuda:12.2.0-base-ubuntu22.04 AS base
+FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS base
 
 ENV PYTHONUNBUFFERED=1 \
     # prevents python creating .pyc files

diff --git a/libs/infinity_emb/infinity_emb/fastapi_schemas/errors.py b/libs/infinity_emb/infinity_emb/fastapi_schemas/errors.py
@@ -1,7 +1,10 @@
 from typing import Optional
 
-from fastapi import Request
-from fastapi.responses import JSONResponse
+try:
+    from fastapi import Request
+    from fastapi.responses import JSONResponse
+except ImportError:
+    Request = None
 
 
 class OpenAIException(Exception):

diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py
@@ -1,11 +1,6 @@
 import time
 from typing import List
 
-import typer
-import uvicorn
-from fastapi import FastAPI, responses, status
-from prometheus_fastapi_instrumentator import Instrumentator
-
 # prometheus
 import infinity_emb
 from infinity_emb.fastapi_schemas import docs, errors
@@ -59,6 +54,12 @@ def __init__(
 
     async def astart(self):
         """startup engine"""
+        if self.running:
+            raise ValueError(
+                "DoubleSpawn: already started `AsyncEmbeddingEngine`. "
+                " recommended use is via AsyncContextManager"
+                " `async with engine: ..`"
+            )
         self.running = True
         self._batch_handler = BatchHandler(
             max_batch_size=self.batch_size,
@@ -70,20 +71,14 @@ async def astart(self):
 
     async def astop(self):
         """stop engine"""
+        self._check_running()
         self.running = False
         await self._batch_handler.shutdown()
 
     async def __aenter__(self):
-        if self.running:
-            raise ValueError(
-                "DoubleSpawn: already started `AsyncEmbeddingEngine`. "
-                " recommended use is via AsyncContextManager"
-                " `async with engine: ..`"
-            )
         await self.astart()
 
     async def __aexit__(self, *args):
-        self._check_running()
         await self.astop()
 
     def overload_status(self):
@@ -128,10 +123,13 @@ def create_server(
     verbose: bool = False,
     model_warmup=True,
     doc_extra: dict = {},
-) -> FastAPI:
+):
     """
     creates the FastAPI App
     """
+    from fastapi import FastAPI, responses, status
+    from prometheus_fastapi_instrumentator import Instrumentator
+
     app = FastAPI(
         title=docs.FASTAPI_TITLE,
         summary=docs.FASTAPI_SUMMARY,
@@ -274,6 +272,8 @@ def start_uvicorn(
         engine: framework that should perform inference.
         model_warmup: perform model warmup before starting the server. Defaults to True.
     """
+    import uvicorn
+
     engine_load: InferenceEngine = InferenceEngine[engine.name]
     logger.setLevel(log_level.to_int())
 
@@ -291,6 +291,8 @@ def start_uvicorn(
 
 def cli():
     """fires the command line using Python `typer.run()`"""
+    import typer
+
     typer.run(start_uvicorn)
 
 

diff --git a/libs/infinity_emb/infinity_emb/log_handler.py b/libs/infinity_emb/infinity_emb/log_handler.py
@@ -1,17 +1,32 @@
 import logging
 from enum import Enum
-
-from rich.console import Console
-from rich.logging import RichHandler
-from uvicorn.config import LOG_LEVELS
+from typing import Dict
 
 logging.getLogger().handlers.clear()
 
+handlers = []
+try:
+    from rich.console import Console
+    from rich.logging import RichHandler
+
+    handlers.append(RichHandler(console=Console(stderr=True), show_time=False))
+except ImportError:
+    pass
+
+LOG_LEVELS: Dict[str, int] = {
+    "critical": logging.CRITICAL,
+    "error": logging.ERROR,
+    "warning": logging.WARNING,
+    "info": logging.INFO,
+    "debug": logging.DEBUG,
+    "trace": 5,
+}
+
 FORMAT = "%(asctime)s %(name)s %(levelname)s: %(message)s"
 logging.basicConfig(
     level="INFO",
     format=FORMAT,
-    handlers=[RichHandler(console=Console(stderr=True), show_time=False)],
+    handlers=handlers,
 )
 
 logger = logging.getLogger("infinity_emb")

diff --git a/libs/infinity_emb/poetry.lock b/libs/infinity_emb/poetry.lock
diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml
@@ -9,20 +9,21 @@ packages = [{include = "infinity_emb"}]
 
 [tool.poetry.dependencies]
 python = ">=3.10,<3.12"
-# webserver
-fastapi = "^0.103.2"
-pydantic = ">=2.4.2,<3"
-orjson = "^3.9.8"
-prometheus-fastapi-instrumentator = "^6.1.0"
-uvicorn = {extras = ["standard"], version = "^0.23.2"}
 # basics
-rich = "^13.6.0"
-numpy = "^1"
-typer = {extras = ["all"], version = "^0.9.0"}
+numpy = ">=1.20.0"
+pydantic = ">=2.4.0,<3"
+# logging
+rich = {version = "^13", optional=true}
+# webserver-only
+fastapi = {version = "^0.103.2", optional=true}
+orjson = {version = ">=3.9.8,<4", optional=true} 
+prometheus-fastapi-instrumentator = {version = "^6.1.0", optional=true}
+uvicorn = {extras = ["standard"], version = "^0.23.2", optional=true}
+typer = {extras = ["all"], version = "^0.9.0", optional=true}
 # backend
 torch = {version = ">=2.0.0, !=2.0.1, !=2.1.0", optional=true} 
 sentence-transformers = {version = "2.2.2", optional=true} 
-ctranslate2 = {version = "^3.20.0", optional=true}
+ctranslate2 = {version = "^3.21.0", optional=true}
 optimum = {version = "^1.13.2", optional=true}
 fastembed = {version = "0.1.1", optional=true} 
 
@@ -56,7 +57,9 @@ ct2=["ctranslate2","sentence-transformers","torch"]
 optimum=["optimum"]
 fastembed=["fastembed"]
 torch=["sentence-transformers","torch"]
-all=["ctranslate2","fastembed","optimum","sentence-transformers","torch"]
+logging=["rich"]
+server=["fastapi", "pydantic", "orjson", "prometheus-fastapi-instrumentator", "uvicorn", "typer","rich"]
+all=["ctranslate2", "fastapi", "fastembed", "optimum", "orjson", "prometheus-fastapi-instrumentator", "pydantic", "rich", "sentence-transformers", "torch", "typer", "uvicorn"]
 
 [tool.pytest.ini_options]
 markers = [

diff --git a/libs/infinity_emb/tests/script_live.py b/libs/infinity_emb/tests/script_live.py
@@ -46,7 +46,7 @@ def remote(json_data: bytes, iters=1):
     print("Both methods provide the identical output.")
 
     print("Measuring latency via SentenceTransformers")
-    latency_st = timeit.timeit("local(sample, iters=5)", number=2, globals=locals())
+    latency_st = timeit.timeit("local(sample, iters=1)", number=1, globals=locals())
     print("SentenceTransformers latency: ", latency_st)
     model = None
 
@@ -76,4 +76,4 @@ def _post(i):
 
 
 if __name__ == "__main__":
-    latency_single()
+    embedding_live_performance()
diff --git a/libs/infinity_emb/tests/unit_test/test_infinity_server.py b/libs/infinity_emb/tests/unit_test/test_infinity_server.py
@@ -45,10 +45,27 @@ async def test_async_api_fastembed():
     engine = AsyncEmbeddingEngine(engine=transformer.InferenceEngine.fastembed)
     async with engine:
         embeddings = np.array(await engine.embed(sentences))
+        assert not engine.is_overloaded()
         assert embeddings.shape[0] == 2
         assert embeddings.shape[1] >= 10
 
 
+@pytest.mark.anyio
+async def test_async_api_failing():
+    sentences = ["Hi", "how"]
+    engine = AsyncEmbeddingEngine()
+    with pytest.raises(ValueError):
+        await engine.embed(sentences)
+
+    await engine.astart()
+    assert not engine.is_overloaded()
+    assert engine.overload_status()
+
+    with pytest.raises(ValueError):
+        await engine.astart()
+    await engine.astop()
+
+
 def test_cli_help():
     log = subprocess.run(["infinity_emb", "--help"])
     assert log.returncode == 0