Skip to content

Commit

Permalink
Docker: Cuda11.8, make dependencies optional (#33)
Browse files Browse the repository at this point in the history
* dependencies optional and docker

* update pydantic dependency

* update poetry

* update tests

* update double start

* update unit test

---------

Co-authored-by: Michael Feil <[email protected]>
  • Loading branch information
michaelfeil and michaelfeil authored Nov 11, 2023
1 parent 2b930f1 commit bf9e10d
Show file tree
Hide file tree
Showing 9 changed files with 289 additions and 240 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ jobs:
working-directory: ${{ inputs.working-directory }}
cache-key: core

- name: Install minimal dependencies and import
shell: bash
run: |
poetry install --without test
poetry run python -c "import infinity_emb"
- name: Install dependencies
shell: bash
run: poetry install --extras all --with test
Expand Down
2 changes: 1 addition & 1 deletion libs/infinity_emb/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Use the Python base image
FROM nvidia/cuda:12.2.0-base-ubuntu22.04 AS base
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS base

ENV PYTHONUNBUFFERED=1 \
# prevents python creating .pyc files
Expand Down
7 changes: 5 additions & 2 deletions libs/infinity_emb/infinity_emb/fastapi_schemas/errors.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from typing import Optional

from fastapi import Request
from fastapi.responses import JSONResponse
# fastapi is an optional dependency: keep this module importable without it.
# Both names are bound in the fallback so that later module-level references
# to either of them do not fail with a NameError.
try:
    from fastapi import Request
    from fastapi.responses import JSONResponse
except ImportError:
    Request = None
    JSONResponse = None


class OpenAIException(Exception):
Expand Down
28 changes: 15 additions & 13 deletions libs/infinity_emb/infinity_emb/infinity_server.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import time
from typing import List

import typer
import uvicorn
from fastapi import FastAPI, responses, status
from prometheus_fastapi_instrumentator import Instrumentator

# prometheus
import infinity_emb
from infinity_emb.fastapi_schemas import docs, errors
Expand Down Expand Up @@ -59,6 +54,12 @@ def __init__(

async def astart(self):
"""startup engine"""
if self.running:
raise ValueError(
"DoubleSpawn: already started `AsyncEmbeddingEngine`. "
" recommended use is via AsyncContextManager"
" `async with engine: ..`"
)
self.running = True
self._batch_handler = BatchHandler(
max_batch_size=self.batch_size,
Expand All @@ -70,20 +71,14 @@ async def astart(self):

async def astop(self):
    """Stop the engine.

    Calls `self._check_running()` first (presumably this rejects stopping
    an engine that was never started -- confirm against its definition),
    then marks the engine as not running and awaits the shutdown of the
    batch handler.
    """
    self._check_running()
    # the running flag is cleared before the (awaited) shutdown starts
    self.running = False
    await self._batch_handler.shutdown()

async def __aenter__(self):
if self.running:
raise ValueError(
"DoubleSpawn: already started `AsyncEmbeddingEngine`. "
" recommended use is via AsyncContextManager"
" `async with engine: ..`"
)
await self.astart()

async def __aexit__(self, *args):
self._check_running()
await self.astop()

def overload_status(self):
Expand Down Expand Up @@ -128,10 +123,13 @@ def create_server(
verbose: bool = False,
model_warmup=True,
doc_extra: dict = {},
) -> FastAPI:
):
"""
creates the FastAPI App
"""
from fastapi import FastAPI, responses, status
from prometheus_fastapi_instrumentator import Instrumentator

app = FastAPI(
title=docs.FASTAPI_TITLE,
summary=docs.FASTAPI_SUMMARY,
Expand Down Expand Up @@ -274,6 +272,8 @@ def start_uvicorn(
engine: framework that should perform inference.
model_warmup: perform model warmup before starting the server. Defaults to True.
"""
import uvicorn

engine_load: InferenceEngine = InferenceEngine[engine.name]
logger.setLevel(log_level.to_int())

Expand All @@ -291,6 +291,8 @@ def start_uvicorn(

def cli():
    """Command line entry point: hands `start_uvicorn` to `typer.run()`.

    `typer` is imported inside the function, so importing this module
    does not need `typer` to be installed.
    """
    from typer import run as run_command

    run_command(start_uvicorn)


Expand Down
25 changes: 20 additions & 5 deletions libs/infinity_emb/infinity_emb/log_handler.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,32 @@
import logging
from enum import Enum

from rich.console import Console
from rich.logging import RichHandler
from uvicorn.config import LOG_LEVELS
from typing import Dict

logging.getLogger().handlers.clear()

# Handlers passed to `logging.basicConfig` below. The rich handler is used
# when the optional `rich` dependency is installed.
handlers = []
try:
    from rich.console import Console
    from rich.logging import RichHandler

    handlers.append(RichHandler(console=Console(stderr=True), show_time=False))
except ImportError:
    # `logging.basicConfig(handlers=[])` attaches no handler at all, which
    # would silently drop every record below WARNING. Fall back to the
    # standard stream handler (writes to stderr, like the rich console).
    handlers.append(logging.StreamHandler())

LOG_LEVELS: Dict[str, int] = {
"critical": logging.CRITICAL,
"error": logging.ERROR,
"warning": logging.WARNING,
"info": logging.INFO,
"debug": logging.DEBUG,
"trace": 5,
}

FORMAT = "%(asctime)s %(name)s %(levelname)s: %(message)s"
logging.basicConfig(
level="INFO",
format=FORMAT,
handlers=[RichHandler(console=Console(stderr=True), show_time=False)],
handlers=handlers,
)

logger = logging.getLogger("infinity_emb")
Expand Down
415 changes: 209 additions & 206 deletions libs/infinity_emb/poetry.lock

Large diffs are not rendered by default.

25 changes: 14 additions & 11 deletions libs/infinity_emb/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,21 @@ packages = [{include = "infinity_emb"}]

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
# webserver
fastapi = "^0.103.2"
pydantic = ">=2.4.2,<3"
orjson = "^3.9.8"
prometheus-fastapi-instrumentator = "^6.1.0"
uvicorn = {extras = ["standard"], version = "^0.23.2"}
# basics
rich = "^13.6.0"
numpy = "^1"
typer = {extras = ["all"], version = "^0.9.0"}
numpy = ">=1.20.0"
pydantic = ">=2.4.0,<3"
# logging
rich = {version = "^13", optional=true}
# webserver-only
fastapi = {version = "^0.103.2", optional=true}
orjson = {version = ">=3.9.8,<4", optional=true}
prometheus-fastapi-instrumentator = {version = "^6.1.0", optional=true}
uvicorn = {extras = ["standard"], version = "^0.23.2", optional=true}
typer = {extras = ["all"], version = "^0.9.0", optional=true}
# backend
torch = {version = ">=2.0.0, !=2.0.1, !=2.1.0", optional=true}
sentence-transformers = {version = "2.2.2", optional=true}
ctranslate2 = {version = "^3.20.0", optional=true}
ctranslate2 = {version = "^3.21.0", optional=true}
optimum = {version = "^1.13.2", optional=true}
fastembed = {version = "0.1.1", optional=true}

Expand Down Expand Up @@ -56,7 +57,9 @@ ct2=["ctranslate2","sentence-transformers","torch"]
optimum=["optimum"]
fastembed=["fastembed"]
torch=["sentence-transformers","torch"]
all=["ctranslate2","fastembed","optimum","sentence-transformers","torch"]
logging=["rich"]
server=["fastapi", "pydantic", "orjson", "prometheus-fastapi-instrumentator", "uvicorn", "typer","rich"]
all=["ctranslate2", "fastapi", "fastembed", "optimum", "orjson", "prometheus-fastapi-instrumentator", "pydantic", "rich", "sentence-transformers", "torch", "typer", "uvicorn"]

[tool.pytest.ini_options]
markers = [
Expand Down
4 changes: 2 additions & 2 deletions libs/infinity_emb/tests/script_live.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def remote(json_data: bytes, iters=1):
print("Both methods provide the identical output.")

print("Measuring latency via SentenceTransformers")
latency_st = timeit.timeit("local(sample, iters=5)", number=2, globals=locals())
latency_st = timeit.timeit("local(sample, iters=1)", number=1, globals=locals())
print("SentenceTransformers latency: ", latency_st)
model = None

Expand Down Expand Up @@ -76,4 +76,4 @@ def _post(i):


if __name__ == "__main__":
latency_single()
embedding_live_performance()
17 changes: 17 additions & 0 deletions libs/infinity_emb/tests/unit_test/test_infinity_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,27 @@ async def test_async_api_fastembed():
engine = AsyncEmbeddingEngine(engine=transformer.InferenceEngine.fastembed)
async with engine:
embeddings = np.array(await engine.embed(sentences))
assert not engine.is_overloaded()
assert embeddings.shape[0] == 2
assert embeddings.shape[1] >= 10


@pytest.mark.anyio
async def test_async_api_failing():
    """Misuse of the engine lifecycle must raise `ValueError`.

    Covers embedding before the engine was started and starting an
    engine that is already running.
    """
    sentences = ["Hi", "how"]
    engine = AsyncEmbeddingEngine()
    # embedding on an engine that was never started is rejected
    with pytest.raises(ValueError):
        await engine.embed(sentences)

    await engine.astart()
    try:
        assert not engine.is_overloaded()
        assert engine.overload_status()

        # a second start on a running engine is rejected
        with pytest.raises(ValueError):
            await engine.astart()
    finally:
        # always stop the engine, even when an assertion above fails,
        # so a failing test does not leave a started engine behind
        await engine.astop()


def test_cli_help():
log = subprocess.run(["infinity_emb", "--help"])
assert log.returncode == 0
Expand Down

0 comments on commit bf9e10d

Please sign in to comment.