From 65e912d3e80ffb3412be71d3b69ad4771b571d7e Mon Sep 17 00:00:00 2001
From: Hamada Salhab <hamada.a.salhab@gmail.com>
Date: Fri, 25 Oct 2024 00:30:04 +0300
Subject: [PATCH] feat(agents-api): Optimize Search Queries NLP processing
 pipeline (#735)

<!-- ELLIPSIS_HIDDEN -->


> [!IMPORTANT]
> Optimized NLP processing in `nlp.py` with caching, batch processing,
> and enhanced query building; switched deployment to Gunicorn.
>
>   - **Performance Optimization**:
>     - Introduced a `KeywordMatcher` singleton with batch processing
>       in `nlp.py` for efficient keyword matching.
>     - Added `lru_cache` to `clean_keyword()` and `_create_pattern()`
>       for caching results.
>     - Optimized `extract_keywords()` to process spans in a single pass
>       and count frequencies efficiently.
>   - **Functionality Changes**:
>     - Modified `paragraph_to_custom_queries()` to include a
>       `min_keywords` parameter for filtering low-value queries.
>     - Enhanced `find_proximity_groups()` with sorted positions and
>       union-find for efficient grouping.
>     - Improved `build_query()` with cached patterns for query
>       construction.
>   - **Deployment**:
>     - Changed `ENTRYPOINT` in `Dockerfile` to use Gunicorn with
>       `gunicorn_conf.py`.
>     - Added `gunicorn_conf.py` for Gunicorn configuration.
>     - Updated `pyproject.toml` to include `gunicorn` and `uvloop`
>       dependencies.
>   - **Miscellaneous**:
>     - Precompiled regex patterns for whitespace and non-alphanumeric
>       characters in `nlp.py`.
>     - Disabled unused components in spaCy pipeline for performance.
>
> <sup>This description was created by </sup>[<img alt="Ellipsis"
src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=julep-ai%2Fjulep&utm_source=github&utm_medium=referral)<sup>
for 0f4c4e0f43fb44f05e2fc522db745a20afc1c700. It will automatically
update as commits are pushed.</sup>


<!-- ELLIPSIS_HIDDEN -->

---------

Signed-off-by: Diwank Singh Tomer <diwank.singh@gmail.com>
Co-authored-by: HamadaSalhab <HamadaSalhab@users.noreply.github.com>
Co-authored-by: Diwank Singh Tomer <diwank.singh@gmail.com>
---
 agents-api/Dockerfile                         |   2 +-
 agents-api/agents_api/common/nlp.py           | 395 ++++++++++--------
 .../models/docs/search_docs_by_embedding.py   |   4 +-
 agents-api/agents_api/web.py                  |   7 +-
 agents-api/gunicorn_conf.py                   |   9 +
 ...rate_1729114011_tweak_proximity_indices.py |  41 ++
 agents-api/poetry.lock                        |  74 +++-
 agents-api/pyproject.toml                     |   2 +
 8 files changed, 361 insertions(+), 173 deletions(-)
 create mode 100644 agents-api/gunicorn_conf.py
diff --git a/agents-api/Dockerfile b/agents-api/Dockerfile
index bd4e29188..bee0a68d5 100644
--- a/agents-api/Dockerfile
+++ b/agents-api/Dockerfile
@@ -41,4 +41,4 @@ RUN poetry install --no-dev --no-root
 
 COPY . ./
 
-ENTRYPOINT ["python", "-m", "agents_api.web", "--host", "0.0.0.0", "--port", "8080"]
+ENTRYPOINT ["gunicorn", "agents_api.web:app", "-c", "gunicorn_conf.py"]
\ No newline at end of file
diff --git a/agents-api/agents_api/common/nlp.py b/agents-api/agents_api/common/nlp.py
index a2f2f17ea..d7dcabe15 100644
--- a/agents-api/agents_api/common/nlp.py
+++ b/agents-api/agents_api/common/nlp.py
@@ -1,221 +1,286 @@
 import re
 from collections import Counter, defaultdict
+from functools import lru_cache
 
 import spacy
+from spacy.matcher import PhraseMatcher
+from spacy.tokens import Doc
+from spacy.util import filter_spans
 
-# Load spaCy English model
-spacy.prefer_gpu()
-nlp = spacy.load("en_core_web_sm")
+# Precompile regex patterns
+WHITESPACE_RE = re.compile(r"\s+")
+NON_ALPHANUM_RE = re.compile(r"[^\w\s\-_]+")
 
+# Initialize spaCy with minimal pipeline
+nlp = spacy.load("en_core_web_sm", exclude=["lemmatizer", "textcat", "tok2vec"])
 
-def extract_keywords(text: str, top_n: int = 10, clean: bool = True) -> list[str]:
-    """
-    Extracts significant keywords and phrases from the text.
+# Add sentencizer for faster sentence tokenization
+sentencizer = nlp.add_pipe("sentencizer")
 
-    Args:
-        text (str): The input text to process.
-        top_n (int): Number of top keywords to extract based on frequency.
-        clean (bool): Strip non-alphanumeric characters from keywords.
 
-    Returns:
-        List[str]: A list of extracted keywords/phrases.
-    """
-    doc = nlp(text)
-
-    # Extract named entities
-    entities = [
-        ent.text.strip()
-        for ent in doc.ents
-        if ent.label_
-        not in ["DATE", "TIME", "PERCENT", "MONEY", "QUANTITY", "ORDINAL", "CARDINAL"]
-    ]
+# Singleton PhraseMatcher for better performance
+class KeywordMatcher:
+    _instance = None
 
-    # Extract nouns and proper nouns
-    nouns = [
-        chunk.text.strip().lower()
-        for chunk in doc.noun_chunks
-        if not chunk.root.is_stop
-    ]
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance.matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
+            cls._instance.batch_size = 1000  # Adjust based on memory constraints
+            cls._instance.patterns_cache = {}
+        return cls._instance
 
-    # Combine entities and nouns
-    combined = entities + nouns
+    @lru_cache(maxsize=10000)
+    def _create_pattern(self, text: str) -> Doc:
+        return nlp.make_doc(text)
 
-    # Normalize and count frequency
-    normalized = [re.sub(r"\s+", " ", kw).strip() for kw in combined]
-    freq = Counter(normalized)
+    def find_matches(self, doc: Doc, keywords: list[str]) -> dict[str, list[int]]:
+        """Batch process keywords for better performance."""
+        keyword_positions = defaultdict(list)
 
-    # Get top_n keywords
-    keywords = [item for item, count in freq.most_common(top_n)]
+        # Process keywords in batches to avoid memory issues
+        for i in range(0, len(keywords), self.batch_size):
+            batch = keywords[i : i + self.batch_size]
+            patterns = [self._create_pattern(kw) for kw in batch]
 
-    if clean:
-        keywords = [re.sub(r"[^\w\s\-_]+", "", kw) for kw in keywords]
+            # Clear previous patterns and add new batch
+            if "KEYWORDS" in self.matcher:
+                self.matcher.remove("KEYWORDS")
+            self.matcher.add("KEYWORDS", patterns)
 
-    return keywords
+            # Find matches for this batch
+            matches = self.matcher(doc)
+            for match_id, start, end in matches:
+                span_text = doc[start:end].text
+                normalized = WHITESPACE_RE.sub(" ", span_text).lower().strip()
+                keyword_positions[normalized].append(start)
 
+        return keyword_positions
 
-def find_keyword_positions(doc, keyword: str) -> list[int]:
-    """
-    Finds all start indices of the keyword in the tokenized doc.
 
-    Args:
-        doc (spacy.tokens.Doc): The tokenized document.
-        keyword (str): The keyword or phrase to search for.
+# Initialize global matcher
+keyword_matcher = KeywordMatcher()
 
-    Returns:
-        List[int]: List of starting token indices where the keyword appears.
-    """
-    keyword_tokens = keyword.split()
-    n = len(keyword_tokens)
-    positions = []
-    for i in range(len(doc) - n + 1):
-        window = doc[i : i + n]
-        window_text = " ".join([token.text.lower() for token in window])
-        if window_text == keyword:
-            positions.append(i)
-    return positions
 
+@lru_cache(maxsize=10000)
+def clean_keyword(kw: str) -> str:
+    """Cache cleaned keywords for reuse."""
+    return NON_ALPHANUM_RE.sub("", kw).strip()
 
-def find_proximity_groups(
-    text: str, keywords: list[str], n: int = 10
-) -> list[set[str]]:
-    """
-    Groups keywords that appear within n words of each other.
 
-    Args:
-        text (str): The input text.
-        keywords (List[str]): List of keywords to consider.
-        n (int): The proximity window in words.
+def extract_keywords(doc: Doc, top_n: int = 10, clean: bool = True) -> list[str]:
+    """Optimized keyword extraction with minimal behavior change."""
+    excluded_labels = {
+        "DATE",
+        "TIME",
+        "PERCENT",
+        "MONEY",
+        "QUANTITY",
+        "ORDINAL",
+        "CARDINAL",
+    }
 
-    Returns:
-        List[Set[str]]: List of sets, each containing keywords that are proximate.
-    """
-    doc = nlp(text.lower())
-    keyword_positions = defaultdict(list)
+    # Extract and filter spans in a single pass
+    ent_spans = [ent for ent in doc.ents if ent.label_ not in excluded_labels]
+    chunk_spans = [chunk for chunk in doc.noun_chunks if not chunk.root.is_stop]
+    all_spans = filter_spans(ent_spans + chunk_spans)
 
-    for kw in keywords:
-        positions = find_keyword_positions(doc, kw)
-        keyword_positions[kw].extend(positions)
-
-    # Initialize Union-Find structure
-    parent = {}
-
-    def find(u):
-        while parent[u] != u:
-            parent[u] = parent[parent[u]]
-            u = parent[u]
-        return u
-
-    def union(u, v):
-        u_root = find(u)
-        v_root = find(v)
-        if u_root == v_root:
-            return
-        parent[v_root] = u_root
-
-    # Initialize each keyword as its own parent
-    for kw in keywords:
-        parent[kw] = kw
-
-    # Compare all pairs of keywords
-    for i in range(len(keywords)):
-        for j in range(i + 1, len(keywords)):
-            kw1 = keywords[i]
-            kw2 = keywords[j]
-            positions1 = keyword_positions[kw1]
-            positions2 = keyword_positions[kw2]
-            # Check if any positions are within n words
-            for pos1 in positions1:
-                for pos2 in positions2:
-                    distance = abs(pos1 - pos2)
-                    if distance <= n:
-                        union(kw1, kw2)
-                        break
-                else:
-                    continue
-                break
-
-    # Group keywords by their root parent
+    # Process spans efficiently
+    keywords = []
+    seen_texts = set()
+
+    for span in all_spans:
+        text = span.text.strip()
+        lower_text = text.lower()
+
+        # Skip empty or seen texts
+        if not text or lower_text in seen_texts:
+            continue
+
+        seen_texts.add(lower_text)
+        keywords.append(text)
+
+    # Normalize keywords by replacing multiple spaces with single space and stripping
+    normalized_keywords = [WHITESPACE_RE.sub(" ", kw).strip() for kw in keywords]
+
+    # Count frequencies efficiently
+    freq = Counter(normalized_keywords)
+    top_keywords = [kw for kw, _ in freq.most_common(top_n)]
+
+    if clean:
+        return [clean_keyword(kw) for kw in top_keywords]
+    return top_keywords
+
+
+def find_proximity_groups(
+    keywords: list[str], keyword_positions: dict[str, list[int]], n: int = 10
+) -> list[set[str]]:
+    """Optimized proximity grouping using sorted positions."""
+    # Early return for single or no keywords
+    if len(keywords) <= 1:
+        return [{kw} for kw in keywords]
+
+    # Create flat list of positions for efficient processing
+    positions: list[tuple[int, str]] = [
+        (pos, kw) for kw in keywords for pos in keyword_positions[kw]
+    ]
+
+    # Sort positions once
+    positions.sort()
+
+    # Initialize Union-Find with path compression and union by rank
+    parent = {kw: kw for kw in keywords}
+    rank = {kw: 0 for kw in keywords}
+
+    def find(u: str) -> str:
+        if parent[u] != u:
+            parent[u] = find(parent[u])
+        return parent[u]
+
+    def union(u: str, v: str) -> None:
+        u_root, v_root = find(u), find(v)
+        if u_root != v_root:
+            if rank[u_root] < rank[v_root]:
+                u_root, v_root = v_root, u_root
+            parent[v_root] = u_root
+            if rank[u_root] == rank[v_root]:
+                rank[u_root] += 1
+
+    # Use sliding window for proximity checking
+    window = []
+    for pos, kw in positions:
+        # Remove positions outside window
+        while window and pos - window[0][0] > n:
+            window.pop(0)
+
+        # Union with all keywords in window
+        for _, w_kw in window:
+            union(kw, w_kw)
+
+        window.append((pos, kw))
+
+    # Group keywords efficiently
     groups = defaultdict(set)
     for kw in keywords:
         root = find(kw)
         groups[root].add(kw)
 
-    # Convert to list of sets
-    group_list = list(groups.values())
-
-    return group_list
+    return list(groups.values())
 
 
-def build_query(groups: list[set[str]], keywords: list[str], n: int = 10) -> str:
-    """
-    Builds a query string using the custom query language.
+def build_query_pattern(group_size: int, n: int) -> str:
+    """Cache query patterns for common group sizes."""
+    if group_size == 1:
+        return '"{}"'
+    return f"NEAR/{n}(" + " ".join('"{}"' for _ in range(group_size)) + ")"
 
-    Args:
-        groups (List[Set[str]]): List of keyword groups.
-        keywords (List[str]): Original list of keywords.
-        n (int): The proximity window for NEAR.
 
-    Returns:
-        str: The constructed query string.
-    """
-    grouped_keywords = set()
+def build_query(groups: list[set[str]], n: int = 10) -> str:
+    """Build query with cached patterns."""
     clauses = []
 
     for group in groups:
         if len(group) == 1:
-            clauses.append(f'"{list(group)[0]}"')
+            clauses.append(f'"{next(iter(group))}"')
         else:
-            sorted_group = sorted(
-                group, key=lambda x: -len(x)
-            )  # Sort by length to prioritize phrases
-            escaped_keywords = [f'"{kw}"' for kw in sorted_group]
-            near_clause = f"NEAR/{n}(" + " ".join(escaped_keywords) + ")"
-            clauses.append(near_clause)
-        grouped_keywords.update(group)
-
-    # Identify keywords not in any group (if any)
-    remaining = set(keywords) - grouped_keywords
-    for kw in remaining:
-        clauses.append(f'"{kw}"')
+            # Sort by length descending to prioritize longer phrases
+            sorted_group = sorted(group, key=len, reverse=True)
+            # Get cached pattern and format with keywords
+            pattern = build_query_pattern(len(group), n)
+            clause = pattern.format(*sorted_group)
+            clauses.append(clause)
 
-    # Combine all clauses with OR
-    query = " OR ".join(clauses)
+    return " OR ".join(clauses)
 
-    return query
 
-
-def text_to_custom_query(text: str, top_n: int = 10, proximity_n: int = 10) -> str:
+@lru_cache(maxsize=100)
+def paragraph_to_custom_queries(
+    paragraph: str, top_n: int = 10, proximity_n: int = 10, min_keywords: int = 1
+) -> list[str]:
     """
-    Converts arbitrary text to the custom query language.
+    Optimized paragraph processing with minimal behavior changes.
+    Added min_keywords parameter to filter out low-value queries.
 
     Args:
-        text (str): The input text to convert.
-        top_n (int): Number of top keywords to extract.
+        paragraph (str): The input paragraph to convert.
+        top_n (int): Number of top keywords to extract per sentence.
         proximity_n (int): The proximity window for NEAR/n.
+        min_keywords (int): Minimum number of keywords required to form a query.
 
     Returns:
-        str: The custom query string.
+        list[str]: The list of custom query strings.
     """
-    keywords = extract_keywords(text, top_n)
-    if not keywords:
-        return ""
-    groups = find_proximity_groups(text, keywords, proximity_n)
-    query = build_query(groups, keywords, proximity_n)
-    return query
+    if not paragraph or not paragraph.strip():
+        return []
+
+    # Process entire paragraph once
+    doc = nlp(paragraph)
+    queries = []
 
+    # Process sentences
+    for sent in doc.sents:
+        # Convert to doc for consistent API
+        sent_doc = sent.as_doc()
 
-def paragraph_to_custom_queries(paragraph: str) -> list[str]:
+        # Extract and clean keywords
+        keywords = extract_keywords(sent_doc, top_n)
+        if len(keywords) < min_keywords:
+            continue
+
+        # Find keyword positions using matcher
+        keyword_positions = keyword_matcher.find_matches(sent_doc, keywords)
+
+        # Skip if no keywords found in positions
+        if not keyword_positions:
+            continue
+
+        # Find proximity groups and build query
+        groups = find_proximity_groups(keywords, keyword_positions, proximity_n)
+        query = build_query(groups, proximity_n)
+
+        if query:
+            queries.append(query)
+
+    return queries
+
+
+def batch_paragraphs_to_custom_queries(
+    paragraphs: list[str],
+    top_n: int = 10,
+    proximity_n: int = 10,
+    min_keywords: int = 1,
+    n_process: int = 1,
+) -> list[list[str]]:
     """
-    Converts a paragraph to a list of custom query strings.
+    Processes multiple paragraphs using nlp.pipe for better performance.
 
     Args:
-        paragraph (str): The input paragraph to convert.
+        paragraphs (list[str]): list of paragraphs to process.
+        top_n (int): Number of top keywords to extract per sentence.
+        proximity_n (int): The proximity window for NEAR/n.
+        min_keywords (int): Minimum number of keywords required to form a query.
+        n_process (int): Number of processes to use for multiprocessing.
 
     Returns:
-        List[str]: The list of custom query strings.
+        list[list[str]]: A list where each element is a list of queries for a paragraph.
     """
-
-    queries = [text_to_custom_query(sentence.text) for sentence in nlp(paragraph).sents]
-    queries = [q for q in queries if q]
-
-    return queries
+    results = []
+    for doc in nlp.pipe(
+        paragraphs, disable=["lemmatizer", "textcat"], n_process=n_process
+    ):
+        queries = []
+        for sent in doc.sents:
+            sent_doc = sent.as_doc()
+            keywords = extract_keywords(sent_doc, top_n)
+            if len(keywords) < min_keywords:
+                continue
+            keyword_positions = keyword_matcher.find_matches(sent_doc, keywords)
+            if not keyword_positions:
+                continue
+            groups = find_proximity_groups(keywords, keyword_positions, proximity_n)
+            query = build_query(groups, proximity_n)
+            if query:
+                queries.append(query)
+        results.append(queries)
+
+    return results
diff --git a/agents-api/agents_api/models/docs/search_docs_by_embedding.py b/agents-api/agents_api/models/docs/search_docs_by_embedding.py
index 7d3bbbd2f..e346b6b69 100644
--- a/agents-api/agents_api/models/docs/search_docs_by_embedding.py
+++ b/agents-api/agents_api/models/docs/search_docs_by_embedding.py
@@ -48,8 +48,8 @@ def search_docs_by_embedding(
     query_embedding: list[float],
     k: int = 3,
     confidence: float = 0.5,
-    ef: int = 32,
-    mmr_lambda: float = 0.25,
+    ef: int = 50,
+    mmr_lambda: float = 0.5,
     embedding_size: int = 1024,
 ) -> tuple[list[str], dict]:
     """
diff --git a/agents-api/agents_api/web.py b/agents-api/agents_api/web.py
index 56bdbb48a..037767b9f 100644
--- a/agents-api/agents_api/web.py
+++ b/agents-api/agents_api/web.py
@@ -2,12 +2,13 @@
 This module initializes the FastAPI application, registers routes, sets up middleware, and configures exception handlers.
 """
 
+import asyncio
 import logging
 from typing import Any, Callable
 
-import fire
 import sentry_sdk
 import uvicorn
+import uvloop
 from fastapi import APIRouter, Depends, FastAPI, Request, status
 from fastapi.exceptions import HTTPException, RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
@@ -207,6 +208,4 @@ def main(
     )
 
 
-# Check if the script is being run directly and, if so, start the Uvicorn server with the specified configuration.
-if __name__ == "__main__":
-    fire.Fire(main)
+asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
diff --git a/agents-api/gunicorn_conf.py b/agents-api/gunicorn_conf.py
new file mode 100644
index 000000000..1ecc02d9c
--- /dev/null
+++ b/agents-api/gunicorn_conf.py
@@ -0,0 +1,9 @@
+import multiprocessing
+
+# Gunicorn config variables
+workers = multiprocessing.cpu_count() * 2 + 1
+worker_class = "uvicorn.workers.UvicornWorker"
+bind = "0.0.0.0:8080"
+keepalive = 120
+errorlog = "-"
+accesslog = "-"
diff --git a/agents-api/migrations/migrate_1729114011_tweak_proximity_indices.py b/agents-api/migrations/migrate_1729114011_tweak_proximity_indices.py
index e8fbbaa58..4852f3603 100644
--- a/agents-api/migrations/migrate_1729114011_tweak_proximity_indices.py
+++ b/agents-api/migrations/migrate_1729114011_tweak_proximity_indices.py
@@ -12,6 +12,45 @@ def run(client, *queries):
     client.run(query)
 
 
+# See: https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md
+drop_snippets_hnsw_index = dict(
+    down="""
+    ::hnsw create snippets:embedding_space {
+        fields: [embedding],
+        filter: !is_null(embedding),
+        dim: 1024,
+        distance: Cosine,
+        m: 64,
+        ef_construction: 256,
+        extend_candidates: true,
+        keep_pruned_connections: false,
+    }
+    """,
+    up="""
+    ::hnsw drop snippets:embedding_space
+    """,
+)
+
+
+# See: https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md
+snippets_hnsw_index = dict(
+    up="""
+    ::hnsw create snippets:embedding_space {
+        fields: [embedding],
+        filter: !is_null(embedding),
+        dim: 1024,
+        distance: Cosine,
+        m: 64,
+        ef_construction: 800,
+        extend_candidates: false,
+        keep_pruned_connections: false,
+    }
+    """,
+    down="""
+    ::hnsw drop snippets:embedding_space
+    """,
+)
+
 drop_snippets_lsh_index = dict(
     up="""
     ::lsh drop snippets:lsh
@@ -77,8 +116,10 @@ def run(client, *queries):
 )
 
 queries_to_run = [
+    drop_snippets_hnsw_index,
     drop_snippets_lsh_index,
     drop_snippets_fts_index,
+    snippets_hnsw_index,
     snippets_lsh_index,
     snippets_fts_index,
 ]
diff --git a/agents-api/poetry.lock b/agents-api/poetry.lock
index c6507586a..1f5c96c37 100644
--- a/agents-api/poetry.lock
+++ b/agents-api/poetry.lock
@@ -1373,6 +1373,27 @@ files = [
     {file = "google_re2-1.1.20240702.tar.gz", hash = "sha256:8788db69f6c93cb229df62c74b2d9aa8e64bf754e9495700f85812afa32efd2b"},
 ]
 
+[[package]]
+name = "gunicorn"
+version = "23.0.0"
+description = "WSGI HTTP Server for UNIX"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d"},
+    {file = "gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec"},
+]
+
+[package.dependencies]
+packaging = "*"
+
+[package.extras]
+eventlet = ["eventlet (>=0.24.1,!=0.36.0)"]
+gevent = ["gevent (>=1.4.0)"]
+setproctitle = ["setproctitle"]
+testing = ["coverage", "eventlet", "gevent", "pytest", "pytest-cov"]
+tornado = ["tornado (>=0.2)"]
+
 [[package]]
 name = "h11"
 version = "0.14.0"
@@ -5424,6 +5445,57 @@ h11 = ">=0.8"
 [package.extras]
 standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"]
 
+[[package]]
+name = "uvloop"
+version = "0.21.0"
+description = "Fast implementation of asyncio event loop on top of libuv"
+optional = false
+python-versions = ">=3.8.0"
+files = [
+    {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"},
+    {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"},
+    {file = "uvloop-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f38b2e090258d051d68a5b14d1da7203a3c3677321cf32a95a6f4db4dd8b6f26"},
+    {file = "uvloop-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c43e0f13022b998eb9b973b5e97200c8b90823454d4bc06ab33829e09fb9bb"},
+    {file = "uvloop-0.21.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10d66943def5fcb6e7b37310eb6b5639fd2ccbc38df1177262b0640c3ca68c1f"},
+    {file = "uvloop-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:67dd654b8ca23aed0a8e99010b4c34aca62f4b7fce88f39d452ed7622c94845c"},
+    {file = "uvloop-0.21.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c0f3fa6200b3108919f8bdabb9a7f87f20e7097ea3c543754cabc7d717d95cf8"},
+    {file = "uvloop-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0878c2640cf341b269b7e128b1a5fed890adc4455513ca710d77d5e93aa6d6a0"},
+    {file = "uvloop-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9fb766bb57b7388745d8bcc53a359b116b8a04c83a2288069809d2b3466c37e"},
+    {file = "uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a375441696e2eda1c43c44ccb66e04d61ceeffcd76e4929e527b7fa401b90fb"},
+    {file = "uvloop-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:baa0e6291d91649c6ba4ed4b2f982f9fa165b5bbd50a9e203c416a2797bab3c6"},
+    {file = "uvloop-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4509360fcc4c3bd2c70d87573ad472de40c13387f5fda8cb58350a1d7475e58d"},
+    {file = "uvloop-0.21.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:359ec2c888397b9e592a889c4d72ba3d6befba8b2bb01743f72fffbde663b59c"},
+    {file = "uvloop-0.21.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7089d2dc73179ce5ac255bdf37c236a9f914b264825fdaacaded6990a7fb4c2"},
+    {file = "uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baa4dcdbd9ae0a372f2167a207cd98c9f9a1ea1188a8a526431eef2f8116cc8d"},
+    {file = "uvloop-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86975dca1c773a2c9864f4c52c5a55631038e387b47eaf56210f873887b6c8dc"},
+    {file = "uvloop-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:461d9ae6660fbbafedd07559c6a2e57cd553b34b0065b6550685f6653a98c1cb"},
+    {file = "uvloop-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:183aef7c8730e54c9a3ee3227464daed66e37ba13040bb3f350bc2ddc040f22f"},
+    {file = "uvloop-0.21.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bfd55dfcc2a512316e65f16e503e9e450cab148ef11df4e4e679b5e8253a5281"},
+    {file = "uvloop-0.21.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:787ae31ad8a2856fc4e7c095341cccc7209bd657d0e71ad0dc2ea83c4a6fa8af"},
+    {file = "uvloop-0.21.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ee4d4ef48036ff6e5cfffb09dd192c7a5027153948d85b8da7ff705065bacc6"},
+    {file = "uvloop-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3df876acd7ec037a3d005b3ab85a7e4110422e4d9c1571d4fc89b0fc41b6816"},
+    {file = "uvloop-0.21.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd53ecc9a0f3d87ab847503c2e1552b690362e005ab54e8a48ba97da3924c0dc"},
+    {file = "uvloop-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5c39f217ab3c663dc699c04cbd50c13813e31d917642d459fdcec07555cc553"},
+    {file = "uvloop-0.21.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:17df489689befc72c39a08359efac29bbee8eee5209650d4b9f34df73d22e414"},
+    {file = "uvloop-0.21.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc09f0ff191e61c2d592a752423c767b4ebb2986daa9ed62908e2b1b9a9ae206"},
+    {file = "uvloop-0.21.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0ce1b49560b1d2d8a2977e3ba4afb2414fb46b86a1b64056bc4ab929efdafbe"},
+    {file = "uvloop-0.21.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e678ad6fe52af2c58d2ae3c73dc85524ba8abe637f134bf3564ed07f555c5e79"},
+    {file = "uvloop-0.21.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:460def4412e473896ef179a1671b40c039c7012184b627898eea5072ef6f017a"},
+    {file = "uvloop-0.21.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:10da8046cc4a8f12c91a1c39d1dd1585c41162a15caaef165c2174db9ef18bdc"},
+    {file = "uvloop-0.21.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c097078b8031190c934ed0ebfee8cc5f9ba9642e6eb88322b9958b649750f72b"},
+    {file = "uvloop-0.21.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:46923b0b5ee7fc0020bef24afe7836cb068f5050ca04caf6b487c513dc1a20b2"},
+    {file = "uvloop-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53e420a3afe22cdcf2a0f4846e377d16e718bc70103d7088a4f7623567ba5fb0"},
+    {file = "uvloop-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88cb67cdbc0e483da00af0b2c3cdad4b7c61ceb1ee0f33fe00e09c81e3a6cb75"},
+    {file = "uvloop-0.21.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:221f4f2a1f46032b403bf3be628011caf75428ee3cc204a22addf96f586b19fd"},
+    {file = "uvloop-0.21.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2d1f581393673ce119355d56da84fe1dd9d2bb8b3d13ce792524e1607139feff"},
+    {file = "uvloop-0.21.0.tar.gz", hash = "sha256:3bf12b0fda68447806a7ad847bfa591613177275d35b6724b1ee573faa3704e3"},
+]
+
+[package.extras]
+dev = ["Cython (>=3.0,<4.0)", "setuptools (>=60)"]
+docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"]
+test = ["aiohttp (>=3.10.5)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"]
+
 [[package]]
 name = "ward"
 version = "0.68.0b0"
@@ -5885,4 +5957,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.12,<3.13"
-content-hash = "0b75ce61bf1e1338e08e99482083f7d1238216f11f9a07edfc29f61d9b620f6f"
+content-hash = "6378447b12d87d1403ad5b9465b5fe7ed97b561b131d31eb52c85871da50449e"
diff --git a/agents-api/pyproject.toml b/agents-api/pyproject.toml
index 47c709286..910f9e53b 100644
--- a/agents-api/pyproject.toml
+++ b/agents-api/pyproject.toml
@@ -45,6 +45,8 @@ xxhash = "^3.5.0"
 spacy = "^3.8.2"
 en-core-web-sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl"}
 msgpack = "^1.1.0"
+gunicorn = "^23.0.0"
+uvloop = "^0.21.0"
 [tool.poetry.group.dev.dependencies]
 ipython = "^8.26.0"
 ruff = "^0.5.5"