diff --git a/lilac/embeddings/bge.py b/lilac/embeddings/bge.py
index 0c578b1e..5ab1560f 100644
--- a/lilac/embeddings/bge.py
+++ b/lilac/embeddings/bge.py
@@ -1,9 +1,11 @@
"""Gegeral Text Embeddings (GTE) model. Open-source model, designed to run on device."""
import gc
-from typing import TYPE_CHECKING, ClassVar, Iterator, Optional
+from typing import TYPE_CHECKING, ClassVar, Optional
from typing_extensions import override
+from ..utils import log
+
if TYPE_CHECKING:
from FlagEmbedding import BGEM3FlagModel
@@ -15,7 +17,7 @@
from ..splitters.spacy_splitter import clustering_spacy_chunker
from ..tasks import TaskExecutionType
from .embedding import chunked_compute_embedding
-from .transformer_utils import SENTENCE_TRANSFORMER_BATCH_SIZE, setup_model_device
+from .transformer_utils import SENTENCE_TRANSFORMER_BATCH_SIZE
# See https://huggingface.co/spaces/mteb/leaderboard for leaderboard of models.
BGE_M3 = 'BAAI/bge-m3'
@@ -33,8 +35,11 @@ def _get_and_cache_bge_m3(model_name: str) -> 'BGEM3FlagModel':
model = BGEM3FlagModel(
'BAAI/bge-m3', use_fp16=True
) # Setting use_fp16 to True speeds up computation with a slight performance degradation
+
+ log(f'[{model_name}] Using device:', model.device)
+
+ # NOTE: we don't call setup model and device here as this happens internally.
return model
- return setup_model_device(model, model_name)
class BGEM3(TextEmbeddingSignal):
@@ -62,7 +67,7 @@ def setup(self) -> None:
def compute(self, docs: list[str]) -> list[Optional[Item]]:
"""Call the embedding function."""
- def _encode(doc):
+ def _encode(doc: list[str]):
# Extract the dense vectors from the model.
return self._model.encode(doc)['dense_vecs']
@@ -73,16 +78,11 @@ def _encode(doc):
_encode, docs, self.local_batch_size * 16, chunker=clustering_spacy_chunker
)
- @override
- def compute_garden(self, docs: Iterator[str]) -> Iterator[Item]:
- raise NotImplementedError('Garden computation is not supported for BGE-M3.')
-
@override
def teardown(self) -> None:
if not hasattr(self, '_model'):
return
- self._model.cpu()
del self._model
gc.collect()
diff --git a/lilac/embeddings/nomic_embed.py b/lilac/embeddings/nomic_embed.py
new file mode 100644
index 00000000..77c525a7
--- /dev/null
+++ b/lilac/embeddings/nomic_embed.py
@@ -0,0 +1,108 @@
+"""Nomic Embed text embedding model. Open-source model, designed to run on device."""
+import gc
+from typing import TYPE_CHECKING, ClassVar, Optional
+
+from typing_extensions import override
+
+if TYPE_CHECKING:
+ from sentence_transformers import SentenceTransformer
+
+import functools
+
+from ..schema import Item
+from ..signal import TextEmbeddingSignal
+from ..splitters.spacy_splitter import clustering_spacy_chunker
+from ..tasks import TaskExecutionType
+from .embedding import chunked_compute_embedding
+from .transformer_utils import SENTENCE_TRANSFORMER_BATCH_SIZE, setup_model_device
+
+# See https://huggingface.co/spaces/mteb/leaderboard for leaderboard of models.
+NOMIC_EMBED = 'nomic-ai/nomic-embed-text-v1.5'
+
+
+@functools.cache
+def _get_and_cache_model(model_name: str) -> 'SentenceTransformer':
+ try:
+ from sentence_transformers import SentenceTransformer
+ except ImportError:
+ raise ImportError(
+ 'Could not import the "sentence_transformers" python package. '
+      'Please install it with `pip install sentence_transformers`.'
+ )
+ return setup_model_device(SentenceTransformer(model_name, trust_remote_code=True), model_name)
+
+
+class NomicEmbed15(TextEmbeddingSignal):
+ """Computes Nomic Embeddings 1.5 full (768 dimensions).
+
+
+  This embedding runs on-device. See the [model card](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5)
+ for details.
+ """
+
+ name: ClassVar[str] = 'nomic-embed-1.5-768'
+  display_name: ClassVar[str] = 'Nomic Embeddings 1.5 768'
+ local_batch_size: ClassVar[int] = SENTENCE_TRANSFORMER_BATCH_SIZE
+ local_parallelism: ClassVar[int] = 1
+ local_strategy: ClassVar[TaskExecutionType] = 'threads'
+ supports_garden: ClassVar[bool] = False
+
+ _model_name = NOMIC_EMBED
+ _model: 'SentenceTransformer'
+ _matryoshka_dim = 768
+
+ @override
+ def setup(self) -> None:
+ self._model = _get_and_cache_model(self._model_name)
+
+ @override
+ def compute(self, docs: list[str]) -> list[Optional[Item]]:
+ """Call the embedding function."""
+ try:
+ import torch.nn.functional as F
+ except ImportError:
+ raise ImportError(
+        'Could not import the "torch" python package. '
+        'Please install it with `pip install torch`.'
+ )
+
+ def _encode(doc: list[str]):
+ embeddings = self._model.encode(doc, convert_to_tensor=True)
+ # Extract the dense vectors from the model.
+ embeddings = F.layer_norm(embeddings, normalized_shape=(embeddings.shape[1],))
+ embeddings = embeddings[:, : self._matryoshka_dim]
+ return embeddings.cpu().numpy()
+
+ # While we get docs in batches of 1024, the chunker expands that by a factor of 3-10.
+ # The sentence transformer API actually does batching internally, so we pass
+ # local_batch_size * 16 to allow the library to see all the chunks at once.
+ return chunked_compute_embedding(
+ _encode, docs, self.local_batch_size * 16, chunker=clustering_spacy_chunker
+ )
+
+ @override
+ def teardown(self) -> None:
+ if not hasattr(self, '_model'):
+ return
+
+ self._model.cpu()
+ del self._model
+ gc.collect()
+
+ try:
+ import torch
+
+ torch.cuda.empty_cache()
+ except ImportError:
+ pass
+
+
+class NomicEmbed15_256(NomicEmbed15):
+ """Computes Nomic Embeddings 1.5 (256 dimensions).
+
+
+  This embedding runs on-device. See the [model card](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5)
+ for details.
+ """
+
+ name: ClassVar[str] = 'nomic-embed-1.5-256'
+ display_name: ClassVar[str] = 'Nomic Embeddings 1.5 256'
+ _matryoshka_dim = 256
diff --git a/lilac/signals/default_signals.py b/lilac/signals/default_signals.py
index 451788ec..b85c7193 100644
--- a/lilac/signals/default_signals.py
+++ b/lilac/signals/default_signals.py
@@ -4,6 +4,7 @@
from ..embeddings.cohere import Cohere
from ..embeddings.gte import GTEBase, GTESmall, GTETiny
from ..embeddings.jina import JinaV2Base, JinaV2Small
+from ..embeddings.nomic_embed import NomicEmbed15, NomicEmbed15_256
from ..embeddings.openai import OpenAIEmbedding
from ..embeddings.sbert import SBERT
from ..signal import register_signal
@@ -46,3 +47,5 @@ def register_default_signals() -> None:
register_signal(JinaV2Base, exists_ok=True)
register_signal(BGEM3, exists_ok=True)
+ register_signal(NomicEmbed15, exists_ok=True)
+ register_signal(NomicEmbed15_256, exists_ok=True)
diff --git a/poetry.lock b/poetry.lock
index f5717225..df5a2091 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1421,6 +1421,17 @@ files = [
{file = "duckdb-0.9.2.tar.gz", hash = "sha256:3843afeab7c3fc4a4c0b53686a4cc1d9cdbdadcbb468d60fef910355ecafd447"},
]
+[[package]]
+name = "einops"
+version = "0.7.0"
+description = "A new flavour of deep learning operations"
+optional = true
+python-versions = ">=3.8"
+files = [
+ {file = "einops-0.7.0-py3-none-any.whl", hash = "sha256:0f3096f26b914f465f6ff3c66f5478f9a5e380bb367ffc6493a68143fbbf1fd1"},
+ {file = "einops-0.7.0.tar.gz", hash = "sha256:b2b04ad6081a3b227080c9bf5e3ace7160357ff03043cd66cc5b2319eb7031d1"},
+]
+
[[package]]
name = "email-reply-parser"
version = "0.5.12"
@@ -6056,25 +6067,26 @@ win32 = ["pywin32"]
[[package]]
name = "sentence-transformers"
-version = "2.2.2"
+version = "2.3.1"
description = "Multilingual text embeddings"
optional = true
-python-versions = ">=3.6.0"
+python-versions = ">=3.8.0"
files = [
- {file = "sentence-transformers-2.2.2.tar.gz", hash = "sha256:dbc60163b27de21076c9a30d24b5b7b6fa05141d68cf2553fa9a77bf79a29136"},
+ {file = "sentence-transformers-2.3.1.tar.gz", hash = "sha256:d589d85a464f45338cdbdf99ea715f8068e1fb01c582e0bcdbf60bcf3eade6d0"},
+ {file = "sentence_transformers-2.3.1-py3-none-any.whl", hash = "sha256:285d6637726c3b002186aa4b8bcace1101364b32671fb605297c4c2636b8190e"},
]
[package.dependencies]
-huggingface-hub = ">=0.4.0"
+huggingface-hub = ">=0.15.1"
nltk = "*"
numpy = "*"
+Pillow = "*"
scikit-learn = "*"
scipy = "*"
sentencepiece = "*"
-torch = ">=1.6.0"
-torchvision = "*"
+torch = ">=1.11.0"
tqdm = "*"
-transformers = ">=4.6.0,<5.0.0"
+transformers = ">=4.32.0,<5.0.0"
[[package]]
name = "sentencepiece"
@@ -7007,44 +7019,6 @@ typing-extensions = "*"
dynamo = ["jinja2"]
opt-einsum = ["opt-einsum (>=3.3)"]
-[[package]]
-name = "torchvision"
-version = "0.16.2"
-description = "image and video datasets and models for torch deep learning"
-optional = true
-python-versions = ">=3.8"
-files = [
- {file = "torchvision-0.16.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:bc86f2800cb2c0c1a09c581409cdd6bff66e62f103dc83fc63f73346264c3756"},
- {file = "torchvision-0.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b024bd412df6d3a007dcebf311a894eb3c5c21e1af80d12be382bbcb097a7c3a"},
- {file = "torchvision-0.16.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:e89f10f3c8351972b6e3fda95bc3e479ea8dbfc9dfcfd2c32902dbad4ba5cfc5"},
- {file = "torchvision-0.16.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:96c7583700112a410bdc4e1e4f118c429dab49c29c9a31a2cc3579bc9b08b19d"},
- {file = "torchvision-0.16.2-cp310-cp310-win_amd64.whl", hash = "sha256:9f4032ebb3277fb07ff6a9b818d50a547fb8fcd89d958cfd9e773322454bb688"},
- {file = "torchvision-0.16.2-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:67b1aaf8b8cb02ce75dd445f291a27c8036a502f8c0aa76e28c37a0faac2e153"},
- {file = "torchvision-0.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bef30d03e1d1c629761f4dca51d3b7d8a0dc0acce6f4068ab2a1634e8e7b64e0"},
- {file = "torchvision-0.16.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e59cc7b2bd1ab5c0ce4ae382e4e37be8f1c174e8b5de2f6a23c170de9ae28495"},
- {file = "torchvision-0.16.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:e130b08cc9b3cc73a6c59d6edf032394a322f9579bfd21d14bc2e1d0999aa758"},
- {file = "torchvision-0.16.2-cp311-cp311-win_amd64.whl", hash = "sha256:8692ab1e48807e9604046a6f4beeb67b523294cee1b00828654bb0df2cfce2b2"},
- {file = "torchvision-0.16.2-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:b82732dcf876a37c852772342aa6ee3480c03bb3e2a802ae109fc5f7e28d26e9"},
- {file = "torchvision-0.16.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4b065143d1a720fe8a9077fd4be35d491f98819ec80b3dbbc3ec64d0b707a906"},
- {file = "torchvision-0.16.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:bc5f274e4ecd1b86062063cdf4fd385a1d39d147a3a2685fbbde9ff08bb720b8"},
- {file = "torchvision-0.16.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:335959c43b371c0474af34c1ef2a52efdc7603c45700d29e4475eeb02984170c"},
- {file = "torchvision-0.16.2-cp38-cp38-win_amd64.whl", hash = "sha256:7fd22d86e08eba321af70cad291020c2cdeac069b00ce88b923ca52e06174769"},
- {file = "torchvision-0.16.2-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:56115268b37f0b75364e3654e47ad9abc66ac34c1f9e5e3dfa89a22d6a40017a"},
- {file = "torchvision-0.16.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:82805f8445b094f9d1e770390ee6cc86855e89955e08ce34af2e2274fc0e5c45"},
- {file = "torchvision-0.16.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:3f4bd5fcbc361476e2e78016636ac7d5509e59d9962521f06eb98e6803898182"},
- {file = "torchvision-0.16.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:8199acdf8ab066a28b84a5b6f4d97b58976d9e164b1acc3a9d14fccfaf74bb3a"},
- {file = "torchvision-0.16.2-cp39-cp39-win_amd64.whl", hash = "sha256:41dd4fa9f176d563fe9f1b9adef3b7e582cdfb60ce8c9bc51b094a025be687c9"},
-]
-
-[package.dependencies]
-numpy = "*"
-pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0"
-requests = "*"
-torch = "2.1.2"
-
-[package.extras]
-scipy = ["scipy"]
-
[[package]]
name = "tornado"
version = "6.4"
@@ -8161,6 +8135,7 @@ gte = ["sentence-transformers"]
lang-detection = ["langdetect"]
langsmith = ["langsmith"]
llms = ["openai"]
+nomic = ["einops", "sentence-transformers"]
openai = ["openai"]
pii = ["detect-secrets", "presidio_analyzer"]
sbert = ["sentence-transformers"]
@@ -8171,4 +8146,4 @@ text-stats = ["textacy"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9,<4.0"
-content-hash = "94b8c942172e5a02cb89c0fe8f7ea134169bdf5356d5486549bd8facb73ba8aa"
+content-hash = "5aec3cf990d020b4c1c66a1ebce20b00778021db680a4c4dd8fdf65aa9fb4295"
diff --git a/pyproject.toml b/pyproject.toml
index 60171251..ec1927fd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,9 +52,10 @@ jinja2 = "^3.1.3" # Used for directory li
# LLM providers.
cohere = { version = "^4.32", optional = true }
openai = { version = "^1.7.1", optional = true }
-sentence-transformers = { version = "^2.2.2", optional = true } # SBERT on-device embeddings.
+sentence-transformers = { version = "^2.3.1", optional = true } # SBERT on-device embeddings.
FlagEmbedding = { version = "^1.2.3", optional = true } # bge on-device embeddings.
transformers = { version = "^4.37.2", optional = true } # bge on-device embeddings.
+einops = { version = "^0.7.0", optional = true } # Nomic on-device embeddings.
# Gmail source.
email-reply-parser = { version = "^0.5.12", optional = true }
@@ -100,6 +101,7 @@ all = [
"langsmith",
"llama-hub",
"llama-index",
+ "nomic",
"openai",
"presidio_analyzer",
"sentence-transformers",
@@ -135,6 +137,7 @@ text_stats = ["textacy"] # Text statistics.
# Individual embeddings.
gte = ["sentence-transformers"]
bge = ["FlagEmbedding", "transformers"]
+nomic = ["sentence-transformers", "einops"]
sbert = ["sentence-transformers"]
cohere = ["cohere"]
openai = ["openai"]
diff --git a/web/lib/fastapi_client/models/ConceptSignal.ts b/web/lib/fastapi_client/models/ConceptSignal.ts
index e67095f9..19ea8906 100644
--- a/web/lib/fastapi_client/models/ConceptSignal.ts
+++ b/web/lib/fastapi_client/models/ConceptSignal.ts
@@ -11,7 +11,7 @@ export type ConceptSignal = {
/**
* The name of the pre-computed embedding.
*/
- embedding: 'cohere' | 'sbert' | 'openai' | 'gte-tiny' | 'gte-small' | 'gte-base' | 'jina-v2-small' | 'jina-v2-base' | 'bge-m3';
+ embedding: 'cohere' | 'sbert' | 'openai' | 'gte-tiny' | 'gte-small' | 'gte-base' | 'jina-v2-small' | 'jina-v2-base' | 'bge-m3' | 'nomic-embed-1.5-768' | 'nomic-embed-1.5-256';
namespace: string;
concept_name: string;
version?: (number | null);
diff --git a/web/lib/fastapi_client/models/SemanticSimilaritySignal.ts b/web/lib/fastapi_client/models/SemanticSimilaritySignal.ts
index 7adb8843..ec964d36 100644
--- a/web/lib/fastapi_client/models/SemanticSimilaritySignal.ts
+++ b/web/lib/fastapi_client/models/SemanticSimilaritySignal.ts
@@ -14,7 +14,7 @@ export type SemanticSimilaritySignal = {
/**
* The name of the pre-computed embedding.
*/
- embedding: 'cohere' | 'sbert' | 'openai' | 'gte-tiny' | 'gte-small' | 'gte-base' | 'jina-v2-small' | 'jina-v2-base' | 'bge-m3';
+ embedding: 'cohere' | 'sbert' | 'openai' | 'gte-tiny' | 'gte-small' | 'gte-base' | 'jina-v2-small' | 'jina-v2-base' | 'bge-m3' | 'nomic-embed-1.5-768' | 'nomic-embed-1.5-256';
query: string;
/**
* The input type of the query, used for the query embedding.