Skip to content
This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Commit

Permalink
Fix extra dependencies (#292)
Browse files Browse the repository at this point in the history
* Fix extra dependencies

* Remove debug

* Update readme

* Fix typo

* Update readme

* Update

* Fix tests

* Fix test

* Add blank line

* Fix
  • Loading branch information
izellevy authored Feb 13, 2024
1 parent ae69371 commit c9ce812
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
TEST_WORKER_COUNT = 8

POETRY_DEFAULT_EXTRAS = -E cohere -E transformers
POETRY_DEFAULT_EXTRAS = -E cohere -E transformers -E grpc
POETRY_INSTALL_ARGS =

REPOSITORY = ghcr.io/pinecone-io/canopy
Expand Down
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,23 @@ source canopy-env/bin/activate
pip install canopy-sdk
```

<details>
<summary>You can also install canopy-sdk with extras. <b><u>CLICK HERE</u></b> to see the available extras

<br />
</summary>

### Extras

| Name | Description |
|----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|
| `grpc` | To unlock some performance improvements by working with the GRPC version of the [Pinecone Client](https://github.com/pinecone-io/pinecone-python-client) |
| `torch` | To enable embeddings provided by [sentence-transformers](https://www.sbert.net/) |
| `transformers` | If you are using Anyscale LLMs, it's recommended to use the `LLamaTokenizer` tokenizer, which requires `transformers` as a dependency                        |
| `cohere`       | To use the Cohere reranker and/or the Cohere LLM                                                                                                         |

</details>

2. Set up the environment variables

```bash
Expand Down
30 changes: 23 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ packages = [{include = "canopy", from = "src"},

[tool.poetry.dependencies]
python = ">=3.9,<3.13"
pinecone-client = [{ version = "^3.0.0" },
{ version = "^3.0.0", extras = ["grpc"], optional = true }]
python-dotenv = "^1.0.0"
openai = "^1.2.3"
tiktoken = "^0.3.3"
Expand All @@ -29,21 +27,39 @@ types-pyyaml = "^6.0.12.12"
jsonschema = "^4.2.0"
types-jsonschema = "^4.2.0"
prompt-toolkit = "^3.0.39"
pinecone-text = [{version = "^0.8.0"},
{version = "^0.8.0", extras = ["dense"], optional = true}]

tokenizers = "^0.15.0"
transformers = {version = "^4.35.2", optional = true}
sentencepiece = "^0.1.99"
pandas = "2.0.0"
pyarrow = "^14.0.1"
cohere = { version = ">=4.37", optional = true }


pinecone-text = "^0.8.0"
# Extra: torch (Relies on pinecone-text[dense])
# Dependencies here should be synced with pinecone-text's pyproject.toml
# See: https://github.com/pinecone-io/pinecone-text/blob/0eb00a202f5c9bc8cc48c8b7536fcbabf95f096e/pyproject.toml#L30
torch = { version = ">=1.13.1", optional = true }
sentence-transformers = { version = ">=2.0.0", optional = true }


pinecone-client = "^3.0.0"
# Extra: grpc (Relies on pinecone-client[grpc])
# Dependencies here should be synced with pinecone-python-client's pyproject.toml
# See: https://github.com/pinecone-io/pinecone-python-client/blob/886f932b66521a6ab5b1e076f6a53ba2f16eb41b/pyproject.toml#L94
grpcio = { version = ">=1.44.0", optional = true }
grpc-gateway-protoc-gen-openapiv2 = { version = "0.1.0", optional = true }
googleapis-common-protos = { version = ">=1.53.0", optional = true }
lz4 = { version = ">=3.1.3", optional = true }
protobuf = { version = "~=3.20.0", optional = true }



[tool.poetry.extras]
cohere = ["cohere"]
torch = ["pinecone-text"]
torch = ["torch", "sentence-transformers"]
transformers = ["transformers"]
grpc = ["pinecone-client"]
grpc = ["grpcio", "grpc-gateway-protoc-gen-openapiv2", "googleapis-common-protos", "lz4", "protobuf"]


[tool.poetry.group.dev.dependencies]
Expand Down
5 changes: 3 additions & 2 deletions src/canopy/knowledge_base/knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@
from functools import lru_cache

from typing import List, Optional, Dict, Any, Union
from pinecone import (ServerlessSpec, PodSpec, Index,
from pinecone import (ServerlessSpec, PodSpec,
PineconeApiException)

from canopy.utils.debugging import CANOPY_DEBUG_INFO

try:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone.grpc import GRPCIndex as Index
except ImportError:
from pinecone import Pinecone
from pinecone import Pinecone, Index

from canopy.knowledge_base.base import BaseKnowledgeBase
from canopy.knowledge_base.chunker import Chunker, MarkdownChunker
Expand Down
7 changes: 6 additions & 1 deletion tests/system/knowledge_base/test_knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@

import pytest
import numpy as np
from pinecone import Index, Pinecone
try:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone.grpc import GRPCIndex as Index
except ImportError:
from pinecone import Pinecone, Index

from tenacity import (
retry,
stop_after_delay,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,16 @@ def encoder():
def test_dimension(encoder):
with patch('pinecone_text.dense.SentenceTransformerEncoder.encode_documents') \
as mock_encode_documents:
mock_encode_documents.return_value = [[0.1, 0.2, 0.3]]
mock_encode_documents.return_value = [0.1, 0.2, 0.3]
assert encoder.dimension == 3


def custom_encode(*args, **kwargs):
input_to_encode = args[0]
return [[0.1, 0.2, 0.3] for _ in input_to_encode]
if isinstance(input_to_encode, list):
return [[0.1, 0.2, 0.3] for _ in input_to_encode]
else:
return [0.1, 0.2, 0.3]


@pytest.mark.parametrize("items,function",
Expand Down

0 comments on commit c9ce812

Please sign in to comment.