Commit
Add support for OctoAI LLM and embeddings (#301)

* fixed typo in dense.py docstring
* added OctoAI embeddings
* added OctoAI system test
* increased batch size
* added information for OctoAI env vars
* updated record_encoder batch size
* added support for OctoAI LLM adaptor
* changed prefix after code review
* added OctoAI to llm unit tests
* fixed linting
* fixed typos in function call
Showing 8 changed files with 263 additions and 3 deletions.
@@ -0,0 +1,50 @@
# ===========================================================
#       Configuration file for Canopy Server
# ===========================================================
tokenizer:
  # -------------------------------------------------------------------------------------------
  # Tokenizer configuration
  # Use LlamaTokenizer from HuggingFace with the relevant OSS model (e.g., Llama 2)
  # -------------------------------------------------------------------------------------------
  type: LlamaTokenizer  # Options: [OpenAITokenizer, LlamaTokenizer]
  params:
    model_name: hf-internal-testing/llama-tokenizer

chat_engine:
  # -------------------------------------------------------------------------------------------
  # Chat engine configuration
  # Use OctoAI as the open source LLM provider
  # You can find the list of supported LLMs at https://octo.ai/docs/text-gen-solution/rest-api
  # -------------------------------------------------------------------------------------------
  params:
    max_prompt_tokens: 2048  # The maximum number of tokens to use for the input prompt to the LLM.
  llm: &llm
    type: OctoAILLM
    params:
      model_name: mistral-7b-instruct-fp16  # The name of the model to use.

  # query_builder:
  #   type: FunctionCallingQueryGenerator  # Options: [FunctionCallingQueryGenerator, LastMessageQueryGenerator, InstructionQueryGenerator]
  #   llm:
  #     type: OctoAILLM
  #     params:
  #       model_name: mistral-7b-instruct-fp16

context_engine:
  # -------------------------------------------------------------------------------------------------------------
  # ContextEngine configuration
  # -------------------------------------------------------------------------------------------------------------
  knowledge_base:
    # -----------------------------------------------------------------------------------------------------------
    # KnowledgeBase configuration
    # -----------------------------------------------------------------------------------------------------------
    record_encoder:
      # --------------------------------------------------------------------------
      # Configuration for the RecordEncoder subcomponent of the knowledge base.
      # Use OctoAI's Embedding endpoint for dense encoding
      # --------------------------------------------------------------------------
      type: OctoAIRecordEncoder
      params:
        model_name: thenlper/gte-large  # The name of the model to use for encoding
        batch_size: 2048  # The number of document chunks to encode in each call to the encoding model
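
For orientation, here is a minimal sketch, not part of this commit, of what the configuration above wires up if the same components are constructed directly in Python. The class names and parameters come from this diff; the OctoAILLM import path is an assumption.

# Illustrative only; assumes OCTOAI_API_KEY is set in the environment.
from canopy.llm import OctoAILLM  # import path assumed
from canopy.knowledge_base.record_encoder.octoai import OctoAIRecordEncoder

# Mirrors chat_engine.llm and knowledge_base.record_encoder above.
llm = OctoAILLM(model_name="mistral-7b-instruct-fp16")
encoder = OctoAIRecordEncoder(model_name="thenlper/gte-large", batch_size=2048)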
@@ -0,0 +1,68 @@
import os
from typing import List

from pinecone_text.dense.openai_encoder import OpenAIEncoder

from canopy.knowledge_base.models import KBDocChunk, KBEncodedDocChunk, KBQuery
from canopy.knowledge_base.record_encoder.dense import DenseRecordEncoder
from canopy.models.data_models import Query

OCTOAI_BASE_URL = "https://text.octoai.run/v1"


class OctoAIRecordEncoder(DenseRecordEncoder):
    """
    OctoAIRecordEncoder is a type of DenseRecordEncoder that uses OctoAI's
    OpenAI-compatible `embeddings` API.
    The implementation uses the `OpenAIEncoder` class from the `pinecone-text` library.
    For more information, see: https://github.com/pinecone-io/pinecone-text
    """  # noqa: E501

    def __init__(self,
                 *,
                 api_key: str = "",
                 base_url: str = OCTOAI_BASE_URL,
                 model_name: str = "thenlper/gte-large",
                 batch_size: int = 1024,
                 **kwargs):
        """
        Initialize the OctoAIRecordEncoder.

        Args:
            api_key: The OctoAI Endpoint API key.
            base_url: The base URL for the OctoAI Endpoint.
            model_name: The name of the OctoAI embeddings model to use for encoding. See https://octo.ai/docs/text-gen-solution/getting-started
            batch_size: The number of documents or queries to encode at once.
                        Defaults to 1024.
            **kwargs: Additional arguments to pass to the underlying `pinecone-text` `OpenAIEncoder`.
        """  # noqa: E501
        octoai_api_key = api_key or os.environ.get("OCTOAI_API_KEY")
        if not octoai_api_key:
            raise ValueError(
                "An OctoAI API token is required to use OctoAI. "
                "Please provide it as an argument "
                "or set the OCTOAI_API_KEY environment variable."
            )
        encoder = OpenAIEncoder(model_name,
                                base_url=base_url,
                                api_key=octoai_api_key,
                                **kwargs)
        super().__init__(dense_encoder=encoder, batch_size=batch_size)

    def encode_documents(self, documents: List[KBDocChunk]) -> List[KBEncodedDocChunk]:
        """
        Encode a list of documents: takes a list of KBDocChunk and returns a list of KBEncodedDocChunk.

        Args:
            documents: A list of KBDocChunk to encode.

        Returns:
            encoded chunks: A list of KBEncodedDocChunk, with the `values` field populated by the generated embedding vectors.
        """  # noqa: E501
        return super().encode_documents(documents)

    async def _aencode_documents_batch(self,
                                       documents: List[KBDocChunk]
                                       ) -> List[KBEncodedDocChunk]:
        raise NotImplementedError

    async def _aencode_queries_batch(self, queries: List[Query]) -> List[KBQuery]:
        raise NotImplementedError
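
A brief usage sketch, illustrative rather than part of the diff; it requires a valid OCTOAI_API_KEY in the environment (the constructor checks for it), and the chunk fields mirror the ones used in the system test further below.

# Usage sketch, not part of this commit.
from canopy.knowledge_base.models import KBDocChunk
from canopy.knowledge_base.record_encoder.octoai import OctoAIRecordEncoder
from canopy.models.data_models import Query

encoder = OctoAIRecordEncoder()  # defaults to thenlper/gte-large
chunk = KBDocChunk(id="doc_1_0",
                   text="Sample document",
                   document_id="doc_1",
                   metadata={"test": 0},
                   source="doc_1")

# Each encoded chunk carries the embedding in its `values` field.
[encoded] = encoder.encode_documents([chunk])
assert len(encoded.values) == encoder.dimension  # 1024 for thenlper/gte-large

[kb_query] = encoder.encode_queries([Query(text="Sample query")])
print(len(kb_query.values))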
@@ -0,0 +1,61 @@
import os
from typing import Optional, Any

from canopy.llm import OpenAILLM
from canopy.llm.models import Function
from canopy.models.data_models import Messages

OCTOAI_BASE_URL = "https://text.octoai.run/v1"


class OctoAILLM(OpenAILLM):
    """
    OctoAI LLM wrapper built on top of the OpenAI Python client.

    Note: OctoAI requires a valid API key to use this class.
          You can set the "OCTOAI_API_KEY" environment variable.
    """

    def __init__(
        self,
        model_name: str = "mistral-7b-instruct-fp16",
        *,
        base_url: Optional[str] = OCTOAI_BASE_URL,
        api_key: Optional[str] = None,
        **kwargs: Any,
    ):
        octoai_api_key = api_key or os.environ.get("OCTOAI_API_KEY")
        if not octoai_api_key:
            raise ValueError(
                "OctoAI API key is required to use OctoAI. "
                "If you haven't done so, please sign up at https://octo.ai \n"
                "The key can be provided as an argument or "
                "via the OCTOAI_API_KEY environment variable."
            )
        super().__init__(
            model_name,
            api_key=octoai_api_key,
            base_url=base_url,
            **kwargs
        )

    def enforced_function_call(
        self,
        system_prompt: str,
        chat_history: Messages,
        function: Function,
        *,
        max_tokens: Optional[int] = None,
        model_params: Optional[dict] = None,
    ) -> dict:
        raise NotImplementedError("OctoAI doesn't support function calling.")

    async def aenforced_function_call(
        self,
        system_prompt: str,
        chat_history: Messages,
        function: Function,
        *,
        max_tokens: Optional[int] = None,
        model_params: Optional[dict] = None,
    ) -> dict:
        raise NotImplementedError("OctoAI doesn't support function calling.")
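
A short usage sketch, not part of the diff. Everything below the constructor is inherited from OpenAILLM; the `chat_completion` signature and the `UserMessage` helper are assumptions based on Canopy's data models, not something this commit shows.

# Illustrative sketch; assumes OCTOAI_API_KEY is set in the environment.
from canopy.models.data_models import UserMessage  # helper assumed

llm = OctoAILLM(model_name="mistral-7b-instruct-fp16")

# chat_completion is inherited from OpenAILLM; signature assumed here.
response = llm.chat_completion(
    system_prompt="You are a helpful assistant.",
    chat_history=[UserMessage(content="What does Canopy do?")],
)
print(response.choices[0].message.content)

# Function calling, by contrast, is explicitly unsupported:
# llm.enforced_function_call(...) raises NotImplementedError.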
@@ -0,0 +1,53 @@
import pytest

from canopy.knowledge_base.models import KBDocChunk
from canopy.knowledge_base.record_encoder.octoai import OctoAIRecordEncoder
from canopy.models.data_models import Query


documents = [KBDocChunk(
    id=f"doc_1_{i}",
    text=f"Sample document {i}",
    document_id=f"doc_{i}",
    metadata={"test": i},
    source="doc_1",
)
    for i in range(4)
]

queries = [Query(text="Sample query 1"),
           Query(text="Sample query 2"),
           Query(text="Sample query 3"),
           Query(text="Sample query 4")]


@pytest.fixture
def encoder():
    return OctoAIRecordEncoder(batch_size=2)


def test_dimension(encoder):
    assert encoder.dimension == 1024


@pytest.mark.parametrize("items,function",
                         [(documents, "encode_documents"),
                          (queries, "encode_queries"),
                          ([], "encode_documents"),
                          ([], "encode_queries")])
def test_encode_documents(encoder, items, function):
    encoded_documents = getattr(encoder, function)(items)

    assert len(encoded_documents) == len(items)
    assert all(len(encoded.values) == encoder.dimension
               for encoded in encoded_documents)


@pytest.mark.asyncio
@pytest.mark.parametrize("function,items",
                         [("aencode_documents", documents),
                          ("aencode_queries", queries)])
async def test_aencode_not_implemented(encoder, function, items):
    with pytest.raises(NotImplementedError):
        await getattr(encoder, function)(items)
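
The async encoding paths are deliberately left unimplemented, and the tests above pin that behavior. If async support were added later, one possible pattern, purely a sketch and not part of this commit, is to offload the blocking batch encoders to a worker thread. This assumes the synchronous counterparts `_encode_documents_batch`/`_encode_queries_batch` exist on `DenseRecordEncoder`.

# Hypothetical subclass, not part of this commit.
import asyncio
from typing import List

from canopy.knowledge_base.models import KBDocChunk, KBEncodedDocChunk, KBQuery
from canopy.knowledge_base.record_encoder.octoai import OctoAIRecordEncoder
from canopy.models.data_models import Query


class AsyncOctoAIRecordEncoder(OctoAIRecordEncoder):
    # Run the synchronous batch encoders in a worker thread so the
    # event loop is not blocked while waiting on the HTTP calls.
    async def _aencode_documents_batch(self,
                                       documents: List[KBDocChunk]
                                       ) -> List[KBEncodedDocChunk]:
        return await asyncio.to_thread(self._encode_documents_batch, documents)

    async def _aencode_queries_batch(self, queries: List[Query]) -> List[KBQuery]:
        return await asyncio.to_thread(self._encode_queries_batch, queries)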