From f44bc3a781c6b2624a3da8a26b184948510819d3 Mon Sep 17 00:00:00 2001
From: Amnon Catav
Date: Sun, 29 Oct 2023 17:20:06 +0200
Subject: [PATCH 01/21] add quick start notebook

---
 examples/canopy-lib-quickstart.ipynb | 866 +++++++++++++++++++++++++++
 1 file changed, 866 insertions(+)
 create mode 100644 examples/canopy-lib-quickstart.ipynb

diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb
new file mode 100644
index 00000000..b4929d57
--- /dev/null
+++ b/examples/canopy-lib-quickstart.ipynb
@@ -0,0 +1,866 @@
+{
+ "cells": [
+  { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
+    "# Pinecone Canopy library quick start notebook\n",
+    "\n",
+    "**Canopy** is a Software Development Kit (SDK) for AI applications. Canopy allows you to test, build and package Retrieval Augmented Generation (RAG) applications with the Pinecone vector database. \n",
+    "\n",
+    "This notebook introduces the quick start steps for working with the Canopy library. You can find more details about this project and advanced usage in the project [documentation](../README.md).\n"
+  ] },
+  { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
+    "## Prerequisites\n",
+    "\n",
+    "Install the Canopy library:"
+  ] },
+  { "cell_type": "code", "execution_count": 180, "metadata": {}, "outputs": [
+    { "name": "stdout", "output_type": "stream", "text": [
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+    ] }
+  ], "source": [
+    "!pip install -qU git+ssh://git@github.com/pinecone-io/canopy.git@dev"
+  ] },
+  { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
+    "By default, Canopy uses Pinecone and OpenAI, so we need to configure the related API keys.\n",
+    "\n",
+    "To get a free trial Pinecone API key and environment, register or log into your Pinecone account in the [console](https://app.pinecone.io/). You can access your API key from the \"API Keys\" section in the sidebar of your dashboard, and find the environment name next to it.\n",
+    "\n",
+    "You can find your free trial OpenAI API key [here](https://platform.openai.com/account/api-keys). You might need to log in or register for OpenAI services.\n",
+    "\n"
+  ] },
+  { "cell_type": "code", "execution_count": 181, "metadata": {}, "outputs": [], "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"PINECONE_API_KEY\"] = os.environ.get('PINECONE_API_KEY') or 'YOUR_PINECONE_API_KEY'\n",
+    "os.environ[\"PINECONE_ENVIRONMENT\"] = os.environ.get('PINECONE_ENVIRONMENT') or 'PINECONE_ENVIRONMENT'\n",
+    "os.environ[\"OPENAI_API_KEY\"] = os.environ.get('OPENAI_API_KEY') or 'OPENAI_API_KEY'"
+  ] },
+  { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
+    "## Pinecone Documentation Dataset"
+  ] },
+  { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
+    "Now we'll load a crawl of the Pinecone docs [website](https://docs.pinecone.io/docs/) from 25/10/23.\n",
+    "\n",
+    "We will use this data to demonstrate how to build a RAG pipeline to answer questions about Pinecone DB."
+ ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtextsourcemetadata
0728aeea1-1dcf-5d0a-91f2-ecccd4dd4272# Scale indexes\\n\\n[Suggest Edits](/edit/scali...https://docs.pinecone.io/docs/scaling-indexes{'created_at': '2023_10_25', 'title': 'scaling...
12f19f269-171f-5556-93f3-a2d7eabbe50f# Understanding organizations\\n\\n[Suggest Edit...https://docs.pinecone.io/docs/organizations{'created_at': '2023_10_25', 'title': 'organiz...
2b2a71cb3-5148-5090-86d5-7f4156edd7cf# Manage datasets\\n\\n[Suggest Edits](/edit/dat...https://docs.pinecone.io/docs/datasets{'created_at': '2023_10_25', 'title': 'datasets'}
31dafe68a-2e78-57f7-a97a-93e043462196# Architecture\\n\\n[Suggest Edits](/edit/archit...https://docs.pinecone.io/docs/architecture{'created_at': '2023_10_25', 'title': 'archite...
48b07b24d-4ec2-58a1-ac91-c8e6267b9ffd# Moving to production\\n\\n[Suggest Edits](/edi...https://docs.pinecone.io/docs/moving-to-produc...{'created_at': '2023_10_25', 'title': 'moving-...
\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 728aeea1-1dcf-5d0a-91f2-ecccd4dd4272 \n", + "1 2f19f269-171f-5556-93f3-a2d7eabbe50f \n", + "2 b2a71cb3-5148-5090-86d5-7f4156edd7cf \n", + "3 1dafe68a-2e78-57f7-a97a-93e043462196 \n", + "4 8b07b24d-4ec2-58a1-ac91-c8e6267b9ffd \n", + "\n", + " text \\\n", + "0 # Scale indexes\\n\\n[Suggest Edits](/edit/scali... \n", + "1 # Understanding organizations\\n\\n[Suggest Edit... \n", + "2 # Manage datasets\\n\\n[Suggest Edits](/edit/dat... \n", + "3 # Architecture\\n\\n[Suggest Edits](/edit/archit... \n", + "4 # Moving to production\\n\\n[Suggest Edits](/edi... \n", + "\n", + " source \\\n", + "0 https://docs.pinecone.io/docs/scaling-indexes \n", + "1 https://docs.pinecone.io/docs/organizations \n", + "2 https://docs.pinecone.io/docs/datasets \n", + "3 https://docs.pinecone.io/docs/architecture \n", + "4 https://docs.pinecone.io/docs/moving-to-produc... \n", + "\n", + " metadata \n", + "0 {'created_at': '2023_10_25', 'title': 'scaling... \n", + "1 {'created_at': '2023_10_25', 'title': 'organiz... \n", + "2 {'created_at': '2023_10_25', 'title': 'datasets'} \n", + "3 {'created_at': '2023_10_25', 'title': 'archite... \n", + "4 {'created_at': '2023_10_25', 'title': 'moving-... " + ] + }, + "execution_count": 182, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "data = pd.read_parquet(\"https://storage.googleapis.com/pinecone-datasets-dev/pinecone_docs_ada-002/raw/file1.parquet\")\n", + "data.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each record in this dataset represents a single page in Pinecone's documentation. Each row contatins a unique id, the raw text of the page in markdown language, the url of the page as \"source\" and some metadata. " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Init a Tokenizer\n", + "\n", + "\n", + "Many of Canopy's components are using tokenization, which is a process that splits text into tokens - basic units of text (like word or sub-words) that are used for processing. Therefore, Canopy uses a singleton `Tokenizer` object which needs to be initialized once." + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [], + "source": [ + "from canopy.tokenizer import Tokenizer\n", + "Tokenizer.initialize()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After initilizing the global object, we can simply create an instance from anywhere in our code, without providing any parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Hello', ' world', '!']" + ] + }, + "execution_count": 184, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from canopy.tokenizer import Tokenizer\n", + "\n", + "tokenizer = Tokenizer()\n", + "\n", + "tokenizer.tokenize(\"Hello world!\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating a KnowledgBase to store our data for search\n", + "\n", + "`KnowledgeBase` is an object that is responsible for storing and query data. 
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"## Creating a KnowledgeBase to store our data for search\n",
"\n",
"`KnowledgeBase` is an object that is responsible for storing and querying data. It holds a connection to a single Pinecone index and provides a simple API to insert, delete and search textual documents.\n",
"\n",
"During an upsert, the KnowledgeBase divides the text into smaller chunks, transforms them into vector embeddings, and then upserts these vectors into the underlying Pinecone index. When querying, it converts the textual input into a vector and executes the query against the underlying index to retrieve the top-k most closely matched chunks.\n",
"\n",
"Here we create a `KnowledgeBase` with our desired index name: "
] },
{ "cell_type": "code", "execution_count": 185, "metadata": {}, "outputs": [], "source": [
"from canopy.knowledge_base import KnowledgeBase\n",
"\n",
"INDEX_NAME = \"my-index\"\n",
"\n",
"kb = KnowledgeBase(index_name=INDEX_NAME)"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"Now we need to create a new index in Pinecone, if it does not already exist:"
] },
{ "cell_type": "code", "execution_count": 186, "metadata": {}, "outputs": [], "source": [
"kb.create_canopy_index(indexed_fields=[\"title\"])"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"You can see the index created in Pinecone's [console](https://app.pinecone.io/).\n",
"\n",
"The next time we want to initialize a knowledge base instance for this index, we can simply call the connect method:"
] },
{ "cell_type": "code", "execution_count": 187, "metadata": {}, "outputs": [], "source": [
"kb = KnowledgeBase(index_name=INDEX_NAME)\n",
"kb.connect()"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"> 💡 Note: a knowledge base must be connected to an index before executing any operation. 
You should call `kb.connect()` to connect to an existing index, or call `kb.create_canopy_index()` to create a new one, before calling any other method of the KB." ] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"## Upsert data to our KnowledgeBase"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"First, we need to convert our dataset to a list of `Document` objects.\n",
"\n",
"Each document object can hold id, text, source and metadata:"
] },
{ "cell_type": "code", "execution_count": 188, "metadata": {}, "outputs": [], "source": [
"from canopy.models.data_models import Document\n",
"\n",
"example_docs = [Document(id=\"1\",\n",
"                         text=\"This is text for example\",\n",
"                         source=\"https://url.com\"),\n",
"                Document(id=\"2\",\n",
"                         text=\"this is another text\",\n",
"                         source=\"https://another-url.com\",\n",
"                         metadata={\"my-key\": \"my-value\"})]"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"Luckily, the columns in our dataset fit this schema, so we can use a simple iteration to prepare our data:"
] },
{ "cell_type": "code", "execution_count": 189, "metadata": {}, "outputs": [], "source": [
"documents = [Document(**row) for _, row in data.iterrows()]"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"Now we are ready to upsert our data with a single command:"
] },
{ "cell_type": "code", "execution_count": 190, "metadata": {}, "outputs": [], "source": [
"kb.upsert(documents)"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"Internally, the KnowledgeBase handles all the processing needed to load the data into Pinecone. It chunks the text into smaller pieces and encodes them into vectors (embeddings) that can then be upserted directly to Pinecone. Later in this notebook we'll learn how to tune and customize this process.\n",
"\n",
"Currently, in free tier indexes under the `gcp-starter` environment, it might take a few seconds for the upsert operation to complete. So we need to wait a bit and make sure the data is available for query before we continue."
] },
{ "cell_type": "code", "execution_count": 191, "metadata": {}, "outputs": [], "source": [
"import time\n",
"from canopy.models.data_models import Query\n",
"\n",
"\n",
"if os.environ[\"PINECONE_ENVIRONMENT\"] == \"gcp-starter\":\n",
"    # Poll the index until all upserted chunks are available for query\n",
"    start_time = time.time()\n",
"    timeout = 20\n",
"    top_k = 500\n",
"    while time.time() - start_time < timeout:\n",
"        results = kb.query([Query(text=\"sample query\", top_k=top_k)])\n",
"        if len(results[0].documents) == top_k:\n",
"            break\n",
"        time.sleep(1)\n",
"    \n",
"    if len(results[0].documents) < top_k:\n",
"        raise TimeoutError(f\"Upsert failed to finish within {timeout} seconds. Please wait, or validate that the upsert operation ran successfully.\")"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"## Query the KnowledgeBase\n",
"\n",
"Now we can query the knowledge base. 
The KnowledgeBase will use its default parameters, like `top_k`, to execute the query:"
] },
{ "cell_type": "code", "execution_count": 192, "metadata": {}, "outputs": [], "source": [
"def print_query_results(results):\n",
"    for query_results in results:\n",
"        print('query: ' + query_results.query + '\\n')\n",
"        for document in query_results.documents:\n",
"            print('document: ' + document.text.replace(\"\\n\", \"\\\\n\"))\n",
"            print('source: ' + document.source)\n",
"            print(f\"score: {document.score}\\n\")"
] },
{ "cell_type": "code", "execution_count": 193, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"query: p1 pod capacity\n",
"\n",
"document: ### s1 pods\\n\\n\\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\\n\\n\\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\\n\\n\\n### p1 pods\\n\\n\\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\\n\\n\\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.\n",
"source: https://docs.pinecone.io/docs/indexes\n",
"score: 0.843660593\n",
"\n",
"document: ## Pod storage capacity\\n\\n\\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\\n\\n\\nEach **s1** pod has enough capacity for 5M vectors with 768 dimensions.\\n\\n\\n## Metadata\\n\\n\\nMax metadata size per vector is 40 KB.\\n\\n\\nNull metadata values are not supported. Instead of setting a key to hold a null value, we recommend you remove that key from the metadata payload.\\n\\n\\nMetadata with high cardinality, such as a unique value for every vector in a large index, uses more memory than expected and can cause the pods to become full.\n",
"source: https://docs.pinecone.io/docs/limits\n",
"score: 0.842372298\n",
"\n",
"document: #### p2 pod type (Public Preview)(\"Beta\")\\n\\n\\nThe new [p2 pod type](indexes/#p2-pods) provides search speeds of around 5ms and throughput of 200 queries per second per replica, or approximately 10x faster speeds and higher throughput than the p1 pod type, depending on your data and network conditions. \\n\\n\\nThis is a **public preview** feature and is not appropriate for production workloads.\\n\\n\\n#### Improved p1 and s1 performance\\n\\n\\nThe [s1](indexes/#s1-pods) and [p1](indexes/#p1-pods) pod types now offer approximately 50% higher query throughput and 50% lower latency, depending on your workload.\n",
"source: https://docs.pinecone.io/docs/release-notes\n",
"score: 0.834858\n",
"\n",
"document: ### p2 pods\\n\\n\\nThe p2 pod type provides greater query throughput with lower latency. For vectors with fewer than 128 dimension and queries where `topK` is less than 50, p2 pods support up to 200 QPS per replica and return queries in less than 10ms. This means that query throughput and latency are better than s1 and p1.\\n\\n\\nEach p2 pod has enough capacity for around 1M vectors of 768 dimensions. However, capacity may vary with dimensionality.\\n\\n\\nThe data ingestion rate for p2 pods is significantly slower than for p1 pods; this rate decreases as the number of dimensions increases. 
For example, a p2 pod containing vectors with 128 dimensions can upsert up to 300 updates per second; a p2 pod containing vectors with 768 dimensions or more supports upsert of 50 updates per second. Because query latency and throughput for p2 pods vary from p1 pods, test p2 pod performance with your dataset.\\n\\n\\nThe p2 pod type does not support sparse vector values.\n", + "source: https://docs.pinecone.io/docs/indexes\n", + "score: 0.832658708\n", + "\n", + "document: ## Number of vectors\\n\\n\\nThe most important consideration in sizing is the [number of vectors](/docs/insert-data/) you plan on working with. As a rule of thumb, a single p1 pod can store approximately 1M vectors, while a s1 pod can store 5M vectors. However, this can be affected by other factors, such as dimensionality and metadata, which are explained below.\n", + "source: https://docs.pinecone.io/docs/choosing-index-type-and-size\n", + "score: 0.827741921\n", + "\n" + ] + } + ], + "source": [ + "from canopy.models.data_models import Query\n", + "results = kb.query([Query(text=\"p1 pod capacity\")])\n", + "\n", + "print_query_results(results)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also use metadata filtering and specify `top_k`:" + ] + }, + { + "cell_type": "code", + "execution_count": 194, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query: p1 pod capacity\n", + "\n", + "document: ## Pod storage capacity\\n\\n\\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\\n\\n\\nEach **s1** pod has enough capacity for 5M vectors with 768 dimensions.\\n\\n\\n## Metadata\\n\\n\\nMax metadata size per vector is 40 KB.\\n\\n\\nNull metadata values are not supported. Instead of setting a key to hold a null value, we recommend you remove that key from the metadata payload.\\n\\n\\nMetadata with high cardinality, such as a unique value for every vector in a large index, uses more memory than expected and can cause the pods to become full.\n", + "source: https://docs.pinecone.io/docs/limits\n", + "score: 0.842372298\n", + "\n", + "document: ## Retention\\n\\n\\nIn general, indexes on the Starter (free) plan are archived as collections and deleted after 7 days of inactivity; for indexes created by certain open source projects such as AutoGPT, indexes are archived and deleted after 1 day of inactivity. 
To prevent this, you can send any API request to Pinecone and the counter will reset.\\n\\nUpdated about 1 month ago \\n\\n\\n\\n---\\n\\n* [Table of Contents](#)\\n* + [Upserts](#upserts)\\n\t+ [Queries](#queries)\\n\t+ [Fetch and Delete](#fetch-and-delete)\\n\t+ [Namespaces](#namespaces)\\n\t+ [Pod storage capacity](#pod-storage-capacity)\\n\t+ [Metadata](#metadata)\\n\t+ [Retention](#retention)\n",
"source: https://docs.pinecone.io/docs/limits\n",
"score: 0.71726948\n",
"\n"
] } ], "source": [
"from canopy.models.data_models import Query\n",
"results = kb.query([Query(text=\"p1 pod capacity\",\n",
"                          metadata_filter={\"title\": \"limits\"},\n",
"                          top_k=2)])\n",
"\n",
"print_query_results(results)"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"As you can see above, using the metadata filter we get results only from the \"limits\" page."
] },
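{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"`kb.query()` takes a list, so several searches can run in one call. A minimal sketch, assuming the method also accepts a `global_metadata_filter` argument that applies a single filter to every query in the call:"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Two queries in a single call, both restricted to the \"limits\" page\n",
"results = kb.query([Query(text=\"p1 pod capacity\"),\n",
"                    Query(text=\"max metadata size\")],\n",
"                   global_metadata_filter={\"title\": \"limits\"})\n",
"\n",
"print_query_results(results)"
] },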
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"## Query the Context Engine\n",
"\n",
"While the `KnowledgeBase` is in charge of executing textual queries against the Pinecone index, `ContextEngine` is a higher-level component that holds the KnowledgeBase but has a slightly different API:\n",
"\n",
"1. The context engine can take user questions in natural language. It then generates search queries out of them. For example, given the question *\"What is the capacity of p1 pods?\"*, the ContextEngine would first convert it into the search query *\"p1 pod capacity\"* and then run it against the KnowledgeBase.\n",
"2. The `query` method of the context engine supports a `max_context_tokens` parameter that limits the number of tokens used in its results. This capability allows the user to better handle token budgets and limits in the prompts later sent to the LLM."
] },
{ "cell_type": "code", "execution_count": 195, "metadata": {}, "outputs": [], "source": [
"from canopy.context_engine import ContextEngine\n",
"context_engine = ContextEngine(kb)"
] },
{ "cell_type": "code", "execution_count": 196, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"{\n",
" \"query\": \"What is the capacity of p1 pods?\",\n",
" \"snippets\": [\n",
" {\n",
" \"source\": \"https://docs.pinecone.io/docs/limits\",\n",
" \"text\": \"## Pod storage capacity\\n\\n\\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\\n\\n\\nEach **s1** pod has enough capacity for 5M vectors with 768 dimensions.\\n\\n\\n## Metadata\\n\\n\\nMax metadata size per vector is 40 KB.\\n\\n\\nNull metadata values are not supported. Instead of setting a key to hold a null value, we recommend you remove that key from the metadata payload.\\n\\n\\nMetadata with high cardinality, such as a unique value for every vector in a large index, uses more memory than expected and can cause the pods to become full.\"\n",
" },\n",
" {\n",
" \"source\": \"https://docs.pinecone.io/docs/indexes\",\n",
" \"text\": \"### s1 pods\\n\\n\\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\\n\\n\\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\\n\\n\\n### p1 pods\\n\\n\\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\\n\\n\\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.\"\n",
" },\n",
" {\n",
" \"source\": \"https://docs.pinecone.io/docs/choosing-index-type-and-size\",\n",
" \"text\": \"## Number of vectors\\n\\n\\nThe most important consideration in sizing is the [number of vectors](/docs/insert-data/) you plan on working with. As a rule of thumb, a single p1 pod can store approximately 1M vectors, while a s1 pod can store 5M vectors. However, this can be affected by other factors, such as dimensionality and metadata, which are explained below.\"\n",
" }\n",
" ]\n",
"}\n",
"\n",
"# tokens in context returned: 435\n"
] } ], "source": [
"import json\n",
"\n",
"result = context_engine.query([Query(text=\"What is the capacity of p1 pods?\", top_k=5)], max_context_tokens=512)\n",
"\n",
"print(result.to_text(indent=2))\n",
"print(f\"\\n# tokens in context returned: {result.num_tokens}\")"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"As you can see above, we queried the context engine with a question in natural language. Also, even though we set `top_k=5`, the context engine retrieved only 3 results in order to satisfy the 512-token limit."
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"## Knowledgeable chat engine\n",
"\n",
"Now we are ready to start chatting with our data!\n",
"\n",
"Canopy's `ChatEngine` exposes an OpenAI-compatible API, except that behind the scenes it uses the context engine to provide knowledgeable answers to the user's questions."
] },
{ "cell_type": "code", "execution_count": 197, "metadata": {}, "outputs": [], "source": [
"from canopy.chat_engine import ChatEngine\n",
"chat_engine = ChatEngine(context_engine)"
] },
{ "cell_type": "code", "execution_count": 198, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"The capacity of p1 pods is approximately 1 million vectors of 768 dimensions.\n"
] } ], "source": [
"from canopy.models.data_models import MessageBase\n",
"\n",
"response = chat_engine.chat(messages=[MessageBase(role=\"user\", content=\"What is the capacity of p1 pods?\")], stream=False)\n",
"\n",
"print(response.choices[0].message.content)"
] },
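{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"The chat call itself is stateless: the conversation lives in the `messages` list we pass. A minimal sketch of a multi-turn exchange, reusing the same API (the follow-up question is hypothetical):"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"history = [MessageBase(role=\"user\", content=\"What is the capacity of p1 pods?\")]\n",
"response = chat_engine.chat(messages=history, stream=False)\n",
"\n",
"# Append the assistant's answer, then ask a follow-up question\n",
"history.append(MessageBase(role=\"assistant\", content=response.choices[0].message.content))\n",
"history.append(MessageBase(role=\"user\", content=\"And how many vectors can an s1 pod hold?\"))\n",
"\n",
"follow_up = chat_engine.chat(messages=history, stream=False)\n",
"print(follow_up.choices[0].message.content)"
] },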
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"> 💡 Note: as opposed to the OpenAI API, Canopy by default truncates the chat history to the most recent messages to avoid exceeding the prompt token limit. This behavior can be changed; see the chat engine [documentation](https://github.com/pinecone-io/canopy/blob/main/src/canopy/chat_engine/chat_engine.py)."
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"## Customization Example\n",
"\n",
"Canopy is built as a modular library, where each component can be fully customized by the user.\n",
"\n",
"Before we start, here is a quick overview of the inner components used by the knowledge base:\n",
"\n",
"- **Index**: A Pinecone index that holds the vector representations of the documents.\n",
"- **Chunker**: A `Chunker` object that is used to chunk the documents into smaller pieces of text.\n",
"- **Encoder**: A `RecordEncoder` object that is used to encode the chunks and queries into vector representations.\n",
"\n",
"In the following example, we show how you can customize the `Chunker` component used by the knowledge base.\n",
"\n",
"First, we will create a dummy chunker class that simply splits the text on newlines (`\\n`)."
] },
{ "cell_type": "code", "execution_count": 199, "metadata": {}, "outputs": [], "source": [
"from typing import List\n",
"from canopy.knowledge_base.chunker.base import Chunker\n",
"from canopy.knowledge_base.models import KBDocChunk\n",
"\n",
"class NewLineChunker(Chunker):\n",
"\n",
"    def chunk_single_document(self, document: Document) -> List[KBDocChunk]:\n",
"        # Split the document text into one chunk per line\n",
"        line_chunks = document.text.split(\"\\n\")\n",
"        return [KBDocChunk(id=f\"{document.id}_{i}\",\n",
"                           document_id=document.id,\n",
"                           text=text_chunk,\n",
"                           source=document.source,\n",
"                           metadata=document.metadata)\n",
"                for i, text_chunk in enumerate(line_chunks)]\n",
"    \n",
"    async def achunk_single_document(self, document: Document) -> List[KBDocChunk]:\n",
"        raise NotImplementedError()"
] },
{ "cell_type": "code", "execution_count": 200, "metadata": {}, "outputs": [ { "data": { "text/plain": [
"[KBDocChunk(id='id1_0', text='This is first line', source='example', metadata={'title': 'newline'}, document_id='id1'),\n",
" KBDocChunk(id='id1_1', text='This is the second line', source='example', metadata={'title': 'newline'}, document_id='id1')]"
] }, "execution_count": 200, "metadata": {}, "output_type": "execute_result" } ], "source": [
"chunker = NewLineChunker()\n",
"\n",
"document = Document(id=\"id1\",\n",
"                    text=\"This is first line\\nThis is the second line\",\n",
"                    source=\"example\",\n",
"                    metadata={\"title\": \"newline\"})\n",
"chunker.chunk_single_document(document)"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"Now we can initialize a new knowledge base to use our new chunker:"
] },
{ "cell_type": "code", "execution_count": 201, "metadata": {}, "outputs": [], "source": [
"kb = KnowledgeBase(index_name=INDEX_NAME,\n",
"                   chunker=chunker)\n",
"kb.connect()"
] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"And upsert our example document:"
] },
{ "cell_type": "code", "execution_count": 202, "metadata": {}, "outputs": [], "source": [
"kb.upsert([document])"
] },
{ "cell_type": "code", "execution_count": 205, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"query: second line\n",
"\n",
"document: This is the second line\n",
"source: example\n",
"score: 
0.928833485\n", + "\n", + "document: This is first line\n", + "source: example\n", + "score: 0.88751322\n", + "\n" + ] + } + ], + "source": [ + "results = kb.query([Query(text=\"second line\",\n", + " metadata_filter={\"title\": \"newline\"})])\n", + "\n", + "print_query_results(results)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see above, our knowledge base split the document by new line as expected." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Delete the index once you are sure that you do not want to use it anymore. Once the index is deleted, you cannot use it again." + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "metadata": {}, + "outputs": [], + "source": [ + "kb.delete_index()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "canopy-quick", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "9e9b81017be88d4d093a2a92984a986685ce96a6b6736b12c233fdf6b743e185" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 9427cc69a8b9f8dd761355ba2a67de96a699eb5f Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 30 Oct 2023 12:22:10 +0200 Subject: [PATCH 02/21] batch upsert --- examples/canopy-lib-quickstart.ipynb | 170 ++++++++++++--------------- 1 file changed, 74 insertions(+), 96 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index b4929d57..a79a3ee2 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 180, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 182, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -187,7 +187,7 @@ "4 {'created_at': '2023_10_25', 'title': 'moving-... 
" ] }, - "execution_count": 182, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -222,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 183, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -240,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 184, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -249,7 +249,7 @@ "['Hello', ' world', '!']" ] }, - "execution_count": 184, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -278,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 185, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -299,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": 186, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -318,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 187, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -354,7 +354,7 @@ }, { "cell_type": "code", - "execution_count": 188, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -379,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 189, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -396,11 +396,31 @@ }, { "cell_type": "code", - "execution_count": 190, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "754ff510e75a472b97e474adaf922ea7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/6 [00:00 2\u001b[0m results \u001b[39m=\u001b[39m kb\u001b[39m.\u001b[39;49mquery([Query(text\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mp1 pod capacity\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m 3\u001b[0m metadata_filter\u001b[39m=\u001b[39;49m{\u001b[39m\"\u001b[39;49m\u001b[39mtitle\u001b[39;49m\u001b[39m\"\u001b[39;49m: \u001b[39m\"\u001b[39;49m\u001b[39mlimits\u001b[39;49m\u001b[39m\"\u001b[39;49m},\n\u001b[1;32m 4\u001b[0m top_k\u001b[39m=\u001b[39;49m\u001b[39m2\u001b[39;49m)])\n\u001b[1;32m 6\u001b[0m print_query_results(results)\n", + "File \u001b[0;32m~/canopy-quick/lib/python3.10/site-packages/canopy/knowledge_base/knowledge_base.py:432\u001b[0m, in \u001b[0;36mKnowledgeBase.query\u001b[0;34m(self, queries, global_metadata_filter)\u001b[0m\n\u001b[1;32m 402\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 403\u001b[0m \u001b[39mQuery the knowledge base to retrieve document chunks.\u001b[39;00m\n\u001b[1;32m 404\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 429\u001b[0m \u001b[39m >>> results = kb.query(queries)\u001b[39;00m\n\u001b[1;32m 430\u001b[0m \u001b[39m\"\"\"\u001b[39;00m \u001b[39m# noqa: E501\u001b[39;00m\n\u001b[1;32m 431\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_index \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 432\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_connection_error_msg)\n\u001b[1;32m 434\u001b[0m queries \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_encoder\u001b[39m.\u001b[39mencode_queries(queries)\n\u001b[1;32m 435\u001b[0m results \u001b[39m=\u001b[39m [\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_query_index(q, global_metadata_filter) \u001b[39mfor\u001b[39;00m q \u001b[39min\u001b[39;00m queries]\n", + "\u001b[0;31mRuntimeError\u001b[0m: KnowledgeBase is not connected to index canopy--my-index, Please 
call knowledge_base.connect(). " ] } ], @@ -565,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 195, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -575,7 +565,7 @@ }, { "cell_type": "code", - "execution_count": 196, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -586,21 +576,17 @@ " \"query\": \"What is the capacity of p1 pods?\",\n", " \"snippets\": [\n", " {\n", - " \"source\": \"https://docs.pinecone.io/docs/limits\",\n", - " \"text\": \"## Pod storage capacity\\n\\n\\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\\n\\n\\nEach **s1** pod has enough capacity for 5M vectors with 768 dimensions.\\n\\n\\n## Metadata\\n\\n\\nMax metadata size per vector is 40 KB.\\n\\n\\nNull metadata values are not supported. Instead of setting a key to hold a null value, we recommend you remove that key from the metadata payload.\\n\\n\\nMetadata with high cardinality, such as a unique value for every vector in a large index, uses more memory than expected and can cause the pods to become full.\"\n", - " },\n", - " {\n", " \"source\": \"https://docs.pinecone.io/docs/indexes\",\n", " \"text\": \"### s1 pods\\n\\n\\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\\n\\n\\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\\n\\n\\n### p1 pods\\n\\n\\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\\n\\n\\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.\"\n", " },\n", " {\n", - " \"source\": \"https://docs.pinecone.io/docs/choosing-index-type-and-size\",\n", - " \"text\": \"## Number of vectors\\n\\n\\nThe most important consideration in sizing is the [number of vectors](/docs/insert-data/) you plan on working with. As a rule of thumb, a single p1 pod can store approximately 1M vectors, while a s1 pod can store 5M vectors. However, this can be affected by other factors, such as dimensionality and metadata, which are explained below.\"\n", + " \"source\": \"https://docs.pinecone.io/docs/indexes\",\n", + " \"text\": \"### p2 pods\\n\\n\\nThe p2 pod type provides greater query throughput with lower latency. For vectors with fewer than 128 dimension and queries where `topK` is less than 50, p2 pods support up to 200 QPS per replica and return queries in less than 10ms. This means that query throughput and latency are better than s1 and p1.\\n\\n\\nEach p2 pod has enough capacity for around 1M vectors of 768 dimensions. However, capacity may vary with dimensionality.\\n\\n\\nThe data ingestion rate for p2 pods is significantly slower than for p1 pods; this rate decreases as the number of dimensions increases. For example, a p2 pod containing vectors with 128 dimensions can upsert up to 300 updates per second; a p2 pod containing vectors with 768 dimensions or more supports upsert of 50 updates per second. 
Because query latency and throughput for p2 pods vary from p1 pods, test p2 pod performance with your dataset.\\n\\n\\nThe p2 pod type does not support sparse vector values.\"\n", " }\n", " ]\n", "}\n", "\n", - "# tokens in context returned: 435\n" + "# tokens in context returned: 415\n" ] } ], @@ -635,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 197, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -645,14 +631,14 @@ }, { "cell_type": "code", - "execution_count": 198, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The capacity of p1 pods is approximately 1 million vectors of 768 dimensions.\n" + "Each p1 pod has enough capacity for around 1 million vectors of 768 dimensions. [Source: Official Pinecone Documentation](https://docs.pinecone.io/docs/limits)\n" ] } ], @@ -694,7 +680,7 @@ }, { "cell_type": "code", - "execution_count": 199, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -720,7 +706,7 @@ }, { "cell_type": "code", - "execution_count": 200, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -730,7 +716,7 @@ " KBDocChunk(id='id1_1', text='This is the second line', source='example', metadata={'title': 'newline'}, document_id='id1')]" ] }, - "execution_count": 200, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -755,7 +741,7 @@ }, { "cell_type": "code", - "execution_count": 201, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -774,7 +760,7 @@ }, { "cell_type": "code", - "execution_count": 202, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -783,7 +769,7 @@ }, { "cell_type": "code", - "execution_count": 205, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -791,14 +777,6 @@ "output_type": "stream", "text": [ "query: second line\n", - "\n", - "document: This is the second line\n", - "source: example\n", - "score: 0.928833485\n", - "\n", - "document: This is first line\n", - "source: example\n", - "score: 0.88751322\n", "\n" ] } @@ -828,7 +806,7 @@ }, { "cell_type": "code", - "execution_count": 206, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ From 0e672ebce6cfa3ec9524abcd8e013d55d169cc6a Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 30 Oct 2023 13:15:22 +0200 Subject: [PATCH 03/21] use list indexes to check if index exists --- examples/canopy-lib-quickstart.ipynb | 109 +++++++++++++++------------ 1 file changed, 60 insertions(+), 49 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index a79a3ee2..6caa2eb4 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 181, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -187,7 +187,7 @@ "4 {'created_at': '2023_10_25', 'title': 'moving-... 
" ] }, - "execution_count": 3, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -222,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -240,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -249,7 +249,7 @@ "['Hello', ' world', '!']" ] }, - "execution_count": 5, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -278,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -299,11 +299,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ - "kb.create_canopy_index(indexed_fields=[\"title\"])" + "from canopy.knowledge_base import list_canopy_indexes\n", + "\n", + "if not any(name.endswith(INDEX_NAME) for name in list_canopy_indexes()):\n", + " kb.create_canopy_index(indexed_fields=[\"title\"])" ] }, { @@ -318,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -354,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -379,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -396,13 +399,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "754ff510e75a472b97e474adaf922ea7", + "model_id": "63cd9039008a49d28f96f0bffa16bcc7", "version_major": 2, "version_minor": 0 }, @@ -443,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -458,7 +461,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -469,23 +472,23 @@ "\n", "document: ### s1 pods\\n\\n\\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\\n\\n\\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\\n\\n\\n### p1 pods\\n\\n\\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\\n\\n\\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.\n", "source: https://docs.pinecone.io/docs/indexes\n", - "score: 0.842927933\n", + "score: 0.842596233\n", "\n", "document: ### p2 pods\\n\\n\\nThe p2 pod type provides greater query throughput with lower latency. For vectors with fewer than 128 dimension and queries where `topK` is less than 50, p2 pods support up to 200 QPS per replica and return queries in less than 10ms. This means that query throughput and latency are better than s1 and p1.\\n\\n\\nEach p2 pod has enough capacity for around 1M vectors of 768 dimensions. However, capacity may vary with dimensionality.\\n\\n\\nThe data ingestion rate for p2 pods is significantly slower than for p1 pods; this rate decreases as the number of dimensions increases. 
For example, a p2 pod containing vectors with 128 dimensions can upsert up to 300 updates per second; a p2 pod containing vectors with 768 dimensions or more supports upsert of 50 updates per second. Because query latency and throughput for p2 pods vary from p1 pods, test p2 pod performance with your dataset.\\n\\n\\nThe p2 pod type does not support sparse vector values.\n", "source: https://docs.pinecone.io/docs/indexes\n", - "score: 0.83271873\n", + "score: 0.832544327\n", "\n", "document: ### Pod size and performance\\n\\n\\nPod performance varies depending on a variety of factors. To observe how your workloads perform on a given pod type, experiment with your own data set.\\n\\n\\nEach pod type supports four pod sizes: `x1`, `x2`, `x4`, and `x8`. Your index storage and compute capacity doubles for each size step. The default pod size is `x1`. You can increase the size of a pod after index creation.\\n\\n\\nTo learn about changing the pod size of an index, see [Manage indexes](manage-indexes/#changing-pod-sizes).\n", "source: https://docs.pinecone.io/docs/indexes\n", - "score: 0.799543083\n", + "score: 0.799393296\n", "\n", "document: ## Pods, pod types, and pod sizes\\n\\n\\nPods are pre-configured units of hardware for running a Pinecone service. Each index runs on one or more pods. Generally, more pods mean more storage capacity, lower latency, and higher throughput. You can also create pods of different sizes.\\n\\n\\nOnce an index is created using a particular pod type, you cannot change the pod type for that index. However, you can [create a new index from that collection](manage-indexes/#create-an-index-from-a-collection) with a different pod type.\\n\\n\\nDifferent pod types are priced differently. See [pricing](https://www.pinecone.io/pricing/) for more details.\\n\\n\\n### Starter plan\\n\\n\\nWhen using the starter plan, you can create one pod with enough resources to support approximately 100,000 vectors with 1536-dimensional embeddings and metadata; the capacity is proportional for other dimensions.\\n\\n\\nWhen using a starter plan, all [`create_index`](/reference/create_index) calls ignore the `pod_type` parameter.\n", "source: https://docs.pinecone.io/docs/indexes\n", - "score: 0.791258693\n", + "score: 0.790982306\n", "\n", - "document: ---\\n\\n* [Table of Contents](#)\\n* + [Overview](#overview)\\n\t+ [Pods, pod types, and pod sizes](#pods-pod-types-and-pod-sizes)\\n\t\t- [Starter plan](#starter-plan)\\n\t\t- [s1 pods](#s1-pods)\\n\t\t- [p1 pods](#p1-pods)\\n\t\t- [p2 pods](#p2-pods)\\n\t\t- [Pod size and performance](#pod-size-and-performance)\\n\t\t- [Distance metrics](#distance-metrics)\n", - "source: https://docs.pinecone.io/docs/indexes\n", - "score: 0.786965072\n", + "document: ```\\n\\nPinecone supports sparse vector values of sizes up to 1000 non-zero values and over 4 billion dimensions.\\n\\n\\nAssuming a dense vector component with 768 dimensions, Pinecone supports roughly 2.8M sparse vectors per `s1` pod or 900k per `p1` pod.\n", + "source: https://docs.pinecone.io/docs/sparse-dense-vectors\n", + "score: 0.789729655\n", "\n" ] } @@ -507,19 +510,23 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 42, "metadata": {}, "outputs": [ { - "ename": "RuntimeError", - "evalue": "KnowledgeBase is not connected to index canopy--my-index, Please call knowledge_base.connect(). 
", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[25], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcanopy\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mmodels\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdata_models\u001b[39;00m \u001b[39mimport\u001b[39;00m Query\n\u001b[0;32m----> 2\u001b[0m results \u001b[39m=\u001b[39m kb\u001b[39m.\u001b[39;49mquery([Query(text\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mp1 pod capacity\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m 3\u001b[0m metadata_filter\u001b[39m=\u001b[39;49m{\u001b[39m\"\u001b[39;49m\u001b[39mtitle\u001b[39;49m\u001b[39m\"\u001b[39;49m: \u001b[39m\"\u001b[39;49m\u001b[39mlimits\u001b[39;49m\u001b[39m\"\u001b[39;49m},\n\u001b[1;32m 4\u001b[0m top_k\u001b[39m=\u001b[39;49m\u001b[39m2\u001b[39;49m)])\n\u001b[1;32m 6\u001b[0m print_query_results(results)\n", - "File \u001b[0;32m~/canopy-quick/lib/python3.10/site-packages/canopy/knowledge_base/knowledge_base.py:432\u001b[0m, in \u001b[0;36mKnowledgeBase.query\u001b[0;34m(self, queries, global_metadata_filter)\u001b[0m\n\u001b[1;32m 402\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 403\u001b[0m \u001b[39mQuery the knowledge base to retrieve document chunks.\u001b[39;00m\n\u001b[1;32m 404\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 429\u001b[0m \u001b[39m >>> results = kb.query(queries)\u001b[39;00m\n\u001b[1;32m 430\u001b[0m \u001b[39m\"\"\"\u001b[39;00m \u001b[39m# noqa: E501\u001b[39;00m\n\u001b[1;32m 431\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_index \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 432\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_connection_error_msg)\n\u001b[1;32m 434\u001b[0m queries \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_encoder\u001b[39m.\u001b[39mencode_queries(queries)\n\u001b[1;32m 435\u001b[0m results \u001b[39m=\u001b[39m [\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_query_index(q, global_metadata_filter) \u001b[39mfor\u001b[39;00m q \u001b[39min\u001b[39;00m queries]\n", - "\u001b[0;31mRuntimeError\u001b[0m: KnowledgeBase is not connected to index canopy--my-index, Please call knowledge_base.connect(). " + "name": "stdout", + "output_type": "stream", + "text": [ + "query: p1 pod capacity\n", + "\n", + "document: ## Pod storage capacity\\n\\n\\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\\n\\n\\nEach **s1** pod has enough capacity for 5M vectors with 768 dimensions.\\n\\n\\n## Metadata\\n\\n\\nMax metadata size per vector is 40 KB.\\n\\n\\nNull metadata values are not supported. 
Instead of setting a key to hold a null value, we recommend you remove that key from the metadata payload.\\n\\n\\nMetadata with high cardinality, such as a unique value for every vector in a large index, uses more memory than expected and can cause the pods to become full.\n", + "source: https://docs.pinecone.io/docs/limits\n", + "score: 0.842372298\n", + "\n", + "document: ## Retention\\n\\n\\nIn general, indexes on the Starter (free) plan are archived as collections and deleted after 7 days of inactivity; for indexes created by certain open source projects such as AutoGPT, indexes are archived and deleted after 1 day of inactivity. To prevent this, you can send any API request to Pinecone and the counter will reset.\\n\\nUpdated about 1 month ago \\n\\n\\n\\n---\\n\\n* [Table of Contents](#)\\n* + [Upserts](#upserts)\\n\t+ [Queries](#queries)\\n\t+ [Fetch and Delete](#fetch-and-delete)\\n\t+ [Namespaces](#namespaces)\\n\t+ [Pod storage capacity](#pod-storage-capacity)\\n\t+ [Metadata](#metadata)\\n\t+ [Retention](#retention)\n", + "source: https://docs.pinecone.io/docs/limits\n", + "score: 0.717811227\n", + "\n" ] } ], @@ -555,7 +562,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -565,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -576,17 +583,21 @@ " \"query\": \"What is the capacity of p1 pods?\",\n", " \"snippets\": [\n", " {\n", + " \"source\": \"https://docs.pinecone.io/docs/limits\",\n", + " \"text\": \"## Pod storage capacity\\n\\n\\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\\n\\n\\nEach **s1** pod has enough capacity for 5M vectors with 768 dimensions.\\n\\n\\n## Metadata\\n\\n\\nMax metadata size per vector is 40 KB.\\n\\n\\nNull metadata values are not supported. Instead of setting a key to hold a null value, we recommend you remove that key from the metadata payload.\\n\\n\\nMetadata with high cardinality, such as a unique value for every vector in a large index, uses more memory than expected and can cause the pods to become full.\"\n", + " },\n", + " {\n", " \"source\": \"https://docs.pinecone.io/docs/indexes\",\n", " \"text\": \"### s1 pods\\n\\n\\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\\n\\n\\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\\n\\n\\n### p1 pods\\n\\n\\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\\n\\n\\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.\"\n", " },\n", " {\n", - " \"source\": \"https://docs.pinecone.io/docs/indexes\",\n", - " \"text\": \"### p2 pods\\n\\n\\nThe p2 pod type provides greater query throughput with lower latency. For vectors with fewer than 128 dimension and queries where `topK` is less than 50, p2 pods support up to 200 QPS per replica and return queries in less than 10ms. This means that query throughput and latency are better than s1 and p1.\\n\\n\\nEach p2 pod has enough capacity for around 1M vectors of 768 dimensions. 
However, capacity may vary with dimensionality.\\n\\n\\nThe data ingestion rate for p2 pods is significantly slower than for p1 pods; this rate decreases as the number of dimensions increases. For example, a p2 pod containing vectors with 128 dimensions can upsert up to 300 updates per second; a p2 pod containing vectors with 768 dimensions or more supports upsert of 50 updates per second. Because query latency and throughput for p2 pods vary from p1 pods, test p2 pod performance with your dataset.\\n\\n\\nThe p2 pod type does not support sparse vector values.\"\n", + " \"source\": \"https://docs.pinecone.io/docs/choosing-index-type-and-size\",\n", + " \"text\": \"## Number of vectors\\n\\n\\nThe most important consideration in sizing is the [number of vectors](/docs/insert-data/) you plan on working with. As a rule of thumb, a single p1 pod can store approximately 1M vectors, while a s1 pod can store 5M vectors. However, this can be affected by other factors, such as dimensionality and metadata, which are explained below.\"\n", " }\n", " ]\n", "}\n", "\n", - "# tokens in context returned: 415\n" + "# tokens in context returned: 435\n" ] } ], @@ -621,7 +632,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -631,14 +642,14 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Each p1 pod has enough capacity for around 1 million vectors of 768 dimensions. [Source: Official Pinecone Documentation](https://docs.pinecone.io/docs/limits)\n" + "Each p1 pod has enough capacity for around 1M vectors of 768 dimensions. [Source: https://docs.pinecone.io/docs/limits]\n" ] } ], @@ -680,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -706,7 +717,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 48, "metadata": {}, "outputs": [ { @@ -716,7 +727,7 @@ " KBDocChunk(id='id1_1', text='This is the second line', source='example', metadata={'title': 'newline'}, document_id='id1')]" ] }, - "execution_count": 20, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -741,7 +752,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 49, "metadata": {}, "outputs": [], "source": [ @@ -760,7 +771,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -769,7 +780,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 51, "metadata": {}, "outputs": [ { @@ -806,7 +817,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ From 303d0340652c9970694598cdd47480b966a1379b Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 30 Oct 2023 13:16:43 +0200 Subject: [PATCH 04/21] Update examples/canopy-lib-quickstart.ipynb Co-authored-by: igiloh-pinecone <118673156+igiloh-pinecone@users.noreply.github.com> --- examples/canopy-lib-quickstart.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index 6caa2eb4..f56076b9 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -269,7 +269,8 @@ "source": [ "## Creating a KnowledgBase to store our data for search\n", "\n", - "`KnowledgeBase` is an 
object that is responsible for storing and query data. It holds a connection to a single Pinecone index and provides a simple API to insert, delete and search textual documents.\n", + "The `KnowledgeBase` object is responsible for storing and indexing textual documents. Once documents were indexed, the `KnowledgeBase` can be queried with a new unseen text passage, for which the most relevant document chunks are retrieved. +The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple API to insert, delete and search textual documents.\n", "\n", "During an upsert, the KnowledgeBase divides the text into smaller chunks, transforms them into vector embeddings, and then upsert these vectors in the underlying Pinecone index. When querying, it converts the textual input into a vector and excute the queries against the underlying index to retrieve the top-k most closely matched chunks.\n", "\n", From b161a7cfcf812074ed52ef9132e5fa5224e11a5c Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 30 Oct 2023 13:17:14 +0200 Subject: [PATCH 05/21] Update examples/canopy-lib-quickstart.ipynb Co-authored-by: igiloh-pinecone <118673156+igiloh-pinecone@users.noreply.github.com> --- examples/canopy-lib-quickstart.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index f56076b9..59a7b08b 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -272,7 +272,9 @@ "The `KnowledgeBase` object is responsible for storing and indexing textual documents. Once documents were indexed, the `KnowledgeBase` can be queried with a new unseen text passage, for which the most relevant document chunks are retrieved. The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple API to insert, delete and search textual documents.\n", "\n", - "During an upsert, the KnowledgeBase divides the text into smaller chunks, transforms them into vector embeddings, and then upsert these vectors in the underlying Pinecone index. When querying, it converts the textual input into a vector and excute the queries against the underlying index to retrieve the top-k most closely matched chunks.\n", + "The `KnoweldgeBase`'s `upsert()` operation is used to index new documents, or update already stored documents. + The `upsert` process splits each document's text into smaller chunks, transforms these chunks to vector embeddings, then upserts those vectors to the underlying Pinecone index. 
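In code, that indexing flow boils down to a couple of calls — a minimal sketch, assuming an already-created Canopy index (the `Document` schema and the `KnowledgeBase` setup are introduced in the cells below):

```python
from canopy.knowledge_base import KnowledgeBase
from canopy.models.data_models import Document

kb = KnowledgeBase(index_name="canopy-quickstart")  # illustrative index name
kb.connect()  # assumes a Canopy index was already created for this name

# Each Document is chunked and embedded internally before the vectors are upserted.
kb.upsert([Document(id="doc1",
                    text="Each p1 pod has enough capacity for around 1M vectors.",
                    source="example",
                    metadata={"title": "pods"})])
```
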
+ At Query time, the `KnowledgeBase` transforms the textual query text to a vector in a similar manner, then queries the underlying Pinecone index to retrieve the top-k most closely matched document chunks.\n", "\n", "Here we create a `KnowledgeBase` with our desired index name: " ] From 21a4e3d158b653f83fd5f117981aa59ca6a3f3cf Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 30 Oct 2023 13:17:45 +0200 Subject: [PATCH 06/21] Update examples/canopy-lib-quickstart.ipynb Co-authored-by: igiloh-pinecone <118673156+igiloh-pinecone@users.noreply.github.com> --- examples/canopy-lib-quickstart.ipynb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index 59a7b08b..4a63dbb7 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -297,7 +297,10 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple "cell_type": "markdown", "metadata": {}, "source": [ - "Now we need to create a new index in Pinecone, if it's not already exist:" + "In the first one-time setup of a new Canopy service, an underlying Pinecone index needs to be created. + If you have created a Canopy-enabled Pinecone index before - you can skip this step. + + Note: Since Canopy uses a dedicated data schema, it is not recommended to use a pre-existing Pinecone index that wasn't created by Canopy's `create_canopy_index()` method." ] }, { From 64e65e9f177e51f2bebb8b5383723bea056a998d Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 30 Oct 2023 13:18:27 +0200 Subject: [PATCH 07/21] Update examples/canopy-lib-quickstart.ipynb Co-authored-by: igiloh-pinecone <118673156+igiloh-pinecone@users.noreply.github.com> --- examples/canopy-lib-quickstart.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index 4a63dbb7..a5f8176e 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -633,7 +633,7 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple "\n", "Now we are ready to start chatting with our data!\n", "\n", - "Canopy `ChatEngine` supports OpenAI compatible API, only that behind the scenes it uses the context egine to provide knowledgeable answers to the users questions." + "Canopy's `ChatEngine` is a one-stop-shop RAG-infused Chatbot. The `ChatEngine` wraps an underlying LLM such as OpenAI's ChatGPT, enhancing it by providing relevant context from the user's knowledge base." ] }, { From fb563269a052905499a00e3f4feec2ceac5f88e3 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 30 Oct 2023 13:23:11 +0200 Subject: [PATCH 08/21] Update examples/canopy-lib-quickstart.ipynb Co-authored-by: igiloh-pinecone <118673156+igiloh-pinecone@users.noreply.github.com> --- examples/canopy-lib-quickstart.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index a5f8176e..4c7f718c 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -672,7 +672,7 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple "cell_type": "markdown", "metadata": {}, "source": [ - "> 💡 Note: as opposed to OpenAI API, Canopy by default truncate the chat history to recent messages to avoid excceding the prompt tokens limit. 
This behaviour can change see chat engine [documentation](https://github.com/pinecone-io/canopy/blob/main/src/canopy/chat_engine/chat_engine.py)" + "> 💡 Note: Canopy calls the underlying LLM, providing both the user-provided chat history and a generated `Context` prompt. This might surpass the `ChatEngine`'s configured `max_prompt_tokens`. By default, the `ChatEngine` would truncate the older most messages in the chat history avoid exceeding this limit. This behavior in configurable, as explained in the [documentation](https://github.com/pinecone-io/canopy/blob/main/src/canopy/chat_engine/chat_engine.py)" ] }, { From 018f71ffd070236c92b8cf6a58835a49ae34fd23 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 30 Oct 2023 13:30:56 +0200 Subject: [PATCH 09/21] use chat with history as example --- examples/canopy-lib-quickstart.ipynb | 64 +++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index 4c7f718c..93865c20 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -648,23 +648,67 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Tuple\n", + "from canopy.models.data_models import Messages, UserMessage, AssistantMessage\n", + "\n", + "def chat(new_message: str, history: Messages) -> Tuple[str, Messages]:\n", + " messages = history + [UserMessage(content=new_message)]\n", + " response = chat_engine.chat(messages)\n", + " assistant_response = response.choices[0].message.content\n", + " return assistant_response, messages + [AssistantMessage(content=assistant_response)]" + ] + }, + { + "cell_type": "code", + "execution_count": 65, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Each p1 pod has enough capacity for around 1M vectors of 768 dimensions. [Source: https://docs.pinecone.io/docs/limits]\n" - ] + "data": { + "text/markdown": [ + "Each p1 pod has enough capacity for around 1M vectors of 768 dimensions. Source: https://docs.pinecone.io/docs/indexes and https://docs.pinecone.io/docs/limits" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "from canopy.models.data_models import MessageBase\n", + "from IPython.display import display, Markdown\n", "\n", - "response = chat_engine.chat(messages=[MessageBase(role=\"user\", content=\"What is the capacity of p1 pods?\")], stream=False)\n", - "\n", - "print(response.choices[0].message.content)" + "history = []\n", + "response, history = chat(\"What is the capacity of p1 pods?\", history)\n", + "display(Markdown(response))" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "P1 pods are ideal for applications with low latency requirements (<100ms). They are performance-optimized pods that provide very low query latencies. 
Source: https://docs.pinecone.io/docs/indexes" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "response, history = chat(\"And for what latency requirements does it fit?\", history)\n", + "display(Markdown(response))" ] }, { From a6cb70c7ff2a2d614d6a89456d4c515b1cd2bca3 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 30 Oct 2023 13:58:53 +0200 Subject: [PATCH 10/21] update pip install --- examples/canopy-lib-quickstart.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index 93865c20..0701c88b 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -38,7 +38,7 @@ } ], "source": [ - "!pip install -qU git+ssh://git@github.com/pinecone-io/canopy.git@dev" + "!pip install -qU pinecone-canopy" ] }, { From 81c4234516a48f4ee422429a907efe6742eec890 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 30 Oct 2023 14:01:43 +0200 Subject: [PATCH 11/21] link notebook from lib readme --- docs/library.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/library.md b/docs/library.md index 3f1ca61d..d6d57b91 100644 --- a/docs/library.md +++ b/docs/library.md @@ -4,6 +4,8 @@ For most common use cases, users can simply deploy the fully-configurable [Canop For advanced users, this page describes how to use `canopy` core library directly to implement their own custom applications. +> **_💡 NOTE:_** You can also follow the quickstart Jupyter [notebook](../examples/canopy-lib-quickstart.ipynb) + The idea behind Canopy library is to provide a framework to build AI applications on top of Pinecone as a long memory storage for you own data. Canopy library designed with the following principles in mind: - **Easy to use**: Canopy is designed to be easy to use. It is well packaged and can be installed with a single command. From 1e477c2b97c92e5711e7d3e1d7e7642dd883799e Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Tue, 31 Oct 2023 10:51:08 +0200 Subject: [PATCH 12/21] fix json and remove outputs from git --- examples/canopy-lib-quickstart.ipynb | 371 ++++----------------------- 1 file changed, 57 insertions(+), 314 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index 0701c88b..b6bd4ee4 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -24,19 +24,9 @@ }, { "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "!pip install -qU pinecone-canopy" ] @@ -56,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -87,111 +77,9 @@ }, { "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtextsourcemetadata
0728aeea1-1dcf-5d0a-91f2-ecccd4dd4272# Scale indexes\\n\\n[Suggest Edits](/edit/scali...https://docs.pinecone.io/docs/scaling-indexes{'created_at': '2023_10_25', 'title': 'scaling...
12f19f269-171f-5556-93f3-a2d7eabbe50f# Understanding organizations\\n\\n[Suggest Edit...https://docs.pinecone.io/docs/organizations{'created_at': '2023_10_25', 'title': 'organiz...
2b2a71cb3-5148-5090-86d5-7f4156edd7cf# Manage datasets\\n\\n[Suggest Edits](/edit/dat...https://docs.pinecone.io/docs/datasets{'created_at': '2023_10_25', 'title': 'datasets'}
31dafe68a-2e78-57f7-a97a-93e043462196# Architecture\\n\\n[Suggest Edits](/edit/archit...https://docs.pinecone.io/docs/architecture{'created_at': '2023_10_25', 'title': 'archite...
48b07b24d-4ec2-58a1-ac91-c8e6267b9ffd# Moving to production\\n\\n[Suggest Edits](/edi...https://docs.pinecone.io/docs/moving-to-produc...{'created_at': '2023_10_25', 'title': 'moving-...
\n", - "
" - ], - "text/plain": [ - " id \\\n", - "0 728aeea1-1dcf-5d0a-91f2-ecccd4dd4272 \n", - "1 2f19f269-171f-5556-93f3-a2d7eabbe50f \n", - "2 b2a71cb3-5148-5090-86d5-7f4156edd7cf \n", - "3 1dafe68a-2e78-57f7-a97a-93e043462196 \n", - "4 8b07b24d-4ec2-58a1-ac91-c8e6267b9ffd \n", - "\n", - " text \\\n", - "0 # Scale indexes\\n\\n[Suggest Edits](/edit/scali... \n", - "1 # Understanding organizations\\n\\n[Suggest Edit... \n", - "2 # Manage datasets\\n\\n[Suggest Edits](/edit/dat... \n", - "3 # Architecture\\n\\n[Suggest Edits](/edit/archit... \n", - "4 # Moving to production\\n\\n[Suggest Edits](/edi... \n", - "\n", - " source \\\n", - "0 https://docs.pinecone.io/docs/scaling-indexes \n", - "1 https://docs.pinecone.io/docs/organizations \n", - "2 https://docs.pinecone.io/docs/datasets \n", - "3 https://docs.pinecone.io/docs/architecture \n", - "4 https://docs.pinecone.io/docs/moving-to-produc... \n", - "\n", - " metadata \n", - "0 {'created_at': '2023_10_25', 'title': 'scaling... \n", - "1 {'created_at': '2023_10_25', 'title': 'organiz... \n", - "2 {'created_at': '2023_10_25', 'title': 'datasets'} \n", - "3 {'created_at': '2023_10_25', 'title': 'archite... \n", - "4 {'created_at': '2023_10_25', 'title': 'moving-... " - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import pandas as pd\n", "import warnings\n", @@ -222,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -240,20 +128,9 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Hello', ' world', '!']" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from canopy.tokenizer import Tokenizer\n", "\n", @@ -269,19 +146,20 @@ "source": [ "## Creating a KnowledgBase to store our data for search\n", "\n", - "The `KnowledgeBase` object is responsible for storing and indexing textual documents. Once documents were indexed, the `KnowledgeBase` can be queried with a new unseen text passage, for which the most relevant document chunks are retrieved. -The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple API to insert, delete and search textual documents.\n", + "The `KnowledgeBase` object is responsible for storing and indexing textual documents.\n", + "\n", + "Once documents were indexed, the `KnowledgeBase` can be queried with a new unseen text passage, for which the most relevant document chunks are retrieved.\n", "\n", - "The `KnoweldgeBase`'s `upsert()` operation is used to index new documents, or update already stored documents. - The `upsert` process splits each document's text into smaller chunks, transforms these chunks to vector embeddings, then upserts those vectors to the underlying Pinecone index. - At Query time, the `KnowledgeBase` transforms the textual query text to a vector in a similar manner, then queries the underlying Pinecone index to retrieve the top-k most closely matched document chunks.\n", + "The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple API to insert, delete and search textual documents.\n", + "\n", + "The `KnoweldgeBase`'s `upsert()` operation is used to index new documents, or update already stored documents. 
The `upsert` process splits each document's text into smaller chunks, transforms these chunks to vector embeddings, then upserts those vectors to the underlying Pinecone index. At Query time, the `KnowledgeBase` transforms the textual query text to a vector in a similar manner, then queries the underlying Pinecone index to retrieve the top-k most closely matched document chunks.\n", "\n", "Here we create a `KnowledgeBase` with our desired index name: " ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -297,15 +175,14 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple "cell_type": "markdown", "metadata": {}, "source": [ - "In the first one-time setup of a new Canopy service, an underlying Pinecone index needs to be created. - If you have created a Canopy-enabled Pinecone index before - you can skip this step. - - Note: Since Canopy uses a dedicated data schema, it is not recommended to use a pre-existing Pinecone index that wasn't created by Canopy's `create_canopy_index()` method." + "In the first one-time setup of a new Canopy service, an underlying Pinecone index needs to be created. If you have created a Canopy-enabled Pinecone index before - you can skip this step.\n", + "\n", + "Note: Since Canopy uses a dedicated data schema, it is not recommended to use a pre-existing Pinecone index that wasn't created by Canopy's `create_canopy_index()` method." ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -327,7 +204,7 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -363,7 +240,7 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -388,7 +265,7 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -405,24 +282,9 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "63cd9039008a49d28f96f0bffa16bcc7", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/6 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from IPython.display import display, Markdown\n", "\n", @@ -690,22 +467,9 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "P1 pods are ideal for applications with low latency requirements (<100ms). They are performance-optimized pods that provide very low query latencies. 
Source: https://docs.pinecone.io/docs/indexes" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "response, history = chat(\"And for what latency requirements does it fit?\", history)\n", "display(Markdown(response))" @@ -741,7 +505,7 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -767,21 +531,9 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[KBDocChunk(id='id1_0', text='This is first line', source='example', metadata={'title': 'newline'}, document_id='id1'),\n", - " KBDocChunk(id='id1_1', text='This is the second line', source='example', metadata={'title': 'newline'}, document_id='id1')]" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "chunker = NewLineChunker()\n", "\n", @@ -802,7 +554,7 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 49, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -821,7 +573,7 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -830,18 +582,9 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "query: second line\n", - "\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "results = kb.query([Query(text=\"second line\",\n", " metadata_filter={\"title\": \"newline\"})])\n", @@ -867,7 +610,7 @@ The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple }, { "cell_type": "code", - "execution_count": 52, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ From b0bf6158a62af042c2e69a6c98e230001ef9e165 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Tue, 31 Oct 2023 14:03:31 +0200 Subject: [PATCH 13/21] edit markdown cells --- examples/canopy-lib-quickstart.ipynb | 34 ++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index b6bd4ee4..404065d7 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -57,6 +57,27 @@ "os.environ[\"OPENAI_API_KEY\"] = os.environ.get('OPENAI_API_KEY') or 'OPENAI_API_KEY'" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We don't have to do the following step since openai loads the environment variable on import.\n", + "\n", + "When working with Jupyter notebook we'll have to restart the kernel for any mistake in this variable so it's safer to explicitly set the api key." 
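If you prefer to keep the keys out of the notebook entirely, they can be loaded from a local `.env` file first — an optional sketch, assuming the `python-dotenv` package is installed:

```python
# Optional: read PINECONE_API_KEY, PINECONE_ENVIRONMENT and OPENAI_API_KEY
# from a local .env file instead of hard-coding them in the notebook.
from dotenv import load_dotenv

load_dotenv()
```
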
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import openai\n", + "\n", + "openai.api_key = os.environ[\"OPENAI_API_KEY\"]" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -376,10 +397,9 @@ "source": [ "## Query the Context Engine\n", "\n", - "While the `KnowledgeBase` is in charge of excuting a textual queries against the Pinecone index, `ContextEngine` is a higher level component that holds the KnowledgeBase, but have a slightly different API:\n", + "`ContextEngine` is an object that responsible to retrieve the most relevant context for a given query and token budget. \n", "\n", - "1. The context engine can get user questions in natural langague. It then generate a search queries out of it. For example, given the question *\"What is the capacity of p1 pods?\"*, the ContextEngine would first convert it into the search query *\"p1 pod capacity\"* and then run it against the KnowledgeBase.\n", - "2. The `query` method of context engine support a `max_context_tokens` that can limit the number of tokens used in its results. This capabillity allows the user to better handle tokens budgest and limit in the prompts sending later to the LLM." + "While `KnowledgeBase` retreivs the full `top-k` structred documens for each query including all the metadata related to them, context engine in charge of transforming this information to a \"prompt ready\" context that can later feeded to an LLM. To achieve this the context engine holds a `ContextBuilder` object that takes query results from the knowledge base and returns a `Context` object. The context builder also considers the `max_context_tokens` budget given to it and build the most relevant context that not exceeds the token budget." ] }, { @@ -400,7 +420,7 @@ "source": [ "import json\n", "\n", - "result = context_engine.query([Query(text=\"What is the capacity of p1 pods?\", top_k=5)], max_context_tokens=512)\n", + "result = context_engine.query([Query(text=\"capacity of p1 pods\", top_k=5)], max_context_tokens=512)\n", "\n", "print(result.to_text(indent=2))\n", "print(f\"\\n# tokens in context returned: {result.num_tokens}\")" @@ -411,7 +431,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As you can see above, we queried the context engine with a question in natural language. Also, even though we set `top_k=5`, context engine retreived only 3 results in order to satisfy the 512 tokens limit" + "As you can see above, although we set `top_k=5`, context engine retreived only 3 results in order to satisfy the 512 tokens limit. Also, the documents in the context contain only the text and source and not all the metadata that is not necessarily needed by the LLM. " ] }, { @@ -423,7 +443,7 @@ "\n", "Now we are ready to start chatting with our data!\n", "\n", - "Canopy's `ChatEngine` is a one-stop-shop RAG-infused Chatbot. The `ChatEngine` wraps an underlying LLM such as OpenAI's ChatGPT, enhancing it by providing relevant context from the user's knowledge base." + "Canopy's `ChatEngine` is a one-stop-shop RAG-infused Chatbot. The `ChatEngine` wraps an underlying LLM such as OpenAI's ChatGPT, enhancing it by providing relevant context from the user's knowledge base. It also automatically phrases search queries out of the chat history and send them to the knowledge base." 
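A single-turn call looks like this — a minimal sketch, assuming the `chat_engine` object constructed in this section:

```python
from canopy.models.data_models import UserMessage

# A single question with no prior history; context retrieval happens behind the scenes.
response = chat_engine.chat(messages=[UserMessage(content="What is the capacity of p1 pods?")],
                            stream=False)
print(response.choices[0].message.content)
```
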
] }, { @@ -634,7 +654,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.8 (v3.10.8:aaaf517424, Oct 11 2022, 10:14:40) [Clang 13.0.0 (clang-1300.0.29.30)]" }, "orig_nbformat": 4, "vscode": { From 26c91c913e7c8f7413e2a136fa91801a648c93c4 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Tue, 31 Oct 2023 17:32:34 +0200 Subject: [PATCH 14/21] correcting typos --- examples/canopy-lib-quickstart.ipynb | 38 ++++++++++++++-------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index 404065d7..f02cddda 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -7,9 +7,9 @@ "source": [ "# Pinecone Canopy library quick start notebook\n", "\n", - "**Canopy** is a Sofware Development Kit (SDK) for AI applications. Canopy allows you to test, build and package Retrieval Augmented Applications with Pinecone Vector Database. \n", + "**Canopy** is a Software Development Kit (SDK) for AI applications. Canopy allows you to test, build and package Retrieval Augmented Applications with Pinecone Vector Database. \n", "\n", - "This notebook introduce the quick start steps for working with Canopy library. You can find more details about this project and advanced use in the project [documentaion](../README.md).\n" + "This notebook introduces the quick start steps for working with Canopy library. You can find more details about this project and advanced use in the project [documentation](../README.md).\n" ] }, { @@ -64,7 +64,7 @@ "source": [ "We don't have to do the following step since openai loads the environment variable on import.\n", "\n", - "When working with Jupyter notebook we'll have to restart the kernel for any mistake in this variable so it's safer to explicitly set the api key." + "When working with a Jupyter notebook we'll have to restart the kernel for any mistake in this variable so it's safer to explicitly set the api key." ] }, { @@ -91,9 +91,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now we'll load a crawl of from 25/10/23 of pinecone docs [website](https://docs.pinecone.io/docs/).\n", + "Now we'll load a crawl from 25/10/23 of pinecone docs [website](https://docs.pinecone.io/docs/).\n", "\n", - "We will use this data to demonstrate how to build a RAG pipepline to answer questions about Pinecone DB." + "We will use this data to demonstrate how to build a RAG pipeline to answer questions about Pinecone DB." ] }, { @@ -115,7 +115,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Each record in this dataset represents a single page in Pinecone's documentation. Each row contatins a unique id, the raw text of the page in markdown language, the url of the page as \"source\" and some metadata. " + "Each record in this dataset represents a single page in Pinecone's documentation. Each row contains a unique id, the raw text of the page in markdown language, the url of the page as \"source\" and some metadata. 
" ] }, { @@ -144,7 +144,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "After initilizing the global object, we can simply create an instance from anywhere in our code, without providing any parameters:" + "After initializing the global object, we can simply create an instance from anywhere in our code, without providing any parameters:" ] }, { @@ -169,11 +169,11 @@ "\n", "The `KnowledgeBase` object is responsible for storing and indexing textual documents.\n", "\n", - "Once documents were indexed, the `KnowledgeBase` can be queried with a new unseen text passage, for which the most relevant document chunks are retrieved.\n", + "Once documents are indexed, the `KnowledgeBase` can be queried with a new unseen text passage, for which the most relevant document chunks are retrieved.\n", "\n", "The `KnowledgeBase` holds a connection to a Pinecone index and provides a simple API to insert, delete and search textual documents.\n", "\n", - "The `KnoweldgeBase`'s `upsert()` operation is used to index new documents, or update already stored documents. The `upsert` process splits each document's text into smaller chunks, transforms these chunks to vector embeddings, then upserts those vectors to the underlying Pinecone index. At Query time, the `KnowledgeBase` transforms the textual query text to a vector in a similar manner, then queries the underlying Pinecone index to retrieve the top-k most closely matched document chunks.\n", + "The `KnowledgeBase`'s `upsert()` operation is used to index new documents, or update already stored documents. The `upsert` process splits each document's text into smaller chunks, transforms these chunks to vector embeddings, then upserts those vectors to the underlying Pinecone index. At Query time, the `KnowledgeBase` transforms the textual query text to a vector in a similar manner, then queries the underlying Pinecone index to retrieve the top-k most closely matched document chunks.\n", "\n", "Here we create a `KnowledgeBase` with our desired index name: " ] @@ -238,7 +238,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "> 💡 Note: a knowledge base must be connected to an index before excuting any operation. You should call `kb.connect()` to connect an existing index or call `kb.create_canopy_index(INDEX_NANE)` before calling any other method of the KB " + "> 💡 Note: a knowledge base must be connected to an index before executing any operation. You should call `kb.connect()` to connect an existing index or call `kb.create_canopy_index(INDEX_NANE)` before calling any other method of the KB " ] }, { @@ -281,7 +281,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Luckily the columns in our dataset fits this scehma, so we can use a simple iteration to prepare our data:" + "Luckily the columns in our dataset fits this schema, so we can use a simple iteration to prepare our data:" ] }, { @@ -320,7 +320,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Internally, the KnowledgeBase handle for use all the processing needed to load data into Pinecone. It chunks the text to smaller pieces and encode them to vectors (embeddings) that can be then upserted directly to Pinecone. Later in this notebook we'll learn how to tune and costumize this process." + "Internally, the KnowledgeBase handle for use all the processing needed to load data into Pinecone. It chunks the text to smaller pieces and encode them to vectors (embeddings) that can be then upserted directly to Pinecone. 
Later in this notebook we'll learn how to tune and customize this process." ] }, { @@ -330,7 +330,7 @@ "source": [ "## Query the KnowledgeBase\n", "\n", - "Now we can query the knowledge base. The KnowledgeBase will use its default parameters like `top_k` to exectute the query:" + "Now we can query the knowledge base. The KnowledgeBase will use its default parameters like `top_k` to execute the query:" ] }, { @@ -399,7 +399,7 @@ "\n", "`ContextEngine` is an object that responsible to retrieve the most relevant context for a given query and token budget. \n", "\n", - "While `KnowledgeBase` retreivs the full `top-k` structred documens for each query including all the metadata related to them, context engine in charge of transforming this information to a \"prompt ready\" context that can later feeded to an LLM. To achieve this the context engine holds a `ContextBuilder` object that takes query results from the knowledge base and returns a `Context` object. The context builder also considers the `max_context_tokens` budget given to it and build the most relevant context that not exceeds the token budget." + "While `KnowledgeBase` retrieves the full `top-k` structured documents for each query including all the metadata related to them, the context engine in charge of transforming this information to a \"prompt ready\" context that can later feeded to an LLM. To achieve this the context engine holds a `ContextBuilder` object that takes query results from the knowledge base and returns a `Context` object. The context builder also considers the `max_context_tokens` budget given to it and builds the most relevant context that not exceeds the token budget." ] }, { @@ -500,7 +500,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "> 💡 Note: Canopy calls the underlying LLM, providing both the user-provided chat history and a generated `Context` prompt. This might surpass the `ChatEngine`'s configured `max_prompt_tokens`. By default, the `ChatEngine` would truncate the older most messages in the chat history avoid exceeding this limit. This behavior in configurable, as explained in the [documentation](https://github.com/pinecone-io/canopy/blob/main/src/canopy/chat_engine/chat_engine.py)" + "> 💡 Note: Canopy calls the underlying LLM, providing both the user-provided chat history and a generated `Context` prompt. This might surpass the `ChatEngine`'s configured `max_prompt_tokens`. By default, the `ChatEngine` would truncate the oldest messages in the chat history to avoid exceeding this limit. 
This behavior in configurable, as explained in the [documentation](https://github.com/pinecone-io/canopy/blob/main/src/canopy/chat_engine/chat_engine.py)" ] }, { @@ -508,9 +508,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Costumization Example\n", + "## Customization Example\n", "\n", - "Canopy built as a modular library, where each component can fully be costumized by the user.\n", + "Canopy built as a modular library, where each component can fully be customized by the user.\n", "\n", "Before we start, we would like to have a quick overview of the inner components used by the knowledge base:\n", "\n", @@ -518,7 +518,7 @@ "- **Chunker**: A `Chunker` object that is used to chunk the documents into smaller pieces of text.\n", "- **Encoder**: An `RecordEncoder` object that is used to encode the chunks and queries into vector representations.\n", "\n", - "In the following example, we show how you can costumize the `Chunker` component used by the knowledge base.\n", + "In the following example, we show how you can customize the `Chunker` component used by the knowledge base.\n", "\n", "First, we will create a dummy chunker class that simply chunks the text by new lines `\\n`." ] @@ -617,7 +617,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As we can see above, our knowledge base split the document by new line as expected." + "As we can see above, our knowledge base split the document by a new line as expected." ] }, { From 20e734131b6e9ae09acbbd90341a9b267bf4551c Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Wed, 1 Nov 2023 09:31:32 +0200 Subject: [PATCH 15/21] add outputs --- examples/canopy-lib-quickstart.ipynb | 361 +++++++++++++++++++++++---- 1 file changed, 311 insertions(+), 50 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index f02cddda..0a357f44 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -24,9 +24,19 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], "source": [ "!pip install -qU pinecone-canopy" ] @@ -46,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -98,9 +108,111 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtextsourcemetadata
0728aeea1-1dcf-5d0a-91f2-ecccd4dd4272# Scale indexes\\n\\n[Suggest Edits](/edit/scali...https://docs.pinecone.io/docs/scaling-indexes{'created_at': '2023_10_25', 'title': 'scaling...
12f19f269-171f-5556-93f3-a2d7eabbe50f# Understanding organizations\\n\\n[Suggest Edit...https://docs.pinecone.io/docs/organizations{'created_at': '2023_10_25', 'title': 'organiz...
2b2a71cb3-5148-5090-86d5-7f4156edd7cf# Manage datasets\\n\\n[Suggest Edits](/edit/dat...https://docs.pinecone.io/docs/datasets{'created_at': '2023_10_25', 'title': 'datasets'}
31dafe68a-2e78-57f7-a97a-93e043462196# Architecture\\n\\n[Suggest Edits](/edit/archit...https://docs.pinecone.io/docs/architecture{'created_at': '2023_10_25', 'title': 'archite...
48b07b24d-4ec2-58a1-ac91-c8e6267b9ffd# Moving to production\\n\\n[Suggest Edits](/edi...https://docs.pinecone.io/docs/moving-to-produc...{'created_at': '2023_10_25', 'title': 'moving-...
\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 728aeea1-1dcf-5d0a-91f2-ecccd4dd4272 \n", + "1 2f19f269-171f-5556-93f3-a2d7eabbe50f \n", + "2 b2a71cb3-5148-5090-86d5-7f4156edd7cf \n", + "3 1dafe68a-2e78-57f7-a97a-93e043462196 \n", + "4 8b07b24d-4ec2-58a1-ac91-c8e6267b9ffd \n", + "\n", + " text \\\n", + "0 # Scale indexes\\n\\n[Suggest Edits](/edit/scali... \n", + "1 # Understanding organizations\\n\\n[Suggest Edit... \n", + "2 # Manage datasets\\n\\n[Suggest Edits](/edit/dat... \n", + "3 # Architecture\\n\\n[Suggest Edits](/edit/archit... \n", + "4 # Moving to production\\n\\n[Suggest Edits](/edi... \n", + "\n", + " source \\\n", + "0 https://docs.pinecone.io/docs/scaling-indexes \n", + "1 https://docs.pinecone.io/docs/organizations \n", + "2 https://docs.pinecone.io/docs/datasets \n", + "3 https://docs.pinecone.io/docs/architecture \n", + "4 https://docs.pinecone.io/docs/moving-to-produc... \n", + "\n", + " metadata \n", + "0 {'created_at': '2023_10_25', 'title': 'scaling... \n", + "1 {'created_at': '2023_10_25', 'title': 'organiz... \n", + "2 {'created_at': '2023_10_25', 'title': 'datasets'} \n", + "3 {'created_at': '2023_10_25', 'title': 'archite... \n", + "4 {'created_at': '2023_10_25', 'title': 'moving-... " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pandas as pd\n", "import warnings\n", @@ -131,7 +243,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -149,9 +261,20 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Hello', ' world', '!']" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from canopy.tokenizer import Tokenizer\n", "\n", @@ -180,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -203,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -225,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -261,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -286,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -303,9 +426,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "517e05af0db9477cbf48f8cc06dbb692", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/6 [00:00 per index.\n", + "source: https://docs.pinecone.io/docs/limits\n", + "score: 0.717559636\n", + "\n" + ] + } + ], "source": [ "from canopy.models.data_models import Query\n", "results = kb.query([Query(text=\"p1 pod capacity\",\n", @@ -404,7 +588,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -414,9 +598,31 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "{\n", + " \"query\": \"capacity of p1 pods\",\n", + " \"snippets\": [\n", + " {\n", + " \"source\": \"https://docs.pinecone.io/docs/indexes\",\n", + " \"text\": \"### p2 pods\\n\\n\\nThe p2 pod type provides greater query throughput with lower latency. For vectors with fewer than 128 dimension and queries where `topK` is less than 50, p2 pods support up to 200 QPS per replica and return queries in less than 10ms. This means that query throughput and latency are better than s1 and p1.\\n\\n\\nEach p2 pod has enough capacity for around 1M vectors of 768 dimensions. However, capacity may vary with dimensionality.\\n\\n\\nThe data ingestion rate for p2 pods is significantly slower than for p1 pods; this rate decreases as the number of dimensions increases. For example, a p2 pod containing vectors with 128 dimensions can upsert up to 300 updates per second; a p2 pod containing vectors with 768 dimensions or more supports upsert of 50 updates per second. Because query latency and throughput for p2 pods vary from p1 pods, test p2 pod performance with your dataset.\\n\\n\\nThe p2 pod type does not support sparse vector values.\"\n", + " },\n", + " {\n", + " \"source\": \"https://docs.pinecone.io/docs/indexes\",\n", + " \"text\": \"### s1 pods\\n\\n\\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\\n\\n\\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\\n\\n\\n### p1 pods\\n\\n\\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\\n\\n\\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.\"\n", + " }\n", + " ]\n", + "}\n", + "\n", + "# tokens in context returned: 412\n" + ] + } + ], "source": [ "import json\n", "\n", @@ -448,7 +654,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -458,7 +664,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -474,9 +680,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "Each p1 pod has enough capacity for around 1M vectors of 768 dimensions. [Source: https://docs.pinecone.io/docs/indexes]" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "from IPython.display import display, Markdown\n", "\n", @@ -487,9 +706,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "The p1 pods are ideal for applications with low latency requirements (<100ms). 
[Source: https://docs.pinecone.io/docs/indexes]" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "response, history = chat(\"And for what latency requirements does it fit?\", history)\n", "display(Markdown(response))" @@ -525,7 +757,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -551,9 +783,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[KBDocChunk(id='id1_0', text='This is first line', source='example', metadata={'title': 'newline'}, document_id='id1'),\n", + " KBDocChunk(id='id1_1', text='This is the second line', source='example', metadata={'title': 'newline'}, document_id='id1')]" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "chunker = NewLineChunker()\n", "\n", @@ -574,7 +818,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -593,7 +837,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -602,9 +846,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query: second line\n", + "\n", + "document: This is the second line\n", + "source: example\n", + "score: 0.928703\n", + "\n", + "document: This is first line\n", + "source: example\n", + "score: 0.885779679\n", + "\n" + ] + } + ], "source": [ "results = kb.query([Query(text=\"second line\",\n", " metadata_filter={\"title\": \"newline\"})])\n", @@ -630,7 +891,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -654,7 +915,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8 (v3.10.8:aaaf517424, Oct 11 2022, 10:14:40) [Clang 13.0.0 (clang-1300.0.29.30)]" + "version": "3.10.8" }, "orig_nbformat": 4, "vscode": { From f906b5dd50536dcd0425e962a5793109331e0bad Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Wed, 1 Nov 2023 14:45:54 +0200 Subject: [PATCH 16/21] change a comment --- examples/canopy-lib-quickstart.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index 0a357f44..718a0055 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -458,7 +458,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Internally, the KnowledgeBase handle for use all the processing needed to load data into Pinecone. It chunks the text to smaller pieces and encode them to vectors (embeddings) that can be then upserted directly to Pinecone. Later in this notebook we'll learn how to tune and customize this process." + "Internally, the KnowledgeBase handles all the processing needed to Index the documents. Each document's text is chunked to smaller pieces and encoded to vector embeddings that can be then upserted directly to Pinecone. Later in this notebook we'll learn how to tune and customize this process." 
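As a preview of that customization, swapping in your own components is mostly a matter of passing them to the `KnowledgeBase` constructor — a minimal sketch, assuming a `chunker` keyword argument and the `NewLineChunker` class defined later in this notebook:

```python
from canopy.knowledge_base import KnowledgeBase

# Hypothetical wiring: swap the default chunker for the custom NewLineChunker
# shown later; the exact keyword arguments may differ between Canopy versions.
kb = KnowledgeBase(index_name="canopy-quickstart", chunker=NewLineChunker())
```
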
] }, { From 8c9cd751a3affb5d15d20a1f3b8fdb19656e1daf Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Wed, 1 Nov 2023 14:53:44 +0200 Subject: [PATCH 17/21] changed a comment --- examples/canopy-lib-quickstart.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index 718a0055..32b4eb4b 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -532,7 +532,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can also use metadata filtering and specify `top_k`:" + "You can change the `top_k` parameter, to determine the number of top query results that will be returned and also to provide a [metadata filter](https://docs.pinecone.io/docs/metadata-filtering)." ] }, { From 63964800d0aacf31e351d332572f6a3359711e83 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Wed, 1 Nov 2023 15:02:44 +0200 Subject: [PATCH 18/21] add title to query results prinitng --- examples/canopy-lib-quickstart.ipynb | 102 +++++++++++++++------------ 1 file changed, 56 insertions(+), 46 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index 32b4eb4b..d26cc376 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -108,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -208,7 +208,7 @@ "4 {'created_at': '2023_10_25', 'title': 'moving-... 
" ] }, - "execution_count": 32, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -243,7 +243,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -261,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -270,7 +270,7 @@ "['Hello', ' world', '!']" ] }, - "execution_count": 34, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -303,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -326,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -348,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -384,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -409,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -426,13 +426,13 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "517e05af0db9477cbf48f8cc06dbb692", + "model_id": "a80eb9ab18ef4a10b104ab9af8e208ef", "version_major": 2, "version_minor": 0 }, @@ -473,7 +473,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -482,13 +482,14 @@ " print('query: ' + query_results.query + '\\n')\n", " for document in query_results.documents:\n", " print('document: ' + document.text.replace(\"\\n\", \"\\\\n\"))\n", + " print(\"title: \" + document.metadata[\"title\"])\n", " print('source: ' + document.source)\n", " print(f\"score: {document.score}\\n\")" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -498,24 +499,29 @@ "query: p1 pod capacity\n", "\n", "document: ### s1 pods\\n\\n\\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\\n\\n\\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\\n\\n\\n### p1 pods\\n\\n\\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\\n\\n\\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.\n", + "title: indexes\n", "source: https://docs.pinecone.io/docs/indexes\n", - "score: 0.842834711\n", + "score: 0.844001234\n", "\n", "document: ## Pod storage capacity\\n\\n\\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\\n\\n\\nEach **s1** pod has enough capacity for 5M vectors with 768 dimensions.\\n\\n\\n## Metadata\\n\\n\\nMax metadata size per vector is 40 KB.\\n\\n\\nNull metadata values are not supported. 
Instead of setting a key to hold a null value, we recommend you remove that key from the metadata payload.\\n\\n\\nMetadata with high cardinality, such as a unique value for every vector in a large index, uses more memory than expected and can cause the pods to become full.\n", + "title: limits\n", "source: https://docs.pinecone.io/docs/limits\n", - "score: 0.842082918\n", + "score: 0.842709482\n", "\n", "document: #### p2 pod type (Public Preview)(\"Beta\")\\n\\n\\nThe new [p2 pod type](indexes/#p2-pods) provides search speeds of around 5ms and throughput of 200 queries per second per replica, or approximately 10x faster speeds and higher throughput than the p1 pod type, depending on your data and network conditions. \\n\\n\\nThis is a **public preview** feature and is not appropriate for production workloads.\\n\\n\\n#### Improved p1 and s1 performance\\n\\n\\nThe [s1](indexes/#s1-pods) and [p1](indexes/#p1-pods) pod types now offer approximately 50% higher query throughput and 50% lower latency, depending on your workload.\n", + "title: release-notes\n", "source: https://docs.pinecone.io/docs/release-notes\n", - "score: 0.836096466\n", + "score: 0.834972441\n", "\n", "document: ### p2 pods\\n\\n\\nThe p2 pod type provides greater query throughput with lower latency. For vectors with fewer than 128 dimension and queries where `topK` is less than 50, p2 pods support up to 200 QPS per replica and return queries in less than 10ms. This means that query throughput and latency are better than s1 and p1.\\n\\n\\nEach p2 pod has enough capacity for around 1M vectors of 768 dimensions. However, capacity may vary with dimensionality.\\n\\n\\nThe data ingestion rate for p2 pods is significantly slower than for p1 pods; this rate decreases as the number of dimensions increases. For example, a p2 pod containing vectors with 128 dimensions can upsert up to 300 updates per second; a p2 pod containing vectors with 768 dimensions or more supports upsert of 50 updates per second. Because query latency and throughput for p2 pods vary from p1 pods, test p2 pod performance with your dataset.\\n\\n\\nThe p2 pod type does not support sparse vector values.\n", + "title: indexes\n", "source: https://docs.pinecone.io/docs/indexes\n", - "score: 0.833599687\n", + "score: 0.832246363\n", "\n", "document: ## Number of vectors\\n\\n\\nThe most important consideration in sizing is the [number of vectors](/docs/insert-data/) you plan on working with. As a rule of thumb, a single p1 pod can store approximately 1M vectors, while a s1 pod can store 5M vectors. However, this can be affected by other factors, such as dimensionality and metadata, which are explained below.\n", + "title: choosing-index-type-and-size\n", "source: https://docs.pinecone.io/docs/choosing-index-type-and-size\n", - "score: 0.826832652\n", + "score: 0.828785\n", "\n" ] } @@ -537,7 +543,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -547,12 +553,14 @@ "query: p1 pod capacity\n", "\n", "document: ## Pod storage capacity\\n\\n\\nEach **p1** pod has enough capacity for 1M vectors with 768 dimensions.\\n\\n\\nEach **s1** pod has enough capacity for 5M vectors with 768 dimensions.\\n\\n\\n## Metadata\\n\\n\\nMax metadata size per vector is 40 KB.\\n\\n\\nNull metadata values are not supported. 
Instead of setting a key to hold a null value, we recommend you remove that key from the metadata payload.\\n\\n\\nMetadata with high cardinality, such as a unique value for every vector in a large index, uses more memory than expected and can cause the pods to become full.\n", + "title: limits\n", "source: https://docs.pinecone.io/docs/limits\n", - "score: 0.842082918\n", + "score: 0.842464507\n", "\n", - "document: # Limits\\n\\n[Suggest Edits](/edit/limits)This is a summary of current Pinecone limitations. For many of these, there is a workaround or we're working on increasing the limits.\\n\\n\\n## Upserts\\n\\n\\nMax vector dimensionality is 20,000.\\n\\n\\nMax size for an upsert request is 2MB. Recommended upsert limit is 100 vectors per request.\\n\\n\\nVectors may not be visible to queries immediately after upserting. You can check if the vectors were indexed by looking at the total with `describe_index_stats()`, although this method may not work if the index has multiple replicas. Pinecone is eventually consistent.\\n\\n\\nPinecone supports sparse vector values of sizes up to 1000 non-zero values.\\n\\n\\n## Queries\\n\\n\\nMax value for `top_k`, the number of results to return, is 10,000. Max value for `top_k` for queries with `include_metadata=True` or `include_data=True` is 1,000.\\n\\n\\n## Fetch and Delete\\n\\n\\nMax vectors per fetch or delete request is 1,000.\\n\\n\\n## Namespaces\\n\\n\\nThere is no limit to the number of per index.\n", + "document: ## Retention\\n\\n\\nIn general, indexes on the Starter (free) plan are archived as collections and deleted after 7 days of inactivity; for indexes created by certain open source projects such as AutoGPT, indexes are archived and deleted after 1 day of inactivity. To prevent this, you can send any API request to Pinecone and the counter will reset.\\n\\nUpdated about 1 month ago \\n\\n\\n\\n---\\n\\n* [Table of Contents](#)\\n* + [Upserts](#upserts)\\n\t+ [Queries](#queries)\\n\t+ [Fetch and Delete](#fetch-and-delete)\\n\t+ [Namespaces](#namespaces)\\n\t+ [Pod storage capacity](#pod-storage-capacity)\\n\t+ [Metadata](#metadata)\\n\t+ [Retention](#retention)\n", + "title: limits\n", "source: https://docs.pinecone.io/docs/limits\n", - "score: 0.717559636\n", + "score: 0.71726948\n", "\n" ] } @@ -560,7 +568,7 @@ "source": [ "from canopy.models.data_models import Query\n", "results = kb.query([Query(text=\"p1 pod capacity\",\n", - " metadata_filter={\"title\": \"limits\"},\n", + " metadata_filter={\"source\": \"https://docs.pinecone.io/docs/limits\"},\n", " top_k=2)])\n", "\n", "print_query_results(results)" @@ -588,7 +596,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -598,7 +606,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 49, "metadata": {}, "outputs": [ { @@ -610,11 +618,11 @@ " \"snippets\": [\n", " {\n", " \"source\": \"https://docs.pinecone.io/docs/indexes\",\n", - " \"text\": \"### p2 pods\\n\\n\\nThe p2 pod type provides greater query throughput with lower latency. For vectors with fewer than 128 dimension and queries where `topK` is less than 50, p2 pods support up to 200 QPS per replica and return queries in less than 10ms. This means that query throughput and latency are better than s1 and p1.\\n\\n\\nEach p2 pod has enough capacity for around 1M vectors of 768 dimensions. 
However, capacity may vary with dimensionality.\\n\\n\\nThe data ingestion rate for p2 pods is significantly slower than for p1 pods; this rate decreases as the number of dimensions increases. For example, a p2 pod containing vectors with 128 dimensions can upsert up to 300 updates per second; a p2 pod containing vectors with 768 dimensions or more supports upsert of 50 updates per second. Because query latency and throughput for p2 pods vary from p1 pods, test p2 pod performance with your dataset.\\n\\n\\nThe p2 pod type does not support sparse vector values.\"\n", + " \"text\": \"### s1 pods\\n\\n\\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\\n\\n\\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\\n\\n\\n### p1 pods\\n\\n\\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\\n\\n\\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.\"\n", " },\n", " {\n", " \"source\": \"https://docs.pinecone.io/docs/indexes\",\n", - " \"text\": \"### s1 pods\\n\\n\\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\\n\\n\\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\\n\\n\\n### p1 pods\\n\\n\\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\\n\\n\\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.\"\n", + " \"text\": \"### p2 pods\\n\\n\\nThe p2 pod type provides greater query throughput with lower latency. For vectors with fewer than 128 dimension and queries where `topK` is less than 50, p2 pods support up to 200 QPS per replica and return queries in less than 10ms. This means that query throughput and latency are better than s1 and p1.\\n\\n\\nEach p2 pod has enough capacity for around 1M vectors of 768 dimensions. However, capacity may vary with dimensionality.\\n\\n\\nThe data ingestion rate for p2 pods is significantly slower than for p1 pods; this rate decreases as the number of dimensions increases. For example, a p2 pod containing vectors with 128 dimensions can upsert up to 300 updates per second; a p2 pod containing vectors with 768 dimensions or more supports upsert of 50 updates per second. Because query latency and throughput for p2 pods vary from p1 pods, test p2 pod performance with your dataset.\\n\\n\\nThe p2 pod type does not support sparse vector values.\"\n", " }\n", " ]\n", "}\n", @@ -654,7 +662,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -664,7 +672,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -680,13 +688,13 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ - "Each p1 pod has enough capacity for around 1M vectors of 768 dimensions. 
[Source: https://docs.pinecone.io/docs/indexes]" + "The capacity of p1 pods is enough for around 1 million vectors of 768 dimensions. Source: [Pinecone Documentation](https://docs.pinecone.io/docs/indexes)" ], "text/plain": [ "" @@ -706,13 +714,13 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ - "The p1 pods are ideal for applications with low latency requirements (<100ms). [Source: https://docs.pinecone.io/docs/indexes]" + "P1 pods are ideal for applications with low latency requirements, specifically those that require latencies of less than 100 milliseconds. Source: [Pinecone Documentation](https://docs.pinecone.io/docs/indexes)" ], "text/plain": [ "" @@ -757,7 +765,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ @@ -783,7 +791,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -793,7 +801,7 @@ " KBDocChunk(id='id1_1', text='This is the second line', source='example', metadata={'title': 'newline'}, document_id='id1')]" ] }, - "execution_count": 51, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -818,7 +826,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -837,7 +845,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -846,7 +854,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 58, "metadata": {}, "outputs": [ { @@ -856,12 +864,14 @@ "query: second line\n", "\n", "document: This is the second line\n", + "title: newline\n", "source: example\n", - "score: 0.928703\n", + "score: 0.928711653\n", "\n", "document: This is first line\n", + "title: newline\n", "source: example\n", - "score: 0.885779679\n", + "score: 0.887627542\n", "\n" ] } @@ -891,7 +901,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 59, "metadata": {}, "outputs": [], "source": [ From 981c298fc73805a819fd537d6cb71ec22e00d3d7 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Wed, 1 Nov 2023 15:08:58 +0200 Subject: [PATCH 19/21] changed a commnet --- examples/canopy-lib-quickstart.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index d26cc376..b000fe16 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -589,9 +589,9 @@ "source": [ "## Query the Context Engine\n", "\n", - "`ContextEngine` is an object that responsible to retrieve the most relevant context for a given query and token budget. \n", + "`ContextEngine` is an object responsible for retrieving the most relevant context for a given query and token budget. \n", "\n", - "While `KnowledgeBase` retrieves the full `top-k` structured documents for each query including all the metadata related to them, the context engine in charge of transforming this information to a \"prompt ready\" context that can later feeded to an LLM. To achieve this the context engine holds a `ContextBuilder` object that takes query results from the knowledge base and returns a `Context` object. The context builder also considers the `max_context_tokens` budget given to it and builds the most relevant context that not exceeds the token budget." 
+    "While `KnowledgeBase` retrieves the full `top-k` structured documents for each query, including all of their related metadata, the context engine is in charge of transforming this information into a \"prompt ready\" context that can later be fed to an LLM. To achieve this, the context engine holds a `ContextBuilder` object that takes query results from the knowledge base and returns a `Context` object. The `ContextEngine`'s default behavior is to use a `StuffingContextBuilder`, which simply stacks retrieved document chunks in a JSON-like manner, keeping only as many chunks as fit within the `max_context_tokens` budget. More complex behaviors can be achieved by providing a custom `ContextBuilder` class."
    ]
   },
   {

From f396fbcd57970d1e36a141403ff339d93c18da3c Mon Sep 17 00:00:00 2001
From: Amnon Catav
Date: Wed, 1 Nov 2023 15:18:26 +0200
Subject: [PATCH 20/21] change a comment

---
 examples/canopy-lib-quickstart.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb
index b000fe16..30e976c2 100644
--- a/examples/canopy-lib-quickstart.ipynb
+++ b/examples/canopy-lib-quickstart.ipynb
@@ -404,7 +404,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Luckily the columns in our dataset fits this schema, so we can use a simple iteration to prepare our data:"
+    "The data in our example dataset is already provided in this schema, so we can simply iterate over it and instantiate `Document` objects:"
    ]
   },
   {

From e65dff42cc0ee0035b0f3b689ee2804aabd795cd Mon Sep 17 00:00:00 2001
From: Amnon Catav
Date: Wed, 1 Nov 2023 15:21:33 +0200
Subject: [PATCH 21/21] merge cells

---
 examples/canopy-lib-quickstart.ipynb | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb
index 30e976c2..11764922 100644
--- a/examples/canopy-lib-quickstart.ipynb
+++ b/examples/canopy-lib-quickstart.ipynb
@@ -60,30 +60,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import os\n",
+    "import os, openai\n",
     "\n",
     "os.environ[\"PINECONE_API_KEY\"] = os.environ.get('PINECONE_API_KEY') or 'YOUR_PINECONE_API_KEY'\n",
     "os.environ[\"PINECONE_ENVIRONMENT\"] = os.environ.get('PINECONE_ENVIRONMENT') or 'PINECONE_ENVIRONMENT'\n",
-    "os.environ[\"OPENAI_API_KEY\"] = os.environ.get('OPENAI_API_KEY') or 'OPENAI_API_KEY'"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We don't have to do the following step since openai loads the environment variable on import.\n",
-    "\n",
-    "When working with a Jupyter notebook we'll have to restart the kernel for any mistake in this variable so it's safer to explicitly set the api key."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import openai\n",
+    "os.environ[\"OPENAI_API_KEY\"] = os.environ.get('OPENAI_API_KEY') or 'OPENAI_API_KEY'\n",
     "\n",
     "openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
    ]
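
Two short, hedged examples follow for readers adapting this notebook. The first concerns the `metadata_filter` change earlier in this series, which swaps the implicit equality filter `{"title": "limits"}` for an equivalent one on `source`. Assuming Canopy forwards this dictionary to the Pinecone index unchanged, Pinecone's explicit operator syntax should work here as well; the query text below is illustrative:

from canopy.models.data_models import Query

# Explicit $eq form of the equality filter used in the notebook. This assumes
# the filter dict is passed straight through to Pinecone's query API.
results = kb.query([Query(text="s1 pod capacity",
                          metadata_filter={"source": {"$eq": "https://docs.pinecone.io/docs/limits"}},
                          top_k=2)])
print_query_results(results)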
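
The second example concerns the reworded `ContextEngine` paragraph, which mentions custom `ContextBuilder` classes without showing one. Below is a minimal sketch of what such a class might look like; the import paths, the `build` signature, and the `Context` constructor arguments are assumptions inferred from that paragraph rather than a verified Canopy interface, so check them against the library before use:

from typing import List

from canopy.context_engine.context_builder.base import ContextBuilder  # assumed path
from canopy.knowledge_base.models import QueryResult                   # assumed path
from canopy.models.data_models import Context                          # assumed path
from canopy.tokenizer import Tokenizer                                 # assumed path


class BestChunkContextBuilder(ContextBuilder):
    """Keep only the top-scoring chunk per query, within the token budget."""

    def __init__(self):
        self._tokenizer = Tokenizer()

    def build(self, query_results: List[QueryResult], max_context_tokens: int) -> Context:
        parts = []
        used_tokens = 0
        for result in query_results:
            if not result.documents:
                continue
            # Each document carries text, source, and score, as shown by
            # print_query_results above.
            best = max(result.documents, key=lambda doc: doc.score)
            snippet = f"Source: {best.source}\n{best.text}"
            n_tokens = len(self._tokenizer.tokenize(snippet))
            if used_tokens + n_tokens > max_context_tokens:
                break  # respect the budget, as the default stuffing builder does
            parts.append(snippet)
            used_tokens += n_tokens
        return Context(content="\n\n".join(parts), num_tokens=used_tokens)

If the engine accepts a builder at construction time (again an assumption), wiring it in would look like `ContextEngine(kb, context_builder=BestChunkContextBuilder())`.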