From 20e734131b6e9ae09acbbd90341a9b267bf4551c Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Wed, 1 Nov 2023 09:31:32 +0200 Subject: [PATCH] add outputs --- examples/canopy-lib-quickstart.ipynb | 361 +++++++++++++++++++++++---- 1 file changed, 311 insertions(+), 50 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index f02cddda..0a357f44 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -24,9 +24,19 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], "source": [ "!pip install -qU pinecone-canopy" ] @@ -46,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -98,9 +108,111 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtextsourcemetadata
0728aeea1-1dcf-5d0a-91f2-ecccd4dd4272# Scale indexes\\n\\n[Suggest Edits](/edit/scali...https://docs.pinecone.io/docs/scaling-indexes{'created_at': '2023_10_25', 'title': 'scaling...
12f19f269-171f-5556-93f3-a2d7eabbe50f# Understanding organizations\\n\\n[Suggest Edit...https://docs.pinecone.io/docs/organizations{'created_at': '2023_10_25', 'title': 'organiz...
2b2a71cb3-5148-5090-86d5-7f4156edd7cf# Manage datasets\\n\\n[Suggest Edits](/edit/dat...https://docs.pinecone.io/docs/datasets{'created_at': '2023_10_25', 'title': 'datasets'}
31dafe68a-2e78-57f7-a97a-93e043462196# Architecture\\n\\n[Suggest Edits](/edit/archit...https://docs.pinecone.io/docs/architecture{'created_at': '2023_10_25', 'title': 'archite...
48b07b24d-4ec2-58a1-ac91-c8e6267b9ffd# Moving to production\\n\\n[Suggest Edits](/edi...https://docs.pinecone.io/docs/moving-to-produc...{'created_at': '2023_10_25', 'title': 'moving-...
\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 728aeea1-1dcf-5d0a-91f2-ecccd4dd4272 \n", + "1 2f19f269-171f-5556-93f3-a2d7eabbe50f \n", + "2 b2a71cb3-5148-5090-86d5-7f4156edd7cf \n", + "3 1dafe68a-2e78-57f7-a97a-93e043462196 \n", + "4 8b07b24d-4ec2-58a1-ac91-c8e6267b9ffd \n", + "\n", + " text \\\n", + "0 # Scale indexes\\n\\n[Suggest Edits](/edit/scali... \n", + "1 # Understanding organizations\\n\\n[Suggest Edit... \n", + "2 # Manage datasets\\n\\n[Suggest Edits](/edit/dat... \n", + "3 # Architecture\\n\\n[Suggest Edits](/edit/archit... \n", + "4 # Moving to production\\n\\n[Suggest Edits](/edi... \n", + "\n", + " source \\\n", + "0 https://docs.pinecone.io/docs/scaling-indexes \n", + "1 https://docs.pinecone.io/docs/organizations \n", + "2 https://docs.pinecone.io/docs/datasets \n", + "3 https://docs.pinecone.io/docs/architecture \n", + "4 https://docs.pinecone.io/docs/moving-to-produc... \n", + "\n", + " metadata \n", + "0 {'created_at': '2023_10_25', 'title': 'scaling... \n", + "1 {'created_at': '2023_10_25', 'title': 'organiz... \n", + "2 {'created_at': '2023_10_25', 'title': 'datasets'} \n", + "3 {'created_at': '2023_10_25', 'title': 'archite... \n", + "4 {'created_at': '2023_10_25', 'title': 'moving-... " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pandas as pd\n", "import warnings\n", @@ -131,7 +243,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -149,9 +261,20 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Hello', ' world', '!']" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from canopy.tokenizer import Tokenizer\n", "\n", @@ -180,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -203,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -225,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -261,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -286,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -303,9 +426,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "517e05af0db9477cbf48f8cc06dbb692", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/6 [00:00 per index.\n", + "source: https://docs.pinecone.io/docs/limits\n", + "score: 0.717559636\n", + "\n" + ] + } + ], "source": [ "from canopy.models.data_models import Query\n", "results = kb.query([Query(text=\"p1 pod capacity\",\n", @@ -404,7 +588,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -414,9 +598,31 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"query\": \"capacity of p1 pods\",\n", + " \"snippets\": [\n", + " {\n", + " \"source\": \"https://docs.pinecone.io/docs/indexes\",\n", + " \"text\": \"### p2 pods\\n\\n\\nThe p2 pod type provides greater query throughput with lower latency. For vectors with fewer than 128 dimension and queries where `topK` is less than 50, p2 pods support up to 200 QPS per replica and return queries in less than 10ms. This means that query throughput and latency are better than s1 and p1.\\n\\n\\nEach p2 pod has enough capacity for around 1M vectors of 768 dimensions. However, capacity may vary with dimensionality.\\n\\n\\nThe data ingestion rate for p2 pods is significantly slower than for p1 pods; this rate decreases as the number of dimensions increases. For example, a p2 pod containing vectors with 128 dimensions can upsert up to 300 updates per second; a p2 pod containing vectors with 768 dimensions or more supports upsert of 50 updates per second. Because query latency and throughput for p2 pods vary from p1 pods, test p2 pod performance with your dataset.\\n\\n\\nThe p2 pod type does not support sparse vector values.\"\n", + " },\n", + " {\n", + " \"source\": \"https://docs.pinecone.io/docs/indexes\",\n", + " \"text\": \"### s1 pods\\n\\n\\nThese storage-optimized pods provide large storage capacity and lower overall costs with slightly higher query latencies than p1 pods. They are ideal for very large indexes with moderate or relaxed latency requirements.\\n\\n\\nEach s1 pod has enough capacity for around 5M vectors of 768 dimensions.\\n\\n\\n### p1 pods\\n\\n\\nThese performance-optimized pods provide very low query latencies, but hold fewer vectors per pod than s1 pods. They are ideal for applications with low latency requirements (<100ms).\\n\\n\\nEach p1 pod has enough capacity for around 1M vectors of 768 dimensions.\"\n", + " }\n", + " ]\n", + "}\n", + "\n", + "# tokens in context returned: 412\n" + ] + } + ], "source": [ "import json\n", "\n", @@ -448,7 +654,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ @@ -458,7 +664,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, "outputs": [], "source": [ @@ -474,9 +680,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "Each p1 pod has enough capacity for around 1M vectors of 768 dimensions. [Source: https://docs.pinecone.io/docs/indexes]" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "from IPython.display import display, Markdown\n", "\n", @@ -487,9 +706,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "The p1 pods are ideal for applications with low latency requirements (<100ms). [Source: https://docs.pinecone.io/docs/indexes]" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "response, history = chat(\"And for what latency requirements does it fit?\", history)\n", "display(Markdown(response))" @@ -525,7 +757,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -551,9 +783,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[KBDocChunk(id='id1_0', text='This is first line', source='example', metadata={'title': 'newline'}, document_id='id1'),\n", + " KBDocChunk(id='id1_1', text='This is the second line', source='example', metadata={'title': 'newline'}, document_id='id1')]" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "chunker = NewLineChunker()\n", "\n", @@ -574,7 +818,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -593,7 +837,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -602,9 +846,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query: second line\n", + "\n", + "document: This is the second line\n", + "source: example\n", + "score: 0.928703\n", + "\n", + "document: This is first line\n", + "source: example\n", + "score: 0.885779679\n", + "\n" + ] + } + ], "source": [ "results = kb.query([Query(text=\"second line\",\n", " metadata_filter={\"title\": \"newline\"})])\n", @@ -630,7 +891,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "metadata": {}, "outputs": [], "source": [ @@ -654,7 +915,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8 (v3.10.8:aaaf517424, Oct 11 2022, 10:14:40) [Clang 13.0.0 (clang-1300.0.29.30)]" + "version": "3.10.8" }, "orig_nbformat": 4, "vscode": {