Skip to content

Commit

Permalink
Add Zilliz free tier example
Browse files Browse the repository at this point in the history
Signed-off-by: christy <[email protected]>
  • Loading branch information
christy committed Nov 21, 2023
1 parent 039b706 commit 5f8d441
Showing 1 changed file with 186 additions and 38 deletions.
224 changes: 186 additions & 38 deletions notebooks/llms/langchain/readthedocs_rag_zilliz.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "d7570b2e",
"metadata": {},
"outputs": [],
Expand All @@ -51,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "20dcdaf7",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -93,10 +93,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "0806d2db",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Type of server: zilliz_cloud\n"
]
}
],
"source": [
"from pymilvus import connections, utility\n",
"\n",
Expand Down Expand Up @@ -134,10 +142,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "dd2be7fd",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"device: cpu\n",
"<class 'sentence_transformers.SentenceTransformer.SentenceTransformer'>\n",
"SentenceTransformer(\n",
" (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel \n",
" (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n",
")\n",
"model_name: BAAI/bge-base-en-v1.5\n",
"EMBEDDING_LENGTH: 768\n",
"MAX_SEQ_LENGTH: 512\n"
]
}
],
"source": [
"# Import torch.\n",
"import torch\n",
Expand Down Expand Up @@ -188,9 +212,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Embedding length: 768\n",
"Created collection: MIlvusDocs\n",
"Schema: {'auto_id': True, 'description': 'The schema for docs pages', 'fields': [{'name': 'pk', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 768}}], 'enable_dynamic_field': True}\n"
]
}
],
"source": [
"from pymilvus import (\n",
" FieldSchema, DataType, \n",
Expand Down Expand Up @@ -246,9 +280,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loading_progress': '100%'}\n"
]
}
],
"source": [
"# 5. Drop the index, in case it already exists.\n",
"mc.drop_index()\n",
Expand All @@ -266,19 +308,26 @@
" field_name=\"vector\", \n",
" index_params=index_params)\n",
"\n",
"# collection.load()\n",
"\n",
"# Get loading progress\n",
"mc.load()\n",
"progress = utility.loading_progress(COLLECTION_NAME)\n",
"print(progress)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "6861beb7",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loaded 15 documents\n"
]
}
],
"source": [
"## Read docs into LangChain\n",
"#!pip install langchain \n",
Expand Down Expand Up @@ -309,9 +358,23 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"chunking time: 0.01805710792541504\n",
"docs: 15, split into: 15\n",
"split into chunks: 159, type: list of <class 'langchain.schema.document.Document'>\n",
"\n",
"Looking at a sample chunk...\n",
"{'h1': 'Installation', 'h2': 'Installing via pip', 'source': 'rtdocs/pymilvus.readthedocs.io/en/latest/install.html'}\n",
"demonstrate how to install and using PyMilvus in a virtual environment. See virtualenv for more info\n"
]
}
],
"source": [
"from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter\n",
"\n",
Expand Down Expand Up @@ -384,10 +447,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"id": "512130a3",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'h1': 'Installation', 'h2': 'Installing via pip', 'source': 'https://pymilvus.readthedocs.io/en/latest/install.html'}\n",
"Installation¶ Installing via pip¶ PyMilvus is in the Python Package Index. PyMilvus only support pyt\n"
]
}
],
"source": [
"# Clean up the metadata urls\n",
"for doc in chunks:\n",
Expand All @@ -413,7 +485,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -445,10 +517,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"id": "b51ff139",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Start inserting entities\n",
"Milvus insert time for 159 vectors: 1.0154786109924316 seconds\n",
"(insert count: 159, delete count: 0, upsert count: 0, timestamp: 445785288603074562, success count: 159, err count: 0)\n",
"[{\"name\":\"_default\",\"collection_name\":\"MIlvusDocs\",\"description\":\"\"}]\n"
]
}
],
"source": [
"# Insert a batch of data into the Milvus collection.\n",
"\n",
Expand Down Expand Up @@ -503,10 +586,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"id": "5e7f41f4",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"query length: 54\n"
]
}
],
"source": [
"# Define a sample question about your data.\n",
"question = \"what is the default distance metric used in AUTOINDEX?\"\n",
Expand Down Expand Up @@ -534,10 +625,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"id": "89642119",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded milvus collection into memory.\n",
"Milvus search time: 0.06506514549255371 sec\n",
"type: <class 'pymilvus.client.abstract.SearchResult'>, count: 5\n"
]
}
],
"source": [
"# RETRIEVAL USING MILVUS.\n",
"\n",
Expand Down Expand Up @@ -587,9 +688,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2267\n"
]
}
],
"source": [
"# # TODO - remove this before saving in github.\n",
"# for n, hits in enumerate(results):\n",
Expand Down Expand Up @@ -617,10 +726,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "3e7fa0b6",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Question: what is the default distance metric used in AUTOINDEX?\n",
"Answer: lazy dog\n"
]
}
],
"source": [
"# BASELINING THE LLM: ASK A QUESTION WITHOUT ANY RETRIEVED CONTEXT.\n",
"\n",
Expand Down Expand Up @@ -649,10 +767,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"id": "a68e87b1",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Question: what is the default distance metric used in AUTOINDEX?\n",
"Answer: MetricType.L2\n"
]
}
],
"source": [
"# NOW ASK THE SAME LLM THE SAME QUESTION USING THE RETRIEVED CONTEXT.\n",
"QA_input = {\n",
Expand All @@ -673,7 +800,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"id": "d0e81e68",
"metadata": {},
"outputs": [],
Expand All @@ -684,10 +811,31 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"id": "c777937e",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Author: Christy Bergman\n",
"\n",
"Python implementation: CPython\n",
"Python version : 3.10.12\n",
"IPython version : 8.15.0\n",
"\n",
"torch : 2.0.1\n",
"transformers: 4.34.1\n",
"milvus : 2.3.3\n",
"pymilvus : 2.3.3\n",
"langchain : 0.0.322\n",
"\n",
"conda environment: py310\n",
"\n"
]
}
],
"source": [
"# Props to Sebastian Raschka for this handy watermark.\n",
"# !pip install watermark\n",
Expand Down

0 comments on commit 5f8d441

Please sign in to comment.