Skip to content

Commit

Permalink
LLM: Update to langchain-cratedb 0.0.0
Browse files Browse the repository at this point in the history
After quite a bit of back and forth, and a slow genesis in general,
this subsystem is finally getting ready for take-off.

On the surface, i.e. the user-facing interface, this update doesn't change much.
  • Loading branch information
amotl committed Dec 15, 2024
1 parent 706a09d commit d91e077
Show file tree
Hide file tree
Showing 12 changed files with 42 additions and 67 deletions.
10 changes: 5 additions & 5 deletions topic/machine-learning/llm-langchain/conversational_memory.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@
"execution_count": 2,
"outputs": [],
"source": [
"from langchain_community.chat_message_histories import CrateDBChatMessageHistory\n",
"from langchain_cratedb.chat_message_histories import CrateDBChatMessageHistory\n",
"\n",
"# Connect to a self-managed CrateDB instance.\n",
"CONNECTION_STRING = \"crate://crate@localhost/?schema=notebook\"\n",
"\n",
"chat_message_history = CrateDBChatMessageHistory(\n",
"\tsession_id=\"test_session\",\n",
"\tconnection_string=CONNECTION_STRING\n",
"\tconnection=CONNECTION_STRING\n",
")\n",
"\n",
"# Make sure to start with a blank canvas.\n",
Expand Down Expand Up @@ -216,7 +216,7 @@
"\n",
"\tchat_message_history = CrateDBChatMessageHistory(\n",
"\t\tsession_id=\"test_session\",\n",
"\t\tconnection_string=CONNECTION_STRING,\n",
"\t\tconnection=CONNECTION_STRING,\n",
"\t\tcustom_message_converter=CustomMessageConverter(\n",
"\t\t\tauthor_email=\"[email protected]\"\n",
"\t\t)\n",
Expand Down Expand Up @@ -286,7 +286,7 @@
"import json\n",
"import typing as t\n",
"\n",
"from langchain_community.chat_message_histories.cratedb import CrateDBMessageConverter\n",
"from langchain_cratedb.chat_message_histories import CrateDBMessageConverter\n",
"from langchain.schema import _message_to_dict\n",
"\n",
"\n",
Expand Down Expand Up @@ -314,7 +314,7 @@
"\n",
"\tchat_message_history = CrateDBChatMessageHistory(\n",
"\t\tsession_id=\"test_session\",\n",
"\t\tconnection_string=CONNECTION_STRING,\n",
"\t\tconnection=CONNECTION_STRING,\n",
"\t\tcustom_message_converter=CustomMessageConverterWithDifferentSessionIdColumn(),\n",
"\t\tsession_id_field_name=\"custom_session_id\",\n",
"\t)\n",
Expand Down
4 changes: 2 additions & 2 deletions topic/machine-learning/llm-langchain/conversational_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import os
from pprint import pprint

from langchain_community.chat_message_histories import CrateDBChatMessageHistory
from langchain_cratedb.chat_message_histories import CrateDBChatMessageHistory


CONNECTION_STRING = os.environ.get(
Expand All @@ -29,7 +29,7 @@ def main():

chat_message_history = CrateDBChatMessageHistory(
session_id="test_session",
connection_string=CONNECTION_STRING,
connection=CONNECTION_STRING,
)
chat_message_history.add_user_message("Hello")
chat_message_history.add_ai_message("Hi")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,11 @@
"metadata": {},
"outputs": [],
"source": [
"import openai\n",
"import pandas as pd\n",
"import sqlalchemy as sa\n",
"\n",
"from langchain_community.document_loaders import PyPDFLoader\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from langchain_openai import OpenAIEmbeddings"
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
]
},
{
Expand Down Expand Up @@ -162,7 +160,7 @@
"# environment variables.\n",
"import os\n",
"\n",
"CONNECTION_STRING = CrateDBVectorSearch.connection_string_from_db_params(\n",
"CONNECTION_STRING = CrateDBVectorStore.connection_string_from_db_params(\n",
" driver=os.environ.get(\"CRATEDB_DRIVER\", \"crate\"),\n",
" host=os.environ.get(\"CRATEDB_HOST\", \"localhost\"),\n",
" port=int(os.environ.get(\"CRATEDB_PORT\", \"4200\")),\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,15 @@
},
"outputs": [],
"source": [
"from langchain.chains import RetrievalQA, ConversationalRetrievalChain\n",
"from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
"import pandas as pd\n",
"import sqlalchemy as sa\n",
"from sqlalchemy import create_engine\n",
"from sqlalchemy import text\n",
"import crate\n",
"import openai\n",
"import os\n",
"import requests\n",
"from pueblo.util.environ import getenvpass\n",
"import pandas as pd\n",
"from langchain.chains import RetrievalQA, ConversationalRetrievalChain\n",
"from langchain_community.document_loaders import CSVLoader\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
"from langchain_openai import OpenAIEmbeddings\n",
"from pueblo.util.environ import getenvpass\n",
"from sqlalchemy import create_engine\n",
"from sqlalchemy import text"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
"from pueblo.util.environ import getenvpass\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_community.document_loaders import CSVLoader\n",
"from langchain_community.vectorstores import CrateDBVectorSearch\n",
"from langchain_cratedb.vectorstores import CrateDBVectorStore\n",
"\n",
"warnings.filterwarnings('ignore')"
]
Expand Down Expand Up @@ -301,11 +301,11 @@
"source": [
"embeddings = OpenAIEmbeddings()\n",
"\n",
"store = CrateDBVectorSearch.from_documents(\n",
"store = CrateDBVectorStore.from_documents(\n",
" embedding=embeddings,\n",
" documents=data,\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" connection=CONNECTION_STRING,\n",
")"
]
},
Expand Down Expand Up @@ -519,11 +519,11 @@
"\n",
"COLLECTION_NAME = \"customer_data_jina\"\n",
"\n",
"store = CrateDBVectorSearch.from_documents(\n",
"store = CrateDBVectorStore.from_documents(\n",
" embedding=embeddings,\n",
" documents=data,\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" connection=CONNECTION_STRING,\n",
")\n",
"documents = return_documents(store, my_question)"
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,28 +97,20 @@
"outputs": [],
"source": [
"import os\n",
"import re\n",
"\n",
"import openai\n",
"import pandas as pd\n",
"import warnings\n",
"import requests\n",
"import re\n",
"from typing import Dict, List, Optional, Tuple, Union\n",
"\n",
"import warnings\n",
"\n",
"from pueblo.util.environ import getenvpass\n",
"from google.cloud import aiplatform\n",
"from vertexai.generative_models import (\n",
" GenerationConfig,\n",
" GenerationResponse,\n",
" GenerativeModel,\n",
" HarmBlockThreshold,\n",
" HarmCategory,\n",
")\n",
"from langchain_community.document_loaders import CSVLoader\n",
"from langchain_community.embeddings import VertexAIEmbeddings\n",
"from langchain_community.llms import VertexAI\n",
"from langchain_community.vectorstores import CrateDBVectorSearch\n",
"from langchain_cratedb.vectorstores import CrateDBVectorStore\n",
"\n",
"warnings.filterwarnings('ignore')"
]
Expand Down Expand Up @@ -347,11 +339,11 @@
"source": [
"embeddings = VertexAIEmbeddings(model_name=\"textembedding-gecko@001\")\n",
"\n",
"store = CrateDBVectorSearch.from_documents(\n",
"store = CrateDBVectorStore.from_documents(\n",
" embedding=embeddings,\n",
" documents=data,\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" connection=CONNECTION_STRING,\n",
")"
]
},
Expand Down
2 changes: 1 addition & 1 deletion topic/machine-learning/llm-langchain/document_loader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@
"outputs": [],
"source": [
"import sqlalchemy as sa\n",
"from langchain_community.document_loaders import CrateDBLoader\n",
"from langchain_community.utilities.sql_database import SQLDatabase\n",
"from langchain_cratedb.document_loaders import CrateDBLoader\n",
"from pprint import pprint\n",
"\n",
"db = SQLDatabase(engine=sa.create_engine(CONNECTION_STRING))\n",
Expand Down
2 changes: 1 addition & 1 deletion topic/machine-learning/llm-langchain/document_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
import requests
import sqlalchemy as sa
from cratedb_toolkit.util import DatabaseAdapter
from langchain_community.document_loaders import CrateDBLoader
from langchain_community.utilities.sql_database import SQLDatabase
from langchain_cratedb.document_loaders import CrateDBLoader
from pprint import pprint


Expand Down
6 changes: 3 additions & 3 deletions topic/machine-learning/llm-langchain/requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Real.
# Production.
cratedb-toolkit[io]
pueblo[notebook,testing]

# Development.
# Staging.
# cratedb-toolkit[io] @ git+https://github.com/crate-workbench/cratedb-toolkit.git@main
# pueblo[notebook,testing] @ git+https://github.com/pyveci/pueblo.git@main

# Workstation.
# Development.
#--editable=/Users/amo/dev/crate/ecosystem/cratedb-retentions[io]
#--editable=/Users/amo/dev/pyveci/sources/pueblo[testing]
12 changes: 1 addition & 11 deletions topic/machine-learning/llm-langchain/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
# Real.
crash
crate>=1.0.0.dev2
google-cloud-aiplatform<2
langchain-cratedb @ git+https://github.com/crate/langchain-cratedb.git@cratedb
langchain-google-vertexai<3
langchain-openai<0.3
langchain-text-splitters<0.4
pueblo[cli,nlp]>=0.0.10
pydantic>=2,<3
pypdf<6
python-dotenv<2
requests<3
requests-cache<2
sqlalchemy==2.*
sqlalchemy-cratedb>=0.40.0
unstructured<0.17

# Development.
# cratedb-toolkit @ git+https://github.com/crate-workbench/cratedb-toolkit.git@main
langchain @ git+https://github.com/crate-workbench/langchain.git@cratedb#subdirectory=libs/langchain
langchain-community @ git+https://github.com/crate-workbench/langchain.git@cratedb#subdirectory=libs/community
# pueblo[cli,fileio,nlp] @ git+https://github.com/pyveci/pueblo.git@main
18 changes: 9 additions & 9 deletions topic/machine-learning/llm-langchain/vector_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@
"# environment variables.\n",
"import os\n",
"\n",
"CONNECTION_STRING = CrateDBVectorSearch.connection_string_from_db_params(\n",
"CONNECTION_STRING = CrateDBVectorStore.connection_string_from_db_params(\n",
" driver=os.environ.get(\"CRATEDB_DRIVER\", \"crate\"),\n",
" host=os.environ.get(\"CRATEDB_HOST\", \"localhost\"),\n",
" port=int(os.environ.get(\"CRATEDB_PORT\", \"4200\")),\n",
Expand All @@ -166,8 +166,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.vectorstores import CrateDBVectorSearch\n",
"from langchain_core.documents import Document\n",
"from langchain_cratedb.vectorstores import CrateDBVectorStore\n",
"from langchain_openai import OpenAIEmbeddings"
]
},
Expand Down Expand Up @@ -223,11 +223,11 @@
"source": [
"embeddings = OpenAIEmbeddings()\n",
"\n",
"store = CrateDBVectorSearch.from_documents(\n",
"store = CrateDBVectorStore.from_documents(\n",
" embedding=embeddings,\n",
" documents=docs,\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" connection=CONNECTION_STRING,\n",
")"
]
},
Expand Down Expand Up @@ -334,10 +334,10 @@
},
"outputs": [],
"source": [
"store = CrateDBVectorSearch(\n",
"store = CrateDBVectorStore(\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" embedding_function=embeddings,\n",
" connection=CONNECTION_STRING,\n",
" embeddings=embeddings,\n",
")"
]
},
Expand Down Expand Up @@ -426,11 +426,11 @@
},
"outputs": [],
"source": [
"store = CrateDBVectorSearch.from_documents(\n",
"store = CrateDBVectorStore.from_documents(\n",
" documents=docs,\n",
" embedding=embeddings,\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" connection=CONNECTION_STRING,\n",
" pre_delete_collection=True,\n",
")"
]
Expand Down
4 changes: 2 additions & 2 deletions topic/machine-learning/llm-langchain/vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
python vector_search.py
""" # noqa: E501

from langchain_community.vectorstores import CrateDBVectorSearch
from langchain_cratedb.vectorstores import CrateDBVectorStore
from langchain_openai import OpenAIEmbeddings

import nltk
Expand All @@ -37,7 +37,7 @@ def main():
documents = CachedWebResource(url).langchain_documents(chunk_size=1000, chunk_overlap=0)

# Embed each chunk, and load them into the vector store.
db = CrateDBVectorSearch.from_documents(documents, OpenAIEmbeddings())
db = CrateDBVectorStore.from_documents(documents, OpenAIEmbeddings(), connection="crate://")

# Invoke a query, and display the first result.
query = "What did the president say about Ketanji Brown Jackson"
Expand Down

0 comments on commit d91e077

Please sign in to comment.