Skip to content

Commit

Permalink
LLM: Update to langchain-cratedb 0.0.0
Browse files Browse the repository at this point in the history
After quite a bit of back and forth, and a slow genesis in general,
this subsystem is finally getting ready for take-off.

On the surface, i.e. the user-facing interface, this update doesn't change
much, just a few bits of "naming things".
  • Loading branch information
amotl committed Dec 16, 2024
1 parent 706a09d commit a696d3e
Show file tree
Hide file tree
Showing 12 changed files with 58 additions and 89 deletions.
40 changes: 17 additions & 23 deletions topic/machine-learning/llm-langchain/conversational_memory.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,18 @@
"execution_count": 2,
"outputs": [],
"source": [
"from langchain_community.chat_message_histories import CrateDBChatMessageHistory\n",
"from langchain_cratedb.chat_history import CrateDBChatMessageHistory\n",
"\n",
"# Connect to a self-managed CrateDB instance.\n",
"CONNECTION_STRING = \"crate://crate@localhost/?schema=notebook\"\n",
"\n",
"chat_message_history = CrateDBChatMessageHistory(\n",
"chat_history = CrateDBChatMessageHistory(\n",
"\tsession_id=\"test_session\",\n",
"\tconnection_string=CONNECTION_STRING\n",
")\n",
"\n",
"# Make sure to start with a blank canvas.\n",
"chat_message_history.clear()"
"chat_history.clear()"
],
"metadata": {
"collapsed": false
Expand All @@ -90,8 +90,8 @@
"execution_count": 3,
"outputs": [],
"source": [
"chat_message_history.add_user_message(\"Hello\")\n",
"chat_message_history.add_ai_message(\"Hi\")"
"chat_history.add_user_message(\"Hello\")\n",
"chat_history.add_ai_message(\"Hi\")"
],
"metadata": {
"collapsed": false,
Expand All @@ -117,9 +117,7 @@
"output_type": "execute_result"
}
],
"source": [
"chat_message_history.messages"
],
"source": "chat_history.messages",
"metadata": {
"collapsed": false,
"ExecuteTime": {
Expand Down Expand Up @@ -214,7 +212,7 @@
"\n",
"\tBase.metadata.drop_all(bind=sa.create_engine(CONNECTION_STRING))\n",
"\n",
"\tchat_message_history = CrateDBChatMessageHistory(\n",
"\tchat_history = CrateDBChatMessageHistory(\n",
"\t\tsession_id=\"test_session\",\n",
"\t\tconnection_string=CONNECTION_STRING,\n",
"\t\tcustom_message_converter=CustomMessageConverter(\n",
Expand All @@ -223,10 +221,10 @@
"\t)\n",
"\n",
"\t# Make sure to start with a blank canvas.\n",
"\tchat_message_history.clear()\n",
"\tchat_history.clear()\n",
"\n",
"\tchat_message_history.add_user_message(\"Hello\")\n",
"\tchat_message_history.add_ai_message(\"Hi\")"
"\tchat_history.add_user_message(\"Hello\")\n",
"\tchat_history.add_ai_message(\"Hi\")"
],
"metadata": {
"collapsed": false,
Expand All @@ -252,9 +250,7 @@
"output_type": "execute_result"
}
],
"source": [
"chat_message_history.messages"
],
"source": "chat_history.messages",
"metadata": {
"collapsed": false,
"ExecuteTime": {
Expand Down Expand Up @@ -286,7 +282,7 @@
"import json\n",
"import typing as t\n",
"\n",
"from langchain_community.chat_message_histories.cratedb import CrateDBMessageConverter\n",
"from langchain_cratedb.chat_history import CrateDBMessageConverter\n",
"from langchain.schema import _message_to_dict\n",
"\n",
"\n",
Expand All @@ -312,18 +308,18 @@
"if __name__ == \"__main__\":\n",
"\tBase.metadata.drop_all(bind=sa.create_engine(CONNECTION_STRING))\n",
"\n",
"\tchat_message_history = CrateDBChatMessageHistory(\n",
"\tchat_history = CrateDBChatMessageHistory(\n",
"\t\tsession_id=\"test_session\",\n",
"\t\tconnection_string=CONNECTION_STRING,\n",
"\t\tcustom_message_converter=CustomMessageConverterWithDifferentSessionIdColumn(),\n",
"\t\tsession_id_field_name=\"custom_session_id\",\n",
"\t)\n",
"\n",
"\t# Make sure to start with a blank canvas.\n",
"\tchat_message_history.clear()\n",
"\tchat_history.clear()\n",
"\n",
"\tchat_message_history.add_user_message(\"Hello\")\n",
"\tchat_message_history.add_ai_message(\"Hi\")"
"\tchat_history.add_user_message(\"Hello\")\n",
"\tchat_history.add_ai_message(\"Hi\")"
],
"metadata": {
"collapsed": false
Expand All @@ -344,9 +340,7 @@
"output_type": "execute_result"
}
],
"source": [
"chat_message_history.messages"
],
"source": "chat_history.messages",
"metadata": {
"collapsed": false
}
Expand Down
12 changes: 6 additions & 6 deletions topic/machine-learning/llm-langchain/conversational_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import os
from pprint import pprint

from langchain_community.chat_message_histories import CrateDBChatMessageHistory
from langchain_cratedb.chat_history import CrateDBChatMessageHistory


CONNECTION_STRING = os.environ.get(
Expand All @@ -27,13 +27,13 @@

def main():

chat_message_history = CrateDBChatMessageHistory(
chat_history = CrateDBChatMessageHistory(
session_id="test_session",
connection_string=CONNECTION_STRING,
connection=CONNECTION_STRING,
)
chat_message_history.add_user_message("Hello")
chat_message_history.add_ai_message("Hi")
pprint(chat_message_history.messages)
chat_history.add_user_message("Hello")
chat_history.add_ai_message("Hi")
pprint(chat_history.messages)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,11 @@
"metadata": {},
"outputs": [],
"source": [
"import openai\n",
"import pandas as pd\n",
"import sqlalchemy as sa\n",
"\n",
"from langchain_community.document_loaders import PyPDFLoader\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from langchain_openai import OpenAIEmbeddings"
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
]
},
{
Expand Down Expand Up @@ -162,7 +160,7 @@
"# environment variables.\n",
"import os\n",
"\n",
"CONNECTION_STRING = CrateDBVectorSearch.connection_string_from_db_params(\n",
"CONNECTION_STRING = CrateDBVectorStore.connection_string_from_db_params(\n",
" driver=os.environ.get(\"CRATEDB_DRIVER\", \"crate\"),\n",
" host=os.environ.get(\"CRATEDB_HOST\", \"localhost\"),\n",
" port=int(os.environ.get(\"CRATEDB_PORT\", \"4200\")),\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,15 @@
},
"outputs": [],
"source": [
"from langchain.chains import RetrievalQA, ConversationalRetrievalChain\n",
"from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
"import pandas as pd\n",
"import sqlalchemy as sa\n",
"from sqlalchemy import create_engine\n",
"from sqlalchemy import text\n",
"import crate\n",
"import openai\n",
"import os\n",
"import requests\n",
"from pueblo.util.environ import getenvpass\n",
"import pandas as pd\n",
"from langchain.chains import RetrievalQA, ConversationalRetrievalChain\n",
"from langchain_community.document_loaders import CSVLoader\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
"from langchain_openai import OpenAIEmbeddings\n",
"from pueblo.util.environ import getenvpass\n",
"from sqlalchemy import create_engine\n",
"from sqlalchemy import text"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
"from pueblo.util.environ import getenvpass\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_community.document_loaders import CSVLoader\n",
"from langchain_community.vectorstores import CrateDBVectorSearch\n",
"from langchain_cratedb.vectorstores import CrateDBVectorStore\n",
"\n",
"warnings.filterwarnings('ignore')"
]
Expand Down Expand Up @@ -301,11 +301,11 @@
"source": [
"embeddings = OpenAIEmbeddings()\n",
"\n",
"store = CrateDBVectorSearch.from_documents(\n",
"store = CrateDBVectorStore.from_documents(\n",
" embedding=embeddings,\n",
" documents=data,\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" connection=CONNECTION_STRING,\n",
")"
]
},
Expand Down Expand Up @@ -519,11 +519,11 @@
"\n",
"COLLECTION_NAME = \"customer_data_jina\"\n",
"\n",
"store = CrateDBVectorSearch.from_documents(\n",
"store = CrateDBVectorStore.from_documents(\n",
" embedding=embeddings,\n",
" documents=data,\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" connection=CONNECTION_STRING,\n",
")\n",
"documents = return_documents(store, my_question)"
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,28 +97,20 @@
"outputs": [],
"source": [
"import os\n",
"import re\n",
"\n",
"import openai\n",
"import pandas as pd\n",
"import warnings\n",
"import requests\n",
"import re\n",
"from typing import Dict, List, Optional, Tuple, Union\n",
"\n",
"import warnings\n",
"\n",
"from pueblo.util.environ import getenvpass\n",
"from google.cloud import aiplatform\n",
"from vertexai.generative_models import (\n",
" GenerationConfig,\n",
" GenerationResponse,\n",
" GenerativeModel,\n",
" HarmBlockThreshold,\n",
" HarmCategory,\n",
")\n",
"from langchain_community.document_loaders import CSVLoader\n",
"from langchain_community.embeddings import VertexAIEmbeddings\n",
"from langchain_community.llms import VertexAI\n",
"from langchain_community.vectorstores import CrateDBVectorSearch\n",
"from langchain_cratedb.vectorstores import CrateDBVectorStore\n",
"\n",
"warnings.filterwarnings('ignore')"
]
Expand Down Expand Up @@ -347,11 +339,11 @@
"source": [
"embeddings = VertexAIEmbeddings(model_name=\"textembedding-gecko@001\")\n",
"\n",
"store = CrateDBVectorSearch.from_documents(\n",
"store = CrateDBVectorStore.from_documents(\n",
" embedding=embeddings,\n",
" documents=data,\n",
" collection_name=COLLECTION_NAME,\n",
" connection_string=CONNECTION_STRING,\n",
" connection=CONNECTION_STRING,\n",
")"
]
},
Expand Down
2 changes: 1 addition & 1 deletion topic/machine-learning/llm-langchain/document_loader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@
"outputs": [],
"source": [
"import sqlalchemy as sa\n",
"from langchain_community.document_loaders import CrateDBLoader\n",
"from langchain_community.utilities.sql_database import SQLDatabase\n",
"from langchain_cratedb.loaders import CrateDBLoader\n",
"from pprint import pprint\n",
"\n",
"db = SQLDatabase(engine=sa.create_engine(CONNECTION_STRING))\n",
Expand Down
2 changes: 1 addition & 1 deletion topic/machine-learning/llm-langchain/document_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
import requests
import sqlalchemy as sa
from cratedb_toolkit.util import DatabaseAdapter
from langchain_community.document_loaders import CrateDBLoader
from langchain_community.utilities.sql_database import SQLDatabase
from langchain_cratedb.loaders import CrateDBLoader
from pprint import pprint


Expand Down
6 changes: 3 additions & 3 deletions topic/machine-learning/llm-langchain/requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Real.
# Production.
cratedb-toolkit[io]
pueblo[notebook,testing]

# Development.
# Staging.
# cratedb-toolkit[io] @ git+https://github.com/crate-workbench/cratedb-toolkit.git@main
# pueblo[notebook,testing] @ git+https://github.com/pyveci/pueblo.git@main

# Workstation.
# Development.
#--editable=/Users/amo/dev/crate/ecosystem/cratedb-retentions[io]
#--editable=/Users/amo/dev/pyveci/sources/pueblo[testing]
12 changes: 1 addition & 11 deletions topic/machine-learning/llm-langchain/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
# Real.
crash
crate>=1.0.0.dev2
google-cloud-aiplatform<2
langchain-cratedb<0.0.1
langchain-google-vertexai<3
langchain-openai<0.3
langchain-text-splitters<0.4
pueblo[cli,nlp]>=0.0.10
pydantic>=2,<3
pypdf<6
python-dotenv<2
requests<3
requests-cache<2
sqlalchemy==2.*
sqlalchemy-cratedb>=0.40.0
unstructured<0.17

# Development.
# cratedb-toolkit @ git+https://github.com/crate-workbench/cratedb-toolkit.git@main
langchain @ git+https://github.com/crate-workbench/langchain.git@cratedb#subdirectory=libs/langchain
langchain-community @ git+https://github.com/crate-workbench/langchain.git@cratedb#subdirectory=libs/community
# pueblo[cli,fileio,nlp] @ git+https://github.com/pyveci/pueblo.git@main
Loading

0 comments on commit a696d3e

Please sign in to comment.