diff --git a/topic/machine-learning/llm-langchain/vector_search.py b/topic/machine-learning/llm-langchain/vector_search.py index 99d2d9eb..8a37c015 100644 --- a/topic/machine-learning/llm-langchain/vector_search.py +++ b/topic/machine-learning/llm-langchain/vector_search.py @@ -24,9 +24,14 @@ from langchain_text_splitters import CharacterTextSplitter from langchain_openai import OpenAIEmbeddings +import nltk + def main(): + nltk.download("averaged_perceptron_tagger_eng") + nltk.download("punkt_tab") + # Load the document, split it into chunks, embed each chunk, # and load it into the vector store. state_of_the_union_url = "https://github.com/langchain-ai/langchain/raw/v0.0.325/docs/docs/modules/state_of_the_union.txt"