-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlangchain_web_docParser_1.py
61 lines (53 loc) · 2.05 KB
/
langchain_web_docParser_1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 11 17:52:00 2024
@author: xdoestech
"""
import os
import requests
import re
from bs4 import BeautifulSoup
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import create_openai_functions_agent
from langchain.agents import AgentExecutor
from langchain import hub
from langchain_openai import ChatOpenAI
# API credentials.
# Prefer keys already exported in the environment so real secrets never have
# to be written into this file; "-" is only a placeholder fallback. The
# resolved values are written back to os.environ so both variables are always
# set for the code below, exactly as before.
# NOTE: if a key is set in the environment it takes precedence over the
# fallback literal here.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "-")
TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY", "-")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY
# Root page of the documentation site to crawl, and the question to answer.
TEST_URL = "https://docs.angr.io/en/latest/"
GOAL = "Find url for the solution script for: Beginner reversing example: little_engine"


def _html_to_text(raw_html):
    """Return the text content of *raw_html* as parsed by BeautifulSoup."""
    return Soup(raw_html, "html.parser").text


# Recursively load pages starting from TEST_URL (max_depth=3), keeping only
# the extracted text of each page.
url = TEST_URL
loader = RecursiveUrlLoader(
    url=url,
    max_depth=3,
    extractor=_html_to_text,
)
docs_RUL = loader.load()
# Split the loaded pages into 1000-character chunks with a 200-character
# overlap, embed the chunks with OpenAI embeddings, build a FAISS vector store
# from them, and expose that store as a retriever.
_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents_RUL = _splitter.split_documents(docs_RUL)
_embeddings = OpenAIEmbeddings()
vector_RUL = FAISS.from_documents(documents_RUL, _embeddings)
retriever_RUL = vector_RUL.as_retriever()
# Description shown to the agent so it knows when to call the tool.
# Kept verbatim: the model reads this text at runtime.
_ANGR_SEARCH_DESCRIPTION = """
description: finds some relevant text.
use: If you need to find information about angr use this tool!.
"""

# Wrap the retriever as a tool named "angr_search" for the agent.
retriever_tool = create_retriever_tool(
    retriever=retriever_RUL,
    name="angr_search",
    description=_ANGR_SEARCH_DESCRIPTION,
)
# Assemble an OpenAI-functions agent whose only tool is the angr retriever.
prompt = hub.pull("hwchase17/openai-functions-agent")
llm = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
tools = [retriever_tool]
agent = create_openai_functions_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Ask the question held in GOAL instead of repeating the same literal here,
# so the constant and the actual query cannot drift apart.
agent_executor.invoke({"input": GOAL})