Skip to content

Commit

Permalink
Search debugging!
Browse files Browse the repository at this point in the history
- Update langchain to fix the issue of thoughts not appearing
- Switch to regular HTML Loader instead of playwright for simplicity and performance
- Update search prompt to fix citation formatting issues
- Downgrade remark to prevent Math rendering error (source: remarkjs/remark-math#89)
  • Loading branch information
jacobvm04 committed Dec 3, 2023
1 parent 04fc6e4 commit ac4addd
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 468 deletions.
2 changes: 1 addition & 1 deletion agent/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ async def search_step(cls, messages: list[BaseMessage]):
# search_result_summary = cls.search_tool.run(search_query_message.content)
search_result_summary = await cls.search_tool.arun(search_query_message.content)

messages.append(SystemMessage(content=f"Use the information from these searchs to help answer your question.\nMake sure to not just repeat answers from sources, provide the sources justifications when possible. More detail is better.\n\nRelevant Google Search: {search_query_message.content}\n\n{search_result_summary}\n\nCite your sources via bracket notation with numbers (don't use any other special characters), and include the full links at the end."))
messages.append(SystemMessage(content=f"Use the information from these searchs to help answer your question.\nMake sure to not just repeat answers from sources, provide the sources justifications when possible. More detail is better.\n\nRelevant Google Search: {search_query_message.content}\n\n{search_result_summary}\n\nCite your sources via bracket notation with numbers (don't use any other special characters like \"^\", only use \"[\" and \"]\"), and include the full links at the end."))

return ChatPromptTemplate.from_messages(messages)

Expand Down
6 changes: 3 additions & 3 deletions agent/tools/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
)
from langchain.chains import LLMChain
from langchain.docstore.document import Document
from langchain.document_loaders import AsyncChromiumLoader
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.embeddings.base import Embeddings
from langchain.llms.base import BaseLLM
Expand Down Expand Up @@ -96,11 +96,11 @@ async def _aresearch_url(self, url: str, query: str):

try:
# Load HTML
loader = AsyncChromiumLoader([url])
loader = AsyncHtmlLoader([url])
html2text = Html2TextTransformer()
text_splitter = TokenTextSplitter(chunk_size=300, chunk_overlap=0)

html = [Document(page_content=await loader.ascrape_playwright(url), metadata={"source": url})]
html = loader.load()
docs = html2text.transform_documents(html)
docs = text_splitter.split_documents(docs)

Expand Down
Loading

0 comments on commit ac4addd

Please sign in to comment.