Skip to content

Commit

Permalink
examples: Burr + Hamilton (#428)
Browse files Browse the repository at this point in the history
* hamilton + Burr example added

* specified beautifulsoup parser

* added notebook badges; added files required by CI

* added intro and conclusion

---------

Co-authored-by: zilto <tjean@DESKTOP-V6JDCS2>
  • Loading branch information
zilto and zilto authored Nov 21, 2024
1 parent 37b6e56 commit 7a3c9e7
Show file tree
Hide file tree
Showing 12 changed files with 1,792 additions and 0 deletions.
8 changes: 8 additions & 0 deletions examples/hamilton-integration/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Modular RAG with Burr and Hamilton

This example shows the "2-layer" approach to building RAG and LLM agents using Burr and Hamilton.

You will find:

- `notebook.ipynb` contains a guide on how to build a modular RAG application. It details how a typical project evolves and how Burr and Hamilton can help you achieve the desired modularity.
- `application.py` and `actions/` contain the code for the final application version shown in the notebook.
Empty file.
Empty file.
32 changes: 32 additions & 0 deletions examples/hamilton-integration/actions/ask_question.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import lancedb
import openai


def relevant_chunks(user_query: str) -> list[dict]:
    """Return the 3 chunks most similar to the query from the LanceDB `chunks` table."""
    table = lancedb.connect("./blogs").open_table("chunks")
    query = table.search(user_query)
    query = query.select(["text", "url", "position"])
    return query.limit(3).to_list()


def system_prompt(relevant_chunks: list[dict]) -> str:
    """Build the system prompt, injecting the retrieved chunks as context."""
    blog_content = "\n".join(chunk["text"] for chunk in relevant_chunks)
    instructions = (
        "Answer the user's questions based on the provided blog post content. "
        "Answer in a concise and helpful manner, and tell the user "
        "if you don't know the answer or you're unsure.\n\n"
    )
    return instructions + f"BLOG CONTENT:\n{blog_content}"


def llm_answer(system_prompt: str, user_query: str) -> str:
    """Ask the chat model to answer the user's query given the system prompt."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_query},
    ]
    completion = openai.OpenAI().chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
    )
    return completion.choices[0].message.content
54 changes: 54 additions & 0 deletions examples/hamilton-integration/actions/ingest_blog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import re

import lancedb
import requests
from bs4 import BeautifulSoup
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector

embedding_model = get_registry().get("openai").create()


class TextDocument(LanceModel):
    """Simple data structure to hold a piece of text associated with a url."""

    # URL of the blog post the chunk was extracted from.
    url: str
    # Index of the chunk within the blog post (insertion order).
    position: int
    # Chunk text; SourceField marks it as the input the embedding model reads.
    text: str = embedding_model.SourceField()
    # Embedding vector; VectorField lets LanceDB compute it from `text`.
    vector: Vector(dim=embedding_model.ndims()) = embedding_model.VectorField()


def html_content(blog_post_url: str) -> str:
    """Fetch the raw HTML of the blog post at `blog_post_url`."""
    response = requests.get(blog_post_url)
    return response.text


def parsed_text(html_content: str) -> str:
    """Strip HTML markup and return the page's visible text."""
    soup = BeautifulSoup(html_content, "html.parser")
    text = soup.get_text(separator=" ", strip=True)
    return text


def sentences(parsed_text: str) -> list[str]:
    """Split text into sentences on terminal punctuation, dropping empty pieces."""
    stripped = (fragment.strip() for fragment in re.split(r"[.!?]+", parsed_text))
    return [fragment for fragment in stripped if fragment]


def overlapping_chunks(
    sentences: list[str], window: int = 5, stride: int = 3, min_window_size: int = 2
) -> list[str]:
    """Join sentences into windows of `window` sentences, advancing by `stride`.

    A trailing partial window is kept only if at least `min_window_size`
    sentences remain; shorter tails are dropped.
    """
    total = len(sentences)
    chunks = []
    for start in range(0, total, stride):
        end = min(start + window, total)
        is_full_window = start + window <= total
        if is_full_window or total - start >= min_window_size:
            chunks.append(" ".join(sentences[start:end]))
    return chunks


def embed_chunks(overlapping_chunks: list[str], blog_post_url: str) -> dict:
    # embed and store the chunks using LanceDB
    records = [
        {"text": chunk, "url": blog_post_url, "position": position}
        for position, chunk in enumerate(overlapping_chunks)
    ]
    connection = lancedb.connect("./blogs")
    table = connection.create_table("chunks", exist_ok=True, schema=TextDocument)
    table.add(records)
    return {"n_chunks_embedded": len(records)}
54 changes: 54 additions & 0 deletions examples/hamilton-integration/application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from hamilton.driver import Builder, Driver

from burr.core import ApplicationBuilder, State, action


@action(reads=[], writes=[])
def ingest_blog(state: State, blog_post_url: str, dr: Driver) -> State:
    """Run the Hamilton ingestion dataflow up to `embed_chunks` for the given URL."""
    inputs = {"blog_post_url": blog_post_url}
    dr.execute(["embed_chunks"], inputs=inputs)
    return state


@action(reads=[], writes=["llm_answer"])
def ask_question(state: State, user_query: str, dr: Driver) -> State:
    """Reply to the user's query using the blog's content."""
    outputs = dr.execute(["llm_answer"], inputs={"user_query": user_query})
    answer = outputs["llm_answer"]
    return state.update(llm_answer=answer)


if __name__ == "__main__":
    # renames to avoid name conflicts with the @action functions
    from actions import ask_question as ask_module
    from actions import ingest_blog as ingest_module
    from hamilton.plugins.h_opentelemetry import OpenTelemetryTracer
    from opentelemetry.instrumentation.lancedb import LanceInstrumentor
    from opentelemetry.instrumentation.openai import OpenAIInstrumentor

    # Install OpenTelemetry auto-instrumentation for the OpenAI and LanceDB
    # clients before the driver/app are built so their calls are traced.
    OpenAIInstrumentor().instrument()
    LanceInstrumentor().instrument()

    # Hamilton driver assembled from the two dataflow modules; the
    # OpenTelemetryTracer adapter emits traces for executed nodes.
    dr = (
        Builder()
        .with_modules(ingest_module, ask_module)
        .with_adapters(OpenTelemetryTracer())
        .build()
    )

    # Burr application: ingest the blog once, then answer a question.
    # The Hamilton driver is injected into both actions via .bind(dr=dr).
    app = (
        ApplicationBuilder()
        .with_actions(ingest_blog.bind(dr=dr), ask_question.bind(dr=dr))
        .with_transitions(("ingest_blog", "ask_question"))
        .with_entrypoint("ingest_blog")
        .with_tracker(project="modular-rag", use_otel_tracing=True)
        .build()
    )

    # Run the flow end-to-end and stop once the question has been answered.
    action_name, results, state = app.run(
        halt_after=["ask_question"],
        inputs={
            "blog_post_url": "https://blog.dagworks.io/p/from-blog-to-bot-build-a-rag-app",
            "user_query": "What do you need to monitor in a RAG app?",
        },
    )
    print(state["llm_answer"])
Binary file added examples/hamilton-integration/burr_ui_app_v2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/hamilton-integration/burr_ui_app_v3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/hamilton-integration/hamilton_ui.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 7a3c9e7

Please sign in to comment.