Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

examples: Burr + Hamilton #428

Merged
merged 4 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Empty file.
32 changes: 32 additions & 0 deletions examples/hamilton-integration/actions/ask_question.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import lancedb
import openai


def relevant_chunks(user_query: str) -> list[dict]:
    """Retrieve the 3 chunks most similar to `user_query` from LanceDB.

    Each result dict carries the `text`, `url`, and `position` columns.
    """
    table = lancedb.connect("./blogs").open_table("chunks")
    query = table.search(user_query).select(["text", "url", "position"]).limit(3)
    return query.to_list()


def system_prompt(relevant_chunks: list[dict]) -> str:
    """Build the LLM system prompt from the retrieved chunks' text."""
    texts = (chunk["text"] for chunk in relevant_chunks)
    relevant_content = "\n".join(texts)
    instructions = (
        "Answer the user's questions based on the provided blog post content. "
        "Answer in a concise and helpful manner, and tell the user "
        "if you don't know the answer or you're unsure.\n\n"
    )
    return instructions + f"BLOG CONTENT:\n{relevant_content}"


def llm_answer(system_prompt: str, user_query: str) -> str:
    """Ask gpt-4o-mini to answer `user_query` using `system_prompt` as context."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_query},
    ]
    client = openai.OpenAI()
    completion = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
    return completion.choices[0].message.content
54 changes: 54 additions & 0 deletions examples/hamilton-integration/actions/ingest_blog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import re

import lancedb
import requests
from bs4 import BeautifulSoup
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector

embedding_model = get_registry().get("openai").create()


class TextDocument(LanceModel):
    """Simple data structure to hold a piece of text associated with a url."""

    # Source blog post the chunk came from.
    url: str
    # 0-based index of the chunk within the post's chunk sequence.
    position: int
    # Raw chunk text; declared as the embedding source so LanceDB embeds it on insert.
    text: str = embedding_model.SourceField()
    # Embedding vector, computed automatically from `text` by `embedding_model`.
    vector: Vector(dim=embedding_model.ndims()) = embedding_model.VectorField()


def html_content(blog_post_url: str) -> str:
    """Download the raw HTML of a blog post.

    Raises:
        requests.HTTPError: on a 4xx/5xx response, instead of silently
            passing an error page downstream to be chunked and embedded.
        requests.Timeout: if the host does not respond within 30 seconds;
            without a timeout, `requests.get` can hang indefinitely.
    """
    response = requests.get(blog_post_url, timeout=30)
    response.raise_for_status()
    return response.text


def parsed_text(html_content: str) -> str:
    """Extract the visible text from raw HTML, tags stripped.

    Returns whitespace-trimmed text segments joined by single spaces.
    """
    # Pin the parser explicitly: omitting it raises bs4's
    # GuessedAtParserWarning and makes the output depend on which
    # parser (lxml, html5lib, html.parser) happens to be installed.
    soup = BeautifulSoup(html_content, "html.parser")
    return soup.get_text(separator=" ", strip=True)


def sentences(parsed_text: str) -> list[str]:
    """Split text into sentences on runs of '.', '!', '?', dropping empties."""
    pieces = re.split(r"[.!?]+", parsed_text)
    result = []
    for piece in pieces:
        stripped = piece.strip()
        if stripped:
            result.append(stripped)
    return result


def overlapping_chunks(
    sentences: list[str], window: int = 5, stride: int = 3, min_window_size: int = 2
) -> list[str]:
    """Group sentences into overlapping, space-joined windows.

    A window of up to `window` sentences starts every `stride` sentences.
    A trailing partial window is kept only if at least `min_window_size`
    sentences remain; shorter tails are dropped.
    """
    total = len(sentences)
    chunks = []
    for start in range(0, total, stride):
        full_window_fits = start + window <= total
        tail_big_enough = total - start >= min_window_size
        if full_window_fits or tail_big_enough:
            end = min(start + window, total)
            chunks.append(" ".join(sentences[start:end]))
    return chunks


def embed_chunks(overlapping_chunks: list[str], blog_post_url: str) -> dict:
    """Store the chunks in the local LanceDB `chunks` table (embedding happens on insert)."""
    rows = [
        {"text": chunk, "url": blog_post_url, "position": position}
        for position, chunk in enumerate(overlapping_chunks)
    ]
    # exist_ok=True lets repeated pipeline runs reuse the same table.
    connection = lancedb.connect("./blogs")
    table = connection.create_table("chunks", exist_ok=True, schema=TextDocument)
    table.add(rows)
    return {"n_chunks_embedded": len(overlapping_chunks)}
54 changes: 54 additions & 0 deletions examples/hamilton-integration/application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from hamilton.driver import Builder, Driver

from burr.core import ApplicationBuilder, State, action


@action(reads=[], writes=[])
def ingest_blog(state: State, blog_post_url: str, dr: Driver) -> State:
    """Download a blog post and parse it"""
    inputs = {"blog_post_url": blog_post_url}
    # Runs the Hamilton dataflow up to the `embed_chunks` node; the action
    # only triggers the side effect, so the result is discarded.
    dr.execute(["embed_chunks"], inputs=inputs)
    return state


@action(reads=[], writes=["llm_answer"])
def ask_question(state: State, user_query: str, dr: Driver) -> State:
    """Reply to the user's query using the blog's content."""
    # Ask the Hamilton dataflow for the `llm_answer` node and persist it in state.
    answer = dr.execute(["llm_answer"], inputs={"user_query": user_query})["llm_answer"]
    return state.update(llm_answer=answer)


if __name__ == "__main__":
    # renames to avoid name conflicts with the @action functions
    from actions import ask_question as ask_module
    from actions import ingest_blog as ingest_module
    from hamilton.plugins.h_opentelemetry import OpenTelemetryTracer
    from opentelemetry.instrumentation.lancedb import LanceInstrumentor
    from opentelemetry.instrumentation.openai import OpenAIInstrumentor

    # Instrument the OpenAI and LanceDB client libraries so their calls are
    # captured as OpenTelemetry spans; done before any client is built.
    OpenAIInstrumentor().instrument()
    LanceInstrumentor().instrument()

    # Hamilton driver over both dataflow modules; the OpenTelemetry adapter
    # traces node execution.
    dr = (
        Builder()
        .with_modules(ingest_module, ask_module)
        .with_adapters(OpenTelemetryTracer())
        .build()
    )

    # Burr app: ingest the blog first, then answer a question. The same
    # Hamilton driver is bound into both actions via .bind(dr=dr).
    app = (
        ApplicationBuilder()
        .with_actions(ingest_blog.bind(dr=dr), ask_question.bind(dr=dr))
        .with_transitions(("ingest_blog", "ask_question"))
        .with_entrypoint("ingest_blog")
        .with_tracker(project="modular-rag", use_otel_tracing=True)
        .build()
    )

    # Run until ask_question completes; inputs feed both actions.
    action_name, results, state = app.run(
        halt_after=["ask_question"],
        inputs={
            "blog_post_url": "https://blog.dagworks.io/p/from-blog-to-bot-build-a-rag-app",
            "user_query": "What do you need to monitor in a RAG app?",
        },
    )
    print(state["llm_answer"])
Binary file added examples/hamilton-integration/burr_ui_app_v2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/hamilton-integration/burr_ui_app_v3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/hamilton-integration/hamilton_ui.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Loading