Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Example of Query understanding. #83

Merged
merged 7 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 192 additions & 0 deletions examples/query_expansions/examples.jsonlines
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
{
"query": "What is 10 + 10?",
"extraction": {
"message": "20",
"query": []
}
}
{
"query": "Please summarize the action items from today's meeting",
"extraction": {
"query": [
{
"rewrittenQuery": "summarize action items from meeting on 1/19/2024",
"questionType": [
"TRANSCRIPTS",
"ACTIVITY_LOG"
],
"minDate": "1/19/2024",
"maxDate": "1/19/2024"
}
]
}
}
{
"query": "Give a summary of the daily standup today with Bart. The spoken language is Dutch",
"extraction": {
"query": [
{
"rewrittenQuery": "Summary of the daily standup meeting with Bart on 1/19/2024 in Dutch language",
"questionType": [
"TRANSCRIPTS"
],
"minDate": "1/19/2024",
"maxDate": "1/19/2024"
}
]
}
}
{
"query": "What did I do last week? Write at least 10 bullet points",
"extraction": {
"query": [
{
"rewrittenQuery": "Activity log for the user between 1/12/2024 and 1/19/2024",
"questionType": [
"ACTIVITY_LOG"
],
"minDate": "1/12/2024",
"maxDate": "1/19/2024"
}
]
}
}
{
"query": "I recently looked up a Royal Caribbean cruise. What price did it show me?",
"extraction": {
"query": [
{
"rewrittenQuery": "What was the price of the Royal Caribbean cruise when recently searched?",
"questionType": [
"ACTIVITY_LOG"
],
"minDate": "1/9/2024",
"maxDate": "1/19/2024"
}
]
}
}
{
"query": "what was the name of the standup tool i saw recently",
"extraction": {
"query": [
{
"rewrittenQuery": "What is the name of the latest standup software or tool that I recently viewed or explored?",
"questionType": [
"PERSONAL_DATA",
"ACTIVITY_LOG"
],
"minDate": "01/01/2024",
"maxDate": "01/19/2024"
}
]
}
}
{
"query": "Can you summarize the transcript from 11:00AM to present?",
"extraction": {
"query": [
{
"rewrittenQuery": "show transcript from 11:00AM to 9:14:57 PM on 1/19/2024",
"questionType": [
"TRANSCRIPTS"
],
"minDate": "1/19/2024 11:00:00 AM",
"maxDate": "1/19/2024 9:14:57 PM"
}
]
}
}
{
"query": "What was I doing last week?",
"extraction": {
"query": [
{
"rewrittenQuery": "Activity log for the user from the date range of 1/12/2024 to 1/18/2024",
"questionType": [
"ACTIVITY_LOG"
],
"minDate": "1/12/2024",
"maxDate": "1/18/2024"
}
]
}
}
{
"query": "Summarize the meeting with Pinterest this morning",
"extraction": {
"query": [
{
"rewrittenQuery": "Summary of the morning meeting with Pinterest on 1/19/2024",
"questionType": [
"TRANSCRIPTS",
"ACTIVITY_LOG"
],
"minDate": "1/19/2024 12:00:00 AM",
"maxDate": "1/19/2024 11:59:59 AM"
}
]
}
}
{
"query": "Did he mention anything else about the terminal in the past 2 weeks? Please write 3 action items in the form of bullet points",
"extraction": {
"message": "Please create a summary of the mentions in the form of 3 action items in bullet point format once you find the relevant data",
"query": [
{
"rewrittenQuery": "Reports, conversation transcripts, notes or audio visual records where he has mentioned the terminal within the past 2 weeks",
"questionType": [
"TRANSCRIPTS",
"ACTIVITY_LOG"
],
"minDate": "1/5/2024",
"maxDate": "1/19/2024"
}
]
}
}
{
"query": "Can you summarize the transcript from 11:00AM to present?",
"extraction": {
"query": [
{
"rewrittenQuery": "show transcript from 11:00AM to 9:14:57 PM on 1/19/2024",
"questionType": [
"TRANSCRIPTS"
],
"minDate": "1/19/2024 11:00:00 AM",
"maxDate": "1/19/2024 9:14:57 PM"
}
]
}
}
{
"query": "Please summarize in detail Monday, the 17th of April 2023, starting from 10:00am and highlight any key aspects, todos and so on. Leave out any information about FASD/FAS/PFAS.",
"extraction": {
"query": [
{
"rewrittenQuery": "Detailed summary of activities, key aspects, and tasks scheduled for April 17, 2023, starting from 10:00am excluding any data related to FASD, FAS or PFAS",
"questionType": [
"ACTIVITY_LOG"
],
"minDate": "2023-04-17T10:00:00",
"maxDate": "2023-04-17T23:59:59"
}
]
}
}
{
"query": "How was my day?",
"extraction": {
"query": [
{
"rewrittenQuery": "Summarize the user's activities and interactions for the date January 19, 2024",
"questionType": [
"ACTIVITY_LOG"
],
"minDate": "1/19/2024",
"maxDate": "1/19/2024"
}
]
}
}
105 changes: 105 additions & 0 deletions examples/query_expansions/run.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import readline from "readline"
import Instructor from "@/instructor"
import { OpenAI } from "openai"
import { z } from "zod"

/**
 * Builds the system prompt for the query-understanding model.
 *
 * The prompt lists the rewriting rules and the question-type tips, and ends with
 * the date and time (formatted with the runtime's default locale) that the model
 * is told to treat as "now" when it considers relative and absolute times.
 *
 * @param now - Instant embedded in the prompt; defaults to the moment of the call.
 * @returns The complete system prompt text.
 */
const getSystem = (now: Date = new Date()): string => {
  const formattedDate = now.toLocaleDateString()
  const time = now.toLocaleTimeString()

  // The template body is kept flush-left: its whitespace is part of the prompt text.
  const systemPrompt = `
You are a world class query understanding algorithm that is able to:

1. rewrite queries to be specific to the context, include additional terms that are relevant to the context, and remove terms that are not relevant to the context.
2. consider the date and relative and absolute time of the query and the context.
3. make sure the rewritten search query is fully qualified, and contains all themes, entities, and keywords that are relevant to the context.

Tips:
* PERSONAL_DATA: The query is asking for personal data. The answer should be a list of personal data.
* INTERNET: The query is asking for information on the internet or latest information that may not be available in the context.
* TRANSCRIPTS: The query is asking for a transcript of a conversation or a meeting.

The current date is ${formattedDate} and the current time is ${time}.
`

  return systemPrompt
}

/**
 * Structured output requested from the model for a single user query:
 * the rewritten query, its question type(s), an optional date window,
 * and keywords for full-text search.
 */
const ExtractionValuesSchema = z.object({
  rewrittenQuery: z
    .string()
    .describe(
      "Rewrite the query to be specific to the context. This will be used to do semantic search, so make sure it is specific to the context."
    ),
  questionType: z.array(
    z
      .string()
      .describe(
        "The type of question that is being asked. This will be used to determine the type of answer that is expected. MUST be one of the following: PERSONAL_DATA, INTERNET, TRANSCRIPTS"
      )
  ),
  // The description instructs the model to answer `null` for queries that are
  // not time sensitive, so `null` has to validate; `.optional()` alone would
  // only accept a missing field.
  minDate: z
    .string()
    .nullish()
    .describe(
      "The earliest date of the context that is relevant to the query, null if the query is not time sensitive"
    ),
  maxDate: z
    .string()
    .nullish()
    .describe(
      "The latest date of the context that is relevant to the query, null if the query is not time sensitive"
    ),
  keywords: z.array(z.string()).describe("Keywords that are relevant to a Full Text Search Engine")
})

// OpenAI SDK client; the API key is read from the OPENAI_KEY environment variable.
const oai = new OpenAI({ apiKey: process.env["OPENAI_KEY"] })

// Instructor wrapper around the OpenAI client, configured for "TOOLS" mode.
const client = Instructor({ mode: "TOOLS", client: oai })

/** Extraction in which any field may still be missing. */
type Extraction = Partial<z.infer<typeof ExtractionValuesSchema>>

/**
 * Sends `query` to the model together with the system prompt and prints each
 * streamed extraction result, clearing the console before every print so only
 * the most recent result stays visible.
 *
 * Any failure while starting the request or while consuming the stream is
 * logged and ends the run; the promise does not reject.
 *
 * @param query - Raw user query to analyse.
 * @returns The last extraction received (an empty object if none arrived).
 */
const runExtraction = async (query: string): Promise<Extraction> => {
  const systemPrompt = getSystem()
  let extraction: Extraction = {}

  // The try block covers both the request and the iteration: errors surface
  // from `create(...)` and from the stream's iterator, not from the loop body.
  try {
    const extractionStream = await client.chat.completions.create({
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: query }
      ],
      model: "gpt-4",
      response_model: {
        schema: ExtractionValuesSchema,
        name: "value_extraction"
      },
      stream: true,
      seed: 1
    })

    for await (const result of extractionStream) {
      // Each streamed result replaces the previous one.
      extraction = result
      console.clear()
      console.log(extraction)
    }
  } catch (e: unknown) {
    console.error(e)
  }

  return extraction
}

// Interactive entry point: ask for one query on stdin and run the extraction on it.
const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout
})

rl.question("Enter your query: ", query => {
  // Fire-and-forget by design (the interface is closed right away), but a
  // rejection must not go unhandled: log it and mark the process as failed.
  void runExtraction(query).catch((error: unknown) => {
    console.error("Query extraction failed:", error)
    process.exitCode = 1
  })
  rl.close()
})
Loading
Loading