From 392e51a5a45d85a9f8f89a1027bbbf90e4a883b6 Mon Sep 17 00:00:00 2001
From: Jason Liu
Date: Fri, 19 Jan 2024 19:04:30 -0500
Subject: [PATCH 1/5] bump example

---
 examples/query_expansions/run.ts | 93 ++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 examples/query_expansions/run.ts

diff --git a/examples/query_expansions/run.ts b/examples/query_expansions/run.ts
new file mode 100644
index 00000000..58b8e54a
--- /dev/null
+++ b/examples/query_expansions/run.ts
@@ -0,0 +1,93 @@
+import Instructor from "@/instructor"
+import readline from 'readline';
+import { OpenAI } from "openai";
+
+import { z } from "zod"
+
+
+const getSystem = () => {
+  const currentDate = new Date();
+  const formattedDate = currentDate.toLocaleDateString();
+  const time = currentDate.toLocaleTimeString();
+  const systemPrompt = `
+  You are a world class query understanding algorithm that is able to:
+
+  1. rewrite queries to be specific to the context, include additional terms that are relevant to the context, and remove terms that are not relevant to the context.
+  2. consider the date and relative and absolute time of the query and the context.
+  3. When the query is the search query should be fully qualified, and contain all themes, entities, and keywords that are relevant to the context.
+
+  Tips:
+  * PERSONAL_DATA: The query is asking for personal data. The answer should be a list of personal data.
+  * INTERNET: The query is asking for information on the internet or latest information that may not be available in the context.
+  * TRANSCRIPTS: The query is asking for a transcript of a conversation or a meeting.
+
+  The current date is ${formattedDate} and the current time is ${time}.
+  `;
+
+  return systemPrompt;
+};
+
+
+const ExtractionValuesSchema = z.object({
+  rewrittenQuery: z.string().describe("Rewrite the query to be specific to the context. This will be used to do semantic search, so make sure it is specific to the context."),
+  questionType: z.array(z.string().describe("The type of question that is being asked. This will be used to determine the type of answer that is expected. MUST be one of the following: PERSONAL_DATA, INTERNET, TRANSCRIPTS")),
+  minDate: z.string().optional().describe("The earliest date of the context that is relevant to the query, null if the query is not time sensitive"),
+  maxDate: z.string().optional().describe("The latest date of the context that is relevant to the query, null if the query is not time sensitive"),
+  keywords: z.array(z.string()).describe("Keywords that are relevant to a Full Text Search Engine"),
+})
+
+const oai = new OpenAI({
+  apiKey: process.env.OPENAI_KEY,
+});
+
+
+const client = Instructor({
+  client: oai,
+  mode: "TOOLS"
+})
+
+type Extraction = Partial>
+
+
+const runExtraction = async (query: string) => {
+  const systemPrompt = getSystem();
+
+  let extraction: Extraction = {}
+  const extractionStream = await client.chat.completions.create({
+    messages: [
+      { "role": "system", content: systemPrompt },
+      { role: "user", content: query }],
+    model: "gpt-4",
+    response_model: {
+      schema: ExtractionValuesSchema,
+      name: "value_extraction"
+    },
+    streamOutputType: "READABLE",
+    stream: true,
+    seed: 1
+  })
+
+
+  for await (const result of extractionStream) {
+    try {
+      extraction = result
+      console.clear()
+      console.table(extraction)
+    } catch (e) {
+      console.log(e)
+      break
+    }
+  }
+
+}
+
+
+const rl = readline.createInterface({
+  input: process.stdin,
+  output: process.stdout
+});
+
+rl.question('Enter your query: ', (query) => {
+  runExtraction(query);
+  rl.close();
+});

From 9dc6328787d1b7d210da125b9772fac153938a7f Mon Sep 17 00:00:00 2001
From: Jason Liu
Date: Fri, 19 Jan 2024 20:43:25 -0500
Subject: [PATCH 2/5] bump

---
 examples/query_expansions/run.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/query_expansions/run.ts b/examples/query_expansions/run.ts
index 58b8e54a..38ff9c6c 100644
--- a/examples/query_expansions/run.ts
+++ b/examples/query_expansions/run.ts
@@ -62,7 +62,6 @@ const runExtraction = async (query: string) => {
       schema: ExtractionValuesSchema,
       name: "value_extraction"
     },
-    streamOutputType: "READABLE",
     stream: true,
     seed: 1
   })

From ef173a1401003c2257a82ee72388f180bb563a2b Mon Sep 17 00:00:00 2001
From: Jason Liu
Date: Fri, 19 Jan 2024 20:44:18 -0500
Subject: [PATCH 3/5] bump

---
 examples/query_expansions/run.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/query_expansions/run.ts b/examples/query_expansions/run.ts
index 38ff9c6c..2e73058d 100644
--- a/examples/query_expansions/run.ts
+++ b/examples/query_expansions/run.ts
@@ -71,7 +71,7 @@ const runExtraction = async (query: string) => {
     try {
       extraction = result
       console.clear()
-      console.table(extraction)
+      console.log(extraction)
    } catch (e) {
       console.log(e)
       break

From 492e9ac181f0d3b4869822ad8e2acaea841cdfa2 Mon Sep 17 00:00:00 2001
From: Jason Liu
Date: Fri, 19 Jan 2024 21:16:32 -0500
Subject: [PATCH 4/5] bump

---
 examples/query_expansions/examples.jsonlines | 192 +++++++++++++++++++
 examples/query_expansions/run_sync.ts        | 137 +++++++++++++
 2 files changed, 329 insertions(+)
 create mode 100644 examples/query_expansions/examples.jsonlines
 create mode 100644 examples/query_expansions/run_sync.ts

diff --git a/examples/query_expansions/examples.jsonlines b/examples/query_expansions/examples.jsonlines
new file mode 100644
index 00000000..3f8b1d08
--- /dev/null
+++ b/examples/query_expansions/examples.jsonlines
@@ -0,0 +1,192 @@
+{
+  "query": "What is 10 + 10?",
+  "extraction": {
+    "message": "20",
+    "query": []
+  }
+}
+{
+  "query": "Please summarize the action items from today's meeting",
+  "extraction": {
+ "query": [ + { + "rewrittenQuery": "summarize action items from meeting on 1/19/2024", + "questionType": [ + "TRANSCRIPTS", + "ACTIVITY_LOG" + ], + "minDate": "1/19/2024", + "maxDate": "1/19/2024" + } + ] + } +} +{ + "query": "Give a summary of the daily standup today with Bart. The spoken language is Dutch", + "extraction": { + "query": [ + { + "rewrittenQuery": "Summary of the daily standup meeting with Bart on 1/19/2024 in Dutch language", + "questionType": [ + "TRANSCRIPTS" + ], + "minDate": "1/19/2024", + "maxDate": "1/19/2024" + } + ] + } +} +{ + "query": "What did I do last week? Write at least 10 bullet points", + "extraction": { + "query": [ + { + "rewrittenQuery": "Activity log for the user between 1/12/2024 and 1/19/2024", + "questionType": [ + "ACTIVITY_LOG" + ], + "minDate": "1/12/2024", + "maxDate": "1/19/2024" + } + ] + } +} +{ + "query": "I recently looked up a Royal Caribbean cruise. What price did it show me?", + "extraction": { + "query": [ + { + "rewrittenQuery": "What was the price of the Royal Caribbean cruise when recently searched?", + "questionType": [ + "ACTIVITY_LOG" + ], + "minDate": "1/9/2024", + "maxDate": "1/19/2024" + } + ] + } +} +{ + "query": "what was the name of the standup tool i saw recently", + "extraction": { + "query": [ + { + "rewrittenQuery": "What is the name of the latest standup software or tool that I recently viewed or explored?", + "questionType": [ + "PERSONAL_DATA", + "ACTIVITY_LOG" + ], + "minDate": "01/01/2024", + "maxDate": "01/19/2024" + } + ] + } +} +{ + "query": "Can you summarize the transcript from 11:00AM to present?", + "extraction": { + "query": [ + { + "rewrittenQuery": "show transcript from 11:00AM to 9:14:57 PM on 1/19/2024", + "questionType": [ + "TRANSCRIPTS" + ], + "minDate": "1/19/2024 11:00:00 AM", + "maxDate": "1/19/2024 9:14:57 PM" + } + ] + } +} +{ + "query": "What was I doing last week?", + "extraction": { + "query": [ + { + "rewrittenQuery": "Activity log for the user from the date range of 1/12/2024 to 1/18/2024", + "questionType": [ + "ACTIVITY_LOG" + ], + "minDate": "1/12/2024", + "maxDate": "1/18/2024" + } + ] + } +} +{ + "query": "Summarize the meeting with Pinterest this morning", + "extraction": { + "query": [ + { + "rewrittenQuery": "Summary of the morning meeting with Pinterest on 1/19/2024", + "questionType": [ + "TRANSCRIPTS", + "ACTIVITY_LOG" + ], + "minDate": "1/19/2024 12:00:00 AM", + "maxDate": "1/19/2024 11:59:59 AM" + } + ] + } +} +{ + "query": "Did he mention anything else about the terminal in the past 2 weeks? Please write 3 action items in the form of bullet points", + "extraction": { + "message": "Please create a summary of the mentions in the form of 3 action items in bullet point format once you find the relevant data", + "query": [ + { + "rewrittenQuery": "Reports, conversation transcripts, notes or audio visual records where he has mentioned the terminal within the past 2 weeks", + "questionType": [ + "TRANSCRIPTS", + "ACTIVITY_LOG" + ], + "minDate": "1/5/2024", + "maxDate": "1/19/2024" + } + ] + } +} +{ + "query": "Can you summarize the transcript from 11:00AM to present?", + "extraction": { + "query": [ + { + "rewrittenQuery": "show transcript from 11:00AM to 9:14:57 PM on 1/19/2024", + "questionType": [ + "TRANSCRIPTS" + ], + "minDate": "1/19/2024 11:00:00 AM", + "maxDate": "1/19/2024 9:14:57 PM" + } + ] + } +} +{ + "query": "Please summarize in detail Monday, the 17th of April 2023, starting from 10:00am and highlight any key aspects, todos and so on. 
Leave out any information about FASD/FAS/PFAS.", + "extraction": { + "query": [ + { + "rewrittenQuery": "Detailed summary of activities, key aspects, and tasks scheduled for April 17, 2023, starting from 10:00am excluding any data related to FASD, FAS or PFAS", + "questionType": [ + "ACTIVITY_LOG" + ], + "minDate": "2023-04-17T10:00:00", + "maxDate": "2023-04-17T23:59:59" + } + ] + } +} +{ + "query": "How was my day?", + "extraction": { + "query": [ + { + "rewrittenQuery": "Summarize the user's activities and interactions for the date January 19, 2024", + "questionType": [ + "ACTIVITY_LOG" + ], + "minDate": "1/19/2024", + "maxDate": "1/19/2024" + } + ] + } +} diff --git a/examples/query_expansions/run_sync.ts b/examples/query_expansions/run_sync.ts new file mode 100644 index 00000000..873f6c0a --- /dev/null +++ b/examples/query_expansions/run_sync.ts @@ -0,0 +1,137 @@ +import Instructor from "@/instructor" +import readline from 'readline'; +import { OpenAI } from "openai"; + +import { z } from "zod" + + +const getSystem = () => { + const currentDate = new Date(); + const formattedDate = currentDate.toLocaleDateString(); + const time = currentDate.toLocaleTimeString(); + const systemPrompt = ` + You are a world class query understanding algorithm that is able to: + + 1. rewrite queries to be specific to the context, include additional terms that are relevant to the context, and remove terms that are not relevant to the context. + 2. consider the date and relative and absolute time of the query and the context. + 3. When the query is the search query should be fully qualified, and contain all themes, entities, and keywords that are relevant to the context. + + If you are able to use a search, lead the messages blank. + + Tips for rewriting queries: + * These rewritten queries will be used in a semantic search index. So include alternative meanings and assume that the query should match the embeddings of the answer. + + Tips for tagging the question type: + * PERSONAL_DATA: The query is asking for personal data. The answer should be a list of personal data. + * INTERNET: The query is asking for information on the internet or latest information that may not be available in the context. + * TRANSCRIPTS: The query is asking for a transcript of a conversation or a meeting. + * ACTIVITY LOG: The query is asking for a log of activities or summaries of activities. + + Tip for additional instructions: + * If there are instructions on how to process the response given the search results include them here. For example, if you want to summarize a transcript, you should first search for the transcript and the additional instructions should include the instructions to summarize. + + The current date is ${formattedDate} and the current time is ${time}. + `; + + return systemPrompt; +}; + + +const SearchQuery = z.object({ + rewrittenQuery: z.string().optional().describe("Rewrite the query to be specific to the context. This will be used to do semantic search, so make sure it is specific to the context."), + questionType: z.array(z.string().describe("The type of question that is being asked. This will be used to determine the type of answer that is expected. 
MUST be one of the following: PERSONAL_DATA, INTERNET, TRANSCRIPTS")), + minDate: z.string().optional().describe("YYYY/MM/DD Format, The earliest date of the context that is relevant to the query, null if the query is not time sensitive"), + maxDate: z.string().optional().describe("YYYY/MM/DD Format, The latest date of the context that is relevant to the query, null if the query is not time sensitive"), + // keywords: z.array(z.string()).describe("Keywords that are relevant to a Full Text Search Engine"), +}) + +const Response = z.object({ + message: z.string().optional().describe("The response to the message, if you need to make a search query, provide it below."), + query: z.array(SearchQuery.describe("If you need additional information, please provide it here. If you do not need additional information, please leave this blank.")), + // additionalInstructions: z.string().optional().describe("If you need additional information, please provide it here. If you do not need additional information, please leave this blank."), +}) + + +const oai = new OpenAI({ + apiKey: process.env.OPENAI_KEY, +}); + + +const client = Instructor({ + client: oai, + mode: "TOOLS" +}) + +type Extraction = Partial> + + +const runExtractionStream = async (query: string) => { + const systemPrompt = getSystem(); + + let extraction: Extraction = {} + const extractionStream = await client.chat.completions.create({ + messages: [ + { "role": "system", content: systemPrompt }, + { role: "user", content: query }], + model: "gpt-4", + response_model: { + schema: SearchQuery, + name: "value_extraction" + }, + stream: true, + seed: 1 + }) + + + for await (const result of extractionStream) { + try { + extraction = result + console.clear() + console.log(extraction) + } catch (e) { + console.log(e) + break + } + } + +} + + +const runExtraction = async (query: string) => { + const systemPrompt = getSystem(); + + const extraction = await client.chat.completions.create({ + messages: [ + { "role": "system", content: systemPrompt }, + { role: "user", content: query }], + model: "gpt-4", + response_model: { + schema: Response, + name: "Respond" + }, + seed: 1 + }) + + console.log(JSON.stringify({ query, extraction }, null, 2)); +} + +const failingQuestions = [ + "What did I do last week? Write at least 10 bullet points", + "Give a summary of the daily standup today with Bart. The spoken language is Dutch", + "Please summarize the action items from today's meeting", + "How was my day?", + "Did he mention anything else about the terminal in the past 2 weeks? Please write 3 action items in the form of bullet points", + "Summarize the meeting with Pinterest this morning", + "Can you summarize the transcript from 11:00AM to present?", + "What was I doing last week?", + "What is 10 + 10?", + "I recently looked up a Royal Caribbean cruise. What price did it show me?", + "Please summarize in detail Monday, the 17th of April 2023, starting from 10:00am and highlight any key aspects, todos and so on. 
Leave out any information about FASD/FAS/PFAS.", + "what was the name of the standup tool i saw recently", + "Can you summarize the transcript from 11:00AM to present?", +]; + + +failingQuestions.forEach((question) => { + runExtraction(question) +}) \ No newline at end of file From a14f35ce4ed9fbbae052343f13738eb108c73c68 Mon Sep 17 00:00:00 2001 From: Dimitri Kennedy Date: Sat, 20 Apr 2024 13:17:57 -0400 Subject: [PATCH 5/5] run lint fix --- examples/query_expansions/run.ts | 75 ++++++++++++--------- examples/query_expansions/run_sync.ts | 95 ++++++++++++++++----------- 2 files changed, 101 insertions(+), 69 deletions(-) diff --git a/examples/query_expansions/run.ts b/examples/query_expansions/run.ts index 2e73058d..838ad195 100644 --- a/examples/query_expansions/run.ts +++ b/examples/query_expansions/run.ts @@ -1,14 +1,12 @@ +import readline from "readline" import Instructor from "@/instructor" -import readline from 'readline'; -import { OpenAI } from "openai"; - +import { OpenAI } from "openai" import { z } from "zod" - const getSystem = () => { - const currentDate = new Date(); - const formattedDate = currentDate.toLocaleDateString(); - const time = currentDate.toLocaleTimeString(); + const currentDate = new Date() + const formattedDate = currentDate.toLocaleDateString() + const time = currentDate.toLocaleTimeString() const systemPrompt = ` You are a world class query understanding algorithm that is able to: @@ -22,24 +20,42 @@ const getSystem = () => { * TRANSCRIPTS: The query is asking for a transcript of a conversation or a meeting. The current date is ${formattedDate} and the current time is ${time}. - `; - - return systemPrompt; -}; + ` + return systemPrompt +} const ExtractionValuesSchema = z.object({ - rewrittenQuery: z.string().describe("Rewrite the query to be specific to the context. This will be used to do semantic search, so make sure it is specific to the context."), - questionType: z.array(z.string().describe("The type of question that is being asked. This will be used to determine the type of answer that is expected. MUST be one of the following: PERSONAL_DATA, INTERNET, TRANSCRIPTS")), - minDate: z.string().optional().describe("The earliest date of the context that is relevant to the query, null if the query is not time sensitive"), - maxDate: z.string().optional().describe("The latest date of the context that is relevant to the query, null if the query is not time sensitive"), - keywords: z.array(z.string()).describe("Keywords that are relevant to a Full Text Search Engine"), + rewrittenQuery: z + .string() + .describe( + "Rewrite the query to be specific to the context. This will be used to do semantic search, so make sure it is specific to the context." + ), + questionType: z.array( + z + .string() + .describe( + "The type of question that is being asked. This will be used to determine the type of answer that is expected. 
MUST be one of the following: PERSONAL_DATA, INTERNET, TRANSCRIPTS" + ) + ), + minDate: z + .string() + .optional() + .describe( + "The earliest date of the context that is relevant to the query, null if the query is not time sensitive" + ), + maxDate: z + .string() + .optional() + .describe( + "The latest date of the context that is relevant to the query, null if the query is not time sensitive" + ), + keywords: z.array(z.string()).describe("Keywords that are relevant to a Full Text Search Engine") }) const oai = new OpenAI({ - apiKey: process.env.OPENAI_KEY, -}); - + apiKey: process.env.OPENAI_KEY +}) const client = Instructor({ client: oai, @@ -48,15 +64,15 @@ const client = Instructor({ type Extraction = Partial> - const runExtraction = async (query: string) => { - const systemPrompt = getSystem(); + const systemPrompt = getSystem() let extraction: Extraction = {} const extractionStream = await client.chat.completions.create({ messages: [ - { "role": "system", content: systemPrompt }, - { role: "user", content: query }], + { role: "system", content: systemPrompt }, + { role: "user", content: query } + ], model: "gpt-4", response_model: { schema: ExtractionValuesSchema, @@ -66,7 +82,6 @@ const runExtraction = async (query: string) => { seed: 1 }) - for await (const result of extractionStream) { try { extraction = result @@ -77,16 +92,14 @@ const runExtraction = async (query: string) => { break } } - } - const rl = readline.createInterface({ input: process.stdin, output: process.stdout -}); +}) -rl.question('Enter your query: ', (query) => { - runExtraction(query); - rl.close(); -}); +rl.question("Enter your query: ", query => { + runExtraction(query) + rl.close() +}) diff --git a/examples/query_expansions/run_sync.ts b/examples/query_expansions/run_sync.ts index 873f6c0a..466f646b 100644 --- a/examples/query_expansions/run_sync.ts +++ b/examples/query_expansions/run_sync.ts @@ -1,14 +1,11 @@ import Instructor from "@/instructor" -import readline from 'readline'; -import { OpenAI } from "openai"; - +import { OpenAI } from "openai" import { z } from "zod" - const getSystem = () => { - const currentDate = new Date(); - const formattedDate = currentDate.toLocaleDateString(); - const time = currentDate.toLocaleTimeString(); + const currentDate = new Date() + const formattedDate = currentDate.toLocaleDateString() + const time = currentDate.toLocaleTimeString() const systemPrompt = ` You are a world class query understanding algorithm that is able to: @@ -31,31 +28,56 @@ const getSystem = () => { * If there are instructions on how to process the response given the search results include them here. For example, if you want to summarize a transcript, you should first search for the transcript and the additional instructions should include the instructions to summarize. The current date is ${formattedDate} and the current time is ${time}. - `; - - return systemPrompt; -}; + ` + return systemPrompt +} const SearchQuery = z.object({ - rewrittenQuery: z.string().optional().describe("Rewrite the query to be specific to the context. This will be used to do semantic search, so make sure it is specific to the context."), - questionType: z.array(z.string().describe("The type of question that is being asked. This will be used to determine the type of answer that is expected. 
MUST be one of the following: PERSONAL_DATA, INTERNET, TRANSCRIPTS")), - minDate: z.string().optional().describe("YYYY/MM/DD Format, The earliest date of the context that is relevant to the query, null if the query is not time sensitive"), - maxDate: z.string().optional().describe("YYYY/MM/DD Format, The latest date of the context that is relevant to the query, null if the query is not time sensitive"), + rewrittenQuery: z + .string() + .optional() + .describe( + "Rewrite the query to be specific to the context. This will be used to do semantic search, so make sure it is specific to the context." + ), + questionType: z.array( + z + .string() + .describe( + "The type of question that is being asked. This will be used to determine the type of answer that is expected. MUST be one of the following: PERSONAL_DATA, INTERNET, TRANSCRIPTS" + ) + ), + minDate: z + .string() + .optional() + .describe( + "YYYY/MM/DD Format, The earliest date of the context that is relevant to the query, null if the query is not time sensitive" + ), + maxDate: z + .string() + .optional() + .describe( + "YYYY/MM/DD Format, The latest date of the context that is relevant to the query, null if the query is not time sensitive" + ) // keywords: z.array(z.string()).describe("Keywords that are relevant to a Full Text Search Engine"), }) const Response = z.object({ - message: z.string().optional().describe("The response to the message, if you need to make a search query, provide it below."), - query: z.array(SearchQuery.describe("If you need additional information, please provide it here. If you do not need additional information, please leave this blank.")), + message: z + .string() + .optional() + .describe("The response to the message, if you need to make a search query, provide it below."), + query: z.array( + SearchQuery.describe( + "If you need additional information, please provide it here. If you do not need additional information, please leave this blank." + ) + ) // additionalInstructions: z.string().optional().describe("If you need additional information, please provide it here. 
If you do not need additional information, please leave this blank."), }) - const oai = new OpenAI({ - apiKey: process.env.OPENAI_KEY, -}); - + apiKey: process.env.OPENAI_KEY +}) const client = Instructor({ client: oai, @@ -64,15 +86,15 @@ const client = Instructor({ type Extraction = Partial> - -const runExtractionStream = async (query: string) => { - const systemPrompt = getSystem(); +export const runExtractionStream = async (query: string) => { + const systemPrompt = getSystem() let extraction: Extraction = {} const extractionStream = await client.chat.completions.create({ messages: [ - { "role": "system", content: systemPrompt }, - { role: "user", content: query }], + { role: "system", content: systemPrompt }, + { role: "user", content: query } + ], model: "gpt-4", response_model: { schema: SearchQuery, @@ -82,7 +104,6 @@ const runExtractionStream = async (query: string) => { seed: 1 }) - for await (const result of extractionStream) { try { extraction = result @@ -93,17 +114,16 @@ const runExtractionStream = async (query: string) => { break } } - } - const runExtraction = async (query: string) => { - const systemPrompt = getSystem(); + const systemPrompt = getSystem() const extraction = await client.chat.completions.create({ messages: [ - { "role": "system", content: systemPrompt }, - { role: "user", content: query }], + { role: "system", content: systemPrompt }, + { role: "user", content: query } + ], model: "gpt-4", response_model: { schema: Response, @@ -112,7 +132,7 @@ const runExtraction = async (query: string) => { seed: 1 }) - console.log(JSON.stringify({ query, extraction }, null, 2)); + console.log(JSON.stringify({ query, extraction }, null, 2)) } const failingQuestions = [ @@ -128,10 +148,9 @@ const failingQuestions = [ "I recently looked up a Royal Caribbean cruise. What price did it show me?", "Please summarize in detail Monday, the 17th of April 2023, starting from 10:00am and highlight any key aspects, todos and so on. Leave out any information about FASD/FAS/PFAS.", "what was the name of the standup tool i saw recently", - "Can you summarize the transcript from 11:00AM to present?", -]; - + "Can you summarize the transcript from 11:00AM to present?" +] -failingQuestions.forEach((question) => { +failingQuestions.forEach(question => { runExtraction(question) -}) \ No newline at end of file +})