From c885890e0d86ecde83448436c257035624dc397a Mon Sep 17 00:00:00 2001 From: njk112 Date: Tue, 2 Jan 2024 23:10:56 +0000 Subject: [PATCH 1/6] feat: Implement SPAM Classification using OpenAI and Zod - Defined an enum for SPAM and NOT_SPAM categories. - Utilized Zod for schema validation of classification responses. - Developed function to classify texts as SPAM or NOT_SPAM. - Added assertion to validate the classification label against expected results. - Based on the structure and examples from jxnl's instructor library. This commit introduces a new functionality that leverages OpenAI's language model to classify a given text as SPAM or NOT_SPAM. The implementation employs the Zod library for schema validation, ensuring that the responses adhere to our predefined structure. An assertion check further validates the accuracy of the classification. References: - [jxnl/instructor: Simple Prediction Example](https://github.com/jxnl/instructor/tree/main/examples/classification/simple_prediction.py) - [Instructor Library Documentation](https://jxnl.github.io/instructor/examples/classification/) --- .../classification/simple_prediction/index.ts | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 examples/classification/simple_prediction/index.ts diff --git a/examples/classification/simple_prediction/index.ts b/examples/classification/simple_prediction/index.ts new file mode 100644 index 00000000..e1fec7ab --- /dev/null +++ b/examples/classification/simple_prediction/index.ts @@ -0,0 +1,50 @@ +import assert from "assert" +import Instructor from "@/instructor" +import OpenAI from "openai" +import { z } from "zod" + +enum CLASIFICATION_LABELS { + "SPAM" = "SPAM", + "NOT_SPAM" = "NOT_SPAM" +} + +const SimgpleClasificationSchema = z.object({ + class_label: z.nativeEnum(CLASIFICATION_LABELS) +}) + +type SimpleClasification = z.infer + +const oai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY ?? undefined, + organization: process.env.OPENAI_ORG_ID ?? undefined +}) + +const client = Instructor({ + client: oai, + mode: "FUNCTIONS" +}) + +const createClasification = async (data: string): Promise => { + const clasification = (await client.chat.completions.create({ + messages: [{ role: "user", content: `"Classify the following text: ${data}` }], + model: "gpt-3.5-turbo", + //@ts-expect-error same as above + response_model: SimgpleClasificationSchema, + max_retries: 3 + })) as SimpleClasification + + return clasification || undefined +} + +const clasification = await createClasification( + "Hello there I'm a nigerian prince and I want to give you money" +) + +console.log({ clasification }) + +assert( + clasification.class_label === CLASIFICATION_LABELS.SPAM, + `Expected ${clasification.class_label} to be ${CLASIFICATION_LABELS.SPAM}` +) + +console.log(clasification) From dab4f1b0c9a860b87213205a0a1bc50f3f862333 Mon Sep 17 00:00:00 2001 From: njk112 Date: Tue, 2 Jan 2024 23:12:36 +0000 Subject: [PATCH 2/6] feat: Implement Multi-Label Classification for Support Tickets - Added enums for multi-label classification: BILLING, GENERAL_QUERY, HARDWARE. - Implemented MultiClasificationSchema using Zod for response validation. - Created function for classifying support ticket text. - Included an assertion to validate the presence of specific labels in the classification. This commit introduces a new functionality for classifying support tickets into multiple categories using OpenAI's GPT-3.5 model. The categories include billing, general queries, and hardware issues. The Zod library is used to ensure the structured validation of classification responses. The assertion validates that the predicted labels include both BILLING and HARDWARE for the given test case. Note: This implementation expects the classification to identify multiple aspects of a support ticket, reflecting its complexity and multi-faceted nature. References: - [jxnl/instructor: Multi Prediction Example](https://github.com/jxnl/instructor/blob/main/examples/classification/multi_prediction.py) - [Instructor Library Documentation - Classifying Text](https://jxnl.github.io/instructor/examples/classification/#classifying-text) --- .../classification/multi_prediction/index.ts | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 examples/classification/multi_prediction/index.ts diff --git a/examples/classification/multi_prediction/index.ts b/examples/classification/multi_prediction/index.ts new file mode 100644 index 00000000..cdc9b6b1 --- /dev/null +++ b/examples/classification/multi_prediction/index.ts @@ -0,0 +1,50 @@ +import assert from "assert" +import Instructor from "@/instructor" +import OpenAI from "openai" +import { z } from "zod" + +enum MULTI_CLASIFICATION_LABELS { + "BILLING" = "billing", + "GENERAL_QUERY" = "general_query", + "HARDWARE" = "hardware" +} + +const MultiClasificationSchema = z.object({ + predicted_labels: z.array(z.nativeEnum(MULTI_CLASIFICATION_LABELS)) +}) + +type MultiClasification = z.infer + +const oai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY ?? undefined, + organization: process.env.OPENAI_ORG_ID ?? undefined +}) + +const client = Instructor({ + client: oai, + mode: "FUNCTIONS" +}) + +const createClasification = async (data: string): Promise => { + const clasification = (await client.chat.completions.create({ + messages: [{ role: "user", content: `"Classify the following support ticket: ${data}` }], + model: "gpt-3.5-turbo", + //@ts-expect-error same as above + response_model: MultiClasificationSchema, + max_retries: 3 + })) as MultiClasification + + return clasification || undefined +} + +const clasification = await createClasification( + "My account is locked and I can't access my billing info. Phone is also broken" +) + +console.log({ clasification }) + +assert( + clasification.predicted_labels.includes(MULTI_CLASIFICATION_LABELS.BILLING) && + clasification.predicted_labels.includes(MULTI_CLASIFICATION_LABELS.HARDWARE), + `Expected ${clasification.predicted_labels} to be include ${MULTI_CLASIFICATION_LABELS.BILLING} and ${MULTI_CLASIFICATION_LABELS.HARDWARE}` +) From 9070a6e0343882402e2690d80a489e274a424148 Mon Sep 17 00:00:00 2001 From: njk112 Date: Tue, 2 Jan 2024 23:18:53 +0000 Subject: [PATCH 3/6] typos --- examples/classification/simple_prediction/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/classification/simple_prediction/index.ts b/examples/classification/simple_prediction/index.ts index e1fec7ab..3963e80a 100644 --- a/examples/classification/simple_prediction/index.ts +++ b/examples/classification/simple_prediction/index.ts @@ -8,11 +8,11 @@ enum CLASIFICATION_LABELS { "NOT_SPAM" = "NOT_SPAM" } -const SimgpleClasificationSchema = z.object({ +const SimpleClasificationSchema = z.object({ class_label: z.nativeEnum(CLASIFICATION_LABELS) }) -type SimpleClasification = z.infer +type SimpleClasification = z.infer const oai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY ?? undefined, From c3976a0cd3029fcec4d895a74837f9a486df9f64 Mon Sep 17 00:00:00 2001 From: njk112 Date: Tue, 2 Jan 2024 23:19:55 +0000 Subject: [PATCH 4/6] assertion typos --- examples/classification/multi_prediction/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/classification/multi_prediction/index.ts b/examples/classification/multi_prediction/index.ts index cdc9b6b1..80a967cb 100644 --- a/examples/classification/multi_prediction/index.ts +++ b/examples/classification/multi_prediction/index.ts @@ -46,5 +46,5 @@ console.log({ clasification }) assert( clasification.predicted_labels.includes(MULTI_CLASIFICATION_LABELS.BILLING) && clasification.predicted_labels.includes(MULTI_CLASIFICATION_LABELS.HARDWARE), - `Expected ${clasification.predicted_labels} to be include ${MULTI_CLASIFICATION_LABELS.BILLING} and ${MULTI_CLASIFICATION_LABELS.HARDWARE}` + `Expected ${clasification.predicted_labels} to include ${MULTI_CLASIFICATION_LABELS.BILLING} and ${MULTI_CLASIFICATION_LABELS.HARDWARE}` ) From b14c82fb4e7f93e850d1d58a9cfa9535bcaf68a0 Mon Sep 17 00:00:00 2001 From: njk112 Date: Tue, 2 Jan 2024 23:53:06 +0000 Subject: [PATCH 5/6] adding res structure in comments --- examples/classification/multi_prediction/index.ts | 6 +++--- examples/classification/simple_prediction/index.ts | 10 ++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/examples/classification/multi_prediction/index.ts b/examples/classification/multi_prediction/index.ts index 80a967cb..2dfdd275 100644 --- a/examples/classification/multi_prediction/index.ts +++ b/examples/classification/multi_prediction/index.ts @@ -26,13 +26,12 @@ const client = Instructor({ }) const createClasification = async (data: string): Promise => { - const clasification = (await client.chat.completions.create({ + const clasification: MultiClasification = await client.chat.completions.create({ messages: [{ role: "user", content: `"Classify the following support ticket: ${data}` }], model: "gpt-3.5-turbo", - //@ts-expect-error same as above response_model: MultiClasificationSchema, max_retries: 3 - })) as MultiClasification + }) return clasification || undefined } @@ -40,6 +39,7 @@ const createClasification = async (data: string): Promise => { - const clasification = (await client.chat.completions.create({ + const clasification: SimpleClasification = await client.chat.completions.create({ messages: [{ role: "user", content: `"Classify the following text: ${data}` }], model: "gpt-3.5-turbo", - //@ts-expect-error same as above - response_model: SimgpleClasificationSchema, + response_model: SimpleClasificationSchema, max_retries: 3 - })) as SimpleClasification + }) return clasification || undefined } @@ -39,6 +38,7 @@ const createClasification = async (data: string): Promise Date: Tue, 2 Jan 2024 23:58:58 +0000 Subject: [PATCH 6/6] typos --- .../classification/multi_prediction/index.ts | 26 +++++++++---------- .../classification/simple_prediction/index.ts | 24 ++++++++--------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/examples/classification/multi_prediction/index.ts b/examples/classification/multi_prediction/index.ts index 2dfdd275..fabbbaee 100644 --- a/examples/classification/multi_prediction/index.ts +++ b/examples/classification/multi_prediction/index.ts @@ -3,17 +3,17 @@ import Instructor from "@/instructor" import OpenAI from "openai" import { z } from "zod" -enum MULTI_CLASIFICATION_LABELS { +enum MULTI_CLASSIFICATION_LABELS { "BILLING" = "billing", "GENERAL_QUERY" = "general_query", "HARDWARE" = "hardware" } -const MultiClasificationSchema = z.object({ - predicted_labels: z.array(z.nativeEnum(MULTI_CLASIFICATION_LABELS)) +const MultiClassificationSchema = z.object({ + predicted_labels: z.array(z.nativeEnum(MULTI_CLASSIFICATION_LABELS)) }) -type MultiClasification = z.infer +type MultiClassification = z.infer const oai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY ?? undefined, @@ -25,26 +25,26 @@ const client = Instructor({ mode: "FUNCTIONS" }) -const createClasification = async (data: string): Promise => { - const clasification: MultiClasification = await client.chat.completions.create({ +const createClassification = async (data: string): Promise => { + const classification: MultiClassification = await client.chat.completions.create({ messages: [{ role: "user", content: `"Classify the following support ticket: ${data}` }], model: "gpt-3.5-turbo", - response_model: MultiClasificationSchema, + response_model: MultiClassificationSchema, max_retries: 3 }) - return clasification || undefined + return classification || undefined } -const clasification = await createClasification( +const classification = await createClassification( "My account is locked and I can't access my billing info. Phone is also broken" ) // OUTPUT: { predicted_labels: [ 'billing', 'hardware' ] } -console.log({ clasification }) +console.log({ classification }) assert( - clasification.predicted_labels.includes(MULTI_CLASIFICATION_LABELS.BILLING) && - clasification.predicted_labels.includes(MULTI_CLASIFICATION_LABELS.HARDWARE), - `Expected ${clasification.predicted_labels} to include ${MULTI_CLASIFICATION_LABELS.BILLING} and ${MULTI_CLASIFICATION_LABELS.HARDWARE}` + classification.predicted_labels.includes(MULTI_CLASSIFICATION_LABELS.BILLING) && + classification.predicted_labels.includes(MULTI_CLASSIFICATION_LABELS.HARDWARE), + `Expected ${classification.predicted_labels} to include ${MULTI_CLASSIFICATION_LABELS.BILLING} and ${MULTI_CLASSIFICATION_LABELS.HARDWARE}` ) diff --git a/examples/classification/simple_prediction/index.ts b/examples/classification/simple_prediction/index.ts index 4fba6b23..4a0b13de 100644 --- a/examples/classification/simple_prediction/index.ts +++ b/examples/classification/simple_prediction/index.ts @@ -3,16 +3,16 @@ import Instructor from "@/instructor" import OpenAI from "openai" import { z } from "zod" -enum CLASIFICATION_LABELS { +enum CLASSIFICATION_LABELS { "SPAM" = "SPAM", "NOT_SPAM" = "NOT_SPAM" } -const SimpleClasificationSchema = z.object({ - class_label: z.nativeEnum(CLASIFICATION_LABELS) +const SimpleClassificationSchema = z.object({ + class_label: z.nativeEnum(CLASSIFICATION_LABELS) }) -type SimpleClasification = z.infer +type SimpleClassification = z.infer const oai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY ?? undefined, @@ -24,25 +24,25 @@ const client = Instructor({ mode: "FUNCTIONS" }) -const createClasification = async (data: string): Promise => { - const clasification: SimpleClasification = await client.chat.completions.create({ +const createClassification = async (data: string): Promise => { + const classification: SimpleClassification = await client.chat.completions.create({ messages: [{ role: "user", content: `"Classify the following text: ${data}` }], model: "gpt-3.5-turbo", - response_model: SimpleClasificationSchema, + response_model: SimpleClassificationSchema, max_retries: 3 }) - return clasification || undefined + return classification || undefined } -const clasification = await createClasification( +const classification = await createClassification( "Hello there I'm a nigerian prince and I want to give you money" ) // OUTPUT: { class_label: 'SPAM' } -console.log({ clasification }) +console.log({ classification }) assert( - clasification.class_label === CLASIFICATION_LABELS.SPAM, - `Expected ${clasification.class_label} to be ${CLASIFICATION_LABELS.SPAM}` + classification.class_label === CLASSIFICATION_LABELS.SPAM, + `Expected ${classification.class_label} to be ${CLASSIFICATION_LABELS.SPAM}` )