Implement DSL and documentation

instructor-ai · Jul 8, 2023 · 3498aa1 · 3498aa1
1 parent f4754fb
commit 3498aa1
Show file tree

Hide file tree

Showing 24 changed files with 647 additions and 145 deletions.
diff --git a/README.md b/README.md
@@ -1,8 +1,7 @@
 # Pydantic is all you need: An OpenAI Function Call Pydantic Integration Module
 
-
 We try to provide a powerful and efficient approach to output parsing when interacting with OpenAI's Function Call API. One that is framework agnostic and minimizes any dependencies. It leverages the data validation capabilities of the Pydantic library to handle output parsing in a more structured and reliable manner.
-If you have any feedback, leave an issue or hit me up on [twitter](https://twitter.com/jxnlco). 
+If you have any feedback, leave an issue or hit me up on [twitter](https://twitter.com/jxnlco).
 
 This repo also contains a range of examples I've used in experimentation and in production and I welcome new contributions for different types of schemas.
 
@@ -128,49 +127,58 @@ class UserDetails(BaseModel):
 ```python
 from openai_function_call import OpenAISchema
 from openai_function_call.dsl import ChatCompletion, MultiTask, messages as m
+from openai_function_call.dsl.messages import system as s
 
 # Define a subtask you'd like to extract from then,
-# We'll use MultTask to easily map it to a List[Search] 
+# We'll use MultiTask to easily map it to a List[Search]
 # so we can extract more than one
 class Search(OpenAISchema):
     id: int
     query: str
 
 tasks = (
     ChatCompletion(name="Acme Inc Email Segmentation", model="gpt-3.5-turbo-0613")
-    | m.ExpertSystem(task="Segment emails into search queries")
+    | s.Identity(identity="World class state of the art agent") # if no identity is provided, this is the default one
+    | s.Task(task="Segment emails into search queries")
+    | s.Style(style="Professional, clear and concise")
+    | s.Guidelines(guidelines=[
+        'You never swear',
+        'You are polite',
+        'You say please and thank you often.'
+    ])
+    | s.Tips(tips=[
+        "When unsure about the correct segmentation, try to think about the task as a whole",
+        "If acronyms are used expand them to their full form",
+        "Use multiple phrases to describe the same thing"]
+                  )
     | MultiTask(subtask_class=Search)
     | m.TaggedMessage(
         tag="email",
         content="Can you find the video I sent last week and also the post about dogs",
     )
-    | m.TipsMessage(
-        tips=[
-            "When unsure about the correct segmentation, try to think about the task as a whole",
-            "If acronyms are used expand them to their full form",
-            "Use multiple phrases to describe the same thing",
-        ]
-    )
     | m.ChainOfThought()
 )
 # It's important that this just builds your request,
 # all these | operators are overloaded and all we do is compile
 # it to the openai kwargs
+# Also note that the System components are combined sequentially
+# so the order matters!
 assert isinstance(tasks, ChatCompletion)
 pprint(tasks.kwargs, indent=3)
 """
 {
     "messages": [
         {
             "role": "system",
-            "content": "You are a world class, state of the art agent capable
-            of correctly completing the task: `Segment emails into search queries`"
+            "content": "You are a world class state of the art agent.\n\nYour purpose is to correctly complete this task:
+                        `Segment emails into search queries`.\n\nYour style when answering is professional, clear and concise\n\n
+                        These are the guidelines you consider when completing your task:\n\n* You never swear\n* You are polite\n* You say please and thank you often.\n\nHere are some tips to help you complete the task:\n\n* When unsure about the correct segmentation, try to think about the task as a whole\n* If acronyms are used expand them to their full form\n* Use multiple phrases to describe the same thing"
         },
+        ...
         {
             "role": "user",
             "content": "<email>Can you find the video I sent last week and also the post about dogs</email>"
         },
-        ...
         {
             "role": "assistant",
             "content": "Lets think step by step to get the correct answer:"
@@ -207,14 +215,13 @@ pprint(tasks.kwargs, indent=3)
     "max_tokens": 1000,
     "temperature": 0.1,
     "model": "gpt-3.5-turbo-0613"
-}
 """
 
 # Once we call .create we'll get back a MultiTask object that contains our list of tasks
 result = tasks.create()
 
 for task in result.tasks:
-    # We can now extract the list of tasks as we could normally 
+    # We can now extract the list of tasks as we could normally
     assert isinstance(task, Search)
 ```
 

diff --git a/docs/chat-completion.md b/docs/chat-completion.md
@@ -0,0 +1,18 @@
+# Using the ChatCompletion
+
+To get started with this api we must first instantiate a `ChatCompletion` object and build the api call
+by piping messages and functions to it.
+
+::: openai_function_call.dsl.completion
+
+## Messages Types
+
+The basis of a message is defined as a `dataclass`. However we provide helper functions and classes that provide additional functionality in the form of templates. 
+
+::: openai_function_call.dsl.messages.base
+
+## Helper Messages / Templates
+
+::: openai_function_call.dsl.messages.messages
+
+::: openai_function_call.dsl.messages.user
diff --git a/docs/multitask.md b/docs/multitask.md
@@ -0,0 +1,5 @@
+# MultiTask 
+
+We define a helper function `MultiTask` that dynamically creates a new schema with a `tasks` attribute defined as a list of the task subclass. It includes some prebuilt prompts and allows us to avoid writing some extra code.
+
+::: openai_function_call.dsl.multitask
diff --git a/docs/openai_schema.md b/docs/openai_schema.md
@@ -55,4 +55,4 @@ class UserDetails(BaseModel):
 
 ## Code Reference
 
-::: openai_function_call
+::: openai_function_call.function_calls
diff --git a/docs/pipeline-example.md b/docs/pipeline-example.md
@@ -0,0 +1,126 @@
+# Using the pipeline
+
+The pipe API is syntactic sugar that helps build prompts in a readable way and avoids having to remember best practices around wording and structure. Examples include adding tips, tagging data with XML, or even including the chain-of-thought prompt as an assistant message.
+
+### Example Pipeline
+
+```python
+from openai_function_call import OpenAISchema, dsl
+from pydantic import Field
+
+
+class SearchQuery(OpenAISchema):
+    query: str = Field(
+        ...,
+        description="Detailed, comprehensive, and specific query to be used for semantic search",
+    )
+
+
+SearchResponse = dsl.MultiTask(
+    subtask_class=SearchQuery,
+)
+
+
+task = (
+    dsl.ChatCompletion(name="Segmenting Search requests example")
+    | dsl.SystemTask(task="Segment search results")
+    | dsl.TaggedMessage(
+        content="can you send me the data about the video investment and the one about spot the dog?",
+        tag="query",
+    )
+    | dsl.TipsMessage(
+        tips=[
+            "Expand query to contain multiple forms of the same word (SSO -> Single Sign On)",
+            "Use the title to explain what the query should return, but use the query to complete the search",
+            "The query should be detailed, specific, and cast a wide net when possible",
+        ]
+    )
+    | SearchResponse
+)
+search_request = task.create()  # type: ignore
+assert isinstance(search_request, SearchResponse)
+print(search_request.json(indent=2))
+```
+
+Output
+
+```json
+{
+  "tasks": [
+    {
+      "query": "data about video investment"
+    },
+    {
+      "query": "data about spot the dog"
+    }
+  ]
+}
+```
+
+## Inspecting the API Call
+
+To make it easy for you to understand what this API is doing, by default we only construct the kwargs for the chat completion call.
+
+```python
+print(task.kwargs)
+```
+
+```json
+{
+ "messages": [
+  {
+   "role": "system",
+   "content": "You are a world class state of the art algorithm capable of correctly completing the following task: `Segment search results`."
+  },
+  {
+   "role": "user",
+   "content": "Consider the following data:\n\n<query>can you send me the data about the video investment and the one about spot the dog?</query>"
+  },
+  {
+   "role": "user",
+   "content": "Here are some tips to help you complete the task:\n\n* Expand query to contain multiple forms of the same word (SSO -> Single Sign On)\n* Use the title to explain what the query should return, but use the query to complete the search\n* The query should be detailed, specific, and cast a wide net when possible"
+  }
+ ],
+ "functions": [
+  {
+   "name": "MultiSearchQuery",
+   "description": "Correctly segmented set of search queries",
+   "parameters": {
+    "type": "object",
+    "properties": {
+     "tasks": {
+      "description": "Correctly segmented list of `SearchQuery` tasks",
+      "type": "array",
+      "items": {
+       "$ref": "#/definitions/SearchQuery"
+      }
+     }
+    },
+    "definitions": {
+     "SearchQuery": {
+      "type": "object",
+      "properties": {
+       "query": {
+        "description": "Detailed, comprehensive, and specific query to be used for semantic search",
+        "type": "string"
+       }
+      },
+      "required": [
+       "query"
+      ]
+     }
+    },
+    "required": [
+     "tasks"
+    ]
+   }
+  }
+ ],
+ "function_call": {
+  "name": "MultiSearchQuery"
+ },
+ "max_tokens": 1000,
+ "temperature": 0.1,
+ "model": "gpt-3.5-turbo-0613"
+}
+```
diff --git a/examples/__init__.py b/examples/__init__.py
diff --git a/examples/fastapi_app/__init__.py b/examples/fastapi_app/__init__.py
diff --git a/examples/fastapi_app/main.py b/examples/fastapi_app/main.py
@@ -0,0 +1,42 @@
+from fastapi import FastAPI
+from openai_function_call import OpenAISchema
+import openai_function_call.dsl as dsl
+from pydantic import BaseModel, Field
+
+app = FastAPI(title="Example Application using openai_function_call")
+
+
+class SearchRequest(BaseModel):
+    body: str
+
+
+class SearchQuery(OpenAISchema):
+    title: str = Field(..., description="Question that the query answers")
+    query: str = Field(
+        ...,
+        description="Detailed, comprehensive, and specific query to be used for semantic search",
+    )
+
+
+SearchResponse = dsl.MultiTask(
+    subtask_class=SearchQuery,
+    description="Correctly segmented set of search queries",
+)
+
+
+@app.post("/search", response_model=SearchResponse)
+async def search(request: SearchRequest):
+    task = (
+        dsl.ChatCompletion(name="Segmenting Search requests example")
+        | dsl.SystemTask(task="Segment search results")
+        | dsl.TaggedMessage(content=request.body, tag="query")
+        | dsl.TipsMessage(
+            tips=[
+                "Expand query to contain multiple forms of the same word (SSO -> Single Sign On)",
+                "Use the title to explain what the query should return, but use the query to complete the search",
+                "The query should be detailed, specific, and cast a wide net when possible",
+            ]
+        )
+        | SearchRequest
+    )
+    return await task.acreate()
diff --git a/examples/fastapi_app/script.py b/examples/fastapi_app/script.py
@@ -0,0 +1,50 @@
+from openai_function_call import OpenAISchema, dsl
+from pydantic import Field
+
+
+class SearchQuery(OpenAISchema):
+    query: str = Field(
+        ...,
+        description="Detailed, comprehensive, and specific query to be used for semantic search",
+    )
+
+
+SearchResponse = dsl.MultiTask(
+    subtask_class=SearchQuery,
+    description="Correctly segmented set of search queries",
+)
+
+
+task = (
+    dsl.ChatCompletion(name="Segmenting Search requests example")
+    | dsl.SystemTask(task="Segment search results")
+    | dsl.TaggedMessage(
+        content="can you send me the data about the video investment and the one about spot the dog?",
+        tag="query",
+    )
+    | dsl.TipsMessage(
+        tips=[
+            "Expand query to contain multiple forms of the same word (SSO -> Single Sign On)",
+            "Use the title to explain what the query should return, but use the query to complete the search",
+            "The query should be detailed, specific, and cast a wide net when possible",
+        ]
+    )
+    | SearchResponse
+)
+import pprint
+
+import json
+
+print(json.dumps(task.kwargs, indent=1))
+"""
+{
+  "tasks": [
+    {
+      "query": "data about video investment"
+    },
+    {
+      "query": "data about spot the dog"
+    }
+  ]
+}
+"""
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -31,7 +31,10 @@ markdown_extensions:
   - md_in_html
 nav:
   - Home: 'index.md'
-  - Module:
-      - 'Schemas': 'openai_schema.md'
+  - API Reference:
+      - 'OpenAISchema': 'openai_schema.md'
+      - "Helper: MultiTask": "multitask.md"
+      - "Example: Pipeline API": "pipeline-example.md"
+      - "Docs": "chat-completion.md"
   - Examples:
       - 'Missing': 'help.md'
diff --git a/openai_function_call/__init__.py b/openai_function_call/__init__.py
@@ -1,3 +1,4 @@
 from .function_calls import OpenAISchema, openai_function, openai_schema
+from .dsl.multitask import MultiTask
 
-__all__ = ["OpenAISchema", "openai_function", "openai_schema"]
+__all__ = ["OpenAISchema", "openai_function", "MultiTask", "openai_schema"]
diff --git a/openai_function_call/dsl/__init__.py b/openai_function_call/dsl/__init__.py
@@ -1,5 +1,5 @@
 from .completion import ChatCompletion
-from .multitask import MultiTask
 from .messages import *
+from .multitask import MultiTask
 
 __all__ = ["ChatCompletion", "MultiTask", "messages"]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -55,4 +55,4 @@ class UserDetails(BaseModel):

		## Code Reference

		::: openai_function_call
		::: openai_function_call.function_calls