From fae7254d62e9dea3646d7fa9050b8726ac08d43d Mon Sep 17 00:00:00 2001
From: Jason
Date: Thu, 27 Jul 2023 22:48:48 +0800
Subject: [PATCH] update fact

---
 .../citation_fuzzy_match.py               |  4 +-
 examples/citation_with_extraction/main.py | 49 ++++++++++++-------
 2 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/examples/citation_with_extraction/citation_fuzzy_match.py b/examples/citation_with_extraction/citation_fuzzy_match.py
index a746b027c..e718a38ef 100644
--- a/examples/citation_with_extraction/citation_fuzzy_match.py
+++ b/examples/citation_with_extraction/citation_fuzzy_match.py
@@ -67,7 +67,7 @@ def ask_ai(question: str, context: str) -> QuestionAnswer:
     # Making a request to the hypothetical 'openai' module
     completion = openai.ChatCompletion.create(
         model="gpt-3.5-turbo-0613",
-        temperature=0.2,
+        temperature=0,
         max_tokens=1000,
         functions=[QuestionAnswer.openai_schema],
         function_call={"name": QuestionAnswer.openai_schema["name"]},
@@ -90,7 +90,7 @@ def ask_ai(question: str, context: str) -> QuestionAnswer:
     return QuestionAnswer.from_response(completion)
 
 
-question = "What did the author do during college?"
+question = "What is the author's last name?"
 context = """
 My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.I went to an arts highschool but in university I studied Computational Mathematics and physics. As part of coop I worked at many companies including Stitchfix, Facebook. I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.
 """
diff --git a/examples/citation_with_extraction/main.py b/examples/citation_with_extraction/main.py
index 711ac10e9..1a4bd70fb 100644
--- a/examples/citation_with_extraction/main.py
+++ b/examples/citation_with_extraction/main.py
@@ -2,7 +2,7 @@
 from typing import Iterable, List
 from fastapi import FastAPI, Request, HTTPException
 from fastapi.params import Depends
-from openai_function_call import MultiTask
+from openai_function_call import MultiTask, OpenAISchema
 from pydantic import BaseModel, Field
 from starlette.responses import StreamingResponse
 
@@ -10,6 +10,8 @@
 import openai
 import logging
 
+from openai_function_call.dsl.multitask import MultiTaskBase
+
 logger = logging.getLogger(__name__)
 
 # FastAPI app
@@ -18,19 +20,20 @@
 )
 
 
-class SubResponse(BaseModel):
+class Fact(BaseModel):
     """
-    If there are multiple phrases with difference citations. Each one should be its own object.
-    make sure to break them apart such that each one only uses a set of
-    sources that are relevant to it.
-
-    When possible return `substring_quote` before the `body`.
+    Class representing a single statement.
+    Each fact has a body and a list of sources.
+    If there are multiple facts, make sure to break them apart such that each one only uses a set of sources that are relevant to it.
     """
 
-    body: str = Field(..., description="Body of the sentences, as part of a response")
+    fact: str = Field(
+        ...,
+        description="Body of the sentence, as part of a response; it should read like a sentence that answers the question",
+    )
     substring_quotes: List[str] = Field(
         ...,
-        description="Each source should be a direct quote from the context, as a substring of the original content but should be a wide enough quote to capture the context of the quote. The citation should at least be long and capture the context and be a full sentence.",
+        description="Each source should be a direct quote from the context, as a substring of the original content",
     )
 
     def _get_span(self, quote, context):
@@ -54,11 +57,19 @@ def get_spans(self, context):
             yield from self._get_span(quote, context)
 
 
-Answers = MultiTask(
-    SubResponse,
-    name="Answer",
-    description="Correctly answer questions based on a context. Quotes should be full sentences when possible",
-)
+class QuestionAnswer(OpenAISchema, MultiTaskBase):
+    """
+    Class representing a question and its answer as a list of facts; each one should have a source.
+    Each fact contains a body and a list of sources."""
+
+    question: str = Field(..., description="Question that was asked")
+    tasks: List[Fact] = Field(
+        ...,
+        description="Body of the answer, each fact should be its own separate object with a body and a list of sources",
+    )
+
+
+QuestionAnswer.task_type = Fact
 
 
 class Question(BaseModel):
@@ -67,13 +78,13 @@ class Question(BaseModel):
 
 
 # Function to extract entities from input text using GPT-3.5
-def stream_extract(question: Question) -> Iterable[SubResponse]:
+def stream_extract(question: Question) -> Iterable[Fact]:
     completion = openai.ChatCompletion.create(
         model="gpt-3.5-turbo-0613",
         temperature=0,
         stream=True,
-        functions=[Answers.openai_schema],
-        function_call={"name": Answers.openai_schema["name"]},
+        functions=[QuestionAnswer.openai_schema],
+        function_call={"name": QuestionAnswer.openai_schema["name"]},
         messages=[
             {
                 "role": "system",
@@ -89,7 +100,7 @@ def stream_extract(question: Question) -> Iterable[SubResponse]:
         ],
         max_tokens=2000,
     )
-    return Answers.from_streaming_response(completion)
+    return QuestionAnswer.from_streaming_response(completion)
 
 
 def get_api_key(request: Request):
@@ -121,7 +132,7 @@ async def generate():
             logger.info(f"Fact: {fact}")
             spans = list(fact.get_spans(question.context))
             resp = {
-                "body": fact.body,
+                "body": fact.fact,
                 "spans": spans,
                 "citation": [question.context[a:b] for (a, b) in spans],
            }
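
Note (not part of the patch): a minimal sketch of how the renamed Fact model and its get_spans helper could be exercised after this change. It assumes the example module above is importable as `main`; the sample question, context, and quote below are illustrative only.

# Illustrative usage sketch -- not part of this commit.
# Assumes examples/citation_with_extraction/main.py is importable as `main`;
# the context string and quote are made up for demonstration.
from main import Fact

context = (
    "My name is Jason Liu, and I grew up in Toronto Canada "
    "but I was born in China."
)

fact = Fact(
    fact="The author's last name is Liu.",
    substring_quotes=["My name is Jason Liu"],
)

# get_spans() resolves each quoted substring back to (start, end) character
# offsets in the context, the same offsets the streaming endpoint returns as
# citations in its JSON responses.
spans = list(fact.get_spans(context))
citations = [context[a:b] for (a, b) in spans]
print(spans, citations)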