From fae7254d62e9dea3646d7fa9050b8726ac08d43d Mon Sep 17 00:00:00 2001
From: Jason
Date: Thu, 27 Jul 2023 22:48:48 +0800
Subject: [PATCH] update fact

---
 .../citation_fuzzy_match.py               |  4 +-
 examples/citation_with_extraction/main.py | 49 ++++++++++++-------
 2 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/examples/citation_with_extraction/citation_fuzzy_match.py b/examples/citation_with_extraction/citation_fuzzy_match.py
index a746b027c..e718a38ef 100644
--- a/examples/citation_with_extraction/citation_fuzzy_match.py
+++ b/examples/citation_with_extraction/citation_fuzzy_match.py
@@ -67,7 +67,7 @@ def ask_ai(question: str, context: str) -> QuestionAnswer:
     # Making a request to the hypothetical 'openai' module
     completion = openai.ChatCompletion.create(
         model="gpt-3.5-turbo-0613",
-        temperature=0.2,
+        temperature=0,
         max_tokens=1000,
         functions=[QuestionAnswer.openai_schema],
         function_call={"name": QuestionAnswer.openai_schema["name"]},
@@ -90,7 +90,7 @@ def ask_ai(question: str, context: str) -> QuestionAnswer:
     return QuestionAnswer.from_response(completion)
 
 
-question = "What did the author do during college?"
+question = "What is the author's last name?"
 context = """
 My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.I went to an arts highschool but in university I studied Computational Mathematics and physics. As part of coop I worked at many companies including Stitchfix, Facebook. I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.
 """
diff --git a/examples/citation_with_extraction/main.py b/examples/citation_with_extraction/main.py
index 711ac10e9..1a4bd70fb 100644
--- a/examples/citation_with_extraction/main.py
+++ b/examples/citation_with_extraction/main.py
@@ -2,7 +2,7 @@
 from typing import Iterable, List
 from fastapi import FastAPI, Request, HTTPException
 from fastapi.params import Depends
-from openai_function_call import MultiTask
+from openai_function_call import MultiTask, OpenAISchema
 from pydantic import BaseModel, Field
 from starlette.responses import StreamingResponse
 
@@ -10,6 +10,8 @@
 import openai
 import logging
 
+from openai_function_call.dsl.multitask import MultiTaskBase
+
 logger = logging.getLogger(__name__)
 
 # FastAPI app
@@ -18,19 +20,20 @@
 )
 
 
-class SubResponse(BaseModel):
+class Fact(BaseModel):
     """
-    If there are multiple phrases with difference citations. Each one should be its own object.
-    make sure to break them apart such that each one only uses a set of
-    sources that are relevant to it.
-
-    When possible return `substring_quote` before the `body`.
+    Class representing a single statement.
+    Each fact has a body and a list of sources.
+    If there are multiple facts, make sure to break them apart such that each one only uses a set of sources that are relevant to it.
     """
 
-    body: str = Field(..., description="Body of the sentences, as part of a response")
+    fact: str = Field(
+        ...,
+        description="Body of the sentence, as part of a response; it should read like a sentence that answers the question",
+    )
     substring_quotes: List[str] = Field(
         ...,
-        description="Each source should be a direct quote from the context, as a substring of the original content but should be a wide enough quote to capture the context of the quote. The citation should at least be long and capture the context and be a full sentence.",
+        description="Each source should be a direct quote from the context, as a substring of the original content",
     )
 
     def _get_span(self, quote, context):
@@ -54,11 +57,19 @@ def get_spans(self, context):
             yield from self._get_span(quote, context)
 
 
-Answers = MultiTask(
-    SubResponse,
-    name="Answer",
-    description="Correctly answer questions based on a context. Quotes should be full sentences when possible",
-)
+class QuestionAnswer(OpenAISchema, MultiTaskBase):
+    """
+    Class representing a question and its answer as a list of facts; each one should have a source.
+    Each fact contains a body and a list of sources."""
+
+    question: str = Field(..., description="Question that was asked")
+    tasks: List[Fact] = Field(
+        ...,
+        description="Body of the answer, each fact should be its own separate object with a body and a list of sources",
+    )
+
+
+QuestionAnswer.task_type = Fact
 
 
 class Question(BaseModel):
@@ -67,13 +78,13 @@ class Question(BaseModel):
 
 
 # Function to extract entities from input text using GPT-3.5
-def stream_extract(question: Question) -> Iterable[SubResponse]:
+def stream_extract(question: Question) -> Iterable[Fact]:
     completion = openai.ChatCompletion.create(
         model="gpt-3.5-turbo-0613",
         temperature=0,
         stream=True,
-        functions=[Answers.openai_schema],
-        function_call={"name": Answers.openai_schema["name"]},
+        functions=[QuestionAnswer.openai_schema],
+        function_call={"name": QuestionAnswer.openai_schema["name"]},
         messages=[
             {
                 "role": "system",
@@ -89,7 +100,7 @@ def stream_extract(question: Question) -> Iterable[SubResponse]:
         ],
         max_tokens=2000,
     )
-    return Answers.from_streaming_response(completion)
+    return QuestionAnswer.from_streaming_response(completion)
 
 
 def get_api_key(request: Request):
@@ -121,7 +132,7 @@ async def generate():
             logger.info(f"Fact: {fact}")
             spans = list(fact.get_spans(question.context))
             resp = {
-                "body": fact.body,
+                "body": fact.fact,
                 "spans": spans,
                 "citation": [question.context[a:b] for (a, b) in spans],
            }
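
Note (not part of the patch): a minimal sketch of how the renamed Fact model and its get_spans helper could be exercised after this change. It assumes the example module above is importable as `main`; the sample question, context, and quote below are illustrative only.

# Illustrative usage sketch -- not part of this commit.
# Assumes examples/citation_with_extraction/main.py is importable as `main`;
# the context string and quote are made up for demonstration.
from main import Fact

context = (
    "My name is Jason Liu, and I grew up in Toronto Canada "
    "but I was born in China."
)

fact = Fact(
    fact="The author's last name is Liu.",
    substring_quotes=["My name is Jason Liu"],
)

# get_spans() resolves each quoted substring back to (start, end) character
# offsets in the context, the same offsets the streaming endpoint returns as
# citations in its JSON responses.
spans = list(fact.get_spans(context))
citations = [context[a:b] for (a, b) in spans]
print(spans, citations)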