-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpt3_correspondence_summarize_3.py
48 lines (31 loc) · 1.36 KB
/
gpt3_correspondence_summarize_3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import openai
import glob
import json
import util
from transformers import GPT2TokenizerFast
# GPT-2 BPE tokenizer, used by count_tokens() below to measure prompt length.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
# API key is read from the environment; openai.Completion calls below fail if unset.
openai.api_key = os.getenv("OPENAI_API_KEY")
def count_tokens(text: str) -> int:
    """Return how many GPT-2 tokens *text* encodes to."""
    token_ids = tokenizer.encode(text)
    return len(token_ids)
# Ordered correspondence page-id sets; loaded here for downstream use
# (not referenced inside this loop).
with open('group-ui-util/anthony-correspondence-id-sets.json') as f:
    page_order = json.load(f)

# For each correspondence resource, ask the model for a structured JSON
# summary (recipient, sender, date, two-sentence contents, concepts) and
# cache the raw completion choice back into the resource file so reruns
# skip already-summarized letters.
for resource in glob.glob('anthony-correspondence-resources/*.json'):
    print(resource)
    # Use context managers so file handles are closed deterministically
    # (the previous bare open() calls leaked handles).
    with open(resource) as f:
        data = json.load(f)
    if 'gpt' not in data:
        data['gpt'] = {}
    if 'correspondence-summarize-2-sentences' in data['gpt']:
        # Already summarized on a previous run; don't spend API tokens again.
        continue
    full_text = data['full_text']
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=f"Using only the text below extract in full dictonary JSON format who this letter was sent to, who it was sent from, on what date in the format of a UNIX timestamp, summarize the contents in two sentences, and extract the major intangible conceptual concepts mentioned, using the dictonary keys recipient, sender, date, contents, conceptsMentioned:\n---\n{full_text}\n---\n",
        temperature=0.25,
        max_tokens=506,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    # Store the whole first choice (includes 'text' plus finish metadata).
    data['gpt']['correspondence-summarize-2-sentences'] = response['choices'][0]
    # Write back immediately so progress survives a crash mid-run; the
    # context manager guarantees the JSON is flushed and the handle closed.
    with open(resource, 'w') as f:
        json.dump(data, f, indent=2)