import logging
import re
import time
from typing import Dict, List

import openai
import tiktoken
from slack_sdk.web import WebClient

#
# Internal functions
#

MAX_TOKENS = 1024
GPT_3_5_TURBO_0301_MODEL = "gpt-3.5-turbo-0301"


def call_openai(
    api_key: str, messages: List[Dict[str, str]], user: str, logger: logging.Logger
):
    # Remove old user messages to make sure we have room for max_tokens
    # See also: https://platform.openai.com/docs/guides/chat/introduction
    # > total tokens must be below the model's maximum limit (4096 tokens for gpt-3.5-turbo-0301)
    # With MAX_TOKENS (1024) reserved for the completion, the prompt itself
    # must stay below 4096 - 1024 = 3072 tokens.
    while calculate_num_tokens(messages) >= 4096 - MAX_TOKENS:
        removed = False
        for i, message in enumerate(messages):
            if message["role"] == "user":
                del messages[i]
                removed = True
                break
        if not removed:
            # Fall through and let the OpenAI error handler deal with it
            break

    start = time.time()
    response = openai.ChatCompletion.create(
        api_key=api_key,
        model="gpt-3.5-turbo",
        request_timeout=25,  # seconds
        messages=messages,
        top_p=1,
        n=1,
        max_tokens=MAX_TOKENS,
        temperature=1,
        presence_penalty=0,
        frequency_penalty=0,
        logit_bias={},
        user=user,
    )
    elapsed_time = time.time() - start
    logger.debug(f"Spent {elapsed_time} seconds for an OpenAI API call")
    return response
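

# A minimal usage sketch for call_openai, kept as a comment so the module stays
# importable; the API key, Slack user ID, and message history are placeholders.
#
#     logger = logging.getLogger(__name__)
#     response = call_openai(
#         api_key="sk-...",  # hypothetical key
#         messages=[
#             {"role": "system", "content": "You are a helpful assistant."},
#             {"role": "user", "content": "Hello!"},
#         ],
#         user="U0123456789",  # Slack user ID, passed through for abuse monitoring
#         logger=logger,
#     )
#     reply_text = response["choices"][0]["message"]["content"]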


def post_wip_message(
    client: WebClient,
    channel: str,
    thread_ts: str,
    messages: List[Dict[str, str]],
    user: str,
):
    return client.chat_postMessage(
        channel=channel,
        thread_ts=thread_ts,
        text=":hourglass_flowing_sand: Wait a second, please ...",
        metadata={
            "event_type": "chat-gpt-convo",
            "event_payload": {"messages": messages, "user": user},
        },
    )


def update_wip_message(
    client: WebClient,
    channel: str,
    ts: str,
    text: str,
    messages: List[Dict[str, str]],
    user: str,
):
    client.chat_update(
        channel=channel,
        ts=ts,
        text=text,
        metadata={
            "event_type": "chat-gpt-convo",
            "event_payload": {"messages": messages, "user": user},
        },
    )
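

# A minimal sketch of the intended flow for the two helpers above (channel,
# thread, and user IDs are placeholders): post the placeholder message first,
# then overwrite it in place via the ts returned by chat_postMessage.
#
#     wip = post_wip_message(client, "C0123456789", "1670000000.000100", messages, "U0123456789")
#     ...  # call OpenAI and format the reply into reply_text
#     update_wip_message(client, "C0123456789", wip["ts"], reply_text, messages, "U0123456789")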


def calculate_num_tokens(
    messages: List[Dict[str, str]],
    model: str = GPT_3_5_TURBO_0301_MODEL,
) -> int:
    """Returns the number of tokens used by a list of messages."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    if model == GPT_3_5_TURBO_0301_MODEL:  # note: future models may deviate from this
        num_tokens = 0
        for message in messages:
            # every message follows <im_start>{role/name}\n{content}<im_end>\n
            num_tokens += 4
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":  # if there's a name, the role is omitted
                    num_tokens += -1  # role is always required and always 1 token
        num_tokens += 2  # every reply is primed with <im_start>assistant
        return num_tokens
    else:
        error = (
            f"Calculating the number of tokens for model {model} is not yet supported. "
            "See https://github.com/openai/openai-python/blob/main/chatml.md "
            "for information on how messages are converted to tokens."
        )
        raise NotImplementedError(error)
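

# A worked example for calculate_num_tokens (an illustration, not from the
# original file): each message costs 4 framing tokens plus the encoded length
# of every value, and the reply priming adds 2. With cl100k_base, "user"
# should encode to 1 token and "Hello!" to 2, so:
#
#     calculate_num_tokens([{"role": "user", "content": "Hello!"}])
#     # 4 (framing) + 1 ("user") + 2 ("Hello!") + 2 (priming) = 9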


def format_assistant_reply(content: str) -> str:
    # Strip leading newlines and drop the language hints from code fences,
    # since Slack's mrkdwn does not use them for syntax highlighting
    result = content
    for o, n in [
        ("^\n+", ""),
        ("```[Rr]ust\n", "```\n"),
        ("```[Rr]uby\n", "```\n"),
        ("```[Ss]cala\n", "```\n"),
        ("```[Kk]otlin\n", "```\n"),
        ("```[Jj]ava\n", "```\n"),
        ("```[Gg]o\n", "```\n"),
        ("```[Ss]wift\n", "```\n"),
        ("```[Oo]bjective[Cc]\n", "```\n"),
        ("```[Cc]\n", "```\n"),
        ("```[Ss][Qq][Ll]\n", "```\n"),
        ("```[Pp][Hh][Pp]\n", "```\n"),
        ("```[Pp][Ee][Rr][Ll]\n", "```\n"),
        ("```[Jj]ava[Ss]cript\n", "```\n"),
        ("```[Tt]ype[Ss]cript\n", "```\n"),
        ("```[Pp]ython\n", "```\n"),
    ]:
        result = re.sub(o, n, result)
    return result
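

# A small illustration of format_assistant_reply: the leading newlines are
# removed by the "^\n+" pattern, and the language hint on the opening code
# fence is stripped.
#
#     format_assistant_reply("\n\n```python\nprint('hi')\n```")
#     # -> "```\nprint('hi')\n```"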