-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutil.py
124 lines (95 loc) · 3.2 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import re
import nltk
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
# Local
from constants import GEMINI_API_KEY
# Safety configuration: every listed harm category is mapped to BLOCK_NONE
safety_settings = {
    category: HarmBlockThreshold.BLOCK_NONE
    for category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
}

# Shared "gemini-pro" model handle used by the helpers in this module
__llm_model__ = genai.GenerativeModel("gemini-pro", safety_settings=safety_settings)

# Register the API key with the genai library.
# NOTE(review): this runs after the model object is constructed; presumably the
# library resolves credentials lazily at request time -- confirm.
genai.configure(api_key=GEMINI_API_KEY)
def remove_special_chars(text):
    """
    Strip every character that is not an ASCII letter, a digit or whitespace.

    Args:
        text (str): The text to filter.
    Returns:
        str: The text with all other characters deleted.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9\s]')
    return disallowed.sub('', text)
def remove_stop_words_from_text(text, stop_words=None):
    """
    Function to remove stop words from a given text.

    The text is split on single space characters only and the surviving
    pieces are re-joined with single spaces. Empty pieces produced by
    consecutive spaces are kept, and tabs or newlines are not treated as
    separators. Matching is exact and case-sensitive.

    Args:
        text (str): The text to remove stop words from.
        stop_words (iterable of str, optional): The words to drop. When
            omitted, NLTK's English stop word list is used.
    Returns:
        str: The text with stop words removed.
    """
    if stop_words is None:
        # Default path: needs the NLTK "stopwords" corpus to be available.
        stop_words = nltk.corpus.stopwords.words('english')
    # Hash-based container so each membership test is O(1).
    blocked = frozenset(stop_words)
    return " ".join(word for word in text.split(" ") if word not in blocked)
def clean_text(text):
    """
    Normalize a text: trim it, lowercase it, drop special characters and
    stop words, and squeeze repeated spaces.

    Args:
        text (str): The text to clean.
    Returns:
        str: The cleaned text.
    """
    normalized = remove_special_chars(text.strip().lower())
    without_stop_words = remove_stop_words_from_text(normalized).strip()
    # Collapse each run of one or more spaces into a single space
    return re.sub(r' +', ' ', without_stop_words).strip()
def get_formatted_key_value_pairs(data: dict):
    """
    Render a dictionary as "key: value" lines, one entry per line.

    Underscores in each key are replaced with spaces, and the joined result
    is stripped of leading and trailing whitespace.

    Args:
        data (dict): The dictionary to format.
    Returns:
        str: The newline-separated key-value pairs.
    """
    lines = (f"{name.replace('_', ' ')}: {item}" for name, item in data.items())
    return "\n".join(lines).strip()
def get_unique_values_from_dict(data: dict):
    """
    Collect the distinct values stored in a dictionary.

    Args:
        data (dict): The dictionary whose values are collected.
    Returns:
        set: The set of unique values.
    """
    return set(data.values())
def get_response_from_llm(prompt: str):
    """
    Send a prompt to the module-level language model and return its reply.

    Args:
        prompt (str): The prompt to generate a response for.
    Returns:
        str: The response text with surrounding whitespace removed.
    """
    reply = __llm_model__.generate_content(prompt)
    return reply.text.strip()