-
Notifications
You must be signed in to change notification settings - Fork 0
/
voice_language_bot.py
125 lines (108 loc) · 4.85 KB
/
voice_language_bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import openai
import speech_recognition as sr
from pydub import AudioSegment
from pydub.playback import play
import simpleaudio as sa
from google.cloud import texttospeech as tts
import os
# Read the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret does not have to be committed with the source. The placeholder is kept
# as the fallback so behaviour is unchanged when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "<OPENAI_API_KEY>")

# Most recent user prompt and model reply; overwritten by ai_response() on
# every call.
previousUserPrompt = ""
previousAgentResponse = ""
def listen():
    """Record one phrase from the microphone and return it as an open file.

    The captured audio is written to ``input.wav`` in the current working
    directory; that file is then reopened in binary read mode and returned.
    The caller owns the returned file object and is responsible for closing it.
    """
    recognizer = sr.Recognizer()
    # Energy threshold for recording.
    recognizer.energy_threshold = 4000
    # Length of silence that registers as the end of a phrase (default is 0.8).
    recognizer.pause_threshold = 3.0

    with sr.Microphone() as mic:
        print("Listening...")
        # Capture from the microphone; stops once silence is detected.
        captured = recognizer.listen(mic, timeout=None)
        print("Finished listening.")

    with open("input.wav", "wb") as wav_out:
        wav_out.write(captured.get_wav_data())

    return open("input.wav", "rb")
def ai_response(prompt, count):
    """Request a Spanish-tutor reply for *prompt* from the chat model.

    The request consists of a fixed system instruction, one worked example
    exchange, and the user's prompt, sent to ``openai.ChatCompletion.create``
    with model ``gpt-4``. The message list is printed for debugging, and the
    prompt and reply text are stored in the module globals
    ``previousUserPrompt`` and ``previousAgentResponse``.

    Args:
        prompt: Text of the user's utterance.
        count: Turn counter passed by the caller. Currently unused; retained
            so a prompt history can treat the first turn differently later.

    Returns:
        The completion object returned by the API, unmodified.
    """
    global previousUserPrompt
    global previousAgentResponse

    messages = [
        {"role": "system", "content": "You are having a conversation with an american user trying to learn spanish. The user's prompts will be in spanglish and your response should be in spanish and followed by an english translation. Any time the user uses english words, you should include a spanish translation of those words in your response."},
        {"role": "user", "content": "Hola, buenos dias. What is your name?"},
        # The example reply is tagged "assistant" (it was "system") so the
        # model reads it as its own prior turn, not as a second instruction.
        {"role": "assistant", "content": "Hola, buenos dias. Me llamo assistant. ¿Como estas?| Hello, I'm good. My name is assistant. How are you? Correction: 'What is your name?' in spanish is '¿Como te llamas?'"},
        {"role": "user", "content": prompt},
    ]

    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
    )

    previousUserPrompt = prompt
    previousAgentResponse = completion["choices"][0]["message"]["content"]
    print(messages)
    return completion
def speak(text, filename):
    """Synthesize *text* to speech, save it to *filename*, and play it.

    The voice is chosen from *filename*: ``"spanish.mp3"`` selects the
    ``es-US-Neural2-C`` voice and ``"english.mp3"`` selects
    ``en-GB-Neural2-D``. The MP3 returned by the text-to-speech client is
    written to *filename*, converted to a temporary ``temp.wav``, played to
    completion, and the temporary file is then removed.

    Args:
        text: Text to speak. ``None`` or an empty string is a no-op.
        filename: Output MP3 path; must be ``"spanish.mp3"`` or
            ``"english.mp3"``. Any other value prints an error and returns
            without synthesizing anything.

    Returns:
        None.
    """
    if not text:
        return

    # (language_code, voice name) for each supported output file.
    voices = {
        "spanish.mp3": ("es-US", "es-US-Neural2-C"),
        "english.mp3": ("en-GB", "en-GB-Neural2-D"),
    }
    if filename not in voices:
        # Previously execution continued past this message and crashed on an
        # unbound ``voice``; stop here instead.
        print("Error: filename must be either 'spanish.mp3' or 'english.mp3'")
        return
    language_code, voice_name = voices[filename]

    client = tts.TextToSpeechClient()
    synthesis_input = tts.SynthesisInput(text=text)
    voice = tts.VoiceSelectionParams(
        language_code=language_code,
        name=voice_name,  # this selects the voice
        ssml_gender=tts.SsmlVoiceGender.MALE,
    )
    audio_config = tts.AudioConfig(
        audio_encoding=tts.AudioEncoding.MP3
    )
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )

    # Write the response to the output file.
    with open(filename, "wb") as out:
        out.write(response.audio_content)

    try:
        # Convert the MP3 to WAV, which is the format played back below.
        audio = AudioSegment.from_mp3(filename)
        audio.export("temp.wav", format="wav")

        wave_obj = sa.WaveObject.from_wave_file("temp.wav")
        play_obj = wave_obj.play()
        play_obj.wait_done()  # Wait until sound has finished playing
    finally:
        # Remove the temporary file even if conversion or playback failed.
        if os.path.exists("temp.wav"):
            os.remove("temp.wav")
def split_input(input_string):
    """Split *input_string* on ``|`` and return the pieces, each stripped of
    leading and trailing whitespace, as a list."""
    return list(map(str.strip, input_string.split('|')))
def main():
    """Run the conversation loop: listen, transcribe, respond, speak.

    Each iteration records a phrase with ``listen()``, transcribes it with
    ``openai.Audio.transcribe`` (model ``whisper-1``), requests a reply via
    ``ai_response()``, splits the reply on ``|`` into a Spanish part and an
    English part, and speaks each non-empty part. The loop has no exit
    condition of its own.
    """
    count = 0
    while True:
        # listen() returns an open file; close it once it has been
        # transcribed so a handle is not leaked on every turn.
        with listen() as audio_file:
            print("Transcribing audio prompt...")
            transcription = openai.Audio.transcribe("whisper-1", audio_file)
        prompt = transcription["text"]
        # To test without a microphone, replace the listen/transcribe steps
        # above with a fixed prompt, e.g.:
        # prompt = "Hola, me llamo Gabriel. ¿Cómo te llamas?"
        print("Prompt: " + prompt)

        print("Generating repsonse...")
        print("Count: " + str(count))
        full_response = ai_response(prompt, count)
        response_content = full_response["choices"][0]["message"]["content"]
        print(full_response)
        print(response_content)

        # The reply is expected to look like "<spanish> | <english>", but the
        # model may omit the separator or emit more than one. Treat the first
        # piece as Spanish and everything after it as English rather than
        # failing on a two-name unpack.
        parts = split_input(response_content)
        spanish = parts[0]
        english = " ".join(part for part in parts[1:] if part)

        if spanish:
            speak(spanish, "spanish.mp3")
        if english:
            speak(english, "english.mp3")

        count += 1


if __name__ == "__main__":
    main()