-
-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ui): add page to talk with voice, transcription, and tts (#2520)
* feat(ui): add page to talk with voice, transcription, and tts Signed-off-by: Ettore Di Giacinto <[email protected]> * Enhance graphics and status reporting Signed-off-by: Ettore Di Giacinto <[email protected]> * Better UX by blocking invalid actions Signed-off-by: Ettore Di Giacinto <[email protected]> --------- Signed-off-by: Ettore Di Giacinto <[email protected]>
- Loading branch information
Showing
4 changed files
with
321 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
|
||
// DOM handles used by the recording and playback functions below.
const recordButton = document.getElementById('recordButton');
const audioPlayback = document.getElementById('audioPlayback');
const resetButton = document.getElementById('resetButton');

// Recorder state: the active MediaRecorder, the chunks it has delivered so
// far, and whether a recording is currently in progress.
let mediaRecorder;
let audioChunks = [];
let isRecording = false;

// Chat messages ({ role, content }) exchanged so far, and the handle of the
// timer that clears them after a period without new replies.
let conversationHistory = [];
let resetTimer;
|
||
/**
 * Reads the current contents of the `apiKey` input.
 *
 * @returns {string} The value of the element with id `apiKey`.
 */
function getApiKey() {
  return document.getElementById('apiKey').value;
}
|
||
/**
 * Reads the model chosen in the `modelSelect` dropdown.
 *
 * @returns {string} The value of the element with id `modelSelect`.
 */
function getModel() {
  return document.getElementById('modelSelect').value;
}
|
||
/**
 * Reads the model chosen in the `whisperModelSelect` dropdown.
 *
 * @returns {string} The value of the element with id `whisperModelSelect`.
 */
function getWhisperModel() {
  return document.getElementById('whisperModelSelect').value;
}
|
||
/**
 * Reads the model chosen in the `ttsModelSelect` dropdown.
 *
 * @returns {string} The value of the element with id `ttsModelSelect`.
 */
function getTTSModel() {
  return document.getElementById('ttsModelSelect').value;
}
|
||
/**
 * Clears the conversation history and cancels any pending automatic reset.
 *
 * Used both as a timer callback (called with no arguments) and as the click
 * handler of the "Reset conversation" link. That link is an `<a href="#">`,
 * so when an event is supplied its default action is cancelled to keep the
 * browser from navigating to `#`.
 *
 * @param {Event} [event] - The click event, when invoked as an event handler.
 */
function resetConversation(event) {
  if (event && typeof event.preventDefault === 'function') {
    event.preventDefault();
  }
  clearTimeout(resetTimer);
  conversationHistory = [];
  console.log("Conversation has been reset.");
}
|
||
/**
 * (Re)starts the countdown after which the conversation is cleared
 * automatically. Any countdown already running is cancelled first, so only
 * one reset is ever pending.
 */
function setResetTimer() {
  const resetDelayMs = 5 * 60 * 1000; // 5 minutes
  clearTimeout(resetTimer);
  resetTimer = setTimeout(resetConversation, resetDelayMs);
}
|
||
// Wire the page controls to their handlers.
recordButton.addEventListener('click', toggleRecording);
resetButton.addEventListener('click', resetConversation);
|
||
/**
 * Click handler for the record button: starts a recording when none is in
 * progress, otherwise stops the current one.
 */
function toggleRecording() {
  if (isRecording) {
    stopRecording();
  } else {
    startRecording();
  }
}
|
||
/**
 * Asks for microphone access and starts recording into `audioChunks`.
 *
 * The page is switched to its "recording" state only after the recorder has
 * actually started. If the MediaDevices API is missing, or the user denies
 * access, an alert is shown and the UI is left untouched.
 *
 * @returns {Promise<void>} Resolves once recording has started or been refused.
 */
async function startRecording() {
  if (!navigator.mediaDevices) {
    alert('MediaDevices API not supported!');
    return;
  }

  // Block further clicks while the permission prompt is pending; a second
  // click would otherwise start a second recorder on a second stream.
  recordButton.disabled = true;
  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    console.error("Could not access the microphone:", err);
    alert('Could not access the microphone: ' + err.message);
    return;
  } finally {
    recordButton.disabled = false;
  }

  mediaRecorder = new MediaRecorder(stream);
  audioChunks = [];
  mediaRecorder.ondataavailable = (event) => {
    audioChunks.push(event.data);
  };
  mediaRecorder.start();

  document.getElementById("recording").style.display = "block";
  document.getElementById("resetButton").style.display = "none";
  recordButton.textContent = 'Stop Recording';
  // Grey the button out while a recording is in progress.
  recordButton.classList.add("bg-gray-500");

  isRecording = true;
}
|
||
/**
 * Stops the active recording and runs the voice round-trip on the captured
 * audio: transcription, chat completion, text-to-speech, then playback.
 *
 * Progress is reported through the `statustext` element. Whether the
 * round-trip succeeds or fails, the controls are restored afterwards so the
 * user can record again; on failure the error message is shown instead of
 * the usual prompt.
 */
function stopRecording() {
  // Hold on to this recorder: the module-level variable is replaced as soon
  // as the next recording starts.
  const recorder = mediaRecorder;

  // Install the handler before stopping so the stop event cannot be missed.
  recorder.onstop = async () => {
    // Stop the captured tracks so the browser releases the microphone.
    if (recorder.stream) {
      recorder.stream.getTracks().forEach((track) => track.stop());
    }

    const statusText = document.getElementById("statustext");
    document.getElementById("recording").style.display = "none";
    document.getElementById("recordButton").style.display = "none";
    document.getElementById("loader").style.display = "block";

    let finalStatus = "Press the record button to start recording.";
    try {
      const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
      statusText.textContent = "Processing audio...";
      const transcript = await sendAudioToWhisper(audioBlob);
      console.log("Transcript:", transcript);
      statusText.textContent = "Seems you said: " + transcript + ". Generating response...";
      const responseText = await sendTextToChatGPT(transcript);

      console.log("Response:", responseText);
      statusText.textContent = "Response generated: '" + responseText + "'. Generating audio response...";

      const ttsAudio = await getTextToSpeechAudio(responseText);
      playAudioResponse(ttsAudio);
    } catch (err) {
      console.error("Voice round-trip failed:", err);
      finalStatus = "Something went wrong: " + err.message + ". Press the record button to try again.";
    } finally {
      // Always hand the controls back, otherwise a failed request would
      // leave the loader spinning with the record button hidden.
      recordButton.textContent = 'Record';
      recordButton.classList.remove("bg-gray-500");
      isRecording = false;
      document.getElementById("loader").style.display = "none";
      document.getElementById("recordButton").style.display = "block";
      document.getElementById("resetButton").style.display = "block";
      statusText.textContent = finalStatus;
    }
  };

  recorder.stop();
}
|
||
/**
 * Submit handler for the API-key form: stores the entered key in
 * localStorage under "key" and removes focus from the input.
 *
 * @param {Event} event - The form's submit event; its default action
 *   (submitting the form) is cancelled.
 */
function submitKey(event) {
  event.preventDefault();
  const apiKeyInput = document.getElementById("apiKey");
  localStorage.setItem("key", apiKeyInput.value);
  apiKeyInput.blur();
}
|
||
// Persist the key whenever the API-key form is submitted.
document.getElementById("key").addEventListener("submit", submitKey);

// Pre-fill the input with the key saved by a previous visit, if any.
// Declared with const: a bare assignment would create an implicit global
// and throws a ReferenceError in strict mode.
const storedKey = localStorage.getItem("key");
document.getElementById("apiKey").value = storedKey || "";
|
||
|
||
/**
 * Uploads recorded audio to `/v1/audio/transcriptions` and returns the
 * transcript.
 *
 * The request is a multipart form with the audio as `file` and the selected
 * whisper model as `model`. The API key stored in localStorage under "key"
 * is sent as a bearer token when one is present.
 *
 * @param {Blob} audioBlob - The recorded audio.
 * @returns {Promise<string>} The `text` field of the JSON response.
 * @throws {Error} If the server answers with a non-2xx status.
 */
async function sendAudioToWhisper(audioBlob) {
  const formData = new FormData();
  formData.append('file', audioBlob);
  formData.append('model', getWhisperModel());

  // Only attach credentials when a key is stored, instead of sending the
  // literal string "Bearer null".
  const apiKey = localStorage.getItem("key");
  const headers = {};
  if (apiKey) {
    headers['Authorization'] = `Bearer ${apiKey}`;
  }

  const response = await fetch('/v1/audio/transcriptions', {
    method: 'POST',
    headers: headers,
    body: formData
  });
  if (!response.ok) {
    throw new Error(`Transcription request failed with status ${response.status}`);
  }

  const result = await response.json();
  console.log("Whisper result:", result);
  return result.text;
}
|
||
/**
 * Sends the user's text, together with the conversation so far, to
 * `/v1/chat/completions` and returns the assistant's reply.
 *
 * On success the user message and the reply are both kept in
 * `conversationHistory` and the automatic reset countdown is restarted. On
 * failure the user message is removed again, so a retry does not resend a
 * turn that never received an answer.
 *
 * @param {string} text - The user's message.
 * @returns {Promise<string>} The content of the first choice in the response.
 * @throws {Error} If the server answers with a non-2xx status or the
 *   response contains no choice with a message.
 */
async function sendTextToChatGPT(text) {
  const userMessage = { role: "user", content: text };
  conversationHistory.push(userMessage);

  try {
    // Only attach credentials when a key is stored, instead of sending the
    // literal string "Bearer null".
    const apiKey = localStorage.getItem("key");
    const headers = { 'Content-Type': 'application/json' };
    if (apiKey) {
      headers['Authorization'] = `Bearer ${apiKey}`;
    }

    const response = await fetch('/v1/chat/completions', {
      method: 'POST',
      headers: headers,
      body: JSON.stringify({
        model: getModel(),
        messages: conversationHistory
      })
    });
    if (!response.ok) {
      throw new Error(`Chat completion request failed with status ${response.status}`);
    }

    const result = await response.json();
    const firstChoice = result.choices && result.choices[0];
    if (!firstChoice || !firstChoice.message) {
      throw new Error('Chat completion response did not contain a message');
    }
    const responseText = firstChoice.message.content;
    conversationHistory.push({ role: "assistant", content: responseText });

    setResetTimer();

    return responseText;
  } catch (err) {
    // Roll back the unanswered user turn. The lookup is by identity because
    // the history may have been reset while the request was in flight.
    const index = conversationHistory.lastIndexOf(userMessage);
    if (index !== -1) {
      conversationHistory.splice(index, 1);
    }
    throw err;
  }
}
|
||
/**
 * Requests synthesized speech for `text` from `/v1/audio/speech`.
 *
 * The JSON body carries the text as `input` and the selected TTS model as
 * `model`. The `backend` and `voice` fields that the original left commented
 * out are not sent. The API key stored in localStorage under "key" is sent
 * as a bearer token when one is present.
 *
 * @param {string} text - The text to speak.
 * @returns {Promise<Blob>} The response body as a blob.
 * @throws {Error} If the server answers with a non-2xx status, so an error
 *   payload is never handed to the audio player.
 */
async function getTextToSpeechAudio(text) {
  // Only attach credentials when a key is stored, instead of sending the
  // literal string "Bearer null".
  const apiKey = localStorage.getItem("key");
  const headers = { 'Content-Type': 'application/json' };
  if (apiKey) {
    headers['Authorization'] = `Bearer ${apiKey}`;
  }

  const response = await fetch('/v1/audio/speech', {
    method: 'POST',
    headers: headers,
    body: JSON.stringify({
      input: text,
      model: getTTSModel(),
    })
  });
  if (!response.ok) {
    throw new Error(`Text-to-speech request failed with status ${response.status}`);
  }

  return response.blob();
}
|
||
/**
 * Loads the given audio into the `audioPlayback` element, reveals the
 * element, and starts playback.
 *
 * The object URL created for the previous response is revoked once it has
 * been replaced, so URLs do not accumulate over a long conversation. A
 * rejected `play()` promise is logged rather than left unhandled.
 *
 * @param {Blob} audioBlob - The audio to play.
 */
function playAudioResponse(audioBlob) {
  const previousUrl = audioPlayback.src;
  const audioUrl = URL.createObjectURL(audioBlob);
  audioPlayback.src = audioUrl;
  audioPlayback.hidden = false;

  // Only revoke URLs this function created; they always use the blob: scheme.
  if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }

  const playback = audioPlayback.play();
  if (playback && typeof playback.catch === 'function') {
    playback.catch((err) => {
      console.error("Audio playback failed:", err);
    });
  }
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
<!doctype html> | ||
<!--
Talk page: lets the user record speech, pick the models to use, and hear the reply.
The behaviour lives in /static/talk.js, which looks elements up by id:
recordButton, resetButton, audioPlayback, recording, loader, statustext,
apiKey, key, modelSelect, whisperModelSelect and ttsModelSelect.
Renaming any of these ids requires the same change in talk.js.
NOTE(review): x-data / x-show / x-model / @click appear to be Alpine.js
directives, and $store.chat is not defined in this file; confirm both are
provided by the head partial.
-->
<html lang="en"> | ||
{{template "views/partials/head" .}} | ||
<script defer src="/static/talk.js"></script> | ||
<style> | ||
body { | ||
overflow: hidden; | ||
} | ||
</style> | ||
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }"> | ||
<div class="flex flex-col min-h-screen"> | ||
|
||
{{template "views/partials/navbar"}} | ||
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " > | ||
<!-- Chat Header --> | ||
<!-- The header shows either the "Set API Key" button (component === 'menu') or the key form (component === 'key'). -->
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }"> | ||
|
||
<div class="flex items-center justify-center"> | ||
|
||
<div x-show="component === 'menu'" id="menu"> | ||
|
||
<button @click="component = 'key'" title="Update API key" | ||
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong" | ||
>Set API Key🔑</button> | ||
|
||
</div> | ||
|
||
<!-- talk.js listens for this form's submit event and stores the #apiKey value in localStorage under "key". -->
<form x-show="component === 'key'" id="key"> | ||
<input | ||
type="password" | ||
id="apiKey" | ||
name="apiKey" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
placeholder="API Key" | ||
x-model.lazy="key" | ||
/> | ||
<button @click="component = 'menu'" type="submit" title="Save API key"> | ||
<i class="fa-solid fa-arrow-right"></i> | ||
</button> | ||
</form> | ||
</div> | ||
</div> | ||
|
||
<div class="flex items-center justify-center"> | ||
<div class="w-full p-4 max-w-md border-t border-gray-700 "> | ||
<div class="bg-gray-700 shadow-md rounded px-8 pt-6 pb-8 mb-4"> | ||
<!-- Indicators driven by talk.js through style.display: #recording while capturing, #loader while processing. #statustext receives progress messages. -->
<div id="recording" class="" style="display: none;"> | ||
<i class="fa-solid fa-microphone animate-pulse text-red-700"></i> | ||
<span class="text-white-700 text-sm font-bold mb-2">Recording... press "Stop recording" to stop</span> | ||
</div> | ||
<div id="loader" class="my-2 loader" style="display: none;"></div> | ||
<div id="statustext" class="my-2 p-2 block text-white-700 text-sm font-bold mb-2" ></div> | ||
<!-- Model pickers: each select renders one option per entry in .ModelsConfig, so all three list the same names. -->
<div class="mb-4" > | ||
<label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label> | ||
<select id="modelSelect" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
> | ||
<option value="" disabled class="text-gray-400" >Select a model</option> | ||
|
||
{{ range .ModelsConfig }} | ||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> | ||
{{ end }} | ||
</select> | ||
</div> | ||
|
||
<div class="mb-4" > | ||
<label for="whisperModelSelect" class="block text-white-700 text-sm font-bold mb-2">Whisper Model:</label> | ||
<select id="whisperModelSelect" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
|
||
> | ||
<option value="" disabled class="text-gray-400" >Select a model</option> | ||
|
||
{{ range .ModelsConfig }} | ||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> | ||
{{ end }} | ||
</select> | ||
</div> | ||
|
||
|
||
<div class="mb-4" > | ||
<label for="ttsModelSelect" class="block text-white-700 text-sm font-bold mb-2">TTS Model:</label> | ||
<select id="ttsModelSelect" | ||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none" | ||
> | ||
<option value="" disabled class="text-gray-400" >Select a model</option> | ||
{{ range .ModelsConfig }} | ||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> | ||
{{ end }} | ||
</select> | ||
</div> | ||
|
||
|
||
<!-- Controls: talk.js toggles recording when #recordButton is clicked and clears the conversation when #resetButton is clicked. #audioPlayback stays hidden until talk.js has a reply to play. -->
<button id="recordButton" | ||
class="bg-red-500 hover:bg-red-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline" | ||
><i class="fa-solid fa-microphone pr-2"></i>Talk</button> | ||
<a id="resetButton" | ||
class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800" | ||
href="#" | ||
>Reset conversation</a> | ||
<audio id="audioPlayback" controls hidden></audio> | ||
|
||
</div> | ||
</div> | ||
</div> | ||
</div> | ||
</body> | ||
</html> |