Add Whisper model selection to audio transcription
sweep-ai[bot] authored Jun 8, 2024
1 parent a8d80f9 commit 7740747
Showing 3 changed files with 56 additions and 3 deletions.
backend/apps/audio/main.py (2 additions, 1 deletion)
@@ -170,6 +170,7 @@ async def speech(request: Request, user=Depends(get_verified_user)):
 @app.post("/transcriptions")
 def transcribe(
     file: UploadFile = File(...),
+    model: str = Form(...),
     user=Depends(get_current_user),
 ):
     log.info(f"file.content_type: {file.content_type}")
@@ -189,7 +190,7 @@ def transcribe(
     f.close()

     whisper_kwargs = {
-        "model_size_or_path": WHISPER_MODEL,
+        "model_size_or_path": model,
         "device": whisper_device_type,
         "compute_type": "int8",
         "download_root": WHISPER_MODEL_DIR,
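Because `model` is declared with `Form(...)`, it becomes a required multipart field: a request that omits it will now fail FastAPI validation with a 422. Below is a minimal client-side sketch of the new request shape; the `/audio/api/v1` route prefix and Bearer-token auth are assumptions about how this app is mounted, not something the diff itself shows.

```typescript
// Sketch only: build the multipart body the updated endpoint expects.
// Assumptions: the audio app is reachable under /audio/api/v1 and
// get_current_user accepts a Bearer token.
export const transcribeAudio = async (token: string, file: File, model: string) => {
	const formData = new FormData();
	formData.append('file', file);
	// Newly required field; e.g. 'base' or 'small' for a local Whisper model.
	formData.append('model', model);

	const res = await fetch('/audio/api/v1/transcriptions', {
		method: 'POST',
		headers: { Authorization: `Bearer ${token}` },
		body: formData
	});

	if (!res.ok) throw await res.json();
	return res.json();
};
```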
src/lib/apis/openai/index.ts (35 additions, 0 deletions)
@@ -386,6 +386,41 @@ export const generateTitle = async (
 	return res?.choices[0]?.message?.content.replace(/["']/g, '') ?? 'New Chat';
 };

+export const transcribe = async (
+	token: string = '',
+	file: File,
+	model: string = 'whisper-1'
+) => {
+	let error = null;
+
+	const formData = new FormData();
+	formData.append('file', file);
+	formData.append('model', model);
+
+	const res = await fetch(`${OPENAI_API_BASE_URL}/transcriptions`, {
+		method: 'POST',
+		headers: {
+			Authorization: `Bearer ${token}`
+		},
+		body: formData
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.catch((err) => {
+			console.log(err);
+			error = err;
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 export const generateSearchQuery = async (
 	token: string = '',
 	model: string,
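For illustration, here is a hypothetical caller for the new `transcribe()` helper. The `$lib/apis/openai` import path follows the file shown above, and the `text` field on the response is an assumption (OpenAI-compatible transcription endpoints return one), not something this diff guarantees.

```typescript
import { transcribe } from '$lib/apis/openai';

// Transcribe the first file chosen in an <input type="file"> element.
// `token` would come from the app's session in practice; 'whisper-1'
// mirrors the helper's own default model.
export const transcribeSelectedFile = async (
	input: HTMLInputElement,
	token: string,
	model: string = 'whisper-1'
): Promise<string | null> => {
	const file = input.files?.[0];
	if (!file) return null;

	const res = await transcribe(token, file, model);
	return res?.text ?? null; // assumes an OpenAI-style { text } response
};
```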
src/lib/components/chat/Settings/Audio.svelte (19 additions, 2 deletions)
@@ -16,8 +16,9 @@
 	let OpenAIKey = '';
 	let OpenAISpeaker = '';

-	let STTEngines = ['', 'openai'];
+	let STTEngines = ['', 'openai', 'whisper-openai'];
 	let STTEngine = '';
+	let STTModel = '';

 	let conversationMode = false;
 	let speechAutoSend = false;
@@ -144,6 +145,7 @@
 	saveSettings({
 		audio: {
 			STTEngine: STTEngine !== '' ? STTEngine : undefined,
+			STTModel: STTModel !== '' ? STTModel : undefined,
 			TTSEngine: TTSEngine !== '' ? TTSEngine : undefined,
 			speaker:
 				(TTSEngine === 'openai' ? OpenAISpeaker : speaker) !== ''
@@ -162,7 +164,7 @@
 <div>
 	<div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>

<div class=" py-0.5 flex w-full justify-between">
<div class=" py-0.5 flex w/full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
<div class="flex items-center relative">
<select
@@ -184,10 +186,25 @@
 			>
 				<option value="">{$i18n.t('Default (Web API)')}</option>
 				<option value="whisper-local">{$i18n.t('Whisper (Local)')}</option>
+				<option value="whisper-openai">{$i18n.t('Whisper (OpenAI)')}</option>
 			</select>
 		</div>
 	</div>
+
+	{#if STTEngine === 'whisper-openai'}
+		<div class=" py-0.5 flex w-full justify-between">
+			<div class=" self-center text-xs font-medium">{$i18n.t('STT Model Name')}</div>
+			<div class="flex items-center relative">
+				<input
+					class="dark:bg-gray-900 w-full rounded px-2 p-1 text-xs bg-transparent outline-none"
+					bind:value={STTModel}
+					placeholder="Enter STT model name"
+					required
+				/>
+			</div>
+		</div>
+	{/if}

<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Conversation Mode')}</div>

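Whatever records audio elsewhere in the app still has to decide which model name to pass to `transcribe()`. The sketch below shows how that lookup might work against the `audio` settings object saved above; the interface fields and the 'whisper-1' fallback come from this diff, but the helper itself is hypothetical.

```typescript
// Mirrors the `audio` object written by saveSettings() in Audio.svelte.
interface AudioSettings {
	STTEngine?: string;
	STTModel?: string;
	TTSEngine?: string;
	speaker?: string;
}

// Use the user-entered STTModel only when the whisper-openai engine is
// selected; otherwise fall back to transcribe()'s 'whisper-1' default.
export const resolveSTTModel = (audio?: AudioSettings): string => {
	if (audio?.STTEngine === 'whisper-openai' && audio.STTModel) {
		return audio.STTModel;
	}
	return 'whisper-1';
};
```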
