Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transcribing audio directly from whatsapp #76

Merged
merged 1 commit into from
Oct 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 41 additions & 49 deletions app/routers/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,79 +717,71 @@ def handle_openai_message(
if docs := get_document(payload):
return f"Dear {sender_name}, We do not support documents."

# Step 1: Retrieve audio information from the payload
if audio_info := get_audio(payload):
if not audio_info:
logging.error("No audio information provided.")
return "Failed to transcribe audio."

# Acknowledge receipt to the user right away, before fetching the media
send_message("Audio has been received ...", os.getenv("WHATSAPP_TOKEN"), from_number, phone_number_id)

# Step 2: Fetch the media URL using the WhatsApp token
audio_url = fetch_media_url(audio_info["id"], os.getenv("WHATSAPP_TOKEN"))
if not audio_url:
logging.error("Failed to fetch media URL.")
return "Failed to transcribe audio."

local_audio_path = download_whatsapp_audio(
audio_url, os.getenv("WHATSAPP_TOKEN")
)
# Step 3: Download the audio file locally
local_audio_path = download_whatsapp_audio(audio_url, os.getenv("WHATSAPP_TOKEN"))
if not local_audio_path:
logging.error("Failed to download audio from WhatsApp.")
return "Failed to transcribe audio."

send_message(
"Audio has been received ...", os.getenv("WHATSAPP_TOKEN"), from_number, phone_number_id
)

try:
blob_name, blob_url = upload_audio_file(local_audio_path)
# logging.info(
# f"Audio bucket upload complete: {local_audio_path}, Blob URL: {blob_url}"
# )
# Step 4: Notify the user that the audio has been loaded
send_message("Audio has been loaded ...", os.getenv("WHATSAPP_TOKEN"), from_number, phone_number_id)

# endpoint = runpod.Endpoint(RUNPOD_ENDPOINT_ID)
# audio_file = blob_name
# request_response = {}
try:
# Step 6: Initialize the Runpod endpoint for transcription
endpoint = runpod.Endpoint(RUNPOD_ENDPOINT_ID)

start_time = time.time()

# if os.path.exists(local_audio_path):
# os.remove(local_audio_path)
# logging.info(f"Cleaned up local audio file: {local_audio_path}")

send_message(
"Your transcription is being processed ...", os.getenv("WHATSAPP_TOKEN"), from_number, phone_number_id
)

# try:
# request_response = endpoint.run_sync(
# {
# "input": {
# "task": "transcribe",
# "target_lang": target_language,
# "adapter": target_language,
# "audio_file": audio_file,
# "recognise_speakers": False,
# }
# },
# timeout=600, # Timeout in seconds.
# )
# except TimeoutError as e:
# logging.error(f"Transcription job timed out: {str(e)}")
# return "Failed to transcribe audio."
# except Exception as e:
# logging.error(f"Unexpected error during transcription: {str(e)}")
# return "Failed to transcribe audio."

send_audio(os.getenv("WHATSAPP_TOKEN"),blob_name,phone_number_id,from_number)

# Step 5: Notify the user that transcription is in progress
send_message("Your transcription is being processed ...", os.getenv("WHATSAPP_TOKEN"), from_number, phone_number_id)

try:
# Step 7: Call the transcription service with the correct parameters
request_response = endpoint.run_sync(
{
"input": {
"task": "transcribe",
"target_lang": target_language,
"adapter": target_language,
"audio_file": local_audio_path, # Corrected to pass local file path
"recognise_speakers": False,
}
},
timeout=150, # Set a timeout for the transcription job.
)

except TimeoutError as e:
logging.error(f"Transcription job timed out: {str(e)}")
return "Failed to transcribe audio."
except Exception as e:
logging.error(f"Unexpected error during transcription: {str(e)}")
return "Failed to transcribe audio."

# Step 8: Log the time taken for the transcription
end_time = time.time()
elapsed_time = end_time - start_time
logging.info(f"Elapsed time: {elapsed_time} seconds for transcription.")

# return request_response.get(
# "audio_transcription"
# )
return "We sent you back your audio, this feature is still in test."
# Step 9: Return the transcription result
return request_response.get("audio_transcription", "Transcription not found.")

finally:
# Step 10: Clean up the local audio file
if os.path.exists(local_audio_path):
os.remove(local_audio_path)
logging.info(f"Cleaned up local audio file: {local_audio_path}")
Expand Down
Loading