diff --git a/.gitignore b/.gitignore index e921906..26a97f1 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ key.json # Private JSON files service_account_key.json credentials.json +service-account-file.json # Audio files *.wav diff --git a/app/crud/audio_transcription.py b/app/crud/audio_transcription.py index 6d98c9e..b48aa63 100644 --- a/app/crud/audio_transcription.py +++ b/app/crud/audio_transcription.py @@ -24,11 +24,16 @@ def create_audio_transcription( async def get_audio_transcriptions( - db: Session, username: str + db: Session, username: str, params ) -> List[AudioTranscription]: + order_column = getattr(AudioTranscription, params.order_by) + if params.descending: + order_column = order_column.desc() + return ( db.query(AudioTranscription) .filter(AudioTranscription.username == username) + .order_by(order_column) .all() ) diff --git a/app/inference_services/user_preference.py b/app/inference_services/user_preference.py index 32f062d..038fb70 100644 --- a/app/inference_services/user_preference.py +++ b/app/inference_services/user_preference.py @@ -1,9 +1,8 @@ +import logging import os import firebase_admin from firebase_admin import credentials, firestore -import logging - logging.basicConfig(level=logging.INFO) @@ -63,7 +62,15 @@ def save_user_preference(user_id, source_language, target_language): # } # ) -def save_translation(user_id, original_text, translated_text, source_language, target_language, message_id): + +def save_translation( + user_id, + original_text, + translated_text, + source_language, + target_language, + message_id, +): """ Save translation details to Firestore @@ -108,7 +115,7 @@ def update_feedback(message_id, feedback): try: translations_ref = db.collection("whatsapp_translations") query = translations_ref.where("message_id", "==", message_id).stream() - + for doc in query: doc_ref = translations_ref.document(doc.id) doc_ref.update({"feedback": feedback}) @@ -119,4 +126,3 @@ def update_feedback(message_id, feedback): except Exception as e: logging.error(f"Error updating feedback: {e}") return False - diff --git a/app/inference_services/whats_app_services.py b/app/inference_services/whats_app_services.py index 2bb9e0a..8805374 100644 --- a/app/inference_services/whats_app_services.py +++ b/app/inference_services/whats_app_services.py @@ -47,6 +47,7 @@ # logging.info(f"Response: {r.json()}") # return r.json() + def send_message(message, token, recipient_id, phone_number_id, preview_url=True): """ Sends a text message to a WhatsApp user and returns the message ID @@ -62,10 +63,7 @@ def send_message(message, token, recipient_id, phone_number_id, preview_url=True str: ID of the sent message """ base_url = "https://graph.facebook.com/v12.0" - headers = { - "Authorization": f"Bearer {token}", - "Content-Type": "application/json" - } + headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} url = f"{base_url}/{phone_number_id}/messages" data = { "messaging_product": "whatsapp", @@ -76,7 +74,7 @@ def send_message(message, token, recipient_id, phone_number_id, preview_url=True r = requests.post(url, headers=headers, json=data) if r.status_code == 200: response_json = r.json() - message_id = response_json.get('messages', [{}])[0].get('id') + message_id = response_json.get("messages", [{}])[0].get("id") logging.info(f"Message sent to {recipient_id} with ID: {message_id}") return message_id else: @@ -86,7 +84,6 @@ def send_message(message, token, recipient_id, phone_number_id, preview_url=True return None - def reply_to_message( token, message_id: str, @@ -974,8 +971,10 @@ def get_media_url(media_id, token): f"Failed to retrieve media URL. HTTP Status: {response.status_code}, Response: {response.text}" ) + # my new code + def valid_payload(payload): return "object" in payload and ( "entry" in payload @@ -1040,6 +1039,7 @@ def welcome_message(sender_name=""): "The Translation and Transcription Service Team" ) + def help_message(): return ( "Help Guide:\n\n" @@ -1065,14 +1065,15 @@ def help_message(): ) - def set_default_target_language(user_id, save_user_preference): default_target_language = "Luganda" defualt_source_language = "English" save_user_preference(user_id, defualt_source_language, default_target_language) -def handle_language_selection(user_id, selection, source_language, save_user_preference, languages_obj): +def handle_language_selection( + user_id, selection, source_language, save_user_preference, languages_obj +): if int(selection) == 6: save_user_preference(user_id, source_language, languages_obj[selection]) return f"Language set to {languages_obj[selection]}. You can now send texts to translate." @@ -1084,27 +1085,27 @@ def handle_language_selection(user_id, selection, source_language, save_user_pre def get_audio(payload: dict): """ Extracts audio information from the webhook payload. - + Args: payload (dict): The incoming webhook payload. - + Returns: dict: Audio information if available, otherwise None. """ try: - if 'entry' in payload: - for entry in payload['entry']: - if 'changes' in entry: - for change in entry['changes']: - if 'value' in change and 'messages' in change['value']: - for message in change['value']['messages']: - if 'audio' in message: + if "entry" in payload: + for entry in payload["entry"]: + if "changes" in entry: + for change in entry["changes"]: + if "value" in change and "messages" in change["value"]: + for message in change["value"]["messages"]: + if "audio" in message: audio_info = { - "id": message['audio']['id'], - "mime_type": message['audio']['mime_type'] + "id": message["audio"]["id"], + "mime_type": message["audio"]["mime_type"], } return audio_info return None except KeyError: logging.error("KeyError: Missing expected key in payload.") - return None \ No newline at end of file + return None diff --git a/app/routers/frontend.py b/app/routers/frontend.py index 057ee70..f803bc5 100644 --- a/app/routers/frontend.py +++ b/app/routers/frontend.py @@ -1,4 +1,5 @@ import json +import logging from datetime import timedelta from typing import List @@ -16,7 +17,7 @@ from app.crud.users import create_user, get_user_by_email, get_user_by_username from app.deps import get_db from app.routers.auth import get_current_user -from app.schemas.audio_transcription import AudioTranscriptionBase +from app.schemas.audio_transcription import AudioTranscriptionBase, ItemQueryParams from app.schemas.users import User, UserCreate, UserInDB from app.utils.auth_utils import ( ACCESS_TOKEN_EXPIRE_MINUTES, @@ -31,6 +32,7 @@ router = APIRouter() templates = Jinja2Templates(directory="app/templates") oauth2_scheme = OAuth2PasswordBearerWithCookie(tokenUrl="/auth/token") +logging.basicConfig(level=logging.INFO) @router.get("/") @@ -175,7 +177,9 @@ async def account( response_model=List[AudioTranscriptionBase], ) async def get_audio_transcriptions( - current_user=Depends(get_current_user), db: Session = Depends(get_db) + current_user=Depends(get_current_user), + params: ItemQueryParams = Depends(), + db: Session = Depends(get_db), ): """ This endpoint returns all the transcriptions per user. @@ -184,7 +188,7 @@ async def get_audio_transcriptions( """ transcriptions = await crud_audio_transcriptions( - db=db, username=current_user.username + db=db, username=current_user.username, params=params ) if not transcriptions: @@ -233,6 +237,7 @@ async def update_audio_transcription( db.commit() db.refresh(transcription) except Exception as e: + logging.error(f"Error: {str(e)}") db.rollback() raise HTTPException( status_code=500, detail="An error occurred while updating the transcription" diff --git a/app/routers/tasks.py b/app/routers/tasks.py index 82b2477..48b175a 100644 --- a/app/routers/tasks.py +++ b/app/routers/tasks.py @@ -24,11 +24,25 @@ update_feedback, ) from app.inference_services.whats_app_services import ( - get_phone_number_id, get_from_number, get_name, help_message, - get_interactive_response, get_location, get_image, get_video, get_document, - get_audio, get_reaction, get_message, query_media_url, download_media, send_message, - valid_payload, welcome_message, handle_language_selection, set_default_target_language - + download_media, + get_audio, + get_document, + get_from_number, + get_image, + get_interactive_response, + get_location, + get_message, + get_name, + get_phone_number_id, + get_reaction, + get_video, + handle_language_selection, + help_message, + query_media_url, + send_message, + set_default_target_language, + valid_payload, + welcome_message, ) from app.routers.auth import get_current_user from app.schemas.tasks import ( @@ -43,8 +57,7 @@ SummarisationRequest, SummarisationResponse, ) -from app.utils.helper_utils import chunk_text -from app.utils.upload_audio_file_gcp import upload_audio_file, upload_file_to_bucket +from app.utils.upload_audio_file_gcp import upload_audio_file router = APIRouter() @@ -288,6 +301,7 @@ async def speech_to_text( transcription = request_response.get("audio_transcription") # Save transcription in database if it exists + audio_transcription_id = None if ( transcription is not None and isinstance(transcription, str) @@ -296,6 +310,7 @@ async def speech_to_text( db_audio_transcription = create_audio_transcription( db, current_user, blob_url, blob_name, transcription ) + audio_transcription_id = db_audio_transcription.id logging.info( f"Audio transcription in database :{db_audio_transcription.to_dict()}" @@ -307,6 +322,7 @@ async def speech_to_text( formatted_diarization_output=request_response.get( "formatted_diarization_output", "" ), + audio_transcription_id=audio_transcription_id, ) @@ -403,10 +419,19 @@ async def webhook(payload: dict): sender_name = get_name(payload) source_language, target_language = get_user_preference(from_number) - message = handle_message(payload, from_number, sender_name, source_language, target_language, phone_number_id) + message = handle_message( + payload, + from_number, + sender_name, + source_language, + target_language, + phone_number_id, + ) if message: - send_message(message, os.getenv("WHATSAPP_TOKEN"), from_number, phone_number_id) + send_message( + message, os.getenv("WHATSAPP_TOKEN"), from_number, phone_number_id + ) return {"status": "success"} @@ -414,6 +439,7 @@ async def webhook(payload: dict): logging.error(f"Error in webhook processing: {str(error)}") raise HTTPException(status_code=500, detail="Internal Server Error") from error + @router.get("/webhook") async def verify_webhook(mode: str, token: str, challenge: str): if mode and token: @@ -424,32 +450,38 @@ async def verify_webhook(mode: str, token: str, challenge: str): return {"challenge": challenge} raise HTTPException(status_code=400, detail="Bad Request") -def handle_message(payload, from_number, sender_name, source_language, target_language, phone_number_id): + +def handle_message( + payload, from_number, sender_name, source_language, target_language, phone_number_id +): if interactive_response := get_interactive_response(payload): return f"Dear {sender_name}, Thanks for that response." - + if location := get_location(payload): return f"Dear {sender_name}, We have no support for messages of type locations." - + if image := get_image(payload): return f"Dear {sender_name}, We have no support for messages of type image." - + if video := get_video(payload): return f"Dear {sender_name}, We have no support for messages of type video." - + if docs := get_document(payload): return f"Dear {sender_name}, We do not support documents." - + if audio := get_audio(payload): return handle_audio_message(audio, target_language, sender_name) - + if reaction := get_reaction(payload): mess_id = reaction["message_id"] emoji = reaction["emoji"] update_feedback(mess_id, emoji) return f"Dear {sender_name}, Thanks for your feedback {emoji}." - - return handle_text_message(payload, from_number, sender_name, source_language, target_language) + + return handle_text_message( + payload, from_number, sender_name, source_language, target_language + ) + def handle_audio_message(audio, target_language, sender_name): try: @@ -457,7 +489,10 @@ def handle_audio_message(audio, target_language, sender_name): mime_type = audio["mime_type"] if target_language: - file_path = download_media(query_media_url(audio_id, os.getenv("WHATSAPP_TOKEN")), os.getenv("WHATSAPP_TOKEN")) + file_path = download_media( + query_media_url(audio_id, os.getenv("WHATSAPP_TOKEN")), + os.getenv("WHATSAPP_TOKEN"), + ) transcription = process_speech_to_text(file_path, target_language) if transcription: return transcription @@ -465,33 +500,47 @@ def handle_audio_message(audio, target_language, sender_name): return "Sorry, there was an issue processing your audio file." else: return f"Dear {sender_name}, Please specify the language for transcription." - + except Exception as e: logging.error(f"Error processing audio file: {str(e)}") return "Sorry, there was an issue processing your audio file." -def handle_text_message(payload, from_number, sender_name, source_language, target_language): + +def handle_text_message( + payload, from_number, sender_name, source_language, target_language +): msg_body = get_message(payload) if not target_language or not source_language: - set_default_target_language(from_number,save_user_preference) + set_default_target_language(from_number, save_user_preference) return welcome_message(sender_name) - + if msg_body.lower() in ["hi", "start"]: return welcome_message(sender_name) - + if msg_body.isdigit() and msg_body in languages_obj: - return handle_language_selection(from_number, msg_body, source_language,save_user_preference, languages_obj) - + return handle_language_selection( + from_number, msg_body, source_language, save_user_preference, languages_obj + ) + if msg_body.lower() == "help": return help_message() - + if 3 <= len(msg_body) <= 200: detected_language = detect_language(msg_body) translation = translate_text(msg_body, detected_language, target_language) - mess_id = send_message(translation, whatsapp_token, from_number, get_phone_number_id(payload)) - - save_translation(from_number, msg_body, translation, detected_language, target_language, mess_id) + mess_id = send_message( + translation, whatsapp_token, from_number, get_phone_number_id(payload) + ) + + save_translation( + from_number, + msg_body, + translation, + detected_language, + target_language, + mess_id, + ) save_user_preference(from_number, detected_language, target_language) return translation @@ -548,6 +597,7 @@ def translate_text(text, source_language, target_language): return translated_text + def process_speech_to_text(file_path, language: str): endpoint = runpod.Endpoint(RUNPOD_ENDPOINT_ID) @@ -582,6 +632,7 @@ def process_speech_to_text(file_path, language: str): return request_response.get("audio_transcription") + def detect_language(text): endpoint = runpod.Endpoint(os.getenv("RUNPOD_ENDPOINT_ID")) request_response = {} @@ -612,4 +663,4 @@ def detect_language(text): raise HTTPException( status_code=408, detail="The language identification job timed out. Please try again later.", - ) \ No newline at end of file + ) diff --git a/app/schemas/audio_transcription.py b/app/schemas/audio_transcription.py index b221f6b..73e2137 100644 --- a/app/schemas/audio_transcription.py +++ b/app/schemas/audio_transcription.py @@ -1,9 +1,20 @@ from datetime import datetime +from enum import Enum from typing import Optional from pydantic import BaseModel, EmailStr, HttpUrl +class OrderBy(str, Enum): + id = "id" + uploaded = "uploaded" + + +class ItemQueryParams(BaseModel): + order_by: OrderBy = OrderBy.uploaded + descending: bool = False + + class AudioTranscriptionBase(BaseModel): id: str username: str diff --git a/app/schemas/tasks.py b/app/schemas/tasks.py index be1100e..c246d09 100644 --- a/app/schemas/tasks.py +++ b/app/schemas/tasks.py @@ -10,6 +10,7 @@ class STTTranscript(BaseModel): audio_transcription: Optional[str] = Field(None) diarization_output: Optional[dict] = Field(None) formatted_diarization_output: Optional[str] = Field(None) + audio_transcription_id: Optional[int] = Field(None) class NllbResponseOutputData(BaseModel): diff --git a/app/utils/upload_audio_file_gcp.py b/app/utils/upload_audio_file_gcp.py index 7446327..b76bf94 100644 --- a/app/utils/upload_audio_file_gcp.py +++ b/app/utils/upload_audio_file_gcp.py @@ -28,7 +28,8 @@ def upload_audio_file(file_path): except Exception as e: print(f"An error occurred: {e}") return None - + + def upload_file_to_bucket(file_path): """ Uploads a file to a Google Cloud Storage bucket.