
Feature Run Pod (#317)
* Update model methods to run background tasks when creating the endpoint, and add a score endpoint for RunPod to use

* Handle errors from RunPod; read the inference URL from the task's YAML config file

* Update backend/app/api/endpoints/base/score.py

Co-authored-by: Rafael Mosquera <[email protected]>

* Rename method

* Rename endpoint

---------

Co-authored-by: Rafael Mosquera <[email protected]>
shincap8 and remg1997 authored Jan 21, 2025
1 parent 7d8b64e commit 2adf5bb
Showing 7 changed files with 127 additions and 18 deletions.
16 changes: 14 additions & 2 deletions backend/app/api/endpoints/base/model.py
@@ -94,7 +94,7 @@ def heavy_evaluation(
UploadModelToS3AndEvaluateRequest
),
):
return ModelService().upload_model_to_s3(
data = ModelService().upload_and_create_model(
model.model_name,
model.description,
model.num_paramaters,
@@ -104,8 +104,20 @@
model.user_id,
model.task_code,
model.file_to_upload,
background_tasks,
)
background_tasks.add_task(
ModelService().run_heavy_evaluation,
data["model_path"],
data["model_id"],
data["save_s3_path"],
data["inference_url"],
)
background_tasks.add_task(
ModelService().send_uploaded_model_email,
data["user_email"],
data["model_name"],
)
return "The model will be evaluated in the background"


@router.get("/initiate_lambda_models")
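
Note: the rewritten handler now responds immediately and defers both the RunPod kick-off and the notification email to FastAPI background tasks, which run only after the response has been sent, in the order they were added. A minimal self-contained sketch of that semantics (the route and names below are illustrative, not from this repo):

from fastapi import BackgroundTasks, FastAPI

app = FastAPI()

def long_running_job(name: str) -> None:
    # Executed by the framework after the HTTP response has gone out.
    print(f"evaluating {name} ...")

@app.post("/submit")
def submit(background_tasks: BackgroundTasks):
    # The client gets the return value immediately; the task runs afterwards.
    background_tasks.add_task(long_running_job, "my-model")
    return "The model will be evaluated in the background"
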
8 changes: 8 additions & 0 deletions backend/app/api/endpoints/base/score.py
@@ -8,6 +8,7 @@
CsvResponseModel,
GetCsvScore,
GetLeaderboardMetadata,
HeavyEvaluationScoresRequest,
)
from app.domain.services.base.score import ScoreService

@@ -28,3 +29,10 @@ async def read_users_score_csv(model: GetCsvScore):
@router.post("/read_leaderboard_metadata/", response_model={})
async def read_leaderboard_metadata(model: GetLeaderboardMetadata):
return ScoreService().read_leaderboard_metadata(model.task_id, model.round_id)


@router.post("/heavy_evaluation_scores")
def heavy_evaluation_scores(model: HeavyEvaluationScoresRequest):
return ScoreService().add_scores_and_update_model(
model.model_id, model.scores, model.status, model.message
)
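
For context, this is the callback route the evaluation worker is given (see endpoint_url in ModelService.run_heavy_evaluation later in this diff). A hedged example of the request it accepts: the URL comes from the diff and the field names from HeavyEvaluationScoresRequest, but the values are made up.

import requests

requests.post(
    "https://backend.dynabench.org/score/heavy_evaluation_scores",
    json={
        "model_id": 1234,                  # hypothetical model id
        "message": "evaluation completed",
        "scores": {"accuracy": 0.87},      # hypothetical metrics
        "status": 200,                     # non-200 triggers the failure email
    },
)
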
7 changes: 7 additions & 0 deletions backend/app/domain/schemas/base/score.py
@@ -20,3 +20,10 @@ class GetLeaderboardMetadata(BaseModel):
class CsvResponseModel(BaseModel):
data: Union[str, List]
rounds: Union[List[int], None]


class HeavyEvaluationScoresRequest(BaseModel):
model_id: int
message: str
scores: Optional[dict]
status: Optional[int]
52 changes: 44 additions & 8 deletions backend/app/domain/services/base/model.py
@@ -11,7 +11,7 @@
import boto3
import requests
import yaml
from fastapi import BackgroundTasks, HTTPException, UploadFile
from fastapi import HTTPException, UploadFile
from pydantic import Json

from app.domain.helpers.email import EmailHelper
@@ -175,7 +175,7 @@ def upload_model_to_s3_and_evaluate(
def single_model_prediction(self, model_url: str, model_input: dict):
return requests.post(model_url, json=model_input).json()

def upload_model_to_s3(
def upload_and_create_model(
self,
model_name: str,
description: str,
@@ -186,9 +186,10 @@ def upload_model_to_s3(
user_id: str,
task_code: str,
file_to_upload: UploadFile,
background_tasks: BackgroundTasks,
):
task_id = self.task_repository.get_task_id_by_task_code(task_code)[0]
yaml_file = self.task_repository.get_config_file_by_task_id(task_id)[0]
yaml_file = yaml.safe_load(yaml_file)
task_s3_bucket = self.task_repository.get_s3_bucket_by_task_id(task_id)[0]
user_email = self.user_repository.get_user_email(user_id)[0]

@@ -203,6 +204,10 @@ def upload_model_to_s3(
model_name_clean = re.sub(r"_+", "_", model_name_clean)

model_path = f"{task_code}/submited_models/{task_id}-{user_id}-{model_name}-{clean_file_name}"
uri_logging = f"s3://{task_s3_bucket}/{task_code}/inference_logs/"
uri_model = f"s3://{task_s3_bucket}/{task_code}/submited_models/{task_id}-{user_id}-{model_name}-{clean_file_name}"
inference_url = yaml_file["evaluation"]["inference_url"]

try:
self.s3.put_object(
Body=file_to_upload.file,
@@ -211,7 +216,7 @@
ContentType=file_to_upload.content_type,
)
self.user_repository.increment_model_submitted_count(user_id)
self.model_repository.create_new_model(
model = self.model_repository.create_new_model(
task_id=task_id,
user_id=user_id,
model_name=model_name,
@@ -224,18 +229,49 @@
deployment_status="uploaded",
secret=secrets.token_hex(),
)
background_tasks.add_task(
self.email_helper.send,
return {
"model_path": uri_model,
"save_s3_path": uri_logging,
"model_id": model["id"],
"model_name": model_name,
"user_email": user_email,
"inference_url": inference_url,
}
except Exception as e:
print(f"An unexpected error occurred: {e}")
return "Model upload failed"

def run_heavy_evaluation(
self, model_path: str, model_id: int, save_s3_path: str, inference_url: str
):
try:
requests.post(
inference_url,
json={
"model_path": model_path,
"model_id": model_id,
"save_s3_path": save_s3_path,
"endpoint_url": "https://backend.dynabench.org/score/heavy_evaluation_scores",
},
)
except Exception as e:
print(f"An unexpected error occurred: {e}")

def send_uploaded_model_email(
self,
user_email: str,
model_name: str,
):
try:
self.email_helper.send(
contact=user_email,
cc_contact=self.email_sender,
template_name="model_upload_successful.txt",
msg_dict={"name": model_name},
subject=f"Model {model_name} upload succeeded.",
)
return "Model upload successfully"
except Exception as e:
print(f"An unexpected error occurred: {e}")
return "Model upload failed"

def single_model_prediction_submit(
self,
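
Nothing in this commit defines the worker behind inference_url; the sketch below only illustrates, under that stated assumption, the contract run_heavy_evaluation establishes: receive the four-key payload, evaluate, then POST the result back to endpoint_url.

import requests

def evaluate_model(model_path: str, save_s3_path: str) -> dict:
    # Stand-in for the real evaluation logic running on the pod.
    return {"accuracy": 0.0}

def handle_evaluation_request(payload: dict) -> None:
    try:
        scores = evaluate_model(payload["model_path"], payload["save_s3_path"])
        status, message = 200, "evaluation completed"
    except Exception as e:
        scores, status, message = None, 500, str(e)
    # Report back; a non-200 status makes the backend email a failure notice.
    requests.post(
        payload["endpoint_url"],  # .../score/heavy_evaluation_scores
        json={
            "model_id": payload["model_id"],
            "message": message,
            "scores": scores,
            "status": status,
        },
    )
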
41 changes: 41 additions & 0 deletions backend/app/domain/services/base/score.py
@@ -9,6 +9,7 @@
import numpy as np
import pandas as pd

from app.domain.helpers.email import EmailHelper
from app.domain.services.base.dataset import DatasetService
from app.infrastructure.repositories.model import ModelRepository
from app.infrastructure.repositories.score import ScoreRepository
@@ -29,6 +30,7 @@ def __init__(self):
region_name=os.getenv("AWS_REGION"),
)
self.s3 = self.session.client("s3")
self.email_helper = EmailHelper()

def get_scores_by_dataset_and_model_id(
self,
@@ -359,3 +361,42 @@ def read_leaderboard_metadata(self, task_id: int, round_id: int):
)
csv_file = pd.read_csv(final_file)
return csv_file.to_dict(orient="records")

def add_scores_and_update_model(
self, model_id: int, scores: dict, status: int, message: str
):
try:
model = self.model_repository.get_model_info_by_id(model_id)
user = self.user_repository.get_user_info_by_id(model.uid)
if status != 200:
self.email_helper.send(
contact=user["email"],
cc_contact=self.email_sender,
template_name="model_evaluation_failed.txt",
msg_dict={"name": model["name"]},
subject=f"Model {model['name']} evaluation failed.",
)
print("error running inference")
print(message)
return {"response": "Error running instance"}
else:
datasets = self.dataset_repository.get_order_datasets_by_task_id(
model.task_id
)
datasets = [dataset.__dict__ for dataset in datasets]
metadata_json = dict(scores)
scores["metadata_json"] = metadata_json
scores["mid"] = model_id
scores["did"] = datasets[0]["id"]
self.score_repository.add(scores)
self.model_repository.update_model_status(model_id)
self.email_helper.send(
contact=user["email"],
cc_contact=self.email_sender,
template_name="model_evaluation_sucessful.txt",
msg_dict={"name": model["name"], "model_id": model["id"]},
subject=f"Model {model['name']} evaluation succeeded.",
)
return {"response": "Scores added successfully"}
except Exception as e:
return {"error": str(e)}
7 changes: 6 additions & 1 deletion backend/app/infrastructure/repositories/model.py
@@ -6,6 +6,7 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from sqlalchemy.inspection import inspect
from sqlalchemy.orm import aliased
from sqlalchemy.sql import func

@@ -95,7 +96,11 @@ def create_new_model(
session.add(model)
session.flush()
session.commit()
return model.__dict__
model_data = {
c.key: getattr(model, c.key) for c in inspect(model).mapper.column_attrs
}

return model_data

def get_active_models_by_task_id(self, task_id: int) -> list:
models = (
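
The switch away from model.__dict__ matters because a mapped instance's __dict__ carries SQLAlchemy bookkeeping (notably _sa_instance_state) that is not JSON-friendly, while iterating mapper.column_attrs yields exactly the mapped columns. A minimal demonstration in SQLAlchemy 2.0 declarative style (the toy model is illustrative, not from this repo):

from sqlalchemy import Integer, String
from sqlalchemy.inspection import inspect
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

class Base(DeclarativeBase):
    pass

class Toy(Base):
    __tablename__ = "toys"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String(64))

toy = Toy(id=1, name="demo")
print("_sa_instance_state" in toy.__dict__)  # True: unsafe to return as-is
print({c.key: getattr(toy, c.key) for c in inspect(toy).mapper.column_attrs})
# -> {'id': 1, 'name': 'demo'}: a plain, serializable dict
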
14 changes: 7 additions & 7 deletions backend/app/infrastructure/repositories/task.py
@@ -30,13 +30,6 @@ def get_task_id_by_task_code(self, task_code: str):
.first()
)

def get_s3_bucket_by_task_id(self, task_id: int):
return (
self.session.query(self.model.s3_bucket)
.filter(self.model.id == task_id)
.first()
)

def get_task_code_by_task_id(self, task_id: int):
return (
self.session.query(self.model.task_code)
@@ -187,3 +180,10 @@ def get_dynalab_hr_diff(self, task_id: int):
.filter(self.model.id == task_id)
.first()
)

def get_config_file_by_task_id(self, task_id: int):
return (
self.session.query(self.model.config_yaml)
.filter(self.model.id == task_id)
.first()
)
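
This query feeds ModelService.upload_and_create_model earlier in the diff, which safe-loads the returned YAML text and reads evaluation.inference_url. A minimal illustrative config follows; the RunPod-style URL is a placeholder assumption, not taken from this commit.

import yaml

raw = """
evaluation:
  inference_url: https://api.runpod.ai/v2/<endpoint-id>/run
"""
cfg = yaml.safe_load(raw)
print(cfg["evaluation"]["inference_url"])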
