Skip to content

Commit

Permalink
Add AWS Lambda for upcoming data integration (ARCH-356) (#72)
Browse files Browse the repository at this point in the history
* update docs on setup tools

* define lambda role and function

* update path to Dockerfile

* update README

* trigger the lambda every 5 minutes

* use plural form of the unit

* Remove lambda fct architecture

* Migrate data integration code to L2 constructs

* Add @DataClass to DataIntegrationProps

* Add docstrings

* Replace `_lambda` by `lambda_`

* Add docstrings

* Add docstrings
  • Loading branch information
tschaffter authored Dec 6, 2024
1 parent 26fb4b6 commit 25c5008
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ also include a Python virtual environment where all the Python packages needed
are already installed.

If you decide to develop outside of the dev container, some of the development
tools can be installed by running:
tools can be installed manually by running:

```console
./tools/setup.sh
Expand Down
16 changes: 16 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import aws_cdk as cdk
from aws_cdk.aws_scheduler_alpha import ScheduleExpression

from openchallenges.bucket_stack import BucketStack
from openchallenges.network_stack import NetworkStack
Expand All @@ -7,6 +8,8 @@
from openchallenges.service_stack import LoadBalancedServiceStack
from openchallenges.load_balancer_stack import LoadBalancerStack
from openchallenges.service_props import ServiceProps, ContainerVolume
from openchallenges.data_integration_stack import DataIntegrationStack
from openchallenges.data_integration_props import DataIntegrationProps
import openchallenges.utils as utils

app = cdk.App()
Expand Down Expand Up @@ -328,6 +331,19 @@
app, f"{stack_name_prefix}-load-balancer", network_stack.vpc
)

# Trigger the data integration every five minutes; the remaining cron fields
# are wildcards and the expression is evaluated in the Los Angeles time zone.
data_integration_schedule = ScheduleExpression.cron(
    minute="*/5",
    hour="*",
    day="*",
    month="*",
    time_zone=cdk.TimeZone.AMERICA_LOS_ANGELES,
)
data_integration_props = DataIntegrationProps(schedule=data_integration_schedule)
data_integration_stack = DataIntegrationStack(
    app,
    f"{stack_name_prefix}-data-integration",
    data_integration_props,
)

api_docs_props = ServiceProps(
"openchallenges-api-docs",
8010,
Expand Down
1 change: 1 addition & 0 deletions cdk_docker/data-integration-lambda/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Base image for the data integration Lambda, pinned to a specific image tag.
FROM ghcr.io/sage-bionetworks/sandbox-lambda-python:sha-b38dc22
62 changes: 62 additions & 0 deletions openchallenges/data_integration_lambda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from aws_cdk import aws_iam as iam
from aws_cdk import aws_lambda as lambda_
from constructs import Construct


class DataIntegrationLambda(Construct):
    """
    A CDK construct to define an AWS Lambda function for data integration.

    This construct creates an IAM role with the necessary permissions and a
    Docker-based Lambda function for handling data integration tasks.

    Attributes:
        lambda_role (iam.Role): The IAM role assumed by the Lambda function.
        lambda_function (lambda_.Function): The Docker-based Lambda function.
    """

    def __init__(self, scope: Construct, id: str) -> None:
        """
        Initializes the construct and creates its IAM role and Lambda function.

        Args:
            scope (Construct): The parent construct.
            id (str): The identifier of this construct within its scope.
        """
        super().__init__(scope, id)

        # The role must exist first because the function is created with it.
        self.lambda_role = self._build_lambda_role()
        self.lambda_function = self._build_lambda_function(self.lambda_role)

    def _build_lambda_role(self) -> iam.Role:
        """
        Builds the IAM role for the Lambda function.

        The role can be assumed by the Lambda service and has the AWS managed
        policy `service-role/AWSLambdaBasicExecutionRole` attached.

        Returns:
            iam.Role: The IAM role for the Lambda function.
        """
        return iam.Role(
            self,
            "LambdaRole",
            assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    managed_policy_name="service-role/AWSLambdaBasicExecutionRole"
                )
            ],
        )

    def _build_lambda_function(self, role: iam.Role) -> lambda_.Function:
        """
        Builds the Docker-based AWS Lambda function.

        The Lambda function uses a Docker image built from a local directory.

        Args:
            role (iam.Role): The IAM role to associate with the Lambda function.

        Returns:
            lambda_.Function: The Docker-based AWS Lambda function.
        """
        return lambda_.DockerImageFunction(
            self,
            "LambdaFunction",
            code=lambda_.DockerImageCode.from_image_asset(
                # Directory relative to where you execute cdk deploy contains a
                # Dockerfile with build instructions.
                directory="cdk_docker/data-integration-lambda"
            ),
            role=role,
            # Memory allocated to the function, in MB.
            memory_size=128,
        )
15 changes: 15 additions & 0 deletions openchallenges/data_integration_props.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from dataclasses import dataclass
from aws_cdk.aws_scheduler_alpha import ScheduleExpression


@dataclass
class DataIntegrationProps:
    """
    Configuration values for the data integration.

    Attributes:
        schedule (ScheduleExpression): When the data integration is triggered.
    """

    # The schedule for triggering the data integration.
    schedule: ScheduleExpression
66 changes: 66 additions & 0 deletions openchallenges/data_integration_stack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import aws_cdk as cdk
from aws_cdk import (
aws_scheduler_alpha as scheduler_alpha,
aws_scheduler_targets_alpha as scheduler_targets,
)
from openchallenges.data_integration_lambda import DataIntegrationLambda
from openchallenges.data_integration_props import DataIntegrationProps
from constructs import Construct


class DataIntegrationStack(cdk.Stack):
    """
    Defines an AWS CDK stack for data integration.

    This stack sets up the resources required for scheduling and executing
    data integration tasks using AWS Lambda and EventBridge Scheduler.

    The stack includes:
    - A Lambda function for data integration.
    - An EventBridge Scheduler schedule to trigger the Lambda function.
    - An EventBridge Scheduler group for organizing schedules.
    """

    def __init__(
        self, scope: Construct, id: str, props: DataIntegrationProps, **kwargs
    ) -> None:
        """
        Initializes the DataIntegrationStack.

        Arguments:
            scope (Construct): The parent construct for this stack.
            id (str): The unique identifier for this stack.
            props (DataIntegrationProps): The properties required for data
                integration, including the schedule.
            **kwargs: Additional arguments passed to the base `cdk.Stack` class.
        """
        super().__init__(scope, id, **kwargs)

        data_integration_lambda = DataIntegrationLambda(self, "data-integration-lambda")

        # Invoke the Lambda function with an empty JSON object as its event.
        target = scheduler_targets.LambdaInvoke(
            data_integration_lambda.lambda_function,
            input=scheduler_alpha.ScheduleTargetInput.from_object({}),
        )

        # Create a group for the schedule (maybe we want to add more schedules
        # to this group in the future).
        # NOTE(review): the group name is fixed, so a second instance of this
        # stack in the same account and region would presumably collide on it;
        # confirm before deploying several environments side by side.
        schedule_group = scheduler_alpha.Group(
            self,
            "group",
            group_name="schedule-group",
        )

        # Derive the description from the configured schedule so that it stays
        # accurate when a schedule other than "every 5 minutes" is passed in.
        scheduler_alpha.Schedule(
            self,
            "schedule",
            schedule=props.schedule,
            target=target,
            group=schedule_group,
            description=(
                "Triggers the data integration Lambda function "
                f"({props.schedule.expression_string})"
            ),
        )
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
aws-cdk-lib==2.139.0
aws-cdk.aws-scheduler-alpha==2.139.0a0
aws-cdk.aws-scheduler-targets-alpha==2.139.0a0
constructs>=10.0.0,<11.0.0
boto3>=1.34.1

0 comments on commit 25c5008

Please sign in to comment.