Merge pull request #162 from cmurp25/mainline
Policy Changes & Added Data Migration Stack
Showing 5 changed files with 187 additions and 15 deletions.
@@ -0,0 +1,99 @@
from aws_cdk import core
from aws_cdk import aws_lambda as lambda_
from aws_cdk import aws_iam as iam
from aws_cdk import aws_s3 as s3
from aws_cdk import aws_glue as glue
from aws_cdk import aws_logs as logs
from aws_cdk import custom_resources as cr
import json


#! WIP
class DataMigrationStack(core.Stack):
    def __init__(self, scope: core.Construct, id: str, *, env: core.Environment) -> None:
        super().__init__(scope, id, env=env)

        # 1. Use an already existing S3 bucket for input files (CSV files)
        input_bucket = s3.Bucket.from_bucket_name(self, "ExistingBucket", "testing-trigger-for-glue-job")

        # 2. Define the IAM role for the Glue job (created in the Prod environment)
        glue_role = iam.Role(self, "GlueJobRole",
            assumed_by=iam.ServicePrincipal("glue.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AWSGlueServiceRole"),
                iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess"),
                iam.ManagedPolicy.from_aws_managed_policy_name("AmazonDynamoDBFullAccess"),
            ]
        )

        # 3. Define the Glue ETL job. Glue rejects jobs that set both max_capacity
        # and worker_type/number_of_workers, so only the worker settings are used here.
        glue_job = glue.CfnJob(self, "GlueETLJob",
            name="beta-S3Pull-CSVParse-DDBStore",  # The Glue job name
            role=glue_role.role_arn,  # Role created in the Prod environment
            command={
                "name": "glueetl",
                "script_location": "s3://path-to-your-glue-script/script.py",  # Replace with your Glue script location
            },
            default_arguments={
                "--TempDir": f"s3://{input_bucket.bucket_name}/temp/",
                "--input_bucket": input_bucket.bucket_name,
            },
            worker_type="Standard",  # Use the standard worker type to minimize cost
            number_of_workers=2  # Adjust based on the size of your data
        )

        # 4. Create the Lambda function that will trigger the Glue job in the Prod account
        trigger_glue_lambda = lambda_.Function(self, "TriggerGlueJobLambda",
            runtime=lambda_.Runtime.PYTHON_3_12,
            handler="data_migration.lambda_handler",
            code=lambda_.Code.from_asset("lambda_code"),  # from_asset expects the directory containing data_migration.py
            environment={
                'BUCKET_NAME': input_bucket.bucket_name,
            }
        )

        # 5. Grant the Lambda function permission to read from the S3 bucket in the Beta account
        input_bucket.grant_read(trigger_glue_lambda)

        # 6. Grant the Lambda function permission to start the Glue job
        trigger_glue_lambda.add_to_role_policy(
            iam.PolicyStatement(
                actions=["glue:StartJobRun"],
                resources=[f"arn:aws:glue:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:job/beta-S3Pull-CSVParse-DDBStore"]
            )
        )

        # 7. Create a Custom Resource to modify the S3 bucket policy in the Beta account
        custom_resource = cr.AwsCustomResource(self, "CustomBucketPolicyResource",
            on_create={
                "service": "S3",
                "action": "putBucketPolicy",
                "parameters": {
                    "Bucket": input_bucket.bucket_name,
                    "Policy": json.dumps({
                        "Version": "2012-10-17",
                        "Statement": [
                            {
                                "Effect": "Allow",
                                "Principal": {
                                    "AWS": f"arn:aws:iam::{core.Aws.ACCOUNT_ID}:role/{trigger_glue_lambda.role.role_name}"  # Referencing the Lambda role in Prod
                                },
                                "Action": "s3:GetObject",
                                "Resource": f"arn:aws:s3:::{input_bucket.bucket_name}/*"
                            }
                        ]
                    })
                },
                "physical_resource_id": cr.PhysicalResourceId.of(input_bucket.bucket_name),
            },
            policy=cr.AwsCustomResourcePolicy.from_statements([
                iam.PolicyStatement(
                    actions=["s3:PutBucketPolicy"],
                    resources=[f"arn:aws:s3:::{input_bucket.bucket_name}"],
                ),
            ])
        )

        # 8. Output the Lambda ARN and Glue job name for reference
        core.CfnOutput(self, "TriggerGlueLambdaARN", value=trigger_glue_lambda.function_arn)
        core.CfnOutput(self, "GlueJobName", value=glue_job.name)
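
For context, a minimal sketch of how this stack might be wired into a CDK app entry point, assuming CDK v1 to match the imports above; the module name and the account/region values are placeholders, not part of this PR:

from aws_cdk import core

from data_migration_stack import DataMigrationStack  # hypothetical module name for the file above

app = core.App()

# Placeholder account/region; substitute the real target environment values.
DataMigrationStack(app, "DataMigrationStack",
    env=core.Environment(account="123456789012", region="us-east-1"))

app.synth()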
@@ -0,0 +1,64 @@
import boto3
import logging
import os
from botocore.exceptions import ClientError

# Initialize logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Initialize AWS clients
s3_client = boto3.client('s3')
glue_client = boto3.client('glue')

# S3 bucket name (comes from the environment variable set by the stack)
bucket_name = os.environ['BUCKET_NAME']


#! WIP
def lambda_handler(event, context):
    try:
        # Check that the bucket exists before doing anything else
        try:
            s3_client.head_bucket(Bucket=bucket_name)
            logger.info(f"Bucket {bucket_name} exists.")
        except ClientError as e:
            # Handle the error if the bucket does not exist
            logger.error(f"Bucket {bucket_name} does not exist: {str(e)}")
            raise Exception(f"Bucket {bucket_name} does not exist. Aborting the process.")

        # List objects in the S3 bucket (list_objects_v2 returns at most
        # 1,000 keys per call; larger buckets would need pagination)
        logger.info(f"Listing objects in bucket: {bucket_name}")
        response = s3_client.list_objects_v2(Bucket=bucket_name)

        # Check whether the bucket contains any objects
        if 'Contents' not in response:
            logger.info("No files found in the bucket.")
            return {
                'statusCode': 200,
                'body': 'No files found in the S3 bucket.'
            }

        # Trigger the Glue job for each CSV file in the S3 bucket
        for item in response['Contents']:
            file_key = item['Key']

            # Skip anything that is not a CSV file
            if not file_key.endswith('.csv'):
                logger.info(f"Skipping non-CSV file: {file_key}")
                continue

            # Trigger the Glue job for the file
            logger.info(f"Triggering Glue job for file: {file_key}")
            glue_client.start_job_run(
                JobName='beta-S3Pull-CSVParse-DDBStore',  # Glue job name
                Arguments={
                    '--s3_input': f"s3://{bucket_name}/{file_key}"
                }
            )

        return {
            'statusCode': 200,
            'body': 'Glue job triggered for each CSV file in the bucket.'
        }

    except Exception as e:
        logger.error(f"Error processing files: {str(e)}")
        return {
            'statusCode': 500,
            'body': f"Error: {str(e)}"
        }
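
Since script_location in the stack is still a placeholder, here is a minimal sketch of what a Glue script consuming the '--s3_input' argument might look like, assuming a Spark ("glueetl") job as configured above; "MigrationTable" is a hypothetical DynamoDB table name and the parsing logic is illustrative, not part of this PR:

import sys

from awsglue.utils import getResolvedOptions
from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame
from pyspark.context import SparkContext

# Resolve the argument passed by the Lambda trigger via start_job_run.
args = getResolvedOptions(sys.argv, ['s3_input'])

glue_context = GlueContext(SparkContext.getOrCreate())
spark = glue_context.spark_session

# Read the single CSV file this run was started for.
df = spark.read.option("header", "true").csv(args['s3_input'])

# Store the rows in DynamoDB; "MigrationTable" is a hypothetical table name.
glue_context.write_dynamic_frame_from_options(
    frame=DynamicFrame.fromDF(df, glue_context, "csv_rows"),
    connection_type="dynamodb",
    connection_options={"dynamodb.output.tableName": "MigrationTable"},
)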