diff --git a/infrastructure/ingestion/aws/README.md b/infrastructure/ingestion/aws/README.md
new file mode 100644
index 0000000..14f611a
--- /dev/null
+++ b/infrastructure/ingestion/aws/README.md
@@ -0,0 +1,86 @@
+# Ingestion infrastructure (AWS)
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >=1.3 |
+| [aws](#requirement\_aws) | ~> 5.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| [aws](#provider\_aws) | ~> 5.0 |
+| [local](#provider\_local) | n/a |
+| [null](#provider\_null) | n/a |
+| [random](#provider\_random) | n/a |
+
+## Modules
+
+No modules.
+
+## Resources
+
+| Name | Type |
+|------|------|
+| [aws_cloudwatch_event_rule.ingestion_sfn_trigger_rule](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource |
+| [aws_cloudwatch_event_target.ingestion_sfn_trigger](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource |
+| [aws_db_instance.rds_postgres](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/db_instance) | resource |
+| [aws_iam_policy.policy_for_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
+| [aws_iam_role.cloudwatch_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
+| [aws_iam_role.lambda_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
+| [aws_iam_role.sfn_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource |
+| [aws_iam_role_policy.cloudwatch_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource |
+| [aws_iam_role_policy.sfn_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource |
+| [aws_iam_role_policy_attachment.LambdaExecutionRolePolicyAttachment](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource |
+| [aws_lambda_function.ingestion-lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource |
+| [aws_lambda_function.migration-lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource |
+| [aws_secretsmanager_secret.rds_master_password](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/secretsmanager_secret) | resource |
+| [aws_secretsmanager_secret_version.rds_master_password](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/secretsmanager_secret_version) | resource |
+| [aws_security_group.lambda_sg](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource |
+| [aws_security_group.rds_sg](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource |
+| [aws_sfn_state_machine.ingestion-step-function](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sfn_state_machine) | resource |
+| [null_resource.ingestion_lambda_build](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
+| [null_resource.migration_lambda_build](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
+| [random_password.rds_master_password](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/password) | resource |
+| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
+| [aws_iam_policy_document.cloudwatch_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_iam_policy_document.cloudwatch_policy_document](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_iam_policy_document.lambda_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_iam_policy_document.permissions_for_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_iam_policy_document.sf_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_iam_policy_document.sfn_policy_document](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_security_group.default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/security_group) | data source |
+| [aws_subnet.selected](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnet) | data source |
+| [aws_subnets.default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source |
+| [aws_vpc.selected](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/vpc) | data source |
+| [local_file.ingestion_lambda_build](https://registry.terraform.io/providers/hashicorp/local/latest/docs/data-sources/file) | data source |
+| [local_file.migration_lambda_build](https://registry.terraform.io/providers/hashicorp/local/latest/docs/data-sources/file) | data source |
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [aws\_profile](#input\_aws\_profile) | AWS profile to use for authentication | `string` | n/a | yes |
+| [aws\_region](#input\_aws\_region) | AWS region in which to deploy resources | `string` | n/a | yes |
+| [db\_subnet\_group\_name](#input\_db\_subnet\_group\_name) | Name of the RDS subnet group | `string` | n/a | yes |
+| [disable\_ingestion\_schedule](#input\_disable\_ingestion\_schedule) | Disable the ingestion schedule | `bool` | `false` | no |
+| [environment\_type](#input\_environment\_type) | Environment type | `string` | n/a | yes |
+| [ingestion\_schedule](#input\_ingestion\_schedule) | Schedule expression (rate or cron) for the CloudWatch Event Rule | `string` | `"rate(24 hours)"` | no |
+| [permissions\_boundary\_arn](#input\_permissions\_boundary\_arn) | ARN of the permissions boundary to use for the IAM role | `string` | n/a | yes |
+| [project\_name](#input\_project\_name) | Name of the project | `string` | `"secrets-finder"` | no |
+| [rds\_db\_name](#input\_rds\_db\_name) | Name of the database to create in the RDS instance | `string` | `"secrets_finder"` | no |
+| [rds\_username](#input\_rds\_username) | Username for the RDS instance | `string` | `"secrets_finder"` | no |
+| [s3\_bucket\_name](#input\_s3\_bucket\_name) | Name of the S3 bucket to create | `string` | n/a | yes |
+| [subnet\_name](#input\_subnet\_name) | Name of the subnet in which to deploy the resources (wildcards are allowed: first match is used) | `string` | n/a | yes |
+| [tags](#input\_tags) | A map of tags to add to the resources | `map(string)` | n/a | yes |
+| [vpc\_name](#input\_vpc\_name) | Identifier of the VPC to use for secrets-finder | `string` | n/a | yes |
+
+## Outputs
+
+| Name | Description |
+|------|-------------|
+| [rds\_pg\_endpoint](#output\_rds\_pg\_endpoint) | Endpoint of the RDS PostgreSQL instance |
+
diff --git a/infrastructure/ingestion/aws/cloudwatch.tf b/infrastructure/ingestion/aws/cloudwatch.tf
new file mode 100644
index 0000000..90571d3
--- /dev/null
+++ b/infrastructure/ingestion/aws/cloudwatch.tf
@@ -0,0 +1,17 @@
+resource "aws_cloudwatch_event_rule" "ingestion_sfn_trigger_rule" {
+ name = "${var.project_name}-ingestion-sfn-trigger"
+  description         = "Triggers the Step Function on a schedule"
+ schedule_expression = var.ingestion_schedule
+ state = var.disable_ingestion_schedule ? "DISABLED" : "ENABLED"
+}
+
+resource "aws_cloudwatch_event_target" "ingestion_sfn_trigger" {
+ rule = aws_cloudwatch_event_rule.ingestion_sfn_trigger_rule.name
+ arn = aws_sfn_state_machine.ingestion-step-function.arn
+ role_arn = aws_iam_role.cloudwatch_role.arn
+
+ depends_on = [
+ aws_iam_role.cloudwatch_role,
+ aws_iam_role_policy.cloudwatch_policy,
+ ]
+}
diff --git a/infrastructure/ingestion/aws/configuration/ingestion_sfn_definition.json b/infrastructure/ingestion/aws/configuration/ingestion_sfn_definition.json
new file mode 100644
index 0000000..db2f5fd
--- /dev/null
+++ b/infrastructure/ingestion/aws/configuration/ingestion_sfn_definition.json
@@ -0,0 +1,95 @@
+{
+ "Comment": "Ingestion State Machine",
+ "StartAt": "BootStrapState",
+ "States": {
+ "BootStrapState": {
+ "Type": "Task",
+ "Resource": "${migrate_lambda_arn}",
+ "Next": "IngestionState"
+ },
+ "IngestionState": {
+ "Type": "Parallel",
+ "Branches": [
+ {
+ "Comment": "Ingest Scheduled Scan Findings",
+ "StartAt": "ListScheduledScanFindingsFiles",
+ "States": {
+ "ListScheduledScanFindingsFiles": {
+ "Type": "Task",
+ "Resource": "${ingestion_lambda_arn}",
+ "ResultPath": "$.lambdaResult",
+ "Parameters": {
+ "action": "list_files",
+ "prefix": "secrets-finder/scheduled-scans/results/"
+ },
+ "Next": "IngestScheduledScanFindingsFiles"
+ },
+ "IngestScheduledScanFindingsFiles": {
+ "Type": "Map",
+ "ItemsPath": "$.lambdaResult.body.files",
+ "Parameters": {
+ "index.$": "$$.Map.Item.Index",
+ "key.$": "$$.Map.Item.Value"
+ },
+ "Iterator": {
+ "StartAt": "IngestScheduledScanFindings",
+ "States": {
+ "IngestScheduledScanFindings": {
+ "Type": "Task",
+ "Resource": "${ingestion_lambda_arn}",
+ "Parameters": {
+ "action": "ingest_findings",
+ "file_key.$": "$.key"
+ },
+ "End": true
+ }
+ }
+ },
+ "End": true
+ }
+ }
+ },
+ {
+ "Comment": "Ingest Ongoing Scan Findings",
+ "StartAt": "ListOngoingScanFindingsFiles",
+ "States": {
+ "ListOngoingScanFindingsFiles": {
+ "Type": "Task",
+ "Resource": "${ingestion_lambda_arn}",
+ "ResultPath": "$.lambdaResult",
+ "Parameters": {
+ "action": "list_files",
+ "prefix": "secrets-finder/ongoing-scans/results/"
+ },
+ "Next": "IngestOngoingScanFindingsFiles"
+ },
+ "IngestOngoingScanFindingsFiles": {
+ "Type": "Map",
+ "ItemsPath": "$.lambdaResult.body.files",
+ "Parameters": {
+ "index.$": "$$.Map.Item.Index",
+ "key.$": "$$.Map.Item.Value"
+ },
+ "Iterator": {
+ "StartAt": "IngestOngoingScanFindings",
+ "States": {
+ "IngestOngoingScanFindings": {
+ "Type": "Task",
+ "Resource": "${ingestion_lambda_arn}",
+ "Parameters": {
+ "action": "ingest_findings",
+ "file_key.$": "$.key"
+ },
+ "End": true
+ }
+ }
+ },
+ "End": true
+ }
+ }
+ }
+ ],
+ "End": true
+ }
+ }
+}
diff --git a/infrastructure/ingestion/aws/iam.tf b/infrastructure/ingestion/aws/iam.tf
new file mode 100644
index 0000000..15dbf9c
--- /dev/null
+++ b/infrastructure/ingestion/aws/iam.tf
@@ -0,0 +1,160 @@
+# Lambda execution role
+data "aws_iam_policy_document" "lambda_assume_role" {
+ statement {
+ effect = "Allow"
+ principals {
+ identifiers = ["lambda.amazonaws.com"]
+ type = "Service"
+ }
+ actions = ["sts:AssumeRole"]
+ }
+}
+
+resource "aws_iam_role" "lambda_execution_role" {
+ name = "${var.project_name}-ingestion-lambda-execution-role"
+ assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json
+ path = "/"
+ permissions_boundary = var.permissions_boundary_arn
+}
+
+data "aws_iam_policy_document" "permissions_for_execution_role" {
+ statement {
+ sid = "WriteToCloudWatchLogGroup"
+ effect = "Allow"
+ actions = [
+ "logs:CreateLogStream",
+ "logs:PutLogEvents",
+ ]
+ resources = ["arn:aws:logs:*:*:*"]
+ }
+
+ statement {
+ sid = "AllowAccessToBucket"
+ effect = "Allow"
+ actions = [
+ "s3:ListBucket",
+ "s3:GetObject",
+ "s3:DeleteObject"
+ ]
+ resources = [
+ "${local.s3_bucket_arn}",
+ "${local.s3_bucket_arn}/*"
+ ]
+ }
+
+ statement {
+ sid = "AllowAccessToRDS"
+ effect = "Allow"
+ actions = [
+ "rds-data:ExecuteStatement",
+ "rds-data:BatchExecuteStatement",
+ "rds-data:BeginTransaction",
+ "rds-data:CommitTransaction",
+ "rds-data:RollbackTransaction"
+ ]
+ resources = [
+ aws_db_instance.rds_postgres.arn
+ ]
+ }
+
+ statement {
+ sid = "AllowEC2Perms"
+ effect = "Allow"
+ actions = [
+ "ec2:DescribeNetworkInterfaces",
+ "ec2:CreateNetworkInterface",
+ "ec2:DeleteNetworkInterface",
+ "ec2:DescribeInstances",
+ "ec2:AttachNetworkInterface"
+ ]
+ resources = ["*"]
+ }
+}
+
+resource "aws_iam_policy" "policy_for_execution_role" {
+ name = "${var.project_name}-ingestion-lambda-execution-role-permissions"
+  description = "Policy granting necessary permissions to the Lambda execution role"
+ policy = data.aws_iam_policy_document.permissions_for_execution_role.json
+}
+
+resource "aws_iam_role_policy_attachment" "LambdaExecutionRolePolicyAttachment" {
+ policy_arn = aws_iam_policy.policy_for_execution_role.arn
+ role = aws_iam_role.lambda_execution_role.name
+}
+
+# Step function role
+
+data "aws_iam_policy_document" "sf_assume_role" {
+ statement {
+ effect = "Allow"
+ principals {
+ identifiers = ["states.amazonaws.com"]
+ type = "Service"
+ }
+ actions = ["sts:AssumeRole"]
+ }
+}
+
+resource "aws_iam_role" "sfn_role" {
+ name = "${var.project_name}-ingestion-sf-execution-role"
+ path = "/"
+ permissions_boundary = var.permissions_boundary_arn
+ assume_role_policy = data.aws_iam_policy_document.sf_assume_role.json
+}
+
+data "aws_iam_policy_document" "sfn_policy_document" {
+ statement {
+ effect = "Allow"
+ actions = [
+ "lambda:InvokeFunction"
+ ]
+ resources = [
+ aws_lambda_function.ingestion-lambda.arn,
+ aws_lambda_function.migration-lambda.arn
+ ]
+ }
+}
+
+resource "aws_iam_role_policy" "sfn_policy" {
+ name = "${var.project_name}-ingestion-sf-execution-policy"
+ role = aws_iam_role.sfn_role.id
+ policy = data.aws_iam_policy_document.sfn_policy_document.json
+}
+
+# Cloudwatch role
+
+data "aws_iam_policy_document" "cloudwatch_assume_role" {
+ statement {
+ effect = "Allow"
+ principals {
+ identifiers = ["events.amazonaws.com"]
+ type = "Service"
+ }
+ actions = ["sts:AssumeRole"]
+ }
+}
+
+resource "aws_iam_role" "cloudwatch_role" {
+ name = "${var.project_name}-ingestion-cloud-watch-role"
+ path = "/"
+ permissions_boundary = var.permissions_boundary_arn
+ assume_role_policy = data.aws_iam_policy_document.cloudwatch_assume_role.json
+}
+
+data "aws_iam_policy_document" "cloudwatch_policy_document" {
+ statement {
+ effect = "Allow"
+ actions = [
+ "states:StartExecution"
+ ]
+ resources = [
+ aws_sfn_state_machine.ingestion-step-function.arn
+ ]
+ }
+}
+
+resource "aws_iam_role_policy" "cloudwatch_policy" {
+ name = "${var.project_name}-cloudwatch-event-policy"
+ role = aws_iam_role.cloudwatch_role.id
+ policy = data.aws_iam_policy_document.cloudwatch_policy_document.json
+}
diff --git a/infrastructure/ingestion/aws/lambda.tf b/infrastructure/ingestion/aws/lambda.tf
new file mode 100644
index 0000000..c7d7b03
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda.tf
@@ -0,0 +1,100 @@
+resource "null_resource" "ingestion_lambda_build" {
+ provisioner "local-exec" {
+ command = "./package.sh"
+ working_dir = "${local.ingestion_lambda_dir}/"
+ }
+
+ triggers = {
+ always_run = timestamp()
+ }
+}
+
+data "local_file" "ingestion_lambda_build" {
+ filename = local.ingestion_lambda_archive
+ depends_on = [null_resource.ingestion_lambda_build]
+}
+
+resource "aws_lambda_function" "ingestion-lambda" {
+ function_name = "${var.project_name}-ingestion-lambda"
+ role = aws_iam_role.lambda_execution_role.arn
+ architectures = ["arm64"]
+ runtime = "python3.9"
+ handler = "ingestion.handler"
+ timeout = 900 # 15 minutes
+ memory_size = 512 # 512 MB
+ filename = local.ingestion_lambda_archive
+  source_code_hash = data.local_file.ingestion_lambda_build.content_base64sha256
+
+ vpc_config {
+ subnet_ids = [data.aws_subnet.selected.id]
+ security_group_ids = [aws_security_group.lambda_sg.id]
+ }
+
+ ephemeral_storage {
+ size = 1024 # 1 GB
+ }
+
+ environment {
+ variables = {
+ BUCKET_NAME = var.s3_bucket_name
+ DB_URL = local.db_url
+ }
+ }
+
+ depends_on = [
+ data.local_file.ingestion_lambda_build,
+ aws_iam_role.lambda_execution_role,
+ aws_iam_policy.policy_for_execution_role,
+ aws_iam_role_policy_attachment.LambdaExecutionRolePolicyAttachment
+ ]
+}
+
+resource "null_resource" "migration_lambda_build" {
+ provisioner "local-exec" {
+ command = "./package.sh"
+ working_dir = "${local.migration_lambda_dir}/"
+ }
+
+ triggers = {
+ always_run = timestamp()
+ }
+}
+
+data "local_file" "migration_lambda_build" {
+ filename = local.migration_lambda_archive
+ depends_on = [null_resource.migration_lambda_build]
+}
+
+resource "aws_lambda_function" "migration-lambda" {
+ function_name = "${var.project_name}-migration-lambda"
+ role = aws_iam_role.lambda_execution_role.arn
+ architectures = ["arm64"]
+ runtime = "python3.9"
+ handler = "migrate.migrate"
+ timeout = 60 # 1 minute
+ memory_size = 512 # 512 MB
+ filename = local.migration_lambda_archive
+  source_code_hash = data.local_file.migration_lambda_build.content_base64sha256
+
+ vpc_config {
+ subnet_ids = [data.aws_subnet.selected.id]
+ security_group_ids = [aws_security_group.lambda_sg.id]
+ }
+
+ ephemeral_storage {
+ size = 512 # 512 MB
+ }
+
+ environment {
+ variables = {
+ DB_URL = local.db_url
+ }
+ }
+
+ depends_on = [
+ data.local_file.migration_lambda_build,
+ aws_iam_role.lambda_execution_role,
+ aws_iam_policy.policy_for_execution_role,
+ aws_iam_role_policy_attachment.LambdaExecutionRolePolicyAttachment
+ ]
+}
diff --git a/infrastructure/ingestion/aws/lambda/ingestion/Dockerfile b/infrastructure/ingestion/aws/lambda/ingestion/Dockerfile
new file mode 100644
index 0000000..b81d74f
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/ingestion/Dockerfile
@@ -0,0 +1,12 @@
+FROM python@sha256:320a7a4250aba4249f458872adecf92eea88dc6abd2d76dc5c0f01cac9b53990
+
+RUN pip install poetry==1.8.3 --no-cache-dir
+
+WORKDIR /app
+
+COPY . /app/
+
+RUN poetry self add poetry-plugin-lambda-build \
+ && poetry self add poetry-plugin-export \
+ && poetry lock --no-update \
+ && poetry build-lambda
diff --git a/infrastructure/ingestion/aws/lambda/ingestion/README.md b/infrastructure/ingestion/aws/lambda/ingestion/README.md
new file mode 100644
index 0000000..2a8579c
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/ingestion/README.md
@@ -0,0 +1,37 @@
+# Ingestion
+
+This directory contains the data ingestion Lambda. The Lambda is invoked by a Step Function.
+
+The packaging process uses the Poetry Lambda plugin and Docker to generate Lambda packages for the correct platform. This is automated when applying Terraform.
+
+The Lambda takes an `action` as input; each action performs a specific function. The supported actions are listed below, together with example payloads; a sketch of invoking them directly for testing follows the list.
+
+## Lambda Actions
+
+- `list_files`: This action lists files in an S3 bucket at a given prefix.
+ Example:
+ ```json
+ {
+ "action": "list_files",
+ "prefix": "secrets-finder/scheduled-scans/results/"
+ }
+ ```
+- `ingest_findings`: This action reads a given `.json` file and creates new records in the `findings`, `scans`, and `jobs` tables. The corresponding file is deleted from S3 upon successful ingestion.
+ Example:
+ ```json
+ {
+ "action": "ingest_findings",
+ "file_key": "secrets-finder/scheduled-scans/results/7eb4d1ab-ac6a-4b84-a18d-4bd944d4ef2a.json"
+ }
+ ```
+
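+In normal operation these payloads are supplied by the Step Function. For ad-hoc testing, the same payloads can be sent with a direct invocation. Below is a minimal sketch using boto3, assuming the default `project_name` (`secrets-finder`) and therefore a function named `secrets-finder-ingestion-lambda`:
+
+```python
+import json
+
+import boto3
+
+client = boto3.client("lambda")
+
+# Same payload the Step Function sends for the list_files action
+response = client.invoke(
+    FunctionName="secrets-finder-ingestion-lambda",
+    Payload=json.dumps(
+        {"action": "list_files", "prefix": "secrets-finder/scheduled-scans/results/"}
+    ).encode(),
+)
+print(json.loads(response["Payload"].read()))
+```
+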
+## Add New Ingestion
+
+Creating a new ingestion is a four-step process:
+
+1. Create the necessary DB migration revision under the `migrations` directory. Refer to [Create New Revisions](../../../../../migrations/README.md#creating-new-revision).
+2. Create a new ingestion script under the `modules` directory.
+3. Register the new ingestion with an action in `ingestion.py` under `ingestion_callback_mapping` (see the sketch below).
+4. Add a new branch to the [Step Function definition](../../configuration/ingestion_sfn_definition.json).
+
+Use `terraform apply` to build and deploy the Lambda. Once deployed, the next Step Function invocation will automatically trigger the new ingestion.
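+
+As an illustration of steps 2 and 3, here is a minimal sketch of a hypothetical ingestion. The module name (`modules/inventory_ingestion.py`), function name, and action name are invented for the example and are not part of this change:
+
+```python
+# modules/inventory_ingestion.py (hypothetical example)
+import json
+import logging
+
+from modules.common.s3 import S3
+
+
+def ingest_inventory(db_url: str, bucket_name: str, file_key: str) -> bool:
+    """Download the file from S3, persist its contents, then delete the file."""
+    s3 = S3(bucket_name)
+    file_path = s3.download_file(file_key)
+    with open(file_path, "r") as file:
+        data = json.load(file)
+    if not data:
+        logging.error("No data in the file")
+        return False
+    # ... create records with SQLAlchemy (create_engine(db_url)),
+    # as done in modules/findings_ingestion.py ...
+    return s3.delete_file(file_key)
+```
+
+The new action is then registered in `ingestion.py`:
+
+```python
+# ingestion.py (excerpt): register the hypothetical new action
+from typing import Callable, Dict
+
+from modules.findings_ingestion import ingest_findings
+from modules.inventory_ingestion import ingest_inventory
+
+ingestion_callback_mapping: Dict[str, Callable[[str, str, str], bool]] = {
+    "ingest_findings": ingest_findings,
+    "ingest_inventory": ingest_inventory,
+}
+```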
diff --git a/infrastructure/ingestion/aws/lambda/ingestion/ingestion.py b/infrastructure/ingestion/aws/lambda/ingestion/ingestion.py
new file mode 100644
index 0000000..442b9c7
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/ingestion/ingestion.py
@@ -0,0 +1,68 @@
+import os
+from typing import List, Dict, Any, Callable, Union
+import logging
+from modules.common.s3 import S3
+from modules.findings_ingestion import ingest_findings
+
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s - %(levelname)s - %(message)s",
+ handlers=[logging.StreamHandler()],
+)
+
+bucket_name: str = os.environ.get("BUCKET_NAME")
+db_url: str = os.environ.get("DB_URL")
+
+ingestion_callback_mapping: Dict[str, Callable[[str, str, str], bool]] = {
+ "ingest_findings": ingest_findings
+}
+
+
+def list_files(prefix: str) -> Dict[str, Union[int, Dict[str, List[str]]]]:
+ s3 = S3(bucket_name)
+ files = s3.list_files(prefix)
+ return {"statusCode": 200, "body": {"files": files}}
+
+
+def handler(event: Dict[str, Any], _) -> Dict[str, Any]:
+ """
+ Handle the Lambda function invocation.
+
+ Args:
+ event (Dict[str, Any]): The event data passed to the Lambda function.
+ _ (Any): The context object representing the runtime information.
+
+ Returns:
+ Dict[str, Any]: The response data returned by the Lambda function.
+
+ Raises:
+ ValueError: If the request is invalid or the action is not supported.
+ """
+ action: str = event.get("action")
+
+ if action == "list_files":
+ prefix: str = event.get("prefix")
+ if not prefix:
+ logging.error("missing prefix in request for action list_files")
+ raise ValueError("Invalid request")
+
+ response: Dict[str, Union[int, Dict[str, List[str]]]] = list_files(prefix)
+ return response
+
+ elif action in ingestion_callback_mapping:
+ file_key: str = event.get("file_key")
+ if not file_key:
+            logging.error(f"missing file_key in request for action {action}")
+ raise ValueError("Invalid request")
+
+ status: bool = ingestion_callback_mapping[action](db_url, bucket_name, file_key)
+
+ if not status:
+ logging.error("Error ingesting data")
+ raise ValueError("Error ingesting data")
+
+ return {"statusCode": 200, "body": {"success": status}}
+
+ else:
+ logging.error(f"Invalid action: {action}")
+ raise ValueError("Invalid request")
diff --git a/infrastructure/ingestion/aws/lambda/ingestion/modules/__init__.py b/infrastructure/ingestion/aws/lambda/ingestion/modules/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/infrastructure/ingestion/aws/lambda/ingestion/modules/common/__init__.py b/infrastructure/ingestion/aws/lambda/ingestion/modules/common/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/infrastructure/ingestion/aws/lambda/ingestion/modules/common/s3.py b/infrastructure/ingestion/aws/lambda/ingestion/modules/common/s3.py
new file mode 100644
index 0000000..a512dd6
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/ingestion/modules/common/s3.py
@@ -0,0 +1,123 @@
+import os
+import tempfile
+import boto3
+from typing import List, Optional, Tuple
+
+
+class S3:
+ """
+ Represents an S3 client for interacting with an S3 bucket.
+
+ Args:
+ bucket_name (str): The name of the S3 bucket.
+
+ Attributes:
+ client (boto3.client): The S3 client.
+ bucket_name (str): The name of the S3 bucket.
+
+ """
+
+ client: boto3.client = None
+ bucket_name: str = None
+
+ def __init__(self, bucket_name: str) -> None:
+ """
+ Initializes the S3 client.
+
+ Args:
+ bucket_name (str): The name of the S3 bucket.
+
+ """
+ self.client = boto3.client("s3")
+ self.bucket_name = bucket_name
+
+ def list_files(self, prefix: str) -> List[str]:
+ """
+ Lists all the files in the S3 bucket with the specified prefix.
+
+ Args:
+ prefix (str): The prefix to filter the files.
+
+ Returns:
+ List[str]: A list of file keys.
+
+ """
+ keys: List[str] = []
+        continuation_token: Optional[str] = None
+
+ if not prefix.endswith("/"):
+ prefix += "/"
+
+ while True:
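+            # Delimiter="/" restricts results to objects directly under the prefix (no recursion into sub-prefixes)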
+ kwargs: dict = {
+ "Bucket": self.bucket_name,
+ "Prefix": prefix,
+ "Delimiter": "/",
+ }
+
+ if continuation_token:
+ kwargs["ContinuationToken"] = continuation_token
+
+ response: dict = self.client.list_objects_v2(**kwargs)
+ contents: List[dict] = response.get("Contents", [])
+ _keys: List[str] = [
+ content["Key"]
+ for content in contents
+ if not content["Key"].endswith("/")
+ ]
+ keys.extend(_keys)
+
+ if not response.get("IsTruncated"):
+ break
+
+ continuation_token = response.get("NextContinuationToken")
+
+ return keys
+
+ def download_file(self, file_key: str) -> str:
+ """
+ Downloads the file with the specified key from the bucket.
+
+ Args:
+ file_key (str): The key of the file to download.
+
+ Returns:
+ str: The local path of the downloaded file.
+
+ """
+ file_name: str = os.path.basename(file_key)
+ local_path: str = os.path.join(tempfile.gettempdir(), file_name)
+ self.client.download_file(self.bucket_name, file_key, local_path)
+ return local_path
+
+    def download_first_file(self, prefix: str) -> Optional[Tuple[str, str]]:
+ """
+ Downloads the first file with the specified prefix from the bucket.
+
+ Args:
+ prefix (str): The prefix to filter the files.
+
+ Returns:
+            Tuple[str, str]: A tuple containing the file key and the local path of the downloaded file, or None if no file exists under the prefix.
+
+ """
+ files = self.list_files(prefix)
+ if not files:
+ return None
+
+ key = files[0]
+ return key, self.download_file(files[0])
+
+ def delete_file(self, file_key: str) -> bool:
+ """
+ Deletes the file with the specified key from the bucket.
+
+ Args:
+ file_key (str): The key of the file to delete.
+
+ Returns:
+ bool: True if the file was successfully deleted, False otherwise.
+
+ """
+ self.client.delete_object(Bucket=self.bucket_name, Key=file_key)
+ return True
diff --git a/infrastructure/ingestion/aws/lambda/ingestion/modules/common/utils.py b/infrastructure/ingestion/aws/lambda/ingestion/modules/common/utils.py
new file mode 100644
index 0000000..c61ddd0
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/ingestion/modules/common/utils.py
@@ -0,0 +1 @@
+DATE_TIME_FORMAT: str = "%Y-%m-%dT%H:%M:%S.%f"
diff --git a/infrastructure/ingestion/aws/lambda/ingestion/modules/findings_ingestion.py b/infrastructure/ingestion/aws/lambda/ingestion/modules/findings_ingestion.py
new file mode 100644
index 0000000..9246eb9
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/ingestion/modules/findings_ingestion.py
@@ -0,0 +1,197 @@
+import datetime
+import json
+import logging
+from sqlalchemy import (
+ JSON,
+ VARCHAR,
+ Boolean,
+ Column,
+ Integer,
+ String,
+ DateTime,
+ create_engine,
+)
+from sqlalchemy.orm import sessionmaker, declarative_base
+from modules.common.s3 import S3
+from modules.common.utils import DATE_TIME_FORMAT
+import uuid
+
+Base = declarative_base()
+
+
+class Finding(Base):
+ __tablename__ = "findings"
+ uuid = Column(String, primary_key=True)
+ scan_uuid = Column(String, nullable=False)
+ job_uuid = Column(String, nullable=False)
+ organization = Column(String, nullable=True)
+ scan_context = Column(String, nullable=False)
+ created_on = Column(DateTime, nullable=False)
+ decoder_name = Column(String, nullable=False)
+ detector_name = Column(String, nullable=False)
+ detector_type = Column(Integer, nullable=False)
+ raw = Column(VARCHAR, nullable=False)
+ raw_v2 = Column(VARCHAR, nullable=True)
+ redacted = Column(String, nullable=True)
+ source_name = Column(String, nullable=False)
+ source_type = Column(Integer, nullable=False)
+ verified = Column(Boolean, nullable=False)
+ extra_data = Column(JSON, nullable=True)
+ repository = Column(String, nullable=True)
+ filename = Column(String, nullable=False)
+ commit_hash = Column(String, nullable=True)
+ committer_email = Column(String, nullable=True)
+ commit_timestamp = Column(DateTime, nullable=True)
+ line_number = Column(Integer, nullable=False)
+ is_still_valid = Column(Boolean, nullable=False)
+ last_validated_on = Column(DateTime, nullable=False)
+
+
+class Scans(Base):
+ __tablename__ = "scans"
+ uuid = Column(String, primary_key=True)
+ job_uuid = Column(String, nullable=False)
+ scan_identifier = Column(String, nullable=True)
+ scm = Column(String, nullable=False)
+ organization = Column(String, nullable=True)
+ repository = Column(String, nullable=False)
+ scan_context = Column(String, nullable=False)
+ started_on = Column(DateTime, nullable=False)
+ completed_on = Column(DateTime, nullable=False)
+ status = Column(Integer, nullable=False)
+ scan_mode = Column(String, nullable=False)
+ scan_type = Column(String, nullable=False)
+ # metadata is a reserved attribute name in SQLAlchemy
+ metadata_ = Column("metadata", JSON, nullable=True)
+
+
+class Jobs(Base):
+ __tablename__ = "jobs"
+ uuid = Column(String, primary_key=True)
+ scan_identifier = Column(String, nullable=False)
+ scm = Column(String, nullable=False)
+ scan_context = Column(String, nullable=False)
+ started_on = Column(DateTime, nullable=False)
+ completed_on = Column(DateTime, nullable=False)
+ status = Column(Integer, nullable=False)
+ scan_mode = Column(String, nullable=False)
+ scan_type = Column(String, nullable=False)
+
+
+def ingest_findings(db_url: str, bucket_name: str, file_key: str) -> bool:
+ """
+ Ingests findings from a file downloaded from S3 into a database.
+
+ Args:
+ db_url (str): The URL of the database to connect to.
+ bucket_name (str): The name of the S3 bucket.
+ file_key (str): The key of the file in the S3 bucket.
+
+ Returns:
+ bool: True if the ingestion is successful, False otherwise.
+ """
+ logging.info(f"Downloading file from S3, key: {file_key}, bucket: {bucket_name}")
+ s3 = S3(bucket_name)
+ file_path = s3.download_file(file_key)
+ logging.info(f"File downloaded to {file_path}, key: {file_key}")
+
+ with open(file_path, "r") as file:
+ data = json.load(file)
+
+ if not data:
+ logging.error("No data in the file")
+ return False
+
+ # Create a SQLAlchemy engine to connect to the database
+ engine = create_engine(db_url)
+
+ # Create a session
+ Session = sessionmaker(bind=engine)
+ session = Session()
+
+ job = Jobs(
+ uuid=data["scan_uuid"],
+ scan_identifier=data["scan_identifier"],
+ scm=data["scm"],
+ scan_context=data["scan_context"],
+ started_on=datetime.datetime.strptime(data["start"], DATE_TIME_FORMAT),
+ completed_on=datetime.datetime.strptime(data["end"], DATE_TIME_FORMAT),
+ status=data["status"],
+ scan_type=data["scan_type"],
+ scan_mode=data["scan_mode"],
+ )
+
+ session.add(job)
+
+ for result in data.get("results", []):
+ scan = Scans(
+ uuid=result["scan_uuid"],
+ job_uuid=job.uuid,
+ scan_identifier=job.scan_identifier,
+ scm=job.scm,
+ organization=result["organization"],
+ repository=result["repository"],
+ scan_context=job.scan_context,
+ started_on=datetime.datetime.strptime(result["start"], DATE_TIME_FORMAT),
+ completed_on=datetime.datetime.strptime(result["end"], DATE_TIME_FORMAT),
+ status=result.get("status"),
+ scan_mode=job.scan_mode,
+ scan_type=job.scan_type,
+ metadata_=result.get("metadata", {}),
+ )
+
+ logging.info(f'Ingesting scan: {result["scan_uuid"]}')
+ session.add(scan)
+
+ for finding in result.get("findings", []):
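+            # SourceMetadata.Data holds a single source-specific entry; its values carry the file, commit, email, timestamp and line used below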
+ source_meta_data = list(
+ finding.get("SourceMetadata", {}).get("Data", {}).values()
+ )[0]
+            db_finding = Finding(
+ uuid=str(uuid.uuid4()),
+ scan_uuid=result["scan_uuid"],
+ job_uuid=job.uuid,
+ organization=result["organization"],
+ scan_context=job.scan_context,
+ created_on=datetime.datetime.now(),
+ decoder_name=finding["DetectorName"],
+ detector_name=finding["DetectorName"],
+ detector_type=finding["DetectorType"],
+ raw=finding["Raw"],
+ raw_v2=finding.get("RawV2", ""),
+ redacted=finding.get("Redacted", ""),
+ source_name=finding["SourceName"],
+ source_type=finding["SourceType"],
+ verified=finding["Verified"],
+ extra_data=finding.get("ExtraData", {}),
+ repository=result["repository"],
+ filename=source_meta_data["file"],
+ commit_hash=source_meta_data.get("commit"),
+ committer_email=source_meta_data.get("email"),
+ commit_timestamp=(
+ datetime.datetime.strptime(
+ source_meta_data.get("timestamp"), "%Y-%m-%d %H:%M:%S %z"
+ )
+ if source_meta_data.get("timestamp")
+ else None
+ ),
+ line_number=source_meta_data["line"],
+ is_still_valid=finding["Verified"],
+ last_validated_on=datetime.datetime.strptime(
+ result["end"], DATE_TIME_FORMAT
+ ),
+ )
+
+            logging.info(
+                f'Ingesting finding: {db_finding.uuid} for scan: {result["scan_uuid"]}'
+            )
+            session.add(db_finding)
+
+ if not s3.delete_file(file_key):
+ logging.error(f"Error deleting file from S3, key: {file_key}")
+ session.rollback()
+ return False
+
+ logging.info(f"Deleted file from S3, key: {file_key}")
+ session.commit()
+ return True
diff --git a/infrastructure/ingestion/aws/lambda/ingestion/package.sh b/infrastructure/ingestion/aws/lambda/ingestion/package.sh
new file mode 100755
index 0000000..2866648
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/ingestion/package.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+
+docker build -t ingestion-lambda --platform=linux/arm64 -f Dockerfile .
+
+docker run --rm -v "$(pwd)":/output ingestion-lambda cp /app/ingestion.zip /output/
diff --git a/infrastructure/ingestion/aws/lambda/ingestion/pyproject.toml b/infrastructure/ingestion/aws/lambda/ingestion/pyproject.toml
new file mode 100644
index 0000000..e1c9049
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/ingestion/pyproject.toml
@@ -0,0 +1,21 @@
+[tool.poetry]
+name = "ingestion"
+version = "0.1.0"
+description = "Lambda to ingest data into the data lake"
+authors = ["Thomson Reuters "]
+license = "mit"
+readme = "README.md"
+include = ["modules/*.py", "modules/common/*"]
+
+[tool.poetry.dependencies]
+python = "^3.9"
+boto3 = "^1.34.130"
+sqlalchemy = "^2.0.31"
+psycopg2-binary = "^2.9.9"
+
+[tool.poetry-plugin-lambda-build]
+package_artifact_path = "ingestion.zip"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/infrastructure/ingestion/aws/lambda/migration/Dockerfile b/infrastructure/ingestion/aws/lambda/migration/Dockerfile
new file mode 100644
index 0000000..b81d74f
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/migration/Dockerfile
@@ -0,0 +1,12 @@
+FROM python@sha256:320a7a4250aba4249f458872adecf92eea88dc6abd2d76dc5c0f01cac9b53990
+
+RUN pip install poetry==1.8.3 --no-cache-dir
+
+WORKDIR /app
+
+COPY . /app/
+
+RUN poetry self add poetry-plugin-lambda-build \
+ && poetry self add poetry-plugin-export \
+ && poetry lock --no-update \
+ && poetry build-lambda
diff --git a/infrastructure/ingestion/aws/lambda/migration/README.md b/infrastructure/ingestion/aws/lambda/migration/README.md
new file mode 100644
index 0000000..3e2206c
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/migration/README.md
@@ -0,0 +1,16 @@
+# Migration Lambda
+
+This directory contains all the scripts necessary to package the migrations (located at the repository root) as a Lambda function.
+
+The packaging process uses the Poetry Lambda plugin and leverages Docker to ensure Lambda packages are generated for the correct platform.
+
+## Usage
+
+To package the Lambda, run the following command:
+
+```bash
+./package.sh
+```
+
+> [!NOTE]
+> Any changes to the migrations should be picked up automatically during repackaging.
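+
+For reference, `lambda.tf` in this change wires the function to the `migrate.migrate` handler and passes the database URL through the `DB_URL` environment variable. What that handler does depends on the root-level migrations, which are not included in this directory. The sketch below is illustrative only and assumes the migrations are managed with Alembic:
+
+```python
+# migrate.py (illustrative sketch, assuming Alembic-based migrations)
+import os
+
+from alembic import command
+from alembic.config import Config
+
+
+def migrate(event, context):
+    """Apply all pending revisions against the database given by DB_URL."""
+    config = Config("alembic.ini")
+    config.set_main_option("sqlalchemy.url", os.environ["DB_URL"])
+    command.upgrade(config, "head")
+    return {"statusCode": 200}
+```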
diff --git a/infrastructure/ingestion/aws/lambda/migration/package.sh b/infrastructure/ingestion/aws/lambda/migration/package.sh
new file mode 100755
index 0000000..3ff9917
--- /dev/null
+++ b/infrastructure/ingestion/aws/lambda/migration/package.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+set -e
+
+ROOT_DIR=$(git rev-parse --show-toplevel)
+
+docker build -t migrate-lambda --platform=linux/arm64 -f Dockerfile "$ROOT_DIR/migrations"
+
+docker run --rm -v "$(pwd)":/output migrate-lambda cp /app/migration.zip /output/
diff --git a/infrastructure/ingestion/aws/locals.tf b/infrastructure/ingestion/aws/locals.tf
new file mode 100644
index 0000000..ebf70ba
--- /dev/null
+++ b/infrastructure/ingestion/aws/locals.tf
@@ -0,0 +1,11 @@
+locals {
+ environment = replace(lower(var.environment_type), " ", "-")
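+  # urlencode prevents special characters in the generated password from breaking the connection URL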
+  db_url                   = "postgresql://${var.rds_username}:${urlencode(random_password.rds_master_password.result)}@${aws_db_instance.rds_postgres.address}/${var.rds_db_name}"
+ configuration_dir = "${path.module}/configuration"
+ ingestion_lambda_dir = "${path.module}/lambda/ingestion"
+ ingestion_lambda_archive = "${local.ingestion_lambda_dir}/ingestion.zip"
+ migration_lambda_dir = "${path.module}/lambda/migration"
+ migration_lambda_archive = "${local.migration_lambda_dir}/migration.zip"
+ s3_bucket_arn = "arn:aws:s3:::${var.s3_bucket_name}"
+ tags = var.tags
+}
diff --git a/infrastructure/ingestion/aws/outputs.tf b/infrastructure/ingestion/aws/outputs.tf
new file mode 100644
index 0000000..185e483
--- /dev/null
+++ b/infrastructure/ingestion/aws/outputs.tf
@@ -0,0 +1,3 @@
+output "rds_pg_endpoint" {
+ value = aws_db_instance.rds_postgres.endpoint
+}
diff --git a/infrastructure/ingestion/aws/providers.tf b/infrastructure/ingestion/aws/providers.tf
new file mode 100644
index 0000000..d3d1e83
--- /dev/null
+++ b/infrastructure/ingestion/aws/providers.tf
@@ -0,0 +1,23 @@
+terraform {
+ required_version = ">=1.3"
+
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ version = "~> 5.0"
+ }
+ }
+
+ backend "s3" {
+ encrypt = true
+ }
+}
+
+provider "aws" {
+ region = var.aws_region
+ profile = var.aws_profile
+
+ default_tags {
+ tags = local.tags
+ }
+}
diff --git a/infrastructure/ingestion/aws/rds.tf b/infrastructure/ingestion/aws/rds.tf
new file mode 100644
index 0000000..a133b4f
--- /dev/null
+++ b/infrastructure/ingestion/aws/rds.tf
@@ -0,0 +1,18 @@
+resource "aws_db_instance" "rds_postgres" {
+ identifier = "${var.project_name}-rds-postgres"
+ allocated_storage = 10
+ engine = "postgres"
+ engine_version = "16.3"
+ instance_class = "db.t3.micro" # Smallest instance type for PostgreSQL
+ username = var.rds_username
+ password = random_password.rds_master_password.result
+ parameter_group_name = "default.postgres16"
+ db_name = var.rds_db_name
+ skip_final_snapshot = true
+ publicly_accessible = false
+ storage_encrypted = true
+ deletion_protection = true
+ backup_retention_period = 7
+ vpc_security_group_ids = [aws_security_group.rds_sg.id]
+ db_subnet_group_name = var.db_subnet_group_name
+}
diff --git a/infrastructure/ingestion/aws/s3.tfbackend b/infrastructure/ingestion/aws/s3.tfbackend
new file mode 100644
index 0000000..6fa4016
--- /dev/null
+++ b/infrastructure/ingestion/aws/s3.tfbackend
@@ -0,0 +1,5 @@
+bucket = ""
+key = ""
+region = ""
+dynamodb_table = ""
+profile = ""
diff --git a/infrastructure/ingestion/aws/secrets.tf b/infrastructure/ingestion/aws/secrets.tf
new file mode 100644
index 0000000..e43778b
--- /dev/null
+++ b/infrastructure/ingestion/aws/secrets.tf
@@ -0,0 +1,19 @@
+resource "random_password" "rds_master_password" {
+ length = 40
+ special = true
+ min_special = 5
+ override_special = "!#$%^&*()-_=+[]{}<>:?"
+ keepers = {
+ pass_version = 1
+ }
+}
+
+resource "aws_secretsmanager_secret" "rds_master_password" {
+ name = "${var.project_name}-rds-master-password"
+ description = "Master password for RDS instance"
+}
+
+resource "aws_secretsmanager_secret_version" "rds_master_password" {
+ secret_id = aws_secretsmanager_secret.rds_master_password.id
+ secret_string = random_password.rds_master_password.result
+}
diff --git a/infrastructure/ingestion/aws/securitygroups.tf b/infrastructure/ingestion/aws/securitygroups.tf
new file mode 100644
index 0000000..9e23393
--- /dev/null
+++ b/infrastructure/ingestion/aws/securitygroups.tf
@@ -0,0 +1,33 @@
+resource "aws_security_group" "rds_sg" {
+ name = "${var.project_name}-rds-sg"
+ description = "Security group for RDS instance"
+ vpc_id = data.aws_vpc.selected.id
+
+ ingress {
+ from_port = 5432
+ to_port = 5432
+ protocol = "tcp"
+ cidr_blocks = [data.aws_vpc.selected.cidr_block]
+ }
+
+ egress {
+ from_port = 0
+ to_port = 0
+ protocol = "-1"
+ cidr_blocks = ["0.0.0.0/0"]
+ }
+}
+
+
+resource "aws_security_group" "lambda_sg" {
+ name = "${var.project_name}-lambda-sg"
+ description = "Security group for Lambda functions"
+ vpc_id = data.aws_vpc.selected.id
+
+ egress {
+ from_port = 0
+ to_port = 0
+ protocol = "-1"
+ cidr_blocks = ["0.0.0.0/0"]
+ }
+}
diff --git a/infrastructure/ingestion/aws/sfn.tf b/infrastructure/ingestion/aws/sfn.tf
new file mode 100644
index 0000000..540f09e
--- /dev/null
+++ b/infrastructure/ingestion/aws/sfn.tf
@@ -0,0 +1,13 @@
+resource "aws_sfn_state_machine" "ingestion-step-function" {
+ name = "${var.project_name}-ingestion-step-function"
+ role_arn = aws_iam_role.sfn_role.arn
+ definition = templatefile("${local.configuration_dir}/ingestion_sfn_definition.json", {
+ migrate_lambda_arn = "${aws_lambda_function.migration-lambda.arn}",
+ ingestion_lambda_arn = "${aws_lambda_function.ingestion-lambda.arn}"
+ })
+
+ depends_on = [
+ aws_iam_role.sfn_role,
+ aws_iam_role_policy.sfn_policy,
+ ]
+}
diff --git a/infrastructure/ingestion/aws/sts.tf b/infrastructure/ingestion/aws/sts.tf
new file mode 100644
index 0000000..8fc4b38
--- /dev/null
+++ b/infrastructure/ingestion/aws/sts.tf
@@ -0,0 +1 @@
+data "aws_caller_identity" "current" {}
diff --git a/infrastructure/ingestion/aws/terraform.tfvars.example b/infrastructure/ingestion/aws/terraform.tfvars.example
new file mode 100644
index 0000000..85c8a35
--- /dev/null
+++ b/infrastructure/ingestion/aws/terraform.tfvars.example
@@ -0,0 +1,17 @@
+aws_region = ""
+aws_profile = ""
+environment_type = ""
+project_name = ""
+vpc_name = ""
+subnet_name = ""
+db_subnet_group_name = ""
+permissions_boundary_arn = ""
+s3_bucket_name = ""
+tags = {
+ "mytag" = "tag"
+ "mytag2" = "tag2"
+}
+rds_username = ""
+rds_db_name = ""
+ingestion_schedule = ""
+disable_ingestion_schedule = false
diff --git a/infrastructure/ingestion/aws/variables.tf b/infrastructure/ingestion/aws/variables.tf
new file mode 100644
index 0000000..5b1b594
--- /dev/null
+++ b/infrastructure/ingestion/aws/variables.tf
@@ -0,0 +1,115 @@
+variable "aws_region" {
+ type = string
+  description = "AWS region in which to deploy resources"
+
+ validation {
+ condition = can(regex("^(af|ap|ca|eu|me|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\\d+$", var.aws_region))
+ error_message = "You should enter a valid AWS region (https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html)"
+ }
+}
+
+variable "aws_profile" {
+ type = string
+ description = "AWS profile to use for authentication"
+}
+
+variable "environment_type" {
+ type = string
+ description = "Environment type"
+
+ validation {
+ condition = contains(["PRODUCTION", "PRE-PRODUCTION", "QUALITY ASSURANCE", "INTEGRATION TESTING", "DEVELOPMENT", "LAB"], var.environment_type)
+ error_message = "The environment type should be one of the following values: PRODUCTION, PRE-PRODUCTION, QUALITY ASSURANCE, INTEGRATION TESTING, DEVELOPMENT, LAB (case sensitive)"
+ }
+}
+
+variable "vpc_name" {
+ type = string
+ description = "Identifier of the VPC to use for secrets-finder"
+}
+
+variable "subnet_name" {
+ type = string
+  description = "Name of the subnet in which to deploy the resources (wildcards are allowed: first match is used)"
+}
+
+variable "db_subnet_group_name" {
+ type = string
+ description = "Name of the RDS subnet group"
+}
+
+variable "tags" {
+ type = map(string)
+ description = "A map of tags to add to the resources"
+
+ validation {
+ condition = alltrue([for v in values(var.tags) : v != ""])
+ error_message = "Tag values must not be empty."
+ }
+}
+
+variable "project_name" {
+ type = string
+ description = "Name of the project"
+ default = "secrets-finder"
+}
+
+variable "permissions_boundary_arn" {
+ type = string
+ description = "ARN of the permissions boundary to use for the IAM role"
+
+ validation {
+ condition = can(regex("^arn:aws:iam::[0-9]{12}:policy\\/([a-zA-Z0-9-_.]+)$", var.permissions_boundary_arn))
+ error_message = "The provided ARN is not a valid ARN for a policy"
+ }
+}
+
+variable "s3_bucket_name" {
+ type = string
+ description = "Name of the S3 bucket to create"
+
+ validation {
+ condition = can(regex("^[a-z0-9.-]{3,63}$", var.s3_bucket_name))
+ error_message = "The S3 bucket name must be a valid string with only a-z0-9.- characters and have a length between 3 and 63"
+ }
+}
+
+variable "rds_username" {
+ type = string
+ description = "Username for the RDS instance"
+ default = "secrets_finder"
+
+ validation {
+ condition = can(regex("^[a-z][a-z0-9_]{1,}$", var.rds_username))
+ error_message = "The RDS username must be a valid string with only a-z0-9_ characters, have a length greater than 1, and not start with a number"
+ }
+}
+
+
+variable "rds_db_name" {
+ type = string
+ description = "Name of the database to create in the RDS instance"
+ default = "secrets_finder"
+
+ validation {
+ condition = can(regex("^[a-z][a-z0-9_]{1,}$", var.rds_db_name))
+ error_message = "The RDS database name must be a valid string with only a-z0-9_ characters, have a length greater than 1, and not start with a number"
+ }
+}
+
+variable "ingestion_schedule" {
+ type = string
+  description = "Schedule expression (rate or cron) for the CloudWatch Event Rule"
+ default = "rate(24 hours)"
+
+ validation {
+    condition     = can(regex("^rate\\(\\d+ (minute|minutes|hour|hours|day|days)\\)$", var.ingestion_schedule)) || can(regex("^cron\\(.+\\)$", var.ingestion_schedule))
+    error_message = "The ingestion schedule should be a rate expression such as 'rate(24 hours)' or a valid 'cron(...)' expression"
+ }
+}
+
+variable "disable_ingestion_schedule" {
+ type = bool
+ description = "Disable the ingestion schedule"
+ default = false
+}
diff --git a/infrastructure/ingestion/aws/vpc.tf b/infrastructure/ingestion/aws/vpc.tf
new file mode 100644
index 0000000..041279b
--- /dev/null
+++ b/infrastructure/ingestion/aws/vpc.tf
@@ -0,0 +1,22 @@
+data "aws_vpc" "selected" {
+ filter {
+ name = "tag:Name"
+ values = [var.vpc_name]
+ }
+}
+
+data "aws_subnets" "default" {
+ filter {
+ name = "tag:Name"
+ values = [var.subnet_name]
+ }
+}
+
+data "aws_subnet" "selected" {
+ id = element(sort(data.aws_subnets.default.ids), 0)
+}
+
+data "aws_security_group" "default" {
+ vpc_id = data.aws_vpc.selected.id
+ name = "default"
+}