# Patch summary (free-form preamble: patch tools skip text that precedes the first "diff --git" header).
#
# - infra/api/app-config: removes the per-environment `load_transform_args` module argument, variable and
#   output; env-config/scheduled_jobs.tf now defines `local.scheduled_jobs` (jobs `copy-oracle-data` and
#   `load-transform`) and exposes it through the new `scheduled_jobs` output. The load-transform command is
#   selected from `local.load-transform-args` by `var.environment` (keys: dev, staging, prod).
# - infra/api/app-config/env-config/environment-variables.tf: adds FLASK_APP to
#   `default_extra_environment_variables`; the deleted state machines set it as a container override instead.
# - infra/api/service: passes `local.environment_config.scheduled_jobs` to the service module and deletes
#   sfn_copy_oracle_data.tf and sfn_load_transform.tf.
# - infra/modules/service/scheduled_jobs.tf: creates one aws_scheduler_schedule, one aws_sfn_state_machine and
#   one aws_cloudwatch_log_group per entry of `var.scheduled_jobs` (for_each).
# - infra/modules/service/access-control.tf: narrows the ECS task assume-role principals to
#   ecs-tasks.amazonaws.com and removes the Step Functions / scheduler statements from the task_executor policy.
# - infra/modules/service/scheduler_role.tf: adds a dedicated role and policy assumed by scheduler.amazonaws.com.
# - NOTE(review): schedule_expression_timezone changes from "US/Eastern" (deleted files) to "Etc/UTC" (new
#   module resource); both jobs use rate() expressions, so confirm this has no practical effect.
diff --git a/infra/api/app-config/dev.tf b/infra/api/app-config/dev.tf index 0ecb76699..a24894689 100644 --- a/infra/api/app-config/dev.tf +++ b/infra/api/app-config/dev.tf @@ -14,19 +14,6 @@ module "dev_config" { # https://docs.aws.amazon.com/opensearch-service/latest/developerguide/what-is.html#choosing-version search_engine_version = "OpenSearch_2.15" - # Runs, but with everything disabled. - # See api/src/data_migration/command/load_transform.py for argument specifications. - load_transform_args = [ - "poetry", - "run", - "flask", - "data-migration", - "load-transform", - "--no-load", - "--no-transform", - "--no-set-current", - ] - service_override_extra_environment_variables = { } } diff --git a/infra/api/app-config/env-config/environment-variables.tf b/infra/api/app-config/env-config/environment-variables.tf index 1897fb343..5a7559772 100644 --- a/infra/api/app-config/env-config/environment-variables.tf +++ b/infra/api/app-config/env-config/environment-variables.tf @@ -3,6 +3,7 @@ locals { # This is a map rather than a list so that variables can be easily # overridden per environment using terraform's `merge` function default_extra_environment_variables = { + FLASK_APP = "src.app:create_app()" # Example environment variables # WORKER_THREADS_COUNT = 4 # LOG_LEVEL = "info" diff --git a/infra/api/app-config/env-config/outputs.tf b/infra/api/app-config/env-config/outputs.tf index c9375fa4f..d2c785fc8 100644 --- a/infra/api/app-config/env-config/outputs.tf +++ b/infra/api/app-config/env-config/outputs.tf @@ -37,6 +37,10 @@ output "service_config" { } } +output "scheduled_jobs" { + value = local.scheduled_jobs +} + output "incident_management_service_integration" { value = var.has_incident_management_service ? 
{ integration_url_param_name = "/monitoring/${var.app_name}/${var.environment}/incident-management-integration-url" @@ -46,7 +50,3 @@ output "incident_management_service_integration" { output "domain" { value = var.domain } - -output "load_transform_args" { - value = var.load_transform_args -} diff --git a/infra/api/app-config/env-config/scheduled_jobs.tf b/infra/api/app-config/env-config/scheduled_jobs.tf new file mode 100644 index 000000000..9950d79ae --- /dev/null +++ b/infra/api/app-config/env-config/scheduled_jobs.tf @@ -0,0 +1,54 @@ +locals { + # The `task_command` is what you want your scheduled job to run, for example: ["poetry", "run", "flask"]. + # Schedule expression defines the frequency at which the job should run. + # The syntax for `schedule_expression` is explained in the following documentation: + # https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-scheduled-rule-pattern.html + # The `state` is the state of the scheduled job. It can be either "ENABLED" or "DISABLED". + + # See api/src/data_migration/command/load_transform.py for argument specifications. + load-transform-args = { + # Runs, but with everything disabled. 
+ dev = [ + "poetry", + "run", + "flask", + "data-migration", + "load-transform", + "--no-load", + "--no-transform", + "--no-set-current", + ], + staging = [ + "poetry", + "run", + "flask", + "data-migration", + "load-transform", + "--load", + "--transform", + "--set-current", + ], + prod = [ + "poetry", + "run", + "flask", + "data-migration", + "load-transform", + "--load", + "--transform", + "--set-current", + ], + } + scheduled_jobs = { + copy-oracle-data = { + task_command = ["poetry", "run", "flask", "data-migration", "copy-oracle-data"] + schedule_expression = "rate(2 minutes)" + state = "ENABLED" + } + load-transform = { + task_command = local.load-transform-args[var.environment] + schedule_expression = "rate(1 days)" + state = "ENABLED" + } + } +} diff --git a/infra/api/app-config/env-config/variables.tf b/infra/api/app-config/env-config/variables.tf index 8d7ab88b8..2749d34d5 100644 --- a/infra/api/app-config/env-config/variables.tf +++ b/infra/api/app-config/env-config/variables.tf @@ -85,7 +85,3 @@ variable "service_override_extra_environment_variables" { EOT default = {} } - -variable "load_transform_args" { - type = list(string) -} diff --git a/infra/api/app-config/prod.tf b/infra/api/app-config/prod.tf index 2d9fd1993..d16954068 100644 --- a/infra/api/app-config/prod.tf +++ b/infra/api/app-config/prod.tf @@ -23,18 +23,6 @@ module "prod_config" { # https://docs.aws.amazon.com/opensearch-service/latest/developerguide/what-is.html#choosing-version search_engine_version = "OpenSearch_2.15" - # See api/src/data_migration/command/load_transform.py for argument specifications. 
- load_transform_args = [ - "poetry", - "run", - "flask", - "data-migration", - "load-transform", - "--load", - "--transform", - "--set-current", - ] - service_override_extra_environment_variables = { } } diff --git a/infra/api/app-config/staging.tf b/infra/api/app-config/staging.tf index a14b8b73b..c49e0ff6b 100644 --- a/infra/api/app-config/staging.tf +++ b/infra/api/app-config/staging.tf @@ -14,18 +14,6 @@ module "staging_config" { # https://docs.aws.amazon.com/opensearch-service/latest/developerguide/what-is.html#choosing-version search_engine_version = "OpenSearch_2.15" - # See api/src/data_migration/command/load_transform.py for argument specifications. - load_transform_args = [ - "poetry", - "run", - "flask", - "data-migration", - "load-transform", - "--load", - "--transform", - "--set-current", - ] - service_override_extra_environment_variables = { } } diff --git a/infra/api/service/main.tf b/infra/api/service/main.tf index 69a9e2a6c..889ab1a7a 100644 --- a/infra/api/service/main.tf +++ b/infra/api/service/main.tf @@ -127,6 +127,8 @@ module "service" { app_access_policy_arn = data.aws_iam_policy.app_db_access_policy[0].arn migrator_access_policy_arn = data.aws_iam_policy.migrator_db_access_policy[0].arn + scheduled_jobs = local.environment_config.scheduled_jobs + db_vars = module.app_config.has_database ? { security_group_ids = data.aws_rds_cluster.db_cluster[0].vpc_security_group_ids connection_info = { diff --git a/infra/api/service/sfn_copy_oracle_data.tf b/infra/api/service/sfn_copy_oracle_data.tf deleted file mode 100644 index fd104da1d..000000000 --- a/infra/api/service/sfn_copy_oracle_data.tf +++ /dev/null @@ -1,100 +0,0 @@ -# docs: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group -resource "aws_cloudwatch_log_group" "copy_oracle_data" { - name_prefix = "/aws/vendedlogs/states/${local.service_name}-copy-oracle-data" - - # Conservatively retain logs for 5 years. 
- # Looser requirements may allow shorter retention periods - retention_in_days = 1827 - - # checkov:skip=CKV_AWS_158:skip requirement to encrypt with customer managed KMS key -} - -# docs: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sfn_state_machine -resource "aws_sfn_state_machine" "copy_oracle_data" { - - name = "${local.service_name}-copy-oracle-data" - role_arn = module.service.task_role_arn - - definition = jsonencode({ - "StartAt" : "ExecuteECSTask", - "States" : { - "ExecuteECSTask" : { - "Type" : "Task", - # docs: https://docs.aws.amazon.com/step-functions/latest/dg/connect-ecs.html - "Resource" : "arn:aws:states:::ecs:runTask.sync", - "Parameters" : { - "Cluster" : module.service.cluster_arn, - "TaskDefinition" : module.service.task_definition_arn, - "LaunchType" : "FARGATE", - "NetworkConfiguration" : { - "AwsvpcConfiguration" : { - "Subnets" : data.aws_subnets.private.ids, - "SecurityGroups" : [module.service.app_security_group_id], - } - }, - "Overrides" : { - "ContainerOverrides" : [ - { - "Name" : local.service_name, - "Environment" : [ - { - "Name" : "FLASK_APP", - "Value" : "src.app:create_app()", - } - ] - "Command" : [ - "poetry", - "run", - "flask", - "data-migration", - "copy-oracle-data", - ] - } - ] - } - }, - "End" : true - } - } - }) - - logging_configuration { - log_destination = "${aws_cloudwatch_log_group.copy_oracle_data.arn}:*" - include_execution_data = true - level = "ERROR" - } - - tracing_configuration { - enabled = true - } -} - -# docs: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/scheduler_schedule_group -resource "aws_scheduler_schedule_group" "copy_oracle_data" { - name = "${local.service_name}-copy-oracle-data" -} - -# docs: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/scheduler_schedule -resource "aws_scheduler_schedule" "copy_oracle_data" { - # checkov:skip=CKV_AWS_297:Ignore the managed customer KMS key requirement for now - - 
name = "${local.service_name}-copy-oracle-data" - state = "ENABLED" - group_name = aws_scheduler_schedule_group.copy_oracle_data.id - schedule_expression = "rate(2 minutes)" - schedule_expression_timezone = "US/Eastern" - - flexible_time_window { - mode = "OFF" - } - - # target is the state machine - target { - arn = aws_sfn_state_machine.copy_oracle_data.arn - role_arn = module.service.task_role_arn - - retry_policy { - maximum_retry_attempts = 0 # dont retry, just wait for the next execution - } - } -} diff --git a/infra/api/service/sfn_load_transform.tf b/infra/api/service/sfn_load_transform.tf deleted file mode 100644 index 194cc68dc..000000000 --- a/infra/api/service/sfn_load_transform.tf +++ /dev/null @@ -1,94 +0,0 @@ -# docs: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group -resource "aws_cloudwatch_log_group" "load_transform" { - name_prefix = "/aws/vendedlogs/states/${local.service_name}-load-transform" - - # Conservatively retain logs for 5 years. 
- # Looser requirements may allow shorter retention periods - retention_in_days = 1827 - - # checkov:skip=CKV_AWS_158:skip requirement to encrypt with customer managed KMS key -} - -# docs: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sfn_state_machine -resource "aws_sfn_state_machine" "load_transform" { - - name = "${local.service_name}-load-transform" - role_arn = module.service.task_role_arn - - definition = jsonencode({ - "StartAt" : "ExecuteECSTask", - "States" : { - "ExecuteECSTask" : { - "Type" : "Task", - # docs: https://docs.aws.amazon.com/step-functions/latest/dg/connect-ecs.html - "Resource" : "arn:aws:states:::ecs:runTask.sync", - "Parameters" : { - "Cluster" : module.service.cluster_arn, - "TaskDefinition" : module.service.task_definition_arn, - "LaunchType" : "FARGATE", - "NetworkConfiguration" : { - "AwsvpcConfiguration" : { - "Subnets" : data.aws_subnets.private.ids, - "SecurityGroups" : [module.service.app_security_group_id], - } - }, - "Overrides" : { - "ContainerOverrides" : [ - { - "Name" : local.service_name, - "Environment" : [ - { - "Name" : "FLASK_APP", - "Value" : "src.app:create_app()", - } - ] - "Command" : module.app_config.environment_configs[var.environment_name].load_transform_args - } - ] - } - }, - "End" : true - } - } - }) - - logging_configuration { - log_destination = "${aws_cloudwatch_log_group.load_transform.arn}:*" - include_execution_data = true - level = "ERROR" - } - - tracing_configuration { - enabled = true - } -} - -# docs: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/scheduler_schedule_group -resource "aws_scheduler_schedule_group" "load_transform" { - name = "${local.service_name}-load-transform" -} - -# docs: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/scheduler_schedule -resource "aws_scheduler_schedule" "load_transform" { - # checkov:skip=CKV_AWS_297:Ignore the managed customer KMS key requirement for now - - name = 
"${local.service_name}-load-transform" - state = "ENABLED" - group_name = aws_scheduler_schedule_group.load_transform.id - schedule_expression = "rate(1 days)" - schedule_expression_timezone = "US/Eastern" - - flexible_time_window { - mode = "OFF" - } - - # target is the state machine - target { - arn = aws_sfn_state_machine.load_transform.arn - role_arn = module.service.task_role_arn - - retry_policy { - maximum_retry_attempts = 0 # dont retry, just wait for the next execution - } - } -} diff --git a/infra/modules/service/access-control.tf b/infra/modules/service/access-control.tf index 07a37d523..3f10d1162 100644 --- a/infra/modules/service/access-control.tf +++ b/infra/modules/service/access-control.tf @@ -27,14 +27,13 @@ data "aws_iam_policy_document" "ecs_tasks_assume_role_policy" { ] principals { type = "Service" - identifiers = ["ecs-tasks.amazonaws.com", "states.amazonaws.com", "scheduler.amazonaws.com"] + identifiers = ["ecs-tasks.amazonaws.com"] } } } data "aws_iam_policy_document" "task_executor" { # checkov:skip=CKV_AWS_111:Ignore some IAM policy checks for the task executor role - # checkov:skip=CKV_AWS_356:TODO: https://github.com/HHS/simpler-grants-gov/issues/2365 # Allow ECS to log to Cloudwatch. 
statement { @@ -48,75 +47,6 @@ data "aws_iam_policy_document" "task_executor" { ] } - # via https://docs.aws.amazon.com/step-functions/latest/dg/cw-logs.html - statement { - sid = "UnscopeLogsPermissions" - actions = [ - "logs:CreateLogDelivery", - "logs:CreateLogStream", - "logs:GetLogDelivery", - "logs:UpdateLogDelivery", - "logs:DeleteLogDelivery", - "logs:ListLogDeliveries", - "logs:PutLogEvents", - "logs:PutResourcePolicy", - "logs:DescribeResourcePolicies", - "logs:DescribeLogGroups", - ] - resources = ["*"] - } - - # via https://docs.aws.amazon.com/step-functions/latest/dg/xray-iam.html - statement { - sid = "StepFunctionsXRay" - actions = [ - "xray:PutTraceSegments", - "xray:PutTelemetryRecords", - "xray:GetSamplingRules", - "xray:GetSamplingTargets" - ] - resources = ["*"] - } - - statement { - sid = "StepFunctionsRunTask" - actions = [ - "ecs:RunTask", - "ecs:StopTask", - "ecs:DescribeTasks", - ] - resources = ["*"] - } - - statement { - sid = "StepFunctionsPassRole" - actions = [ - "iam:PassRole", - ] - resources = [ - aws_iam_role.app_service.arn, - aws_iam_role.task_executor.arn, - ] - } - - statement { - sid = "StepFunctionsEvents" - actions = [ - "events:PutTargets", - "events:PutRule", - "events:DescribeRule", - ] - resources = ["*"] - } - - statement { - sid = "StepFunctionsStartExecution" - actions = [ - "states:StartExecution", - ] - resources = ["arn:aws:states:*:*:stateMachine:*"] - } - # Allow ECS to authenticate with ECR statement { sid = "ECRAuth" diff --git a/infra/modules/service/scheduled_jobs.tf b/infra/modules/service/scheduled_jobs.tf new file mode 100644 index 000000000..53693b1a1 --- /dev/null +++ b/infra/modules/service/scheduled_jobs.tf @@ -0,0 +1,86 @@ +resource "aws_scheduler_schedule" "scheduled_jobs" { + for_each = var.scheduled_jobs + + # TODO(https://github.com/navapbc/template-infra/issues/164) Encrypt with customer managed KMS key + # checkov:skip=CKV_AWS_297:Encrypt with customer key in future work + + name = 
"${var.service_name}-${each.key}" + state = each.value.state + schedule_expression = each.value.schedule_expression + schedule_expression_timezone = "Etc/UTC" + + flexible_time_window { + mode = "OFF" + } + + # target is the state machine + target { + arn = aws_sfn_state_machine.scheduled_jobs[each.key].arn + role_arn = aws_iam_role.scheduler.arn + + retry_policy { + maximum_retry_attempts = 0 + } + } +} + +resource "aws_sfn_state_machine" "scheduled_jobs" { + for_each = var.scheduled_jobs + + name = "${var.service_name}-${each.key}" + role_arn = aws_iam_role.workflow_orchestrator.arn + + definition = jsonencode({ + "StartAt" : "RunTask", + "States" : { + "RunTask" : { + "Type" : "Task", + # docs: https://docs.aws.amazon.com/step-functions/latest/dg/connect-ecs.html + "Resource" : "arn:aws:states:::ecs:runTask.sync", + "Parameters" : { + "Cluster" : aws_ecs_cluster.cluster.arn, + "TaskDefinition" : aws_ecs_task_definition.app.arn, + "LaunchType" : "FARGATE", + "NetworkConfiguration" : { + "AwsvpcConfiguration" : { + "Subnets" : var.private_subnet_ids, + "SecurityGroups" : [aws_security_group.app.id], + } + }, + "Overrides" : { + "ContainerOverrides" : [ + { + "Name" : var.service_name, + "Command" : each.value.task_command + } + ] + } + }, + "End" : true + } + } + }) + + logging_configuration { + log_destination = "${aws_cloudwatch_log_group.scheduled_jobs[each.key].arn}:*" + include_execution_data = true + level = "ERROR" + } + + tracing_configuration { + enabled = true + } +} + +resource "aws_cloudwatch_log_group" "scheduled_jobs" { + for_each = var.scheduled_jobs + + name_prefix = "/aws/vendedlogs/states/${var.service_name}-${each.key}" + + # Conservatively retain logs for 5 years. 
+ # Looser requirements may allow shorter retention periods + retention_in_days = 1827 + + # TODO(https://github.com/navapbc/template-infra/issues/164) Encrypt with customer managed KMS key + # checkov:skip=CKV_AWS_158:Encrypt service logs with customer key in future work +} diff --git a/infra/modules/service/scheduler_role.tf b/infra/modules/service/scheduler_role.tf new file mode 100644 index 000000000..769d7b364 --- /dev/null +++ b/infra/modules/service/scheduler_role.tf @@ -0,0 +1,61 @@ +#---------------------- +# Schedule Manager Role +#---------------------- +# This role and policy are used by EventBridge to manage the scheduled jobs. + +resource "aws_iam_role" "scheduler" { + name = "${var.service_name}-scheduler" + managed_policy_arns = [aws_iam_policy.scheduler.arn] + assume_role_policy = data.aws_iam_policy_document.scheduler_assume_role.json +} + +data "aws_iam_policy_document" "scheduler_assume_role" { + statement { + actions = ["sts:AssumeRole"] + principals { + type = "Service" + identifiers = ["scheduler.amazonaws.com"] + } + } +} + +resource "aws_iam_policy" "scheduler" { + name = "${var.service_name}-scheduler" + policy = data.aws_iam_policy_document.scheduler.json +} + +data "aws_iam_policy_document" "scheduler" { + + statement { + sid = "StepFunctionsEvents" + actions = [ + "events:PutTargets", + "events:PutRule", + "events:DescribeRule", + ] + resources = ["arn:aws:events:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:rule/StepFunctionsGetEventsForStepFunctionsExecutionRule"] + } + + dynamic "statement" { + for_each = aws_sfn_state_machine.scheduled_jobs + + content { + actions = [ + "states:StartExecution", + ] + resources = [statement.value.arn] + } + } + + dynamic "statement" { + for_each = aws_sfn_state_machine.scheduled_jobs + + content { + actions = [ + "states:DescribeExecution", + "states:StopExecution", + ] + resources = ["${statement.value.arn}:*"] + } + } +} diff --git 
a/infra/modules/service/variables.tf b/infra/modules/service/variables.tf
index ce781b385..b635a17a0 100644
--- a/infra/modules/service/variables.tf
+++ b/infra/modules/service/variables.tf
@@ -75,6 +75,16 @@ variable "extra_environment_variables" {
   default = {}
 }
 
+variable "scheduled_jobs" {
+  description = "Variable for configuration of the step functions scheduled job"
+  type = map(object({
+    task_command        = list(string)
+    schedule_expression = string
+    state               = string
+  }))
+  default = {}
+}
+
 variable "secrets" {
   type = set(object({
     name = string
diff --git a/infra/modules/service/workflow_orchestrator_role.tf b/infra/modules/service/workflow_orchestrator_role.tf
new file mode 100644
index 000000000..9f5b2943e
--- /dev/null
+++ b/infra/modules/service/workflow_orchestrator_role.tf
@@ -0,0 +1,131 @@
+#--------------------------------
+# Scheduler Workflow Manager Role
+#--------------------------------
+# This role and policy are used by the Step Functions state machine that manages the scheduled jobs workflow.
+
+resource "aws_iam_role" "workflow_orchestrator" {
+  name                = "${var.service_name}-workflow-orchestrator"
+  managed_policy_arns = [aws_iam_policy.workflow_orchestrator.arn]
+  assume_role_policy  = data.aws_iam_policy_document.workflow_orchestrator_assume_role.json
+}
+
+# Trust policy: only Step Functions state machines in this account and region can assume the role.
+data "aws_iam_policy_document" "workflow_orchestrator_assume_role" {
+  statement {
+    actions = ["sts:AssumeRole"]
+    principals {
+      type        = "Service"
+      identifiers = ["states.amazonaws.com"]
+    }
+    condition {
+      test     = "ArnLike"
+      variable = "aws:SourceArn"
+      values   = ["arn:aws:states:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:stateMachine:*"]
+    }
+
+    condition {
+      test     = "StringLike"
+      variable = "aws:SourceAccount"
+      values = [
+        data.aws_caller_identity.current.account_id
+      ]
+    }
+  }
+}
+
+resource "aws_iam_policy" "workflow_orchestrator" {
+  name   = "${var.service_name}-workflow-orchestrator"
+  policy = data.aws_iam_policy_document.workflow_orchestrator.json
+}
+
+# Permissions for the state machine execution role: log delivery, X-Ray tracing, and running the ECS task.
+#tfsec:ignore:aws-iam-no-policy-wildcards
+data "aws_iam_policy_document" "workflow_orchestrator" {
+  # checkov:skip=CKV_AWS_111:These permissions are scoped just fine
+  # checkov:skip=CKV_AWS_356:These permissions are scoped just fine
+
+  # via https://docs.aws.amazon.com/step-functions/latest/dg/cw-logs.html
+  statement {
+    sid = "UnscopeLogsPermissions"
+    actions = [
+      "logs:CreateLogDelivery",
+      "logs:CreateLogStream",
+      "logs:GetLogDelivery",
+      "logs:UpdateLogDelivery",
+      "logs:DeleteLogDelivery",
+      "logs:ListLogDeliveries",
+      "logs:PutLogEvents",
+      "logs:PutResourcePolicy",
+      "logs:DescribeResourcePolicies",
+      "logs:DescribeLogGroups",
+    ]
+    resources = ["*"]
+  }
+
+  # The scheduled-job state machines set tracing_configuration { enabled = true }, so the
+  # role they run as must be allowed to send trace data to X-Ray.
+  # via https://docs.aws.amazon.com/step-functions/latest/dg/xray-iam.html
+  statement {
+    sid = "StepFunctionsXRay"
+    actions = [
+      "xray:PutTraceSegments",
+      "xray:PutTelemetryRecords",
+      "xray:GetSamplingRules",
+      "xray:GetSamplingTargets"
+    ]
+    resources = ["*"]
+  }
+
+  # NOTE(review): presumably the managed rule Step Functions uses for ecs:runTask.sync - confirm against the connect-ecs docs.
+  statement {
+    sid = "StepFunctionsEvents"
+    actions = [
+      "events:PutTargets",
+      "events:PutRule",
+      "events:DescribeRule",
+    ]
+    resources = [
+      "arn:aws:events:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:rule/StepFunctionsGetEventsForECSTaskRule",
+    ]
+  }
+
+  # Run any revision of the app task definition, but only on this module's ECS cluster.
+  statement {
+    effect    = "Allow"
+    actions   = ["ecs:RunTask"]
+    resources = ["${aws_ecs_task_definition.app.arn_without_revision}:*"]
+    condition {
+      test     = "ArnLike"
+      variable = "ecs:cluster"
+      values   = [aws_ecs_cluster.cluster.arn]
+    }
+  }
+
+  # Stop and inspect tasks on this module's ECS cluster.
+  # NOTE(review): the task ARN pattern assumes the cluster is named var.service_name - confirm.
+  statement {
+    effect = "Allow"
+    actions = [
+      "ecs:StopTask",
+      "ecs:DescribeTasks",
+    ]
+    resources = ["arn:aws:ecs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:task/${var.service_name}/*"]
+    condition {
+      test     = "ArnLike"
+      variable = "ecs:cluster"
+      values   = [aws_ecs_cluster.cluster.arn]
+    }
+  }
+
+  # Lets the state machine pass these two roles when it starts the task (presumably the task's execution and task roles).
+  statement {
+    sid = "PassRole"
+    actions = [
+      "iam:PassRole",
+    ]
+    resources = [
+      aws_iam_role.task_executor.arn,
+      aws_iam_role.app_service.arn,
+    ]
+  }
+}