From c581d36fcddeee042779440a82b1fa0f8d5095bf Mon Sep 17 00:00:00 2001 From: abhinavkumarsph <122246745+abhinavkumarsph@users.noreply.github.com> Date: Fri, 15 Dec 2023 10:31:04 +0800 Subject: [PATCH] ingestion submodule additional features + refactor (#22) --- examples/ingestion/main.tf | 18 ++++++------- examples/ingestion/providers.tf | 7 +++++ examples/ingestion/versions.tf | 14 ++++++++++ modules/ingestion/iam/README.md | 9 ++++--- modules/ingestion/iam/data.tf | 6 ++--- modules/ingestion/iam/iam.tf | 14 +++++----- modules/ingestion/iam/locals.tf | 3 +++ modules/ingestion/iam/outputs.tf | 5 ++++ modules/ingestion/iam/variables.tf | 11 ++++---- modules/ingestion/pipeline/README.md | 23 ++++++++-------- modules/ingestion/pipeline/cloudwatch.tf | 10 +++---- modules/ingestion/pipeline/data.tf | 7 +++-- modules/ingestion/pipeline/iam.tf | 8 +++--- modules/ingestion/pipeline/locals.tf | 7 +++-- modules/ingestion/pipeline/outputs.tf | 8 +++--- modules/ingestion/pipeline/pipeline.tf | 12 +++++---- modules/ingestion/pipeline/variables.tf | 34 ++++++++++++++---------- 17 files changed, 120 insertions(+), 76 deletions(-) create mode 100644 examples/ingestion/providers.tf create mode 100644 examples/ingestion/versions.tf create mode 100644 modules/ingestion/iam/locals.tf diff --git a/examples/ingestion/main.tf b/examples/ingestion/main.tf index a8d498c..12ec4fa 100644 --- a/examples/ingestion/main.tf +++ b/examples/ingestion/main.tf @@ -15,20 +15,20 @@ module "ingestion_iam" { #checkov:skip=CKV_TF_1:Ensure Terraform module sources use a commit hash source = "../..//modules/ingestion/iam" - domain_name = local.domain_name - domain_arn = "arn:aws:es:${local.region}:${local.account_id}:domain/${local.domain_name}" + pipeline_role_name = "opensearch-ingestion-role" + opensearch_domain_arns = [ + "arn:aws:es:${local.region}:${local.account_id}:domain/${local.domain_name}", + ] } module "ingestion_pipeline" { #checkov:skip=CKV_TF_1:Ensure Terraform module sources use a commit 
hash source = "../..//modules/ingestion/pipeline" - domain_name = local.domain_name + name = "opensearch-ingestion-pipeline" + min_units = 1 + max_units = 2 - pipeline_name = "${local.domain_name}-pipeline" - pipeline_min_units = 1 - pipeline_max_units = 2 - - pipeline_role_name = module.ingestion_iam.pipeline_role_name - pipeline_configuration_body = templatefile("./pipeline.yaml", local.pipeline_values) + iam_role_name = module.ingestion_iam.pipeline_role_name + configuration_body = templatefile("./pipeline.yaml", local.pipeline_values) } diff --git a/examples/ingestion/providers.tf b/examples/ingestion/providers.tf new file mode 100644 index 0000000..3f30c27 --- /dev/null +++ b/examples/ingestion/providers.tf @@ -0,0 +1,7 @@ +provider "aws" { + region = "ap-southeast-1" +} + +provider "awscc" { + region = "ap-southeast-1" +} diff --git a/examples/ingestion/versions.tf b/examples/ingestion/versions.tf new file mode 100644 index 0000000..585329c --- /dev/null +++ b/examples/ingestion/versions.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.4" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.15" + } + awscc = { + source = "hashicorp/awscc" + version = "~> 0.60" + } + } +} diff --git a/modules/ingestion/iam/README.md b/modules/ingestion/iam/README.md index fe80520..436351b 100644 --- a/modules/ingestion/iam/README.md +++ b/modules/ingestion/iam/README.md @@ -2,7 +2,7 @@ | Name | Version | |------|---------| -| [terraform](#requirement\_terraform) | >= 1.4 | +| [terraform](#requirement\_terraform) | >= 1.4 | | [aws](#requirement\_aws) | >= 4.38 | ## Providers @@ -22,19 +22,20 @@ | Name | Type | |------|------| -| [aws_iam_policy_document.pipeline_opensearch](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.opensearch_ingestion](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | 
## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [domain\_arn](#input\_domain\_arn) | The ARN of the OpenSearch cluster | `string` | n/a | yes | -| [domain\_name](#input\_domain\_name) | The name of the OpenSearch cluster | `string` | n/a | yes | +| [opensearch\_domain\_arns](#input\_opensearch\_domain\_arns) | (Optional) The ARNs of the OpenSearch domains to ingest data into | `list(string)` | `[]` | no | +| [pipeline\_role\_name](#input\_pipeline\_role\_name) | The name of the pipeline IAM role | `string` | n/a | yes | | [tags](#input\_tags) | A map of tags to add to all resources | `map(string)` | `{}` | no | ## Outputs | Name | Description | |------|-------------| +| [opensearch\_ingestion\_policy\_arn](#output\_opensearch\_ingestion\_policy\_arn) | ARN of the Opensearch ingestion policy | | [pipeline\_role\_arn](#output\_pipeline\_role\_arn) | ARN of the Opensearch ingestion pipeline role | | [pipeline\_role\_name](#output\_pipeline\_role\_name) | Name of the Opensearch ingestion pipeline role | diff --git a/modules/ingestion/iam/data.tf b/modules/ingestion/iam/data.tf index fc8c58d..87b5032 100644 --- a/modules/ingestion/iam/data.tf +++ b/modules/ingestion/iam/data.tf @@ -1,14 +1,14 @@ -data "aws_iam_policy_document" "pipeline_opensearch" { +data "aws_iam_policy_document" "opensearch_ingestion" { statement { effect = "Allow" actions = ["es:DescribeDomain"] - resources = [var.domain_arn] + resources = var.opensearch_domain_arns } statement { effect = "Allow" actions = ["es:ESHttp*"] - resources = ["${var.domain_arn}/*"] + resources = [for domain in var.opensearch_domain_arns : "${domain}/*"] } } diff --git a/modules/ingestion/iam/iam.tf b/modules/ingestion/iam/iam.tf index e1a570b..3fa94fb 100644 --- a/modules/ingestion/iam/iam.tf +++ b/modules/ingestion/iam/iam.tf @@ -2,17 +2,15 @@ module "pipeline_role" { source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role" version = "~> 5.5.0" 
- create_role = true - - role_name = "${var.domain_name}-pipeline-role" + create_role = true + role_name = var.pipeline_role_name role_description = "IAM Role to be assumed by Opensearch ingestion pipeline" trusted_role_services = [ "osis-pipelines.amazonaws.com", ] - role_requires_mfa = false - custom_role_policy_arns = [module.pipeline_opensearch_policy.arn] + custom_role_policy_arns = local.create_opensearch_ingestion_policy ? [module.pipeline_opensearch_policy.arn] : [] tags = var.tags } @@ -21,12 +19,12 @@ module "pipeline_opensearch_policy" { source = "terraform-aws-modules/iam/aws//modules/iam-policy" version = "~> 5.5.0" - create_policy = true + create_policy = local.create_opensearch_ingestion_policy - name = "${var.domain_name}-ingestion-policy" + name = "${var.pipeline_role_name}-ingestion-policy" path = "/" description = "IAM Policy for Opensearch ingestion" - policy = data.aws_iam_policy_document.pipeline_opensearch.json + policy = data.aws_iam_policy_document.opensearch_ingestion.json tags = var.tags } diff --git a/modules/ingestion/iam/locals.tf b/modules/ingestion/iam/locals.tf new file mode 100644 index 0000000..95233a2 --- /dev/null +++ b/modules/ingestion/iam/locals.tf @@ -0,0 +1,3 @@ +locals { + create_opensearch_ingestion_policy = length(var.opensearch_domain_arns) > 0 ? true : false +} diff --git a/modules/ingestion/iam/outputs.tf b/modules/ingestion/iam/outputs.tf index c70d6fd..fd32ddd 100644 --- a/modules/ingestion/iam/outputs.tf +++ b/modules/ingestion/iam/outputs.tf @@ -7,3 +7,8 @@ output "pipeline_role_arn" { description = "ARN of the Opensearch ingestion pipeline role" value = module.pipeline_role.iam_role_arn } + +output "opensearch_ingestion_policy_arn" { + description = "ARN of the Opensearch ingestion policy" + value = local.create_opensearch_ingestion_policy ? 
module.pipeline_opensearch_policy.arn : null +} diff --git a/modules/ingestion/iam/variables.tf b/modules/ingestion/iam/variables.tf index 7f3c735..2c321dd 100644 --- a/modules/ingestion/iam/variables.tf +++ b/modules/ingestion/iam/variables.tf @@ -1,11 +1,12 @@ -variable "domain_name" { - description = "The name of the OpenSearch cluster" +variable "pipeline_role_name" { + description = "The name of the pipeline IAM role" type = string } -variable "domain_arn" { - description = "The ARN of the OpenSearch cluster" - type = string +variable "opensearch_domain_arns" { + description = "(Optional) The ARNs of the OpenSearch domains to ingest data into" + type = list(string) + default = [] } variable "tags" { diff --git a/modules/ingestion/pipeline/README.md b/modules/ingestion/pipeline/README.md index 84e5af8..1f24a5f 100644 --- a/modules/ingestion/pipeline/README.md +++ b/modules/ingestion/pipeline/README.md @@ -2,7 +2,7 @@ | Name | Version | |------|---------| -| [terraform](#requirement\_terraform) | >= 1.4 | +| [terraform](#requirement\_terraform) | >= 1.4 | | [aws](#requirement\_aws) | >= 4.38 | | [awscc](#requirement\_awscc) | >= 0.52 | @@ -37,20 +37,21 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [domain\_name](#input\_domain\_name) | The name of the OpenSearch cluster | `string` | n/a | yes | -| [pipeline\_configuration\_body](#input\_pipeline\_configuration\_body) | The Data Prepper pipeline configuration in YAML format | `string` | n/a | yes | -| [pipeline\_enable\_logging](#input\_pipeline\_enable\_logging) | If true, will create a cloudwatch log group to monitor the pipeline | `bool` | `true` | no | -| [pipeline\_log\_group\_retention\_days](#input\_pipeline\_log\_group\_retention\_days) | Duration in days for cloudwatch log group retention | `number` | `30` | no | -| [pipeline\_max\_units](#input\_pipeline\_max\_units) | The maximum pipeline capacity, in Ingestion Compute Units | `number` | n/a 
| yes | -| [pipeline\_min\_units](#input\_pipeline\_min\_units) | The minimum pipeline capacity, in Ingestion Compute Units | `number` | n/a | yes | -| [pipeline\_name](#input\_pipeline\_name) | Name of the ingestion pipeline | `string` | `null` | no | -| [pipeline\_role\_name](#input\_pipeline\_role\_name) | Name of the pipeline role to use | `string` | n/a | yes | +| [configuration\_body](#input\_configuration\_body) | The Data Prepper pipeline configuration in YAML format | `string` | n/a | yes | +| [enable\_logging](#input\_enable\_logging) | If true, will create a cloudwatch log group to monitor the pipeline | `bool` | `true` | no | +| [iam\_role\_name](#input\_iam\_role\_name) | Name of the pipeline IAM role | `string` | n/a | yes | +| [log\_group\_retention\_days](#input\_log\_group\_retention\_days) | Duration in days for cloudwatch log group retention | `number` | `30` | no | +| [max\_units](#input\_max\_units) | The maximum pipeline capacity, in Ingestion Compute Units | `number` | n/a | yes | +| [min\_units](#input\_min\_units) | The minimum pipeline capacity, in Ingestion Compute Units | `number` | n/a | yes | +| [name](#input\_name) | Name of the ingestion pipeline | `string` | n/a | yes | +| [security\_group\_ids](#input\_security\_group\_ids) | Security group IDs to attach to the pipeline | `list(string)` | `[]` | no | +| [subnet\_ids](#input\_subnet\_ids) | Subnet IDs to deploy pipeline in. 
Only needed if pipeline is to be deployed in VPC mode | `list(string)` | `[]` | no | | [tags](#input\_tags) | A map of tags to add to all resources | `map(string)` | `{}` | no | ## Outputs | Name | Description | |------|-------------| +| [arn](#output\_arn) | ARN of the ingestion pipeline | +| [id](#output\_id) | ID of the ingestion pipeline | | [ingest\_endpoint\_urls](#output\_ingest\_endpoint\_urls) | The ingestion endpoints for the pipeline that you can send data to | -| [pipeline\_arn](#output\_pipeline\_arn) | ARN of the ingestion pipeline | -| [pipeline\_name](#output\_pipeline\_name) | Name of the ingestion pipeline | diff --git a/modules/ingestion/pipeline/cloudwatch.tf b/modules/ingestion/pipeline/cloudwatch.tf index 9385444..11867dc 100644 --- a/modules/ingestion/pipeline/cloudwatch.tf +++ b/modules/ingestion/pipeline/cloudwatch.tf @@ -1,10 +1,10 @@ resource "aws_cloudwatch_log_group" "this" { #checkov:skip=CKV_AWS_338:Ensure that CloudWatch Log Group specifies retention days - count = var.pipeline_enable_logging ? 1 : 0 + count = var.enable_logging ? 1 : 0 name = local.pipeline_log_group kms_key_id = try(module.cloudwatch_kms_secret[0].key_arn, "") - retention_in_days = var.pipeline_log_group_retention_days + retention_in_days = var.log_group_retention_days tags = var.tags } @@ -12,9 +12,9 @@ resource "aws_cloudwatch_log_group" "this" { module "cloudwatch_kms_secret" { source = "SPHTech-Platform/kms/aws" version = "~> 0.1.0" - count = var.pipeline_enable_logging ? 1 : 0 + count = var.enable_logging ? 
1 : 0 - key_description = "Encrypt cloudwatch log group for ${local.pipeline_name}" - alias = "alias/${join("-", [local.pipeline_name, "key"])}" + key_description = "Encrypt cloudwatch log group for ${var.name}" + alias = "alias/${join("-", [var.name, "key"])}" key_policy_statements = [data.aws_iam_policy_document.cloudwatch_log_group.json] } diff --git a/modules/ingestion/pipeline/data.tf b/modules/ingestion/pipeline/data.tf index 1247844..4851b76 100644 --- a/modules/ingestion/pipeline/data.tf +++ b/modules/ingestion/pipeline/data.tf @@ -29,7 +29,7 @@ data "aws_iam_policy_document" "cloudwatch_log_group" { condition { test = "ArnLike" variable = "kms:EncryptionContext:aws:logs:arn" - values = ["arn:aws:logs:${local.region}:${local.account_id}:log-group:*"] + values = ["arn:aws:logs:${local.region}:${local.account_id}:log-group:${local.pipeline_log_group}"] } } } @@ -51,6 +51,9 @@ data "aws_iam_policy_document" "pipeline_cloudwatch" { "logs:ListLogDeliveries" ] - resources = ["*"] + resources = [ + "arn:aws:logs:${local.region}:${local.account_id}:log-group:${local.pipeline_log_group}", + "arn:aws:logs:${local.region}:${local.account_id}:log-group:${local.pipeline_log_group}:log-stream:*" + ] } } diff --git a/modules/ingestion/pipeline/iam.tf b/modules/ingestion/pipeline/iam.tf index 554c18e..1788879 100644 --- a/modules/ingestion/pipeline/iam.tf +++ b/modules/ingestion/pipeline/iam.tf @@ -1,13 +1,13 @@ resource "aws_iam_policy" "cloudwatch" { - count = var.pipeline_enable_logging ? 1 : 0 - name = "${local.pipeline_name}-cloudwatch-policy" + count = var.enable_logging ? 1 : 0 + name = "${var.name}-cloudwatch-policy" policy = data.aws_iam_policy_document.pipeline_cloudwatch.json tags = var.tags } resource "aws_iam_role_policy_attachment" "cloudwatch" { - count = var.pipeline_enable_logging ? 1 : 0 - role = var.pipeline_role_name + count = var.enable_logging ? 
1 : 0 + role = var.iam_role_name policy_arn = try(aws_iam_policy.cloudwatch[0].arn, "") } diff --git a/modules/ingestion/pipeline/locals.tf b/modules/ingestion/pipeline/locals.tf index 1ac5599..b455adb 100644 --- a/modules/ingestion/pipeline/locals.tf +++ b/modules/ingestion/pipeline/locals.tf @@ -2,9 +2,12 @@ locals { account_id = data.aws_caller_identity.current.account_id region = data.aws_region.current.name - pipeline_name = var.pipeline_name != null ? var.pipeline_name : var.domain_name + pipeline_log_group = "/aws/vendedlogs/OpenSearchIngestion/${var.name}/audit-logs" - pipeline_log_group = "/aws/vendedlogs/OpenSearchIngestion/${local.pipeline_name}/audit-logs" + vpc_options = length(var.subnet_ids) > 0 ? { + subnet_ids = var.subnet_ids + security_group_ids = var.security_group_ids + } : null pipeline_tags = [for k, v in merge(var.tags, data.aws_default_tags.this.tags) : { key = k diff --git a/modules/ingestion/pipeline/outputs.tf b/modules/ingestion/pipeline/outputs.tf index dda9a17..6214707 100644 --- a/modules/ingestion/pipeline/outputs.tf +++ b/modules/ingestion/pipeline/outputs.tf @@ -1,9 +1,9 @@ -output "pipeline_name" { - description = "Name of the ingestion pipeline" - value = local.pipeline_name +output "id" { + description = "ID of the ingestion pipeline" + value = awscc_osis_pipeline.this.id } -output "pipeline_arn" { +output "arn" { description = "ARN of the ingestion pipeline" value = awscc_osis_pipeline.this.pipeline_arn } diff --git a/modules/ingestion/pipeline/pipeline.tf b/modules/ingestion/pipeline/pipeline.tf index 1a168ab..646e8f2 100644 --- a/modules/ingestion/pipeline/pipeline.tf +++ b/modules/ingestion/pipeline/pipeline.tf @@ -1,12 +1,14 @@ resource "awscc_osis_pipeline" "this" { - pipeline_name = local.pipeline_name - pipeline_configuration_body = var.pipeline_configuration_body + pipeline_name = var.name + pipeline_configuration_body = var.configuration_body - min_units = var.pipeline_min_units - max_units = var.pipeline_max_units + 
vpc_options = local.vpc_options + min_units = var.min_units + max_units = var.max_units log_publishing_options = { - is_logging_enabled = var.pipeline_enable_logging + is_logging_enabled = var.enable_logging + cloudwatch_log_destination = { log_group = local.pipeline_log_group } diff --git a/modules/ingestion/pipeline/variables.tf b/modules/ingestion/pipeline/variables.tf index 1e55e46..e949ae8 100644 --- a/modules/ingestion/pipeline/variables.tf +++ b/modules/ingestion/pipeline/variables.tf @@ -1,41 +1,47 @@ -variable "domain_name" { - description = "The name of the OpenSearch cluster" - type = string -} - -variable "pipeline_name" { +variable "name" { description = "Name of the ingestion pipeline" type = string - default = null } -variable "pipeline_role_name" { - description = "Name of the pipeline role to use" +variable "iam_role_name" { + description = "Name of the pipeline IAM role" type = string } -variable "pipeline_configuration_body" { +variable "configuration_body" { description = "The Data Prepper pipeline configuration in YAML format" type = string } -variable "pipeline_min_units" { +variable "min_units" { description = "The minimum pipeline capacity, in Ingestion Compute Units" type = number } -variable "pipeline_max_units" { +variable "max_units" { description = "The maximum pipeline capacity, in Ingestion Compute Units" type = number } -variable "pipeline_enable_logging" { +variable "enable_logging" { description = "If true, will create a cloudwatch log group to monitor the pipeline" type = bool default = true } -variable "pipeline_log_group_retention_days" { +variable "subnet_ids" { + description = "Subnet IDs to deploy pipeline in. 
Only needed if pipeline is to be deployed in VPC mode" + type = list(string) + default = [] +} + +variable "security_group_ids" { + description = "Security group IDs to attach to the pipeline" + type = list(string) + default = [] +} + +variable "log_group_retention_days" { description = "Duration in days for cloudwatch log group retention" type = number default = 30