From c581d36fcddeee042779440a82b1fa0f8d5095bf Mon Sep 17 00:00:00 2001
From: abhinavkumarsph <122246745+abhinavkumarsph@users.noreply.github.com>
Date: Fri, 15 Dec 2023 10:31:04 +0800
Subject: [PATCH] ingestion submodule additional features + refactor (#22)
---
examples/ingestion/main.tf | 18 ++++++-------
examples/ingestion/providers.tf | 7 +++++
examples/ingestion/versions.tf | 14 ++++++++++
modules/ingestion/iam/README.md | 9 ++++---
modules/ingestion/iam/data.tf | 6 ++---
modules/ingestion/iam/iam.tf | 14 +++++-----
modules/ingestion/iam/locals.tf | 3 +++
modules/ingestion/iam/outputs.tf | 5 ++++
modules/ingestion/iam/variables.tf | 11 ++++----
modules/ingestion/pipeline/README.md | 23 ++++++++--------
modules/ingestion/pipeline/cloudwatch.tf | 10 +++----
modules/ingestion/pipeline/data.tf | 7 +++--
modules/ingestion/pipeline/iam.tf | 8 +++---
modules/ingestion/pipeline/locals.tf | 7 +++--
modules/ingestion/pipeline/outputs.tf | 8 +++---
modules/ingestion/pipeline/pipeline.tf | 12 +++++----
modules/ingestion/pipeline/variables.tf | 34 ++++++++++++++----------
17 files changed, 120 insertions(+), 76 deletions(-)
create mode 100644 examples/ingestion/providers.tf
create mode 100644 examples/ingestion/versions.tf
create mode 100644 modules/ingestion/iam/locals.tf
diff --git a/examples/ingestion/main.tf b/examples/ingestion/main.tf
index a8d498c..12ec4fa 100644
--- a/examples/ingestion/main.tf
+++ b/examples/ingestion/main.tf
@@ -15,20 +15,20 @@ module "ingestion_iam" {
#checkov:skip=CKV_TF_1:Ensure Terraform module sources use a commit hash
source = "../..//modules/ingestion/iam"
- domain_name = local.domain_name
- domain_arn = "arn:aws:es:${local.region}:${local.account_id}:domain/${local.domain_name}"
+ pipeline_role_name = "opensearch-ingestion-role"
+ opensearch_domain_arns = [
+ "arn:aws:es:${local.region}:${local.account_id}:domain/${local.domain_name}",
+ ]
}
module "ingestion_pipeline" {
#checkov:skip=CKV_TF_1:Ensure Terraform module sources use a commit hash
source = "../..//modules/ingestion/pipeline"
- domain_name = local.domain_name
+ name = "opensearch-ingestion-pipeline"
+ min_units = 1
+ max_units = 2
- pipeline_name = "${local.domain_name}-pipeline"
- pipeline_min_units = 1
- pipeline_max_units = 2
-
- pipeline_role_name = module.ingestion_iam.pipeline_role_name
- pipeline_configuration_body = templatefile("./pipeline.yaml", local.pipeline_values)
+ iam_role_name = module.ingestion_iam.pipeline_role_name
+ configuration_body = templatefile("./pipeline.yaml", local.pipeline_values)
}
diff --git a/examples/ingestion/providers.tf b/examples/ingestion/providers.tf
new file mode 100644
index 0000000..3f30c27
--- /dev/null
+++ b/examples/ingestion/providers.tf
@@ -0,0 +1,7 @@
+provider "aws" {
+ region = "ap-southeast-1"
+}
+
+provider "awscc" {
+ region = "ap-southeast-1"
+}
diff --git a/examples/ingestion/versions.tf b/examples/ingestion/versions.tf
new file mode 100644
index 0000000..585329c
--- /dev/null
+++ b/examples/ingestion/versions.tf
@@ -0,0 +1,14 @@
+terraform {
+ required_version = ">= 1.4"
+
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ version = "~> 5.15"
+ }
+ awscc = {
+ source = "hashicorp/awscc"
+ version = "~> 0.60"
+ }
+ }
+}
diff --git a/modules/ingestion/iam/README.md b/modules/ingestion/iam/README.md
index fe80520..436351b 100644
--- a/modules/ingestion/iam/README.md
+++ b/modules/ingestion/iam/README.md
@@ -2,7 +2,7 @@
| Name | Version |
|------|---------|
-| [terraform](#requirement\_terraform) | >= 1.4 |
+| [terraform](#requirement\_terraform) | >= 1.4 |
| [aws](#requirement\_aws) | >= 4.38 |
## Providers
@@ -22,19 +22,20 @@
| Name | Type |
|------|------|
-| [aws_iam_policy_document.pipeline_opensearch](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_iam_policy_document.opensearch_ingestion](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
## Inputs
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
-| [domain\_arn](#input\_domain\_arn) | The ARN of the OpenSearch cluster | `string` | n/a | yes |
-| [domain\_name](#input\_domain\_name) | The name of the OpenSearch cluster | `string` | n/a | yes |
+| [opensearch\_domain\_arns](#input\_opensearch\_domain\_arns) | (Optional) The ARNs of the OpenSearch domains to ingest data into | `list(string)` | `[]` | no |
+| [pipeline\_role\_name](#input\_pipeline\_role\_name) | The name of the pipeline IAM role | `string` | n/a | yes |
| [tags](#input\_tags) | A map of tags to add to all resources | `map(string)` | `{}` | no |
## Outputs
| Name | Description |
|------|-------------|
+| [opensearch\_ingestion\_policy\_arn](#output\_opensearch\_ingestion\_policy\_arn) | ARN of the Opensearch ingestion policy |
| [pipeline\_role\_arn](#output\_pipeline\_role\_arn) | ARN of the Opensearch ingestion pipeline role |
| [pipeline\_role\_name](#output\_pipeline\_role\_name) | Name of the Opensearch ingestion pipeline role |
diff --git a/modules/ingestion/iam/data.tf b/modules/ingestion/iam/data.tf
index fc8c58d..87b5032 100644
--- a/modules/ingestion/iam/data.tf
+++ b/modules/ingestion/iam/data.tf
@@ -1,14 +1,14 @@
-data "aws_iam_policy_document" "pipeline_opensearch" {
+data "aws_iam_policy_document" "opensearch_ingestion" {
statement {
effect = "Allow"
actions = ["es:DescribeDomain"]
- resources = [var.domain_arn]
+ resources = var.opensearch_domain_arns
}
statement {
effect = "Allow"
actions = ["es:ESHttp*"]
- resources = ["${var.domain_arn}/*"]
+ resources = [for domain in var.opensearch_domain_arns : "${domain}/*"]
}
}
diff --git a/modules/ingestion/iam/iam.tf b/modules/ingestion/iam/iam.tf
index e1a570b..3fa94fb 100644
--- a/modules/ingestion/iam/iam.tf
+++ b/modules/ingestion/iam/iam.tf
@@ -2,17 +2,15 @@ module "pipeline_role" {
source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role"
version = "~> 5.5.0"
- create_role = true
-
- role_name = "${var.domain_name}-pipeline-role"
+ create_role = true
+ role_name = var.pipeline_role_name
role_description = "IAM Role to be assumed by Opensearch ingestion pipeline"
trusted_role_services = [
"osis-pipelines.amazonaws.com",
]
-
role_requires_mfa = false
- custom_role_policy_arns = [module.pipeline_opensearch_policy.arn]
+ custom_role_policy_arns = local.create_opensearch_ingestion_policy ? [module.pipeline_opensearch_policy.arn] : []
tags = var.tags
}
@@ -21,12 +19,12 @@ module "pipeline_opensearch_policy" {
source = "terraform-aws-modules/iam/aws//modules/iam-policy"
version = "~> 5.5.0"
- create_policy = true
+ create_policy = local.create_opensearch_ingestion_policy
- name = "${var.domain_name}-ingestion-policy"
+ name = "${var.pipeline_role_name}-ingestion-policy"
path = "/"
description = "IAM Policy for Opensearch ingestion"
- policy = data.aws_iam_policy_document.pipeline_opensearch.json
+ policy = data.aws_iam_policy_document.opensearch_ingestion.json
tags = var.tags
}
diff --git a/modules/ingestion/iam/locals.tf b/modules/ingestion/iam/locals.tf
new file mode 100644
index 0000000..95233a2
--- /dev/null
+++ b/modules/ingestion/iam/locals.tf
@@ -0,0 +1,3 @@
+locals {
+ create_opensearch_ingestion_policy = length(var.opensearch_domain_arns) > 0 # only create/attach the policy when at least one domain ARN is supplied
+}
diff --git a/modules/ingestion/iam/outputs.tf b/modules/ingestion/iam/outputs.tf
index c70d6fd..fd32ddd 100644
--- a/modules/ingestion/iam/outputs.tf
+++ b/modules/ingestion/iam/outputs.tf
@@ -7,3 +7,8 @@ output "pipeline_role_arn" {
description = "ARN of the Opensearch ingestion pipeline role"
value = module.pipeline_role.iam_role_arn
}
+
+output "opensearch_ingestion_policy_arn" {
+ description = "ARN of the Opensearch ingestion policy"
+ value = local.create_opensearch_ingestion_policy ? module.pipeline_opensearch_policy.arn : null
+}
diff --git a/modules/ingestion/iam/variables.tf b/modules/ingestion/iam/variables.tf
index 7f3c735..2c321dd 100644
--- a/modules/ingestion/iam/variables.tf
+++ b/modules/ingestion/iam/variables.tf
@@ -1,11 +1,12 @@
-variable "domain_name" {
- description = "The name of the OpenSearch cluster"
+variable "pipeline_role_name" {
+ description = "The name of the pipeline IAM role"
type = string
}
-variable "domain_arn" {
- description = "The ARN of the OpenSearch cluster"
- type = string
+variable "opensearch_domain_arns" {
+ description = "(Optional) The ARNs of the OpenSearch domains to ingest data into"
+ type = list(string)
+ default = []
}
variable "tags" {
diff --git a/modules/ingestion/pipeline/README.md b/modules/ingestion/pipeline/README.md
index 84e5af8..1f24a5f 100644
--- a/modules/ingestion/pipeline/README.md
+++ b/modules/ingestion/pipeline/README.md
@@ -2,7 +2,7 @@
| Name | Version |
|------|---------|
-| [terraform](#requirement\_terraform) | >= 1.4 |
+| [terraform](#requirement\_terraform) | >= 1.4 |
| [aws](#requirement\_aws) | >= 4.38 |
| [awscc](#requirement\_awscc) | >= 0.52 |
@@ -37,20 +37,21 @@
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
-| [domain\_name](#input\_domain\_name) | The name of the OpenSearch cluster | `string` | n/a | yes |
-| [pipeline\_configuration\_body](#input\_pipeline\_configuration\_body) | The Data Prepper pipeline configuration in YAML format | `string` | n/a | yes |
-| [pipeline\_enable\_logging](#input\_pipeline\_enable\_logging) | If true, will create a cloudwatch log group to monitor the pipeline | `bool` | `true` | no |
-| [pipeline\_log\_group\_retention\_days](#input\_pipeline\_log\_group\_retention\_days) | Duration in days for cloudwatch log group retention | `number` | `30` | no |
-| [pipeline\_max\_units](#input\_pipeline\_max\_units) | The maximum pipeline capacity, in Ingestion Compute Units | `number` | n/a | yes |
-| [pipeline\_min\_units](#input\_pipeline\_min\_units) | The minimum pipeline capacity, in Ingestion Compute Units | `number` | n/a | yes |
-| [pipeline\_name](#input\_pipeline\_name) | Name of the ingestion pipeline | `string` | `null` | no |
-| [pipeline\_role\_name](#input\_pipeline\_role\_name) | Name of the pipeline role to use | `string` | n/a | yes |
+| [configuration\_body](#input\_configuration\_body) | The Data Prepper pipeline configuration in YAML format | `string` | n/a | yes |
+| [enable\_logging](#input\_enable\_logging) | If true, will create a cloudwatch log group to monitor the pipeline | `bool` | `true` | no |
+| [iam\_role\_name](#input\_iam\_role\_name) | Name of the pipeline IAM role | `string` | n/a | yes |
+| [log\_group\_retention\_days](#input\_log\_group\_retention\_days) | Duration in days for cloudwatch log group retention | `number` | `30` | no |
+| [max\_units](#input\_max\_units) | The maximum pipeline capacity, in Ingestion Compute Units | `number` | n/a | yes |
+| [min\_units](#input\_min\_units) | The minimum pipeline capacity, in Ingestion Compute Units | `number` | n/a | yes |
+| [name](#input\_name) | Name of the ingestion pipeline | `string` | n/a | yes |
+| [security\_group\_ids](#input\_security\_group\_ids) | Security group IDs to attach to the pipeline | `list(string)` | `[]` | no |
+| [subnet\_ids](#input\_subnet\_ids) | Subnet IDs to deploy pipeline in. Only needed if pipeline is to be deployed in VPC mode | `list(string)` | `[]` | no |
| [tags](#input\_tags) | A map of tags to add to all resources | `map(string)` | `{}` | no |
## Outputs
| Name | Description |
|------|-------------|
+| [arn](#output\_arn) | ARN of the ingestion pipeline |
+| [id](#output\_id) | ID of the ingestion pipeline |
| [ingest\_endpoint\_urls](#output\_ingest\_endpoint\_urls) | The ingestion endpoints for the pipeline that you can send data to |
-| [pipeline\_arn](#output\_pipeline\_arn) | ARN of the ingestion pipeline |
-| [pipeline\_name](#output\_pipeline\_name) | Name of the ingestion pipeline |
diff --git a/modules/ingestion/pipeline/cloudwatch.tf b/modules/ingestion/pipeline/cloudwatch.tf
index 9385444..11867dc 100644
--- a/modules/ingestion/pipeline/cloudwatch.tf
+++ b/modules/ingestion/pipeline/cloudwatch.tf
@@ -1,10 +1,10 @@
resource "aws_cloudwatch_log_group" "this" {
#checkov:skip=CKV_AWS_338:Ensure that CloudWatch Log Group specifies retention days
- count = var.pipeline_enable_logging ? 1 : 0
+ count = var.enable_logging ? 1 : 0
name = local.pipeline_log_group
kms_key_id = try(module.cloudwatch_kms_secret[0].key_arn, "")
- retention_in_days = var.pipeline_log_group_retention_days
+ retention_in_days = var.log_group_retention_days
tags = var.tags
}
@@ -12,9 +12,9 @@ resource "aws_cloudwatch_log_group" "this" {
module "cloudwatch_kms_secret" {
source = "SPHTech-Platform/kms/aws"
version = "~> 0.1.0"
- count = var.pipeline_enable_logging ? 1 : 0
+ count = var.enable_logging ? 1 : 0
- key_description = "Encrypt cloudwatch log group for ${local.pipeline_name}"
- alias = "alias/${join("-", [local.pipeline_name, "key"])}"
+ key_description = "Encrypt cloudwatch log group for ${var.name}"
+ alias = "alias/${join("-", [var.name, "key"])}"
key_policy_statements = [data.aws_iam_policy_document.cloudwatch_log_group.json]
}
diff --git a/modules/ingestion/pipeline/data.tf b/modules/ingestion/pipeline/data.tf
index 1247844..4851b76 100644
--- a/modules/ingestion/pipeline/data.tf
+++ b/modules/ingestion/pipeline/data.tf
@@ -29,7 +29,7 @@ data "aws_iam_policy_document" "cloudwatch_log_group" {
condition {
test = "ArnLike"
variable = "kms:EncryptionContext:aws:logs:arn"
- values = ["arn:aws:logs:${local.region}:${local.account_id}:log-group:*"]
+ values = ["arn:aws:logs:${local.region}:${local.account_id}:log-group:${local.pipeline_log_group}"]
}
}
}
@@ -51,6 +51,9 @@ data "aws_iam_policy_document" "pipeline_cloudwatch" {
"logs:ListLogDeliveries"
]
- resources = ["*"]
+ resources = [
+ "arn:aws:logs:${local.region}:${local.account_id}:log-group:${local.pipeline_log_group}",
+ "arn:aws:logs:${local.region}:${local.account_id}:log-group:${local.pipeline_log_group}:log-stream:*"
+ ]
}
}
diff --git a/modules/ingestion/pipeline/iam.tf b/modules/ingestion/pipeline/iam.tf
index 554c18e..1788879 100644
--- a/modules/ingestion/pipeline/iam.tf
+++ b/modules/ingestion/pipeline/iam.tf
@@ -1,13 +1,13 @@
resource "aws_iam_policy" "cloudwatch" {
- count = var.pipeline_enable_logging ? 1 : 0
- name = "${local.pipeline_name}-cloudwatch-policy"
+ count = var.enable_logging ? 1 : 0
+ name = "${var.name}-cloudwatch-policy"
policy = data.aws_iam_policy_document.pipeline_cloudwatch.json
tags = var.tags
}
resource "aws_iam_role_policy_attachment" "cloudwatch" {
- count = var.pipeline_enable_logging ? 1 : 0
- role = var.pipeline_role_name
+ count = var.enable_logging ? 1 : 0
+ role = var.iam_role_name
policy_arn = try(aws_iam_policy.cloudwatch[0].arn, "")
}
diff --git a/modules/ingestion/pipeline/locals.tf b/modules/ingestion/pipeline/locals.tf
index 1ac5599..b455adb 100644
--- a/modules/ingestion/pipeline/locals.tf
+++ b/modules/ingestion/pipeline/locals.tf
@@ -2,9 +2,12 @@ locals {
account_id = data.aws_caller_identity.current.account_id
region = data.aws_region.current.name
- pipeline_name = var.pipeline_name != null ? var.pipeline_name : var.domain_name
+ pipeline_log_group = "/aws/vendedlogs/OpenSearchIngestion/${var.name}/audit-logs"
- pipeline_log_group = "/aws/vendedlogs/OpenSearchIngestion/${local.pipeline_name}/audit-logs"
+ vpc_options = length(var.subnet_ids) > 0 ? {
+ subnet_ids = var.subnet_ids
+ security_group_ids = var.security_group_ids
+ } : null
pipeline_tags = [for k, v in merge(var.tags, data.aws_default_tags.this.tags) : {
key = k
diff --git a/modules/ingestion/pipeline/outputs.tf b/modules/ingestion/pipeline/outputs.tf
index dda9a17..6214707 100644
--- a/modules/ingestion/pipeline/outputs.tf
+++ b/modules/ingestion/pipeline/outputs.tf
@@ -1,9 +1,9 @@
-output "pipeline_name" {
- description = "Name of the ingestion pipeline"
- value = local.pipeline_name
+output "id" {
+ description = "ID of the ingestion pipeline"
+ value = awscc_osis_pipeline.this.id
}
-output "pipeline_arn" {
+output "arn" {
description = "ARN of the ingestion pipeline"
value = awscc_osis_pipeline.this.pipeline_arn
}
diff --git a/modules/ingestion/pipeline/pipeline.tf b/modules/ingestion/pipeline/pipeline.tf
index 1a168ab..646e8f2 100644
--- a/modules/ingestion/pipeline/pipeline.tf
+++ b/modules/ingestion/pipeline/pipeline.tf
@@ -1,12 +1,14 @@
resource "awscc_osis_pipeline" "this" {
- pipeline_name = local.pipeline_name
- pipeline_configuration_body = var.pipeline_configuration_body
+ pipeline_name = var.name
+ pipeline_configuration_body = var.configuration_body
- min_units = var.pipeline_min_units
- max_units = var.pipeline_max_units
+ vpc_options = local.vpc_options
+ min_units = var.min_units
+ max_units = var.max_units
log_publishing_options = {
- is_logging_enabled = var.pipeline_enable_logging
+ is_logging_enabled = var.enable_logging
+
cloudwatch_log_destination = {
log_group = local.pipeline_log_group
}
diff --git a/modules/ingestion/pipeline/variables.tf b/modules/ingestion/pipeline/variables.tf
index 1e55e46..e949ae8 100644
--- a/modules/ingestion/pipeline/variables.tf
+++ b/modules/ingestion/pipeline/variables.tf
@@ -1,41 +1,47 @@
-variable "domain_name" {
- description = "The name of the OpenSearch cluster"
- type = string
-}
-
-variable "pipeline_name" {
+variable "name" {
description = "Name of the ingestion pipeline"
type = string
- default = null
}
-variable "pipeline_role_name" {
- description = "Name of the pipeline role to use"
+variable "iam_role_name" {
+ description = "Name of the pipeline IAM role"
type = string
}
-variable "pipeline_configuration_body" {
+variable "configuration_body" {
description = "The Data Prepper pipeline configuration in YAML format"
type = string
}
-variable "pipeline_min_units" {
+variable "min_units" {
description = "The minimum pipeline capacity, in Ingestion Compute Units"
type = number
}
-variable "pipeline_max_units" {
+variable "max_units" {
description = "The maximum pipeline capacity, in Ingestion Compute Units"
type = number
}
-variable "pipeline_enable_logging" {
+variable "enable_logging" {
description = "If true, will create a cloudwatch log group to monitor the pipeline"
type = bool
default = true
}
-variable "pipeline_log_group_retention_days" {
+variable "subnet_ids" {
+ description = "Subnet IDs to deploy pipeline in. Only needed if pipeline is to be deployed in VPC mode"
+ type = list(string)
+ default = []
+}
+
+variable "security_group_ids" {
+ description = "Security group IDs to attach to the pipeline"
+ type = list(string)
+ default = []
+}
+
+variable "log_group_retention_days" {
description = "Duration in days for cloudwatch log group retention"
type = number
default = 30