diff --git a/.gitignore b/.gitignore
index 6701488c..c1ee48b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,4 +48,4 @@ terraform/*/.cache
 terraform/*/.ash_history
 terraform/*/.kube
 terraform/*/.terraform.d
-terraform/*/kubeconfig_*
\ No newline at end of file
+kubeconfig_*
diff --git a/README.md b/README.md
index 1a869475..2612e86c 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,7 @@ Each brokered AWS EKS provides:
 - nginx ingress controller for routing and IaaS-independent deployments
 - Automatic DNSSEC configuration for the cluster using AWS Route 53
 - Automatic DNS configuration for workloads using AWS Route53 [via ExternalDNS](https://github.com/kubernetes-sigs/external-dns)
+- [Persistent Volumes using AWS EFS](https://aws.amazon.com/blogs/aws/new-aws-fargate-for-amazon-eks-now-supports-amazon-efs/)
 - [ZooKeeper CRDs](https://github.com/pravega/zookeeper-operator) ready for managing Apache ZooKeeper clusters
 - [Solr CRDs](https://github.com/apache/solr-operator) for managing
diff --git a/docs/instance-cleanup.md b/docs/instance-cleanup.md
index d213605a..ca102c30 100644
--- a/docs/instance-cleanup.md
+++ b/docs/instance-cleanup.md
@@ -33,6 +33,8 @@ You might end up in a situation where the broker is failing to cleanup resources
   - Look for one tagged with the name of the k8s cluster and delete it if present
 1. [Certificate Manager > Certificates](https://console.aws.amazon.com/acm/home?#/certificates/list)
   - Delete corresponding certificate (it should not be in use if you already deleted the Load Balancer)
+1. [EFS > Filesystems](https://console.aws.amazon.com/efs/home#/file-systems)
+  - Delete corresponding EFS file system
 1. [VPC > NAT Gateways](https://console.aws.amazon.com/vpc/home#NatGateways:)
   - Delete the one corresponding to your cluster
   - If you don't know which one it is, look for the one tagged with the k8s cluster name
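For scripted cleanup, roughly equivalent AWS CLI calls can replace the console step added above. This is a sketch, not part of the change: the CLUSTER_NAME variable and the Name-tag filter are assumptions about how the file system is tagged, and EFS requires removing mount targets before the file system itself can be deleted.

    # Hypothetical lookup: find the file system whose Name tag contains the cluster name.
    fs_id=$(aws efs describe-file-systems \
      --query "FileSystems[?Tags[?Key=='Name' && contains(Value, '${CLUSTER_NAME}')]].FileSystemId" \
      --output text)

    # Mount-target deletion is asynchronous; delete-file-system may need a retry
    # until all mount targets are gone.
    for mt in $(aws efs describe-mount-targets --file-system-id "$fs_id" \
        --query 'MountTargets[].MountTargetId' --output text); do
      aws efs delete-mount-target --mount-target-id "$mt"
    done

    aws efs delete-file-system --file-system-id "$fs_id"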
diff --git a/eks-service-definition.yml b/eks-service-definition.yml
index a1797e9a..db3f2756 100644
--- a/eks-service-definition.yml
+++ b/eks-service-definition.yml
@@ -72,6 +72,7 @@ provision:
     rbac: terraform/provision/rbac.tf
     variables: terraform/provision/variables.tf
     vpc: terraform/provision/vpc.tf
+    persistent-storage: terraform/provision/persistent-storage.tf
 bind:
   plan_inputs: []
   user_inputs: []
diff --git a/manifest.yml b/manifest.yml
index d89aacd4..e0086823 100644
--- a/manifest.yml
+++ b/manifest.yml
@@ -11,14 +11,14 @@ terraform_binaries:
   version: 0.13.7
   source: https://github.com/hashicorp/terraform/archive/v0.13.7.zip
 - name: terraform-provider-aws
-  version: 3.61.0
-  source: https://releases.hashicorp.com/terraform-provider-aws/3.61.0/terraform-provider-aws_3.61.0_linux_amd64.zip
+  version: 3.73.0
+  source: https://releases.hashicorp.com/terraform-provider-aws/3.73.0/terraform-provider-aws_3.73.0_linux_amd64.zip
 - name: terraform-provider-helm
-  version: 2.3.0
-  source: https://releases.hashicorp.com/terraform-provider-helm/2.3.0/terraform-provider-helm_2.3.0_linux_amd64.zip
+  version: 2.4.1
+  source: https://releases.hashicorp.com/terraform-provider-helm/2.4.1/terraform-provider-helm_2.4.1_linux_amd64.zip
 - name: terraform-provider-kubernetes
-  version: 2.5.0
-  source: https://releases.hashicorp.com/terraform-provider-kubernetes/2.5.0/terraform-provider-kubernetes_2.5.0_linux_amd64.zip
+  version: 2.7.1
+  source: https://releases.hashicorp.com/terraform-provider-kubernetes/2.7.1/terraform-provider-kubernetes_2.7.1_linux_amd64.zip
 - name: terraform-provider-local
   version: 2.1.0
   source: https://releases.hashicorp.com/terraform-provider-local/2.1.0/terraform-provider-local_2.1.0_linux_amd64.zip
diff --git a/permission-policies.tf b/permission-policies.tf
index d10b07c3..f76d3488 100644
--- a/permission-policies.tf
+++ b/permission-policies.tf
@@ -3,7 +3,7 @@
 locals {
-  this_aws_account_id       = data.aws_caller_identity.current.account_id
+  this_aws_account_id = data.aws_caller_identity.current.account_id
 }
 
 data "aws_caller_identity" "current" {}
diff --git a/terraform/bind/providers.tf b/terraform/bind/providers.tf
index c56cbde0..b65e69a5 100644
--- a/terraform/bind/providers.tf
+++ b/terraform/bind/providers.tf
@@ -1,5 +1,5 @@
 provider "kubernetes" {
-  version                = "~>2.5"
+  version                = "~>2.7"
   host                   = data.aws_eks_cluster.main.endpoint
   cluster_ca_certificate = base64decode(data.aws_eks_cluster.main.certificate_authority[0].data)
diff --git a/terraform/provision/crds.tf b/terraform/provision/crds.tf
index 803c8cbc..4cad11e5 100644
--- a/terraform/provision/crds.tf
+++ b/terraform/provision/crds.tf
@@ -7,11 +7,11 @@
 # solr-operator do it so that it will register and unregister its CRDs as part
 # of the helm install process.
 resource "helm_release" "zookeeper-operator" {
-  name = "zookeeper"
-  chart = "zookeeper-operator"
-  repository = "https://charts.pravega.io/"
-  version = "0.2.12"
-  namespace = "kube-system"
+  name       = "zookeeper"
+  chart      = "zookeeper-operator"
+  repository = "https://charts.pravega.io/"
+  version    = "0.2.12"
+  namespace  = "kube-system"
   set {
     # See https://github.com/pravega/zookeeper-operator/issues/324#issuecomment-829267141
     name  = "hooks.delete"
@@ -31,11 +31,11 @@ resource "helm_release" "zookeeper-operator" {
 # We might be able do this with a null_resource that triggers on the content of
 # the upstream CRD manifest file changing.
resource "helm_release" "solr-operator" { - name = "solr" - chart = "solr-operator" - repository = "https://solr.apache.org/charts" - version = "0.5.0" - namespace = "kube-system" + name = "solr" + chart = "solr-operator" + repository = "https://solr.apache.org/charts" + version = "0.5.0" + namespace = "kube-system" set { name = "zookeeper-operator.use" diff --git a/terraform/provision/eks.tf b/terraform/provision/eks.tf index 6e576eb2..71268f83 100644 --- a/terraform/provision/eks.tf +++ b/terraform/provision/eks.tf @@ -12,14 +12,13 @@ module "eks" { version = "~>14.0" cluster_name = local.cluster_name cluster_version = local.cluster_version - vpc_id = module.vpc.aws_vpc_id - subnets = module.vpc.aws_subnet_private_prod_ids + vpc_id = module.vpc.vpc_id + subnets = module.vpc.private_subnets cluster_enabled_log_types = ["api", "audit", "authenticator", "controllerManager", "scheduler"] cluster_log_retention_in_days = 180 manage_aws_auth = false write_kubeconfig = var.write_kubeconfig tags = merge(var.labels, { "domain" = local.domain }) - iam_path = "/${replace(local.cluster_name, "-", "")}/" create_fargate_pod_execution_role = false # fargate_pod_execution_role_name = aws_iam_role.iam_role_fargate.name # fargate_profiles = { @@ -63,7 +62,7 @@ resource "aws_eks_fargate_profile" "default_namespaces" { cluster_name = local.cluster_name fargate_profile_name = "default-namespaces-${local.cluster_name}" pod_execution_role_arn = aws_iam_role.iam_role_fargate.arn - subnet_ids = module.vpc.aws_subnet_private_prod_ids + subnet_ids = module.vpc.private_subnets tags = var.labels timeouts { # For reasons unknown, Fargate profiles can take upward of 20 minutes to @@ -125,4 +124,3 @@ data "aws_eks_cluster" "main" { data "aws_eks_cluster_auth" "main" { name = module.eks.cluster_id } - diff --git a/terraform/provision/ingress.tf b/terraform/provision/ingress.tf index 5ac0ebf9..251551d2 100644 --- a/terraform/provision/ingress.tf +++ b/terraform/provision/ingress.tf @@ -62,7 +62,7 @@ resource "helm_release" "ingress_nginx" { "rbac.create" = true, "clusterName" = module.eks.cluster_id, "region" = local.region, - "vpcId" = module.vpc.aws_vpc_id, + "vpcId" = module.vpc.vpc_id, "aws_iam_role_arn" = module.aws_load_balancer_controller.aws_iam_role_arn } content { diff --git a/terraform/provision/persistent-storage.tf b/terraform/provision/persistent-storage.tf new file mode 100644 index 00000000..e58e9980 --- /dev/null +++ b/terraform/provision/persistent-storage.tf @@ -0,0 +1,155 @@ +locals { + efs_policy = <<-EOF + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "elasticfilesystem:DescribeAccessPoints", + "elasticfilesystem:DescribeFileSystems" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "elasticfilesystem:CreateAccessPoint" + ], + "Resource": "*", + "Condition": { + "StringLike": { + "aws:RequestTag/efs.csi.aws.com/cluster": "true" + } + } + }, + { + "Effect": "Allow", + "Action": "elasticfilesystem:DeleteAccessPoint", + "Resource": "*", + "Condition": { + "StringEquals": { + "aws:ResourceTag/efs.csi.aws.com/cluster": "true" + } + } + } + ] + } + EOF +} + +resource "aws_security_group" "efs_mounts" { + name = "efs_mounts" + description = "Mound EFS Volume in all pods w/i Fargate" + vpc_id = module.vpc.vpc_id + + ingress { + description = "NFS Traffic from Fargate" + from_port = 2049 + to_port = 2049 + protocol = "tcp" + cidr_blocks = module.vpc.private_subnets_cidr_blocks + } + + tags = { + Name = "allow_nfs_for_efs" + } +} + +resource 
"aws_efs_file_system" "eks_efs" { + creation_token = "${local.cluster_name}-PV" + + # encryption-at-rest + encrypted = true + tags = { + Name = "${local.cluster_name}-PV" + } +} + +resource "aws_efs_file_system_policy" "policy" { + file_system_id = aws_efs_file_system.eks_efs.id + + # encryption-in-transit + policy = <<-POLICY + { + "Version": "2012-10-17", + "Id": "${local.cluster_name}-efs-policy", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "AWS": "*" + }, + "Action": [ + "elasticfilesystem:ClientRootAccess", + "elasticfilesystem:ClientWrite", + "elasticfilesystem:ClientMount" + ], + "Condition": { + "Bool": { + "elasticfilesystem:AccessedViaMountTarget": "true" + } + } + }, + { + "Effect": "Deny", + "Principal": { + "AWS": "*" + }, + "Action": "*", + "Condition": { + "Bool": { + "aws:SecureTransport": "false" + } + } + } + ] + } + POLICY +} + +resource "aws_efs_mount_target" "efs_vpc" { + count = 3 + file_system_id = aws_efs_file_system.eks_efs.id + subnet_id = module.vpc.private_subnets[count.index] + security_groups = [aws_security_group.efs_mounts.id] +} + +resource "aws_iam_role_policy" "efs-policy" { + name_prefix = "${local.cluster_name}-efs-policy" + role = aws_iam_role.iam_role_fargate.name + policy = local.efs_policy +} + +# This isn't used for Fargate workloads, since they cannot dynamically provision +# volumes: +# https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html#:~:text=Considerations +# However, we're leaving it here so that non-Fargate workloads can +# still dynamically provision EFS volumes if they want to. +resource "kubernetes_storage_class" "efs-sc" { + metadata { + name = "efs-sc" + } + storage_provisioner = "efs.csi.aws.com" + allow_volume_expansion = true +} + +resource "kubernetes_persistent_volume" "pv" { + metadata { + name = "pv" + } + spec { + capacity = { + storage = "5Gi" + } + access_modes = ["ReadWriteOnce"] + storage_class_name = "" + persistent_volume_reclaim_policy = "Retain" + persistent_volume_source { + csi { + driver = "efs.csi.aws.com" + volume_handle = aws_efs_file_system.eks_efs.id + } + } + } +} diff --git a/terraform/provision/providers.tf b/terraform/provision/providers.tf index c0278cef..48a88e6c 100644 --- a/terraform/provision/providers.tf +++ b/terraform/provision/providers.tf @@ -1,7 +1,5 @@ provider "aws" { - # We need at least 3.31.0 because it was the first version to support DS - # records in aws_route53_record - version = "~> 3.31" + version = "~> 3.63" region = local.region } @@ -9,7 +7,7 @@ provider "aws" { # See https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/dns-configuring-dnssec-cmk-requirements.html provider "aws" { alias = "dnssec-key-provider" - version = "~> 3.31" + version = "~> 3.63" region = "us-east-1" } @@ -24,7 +22,7 @@ provider "kubernetes" { args = ["token", "--cluster-id", data.aws_eks_cluster.main.id] command = "aws-iam-authenticator" } - version = "~>2.5" + version = "~>2.7" } provider "helm" { @@ -39,6 +37,6 @@ provider "helm" { } } - version = "~>2.3" + version = "~>2.4" } diff --git a/terraform/provision/vpc.tf b/terraform/provision/vpc.tf index 844c32dd..402b7218 100644 --- a/terraform/provision/vpc.tf +++ b/terraform/provision/vpc.tf @@ -2,27 +2,35 @@ locals { region = var.region } +data "aws_availability_zones" "available" { +} + module "vpc" { - source = "github.com/FairwindsOps/terraform-vpc.git?ref=v5.0.1" + source = "terraform-aws-modules/vpc/aws" + version = "3.11.4" + # insert the 23 required variables here + name = "eks-vpc" + cidr = "10.31.0.0/16" + + azs = 
diff --git a/terraform/provision/providers.tf b/terraform/provision/providers.tf
index c0278cef..48a88e6c 100644
--- a/terraform/provision/providers.tf
+++ b/terraform/provision/providers.tf
@@ -1,7 +1,5 @@
 provider "aws" {
-  # We need at least 3.31.0 because it was the first version to support DS
-  # records in aws_route53_record
-  version = "~> 3.31"
+  version = "~> 3.63"
   region  = local.region
 }
 
@@ -9,7 +7,7 @@ provider "aws" {
 # See https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/dns-configuring-dnssec-cmk-requirements.html
 provider "aws" {
   alias   = "dnssec-key-provider"
-  version = "~> 3.31"
+  version = "~> 3.63"
   region  = "us-east-1"
 }
 
@@ -24,7 +22,7 @@ provider "kubernetes" {
     args        = ["token", "--cluster-id", data.aws_eks_cluster.main.id]
     command     = "aws-iam-authenticator"
   }
-  version = "~>2.5"
+  version = "~>2.7"
 }
 
@@ -39,6 +37,6 @@ provider "helm" {
     }
   }
 
-  version = "~>2.3"
+  version = "~>2.4"
 }
diff --git a/terraform/provision/vpc.tf b/terraform/provision/vpc.tf
index 844c32dd..402b7218 100644
--- a/terraform/provision/vpc.tf
+++ b/terraform/provision/vpc.tf
@@ -2,27 +2,35 @@ locals {
   region = var.region
 }
 
+data "aws_availability_zones" "available" {
+}
+
 module "vpc" {
-  source = "github.com/FairwindsOps/terraform-vpc.git?ref=v5.0.1"
+  source  = "terraform-aws-modules/vpc/aws"
+  version = "3.11.4"
+  # insert the 23 required variables here
+  name = "eks-vpc"
+  cidr = "10.31.0.0/16"
+
+  azs             = data.aws_availability_zones.available.names
+  private_subnets = ["10.31.1.0/24", "10.31.2.0/24", "10.31.3.0/24"]
+  public_subnets  = ["10.31.101.0/24", "10.31.102.0/24", "10.31.103.0/24"]
 
-  aws_region           = local.region
-  az_count             = 2
-  aws_azs              = "${local.region}b, ${local.region}c"
-  single_nat_gateway   = 1
-  multi_az_nat_gateway = 0
+  enable_nat_gateway = true
+  single_nat_gateway = true
 
-  enable_s3_vpc_endpoint = "true"
+  enable_dns_hostnames = true
 
   # Tag subnets for use by AWS' load-balancers and the ALB ingress controllers
   # See https://aws.amazon.com/premiumsupport/knowledge-center/eks-vpc-subnet-discovery/
-  global_tags = merge(var.labels, {
+  tags = merge(var.labels, {
     "kubernetes.io/cluster/${local.cluster_name}" = "shared",
     "domain"                                      = local.domain
   })
   public_subnet_tags = {
     "kubernetes.io/role/elb" = 1
   }
-  private_prod_subnet_tags = {
+  private_subnet_tags = {
     "kubernetes.io/role/internal-elb" = 1
   }
 }
diff --git a/test.sh b/test.sh
index 38891985..b36b6536 100755
--- a/test.sh
+++ b/test.sh
@@ -19,6 +19,7 @@ echo "export KUBECONFIG=${KUBECONFIG}"
 echo "export DOMAIN_NAME=${DOMAIN_NAME}"
 
 echo "Running tests..."
+# Test 1
 echo "Deploying the test fixture..."
 kubectl apply -f terraform/provision/2048_fixture.yml
@@ -71,6 +72,26 @@
 echo -n "Testing DNSSSEC configuration is valid... "
 dnssec_validates=$(delv @8.8.8.8 ${DOMAIN_NAME} +yaml | grep -o '\s*\- fully_validated:' | wc -l)
 if [[ $dnssec_validated != 0 ]]; then echo PASS; else retval=1; echo FAIL; fi
+
+# Test 2
+echo -n "Provisioning PV resources... "
+kubectl apply -f test_specs/pv/efs/claim.yml
+kubectl apply -f test_specs/pv/efs/pod.yml
+
+echo -n "Waiting for Pod to start..."
+kubectl wait --for=condition=ready --timeout=600s pod efs-app
+sleep 10
+
+echo -n "Verifying pod can write to EFS volume..."
+if [[ $(kubectl exec -ti efs-app -- cat /data/out.txt | grep "Pod was here!") ]]; then
+  echo PASS
+else
+  retval=1
+  echo FAIL
+fi
+
+
+# Cleanup
 rm ${KUBECONFIG}
-exit $retval
\ No newline at end of file
+exit $retval
diff --git a/test_specs/pv/efs/README.md b/test_specs/pv/efs/README.md
new file mode 100644
index 00000000..0959f3cf
--- /dev/null
+++ b/test_specs/pv/efs/README.md
@@ -0,0 +1,3 @@
+# EFS Static Provisioning Test
+
+Courtesy of https://github.com/kubernetes-sigs/aws-efs-csi-driver/tree/release-1.3/examples/kubernetes/static_provisioning
diff --git a/test_specs/pv/efs/claim.yml b/test_specs/pv/efs/claim.yml
new file mode 100644
index 00000000..e32c4cf1
--- /dev/null
+++ b/test_specs/pv/efs/claim.yml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: efs-claim
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: ""
+  resources:
+    requests:
+      storage: 5Gi
diff --git a/test_specs/pv/efs/pod.yml b/test_specs/pv/efs/pod.yml
new file mode 100644
index 00000000..24c47530
--- /dev/null
+++ b/test_specs/pv/efs/pod.yml
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: efs-app
+spec:
+  containers:
+    - name: app
+      image: alpine
+      command: ["/bin/sh"]
+      args: ["-c", "while true; do echo \"Pod was here!\" >> /data/out.txt; sleep 5; done"]
+      volumeMounts:
+        - name: persistent-storage
+          mountPath: /data
+  volumes:
+    - name: persistent-storage
+      persistentVolumeClaim:
+        claimName: efs-claim
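To exercise the fixture outside of test.sh, the same steps can be run by hand. A sketch, assuming KUBECONFIG already points at a provisioned cluster:

    kubectl apply -f test_specs/pv/efs/claim.yml
    kubectl apply -f test_specs/pv/efs/pod.yml
    kubectl wait --for=condition=ready --timeout=600s pod efs-app

    # The pod appends a line every 5 seconds; a match confirms the EFS mount is writable.
    kubectl exec efs-app -- grep "Pod was here!" /data/out.txt

    # Cleanup. The PV's reclaim policy is Retain, so deleting the claim
    # does not delete the underlying EFS file system.
    kubectl delete -f test_specs/pv/efs/pod.yml -f test_specs/pv/efs/claim.yml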