diff --git a/Readme.md b/Readme.md index f404fe9..3636800 100644 --- a/Readme.md +++ b/Readme.md @@ -13,7 +13,7 @@ This would be an example file that uses our modules. This creates an alb, ecs cl # This makes a load balancer module "alb" { - source = "git::https://github.com/Janus-vistaprint/tf_alb.git" + source = "git::https://github.com/Janus-vistaprint/terraform-autoscale-ecs.git//tf_alb" # the load balancers name lb_name = "${var.app_name}" @@ -32,7 +32,7 @@ module "alb" { # This makes an ecs cluster module "ecs" { - source = "git::https://github.com/Janus-vistaprint/tf_ecs_cluster.git" + source = "git::https://github.com/Janus-vistaprint/terraform-autoscale-ecs.git//tf_ecs_cluster" aws_region = "${var.aws_region}" # how much disk should a server have in gb @@ -55,7 +55,7 @@ module "ecs" { # This registers a "service" (a set of containers) in the cluster made above with the image tag specified. module "ecs_service" { - source = "git:https://github.com/Janus-vistaprint/tf_ecs_default_service.git" + source = "git::https://github.com/Janus-vistaprint/terraform-autoscale-ecs.git//tf_ecs_default_service" vpc_id = "YOUR VPCID" # the port in the container we should forward traffic to diff --git a/tf_alb/README.md b/tf_alb/README.md new file mode 100644 index 0000000..ad2e392 --- /dev/null +++ b/tf_alb/README.md @@ -0,0 +1 @@ +# tf_alb diff --git a/tf_alb/main.tf b/tf_alb/main.tf new file mode 100644 index 0000000..6891661 --- /dev/null +++ b/tf_alb/main.tf @@ -0,0 +1,5 @@ +resource "aws_alb" "main" { + name = "alb-${var.lb_name}" + subnets = ["${var.public_subnets}"] + security_groups = ["${aws_security_group.lb_sg.id}"] +} diff --git a/tf_alb/outputs.tf b/tf_alb/outputs.tf new file mode 100644 index 0000000..39a97bb --- /dev/null +++ b/tf_alb/outputs.tf @@ -0,0 +1,15 @@ +output "lb_security_group" { + value = "${aws_security_group.lb_sg.id}" +} + +output "lb_id" { + value = "${aws_alb.main.id}" +} + +output "lb_dns_name" { + value = "${aws_alb.main.dns_name}" +} + 
+output "lb_arn" {
+  value = "${aws_alb.main.arn}"
+}
diff --git a/tf_alb/route53.tf b/tf_alb/route53.tf
new file mode 100644
index 0000000..329be0b
--- /dev/null
+++ b/tf_alb/route53.tf
@@ -0,0 +1,13 @@
+# Create a Route53 alias A record for the ALB; skipped when route53_dns_name is empty
+resource "aws_route53_record" "www" {
+  count   = "${var.route53_dns_name == "" ? 0 : 1}"
+  zone_id = "${var.route53_dns_zone_id}"
+  name    = "${var.route53_dns_name}"
+  type    = "A"
+
+  alias {
+    name                   = "${aws_alb.main.dns_name}"
+    zone_id                = "${aws_alb.main.zone_id}"
+    evaluate_target_health = true
+  }
+}
diff --git a/tf_alb/security.tf b/tf_alb/security.tf
new file mode 100644
index 0000000..fd6d40a
--- /dev/null
+++ b/tf_alb/security.tf
@@ -0,0 +1,36 @@
+# Security group attached to the ALB.
+# Its rules are separate aws_security_group_rule resources so that "count" can
+# create one ingress rule per entry of var.lb_port; inline rule blocks and rule
+# resources must not be mixed on the same group.
+resource "aws_security_group" "lb_sg" {
+  description = "controls access to the application ELB"
+
+  vpc_id = "${var.vpc_id}"
+  name   = "tf-ecs-lbsg-${var.lb_name}"
+}
+
+# One TCP ingress rule, open to any source address, per port in var.lb_port.
+resource "aws_security_group_rule" "lb_ingress" {
+  count             = "${length(var.lb_port)}"
+  type              = "ingress"
+  security_group_id = "${aws_security_group.lb_sg.id}"
+
+  protocol    = "tcp"
+  from_port   = "${element(var.lb_port, count.index)}"
+  to_port     = "${element(var.lb_port, count.index)}"
+  cidr_blocks = ["0.0.0.0/0"]
+}
+
+# Unrestricted egress (all protocols, all ports, any destination).
+resource "aws_security_group_rule" "lb_egress" {
+  type              = "egress"
+  security_group_id = "${aws_security_group.lb_sg.id}"
+
+  from_port = 0
+  to_port   = 0
+  protocol  = "-1"
+
+  cidr_blocks = [
+    "0.0.0.0/0",
+  ]
+}
diff --git a/tf_alb/variables.tf b/tf_alb/variables.tf
new file mode 100644
index 0000000..8278014
--- /dev/null
+++ b/tf_alb/variables.tf
@@ -0,0 +1,32 @@
+variable "public_subnets" {
+  description = "Subnet IDs the ALB is placed in"
+  type        = "list"
+}
+
+variable "lb_name" {
+  description = "lb name"
+  type        = "string"
+}
+
+variable "lb_port" {
+  description = "Ports the load balancer security group accepts TCP traffic on"
+  type        = "list"
+  default     = [80]
+}
+
+variable "vpc_id" {
+  description = "VPC the load balancer security group is created in"
+  type        = "string"
+}
+
+variable "route53_dns_name" {
+  description = "Public DNS name used to refer to this ALB"
+  type        = "string"
+  default     = ""
+}
+
+variable "route53_dns_zone_id" {
+  description = "Zone ID for Route 53"
+  type        = "string"
+  default     = ""
+}
diff --git a/tf_ecs_cluster/README.md b/tf_ecs_cluster/README.md
new file mode 100644
index 0000000..6eb7f1c
--- /dev/null
+++ b/tf_ecs_cluster/README.md
@@ -0,0 +1 @@
+# tf_ecs_cluster
diff --git a/tf_ecs_cluster/ami.tf
b/tf_ecs_cluster/ami.tf new file mode 100644 index 0000000..6e837e3 --- /dev/null +++ b/tf_ecs_cluster/ami.tf @@ -0,0 +1,10 @@ +data "aws_ami" "stable_ecs" { + most_recent = true + + filter { + name = "name" + values = ["*ecs-optimized*"] + } + + owners = ["amazon"] # CoreOS +} diff --git a/tf_ecs_cluster/asg-scaling.tf b/tf_ecs_cluster/asg-scaling.tf new file mode 100644 index 0000000..0abf954 --- /dev/null +++ b/tf_ecs_cluster/asg-scaling.tf @@ -0,0 +1,156 @@ +resource "aws_autoscaling_policy" "cpu-scale-up" { + name = "asg-${var.cluster_name}-cpu-scale-up" + scaling_adjustment = 1 + adjustment_type = "ChangeInCapacity" + cooldown = 300 + autoscaling_group_name = "${aws_autoscaling_group.app.name}" + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_cloudwatch_metric_alarm" "cpu-high" { + alarm_name = "cpu-util-high-asg-${var.cluster_name}" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "2" + metric_name = "CPUUtilization" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "60" + alarm_description = "This metric monitors ec2 cpu for high utilization on ECS hosts" + + alarm_actions = [ + "${aws_autoscaling_policy.cpu-scale-up.arn}", + ] + + dimensions { + AutoScalingGroupName = "${aws_autoscaling_group.app.name}" + } + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_autoscaling_policy" "cpu-scale-down" { + name = "asg-${var.cluster_name}-cpu-scale-down" + scaling_adjustment = -1 + adjustment_type = "ChangeInCapacity" + cooldown = 300 + autoscaling_group_name = "${aws_autoscaling_group.app.name}" + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_cloudwatch_metric_alarm" "cpu-low" { + alarm_name = "cpu-util-low-asg-${var.cluster_name}" + comparison_operator = "LessThanOrEqualToThreshold" + evaluation_periods = "3" + metric_name = "CPUUtilization" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + + # keeping this very low, 
as we should let ecs reservations to mostly control this + threshold = "5" + alarm_description = "This metric monitors ec2 cpu for low utilization on ECS hosts" + + alarm_actions = [ + "${aws_autoscaling_policy.cpu-scale-down.arn}", + ] + + dimensions { + AutoScalingGroupName = "${aws_autoscaling_group.app.name}" + } + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_autoscaling_policy" "mem-scale-up" { + name = "ECS-${var.cluster_name}-mem-scale-up" + + scaling_adjustment = 1 + + adjustment_type = "ChangeInCapacity" + + cooldown = 300 + + autoscaling_group_name = "${aws_autoscaling_group.app.name}" + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_autoscaling_policy" "mem-scale-down" { + name = "ECS-${var.cluster_name}-mem-scale-down" + + scaling_adjustment = -1 + + adjustment_type = "ChangeInCapacity" + + cooldown = 300 + + autoscaling_group_name = "${aws_autoscaling_group.app.name}" + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_cloudwatch_metric_alarm" "memory-high" { + alarm_name = "mem-util-high-asg-${var.cluster_name}" + + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "2" + metric_name = "MemoryUtilization" + namespace = "System/Linux" + period = "300" + statistic = "Average" + threshold = "80" + alarm_description = "This metric monitors ec2 memory for high utilization on ECS hosts" + + alarm_actions = [ + "${aws_autoscaling_policy.mem-scale-up.arn}", + ] + + dimensions { + AutoScalingGroupName = "${aws_autoscaling_group.app.name}" + } + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_cloudwatch_metric_alarm" "memory-low" { + alarm_name = "mem-util-low-asg-${var.cluster_name}" + comparison_operator = "LessThanOrEqualToThreshold" + evaluation_periods = "2" + metric_name = "MemoryUtilization" + namespace = "System/Linux" + period = "300" + statistic = "Average" + + # keeping this very low, as we should let ecs reservations to mostly 
control this + threshold = "5" + alarm_description = "This metric monitors ec2 memory for low utilization on ECS hosts" + + alarm_actions = [ + "${aws_autoscaling_policy.mem-scale-down.arn}", + ] + + dimensions { + AutoScalingGroupName = "${aws_autoscaling_group.app.name}" + } + + depends_on = [ + "aws_autoscaling_group.app", + ] +} diff --git a/tf_ecs_cluster/asg.tf b/tf_ecs_cluster/asg.tf new file mode 100644 index 0000000..c0124d4 --- /dev/null +++ b/tf_ecs_cluster/asg.tf @@ -0,0 +1,58 @@ +resource "aws_launch_configuration" "app" { + security_groups = [ + "${aws_security_group.instance_sg.id}", + ] + + image_id = "${data.aws_ami.stable_ecs.id}" + instance_type = "${var.instance_type}" + iam_instance_profile = "${aws_iam_instance_profile.ecs.name}" + associate_public_ip_address = false + key_name = "${var.key_name}" + + # ec2 optimized instances + + user_data = < /etc/ecs/ecs.config + sudo yum install -y perl-Switch perl-DateTime perl-Sys-Syslog perl-LWP-Protocol-https zip unzip wget perl-Digest-SHA.x86_64 + cd /home/ec2-user + wget http://ec2-downloads.s3.amazonaws.com/cloudwatch-samples/CloudWatchMonitoringScripts-v1.1.0.zip + unzip CloudWatchMonitoringScripts-v1.1.0.zip + rm CloudWatchMonitoringScripts-v1.1.0.zip + chown ec2-user:ec2-user aws-scripts-mon + (crontab -u ec2-user -l 2>/dev/null; echo "*/1 * * * * /home/ec2-user/aws-scripts-mon/mon-put-instance-data.pl --auto-scaling --mem-util --disk-space-util --disk-path=/ --from-cron") | crontab - + EOF + # user_data = "${data.template_file.cloud_config.rendered}" + lifecycle { + create_before_destroy = true + } + root_block_device { + volume_size = "${var.asg_disk_size}" + } +} + +### Compute + +resource "aws_autoscaling_group" "app" { + name = "tf-${var.cluster_name}" + vpc_zone_identifier = ["${var.private_subnets}"] + min_size = "${var.asg_min}" + max_size = "${var.asg_max}" + desired_capacity = "${var.asg_desired}" + launch_configuration = "${aws_launch_configuration.app.name}" + termination_policies = 
["OldestLaunchConfiguration", "OldestInstance"] + depends_on = ["aws_launch_configuration.app"] + + /* + in 0.9.3 deletes are not handled properly when lc, and asg's have create before destroy + https://github.com/hashicorp/terraform/issues/13517 + lifecycle { + create_before_destroy = true + }*/ + + tag { + key = "Name" + value = "tf-${var.cluster_name}" + propagate_at_launch = true + } +} diff --git a/tf_ecs_cluster/ecs-scaling.tf b/tf_ecs_cluster/ecs-scaling.tf new file mode 100644 index 0000000..20bc508 --- /dev/null +++ b/tf_ecs_cluster/ecs-scaling.tf @@ -0,0 +1,145 @@ +/* cpu */ +resource "aws_autoscaling_policy" "cpu-over-reserved" { + name = "ECS-${var.cluster_name}-cpu-over-reserved" + scaling_adjustment = -1 + adjustment_type = "ChangeInCapacity" + cooldown = 300 + autoscaling_group_name = "${aws_autoscaling_group.app.name}" + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_cloudwatch_metric_alarm" "cpu-over-reserved" { + alarm_name = "cpu-over-reserved-ECS-${var.cluster_name}" + comparison_operator = "LessThanOrEqualToThreshold" + evaluation_periods = "3" + metric_name = "CPUReservation" + namespace = "AWS/ECS" + period = "300" + statistic = "Maximum" + threshold = "20" + alarm_description = "This metric monitors ecs cpu reservation, and scales down machines if we have too much cpu avalible" + + alarm_actions = [ + "${aws_autoscaling_policy.cpu-over-reserved.arn}", + ] + + dimensions { + ClusterName = "${aws_ecs_cluster.main.name}" + } + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_autoscaling_policy" "cpu-under-reserved" { + name = "ECS-${var.cluster_name}-cpu-under-reserved" + scaling_adjustment = 1 + adjustment_type = "ChangeInCapacity" + cooldown = 120 + autoscaling_group_name = "${aws_autoscaling_group.app.name}" + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_cloudwatch_metric_alarm" "cpu-under-reserved" { + alarm_name = "cpu-under-reserved-high-ECS-${var.cluster_name}" + 
comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "CPUReservation" + namespace = "AWS/ECS" + period = "60" + statistic = "Maximum" + threshold = "70" + alarm_description = "This metric monitors ecs cpu reservation, and scales up machines if we dont have enough cpu avalible" + + alarm_actions = [ + "${aws_autoscaling_policy.cpu-under-reserved.arn}", + ] + + dimensions { + ClusterName = "${aws_ecs_cluster.main.name}" + } + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_autoscaling_policy" "memory-over-reserved" { + name = "ECS-${var.cluster_name}-mem-over-reserved" + scaling_adjustment = -1 + adjustment_type = "ChangeInCapacity" + cooldown = 300 + autoscaling_group_name = "${aws_autoscaling_group.app.name}" + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +/* memory */ +resource "aws_cloudwatch_metric_alarm" "memory-over-reserved" { + alarm_name = "memory-over-reserved-ECS-${var.cluster_name}" + comparison_operator = "LessThanOrEqualToThreshold" + evaluation_periods = "3" + metric_name = "MemoryReservation" + namespace = "AWS/ECS" + period = "300" + statistic = "Maximum" + threshold = "20" + alarm_description = "This metric monitors ecs memory reservation, and scales down machines if we have too much memory avalible" + + alarm_actions = [ + "${aws_autoscaling_policy.memory-over-reserved.arn}", + ] + + dimensions { + ClusterName = "${aws_ecs_cluster.main.name}" + } + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_autoscaling_policy" "memory-under-reserved" { + name = "ECS-${var.cluster_name}-memory-under-reserved" + scaling_adjustment = 1 + adjustment_type = "ChangeInCapacity" + cooldown = 120 + autoscaling_group_name = "${aws_autoscaling_group.app.name}" + + depends_on = [ + "aws_autoscaling_group.app", + ] +} + +resource "aws_cloudwatch_metric_alarm" "memory-under-reserved" { + alarm_name = "memory-under-reserved-high-ECS-${var.cluster_name}" + 
comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "MemoryReservation" + namespace = "AWS/ECS" + period = "60" + statistic = "Maximum" + threshold = "70" + alarm_description = "This metric monitors ecs memory reservation, and scales up machines if we dont have enough memory avalible" + + alarm_actions = [ + "${aws_autoscaling_policy.memory-under-reserved.arn}", + ] + + dimensions { + ClusterName = "${aws_ecs_cluster.main.name}" + } + + depends_on = [ + "aws_autoscaling_group.app", + ] +} diff --git a/tf_ecs_cluster/ecs.tf b/tf_ecs_cluster/ecs.tf new file mode 100644 index 0000000..cb0f1c6 --- /dev/null +++ b/tf_ecs_cluster/ecs.tf @@ -0,0 +1,11 @@ +## ECS + +resource "aws_ecs_cluster" "main" { + name = "${var.cluster_name}" +} + +## CloudWatch Logs + +resource "aws_cloudwatch_log_group" "ecs" { + name = "tf-ecs-${var.cluster_name}/ecs-agent" +} diff --git a/tf_ecs_cluster/main.tf b/tf_ecs_cluster/main.tf new file mode 100644 index 0000000..b1a9085 --- /dev/null +++ b/tf_ecs_cluster/main.tf @@ -0,0 +1,4 @@ +# Specify the provider and access details +provider "aws" { + region = "${var.aws_region}" +} diff --git a/tf_ecs_cluster/outputs.tf b/tf_ecs_cluster/outputs.tf new file mode 100644 index 0000000..5933c32 --- /dev/null +++ b/tf_ecs_cluster/outputs.tf @@ -0,0 +1,19 @@ +output "instance_security_group" { + value = "${aws_security_group.instance_sg.id}" +} + +output "launch_configuration" { + value = "${aws_launch_configuration.app.id}" +} + +output "asg_name" { + value = "${aws_autoscaling_group.app.id}" +} + +output "cluster_arn" { + value = "${aws_ecs_cluster.main.id}" +} + +output "cluster_name" { + value = "${aws_ecs_cluster.main.name}" +} diff --git a/tf_ecs_cluster/policies/instance-profile-policy.json b/tf_ecs_cluster/policies/instance-profile-policy.json new file mode 100644 index 0000000..c4f38aa --- /dev/null +++ b/tf_ecs_cluster/policies/instance-profile-policy.json @@ -0,0 +1,41 @@ +{ + "Version": 
"2012-10-17", + "Statement": [ + { + "Sid": "ecsInstanceRole", + "Effect": "Allow", + "Action": [ + "ecs:CreateCluster", + "ecs:DeregisterContainerInstance", + "ecs:DiscoverPollEndpoint", + "ecs:Poll", + "ecs:RegisterContainerInstance", + "ecs:StartTelemetrySession", + "ecs:Submit*", + "ecs:StartTask", + "ecr:BatchCheckLayerAvailability", + "ecr:BatchGetImage", + "ecr:GetDownloadUrlForLayer", + "ecr:GetAuthorizationToken", + "cloudwatch:PutMetricData", + "cloudwatch:GetMetricStatistics", + "cloudwatch:ListMetrics", + "ec2:DescribeTags" + ], + "Resource": [ + "*" + ] + }, + { + "Sid": "allowLoggingToCloudWatch", + "Effect": "Allow", + "Action": [ + "logs:CreateLogStream", + "logs:PutLogEvents" + ], + "Resource": [ + "*" + ] + } + ] +} diff --git a/tf_ecs_cluster/security.tf b/tf_ecs_cluster/security.tf new file mode 100644 index 0000000..005c850 --- /dev/null +++ b/tf_ecs_cluster/security.tf @@ -0,0 +1,63 @@ +resource "aws_security_group" "instance_sg" { + description = "controls direct access to application instances" + vpc_id = "${var.vpc_id}" + name = "tf-ecs-${var.cluster_name}-instsg" + + ingress { + protocol = "-1" + from_port = 0 + to_port = 0 + + security_groups = [ + "${var.lb_security_group}", + ] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "aws_iam_instance_profile" "ecs" { + name = "tf-ecs-${var.cluster_name}-instprofile" + role = "${aws_iam_role.app_instance.name}" +} + +resource "aws_iam_role" "app_instance" { + name = "tf-ecs-${var.cluster_name}-instance-role" + + assume_role_policy = <