Skip to content

Commit

Permalink
Merge pull request #35 from telia-oss/use-cw-agent
Browse files: browse the repository at this point in the history
Use CloudWatch agent for added instance metrics
  • Loading branch information
Kristian authored Aug 27, 2019
2 parents a3d26ed + e995e76 commit 4f68641
Show file tree
Hide file tree
Showing 7 changed files with 202 additions and 63 deletions.
27 changes: 20 additions & 7 deletions modules/atc/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ module "atc" {

locals {
shared_cloud_init = templatefile("${path.module}/../cloud-init/shared.yml", {
region = data.aws_region.current.name
log_group_name = aws_cloudwatch_log_group.atc.name
prometheus_enabled = var.prometheus_enabled
region = data.aws_region.current.name
cloudwatch_namespace = var.name_prefix
log_group_name = aws_cloudwatch_log_group.atc.name
prometheus_enabled = var.prometheus_enabled
})

atc_cloud_init = templatefile("${path.module}/../cloud-init/atc.yml", {
Expand Down Expand Up @@ -133,6 +134,21 @@ resource "aws_cloudwatch_log_group" "atc" {
}

data "aws_iam_policy_document" "atc" {
statement {
effect = "Allow"

resources = ["*"]

actions = [
"cloudwatch:PutMetricData",
"cloudwatch:GetMetricStatistics",
"cloudwatch:ListMetrics",
"logs:DescribeLogStreams",
"logs:DescribeLogGroups",
"ec2:DescribeTags",
]
}

statement {
effect = "Allow"

Expand All @@ -147,16 +163,13 @@ data "aws_iam_policy_document" "atc" {
]
}

# Used for cfn-signal
statement {
effect = "Allow"

resources = ["*"]

actions = [
"cloudwatch:PutMetricData",
"cloudwatch:GetMetricStatistics",
"cloudwatch:ListMetrics",
"ec2:DescribeTags",
"elasticloadbalancing:DescribeTargetHealth",
]
}
Expand Down
79 changes: 60 additions & 19 deletions modules/cloud-init/shared.yml
Original file line number Diff line number Diff line change
@@ -1,24 +1,66 @@
#cloud-config
write_files:
- path: "/etc/awslogs/awscli.template"
- path: "/opt/aws/amazon-cloudwatch-agent.json"
permissions: "0644"
owner: "root"
content: |
[plugins]
cwlogs = cwlogs
[default]
region = ${region}
- path: "/etc/awslogs/awslogs.template"
permissions: "0644"
owner: "root"
content: |
[general]
state_file = /var/lib/awslogs/agent-state
[/var/log/concourse.log]
file = /var/log/concourse.log
log_group_name = ${log_group_name}
log_stream_name = {instance_id}
{
"agent": {
"region": "${region}",
"logfile": "/var/log/amazon-cloudwatch-agent.log",
"metrics_collection_interval": 60
},
"metrics": {
"namespace": "${cloudwatch_namespace}",
"append_dimensions": {
"InstanceId": "$${aws:InstanceId}",
"AutoScalingGroupName": "$${aws:AutoScalingGroupName}"
},
"aggregation_dimensions": [
[
"InstanceId"
],
[
"AutoScalingGroupName"
]
],
"metrics_collected": {
"disk": {
"resources": [
"*"
],
"measurement": [
"disk_used_percent"
]
},
"swap": {
"measurement": [
"swap_used_percent"
]
},
"mem": {
"measurement": [
"mem_used_percent"
]
}
}
},
"logs": {
"logs_collected": {
"files": {
"collect_list": [
{
"file_path": "/var/log/concourse.log*",
"log_group_name": "${log_group_name}",
"log_stream_name": "{instance_id}",
"timezone": "UTC"
}
]
}
},
"log_stream_name": "{instance_id}/unknown-log-stream"
}
}
- path: "/etc/systemd/system/node_exporter.service"
permissions: "0644"
owner: "root"
Expand Down Expand Up @@ -67,10 +109,9 @@ write_files:
}
runcmd:
- |
cp /etc/awslogs/awscli.template /etc/awslogs/awscli.conf
cp /etc/awslogs/awslogs.template /etc/awslogs/awslogs.conf
/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:/opt/aws/amazon-cloudwatch-agent.json
- |
systemctl enable concourse-logging.service --now
systemctl enable awslogsd.service --now
systemctl enable amazon-cloudwatch-agent.service --now
systemctl enable concourse.service --now
%{if prometheus_enabled } systemctl enable node_exporter.service --now %{ endif }
105 changes: 96 additions & 9 deletions modules/dashboard/dashboard.json.template
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,65 @@
"x": 6,
"y": 0,
"width": 6,
"height": 9,
"height": 6,
"properties": {
"metrics": [],
"metrics": [
[
"${cloudwatch_namespace}",
"mem_used_percent",
"AutoScalingGroupName",
"${atc_asg_name}",
{
"label": "ATC"
}
],
[
"${cloudwatch_namespace}",
"mem_used_percent",
"AutoScalingGroupName",
"${worker_asg_name}",
{
"label": "Worker"
}
]
],
"view": "timeSeries",
"region": "${region}",
"title": "Memory usage",
"period": ${period},
"stacked": false
}
},
{
"type": "metric",
"x": 6,
"y": 6,
"width": 6,
"height": 3,
"properties": {
"metrics": [
[
"${cloudwatch_namespace}",
"swap_used_percent",
"AutoScalingGroupName",
"${atc_asg_name}",
{
"label": "ATC"
}
],
[
"${cloudwatch_namespace}",
"swap_used_percent",
"AutoScalingGroupName",
"${worker_asg_name}",
{
"label": "Worker"
}
]
],
"view": "timeSeries",
"region": "${region}",
"title": "PLACEHOLDER",
"title": "Swap usage",
"period": ${period},
"stacked": false
}
Expand All @@ -58,17 +111,17 @@
"properties": {
"metrics": [
[
"AWS/AutoScaling",
"GroupTotalInstances",
"${cloudwatch_namespace}",
"disk_used_percent",
"AutoScalingGroupName",
"${atc_asg_name}",
{
"label": "ATC"
}
],
[
"AWS/AutoScaling",
"GroupTotalInstances",
"${cloudwatch_namespace}",
"disk_used_percent",
"AutoScalingGroupName",
"${worker_asg_name}",
{
Expand All @@ -78,7 +131,7 @@
],
"view": "timeSeries",
"region": "${region}",
"title": "Instance count",
"title": "Disk usage",
"period": ${period},
"stacked": false
}
Expand All @@ -88,7 +141,7 @@
"x": 18,
"y": 0,
"width": 6,
"height": 9,
"height": 6,
"properties": {
"metrics": [
[
Expand Down Expand Up @@ -117,6 +170,40 @@
"period": ${period}
}
},
{
"type": "metric",
"x": 18,
"y": 6,
"width": 6,
"height": 3,
"properties": {
"metrics": [
[
"AWS/AutoScaling",
"GroupTotalInstances",
"AutoScalingGroupName",
"${atc_asg_name}",
{
"label": "ATC"
}
],
[
"AWS/AutoScaling",
"GroupTotalInstances",
"AutoScalingGroupName",
"${worker_asg_name}",
{
"label": "Worker"
}
]
],
"view": "timeSeries",
"region": "${region}",
"title": "Instance count",
"period": ${period},
"stacked": false
}
},
{
"type": "metric",
"x": 0,
Expand Down
1 change: 1 addition & 0 deletions modules/dashboard/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ locals {
resource "aws_cloudwatch_dashboard" "main" {
dashboard_name = var.name_prefix
dashboard_body = templatefile("${path.module}/dashboard.json.template", {
cloudwatch_namespace = var.name_prefix
atc_asg_name = var.atc_asg_name
atc_log_group_name = var.atc_log_group_name
worker_asg_name = var.worker_asg_name
Expand Down
2 changes: 1 addition & 1 deletion modules/dashboard/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ variable "nat_gateway_ids" {
variable "period" {
description = "The default period, in seconds, for all metrics in this widget. The period is the length of time represented by one data point on the graph."
type = number
default = 300
default = 60
}


Expand Down
Loading

0 comments on commit 4f68641

Please sign in to comment.