From ec511f8218c77e51eeb688ac8f8582862552c21e Mon Sep 17 00:00:00 2001 From: Ganesh Hegde Date: Tue, 6 Dec 2016 12:41:37 +0530 Subject: [PATCH] simple EC2/ECS helpers for dockprom --- helpers/aws/README.md | 22 ++++++ helpers/aws/cadvisor_ecs_task_definition.json | 78 +++++++++++++++++++ .../aws/node_exporter_task_definition.json | 22 ++++++ helpers/aws/prometheus.yml | 53 +++++++++++++ 4 files changed, 175 insertions(+) create mode 100644 helpers/aws/README.md create mode 100644 helpers/aws/cadvisor_ecs_task_definition.json create mode 100644 helpers/aws/node_exporter_task_definition.json create mode 100644 helpers/aws/prometheus.yml diff --git a/helpers/aws/README.md b/helpers/aws/README.md new file mode 100644 index 0000000..1e88ca8 --- /dev/null +++ b/helpers/aws/README.md @@ -0,0 +1,22 @@ +# Prometheus on EC2 & ECS: + +Some helpers for anyone configuring Prometheus on ECS and AWS EC2. + +To get started on AWS ECS and EC2: + +*For EC2/ECS nodes*: +- Import the ECS task definitions for cadvisor and node-exporter, create a service/task from each, and run them on every host you want to monitor +- Any host that has the "Monitoring: On" tag will automatically be added to the targets +- Expose ports 9100 and 9191 to your Prometheus host + +*For Prometheus host*: + +- Copy the prometheus.yml configuration present here into the base Prometheus configuration to enable EC2 service discovery +- `docker compose up -d` + +**Note**: +Set query.staleness-delta to 1m to make metrics more real-time + + +### TODO +- Add alerting rules based on ECS diff --git a/helpers/aws/cadvisor_ecs_task_definition.json b/helpers/aws/cadvisor_ecs_task_definition.json new file mode 100644 index 0000000..f9a8527 --- /dev/null +++ b/helpers/aws/cadvisor_ecs_task_definition.json @@ -0,0 +1,78 @@ +{ + "family": "cadvisor", + "containerDefinitions": [ + { + "name": "cadvisor", + "image": "google/cadvisor", + "cpu": 10, + "memory": 300, + "portMappings": [ + { + "containerPort": 9191, + "hostPort": 9191 + } + ], + 
"essential": true, + "privileged": true, + "mountPoints": [ + { + "sourceVolume": "root", + "containerPath": "/rootfs", + "readOnly": true + }, + { + "sourceVolume": "var_run", + "containerPath": "/var/run", + "readOnly": false + }, + { + "sourceVolume": "sys", + "containerPath": "/sys", + "readOnly": true + }, + { + "sourceVolume": "var_lib_docker", + "containerPath": "/var/lib/docker", + "readOnly": true + }, + { + "sourceVolume": "cgroup", + "containerPath": "/cgroup", + "readOnly": true + } + ] + } + ], + "volumes": [ + { + "name": "root", + "host": { + "sourcePath": "/" + } + }, + { + "name": "var_run", + "host": { + "sourcePath": "/var/run" + } + }, + { + "name": "sys", + "host": { + "sourcePath": "/sys" + } + }, + { + "name": "var_lib_docker", + "host": { + "sourcePath": "/var/lib/docker/" + } + }, + { + "name": "cgroup", + "host": { + "sourcePath": "/cgroup" + } + } + ] +} \ No newline at end of file diff --git a/helpers/aws/node_exporter_task_definition.json b/helpers/aws/node_exporter_task_definition.json new file mode 100644 index 0000000..9a6e4dc --- /dev/null +++ b/helpers/aws/node_exporter_task_definition.json @@ -0,0 +1,22 @@ +{ + "family": "prometheus", + "containerDefinitions": [ + { + "portMappings": [ + { + "hostPort": 9100, + "containerPort": 9100, + "protocol": "tcp" + } + ], + "essential": true, + "name": "node_exporter", + "image": "prom/node-exporter", + "cpu": 0, + "privileged": null, + "memoryReservation": 150 + } + ], + "volumes": [], + "networkMode": "host" +} diff --git a/helpers/aws/prometheus.yml b/helpers/aws/prometheus.yml new file mode 100644 index 0000000..7a94c75 --- /dev/null +++ b/helpers/aws/prometheus.yml @@ -0,0 +1,53 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). 
+ external_labels: + monitor: 'docker-host-alpha' + +# Load and evaluate rules in these files every 'evaluation_interval' seconds. +rule_files: + - "targets.rules" + - "hosts.rules" + - "containers.rules" + +# Scrape configurations, one job per monitored service. +scrape_configs: + - job_name: 'nodeexporter' + scrape_interval: 5s + static_configs: + - targets: ['nodeexporter:9100'] + + - job_name: 'cadvisor' + scrape_interval: 5s + static_configs: + - targets: ['cadvisor:8080'] + + - job_name: 'prometheus' + scrape_interval: 10s + static_configs: + - targets: ['localhost:9090'] + + +# Sample scrape configuration for AWS EC2 service discovery. Job names must be unique, so use these jobs in place of the static 'nodeexporter' and 'cadvisor' jobs above. + - job_name: 'nodeexporter' + ec2_sd_configs: + - region: us-east-1 + port: 9100 + relabel_configs: + # Only monitor instances whose "Monitoring" tag is set to "On" + - source_labels: [__meta_ec2_tag_Monitoring] + regex: On + action: keep + + - job_name: 'cadvisor' + ec2_sd_configs: + - region: us-east-1 + port: 9191 + relabel_configs: + # Only monitor instances whose "Monitoring" tag is set to "On" + - source_labels: [__meta_ec2_tag_Monitoring] + regex: On + action: keep