diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-conf.xml similarity index 92% rename from bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml rename to bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-conf.xml index b3ea6d0e..09942ee4 100644 --- a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml +++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-conf.xml @@ -36,6 +36,13 @@ global: external_labels: monitor: 'codelab-monitor' +# Rule files specifies a list of globs. Rules and alerts are read from +# all matching files. +rule_files: +<#if rules_file_name??> + - ${rules_file_name} +</#if> + # A scrape configuration containing exactly one endpoint to scrape: # Here it's Prometheus itself. 
scrape_configs: diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml new file mode 100644 index 00000000..e7ee51e1 --- /dev/null +++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml @@ -0,0 +1,65 @@ + + + + + + rules_file_name + prometheus_rules.yml + Rules file name + + + content + This is the freemarker template for rules file + 0.9 + # This expression checks if the average CPU usage over the last 5 minutes for each instance is greater than 90% + for: 5m # The condition must hold true for 5 minutes before the alert is triggered + labels: + severity: critical # Set the severity of the alert as 'critical' + annotations: + summary: "CPU usage on instance {{ $labels.instance }} is over 90% for the last 5 minutes" + # Summary of the alert that will appear when it triggers + description: "The CPU usage on instance {{ $labels.instance }} has been over 90% for the past 5 minutes." 
+ # Detailed description of the alert that will provide more context +]]> + + + longtext + + + \ No newline at end of file diff --git a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java index c97f3e90..8a78c13d 100644 --- a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java +++ b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java @@ -51,6 +51,8 @@ public class PrometheusParams extends InfraParams { private String prometheusPort; private String prometheusContent; private String prometheusScrapeInterval; + private String prometheusRulesFilename; + private String prometheusRulesFileContent; public PrometheusParams(CommandPayload commandPayload) { super(commandPayload); @@ -59,6 +61,7 @@ public PrometheusParams(CommandPayload commandPayload) { scrapeJobs.add(agentScrapeJob); globalParamsMap.put("scrape_jobs", scrapeJobs); globalParamsMap.put("scrape_interval", prometheusScrapeInterval); + globalParamsMap.put("rules_file_name", prometheusRulesFilename); } public String dataDir() { @@ -89,7 +92,7 @@ protected List getAllHost() { @GlobalParams public Map prometheusJob() { - Map configuration = LocalSettings.configurations(getServiceName(), "prometheus"); + Map configuration = LocalSettings.configurations(getServiceName(), "prometheus-conf"); prometheusPort = (String) configuration.get("port"); Map job = new HashMap<>(); job.put("name", PROMETHEUS_SELF_JOB_NAME); @@ -106,19 +109,28 @@ public Map agentJob() { job.put("targets_file", targetsConfigFile(BM_AGENT_JOB_NAME)); job.put("targets_list", getAllHost()); agentScrapeJob = job; - return 
LocalSettings.configurations(getServiceName(), "prometheus"); + return LocalSettings.configurations(getServiceName(), "prometheus-conf"); } @GlobalParams public Map configs() { - Map configuration = LocalSettings.configurations(getServiceName(), "prometheus"); + Map configuration = LocalSettings.configurations(getServiceName(), "prometheus-conf"); prometheusContent = (String) configuration.get("content"); prometheusScrapeInterval = (String) configuration.get("scrape_interval"); return configuration; } - public Object listenAddress() { + @GlobalParams + public Map rules() { + Map configuration = LocalSettings.configurations(getServiceName(), "prometheus-rule"); + + prometheusRulesFilename = (String) configuration.get("rules_file_name"); + prometheusRulesFileContent = (String) configuration.get("content"); + return configuration; + } + + public String listenAddress() { return MessageFormat.format("0.0.0.0:{0}", prometheusPort); } } diff --git a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java index 8731a750..e78a136c 100644 --- a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java +++ b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java @@ -53,6 +53,15 @@ public static ShellResult config(Params params) { Constants.PERMISSION_644, prometheusParams.getGlobalParamsMap()); + LinuxFileUtils.toFileByTemplate( + prometheusParams.getPrometheusRulesFileContent(), + MessageFormat.format( + "{0}/{1}", prometheusParams.confDir(), prometheusParams.getPrometheusRulesFilename()), + user, + group, + Constants.PERMISSION_644, + prometheusParams.getGlobalParamsMap()); + for (int i = 0; 
i < prometheusParams.getScrapeJobs().size(); i++) { Map job = prometheusParams.getScrapeJobs().get(i); Map> targets = new HashMap<>();